1 /******************************************************************************
3 Copyright (c) 2006-2009, Myricom Inc.
6 Redistribution and use in source and binary forms, with or without
7 modification, are permitted provided that the following conditions are met:
9 1. Redistributions of source code must retain the above copyright notice,
10 this list of conditions and the following disclaimer.
12 2. Neither the name of the Myricom Inc, nor the names of its
13 contributors may be used to endorse or promote products derived from
14 this software without specific prior written permission.
16 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
17 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
20 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
21 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
22 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
23 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
24 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
25 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
26 POSSIBILITY OF SUCH DAMAGE.
28 ***************************************************************************/
30 #include <sys/cdefs.h>
31 __FBSDID("$FreeBSD$");
33 #include <sys/param.h>
34 #include <sys/systm.h>
35 #include <sys/linker.h>
36 #include <sys/firmware.h>
37 #include <sys/endian.h>
38 #include <sys/sockio.h>
40 #include <sys/malloc.h>
42 #include <sys/kernel.h>
44 #include <sys/module.h>
45 #include <sys/socket.h>
46 #include <sys/sysctl.h>
48 #include <sys/taskqueue.h>
50 /* count xmits ourselves, rather than via drbr */
53 #include <net/if_arp.h>
54 #include <net/ethernet.h>
55 #include <net/if_dl.h>
56 #include <net/if_media.h>
60 #include <net/if_types.h>
61 #include <net/if_vlan_var.h>
64 #include <netinet/in_systm.h>
65 #include <netinet/in.h>
66 #include <netinet/ip.h>
67 #include <netinet/tcp.h>
69 #include <machine/bus.h>
70 #include <machine/in_cksum.h>
71 #include <machine/resource.h>
76 #include <dev/pci/pcireg.h>
77 #include <dev/pci/pcivar.h>
78 #include <dev/pci/pci_private.h> /* XXX for pci_cfg_restore */
80 #include <vm/vm.h> /* for pmap_mapdev() */
83 #if defined(__i386) || defined(__amd64)
84 #include <machine/specialreg.h>
87 #include <dev/mxge/mxge_mcp.h>
88 #include <dev/mxge/mcp_gen_header.h>
89 /*#define MXGE_FAKE_IFP*/
90 #include <dev/mxge/if_mxge_var.h>
92 #include <sys/buf_ring.h>
98 static int mxge_nvidia_ecrc_enable = 1;
99 static int mxge_force_firmware = 0;
100 static int mxge_intr_coal_delay = 30;
101 static int mxge_deassert_wait = 1;
102 static int mxge_flow_control = 1;
103 static int mxge_verbose = 0;
104 static int mxge_lro_cnt = 8;
105 static int mxge_ticks;
106 static int mxge_max_slices = 1;
107 static int mxge_rss_hash_type = MXGEFW_RSS_HASH_TYPE_SRC_DST_PORT;
108 static int mxge_always_promisc = 0;
109 static int mxge_initial_mtu = ETHERMTU_JUMBO;
110 static int mxge_throttle = 0;
111 static char *mxge_fw_unaligned = "mxge_ethp_z8e";
112 static char *mxge_fw_aligned = "mxge_eth_z8e";
113 static char *mxge_fw_rss_aligned = "mxge_rss_eth_z8e";
114 static char *mxge_fw_rss_unaligned = "mxge_rss_ethp_z8e";
116 static int mxge_probe(device_t dev);
117 static int mxge_attach(device_t dev);
118 static int mxge_detach(device_t dev);
119 static int mxge_shutdown(device_t dev);
120 static void mxge_intr(void *arg);
122 static device_method_t mxge_methods[] =
124 /* Device interface */
125 DEVMETHOD(device_probe, mxge_probe),
126 DEVMETHOD(device_attach, mxge_attach),
127 DEVMETHOD(device_detach, mxge_detach),
128 DEVMETHOD(device_shutdown, mxge_shutdown),
132 static driver_t mxge_driver =
136 sizeof(mxge_softc_t),
139 static devclass_t mxge_devclass;
141 /* Declare ourselves to be a child of the PCI bus.*/
142 DRIVER_MODULE(mxge, pci, mxge_driver, mxge_devclass, 0, 0);
143 MODULE_DEPEND(mxge, firmware, 1, 1, 1);
144 MODULE_DEPEND(mxge, zlib, 1, 1, 1);
146 static int mxge_load_firmware(mxge_softc_t *sc, int adopt);
147 static int mxge_send_cmd(mxge_softc_t *sc, uint32_t cmd, mxge_cmd_t *data);
148 static int mxge_close(mxge_softc_t *sc, int down);
149 static int mxge_open(mxge_softc_t *sc);
150 static void mxge_tick(void *arg);
153 mxge_probe(device_t dev)
158 if ((pci_get_vendor(dev) == MXGE_PCI_VENDOR_MYRICOM) &&
159 ((pci_get_device(dev) == MXGE_PCI_DEVICE_Z8E) ||
160 (pci_get_device(dev) == MXGE_PCI_DEVICE_Z8E_9))) {
161 rev = pci_get_revid(dev);
163 case MXGE_PCI_REV_Z8E:
164 device_set_desc(dev, "Myri10G-PCIE-8A");
166 case MXGE_PCI_REV_Z8ES:
167 device_set_desc(dev, "Myri10G-PCIE-8B");
170 device_set_desc(dev, "Myri10G-PCIE-8??");
171 device_printf(dev, "Unrecognized rev %d NIC\n",
181 mxge_enable_wc(mxge_softc_t *sc)
183 #if defined(__i386) || defined(__amd64)
188 len = rman_get_size(sc->mem_res);
189 err = pmap_change_attr((vm_offset_t) sc->sram,
190 len, PAT_WRITE_COMBINING);
192 device_printf(sc->dev, "pmap_change_attr failed, %d\n",
200 /* callback to get our DMA address */
202 mxge_dmamap_callback(void *arg, bus_dma_segment_t *segs, int nsegs,
206 *(bus_addr_t *) arg = segs->ds_addr;
211 mxge_dma_alloc(mxge_softc_t *sc, mxge_dma_t *dma, size_t bytes,
212 bus_size_t alignment)
215 device_t dev = sc->dev;
216 bus_size_t boundary, maxsegsize;
218 if (bytes > 4096 && alignment == 4096) {
226 /* allocate DMAable memory tags */
227 err = bus_dma_tag_create(sc->parent_dmat, /* parent */
228 alignment, /* alignment */
229 boundary, /* boundary */
230 BUS_SPACE_MAXADDR, /* low */
231 BUS_SPACE_MAXADDR, /* high */
232 NULL, NULL, /* filter */
235 maxsegsize, /* maxsegsize */
236 BUS_DMA_COHERENT, /* flags */
237 NULL, NULL, /* lock */
238 &dma->dmat); /* tag */
240 device_printf(dev, "couldn't alloc tag (err = %d)\n", err);
244 /* allocate DMAable memory & map */
245 err = bus_dmamem_alloc(dma->dmat, &dma->addr,
246 (BUS_DMA_WAITOK | BUS_DMA_COHERENT
247 | BUS_DMA_ZERO), &dma->map);
249 device_printf(dev, "couldn't alloc mem (err = %d)\n", err);
250 goto abort_with_dmat;
253 /* load the memory */
254 err = bus_dmamap_load(dma->dmat, dma->map, dma->addr, bytes,
255 mxge_dmamap_callback,
256 (void *)&dma->bus_addr, 0);
258 device_printf(dev, "couldn't load map (err = %d)\n", err);
264 bus_dmamem_free(dma->dmat, dma->addr, dma->map);
266 (void)bus_dma_tag_destroy(dma->dmat);
272 mxge_dma_free(mxge_dma_t *dma)
274 bus_dmamap_unload(dma->dmat, dma->map);
275 bus_dmamem_free(dma->dmat, dma->addr, dma->map);
276 (void)bus_dma_tag_destroy(dma->dmat);
280 * The eeprom strings on the lanaiX have the format
* MAC=x:x:x:x:x:x\0  PC=text\0  SN=text\0  (and optionally SN2=text\0);
* mxge_parse_strings() below walks them one NUL-terminated string at a
* time. */
287 mxge_parse_strings(mxge_softc_t *sc)
290 int i, found_mac, found_sn2;
293 ptr = sc->eeprom_strings;
296 while (*ptr != '\0') {
297 if (strncmp(ptr, "MAC=", 4) == 0) {
300 sc->mac_addr[i] = strtoul(ptr, &endptr, 16);
301 if (endptr - ptr != 2)
310 } else if (strncmp(ptr, "PC=", 3) == 0) {
312 strlcpy(sc->product_code_string, ptr,
313 sizeof(sc->product_code_string));
314 } else if (!found_sn2 && (strncmp(ptr, "SN=", 3) == 0)) {
316 strlcpy(sc->serial_number_string, ptr,
317 sizeof(sc->serial_number_string));
318 } else if (strncmp(ptr, "SN2=", 4) == 0) {
319 /* SN2 takes precedence over SN */
322 strlcpy(sc->serial_number_string, ptr,
323 sizeof(sc->serial_number_string));
325 while (*ptr++ != '\0') {}
332 device_printf(sc->dev, "failed to parse eeprom_strings\n");
337 #if defined __i386 || defined i386 || defined __i386__ || defined __x86_64__
339 mxge_enable_nvidia_ecrc(mxge_softc_t *sc)
342 unsigned long base, off;
344 device_t pdev, mcp55;
345 uint16_t vendor_id, device_id, word;
346 uintptr_t bus, slot, func, ivend, idev;
350 if (!mxge_nvidia_ecrc_enable)
353 pdev = device_get_parent(device_get_parent(sc->dev));
355 device_printf(sc->dev, "could not find parent?\n");
358 vendor_id = pci_read_config(pdev, PCIR_VENDOR, 2);
359 device_id = pci_read_config(pdev, PCIR_DEVICE, 2);
361 if (vendor_id != 0x10de)
366 if (device_id == 0x005d) {
367 /* ck804, base address is magic */
369 } else if (device_id >= 0x0374 && device_id <= 0x378) {
370 /* mcp55, base address stored in chipset */
371 mcp55 = pci_find_bsf(0, 0, 0);
373 0x10de == pci_read_config(mcp55, PCIR_VENDOR, 2) &&
374 0x0369 == pci_read_config(mcp55, PCIR_DEVICE, 2)) {
375 word = pci_read_config(mcp55, 0x90, 2);
376 base = ((unsigned long)word & 0x7ffeU) << 25;
383 Test below is commented because it is believed that doing
384 config read/write beyond 0xff will access the config space
385 for the next larger function. Uncomment this and remove
386 the hacky pmap_mapdev() way of accessing config space when
387 FreeBSD grows support for extended pcie config space access */
390 /* See if we can, by some miracle, access the extended
392 val = pci_read_config(pdev, 0x178, 4);
393 if (val != 0xffffffff) {
395 pci_write_config(pdev, 0x178, val, 4);
399 /* Rather than using normal pci config space writes, we must
400 * map the Nvidia config space ourselves. This is because on
401 * opteron/nvidia class machine the 0xe000000 mapping is
402 * handled by the nvidia chipset, that means the internal PCI
403 * device (the on-chip northbridge), or the amd-8131 bridge
404 * and things behind them are not visible by this method.
407 BUS_READ_IVAR(device_get_parent(pdev), pdev,
409 BUS_READ_IVAR(device_get_parent(pdev), pdev,
410 PCI_IVAR_SLOT, &slot);
411 BUS_READ_IVAR(device_get_parent(pdev), pdev,
412 PCI_IVAR_FUNCTION, &func);
413 BUS_READ_IVAR(device_get_parent(pdev), pdev,
414 PCI_IVAR_VENDOR, &ivend);
415 BUS_READ_IVAR(device_get_parent(pdev), pdev,
416 PCI_IVAR_DEVICE, &idev);
419 + 0x00100000UL * (unsigned long)bus
420 + 0x00001000UL * (unsigned long)(func
423 /* map it into the kernel */
424 va = pmap_mapdev(trunc_page((vm_paddr_t)off), PAGE_SIZE);
428 device_printf(sc->dev, "pmap_mapdev() failed\n");
431 /* get a pointer to the config space mapped into the kernel */
432 cfgptr = va + (off & PAGE_MASK);
434 /* make sure that we can really access it */
435 vendor_id = *(uint16_t *)(cfgptr + PCIR_VENDOR);
436 device_id = *(uint16_t *)(cfgptr + PCIR_DEVICE);
437 if (! (vendor_id == ivend && device_id == idev)) {
438 device_printf(sc->dev, "mapping failed: 0x%x:0x%x\n",
439 vendor_id, device_id);
440 pmap_unmapdev((vm_offset_t)va, PAGE_SIZE);
444 ptr32 = (uint32_t*)(cfgptr + 0x178);
447 if (val == 0xffffffff) {
448 device_printf(sc->dev, "extended mapping failed\n");
449 pmap_unmapdev((vm_offset_t)va, PAGE_SIZE);
453 pmap_unmapdev((vm_offset_t)va, PAGE_SIZE);
455 device_printf(sc->dev,
456 "Enabled ECRC on upstream Nvidia bridge "
458 (int)bus, (int)slot, (int)func);
463 mxge_enable_nvidia_ecrc(mxge_softc_t *sc)
465 device_printf(sc->dev,
466 "Nforce 4 chipset on non-x86/amd64!?!?!\n");
473 mxge_dma_test(mxge_softc_t *sc, int test_type)
476 bus_addr_t dmatest_bus = sc->dmabench_dma.bus_addr;
482 /* Run a small DMA test.
483 * The magic multipliers to the length tell the firmware
484 * to do DMA read, write, or read+write tests. The
485 * results are returned in cmd.data0. The upper 16
486 * bits of the return is the number of transfers completed.
487 * The lower 16 bits is the time in 0.5us ticks that the
488 * transfers took to complete.
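*
* Illustrative arithmetic (added note, numbers are made up): with
* len = tx_boundary = 4096, a result of cmd.data0 = (1000 << 16) | 8000
* means 1000 transfers completed in 8000 * 0.5us = 4000us, so the
* read bandwidth computed below is (1000 * 4096 * 2) / 8000 =
* 1024 MB/s, i.e. bytes per microsecond.
*/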
491 len = sc->tx_boundary;
493 cmd.data0 = MXGE_LOWPART_TO_U32(dmatest_bus);
494 cmd.data1 = MXGE_HIGHPART_TO_U32(dmatest_bus);
495 cmd.data2 = len * 0x10000;
496 status = mxge_send_cmd(sc, test_type, &cmd);
501 sc->read_dma = ((cmd.data0>>16) * len * 2) /
502 (cmd.data0 & 0xffff);
503 cmd.data0 = MXGE_LOWPART_TO_U32(dmatest_bus);
504 cmd.data1 = MXGE_HIGHPART_TO_U32(dmatest_bus);
505 cmd.data2 = len * 0x1;
506 status = mxge_send_cmd(sc, test_type, &cmd);
511 sc->write_dma = ((cmd.data0>>16) * len * 2) /
512 (cmd.data0 & 0xffff);
514 cmd.data0 = MXGE_LOWPART_TO_U32(dmatest_bus);
515 cmd.data1 = MXGE_HIGHPART_TO_U32(dmatest_bus);
516 cmd.data2 = len * 0x10001;
517 status = mxge_send_cmd(sc, test_type, &cmd);
522 sc->read_write_dma = ((cmd.data0>>16) * len * 2 * 2) /
523 (cmd.data0 & 0xffff);
526 if (status != 0 && test_type != MXGEFW_CMD_UNALIGNED_TEST)
527 device_printf(sc->dev, "DMA %s benchmark failed: %d\n",
534 * The Lanai Z8E PCI-E interface achieves higher Read-DMA throughput
535 * when the PCI-E Completion packets are aligned on an 8-byte
536 * boundary. Some PCI-E chip sets always align Completion packets; on
537 * the ones that do not, the alignment can be enforced by enabling
538 * ECRC generation (if supported).
540 * When PCI-E Completion packets are not aligned, it is actually more
541 * efficient to limit Read-DMA transactions to 2KB, rather than 4KB.
543 * If the driver can neither enable ECRC nor verify that it has
544 * already been enabled, then it must use a firmware image which works
545 * around unaligned completion packets (ethp_z8e.dat), and it should
546 * also ensure that it never gives the device a Read-DMA which is
547 * larger than 2KB by setting the tx_boundary to 2KB. If ECRC is
548 * enabled, then the driver should use the aligned (eth_z8e.dat)
549 * firmware image, and set tx_boundary to 4KB.
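*
* Summary of the policy above (added note): aligned completions (or
* ECRC successfully enabled) -> aligned firmware (eth_z8e) with
* tx_boundary = 4096; otherwise -> unaligned-tolerant firmware
* (ethp_z8e) with tx_boundary = 2048.
*/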
553 mxge_firmware_probe(mxge_softc_t *sc)
555 device_t dev = sc->dev;
559 sc->tx_boundary = 4096;
561 * Verify the max read request size was set to 4KB
562 * before trying the test with 4KB.
564 if (pci_find_extcap(dev, PCIY_EXPRESS, ®) == 0) {
565 pectl = pci_read_config(dev, reg + 0x8, 2);
566 if ((pectl & (5 << 12)) != (5 << 12)) {
567 device_printf(dev, "Max Read Req. size != 4k (0x%x)\n",
569 sc->tx_boundary = 2048;
574 * load the optimized firmware (which assumes aligned PCIe
575 * completions) in order to see if it works on this host.
577 sc->fw_name = mxge_fw_aligned;
578 status = mxge_load_firmware(sc, 1);
584 * Enable ECRC if possible
586 mxge_enable_nvidia_ecrc(sc);
589 * Run a DMA test which watches for unaligned completions and
590 * aborts on the first one seen. Not required on Z8ES or newer.
592 if (pci_get_revid(sc->dev) >= MXGE_PCI_REV_Z8ES)
594 status = mxge_dma_test(sc, MXGEFW_CMD_UNALIGNED_TEST);
596 return 0; /* keep the aligned firmware */
599 device_printf(dev, "DMA test failed: %d\n", status);
600 if (status == ENOSYS)
601 device_printf(dev, "Falling back to ethp! "
602 "Please install up to date fw\n");
607 mxge_select_firmware(mxge_softc_t *sc)
610 int force_firmware = mxge_force_firmware;
613 force_firmware = sc->throttle;
615 if (force_firmware != 0) {
616 if (force_firmware == 1)
621 device_printf(sc->dev,
622 "Assuming %s completions (forced)\n",
623 aligned ? "aligned" : "unaligned");
627 /* if the PCIe link width is 4 or less, we can use the aligned
628 firmware and skip any checks */
629 if (sc->link_width != 0 && sc->link_width <= 4) {
630 device_printf(sc->dev,
631 "PCIe x%d Link, expect reduced performance\n",
637 if (0 == mxge_firmware_probe(sc))
642 sc->fw_name = mxge_fw_aligned;
643 sc->tx_boundary = 4096;
645 sc->fw_name = mxge_fw_unaligned;
646 sc->tx_boundary = 2048;
648 return (mxge_load_firmware(sc, 0));
652 mxge_validate_firmware(mxge_softc_t *sc, const mcp_gen_header_t *hdr)
656 if (be32toh(hdr->mcp_type) != MCP_TYPE_ETH) {
657 device_printf(sc->dev, "Bad firmware type: 0x%x\n",
658 be32toh(hdr->mcp_type));
662 /* save firmware version for sysctl */
663 strlcpy(sc->fw_version, hdr->version, sizeof(sc->fw_version));
665 device_printf(sc->dev, "firmware id: %s\n", hdr->version);
667 sscanf(sc->fw_version, "%d.%d.%d", &sc->fw_ver_major,
668 &sc->fw_ver_minor, &sc->fw_ver_tiny);
670 if (!(sc->fw_ver_major == MXGEFW_VERSION_MAJOR
671 && sc->fw_ver_minor == MXGEFW_VERSION_MINOR)) {
672 device_printf(sc->dev, "Found firmware version %s\n",
674 device_printf(sc->dev, "Driver needs %d.%d\n",
675 MXGEFW_VERSION_MAJOR, MXGEFW_VERSION_MINOR);
683 z_alloc(void *nil, u_int items, u_int size)
687 ptr = malloc(items * size, M_TEMP, M_NOWAIT);
692 z_free(void *nil, void *ptr)
699 mxge_load_firmware_helper(mxge_softc_t *sc, uint32_t *limit)
702 char *inflate_buffer;
703 const struct firmware *fw;
704 const mcp_gen_header_t *hdr;
711 fw = firmware_get(sc->fw_name);
713 device_printf(sc->dev, "Could not find firmware image %s\n",
720 /* setup zlib and decompress f/w */
721 bzero(&zs, sizeof (zs));
724 status = inflateInit(&zs);
725 if (status != Z_OK) {
730 /* the uncompressed size is stored as the firmware version,
731 which would otherwise go unused */
732 fw_len = (size_t) fw->version;
733 inflate_buffer = malloc(fw_len, M_TEMP, M_NOWAIT);
734 if (inflate_buffer == NULL)
736 zs.avail_in = fw->datasize;
737 zs.next_in = __DECONST(char *, fw->data);
738 zs.avail_out = fw_len;
739 zs.next_out = inflate_buffer;
740 status = inflate(&zs, Z_FINISH);
741 if (status != Z_STREAM_END) {
742 device_printf(sc->dev, "zlib %d\n", status);
744 goto abort_with_buffer;
748 hdr_offset = htobe32(*(const uint32_t *)
749 (inflate_buffer + MCP_HEADER_PTR_OFFSET));
750 if ((hdr_offset & 3) || hdr_offset + sizeof(*hdr) > fw_len) {
751 device_printf(sc->dev, "Bad firmware file");
753 goto abort_with_buffer;
755 hdr = (const void*)(inflate_buffer + hdr_offset);
757 status = mxge_validate_firmware(sc, hdr);
759 goto abort_with_buffer;
761 /* Copy the inflated firmware to NIC SRAM. */
762 for (i = 0; i < fw_len; i += 256) {
763 mxge_pio_copy(sc->sram + MXGE_FW_OFFSET + i,
765 min(256U, (unsigned)(fw_len - i)));
774 free(inflate_buffer, M_TEMP);
778 firmware_put(fw, FIRMWARE_UNLOAD);
783 * Enable or disable periodic RDMAs from the host to make certain
784 * chipsets resend dropped PCIe messages
788 mxge_dummy_rdma(mxge_softc_t *sc, int enable)
791 volatile uint32_t *confirm;
792 volatile char *submit;
793 uint32_t *buf, dma_low, dma_high;
796 buf = (uint32_t *)((unsigned long)(buf_bytes + 7) & ~7UL);
798 /* clear confirmation addr */
799 confirm = (volatile uint32_t *)sc->cmd;
803 /* send an rdma command to the PCIe engine, and wait for the
804 response in the confirmation address. The firmware should
805 write a -1 there to indicate it is alive and well
808 dma_low = MXGE_LOWPART_TO_U32(sc->cmd_dma.bus_addr);
809 dma_high = MXGE_HIGHPART_TO_U32(sc->cmd_dma.bus_addr);
810 buf[0] = htobe32(dma_high); /* confirm addr MSW */
811 buf[1] = htobe32(dma_low); /* confirm addr LSW */
812 buf[2] = htobe32(0xffffffff); /* confirm data */
813 dma_low = MXGE_LOWPART_TO_U32(sc->zeropad_dma.bus_addr);
814 dma_high = MXGE_HIGHPART_TO_U32(sc->zeropad_dma.bus_addr);
815 buf[3] = htobe32(dma_high); /* dummy addr MSW */
816 buf[4] = htobe32(dma_low); /* dummy addr LSW */
817 buf[5] = htobe32(enable); /* enable? */
820 submit = (volatile char *)(sc->sram + MXGEFW_BOOT_DUMMY_RDMA);
822 mxge_pio_copy(submit, buf, 64);
827 while (*confirm != 0xffffffff && i < 20) {
831 if (*confirm != 0xffffffff) {
832 device_printf(sc->dev, "dummy rdma %s failed (%p = 0x%x)",
833 (enable ? "enable" : "disable"), confirm,
840 mxge_send_cmd(mxge_softc_t *sc, uint32_t cmd, mxge_cmd_t *data)
843 char buf_bytes[sizeof(*buf) + 8];
844 volatile mcp_cmd_response_t *response = sc->cmd;
845 volatile char *cmd_addr = sc->sram + MXGEFW_ETH_CMD;
846 uint32_t dma_low, dma_high;
847 int err, sleep_total = 0;
849 /* ensure buf is aligned to 8 bytes */
850 buf = (mcp_cmd_t *)((unsigned long)(buf_bytes + 7) & ~7UL);
852 buf->data0 = htobe32(data->data0);
853 buf->data1 = htobe32(data->data1);
854 buf->data2 = htobe32(data->data2);
855 buf->cmd = htobe32(cmd);
856 dma_low = MXGE_LOWPART_TO_U32(sc->cmd_dma.bus_addr);
857 dma_high = MXGE_HIGHPART_TO_U32(sc->cmd_dma.bus_addr);
859 buf->response_addr.low = htobe32(dma_low);
860 buf->response_addr.high = htobe32(dma_high);
861 mtx_lock(&sc->cmd_mtx);
862 response->result = 0xffffffff;
864 mxge_pio_copy((volatile void *)cmd_addr, buf, sizeof (*buf));
866 /* wait up to 20ms */
868 for (sleep_total = 0; sleep_total < 20; sleep_total++) {
869 bus_dmamap_sync(sc->cmd_dma.dmat,
870 sc->cmd_dma.map, BUS_DMASYNC_POSTREAD);
872 switch (be32toh(response->result)) {
874 data->data0 = be32toh(response->data);
880 case MXGEFW_CMD_UNKNOWN:
883 case MXGEFW_CMD_ERROR_UNALIGNED:
886 case MXGEFW_CMD_ERROR_BUSY:
889 case MXGEFW_CMD_ERROR_I2C_ABSENT:
893 device_printf(sc->dev,
895 "failed, result = %d\n",
896 cmd, be32toh(response->result));
904 device_printf(sc->dev, "mxge: command %d timed out"
906 cmd, be32toh(response->result));
907 mtx_unlock(&sc->cmd_mtx);
912 mxge_adopt_running_firmware(mxge_softc_t *sc)
914 struct mcp_gen_header *hdr;
915 const size_t bytes = sizeof (struct mcp_gen_header);
919 /* find running firmware header */
920 hdr_offset = htobe32(*(volatile uint32_t *)
921 (sc->sram + MCP_HEADER_PTR_OFFSET));
923 if ((hdr_offset & 3) || hdr_offset + sizeof(*hdr) > sc->sram_size) {
924 device_printf(sc->dev,
925 "Running firmware has bad header offset (%d)\n",
930 /* copy header of running firmware from SRAM to host memory to
931 * validate firmware */
932 hdr = malloc(bytes, M_DEVBUF, M_NOWAIT);
934 device_printf(sc->dev, "could not malloc firmware hdr\n");
937 bus_space_read_region_1(rman_get_bustag(sc->mem_res),
938 rman_get_bushandle(sc->mem_res),
939 hdr_offset, (char *)hdr, bytes);
940 status = mxge_validate_firmware(sc, hdr);
944 * check to see if adopted firmware has bug where adopting
945 * it will cause broadcasts to be filtered unless the NIC
946 * is kept in ALLMULTI mode
948 if (sc->fw_ver_major == 1 && sc->fw_ver_minor == 4 &&
949 sc->fw_ver_tiny >= 4 && sc->fw_ver_tiny <= 11) {
950 sc->adopted_rx_filter_bug = 1;
951 device_printf(sc->dev, "Adopting fw %d.%d.%d: "
952 "working around rx filter bug\n",
953 sc->fw_ver_major, sc->fw_ver_minor,
962 mxge_load_firmware(mxge_softc_t *sc, int adopt)
964 volatile uint32_t *confirm;
965 volatile char *submit;
967 uint32_t *buf, size, dma_low, dma_high;
970 buf = (uint32_t *)((unsigned long)(buf_bytes + 7) & ~7UL);
972 size = sc->sram_size;
973 status = mxge_load_firmware_helper(sc, &size);
977 /* Try to use the currently running firmware, if it is new enough */
979 status = mxge_adopt_running_firmware(sc);
981 device_printf(sc->dev,
982 "failed to adopt running firmware\n");
985 device_printf(sc->dev,
986 "Successfully adopted running firmware\n");
987 if (sc->tx_boundary == 4096) {
988 device_printf(sc->dev,
989 "Using firmware currently running on NIC.  For optimal\n");
991 device_printf(sc->dev,
992 "performance consider loading optimized firmware\n");
995 sc->fw_name = mxge_fw_unaligned;
996 sc->tx_boundary = 2048;
999 /* clear confirmation addr */
1000 confirm = (volatile uint32_t *)sc->cmd;
1003 /* send a reload command to the bootstrap MCP, and wait for the
1004 response in the confirmation address. The firmware should
1005 write a -1 there to indicate it is alive and well
1008 dma_low = MXGE_LOWPART_TO_U32(sc->cmd_dma.bus_addr);
1009 dma_high = MXGE_HIGHPART_TO_U32(sc->cmd_dma.bus_addr);
1011 buf[0] = htobe32(dma_high); /* confirm addr MSW */
1012 buf[1] = htobe32(dma_low); /* confirm addr LSW */
1013 buf[2] = htobe32(0xffffffff); /* confirm data */
1015 /* FIX: All newest firmware should un-protect the bottom of
1016 the sram before handoff. However, the very first interfaces
1017 do not. Therefore the handoff copy must skip the first 8 bytes
1019 /* where the code starts*/
1020 buf[3] = htobe32(MXGE_FW_OFFSET + 8);
1021 buf[4] = htobe32(size - 8); /* length of code */
1022 buf[5] = htobe32(8); /* where to copy to */
1023 buf[6] = htobe32(0); /* where to jump to */
1025 submit = (volatile char *)(sc->sram + MXGEFW_BOOT_HANDOFF);
1026 mxge_pio_copy(submit, buf, 64);
1031 while (*confirm != 0xffffffff && i < 20) {
1034 bus_dmamap_sync(sc->cmd_dma.dmat,
1035 sc->cmd_dma.map, BUS_DMASYNC_POSTREAD);
1037 if (*confirm != 0xffffffff) {
1038 device_printf(sc->dev,"handoff failed (%p = 0x%x)",
1047 mxge_update_mac_address(mxge_softc_t *sc)
1050 uint8_t *addr = sc->mac_addr;
1054 cmd.data0 = ((addr[0] << 24) | (addr[1] << 16)
1055 | (addr[2] << 8) | addr[3]);
1057 cmd.data1 = ((addr[4] << 8) | (addr[5]));
1059 status = mxge_send_cmd(sc, MXGEFW_SET_MAC_ADDRESS, &cmd);
1064 mxge_change_pause(mxge_softc_t *sc, int pause)
1070 status = mxge_send_cmd(sc, MXGEFW_ENABLE_FLOW_CONTROL,
1073 status = mxge_send_cmd(sc, MXGEFW_DISABLE_FLOW_CONTROL,
1077 device_printf(sc->dev, "Failed to set flow control mode\n");
1085 mxge_change_promisc(mxge_softc_t *sc, int promisc)
1090 if (mxge_always_promisc)
1094 status = mxge_send_cmd(sc, MXGEFW_ENABLE_PROMISC,
1097 status = mxge_send_cmd(sc, MXGEFW_DISABLE_PROMISC,
1101 device_printf(sc->dev, "Failed to set promisc mode\n");
1106 mxge_set_multicast_list(mxge_softc_t *sc)
1109 struct ifmultiaddr *ifma;
1110 struct ifnet *ifp = sc->ifp;
1113 /* This firmware is known to not support multicast */
1114 if (!sc->fw_multicast_support)
1117 /* Disable multicast filtering while we play with the lists*/
1118 err = mxge_send_cmd(sc, MXGEFW_ENABLE_ALLMULTI, &cmd);
1120 device_printf(sc->dev, "Failed MXGEFW_ENABLE_ALLMULTI,"
1121 " error status: %d\n", err);
1125 if (sc->adopted_rx_filter_bug)
1128 if (ifp->if_flags & IFF_ALLMULTI)
1129 /* request to disable multicast filtering, so quit here */
1132 /* Flush all the filters */
1134 err = mxge_send_cmd(sc, MXGEFW_LEAVE_ALL_MULTICAST_GROUPS, &cmd);
1136 device_printf(sc->dev,
1137 "Failed MXGEFW_LEAVE_ALL_MULTICAST_GROUPS"
1138 ", error status: %d\n", err);
1142 /* Walk the multicast list, and add each address */
1144 if_maddr_rlock(ifp);
1145 TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
1146 if (ifma->ifma_addr->sa_family != AF_LINK)
1148 bcopy(LLADDR((struct sockaddr_dl *)ifma->ifma_addr),
1150 bcopy(LLADDR((struct sockaddr_dl *)ifma->ifma_addr) + 4,
1152 cmd.data0 = htonl(cmd.data0);
1153 cmd.data1 = htonl(cmd.data1);
1154 err = mxge_send_cmd(sc, MXGEFW_JOIN_MULTICAST_GROUP, &cmd);
1156 device_printf(sc->dev, "Failed "
1157 "MXGEFW_JOIN_MULTICAST_GROUP, error status:"
1159 /* abort, leaving multicast filtering off */
1160 if_maddr_runlock(ifp);
1164 if_maddr_runlock(ifp);
1165 /* Enable multicast filtering */
1166 err = mxge_send_cmd(sc, MXGEFW_DISABLE_ALLMULTI, &cmd);
1168 device_printf(sc->dev, "Failed MXGEFW_DISABLE_ALLMULTI"
1169 ", error status: %d\n", err);
1174 mxge_max_mtu(mxge_softc_t *sc)
1179 if (MJUMPAGESIZE - MXGEFW_PAD > MXGEFW_MAX_MTU)
1180 return MXGEFW_MAX_MTU - MXGEFW_PAD;
1182 /* try to set nbufs to see if we can
1183 use virtually contiguous jumbos */
1185 status = mxge_send_cmd(sc, MXGEFW_CMD_ALWAYS_USE_N_BIG_BUFFERS,
1188 return MXGEFW_MAX_MTU - MXGEFW_PAD;
1190 /* otherwise, we're limited to MJUMPAGESIZE */
1191 return MJUMPAGESIZE - MXGEFW_PAD;
1195 mxge_reset(mxge_softc_t *sc, int interrupts_setup)
1197 struct mxge_slice_state *ss;
1198 mxge_rx_done_t *rx_done;
1199 volatile uint32_t *irq_claim;
1203 /* try to send a reset command to the card to see if it
1205 memset(&cmd, 0, sizeof (cmd));
1206 status = mxge_send_cmd(sc, MXGEFW_CMD_RESET, &cmd);
1208 device_printf(sc->dev, "failed reset\n");
1212 mxge_dummy_rdma(sc, 1);
1215 /* set the intrq size */
1216 cmd.data0 = sc->rx_ring_size;
1217 status = mxge_send_cmd(sc, MXGEFW_CMD_SET_INTRQ_SIZE, &cmd);
1220 * Even though we already know how many slices are supported
1221 * via mxge_slice_probe(), MXGEFW_CMD_GET_MAX_RSS_QUEUES
1222 * has magic side effects, and must be called after a reset.
1223 * It must be called prior to calling any RSS related cmds,
1224 * including assigning an interrupt queue for anything but
1225 * slice 0. It must also be called *after*
1226 * MXGEFW_CMD_SET_INTRQ_SIZE, since the intrq size is used by
1227 * the firmware to compute offsets.
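*
* In short (added note, derived from the comment above): the ordering
* is RESET -> SET_INTRQ_SIZE -> GET_MAX_RSS_QUEUES ->
* ENABLE_RSS_QUEUES -> per-slice SET_INTRQ_DMA.
*/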
1230 if (sc->num_slices > 1) {
1231 /* ask the maximum number of slices it supports */
1232 status = mxge_send_cmd(sc, MXGEFW_CMD_GET_MAX_RSS_QUEUES,
1235 device_printf(sc->dev,
1236 "failed to get number of slices\n");
1240 * MXGEFW_CMD_ENABLE_RSS_QUEUES must be called prior
1241 * to setting up the interrupt queue DMA
1243 cmd.data0 = sc->num_slices;
1244 cmd.data1 = MXGEFW_SLICE_INTR_MODE_ONE_PER_SLICE;
1245 #ifdef IFNET_BUF_RING
1246 cmd.data1 |= MXGEFW_SLICE_ENABLE_MULTIPLE_TX_QUEUES;
1248 status = mxge_send_cmd(sc, MXGEFW_CMD_ENABLE_RSS_QUEUES,
1251 device_printf(sc->dev,
1252 "failed to set number of slices\n");
1258 if (interrupts_setup) {
1259 /* Now exchange information about interrupts */
1260 for (slice = 0; slice < sc->num_slices; slice++) {
1261 rx_done = &sc->ss[slice].rx_done;
1262 memset(rx_done->entry, 0, sc->rx_ring_size);
1263 cmd.data0 = MXGE_LOWPART_TO_U32(rx_done->dma.bus_addr);
1264 cmd.data1 = MXGE_HIGHPART_TO_U32(rx_done->dma.bus_addr);
1266 status |= mxge_send_cmd(sc,
1267 MXGEFW_CMD_SET_INTRQ_DMA,
1272 status |= mxge_send_cmd(sc,
1273 MXGEFW_CMD_GET_INTR_COAL_DELAY_OFFSET, &cmd);
1276 sc->intr_coal_delay_ptr = (volatile uint32_t *)(sc->sram + cmd.data0);
1278 status |= mxge_send_cmd(sc, MXGEFW_CMD_GET_IRQ_ACK_OFFSET, &cmd);
1279 irq_claim = (volatile uint32_t *)(sc->sram + cmd.data0);
1282 status |= mxge_send_cmd(sc, MXGEFW_CMD_GET_IRQ_DEASSERT_OFFSET,
1284 sc->irq_deassert = (volatile uint32_t *)(sc->sram + cmd.data0);
1286 device_printf(sc->dev, "failed set interrupt parameters\n");
1291 *sc->intr_coal_delay_ptr = htobe32(sc->intr_coal_delay);
1294 /* run a DMA benchmark */
1295 (void) mxge_dma_test(sc, MXGEFW_DMA_TEST);
1297 for (slice = 0; slice < sc->num_slices; slice++) {
1298 ss = &sc->ss[slice];
1300 ss->irq_claim = irq_claim + (2 * slice);
1301 /* reset mcp/driver shared state back to 0 */
1302 ss->rx_done.idx = 0;
1303 ss->rx_done.cnt = 0;
1306 ss->tx.pkt_done = 0;
1307 ss->tx.queue_active = 0;
1308 ss->tx.activate = 0;
1309 ss->tx.deactivate = 0;
1314 ss->rx_small.cnt = 0;
1315 ss->lro_bad_csum = 0;
1317 ss->lro_flushed = 0;
1318 if (ss->fw_stats != NULL) {
1319 bzero(ss->fw_stats, sizeof *ss->fw_stats);
1322 sc->rdma_tags_available = 15;
1323 status = mxge_update_mac_address(sc);
1324 mxge_change_promisc(sc, sc->ifp->if_flags & IFF_PROMISC);
1325 mxge_change_pause(sc, sc->pause);
1326 mxge_set_multicast_list(sc);
1328 cmd.data0 = sc->throttle;
1329 if (mxge_send_cmd(sc, MXGEFW_CMD_SET_THROTTLE_FACTOR,
1331 device_printf(sc->dev,
1332 "can't enable throttle\n");
1339 mxge_change_throttle(SYSCTL_HANDLER_ARGS)
1344 unsigned int throttle;
1347 throttle = sc->throttle;
1348 err = sysctl_handle_int(oidp, &throttle, arg2, req);
1353 if (throttle == sc->throttle)
1356 if (throttle < MXGE_MIN_THROTTLE || throttle > MXGE_MAX_THROTTLE)
1359 mtx_lock(&sc->driver_mtx);
1360 cmd.data0 = throttle;
1361 err = mxge_send_cmd(sc, MXGEFW_CMD_SET_THROTTLE_FACTOR, &cmd);
1363 sc->throttle = throttle;
1364 mtx_unlock(&sc->driver_mtx);
1369 mxge_change_intr_coal(SYSCTL_HANDLER_ARGS)
1372 unsigned int intr_coal_delay;
1376 intr_coal_delay = sc->intr_coal_delay;
1377 err = sysctl_handle_int(oidp, &intr_coal_delay, arg2, req);
1381 if (intr_coal_delay == sc->intr_coal_delay)
1384 if (intr_coal_delay == 0 || intr_coal_delay > 1000*1000)
1387 mtx_lock(&sc->driver_mtx);
1388 *sc->intr_coal_delay_ptr = htobe32(intr_coal_delay);
1389 sc->intr_coal_delay = intr_coal_delay;
1391 mtx_unlock(&sc->driver_mtx);
1396 mxge_change_flow_control(SYSCTL_HANDLER_ARGS)
1399 unsigned int enabled;
1403 enabled = sc->pause;
1404 err = sysctl_handle_int(oidp, &enabled, arg2, req);
1408 if (enabled == sc->pause)
1411 mtx_lock(&sc->driver_mtx);
1412 err = mxge_change_pause(sc, enabled);
1413 mtx_unlock(&sc->driver_mtx);
1418 mxge_change_lro_locked(mxge_softc_t *sc, int lro_cnt)
1425 ifp->if_capenable &= ~IFCAP_LRO;
1427 ifp->if_capenable |= IFCAP_LRO;
1428 sc->lro_cnt = lro_cnt;
1429 if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
1431 err = mxge_open(sc);
1437 mxge_change_lro(SYSCTL_HANDLER_ARGS)
1440 unsigned int lro_cnt;
1444 lro_cnt = sc->lro_cnt;
1445 err = sysctl_handle_int(oidp, &lro_cnt, arg2, req);
1449 if (lro_cnt == sc->lro_cnt)
1455 mtx_lock(&sc->driver_mtx);
1456 err = mxge_change_lro_locked(sc, lro_cnt);
1457 mtx_unlock(&sc->driver_mtx);
1462 mxge_handle_be32(SYSCTL_HANDLER_ARGS)
1468 arg2 = be32toh(*(int *)arg1);
1470 err = sysctl_handle_int(oidp, arg1, arg2, req);
1476 mxge_rem_sysctls(mxge_softc_t *sc)
1478 struct mxge_slice_state *ss;
1481 if (sc->slice_sysctl_tree == NULL)
1484 for (slice = 0; slice < sc->num_slices; slice++) {
1485 ss = &sc->ss[slice];
1486 if (ss == NULL || ss->sysctl_tree == NULL)
1488 sysctl_ctx_free(&ss->sysctl_ctx);
1489 ss->sysctl_tree = NULL;
1491 sysctl_ctx_free(&sc->slice_sysctl_ctx);
1492 sc->slice_sysctl_tree = NULL;
1496 mxge_add_sysctls(mxge_softc_t *sc)
1498 struct sysctl_ctx_list *ctx;
1499 struct sysctl_oid_list *children;
1501 struct mxge_slice_state *ss;
1505 ctx = device_get_sysctl_ctx(sc->dev);
1506 children = SYSCTL_CHILDREN(device_get_sysctl_tree(sc->dev));
1507 fw = sc->ss[0].fw_stats;
1509 /* random information */
1510 SYSCTL_ADD_STRING(ctx, children, OID_AUTO,
1512 CTLFLAG_RD, &sc->fw_version,
1513 0, "firmware version");
1514 SYSCTL_ADD_STRING(ctx, children, OID_AUTO,
1516 CTLFLAG_RD, &sc->serial_number_string,
1517 0, "serial number");
1518 SYSCTL_ADD_STRING(ctx, children, OID_AUTO,
1520 CTLFLAG_RD, &sc->product_code_string,
1522 SYSCTL_ADD_INT(ctx, children, OID_AUTO,
1524 CTLFLAG_RD, &sc->link_width,
1526 SYSCTL_ADD_INT(ctx, children, OID_AUTO,
1528 CTLFLAG_RD, &sc->tx_boundary,
1530 SYSCTL_ADD_INT(ctx, children, OID_AUTO,
1532 CTLFLAG_RD, &sc->wc,
1533 0, "write combining PIO?");
1534 SYSCTL_ADD_INT(ctx, children, OID_AUTO,
1536 CTLFLAG_RD, &sc->read_dma,
1537 0, "DMA Read speed in MB/s");
1538 SYSCTL_ADD_INT(ctx, children, OID_AUTO,
1540 CTLFLAG_RD, &sc->write_dma,
1541 0, "DMA Write speed in MB/s");
1542 SYSCTL_ADD_INT(ctx, children, OID_AUTO,
1543 "read_write_dma_MBs",
1544 CTLFLAG_RD, &sc->read_write_dma,
1545 0, "DMA concurrent Read/Write speed in MB/s");
1546 SYSCTL_ADD_INT(ctx, children, OID_AUTO,
1548 CTLFLAG_RD, &sc->watchdog_resets,
1549 0, "Number of times NIC was reset");
1552 /* performance related tunables */
1553 SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
1555 CTLTYPE_INT|CTLFLAG_RW, sc,
1556 0, mxge_change_intr_coal,
1557 "I", "interrupt coalescing delay in usecs");
1559 SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
1561 CTLTYPE_INT|CTLFLAG_RW, sc,
1562 0, mxge_change_throttle,
1563 "I", "transmit throttling");
1565 SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
1566 "flow_control_enabled",
1567 CTLTYPE_INT|CTLFLAG_RW, sc,
1568 0, mxge_change_flow_control,
1569 "I", "enable/disable flow control");
1571 SYSCTL_ADD_INT(ctx, children, OID_AUTO,
1573 CTLFLAG_RW, &mxge_deassert_wait,
1574 0, "Wait for IRQ line to go low in ihandler");
1576 /* stats block from firmware is in network byte order.
1578 SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
1580 CTLTYPE_INT|CTLFLAG_RD, &fw->link_up,
1581 0, mxge_handle_be32,
1583 SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
1584 "rdma_tags_available",
1585 CTLTYPE_INT|CTLFLAG_RD, &fw->rdma_tags_available,
1586 0, mxge_handle_be32,
1587 "I", "rdma_tags_available");
1588 SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
1589 "dropped_bad_crc32",
1590 CTLTYPE_INT|CTLFLAG_RD,
1591 &fw->dropped_bad_crc32,
1592 0, mxge_handle_be32,
1593 "I", "dropped_bad_crc32");
1594 SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
1596 CTLTYPE_INT|CTLFLAG_RD,
1597 &fw->dropped_bad_phy,
1598 0, mxge_handle_be32,
1599 "I", "dropped_bad_phy");
1600 SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
1601 "dropped_link_error_or_filtered",
1602 CTLTYPE_INT|CTLFLAG_RD,
1603 &fw->dropped_link_error_or_filtered,
1604 0, mxge_handle_be32,
1605 "I", "dropped_link_error_or_filtered");
1606 SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
1607 "dropped_link_overflow",
1608 CTLTYPE_INT|CTLFLAG_RD, &fw->dropped_link_overflow,
1609 0, mxge_handle_be32,
1610 "I", "dropped_link_overflow");
1611 SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
1612 "dropped_multicast_filtered",
1613 CTLTYPE_INT|CTLFLAG_RD,
1614 &fw->dropped_multicast_filtered,
1615 0, mxge_handle_be32,
1616 "I", "dropped_multicast_filtered");
1617 SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
1618 "dropped_no_big_buffer",
1619 CTLTYPE_INT|CTLFLAG_RD, &fw->dropped_no_big_buffer,
1620 0, mxge_handle_be32,
1621 "I", "dropped_no_big_buffer");
1622 SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
1623 "dropped_no_small_buffer",
1624 CTLTYPE_INT|CTLFLAG_RD,
1625 &fw->dropped_no_small_buffer,
1626 0, mxge_handle_be32,
1627 "I", "dropped_no_small_buffer");
1628 SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
1630 CTLTYPE_INT|CTLFLAG_RD, &fw->dropped_overrun,
1631 0, mxge_handle_be32,
1632 "I", "dropped_overrun");
1633 SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
1635 CTLTYPE_INT|CTLFLAG_RD,
1637 0, mxge_handle_be32,
1638 "I", "dropped_pause");
1639 SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
1641 CTLTYPE_INT|CTLFLAG_RD, &fw->dropped_runt,
1642 0, mxge_handle_be32,
1643 "I", "dropped_runt");
1645 SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
1646 "dropped_unicast_filtered",
1647 CTLTYPE_INT|CTLFLAG_RD, &fw->dropped_unicast_filtered,
1648 0, mxge_handle_be32,
1649 "I", "dropped_unicast_filtered");
1651 /* verbose printing? */
1652 SYSCTL_ADD_INT(ctx, children, OID_AUTO,
1654 CTLFLAG_RW, &mxge_verbose,
1655 0, "verbose printing");
1658 SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
1660 CTLTYPE_INT|CTLFLAG_RW, sc,
1662 "I", "number of lro merge queues");
1665 /* add counters exported for debugging from all slices */
1666 sysctl_ctx_init(&sc->slice_sysctl_ctx);
1667 sc->slice_sysctl_tree =
1668 SYSCTL_ADD_NODE(&sc->slice_sysctl_ctx, children, OID_AUTO,
1669 "slice", CTLFLAG_RD, 0, "");
1671 for (slice = 0; slice < sc->num_slices; slice++) {
1672 ss = &sc->ss[slice];
1673 sysctl_ctx_init(&ss->sysctl_ctx);
1674 ctx = &ss->sysctl_ctx;
1675 children = SYSCTL_CHILDREN(sc->slice_sysctl_tree);
1676 sprintf(slice_num, "%d", slice);
1678 SYSCTL_ADD_NODE(ctx, children, OID_AUTO, slice_num,
1680 children = SYSCTL_CHILDREN(ss->sysctl_tree);
1681 SYSCTL_ADD_INT(ctx, children, OID_AUTO,
1683 CTLFLAG_RD, &ss->rx_small.cnt,
1685 SYSCTL_ADD_INT(ctx, children, OID_AUTO,
1687 CTLFLAG_RD, &ss->rx_big.cnt,
1689 SYSCTL_ADD_INT(ctx, children, OID_AUTO,
1690 "lro_flushed", CTLFLAG_RD, &ss->lro_flushed,
1691 0, "number of lro merge queues flushed");
1693 SYSCTL_ADD_INT(ctx, children, OID_AUTO,
1694 "lro_queued", CTLFLAG_RD, &ss->lro_queued,
1695 0, "number of frames appended to lro merge"
1698 #ifndef IFNET_BUF_RING
1699 /* only transmit from slice 0 for now */
1703 SYSCTL_ADD_INT(ctx, children, OID_AUTO,
1705 CTLFLAG_RD, &ss->tx.req,
1708 SYSCTL_ADD_INT(ctx, children, OID_AUTO,
1710 CTLFLAG_RD, &ss->tx.done,
1712 SYSCTL_ADD_INT(ctx, children, OID_AUTO,
1714 CTLFLAG_RD, &ss->tx.pkt_done,
1716 SYSCTL_ADD_INT(ctx, children, OID_AUTO,
1718 CTLFLAG_RD, &ss->tx.stall,
1720 SYSCTL_ADD_INT(ctx, children, OID_AUTO,
1722 CTLFLAG_RD, &ss->tx.wake,
1724 SYSCTL_ADD_INT(ctx, children, OID_AUTO,
1726 CTLFLAG_RD, &ss->tx.defrag,
1728 SYSCTL_ADD_INT(ctx, children, OID_AUTO,
1730 CTLFLAG_RD, &ss->tx.queue_active,
1731 0, "tx_queue_active");
1732 SYSCTL_ADD_INT(ctx, children, OID_AUTO,
1734 CTLFLAG_RD, &ss->tx.activate,
1736 SYSCTL_ADD_INT(ctx, children, OID_AUTO,
1738 CTLFLAG_RD, &ss->tx.deactivate,
1739 0, "tx_deactivate");
1743 /* copy an array of mcp_kreq_ether_send_t's to the mcp. Copy
1744 backwards one at a time and handle ring wraps */
1747 mxge_submit_req_backwards(mxge_tx_ring_t *tx,
1748 mcp_kreq_ether_send_t *src, int cnt)
1750 int idx, starting_slot;
1751 starting_slot = tx->req;
1754 idx = (starting_slot + cnt) & tx->mask;
1755 mxge_pio_copy(&tx->lanai[idx],
1756 &src[cnt], sizeof(*src));
1762 * copy an array of mcp_kreq_ether_send_t's to the mcp. Copy
1763 * at most 32 bytes at a time, so as to avoid involving the software
1764 * pio handler in the nic. We re-write the first segment's flags
1765 * to mark them valid only after writing the entire chain
1769 mxge_submit_req(mxge_tx_ring_t *tx, mcp_kreq_ether_send_t *src,
1774 volatile uint32_t *dst_ints;
1775 mcp_kreq_ether_send_t *srcp;
1776 volatile mcp_kreq_ether_send_t *dstp, *dst;
1779 idx = tx->req & tx->mask;
1781 last_flags = src->flags;
1784 dst = dstp = &tx->lanai[idx];
1787 if ((idx + cnt) < tx->mask) {
1788 for (i = 0; i < (cnt - 1); i += 2) {
1789 mxge_pio_copy(dstp, srcp, 2 * sizeof(*src));
1790 wmb(); /* force write every 32 bytes */
1795 /* submit all but the first request, and ensure
1796 that it is submitted below */
1797 mxge_submit_req_backwards(tx, src, cnt);
1801 /* submit the first request */
1802 mxge_pio_copy(dstp, srcp, sizeof(*src));
1803 wmb(); /* barrier before setting valid flag */
1806 /* re-write the last 32-bits with the valid flags */
1807 src->flags = last_flags;
1808 src_ints = (uint32_t *)src;
1810 dst_ints = (volatile uint32_t *)dst;
1812 *dst_ints = *src_ints;
1820 mxge_encap_tso(struct mxge_slice_state *ss, struct mbuf *m,
1821 int busdma_seg_cnt, int ip_off)
1824 mcp_kreq_ether_send_t *req;
1825 bus_dma_segment_t *seg;
1828 uint32_t low, high_swapped;
1829 int len, seglen, cum_len, cum_len_next;
1830 int next_is_first, chop, cnt, rdma_count, small;
1831 uint16_t pseudo_hdr_offset, cksum_offset, mss;
1832 uint8_t flags, flags_next;
1835 mss = m->m_pkthdr.tso_segsz;
1837 /* negative cum_len signifies to the
1838 * send loop that we are still in the
1839 * header portion of the TSO packet.
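* (Added note: cum_len is initialized below to minus the combined
* Ethernet + IP + TCP header length, so it first reaches zero exactly
* at the start of the TCP payload.)
*/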
1842 /* ensure we have the ethernet, IP and TCP
1843 header together in the first mbuf, copy
1844 it to a scratch buffer if not */
1845 if (__predict_false(m->m_len < ip_off + sizeof (*ip))) {
1846 m_copydata(m, 0, ip_off + sizeof (*ip),
1848 ip = (struct ip *)(ss->scratch + ip_off);
1850 ip = (struct ip *)(mtod(m, char *) + ip_off);
1852 if (__predict_false(m->m_len < ip_off + (ip->ip_hl << 2)
1854 m_copydata(m, 0, ip_off + (ip->ip_hl << 2)
1855 + sizeof (*tcp), ss->scratch);
1856 ip = (struct ip *)(mtod(m, char *) + ip_off);
1859 tcp = (struct tcphdr *)((char *)ip + (ip->ip_hl << 2));
1860 cum_len = -(ip_off + ((ip->ip_hl + tcp->th_off) << 2));
1861 cksum_offset = ip_off + (ip->ip_hl << 2);
1863 /* TSO implies checksum offload on this hardware */
1864 if (__predict_false((m->m_pkthdr.csum_flags & (CSUM_TCP)) == 0)) {
1866 * If packet has full TCP csum, replace it with pseudo hdr
1867 * sum that the NIC expects, otherwise the NIC will emit
1868 * packets with bad TCP checksums.
1870 m->m_pkthdr.csum_flags = CSUM_TCP;
1871 m->m_pkthdr.csum_data = offsetof(struct tcphdr, th_sum);
1872 tcp->th_sum = in_pseudo(ip->ip_src.s_addr, ip->ip_dst.s_addr,
1873 htons(IPPROTO_TCP + (m->m_pkthdr.len - cksum_offset)));
1875 flags = MXGEFW_FLAGS_TSO_HDR | MXGEFW_FLAGS_FIRST;
1878 /* for TSO, pseudo_hdr_offset holds mss.
1879 * The firmware figures out where to put
1880 * the checksum by parsing the header. */
1881 pseudo_hdr_offset = htobe16(mss);
1888 /* "rdma_count" is the number of RDMAs belonging to the
1889 * current packet BEFORE the current send request. For
1890 * non-TSO packets, this is equal to "count".
1891 * For TSO packets, rdma_count needs to be reset
1892 * to 0 after a segment cut.
1894 * The rdma_count field of the send request is
1895 * the number of RDMAs of the packet starting at
1896 * that request. For TSO send requests with one or more cuts
1897 * in the middle, this is the number of RDMAs starting
1898 * after the last cut in the request. All previous
1899 * segments before the last cut implicitly have 1 RDMA.
1901 * Since the number of RDMAs is not known beforehand,
1902 * it must be filled-in retroactively - after each
1903 * segmentation cut or at the end of the entire packet.
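*
* (Added note: this back-filling is what the
* "(req-rdma_count)->rdma_count = ..." statements below do -- they
* reach back to the first request after the previous cut and patch in
* the RDMA count retroactively.)
*/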
1906 while (busdma_seg_cnt) {
1907 /* Break the busdma segment up into pieces*/
1908 low = MXGE_LOWPART_TO_U32(seg->ds_addr);
1909 high_swapped = htobe32(MXGE_HIGHPART_TO_U32(seg->ds_addr));
1913 flags_next = flags & ~MXGEFW_FLAGS_FIRST;
1915 cum_len_next = cum_len + seglen;
1916 (req-rdma_count)->rdma_count = rdma_count + 1;
1917 if (__predict_true(cum_len >= 0)) {
1919 chop = (cum_len_next > mss);
1920 cum_len_next = cum_len_next % mss;
1921 next_is_first = (cum_len_next == 0);
1922 flags |= chop * MXGEFW_FLAGS_TSO_CHOP;
1923 flags_next |= next_is_first *
1925 rdma_count |= -(chop | next_is_first);
1926 rdma_count += chop & !next_is_first;
1927 } else if (cum_len_next >= 0) {
1932 small = (mss <= MXGEFW_SEND_SMALL_SIZE);
1933 flags_next = MXGEFW_FLAGS_TSO_PLD |
1934 MXGEFW_FLAGS_FIRST |
1935 (small * MXGEFW_FLAGS_SMALL);
1938 req->addr_high = high_swapped;
1939 req->addr_low = htobe32(low);
1940 req->pseudo_hdr_offset = pseudo_hdr_offset;
1942 req->rdma_count = 1;
1943 req->length = htobe16(seglen);
1944 req->cksum_offset = cksum_offset;
1945 req->flags = flags | ((cum_len & 1) *
1946 MXGEFW_FLAGS_ALIGN_ODD);
1949 cum_len = cum_len_next;
1954 if (__predict_false(cksum_offset > seglen))
1955 cksum_offset -= seglen;
1958 if (__predict_false(cnt > tx->max_desc))
1964 (req-rdma_count)->rdma_count = rdma_count;
1968 req->flags |= MXGEFW_FLAGS_TSO_LAST;
1969 } while (!(req->flags & (MXGEFW_FLAGS_TSO_CHOP | MXGEFW_FLAGS_FIRST)));
1971 tx->info[((cnt - 1) + tx->req) & tx->mask].flag = 1;
1972 mxge_submit_req(tx, tx->req_list, cnt);
1973 #ifdef IFNET_BUF_RING
1974 if ((ss->sc->num_slices > 1) && tx->queue_active == 0) {
1975 /* tell the NIC to start polling this slice */
1977 tx->queue_active = 1;
1985 bus_dmamap_unload(tx->dmat, tx->info[tx->req & tx->mask].map);
1989 printf("tx->max_desc exceeded via TSO!\n");
1990 printf("mss = %d, %ld, %d!\n", mss,
1991 (long)seg - (long)tx->seg_list, tx->max_desc);
1998 #endif /* IFCAP_TSO4 */
2000 #ifdef MXGE_NEW_VLAN_API
2002 * We reproduce the software vlan tag insertion from
2003 * net/if_vlan.c:vlan_start() here so that we can advertise "hardware"
2004 * vlan tag insertion. We need to advertise this in order to have the
2005 * vlan interface respect our csum offload flags.
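*
* (Added illustration: the 14-byte Ethernet header
*   [dst:6][src:6][type:2]
* is rewritten below into the 18-byte 802.1Q form
*   [dst:6][src:6][0x8100:2][tag:2][type:2].)
*/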
2007 static struct mbuf *
2008 mxge_vlan_tag_insert(struct mbuf *m)
2010 struct ether_vlan_header *evl;
2012 M_PREPEND(m, ETHER_VLAN_ENCAP_LEN, M_DONTWAIT);
2013 if (__predict_false(m == NULL))
2015 if (m->m_len < sizeof(*evl)) {
2016 m = m_pullup(m, sizeof(*evl));
2017 if (__predict_false(m == NULL))
2021 * Transform the Ethernet header into an Ethernet header
2022 * with 802.1Q encapsulation.
2024 evl = mtod(m, struct ether_vlan_header *);
2025 bcopy((char *)evl + ETHER_VLAN_ENCAP_LEN,
2026 (char *)evl, ETHER_HDR_LEN - ETHER_TYPE_LEN);
2027 evl->evl_encap_proto = htons(ETHERTYPE_VLAN);
2028 evl->evl_tag = htons(m->m_pkthdr.ether_vtag);
2029 m->m_flags &= ~M_VLANTAG;
2032 #endif /* MXGE_NEW_VLAN_API */
2035 mxge_encap(struct mxge_slice_state *ss, struct mbuf *m)
2038 mcp_kreq_ether_send_t *req;
2039 bus_dma_segment_t *seg;
2044 int cnt, cum_len, err, i, idx, odd_flag, ip_off;
2045 uint16_t pseudo_hdr_offset;
2046 uint8_t flags, cksum_offset;
2053 ip_off = sizeof (struct ether_header);
2054 #ifdef MXGE_NEW_VLAN_API
2055 if (m->m_flags & M_VLANTAG) {
2056 m = mxge_vlan_tag_insert(m);
2057 if (__predict_false(m == NULL))
2059 ip_off += ETHER_VLAN_ENCAP_LEN;
2062 /* (try to) map the frame for DMA */
2063 idx = tx->req & tx->mask;
2064 err = bus_dmamap_load_mbuf_sg(tx->dmat, tx->info[idx].map,
2065 m, tx->seg_list, &cnt,
2067 if (__predict_false(err == EFBIG)) {
2068 /* Too many segments in the chain. Try to defrag */
2070 m_tmp = m_defrag(m, M_NOWAIT);
2071 if (m_tmp == NULL) {
2076 err = bus_dmamap_load_mbuf_sg(tx->dmat,
2078 m, tx->seg_list, &cnt,
2081 if (__predict_false(err != 0)) {
2082 device_printf(sc->dev, "bus_dmamap_load_mbuf_sg returned %d"
2083 " packet len = %d\n", err, m->m_pkthdr.len);
2086 bus_dmamap_sync(tx->dmat, tx->info[idx].map,
2087 BUS_DMASYNC_PREWRITE);
2088 tx->info[idx].m = m;
2091 /* TSO is different enough, we handle it in another routine */
2092 if (m->m_pkthdr.csum_flags & (CSUM_TSO)) {
2093 mxge_encap_tso(ss, m, cnt, ip_off);
2100 pseudo_hdr_offset = 0;
2101 flags = MXGEFW_FLAGS_NO_TSO;
2103 /* checksum offloading? */
2104 if (m->m_pkthdr.csum_flags & (CSUM_DELAY_DATA)) {
2105 /* ensure ip header is in first mbuf, copy
2106 it to a scratch buffer if not */
2107 if (__predict_false(m->m_len < ip_off + sizeof (*ip))) {
2108 m_copydata(m, 0, ip_off + sizeof (*ip),
2110 ip = (struct ip *)(ss->scratch + ip_off);
2112 ip = (struct ip *)(mtod(m, char *) + ip_off);
2114 cksum_offset = ip_off + (ip->ip_hl << 2);
2115 pseudo_hdr_offset = cksum_offset + m->m_pkthdr.csum_data;
2116 pseudo_hdr_offset = htobe16(pseudo_hdr_offset);
2117 req->cksum_offset = cksum_offset;
2118 flags |= MXGEFW_FLAGS_CKSUM;
2119 odd_flag = MXGEFW_FLAGS_ALIGN_ODD;
2123 if (m->m_pkthdr.len < MXGEFW_SEND_SMALL_SIZE)
2124 flags |= MXGEFW_FLAGS_SMALL;
2126 /* convert segments into a request list */
2129 req->flags = MXGEFW_FLAGS_FIRST;
2130 for (i = 0; i < cnt; i++) {
2132 htobe32(MXGE_LOWPART_TO_U32(seg->ds_addr));
2134 htobe32(MXGE_HIGHPART_TO_U32(seg->ds_addr));
2135 req->length = htobe16(seg->ds_len);
2136 req->cksum_offset = cksum_offset;
2137 if (cksum_offset > seg->ds_len)
2138 cksum_offset -= seg->ds_len;
2141 req->pseudo_hdr_offset = pseudo_hdr_offset;
2142 req->pad = 0; /* complete solid 16-byte block */
2143 req->rdma_count = 1;
2144 req->flags |= flags | ((cum_len & 1) * odd_flag);
2145 cum_len += seg->ds_len;
2151 /* pad runts to 60 bytes */
2155 htobe32(MXGE_LOWPART_TO_U32(sc->zeropad_dma.bus_addr));
2157 htobe32(MXGE_HIGHPART_TO_U32(sc->zeropad_dma.bus_addr));
2158 req->length = htobe16(60 - cum_len);
2159 req->cksum_offset = 0;
2160 req->pseudo_hdr_offset = pseudo_hdr_offset;
2161 req->pad = 0; /* complete solid 16-byte block */
2162 req->rdma_count = 1;
2163 req->flags |= flags | ((cum_len & 1) * odd_flag);
2167 tx->req_list[0].rdma_count = cnt;
2169 /* print what the firmware will see */
2170 for (i = 0; i < cnt; i++) {
2171 printf("%d: addr: 0x%x 0x%x len:%d pso%d,"
2172 "cso:%d, flags:0x%x, rdma:%d\n",
2173 i, (int)ntohl(tx->req_list[i].addr_high),
2174 (int)ntohl(tx->req_list[i].addr_low),
2175 (int)ntohs(tx->req_list[i].length),
2176 (int)ntohs(tx->req_list[i].pseudo_hdr_offset),
2177 tx->req_list[i].cksum_offset, tx->req_list[i].flags,
2178 tx->req_list[i].rdma_count);
2180 printf("--------------\n");
2182 tx->info[((cnt - 1) + tx->req) & tx->mask].flag = 1;
2183 mxge_submit_req(tx, tx->req_list, cnt);
2184 #ifdef IFNET_BUF_RING
2185 if ((ss->sc->num_slices > 1) && tx->queue_active == 0) {
2186 /* tell the NIC to start polling this slice */
2188 tx->queue_active = 1;
2201 #ifdef IFNET_BUF_RING
2203 mxge_qflush(struct ifnet *ifp)
2205 mxge_softc_t *sc = ifp->if_softc;
2210 for (slice = 0; slice < sc->num_slices; slice++) {
2211 tx = &sc->ss[slice].tx;
2213 while ((m = buf_ring_dequeue_sc(tx->br)) != NULL)
2215 mtx_unlock(&tx->mtx);
2221 mxge_start_locked(struct mxge_slice_state *ss)
2232 while ((tx->mask - (tx->req - tx->done)) > tx->max_desc) {
2233 m = drbr_dequeue(ifp, tx->br);
2237 /* let BPF see it */
2240 /* give it to the nic */
2243 /* ran out of transmit slots */
2244 if (((ss->if_drv_flags & IFF_DRV_OACTIVE) == 0)
2245 && (!drbr_empty(ifp, tx->br))) {
2246 ss->if_drv_flags |= IFF_DRV_OACTIVE;
2252 mxge_transmit_locked(struct mxge_slice_state *ss, struct mbuf *m)
2263 if ((ss->if_drv_flags & (IFF_DRV_RUNNING|IFF_DRV_OACTIVE)) !=
2265 err = drbr_enqueue(ifp, tx->br, m);
2269 if (!drbr_needs_enqueue(ifp, tx->br) &&
2270 ((tx->mask - (tx->req - tx->done)) > tx->max_desc)) {
2271 /* let BPF see it */
2273 /* give it to the nic */
2275 } else if ((err = drbr_enqueue(ifp, tx->br, m)) != 0) {
2278 if (!drbr_empty(ifp, tx->br))
2279 mxge_start_locked(ss);
2284 mxge_transmit(struct ifnet *ifp, struct mbuf *m)
2286 mxge_softc_t *sc = ifp->if_softc;
2287 struct mxge_slice_state *ss;
2292 slice = m->m_pkthdr.flowid;
2293 slice &= (sc->num_slices - 1); /* num_slices always power of 2 */
2295 ss = &sc->ss[slice];
2298 if (mtx_trylock(&tx->mtx)) {
2299 err = mxge_transmit_locked(ss, m);
2300 mtx_unlock(&tx->mtx);
2302 err = drbr_enqueue(ifp, tx->br, m);
2311 mxge_start_locked(struct mxge_slice_state *ss)
2321 while ((tx->mask - (tx->req - tx->done)) > tx->max_desc) {
2322 IFQ_DRV_DEQUEUE(&ifp->if_snd, m);
2326 /* let BPF see it */
2329 /* give it to the nic */
2332 /* ran out of transmit slots */
2333 if ((sc->ifp->if_drv_flags & IFF_DRV_OACTIVE) == 0) {
2334 sc->ifp->if_drv_flags |= IFF_DRV_OACTIVE;
2340 mxge_start(struct ifnet *ifp)
2342 mxge_softc_t *sc = ifp->if_softc;
2343 struct mxge_slice_state *ss;
2345 /* only use the first slice for now */
2347 mtx_lock(&ss->tx.mtx);
2348 mxge_start_locked(ss);
2349 mtx_unlock(&ss->tx.mtx);
2353 * copy an array of mcp_kreq_ether_recv_t's to the mcp. Copy
2354 * at most 32 bytes at a time, so as to avoid involving the software
2355 * pio handler in the nic. We re-write the first segment's low
2356 * DMA address to mark it valid only after we write the entire chunk
2360 mxge_submit_8rx(volatile mcp_kreq_ether_recv_t *dst,
2361 mcp_kreq_ether_recv_t *src)
2365 low = src->addr_low;
2366 src->addr_low = 0xffffffff;
2367 mxge_pio_copy(dst, src, 4 * sizeof (*src));
2369 mxge_pio_copy(dst + 4, src + 4, 4 * sizeof (*src));
2371 src->addr_low = low;
2372 dst->addr_low = low;
2377 mxge_get_buf_small(struct mxge_slice_state *ss, bus_dmamap_t map, int idx)
2379 bus_dma_segment_t seg;
2381 mxge_rx_ring_t *rx = &ss->rx_small;
2384 m = m_gethdr(M_DONTWAIT, MT_DATA);
2391 err = bus_dmamap_load_mbuf_sg(rx->dmat, map, m,
2392 &seg, &cnt, BUS_DMA_NOWAIT);
2397 rx->info[idx].m = m;
2398 rx->shadow[idx].addr_low =
2399 htobe32(MXGE_LOWPART_TO_U32(seg.ds_addr));
2400 rx->shadow[idx].addr_high =
2401 htobe32(MXGE_HIGHPART_TO_U32(seg.ds_addr));
2405 mxge_submit_8rx(&rx->lanai[idx - 7], &rx->shadow[idx - 7]);
2410 mxge_get_buf_big(struct mxge_slice_state *ss, bus_dmamap_t map, int idx)
2412 bus_dma_segment_t seg[3];
2414 mxge_rx_ring_t *rx = &ss->rx_big;
2417 m = m_getjcl(M_DONTWAIT, MT_DATA, M_PKTHDR, rx->cl_size);
2423 m->m_len = rx->mlen;
2424 err = bus_dmamap_load_mbuf_sg(rx->dmat, map, m,
2425 seg, &cnt, BUS_DMA_NOWAIT);
2430 rx->info[idx].m = m;
2431 rx->shadow[idx].addr_low =
2432 htobe32(MXGE_LOWPART_TO_U32(seg->ds_addr));
2433 rx->shadow[idx].addr_high =
2434 htobe32(MXGE_HIGHPART_TO_U32(seg->ds_addr));
2436 #if MXGE_VIRT_JUMBOS
2437 for (i = 1; i < cnt; i++) {
2438 rx->shadow[idx + i].addr_low =
2439 htobe32(MXGE_LOWPART_TO_U32(seg[i].ds_addr));
2440 rx->shadow[idx + i].addr_high =
2441 htobe32(MXGE_HIGHPART_TO_U32(seg[i].ds_addr));
2446 for (i = 0; i < rx->nbufs; i++) {
2447 if ((idx & 7) == 7) {
2448 mxge_submit_8rx(&rx->lanai[idx - 7],
2449 &rx->shadow[idx - 7]);
2457 * Myri10GE hardware checksums are not valid if the sender
2458 * padded the frame with non-zero padding. This is because
2459 * the firmware just does a simple 16-bit 1s complement
2460 * checksum across the entire frame, excluding the first 14
2461 * bytes. It is best to simply check the checksum and
2462 * tell the stack about it only if the checksum is good
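*
* (Added note: a return value of 0 means the hardware checksum
* verified; the callers only set CSUM_DATA_VALID | CSUM_PSEUDO_HDR on
* a zero return.)
*/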
2465 static inline uint16_t
2466 mxge_rx_csum(struct mbuf *m, int csum)
2468 struct ether_header *eh;
2472 eh = mtod(m, struct ether_header *);
2474 /* only deal with IPv4 TCP & UDP for now */
2475 if (__predict_false(eh->ether_type != htons(ETHERTYPE_IP)))
2477 ip = (struct ip *)(eh + 1);
2478 if (__predict_false(ip->ip_p != IPPROTO_TCP &&
2479 ip->ip_p != IPPROTO_UDP))
2482 c = in_pseudo(ip->ip_src.s_addr, ip->ip_dst.s_addr,
2483 htonl(ntohs(csum) + ntohs(ip->ip_len) +
2484 - (ip->ip_hl << 2) + ip->ip_p));
2493 mxge_vlan_tag_remove(struct mbuf *m, uint32_t *csum)
2495 struct ether_vlan_header *evl;
2496 struct ether_header *eh;
2499 evl = mtod(m, struct ether_vlan_header *);
2500 eh = mtod(m, struct ether_header *);
2503 * fix checksum by subtracting ETHER_VLAN_ENCAP_LEN bytes
2504 * after what the firmware thought was the end of the ethernet
2508 /* put checksum into host byte order */
2509 *csum = ntohs(*csum);
2510 partial = ntohl(*(uint32_t *)(mtod(m, char *) + ETHER_HDR_LEN));
2511 (*csum) += ~partial;
2512 (*csum) += ((*csum) < ~partial);
2513 (*csum) = ((*csum) >> 16) + ((*csum) & 0xFFFF);
2514 (*csum) = ((*csum) >> 16) + ((*csum) & 0xFFFF);
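/* (Added note: in one's-complement arithmetic, subtracting the 4 bytes of
* 802.1Q encapsulation is done by adding their complement, and the two
* folding steps above reduce the 32-bit accumulator back to 16 bits with
* end-around carry.) */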
2516 /* restore checksum to network byte order;
2517 later consumers expect this */
2518 *csum = htons(*csum);
2521 #ifdef MXGE_NEW_VLAN_API
2522 m->m_pkthdr.ether_vtag = ntohs(evl->evl_tag);
2526 mtag = m_tag_alloc(MTAG_VLAN, MTAG_VLAN_TAG, sizeof(u_int),
2530 VLAN_TAG_VALUE(mtag) = ntohs(evl->evl_tag);
2531 m_tag_prepend(m, mtag);
2535 m->m_flags |= M_VLANTAG;
2538 * Remove the 802.1q header by copying the Ethernet
2539 * addresses over it and adjusting the beginning of
2540 * the data in the mbuf. The encapsulated Ethernet
2541 * type field is already in place.
2543 bcopy((char *)evl, (char *)evl + ETHER_VLAN_ENCAP_LEN,
2544 ETHER_HDR_LEN - ETHER_TYPE_LEN);
2545 m_adj(m, ETHER_VLAN_ENCAP_LEN);
2550 mxge_rx_done_big(struct mxge_slice_state *ss, uint32_t len, uint32_t csum)
2555 struct ether_header *eh;
2557 bus_dmamap_t old_map;
2559 uint16_t tcpudp_csum;
2564 idx = rx->cnt & rx->mask;
2565 rx->cnt += rx->nbufs;
2566 /* save a pointer to the received mbuf */
2567 m = rx->info[idx].m;
2568 /* try to replace the received mbuf */
2569 if (mxge_get_buf_big(ss, rx->extra_map, idx)) {
2570 /* drop the frame -- the old mbuf is re-cycled */
2575 /* unmap the received buffer */
2576 old_map = rx->info[idx].map;
2577 bus_dmamap_sync(rx->dmat, old_map, BUS_DMASYNC_POSTREAD);
2578 bus_dmamap_unload(rx->dmat, old_map);
2580 /* swap the bus_dmamap_t's */
2581 rx->info[idx].map = rx->extra_map;
2582 rx->extra_map = old_map;
2584 /* mcp implicitly skips 1st 2 bytes so that packet is properly aligned */
2586 m->m_data += MXGEFW_PAD;
2588 m->m_pkthdr.rcvif = ifp;
2589 m->m_len = m->m_pkthdr.len = len;
2591 eh = mtod(m, struct ether_header *);
2592 if (eh->ether_type == htons(ETHERTYPE_VLAN)) {
2593 mxge_vlan_tag_remove(m, &csum);
2595 /* if the checksum is valid, mark it in the mbuf header */
2596 if (sc->csum_flag && (0 == (tcpudp_csum = mxge_rx_csum(m, csum)))) {
2597 if (sc->lro_cnt && (0 == mxge_lro_rx(ss, m, csum)))
2599 /* otherwise, it was a UDP frame, or a TCP frame which
2600 we could not do LRO on. Tell the stack that the checksum is good */
2602 m->m_pkthdr.csum_data = 0xffff;
2603 m->m_pkthdr.csum_flags = CSUM_PSEUDO_HDR | CSUM_DATA_VALID;
2605 /* flowid only valid if RSS hashing is enabled */
2606 if (sc->num_slices > 1) {
2607 m->m_pkthdr.flowid = (ss - sc->ss);
2608 m->m_flags |= M_FLOWID;
2610 /* pass the frame up the stack */
2611 (*ifp->if_input)(ifp, m);
2615 mxge_rx_done_small(struct mxge_slice_state *ss, uint32_t len, uint32_t csum)
2619 struct ether_header *eh;
2622 bus_dmamap_t old_map;
2624 uint16_t tcpudp_csum;
2629 idx = rx->cnt & rx->mask;
2631 /* save a pointer to the received mbuf */
2632 m = rx->info[idx].m;
2633 /* try to replace the received mbuf */
2634 if (mxge_get_buf_small(ss, rx->extra_map, idx)) {
2635 /* drop the frame -- the old mbuf is re-cycled */
2640 /* unmap the received buffer */
2641 old_map = rx->info[idx].map;
2642 bus_dmamap_sync(rx->dmat, old_map, BUS_DMASYNC_POSTREAD);
2643 bus_dmamap_unload(rx->dmat, old_map);
2645 /* swap the bus_dmamap_t's */
2646 rx->info[idx].map = rx->extra_map;
2647 rx->extra_map = old_map;
2649 /* mcp implicitly skips 1st 2 bytes so that packet is properly aligned */
2651 m->m_data += MXGEFW_PAD;
2653 m->m_pkthdr.rcvif = ifp;
2654 m->m_len = m->m_pkthdr.len = len;
2656 eh = mtod(m, struct ether_header *);
2657 if (eh->ether_type == htons(ETHERTYPE_VLAN)) {
2658 mxge_vlan_tag_remove(m, &csum);
2660 /* if the checksum is valid, mark it in the mbuf header */
2661 if (sc->csum_flag && (0 == (tcpudp_csum = mxge_rx_csum(m, csum)))) {
2662 if (sc->lro_cnt && (0 == mxge_lro_rx(ss, m, csum)))
2664 /* otherwise, it was a UDP frame, or a TCP frame which
2665 we could not do LRO on. Tell the stack that the checksum is good */
2667 m->m_pkthdr.csum_data = 0xffff;
2668 m->m_pkthdr.csum_flags = CSUM_PSEUDO_HDR | CSUM_DATA_VALID;
2670 /* flowid only valid if RSS hashing is enabled */
2671 if (sc->num_slices > 1) {
2672 m->m_pkthdr.flowid = (ss - sc->ss);
2673 m->m_flags |= M_FLOWID;
2675 /* pass the frame up the stack */
2676 (*ifp->if_input)(ifp, m);
2680 mxge_clean_rx_done(struct mxge_slice_state *ss)
2682 mxge_rx_done_t *rx_done = &ss->rx_done;
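/*
 * rx_done is the receive completion ring the firmware fills in: a
 * non-zero length marks a slot as valid, and the driver zeroes the
 * length after consuming an entry, so the loop below stops at the
 * first slot the firmware has not yet written.
 */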
2688 while (rx_done->entry[rx_done->idx].length != 0) {
2689 length = ntohs(rx_done->entry[rx_done->idx].length);
2690 rx_done->entry[rx_done->idx].length = 0;
2691 checksum = rx_done->entry[rx_done->idx].checksum;
2692 if (length <= (MHLEN - MXGEFW_PAD))
2693 mxge_rx_done_small(ss, length, checksum);
2695 mxge_rx_done_big(ss, length, checksum);
2697 rx_done->idx = rx_done->cnt & rx_done->mask;
2699 /* limit potential for livelock */
2700 if (__predict_false(++limit > rx_done->mask / 2))
2704 while (!SLIST_EMPTY(&ss->lro_active)) {
2705 struct lro_entry *lro = SLIST_FIRST(&ss->lro_active);
2706 SLIST_REMOVE_HEAD(&ss->lro_active, next);
2707 mxge_lro_flush(ss, lro);
2714 mxge_tx_done(struct mxge_slice_state *ss, uint32_t mcp_idx)
2725 while (tx->pkt_done != mcp_idx) {
2726 idx = tx->done & tx->mask;
2728 m = tx->info[idx].m;
2729 /* mbuf and DMA map only attached to the first
2732 ss->obytes += m->m_pkthdr.len;
2733 if (m->m_flags & M_MCAST)
2736 tx->info[idx].m = NULL;
2737 map = tx->info[idx].map;
2738 bus_dmamap_unload(tx->dmat, map);
2741 if (tx->info[idx].flag) {
2742 tx->info[idx].flag = 0;
2747 /* If we have space, clear IFF_OACTIVE to tell the stack that
2748 it's OK to send packets */
2749 #ifdef IFNET_BUF_RING
2750 flags = &ss->if_drv_flags;
2752 flags = &ifp->if_drv_flags;
2754 mtx_lock(&ss->tx.mtx);
2755 if ((*flags) & IFF_DRV_OACTIVE &&
2756 tx->req - tx->done < (tx->mask + 1)/4) {
2757 *(flags) &= ~IFF_DRV_OACTIVE;
2759 mxge_start_locked(ss);
2761 #ifdef IFNET_BUF_RING
2762 if ((ss->sc->num_slices > 1) && (tx->req == tx->done)) {
2763 /* let the NIC stop polling this queue, since there
2764 * are no more transmits pending */
2765 if (tx->req == tx->done) {
2767 tx->queue_active = 0;
2773 mtx_unlock(&ss->tx.mtx);
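/*
 * These tables map bits of an XFP or SFP+ module's 10GbE compliance
 * byte, as read over I2C in mxge_media_probe() below, to ifmedia
 * types and human-readable names.
 */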
2777 static struct mxge_media_type mxge_xfp_media_types[] =
2779 {IFM_10G_CX4, 0x7f, "10GBASE-CX4 (module)"},
2780 {IFM_10G_SR, (1 << 7), "10GBASE-SR"},
2781 {IFM_10G_LR, (1 << 6), "10GBASE-LR"},
2782 {0, (1 << 5), "10GBASE-ER"},
2783 {IFM_10G_LRM, (1 << 4), "10GBASE-LRM"},
2784 {0, (1 << 3), "10GBASE-SW"},
2785 {0, (1 << 2), "10GBASE-LW"},
2786 {0, (1 << 1), "10GBASE-EW"},
2787 {0, (1 << 0), "Reserved"}
2789 static struct mxge_media_type mxge_sfp_media_types[] =
2791 {IFM_10G_TWINAX, 0, "10GBASE-Twinax"},
2792 {0, (1 << 7), "Reserved"},
2793 {IFM_10G_LRM, (1 << 6), "10GBASE-LRM"},
2794 {IFM_10G_LR, (1 << 5), "10GBASE-LR"},
2795 {IFM_10G_SR, (1 << 4), "10GBASE-SR"},
2796 {IFM_10G_TWINAX,(1 << 0), "10GBASE-Twinax"}
2800 mxge_media_set(mxge_softc_t *sc, int media_type)
2804 ifmedia_add(&sc->media, IFM_ETHER | IFM_FDX | media_type,
2806 ifmedia_set(&sc->media, IFM_ETHER | IFM_FDX | media_type);
2807 sc->current_media = media_type;
2808 sc->media.ifm_media = sc->media.ifm_cur->ifm_media;
2812 mxge_media_init(mxge_softc_t *sc)
2817 ifmedia_removeall(&sc->media);
2818 mxge_media_set(sc, IFM_AUTO);
2821 * parse the product code to determine the interface type
2822 * (CX4, XFP, Quad Ribbon Fiber) by looking at the character
2823 * after the 3rd dash in the driver's cached copy of the
2824 * EEPROM's product code string.
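* (A character of 'C' selects CX4, 'Q' Quad Ribbon Fiber, 'R' XFP,
* and 'S' SFP+, as handled below.)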
2826 ptr = sc->product_code_string;
2828 device_printf(sc->dev, "Missing product code\n");
2832 for (i = 0; i < 3; i++, ptr++) {
2833 ptr = index(ptr, '-');
2835 device_printf(sc->dev,
2836 "only %d dashes in PC?!?\n", i);
2840 if (*ptr == 'C' || *(ptr +1) == 'C') {
2842 sc->connector = MXGE_CX4;
2843 mxge_media_set(sc, IFM_10G_CX4);
2844 } else if (*ptr == 'Q') {
2845 /* -Q is Quad Ribbon Fiber */
2846 sc->connector = MXGE_QRF;
2847 device_printf(sc->dev, "Quad Ribbon Fiber Media\n");
2848 /* FreeBSD has no media type for Quad ribbon fiber */
2849 } else if (*ptr == 'R') {
2851 sc->connector = MXGE_XFP;
2852 } else if (*ptr == 'S' || *(ptr +1) == 'S') {
2853 /* -S or -2S is SFP+ */
2854 sc->connector = MXGE_SFP;
2856 device_printf(sc->dev, "Unknown media type: %c\n", *ptr);
2861 * Determine the media type for a NIC. Some XFPs will identify
2862 * themselves only when their link is up, so this is initiated via a
2863 * link up interrupt. However, this can potentially take up to
2864 * several milliseconds, so it is run via the watchdog routine, rather
2865 * than in the interrupt handler itself.
2868 mxge_media_probe(mxge_softc_t *sc)
2873 struct mxge_media_type *mxge_media_types = NULL;
2874 int i, err, ms, mxge_media_type_entries;
2877 sc->need_media_probe = 0;
2879 if (sc->connector == MXGE_XFP) {
2881 mxge_media_types = mxge_xfp_media_types;
2882 mxge_media_type_entries =
2883 sizeof (mxge_xfp_media_types) /
2884 sizeof (mxge_xfp_media_types[0]);
2885 byte = MXGE_XFP_COMPLIANCE_BYTE;
2887 } else if (sc->connector == MXGE_SFP) {
2888 /* -S or -2S is SFP+ */
2889 mxge_media_types = mxge_sfp_media_types;
2890 mxge_media_type_entries =
2891 sizeof (mxge_sfp_media_types) /
2892 sizeof (mxge_sfp_media_types[0]);
2896 /* nothing to do; media type cannot change */
2901 * At this point we know the NIC has an XFP cage, so now we
2902 * try to determine what is in the cage by using the
2903 * firmware's XFP I2C commands to read the XFP 10GbE compliance
2904 * register. We read just one byte, which may take over
2908 cmd.data0 = 0; /* just fetch 1 byte, not all 256 */
2910 err = mxge_send_cmd(sc, MXGEFW_CMD_I2C_READ, &cmd);
2911 if (err == MXGEFW_CMD_ERROR_I2C_FAILURE) {
2912 device_printf(sc->dev, "failed to read XFP\n");
2914 if (err == MXGEFW_CMD_ERROR_I2C_ABSENT) {
2915 device_printf(sc->dev, "Type R/S with no XFP!?!?\n");
2917 if (err != MXGEFW_CMD_OK) {
2921 /* now we wait for the data to be cached */
2923 err = mxge_send_cmd(sc, MXGEFW_CMD_I2C_BYTE, &cmd);
2924 for (ms = 0; (err == EBUSY) && (ms < 50); ms++) {
2927 err = mxge_send_cmd(sc, MXGEFW_CMD_I2C_BYTE, &cmd);
2929 if (err != MXGEFW_CMD_OK) {
2930 device_printf(sc->dev, "failed to read %s (%d, %dms)\n",
2931 cage_type, err, ms);
2935 if (cmd.data0 == mxge_media_types[0].bitmask) {
2937 device_printf(sc->dev, "%s:%s\n", cage_type,
2938 mxge_media_types[0].name);
2939 if (sc->current_media != mxge_media_types[0].flag) {
2940 mxge_media_init(sc);
2941 mxge_media_set(sc, mxge_media_types[0].flag);
2945 for (i = 1; i < mxge_media_type_entries; i++) {
2946 if (cmd.data0 & mxge_media_types[i].bitmask) {
2948 device_printf(sc->dev, "%s:%s\n",
2950 mxge_media_types[i].name);
2952 if (sc->current_media != mxge_media_types[i].flag) {
2953 mxge_media_init(sc);
2954 mxge_media_set(sc, mxge_media_types[i].flag);
2960 device_printf(sc->dev, "%s media 0x%x unknown\n",
2961 cage_type, cmd.data0);
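/*
 * Interrupt handler.  The firmware DMAs a mcp_irq_data_t block into
 * host memory and sets its `valid' byte; the handler drains transmit
 * completions and the receive completion ring, deasserts a legacy
 * interrupt when one is in use, and finally writes the irq_claim
 * words to hand the rx token back to the NIC.
 */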
2967 mxge_intr(void *arg)
2969 struct mxge_slice_state *ss = arg;
2970 mxge_softc_t *sc = ss->sc;
2971 mcp_irq_data_t *stats = ss->fw_stats;
2972 mxge_tx_ring_t *tx = &ss->tx;
2973 mxge_rx_done_t *rx_done = &ss->rx_done;
2974 uint32_t send_done_count;
2978 #ifndef IFNET_BUF_RING
2979 /* an interrupt on a non-zero slice is implicitly valid
2980 since MSI-X irqs are not shared */
2982 mxge_clean_rx_done(ss);
2983 *ss->irq_claim = be32toh(3);
2988 /* make sure the DMA has finished */
2989 if (!stats->valid) {
2992 valid = stats->valid;
2994 if (sc->legacy_irq) {
2995 /* lower legacy IRQ */
2996 *sc->irq_deassert = 0;
2997 if (!mxge_deassert_wait)
2998 /* don't wait for confirmation that the irq is low */
3004 /* loop while waiting for legacy irq deassertion */
3006 /* check for transmit completes and receives */
3007 send_done_count = be32toh(stats->send_done_count);
3008 while ((send_done_count != tx->pkt_done) ||
3009 (rx_done->entry[rx_done->idx].length != 0)) {
3010 if (send_done_count != tx->pkt_done)
3011 mxge_tx_done(ss, (int)send_done_count);
3012 mxge_clean_rx_done(ss);
3013 send_done_count = be32toh(stats->send_done_count);
3015 if (sc->legacy_irq && mxge_deassert_wait)
3017 } while (*((volatile uint8_t *) &stats->valid));
3019 /* fw link & error stats meaningful only on the first slice */
3020 if (__predict_false((ss == sc->ss) && stats->stats_updated)) {
3021 if (sc->link_state != stats->link_up) {
3022 sc->link_state = stats->link_up;
3023 if (sc->link_state) {
3024 if_link_state_change(sc->ifp, LINK_STATE_UP);
3025 sc->ifp->if_baudrate = IF_Gbps(10UL);
3027 device_printf(sc->dev, "link up\n");
3029 if_link_state_change(sc->ifp, LINK_STATE_DOWN);
3030 sc->ifp->if_baudrate = 0;
3032 device_printf(sc->dev, "link down\n");
3034 sc->need_media_probe = 1;
3036 if (sc->rdma_tags_available !=
3037 be32toh(stats->rdma_tags_available)) {
3038 sc->rdma_tags_available =
3039 be32toh(stats->rdma_tags_available);
3040 device_printf(sc->dev, "RDMA timed out! %d tags "
3041 "left\n", sc->rdma_tags_available);
3044 if (stats->link_down) {
3045 sc->down_cnt += stats->link_down;
3047 if_link_state_change(sc->ifp, LINK_STATE_DOWN);
3051 /* check to see if we have rx token to pass back */
3053 *ss->irq_claim = be32toh(3);
3054 *(ss->irq_claim + 1) = be32toh(3);
3058 mxge_init(void *arg)
3060 mxge_softc_t *sc = arg;
3061 struct ifnet *ifp = sc->ifp;
3064 mtx_lock(&sc->driver_mtx);
3065 if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
3066 (void) mxge_open(sc);
3067 mtx_unlock(&sc->driver_mtx);
3073 mxge_free_slice_mbufs(struct mxge_slice_state *ss)
3075 struct lro_entry *lro_entry;
3078 while (!SLIST_EMPTY(&ss->lro_free)) {
3079 lro_entry = SLIST_FIRST(&ss->lro_free);
3080 SLIST_REMOVE_HEAD(&ss->lro_free, next);
3081 free(lro_entry, M_DEVBUF);
3084 for (i = 0; i <= ss->rx_big.mask; i++) {
3085 if (ss->rx_big.info[i].m == NULL)
3087 bus_dmamap_unload(ss->rx_big.dmat,
3088 ss->rx_big.info[i].map);
3089 m_freem(ss->rx_big.info[i].m);
3090 ss->rx_big.info[i].m = NULL;
3093 for (i = 0; i <= ss->rx_small.mask; i++) {
3094 if (ss->rx_small.info[i].m == NULL)
3096 bus_dmamap_unload(ss->rx_small.dmat,
3097 ss->rx_small.info[i].map);
3098 m_freem(ss->rx_small.info[i].m);
3099 ss->rx_small.info[i].m = NULL;
3102 /* transmit ring used only on the first slice */
3103 if (ss->tx.info == NULL)
3106 for (i = 0; i <= ss->tx.mask; i++) {
3107 ss->tx.info[i].flag = 0;
3108 if (ss->tx.info[i].m == NULL)
3110 bus_dmamap_unload(ss->tx.dmat,
3111 ss->tx.info[i].map);
3112 m_freem(ss->tx.info[i].m);
3113 ss->tx.info[i].m = NULL;
3118 mxge_free_mbufs(mxge_softc_t *sc)
3122 for (slice = 0; slice < sc->num_slices; slice++)
3123 mxge_free_slice_mbufs(&sc->ss[slice]);
3127 mxge_free_slice_rings(struct mxge_slice_state *ss)
3132 if (ss->rx_done.entry != NULL)
3133 mxge_dma_free(&ss->rx_done.dma);
3134 ss->rx_done.entry = NULL;
3136 if (ss->tx.req_bytes != NULL)
3137 free(ss->tx.req_bytes, M_DEVBUF);
3138 ss->tx.req_bytes = NULL;
3140 if (ss->tx.seg_list != NULL)
3141 free(ss->tx.seg_list, M_DEVBUF);
3142 ss->tx.seg_list = NULL;
3144 if (ss->rx_small.shadow != NULL)
3145 free(ss->rx_small.shadow, M_DEVBUF);
3146 ss->rx_small.shadow = NULL;
3148 if (ss->rx_big.shadow != NULL)
3149 free(ss->rx_big.shadow, M_DEVBUF);
3150 ss->rx_big.shadow = NULL;
3152 if (ss->tx.info != NULL) {
3153 if (ss->tx.dmat != NULL) {
3154 for (i = 0; i <= ss->tx.mask; i++) {
3155 bus_dmamap_destroy(ss->tx.dmat,
3156 ss->tx.info[i].map);
3158 bus_dma_tag_destroy(ss->tx.dmat);
3160 free(ss->tx.info, M_DEVBUF);
3164 if (ss->rx_small.info != NULL) {
3165 if (ss->rx_small.dmat != NULL) {
3166 for (i = 0; i <= ss->rx_small.mask; i++) {
3167 bus_dmamap_destroy(ss->rx_small.dmat,
3168 ss->rx_small.info[i].map);
3170 bus_dmamap_destroy(ss->rx_small.dmat,
3171 ss->rx_small.extra_map);
3172 bus_dma_tag_destroy(ss->rx_small.dmat);
3174 free(ss->rx_small.info, M_DEVBUF);
3176 ss->rx_small.info = NULL;
3178 if (ss->rx_big.info != NULL) {
3179 if (ss->rx_big.dmat != NULL) {
3180 for (i = 0; i <= ss->rx_big.mask; i++) {
3181 bus_dmamap_destroy(ss->rx_big.dmat,
3182 ss->rx_big.info[i].map);
3184 bus_dmamap_destroy(ss->rx_big.dmat,
3185 ss->rx_big.extra_map);
3186 bus_dma_tag_destroy(ss->rx_big.dmat);
3188 free(ss->rx_big.info, M_DEVBUF);
3190 ss->rx_big.info = NULL;
3194 mxge_free_rings(mxge_softc_t *sc)
3198 for (slice = 0; slice < sc->num_slices; slice++)
3199 mxge_free_slice_rings(&sc->ss[slice]);
3203 mxge_alloc_slice_rings(struct mxge_slice_state *ss, int rx_ring_entries,
3204 int tx_ring_entries)
3206 mxge_softc_t *sc = ss->sc;
3210 /* allocate per-slice receive resources */
3212 ss->rx_small.mask = ss->rx_big.mask = rx_ring_entries - 1;
3213 ss->rx_done.mask = (2 * rx_ring_entries) - 1;
3215 /* allocate the rx shadow rings */
3216 bytes = rx_ring_entries * sizeof (*ss->rx_small.shadow);
3217 ss->rx_small.shadow = malloc(bytes, M_DEVBUF, M_ZERO|M_WAITOK);
3219 bytes = rx_ring_entries * sizeof (*ss->rx_big.shadow);
3220 ss->rx_big.shadow = malloc(bytes, M_DEVBUF, M_ZERO|M_WAITOK);
3222 /* allocate the rx host info rings */
3223 bytes = rx_ring_entries * sizeof (*ss->rx_small.info);
3224 ss->rx_small.info = malloc(bytes, M_DEVBUF, M_ZERO|M_WAITOK);
3226 bytes = rx_ring_entries * sizeof (*ss->rx_big.info);
3227 ss->rx_big.info = malloc(bytes, M_DEVBUF, M_ZERO|M_WAITOK);
3229 /* allocate the rx busdma resources */
3230 err = bus_dma_tag_create(sc->parent_dmat, /* parent */
3232 4096, /* boundary */
3233 BUS_SPACE_MAXADDR, /* low */
3234 BUS_SPACE_MAXADDR, /* high */
3235 NULL, NULL, /* filter */
3236 MHLEN, /* maxsize */
3238 MHLEN, /* maxsegsize */
3239 BUS_DMA_ALLOCNOW, /* flags */
3240 NULL, NULL, /* lock */
3241 &ss->rx_small.dmat); /* tag */
3243 device_printf(sc->dev, "Err %d allocating rx_small dmat\n",
3248 err = bus_dma_tag_create(sc->parent_dmat, /* parent */
3250 #if MXGE_VIRT_JUMBOS
3251 4096, /* boundary */
3255 BUS_SPACE_MAXADDR, /* low */
3256 BUS_SPACE_MAXADDR, /* high */
3257 NULL, NULL, /* filter */
3258 3*4096, /* maxsize */
3259 #if MXGE_VIRT_JUMBOS
3261 4096, /* maxsegsize*/
3264 MJUM9BYTES, /* maxsegsize*/
3266 BUS_DMA_ALLOCNOW, /* flags */
3267 NULL, NULL, /* lock */
3268 &ss->rx_big.dmat); /* tag */
3270 device_printf(sc->dev, "Err %d allocating rx_big dmat\n",
3274 for (i = 0; i <= ss->rx_small.mask; i++) {
3275 err = bus_dmamap_create(ss->rx_small.dmat, 0,
3276 &ss->rx_small.info[i].map);
3278 device_printf(sc->dev, "Err %d rx_small dmamap\n",
3283 err = bus_dmamap_create(ss->rx_small.dmat, 0,
3284 &ss->rx_small.extra_map);
3286 device_printf(sc->dev, "Err %d extra rx_small dmamap\n",
3291 for (i = 0; i <= ss->rx_big.mask; i++) {
3292 err = bus_dmamap_create(ss->rx_big.dmat, 0,
3293 &ss->rx_big.info[i].map);
3295 device_printf(sc->dev, "Err %d rx_big dmamap\n",
3300 err = bus_dmamap_create(ss->rx_big.dmat, 0,
3301 &ss->rx_big.extra_map);
3303 device_printf(sc->dev, "Err %d extra rx_big dmamap\n",
3308 /* now allocate TX resources */
3310 #ifndef IFNET_BUF_RING
3311 /* only use a single TX ring for now */
3312 if (ss != ss->sc->ss)
3316 ss->tx.mask = tx_ring_entries - 1;
3317 ss->tx.max_desc = MIN(MXGE_MAX_SEND_DESC, tx_ring_entries / 4);
3320 /* allocate the tx request copy block */
3322 sizeof (*ss->tx.req_list) * (ss->tx.max_desc + 4);
3323 ss->tx.req_bytes = malloc(bytes, M_DEVBUF, M_WAITOK);
3324 /* ensure req_list entries are aligned to 8 bytes */
3325 ss->tx.req_list = (mcp_kreq_ether_send_t *)
3326 ((unsigned long)(ss->tx.req_bytes + 7) & ~7UL);
3328 /* allocate the tx busdma segment list */
3329 bytes = sizeof (*ss->tx.seg_list) * ss->tx.max_desc;
3330 ss->tx.seg_list = (bus_dma_segment_t *)
3331 malloc(bytes, M_DEVBUF, M_WAITOK);
3333 /* allocate the tx host info ring */
3334 bytes = tx_ring_entries * sizeof (*ss->tx.info);
3335 ss->tx.info = malloc(bytes, M_DEVBUF, M_ZERO|M_WAITOK);
3337 /* allocate the tx busdma resources */
3338 err = bus_dma_tag_create(sc->parent_dmat, /* parent */
3340 sc->tx_boundary, /* boundary */
3341 BUS_SPACE_MAXADDR, /* low */
3342 BUS_SPACE_MAXADDR, /* high */
3343 NULL, NULL, /* filter */
3344 65536 + 256, /* maxsize */
3345 ss->tx.max_desc - 2, /* num segs */
3346 sc->tx_boundary, /* maxsegsz */
3347 BUS_DMA_ALLOCNOW, /* flags */
3348 NULL, NULL, /* lock */
3349 &ss->tx.dmat); /* tag */
3352 device_printf(sc->dev, "Err %d allocating tx dmat\n",
3357 /* now use these tags to set up dmamaps for each slot in the ring */
3359 for (i = 0; i <= ss->tx.mask; i++) {
3360 err = bus_dmamap_create(ss->tx.dmat, 0,
3361 &ss->tx.info[i].map);
3363 device_printf(sc->dev, "Err %d tx dmamap\n",
3373 mxge_alloc_rings(mxge_softc_t *sc)
3377 int tx_ring_entries, rx_ring_entries;
3380 /* get ring sizes */
3381 err = mxge_send_cmd(sc, MXGEFW_CMD_GET_SEND_RING_SIZE, &cmd);
3382 tx_ring_size = cmd.data0;
3384 device_printf(sc->dev, "Cannot determine tx ring sizes\n");
3388 tx_ring_entries = tx_ring_size / sizeof (mcp_kreq_ether_send_t);
3389 rx_ring_entries = sc->rx_ring_size / sizeof (mcp_dma_addr_t);
3390 IFQ_SET_MAXLEN(&sc->ifp->if_snd, tx_ring_entries - 1);
3391 sc->ifp->if_snd.ifq_drv_maxlen = sc->ifp->if_snd.ifq_maxlen;
3392 IFQ_SET_READY(&sc->ifp->if_snd);
3394 for (slice = 0; slice < sc->num_slices; slice++) {
3395 err = mxge_alloc_slice_rings(&sc->ss[slice],
3404 mxge_free_rings(sc);
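/*
 * Pick receive buffer parameters for a given MTU.  The required size
 * is the MTU plus Ethernet/VLAN headers plus the firmware pad; use a
 * standard 2KB cluster when that fits, otherwise a page-sized cluster,
 * and fall back to 9KB jumbo clusters (or several virtually contiguous
 * 4KB chunks when MXGE_VIRT_JUMBOS is set) for larger MTUs.
 */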
3411 mxge_choose_params(int mtu, int *big_buf_size, int *cl_size, int *nbufs)
3413 int bufsize = mtu + ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN + MXGEFW_PAD;
3415 if (bufsize < MCLBYTES) {
3416 /* easy, everything fits in a single buffer */
3417 *big_buf_size = MCLBYTES;
3418 *cl_size = MCLBYTES;
3423 if (bufsize < MJUMPAGESIZE) {
3424 /* still easy, everything still fits in a single buffer */
3425 *big_buf_size = MJUMPAGESIZE;
3426 *cl_size = MJUMPAGESIZE;
3430 #if MXGE_VIRT_JUMBOS
3431 /* now we need to use virtually contiguous buffers */
3432 *cl_size = MJUM9BYTES;
3433 *big_buf_size = 4096;
3434 *nbufs = mtu / 4096 + 1;
3435 /* needs to be a power of two, so round up */
3439 *cl_size = MJUM9BYTES;
3440 *big_buf_size = MJUM9BYTES;
3446 mxge_slice_open(struct mxge_slice_state *ss, int nbufs, int cl_size)
3451 struct lro_entry *lro_entry;
3456 slice = ss - sc->ss;
3458 SLIST_INIT(&ss->lro_free);
3459 SLIST_INIT(&ss->lro_active);
3461 for (i = 0; i < sc->lro_cnt; i++) {
3462 lro_entry = (struct lro_entry *)
3463 malloc(sizeof (*lro_entry), M_DEVBUF,
3465 if (lro_entry == NULL) {
3469 SLIST_INSERT_HEAD(&ss->lro_free, lro_entry, next);
3471 /* get the lanai pointers to the send and receive rings */
3474 #ifndef IFNET_BUF_RING
3475 /* We currently only send from the first slice */
3479 err = mxge_send_cmd(sc, MXGEFW_CMD_GET_SEND_OFFSET, &cmd);
3481 (volatile mcp_kreq_ether_send_t *)(sc->sram + cmd.data0);
3482 ss->tx.send_go = (volatile uint32_t *)
3483 (sc->sram + MXGEFW_ETH_SEND_GO + 64 * slice);
3484 ss->tx.send_stop = (volatile uint32_t *)
3485 (sc->sram + MXGEFW_ETH_SEND_STOP + 64 * slice);
3486 #ifndef IFNET_BUF_RING
3490 err |= mxge_send_cmd(sc,
3491 MXGEFW_CMD_GET_SMALL_RX_OFFSET, &cmd);
3492 ss->rx_small.lanai =
3493 (volatile mcp_kreq_ether_recv_t *)(sc->sram + cmd.data0);
3495 err |= mxge_send_cmd(sc, MXGEFW_CMD_GET_BIG_RX_OFFSET, &cmd);
3497 (volatile mcp_kreq_ether_recv_t *)(sc->sram + cmd.data0);
3500 device_printf(sc->dev,
3501 "failed to get ring sizes or locations\n");
3505 /* stock receive rings */
3506 for (i = 0; i <= ss->rx_small.mask; i++) {
3507 map = ss->rx_small.info[i].map;
3508 err = mxge_get_buf_small(ss, map, i);
3510 device_printf(sc->dev, "alloced %d/%d smalls\n",
3511 i, ss->rx_small.mask + 1);
3515 for (i = 0; i <= ss->rx_big.mask; i++) {
3516 ss->rx_big.shadow[i].addr_low = 0xffffffff;
3517 ss->rx_big.shadow[i].addr_high = 0xffffffff;
3519 ss->rx_big.nbufs = nbufs;
3520 ss->rx_big.cl_size = cl_size;
3521 ss->rx_big.mlen = ss->sc->ifp->if_mtu + ETHER_HDR_LEN +
3522 ETHER_VLAN_ENCAP_LEN + MXGEFW_PAD;
3523 for (i = 0; i <= ss->rx_big.mask; i += ss->rx_big.nbufs) {
3524 map = ss->rx_big.info[i].map;
3525 err = mxge_get_buf_big(ss, map, i);
3527 device_printf(sc->dev, "alloced %d/%d bigs\n",
3528 i, ss->rx_big.mask + 1);
3536 mxge_open(mxge_softc_t *sc)
3539 int err, big_bytes, nbufs, slice, cl_size, i;
3541 volatile uint8_t *itable;
3542 struct mxge_slice_state *ss;
3544 /* Copy the MAC address in case it was overridden */
3545 bcopy(IF_LLADDR(sc->ifp), sc->mac_addr, ETHER_ADDR_LEN);
3547 err = mxge_reset(sc, 1);
3549 device_printf(sc->dev, "failed to reset\n");
3553 if (sc->num_slices > 1) {
3554 /* setup the indirection table */
3555 cmd.data0 = sc->num_slices;
3556 err = mxge_send_cmd(sc, MXGEFW_CMD_SET_RSS_TABLE_SIZE,
3559 err |= mxge_send_cmd(sc, MXGEFW_CMD_GET_RSS_TABLE_OFFSET,
3562 device_printf(sc->dev,
3563 "failed to setup rss tables\n");
3567 /* just enable an identity mapping */
3568 itable = sc->sram + cmd.data0;
3569 for (i = 0; i < sc->num_slices; i++)
3570 itable[i] = (uint8_t)i;
3573 cmd.data1 = mxge_rss_hash_type;
3574 err = mxge_send_cmd(sc, MXGEFW_CMD_SET_RSS_ENABLE, &cmd);
3576 device_printf(sc->dev, "failed to enable slices\n");
3582 mxge_choose_params(sc->ifp->if_mtu, &big_bytes, &cl_size, &nbufs);
3585 err = mxge_send_cmd(sc, MXGEFW_CMD_ALWAYS_USE_N_BIG_BUFFERS,
3587 /* error is only meaningful if we're trying to set
3588 MXGEFW_CMD_ALWAYS_USE_N_BIG_BUFFERS > 1 */
3589 if (err && nbufs > 1) {
3590 device_printf(sc->dev,
3591 "Failed to set always-use-n to %d\n",
3595 /* Give the firmware the mtu and the big and small buffer
3596 sizes. The firmware wants the big buf size to be a power
3597 of two. Luckily, FreeBSD's clusters are powers of two */
3598 cmd.data0 = sc->ifp->if_mtu + ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
3599 err = mxge_send_cmd(sc, MXGEFW_CMD_SET_MTU, &cmd);
3600 cmd.data0 = MHLEN - MXGEFW_PAD;
3601 err |= mxge_send_cmd(sc, MXGEFW_CMD_SET_SMALL_BUFFER_SIZE,
3603 cmd.data0 = big_bytes;
3604 err |= mxge_send_cmd(sc, MXGEFW_CMD_SET_BIG_BUFFER_SIZE, &cmd);
3607 device_printf(sc->dev, "failed to setup params\n");
3611 /* Now give the firmware the pointer to the stats block */
3613 #ifdef IFNET_BUF_RING
3614 slice < sc->num_slices;
3619 ss = &sc->ss[slice];
3621 MXGE_LOWPART_TO_U32(ss->fw_stats_dma.bus_addr);
3623 MXGE_HIGHPART_TO_U32(ss->fw_stats_dma.bus_addr);
3624 cmd.data2 = sizeof(struct mcp_irq_data);
3625 cmd.data2 |= (slice << 16);
3626 err |= mxge_send_cmd(sc, MXGEFW_CMD_SET_STATS_DMA_V2, &cmd);
3630 bus = sc->ss->fw_stats_dma.bus_addr;
3631 bus += offsetof(struct mcp_irq_data, send_done_count);
3632 cmd.data0 = MXGE_LOWPART_TO_U32(bus);
3633 cmd.data1 = MXGE_HIGHPART_TO_U32(bus);
3634 err = mxge_send_cmd(sc,
3635 MXGEFW_CMD_SET_STATS_DMA_OBSOLETE,
3637 /* Firmware cannot support multicast without STATS_DMA_V2 */
3638 sc->fw_multicast_support = 0;
3640 sc->fw_multicast_support = 1;
3644 device_printf(sc->dev, "failed to setup params\n");
3648 for (slice = 0; slice < sc->num_slices; slice++) {
3649 err = mxge_slice_open(&sc->ss[slice], nbufs, cl_size);
3651 device_printf(sc->dev, "couldn't open slice %d\n",
3657 /* Finally, start the firmware running */
3658 err = mxge_send_cmd(sc, MXGEFW_CMD_ETHERNET_UP, &cmd);
3660 device_printf(sc->dev, "Couldn't bring up link\n");
3663 #ifdef IFNET_BUF_RING
3664 for (slice = 0; slice < sc->num_slices; slice++) {
3665 ss = &sc->ss[slice];
3666 ss->if_drv_flags |= IFF_DRV_RUNNING;
3667 ss->if_drv_flags &= ~IFF_DRV_OACTIVE;
3670 sc->ifp->if_drv_flags |= IFF_DRV_RUNNING;
3671 sc->ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;
3677 mxge_free_mbufs(sc);
3683 mxge_close(mxge_softc_t *sc, int down)
3686 int err, old_down_cnt;
3687 #ifdef IFNET_BUF_RING
3688 struct mxge_slice_state *ss;
3692 #ifdef IFNET_BUF_RING
3693 for (slice = 0; slice < sc->num_slices; slice++) {
3694 ss = &sc->ss[slice];
3695 ss->if_drv_flags &= ~IFF_DRV_RUNNING;
3698 sc->ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
3700 old_down_cnt = sc->down_cnt;
3702 err = mxge_send_cmd(sc, MXGEFW_CMD_ETHERNET_DOWN, &cmd);
3704 device_printf(sc->dev,
3705 "Couldn't bring down link\n");
3707 if (old_down_cnt == sc->down_cnt) {
3708 /* wait for down irq */
3709 DELAY(10 * sc->intr_coal_delay);
3712 if (old_down_cnt == sc->down_cnt) {
3713 device_printf(sc->dev, "never got down irq\n");
3716 mxge_free_mbufs(sc);
3722 mxge_setup_cfg_space(mxge_softc_t *sc)
3724 device_t dev = sc->dev;
3726 uint16_t cmd, lnk, pectl;
3728 /* find the PCIe link width and set max read request to 4KB */
3729 if (pci_find_extcap(dev, PCIY_EXPRESS, &reg) == 0) {
3730 lnk = pci_read_config(dev, reg + 0x12, 2);
3731 sc->link_width = (lnk >> 4) & 0x3f;
3733 if (sc->pectl == 0) {
3734 pectl = pci_read_config(dev, reg + 0x8, 2);
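/*
 * The maximum read request size lives in bits 14:12 of the PCIe
 * device control register (offset 0x8 into the capability); a field
 * value of 5 selects 4096 bytes.
 */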
3735 pectl = (pectl & ~0x7000) | (5 << 12);
3736 pci_write_config(dev, reg + 0x8, pectl, 2);
3739 /* restore saved pectl after watchdog reset */
3740 pci_write_config(dev, reg + 0x8, sc->pectl, 2);
3744 /* Enable DMA and Memory space access */
3745 pci_enable_busmaster(dev);
3746 cmd = pci_read_config(dev, PCIR_COMMAND, 2);
3747 cmd |= PCIM_CMD_MEMEN;
3748 pci_write_config(dev, PCIR_COMMAND, cmd, 2);
3752 mxge_read_reboot(mxge_softc_t *sc)
3754 device_t dev = sc->dev;
3757 /* find the vendor specific offset */
3758 if (pci_find_extcap(dev, PCIY_VENDOR, &vs) != 0) {
3759 device_printf(sc->dev,
3760 "could not find vendor specific offset\n");
3761 return (uint32_t)-1;
3763 /* enable read32 mode */
3764 pci_write_config(dev, vs + 0x10, 0x3, 1);
3765 /* tell NIC which register to read */
3766 pci_write_config(dev, vs + 0x18, 0xfffffff0, 4);
3767 return (pci_read_config(dev, vs + 0x14, 4));
3771 mxge_watchdog_reset(mxge_softc_t *sc)
3773 struct pci_devinfo *dinfo;
3774 struct mxge_slice_state *ss;
3775 int err, running, s, num_tx_slices = 1;
3781 device_printf(sc->dev, "Watchdog reset!\n");
3784 * check to see if the NIC rebooted. If it did, then all of
3785 * PCI config space has been reset, and things like the
3786 * busmaster bit will be zero. If this is the case, then we
3787 * must restore PCI config space before the NIC can be used
3790 cmd = pci_read_config(sc->dev, PCIR_COMMAND, 2);
3791 if (cmd == 0xffff) {
3793 * maybe the watchdog caught the NIC rebooting; wait
3794 * up to 100ms for it to finish. If it does not come
3795 * back, then give up
3798 cmd = pci_read_config(sc->dev, PCIR_COMMAND, 2);
3799 if (cmd == 0xffff) {
3800 device_printf(sc->dev, "NIC disappeared!\n");
3803 if ((cmd & PCIM_CMD_BUSMASTEREN) == 0) {
3804 /* print the reboot status */
3805 reboot = mxge_read_reboot(sc);
3806 device_printf(sc->dev, "NIC rebooted, status = 0x%x\n",
3808 running = sc->ifp->if_drv_flags & IFF_DRV_RUNNING;
3812 * quiesce NIC so that TX routines will not try to
3813 * xmit after restoration of BAR
3816 /* Mark the link as down */
3817 if (sc->link_state) {
3819 if_link_state_change(sc->ifp,
3822 #ifdef IFNET_BUF_RING
3823 num_tx_slices = sc->num_slices;
3825 /* grab all TX locks to ensure no tx */
3826 for (s = 0; s < num_tx_slices; s++) {
3828 mtx_lock(&ss->tx.mtx);
3832 /* restore PCI configuration space */
3833 dinfo = device_get_ivars(sc->dev);
3834 pci_cfg_restore(sc->dev, dinfo);
3836 /* and redo any changes we made to our config space */
3837 mxge_setup_cfg_space(sc);
3840 err = mxge_load_firmware(sc, 0);
3842 device_printf(sc->dev,
3843 "Unable to re-load f/w\n");
3847 err = mxge_open(sc);
3848 /* release all TX locks */
3849 for (s = 0; s < num_tx_slices; s++) {
3851 #ifdef IFNET_BUF_RING
3852 mxge_start_locked(ss);
3854 mtx_unlock(&ss->tx.mtx);
3857 sc->watchdog_resets++;
3859 device_printf(sc->dev,
3860 "NIC did not reboot, not resetting\n");
3864 device_printf(sc->dev, "watchdog reset failed\n");
3868 callout_reset(&sc->co_hdl, mxge_ticks, mxge_tick, sc);
3873 mxge_watchdog_task(void *arg, int pending)
3875 mxge_softc_t *sc = arg;
3878 mtx_lock(&sc->driver_mtx);
3879 mxge_watchdog_reset(sc);
3880 mtx_unlock(&sc->driver_mtx);
3884 mxge_warn_stuck(mxge_softc_t *sc, mxge_tx_ring_t *tx, int slice)
3886 tx = &sc->ss[slice].tx;
3887 device_printf(sc->dev, "slice %d stuck? ring state:\n", slice);
3888 device_printf(sc->dev,
3889 "tx.req=%d tx.done=%d, tx.queue_active=%d\n",
3890 tx->req, tx->done, tx->queue_active);
3891 device_printf(sc->dev, "tx.activate=%d tx.deactivate=%d\n",
3892 tx->activate, tx->deactivate);
3893 device_printf(sc->dev, "pkt_done=%d fw=%d\n",
3895 be32toh(sc->ss->fw_stats->send_done_count));
3899 mxge_watchdog(mxge_softc_t *sc)
3902 uint32_t rx_pause = be32toh(sc->ss->fw_stats->dropped_pause);
3905 /* see if we have outstanding transmits, which
3906 have been pending for more than mxge_ticks */
3908 #ifdef IFNET_BUF_RING
3909 (i < sc->num_slices) && (err == 0);
3911 (i < 1) && (err == 0);
3915 if (tx->req != tx->done &&
3916 tx->watchdog_req != tx->watchdog_done &&
3917 tx->done == tx->watchdog_done) {
3918 /* check for pause blocking before resetting */
3919 if (tx->watchdog_rx_pause == rx_pause) {
3920 mxge_warn_stuck(sc, tx, i);
3921 taskqueue_enqueue(sc->tq, &sc->watchdog_task);
3925 device_printf(sc->dev, "Flow control blocking "
3926 "xmits, check link partner\n");
3929 tx->watchdog_req = tx->req;
3930 tx->watchdog_done = tx->done;
3931 tx->watchdog_rx_pause = rx_pause;
3934 if (sc->need_media_probe)
3935 mxge_media_probe(sc);
3940 mxge_update_stats(mxge_softc_t *sc)
3942 struct mxge_slice_state *ss;
3944 u_long ipackets = 0;
3945 u_long opackets = 0;
3946 #ifdef IFNET_BUF_RING
3954 for (slice = 0; slice < sc->num_slices; slice++) {
3955 ss = &sc->ss[slice];
3956 ipackets += ss->ipackets;
3957 opackets += ss->opackets;
3958 #ifdef IFNET_BUF_RING
3959 obytes += ss->obytes;
3960 omcasts += ss->omcasts;
3961 odrops += ss->tx.br->br_drops;
3963 oerrors += ss->oerrors;
3965 pkts = (ipackets - sc->ifp->if_ipackets);
3966 pkts += (opackets - sc->ifp->if_opackets);
3967 sc->ifp->if_ipackets = ipackets;
3968 sc->ifp->if_opackets = opackets;
3969 #ifdef IFNET_BUF_RING
3970 sc->ifp->if_obytes = obytes;
3971 sc->ifp->if_omcasts = omcasts;
3972 sc->ifp->if_snd.ifq_drops = odrops;
3974 sc->ifp->if_oerrors = oerrors;
3979 mxge_tick(void *arg)
3981 mxge_softc_t *sc = arg;
3988 running = sc->ifp->if_drv_flags & IFF_DRV_RUNNING;
3990 /* aggregate stats from different slices */
3991 pkts = mxge_update_stats(sc);
3992 if (!sc->watchdog_countdown) {
3993 err = mxge_watchdog(sc);
3994 sc->watchdog_countdown = 4;
3996 sc->watchdog_countdown--;
3999 /* ensure NIC did not suffer h/w fault while idle */
4000 cmd = pci_read_config(sc->dev, PCIR_COMMAND, 2);
4001 if ((cmd & PCIM_CMD_BUSMASTEREN) == 0) {
4003 taskqueue_enqueue(sc->tq, &sc->watchdog_task);
4006 /* look less often if NIC is idle */
4011 callout_reset(&sc->co_hdl, ticks, mxge_tick, sc);
4016 mxge_media_change(struct ifnet *ifp)
4022 mxge_change_mtu(mxge_softc_t *sc, int mtu)
4024 struct ifnet *ifp = sc->ifp;
4025 int real_mtu, old_mtu;
4029 real_mtu = mtu + ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
4030 if ((real_mtu > sc->max_mtu) || real_mtu < 60)
4032 mtx_lock(&sc->driver_mtx);
4033 old_mtu = ifp->if_mtu;
4035 if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
4037 err = mxge_open(sc);
4039 ifp->if_mtu = old_mtu;
4041 (void) mxge_open(sc);
4044 mtx_unlock(&sc->driver_mtx);
4049 mxge_media_status(struct ifnet *ifp, struct ifmediareq *ifmr)
4051 mxge_softc_t *sc = ifp->if_softc;
4056 ifmr->ifm_status = IFM_AVALID;
4057 ifmr->ifm_active = IFM_ETHER | IFM_FDX;
4058 ifmr->ifm_status |= sc->link_state ? IFM_ACTIVE : 0;
4059 ifmr->ifm_active |= sc->current_media;
4063 mxge_ioctl(struct ifnet *ifp, u_long command, caddr_t data)
4065 mxge_softc_t *sc = ifp->if_softc;
4066 struct ifreq *ifr = (struct ifreq *)data;
4073 err = ether_ioctl(ifp, command, data);
4077 err = mxge_change_mtu(sc, ifr->ifr_mtu);
4081 mtx_lock(&sc->driver_mtx);
4083 mtx_unlock(&sc->driver_mtx);
4086 if (ifp->if_flags & IFF_UP) {
4087 if (!(ifp->if_drv_flags & IFF_DRV_RUNNING)) {
4088 err = mxge_open(sc);
4090 /* take care of promisc and allmulti flag changes */
4092 mxge_change_promisc(sc,
4093 ifp->if_flags & IFF_PROMISC);
4094 mxge_set_multicast_list(sc);
4097 if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
4101 mtx_unlock(&sc->driver_mtx);
4106 mtx_lock(&sc->driver_mtx);
4107 mxge_set_multicast_list(sc);
4108 mtx_unlock(&sc->driver_mtx);
4112 mtx_lock(&sc->driver_mtx);
4113 mask = ifr->ifr_reqcap ^ ifp->if_capenable;
4114 if (mask & IFCAP_TXCSUM) {
4115 if (IFCAP_TXCSUM & ifp->if_capenable) {
4116 ifp->if_capenable &= ~(IFCAP_TXCSUM|IFCAP_TSO4);
4117 ifp->if_hwassist &= ~(CSUM_TCP | CSUM_UDP
4120 ifp->if_capenable |= IFCAP_TXCSUM;
4121 ifp->if_hwassist |= (CSUM_TCP | CSUM_UDP);
4123 } else if (mask & IFCAP_RXCSUM) {
4124 if (IFCAP_RXCSUM & ifp->if_capenable) {
4125 ifp->if_capenable &= ~IFCAP_RXCSUM;
4128 ifp->if_capenable |= IFCAP_RXCSUM;
4132 if (mask & IFCAP_TSO4) {
4133 if (IFCAP_TSO4 & ifp->if_capenable) {
4134 ifp->if_capenable &= ~IFCAP_TSO4;
4135 ifp->if_hwassist &= ~CSUM_TSO;
4136 } else if (IFCAP_TXCSUM & ifp->if_capenable) {
4137 ifp->if_capenable |= IFCAP_TSO4;
4138 ifp->if_hwassist |= CSUM_TSO;
4140 printf("mxge requires tx checksum offload"
4141 " be enabled to use TSO\n");
4145 if (mask & IFCAP_LRO) {
4146 if (IFCAP_LRO & ifp->if_capenable)
4147 err = mxge_change_lro_locked(sc, 0);
4149 err = mxge_change_lro_locked(sc, mxge_lro_cnt);
4151 if (mask & IFCAP_VLAN_HWTAGGING)
4152 ifp->if_capenable ^= IFCAP_VLAN_HWTAGGING;
4153 if (mask & IFCAP_VLAN_HWTSO)
4154 ifp->if_capenable ^= IFCAP_VLAN_HWTSO;
4156 if (!(ifp->if_capabilities & IFCAP_VLAN_HWTSO) ||
4157 !(ifp->if_capenable & IFCAP_VLAN_HWTAGGING))
4158 ifp->if_capenable &= ~IFCAP_VLAN_HWTSO;
4160 mtx_unlock(&sc->driver_mtx);
4161 VLAN_CAPABILITIES(ifp);
4166 mtx_lock(&sc->driver_mtx);
4167 mxge_media_probe(sc);
4168 mtx_unlock(&sc->driver_mtx);
4169 err = ifmedia_ioctl(ifp, (struct ifreq *)data,
4170 &sc->media, command);
4181 mxge_fetch_tunables(mxge_softc_t *sc)
4184 TUNABLE_INT_FETCH("hw.mxge.max_slices", &mxge_max_slices);
4185 TUNABLE_INT_FETCH("hw.mxge.flow_control_enabled",
4186 &mxge_flow_control);
4187 TUNABLE_INT_FETCH("hw.mxge.intr_coal_delay",
4188 &mxge_intr_coal_delay);
4189 TUNABLE_INT_FETCH("hw.mxge.nvidia_ecrc_enable",
4190 &mxge_nvidia_ecrc_enable);
4191 TUNABLE_INT_FETCH("hw.mxge.force_firmware",
4192 &mxge_force_firmware);
4193 TUNABLE_INT_FETCH("hw.mxge.deassert_wait",
4194 &mxge_deassert_wait);
4195 TUNABLE_INT_FETCH("hw.mxge.verbose",
4197 TUNABLE_INT_FETCH("hw.mxge.ticks", &mxge_ticks);
4198 TUNABLE_INT_FETCH("hw.mxge.lro_cnt", &sc->lro_cnt);
4199 TUNABLE_INT_FETCH("hw.mxge.always_promisc", &mxge_always_promisc);
4200 TUNABLE_INT_FETCH("hw.mxge.rss_hash_type", &mxge_rss_hash_type);
4201 TUNABLE_INT_FETCH("hw.mxge.rss_hashtype", &mxge_rss_hash_type);
4202 TUNABLE_INT_FETCH("hw.mxge.initial_mtu", &mxge_initial_mtu);
4203 TUNABLE_INT_FETCH("hw.mxge.throttle", &mxge_throttle);
4204 if (sc->lro_cnt != 0)
4205 mxge_lro_cnt = sc->lro_cnt;
4209 if (mxge_intr_coal_delay < 0 || mxge_intr_coal_delay > 10*1000)
4210 mxge_intr_coal_delay = 30;
4211 if (mxge_ticks == 0)
4212 mxge_ticks = hz / 2;
4213 sc->pause = mxge_flow_control;
4214 if (mxge_rss_hash_type < MXGEFW_RSS_HASH_TYPE_IPV4
4215 || mxge_rss_hash_type > MXGEFW_RSS_HASH_TYPE_MAX) {
4216 mxge_rss_hash_type = MXGEFW_RSS_HASH_TYPE_SRC_DST_PORT;
4218 if (mxge_initial_mtu > ETHERMTU_JUMBO ||
4219 mxge_initial_mtu < ETHER_MIN_LEN)
4220 mxge_initial_mtu = ETHERMTU_JUMBO;
4222 if (mxge_throttle && mxge_throttle > MXGE_MAX_THROTTLE)
4223 mxge_throttle = MXGE_MAX_THROTTLE;
4224 if (mxge_throttle && mxge_throttle < MXGE_MIN_THROTTLE)
4225 mxge_throttle = MXGE_MIN_THROTTLE;
4226 sc->throttle = mxge_throttle;
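/*
 * All of the hw.mxge.* values fetched above are boot-time tunables;
 * for example, a line such as hw.mxge.max_slices="4" in
 * /boot/loader.conf (illustrative value) requests up to four RSS
 * slices before the driver applies its own limits.
 */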
4231 mxge_free_slices(mxge_softc_t *sc)
4233 struct mxge_slice_state *ss;
4240 for (i = 0; i < sc->num_slices; i++) {
4242 if (ss->fw_stats != NULL) {
4243 mxge_dma_free(&ss->fw_stats_dma);
4244 ss->fw_stats = NULL;
4245 #ifdef IFNET_BUF_RING
4246 if (ss->tx.br != NULL) {
4247 drbr_free(ss->tx.br, M_DEVBUF);
4251 mtx_destroy(&ss->tx.mtx);
4253 if (ss->rx_done.entry != NULL) {
4254 mxge_dma_free(&ss->rx_done.dma);
4255 ss->rx_done.entry = NULL;
4258 free(sc->ss, M_DEVBUF);
4263 mxge_alloc_slices(mxge_softc_t *sc)
4266 struct mxge_slice_state *ss;
4268 int err, i, max_intr_slots;
4270 err = mxge_send_cmd(sc, MXGEFW_CMD_GET_RX_RING_SIZE, &cmd);
4272 device_printf(sc->dev, "Cannot determine rx ring size\n");
4275 sc->rx_ring_size = cmd.data0;
4276 max_intr_slots = 2 * (sc->rx_ring_size / sizeof (mcp_dma_addr_t));
4278 bytes = sizeof (*sc->ss) * sc->num_slices;
4279 sc->ss = malloc(bytes, M_DEVBUF, M_NOWAIT | M_ZERO);
4282 for (i = 0; i < sc->num_slices; i++) {
4287 /* allocate per-slice rx interrupt queues */
4289 bytes = max_intr_slots * sizeof (*ss->rx_done.entry);
4290 err = mxge_dma_alloc(sc, &ss->rx_done.dma, bytes, 4096);
4293 ss->rx_done.entry = ss->rx_done.dma.addr;
4294 bzero(ss->rx_done.entry, bytes);
4297 * allocate the per-slice firmware stats; stats
4298 * (including tx) are used only on the first slice
4301 #ifndef IFNET_BUF_RING
4306 bytes = sizeof (*ss->fw_stats);
4307 err = mxge_dma_alloc(sc, &ss->fw_stats_dma,
4308 sizeof (*ss->fw_stats), 64);
4311 ss->fw_stats = (mcp_irq_data_t *)ss->fw_stats_dma.addr;
4312 snprintf(ss->tx.mtx_name, sizeof(ss->tx.mtx_name),
4313 "%s:tx(%d)", device_get_nameunit(sc->dev), i);
4314 mtx_init(&ss->tx.mtx, ss->tx.mtx_name, NULL, MTX_DEF);
4315 #ifdef IFNET_BUF_RING
4316 ss->tx.br = buf_ring_alloc(2048, M_DEVBUF, M_WAITOK,
4324 mxge_free_slices(sc);
4329 mxge_slice_probe(mxge_softc_t *sc)
4333 int msix_cnt, status, max_intr_slots;
4337 * don't enable multiple slices if they have not been requested,
4338 * or if this is not an SMP system
4341 if (mxge_max_slices == 0 || mxge_max_slices == 1 || mp_ncpus < 2)
4344 /* see how many MSI-X interrupts are available */
4345 msix_cnt = pci_msix_count(sc->dev);
4349 /* now load the slice aware firmware to see what it supports */
4350 old_fw = sc->fw_name;
4351 if (old_fw == mxge_fw_aligned)
4352 sc->fw_name = mxge_fw_rss_aligned;
4354 sc->fw_name = mxge_fw_rss_unaligned;
4355 status = mxge_load_firmware(sc, 0);
4357 device_printf(sc->dev, "Falling back to a single slice\n");
4361 /* try to send a reset command to the card to see if it is alive */
4363 memset(&cmd, 0, sizeof (cmd));
4364 status = mxge_send_cmd(sc, MXGEFW_CMD_RESET, &cmd);
4366 device_printf(sc->dev, "failed reset\n");
4370 /* get rx ring size */
4371 status = mxge_send_cmd(sc, MXGEFW_CMD_GET_RX_RING_SIZE, &cmd);
4373 device_printf(sc->dev, "Cannot determine rx ring size\n");
4376 max_intr_slots = 2 * (cmd.data0 / sizeof (mcp_dma_addr_t));
4378 /* tell it the size of the interrupt queues */
4379 cmd.data0 = max_intr_slots * sizeof (struct mcp_slot);
4380 status = mxge_send_cmd(sc, MXGEFW_CMD_SET_INTRQ_SIZE, &cmd);
4382 device_printf(sc->dev, "failed MXGEFW_CMD_SET_INTRQ_SIZE\n");
4386 /* ask for the maximum number of slices it supports */
4387 status = mxge_send_cmd(sc, MXGEFW_CMD_GET_MAX_RSS_QUEUES, &cmd);
4389 device_printf(sc->dev,
4390 "failed MXGEFW_CMD_GET_MAX_RSS_QUEUES\n");
4393 sc->num_slices = cmd.data0;
4394 if (sc->num_slices > msix_cnt)
4395 sc->num_slices = msix_cnt;
4397 if (mxge_max_slices == -1) {
4398 /* cap to number of CPUs in system */
4399 if (sc->num_slices > mp_ncpus)
4400 sc->num_slices = mp_ncpus;
4402 if (sc->num_slices > mxge_max_slices)
4403 sc->num_slices = mxge_max_slices;
4405 /* make sure it is a power of two */
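/* n is a power of two exactly when (n & (n - 1)) == 0; the loop below
   reduces num_slices until that condition holds */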
4406 while (sc->num_slices & (sc->num_slices - 1))
4410 device_printf(sc->dev, "using %d slices\n",
4416 sc->fw_name = old_fw;
4417 (void) mxge_load_firmware(sc, 0);
4421 mxge_add_msix_irqs(mxge_softc_t *sc)
4424 int count, err, i, rid;
4427 sc->msix_table_res = bus_alloc_resource_any(sc->dev, SYS_RES_MEMORY,
4430 if (sc->msix_table_res == NULL) {
4431 device_printf(sc->dev, "couldn't alloc MSIX table res\n");
4435 count = sc->num_slices;
4436 err = pci_alloc_msix(sc->dev, &count);
4438 device_printf(sc->dev, "pci_alloc_msix: failed, wanted %d"
4439 " err = %d\n", sc->num_slices, err);
4440 goto abort_with_msix_table;
4442 if (count < sc->num_slices) {
4443 device_printf(sc->dev, "pci_alloc_msix: need %d, got %d\n",
4444 count, sc->num_slices);
4445 device_printf(sc->dev,
4446 "Try setting hw.mxge.max_slices to %d\n",
4449 goto abort_with_msix;
4451 bytes = sizeof (*sc->msix_irq_res) * sc->num_slices;
4452 sc->msix_irq_res = malloc(bytes, M_DEVBUF, M_NOWAIT|M_ZERO);
4453 if (sc->msix_irq_res == NULL) {
4455 goto abort_with_msix;
4458 for (i = 0; i < sc->num_slices; i++) {
4460 sc->msix_irq_res[i] = bus_alloc_resource_any(sc->dev,
4463 if (sc->msix_irq_res[i] == NULL) {
4464 device_printf(sc->dev, "couldn't allocate IRQ res"
4465 " for message %d\n", i);
4467 goto abort_with_res;
4471 bytes = sizeof (*sc->msix_ih) * sc->num_slices;
4472 sc->msix_ih = malloc(bytes, M_DEVBUF, M_NOWAIT|M_ZERO);
4474 for (i = 0; i < sc->num_slices; i++) {
4475 err = bus_setup_intr(sc->dev, sc->msix_irq_res[i],
4476 INTR_TYPE_NET | INTR_MPSAFE,
4477 #if __FreeBSD_version > 700030
4480 mxge_intr, &sc->ss[i], &sc->msix_ih[i]);
4482 device_printf(sc->dev, "couldn't setup intr for "
4484 goto abort_with_intr;
4486 bus_describe_intr(sc->dev, sc->msix_irq_res[i],
4487 sc->msix_ih[i], "s%d", i);
4491 device_printf(sc->dev, "using %d msix IRQs:",
4493 for (i = 0; i < sc->num_slices; i++)
4494 printf(" %ld", rman_get_start(sc->msix_irq_res[i]));
4500 for (i = 0; i < sc->num_slices; i++) {
4501 if (sc->msix_ih[i] != NULL) {
4502 bus_teardown_intr(sc->dev, sc->msix_irq_res[i],
4504 sc->msix_ih[i] = NULL;
4507 free(sc->msix_ih, M_DEVBUF);
4511 for (i = 0; i < sc->num_slices; i++) {
4513 if (sc->msix_irq_res[i] != NULL)
4514 bus_release_resource(sc->dev, SYS_RES_IRQ, rid,
4515 sc->msix_irq_res[i]);
4516 sc->msix_irq_res[i] = NULL;
4518 free(sc->msix_irq_res, M_DEVBUF);
4522 pci_release_msi(sc->dev);
4524 abort_with_msix_table:
4525 bus_release_resource(sc->dev, SYS_RES_MEMORY, PCIR_BAR(2),
4526 sc->msix_table_res);
4532 mxge_add_single_irq(mxge_softc_t *sc)
4534 int count, err, rid;
4536 count = pci_msi_count(sc->dev);
4537 if (count == 1 && pci_alloc_msi(sc->dev, &count) == 0) {
4543 sc->irq_res = bus_alloc_resource(sc->dev, SYS_RES_IRQ, &rid, 0, ~0,
4544 1, RF_SHAREABLE | RF_ACTIVE);
4545 if (sc->irq_res == NULL) {
4546 device_printf(sc->dev, "could not alloc interrupt\n");
4550 device_printf(sc->dev, "using %s irq %ld\n",
4551 sc->legacy_irq ? "INTx" : "MSI",
4552 rman_get_start(sc->irq_res));
4553 err = bus_setup_intr(sc->dev, sc->irq_res,
4554 INTR_TYPE_NET | INTR_MPSAFE,
4555 #if __FreeBSD_version > 700030
4558 mxge_intr, &sc->ss[0], &sc->ih);
4560 bus_release_resource(sc->dev, SYS_RES_IRQ,
4561 sc->legacy_irq ? 0 : 1, sc->irq_res);
4562 if (!sc->legacy_irq)
4563 pci_release_msi(sc->dev);
4569 mxge_rem_msix_irqs(mxge_softc_t *sc)
4573 for (i = 0; i < sc->num_slices; i++) {
4574 if (sc->msix_ih[i] != NULL) {
4575 bus_teardown_intr(sc->dev, sc->msix_irq_res[i],
4577 sc->msix_ih[i] = NULL;
4580 free(sc->msix_ih, M_DEVBUF);
4582 for (i = 0; i < sc->num_slices; i++) {
4584 if (sc->msix_irq_res[i] != NULL)
4585 bus_release_resource(sc->dev, SYS_RES_IRQ, rid,
4586 sc->msix_irq_res[i]);
4587 sc->msix_irq_res[i] = NULL;
4589 free(sc->msix_irq_res, M_DEVBUF);
4591 bus_release_resource(sc->dev, SYS_RES_MEMORY, PCIR_BAR(2),
4592 sc->msix_table_res);
4594 pci_release_msi(sc->dev);
4599 mxge_rem_single_irq(mxge_softc_t *sc)
4601 bus_teardown_intr(sc->dev, sc->irq_res, sc->ih);
4602 bus_release_resource(sc->dev, SYS_RES_IRQ,
4603 sc->legacy_irq ? 0 : 1, sc->irq_res);
4604 if (!sc->legacy_irq)
4605 pci_release_msi(sc->dev);
4609 mxge_rem_irq(mxge_softc_t *sc)
4611 if (sc->num_slices > 1)
4612 mxge_rem_msix_irqs(sc);
4614 mxge_rem_single_irq(sc);
4618 mxge_add_irq(mxge_softc_t *sc)
4622 if (sc->num_slices > 1)
4623 err = mxge_add_msix_irqs(sc);
4625 err = mxge_add_single_irq(sc);
4627 if (0 && err == 0 && sc->num_slices > 1) {
4628 mxge_rem_msix_irqs(sc);
4629 err = mxge_add_msix_irqs(sc);
4636 mxge_attach(device_t dev)
4638 mxge_softc_t *sc = device_get_softc(dev);
4643 mxge_fetch_tunables(sc);
4645 TASK_INIT(&sc->watchdog_task, 1, mxge_watchdog_task, sc);
4646 sc->tq = taskqueue_create_fast("mxge_taskq", M_WAITOK,
4647 taskqueue_thread_enqueue,
4649 if (sc->tq == NULL) {
4651 goto abort_with_nothing;
4654 err = bus_dma_tag_create(bus_get_dma_tag(dev), /* parent */
4657 BUS_SPACE_MAXADDR, /* low */
4658 BUS_SPACE_MAXADDR, /* high */
4659 NULL, NULL, /* filter */
4660 65536 + 256, /* maxsize */
4661 MXGE_MAX_SEND_DESC, /* num segs */
4662 65536, /* maxsegsize */
4664 NULL, NULL, /* lock */
4665 &sc->parent_dmat); /* tag */
4668 device_printf(sc->dev, "Err %d allocating parent dmat\n",
4673 ifp = sc->ifp = if_alloc(IFT_ETHER);
4675 device_printf(dev, "can not if_alloc()\n");
4677 goto abort_with_parent_dmat;
4679 if_initname(ifp, device_get_name(dev), device_get_unit(dev));
4681 snprintf(sc->cmd_mtx_name, sizeof(sc->cmd_mtx_name), "%s:cmd",
4682 device_get_nameunit(dev));
4683 mtx_init(&sc->cmd_mtx, sc->cmd_mtx_name, NULL, MTX_DEF);
4684 snprintf(sc->driver_mtx_name, sizeof(sc->driver_mtx_name),
4685 "%s:drv", device_get_nameunit(dev));
4686 mtx_init(&sc->driver_mtx, sc->driver_mtx_name,
4687 MTX_NETWORK_LOCK, MTX_DEF);
4689 callout_init_mtx(&sc->co_hdl, &sc->driver_mtx, 0);
4691 mxge_setup_cfg_space(sc);
4693 /* Map the board into the kernel */
4695 sc->mem_res = bus_alloc_resource(dev, SYS_RES_MEMORY, &rid, 0,
4697 if (sc->mem_res == NULL) {
4698 device_printf(dev, "could not map memory\n");
4700 goto abort_with_lock;
4702 sc->sram = rman_get_virtual(sc->mem_res);
4703 sc->sram_size = 2*1024*1024 - (2*(48*1024)+(32*1024)) - 0x100;
4704 if (sc->sram_size > rman_get_size(sc->mem_res)) {
4705 device_printf(dev, "impossible memory region size %ld\n",
4706 rman_get_size(sc->mem_res));
4708 goto abort_with_mem_res;
4711 /* make a NULL-terminated copy of the EEPROM strings section of SRAM */
4713 bzero(sc->eeprom_strings, MXGE_EEPROM_STRINGS_SIZE);
4714 bus_space_read_region_1(rman_get_bustag(sc->mem_res),
4715 rman_get_bushandle(sc->mem_res),
4716 sc->sram_size - MXGE_EEPROM_STRINGS_SIZE,
4718 MXGE_EEPROM_STRINGS_SIZE - 2);
4719 err = mxge_parse_strings(sc);
4721 goto abort_with_mem_res;
4723 /* Enable write combining for efficient use of PCIe bus */
4726 /* Allocate the out of band dma memory */
4727 err = mxge_dma_alloc(sc, &sc->cmd_dma,
4728 sizeof (mxge_cmd_t), 64);
4730 goto abort_with_mem_res;
4731 sc->cmd = (mcp_cmd_response_t *) sc->cmd_dma.addr;
4732 err = mxge_dma_alloc(sc, &sc->zeropad_dma, 64, 64);
4734 goto abort_with_cmd_dma;
4736 err = mxge_dma_alloc(sc, &sc->dmabench_dma, 4096, 4096);
4738 goto abort_with_zeropad_dma;
4740 /* select & load the firmware */
4741 err = mxge_select_firmware(sc);
4743 goto abort_with_dmabench;
4744 sc->intr_coal_delay = mxge_intr_coal_delay;
4746 mxge_slice_probe(sc);
4747 err = mxge_alloc_slices(sc);
4749 goto abort_with_dmabench;
4751 err = mxge_reset(sc, 0);
4753 goto abort_with_slices;
4755 err = mxge_alloc_rings(sc);
4757 device_printf(sc->dev, "failed to allocate rings\n");
4758 goto abort_with_slices;
4761 err = mxge_add_irq(sc);
4763 device_printf(sc->dev, "failed to add irq\n");
4764 goto abort_with_rings;
4767 ifp->if_baudrate = IF_Gbps(10UL);
4768 ifp->if_capabilities = IFCAP_RXCSUM | IFCAP_TXCSUM | IFCAP_TSO4 |
4771 ifp->if_capabilities |= IFCAP_LRO;
4774 #ifdef MXGE_NEW_VLAN_API
4775 ifp->if_capabilities |= IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_HWCSUM;
4777 /* Only FW 1.4.32 and newer can do TSO over vlans */
4778 if (sc->fw_ver_major == 1 && sc->fw_ver_minor == 4 &&
4779 sc->fw_ver_tiny >= 32)
4780 ifp->if_capabilities |= IFCAP_VLAN_HWTSO;
4783 sc->max_mtu = mxge_max_mtu(sc);
4784 if (sc->max_mtu >= 9000)
4785 ifp->if_capabilities |= IFCAP_JUMBO_MTU;
4787 device_printf(dev, "MTU limited to %d. Install "
4788 "latest firmware for 9000 byte jumbo support\n",
4789 sc->max_mtu - ETHER_HDR_LEN);
4790 ifp->if_hwassist = CSUM_TCP | CSUM_UDP | CSUM_TSO;
4791 ifp->if_capenable = ifp->if_capabilities;
4792 if (sc->lro_cnt == 0)
4793 ifp->if_capenable &= ~IFCAP_LRO;
4795 ifp->if_init = mxge_init;
4797 ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
4798 ifp->if_ioctl = mxge_ioctl;
4799 ifp->if_start = mxge_start;
4800 /* Initialise the ifmedia structure */
4801 ifmedia_init(&sc->media, 0, mxge_media_change,
4803 mxge_media_init(sc);
4804 mxge_media_probe(sc);
4806 ether_ifattach(ifp, sc->mac_addr);
4807 /* ether_ifattach sets mtu to ETHERMTU */
4808 if (mxge_initial_mtu != ETHERMTU)
4809 mxge_change_mtu(sc, mxge_initial_mtu);
4811 mxge_add_sysctls(sc);
4812 #ifdef IFNET_BUF_RING
4813 ifp->if_transmit = mxge_transmit;
4814 ifp->if_qflush = mxge_qflush;
4816 taskqueue_start_threads(&sc->tq, 1, PI_NET, "%s taskq",
4817 device_get_nameunit(sc->dev));
4818 callout_reset(&sc->co_hdl, mxge_ticks, mxge_tick, sc);
4822 mxge_free_rings(sc);
4824 mxge_free_slices(sc);
4825 abort_with_dmabench:
4826 mxge_dma_free(&sc->dmabench_dma);
4827 abort_with_zeropad_dma:
4828 mxge_dma_free(&sc->zeropad_dma);
4830 mxge_dma_free(&sc->cmd_dma);
4832 bus_release_resource(dev, SYS_RES_MEMORY, PCIR_BARS, sc->mem_res);
4834 pci_disable_busmaster(dev);
4835 mtx_destroy(&sc->cmd_mtx);
4836 mtx_destroy(&sc->driver_mtx);
4838 abort_with_parent_dmat:
4839 bus_dma_tag_destroy(sc->parent_dmat);
4841 if (sc->tq != NULL) {
4842 taskqueue_drain(sc->tq, &sc->watchdog_task);
4843 taskqueue_free(sc->tq);
4851 mxge_detach(device_t dev)
4853 mxge_softc_t *sc = device_get_softc(dev);
4855 if (mxge_vlans_active(sc)) {
4856 device_printf(sc->dev,
4857 "Detach vlans before removing module\n");
4860 mtx_lock(&sc->driver_mtx);
4862 if (sc->ifp->if_drv_flags & IFF_DRV_RUNNING)
4864 mtx_unlock(&sc->driver_mtx);
4865 ether_ifdetach(sc->ifp);
4866 if (sc->tq != NULL) {
4867 taskqueue_drain(sc->tq, &sc->watchdog_task);
4868 taskqueue_free(sc->tq);
4871 callout_drain(&sc->co_hdl);
4872 ifmedia_removeall(&sc->media);
4873 mxge_dummy_rdma(sc, 0);
4874 mxge_rem_sysctls(sc);
4876 mxge_free_rings(sc);
4877 mxge_free_slices(sc);
4878 mxge_dma_free(&sc->dmabench_dma);
4879 mxge_dma_free(&sc->zeropad_dma);
4880 mxge_dma_free(&sc->cmd_dma);
4881 bus_release_resource(dev, SYS_RES_MEMORY, PCIR_BARS, sc->mem_res);
4882 pci_disable_busmaster(dev);
4883 mtx_destroy(&sc->cmd_mtx);
4884 mtx_destroy(&sc->driver_mtx);
4886 bus_dma_tag_destroy(sc->parent_dmat);
4891 mxge_shutdown(device_t dev)
4897 This file uses Myri10GE driver indentation.
4900 c-file-style:"linux"