1 /******************************************************************************
3 Copyright (c) 2006-2009, Myricom Inc.
6 Redistribution and use in source and binary forms, with or without
7 modification, are permitted provided that the following conditions are met:
9 1. Redistributions of source code must retain the above copyright notice,
10 this list of conditions and the following disclaimer.
12 2. Neither the name of the Myricom Inc, nor the names of its
13 contributors may be used to endorse or promote products derived from
14 this software without specific prior written permission.
16 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
17 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
20 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
21 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
22 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
23 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
24 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
25 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
26 POSSIBILITY OF SUCH DAMAGE.
28 ***************************************************************************/
30 #include <sys/cdefs.h>
31 __FBSDID("$FreeBSD$");
33 #include <sys/param.h>
34 #include <sys/systm.h>
35 #include <sys/linker.h>
36 #include <sys/firmware.h>
37 #include <sys/endian.h>
38 #include <sys/sockio.h>
40 #include <sys/malloc.h>
42 #include <sys/kernel.h>
44 #include <sys/module.h>
45 #include <sys/socket.h>
46 #include <sys/sysctl.h>
48 #include <sys/taskqueue.h>
50 /* count xmits ourselves, rather than via drbr */
53 #include <net/if_arp.h>
54 #include <net/ethernet.h>
55 #include <net/if_dl.h>
56 #include <net/if_media.h>
60 #include <net/if_types.h>
61 #include <net/if_vlan_var.h>
64 #include <netinet/in_systm.h>
65 #include <netinet/in.h>
66 #include <netinet/ip.h>
67 #include <netinet/tcp.h>
69 #include <machine/bus.h>
70 #include <machine/in_cksum.h>
71 #include <machine/resource.h>
76 #include <dev/pci/pcireg.h>
77 #include <dev/pci/pcivar.h>
78 #include <dev/pci/pci_private.h> /* XXX for pci_cfg_restore */
80 #include <vm/vm.h> /* for pmap_mapdev() */
83 #if defined(__i386) || defined(__amd64)
84 #include <machine/specialreg.h>
87 #include <dev/mxge/mxge_mcp.h>
88 #include <dev/mxge/mcp_gen_header.h>
89 /*#define MXGE_FAKE_IFP*/
90 #include <dev/mxge/if_mxge_var.h>
92 #include <sys/buf_ring.h>
98 static int mxge_nvidia_ecrc_enable = 1;
99 static int mxge_force_firmware = 0;
100 static int mxge_intr_coal_delay = 30;
101 static int mxge_deassert_wait = 1;
102 static int mxge_flow_control = 1;
103 static int mxge_verbose = 0;
104 static int mxge_lro_cnt = 8;
105 static int mxge_ticks;
106 static int mxge_max_slices = 1;
107 static int mxge_rss_hash_type = MXGEFW_RSS_HASH_TYPE_SRC_DST_PORT;
108 static int mxge_always_promisc = 0;
109 static int mxge_initial_mtu = ETHERMTU_JUMBO;
110 static int mxge_throttle = 0;
111 static char *mxge_fw_unaligned = "mxge_ethp_z8e";
112 static char *mxge_fw_aligned = "mxge_eth_z8e";
113 static char *mxge_fw_rss_aligned = "mxge_rss_eth_z8e";
114 static char *mxge_fw_rss_unaligned = "mxge_rss_ethp_z8e";
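/*
* Firmware image selection: the "eth" images assume aligned PCIe
* completions, the "ethp" images work around unaligned completions,
* and the "rss" variants are used when multiple slices are enabled;
* see mxge_select_firmware() below.
*/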
116 static int mxge_probe(device_t dev);
117 static int mxge_attach(device_t dev);
118 static int mxge_detach(device_t dev);
119 static int mxge_shutdown(device_t dev);
120 static void mxge_intr(void *arg);
122 static device_method_t mxge_methods[] =
124 /* Device interface */
125 DEVMETHOD(device_probe, mxge_probe),
126 DEVMETHOD(device_attach, mxge_attach),
127 DEVMETHOD(device_detach, mxge_detach),
128 DEVMETHOD(device_shutdown, mxge_shutdown),
132 static driver_t mxge_driver =
136 sizeof(mxge_softc_t),
139 static devclass_t mxge_devclass;
141 /* Declare ourselves to be a child of the PCI bus.*/
142 DRIVER_MODULE(mxge, pci, mxge_driver, mxge_devclass, 0, 0);
143 MODULE_DEPEND(mxge, firmware, 1, 1, 1);
144 MODULE_DEPEND(mxge, zlib, 1, 1, 1);
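/* The firmware images are stored compressed and are inflated with
zlib at load time (see mxge_load_firmware_helper()), hence the
zlib module dependency. */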
146 static int mxge_load_firmware(mxge_softc_t *sc, int adopt);
147 static int mxge_send_cmd(mxge_softc_t *sc, uint32_t cmd, mxge_cmd_t *data);
148 static int mxge_close(mxge_softc_t *sc, int down);
149 static int mxge_open(mxge_softc_t *sc);
150 static void mxge_tick(void *arg);
153 mxge_probe(device_t dev)
158 if ((pci_get_vendor(dev) == MXGE_PCI_VENDOR_MYRICOM) &&
159 ((pci_get_device(dev) == MXGE_PCI_DEVICE_Z8E) ||
160 (pci_get_device(dev) == MXGE_PCI_DEVICE_Z8E_9))) {
161 rev = pci_get_revid(dev);
163 case MXGE_PCI_REV_Z8E:
164 device_set_desc(dev, "Myri10G-PCIE-8A");
166 case MXGE_PCI_REV_Z8ES:
167 device_set_desc(dev, "Myri10G-PCIE-8B");
170 device_set_desc(dev, "Myri10G-PCIE-8??");
device_printf(dev, "Unrecognized rev %d NIC\n", rev);
181 mxge_enable_wc(mxge_softc_t *sc)
183 #if defined(__i386) || defined(__amd64)
188 len = rman_get_size(sc->mem_res);
189 err = pmap_change_attr((vm_offset_t) sc->sram,
190 len, PAT_WRITE_COMBINING);
device_printf(sc->dev, "pmap_change_attr failed, %d\n", err);
200 /* callback to get our DMA address */
202 mxge_dmamap_callback(void *arg, bus_dma_segment_t *segs, int nsegs,
206 *(bus_addr_t *) arg = segs->ds_addr;
211 mxge_dma_alloc(mxge_softc_t *sc, mxge_dma_t *dma, size_t bytes,
212 bus_size_t alignment)
215 device_t dev = sc->dev;
216 bus_size_t boundary, maxsegsize;
218 if (bytes > 4096 && alignment == 4096) {
226 /* allocate DMAable memory tags */
227 err = bus_dma_tag_create(sc->parent_dmat, /* parent */
228 alignment, /* alignment */
229 boundary, /* boundary */
230 BUS_SPACE_MAXADDR, /* low */
231 BUS_SPACE_MAXADDR, /* high */
232 NULL, NULL, /* filter */
235 maxsegsize, /* maxsegsize */
236 BUS_DMA_COHERENT, /* flags */
237 NULL, NULL, /* lock */
238 &dma->dmat); /* tag */
240 device_printf(dev, "couldn't alloc tag (err = %d)\n", err);
244 /* allocate DMAable memory & map */
245 err = bus_dmamem_alloc(dma->dmat, &dma->addr,
246 (BUS_DMA_WAITOK | BUS_DMA_COHERENT
247 | BUS_DMA_ZERO), &dma->map);
249 device_printf(dev, "couldn't alloc mem (err = %d)\n", err);
250 goto abort_with_dmat;
253 /* load the memory */
254 err = bus_dmamap_load(dma->dmat, dma->map, dma->addr, bytes,
255 mxge_dmamap_callback,
256 (void *)&dma->bus_addr, 0);
258 device_printf(dev, "couldn't load map (err = %d)\n", err);
264 bus_dmamem_free(dma->dmat, dma->addr, dma->map);
266 (void)bus_dma_tag_destroy(dma->dmat);
272 mxge_dma_free(mxge_dma_t *dma)
274 bus_dmamap_unload(dma->dmat, dma->map);
275 bus_dmamem_free(dma->dmat, dma->addr, dma->map);
276 (void)bus_dma_tag_destroy(dma->dmat);
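/*
* Illustrative usage sketch (not part of the driver): callers pair
* mxge_dma_alloc() with mxge_dma_free(). The size/alignment values
* and the 0-on-success return convention shown here are assumptions
* for the example only.
*/
#if 0
	mxge_dma_t example;

	if (mxge_dma_alloc(sc, &example, 4096, 4096) == 0) {
		/* example.addr is the kernel VA, example.bus_addr the bus address */
		bzero(example.addr, 4096);
		mxge_dma_free(&example);
	}
#endif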
/*
* The eeprom strings on the lanaiX have the format
* SN=x\0
* MAC=x:x:x:x:x:x\0
* PC=text\0
*/
287 mxge_parse_strings(mxge_softc_t *sc)
289 #define MXGE_NEXT_STRING(p) while(ptr < limit && *ptr++)
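/* MXGE_NEXT_STRING() advances ptr just past the terminating NUL of
the current string (note it ignores its argument and operates on
ptr directly). */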
294 ptr = sc->eeprom_strings;
295 limit = sc->eeprom_strings + MXGE_EEPROM_STRINGS_SIZE;
297 while (ptr < limit && *ptr != '\0') {
298 if (memcmp(ptr, "MAC=", 4) == 0) {
300 sc->mac_addr_string = ptr;
301 for (i = 0; i < 6; i++) {
303 if ((ptr + 2) > limit)
305 sc->mac_addr[i] = strtoul(ptr, NULL, 16);
308 } else if (memcmp(ptr, "PC=", 3) == 0) {
310 strncpy(sc->product_code_string, ptr,
311 sizeof (sc->product_code_string) - 1);
312 } else if (memcmp(ptr, "SN=", 3) == 0) {
314 strncpy(sc->serial_number_string, ptr,
315 sizeof (sc->serial_number_string) - 1);
317 MXGE_NEXT_STRING(ptr);
324 device_printf(sc->dev, "failed to parse eeprom_strings\n");
329 #if defined __i386 || defined i386 || defined __i386__ || defined __x86_64__
331 mxge_enable_nvidia_ecrc(mxge_softc_t *sc)
334 unsigned long base, off;
336 device_t pdev, mcp55;
337 uint16_t vendor_id, device_id, word;
338 uintptr_t bus, slot, func, ivend, idev;
342 if (!mxge_nvidia_ecrc_enable)
345 pdev = device_get_parent(device_get_parent(sc->dev));
347 device_printf(sc->dev, "could not find parent?\n");
350 vendor_id = pci_read_config(pdev, PCIR_VENDOR, 2);
351 device_id = pci_read_config(pdev, PCIR_DEVICE, 2);
353 if (vendor_id != 0x10de)
358 if (device_id == 0x005d) {
359 /* ck804, base address is magic */
361 } else if (device_id >= 0x0374 && device_id <= 0x378) {
362 /* mcp55, base address stored in chipset */
363 mcp55 = pci_find_bsf(0, 0, 0);
365 0x10de == pci_read_config(mcp55, PCIR_VENDOR, 2) &&
366 0x0369 == pci_read_config(mcp55, PCIR_DEVICE, 2)) {
367 word = pci_read_config(mcp55, 0x90, 2);
368 base = ((unsigned long)word & 0x7ffeU) << 25;
/*
Test below is commented out because it is believed that doing a
config read/write beyond 0xff will access the config space of
the next larger function. Uncomment this and remove the hacky
pmap_mapdev() way of accessing config space when FreeBSD grows
support for extended pcie config space access.
*/
/* See if we can, by some miracle, access the extended
config space */
384 val = pci_read_config(pdev, 0x178, 4);
385 if (val != 0xffffffff) {
387 pci_write_config(pdev, 0x178, val, 4);
/* Rather than using normal pci config space writes, we must
* map the Nvidia config space ourselves. This is because on
* opteron/nvidia class machines the 0xe000000 mapping is
* handled by the nvidia chipset, which means the internal PCI
* device (the on-chip northbridge), or the amd-8131 bridge
* and things behind them are not visible by this method.
*/
399 BUS_READ_IVAR(device_get_parent(pdev), pdev,
401 BUS_READ_IVAR(device_get_parent(pdev), pdev,
402 PCI_IVAR_SLOT, &slot);
403 BUS_READ_IVAR(device_get_parent(pdev), pdev,
404 PCI_IVAR_FUNCTION, &func);
405 BUS_READ_IVAR(device_get_parent(pdev), pdev,
406 PCI_IVAR_VENDOR, &ivend);
407 BUS_READ_IVAR(device_get_parent(pdev), pdev,
408 PCI_IVAR_DEVICE, &idev);
411 + 0x00100000UL * (unsigned long)bus
412 + 0x00001000UL * (unsigned long)(func
415 /* map it into the kernel */
416 va = pmap_mapdev(trunc_page((vm_paddr_t)off), PAGE_SIZE);
420 device_printf(sc->dev, "pmap_kenter_temporary didn't\n");
423 /* get a pointer to the config space mapped into the kernel */
424 cfgptr = va + (off & PAGE_MASK);
426 /* make sure that we can really access it */
427 vendor_id = *(uint16_t *)(cfgptr + PCIR_VENDOR);
428 device_id = *(uint16_t *)(cfgptr + PCIR_DEVICE);
429 if (! (vendor_id == ivend && device_id == idev)) {
430 device_printf(sc->dev, "mapping failed: 0x%x:0x%x\n",
431 vendor_id, device_id);
432 pmap_unmapdev((vm_offset_t)va, PAGE_SIZE);
436 ptr32 = (uint32_t*)(cfgptr + 0x178);
439 if (val == 0xffffffff) {
440 device_printf(sc->dev, "extended mapping failed\n");
441 pmap_unmapdev((vm_offset_t)va, PAGE_SIZE);
445 pmap_unmapdev((vm_offset_t)va, PAGE_SIZE);
447 device_printf(sc->dev,
448 "Enabled ECRC on upstream Nvidia bridge "
450 (int)bus, (int)slot, (int)func);
455 mxge_enable_nvidia_ecrc(mxge_softc_t *sc)
457 device_printf(sc->dev,
458 "Nforce 4 chipset on non-x86/amd64!?!?!\n");
465 mxge_dma_test(mxge_softc_t *sc, int test_type)
468 bus_addr_t dmatest_bus = sc->dmabench_dma.bus_addr;
/* Run a small DMA test.
* The magic multipliers to the length tell the firmware
* to do DMA read, write, or read+write tests. The
* results are returned in cmd.data0. The upper 16
* bits of the return is the number of transfers completed.
* The lower 16 bits is the time in 0.5us ticks that the
* transfers took to complete.
*/
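/*
* In other words: with T transfers of len bytes completing in t
* half-microsecond ticks, throughput is (T * len) / (t / 2) bytes
* per microsecond, i.e. (T * len * 2) / t MB/s, which is the
* expression computed below.
*/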
483 len = sc->tx_boundary;
485 cmd.data0 = MXGE_LOWPART_TO_U32(dmatest_bus);
486 cmd.data1 = MXGE_HIGHPART_TO_U32(dmatest_bus);
487 cmd.data2 = len * 0x10000;
488 status = mxge_send_cmd(sc, test_type, &cmd);
493 sc->read_dma = ((cmd.data0>>16) * len * 2) /
494 (cmd.data0 & 0xffff);
495 cmd.data0 = MXGE_LOWPART_TO_U32(dmatest_bus);
496 cmd.data1 = MXGE_HIGHPART_TO_U32(dmatest_bus);
497 cmd.data2 = len * 0x1;
498 status = mxge_send_cmd(sc, test_type, &cmd);
503 sc->write_dma = ((cmd.data0>>16) * len * 2) /
504 (cmd.data0 & 0xffff);
506 cmd.data0 = MXGE_LOWPART_TO_U32(dmatest_bus);
507 cmd.data1 = MXGE_HIGHPART_TO_U32(dmatest_bus);
508 cmd.data2 = len * 0x10001;
509 status = mxge_send_cmd(sc, test_type, &cmd);
514 sc->read_write_dma = ((cmd.data0>>16) * len * 2 * 2) /
515 (cmd.data0 & 0xffff);
518 if (status != 0 && test_type != MXGEFW_CMD_UNALIGNED_TEST)
519 device_printf(sc->dev, "DMA %s benchmark failed: %d\n",
/*
* The Lanai Z8E PCI-E interface achieves higher Read-DMA throughput
* when the PCI-E Completion packets are aligned on an 8-byte
* boundary. Some PCI-E chip sets always align Completion packets; on
* the ones that do not, the alignment can be enforced by enabling
* ECRC generation (if supported).
*
* When PCI-E Completion packets are not aligned, it is actually more
* efficient to limit Read-DMA transactions to 2KB, rather than 4KB.
*
* If the driver can neither enable ECRC nor verify that it has
* already been enabled, then it must use a firmware image which works
* around unaligned completion packets (ethp_z8e.dat), and it should
* also ensure that it never gives the device a Read-DMA which is
* larger than 2KB by setting the tx_boundary to 2KB. If ECRC is
* enabled, then the driver should use the aligned (eth_z8e.dat)
* firmware image, and set tx_boundary to 4KB.
*/
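/*
* In short:
* aligned completions (or ECRC enabled): eth_z8e firmware, tx_boundary = 4KB
* unaligned completions: ethp_z8e firmware, tx_boundary = 2KB
*/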
545 mxge_firmware_probe(mxge_softc_t *sc)
547 device_t dev = sc->dev;
551 sc->tx_boundary = 4096;
553 * Verify the max read request size was set to 4KB
554 * before trying the test with 4KB.
if (pci_find_cap(dev, PCIY_EXPRESS, &reg) == 0) {
557 pectl = pci_read_config(dev, reg + 0x8, 2);
558 if ((pectl & (5 << 12)) != (5 << 12)) {
device_printf(dev, "Max Read Req. size != 4k (0x%x)\n", pectl);
561 sc->tx_boundary = 2048;
566 * load the optimized firmware (which assumes aligned PCIe
567 * completions) in order to see if it works on this host.
569 sc->fw_name = mxge_fw_aligned;
570 status = mxge_load_firmware(sc, 1);
576 * Enable ECRC if possible
578 mxge_enable_nvidia_ecrc(sc);
581 * Run a DMA test which watches for unaligned completions and
582 * aborts on the first one seen.
585 status = mxge_dma_test(sc, MXGEFW_CMD_UNALIGNED_TEST);
587 return 0; /* keep the aligned firmware */
590 device_printf(dev, "DMA test failed: %d\n", status);
591 if (status == ENOSYS)
592 device_printf(dev, "Falling back to ethp! "
593 "Please install up to date fw\n");
598 mxge_select_firmware(mxge_softc_t *sc)
601 int force_firmware = mxge_force_firmware;
604 force_firmware = sc->throttle;
606 if (force_firmware != 0) {
607 if (force_firmware == 1)
612 device_printf(sc->dev,
613 "Assuming %s completions (forced)\n",
614 aligned ? "aligned" : "unaligned");
618 /* if the PCIe link width is 4 or less, we can use the aligned
619 firmware and skip any checks */
620 if (sc->link_width != 0 && sc->link_width <= 4) {
621 device_printf(sc->dev,
622 "PCIe x%d Link, expect reduced performance\n",
628 if (0 == mxge_firmware_probe(sc))
633 sc->fw_name = mxge_fw_aligned;
634 sc->tx_boundary = 4096;
636 sc->fw_name = mxge_fw_unaligned;
637 sc->tx_boundary = 2048;
639 return (mxge_load_firmware(sc, 0));
649 mxge_validate_firmware(mxge_softc_t *sc, const mcp_gen_header_t *hdr)
653 if (be32toh(hdr->mcp_type) != MCP_TYPE_ETH) {
654 device_printf(sc->dev, "Bad firmware type: 0x%x\n",
655 be32toh(hdr->mcp_type));
659 /* save firmware version for sysctl */
660 strncpy(sc->fw_version, hdr->version, sizeof (sc->fw_version));
662 device_printf(sc->dev, "firmware id: %s\n", hdr->version);
664 sscanf(sc->fw_version, "%d.%d.%d", &sc->fw_ver_major,
665 &sc->fw_ver_minor, &sc->fw_ver_tiny);
667 if (!(sc->fw_ver_major == MXGEFW_VERSION_MAJOR
668 && sc->fw_ver_minor == MXGEFW_VERSION_MINOR)) {
669 device_printf(sc->dev, "Found firmware version %s\n",
671 device_printf(sc->dev, "Driver needs %d.%d\n",
672 MXGEFW_VERSION_MAJOR, MXGEFW_VERSION_MINOR);
680 z_alloc(void *nil, u_int items, u_int size)
684 ptr = malloc(items * size, M_TEMP, M_NOWAIT);
689 z_free(void *nil, void *ptr)
696 mxge_load_firmware_helper(mxge_softc_t *sc, uint32_t *limit)
699 char *inflate_buffer;
700 const struct firmware *fw;
701 const mcp_gen_header_t *hdr;
708 fw = firmware_get(sc->fw_name);
710 device_printf(sc->dev, "Could not find firmware image %s\n",
717 /* setup zlib and decompress f/w */
718 bzero(&zs, sizeof (zs));
721 status = inflateInit(&zs);
722 if (status != Z_OK) {
727 /* the uncompressed size is stored as the firmware version,
728 which would otherwise go unused */
729 fw_len = (size_t) fw->version;
730 inflate_buffer = malloc(fw_len, M_TEMP, M_NOWAIT);
731 if (inflate_buffer == NULL)
733 zs.avail_in = fw->datasize;
734 zs.next_in = __DECONST(char *, fw->data);
735 zs.avail_out = fw_len;
736 zs.next_out = inflate_buffer;
737 status = inflate(&zs, Z_FINISH);
738 if (status != Z_STREAM_END) {
739 device_printf(sc->dev, "zlib %d\n", status);
741 goto abort_with_buffer;
745 hdr_offset = htobe32(*(const uint32_t *)
746 (inflate_buffer + MCP_HEADER_PTR_OFFSET));
747 if ((hdr_offset & 3) || hdr_offset + sizeof(*hdr) > fw_len) {
748 device_printf(sc->dev, "Bad firmware file");
750 goto abort_with_buffer;
752 hdr = (const void*)(inflate_buffer + hdr_offset);
754 status = mxge_validate_firmware(sc, hdr);
756 goto abort_with_buffer;
758 /* Copy the inflated firmware to NIC SRAM. */
759 for (i = 0; i < fw_len; i += 256) {
760 mxge_pio_copy(sc->sram + MXGE_FW_OFFSET + i,
762 min(256U, (unsigned)(fw_len - i)));
771 free(inflate_buffer, M_TEMP);
775 firmware_put(fw, FIRMWARE_UNLOAD);
780 * Enable or disable periodic RDMAs from the host to make certain
781 * chipsets resend dropped PCIe messages
785 mxge_dummy_rdma(mxge_softc_t *sc, int enable)
788 volatile uint32_t *confirm;
789 volatile char *submit;
790 uint32_t *buf, dma_low, dma_high;
793 buf = (uint32_t *)((unsigned long)(buf_bytes + 7) & ~7UL);
795 /* clear confirmation addr */
796 confirm = (volatile uint32_t *)sc->cmd;
800 /* send an rdma command to the PCIe engine, and wait for the
801 response in the confirmation address. The firmware should
write a -1 there to indicate it is alive and well
*/
805 dma_low = MXGE_LOWPART_TO_U32(sc->cmd_dma.bus_addr);
806 dma_high = MXGE_HIGHPART_TO_U32(sc->cmd_dma.bus_addr);
807 buf[0] = htobe32(dma_high); /* confirm addr MSW */
808 buf[1] = htobe32(dma_low); /* confirm addr LSW */
809 buf[2] = htobe32(0xffffffff); /* confirm data */
810 dma_low = MXGE_LOWPART_TO_U32(sc->zeropad_dma.bus_addr);
811 dma_high = MXGE_HIGHPART_TO_U32(sc->zeropad_dma.bus_addr);
812 buf[3] = htobe32(dma_high); /* dummy addr MSW */
813 buf[4] = htobe32(dma_low); /* dummy addr LSW */
814 buf[5] = htobe32(enable); /* enable? */
817 submit = (volatile char *)(sc->sram + MXGEFW_BOOT_DUMMY_RDMA);
819 mxge_pio_copy(submit, buf, 64);
824 while (*confirm != 0xffffffff && i < 20) {
828 if (*confirm != 0xffffffff) {
829 device_printf(sc->dev, "dummy rdma %s failed (%p = 0x%x)",
830 (enable ? "enable" : "disable"), confirm,
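/*
* Command interface: the request block is PIO'd to the MXGEFW_ETH_CMD
* mailbox in NIC SRAM, and the firmware DMAs an mcp_cmd_response_t
* back into host memory at sc->cmd (sc->cmd_dma), which is polled for
* up to ~20ms while holding sc->cmd_mtx.
*
* Illustrative caller sketch (an assumption, mirroring the reset path
* further below; not additional driver code):
*/
#if 0
	mxge_cmd_t cmd;
	volatile uint32_t *irq_claim;

	memset(&cmd, 0, sizeof (cmd));
	if (mxge_send_cmd(sc, MXGEFW_CMD_GET_IRQ_ACK_OFFSET, &cmd) == 0)
		irq_claim = (volatile uint32_t *)(sc->sram + cmd.data0);
#endif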
837 mxge_send_cmd(mxge_softc_t *sc, uint32_t cmd, mxge_cmd_t *data)
840 char buf_bytes[sizeof(*buf) + 8];
841 volatile mcp_cmd_response_t *response = sc->cmd;
842 volatile char *cmd_addr = sc->sram + MXGEFW_ETH_CMD;
843 uint32_t dma_low, dma_high;
844 int err, sleep_total = 0;
846 /* ensure buf is aligned to 8 bytes */
847 buf = (mcp_cmd_t *)((unsigned long)(buf_bytes + 7) & ~7UL);
849 buf->data0 = htobe32(data->data0);
850 buf->data1 = htobe32(data->data1);
851 buf->data2 = htobe32(data->data2);
852 buf->cmd = htobe32(cmd);
853 dma_low = MXGE_LOWPART_TO_U32(sc->cmd_dma.bus_addr);
854 dma_high = MXGE_HIGHPART_TO_U32(sc->cmd_dma.bus_addr);
856 buf->response_addr.low = htobe32(dma_low);
857 buf->response_addr.high = htobe32(dma_high);
858 mtx_lock(&sc->cmd_mtx);
859 response->result = 0xffffffff;
861 mxge_pio_copy((volatile void *)cmd_addr, buf, sizeof (*buf));
863 /* wait up to 20ms */
865 for (sleep_total = 0; sleep_total < 20; sleep_total++) {
866 bus_dmamap_sync(sc->cmd_dma.dmat,
867 sc->cmd_dma.map, BUS_DMASYNC_POSTREAD);
869 switch (be32toh(response->result)) {
871 data->data0 = be32toh(response->data);
877 case MXGEFW_CMD_UNKNOWN:
880 case MXGEFW_CMD_ERROR_UNALIGNED:
883 case MXGEFW_CMD_ERROR_BUSY:
886 case MXGEFW_CMD_ERROR_I2C_ABSENT:
890 device_printf(sc->dev,
892 "failed, result = %d\n",
893 cmd, be32toh(response->result));
901 device_printf(sc->dev, "mxge: command %d timed out"
903 cmd, be32toh(response->result));
904 mtx_unlock(&sc->cmd_mtx);
909 mxge_adopt_running_firmware(mxge_softc_t *sc)
911 struct mcp_gen_header *hdr;
912 const size_t bytes = sizeof (struct mcp_gen_header);
916 /* find running firmware header */
917 hdr_offset = htobe32(*(volatile uint32_t *)
918 (sc->sram + MCP_HEADER_PTR_OFFSET));
920 if ((hdr_offset & 3) || hdr_offset + sizeof(*hdr) > sc->sram_size) {
921 device_printf(sc->dev,
922 "Running firmware has bad header offset (%d)\n",
927 /* copy header of running firmware from SRAM to host memory to
928 * validate firmware */
929 hdr = malloc(bytes, M_DEVBUF, M_NOWAIT);
931 device_printf(sc->dev, "could not malloc firmware hdr\n");
934 bus_space_read_region_1(rman_get_bustag(sc->mem_res),
935 rman_get_bushandle(sc->mem_res),
936 hdr_offset, (char *)hdr, bytes);
937 status = mxge_validate_firmware(sc, hdr);
/*
* check to see if adopted firmware has bug where adopting
* it will cause broadcasts to be filtered unless the NIC
* is kept in ALLMULTI mode
*/
945 if (sc->fw_ver_major == 1 && sc->fw_ver_minor == 4 &&
946 sc->fw_ver_tiny >= 4 && sc->fw_ver_tiny <= 11) {
947 sc->adopted_rx_filter_bug = 1;
948 device_printf(sc->dev, "Adopting fw %d.%d.%d: "
949 "working around rx filter bug\n",
950 sc->fw_ver_major, sc->fw_ver_minor,
959 mxge_load_firmware(mxge_softc_t *sc, int adopt)
961 volatile uint32_t *confirm;
962 volatile char *submit;
964 uint32_t *buf, size, dma_low, dma_high;
967 buf = (uint32_t *)((unsigned long)(buf_bytes + 7) & ~7UL);
969 size = sc->sram_size;
970 status = mxge_load_firmware_helper(sc, &size);
/* Try to use the currently running firmware, if
it is new enough */
976 status = mxge_adopt_running_firmware(sc);
978 device_printf(sc->dev,
979 "failed to adopt running firmware\n");
982 device_printf(sc->dev,
983 "Successfully adopted running firmware\n");
984 if (sc->tx_boundary == 4096) {
985 device_printf(sc->dev,
986 "Using firmware currently running on NIC"
988 device_printf(sc->dev,
989 "performance consider loading optimized "
992 sc->fw_name = mxge_fw_unaligned;
993 sc->tx_boundary = 2048;
996 /* clear confirmation addr */
997 confirm = (volatile uint32_t *)sc->cmd;
1000 /* send a reload command to the bootstrap MCP, and wait for the
1001 response in the confirmation address. The firmware should
write a -1 there to indicate it is alive and well
*/
1005 dma_low = MXGE_LOWPART_TO_U32(sc->cmd_dma.bus_addr);
1006 dma_high = MXGE_HIGHPART_TO_U32(sc->cmd_dma.bus_addr);
1008 buf[0] = htobe32(dma_high); /* confirm addr MSW */
1009 buf[1] = htobe32(dma_low); /* confirm addr LSW */
1010 buf[2] = htobe32(0xffffffff); /* confirm data */
1012 /* FIX: All newest firmware should un-protect the bottom of
1013 the sram before handoff. However, the very first interfaces
do not. Therefore the handoff copy must skip the first 8 bytes.
*/
1016 /* where the code starts*/
1017 buf[3] = htobe32(MXGE_FW_OFFSET + 8);
1018 buf[4] = htobe32(size - 8); /* length of code */
1019 buf[5] = htobe32(8); /* where to copy to */
1020 buf[6] = htobe32(0); /* where to jump to */
1022 submit = (volatile char *)(sc->sram + MXGEFW_BOOT_HANDOFF);
1023 mxge_pio_copy(submit, buf, 64);
1028 while (*confirm != 0xffffffff && i < 20) {
1031 bus_dmamap_sync(sc->cmd_dma.dmat,
1032 sc->cmd_dma.map, BUS_DMASYNC_POSTREAD);
1034 if (*confirm != 0xffffffff) {
1035 device_printf(sc->dev,"handoff failed (%p = 0x%x)",
1044 mxge_update_mac_address(mxge_softc_t *sc)
1047 uint8_t *addr = sc->mac_addr;
1051 cmd.data0 = ((addr[0] << 24) | (addr[1] << 16)
1052 | (addr[2] << 8) | addr[3]);
1054 cmd.data1 = ((addr[4] << 8) | (addr[5]));
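/*
* Worked example (illustrative): for MAC 00:60:dd:11:22:33 this packs
* data0 = 0x0060dd11 and data1 = 0x00002233.
*/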
1056 status = mxge_send_cmd(sc, MXGEFW_SET_MAC_ADDRESS, &cmd);
1061 mxge_change_pause(mxge_softc_t *sc, int pause)
1067 status = mxge_send_cmd(sc, MXGEFW_ENABLE_FLOW_CONTROL,
1070 status = mxge_send_cmd(sc, MXGEFW_DISABLE_FLOW_CONTROL,
1074 device_printf(sc->dev, "Failed to set flow control mode\n");
1082 mxge_change_promisc(mxge_softc_t *sc, int promisc)
1087 if (mxge_always_promisc)
1091 status = mxge_send_cmd(sc, MXGEFW_ENABLE_PROMISC,
1094 status = mxge_send_cmd(sc, MXGEFW_DISABLE_PROMISC,
1098 device_printf(sc->dev, "Failed to set promisc mode\n");
1103 mxge_set_multicast_list(mxge_softc_t *sc)
1106 struct ifmultiaddr *ifma;
1107 struct ifnet *ifp = sc->ifp;
1110 /* This firmware is known to not support multicast */
1111 if (!sc->fw_multicast_support)
1114 /* Disable multicast filtering while we play with the lists*/
1115 err = mxge_send_cmd(sc, MXGEFW_ENABLE_ALLMULTI, &cmd);
1117 device_printf(sc->dev, "Failed MXGEFW_ENABLE_ALLMULTI,"
1118 " error status: %d\n", err);
1122 if (sc->adopted_rx_filter_bug)
1125 if (ifp->if_flags & IFF_ALLMULTI)
1126 /* request to disable multicast filtering, so quit here */
1129 /* Flush all the filters */
1131 err = mxge_send_cmd(sc, MXGEFW_LEAVE_ALL_MULTICAST_GROUPS, &cmd);
1133 device_printf(sc->dev,
1134 "Failed MXGEFW_LEAVE_ALL_MULTICAST_GROUPS"
1135 ", error status: %d\n", err);
1139 /* Walk the multicast list, and add each address */
1141 if_maddr_rlock(ifp);
1142 TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
1143 if (ifma->ifma_addr->sa_family != AF_LINK)
1145 bcopy(LLADDR((struct sockaddr_dl *)ifma->ifma_addr),
1147 bcopy(LLADDR((struct sockaddr_dl *)ifma->ifma_addr) + 4,
1149 cmd.data0 = htonl(cmd.data0);
1150 cmd.data1 = htonl(cmd.data1);
1151 err = mxge_send_cmd(sc, MXGEFW_JOIN_MULTICAST_GROUP, &cmd);
1153 device_printf(sc->dev, "Failed "
1154 "MXGEFW_JOIN_MULTICAST_GROUP, error status:"
1156 /* abort, leaving multicast filtering off */
1157 if_maddr_runlock(ifp);
1161 if_maddr_runlock(ifp);
1162 /* Enable multicast filtering */
1163 err = mxge_send_cmd(sc, MXGEFW_DISABLE_ALLMULTI, &cmd);
1165 device_printf(sc->dev, "Failed MXGEFW_DISABLE_ALLMULTI"
1166 ", error status: %d\n", err);
1171 mxge_max_mtu(mxge_softc_t *sc)
1176 if (MJUMPAGESIZE - MXGEFW_PAD > MXGEFW_MAX_MTU)
1177 return MXGEFW_MAX_MTU - MXGEFW_PAD;
/* try to set nbufs to see if we can
use virtually contiguous jumbos */
1182 status = mxge_send_cmd(sc, MXGEFW_CMD_ALWAYS_USE_N_BIG_BUFFERS,
1185 return MXGEFW_MAX_MTU - MXGEFW_PAD;
1187 /* otherwise, we're limited to MJUMPAGESIZE */
1188 return MJUMPAGESIZE - MXGEFW_PAD;
1192 mxge_reset(mxge_softc_t *sc, int interrupts_setup)
1194 struct mxge_slice_state *ss;
1195 mxge_rx_done_t *rx_done;
1196 volatile uint32_t *irq_claim;
/* try to send a reset command to the card to see if it
is alive */
1202 memset(&cmd, 0, sizeof (cmd));
1203 status = mxge_send_cmd(sc, MXGEFW_CMD_RESET, &cmd);
1205 device_printf(sc->dev, "failed reset\n");
1209 mxge_dummy_rdma(sc, 1);
1212 /* set the intrq size */
1213 cmd.data0 = sc->rx_ring_size;
1214 status = mxge_send_cmd(sc, MXGEFW_CMD_SET_INTRQ_SIZE, &cmd);
/*
* Even though we already know how many slices are supported
* via mxge_slice_probe(), MXGEFW_CMD_GET_MAX_RSS_QUEUES
* has magic side effects, and must be called after a reset.
* It must be called prior to calling any RSS related cmds,
* including assigning an interrupt queue for anything but
* slice 0. It must also be called *after*
* MXGEFW_CMD_SET_INTRQ_SIZE, since the intrq size is used by
* the firmware to compute offsets.
*/
1227 if (sc->num_slices > 1) {
1228 /* ask the maximum number of slices it supports */
1229 status = mxge_send_cmd(sc, MXGEFW_CMD_GET_MAX_RSS_QUEUES,
1232 device_printf(sc->dev,
1233 "failed to get number of slices\n");
/*
* MXGEFW_CMD_ENABLE_RSS_QUEUES must be called prior
* to setting up the interrupt queue DMA
*/
1240 cmd.data0 = sc->num_slices;
1241 cmd.data1 = MXGEFW_SLICE_INTR_MODE_ONE_PER_SLICE;
1242 #ifdef IFNET_BUF_RING
1243 cmd.data1 |= MXGEFW_SLICE_ENABLE_MULTIPLE_TX_QUEUES;
1245 status = mxge_send_cmd(sc, MXGEFW_CMD_ENABLE_RSS_QUEUES,
1248 device_printf(sc->dev,
1249 "failed to set number of slices\n");
1255 if (interrupts_setup) {
1256 /* Now exchange information about interrupts */
1257 for (slice = 0; slice < sc->num_slices; slice++) {
1258 rx_done = &sc->ss[slice].rx_done;
1259 memset(rx_done->entry, 0, sc->rx_ring_size);
1260 cmd.data0 = MXGE_LOWPART_TO_U32(rx_done->dma.bus_addr);
1261 cmd.data1 = MXGE_HIGHPART_TO_U32(rx_done->dma.bus_addr);
1263 status |= mxge_send_cmd(sc,
1264 MXGEFW_CMD_SET_INTRQ_DMA,
1269 status |= mxge_send_cmd(sc,
1270 MXGEFW_CMD_GET_INTR_COAL_DELAY_OFFSET, &cmd);
1273 sc->intr_coal_delay_ptr = (volatile uint32_t *)(sc->sram + cmd.data0);
1275 status |= mxge_send_cmd(sc, MXGEFW_CMD_GET_IRQ_ACK_OFFSET, &cmd);
1276 irq_claim = (volatile uint32_t *)(sc->sram + cmd.data0);
1279 status |= mxge_send_cmd(sc, MXGEFW_CMD_GET_IRQ_DEASSERT_OFFSET,
1281 sc->irq_deassert = (volatile uint32_t *)(sc->sram + cmd.data0);
1283 device_printf(sc->dev, "failed set interrupt parameters\n");
1288 *sc->intr_coal_delay_ptr = htobe32(sc->intr_coal_delay);
1291 /* run a DMA benchmark */
1292 (void) mxge_dma_test(sc, MXGEFW_DMA_TEST);
1294 for (slice = 0; slice < sc->num_slices; slice++) {
1295 ss = &sc->ss[slice];
1297 ss->irq_claim = irq_claim + (2 * slice);
1298 /* reset mcp/driver shared state back to 0 */
1299 ss->rx_done.idx = 0;
1300 ss->rx_done.cnt = 0;
1303 ss->tx.pkt_done = 0;
1304 ss->tx.queue_active = 0;
1305 ss->tx.activate = 0;
1306 ss->tx.deactivate = 0;
1311 ss->rx_small.cnt = 0;
1312 ss->lro_bad_csum = 0;
1314 ss->lro_flushed = 0;
1315 if (ss->fw_stats != NULL) {
1316 bzero(ss->fw_stats, sizeof *ss->fw_stats);
1319 sc->rdma_tags_available = 15;
1320 status = mxge_update_mac_address(sc);
1321 mxge_change_promisc(sc, sc->ifp->if_flags & IFF_PROMISC);
1322 mxge_change_pause(sc, sc->pause);
1323 mxge_set_multicast_list(sc);
1325 cmd.data0 = sc->throttle;
1326 if (mxge_send_cmd(sc, MXGEFW_CMD_SET_THROTTLE_FACTOR,
1328 device_printf(sc->dev,
1329 "can't enable throttle\n");
1336 mxge_change_throttle(SYSCTL_HANDLER_ARGS)
1341 unsigned int throttle;
1344 throttle = sc->throttle;
1345 err = sysctl_handle_int(oidp, &throttle, arg2, req);
1350 if (throttle == sc->throttle)
1353 if (throttle < MXGE_MIN_THROTTLE || throttle > MXGE_MAX_THROTTLE)
1356 mtx_lock(&sc->driver_mtx);
1357 cmd.data0 = throttle;
1358 err = mxge_send_cmd(sc, MXGEFW_CMD_SET_THROTTLE_FACTOR, &cmd);
1360 sc->throttle = throttle;
1361 mtx_unlock(&sc->driver_mtx);
1366 mxge_change_intr_coal(SYSCTL_HANDLER_ARGS)
1369 unsigned int intr_coal_delay;
1373 intr_coal_delay = sc->intr_coal_delay;
1374 err = sysctl_handle_int(oidp, &intr_coal_delay, arg2, req);
1378 if (intr_coal_delay == sc->intr_coal_delay)
1381 if (intr_coal_delay == 0 || intr_coal_delay > 1000*1000)
1384 mtx_lock(&sc->driver_mtx);
1385 *sc->intr_coal_delay_ptr = htobe32(intr_coal_delay);
1386 sc->intr_coal_delay = intr_coal_delay;
1388 mtx_unlock(&sc->driver_mtx);
1393 mxge_change_flow_control(SYSCTL_HANDLER_ARGS)
1396 unsigned int enabled;
1400 enabled = sc->pause;
1401 err = sysctl_handle_int(oidp, &enabled, arg2, req);
1405 if (enabled == sc->pause)
1408 mtx_lock(&sc->driver_mtx);
1409 err = mxge_change_pause(sc, enabled);
1410 mtx_unlock(&sc->driver_mtx);
1415 mxge_change_lro_locked(mxge_softc_t *sc, int lro_cnt)
1422 ifp->if_capenable &= ~IFCAP_LRO;
1424 ifp->if_capenable |= IFCAP_LRO;
1425 sc->lro_cnt = lro_cnt;
1426 if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
1428 err = mxge_open(sc);
1434 mxge_change_lro(SYSCTL_HANDLER_ARGS)
1437 unsigned int lro_cnt;
1441 lro_cnt = sc->lro_cnt;
1442 err = sysctl_handle_int(oidp, &lro_cnt, arg2, req);
1446 if (lro_cnt == sc->lro_cnt)
1452 mtx_lock(&sc->driver_mtx);
1453 err = mxge_change_lro_locked(sc, lro_cnt);
1454 mtx_unlock(&sc->driver_mtx);
1459 mxge_handle_be32(SYSCTL_HANDLER_ARGS)
1465 arg2 = be32toh(*(int *)arg1);
1467 err = sysctl_handle_int(oidp, arg1, arg2, req);
1473 mxge_rem_sysctls(mxge_softc_t *sc)
1475 struct mxge_slice_state *ss;
1478 if (sc->slice_sysctl_tree == NULL)
1481 for (slice = 0; slice < sc->num_slices; slice++) {
1482 ss = &sc->ss[slice];
1483 if (ss == NULL || ss->sysctl_tree == NULL)
1485 sysctl_ctx_free(&ss->sysctl_ctx);
1486 ss->sysctl_tree = NULL;
1488 sysctl_ctx_free(&sc->slice_sysctl_ctx);
1489 sc->slice_sysctl_tree = NULL;
1493 mxge_add_sysctls(mxge_softc_t *sc)
1495 struct sysctl_ctx_list *ctx;
1496 struct sysctl_oid_list *children;
1498 struct mxge_slice_state *ss;
1502 ctx = device_get_sysctl_ctx(sc->dev);
1503 children = SYSCTL_CHILDREN(device_get_sysctl_tree(sc->dev));
1504 fw = sc->ss[0].fw_stats;
1506 /* random information */
1507 SYSCTL_ADD_STRING(ctx, children, OID_AUTO,
1509 CTLFLAG_RD, &sc->fw_version,
1510 0, "firmware version");
1511 SYSCTL_ADD_STRING(ctx, children, OID_AUTO,
1513 CTLFLAG_RD, &sc->serial_number_string,
1514 0, "serial number");
1515 SYSCTL_ADD_STRING(ctx, children, OID_AUTO,
1517 CTLFLAG_RD, &sc->product_code_string,
1519 SYSCTL_ADD_INT(ctx, children, OID_AUTO,
1521 CTLFLAG_RD, &sc->link_width,
1523 SYSCTL_ADD_INT(ctx, children, OID_AUTO,
1525 CTLFLAG_RD, &sc->tx_boundary,
1527 SYSCTL_ADD_INT(ctx, children, OID_AUTO,
1529 CTLFLAG_RD, &sc->wc,
1530 0, "write combining PIO?");
1531 SYSCTL_ADD_INT(ctx, children, OID_AUTO,
1533 CTLFLAG_RD, &sc->read_dma,
1534 0, "DMA Read speed in MB/s");
1535 SYSCTL_ADD_INT(ctx, children, OID_AUTO,
1537 CTLFLAG_RD, &sc->write_dma,
1538 0, "DMA Write speed in MB/s");
1539 SYSCTL_ADD_INT(ctx, children, OID_AUTO,
1540 "read_write_dma_MBs",
1541 CTLFLAG_RD, &sc->read_write_dma,
1542 0, "DMA concurrent Read/Write speed in MB/s");
1543 SYSCTL_ADD_INT(ctx, children, OID_AUTO,
1545 CTLFLAG_RD, &sc->watchdog_resets,
1546 0, "Number of times NIC was reset");
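/* These nodes appear under the device's sysctl tree, e.g.
dev.mxge.0.read_write_dma_MBs. */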
1549 /* performance related tunables */
1550 SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
1552 CTLTYPE_INT|CTLFLAG_RW, sc,
1553 0, mxge_change_intr_coal,
1554 "I", "interrupt coalescing delay in usecs");
1556 SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
1558 CTLTYPE_INT|CTLFLAG_RW, sc,
1559 0, mxge_change_throttle,
1560 "I", "transmit throttling");
1562 SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
1563 "flow_control_enabled",
1564 CTLTYPE_INT|CTLFLAG_RW, sc,
1565 0, mxge_change_flow_control,
1566 "I", "interrupt coalescing delay in usecs");
1568 SYSCTL_ADD_INT(ctx, children, OID_AUTO,
1570 CTLFLAG_RW, &mxge_deassert_wait,
1571 0, "Wait for IRQ line to go low in ihandler");
/* stats block from firmware is in network byte order.
Need to swap it */
1575 SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
1577 CTLTYPE_INT|CTLFLAG_RD, &fw->link_up,
1578 0, mxge_handle_be32,
1580 SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
1581 "rdma_tags_available",
1582 CTLTYPE_INT|CTLFLAG_RD, &fw->rdma_tags_available,
1583 0, mxge_handle_be32,
1584 "I", "rdma_tags_available");
1585 SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
1586 "dropped_bad_crc32",
1587 CTLTYPE_INT|CTLFLAG_RD,
1588 &fw->dropped_bad_crc32,
1589 0, mxge_handle_be32,
1590 "I", "dropped_bad_crc32");
1591 SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
1593 CTLTYPE_INT|CTLFLAG_RD,
1594 &fw->dropped_bad_phy,
1595 0, mxge_handle_be32,
1596 "I", "dropped_bad_phy");
1597 SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
1598 "dropped_link_error_or_filtered",
1599 CTLTYPE_INT|CTLFLAG_RD,
1600 &fw->dropped_link_error_or_filtered,
1601 0, mxge_handle_be32,
1602 "I", "dropped_link_error_or_filtered");
1603 SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
1604 "dropped_link_overflow",
1605 CTLTYPE_INT|CTLFLAG_RD, &fw->dropped_link_overflow,
1606 0, mxge_handle_be32,
1607 "I", "dropped_link_overflow");
1608 SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
1609 "dropped_multicast_filtered",
1610 CTLTYPE_INT|CTLFLAG_RD,
1611 &fw->dropped_multicast_filtered,
1612 0, mxge_handle_be32,
1613 "I", "dropped_multicast_filtered");
1614 SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
1615 "dropped_no_big_buffer",
1616 CTLTYPE_INT|CTLFLAG_RD, &fw->dropped_no_big_buffer,
1617 0, mxge_handle_be32,
1618 "I", "dropped_no_big_buffer");
1619 SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
1620 "dropped_no_small_buffer",
1621 CTLTYPE_INT|CTLFLAG_RD,
1622 &fw->dropped_no_small_buffer,
1623 0, mxge_handle_be32,
1624 "I", "dropped_no_small_buffer");
1625 SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
1627 CTLTYPE_INT|CTLFLAG_RD, &fw->dropped_overrun,
1628 0, mxge_handle_be32,
1629 "I", "dropped_overrun");
1630 SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
1632 CTLTYPE_INT|CTLFLAG_RD,
1634 0, mxge_handle_be32,
1635 "I", "dropped_pause");
1636 SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
1638 CTLTYPE_INT|CTLFLAG_RD, &fw->dropped_runt,
1639 0, mxge_handle_be32,
1640 "I", "dropped_runt");
1642 SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
1643 "dropped_unicast_filtered",
1644 CTLTYPE_INT|CTLFLAG_RD, &fw->dropped_unicast_filtered,
1645 0, mxge_handle_be32,
1646 "I", "dropped_unicast_filtered");
1648 /* verbose printing? */
1649 SYSCTL_ADD_INT(ctx, children, OID_AUTO,
1651 CTLFLAG_RW, &mxge_verbose,
1652 0, "verbose printing");
1655 SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
1657 CTLTYPE_INT|CTLFLAG_RW, sc,
1659 "I", "number of lro merge queues");
1662 /* add counters exported for debugging from all slices */
1663 sysctl_ctx_init(&sc->slice_sysctl_ctx);
1664 sc->slice_sysctl_tree =
1665 SYSCTL_ADD_NODE(&sc->slice_sysctl_ctx, children, OID_AUTO,
1666 "slice", CTLFLAG_RD, 0, "");
1668 for (slice = 0; slice < sc->num_slices; slice++) {
1669 ss = &sc->ss[slice];
1670 sysctl_ctx_init(&ss->sysctl_ctx);
1671 ctx = &ss->sysctl_ctx;
1672 children = SYSCTL_CHILDREN(sc->slice_sysctl_tree);
1673 sprintf(slice_num, "%d", slice);
1675 SYSCTL_ADD_NODE(ctx, children, OID_AUTO, slice_num,
1677 children = SYSCTL_CHILDREN(ss->sysctl_tree);
1678 SYSCTL_ADD_INT(ctx, children, OID_AUTO,
1680 CTLFLAG_RD, &ss->rx_small.cnt,
1682 SYSCTL_ADD_INT(ctx, children, OID_AUTO,
1684 CTLFLAG_RD, &ss->rx_big.cnt,
1686 SYSCTL_ADD_INT(ctx, children, OID_AUTO,
1687 "lro_flushed", CTLFLAG_RD, &ss->lro_flushed,
1688 0, "number of lro merge queues flushed");
1690 SYSCTL_ADD_INT(ctx, children, OID_AUTO,
1691 "lro_queued", CTLFLAG_RD, &ss->lro_queued,
1692 0, "number of frames appended to lro merge"
1695 #ifndef IFNET_BUF_RING
1696 /* only transmit from slice 0 for now */
1700 SYSCTL_ADD_INT(ctx, children, OID_AUTO,
1702 CTLFLAG_RD, &ss->tx.req,
1705 SYSCTL_ADD_INT(ctx, children, OID_AUTO,
1707 CTLFLAG_RD, &ss->tx.done,
1709 SYSCTL_ADD_INT(ctx, children, OID_AUTO,
1711 CTLFLAG_RD, &ss->tx.pkt_done,
1713 SYSCTL_ADD_INT(ctx, children, OID_AUTO,
1715 CTLFLAG_RD, &ss->tx.stall,
1717 SYSCTL_ADD_INT(ctx, children, OID_AUTO,
1719 CTLFLAG_RD, &ss->tx.wake,
1721 SYSCTL_ADD_INT(ctx, children, OID_AUTO,
1723 CTLFLAG_RD, &ss->tx.defrag,
1725 SYSCTL_ADD_INT(ctx, children, OID_AUTO,
1727 CTLFLAG_RD, &ss->tx.queue_active,
1728 0, "tx_queue_active");
1729 SYSCTL_ADD_INT(ctx, children, OID_AUTO,
1731 CTLFLAG_RD, &ss->tx.activate,
1733 SYSCTL_ADD_INT(ctx, children, OID_AUTO,
1735 CTLFLAG_RD, &ss->tx.deactivate,
1736 0, "tx_deactivate");
1740 /* copy an array of mcp_kreq_ether_send_t's to the mcp. Copy
1741 backwards one at a time and handle ring wraps */
1744 mxge_submit_req_backwards(mxge_tx_ring_t *tx,
1745 mcp_kreq_ether_send_t *src, int cnt)
1747 int idx, starting_slot;
1748 starting_slot = tx->req;
1751 idx = (starting_slot + cnt) & tx->mask;
1752 mxge_pio_copy(&tx->lanai[idx],
1753 &src[cnt], sizeof(*src));
/*
* copy an array of mcp_kreq_ether_send_t's to the mcp. Copy
* at most 32 bytes at a time, so as to avoid involving the software
* pio handler in the nic. We re-write the first segment's flags
* to mark them valid only after writing the entire chain
*/
1766 mxge_submit_req(mxge_tx_ring_t *tx, mcp_kreq_ether_send_t *src,
1771 volatile uint32_t *dst_ints;
1772 mcp_kreq_ether_send_t *srcp;
1773 volatile mcp_kreq_ether_send_t *dstp, *dst;
1776 idx = tx->req & tx->mask;
1778 last_flags = src->flags;
1781 dst = dstp = &tx->lanai[idx];
1784 if ((idx + cnt) < tx->mask) {
1785 for (i = 0; i < (cnt - 1); i += 2) {
1786 mxge_pio_copy(dstp, srcp, 2 * sizeof(*src));
1787 wmb(); /* force write every 32 bytes */
1792 /* submit all but the first request, and ensure
1793 that it is submitted below */
1794 mxge_submit_req_backwards(tx, src, cnt);
1798 /* submit the first request */
1799 mxge_pio_copy(dstp, srcp, sizeof(*src));
1800 wmb(); /* barrier before setting valid flag */
1803 /* re-write the last 32-bits with the valid flags */
1804 src->flags = last_flags;
1805 src_ints = (uint32_t *)src;
1807 dst_ints = (volatile uint32_t *)dst;
1809 *dst_ints = *src_ints;
1817 mxge_encap_tso(struct mxge_slice_state *ss, struct mbuf *m,
1818 int busdma_seg_cnt, int ip_off)
1821 mcp_kreq_ether_send_t *req;
1822 bus_dma_segment_t *seg;
1825 uint32_t low, high_swapped;
1826 int len, seglen, cum_len, cum_len_next;
1827 int next_is_first, chop, cnt, rdma_count, small;
1828 uint16_t pseudo_hdr_offset, cksum_offset, mss;
1829 uint8_t flags, flags_next;
1832 mss = m->m_pkthdr.tso_segsz;
1834 /* negative cum_len signifies to the
1835 * send loop that we are still in the
* header portion of the TSO packet.
*/
1839 /* ensure we have the ethernet, IP and TCP
1840 header together in the first mbuf, copy
1841 it to a scratch buffer if not */
1842 if (__predict_false(m->m_len < ip_off + sizeof (*ip))) {
1843 m_copydata(m, 0, ip_off + sizeof (*ip),
1845 ip = (struct ip *)(ss->scratch + ip_off);
1847 ip = (struct ip *)(mtod(m, char *) + ip_off);
1849 if (__predict_false(m->m_len < ip_off + (ip->ip_hl << 2)
1851 m_copydata(m, 0, ip_off + (ip->ip_hl << 2)
1852 + sizeof (*tcp), ss->scratch);
1853 ip = (struct ip *)(mtod(m, char *) + ip_off);
1856 tcp = (struct tcphdr *)((char *)ip + (ip->ip_hl << 2));
1857 cum_len = -(ip_off + ((ip->ip_hl + tcp->th_off) << 2));
1858 cksum_offset = ip_off + (ip->ip_hl << 2);
1860 /* TSO implies checksum offload on this hardware */
1861 if (__predict_false((m->m_pkthdr.csum_flags & (CSUM_TCP)) == 0)) {
1863 * If packet has full TCP csum, replace it with pseudo hdr
1864 * sum that the NIC expects, otherwise the NIC will emit
1865 * packets with bad TCP checksums.
1867 m->m_pkthdr.csum_flags = CSUM_TCP;
1868 m->m_pkthdr.csum_data = offsetof(struct tcphdr, th_sum);
1869 tcp->th_sum = in_pseudo(ip->ip_src.s_addr, ip->ip_dst.s_addr,
1870 htons(IPPROTO_TCP + (m->m_pkthdr.len - cksum_offset)));
1872 flags = MXGEFW_FLAGS_TSO_HDR | MXGEFW_FLAGS_FIRST;
1875 /* for TSO, pseudo_hdr_offset holds mss.
1876 * The firmware figures out where to put
1877 * the checksum by parsing the header. */
1878 pseudo_hdr_offset = htobe16(mss);
1885 /* "rdma_count" is the number of RDMAs belonging to the
1886 * current packet BEFORE the current send request. For
1887 * non-TSO packets, this is equal to "count".
1888 * For TSO packets, rdma_count needs to be reset
1889 * to 0 after a segment cut.
1891 * The rdma_count field of the send request is
1892 * the number of RDMAs of the packet starting at
* that request. For TSO send requests with one or more cuts
1894 * in the middle, this is the number of RDMAs starting
1895 * after the last cut in the request. All previous
1896 * segments before the last cut implicitly have 1 RDMA.
1898 * Since the number of RDMAs is not known beforehand,
1899 * it must be filled-in retroactively - after each
* segmentation cut or at the end of the entire packet.
*/
1903 while (busdma_seg_cnt) {
1904 /* Break the busdma segment up into pieces*/
1905 low = MXGE_LOWPART_TO_U32(seg->ds_addr);
1906 high_swapped = htobe32(MXGE_HIGHPART_TO_U32(seg->ds_addr));
1910 flags_next = flags & ~MXGEFW_FLAGS_FIRST;
1912 cum_len_next = cum_len + seglen;
1913 (req-rdma_count)->rdma_count = rdma_count + 1;
1914 if (__predict_true(cum_len >= 0)) {
1916 chop = (cum_len_next > mss);
1917 cum_len_next = cum_len_next % mss;
1918 next_is_first = (cum_len_next == 0);
1919 flags |= chop * MXGEFW_FLAGS_TSO_CHOP;
1920 flags_next |= next_is_first *
1922 rdma_count |= -(chop | next_is_first);
1923 rdma_count += chop & !next_is_first;
1924 } else if (cum_len_next >= 0) {
1929 small = (mss <= MXGEFW_SEND_SMALL_SIZE);
1930 flags_next = MXGEFW_FLAGS_TSO_PLD |
1931 MXGEFW_FLAGS_FIRST |
1932 (small * MXGEFW_FLAGS_SMALL);
1935 req->addr_high = high_swapped;
1936 req->addr_low = htobe32(low);
1937 req->pseudo_hdr_offset = pseudo_hdr_offset;
1939 req->rdma_count = 1;
1940 req->length = htobe16(seglen);
1941 req->cksum_offset = cksum_offset;
1942 req->flags = flags | ((cum_len & 1) *
1943 MXGEFW_FLAGS_ALIGN_ODD);
1946 cum_len = cum_len_next;
1951 if (__predict_false(cksum_offset > seglen))
1952 cksum_offset -= seglen;
1955 if (__predict_false(cnt > tx->max_desc))
1961 (req-rdma_count)->rdma_count = rdma_count;
1965 req->flags |= MXGEFW_FLAGS_TSO_LAST;
1966 } while (!(req->flags & (MXGEFW_FLAGS_TSO_CHOP | MXGEFW_FLAGS_FIRST)));
1968 tx->info[((cnt - 1) + tx->req) & tx->mask].flag = 1;
1969 mxge_submit_req(tx, tx->req_list, cnt);
1970 #ifdef IFNET_BUF_RING
1971 if ((ss->sc->num_slices > 1) && tx->queue_active == 0) {
1972 /* tell the NIC to start polling this slice */
1974 tx->queue_active = 1;
1982 bus_dmamap_unload(tx->dmat, tx->info[tx->req & tx->mask].map);
1986 printf("tx->max_desc exceeded via TSO!\n");
1987 printf("mss = %d, %ld, %d!\n", mss,
1988 (long)seg - (long)tx->seg_list, tx->max_desc);
1995 #endif /* IFCAP_TSO4 */
1997 #ifdef MXGE_NEW_VLAN_API
/*
* We reproduce the software vlan tag insertion from
* net/if_vlan.c:vlan_start() here so that we can advertise "hardware"
* vlan tag insertion. We need to advertise this in order to have the
* vlan interface respect our csum offload flags.
*/
2004 static struct mbuf *
2005 mxge_vlan_tag_insert(struct mbuf *m)
2007 struct ether_vlan_header *evl;
2009 M_PREPEND(m, ETHER_VLAN_ENCAP_LEN, M_DONTWAIT);
2010 if (__predict_false(m == NULL))
2012 if (m->m_len < sizeof(*evl)) {
2013 m = m_pullup(m, sizeof(*evl));
2014 if (__predict_false(m == NULL))
2018 * Transform the Ethernet header into an Ethernet header
2019 * with 802.1Q encapsulation.
2021 evl = mtod(m, struct ether_vlan_header *);
2022 bcopy((char *)evl + ETHER_VLAN_ENCAP_LEN,
2023 (char *)evl, ETHER_HDR_LEN - ETHER_TYPE_LEN);
2024 evl->evl_encap_proto = htons(ETHERTYPE_VLAN);
2025 evl->evl_tag = htons(m->m_pkthdr.ether_vtag);
2026 m->m_flags &= ~M_VLANTAG;
2029 #endif /* MXGE_NEW_VLAN_API */
2032 mxge_encap(struct mxge_slice_state *ss, struct mbuf *m)
2035 mcp_kreq_ether_send_t *req;
2036 bus_dma_segment_t *seg;
2041 int cnt, cum_len, err, i, idx, odd_flag, ip_off;
2042 uint16_t pseudo_hdr_offset;
2043 uint8_t flags, cksum_offset;
2050 ip_off = sizeof (struct ether_header);
2051 #ifdef MXGE_NEW_VLAN_API
2052 if (m->m_flags & M_VLANTAG) {
2053 m = mxge_vlan_tag_insert(m);
2054 if (__predict_false(m == NULL))
2056 ip_off += ETHER_VLAN_ENCAP_LEN;
2059 /* (try to) map the frame for DMA */
2060 idx = tx->req & tx->mask;
2061 err = bus_dmamap_load_mbuf_sg(tx->dmat, tx->info[idx].map,
2062 m, tx->seg_list, &cnt,
2064 if (__predict_false(err == EFBIG)) {
/* Too many segments in the chain. Try
to defrag */
2067 m_tmp = m_defrag(m, M_NOWAIT);
2068 if (m_tmp == NULL) {
2073 err = bus_dmamap_load_mbuf_sg(tx->dmat,
2075 m, tx->seg_list, &cnt,
2078 if (__predict_false(err != 0)) {
2079 device_printf(sc->dev, "bus_dmamap_load_mbuf_sg returned %d"
2080 " packet len = %d\n", err, m->m_pkthdr.len);
2083 bus_dmamap_sync(tx->dmat, tx->info[idx].map,
2084 BUS_DMASYNC_PREWRITE);
2085 tx->info[idx].m = m;
2088 /* TSO is different enough, we handle it in another routine */
2089 if (m->m_pkthdr.csum_flags & (CSUM_TSO)) {
2090 mxge_encap_tso(ss, m, cnt, ip_off);
2097 pseudo_hdr_offset = 0;
2098 flags = MXGEFW_FLAGS_NO_TSO;
2100 /* checksum offloading? */
2101 if (m->m_pkthdr.csum_flags & (CSUM_DELAY_DATA)) {
2102 /* ensure ip header is in first mbuf, copy
2103 it to a scratch buffer if not */
2104 if (__predict_false(m->m_len < ip_off + sizeof (*ip))) {
2105 m_copydata(m, 0, ip_off + sizeof (*ip),
2107 ip = (struct ip *)(ss->scratch + ip_off);
2109 ip = (struct ip *)(mtod(m, char *) + ip_off);
2111 cksum_offset = ip_off + (ip->ip_hl << 2);
2112 pseudo_hdr_offset = cksum_offset + m->m_pkthdr.csum_data;
2113 pseudo_hdr_offset = htobe16(pseudo_hdr_offset);
2114 req->cksum_offset = cksum_offset;
2115 flags |= MXGEFW_FLAGS_CKSUM;
2116 odd_flag = MXGEFW_FLAGS_ALIGN_ODD;
2120 if (m->m_pkthdr.len < MXGEFW_SEND_SMALL_SIZE)
2121 flags |= MXGEFW_FLAGS_SMALL;
2123 /* convert segments into a request list */
2126 req->flags = MXGEFW_FLAGS_FIRST;
2127 for (i = 0; i < cnt; i++) {
2129 htobe32(MXGE_LOWPART_TO_U32(seg->ds_addr));
2131 htobe32(MXGE_HIGHPART_TO_U32(seg->ds_addr));
2132 req->length = htobe16(seg->ds_len);
2133 req->cksum_offset = cksum_offset;
2134 if (cksum_offset > seg->ds_len)
2135 cksum_offset -= seg->ds_len;
2138 req->pseudo_hdr_offset = pseudo_hdr_offset;
2139 req->pad = 0; /* complete solid 16-byte block */
2140 req->rdma_count = 1;
2141 req->flags |= flags | ((cum_len & 1) * odd_flag);
2142 cum_len += seg->ds_len;
2148 /* pad runts to 60 bytes */
2152 htobe32(MXGE_LOWPART_TO_U32(sc->zeropad_dma.bus_addr));
2154 htobe32(MXGE_HIGHPART_TO_U32(sc->zeropad_dma.bus_addr));
2155 req->length = htobe16(60 - cum_len);
2156 req->cksum_offset = 0;
2157 req->pseudo_hdr_offset = pseudo_hdr_offset;
2158 req->pad = 0; /* complete solid 16-byte block */
2159 req->rdma_count = 1;
2160 req->flags |= flags | ((cum_len & 1) * odd_flag);
2164 tx->req_list[0].rdma_count = cnt;
2166 /* print what the firmware will see */
2167 for (i = 0; i < cnt; i++) {
2168 printf("%d: addr: 0x%x 0x%x len:%d pso%d,"
2169 "cso:%d, flags:0x%x, rdma:%d\n",
2170 i, (int)ntohl(tx->req_list[i].addr_high),
2171 (int)ntohl(tx->req_list[i].addr_low),
2172 (int)ntohs(tx->req_list[i].length),
2173 (int)ntohs(tx->req_list[i].pseudo_hdr_offset),
2174 tx->req_list[i].cksum_offset, tx->req_list[i].flags,
2175 tx->req_list[i].rdma_count);
2177 printf("--------------\n");
2179 tx->info[((cnt - 1) + tx->req) & tx->mask].flag = 1;
2180 mxge_submit_req(tx, tx->req_list, cnt);
2181 #ifdef IFNET_BUF_RING
2182 if ((ss->sc->num_slices > 1) && tx->queue_active == 0) {
2183 /* tell the NIC to start polling this slice */
2185 tx->queue_active = 1;
2198 #ifdef IFNET_BUF_RING
2200 mxge_qflush(struct ifnet *ifp)
2202 mxge_softc_t *sc = ifp->if_softc;
2207 for (slice = 0; slice < sc->num_slices; slice++) {
2208 tx = &sc->ss[slice].tx;
2210 while ((m = buf_ring_dequeue_sc(tx->br)) != NULL)
2212 mtx_unlock(&tx->mtx);
2218 mxge_start_locked(struct mxge_slice_state *ss)
2229 while ((tx->mask - (tx->req - tx->done)) > tx->max_desc) {
2230 m = drbr_dequeue(ifp, tx->br);
2234 /* let BPF see it */
2237 /* give it to the nic */
2240 /* ran out of transmit slots */
2241 if (((ss->if_drv_flags & IFF_DRV_OACTIVE) == 0)
2242 && (!drbr_empty(ifp, tx->br))) {
2243 ss->if_drv_flags |= IFF_DRV_OACTIVE;
2249 mxge_transmit_locked(struct mxge_slice_state *ss, struct mbuf *m)
2260 if ((ss->if_drv_flags & (IFF_DRV_RUNNING|IFF_DRV_OACTIVE)) !=
2262 err = drbr_enqueue(ifp, tx->br, m);
2266 if (!drbr_needs_enqueue(ifp, tx->br) &&
2267 ((tx->mask - (tx->req - tx->done)) > tx->max_desc)) {
2268 /* let BPF see it */
2270 /* give it to the nic */
2272 } else if ((err = drbr_enqueue(ifp, tx->br, m)) != 0) {
2275 if (!drbr_empty(ifp, tx->br))
2276 mxge_start_locked(ss);
2281 mxge_transmit(struct ifnet *ifp, struct mbuf *m)
2283 mxge_softc_t *sc = ifp->if_softc;
2284 struct mxge_slice_state *ss;
2289 slice = m->m_pkthdr.flowid;
2290 slice &= (sc->num_slices - 1); /* num_slices always power of 2 */
2292 ss = &sc->ss[slice];
2295 if (mtx_trylock(&tx->mtx)) {
2296 err = mxge_transmit_locked(ss, m);
2297 mtx_unlock(&tx->mtx);
2299 err = drbr_enqueue(ifp, tx->br, m);
2308 mxge_start_locked(struct mxge_slice_state *ss)
2318 while ((tx->mask - (tx->req - tx->done)) > tx->max_desc) {
2319 IFQ_DRV_DEQUEUE(&ifp->if_snd, m);
2323 /* let BPF see it */
2326 /* give it to the nic */
2329 /* ran out of transmit slots */
2330 if ((sc->ifp->if_drv_flags & IFF_DRV_OACTIVE) == 0) {
2331 sc->ifp->if_drv_flags |= IFF_DRV_OACTIVE;
2337 mxge_start(struct ifnet *ifp)
2339 mxge_softc_t *sc = ifp->if_softc;
2340 struct mxge_slice_state *ss;
2342 /* only use the first slice for now */
2344 mtx_lock(&ss->tx.mtx);
2345 mxge_start_locked(ss);
2346 mtx_unlock(&ss->tx.mtx);
/*
* copy an array of mcp_kreq_ether_recv_t's to the mcp. Copy
* at most 32 bytes at a time, so as to avoid involving the software
* pio handler in the nic. We re-write the first segment's low
* DMA address to mark it valid only after we write the entire chunk
*/
2357 mxge_submit_8rx(volatile mcp_kreq_ether_recv_t *dst,
2358 mcp_kreq_ether_recv_t *src)
2362 low = src->addr_low;
2363 src->addr_low = 0xffffffff;
2364 mxge_pio_copy(dst, src, 4 * sizeof (*src));
2366 mxge_pio_copy(dst + 4, src + 4, 4 * sizeof (*src));
2368 src->addr_low = low;
2369 dst->addr_low = low;
2374 mxge_get_buf_small(struct mxge_slice_state *ss, bus_dmamap_t map, int idx)
2376 bus_dma_segment_t seg;
2378 mxge_rx_ring_t *rx = &ss->rx_small;
2381 m = m_gethdr(M_DONTWAIT, MT_DATA);
2388 err = bus_dmamap_load_mbuf_sg(rx->dmat, map, m,
2389 &seg, &cnt, BUS_DMA_NOWAIT);
2394 rx->info[idx].m = m;
2395 rx->shadow[idx].addr_low =
2396 htobe32(MXGE_LOWPART_TO_U32(seg.ds_addr));
2397 rx->shadow[idx].addr_high =
2398 htobe32(MXGE_HIGHPART_TO_U32(seg.ds_addr));
2402 mxge_submit_8rx(&rx->lanai[idx - 7], &rx->shadow[idx - 7]);
2407 mxge_get_buf_big(struct mxge_slice_state *ss, bus_dmamap_t map, int idx)
2409 bus_dma_segment_t seg[3];
2411 mxge_rx_ring_t *rx = &ss->rx_big;
2414 m = m_getjcl(M_DONTWAIT, MT_DATA, M_PKTHDR, rx->cl_size);
2420 m->m_len = rx->mlen;
2421 err = bus_dmamap_load_mbuf_sg(rx->dmat, map, m,
2422 seg, &cnt, BUS_DMA_NOWAIT);
2427 rx->info[idx].m = m;
2428 rx->shadow[idx].addr_low =
2429 htobe32(MXGE_LOWPART_TO_U32(seg->ds_addr));
2430 rx->shadow[idx].addr_high =
2431 htobe32(MXGE_HIGHPART_TO_U32(seg->ds_addr));
2433 #if MXGE_VIRT_JUMBOS
2434 for (i = 1; i < cnt; i++) {
2435 rx->shadow[idx + i].addr_low =
2436 htobe32(MXGE_LOWPART_TO_U32(seg[i].ds_addr));
2437 rx->shadow[idx + i].addr_high =
2438 htobe32(MXGE_HIGHPART_TO_U32(seg[i].ds_addr));
2443 for (i = 0; i < rx->nbufs; i++) {
2444 if ((idx & 7) == 7) {
2445 mxge_submit_8rx(&rx->lanai[idx - 7],
2446 &rx->shadow[idx - 7]);
2454 * Myri10GE hardware checksums are not valid if the sender
2455 * padded the frame with non-zero padding. This is because
2456 * the firmware just does a simple 16-bit 1s complement
2457 * checksum across the entire frame, excluding the first 14
* bytes. It is best to simply check the checksum and
* tell the stack about it only if the checksum is good
*/
2462 static inline uint16_t
2463 mxge_rx_csum(struct mbuf *m, int csum)
2465 struct ether_header *eh;
2469 eh = mtod(m, struct ether_header *);
2471 /* only deal with IPv4 TCP & UDP for now */
2472 if (__predict_false(eh->ether_type != htons(ETHERTYPE_IP)))
2474 ip = (struct ip *)(eh + 1);
2475 if (__predict_false(ip->ip_p != IPPROTO_TCP &&
2476 ip->ip_p != IPPROTO_UDP))
2479 c = in_pseudo(ip->ip_src.s_addr, ip->ip_dst.s_addr,
2480 htonl(ntohs(csum) + ntohs(ip->ip_len) +
2481 - (ip->ip_hl << 2) + ip->ip_p));
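/*
* The firmware checksum covers everything past the 14-byte Ethernet
* header; folding in the pseudo-header (protocol plus TCP/UDP length,
* i.e. ip_len minus the IP header length) lets this routine verify
* the TCP/UDP checksum without walking the payload. Callers treat a
* return value of 0 as "checksum good".
*/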
2490 mxge_vlan_tag_remove(struct mbuf *m, uint32_t *csum)
2492 struct ether_vlan_header *evl;
2493 struct ether_header *eh;
2496 evl = mtod(m, struct ether_vlan_header *);
2497 eh = mtod(m, struct ether_header *);
/*
* fix checksum by subtracting ETHER_VLAN_ENCAP_LEN bytes
* after what the firmware thought was the end of the ethernet
* header.
*/
2505 /* put checksum into host byte order */
2506 *csum = ntohs(*csum);
2507 partial = ntohl(*(uint32_t *)(mtod(m, char *) + ETHER_HDR_LEN));
2508 (*csum) += ~partial;
2509 (*csum) += ((*csum) < ~partial);
2510 (*csum) = ((*csum) >> 16) + ((*csum) & 0xFFFF);
2511 (*csum) = ((*csum) >> 16) + ((*csum) & 0xFFFF);
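/*
* Ones-complement arithmetic: adding ~partial (plus the end-around
* carry) subtracts the four bytes following the Ethernet header from
* the checksum, and the two folds reduce the 32-bit sum back down to
* 16 bits.
*/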
2513 /* restore checksum to network byte order;
2514 later consumers expect this */
2515 *csum = htons(*csum);
2518 #ifdef MXGE_NEW_VLAN_API
2519 m->m_pkthdr.ether_vtag = ntohs(evl->evl_tag);
2523 mtag = m_tag_alloc(MTAG_VLAN, MTAG_VLAN_TAG, sizeof(u_int),
2527 VLAN_TAG_VALUE(mtag) = ntohs(evl->evl_tag);
2528 m_tag_prepend(m, mtag);
2532 m->m_flags |= M_VLANTAG;
/*
* Remove the 802.1q header by copying the Ethernet
* addresses over it and adjusting the beginning of
* the data in the mbuf. The encapsulated Ethernet
* type field is already in place.
*/
2540 bcopy((char *)evl, (char *)evl + ETHER_VLAN_ENCAP_LEN,
2541 ETHER_HDR_LEN - ETHER_TYPE_LEN);
2542 m_adj(m, ETHER_VLAN_ENCAP_LEN);
2547 mxge_rx_done_big(struct mxge_slice_state *ss, uint32_t len, uint32_t csum)
2552 struct ether_header *eh;
2554 bus_dmamap_t old_map;
2556 uint16_t tcpudp_csum;
2561 idx = rx->cnt & rx->mask;
2562 rx->cnt += rx->nbufs;
2563 /* save a pointer to the received mbuf */
2564 m = rx->info[idx].m;
2565 /* try to replace the received mbuf */
2566 if (mxge_get_buf_big(ss, rx->extra_map, idx)) {
2567 /* drop the frame -- the old mbuf is re-cycled */
2572 /* unmap the received buffer */
2573 old_map = rx->info[idx].map;
2574 bus_dmamap_sync(rx->dmat, old_map, BUS_DMASYNC_POSTREAD);
2575 bus_dmamap_unload(rx->dmat, old_map);
2577 /* swap the bus_dmamap_t's */
2578 rx->info[idx].map = rx->extra_map;
2579 rx->extra_map = old_map;
2581 	/* mcp implicitly skips 1st 2 bytes so that packet is properly aligned */
2583 m->m_data += MXGEFW_PAD;
2585 m->m_pkthdr.rcvif = ifp;
2586 m->m_len = m->m_pkthdr.len = len;
2588 eh = mtod(m, struct ether_header *);
2589 if (eh->ether_type == htons(ETHERTYPE_VLAN)) {
2590 mxge_vlan_tag_remove(m, &csum);
2592 /* if the checksum is valid, mark it in the mbuf header */
2593 if (sc->csum_flag && (0 == (tcpudp_csum = mxge_rx_csum(m, csum)))) {
2594 if (sc->lro_cnt && (0 == mxge_lro_rx(ss, m, csum)))
2596 /* otherwise, it was a UDP frame, or a TCP frame which
2597 		   we could not do LRO on.  Tell the stack that the checksum is good */
2599 m->m_pkthdr.csum_data = 0xffff;
2600 m->m_pkthdr.csum_flags = CSUM_PSEUDO_HDR | CSUM_DATA_VALID;
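/*
 * CSUM_DATA_VALID | CSUM_PSEUDO_HDR with csum_data set to 0xffff tells the
 * stack that the TCP/UDP checksum, pseudo-header included, has already been
 * verified, so no software checksum pass is needed.
 */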
2602 /* flowid only valid if RSS hashing is enabled */
2603 if (sc->num_slices > 1) {
2604 m->m_pkthdr.flowid = (ss - sc->ss);
2605 m->m_flags |= M_FLOWID;
2607 /* pass the frame up the stack */
2608 (*ifp->if_input)(ifp, m);
2612 mxge_rx_done_small(struct mxge_slice_state *ss, uint32_t len, uint32_t csum)
2616 struct ether_header *eh;
2619 bus_dmamap_t old_map;
2621 uint16_t tcpudp_csum;
2626 idx = rx->cnt & rx->mask;
2628 /* save a pointer to the received mbuf */
2629 m = rx->info[idx].m;
2630 /* try to replace the received mbuf */
2631 if (mxge_get_buf_small(ss, rx->extra_map, idx)) {
2632 /* drop the frame -- the old mbuf is re-cycled */
2637 /* unmap the received buffer */
2638 old_map = rx->info[idx].map;
2639 bus_dmamap_sync(rx->dmat, old_map, BUS_DMASYNC_POSTREAD);
2640 bus_dmamap_unload(rx->dmat, old_map);
2642 /* swap the bus_dmamap_t's */
2643 rx->info[idx].map = rx->extra_map;
2644 rx->extra_map = old_map;
2646 	/* mcp implicitly skips 1st 2 bytes so that packet is properly aligned */
2648 m->m_data += MXGEFW_PAD;
2650 m->m_pkthdr.rcvif = ifp;
2651 m->m_len = m->m_pkthdr.len = len;
2653 eh = mtod(m, struct ether_header *);
2654 if (eh->ether_type == htons(ETHERTYPE_VLAN)) {
2655 mxge_vlan_tag_remove(m, &csum);
2657 /* if the checksum is valid, mark it in the mbuf header */
2658 if (sc->csum_flag && (0 == (tcpudp_csum = mxge_rx_csum(m, csum)))) {
2659 if (sc->lro_cnt && (0 == mxge_lro_rx(ss, m, csum)))
2661 /* otherwise, it was a UDP frame, or a TCP frame which
2662 		   we could not do LRO on.  Tell the stack that the checksum is good */
2664 m->m_pkthdr.csum_data = 0xffff;
2665 m->m_pkthdr.csum_flags = CSUM_PSEUDO_HDR | CSUM_DATA_VALID;
2667 /* flowid only valid if RSS hashing is enabled */
2668 if (sc->num_slices > 1) {
2669 m->m_pkthdr.flowid = (ss - sc->ss);
2670 m->m_flags |= M_FLOWID;
2672 /* pass the frame up the stack */
2673 (*ifp->if_input)(ifp, m);
2677 mxge_clean_rx_done(struct mxge_slice_state *ss)
2679 mxge_rx_done_t *rx_done = &ss->rx_done;
2685 while (rx_done->entry[rx_done->idx].length != 0) {
2686 length = ntohs(rx_done->entry[rx_done->idx].length);
2687 rx_done->entry[rx_done->idx].length = 0;
2688 checksum = rx_done->entry[rx_done->idx].checksum;
2689 if (length <= (MHLEN - MXGEFW_PAD))
2690 mxge_rx_done_small(ss, length, checksum);
2692 mxge_rx_done_big(ss, length, checksum);
2694 rx_done->idx = rx_done->cnt & rx_done->mask;
2696 /* limit potential for livelock */
2697 if (__predict_false(++limit > rx_done->mask / 2))
2701 while (!SLIST_EMPTY(&ss->lro_active)) {
2702 struct lro_entry *lro = SLIST_FIRST(&ss->lro_active);
2703 SLIST_REMOVE_HEAD(&ss->lro_active, next);
2704 mxge_lro_flush(ss, lro);
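/*
 * Any LRO aggregates still open at the end of this pass are flushed so that
 * merged TCP segments reach the stack now rather than waiting for the next
 * interrupt.
 */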
2711 mxge_tx_done(struct mxge_slice_state *ss, uint32_t mcp_idx)
2722 while (tx->pkt_done != mcp_idx) {
2723 idx = tx->done & tx->mask;
2725 m = tx->info[idx].m;
2726 /* mbuf and DMA map only attached to the first
2729 ss->obytes += m->m_pkthdr.len;
2730 if (m->m_flags & M_MCAST)
2733 tx->info[idx].m = NULL;
2734 map = tx->info[idx].map;
2735 bus_dmamap_unload(tx->dmat, map);
2738 if (tx->info[idx].flag) {
2739 tx->info[idx].flag = 0;
2744 /* If we have space, clear IFF_OACTIVE to tell the stack that
2745 	   it's OK to send packets */
2746 #ifdef IFNET_BUF_RING
2747 flags = &ss->if_drv_flags;
2749 flags = &ifp->if_drv_flags;
2751 mtx_lock(&ss->tx.mtx);
2752 if ((*flags) & IFF_DRV_OACTIVE &&
2753 tx->req - tx->done < (tx->mask + 1)/4) {
2754 *(flags) &= ~IFF_DRV_OACTIVE;
2756 mxge_start_locked(ss);
2758 #ifdef IFNET_BUF_RING
2759 if ((ss->sc->num_slices > 1) && (tx->req == tx->done)) {
2760 /* let the NIC stop polling this queue, since there
2761 * are no more transmits pending */
2762 if (tx->req == tx->done) {
2764 tx->queue_active = 0;
2770 mtx_unlock(&ss->tx.mtx);
2774 static struct mxge_media_type mxge_xfp_media_types[] =
2776 {IFM_10G_CX4, 0x7f, "10GBASE-CX4 (module)"},
2777 {IFM_10G_SR, (1 << 7), "10GBASE-SR"},
2778 {IFM_10G_LR, (1 << 6), "10GBASE-LR"},
2779 {0, (1 << 5), "10GBASE-ER"},
2780 {IFM_10G_LRM, (1 << 4), "10GBASE-LRM"},
2781 {0, (1 << 3), "10GBASE-SW"},
2782 {0, (1 << 2), "10GBASE-LW"},
2783 {0, (1 << 1), "10GBASE-EW"},
2784 {0, (1 << 0), "Reserved"}
2786 static struct mxge_media_type mxge_sfp_media_types[] =
2788 {IFM_10G_TWINAX, 0, "10GBASE-Twinax"},
2789 {0, (1 << 7), "Reserved"},
2790 {IFM_10G_LRM, (1 << 6), "10GBASE-LRM"},
2791 {IFM_10G_LR, (1 << 5), "10GBASE-LR"},
2792 {IFM_10G_SR, (1 << 4), "10GBASE-SR"},
2793 {IFM_10G_TWINAX,(1 << 0), "10GBASE-Twinax"}
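/*
 * The bitmask fields above are matched against the transceiver compliance
 * byte that mxge_media_probe() reads over I2C: the first entry of each table
 * is an exact-match case, the remaining ones are tested bit by bit.
 */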
2797 mxge_media_set(mxge_softc_t *sc, int media_type)
2801 ifmedia_add(&sc->media, IFM_ETHER | IFM_FDX | media_type,
2803 ifmedia_set(&sc->media, IFM_ETHER | IFM_FDX | media_type);
2804 sc->current_media = media_type;
2805 sc->media.ifm_media = sc->media.ifm_cur->ifm_media;
2809 mxge_media_init(mxge_softc_t *sc)
2814 ifmedia_removeall(&sc->media);
2815 mxge_media_set(sc, IFM_AUTO);
2818 	 * parse the product code to determine the interface type
2819 * (CX4, XFP, Quad Ribbon Fiber) by looking at the character
2820 * after the 3rd dash in the driver's cached copy of the
2821 * EEPROM's product code string.
2823 ptr = sc->product_code_string;
2825 device_printf(sc->dev, "Missing product code\n");
2829 for (i = 0; i < 3; i++, ptr++) {
2830 ptr = index(ptr, '-');
2832 device_printf(sc->dev,
2833 "only %d dashes in PC?!?\n", i);
2837 if (*ptr == 'C' || *(ptr +1) == 'C') {
2839 sc->connector = MXGE_CX4;
2840 mxge_media_set(sc, IFM_10G_CX4);
2841 } else if (*ptr == 'Q') {
2842 /* -Q is Quad Ribbon Fiber */
2843 sc->connector = MXGE_QRF;
2844 device_printf(sc->dev, "Quad Ribbon Fiber Media\n");
2845 /* FreeBSD has no media type for Quad ribbon fiber */
2846 } else if (*ptr == 'R') {
2848 sc->connector = MXGE_XFP;
2849 } else if (*ptr == 'S' || *(ptr +1) == 'S') {
2850 /* -S or -2S is SFP+ */
2851 sc->connector = MXGE_SFP;
2853 device_printf(sc->dev, "Unknown media type: %c\n", *ptr);
2858 * Determine the media type for a NIC. Some XFPs will identify
2859 * themselves only when their link is up, so this is initiated via a
2860  * link up interrupt. However, this can take up to
2861 * several milliseconds, so it is run via the watchdog routine, rather
2862 * than in the interrupt handler itself.
2865 mxge_media_probe(mxge_softc_t *sc)
2870 struct mxge_media_type *mxge_media_types = NULL;
2871 int i, err, ms, mxge_media_type_entries;
2874 sc->need_media_probe = 0;
2876 if (sc->connector == MXGE_XFP) {
2878 mxge_media_types = mxge_xfp_media_types;
2879 mxge_media_type_entries =
2880 sizeof (mxge_xfp_media_types) /
2881 sizeof (mxge_xfp_media_types[0]);
2882 byte = MXGE_XFP_COMPLIANCE_BYTE;
2884 } else if (sc->connector == MXGE_SFP) {
2885 /* -S or -2S is SFP+ */
2886 mxge_media_types = mxge_sfp_media_types;
2887 mxge_media_type_entries =
2888 sizeof (mxge_sfp_media_types) /
2889 sizeof (mxge_sfp_media_types[0]);
2893 /* nothing to do; media type cannot change */
2898 * At this point we know the NIC has an XFP cage, so now we
2899 * try to determine what is in the cage by using the
2900 	 * firmware's XFP I2C commands to read the XFP 10GbE compliance
2901 * register. We read just one byte, which may take over
2905 cmd.data0 = 0; /* just fetch 1 byte, not all 256 */
2907 err = mxge_send_cmd(sc, MXGEFW_CMD_I2C_READ, &cmd);
2908 if (err == MXGEFW_CMD_ERROR_I2C_FAILURE) {
2909 device_printf(sc->dev, "failed to read XFP\n");
2911 if (err == MXGEFW_CMD_ERROR_I2C_ABSENT) {
2912 device_printf(sc->dev, "Type R/S with no XFP!?!?\n");
2914 if (err != MXGEFW_CMD_OK) {
2918 /* now we wait for the data to be cached */
2920 err = mxge_send_cmd(sc, MXGEFW_CMD_I2C_BYTE, &cmd);
2921 for (ms = 0; (err == EBUSY) && (ms < 50); ms++) {
2924 err = mxge_send_cmd(sc, MXGEFW_CMD_I2C_BYTE, &cmd);
2926 if (err != MXGEFW_CMD_OK) {
2927 device_printf(sc->dev, "failed to read %s (%d, %dms)\n",
2928 cage_type, err, ms);
2932 if (cmd.data0 == mxge_media_types[0].bitmask) {
2934 device_printf(sc->dev, "%s:%s\n", cage_type,
2935 mxge_media_types[0].name);
2936 if (sc->current_media != mxge_media_types[0].flag) {
2937 mxge_media_init(sc);
2938 mxge_media_set(sc, mxge_media_types[0].flag);
2942 for (i = 1; i < mxge_media_type_entries; i++) {
2943 if (cmd.data0 & mxge_media_types[i].bitmask) {
2945 device_printf(sc->dev, "%s:%s\n",
2947 mxge_media_types[i].name);
2949 if (sc->current_media != mxge_media_types[i].flag) {
2950 mxge_media_init(sc);
2951 mxge_media_set(sc, mxge_media_types[i].flag);
2957 device_printf(sc->dev, "%s media 0x%x unknown\n",
2958 cage_type, cmd.data0);
2964 mxge_intr(void *arg)
2966 struct mxge_slice_state *ss = arg;
2967 mxge_softc_t *sc = ss->sc;
2968 mcp_irq_data_t *stats = ss->fw_stats;
2969 mxge_tx_ring_t *tx = &ss->tx;
2970 mxge_rx_done_t *rx_done = &ss->rx_done;
2971 uint32_t send_done_count;
2975 #ifndef IFNET_BUF_RING
2976 /* an interrupt on a non-zero slice is implicitly valid
2977 since MSI-X irqs are not shared */
2979 mxge_clean_rx_done(ss);
2980 *ss->irq_claim = be32toh(3);
2985 /* make sure the DMA has finished */
2986 if (!stats->valid) {
2989 valid = stats->valid;
2991 if (sc->legacy_irq) {
2992 /* lower legacy IRQ */
2993 *sc->irq_deassert = 0;
2994 if (!mxge_deassert_wait)
2995 /* don't wait for conf. that irq is low */
3001 /* loop while waiting for legacy irq deassertion */
3003 /* check for transmit completes and receives */
3004 send_done_count = be32toh(stats->send_done_count);
3005 while ((send_done_count != tx->pkt_done) ||
3006 (rx_done->entry[rx_done->idx].length != 0)) {
3007 if (send_done_count != tx->pkt_done)
3008 mxge_tx_done(ss, (int)send_done_count);
3009 mxge_clean_rx_done(ss);
3010 send_done_count = be32toh(stats->send_done_count);
3012 if (sc->legacy_irq && mxge_deassert_wait)
3014 } while (*((volatile uint8_t *) &stats->valid));
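/*
 * The loop above keeps draining tx and rx completions for as long as
 * stats->valid stays set; with a legacy (INTx) interrupt and
 * mxge_deassert_wait enabled, this is how the handler waits for the
 * firmware's confirmation that the interrupt line has really been deasserted.
 */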
3016 /* fw link & error stats meaningful only on the first slice */
3017 if (__predict_false((ss == sc->ss) && stats->stats_updated)) {
3018 if (sc->link_state != stats->link_up) {
3019 sc->link_state = stats->link_up;
3020 if (sc->link_state) {
3021 if_link_state_change(sc->ifp, LINK_STATE_UP);
3022 sc->ifp->if_baudrate = IF_Gbps(10UL);
3024 device_printf(sc->dev, "link up\n");
3026 if_link_state_change(sc->ifp, LINK_STATE_DOWN);
3027 sc->ifp->if_baudrate = 0;
3029 device_printf(sc->dev, "link down\n");
3031 sc->need_media_probe = 1;
3033 if (sc->rdma_tags_available !=
3034 be32toh(stats->rdma_tags_available)) {
3035 sc->rdma_tags_available =
3036 be32toh(stats->rdma_tags_available);
3037 device_printf(sc->dev, "RDMA timed out! %d tags "
3038 "left\n", sc->rdma_tags_available);
3041 if (stats->link_down) {
3042 sc->down_cnt += stats->link_down;
3044 if_link_state_change(sc->ifp, LINK_STATE_DOWN);
3048 	/* check to see if we have an rx token to pass back */
3050 *ss->irq_claim = be32toh(3);
3051 *(ss->irq_claim + 1) = be32toh(3);
3055 mxge_init(void *arg)
3057 mxge_softc_t *sc = arg;
3058 struct ifnet *ifp = sc->ifp;
3061 mtx_lock(&sc->driver_mtx);
3062 if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
3063 (void) mxge_open(sc);
3064 mtx_unlock(&sc->driver_mtx);
3070 mxge_free_slice_mbufs(struct mxge_slice_state *ss)
3072 struct lro_entry *lro_entry;
3075 while (!SLIST_EMPTY(&ss->lro_free)) {
3076 lro_entry = SLIST_FIRST(&ss->lro_free);
3077 SLIST_REMOVE_HEAD(&ss->lro_free, next);
3078 free(lro_entry, M_DEVBUF);
3081 for (i = 0; i <= ss->rx_big.mask; i++) {
3082 if (ss->rx_big.info[i].m == NULL)
3084 bus_dmamap_unload(ss->rx_big.dmat,
3085 ss->rx_big.info[i].map);
3086 m_freem(ss->rx_big.info[i].m);
3087 ss->rx_big.info[i].m = NULL;
3090 for (i = 0; i <= ss->rx_small.mask; i++) {
3091 if (ss->rx_small.info[i].m == NULL)
3093 bus_dmamap_unload(ss->rx_small.dmat,
3094 ss->rx_small.info[i].map);
3095 m_freem(ss->rx_small.info[i].m);
3096 ss->rx_small.info[i].m = NULL;
3099 /* transmit ring used only on the first slice */
3100 if (ss->tx.info == NULL)
3103 for (i = 0; i <= ss->tx.mask; i++) {
3104 ss->tx.info[i].flag = 0;
3105 if (ss->tx.info[i].m == NULL)
3107 bus_dmamap_unload(ss->tx.dmat,
3108 ss->tx.info[i].map);
3109 m_freem(ss->tx.info[i].m);
3110 ss->tx.info[i].m = NULL;
3115 mxge_free_mbufs(mxge_softc_t *sc)
3119 for (slice = 0; slice < sc->num_slices; slice++)
3120 mxge_free_slice_mbufs(&sc->ss[slice]);
3124 mxge_free_slice_rings(struct mxge_slice_state *ss)
3129 if (ss->rx_done.entry != NULL)
3130 mxge_dma_free(&ss->rx_done.dma);
3131 ss->rx_done.entry = NULL;
3133 if (ss->tx.req_bytes != NULL)
3134 free(ss->tx.req_bytes, M_DEVBUF);
3135 ss->tx.req_bytes = NULL;
3137 if (ss->tx.seg_list != NULL)
3138 free(ss->tx.seg_list, M_DEVBUF);
3139 ss->tx.seg_list = NULL;
3141 if (ss->rx_small.shadow != NULL)
3142 free(ss->rx_small.shadow, M_DEVBUF);
3143 ss->rx_small.shadow = NULL;
3145 if (ss->rx_big.shadow != NULL)
3146 free(ss->rx_big.shadow, M_DEVBUF);
3147 ss->rx_big.shadow = NULL;
3149 if (ss->tx.info != NULL) {
3150 if (ss->tx.dmat != NULL) {
3151 for (i = 0; i <= ss->tx.mask; i++) {
3152 bus_dmamap_destroy(ss->tx.dmat,
3153 ss->tx.info[i].map);
3155 bus_dma_tag_destroy(ss->tx.dmat);
3157 free(ss->tx.info, M_DEVBUF);
3161 if (ss->rx_small.info != NULL) {
3162 if (ss->rx_small.dmat != NULL) {
3163 for (i = 0; i <= ss->rx_small.mask; i++) {
3164 bus_dmamap_destroy(ss->rx_small.dmat,
3165 ss->rx_small.info[i].map);
3167 bus_dmamap_destroy(ss->rx_small.dmat,
3168 ss->rx_small.extra_map);
3169 bus_dma_tag_destroy(ss->rx_small.dmat);
3171 free(ss->rx_small.info, M_DEVBUF);
3173 ss->rx_small.info = NULL;
3175 if (ss->rx_big.info != NULL) {
3176 if (ss->rx_big.dmat != NULL) {
3177 for (i = 0; i <= ss->rx_big.mask; i++) {
3178 bus_dmamap_destroy(ss->rx_big.dmat,
3179 ss->rx_big.info[i].map);
3181 bus_dmamap_destroy(ss->rx_big.dmat,
3182 ss->rx_big.extra_map);
3183 bus_dma_tag_destroy(ss->rx_big.dmat);
3185 free(ss->rx_big.info, M_DEVBUF);
3187 ss->rx_big.info = NULL;
3191 mxge_free_rings(mxge_softc_t *sc)
3195 for (slice = 0; slice < sc->num_slices; slice++)
3196 mxge_free_slice_rings(&sc->ss[slice]);
3200 mxge_alloc_slice_rings(struct mxge_slice_state *ss, int rx_ring_entries,
3201 int tx_ring_entries)
3203 mxge_softc_t *sc = ss->sc;
3209 /* allocate per-slice receive resources */
3211 ss->rx_small.mask = ss->rx_big.mask = rx_ring_entries - 1;
3212 ss->rx_done.mask = (2 * rx_ring_entries) - 1;
3214 /* allocate the rx shadow rings */
3215 bytes = rx_ring_entries * sizeof (*ss->rx_small.shadow);
3216 ss->rx_small.shadow = malloc(bytes, M_DEVBUF, M_ZERO|M_WAITOK);
3217 if (ss->rx_small.shadow == NULL)
3220 bytes = rx_ring_entries * sizeof (*ss->rx_big.shadow);
3221 ss->rx_big.shadow = malloc(bytes, M_DEVBUF, M_ZERO|M_WAITOK);
3222 if (ss->rx_big.shadow == NULL)
3225 /* allocate the rx host info rings */
3226 bytes = rx_ring_entries * sizeof (*ss->rx_small.info);
3227 ss->rx_small.info = malloc(bytes, M_DEVBUF, M_ZERO|M_WAITOK);
3228 if (ss->rx_small.info == NULL)
3231 bytes = rx_ring_entries * sizeof (*ss->rx_big.info);
3232 ss->rx_big.info = malloc(bytes, M_DEVBUF, M_ZERO|M_WAITOK);
3233 if (ss->rx_big.info == NULL)
3236 /* allocate the rx busdma resources */
3237 err = bus_dma_tag_create(sc->parent_dmat, /* parent */
3239 4096, /* boundary */
3240 BUS_SPACE_MAXADDR, /* low */
3241 BUS_SPACE_MAXADDR, /* high */
3242 NULL, NULL, /* filter */
3243 MHLEN, /* maxsize */
3245 MHLEN, /* maxsegsize */
3246 BUS_DMA_ALLOCNOW, /* flags */
3247 NULL, NULL, /* lock */
3248 &ss->rx_small.dmat); /* tag */
3250 device_printf(sc->dev, "Err %d allocating rx_small dmat\n",
3255 err = bus_dma_tag_create(sc->parent_dmat, /* parent */
3257 #if MXGE_VIRT_JUMBOS
3258 4096, /* boundary */
3262 BUS_SPACE_MAXADDR, /* low */
3263 BUS_SPACE_MAXADDR, /* high */
3264 NULL, NULL, /* filter */
3265 3*4096, /* maxsize */
3266 #if MXGE_VIRT_JUMBOS
3268 4096, /* maxsegsize*/
3271 MJUM9BYTES, /* maxsegsize*/
3273 BUS_DMA_ALLOCNOW, /* flags */
3274 NULL, NULL, /* lock */
3275 &ss->rx_big.dmat); /* tag */
3277 device_printf(sc->dev, "Err %d allocating rx_big dmat\n",
3281 for (i = 0; i <= ss->rx_small.mask; i++) {
3282 err = bus_dmamap_create(ss->rx_small.dmat, 0,
3283 &ss->rx_small.info[i].map);
3285 device_printf(sc->dev, "Err %d rx_small dmamap\n",
3290 err = bus_dmamap_create(ss->rx_small.dmat, 0,
3291 &ss->rx_small.extra_map);
3293 device_printf(sc->dev, "Err %d extra rx_small dmamap\n",
3298 for (i = 0; i <= ss->rx_big.mask; i++) {
3299 err = bus_dmamap_create(ss->rx_big.dmat, 0,
3300 &ss->rx_big.info[i].map);
3302 device_printf(sc->dev, "Err %d rx_big dmamap\n",
3307 err = bus_dmamap_create(ss->rx_big.dmat, 0,
3308 &ss->rx_big.extra_map);
3310 device_printf(sc->dev, "Err %d extra rx_big dmamap\n",
3315 	/* now allocate TX resources */
3317 #ifndef IFNET_BUF_RING
3318 /* only use a single TX ring for now */
3319 if (ss != ss->sc->ss)
3323 ss->tx.mask = tx_ring_entries - 1;
3324 ss->tx.max_desc = MIN(MXGE_MAX_SEND_DESC, tx_ring_entries / 4);
3327 /* allocate the tx request copy block */
3329 sizeof (*ss->tx.req_list) * (ss->tx.max_desc + 4);
3330 ss->tx.req_bytes = malloc(bytes, M_DEVBUF, M_WAITOK);
3331 if (ss->tx.req_bytes == NULL)
3333 /* ensure req_list entries are aligned to 8 bytes */
3334 ss->tx.req_list = (mcp_kreq_ether_send_t *)
3335 ((unsigned long)(ss->tx.req_bytes + 7) & ~7UL);
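/*
 * Example of the round-up (hypothetical address): if req_bytes were allocated
 * at ...0x7a13, adding 7 gives ...0x7a1a and masking with ~7UL yields
 * ...0x7a18, the first 8-byte aligned address at or beyond the allocation.
 */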
3337 /* allocate the tx busdma segment list */
3338 bytes = sizeof (*ss->tx.seg_list) * ss->tx.max_desc;
3339 ss->tx.seg_list = (bus_dma_segment_t *)
3340 malloc(bytes, M_DEVBUF, M_WAITOK);
3341 if (ss->tx.seg_list == NULL)
3344 /* allocate the tx host info ring */
3345 bytes = tx_ring_entries * sizeof (*ss->tx.info);
3346 ss->tx.info = malloc(bytes, M_DEVBUF, M_ZERO|M_WAITOK);
3347 if (ss->tx.info == NULL)
3350 /* allocate the tx busdma resources */
3351 err = bus_dma_tag_create(sc->parent_dmat, /* parent */
3353 sc->tx_boundary, /* boundary */
3354 BUS_SPACE_MAXADDR, /* low */
3355 BUS_SPACE_MAXADDR, /* high */
3356 NULL, NULL, /* filter */
3357 65536 + 256, /* maxsize */
3358 ss->tx.max_desc - 2, /* num segs */
3359 sc->tx_boundary, /* maxsegsz */
3360 BUS_DMA_ALLOCNOW, /* flags */
3361 NULL, NULL, /* lock */
3362 &ss->tx.dmat); /* tag */
3365 device_printf(sc->dev, "Err %d allocating tx dmat\n",
3370 	/* now use these tags to set up dmamaps for each slot
3372 for (i = 0; i <= ss->tx.mask; i++) {
3373 err = bus_dmamap_create(ss->tx.dmat, 0,
3374 &ss->tx.info[i].map);
3376 device_printf(sc->dev, "Err %d tx dmamap\n",
3386 mxge_alloc_rings(mxge_softc_t *sc)
3390 int tx_ring_entries, rx_ring_entries;
3393 /* get ring sizes */
3394 err = mxge_send_cmd(sc, MXGEFW_CMD_GET_SEND_RING_SIZE, &cmd);
3395 tx_ring_size = cmd.data0;
3397 device_printf(sc->dev, "Cannot determine tx ring sizes\n");
3401 tx_ring_entries = tx_ring_size / sizeof (mcp_kreq_ether_send_t);
3402 rx_ring_entries = sc->rx_ring_size / sizeof (mcp_dma_addr_t);
3403 IFQ_SET_MAXLEN(&sc->ifp->if_snd, tx_ring_entries - 1);
3404 sc->ifp->if_snd.ifq_drv_maxlen = sc->ifp->if_snd.ifq_maxlen;
3405 IFQ_SET_READY(&sc->ifp->if_snd);
3407 for (slice = 0; slice < sc->num_slices; slice++) {
3408 err = mxge_alloc_slice_rings(&sc->ss[slice],
3417 mxge_free_rings(sc);
3424 mxge_choose_params(int mtu, int *big_buf_size, int *cl_size, int *nbufs)
3426 int bufsize = mtu + ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN + MXGEFW_PAD;
3428 if (bufsize < MCLBYTES) {
3429 /* easy, everything fits in a single buffer */
3430 *big_buf_size = MCLBYTES;
3431 *cl_size = MCLBYTES;
3436 if (bufsize < MJUMPAGESIZE) {
3437 /* still easy, everything still fits in a single buffer */
3438 *big_buf_size = MJUMPAGESIZE;
3439 *cl_size = MJUMPAGESIZE;
3443 #if MXGE_VIRT_JUMBOS
3444 /* now we need to use virtually contiguous buffers */
3445 *cl_size = MJUM9BYTES;
3446 *big_buf_size = 4096;
3447 *nbufs = mtu / 4096 + 1;
3448 /* needs to be a power of two, so round up */
3452 *cl_size = MJUM9BYTES;
3453 *big_buf_size = MJUM9BYTES;
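/*
 * Illustration, assuming the usual 2KB/4KB/9KB cluster sizes: a 9000-byte MTU
 * gives bufsize = 9000 + 14 + 4 + 2 = 9020, which exceeds both MCLBYTES and
 * MJUMPAGESIZE, so (without MXGE_VIRT_JUMBOS) each receive slot gets one
 * MJUM9BYTES cluster.  A 1500-byte MTU gives bufsize = 1520 and lands in the
 * single-MCLBYTES case.
 */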
3459 mxge_slice_open(struct mxge_slice_state *ss, int nbufs, int cl_size)
3464 struct lro_entry *lro_entry;
3469 slice = ss - sc->ss;
3471 SLIST_INIT(&ss->lro_free);
3472 SLIST_INIT(&ss->lro_active);
3474 for (i = 0; i < sc->lro_cnt; i++) {
3475 lro_entry = (struct lro_entry *)
3476 malloc(sizeof (*lro_entry), M_DEVBUF,
3478 if (lro_entry == NULL) {
3482 SLIST_INSERT_HEAD(&ss->lro_free, lro_entry, next);
3484 /* get the lanai pointers to the send and receive rings */
3487 #ifndef IFNET_BUF_RING
3488 /* We currently only send from the first slice */
3492 err = mxge_send_cmd(sc, MXGEFW_CMD_GET_SEND_OFFSET, &cmd);
3494 (volatile mcp_kreq_ether_send_t *)(sc->sram + cmd.data0);
3495 ss->tx.send_go = (volatile uint32_t *)
3496 (sc->sram + MXGEFW_ETH_SEND_GO + 64 * slice);
3497 ss->tx.send_stop = (volatile uint32_t *)
3498 (sc->sram + MXGEFW_ETH_SEND_STOP + 64 * slice);
3499 #ifndef IFNET_BUF_RING
3503 err |= mxge_send_cmd(sc,
3504 MXGEFW_CMD_GET_SMALL_RX_OFFSET, &cmd);
3505 ss->rx_small.lanai =
3506 (volatile mcp_kreq_ether_recv_t *)(sc->sram + cmd.data0);
3508 err |= mxge_send_cmd(sc, MXGEFW_CMD_GET_BIG_RX_OFFSET, &cmd);
3510 (volatile mcp_kreq_ether_recv_t *)(sc->sram + cmd.data0);
3513 device_printf(sc->dev,
3514 "failed to get ring sizes or locations\n");
3518 /* stock receive rings */
3519 for (i = 0; i <= ss->rx_small.mask; i++) {
3520 map = ss->rx_small.info[i].map;
3521 err = mxge_get_buf_small(ss, map, i);
3523 device_printf(sc->dev, "alloced %d/%d smalls\n",
3524 i, ss->rx_small.mask + 1);
3528 for (i = 0; i <= ss->rx_big.mask; i++) {
3529 ss->rx_big.shadow[i].addr_low = 0xffffffff;
3530 ss->rx_big.shadow[i].addr_high = 0xffffffff;
3532 ss->rx_big.nbufs = nbufs;
3533 ss->rx_big.cl_size = cl_size;
3534 ss->rx_big.mlen = ss->sc->ifp->if_mtu + ETHER_HDR_LEN +
3535 ETHER_VLAN_ENCAP_LEN + MXGEFW_PAD;
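/*
 * Note on the stocking loop below: mxge_get_buf_big() backs a whole group of
 * nbufs receive descriptors with a single cluster (see the MXGE_VIRT_JUMBOS
 * handling earlier in this file), so the index advances by nbufs per
 * iteration rather than by one.
 */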
3536 for (i = 0; i <= ss->rx_big.mask; i += ss->rx_big.nbufs) {
3537 map = ss->rx_big.info[i].map;
3538 err = mxge_get_buf_big(ss, map, i);
3540 device_printf(sc->dev, "alloced %d/%d bigs\n",
3541 i, ss->rx_big.mask + 1);
3549 mxge_open(mxge_softc_t *sc)
3552 int err, big_bytes, nbufs, slice, cl_size, i;
3554 volatile uint8_t *itable;
3555 struct mxge_slice_state *ss;
3557 /* Copy the MAC address in case it was overridden */
3558 bcopy(IF_LLADDR(sc->ifp), sc->mac_addr, ETHER_ADDR_LEN);
3560 err = mxge_reset(sc, 1);
3562 device_printf(sc->dev, "failed to reset\n");
3566 if (sc->num_slices > 1) {
3567 /* setup the indirection table */
3568 cmd.data0 = sc->num_slices;
3569 err = mxge_send_cmd(sc, MXGEFW_CMD_SET_RSS_TABLE_SIZE,
3572 err |= mxge_send_cmd(sc, MXGEFW_CMD_GET_RSS_TABLE_OFFSET,
3575 device_printf(sc->dev,
3576 "failed to setup rss tables\n");
3580 /* just enable an identity mapping */
3581 itable = sc->sram + cmd.data0;
3582 for (i = 0; i < sc->num_slices; i++)
3583 itable[i] = (uint8_t)i;
3586 cmd.data1 = mxge_rss_hash_type;
3587 err = mxge_send_cmd(sc, MXGEFW_CMD_SET_RSS_ENABLE, &cmd);
3589 device_printf(sc->dev, "failed to enable slices\n");
3595 mxge_choose_params(sc->ifp->if_mtu, &big_bytes, &cl_size, &nbufs);
3598 err = mxge_send_cmd(sc, MXGEFW_CMD_ALWAYS_USE_N_BIG_BUFFERS,
3600 /* error is only meaningful if we're trying to set
3601 MXGEFW_CMD_ALWAYS_USE_N_BIG_BUFFERS > 1 */
3602 if (err && nbufs > 1) {
3603 device_printf(sc->dev,
3604 "Failed to set alway-use-n to %d\n",
3608 /* Give the firmware the mtu and the big and small buffer
3609 sizes. The firmware wants the big buf size to be a power
3610 of two. Luckily, FreeBSD's clusters are powers of two */
3611 cmd.data0 = sc->ifp->if_mtu + ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
3612 err = mxge_send_cmd(sc, MXGEFW_CMD_SET_MTU, &cmd);
3613 cmd.data0 = MHLEN - MXGEFW_PAD;
3614 err |= mxge_send_cmd(sc, MXGEFW_CMD_SET_SMALL_BUFFER_SIZE,
3616 cmd.data0 = big_bytes;
3617 err |= mxge_send_cmd(sc, MXGEFW_CMD_SET_BIG_BUFFER_SIZE, &cmd);
3620 device_printf(sc->dev, "failed to setup params\n");
3624 	/* Now give the firmware the pointer to the stats block */
3626 #ifdef IFNET_BUF_RING
3627 slice < sc->num_slices;
3632 ss = &sc->ss[slice];
3634 MXGE_LOWPART_TO_U32(ss->fw_stats_dma.bus_addr);
3636 MXGE_HIGHPART_TO_U32(ss->fw_stats_dma.bus_addr);
3637 cmd.data2 = sizeof(struct mcp_irq_data);
3638 cmd.data2 |= (slice << 16);
3639 err |= mxge_send_cmd(sc, MXGEFW_CMD_SET_STATS_DMA_V2, &cmd);
3643 bus = sc->ss->fw_stats_dma.bus_addr;
3644 bus += offsetof(struct mcp_irq_data, send_done_count);
3645 cmd.data0 = MXGE_LOWPART_TO_U32(bus);
3646 cmd.data1 = MXGE_HIGHPART_TO_U32(bus);
3647 err = mxge_send_cmd(sc,
3648 MXGEFW_CMD_SET_STATS_DMA_OBSOLETE,
3650 /* Firmware cannot support multicast without STATS_DMA_V2 */
3651 sc->fw_multicast_support = 0;
3653 sc->fw_multicast_support = 1;
3657 device_printf(sc->dev, "failed to setup params\n");
3661 for (slice = 0; slice < sc->num_slices; slice++) {
3662 err = mxge_slice_open(&sc->ss[slice], nbufs, cl_size);
3664 device_printf(sc->dev, "couldn't open slice %d\n",
3670 /* Finally, start the firmware running */
3671 err = mxge_send_cmd(sc, MXGEFW_CMD_ETHERNET_UP, &cmd);
3673 device_printf(sc->dev, "Couldn't bring up link\n");
3676 #ifdef IFNET_BUF_RING
3677 for (slice = 0; slice < sc->num_slices; slice++) {
3678 ss = &sc->ss[slice];
3679 ss->if_drv_flags |= IFF_DRV_RUNNING;
3680 ss->if_drv_flags &= ~IFF_DRV_OACTIVE;
3683 sc->ifp->if_drv_flags |= IFF_DRV_RUNNING;
3684 sc->ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;
3690 mxge_free_mbufs(sc);
3696 mxge_close(mxge_softc_t *sc, int down)
3699 int err, old_down_cnt;
3700 #ifdef IFNET_BUF_RING
3701 struct mxge_slice_state *ss;
3705 #ifdef IFNET_BUF_RING
3706 for (slice = 0; slice < sc->num_slices; slice++) {
3707 ss = &sc->ss[slice];
3708 ss->if_drv_flags &= ~IFF_DRV_RUNNING;
3711 sc->ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
3713 old_down_cnt = sc->down_cnt;
3715 err = mxge_send_cmd(sc, MXGEFW_CMD_ETHERNET_DOWN, &cmd);
3717 device_printf(sc->dev,
3718 "Couldn't bring down link\n");
3720 if (old_down_cnt == sc->down_cnt) {
3721 /* wait for down irq */
3722 DELAY(10 * sc->intr_coal_delay);
3725 if (old_down_cnt == sc->down_cnt) {
3726 device_printf(sc->dev, "never got down irq\n");
3729 mxge_free_mbufs(sc);
3735 mxge_setup_cfg_space(mxge_softc_t *sc)
3737 device_t dev = sc->dev;
3739 uint16_t cmd, lnk, pectl;
3741 	/* find the PCIe link width and set max read request to 4KB */
3742 	if (pci_find_cap(dev, PCIY_EXPRESS, &reg) == 0) {
3743 lnk = pci_read_config(dev, reg + 0x12, 2);
3744 sc->link_width = (lnk >> 4) & 0x3f;
3746 if (sc->pectl == 0) {
3747 pectl = pci_read_config(dev, reg + 0x8, 2);
3748 pectl = (pectl & ~0x7000) | (5 << 12);
3749 pci_write_config(dev, reg + 0x8, pectl, 2);
3752 /* restore saved pectl after watchdog reset */
3753 pci_write_config(dev, reg + 0x8, sc->pectl, 2);
3757 /* Enable DMA and Memory space access */
3758 pci_enable_busmaster(dev);
3759 cmd = pci_read_config(dev, PCIR_COMMAND, 2);
3760 cmd |= PCIM_CMD_MEMEN;
3761 pci_write_config(dev, PCIR_COMMAND, cmd, 2);
3765 mxge_read_reboot(mxge_softc_t *sc)
3767 device_t dev = sc->dev;
3770 /* find the vendor specific offset */
3771 if (pci_find_cap(dev, PCIY_VENDOR, &vs) != 0) {
3772 device_printf(sc->dev,
3773 "could not find vendor specific offset\n");
3774 return (uint32_t)-1;
3776 /* enable read32 mode */
3777 pci_write_config(dev, vs + 0x10, 0x3, 1);
3778 /* tell NIC which register to read */
3779 pci_write_config(dev, vs + 0x18, 0xfffffff0, 4);
3780 return (pci_read_config(dev, vs + 0x14, 4));
3784 mxge_watchdog_reset(mxge_softc_t *sc)
3786 struct pci_devinfo *dinfo;
3787 struct mxge_slice_state *ss;
3788 int err, running, s, num_tx_slices = 1;
3794 device_printf(sc->dev, "Watchdog reset!\n");
3797 * check to see if the NIC rebooted. If it did, then all of
3798 * PCI config space has been reset, and things like the
3799 * busmaster bit will be zero. If this is the case, then we
3800 * must restore PCI config space before the NIC can be used
3803 cmd = pci_read_config(sc->dev, PCIR_COMMAND, 2);
3804 if (cmd == 0xffff) {
3806 * maybe the watchdog caught the NIC rebooting; wait
3807 * up to 100ms for it to finish. If it does not come
3808 * back, then give up
3811 cmd = pci_read_config(sc->dev, PCIR_COMMAND, 2);
3812 if (cmd == 0xffff) {
3813 device_printf(sc->dev, "NIC disappeared!\n");
3816 if ((cmd & PCIM_CMD_BUSMASTEREN) == 0) {
3817 /* print the reboot status */
3818 reboot = mxge_read_reboot(sc);
3819 device_printf(sc->dev, "NIC rebooted, status = 0x%x\n",
3821 running = sc->ifp->if_drv_flags & IFF_DRV_RUNNING;
3825 * quiesce NIC so that TX routines will not try to
3826 * xmit after restoration of BAR
3829 /* Mark the link as down */
3830 if (sc->link_state) {
3832 if_link_state_change(sc->ifp,
3835 #ifdef IFNET_BUF_RING
3836 num_tx_slices = sc->num_slices;
3838 /* grab all TX locks to ensure no tx */
3839 for (s = 0; s < num_tx_slices; s++) {
3841 mtx_lock(&ss->tx.mtx);
3845 /* restore PCI configuration space */
3846 dinfo = device_get_ivars(sc->dev);
3847 pci_cfg_restore(sc->dev, dinfo);
3849 /* and redo any changes we made to our config space */
3850 mxge_setup_cfg_space(sc);
3853 err = mxge_load_firmware(sc, 0);
3855 device_printf(sc->dev,
3856 "Unable to re-load f/w\n");
3860 err = mxge_open(sc);
3861 /* release all TX locks */
3862 for (s = 0; s < num_tx_slices; s++) {
3864 #ifdef IFNET_BUF_RING
3865 mxge_start_locked(ss);
3867 mtx_unlock(&ss->tx.mtx);
3870 sc->watchdog_resets++;
3872 device_printf(sc->dev,
3873 "NIC did not reboot, not resetting\n");
3877 device_printf(sc->dev, "watchdog reset failed\n");
3881 callout_reset(&sc->co_hdl, mxge_ticks, mxge_tick, sc);
3886 mxge_watchdog_task(void *arg, int pending)
3888 mxge_softc_t *sc = arg;
3891 mtx_lock(&sc->driver_mtx);
3892 mxge_watchdog_reset(sc);
3893 mtx_unlock(&sc->driver_mtx);
3897 mxge_warn_stuck(mxge_softc_t *sc, mxge_tx_ring_t *tx, int slice)
3899 tx = &sc->ss[slice].tx;
3900 	device_printf(sc->dev, "slice %d stuck? ring state:\n", slice);
3901 device_printf(sc->dev,
3902 "tx.req=%d tx.done=%d, tx.queue_active=%d\n",
3903 tx->req, tx->done, tx->queue_active);
3904 device_printf(sc->dev, "tx.activate=%d tx.deactivate=%d\n",
3905 tx->activate, tx->deactivate);
3906 device_printf(sc->dev, "pkt_done=%d fw=%d\n",
3908 be32toh(sc->ss->fw_stats->send_done_count));
3912 mxge_watchdog(mxge_softc_t *sc)
3915 uint32_t rx_pause = be32toh(sc->ss->fw_stats->dropped_pause);
3918 /* see if we have outstanding transmits, which
3919 have been pending for more than mxge_ticks */
3921 #ifdef IFNET_BUF_RING
3922 (i < sc->num_slices) && (err == 0);
3924 (i < 1) && (err == 0);
3928 if (tx->req != tx->done &&
3929 tx->watchdog_req != tx->watchdog_done &&
3930 tx->done == tx->watchdog_done) {
3931 /* check for pause blocking before resetting */
3932 if (tx->watchdog_rx_pause == rx_pause) {
3933 mxge_warn_stuck(sc, tx, i);
3934 taskqueue_enqueue(sc->tq, &sc->watchdog_task);
3938 device_printf(sc->dev, "Flow control blocking "
3939 "xmits, check link partner\n");
3942 tx->watchdog_req = tx->req;
3943 tx->watchdog_done = tx->done;
3944 tx->watchdog_rx_pause = rx_pause;
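/*
 * A reset is only queued when the ring shows no forward progress between two
 * watchdog passes (requests outstanding, tx->done unchanged) and the
 * dropped-pause counter has not moved either; if pause frames from the link
 * partner explain the stall, the driver just warns instead of resetting.
 */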
3947 if (sc->need_media_probe)
3948 mxge_media_probe(sc);
3953 mxge_update_stats(mxge_softc_t *sc)
3955 struct mxge_slice_state *ss;
3957 u_long ipackets = 0;
3958 u_long opackets = 0;
3959 #ifdef IFNET_BUF_RING
3967 for (slice = 0; slice < sc->num_slices; slice++) {
3968 ss = &sc->ss[slice];
3969 ipackets += ss->ipackets;
3970 opackets += ss->opackets;
3971 #ifdef IFNET_BUF_RING
3972 obytes += ss->obytes;
3973 omcasts += ss->omcasts;
3974 odrops += ss->tx.br->br_drops;
3976 oerrors += ss->oerrors;
3978 pkts = (ipackets - sc->ifp->if_ipackets);
3979 pkts += (opackets - sc->ifp->if_opackets);
3980 sc->ifp->if_ipackets = ipackets;
3981 sc->ifp->if_opackets = opackets;
3982 #ifdef IFNET_BUF_RING
3983 sc->ifp->if_obytes = obytes;
3984 sc->ifp->if_omcasts = omcasts;
3985 sc->ifp->if_snd.ifq_drops = odrops;
3987 sc->ifp->if_oerrors = oerrors;
3992 mxge_tick(void *arg)
3994 mxge_softc_t *sc = arg;
4001 running = sc->ifp->if_drv_flags & IFF_DRV_RUNNING;
4003 /* aggregate stats from different slices */
4004 pkts = mxge_update_stats(sc);
4005 if (!sc->watchdog_countdown) {
4006 err = mxge_watchdog(sc);
4007 sc->watchdog_countdown = 4;
4009 sc->watchdog_countdown--;
4012 /* ensure NIC did not suffer h/w fault while idle */
4013 cmd = pci_read_config(sc->dev, PCIR_COMMAND, 2);
4014 if ((cmd & PCIM_CMD_BUSMASTEREN) == 0) {
4016 taskqueue_enqueue(sc->tq, &sc->watchdog_task);
4019 /* look less often if NIC is idle */
4024 callout_reset(&sc->co_hdl, ticks, mxge_tick, sc);
4029 mxge_media_change(struct ifnet *ifp)
4035 mxge_change_mtu(mxge_softc_t *sc, int mtu)
4037 struct ifnet *ifp = sc->ifp;
4038 int real_mtu, old_mtu;
4042 real_mtu = mtu + ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
4043 if ((real_mtu > sc->max_mtu) || real_mtu < 60)
4045 mtx_lock(&sc->driver_mtx);
4046 old_mtu = ifp->if_mtu;
4048 if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
4050 err = mxge_open(sc);
4052 ifp->if_mtu = old_mtu;
4054 (void) mxge_open(sc);
4057 mtx_unlock(&sc->driver_mtx);
4062 mxge_media_status(struct ifnet *ifp, struct ifmediareq *ifmr)
4064 mxge_softc_t *sc = ifp->if_softc;
4069 ifmr->ifm_status = IFM_AVALID;
4070 ifmr->ifm_active = IFM_ETHER | IFM_FDX;
4071 ifmr->ifm_status |= sc->link_state ? IFM_ACTIVE : 0;
4072 ifmr->ifm_active |= sc->current_media;
4076 mxge_ioctl(struct ifnet *ifp, u_long command, caddr_t data)
4078 mxge_softc_t *sc = ifp->if_softc;
4079 struct ifreq *ifr = (struct ifreq *)data;
4086 err = ether_ioctl(ifp, command, data);
4090 err = mxge_change_mtu(sc, ifr->ifr_mtu);
4094 mtx_lock(&sc->driver_mtx);
4096 mtx_unlock(&sc->driver_mtx);
4099 if (ifp->if_flags & IFF_UP) {
4100 if (!(ifp->if_drv_flags & IFF_DRV_RUNNING)) {
4101 err = mxge_open(sc);
4103 				/* take care of promisc and allmulti flag changes */
4105 mxge_change_promisc(sc,
4106 ifp->if_flags & IFF_PROMISC);
4107 mxge_set_multicast_list(sc);
4110 if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
4114 mtx_unlock(&sc->driver_mtx);
4119 mtx_lock(&sc->driver_mtx);
4120 mxge_set_multicast_list(sc);
4121 mtx_unlock(&sc->driver_mtx);
4125 mtx_lock(&sc->driver_mtx);
4126 mask = ifr->ifr_reqcap ^ ifp->if_capenable;
4127 if (mask & IFCAP_TXCSUM) {
4128 if (IFCAP_TXCSUM & ifp->if_capenable) {
4129 ifp->if_capenable &= ~(IFCAP_TXCSUM|IFCAP_TSO4);
4130 ifp->if_hwassist &= ~(CSUM_TCP | CSUM_UDP
4133 ifp->if_capenable |= IFCAP_TXCSUM;
4134 ifp->if_hwassist |= (CSUM_TCP | CSUM_UDP);
4136 } else if (mask & IFCAP_RXCSUM) {
4137 if (IFCAP_RXCSUM & ifp->if_capenable) {
4138 ifp->if_capenable &= ~IFCAP_RXCSUM;
4141 ifp->if_capenable |= IFCAP_RXCSUM;
4145 if (mask & IFCAP_TSO4) {
4146 if (IFCAP_TSO4 & ifp->if_capenable) {
4147 ifp->if_capenable &= ~IFCAP_TSO4;
4148 ifp->if_hwassist &= ~CSUM_TSO;
4149 } else if (IFCAP_TXCSUM & ifp->if_capenable) {
4150 ifp->if_capenable |= IFCAP_TSO4;
4151 ifp->if_hwassist |= CSUM_TSO;
4153 printf("mxge requires tx checksum offload"
4154 " be enabled to use TSO\n");
4158 if (mask & IFCAP_LRO) {
4159 if (IFCAP_LRO & ifp->if_capenable)
4160 err = mxge_change_lro_locked(sc, 0);
4162 err = mxge_change_lro_locked(sc, mxge_lro_cnt);
4164 if (mask & IFCAP_VLAN_HWTAGGING)
4165 ifp->if_capenable ^= IFCAP_VLAN_HWTAGGING;
4166 if (mask & IFCAP_VLAN_HWTSO)
4167 ifp->if_capenable ^= IFCAP_VLAN_HWTSO;
4169 if (!(ifp->if_capabilities & IFCAP_VLAN_HWTSO) ||
4170 !(ifp->if_capenable & IFCAP_VLAN_HWTAGGING))
4171 ifp->if_capenable &= ~IFCAP_VLAN_HWTSO;
4173 mtx_unlock(&sc->driver_mtx);
4174 VLAN_CAPABILITIES(ifp);
4179 mtx_lock(&sc->driver_mtx);
4180 mxge_media_probe(sc);
4181 mtx_unlock(&sc->driver_mtx);
4182 err = ifmedia_ioctl(ifp, (struct ifreq *)data,
4183 &sc->media, command);
4193 mxge_fetch_tunables(mxge_softc_t *sc)
4196 TUNABLE_INT_FETCH("hw.mxge.max_slices", &mxge_max_slices);
4197 TUNABLE_INT_FETCH("hw.mxge.flow_control_enabled",
4198 &mxge_flow_control);
4199 TUNABLE_INT_FETCH("hw.mxge.intr_coal_delay",
4200 &mxge_intr_coal_delay);
4201 TUNABLE_INT_FETCH("hw.mxge.nvidia_ecrc_enable",
4202 &mxge_nvidia_ecrc_enable);
4203 TUNABLE_INT_FETCH("hw.mxge.force_firmware",
4204 &mxge_force_firmware);
4205 TUNABLE_INT_FETCH("hw.mxge.deassert_wait",
4206 &mxge_deassert_wait);
4207 TUNABLE_INT_FETCH("hw.mxge.verbose",
4209 TUNABLE_INT_FETCH("hw.mxge.ticks", &mxge_ticks);
4210 TUNABLE_INT_FETCH("hw.mxge.lro_cnt", &sc->lro_cnt);
4211 TUNABLE_INT_FETCH("hw.mxge.always_promisc", &mxge_always_promisc);
4212 TUNABLE_INT_FETCH("hw.mxge.rss_hash_type", &mxge_rss_hash_type);
4213 TUNABLE_INT_FETCH("hw.mxge.rss_hashtype", &mxge_rss_hash_type);
4214 TUNABLE_INT_FETCH("hw.mxge.initial_mtu", &mxge_initial_mtu);
4215 TUNABLE_INT_FETCH("hw.mxge.throttle", &mxge_throttle);
4216 if (sc->lro_cnt != 0)
4217 mxge_lro_cnt = sc->lro_cnt;
4221 if (mxge_intr_coal_delay < 0 || mxge_intr_coal_delay > 10*1000)
4222 mxge_intr_coal_delay = 30;
4223 if (mxge_ticks == 0)
4224 mxge_ticks = hz / 2;
4225 sc->pause = mxge_flow_control;
4226 if (mxge_rss_hash_type < MXGEFW_RSS_HASH_TYPE_IPV4
4227 || mxge_rss_hash_type > MXGEFW_RSS_HASH_TYPE_MAX) {
4228 mxge_rss_hash_type = MXGEFW_RSS_HASH_TYPE_SRC_DST_PORT;
4230 if (mxge_initial_mtu > ETHERMTU_JUMBO ||
4231 mxge_initial_mtu < ETHER_MIN_LEN)
4232 mxge_initial_mtu = ETHERMTU_JUMBO;
4234 if (mxge_throttle && mxge_throttle > MXGE_MAX_THROTTLE)
4235 mxge_throttle = MXGE_MAX_THROTTLE;
4236 if (mxge_throttle && mxge_throttle < MXGE_MIN_THROTTLE)
4237 mxge_throttle = MXGE_MIN_THROTTLE;
4238 sc->throttle = mxge_throttle;
4243 mxge_free_slices(mxge_softc_t *sc)
4245 struct mxge_slice_state *ss;
4252 for (i = 0; i < sc->num_slices; i++) {
4254 if (ss->fw_stats != NULL) {
4255 mxge_dma_free(&ss->fw_stats_dma);
4256 ss->fw_stats = NULL;
4257 #ifdef IFNET_BUF_RING
4258 if (ss->tx.br != NULL) {
4259 drbr_free(ss->tx.br, M_DEVBUF);
4263 mtx_destroy(&ss->tx.mtx);
4265 if (ss->rx_done.entry != NULL) {
4266 mxge_dma_free(&ss->rx_done.dma);
4267 ss->rx_done.entry = NULL;
4270 free(sc->ss, M_DEVBUF);
4275 mxge_alloc_slices(mxge_softc_t *sc)
4278 struct mxge_slice_state *ss;
4280 int err, i, max_intr_slots;
4282 err = mxge_send_cmd(sc, MXGEFW_CMD_GET_RX_RING_SIZE, &cmd);
4284 device_printf(sc->dev, "Cannot determine rx ring size\n");
4287 sc->rx_ring_size = cmd.data0;
4288 max_intr_slots = 2 * (sc->rx_ring_size / sizeof (mcp_dma_addr_t));
4290 bytes = sizeof (*sc->ss) * sc->num_slices;
4291 sc->ss = malloc(bytes, M_DEVBUF, M_NOWAIT | M_ZERO);
4294 for (i = 0; i < sc->num_slices; i++) {
4299 /* allocate per-slice rx interrupt queues */
4301 bytes = max_intr_slots * sizeof (*ss->rx_done.entry);
4302 err = mxge_dma_alloc(sc, &ss->rx_done.dma, bytes, 4096);
4305 ss->rx_done.entry = ss->rx_done.dma.addr;
4306 bzero(ss->rx_done.entry, bytes);
4309 * allocate the per-slice firmware stats; stats
4310 		 * (including tx) are used only on the first
4313 #ifndef IFNET_BUF_RING
4318 bytes = sizeof (*ss->fw_stats);
4319 err = mxge_dma_alloc(sc, &ss->fw_stats_dma,
4320 sizeof (*ss->fw_stats), 64);
4323 ss->fw_stats = (mcp_irq_data_t *)ss->fw_stats_dma.addr;
4324 snprintf(ss->tx.mtx_name, sizeof(ss->tx.mtx_name),
4325 "%s:tx(%d)", device_get_nameunit(sc->dev), i);
4326 mtx_init(&ss->tx.mtx, ss->tx.mtx_name, NULL, MTX_DEF);
4327 #ifdef IFNET_BUF_RING
4328 ss->tx.br = buf_ring_alloc(2048, M_DEVBUF, M_WAITOK,
4336 mxge_free_slices(sc);
4341 mxge_slice_probe(mxge_softc_t *sc)
4345 int msix_cnt, status, max_intr_slots;
4349 	 * don't enable multiple slices if they have not been requested,
4350 * or if this is not an SMP system
4353 if (mxge_max_slices == 0 || mxge_max_slices == 1 || mp_ncpus < 2)
4356 /* see how many MSI-X interrupts are available */
4357 msix_cnt = pci_msix_count(sc->dev);
4361 	/* now load the slice-aware firmware and see what it supports */
4362 old_fw = sc->fw_name;
4363 if (old_fw == mxge_fw_aligned)
4364 sc->fw_name = mxge_fw_rss_aligned;
4366 sc->fw_name = mxge_fw_rss_unaligned;
4367 status = mxge_load_firmware(sc, 0);
4369 device_printf(sc->dev, "Falling back to a single slice\n");
4373 	/* try to send a reset command to the card to see if it is alive */
4375 memset(&cmd, 0, sizeof (cmd));
4376 status = mxge_send_cmd(sc, MXGEFW_CMD_RESET, &cmd);
4378 device_printf(sc->dev, "failed reset\n");
4382 /* get rx ring size */
4383 status = mxge_send_cmd(sc, MXGEFW_CMD_GET_RX_RING_SIZE, &cmd);
4385 device_printf(sc->dev, "Cannot determine rx ring size\n");
4388 max_intr_slots = 2 * (cmd.data0 / sizeof (mcp_dma_addr_t));
4390 /* tell it the size of the interrupt queues */
4391 cmd.data0 = max_intr_slots * sizeof (struct mcp_slot);
4392 status = mxge_send_cmd(sc, MXGEFW_CMD_SET_INTRQ_SIZE, &cmd);
4394 device_printf(sc->dev, "failed MXGEFW_CMD_SET_INTRQ_SIZE\n");
4398 	/* ask for the maximum number of slices it supports */
4399 status = mxge_send_cmd(sc, MXGEFW_CMD_GET_MAX_RSS_QUEUES, &cmd);
4401 device_printf(sc->dev,
4402 "failed MXGEFW_CMD_GET_MAX_RSS_QUEUES\n");
4405 sc->num_slices = cmd.data0;
4406 if (sc->num_slices > msix_cnt)
4407 sc->num_slices = msix_cnt;
4409 if (mxge_max_slices == -1) {
4410 /* cap to number of CPUs in system */
4411 if (sc->num_slices > mp_ncpus)
4412 sc->num_slices = mp_ncpus;
4414 if (sc->num_slices > mxge_max_slices)
4415 sc->num_slices = mxge_max_slices;
4417 /* make sure it is a power of two */
4418 while (sc->num_slices & (sc->num_slices - 1))
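/*
 * Example with a hypothetical count: if the firmware and MSI-X limits leave 6
 * slices, 6 & 5 is non-zero, so the loop above keeps reducing the count until
 * the test clears at 4, the largest power of two not exceeding the limits.
 */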
4422 device_printf(sc->dev, "using %d slices\n",
4428 sc->fw_name = old_fw;
4429 (void) mxge_load_firmware(sc, 0);
4433 mxge_add_msix_irqs(mxge_softc_t *sc)
4436 int count, err, i, rid;
4439 sc->msix_table_res = bus_alloc_resource_any(sc->dev, SYS_RES_MEMORY,
4442 if (sc->msix_table_res == NULL) {
4443 device_printf(sc->dev, "couldn't alloc MSIX table res\n");
4447 count = sc->num_slices;
4448 err = pci_alloc_msix(sc->dev, &count);
4450 device_printf(sc->dev, "pci_alloc_msix: failed, wanted %d"
4451 "err = %d \n", sc->num_slices, err);
4452 goto abort_with_msix_table;
4454 if (count < sc->num_slices) {
4455 device_printf(sc->dev, "pci_alloc_msix: need %d, got %d\n",
4456 count, sc->num_slices);
4457 device_printf(sc->dev,
4458 "Try setting hw.mxge.max_slices to %d\n",
4461 goto abort_with_msix;
4463 bytes = sizeof (*sc->msix_irq_res) * sc->num_slices;
4464 sc->msix_irq_res = malloc(bytes, M_DEVBUF, M_NOWAIT|M_ZERO);
4465 if (sc->msix_irq_res == NULL) {
4467 goto abort_with_msix;
4470 for (i = 0; i < sc->num_slices; i++) {
4472 sc->msix_irq_res[i] = bus_alloc_resource_any(sc->dev,
4475 if (sc->msix_irq_res[i] == NULL) {
4476 device_printf(sc->dev, "couldn't allocate IRQ res"
4477 " for message %d\n", i);
4479 goto abort_with_res;
4483 bytes = sizeof (*sc->msix_ih) * sc->num_slices;
4484 sc->msix_ih = malloc(bytes, M_DEVBUF, M_NOWAIT|M_ZERO);
4486 for (i = 0; i < sc->num_slices; i++) {
4487 err = bus_setup_intr(sc->dev, sc->msix_irq_res[i],
4488 INTR_TYPE_NET | INTR_MPSAFE,
4489 #if __FreeBSD_version > 700030
4492 mxge_intr, &sc->ss[i], &sc->msix_ih[i]);
4494 device_printf(sc->dev, "couldn't setup intr for "
4496 goto abort_with_intr;
4498 bus_describe_intr(sc->dev, sc->msix_irq_res[i],
4499 sc->msix_ih[i], "s%d", i);
4503 device_printf(sc->dev, "using %d msix IRQs:",
4505 for (i = 0; i < sc->num_slices; i++)
4506 printf(" %ld", rman_get_start(sc->msix_irq_res[i]));
4512 for (i = 0; i < sc->num_slices; i++) {
4513 if (sc->msix_ih[i] != NULL) {
4514 bus_teardown_intr(sc->dev, sc->msix_irq_res[i],
4516 sc->msix_ih[i] = NULL;
4519 free(sc->msix_ih, M_DEVBUF);
4523 for (i = 0; i < sc->num_slices; i++) {
4525 if (sc->msix_irq_res[i] != NULL)
4526 bus_release_resource(sc->dev, SYS_RES_IRQ, rid,
4527 sc->msix_irq_res[i]);
4528 sc->msix_irq_res[i] = NULL;
4530 free(sc->msix_irq_res, M_DEVBUF);
4534 pci_release_msi(sc->dev);
4536 abort_with_msix_table:
4537 bus_release_resource(sc->dev, SYS_RES_MEMORY, PCIR_BAR(2),
4538 sc->msix_table_res);
4544 mxge_add_single_irq(mxge_softc_t *sc)
4546 int count, err, rid;
4548 count = pci_msi_count(sc->dev);
4549 if (count == 1 && pci_alloc_msi(sc->dev, &count) == 0) {
4555 sc->irq_res = bus_alloc_resource(sc->dev, SYS_RES_IRQ, &rid, 0, ~0,
4556 1, RF_SHAREABLE | RF_ACTIVE);
4557 if (sc->irq_res == NULL) {
4558 device_printf(sc->dev, "could not alloc interrupt\n");
4562 device_printf(sc->dev, "using %s irq %ld\n",
4563 sc->legacy_irq ? "INTx" : "MSI",
4564 rman_get_start(sc->irq_res));
4565 err = bus_setup_intr(sc->dev, sc->irq_res,
4566 INTR_TYPE_NET | INTR_MPSAFE,
4567 #if __FreeBSD_version > 700030
4570 mxge_intr, &sc->ss[0], &sc->ih);
4572 bus_release_resource(sc->dev, SYS_RES_IRQ,
4573 sc->legacy_irq ? 0 : 1, sc->irq_res);
4574 if (!sc->legacy_irq)
4575 pci_release_msi(sc->dev);
4581 mxge_rem_msix_irqs(mxge_softc_t *sc)
4585 for (i = 0; i < sc->num_slices; i++) {
4586 if (sc->msix_ih[i] != NULL) {
4587 bus_teardown_intr(sc->dev, sc->msix_irq_res[i],
4589 sc->msix_ih[i] = NULL;
4592 free(sc->msix_ih, M_DEVBUF);
4594 for (i = 0; i < sc->num_slices; i++) {
4596 if (sc->msix_irq_res[i] != NULL)
4597 bus_release_resource(sc->dev, SYS_RES_IRQ, rid,
4598 sc->msix_irq_res[i]);
4599 sc->msix_irq_res[i] = NULL;
4601 free(sc->msix_irq_res, M_DEVBUF);
4603 bus_release_resource(sc->dev, SYS_RES_MEMORY, PCIR_BAR(2),
4604 sc->msix_table_res);
4606 pci_release_msi(sc->dev);
4611 mxge_rem_single_irq(mxge_softc_t *sc)
4613 bus_teardown_intr(sc->dev, sc->irq_res, sc->ih);
4614 bus_release_resource(sc->dev, SYS_RES_IRQ,
4615 sc->legacy_irq ? 0 : 1, sc->irq_res);
4616 if (!sc->legacy_irq)
4617 pci_release_msi(sc->dev);
4621 mxge_rem_irq(mxge_softc_t *sc)
4623 if (sc->num_slices > 1)
4624 mxge_rem_msix_irqs(sc);
4626 mxge_rem_single_irq(sc);
4630 mxge_add_irq(mxge_softc_t *sc)
4634 if (sc->num_slices > 1)
4635 err = mxge_add_msix_irqs(sc);
4637 err = mxge_add_single_irq(sc);
4639 if (0 && err == 0 && sc->num_slices > 1) {
4640 mxge_rem_msix_irqs(sc);
4641 err = mxge_add_msix_irqs(sc);
4648 mxge_attach(device_t dev)
4650 mxge_softc_t *sc = device_get_softc(dev);
4655 mxge_fetch_tunables(sc);
4657 TASK_INIT(&sc->watchdog_task, 1, mxge_watchdog_task, sc);
4658 sc->tq = taskqueue_create("mxge_taskq", M_WAITOK,
4659 taskqueue_thread_enqueue, &sc->tq);
4660 if (sc->tq == NULL) {
4662 goto abort_with_nothing;
4665 err = bus_dma_tag_create(NULL, /* parent */
4668 BUS_SPACE_MAXADDR, /* low */
4669 BUS_SPACE_MAXADDR, /* high */
4670 NULL, NULL, /* filter */
4671 65536 + 256, /* maxsize */
4672 MXGE_MAX_SEND_DESC, /* num segs */
4673 65536, /* maxsegsize */
4675 NULL, NULL, /* lock */
4676 &sc->parent_dmat); /* tag */
4679 device_printf(sc->dev, "Err %d allocating parent dmat\n",
4684 ifp = sc->ifp = if_alloc(IFT_ETHER);
4686 device_printf(dev, "can not if_alloc()\n");
4688 goto abort_with_parent_dmat;
4690 if_initname(ifp, device_get_name(dev), device_get_unit(dev));
4692 snprintf(sc->cmd_mtx_name, sizeof(sc->cmd_mtx_name), "%s:cmd",
4693 device_get_nameunit(dev));
4694 mtx_init(&sc->cmd_mtx, sc->cmd_mtx_name, NULL, MTX_DEF);
4695 snprintf(sc->driver_mtx_name, sizeof(sc->driver_mtx_name),
4696 "%s:drv", device_get_nameunit(dev));
4697 mtx_init(&sc->driver_mtx, sc->driver_mtx_name,
4698 MTX_NETWORK_LOCK, MTX_DEF);
4700 callout_init_mtx(&sc->co_hdl, &sc->driver_mtx, 0);
4702 mxge_setup_cfg_space(sc);
4704 /* Map the board into the kernel */
4706 sc->mem_res = bus_alloc_resource(dev, SYS_RES_MEMORY, &rid, 0,
4708 if (sc->mem_res == NULL) {
4709 device_printf(dev, "could not map memory\n");
4711 goto abort_with_lock;
4713 sc->sram = rman_get_virtual(sc->mem_res);
4714 sc->sram_size = 2*1024*1024 - (2*(48*1024)+(32*1024)) - 0x100;
4715 if (sc->sram_size > rman_get_size(sc->mem_res)) {
4716 device_printf(dev, "impossible memory region size %ld\n",
4717 rman_get_size(sc->mem_res));
4719 goto abort_with_mem_res;
4722 /* make NULL terminated copy of the EEPROM strings section of
4724 bzero(sc->eeprom_strings, MXGE_EEPROM_STRINGS_SIZE);
4725 bus_space_read_region_1(rman_get_bustag(sc->mem_res),
4726 rman_get_bushandle(sc->mem_res),
4727 sc->sram_size - MXGE_EEPROM_STRINGS_SIZE,
4729 MXGE_EEPROM_STRINGS_SIZE - 2);
4730 err = mxge_parse_strings(sc);
4732 goto abort_with_mem_res;
4734 /* Enable write combining for efficient use of PCIe bus */
4737 /* Allocate the out of band dma memory */
4738 err = mxge_dma_alloc(sc, &sc->cmd_dma,
4739 sizeof (mxge_cmd_t), 64);
4741 goto abort_with_mem_res;
4742 sc->cmd = (mcp_cmd_response_t *) sc->cmd_dma.addr;
4743 err = mxge_dma_alloc(sc, &sc->zeropad_dma, 64, 64);
4745 goto abort_with_cmd_dma;
4747 err = mxge_dma_alloc(sc, &sc->dmabench_dma, 4096, 4096);
4749 goto abort_with_zeropad_dma;
4751 /* select & load the firmware */
4752 err = mxge_select_firmware(sc);
4754 goto abort_with_dmabench;
4755 sc->intr_coal_delay = mxge_intr_coal_delay;
4757 mxge_slice_probe(sc);
4758 err = mxge_alloc_slices(sc);
4760 goto abort_with_dmabench;
4762 err = mxge_reset(sc, 0);
4764 goto abort_with_slices;
4766 err = mxge_alloc_rings(sc);
4768 device_printf(sc->dev, "failed to allocate rings\n");
4769 goto abort_with_slices;
4772 err = mxge_add_irq(sc);
4774 device_printf(sc->dev, "failed to add irq\n");
4775 goto abort_with_rings;
4778 ifp->if_baudrate = IF_Gbps(10UL);
4779 ifp->if_capabilities = IFCAP_RXCSUM | IFCAP_TXCSUM | IFCAP_TSO4 |
4780 IFCAP_VLAN_MTU | IFCAP_LINKSTATE;
4782 ifp->if_capabilities |= IFCAP_LRO;
4785 #ifdef MXGE_NEW_VLAN_API
4786 ifp->if_capabilities |= IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_HWCSUM;
4788 /* Only FW 1.4.32 and newer can do TSO over vlans */
4789 if (sc->fw_ver_major == 1 && sc->fw_ver_minor == 4 &&
4790 sc->fw_ver_tiny >= 32)
4791 ifp->if_capabilities |= IFCAP_VLAN_HWTSO;
4794 sc->max_mtu = mxge_max_mtu(sc);
4795 if (sc->max_mtu >= 9000)
4796 ifp->if_capabilities |= IFCAP_JUMBO_MTU;
4798 device_printf(dev, "MTU limited to %d. Install "
4799 "latest firmware for 9000 byte jumbo support\n",
4800 sc->max_mtu - ETHER_HDR_LEN);
4801 ifp->if_hwassist = CSUM_TCP | CSUM_UDP | CSUM_TSO;
4802 ifp->if_capenable = ifp->if_capabilities;
4803 if (sc->lro_cnt == 0)
4804 ifp->if_capenable &= ~IFCAP_LRO;
4806 ifp->if_init = mxge_init;
4808 ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
4809 ifp->if_ioctl = mxge_ioctl;
4810 ifp->if_start = mxge_start;
4811 /* Initialise the ifmedia structure */
4812 ifmedia_init(&sc->media, 0, mxge_media_change,
4814 mxge_media_init(sc);
4815 mxge_media_probe(sc);
4817 ether_ifattach(ifp, sc->mac_addr);
4818 /* ether_ifattach sets mtu to ETHERMTU */
4819 if (mxge_initial_mtu != ETHERMTU)
4820 mxge_change_mtu(sc, mxge_initial_mtu);
4822 mxge_add_sysctls(sc);
4823 #ifdef IFNET_BUF_RING
4824 ifp->if_transmit = mxge_transmit;
4825 ifp->if_qflush = mxge_qflush;
4827 taskqueue_start_threads(&sc->tq, 1, PI_NET, "%s taskq",
4828 device_get_nameunit(sc->dev));
4829 callout_reset(&sc->co_hdl, mxge_ticks, mxge_tick, sc);
4833 mxge_free_rings(sc);
4835 mxge_free_slices(sc);
4836 abort_with_dmabench:
4837 mxge_dma_free(&sc->dmabench_dma);
4838 abort_with_zeropad_dma:
4839 mxge_dma_free(&sc->zeropad_dma);
4841 mxge_dma_free(&sc->cmd_dma);
4843 bus_release_resource(dev, SYS_RES_MEMORY, PCIR_BARS, sc->mem_res);
4845 pci_disable_busmaster(dev);
4846 mtx_destroy(&sc->cmd_mtx);
4847 mtx_destroy(&sc->driver_mtx);
4849 abort_with_parent_dmat:
4850 bus_dma_tag_destroy(sc->parent_dmat);
4852 if (sc->tq != NULL) {
4853 taskqueue_drain(sc->tq, &sc->watchdog_task);
4854 taskqueue_free(sc->tq);
4862 mxge_detach(device_t dev)
4864 mxge_softc_t *sc = device_get_softc(dev);
4866 if (mxge_vlans_active(sc)) {
4867 device_printf(sc->dev,
4868 "Detach vlans before removing module\n");
4871 mtx_lock(&sc->driver_mtx);
4873 if (sc->ifp->if_drv_flags & IFF_DRV_RUNNING)
4875 mtx_unlock(&sc->driver_mtx);
4876 ether_ifdetach(sc->ifp);
4877 if (sc->tq != NULL) {
4878 taskqueue_drain(sc->tq, &sc->watchdog_task);
4879 taskqueue_free(sc->tq);
4882 callout_drain(&sc->co_hdl);
4883 ifmedia_removeall(&sc->media);
4884 mxge_dummy_rdma(sc, 0);
4885 mxge_rem_sysctls(sc);
4887 mxge_free_rings(sc);
4888 mxge_free_slices(sc);
4889 mxge_dma_free(&sc->dmabench_dma);
4890 mxge_dma_free(&sc->zeropad_dma);
4891 mxge_dma_free(&sc->cmd_dma);
4892 bus_release_resource(dev, SYS_RES_MEMORY, PCIR_BARS, sc->mem_res);
4893 pci_disable_busmaster(dev);
4894 mtx_destroy(&sc->cmd_mtx);
4895 mtx_destroy(&sc->driver_mtx);
4897 bus_dma_tag_destroy(sc->parent_dmat);
4902 mxge_shutdown(device_t dev)
4908 This file uses Myri10GE driver indentation.
4911 c-file-style:"linux"