/******************************************************************************

Copyright (c) 2006-2009, Myricom Inc.

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:

 1. Redistributions of source code must retain the above copyright notice,
    this list of conditions and the following disclaimer.

 2. Neither the name of the Myricom Inc, nor the names of its
    contributors may be used to endorse or promote products derived from
    this software without specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.

***************************************************************************/
#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/linker.h>
#include <sys/firmware.h>
#include <sys/endian.h>
#include <sys/sockio.h>
#include <sys/malloc.h>
#include <sys/kernel.h>
#include <sys/module.h>
#include <sys/socket.h>
#include <sys/sysctl.h>
#include <sys/taskqueue.h>

#include <net/if_arp.h>
#include <net/ethernet.h>
#include <net/if_dl.h>
#include <net/if_media.h>
#include <net/if_types.h>
#include <net/if_vlan_var.h>

#include <netinet/in_systm.h>
#include <netinet/in.h>
#include <netinet/ip.h>
#include <netinet/tcp.h>

#include <machine/bus.h>
#include <machine/in_cksum.h>
#include <machine/resource.h>

#include <dev/pci/pcireg.h>
#include <dev/pci/pcivar.h>
#include <dev/pci/pci_private.h> /* XXX for pci_cfg_restore */

#include <vm/vm.h> /* for pmap_mapdev() */

#if defined(__i386) || defined(__amd64)
#include <machine/specialreg.h>
#endif

#include <dev/mxge/mxge_mcp.h>
#include <dev/mxge/mcp_gen_header.h>
/*#define MXGE_FAKE_IFP*/
#include <dev/mxge/if_mxge_var.h>

#include <sys/buf_ring.h>
static int mxge_nvidia_ecrc_enable = 1;
static int mxge_force_firmware = 0;
static int mxge_intr_coal_delay = 30;
static int mxge_deassert_wait = 1;
static int mxge_flow_control = 1;
static int mxge_verbose = 0;
static int mxge_lro_cnt = 8;
static int mxge_ticks;
static int mxge_max_slices = 1;
static int mxge_rss_hash_type = MXGEFW_RSS_HASH_TYPE_SRC_DST_PORT;
static int mxge_always_promisc = 0;
static int mxge_initial_mtu = ETHERMTU_JUMBO;
static int mxge_throttle = 0;
static char *mxge_fw_unaligned = "mxge_ethp_z8e";
static char *mxge_fw_aligned = "mxge_eth_z8e";
static char *mxge_fw_rss_aligned = "mxge_rss_eth_z8e";
static char *mxge_fw_rss_unaligned = "mxge_rss_ethp_z8e";

static int mxge_probe(device_t dev);
static int mxge_attach(device_t dev);
static int mxge_detach(device_t dev);
static int mxge_shutdown(device_t dev);
static void mxge_intr(void *arg);

static device_method_t mxge_methods[] =
{
	/* Device interface */
	DEVMETHOD(device_probe, mxge_probe),
	DEVMETHOD(device_attach, mxge_attach),
	DEVMETHOD(device_detach, mxge_detach),
	DEVMETHOD(device_shutdown, mxge_shutdown),
	{0, 0}
};

static driver_t mxge_driver =
{
	"mxge",
	mxge_methods,
	sizeof(mxge_softc_t),
};

static devclass_t mxge_devclass;

/* Declare ourselves to be a child of the PCI bus.*/
DRIVER_MODULE(mxge, pci, mxge_driver, mxge_devclass, 0, 0);
MODULE_DEPEND(mxge, firmware, 1, 1, 1);
MODULE_DEPEND(mxge, zlib, 1, 1, 1);

static int mxge_load_firmware(mxge_softc_t *sc, int adopt);
static int mxge_send_cmd(mxge_softc_t *sc, uint32_t cmd, mxge_cmd_t *data);
static int mxge_close(mxge_softc_t *sc, int down);
static int mxge_open(mxge_softc_t *sc);
static void mxge_tick(void *arg);
static int
mxge_probe(device_t dev)
{
	int rev;

	if ((pci_get_vendor(dev) == MXGE_PCI_VENDOR_MYRICOM) &&
	    ((pci_get_device(dev) == MXGE_PCI_DEVICE_Z8E) ||
	     (pci_get_device(dev) == MXGE_PCI_DEVICE_Z8E_9))) {
		rev = pci_get_revid(dev);
		switch (rev) {
		case MXGE_PCI_REV_Z8E:
			device_set_desc(dev, "Myri10G-PCIE-8A");
			break;
		case MXGE_PCI_REV_Z8ES:
			device_set_desc(dev, "Myri10G-PCIE-8B");
			break;
		default:
			device_set_desc(dev, "Myri10G-PCIE-8??");
			device_printf(dev, "Unrecognized rev %d NIC\n", rev);
			break;
		}
		return 0;
	}
	return ENXIO;
}
static void
mxge_enable_wc(mxge_softc_t *sc)
{
#if defined(__i386) || defined(__amd64)
	vm_offset_t len;
	int err;

	len = rman_get_size(sc->mem_res);
	err = pmap_change_attr((vm_offset_t) sc->sram,
			       len, PAT_WRITE_COMBINING);
	if (err != 0)
		device_printf(sc->dev, "pmap_change_attr failed, %d\n",
			      err);
#endif
}
/* callback to get our DMA address */
static void
mxge_dmamap_callback(void *arg, bus_dma_segment_t *segs, int nsegs,
		     int error)
{
	if (error == 0)
		*(bus_addr_t *) arg = segs->ds_addr;
}

static int
mxge_dma_alloc(mxge_softc_t *sc, mxge_dma_t *dma, size_t bytes,
	       bus_size_t alignment)
{
	int err;
	device_t dev = sc->dev;
	bus_size_t boundary, maxsegsize;

	if (bytes > 4096 && alignment == 4096) {
		boundary = 0;
		maxsegsize = bytes;
	} else {
		boundary = 4096;
		maxsegsize = 4096;
	}

	/* allocate DMAable memory tags */
	err = bus_dma_tag_create(sc->parent_dmat,	/* parent */
				 alignment,		/* alignment */
				 boundary,		/* boundary */
				 BUS_SPACE_MAXADDR,	/* low */
				 BUS_SPACE_MAXADDR,	/* high */
				 NULL, NULL,		/* filter */
				 bytes,			/* maxsize */
				 1,			/* num segs */
				 maxsegsize,		/* maxsegsize */
				 BUS_DMA_COHERENT,	/* flags */
				 NULL, NULL,		/* lock */
				 &dma->dmat);		/* tag */
	if (err != 0) {
		device_printf(dev, "couldn't alloc tag (err = %d)\n", err);
		return err;
	}

	/* allocate DMAable memory & map */
	err = bus_dmamem_alloc(dma->dmat, &dma->addr,
			       (BUS_DMA_WAITOK | BUS_DMA_COHERENT
				| BUS_DMA_ZERO), &dma->map);
	if (err != 0) {
		device_printf(dev, "couldn't alloc mem (err = %d)\n", err);
		goto abort_with_dmat;
	}

	/* load the memory */
	err = bus_dmamap_load(dma->dmat, dma->map, dma->addr, bytes,
			      mxge_dmamap_callback,
			      (void *)&dma->bus_addr, 0);
	if (err != 0) {
		device_printf(dev, "couldn't load map (err = %d)\n", err);
		goto abort_with_mem;
	}
	return 0;

abort_with_mem:
	bus_dmamem_free(dma->dmat, dma->addr, dma->map);
abort_with_dmat:
	(void)bus_dma_tag_destroy(dma->dmat);
	return err;
}

static void
mxge_dma_free(mxge_dma_t *dma)
{
	bus_dmamap_unload(dma->dmat, dma->map);
	bus_dmamem_free(dma->dmat, dma->addr, dma->map);
	(void)bus_dma_tag_destroy(dma->dmat);
}
/*
 * The eeprom strings on the lanaiX have the format
 * 'SN=x\0MAC=x:x:x:x:x:x\0PC=text\0'
 */
static int
mxge_parse_strings(mxge_softc_t *sc)
{
#define MXGE_NEXT_STRING(p) while(ptr < limit && *ptr++)

	char *ptr, *limit;
	int i, found_mac;

	ptr = sc->eeprom_strings;
	limit = sc->eeprom_strings + MXGE_EEPROM_STRINGS_SIZE;
	found_mac = 0;
	while (ptr < limit && *ptr != '\0') {
		if (memcmp(ptr, "MAC=", 4) == 0) {
			ptr += 4;
			sc->mac_addr_string = ptr;
			for (i = 0; i < 6; i++) {
				if ((ptr + 2) > limit)
					goto abort;
				sc->mac_addr[i] = strtoul(ptr, NULL, 16);
				ptr += 3;	/* skip "xx:" */
				found_mac = 1;
			}
		} else if (memcmp(ptr, "PC=", 3) == 0) {
			ptr += 3;
			strncpy(sc->product_code_string, ptr,
				sizeof (sc->product_code_string) - 1);
		} else if (memcmp(ptr, "SN=", 3) == 0) {
			ptr += 3;
			strncpy(sc->serial_number_string, ptr,
				sizeof (sc->serial_number_string) - 1);
		}
		MXGE_NEXT_STRING(ptr);
	}

	if (found_mac)
		return 0;

abort:
	device_printf(sc->dev, "failed to parse eeprom_strings\n");
	return ENXIO;
}
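/*
 * Editorial sketch (not part of the driver): parsing one "MAC=" EEPROM
 * string with strtoul(), as mxge_parse_strings() does above.  Standalone
 * C with hypothetical names; hex byte pairs are separated by ':'.
 */
#if 0
#include <stdint.h>
#include <stdlib.h>

static int
parse_mac_example(const char *s, uint8_t mac[6])
{
	char *end;
	int i;

	for (i = 0; i < 6; i++) {
		mac[i] = (uint8_t)strtoul(s, &end, 16);
		if (i < 5 && *end != ':')
			return -1;	/* malformed string */
		s = end + 1;		/* step past the ':' */
	}
	return 0;
}
/* parse_mac_example("00:60:dd:47:ab:cd", mac) fills mac[] with those bytes */
#endif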
#if defined __i386 || defined i386 || defined __i386__ || defined __x86_64__
static void
mxge_enable_nvidia_ecrc(mxge_softc_t *sc)
{
	unsigned long base, off;
	char *va, *cfgptr;
	device_t pdev, mcp55;
	uint16_t vendor_id, device_id, word;
	uintptr_t bus, slot, func, ivend, idev;
	uint32_t *ptr32, val;

	if (!mxge_nvidia_ecrc_enable)
		return;

	pdev = device_get_parent(device_get_parent(sc->dev));
	if (pdev == NULL) {
		device_printf(sc->dev, "could not find parent?\n");
		return;
	}
	vendor_id = pci_read_config(pdev, PCIR_VENDOR, 2);
	device_id = pci_read_config(pdev, PCIR_DEVICE, 2);

	if (vendor_id != 0x10de)
		return;

	base = 0;

	if (device_id == 0x005d) {
		/* ck804, base address is magic */
		base = 0xe0000000UL;
	} else if (device_id >= 0x0374 && device_id <= 0x378) {
		/* mcp55, base address stored in chipset */
		mcp55 = pci_find_bsf(0, 0, 0);
		if (mcp55 &&
		    0x10de == pci_read_config(mcp55, PCIR_VENDOR, 2) &&
		    0x0369 == pci_read_config(mcp55, PCIR_DEVICE, 2)) {
			word = pci_read_config(mcp55, 0x90, 2);
			base = ((unsigned long)word & 0x7ffeU) << 25;
		}
	}
	if (!base)
		return;

	/*
	 * Test below is commented because it is believed that doing
	 * config read/write beyond 0xff will access the config space
	 * for the next larger function. Uncomment this and remove
	 * the hacky pmap_mapdev() way of accessing config space when
	 * FreeBSD grows support for extended pcie config space access.
	 */
#if 0
	/* See if we can, by some miracle, access the extended
	   config space */
	val = pci_read_config(pdev, 0x178, 4);
	if (val != 0xffffffff) {
		val |= 0x40;
		pci_write_config(pdev, 0x178, val, 4);
		return;
	}
#endif
	/* Rather than using normal pci config space writes, we must
	 * map the Nvidia config space ourselves. This is because on
	 * opteron/nvidia class machine the 0xe000000 mapping is
	 * handled by the nvidia chipset, that means the internal PCI
	 * device (the on-chip northbridge), or the amd-8131 bridge
	 * and things behind them are not visible by this method.
	 */

	BUS_READ_IVAR(device_get_parent(pdev), pdev,
		      PCI_IVAR_BUS, &bus);
	BUS_READ_IVAR(device_get_parent(pdev), pdev,
		      PCI_IVAR_SLOT, &slot);
	BUS_READ_IVAR(device_get_parent(pdev), pdev,
		      PCI_IVAR_FUNCTION, &func);
	BUS_READ_IVAR(device_get_parent(pdev), pdev,
		      PCI_IVAR_VENDOR, &ivend);
	BUS_READ_IVAR(device_get_parent(pdev), pdev,
		      PCI_IVAR_DEVICE, &idev);

	off = base
		+ 0x00100000UL * (unsigned long)bus
		+ 0x00001000UL * (unsigned long)(func
						 + 8 * slot);

	/* map it into the kernel */
	va = pmap_mapdev(trunc_page((vm_paddr_t)off), PAGE_SIZE);

	if (va == NULL) {
		device_printf(sc->dev, "pmap_kenter_temporary didn't\n");
		return;
	}
	/* get a pointer to the config space mapped into the kernel */
	cfgptr = va + (off & PAGE_MASK);

	/* make sure that we can really access it */
	vendor_id = *(uint16_t *)(cfgptr + PCIR_VENDOR);
	device_id = *(uint16_t *)(cfgptr + PCIR_DEVICE);
	if (! (vendor_id == ivend && device_id == idev)) {
		device_printf(sc->dev, "mapping failed: 0x%x:0x%x\n",
			      vendor_id, device_id);
		pmap_unmapdev((vm_offset_t)va, PAGE_SIZE);
		return;
	}

	ptr32 = (uint32_t*)(cfgptr + 0x178);
	val = *ptr32;

	if (val == 0xffffffff) {
		device_printf(sc->dev, "extended mapping failed\n");
		pmap_unmapdev((vm_offset_t)va, PAGE_SIZE);
		return;
	}
	*ptr32 = val | 0x40;
	pmap_unmapdev((vm_offset_t)va, PAGE_SIZE);
	if (mxge_verbose)
		device_printf(sc->dev,
			      "Enabled ECRC on upstream Nvidia bridge "
			      "at %d:%d:%d\n",
			      (int)bus, (int)slot, (int)func);
	return;
}
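/*
 * Worked example of the address computation above (illustrative values
 * only): with the MCP55 base register giving base = 0xe0000000, a bridge
 * at bus 5, slot 0, func 0 maps to
 *
 *   off = 0xe0000000 + 0x00100000 * 5 + 0x00001000 * (0 + 8 * 0)
 *       = 0xe0500000
 *
 * i.e. one 1MB window per bus and one 4KB page per (slot, function).
 */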
#else
static void
mxge_enable_nvidia_ecrc(mxge_softc_t *sc)
{
	device_printf(sc->dev,
		      "Nforce 4 chipset on non-x86/amd64!?!?!\n");
	return;
}
#endif
static int
mxge_dma_test(mxge_softc_t *sc, int test_type)
{
	mxge_cmd_t cmd;
	bus_addr_t dmatest_bus = sc->dmabench_dma.bus_addr;
	int status;
	uint32_t len;
	char *test = " ";

	/* Run a small DMA test.
	 * The magic multipliers to the length tell the firmware
	 * to do DMA read, write, or read+write tests.  The
	 * results are returned in cmd.data0.  The upper 16
	 * bits of the return is the number of transfers completed.
	 * The lower 16 bits is the time in 0.5us ticks that the
	 * transfers took to complete.
	 */
	len = sc->tx_boundary;

	cmd.data0 = MXGE_LOWPART_TO_U32(dmatest_bus);
	cmd.data1 = MXGE_HIGHPART_TO_U32(dmatest_bus);
	cmd.data2 = len * 0x10000;
	status = mxge_send_cmd(sc, test_type, &cmd);
	if (status != 0) {
		test = "read";
		goto abort;
	}
	sc->read_dma = ((cmd.data0>>16) * len * 2) /
		(cmd.data0 & 0xffff);
	cmd.data0 = MXGE_LOWPART_TO_U32(dmatest_bus);
	cmd.data1 = MXGE_HIGHPART_TO_U32(dmatest_bus);
	cmd.data2 = len * 0x1;
	status = mxge_send_cmd(sc, test_type, &cmd);
	if (status != 0) {
		test = "write";
		goto abort;
	}
	sc->write_dma = ((cmd.data0>>16) * len * 2) /
		(cmd.data0 & 0xffff);

	cmd.data0 = MXGE_LOWPART_TO_U32(dmatest_bus);
	cmd.data1 = MXGE_HIGHPART_TO_U32(dmatest_bus);
	cmd.data2 = len * 0x10001;
	status = mxge_send_cmd(sc, test_type, &cmd);
	if (status != 0) {
		test = "read/write";
		goto abort;
	}
	sc->read_write_dma = ((cmd.data0>>16) * len * 2 * 2) /
		(cmd.data0 & 0xffff);

abort:
	if (status != 0 && test_type != MXGEFW_CMD_UNALIGNED_TEST)
		device_printf(sc->dev, "DMA %s benchmark failed: %d\n",
			      test, status);
	return status;
}
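/*
 * Worked example of the result encoding (illustrative numbers): if a read
 * test with len = 4096 returns cmd.data0 = 0x01000200, then
 *
 *   transfers = 0x0100 = 256	(upper 16 bits)
 *   ticks     = 0x0200 = 512	(lower 16 bits, 0.5us each)
 *   read_dma  = (256 * 4096 * 2) / 512 = 4096 MB/s
 *
 * The "* 2" converts bytes-per-0.5us-tick into bytes/us, i.e. MB/s.
 */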
/*
 * The Lanai Z8E PCI-E interface achieves higher Read-DMA throughput
 * when the PCI-E Completion packets are aligned on an 8-byte
 * boundary.  Some PCI-E chip sets always align Completion packets; on
 * the ones that do not, the alignment can be enforced by enabling
 * ECRC generation (if supported).
 *
 * When PCI-E Completion packets are not aligned, it is actually more
 * efficient to limit Read-DMA transactions to 2KB, rather than 4KB.
 *
 * If the driver can neither enable ECRC nor verify that it has
 * already been enabled, then it must use a firmware image which works
 * around unaligned completion packets (ethp_z8e.dat), and it should
 * also ensure that it never gives the device a Read-DMA which is
 * larger than 2KB by setting the tx_boundary to 2KB.  If ECRC is
 * enabled, then the driver should use the aligned (eth_z8e.dat)
 * firmware image, and set tx_boundary to 4KB.
 */
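/*
 * A minimal sketch of the resulting policy (illustrative only; the
 * condition variable is hypothetical, the firmware module names are the
 * tunables defined near the top of this file):
 */
#if 0
if (completions_aligned_or_ecrc_enabled) {
	fw_name = "mxge_eth_z8e";	/* aligned firmware */
	tx_boundary = 4096;
} else {
	fw_name = "mxge_ethp_z8e";	/* unaligned-workaround firmware */
	tx_boundary = 2048;
}
#endif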
static int
mxge_firmware_probe(mxge_softc_t *sc)
{
	device_t dev = sc->dev;
	int reg, status;
	uint16_t pectl;

	sc->tx_boundary = 4096;
	/*
	 * Verify the max read request size was set to 4KB
	 * before trying the test with 4KB.
	 */
	if (pci_find_cap(dev, PCIY_EXPRESS, &reg) == 0) {
		pectl = pci_read_config(dev, reg + 0x8, 2);
		if ((pectl & (5 << 12)) != (5 << 12)) {
			device_printf(dev, "Max Read Req. size != 4k (0x%x)\n",
				      pectl);
			sc->tx_boundary = 2048;
		}
	}

	/*
	 * load the optimized firmware (which assumes aligned PCIe
	 * completions) in order to see if it works on this host.
	 */
	sc->fw_name = mxge_fw_aligned;
	status = mxge_load_firmware(sc, 1);
	if (status != 0)
		return status;

	/*
	 * Enable ECRC if possible
	 */
	mxge_enable_nvidia_ecrc(sc);

	/*
	 * Run a DMA test which watches for unaligned completions and
	 * aborts on the first one seen.
	 */
	status = mxge_dma_test(sc, MXGEFW_CMD_UNALIGNED_TEST);
	if (status == 0)
		return 0; /* keep the aligned firmware */

	if (status != E2BIG)
		device_printf(dev, "DMA test failed: %d\n", status);
	if (status == ENOSYS)
		device_printf(dev, "Falling back to ethp! "
			      "Please install up to date fw\n");
	return status;
}
static int
mxge_select_firmware(mxge_softc_t *sc)
{
	int aligned = 0;
	int force_firmware = mxge_force_firmware;

	if (sc->throttle)
		force_firmware = sc->throttle;

	if (force_firmware != 0) {
		if (force_firmware == 1)
			aligned = 1;
		else
			aligned = 0;
		if (mxge_verbose)
			device_printf(sc->dev,
				      "Assuming %s completions (forced)\n",
				      aligned ? "aligned" : "unaligned");
		goto abort;
	}

	/* if the PCIe link width is 4 or less, we can use the aligned
	   firmware and skip any checks */
	if (sc->link_width != 0 && sc->link_width <= 4) {
		device_printf(sc->dev,
			      "PCIe x%d Link, expect reduced performance\n",
			      sc->link_width);
		aligned = 1;
		goto abort;
	}

	if (0 == mxge_firmware_probe(sc))
		return 0;

abort:
	if (aligned) {
		sc->fw_name = mxge_fw_aligned;
		sc->tx_boundary = 4096;
	} else {
		sc->fw_name = mxge_fw_unaligned;
		sc->tx_boundary = 2048;
	}
	return (mxge_load_firmware(sc, 0));
}
static int
mxge_validate_firmware(mxge_softc_t *sc, const mcp_gen_header_t *hdr)
{

	if (be32toh(hdr->mcp_type) != MCP_TYPE_ETH) {
		device_printf(sc->dev, "Bad firmware type: 0x%x\n",
			      be32toh(hdr->mcp_type));
		return EIO;
	}

	/* save firmware version for sysctl */
	strncpy(sc->fw_version, hdr->version, sizeof (sc->fw_version));
	if (mxge_verbose)
		device_printf(sc->dev, "firmware id: %s\n", hdr->version);

	sscanf(sc->fw_version, "%d.%d.%d", &sc->fw_ver_major,
	       &sc->fw_ver_minor, &sc->fw_ver_tiny);

	if (!(sc->fw_ver_major == MXGEFW_VERSION_MAJOR
	      && sc->fw_ver_minor == MXGEFW_VERSION_MINOR)) {
		device_printf(sc->dev, "Found firmware version %s\n",
			      sc->fw_version);
		device_printf(sc->dev, "Driver needs %d.%d\n",
			      MXGEFW_VERSION_MAJOR, MXGEFW_VERSION_MINOR);
		return EINVAL;
	}
	return 0;
}

static void *
z_alloc(void *nil, u_int items, u_int size)
{
	void *ptr;

	ptr = malloc(items * size, M_TEMP, M_NOWAIT);
	return ptr;
}

static void
z_free(void *nil, void *ptr)
{
	free(ptr, M_TEMP);
}
static int
mxge_load_firmware_helper(mxge_softc_t *sc, uint32_t *limit)
{
	z_stream zs;
	char *inflate_buffer;
	const struct firmware *fw;
	const mcp_gen_header_t *hdr;
	unsigned hdr_offset;
	int status;
	unsigned int i;
	char dummy;
	size_t fw_len;

	fw = firmware_get(sc->fw_name);
	if (fw == NULL) {
		device_printf(sc->dev, "Could not find firmware image %s\n",
			      sc->fw_name);
		return ENOENT;
	}

	/* setup zlib and decompress f/w */
	bzero(&zs, sizeof (zs));
	zs.zalloc = z_alloc;
	zs.zfree = z_free;
	status = inflateInit(&zs);
	if (status != Z_OK) {
		status = EIO;
		goto abort_with_fw;
	}

	/* the uncompressed size is stored as the firmware version,
	   which would otherwise go unused */
	fw_len = (size_t) fw->version;
	inflate_buffer = malloc(fw_len, M_TEMP, M_NOWAIT);
	if (inflate_buffer == NULL)
		goto abort_with_zs;
	zs.avail_in = fw->datasize;
	zs.next_in = __DECONST(char *, fw->data);
	zs.avail_out = fw_len;
	zs.next_out = inflate_buffer;
	status = inflate(&zs, Z_FINISH);
	if (status != Z_STREAM_END) {
		device_printf(sc->dev, "zlib %d\n", status);
		status = EIO;
		goto abort_with_buffer;
	}

	/* check id */
	hdr_offset = htobe32(*(const uint32_t *)
			     (inflate_buffer + MCP_HEADER_PTR_OFFSET));
	if ((hdr_offset & 3) || hdr_offset + sizeof(*hdr) > fw_len) {
		device_printf(sc->dev, "Bad firmware file\n");
		status = EIO;
		goto abort_with_buffer;
	}
	hdr = (const void*)(inflate_buffer + hdr_offset);

	status = mxge_validate_firmware(sc, hdr);
	if (status != 0)
		goto abort_with_buffer;

	/* Copy the inflated firmware to NIC SRAM. */
	for (i = 0; i < fw_len; i += 256) {
		mxge_pio_copy(sc->sram + MXGE_FW_OFFSET + i,
			      inflate_buffer + i,
			      min(256U, (unsigned)(fw_len - i)));
		wmb();
		dummy = *sc->sram;
		wmb();
	}

	*limit = fw_len;
	status = 0;
abort_with_buffer:
	free(inflate_buffer, M_TEMP);
abort_with_zs:
	inflateEnd(&zs);
abort_with_fw:
	firmware_put(fw, FIRMWARE_UNLOAD);
	return status;
}
/*
 * Enable or disable periodic RDMAs from the host to make certain
 * chipsets resend dropped PCIe messages
 */
static void
mxge_dummy_rdma(mxge_softc_t *sc, int enable)
{
	char buf_bytes[72];
	volatile uint32_t *confirm;
	volatile char *submit;
	uint32_t *buf, dma_low, dma_high;
	int i;

	buf = (uint32_t *)((unsigned long)(buf_bytes + 7) & ~7UL);

	/* clear confirmation addr */
	confirm = (volatile uint32_t *)sc->cmd;
	*confirm = 0;
	wmb();

	/* send an rdma command to the PCIe engine, and wait for the
	   response in the confirmation address.  The firmware should
	   write a -1 there to indicate it is alive and well
	*/
	dma_low = MXGE_LOWPART_TO_U32(sc->cmd_dma.bus_addr);
	dma_high = MXGE_HIGHPART_TO_U32(sc->cmd_dma.bus_addr);
	buf[0] = htobe32(dma_high);	/* confirm addr MSW */
	buf[1] = htobe32(dma_low);	/* confirm addr LSW */
	buf[2] = htobe32(0xffffffff);	/* confirm data */
	dma_low = MXGE_LOWPART_TO_U32(sc->zeropad_dma.bus_addr);
	dma_high = MXGE_HIGHPART_TO_U32(sc->zeropad_dma.bus_addr);
	buf[3] = htobe32(dma_high);	/* dummy addr MSW */
	buf[4] = htobe32(dma_low);	/* dummy addr LSW */
	buf[5] = htobe32(enable);	/* enable? */

	submit = (volatile char *)(sc->sram + MXGEFW_BOOT_DUMMY_RDMA);

	mxge_pio_copy(submit, buf, 64);
	wmb();
	DELAY(1000);
	wmb();
	i = 0;
	while (*confirm != 0xffffffff && i < 20) {
		DELAY(1000);
		i++;
	}
	if (*confirm != 0xffffffff) {
		device_printf(sc->dev, "dummy rdma %s failed (%p = 0x%x)",
			      (enable ? "enable" : "disable"), confirm,
			      *confirm);
	}
	return;
}
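/*
 * The (buf_bytes + 7) & ~7UL expression above is the classic idiom for
 * aligning a stack buffer.  A minimal standalone sketch (illustrative
 * names; uintptr_t used for portability):
 */
#if 0
char raw[64 + 8];		/* 8 spare bytes of alignment slack */
uint32_t *p = (uint32_t *)(((uintptr_t)raw + 7) & ~(uintptr_t)7);
/* p now points at the first 8-byte-aligned address inside raw[] */
#endif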
static int
mxge_send_cmd(mxge_softc_t *sc, uint32_t cmd, mxge_cmd_t *data)
{
	mcp_cmd_t *buf;
	char buf_bytes[sizeof(*buf) + 8];
	volatile mcp_cmd_response_t *response = sc->cmd;
	volatile char *cmd_addr = sc->sram + MXGEFW_ETH_CMD;
	uint32_t dma_low, dma_high;
	int err, sleep_total = 0;

	/* ensure buf is aligned to 8 bytes */
	buf = (mcp_cmd_t *)((unsigned long)(buf_bytes + 7) & ~7UL);

	buf->data0 = htobe32(data->data0);
	buf->data1 = htobe32(data->data1);
	buf->data2 = htobe32(data->data2);
	buf->cmd = htobe32(cmd);
	dma_low = MXGE_LOWPART_TO_U32(sc->cmd_dma.bus_addr);
	dma_high = MXGE_HIGHPART_TO_U32(sc->cmd_dma.bus_addr);

	buf->response_addr.low = htobe32(dma_low);
	buf->response_addr.high = htobe32(dma_high);
	mtx_lock(&sc->cmd_mtx);
	response->result = 0xffffffff;
	wmb();
	mxge_pio_copy((volatile void *)cmd_addr, buf, sizeof (*buf));

	/* wait up to 20ms */
	err = EAGAIN;
	for (sleep_total = 0; sleep_total < 20; sleep_total++) {
		bus_dmamap_sync(sc->cmd_dma.dmat,
				sc->cmd_dma.map, BUS_DMASYNC_POSTREAD);
		wmb();
		switch (be32toh(response->result)) {
		case 0:
			data->data0 = be32toh(response->data);
			err = 0;
			break;
		case 0xffffffff:
			DELAY(1000);
			break;
		case MXGEFW_CMD_UNKNOWN:
			err = ENOSYS;
			break;
		case MXGEFW_CMD_ERROR_UNALIGNED:
			err = E2BIG;
			break;
		case MXGEFW_CMD_ERROR_BUSY:
			err = EBUSY;
			break;
		case MXGEFW_CMD_ERROR_I2C_ABSENT:
			err = ENXIO;
			break;
		default:
			device_printf(sc->dev,
				      "mxge: command %d "
				      "failed, result = %d\n",
				      cmd, be32toh(response->result));
			err = ENXIO;
			break;
		}
		if (err != EAGAIN)
			break;
	}
	if (err == EAGAIN)
		device_printf(sc->dev, "mxge: command %d timed out "
			      "result = %d\n",
			      cmd, be32toh(response->result));
	mtx_unlock(&sc->cmd_mtx);
	return err;
}
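/*
 * The command path above is a simple mailbox protocol: PIO the
 * big-endian command block into NIC SRAM, then poll a host buffer that
 * the firmware DMAs the result into, with 0xffffffff as the "no answer
 * yet" sentinel.  Condensed sketch (illustrative; locking and error
 * decoding elided):
 */
#if 0
response->result = 0xffffffff;			/* arm the sentinel */
wmb();
mxge_pio_copy(cmd_addr, buf, sizeof(*buf));	/* doorbell: PIO to SRAM */
for (ms = 0; ms < 20; ms++) {			/* poll up to 20ms */
	if (be32toh(response->result) != 0xffffffff)
		break;				/* firmware answered */
	DELAY(1000);
}
#endif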
static int
mxge_adopt_running_firmware(mxge_softc_t *sc)
{
	struct mcp_gen_header *hdr;
	const size_t bytes = sizeof (struct mcp_gen_header);
	unsigned hdr_offset;
	int status;

	/* find running firmware header */
	hdr_offset = htobe32(*(volatile uint32_t *)
			     (sc->sram + MCP_HEADER_PTR_OFFSET));

	if ((hdr_offset & 3) || hdr_offset + sizeof(*hdr) > sc->sram_size) {
		device_printf(sc->dev,
			      "Running firmware has bad header offset (%d)\n",
			      (int)hdr_offset);
		return EIO;
	}

	/* copy header of running firmware from SRAM to host memory to
	 * validate firmware */
	hdr = malloc(bytes, M_DEVBUF, M_NOWAIT);
	if (hdr == NULL) {
		device_printf(sc->dev, "could not malloc firmware hdr\n");
		return ENOMEM;
	}
	bus_space_read_region_1(rman_get_bustag(sc->mem_res),
				rman_get_bushandle(sc->mem_res),
				hdr_offset, (char *)hdr, bytes);
	status = mxge_validate_firmware(sc, hdr);
	free(hdr, M_DEVBUF);

	/*
	 * check to see if adopted firmware has bug where adopting
	 * it will cause broadcasts to be filtered unless the NIC
	 * is kept in ALLMULTI mode
	 */
	if (sc->fw_ver_major == 1 && sc->fw_ver_minor == 4 &&
	    sc->fw_ver_tiny >= 4 && sc->fw_ver_tiny <= 11) {
		sc->adopted_rx_filter_bug = 1;
		device_printf(sc->dev, "Adopting fw %d.%d.%d: "
			      "working around rx filter bug\n",
			      sc->fw_ver_major, sc->fw_ver_minor,
			      sc->fw_ver_tiny);
	}
	return status;
}
static int
mxge_load_firmware(mxge_softc_t *sc, int adopt)
{
	volatile uint32_t *confirm;
	volatile char *submit;
	char buf_bytes[72];
	uint32_t *buf, size, dma_low, dma_high;
	int status, i;

	buf = (uint32_t *)((unsigned long)(buf_bytes + 7) & ~7UL);

	size = sc->sram_size;
	status = mxge_load_firmware_helper(sc, &size);
	if (status) {
		if (!adopt)
			return status;
		/* Try to use the currently running firmware, if
		   it is new enough */
		status = mxge_adopt_running_firmware(sc);
		if (status) {
			device_printf(sc->dev,
				      "failed to adopt running firmware\n");
			return status;
		}
		device_printf(sc->dev,
			      "Successfully adopted running firmware\n");
		if (sc->tx_boundary == 4096) {
			device_printf(sc->dev,
				      "Using firmware currently running on NIC"
				      ".  For optimal\n");
			device_printf(sc->dev,
				      "performance consider loading optimized "
				      "firmware\n");
		}
		sc->fw_name = mxge_fw_unaligned;
		sc->tx_boundary = 2048;
		return 0;
	}
	/* clear confirmation addr */
	confirm = (volatile uint32_t *)sc->cmd;
	*confirm = 0;
	wmb();
	/* send a reload command to the bootstrap MCP, and wait for the
	   response in the confirmation address.  The firmware should
	   write a -1 there to indicate it is alive and well
	*/
	dma_low = MXGE_LOWPART_TO_U32(sc->cmd_dma.bus_addr);
	dma_high = MXGE_HIGHPART_TO_U32(sc->cmd_dma.bus_addr);

	buf[0] = htobe32(dma_high);	/* confirm addr MSW */
	buf[1] = htobe32(dma_low);	/* confirm addr LSW */
	buf[2] = htobe32(0xffffffff);	/* confirm data */

	/* FIX: All newest firmware should un-protect the bottom of
	   the sram before handoff. However, the very first interfaces
	   do not. Therefore the handoff copy must skip the first 8 bytes
	*/
					/* where the code starts*/
	buf[3] = htobe32(MXGE_FW_OFFSET + 8);
	buf[4] = htobe32(size - 8); 	/* length of code */
	buf[5] = htobe32(8);		/* where to copy to */
	buf[6] = htobe32(0);		/* where to jump to */

	submit = (volatile char *)(sc->sram + MXGEFW_BOOT_HANDOFF);
	mxge_pio_copy(submit, buf, 64);
	wmb();
	DELAY(1000);
	wmb();
	i = 0;
	while (*confirm != 0xffffffff && i < 20) {
		DELAY(1000);
		i++;
		bus_dmamap_sync(sc->cmd_dma.dmat,
				sc->cmd_dma.map, BUS_DMASYNC_POSTREAD);
	}
	if (*confirm != 0xffffffff) {
		device_printf(sc->dev,"handoff failed (%p = 0x%x)",
			      confirm, *confirm);
		return ENXIO;
	}
	return 0;
}
static int
mxge_update_mac_address(mxge_softc_t *sc)
{
	mxge_cmd_t cmd;
	uint8_t *addr = sc->mac_addr;
	int status;

	cmd.data0 = ((addr[0] << 24) | (addr[1] << 16)
		     | (addr[2] << 8) | addr[3]);
	cmd.data1 = ((addr[4] << 8) | (addr[5]));
	status = mxge_send_cmd(sc, MXGEFW_SET_MAC_ADDRESS, &cmd);
	return status;
}

static int
mxge_change_pause(mxge_softc_t *sc, int pause)
{
	mxge_cmd_t cmd;
	int status;

	if (pause)
		status = mxge_send_cmd(sc, MXGEFW_ENABLE_FLOW_CONTROL,
				       &cmd);
	else
		status = mxge_send_cmd(sc, MXGEFW_DISABLE_FLOW_CONTROL,
				       &cmd);
	if (status) {
		device_printf(sc->dev, "Failed to set flow control mode\n");
		return ENXIO;
	}
	sc->pause = pause;
	return 0;
}

static void
mxge_change_promisc(mxge_softc_t *sc, int promisc)
{
	mxge_cmd_t cmd;
	int status;

	if (mxge_always_promisc)
		promisc = 1;

	if (promisc)
		status = mxge_send_cmd(sc, MXGEFW_ENABLE_PROMISC,
				       &cmd);
	else
		status = mxge_send_cmd(sc, MXGEFW_DISABLE_PROMISC,
				       &cmd);
	if (status)
		device_printf(sc->dev, "Failed to set promisc mode\n");
}
static void
mxge_set_multicast_list(mxge_softc_t *sc)
{
	mxge_cmd_t cmd;
	struct ifmultiaddr *ifma;
	struct ifnet *ifp = sc->ifp;
	int err;

	/* This firmware is known to not support multicast */
	if (!sc->fw_multicast_support)
		return;

	/* Disable multicast filtering while we play with the lists*/
	err = mxge_send_cmd(sc, MXGEFW_ENABLE_ALLMULTI, &cmd);
	if (err != 0) {
		device_printf(sc->dev, "Failed MXGEFW_ENABLE_ALLMULTI,"
			      " error status: %d\n", err);
		return;
	}

	if (sc->adopted_rx_filter_bug)
		return;

	if (ifp->if_flags & IFF_ALLMULTI)
		/* request to disable multicast filtering, so quit here */
		return;

	/* Flush all the filters */
	err = mxge_send_cmd(sc, MXGEFW_LEAVE_ALL_MULTICAST_GROUPS, &cmd);
	if (err != 0) {
		device_printf(sc->dev,
			      "Failed MXGEFW_LEAVE_ALL_MULTICAST_GROUPS"
			      ", error status: %d\n", err);
		return;
	}

	/* Walk the multicast list, and add each address */
	if_maddr_rlock(ifp);
	TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
		if (ifma->ifma_addr->sa_family != AF_LINK)
			continue;
		bcopy(LLADDR((struct sockaddr_dl *)ifma->ifma_addr),
		      &cmd.data0, 4);
		bcopy(LLADDR((struct sockaddr_dl *)ifma->ifma_addr) + 4,
		      &cmd.data1, 2);
		cmd.data0 = htonl(cmd.data0);
		cmd.data1 = htonl(cmd.data1);
		err = mxge_send_cmd(sc, MXGEFW_JOIN_MULTICAST_GROUP, &cmd);
		if (err != 0) {
			device_printf(sc->dev, "Failed "
				      "MXGEFW_JOIN_MULTICAST_GROUP, error status:"
				      "%d\n", err);
			/* abort, leaving multicast filtering off */
			if_maddr_runlock(ifp);
			return;
		}
	}
	if_maddr_runlock(ifp);
	/* Enable multicast filtering */
	err = mxge_send_cmd(sc, MXGEFW_DISABLE_ALLMULTI, &cmd);
	if (err != 0) {
		device_printf(sc->dev, "Failed MXGEFW_DISABLE_ALLMULTI"
			      ", error status: %d\n", err);
	}
}
static int
mxge_max_mtu(mxge_softc_t *sc)
{
	mxge_cmd_t cmd;
	int status;

	if (MJUMPAGESIZE - MXGEFW_PAD > MXGEFW_MAX_MTU)
		return MXGEFW_MAX_MTU - MXGEFW_PAD;

	/* try to set nbufs to see if we can
	   use virtually contiguous jumbos */
	cmd.data0 = 0;
	status = mxge_send_cmd(sc, MXGEFW_CMD_ALWAYS_USE_N_BIG_BUFFERS,
			       &cmd);
	if (status == 0)
		return MXGEFW_MAX_MTU - MXGEFW_PAD;

	/* otherwise, we're limited to MJUMPAGESIZE */
	return MJUMPAGESIZE - MXGEFW_PAD;
}
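/*
 * Worked numbers (illustrative, assuming 4KB pages and MXGEFW_PAD == 2):
 * MJUMPAGESIZE - MXGEFW_PAD = 4096 - 2 = 4094, well below the firmware's
 * MXGEFW_MAX_MTU, so the larger MTU is only offered when the firmware
 * accepts MXGEFW_CMD_ALWAYS_USE_N_BIG_BUFFERS and can therefore chain
 * page-sized buffers into one virtually contiguous jumbo frame.
 */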
static int
mxge_reset(mxge_softc_t *sc, int interrupts_setup)
{
	struct mxge_slice_state *ss;
	mxge_rx_done_t *rx_done;
	volatile uint32_t *irq_claim;
	mxge_cmd_t cmd;
	int slice, status;

	/* try to send a reset command to the card to see if it
	   is alive */
	memset(&cmd, 0, sizeof (cmd));
	status = mxge_send_cmd(sc, MXGEFW_CMD_RESET, &cmd);
	if (status != 0) {
		device_printf(sc->dev, "failed reset\n");
		return ENXIO;
	}

	mxge_dummy_rdma(sc, 1);

	/* set the intrq size */
	cmd.data0 = sc->rx_ring_size;
	status = mxge_send_cmd(sc, MXGEFW_CMD_SET_INTRQ_SIZE, &cmd);

	/*
	 * Even though we already know how many slices are supported
	 * via mxge_slice_probe(), MXGEFW_CMD_GET_MAX_RSS_QUEUES
	 * has magic side effects, and must be called after a reset.
	 * It must be called prior to calling any RSS related cmds,
	 * including assigning an interrupt queue for anything but
	 * slice 0.  It must also be called *after*
	 * MXGEFW_CMD_SET_INTRQ_SIZE, since the intrq size is used by
	 * the firmware to compute offsets.
	 */
	if (sc->num_slices > 1) {
		/* ask the maximum number of slices it supports */
		status = mxge_send_cmd(sc, MXGEFW_CMD_GET_MAX_RSS_QUEUES,
				       &cmd);
		if (status != 0) {
			device_printf(sc->dev,
				      "failed to get number of slices\n");
			return status;
		}
		/*
		 * MXGEFW_CMD_ENABLE_RSS_QUEUES must be called prior
		 * to setting up the interrupt queue DMA
		 */
		cmd.data0 = sc->num_slices;
		cmd.data1 = MXGEFW_SLICE_INTR_MODE_ONE_PER_SLICE;
#ifdef IFNET_BUF_RING
		cmd.data1 |= MXGEFW_SLICE_ENABLE_MULTIPLE_TX_QUEUES;
#endif
		status = mxge_send_cmd(sc, MXGEFW_CMD_ENABLE_RSS_QUEUES,
				       &cmd);
		if (status != 0) {
			device_printf(sc->dev,
				      "failed to set number of slices\n");
			return status;
		}
	}

	if (interrupts_setup) {
		/* Now exchange information about interrupts  */
		for (slice = 0; slice < sc->num_slices; slice++) {
			rx_done = &sc->ss[slice].rx_done;
			memset(rx_done->entry, 0, sc->rx_ring_size);
			cmd.data0 = MXGE_LOWPART_TO_U32(rx_done->dma.bus_addr);
			cmd.data1 = MXGE_HIGHPART_TO_U32(rx_done->dma.bus_addr);
			cmd.data2 = slice;
			status |= mxge_send_cmd(sc,
						MXGEFW_CMD_SET_INTRQ_DMA,
						&cmd);
		}
	}

	status |= mxge_send_cmd(sc,
				MXGEFW_CMD_GET_INTR_COAL_DELAY_OFFSET, &cmd);
	sc->intr_coal_delay_ptr = (volatile uint32_t *)(sc->sram + cmd.data0);

	status |= mxge_send_cmd(sc, MXGEFW_CMD_GET_IRQ_ACK_OFFSET, &cmd);
	irq_claim = (volatile uint32_t *)(sc->sram + cmd.data0);

	status |= mxge_send_cmd(sc, MXGEFW_CMD_GET_IRQ_DEASSERT_OFFSET,
				&cmd);
	sc->irq_deassert = (volatile uint32_t *)(sc->sram + cmd.data0);
	if (status != 0) {
		device_printf(sc->dev, "failed set interrupt parameters\n");
		return status;
	}

	*sc->intr_coal_delay_ptr = htobe32(sc->intr_coal_delay);

	/* run a DMA benchmark */
	(void) mxge_dma_test(sc, MXGEFW_DMA_TEST);

	for (slice = 0; slice < sc->num_slices; slice++) {
		ss = &sc->ss[slice];

		ss->irq_claim = irq_claim + (2 * slice);
		/* reset mcp/driver shared state back to 0 */
		ss->rx_done.idx = 0;
		ss->rx_done.cnt = 0;
		ss->tx.req = 0;
		ss->tx.done = 0;
		ss->tx.pkt_done = 0;
		ss->tx.queue_active = 0;
		ss->tx.activate = 0;
		ss->tx.deactivate = 0;
		ss->rx_big.cnt = 0;
		ss->rx_small.cnt = 0;
		ss->lro_bad_csum = 0;
		ss->lro_queued = 0;
		ss->lro_flushed = 0;
		if (ss->fw_stats != NULL) {
			bzero(ss->fw_stats, sizeof *ss->fw_stats);
		}
	}
	sc->rdma_tags_available = 15;
	status = mxge_update_mac_address(sc);
	mxge_change_promisc(sc, sc->ifp->if_flags & IFF_PROMISC);
	mxge_change_pause(sc, sc->pause);
	mxge_set_multicast_list(sc);
	if (sc->throttle) {
		cmd.data0 = sc->throttle;
		if (mxge_send_cmd(sc, MXGEFW_CMD_SET_THROTTLE_FACTOR,
				  &cmd)) {
			device_printf(sc->dev,
				      "can't enable throttle\n");
		}
	}
	return status;
}
static int
mxge_change_throttle(SYSCTL_HANDLER_ARGS)
{
	mxge_cmd_t cmd;
	mxge_softc_t *sc;
	int err;
	unsigned int throttle;

	sc = arg1;
	throttle = sc->throttle;
	err = sysctl_handle_int(oidp, &throttle, arg2, req);
	if (err != 0)
		return err;

	if (throttle == sc->throttle)
		return 0;

	if (throttle < MXGE_MIN_THROTTLE || throttle > MXGE_MAX_THROTTLE)
		return EINVAL;

	mtx_lock(&sc->driver_mtx);
	cmd.data0 = throttle;
	err = mxge_send_cmd(sc, MXGEFW_CMD_SET_THROTTLE_FACTOR, &cmd);
	if (err == 0)
		sc->throttle = throttle;
	mtx_unlock(&sc->driver_mtx);
	return err;
}

static int
mxge_change_intr_coal(SYSCTL_HANDLER_ARGS)
{
	mxge_softc_t *sc;
	unsigned int intr_coal_delay;
	int err;

	sc = arg1;
	intr_coal_delay = sc->intr_coal_delay;
	err = sysctl_handle_int(oidp, &intr_coal_delay, arg2, req);
	if (err != 0)
		return err;
	if (intr_coal_delay == sc->intr_coal_delay)
		return 0;

	if (intr_coal_delay == 0 || intr_coal_delay > 1000*1000)
		return EINVAL;

	mtx_lock(&sc->driver_mtx);
	*sc->intr_coal_delay_ptr = htobe32(intr_coal_delay);
	sc->intr_coal_delay = intr_coal_delay;
	mtx_unlock(&sc->driver_mtx);
	return err;
}

static int
mxge_change_flow_control(SYSCTL_HANDLER_ARGS)
{
	mxge_softc_t *sc;
	unsigned int enabled;
	int err;

	sc = arg1;
	enabled = sc->pause;
	err = sysctl_handle_int(oidp, &enabled, arg2, req);
	if (err != 0)
		return err;
	if (enabled == sc->pause)
		return 0;

	mtx_lock(&sc->driver_mtx);
	err = mxge_change_pause(sc, enabled);
	mtx_unlock(&sc->driver_mtx);
	return err;
}

static int
mxge_change_lro_locked(mxge_softc_t *sc, int lro_cnt)
{
	struct ifnet *ifp;
	int err = 0;

	ifp = sc->ifp;
	if (lro_cnt == 0)
		ifp->if_capenable &= ~IFCAP_LRO;
	else
		ifp->if_capenable |= IFCAP_LRO;
	sc->lro_cnt = lro_cnt;
	if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
		mxge_close(sc, 0);
		err = mxge_open(sc);
	}
	return err;
}

static int
mxge_change_lro(SYSCTL_HANDLER_ARGS)
{
	mxge_softc_t *sc;
	unsigned int lro_cnt;
	int err;

	sc = arg1;
	lro_cnt = sc->lro_cnt;
	err = sysctl_handle_int(oidp, &lro_cnt, arg2, req);
	if (err != 0)
		return err;

	if (lro_cnt == sc->lro_cnt)
		return 0;

	mtx_lock(&sc->driver_mtx);
	err = mxge_change_lro_locked(sc, lro_cnt);
	mtx_unlock(&sc->driver_mtx);
	return err;
}

static int
mxge_handle_be32(SYSCTL_HANDLER_ARGS)
{
	int err;

	if (arg1 == NULL)
		return EFAULT;
	arg2 = be32toh(*(int *)arg1);
	arg1 = NULL;
	err = sysctl_handle_int(oidp, arg1, arg2, req);

	return err;
}
static void
mxge_rem_sysctls(mxge_softc_t *sc)
{
	struct mxge_slice_state *ss;
	int slice;

	if (sc->slice_sysctl_tree == NULL)
		return;

	for (slice = 0; slice < sc->num_slices; slice++) {
		ss = &sc->ss[slice];
		if (ss == NULL || ss->sysctl_tree == NULL)
			continue;
		sysctl_ctx_free(&ss->sysctl_ctx);
		ss->sysctl_tree = NULL;
	}
	sysctl_ctx_free(&sc->slice_sysctl_ctx);
	sc->slice_sysctl_tree = NULL;
}
static void
mxge_add_sysctls(mxge_softc_t *sc)
{
	struct sysctl_ctx_list *ctx;
	struct sysctl_oid_list *children;
	mcp_irq_data_t *fw;
	struct mxge_slice_state *ss;
	int slice;
	char slice_num[8];

	ctx = device_get_sysctl_ctx(sc->dev);
	children = SYSCTL_CHILDREN(device_get_sysctl_tree(sc->dev));
	fw = sc->ss[0].fw_stats;

	/* random information */
	SYSCTL_ADD_STRING(ctx, children, OID_AUTO,
			  "firmware_version",
			  CTLFLAG_RD, &sc->fw_version,
			  0, "firmware version");
	SYSCTL_ADD_STRING(ctx, children, OID_AUTO,
			  "serial_number",
			  CTLFLAG_RD, &sc->serial_number_string,
			  0, "serial number");
	SYSCTL_ADD_STRING(ctx, children, OID_AUTO,
			  "product_code",
			  CTLFLAG_RD, &sc->product_code_string,
			  0, "product code");
	SYSCTL_ADD_INT(ctx, children, OID_AUTO,
		       "pcie_link_width",
		       CTLFLAG_RD, &sc->link_width,
		       0, "link width");
	SYSCTL_ADD_INT(ctx, children, OID_AUTO,
		       "tx_boundary",
		       CTLFLAG_RD, &sc->tx_boundary,
		       0, "tx boundary");
	SYSCTL_ADD_INT(ctx, children, OID_AUTO,
		       "write_combine",
		       CTLFLAG_RD, &sc->wc,
		       0, "write combining PIO?");
	SYSCTL_ADD_INT(ctx, children, OID_AUTO,
		       "read_dma_MBs",
		       CTLFLAG_RD, &sc->read_dma,
		       0, "DMA Read speed in MB/s");
	SYSCTL_ADD_INT(ctx, children, OID_AUTO,
		       "write_dma_MBs",
		       CTLFLAG_RD, &sc->write_dma,
		       0, "DMA Write speed in MB/s");
	SYSCTL_ADD_INT(ctx, children, OID_AUTO,
		       "read_write_dma_MBs",
		       CTLFLAG_RD, &sc->read_write_dma,
		       0, "DMA concurrent Read/Write speed in MB/s");
	SYSCTL_ADD_INT(ctx, children, OID_AUTO,
		       "watchdog_resets",
		       CTLFLAG_RD, &sc->watchdog_resets,
		       0, "Number of times NIC was reset");

	/* performance related tunables */
	SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
			"intr_coal_delay",
			CTLTYPE_INT|CTLFLAG_RW, sc,
			0, mxge_change_intr_coal,
			"I", "interrupt coalescing delay in usecs");

	SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
			"throttle",
			CTLTYPE_INT|CTLFLAG_RW, sc,
			0, mxge_change_throttle,
			"I", "transmit throttling");

	SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
			"flow_control_enabled",
			CTLTYPE_INT|CTLFLAG_RW, sc,
			0, mxge_change_flow_control,
			"I", "enable flow control (pause frames)");

	SYSCTL_ADD_INT(ctx, children, OID_AUTO,
		       "deassert_wait",
		       CTLFLAG_RW, &mxge_deassert_wait,
		       0, "Wait for IRQ line to go low in ihandler");

	/* stats block from firmware is in network byte order.
	   Need to swap it */
	SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
			"link_up",
			CTLTYPE_INT|CTLFLAG_RD, &fw->link_up,
			0, mxge_handle_be32,
			"I", "link up");
	SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
			"rdma_tags_available",
			CTLTYPE_INT|CTLFLAG_RD, &fw->rdma_tags_available,
			0, mxge_handle_be32,
			"I", "rdma_tags_available");
	SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
			"dropped_bad_crc32",
			CTLTYPE_INT|CTLFLAG_RD,
			&fw->dropped_bad_crc32,
			0, mxge_handle_be32,
			"I", "dropped_bad_crc32");
	SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
			"dropped_bad_phy",
			CTLTYPE_INT|CTLFLAG_RD,
			&fw->dropped_bad_phy,
			0, mxge_handle_be32,
			"I", "dropped_bad_phy");
	SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
			"dropped_link_error_or_filtered",
			CTLTYPE_INT|CTLFLAG_RD,
			&fw->dropped_link_error_or_filtered,
			0, mxge_handle_be32,
			"I", "dropped_link_error_or_filtered");
	SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
			"dropped_link_overflow",
			CTLTYPE_INT|CTLFLAG_RD, &fw->dropped_link_overflow,
			0, mxge_handle_be32,
			"I", "dropped_link_overflow");
	SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
			"dropped_multicast_filtered",
			CTLTYPE_INT|CTLFLAG_RD,
			&fw->dropped_multicast_filtered,
			0, mxge_handle_be32,
			"I", "dropped_multicast_filtered");
	SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
			"dropped_no_big_buffer",
			CTLTYPE_INT|CTLFLAG_RD, &fw->dropped_no_big_buffer,
			0, mxge_handle_be32,
			"I", "dropped_no_big_buffer");
	SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
			"dropped_no_small_buffer",
			CTLTYPE_INT|CTLFLAG_RD,
			&fw->dropped_no_small_buffer,
			0, mxge_handle_be32,
			"I", "dropped_no_small_buffer");
	SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
			"dropped_overrun",
			CTLTYPE_INT|CTLFLAG_RD, &fw->dropped_overrun,
			0, mxge_handle_be32,
			"I", "dropped_overrun");
	SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
			"dropped_pause",
			CTLTYPE_INT|CTLFLAG_RD,
			&fw->dropped_pause,
			0, mxge_handle_be32,
			"I", "dropped_pause");
	SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
			"dropped_runt",
			CTLTYPE_INT|CTLFLAG_RD, &fw->dropped_runt,
			0, mxge_handle_be32,
			"I", "dropped_runt");

	SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
			"dropped_unicast_filtered",
			CTLTYPE_INT|CTLFLAG_RD, &fw->dropped_unicast_filtered,
			0, mxge_handle_be32,
			"I", "dropped_unicast_filtered");

	/* verbose printing? */
	SYSCTL_ADD_INT(ctx, children, OID_AUTO,
		       "verbose",
		       CTLFLAG_RW, &mxge_verbose,
		       0, "verbose printing");

	/* lro */
	SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
			"lro_cnt",
			CTLTYPE_INT|CTLFLAG_RW, sc,
			0, mxge_change_lro,
			"I", "number of lro merge queues");

	/* add counters exported for debugging from all slices */
	sysctl_ctx_init(&sc->slice_sysctl_ctx);
	sc->slice_sysctl_tree =
		SYSCTL_ADD_NODE(&sc->slice_sysctl_ctx, children, OID_AUTO,
				"slice", CTLFLAG_RD, 0, "");

	for (slice = 0; slice < sc->num_slices; slice++) {
		ss = &sc->ss[slice];
		sysctl_ctx_init(&ss->sysctl_ctx);
		ctx = &ss->sysctl_ctx;
		children = SYSCTL_CHILDREN(sc->slice_sysctl_tree);
		sprintf(slice_num, "%d", slice);
		ss->sysctl_tree =
			SYSCTL_ADD_NODE(ctx, children, OID_AUTO, slice_num,
					CTLFLAG_RD, 0, "");
		children = SYSCTL_CHILDREN(ss->sysctl_tree);
		SYSCTL_ADD_INT(ctx, children, OID_AUTO,
			       "rx_small_cnt",
			       CTLFLAG_RD, &ss->rx_small.cnt,
			       0, "rx_small_cnt");
		SYSCTL_ADD_INT(ctx, children, OID_AUTO,
			       "rx_big_cnt",
			       CTLFLAG_RD, &ss->rx_big.cnt,
			       0, "rx_big_cnt");
		SYSCTL_ADD_INT(ctx, children, OID_AUTO,
			       "lro_flushed", CTLFLAG_RD, &ss->lro_flushed,
			       0, "number of lro merge queues flushed");

		SYSCTL_ADD_INT(ctx, children, OID_AUTO,
			       "lro_queued", CTLFLAG_RD, &ss->lro_queued,
			       0, "number of frames appended to lro merge"
			       " queues");

#ifndef IFNET_BUF_RING
		/* only transmit from slice 0 for now */
		if (slice > 0)
			continue;
#endif
		SYSCTL_ADD_INT(ctx, children, OID_AUTO,
			       "tx_req",
			       CTLFLAG_RD, &ss->tx.req,
			       0, "tx_req");

		SYSCTL_ADD_INT(ctx, children, OID_AUTO,
			       "tx_done",
			       CTLFLAG_RD, &ss->tx.done,
			       0, "tx_done");
		SYSCTL_ADD_INT(ctx, children, OID_AUTO,
			       "tx_pkt_done",
			       CTLFLAG_RD, &ss->tx.pkt_done,
			       0, "tx_pkt_done");
		SYSCTL_ADD_INT(ctx, children, OID_AUTO,
			       "tx_stall",
			       CTLFLAG_RD, &ss->tx.stall,
			       0, "tx_stall");
		SYSCTL_ADD_INT(ctx, children, OID_AUTO,
			       "tx_wake",
			       CTLFLAG_RD, &ss->tx.wake,
			       0, "tx_wake");
		SYSCTL_ADD_INT(ctx, children, OID_AUTO,
			       "tx_defrag",
			       CTLFLAG_RD, &ss->tx.defrag,
			       0, "tx_defrag");
		SYSCTL_ADD_INT(ctx, children, OID_AUTO,
			       "tx_queue_active",
			       CTLFLAG_RD, &ss->tx.queue_active,
			       0, "tx_queue_active");
		SYSCTL_ADD_INT(ctx, children, OID_AUTO,
			       "tx_activate",
			       CTLFLAG_RD, &ss->tx.activate,
			       0, "tx_activate");
		SYSCTL_ADD_INT(ctx, children, OID_AUTO,
			       "tx_deactivate",
			       CTLFLAG_RD, &ss->tx.deactivate,
			       0, "tx_deactivate");
	}
}
/* copy an array of mcp_kreq_ether_send_t's to the mcp.  Copy
   backwards one at a time and handle ring wraps */

static inline void
mxge_submit_req_backwards(mxge_tx_ring_t *tx,
			  mcp_kreq_ether_send_t *src, int cnt)
{
	int idx, starting_slot;
	starting_slot = tx->req;
	while (cnt > 1) {
		cnt--;
		idx = (starting_slot + cnt) & tx->mask;
		mxge_pio_copy(&tx->lanai[idx],
			      &src[cnt], sizeof(*src));
		wmb();
	}
}

/*
 * copy an array of mcp_kreq_ether_send_t's to the mcp.  Copy
 * at most 32 bytes at a time, so as to avoid involving the software
 * pio handler in the nic.  We re-write the first segment's flags
 * to mark them valid only after writing the entire chain
 */

static inline void
mxge_submit_req(mxge_tx_ring_t *tx, mcp_kreq_ether_send_t *src,
		int cnt)
{
	int idx, i;
	uint32_t *src_ints;
	volatile uint32_t *dst_ints;
	mcp_kreq_ether_send_t *srcp;
	volatile mcp_kreq_ether_send_t *dstp, *dst;
	uint8_t last_flags;

	idx = tx->req & tx->mask;

	last_flags = src->flags;
	src->flags = 0;
	wmb();
	dst = dstp = &tx->lanai[idx];
	srcp = src;

	if ((idx + cnt) < tx->mask) {
		for (i = 0; i < (cnt - 1); i += 2) {
			mxge_pio_copy(dstp, srcp, 2 * sizeof(*src));
			wmb(); /* force write every 32 bytes */
			srcp += 2;
			dstp += 2;
		}
	} else {
		/* submit all but the first request, and ensure
		   that it is submitted below */
		mxge_submit_req_backwards(tx, src, cnt);
		i = 0;
	}
	if (i < cnt) {
		/* submit the first request */
		mxge_pio_copy(dstp, srcp, sizeof(*src));
		wmb(); /* barrier before setting valid flag */
	}

	/* re-write the last 32-bits with the valid flags */
	src->flags = last_flags;
	src_ints = (uint32_t *)src;
	src_ints += 3;
	dst_ints = (volatile uint32_t *)dst;
	dst_ints += 3;
	*dst_ints = *src_ints;
	tx->req += cnt;
	wmb();
}
#if IFCAP_TSO4

static void
mxge_encap_tso(struct mxge_slice_state *ss, struct mbuf *m,
	       int busdma_seg_cnt, int ip_off)
{
	mxge_tx_ring_t *tx;
	mcp_kreq_ether_send_t *req;
	bus_dma_segment_t *seg;
	struct ip *ip;
	struct tcphdr *tcp;
	uint32_t low, high_swapped;
	int len, seglen, cum_len, cum_len_next;
	int next_is_first, chop, cnt, rdma_count, small;
	uint16_t pseudo_hdr_offset, cksum_offset, mss;
	uint8_t flags, flags_next;

	mss = m->m_pkthdr.tso_segsz;

	/* negative cum_len signifies to the
	 * send loop that we are still in the
	 * header portion of the TSO packet.
	 */

	/* ensure we have the ethernet, IP and TCP
	   header together in the first mbuf, copy
	   it to a scratch buffer if not */
	if (__predict_false(m->m_len < ip_off + sizeof (*ip))) {
		m_copydata(m, 0, ip_off + sizeof (*ip),
			   ss->scratch);
		ip = (struct ip *)(ss->scratch + ip_off);
	} else {
		ip = (struct ip *)(mtod(m, char *) + ip_off);
	}
	if (__predict_false(m->m_len < ip_off + (ip->ip_hl << 2)
			    + sizeof (*tcp))) {
		m_copydata(m, 0, ip_off + (ip->ip_hl << 2)
			   + sizeof (*tcp), ss->scratch);
		ip = (struct ip *)(mtod(m, char *) + ip_off);
	}

	tcp = (struct tcphdr *)((char *)ip + (ip->ip_hl << 2));
	cum_len = -(ip_off + ((ip->ip_hl + tcp->th_off) << 2));
	cksum_offset = ip_off + (ip->ip_hl << 2);

	/* TSO implies checksum offload on this hardware */
	if (__predict_false((m->m_pkthdr.csum_flags & (CSUM_TCP)) == 0)) {
		/*
		 * If packet has full TCP csum, replace it with pseudo hdr
		 * sum that the NIC expects, otherwise the NIC will emit
		 * packets with bad TCP checksums.
		 */
		m->m_pkthdr.csum_flags = CSUM_TCP;
		m->m_pkthdr.csum_data = offsetof(struct tcphdr, th_sum);
		tcp->th_sum = in_pseudo(ip->ip_src.s_addr, ip->ip_dst.s_addr,
			htons(IPPROTO_TCP + (m->m_pkthdr.len - cksum_offset)));
	}
	flags = MXGEFW_FLAGS_TSO_HDR | MXGEFW_FLAGS_FIRST;

	/* for TSO, pseudo_hdr_offset holds mss.
	 * The firmware figures out where to put
	 * the checksum by parsing the header. */
	pseudo_hdr_offset = htobe16(mss);

	tx = &ss->tx;
	req = tx->req_list;
	seg = tx->seg_list;
	cnt = 0;
	rdma_count = 0;
	/* "rdma_count" is the number of RDMAs belonging to the
	 * current packet BEFORE the current send request. For
	 * non-TSO packets, this is equal to "count".
	 * For TSO packets, rdma_count needs to be reset
	 * to 0 after a segment cut.
	 *
	 * The rdma_count field of the send request is
	 * the number of RDMAs of the packet starting at
	 * that request. For TSO send requests with one or more cuts
	 * in the middle, this is the number of RDMAs starting
	 * after the last cut in the request. All previous
	 * segments before the last cut implicitly have 1 RDMA.
	 *
	 * Since the number of RDMAs is not known beforehand,
	 * it must be filled-in retroactively - after each
	 * segmentation cut or at the end of the entire packet.
	 */

	while (busdma_seg_cnt) {
		/* Break the busdma segment up into pieces*/
		low = MXGE_LOWPART_TO_U32(seg->ds_addr);
		high_swapped = htobe32(MXGE_HIGHPART_TO_U32(seg->ds_addr));
		len = seg->ds_len;

		while (len) {
			flags_next = flags & ~MXGEFW_FLAGS_FIRST;
			seglen = len;
			cum_len_next = cum_len + seglen;
			(req-rdma_count)->rdma_count = rdma_count + 1;
			if (__predict_true(cum_len >= 0)) {
				/* payload */
				chop = (cum_len_next > mss);
				cum_len_next = cum_len_next % mss;
				next_is_first = (cum_len_next == 0);
				flags |= chop * MXGEFW_FLAGS_TSO_CHOP;
				flags_next |= next_is_first *
					MXGEFW_FLAGS_FIRST;
				rdma_count |= -(chop | next_is_first);
				rdma_count += chop & !next_is_first;
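				/*
				 * Editorial note: chop and next_is_first
				 * are 0/1, so -(chop | next_is_first) is
				 * either 0 or ~0.  ORing ~0 in resets the
				 * running count to -1 at a segment cut, and
				 * "+ (chop & !next_is_first)" nudges it to 0
				 * when the cut lands mid-mss, so rdma_count
				 * restarts cleanly after every cut without
				 * a branch.
				 */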
			} else if (cum_len_next >= 0) {
				/* header ends */
				rdma_count = -1;
				cum_len_next = 0;
				seglen = -cum_len;
				small = (mss <= MXGEFW_SEND_SMALL_SIZE);
				flags_next = MXGEFW_FLAGS_TSO_PLD |
					MXGEFW_FLAGS_FIRST |
					(small * MXGEFW_FLAGS_SMALL);
			}

			req->addr_high = high_swapped;
			req->addr_low = htobe32(low);
			req->pseudo_hdr_offset = pseudo_hdr_offset;
			req->pad = 0;
			req->rdma_count = 1;
			req->length = htobe16(seglen);
			req->cksum_offset = cksum_offset;
			req->flags = flags | ((cum_len & 1) *
					      MXGEFW_FLAGS_ALIGN_ODD);
			low += seglen;
			len -= seglen;
			cum_len = cum_len_next;
			flags = flags_next;
			req++;
			cnt++;
			rdma_count++;
			if (__predict_false(cksum_offset > seglen))
				cksum_offset -= seglen;
			else
				cksum_offset = 0;
			if (__predict_false(cnt > tx->max_desc))
				goto drop;
		}
		busdma_seg_cnt--;
		seg++;
	}
	(req-rdma_count)->rdma_count = rdma_count;

	do {
		req--;
		req->flags |= MXGEFW_FLAGS_TSO_LAST;
	} while (!(req->flags & (MXGEFW_FLAGS_TSO_CHOP | MXGEFW_FLAGS_FIRST)));

	tx->info[((cnt - 1) + tx->req) & tx->mask].flag = 1;
	mxge_submit_req(tx, tx->req_list, cnt);
#ifdef IFNET_BUF_RING
	if ((ss->sc->num_slices > 1) && tx->queue_active == 0) {
		/* tell the NIC to start polling this slice */
		*tx->send_go = 1;
		tx->queue_active = 1;
		tx->activate++;
		wmb();
	}
#endif
	return;

drop:
	bus_dmamap_unload(tx->dmat, tx->info[tx->req & tx->mask].map);
	m_freem(m);
	printf("tx->max_desc exceeded via TSO!\n");
	printf("mss = %d, %ld, %d!\n", mss,
	       (long)seg - (long)tx->seg_list, tx->max_desc);

	return;
}

#endif /* IFCAP_TSO4 */
#ifdef MXGE_NEW_VLAN_API
/*
 * We reproduce the software vlan tag insertion from
 * net/if_vlan.c:vlan_start() here so that we can advertise "hardware"
 * vlan tag insertion.  We need to advertise this in order to have the
 * vlan interface respect our csum offload flags.
 */
static struct mbuf *
mxge_vlan_tag_insert(struct mbuf *m)
{
	struct ether_vlan_header *evl;

	M_PREPEND(m, ETHER_VLAN_ENCAP_LEN, M_DONTWAIT);
	if (__predict_false(m == NULL))
		return NULL;
	if (m->m_len < sizeof(*evl)) {
		m = m_pullup(m, sizeof(*evl));
		if (__predict_false(m == NULL))
			return NULL;
	}
	/*
	 * Transform the Ethernet header into an Ethernet header
	 * with 802.1Q encapsulation.
	 */
	evl = mtod(m, struct ether_vlan_header *);
	bcopy((char *)evl + ETHER_VLAN_ENCAP_LEN,
	      (char *)evl, ETHER_HDR_LEN - ETHER_TYPE_LEN);
	evl->evl_encap_proto = htons(ETHERTYPE_VLAN);
	evl->evl_tag = htons(m->m_pkthdr.ether_vtag);
	m->m_flags &= ~M_VLANTAG;
	return m;
}
#endif /* MXGE_NEW_VLAN_API */
static void
mxge_encap(struct mxge_slice_state *ss, struct mbuf *m)
{
	mxge_softc_t *sc;
	mcp_kreq_ether_send_t *req;
	bus_dma_segment_t *seg;
	struct mbuf *m_tmp;
	struct ifnet *ifp;
	mxge_tx_ring_t *tx;
	struct ip *ip;
	int cnt, cum_len, err, i, idx, odd_flag, ip_off;
	uint16_t pseudo_hdr_offset;
	uint8_t flags, cksum_offset;

	sc = ss->sc;
	ifp = sc->ifp;
	tx = &ss->tx;

	ip_off = sizeof (struct ether_header);
#ifdef MXGE_NEW_VLAN_API
	if (m->m_flags & M_VLANTAG) {
		m = mxge_vlan_tag_insert(m);
		if (__predict_false(m == NULL))
			goto drop;
		ip_off += ETHER_VLAN_ENCAP_LEN;
	}
#endif
	/* (try to) map the frame for DMA */
	idx = tx->req & tx->mask;
	err = bus_dmamap_load_mbuf_sg(tx->dmat, tx->info[idx].map,
				      m, tx->seg_list, &cnt,
				      BUS_DMA_NOWAIT);
	if (__predict_false(err == EFBIG)) {
		/* Too many segments in the chain.  Try
		   to defrag */
		m_tmp = m_defrag(m, M_NOWAIT);
		if (m_tmp == NULL) {
			goto drop;
		}
		ss->tx.defrag++;
		m = m_tmp;
		err = bus_dmamap_load_mbuf_sg(tx->dmat,
					      tx->info[idx].map,
					      m, tx->seg_list, &cnt,
					      BUS_DMA_NOWAIT);
	}
	if (__predict_false(err != 0)) {
		device_printf(sc->dev, "bus_dmamap_load_mbuf_sg returned %d"
			      " packet len = %d\n", err, m->m_pkthdr.len);
		goto drop;
	}
	bus_dmamap_sync(tx->dmat, tx->info[idx].map,
			BUS_DMASYNC_PREWRITE);
	tx->info[idx].m = m;

#if IFCAP_TSO4
	/* TSO is different enough, we handle it in another routine */
	if (m->m_pkthdr.csum_flags & (CSUM_TSO)) {
		mxge_encap_tso(ss, m, cnt, ip_off);
		return;
	}
#endif

	req = tx->req_list;
	cksum_offset = 0;
	pseudo_hdr_offset = 0;
	flags = MXGEFW_FLAGS_NO_TSO;

	/* checksum offloading? */
	if (m->m_pkthdr.csum_flags & (CSUM_DELAY_DATA)) {
		/* ensure ip header is in first mbuf, copy
		   it to a scratch buffer if not */
		if (__predict_false(m->m_len < ip_off + sizeof (*ip))) {
			m_copydata(m, 0, ip_off + sizeof (*ip),
				   ss->scratch);
			ip = (struct ip *)(ss->scratch + ip_off);
		} else {
			ip = (struct ip *)(mtod(m, char *) + ip_off);
		}
		cksum_offset = ip_off + (ip->ip_hl << 2);
		pseudo_hdr_offset = cksum_offset + m->m_pkthdr.csum_data;
		pseudo_hdr_offset = htobe16(pseudo_hdr_offset);
		req->cksum_offset = cksum_offset;
		flags |= MXGEFW_FLAGS_CKSUM;
		odd_flag = MXGEFW_FLAGS_ALIGN_ODD;
	} else {
		odd_flag = 0;
	}
	if (m->m_pkthdr.len < MXGEFW_SEND_SMALL_SIZE)
		flags |= MXGEFW_FLAGS_SMALL;

	/* convert segments into a request list */
	cum_len = 0;
	seg = tx->seg_list;
	req->flags = MXGEFW_FLAGS_FIRST;
	for (i = 0; i < cnt; i++) {
		req->addr_low =
			htobe32(MXGE_LOWPART_TO_U32(seg->ds_addr));
		req->addr_high =
			htobe32(MXGE_HIGHPART_TO_U32(seg->ds_addr));
		req->length = htobe16(seg->ds_len);
		req->cksum_offset = cksum_offset;
		if (cksum_offset > seg->ds_len)
			cksum_offset -= seg->ds_len;
		else
			cksum_offset = 0;
		req->pseudo_hdr_offset = pseudo_hdr_offset;
		req->pad = 0; /* complete solid 16-byte block */
		req->rdma_count = 1;
		req->flags |= flags | ((cum_len & 1) * odd_flag);
		cum_len += seg->ds_len;
		seg++;
		req++;
		req->flags = 0;
	}
	req--;
	/* pad runts to 60 bytes */
	if (cum_len < 60) {
		req++;
		req->addr_low =
			htobe32(MXGE_LOWPART_TO_U32(sc->zeropad_dma.bus_addr));
		req->addr_high =
			htobe32(MXGE_HIGHPART_TO_U32(sc->zeropad_dma.bus_addr));
		req->length = htobe16(60 - cum_len);
		req->cksum_offset = 0;
		req->pseudo_hdr_offset = pseudo_hdr_offset;
		req->pad = 0; /* complete solid 16-byte block */
		req->rdma_count = 1;
		req->flags |= flags | ((cum_len & 1) * odd_flag);
		cnt++;
	}

	tx->req_list[0].rdma_count = cnt;
#if 0
	/* print what the firmware will see */
	for (i = 0; i < cnt; i++) {
		printf("%d: addr: 0x%x 0x%x len:%d pso%d,"
		       "cso:%d, flags:0x%x, rdma:%d\n",
		       i, (int)ntohl(tx->req_list[i].addr_high),
		       (int)ntohl(tx->req_list[i].addr_low),
		       (int)ntohs(tx->req_list[i].length),
		       (int)ntohs(tx->req_list[i].pseudo_hdr_offset),
		       tx->req_list[i].cksum_offset, tx->req_list[i].flags,
		       tx->req_list[i].rdma_count);
	}
	printf("--------------\n");
#endif
	tx->info[((cnt - 1) + tx->req) & tx->mask].flag = 1;
	mxge_submit_req(tx, tx->req_list, cnt);
#ifdef IFNET_BUF_RING
	if ((ss->sc->num_slices > 1) && tx->queue_active == 0) {
		/* tell the NIC to start polling this slice */
		*tx->send_go = 1;
		tx->queue_active = 1;
		tx->activate++;
		wmb();
	}
#endif
	return;

drop:
	m_freem(m);
	return;
}
#ifdef IFNET_BUF_RING
static void
mxge_qflush(struct ifnet *ifp)
{
	mxge_softc_t *sc = ifp->if_softc;
	mxge_tx_ring_t *tx;
	struct mbuf *m;
	int slice;

	for (slice = 0; slice < sc->num_slices; slice++) {
		tx = &sc->ss[slice].tx;
		mtx_lock(&tx->mtx);
		while ((m = buf_ring_dequeue_sc(tx->br)) != NULL)
			m_freem(m);
		mtx_unlock(&tx->mtx);
	}
	if_qflush(ifp);
}

static inline void
mxge_start_locked(struct mxge_slice_state *ss)
{
	mxge_softc_t *sc;
	struct mbuf *m;
	struct ifnet *ifp;
	mxge_tx_ring_t *tx;

	sc = ss->sc;
	ifp = sc->ifp;
	tx = &ss->tx;

	while ((tx->mask - (tx->req - tx->done)) > tx->max_desc) {
		m = drbr_dequeue(ifp, tx->br);
		if (m == NULL) {
			return;
		}
		/* let BPF see it */
		BPF_MTAP(ifp, m);

		/* give it to the nic */
		mxge_encap(ss, m);
	}
	/* ran out of transmit slots */
	if (((ss->if_drv_flags & IFF_DRV_OACTIVE) == 0)
	    && (!drbr_empty(ifp, tx->br))) {
		ss->if_drv_flags |= IFF_DRV_OACTIVE;
		tx->stall++;
	}
}

static int
mxge_transmit_locked(struct mxge_slice_state *ss, struct mbuf *m)
{
	mxge_softc_t *sc;
	struct ifnet *ifp;
	mxge_tx_ring_t *tx;
	int err;

	sc = ss->sc;
	ifp = sc->ifp;
	tx = &ss->tx;

	if ((ss->if_drv_flags & (IFF_DRV_RUNNING|IFF_DRV_OACTIVE)) !=
	    IFF_DRV_RUNNING) {
		err = drbr_enqueue(ifp, tx->br, m);
		return (err);
	}

	if (!drbr_needs_enqueue(ifp, tx->br) &&
	    ((tx->mask - (tx->req - tx->done)) > tx->max_desc)) {
		/* let BPF see it */
		BPF_MTAP(ifp, m);
		/* give it to the nic */
		mxge_encap(ss, m);
	} else if ((err = drbr_enqueue(ifp, tx->br, m)) != 0) {
		return (err);
	}
	if (!drbr_empty(ifp, tx->br))
		mxge_start_locked(ss);
	return (0);
}

static int
mxge_transmit(struct ifnet *ifp, struct mbuf *m)
{
	mxge_softc_t *sc = ifp->if_softc;
	struct mxge_slice_state *ss;
	mxge_tx_ring_t *tx;
	int err = 0;
	int slice;

	slice = m->m_pkthdr.flowid;
	slice &= (sc->num_slices - 1);	/* num_slices always power of 2 */

	ss = &sc->ss[slice];
	tx = &ss->tx;

	if (mtx_trylock(&tx->mtx)) {
		err = mxge_transmit_locked(ss, m);
		mtx_unlock(&tx->mtx);
	} else {
		err = drbr_enqueue(ifp, tx->br, m);
	}

	return (err);
}

#else

static inline void
mxge_start_locked(struct mxge_slice_state *ss)
{
	mxge_softc_t *sc;
	struct mbuf *m;
	struct ifnet *ifp;
	mxge_tx_ring_t *tx;

	sc = ss->sc;
	ifp = sc->ifp;
	tx = &ss->tx;
	while ((tx->mask - (tx->req - tx->done)) > tx->max_desc) {
		IFQ_DRV_DEQUEUE(&ifp->if_snd, m);
		if (m == NULL) {
			return;
		}
		/* let BPF see it */
		BPF_MTAP(ifp, m);

		/* give it to the nic */
		mxge_encap(ss, m);
	}
	/* ran out of transmit slots */
	if ((sc->ifp->if_drv_flags & IFF_DRV_OACTIVE) == 0) {
		sc->ifp->if_drv_flags |= IFF_DRV_OACTIVE;
		tx->stall++;
	}
}
#endif

static void
mxge_start(struct ifnet *ifp)
{
	mxge_softc_t *sc = ifp->if_softc;
	struct mxge_slice_state *ss;

	/* only use the first slice for now */
	ss = &sc->ss[0];
	mtx_lock(&ss->tx.mtx);
	mxge_start_locked(ss);
	mtx_unlock(&ss->tx.mtx);
}
/*
 * copy an array of mcp_kreq_ether_recv_t's to the mcp.  Copy
 * at most 32 bytes at a time, so as to avoid involving the software
 * pio handler in the nic.  We re-write the first segment's low
 * DMA address to mark it valid only after we write the entire chunk
 * in a burst
 */
static inline void
mxge_submit_8rx(volatile mcp_kreq_ether_recv_t *dst,
		mcp_kreq_ether_recv_t *src)
{
	uint32_t low;

	low = src->addr_low;
	src->addr_low = 0xffffffff;
	mxge_pio_copy(dst, src, 4 * sizeof (*src));
	wmb();
	mxge_pio_copy(dst + 4, src + 4, 4 * sizeof (*src));
	wmb();
	src->addr_low = low;
	dst->addr_low = low;
	wmb();
}

static int
mxge_get_buf_small(struct mxge_slice_state *ss, bus_dmamap_t map, int idx)
{
	bus_dma_segment_t seg;
	struct mbuf *m;
	mxge_rx_ring_t *rx = &ss->rx_small;
	int cnt, err;

	m = m_gethdr(M_DONTWAIT, MT_DATA);
	if (m == NULL) {
		err = ENOBUFS;
		goto done;
	}
	m->m_len = MHLEN;
	err = bus_dmamap_load_mbuf_sg(rx->dmat, map, m,
				      &seg, &cnt, BUS_DMA_NOWAIT);
	if (err != 0) {
		m_freem(m);
		goto done;
	}
	rx->info[idx].m = m;
	rx->shadow[idx].addr_low =
		htobe32(MXGE_LOWPART_TO_U32(seg.ds_addr));
	rx->shadow[idx].addr_high =
		htobe32(MXGE_HIGHPART_TO_U32(seg.ds_addr));

done:
	if ((idx & 7) == 7)
		mxge_submit_8rx(&rx->lanai[idx - 7], &rx->shadow[idx - 7]);
	return err;
}

static int
mxge_get_buf_big(struct mxge_slice_state *ss, bus_dmamap_t map, int idx)
{
	bus_dma_segment_t seg[3];
	struct mbuf *m;
	mxge_rx_ring_t *rx = &ss->rx_big;
	int cnt, err, i;

	m = m_getjcl(M_DONTWAIT, MT_DATA, M_PKTHDR, rx->cl_size);
	if (m == NULL) {
		err = ENOBUFS;
		goto done;
	}
	m->m_len = rx->mlen;
	err = bus_dmamap_load_mbuf_sg(rx->dmat, map, m,
				      seg, &cnt, BUS_DMA_NOWAIT);
	if (err != 0) {
		m_freem(m);
		goto done;
	}
	rx->info[idx].m = m;
	rx->shadow[idx].addr_low =
		htobe32(MXGE_LOWPART_TO_U32(seg->ds_addr));
	rx->shadow[idx].addr_high =
		htobe32(MXGE_HIGHPART_TO_U32(seg->ds_addr));

#if MXGE_VIRT_JUMBOS
	for (i = 1; i < cnt; i++) {
		rx->shadow[idx + i].addr_low =
			htobe32(MXGE_LOWPART_TO_U32(seg[i].ds_addr));
		rx->shadow[idx + i].addr_high =
			htobe32(MXGE_HIGHPART_TO_U32(seg[i].ds_addr));
	}
#endif

done:
	for (i = 0; i < rx->nbufs; i++) {
		if ((idx & 7) == 7) {
			mxge_submit_8rx(&rx->lanai[idx - 7],
					&rx->shadow[idx - 7]);
		}
		idx++;
	}
	return err;
}
/*
 * Myri10GE hardware checksums are not valid if the sender
 * padded the frame with non-zero padding.  This is because
 * the firmware just does a simple 16-bit 1s complement
 * checksum across the entire frame, excluding the first 14
 * bytes.  It is best to simply check the checksum and
 * tell the stack about it only if the checksum is good
 */
static inline uint16_t
mxge_rx_csum(struct mbuf *m, int csum)
{
	struct ether_header *eh;
	struct ip *ip;
	uint16_t c;

	eh = mtod(m, struct ether_header *);

	/* only deal with IPv4 TCP & UDP for now */
	if (__predict_false(eh->ether_type != htons(ETHERTYPE_IP)))
		return 1;
	ip = (struct ip *)(eh + 1);
	if (__predict_false(ip->ip_p != IPPROTO_TCP &&
			    ip->ip_p != IPPROTO_UDP))
		return 1;

	c = in_pseudo(ip->ip_src.s_addr, ip->ip_dst.s_addr,
		      htonl(ntohs(csum) + ntohs(ip->ip_len) -
			    (ip->ip_hl << 2) + ip->ip_p));
	c ^= 0xffff;
	return (c);
}
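/*
 * Editorial sketch: the validity test above relies on standard 16-bit
 * one's-complement arithmetic -- folding the pseudo-header into the
 * hardware's whole-frame sum yields 0xffff for a correct TCP/UDP
 * checksum.  A generic fold helper (illustrative, not driver code):
 */
#if 0
static uint16_t
csum_fold(uint32_t sum)
{
	/* fold carries back in until the sum fits in 16 bits */
	while (sum >> 16)
		sum = (sum & 0xffff) + (sum >> 16);
	return (uint16_t)sum;	/* 0xffff here means "checksum good" */
}
#endif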
static void
mxge_vlan_tag_remove(struct mbuf *m, uint32_t *csum)
{
	struct ether_vlan_header *evl;
	struct ether_header *eh;
	uint32_t partial;

	evl = mtod(m, struct ether_vlan_header *);
	eh = mtod(m, struct ether_header *);

	/*
	 * fix checksum by subtracting ETHER_VLAN_ENCAP_LEN bytes
	 * after what the firmware thought was the end of the ethernet
	 * header.
	 */

	/* put checksum into host byte order */
	*csum = ntohs(*csum);
	partial = ntohl(*(uint32_t *)(mtod(m, char *) + ETHER_HDR_LEN));
	(*csum) += ~partial;
	(*csum) += ((*csum) < ~partial);
	(*csum) = ((*csum) >> 16) + ((*csum) & 0xFFFF);
	(*csum) = ((*csum) >> 16) + ((*csum) & 0xFFFF);

	/* restore checksum to network byte order;
	   later consumers expect this */
	*csum = htons(*csum);

	/* save the tag */
#ifdef MXGE_NEW_VLAN_API
	m->m_pkthdr.ether_vtag = ntohs(evl->evl_tag);
#else
	{
		struct m_tag *mtag;
		mtag = m_tag_alloc(MTAG_VLAN, MTAG_VLAN_TAG, sizeof(u_int),
				   M_NOWAIT);
		if (mtag == NULL)
			return;
		VLAN_TAG_VALUE(mtag) = ntohs(evl->evl_tag);
		m_tag_prepend(m, mtag);
	}
#endif
	m->m_flags |= M_VLANTAG;

	/*
	 * Remove the 802.1q header by copying the Ethernet
	 * addresses over it and adjusting the beginning of
	 * the data in the mbuf.  The encapsulated Ethernet
	 * type field is already in place.
	 */
	bcopy((char *)evl, (char *)evl + ETHER_VLAN_ENCAP_LEN,
	      ETHER_HDR_LEN - ETHER_TYPE_LEN);
	m_adj(m, ETHER_VLAN_ENCAP_LEN);
}
2545 mxge_rx_done_big(struct mxge_slice_state *ss, uint32_t len, uint32_t csum)
2550 struct ether_header *eh;
2552 bus_dmamap_t old_map;
2554 uint16_t tcpudp_csum;
2559 idx = rx->cnt & rx->mask;
2560 rx->cnt += rx->nbufs;
2561 /* save a pointer to the received mbuf */
2562 m = rx->info[idx].m;
2563 /* try to replace the received mbuf */
2564 if (mxge_get_buf_big(ss, rx->extra_map, idx)) {
2565 /* drop the frame -- the old mbuf is re-cycled */
2570 /* unmap the received buffer */
2571 old_map = rx->info[idx].map;
2572 bus_dmamap_sync(rx->dmat, old_map, BUS_DMASYNC_POSTREAD);
2573 bus_dmamap_unload(rx->dmat, old_map);
2575 /* swap the bus_dmamap_t's */
2576 rx->info[idx].map = rx->extra_map;
2577 rx->extra_map = old_map;
2579 /* mcp implicitly skips 1st 2 bytes so that packet is properly aligned */
2581 m->m_data += MXGEFW_PAD;
2583 m->m_pkthdr.rcvif = ifp;
2584 m->m_len = m->m_pkthdr.len = len;
2586 eh = mtod(m, struct ether_header *);
2587 if (eh->ether_type == htons(ETHERTYPE_VLAN)) {
2588 mxge_vlan_tag_remove(m, &csum);
2590 /* if the checksum is valid, mark it in the mbuf header */
2591 if (sc->csum_flag && (0 == (tcpudp_csum = mxge_rx_csum(m, csum)))) {
2592 if (sc->lro_cnt && (0 == mxge_lro_rx(ss, m, csum)))
2594 /* otherwise, it was a UDP frame, or a TCP frame which
2595 we could not do LRO on. Tell the stack that the checksum is good */
2597 m->m_pkthdr.csum_data = 0xffff;
2598 m->m_pkthdr.csum_flags = CSUM_PSEUDO_HDR | CSUM_DATA_VALID;
2600 /* flowid only valid if RSS hashing is enabled */
2601 if (sc->num_slices > 1) {
2602 m->m_pkthdr.flowid = (ss - sc->ss);
2603 m->m_flags |= M_FLOWID;
2605 /* pass the frame up the stack */
2606 (*ifp->if_input)(ifp, m);
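/*
 * For illustration: the receive rings are sized to powers of two, so a
 * monotonically increasing counter becomes a slot index with a single
 * mask, as in "idx = rx->cnt & rx->mask" above.  Minimal standalone
 * sketch with hypothetical names:
 */
struct toy_ring {
	uint32_t cnt;	/* free-running fill counter */
	uint32_t mask;	/* ring entries - 1; entries is a power of two */
};

static inline uint32_t
toy_ring_idx(const struct toy_ring *r)
{
	return (r->cnt & r->mask);	/* equivalent to cnt % entries */
}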
2610 mxge_rx_done_small(struct mxge_slice_state *ss, uint32_t len, uint32_t csum)
2614 struct ether_header *eh;
2617 bus_dmamap_t old_map;
2619 uint16_t tcpudp_csum;
2624 idx = rx->cnt & rx->mask;
2626 /* save a pointer to the received mbuf */
2627 m = rx->info[idx].m;
2628 /* try to replace the received mbuf */
2629 if (mxge_get_buf_small(ss, rx->extra_map, idx)) {
2630 /* drop the frame -- the old mbuf is recycled */
2635 /* unmap the received buffer */
2636 old_map = rx->info[idx].map;
2637 bus_dmamap_sync(rx->dmat, old_map, BUS_DMASYNC_POSTREAD);
2638 bus_dmamap_unload(rx->dmat, old_map);
2640 /* swap the bus_dmamap_t's */
2641 rx->info[idx].map = rx->extra_map;
2642 rx->extra_map = old_map;
2644 /* mcp implicitly skips 1st 2 bytes so that packet is properly aligned */
2646 m->m_data += MXGEFW_PAD;
2648 m->m_pkthdr.rcvif = ifp;
2649 m->m_len = m->m_pkthdr.len = len;
2651 eh = mtod(m, struct ether_header *);
2652 if (eh->ether_type == htons(ETHERTYPE_VLAN)) {
2653 mxge_vlan_tag_remove(m, &csum);
2655 /* if the checksum is valid, mark it in the mbuf header */
2656 if (sc->csum_flag && (0 == (tcpudp_csum = mxge_rx_csum(m, csum)))) {
2657 if (sc->lro_cnt && (0 == mxge_lro_rx(ss, m, csum)))
2659 /* otherwise, it was a UDP frame, or a TCP frame which
2660 we could not do LRO on. Tell the stack that the checksum is good */
2662 m->m_pkthdr.csum_data = 0xffff;
2663 m->m_pkthdr.csum_flags = CSUM_PSEUDO_HDR | CSUM_DATA_VALID;
2665 /* flowid only valid if RSS hashing is enabled */
2666 if (sc->num_slices > 1) {
2667 m->m_pkthdr.flowid = (ss - sc->ss);
2668 m->m_flags |= M_FLOWID;
2670 /* pass the frame up the stack */
2671 (*ifp->if_input)(ifp, m);
2675 mxge_clean_rx_done(struct mxge_slice_state *ss)
2677 mxge_rx_done_t *rx_done = &ss->rx_done;
2683 while (rx_done->entry[rx_done->idx].length != 0) {
2684 length = ntohs(rx_done->entry[rx_done->idx].length);
2685 rx_done->entry[rx_done->idx].length = 0;
2686 checksum = rx_done->entry[rx_done->idx].checksum;
2687 if (length <= (MHLEN - MXGEFW_PAD))
2688 mxge_rx_done_small(ss, length, checksum);
2690 mxge_rx_done_big(ss, length, checksum);
2692 rx_done->idx = rx_done->cnt & rx_done->mask;
2694 /* limit potential for livelock */
2695 if (__predict_false(++limit > rx_done->mask / 2))
2699 while (!SLIST_EMPTY(&ss->lro_active)) {
2700 struct lro_entry *lro = SLIST_FIRST(&ss->lro_active);
2701 SLIST_REMOVE_HEAD(&ss->lro_active, next);
2702 mxge_lro_flush(ss, lro);
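/*
 * Sketch of the bounded-drain pattern used above (illustrative only,
 * hypothetical names): the completion loop caps its work at half a
 * ring per pass so a continuous flood of receives cannot pin the CPU
 * in the interrupt path; leftover entries are handled on the next
 * interrupt.
 */
static int
toy_drain_bounded(volatile uint16_t *lengths, uint32_t mask)
{
	uint32_t idx = 0, limit = 0;
	int handled = 0;

	while (lengths[idx] != 0) {
		lengths[idx] = 0;		/* consume the slot */
		handled++;
		idx = (idx + 1) & mask;
		if (++limit > mask / 2)		/* cap work per pass */
			break;
	}
	return (handled);
}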
2709 mxge_tx_done(struct mxge_slice_state *ss, uint32_t mcp_idx)
2720 while (tx->pkt_done != mcp_idx) {
2721 idx = tx->done & tx->mask;
2723 m = tx->info[idx].m;
2724 /* mbuf and DMA map only attached to the first
2727 ss->obytes += m->m_pkthdr.len;
2728 if (m->m_flags & M_MCAST)
2731 tx->info[idx].m = NULL;
2732 map = tx->info[idx].map;
2733 bus_dmamap_unload(tx->dmat, map);
2736 if (tx->info[idx].flag) {
2737 tx->info[idx].flag = 0;
2742 /* If we have space, clear IFF_OACTIVE to tell the stack that
2743 it's OK to send packets */
2744 #ifdef IFNET_BUF_RING
2745 flags = &ss->if_drv_flags;
2747 flags = &ifp->if_drv_flags;
2749 mtx_lock(&ss->tx.mtx);
2750 if ((*flags) & IFF_DRV_OACTIVE &&
2751 tx->req - tx->done < (tx->mask + 1)/4) {
2752 *(flags) &= ~IFF_DRV_OACTIVE;
2754 mxge_start_locked(ss);
2756 #ifdef IFNET_BUF_RING
2757 if ((ss->sc->num_slices > 1) && (tx->req == tx->done)) {
2758 /* let the NIC stop polling this queue, since there
2759 * are no more transmits pending */
2760 if (tx->req == tx->done) {
2762 tx->queue_active = 0;
2768 mtx_unlock(&ss->tx.mtx);
2772 static struct mxge_media_type mxge_xfp_media_types[] =
2774 {IFM_10G_CX4, 0x7f, "10GBASE-CX4 (module)"},
2775 {IFM_10G_SR, (1 << 7), "10GBASE-SR"},
2776 {IFM_10G_LR, (1 << 6), "10GBASE-LR"},
2777 {0, (1 << 5), "10GBASE-ER"},
2778 {IFM_10G_LRM, (1 << 4), "10GBASE-LRM"},
2779 {0, (1 << 3), "10GBASE-SW"},
2780 {0, (1 << 2), "10GBASE-LW"},
2781 {0, (1 << 1), "10GBASE-EW"},
2782 {0, (1 << 0), "Reserved"}
2784 static struct mxge_media_type mxge_sfp_media_types[] =
2786 {IFM_10G_TWINAX, 0, "10GBASE-Twinax"},
2787 {0, (1 << 7), "Reserved"},
2788 {IFM_10G_LRM, (1 << 6), "10GBASE-LRM"},
2789 {IFM_10G_LR, (1 << 5), "10GBASE-LR"},
2790 {IFM_10G_SR, (1 << 4), "10GBASE-SR"},
2791 {IFM_10G_TWINAX,(1 << 0), "10GBASE-Twinax"}
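/*
 * For illustration: mxge_media_probe() below matches the module's
 * compliance byte against these tables, treating entry 0 as an exact
 * match and the remaining entries as single-bit tests.  Standalone
 * sketch of that decode (hypothetical function, not in this driver):
 */
static const char *
toy_decode_media(const struct mxge_media_type *tab, int n, uint8_t byte)
{
	int i;

	if (byte == tab[0].bitmask)		/* exact match first */
		return (tab[0].name);
	for (i = 1; i < n; i++) {
		if (byte & tab[i].bitmask)
			return (tab[i].name);	/* first set bit wins */
	}
	return (NULL);				/* unknown module */
}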
2795 mxge_media_set(mxge_softc_t *sc, int media_type)
2799 ifmedia_add(&sc->media, IFM_ETHER | IFM_FDX | media_type,
2801 ifmedia_set(&sc->media, IFM_ETHER | IFM_FDX | media_type);
2802 sc->current_media = media_type;
2803 sc->media.ifm_media = sc->media.ifm_cur->ifm_media;
2807 mxge_media_init(mxge_softc_t *sc)
2812 ifmedia_removeall(&sc->media);
2813 mxge_media_set(sc, IFM_AUTO);
2816 * parse the product code to determine the interface type
2817 * (CX4, XFP, Quad Ribbon Fiber) by looking at the character
2818 * after the 3rd dash in the driver's cached copy of the
2819 * EEPROM's product code string.
2821 ptr = sc->product_code_string;
2823 device_printf(sc->dev, "Missing product code\n");
2827 for (i = 0; i < 3; i++, ptr++) {
2828 ptr = index(ptr, '-');
2830 device_printf(sc->dev,
2831 "only %d dashes in PC?!?\n", i);
2835 if (*ptr == 'C' || *(ptr + 1) == 'C') {
2837 sc->connector = MXGE_CX4;
2838 mxge_media_set(sc, IFM_10G_CX4);
2839 } else if (*ptr == 'Q') {
2840 /* -Q is Quad Ribbon Fiber */
2841 sc->connector = MXGE_QRF;
2842 device_printf(sc->dev, "Quad Ribbon Fiber Media\n");
2843 /* FreeBSD has no media type for Quad ribbon fiber */
2844 } else if (*ptr == 'R') {
2846 sc->connector = MXGE_XFP;
2847 } else if (*ptr == 'S' || *(ptr + 1) == 'S') {
2848 /* -S or -2S is SFP+ */
2849 sc->connector = MXGE_SFP;
2851 device_printf(sc->dev, "Unknown media type: %c\n", *ptr);
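/*
 * Illustration of the product-code walk above: step to the character
 * after the third '-' in the EEPROM product code, then classify the
 * connector from that character.  Standalone sketch using strchr()
 * (index() above is its historical name); hypothetical helper:
 */
static const char *
toy_after_third_dash(const char *pc)
{
	int i;

	for (i = 0; i < 3; i++) {
		pc = strchr(pc, '-');
		if (pc == NULL)
			return (NULL);	/* malformed product code */
		pc++;			/* step past the dash */
	}
	return (pc);
}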
2856 * Determine the media type for a NIC. Some XFPs will identify
2857 * themselves only when their link is up, so this is initiated via a
2858 * link up interrupt. However, this can potentially take up to
2859 * several milliseconds, so it is run via the watchdog routine, rather
2860 * than in the interrupt handler itself.
2863 mxge_media_probe(mxge_softc_t *sc)
2868 struct mxge_media_type *mxge_media_types = NULL;
2869 int i, err, ms, mxge_media_type_entries;
2872 sc->need_media_probe = 0;
2874 if (sc->connector == MXGE_XFP) {
2876 mxge_media_types = mxge_xfp_media_types;
2877 mxge_media_type_entries =
2878 sizeof (mxge_xfp_media_types) /
2879 sizeof (mxge_xfp_media_types[0]);
2880 byte = MXGE_XFP_COMPLIANCE_BYTE;
2882 } else if (sc->connector == MXGE_SFP) {
2883 /* -S or -2S is SFP+ */
2884 mxge_media_types = mxge_sfp_media_types;
2885 mxge_media_type_entries =
2886 sizeof (mxge_sfp_media_types) /
2887 sizeof (mxge_sfp_media_types[0]);
2891 /* nothing to do; media type cannot change */
2896 * At this point we know the NIC has an XFP cage, so now we
2897 * try to determine what is in the cage by using the
2898 * firmware's XFP I2C commands to read the XFP 10GbE compliance
2899 * register. We read just one byte, which may take over a millisecond */
2903 cmd.data0 = 0; /* just fetch 1 byte, not all 256 */
2905 err = mxge_send_cmd(sc, MXGEFW_CMD_I2C_READ, &cmd);
2906 if (err == MXGEFW_CMD_ERROR_I2C_FAILURE) {
2907 device_printf(sc->dev, "failed to read XFP\n");
2909 if (err == MXGEFW_CMD_ERROR_I2C_ABSENT) {
2910 device_printf(sc->dev, "Type R/S with no XFP!?!?\n");
2912 if (err != MXGEFW_CMD_OK) {
2916 /* now we wait for the data to be cached */
2918 err = mxge_send_cmd(sc, MXGEFW_CMD_I2C_BYTE, &cmd);
2919 for (ms = 0; (err == EBUSY) && (ms < 50); ms++) {
2922 err = mxge_send_cmd(sc, MXGEFW_CMD_I2C_BYTE, &cmd);
2924 if (err != MXGEFW_CMD_OK) {
2925 device_printf(sc->dev, "failed to read %s (%d, %dms)\n",
2926 cage_type, err, ms);
2930 if (cmd.data0 == mxge_media_types[0].bitmask) {
2932 device_printf(sc->dev, "%s:%s\n", cage_type,
2933 mxge_media_types[0].name);
2934 if (sc->current_media != mxge_media_types[0].flag) {
2935 mxge_media_init(sc);
2936 mxge_media_set(sc, mxge_media_types[0].flag);
2940 for (i = 1; i < mxge_media_type_entries; i++) {
2941 if (cmd.data0 & mxge_media_types[i].bitmask) {
2943 device_printf(sc->dev, "%s:%s\n",
2945 mxge_media_types[i].name);
2947 if (sc->current_media != mxge_media_types[i].flag) {
2948 mxge_media_init(sc);
2949 mxge_media_set(sc, mxge_media_types[i].flag);
2955 device_printf(sc->dev, "%s media 0x%x unknown\n",
2956 cage_type, cmd.data0);
2962 mxge_intr(void *arg)
2964 struct mxge_slice_state *ss = arg;
2965 mxge_softc_t *sc = ss->sc;
2966 mcp_irq_data_t *stats = ss->fw_stats;
2967 mxge_tx_ring_t *tx = &ss->tx;
2968 mxge_rx_done_t *rx_done = &ss->rx_done;
2969 uint32_t send_done_count;
2973 #ifndef IFNET_BUF_RING
2974 /* an interrupt on a non-zero slice is implicitly valid
2975 since MSI-X irqs are not shared */
2977 mxge_clean_rx_done(ss);
2978 *ss->irq_claim = be32toh(3);
2983 /* make sure the DMA has finished */
2984 if (!stats->valid) {
2987 valid = stats->valid;
2989 if (sc->legacy_irq) {
2990 /* lower legacy IRQ */
2991 *sc->irq_deassert = 0;
2992 if (!mxge_deassert_wait)
2993 /* don't wait for confirmation that the irq is low */
2999 /* loop while waiting for legacy irq deassertion */
3001 /* check for transmit completes and receives */
3002 send_done_count = be32toh(stats->send_done_count);
3003 while ((send_done_count != tx->pkt_done) ||
3004 (rx_done->entry[rx_done->idx].length != 0)) {
3005 if (send_done_count != tx->pkt_done)
3006 mxge_tx_done(ss, (int)send_done_count);
3007 mxge_clean_rx_done(ss);
3008 send_done_count = be32toh(stats->send_done_count);
3010 if (sc->legacy_irq && mxge_deassert_wait)
3012 } while (*((volatile uint8_t *) &stats->valid));
3014 /* fw link & error stats meaningful only on the first slice */
3015 if (__predict_false((ss == sc->ss) && stats->stats_updated)) {
3016 if (sc->link_state != stats->link_up) {
3017 sc->link_state = stats->link_up;
3018 if (sc->link_state) {
3019 if_link_state_change(sc->ifp, LINK_STATE_UP);
3020 sc->ifp->if_baudrate = IF_Gbps(10UL);
3022 device_printf(sc->dev, "link up\n");
3024 if_link_state_change(sc->ifp, LINK_STATE_DOWN);
3025 sc->ifp->if_baudrate = 0;
3027 device_printf(sc->dev, "link down\n");
3029 sc->need_media_probe = 1;
3031 if (sc->rdma_tags_available !=
3032 be32toh(stats->rdma_tags_available)) {
3033 sc->rdma_tags_available =
3034 be32toh(stats->rdma_tags_available);
3035 device_printf(sc->dev, "RDMA timed out! %d tags "
3036 "left\n", sc->rdma_tags_available);
3039 if (stats->link_down) {
3040 sc->down_cnt += stats->link_down;
3042 if_link_state_change(sc->ifp, LINK_STATE_DOWN);
3046 /* check to see if we have an rx token to pass back */
3048 *ss->irq_claim = be32toh(3);
3049 *(ss->irq_claim + 1) = be32toh(3);
3053 mxge_init(void *arg)
3055 mxge_softc_t *sc = arg;
3056 struct ifnet *ifp = sc->ifp;
3059 mtx_lock(&sc->driver_mtx);
3060 if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
3061 (void) mxge_open(sc);
3062 mtx_unlock(&sc->driver_mtx);
3068 mxge_free_slice_mbufs(struct mxge_slice_state *ss)
3070 struct lro_entry *lro_entry;
3073 while (!SLIST_EMPTY(&ss->lro_free)) {
3074 lro_entry = SLIST_FIRST(&ss->lro_free);
3075 SLIST_REMOVE_HEAD(&ss->lro_free, next);
3076 free(lro_entry, M_DEVBUF);
3079 for (i = 0; i <= ss->rx_big.mask; i++) {
3080 if (ss->rx_big.info[i].m == NULL)
3082 bus_dmamap_unload(ss->rx_big.dmat,
3083 ss->rx_big.info[i].map);
3084 m_freem(ss->rx_big.info[i].m);
3085 ss->rx_big.info[i].m = NULL;
3088 for (i = 0; i <= ss->rx_small.mask; i++) {
3089 if (ss->rx_small.info[i].m == NULL)
3091 bus_dmamap_unload(ss->rx_small.dmat,
3092 ss->rx_small.info[i].map);
3093 m_freem(ss->rx_small.info[i].m);
3094 ss->rx_small.info[i].m = NULL;
3097 /* transmit ring used only on the first slice */
3098 if (ss->tx.info == NULL)
3101 for (i = 0; i <= ss->tx.mask; i++) {
3102 ss->tx.info[i].flag = 0;
3103 if (ss->tx.info[i].m == NULL)
3105 bus_dmamap_unload(ss->tx.dmat,
3106 ss->tx.info[i].map);
3107 m_freem(ss->tx.info[i].m);
3108 ss->tx.info[i].m = NULL;
3113 mxge_free_mbufs(mxge_softc_t *sc)
3117 for (slice = 0; slice < sc->num_slices; slice++)
3118 mxge_free_slice_mbufs(&sc->ss[slice]);
3122 mxge_free_slice_rings(struct mxge_slice_state *ss)
3127 if (ss->rx_done.entry != NULL)
3128 mxge_dma_free(&ss->rx_done.dma);
3129 ss->rx_done.entry = NULL;
3131 if (ss->tx.req_bytes != NULL)
3132 free(ss->tx.req_bytes, M_DEVBUF);
3133 ss->tx.req_bytes = NULL;
3135 if (ss->tx.seg_list != NULL)
3136 free(ss->tx.seg_list, M_DEVBUF);
3137 ss->tx.seg_list = NULL;
3139 if (ss->rx_small.shadow != NULL)
3140 free(ss->rx_small.shadow, M_DEVBUF);
3141 ss->rx_small.shadow = NULL;
3143 if (ss->rx_big.shadow != NULL)
3144 free(ss->rx_big.shadow, M_DEVBUF);
3145 ss->rx_big.shadow = NULL;
3147 if (ss->tx.info != NULL) {
3148 if (ss->tx.dmat != NULL) {
3149 for (i = 0; i <= ss->tx.mask; i++) {
3150 bus_dmamap_destroy(ss->tx.dmat,
3151 ss->tx.info[i].map);
3153 bus_dma_tag_destroy(ss->tx.dmat);
3155 free(ss->tx.info, M_DEVBUF);
3159 if (ss->rx_small.info != NULL) {
3160 if (ss->rx_small.dmat != NULL) {
3161 for (i = 0; i <= ss->rx_small.mask; i++) {
3162 bus_dmamap_destroy(ss->rx_small.dmat,
3163 ss->rx_small.info[i].map);
3165 bus_dmamap_destroy(ss->rx_small.dmat,
3166 ss->rx_small.extra_map);
3167 bus_dma_tag_destroy(ss->rx_small.dmat);
3169 free(ss->rx_small.info, M_DEVBUF);
3171 ss->rx_small.info = NULL;
3173 if (ss->rx_big.info != NULL) {
3174 if (ss->rx_big.dmat != NULL) {
3175 for (i = 0; i <= ss->rx_big.mask; i++) {
3176 bus_dmamap_destroy(ss->rx_big.dmat,
3177 ss->rx_big.info[i].map);
3179 bus_dmamap_destroy(ss->rx_big.dmat,
3180 ss->rx_big.extra_map);
3181 bus_dma_tag_destroy(ss->rx_big.dmat);
3183 free(ss->rx_big.info, M_DEVBUF);
3185 ss->rx_big.info = NULL;
3189 mxge_free_rings(mxge_softc_t *sc)
3193 for (slice = 0; slice < sc->num_slices; slice++)
3194 mxge_free_slice_rings(&sc->ss[slice]);
3198 mxge_alloc_slice_rings(struct mxge_slice_state *ss, int rx_ring_entries,
3199 int tx_ring_entries)
3201 mxge_softc_t *sc = ss->sc;
3207 /* allocate per-slice receive resources */
3209 ss->rx_small.mask = ss->rx_big.mask = rx_ring_entries - 1;
3210 ss->rx_done.mask = (2 * rx_ring_entries) - 1;
3212 /* allocate the rx shadow rings */
3213 bytes = rx_ring_entries * sizeof (*ss->rx_small.shadow);
3214 ss->rx_small.shadow = malloc(bytes, M_DEVBUF, M_ZERO|M_WAITOK);
3215 if (ss->rx_small.shadow == NULL)
3218 bytes = rx_ring_entries * sizeof (*ss->rx_big.shadow);
3219 ss->rx_big.shadow = malloc(bytes, M_DEVBUF, M_ZERO|M_WAITOK);
3220 if (ss->rx_big.shadow == NULL)
3223 /* allocate the rx host info rings */
3224 bytes = rx_ring_entries * sizeof (*ss->rx_small.info);
3225 ss->rx_small.info = malloc(bytes, M_DEVBUF, M_ZERO|M_WAITOK);
3226 if (ss->rx_small.info == NULL)
3229 bytes = rx_ring_entries * sizeof (*ss->rx_big.info);
3230 ss->rx_big.info = malloc(bytes, M_DEVBUF, M_ZERO|M_WAITOK);
3231 if (ss->rx_big.info == NULL)
3234 /* allocate the rx busdma resources */
3235 err = bus_dma_tag_create(sc->parent_dmat, /* parent */
3237 4096, /* boundary */
3238 BUS_SPACE_MAXADDR, /* low */
3239 BUS_SPACE_MAXADDR, /* high */
3240 NULL, NULL, /* filter */
3241 MHLEN, /* maxsize */
3243 MHLEN, /* maxsegsize */
3244 BUS_DMA_ALLOCNOW, /* flags */
3245 NULL, NULL, /* lock */
3246 &ss->rx_small.dmat); /* tag */
3248 device_printf(sc->dev, "Err %d allocating rx_small dmat\n",
3253 err = bus_dma_tag_create(sc->parent_dmat, /* parent */
3255 #if MXGE_VIRT_JUMBOS
3256 4096, /* boundary */
3260 BUS_SPACE_MAXADDR, /* low */
3261 BUS_SPACE_MAXADDR, /* high */
3262 NULL, NULL, /* filter */
3263 3*4096, /* maxsize */
3264 #if MXGE_VIRT_JUMBOS
3266 4096, /* maxsegsize*/
3269 MJUM9BYTES, /* maxsegsize*/
3271 BUS_DMA_ALLOCNOW, /* flags */
3272 NULL, NULL, /* lock */
3273 &ss->rx_big.dmat); /* tag */
3275 device_printf(sc->dev, "Err %d allocating rx_big dmat\n",
3279 for (i = 0; i <= ss->rx_small.mask; i++) {
3280 err = bus_dmamap_create(ss->rx_small.dmat, 0,
3281 &ss->rx_small.info[i].map);
3283 device_printf(sc->dev, "Err %d rx_small dmamap\n",
3288 err = bus_dmamap_create(ss->rx_small.dmat, 0,
3289 &ss->rx_small.extra_map);
3291 device_printf(sc->dev, "Err %d extra rx_small dmamap\n",
3296 for (i = 0; i <= ss->rx_big.mask; i++) {
3297 err = bus_dmamap_create(ss->rx_big.dmat, 0,
3298 &ss->rx_big.info[i].map);
3300 device_printf(sc->dev, "Err %d rx_big dmamap\n",
3305 err = bus_dmamap_create(ss->rx_big.dmat, 0,
3306 &ss->rx_big.extra_map);
3308 device_printf(sc->dev, "Err %d extra rx_big dmamap\n",
3313 /* now allocate TX resources */
3315 #ifndef IFNET_BUF_RING
3316 /* only use a single TX ring for now */
3317 if (ss != ss->sc->ss)
3321 ss->tx.mask = tx_ring_entries - 1;
3322 ss->tx.max_desc = MIN(MXGE_MAX_SEND_DESC, tx_ring_entries / 4);
3325 /* allocate the tx request copy block */
3327 sizeof (*ss->tx.req_list) * (ss->tx.max_desc + 4);
3328 ss->tx.req_bytes = malloc(bytes, M_DEVBUF, M_WAITOK);
3329 if (ss->tx.req_bytes == NULL)
3331 /* ensure req_list entries are aligned to 8 bytes */
3332 ss->tx.req_list = (mcp_kreq_ether_send_t *)
3333 ((unsigned long)(ss->tx.req_bytes + 7) & ~7UL);
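/*
 * For illustration: the (p + 7) & ~7UL idiom above rounds an address up
 * to the next 8-byte boundary so each req_list entry is naturally
 * aligned.  Generalized sketch (hypothetical helper; align must be a
 * power of two):
 */
static inline void *
toy_align_up(void *p, unsigned long align)
{
	return ((void *)(((unsigned long)p + align - 1) & ~(align - 1)));
}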
3335 /* allocate the tx busdma segment list */
3336 bytes = sizeof (*ss->tx.seg_list) * ss->tx.max_desc;
3337 ss->tx.seg_list = (bus_dma_segment_t *)
3338 malloc(bytes, M_DEVBUF, M_WAITOK);
3339 if (ss->tx.seg_list == NULL)
3342 /* allocate the tx host info ring */
3343 bytes = tx_ring_entries * sizeof (*ss->tx.info);
3344 ss->tx.info = malloc(bytes, M_DEVBUF, M_ZERO|M_WAITOK);
3345 if (ss->tx.info == NULL)
3348 /* allocate the tx busdma resources */
3349 err = bus_dma_tag_create(sc->parent_dmat, /* parent */
3351 sc->tx_boundary, /* boundary */
3352 BUS_SPACE_MAXADDR, /* low */
3353 BUS_SPACE_MAXADDR, /* high */
3354 NULL, NULL, /* filter */
3355 65536 + 256, /* maxsize */
3356 ss->tx.max_desc - 2, /* num segs */
3357 sc->tx_boundary, /* maxsegsz */
3358 BUS_DMA_ALLOCNOW, /* flags */
3359 NULL, NULL, /* lock */
3360 &ss->tx.dmat); /* tag */
3363 device_printf(sc->dev, "Err %d allocating tx dmat\n",
3368 /* now use these tags to set up dmamaps for each slot in the ring */
3370 for (i = 0; i <= ss->tx.mask; i++) {
3371 err = bus_dmamap_create(ss->tx.dmat, 0,
3372 &ss->tx.info[i].map);
3374 device_printf(sc->dev, "Err %d tx dmamap\n",
3384 mxge_alloc_rings(mxge_softc_t *sc)
3388 int tx_ring_entries, rx_ring_entries;
3391 /* get ring sizes */
3392 err = mxge_send_cmd(sc, MXGEFW_CMD_GET_SEND_RING_SIZE, &cmd);
3393 tx_ring_size = cmd.data0;
3395 device_printf(sc->dev, "Cannot determine tx ring sizes\n");
3399 tx_ring_entries = tx_ring_size / sizeof (mcp_kreq_ether_send_t);
3400 rx_ring_entries = sc->rx_ring_size / sizeof (mcp_dma_addr_t);
3401 IFQ_SET_MAXLEN(&sc->ifp->if_snd, tx_ring_entries - 1);
3402 sc->ifp->if_snd.ifq_drv_maxlen = sc->ifp->if_snd.ifq_maxlen;
3403 IFQ_SET_READY(&sc->ifp->if_snd);
3405 for (slice = 0; slice < sc->num_slices; slice++) {
3406 err = mxge_alloc_slice_rings(&sc->ss[slice],
3415 mxge_free_rings(sc);
3422 mxge_choose_params(int mtu, int *big_buf_size, int *cl_size, int *nbufs)
3424 int bufsize = mtu + ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN + MXGEFW_PAD;
3426 if (bufsize < MCLBYTES) {
3427 /* easy, everything fits in a single buffer */
3428 *big_buf_size = MCLBYTES;
3429 *cl_size = MCLBYTES;
3434 if (bufsize < MJUMPAGESIZE) {
3435 /* still easy, everything still fits in a single buffer */
3436 *big_buf_size = MJUMPAGESIZE;
3437 *cl_size = MJUMPAGESIZE;
3441 #if MXGE_VIRT_JUMBOS
3442 /* now we need to use virtually contiguous buffers */
3443 *cl_size = MJUM9BYTES;
3444 *big_buf_size = 4096;
3445 *nbufs = mtu / 4096 + 1;
3446 /* needs to be a power of two, so round up */
3450 *cl_size = MJUM9BYTES;
3451 *big_buf_size = MJUM9BYTES;
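/*
 * Worked example of the ladder above as a standalone sketch (assumes
 * MCLBYTES = 2048, MJUMPAGESIZE = 4096 and MJUM9BYTES = 9216, with
 * MXGE_VIRT_JUMBOS unset; hypothetical function):
 * toy_cluster_for(1500) == 2048, toy_cluster_for(4000) == 4096,
 * toy_cluster_for(9000) == 9216.
 */
static int
toy_cluster_for(int mtu)
{
	/* frame + 14-byte header + 4-byte VLAN tag + 2-byte MXGEFW_PAD */
	int bufsize = mtu + 14 + 4 + 2;

	if (bufsize < 2048)
		return (2048);		/* MCLBYTES */
	if (bufsize < 4096)
		return (4096);		/* MJUMPAGESIZE */
	return (9216);			/* MJUM9BYTES */
}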
3457 mxge_slice_open(struct mxge_slice_state *ss, int nbufs, int cl_size)
3462 struct lro_entry *lro_entry;
3467 slice = ss - sc->ss;
3469 SLIST_INIT(&ss->lro_free);
3470 SLIST_INIT(&ss->lro_active);
3472 for (i = 0; i < sc->lro_cnt; i++) {
3473 lro_entry = (struct lro_entry *)
3474 malloc(sizeof (*lro_entry), M_DEVBUF,
3476 if (lro_entry == NULL) {
3480 SLIST_INSERT_HEAD(&ss->lro_free, lro_entry, next);
3482 /* get the lanai pointers to the send and receive rings */
3485 #ifndef IFNET_BUF_RING
3486 /* We currently only send from the first slice */
3490 err = mxge_send_cmd(sc, MXGEFW_CMD_GET_SEND_OFFSET, &cmd);
3492 (volatile mcp_kreq_ether_send_t *)(sc->sram + cmd.data0);
3493 ss->tx.send_go = (volatile uint32_t *)
3494 (sc->sram + MXGEFW_ETH_SEND_GO + 64 * slice);
3495 ss->tx.send_stop = (volatile uint32_t *)
3496 (sc->sram + MXGEFW_ETH_SEND_STOP + 64 * slice);
3497 #ifndef IFNET_BUF_RING
3501 err |= mxge_send_cmd(sc,
3502 MXGEFW_CMD_GET_SMALL_RX_OFFSET, &cmd);
3503 ss->rx_small.lanai =
3504 (volatile mcp_kreq_ether_recv_t *)(sc->sram + cmd.data0);
3506 err |= mxge_send_cmd(sc, MXGEFW_CMD_GET_BIG_RX_OFFSET, &cmd);
3508 (volatile mcp_kreq_ether_recv_t *)(sc->sram + cmd.data0);
3511 device_printf(sc->dev,
3512 "failed to get ring sizes or locations\n");
3516 /* stock receive rings */
3517 for (i = 0; i <= ss->rx_small.mask; i++) {
3518 map = ss->rx_small.info[i].map;
3519 err = mxge_get_buf_small(ss, map, i);
3521 device_printf(sc->dev, "alloced %d/%d smalls\n",
3522 i, ss->rx_small.mask + 1);
3526 for (i = 0; i <= ss->rx_big.mask; i++) {
3527 ss->rx_big.shadow[i].addr_low = 0xffffffff;
3528 ss->rx_big.shadow[i].addr_high = 0xffffffff;
3530 ss->rx_big.nbufs = nbufs;
3531 ss->rx_big.cl_size = cl_size;
3532 ss->rx_big.mlen = ss->sc->ifp->if_mtu + ETHER_HDR_LEN +
3533 ETHER_VLAN_ENCAP_LEN + MXGEFW_PAD;
3534 for (i = 0; i <= ss->rx_big.mask; i += ss->rx_big.nbufs) {
3535 map = ss->rx_big.info[i].map;
3536 err = mxge_get_buf_big(ss, map, i);
3538 device_printf(sc->dev, "alloced %d/%d bigs\n",
3539 i, ss->rx_big.mask + 1);
3547 mxge_open(mxge_softc_t *sc)
3550 int err, big_bytes, nbufs, slice, cl_size, i;
3552 volatile uint8_t *itable;
3553 struct mxge_slice_state *ss;
3555 /* Copy the MAC address in case it was overridden */
3556 bcopy(IF_LLADDR(sc->ifp), sc->mac_addr, ETHER_ADDR_LEN);
3558 err = mxge_reset(sc, 1);
3560 device_printf(sc->dev, "failed to reset\n");
3564 if (sc->num_slices > 1) {
3565 /* setup the indirection table */
3566 cmd.data0 = sc->num_slices;
3567 err = mxge_send_cmd(sc, MXGEFW_CMD_SET_RSS_TABLE_SIZE,
3570 err |= mxge_send_cmd(sc, MXGEFW_CMD_GET_RSS_TABLE_OFFSET,
3573 device_printf(sc->dev,
3574 "failed to setup rss tables\n");
3578 /* just enable an identity mapping */
3579 itable = sc->sram + cmd.data0;
3580 for (i = 0; i < sc->num_slices; i++)
3581 itable[i] = (uint8_t)i;
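		/*
		 * Example: with 4 slices the table reads 0,1,2,3; the
		 * firmware hashes each flow, indexes the table with the
		 * hash (modulo the table size), and steers the packet to
		 * that slice's receive ring, so an identity table spreads
		 * flows evenly.
		 */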
3584 cmd.data1 = mxge_rss_hash_type;
3585 err = mxge_send_cmd(sc, MXGEFW_CMD_SET_RSS_ENABLE, &cmd);
3587 device_printf(sc->dev, "failed to enable slices\n");
3593 mxge_choose_params(sc->ifp->if_mtu, &big_bytes, &cl_size, &nbufs);
3596 err = mxge_send_cmd(sc, MXGEFW_CMD_ALWAYS_USE_N_BIG_BUFFERS,
3598 /* error is only meaningful if we're trying to set
3599 MXGEFW_CMD_ALWAYS_USE_N_BIG_BUFFERS > 1 */
3600 if (err && nbufs > 1) {
3601 device_printf(sc->dev,
3602 "Failed to set alway-use-n to %d\n",
3606 /* Give the firmware the mtu and the big and small buffer
3607 sizes. The firmware wants the big buf size to be a power
3608 of two. Luckily, FreeBSD's clusters are powers of two */
3609 cmd.data0 = sc->ifp->if_mtu + ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
3610 err = mxge_send_cmd(sc, MXGEFW_CMD_SET_MTU, &cmd);
3611 cmd.data0 = MHLEN - MXGEFW_PAD;
3612 err |= mxge_send_cmd(sc, MXGEFW_CMD_SET_SMALL_BUFFER_SIZE,
3614 cmd.data0 = big_bytes;
3615 err |= mxge_send_cmd(sc, MXGEFW_CMD_SET_BIG_BUFFER_SIZE, &cmd);
3618 device_printf(sc->dev, "failed to setup params\n");
3622 /* Now give the firmware a pointer to the stats block */
3624 #ifdef IFNET_BUF_RING
3625 slice < sc->num_slices;
3630 ss = &sc->ss[slice];
3632 MXGE_LOWPART_TO_U32(ss->fw_stats_dma.bus_addr);
3634 MXGE_HIGHPART_TO_U32(ss->fw_stats_dma.bus_addr);
3635 cmd.data2 = sizeof(struct mcp_irq_data);
3636 cmd.data2 |= (slice << 16);
3637 err |= mxge_send_cmd(sc, MXGEFW_CMD_SET_STATS_DMA_V2, &cmd);
3641 bus = sc->ss->fw_stats_dma.bus_addr;
3642 bus += offsetof(struct mcp_irq_data, send_done_count);
3643 cmd.data0 = MXGE_LOWPART_TO_U32(bus);
3644 cmd.data1 = MXGE_HIGHPART_TO_U32(bus);
3645 err = mxge_send_cmd(sc,
3646 MXGEFW_CMD_SET_STATS_DMA_OBSOLETE,
3648 /* Firmware cannot support multicast without STATS_DMA_V2 */
3649 sc->fw_multicast_support = 0;
3651 sc->fw_multicast_support = 1;
3655 device_printf(sc->dev, "failed to setup params\n");
3659 for (slice = 0; slice < sc->num_slices; slice++) {
3660 err = mxge_slice_open(&sc->ss[slice], nbufs, cl_size);
3662 device_printf(sc->dev, "couldn't open slice %d\n",
3668 /* Finally, start the firmware running */
3669 err = mxge_send_cmd(sc, MXGEFW_CMD_ETHERNET_UP, &cmd);
3671 device_printf(sc->dev, "Couldn't bring up link\n");
3674 #ifdef IFNET_BUF_RING
3675 for (slice = 0; slice < sc->num_slices; slice++) {
3676 ss = &sc->ss[slice];
3677 ss->if_drv_flags |= IFF_DRV_RUNNING;
3678 ss->if_drv_flags &= ~IFF_DRV_OACTIVE;
3681 sc->ifp->if_drv_flags |= IFF_DRV_RUNNING;
3682 sc->ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;
3688 mxge_free_mbufs(sc);
3694 mxge_close(mxge_softc_t *sc, int down)
3697 int err, old_down_cnt;
3698 #ifdef IFNET_BUF_RING
3699 struct mxge_slice_state *ss;
3703 #ifdef IFNET_BUF_RING
3704 for (slice = 0; slice < sc->num_slices; slice++) {
3705 ss = &sc->ss[slice];
3706 ss->if_drv_flags &= ~IFF_DRV_RUNNING;
3709 sc->ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
3711 old_down_cnt = sc->down_cnt;
3713 err = mxge_send_cmd(sc, MXGEFW_CMD_ETHERNET_DOWN, &cmd);
3715 device_printf(sc->dev,
3716 "Couldn't bring down link\n");
3718 if (old_down_cnt == sc->down_cnt) {
3719 /* wait for down irq */
3720 DELAY(10 * sc->intr_coal_delay);
3723 if (old_down_cnt == sc->down_cnt) {
3724 device_printf(sc->dev, "never got down irq\n");
3727 mxge_free_mbufs(sc);
3733 mxge_setup_cfg_space(mxge_softc_t *sc)
3735 device_t dev = sc->dev;
3737 uint16_t cmd, lnk, pectl;
3739 /* find the PCIe link width and set max read request to 4KB*/
3740 if (pci_find_cap(dev, PCIY_EXPRESS, &reg) == 0) {
3741 lnk = pci_read_config(dev, reg + 0x12, 2);
3742 sc->link_width = (lnk >> 4) & 0x3f;
3744 if (sc->pectl == 0) {
3745 pectl = pci_read_config(dev, reg + 0x8, 2);
3746 pectl = (pectl & ~0x7000) | (5 << 12);
3747 pci_write_config(dev, reg + 0x8, pectl, 2);
3750 /* restore saved pectl after watchdog reset */
3751 pci_write_config(dev, reg + 0x8, sc->pectl, 2);
3755 /* Enable DMA and Memory space access */
3756 pci_enable_busmaster(dev);
3757 cmd = pci_read_config(dev, PCIR_COMMAND, 2);
3758 cmd |= PCIM_CMD_MEMEN;
3759 pci_write_config(dev, PCIR_COMMAND, cmd, 2);
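/*
 * For illustration: bits 14:12 of the PCIe Device Control register
 * encode Max_Read_Request_Size as 128 << value, so the (5 << 12) above
 * selects 4096-byte reads.  Sketch of that field update (hypothetical
 * helper):
 */
static inline uint16_t
toy_set_max_read_req(uint16_t pectl, uint16_t encoding)
{
	pectl &= ~0x7000;		/* clear bits 14:12 */
	pectl |= (encoding << 12);	/* 5 -> 128 << 5 = 4096 bytes */
	return (pectl);
}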
3763 mxge_read_reboot(mxge_softc_t *sc)
3765 device_t dev = sc->dev;
3768 /* find the vendor specific offset */
3769 if (pci_find_cap(dev, PCIY_VENDOR, &vs) != 0) {
3770 device_printf(sc->dev,
3771 "could not find vendor specific offset\n");
3772 return (uint32_t)-1;
3774 /* enable read32 mode */
3775 pci_write_config(dev, vs + 0x10, 0x3, 1);
3776 /* tell NIC which register to read */
3777 pci_write_config(dev, vs + 0x18, 0xfffffff0, 4);
3778 return (pci_read_config(dev, vs + 0x14, 4));
3782 mxge_watchdog_reset(mxge_softc_t *sc)
3784 struct pci_devinfo *dinfo;
3785 struct mxge_slice_state *ss;
3786 int err, running, s, num_tx_slices = 1;
3792 device_printf(sc->dev, "Watchdog reset!\n");
3795 * check to see if the NIC rebooted. If it did, then all of
3796 * PCI config space has been reset, and things like the
3797 * busmaster bit will be zero. If this is the case, then we
3798 * must restore PCI config space before the NIC can be used
3801 cmd = pci_read_config(sc->dev, PCIR_COMMAND, 2);
3802 if (cmd == 0xffff) {
3804 * maybe the watchdog caught the NIC rebooting; wait
3805 * up to 100ms for it to finish. If it does not come
3806 * back, then give up
3809 cmd = pci_read_config(sc->dev, PCIR_COMMAND, 2);
3810 if (cmd == 0xffff) {
3811 device_printf(sc->dev, "NIC disappeared!\n");
3814 if ((cmd & PCIM_CMD_BUSMASTEREN) == 0) {
3815 /* print the reboot status */
3816 reboot = mxge_read_reboot(sc);
3817 device_printf(sc->dev, "NIC rebooted, status = 0x%x\n",
3819 running = sc->ifp->if_drv_flags & IFF_DRV_RUNNING;
3823 * quiesce NIC so that TX routines will not try to
3824 * xmit after restoration of BAR
3827 /* Mark the link as down */
3828 if (sc->link_state) {
3830 if_link_state_change(sc->ifp,
3833 #ifdef IFNET_BUF_RING
3834 num_tx_slices = sc->num_slices;
3836 /* grab all TX locks to ensure no tx */
3837 for (s = 0; s < num_tx_slices; s++) {
3839 mtx_lock(&ss->tx.mtx);
3843 /* restore PCI configuration space */
3844 dinfo = device_get_ivars(sc->dev);
3845 pci_cfg_restore(sc->dev, dinfo);
3847 /* and redo any changes we made to our config space */
3848 mxge_setup_cfg_space(sc);
3851 err = mxge_load_firmware(sc, 0);
3853 device_printf(sc->dev,
3854 "Unable to re-load f/w\n");
3858 err = mxge_open(sc);
3859 /* release all TX locks */
3860 for (s = 0; s < num_tx_slices; s++) {
3862 #ifdef IFNET_BUF_RING
3863 mxge_start_locked(ss);
3865 mtx_unlock(&ss->tx.mtx);
3868 sc->watchdog_resets++;
3870 device_printf(sc->dev,
3871 "NIC did not reboot, not resetting\n");
3875 device_printf(sc->dev, "watchdog reset failed\n");
3879 callout_reset(&sc->co_hdl, mxge_ticks, mxge_tick, sc);
3884 mxge_watchdog_task(void *arg, int pending)
3886 mxge_softc_t *sc = arg;
3889 mtx_lock(&sc->driver_mtx);
3890 mxge_watchdog_reset(sc);
3891 mtx_unlock(&sc->driver_mtx);
3895 mxge_warn_stuck(mxge_softc_t *sc, mxge_tx_ring_t *tx, int slice)
3897 tx = &sc->ss[slice].tx;
3898 device_printf(sc->dev, "slice %d stuck? ring state:\n", slice);
3899 device_printf(sc->dev,
3900 "tx.req=%d tx.done=%d, tx.queue_active=%d\n",
3901 tx->req, tx->done, tx->queue_active);
3902 device_printf(sc->dev, "tx.activate=%d tx.deactivate=%d\n",
3903 tx->activate, tx->deactivate);
3904 device_printf(sc->dev, "pkt_done=%d fw=%d\n",
3906 be32toh(sc->ss->fw_stats->send_done_count));
3910 mxge_watchdog(mxge_softc_t *sc)
3913 uint32_t rx_pause = be32toh(sc->ss->fw_stats->dropped_pause);
3916 /* see if we have outstanding transmits, which
3917 have been pending for more than mxge_ticks */
3919 #ifdef IFNET_BUF_RING
3920 (i < sc->num_slices) && (err == 0);
3922 (i < 1) && (err == 0);
3926 if (tx->req != tx->done &&
3927 tx->watchdog_req != tx->watchdog_done &&
3928 tx->done == tx->watchdog_done) {
3929 /* check for pause blocking before resetting */
3930 if (tx->watchdog_rx_pause == rx_pause) {
3931 mxge_warn_stuck(sc, tx, i);
3932 taskqueue_enqueue(sc->tq, &sc->watchdog_task);
3936 device_printf(sc->dev, "Flow control blocking "
3937 "xmits, check link partner\n");
3940 tx->watchdog_req = tx->req;
3941 tx->watchdog_done = tx->done;
3942 tx->watchdog_rx_pause = rx_pause;
3945 if (sc->need_media_probe)
3946 mxge_media_probe(sc);
3951 mxge_update_stats(mxge_softc_t *sc)
3953 struct mxge_slice_state *ss;
3955 u_long ipackets = 0;
3956 u_long opackets = 0;
3957 #ifdef IFNET_BUF_RING
3965 for (slice = 0; slice < sc->num_slices; slice++) {
3966 ss = &sc->ss[slice];
3967 ipackets += ss->ipackets;
3968 opackets += ss->opackets;
3969 #ifdef IFNET_BUF_RING
3970 obytes += ss->obytes;
3971 omcasts += ss->omcasts;
3972 odrops += ss->tx.br->br_drops;
3974 oerrors += ss->oerrors;
3976 pkts = (ipackets - sc->ifp->if_ipackets);
3977 pkts += (opackets - sc->ifp->if_opackets);
3978 sc->ifp->if_ipackets = ipackets;
3979 sc->ifp->if_opackets = opackets;
3980 #ifdef IFNET_BUF_RING
3981 sc->ifp->if_obytes = obytes;
3982 sc->ifp->if_omcasts = omcasts;
3983 sc->ifp->if_snd.ifq_drops = odrops;
3985 sc->ifp->if_oerrors = oerrors;
3990 mxge_tick(void *arg)
3992 mxge_softc_t *sc = arg;
3999 running = sc->ifp->if_drv_flags & IFF_DRV_RUNNING;
4001 /* aggregate stats from different slices */
4002 pkts = mxge_update_stats(sc);
4003 if (!sc->watchdog_countdown) {
4004 err = mxge_watchdog(sc);
4005 sc->watchdog_countdown = 4;
4007 sc->watchdog_countdown--;
4010 /* ensure NIC did not suffer h/w fault while idle */
4011 cmd = pci_read_config(sc->dev, PCIR_COMMAND, 2);
4012 if ((cmd & PCIM_CMD_BUSMASTEREN) == 0) {
4014 taskqueue_enqueue(sc->tq, &sc->watchdog_task);
4017 /* look less often if NIC is idle */
4022 callout_reset(&sc->co_hdl, ticks, mxge_tick, sc);
4027 mxge_media_change(struct ifnet *ifp)
4033 mxge_change_mtu(mxge_softc_t *sc, int mtu)
4035 struct ifnet *ifp = sc->ifp;
4036 int real_mtu, old_mtu;
4040 real_mtu = mtu + ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
4041 if ((real_mtu > sc->max_mtu) || real_mtu < 60)
4043 mtx_lock(&sc->driver_mtx);
4044 old_mtu = ifp->if_mtu;
4046 if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
4048 err = mxge_open(sc);
4050 ifp->if_mtu = old_mtu;
4052 (void) mxge_open(sc);
4055 mtx_unlock(&sc->driver_mtx);
4060 mxge_media_status(struct ifnet *ifp, struct ifmediareq *ifmr)
4062 mxge_softc_t *sc = ifp->if_softc;
4067 ifmr->ifm_status = IFM_AVALID;
4068 ifmr->ifm_active = IFM_ETHER | IFM_FDX;
4069 ifmr->ifm_status |= sc->link_state ? IFM_ACTIVE : 0;
4070 ifmr->ifm_active |= sc->current_media;
4074 mxge_ioctl(struct ifnet *ifp, u_long command, caddr_t data)
4076 mxge_softc_t *sc = ifp->if_softc;
4077 struct ifreq *ifr = (struct ifreq *)data;
4084 err = ether_ioctl(ifp, command, data);
4088 err = mxge_change_mtu(sc, ifr->ifr_mtu);
4092 mtx_lock(&sc->driver_mtx);
4094 mtx_unlock(&sc->driver_mtx);
4097 if (ifp->if_flags & IFF_UP) {
4098 if (!(ifp->if_drv_flags & IFF_DRV_RUNNING)) {
4099 err = mxge_open(sc);
4101 /* take care of promisc and allmulti flag changes */
4103 mxge_change_promisc(sc,
4104 ifp->if_flags & IFF_PROMISC);
4105 mxge_set_multicast_list(sc);
4108 if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
4112 mtx_unlock(&sc->driver_mtx);
4117 mtx_lock(&sc->driver_mtx);
4118 mxge_set_multicast_list(sc);
4119 mtx_unlock(&sc->driver_mtx);
4123 mtx_lock(&sc->driver_mtx);
4124 mask = ifr->ifr_reqcap ^ ifp->if_capenable;
4125 if (mask & IFCAP_TXCSUM) {
4126 if (IFCAP_TXCSUM & ifp->if_capenable) {
4127 ifp->if_capenable &= ~(IFCAP_TXCSUM|IFCAP_TSO4);
4128 ifp->if_hwassist &= ~(CSUM_TCP | CSUM_UDP
4131 ifp->if_capenable |= IFCAP_TXCSUM;
4132 ifp->if_hwassist |= (CSUM_TCP | CSUM_UDP);
4134 } else if (mask & IFCAP_RXCSUM) {
4135 if (IFCAP_RXCSUM & ifp->if_capenable) {
4136 ifp->if_capenable &= ~IFCAP_RXCSUM;
4139 ifp->if_capenable |= IFCAP_RXCSUM;
4143 if (mask & IFCAP_TSO4) {
4144 if (IFCAP_TSO4 & ifp->if_capenable) {
4145 ifp->if_capenable &= ~IFCAP_TSO4;
4146 ifp->if_hwassist &= ~CSUM_TSO;
4147 } else if (IFCAP_TXCSUM & ifp->if_capenable) {
4148 ifp->if_capenable |= IFCAP_TSO4;
4149 ifp->if_hwassist |= CSUM_TSO;
4151 printf("mxge requires tx checksum offload"
4152 " be enabled to use TSO\n");
4156 if (mask & IFCAP_LRO) {
4157 if (IFCAP_LRO & ifp->if_capenable)
4158 err = mxge_change_lro_locked(sc, 0);
4160 err = mxge_change_lro_locked(sc, mxge_lro_cnt);
4162 if (mask & IFCAP_VLAN_HWTAGGING)
4163 ifp->if_capenable ^= IFCAP_VLAN_HWTAGGING;
4164 if (mask & IFCAP_VLAN_HWTSO)
4165 ifp->if_capenable ^= IFCAP_VLAN_HWTSO;
4167 if (!(ifp->if_capabilities & IFCAP_VLAN_HWTSO) ||
4168 !(ifp->if_capenable & IFCAP_VLAN_HWTAGGING))
4169 ifp->if_capenable &= ~IFCAP_VLAN_HWTSO;
4171 mtx_unlock(&sc->driver_mtx);
4172 VLAN_CAPABILITIES(ifp);
4177 mtx_lock(&sc->driver_mtx);
4178 mxge_media_probe(sc);
4179 mtx_unlock(&sc->driver_mtx);
4180 err = ifmedia_ioctl(ifp, (struct ifreq *)data,
4181 &sc->media, command);
4191 mxge_fetch_tunables(mxge_softc_t *sc)
4194 TUNABLE_INT_FETCH("hw.mxge.max_slices", &mxge_max_slices);
4195 TUNABLE_INT_FETCH("hw.mxge.flow_control_enabled",
4196 &mxge_flow_control);
4197 TUNABLE_INT_FETCH("hw.mxge.intr_coal_delay",
4198 &mxge_intr_coal_delay);
4199 TUNABLE_INT_FETCH("hw.mxge.nvidia_ecrc_enable",
4200 &mxge_nvidia_ecrc_enable);
4201 TUNABLE_INT_FETCH("hw.mxge.force_firmware",
4202 &mxge_force_firmware);
4203 TUNABLE_INT_FETCH("hw.mxge.deassert_wait",
4204 &mxge_deassert_wait);
4205 TUNABLE_INT_FETCH("hw.mxge.verbose",
4207 TUNABLE_INT_FETCH("hw.mxge.ticks", &mxge_ticks);
4208 TUNABLE_INT_FETCH("hw.mxge.lro_cnt", &sc->lro_cnt);
4209 TUNABLE_INT_FETCH("hw.mxge.always_promisc", &mxge_always_promisc);
4210 TUNABLE_INT_FETCH("hw.mxge.rss_hash_type", &mxge_rss_hash_type);
4211 TUNABLE_INT_FETCH("hw.mxge.rss_hashtype", &mxge_rss_hash_type);
4212 TUNABLE_INT_FETCH("hw.mxge.initial_mtu", &mxge_initial_mtu);
4213 TUNABLE_INT_FETCH("hw.mxge.throttle", &mxge_throttle);
4214 if (sc->lro_cnt != 0)
4215 mxge_lro_cnt = sc->lro_cnt;
4219 if (mxge_intr_coal_delay < 0 || mxge_intr_coal_delay > 10*1000)
4220 mxge_intr_coal_delay = 30;
4221 if (mxge_ticks == 0)
4222 mxge_ticks = hz / 2;
4223 sc->pause = mxge_flow_control;
4224 if (mxge_rss_hash_type < MXGEFW_RSS_HASH_TYPE_IPV4
4225 || mxge_rss_hash_type > MXGEFW_RSS_HASH_TYPE_MAX) {
4226 mxge_rss_hash_type = MXGEFW_RSS_HASH_TYPE_SRC_DST_PORT;
4228 if (mxge_initial_mtu > ETHERMTU_JUMBO ||
4229 mxge_initial_mtu < ETHER_MIN_LEN)
4230 mxge_initial_mtu = ETHERMTU_JUMBO;
4232 if (mxge_throttle && mxge_throttle > MXGE_MAX_THROTTLE)
4233 mxge_throttle = MXGE_MAX_THROTTLE;
4234 if (mxge_throttle && mxge_throttle < MXGE_MIN_THROTTLE)
4235 mxge_throttle = MXGE_MIN_THROTTLE;
4236 sc->throttle = mxge_throttle;
4241 mxge_free_slices(mxge_softc_t *sc)
4243 struct mxge_slice_state *ss;
4250 for (i = 0; i < sc->num_slices; i++) {
4252 if (ss->fw_stats != NULL) {
4253 mxge_dma_free(&ss->fw_stats_dma);
4254 ss->fw_stats = NULL;
4255 #ifdef IFNET_BUF_RING
4256 if (ss->tx.br != NULL) {
4257 drbr_free(ss->tx.br, M_DEVBUF);
4261 mtx_destroy(&ss->tx.mtx);
4263 if (ss->rx_done.entry != NULL) {
4264 mxge_dma_free(&ss->rx_done.dma);
4265 ss->rx_done.entry = NULL;
4268 free(sc->ss, M_DEVBUF);
4273 mxge_alloc_slices(mxge_softc_t *sc)
4276 struct mxge_slice_state *ss;
4278 int err, i, max_intr_slots;
4280 err = mxge_send_cmd(sc, MXGEFW_CMD_GET_RX_RING_SIZE, &cmd);
4282 device_printf(sc->dev, "Cannot determine rx ring size\n");
4285 sc->rx_ring_size = cmd.data0;
4286 max_intr_slots = 2 * (sc->rx_ring_size / sizeof (mcp_dma_addr_t));
4288 bytes = sizeof (*sc->ss) * sc->num_slices;
4289 sc->ss = malloc(bytes, M_DEVBUF, M_NOWAIT | M_ZERO);
4292 for (i = 0; i < sc->num_slices; i++) {
4297 /* allocate per-slice rx interrupt queues */
4299 bytes = max_intr_slots * sizeof (*ss->rx_done.entry);
4300 err = mxge_dma_alloc(sc, &ss->rx_done.dma, bytes, 4096);
4303 ss->rx_done.entry = ss->rx_done.dma.addr;
4304 bzero(ss->rx_done.entry, bytes);
4307 * allocate the per-slice firmware stats; stats
4308 * (including tx) are used only on the first slice */
4311 #ifndef IFNET_BUF_RING
4316 bytes = sizeof (*ss->fw_stats);
4317 err = mxge_dma_alloc(sc, &ss->fw_stats_dma,
4318 sizeof (*ss->fw_stats), 64);
4321 ss->fw_stats = (mcp_irq_data_t *)ss->fw_stats_dma.addr;
4322 snprintf(ss->tx.mtx_name, sizeof(ss->tx.mtx_name),
4323 "%s:tx(%d)", device_get_nameunit(sc->dev), i);
4324 mtx_init(&ss->tx.mtx, ss->tx.mtx_name, NULL, MTX_DEF);
4325 #ifdef IFNET_BUF_RING
4326 ss->tx.br = buf_ring_alloc(2048, M_DEVBUF, M_WAITOK,
4334 mxge_free_slices(sc);
4339 mxge_slice_probe(mxge_softc_t *sc)
4343 int msix_cnt, status, max_intr_slots;
4347 * don't enable multiple slices if they have been administratively disabled,
4348 * or if this is not an SMP system
4351 if (mxge_max_slices == 0 || mxge_max_slices == 1 || mp_ncpus < 2)
4354 /* see how many MSI-X interrupts are available */
4355 msix_cnt = pci_msix_count(sc->dev);
4359 /* now load the slice-aware firmware and see what it supports */
4360 old_fw = sc->fw_name;
4361 if (old_fw == mxge_fw_aligned)
4362 sc->fw_name = mxge_fw_rss_aligned;
4364 sc->fw_name = mxge_fw_rss_unaligned;
4365 status = mxge_load_firmware(sc, 0);
4367 device_printf(sc->dev, "Falling back to a single slice\n");
4371 /* try to send a reset command to the card to see if it is alive */
4373 memset(&cmd, 0, sizeof (cmd));
4374 status = mxge_send_cmd(sc, MXGEFW_CMD_RESET, &cmd);
4376 device_printf(sc->dev, "failed reset\n");
4380 /* get rx ring size */
4381 status = mxge_send_cmd(sc, MXGEFW_CMD_GET_RX_RING_SIZE, &cmd);
4383 device_printf(sc->dev, "Cannot determine rx ring size\n");
4386 max_intr_slots = 2 * (cmd.data0 / sizeof (mcp_dma_addr_t));
4388 /* tell it the size of the interrupt queues */
4389 cmd.data0 = max_intr_slots * sizeof (struct mcp_slot);
4390 status = mxge_send_cmd(sc, MXGEFW_CMD_SET_INTRQ_SIZE, &cmd);
4392 device_printf(sc->dev, "failed MXGEFW_CMD_SET_INTRQ_SIZE\n");
4396 /* ask the maximum number of slices it supports */
4397 status = mxge_send_cmd(sc, MXGEFW_CMD_GET_MAX_RSS_QUEUES, &cmd);
4399 device_printf(sc->dev,
4400 "failed MXGEFW_CMD_GET_MAX_RSS_QUEUES\n");
4403 sc->num_slices = cmd.data0;
4404 if (sc->num_slices > msix_cnt)
4405 sc->num_slices = msix_cnt;
4407 if (mxge_max_slices == -1) {
4408 /* cap to number of CPUs in system */
4409 if (sc->num_slices > mp_ncpus)
4410 sc->num_slices = mp_ncpus;
4412 if (sc->num_slices > mxge_max_slices)
4413 sc->num_slices = mxge_max_slices;
4415 /* make sure it is a power of two */
4416 while (sc->num_slices & (sc->num_slices - 1))
4420 device_printf(sc->dev, "using %d slices\n",
4426 sc->fw_name = old_fw;
4427 (void) mxge_load_firmware(sc, 0);
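/*
 * Illustration of the power-of-two rounding above: n & (n - 1) clears
 * the lowest set bit, so it is zero exactly when n is a power of two.
 * Decrementing until that holds lands on the largest power of two that
 * is <= n, e.g. 6 -> 5 -> 4.  Standalone sketch (hypothetical name):
 */
static int
toy_round_down_pow2(int n)
{
	while (n & (n - 1))
		n--;
	return (n);
}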
4431 mxge_add_msix_irqs(mxge_softc_t *sc)
4434 int count, err, i, rid;
4437 sc->msix_table_res = bus_alloc_resource_any(sc->dev, SYS_RES_MEMORY,
4440 if (sc->msix_table_res == NULL) {
4441 device_printf(sc->dev, "couldn't alloc MSIX table res\n");
4445 count = sc->num_slices;
4446 err = pci_alloc_msix(sc->dev, &count);
4448 device_printf(sc->dev, "pci_alloc_msix: failed, wanted %d, "
4449 "err = %d\n", sc->num_slices, err);
4450 goto abort_with_msix_table;
4452 if (count < sc->num_slices) {
4453 device_printf(sc->dev, "pci_alloc_msix: need %d, got %d\n",
4454 count, sc->num_slices);
4455 device_printf(sc->dev,
4456 "Try setting hw.mxge.max_slices to %d\n",
4459 goto abort_with_msix;
4461 bytes = sizeof (*sc->msix_irq_res) * sc->num_slices;
4462 sc->msix_irq_res = malloc(bytes, M_DEVBUF, M_NOWAIT|M_ZERO);
4463 if (sc->msix_irq_res == NULL) {
4465 goto abort_with_msix;
4468 for (i = 0; i < sc->num_slices; i++) {
4470 sc->msix_irq_res[i] = bus_alloc_resource_any(sc->dev,
4473 if (sc->msix_irq_res[i] == NULL) {
4474 device_printf(sc->dev, "couldn't allocate IRQ res"
4475 " for message %d\n", i);
4477 goto abort_with_res;
4481 bytes = sizeof (*sc->msix_ih) * sc->num_slices;
4482 sc->msix_ih = malloc(bytes, M_DEVBUF, M_NOWAIT|M_ZERO);
4484 for (i = 0; i < sc->num_slices; i++) {
4485 err = bus_setup_intr(sc->dev, sc->msix_irq_res[i],
4486 INTR_TYPE_NET | INTR_MPSAFE,
4487 #if __FreeBSD_version > 700030
4490 mxge_intr, &sc->ss[i], &sc->msix_ih[i]);
4492 device_printf(sc->dev, "couldn't setup intr for "
4494 goto abort_with_intr;
4496 bus_describe_intr(sc->dev, sc->msix_irq_res[i],
4497 sc->msix_ih[i], "s%d", i);
4501 device_printf(sc->dev, "using %d msix IRQs:",
4503 for (i = 0; i < sc->num_slices; i++)
4504 printf(" %ld", rman_get_start(sc->msix_irq_res[i]));
4510 for (i = 0; i < sc->num_slices; i++) {
4511 if (sc->msix_ih[i] != NULL) {
4512 bus_teardown_intr(sc->dev, sc->msix_irq_res[i],
4514 sc->msix_ih[i] = NULL;
4517 free(sc->msix_ih, M_DEVBUF);
4521 for (i = 0; i < sc->num_slices; i++) {
4523 if (sc->msix_irq_res[i] != NULL)
4524 bus_release_resource(sc->dev, SYS_RES_IRQ, rid,
4525 sc->msix_irq_res[i]);
4526 sc->msix_irq_res[i] = NULL;
4528 free(sc->msix_irq_res, M_DEVBUF);
4532 pci_release_msi(sc->dev);
4534 abort_with_msix_table:
4535 bus_release_resource(sc->dev, SYS_RES_MEMORY, PCIR_BAR(2),
4536 sc->msix_table_res);
4542 mxge_add_single_irq(mxge_softc_t *sc)
4544 int count, err, rid;
4546 count = pci_msi_count(sc->dev);
4547 if (count == 1 && pci_alloc_msi(sc->dev, &count) == 0) {
4553 sc->irq_res = bus_alloc_resource(sc->dev, SYS_RES_IRQ, &rid, 0, ~0,
4554 1, RF_SHAREABLE | RF_ACTIVE);
4555 if (sc->irq_res == NULL) {
4556 device_printf(sc->dev, "could not alloc interrupt\n");
4560 device_printf(sc->dev, "using %s irq %ld\n",
4561 sc->legacy_irq ? "INTx" : "MSI",
4562 rman_get_start(sc->irq_res));
4563 err = bus_setup_intr(sc->dev, sc->irq_res,
4564 INTR_TYPE_NET | INTR_MPSAFE,
4565 #if __FreeBSD_version > 700030
4568 mxge_intr, &sc->ss[0], &sc->ih);
4570 bus_release_resource(sc->dev, SYS_RES_IRQ,
4571 sc->legacy_irq ? 0 : 1, sc->irq_res);
4572 if (!sc->legacy_irq)
4573 pci_release_msi(sc->dev);
4579 mxge_rem_msix_irqs(mxge_softc_t *sc)
4583 for (i = 0; i < sc->num_slices; i++) {
4584 if (sc->msix_ih[i] != NULL) {
4585 bus_teardown_intr(sc->dev, sc->msix_irq_res[i],
4587 sc->msix_ih[i] = NULL;
4590 free(sc->msix_ih, M_DEVBUF);
4592 for (i = 0; i < sc->num_slices; i++) {
4594 if (sc->msix_irq_res[i] != NULL)
4595 bus_release_resource(sc->dev, SYS_RES_IRQ, rid,
4596 sc->msix_irq_res[i]);
4597 sc->msix_irq_res[i] = NULL;
4599 free(sc->msix_irq_res, M_DEVBUF);
4601 bus_release_resource(sc->dev, SYS_RES_MEMORY, PCIR_BAR(2),
4602 sc->msix_table_res);
4604 pci_release_msi(sc->dev);
4609 mxge_rem_single_irq(mxge_softc_t *sc)
4611 bus_teardown_intr(sc->dev, sc->irq_res, sc->ih);
4612 bus_release_resource(sc->dev, SYS_RES_IRQ,
4613 sc->legacy_irq ? 0 : 1, sc->irq_res);
4614 if (!sc->legacy_irq)
4615 pci_release_msi(sc->dev);
4619 mxge_rem_irq(mxge_softc_t *sc)
4621 if (sc->num_slices > 1)
4622 mxge_rem_msix_irqs(sc);
4624 mxge_rem_single_irq(sc);
4628 mxge_add_irq(mxge_softc_t *sc)
4632 if (sc->num_slices > 1)
4633 err = mxge_add_msix_irqs(sc);
4635 err = mxge_add_single_irq(sc);
4637 if (0 && err == 0 && sc->num_slices > 1) {
4638 mxge_rem_msix_irqs(sc);
4639 err = mxge_add_msix_irqs(sc);
4646 mxge_attach(device_t dev)
4648 mxge_softc_t *sc = device_get_softc(dev);
4653 mxge_fetch_tunables(sc);
4655 TASK_INIT(&sc->watchdog_task, 1, mxge_watchdog_task, sc);
4656 sc->tq = taskqueue_create("mxge_taskq", M_WAITOK,
4657 taskqueue_thread_enqueue, &sc->tq);
4658 if (sc->tq == NULL) {
4660 goto abort_with_nothing;
4663 err = bus_dma_tag_create(bus_get_dma_tag(dev), /* parent */
4666 BUS_SPACE_MAXADDR, /* low */
4667 BUS_SPACE_MAXADDR, /* high */
4668 NULL, NULL, /* filter */
4669 65536 + 256, /* maxsize */
4670 MXGE_MAX_SEND_DESC, /* num segs */
4671 65536, /* maxsegsize */
4673 NULL, NULL, /* lock */
4674 &sc->parent_dmat); /* tag */
4677 device_printf(sc->dev, "Err %d allocating parent dmat\n",
4682 ifp = sc->ifp = if_alloc(IFT_ETHER);
4684 device_printf(dev, "can not if_alloc()\n");
4686 goto abort_with_parent_dmat;
4688 if_initname(ifp, device_get_name(dev), device_get_unit(dev));
4690 snprintf(sc->cmd_mtx_name, sizeof(sc->cmd_mtx_name), "%s:cmd",
4691 device_get_nameunit(dev));
4692 mtx_init(&sc->cmd_mtx, sc->cmd_mtx_name, NULL, MTX_DEF);
4693 snprintf(sc->driver_mtx_name, sizeof(sc->driver_mtx_name),
4694 "%s:drv", device_get_nameunit(dev));
4695 mtx_init(&sc->driver_mtx, sc->driver_mtx_name,
4696 MTX_NETWORK_LOCK, MTX_DEF);
4698 callout_init_mtx(&sc->co_hdl, &sc->driver_mtx, 0);
4700 mxge_setup_cfg_space(sc);
4702 /* Map the board into the kernel */
4704 sc->mem_res = bus_alloc_resource(dev, SYS_RES_MEMORY, &rid, 0,
4706 if (sc->mem_res == NULL) {
4707 device_printf(dev, "could not map memory\n");
4709 goto abort_with_lock;
4711 sc->sram = rman_get_virtual(sc->mem_res);
4712 sc->sram_size = 2*1024*1024 - (2*(48*1024)+(32*1024)) - 0x100;
4713 if (sc->sram_size > rman_get_size(sc->mem_res)) {
4714 device_printf(dev, "impossible memory region size %ld\n",
4715 rman_get_size(sc->mem_res));
4717 goto abort_with_mem_res;
4720 /* make NULL terminated copy of the EEPROM strings section of LANai SRAM */
4722 bzero(sc->eeprom_strings, MXGE_EEPROM_STRINGS_SIZE);
4723 bus_space_read_region_1(rman_get_bustag(sc->mem_res),
4724 rman_get_bushandle(sc->mem_res),
4725 sc->sram_size - MXGE_EEPROM_STRINGS_SIZE,
4727 MXGE_EEPROM_STRINGS_SIZE - 2);
4728 err = mxge_parse_strings(sc);
4730 goto abort_with_mem_res;
4732 /* Enable write combining for efficient use of PCIe bus */
4735 /* Allocate the out of band dma memory */
4736 err = mxge_dma_alloc(sc, &sc->cmd_dma,
4737 sizeof (mxge_cmd_t), 64);
4739 goto abort_with_mem_res;
4740 sc->cmd = (mcp_cmd_response_t *) sc->cmd_dma.addr;
4741 err = mxge_dma_alloc(sc, &sc->zeropad_dma, 64, 64);
4743 goto abort_with_cmd_dma;
4745 err = mxge_dma_alloc(sc, &sc->dmabench_dma, 4096, 4096);
4747 goto abort_with_zeropad_dma;
4749 /* select & load the firmware */
4750 err = mxge_select_firmware(sc);
4752 goto abort_with_dmabench;
4753 sc->intr_coal_delay = mxge_intr_coal_delay;
4755 mxge_slice_probe(sc);
4756 err = mxge_alloc_slices(sc);
4758 goto abort_with_dmabench;
4760 err = mxge_reset(sc, 0);
4762 goto abort_with_slices;
4764 err = mxge_alloc_rings(sc);
4766 device_printf(sc->dev, "failed to allocate rings\n");
4767 goto abort_with_slices;
4770 err = mxge_add_irq(sc);
4772 device_printf(sc->dev, "failed to add irq\n");
4773 goto abort_with_rings;
4776 ifp->if_baudrate = IF_Gbps(10UL);
4777 ifp->if_capabilities = IFCAP_RXCSUM | IFCAP_TXCSUM | IFCAP_TSO4 |
4778 IFCAP_VLAN_MTU | IFCAP_LINKSTATE;
4780 ifp->if_capabilities |= IFCAP_LRO;
4783 #ifdef MXGE_NEW_VLAN_API
4784 ifp->if_capabilities |= IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_HWCSUM;
4786 /* Only FW 1.4.32 and newer can do TSO over vlans */
4787 if (sc->fw_ver_major == 1 && sc->fw_ver_minor == 4 &&
4788 sc->fw_ver_tiny >= 32)
4789 ifp->if_capabilities |= IFCAP_VLAN_HWTSO;
4792 sc->max_mtu = mxge_max_mtu(sc);
4793 if (sc->max_mtu >= 9000)
4794 ifp->if_capabilities |= IFCAP_JUMBO_MTU;
4796 device_printf(dev, "MTU limited to %d. Install "
4797 "latest firmware for 9000 byte jumbo support\n",
4798 sc->max_mtu - ETHER_HDR_LEN);
4799 ifp->if_hwassist = CSUM_TCP | CSUM_UDP | CSUM_TSO;
4800 ifp->if_capenable = ifp->if_capabilities;
4801 if (sc->lro_cnt == 0)
4802 ifp->if_capenable &= ~IFCAP_LRO;
4804 ifp->if_init = mxge_init;
4806 ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
4807 ifp->if_ioctl = mxge_ioctl;
4808 ifp->if_start = mxge_start;
4809 /* Initialise the ifmedia structure */
4810 ifmedia_init(&sc->media, 0, mxge_media_change,
4812 mxge_media_init(sc);
4813 mxge_media_probe(sc);
4815 ether_ifattach(ifp, sc->mac_addr);
4816 /* ether_ifattach sets mtu to ETHERMTU */
4817 if (mxge_initial_mtu != ETHERMTU)
4818 mxge_change_mtu(sc, mxge_initial_mtu);
4820 mxge_add_sysctls(sc);
4821 #ifdef IFNET_BUF_RING
4822 ifp->if_transmit = mxge_transmit;
4823 ifp->if_qflush = mxge_qflush;
4825 taskqueue_start_threads(&sc->tq, 1, PI_NET, "%s taskq",
4826 device_get_nameunit(sc->dev));
4827 callout_reset(&sc->co_hdl, mxge_ticks, mxge_tick, sc);
4831 mxge_free_rings(sc);
4833 mxge_free_slices(sc);
4834 abort_with_dmabench:
4835 mxge_dma_free(&sc->dmabench_dma);
4836 abort_with_zeropad_dma:
4837 mxge_dma_free(&sc->zeropad_dma);
4839 mxge_dma_free(&sc->cmd_dma);
4841 bus_release_resource(dev, SYS_RES_MEMORY, PCIR_BARS, sc->mem_res);
4843 pci_disable_busmaster(dev);
4844 mtx_destroy(&sc->cmd_mtx);
4845 mtx_destroy(&sc->driver_mtx);
4847 abort_with_parent_dmat:
4848 bus_dma_tag_destroy(sc->parent_dmat);
4850 if (sc->tq != NULL) {
4851 taskqueue_drain(sc->tq, &sc->watchdog_task);
4852 taskqueue_free(sc->tq);
4860 mxge_detach(device_t dev)
4862 mxge_softc_t *sc = device_get_softc(dev);
4864 if (mxge_vlans_active(sc)) {
4865 device_printf(sc->dev,
4866 "Detach vlans before removing module\n");
4869 mtx_lock(&sc->driver_mtx);
4871 if (sc->ifp->if_drv_flags & IFF_DRV_RUNNING)
4873 mtx_unlock(&sc->driver_mtx);
4874 ether_ifdetach(sc->ifp);
4875 if (sc->tq != NULL) {
4876 taskqueue_drain(sc->tq, &sc->watchdog_task);
4877 taskqueue_free(sc->tq);
4880 callout_drain(&sc->co_hdl);
4881 ifmedia_removeall(&sc->media);
4882 mxge_dummy_rdma(sc, 0);
4883 mxge_rem_sysctls(sc);
4885 mxge_free_rings(sc);
4886 mxge_free_slices(sc);
4887 mxge_dma_free(&sc->dmabench_dma);
4888 mxge_dma_free(&sc->zeropad_dma);
4889 mxge_dma_free(&sc->cmd_dma);
4890 bus_release_resource(dev, SYS_RES_MEMORY, PCIR_BARS, sc->mem_res);
4891 pci_disable_busmaster(dev);
4892 mtx_destroy(&sc->cmd_mtx);
4893 mtx_destroy(&sc->driver_mtx);
4895 bus_dma_tag_destroy(sc->parent_dmat);
4900 mxge_shutdown(device_t dev)
4906 This file uses Myri10GE driver indentation.
4909 c-file-style:"linux"