/******************************************************************************

Copyright (c) 2006-2013, Myricom Inc.

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:

 1. Redistributions of source code must retain the above copyright notice,
    this list of conditions and the following disclaimer.

 2. Neither the name of the Myricom Inc, nor the names of its
    contributors may be used to endorse or promote products derived from
    this software without specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.

***************************************************************************/
#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/linker.h>
#include <sys/firmware.h>
#include <sys/endian.h>
#include <sys/sockio.h>
#include <sys/malloc.h>
#include <sys/kernel.h>
#include <sys/module.h>
#include <sys/socket.h>
#include <sys/sysctl.h>
#include <sys/taskqueue.h>

#include <net/if_var.h>
#include <net/if_arp.h>
#include <net/ethernet.h>
#include <net/if_dl.h>
#include <net/if_media.h>
#include <net/if_types.h>
#include <net/if_vlan_var.h>

#include <netinet/in_systm.h>
#include <netinet/in.h>
#include <netinet/ip.h>
#include <netinet/ip6.h>
#include <netinet/tcp.h>
#include <netinet/tcp_lro.h>
#include <netinet6/ip6_var.h>

#include <machine/bus.h>
#include <machine/in_cksum.h>
#include <machine/resource.h>

#include <dev/pci/pcireg.h>
#include <dev/pci/pcivar.h>
#include <dev/pci/pci_private.h> /* XXX for pci_cfg_restore */

#include <vm/vm.h> /* for pmap_mapdev() */

#if defined(__i386) || defined(__amd64)
#include <machine/specialreg.h>
#endif

#include <dev/mxge/mxge_mcp.h>
#include <dev/mxge/mcp_gen_header.h>
/*#define MXGE_FAKE_IFP*/
#include <dev/mxge/if_mxge_var.h>

#include <sys/buf_ring.h>

#include "opt_inet6.h"
static int mxge_nvidia_ecrc_enable = 1;
static int mxge_force_firmware = 0;
static int mxge_intr_coal_delay = 30;
static int mxge_deassert_wait = 1;
static int mxge_flow_control = 1;
static int mxge_verbose = 0;
static int mxge_ticks;
static int mxge_max_slices = 1;
static int mxge_rss_hash_type = MXGEFW_RSS_HASH_TYPE_SRC_DST_PORT;
static int mxge_always_promisc = 0;
static int mxge_initial_mtu = ETHERMTU_JUMBO;
static int mxge_throttle = 0;
static char *mxge_fw_unaligned = "mxge_ethp_z8e";
static char *mxge_fw_aligned = "mxge_eth_z8e";
static char *mxge_fw_rss_aligned = "mxge_rss_eth_z8e";
static char *mxge_fw_rss_unaligned = "mxge_rss_ethp_z8e";

static int mxge_probe(device_t dev);
static int mxge_attach(device_t dev);
static int mxge_detach(device_t dev);
static int mxge_shutdown(device_t dev);
static void mxge_intr(void *arg);

static device_method_t mxge_methods[] =
{
	/* Device interface */
	DEVMETHOD(device_probe, mxge_probe),
	DEVMETHOD(device_attach, mxge_attach),
	DEVMETHOD(device_detach, mxge_detach),
	DEVMETHOD(device_shutdown, mxge_shutdown),
};

static driver_t mxge_driver =
{
	sizeof(mxge_softc_t),
};

static devclass_t mxge_devclass;

/* Declare ourselves to be a child of the PCI bus.*/
DRIVER_MODULE(mxge, pci, mxge_driver, mxge_devclass, 0, 0);
MODULE_DEPEND(mxge, firmware, 1, 1, 1);
MODULE_DEPEND(mxge, zlib, 1, 1, 1);

static int mxge_load_firmware(mxge_softc_t *sc, int adopt);
static int mxge_send_cmd(mxge_softc_t *sc, uint32_t cmd, mxge_cmd_t *data);
static int mxge_close(mxge_softc_t *sc, int down);
static int mxge_open(mxge_softc_t *sc);
static void mxge_tick(void *arg);
mxge_probe(device_t dev)
{
	if ((pci_get_vendor(dev) == MXGE_PCI_VENDOR_MYRICOM) &&
	    ((pci_get_device(dev) == MXGE_PCI_DEVICE_Z8E) ||
	     (pci_get_device(dev) == MXGE_PCI_DEVICE_Z8E_9))) {
		rev = pci_get_revid(dev);
		switch (rev) {
		case MXGE_PCI_REV_Z8E:
			device_set_desc(dev, "Myri10G-PCIE-8A");
			break;
		case MXGE_PCI_REV_Z8ES:
			device_set_desc(dev, "Myri10G-PCIE-8B");
			break;
		default:
			device_set_desc(dev, "Myri10G-PCIE-8??");
			device_printf(dev, "Unrecognized rev %d NIC\n",
mxge_enable_wc(mxge_softc_t *sc)
{
#if defined(__i386) || defined(__amd64)
	len = rman_get_size(sc->mem_res);
	err = pmap_change_attr((vm_offset_t) sc->sram,
			       len, PAT_WRITE_COMBINING);
	if (err != 0) {
		device_printf(sc->dev, "pmap_change_attr failed, %d\n",

/* callback to get our DMA address */
mxge_dmamap_callback(void *arg, bus_dma_segment_t *segs, int nsegs,
		     int error)
{
	*(bus_addr_t *) arg = segs->ds_addr;
}

mxge_dma_alloc(mxge_softc_t *sc, mxge_dma_t *dma, size_t bytes,
	       bus_size_t alignment)
{
	device_t dev = sc->dev;
	bus_size_t boundary, maxsegsize;

	if (bytes > 4096 && alignment == 4096) {

	/* allocate DMAable memory tags */
	err = bus_dma_tag_create(sc->parent_dmat,	/* parent */
				 alignment,		/* alignment */
				 boundary,		/* boundary */
				 BUS_SPACE_MAXADDR,	/* low */
				 BUS_SPACE_MAXADDR,	/* high */
				 NULL, NULL,		/* filter */
				 maxsegsize,		/* maxsegsize */
				 BUS_DMA_COHERENT,	/* flags */
				 NULL, NULL,		/* lock */
				 &dma->dmat);		/* tag */
	if (err != 0) {
		device_printf(dev, "couldn't alloc tag (err = %d)\n", err);

	/* allocate DMAable memory & map */
	err = bus_dmamem_alloc(dma->dmat, &dma->addr,
			       (BUS_DMA_WAITOK | BUS_DMA_COHERENT
				| BUS_DMA_ZERO), &dma->map);
	if (err != 0) {
		device_printf(dev, "couldn't alloc mem (err = %d)\n", err);
		goto abort_with_dmat;
	}

	/* load the memory */
	err = bus_dmamap_load(dma->dmat, dma->map, dma->addr, bytes,
			      mxge_dmamap_callback,
			      (void *)&dma->bus_addr, 0);
	if (err != 0) {
		device_printf(dev, "couldn't load map (err = %d)\n", err);

	bus_dmamem_free(dma->dmat, dma->addr, dma->map);
abort_with_dmat:
	(void)bus_dma_tag_destroy(dma->dmat);

mxge_dma_free(mxge_dma_t *dma)
{
	bus_dmamap_unload(dma->dmat, dma->map);
	bus_dmamem_free(dma->dmat, dma->addr, dma->map);
	(void)bus_dma_tag_destroy(dma->dmat);
}
/*
 * The eeprom strings on the lanaiX have the format
 */
mxge_parse_strings(mxge_softc_t *sc)
{
	int i, found_mac, found_sn2;

	ptr = sc->eeprom_strings;
	while (*ptr != '\0') {
		if (strncmp(ptr, "MAC=", 4) == 0) {
			sc->mac_addr[i] = strtoul(ptr, &endptr, 16);
			if (endptr - ptr != 2)
		} else if (strncmp(ptr, "PC=", 3) == 0) {
			strlcpy(sc->product_code_string, ptr,
				sizeof(sc->product_code_string));
		} else if (!found_sn2 && (strncmp(ptr, "SN=", 3) == 0)) {
			strlcpy(sc->serial_number_string, ptr,
				sizeof(sc->serial_number_string));
		} else if (strncmp(ptr, "SN2=", 4) == 0) {
			/* SN2 takes precedence over SN */
			strlcpy(sc->serial_number_string, ptr,
				sizeof(sc->serial_number_string));
		}
		while (*ptr++ != '\0') {}
	}

	device_printf(sc->dev, "failed to parse eeprom_strings\n");
#if defined __i386 || defined i386 || defined __i386__ || defined __x86_64__
mxge_enable_nvidia_ecrc(mxge_softc_t *sc)
{
	unsigned long base, off;
	device_t pdev, mcp55;
	uint16_t vendor_id, device_id, word;
	uintptr_t bus, slot, func, ivend, idev;

	if (!mxge_nvidia_ecrc_enable)
		return;

	pdev = device_get_parent(device_get_parent(sc->dev));
	if (pdev == NULL) {
		device_printf(sc->dev, "could not find parent?\n");
		return;
	}
	vendor_id = pci_read_config(pdev, PCIR_VENDOR, 2);
	device_id = pci_read_config(pdev, PCIR_DEVICE, 2);

	if (vendor_id != 0x10de)
		return;

	if (device_id == 0x005d) {
		/* ck804, base address is magic */
	} else if (device_id >= 0x0374 && device_id <= 0x378) {
		/* mcp55, base address stored in chipset */
		mcp55 = pci_find_bsf(0, 0, 0);
		if (mcp55 &&
		    0x10de == pci_read_config(mcp55, PCIR_VENDOR, 2) &&
		    0x0369 == pci_read_config(mcp55, PCIR_DEVICE, 2)) {
			word = pci_read_config(mcp55, 0x90, 2);
			base = ((unsigned long)word & 0x7ffeU) << 25;
		}
	}

	/*
	  Test below is commented because it is believed that doing
	  config read/write beyond 0xff will access the config space
	  for the next larger function. Uncomment this and remove
	  the hacky pmap_mapdev() way of accessing config space when
	  FreeBSD grows support for extended pcie config space access.
	*/
#if 0
	/* See if we can, by some miracle, access the extended
	   config space */
	val = pci_read_config(pdev, 0x178, 4);
	if (val != 0xffffffff) {
		val |= 0x40;
		pci_write_config(pdev, 0x178, val, 4);
		return;
	}
#endif

	/* Rather than using normal pci config space writes, we must
	 * map the Nvidia config space ourselves. This is because on
	 * opteron/nvidia class machine the 0xe000000 mapping is
	 * handled by the nvidia chipset, that means the internal PCI
	 * device (the on-chip northbridge), or the amd-8131 bridge
	 * and things behind them are not visible by this method.
	 */

	BUS_READ_IVAR(device_get_parent(pdev), pdev,
		      PCI_IVAR_BUS, &bus);
	BUS_READ_IVAR(device_get_parent(pdev), pdev,
		      PCI_IVAR_SLOT, &slot);
	BUS_READ_IVAR(device_get_parent(pdev), pdev,
		      PCI_IVAR_FUNCTION, &func);
	BUS_READ_IVAR(device_get_parent(pdev), pdev,
		      PCI_IVAR_VENDOR, &ivend);
	BUS_READ_IVAR(device_get_parent(pdev), pdev,
		      PCI_IVAR_DEVICE, &idev);

	off = base
	    + 0x00100000UL * (unsigned long)bus
	    + 0x00001000UL * (unsigned long)(func

	/* map it into the kernel */
	va = pmap_mapdev(trunc_page((vm_paddr_t)off), PAGE_SIZE);
	if (va == NULL) {
		device_printf(sc->dev, "pmap_mapdev() failed\n");
		return;
	}
	/* get a pointer to the config space mapped into the kernel */
	cfgptr = va + (off & PAGE_MASK);

	/* make sure that we can really access it */
	vendor_id = *(uint16_t *)(cfgptr + PCIR_VENDOR);
	device_id = *(uint16_t *)(cfgptr + PCIR_DEVICE);
	if (! (vendor_id == ivend && device_id == idev)) {
		device_printf(sc->dev, "mapping failed: 0x%x:0x%x\n",
			      vendor_id, device_id);
		pmap_unmapdev((vm_offset_t)va, PAGE_SIZE);
		return;
	}

	ptr32 = (uint32_t*)(cfgptr + 0x178);
	val = *ptr32;

	if (val == 0xffffffff) {
		device_printf(sc->dev, "extended mapping failed\n");
		pmap_unmapdev((vm_offset_t)va, PAGE_SIZE);
		return;
	}
	pmap_unmapdev((vm_offset_t)va, PAGE_SIZE);
	device_printf(sc->dev,
		      "Enabled ECRC on upstream Nvidia bridge "
		      "at %d:%d:%d\n",
		      (int)bus, (int)slot, (int)func);
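/*
 * Worked example of the extended config space address computed above
 * (illustrative numbers only, and assuming the elided term is
 * "func + 8 * slot", which matches the bus:slot:func layout printed
 * above): with base = 0xe0000000, bus = 2, slot = 9, func = 0,
 *
 *	off = 0xe0000000
 *	    + 0x00100000 * 2		(1MB region per bus)
 *	    + 0x00001000 * (0 + 8 * 9)	(4KB region per function)
 *	    = 0xe0248000
 *
 * trunc_page(off) is what gets handed to pmap_mapdev(); the low
 * PAGE_MASK bits then select this function's 4KB window in the page.
 */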
#else
mxge_enable_nvidia_ecrc(mxge_softc_t *sc)
{
	device_printf(sc->dev,
		      "Nforce 4 chipset on non-x86/amd64!?!?!\n");
}
#endif

mxge_dma_test(mxge_softc_t *sc, int test_type)
{
	bus_addr_t dmatest_bus = sc->dmabench_dma.bus_addr;

	/* Run a small DMA test.
	 * The magic multipliers to the length tell the firmware
	 * to do DMA read, write, or read+write tests. The
	 * results are returned in cmd.data0. The upper 16
	 * bits of the return is the number of transfers completed.
	 * The lower 16 bits is the time in 0.5us ticks that the
	 * transfers took to complete.
	 */
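	/*
	 * Worked example of that encoding (made-up result): if the
	 * firmware returns cmd.data0 == 0x00200055, then 0x20 (32)
	 * transfers completed in 0x55 (85) half-microsecond ticks,
	 * i.e. 42.5us.  With len == 4096, the read rate computed below
	 * is (32 * 4096 * 2) / 85 == 3084; since bytes-per-microsecond
	 * equals MB/s, that is roughly 3 GB/s.
	 */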
	len = sc->tx_boundary;

	cmd.data0 = MXGE_LOWPART_TO_U32(dmatest_bus);
	cmd.data1 = MXGE_HIGHPART_TO_U32(dmatest_bus);
	cmd.data2 = len * 0x10000;
	status = mxge_send_cmd(sc, test_type, &cmd);
	sc->read_dma = ((cmd.data0>>16) * len * 2) /
		(cmd.data0 & 0xffff);
	cmd.data0 = MXGE_LOWPART_TO_U32(dmatest_bus);
	cmd.data1 = MXGE_HIGHPART_TO_U32(dmatest_bus);
	cmd.data2 = len * 0x1;
	status = mxge_send_cmd(sc, test_type, &cmd);
	sc->write_dma = ((cmd.data0>>16) * len * 2) /
		(cmd.data0 & 0xffff);

	cmd.data0 = MXGE_LOWPART_TO_U32(dmatest_bus);
	cmd.data1 = MXGE_HIGHPART_TO_U32(dmatest_bus);
	cmd.data2 = len * 0x10001;
	status = mxge_send_cmd(sc, test_type, &cmd);
	sc->read_write_dma = ((cmd.data0>>16) * len * 2 * 2) /
		(cmd.data0 & 0xffff);

	if (status != 0 && test_type != MXGEFW_CMD_UNALIGNED_TEST)
		device_printf(sc->dev, "DMA %s benchmark failed: %d\n",
/*
 * The Lanai Z8E PCI-E interface achieves higher Read-DMA throughput
 * when the PCI-E Completion packets are aligned on an 8-byte
 * boundary. Some PCI-E chip sets always align Completion packets; on
 * the ones that do not, the alignment can be enforced by enabling
 * ECRC generation (if supported).
 *
 * When PCI-E Completion packets are not aligned, it is actually more
 * efficient to limit Read-DMA transactions to 2KB, rather than 4KB.
 *
 * If the driver can neither enable ECRC nor verify that it has
 * already been enabled, then it must use a firmware image which works
 * around unaligned completion packets (ethp_z8e.dat), and it should
 * also ensure that it never gives the device a Read-DMA which is
 * larger than 2KB by setting the tx_boundary to 2KB. If ECRC is
 * enabled, then the driver should use the aligned (eth_z8e.dat)
 * firmware image, and set tx_boundary to 4KB.
 */
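/*
 * Distilled into a sketch, the policy the comment above describes
 * looks like the following (not compiled; ecrc_enabled and
 * completions_known_aligned are illustrative placeholders for the
 * probing done in mxge_firmware_probe()/mxge_select_firmware() below):
 */
#if 0
	if (ecrc_enabled || completions_known_aligned) {
		sc->fw_name = mxge_fw_aligned;		/* eth_z8e */
		sc->tx_boundary = 4096;
	} else {
		sc->fw_name = mxge_fw_unaligned;	/* ethp_z8e */
		sc->tx_boundary = 2048;
	}
#endif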
mxge_firmware_probe(mxge_softc_t *sc)
{
	device_t dev = sc->dev;

	sc->tx_boundary = 4096;
	/*
	 * Verify the max read request size was set to 4KB
	 * before trying the test with 4KB.
	 */
	if (pci_find_cap(dev, PCIY_EXPRESS, &reg) == 0) {
		pectl = pci_read_config(dev, reg + 0x8, 2);
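		/*
		 * The Max Read Request Size field is bits 14:12 of the
		 * PCIe Device Control register (capability + 0x8); the
		 * encoding 5 (101b) means 4096 bytes, which is what the
		 * test below checks for.
		 */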
		if ((pectl & (5 << 12)) != (5 << 12)) {
			device_printf(dev, "Max Read Req. size != 4k (0x%x)\n",
				      pectl);
			sc->tx_boundary = 2048;
		}
	}

	/*
	 * load the optimized firmware (which assumes aligned PCIe
	 * completions) in order to see if it works on this host.
	 */
	sc->fw_name = mxge_fw_aligned;
	status = mxge_load_firmware(sc, 1);

	/*
	 * Enable ECRC if possible
	 */
	mxge_enable_nvidia_ecrc(sc);

	/*
	 * Run a DMA test which watches for unaligned completions and
	 * aborts on the first one seen. Not required on Z8ES or newer.
	 */
	if (pci_get_revid(sc->dev) >= MXGE_PCI_REV_Z8ES)
		return 0;

	status = mxge_dma_test(sc, MXGEFW_CMD_UNALIGNED_TEST);
	if (status == 0)
		return 0; /* keep the aligned firmware */

	device_printf(dev, "DMA test failed: %d\n", status);
	if (status == ENOSYS)
		device_printf(dev, "Falling back to ethp! "
			      "Please install up-to-date firmware\n");
mxge_select_firmware(mxge_softc_t *sc)
{
	int force_firmware = mxge_force_firmware;

	if (sc->throttle)
		force_firmware = sc->throttle;

	if (force_firmware != 0) {
		if (force_firmware == 1)
			aligned = 1;
		device_printf(sc->dev,
			      "Assuming %s completions (forced)\n",
			      aligned ? "aligned" : "unaligned");
	}

	/* if the PCIe link width is 4 or less, we can use the aligned
	   firmware and skip any checks */
	if (sc->link_width != 0 && sc->link_width <= 4) {
		device_printf(sc->dev,
			      "PCIe x%d Link, expect reduced performance\n",

	if (0 == mxge_firmware_probe(sc))
		return 0;

	if (aligned) {
		sc->fw_name = mxge_fw_aligned;
		sc->tx_boundary = 4096;
	} else {
		sc->fw_name = mxge_fw_unaligned;
		sc->tx_boundary = 2048;
	}
	return (mxge_load_firmware(sc, 0));
mxge_validate_firmware(mxge_softc_t *sc, const mcp_gen_header_t *hdr)
{
	if (be32toh(hdr->mcp_type) != MCP_TYPE_ETH) {
		device_printf(sc->dev, "Bad firmware type: 0x%x\n",
			      be32toh(hdr->mcp_type));
	}

	/* save firmware version for sysctl */
	strlcpy(sc->fw_version, hdr->version, sizeof(sc->fw_version));
	device_printf(sc->dev, "firmware id: %s\n", hdr->version);

	sscanf(sc->fw_version, "%d.%d.%d", &sc->fw_ver_major,
	       &sc->fw_ver_minor, &sc->fw_ver_tiny);

	if (!(sc->fw_ver_major == MXGEFW_VERSION_MAJOR
	      && sc->fw_ver_minor == MXGEFW_VERSION_MINOR)) {
		device_printf(sc->dev, "Found firmware version %s\n",
			      sc->fw_version);
		device_printf(sc->dev, "Driver needs %d.%d\n",
			      MXGEFW_VERSION_MAJOR, MXGEFW_VERSION_MINOR);
	}
}

static void *
z_alloc(void *nil, u_int items, u_int size)
{
	ptr = malloc(items * size, M_TEMP, M_NOWAIT);
}

static void
z_free(void *nil, void *ptr)
{
mxge_load_firmware_helper(mxge_softc_t *sc, uint32_t *limit)
{
	z_stream zs;
	char *inflate_buffer;
	const struct firmware *fw;
	const mcp_gen_header_t *hdr;

	fw = firmware_get(sc->fw_name);
	if (fw == NULL) {
		device_printf(sc->dev, "Could not find firmware image %s\n",
			      sc->fw_name);
	}

	/* setup zlib and decompress f/w */
	bzero(&zs, sizeof (zs));
	zs.zalloc = z_alloc;
	zs.zfree = z_free;
	status = inflateInit(&zs);
	if (status != Z_OK) {

	/* the uncompressed size is stored as the firmware version,
	   which would otherwise go unused */
	fw_len = (size_t) fw->version;
	inflate_buffer = malloc(fw_len, M_TEMP, M_NOWAIT);
	if (inflate_buffer == NULL)
		goto abort_with_zs;
	zs.avail_in = fw->datasize;
	zs.next_in = __DECONST(char *, fw->data);
	zs.avail_out = fw_len;
	zs.next_out = inflate_buffer;
	status = inflate(&zs, Z_FINISH);
	if (status != Z_STREAM_END) {
		device_printf(sc->dev, "zlib %d\n", status);
		goto abort_with_buffer;
	}

	hdr_offset = htobe32(*(const uint32_t *)
			     (inflate_buffer + MCP_HEADER_PTR_OFFSET));
	if ((hdr_offset & 3) || hdr_offset + sizeof(*hdr) > fw_len) {
		device_printf(sc->dev, "Bad firmware file\n");
		goto abort_with_buffer;
	}
	hdr = (const void*)(inflate_buffer + hdr_offset);

	status = mxge_validate_firmware(sc, hdr);
	if (status != 0)
		goto abort_with_buffer;

	/* Copy the inflated firmware to NIC SRAM. */
	for (i = 0; i < fw_len; i += 256) {
		mxge_pio_copy(sc->sram + MXGE_FW_OFFSET + i,
			      inflate_buffer + i,
			      min(256U, (unsigned)(fw_len - i)));
	}

abort_with_buffer:
	free(inflate_buffer, M_TEMP);
abort_with_zs:
	inflateEnd(&zs);
	firmware_put(fw, FIRMWARE_UNLOAD);
/*
 * Enable or disable periodic RDMAs from the host to make certain
 * chipsets resend dropped PCIe messages
 */
mxge_dummy_rdma(mxge_softc_t *sc, int enable)
{
	volatile uint32_t *confirm;
	volatile char *submit;
	uint32_t *buf, dma_low, dma_high;

	buf = (uint32_t *)((unsigned long)(buf_bytes + 7) & ~7UL);

	/* clear confirmation addr */
	confirm = (volatile uint32_t *)sc->cmd;

	/* send an rdma command to the PCIe engine, and wait for the
	   response in the confirmation address. The firmware should
	   write a -1 there to indicate it is alive and well
	*/
	dma_low = MXGE_LOWPART_TO_U32(sc->cmd_dma.bus_addr);
	dma_high = MXGE_HIGHPART_TO_U32(sc->cmd_dma.bus_addr);
	buf[0] = htobe32(dma_high);	/* confirm addr MSW */
	buf[1] = htobe32(dma_low);	/* confirm addr LSW */
	buf[2] = htobe32(0xffffffff);	/* confirm data */
	dma_low = MXGE_LOWPART_TO_U32(sc->zeropad_dma.bus_addr);
	dma_high = MXGE_HIGHPART_TO_U32(sc->zeropad_dma.bus_addr);
	buf[3] = htobe32(dma_high);	/* dummy addr MSW */
	buf[4] = htobe32(dma_low);	/* dummy addr LSW */
	buf[5] = htobe32(enable);	/* enable? */

	submit = (volatile char *)(sc->sram + MXGEFW_BOOT_DUMMY_RDMA);

	mxge_pio_copy(submit, buf, 64);

	while (*confirm != 0xffffffff && i < 20) {

	if (*confirm != 0xffffffff) {
		device_printf(sc->dev, "dummy rdma %s failed (%p = 0x%x)",
			      (enable ? "enable" : "disable"), confirm,

mxge_send_cmd(mxge_softc_t *sc, uint32_t cmd, mxge_cmd_t *data)
{
	mcp_cmd_t *buf;
	char buf_bytes[sizeof(*buf) + 8];
	volatile mcp_cmd_response_t *response = sc->cmd;
	volatile char *cmd_addr = sc->sram + MXGEFW_ETH_CMD;
	uint32_t dma_low, dma_high;
	int err, sleep_total = 0;

	/* ensure buf is aligned to 8 bytes */
	buf = (mcp_cmd_t *)((unsigned long)(buf_bytes + 7) & ~7UL);
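	/*
	 * e.g. if buf_bytes starts at 0x...c13, then (0x...c13 + 7) & ~7
	 * yields 0x...c18, the next 8-byte boundary; an already aligned
	 * 0x...c18 is left unchanged.
	 */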
	buf->data0 = htobe32(data->data0);
	buf->data1 = htobe32(data->data1);
	buf->data2 = htobe32(data->data2);
	buf->cmd = htobe32(cmd);
	dma_low = MXGE_LOWPART_TO_U32(sc->cmd_dma.bus_addr);
	dma_high = MXGE_HIGHPART_TO_U32(sc->cmd_dma.bus_addr);

	buf->response_addr.low = htobe32(dma_low);
	buf->response_addr.high = htobe32(dma_high);
	mtx_lock(&sc->cmd_mtx);
	response->result = 0xffffffff;
	mxge_pio_copy((volatile void *)cmd_addr, buf, sizeof (*buf));

	/* wait up to 20ms */
	for (sleep_total = 0; sleep_total < 20; sleep_total++) {
		bus_dmamap_sync(sc->cmd_dma.dmat,
				sc->cmd_dma.map, BUS_DMASYNC_POSTREAD);
		switch (be32toh(response->result)) {
		case 0:
			data->data0 = be32toh(response->data);
			err = 0;
			break;
		case MXGEFW_CMD_UNKNOWN:
			err = ENOSYS;
			break;
		case MXGEFW_CMD_ERROR_UNALIGNED:
			err = E2BIG;
			break;
		case MXGEFW_CMD_ERROR_BUSY:
			err = EBUSY;
			break;
		case MXGEFW_CMD_ERROR_I2C_ABSENT:
			err = ENXIO;
			break;
		default:
			device_printf(sc->dev,
				      "mxge: command %d "
				      "failed, result = %d\n",
				      cmd, be32toh(response->result));
		}
	}

	device_printf(sc->dev, "mxge: command %d timed out"
		      " result = %d\n",
		      cmd, be32toh(response->result));
	mtx_unlock(&sc->cmd_mtx);
mxge_adopt_running_firmware(mxge_softc_t *sc)
{
	struct mcp_gen_header *hdr;
	const size_t bytes = sizeof (struct mcp_gen_header);

	/* find running firmware header */
	hdr_offset = htobe32(*(volatile uint32_t *)
			     (sc->sram + MCP_HEADER_PTR_OFFSET));

	if ((hdr_offset & 3) || hdr_offset + sizeof(*hdr) > sc->sram_size) {
		device_printf(sc->dev,
			      "Running firmware has bad header offset (%d)\n",
			      (int)hdr_offset);
	}

	/* copy header of running firmware from SRAM to host memory to
	 * validate firmware */
	hdr = malloc(bytes, M_DEVBUF, M_NOWAIT);
	if (hdr == NULL) {
		device_printf(sc->dev, "could not malloc firmware hdr\n");
	}
	bus_space_read_region_1(rman_get_bustag(sc->mem_res),
				rman_get_bushandle(sc->mem_res),
				hdr_offset, (char *)hdr, bytes);
	status = mxge_validate_firmware(sc, hdr);

	/*
	 * check to see if adopted firmware has bug where adopting
	 * it will cause broadcasts to be filtered unless the NIC
	 * is kept in ALLMULTI mode
	 */
	if (sc->fw_ver_major == 1 && sc->fw_ver_minor == 4 &&
	    sc->fw_ver_tiny >= 4 && sc->fw_ver_tiny <= 11) {
		sc->adopted_rx_filter_bug = 1;
		device_printf(sc->dev, "Adopting fw %d.%d.%d: "
			      "working around rx filter bug\n",
			      sc->fw_ver_major, sc->fw_ver_minor,
			      sc->fw_ver_tiny);
	}
mxge_load_firmware(mxge_softc_t *sc, int adopt)
{
	volatile uint32_t *confirm;
	volatile char *submit;
	uint32_t *buf, size, dma_low, dma_high;

	buf = (uint32_t *)((unsigned long)(buf_bytes + 7) & ~7UL);

	size = sc->sram_size;
	status = mxge_load_firmware_helper(sc, &size);
	if (status) {
		/* Try to use the currently running firmware, if
		   it is new enough */
		status = mxge_adopt_running_firmware(sc);
		if (status) {
			device_printf(sc->dev,
				      "failed to adopt running firmware\n");
			return status;
		}
		device_printf(sc->dev,
			      "Successfully adopted running firmware\n");
		if (sc->tx_boundary == 4096) {
			device_printf(sc->dev,
				      "Using firmware currently running on NIC"
				      ".  For optimal\n");
			device_printf(sc->dev,
				      "performance consider loading optimized "
				      "firmware\n");
		}
		sc->fw_name = mxge_fw_unaligned;
		sc->tx_boundary = 2048;
		return 0;
	}
	/* clear confirmation addr */
	confirm = (volatile uint32_t *)sc->cmd;

	/* send a reload command to the bootstrap MCP, and wait for the
	   response in the confirmation address. The firmware should
	   write a -1 there to indicate it is alive and well
	*/
	dma_low = MXGE_LOWPART_TO_U32(sc->cmd_dma.bus_addr);
	dma_high = MXGE_HIGHPART_TO_U32(sc->cmd_dma.bus_addr);

	buf[0] = htobe32(dma_high);	/* confirm addr MSW */
	buf[1] = htobe32(dma_low);	/* confirm addr LSW */
	buf[2] = htobe32(0xffffffff);	/* confirm data */

	/* FIX: All newest firmware should un-protect the bottom of
	   the sram before handoff. However, the very first interfaces
	   do not. Therefore the handoff copy must skip the first 8 bytes
	*/
					/* where the code starts*/
	buf[3] = htobe32(MXGE_FW_OFFSET + 8);
	buf[4] = htobe32(size - 8);	/* length of code */
	buf[5] = htobe32(8);		/* where to copy to */
	buf[6] = htobe32(0);		/* where to jump to */

	submit = (volatile char *)(sc->sram + MXGEFW_BOOT_HANDOFF);
	mxge_pio_copy(submit, buf, 64);

	while (*confirm != 0xffffffff && i < 20) {
		bus_dmamap_sync(sc->cmd_dma.dmat,
				sc->cmd_dma.map, BUS_DMASYNC_POSTREAD);
	}
	if (*confirm != 0xffffffff) {
		device_printf(sc->dev, "handoff failed (%p = 0x%x)",
			      confirm, *confirm);

mxge_update_mac_address(mxge_softc_t *sc)
{
	mxge_cmd_t cmd;
	uint8_t *addr = sc->mac_addr;

	cmd.data0 = ((addr[0] << 24) | (addr[1] << 16)
		     | (addr[2] << 8) | addr[3]);

	cmd.data1 = ((addr[4] << 8) | (addr[5]));
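	/*
	 * e.g. (illustrative address only) MAC 00:60:dd:12:34:56 packs
	 * to data0 = 0x0060dd12 and data1 = 0x00003456, which is the
	 * layout the firmware expects once the fields are byte-swapped
	 * to big-endian by mxge_send_cmd().
	 */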
	status = mxge_send_cmd(sc, MXGEFW_SET_MAC_ADDRESS, &cmd);
	return status;
}

mxge_change_pause(mxge_softc_t *sc, int pause)
{
	if (pause)
		status = mxge_send_cmd(sc, MXGEFW_ENABLE_FLOW_CONTROL,
				       &cmd);
	else
		status = mxge_send_cmd(sc, MXGEFW_DISABLE_FLOW_CONTROL,
				       &cmd);
	if (status) {
		device_printf(sc->dev, "Failed to set flow control mode\n");
		return ENXIO;
	}
	sc->pause = pause;
	return 0;
}

static void
mxge_change_promisc(mxge_softc_t *sc, int promisc)
{
	if (mxge_always_promisc)
		promisc = 1;

	if (promisc)
		status = mxge_send_cmd(sc, MXGEFW_ENABLE_PROMISC,
				       &cmd);
	else
		status = mxge_send_cmd(sc, MXGEFW_DISABLE_PROMISC,
				       &cmd);
	if (status)
		device_printf(sc->dev, "Failed to set promisc mode\n");
}

static void
mxge_set_multicast_list(mxge_softc_t *sc)
{
	struct ifmultiaddr *ifma;
	struct ifnet *ifp = sc->ifp;

	/* This firmware is known to not support multicast */
	if (!sc->fw_multicast_support)
		return;

	/* Disable multicast filtering while we play with the lists*/
	err = mxge_send_cmd(sc, MXGEFW_ENABLE_ALLMULTI, &cmd);
	if (err != 0) {
		device_printf(sc->dev, "Failed MXGEFW_ENABLE_ALLMULTI,"
			      " error status: %d\n", err);
		return;
	}

	if (sc->adopted_rx_filter_bug)
		return;

	if (ifp->if_flags & IFF_ALLMULTI)
		/* request to disable multicast filtering, so quit here */
		return;

	/* Flush all the filters */
	err = mxge_send_cmd(sc, MXGEFW_LEAVE_ALL_MULTICAST_GROUPS, &cmd);
	if (err != 0) {
		device_printf(sc->dev,
			      "Failed MXGEFW_LEAVE_ALL_MULTICAST_GROUPS"
			      ", error status: %d\n", err);
		return;
	}

	/* Walk the multicast list, and add each address */
	if_maddr_rlock(ifp);
	TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
		if (ifma->ifma_addr->sa_family != AF_LINK)
			continue;
		bcopy(LLADDR((struct sockaddr_dl *)ifma->ifma_addr),
		      &cmd.data0, 4);
		bcopy(LLADDR((struct sockaddr_dl *)ifma->ifma_addr) + 4,
		      &cmd.data1, 2);
		cmd.data0 = htonl(cmd.data0);
		cmd.data1 = htonl(cmd.data1);
		err = mxge_send_cmd(sc, MXGEFW_JOIN_MULTICAST_GROUP, &cmd);
		if (err != 0) {
			device_printf(sc->dev, "Failed "
				      "MXGEFW_JOIN_MULTICAST_GROUP, error status:"
			/* abort, leaving multicast filtering off */
			if_maddr_runlock(ifp);
			return;
		}
	}
	if_maddr_runlock(ifp);
	/* Enable multicast filtering */
	err = mxge_send_cmd(sc, MXGEFW_DISABLE_ALLMULTI, &cmd);
	if (err != 0) {
		device_printf(sc->dev, "Failed MXGEFW_DISABLE_ALLMULTI"
			      ", error status: %d\n", err);
	}
}

static int
mxge_max_mtu(mxge_softc_t *sc)
{
	if (MJUMPAGESIZE - MXGEFW_PAD > MXGEFW_MAX_MTU)
		return MXGEFW_MAX_MTU - MXGEFW_PAD;

	/* try to set nbufs to see if we can
	   use virtually contiguous jumbos */
	status = mxge_send_cmd(sc, MXGEFW_CMD_ALWAYS_USE_N_BIG_BUFFERS,
			       &cmd);
	if (status == 0)
		return MXGEFW_MAX_MTU - MXGEFW_PAD;

	/* otherwise, we're limited to MJUMPAGESIZE */
	return MJUMPAGESIZE - MXGEFW_PAD;
mxge_reset(mxge_softc_t *sc, int interrupts_setup)
{
	struct mxge_slice_state *ss;
	mxge_rx_done_t *rx_done;
	volatile uint32_t *irq_claim;

	/* try to send a reset command to the card to see if it
	   is alive */
	memset(&cmd, 0, sizeof (cmd));
	status = mxge_send_cmd(sc, MXGEFW_CMD_RESET, &cmd);
	if (status != 0) {
		device_printf(sc->dev, "failed reset\n");
		return ENXIO;
	}

	mxge_dummy_rdma(sc, 1);

	/* set the intrq size */
	cmd.data0 = sc->rx_ring_size;
	status = mxge_send_cmd(sc, MXGEFW_CMD_SET_INTRQ_SIZE, &cmd);

	/*
	 * Even though we already know how many slices are supported
	 * via mxge_slice_probe(), MXGEFW_CMD_GET_MAX_RSS_QUEUES
	 * has magic side effects, and must be called after a reset.
	 * It must be called prior to calling any RSS related cmds,
	 * including assigning an interrupt queue for anything but
	 * slice 0. It must also be called *after*
	 * MXGEFW_CMD_SET_INTRQ_SIZE, since the intrq size is used by
	 * the firmware to compute offsets.
	 */
	if (sc->num_slices > 1) {
		/* ask the maximum number of slices it supports */
		status = mxge_send_cmd(sc, MXGEFW_CMD_GET_MAX_RSS_QUEUES,
				       &cmd);
		if (status != 0) {
			device_printf(sc->dev,
				      "failed to get number of slices\n");
			return status;
		}
		/*
		 * MXGEFW_CMD_ENABLE_RSS_QUEUES must be called prior
		 * to setting up the interrupt queue DMA
		 */
		cmd.data0 = sc->num_slices;
		cmd.data1 = MXGEFW_SLICE_INTR_MODE_ONE_PER_SLICE;
#ifdef IFNET_BUF_RING
		cmd.data1 |= MXGEFW_SLICE_ENABLE_MULTIPLE_TX_QUEUES;
#endif
		status = mxge_send_cmd(sc, MXGEFW_CMD_ENABLE_RSS_QUEUES,
				       &cmd);
		if (status != 0) {
			device_printf(sc->dev,
				      "failed to set number of slices\n");
			return status;
		}
	}

	if (interrupts_setup) {
		/* Now exchange information about interrupts */
		for (slice = 0; slice < sc->num_slices; slice++) {
			rx_done = &sc->ss[slice].rx_done;
			memset(rx_done->entry, 0, sc->rx_ring_size);
			cmd.data0 = MXGE_LOWPART_TO_U32(rx_done->dma.bus_addr);
			cmd.data1 = MXGE_HIGHPART_TO_U32(rx_done->dma.bus_addr);
			cmd.data2 = slice;
			status |= mxge_send_cmd(sc,
						MXGEFW_CMD_SET_INTRQ_DMA,
						&cmd);
		}
	}

	status |= mxge_send_cmd(sc,
				MXGEFW_CMD_GET_INTR_COAL_DELAY_OFFSET, &cmd);
	sc->intr_coal_delay_ptr = (volatile uint32_t *)(sc->sram + cmd.data0);

	status |= mxge_send_cmd(sc, MXGEFW_CMD_GET_IRQ_ACK_OFFSET, &cmd);
	irq_claim = (volatile uint32_t *)(sc->sram + cmd.data0);

	status |= mxge_send_cmd(sc, MXGEFW_CMD_GET_IRQ_DEASSERT_OFFSET,
				&cmd);
	sc->irq_deassert = (volatile uint32_t *)(sc->sram + cmd.data0);
	if (status != 0) {
		device_printf(sc->dev, "failed set interrupt parameters\n");
		return status;
	}

	*sc->intr_coal_delay_ptr = htobe32(sc->intr_coal_delay);

	/* run a DMA benchmark */
	(void) mxge_dma_test(sc, MXGEFW_DMA_TEST);

	for (slice = 0; slice < sc->num_slices; slice++) {
		ss = &sc->ss[slice];

		ss->irq_claim = irq_claim + (2 * slice);
		/* reset mcp/driver shared state back to 0 */
		ss->rx_done.idx = 0;
		ss->rx_done.cnt = 0;

		ss->tx.pkt_done = 0;
		ss->tx.queue_active = 0;
		ss->tx.activate = 0;
		ss->tx.deactivate = 0;

		ss->rx_small.cnt = 0;
		ss->lc.lro_bad_csum = 0;
		ss->lc.lro_queued = 0;
		ss->lc.lro_flushed = 0;
		if (ss->fw_stats != NULL) {
			bzero(ss->fw_stats, sizeof *ss->fw_stats);
		}
	}
	sc->rdma_tags_available = 15;
	status = mxge_update_mac_address(sc);
	mxge_change_promisc(sc, sc->ifp->if_flags & IFF_PROMISC);
	mxge_change_pause(sc, sc->pause);
	mxge_set_multicast_list(sc);
	if (sc->throttle) {
		cmd.data0 = sc->throttle;
		if (mxge_send_cmd(sc, MXGEFW_CMD_SET_THROTTLE_FACTOR,
				  &cmd)) {
			device_printf(sc->dev,
				      "can't enable throttle\n");
		}
	}
	return status;
mxge_change_throttle(SYSCTL_HANDLER_ARGS)
{
	unsigned int throttle;

	throttle = sc->throttle;
	err = sysctl_handle_int(oidp, &throttle, arg2, req);

	if (throttle == sc->throttle)
		return 0;

	if (throttle < MXGE_MIN_THROTTLE || throttle > MXGE_MAX_THROTTLE)
		return EINVAL;

	mtx_lock(&sc->driver_mtx);
	cmd.data0 = throttle;
	err = mxge_send_cmd(sc, MXGEFW_CMD_SET_THROTTLE_FACTOR, &cmd);
	if (err == 0)
		sc->throttle = throttle;
	mtx_unlock(&sc->driver_mtx);
	return err;
}

mxge_change_intr_coal(SYSCTL_HANDLER_ARGS)
{
	unsigned int intr_coal_delay;

	intr_coal_delay = sc->intr_coal_delay;
	err = sysctl_handle_int(oidp, &intr_coal_delay, arg2, req);

	if (intr_coal_delay == sc->intr_coal_delay)
		return 0;

	if (intr_coal_delay == 0 || intr_coal_delay > 1000*1000)
		return EINVAL;

	mtx_lock(&sc->driver_mtx);
	*sc->intr_coal_delay_ptr = htobe32(intr_coal_delay);
	sc->intr_coal_delay = intr_coal_delay;
	mtx_unlock(&sc->driver_mtx);
	return err;
}

mxge_change_flow_control(SYSCTL_HANDLER_ARGS)
{
	unsigned int enabled;

	enabled = sc->pause;
	err = sysctl_handle_int(oidp, &enabled, arg2, req);

	if (enabled == sc->pause)
		return 0;

	mtx_lock(&sc->driver_mtx);
	err = mxge_change_pause(sc, enabled);
	mtx_unlock(&sc->driver_mtx);
	return err;
}

mxge_handle_be32(SYSCTL_HANDLER_ARGS)
{
	arg2 = be32toh(*(int *)arg1);
	arg1 = NULL;
	err = sysctl_handle_int(oidp, arg1, arg2, req);
	return err;
}
static void
mxge_rem_sysctls(mxge_softc_t *sc)
{
	struct mxge_slice_state *ss;
	int slice;

	if (sc->slice_sysctl_tree == NULL)
		return;

	for (slice = 0; slice < sc->num_slices; slice++) {
		ss = &sc->ss[slice];
		if (ss == NULL || ss->sysctl_tree == NULL)
			continue;
		sysctl_ctx_free(&ss->sysctl_ctx);
		ss->sysctl_tree = NULL;
	}
	sysctl_ctx_free(&sc->slice_sysctl_ctx);
	sc->slice_sysctl_tree = NULL;
}

static void
mxge_add_sysctls(mxge_softc_t *sc)
{
	struct sysctl_ctx_list *ctx;
	struct sysctl_oid_list *children;
	mcp_irq_data_t *fw;
	struct mxge_slice_state *ss;
	int slice;
	char slice_num[8];

	ctx = device_get_sysctl_ctx(sc->dev);
	children = SYSCTL_CHILDREN(device_get_sysctl_tree(sc->dev));
	fw = sc->ss[0].fw_stats;

	/* random information */
	SYSCTL_ADD_STRING(ctx, children, OID_AUTO,
			  "firmware_version",
			  CTLFLAG_RD, sc->fw_version,
			  0, "firmware version");
	SYSCTL_ADD_STRING(ctx, children, OID_AUTO,
			  "serial_number",
			  CTLFLAG_RD, sc->serial_number_string,
			  0, "serial number");
	SYSCTL_ADD_STRING(ctx, children, OID_AUTO,
			  "product_code",
			  CTLFLAG_RD, sc->product_code_string,
			  0, "product code");
	SYSCTL_ADD_INT(ctx, children, OID_AUTO,
		       "pcie_link_width",
		       CTLFLAG_RD, &sc->link_width,
		       0, "link width");
	SYSCTL_ADD_INT(ctx, children, OID_AUTO,
		       "tx_boundary",
		       CTLFLAG_RD, &sc->tx_boundary,
		       0, "tx boundary");
	SYSCTL_ADD_INT(ctx, children, OID_AUTO,
		       "write_combine",
		       CTLFLAG_RD, &sc->wc,
		       0, "write combining PIO?");
	SYSCTL_ADD_INT(ctx, children, OID_AUTO,
		       "read_dma_MBs",
		       CTLFLAG_RD, &sc->read_dma,
		       0, "DMA Read speed in MB/s");
	SYSCTL_ADD_INT(ctx, children, OID_AUTO,
		       "write_dma_MBs",
		       CTLFLAG_RD, &sc->write_dma,
		       0, "DMA Write speed in MB/s");
	SYSCTL_ADD_INT(ctx, children, OID_AUTO,
		       "read_write_dma_MBs",
		       CTLFLAG_RD, &sc->read_write_dma,
		       0, "DMA concurrent Read/Write speed in MB/s");
	SYSCTL_ADD_INT(ctx, children, OID_AUTO,
		       "watchdog_resets",
		       CTLFLAG_RD, &sc->watchdog_resets,
		       0, "Number of times NIC was reset");

	/* performance related tunables */
	SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
			"intr_coal_delay",
			CTLTYPE_INT|CTLFLAG_RW, sc,
			0, mxge_change_intr_coal,
			"I", "interrupt coalescing delay in usecs");

	SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
			"throttle",
			CTLTYPE_INT|CTLFLAG_RW, sc,
			0, mxge_change_throttle,
			"I", "transmit throttling");

	SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
			"flow_control_enabled",
			CTLTYPE_INT|CTLFLAG_RW, sc,
			0, mxge_change_flow_control,
			"I", "enable flow control (pause frames)");

	SYSCTL_ADD_INT(ctx, children, OID_AUTO,
		       "deassert_wait",
		       CTLFLAG_RW, &mxge_deassert_wait,
		       0, "Wait for IRQ line to go low in ihandler");
	/* stats block from firmware is in network byte order.
	   Need to swap it */
	SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
			"link_up",
			CTLTYPE_INT|CTLFLAG_RD, &fw->link_up,
			0, mxge_handle_be32,
			"I", "link up");
	SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
			"rdma_tags_available",
			CTLTYPE_INT|CTLFLAG_RD, &fw->rdma_tags_available,
			0, mxge_handle_be32,
			"I", "rdma_tags_available");
	SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
			"dropped_bad_crc32",
			CTLTYPE_INT|CTLFLAG_RD,
			&fw->dropped_bad_crc32,
			0, mxge_handle_be32,
			"I", "dropped_bad_crc32");
	SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
			"dropped_bad_phy",
			CTLTYPE_INT|CTLFLAG_RD,
			&fw->dropped_bad_phy,
			0, mxge_handle_be32,
			"I", "dropped_bad_phy");
	SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
			"dropped_link_error_or_filtered",
			CTLTYPE_INT|CTLFLAG_RD,
			&fw->dropped_link_error_or_filtered,
			0, mxge_handle_be32,
			"I", "dropped_link_error_or_filtered");
	SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
			"dropped_link_overflow",
			CTLTYPE_INT|CTLFLAG_RD, &fw->dropped_link_overflow,
			0, mxge_handle_be32,
			"I", "dropped_link_overflow");
	SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
			"dropped_multicast_filtered",
			CTLTYPE_INT|CTLFLAG_RD,
			&fw->dropped_multicast_filtered,
			0, mxge_handle_be32,
			"I", "dropped_multicast_filtered");
	SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
			"dropped_no_big_buffer",
			CTLTYPE_INT|CTLFLAG_RD, &fw->dropped_no_big_buffer,
			0, mxge_handle_be32,
			"I", "dropped_no_big_buffer");
	SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
			"dropped_no_small_buffer",
			CTLTYPE_INT|CTLFLAG_RD,
			&fw->dropped_no_small_buffer,
			0, mxge_handle_be32,
			"I", "dropped_no_small_buffer");
	SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
			"dropped_overrun",
			CTLTYPE_INT|CTLFLAG_RD, &fw->dropped_overrun,
			0, mxge_handle_be32,
			"I", "dropped_overrun");
	SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
			"dropped_pause",
			CTLTYPE_INT|CTLFLAG_RD,
			&fw->dropped_pause,
			0, mxge_handle_be32,
			"I", "dropped_pause");
	SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
			"dropped_runt",
			CTLTYPE_INT|CTLFLAG_RD, &fw->dropped_runt,
			0, mxge_handle_be32,
			"I", "dropped_runt");

	SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
			"dropped_unicast_filtered",
			CTLTYPE_INT|CTLFLAG_RD, &fw->dropped_unicast_filtered,
			0, mxge_handle_be32,
			"I", "dropped_unicast_filtered");

	/* verbose printing? */
	SYSCTL_ADD_INT(ctx, children, OID_AUTO,
		       "verbose",
		       CTLFLAG_RW, &mxge_verbose,
		       0, "verbose printing");

	/* add counters exported for debugging from all slices */
	sysctl_ctx_init(&sc->slice_sysctl_ctx);
	sc->slice_sysctl_tree =
		SYSCTL_ADD_NODE(&sc->slice_sysctl_ctx, children, OID_AUTO,
				"slice", CTLFLAG_RD, 0, "");

	for (slice = 0; slice < sc->num_slices; slice++) {
		ss = &sc->ss[slice];
		sysctl_ctx_init(&ss->sysctl_ctx);
		ctx = &ss->sysctl_ctx;
		children = SYSCTL_CHILDREN(sc->slice_sysctl_tree);
		sprintf(slice_num, "%d", slice);
		ss->sysctl_tree =
			SYSCTL_ADD_NODE(ctx, children, OID_AUTO, slice_num,
					CTLFLAG_RD, 0, "");
		children = SYSCTL_CHILDREN(ss->sysctl_tree);
		SYSCTL_ADD_INT(ctx, children, OID_AUTO,
			       "rx_small_cnt",
			       CTLFLAG_RD, &ss->rx_small.cnt,
			       0, "rx_small_cnt");
		SYSCTL_ADD_INT(ctx, children, OID_AUTO,
			       "rx_big_cnt",
			       CTLFLAG_RD, &ss->rx_big.cnt,
			       0, "rx_big_cnt");
		SYSCTL_ADD_U64(ctx, children, OID_AUTO,
			       "lro_flushed", CTLFLAG_RD, &ss->lc.lro_flushed,
			       0, "number of lro merge queues flushed");

		SYSCTL_ADD_U64(ctx, children, OID_AUTO,
			       "lro_bad_csum", CTLFLAG_RD, &ss->lc.lro_bad_csum,
			       0, "number of bad csums preventing LRO");

		SYSCTL_ADD_U64(ctx, children, OID_AUTO,
			       "lro_queued", CTLFLAG_RD, &ss->lc.lro_queued,
			       0, "number of frames appended to lro merge"
			       " queues");

#ifndef IFNET_BUF_RING
		/* only transmit from slice 0 for now */
		if (slice > 0)
			continue;
#endif
		SYSCTL_ADD_INT(ctx, children, OID_AUTO,
			       "tx_req",
			       CTLFLAG_RD, &ss->tx.req,
			       0, "tx_req");

		SYSCTL_ADD_INT(ctx, children, OID_AUTO,
			       "tx_done",
			       CTLFLAG_RD, &ss->tx.done,
			       0, "tx_done");
		SYSCTL_ADD_INT(ctx, children, OID_AUTO,
			       "tx_pkt_done",
			       CTLFLAG_RD, &ss->tx.pkt_done,
			       0, "tx_pkt_done");
		SYSCTL_ADD_INT(ctx, children, OID_AUTO,
			       "tx_stall",
			       CTLFLAG_RD, &ss->tx.stall,
			       0, "tx_stall");
		SYSCTL_ADD_INT(ctx, children, OID_AUTO,
			       "tx_wake",
			       CTLFLAG_RD, &ss->tx.wake,
			       0, "tx_wake");
		SYSCTL_ADD_INT(ctx, children, OID_AUTO,
			       "tx_defrag",
			       CTLFLAG_RD, &ss->tx.defrag,
			       0, "tx_defrag");
		SYSCTL_ADD_INT(ctx, children, OID_AUTO,
			       "tx_queue_active",
			       CTLFLAG_RD, &ss->tx.queue_active,
			       0, "tx_queue_active");
		SYSCTL_ADD_INT(ctx, children, OID_AUTO,
			       "tx_activate",
			       CTLFLAG_RD, &ss->tx.activate,
			       0, "tx_activate");
		SYSCTL_ADD_INT(ctx, children, OID_AUTO,
			       "tx_deactivate",
			       CTLFLAG_RD, &ss->tx.deactivate,
			       0, "tx_deactivate");
	}
}
/* copy an array of mcp_kreq_ether_send_t's to the mcp.  Copy
   backwards one at a time and handle ring wraps */

static inline void
mxge_submit_req_backwards(mxge_tx_ring_t *tx,
			  mcp_kreq_ether_send_t *src, int cnt)
{
	int idx, starting_slot;
	starting_slot = tx->req;
	while (cnt > 1) {
		cnt--;
		idx = (starting_slot + cnt) & tx->mask;
		mxge_pio_copy(&tx->lanai[idx],
			      &src[cnt], sizeof(*src));
	}
}
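/*
 * Worked example (hypothetical ring state): with tx->mask == 7 (an
 * 8-slot ring), tx->req == 6 and cnt == 3, the loop above copies
 * src[2] into slot (6 + 2) & 7 == 0 and src[1] into slot 7, wrapping
 * cleanly.  Slot 6 (src[0], the first request) is deliberately left
 * for the caller, which submits it last so the whole chain becomes
 * visible to the NIC at once.
 */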
/*
 * copy an array of mcp_kreq_ether_send_t's to the mcp. Copy
 * at most 32 bytes at a time, so as to avoid involving the software
 * pio handler in the nic. We re-write the first segment's flags
 * to mark them valid only after writing the entire chain
 */
static inline void
mxge_submit_req(mxge_tx_ring_t *tx, mcp_kreq_ether_send_t *src,
		int cnt)
{
	volatile uint32_t *dst_ints;
	mcp_kreq_ether_send_t *srcp;
	volatile mcp_kreq_ether_send_t *dstp, *dst;

	idx = tx->req & tx->mask;

	last_flags = src->flags;
	dst = dstp = &tx->lanai[idx];
	srcp = src;

	if ((idx + cnt) < tx->mask) {
		for (i = 0; i < (cnt - 1); i += 2) {
			mxge_pio_copy(dstp, srcp, 2 * sizeof(*src));
			wmb(); /* force write every 32 bytes */
			srcp += 2;
			dstp += 2;
		}
	} else {
		/* submit all but the first request, and ensure
		   that it is submitted below */
		mxge_submit_req_backwards(tx, src, cnt);
	}

	/* submit the first request */
	mxge_pio_copy(dstp, srcp, sizeof(*src));
	wmb(); /* barrier before setting valid flag */

	/* re-write the last 32-bits with the valid flags */
	src->flags = last_flags;
	src_ints = (uint32_t *)src;
	src_ints += 3;
	dst_ints = (volatile uint32_t *)dst;
	dst_ints += 3;
	*dst_ints = *src_ints;
mxge_parse_tx(struct mxge_slice_state *ss, struct mbuf *m,
	      struct mxge_pkt_info *pi)
{
	struct ether_vlan_header *eh;
	int tso = m->m_pkthdr.csum_flags & (CSUM_TSO);
#if IFCAP_TSO6 && defined(INET6)
	int nxt;
#endif

	eh = mtod(m, struct ether_vlan_header *);
	if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
		etype = ntohs(eh->evl_proto);
		pi->ip_off = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
	} else {
		etype = ntohs(eh->evl_encap_proto);
		pi->ip_off = ETHER_HDR_LEN;
	}

	switch (etype) {
	case ETHERTYPE_IP:
		/*
		 * ensure ip header is in first mbuf, copy it to a
		 * scratch buffer if not
		 */
		pi->ip = (struct ip *)(m->m_data + pi->ip_off);
		if (__predict_false(m->m_len < pi->ip_off + sizeof(*pi->ip))) {
			m_copydata(m, 0, pi->ip_off + sizeof(*pi->ip),
				   ss->scratch);
			pi->ip = (struct ip *)(ss->scratch + pi->ip_off);
		}
		pi->ip_hlen = pi->ip->ip_hl << 2;

		if (__predict_false(m->m_len < pi->ip_off + pi->ip_hlen +
				    sizeof(struct tcphdr))) {
			m_copydata(m, 0, pi->ip_off + pi->ip_hlen +
				   sizeof(struct tcphdr), ss->scratch);
			pi->ip = (struct ip *)(ss->scratch + pi->ip_off);
		}
		pi->tcp = (struct tcphdr *)((char *)pi->ip + pi->ip_hlen);
		break;
#if IFCAP_TSO6 && defined(INET6)
	case ETHERTYPE_IPV6:
		pi->ip6 = (struct ip6_hdr *)(m->m_data + pi->ip_off);
		if (__predict_false(m->m_len < pi->ip_off + sizeof(*pi->ip6))) {
			m_copydata(m, 0, pi->ip_off + sizeof(*pi->ip6),
				   ss->scratch);
			pi->ip6 = (struct ip6_hdr *)(ss->scratch + pi->ip_off);
		}
		pi->ip_hlen = ip6_lasthdr(m, pi->ip_off, IPPROTO_IPV6, &nxt);
		pi->ip_hlen -= pi->ip_off;
		if (nxt != IPPROTO_TCP && nxt != IPPROTO_UDP)
			return EINVAL;

		if (pi->ip_off + pi->ip_hlen > ss->sc->max_tso6_hlen)
			return EINVAL;

		if (__predict_false(m->m_len < pi->ip_off + pi->ip_hlen +
				    sizeof(struct tcphdr))) {
			m_copydata(m, 0, pi->ip_off + pi->ip_hlen +
				   sizeof(struct tcphdr), ss->scratch);
			pi->ip6 = (struct ip6_hdr *)(ss->scratch + pi->ip_off);
		}
		pi->tcp = (struct tcphdr *)((char *)pi->ip6 + pi->ip_hlen);
		break;
#endif
mxge_encap_tso(struct mxge_slice_state *ss, struct mbuf *m,
	       int busdma_seg_cnt, struct mxge_pkt_info *pi)
{
	mcp_kreq_ether_send_t *req;
	bus_dma_segment_t *seg;
	uint32_t low, high_swapped;
	int len, seglen, cum_len, cum_len_next;
	int next_is_first, chop, cnt, rdma_count, small;
	uint16_t pseudo_hdr_offset, cksum_offset, mss, sum;
	uint8_t flags, flags_next;

	mss = m->m_pkthdr.tso_segsz;

	/* negative cum_len signifies to the
	 * send loop that we are still in the
	 * header portion of the TSO packet.
	 */
	cksum_offset = pi->ip_off + pi->ip_hlen;
	cum_len = -(cksum_offset + (pi->tcp->th_off << 2));
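	/*
	 * e.g. for a plain IPv4 TCP segment: ip_off == 14 (Ethernet
	 * header), ip_hlen == 20, and th_off == 5 (a 20-byte TCP
	 * header), so cksum_offset == 34 and cum_len starts at -54.
	 * cum_len only turns non-negative in the send loop below once
	 * the headers have been consumed and payload bytes begin.
	 */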
	/* TSO implies checksum offload on this hardware */
	if (__predict_false((m->m_pkthdr.csum_flags & (CSUM_TCP|CSUM_TCP_IPV6)) == 0)) {
		/*
		 * If packet has full TCP csum, replace it with pseudo hdr
		 * sum that the NIC expects, otherwise the NIC will emit
		 * packets with bad TCP checksums.
		 */
		m->m_pkthdr.csum_data = offsetof(struct tcphdr, th_sum);
#if (CSUM_TCP_IPV6 != 0) && defined(INET6)
		m->m_pkthdr.csum_flags |= CSUM_TCP_IPV6;
		sum = in6_cksum_pseudo(pi->ip6,
				       m->m_pkthdr.len - cksum_offset,
				       IPPROTO_TCP, 0);
#endif
		m->m_pkthdr.csum_flags |= CSUM_TCP;
		sum = in_pseudo(pi->ip->ip_src.s_addr,
				pi->ip->ip_dst.s_addr,
				htons(IPPROTO_TCP + (m->m_pkthdr.len -
						     cksum_offset)));
		m_copyback(m, offsetof(struct tcphdr, th_sum) +
			   cksum_offset, sizeof(sum), (caddr_t)&sum);
	}
	flags = MXGEFW_FLAGS_TSO_HDR | MXGEFW_FLAGS_FIRST;

	/* for TSO, pseudo_hdr_offset holds mss.
	 * The firmware figures out where to put
	 * the checksum by parsing the header. */
	pseudo_hdr_offset = htobe16(mss);

	/*
	 * for IPv6 TSO, the "checksum offset" is re-purposed
	 * to store the TCP header len
	 */
	cksum_offset = (pi->tcp->th_off << 2);

	/* "rdma_count" is the number of RDMAs belonging to the
	 * current packet BEFORE the current send request. For
	 * non-TSO packets, this is equal to "count".
	 * For TSO packets, rdma_count needs to be reset
	 * to 0 after a segment cut.
	 *
	 * The rdma_count field of the send request is
	 * the number of RDMAs of the packet starting at
	 * that request. For TSO send requests with one or more cuts
	 * in the middle, this is the number of RDMAs starting
	 * after the last cut in the request. All previous
	 * segments before the last cut implicitly have 1 RDMA.
	 *
	 * Since the number of RDMAs is not known beforehand,
	 * it must be filled-in retroactively - after each
	 * segmentation cut or at the end of the entire packet.
	 */
	while (busdma_seg_cnt) {
		/* Break the busdma segment up into pieces*/
		low = MXGE_LOWPART_TO_U32(seg->ds_addr);
		high_swapped = htobe32(MXGE_HIGHPART_TO_U32(seg->ds_addr));

			flags_next = flags & ~MXGEFW_FLAGS_FIRST;
			cum_len_next = cum_len + seglen;
			(req-rdma_count)->rdma_count = rdma_count + 1;
			if (__predict_true(cum_len >= 0)) {
				/* payload */
				chop = (cum_len_next > mss);
				cum_len_next = cum_len_next % mss;
				next_is_first = (cum_len_next == 0);
				flags |= chop * MXGEFW_FLAGS_TSO_CHOP;
				flags_next |= next_is_first *
					MXGEFW_FLAGS_TSO_FIRST;
				rdma_count |= -(chop | next_is_first);
				rdma_count += chop & !next_is_first;
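				/*
				 * Branchless bookkeeping: chop and
				 * next_is_first are each 0 or 1, so
				 * -(chop | next_is_first) is either 0
				 * or ~0.  OR-ing it in forces rdma_count
				 * to -1 at every segmentation cut; the
				 * add then bumps it to 0 when the cut
				 * lands mid-request (chop set,
				 * next_is_first clear).
				 */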
			} else if (cum_len_next >= 0) {
				/* header ends */
				small = (mss <= MXGEFW_SEND_SMALL_SIZE);
				flags_next = MXGEFW_FLAGS_TSO_PLD |
					MXGEFW_FLAGS_FIRST |
					(small * MXGEFW_FLAGS_SMALL);
			}

			req->addr_high = high_swapped;
			req->addr_low = htobe32(low);
			req->pseudo_hdr_offset = pseudo_hdr_offset;
			req->pad = 0;	/* complete solid 16-byte block */
			req->rdma_count = 1;
			req->length = htobe16(seglen);
			req->cksum_offset = cksum_offset;
			req->flags = flags | ((cum_len & 1) *
					      MXGEFW_FLAGS_ALIGN_ODD);

			cum_len = cum_len_next;

			if (cksum_offset != 0 && !pi->ip6) {
				if (__predict_false(cksum_offset > seglen))
					cksum_offset -= seglen;

	if (__predict_false(cnt > tx->max_desc))
		goto drop;

	(req-rdma_count)->rdma_count = rdma_count;

	do {
		req--;
		req->flags |= MXGEFW_FLAGS_TSO_LAST;
	} while (!(req->flags & (MXGEFW_FLAGS_TSO_CHOP | MXGEFW_FLAGS_FIRST)));

	tx->info[((cnt - 1) + tx->req) & tx->mask].flag = 1;
	mxge_submit_req(tx, tx->req_list, cnt);
#ifdef IFNET_BUF_RING
	if ((ss->sc->num_slices > 1) && tx->queue_active == 0) {
		/* tell the NIC to start polling this slice */
		*tx->send_go = 1;
		tx->queue_active = 1;
	}
#endif
	return;

drop:
	bus_dmamap_unload(tx->dmat, tx->info[tx->req & tx->mask].map);
	printf("tx->max_desc exceeded via TSO!\n");
	printf("mss = %d, %ld, %d!\n", mss,
	       (long)seg - (long)tx->seg_list, tx->max_desc);

#endif /* IFCAP_TSO4 */
#ifdef MXGE_NEW_VLAN_API
/*
 * We reproduce the software vlan tag insertion from
 * net/if_vlan.c:vlan_start() here so that we can advertise "hardware"
 * vlan tag insertion. We need to advertise this in order to have the
 * vlan interface respect our csum offload flags.
 */
static struct mbuf *
mxge_vlan_tag_insert(struct mbuf *m)
{
	struct ether_vlan_header *evl;

	M_PREPEND(m, ETHER_VLAN_ENCAP_LEN, M_NOWAIT);
	if (__predict_false(m == NULL))
		return NULL;
	if (m->m_len < sizeof(*evl)) {
		m = m_pullup(m, sizeof(*evl));
		if (__predict_false(m == NULL))
			return NULL;
	}
	/*
	 * Transform the Ethernet header into an Ethernet header
	 * with 802.1Q encapsulation.
	 */
	evl = mtod(m, struct ether_vlan_header *);
	bcopy((char *)evl + ETHER_VLAN_ENCAP_LEN,
	      (char *)evl, ETHER_HDR_LEN - ETHER_TYPE_LEN);
	evl->evl_encap_proto = htons(ETHERTYPE_VLAN);
	evl->evl_tag = htons(m->m_pkthdr.ether_vtag);
	m->m_flags &= ~M_VLANTAG;
	return m;
}
#endif /* MXGE_NEW_VLAN_API */
mxge_encap(struct mxge_slice_state *ss, struct mbuf *m)
{
	struct mxge_pkt_info pi = {0,0,0,0};
	mcp_kreq_ether_send_t *req;
	bus_dma_segment_t *seg;
	int cnt, cum_len, err, i, idx, odd_flag;
	uint16_t pseudo_hdr_offset;
	uint8_t flags, cksum_offset;

#ifdef MXGE_NEW_VLAN_API
	if (m->m_flags & M_VLANTAG) {
		m = mxge_vlan_tag_insert(m);
		if (__predict_false(m == NULL))
			goto drop_without_m;
	}
#endif
	if (m->m_pkthdr.csum_flags &
	    (CSUM_TSO | CSUM_DELAY_DATA | CSUM_DELAY_DATA_IPV6)) {
		if (mxge_parse_tx(ss, m, &pi))
			goto drop;
	}

	/* (try to) map the frame for DMA */
	idx = tx->req & tx->mask;
	err = bus_dmamap_load_mbuf_sg(tx->dmat, tx->info[idx].map,
				      m, tx->seg_list, &cnt,
				      BUS_DMA_NOWAIT);
	if (__predict_false(err == EFBIG)) {
		/* Too many segments in the chain.  Try
		   to defrag */
		m_tmp = m_defrag(m, M_NOWAIT);
		if (m_tmp == NULL) {
			goto drop;
		}
		m = m_tmp;
		err = bus_dmamap_load_mbuf_sg(tx->dmat,
					      tx->info[idx].map,
					      m, tx->seg_list, &cnt,
					      BUS_DMA_NOWAIT);
	}
	if (__predict_false(err != 0)) {
		device_printf(sc->dev, "bus_dmamap_load_mbuf_sg returned %d"
			      " packet len = %d\n", err, m->m_pkthdr.len);
		goto drop;
	}
	bus_dmamap_sync(tx->dmat, tx->info[idx].map,
			BUS_DMASYNC_PREWRITE);
	tx->info[idx].m = m;

	/* TSO is different enough, we handle it in another routine */
	if (m->m_pkthdr.csum_flags & (CSUM_TSO)) {
		mxge_encap_tso(ss, m, cnt, &pi);
		return;
	}

	pseudo_hdr_offset = 0;
	flags = MXGEFW_FLAGS_NO_TSO;

	/* checksum offloading? */
	if (m->m_pkthdr.csum_flags &
	    (CSUM_DELAY_DATA | CSUM_DELAY_DATA_IPV6)) {
		/* ensure ip header is in first mbuf, copy
		   it to a scratch buffer if not */
		cksum_offset = pi.ip_off + pi.ip_hlen;
		pseudo_hdr_offset = cksum_offset + m->m_pkthdr.csum_data;
		pseudo_hdr_offset = htobe16(pseudo_hdr_offset);
		req->cksum_offset = cksum_offset;
		flags |= MXGEFW_FLAGS_CKSUM;
		odd_flag = MXGEFW_FLAGS_ALIGN_ODD;
	}
	if (m->m_pkthdr.len < MXGEFW_SEND_SMALL_SIZE)
		flags |= MXGEFW_FLAGS_SMALL;

	/* convert segments into a request list */
	req->flags = MXGEFW_FLAGS_FIRST;
	for (i = 0; i < cnt; i++) {
		req->addr_low =
			htobe32(MXGE_LOWPART_TO_U32(seg->ds_addr));
		req->addr_high =
			htobe32(MXGE_HIGHPART_TO_U32(seg->ds_addr));
		req->length = htobe16(seg->ds_len);
		req->cksum_offset = cksum_offset;
		if (cksum_offset > seg->ds_len)
			cksum_offset -= seg->ds_len;
		req->pseudo_hdr_offset = pseudo_hdr_offset;
		req->pad = 0; /* complete solid 16-byte block */
		req->rdma_count = 1;
		req->flags |= flags | ((cum_len & 1) * odd_flag);
		cum_len += seg->ds_len;
	}

	/* pad runts to 60 bytes */
	if (cum_len < 60) {
		req->addr_low =
			htobe32(MXGE_LOWPART_TO_U32(sc->zeropad_dma.bus_addr));
		req->addr_high =
			htobe32(MXGE_HIGHPART_TO_U32(sc->zeropad_dma.bus_addr));
		req->length = htobe16(60 - cum_len);
		req->cksum_offset = 0;
		req->pseudo_hdr_offset = pseudo_hdr_offset;
		req->pad = 0; /* complete solid 16-byte block */
		req->rdma_count = 1;
		req->flags |= flags | ((cum_len & 1) * odd_flag);
	}

	tx->req_list[0].rdma_count = cnt;
#if 0
	/* print what the firmware will see */
	for (i = 0; i < cnt; i++) {
		printf("%d: addr: 0x%x 0x%x len:%d pso%d,"
		       "cso:%d, flags:0x%x, rdma:%d\n",
		       i, (int)ntohl(tx->req_list[i].addr_high),
		       (int)ntohl(tx->req_list[i].addr_low),
		       (int)ntohs(tx->req_list[i].length),
		       (int)ntohs(tx->req_list[i].pseudo_hdr_offset),
		       tx->req_list[i].cksum_offset, tx->req_list[i].flags,
		       tx->req_list[i].rdma_count);
	}
	printf("--------------\n");
#endif
	tx->info[((cnt - 1) + tx->req) & tx->mask].flag = 1;
	mxge_submit_req(tx, tx->req_list, cnt);
#ifdef IFNET_BUF_RING
	if ((ss->sc->num_slices > 1) && tx->queue_active == 0) {
		/* tell the NIC to start polling this slice */
		*tx->send_go = 1;
		tx->queue_active = 1;
	}
#endif
#ifdef IFNET_BUF_RING
static void
mxge_qflush(struct ifnet *ifp)
{
	mxge_softc_t *sc = ifp->if_softc;

	for (slice = 0; slice < sc->num_slices; slice++) {
		tx = &sc->ss[slice].tx;
		mtx_lock(&tx->mtx);
		while ((m = buf_ring_dequeue_sc(tx->br)) != NULL)
			m_freem(m);
		mtx_unlock(&tx->mtx);
	}
}

static inline void
mxge_start_locked(struct mxge_slice_state *ss)
{
	while ((tx->mask - (tx->req - tx->done)) > tx->max_desc) {
		m = drbr_dequeue(ifp, tx->br);

		/* let BPF see it */
		BPF_MTAP(ifp, m);

		/* give it to the nic */
		mxge_encap(ss, m);
	}
	/* ran out of transmit slots */
	if (((ss->if_drv_flags & IFF_DRV_OACTIVE) == 0)
	    && (!drbr_empty(ifp, tx->br))) {
		ss->if_drv_flags |= IFF_DRV_OACTIVE;
	}
}

static int
mxge_transmit_locked(struct mxge_slice_state *ss, struct mbuf *m)
{
	if ((ss->if_drv_flags & (IFF_DRV_RUNNING|IFF_DRV_OACTIVE)) !=
	    IFF_DRV_RUNNING) {
		err = drbr_enqueue(ifp, tx->br, m);
		return (err);
	}

	if (!drbr_needs_enqueue(ifp, tx->br) &&
	    ((tx->mask - (tx->req - tx->done)) > tx->max_desc)) {
		/* let BPF see it */
		BPF_MTAP(ifp, m);
		/* give it to the nic */
		mxge_encap(ss, m);
	} else if ((err = drbr_enqueue(ifp, tx->br, m)) != 0) {
		return (err);
	}
	if (!drbr_empty(ifp, tx->br))
		mxge_start_locked(ss);
	return (0);
}

static int
mxge_transmit(struct ifnet *ifp, struct mbuf *m)
{
	mxge_softc_t *sc = ifp->if_softc;
	struct mxge_slice_state *ss;

	slice = m->m_pkthdr.flowid;
	slice &= (sc->num_slices - 1);	/* num_slices always power of 2 */
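	/*
	 * e.g. with 4 slices (illustrative), a flowid of 0x2f selects
	 * slice 0x2f & 3 == 3; the power-of-2 invariant is what allows
	 * the mask to stand in for a modulo.
	 */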
	ss = &sc->ss[slice];
	tx = &ss->tx;

	if (mtx_trylock(&tx->mtx)) {
		err = mxge_transmit_locked(ss, m);
		mtx_unlock(&tx->mtx);
	} else {
		err = drbr_enqueue(ifp, tx->br, m);
	}
	return (err);
}

#else /* !IFNET_BUF_RING */

static inline void
mxge_start_locked(struct mxge_slice_state *ss)
{
	while ((tx->mask - (tx->req - tx->done)) > tx->max_desc) {
		IFQ_DRV_DEQUEUE(&ifp->if_snd, m);

		/* let BPF see it */
		BPF_MTAP(ifp, m);

		/* give it to the nic */
		mxge_encap(ss, m);
	}
	/* ran out of transmit slots */
	if ((sc->ifp->if_drv_flags & IFF_DRV_OACTIVE) == 0) {
		sc->ifp->if_drv_flags |= IFF_DRV_OACTIVE;
	}
}

static void
mxge_start(struct ifnet *ifp)
{
	mxge_softc_t *sc = ifp->if_softc;
	struct mxge_slice_state *ss;

	/* only use the first slice for now */
	ss = &sc->ss[0];
	mtx_lock(&ss->tx.mtx);
	mxge_start_locked(ss);
	mtx_unlock(&ss->tx.mtx);
}
#endif /* IFNET_BUF_RING */

/*
 * copy an array of mcp_kreq_ether_recv_t's to the mcp. Copy
 * at most 32 bytes at a time, so as to avoid involving the software
 * pio handler in the nic. We re-write the first segment's low
 * DMA address to mark it valid only after we write the entire chunk
 */
static inline void
mxge_submit_8rx(volatile mcp_kreq_ether_recv_t *dst,
		mcp_kreq_ether_recv_t *src)
{
	low = src->addr_low;
	src->addr_low = 0xffffffff;
	mxge_pio_copy(dst, src, 4 * sizeof (*src));
	wmb();
	mxge_pio_copy(dst + 4, src + 4, 4 * sizeof (*src));
	wmb();
	src->addr_low = low;
	dst->addr_low = low;
	wmb();
}

static int
mxge_get_buf_small(struct mxge_slice_state *ss, bus_dmamap_t map, int idx)
{
	bus_dma_segment_t seg;
	struct mbuf *m;
	mxge_rx_ring_t *rx = &ss->rx_small;
	int cnt, err;

	m = m_gethdr(M_NOWAIT, MT_DATA);
	err = bus_dmamap_load_mbuf_sg(rx->dmat, map, m,
				      &seg, &cnt, BUS_DMA_NOWAIT);
	rx->info[idx].m = m;
	rx->shadow[idx].addr_low =
		htobe32(MXGE_LOWPART_TO_U32(seg.ds_addr));
	rx->shadow[idx].addr_high =
		htobe32(MXGE_HIGHPART_TO_U32(seg.ds_addr));

	if ((idx & 7) == 7)
		mxge_submit_8rx(&rx->lanai[idx - 7], &rx->shadow[idx - 7]);
	return err;
}

static int
mxge_get_buf_big(struct mxge_slice_state *ss, bus_dmamap_t map, int idx)
{
	bus_dma_segment_t seg[3];
	struct mbuf *m;
	mxge_rx_ring_t *rx = &ss->rx_big;
	int cnt, err, i;

	m = m_getjcl(M_NOWAIT, MT_DATA, M_PKTHDR, rx->cl_size);

	m->m_len = rx->mlen;
	err = bus_dmamap_load_mbuf_sg(rx->dmat, map, m,
				      seg, &cnt, BUS_DMA_NOWAIT);

	rx->info[idx].m = m;
	rx->shadow[idx].addr_low =
		htobe32(MXGE_LOWPART_TO_U32(seg->ds_addr));
	rx->shadow[idx].addr_high =
		htobe32(MXGE_HIGHPART_TO_U32(seg->ds_addr));

#if MXGE_VIRT_JUMBOS
	for (i = 1; i < cnt; i++) {
		rx->shadow[idx + i].addr_low =
			htobe32(MXGE_LOWPART_TO_U32(seg[i].ds_addr));
		rx->shadow[idx + i].addr_high =
			htobe32(MXGE_HIGHPART_TO_U32(seg[i].ds_addr));
	}
#endif

	for (i = 0; i < rx->nbufs; i++) {
		if ((idx & 7) == 7) {
			mxge_submit_8rx(&rx->lanai[idx - 7],
					&rx->shadow[idx - 7]);
		}
		idx++;
	}
	return err;
}

static inline uint16_t
mxge_csum_generic(uint16_t *raw, int len)
{
	uint32_t csum;

	csum = 0;
	while (len > 0) {
		csum += *raw;
		raw++;
		len -= 2;
	}
	csum = (csum >> 16) + (csum & 0xffff);
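	/*
	 * fold a second time: the first fold can itself carry into
	 * bit 16 (e.g. 0x2fffe folds to 0x2 + 0xfffe == 0x10000), so
	 * one more fold is needed to guarantee a 16-bit result.
	 */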
	csum = (csum >> 16) + (csum & 0xffff);
	return (uint16_t)csum;
}

static inline uint16_t
mxge_rx_csum6(void *p, struct mbuf *m, uint32_t csum)
{
	uint32_t c, partial;
	int nxt, cksum_offset;
	struct ip6_hdr *ip6 = p;

	nxt = ip6->ip6_nxt;
	cksum_offset = sizeof (*ip6) + ETHER_HDR_LEN;
	if (nxt != IPPROTO_TCP && nxt != IPPROTO_UDP) {
		cksum_offset = ip6_lasthdr(m, ETHER_HDR_LEN,
					   IPPROTO_IPV6, &nxt);
		if (nxt != IPPROTO_TCP && nxt != IPPROTO_UDP)
			return (1);
	}

	/*
	 * IPv6 headers do not contain a checksum, and hence
	 * do not checksum to zero, so they don't "fall out"
	 * of the partial checksum calculation like IPv4
	 * headers do. We need to fix the partial checksum by
	 * subtracting the checksum of the IPv6 header.
	 */
	partial = mxge_csum_generic((uint16_t *)ip6, cksum_offset -
				    ETHER_HDR_LEN);
	csum += ~partial;
	csum += (csum < ~partial);
	csum = (csum >> 16) + (csum & 0xFFFF);
	csum = (csum >> 16) + (csum & 0xFFFF);
	c = in6_cksum_pseudo(ip6, m->m_pkthdr.len - cksum_offset, nxt,
			     csum);

/*
 * Myri10GE hardware checksums are not valid if the sender
 * padded the frame with non-zero padding. This is because
 * the firmware just does a simple 16-bit 1s complement
 * checksum across the entire frame, excluding the first 14
 * bytes. It is best to simply check the checksum and
 * tell the stack about it only if the checksum is good
 */
2561 mxge_rx_csum(struct mbuf *m, int csum)
2563 struct ether_header *eh;
2567 #if defined(INET) || defined(INET6)
2568 int cap = m->m_pkthdr.rcvif->if_capenable;
2573 eh = mtod(m, struct ether_header *);
2574 etype = ntohs(eh->ether_type);
2578 if ((cap & IFCAP_RXCSUM) == 0)
2580 ip = (struct ip *)(eh + 1);
2581 if (ip->ip_p != IPPROTO_TCP && ip->ip_p != IPPROTO_UDP)
2583 c = in_pseudo(ip->ip_src.s_addr, ip->ip_dst.s_addr,
2584 htonl(ntohs(csum) + ntohs(ip->ip_len) -
2585 (ip->ip_hl << 2) + ip->ip_p));
2590 case ETHERTYPE_IPV6:
2591 if ((cap & IFCAP_RXCSUM_IPV6) == 0)
2593 c = mxge_rx_csum6((eh + 1), m, csum);
2603 mxge_vlan_tag_remove(struct mbuf *m, uint32_t *csum)
2605 struct ether_vlan_header *evl;
2606 struct ether_header *eh;
2609 evl = mtod(m, struct ether_vlan_header *);
2610 eh = mtod(m, struct ether_header *);
2613 * fix checksum by subtracting ETHER_VLAN_ENCAP_LEN bytes
2614 * after what the firmware thought was the end of the ethernet header. */
2618 /* put checksum into host byte order */
2619 *csum = ntohs(*csum);
2620 partial = ntohl(*(uint32_t *)(mtod(m, char *) + ETHER_HDR_LEN));
2621 (*csum) += ~partial;
2622 (*csum) += ((*csum) < ~partial);
2623 (*csum) = ((*csum) >> 16) + ((*csum) & 0xFFFF);
2624 (*csum) = ((*csum) >> 16) + ((*csum) & 0xFFFF);
2626 /* restore checksum to network byte order;
2627 later consumers expect this */
2628 *csum = htons(*csum);
2631 #ifdef MXGE_NEW_VLAN_API
2632 m->m_pkthdr.ether_vtag = ntohs(evl->evl_tag);
2636 mtag = m_tag_alloc(MTAG_VLAN, MTAG_VLAN_TAG, sizeof(u_int),
2640 VLAN_TAG_VALUE(mtag) = ntohs(evl->evl_tag);
2641 m_tag_prepend(m, mtag);
2645 m->m_flags |= M_VLANTAG;
2648 * Remove the 802.1q header by copying the Ethernet
2649 * addresses over it and adjusting the beginning of
2650 * the data in the mbuf. The encapsulated Ethernet
2651 * type field is already in place.
2653 bcopy((char *)evl, (char *)evl + ETHER_VLAN_ENCAP_LEN,
2654 ETHER_HDR_LEN - ETHER_TYPE_LEN);
2655 m_adj(m, ETHER_VLAN_ENCAP_LEN);
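/*
 * Editor's sketch of the 802.1q strip above, on a plain byte buffer
 * (hypothetical standalone code): slide the 12 bytes of destination and
 * source MAC forward by 4 so they abut the inner ether_type, then treat
 * the frame as starting 4 bytes later -- the same effect as the
 * bcopy()/m_adj() pair above, minus the checksum bookkeeping.
 */
#if 0	/* illustrative only; never compiled */
#include <string.h>

#define DEMO_HDR_LEN	14	/* ETHER_HDR_LEN */
#define DEMO_TYPE_LEN	2	/* ETHER_TYPE_LEN */
#define DEMO_ENCAP_LEN	4	/* ETHER_VLAN_ENCAP_LEN */

static char *
demo_strip_vlan(char *frame)
{
	/* copy the address bytes over the 802.1q tag */
	memmove(frame + DEMO_ENCAP_LEN, frame,
	    DEMO_HDR_LEN - DEMO_TYPE_LEN);
	return (frame + DEMO_ENCAP_LEN);	/* new start of the frame */
}
#endif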
2660 mxge_rx_done_big(struct mxge_slice_state *ss, uint32_t len,
2661 uint32_t csum, int lro)
2666 struct ether_header *eh;
2668 bus_dmamap_t old_map;
2674 idx = rx->cnt & rx->mask;
2675 rx->cnt += rx->nbufs;
2676 /* save a pointer to the received mbuf */
2677 m = rx->info[idx].m;
2678 /* try to replace the received mbuf */
2679 if (mxge_get_buf_big(ss, rx->extra_map, idx)) {
2680 /* drop the frame -- the old mbuf is recycled */
2681 if_inc_counter(ifp, IFCOUNTER_IERRORS, 1);
2685 /* unmap the received buffer */
2686 old_map = rx->info[idx].map;
2687 bus_dmamap_sync(rx->dmat, old_map, BUS_DMASYNC_POSTREAD);
2688 bus_dmamap_unload(rx->dmat, old_map);
2690 /* swap the bus_dmamap_t's */
2691 rx->info[idx].map = rx->extra_map;
2692 rx->extra_map = old_map;
2694 /* mcp implicitly skips 1st 2 bytes so that packet is properly aligned */
2696 m->m_data += MXGEFW_PAD;
2698 m->m_pkthdr.rcvif = ifp;
2699 m->m_len = m->m_pkthdr.len = len;
2701 eh = mtod(m, struct ether_header *);
2702 if (eh->ether_type == htons(ETHERTYPE_VLAN)) {
2703 mxge_vlan_tag_remove(m, &csum);
2705 /* flowid only valid if RSS hashing is enabled */
2706 if (sc->num_slices > 1) {
2707 m->m_pkthdr.flowid = (ss - sc->ss);
2708 M_HASHTYPE_SET(m, M_HASHTYPE_OPAQUE);
2710 /* if the checksum is valid, mark it in the mbuf header */
2711 if ((ifp->if_capenable & (IFCAP_RXCSUM_IPV6 | IFCAP_RXCSUM)) &&
2712 (0 == mxge_rx_csum(m, csum))) {
2713 /* Tell the stack that the checksum is good */
2714 m->m_pkthdr.csum_data = 0xffff;
2715 m->m_pkthdr.csum_flags = CSUM_PSEUDO_HDR |
2718 #if defined(INET) || defined (INET6)
2719 if (lro && (0 == tcp_lro_rx(&ss->lc, m, 0)))
2723 /* pass the frame up the stack */
2724 (*ifp->if_input)(ifp, m);
2728 mxge_rx_done_small(struct mxge_slice_state *ss, uint32_t len,
2729 uint32_t csum, int lro)
2733 struct ether_header *eh;
2736 bus_dmamap_t old_map;
2742 idx = rx->cnt & rx->mask;
2744 /* save a pointer to the received mbuf */
2745 m = rx->info[idx].m;
2746 /* try to replace the received mbuf */
2747 if (mxge_get_buf_small(ss, rx->extra_map, idx)) {
2748 /* drop the frame -- the old mbuf is recycled */
2749 if_inc_counter(ifp, IFCOUNTER_IERRORS, 1);
2753 /* unmap the received buffer */
2754 old_map = rx->info[idx].map;
2755 bus_dmamap_sync(rx->dmat, old_map, BUS_DMASYNC_POSTREAD);
2756 bus_dmamap_unload(rx->dmat, old_map);
2758 /* swap the bus_dmamap_t's */
2759 rx->info[idx].map = rx->extra_map;
2760 rx->extra_map = old_map;
2762 /* mcp implicitly skips 1st 2 bytes so that packet is properly aligned */
2764 m->m_data += MXGEFW_PAD;
2766 m->m_pkthdr.rcvif = ifp;
2767 m->m_len = m->m_pkthdr.len = len;
2769 eh = mtod(m, struct ether_header *);
2770 if (eh->ether_type == htons(ETHERTYPE_VLAN)) {
2771 mxge_vlan_tag_remove(m, &csum);
2773 /* flowid only valid if RSS hashing is enabled */
2774 if (sc->num_slices > 1) {
2775 m->m_pkthdr.flowid = (ss - sc->ss);
2776 M_HASHTYPE_SET(m, M_HASHTYPE_OPAQUE);
2778 /* if the checksum is valid, mark it in the mbuf header */
2779 if ((ifp->if_capenable & (IFCAP_RXCSUM_IPV6 | IFCAP_RXCSUM)) &&
2780 (0 == mxge_rx_csum(m, csum))) {
2781 /* Tell the stack that the checksum is good */
2782 m->m_pkthdr.csum_data = 0xffff;
2783 m->m_pkthdr.csum_flags = CSUM_PSEUDO_HDR |
2786 #if defined(INET) || defined (INET6)
2787 if (lro && (0 == tcp_lro_rx(&ss->lc, m, csum)))
2791 /* pass the frame up the stack */
2792 (*ifp->if_input)(ifp, m);
2796 mxge_clean_rx_done(struct mxge_slice_state *ss)
2798 mxge_rx_done_t *rx_done = &ss->rx_done;
2804 lro = ss->sc->ifp->if_capenable & IFCAP_LRO;
2805 while (rx_done->entry[rx_done->idx].length != 0) {
2806 length = ntohs(rx_done->entry[rx_done->idx].length);
2807 rx_done->entry[rx_done->idx].length = 0;
2808 checksum = rx_done->entry[rx_done->idx].checksum;
2809 if (length <= (MHLEN - MXGEFW_PAD))
2810 mxge_rx_done_small(ss, length, checksum, lro);
2812 mxge_rx_done_big(ss, length, checksum, lro);
2814 rx_done->idx = rx_done->cnt & rx_done->mask;
2816 /* limit potential for livelock */
2817 if (__predict_false(++limit > rx_done->mask / 2))
2818 break;
2820 #if defined(INET) || defined (INET6)
2821 tcp_lro_flush_all(&ss->lc);
2827 mxge_tx_done(struct mxge_slice_state *ss, uint32_t mcp_idx)
2838 while (tx->pkt_done != mcp_idx) {
2839 idx = tx->done & tx->mask;
2841 m = tx->info[idx].m;
2842 /* mbuf and DMA map only attached to the first segment per-mbuf */
2845 ss->obytes += m->m_pkthdr.len;
2846 if (m->m_flags & M_MCAST)
2849 tx->info[idx].m = NULL;
2850 map = tx->info[idx].map;
2851 bus_dmamap_unload(tx->dmat, map);
2854 if (tx->info[idx].flag) {
2855 tx->info[idx].flag = 0;
2860 /* If we have space, clear IFF_OACTIVE to tell the stack that
2861 it's OK to send packets */
2862 #ifdef IFNET_BUF_RING
2863 flags = &ss->if_drv_flags;
2865 flags = &ifp->if_drv_flags;
2867 mtx_lock(&ss->tx.mtx);
2868 if ((*flags) & IFF_DRV_OACTIVE &&
2869 tx->req - tx->done < (tx->mask + 1)/4) {
2870 *(flags) &= ~IFF_DRV_OACTIVE;
2872 mxge_start_locked(ss);
2874 #ifdef IFNET_BUF_RING
2875 if ((ss->sc->num_slices > 1) && (tx->req == tx->done)) {
2876 /* let the NIC stop polling this queue, since there
2877 * are no more transmits pending */
2878 if (tx->req == tx->done) {
2880 tx->queue_active = 0;
2886 mtx_unlock(&ss->tx.mtx);
2890 static struct mxge_media_type mxge_xfp_media_types[] =
2892 {IFM_10G_CX4, 0x7f, "10GBASE-CX4 (module)"},
2893 {IFM_10G_SR, (1 << 7), "10GBASE-SR"},
2894 {IFM_10G_LR, (1 << 6), "10GBASE-LR"},
2895 {0, (1 << 5), "10GBASE-ER"},
2896 {IFM_10G_LRM, (1 << 4), "10GBASE-LRM"},
2897 {0, (1 << 3), "10GBASE-SW"},
2898 {0, (1 << 2), "10GBASE-LW"},
2899 {0, (1 << 1), "10GBASE-EW"},
2900 {0, (1 << 0), "Reserved"}
2902 static struct mxge_media_type mxge_sfp_media_types[] =
2904 {IFM_10G_TWINAX, 0, "10GBASE-Twinax"},
2905 {0, (1 << 7), "Reserved"},
2906 {IFM_10G_LRM, (1 << 6), "10GBASE-LRM"},
2907 {IFM_10G_LR, (1 << 5), "10GBASE-LR"},
2908 {IFM_10G_SR, (1 << 4), "10GBASE-SR"},
2909 {IFM_10G_TWINAX,(1 << 0), "10GBASE-Twinax"}
2913 mxge_media_set(mxge_softc_t *sc, int media_type)
2917 ifmedia_add(&sc->media, IFM_ETHER | IFM_FDX | media_type,
2919 ifmedia_set(&sc->media, IFM_ETHER | IFM_FDX | media_type);
2920 sc->current_media = media_type;
2921 sc->media.ifm_media = sc->media.ifm_cur->ifm_media;
2925 mxge_media_init(mxge_softc_t *sc)
2930 ifmedia_removeall(&sc->media);
2931 mxge_media_set(sc, IFM_AUTO);
2934 * parse the product code to determine the interface type
2935 * (CX4, XFP, Quad Ribbon Fiber) by looking at the character
2936 * after the 3rd dash in the driver's cached copy of the
2937 * EEPROM's product code string.
2939 ptr = sc->product_code_string;
2941 device_printf(sc->dev, "Missing product code\n");
2945 for (i = 0; i < 3; i++, ptr++) {
2946 ptr = strchr(ptr, '-');
2948 device_printf(sc->dev,
2949 "only %d dashes in PC?!?\n", i);
2953 if (*ptr == 'C' || *(ptr +1) == 'C') {
2955 sc->connector = MXGE_CX4;
2956 mxge_media_set(sc, IFM_10G_CX4);
2957 } else if (*ptr == 'Q') {
2958 /* -Q is Quad Ribbon Fiber */
2959 sc->connector = MXGE_QRF;
2960 device_printf(sc->dev, "Quad Ribbon Fiber Media\n");
2961 /* FreeBSD has no media type for Quad ribbon fiber */
2962 } else if (*ptr == 'R') {
2964 sc->connector = MXGE_XFP;
2965 } else if (*ptr == 'S' || *(ptr +1) == 'S') {
2966 /* -S or -2S is SFP+ */
2967 sc->connector = MXGE_SFP;
2969 device_printf(sc->dev, "Unknown media type: %c\n", *ptr);
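/*
 * Editor's sketch (hypothetical standalone code) of the dash-walking
 * loop above: return the character after the 3rd dash of a product
 * code such as "10G-PCIE-8A-R", or '\0' if there are fewer dashes.
 */
#if 0	/* illustrative only; never compiled */
#include <stddef.h>
#include <string.h>

static char
demo_connector_char(const char *pc)
{
	const char *ptr = pc;
	int i;

	for (i = 0; i < 3; i++, ptr++) {
		ptr = strchr(ptr, '-');
		if (ptr == NULL)
			return ('\0');
	}
	return (*ptr);	/* demo_connector_char("10G-PCIE-8A-R") == 'R' */
}
#endif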
2974 * Determine the media type for a NIC. Some XFPs will identify
2975 * themselves only when their link is up, so this is initiated via a
2976 * link up interrupt. However, this can potentially take up to
2977 * several milliseconds, so it is run via the watchdog routine, rather
2978 * than in the interrupt handler itself.
2981 mxge_media_probe(mxge_softc_t *sc)
2986 struct mxge_media_type *mxge_media_types = NULL;
2987 int i, err, ms, mxge_media_type_entries;
2990 sc->need_media_probe = 0;
2992 if (sc->connector == MXGE_XFP) {
2994 mxge_media_types = mxge_xfp_media_types;
2995 mxge_media_type_entries =
2996 nitems(mxge_xfp_media_types);
2997 byte = MXGE_XFP_COMPLIANCE_BYTE;
2999 } else if (sc->connector == MXGE_SFP) {
3000 /* -S or -2S is SFP+ */
3001 mxge_media_types = mxge_sfp_media_types;
3002 mxge_media_type_entries =
3003 nitems(mxge_sfp_media_types);
3007 /* nothing to do; media type cannot change */
3012 * At this point we know the NIC has an XFP cage, so now we
3013 * try to determine what is in the cage by using the
3014 * firmware's XFP I2C commands to read the XFP 10GbE compliance
3015 * register. We read just one byte, which may take over a millisecond.
3019 cmd.data0 = 0; /* just fetch 1 byte, not all 256 */
3021 err = mxge_send_cmd(sc, MXGEFW_CMD_I2C_READ, &cmd);
3022 if (err == MXGEFW_CMD_ERROR_I2C_FAILURE) {
3023 device_printf(sc->dev, "failed to read XFP\n");
3025 if (err == MXGEFW_CMD_ERROR_I2C_ABSENT) {
3026 device_printf(sc->dev, "Type R/S with no XFP!?!?\n");
3028 if (err != MXGEFW_CMD_OK) {
3032 /* now we wait for the data to be cached */
3034 err = mxge_send_cmd(sc, MXGEFW_CMD_I2C_BYTE, &cmd);
3035 for (ms = 0; (err == EBUSY) && (ms < 50); ms++) {
3038 err = mxge_send_cmd(sc, MXGEFW_CMD_I2C_BYTE, &cmd);
3040 if (err != MXGEFW_CMD_OK) {
3041 device_printf(sc->dev, "failed to read %s (%d, %dms)\n",
3042 cage_type, err, ms);
3046 if (cmd.data0 == mxge_media_types[0].bitmask) {
3048 device_printf(sc->dev, "%s:%s\n", cage_type,
3049 mxge_media_types[0].name);
3050 if (sc->current_media != mxge_media_types[0].flag) {
3051 mxge_media_init(sc);
3052 mxge_media_set(sc, mxge_media_types[0].flag);
3056 for (i = 1; i < mxge_media_type_entries; i++) {
3057 if (cmd.data0 & mxge_media_types[i].bitmask) {
3059 device_printf(sc->dev, "%s:%s\n",
3061 mxge_media_types[i].name);
3063 if (sc->current_media != mxge_media_types[i].flag) {
3064 mxge_media_init(sc);
3065 mxge_media_set(sc, mxge_media_types[i].flag);
3071 device_printf(sc->dev, "%s media 0x%x unknown\n",
3072 cage_type, cmd.data0);
3078 mxge_intr(void *arg)
3080 struct mxge_slice_state *ss = arg;
3081 mxge_softc_t *sc = ss->sc;
3082 mcp_irq_data_t *stats = ss->fw_stats;
3083 mxge_tx_ring_t *tx = &ss->tx;
3084 mxge_rx_done_t *rx_done = &ss->rx_done;
3085 uint32_t send_done_count;
3089 #ifndef IFNET_BUF_RING
3090 /* an interrupt on a non-zero slice is implicitly valid
3091 since MSI-X irqs are not shared */
3093 mxge_clean_rx_done(ss);
3094 *ss->irq_claim = be32toh(3);
3099 /* make sure the DMA has finished */
3100 if (!stats->valid) {
3103 valid = stats->valid;
3105 if (sc->legacy_irq) {
3106 /* lower legacy IRQ */
3107 *sc->irq_deassert = 0;
3108 if (!mxge_deassert_wait)
3109 /* don't wait for confirmation that the irq is low */
3115 /* loop while waiting for legacy irq deassertion */
3117 /* check for transmit completes and receives */
3118 send_done_count = be32toh(stats->send_done_count);
3119 while ((send_done_count != tx->pkt_done) ||
3120 (rx_done->entry[rx_done->idx].length != 0)) {
3121 if (send_done_count != tx->pkt_done)
3122 mxge_tx_done(ss, (int)send_done_count);
3123 mxge_clean_rx_done(ss);
3124 send_done_count = be32toh(stats->send_done_count);
3126 if (sc->legacy_irq && mxge_deassert_wait)
3128 } while (*((volatile uint8_t *) &stats->valid));
3130 /* fw link & error stats meaningful only on the first slice */
3131 if (__predict_false((ss == sc->ss) && stats->stats_updated)) {
3132 if (sc->link_state != stats->link_up) {
3133 sc->link_state = stats->link_up;
3134 if (sc->link_state) {
3135 if_link_state_change(sc->ifp, LINK_STATE_UP);
3137 device_printf(sc->dev, "link up\n");
3139 if_link_state_change(sc->ifp, LINK_STATE_DOWN);
3141 device_printf(sc->dev, "link down\n");
3143 sc->need_media_probe = 1;
3145 if (sc->rdma_tags_available !=
3146 be32toh(stats->rdma_tags_available)) {
3147 sc->rdma_tags_available =
3148 be32toh(stats->rdma_tags_available);
3149 device_printf(sc->dev, "RDMA timed out! %d tags "
3150 "left\n", sc->rdma_tags_available);
3153 if (stats->link_down) {
3154 sc->down_cnt += stats->link_down;
3156 if_link_state_change(sc->ifp, LINK_STATE_DOWN);
3160 /* check to see if we have an rx token to pass back */
3162 *ss->irq_claim = be32toh(3);
3163 *(ss->irq_claim + 1) = be32toh(3);
3167 mxge_init(void *arg)
3169 mxge_softc_t *sc = arg;
3170 struct ifnet *ifp = sc->ifp;
3173 mtx_lock(&sc->driver_mtx);
3174 if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
3175 (void) mxge_open(sc);
3176 mtx_unlock(&sc->driver_mtx);
3182 mxge_free_slice_mbufs(struct mxge_slice_state *ss)
3186 #if defined(INET) || defined(INET6)
3187 tcp_lro_free(&ss->lc);
3189 for (i = 0; i <= ss->rx_big.mask; i++) {
3190 if (ss->rx_big.info[i].m == NULL)
3192 bus_dmamap_unload(ss->rx_big.dmat,
3193 ss->rx_big.info[i].map);
3194 m_freem(ss->rx_big.info[i].m);
3195 ss->rx_big.info[i].m = NULL;
3198 for (i = 0; i <= ss->rx_small.mask; i++) {
3199 if (ss->rx_small.info[i].m == NULL)
3201 bus_dmamap_unload(ss->rx_small.dmat,
3202 ss->rx_small.info[i].map);
3203 m_freem(ss->rx_small.info[i].m);
3204 ss->rx_small.info[i].m = NULL;
3207 /* transmit ring used only on the first slice */
3208 if (ss->tx.info == NULL)
3211 for (i = 0; i <= ss->tx.mask; i++) {
3212 ss->tx.info[i].flag = 0;
3213 if (ss->tx.info[i].m == NULL)
3215 bus_dmamap_unload(ss->tx.dmat,
3216 ss->tx.info[i].map);
3217 m_freem(ss->tx.info[i].m);
3218 ss->tx.info[i].m = NULL;
3223 mxge_free_mbufs(mxge_softc_t *sc)
3227 for (slice = 0; slice < sc->num_slices; slice++)
3228 mxge_free_slice_mbufs(&sc->ss[slice]);
3232 mxge_free_slice_rings(struct mxge_slice_state *ss)
3237 if (ss->rx_done.entry != NULL)
3238 mxge_dma_free(&ss->rx_done.dma);
3239 ss->rx_done.entry = NULL;
3241 if (ss->tx.req_bytes != NULL)
3242 free(ss->tx.req_bytes, M_DEVBUF);
3243 ss->tx.req_bytes = NULL;
3245 if (ss->tx.seg_list != NULL)
3246 free(ss->tx.seg_list, M_DEVBUF);
3247 ss->tx.seg_list = NULL;
3249 if (ss->rx_small.shadow != NULL)
3250 free(ss->rx_small.shadow, M_DEVBUF);
3251 ss->rx_small.shadow = NULL;
3253 if (ss->rx_big.shadow != NULL)
3254 free(ss->rx_big.shadow, M_DEVBUF);
3255 ss->rx_big.shadow = NULL;
3257 if (ss->tx.info != NULL) {
3258 if (ss->tx.dmat != NULL) {
3259 for (i = 0; i <= ss->tx.mask; i++) {
3260 bus_dmamap_destroy(ss->tx.dmat,
3261 ss->tx.info[i].map);
3263 bus_dma_tag_destroy(ss->tx.dmat);
3265 free(ss->tx.info, M_DEVBUF);
3269 if (ss->rx_small.info != NULL) {
3270 if (ss->rx_small.dmat != NULL) {
3271 for (i = 0; i <= ss->rx_small.mask; i++) {
3272 bus_dmamap_destroy(ss->rx_small.dmat,
3273 ss->rx_small.info[i].map);
3275 bus_dmamap_destroy(ss->rx_small.dmat,
3276 ss->rx_small.extra_map);
3277 bus_dma_tag_destroy(ss->rx_small.dmat);
3279 free(ss->rx_small.info, M_DEVBUF);
3281 ss->rx_small.info = NULL;
3283 if (ss->rx_big.info != NULL) {
3284 if (ss->rx_big.dmat != NULL) {
3285 for (i = 0; i <= ss->rx_big.mask; i++) {
3286 bus_dmamap_destroy(ss->rx_big.dmat,
3287 ss->rx_big.info[i].map);
3289 bus_dmamap_destroy(ss->rx_big.dmat,
3290 ss->rx_big.extra_map);
3291 bus_dma_tag_destroy(ss->rx_big.dmat);
3293 free(ss->rx_big.info, M_DEVBUF);
3295 ss->rx_big.info = NULL;
3299 mxge_free_rings(mxge_softc_t *sc)
3303 for (slice = 0; slice < sc->num_slices; slice++)
3304 mxge_free_slice_rings(&sc->ss[slice]);
3308 mxge_alloc_slice_rings(struct mxge_slice_state *ss, int rx_ring_entries,
3309 int tx_ring_entries)
3311 mxge_softc_t *sc = ss->sc;
3315 /* allocate per-slice receive resources */
3317 ss->rx_small.mask = ss->rx_big.mask = rx_ring_entries - 1;
3318 ss->rx_done.mask = (2 * rx_ring_entries) - 1;
3320 /* allocate the rx shadow rings */
3321 bytes = rx_ring_entries * sizeof (*ss->rx_small.shadow);
3322 ss->rx_small.shadow = malloc(bytes, M_DEVBUF, M_ZERO|M_WAITOK);
3324 bytes = rx_ring_entries * sizeof (*ss->rx_big.shadow);
3325 ss->rx_big.shadow = malloc(bytes, M_DEVBUF, M_ZERO|M_WAITOK);
3327 /* allocate the rx host info rings */
3328 bytes = rx_ring_entries * sizeof (*ss->rx_small.info);
3329 ss->rx_small.info = malloc(bytes, M_DEVBUF, M_ZERO|M_WAITOK);
3331 bytes = rx_ring_entries * sizeof (*ss->rx_big.info);
3332 ss->rx_big.info = malloc(bytes, M_DEVBUF, M_ZERO|M_WAITOK);
3334 /* allocate the rx busdma resources */
3335 err = bus_dma_tag_create(sc->parent_dmat, /* parent */
3337 4096, /* boundary */
3338 BUS_SPACE_MAXADDR, /* low */
3339 BUS_SPACE_MAXADDR, /* high */
3340 NULL, NULL, /* filter */
3341 MHLEN, /* maxsize */
3343 MHLEN, /* maxsegsize */
3344 BUS_DMA_ALLOCNOW, /* flags */
3345 NULL, NULL, /* lock */
3346 &ss->rx_small.dmat); /* tag */
3348 device_printf(sc->dev, "Err %d allocating rx_small dmat\n",
3353 err = bus_dma_tag_create(sc->parent_dmat, /* parent */
3355 #if MXGE_VIRT_JUMBOS
3356 4096, /* boundary */
3360 BUS_SPACE_MAXADDR, /* low */
3361 BUS_SPACE_MAXADDR, /* high */
3362 NULL, NULL, /* filter */
3363 3*4096, /* maxsize */
3364 #if MXGE_VIRT_JUMBOS
3366 4096, /* maxsegsize*/
3369 MJUM9BYTES, /* maxsegsize*/
3371 BUS_DMA_ALLOCNOW, /* flags */
3372 NULL, NULL, /* lock */
3373 &ss->rx_big.dmat); /* tag */
3375 device_printf(sc->dev, "Err %d allocating rx_big dmat\n",
3379 for (i = 0; i <= ss->rx_small.mask; i++) {
3380 err = bus_dmamap_create(ss->rx_small.dmat, 0,
3381 &ss->rx_small.info[i].map);
3383 device_printf(sc->dev, "Err %d rx_small dmamap\n",
3388 err = bus_dmamap_create(ss->rx_small.dmat, 0,
3389 &ss->rx_small.extra_map);
3391 device_printf(sc->dev, "Err %d extra rx_small dmamap\n",
3396 for (i = 0; i <= ss->rx_big.mask; i++) {
3397 err = bus_dmamap_create(ss->rx_big.dmat, 0,
3398 &ss->rx_big.info[i].map);
3400 device_printf(sc->dev, "Err %d rx_big dmamap\n",
3405 err = bus_dmamap_create(ss->rx_big.dmat, 0,
3406 &ss->rx_big.extra_map);
3408 device_printf(sc->dev, "Err %d extra rx_big dmamap\n",
3413 /* now allocate TX resources */
3415 #ifndef IFNET_BUF_RING
3416 /* only use a single TX ring for now */
3417 if (ss != ss->sc->ss)
3421 ss->tx.mask = tx_ring_entries - 1;
3422 ss->tx.max_desc = MIN(MXGE_MAX_SEND_DESC, tx_ring_entries / 4);
3425 /* allocate the tx request copy block */
3427 sizeof (*ss->tx.req_list) * (ss->tx.max_desc + 4);
3428 ss->tx.req_bytes = malloc(bytes, M_DEVBUF, M_WAITOK);
3429 /* ensure req_list entries are aligned to 8 bytes */
3430 ss->tx.req_list = (mcp_kreq_ether_send_t *)
3431 ((unsigned long)(ss->tx.req_bytes + 7) & ~7UL);
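	/*
	 * Editor's note: (x + 7) & ~7 above rounds the allocation up to
	 * the next 8-byte boundary.  A standalone sketch of the general
	 * idiom (align must be a power of two); names are hypothetical:
	 */
#if 0	/* illustrative only; never compiled */
#include <stdint.h>
	/* demo_align_up(0x1001, 8) == 0x1008; demo_align_up(0x1008, 8) == 0x1008 */
	static inline uintptr_t
	demo_align_up(uintptr_t p, uintptr_t align)
	{
		return ((p + align - 1) & ~(align - 1));
	}
#endif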
3433 /* allocate the tx busdma segment list */
3434 bytes = sizeof (*ss->tx.seg_list) * ss->tx.max_desc;
3435 ss->tx.seg_list = (bus_dma_segment_t *)
3436 malloc(bytes, M_DEVBUF, M_WAITOK);
3438 /* allocate the tx host info ring */
3439 bytes = tx_ring_entries * sizeof (*ss->tx.info);
3440 ss->tx.info = malloc(bytes, M_DEVBUF, M_ZERO|M_WAITOK);
3442 /* allocate the tx busdma resources */
3443 err = bus_dma_tag_create(sc->parent_dmat, /* parent */
3445 sc->tx_boundary, /* boundary */
3446 BUS_SPACE_MAXADDR, /* low */
3447 BUS_SPACE_MAXADDR, /* high */
3448 NULL, NULL, /* filter */
3449 65536 + 256, /* maxsize */
3450 ss->tx.max_desc - 2, /* num segs */
3451 sc->tx_boundary, /* maxsegsz */
3452 BUS_DMA_ALLOCNOW, /* flags */
3453 NULL, NULL, /* lock */
3454 &ss->tx.dmat); /* tag */
3457 device_printf(sc->dev, "Err %d allocating tx dmat\n",
3462 /* now use these tags to set up dmamaps for each slot in the ring */
3464 for (i = 0; i <= ss->tx.mask; i++) {
3465 err = bus_dmamap_create(ss->tx.dmat, 0,
3466 &ss->tx.info[i].map);
3468 device_printf(sc->dev, "Err %d tx dmamap\n",
3478 mxge_alloc_rings(mxge_softc_t *sc)
3482 int tx_ring_entries, rx_ring_entries;
3485 /* get ring sizes */
3486 err = mxge_send_cmd(sc, MXGEFW_CMD_GET_SEND_RING_SIZE, &cmd);
3487 tx_ring_size = cmd.data0;
3489 device_printf(sc->dev, "Cannot determine tx ring sizes\n");
3493 tx_ring_entries = tx_ring_size / sizeof (mcp_kreq_ether_send_t);
3494 rx_ring_entries = sc->rx_ring_size / sizeof (mcp_dma_addr_t);
3495 IFQ_SET_MAXLEN(&sc->ifp->if_snd, tx_ring_entries - 1);
3496 sc->ifp->if_snd.ifq_drv_maxlen = sc->ifp->if_snd.ifq_maxlen;
3497 IFQ_SET_READY(&sc->ifp->if_snd);
3499 for (slice = 0; slice < sc->num_slices; slice++) {
3500 err = mxge_alloc_slice_rings(&sc->ss[slice],
3509 mxge_free_rings(sc);
3516 mxge_choose_params(int mtu, int *big_buf_size, int *cl_size, int *nbufs)
3518 int bufsize = mtu + ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN + MXGEFW_PAD;
3520 if (bufsize < MCLBYTES) {
3521 /* easy, everything fits in a single buffer */
3522 *big_buf_size = MCLBYTES;
3523 *cl_size = MCLBYTES;
3528 if (bufsize < MJUMPAGESIZE) {
3529 /* still easy, everything still fits in a single buffer */
3530 *big_buf_size = MJUMPAGESIZE;
3531 *cl_size = MJUMPAGESIZE;
3535 #if MXGE_VIRT_JUMBOS
3536 /* now we need to use virtually contiguous buffers */
3537 *cl_size = MJUM9BYTES;
3538 *big_buf_size = 4096;
3539 *nbufs = mtu / 4096 + 1;
3540 /* needs to be a power of two, so round up */
3544 *cl_size = MJUM9BYTES;
3545 *big_buf_size = MJUM9BYTES;
3546 *nbufs = 1;
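/*
 * Editor's note: with the stock FreeBSD cluster sizes (MCLBYTES 2048,
 * MJUMPAGESIZE == PAGE_SIZE, MJUM9BYTES 9216) and MXGE_VIRT_JUMBOS
 * unset, the selection above works out as (bufsize = mtu + 20):
 *
 *   mtu 1500 -> bufsize 1520 -> MCLBYTES clusters,   1 buffer per frame
 *   mtu 9000 -> bufsize 9020 -> MJUM9BYTES clusters, 1 buffer per frame
 *
 * A hedged usage sketch:
 */
#if 0	/* illustrative only; never compiled */
	int big, cl, nb;

	mxge_choose_params(1500, &big, &cl, &nb);	/* big == cl == MCLBYTES */
	mxge_choose_params(9000, &big, &cl, &nb);	/* big == cl == MJUM9BYTES */
#endif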
3551 mxge_slice_open(struct mxge_slice_state *ss, int nbufs, int cl_size)
3560 slice = ss - sc->ss;
3562 #if defined(INET) || defined(INET6)
3563 (void)tcp_lro_init(&ss->lc);
3565 ss->lc.ifp = sc->ifp;
3567 /* get the lanai pointers to the send and receive rings */
3570 #ifndef IFNET_BUF_RING
3571 /* We currently only send from the first slice */
3575 err = mxge_send_cmd(sc, MXGEFW_CMD_GET_SEND_OFFSET, &cmd);
3577 (volatile mcp_kreq_ether_send_t *)(sc->sram + cmd.data0);
3578 ss->tx.send_go = (volatile uint32_t *)
3579 (sc->sram + MXGEFW_ETH_SEND_GO + 64 * slice);
3580 ss->tx.send_stop = (volatile uint32_t *)
3581 (sc->sram + MXGEFW_ETH_SEND_STOP + 64 * slice);
3582 #ifndef IFNET_BUF_RING
3586 err |= mxge_send_cmd(sc,
3587 MXGEFW_CMD_GET_SMALL_RX_OFFSET, &cmd);
3588 ss->rx_small.lanai =
3589 (volatile mcp_kreq_ether_recv_t *)(sc->sram + cmd.data0);
3591 err |= mxge_send_cmd(sc, MXGEFW_CMD_GET_BIG_RX_OFFSET, &cmd);
3593 (volatile mcp_kreq_ether_recv_t *)(sc->sram + cmd.data0);
3596 device_printf(sc->dev,
3597 "failed to get ring sizes or locations\n");
3601 /* stock receive rings */
3602 for (i = 0; i <= ss->rx_small.mask; i++) {
3603 map = ss->rx_small.info[i].map;
3604 err = mxge_get_buf_small(ss, map, i);
3606 device_printf(sc->dev, "alloced %d/%d smalls\n",
3607 i, ss->rx_small.mask + 1);
3611 for (i = 0; i <= ss->rx_big.mask; i++) {
3612 ss->rx_big.shadow[i].addr_low = 0xffffffff;
3613 ss->rx_big.shadow[i].addr_high = 0xffffffff;
3615 ss->rx_big.nbufs = nbufs;
3616 ss->rx_big.cl_size = cl_size;
3617 ss->rx_big.mlen = ss->sc->ifp->if_mtu + ETHER_HDR_LEN +
3618 ETHER_VLAN_ENCAP_LEN + MXGEFW_PAD;
3619 for (i = 0; i <= ss->rx_big.mask; i += ss->rx_big.nbufs) {
3620 map = ss->rx_big.info[i].map;
3621 err = mxge_get_buf_big(ss, map, i);
3623 device_printf(sc->dev, "alloced %d/%d bigs\n",
3624 i, ss->rx_big.mask + 1);
3632 mxge_open(mxge_softc_t *sc)
3635 int err, big_bytes, nbufs, slice, cl_size, i;
3637 volatile uint8_t *itable;
3638 struct mxge_slice_state *ss;
3640 /* Copy the MAC address in case it was overridden */
3641 bcopy(IF_LLADDR(sc->ifp), sc->mac_addr, ETHER_ADDR_LEN);
3643 err = mxge_reset(sc, 1);
3645 device_printf(sc->dev, "failed to reset\n");
3649 if (sc->num_slices > 1) {
3650 /* setup the indirection table */
3651 cmd.data0 = sc->num_slices;
3652 err = mxge_send_cmd(sc, MXGEFW_CMD_SET_RSS_TABLE_SIZE,
3655 err |= mxge_send_cmd(sc, MXGEFW_CMD_GET_RSS_TABLE_OFFSET,
3658 device_printf(sc->dev,
3659 "failed to setup rss tables\n");
3663 /* just enable an identity mapping */
3664 itable = sc->sram + cmd.data0;
3665 for (i = 0; i < sc->num_slices; i++)
3666 itable[i] = (uint8_t)i;
3669 cmd.data1 = mxge_rss_hash_type;
3670 err = mxge_send_cmd(sc, MXGEFW_CMD_SET_RSS_ENABLE, &cmd);
3672 device_printf(sc->dev, "failed to enable slices\n");
3678 mxge_choose_params(sc->ifp->if_mtu, &big_bytes, &cl_size, &nbufs);
3681 err = mxge_send_cmd(sc, MXGEFW_CMD_ALWAYS_USE_N_BIG_BUFFERS,
3683 /* error is only meaningful if we're trying to set
3684 MXGEFW_CMD_ALWAYS_USE_N_BIG_BUFFERS > 1 */
3685 if (err && nbufs > 1) {
3686 device_printf(sc->dev,
3687 "Failed to set alway-use-n to %d\n",
3691 /* Give the firmware the mtu and the big and small buffer
3692 sizes. The firmware wants the big buf size to be a power
3693 of two. Luckily, FreeBSD's clusters are powers of two */
3694 cmd.data0 = sc->ifp->if_mtu + ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
3695 err = mxge_send_cmd(sc, MXGEFW_CMD_SET_MTU, &cmd);
3696 cmd.data0 = MHLEN - MXGEFW_PAD;
3697 err |= mxge_send_cmd(sc, MXGEFW_CMD_SET_SMALL_BUFFER_SIZE,
3699 cmd.data0 = big_bytes;
3700 err |= mxge_send_cmd(sc, MXGEFW_CMD_SET_BIG_BUFFER_SIZE, &cmd);
3703 device_printf(sc->dev, "failed to setup params\n");
3707 /* Now give the firmware the pointer to the stats block */
3709 #ifdef IFNET_BUF_RING
3710 slice < sc->num_slices;
3715 ss = &sc->ss[slice];
3717 MXGE_LOWPART_TO_U32(ss->fw_stats_dma.bus_addr);
3719 MXGE_HIGHPART_TO_U32(ss->fw_stats_dma.bus_addr);
3720 cmd.data2 = sizeof(struct mcp_irq_data);
3721 cmd.data2 |= (slice << 16);
3722 err |= mxge_send_cmd(sc, MXGEFW_CMD_SET_STATS_DMA_V2, &cmd);
3726 bus = sc->ss->fw_stats_dma.bus_addr;
3727 bus += offsetof(struct mcp_irq_data, send_done_count);
3728 cmd.data0 = MXGE_LOWPART_TO_U32(bus);
3729 cmd.data1 = MXGE_HIGHPART_TO_U32(bus);
3730 err = mxge_send_cmd(sc,
3731 MXGEFW_CMD_SET_STATS_DMA_OBSOLETE,
3733 /* Firmware cannot support multicast without STATS_DMA_V2 */
3734 sc->fw_multicast_support = 0;
3736 sc->fw_multicast_support = 1;
3740 device_printf(sc->dev, "failed to setup params\n");
3744 for (slice = 0; slice < sc->num_slices; slice++) {
3745 err = mxge_slice_open(&sc->ss[slice], nbufs, cl_size);
3747 device_printf(sc->dev, "couldn't open slice %d\n",
3753 /* Finally, start the firmware running */
3754 err = mxge_send_cmd(sc, MXGEFW_CMD_ETHERNET_UP, &cmd);
3756 device_printf(sc->dev, "Couldn't bring up link\n");
3759 #ifdef IFNET_BUF_RING
3760 for (slice = 0; slice < sc->num_slices; slice++) {
3761 ss = &sc->ss[slice];
3762 ss->if_drv_flags |= IFF_DRV_RUNNING;
3763 ss->if_drv_flags &= ~IFF_DRV_OACTIVE;
3766 sc->ifp->if_drv_flags |= IFF_DRV_RUNNING;
3767 sc->ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;
3773 mxge_free_mbufs(sc);
3779 mxge_close(mxge_softc_t *sc, int down)
3782 int err, old_down_cnt;
3783 #ifdef IFNET_BUF_RING
3784 struct mxge_slice_state *ss;
3788 #ifdef IFNET_BUF_RING
3789 for (slice = 0; slice < sc->num_slices; slice++) {
3790 ss = &sc->ss[slice];
3791 ss->if_drv_flags &= ~IFF_DRV_RUNNING;
3794 sc->ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
3796 old_down_cnt = sc->down_cnt;
3798 err = mxge_send_cmd(sc, MXGEFW_CMD_ETHERNET_DOWN, &cmd);
3800 device_printf(sc->dev,
3801 "Couldn't bring down link\n");
3803 if (old_down_cnt == sc->down_cnt) {
3804 /* wait for down irq */
3805 DELAY(10 * sc->intr_coal_delay);
3808 if (old_down_cnt == sc->down_cnt) {
3809 device_printf(sc->dev, "never got down irq\n");
3812 mxge_free_mbufs(sc);
3818 mxge_setup_cfg_space(mxge_softc_t *sc)
3820 device_t dev = sc->dev;
3822 uint16_t lnk, pectl;
3824 /* find the PCIe link width and set max read request to 4KB */
3825 if (pci_find_cap(dev, PCIY_EXPRESS, &reg) == 0) {
3826 lnk = pci_read_config(dev, reg + 0x12, 2);
3827 sc->link_width = (lnk >> 4) & 0x3f;
3829 if (sc->pectl == 0) {
3830 pectl = pci_read_config(dev, reg + 0x8, 2);
3831 pectl = (pectl & ~0x7000) | (5 << 12);
3832 pci_write_config(dev, reg + 0x8, pectl, 2);
3835 /* restore saved pectl after watchdog reset */
3836 pci_write_config(dev, reg + 0x8, sc->pectl, 2);
3840 /* Enable DMA and Memory space access */
3841 pci_enable_busmaster(dev);
3845 mxge_read_reboot(mxge_softc_t *sc)
3847 device_t dev = sc->dev;
3850 /* find the vendor specific offset */
3851 if (pci_find_cap(dev, PCIY_VENDOR, &vs) != 0) {
3852 device_printf(sc->dev,
3853 "could not find vendor specific offset\n");
3854 return (uint32_t)-1;
3856 /* enable read32 mode */
3857 pci_write_config(dev, vs + 0x10, 0x3, 1);
3858 /* tell NIC which register to read */
3859 pci_write_config(dev, vs + 0x18, 0xfffffff0, 4);
3860 return (pci_read_config(dev, vs + 0x14, 4));
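/*
 * Editor's note on the window protocol above (offsets are relative to
 * the vendor-specific capability): +0x10 selects read32 mode, +0x18
 * latches the address to read, and +0x14 returns the data.  A sketch
 * generalizing it to an arbitrary address (demo_vs_read32 is a
 * hypothetical name):
 */
#if 0	/* illustrative only; never compiled */
static uint32_t
demo_vs_read32(device_t dev, int vs, uint32_t addr)
{
	pci_write_config(dev, vs + 0x10, 0x3, 1);	/* enable read32 mode */
	pci_write_config(dev, vs + 0x18, addr, 4);	/* address to read */
	return (pci_read_config(dev, vs + 0x14, 4));	/* latched data */
}
#endif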
3864 mxge_watchdog_reset(mxge_softc_t *sc)
3866 struct pci_devinfo *dinfo;
3867 struct mxge_slice_state *ss;
3868 int err, running, s, num_tx_slices = 1;
3874 device_printf(sc->dev, "Watchdog reset!\n");
3877 * check to see if the NIC rebooted. If it did, then all of
3878 * PCI config space has been reset, and things like the
3879 * busmaster bit will be zero. If this is the case, then we
3880 * must restore PCI config space before the NIC can be used again. */
3883 cmd = pci_read_config(sc->dev, PCIR_COMMAND, 2);
3884 if (cmd == 0xffff) {
3886 * maybe the watchdog caught the NIC rebooting; wait
3887 * up to 100ms for it to finish. If it does not come
3888 * back, then give up
3891 cmd = pci_read_config(sc->dev, PCIR_COMMAND, 2);
3892 if (cmd == 0xffff) {
3893 device_printf(sc->dev, "NIC disappeared!\n");
3896 if ((cmd & PCIM_CMD_BUSMASTEREN) == 0) {
3897 /* print the reboot status */
3898 reboot = mxge_read_reboot(sc);
3899 device_printf(sc->dev, "NIC rebooted, status = 0x%x\n",
3901 running = sc->ifp->if_drv_flags & IFF_DRV_RUNNING;
3905 * quiesce NIC so that TX routines will not try to
3906 * xmit after restoration of BAR
3909 /* Mark the link as down */
3910 if (sc->link_state) {
3912 if_link_state_change(sc->ifp,
3915 #ifdef IFNET_BUF_RING
3916 num_tx_slices = sc->num_slices;
3918 /* grab all TX locks to ensure no tx */
3919 for (s = 0; s < num_tx_slices; s++) {
3921 mtx_lock(&ss->tx.mtx);
3925 /* restore PCI configuration space */
3926 dinfo = device_get_ivars(sc->dev);
3927 pci_cfg_restore(sc->dev, dinfo);
3929 /* and redo any changes we made to our config space */
3930 mxge_setup_cfg_space(sc);
3933 err = mxge_load_firmware(sc, 0);
3935 device_printf(sc->dev,
3936 "Unable to re-load f/w\n");
3940 err = mxge_open(sc);
3941 /* release all TX locks */
3942 for (s = 0; s < num_tx_slices; s++) {
3944 #ifdef IFNET_BUF_RING
3945 mxge_start_locked(ss);
3947 mtx_unlock(&ss->tx.mtx);
3950 sc->watchdog_resets++;
3952 device_printf(sc->dev,
3953 "NIC did not reboot, not resetting\n");
3957 device_printf(sc->dev, "watchdog reset failed\n");
3961 callout_reset(&sc->co_hdl, mxge_ticks, mxge_tick, sc);
3966 mxge_watchdog_task(void *arg, int pending)
3968 mxge_softc_t *sc = arg;
3971 mtx_lock(&sc->driver_mtx);
3972 mxge_watchdog_reset(sc);
3973 mtx_unlock(&sc->driver_mtx);
3977 mxge_warn_stuck(mxge_softc_t *sc, mxge_tx_ring_t *tx, int slice)
3979 tx = &sc->ss[slice].tx;
3980 device_printf(sc->dev, "slice %d stuck? ring state:\n", slice);
3981 device_printf(sc->dev,
3982 "tx.req=%d tx.done=%d, tx.queue_active=%d\n",
3983 tx->req, tx->done, tx->queue_active);
3984 device_printf(sc->dev, "tx.activate=%d tx.deactivate=%d\n",
3985 tx->activate, tx->deactivate);
3986 device_printf(sc->dev, "pkt_done=%d fw=%d\n",
3988 be32toh(sc->ss->fw_stats->send_done_count));
3992 mxge_watchdog(mxge_softc_t *sc)
3995 uint32_t rx_pause = be32toh(sc->ss->fw_stats->dropped_pause);
3998 /* see if we have outstanding transmits, which
3999 have been pending for more than mxge_ticks */
4001 #ifdef IFNET_BUF_RING
4002 (i < sc->num_slices) && (err == 0);
4004 (i < 1) && (err == 0);
4008 if (tx->req != tx->done &&
4009 tx->watchdog_req != tx->watchdog_done &&
4010 tx->done == tx->watchdog_done) {
4011 /* check for pause blocking before resetting */
4012 if (tx->watchdog_rx_pause == rx_pause) {
4013 mxge_warn_stuck(sc, tx, i);
4014 taskqueue_enqueue(sc->tq, &sc->watchdog_task);
4018 device_printf(sc->dev, "Flow control blocking "
4019 "xmits, check link partner\n");
4022 tx->watchdog_req = tx->req;
4023 tx->watchdog_done = tx->done;
4024 tx->watchdog_rx_pause = rx_pause;
4027 if (sc->need_media_probe)
4028 mxge_media_probe(sc);
4033 mxge_get_counter(struct ifnet *ifp, ift_counter cnt)
4035 struct mxge_softc *sc;
4038 sc = if_getsoftc(ifp);
4042 case IFCOUNTER_IPACKETS:
4043 for (int s = 0; s < sc->num_slices; s++)
4044 rv += sc->ss[s].ipackets;
4046 case IFCOUNTER_OPACKETS:
4047 for (int s = 0; s < sc->num_slices; s++)
4048 rv += sc->ss[s].opackets;
4050 case IFCOUNTER_OERRORS:
4051 for (int s = 0; s < sc->num_slices; s++)
4052 rv += sc->ss[s].oerrors;
4054 #ifdef IFNET_BUF_RING
4055 case IFCOUNTER_OBYTES:
4056 for (int s = 0; s < sc->num_slices; s++)
4057 rv += sc->ss[s].obytes;
4059 case IFCOUNTER_OMCASTS:
4060 for (int s = 0; s < sc->num_slices; s++)
4061 rv += sc->ss[s].omcasts;
4063 case IFCOUNTER_OQDROPS:
4064 for (int s = 0; s < sc->num_slices; s++)
4065 rv += sc->ss[s].tx.br->br_drops;
4069 return (if_get_counter_default(ifp, cnt));
4074 mxge_tick(void *arg)
4076 mxge_softc_t *sc = arg;
4083 running = sc->ifp->if_drv_flags & IFF_DRV_RUNNING;
4085 if (!sc->watchdog_countdown) {
4086 err = mxge_watchdog(sc);
4087 sc->watchdog_countdown = 4;
4089 sc->watchdog_countdown--;
4092 /* ensure NIC did not suffer h/w fault while idle */
4093 cmd = pci_read_config(sc->dev, PCIR_COMMAND, 2);
4094 if ((cmd & PCIM_CMD_BUSMASTEREN) == 0) {
4096 taskqueue_enqueue(sc->tq, &sc->watchdog_task);
4099 /* look less often if NIC is idle */
4104 callout_reset(&sc->co_hdl, ticks, mxge_tick, sc);
4109 mxge_media_change(struct ifnet *ifp)
4115 mxge_change_mtu(mxge_softc_t *sc, int mtu)
4117 struct ifnet *ifp = sc->ifp;
4118 int real_mtu, old_mtu;
4122 real_mtu = mtu + ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
4123 if ((real_mtu > sc->max_mtu) || real_mtu < 60)
4125 mtx_lock(&sc->driver_mtx);
4126 old_mtu = ifp->if_mtu;
4128 if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
4130 err = mxge_open(sc);
4132 ifp->if_mtu = old_mtu;
4134 (void) mxge_open(sc);
4137 mtx_unlock(&sc->driver_mtx);
4142 mxge_media_status(struct ifnet *ifp, struct ifmediareq *ifmr)
4144 mxge_softc_t *sc = ifp->if_softc;
4149 ifmr->ifm_status = IFM_AVALID;
4150 ifmr->ifm_active = IFM_ETHER | IFM_FDX;
4151 ifmr->ifm_status |= sc->link_state ? IFM_ACTIVE : 0;
4152 ifmr->ifm_active |= sc->current_media;
4156 mxge_ioctl(struct ifnet *ifp, u_long command, caddr_t data)
4158 mxge_softc_t *sc = ifp->if_softc;
4159 struct ifreq *ifr = (struct ifreq *)data;
4166 err = ether_ioctl(ifp, command, data);
4170 err = mxge_change_mtu(sc, ifr->ifr_mtu);
4174 mtx_lock(&sc->driver_mtx);
4176 mtx_unlock(&sc->driver_mtx);
4179 if (ifp->if_flags & IFF_UP) {
4180 if (!(ifp->if_drv_flags & IFF_DRV_RUNNING)) {
4181 err = mxge_open(sc);
4183 /* take care of promisc and allmulti flag settings */
4185 mxge_change_promisc(sc,
4186 ifp->if_flags & IFF_PROMISC);
4187 mxge_set_multicast_list(sc);
4190 if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
4194 mtx_unlock(&sc->driver_mtx);
4199 mtx_lock(&sc->driver_mtx);
4200 mxge_set_multicast_list(sc);
4201 mtx_unlock(&sc->driver_mtx);
4205 mtx_lock(&sc->driver_mtx);
4206 mask = ifr->ifr_reqcap ^ ifp->if_capenable;
4207 if (mask & IFCAP_TXCSUM) {
4208 if (IFCAP_TXCSUM & ifp->if_capenable) {
4209 ifp->if_capenable &= ~(IFCAP_TXCSUM|IFCAP_TSO4);
4210 ifp->if_hwassist &= ~(CSUM_TCP | CSUM_UDP);
4212 ifp->if_capenable |= IFCAP_TXCSUM;
4213 ifp->if_hwassist |= (CSUM_TCP | CSUM_UDP);
4215 } else if (mask & IFCAP_RXCSUM) {
4216 if (IFCAP_RXCSUM & ifp->if_capenable) {
4217 ifp->if_capenable &= ~IFCAP_RXCSUM;
4219 ifp->if_capenable |= IFCAP_RXCSUM;
4222 if (mask & IFCAP_TSO4) {
4223 if (IFCAP_TSO4 & ifp->if_capenable) {
4224 ifp->if_capenable &= ~IFCAP_TSO4;
4225 } else if (IFCAP_TXCSUM & ifp->if_capenable) {
4226 ifp->if_capenable |= IFCAP_TSO4;
4227 ifp->if_hwassist |= CSUM_TSO;
4229 printf("mxge requires tx checksum offload"
4230 " be enabled to use TSO\n");
4235 if (mask & IFCAP_TXCSUM_IPV6) {
4236 if (IFCAP_TXCSUM_IPV6 & ifp->if_capenable) {
4237 ifp->if_capenable &= ~(IFCAP_TXCSUM_IPV6
4239 ifp->if_hwassist &= ~(CSUM_TCP_IPV6
4242 ifp->if_capenable |= IFCAP_TXCSUM_IPV6;
4243 ifp->if_hwassist |= (CSUM_TCP_IPV6
4246 } else if (mask & IFCAP_RXCSUM_IPV6) {
4247 if (IFCAP_RXCSUM_IPV6 & ifp->if_capenable) {
4248 ifp->if_capenable &= ~IFCAP_RXCSUM_IPV6;
4250 ifp->if_capenable |= IFCAP_RXCSUM_IPV6;
4253 if (mask & IFCAP_TSO6) {
4254 if (IFCAP_TSO6 & ifp->if_capenable) {
4255 ifp->if_capenable &= ~IFCAP_TSO6;
4256 } else if (IFCAP_TXCSUM_IPV6 & ifp->if_capenable) {
4257 ifp->if_capenable |= IFCAP_TSO6;
4258 ifp->if_hwassist |= CSUM_TSO;
4260 printf("mxge requires tx checksum offload"
4261 " be enabled to use TSO\n");
4265 #endif /*IFCAP_TSO6 */
4267 if (mask & IFCAP_LRO)
4268 ifp->if_capenable ^= IFCAP_LRO;
4269 if (mask & IFCAP_VLAN_HWTAGGING)
4270 ifp->if_capenable ^= IFCAP_VLAN_HWTAGGING;
4271 if (mask & IFCAP_VLAN_HWTSO)
4272 ifp->if_capenable ^= IFCAP_VLAN_HWTSO;
4274 if (!(ifp->if_capabilities & IFCAP_VLAN_HWTSO) ||
4275 !(ifp->if_capenable & IFCAP_VLAN_HWTAGGING))
4276 ifp->if_capenable &= ~IFCAP_VLAN_HWTSO;
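/*
 * Editor's sketch of the toggle idiom used throughout this case:
 * XORing the requested and currently enabled capability sets yields
 * exactly the bits the caller wants flipped.  Hypothetical standalone
 * version:
 */
#if 0	/* illustrative only; never compiled */
#include <stdint.h>

#define DEMO_CAP_A	0x1
#define DEMO_CAP_B	0x2

static uint32_t
demo_toggle_caps(uint32_t enabled, uint32_t requested)
{
	uint32_t mask = requested ^ enabled;	/* bits that differ */

	if (mask & DEMO_CAP_A)
		enabled ^= DEMO_CAP_A;		/* flip only what changed */
	if (mask & DEMO_CAP_B)
		enabled ^= DEMO_CAP_B;
	return (enabled);
}
#endif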
4278 mtx_unlock(&sc->driver_mtx);
4279 VLAN_CAPABILITIES(ifp);
4284 mtx_lock(&sc->driver_mtx);
4285 mxge_media_probe(sc);
4286 mtx_unlock(&sc->driver_mtx);
4287 err = ifmedia_ioctl(ifp, (struct ifreq *)data,
4288 &sc->media, command);
4298 mxge_fetch_tunables(mxge_softc_t *sc)
4301 TUNABLE_INT_FETCH("hw.mxge.max_slices", &mxge_max_slices);
4302 TUNABLE_INT_FETCH("hw.mxge.flow_control_enabled",
4303 &mxge_flow_control);
4304 TUNABLE_INT_FETCH("hw.mxge.intr_coal_delay",
4305 &mxge_intr_coal_delay);
4306 TUNABLE_INT_FETCH("hw.mxge.nvidia_ecrc_enable",
4307 &mxge_nvidia_ecrc_enable);
4308 TUNABLE_INT_FETCH("hw.mxge.force_firmware",
4309 &mxge_force_firmware);
4310 TUNABLE_INT_FETCH("hw.mxge.deassert_wait",
4311 &mxge_deassert_wait);
4312 TUNABLE_INT_FETCH("hw.mxge.verbose",
4314 TUNABLE_INT_FETCH("hw.mxge.ticks", &mxge_ticks);
4315 TUNABLE_INT_FETCH("hw.mxge.always_promisc", &mxge_always_promisc);
4316 TUNABLE_INT_FETCH("hw.mxge.rss_hash_type", &mxge_rss_hash_type);
4317 TUNABLE_INT_FETCH("hw.mxge.rss_hashtype", &mxge_rss_hash_type);
4318 TUNABLE_INT_FETCH("hw.mxge.initial_mtu", &mxge_initial_mtu);
4319 TUNABLE_INT_FETCH("hw.mxge.throttle", &mxge_throttle);
4323 if (mxge_intr_coal_delay < 0 || mxge_intr_coal_delay > 10*1000)
4324 mxge_intr_coal_delay = 30;
4325 if (mxge_ticks == 0)
4326 mxge_ticks = hz / 2;
4327 sc->pause = mxge_flow_control;
4328 if (mxge_rss_hash_type < MXGEFW_RSS_HASH_TYPE_IPV4
4329 || mxge_rss_hash_type > MXGEFW_RSS_HASH_TYPE_MAX) {
4330 mxge_rss_hash_type = MXGEFW_RSS_HASH_TYPE_SRC_DST_PORT;
4332 if (mxge_initial_mtu > ETHERMTU_JUMBO ||
4333 mxge_initial_mtu < ETHER_MIN_LEN)
4334 mxge_initial_mtu = ETHERMTU_JUMBO;
4336 if (mxge_throttle && mxge_throttle > MXGE_MAX_THROTTLE)
4337 mxge_throttle = MXGE_MAX_THROTTLE;
4338 if (mxge_throttle && mxge_throttle < MXGE_MIN_THROTTLE)
4339 mxge_throttle = MXGE_MIN_THROTTLE;
4340 sc->throttle = mxge_throttle;
4345 mxge_free_slices(mxge_softc_t *sc)
4347 struct mxge_slice_state *ss;
4354 for (i = 0; i < sc->num_slices; i++) {
4356 if (ss->fw_stats != NULL) {
4357 mxge_dma_free(&ss->fw_stats_dma);
4358 ss->fw_stats = NULL;
4359 #ifdef IFNET_BUF_RING
4360 if (ss->tx.br != NULL) {
4361 drbr_free(ss->tx.br, M_DEVBUF);
4365 mtx_destroy(&ss->tx.mtx);
4367 if (ss->rx_done.entry != NULL) {
4368 mxge_dma_free(&ss->rx_done.dma);
4369 ss->rx_done.entry = NULL;
4372 free(sc->ss, M_DEVBUF);
4377 mxge_alloc_slices(mxge_softc_t *sc)
4380 struct mxge_slice_state *ss;
4382 int err, i, max_intr_slots;
4384 err = mxge_send_cmd(sc, MXGEFW_CMD_GET_RX_RING_SIZE, &cmd);
4386 device_printf(sc->dev, "Cannot determine rx ring size\n");
4389 sc->rx_ring_size = cmd.data0;
4390 max_intr_slots = 2 * (sc->rx_ring_size / sizeof (mcp_dma_addr_t));
4392 bytes = sizeof (*sc->ss) * sc->num_slices;
4393 sc->ss = malloc(bytes, M_DEVBUF, M_NOWAIT | M_ZERO);
4396 for (i = 0; i < sc->num_slices; i++) {
4401 /* allocate per-slice rx interrupt queues */
4403 bytes = max_intr_slots * sizeof (*ss->rx_done.entry);
4404 err = mxge_dma_alloc(sc, &ss->rx_done.dma, bytes, 4096);
4407 ss->rx_done.entry = ss->rx_done.dma.addr;
4408 bzero(ss->rx_done.entry, bytes);
4411 * allocate the per-slice firmware stats; stats
4412 * (including tx) are used only on the first slice. */
4415 #ifndef IFNET_BUF_RING
4420 bytes = sizeof (*ss->fw_stats);
4421 err = mxge_dma_alloc(sc, &ss->fw_stats_dma,
4422 sizeof (*ss->fw_stats), 64);
4425 ss->fw_stats = (mcp_irq_data_t *)ss->fw_stats_dma.addr;
4426 snprintf(ss->tx.mtx_name, sizeof(ss->tx.mtx_name),
4427 "%s:tx(%d)", device_get_nameunit(sc->dev), i);
4428 mtx_init(&ss->tx.mtx, ss->tx.mtx_name, NULL, MTX_DEF);
4429 #ifdef IFNET_BUF_RING
4430 ss->tx.br = buf_ring_alloc(2048, M_DEVBUF, M_WAITOK,
4438 mxge_free_slices(sc);
4443 mxge_slice_probe(mxge_softc_t *sc)
4447 int msix_cnt, status, max_intr_slots;
4451 * don't enable multiple slices if the tunable has disabled
4452 * them, or if this is not an SMP system
4455 if (mxge_max_slices == 0 || mxge_max_slices == 1 || mp_ncpus < 2)
4458 /* see how many MSI-X interrupts are available */
4459 msix_cnt = pci_msix_count(sc->dev);
4463 /* now load the slice-aware firmware and see what it supports */
4464 old_fw = sc->fw_name;
4465 if (old_fw == mxge_fw_aligned)
4466 sc->fw_name = mxge_fw_rss_aligned;
4468 sc->fw_name = mxge_fw_rss_unaligned;
4469 status = mxge_load_firmware(sc, 0);
4471 device_printf(sc->dev, "Falling back to a single slice\n");
4475 /* try to send a reset command to the card to see if it is alive */
4477 memset(&cmd, 0, sizeof (cmd));
4478 status = mxge_send_cmd(sc, MXGEFW_CMD_RESET, &cmd);
4480 device_printf(sc->dev, "failed reset\n");
4484 /* get rx ring size */
4485 status = mxge_send_cmd(sc, MXGEFW_CMD_GET_RX_RING_SIZE, &cmd);
4487 device_printf(sc->dev, "Cannot determine rx ring size\n");
4490 max_intr_slots = 2 * (cmd.data0 / sizeof (mcp_dma_addr_t));
4492 /* tell it the size of the interrupt queues */
4493 cmd.data0 = max_intr_slots * sizeof (struct mcp_slot);
4494 status = mxge_send_cmd(sc, MXGEFW_CMD_SET_INTRQ_SIZE, &cmd);
4496 device_printf(sc->dev, "failed MXGEFW_CMD_SET_INTRQ_SIZE\n");
4500 /* ask for the maximum number of slices it supports */
4501 status = mxge_send_cmd(sc, MXGEFW_CMD_GET_MAX_RSS_QUEUES, &cmd);
4503 device_printf(sc->dev,
4504 "failed MXGEFW_CMD_GET_MAX_RSS_QUEUES\n");
4507 sc->num_slices = cmd.data0;
4508 if (sc->num_slices > msix_cnt)
4509 sc->num_slices = msix_cnt;
4511 if (mxge_max_slices == -1) {
4512 /* cap to number of CPUs in system */
4513 if (sc->num_slices > mp_ncpus)
4514 sc->num_slices = mp_ncpus;
4516 if (sc->num_slices > mxge_max_slices)
4517 sc->num_slices = mxge_max_slices;
4519 /* make sure it is a power of two */
4520 while (sc->num_slices & (sc->num_slices - 1))
4521 sc->num_slices--;
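/*
 * Editor's note: x & (x - 1) clears the lowest set bit of x, so the
 * test is zero exactly when x is a power of two (or zero); decrementing
 * until it passes rounds down to the nearest power of two.  Sketch:
 */
#if 0	/* illustrative only; never compiled */
static int
demo_round_down_pow2(int x)
{
	while (x & (x - 1))
		x--;
	return (x);	/* demo_round_down_pow2(6) == 4 */
}
#endif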
4524 device_printf(sc->dev, "using %d slices\n",
4530 sc->fw_name = old_fw;
4531 (void) mxge_load_firmware(sc, 0);
4535 mxge_add_msix_irqs(mxge_softc_t *sc)
4538 int count, err, i, rid;
4541 sc->msix_table_res = bus_alloc_resource_any(sc->dev, SYS_RES_MEMORY,
4544 if (sc->msix_table_res == NULL) {
4545 device_printf(sc->dev, "couldn't alloc MSIX table res\n");
4549 count = sc->num_slices;
4550 err = pci_alloc_msix(sc->dev, &count);
4552 device_printf(sc->dev, "pci_alloc_msix: failed, wanted %d, "
4553 "err = %d\n", sc->num_slices, err);
4554 goto abort_with_msix_table;
4556 if (count < sc->num_slices) {
4557 device_printf(sc->dev, "pci_alloc_msix: need %d, got %d\n",
4558 count, sc->num_slices);
4559 device_printf(sc->dev,
4560 "Try setting hw.mxge.max_slices to %d\n",
4563 goto abort_with_msix;
4565 bytes = sizeof (*sc->msix_irq_res) * sc->num_slices;
4566 sc->msix_irq_res = malloc(bytes, M_DEVBUF, M_NOWAIT|M_ZERO);
4567 if (sc->msix_irq_res == NULL) {
4569 goto abort_with_msix;
4572 for (i = 0; i < sc->num_slices; i++) {
4574 sc->msix_irq_res[i] = bus_alloc_resource_any(sc->dev,
4577 if (sc->msix_irq_res[i] == NULL) {
4578 device_printf(sc->dev, "couldn't allocate IRQ res"
4579 " for message %d\n", i);
4581 goto abort_with_res;
4585 bytes = sizeof (*sc->msix_ih) * sc->num_slices;
4586 sc->msix_ih = malloc(bytes, M_DEVBUF, M_NOWAIT|M_ZERO);
4588 for (i = 0; i < sc->num_slices; i++) {
4589 err = bus_setup_intr(sc->dev, sc->msix_irq_res[i],
4590 INTR_TYPE_NET | INTR_MPSAFE,
4591 #if __FreeBSD_version > 700030
4594 mxge_intr, &sc->ss[i], &sc->msix_ih[i]);
4596 device_printf(sc->dev, "couldn't setup intr for "
4598 goto abort_with_intr;
4600 bus_describe_intr(sc->dev, sc->msix_irq_res[i],
4601 sc->msix_ih[i], "s%d", i);
4605 device_printf(sc->dev, "using %d msix IRQs:",
4607 for (i = 0; i < sc->num_slices; i++)
4608 printf(" %jd", rman_get_start(sc->msix_irq_res[i]));
4614 for (i = 0; i < sc->num_slices; i++) {
4615 if (sc->msix_ih[i] != NULL) {
4616 bus_teardown_intr(sc->dev, sc->msix_irq_res[i],
4618 sc->msix_ih[i] = NULL;
4621 free(sc->msix_ih, M_DEVBUF);
4625 for (i = 0; i < sc->num_slices; i++) {
4627 if (sc->msix_irq_res[i] != NULL)
4628 bus_release_resource(sc->dev, SYS_RES_IRQ, rid,
4629 sc->msix_irq_res[i]);
4630 sc->msix_irq_res[i] = NULL;
4632 free(sc->msix_irq_res, M_DEVBUF);
4636 pci_release_msi(sc->dev);
4638 abort_with_msix_table:
4639 bus_release_resource(sc->dev, SYS_RES_MEMORY, PCIR_BAR(2),
4640 sc->msix_table_res);
4646 mxge_add_single_irq(mxge_softc_t *sc)
4648 int count, err, rid;
4650 count = pci_msi_count(sc->dev);
4651 if (count == 1 && pci_alloc_msi(sc->dev, &count) == 0) {
4657 sc->irq_res = bus_alloc_resource_any(sc->dev, SYS_RES_IRQ, &rid,
4658 RF_SHAREABLE | RF_ACTIVE);
4659 if (sc->irq_res == NULL) {
4660 device_printf(sc->dev, "could not alloc interrupt\n");
4664 device_printf(sc->dev, "using %s irq %jd\n",
4665 sc->legacy_irq ? "INTx" : "MSI",
4666 rman_get_start(sc->irq_res));
4667 err = bus_setup_intr(sc->dev, sc->irq_res,
4668 INTR_TYPE_NET | INTR_MPSAFE,
4669 #if __FreeBSD_version > 700030
4672 mxge_intr, &sc->ss[0], &sc->ih);
4674 bus_release_resource(sc->dev, SYS_RES_IRQ,
4675 sc->legacy_irq ? 0 : 1, sc->irq_res);
4676 if (!sc->legacy_irq)
4677 pci_release_msi(sc->dev);
4683 mxge_rem_msix_irqs(mxge_softc_t *sc)
4687 for (i = 0; i < sc->num_slices; i++) {
4688 if (sc->msix_ih[i] != NULL) {
4689 bus_teardown_intr(sc->dev, sc->msix_irq_res[i],
4691 sc->msix_ih[i] = NULL;
4694 free(sc->msix_ih, M_DEVBUF);
4696 for (i = 0; i < sc->num_slices; i++) {
4698 if (sc->msix_irq_res[i] != NULL)
4699 bus_release_resource(sc->dev, SYS_RES_IRQ, rid,
4700 sc->msix_irq_res[i]);
4701 sc->msix_irq_res[i] = NULL;
4703 free(sc->msix_irq_res, M_DEVBUF);
4705 bus_release_resource(sc->dev, SYS_RES_MEMORY, PCIR_BAR(2),
4706 sc->msix_table_res);
4708 pci_release_msi(sc->dev);
4713 mxge_rem_single_irq(mxge_softc_t *sc)
4715 bus_teardown_intr(sc->dev, sc->irq_res, sc->ih);
4716 bus_release_resource(sc->dev, SYS_RES_IRQ,
4717 sc->legacy_irq ? 0 : 1, sc->irq_res);
4718 if (!sc->legacy_irq)
4719 pci_release_msi(sc->dev);
4723 mxge_rem_irq(mxge_softc_t *sc)
4725 if (sc->num_slices > 1)
4726 mxge_rem_msix_irqs(sc);
4728 mxge_rem_single_irq(sc);
4732 mxge_add_irq(mxge_softc_t *sc)
4736 if (sc->num_slices > 1)
4737 err = mxge_add_msix_irqs(sc);
4739 err = mxge_add_single_irq(sc);
4741 if (0 && err == 0 && sc->num_slices > 1) {
4742 mxge_rem_msix_irqs(sc);
4743 err = mxge_add_msix_irqs(sc);
4750 mxge_attach(device_t dev)
4753 mxge_softc_t *sc = device_get_softc(dev);
4758 mxge_fetch_tunables(sc);
4760 TASK_INIT(&sc->watchdog_task, 1, mxge_watchdog_task, sc);
4761 sc->tq = taskqueue_create("mxge_taskq", M_WAITOK,
4762 taskqueue_thread_enqueue, &sc->tq);
4763 if (sc->tq == NULL) {
4765 goto abort_with_nothing;
4768 err = bus_dma_tag_create(bus_get_dma_tag(dev), /* parent */
4771 BUS_SPACE_MAXADDR, /* low */
4772 BUS_SPACE_MAXADDR, /* high */
4773 NULL, NULL, /* filter */
4774 65536 + 256, /* maxsize */
4775 MXGE_MAX_SEND_DESC, /* num segs */
4776 65536, /* maxsegsize */
4778 NULL, NULL, /* lock */
4779 &sc->parent_dmat); /* tag */
4782 device_printf(sc->dev, "Err %d allocating parent dmat\n",
4787 ifp = sc->ifp = if_alloc(IFT_ETHER);
4789 device_printf(dev, "can not if_alloc()\n");
4791 goto abort_with_parent_dmat;
4793 if_initname(ifp, device_get_name(dev), device_get_unit(dev));
4795 snprintf(sc->cmd_mtx_name, sizeof(sc->cmd_mtx_name), "%s:cmd",
4796 device_get_nameunit(dev));
4797 mtx_init(&sc->cmd_mtx, sc->cmd_mtx_name, NULL, MTX_DEF);
4798 snprintf(sc->driver_mtx_name, sizeof(sc->driver_mtx_name),
4799 "%s:drv", device_get_nameunit(dev));
4800 mtx_init(&sc->driver_mtx, sc->driver_mtx_name,
4801 MTX_NETWORK_LOCK, MTX_DEF);
4803 callout_init_mtx(&sc->co_hdl, &sc->driver_mtx, 0);
4805 mxge_setup_cfg_space(sc);
4807 /* Map the board into the kernel */
4809 sc->mem_res = bus_alloc_resource_any(dev, SYS_RES_MEMORY, &rid,
4811 if (sc->mem_res == NULL) {
4812 device_printf(dev, "could not map memory\n");
4814 goto abort_with_lock;
4816 sc->sram = rman_get_virtual(sc->mem_res);
4817 sc->sram_size = 2*1024*1024 - (2*(48*1024)+(32*1024)) - 0x100;
4818 if (sc->sram_size > rman_get_size(sc->mem_res)) {
4819 device_printf(dev, "impossible memory region size %jd\n",
4820 rman_get_size(sc->mem_res));
4822 goto abort_with_mem_res;
4825 /* make a NULL-terminated copy of the EEPROM strings section of lanai SRAM */
4827 bzero(sc->eeprom_strings, MXGE_EEPROM_STRINGS_SIZE);
4828 bus_space_read_region_1(rman_get_bustag(sc->mem_res),
4829 rman_get_bushandle(sc->mem_res),
4830 sc->sram_size - MXGE_EEPROM_STRINGS_SIZE,
4832 MXGE_EEPROM_STRINGS_SIZE - 2);
4833 err = mxge_parse_strings(sc);
4835 goto abort_with_mem_res;
4837 /* Enable write combining for efficient use of PCIe bus */
4840 /* Allocate the out of band dma memory */
4841 err = mxge_dma_alloc(sc, &sc->cmd_dma,
4842 sizeof (mxge_cmd_t), 64);
4844 goto abort_with_mem_res;
4845 sc->cmd = (mcp_cmd_response_t *) sc->cmd_dma.addr;
4846 err = mxge_dma_alloc(sc, &sc->zeropad_dma, 64, 64);
4848 goto abort_with_cmd_dma;
4850 err = mxge_dma_alloc(sc, &sc->dmabench_dma, 4096, 4096);
4852 goto abort_with_zeropad_dma;
4854 /* select & load the firmware */
4855 err = mxge_select_firmware(sc);
4857 goto abort_with_dmabench;
4858 sc->intr_coal_delay = mxge_intr_coal_delay;
4860 mxge_slice_probe(sc);
4861 err = mxge_alloc_slices(sc);
4863 goto abort_with_dmabench;
4865 err = mxge_reset(sc, 0);
4867 goto abort_with_slices;
4869 err = mxge_alloc_rings(sc);
4871 device_printf(sc->dev, "failed to allocate rings\n");
4872 goto abort_with_slices;
4875 err = mxge_add_irq(sc);
4877 device_printf(sc->dev, "failed to add irq\n");
4878 goto abort_with_rings;
4881 ifp->if_baudrate = IF_Gbps(10);
4882 ifp->if_capabilities = IFCAP_RXCSUM | IFCAP_TXCSUM | IFCAP_TSO4 |
4883 IFCAP_VLAN_MTU | IFCAP_LINKSTATE | IFCAP_TXCSUM_IPV6 |
4885 #if defined(INET) || defined(INET6)
4886 ifp->if_capabilities |= IFCAP_LRO;
4889 #ifdef MXGE_NEW_VLAN_API
4890 ifp->if_capabilities |= IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_HWCSUM;
4892 /* Only FW 1.4.32 and newer can do TSO over vlans */
4893 if (sc->fw_ver_major == 1 && sc->fw_ver_minor == 4 &&
4894 sc->fw_ver_tiny >= 32)
4895 ifp->if_capabilities |= IFCAP_VLAN_HWTSO;
4897 sc->max_mtu = mxge_max_mtu(sc);
4898 if (sc->max_mtu >= 9000)
4899 ifp->if_capabilities |= IFCAP_JUMBO_MTU;
4901 device_printf(dev, "MTU limited to %d. Install "
4902 "latest firmware for 9000 byte jumbo support\n",
4903 sc->max_mtu - ETHER_HDR_LEN);
4904 ifp->if_hwassist = CSUM_TCP | CSUM_UDP | CSUM_TSO;
4905 ifp->if_hwassist |= CSUM_TCP_IPV6 | CSUM_UDP_IPV6;
4906 /* check to see if f/w supports TSO for IPv6 */
4907 if (!mxge_send_cmd(sc, MXGEFW_CMD_GET_MAX_TSO6_HDR_SIZE, &cmd)) {
4909 ifp->if_capabilities |= IFCAP_TSO6;
4910 sc->max_tso6_hlen = min(cmd.data0,
4911 sizeof (sc->ss[0].scratch));
4913 ifp->if_capenable = ifp->if_capabilities;
4914 if (sc->lro_cnt == 0)
4915 ifp->if_capenable &= ~IFCAP_LRO;
4916 ifp->if_init = mxge_init;
4918 ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
4919 ifp->if_ioctl = mxge_ioctl;
4920 ifp->if_start = mxge_start;
4921 ifp->if_get_counter = mxge_get_counter;
4922 /* Initialise the ifmedia structure */
4923 ifmedia_init(&sc->media, 0, mxge_media_change,
4925 mxge_media_init(sc);
4926 mxge_media_probe(sc);
4928 ether_ifattach(ifp, sc->mac_addr);
4929 /* ether_ifattach sets mtu to ETHERMTU */
4930 if (mxge_initial_mtu != ETHERMTU)
4931 mxge_change_mtu(sc, mxge_initial_mtu);
4933 mxge_add_sysctls(sc);
4934 #ifdef IFNET_BUF_RING
4935 ifp->if_transmit = mxge_transmit;
4936 ifp->if_qflush = mxge_qflush;
4938 taskqueue_start_threads(&sc->tq, 1, PI_NET, "%s taskq",
4939 device_get_nameunit(sc->dev));
4940 callout_reset(&sc->co_hdl, mxge_ticks, mxge_tick, sc);
4944 mxge_free_rings(sc);
4946 mxge_free_slices(sc);
4947 abort_with_dmabench:
4948 mxge_dma_free(&sc->dmabench_dma);
4949 abort_with_zeropad_dma:
4950 mxge_dma_free(&sc->zeropad_dma);
4952 mxge_dma_free(&sc->cmd_dma);
4954 bus_release_resource(dev, SYS_RES_MEMORY, PCIR_BARS, sc->mem_res);
4956 pci_disable_busmaster(dev);
4957 mtx_destroy(&sc->cmd_mtx);
4958 mtx_destroy(&sc->driver_mtx);
4960 abort_with_parent_dmat:
4961 bus_dma_tag_destroy(sc->parent_dmat);
4963 if (sc->tq != NULL) {
4964 taskqueue_drain(sc->tq, &sc->watchdog_task);
4965 taskqueue_free(sc->tq);
4973 mxge_detach(device_t dev)
4975 mxge_softc_t *sc = device_get_softc(dev);
4977 if (mxge_vlans_active(sc)) {
4978 device_printf(sc->dev,
4979 "Detach vlans before removing module\n");
4982 mtx_lock(&sc->driver_mtx);
4984 if (sc->ifp->if_drv_flags & IFF_DRV_RUNNING)
4986 mtx_unlock(&sc->driver_mtx);
4987 ether_ifdetach(sc->ifp);
4988 if (sc->tq != NULL) {
4989 taskqueue_drain(sc->tq, &sc->watchdog_task);
4990 taskqueue_free(sc->tq);
4993 callout_drain(&sc->co_hdl);
4994 ifmedia_removeall(&sc->media);
4995 mxge_dummy_rdma(sc, 0);
4996 mxge_rem_sysctls(sc);
4998 mxge_free_rings(sc);
4999 mxge_free_slices(sc);
5000 mxge_dma_free(&sc->dmabench_dma);
5001 mxge_dma_free(&sc->zeropad_dma);
5002 mxge_dma_free(&sc->cmd_dma);
5003 bus_release_resource(dev, SYS_RES_MEMORY, PCIR_BARS, sc->mem_res);
5004 pci_disable_busmaster(dev);
5005 mtx_destroy(&sc->cmd_mtx);
5006 mtx_destroy(&sc->driver_mtx);
5008 bus_dma_tag_destroy(sc->parent_dmat);
5013 mxge_shutdown(device_t dev)
5019 /* This file uses Myri10GE driver indentation.
5022 c-file-style:"linux" */