1 /******************************************************************************
3 Copyright (c) 2006-2013, Myricom Inc.
6 Redistribution and use in source and binary forms, with or without
7 modification, are permitted provided that the following conditions are met:
9 1. Redistributions of source code must retain the above copyright notice,
10 this list of conditions and the following disclaimer.
12 2. Neither the name of the Myricom Inc, nor the names of its
13 contributors may be used to endorse or promote products derived from
14 this software without specific prior written permission.
16 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
17 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
20 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
21 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
22 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
23 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
24 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
25 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
26 POSSIBILITY OF SUCH DAMAGE.
28 ***************************************************************************/
30 #include <sys/cdefs.h>
31 __FBSDID("$FreeBSD$");
33 #include <sys/param.h>
34 #include <sys/systm.h>
35 #include <sys/linker.h>
36 #include <sys/firmware.h>
37 #include <sys/endian.h>
38 #include <sys/sockio.h>
40 #include <sys/malloc.h>
42 #include <sys/kernel.h>
44 #include <sys/module.h>
45 #include <sys/socket.h>
46 #include <sys/sysctl.h>
48 #include <sys/taskqueue.h>
52 #include <net/if_var.h>
53 #include <net/if_arp.h>
54 #include <net/ethernet.h>
55 #include <net/if_dl.h>
56 #include <net/if_media.h>
60 #include <net/if_types.h>
61 #include <net/if_vlan_var.h>
63 #include <netinet/in_systm.h>
64 #include <netinet/in.h>
65 #include <netinet/ip.h>
66 #include <netinet/ip6.h>
67 #include <netinet/tcp.h>
68 #include <netinet/tcp_lro.h>
69 #include <netinet6/ip6_var.h>
71 #include <machine/bus.h>
72 #include <machine/in_cksum.h>
73 #include <machine/resource.h>
78 #include <dev/pci/pcireg.h>
79 #include <dev/pci/pcivar.h>
80 #include <dev/pci/pci_private.h> /* XXX for pci_cfg_restore */
82 #include <vm/vm.h> /* for pmap_mapdev() */
85 #if defined(__i386) || defined(__amd64)
86 #include <machine/specialreg.h>
89 #include <dev/mxge/mxge_mcp.h>
90 #include <dev/mxge/mcp_gen_header.h>
91 /*#define MXGE_FAKE_IFP*/
92 #include <dev/mxge/if_mxge_var.h>
94 #include <sys/buf_ring.h>
98 #include "opt_inet6.h"
101 static int mxge_nvidia_ecrc_enable = 1;
102 static int mxge_force_firmware = 0;
103 static int mxge_intr_coal_delay = 30;
104 static int mxge_deassert_wait = 1;
105 static int mxge_flow_control = 1;
106 static int mxge_verbose = 0;
107 static int mxge_ticks;
108 static int mxge_max_slices = 1;
109 static int mxge_rss_hash_type = MXGEFW_RSS_HASH_TYPE_SRC_DST_PORT;
110 static int mxge_always_promisc = 0;
111 static int mxge_initial_mtu = ETHERMTU_JUMBO;
112 static int mxge_throttle = 0;
113 static char *mxge_fw_unaligned = "mxge_ethp_z8e";
114 static char *mxge_fw_aligned = "mxge_eth_z8e";
115 static char *mxge_fw_rss_aligned = "mxge_rss_eth_z8e";
116 static char *mxge_fw_rss_unaligned = "mxge_rss_ethp_z8e";
118 static int mxge_probe(device_t dev);
119 static int mxge_attach(device_t dev);
120 static int mxge_detach(device_t dev);
121 static int mxge_shutdown(device_t dev);
122 static void mxge_intr(void *arg);
124 static device_method_t mxge_methods[] =
126 /* Device interface */
127 DEVMETHOD(device_probe, mxge_probe),
128 DEVMETHOD(device_attach, mxge_attach),
129 DEVMETHOD(device_detach, mxge_detach),
130 DEVMETHOD(device_shutdown, mxge_shutdown),
135 static driver_t mxge_driver =
139 sizeof(mxge_softc_t),
142 static devclass_t mxge_devclass;
144 /* Declare ourselves to be a child of the PCI bus.*/
145 DRIVER_MODULE(mxge, pci, mxge_driver, mxge_devclass, 0, 0);
146 MODULE_DEPEND(mxge, firmware, 1, 1, 1);
147 MODULE_DEPEND(mxge, zlib, 1, 1, 1);
149 static int mxge_load_firmware(mxge_softc_t *sc, int adopt);
150 static int mxge_send_cmd(mxge_softc_t *sc, uint32_t cmd, mxge_cmd_t *data);
151 static int mxge_close(mxge_softc_t *sc, int down);
152 static int mxge_open(mxge_softc_t *sc);
153 static void mxge_tick(void *arg);
156 mxge_probe(device_t dev)
161 if ((pci_get_vendor(dev) == MXGE_PCI_VENDOR_MYRICOM) &&
162 ((pci_get_device(dev) == MXGE_PCI_DEVICE_Z8E) ||
163 (pci_get_device(dev) == MXGE_PCI_DEVICE_Z8E_9))) {
164 rev = pci_get_revid(dev);
166 case MXGE_PCI_REV_Z8E:
167 device_set_desc(dev, "Myri10G-PCIE-8A");
169 case MXGE_PCI_REV_Z8ES:
170 device_set_desc(dev, "Myri10G-PCIE-8B");
173 device_set_desc(dev, "Myri10G-PCIE-8??");
174 device_printf(dev, "Unrecognized rev %d NIC\n",
184 mxge_enable_wc(mxge_softc_t *sc)
186 #if defined(__i386) || defined(__amd64)
191 len = rman_get_size(sc->mem_res);
192 err = pmap_change_attr((vm_offset_t) sc->sram,
193 len, PAT_WRITE_COMBINING);
195 device_printf(sc->dev, "pmap_change_attr failed, %d\n",
203 /* callback to get our DMA address */
205 mxge_dmamap_callback(void *arg, bus_dma_segment_t *segs, int nsegs,
209 *(bus_addr_t *) arg = segs->ds_addr;
214 mxge_dma_alloc(mxge_softc_t *sc, mxge_dma_t *dma, size_t bytes,
215 bus_size_t alignment)
218 device_t dev = sc->dev;
219 bus_size_t boundary, maxsegsize;
221 if (bytes > 4096 && alignment == 4096) {
229 /* allocate DMAable memory tags */
230 err = bus_dma_tag_create(sc->parent_dmat, /* parent */
231 alignment, /* alignment */
232 boundary, /* boundary */
233 BUS_SPACE_MAXADDR, /* low */
234 BUS_SPACE_MAXADDR, /* high */
235 NULL, NULL, /* filter */
238 maxsegsize, /* maxsegsize */
239 BUS_DMA_COHERENT, /* flags */
240 NULL, NULL, /* lock */
241 &dma->dmat); /* tag */
243 device_printf(dev, "couldn't alloc tag (err = %d)\n", err);
247 /* allocate DMAable memory & map */
248 err = bus_dmamem_alloc(dma->dmat, &dma->addr,
249 (BUS_DMA_WAITOK | BUS_DMA_COHERENT
250 | BUS_DMA_ZERO), &dma->map);
252 device_printf(dev, "couldn't alloc mem (err = %d)\n", err);
253 goto abort_with_dmat;
256 /* load the memory */
257 err = bus_dmamap_load(dma->dmat, dma->map, dma->addr, bytes,
258 mxge_dmamap_callback,
259 (void *)&dma->bus_addr, 0);
261 device_printf(dev, "couldn't load map (err = %d)\n", err);
267 bus_dmamem_free(dma->dmat, dma->addr, dma->map);
269 (void)bus_dma_tag_destroy(dma->dmat);
275 mxge_dma_free(mxge_dma_t *dma)
277 bus_dmamap_unload(dma->dmat, dma->map);
278 bus_dmamem_free(dma->dmat, dma->addr, dma->map);
279 (void)bus_dma_tag_destroy(dma->dmat);
283 * The eeprom strings on the lanaiX have the format
290 mxge_parse_strings(mxge_softc_t *sc)
293 int i, found_mac, found_sn2;
296 ptr = sc->eeprom_strings;
299 while (*ptr != '\0') {
300 if (strncmp(ptr, "MAC=", 4) == 0) {
303 sc->mac_addr[i] = strtoul(ptr, &endptr, 16);
304 if (endptr - ptr != 2)
313 } else if (strncmp(ptr, "PC=", 3) == 0) {
315 strlcpy(sc->product_code_string, ptr,
316 sizeof(sc->product_code_string));
317 } else if (!found_sn2 && (strncmp(ptr, "SN=", 3) == 0)) {
319 strlcpy(sc->serial_number_string, ptr,
320 sizeof(sc->serial_number_string));
321 } else if (strncmp(ptr, "SN2=", 4) == 0) {
322 /* SN2 takes precedence over SN */
325 strlcpy(sc->serial_number_string, ptr,
326 sizeof(sc->serial_number_string));
328 while (*ptr++ != '\0') {}
335 device_printf(sc->dev, "failed to parse eeprom_strings\n");
340 #if defined __i386 || defined i386 || defined __i386__ || defined __x86_64__
342 mxge_enable_nvidia_ecrc(mxge_softc_t *sc)
345 unsigned long base, off;
347 device_t pdev, mcp55;
348 uint16_t vendor_id, device_id, word;
349 uintptr_t bus, slot, func, ivend, idev;
353 if (!mxge_nvidia_ecrc_enable)
356 pdev = device_get_parent(device_get_parent(sc->dev));
358 device_printf(sc->dev, "could not find parent?\n");
361 vendor_id = pci_read_config(pdev, PCIR_VENDOR, 2);
362 device_id = pci_read_config(pdev, PCIR_DEVICE, 2);
364 if (vendor_id != 0x10de)
369 if (device_id == 0x005d) {
370 /* ck804, base address is magic */
372 } else if (device_id >= 0x0374 && device_id <= 0x378) {
373 /* mcp55, base address stored in chipset */
374 mcp55 = pci_find_bsf(0, 0, 0);
376 0x10de == pci_read_config(mcp55, PCIR_VENDOR, 2) &&
377 0x0369 == pci_read_config(mcp55, PCIR_DEVICE, 2)) {
378 word = pci_read_config(mcp55, 0x90, 2);
379 base = ((unsigned long)word & 0x7ffeU) << 25;
386 Test below is commented because it is believed that doing
387 config read/write beyond 0xff will access the config space
388 for the next larger function. Uncomment this and remove
389 the hacky pmap_mapdev() way of accessing config space when
390 FreeBSD grows support for extended pcie config space access
393 /* See if we can, by some miracle, access the extended
395 val = pci_read_config(pdev, 0x178, 4);
396 if (val != 0xffffffff) {
398 pci_write_config(pdev, 0x178, val, 4);
402 /* Rather than using normal pci config space writes, we must
403 * map the Nvidia config space ourselves. This is because on
404 * opteron/nvidia class machine the 0xe000000 mapping is
405 * handled by the nvidia chipset, that means the internal PCI
406 * device (the on-chip northbridge), or the amd-8131 bridge
407 * and things behind them are not visible by this method.
410 BUS_READ_IVAR(device_get_parent(pdev), pdev,
412 BUS_READ_IVAR(device_get_parent(pdev), pdev,
413 PCI_IVAR_SLOT, &slot);
414 BUS_READ_IVAR(device_get_parent(pdev), pdev,
415 PCI_IVAR_FUNCTION, &func);
416 BUS_READ_IVAR(device_get_parent(pdev), pdev,
417 PCI_IVAR_VENDOR, &ivend);
418 BUS_READ_IVAR(device_get_parent(pdev), pdev,
419 PCI_IVAR_DEVICE, &idev);
422 + 0x00100000UL * (unsigned long)bus
423 + 0x00001000UL * (unsigned long)(func
426 /* map it into the kernel */
427 va = pmap_mapdev(trunc_page((vm_paddr_t)off), PAGE_SIZE);
device_printf(sc->dev, "pmap_mapdev() failed\n");
434 /* get a pointer to the config space mapped into the kernel */
435 cfgptr = va + (off & PAGE_MASK);
437 /* make sure that we can really access it */
438 vendor_id = *(uint16_t *)(cfgptr + PCIR_VENDOR);
439 device_id = *(uint16_t *)(cfgptr + PCIR_DEVICE);
440 if (! (vendor_id == ivend && device_id == idev)) {
441 device_printf(sc->dev, "mapping failed: 0x%x:0x%x\n",
442 vendor_id, device_id);
443 pmap_unmapdev((vm_offset_t)va, PAGE_SIZE);
447 ptr32 = (uint32_t*)(cfgptr + 0x178);
450 if (val == 0xffffffff) {
451 device_printf(sc->dev, "extended mapping failed\n");
452 pmap_unmapdev((vm_offset_t)va, PAGE_SIZE);
456 pmap_unmapdev((vm_offset_t)va, PAGE_SIZE);
458 device_printf(sc->dev,
459 "Enabled ECRC on upstream Nvidia bridge "
461 (int)bus, (int)slot, (int)func);
466 mxge_enable_nvidia_ecrc(mxge_softc_t *sc)
468 device_printf(sc->dev,
469 "Nforce 4 chipset on non-x86/amd64!?!?!\n");
476 mxge_dma_test(mxge_softc_t *sc, int test_type)
479 bus_addr_t dmatest_bus = sc->dmabench_dma.bus_addr;
485 /* Run a small DMA test.
486 * The magic multipliers to the length tell the firmware
487 * to do DMA read, write, or read+write tests. The
488 * results are returned in cmd.data0. The upper 16
489 * bits of the return is the number of transfers completed.
490 * The lower 16 bits is the time in 0.5us ticks that the
491 * transfers took to complete.
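*
* Worked example (illustrative numbers only, not measured data): if the
* firmware returned cmd.data0 = 0x01000200, then 0x0100 (256) transfers
* of 'len' bytes completed in 0x0200 (512) half-microsecond ticks.  With
* len = 4096 that is 256 * 4096 bytes in 256us, so the expression used
* below, (transfers * len * 2) / ticks = (256 * 4096 * 2) / 512 = 4096,
* gives bytes per microsecond, i.e. roughly MB/s.
*/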
494 len = sc->tx_boundary;
496 cmd.data0 = MXGE_LOWPART_TO_U32(dmatest_bus);
497 cmd.data1 = MXGE_HIGHPART_TO_U32(dmatest_bus);
498 cmd.data2 = len * 0x10000;
499 status = mxge_send_cmd(sc, test_type, &cmd);
504 sc->read_dma = ((cmd.data0>>16) * len * 2) /
505 (cmd.data0 & 0xffff);
506 cmd.data0 = MXGE_LOWPART_TO_U32(dmatest_bus);
507 cmd.data1 = MXGE_HIGHPART_TO_U32(dmatest_bus);
508 cmd.data2 = len * 0x1;
509 status = mxge_send_cmd(sc, test_type, &cmd);
514 sc->write_dma = ((cmd.data0>>16) * len * 2) /
515 (cmd.data0 & 0xffff);
517 cmd.data0 = MXGE_LOWPART_TO_U32(dmatest_bus);
518 cmd.data1 = MXGE_HIGHPART_TO_U32(dmatest_bus);
519 cmd.data2 = len * 0x10001;
520 status = mxge_send_cmd(sc, test_type, &cmd);
525 sc->read_write_dma = ((cmd.data0>>16) * len * 2 * 2) /
526 (cmd.data0 & 0xffff);
529 if (status != 0 && test_type != MXGEFW_CMD_UNALIGNED_TEST)
530 device_printf(sc->dev, "DMA %s benchmark failed: %d\n",
537 * The Lanai Z8E PCI-E interface achieves higher Read-DMA throughput
538 * when the PCI-E Completion packets are aligned on an 8-byte
539 * boundary. Some PCI-E chip sets always align Completion packets; on
540 * the ones that do not, the alignment can be enforced by enabling
541 * ECRC generation (if supported).
543 * When PCI-E Completion packets are not aligned, it is actually more
544 * efficient to limit Read-DMA transactions to 2KB, rather than 4KB.
546 * If the driver can neither enable ECRC nor verify that it has
547 * already been enabled, then it must use a firmware image which works
548 * around unaligned completion packets (ethp_z8e.dat), and it should
549 * also ensure that it never gives the device a Read-DMA which is
550 * larger than 2KB by setting the tx_boundary to 2KB. If ECRC is
551 * enabled, then the driver should use the aligned (eth_z8e.dat)
552 * firmware image, and set tx_boundary to 4KB.
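*
* In short, the policy implemented below is:
*   aligned completions guaranteed (ECRC on, or verified by the DMA test)
*     -> aligned firmware (eth_z8e), tx_boundary = 4096
*   alignment cannot be guaranteed
*     -> unaligned-tolerant firmware (ethp_z8e), tx_boundary = 2048
*/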
556 mxge_firmware_probe(mxge_softc_t *sc)
558 device_t dev = sc->dev;
562 sc->tx_boundary = 4096;
564 * Verify the max read request size was set to 4KB
565 * before trying the test with 4KB.
if (pci_find_cap(dev, PCIY_EXPRESS, &reg) == 0) {
568 pectl = pci_read_config(dev, reg + 0x8, 2);
569 if ((pectl & (5 << 12)) != (5 << 12)) {
device_printf(dev, "Max Read Req. size != 4k (0x%x)\n",
572 sc->tx_boundary = 2048;
577 * load the optimized firmware (which assumes aligned PCIe
578 * completions) in order to see if it works on this host.
580 sc->fw_name = mxge_fw_aligned;
581 status = mxge_load_firmware(sc, 1);
587 * Enable ECRC if possible
589 mxge_enable_nvidia_ecrc(sc);
592 * Run a DMA test which watches for unaligned completions and
593 * aborts on the first one seen. Not required on Z8ES or newer.
595 if (pci_get_revid(sc->dev) >= MXGE_PCI_REV_Z8ES)
597 status = mxge_dma_test(sc, MXGEFW_CMD_UNALIGNED_TEST);
599 return 0; /* keep the aligned firmware */
602 device_printf(dev, "DMA test failed: %d\n", status);
603 if (status == ENOSYS)
604 device_printf(dev, "Falling back to ethp! "
605 "Please install up to date fw\n");
610 mxge_select_firmware(mxge_softc_t *sc)
613 int force_firmware = mxge_force_firmware;
616 force_firmware = sc->throttle;
618 if (force_firmware != 0) {
619 if (force_firmware == 1)
624 device_printf(sc->dev,
625 "Assuming %s completions (forced)\n",
626 aligned ? "aligned" : "unaligned");
630 /* if the PCIe link width is 4 or less, we can use the aligned
631 firmware and skip any checks */
632 if (sc->link_width != 0 && sc->link_width <= 4) {
633 device_printf(sc->dev,
634 "PCIe x%d Link, expect reduced performance\n",
640 if (0 == mxge_firmware_probe(sc))
645 sc->fw_name = mxge_fw_aligned;
646 sc->tx_boundary = 4096;
648 sc->fw_name = mxge_fw_unaligned;
649 sc->tx_boundary = 2048;
651 return (mxge_load_firmware(sc, 0));
655 mxge_validate_firmware(mxge_softc_t *sc, const mcp_gen_header_t *hdr)
659 if (be32toh(hdr->mcp_type) != MCP_TYPE_ETH) {
660 device_printf(sc->dev, "Bad firmware type: 0x%x\n",
661 be32toh(hdr->mcp_type));
665 /* save firmware version for sysctl */
666 strlcpy(sc->fw_version, hdr->version, sizeof(sc->fw_version));
668 device_printf(sc->dev, "firmware id: %s\n", hdr->version);
670 sscanf(sc->fw_version, "%d.%d.%d", &sc->fw_ver_major,
671 &sc->fw_ver_minor, &sc->fw_ver_tiny);
673 if (!(sc->fw_ver_major == MXGEFW_VERSION_MAJOR
674 && sc->fw_ver_minor == MXGEFW_VERSION_MINOR)) {
675 device_printf(sc->dev, "Found firmware version %s\n",
677 device_printf(sc->dev, "Driver needs %d.%d\n",
678 MXGEFW_VERSION_MAJOR, MXGEFW_VERSION_MINOR);
686 z_alloc(void *nil, u_int items, u_int size)
690 ptr = malloc(items * size, M_TEMP, M_NOWAIT);
695 z_free(void *nil, void *ptr)
702 mxge_load_firmware_helper(mxge_softc_t *sc, uint32_t *limit)
705 char *inflate_buffer;
706 const struct firmware *fw;
707 const mcp_gen_header_t *hdr;
714 fw = firmware_get(sc->fw_name);
716 device_printf(sc->dev, "Could not find firmware image %s\n",
723 /* setup zlib and decompress f/w */
724 bzero(&zs, sizeof (zs));
727 status = inflateInit(&zs);
728 if (status != Z_OK) {
733 /* the uncompressed size is stored as the firmware version,
734 which would otherwise go unused */
735 fw_len = (size_t) fw->version;
736 inflate_buffer = malloc(fw_len, M_TEMP, M_NOWAIT);
737 if (inflate_buffer == NULL)
739 zs.avail_in = fw->datasize;
740 zs.next_in = __DECONST(char *, fw->data);
741 zs.avail_out = fw_len;
742 zs.next_out = inflate_buffer;
743 status = inflate(&zs, Z_FINISH);
744 if (status != Z_STREAM_END) {
745 device_printf(sc->dev, "zlib %d\n", status);
747 goto abort_with_buffer;
751 hdr_offset = htobe32(*(const uint32_t *)
752 (inflate_buffer + MCP_HEADER_PTR_OFFSET));
753 if ((hdr_offset & 3) || hdr_offset + sizeof(*hdr) > fw_len) {
754 device_printf(sc->dev, "Bad firmware file");
756 goto abort_with_buffer;
758 hdr = (const void*)(inflate_buffer + hdr_offset);
760 status = mxge_validate_firmware(sc, hdr);
762 goto abort_with_buffer;
764 /* Copy the inflated firmware to NIC SRAM. */
765 for (i = 0; i < fw_len; i += 256) {
766 mxge_pio_copy(sc->sram + MXGE_FW_OFFSET + i,
768 min(256U, (unsigned)(fw_len - i)));
777 free(inflate_buffer, M_TEMP);
781 firmware_put(fw, FIRMWARE_UNLOAD);
786 * Enable or disable periodic RDMAs from the host to make certain
787 * chipsets resend dropped PCIe messages
791 mxge_dummy_rdma(mxge_softc_t *sc, int enable)
794 volatile uint32_t *confirm;
795 volatile char *submit;
796 uint32_t *buf, dma_low, dma_high;
799 buf = (uint32_t *)((unsigned long)(buf_bytes + 7) & ~7UL);
801 /* clear confirmation addr */
802 confirm = (volatile uint32_t *)sc->cmd;
806 /* send an rdma command to the PCIe engine, and wait for the
807 response in the confirmation address. The firmware should
808 write a -1 there to indicate it is alive and well
811 dma_low = MXGE_LOWPART_TO_U32(sc->cmd_dma.bus_addr);
812 dma_high = MXGE_HIGHPART_TO_U32(sc->cmd_dma.bus_addr);
813 buf[0] = htobe32(dma_high); /* confirm addr MSW */
814 buf[1] = htobe32(dma_low); /* confirm addr LSW */
815 buf[2] = htobe32(0xffffffff); /* confirm data */
816 dma_low = MXGE_LOWPART_TO_U32(sc->zeropad_dma.bus_addr);
817 dma_high = MXGE_HIGHPART_TO_U32(sc->zeropad_dma.bus_addr);
818 buf[3] = htobe32(dma_high); /* dummy addr MSW */
819 buf[4] = htobe32(dma_low); /* dummy addr LSW */
820 buf[5] = htobe32(enable); /* enable? */
823 submit = (volatile char *)(sc->sram + MXGEFW_BOOT_DUMMY_RDMA);
825 mxge_pio_copy(submit, buf, 64);
830 while (*confirm != 0xffffffff && i < 20) {
834 if (*confirm != 0xffffffff) {
835 device_printf(sc->dev, "dummy rdma %s failed (%p = 0x%x)",
836 (enable ? "enable" : "disable"), confirm,
843 mxge_send_cmd(mxge_softc_t *sc, uint32_t cmd, mxge_cmd_t *data)
846 char buf_bytes[sizeof(*buf) + 8];
847 volatile mcp_cmd_response_t *response = sc->cmd;
848 volatile char *cmd_addr = sc->sram + MXGEFW_ETH_CMD;
849 uint32_t dma_low, dma_high;
850 int err, sleep_total = 0;
852 /* ensure buf is aligned to 8 bytes */
853 buf = (mcp_cmd_t *)((unsigned long)(buf_bytes + 7) & ~7UL);
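/*
* The (p + 7) & ~7 idiom above rounds an address up to the next multiple
* of 8: e.g. an address ending in 0x3 becomes the following 0x8, while an
* already-aligned address is unchanged, so buf always points at an 8-byte
* aligned slot inside buf_bytes.
*/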
855 buf->data0 = htobe32(data->data0);
856 buf->data1 = htobe32(data->data1);
857 buf->data2 = htobe32(data->data2);
858 buf->cmd = htobe32(cmd);
859 dma_low = MXGE_LOWPART_TO_U32(sc->cmd_dma.bus_addr);
860 dma_high = MXGE_HIGHPART_TO_U32(sc->cmd_dma.bus_addr);
862 buf->response_addr.low = htobe32(dma_low);
863 buf->response_addr.high = htobe32(dma_high);
864 mtx_lock(&sc->cmd_mtx);
865 response->result = 0xffffffff;
867 mxge_pio_copy((volatile void *)cmd_addr, buf, sizeof (*buf));
869 /* wait up to 20ms */
871 for (sleep_total = 0; sleep_total < 20; sleep_total++) {
872 bus_dmamap_sync(sc->cmd_dma.dmat,
873 sc->cmd_dma.map, BUS_DMASYNC_POSTREAD);
875 switch (be32toh(response->result)) {
877 data->data0 = be32toh(response->data);
883 case MXGEFW_CMD_UNKNOWN:
886 case MXGEFW_CMD_ERROR_UNALIGNED:
889 case MXGEFW_CMD_ERROR_BUSY:
892 case MXGEFW_CMD_ERROR_I2C_ABSENT:
896 device_printf(sc->dev,
898 "failed, result = %d\n",
899 cmd, be32toh(response->result));
907 device_printf(sc->dev, "mxge: command %d timed out"
909 cmd, be32toh(response->result));
910 mtx_unlock(&sc->cmd_mtx);
915 mxge_adopt_running_firmware(mxge_softc_t *sc)
917 struct mcp_gen_header *hdr;
918 const size_t bytes = sizeof (struct mcp_gen_header);
922 /* find running firmware header */
923 hdr_offset = htobe32(*(volatile uint32_t *)
924 (sc->sram + MCP_HEADER_PTR_OFFSET));
926 if ((hdr_offset & 3) || hdr_offset + sizeof(*hdr) > sc->sram_size) {
927 device_printf(sc->dev,
928 "Running firmware has bad header offset (%d)\n",
933 /* copy header of running firmware from SRAM to host memory to
934 * validate firmware */
935 hdr = malloc(bytes, M_DEVBUF, M_NOWAIT);
937 device_printf(sc->dev, "could not malloc firmware hdr\n");
940 bus_space_read_region_1(rman_get_bustag(sc->mem_res),
941 rman_get_bushandle(sc->mem_res),
942 hdr_offset, (char *)hdr, bytes);
943 status = mxge_validate_firmware(sc, hdr);
947 * check to see if adopted firmware has bug where adopting
948 * it will cause broadcasts to be filtered unless the NIC
949 * is kept in ALLMULTI mode
951 if (sc->fw_ver_major == 1 && sc->fw_ver_minor == 4 &&
952 sc->fw_ver_tiny >= 4 && sc->fw_ver_tiny <= 11) {
953 sc->adopted_rx_filter_bug = 1;
954 device_printf(sc->dev, "Adopting fw %d.%d.%d: "
955 "working around rx filter bug\n",
956 sc->fw_ver_major, sc->fw_ver_minor,
965 mxge_load_firmware(mxge_softc_t *sc, int adopt)
967 volatile uint32_t *confirm;
968 volatile char *submit;
970 uint32_t *buf, size, dma_low, dma_high;
973 buf = (uint32_t *)((unsigned long)(buf_bytes + 7) & ~7UL);
975 size = sc->sram_size;
976 status = mxge_load_firmware_helper(sc, &size);
980 /* Try to use the currently running firmware, if
982 status = mxge_adopt_running_firmware(sc);
984 device_printf(sc->dev,
985 "failed to adopt running firmware\n");
988 device_printf(sc->dev,
989 "Successfully adopted running firmware\n");
990 if (sc->tx_boundary == 4096) {
991 device_printf(sc->dev,
992 "Using firmware currently running on NIC"
994 device_printf(sc->dev,
995 "performance consider loading optimized "
998 sc->fw_name = mxge_fw_unaligned;
999 sc->tx_boundary = 2048;
1002 /* clear confirmation addr */
1003 confirm = (volatile uint32_t *)sc->cmd;
1006 /* send a reload command to the bootstrap MCP, and wait for the
1007 response in the confirmation address. The firmware should
1008 write a -1 there to indicate it is alive and well
1011 dma_low = MXGE_LOWPART_TO_U32(sc->cmd_dma.bus_addr);
1012 dma_high = MXGE_HIGHPART_TO_U32(sc->cmd_dma.bus_addr);
1014 buf[0] = htobe32(dma_high); /* confirm addr MSW */
1015 buf[1] = htobe32(dma_low); /* confirm addr LSW */
1016 buf[2] = htobe32(0xffffffff); /* confirm data */
1018 /* FIX: All newest firmware should un-protect the bottom of
1019 the sram before handoff. However, the very first interfaces
1020 do not. Therefore the handoff copy must skip the first 8 bytes
1022 /* where the code starts*/
1023 buf[3] = htobe32(MXGE_FW_OFFSET + 8);
1024 buf[4] = htobe32(size - 8); /* length of code */
1025 buf[5] = htobe32(8); /* where to copy to */
1026 buf[6] = htobe32(0); /* where to jump to */
1028 submit = (volatile char *)(sc->sram + MXGEFW_BOOT_HANDOFF);
1029 mxge_pio_copy(submit, buf, 64);
1034 while (*confirm != 0xffffffff && i < 20) {
1037 bus_dmamap_sync(sc->cmd_dma.dmat,
1038 sc->cmd_dma.map, BUS_DMASYNC_POSTREAD);
1040 if (*confirm != 0xffffffff) {
1041 device_printf(sc->dev,"handoff failed (%p = 0x%x)",
1050 mxge_update_mac_address(mxge_softc_t *sc)
1053 uint8_t *addr = sc->mac_addr;
1057 cmd.data0 = ((addr[0] << 24) | (addr[1] << 16)
1058 | (addr[2] << 8) | addr[3]);
1060 cmd.data1 = ((addr[4] << 8) | (addr[5]));
1062 status = mxge_send_cmd(sc, MXGEFW_SET_MAC_ADDRESS, &cmd);
1067 mxge_change_pause(mxge_softc_t *sc, int pause)
1073 status = mxge_send_cmd(sc, MXGEFW_ENABLE_FLOW_CONTROL,
1076 status = mxge_send_cmd(sc, MXGEFW_DISABLE_FLOW_CONTROL,
1080 device_printf(sc->dev, "Failed to set flow control mode\n");
1088 mxge_change_promisc(mxge_softc_t *sc, int promisc)
1093 if (mxge_always_promisc)
1097 status = mxge_send_cmd(sc, MXGEFW_ENABLE_PROMISC,
1100 status = mxge_send_cmd(sc, MXGEFW_DISABLE_PROMISC,
1104 device_printf(sc->dev, "Failed to set promisc mode\n");
1109 mxge_set_multicast_list(mxge_softc_t *sc)
1112 struct ifmultiaddr *ifma;
1113 struct ifnet *ifp = sc->ifp;
1116 /* This firmware is known to not support multicast */
1117 if (!sc->fw_multicast_support)
1120 /* Disable multicast filtering while we play with the lists*/
1121 err = mxge_send_cmd(sc, MXGEFW_ENABLE_ALLMULTI, &cmd);
1123 device_printf(sc->dev, "Failed MXGEFW_ENABLE_ALLMULTI,"
1124 " error status: %d\n", err);
1128 if (sc->adopted_rx_filter_bug)
1131 if (ifp->if_flags & IFF_ALLMULTI)
1132 /* request to disable multicast filtering, so quit here */
1135 /* Flush all the filters */
1137 err = mxge_send_cmd(sc, MXGEFW_LEAVE_ALL_MULTICAST_GROUPS, &cmd);
1139 device_printf(sc->dev,
1140 "Failed MXGEFW_LEAVE_ALL_MULTICAST_GROUPS"
1141 ", error status: %d\n", err);
1145 /* Walk the multicast list, and add each address */
1147 if_maddr_rlock(ifp);
1148 TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
1149 if (ifma->ifma_addr->sa_family != AF_LINK)
1151 bcopy(LLADDR((struct sockaddr_dl *)ifma->ifma_addr),
1153 bcopy(LLADDR((struct sockaddr_dl *)ifma->ifma_addr) + 4,
1155 cmd.data0 = htonl(cmd.data0);
1156 cmd.data1 = htonl(cmd.data1);
1157 err = mxge_send_cmd(sc, MXGEFW_JOIN_MULTICAST_GROUP, &cmd);
1159 device_printf(sc->dev, "Failed "
1160 "MXGEFW_JOIN_MULTICAST_GROUP, error status:"
1162 /* abort, leaving multicast filtering off */
1163 if_maddr_runlock(ifp);
1167 if_maddr_runlock(ifp);
1168 /* Enable multicast filtering */
1169 err = mxge_send_cmd(sc, MXGEFW_DISABLE_ALLMULTI, &cmd);
1171 device_printf(sc->dev, "Failed MXGEFW_DISABLE_ALLMULTI"
1172 ", error status: %d\n", err);
1177 mxge_max_mtu(mxge_softc_t *sc)
1182 if (MJUMPAGESIZE - MXGEFW_PAD > MXGEFW_MAX_MTU)
1183 return MXGEFW_MAX_MTU - MXGEFW_PAD;
/* try to set nbufs to see if we can
1186 use virtually contiguous jumbos */
1188 status = mxge_send_cmd(sc, MXGEFW_CMD_ALWAYS_USE_N_BIG_BUFFERS,
1191 return MXGEFW_MAX_MTU - MXGEFW_PAD;
1193 /* otherwise, we're limited to MJUMPAGESIZE */
1194 return MJUMPAGESIZE - MXGEFW_PAD;
1198 mxge_reset(mxge_softc_t *sc, int interrupts_setup)
1200 struct mxge_slice_state *ss;
1201 mxge_rx_done_t *rx_done;
1202 volatile uint32_t *irq_claim;
1206 /* try to send a reset command to the card to see if it
1208 memset(&cmd, 0, sizeof (cmd));
1209 status = mxge_send_cmd(sc, MXGEFW_CMD_RESET, &cmd);
1211 device_printf(sc->dev, "failed reset\n");
1215 mxge_dummy_rdma(sc, 1);
1218 /* set the intrq size */
1219 cmd.data0 = sc->rx_ring_size;
1220 status = mxge_send_cmd(sc, MXGEFW_CMD_SET_INTRQ_SIZE, &cmd);
1223 * Even though we already know how many slices are supported
1224 * via mxge_slice_probe(), MXGEFW_CMD_GET_MAX_RSS_QUEUES
1225 * has magic side effects, and must be called after a reset.
1226 * It must be called prior to calling any RSS related cmds,
1227 * including assigning an interrupt queue for anything but
1228 * slice 0. It must also be called *after*
1229 * MXGEFW_CMD_SET_INTRQ_SIZE, since the intrq size is used by
1230 * the firmware to compute offsets.
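*
* Required ordering, restated (sketch):
*   MXGEFW_CMD_RESET
*     -> MXGEFW_CMD_SET_INTRQ_SIZE
*       -> MXGEFW_CMD_GET_MAX_RSS_QUEUES
*         -> MXGEFW_CMD_ENABLE_RSS_QUEUES and any other RSS/intrq commands
*/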
1233 if (sc->num_slices > 1) {
1234 /* ask the maximum number of slices it supports */
1235 status = mxge_send_cmd(sc, MXGEFW_CMD_GET_MAX_RSS_QUEUES,
1238 device_printf(sc->dev,
1239 "failed to get number of slices\n");
1243 * MXGEFW_CMD_ENABLE_RSS_QUEUES must be called prior
1244 * to setting up the interrupt queue DMA
1246 cmd.data0 = sc->num_slices;
1247 cmd.data1 = MXGEFW_SLICE_INTR_MODE_ONE_PER_SLICE;
1248 #ifdef IFNET_BUF_RING
1249 cmd.data1 |= MXGEFW_SLICE_ENABLE_MULTIPLE_TX_QUEUES;
1251 status = mxge_send_cmd(sc, MXGEFW_CMD_ENABLE_RSS_QUEUES,
1254 device_printf(sc->dev,
1255 "failed to set number of slices\n");
1261 if (interrupts_setup) {
1262 /* Now exchange information about interrupts */
1263 for (slice = 0; slice < sc->num_slices; slice++) {
1264 rx_done = &sc->ss[slice].rx_done;
1265 memset(rx_done->entry, 0, sc->rx_ring_size);
1266 cmd.data0 = MXGE_LOWPART_TO_U32(rx_done->dma.bus_addr);
1267 cmd.data1 = MXGE_HIGHPART_TO_U32(rx_done->dma.bus_addr);
1269 status |= mxge_send_cmd(sc,
1270 MXGEFW_CMD_SET_INTRQ_DMA,
1275 status |= mxge_send_cmd(sc,
1276 MXGEFW_CMD_GET_INTR_COAL_DELAY_OFFSET, &cmd);
1279 sc->intr_coal_delay_ptr = (volatile uint32_t *)(sc->sram + cmd.data0);
1281 status |= mxge_send_cmd(sc, MXGEFW_CMD_GET_IRQ_ACK_OFFSET, &cmd);
1282 irq_claim = (volatile uint32_t *)(sc->sram + cmd.data0);
1285 status |= mxge_send_cmd(sc, MXGEFW_CMD_GET_IRQ_DEASSERT_OFFSET,
1287 sc->irq_deassert = (volatile uint32_t *)(sc->sram + cmd.data0);
1289 device_printf(sc->dev, "failed set interrupt parameters\n");
1294 *sc->intr_coal_delay_ptr = htobe32(sc->intr_coal_delay);
1297 /* run a DMA benchmark */
1298 (void) mxge_dma_test(sc, MXGEFW_DMA_TEST);
1300 for (slice = 0; slice < sc->num_slices; slice++) {
1301 ss = &sc->ss[slice];
1303 ss->irq_claim = irq_claim + (2 * slice);
1304 /* reset mcp/driver shared state back to 0 */
1305 ss->rx_done.idx = 0;
1306 ss->rx_done.cnt = 0;
1309 ss->tx.pkt_done = 0;
1310 ss->tx.queue_active = 0;
1311 ss->tx.activate = 0;
1312 ss->tx.deactivate = 0;
1317 ss->rx_small.cnt = 0;
1318 ss->lc.lro_bad_csum = 0;
1319 ss->lc.lro_queued = 0;
1320 ss->lc.lro_flushed = 0;
1321 if (ss->fw_stats != NULL) {
1322 bzero(ss->fw_stats, sizeof *ss->fw_stats);
1325 sc->rdma_tags_available = 15;
1326 status = mxge_update_mac_address(sc);
1327 mxge_change_promisc(sc, sc->ifp->if_flags & IFF_PROMISC);
1328 mxge_change_pause(sc, sc->pause);
1329 mxge_set_multicast_list(sc);
1331 cmd.data0 = sc->throttle;
1332 if (mxge_send_cmd(sc, MXGEFW_CMD_SET_THROTTLE_FACTOR,
1334 device_printf(sc->dev,
1335 "can't enable throttle\n");
1342 mxge_change_throttle(SYSCTL_HANDLER_ARGS)
1347 unsigned int throttle;
1350 throttle = sc->throttle;
1351 err = sysctl_handle_int(oidp, &throttle, arg2, req);
1356 if (throttle == sc->throttle)
1359 if (throttle < MXGE_MIN_THROTTLE || throttle > MXGE_MAX_THROTTLE)
1362 mtx_lock(&sc->driver_mtx);
1363 cmd.data0 = throttle;
1364 err = mxge_send_cmd(sc, MXGEFW_CMD_SET_THROTTLE_FACTOR, &cmd);
1366 sc->throttle = throttle;
1367 mtx_unlock(&sc->driver_mtx);
1372 mxge_change_intr_coal(SYSCTL_HANDLER_ARGS)
1375 unsigned int intr_coal_delay;
1379 intr_coal_delay = sc->intr_coal_delay;
1380 err = sysctl_handle_int(oidp, &intr_coal_delay, arg2, req);
1384 if (intr_coal_delay == sc->intr_coal_delay)
1387 if (intr_coal_delay == 0 || intr_coal_delay > 1000*1000)
1390 mtx_lock(&sc->driver_mtx);
1391 *sc->intr_coal_delay_ptr = htobe32(intr_coal_delay);
1392 sc->intr_coal_delay = intr_coal_delay;
1394 mtx_unlock(&sc->driver_mtx);
1399 mxge_change_flow_control(SYSCTL_HANDLER_ARGS)
1402 unsigned int enabled;
1406 enabled = sc->pause;
1407 err = sysctl_handle_int(oidp, &enabled, arg2, req);
1411 if (enabled == sc->pause)
1414 mtx_lock(&sc->driver_mtx);
1415 err = mxge_change_pause(sc, enabled);
1416 mtx_unlock(&sc->driver_mtx);
1421 mxge_handle_be32(SYSCTL_HANDLER_ARGS)
1427 arg2 = be32toh(*(int *)arg1);
1429 err = sysctl_handle_int(oidp, arg1, arg2, req);
1435 mxge_rem_sysctls(mxge_softc_t *sc)
1437 struct mxge_slice_state *ss;
1440 if (sc->slice_sysctl_tree == NULL)
1443 for (slice = 0; slice < sc->num_slices; slice++) {
1444 ss = &sc->ss[slice];
1445 if (ss == NULL || ss->sysctl_tree == NULL)
1447 sysctl_ctx_free(&ss->sysctl_ctx);
1448 ss->sysctl_tree = NULL;
1450 sysctl_ctx_free(&sc->slice_sysctl_ctx);
1451 sc->slice_sysctl_tree = NULL;
1455 mxge_add_sysctls(mxge_softc_t *sc)
1457 struct sysctl_ctx_list *ctx;
1458 struct sysctl_oid_list *children;
1460 struct mxge_slice_state *ss;
1464 ctx = device_get_sysctl_ctx(sc->dev);
1465 children = SYSCTL_CHILDREN(device_get_sysctl_tree(sc->dev));
1466 fw = sc->ss[0].fw_stats;
1468 /* random information */
1469 SYSCTL_ADD_STRING(ctx, children, OID_AUTO,
1471 CTLFLAG_RD, sc->fw_version,
1472 0, "firmware version");
1473 SYSCTL_ADD_STRING(ctx, children, OID_AUTO,
1475 CTLFLAG_RD, sc->serial_number_string,
1476 0, "serial number");
1477 SYSCTL_ADD_STRING(ctx, children, OID_AUTO,
1479 CTLFLAG_RD, sc->product_code_string,
1481 SYSCTL_ADD_INT(ctx, children, OID_AUTO,
1483 CTLFLAG_RD, &sc->link_width,
1485 SYSCTL_ADD_INT(ctx, children, OID_AUTO,
1487 CTLFLAG_RD, &sc->tx_boundary,
1489 SYSCTL_ADD_INT(ctx, children, OID_AUTO,
1491 CTLFLAG_RD, &sc->wc,
1492 0, "write combining PIO?");
1493 SYSCTL_ADD_INT(ctx, children, OID_AUTO,
1495 CTLFLAG_RD, &sc->read_dma,
1496 0, "DMA Read speed in MB/s");
1497 SYSCTL_ADD_INT(ctx, children, OID_AUTO,
1499 CTLFLAG_RD, &sc->write_dma,
1500 0, "DMA Write speed in MB/s");
1501 SYSCTL_ADD_INT(ctx, children, OID_AUTO,
1502 "read_write_dma_MBs",
1503 CTLFLAG_RD, &sc->read_write_dma,
1504 0, "DMA concurrent Read/Write speed in MB/s");
1505 SYSCTL_ADD_INT(ctx, children, OID_AUTO,
1507 CTLFLAG_RD, &sc->watchdog_resets,
1508 0, "Number of times NIC was reset");
1511 /* performance related tunables */
1512 SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
1514 CTLTYPE_INT|CTLFLAG_RW, sc,
1515 0, mxge_change_intr_coal,
1516 "I", "interrupt coalescing delay in usecs");
1518 SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
1520 CTLTYPE_INT|CTLFLAG_RW, sc,
1521 0, mxge_change_throttle,
1522 "I", "transmit throttling");
1524 SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
1525 "flow_control_enabled",
1526 CTLTYPE_INT|CTLFLAG_RW, sc,
1527 0, mxge_change_flow_control,
"I", "enable flow control");
1530 SYSCTL_ADD_INT(ctx, children, OID_AUTO,
1532 CTLFLAG_RW, &mxge_deassert_wait,
1533 0, "Wait for IRQ line to go low in ihandler");
1535 /* stats block from firmware is in network byte order.
1537 SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
1539 CTLTYPE_INT|CTLFLAG_RD, &fw->link_up,
1540 0, mxge_handle_be32,
1542 SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
1543 "rdma_tags_available",
1544 CTLTYPE_INT|CTLFLAG_RD, &fw->rdma_tags_available,
1545 0, mxge_handle_be32,
1546 "I", "rdma_tags_available");
1547 SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
1548 "dropped_bad_crc32",
1549 CTLTYPE_INT|CTLFLAG_RD,
1550 &fw->dropped_bad_crc32,
1551 0, mxge_handle_be32,
1552 "I", "dropped_bad_crc32");
1553 SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
1555 CTLTYPE_INT|CTLFLAG_RD,
1556 &fw->dropped_bad_phy,
1557 0, mxge_handle_be32,
1558 "I", "dropped_bad_phy");
1559 SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
1560 "dropped_link_error_or_filtered",
1561 CTLTYPE_INT|CTLFLAG_RD,
1562 &fw->dropped_link_error_or_filtered,
1563 0, mxge_handle_be32,
1564 "I", "dropped_link_error_or_filtered");
1565 SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
1566 "dropped_link_overflow",
1567 CTLTYPE_INT|CTLFLAG_RD, &fw->dropped_link_overflow,
1568 0, mxge_handle_be32,
1569 "I", "dropped_link_overflow");
1570 SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
1571 "dropped_multicast_filtered",
1572 CTLTYPE_INT|CTLFLAG_RD,
1573 &fw->dropped_multicast_filtered,
1574 0, mxge_handle_be32,
1575 "I", "dropped_multicast_filtered");
1576 SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
1577 "dropped_no_big_buffer",
1578 CTLTYPE_INT|CTLFLAG_RD, &fw->dropped_no_big_buffer,
1579 0, mxge_handle_be32,
1580 "I", "dropped_no_big_buffer");
1581 SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
1582 "dropped_no_small_buffer",
1583 CTLTYPE_INT|CTLFLAG_RD,
1584 &fw->dropped_no_small_buffer,
1585 0, mxge_handle_be32,
1586 "I", "dropped_no_small_buffer");
1587 SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
1589 CTLTYPE_INT|CTLFLAG_RD, &fw->dropped_overrun,
1590 0, mxge_handle_be32,
1591 "I", "dropped_overrun");
1592 SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
1594 CTLTYPE_INT|CTLFLAG_RD,
1596 0, mxge_handle_be32,
1597 "I", "dropped_pause");
1598 SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
1600 CTLTYPE_INT|CTLFLAG_RD, &fw->dropped_runt,
1601 0, mxge_handle_be32,
1602 "I", "dropped_runt");
1604 SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
1605 "dropped_unicast_filtered",
1606 CTLTYPE_INT|CTLFLAG_RD, &fw->dropped_unicast_filtered,
1607 0, mxge_handle_be32,
1608 "I", "dropped_unicast_filtered");
1610 /* verbose printing? */
1611 SYSCTL_ADD_INT(ctx, children, OID_AUTO,
1613 CTLFLAG_RW, &mxge_verbose,
1614 0, "verbose printing");
1616 /* add counters exported for debugging from all slices */
1617 sysctl_ctx_init(&sc->slice_sysctl_ctx);
1618 sc->slice_sysctl_tree =
1619 SYSCTL_ADD_NODE(&sc->slice_sysctl_ctx, children, OID_AUTO,
1620 "slice", CTLFLAG_RD, 0, "");
1622 for (slice = 0; slice < sc->num_slices; slice++) {
1623 ss = &sc->ss[slice];
1624 sysctl_ctx_init(&ss->sysctl_ctx);
1625 ctx = &ss->sysctl_ctx;
1626 children = SYSCTL_CHILDREN(sc->slice_sysctl_tree);
1627 sprintf(slice_num, "%d", slice);
1629 SYSCTL_ADD_NODE(ctx, children, OID_AUTO, slice_num,
1631 children = SYSCTL_CHILDREN(ss->sysctl_tree);
1632 SYSCTL_ADD_INT(ctx, children, OID_AUTO,
1634 CTLFLAG_RD, &ss->rx_small.cnt,
1636 SYSCTL_ADD_INT(ctx, children, OID_AUTO,
1638 CTLFLAG_RD, &ss->rx_big.cnt,
1640 SYSCTL_ADD_U64(ctx, children, OID_AUTO,
1641 "lro_flushed", CTLFLAG_RD, &ss->lc.lro_flushed,
1642 0, "number of lro merge queues flushed");
1644 SYSCTL_ADD_U64(ctx, children, OID_AUTO,
1645 "lro_bad_csum", CTLFLAG_RD, &ss->lc.lro_bad_csum,
1646 0, "number of bad csums preventing LRO");
1648 SYSCTL_ADD_U64(ctx, children, OID_AUTO,
1649 "lro_queued", CTLFLAG_RD, &ss->lc.lro_queued,
1650 0, "number of frames appended to lro merge"
1653 #ifndef IFNET_BUF_RING
1654 /* only transmit from slice 0 for now */
1658 SYSCTL_ADD_INT(ctx, children, OID_AUTO,
1660 CTLFLAG_RD, &ss->tx.req,
1663 SYSCTL_ADD_INT(ctx, children, OID_AUTO,
1665 CTLFLAG_RD, &ss->tx.done,
1667 SYSCTL_ADD_INT(ctx, children, OID_AUTO,
1669 CTLFLAG_RD, &ss->tx.pkt_done,
1671 SYSCTL_ADD_INT(ctx, children, OID_AUTO,
1673 CTLFLAG_RD, &ss->tx.stall,
1675 SYSCTL_ADD_INT(ctx, children, OID_AUTO,
1677 CTLFLAG_RD, &ss->tx.wake,
1679 SYSCTL_ADD_INT(ctx, children, OID_AUTO,
1681 CTLFLAG_RD, &ss->tx.defrag,
1683 SYSCTL_ADD_INT(ctx, children, OID_AUTO,
1685 CTLFLAG_RD, &ss->tx.queue_active,
1686 0, "tx_queue_active");
1687 SYSCTL_ADD_INT(ctx, children, OID_AUTO,
1689 CTLFLAG_RD, &ss->tx.activate,
1691 SYSCTL_ADD_INT(ctx, children, OID_AUTO,
1693 CTLFLAG_RD, &ss->tx.deactivate,
1694 0, "tx_deactivate");
1698 /* copy an array of mcp_kreq_ether_send_t's to the mcp. Copy
1699 backwards one at a time and handle ring wraps */
1702 mxge_submit_req_backwards(mxge_tx_ring_t *tx,
1703 mcp_kreq_ether_send_t *src, int cnt)
1705 int idx, starting_slot;
1706 starting_slot = tx->req;
1709 idx = (starting_slot + cnt) & tx->mask;
1710 mxge_pio_copy(&tx->lanai[idx],
1711 &src[cnt], sizeof(*src));
1717 * copy an array of mcp_kreq_ether_send_t's to the mcp. Copy
1718 * at most 32 bytes at a time, so as to avoid involving the software
1719 * pio handler in the nic. We re-write the first segment's flags
1720 * to mark them valid only after writing the entire chain
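*
* Each mcp_kreq_ether_send_t is a 16-byte slot, so the 2 * sizeof(*src)
* copies in the loop below move exactly 32 bytes per PIO burst, which is
* where the limit mentioned above comes from.
*/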
1724 mxge_submit_req(mxge_tx_ring_t *tx, mcp_kreq_ether_send_t *src,
1729 volatile uint32_t *dst_ints;
1730 mcp_kreq_ether_send_t *srcp;
1731 volatile mcp_kreq_ether_send_t *dstp, *dst;
1734 idx = tx->req & tx->mask;
1736 last_flags = src->flags;
1739 dst = dstp = &tx->lanai[idx];
1742 if ((idx + cnt) < tx->mask) {
1743 for (i = 0; i < (cnt - 1); i += 2) {
1744 mxge_pio_copy(dstp, srcp, 2 * sizeof(*src));
1745 wmb(); /* force write every 32 bytes */
1750 /* submit all but the first request, and ensure
1751 that it is submitted below */
1752 mxge_submit_req_backwards(tx, src, cnt);
1756 /* submit the first request */
1757 mxge_pio_copy(dstp, srcp, sizeof(*src));
1758 wmb(); /* barrier before setting valid flag */
1761 /* re-write the last 32-bits with the valid flags */
1762 src->flags = last_flags;
1763 src_ints = (uint32_t *)src;
1765 dst_ints = (volatile uint32_t *)dst;
1767 *dst_ints = *src_ints;
1773 mxge_parse_tx(struct mxge_slice_state *ss, struct mbuf *m,
1774 struct mxge_pkt_info *pi)
1776 struct ether_vlan_header *eh;
1778 int tso = m->m_pkthdr.csum_flags & (CSUM_TSO);
1779 #if IFCAP_TSO6 && defined(INET6)
1783 eh = mtod(m, struct ether_vlan_header *);
1784 if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
1785 etype = ntohs(eh->evl_proto);
1786 pi->ip_off = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
1788 etype = ntohs(eh->evl_encap_proto);
1789 pi->ip_off = ETHER_HDR_LEN;
1795 * ensure ip header is in first mbuf, copy it to a
1796 * scratch buffer if not
1798 pi->ip = (struct ip *)(m->m_data + pi->ip_off);
1800 if (__predict_false(m->m_len < pi->ip_off + sizeof(*pi->ip))) {
1801 m_copydata(m, 0, pi->ip_off + sizeof(*pi->ip),
1803 pi->ip = (struct ip *)(ss->scratch + pi->ip_off);
1805 pi->ip_hlen = pi->ip->ip_hl << 2;
1809 if (__predict_false(m->m_len < pi->ip_off + pi->ip_hlen +
1810 sizeof(struct tcphdr))) {
1811 m_copydata(m, 0, pi->ip_off + pi->ip_hlen +
1812 sizeof(struct tcphdr), ss->scratch);
1813 pi->ip = (struct ip *)(ss->scratch + pi->ip_off);
1815 pi->tcp = (struct tcphdr *)((char *)pi->ip + pi->ip_hlen);
1817 #if IFCAP_TSO6 && defined(INET6)
1818 case ETHERTYPE_IPV6:
1819 pi->ip6 = (struct ip6_hdr *)(m->m_data + pi->ip_off);
1820 if (__predict_false(m->m_len < pi->ip_off + sizeof(*pi->ip6))) {
1821 m_copydata(m, 0, pi->ip_off + sizeof(*pi->ip6),
1823 pi->ip6 = (struct ip6_hdr *)(ss->scratch + pi->ip_off);
1826 pi->ip_hlen = ip6_lasthdr(m, pi->ip_off, IPPROTO_IPV6, &nxt);
1827 pi->ip_hlen -= pi->ip_off;
1828 if (nxt != IPPROTO_TCP && nxt != IPPROTO_UDP)
1834 if (pi->ip_off + pi->ip_hlen > ss->sc->max_tso6_hlen)
1837 if (__predict_false(m->m_len < pi->ip_off + pi->ip_hlen +
1838 sizeof(struct tcphdr))) {
1839 m_copydata(m, 0, pi->ip_off + pi->ip_hlen +
1840 sizeof(struct tcphdr), ss->scratch);
1841 pi->ip6 = (struct ip6_hdr *)(ss->scratch + pi->ip_off);
1843 pi->tcp = (struct tcphdr *)((char *)pi->ip6 + pi->ip_hlen);
1855 mxge_encap_tso(struct mxge_slice_state *ss, struct mbuf *m,
1856 int busdma_seg_cnt, struct mxge_pkt_info *pi)
1859 mcp_kreq_ether_send_t *req;
1860 bus_dma_segment_t *seg;
1861 uint32_t low, high_swapped;
1862 int len, seglen, cum_len, cum_len_next;
1863 int next_is_first, chop, cnt, rdma_count, small;
1864 uint16_t pseudo_hdr_offset, cksum_offset, mss, sum;
1865 uint8_t flags, flags_next;
1868 mss = m->m_pkthdr.tso_segsz;
1870 /* negative cum_len signifies to the
1871 * send loop that we are still in the
1872 * header portion of the TSO packet.
1875 cksum_offset = pi->ip_off + pi->ip_hlen;
1876 cum_len = -(cksum_offset + (pi->tcp->th_off << 2));
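/*
* cum_len therefore starts at minus the total header length (L2 offset +
* IP header + TCP header); it becomes non-negative exactly when the
* running DMA offset crosses from header bytes into TSO payload bytes,
* which is the condition the send loop below keys off.
*/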
1878 /* TSO implies checksum offload on this hardware */
1879 if (__predict_false((m->m_pkthdr.csum_flags & (CSUM_TCP|CSUM_TCP_IPV6)) == 0)) {
1881 * If packet has full TCP csum, replace it with pseudo hdr
1882 * sum that the NIC expects, otherwise the NIC will emit
1883 * packets with bad TCP checksums.
1885 m->m_pkthdr.csum_data = offsetof(struct tcphdr, th_sum);
1887 #if (CSUM_TCP_IPV6 != 0) && defined(INET6)
1888 m->m_pkthdr.csum_flags |= CSUM_TCP_IPV6;
1889 sum = in6_cksum_pseudo(pi->ip6,
1890 m->m_pkthdr.len - cksum_offset,
1895 m->m_pkthdr.csum_flags |= CSUM_TCP;
1896 sum = in_pseudo(pi->ip->ip_src.s_addr,
1897 pi->ip->ip_dst.s_addr,
1898 htons(IPPROTO_TCP + (m->m_pkthdr.len -
1902 m_copyback(m, offsetof(struct tcphdr, th_sum) +
1903 cksum_offset, sizeof(sum), (caddr_t)&sum);
1905 flags = MXGEFW_FLAGS_TSO_HDR | MXGEFW_FLAGS_FIRST;
1908 /* for TSO, pseudo_hdr_offset holds mss.
1909 * The firmware figures out where to put
1910 * the checksum by parsing the header. */
1911 pseudo_hdr_offset = htobe16(mss);
1915 * for IPv6 TSO, the "checksum offset" is re-purposed
1916 * to store the TCP header len
1918 cksum_offset = (pi->tcp->th_off << 2);
1926 /* "rdma_count" is the number of RDMAs belonging to the
1927 * current packet BEFORE the current send request. For
1928 * non-TSO packets, this is equal to "count".
1929 * For TSO packets, rdma_count needs to be reset
1930 * to 0 after a segment cut.
1932 * The rdma_count field of the send request is
1933 * the number of RDMAs of the packet starting at
* that request. For TSO send requests with one or more cuts
1935 * in the middle, this is the number of RDMAs starting
1936 * after the last cut in the request. All previous
1937 * segments before the last cut implicitly have 1 RDMA.
1939 * Since the number of RDMAs is not known beforehand,
1940 * it must be filled-in retroactively - after each
1941 * segmentation cut or at the end of the entire packet.
1944 while (busdma_seg_cnt) {
1945 /* Break the busdma segment up into pieces*/
1946 low = MXGE_LOWPART_TO_U32(seg->ds_addr);
1947 high_swapped = htobe32(MXGE_HIGHPART_TO_U32(seg->ds_addr));
1951 flags_next = flags & ~MXGEFW_FLAGS_FIRST;
1953 cum_len_next = cum_len + seglen;
1954 (req-rdma_count)->rdma_count = rdma_count + 1;
1955 if (__predict_true(cum_len >= 0)) {
1957 chop = (cum_len_next > mss);
1958 cum_len_next = cum_len_next % mss;
1959 next_is_first = (cum_len_next == 0);
1960 flags |= chop * MXGEFW_FLAGS_TSO_CHOP;
1961 flags_next |= next_is_first *
1963 rdma_count |= -(chop | next_is_first);
1964 rdma_count += chop & !next_is_first;
1965 } else if (cum_len_next >= 0) {
1970 small = (mss <= MXGEFW_SEND_SMALL_SIZE);
1971 flags_next = MXGEFW_FLAGS_TSO_PLD |
1972 MXGEFW_FLAGS_FIRST |
1973 (small * MXGEFW_FLAGS_SMALL);
1976 req->addr_high = high_swapped;
1977 req->addr_low = htobe32(low);
1978 req->pseudo_hdr_offset = pseudo_hdr_offset;
1980 req->rdma_count = 1;
1981 req->length = htobe16(seglen);
1982 req->cksum_offset = cksum_offset;
1983 req->flags = flags | ((cum_len & 1) *
1984 MXGEFW_FLAGS_ALIGN_ODD);
1987 cum_len = cum_len_next;
1992 if (cksum_offset != 0 && !pi->ip6) {
1993 if (__predict_false(cksum_offset > seglen))
1994 cksum_offset -= seglen;
1998 if (__predict_false(cnt > tx->max_desc))
2004 (req-rdma_count)->rdma_count = rdma_count;
2008 req->flags |= MXGEFW_FLAGS_TSO_LAST;
2009 } while (!(req->flags & (MXGEFW_FLAGS_TSO_CHOP | MXGEFW_FLAGS_FIRST)));
2011 tx->info[((cnt - 1) + tx->req) & tx->mask].flag = 1;
2012 mxge_submit_req(tx, tx->req_list, cnt);
2013 #ifdef IFNET_BUF_RING
2014 if ((ss->sc->num_slices > 1) && tx->queue_active == 0) {
2015 /* tell the NIC to start polling this slice */
2017 tx->queue_active = 1;
2025 bus_dmamap_unload(tx->dmat, tx->info[tx->req & tx->mask].map);
2029 printf("tx->max_desc exceeded via TSO!\n");
2030 printf("mss = %d, %ld, %d!\n", mss,
2031 (long)seg - (long)tx->seg_list, tx->max_desc);
2038 #endif /* IFCAP_TSO4 */
2040 #ifdef MXGE_NEW_VLAN_API
2042 * We reproduce the software vlan tag insertion from
2043 * net/if_vlan.c:vlan_start() here so that we can advertise "hardware"
2044 * vlan tag insertion. We need to advertise this in order to have the
2045 * vlan interface respect our csum offload flags.
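*
* The rewrite below is plain 802.1Q encapsulation:
*   before:  dst | src | ether_type | payload
*   after:   dst | src | 0x8100 | tag | ether_type | payload
* i.e. ETHER_VLAN_ENCAP_LEN (4) bytes are inserted between the source MAC
* address and the original ether_type.
*/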
2047 static struct mbuf *
2048 mxge_vlan_tag_insert(struct mbuf *m)
2050 struct ether_vlan_header *evl;
2052 M_PREPEND(m, ETHER_VLAN_ENCAP_LEN, M_NOWAIT);
2053 if (__predict_false(m == NULL))
2055 if (m->m_len < sizeof(*evl)) {
2056 m = m_pullup(m, sizeof(*evl));
2057 if (__predict_false(m == NULL))
2061 * Transform the Ethernet header into an Ethernet header
2062 * with 802.1Q encapsulation.
2064 evl = mtod(m, struct ether_vlan_header *);
2065 bcopy((char *)evl + ETHER_VLAN_ENCAP_LEN,
2066 (char *)evl, ETHER_HDR_LEN - ETHER_TYPE_LEN);
2067 evl->evl_encap_proto = htons(ETHERTYPE_VLAN);
2068 evl->evl_tag = htons(m->m_pkthdr.ether_vtag);
2069 m->m_flags &= ~M_VLANTAG;
2072 #endif /* MXGE_NEW_VLAN_API */
2075 mxge_encap(struct mxge_slice_state *ss, struct mbuf *m)
2077 struct mxge_pkt_info pi = {0,0,0,0};
2079 mcp_kreq_ether_send_t *req;
2080 bus_dma_segment_t *seg;
2084 int cnt, cum_len, err, i, idx, odd_flag;
2085 uint16_t pseudo_hdr_offset;
2086 uint8_t flags, cksum_offset;
2093 #ifdef MXGE_NEW_VLAN_API
2094 if (m->m_flags & M_VLANTAG) {
2095 m = mxge_vlan_tag_insert(m);
2096 if (__predict_false(m == NULL))
2097 goto drop_without_m;
2100 if (m->m_pkthdr.csum_flags &
2101 (CSUM_TSO | CSUM_DELAY_DATA | CSUM_DELAY_DATA_IPV6)) {
2102 if (mxge_parse_tx(ss, m, &pi))
2106 /* (try to) map the frame for DMA */
2107 idx = tx->req & tx->mask;
2108 err = bus_dmamap_load_mbuf_sg(tx->dmat, tx->info[idx].map,
2109 m, tx->seg_list, &cnt,
2111 if (__predict_false(err == EFBIG)) {
2112 /* Too many segments in the chain. Try
2114 m_tmp = m_defrag(m, M_NOWAIT);
2115 if (m_tmp == NULL) {
2120 err = bus_dmamap_load_mbuf_sg(tx->dmat,
2122 m, tx->seg_list, &cnt,
2125 if (__predict_false(err != 0)) {
2126 device_printf(sc->dev, "bus_dmamap_load_mbuf_sg returned %d"
2127 " packet len = %d\n", err, m->m_pkthdr.len);
2130 bus_dmamap_sync(tx->dmat, tx->info[idx].map,
2131 BUS_DMASYNC_PREWRITE);
2132 tx->info[idx].m = m;
2135 /* TSO is different enough, we handle it in another routine */
2136 if (m->m_pkthdr.csum_flags & (CSUM_TSO)) {
2137 mxge_encap_tso(ss, m, cnt, &pi);
2144 pseudo_hdr_offset = 0;
2145 flags = MXGEFW_FLAGS_NO_TSO;
2147 /* checksum offloading? */
2148 if (m->m_pkthdr.csum_flags &
2149 (CSUM_DELAY_DATA | CSUM_DELAY_DATA_IPV6)) {
2150 /* ensure ip header is in first mbuf, copy
2151 it to a scratch buffer if not */
2152 cksum_offset = pi.ip_off + pi.ip_hlen;
2153 pseudo_hdr_offset = cksum_offset + m->m_pkthdr.csum_data;
2154 pseudo_hdr_offset = htobe16(pseudo_hdr_offset);
2155 req->cksum_offset = cksum_offset;
2156 flags |= MXGEFW_FLAGS_CKSUM;
2157 odd_flag = MXGEFW_FLAGS_ALIGN_ODD;
2161 if (m->m_pkthdr.len < MXGEFW_SEND_SMALL_SIZE)
2162 flags |= MXGEFW_FLAGS_SMALL;
2164 /* convert segments into a request list */
2167 req->flags = MXGEFW_FLAGS_FIRST;
2168 for (i = 0; i < cnt; i++) {
2170 htobe32(MXGE_LOWPART_TO_U32(seg->ds_addr));
2172 htobe32(MXGE_HIGHPART_TO_U32(seg->ds_addr));
2173 req->length = htobe16(seg->ds_len);
2174 req->cksum_offset = cksum_offset;
2175 if (cksum_offset > seg->ds_len)
2176 cksum_offset -= seg->ds_len;
2179 req->pseudo_hdr_offset = pseudo_hdr_offset;
2180 req->pad = 0; /* complete solid 16-byte block */
2181 req->rdma_count = 1;
2182 req->flags |= flags | ((cum_len & 1) * odd_flag);
2183 cum_len += seg->ds_len;
2189 /* pad runts to 60 bytes */
2193 htobe32(MXGE_LOWPART_TO_U32(sc->zeropad_dma.bus_addr));
2195 htobe32(MXGE_HIGHPART_TO_U32(sc->zeropad_dma.bus_addr));
2196 req->length = htobe16(60 - cum_len);
2197 req->cksum_offset = 0;
2198 req->pseudo_hdr_offset = pseudo_hdr_offset;
2199 req->pad = 0; /* complete solid 16-byte block */
2200 req->rdma_count = 1;
2201 req->flags |= flags | ((cum_len & 1) * odd_flag);
2205 tx->req_list[0].rdma_count = cnt;
2207 /* print what the firmware will see */
2208 for (i = 0; i < cnt; i++) {
2209 printf("%d: addr: 0x%x 0x%x len:%d pso%d,"
2210 "cso:%d, flags:0x%x, rdma:%d\n",
2211 i, (int)ntohl(tx->req_list[i].addr_high),
2212 (int)ntohl(tx->req_list[i].addr_low),
2213 (int)ntohs(tx->req_list[i].length),
2214 (int)ntohs(tx->req_list[i].pseudo_hdr_offset),
2215 tx->req_list[i].cksum_offset, tx->req_list[i].flags,
2216 tx->req_list[i].rdma_count);
2218 printf("--------------\n");
2220 tx->info[((cnt - 1) + tx->req) & tx->mask].flag = 1;
2221 mxge_submit_req(tx, tx->req_list, cnt);
2222 #ifdef IFNET_BUF_RING
2223 if ((ss->sc->num_slices > 1) && tx->queue_active == 0) {
2224 /* tell the NIC to start polling this slice */
2226 tx->queue_active = 1;
2240 #ifdef IFNET_BUF_RING
2242 mxge_qflush(struct ifnet *ifp)
2244 mxge_softc_t *sc = ifp->if_softc;
2249 for (slice = 0; slice < sc->num_slices; slice++) {
2250 tx = &sc->ss[slice].tx;
2252 while ((m = buf_ring_dequeue_sc(tx->br)) != NULL)
2254 mtx_unlock(&tx->mtx);
2260 mxge_start_locked(struct mxge_slice_state *ss)
2271 while ((tx->mask - (tx->req - tx->done)) > tx->max_desc) {
2272 m = drbr_dequeue(ifp, tx->br);
2276 /* let BPF see it */
2279 /* give it to the nic */
2282 /* ran out of transmit slots */
2283 if (((ss->if_drv_flags & IFF_DRV_OACTIVE) == 0)
2284 && (!drbr_empty(ifp, tx->br))) {
2285 ss->if_drv_flags |= IFF_DRV_OACTIVE;
2291 mxge_transmit_locked(struct mxge_slice_state *ss, struct mbuf *m)
2302 if ((ss->if_drv_flags & (IFF_DRV_RUNNING|IFF_DRV_OACTIVE)) !=
2304 err = drbr_enqueue(ifp, tx->br, m);
2308 if (!drbr_needs_enqueue(ifp, tx->br) &&
2309 ((tx->mask - (tx->req - tx->done)) > tx->max_desc)) {
2310 /* let BPF see it */
2312 /* give it to the nic */
2314 } else if ((err = drbr_enqueue(ifp, tx->br, m)) != 0) {
2317 if (!drbr_empty(ifp, tx->br))
2318 mxge_start_locked(ss);
2323 mxge_transmit(struct ifnet *ifp, struct mbuf *m)
2325 mxge_softc_t *sc = ifp->if_softc;
2326 struct mxge_slice_state *ss;
2331 slice = m->m_pkthdr.flowid;
2332 slice &= (sc->num_slices - 1); /* num_slices always power of 2 */
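/*
* e.g. with 4 slices the mask is 0x3, so flowids 0,4,8,... map to slice 0
* and flowids 1,5,9,... to slice 1; the AND only works because num_slices
* is constrained to a power of 2.
*/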
2334 ss = &sc->ss[slice];
2337 if (mtx_trylock(&tx->mtx)) {
2338 err = mxge_transmit_locked(ss, m);
2339 mtx_unlock(&tx->mtx);
2341 err = drbr_enqueue(ifp, tx->br, m);
2350 mxge_start_locked(struct mxge_slice_state *ss)
2360 while ((tx->mask - (tx->req - tx->done)) > tx->max_desc) {
2361 IFQ_DRV_DEQUEUE(&ifp->if_snd, m);
2365 /* let BPF see it */
2368 /* give it to the nic */
2371 /* ran out of transmit slots */
2372 if ((sc->ifp->if_drv_flags & IFF_DRV_OACTIVE) == 0) {
2373 sc->ifp->if_drv_flags |= IFF_DRV_OACTIVE;
2379 mxge_start(struct ifnet *ifp)
2381 mxge_softc_t *sc = ifp->if_softc;
2382 struct mxge_slice_state *ss;
2384 /* only use the first slice for now */
2386 mtx_lock(&ss->tx.mtx);
2387 mxge_start_locked(ss);
2388 mtx_unlock(&ss->tx.mtx);
2392 * copy an array of mcp_kreq_ether_recv_t's to the mcp. Copy
2393 * at most 32 bytes at a time, so as to avoid involving the software
2394 * pio handler in the nic. We re-write the first segment's low
2395 * DMA address to mark it valid only after we write the entire chunk
2399 mxge_submit_8rx(volatile mcp_kreq_ether_recv_t *dst,
2400 mcp_kreq_ether_recv_t *src)
2404 low = src->addr_low;
2405 src->addr_low = 0xffffffff;
2406 mxge_pio_copy(dst, src, 4 * sizeof (*src));
2408 mxge_pio_copy(dst + 4, src + 4, 4 * sizeof (*src));
2410 src->addr_low = low;
2411 dst->addr_low = low;
2416 mxge_get_buf_small(struct mxge_slice_state *ss, bus_dmamap_t map, int idx)
2418 bus_dma_segment_t seg;
2420 mxge_rx_ring_t *rx = &ss->rx_small;
2423 m = m_gethdr(M_NOWAIT, MT_DATA);
2430 err = bus_dmamap_load_mbuf_sg(rx->dmat, map, m,
2431 &seg, &cnt, BUS_DMA_NOWAIT);
2436 rx->info[idx].m = m;
2437 rx->shadow[idx].addr_low =
2438 htobe32(MXGE_LOWPART_TO_U32(seg.ds_addr));
2439 rx->shadow[idx].addr_high =
2440 htobe32(MXGE_HIGHPART_TO_U32(seg.ds_addr));
2444 mxge_submit_8rx(&rx->lanai[idx - 7], &rx->shadow[idx - 7]);
2449 mxge_get_buf_big(struct mxge_slice_state *ss, bus_dmamap_t map, int idx)
2451 bus_dma_segment_t seg[3];
2453 mxge_rx_ring_t *rx = &ss->rx_big;
2456 m = m_getjcl(M_NOWAIT, MT_DATA, M_PKTHDR, rx->cl_size);
2462 m->m_len = rx->mlen;
2463 err = bus_dmamap_load_mbuf_sg(rx->dmat, map, m,
2464 seg, &cnt, BUS_DMA_NOWAIT);
2469 rx->info[idx].m = m;
2470 rx->shadow[idx].addr_low =
2471 htobe32(MXGE_LOWPART_TO_U32(seg->ds_addr));
2472 rx->shadow[idx].addr_high =
2473 htobe32(MXGE_HIGHPART_TO_U32(seg->ds_addr));
2475 #if MXGE_VIRT_JUMBOS
2476 for (i = 1; i < cnt; i++) {
2477 rx->shadow[idx + i].addr_low =
2478 htobe32(MXGE_LOWPART_TO_U32(seg[i].ds_addr));
2479 rx->shadow[idx + i].addr_high =
2480 htobe32(MXGE_HIGHPART_TO_U32(seg[i].ds_addr));
2485 for (i = 0; i < rx->nbufs; i++) {
2486 if ((idx & 7) == 7) {
2487 mxge_submit_8rx(&rx->lanai[idx - 7],
2488 &rx->shadow[idx - 7]);
2498 mxge_csum_generic(uint16_t *raw, int len)
2509 csum = (csum >> 16) + (csum & 0xffff);
2510 csum = (csum >> 16) + (csum & 0xffff);
2511 return (uint16_t)csum;
2514 static inline uint16_t
2515 mxge_rx_csum6(void *p, struct mbuf *m, uint32_t csum)
2518 int nxt, cksum_offset;
2519 struct ip6_hdr *ip6 = p;
2523 cksum_offset = sizeof (*ip6) + ETHER_HDR_LEN;
2524 if (nxt != IPPROTO_TCP && nxt != IPPROTO_UDP) {
2525 cksum_offset = ip6_lasthdr(m, ETHER_HDR_LEN,
2526 IPPROTO_IPV6, &nxt);
2527 if (nxt != IPPROTO_TCP && nxt != IPPROTO_UDP)
2532 * IPv6 headers do not contain a checksum, and hence
2533 * do not checksum to zero, so they don't "fall out"
2534 * of the partial checksum calculation like IPv4
2535 * headers do. We need to fix the partial checksum by
2536 * subtracting the checksum of the IPv6 header.
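*
* In 16-bit one's-complement arithmetic, subtracting 'partial' is done by
* adding its complement and folding any carry back into the low 16 bits,
* which is what the adjustment of csum below implements.
*/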
2539 partial = mxge_csum_generic((uint16_t *)ip6, cksum_offset -
2542 csum += (csum < ~partial);
2543 csum = (csum >> 16) + (csum & 0xFFFF);
2544 csum = (csum >> 16) + (csum & 0xFFFF);
2545 c = in6_cksum_pseudo(ip6, m->m_pkthdr.len - cksum_offset, nxt,
2552 * Myri10GE hardware checksums are not valid if the sender
2553 * padded the frame with non-zero padding. This is because
2554 * the firmware just does a simple 16-bit 1s complement
2555 * checksum across the entire frame, excluding the first 14
2556 * bytes. It is best to simply to check the checksum and
2557 * tell the stack about it only if the checksum is good
2560 static inline uint16_t
2561 mxge_rx_csum(struct mbuf *m, int csum)
2563 struct ether_header *eh;
2567 #if defined(INET) || defined(INET6)
2568 int cap = m->m_pkthdr.rcvif->if_capenable;
2573 eh = mtod(m, struct ether_header *);
2574 etype = ntohs(eh->ether_type);
2578 if ((cap & IFCAP_RXCSUM) == 0)
2580 ip = (struct ip *)(eh + 1);
2581 if (ip->ip_p != IPPROTO_TCP && ip->ip_p != IPPROTO_UDP)
2583 c = in_pseudo(ip->ip_src.s_addr, ip->ip_dst.s_addr,
2584 htonl(ntohs(csum) + ntohs(ip->ip_len) -
2585 (ip->ip_hl << 2) + ip->ip_p));
2590 case ETHERTYPE_IPV6:
2591 if ((cap & IFCAP_RXCSUM_IPV6) == 0)
2593 c = mxge_rx_csum6((eh + 1), m, csum);
2603 mxge_vlan_tag_remove(struct mbuf *m, uint32_t *csum)
2605 struct ether_vlan_header *evl;
2606 struct ether_header *eh;
2609 evl = mtod(m, struct ether_vlan_header *);
2610 eh = mtod(m, struct ether_header *);
2613 * fix checksum by subtracting ETHER_VLAN_ENCAP_LEN bytes
2614 * after what the firmware thought was the end of the ethernet
2618 /* put checksum into host byte order */
2619 *csum = ntohs(*csum);
2620 partial = ntohl(*(uint32_t *)(mtod(m, char *) + ETHER_HDR_LEN));
2621 (*csum) += ~partial;
2622 (*csum) += ((*csum) < ~partial);
2623 (*csum) = ((*csum) >> 16) + ((*csum) & 0xFFFF);
2624 (*csum) = ((*csum) >> 16) + ((*csum) & 0xFFFF);
2626 /* restore checksum to network byte order;
2627 later consumers expect this */
2628 *csum = htons(*csum);
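/*
 * The fixup above subtracts, in ones-complement arithmetic, the 4 bytes
 * of 802.1q encapsulation that immediately follow the 14-byte Ethernet
 * header, since the firmware included them in its sum but they are
 * stripped from the frame below.
 */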
2631 #ifdef MXGE_NEW_VLAN_API
2632 m->m_pkthdr.ether_vtag = ntohs(evl->evl_tag);
2636 mtag = m_tag_alloc(MTAG_VLAN, MTAG_VLAN_TAG, sizeof(u_int),
2640 VLAN_TAG_VALUE(mtag) = ntohs(evl->evl_tag);
2641 m_tag_prepend(m, mtag);
2645 m->m_flags |= M_VLANTAG;
2648 * Remove the 802.1q header by copying the Ethernet
2649 * addresses over it and adjusting the beginning of
2650 * the data in the mbuf. The encapsulated Ethernet
2651 * type field is already in place.
2653 bcopy((char *)evl, (char *)evl + ETHER_VLAN_ENCAP_LEN,
2654 ETHER_HDR_LEN - ETHER_TYPE_LEN);
2655 m_adj(m, ETHER_VLAN_ENCAP_LEN);
2660 mxge_rx_done_big(struct mxge_slice_state *ss, uint32_t len,
2661 uint32_t csum, int lro)
2666 struct ether_header *eh;
2668 bus_dmamap_t old_map;
2674 idx = rx->cnt & rx->mask;
2675 rx->cnt += rx->nbufs;
2676 /* save a pointer to the received mbuf */
2677 m = rx->info[idx].m;
2678 /* try to replace the received mbuf */
2679 if (mxge_get_buf_big(ss, rx->extra_map, idx)) {
2680 /* drop the frame -- the old mbuf is recycled */
2681 if_inc_counter(ifp, IFCOUNTER_IERRORS, 1);
2685 /* unmap the received buffer */
2686 old_map = rx->info[idx].map;
2687 bus_dmamap_sync(rx->dmat, old_map, BUS_DMASYNC_POSTREAD);
2688 bus_dmamap_unload(rx->dmat, old_map);
2690 /* swap the bus_dmamap_t's */
2691 rx->info[idx].map = rx->extra_map;
2692 rx->extra_map = old_map;
2694 /* mcp implicitly skips 1st 2 bytes so that packet is properly
2696 m->m_data += MXGEFW_PAD;
2698 m->m_pkthdr.rcvif = ifp;
2699 m->m_len = m->m_pkthdr.len = len;
2701 eh = mtod(m, struct ether_header *);
2702 if (eh->ether_type == htons(ETHERTYPE_VLAN)) {
2703 mxge_vlan_tag_remove(m, &csum);
2705 /* if the checksum is valid, mark it in the mbuf header */
2707 if ((ifp->if_capenable & (IFCAP_RXCSUM_IPV6 | IFCAP_RXCSUM)) &&
2708 (0 == mxge_rx_csum(m, csum))) {
2709 /* Tell the stack that the checksum is good */
2710 m->m_pkthdr.csum_data = 0xffff;
2711 m->m_pkthdr.csum_flags = CSUM_PSEUDO_HDR |
2714 #if defined(INET) || defined (INET6)
2715 if (lro && (0 == tcp_lro_rx(&ss->lc, m, 0)))
2719 /* flowid only valid if RSS hashing is enabled */
2720 if (sc->num_slices > 1) {
2721 m->m_pkthdr.flowid = (ss - sc->ss);
2722 M_HASHTYPE_SET(m, M_HASHTYPE_OPAQUE);
2724 /* pass the frame up the stack */
2725 (*ifp->if_input)(ifp, m);
2729 mxge_rx_done_small(struct mxge_slice_state *ss, uint32_t len,
2730 uint32_t csum, int lro)
2734 struct ether_header *eh;
2737 bus_dmamap_t old_map;
2743 idx = rx->cnt & rx->mask;
2745 /* save a pointer to the received mbuf */
2746 m = rx->info[idx].m;
2747 /* try to replace the received mbuf */
2748 if (mxge_get_buf_small(ss, rx->extra_map, idx)) {
2750 /* drop the frame -- the old mbuf is recycled */
2750 if_inc_counter(ifp, IFCOUNTER_IERRORS, 1);
2754 /* unmap the received buffer */
2755 old_map = rx->info[idx].map;
2756 bus_dmamap_sync(rx->dmat, old_map, BUS_DMASYNC_POSTREAD);
2757 bus_dmamap_unload(rx->dmat, old_map);
2759 /* swap the bus_dmamap_t's */
2760 rx->info[idx].map = rx->extra_map;
2761 rx->extra_map = old_map;
2763 /* mcp implicitly skips 1st 2 bytes so that packet is properly
2765 m->m_data += MXGEFW_PAD;
2767 m->m_pkthdr.rcvif = ifp;
2768 m->m_len = m->m_pkthdr.len = len;
2770 eh = mtod(m, struct ether_header *);
2771 if (eh->ether_type == htons(ETHERTYPE_VLAN)) {
2772 mxge_vlan_tag_remove(m, &csum);
2774 /* if the checksum is valid, mark it in the mbuf header */
2775 if ((ifp->if_capenable & (IFCAP_RXCSUM_IPV6 | IFCAP_RXCSUM)) &&
2776 (0 == mxge_rx_csum(m, csum))) {
2777 /* Tell the stack that the checksum is good */
2778 m->m_pkthdr.csum_data = 0xffff;
2779 m->m_pkthdr.csum_flags = CSUM_PSEUDO_HDR |
2782 #if defined(INET) || defined (INET6)
2783 if (lro && (0 == tcp_lro_rx(&ss->lc, m, csum)))
2787 /* flowid only valid if RSS hashing is enabled */
2788 if (sc->num_slices > 1) {
2789 m->m_pkthdr.flowid = (ss - sc->ss);
2790 M_HASHTYPE_SET(m, M_HASHTYPE_OPAQUE);
2792 /* pass the frame up the stack */
2793 (*ifp->if_input)(ifp, m);
2797 mxge_clean_rx_done(struct mxge_slice_state *ss)
2799 mxge_rx_done_t *rx_done = &ss->rx_done;
2805 lro = ss->sc->ifp->if_capenable & IFCAP_LRO;
2806 while (rx_done->entry[rx_done->idx].length != 0) {
2807 length = ntohs(rx_done->entry[rx_done->idx].length);
2808 rx_done->entry[rx_done->idx].length = 0;
2809 checksum = rx_done->entry[rx_done->idx].checksum;
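/*
 * Frames short enough to fit in a standard mbuf (MHLEN minus the
 * 2-byte firmware pad) were received into the small ring; anything
 * larger came from the big ring, so dispatch accordingly.
 */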
2810 if (length <= (MHLEN - MXGEFW_PAD))
2811 mxge_rx_done_small(ss, length, checksum, lro);
2813 mxge_rx_done_big(ss, length, checksum, lro);
2815 rx_done->idx = rx_done->cnt & rx_done->mask;
2817 /* limit potential for livelock */
2818 if (__predict_false(++limit > rx_done->mask / 2))
2821 #if defined(INET) || defined (INET6)
2822 tcp_lro_flush_all(&ss->lc);
2828 mxge_tx_done(struct mxge_slice_state *ss, uint32_t mcp_idx)
2839 while (tx->pkt_done != mcp_idx) {
2840 idx = tx->done & tx->mask;
2842 m = tx->info[idx].m;
2843 /* mbuf and DMA map only attached to the first
2846 ss->obytes += m->m_pkthdr.len;
2847 if (m->m_flags & M_MCAST)
2850 tx->info[idx].m = NULL;
2851 map = tx->info[idx].map;
2852 bus_dmamap_unload(tx->dmat, map);
2855 if (tx->info[idx].flag) {
2856 tx->info[idx].flag = 0;
2861 /* If we have space, clear IFF_OACTIVE to tell the stack that
2862 it's OK to send packets */
2863 #ifdef IFNET_BUF_RING
2864 flags = &ss->if_drv_flags;
2866 flags = &ifp->if_drv_flags;
2868 mtx_lock(&ss->tx.mtx);
2869 if ((*flags) & IFF_DRV_OACTIVE &&
2870 tx->req - tx->done < (tx->mask + 1)/4) {
2871 *(flags) &= ~IFF_DRV_OACTIVE;
2873 mxge_start_locked(ss);
2875 #ifdef IFNET_BUF_RING
2876 if ((ss->sc->num_slices > 1) && (tx->req == tx->done)) {
2877 /* let the NIC stop polling this queue, since there
2878 * are no more transmits pending */
2879 if (tx->req == tx->done) {
2881 tx->queue_active = 0;
2887 mtx_unlock(&ss->tx.mtx);
2891 static struct mxge_media_type mxge_xfp_media_types[] =
2893 {IFM_10G_CX4, 0x7f, "10GBASE-CX4 (module)"},
2894 {IFM_10G_SR, (1 << 7), "10GBASE-SR"},
2895 {IFM_10G_LR, (1 << 6), "10GBASE-LR"},
2896 {0, (1 << 5), "10GBASE-ER"},
2897 {IFM_10G_LRM, (1 << 4), "10GBASE-LRM"},
2898 {0, (1 << 3), "10GBASE-SW"},
2899 {0, (1 << 2), "10GBASE-LW"},
2900 {0, (1 << 1), "10GBASE-EW"},
2901 {0, (1 << 0), "Reserved"}
2903 static struct mxge_media_type mxge_sfp_media_types[] =
2905 {IFM_10G_TWINAX, 0, "10GBASE-Twinax"},
2906 {0, (1 << 7), "Reserved"},
2907 {IFM_10G_LRM, (1 << 6), "10GBASE-LRM"},
2908 {IFM_10G_LR, (1 << 5), "10GBASE-LR"},
2909 {IFM_10G_SR, (1 << 4), "10GBASE-SR"},
2910 {IFM_10G_TWINAX,(1 << 0), "10GBASE-Twinax"}
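/*
 * Each entry maps one bit of the module's I2C compliance byte to an
 * ifmedia word; entries with a zero flag are media types FreeBSD has
 * no IFM_* constant for.
 */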
2914 mxge_media_set(mxge_softc_t *sc, int media_type)
2918 ifmedia_add(&sc->media, IFM_ETHER | IFM_FDX | media_type,
2920 ifmedia_set(&sc->media, IFM_ETHER | IFM_FDX | media_type);
2921 sc->current_media = media_type;
2922 sc->media.ifm_media = sc->media.ifm_cur->ifm_media;
2926 mxge_media_init(mxge_softc_t *sc)
2931 ifmedia_removeall(&sc->media);
2932 mxge_media_set(sc, IFM_AUTO);
2935 * parse the product code to determine the interface type
2936 * (CX4, XFP, Quad Ribbon Fiber) by looking at the character
2937 * after the 3rd dash in the driver's cached copy of the
2938 * EEPROM's product code string.
2940 ptr = sc->product_code_string;
2942 device_printf(sc->dev, "Missing product code\n");
2946 for (i = 0; i < 3; i++, ptr++) {
2947 ptr = strchr(ptr, '-');
2949 device_printf(sc->dev,
2950 "only %d dashes in PC?!?\n", i);
2954 if (*ptr == 'C' || *(ptr +1) == 'C') {
2956 sc->connector = MXGE_CX4;
2957 mxge_media_set(sc, IFM_10G_CX4);
2958 } else if (*ptr == 'Q') {
2959 /* -Q is Quad Ribbon Fiber */
2960 sc->connector = MXGE_QRF;
2961 device_printf(sc->dev, "Quad Ribbon Fiber Media\n");
2962 /* FreeBSD has no media type for Quad ribbon fiber */
2963 } else if (*ptr == 'R') {
2965 sc->connector = MXGE_XFP;
2966 } else if (*ptr == 'S' || *(ptr +1) == 'S') {
2967 /* -S or -2S is SFP+ */
2968 sc->connector = MXGE_SFP;
2970 device_printf(sc->dev, "Unknown media type: %c\n", *ptr);
2975 * Determine the media type for a NIC. Some XFPs will identify
2976 * themselves only when their link is up, so this is initiated via a
2977 * link up interrupt. However, this can potentially take up to
2978 * several milliseconds, so it is run via the watchdog routine, rather
2979 * than in the interrupt handler itself.
2982 mxge_media_probe(mxge_softc_t *sc)
2987 struct mxge_media_type *mxge_media_types = NULL;
2988 int i, err, ms, mxge_media_type_entries;
2991 sc->need_media_probe = 0;
2993 if (sc->connector == MXGE_XFP) {
2995 mxge_media_types = mxge_xfp_media_types;
2996 mxge_media_type_entries =
2997 nitems(mxge_xfp_media_types);
2998 byte = MXGE_XFP_COMPLIANCE_BYTE;
3000 } else if (sc->connector == MXGE_SFP) {
3001 /* -S or -2S is SFP+ */
3002 mxge_media_types = mxge_sfp_media_types;
3003 mxge_media_type_entries =
3004 nitems(mxge_sfp_media_types);
3008 /* nothing to do; media type cannot change */
3013 * At this point we know the NIC has an XFP cage, so now we
3014 * try to determine what is in the cage by using the
3015 * firmware's XFP I2C commands to read the XFP 10GbE compliance
3016 * register. We read just one byte, which may take over
3020 cmd.data0 = 0; /* just fetch 1 byte, not all 256 */
3022 err = mxge_send_cmd(sc, MXGEFW_CMD_I2C_READ, &cmd);
3023 if (err == MXGEFW_CMD_ERROR_I2C_FAILURE) {
3024 device_printf(sc->dev, "failed to read XFP\n");
3026 if (err == MXGEFW_CMD_ERROR_I2C_ABSENT) {
3027 device_printf(sc->dev, "Type R/S with no XFP!?!?\n");
3029 if (err != MXGEFW_CMD_OK) {
3033 /* now we wait for the data to be cached */
3035 err = mxge_send_cmd(sc, MXGEFW_CMD_I2C_BYTE, &cmd);
3036 for (ms = 0; (err == EBUSY) && (ms < 50); ms++) {
3039 err = mxge_send_cmd(sc, MXGEFW_CMD_I2C_BYTE, &cmd);
3041 if (err != MXGEFW_CMD_OK) {
3042 device_printf(sc->dev, "failed to read %s (%d, %dms)\n",
3043 cage_type, err, ms);
3047 if (cmd.data0 == mxge_media_types[0].bitmask) {
3049 device_printf(sc->dev, "%s:%s\n", cage_type,
3050 mxge_media_types[0].name);
3051 if (sc->current_media != mxge_media_types[0].flag) {
3052 mxge_media_init(sc);
3053 mxge_media_set(sc, mxge_media_types[0].flag);
3057 for (i = 1; i < mxge_media_type_entries; i++) {
3058 if (cmd.data0 & mxge_media_types[i].bitmask) {
3060 device_printf(sc->dev, "%s:%s\n",
3062 mxge_media_types[i].name);
3064 if (sc->current_media != mxge_media_types[i].flag) {
3065 mxge_media_init(sc);
3066 mxge_media_set(sc, mxge_media_types[i].flag);
3072 device_printf(sc->dev, "%s media 0x%x unknown\n",
3073 cage_type, cmd.data0);
3079 mxge_intr(void *arg)
3081 struct mxge_slice_state *ss = arg;
3082 mxge_softc_t *sc = ss->sc;
3083 mcp_irq_data_t *stats = ss->fw_stats;
3084 mxge_tx_ring_t *tx = &ss->tx;
3085 mxge_rx_done_t *rx_done = &ss->rx_done;
3086 uint32_t send_done_count;
3090 #ifndef IFNET_BUF_RING
3091 /* an interrupt on a non-zero slice is implicitly valid
3092 since MSI-X irqs are not shared */
3094 mxge_clean_rx_done(ss);
3095 *ss->irq_claim = be32toh(3);
3100 /* make sure the DMA has finished */
3101 if (!stats->valid) {
3104 valid = stats->valid;
3106 if (sc->legacy_irq) {
3107 /* lower legacy IRQ */
3108 *sc->irq_deassert = 0;
3109 if (!mxge_deassert_wait)
3110 /* don't wait for conf. that irq is low */
3116 /* loop while waiting for legacy irq deassertion */
3118 /* check for transmit completes and receives */
3119 send_done_count = be32toh(stats->send_done_count);
3120 while ((send_done_count != tx->pkt_done) ||
3121 (rx_done->entry[rx_done->idx].length != 0)) {
3122 if (send_done_count != tx->pkt_done)
3123 mxge_tx_done(ss, (int)send_done_count);
3124 mxge_clean_rx_done(ss);
3125 send_done_count = be32toh(stats->send_done_count);
3127 if (sc->legacy_irq && mxge_deassert_wait)
3129 } while (*((volatile uint8_t *) &stats->valid));
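/*
 * For legacy INTx with deassert_wait set, keep draining completions
 * until the firmware clears stats->valid (re-read through a volatile
 * byte each pass), confirming the interrupt line has actually been
 * deasserted before leaving the handler.
 */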
3131 /* fw link & error stats meaningful only on the first slice */
3132 if (__predict_false((ss == sc->ss) && stats->stats_updated)) {
3133 if (sc->link_state != stats->link_up) {
3134 sc->link_state = stats->link_up;
3135 if (sc->link_state) {
3136 if_link_state_change(sc->ifp, LINK_STATE_UP);
3138 device_printf(sc->dev, "link up\n");
3140 if_link_state_change(sc->ifp, LINK_STATE_DOWN);
3142 device_printf(sc->dev, "link down\n");
3144 sc->need_media_probe = 1;
3146 if (sc->rdma_tags_available !=
3147 be32toh(stats->rdma_tags_available)) {
3148 sc->rdma_tags_available =
3149 be32toh(stats->rdma_tags_available);
3150 device_printf(sc->dev, "RDMA timed out! %d tags "
3151 "left\n", sc->rdma_tags_available);
3154 if (stats->link_down) {
3155 sc->down_cnt += stats->link_down;
3157 if_link_state_change(sc->ifp, LINK_STATE_DOWN);
3161 /* check to see if we have rx token to pass back */
3163 *ss->irq_claim = be32toh(3);
3164 *(ss->irq_claim + 1) = be32toh(3);
3168 mxge_init(void *arg)
3170 mxge_softc_t *sc = arg;
3171 struct ifnet *ifp = sc->ifp;
3174 mtx_lock(&sc->driver_mtx);
3175 if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
3176 (void) mxge_open(sc);
3177 mtx_unlock(&sc->driver_mtx);
3183 mxge_free_slice_mbufs(struct mxge_slice_state *ss)
3187 #if defined(INET) || defined(INET6)
3188 tcp_lro_free(&ss->lc);
3190 for (i = 0; i <= ss->rx_big.mask; i++) {
3191 if (ss->rx_big.info[i].m == NULL)
3193 bus_dmamap_unload(ss->rx_big.dmat,
3194 ss->rx_big.info[i].map);
3195 m_freem(ss->rx_big.info[i].m);
3196 ss->rx_big.info[i].m = NULL;
3199 for (i = 0; i <= ss->rx_small.mask; i++) {
3200 if (ss->rx_small.info[i].m == NULL)
3202 bus_dmamap_unload(ss->rx_small.dmat,
3203 ss->rx_small.info[i].map);
3204 m_freem(ss->rx_small.info[i].m);
3205 ss->rx_small.info[i].m = NULL;
3208 /* transmit ring used only on the first slice */
3209 if (ss->tx.info == NULL)
3212 for (i = 0; i <= ss->tx.mask; i++) {
3213 ss->tx.info[i].flag = 0;
3214 if (ss->tx.info[i].m == NULL)
3216 bus_dmamap_unload(ss->tx.dmat,
3217 ss->tx.info[i].map);
3218 m_freem(ss->tx.info[i].m);
3219 ss->tx.info[i].m = NULL;
3224 mxge_free_mbufs(mxge_softc_t *sc)
3228 for (slice = 0; slice < sc->num_slices; slice++)
3229 mxge_free_slice_mbufs(&sc->ss[slice]);
3233 mxge_free_slice_rings(struct mxge_slice_state *ss)
3238 if (ss->rx_done.entry != NULL)
3239 mxge_dma_free(&ss->rx_done.dma);
3240 ss->rx_done.entry = NULL;
3242 if (ss->tx.req_bytes != NULL)
3243 free(ss->tx.req_bytes, M_DEVBUF);
3244 ss->tx.req_bytes = NULL;
3246 if (ss->tx.seg_list != NULL)
3247 free(ss->tx.seg_list, M_DEVBUF);
3248 ss->tx.seg_list = NULL;
3250 if (ss->rx_small.shadow != NULL)
3251 free(ss->rx_small.shadow, M_DEVBUF);
3252 ss->rx_small.shadow = NULL;
3254 if (ss->rx_big.shadow != NULL)
3255 free(ss->rx_big.shadow, M_DEVBUF);
3256 ss->rx_big.shadow = NULL;
3258 if (ss->tx.info != NULL) {
3259 if (ss->tx.dmat != NULL) {
3260 for (i = 0; i <= ss->tx.mask; i++) {
3261 bus_dmamap_destroy(ss->tx.dmat,
3262 ss->tx.info[i].map);
3264 bus_dma_tag_destroy(ss->tx.dmat);
3266 free(ss->tx.info, M_DEVBUF);
3270 if (ss->rx_small.info != NULL) {
3271 if (ss->rx_small.dmat != NULL) {
3272 for (i = 0; i <= ss->rx_small.mask; i++) {
3273 bus_dmamap_destroy(ss->rx_small.dmat,
3274 ss->rx_small.info[i].map);
3276 bus_dmamap_destroy(ss->rx_small.dmat,
3277 ss->rx_small.extra_map);
3278 bus_dma_tag_destroy(ss->rx_small.dmat);
3280 free(ss->rx_small.info, M_DEVBUF);
3282 ss->rx_small.info = NULL;
3284 if (ss->rx_big.info != NULL) {
3285 if (ss->rx_big.dmat != NULL) {
3286 for (i = 0; i <= ss->rx_big.mask; i++) {
3287 bus_dmamap_destroy(ss->rx_big.dmat,
3288 ss->rx_big.info[i].map);
3290 bus_dmamap_destroy(ss->rx_big.dmat,
3291 ss->rx_big.extra_map);
3292 bus_dma_tag_destroy(ss->rx_big.dmat);
3294 free(ss->rx_big.info, M_DEVBUF);
3296 ss->rx_big.info = NULL;
3300 mxge_free_rings(mxge_softc_t *sc)
3304 for (slice = 0; slice < sc->num_slices; slice++)
3305 mxge_free_slice_rings(&sc->ss[slice]);
3309 mxge_alloc_slice_rings(struct mxge_slice_state *ss, int rx_ring_entries,
3310 int tx_ring_entries)
3312 mxge_softc_t *sc = ss->sc;
3316 /* allocate per-slice receive resources */
3318 ss->rx_small.mask = ss->rx_big.mask = rx_ring_entries - 1;
3319 ss->rx_done.mask = (2 * rx_ring_entries) - 1;
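/*
 * The completion ring is sized to hold an event for every small and
 * every big receive descriptor, hence twice rx_ring_entries; ring
 * sizes are powers of two, so "entries - 1" doubles as an index mask.
 */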
3321 /* allocate the rx shadow rings */
3322 bytes = rx_ring_entries * sizeof (*ss->rx_small.shadow);
3323 ss->rx_small.shadow = malloc(bytes, M_DEVBUF, M_ZERO|M_WAITOK);
3325 bytes = rx_ring_entries * sizeof (*ss->rx_big.shadow);
3326 ss->rx_big.shadow = malloc(bytes, M_DEVBUF, M_ZERO|M_WAITOK);
3328 /* allocate the rx host info rings */
3329 bytes = rx_ring_entries * sizeof (*ss->rx_small.info);
3330 ss->rx_small.info = malloc(bytes, M_DEVBUF, M_ZERO|M_WAITOK);
3332 bytes = rx_ring_entries * sizeof (*ss->rx_big.info);
3333 ss->rx_big.info = malloc(bytes, M_DEVBUF, M_ZERO|M_WAITOK);
3335 /* allocate the rx busdma resources */
3336 err = bus_dma_tag_create(sc->parent_dmat, /* parent */
3338 4096, /* boundary */
3339 BUS_SPACE_MAXADDR, /* low */
3340 BUS_SPACE_MAXADDR, /* high */
3341 NULL, NULL, /* filter */
3342 MHLEN, /* maxsize */
3344 MHLEN, /* maxsegsize */
3345 BUS_DMA_ALLOCNOW, /* flags */
3346 NULL, NULL, /* lock */
3347 &ss->rx_small.dmat); /* tag */
3349 device_printf(sc->dev, "Err %d allocating rx_small dmat\n",
3354 err = bus_dma_tag_create(sc->parent_dmat, /* parent */
3356 #if MXGE_VIRT_JUMBOS
3357 4096, /* boundary */
3361 BUS_SPACE_MAXADDR, /* low */
3362 BUS_SPACE_MAXADDR, /* high */
3363 NULL, NULL, /* filter */
3364 3*4096, /* maxsize */
3365 #if MXGE_VIRT_JUMBOS
3367 4096, /* maxsegsize*/
3370 MJUM9BYTES, /* maxsegsize*/
3372 BUS_DMA_ALLOCNOW, /* flags */
3373 NULL, NULL, /* lock */
3374 &ss->rx_big.dmat); /* tag */
3376 device_printf(sc->dev, "Err %d allocating rx_big dmat\n",
3380 for (i = 0; i <= ss->rx_small.mask; i++) {
3381 err = bus_dmamap_create(ss->rx_small.dmat, 0,
3382 &ss->rx_small.info[i].map);
3384 device_printf(sc->dev, "Err %d rx_small dmamap\n",
3389 err = bus_dmamap_create(ss->rx_small.dmat, 0,
3390 &ss->rx_small.extra_map);
3392 device_printf(sc->dev, "Err %d extra rx_small dmamap\n",
3397 for (i = 0; i <= ss->rx_big.mask; i++) {
3398 err = bus_dmamap_create(ss->rx_big.dmat, 0,
3399 &ss->rx_big.info[i].map);
3401 device_printf(sc->dev, "Err %d rx_big dmamap\n",
3406 err = bus_dmamap_create(ss->rx_big.dmat, 0,
3407 &ss->rx_big.extra_map);
3409 device_printf(sc->dev, "Err %d extra rx_big dmamap\n",
3414 /* now allocate TX resources */
3416 #ifndef IFNET_BUF_RING
3417 /* only use a single TX ring for now */
3418 if (ss != ss->sc->ss)
3422 ss->tx.mask = tx_ring_entries - 1;
3423 ss->tx.max_desc = MIN(MXGE_MAX_SEND_DESC, tx_ring_entries / 4);
3426 /* allocate the tx request copy block */
3428 sizeof (*ss->tx.req_list) * (ss->tx.max_desc + 4);
3429 ss->tx.req_bytes = malloc(bytes, M_DEVBUF, M_WAITOK);
3430 /* ensure req_list entries are aligned to 8 bytes */
3431 ss->tx.req_list = (mcp_kreq_ether_send_t *)
3432 ((unsigned long)(ss->tx.req_bytes + 7) & ~7UL);
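/*
 * Round the request-copy block up to the next 8-byte boundary: adding
 * 7 and masking off the low three bits yields an aligned pointer, and
 * the over-allocation above (max_desc + 4 entries) more than covers
 * the bytes skipped by the alignment.
 */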
3434 /* allocate the tx busdma segment list */
3435 bytes = sizeof (*ss->tx.seg_list) * ss->tx.max_desc;
3436 ss->tx.seg_list = (bus_dma_segment_t *)
3437 malloc(bytes, M_DEVBUF, M_WAITOK);
3439 /* allocate the tx host info ring */
3440 bytes = tx_ring_entries * sizeof (*ss->tx.info);
3441 ss->tx.info = malloc(bytes, M_DEVBUF, M_ZERO|M_WAITOK);
3443 /* allocate the tx busdma resources */
3444 err = bus_dma_tag_create(sc->parent_dmat, /* parent */
3446 sc->tx_boundary, /* boundary */
3447 BUS_SPACE_MAXADDR, /* low */
3448 BUS_SPACE_MAXADDR, /* high */
3449 NULL, NULL, /* filter */
3450 65536 + 256, /* maxsize */
3451 ss->tx.max_desc - 2, /* num segs */
3452 sc->tx_boundary, /* maxsegsz */
3453 BUS_DMA_ALLOCNOW, /* flags */
3454 NULL, NULL, /* lock */
3455 &ss->tx.dmat); /* tag */
3458 device_printf(sc->dev, "Err %d allocating tx dmat\n",
3463 /* now use these tags to setup dmamaps for each slot
3465 for (i = 0; i <= ss->tx.mask; i++) {
3466 err = bus_dmamap_create(ss->tx.dmat, 0,
3467 &ss->tx.info[i].map);
3469 device_printf(sc->dev, "Err %d tx dmamap\n",
3479 mxge_alloc_rings(mxge_softc_t *sc)
3483 int tx_ring_entries, rx_ring_entries;
3486 /* get ring sizes */
3487 err = mxge_send_cmd(sc, MXGEFW_CMD_GET_SEND_RING_SIZE, &cmd);
3488 tx_ring_size = cmd.data0;
3490 device_printf(sc->dev, "Cannot determine tx ring sizes\n");
3494 tx_ring_entries = tx_ring_size / sizeof (mcp_kreq_ether_send_t);
3495 rx_ring_entries = sc->rx_ring_size / sizeof (mcp_dma_addr_t);
3496 IFQ_SET_MAXLEN(&sc->ifp->if_snd, tx_ring_entries - 1);
3497 sc->ifp->if_snd.ifq_drv_maxlen = sc->ifp->if_snd.ifq_maxlen;
3498 IFQ_SET_READY(&sc->ifp->if_snd);
3500 for (slice = 0; slice < sc->num_slices; slice++) {
3501 err = mxge_alloc_slice_rings(&sc->ss[slice],
3510 mxge_free_rings(sc);
3517 mxge_choose_params(int mtu, int *big_buf_size, int *cl_size, int *nbufs)
3519 int bufsize = mtu + ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN + MXGEFW_PAD;
3521 if (bufsize < MCLBYTES) {
3522 /* easy, everything fits in a single buffer */
3523 *big_buf_size = MCLBYTES;
3524 *cl_size = MCLBYTES;
3529 if (bufsize < MJUMPAGESIZE) {
3530 /* still easy, everything still fits in a single buffer */
3531 *big_buf_size = MJUMPAGESIZE;
3532 *cl_size = MJUMPAGESIZE;
3536 #if MXGE_VIRT_JUMBOS
3537 /* now we need to use virtually contiguous buffers */
3538 *cl_size = MJUM9BYTES;
3539 *big_buf_size = 4096;
3540 *nbufs = mtu / 4096 + 1;
3541 /* needs to be a power of two, so round up */
3545 *cl_size = MJUM9BYTES;
3546 *big_buf_size = MJUM9BYTES;
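/*
 * Example: with a 9000-byte MTU, bufsize is 9000 + 14 + 4 + 2 = 9020,
 * which exceeds both MCLBYTES and MJUMPAGESIZE on 4KB-page systems, so
 * (without MXGE_VIRT_JUMBOS) a single 9KB cluster is used per frame.
 */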
3552 mxge_slice_open(struct mxge_slice_state *ss, int nbufs, int cl_size)
3561 slice = ss - sc->ss;
3563 #if defined(INET) || defined(INET6)
3564 (void)tcp_lro_init(&ss->lc);
3566 ss->lc.ifp = sc->ifp;
3568 /* get the lanai pointers to the send and receive rings */
3571 #ifndef IFNET_BUF_RING
3572 /* We currently only send from the first slice */
3576 err = mxge_send_cmd(sc, MXGEFW_CMD_GET_SEND_OFFSET, &cmd);
3578 (volatile mcp_kreq_ether_send_t *)(sc->sram + cmd.data0);
3579 ss->tx.send_go = (volatile uint32_t *)
3580 (sc->sram + MXGEFW_ETH_SEND_GO + 64 * slice);
3581 ss->tx.send_stop = (volatile uint32_t *)
3582 (sc->sram + MXGEFW_ETH_SEND_STOP + 64 * slice);
3583 #ifndef IFNET_BUF_RING
3587 err |= mxge_send_cmd(sc,
3588 MXGEFW_CMD_GET_SMALL_RX_OFFSET, &cmd);
3589 ss->rx_small.lanai =
3590 (volatile mcp_kreq_ether_recv_t *)(sc->sram + cmd.data0);
3592 err |= mxge_send_cmd(sc, MXGEFW_CMD_GET_BIG_RX_OFFSET, &cmd);
3594 (volatile mcp_kreq_ether_recv_t *)(sc->sram + cmd.data0);
3597 device_printf(sc->dev,
3598 "failed to get ring sizes or locations\n");
3602 /* stock receive rings */
3603 for (i = 0; i <= ss->rx_small.mask; i++) {
3604 map = ss->rx_small.info[i].map;
3605 err = mxge_get_buf_small(ss, map, i);
3607 device_printf(sc->dev, "alloced %d/%d smalls\n",
3608 i, ss->rx_small.mask + 1);
3612 for (i = 0; i <= ss->rx_big.mask; i++) {
3613 ss->rx_big.shadow[i].addr_low = 0xffffffff;
3614 ss->rx_big.shadow[i].addr_high = 0xffffffff;
3616 ss->rx_big.nbufs = nbufs;
3617 ss->rx_big.cl_size = cl_size;
3618 ss->rx_big.mlen = ss->sc->ifp->if_mtu + ETHER_HDR_LEN +
3619 ETHER_VLAN_ENCAP_LEN + MXGEFW_PAD;
3620 for (i = 0; i <= ss->rx_big.mask; i += ss->rx_big.nbufs) {
3621 map = ss->rx_big.info[i].map;
3622 err = mxge_get_buf_big(ss, map, i);
3624 device_printf(sc->dev, "alloced %d/%d bigs\n",
3625 i, ss->rx_big.mask + 1);
3633 mxge_open(mxge_softc_t *sc)
3636 int err, big_bytes, nbufs, slice, cl_size, i;
3638 volatile uint8_t *itable;
3639 struct mxge_slice_state *ss;
3641 /* Copy the MAC address in case it was overridden */
3642 bcopy(IF_LLADDR(sc->ifp), sc->mac_addr, ETHER_ADDR_LEN);
3644 err = mxge_reset(sc, 1);
3646 device_printf(sc->dev, "failed to reset\n");
3650 if (sc->num_slices > 1) {
3651 /* setup the indirection table */
3652 cmd.data0 = sc->num_slices;
3653 err = mxge_send_cmd(sc, MXGEFW_CMD_SET_RSS_TABLE_SIZE,
3656 err |= mxge_send_cmd(sc, MXGEFW_CMD_GET_RSS_TABLE_OFFSET,
3659 device_printf(sc->dev,
3660 "failed to setup rss tables\n");
3664 /* just enable an identity mapping */
3665 itable = sc->sram + cmd.data0;
3666 for (i = 0; i < sc->num_slices; i++)
3667 itable[i] = (uint8_t)i;
3670 cmd.data1 = mxge_rss_hash_type;
3671 err = mxge_send_cmd(sc, MXGEFW_CMD_SET_RSS_ENABLE, &cmd);
3673 device_printf(sc->dev, "failed to enable slices\n");
3679 mxge_choose_params(sc->ifp->if_mtu, &big_bytes, &cl_size, &nbufs);
3682 err = mxge_send_cmd(sc, MXGEFW_CMD_ALWAYS_USE_N_BIG_BUFFERS,
3684 /* error is only meaningful if we're trying to set
3685 MXGEFW_CMD_ALWAYS_USE_N_BIG_BUFFERS > 1 */
3686 if (err && nbufs > 1) {
3687 device_printf(sc->dev,
3688 "Failed to set alway-use-n to %d\n",
3692 /* Give the firmware the mtu and the big and small buffer
3693 sizes. The firmware wants the big buf size to be a power
3694 of two. Luckily, FreeBSD's clusters are powers of two */
3695 cmd.data0 = sc->ifp->if_mtu + ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
3696 err = mxge_send_cmd(sc, MXGEFW_CMD_SET_MTU, &cmd);
3697 cmd.data0 = MHLEN - MXGEFW_PAD;
3698 err |= mxge_send_cmd(sc, MXGEFW_CMD_SET_SMALL_BUFFER_SIZE,
3700 cmd.data0 = big_bytes;
3701 err |= mxge_send_cmd(sc, MXGEFW_CMD_SET_BIG_BUFFER_SIZE, &cmd);
3704 device_printf(sc->dev, "failed to setup params\n");
3708 /* Now give the firmware a pointer to the stats block */
3710 #ifdef IFNET_BUF_RING
3711 slice < sc->num_slices;
3716 ss = &sc->ss[slice];
3718 MXGE_LOWPART_TO_U32(ss->fw_stats_dma.bus_addr);
3720 MXGE_HIGHPART_TO_U32(ss->fw_stats_dma.bus_addr);
3721 cmd.data2 = sizeof(struct mcp_irq_data);
3722 cmd.data2 |= (slice << 16);
3723 err |= mxge_send_cmd(sc, MXGEFW_CMD_SET_STATS_DMA_V2, &cmd);
3727 bus = sc->ss->fw_stats_dma.bus_addr;
3728 bus += offsetof(struct mcp_irq_data, send_done_count);
3729 cmd.data0 = MXGE_LOWPART_TO_U32(bus);
3730 cmd.data1 = MXGE_HIGHPART_TO_U32(bus);
3731 err = mxge_send_cmd(sc,
3732 MXGEFW_CMD_SET_STATS_DMA_OBSOLETE,
3734 /* Firmware cannot support multicast without STATS_DMA_V2 */
3735 sc->fw_multicast_support = 0;
3737 sc->fw_multicast_support = 1;
3741 device_printf(sc->dev, "failed to setup params\n");
3745 for (slice = 0; slice < sc->num_slices; slice++) {
3746 err = mxge_slice_open(&sc->ss[slice], nbufs, cl_size);
3748 device_printf(sc->dev, "couldn't open slice %d\n",
3754 /* Finally, start the firmware running */
3755 err = mxge_send_cmd(sc, MXGEFW_CMD_ETHERNET_UP, &cmd);
3757 device_printf(sc->dev, "Couldn't bring up link\n");
3760 #ifdef IFNET_BUF_RING
3761 for (slice = 0; slice < sc->num_slices; slice++) {
3762 ss = &sc->ss[slice];
3763 ss->if_drv_flags |= IFF_DRV_RUNNING;
3764 ss->if_drv_flags &= ~IFF_DRV_OACTIVE;
3767 sc->ifp->if_drv_flags |= IFF_DRV_RUNNING;
3768 sc->ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;
3774 mxge_free_mbufs(sc);
3780 mxge_close(mxge_softc_t *sc, int down)
3783 int err, old_down_cnt;
3784 #ifdef IFNET_BUF_RING
3785 struct mxge_slice_state *ss;
3789 #ifdef IFNET_BUF_RING
3790 for (slice = 0; slice < sc->num_slices; slice++) {
3791 ss = &sc->ss[slice];
3792 ss->if_drv_flags &= ~IFF_DRV_RUNNING;
3795 sc->ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
3797 old_down_cnt = sc->down_cnt;
3799 err = mxge_send_cmd(sc, MXGEFW_CMD_ETHERNET_DOWN, &cmd);
3801 device_printf(sc->dev,
3802 "Couldn't bring down link\n");
3804 if (old_down_cnt == sc->down_cnt) {
3805 /* wait for down irq */
3806 DELAY(10 * sc->intr_coal_delay);
3809 if (old_down_cnt == sc->down_cnt) {
3810 device_printf(sc->dev, "never got down irq\n");
3813 mxge_free_mbufs(sc);
3819 mxge_setup_cfg_space(mxge_softc_t *sc)
3821 device_t dev = sc->dev;
3823 uint16_t lnk, pectl;
3825 /* find the PCIe link width and set max read request to 4KB */
3826 if (pci_find_cap(dev, PCIY_EXPRESS, &reg) == 0) {
3827 lnk = pci_read_config(dev, reg + 0x12, 2);
3828 sc->link_width = (lnk >> 4) & 0x3f;
3830 if (sc->pectl == 0) {
3831 pectl = pci_read_config(dev, reg + 0x8, 2);
3832 pectl = (pectl & ~0x7000) | (5 << 12);
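/*
 * Bits 14:12 of the PCIe device control register encode the maximum
 * read request size; the value 5 selects 4096 bytes, matching the
 * comment above.
 */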
3833 pci_write_config(dev, reg + 0x8, pectl, 2);
3836 /* restore saved pectl after watchdog reset */
3837 pci_write_config(dev, reg + 0x8, sc->pectl, 2);
3841 /* Enable DMA and Memory space access */
3842 pci_enable_busmaster(dev);
3846 mxge_read_reboot(mxge_softc_t *sc)
3848 device_t dev = sc->dev;
3851 /* find the vendor specific offset */
3852 if (pci_find_cap(dev, PCIY_VENDOR, &vs) != 0) {
3853 device_printf(sc->dev,
3854 "could not find vendor specific offset\n");
3855 return (uint32_t)-1;
3857 /* enable read32 mode */
3858 pci_write_config(dev, vs + 0x10, 0x3, 1);
3859 /* tell NIC which register to read */
3860 pci_write_config(dev, vs + 0x18, 0xfffffff0, 4);
3861 return (pci_read_config(dev, vs + 0x14, 4));
3865 mxge_watchdog_reset(mxge_softc_t *sc)
3867 struct pci_devinfo *dinfo;
3868 struct mxge_slice_state *ss;
3869 int err, running, s, num_tx_slices = 1;
3875 device_printf(sc->dev, "Watchdog reset!\n");
3878 * check to see if the NIC rebooted. If it did, then all of
3879 * PCI config space has been reset, and things like the
3880 * busmaster bit will be zero. If this is the case, then we
3881 * must restore PCI config space before the NIC can be used
3884 cmd = pci_read_config(sc->dev, PCIR_COMMAND, 2);
3885 if (cmd == 0xffff) {
3887 * maybe the watchdog caught the NIC rebooting; wait
3888 * up to 100ms for it to finish. If it does not come
3889 * back, then give up
3892 cmd = pci_read_config(sc->dev, PCIR_COMMAND, 2);
3893 if (cmd == 0xffff) {
3894 device_printf(sc->dev, "NIC disappeared!\n");
3897 if ((cmd & PCIM_CMD_BUSMASTEREN) == 0) {
3898 /* print the reboot status */
3899 reboot = mxge_read_reboot(sc);
3900 device_printf(sc->dev, "NIC rebooted, status = 0x%x\n",
3902 running = sc->ifp->if_drv_flags & IFF_DRV_RUNNING;
3906 * quiesce NIC so that TX routines will not try to
3907 * xmit after restoration of BAR
3910 /* Mark the link as down */
3911 if (sc->link_state) {
3913 if_link_state_change(sc->ifp,
3916 #ifdef IFNET_BUF_RING
3917 num_tx_slices = sc->num_slices;
3919 /* grab all TX locks to ensure no tx */
3920 for (s = 0; s < num_tx_slices; s++) {
3922 mtx_lock(&ss->tx.mtx);
3926 /* restore PCI configuration space */
3927 dinfo = device_get_ivars(sc->dev);
3928 pci_cfg_restore(sc->dev, dinfo);
3930 /* and redo any changes we made to our config space */
3931 mxge_setup_cfg_space(sc);
3934 err = mxge_load_firmware(sc, 0);
3936 device_printf(sc->dev,
3937 "Unable to re-load f/w\n");
3941 err = mxge_open(sc);
3942 /* release all TX locks */
3943 for (s = 0; s < num_tx_slices; s++) {
3945 #ifdef IFNET_BUF_RING
3946 mxge_start_locked(ss);
3948 mtx_unlock(&ss->tx.mtx);
3951 sc->watchdog_resets++;
3953 device_printf(sc->dev,
3954 "NIC did not reboot, not resetting\n");
3958 device_printf(sc->dev, "watchdog reset failed\n");
3962 callout_reset(&sc->co_hdl, mxge_ticks, mxge_tick, sc);
3967 mxge_watchdog_task(void *arg, int pending)
3969 mxge_softc_t *sc = arg;
3972 mtx_lock(&sc->driver_mtx);
3973 mxge_watchdog_reset(sc);
3974 mtx_unlock(&sc->driver_mtx);
3978 mxge_warn_stuck(mxge_softc_t *sc, mxge_tx_ring_t *tx, int slice)
3980 tx = &sc->ss[slice].tx;
3981 device_printf(sc->dev, "slice %d stuck? ring state:\n", slice);
3982 device_printf(sc->dev,
3983 "tx.req=%d tx.done=%d, tx.queue_active=%d\n",
3984 tx->req, tx->done, tx->queue_active);
3985 device_printf(sc->dev, "tx.activate=%d tx.deactivate=%d\n",
3986 tx->activate, tx->deactivate);
3987 device_printf(sc->dev, "pkt_done=%d fw=%d\n",
3989 be32toh(sc->ss->fw_stats->send_done_count));
3993 mxge_watchdog(mxge_softc_t *sc)
3996 uint32_t rx_pause = be32toh(sc->ss->fw_stats->dropped_pause);
3999 /* see if we have outstanding transmits, which
4000 have been pending for more than mxge_ticks */
4002 #ifdef IFNET_BUF_RING
4003 (i < sc->num_slices) && (err == 0);
4005 (i < 1) && (err == 0);
4009 if (tx->req != tx->done &&
4010 tx->watchdog_req != tx->watchdog_done &&
4011 tx->done == tx->watchdog_done) {
4012 /* check for pause blocking before resetting */
4013 if (tx->watchdog_rx_pause == rx_pause) {
4014 mxge_warn_stuck(sc, tx, i);
4015 taskqueue_enqueue(sc->tq, &sc->watchdog_task);
4019 device_printf(sc->dev, "Flow control blocking "
4020 "xmits, check link partner\n");
4023 tx->watchdog_req = tx->req;
4024 tx->watchdog_done = tx->done;
4025 tx->watchdog_rx_pause = rx_pause;
4028 if (sc->need_media_probe)
4029 mxge_media_probe(sc);
4034 mxge_get_counter(struct ifnet *ifp, ift_counter cnt)
4036 struct mxge_softc *sc;
4039 sc = if_getsoftc(ifp);
4043 case IFCOUNTER_IPACKETS:
4044 for (int s = 0; s < sc->num_slices; s++)
4045 rv += sc->ss[s].ipackets;
4047 case IFCOUNTER_OPACKETS:
4048 for (int s = 0; s < sc->num_slices; s++)
4049 rv += sc->ss[s].opackets;
4051 case IFCOUNTER_OERRORS:
4052 for (int s = 0; s < sc->num_slices; s++)
4053 rv += sc->ss[s].oerrors;
4055 #ifdef IFNET_BUF_RING
4056 case IFCOUNTER_OBYTES:
4057 for (int s = 0; s < sc->num_slices; s++)
4058 rv += sc->ss[s].obytes;
4060 case IFCOUNTER_OMCASTS:
4061 for (int s = 0; s < sc->num_slices; s++)
4062 rv += sc->ss[s].omcasts;
4064 case IFCOUNTER_OQDROPS:
4065 for (int s = 0; s < sc->num_slices; s++)
4066 rv += sc->ss[s].tx.br->br_drops;
4070 return (if_get_counter_default(ifp, cnt));
4075 mxge_tick(void *arg)
4077 mxge_softc_t *sc = arg;
4084 running = sc->ifp->if_drv_flags & IFF_DRV_RUNNING;
4086 if (!sc->watchdog_countdown) {
4087 err = mxge_watchdog(sc);
4088 sc->watchdog_countdown = 4;
4090 sc->watchdog_countdown--;
4093 /* ensure NIC did not suffer h/w fault while idle */
4094 cmd = pci_read_config(sc->dev, PCIR_COMMAND, 2);
4095 if ((cmd & PCIM_CMD_BUSMASTEREN) == 0) {
4097 taskqueue_enqueue(sc->tq, &sc->watchdog_task);
4100 /* look less often if NIC is idle */
4105 callout_reset(&sc->co_hdl, ticks, mxge_tick, sc);
4110 mxge_media_change(struct ifnet *ifp)
4116 mxge_change_mtu(mxge_softc_t *sc, int mtu)
4118 struct ifnet *ifp = sc->ifp;
4119 int real_mtu, old_mtu;
4123 real_mtu = mtu + ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
4124 if ((real_mtu > sc->max_mtu) || real_mtu < 60)
4126 mtx_lock(&sc->driver_mtx);
4127 old_mtu = ifp->if_mtu;
4129 if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
4131 err = mxge_open(sc);
4133 ifp->if_mtu = old_mtu;
4135 (void) mxge_open(sc);
4138 mtx_unlock(&sc->driver_mtx);
4143 mxge_media_status(struct ifnet *ifp, struct ifmediareq *ifmr)
4145 mxge_softc_t *sc = ifp->if_softc;
4150 ifmr->ifm_status = IFM_AVALID;
4151 ifmr->ifm_active = IFM_ETHER | IFM_FDX;
4152 ifmr->ifm_status |= sc->link_state ? IFM_ACTIVE : 0;
4153 ifmr->ifm_active |= sc->current_media;
4157 mxge_ioctl(struct ifnet *ifp, u_long command, caddr_t data)
4159 mxge_softc_t *sc = ifp->if_softc;
4160 struct ifreq *ifr = (struct ifreq *)data;
4167 err = ether_ioctl(ifp, command, data);
4171 err = mxge_change_mtu(sc, ifr->ifr_mtu);
4175 mtx_lock(&sc->driver_mtx);
4177 mtx_unlock(&sc->driver_mtx);
4180 if (ifp->if_flags & IFF_UP) {
4181 if (!(ifp->if_drv_flags & IFF_DRV_RUNNING)) {
4182 err = mxge_open(sc);
4184 /* take care of promisc and allmulti
4186 mxge_change_promisc(sc,
4187 ifp->if_flags & IFF_PROMISC);
4188 mxge_set_multicast_list(sc);
4191 if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
4195 mtx_unlock(&sc->driver_mtx);
4200 mtx_lock(&sc->driver_mtx);
4201 mxge_set_multicast_list(sc);
4202 mtx_unlock(&sc->driver_mtx);
4206 mtx_lock(&sc->driver_mtx);
4207 mask = ifr->ifr_reqcap ^ ifp->if_capenable;
4208 if (mask & IFCAP_TXCSUM) {
4209 if (IFCAP_TXCSUM & ifp->if_capenable) {
4210 ifp->if_capenable &= ~(IFCAP_TXCSUM|IFCAP_TSO4);
4211 ifp->if_hwassist &= ~(CSUM_TCP | CSUM_UDP);
4213 ifp->if_capenable |= IFCAP_TXCSUM;
4214 ifp->if_hwassist |= (CSUM_TCP | CSUM_UDP);
4216 } else if (mask & IFCAP_RXCSUM) {
4217 if (IFCAP_RXCSUM & ifp->if_capenable) {
4218 ifp->if_capenable &= ~IFCAP_RXCSUM;
4220 ifp->if_capenable |= IFCAP_RXCSUM;
4223 if (mask & IFCAP_TSO4) {
4224 if (IFCAP_TSO4 & ifp->if_capenable) {
4225 ifp->if_capenable &= ~IFCAP_TSO4;
4226 } else if (IFCAP_TXCSUM & ifp->if_capenable) {
4227 ifp->if_capenable |= IFCAP_TSO4;
4228 ifp->if_hwassist |= CSUM_TSO;
4230 printf("mxge requires tx checksum offload"
4231 " be enabled to use TSO\n");
4236 if (mask & IFCAP_TXCSUM_IPV6) {
4237 if (IFCAP_TXCSUM_IPV6 & ifp->if_capenable) {
4238 ifp->if_capenable &= ~(IFCAP_TXCSUM_IPV6
4240 ifp->if_hwassist &= ~(CSUM_TCP_IPV6
4243 ifp->if_capenable |= IFCAP_TXCSUM_IPV6;
4244 ifp->if_hwassist |= (CSUM_TCP_IPV6
4247 } else if (mask & IFCAP_RXCSUM_IPV6) {
4248 if (IFCAP_RXCSUM_IPV6 & ifp->if_capenable) {
4249 ifp->if_capenable &= ~IFCAP_RXCSUM_IPV6;
4251 ifp->if_capenable |= IFCAP_RXCSUM_IPV6;
4254 if (mask & IFCAP_TSO6) {
4255 if (IFCAP_TSO6 & ifp->if_capenable) {
4256 ifp->if_capenable &= ~IFCAP_TSO6;
4257 } else if (IFCAP_TXCSUM_IPV6 & ifp->if_capenable) {
4258 ifp->if_capenable |= IFCAP_TSO6;
4259 ifp->if_hwassist |= CSUM_TSO;
4261 printf("mxge requires tx checksum offload"
4262 " be enabled to use TSO\n");
4266 #endif /*IFCAP_TSO6 */
4268 if (mask & IFCAP_LRO)
4269 ifp->if_capenable ^= IFCAP_LRO;
4270 if (mask & IFCAP_VLAN_HWTAGGING)
4271 ifp->if_capenable ^= IFCAP_VLAN_HWTAGGING;
4272 if (mask & IFCAP_VLAN_HWTSO)
4273 ifp->if_capenable ^= IFCAP_VLAN_HWTSO;
4275 if (!(ifp->if_capabilities & IFCAP_VLAN_HWTSO) ||
4276 !(ifp->if_capenable & IFCAP_VLAN_HWTAGGING))
4277 ifp->if_capenable &= ~IFCAP_VLAN_HWTSO;
4279 mtx_unlock(&sc->driver_mtx);
4280 VLAN_CAPABILITIES(ifp);
4285 mtx_lock(&sc->driver_mtx);
4286 mxge_media_probe(sc);
4287 mtx_unlock(&sc->driver_mtx);
4288 err = ifmedia_ioctl(ifp, (struct ifreq *)data,
4289 &sc->media, command);
4299 mxge_fetch_tunables(mxge_softc_t *sc)
4302 TUNABLE_INT_FETCH("hw.mxge.max_slices", &mxge_max_slices);
4303 TUNABLE_INT_FETCH("hw.mxge.flow_control_enabled",
4304 &mxge_flow_control);
4305 TUNABLE_INT_FETCH("hw.mxge.intr_coal_delay",
4306 &mxge_intr_coal_delay);
4307 TUNABLE_INT_FETCH("hw.mxge.nvidia_ecrc_enable",
4308 &mxge_nvidia_ecrc_enable);
4309 TUNABLE_INT_FETCH("hw.mxge.force_firmware",
4310 &mxge_force_firmware);
4311 TUNABLE_INT_FETCH("hw.mxge.deassert_wait",
4312 &mxge_deassert_wait);
4313 TUNABLE_INT_FETCH("hw.mxge.verbose",
4315 TUNABLE_INT_FETCH("hw.mxge.ticks", &mxge_ticks);
4316 TUNABLE_INT_FETCH("hw.mxge.always_promisc", &mxge_always_promisc);
4317 TUNABLE_INT_FETCH("hw.mxge.rss_hash_type", &mxge_rss_hash_type);
4318 TUNABLE_INT_FETCH("hw.mxge.rss_hashtype", &mxge_rss_hash_type);
4319 TUNABLE_INT_FETCH("hw.mxge.initial_mtu", &mxge_initial_mtu);
4320 TUNABLE_INT_FETCH("hw.mxge.throttle", &mxge_throttle);
4324 if (mxge_intr_coal_delay < 0 || mxge_intr_coal_delay > 10*1000)
4325 mxge_intr_coal_delay = 30;
4326 if (mxge_ticks == 0)
4327 mxge_ticks = hz / 2;
4328 sc->pause = mxge_flow_control;
4329 if (mxge_rss_hash_type < MXGEFW_RSS_HASH_TYPE_IPV4
4330 || mxge_rss_hash_type > MXGEFW_RSS_HASH_TYPE_MAX) {
4331 mxge_rss_hash_type = MXGEFW_RSS_HASH_TYPE_SRC_DST_PORT;
4333 if (mxge_initial_mtu > ETHERMTU_JUMBO ||
4334 mxge_initial_mtu < ETHER_MIN_LEN)
4335 mxge_initial_mtu = ETHERMTU_JUMBO;
4337 if (mxge_throttle && mxge_throttle > MXGE_MAX_THROTTLE)
4338 mxge_throttle = MXGE_MAX_THROTTLE;
4339 if (mxge_throttle && mxge_throttle < MXGE_MIN_THROTTLE)
4340 mxge_throttle = MXGE_MIN_THROTTLE;
4341 sc->throttle = mxge_throttle;
4346 mxge_free_slices(mxge_softc_t *sc)
4348 struct mxge_slice_state *ss;
4355 for (i = 0; i < sc->num_slices; i++) {
4357 if (ss->fw_stats != NULL) {
4358 mxge_dma_free(&ss->fw_stats_dma);
4359 ss->fw_stats = NULL;
4360 #ifdef IFNET_BUF_RING
4361 if (ss->tx.br != NULL) {
4362 drbr_free(ss->tx.br, M_DEVBUF);
4366 mtx_destroy(&ss->tx.mtx);
4368 if (ss->rx_done.entry != NULL) {
4369 mxge_dma_free(&ss->rx_done.dma);
4370 ss->rx_done.entry = NULL;
4373 free(sc->ss, M_DEVBUF);
4378 mxge_alloc_slices(mxge_softc_t *sc)
4381 struct mxge_slice_state *ss;
4383 int err, i, max_intr_slots;
4385 err = mxge_send_cmd(sc, MXGEFW_CMD_GET_RX_RING_SIZE, &cmd);
4387 device_printf(sc->dev, "Cannot determine rx ring size\n");
4390 sc->rx_ring_size = cmd.data0;
4391 max_intr_slots = 2 * (sc->rx_ring_size / sizeof (mcp_dma_addr_t));
4393 bytes = sizeof (*sc->ss) * sc->num_slices;
4394 sc->ss = malloc(bytes, M_DEVBUF, M_NOWAIT | M_ZERO);
4397 for (i = 0; i < sc->num_slices; i++) {
4402 /* allocate per-slice rx interrupt queues */
4404 bytes = max_intr_slots * sizeof (*ss->rx_done.entry);
4405 err = mxge_dma_alloc(sc, &ss->rx_done.dma, bytes, 4096);
4408 ss->rx_done.entry = ss->rx_done.dma.addr;
4409 bzero(ss->rx_done.entry, bytes);
4412 * allocate the per-slice firmware stats; stats
4413 * (including tx) are used only on the first
4416 #ifndef IFNET_BUF_RING
4421 bytes = sizeof (*ss->fw_stats);
4422 err = mxge_dma_alloc(sc, &ss->fw_stats_dma,
4423 sizeof (*ss->fw_stats), 64);
4426 ss->fw_stats = (mcp_irq_data_t *)ss->fw_stats_dma.addr;
4427 snprintf(ss->tx.mtx_name, sizeof(ss->tx.mtx_name),
4428 "%s:tx(%d)", device_get_nameunit(sc->dev), i);
4429 mtx_init(&ss->tx.mtx, ss->tx.mtx_name, NULL, MTX_DEF);
4430 #ifdef IFNET_BUF_RING
4431 ss->tx.br = buf_ring_alloc(2048, M_DEVBUF, M_WAITOK,
4439 mxge_free_slices(sc);
4444 mxge_slice_probe(mxge_softc_t *sc)
4448 int msix_cnt, status, max_intr_slots;
4452 * don't enable multiple slices if the tunable disables them,
4453 * or if this is not an SMP system
4456 if (mxge_max_slices == 0 || mxge_max_slices == 1 || mp_ncpus < 2)
4459 /* see how many MSI-X interrupts are available */
4460 msix_cnt = pci_msix_count(sc->dev);
4464 /* now load the slice-aware firmware and see what it supports */
4465 old_fw = sc->fw_name;
4466 if (old_fw == mxge_fw_aligned)
4467 sc->fw_name = mxge_fw_rss_aligned;
4469 sc->fw_name = mxge_fw_rss_unaligned;
4470 status = mxge_load_firmware(sc, 0);
4472 device_printf(sc->dev, "Falling back to a single slice\n");
4476 /* try to send a reset command to the card to see if it
4478 memset(&cmd, 0, sizeof (cmd));
4479 status = mxge_send_cmd(sc, MXGEFW_CMD_RESET, &cmd);
4481 device_printf(sc->dev, "failed reset\n");
4485 /* get rx ring size */
4486 status = mxge_send_cmd(sc, MXGEFW_CMD_GET_RX_RING_SIZE, &cmd);
4488 device_printf(sc->dev, "Cannot determine rx ring size\n");
4491 max_intr_slots = 2 * (cmd.data0 / sizeof (mcp_dma_addr_t));
4493 /* tell it the size of the interrupt queues */
4494 cmd.data0 = max_intr_slots * sizeof (struct mcp_slot);
4495 status = mxge_send_cmd(sc, MXGEFW_CMD_SET_INTRQ_SIZE, &cmd);
4497 device_printf(sc->dev, "failed MXGEFW_CMD_SET_INTRQ_SIZE\n");
4501 /* ask the maximum number of slices it supports */
4502 status = mxge_send_cmd(sc, MXGEFW_CMD_GET_MAX_RSS_QUEUES, &cmd);
4504 device_printf(sc->dev,
4505 "failed MXGEFW_CMD_GET_MAX_RSS_QUEUES\n");
4508 sc->num_slices = cmd.data0;
4509 if (sc->num_slices > msix_cnt)
4510 sc->num_slices = msix_cnt;
4512 if (mxge_max_slices == -1) {
4513 /* cap to number of CPUs in system */
4514 if (sc->num_slices > mp_ncpus)
4515 sc->num_slices = mp_ncpus;
4517 if (sc->num_slices > mxge_max_slices)
4518 sc->num_slices = mxge_max_slices;
4520 /* make sure it is a power of two */
4521 while (sc->num_slices & (sc->num_slices - 1))
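/* (n & (n - 1)) is nonzero whenever n is not a power of two, so the
   loop keeps reducing num_slices until it lands on a power of two. */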
4525 device_printf(sc->dev, "using %d slices\n",
4531 sc->fw_name = old_fw;
4532 (void) mxge_load_firmware(sc, 0);
4536 mxge_add_msix_irqs(mxge_softc_t *sc)
4539 int count, err, i, rid;
4542 sc->msix_table_res = bus_alloc_resource_any(sc->dev, SYS_RES_MEMORY,
4545 if (sc->msix_table_res == NULL) {
4546 device_printf(sc->dev, "couldn't alloc MSIX table res\n");
4550 count = sc->num_slices;
4551 err = pci_alloc_msix(sc->dev, &count);
4553 device_printf(sc->dev, "pci_alloc_msix: failed, wanted %d"
4554 "err = %d \n", sc->num_slices, err);
4555 goto abort_with_msix_table;
4557 if (count < sc->num_slices) {
4558 device_printf(sc->dev, "pci_alloc_msix: need %d, got %d\n",
4559 count, sc->num_slices);
4560 device_printf(sc->dev,
4561 "Try setting hw.mxge.max_slices to %d\n",
4564 goto abort_with_msix;
4566 bytes = sizeof (*sc->msix_irq_res) * sc->num_slices;
4567 sc->msix_irq_res = malloc(bytes, M_DEVBUF, M_NOWAIT|M_ZERO);
4568 if (sc->msix_irq_res == NULL) {
4570 goto abort_with_msix;
4573 for (i = 0; i < sc->num_slices; i++) {
4575 sc->msix_irq_res[i] = bus_alloc_resource_any(sc->dev,
4578 if (sc->msix_irq_res[i] == NULL) {
4579 device_printf(sc->dev, "couldn't allocate IRQ res"
4580 " for message %d\n", i);
4582 goto abort_with_res;
4586 bytes = sizeof (*sc->msix_ih) * sc->num_slices;
4587 sc->msix_ih = malloc(bytes, M_DEVBUF, M_NOWAIT|M_ZERO);
4589 for (i = 0; i < sc->num_slices; i++) {
4590 err = bus_setup_intr(sc->dev, sc->msix_irq_res[i],
4591 INTR_TYPE_NET | INTR_MPSAFE,
4592 #if __FreeBSD_version > 700030
4595 mxge_intr, &sc->ss[i], &sc->msix_ih[i]);
4597 device_printf(sc->dev, "couldn't setup intr for "
4599 goto abort_with_intr;
4601 bus_describe_intr(sc->dev, sc->msix_irq_res[i],
4602 sc->msix_ih[i], "s%d", i);
4606 device_printf(sc->dev, "using %d msix IRQs:",
4608 for (i = 0; i < sc->num_slices; i++)
4609 printf(" %jd", rman_get_start(sc->msix_irq_res[i]));
4615 for (i = 0; i < sc->num_slices; i++) {
4616 if (sc->msix_ih[i] != NULL) {
4617 bus_teardown_intr(sc->dev, sc->msix_irq_res[i],
4619 sc->msix_ih[i] = NULL;
4622 free(sc->msix_ih, M_DEVBUF);
4626 for (i = 0; i < sc->num_slices; i++) {
4628 if (sc->msix_irq_res[i] != NULL)
4629 bus_release_resource(sc->dev, SYS_RES_IRQ, rid,
4630 sc->msix_irq_res[i]);
4631 sc->msix_irq_res[i] = NULL;
4633 free(sc->msix_irq_res, M_DEVBUF);
4637 pci_release_msi(sc->dev);
4639 abort_with_msix_table:
4640 bus_release_resource(sc->dev, SYS_RES_MEMORY, PCIR_BAR(2),
4641 sc->msix_table_res);
4647 mxge_add_single_irq(mxge_softc_t *sc)
4649 int count, err, rid;
4651 count = pci_msi_count(sc->dev);
4652 if (count == 1 && pci_alloc_msi(sc->dev, &count) == 0) {
4658 sc->irq_res = bus_alloc_resource_any(sc->dev, SYS_RES_IRQ, &rid,
4659 RF_SHAREABLE | RF_ACTIVE);
4660 if (sc->irq_res == NULL) {
4661 device_printf(sc->dev, "could not alloc interrupt\n");
4665 device_printf(sc->dev, "using %s irq %jd\n",
4666 sc->legacy_irq ? "INTx" : "MSI",
4667 rman_get_start(sc->irq_res));
4668 err = bus_setup_intr(sc->dev, sc->irq_res,
4669 INTR_TYPE_NET | INTR_MPSAFE,
4670 #if __FreeBSD_version > 700030
4673 mxge_intr, &sc->ss[0], &sc->ih);
4675 bus_release_resource(sc->dev, SYS_RES_IRQ,
4676 sc->legacy_irq ? 0 : 1, sc->irq_res);
4677 if (!sc->legacy_irq)
4678 pci_release_msi(sc->dev);
4684 mxge_rem_msix_irqs(mxge_softc_t *sc)
4688 for (i = 0; i < sc->num_slices; i++) {
4689 if (sc->msix_ih[i] != NULL) {
4690 bus_teardown_intr(sc->dev, sc->msix_irq_res[i],
4692 sc->msix_ih[i] = NULL;
4695 free(sc->msix_ih, M_DEVBUF);
4697 for (i = 0; i < sc->num_slices; i++) {
4699 if (sc->msix_irq_res[i] != NULL)
4700 bus_release_resource(sc->dev, SYS_RES_IRQ, rid,
4701 sc->msix_irq_res[i]);
4702 sc->msix_irq_res[i] = NULL;
4704 free(sc->msix_irq_res, M_DEVBUF);
4706 bus_release_resource(sc->dev, SYS_RES_MEMORY, PCIR_BAR(2),
4707 sc->msix_table_res);
4709 pci_release_msi(sc->dev);
4714 mxge_rem_single_irq(mxge_softc_t *sc)
4716 bus_teardown_intr(sc->dev, sc->irq_res, sc->ih);
4717 bus_release_resource(sc->dev, SYS_RES_IRQ,
4718 sc->legacy_irq ? 0 : 1, sc->irq_res);
4719 if (!sc->legacy_irq)
4720 pci_release_msi(sc->dev);
4724 mxge_rem_irq(mxge_softc_t *sc)
4726 if (sc->num_slices > 1)
4727 mxge_rem_msix_irqs(sc);
4729 mxge_rem_single_irq(sc);
4733 mxge_add_irq(mxge_softc_t *sc)
4737 if (sc->num_slices > 1)
4738 err = mxge_add_msix_irqs(sc);
4740 err = mxge_add_single_irq(sc);
4742 if (0 && err == 0 && sc->num_slices > 1) {
4743 mxge_rem_msix_irqs(sc);
4744 err = mxge_add_msix_irqs(sc);
4751 mxge_attach(device_t dev)
4754 mxge_softc_t *sc = device_get_softc(dev);
4759 mxge_fetch_tunables(sc);
4761 TASK_INIT(&sc->watchdog_task, 1, mxge_watchdog_task, sc);
4762 sc->tq = taskqueue_create("mxge_taskq", M_WAITOK,
4763 taskqueue_thread_enqueue, &sc->tq);
4764 if (sc->tq == NULL) {
4766 goto abort_with_nothing;
4769 err = bus_dma_tag_create(bus_get_dma_tag(dev), /* parent */
4772 BUS_SPACE_MAXADDR, /* low */
4773 BUS_SPACE_MAXADDR, /* high */
4774 NULL, NULL, /* filter */
4775 65536 + 256, /* maxsize */
4776 MXGE_MAX_SEND_DESC, /* num segs */
4777 65536, /* maxsegsize */
4779 NULL, NULL, /* lock */
4780 &sc->parent_dmat); /* tag */
4783 device_printf(sc->dev, "Err %d allocating parent dmat\n",
4788 ifp = sc->ifp = if_alloc(IFT_ETHER);
4790 device_printf(dev, "can not if_alloc()\n");
4792 goto abort_with_parent_dmat;
4794 if_initname(ifp, device_get_name(dev), device_get_unit(dev));
4796 snprintf(sc->cmd_mtx_name, sizeof(sc->cmd_mtx_name), "%s:cmd",
4797 device_get_nameunit(dev));
4798 mtx_init(&sc->cmd_mtx, sc->cmd_mtx_name, NULL, MTX_DEF);
4799 snprintf(sc->driver_mtx_name, sizeof(sc->driver_mtx_name),
4800 "%s:drv", device_get_nameunit(dev));
4801 mtx_init(&sc->driver_mtx, sc->driver_mtx_name,
4802 MTX_NETWORK_LOCK, MTX_DEF);
4804 callout_init_mtx(&sc->co_hdl, &sc->driver_mtx, 0);
4806 mxge_setup_cfg_space(sc);
4808 /* Map the board into the kernel */
4810 sc->mem_res = bus_alloc_resource_any(dev, SYS_RES_MEMORY, &rid,
4812 if (sc->mem_res == NULL) {
4813 device_printf(dev, "could not map memory\n");
4815 goto abort_with_lock;
4817 sc->sram = rman_get_virtual(sc->mem_res);
4818 sc->sram_size = 2*1024*1024 - (2*(48*1024)+(32*1024)) - 0x100;
4819 if (sc->sram_size > rman_get_size(sc->mem_res)) {
4820 device_printf(dev, "impossible memory region size %jd\n",
4821 rman_get_size(sc->mem_res));
4823 goto abort_with_mem_res;
4826 /* make NULL terminated copy of the EEPROM strings section of
4828 bzero(sc->eeprom_strings, MXGE_EEPROM_STRINGS_SIZE);
4829 bus_space_read_region_1(rman_get_bustag(sc->mem_res),
4830 rman_get_bushandle(sc->mem_res),
4831 sc->sram_size - MXGE_EEPROM_STRINGS_SIZE,
4833 MXGE_EEPROM_STRINGS_SIZE - 2);
4834 err = mxge_parse_strings(sc);
4836 goto abort_with_mem_res;
4838 /* Enable write combining for efficient use of PCIe bus */
4841 /* Allocate the out of band dma memory */
4842 err = mxge_dma_alloc(sc, &sc->cmd_dma,
4843 sizeof (mxge_cmd_t), 64);
4845 goto abort_with_mem_res;
4846 sc->cmd = (mcp_cmd_response_t *) sc->cmd_dma.addr;
4847 err = mxge_dma_alloc(sc, &sc->zeropad_dma, 64, 64);
4849 goto abort_with_cmd_dma;
4851 err = mxge_dma_alloc(sc, &sc->dmabench_dma, 4096, 4096);
4853 goto abort_with_zeropad_dma;
4855 /* select & load the firmware */
4856 err = mxge_select_firmware(sc);
4858 goto abort_with_dmabench;
4859 sc->intr_coal_delay = mxge_intr_coal_delay;
4861 mxge_slice_probe(sc);
4862 err = mxge_alloc_slices(sc);
4864 goto abort_with_dmabench;
4866 err = mxge_reset(sc, 0);
4868 goto abort_with_slices;
4870 err = mxge_alloc_rings(sc);
4872 device_printf(sc->dev, "failed to allocate rings\n");
4873 goto abort_with_slices;
4876 err = mxge_add_irq(sc);
4878 device_printf(sc->dev, "failed to add irq\n");
4879 goto abort_with_rings;
4882 ifp->if_baudrate = IF_Gbps(10);
4883 ifp->if_capabilities = IFCAP_RXCSUM | IFCAP_TXCSUM | IFCAP_TSO4 |
4884 IFCAP_VLAN_MTU | IFCAP_LINKSTATE | IFCAP_TXCSUM_IPV6 |
4886 #if defined(INET) || defined(INET6)
4887 ifp->if_capabilities |= IFCAP_LRO;
4890 #ifdef MXGE_NEW_VLAN_API
4891 ifp->if_capabilities |= IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_HWCSUM;
4893 /* Only FW 1.4.32 and newer can do TSO over vlans */
4894 if (sc->fw_ver_major == 1 && sc->fw_ver_minor == 4 &&
4895 sc->fw_ver_tiny >= 32)
4896 ifp->if_capabilities |= IFCAP_VLAN_HWTSO;
4898 sc->max_mtu = mxge_max_mtu(sc);
4899 if (sc->max_mtu >= 9000)
4900 ifp->if_capabilities |= IFCAP_JUMBO_MTU;
4902 device_printf(dev, "MTU limited to %d. Install "
4903 "latest firmware for 9000 byte jumbo support\n",
4904 sc->max_mtu - ETHER_HDR_LEN);
4905 ifp->if_hwassist = CSUM_TCP | CSUM_UDP | CSUM_TSO;
4906 ifp->if_hwassist |= CSUM_TCP_IPV6 | CSUM_UDP_IPV6;
4907 /* check to see if f/w supports TSO for IPv6 */
4908 if (!mxge_send_cmd(sc, MXGEFW_CMD_GET_MAX_TSO6_HDR_SIZE, &cmd)) {
4910 ifp->if_capabilities |= IFCAP_TSO6;
4911 sc->max_tso6_hlen = min(cmd.data0,
4912 sizeof (sc->ss[0].scratch));
4914 ifp->if_capenable = ifp->if_capabilities;
4915 if (sc->lro_cnt == 0)
4916 ifp->if_capenable &= ~IFCAP_LRO;
4917 ifp->if_init = mxge_init;
4919 ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
4920 ifp->if_ioctl = mxge_ioctl;
4921 ifp->if_start = mxge_start;
4922 ifp->if_get_counter = mxge_get_counter;
4923 /* Initialise the ifmedia structure */
4924 ifmedia_init(&sc->media, 0, mxge_media_change,
4926 mxge_media_init(sc);
4927 mxge_media_probe(sc);
4929 ether_ifattach(ifp, sc->mac_addr);
4930 /* ether_ifattach sets mtu to ETHERMTU */
4931 if (mxge_initial_mtu != ETHERMTU)
4932 mxge_change_mtu(sc, mxge_initial_mtu);
4934 mxge_add_sysctls(sc);
4935 #ifdef IFNET_BUF_RING
4936 ifp->if_transmit = mxge_transmit;
4937 ifp->if_qflush = mxge_qflush;
4939 taskqueue_start_threads(&sc->tq, 1, PI_NET, "%s taskq",
4940 device_get_nameunit(sc->dev));
4941 callout_reset(&sc->co_hdl, mxge_ticks, mxge_tick, sc);
4945 mxge_free_rings(sc);
4947 mxge_free_slices(sc);
4948 abort_with_dmabench:
4949 mxge_dma_free(&sc->dmabench_dma);
4950 abort_with_zeropad_dma:
4951 mxge_dma_free(&sc->zeropad_dma);
4953 mxge_dma_free(&sc->cmd_dma);
4955 bus_release_resource(dev, SYS_RES_MEMORY, PCIR_BARS, sc->mem_res);
4957 pci_disable_busmaster(dev);
4958 mtx_destroy(&sc->cmd_mtx);
4959 mtx_destroy(&sc->driver_mtx);
4961 abort_with_parent_dmat:
4962 bus_dma_tag_destroy(sc->parent_dmat);
4964 if (sc->tq != NULL) {
4965 taskqueue_drain(sc->tq, &sc->watchdog_task);
4966 taskqueue_free(sc->tq);
4974 mxge_detach(device_t dev)
4976 mxge_softc_t *sc = device_get_softc(dev);
4978 if (mxge_vlans_active(sc)) {
4979 device_printf(sc->dev,
4980 "Detach vlans before removing module\n");
4983 mtx_lock(&sc->driver_mtx);
4985 if (sc->ifp->if_drv_flags & IFF_DRV_RUNNING)
4987 mtx_unlock(&sc->driver_mtx);
4988 ether_ifdetach(sc->ifp);
4989 if (sc->tq != NULL) {
4990 taskqueue_drain(sc->tq, &sc->watchdog_task);
4991 taskqueue_free(sc->tq);
4994 callout_drain(&sc->co_hdl);
4995 ifmedia_removeall(&sc->media);
4996 mxge_dummy_rdma(sc, 0);
4997 mxge_rem_sysctls(sc);
4999 mxge_free_rings(sc);
5000 mxge_free_slices(sc);
5001 mxge_dma_free(&sc->dmabench_dma);
5002 mxge_dma_free(&sc->zeropad_dma);
5003 mxge_dma_free(&sc->cmd_dma);
5004 bus_release_resource(dev, SYS_RES_MEMORY, PCIR_BARS, sc->mem_res);
5005 pci_disable_busmaster(dev);
5006 mtx_destroy(&sc->cmd_mtx);
5007 mtx_destroy(&sc->driver_mtx);
5009 bus_dma_tag_destroy(sc->parent_dmat);
5014 mxge_shutdown(device_t dev)
5020 This file uses Myri10GE driver indentation.
5023 c-file-style:"linux"