1 /******************************************************************************
3 Copyright (c) 2006-2013, Myricom Inc.
6 Redistribution and use in source and binary forms, with or without
7 modification, are permitted provided that the following conditions are met:
9 1. Redistributions of source code must retain the above copyright notice,
10 this list of conditions and the following disclaimer.
12 2. Neither the name of the Myricom Inc, nor the names of its
13 contributors may be used to endorse or promote products derived from
14 this software without specific prior written permission.
16 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
17 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
20 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
21 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
22 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
23 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
24 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
25 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
26 POSSIBILITY OF SUCH DAMAGE.
28 ***************************************************************************/
30 #include <sys/cdefs.h>
31 __FBSDID("$FreeBSD$");
33 #include <sys/param.h>
34 #include <sys/systm.h>
35 #include <sys/linker.h>
36 #include <sys/firmware.h>
37 #include <sys/endian.h>
38 #include <sys/sockio.h>
40 #include <sys/malloc.h>
42 #include <sys/kernel.h>
44 #include <sys/module.h>
45 #include <sys/socket.h>
46 #include <sys/sysctl.h>
48 #include <sys/taskqueue.h>
52 #include <net/if_var.h>
53 #include <net/if_arp.h>
54 #include <net/ethernet.h>
55 #include <net/if_dl.h>
56 #include <net/if_media.h>
60 #include <net/if_types.h>
61 #include <net/if_vlan_var.h>
63 #include <netinet/in_systm.h>
64 #include <netinet/in.h>
65 #include <netinet/ip.h>
66 #include <netinet/ip6.h>
67 #include <netinet/tcp.h>
68 #include <netinet/tcp_lro.h>
69 #include <netinet6/ip6_var.h>
71 #include <machine/bus.h>
72 #include <machine/in_cksum.h>
73 #include <machine/resource.h>
78 #include <dev/pci/pcireg.h>
79 #include <dev/pci/pcivar.h>
80 #include <dev/pci/pci_private.h> /* XXX for pci_cfg_restore */
82 #include <vm/vm.h> /* for pmap_mapdev() */
85 #if defined(__i386) || defined(__amd64)
86 #include <machine/specialreg.h>
89 #include <dev/mxge/mxge_mcp.h>
90 #include <dev/mxge/mcp_gen_header.h>
91 /*#define MXGE_FAKE_IFP*/
92 #include <dev/mxge/if_mxge_var.h>
94 #include <sys/buf_ring.h>
98 #include "opt_inet6.h"
/*
 * Driver tunables.  These file-scope defaults are presumably exported as
 * loader tunables / sysctls elsewhere in the file -- TODO confirm.
 */
101 static int mxge_nvidia_ecrc_enable = 1;	/* try enabling ECRC on Nvidia bridges */
102 static int mxge_force_firmware = 0;	/* force aligned/unaligned fw selection */
103 static int mxge_intr_coal_delay = 30;	/* interrupt coalescing delay (usecs) */
104 static int mxge_deassert_wait = 1;
105 static int mxge_flow_control = 1;
106 static int mxge_verbose = 0;
107 static int mxge_ticks;
108 static int mxge_max_slices = 1;
109 static int mxge_rss_hash_type = MXGEFW_RSS_HASH_TYPE_SRC_DST_PORT;
110 static int mxge_always_promisc = 0;
111 static int mxge_initial_mtu = ETHERMTU_JUMBO;
112 static int mxge_throttle = 0;
/* Firmware image names; "aligned" vs "unaligned" refers to PCIe completion
 * alignment (see the long comment above mxge_firmware_probe below). */
113 static char *mxge_fw_unaligned = "mxge_ethp_z8e";
114 static char *mxge_fw_aligned = "mxge_eth_z8e";
115 static char *mxge_fw_rss_aligned = "mxge_rss_eth_z8e";
116 static char *mxge_fw_rss_unaligned = "mxge_rss_ethp_z8e";
/* Forward declarations for the newbus device interface entry points. */
118 static int mxge_probe(device_t dev);
119 static int mxge_attach(device_t dev);
120 static int mxge_detach(device_t dev);
121 static int mxge_shutdown(device_t dev);
122 static void mxge_intr(void *arg);
/* Newbus method table wiring the entry points above into the device framework. */
124 static device_method_t mxge_methods[] =
126 /* Device interface */
127 DEVMETHOD(device_probe, mxge_probe),
128 DEVMETHOD(device_attach, mxge_attach),
129 DEVMETHOD(device_detach, mxge_detach),
130 DEVMETHOD(device_shutdown, mxge_shutdown),
135 static driver_t mxge_driver =
139 sizeof(mxge_softc_t),
142 static devclass_t mxge_devclass;
144 /* Declare ourselves to be a child of the PCI bus.*/
145 DRIVER_MODULE(mxge, pci, mxge_driver, mxge_devclass, 0, 0);
/* The driver needs firmware(9) to fetch its MCP images, and zlib to inflate them. */
146 MODULE_DEPEND(mxge, firmware, 1, 1, 1);
147 MODULE_DEPEND(mxge, zlib, 1, 1, 1);
/* Forward declarations for internal helpers used before their definitions. */
149 static int mxge_load_firmware(mxge_softc_t *sc, int adopt);
150 static int mxge_send_cmd(mxge_softc_t *sc, uint32_t cmd, mxge_cmd_t *data);
151 static int mxge_close(mxge_softc_t *sc, int down);
152 static int mxge_open(mxge_softc_t *sc);
153 static void mxge_tick(void *arg);
/*
 * Device probe: match Myricom Z8E / Z8E_9 PCI IDs and set a
 * human-readable description based on the PCI revision ID.
 */
156 mxge_probe(device_t dev)
161 if ((pci_get_vendor(dev) == MXGE_PCI_VENDOR_MYRICOM) &&
162 ((pci_get_device(dev) == MXGE_PCI_DEVICE_Z8E) ||
163 (pci_get_device(dev) == MXGE_PCI_DEVICE_Z8E_9))) {
164 rev = pci_get_revid(dev);
166 case MXGE_PCI_REV_Z8E:
167 device_set_desc(dev, "Myri10G-PCIE-8A");
169 case MXGE_PCI_REV_Z8ES:
170 device_set_desc(dev, "Myri10G-PCIE-8B");
/* Unknown revision: still attach, but warn the user. */
173 device_set_desc(dev, "Myri10G-PCIE-8??");
174 device_printf(dev, "Unrecognized rev %d NIC\n",
/*
 * Enable write-combining for PIO to the NIC SRAM window (x86/amd64 only),
 * via pmap_change_attr() on the mapped BAR.
 */
184 mxge_enable_wc(mxge_softc_t *sc)
186 #if defined(__i386) || defined(__amd64)
191 len = rman_get_size(sc->mem_res);
192 err = pmap_change_attr((vm_offset_t) sc->sram,
193 len, PAT_WRITE_COMBINING);
195 device_printf(sc->dev, "pmap_change_attr failed, %d\n",
203 /* callback to get our DMA address */
/* busdma load callback: stash the single segment's bus address in *arg. */
205 mxge_dmamap_callback(void *arg, bus_dma_segment_t *segs, int nsegs,
209 *(bus_addr_t *) arg = segs->ds_addr;
/*
 * Allocate a coherent DMA buffer: create a tag, allocate zeroed DMAable
 * memory, and load the map to learn its bus address (dma->bus_addr).
 * On failure, intermediate resources are released via the abort labels.
 * Paired with mxge_dma_free().
 */
214 mxge_dma_alloc(mxge_softc_t *sc, mxge_dma_t *dma, size_t bytes,
215 bus_size_t alignment)
218 device_t dev = sc->dev;
219 bus_size_t boundary, maxsegsize;
/* Special-case page-aligned allocations larger than a page -- the
 * boundary/maxsegsize chosen here is not visible in this excerpt. */
221 if (bytes > 4096 && alignment == 4096) {
229 /* allocate DMAable memory tags */
230 err = bus_dma_tag_create(sc->parent_dmat, /* parent */
231 alignment, /* alignment */
232 boundary, /* boundary */
233 BUS_SPACE_MAXADDR, /* low */
234 BUS_SPACE_MAXADDR, /* high */
235 NULL, NULL, /* filter */
238 maxsegsize, /* maxsegsize */
239 BUS_DMA_COHERENT, /* flags */
240 NULL, NULL, /* lock */
241 &dma->dmat); /* tag */
243 device_printf(dev, "couldn't alloc tag (err = %d)\n", err);
247 /* allocate DMAable memory & map */
248 err = bus_dmamem_alloc(dma->dmat, &dma->addr,
249 (BUS_DMA_WAITOK | BUS_DMA_COHERENT
250 | BUS_DMA_ZERO), &dma->map);
252 device_printf(dev, "couldn't alloc mem (err = %d)\n", err);
253 goto abort_with_dmat;
256 /* load the memory */
257 err = bus_dmamap_load(dma->dmat, dma->map, dma->addr, bytes,
258 mxge_dmamap_callback,
259 (void *)&dma->bus_addr, 0);
261 device_printf(dev, "couldn't load map (err = %d)\n", err);
267 bus_dmamem_free(dma->dmat, dma->addr, dma->map);
269 (void)bus_dma_tag_destroy(dma->dmat);
/*
 * Release a DMA buffer allocated by mxge_dma_alloc(): unload, free,
 * and destroy the tag, in the reverse order of allocation.
 */
275 mxge_dma_free(mxge_dma_t *dma)
277 bus_dmamap_unload(dma->dmat, dma->map);
278 bus_dmamem_free(dma->dmat, dma->addr, dma->map);
279 (void)bus_dma_tag_destroy(dma->dmat);
283 * The eeprom strings on the lanaiX have the format
/*
 * Parse the NUL-separated EEPROM strings: extract the MAC address
 * ("MAC="), product code ("PC="), and serial number ("SN="/"SN2=",
 * with SN2 taking precedence).  Fails if no MAC is found.
 */
290 mxge_parse_strings(mxge_softc_t *sc)
293 int i, found_mac, found_sn2;
296 ptr = sc->eeprom_strings;
299 while (*ptr != '\0') {
300 if (strncmp(ptr, "MAC=", 4) == 0) {
/* MAC bytes are two hex digits each, colon-separated. */
303 sc->mac_addr[i] = strtoul(ptr, &endptr, 16);
304 if (endptr - ptr != 2)
313 } else if (strncmp(ptr, "PC=", 3) == 0) {
315 strlcpy(sc->product_code_string, ptr,
316 sizeof(sc->product_code_string));
317 } else if (!found_sn2 && (strncmp(ptr, "SN=", 3) == 0)) {
319 strlcpy(sc->serial_number_string, ptr,
320 sizeof(sc->serial_number_string));
321 } else if (strncmp(ptr, "SN2=", 4) == 0) {
322 /* SN2 takes precedence over SN */
325 strlcpy(sc->serial_number_string, ptr,
326 sizeof(sc->serial_number_string));
/* Advance past the current NUL-terminated string. */
328 while (*ptr++ != '\0') {}
335 device_printf(sc->dev, "failed to parse eeprom_strings\n");
/*
 * Enable ECRC generation on an upstream Nvidia (CK804/MCP55) PCIe bridge
 * so that PCIe completions arrive 8-byte aligned (see the comment above
 * mxge_firmware_probe).  The extended (>0xff) config register involved
 * cannot be reached through the normal config-space API, so the chipset's
 * memory-mapped config window is mapped directly with pmap_mapdev().
 */
340 #if defined __i386 || defined i386 || defined __i386__ || defined __x86_64__
342 mxge_enable_nvidia_ecrc(mxge_softc_t *sc)
345 unsigned long base, off;
347 device_t pdev, mcp55;
348 uint16_t vendor_id, device_id, word;
349 uintptr_t bus, slot, func, ivend, idev;
353 if (!mxge_nvidia_ecrc_enable)
/* Grandparent of the NIC is the PCIe bridge we need to reconfigure. */
356 pdev = device_get_parent(device_get_parent(sc->dev));
358 device_printf(sc->dev, "could not find parent?\n");
361 vendor_id = pci_read_config(pdev, PCIR_VENDOR, 2);
362 device_id = pci_read_config(pdev, PCIR_DEVICE, 2);
/* Only Nvidia bridges (vendor 0x10de) are handled. */
364 if (vendor_id != 0x10de)
369 if (device_id == 0x005d) {
370 /* ck804, base address is magic */
372 } else if (device_id >= 0x0374 && device_id <= 0x378) {
373 /* mcp55, base address stored in chipset */
374 mcp55 = pci_find_bsf(0, 0, 0);
376 0x10de == pci_read_config(mcp55, PCIR_VENDOR, 2) &&
377 0x0369 == pci_read_config(mcp55, PCIR_DEVICE, 2)) {
378 word = pci_read_config(mcp55, 0x90, 2);
379 base = ((unsigned long)word & 0x7ffeU) << 25;
386 Test below is commented because it is believed that doing
387 config read/write beyond 0xff will access the config space
388 for the next larger function. Uncomment this and remove
389 the hacky pmap_mapdev() way of accessing config space when
390 FreeBSD grows support for extended pcie config space access
393 /* See if we can, by some miracle, access the extended
395 val = pci_read_config(pdev, 0x178, 4);
396 if (val != 0xffffffff) {
398 pci_write_config(pdev, 0x178, val, 4);
402 /* Rather than using normal pci config space writes, we must
403 * map the Nvidia config space ourselves. This is because on
404 * opteron/nvidia class machine the 0xe000000 mapping is
405 * handled by the nvidia chipset, that means the internal PCI
406 * device (the on-chip northbridge), or the amd-8131 bridge
407 * and things behind them are not visible by this method.
/* Gather bus/slot/function and IDs of the bridge via bus IVARs so the
 * mapping below can be computed and then sanity-checked. */
410 BUS_READ_IVAR(device_get_parent(pdev), pdev,
412 BUS_READ_IVAR(device_get_parent(pdev), pdev,
413 PCI_IVAR_SLOT, &slot);
414 BUS_READ_IVAR(device_get_parent(pdev), pdev,
415 PCI_IVAR_FUNCTION, &func);
416 BUS_READ_IVAR(device_get_parent(pdev), pdev,
417 PCI_IVAR_VENDOR, &ivend);
418 BUS_READ_IVAR(device_get_parent(pdev), pdev,
419 PCI_IVAR_DEVICE, &idev);
422 + 0x00100000UL * (unsigned long)bus
423 + 0x00001000UL * (unsigned long)(func
426 /* map it into the kernel */
427 va = pmap_mapdev(trunc_page((vm_paddr_t)off), PAGE_SIZE);
431 device_printf(sc->dev, "pmap_kenter_temporary didn't\n");
434 /* get a pointer to the config space mapped into the kernel */
435 cfgptr = va + (off & PAGE_MASK);
437 /* make sure that we can really access it */
438 vendor_id = *(uint16_t *)(cfgptr + PCIR_VENDOR);
439 device_id = *(uint16_t *)(cfgptr + PCIR_DEVICE);
440 if (! (vendor_id == ivend && device_id == idev)) {
441 device_printf(sc->dev, "mapping failed: 0x%x:0x%x\n",
442 vendor_id, device_id);
443 pmap_unmapdev((vm_offset_t)va, PAGE_SIZE);
/* Extended config register 0x178 holds the ECRC enable bit. */
447 ptr32 = (uint32_t*)(cfgptr + 0x178);
450 if (val == 0xffffffff) {
451 device_printf(sc->dev, "extended mapping failed\n");
452 pmap_unmapdev((vm_offset_t)va, PAGE_SIZE);
456 pmap_unmapdev((vm_offset_t)va, PAGE_SIZE);
458 device_printf(sc->dev,
459 "Enabled ECRC on upstream Nvidia bridge "
461 (int)bus, (int)slot, (int)func);
/* Non-x86 stub: the Nvidia chipset hack above is x86-only. */
466 mxge_enable_nvidia_ecrc(mxge_softc_t *sc)
468 device_printf(sc->dev,
469 "Nforce 4 chipset on non-x86/amd64!?!?!\n");
/*
 * Run the firmware's DMA benchmark against the dmabench buffer and record
 * read, write, and read+write throughput (MB/s) in the softc.  With
 * test_type == MXGEFW_CMD_UNALIGNED_TEST this doubles as the unaligned-
 * completion detector used by mxge_firmware_probe().
 */
476 mxge_dma_test(mxge_softc_t *sc, int test_type)
479 bus_addr_t dmatest_bus = sc->dmabench_dma.bus_addr;
485 /* Run a small DMA test.
486 * The magic multipliers to the length tell the firmware
487 * to do DMA read, write, or read+write tests. The
488 * results are returned in cmd.data0. The upper 16
489 * bits of the return is the number of transfers completed.
490 * The lower 16 bits is the time in 0.5us ticks that the
491 * transfers took to complete.
494 len = sc->tx_boundary;
/* Read test: multiplier 0x10000. */
496 cmd.data0 = MXGE_LOWPART_TO_U32(dmatest_bus);
497 cmd.data1 = MXGE_HIGHPART_TO_U32(dmatest_bus);
498 cmd.data2 = len * 0x10000;
499 status = mxge_send_cmd(sc, test_type, &cmd);
/* transfers * len bytes each way / ticks-of-0.5us => MB/s. */
504 sc->read_dma = ((cmd.data0>>16) * len * 2) /
505 (cmd.data0 & 0xffff);
/* Write test: multiplier 0x1. */
506 cmd.data0 = MXGE_LOWPART_TO_U32(dmatest_bus);
507 cmd.data1 = MXGE_HIGHPART_TO_U32(dmatest_bus);
508 cmd.data2 = len * 0x1;
509 status = mxge_send_cmd(sc, test_type, &cmd);
514 sc->write_dma = ((cmd.data0>>16) * len * 2) /
515 (cmd.data0 & 0xffff);
/* Concurrent read+write test: multiplier 0x10001. */
517 cmd.data0 = MXGE_LOWPART_TO_U32(dmatest_bus);
518 cmd.data1 = MXGE_HIGHPART_TO_U32(dmatest_bus);
519 cmd.data2 = len * 0x10001;
520 status = mxge_send_cmd(sc, test_type, &cmd);
525 sc->read_write_dma = ((cmd.data0>>16) * len * 2 * 2) /
526 (cmd.data0 & 0xffff);
/* An unaligned-test failure is expected on some chipsets; stay quiet. */
529 if (status != 0 && test_type != MXGEFW_CMD_UNALIGNED_TEST)
530 device_printf(sc->dev, "DMA %s benchmark failed: %d\n",
537 * The Lanai Z8E PCI-E interface achieves higher Read-DMA throughput
538 * when the PCI-E Completion packets are aligned on an 8-byte
539 * boundary. Some PCI-E chip sets always align Completion packets; on
540 * the ones that do not, the alignment can be enforced by enabling
541 * ECRC generation (if supported).
543 * When PCI-E Completion packets are not aligned, it is actually more
544 * efficient to limit Read-DMA transactions to 2KB, rather than 4KB.
546 * If the driver can neither enable ECRC nor verify that it has
547 * already been enabled, then it must use a firmware image which works
548 * around unaligned completion packets (ethp_z8e.dat), and it should
549 * also ensure that it never gives the device a Read-DMA which is
550 * larger than 2KB by setting the tx_boundary to 2KB. If ECRC is
551 * enabled, then the driver should use the aligned (eth_z8e.dat)
552 * firmware image, and set tx_boundary to 4KB.
/*
 * Try the aligned firmware: assume a 4KB tx boundary (downgrading to 2KB
 * if Max Read Request is not 4KB), attempt ECRC enablement on Nvidia
 * bridges, then run the unaligned-completion DMA test.  Returns 0 when
 * the aligned firmware is safe to keep; non-zero means the caller should
 * fall back to the unaligned ("ethp") image.
 */
556 mxge_firmware_probe(mxge_softc_t *sc)
558 device_t dev = sc->dev;
562 sc->tx_boundary = 4096;
564 * Verify the max read request size was set to 4KB
565 * before trying the test with 4KB.
567 if (pci_find_cap(dev, PCIY_EXPRESS, &reg) == 0) {
568 pectl = pci_read_config(dev, reg + 0x8, 2);
569 if ((pectl & (5 << 12)) != (5 << 12)) {
570 device_printf(dev, "Max Read Req. size != 4k (0x%x\n",
572 sc->tx_boundary = 2048;
577 * load the optimized firmware (which assumes aligned PCIe
578 * completions) in order to see if it works on this host.
580 sc->fw_name = mxge_fw_aligned;
581 status = mxge_load_firmware(sc, 1);
587 * Enable ECRC if possible
589 mxge_enable_nvidia_ecrc(sc);
592 * Run a DMA test which watches for unaligned completions and
593 * aborts on the first one seen. Not required on Z8ES or newer.
595 if (pci_get_revid(sc->dev) >= MXGE_PCI_REV_Z8ES)
597 status = mxge_dma_test(sc, MXGEFW_CMD_UNALIGNED_TEST);
599 return 0; /* keep the aligned firmware */
602 device_printf(dev, "DMA test failed: %d\n", status);
/* ENOSYS: firmware too old to support the unaligned test. */
603 if (status == ENOSYS)
604 device_printf(dev, "Falling back to ethp! "
605 "Please install up to date fw\n");
/*
 * Decide between the aligned and unaligned firmware images and the
 * matching tx_boundary, honoring the force-firmware tunable / throttle
 * setting, the PCIe link width, and the result of mxge_firmware_probe().
 * Finishes by actually loading the selected image.
 */
610 mxge_select_firmware(mxge_softc_t *sc)
613 int force_firmware = mxge_force_firmware;
/* Throttling requires a specific firmware -- treat it as forced. */
616 force_firmware = sc->throttle;
618 if (force_firmware != 0) {
619 if (force_firmware == 1)
624 device_printf(sc->dev,
625 "Assuming %s completions (forced)\n",
626 aligned ? "aligned" : "unaligned");
630 /* if the PCIe link width is 4 or less, we can use the aligned
631 firmware and skip any checks */
632 if (sc->link_width != 0 && sc->link_width <= 4) {
633 device_printf(sc->dev,
634 "PCIe x%d Link, expect reduced performance\n",
640 if (0 == mxge_firmware_probe(sc))
645 sc->fw_name = mxge_fw_aligned;
646 sc->tx_boundary = 4096;
648 sc->fw_name = mxge_fw_unaligned;
649 sc->tx_boundary = 2048;
651 return (mxge_load_firmware(sc, 0));
/*
 * Validate a firmware image header: check the MCP type, record the
 * version string for sysctl, and verify the major.minor version matches
 * what this driver was built against.
 */
655 mxge_validate_firmware(mxge_softc_t *sc, const mcp_gen_header_t *hdr)
659 if (be32toh(hdr->mcp_type) != MCP_TYPE_ETH) {
660 device_printf(sc->dev, "Bad firmware type: 0x%x\n",
661 be32toh(hdr->mcp_type));
665 /* save firmware version for sysctl */
666 strlcpy(sc->fw_version, hdr->version, sizeof(sc->fw_version));
668 device_printf(sc->dev, "firmware id: %s\n", hdr->version);
670 sscanf(sc->fw_version, "%d.%d.%d", &sc->fw_ver_major,
671 &sc->fw_ver_minor, &sc->fw_ver_tiny);
672 
673 if (!(sc->fw_ver_major == MXGEFW_VERSION_MAJOR
674 && sc->fw_ver_minor == MXGEFW_VERSION_MINOR)) {
675 device_printf(sc->dev, "Found firmware version %s\n",
677 device_printf(sc->dev, "Driver needs %d.%d\n",
678 MXGEFW_VERSION_MAJOR, MXGEFW_VERSION_MINOR);
/* zlib allocator shims backed by malloc(9)/free(9) on M_TEMP. */
686 z_alloc(void *nil, u_int items, u_int size)
690 ptr = malloc(items * size, M_TEMP, M_NOWAIT);
695 z_free(void *nil, void *ptr)
/*
 * Fetch the firmware image via firmware(9), inflate it with zlib,
 * validate its embedded MCP header, and PIO-copy it into NIC SRAM in
 * 256-byte chunks at MXGE_FW_OFFSET.  Cleanup is handled via the
 * abort labels at the bottom.
 */
702 mxge_load_firmware_helper(mxge_softc_t *sc, uint32_t *limit)
705 char *inflate_buffer;
706 const struct firmware *fw;
707 const mcp_gen_header_t *hdr;
714 fw = firmware_get(sc->fw_name);
716 device_printf(sc->dev, "Could not find firmware image %s\n",
723 /* setup zlib and decompress f/w */
724 bzero(&zs, sizeof (zs));
727 status = inflateInit(&zs);
728 if (status != Z_OK) {
733 /* the uncompressed size is stored as the firmware version,
734 which would otherwise go unused */
735 fw_len = (size_t) fw->version;
736 inflate_buffer = malloc(fw_len, M_TEMP, M_NOWAIT);
737 if (inflate_buffer == NULL)
739 zs.avail_in = fw->datasize;
740 zs.next_in = __DECONST(char *, fw->data);
741 zs.avail_out = fw_len;
742 zs.next_out = inflate_buffer;
743 status = inflate(&zs, Z_FINISH);
744 if (status != Z_STREAM_END) {
745 device_printf(sc->dev, "zlib %d\n", status);
747 goto abort_with_buffer;
/* Locate and sanity-check the MCP generic header inside the image. */
751 hdr_offset = htobe32(*(const uint32_t *)
752 (inflate_buffer + MCP_HEADER_PTR_OFFSET));
753 if ((hdr_offset & 3) || hdr_offset + sizeof(*hdr) > fw_len) {
754 device_printf(sc->dev, "Bad firmware file");
756 goto abort_with_buffer;
758 hdr = (const void*)(inflate_buffer + hdr_offset);
760 status = mxge_validate_firmware(sc, hdr);
762 goto abort_with_buffer;
764 /* Copy the inflated firmware to NIC SRAM. */
765 for (i = 0; i < fw_len; i += 256) {
766 mxge_pio_copy(sc->sram + MXGE_FW_OFFSET + i,
768 min(256U, (unsigned)(fw_len - i)));
777 free(inflate_buffer, M_TEMP);
781 firmware_put(fw, FIRMWARE_UNLOAD);
786 * Enable or disable periodic RDMAs from the host to make certain
787 * chipsets resend dropped PCIe messages
/*
 * Build an 8-byte-aligned command buffer on the stack, PIO it to the
 * boot dummy-RDMA mailbox, and poll the confirmation word (sc->cmd)
 * until the firmware writes 0xffffffff or ~20 iterations elapse.
 */
791 mxge_dummy_rdma(mxge_softc_t *sc, int enable)
794 volatile uint32_t *confirm;
795 volatile char *submit;
796 uint32_t *buf, dma_low, dma_high;
/* Round buf_bytes up so buf is 8-byte aligned. */
799 buf = (uint32_t *)((unsigned long)(buf_bytes + 7) & ~7UL);
801 /* clear confirmation addr */
802 confirm = (volatile uint32_t *)sc->cmd;
806 /* send an rdma command to the PCIe engine, and wait for the
807 response in the confirmation address. The firmware should
808 write a -1 there to indicate it is alive and well
811 dma_low = MXGE_LOWPART_TO_U32(sc->cmd_dma.bus_addr);
812 dma_high = MXGE_HIGHPART_TO_U32(sc->cmd_dma.bus_addr);
813 buf[0] = htobe32(dma_high); /* confirm addr MSW */
814 buf[1] = htobe32(dma_low); /* confirm addr LSW */
815 buf[2] = htobe32(0xffffffff); /* confirm data */
816 dma_low = MXGE_LOWPART_TO_U32(sc->zeropad_dma.bus_addr);
817 dma_high = MXGE_HIGHPART_TO_U32(sc->zeropad_dma.bus_addr);
818 buf[3] = htobe32(dma_high); /* dummy addr MSW */
819 buf[4] = htobe32(dma_low); /* dummy addr LSW */
820 buf[5] = htobe32(enable); /* enable? */
823 submit = (volatile char *)(sc->sram + MXGEFW_BOOT_DUMMY_RDMA);
825 mxge_pio_copy(submit, buf, 64);
/* Poll for the firmware's 0xffffffff acknowledgement. */
830 while (*confirm != 0xffffffff && i < 20) {
834 if (*confirm != 0xffffffff) {
835 device_printf(sc->dev, "dummy rdma %s failed (%p = 0x%x)",
836 (enable ? "enable" : "disable"), confirm,
/*
 * Issue a command to the firmware through the SRAM command mailbox and
 * wait (up to ~20ms, under cmd_mtx) for the DMA'd response.  Command
 * parameters go in big-endian; the response result selects the errno
 * returned and, on success, response->data is copied back to data->data0.
 */
843 mxge_send_cmd(mxge_softc_t *sc, uint32_t cmd, mxge_cmd_t *data)
846 char buf_bytes[sizeof(*buf) + 8];
847 volatile mcp_cmd_response_t *response = sc->cmd;
848 volatile char *cmd_addr = sc->sram + MXGEFW_ETH_CMD;
849 uint32_t dma_low, dma_high;
850 int err, sleep_total = 0;
852 /* ensure buf is aligned to 8 bytes */
853 buf = (mcp_cmd_t *)((unsigned long)(buf_bytes + 7) & ~7UL);
855 buf->data0 = htobe32(data->data0);
856 buf->data1 = htobe32(data->data1);
857 buf->data2 = htobe32(data->data2);
858 buf->cmd = htobe32(cmd);
859 dma_low = MXGE_LOWPART_TO_U32(sc->cmd_dma.bus_addr);
860 dma_high = MXGE_HIGHPART_TO_U32(sc->cmd_dma.bus_addr);
862 buf->response_addr.low = htobe32(dma_low);
863 buf->response_addr.high = htobe32(dma_high);
/* Serialize mailbox access; 0xffffffff marks "no response yet". */
864 mtx_lock(&sc->cmd_mtx);
865 response->result = 0xffffffff;
867 mxge_pio_copy((volatile void *)cmd_addr, buf, sizeof (*buf));
869 /* wait up to 20ms */
871 for (sleep_total = 0; sleep_total < 20; sleep_total++) {
872 bus_dmamap_sync(sc->cmd_dma.dmat,
873 sc->cmd_dma.map, BUS_DMASYNC_POSTREAD);
/* Map firmware result codes to errno values. */
875 switch (be32toh(response->result)) {
877 data->data0 = be32toh(response->data);
883 case MXGEFW_CMD_UNKNOWN:
886 case MXGEFW_CMD_ERROR_UNALIGNED:
889 case MXGEFW_CMD_ERROR_BUSY:
892 case MXGEFW_CMD_ERROR_I2C_ABSENT:
896 device_printf(sc->dev,
898 "failed, result = %d\n",
899 cmd, be32toh(response->result));
907 device_printf(sc->dev, "mxge: command %d timed out"
909 cmd, be32toh(response->result));
910 mtx_unlock(&sc->cmd_mtx);
/*
 * Adopt the firmware already running on the NIC: locate its header via
 * the pointer at MCP_HEADER_PTR_OFFSET in SRAM, copy the header to host
 * memory, validate it, and flag the known 1.4.4-1.4.11 rx-filter bug
 * (which forces ALLMULTI) if present.
 */
915 mxge_adopt_running_firmware(mxge_softc_t *sc)
917 struct mcp_gen_header *hdr;
918 const size_t bytes = sizeof (struct mcp_gen_header);
922 /* find running firmware header */
923 hdr_offset = htobe32(*(volatile uint32_t *)
924 (sc->sram + MCP_HEADER_PTR_OFFSET));
926 if ((hdr_offset & 3) || hdr_offset + sizeof(*hdr) > sc->sram_size) {
927 device_printf(sc->dev,
928 "Running firmware has bad header offset (%d)\n",
933 /* copy header of running firmware from SRAM to host memory to
934 * validate firmware */
935 hdr = malloc(bytes, M_DEVBUF, M_NOWAIT);
937 device_printf(sc->dev, "could not malloc firmware hdr\n");
940 bus_space_read_region_1(rman_get_bustag(sc->mem_res),
941 rman_get_bushandle(sc->mem_res),
942 hdr_offset, (char *)hdr, bytes);
943 status = mxge_validate_firmware(sc, hdr);
947 * check to see if adopted firmware has bug where adopting
948 * it will cause broadcasts to be filtered unless the NIC
949 * is kept in ALLMULTI mode
951 if (sc->fw_ver_major == 1 && sc->fw_ver_minor == 4 &&
952 sc->fw_ver_tiny >= 4 && sc->fw_ver_tiny <= 11) {
953 sc->adopted_rx_filter_bug = 1;
954 device_printf(sc->dev, "Adopting fw %d.%d.%d: "
955 "working around rx filter bug\n",
956 sc->fw_ver_major, sc->fw_ver_minor,
/*
 * Load firmware onto the NIC.  If the image cannot be fetched/inflated
 * and 'adopt' is set, fall back to adopting the firmware already running
 * (downgrading tx_boundary to 2KB since its alignment is unknown).
 * Otherwise hand off the freshly-copied image to the bootstrap MCP and
 * poll the confirmation word for the firmware's 0xffffffff ack.
 */
965 mxge_load_firmware(mxge_softc_t *sc, int adopt)
967 volatile uint32_t *confirm;
968 volatile char *submit;
970 uint32_t *buf, size, dma_low, dma_high;
/* 8-byte align the stack command buffer. */
973 buf = (uint32_t *)((unsigned long)(buf_bytes + 7) & ~7UL);
975 size = sc->sram_size;
976 status = mxge_load_firmware_helper(sc, &size);
980 /* Try to use the currently running firmware, if
982 status = mxge_adopt_running_firmware(sc);
984 device_printf(sc->dev,
985 "failed to adopt running firmware\n");
988 device_printf(sc->dev,
989 "Successfully adopted running firmware\n");
990 if (sc->tx_boundary == 4096) {
991 device_printf(sc->dev,
992 "Using firmware currently running on NIC"
994 device_printf(sc->dev,
995 "performance consider loading optimized "
/* Adopted firmware's completion alignment is unknown: be conservative. */
998 sc->fw_name = mxge_fw_unaligned;
999 sc->tx_boundary = 2048;
1002 /* clear confirmation addr */
1003 confirm = (volatile uint32_t *)sc->cmd;
1006 /* send a reload command to the bootstrap MCP, and wait for the
1007 response in the confirmation address. The firmware should
1008 write a -1 there to indicate it is alive and well
1011 dma_low = MXGE_LOWPART_TO_U32(sc->cmd_dma.bus_addr);
1012 dma_high = MXGE_HIGHPART_TO_U32(sc->cmd_dma.bus_addr);
1014 buf[0] = htobe32(dma_high); /* confirm addr MSW */
1015 buf[1] = htobe32(dma_low); /* confirm addr LSW */
1016 buf[2] = htobe32(0xffffffff); /* confirm data */
1018 /* FIX: All newest firmware should un-protect the bottom of
1019 the sram before handoff. However, the very first interfaces
1020 do not. Therefore the handoff copy must skip the first 8 bytes
1022 /* where the code starts*/
1023 buf[3] = htobe32(MXGE_FW_OFFSET + 8);
1024 buf[4] = htobe32(size - 8); /* length of code */
1025 buf[5] = htobe32(8); /* where to copy to */
1026 buf[6] = htobe32(0); /* where to jump to */
1028 submit = (volatile char *)(sc->sram + MXGEFW_BOOT_HANDOFF);
1029 mxge_pio_copy(submit, buf, 64);
/* Poll (with DMA sync) for the firmware's acknowledgement. */
1034 while (*confirm != 0xffffffff && i < 20) {
1037 bus_dmamap_sync(sc->cmd_dma.dmat,
1038 sc->cmd_dma.map, BUS_DMASYNC_POSTREAD);
1040 if (*confirm != 0xffffffff) {
1041 device_printf(sc->dev,"handoff failed (%p = 0x%x)",
/*
 * Push sc->mac_addr to the firmware: first four bytes packed into data0,
 * last two into data1.
 */
1050 mxge_update_mac_address(mxge_softc_t *sc)
1053 uint8_t *addr = sc->mac_addr;
1057 cmd.data0 = ((addr[0] << 24) | (addr[1] << 16)
1058 | (addr[2] << 8) | addr[3]);
1060 cmd.data1 = ((addr[4] << 8) | (addr[5]));
1062 status = mxge_send_cmd(sc, MXGEFW_SET_MAC_ADDRESS, &cmd);
/* Enable or disable link-level flow control (pause frames) in firmware. */
1067 mxge_change_pause(mxge_softc_t *sc, int pause)
1073 status = mxge_send_cmd(sc, MXGEFW_ENABLE_FLOW_CONTROL,
1076 status = mxge_send_cmd(sc, MXGEFW_DISABLE_FLOW_CONTROL,
1080 device_printf(sc->dev, "Failed to set flow control mode\n");
/* Enable/disable promiscuous mode; mxge_always_promisc forces it on. */
1088 mxge_change_promisc(mxge_softc_t *sc, int promisc)
1093 if (mxge_always_promisc)
1097 status = mxge_send_cmd(sc, MXGEFW_ENABLE_PROMISC,
1100 status = mxge_send_cmd(sc, MXGEFW_DISABLE_PROMISC,
1104 device_printf(sc->dev, "Failed to set promisc mode\n");
/*
 * Program the firmware multicast filter from the interface's multicast
 * list: temporarily enable ALLMULTI, flush old filters, join each
 * AF_LINK address, then re-enable filtering.  Stays in ALLMULTI when the
 * interface requests it, the adopted-firmware rx-filter bug is present,
 * or any step fails.
 */
1109 mxge_set_multicast_list(mxge_softc_t *sc)
1112 struct ifmultiaddr *ifma;
1113 struct ifnet *ifp = sc->ifp;
1116 /* This firmware is known to not support multicast */
1117 if (!sc->fw_multicast_support)
1120 /* Disable multicast filtering while we play with the lists*/
1121 err = mxge_send_cmd(sc, MXGEFW_ENABLE_ALLMULTI, &cmd);
1123 device_printf(sc->dev, "Failed MXGEFW_ENABLE_ALLMULTI,"
1124 " error status: %d\n", err);
1128 if (sc->adopted_rx_filter_bug)
1131 if (ifp->if_flags & IFF_ALLMULTI)
1132 /* request to disable multicast filtering, so quit here */
1135 /* Flush all the filters */
1137 err = mxge_send_cmd(sc, MXGEFW_LEAVE_ALL_MULTICAST_GROUPS, &cmd);
1139 device_printf(sc->dev,
1140 "Failed MXGEFW_LEAVE_ALL_MULTICAST_GROUPS"
1141 ", error status: %d\n", err);
1145 /* Walk the multicast list, and add each address */
1147 if_maddr_rlock(ifp);
1148 TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
1149 if (ifma->ifma_addr->sa_family != AF_LINK)
/* Split the 6-byte link-level address across data0/data1. */
1151 bcopy(LLADDR((struct sockaddr_dl *)ifma->ifma_addr),
1153 bcopy(LLADDR((struct sockaddr_dl *)ifma->ifma_addr) + 4,
1155 cmd.data0 = htonl(cmd.data0);
1156 cmd.data1 = htonl(cmd.data1);
1157 err = mxge_send_cmd(sc, MXGEFW_JOIN_MULTICAST_GROUP, &cmd);
1159 device_printf(sc->dev, "Failed "
1160 "MXGEFW_JOIN_MULTICAST_GROUP, error status:"
1162 /* abort, leaving multicast filtering off */
1163 if_maddr_runlock(ifp);
1167 if_maddr_runlock(ifp);
1168 /* Enable multicast filtering */
1169 err = mxge_send_cmd(sc, MXGEFW_DISABLE_ALLMULTI, &cmd);
1171 device_printf(sc->dev, "Failed MXGEFW_DISABLE_ALLMULTI"
1172 ", error status: %d\n", err);
/*
 * Compute the largest usable MTU: the firmware maximum when jumbo page
 * clusters are big enough (or when the firmware accepts virtually
 * contiguous jumbos), otherwise limited by MJUMPAGESIZE.
 */
1177 mxge_max_mtu(mxge_softc_t *sc)
1182 if (MJUMPAGESIZE - MXGEFW_PAD > MXGEFW_MAX_MTU)
1183 return MXGEFW_MAX_MTU - MXGEFW_PAD;
1185 /* try to set nbufs to see if it we can
1186 use virtually contiguous jumbos */
1188 status = mxge_send_cmd(sc, MXGEFW_CMD_ALWAYS_USE_N_BIG_BUFFERS,
1191 return MXGEFW_MAX_MTU - MXGEFW_PAD;
1193 /* otherwise, we're limited to MJUMPAGESIZE */
1194 return MJUMPAGESIZE - MXGEFW_PAD;
/*
 * Reset the NIC and re-establish all firmware state: intrq size, RSS
 * slice configuration, interrupt queue DMA (when interrupts_setup),
 * coalescing/claim/deassert register pointers, per-slice counters, and
 * the MAC/pause/promisc/multicast settings.  Runs a DMA benchmark along
 * the way and optionally re-arms transmit throttling.
 */
1198 mxge_reset(mxge_softc_t *sc, int interrupts_setup)
1200 struct mxge_slice_state *ss;
1201 mxge_rx_done_t *rx_done;
1202 volatile uint32_t *irq_claim;
1206 /* try to send a reset command to the card to see if it
1208 memset(&cmd, 0, sizeof (cmd));
1209 status = mxge_send_cmd(sc, MXGEFW_CMD_RESET, &cmd);
1211 device_printf(sc->dev, "failed reset\n");
/* Re-enable dummy RDMAs after reset (see mxge_dummy_rdma). */
1215 mxge_dummy_rdma(sc, 1);
1218 /* set the intrq size */
1219 cmd.data0 = sc->rx_ring_size;
1220 status = mxge_send_cmd(sc, MXGEFW_CMD_SET_INTRQ_SIZE, &cmd);
1223 * Even though we already know how many slices are supported
1224 * via mxge_slice_probe(), MXGEFW_CMD_GET_MAX_RSS_QUEUES
1225 * has magic side effects, and must be called after a reset.
1226 * It must be called prior to calling any RSS related cmds,
1227 * including assigning an interrupt queue for anything but
1228 * slice 0. It must also be called *after*
1229 * MXGEFW_CMD_SET_INTRQ_SIZE, since the intrq size is used by
1230 * the firmware to compute offsets.
1233 if (sc->num_slices > 1) {
1234 /* ask the maximum number of slices it supports */
1235 status = mxge_send_cmd(sc, MXGEFW_CMD_GET_MAX_RSS_QUEUES,
1238 device_printf(sc->dev,
1239 "failed to get number of slices\n");
1243 * MXGEFW_CMD_ENABLE_RSS_QUEUES must be called prior
1244 * to setting up the interrupt queue DMA
1246 cmd.data0 = sc->num_slices;
1247 cmd.data1 = MXGEFW_SLICE_INTR_MODE_ONE_PER_SLICE;
1248 #ifdef IFNET_BUF_RING
1249 cmd.data1 |= MXGEFW_SLICE_ENABLE_MULTIPLE_TX_QUEUES;
1251 status = mxge_send_cmd(sc, MXGEFW_CMD_ENABLE_RSS_QUEUES,
1254 device_printf(sc->dev,
1255 "failed to set number of slices\n");
1261 if (interrupts_setup) {
1262 /* Now exchange information about interrupts */
1263 for (slice = 0; slice < sc->num_slices; slice++) {
1264 rx_done = &sc->ss[slice].rx_done;
1265 memset(rx_done->entry, 0, sc->rx_ring_size);
1266 cmd.data0 = MXGE_LOWPART_TO_U32(rx_done->dma.bus_addr);
1267 cmd.data1 = MXGE_HIGHPART_TO_U32(rx_done->dma.bus_addr);
1269 status |= mxge_send_cmd(sc,
1270 MXGEFW_CMD_SET_INTRQ_DMA,
/* Fetch SRAM offsets of the coalescing, irq-ack and deassert registers. */
1275 status |= mxge_send_cmd(sc,
1276 MXGEFW_CMD_GET_INTR_COAL_DELAY_OFFSET, &cmd);
1279 sc->intr_coal_delay_ptr = (volatile uint32_t *)(sc->sram + cmd.data0);
1281 status |= mxge_send_cmd(sc, MXGEFW_CMD_GET_IRQ_ACK_OFFSET, &cmd);
1282 irq_claim = (volatile uint32_t *)(sc->sram + cmd.data0);
1285 status |= mxge_send_cmd(sc, MXGEFW_CMD_GET_IRQ_DEASSERT_OFFSET,
1287 sc->irq_deassert = (volatile uint32_t *)(sc->sram + cmd.data0);
1289 device_printf(sc->dev, "failed set interrupt parameters\n");
1294 *sc->intr_coal_delay_ptr = htobe32(sc->intr_coal_delay);
1297 /* run a DMA benchmark */
1298 (void) mxge_dma_test(sc, MXGEFW_DMA_TEST);
/* Reset all mcp/driver shared per-slice state. */
1300 for (slice = 0; slice < sc->num_slices; slice++) {
1301 ss = &sc->ss[slice];
1303 ss->irq_claim = irq_claim + (2 * slice);
1304 /* reset mcp/driver shared state back to 0 */
1305 ss->rx_done.idx = 0;
1306 ss->rx_done.cnt = 0;
1309 ss->tx.pkt_done = 0;
1310 ss->tx.queue_active = 0;
1311 ss->tx.activate = 0;
1312 ss->tx.deactivate = 0;
1317 ss->rx_small.cnt = 0;
1318 ss->lc.lro_bad_csum = 0;
1319 ss->lc.lro_queued = 0;
1320 ss->lc.lro_flushed = 0;
1321 if (ss->fw_stats != NULL) {
1322 bzero(ss->fw_stats, sizeof *ss->fw_stats);
1325 sc->rdma_tags_available = 15;
/* Re-apply addressing/filtering state lost by the reset. */
1326 status = mxge_update_mac_address(sc);
1327 mxge_change_promisc(sc, sc->ifp->if_flags & IFF_PROMISC);
1328 mxge_change_pause(sc, sc->pause);
1329 mxge_set_multicast_list(sc);
1331 cmd.data0 = sc->throttle;
1332 if (mxge_send_cmd(sc, MXGEFW_CMD_SET_THROTTLE_FACTOR,
1334 device_printf(sc->dev,
1335 "can't enable throttle\n");
/*
 * Sysctl handler: change the transmit throttle factor.  Validates the
 * range and pushes the new value to the firmware under driver_mtx.
 */
1342 mxge_change_throttle(SYSCTL_HANDLER_ARGS)
1347 unsigned int throttle;
1350 throttle = sc->throttle;
1351 err = sysctl_handle_int(oidp, &throttle, arg2, req);
1356 if (throttle == sc->throttle)
1359 if (throttle < MXGE_MIN_THROTTLE || throttle > MXGE_MAX_THROTTLE)
1362 mtx_lock(&sc->driver_mtx);
1363 cmd.data0 = throttle;
1364 err = mxge_send_cmd(sc, MXGEFW_CMD_SET_THROTTLE_FACTOR, &cmd);
1366 sc->throttle = throttle;
1367 mtx_unlock(&sc->driver_mtx);
/*
 * Sysctl handler: change the interrupt coalescing delay (usecs) by
 * writing it big-endian directly to the firmware register in SRAM.
 */
1372 mxge_change_intr_coal(SYSCTL_HANDLER_ARGS)
1375 unsigned int intr_coal_delay;
1379 intr_coal_delay = sc->intr_coal_delay;
1380 err = sysctl_handle_int(oidp, &intr_coal_delay, arg2, req);
1384 if (intr_coal_delay == sc->intr_coal_delay)
1387 if (intr_coal_delay == 0 || intr_coal_delay > 1000*1000)
1390 mtx_lock(&sc->driver_mtx);
1391 *sc->intr_coal_delay_ptr = htobe32(intr_coal_delay);
1392 sc->intr_coal_delay = intr_coal_delay;
1394 mtx_unlock(&sc->driver_mtx);
/* Sysctl handler: toggle link-level flow control via mxge_change_pause(). */
1399 mxge_change_flow_control(SYSCTL_HANDLER_ARGS)
1402 unsigned int enabled;
1406 enabled = sc->pause;
1407 err = sysctl_handle_int(oidp, &enabled, arg2, req);
1411 if (enabled == sc->pause)
1414 mtx_lock(&sc->driver_mtx);
1415 err = mxge_change_pause(sc, enabled);
1416 mtx_unlock(&sc->driver_mtx);
/* Sysctl helper: expose a big-endian firmware counter as a host-order int. */
1421 mxge_handle_be32(SYSCTL_HANDLER_ARGS)
1427 arg2 = be32toh(*(int *)arg1);
1429 err = sysctl_handle_int(oidp, arg1, arg2, req);
/*
 * Tear down the per-slice sysctl trees and the slice parent tree.
 * Safe to call when nothing was created (slice_sysctl_tree == NULL).
 */
1435 mxge_rem_sysctls(mxge_softc_t *sc)
1437 struct mxge_slice_state *ss;
1440 if (sc->slice_sysctl_tree == NULL)
1443 for (slice = 0; slice < sc->num_slices; slice++) {
1444 ss = &sc->ss[slice];
1445 if (ss == NULL || ss->sysctl_tree == NULL)
1447 sysctl_ctx_free(&ss->sysctl_ctx);
1448 ss->sysctl_tree = NULL;
1450 sysctl_ctx_free(&sc->slice_sysctl_ctx);
1451 sc->slice_sysctl_tree = NULL;
/*
 * Register all of the driver's sysctl nodes: static device information
 * (firmware version, serial number, DMA benchmark results), tunables
 * (interrupt coalescing, throttle, flow control), read-only firmware
 * statistics (exposed through mxge_handle_be32 since the stats block is
 * big-endian), and a per-slice subtree of ring/LRO/tx counters.
 */
1455 mxge_add_sysctls(mxge_softc_t *sc)
1457 struct sysctl_ctx_list *ctx;
1458 struct sysctl_oid_list *children;
1460 struct mxge_slice_state *ss;
1464 ctx = device_get_sysctl_ctx(sc->dev);
1465 children = SYSCTL_CHILDREN(device_get_sysctl_tree(sc->dev));
/* slice 0's stats block holds the device-wide firmware counters */
1466 fw = sc->ss[0].fw_stats;
1468 /* random information */
1469 SYSCTL_ADD_STRING(ctx, children, OID_AUTO,
1471 CTLFLAG_RD, sc->fw_version,
1472 0, "firmware version");
1473 SYSCTL_ADD_STRING(ctx, children, OID_AUTO,
1475 CTLFLAG_RD, sc->serial_number_string,
1476 0, "serial number");
1477 SYSCTL_ADD_STRING(ctx, children, OID_AUTO,
1479 CTLFLAG_RD, sc->product_code_string,
1481 SYSCTL_ADD_INT(ctx, children, OID_AUTO,
1483 CTLFLAG_RD, &sc->link_width,
1485 SYSCTL_ADD_INT(ctx, children, OID_AUTO,
1487 CTLFLAG_RD, &sc->tx_boundary,
1489 SYSCTL_ADD_INT(ctx, children, OID_AUTO,
1491 CTLFLAG_RD, &sc->wc,
1492 0, "write combining PIO?");
1493 SYSCTL_ADD_INT(ctx, children, OID_AUTO,
1495 CTLFLAG_RD, &sc->read_dma,
1496 0, "DMA Read speed in MB/s");
1497 SYSCTL_ADD_INT(ctx, children, OID_AUTO,
1499 CTLFLAG_RD, &sc->write_dma,
1500 0, "DMA Write speed in MB/s");
1501 SYSCTL_ADD_INT(ctx, children, OID_AUTO,
1502 "read_write_dma_MBs",
1503 CTLFLAG_RD, &sc->read_write_dma,
1504 0, "DMA concurrent Read/Write speed in MB/s");
1505 SYSCTL_ADD_INT(ctx, children, OID_AUTO,
1507 CTLFLAG_RD, &sc->watchdog_resets,
1508 0, "Number of times NIC was reset");
1511 /* performance related tunables */
1512 SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
1514 CTLTYPE_INT|CTLFLAG_RW, sc,
1515 0, mxge_change_intr_coal,
1516 "I", "interrupt coalescing delay in usecs");
1518 SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
1520 CTLTYPE_INT|CTLFLAG_RW, sc,
1521 0, mxge_change_throttle,
1522 "I", "transmit throttling");
1524 SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
1525 "flow_control_enabled",
1526 CTLTYPE_INT|CTLFLAG_RW, sc,
1527 0, mxge_change_flow_control,
/* NOTE(review): description below looks copy-pasted from the
 * intr_coal node; should probably read "flow control enabled" */
1528 "I", "interrupt coalescing delay in usecs");
1530 SYSCTL_ADD_INT(ctx, children, OID_AUTO,
1532 CTLFLAG_RW, &mxge_deassert_wait,
1533 0, "Wait for IRQ line to go low in ihandler");
1535 /* stats block from firmware is in network byte order.
1537 SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
1539 CTLTYPE_INT|CTLFLAG_RD, &fw->link_up,
1540 0, mxge_handle_be32,
1542 SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
1543 "rdma_tags_available",
1544 CTLTYPE_INT|CTLFLAG_RD, &fw->rdma_tags_available,
1545 0, mxge_handle_be32,
1546 "I", "rdma_tags_available");
1547 SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
1548 "dropped_bad_crc32",
1549 CTLTYPE_INT|CTLFLAG_RD,
1550 &fw->dropped_bad_crc32,
1551 0, mxge_handle_be32,
1552 "I", "dropped_bad_crc32");
1553 SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
1555 CTLTYPE_INT|CTLFLAG_RD,
1556 &fw->dropped_bad_phy,
1557 0, mxge_handle_be32,
1558 "I", "dropped_bad_phy");
1559 SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
1560 "dropped_link_error_or_filtered",
1561 CTLTYPE_INT|CTLFLAG_RD,
1562 &fw->dropped_link_error_or_filtered,
1563 0, mxge_handle_be32,
1564 "I", "dropped_link_error_or_filtered");
1565 SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
1566 "dropped_link_overflow",
1567 CTLTYPE_INT|CTLFLAG_RD, &fw->dropped_link_overflow,
1568 0, mxge_handle_be32,
1569 "I", "dropped_link_overflow");
1570 SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
1571 "dropped_multicast_filtered",
1572 CTLTYPE_INT|CTLFLAG_RD,
1573 &fw->dropped_multicast_filtered,
1574 0, mxge_handle_be32,
1575 "I", "dropped_multicast_filtered");
1576 SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
1577 "dropped_no_big_buffer",
1578 CTLTYPE_INT|CTLFLAG_RD, &fw->dropped_no_big_buffer,
1579 0, mxge_handle_be32,
1580 "I", "dropped_no_big_buffer");
1581 SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
1582 "dropped_no_small_buffer",
1583 CTLTYPE_INT|CTLFLAG_RD,
1584 &fw->dropped_no_small_buffer,
1585 0, mxge_handle_be32,
1586 "I", "dropped_no_small_buffer");
1587 SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
1589 CTLTYPE_INT|CTLFLAG_RD, &fw->dropped_overrun,
1590 0, mxge_handle_be32,
1591 "I", "dropped_overrun");
1592 SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
1594 CTLTYPE_INT|CTLFLAG_RD,
1596 0, mxge_handle_be32,
1597 "I", "dropped_pause");
1598 SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
1600 CTLTYPE_INT|CTLFLAG_RD, &fw->dropped_runt,
1601 0, mxge_handle_be32,
1602 "I", "dropped_runt");
1604 SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
1605 "dropped_unicast_filtered",
1606 CTLTYPE_INT|CTLFLAG_RD, &fw->dropped_unicast_filtered,
1607 0, mxge_handle_be32,
1608 "I", "dropped_unicast_filtered");
1610 /* verbose printing? */
1611 SYSCTL_ADD_INT(ctx, children, OID_AUTO,
1613 CTLFLAG_RW, &mxge_verbose,
1614 0, "verbose printing");
1616 /* add counters exported for debugging from all slices */
1617 sysctl_ctx_init(&sc->slice_sysctl_ctx);
1618 sc->slice_sysctl_tree =
1619 SYSCTL_ADD_NODE(&sc->slice_sysctl_ctx, children, OID_AUTO,
1620 "slice", CTLFLAG_RD, 0, "");
/* one numbered child node per slice, each with its own ctx */
1622 for (slice = 0; slice < sc->num_slices; slice++) {
1623 ss = &sc->ss[slice];
1624 sysctl_ctx_init(&ss->sysctl_ctx);
1625 ctx = &ss->sysctl_ctx;
1626 children = SYSCTL_CHILDREN(sc->slice_sysctl_tree);
1627 sprintf(slice_num, "%d", slice);
1629 SYSCTL_ADD_NODE(ctx, children, OID_AUTO, slice_num,
1631 children = SYSCTL_CHILDREN(ss->sysctl_tree);
1632 SYSCTL_ADD_INT(ctx, children, OID_AUTO,
1634 CTLFLAG_RD, &ss->rx_small.cnt,
1636 SYSCTL_ADD_INT(ctx, children, OID_AUTO,
1638 CTLFLAG_RD, &ss->rx_big.cnt,
1640 SYSCTL_ADD_U64(ctx, children, OID_AUTO,
1641 "lro_flushed", CTLFLAG_RD, &ss->lc.lro_flushed,
1642 0, "number of lro merge queues flushed");
1644 SYSCTL_ADD_U64(ctx, children, OID_AUTO,
1645 "lro_bad_csum", CTLFLAG_RD, &ss->lc.lro_bad_csum,
1646 0, "number of bad csums preventing LRO");
1648 SYSCTL_ADD_U64(ctx, children, OID_AUTO,
1649 "lro_queued", CTLFLAG_RD, &ss->lc.lro_queued,
1650 0, "number of frames appended to lro merge"
1653 #ifndef IFNET_BUF_RING
1654 /* only transmit from slice 0 for now */
1658 SYSCTL_ADD_INT(ctx, children, OID_AUTO,
1660 CTLFLAG_RD, &ss->tx.req,
1663 SYSCTL_ADD_INT(ctx, children, OID_AUTO,
1665 CTLFLAG_RD, &ss->tx.done,
1667 SYSCTL_ADD_INT(ctx, children, OID_AUTO,
1669 CTLFLAG_RD, &ss->tx.pkt_done,
1671 SYSCTL_ADD_INT(ctx, children, OID_AUTO,
1673 CTLFLAG_RD, &ss->tx.stall,
1675 SYSCTL_ADD_INT(ctx, children, OID_AUTO,
1677 CTLFLAG_RD, &ss->tx.wake,
1679 SYSCTL_ADD_INT(ctx, children, OID_AUTO,
1681 CTLFLAG_RD, &ss->tx.defrag,
1683 SYSCTL_ADD_INT(ctx, children, OID_AUTO,
1685 CTLFLAG_RD, &ss->tx.queue_active,
1686 0, "tx_queue_active");
1687 SYSCTL_ADD_INT(ctx, children, OID_AUTO,
1689 CTLFLAG_RD, &ss->tx.activate,
1691 SYSCTL_ADD_INT(ctx, children, OID_AUTO,
1693 CTLFLAG_RD, &ss->tx.deactivate,
1694 0, "tx_deactivate");
1698 /* copy an array of mcp_kreq_ether_send_t's to the mcp. Copy
1699 backwards one at a time and handle ring wraps */
/*
 * Fallback submit path used when the request chain wraps the tx ring:
 * requests are copied to the NIC one at a time, last to first, with
 * each slot index masked into the ring.
 */
1702 mxge_submit_req_backwards(mxge_tx_ring_t *tx,
1703 mcp_kreq_ether_send_t *src, int cnt)
1705 int idx, starting_slot;
1706 starting_slot = tx->req;
/* mask handles wrap-around of the ring index */
1709 idx = (starting_slot + cnt) & tx->mask;
1710 mxge_pio_copy(&tx->lanai[idx],
1711 &src[cnt], sizeof(*src));
1717 * copy an array of mcp_kreq_ether_send_t's to the mcp. Copy
1718 * at most 32 bytes at a time, so as to avoid involving the software
1719 * pio handler in the nic. We re-write the first segment's flags
1720 * to mark them valid only after writing the entire chain
/*
 * Main submit path: PIO-copy the request list to the NIC in 32-byte
 * chunks, then publish the chain by rewriting the first request's
 * final 32 bits (containing the valid flags) last.
 */
1724 mxge_submit_req(mxge_tx_ring_t *tx, mcp_kreq_ether_send_t *src,
1729 volatile uint32_t *dst_ints;
1730 mcp_kreq_ether_send_t *srcp;
1731 volatile mcp_kreq_ether_send_t *dstp, *dst;
1734 idx = tx->req & tx->mask;
/* remember the valid flags; they are restored only at the end */
1736 last_flags = src->flags;
1739 dst = dstp = &tx->lanai[idx];
/* fast path: chain does not wrap the ring */
1742 if ((idx + cnt) < tx->mask) {
1743 for (i = 0; i < (cnt - 1); i += 2) {
1744 mxge_pio_copy(dstp, srcp, 2 * sizeof(*src));
1745 wmb(); /* force write every 32 bytes */
1750 /* submit all but the first request, and ensure
1751 that it is submitted below */
1752 mxge_submit_req_backwards(tx, src, cnt);
1756 /* submit the first request */
1757 mxge_pio_copy(dstp, srcp, sizeof(*src));
1758 wmb(); /* barrier before setting valid flag */
1761 /* re-write the last 32-bits with the valid flags */
1762 src->flags = last_flags;
1763 src_ints = (uint32_t *)src;
1765 dst_ints = (volatile uint32_t *)dst;
1767 *dst_ints = *src_ints;
/*
 * Parse the headers of an outbound frame and fill in the mxge_pkt_info
 * (ip offset, ip/ip6 header pointer, header length, tcp pointer) used
 * by checksum-offload and TSO encapsulation.  Handles an optional
 * 802.1Q VLAN header, and copies headers into ss->scratch when they do
 * not all sit in the first mbuf.
 */
1773 mxge_parse_tx(struct mxge_slice_state *ss, struct mbuf *m,
1774 struct mxge_pkt_info *pi)
1776 struct ether_vlan_header *eh;
1778 int tso = m->m_pkthdr.csum_flags & (CSUM_TSO);
1779 #if IFCAP_TSO6 && defined(INET6)
/* determine ethertype and L3 offset, accounting for a VLAN tag */
1783 eh = mtod(m, struct ether_vlan_header *);
1784 if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
1785 etype = ntohs(eh->evl_proto);
1786 pi->ip_off = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
1788 etype = ntohs(eh->evl_encap_proto);
1789 pi->ip_off = ETHER_HDR_LEN;
1795 * ensure ip header is in first mbuf, copy it to a
1796 * scratch buffer if not
1798 pi->ip = (struct ip *)(m->m_data + pi->ip_off);
1800 if (__predict_false(m->m_len < pi->ip_off + sizeof(*pi->ip))) {
1801 m_copydata(m, 0, pi->ip_off + sizeof(*pi->ip),
1803 pi->ip = (struct ip *)(ss->scratch + pi->ip_off);
1805 pi->ip_hlen = pi->ip->ip_hl << 2;
/* for TSO, the TCP header must also be contiguous */
1809 if (__predict_false(m->m_len < pi->ip_off + pi->ip_hlen +
1810 sizeof(struct tcphdr))) {
1811 m_copydata(m, 0, pi->ip_off + pi->ip_hlen +
1812 sizeof(struct tcphdr), ss->scratch);
1813 pi->ip = (struct ip *)(ss->scratch + pi->ip_off);
1815 pi->tcp = (struct tcphdr *)((char *)pi->ip + pi->ip_hlen);
1817 #if IFCAP_TSO6 && defined(INET6)
1818 case ETHERTYPE_IPV6:
1819 pi->ip6 = (struct ip6_hdr *)(m->m_data + pi->ip_off);
1820 if (__predict_false(m->m_len < pi->ip_off + sizeof(*pi->ip6))) {
1821 m_copydata(m, 0, pi->ip_off + sizeof(*pi->ip6),
1823 pi->ip6 = (struct ip6_hdr *)(ss->scratch + pi->ip_off);
/* walk extension headers to find the transport header */
1826 pi->ip_hlen = ip6_lasthdr(m, pi->ip_off, IPPROTO_IPV6, &nxt);
1827 pi->ip_hlen -= pi->ip_off;
1828 if (nxt != IPPROTO_TCP && nxt != IPPROTO_UDP)
/* header chain too long for the firmware's TSO6 limit */
1834 if (pi->ip_off + pi->ip_hlen > ss->sc->max_tso6_hlen)
1837 if (__predict_false(m->m_len < pi->ip_off + pi->ip_hlen +
1838 sizeof(struct tcphdr))) {
1839 m_copydata(m, 0, pi->ip_off + pi->ip_hlen +
1840 sizeof(struct tcphdr), ss->scratch);
1841 pi->ip6 = (struct ip6_hdr *)(ss->scratch + pi->ip_off);
1843 pi->tcp = (struct tcphdr *)((char *)pi->ip6 + pi->ip_hlen);
/*
 * Encapsulate a TSO frame into firmware send requests.  Splits the
 * busdma segments at MSS boundaries (TSO "cuts"), fixes up the TCP
 * pseudo-header checksum the NIC expects, and back-patches each
 * request's rdma_count, which is only known after the following
 * segments have been processed.
 * NOTE(review): interior lines are elided; some loop/branch structure
 * is not visible in this view.
 */
1855 mxge_encap_tso(struct mxge_slice_state *ss, struct mbuf *m,
1856 int busdma_seg_cnt, struct mxge_pkt_info *pi)
1859 mcp_kreq_ether_send_t *req;
1860 bus_dma_segment_t *seg;
1861 uint32_t low, high_swapped;
1862 int len, seglen, cum_len, cum_len_next;
1863 int next_is_first, chop, cnt, rdma_count, small;
1864 uint16_t pseudo_hdr_offset, cksum_offset, mss, sum;
1865 uint8_t flags, flags_next;
1868 mss = m->m_pkthdr.tso_segsz;
1870 /* negative cum_len signifies to the
1871 * send loop that we are still in the
1872 * header portion of the TSO packet.
1875 cksum_offset = pi->ip_off + pi->ip_hlen;
1876 cum_len = -(cksum_offset + (pi->tcp->th_off << 2));
1878 /* TSO implies checksum offload on this hardware */
1879 if (__predict_false((m->m_pkthdr.csum_flags & (CSUM_TCP|CSUM_TCP_IPV6)) == 0)) {
1881 * If packet has full TCP csum, replace it with pseudo hdr
1882 * sum that the NIC expects, otherwise the NIC will emit
1883 * packets with bad TCP checksums.
1885 m->m_pkthdr.csum_data = offsetof(struct tcphdr, th_sum);
1887 #if (CSUM_TCP_IPV6 != 0) && defined(INET6)
1888 m->m_pkthdr.csum_flags |= CSUM_TCP_IPV6;
1889 sum = in6_cksum_pseudo(pi->ip6,
1890 m->m_pkthdr.len - cksum_offset,
1895 m->m_pkthdr.csum_flags |= CSUM_TCP;
1896 sum = in_pseudo(pi->ip->ip_src.s_addr,
1897 pi->ip->ip_dst.s_addr,
1898 htons(IPPROTO_TCP + (m->m_pkthdr.len -
/* write the computed pseudo-header sum back into the packet */
1902 m_copyback(m, offsetof(struct tcphdr, th_sum) +
1903 cksum_offset, sizeof(sum), (caddr_t)&sum);
1905 flags = MXGEFW_FLAGS_TSO_HDR | MXGEFW_FLAGS_FIRST;
1908 /* for TSO, pseudo_hdr_offset holds mss.
1909 * The firmware figures out where to put
1910 * the checksum by parsing the header. */
1911 pseudo_hdr_offset = htobe16(mss);
1915 * for IPv6 TSO, the "checksum offset" is re-purposed
1916 * to store the TCP header len
1918 cksum_offset = (pi->tcp->th_off << 2);
1926 /* "rdma_count" is the number of RDMAs belonging to the
1927 * current packet BEFORE the current send request. For
1928 * non-TSO packets, this is equal to "count".
1929 * For TSO packets, rdma_count needs to be reset
1930 * to 0 after a segment cut.
1932 * The rdma_count field of the send request is
1933 * the number of RDMAs of the packet starting at
1934 * that request. For TSO send requests with one ore more cuts
1935 * in the middle, this is the number of RDMAs starting
1936 * after the last cut in the request. All previous
1937 * segments before the last cut implicitly have 1 RDMA.
1939 * Since the number of RDMAs is not known beforehand,
1940 * it must be filled-in retroactively - after each
1941 * segmentation cut or at the end of the entire packet.
1944 while (busdma_seg_cnt) {
1945 /* Break the busdma segment up into pieces*/
1946 low = MXGE_LOWPART_TO_U32(seg->ds_addr);
1947 high_swapped = htobe32(MXGE_HIGHPART_TO_U32(seg->ds_addr));
1951 flags_next = flags & ~MXGEFW_FLAGS_FIRST;
1953 cum_len_next = cum_len + seglen;
/* back-patch the rdma count of the request that started this run */
1954 (req-rdma_count)->rdma_count = rdma_count + 1;
1955 if (__predict_true(cum_len >= 0)) {
/* in payload: cut at MSS boundaries */
1957 chop = (cum_len_next > mss);
1958 cum_len_next = cum_len_next % mss;
1959 next_is_first = (cum_len_next == 0);
1960 flags |= chop * MXGEFW_FLAGS_TSO_CHOP;
1961 flags_next |= next_is_first *
/* branch-free update of the running rdma count */
1963 rdma_count |= -(chop | next_is_first);
1964 rdma_count += chop & !next_is_first;
1965 } else if (cum_len_next >= 0) {
/* transitioning from header to payload */
1970 small = (mss <= MXGEFW_SEND_SMALL_SIZE);
1971 flags_next = MXGEFW_FLAGS_TSO_PLD |
1972 MXGEFW_FLAGS_FIRST |
1973 (small * MXGEFW_FLAGS_SMALL);
/* emit one send request for this piece */
1976 req->addr_high = high_swapped;
1977 req->addr_low = htobe32(low);
1978 req->pseudo_hdr_offset = pseudo_hdr_offset;
1980 req->rdma_count = 1;
1981 req->length = htobe16(seglen);
1982 req->cksum_offset = cksum_offset;
1983 req->flags = flags | ((cum_len & 1) *
1984 MXGEFW_FLAGS_ALIGN_ODD);
1987 cum_len = cum_len_next;
/* IPv4 only: consume the checksum offset as header bytes go by */
1992 if (cksum_offset != 0 && !pi->ip6) {
1993 if (__predict_false(cksum_offset > seglen))
1994 cksum_offset -= seglen;
/* bail out if we overflowed the per-packet descriptor budget */
1998 if (__predict_false(cnt > tx->max_desc))
2004 (req-rdma_count)->rdma_count = rdma_count;
/* walk back and mark the requests of the final segment */
2008 req->flags |= MXGEFW_FLAGS_TSO_LAST;
2009 } while (!(req->flags & (MXGEFW_FLAGS_TSO_CHOP | MXGEFW_FLAGS_FIRST)));
2011 tx->info[((cnt - 1) + tx->req) & tx->mask].flag = 1;
2012 mxge_submit_req(tx, tx->req_list, cnt);
2013 #ifdef IFNET_BUF_RING
2014 if ((ss->sc->num_slices > 1) && tx->queue_active == 0) {
2015 /* tell the NIC to start polling this slice */
2017 tx->queue_active = 1;
/* error path: unload the DMA map and drop the packet */
2025 bus_dmamap_unload(tx->dmat, tx->info[tx->req & tx->mask].map);
2029 printf("tx->max_desc exceeded via TSO!\n");
2030 printf("mss = %d, %ld, %d!\n", mss,
2031 (long)seg - (long)tx->seg_list, tx->max_desc);
2038 #endif /* IFCAP_TSO4 */
2040 #ifdef MXGE_NEW_VLAN_API
2042 * We reproduce the software vlan tag insertion from
2043 * net/if_vlan.c:vlan_start() here so that we can advertise "hardware"
2044 * vlan tag insertion. We need to advertise this in order to have the
2045 * vlan interface respect our csum offload flags.
/*
 * Insert an 802.1Q header carrying m_pkthdr.ether_vtag into the frame
 * and clear M_VLANTAG.  Returns the (possibly reallocated) mbuf, or
 * NULL if allocation failed (M_PREPEND/m_pullup may free the chain).
 */
2047 static struct mbuf *
2048 mxge_vlan_tag_insert(struct mbuf *m)
2050 struct ether_vlan_header *evl;
/* make room for the 4-byte VLAN encapsulation */
2052 M_PREPEND(m, ETHER_VLAN_ENCAP_LEN, M_NOWAIT);
2053 if (__predict_false(m == NULL))
2055 if (m->m_len < sizeof(*evl)) {
2056 m = m_pullup(m, sizeof(*evl));
2057 if (__predict_false(m == NULL))
2061 * Transform the Ethernet header into an Ethernet header
2062 * with 802.1Q encapsulation.
2064 evl = mtod(m, struct ether_vlan_header *);
/* shift MAC addresses up over the newly prepended space */
2065 bcopy((char *)evl + ETHER_VLAN_ENCAP_LEN,
2066 (char *)evl, ETHER_HDR_LEN - ETHER_TYPE_LEN);
2067 evl->evl_encap_proto = htons(ETHERTYPE_VLAN);
2068 evl->evl_tag = htons(m->m_pkthdr.ether_vtag);
2069 m->m_flags &= ~M_VLANTAG;
2072 #endif /* MXGE_NEW_VLAN_API */
/*
 * Encapsulate one outbound mbuf chain into firmware send requests on
 * this slice's tx ring: software-insert any VLAN tag, parse headers if
 * checksum/TSO offload is requested, DMA-map the chain (defragmenting
 * once on EFBIG), then build one request per segment.  TSO frames are
 * handed off to mxge_encap_tso().  Runts are padded to 60 bytes using
 * the pre-allocated zeropad DMA buffer.
 * NOTE(review): interior lines are elided; drop/error paths are not
 * fully visible in this view.
 */
2075 mxge_encap(struct mxge_slice_state *ss, struct mbuf *m)
2077 struct mxge_pkt_info pi = {0,0,0,0};
2079 mcp_kreq_ether_send_t *req;
2080 bus_dma_segment_t *seg;
2084 int cnt, cum_len, err, i, idx, odd_flag;
2085 uint16_t pseudo_hdr_offset;
2086 uint8_t flags, cksum_offset;
2093 #ifdef MXGE_NEW_VLAN_API
2094 if (m->m_flags & M_VLANTAG) {
2095 m = mxge_vlan_tag_insert(m);
2096 if (__predict_false(m == NULL))
2097 goto drop_without_m;
/* parse headers only when an offload is actually requested */
2100 if (m->m_pkthdr.csum_flags &
2101 (CSUM_TSO | CSUM_DELAY_DATA | CSUM_DELAY_DATA_IPV6)) {
2102 if (mxge_parse_tx(ss, m, &pi))
2106 /* (try to) map the frame for DMA */
2107 idx = tx->req & tx->mask;
2108 err = bus_dmamap_load_mbuf_sg(tx->dmat, tx->info[idx].map,
2109 m, tx->seg_list, &cnt,
2111 if (__predict_false(err == EFBIG)) {
2112 /* Too many segments in the chain. Try
2114 m_tmp = m_defrag(m, M_NOWAIT);
2115 if (m_tmp == NULL) {
/* retry the mapping on the defragmented chain */
2120 err = bus_dmamap_load_mbuf_sg(tx->dmat,
2122 m, tx->seg_list, &cnt,
2125 if (__predict_false(err != 0)) {
2126 device_printf(sc->dev, "bus_dmamap_load_mbuf_sg returned %d"
2127 " packet len = %d\n", err, m->m_pkthdr.len);
2130 bus_dmamap_sync(tx->dmat, tx->info[idx].map,
2131 BUS_DMASYNC_PREWRITE);
2132 tx->info[idx].m = m;
2135 /* TSO is different enough, we handle it in another routine */
2136 if (m->m_pkthdr.csum_flags & (CSUM_TSO)) {
2137 mxge_encap_tso(ss, m, cnt, &pi);
2144 pseudo_hdr_offset = 0;
2145 flags = MXGEFW_FLAGS_NO_TSO;
2147 /* checksum offloading? */
2148 if (m->m_pkthdr.csum_flags &
2149 (CSUM_DELAY_DATA | CSUM_DELAY_DATA_IPV6)) {
2150 /* ensure ip header is in first mbuf, copy
2151 it to a scratch buffer if not */
2152 cksum_offset = pi.ip_off + pi.ip_hlen;
2153 pseudo_hdr_offset = cksum_offset + m->m_pkthdr.csum_data;
2154 pseudo_hdr_offset = htobe16(pseudo_hdr_offset);
2155 req->cksum_offset = cksum_offset;
2156 flags |= MXGEFW_FLAGS_CKSUM;
2157 odd_flag = MXGEFW_FLAGS_ALIGN_ODD;
2161 if (m->m_pkthdr.len < MXGEFW_SEND_SMALL_SIZE)
2162 flags |= MXGEFW_FLAGS_SMALL;
2164 /* convert segments into a request list */
2167 req->flags = MXGEFW_FLAGS_FIRST;
2168 for (i = 0; i < cnt; i++) {
2170 htobe32(MXGE_LOWPART_TO_U32(seg->ds_addr));
2172 htobe32(MXGE_HIGHPART_TO_U32(seg->ds_addr));
2173 req->length = htobe16(seg->ds_len);
2174 req->cksum_offset = cksum_offset;
/* checksum offset only applies until the header bytes are consumed */
2175 if (cksum_offset > seg->ds_len)
2176 cksum_offset -= seg->ds_len;
2179 req->pseudo_hdr_offset = pseudo_hdr_offset;
2180 req->pad = 0; /* complete solid 16-byte block */
2181 req->rdma_count = 1;
2182 req->flags |= flags | ((cum_len & 1) * odd_flag);
2183 cum_len += seg->ds_len;
2189 /* pad runts to 60 bytes */
/* extra request pointing at the shared zero-filled pad buffer */
2193 htobe32(MXGE_LOWPART_TO_U32(sc->zeropad_dma.bus_addr));
2195 htobe32(MXGE_HIGHPART_TO_U32(sc->zeropad_dma.bus_addr));
2196 req->length = htobe16(60 - cum_len);
2197 req->cksum_offset = 0;
2198 req->pseudo_hdr_offset = pseudo_hdr_offset;
2199 req->pad = 0; /* complete solid 16-byte block */
2200 req->rdma_count = 1;
2201 req->flags |= flags | ((cum_len & 1) * odd_flag);
2205 tx->req_list[0].rdma_count = cnt;
2207 /* print what the firmware will see */
2208 for (i = 0; i < cnt; i++) {
2209 printf("%d: addr: 0x%x 0x%x len:%d pso%d,"
2210 "cso:%d, flags:0x%x, rdma:%d\n",
2211 i, (int)ntohl(tx->req_list[i].addr_high),
2212 (int)ntohl(tx->req_list[i].addr_low),
2213 (int)ntohs(tx->req_list[i].length),
2214 (int)ntohs(tx->req_list[i].pseudo_hdr_offset),
2215 tx->req_list[i].cksum_offset, tx->req_list[i].flags,
2216 tx->req_list[i].rdma_count);
2218 printf("--------------\n");
2220 tx->info[((cnt - 1) + tx->req) & tx->mask].flag = 1;
2221 mxge_submit_req(tx, tx->req_list, cnt);
2222 #ifdef IFNET_BUF_RING
2223 if ((ss->sc->num_slices > 1) && tx->queue_active == 0) {
2224 /* tell the NIC to start polling this slice */
2226 tx->queue_active = 1;
2240 #ifdef IFNET_BUF_RING
/*
 * if_qflush handler: drain and free every mbuf queued on each slice's
 * transmit buf_ring.
 */
2242 mxge_qflush(struct ifnet *ifp)
2244 mxge_softc_t *sc = ifp->if_softc;
2249 for (slice = 0; slice < sc->num_slices; slice++) {
2250 tx = &sc->ss[slice].tx;
/* dequeue under the per-ring lock (lock acquisition elided in view) */
2252 while ((m = buf_ring_dequeue_sc(tx->br)) != NULL)
2254 mtx_unlock(&tx->mtx);
/*
 * Multiqueue (IFNET_BUF_RING) transmit pump: while descriptor space
 * remains, dequeue frames from the slice's drbr and encapsulate them.
 * Sets IFF_DRV_OACTIVE on this slice when the ring fills with frames
 * still pending.  Caller must hold the tx lock.
 */
2260 mxge_start_locked(struct mxge_slice_state *ss)
/* (tx->mask - inflight) > max_desc guarantees room for a full chain */
2271 while ((tx->mask - (tx->req - tx->done)) > tx->max_desc) {
2272 m = drbr_dequeue(ifp, tx->br);
2276 /* let BPF see it */
2279 /* give it to the nic */
2282 /* ran out of transmit slots */
2283 if (((ss->if_drv_flags & IFF_DRV_OACTIVE) == 0)
2284 && (!drbr_empty(ifp, tx->br))) {
2285 ss->if_drv_flags |= IFF_DRV_OACTIVE;
/*
 * Transmit one frame on this slice with the tx lock held: if the
 * interface is not up/active the frame is simply enqueued; otherwise
 * it is sent directly when the drbr is empty and descriptors are
 * available, or enqueued and the queue drained via mxge_start_locked().
 */
2291 mxge_transmit_locked(struct mxge_slice_state *ss, struct mbuf *m)
/* not RUNNING (or OACTIVE): just queue the frame for later */
2302 if ((ss->if_drv_flags & (IFF_DRV_RUNNING|IFF_DRV_OACTIVE)) !=
2304 err = drbr_enqueue(ifp, tx->br, m);
/* fast path: nothing queued ahead of us and ring has room */
2308 if (!drbr_needs_enqueue(ifp, tx->br) &&
2309 ((tx->mask - (tx->req - tx->done)) > tx->max_desc)) {
2310 /* let BPF see it */
2312 /* give it to the nic */
2314 } else if ((err = drbr_enqueue(ifp, tx->br, m)) != 0) {
2317 if (!drbr_empty(ifp, tx->br))
2318 mxge_start_locked(ss);
/*
 * if_transmit entry point: pick a tx slice from the mbuf's flowid and
 * either transmit immediately (if the slice lock is uncontended) or
 * enqueue on that slice's buf_ring for later draining.
 */
2323 mxge_transmit(struct ifnet *ifp, struct mbuf *m)
2325 mxge_softc_t *sc = ifp->if_softc;
2326 struct mxge_slice_state *ss;
2331 slice = m->m_pkthdr.flowid;
2332 slice &= (sc->num_slices - 1); /* num_slices always power of 2 */
2334 ss = &sc->ss[slice];
/* avoid blocking: fall back to enqueue if the lock is held */
2337 if (mtx_trylock(&tx->mtx)) {
2338 err = mxge_transmit_locked(ss, m);
2339 mtx_unlock(&tx->mtx);
2341 err = drbr_enqueue(ifp, tx->br, m);
/*
 * Legacy (non-IFNET_BUF_RING) transmit pump: drain the interface send
 * queue while descriptor space remains; set IFF_DRV_OACTIVE when the
 * ring fills.  Caller must hold the tx lock.
 */
2350 mxge_start_locked(struct mxge_slice_state *ss)
2360 while ((tx->mask - (tx->req - tx->done)) > tx->max_desc) {
2361 IFQ_DRV_DEQUEUE(&ifp->if_snd, m);
2365 /* let BPF see it */
2368 /* give it to the nic */
2371 /* ran out of transmit slots */
2372 if ((sc->ifp->if_drv_flags & IFF_DRV_OACTIVE) == 0) {
2373 sc->ifp->if_drv_flags |= IFF_DRV_OACTIVE;
/*
 * if_start entry point for the legacy path: lock slice 0's tx ring and
 * drain the send queue.
 */
2379 mxge_start(struct ifnet *ifp)
2381 mxge_softc_t *sc = ifp->if_softc;
2382 struct mxge_slice_state *ss;
2384 /* only use the first slice for now */
2386 mtx_lock(&ss->tx.mtx);
2387 mxge_start_locked(ss);
2388 mtx_unlock(&ss->tx.mtx);
2392 * copy an array of mcp_kreq_ether_recv_t's to the mcp. Copy
2393 * at most 32 bytes at a time, so as to avoid involving the software
2394 * pio handler in the nic. We re-write the first segment's low
2395 * DMA address to mark it valid only after we write the entire chunk
/*
 * Push a batch of 8 receive descriptors to the NIC.  The first
 * descriptor's low address is temporarily poisoned (0xffffffff) during
 * the PIO copy and written last, so the NIC never sees a half-written
 * batch as valid.
 */
2399 mxge_submit_8rx(volatile mcp_kreq_ether_recv_t *dst,
2400 mcp_kreq_ether_recv_t *src)
2404 low = src->addr_low;
2405 src->addr_low = 0xffffffff;
2406 mxge_pio_copy(dst, src, 4 * sizeof (*src));
2408 mxge_pio_copy(dst + 4, src + 4, 4 * sizeof (*src));
/* restore host copy and publish the real address to the NIC */
2410 src->addr_low = low;
2411 dst->addr_low = low;
/*
 * Allocate and DMA-map a replacement mbuf for small-ring slot idx, and
 * record its address in the shadow ring.  Descriptors are handed to
 * the NIC in batches of 8 via mxge_submit_8rx().
 */
2416 mxge_get_buf_small(struct mxge_slice_state *ss, bus_dmamap_t map, int idx)
2418 bus_dma_segment_t seg;
2420 mxge_rx_ring_t *rx = &ss->rx_small;
2423 m = m_gethdr(M_NOWAIT, MT_DATA);
2430 err = bus_dmamap_load_mbuf_sg(rx->dmat, map, m,
2431 &seg, &cnt, BUS_DMA_NOWAIT);
2436 rx->info[idx].m = m;
/* stash the 64-bit bus address, big-endian, in the shadow ring */
2437 rx->shadow[idx].addr_low =
2438 htobe32(MXGE_LOWPART_TO_U32(seg.ds_addr));
2439 rx->shadow[idx].addr_high =
2440 htobe32(MXGE_HIGHPART_TO_U32(seg.ds_addr));
/* every 8th slot, submit the completed batch to the NIC */
2444 mxge_submit_8rx(&rx->lanai[idx - 7], &rx->shadow[idx - 7]);
/*
 * Allocate and DMA-map a replacement jumbo cluster for big-ring slot
 * idx.  With MXGE_VIRT_JUMBOS a cluster can span several segments, each
 * occupying its own shadow slot; batches of 8 descriptors are submitted
 * to the NIC via mxge_submit_8rx().
 */
2449 mxge_get_buf_big(struct mxge_slice_state *ss, bus_dmamap_t map, int idx)
2451 bus_dma_segment_t seg[3];
2453 mxge_rx_ring_t *rx = &ss->rx_big;
2456 m = m_getjcl(M_NOWAIT, MT_DATA, M_PKTHDR, rx->cl_size);
2462 m->m_len = rx->mlen;
2463 err = bus_dmamap_load_mbuf_sg(rx->dmat, map, m,
2464 seg, &cnt, BUS_DMA_NOWAIT);
2469 rx->info[idx].m = m;
2470 rx->shadow[idx].addr_low =
2471 htobe32(MXGE_LOWPART_TO_U32(seg->ds_addr));
2472 rx->shadow[idx].addr_high =
2473 htobe32(MXGE_HIGHPART_TO_U32(seg->ds_addr));
2475 #if MXGE_VIRT_JUMBOS
/* additional segments fill the following shadow slots */
2476 for (i = 1; i < cnt; i++) {
2477 rx->shadow[idx + i].addr_low =
2478 htobe32(MXGE_LOWPART_TO_U32(seg[i].ds_addr));
2479 rx->shadow[idx + i].addr_high =
2480 htobe32(MXGE_HIGHPART_TO_U32(seg[i].ds_addr));
/* submit each completed batch of 8 descriptors */
2485 for (i = 0; i < rx->nbufs; i++) {
2486 if ((idx & 7) == 7) {
2487 mxge_submit_8rx(&rx->lanai[idx - 7],
2488 &rx->shadow[idx - 7]);
/*
 * Plain 16-bit ones-complement sum over a buffer (internet checksum
 * style); the double fold collapses carries out of the high half.
 * NOTE(review): the accumulation loop itself is elided in this view.
 */
2498 mxge_csum_generic(uint16_t *raw, int len)
2509 csum = (csum >> 16) + (csum & 0xffff);
2510 csum = (csum >> 16) + (csum & 0xffff);
2511 return (uint16_t)csum;
/*
 * Validate the firmware's partial checksum for an IPv6 frame.  The
 * firmware sums the whole frame after the Ethernet header, so the IPv6
 * header's contribution (which, unlike IPv4, does not cancel itself
 * out) is subtracted before comparing against the expected
 * pseudo-header checksum for TCP/UDP.
 */
2514 static inline uint16_t
2515 mxge_rx_csum6(void *p, struct mbuf *m, uint32_t csum)
2518 int nxt, cksum_offset;
2519 struct ip6_hdr *ip6 = p;
2523 cksum_offset = sizeof (*ip6) + ETHER_HDR_LEN;
/* extension headers present: walk to the transport header */
2524 if (nxt != IPPROTO_TCP && nxt != IPPROTO_UDP) {
2525 cksum_offset = ip6_lasthdr(m, ETHER_HDR_LEN,
2526 IPPROTO_IPV6, &nxt);
2527 if (nxt != IPPROTO_TCP && nxt != IPPROTO_UDP)
2532 * IPv6 headers do not contain a checksum, and hence
2533 * do not checksum to zero, so they don't "fall out"
2534 * of the partial checksum calculation like IPv4
2535 * headers do. We need to fix the partial checksum by
2536 * subtracting the checksum of the IPv6 header.
2539 partial = mxge_csum_generic((uint16_t *)ip6, cksum_offset -
/* ones-complement subtraction with carry, then fold twice */
2542 csum += (csum < ~partial);
2543 csum = (csum >> 16) + (csum & 0xFFFF);
2544 csum = (csum >> 16) + (csum & 0xFFFF);
2545 c = in6_cksum_pseudo(ip6, m->m_pkthdr.len - cksum_offset, nxt,
2552 * Myri10GE hardware checksums are not valid if the sender
2553 * padded the frame with non-zero padding. This is because
2554 * the firmware just does a simple 16-bit 1s complement
2555 * checksum across the entire frame, excluding the first 14
2556 * bytes. It is best to simply to check the checksum and
2557 * tell the stack about it only if the checksum is good
/*
 * Verify the firmware's partial checksum for a received frame.
 * Returns 0 when the checksum validates (caller then marks the mbuf
 * checksum-good).  Dispatches on ethertype; IPv6 is delegated to
 * mxge_rx_csum6().  Honors the interface's RXCSUM capability flags.
 */
2560 static inline uint16_t
2561 mxge_rx_csum(struct mbuf *m, int csum)
2563 struct ether_header *eh;
2567 #if defined(INET) || defined(INET6)
2568 int cap = m->m_pkthdr.rcvif->if_capenable;
2573 eh = mtod(m, struct ether_header *);
2574 etype = ntohs(eh->ether_type);
/* IPv4: validate only TCP/UDP and only if RXCSUM is enabled */
2578 if ((cap & IFCAP_RXCSUM) == 0)
2580 ip = (struct ip *)(eh + 1);
2581 if (ip->ip_p != IPPROTO_TCP && ip->ip_p != IPPROTO_UDP)
/* compare against the IPv4 pseudo-header checksum */
2583 c = in_pseudo(ip->ip_src.s_addr, ip->ip_dst.s_addr,
2584 htonl(ntohs(csum) + ntohs(ip->ip_len) -
2585 (ip->ip_hl << 2) + ip->ip_p));
2590 case ETHERTYPE_IPV6:
2591 if ((cap & IFCAP_RXCSUM_IPV6) == 0)
2593 c = mxge_rx_csum6((eh + 1), m, csum);
/*
 * Strip an 802.1Q header from a received frame: move the tag into the
 * mbuf packet header (M_VLANTAG / ether_vtag, or an m_tag on the old
 * VLAN API), subtract the 4 encapsulation bytes from the firmware's
 * partial checksum, and close the gap in the frame data.
 */
2603 mxge_vlan_tag_remove(struct mbuf *m, uint32_t *csum)
2605 struct ether_vlan_header *evl;
2606 struct ether_header *eh;
2609 evl = mtod(m, struct ether_vlan_header *);
2610 eh = mtod(m, struct ether_header *);
2613 * fix checksum by subtracting ETHER_VLAN_ENCAP_LEN bytes
2614 * after what the firmware thought was the end of the ethernet
2618 /* put checksum into host byte order */
2619 *csum = ntohs(*csum);
/* the 4 VLAN bytes sit right after the 14-byte Ethernet header */
2620 partial = ntohl(*(uint32_t *)(mtod(m, char *) + ETHER_HDR_LEN));
/* ones-complement subtract with carry, fold twice */
2621 (*csum) += ~partial;
2622 (*csum) += ((*csum) < ~partial);
2623 (*csum) = ((*csum) >> 16) + ((*csum) & 0xFFFF);
2624 (*csum) = ((*csum) >> 16) + ((*csum) & 0xFFFF);
2626 /* restore checksum to network byte order;
2627 later consumers expect this */
2628 *csum = htons(*csum);
2631 #ifdef MXGE_NEW_VLAN_API
2632 m->m_pkthdr.ether_vtag = ntohs(evl->evl_tag);
/* old VLAN API: carry the tag as an mbuf tag instead */
2636 mtag = m_tag_alloc(MTAG_VLAN, MTAG_VLAN_TAG, sizeof(u_int),
2640 VLAN_TAG_VALUE(mtag) = ntohs(evl->evl_tag);
2641 m_tag_prepend(m, mtag);
2645 m->m_flags |= M_VLANTAG;
2648 * Remove the 802.1q header by copying the Ethernet
2649 * addresses over it and adjusting the beginning of
2650 * the data in the mbuf. The encapsulated Ethernet
2651 * type field is already in place.
2653 bcopy((char *)evl, (char *)evl + ETHER_VLAN_ENCAP_LEN,
2654 ETHER_HDR_LEN - ETHER_TYPE_LEN);
2655 m_adj(m, ETHER_VLAN_ENCAP_LEN);
/*
 * Process one completed receive on the big (jumbo) ring: replace the
 * filled mbuf with a fresh one (dropping the frame if replacement
 * fails), strip any VLAN header, validate the firmware checksum, try
 * LRO, and pass the frame up the stack.
 */
2660 mxge_rx_done_big(struct mxge_slice_state *ss, uint32_t len,
2661 uint32_t csum, int lro)
2666 struct ether_header *eh;
2668 bus_dmamap_t old_map;
2674 idx = rx->cnt & rx->mask;
/* a jumbo buffer may occupy several ring slots */
2675 rx->cnt += rx->nbufs;
2676 /* save a pointer to the received mbuf */
2677 m = rx->info[idx].m;
2678 /* try to replace the received mbuf */
2679 if (mxge_get_buf_big(ss, rx->extra_map, idx)) {
2680 /* drop the frame -- the old mbuf is re-cycled */
2681 if_inc_counter(ifp, IFCOUNTER_IERRORS, 1);
2685 /* unmap the received buffer */
2686 old_map = rx->info[idx].map;
2687 bus_dmamap_sync(rx->dmat, old_map, BUS_DMASYNC_POSTREAD);
2688 bus_dmamap_unload(rx->dmat, old_map);
2690 /* swap the bus_dmamap_t's */
2691 rx->info[idx].map = rx->extra_map;
2692 rx->extra_map = old_map;
2694 /* mcp implicitly skips 1st 2 bytes so that packet is properly
2696 m->m_data += MXGEFW_PAD;
2698 m->m_pkthdr.rcvif = ifp;
2699 m->m_len = m->m_pkthdr.len = len;
2701 eh = mtod(m, struct ether_header *);
2702 if (eh->ether_type == htons(ETHERTYPE_VLAN)) {
2703 mxge_vlan_tag_remove(m, &csum);
2705 /* if the checksum is valid, mark it in the mbuf header */
2707 if ((ifp->if_capenable & (IFCAP_RXCSUM_IPV6 | IFCAP_RXCSUM)) &&
2708 (0 == mxge_rx_csum(m, csum))) {
2709 /* Tell the stack that the checksum is good */
2710 m->m_pkthdr.csum_data = 0xffff;
2711 m->m_pkthdr.csum_flags = CSUM_PSEUDO_HDR |
2714 #if defined(INET) || defined (INET6)
/* LRO consumed the mbuf; nothing further to do */
2715 if (lro && (0 == tcp_lro_rx(&ss->lc, m, 0)))
2719 /* flowid only valid if RSS hashing is enabled */
2720 if (sc->num_slices > 1) {
2721 m->m_pkthdr.flowid = (ss - sc->ss);
2722 M_HASHTYPE_SET(m, M_HASHTYPE_OPAQUE);
2724 /* pass the frame up the stack */
2725 (*ifp->if_input)(ifp, m);
/*
 * Process one completed receive on the small ring; identical flow to
 * mxge_rx_done_big() but for single-slot small buffers, and passes the
 * firmware checksum through to tcp_lro_rx().
 */
2729 mxge_rx_done_small(struct mxge_slice_state *ss, uint32_t len,
2730 uint32_t csum, int lro)
2734 struct ether_header *eh;
2737 bus_dmamap_t old_map;
2743 idx = rx->cnt & rx->mask;
2745 /* save a pointer to the received mbuf */
2746 m = rx->info[idx].m;
2747 /* try to replace the received mbuf */
2748 if (mxge_get_buf_small(ss, rx->extra_map, idx)) {
2749 /* drop the frame -- the old mbuf is re-cycled */
2750 if_inc_counter(ifp, IFCOUNTER_IERRORS, 1);
2754 /* unmap the received buffer */
2755 old_map = rx->info[idx].map;
2756 bus_dmamap_sync(rx->dmat, old_map, BUS_DMASYNC_POSTREAD);
2757 bus_dmamap_unload(rx->dmat, old_map);
2759 /* swap the bus_dmamap_t's */
2760 rx->info[idx].map = rx->extra_map;
2761 rx->extra_map = old_map;
2763 /* mcp implicitly skips 1st 2 bytes so that packet is properly
2765 m->m_data += MXGEFW_PAD;
2767 m->m_pkthdr.rcvif = ifp;
2768 m->m_len = m->m_pkthdr.len = len;
2770 eh = mtod(m, struct ether_header *);
2771 if (eh->ether_type == htons(ETHERTYPE_VLAN)) {
2772 mxge_vlan_tag_remove(m, &csum);
2774 /* if the checksum is valid, mark it in the mbuf header */
2775 if ((ifp->if_capenable & (IFCAP_RXCSUM_IPV6 | IFCAP_RXCSUM)) &&
2776 (0 == mxge_rx_csum(m, csum))) {
2777 /* Tell the stack that the checksum is good */
2778 m->m_pkthdr.csum_data = 0xffff;
2779 m->m_pkthdr.csum_flags = CSUM_PSEUDO_HDR |
2782 #if defined(INET) || defined (INET6)
/* LRO consumed the mbuf; nothing further to do */
2783 if (lro && (0 == tcp_lro_rx(&ss->lc, m, csum)))
2787 /* flowid only valid if RSS hashing is enabled */
2788 if (sc->num_slices > 1) {
2789 m->m_pkthdr.flowid = (ss - sc->ss);
2790 M_HASHTYPE_SET(m, M_HASHTYPE_OPAQUE);
2792 /* pass the frame up the stack */
2793 (*ifp->if_input)(ifp, m);
/*
 * Drain this slice's receive completion ring: dispatch each completed
 * frame to the small- or big-ring handler based on length, with a
 * livelock limit of half the ring per invocation, then flush LRO.
 */
2797 mxge_clean_rx_done(struct mxge_slice_state *ss)
2799 mxge_rx_done_t *rx_done = &ss->rx_done;
2805 lro = ss->sc->ifp->if_capenable & IFCAP_LRO;
/* a non-zero length marks a valid completion entry */
2806 while (rx_done->entry[rx_done->idx].length != 0) {
2807 length = ntohs(rx_done->entry[rx_done->idx].length);
/* clear the entry so it reads as empty next pass */
2808 rx_done->entry[rx_done->idx].length = 0;
2809 checksum = rx_done->entry[rx_done->idx].checksum;
/* frames that fit an mbuf header came from the small ring */
2810 if (length <= (MHLEN - MXGEFW_PAD))
2811 mxge_rx_done_small(ss, length, checksum, lro);
2813 mxge_rx_done_big(ss, length, checksum, lro);
2815 rx_done->idx = rx_done->cnt & rx_done->mask;
2817 /* limit potential for livelock */
2818 if (__predict_false(++limit > rx_done->mask / 2))
2821 #if defined(INET) || defined (INET6)
2822 tcp_lro_flush_all(&ss->lc);
/*
 * Reclaim transmit descriptors up to the firmware's completion index
 * mcp_idx: free completed mbufs, unload their DMA maps, update byte/
 * multicast counters, clear IFF_DRV_OACTIVE once a quarter of the ring
 * is free, and (multiqueue) let the NIC stop polling an idle queue.
 */
2828 mxge_tx_done(struct mxge_slice_state *ss, uint32_t mcp_idx)
2839 while (tx->pkt_done != mcp_idx) {
2840 idx = tx->done & tx->mask;
2842 m = tx->info[idx].m;
2843 /* mbuf and DMA map only attached to the first
2846 ss->obytes += m->m_pkthdr.len;
2847 if (m->m_flags & M_MCAST)
2850 tx->info[idx].m = NULL;
2851 map = tx->info[idx].map;
2852 bus_dmamap_unload(tx->dmat, map);
/* flag marks the last descriptor of a packet */
2855 if (tx->info[idx].flag) {
2856 tx->info[idx].flag = 0;
2861 /* If we have space, clear IFF_OACTIVE to tell the stack that
2862 its OK to send packets */
2863 #ifdef IFNET_BUF_RING
2864 flags = &ss->if_drv_flags;
2866 flags = &ifp->if_drv_flags;
2868 mtx_lock(&ss->tx.mtx);
2869 if ((*flags) & IFF_DRV_OACTIVE &&
2870 tx->req - tx->done < (tx->mask + 1)/4) {
2871 *(flags) &= ~IFF_DRV_OACTIVE;
/* restart transmission now that there is room */
2873 mxge_start_locked(ss);
2875 #ifdef IFNET_BUF_RING
2876 if ((ss->sc->num_slices > 1) && (tx->req == tx->done)) {
2877 /* let the NIC stop polling this queue, since there
2878 * are no more transmits pending */
2879 if (tx->req == tx->done) {
2881 tx->queue_active = 0;
2887 mtx_unlock(&ss->tx.mtx);
/*
 * XFP module compliance-byte decode table (excerpt).  Each entry maps a
 * bit in the XFP 10GbE compliance register to an ifmedia type; entries
 * with flag 0 have no FreeBSD media type and are report-only.
 */
2891 static struct mxge_media_type mxge_xfp_media_types[] =
2893 {IFM_10G_CX4, 0x7f, "10GBASE-CX4 (module)"},
2894 {IFM_10G_SR, (1 << 7), "10GBASE-SR"},
2895 {IFM_10G_LR, (1 << 6), "10GBASE-LR"},
2896 {0, (1 << 5), "10GBASE-ER"},
2897 {IFM_10G_LRM, (1 << 4), "10GBASE-LRM"},
2898 {0, (1 << 3), "10GBASE-SW"},
2899 {0, (1 << 2), "10GBASE-LW"},
2900 {0, (1 << 1), "10GBASE-EW"},
2901 {0, (1 << 0), "Reserved"}
/*
 * SFP+ module decode table (excerpt).  First entry (bitmask 0) is the
 * default/fall-through Twinax match used by mxge_media_probe when no
 * compliance bit is set.
 */
2903 static struct mxge_media_type mxge_sfp_media_types[] =
2905 {IFM_10G_TWINAX, 0, "10GBASE-Twinax"},
2906 {0, (1 << 7), "Reserved"},
2907 {IFM_10G_LRM, (1 << 6), "10GBASE-LRM"},
2908 {IFM_10G_LR, (1 << 5), "10GBASE-LR"},
2909 {IFM_10G_SR, (1 << 4), "10GBASE-SR"},
2910 {IFM_10G_TWINAX,(1 << 0), "10GBASE-Twinax"}
/*
 * Register and select a single full-duplex ifmedia type and cache it in
 * sc->current_media (excerpt; elided lines).  Also mirrors the selected
 * media word back into sc->media.ifm_media so SIOCGIFMEDIA reports it.
 */
2914 mxge_media_set(mxge_softc_t *sc, int media_type)
2918 ifmedia_add(&sc->media, IFM_ETHER | IFM_FDX | media_type,
2920 ifmedia_set(&sc->media, IFM_ETHER | IFM_FDX | media_type);
2921 sc->current_media = media_type;
2922 sc->media.ifm_media = sc->media.ifm_cur->ifm_media;
/*
 * Classify the NIC's connector (CX4, XFP, SFP+, Quad Ribbon Fiber) from
 * the character following the third '-' in the cached EEPROM product
 * code string, and seed the ifmedia list accordingly (excerpt; elided
 * lines).  Starts from IFM_AUTO after clearing any previous media list.
 */
2926 mxge_media_init(mxge_softc_t *sc)
2931 ifmedia_removeall(&sc->media);
2932 mxge_media_set(sc, IFM_AUTO);
2935 * parse the product code to deterimine the interface type
2936 * (CX4, XFP, Quad Ribbon Fiber) by looking at the character
2937 * after the 3rd dash in the driver's cached copy of the
2938 * EEPROM's product code string.
2940 ptr = sc->product_code_string;
2942 device_printf(sc->dev, "Missing product code\n");
/* walk to the character after the third dash; bail if fewer than 3 */
2946 for (i = 0; i < 3; i++, ptr++) {
2947 ptr = strchr(ptr, '-');
2949 device_printf(sc->dev,
2950 "only %d dashes in PC?!?\n", i);
/* -C or -2C is CX4 (second char checked for the "2C" form) */
2954 if (*ptr == 'C' || *(ptr +1) == 'C') {
2956 sc->connector = MXGE_CX4;
2957 mxge_media_set(sc, IFM_10G_CX4);
2958 } else if (*ptr == 'Q') {
2959 /* -Q is Quad Ribbon Fiber */
2960 sc->connector = MXGE_QRF;
2961 device_printf(sc->dev, "Quad Ribbon Fiber Media\n");
2962 /* FreeBSD has no media type for Quad ribbon fiber */
2963 } else if (*ptr == 'R') {
2965 sc->connector = MXGE_XFP;
2966 } else if (*ptr == 'S' || *(ptr +1) == 'S') {
2967 /* -S or -2S is SFP+ */
2968 sc->connector = MXGE_SFP;
2970 device_printf(sc->dev, "Unknown media type: %c\n", *ptr);
2975 * Determine the media type for a NIC. Some XFPs will identify
2976 * themselves only when their link is up, so this is initiated via a
2977 * link up interrupt. However, this can potentially take up to
2978 * several milliseconds, so it is run via the watchdog routine, rather
2979 * than in the interrupt handler itself.
/*
 * (excerpt; elided lines)  Reads one byte of the module's I2C compliance
 * register via firmware commands, polling up to ~50ms for the cached
 * read to complete, then matches the byte against the XFP or SFP+ decode
 * table and re-seeds ifmedia when the detected media changed.
 */
2982 mxge_media_probe(mxge_softc_t *sc)
2987 struct mxge_media_type *mxge_media_types = NULL;
2988 int i, err, ms, mxge_media_type_entries;
2991 sc->need_media_probe = 0;
2993 if (sc->connector == MXGE_XFP) {
2995 mxge_media_types = mxge_xfp_media_types;
2996 mxge_media_type_entries =
2997 sizeof (mxge_xfp_media_types) /
2998 sizeof (mxge_xfp_media_types[0]);
2999 byte = MXGE_XFP_COMPLIANCE_BYTE;
3001 } else if (sc->connector == MXGE_SFP) {
3002 /* -S or -2S is SFP+ */
3003 mxge_media_types = mxge_sfp_media_types;
3004 mxge_media_type_entries =
3005 sizeof (mxge_sfp_media_types) /
3006 sizeof (mxge_sfp_media_types[0]);
3010 /* nothing to do; media type cannot change */
3015 * At this point we know the NIC has an XFP cage, so now we
3016 * try to determine what is in the cage by using the
3017 * firmware's XFP I2C commands to read the XFP 10GbE compilance
3018 * register. We read just one byte, which may take over
3022 cmd.data0 = 0; /* just fetch 1 byte, not all 256 */
3024 err = mxge_send_cmd(sc, MXGEFW_CMD_I2C_READ, &cmd);
3025 if (err == MXGEFW_CMD_ERROR_I2C_FAILURE) {
3026 device_printf(sc->dev, "failed to read XFP\n");
3028 if (err == MXGEFW_CMD_ERROR_I2C_ABSENT) {
3029 device_printf(sc->dev, "Type R/S with no XFP!?!?\n");
3031 if (err != MXGEFW_CMD_OK) {
3035 /* now we wait for the data to be cached */
3037 err = mxge_send_cmd(sc, MXGEFW_CMD_I2C_BYTE, &cmd);
/* poll at most ~50 iterations while the firmware caches the byte */
3038 for (ms = 0; (err == EBUSY) && (ms < 50); ms++) {
3041 err = mxge_send_cmd(sc, MXGEFW_CMD_I2C_BYTE, &cmd);
3043 if (err != MXGEFW_CMD_OK) {
3044 device_printf(sc->dev, "failed to read %s (%d, %dms)\n",
3045 cage_type, err, ms);
/* entry 0 is an exact-match default (e.g. Twinax for SFP+) */
3049 if (cmd.data0 == mxge_media_types[0].bitmask) {
3051 device_printf(sc->dev, "%s:%s\n", cage_type,
3052 mxge_media_types[0].name);
3053 if (sc->current_media != mxge_media_types[0].flag) {
3054 mxge_media_init(sc);
3055 mxge_media_set(sc, mxge_media_types[0].flag);
/* remaining entries are single-bit matches against the compliance byte */
3059 for (i = 1; i < mxge_media_type_entries; i++) {
3060 if (cmd.data0 & mxge_media_types[i].bitmask) {
3062 device_printf(sc->dev, "%s:%s\n",
3064 mxge_media_types[i].name);
3066 if (sc->current_media != mxge_media_types[i].flag) {
3067 mxge_media_init(sc);
3068 mxge_media_set(sc, mxge_media_types[i].flag);
3074 device_printf(sc->dev, "%s media 0x%x unknown\n",
3075 cage_type, cmd.data0);
/*
 * Per-slice interrupt handler (excerpt; elided lines).  For non-zero
 * MSI-X slices (without IFNET_BUF_RING) the irq is implicitly valid, so
 * just clean rx and claim.  Otherwise: wait for the stats DMA to land,
 * deassert a legacy irq if configured, loop reaping tx completions and
 * rx frames while the firmware keeps stats->valid set, then process
 * link/rdma events (first slice only) and write the irq-claim tokens.
 */
3081 mxge_intr(void *arg)
3083 struct mxge_slice_state *ss = arg;
3084 mxge_softc_t *sc = ss->sc;
3085 mcp_irq_data_t *stats = ss->fw_stats;
3086 mxge_tx_ring_t *tx = &ss->tx;
3087 mxge_rx_done_t *rx_done = &ss->rx_done;
3088 uint32_t send_done_count;
3092 #ifndef IFNET_BUF_RING
3093 /* an interrupt on a non-zero slice is implicitly valid
3094 since MSI-X irqs are not shared */
3096 mxge_clean_rx_done(ss);
3097 *ss->irq_claim = be32toh(3);
3102 /* make sure the DMA has finished */
3103 if (!stats->valid) {
3106 valid = stats->valid;
3108 if (sc->legacy_irq) {
3109 /* lower legacy IRQ */
3110 *sc->irq_deassert = 0;
3111 if (!mxge_deassert_wait)
3112 /* don't wait for conf. that irq is low */
3118 /* loop while waiting for legacy irq deassertion */
3120 /* check for transmit completes and receives */
3121 send_done_count = be32toh(stats->send_done_count);
3122 while ((send_done_count != tx->pkt_done) ||
3123 (rx_done->entry[rx_done->idx].length != 0)) {
3124 if (send_done_count != tx->pkt_done)
3125 mxge_tx_done(ss, (int)send_done_count);
3126 mxge_clean_rx_done(ss);
3127 send_done_count = be32toh(stats->send_done_count);
3129 if (sc->legacy_irq && mxge_deassert_wait)
/* volatile re-read: firmware clears valid when the irq is truly done */
3131 } while (*((volatile uint8_t *) &stats->valid));
3133 /* fw link & error stats meaningful only on the first slice */
3134 if (__predict_false((ss == sc->ss) && stats->stats_updated)) {
3135 if (sc->link_state != stats->link_up) {
3136 sc->link_state = stats->link_up;
3137 if (sc->link_state) {
3138 if_link_state_change(sc->ifp, LINK_STATE_UP);
3140 device_printf(sc->dev, "link up\n");
3142 if_link_state_change(sc->ifp, LINK_STATE_DOWN);
3144 device_printf(sc->dev, "link down\n");
/* schedule media re-probe from the watchdog, not from irq context */
3146 sc->need_media_probe = 1;
3148 if (sc->rdma_tags_available !=
3149 be32toh(stats->rdma_tags_available)) {
3150 sc->rdma_tags_available =
3151 be32toh(stats->rdma_tags_available);
3152 device_printf(sc->dev, "RDMA timed out! %d tags "
3153 "left\n", sc->rdma_tags_available);
3156 if (stats->link_down) {
3157 sc->down_cnt += stats->link_down;
3159 if_link_state_change(sc->ifp, LINK_STATE_DOWN);
3163 /* check to see if we have rx token to pass back */
3165 *ss->irq_claim = be32toh(3);
3166 *(ss->irq_claim + 1) = be32toh(3);
/*
 * if_init handler (excerpt; elided lines): open the interface under the
 * driver lock if it is not already running.
 */
3170 mxge_init(void *arg)
3172 mxge_softc_t *sc = arg;
3173 struct ifnet *ifp = sc->ifp;
3176 mtx_lock(&sc->driver_mtx);
3177 if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
3178 (void) mxge_open(sc);
3179 mtx_unlock(&sc->driver_mtx);
/*
 * Release all mbufs still attached to a slice's rings (excerpt; elided
 * lines): frees LRO state, unloads and frees every rx_big/rx_small
 * entry, then the tx ring entries (tx ring exists only on the first
 * slice unless IFNET_BUF_RING is in use — tx.info NULL check covers it).
 */
3185 mxge_free_slice_mbufs(struct mxge_slice_state *ss)
3189 #if defined(INET) || defined(INET6)
3190 tcp_lro_free(&ss->lc);
3192 for (i = 0; i <= ss->rx_big.mask; i++) {
3193 if (ss->rx_big.info[i].m == NULL)
3195 bus_dmamap_unload(ss->rx_big.dmat,
3196 ss->rx_big.info[i].map);
3197 m_freem(ss->rx_big.info[i].m);
3198 ss->rx_big.info[i].m = NULL;
3201 for (i = 0; i <= ss->rx_small.mask; i++) {
3202 if (ss->rx_small.info[i].m == NULL)
3204 bus_dmamap_unload(ss->rx_small.dmat,
3205 ss->rx_small.info[i].map);
3206 m_freem(ss->rx_small.info[i].m);
3207 ss->rx_small.info[i].m = NULL;
3210 /* transmit ring used only on the first slice */
3211 if (ss->tx.info == NULL)
3214 for (i = 0; i <= ss->tx.mask; i++) {
3215 ss->tx.info[i].flag = 0;
3216 if (ss->tx.info[i].m == NULL)
3218 bus_dmamap_unload(ss->tx.dmat,
3219 ss->tx.info[i].map);
3220 m_freem(ss->tx.info[i].m);
3221 ss->tx.info[i].m = NULL;
/* Free ring mbufs on every slice (excerpt; elided lines). */
3226 mxge_free_mbufs(mxge_softc_t *sc)
3230 for (slice = 0; slice < sc->num_slices; slice++)
3231 mxge_free_slice_mbufs(&sc->ss[slice]);
/*
 * Tear down one slice's ring bookkeeping (excerpt; elided lines):
 * rx-completion DMA block, tx request/segment scratch, rx shadow rings,
 * then per-entry dmamaps + dma tags + info arrays for tx, rx_small and
 * rx_big.  Each pointer is NULL-checked and reset so the routine is
 * safe to call on a partially-initialized slice.
 */
3235 mxge_free_slice_rings(struct mxge_slice_state *ss)
3240 if (ss->rx_done.entry != NULL)
3241 mxge_dma_free(&ss->rx_done.dma);
3242 ss->rx_done.entry = NULL;
3244 if (ss->tx.req_bytes != NULL)
3245 free(ss->tx.req_bytes, M_DEVBUF);
3246 ss->tx.req_bytes = NULL;
3248 if (ss->tx.seg_list != NULL)
3249 free(ss->tx.seg_list, M_DEVBUF);
3250 ss->tx.seg_list = NULL;
3252 if (ss->rx_small.shadow != NULL)
3253 free(ss->rx_small.shadow, M_DEVBUF);
3254 ss->rx_small.shadow = NULL;
3256 if (ss->rx_big.shadow != NULL)
3257 free(ss->rx_big.shadow, M_DEVBUF);
3258 ss->rx_big.shadow = NULL;
3260 if (ss->tx.info != NULL) {
3261 if (ss->tx.dmat != NULL) {
3262 for (i = 0; i <= ss->tx.mask; i++) {
3263 bus_dmamap_destroy(ss->tx.dmat,
3264 ss->tx.info[i].map);
3266 bus_dma_tag_destroy(ss->tx.dmat);
3268 free(ss->tx.info, M_DEVBUF);
3272 if (ss->rx_small.info != NULL) {
3273 if (ss->rx_small.dmat != NULL) {
3274 for (i = 0; i <= ss->rx_small.mask; i++) {
3275 bus_dmamap_destroy(ss->rx_small.dmat,
3276 ss->rx_small.info[i].map);
/* the extra map used for buffer swapping is destroyed too */
3278 bus_dmamap_destroy(ss->rx_small.dmat,
3279 ss->rx_small.extra_map);
3280 bus_dma_tag_destroy(ss->rx_small.dmat);
3282 free(ss->rx_small.info, M_DEVBUF);
3284 ss->rx_small.info = NULL;
3286 if (ss->rx_big.info != NULL) {
3287 if (ss->rx_big.dmat != NULL) {
3288 for (i = 0; i <= ss->rx_big.mask; i++) {
3289 bus_dmamap_destroy(ss->rx_big.dmat,
3290 ss->rx_big.info[i].map);
3292 bus_dmamap_destroy(ss->rx_big.dmat,
3293 ss->rx_big.extra_map);
3294 bus_dma_tag_destroy(ss->rx_big.dmat);
3296 free(ss->rx_big.info, M_DEVBUF);
3298 ss->rx_big.info = NULL;
/* Free ring resources on every slice (excerpt; elided lines). */
3302 mxge_free_rings(mxge_softc_t *sc)
3306 for (slice = 0; slice < sc->num_slices; slice++)
3307 mxge_free_slice_rings(&sc->ss[slice]);
/*
 * Allocate one slice's rings (excerpt; elided lines): shadow and info
 * arrays for rx_small/rx_big, busdma tags and per-entry maps for both
 * rx rings (plus an extra map each), then — on slices that transmit —
 * the tx request copy block (8-byte aligned), segment list, info ring,
 * tx dma tag and per-entry maps.  Masks are ring_entries - 1, so ring
 * sizes are assumed to be powers of two.
 */
3311 mxge_alloc_slice_rings(struct mxge_slice_state *ss, int rx_ring_entries,
3312 int tx_ring_entries)
3314 mxge_softc_t *sc = ss->sc;
3318 /* allocate per-slice receive resources */
3320 ss->rx_small.mask = ss->rx_big.mask = rx_ring_entries - 1;
3321 ss->rx_done.mask = (2 * rx_ring_entries) - 1;
3323 /* allocate the rx shadow rings */
3324 bytes = rx_ring_entries * sizeof (*ss->rx_small.shadow);
3325 ss->rx_small.shadow = malloc(bytes, M_DEVBUF, M_ZERO|M_WAITOK);
3327 bytes = rx_ring_entries * sizeof (*ss->rx_big.shadow);
3328 ss->rx_big.shadow = malloc(bytes, M_DEVBUF, M_ZERO|M_WAITOK);
3330 /* allocate the rx host info rings */
3331 bytes = rx_ring_entries * sizeof (*ss->rx_small.info);
3332 ss->rx_small.info = malloc(bytes, M_DEVBUF, M_ZERO|M_WAITOK);
3334 bytes = rx_ring_entries * sizeof (*ss->rx_big.info);
3335 ss->rx_big.info = malloc(bytes, M_DEVBUF, M_ZERO|M_WAITOK);
3337 /* allocate the rx busdma resources */
3338 err = bus_dma_tag_create(sc->parent_dmat, /* parent */
3340 4096, /* boundary */
3341 BUS_SPACE_MAXADDR, /* low */
3342 BUS_SPACE_MAXADDR, /* high */
3343 NULL, NULL, /* filter */
3344 MHLEN, /* maxsize */
3346 MHLEN, /* maxsegsize */
3347 BUS_DMA_ALLOCNOW, /* flags */
3348 NULL, NULL, /* lock */
3349 &ss->rx_small.dmat); /* tag */
3351 device_printf(sc->dev, "Err %d allocating rx_small dmat\n",
3356 err = bus_dma_tag_create(sc->parent_dmat, /* parent */
3358 #if MXGE_VIRT_JUMBOS
3359 4096, /* boundary */
3363 BUS_SPACE_MAXADDR, /* low */
3364 BUS_SPACE_MAXADDR, /* high */
3365 NULL, NULL, /* filter */
3366 3*4096, /* maxsize */
3367 #if MXGE_VIRT_JUMBOS
3369 4096, /* maxsegsize*/
3372 MJUM9BYTES, /* maxsegsize*/
3374 BUS_DMA_ALLOCNOW, /* flags */
3375 NULL, NULL, /* lock */
3376 &ss->rx_big.dmat); /* tag */
3378 device_printf(sc->dev, "Err %d allocating rx_big dmat\n",
3382 for (i = 0; i <= ss->rx_small.mask; i++) {
3383 err = bus_dmamap_create(ss->rx_small.dmat, 0,
3384 &ss->rx_small.info[i].map);
3386 device_printf(sc->dev, "Err %d rx_small dmamap\n",
3391 err = bus_dmamap_create(ss->rx_small.dmat, 0,
3392 &ss->rx_small.extra_map);
3394 device_printf(sc->dev, "Err %d extra rx_small dmamap\n",
3399 for (i = 0; i <= ss->rx_big.mask; i++) {
3400 err = bus_dmamap_create(ss->rx_big.dmat, 0,
3401 &ss->rx_big.info[i].map);
3403 device_printf(sc->dev, "Err %d rx_big dmamap\n",
3408 err = bus_dmamap_create(ss->rx_big.dmat, 0,
3409 &ss->rx_big.extra_map);
3411 device_printf(sc->dev, "Err %d extra rx_big dmamap\n",
3416 /* now allocate TX resources */
3418 #ifndef IFNET_BUF_RING
3419 /* only use a single TX ring for now */
3420 if (ss != ss->sc->ss)
3424 ss->tx.mask = tx_ring_entries - 1;
3425 ss->tx.max_desc = MIN(MXGE_MAX_SEND_DESC, tx_ring_entries / 4);
3428 /* allocate the tx request copy block */
3430 sizeof (*ss->tx.req_list) * (ss->tx.max_desc + 4);
3431 ss->tx.req_bytes = malloc(bytes, M_DEVBUF, M_WAITOK);
3432 /* ensure req_list entries are aligned to 8 bytes */
3433 ss->tx.req_list = (mcp_kreq_ether_send_t *)
3434 ((unsigned long)(ss->tx.req_bytes + 7) & ~7UL);
3436 /* allocate the tx busdma segment list */
3437 bytes = sizeof (*ss->tx.seg_list) * ss->tx.max_desc;
3438 ss->tx.seg_list = (bus_dma_segment_t *)
3439 malloc(bytes, M_DEVBUF, M_WAITOK);
3441 /* allocate the tx host info ring */
3442 bytes = tx_ring_entries * sizeof (*ss->tx.info);
3443 ss->tx.info = malloc(bytes, M_DEVBUF, M_ZERO|M_WAITOK);
3445 /* allocate the tx busdma resources */
3446 err = bus_dma_tag_create(sc->parent_dmat, /* parent */
3448 sc->tx_boundary, /* boundary */
3449 BUS_SPACE_MAXADDR, /* low */
3450 BUS_SPACE_MAXADDR, /* high */
3451 NULL, NULL, /* filter */
3452 65536 + 256, /* maxsize */
3453 ss->tx.max_desc - 2, /* num segs */
3454 sc->tx_boundary, /* maxsegsz */
3455 BUS_DMA_ALLOCNOW, /* flags */
3456 NULL, NULL, /* lock */
3457 &ss->tx.dmat); /* tag */
3460 device_printf(sc->dev, "Err %d allocating tx dmat\n",
3465 /* now use these tags to setup dmamaps for each slot
3467 for (i = 0; i <= ss->tx.mask; i++) {
3468 err = bus_dmamap_create(ss->tx.dmat, 0,
3469 &ss->tx.info[i].map);
3471 device_printf(sc->dev, "Err %d tx dmamap\n",
/*
 * Query ring sizes from the firmware, size the ifnet send queue to the
 * tx ring, then allocate rings on every slice (excerpt; elided lines).
 * On per-slice failure all rings are freed before returning.
 */
3481 mxge_alloc_rings(mxge_softc_t *sc)
3485 int tx_ring_entries, rx_ring_entries;
3488 /* get ring sizes */
3489 err = mxge_send_cmd(sc, MXGEFW_CMD_GET_SEND_RING_SIZE, &cmd);
3490 tx_ring_size = cmd.data0;
3492 device_printf(sc->dev, "Cannot determine tx ring sizes\n");
3496 tx_ring_entries = tx_ring_size / sizeof (mcp_kreq_ether_send_t);
3497 rx_ring_entries = sc->rx_ring_size / sizeof (mcp_dma_addr_t);
3498 IFQ_SET_MAXLEN(&sc->ifp->if_snd, tx_ring_entries - 1);
3499 sc->ifp->if_snd.ifq_drv_maxlen = sc->ifp->if_snd.ifq_maxlen;
3500 IFQ_SET_READY(&sc->ifp->if_snd);
3502 for (slice = 0; slice < sc->num_slices; slice++) {
3503 err = mxge_alloc_slice_rings(&sc->ss[slice],
3512 mxge_free_rings(sc);
/*
 * Pick big-buffer geometry for a given MTU (excerpt; elided lines):
 * a single MCLBYTES or MJUMPAGESIZE cluster when the frame fits,
 * otherwise MJUM9BYTES clusters — either carved into 4KB virtual
 * sub-buffers (MXGE_VIRT_JUMBOS) or used whole.  Outputs go through
 * the three out-parameters.
 */
3519 mxge_choose_params(int mtu, int *big_buf_size, int *cl_size, int *nbufs)
3521 int bufsize = mtu + ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN + MXGEFW_PAD;
3523 if (bufsize < MCLBYTES) {
3524 /* easy, everything fits in a single buffer */
3525 *big_buf_size = MCLBYTES;
3526 *cl_size = MCLBYTES;
3531 if (bufsize < MJUMPAGESIZE) {
3532 /* still easy, everything still fits in a single buffer */
3533 *big_buf_size = MJUMPAGESIZE;
3534 *cl_size = MJUMPAGESIZE;
3538 #if MXGE_VIRT_JUMBOS
3539 /* now we need to use virtually contiguous buffers */
3540 *cl_size = MJUM9BYTES;
3541 *big_buf_size = 4096;
3542 *nbufs = mtu / 4096 + 1;
3543 /* needs to be a power of two, so round up */
3547 *cl_size = MJUM9BYTES;
3548 *big_buf_size = MJUM9BYTES;
/*
 * Bring up one slice (excerpt; elided lines): initialize LRO, fetch the
 * lanai send/receive ring pointers and send go/stop doorbells from the
 * firmware, then pre-stock the small and big receive rings.  Big-ring
 * shadow addresses are poisoned to 0xffffffff before stocking every
 * nbufs-th entry.
 */
3554 mxge_slice_open(struct mxge_slice_state *ss, int nbufs, int cl_size)
3563 slice = ss - sc->ss;
3565 #if defined(INET) || defined(INET6)
3566 (void)tcp_lro_init(&ss->lc);
3568 ss->lc.ifp = sc->ifp;
3570 /* get the lanai pointers to the send and receive rings */
3573 #ifndef IFNET_BUF_RING
3574 /* We currently only send from the first slice */
3578 err = mxge_send_cmd(sc, MXGEFW_CMD_GET_SEND_OFFSET, &cmd);
3580 (volatile mcp_kreq_ether_send_t *)(sc->sram + cmd.data0);
/* per-slice doorbells live at 64-byte strides in SRAM */
3581 ss->tx.send_go = (volatile uint32_t *)
3582 (sc->sram + MXGEFW_ETH_SEND_GO + 64 * slice);
3583 ss->tx.send_stop = (volatile uint32_t *)
3584 (sc->sram + MXGEFW_ETH_SEND_STOP + 64 * slice);
3585 #ifndef IFNET_BUF_RING
3589 err |= mxge_send_cmd(sc,
3590 MXGEFW_CMD_GET_SMALL_RX_OFFSET, &cmd);
3591 ss->rx_small.lanai =
3592 (volatile mcp_kreq_ether_recv_t *)(sc->sram + cmd.data0);
3594 err |= mxge_send_cmd(sc, MXGEFW_CMD_GET_BIG_RX_OFFSET, &cmd);
3596 (volatile mcp_kreq_ether_recv_t *)(sc->sram + cmd.data0);
3599 device_printf(sc->dev,
3600 "failed to get ring sizes or locations\n");
3604 /* stock receive rings */
3605 for (i = 0; i <= ss->rx_small.mask; i++) {
3606 map = ss->rx_small.info[i].map;
3607 err = mxge_get_buf_small(ss, map, i);
3609 device_printf(sc->dev, "alloced %d/%d smalls\n",
3610 i, ss->rx_small.mask + 1);
3614 for (i = 0; i <= ss->rx_big.mask; i++) {
3615 ss->rx_big.shadow[i].addr_low = 0xffffffff;
3616 ss->rx_big.shadow[i].addr_high = 0xffffffff;
3618 ss->rx_big.nbufs = nbufs;
3619 ss->rx_big.cl_size = cl_size;
3620 ss->rx_big.mlen = ss->sc->ifp->if_mtu + ETHER_HDR_LEN +
3621 ETHER_VLAN_ENCAP_LEN + MXGEFW_PAD;
3622 for (i = 0; i <= ss->rx_big.mask; i += ss->rx_big.nbufs) {
3623 map = ss->rx_big.info[i].map;
3624 err = mxge_get_buf_big(ss, map, i);
3626 device_printf(sc->dev, "alloced %d/%d bigs\n",
3627 i, ss->rx_big.mask + 1);
/*
 * Bring the interface up (excerpt; elided lines): reset the NIC,
 * program the RSS indirection table (identity mapping) when multiple
 * slices are enabled, choose and program MTU/buffer sizes, point the
 * firmware at each slice's stats DMA block (falling back to the
 * obsolete single-block command, which also disables multicast
 * support), open every slice, issue ETHERNET_UP, and finally mark the
 * interface RUNNING.  The error path frees all stocked mbufs.
 */
3635 mxge_open(mxge_softc_t *sc)
3638 int err, big_bytes, nbufs, slice, cl_size, i;
3640 volatile uint8_t *itable;
3641 struct mxge_slice_state *ss;
3643 /* Copy the MAC address in case it was overridden */
3644 bcopy(IF_LLADDR(sc->ifp), sc->mac_addr, ETHER_ADDR_LEN);
3646 err = mxge_reset(sc, 1);
3648 device_printf(sc->dev, "failed to reset\n");
3652 if (sc->num_slices > 1) {
3653 /* setup the indirection table */
3654 cmd.data0 = sc->num_slices;
3655 err = mxge_send_cmd(sc, MXGEFW_CMD_SET_RSS_TABLE_SIZE,
3658 err |= mxge_send_cmd(sc, MXGEFW_CMD_GET_RSS_TABLE_OFFSET,
3661 device_printf(sc->dev,
3662 "failed to setup rss tables\n");
3666 /* just enable an identity mapping */
3667 itable = sc->sram + cmd.data0;
3668 for (i = 0; i < sc->num_slices; i++)
3669 itable[i] = (uint8_t)i;
3672 cmd.data1 = mxge_rss_hash_type;
3673 err = mxge_send_cmd(sc, MXGEFW_CMD_SET_RSS_ENABLE, &cmd);
3675 device_printf(sc->dev, "failed to enable slices\n");
3681 mxge_choose_params(sc->ifp->if_mtu, &big_bytes, &cl_size, &nbufs);
3684 err = mxge_send_cmd(sc, MXGEFW_CMD_ALWAYS_USE_N_BIG_BUFFERS,
3686 /* error is only meaningful if we're trying to set
3687 MXGEFW_CMD_ALWAYS_USE_N_BIG_BUFFERS > 1 */
3688 if (err && nbufs > 1) {
3689 device_printf(sc->dev,
3690 "Failed to set alway-use-n to %d\n",
3694 /* Give the firmware the mtu and the big and small buffer
3695 sizes. The firmware wants the big buf size to be a power
3696 of two. Luckily, FreeBSD's clusters are powers of two */
3697 cmd.data0 = sc->ifp->if_mtu + ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
3698 err = mxge_send_cmd(sc, MXGEFW_CMD_SET_MTU, &cmd);
3699 cmd.data0 = MHLEN - MXGEFW_PAD;
3700 err |= mxge_send_cmd(sc, MXGEFW_CMD_SET_SMALL_BUFFER_SIZE,
3702 cmd.data0 = big_bytes;
3703 err |= mxge_send_cmd(sc, MXGEFW_CMD_SET_BIG_BUFFER_SIZE, &cmd);
3706 device_printf(sc->dev, "failed to setup params\n");
3710 /* Now give him the pointer to the stats block */
3712 #ifdef IFNET_BUF_RING
3713 slice < sc->num_slices;
3718 ss = &sc->ss[slice];
3720 MXGE_LOWPART_TO_U32(ss->fw_stats_dma.bus_addr);
3722 MXGE_HIGHPART_TO_U32(ss->fw_stats_dma.bus_addr);
3723 cmd.data2 = sizeof(struct mcp_irq_data);
/* slice index is encoded in the upper half of data2 */
3724 cmd.data2 |= (slice << 16);
3725 err |= mxge_send_cmd(sc, MXGEFW_CMD_SET_STATS_DMA_V2, &cmd);
3729 bus = sc->ss->fw_stats_dma.bus_addr;
3730 bus += offsetof(struct mcp_irq_data, send_done_count);
3731 cmd.data0 = MXGE_LOWPART_TO_U32(bus);
3732 cmd.data1 = MXGE_HIGHPART_TO_U32(bus);
3733 err = mxge_send_cmd(sc,
3734 MXGEFW_CMD_SET_STATS_DMA_OBSOLETE,
3736 /* Firmware cannot support multicast without STATS_DMA_V2 */
3737 sc->fw_multicast_support = 0;
3739 sc->fw_multicast_support = 1;
3743 device_printf(sc->dev, "failed to setup params\n");
3747 for (slice = 0; slice < sc->num_slices; slice++) {
3748 err = mxge_slice_open(&sc->ss[slice], nbufs, cl_size);
3750 device_printf(sc->dev, "couldn't open slice %d\n",
3756 /* Finally, start the firmware running */
3757 err = mxge_send_cmd(sc, MXGEFW_CMD_ETHERNET_UP, &cmd);
3759 device_printf(sc->dev, "Couldn't bring up link\n");
3762 #ifdef IFNET_BUF_RING
3763 for (slice = 0; slice < sc->num_slices; slice++) {
3764 ss = &sc->ss[slice];
3765 ss->if_drv_flags |= IFF_DRV_RUNNING;
3766 ss->if_drv_flags &= ~IFF_DRV_OACTIVE;
3769 sc->ifp->if_drv_flags |= IFF_DRV_RUNNING;
3770 sc->ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;
3776 mxge_free_mbufs(sc);
/*
 * Bring the interface down (excerpt; elided lines): clear RUNNING on
 * every slice (or the ifnet), issue ETHERNET_DOWN, then wait up to ten
 * interrupt-coalescing intervals for the "link down" interrupt to bump
 * sc->down_cnt before releasing all ring mbufs.
 */
3782 mxge_close(mxge_softc_t *sc, int down)
3785 int err, old_down_cnt;
3786 #ifdef IFNET_BUF_RING
3787 struct mxge_slice_state *ss;
3791 #ifdef IFNET_BUF_RING
3792 for (slice = 0; slice < sc->num_slices; slice++) {
3793 ss = &sc->ss[slice];
3794 ss->if_drv_flags &= ~IFF_DRV_RUNNING;
3797 sc->ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
3799 old_down_cnt = sc->down_cnt;
3801 err = mxge_send_cmd(sc, MXGEFW_CMD_ETHERNET_DOWN, &cmd);
3803 device_printf(sc->dev,
3804 "Couldn't bring down link\n");
3806 if (old_down_cnt == sc->down_cnt) {
3807 /* wait for down irq */
3808 DELAY(10 * sc->intr_coal_delay);
3811 if (old_down_cnt == sc->down_cnt) {
3812 device_printf(sc->dev, "never got down irq\n");
3815 mxge_free_mbufs(sc);
/*
 * (Re)program PCI config space (excerpt; elided lines): read the PCIe
 * link width from the capability block, set the max read request size
 * to 4KB (value 5 in bits 14:12 of device control) on first call or
 * restore the pectl saved before a watchdog reset, and enable
 * busmastering.
 */
3821 mxge_setup_cfg_space(mxge_softc_t *sc)
3823 device_t dev = sc->dev;
3825 uint16_t lnk, pectl;
3827 /* find the PCIe link width and set max read request to 4KB*/
3828 if (pci_find_cap(dev, PCIY_EXPRESS, &reg) == 0) {
3829 lnk = pci_read_config(dev, reg + 0x12, 2);
3830 sc->link_width = (lnk >> 4) & 0x3f;
3832 if (sc->pectl == 0) {
3833 pectl = pci_read_config(dev, reg + 0x8, 2);
3834 pectl = (pectl & ~0x7000) | (5 << 12);
3835 pci_write_config(dev, reg + 0x8, pectl, 2);
3838 /* restore saved pectl after watchdog reset */
3839 pci_write_config(dev, reg + 0x8, sc->pectl, 2);
3843 /* Enable DMA and Memory space access */
3844 pci_enable_busmaster(dev);
/*
 * Read the NIC's reboot status register through the vendor-specific
 * PCI capability (excerpt; elided lines).  Returns (uint32_t)-1 when
 * the capability cannot be located.
 */
3848 mxge_read_reboot(mxge_softc_t *sc)
3850 device_t dev = sc->dev;
3853 /* find the vendor specific offset */
3854 if (pci_find_cap(dev, PCIY_VENDOR, &vs) != 0) {
3855 device_printf(sc->dev,
3856 "could not find vendor specific offset\n");
3857 return (uint32_t)-1;
3859 /* enable read32 mode */
3860 pci_write_config(dev, vs + 0x10, 0x3, 1);
3861 /* tell NIC which register to read */
3862 pci_write_config(dev, vs + 0x18, 0xfffffff0, 4);
3863 return (pci_read_config(dev, vs + 0x14, 4));
/*
 * Recover a hung/rebooted NIC (excerpt; elided lines).  Detects a NIC
 * reboot by reading PCIR_COMMAND: all-ones means the device briefly
 * disappeared (poll up to ~100ms); a cleared busmaster bit means config
 * space was wiped.  On reboot: quiesce tx (mark link down, take every
 * tx lock), restore PCI config space, reload firmware and reopen if it
 * was running, then release the tx locks.  Reschedules the tick
 * callout at the end.
 */
3867 mxge_watchdog_reset(mxge_softc_t *sc)
3869 struct pci_devinfo *dinfo;
3870 struct mxge_slice_state *ss;
3871 int err, running, s, num_tx_slices = 1;
3877 device_printf(sc->dev, "Watchdog reset!\n");
3880 * check to see if the NIC rebooted. If it did, then all of
3881 * PCI config space has been reset, and things like the
3882 * busmaster bit will be zero. If this is the case, then we
3883 * must restore PCI config space before the NIC can be used
3886 cmd = pci_read_config(sc->dev, PCIR_COMMAND, 2);
3887 if (cmd == 0xffff) {
3889 * maybe the watchdog caught the NIC rebooting; wait
3890 * up to 100ms for it to finish. If it does not come
3891 * back, then give up
3894 cmd = pci_read_config(sc->dev, PCIR_COMMAND, 2);
3895 if (cmd == 0xffff) {
3896 device_printf(sc->dev, "NIC disappeared!\n");
3899 if ((cmd & PCIM_CMD_BUSMASTEREN) == 0) {
3900 /* print the reboot status */
3901 reboot = mxge_read_reboot(sc);
3902 device_printf(sc->dev, "NIC rebooted, status = 0x%x\n",
3904 running = sc->ifp->if_drv_flags & IFF_DRV_RUNNING;
3908 * quiesce NIC so that TX routines will not try to
3909 * xmit after restoration of BAR
3912 /* Mark the link as down */
3913 if (sc->link_state) {
3915 if_link_state_change(sc->ifp,
3918 #ifdef IFNET_BUF_RING
3919 num_tx_slices = sc->num_slices;
3921 /* grab all TX locks to ensure no tx */
3922 for (s = 0; s < num_tx_slices; s++) {
3924 mtx_lock(&ss->tx.mtx);
3928 /* restore PCI configuration space */
3929 dinfo = device_get_ivars(sc->dev);
3930 pci_cfg_restore(sc->dev, dinfo);
3932 /* and redo any changes we made to our config space */
3933 mxge_setup_cfg_space(sc);
3936 err = mxge_load_firmware(sc, 0);
3938 device_printf(sc->dev,
3939 "Unable to re-load f/w\n");
3943 err = mxge_open(sc);
3944 /* release all TX locks */
3945 for (s = 0; s < num_tx_slices; s++) {
3947 #ifdef IFNET_BUF_RING
3948 mxge_start_locked(ss);
3950 mtx_unlock(&ss->tx.mtx);
3953 sc->watchdog_resets++;
3955 device_printf(sc->dev,
3956 "NIC did not reboot, not resetting\n");
3960 device_printf(sc->dev, "watchdog reset failed\n");
3964 callout_reset(&sc->co_hdl, mxge_ticks, mxge_tick, sc);
/*
 * Taskqueue wrapper (excerpt; elided lines): run the watchdog reset
 * under the driver lock, outside interrupt context.
 */
3969 mxge_watchdog_task(void *arg, int pending)
3971 mxge_softc_t *sc = arg;
3974 mtx_lock(&sc->driver_mtx);
3975 mxge_watchdog_reset(sc);
3976 mtx_unlock(&sc->driver_mtx);
/*
 * Log tx-ring state when a slice appears hung (excerpt; elided lines).
 * NOTE(review): "struck?" in the message below is almost certainly a
 * typo for "stuck?" (the function is named mxge_warn_stuck); it is
 * runtime text, so fix it in a code change, not here.
 */
3980 mxge_warn_stuck(mxge_softc_t *sc, mxge_tx_ring_t *tx, int slice)
3982 tx = &sc->ss[slice].tx;
3983 device_printf(sc->dev, "slice %d struck? ring state:\n", slice);
3984 device_printf(sc->dev,
3985 "tx.req=%d tx.done=%d, tx.queue_active=%d\n",
3986 tx->req, tx->done, tx->queue_active);
3987 device_printf(sc->dev, "tx.activate=%d tx.deactivate=%d\n",
3988 tx->activate, tx->deactivate);
3989 device_printf(sc->dev, "pkt_done=%d fw=%d\n",
3991 be32toh(sc->ss->fw_stats->send_done_count));
/*
 * Periodic tx-hang detector (excerpt; elided lines).  A slice is
 * considered stuck when transmits are outstanding and tx->done has not
 * advanced since the previous tick; if the NIC was not blocked by
 * received pause frames (rx_pause unchanged), the watchdog reset task
 * is queued, otherwise only a flow-control warning is printed.  Also
 * kicks off a deferred media probe when the interrupt handler asked
 * for one.
 */
3995 mxge_watchdog(mxge_softc_t *sc)
3998 uint32_t rx_pause = be32toh(sc->ss->fw_stats->dropped_pause);
4001 /* see if we have outstanding transmits, which
4002 have been pending for more than mxge_ticks */
4004 #ifdef IFNET_BUF_RING
4005 (i < sc->num_slices) && (err == 0);
4007 (i < 1) && (err == 0);
4011 if (tx->req != tx->done &&
4012 tx->watchdog_req != tx->watchdog_done &&
4013 tx->done == tx->watchdog_done) {
4014 /* check for pause blocking before resetting */
4015 if (tx->watchdog_rx_pause == rx_pause) {
4016 mxge_warn_stuck(sc, tx, i);
4017 taskqueue_enqueue(sc->tq, &sc->watchdog_task);
4021 device_printf(sc->dev, "Flow control blocking "
4022 "xmits, check link partner\n");
4025 tx->watchdog_req = tx->req;
4026 tx->watchdog_done = tx->done;
4027 tx->watchdog_rx_pause = rx_pause;
4030 if (sc->need_media_probe)
4031 mxge_media_probe(sc);
/*
 * if_get_counter handler (excerpt; elided lines): sum per-slice packet
 * and error counters; with IFNET_BUF_RING also bytes, mcasts and
 * buf_ring drops.  Everything else falls through to the default.
 */
4036 mxge_get_counter(struct ifnet *ifp, ift_counter cnt)
4038 struct mxge_softc *sc;
4041 sc = if_getsoftc(ifp);
4045 case IFCOUNTER_IPACKETS:
4046 for (int s = 0; s < sc->num_slices; s++)
4047 rv += sc->ss[s].ipackets;
4049 case IFCOUNTER_OPACKETS:
4050 for (int s = 0; s < sc->num_slices; s++)
4051 rv += sc->ss[s].opackets;
4053 case IFCOUNTER_OERRORS:
4054 for (int s = 0; s < sc->num_slices; s++)
4055 rv += sc->ss[s].oerrors;
4057 #ifdef IFNET_BUF_RING
4058 case IFCOUNTER_OBYTES:
4059 for (int s = 0; s < sc->num_slices; s++)
4060 rv += sc->ss[s].obytes;
4062 case IFCOUNTER_OMCASTS:
4063 for (int s = 0; s < sc->num_slices; s++)
4064 rv += sc->ss[s].omcasts;
4066 case IFCOUNTER_OQDROPS:
4067 for (int s = 0; s < sc->num_slices; s++)
4068 rv += sc->ss[s].tx.br->br_drops;
4072 return (if_get_counter_default(ifp, cnt));
/*
 * Periodic callout (excerpt; elided lines).  When running, the
 * watchdog body executes every 4th tick (watchdog_countdown); when
 * idle, only the busmaster bit is checked for a hardware fault, which
 * queues the watchdog reset task.  Always reschedules itself.
 */
4077 mxge_tick(void *arg)
4079 mxge_softc_t *sc = arg;
4086 running = sc->ifp->if_drv_flags & IFF_DRV_RUNNING;
4088 if (!sc->watchdog_countdown) {
4089 err = mxge_watchdog(sc);
4090 sc->watchdog_countdown = 4;
4092 sc->watchdog_countdown--;
4095 /* ensure NIC did not suffer h/w fault while idle */
4096 cmd = pci_read_config(sc->dev, PCIR_COMMAND, 2);
4097 if ((cmd & PCIM_CMD_BUSMASTEREN) == 0) {
4099 taskqueue_enqueue(sc->tq, &sc->watchdog_task);
4102 /* look less often if NIC is idle */
4107 callout_reset(&sc->co_hdl, ticks, mxge_tick, sc);
/* ifmedia change handler (signature only in this excerpt; body elided). */
4112 mxge_media_change(struct ifnet *ifp)
/*
 * Validate and apply a new MTU (excerpt; elided lines).  The framed
 * size (MTU + Ethernet + VLAN headers) must be within [60, sc->max_mtu].
 * A running interface is closed and reopened; on reopen failure the old
 * MTU is restored and the interface reopened with it.
 */
4118 mxge_change_mtu(mxge_softc_t *sc, int mtu)
4120 struct ifnet *ifp = sc->ifp;
4121 int real_mtu, old_mtu;
4125 real_mtu = mtu + ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
4126 if ((real_mtu > sc->max_mtu) || real_mtu < 60)
4128 mtx_lock(&sc->driver_mtx);
4129 old_mtu = ifp->if_mtu;
4131 if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
4133 err = mxge_open(sc);
4135 ifp->if_mtu = old_mtu;
4137 (void) mxge_open(sc);
4140 mtx_unlock(&sc->driver_mtx);
/*
 * ifmedia status handler (excerpt; elided lines): report full-duplex
 * Ethernet with the cached media type, ACTIVE iff link is up.
 */
4145 mxge_media_status(struct ifnet *ifp, struct ifmediareq *ifmr)
4147 mxge_softc_t *sc = ifp->if_softc;
4152 ifmr->ifm_status = IFM_AVALID;
4153 ifmr->ifm_active = IFM_ETHER | IFM_FDX;
4154 ifmr->ifm_status |= sc->link_state ? IFM_ACTIVE : 0;
4155 ifmr->ifm_active |= sc->current_media;
/*
 * ioctl handler (excerpt; elided lines).  Handles MTU changes, IFF_UP
 * open/close and promisc/multicast updates, multicast list changes,
 * capability toggles (tx/rx checksum for v4 and v6, TSO4/TSO6 gated on
 * the matching checksum offload, LRO, VLAN tagging/TSO with HWTSO
 * forced off when HWTAGGING is off), and media ioctls (with a media
 * probe first).  Everything else defers to ether_ioctl.
 */
4159 mxge_ioctl(struct ifnet *ifp, u_long command, caddr_t data)
4161 mxge_softc_t *sc = ifp->if_softc;
4162 struct ifreq *ifr = (struct ifreq *)data;
4169 err = ether_ioctl(ifp, command, data);
4173 err = mxge_change_mtu(sc, ifr->ifr_mtu);
4177 mtx_lock(&sc->driver_mtx);
4179 mtx_unlock(&sc->driver_mtx);
4182 if (ifp->if_flags & IFF_UP) {
4183 if (!(ifp->if_drv_flags & IFF_DRV_RUNNING)) {
4184 err = mxge_open(sc);
4186 /* take care of promis can allmulti
4188 mxge_change_promisc(sc,
4189 ifp->if_flags & IFF_PROMISC);
4190 mxge_set_multicast_list(sc);
4193 if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
4197 mtx_unlock(&sc->driver_mtx);
4202 mtx_lock(&sc->driver_mtx);
4203 mxge_set_multicast_list(sc);
4204 mtx_unlock(&sc->driver_mtx);
4208 mtx_lock(&sc->driver_mtx);
4209 mask = ifr->ifr_reqcap ^ ifp->if_capenable;
4210 if (mask & IFCAP_TXCSUM) {
4211 if (IFCAP_TXCSUM & ifp->if_capenable) {
/* disabling tx csum also disables TSO4, which depends on it */
4212 ifp->if_capenable &= ~(IFCAP_TXCSUM|IFCAP_TSO4);
4213 ifp->if_hwassist &= ~(CSUM_TCP | CSUM_UDP);
4215 ifp->if_capenable |= IFCAP_TXCSUM;
4216 ifp->if_hwassist |= (CSUM_TCP | CSUM_UDP);
4218 } else if (mask & IFCAP_RXCSUM) {
4219 if (IFCAP_RXCSUM & ifp->if_capenable) {
4220 ifp->if_capenable &= ~IFCAP_RXCSUM;
4222 ifp->if_capenable |= IFCAP_RXCSUM;
4225 if (mask & IFCAP_TSO4) {
4226 if (IFCAP_TSO4 & ifp->if_capenable) {
4227 ifp->if_capenable &= ~IFCAP_TSO4;
4228 } else if (IFCAP_TXCSUM & ifp->if_capenable) {
4229 ifp->if_capenable |= IFCAP_TSO4;
4230 ifp->if_hwassist |= CSUM_TSO;
4232 printf("mxge requires tx checksum offload"
4233 " be enabled to use TSO\n");
4238 if (mask & IFCAP_TXCSUM_IPV6) {
4239 if (IFCAP_TXCSUM_IPV6 & ifp->if_capenable) {
4240 ifp->if_capenable &= ~(IFCAP_TXCSUM_IPV6
4242 ifp->if_hwassist &= ~(CSUM_TCP_IPV6
4245 ifp->if_capenable |= IFCAP_TXCSUM_IPV6;
4246 ifp->if_hwassist |= (CSUM_TCP_IPV6
4249 } else if (mask & IFCAP_RXCSUM_IPV6) {
4250 if (IFCAP_RXCSUM_IPV6 & ifp->if_capenable) {
4251 ifp->if_capenable &= ~IFCAP_RXCSUM_IPV6;
4253 ifp->if_capenable |= IFCAP_RXCSUM_IPV6;
4256 if (mask & IFCAP_TSO6) {
4257 if (IFCAP_TSO6 & ifp->if_capenable) {
4258 ifp->if_capenable &= ~IFCAP_TSO6;
4259 } else if (IFCAP_TXCSUM_IPV6 & ifp->if_capenable) {
4260 ifp->if_capenable |= IFCAP_TSO6;
4261 ifp->if_hwassist |= CSUM_TSO;
4263 printf("mxge requires tx checksum offload"
4264 " be enabled to use TSO\n");
4268 #endif /*IFCAP_TSO6 */
4270 if (mask & IFCAP_LRO)
4271 ifp->if_capenable ^= IFCAP_LRO;
4272 if (mask & IFCAP_VLAN_HWTAGGING)
4273 ifp->if_capenable ^= IFCAP_VLAN_HWTAGGING;
4274 if (mask & IFCAP_VLAN_HWTSO)
4275 ifp->if_capenable ^= IFCAP_VLAN_HWTSO;
4277 if (!(ifp->if_capabilities & IFCAP_VLAN_HWTSO) ||
4278 !(ifp->if_capenable & IFCAP_VLAN_HWTAGGING))
4279 ifp->if_capenable &= ~IFCAP_VLAN_HWTSO;
4281 mtx_unlock(&sc->driver_mtx);
4282 VLAN_CAPABILITIES(ifp);
4287 mtx_lock(&sc->driver_mtx);
4288 mxge_media_probe(sc);
4289 mtx_unlock(&sc->driver_mtx);
4290 err = ifmedia_ioctl(ifp, (struct ifreq *)data,
4291 &sc->media, command);
/*
 * Read hw.mxge.* loader tunables and clamp them to sane ranges
 * (excerpt; elided lines): coalescing delay in [0, 10000]us (default
 * 30), tick period defaults to hz/2, RSS hash type forced to
 * SRC_DST_PORT when out of range, initial MTU clamped to
 * [ETHER_MIN_LEN, ETHERMTU_JUMBO], throttle clamped to
 * [MXGE_MIN_THROTTLE, MXGE_MAX_THROTTLE] when non-zero.  Note both
 * "rss_hash_type" and legacy "rss_hashtype" spellings are accepted.
 */
4301 mxge_fetch_tunables(mxge_softc_t *sc)
4304 TUNABLE_INT_FETCH("hw.mxge.max_slices", &mxge_max_slices);
4305 TUNABLE_INT_FETCH("hw.mxge.flow_control_enabled",
4306 &mxge_flow_control);
4307 TUNABLE_INT_FETCH("hw.mxge.intr_coal_delay",
4308 &mxge_intr_coal_delay);
4309 TUNABLE_INT_FETCH("hw.mxge.nvidia_ecrc_enable",
4310 &mxge_nvidia_ecrc_enable);
4311 TUNABLE_INT_FETCH("hw.mxge.force_firmware",
4312 &mxge_force_firmware);
4313 TUNABLE_INT_FETCH("hw.mxge.deassert_wait",
4314 &mxge_deassert_wait);
4315 TUNABLE_INT_FETCH("hw.mxge.verbose",
4317 TUNABLE_INT_FETCH("hw.mxge.ticks", &mxge_ticks);
4318 TUNABLE_INT_FETCH("hw.mxge.always_promisc", &mxge_always_promisc);
4319 TUNABLE_INT_FETCH("hw.mxge.rss_hash_type", &mxge_rss_hash_type);
4320 TUNABLE_INT_FETCH("hw.mxge.rss_hashtype", &mxge_rss_hash_type);
4321 TUNABLE_INT_FETCH("hw.mxge.initial_mtu", &mxge_initial_mtu);
4322 TUNABLE_INT_FETCH("hw.mxge.throttle", &mxge_throttle);
4326 if (mxge_intr_coal_delay < 0 || mxge_intr_coal_delay > 10*1000)
4327 mxge_intr_coal_delay = 30;
4328 if (mxge_ticks == 0)
4329 mxge_ticks = hz / 2;
4330 sc->pause = mxge_flow_control;
4331 if (mxge_rss_hash_type < MXGEFW_RSS_HASH_TYPE_IPV4
4332 || mxge_rss_hash_type > MXGEFW_RSS_HASH_TYPE_MAX) {
4333 mxge_rss_hash_type = MXGEFW_RSS_HASH_TYPE_SRC_DST_PORT;
4335 if (mxge_initial_mtu > ETHERMTU_JUMBO ||
4336 mxge_initial_mtu < ETHER_MIN_LEN)
4337 mxge_initial_mtu = ETHERMTU_JUMBO;
4339 if (mxge_throttle && mxge_throttle > MXGE_MAX_THROTTLE)
4340 mxge_throttle = MXGE_MAX_THROTTLE;
4341 if (mxge_throttle && mxge_throttle < MXGE_MIN_THROTTLE)
4342 mxge_throttle = MXGE_MIN_THROTTLE;
4343 sc->throttle = mxge_throttle;
/*
 * mxge_free_slices: tear down per-slice state -- the firmware stats
 * DMA area, the tx buf_ring and its mutex (IFNET_BUF_RING builds),
 * and the rx completion-queue DMA -- then free the slice array.
 * Freed pointers are NULLed so the routine is safe against re-entry.
 * NOTE(review): elided listing; loop braces and some intervening
 * lines are not visible here.
 */
4348 mxge_free_slices(mxge_softc_t *sc)
4350 struct mxge_slice_state *ss;
4357 for (i = 0; i < sc->num_slices; i++) {
/* firmware stats were DMA-allocated per slice in mxge_alloc_slices */
4359 if (ss->fw_stats != NULL) {
4360 mxge_dma_free(&ss->fw_stats_dma);
4361 ss->fw_stats = NULL;
4362 #ifdef IFNET_BUF_RING
4363 if (ss->tx.br != NULL) {
4364 drbr_free(ss->tx.br, M_DEVBUF);
4368 mtx_destroy(&ss->tx.mtx);
/* per-slice rx interrupt/completion queue */
4370 if (ss->rx_done.entry != NULL) {
4371 mxge_dma_free(&ss->rx_done.dma);
4372 ss->rx_done.entry = NULL;
4375 free(sc->ss, M_DEVBUF);
/*
 * mxge_alloc_slices: size the rx completion queues from the firmware's
 * reported rx ring size, allocate the slice array, and for each slice
 * allocate its rx completion DMA area, firmware stats DMA area, tx
 * mutex, and (IFNET_BUF_RING builds) a tx buf_ring.  On any failure
 * everything already allocated is released via mxge_free_slices().
 * NOTE(review): elided listing; error-check branches and some lines
 * between the visible ones are not shown.
 */
4380 mxge_alloc_slices(mxge_softc_t *sc)
4383 struct mxge_slice_state *ss;
4385 int err, i, max_intr_slots;
4387 err = mxge_send_cmd(sc, MXGEFW_CMD_GET_RX_RING_SIZE, &cmd);
4389 device_printf(sc->dev, "Cannot determine rx ring size\n");
4392 sc->rx_ring_size = cmd.data0;
/* two completion slots per rx descriptor (small + big rings) --
 * presumably; confirm against the firmware interface spec */
4393 max_intr_slots = 2 * (sc->rx_ring_size / sizeof (mcp_dma_addr_t));
4395 bytes = sizeof (*sc->ss) * sc->num_slices;
4396 sc->ss = malloc(bytes, M_DEVBUF, M_NOWAIT | M_ZERO);
4399 for (i = 0; i < sc->num_slices; i++) {
4404 /* allocate per-slice rx interrupt queues */
4406 bytes = max_intr_slots * sizeof (*ss->rx_done.entry);
4407 err = mxge_dma_alloc(sc, &ss->rx_done.dma, bytes, 4096);
4410 ss->rx_done.entry = ss->rx_done.dma.addr;
4411 bzero(ss->rx_done.entry, bytes);
4414 * allocate the per-slice firmware stats; stats
4415 * (including tx) are used used only on the first
4418 #ifndef IFNET_BUF_RING
4423 bytes = sizeof (*ss->fw_stats);
4424 err = mxge_dma_alloc(sc, &ss->fw_stats_dma,
4425 sizeof (*ss->fw_stats), 64);
4428 ss->fw_stats = (mcp_irq_data_t *)ss->fw_stats_dma.addr;
4429 snprintf(ss->tx.mtx_name, sizeof(ss->tx.mtx_name),
4430 "%s:tx(%d)", device_get_nameunit(sc->dev), i);
4431 mtx_init(&ss->tx.mtx, ss->tx.mtx_name, NULL, MTX_DEF);
4432 #ifdef IFNET_BUF_RING
4433 ss->tx.br = buf_ring_alloc(2048, M_DEVBUF, M_WAITOK,
/* error path: unwind all partially-allocated slices */
4441 mxge_free_slices(sc);
/*
 * mxge_slice_probe: decide how many rx/tx slices (queues) to use.
 * Multiple slices require the tunable to allow it, an SMP system,
 * enough MSI-X vectors, and the RSS-capable firmware.  The slice
 * count is capped by MSI-X vectors and CPUs (or the tunable) and
 * rounded down to a power of two.  If the RSS firmware cannot be
 * used, the original single-slice firmware is restored.
 * NOTE(review): elided listing; early returns, gotos, and the
 * fallback label between the visible lines are not shown.
 */
4446 mxge_slice_probe(mxge_softc_t *sc)
4450 int msix_cnt, status, max_intr_slots;
4454 * don't enable multiple slices if they are not enabled,
4455 * or if this is not an SMP system
4458 if (mxge_max_slices == 0 || mxge_max_slices == 1 || mp_ncpus < 2)
4461 /* see how many MSI-X interrupts are available */
4462 msix_cnt = pci_msix_count(sc->dev);
4466 /* now load the slice aware firmware see what it supports */
4467 old_fw = sc->fw_name;
4468 if (old_fw == mxge_fw_aligned)
4469 sc->fw_name = mxge_fw_rss_aligned;
4471 sc->fw_name = mxge_fw_rss_unaligned;
4472 status = mxge_load_firmware(sc, 0);
4474 device_printf(sc->dev, "Falling back to a single slice\n");
4478 /* try to send a reset command to the card to see if it
4480 memset(&cmd, 0, sizeof (cmd));
4481 status = mxge_send_cmd(sc, MXGEFW_CMD_RESET, &cmd);
4483 device_printf(sc->dev, "failed reset\n");
4487 /* get rx ring size */
4488 status = mxge_send_cmd(sc, MXGEFW_CMD_GET_RX_RING_SIZE, &cmd);
4490 device_printf(sc->dev, "Cannot determine rx ring size\n");
4493 max_intr_slots = 2 * (cmd.data0 / sizeof (mcp_dma_addr_t));
4495 /* tell it the size of the interrupt queues */
4496 cmd.data0 = max_intr_slots * sizeof (struct mcp_slot);
4497 status = mxge_send_cmd(sc, MXGEFW_CMD_SET_INTRQ_SIZE, &cmd);
4499 device_printf(sc->dev, "failed MXGEFW_CMD_SET_INTRQ_SIZE\n");
4503 /* ask the maximum number of slices it supports */
4504 status = mxge_send_cmd(sc, MXGEFW_CMD_GET_MAX_RSS_QUEUES, &cmd);
4506 device_printf(sc->dev,
4507 "failed MXGEFW_CMD_GET_MAX_RSS_QUEUES\n");
/* cap the slice count: firmware limit, then MSI-X vectors */
4510 sc->num_slices = cmd.data0;
4511 if (sc->num_slices > msix_cnt)
4512 sc->num_slices = msix_cnt;
4514 if (mxge_max_slices == -1) {
4515 /* cap to number of CPUs in system */
4516 if (sc->num_slices > mp_ncpus)
4517 sc->num_slices = mp_ncpus;
4519 if (sc->num_slices > mxge_max_slices)
4520 sc->num_slices = mxge_max_slices;
4522 /* make sure it is a power of two */
4523 while (sc->num_slices & (sc->num_slices - 1))
4527 device_printf(sc->dev, "using %d slices\n",
/* fallback path: restore the single-slice firmware */
4533 sc->fw_name = old_fw;
4534 (void) mxge_load_firmware(sc, 0);
/*
 * mxge_add_msix_irqs: allocate the MSI-X table BAR, one MSI-X vector
 * per slice, one bus IRQ resource per vector, and hook each vector
 * to mxge_intr with the matching slice as its argument.  Cleanup on
 * failure is the classic goto-unwind chain (intr -> res -> irq_res ->
 * msix -> msix_table).
 * NOTE(review): elided listing; rid assignments, some error checks,
 * and label lines between the visible lines are not shown.
 */
4538 mxge_add_msix_irqs(mxge_softc_t *sc)
4541 int count, err, i, rid;
/* the MSI-X table lives behind BAR(2) on this hardware */
4544 sc->msix_table_res = bus_alloc_resource_any(sc->dev, SYS_RES_MEMORY,
4547 if (sc->msix_table_res == NULL) {
4548 device_printf(sc->dev, "couldn't alloc MSIX table res\n");
4552 count = sc->num_slices;
4553 err = pci_alloc_msix(sc->dev, &count);
4555 device_printf(sc->dev, "pci_alloc_msix: failed, wanted %d"
4556 "err = %d \n", sc->num_slices, err);
4557 goto abort_with_msix_table;
/* partial vector grants are rejected rather than downsized here */
4559 if (count < sc->num_slices) {
4560 device_printf(sc->dev, "pci_alloc_msix: need %d, got %d\n",
4561 count, sc->num_slices);
4562 device_printf(sc->dev,
4563 "Try setting hw.mxge.max_slices to %d\n",
4566 goto abort_with_msix;
4568 bytes = sizeof (*sc->msix_irq_res) * sc->num_slices;
4569 sc->msix_irq_res = malloc(bytes, M_DEVBUF, M_NOWAIT|M_ZERO);
4570 if (sc->msix_irq_res == NULL) {
4572 goto abort_with_msix;
4575 for (i = 0; i < sc->num_slices; i++) {
4577 sc->msix_irq_res[i] = bus_alloc_resource_any(sc->dev,
4580 if (sc->msix_irq_res[i] == NULL) {
4581 device_printf(sc->dev, "couldn't allocate IRQ res"
4582 " for message %d\n", i);
4584 goto abort_with_res;
4588 bytes = sizeof (*sc->msix_ih) * sc->num_slices;
4589 sc->msix_ih = malloc(bytes, M_DEVBUF, M_NOWAIT|M_ZERO);
4591 for (i = 0; i < sc->num_slices; i++) {
4592 err = bus_setup_intr(sc->dev, sc->msix_irq_res[i],
4593 INTR_TYPE_NET | INTR_MPSAFE,
4594 #if __FreeBSD_version > 700030
/* each vector is bound to its own slice state */
4597 mxge_intr, &sc->ss[i], &sc->msix_ih[i]);
4599 device_printf(sc->dev, "couldn't setup intr for "
4601 goto abort_with_intr;
4603 bus_describe_intr(sc->dev, sc->msix_irq_res[i],
4604 sc->msix_ih[i], "s%d", i);
4608 device_printf(sc->dev, "using %d msix IRQs:",
4610 for (i = 0; i < sc->num_slices; i++)
4611 printf(" %jd", rman_get_start(sc->msix_irq_res[i]));
/* ---- error unwind, innermost failure first ---- */
4617 for (i = 0; i < sc->num_slices; i++) {
4618 if (sc->msix_ih[i] != NULL) {
4619 bus_teardown_intr(sc->dev, sc->msix_irq_res[i],
4621 sc->msix_ih[i] = NULL;
4624 free(sc->msix_ih, M_DEVBUF);
4628 for (i = 0; i < sc->num_slices; i++) {
4630 if (sc->msix_irq_res[i] != NULL)
4631 bus_release_resource(sc->dev, SYS_RES_IRQ, rid,
4632 sc->msix_irq_res[i]);
4633 sc->msix_irq_res[i] = NULL;
4635 free(sc->msix_irq_res, M_DEVBUF);
4639 pci_release_msi(sc->dev);
4641 abort_with_msix_table:
4642 bus_release_resource(sc->dev, SYS_RES_MEMORY, PCIR_BAR(2),
4643 sc->msix_table_res);
/*
 * mxge_add_single_irq: single-queue interrupt setup.  Prefer MSI if
 * exactly one message is available; otherwise fall back to legacy
 * INTx.  Allocates the IRQ resource and hooks mxge_intr with slice 0.
 * On bus_setup_intr failure the resource (and MSI, if used) is
 * released.  NOTE(review): elided listing; the rid selection and the
 * legacy_irq assignment between the visible lines are not shown --
 * the `legacy_irq ? 0 : 1` rid convention below implies rid 0 for
 * INTx and rid 1 for MSI.
 */
4649 mxge_add_single_irq(mxge_softc_t *sc)
4651 int count, err, rid;
4653 count = pci_msi_count(sc->dev);
4654 if (count == 1 && pci_alloc_msi(sc->dev, &count) == 0) {
4660 sc->irq_res = bus_alloc_resource_any(sc->dev, SYS_RES_IRQ, &rid,
4661 RF_SHAREABLE | RF_ACTIVE);
4662 if (sc->irq_res == NULL) {
4663 device_printf(sc->dev, "could not alloc interrupt\n");
4667 device_printf(sc->dev, "using %s irq %jd\n",
4668 sc->legacy_irq ? "INTx" : "MSI",
4669 rman_get_start(sc->irq_res));
4670 err = bus_setup_intr(sc->dev, sc->irq_res,
4671 INTR_TYPE_NET | INTR_MPSAFE,
4672 #if __FreeBSD_version > 700030
4675 mxge_intr, &sc->ss[0], &sc->ih);
/* error path: release the IRQ resource and any MSI allocation */
4677 bus_release_resource(sc->dev, SYS_RES_IRQ,
4678 sc->legacy_irq ? 0 : 1, sc->irq_res);
4679 if (!sc->legacy_irq)
4680 pci_release_msi(sc->dev);
/*
 * mxge_rem_msix_irqs: inverse of mxge_add_msix_irqs -- tear down each
 * vector's handler, release each IRQ resource, free the tracking
 * arrays, release the MSI-X table BAR, and release the MSI-X vectors.
 * NOTE(review): elided listing; loop braces and the rid computation
 * are not visible here.
 */
4686 mxge_rem_msix_irqs(mxge_softc_t *sc)
4690 for (i = 0; i < sc->num_slices; i++) {
4691 if (sc->msix_ih[i] != NULL) {
4692 bus_teardown_intr(sc->dev, sc->msix_irq_res[i],
4694 sc->msix_ih[i] = NULL;
4697 free(sc->msix_ih, M_DEVBUF);
4699 for (i = 0; i < sc->num_slices; i++) {
4701 if (sc->msix_irq_res[i] != NULL)
4702 bus_release_resource(sc->dev, SYS_RES_IRQ, rid,
4703 sc->msix_irq_res[i]);
4704 sc->msix_irq_res[i] = NULL;
4706 free(sc->msix_irq_res, M_DEVBUF);
4708 bus_release_resource(sc->dev, SYS_RES_MEMORY, PCIR_BAR(2),
4709 sc->msix_table_res);
4711 pci_release_msi(sc->dev);
/*
 * mxge_rem_single_irq: inverse of mxge_add_single_irq -- detach the
 * handler, release the IRQ resource (rid 0 for legacy INTx, rid 1
 * for MSI), and release the MSI allocation when one was made.
 */
4716 mxge_rem_single_irq(mxge_softc_t *sc)
4718 bus_teardown_intr(sc->dev, sc->irq_res, sc->ih);
4719 bus_release_resource(sc->dev, SYS_RES_IRQ,
4720 sc->legacy_irq ? 0 : 1, sc->irq_res);
4721 if (!sc->legacy_irq)
4722 pci_release_msi(sc->dev);
/*
 * mxge_rem_irq: dispatch interrupt teardown to the MSI-X or the
 * single-IRQ path depending on how many slices are configured.
 * NOTE(review): the `else` between the two calls is elided from
 * this listing.
 */
4726 mxge_rem_irq(mxge_softc_t *sc)
4728 if (sc->num_slices > 1)
4729 mxge_rem_msix_irqs(sc);
4731 mxge_rem_single_irq(sc);
/*
 * mxge_add_irq: dispatch interrupt setup to the MSI-X or the
 * single-IRQ path depending on the configured slice count.
 */
4735 mxge_add_irq(mxge_softc_t *sc)
4739 if (sc->num_slices > 1)
4740 err = mxge_add_msix_irqs(sc);
4742 err = mxge_add_single_irq(sc);
/* deliberately disabled (0 &&) debug/retry path: re-adds the MSI-X
 * IRQs after a successful setup; kept for manual testing only */
4744 if (0 && err == 0 && sc->num_slices > 1) {
4745 mxge_rem_msix_irqs(sc);
4746 err = mxge_add_msix_irqs(sc);
/*
 * mxge_attach: device attach entry point.  In order: fetch tunables,
 * create the watchdog taskqueue, create the parent busdma tag, set up
 * the ifnet and driver mutexes, map the board's SRAM BAR, read the
 * EEPROM strings, allocate command/zeropad/dmabench DMA areas, select
 * and load firmware, probe/allocate slices, allocate rings and IRQs,
 * advertise interface capabilities, and finally attach the ethernet
 * interface and start the tick callout.  Failures unwind through the
 * goto-label chain at the bottom.
 * NOTE(review): this listing is elided; many statements, checks, and
 * labels between the visible lines are not shown.
 */
4753 mxge_attach(device_t dev)
4756 mxge_softc_t *sc = device_get_softc(dev);
4761 mxge_fetch_tunables(sc);
4763 TASK_INIT(&sc->watchdog_task, 1, mxge_watchdog_task, sc);
4764 sc->tq = taskqueue_create("mxge_taskq", M_WAITOK,
4765 taskqueue_thread_enqueue, &sc->tq);
4766 if (sc->tq == NULL) {
4768 goto abort_with_nothing;
/* parent DMA tag bounds all per-ring/per-slice allocations */
4771 err = bus_dma_tag_create(bus_get_dma_tag(dev), /* parent */
4774 BUS_SPACE_MAXADDR, /* low */
4775 BUS_SPACE_MAXADDR, /* high */
4776 NULL, NULL, /* filter */
4777 65536 + 256, /* maxsize */
4778 MXGE_MAX_SEND_DESC, /* num segs */
4779 65536, /* maxsegsize */
4781 NULL, NULL, /* lock */
4782 &sc->parent_dmat); /* tag */
4785 device_printf(sc->dev, "Err %d allocating parent dmat\n",
4790 ifp = sc->ifp = if_alloc(IFT_ETHER);
4792 device_printf(dev, "can not if_alloc()\n");
4794 goto abort_with_parent_dmat;
4796 if_initname(ifp, device_get_name(dev), device_get_unit(dev));
4798 snprintf(sc->cmd_mtx_name, sizeof(sc->cmd_mtx_name), "%s:cmd",
4799 device_get_nameunit(dev));
4800 mtx_init(&sc->cmd_mtx, sc->cmd_mtx_name, NULL, MTX_DEF);
4801 snprintf(sc->driver_mtx_name, sizeof(sc->driver_mtx_name),
4802 "%s:drv", device_get_nameunit(dev));
4803 mtx_init(&sc->driver_mtx, sc->driver_mtx_name,
4804 MTX_NETWORK_LOCK, MTX_DEF);
4806 callout_init_mtx(&sc->co_hdl, &sc->driver_mtx, 0);
4808 mxge_setup_cfg_space(sc);
4810 /* Map the board into the kernel */
4812 sc->mem_res = bus_alloc_resource_any(dev, SYS_RES_MEMORY, &rid,
4814 if (sc->mem_res == NULL) {
4815 device_printf(dev, "could not map memory\n");
4817 goto abort_with_lock;
4819 sc->sram = rman_get_virtual(sc->mem_res);
/* 2MB SRAM minus firmware/scratch regions minus a 0x100 guard --
 * presumably matches the board's memory map; confirm against docs */
4820 sc->sram_size = 2*1024*1024 - (2*(48*1024)+(32*1024)) - 0x100;
4821 if (sc->sram_size > rman_get_size(sc->mem_res)) {
4822 device_printf(dev, "impossible memory region size %jd\n",
4823 rman_get_size(sc->mem_res));
4825 goto abort_with_mem_res;
4828 /* make NULL terminated copy of the EEPROM strings section of
4830 bzero(sc->eeprom_strings, MXGE_EEPROM_STRINGS_SIZE);
4831 bus_space_read_region_1(rman_get_bustag(sc->mem_res),
4832 rman_get_bushandle(sc->mem_res),
4833 sc->sram_size - MXGE_EEPROM_STRINGS_SIZE,
4835 MXGE_EEPROM_STRINGS_SIZE - 2);
4836 err = mxge_parse_strings(sc);
4838 goto abort_with_mem_res;
4840 /* Enable write combining for efficient use of PCIe bus */
4843 /* Allocate the out of band dma memory */
4844 err = mxge_dma_alloc(sc, &sc->cmd_dma,
4845 sizeof (mxge_cmd_t), 64);
4847 goto abort_with_mem_res;
4848 sc->cmd = (mcp_cmd_response_t *) sc->cmd_dma.addr;
4849 err = mxge_dma_alloc(sc, &sc->zeropad_dma, 64, 64);
4851 goto abort_with_cmd_dma;
4853 err = mxge_dma_alloc(sc, &sc->dmabench_dma, 4096, 4096);
4855 goto abort_with_zeropad_dma;
4857 /* select & load the firmware */
4858 err = mxge_select_firmware(sc);
4860 goto abort_with_dmabench;
4861 sc->intr_coal_delay = mxge_intr_coal_delay;
4863 mxge_slice_probe(sc);
4864 err = mxge_alloc_slices(sc);
4866 goto abort_with_dmabench;
4868 err = mxge_reset(sc, 0);
4870 goto abort_with_slices;
4872 err = mxge_alloc_rings(sc);
4874 device_printf(sc->dev, "failed to allocate rings\n");
4875 goto abort_with_slices;
4878 err = mxge_add_irq(sc);
4880 device_printf(sc->dev, "failed to add irq\n");
4881 goto abort_with_rings;
/* ---- advertise interface capabilities ---- */
4884 ifp->if_baudrate = IF_Gbps(10);
4885 ifp->if_capabilities = IFCAP_RXCSUM | IFCAP_TXCSUM | IFCAP_TSO4 |
4886 IFCAP_VLAN_MTU | IFCAP_LINKSTATE | IFCAP_TXCSUM_IPV6 |
4888 #if defined(INET) || defined(INET6)
4889 ifp->if_capabilities |= IFCAP_LRO;
4892 #ifdef MXGE_NEW_VLAN_API
4893 ifp->if_capabilities |= IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_HWCSUM;
4895 /* Only FW 1.4.32 and newer can do TSO over vlans */
4896 if (sc->fw_ver_major == 1 && sc->fw_ver_minor == 4 &&
4897 sc->fw_ver_tiny >= 32)
4898 ifp->if_capabilities |= IFCAP_VLAN_HWTSO;
4900 sc->max_mtu = mxge_max_mtu(sc);
4901 if (sc->max_mtu >= 9000)
4902 ifp->if_capabilities |= IFCAP_JUMBO_MTU;
4904 device_printf(dev, "MTU limited to %d. Install "
4905 "latest firmware for 9000 byte jumbo support\n",
4906 sc->max_mtu - ETHER_HDR_LEN);
4907 ifp->if_hwassist = CSUM_TCP | CSUM_UDP | CSUM_TSO;
4908 ifp->if_hwassist |= CSUM_TCP_IPV6 | CSUM_UDP_IPV6;
4909 /* check to see if f/w supports TSO for IPv6 */
4910 if (!mxge_send_cmd(sc, MXGEFW_CMD_GET_MAX_TSO6_HDR_SIZE, &cmd)) {
4912 ifp->if_capabilities |= IFCAP_TSO6;
4913 sc->max_tso6_hlen = min(cmd.data0,
4914 sizeof (sc->ss[0].scratch));
4916 ifp->if_capenable = ifp->if_capabilities;
4917 if (sc->lro_cnt == 0)
4918 ifp->if_capenable &= ~IFCAP_LRO;
4919 ifp->if_init = mxge_init;
4921 ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
4922 ifp->if_ioctl = mxge_ioctl;
4923 ifp->if_start = mxge_start;
4924 ifp->if_get_counter = mxge_get_counter;
4925 /* Initialise the ifmedia structure */
4926 ifmedia_init(&sc->media, 0, mxge_media_change,
4928 mxge_media_init(sc);
4929 mxge_media_probe(sc);
4931 ether_ifattach(ifp, sc->mac_addr);
4932 /* ether_ifattach sets mtu to ETHERMTU */
4933 if (mxge_initial_mtu != ETHERMTU)
4934 mxge_change_mtu(sc, mxge_initial_mtu);
4936 mxge_add_sysctls(sc);
4937 #ifdef IFNET_BUF_RING
4938 ifp->if_transmit = mxge_transmit;
4939 ifp->if_qflush = mxge_qflush;
4941 taskqueue_start_threads(&sc->tq, 1, PI_NET, "%s taskq",
4942 device_get_nameunit(sc->dev));
4943 callout_reset(&sc->co_hdl, mxge_ticks, mxge_tick, sc);
/* ---- error unwind chain (labels partially elided) ---- */
4947 mxge_free_rings(sc);
4949 mxge_free_slices(sc);
4950 abort_with_dmabench:
4951 mxge_dma_free(&sc->dmabench_dma);
4952 abort_with_zeropad_dma:
4953 mxge_dma_free(&sc->zeropad_dma);
4955 mxge_dma_free(&sc->cmd_dma);
4957 bus_release_resource(dev, SYS_RES_MEMORY, PCIR_BARS, sc->mem_res);
4959 pci_disable_busmaster(dev);
4960 mtx_destroy(&sc->cmd_mtx);
4961 mtx_destroy(&sc->driver_mtx);
4963 abort_with_parent_dmat:
4964 bus_dma_tag_destroy(sc->parent_dmat);
4966 if (sc->tq != NULL) {
4967 taskqueue_drain(sc->tq, &sc->watchdog_task);
4968 taskqueue_free(sc->tq);
/*
 * mxge_detach: device detach entry point -- refuse while VLANs are
 * still attached, stop the interface if running, detach the ifnet,
 * drain the watchdog taskqueue and tick callout, then free media,
 * sysctls, rings, slices, DMA areas, the BAR mapping, mutexes, and
 * the parent DMA tag (roughly the reverse of mxge_attach).
 * NOTE(review): elided listing; the early-return for active VLANs,
 * the close call, and IRQ teardown are among the lines not shown.
 */
4976 mxge_detach(device_t dev)
4978 mxge_softc_t *sc = device_get_softc(dev);
4980 if (mxge_vlans_active(sc)) {
4981 device_printf(sc->dev,
4982 "Detach vlans before removing module\n");
4985 mtx_lock(&sc->driver_mtx);
4987 if (sc->ifp->if_drv_flags & IFF_DRV_RUNNING)
4989 mtx_unlock(&sc->driver_mtx);
4990 ether_ifdetach(sc->ifp);
4991 if (sc->tq != NULL) {
4992 taskqueue_drain(sc->tq, &sc->watchdog_task);
4993 taskqueue_free(sc->tq);
4996 callout_drain(&sc->co_hdl);
4997 ifmedia_removeall(&sc->media);
/* quiesce the NIC's dummy-RDMA engine before freeing resources */
4998 mxge_dummy_rdma(sc, 0);
4999 mxge_rem_sysctls(sc);
5001 mxge_free_rings(sc);
5002 mxge_free_slices(sc);
5003 mxge_dma_free(&sc->dmabench_dma);
5004 mxge_dma_free(&sc->zeropad_dma);
5005 mxge_dma_free(&sc->cmd_dma);
5006 bus_release_resource(dev, SYS_RES_MEMORY, PCIR_BARS, sc->mem_res);
5007 pci_disable_busmaster(dev);
5008 mtx_destroy(&sc->cmd_mtx);
5009 mtx_destroy(&sc->driver_mtx);
5011 bus_dma_tag_destroy(sc->parent_dmat);
5016 mxge_shutdown(device_t dev)
5022 This file uses Myri10GE driver indentation.
5025 c-file-style:"linux"