1 /******************************************************************************
2 SPDX-License-Identifier: BSD-2-Clause-FreeBSD
4 Copyright (c) 2006-2013, Myricom Inc.
7 Redistribution and use in source and binary forms, with or without
8 modification, are permitted provided that the following conditions are met:
10 1. Redistributions of source code must retain the above copyright notice,
11 this list of conditions and the following disclaimer.
13 2. Neither the name of the Myricom Inc, nor the names of its
14 contributors may be used to endorse or promote products derived from
15 this software without specific prior written permission.
17 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
18 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
21 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
22 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
23 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
24 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
25 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
26 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
27 POSSIBILITY OF SUCH DAMAGE.
29 ***************************************************************************/
31 #include <sys/cdefs.h>
32 __FBSDID("$FreeBSD$");
34 #include <sys/param.h>
35 #include <sys/systm.h>
36 #include <sys/linker.h>
37 #include <sys/firmware.h>
38 #include <sys/endian.h>
39 #include <sys/sockio.h>
41 #include <sys/malloc.h>
43 #include <sys/kernel.h>
45 #include <sys/module.h>
46 #include <sys/socket.h>
47 #include <sys/sysctl.h>
49 #include <sys/taskqueue.h>
50 #include <contrib/zlib/zlib.h>
51 #include <dev/zlib/zcalloc.h>
54 #include <net/if_var.h>
55 #include <net/if_arp.h>
56 #include <net/ethernet.h>
57 #include <net/if_dl.h>
58 #include <net/if_media.h>
62 #include <net/if_types.h>
63 #include <net/if_vlan_var.h>
65 #include <netinet/in_systm.h>
66 #include <netinet/in.h>
67 #include <netinet/ip.h>
68 #include <netinet/ip6.h>
69 #include <netinet/tcp.h>
70 #include <netinet/tcp_lro.h>
71 #include <netinet6/ip6_var.h>
73 #include <machine/bus.h>
74 #include <machine/in_cksum.h>
75 #include <machine/resource.h>
80 #include <dev/pci/pcireg.h>
81 #include <dev/pci/pcivar.h>
82 #include <dev/pci/pci_private.h> /* XXX for pci_cfg_restore */
84 #include <vm/vm.h> /* for pmap_mapdev() */
87 #if defined(__i386) || defined(__amd64)
88 #include <machine/specialreg.h>
91 #include <dev/mxge/mxge_mcp.h>
92 #include <dev/mxge/mcp_gen_header.h>
93 /*#define MXGE_FAKE_IFP*/
94 #include <dev/mxge/if_mxge_var.h>
96 #include <sys/buf_ring.h>
100 #include "opt_inet6.h"
103 static int mxge_nvidia_ecrc_enable = 1;
104 static int mxge_force_firmware = 0;
105 static int mxge_intr_coal_delay = 30;
106 static int mxge_deassert_wait = 1;
107 static int mxge_flow_control = 1;
108 static int mxge_verbose = 0;
109 static int mxge_ticks;
110 static int mxge_max_slices = 1;
111 static int mxge_rss_hash_type = MXGEFW_RSS_HASH_TYPE_SRC_DST_PORT;
112 static int mxge_always_promisc = 0;
113 static int mxge_initial_mtu = ETHERMTU_JUMBO;
114 static int mxge_throttle = 0;
115 static char *mxge_fw_unaligned = "mxge_ethp_z8e";
116 static char *mxge_fw_aligned = "mxge_eth_z8e";
117 static char *mxge_fw_rss_aligned = "mxge_rss_eth_z8e";
118 static char *mxge_fw_rss_unaligned = "mxge_rss_ethp_z8e";
120 static int mxge_probe(device_t dev);
121 static int mxge_attach(device_t dev);
122 static int mxge_detach(device_t dev);
123 static int mxge_shutdown(device_t dev);
124 static void mxge_intr(void *arg);
126 static device_method_t mxge_methods[] =
128 /* Device interface */
129 DEVMETHOD(device_probe, mxge_probe),
130 DEVMETHOD(device_attach, mxge_attach),
131 DEVMETHOD(device_detach, mxge_detach),
132 DEVMETHOD(device_shutdown, mxge_shutdown),
137 static driver_t mxge_driver =
141 sizeof(mxge_softc_t),
144 static devclass_t mxge_devclass;
146 /* Declare ourselves to be a child of the PCI bus.*/
147 DRIVER_MODULE(mxge, pci, mxge_driver, mxge_devclass, 0, 0);
148 MODULE_DEPEND(mxge, firmware, 1, 1, 1);
149 MODULE_DEPEND(mxge, zlib, 1, 1, 1);
151 static int mxge_load_firmware(mxge_softc_t *sc, int adopt);
152 static int mxge_send_cmd(mxge_softc_t *sc, uint32_t cmd, mxge_cmd_t *data);
153 static int mxge_close(mxge_softc_t *sc, int down);
154 static int mxge_open(mxge_softc_t *sc);
155 static void mxge_tick(void *arg);
158 mxge_probe(device_t dev)
162 if ((pci_get_vendor(dev) == MXGE_PCI_VENDOR_MYRICOM) &&
163 ((pci_get_device(dev) == MXGE_PCI_DEVICE_Z8E) ||
164 (pci_get_device(dev) == MXGE_PCI_DEVICE_Z8E_9))) {
165 rev = pci_get_revid(dev);
167 case MXGE_PCI_REV_Z8E:
168 device_set_desc(dev, "Myri10G-PCIE-8A");
170 case MXGE_PCI_REV_Z8ES:
171 device_set_desc(dev, "Myri10G-PCIE-8B");
174 device_set_desc(dev, "Myri10G-PCIE-8??");
175 device_printf(dev, "Unrecognized rev %d NIC\n",
185 mxge_enable_wc(mxge_softc_t *sc)
187 #if defined(__i386) || defined(__amd64)
192 len = rman_get_size(sc->mem_res);
193 err = pmap_change_attr((vm_offset_t) sc->sram,
194 len, PAT_WRITE_COMBINING);
196 device_printf(sc->dev, "pmap_change_attr failed, %d\n",
203 /* callback to get our DMA address */
205 mxge_dmamap_callback(void *arg, bus_dma_segment_t *segs, int nsegs,
209 *(bus_addr_t *) arg = segs->ds_addr;
214 mxge_dma_alloc(mxge_softc_t *sc, mxge_dma_t *dma, size_t bytes,
215 bus_size_t alignment)
218 device_t dev = sc->dev;
219 bus_size_t boundary, maxsegsize;
221 if (bytes > 4096 && alignment == 4096) {
229 /* allocate DMAable memory tags */
230 err = bus_dma_tag_create(sc->parent_dmat, /* parent */
231 alignment, /* alignment */
232 boundary, /* boundary */
233 BUS_SPACE_MAXADDR, /* low */
234 BUS_SPACE_MAXADDR, /* high */
235 NULL, NULL, /* filter */
238 maxsegsize, /* maxsegsize */
239 BUS_DMA_COHERENT, /* flags */
240 NULL, NULL, /* lock */
241 &dma->dmat); /* tag */
243 device_printf(dev, "couldn't alloc tag (err = %d)\n", err);
247 /* allocate DMAable memory & map */
248 err = bus_dmamem_alloc(dma->dmat, &dma->addr,
249 (BUS_DMA_WAITOK | BUS_DMA_COHERENT
250 | BUS_DMA_ZERO), &dma->map);
252 device_printf(dev, "couldn't alloc mem (err = %d)\n", err);
253 goto abort_with_dmat;
256 /* load the memory */
257 err = bus_dmamap_load(dma->dmat, dma->map, dma->addr, bytes,
258 mxge_dmamap_callback,
259 (void *)&dma->bus_addr, 0);
261 device_printf(dev, "couldn't load map (err = %d)\n", err);
267 bus_dmamem_free(dma->dmat, dma->addr, dma->map);
269 (void)bus_dma_tag_destroy(dma->dmat);
274 mxge_dma_free(mxge_dma_t *dma)
276 bus_dmamap_unload(dma->dmat, dma->map);
277 bus_dmamem_free(dma->dmat, dma->addr, dma->map);
278 (void)bus_dma_tag_destroy(dma->dmat);
282 * The eeprom strings on the lanaiX have the format
289 mxge_parse_strings(mxge_softc_t *sc)
292 int i, found_mac, found_sn2;
295 ptr = sc->eeprom_strings;
298 while (*ptr != '\0') {
299 if (strncmp(ptr, "MAC=", 4) == 0) {
302 sc->mac_addr[i] = strtoul(ptr, &endptr, 16);
303 if (endptr - ptr != 2)
312 } else if (strncmp(ptr, "PC=", 3) == 0) {
314 strlcpy(sc->product_code_string, ptr,
315 sizeof(sc->product_code_string));
316 } else if (!found_sn2 && (strncmp(ptr, "SN=", 3) == 0)) {
318 strlcpy(sc->serial_number_string, ptr,
319 sizeof(sc->serial_number_string));
320 } else if (strncmp(ptr, "SN2=", 4) == 0) {
321 /* SN2 takes precedence over SN */
324 strlcpy(sc->serial_number_string, ptr,
325 sizeof(sc->serial_number_string));
327 while (*ptr++ != '\0') {}
334 device_printf(sc->dev, "failed to parse eeprom_strings\n");
339 #if defined __i386 || defined i386 || defined __i386__ || defined __x86_64__
341 mxge_enable_nvidia_ecrc(mxge_softc_t *sc)
344 unsigned long base, off;
346 device_t pdev, mcp55;
347 uint16_t vendor_id, device_id, word;
348 uintptr_t bus, slot, func, ivend, idev;
351 if (!mxge_nvidia_ecrc_enable)
354 pdev = device_get_parent(device_get_parent(sc->dev));
356 device_printf(sc->dev, "could not find parent?\n");
359 vendor_id = pci_read_config(pdev, PCIR_VENDOR, 2);
360 device_id = pci_read_config(pdev, PCIR_DEVICE, 2);
362 if (vendor_id != 0x10de)
367 if (device_id == 0x005d) {
368 /* ck804, base address is magic */
370 } else if (device_id >= 0x0374 && device_id <= 0x378) {
371 /* mcp55, base address stored in chipset */
372 mcp55 = pci_find_bsf(0, 0, 0);
374 0x10de == pci_read_config(mcp55, PCIR_VENDOR, 2) &&
375 0x0369 == pci_read_config(mcp55, PCIR_DEVICE, 2)) {
376 word = pci_read_config(mcp55, 0x90, 2);
377 base = ((unsigned long)word & 0x7ffeU) << 25;
384 Test below is commented because it is believed that doing
385 config read/write beyond 0xff will access the config space
386 for the next larger function. Uncomment this and remove
387 the hacky pmap_mapdev() way of accessing config space when
388 FreeBSD grows support for extended pcie config space access
391 /* See if we can, by some miracle, access the extended
393 val = pci_read_config(pdev, 0x178, 4);
394 if (val != 0xffffffff) {
396 pci_write_config(pdev, 0x178, val, 4);
400 /* Rather than using normal pci config space writes, we must
401 * map the Nvidia config space ourselves. This is because on
402 * opteron/nvidia class machine the 0xe000000 mapping is
403 * handled by the nvidia chipset, that means the internal PCI
404 * device (the on-chip northbridge), or the amd-8131 bridge
405 * and things behind them are not visible by this method.
408 BUS_READ_IVAR(device_get_parent(pdev), pdev,
410 BUS_READ_IVAR(device_get_parent(pdev), pdev,
411 PCI_IVAR_SLOT, &slot);
412 BUS_READ_IVAR(device_get_parent(pdev), pdev,
413 PCI_IVAR_FUNCTION, &func);
414 BUS_READ_IVAR(device_get_parent(pdev), pdev,
415 PCI_IVAR_VENDOR, &ivend);
416 BUS_READ_IVAR(device_get_parent(pdev), pdev,
417 PCI_IVAR_DEVICE, &idev);
420 + 0x00100000UL * (unsigned long)bus
421 + 0x00001000UL * (unsigned long)(func
424 /* map it into the kernel */
425 va = pmap_mapdev(trunc_page((vm_paddr_t)off), PAGE_SIZE);
428 device_printf(sc->dev, "pmap_kenter_temporary didn't\n");
431 /* get a pointer to the config space mapped into the kernel */
432 cfgptr = va + (off & PAGE_MASK);
434 /* make sure that we can really access it */
435 vendor_id = *(uint16_t *)(cfgptr + PCIR_VENDOR);
436 device_id = *(uint16_t *)(cfgptr + PCIR_DEVICE);
437 if (! (vendor_id == ivend && device_id == idev)) {
438 device_printf(sc->dev, "mapping failed: 0x%x:0x%x\n",
439 vendor_id, device_id);
440 pmap_unmapdev((vm_offset_t)va, PAGE_SIZE);
444 ptr32 = (uint32_t*)(cfgptr + 0x178);
447 if (val == 0xffffffff) {
448 device_printf(sc->dev, "extended mapping failed\n");
449 pmap_unmapdev((vm_offset_t)va, PAGE_SIZE);
453 pmap_unmapdev((vm_offset_t)va, PAGE_SIZE);
455 device_printf(sc->dev,
456 "Enabled ECRC on upstream Nvidia bridge "
458 (int)bus, (int)slot, (int)func);
463 mxge_enable_nvidia_ecrc(mxge_softc_t *sc)
465 device_printf(sc->dev,
466 "Nforce 4 chipset on non-x86/amd64!?!?!\n");
472 mxge_dma_test(mxge_softc_t *sc, int test_type)
475 bus_addr_t dmatest_bus = sc->dmabench_dma.bus_addr;
480 /* Run a small DMA test.
481 * The magic multipliers to the length tell the firmware
482 * to do DMA read, write, or read+write tests. The
483 * results are returned in cmd.data0. The upper 16
484 * bits of the return is the number of transfers completed.
485 * The lower 16 bits is the time in 0.5us ticks that the
486 * transfers took to complete.
489 len = sc->tx_boundary;
491 cmd.data0 = MXGE_LOWPART_TO_U32(dmatest_bus);
492 cmd.data1 = MXGE_HIGHPART_TO_U32(dmatest_bus);
493 cmd.data2 = len * 0x10000;
494 status = mxge_send_cmd(sc, test_type, &cmd);
499 sc->read_dma = ((cmd.data0>>16) * len * 2) /
500 (cmd.data0 & 0xffff);
501 cmd.data0 = MXGE_LOWPART_TO_U32(dmatest_bus);
502 cmd.data1 = MXGE_HIGHPART_TO_U32(dmatest_bus);
503 cmd.data2 = len * 0x1;
504 status = mxge_send_cmd(sc, test_type, &cmd);
509 sc->write_dma = ((cmd.data0>>16) * len * 2) /
510 (cmd.data0 & 0xffff);
512 cmd.data0 = MXGE_LOWPART_TO_U32(dmatest_bus);
513 cmd.data1 = MXGE_HIGHPART_TO_U32(dmatest_bus);
514 cmd.data2 = len * 0x10001;
515 status = mxge_send_cmd(sc, test_type, &cmd);
520 sc->read_write_dma = ((cmd.data0>>16) * len * 2 * 2) /
521 (cmd.data0 & 0xffff);
524 if (status != 0 && test_type != MXGEFW_CMD_UNALIGNED_TEST)
525 device_printf(sc->dev, "DMA %s benchmark failed: %d\n",
532 * The Lanai Z8E PCI-E interface achieves higher Read-DMA throughput
533 * when the PCI-E Completion packets are aligned on an 8-byte
534 * boundary. Some PCI-E chip sets always align Completion packets; on
535 * the ones that do not, the alignment can be enforced by enabling
536 * ECRC generation (if supported).
538 * When PCI-E Completion packets are not aligned, it is actually more
539 * efficient to limit Read-DMA transactions to 2KB, rather than 4KB.
541 * If the driver can neither enable ECRC nor verify that it has
542 * already been enabled, then it must use a firmware image which works
543 * around unaligned completion packets (ethp_z8e.dat), and it should
544 * also ensure that it never gives the device a Read-DMA which is
545 * larger than 2KB by setting the tx_boundary to 2KB. If ECRC is
546 * enabled, then the driver should use the aligned (eth_z8e.dat)
547 * firmware image, and set tx_boundary to 4KB.
551 mxge_firmware_probe(mxge_softc_t *sc)
553 device_t dev = sc->dev;
557 sc->tx_boundary = 4096;
559 * Verify the max read request size was set to 4KB
560 * before trying the test with 4KB.
562 if (pci_find_cap(dev, PCIY_EXPRESS, ®) == 0) {
563 pectl = pci_read_config(dev, reg + 0x8, 2);
564 if ((pectl & (5 << 12)) != (5 << 12)) {
565 device_printf(dev, "Max Read Req. size != 4k (0x%x\n",
567 sc->tx_boundary = 2048;
572 * load the optimized firmware (which assumes aligned PCIe
573 * completions) in order to see if it works on this host.
575 sc->fw_name = mxge_fw_aligned;
576 status = mxge_load_firmware(sc, 1);
582 * Enable ECRC if possible
584 mxge_enable_nvidia_ecrc(sc);
587 * Run a DMA test which watches for unaligned completions and
588 * aborts on the first one seen. Not required on Z8ES or newer.
590 if (pci_get_revid(sc->dev) >= MXGE_PCI_REV_Z8ES)
592 status = mxge_dma_test(sc, MXGEFW_CMD_UNALIGNED_TEST);
594 return 0; /* keep the aligned firmware */
597 device_printf(dev, "DMA test failed: %d\n", status);
598 if (status == ENOSYS)
599 device_printf(dev, "Falling back to ethp! "
600 "Please install up to date fw\n");
605 mxge_select_firmware(mxge_softc_t *sc)
608 int force_firmware = mxge_force_firmware;
611 force_firmware = sc->throttle;
613 if (force_firmware != 0) {
614 if (force_firmware == 1)
619 device_printf(sc->dev,
620 "Assuming %s completions (forced)\n",
621 aligned ? "aligned" : "unaligned");
625 /* if the PCIe link width is 4 or less, we can use the aligned
626 firmware and skip any checks */
627 if (sc->link_width != 0 && sc->link_width <= 4) {
628 device_printf(sc->dev,
629 "PCIe x%d Link, expect reduced performance\n",
635 if (0 == mxge_firmware_probe(sc))
640 sc->fw_name = mxge_fw_aligned;
641 sc->tx_boundary = 4096;
643 sc->fw_name = mxge_fw_unaligned;
644 sc->tx_boundary = 2048;
646 return (mxge_load_firmware(sc, 0));
650 mxge_validate_firmware(mxge_softc_t *sc, const mcp_gen_header_t *hdr)
653 if (be32toh(hdr->mcp_type) != MCP_TYPE_ETH) {
654 device_printf(sc->dev, "Bad firmware type: 0x%x\n",
655 be32toh(hdr->mcp_type));
659 /* save firmware version for sysctl */
660 strlcpy(sc->fw_version, hdr->version, sizeof(sc->fw_version));
662 device_printf(sc->dev, "firmware id: %s\n", hdr->version);
664 sscanf(sc->fw_version, "%d.%d.%d", &sc->fw_ver_major,
665 &sc->fw_ver_minor, &sc->fw_ver_tiny);
667 if (!(sc->fw_ver_major == MXGEFW_VERSION_MAJOR
668 && sc->fw_ver_minor == MXGEFW_VERSION_MINOR)) {
669 device_printf(sc->dev, "Found firmware version %s\n",
671 device_printf(sc->dev, "Driver needs %d.%d\n",
672 MXGEFW_VERSION_MAJOR, MXGEFW_VERSION_MINOR);
680 mxge_load_firmware_helper(mxge_softc_t *sc, uint32_t *limit)
683 char *inflate_buffer;
684 const struct firmware *fw;
685 const mcp_gen_header_t *hdr;
692 fw = firmware_get(sc->fw_name);
694 device_printf(sc->dev, "Could not find firmware image %s\n",
699 /* setup zlib and decompress f/w */
700 bzero(&zs, sizeof (zs));
701 zs.zalloc = zcalloc_nowait;
703 status = inflateInit(&zs);
704 if (status != Z_OK) {
709 /* the uncompressed size is stored as the firmware version,
710 which would otherwise go unused */
711 fw_len = (size_t) fw->version;
712 inflate_buffer = malloc(fw_len, M_TEMP, M_NOWAIT);
713 if (inflate_buffer == NULL)
715 zs.avail_in = fw->datasize;
716 zs.next_in = __DECONST(char *, fw->data);
717 zs.avail_out = fw_len;
718 zs.next_out = inflate_buffer;
719 status = inflate(&zs, Z_FINISH);
720 if (status != Z_STREAM_END) {
721 device_printf(sc->dev, "zlib %d\n", status);
723 goto abort_with_buffer;
727 hdr_offset = htobe32(*(const uint32_t *)
728 (inflate_buffer + MCP_HEADER_PTR_OFFSET));
729 if ((hdr_offset & 3) || hdr_offset + sizeof(*hdr) > fw_len) {
730 device_printf(sc->dev, "Bad firmware file");
732 goto abort_with_buffer;
734 hdr = (const void*)(inflate_buffer + hdr_offset);
736 status = mxge_validate_firmware(sc, hdr);
738 goto abort_with_buffer;
740 /* Copy the inflated firmware to NIC SRAM. */
741 for (i = 0; i < fw_len; i += 256) {
742 mxge_pio_copy(sc->sram + MXGE_FW_OFFSET + i,
744 min(256U, (unsigned)(fw_len - i)));
753 free(inflate_buffer, M_TEMP);
757 firmware_put(fw, FIRMWARE_UNLOAD);
762 * Enable or disable periodic RDMAs from the host to make certain
763 * chipsets resend dropped PCIe messages
767 mxge_dummy_rdma(mxge_softc_t *sc, int enable)
770 volatile uint32_t *confirm;
771 volatile char *submit;
772 uint32_t *buf, dma_low, dma_high;
775 buf = (uint32_t *)((uintptr_t)(buf_bytes + 7) & ~7UL);
777 /* clear confirmation addr */
778 confirm = (volatile uint32_t *)sc->cmd;
782 /* send an rdma command to the PCIe engine, and wait for the
783 response in the confirmation address. The firmware should
784 write a -1 there to indicate it is alive and well
787 dma_low = MXGE_LOWPART_TO_U32(sc->cmd_dma.bus_addr);
788 dma_high = MXGE_HIGHPART_TO_U32(sc->cmd_dma.bus_addr);
789 buf[0] = htobe32(dma_high); /* confirm addr MSW */
790 buf[1] = htobe32(dma_low); /* confirm addr LSW */
791 buf[2] = htobe32(0xffffffff); /* confirm data */
792 dma_low = MXGE_LOWPART_TO_U32(sc->zeropad_dma.bus_addr);
793 dma_high = MXGE_HIGHPART_TO_U32(sc->zeropad_dma.bus_addr);
794 buf[3] = htobe32(dma_high); /* dummy addr MSW */
795 buf[4] = htobe32(dma_low); /* dummy addr LSW */
796 buf[5] = htobe32(enable); /* enable? */
798 submit = (volatile char *)(sc->sram + MXGEFW_BOOT_DUMMY_RDMA);
800 mxge_pio_copy(submit, buf, 64);
805 while (*confirm != 0xffffffff && i < 20) {
809 if (*confirm != 0xffffffff) {
810 device_printf(sc->dev, "dummy rdma %s failed (%p = 0x%x)",
811 (enable ? "enable" : "disable"), confirm,
818 mxge_send_cmd(mxge_softc_t *sc, uint32_t cmd, mxge_cmd_t *data)
821 char buf_bytes[sizeof(*buf) + 8];
822 volatile mcp_cmd_response_t *response = sc->cmd;
823 volatile char *cmd_addr = sc->sram + MXGEFW_ETH_CMD;
824 uint32_t dma_low, dma_high;
825 int err, sleep_total = 0;
827 /* ensure buf is aligned to 8 bytes */
828 buf = (mcp_cmd_t *)((uintptr_t)(buf_bytes + 7) & ~7UL);
830 buf->data0 = htobe32(data->data0);
831 buf->data1 = htobe32(data->data1);
832 buf->data2 = htobe32(data->data2);
833 buf->cmd = htobe32(cmd);
834 dma_low = MXGE_LOWPART_TO_U32(sc->cmd_dma.bus_addr);
835 dma_high = MXGE_HIGHPART_TO_U32(sc->cmd_dma.bus_addr);
837 buf->response_addr.low = htobe32(dma_low);
838 buf->response_addr.high = htobe32(dma_high);
839 mtx_lock(&sc->cmd_mtx);
840 response->result = 0xffffffff;
842 mxge_pio_copy((volatile void *)cmd_addr, buf, sizeof (*buf));
844 /* wait up to 20ms */
846 for (sleep_total = 0; sleep_total < 20; sleep_total++) {
847 bus_dmamap_sync(sc->cmd_dma.dmat,
848 sc->cmd_dma.map, BUS_DMASYNC_POSTREAD);
850 switch (be32toh(response->result)) {
852 data->data0 = be32toh(response->data);
858 case MXGEFW_CMD_UNKNOWN:
861 case MXGEFW_CMD_ERROR_UNALIGNED:
864 case MXGEFW_CMD_ERROR_BUSY:
867 case MXGEFW_CMD_ERROR_I2C_ABSENT:
871 device_printf(sc->dev,
873 "failed, result = %d\n",
874 cmd, be32toh(response->result));
882 device_printf(sc->dev, "mxge: command %d timed out"
884 cmd, be32toh(response->result));
885 mtx_unlock(&sc->cmd_mtx);
890 mxge_adopt_running_firmware(mxge_softc_t *sc)
892 struct mcp_gen_header *hdr;
893 const size_t bytes = sizeof (struct mcp_gen_header);
897 /* find running firmware header */
898 hdr_offset = htobe32(*(volatile uint32_t *)
899 (sc->sram + MCP_HEADER_PTR_OFFSET));
901 if ((hdr_offset & 3) || hdr_offset + sizeof(*hdr) > sc->sram_size) {
902 device_printf(sc->dev,
903 "Running firmware has bad header offset (%d)\n",
908 /* copy header of running firmware from SRAM to host memory to
909 * validate firmware */
910 hdr = malloc(bytes, M_DEVBUF, M_NOWAIT);
912 device_printf(sc->dev, "could not malloc firmware hdr\n");
915 bus_space_read_region_1(rman_get_bustag(sc->mem_res),
916 rman_get_bushandle(sc->mem_res),
917 hdr_offset, (char *)hdr, bytes);
918 status = mxge_validate_firmware(sc, hdr);
922 * check to see if adopted firmware has bug where adopting
923 * it will cause broadcasts to be filtered unless the NIC
924 * is kept in ALLMULTI mode
926 if (sc->fw_ver_major == 1 && sc->fw_ver_minor == 4 &&
927 sc->fw_ver_tiny >= 4 && sc->fw_ver_tiny <= 11) {
928 sc->adopted_rx_filter_bug = 1;
929 device_printf(sc->dev, "Adopting fw %d.%d.%d: "
930 "working around rx filter bug\n",
931 sc->fw_ver_major, sc->fw_ver_minor,
939 mxge_load_firmware(mxge_softc_t *sc, int adopt)
941 volatile uint32_t *confirm;
942 volatile char *submit;
944 uint32_t *buf, size, dma_low, dma_high;
947 buf = (uint32_t *)((uintptr_t)(buf_bytes + 7) & ~7UL);
949 size = sc->sram_size;
950 status = mxge_load_firmware_helper(sc, &size);
954 /* Try to use the currently running firmware, if
956 status = mxge_adopt_running_firmware(sc);
958 device_printf(sc->dev,
959 "failed to adopt running firmware\n");
962 device_printf(sc->dev,
963 "Successfully adopted running firmware\n");
964 if (sc->tx_boundary == 4096) {
965 device_printf(sc->dev,
966 "Using firmware currently running on NIC"
968 device_printf(sc->dev,
969 "performance consider loading optimized "
972 sc->fw_name = mxge_fw_unaligned;
973 sc->tx_boundary = 2048;
976 /* clear confirmation addr */
977 confirm = (volatile uint32_t *)sc->cmd;
980 /* send a reload command to the bootstrap MCP, and wait for the
981 response in the confirmation address. The firmware should
982 write a -1 there to indicate it is alive and well
985 dma_low = MXGE_LOWPART_TO_U32(sc->cmd_dma.bus_addr);
986 dma_high = MXGE_HIGHPART_TO_U32(sc->cmd_dma.bus_addr);
988 buf[0] = htobe32(dma_high); /* confirm addr MSW */
989 buf[1] = htobe32(dma_low); /* confirm addr LSW */
990 buf[2] = htobe32(0xffffffff); /* confirm data */
992 /* FIX: All newest firmware should un-protect the bottom of
993 the sram before handoff. However, the very first interfaces
994 do not. Therefore the handoff copy must skip the first 8 bytes
996 /* where the code starts*/
997 buf[3] = htobe32(MXGE_FW_OFFSET + 8);
998 buf[4] = htobe32(size - 8); /* length of code */
999 buf[5] = htobe32(8); /* where to copy to */
1000 buf[6] = htobe32(0); /* where to jump to */
1002 submit = (volatile char *)(sc->sram + MXGEFW_BOOT_HANDOFF);
1003 mxge_pio_copy(submit, buf, 64);
1008 while (*confirm != 0xffffffff && i < 20) {
1011 bus_dmamap_sync(sc->cmd_dma.dmat,
1012 sc->cmd_dma.map, BUS_DMASYNC_POSTREAD);
1014 if (*confirm != 0xffffffff) {
1015 device_printf(sc->dev,"handoff failed (%p = 0x%x)",
1024 mxge_update_mac_address(mxge_softc_t *sc)
1027 uint8_t *addr = sc->mac_addr;
1030 cmd.data0 = ((addr[0] << 24) | (addr[1] << 16)
1031 | (addr[2] << 8) | addr[3]);
1033 cmd.data1 = ((addr[4] << 8) | (addr[5]));
1035 status = mxge_send_cmd(sc, MXGEFW_SET_MAC_ADDRESS, &cmd);
1040 mxge_change_pause(mxge_softc_t *sc, int pause)
1046 status = mxge_send_cmd(sc, MXGEFW_ENABLE_FLOW_CONTROL,
1049 status = mxge_send_cmd(sc, MXGEFW_DISABLE_FLOW_CONTROL,
1053 device_printf(sc->dev, "Failed to set flow control mode\n");
1061 mxge_change_promisc(mxge_softc_t *sc, int promisc)
1066 if (mxge_always_promisc)
1070 status = mxge_send_cmd(sc, MXGEFW_ENABLE_PROMISC,
1073 status = mxge_send_cmd(sc, MXGEFW_DISABLE_PROMISC,
1077 device_printf(sc->dev, "Failed to set promisc mode\n");
1081 struct mxge_add_maddr_ctx {
1087 mxge_add_maddr(void *arg, struct sockaddr_dl *sdl, u_int cnt)
1089 struct mxge_add_maddr_ctx *ctx = arg;
1092 if (ctx->error != 0)
1094 bcopy(LLADDR(sdl), &cmd.data0, 4);
1095 bcopy(LLADDR(sdl) + 4, &cmd.data1, 2);
1096 cmd.data0 = htonl(cmd.data0);
1097 cmd.data1 = htonl(cmd.data1);
1099 ctx->error = mxge_send_cmd(ctx->sc, MXGEFW_JOIN_MULTICAST_GROUP, &cmd);
1105 mxge_set_multicast_list(mxge_softc_t *sc)
1107 struct mxge_add_maddr_ctx ctx;
1108 struct ifnet *ifp = sc->ifp;
1112 /* This firmware is known to not support multicast */
1113 if (!sc->fw_multicast_support)
1116 /* Disable multicast filtering while we play with the lists*/
1117 err = mxge_send_cmd(sc, MXGEFW_ENABLE_ALLMULTI, &cmd);
1119 device_printf(sc->dev, "Failed MXGEFW_ENABLE_ALLMULTI,"
1120 " error status: %d\n", err);
1124 if (sc->adopted_rx_filter_bug)
1127 if (ifp->if_flags & IFF_ALLMULTI)
1128 /* request to disable multicast filtering, so quit here */
1131 /* Flush all the filters */
1133 err = mxge_send_cmd(sc, MXGEFW_LEAVE_ALL_MULTICAST_GROUPS, &cmd);
1135 device_printf(sc->dev,
1136 "Failed MXGEFW_LEAVE_ALL_MULTICAST_GROUPS"
1137 ", error status: %d\n", err);
1141 /* Walk the multicast list, and add each address */
1144 if_foreach_llmaddr(ifp, mxge_add_maddr, &ctx);
1145 if (ctx.error != 0) {
1146 device_printf(sc->dev, "Failed MXGEFW_JOIN_MULTICAST_GROUP, "
1147 "error status:" "%d\t", ctx.error);
1148 /* abort, leaving multicast filtering off */
1152 /* Enable multicast filtering */
1153 err = mxge_send_cmd(sc, MXGEFW_DISABLE_ALLMULTI, &cmd);
1155 device_printf(sc->dev, "Failed MXGEFW_DISABLE_ALLMULTI"
1156 ", error status: %d\n", err);
1161 mxge_max_mtu(mxge_softc_t *sc)
1166 if (MJUMPAGESIZE - MXGEFW_PAD > MXGEFW_MAX_MTU)
1167 return MXGEFW_MAX_MTU - MXGEFW_PAD;
1169 /* try to set nbufs to see if it we can
1170 use virtually contiguous jumbos */
1172 status = mxge_send_cmd(sc, MXGEFW_CMD_ALWAYS_USE_N_BIG_BUFFERS,
1175 return MXGEFW_MAX_MTU - MXGEFW_PAD;
1177 /* otherwise, we're limited to MJUMPAGESIZE */
1178 return MJUMPAGESIZE - MXGEFW_PAD;
1182 mxge_reset(mxge_softc_t *sc, int interrupts_setup)
1184 struct mxge_slice_state *ss;
1185 mxge_rx_done_t *rx_done;
1186 volatile uint32_t *irq_claim;
1190 /* try to send a reset command to the card to see if it
1192 memset(&cmd, 0, sizeof (cmd));
1193 status = mxge_send_cmd(sc, MXGEFW_CMD_RESET, &cmd);
1195 device_printf(sc->dev, "failed reset\n");
1199 mxge_dummy_rdma(sc, 1);
1201 /* set the intrq size */
1202 cmd.data0 = sc->rx_ring_size;
1203 status = mxge_send_cmd(sc, MXGEFW_CMD_SET_INTRQ_SIZE, &cmd);
1206 * Even though we already know how many slices are supported
1207 * via mxge_slice_probe(), MXGEFW_CMD_GET_MAX_RSS_QUEUES
1208 * has magic side effects, and must be called after a reset.
1209 * It must be called prior to calling any RSS related cmds,
1210 * including assigning an interrupt queue for anything but
1211 * slice 0. It must also be called *after*
1212 * MXGEFW_CMD_SET_INTRQ_SIZE, since the intrq size is used by
1213 * the firmware to compute offsets.
1216 if (sc->num_slices > 1) {
1217 /* ask the maximum number of slices it supports */
1218 status = mxge_send_cmd(sc, MXGEFW_CMD_GET_MAX_RSS_QUEUES,
1221 device_printf(sc->dev,
1222 "failed to get number of slices\n");
1226 * MXGEFW_CMD_ENABLE_RSS_QUEUES must be called prior
1227 * to setting up the interrupt queue DMA
1229 cmd.data0 = sc->num_slices;
1230 cmd.data1 = MXGEFW_SLICE_INTR_MODE_ONE_PER_SLICE;
1231 #ifdef IFNET_BUF_RING
1232 cmd.data1 |= MXGEFW_SLICE_ENABLE_MULTIPLE_TX_QUEUES;
1234 status = mxge_send_cmd(sc, MXGEFW_CMD_ENABLE_RSS_QUEUES,
1237 device_printf(sc->dev,
1238 "failed to set number of slices\n");
1243 if (interrupts_setup) {
1244 /* Now exchange information about interrupts */
1245 for (slice = 0; slice < sc->num_slices; slice++) {
1246 rx_done = &sc->ss[slice].rx_done;
1247 memset(rx_done->entry, 0, sc->rx_ring_size);
1248 cmd.data0 = MXGE_LOWPART_TO_U32(rx_done->dma.bus_addr);
1249 cmd.data1 = MXGE_HIGHPART_TO_U32(rx_done->dma.bus_addr);
1251 status |= mxge_send_cmd(sc,
1252 MXGEFW_CMD_SET_INTRQ_DMA,
1257 status |= mxge_send_cmd(sc,
1258 MXGEFW_CMD_GET_INTR_COAL_DELAY_OFFSET, &cmd);
1260 sc->intr_coal_delay_ptr = (volatile uint32_t *)(sc->sram + cmd.data0);
1262 status |= mxge_send_cmd(sc, MXGEFW_CMD_GET_IRQ_ACK_OFFSET, &cmd);
1263 irq_claim = (volatile uint32_t *)(sc->sram + cmd.data0);
1265 status |= mxge_send_cmd(sc, MXGEFW_CMD_GET_IRQ_DEASSERT_OFFSET,
1267 sc->irq_deassert = (volatile uint32_t *)(sc->sram + cmd.data0);
1269 device_printf(sc->dev, "failed set interrupt parameters\n");
1273 *sc->intr_coal_delay_ptr = htobe32(sc->intr_coal_delay);
1275 /* run a DMA benchmark */
1276 (void) mxge_dma_test(sc, MXGEFW_DMA_TEST);
1278 for (slice = 0; slice < sc->num_slices; slice++) {
1279 ss = &sc->ss[slice];
1281 ss->irq_claim = irq_claim + (2 * slice);
1282 /* reset mcp/driver shared state back to 0 */
1283 ss->rx_done.idx = 0;
1284 ss->rx_done.cnt = 0;
1287 ss->tx.pkt_done = 0;
1288 ss->tx.queue_active = 0;
1289 ss->tx.activate = 0;
1290 ss->tx.deactivate = 0;
1295 ss->rx_small.cnt = 0;
1296 ss->lc.lro_bad_csum = 0;
1297 ss->lc.lro_queued = 0;
1298 ss->lc.lro_flushed = 0;
1299 if (ss->fw_stats != NULL) {
1300 bzero(ss->fw_stats, sizeof *ss->fw_stats);
1303 sc->rdma_tags_available = 15;
1304 status = mxge_update_mac_address(sc);
1305 mxge_change_promisc(sc, sc->ifp->if_flags & IFF_PROMISC);
1306 mxge_change_pause(sc, sc->pause);
1307 mxge_set_multicast_list(sc);
1309 cmd.data0 = sc->throttle;
1310 if (mxge_send_cmd(sc, MXGEFW_CMD_SET_THROTTLE_FACTOR,
1312 device_printf(sc->dev,
1313 "can't enable throttle\n");
1320 mxge_change_throttle(SYSCTL_HANDLER_ARGS)
1325 unsigned int throttle;
1328 throttle = sc->throttle;
1329 err = sysctl_handle_int(oidp, &throttle, arg2, req);
1334 if (throttle == sc->throttle)
1337 if (throttle < MXGE_MIN_THROTTLE || throttle > MXGE_MAX_THROTTLE)
1340 mtx_lock(&sc->driver_mtx);
1341 cmd.data0 = throttle;
1342 err = mxge_send_cmd(sc, MXGEFW_CMD_SET_THROTTLE_FACTOR, &cmd);
1344 sc->throttle = throttle;
1345 mtx_unlock(&sc->driver_mtx);
1350 mxge_change_intr_coal(SYSCTL_HANDLER_ARGS)
1353 unsigned int intr_coal_delay;
1357 intr_coal_delay = sc->intr_coal_delay;
1358 err = sysctl_handle_int(oidp, &intr_coal_delay, arg2, req);
1362 if (intr_coal_delay == sc->intr_coal_delay)
1365 if (intr_coal_delay == 0 || intr_coal_delay > 1000*1000)
1368 mtx_lock(&sc->driver_mtx);
1369 *sc->intr_coal_delay_ptr = htobe32(intr_coal_delay);
1370 sc->intr_coal_delay = intr_coal_delay;
1372 mtx_unlock(&sc->driver_mtx);
1377 mxge_change_flow_control(SYSCTL_HANDLER_ARGS)
1380 unsigned int enabled;
1384 enabled = sc->pause;
1385 err = sysctl_handle_int(oidp, &enabled, arg2, req);
1389 if (enabled == sc->pause)
1392 mtx_lock(&sc->driver_mtx);
1393 err = mxge_change_pause(sc, enabled);
1394 mtx_unlock(&sc->driver_mtx);
1399 mxge_handle_be32(SYSCTL_HANDLER_ARGS)
1405 arg2 = be32toh(*(int *)arg1);
1407 err = sysctl_handle_int(oidp, arg1, arg2, req);
1413 mxge_rem_sysctls(mxge_softc_t *sc)
1415 struct mxge_slice_state *ss;
1418 if (sc->slice_sysctl_tree == NULL)
1421 for (slice = 0; slice < sc->num_slices; slice++) {
1422 ss = &sc->ss[slice];
1423 if (ss == NULL || ss->sysctl_tree == NULL)
1425 sysctl_ctx_free(&ss->sysctl_ctx);
1426 ss->sysctl_tree = NULL;
1428 sysctl_ctx_free(&sc->slice_sysctl_ctx);
1429 sc->slice_sysctl_tree = NULL;
1433 mxge_add_sysctls(mxge_softc_t *sc)
1435 struct sysctl_ctx_list *ctx;
1436 struct sysctl_oid_list *children;
1438 struct mxge_slice_state *ss;
1442 ctx = device_get_sysctl_ctx(sc->dev);
1443 children = SYSCTL_CHILDREN(device_get_sysctl_tree(sc->dev));
1444 fw = sc->ss[0].fw_stats;
1446 /* random information */
1447 SYSCTL_ADD_STRING(ctx, children, OID_AUTO,
1449 CTLFLAG_RD, sc->fw_version,
1450 0, "firmware version");
1451 SYSCTL_ADD_STRING(ctx, children, OID_AUTO,
1453 CTLFLAG_RD, sc->serial_number_string,
1454 0, "serial number");
1455 SYSCTL_ADD_STRING(ctx, children, OID_AUTO,
1457 CTLFLAG_RD, sc->product_code_string,
1459 SYSCTL_ADD_INT(ctx, children, OID_AUTO,
1461 CTLFLAG_RD, &sc->link_width,
1463 SYSCTL_ADD_INT(ctx, children, OID_AUTO,
1465 CTLFLAG_RD, &sc->tx_boundary,
1467 SYSCTL_ADD_INT(ctx, children, OID_AUTO,
1469 CTLFLAG_RD, &sc->wc,
1470 0, "write combining PIO?");
1471 SYSCTL_ADD_INT(ctx, children, OID_AUTO,
1473 CTLFLAG_RD, &sc->read_dma,
1474 0, "DMA Read speed in MB/s");
1475 SYSCTL_ADD_INT(ctx, children, OID_AUTO,
1477 CTLFLAG_RD, &sc->write_dma,
1478 0, "DMA Write speed in MB/s");
1479 SYSCTL_ADD_INT(ctx, children, OID_AUTO,
1480 "read_write_dma_MBs",
1481 CTLFLAG_RD, &sc->read_write_dma,
1482 0, "DMA concurrent Read/Write speed in MB/s");
1483 SYSCTL_ADD_INT(ctx, children, OID_AUTO,
1485 CTLFLAG_RD, &sc->watchdog_resets,
1486 0, "Number of times NIC was reset");
1488 /* performance related tunables */
1489 SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
1490 "intr_coal_delay", CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE,
1491 sc, 0, mxge_change_intr_coal, "I",
1492 "interrupt coalescing delay in usecs");
1494 SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
1495 "throttle", CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, 0,
1496 mxge_change_throttle, "I", "transmit throttling");
1498 SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
1499 "flow_control_enabled",
1500 CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, 0,
1501 mxge_change_flow_control, "I",
1502 "interrupt coalescing delay in usecs");
1504 SYSCTL_ADD_INT(ctx, children, OID_AUTO,
1506 CTLFLAG_RW, &mxge_deassert_wait,
1507 0, "Wait for IRQ line to go low in ihandler");
1509 /* stats block from firmware is in network byte order.
1511 SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
1512 "link_up", CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_MPSAFE,
1513 &fw->link_up, 0, mxge_handle_be32, "I", "link up");
1514 SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
1515 "rdma_tags_available", CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_MPSAFE,
1516 &fw->rdma_tags_available, 0, mxge_handle_be32, "I",
1517 "rdma_tags_available");
1518 SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
1519 "dropped_bad_crc32", CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_MPSAFE,
1520 &fw->dropped_bad_crc32, 0, mxge_handle_be32, "I",
1521 "dropped_bad_crc32");
1522 SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
1523 "dropped_bad_phy", CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_MPSAFE,
1524 &fw->dropped_bad_phy, 0, mxge_handle_be32, "I", "dropped_bad_phy");
1525 SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
1526 "dropped_link_error_or_filtered",
1527 CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_MPSAFE,
1528 &fw->dropped_link_error_or_filtered, 0, mxge_handle_be32, "I",
1529 "dropped_link_error_or_filtered");
1530 SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
1531 "dropped_link_overflow",
1532 CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_MPSAFE,
1533 &fw->dropped_link_overflow, 0, mxge_handle_be32, "I",
1534 "dropped_link_overflow");
1535 SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
1536 "dropped_multicast_filtered",
1537 CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_MPSAFE,
1538 &fw->dropped_multicast_filtered, 0, mxge_handle_be32, "I",
1539 "dropped_multicast_filtered");
1540 SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
1541 "dropped_no_big_buffer",
1542 CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_MPSAFE,
1543 &fw->dropped_no_big_buffer, 0, mxge_handle_be32, "I",
1544 "dropped_no_big_buffer");
1545 SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
1546 "dropped_no_small_buffer",
1547 CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_MPSAFE,
1548 &fw->dropped_no_small_buffer, 0, mxge_handle_be32, "I",
1549 "dropped_no_small_buffer");
1550 SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
1552 CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_MPSAFE,
1553 &fw->dropped_overrun, 0, mxge_handle_be32, "I",
1555 SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
1556 "dropped_pause", CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_MPSAFE,
1557 &fw->dropped_pause, 0, mxge_handle_be32, "I", "dropped_pause");
1558 SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
1559 "dropped_runt", CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_MPSAFE,
1560 &fw->dropped_runt, 0, mxge_handle_be32, "I", "dropped_runt");
1562 SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
1563 "dropped_unicast_filtered",
1564 CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_MPSAFE,
1565 &fw->dropped_unicast_filtered, 0, mxge_handle_be32, "I",
1566 "dropped_unicast_filtered");
1568 /* verbose printing? */
1569 SYSCTL_ADD_INT(ctx, children, OID_AUTO,
1571 CTLFLAG_RW, &mxge_verbose,
1572 0, "verbose printing");
1574 /* add counters exported for debugging from all slices */
1575 sysctl_ctx_init(&sc->slice_sysctl_ctx);
1576 sc->slice_sysctl_tree =
1577 SYSCTL_ADD_NODE(&sc->slice_sysctl_ctx, children, OID_AUTO,
1578 "slice", CTLFLAG_RD | CTLFLAG_MPSAFE, 0, "");
1580 for (slice = 0; slice < sc->num_slices; slice++) {
1581 ss = &sc->ss[slice];
1582 sysctl_ctx_init(&ss->sysctl_ctx);
1583 ctx = &ss->sysctl_ctx;
1584 children = SYSCTL_CHILDREN(sc->slice_sysctl_tree);
1585 sprintf(slice_num, "%d", slice);
1587 SYSCTL_ADD_NODE(ctx, children, OID_AUTO, slice_num,
1588 CTLFLAG_RD | CTLFLAG_MPSAFE, 0, "");
1589 children = SYSCTL_CHILDREN(ss->sysctl_tree);
1590 SYSCTL_ADD_INT(ctx, children, OID_AUTO,
1592 CTLFLAG_RD, &ss->rx_small.cnt,
1594 SYSCTL_ADD_INT(ctx, children, OID_AUTO,
1596 CTLFLAG_RD, &ss->rx_big.cnt,
1598 SYSCTL_ADD_U64(ctx, children, OID_AUTO,
1599 "lro_flushed", CTLFLAG_RD, &ss->lc.lro_flushed,
1600 0, "number of lro merge queues flushed");
1602 SYSCTL_ADD_U64(ctx, children, OID_AUTO,
1603 "lro_bad_csum", CTLFLAG_RD, &ss->lc.lro_bad_csum,
1604 0, "number of bad csums preventing LRO");
1606 SYSCTL_ADD_U64(ctx, children, OID_AUTO,
1607 "lro_queued", CTLFLAG_RD, &ss->lc.lro_queued,
1608 0, "number of frames appended to lro merge"
1611 #ifndef IFNET_BUF_RING
1612 /* only transmit from slice 0 for now */
1616 SYSCTL_ADD_INT(ctx, children, OID_AUTO,
1618 CTLFLAG_RD, &ss->tx.req,
1621 SYSCTL_ADD_INT(ctx, children, OID_AUTO,
1623 CTLFLAG_RD, &ss->tx.done,
1625 SYSCTL_ADD_INT(ctx, children, OID_AUTO,
1627 CTLFLAG_RD, &ss->tx.pkt_done,
1629 SYSCTL_ADD_INT(ctx, children, OID_AUTO,
1631 CTLFLAG_RD, &ss->tx.stall,
1633 SYSCTL_ADD_INT(ctx, children, OID_AUTO,
1635 CTLFLAG_RD, &ss->tx.wake,
1637 SYSCTL_ADD_INT(ctx, children, OID_AUTO,
1639 CTLFLAG_RD, &ss->tx.defrag,
1641 SYSCTL_ADD_INT(ctx, children, OID_AUTO,
1643 CTLFLAG_RD, &ss->tx.queue_active,
1644 0, "tx_queue_active");
1645 SYSCTL_ADD_INT(ctx, children, OID_AUTO,
1647 CTLFLAG_RD, &ss->tx.activate,
1649 SYSCTL_ADD_INT(ctx, children, OID_AUTO,
1651 CTLFLAG_RD, &ss->tx.deactivate,
1652 0, "tx_deactivate");
1656 /* copy an array of mcp_kreq_ether_send_t's to the mcp. Copy
1657 backwards one at a time and handle ring wraps */
1660 mxge_submit_req_backwards(mxge_tx_ring_t *tx,
1661 mcp_kreq_ether_send_t *src, int cnt)
1663 int idx, starting_slot;
1664 starting_slot = tx->req;
1667 idx = (starting_slot + cnt) & tx->mask;
1668 mxge_pio_copy(&tx->lanai[idx],
1669 &src[cnt], sizeof(*src));
1675 * copy an array of mcp_kreq_ether_send_t's to the mcp. Copy
1676 * at most 32 bytes at a time, so as to avoid involving the software
1677 * pio handler in the nic. We re-write the first segment's flags
1678 * to mark them valid only after writing the entire chain
1682 mxge_submit_req(mxge_tx_ring_t *tx, mcp_kreq_ether_send_t *src,
1687 volatile uint32_t *dst_ints;
1688 mcp_kreq_ether_send_t *srcp;
1689 volatile mcp_kreq_ether_send_t *dstp, *dst;
1692 idx = tx->req & tx->mask;
1694 last_flags = src->flags;
1697 dst = dstp = &tx->lanai[idx];
1700 if ((idx + cnt) < tx->mask) {
1701 for (i = 0; i < (cnt - 1); i += 2) {
1702 mxge_pio_copy(dstp, srcp, 2 * sizeof(*src));
1703 wmb(); /* force write every 32 bytes */
1708 /* submit all but the first request, and ensure
1709 that it is submitted below */
1710 mxge_submit_req_backwards(tx, src, cnt);
1714 /* submit the first request */
1715 mxge_pio_copy(dstp, srcp, sizeof(*src));
1716 wmb(); /* barrier before setting valid flag */
1719 /* re-write the last 32-bits with the valid flags */
1720 src->flags = last_flags;
1721 src_ints = (uint32_t *)src;
1723 dst_ints = (volatile uint32_t *)dst;
1725 *dst_ints = *src_ints;
1731 mxge_parse_tx(struct mxge_slice_state *ss, struct mbuf *m,
1732 struct mxge_pkt_info *pi)
1734 struct ether_vlan_header *eh;
1736 int tso = m->m_pkthdr.csum_flags & (CSUM_TSO);
1737 #if IFCAP_TSO6 && defined(INET6)
1741 eh = mtod(m, struct ether_vlan_header *);
1742 if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
1743 etype = ntohs(eh->evl_proto);
1744 pi->ip_off = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
1746 etype = ntohs(eh->evl_encap_proto);
1747 pi->ip_off = ETHER_HDR_LEN;
1753 * ensure ip header is in first mbuf, copy it to a
1754 * scratch buffer if not
1756 pi->ip = (struct ip *)(m->m_data + pi->ip_off);
1758 if (__predict_false(m->m_len < pi->ip_off + sizeof(*pi->ip))) {
1759 m_copydata(m, 0, pi->ip_off + sizeof(*pi->ip),
1761 pi->ip = (struct ip *)(ss->scratch + pi->ip_off);
1763 pi->ip_hlen = pi->ip->ip_hl << 2;
1767 if (__predict_false(m->m_len < pi->ip_off + pi->ip_hlen +
1768 sizeof(struct tcphdr))) {
1769 m_copydata(m, 0, pi->ip_off + pi->ip_hlen +
1770 sizeof(struct tcphdr), ss->scratch);
1771 pi->ip = (struct ip *)(ss->scratch + pi->ip_off);
1773 pi->tcp = (struct tcphdr *)((char *)pi->ip + pi->ip_hlen);
1775 #if IFCAP_TSO6 && defined(INET6)
1776 case ETHERTYPE_IPV6:
1777 pi->ip6 = (struct ip6_hdr *)(m->m_data + pi->ip_off);
1778 if (__predict_false(m->m_len < pi->ip_off + sizeof(*pi->ip6))) {
1779 m_copydata(m, 0, pi->ip_off + sizeof(*pi->ip6),
1781 pi->ip6 = (struct ip6_hdr *)(ss->scratch + pi->ip_off);
1784 pi->ip_hlen = ip6_lasthdr(m, pi->ip_off, IPPROTO_IPV6, &nxt);
1785 pi->ip_hlen -= pi->ip_off;
1786 if (nxt != IPPROTO_TCP && nxt != IPPROTO_UDP)
1792 if (pi->ip_off + pi->ip_hlen > ss->sc->max_tso6_hlen)
1795 if (__predict_false(m->m_len < pi->ip_off + pi->ip_hlen +
1796 sizeof(struct tcphdr))) {
1797 m_copydata(m, 0, pi->ip_off + pi->ip_hlen +
1798 sizeof(struct tcphdr), ss->scratch);
1799 pi->ip6 = (struct ip6_hdr *)(ss->scratch + pi->ip_off);
1801 pi->tcp = (struct tcphdr *)((char *)pi->ip6 + pi->ip_hlen);
1813 mxge_encap_tso(struct mxge_slice_state *ss, struct mbuf *m,
1814 int busdma_seg_cnt, struct mxge_pkt_info *pi)
1817 mcp_kreq_ether_send_t *req;
1818 bus_dma_segment_t *seg;
1819 uint32_t low, high_swapped;
1820 int len, seglen, cum_len, cum_len_next;
1821 int next_is_first, chop, cnt, rdma_count, small;
1822 uint16_t pseudo_hdr_offset, cksum_offset, mss, sum;
1823 uint8_t flags, flags_next;
1826 mss = m->m_pkthdr.tso_segsz;
1828 /* negative cum_len signifies to the
1829 * send loop that we are still in the
1830 * header portion of the TSO packet.
1833 cksum_offset = pi->ip_off + pi->ip_hlen;
1834 cum_len = -(cksum_offset + (pi->tcp->th_off << 2));
1836 /* TSO implies checksum offload on this hardware */
1837 if (__predict_false((m->m_pkthdr.csum_flags & (CSUM_TCP|CSUM_TCP_IPV6)) == 0)) {
1839 * If packet has full TCP csum, replace it with pseudo hdr
1840 * sum that the NIC expects, otherwise the NIC will emit
1841 * packets with bad TCP checksums.
1843 m->m_pkthdr.csum_data = offsetof(struct tcphdr, th_sum);
1845 #if (CSUM_TCP_IPV6 != 0) && defined(INET6)
1846 m->m_pkthdr.csum_flags |= CSUM_TCP_IPV6;
1847 sum = in6_cksum_pseudo(pi->ip6,
1848 m->m_pkthdr.len - cksum_offset,
1853 m->m_pkthdr.csum_flags |= CSUM_TCP;
1854 sum = in_pseudo(pi->ip->ip_src.s_addr,
1855 pi->ip->ip_dst.s_addr,
1856 htons(IPPROTO_TCP + (m->m_pkthdr.len -
1860 m_copyback(m, offsetof(struct tcphdr, th_sum) +
1861 cksum_offset, sizeof(sum), (caddr_t)&sum);
1863 flags = MXGEFW_FLAGS_TSO_HDR | MXGEFW_FLAGS_FIRST;
1865 /* for TSO, pseudo_hdr_offset holds mss.
1866 * The firmware figures out where to put
1867 * the checksum by parsing the header. */
1868 pseudo_hdr_offset = htobe16(mss);
1872 * for IPv6 TSO, the "checksum offset" is re-purposed
1873 * to store the TCP header len
1875 cksum_offset = (pi->tcp->th_off << 2);
1883 /* "rdma_count" is the number of RDMAs belonging to the
1884 * current packet BEFORE the current send request. For
1885 * non-TSO packets, this is equal to "count".
1886 * For TSO packets, rdma_count needs to be reset
1887 * to 0 after a segment cut.
1889 * The rdma_count field of the send request is
1890 * the number of RDMAs of the packet starting at
1891 * that request. For TSO send requests with one ore more cuts
1892 * in the middle, this is the number of RDMAs starting
1893 * after the last cut in the request. All previous
1894 * segments before the last cut implicitly have 1 RDMA.
1896 * Since the number of RDMAs is not known beforehand,
1897 * it must be filled-in retroactively - after each
1898 * segmentation cut or at the end of the entire packet.
1901 while (busdma_seg_cnt) {
1902 /* Break the busdma segment up into pieces*/
1903 low = MXGE_LOWPART_TO_U32(seg->ds_addr);
1904 high_swapped = htobe32(MXGE_HIGHPART_TO_U32(seg->ds_addr));
1908 flags_next = flags & ~MXGEFW_FLAGS_FIRST;
1910 cum_len_next = cum_len + seglen;
1911 (req-rdma_count)->rdma_count = rdma_count + 1;
1912 if (__predict_true(cum_len >= 0)) {
1914 chop = (cum_len_next > mss);
1915 cum_len_next = cum_len_next % mss;
1916 next_is_first = (cum_len_next == 0);
1917 flags |= chop * MXGEFW_FLAGS_TSO_CHOP;
1918 flags_next |= next_is_first *
1920 rdma_count |= -(chop | next_is_first);
1921 rdma_count += chop & !next_is_first;
1922 } else if (cum_len_next >= 0) {
1927 small = (mss <= MXGEFW_SEND_SMALL_SIZE);
1928 flags_next = MXGEFW_FLAGS_TSO_PLD |
1929 MXGEFW_FLAGS_FIRST |
1930 (small * MXGEFW_FLAGS_SMALL);
1933 req->addr_high = high_swapped;
1934 req->addr_low = htobe32(low);
1935 req->pseudo_hdr_offset = pseudo_hdr_offset;
1937 req->rdma_count = 1;
1938 req->length = htobe16(seglen);
1939 req->cksum_offset = cksum_offset;
1940 req->flags = flags | ((cum_len & 1) *
1941 MXGEFW_FLAGS_ALIGN_ODD);
1944 cum_len = cum_len_next;
1949 if (cksum_offset != 0 && !pi->ip6) {
1950 if (__predict_false(cksum_offset > seglen))
1951 cksum_offset -= seglen;
1955 if (__predict_false(cnt > tx->max_desc))
1961 (req-rdma_count)->rdma_count = rdma_count;
1965 req->flags |= MXGEFW_FLAGS_TSO_LAST;
1966 } while (!(req->flags & (MXGEFW_FLAGS_TSO_CHOP | MXGEFW_FLAGS_FIRST)));
1968 tx->info[((cnt - 1) + tx->req) & tx->mask].flag = 1;
1969 mxge_submit_req(tx, tx->req_list, cnt);
1970 #ifdef IFNET_BUF_RING
1971 if ((ss->sc->num_slices > 1) && tx->queue_active == 0) {
1972 /* tell the NIC to start polling this slice */
1974 tx->queue_active = 1;
1982 bus_dmamap_unload(tx->dmat, tx->info[tx->req & tx->mask].map);
1986 printf("tx->max_desc exceeded via TSO!\n");
1987 printf("mss = %d, %ld, %d!\n", mss,
1988 (long)seg - (long)tx->seg_list, tx->max_desc);
1995 #endif /* IFCAP_TSO4 */
1997 #ifdef MXGE_NEW_VLAN_API
1999 * We reproduce the software vlan tag insertion from
2000 * net/if_vlan.c:vlan_start() here so that we can advertise "hardware"
2001 * vlan tag insertion. We need to advertise this in order to have the
2002 * vlan interface respect our csum offload flags.
2004 static struct mbuf *
2005 mxge_vlan_tag_insert(struct mbuf *m)
2007 struct ether_vlan_header *evl;
2009 M_PREPEND(m, ETHER_VLAN_ENCAP_LEN, M_NOWAIT);
2010 if (__predict_false(m == NULL))
2012 if (m->m_len < sizeof(*evl)) {
2013 m = m_pullup(m, sizeof(*evl));
2014 if (__predict_false(m == NULL))
2018 * Transform the Ethernet header into an Ethernet header
2019 * with 802.1Q encapsulation.
2021 evl = mtod(m, struct ether_vlan_header *);
2022 bcopy((char *)evl + ETHER_VLAN_ENCAP_LEN,
2023 (char *)evl, ETHER_HDR_LEN - ETHER_TYPE_LEN);
2024 evl->evl_encap_proto = htons(ETHERTYPE_VLAN);
2025 evl->evl_tag = htons(m->m_pkthdr.ether_vtag);
2026 m->m_flags &= ~M_VLANTAG;
2029 #endif /* MXGE_NEW_VLAN_API */
2032 mxge_encap(struct mxge_slice_state *ss, struct mbuf *m)
2034 struct mxge_pkt_info pi = {0,0,0,0};
2036 mcp_kreq_ether_send_t *req;
2037 bus_dma_segment_t *seg;
2041 int cnt, cum_len, err, i, idx, odd_flag;
2042 uint16_t pseudo_hdr_offset;
2043 uint8_t flags, cksum_offset;
2049 #ifdef MXGE_NEW_VLAN_API
2050 if (m->m_flags & M_VLANTAG) {
2051 m = mxge_vlan_tag_insert(m);
2052 if (__predict_false(m == NULL))
2053 goto drop_without_m;
2056 if (m->m_pkthdr.csum_flags &
2057 (CSUM_TSO | CSUM_DELAY_DATA | CSUM_DELAY_DATA_IPV6)) {
2058 if (mxge_parse_tx(ss, m, &pi))
2062 /* (try to) map the frame for DMA */
2063 idx = tx->req & tx->mask;
2064 err = bus_dmamap_load_mbuf_sg(tx->dmat, tx->info[idx].map,
2065 m, tx->seg_list, &cnt,
2067 if (__predict_false(err == EFBIG)) {
2068 /* Too many segments in the chain. Try
2070 m_tmp = m_defrag(m, M_NOWAIT);
2071 if (m_tmp == NULL) {
2076 err = bus_dmamap_load_mbuf_sg(tx->dmat,
2078 m, tx->seg_list, &cnt,
2081 if (__predict_false(err != 0)) {
2082 device_printf(sc->dev, "bus_dmamap_load_mbuf_sg returned %d"
2083 " packet len = %d\n", err, m->m_pkthdr.len);
2086 bus_dmamap_sync(tx->dmat, tx->info[idx].map,
2087 BUS_DMASYNC_PREWRITE);
2088 tx->info[idx].m = m;
2091 /* TSO is different enough, we handle it in another routine */
2092 if (m->m_pkthdr.csum_flags & (CSUM_TSO)) {
2093 mxge_encap_tso(ss, m, cnt, &pi);
2100 pseudo_hdr_offset = 0;
2101 flags = MXGEFW_FLAGS_NO_TSO;
2103 /* checksum offloading? */
2104 if (m->m_pkthdr.csum_flags &
2105 (CSUM_DELAY_DATA | CSUM_DELAY_DATA_IPV6)) {
2106 /* ensure ip header is in first mbuf, copy
2107 it to a scratch buffer if not */
2108 cksum_offset = pi.ip_off + pi.ip_hlen;
2109 pseudo_hdr_offset = cksum_offset + m->m_pkthdr.csum_data;
2110 pseudo_hdr_offset = htobe16(pseudo_hdr_offset);
2111 req->cksum_offset = cksum_offset;
2112 flags |= MXGEFW_FLAGS_CKSUM;
2113 odd_flag = MXGEFW_FLAGS_ALIGN_ODD;
2117 if (m->m_pkthdr.len < MXGEFW_SEND_SMALL_SIZE)
2118 flags |= MXGEFW_FLAGS_SMALL;
2120 /* convert segments into a request list */
2123 req->flags = MXGEFW_FLAGS_FIRST;
2124 for (i = 0; i < cnt; i++) {
2126 htobe32(MXGE_LOWPART_TO_U32(seg->ds_addr));
2128 htobe32(MXGE_HIGHPART_TO_U32(seg->ds_addr));
2129 req->length = htobe16(seg->ds_len);
2130 req->cksum_offset = cksum_offset;
2131 if (cksum_offset > seg->ds_len)
2132 cksum_offset -= seg->ds_len;
2135 req->pseudo_hdr_offset = pseudo_hdr_offset;
2136 req->pad = 0; /* complete solid 16-byte block */
2137 req->rdma_count = 1;
2138 req->flags |= flags | ((cum_len & 1) * odd_flag);
2139 cum_len += seg->ds_len;
2145 /* pad runts to 60 bytes */
2149 htobe32(MXGE_LOWPART_TO_U32(sc->zeropad_dma.bus_addr));
2151 htobe32(MXGE_HIGHPART_TO_U32(sc->zeropad_dma.bus_addr));
2152 req->length = htobe16(60 - cum_len);
2153 req->cksum_offset = 0;
2154 req->pseudo_hdr_offset = pseudo_hdr_offset;
2155 req->pad = 0; /* complete solid 16-byte block */
2156 req->rdma_count = 1;
2157 req->flags |= flags | ((cum_len & 1) * odd_flag);
2161 tx->req_list[0].rdma_count = cnt;
2163 /* print what the firmware will see */
2164 for (i = 0; i < cnt; i++) {
2165 printf("%d: addr: 0x%x 0x%x len:%d pso%d,"
2166 "cso:%d, flags:0x%x, rdma:%d\n",
2167 i, (int)ntohl(tx->req_list[i].addr_high),
2168 (int)ntohl(tx->req_list[i].addr_low),
2169 (int)ntohs(tx->req_list[i].length),
2170 (int)ntohs(tx->req_list[i].pseudo_hdr_offset),
2171 tx->req_list[i].cksum_offset, tx->req_list[i].flags,
2172 tx->req_list[i].rdma_count);
2174 printf("--------------\n");
2176 tx->info[((cnt - 1) + tx->req) & tx->mask].flag = 1;
2177 mxge_submit_req(tx, tx->req_list, cnt);
2178 #ifdef IFNET_BUF_RING
2179 if ((ss->sc->num_slices > 1) && tx->queue_active == 0) {
2180 /* tell the NIC to start polling this slice */
2182 tx->queue_active = 1;
2196 #ifdef IFNET_BUF_RING
2198 mxge_qflush(struct ifnet *ifp)
2200 mxge_softc_t *sc = ifp->if_softc;
2205 for (slice = 0; slice < sc->num_slices; slice++) {
2206 tx = &sc->ss[slice].tx;
2208 while ((m = buf_ring_dequeue_sc(tx->br)) != NULL)
2210 mtx_unlock(&tx->mtx);
2216 mxge_start_locked(struct mxge_slice_state *ss)
2227 while ((tx->mask - (tx->req - tx->done)) > tx->max_desc) {
2228 m = drbr_dequeue(ifp, tx->br);
2232 /* let BPF see it */
2235 /* give it to the nic */
2238 /* ran out of transmit slots */
2239 if (((ss->if_drv_flags & IFF_DRV_OACTIVE) == 0)
2240 && (!drbr_empty(ifp, tx->br))) {
2241 ss->if_drv_flags |= IFF_DRV_OACTIVE;
2247 mxge_transmit_locked(struct mxge_slice_state *ss, struct mbuf *m)
2258 if ((ss->if_drv_flags & (IFF_DRV_RUNNING|IFF_DRV_OACTIVE)) !=
2260 err = drbr_enqueue(ifp, tx->br, m);
2264 if (!drbr_needs_enqueue(ifp, tx->br) &&
2265 ((tx->mask - (tx->req - tx->done)) > tx->max_desc)) {
2266 /* let BPF see it */
2268 /* give it to the nic */
2270 } else if ((err = drbr_enqueue(ifp, tx->br, m)) != 0) {
2273 if (!drbr_empty(ifp, tx->br))
2274 mxge_start_locked(ss);
2279 mxge_transmit(struct ifnet *ifp, struct mbuf *m)
2281 mxge_softc_t *sc = ifp->if_softc;
2282 struct mxge_slice_state *ss;
2287 slice = m->m_pkthdr.flowid;
2288 slice &= (sc->num_slices - 1); /* num_slices always power of 2 */
2290 ss = &sc->ss[slice];
2293 if (mtx_trylock(&tx->mtx)) {
2294 err = mxge_transmit_locked(ss, m);
2295 mtx_unlock(&tx->mtx);
2297 err = drbr_enqueue(ifp, tx->br, m);
2306 mxge_start_locked(struct mxge_slice_state *ss)
2316 while ((tx->mask - (tx->req - tx->done)) > tx->max_desc) {
2317 IFQ_DRV_DEQUEUE(&ifp->if_snd, m);
2321 /* let BPF see it */
2324 /* give it to the nic */
2327 /* ran out of transmit slots */
2328 if ((sc->ifp->if_drv_flags & IFF_DRV_OACTIVE) == 0) {
2329 sc->ifp->if_drv_flags |= IFF_DRV_OACTIVE;
2335 mxge_start(struct ifnet *ifp)
2337 mxge_softc_t *sc = ifp->if_softc;
2338 struct mxge_slice_state *ss;
2340 /* only use the first slice for now */
2342 mtx_lock(&ss->tx.mtx);
2343 mxge_start_locked(ss);
2344 mtx_unlock(&ss->tx.mtx);
2348 * copy an array of mcp_kreq_ether_recv_t's to the mcp. Copy
2349 * at most 32 bytes at a time, so as to avoid involving the software
2350 * pio handler in the nic. We re-write the first segment's low
2351 * DMA address to mark it valid only after we write the entire chunk
2355 mxge_submit_8rx(volatile mcp_kreq_ether_recv_t *dst,
2356 mcp_kreq_ether_recv_t *src)
2360 low = src->addr_low;
2361 src->addr_low = 0xffffffff;
2362 mxge_pio_copy(dst, src, 4 * sizeof (*src));
2364 mxge_pio_copy(dst + 4, src + 4, 4 * sizeof (*src));
2366 src->addr_low = low;
2367 dst->addr_low = low;
2372 mxge_get_buf_small(struct mxge_slice_state *ss, bus_dmamap_t map, int idx)
2374 bus_dma_segment_t seg;
2376 mxge_rx_ring_t *rx = &ss->rx_small;
2379 m = m_gethdr(M_NOWAIT, MT_DATA);
2386 err = bus_dmamap_load_mbuf_sg(rx->dmat, map, m,
2387 &seg, &cnt, BUS_DMA_NOWAIT);
2392 rx->info[idx].m = m;
2393 rx->shadow[idx].addr_low =
2394 htobe32(MXGE_LOWPART_TO_U32(seg.ds_addr));
2395 rx->shadow[idx].addr_high =
2396 htobe32(MXGE_HIGHPART_TO_U32(seg.ds_addr));
2400 mxge_submit_8rx(&rx->lanai[idx - 7], &rx->shadow[idx - 7]);
2405 mxge_get_buf_big(struct mxge_slice_state *ss, bus_dmamap_t map, int idx)
2407 bus_dma_segment_t seg[3];
2409 mxge_rx_ring_t *rx = &ss->rx_big;
2412 m = m_getjcl(M_NOWAIT, MT_DATA, M_PKTHDR, rx->cl_size);
2418 m->m_len = rx->mlen;
2419 err = bus_dmamap_load_mbuf_sg(rx->dmat, map, m,
2420 seg, &cnt, BUS_DMA_NOWAIT);
2425 rx->info[idx].m = m;
2426 rx->shadow[idx].addr_low =
2427 htobe32(MXGE_LOWPART_TO_U32(seg->ds_addr));
2428 rx->shadow[idx].addr_high =
2429 htobe32(MXGE_HIGHPART_TO_U32(seg->ds_addr));
2431 #if MXGE_VIRT_JUMBOS
2432 for (i = 1; i < cnt; i++) {
2433 rx->shadow[idx + i].addr_low =
2434 htobe32(MXGE_LOWPART_TO_U32(seg[i].ds_addr));
2435 rx->shadow[idx + i].addr_high =
2436 htobe32(MXGE_HIGHPART_TO_U32(seg[i].ds_addr));
2441 for (i = 0; i < rx->nbufs; i++) {
2442 if ((idx & 7) == 7) {
2443 mxge_submit_8rx(&rx->lanai[idx - 7],
2444 &rx->shadow[idx - 7]);
2454 mxge_csum_generic(uint16_t *raw, int len)
2464 csum = (csum >> 16) + (csum & 0xffff);
2465 csum = (csum >> 16) + (csum & 0xffff);
2466 return (uint16_t)csum;
2469 static inline uint16_t
2470 mxge_rx_csum6(void *p, struct mbuf *m, uint32_t csum)
2473 int nxt, cksum_offset;
2474 struct ip6_hdr *ip6 = p;
2478 cksum_offset = sizeof (*ip6) + ETHER_HDR_LEN;
2479 if (nxt != IPPROTO_TCP && nxt != IPPROTO_UDP) {
2480 cksum_offset = ip6_lasthdr(m, ETHER_HDR_LEN,
2481 IPPROTO_IPV6, &nxt);
2482 if (nxt != IPPROTO_TCP && nxt != IPPROTO_UDP)
2487 * IPv6 headers do not contain a checksum, and hence
2488 * do not checksum to zero, so they don't "fall out"
2489 * of the partial checksum calculation like IPv4
2490 * headers do. We need to fix the partial checksum by
2491 * subtracting the checksum of the IPv6 header.
2494 partial = mxge_csum_generic((uint16_t *)ip6, cksum_offset -
2497 csum += (csum < ~partial);
2498 csum = (csum >> 16) + (csum & 0xFFFF);
2499 csum = (csum >> 16) + (csum & 0xFFFF);
2500 c = in6_cksum_pseudo(ip6, m->m_pkthdr.len - cksum_offset, nxt,
2507 * Myri10GE hardware checksums are not valid if the sender
2508 * padded the frame with non-zero padding. This is because
2509 * the firmware just does a simple 16-bit 1s complement
2510 * checksum across the entire frame, excluding the first 14
2511 * bytes. It is best to simply to check the checksum and
2512 * tell the stack about it only if the checksum is good
2515 static inline uint16_t
2516 mxge_rx_csum(struct mbuf *m, int csum)
2518 struct ether_header *eh;
2522 #if defined(INET) || defined(INET6)
2523 int cap = m->m_pkthdr.rcvif->if_capenable;
2527 eh = mtod(m, struct ether_header *);
2528 etype = ntohs(eh->ether_type);
2532 if ((cap & IFCAP_RXCSUM) == 0)
2534 ip = (struct ip *)(eh + 1);
2535 if (ip->ip_p != IPPROTO_TCP && ip->ip_p != IPPROTO_UDP)
2537 c = in_pseudo(ip->ip_src.s_addr, ip->ip_dst.s_addr,
2538 htonl(ntohs(csum) + ntohs(ip->ip_len) -
2539 (ip->ip_hl << 2) + ip->ip_p));
2544 case ETHERTYPE_IPV6:
2545 if ((cap & IFCAP_RXCSUM_IPV6) == 0)
2547 c = mxge_rx_csum6((eh + 1), m, csum);
2557 mxge_vlan_tag_remove(struct mbuf *m, uint32_t *csum)
2559 struct ether_vlan_header *evl;
2560 struct ether_header *eh;
2563 evl = mtod(m, struct ether_vlan_header *);
2564 eh = mtod(m, struct ether_header *);
2567 * fix checksum by subtracting ETHER_VLAN_ENCAP_LEN bytes
2568 * after what the firmware thought was the end of the ethernet
2572 /* put checksum into host byte order */
2573 *csum = ntohs(*csum);
2574 partial = ntohl(*(uint32_t *)(mtod(m, char *) + ETHER_HDR_LEN));
2575 (*csum) += ~partial;
2576 (*csum) += ((*csum) < ~partial);
2577 (*csum) = ((*csum) >> 16) + ((*csum) & 0xFFFF);
2578 (*csum) = ((*csum) >> 16) + ((*csum) & 0xFFFF);
2580 /* restore checksum to network byte order;
2581 later consumers expect this */
2582 *csum = htons(*csum);
2585 #ifdef MXGE_NEW_VLAN_API
2586 m->m_pkthdr.ether_vtag = ntohs(evl->evl_tag);
2590 mtag = m_tag_alloc(MTAG_VLAN, MTAG_VLAN_TAG, sizeof(u_int),
2594 VLAN_TAG_VALUE(mtag) = ntohs(evl->evl_tag);
2595 m_tag_prepend(m, mtag);
2599 m->m_flags |= M_VLANTAG;
2602 * Remove the 802.1q header by copying the Ethernet
2603 * addresses over it and adjusting the beginning of
2604 * the data in the mbuf. The encapsulated Ethernet
2605 * type field is already in place.
2607 bcopy((char *)evl, (char *)evl + ETHER_VLAN_ENCAP_LEN,
2608 ETHER_HDR_LEN - ETHER_TYPE_LEN);
2609 m_adj(m, ETHER_VLAN_ENCAP_LEN);
2613 mxge_rx_done_big(struct mxge_slice_state *ss, uint32_t len,
2614 uint32_t csum, int lro)
2619 struct ether_header *eh;
2621 bus_dmamap_t old_map;
2627 idx = rx->cnt & rx->mask;
2628 rx->cnt += rx->nbufs;
2629 /* save a pointer to the received mbuf */
2630 m = rx->info[idx].m;
2631 /* try to replace the received mbuf */
2632 if (mxge_get_buf_big(ss, rx->extra_map, idx)) {
2633 /* drop the frame -- the old mbuf is re-cycled */
2634 if_inc_counter(ifp, IFCOUNTER_IERRORS, 1);
2638 /* unmap the received buffer */
2639 old_map = rx->info[idx].map;
2640 bus_dmamap_sync(rx->dmat, old_map, BUS_DMASYNC_POSTREAD);
2641 bus_dmamap_unload(rx->dmat, old_map);
2643 /* swap the bus_dmamap_t's */
2644 rx->info[idx].map = rx->extra_map;
2645 rx->extra_map = old_map;
2647 /* mcp implicitly skips 1st 2 bytes so that packet is properly
2649 m->m_data += MXGEFW_PAD;
2651 m->m_pkthdr.rcvif = ifp;
2652 m->m_len = m->m_pkthdr.len = len;
2654 eh = mtod(m, struct ether_header *);
2655 if (eh->ether_type == htons(ETHERTYPE_VLAN)) {
2656 mxge_vlan_tag_remove(m, &csum);
2658 /* flowid only valid if RSS hashing is enabled */
2659 if (sc->num_slices > 1) {
2660 m->m_pkthdr.flowid = (ss - sc->ss);
2661 M_HASHTYPE_SET(m, M_HASHTYPE_OPAQUE);
2663 /* if the checksum is valid, mark it in the mbuf header */
2664 if ((ifp->if_capenable & (IFCAP_RXCSUM_IPV6 | IFCAP_RXCSUM)) &&
2665 (0 == mxge_rx_csum(m, csum))) {
2666 /* Tell the stack that the checksum is good */
2667 m->m_pkthdr.csum_data = 0xffff;
2668 m->m_pkthdr.csum_flags = CSUM_PSEUDO_HDR |
2671 #if defined(INET) || defined (INET6)
2672 if (lro && (0 == tcp_lro_rx(&ss->lc, m, 0)))
2676 /* pass the frame up the stack */
2677 (*ifp->if_input)(ifp, m);
2681 mxge_rx_done_small(struct mxge_slice_state *ss, uint32_t len,
2682 uint32_t csum, int lro)
2686 struct ether_header *eh;
2689 bus_dmamap_t old_map;
2695 idx = rx->cnt & rx->mask;
2697 /* save a pointer to the received mbuf */
2698 m = rx->info[idx].m;
2699 /* try to replace the received mbuf */
2700 if (mxge_get_buf_small(ss, rx->extra_map, idx)) {
2701 /* drop the frame -- the old mbuf is re-cycled */
2702 if_inc_counter(ifp, IFCOUNTER_IERRORS, 1);
2706 /* unmap the received buffer */
2707 old_map = rx->info[idx].map;
2708 bus_dmamap_sync(rx->dmat, old_map, BUS_DMASYNC_POSTREAD);
2709 bus_dmamap_unload(rx->dmat, old_map);
2711 /* swap the bus_dmamap_t's */
2712 rx->info[idx].map = rx->extra_map;
2713 rx->extra_map = old_map;
2715 /* mcp implicitly skips 1st 2 bytes so that packet is properly
2717 m->m_data += MXGEFW_PAD;
2719 m->m_pkthdr.rcvif = ifp;
2720 m->m_len = m->m_pkthdr.len = len;
2722 eh = mtod(m, struct ether_header *);
2723 if (eh->ether_type == htons(ETHERTYPE_VLAN)) {
2724 mxge_vlan_tag_remove(m, &csum);
2726 /* flowid only valid if RSS hashing is enabled */
2727 if (sc->num_slices > 1) {
2728 m->m_pkthdr.flowid = (ss - sc->ss);
2729 M_HASHTYPE_SET(m, M_HASHTYPE_OPAQUE);
2731 /* if the checksum is valid, mark it in the mbuf header */
2732 if ((ifp->if_capenable & (IFCAP_RXCSUM_IPV6 | IFCAP_RXCSUM)) &&
2733 (0 == mxge_rx_csum(m, csum))) {
2734 /* Tell the stack that the checksum is good */
2735 m->m_pkthdr.csum_data = 0xffff;
2736 m->m_pkthdr.csum_flags = CSUM_PSEUDO_HDR |
2739 #if defined(INET) || defined (INET6)
2740 if (lro && (0 == tcp_lro_rx(&ss->lc, m, csum)))
2744 /* pass the frame up the stack */
2745 (*ifp->if_input)(ifp, m);
2749 mxge_clean_rx_done(struct mxge_slice_state *ss)
2751 mxge_rx_done_t *rx_done = &ss->rx_done;
2757 lro = ss->sc->ifp->if_capenable & IFCAP_LRO;
2758 while (rx_done->entry[rx_done->idx].length != 0) {
2759 length = ntohs(rx_done->entry[rx_done->idx].length);
2760 rx_done->entry[rx_done->idx].length = 0;
2761 checksum = rx_done->entry[rx_done->idx].checksum;
2762 if (length <= (MHLEN - MXGEFW_PAD))
2763 mxge_rx_done_small(ss, length, checksum, lro);
2765 mxge_rx_done_big(ss, length, checksum, lro);
2767 rx_done->idx = rx_done->cnt & rx_done->mask;
2769 /* limit potential for livelock */
2770 if (__predict_false(++limit > rx_done->mask / 2))
2773 #if defined(INET) || defined (INET6)
2774 tcp_lro_flush_all(&ss->lc);
2779 mxge_tx_done(struct mxge_slice_state *ss, uint32_t mcp_idx)
2790 while (tx->pkt_done != mcp_idx) {
2791 idx = tx->done & tx->mask;
2793 m = tx->info[idx].m;
2794 /* mbuf and DMA map only attached to the first
2797 ss->obytes += m->m_pkthdr.len;
2798 if (m->m_flags & M_MCAST)
2801 tx->info[idx].m = NULL;
2802 map = tx->info[idx].map;
2803 bus_dmamap_unload(tx->dmat, map);
2806 if (tx->info[idx].flag) {
2807 tx->info[idx].flag = 0;
2812 /* If we have space, clear IFF_OACTIVE to tell the stack that
2813 its OK to send packets */
2814 #ifdef IFNET_BUF_RING
2815 flags = &ss->if_drv_flags;
2817 flags = &ifp->if_drv_flags;
2819 mtx_lock(&ss->tx.mtx);
2820 if ((*flags) & IFF_DRV_OACTIVE &&
2821 tx->req - tx->done < (tx->mask + 1)/4) {
2822 *(flags) &= ~IFF_DRV_OACTIVE;
2824 mxge_start_locked(ss);
2826 #ifdef IFNET_BUF_RING
2827 if ((ss->sc->num_slices > 1) && (tx->req == tx->done)) {
2828 /* let the NIC stop polling this queue, since there
2829 * are no more transmits pending */
2830 if (tx->req == tx->done) {
2832 tx->queue_active = 0;
2838 mtx_unlock(&ss->tx.mtx);
2842 static struct mxge_media_type mxge_xfp_media_types[] =
2844 {IFM_10G_CX4, 0x7f, "10GBASE-CX4 (module)"},
2845 {IFM_10G_SR, (1 << 7), "10GBASE-SR"},
2846 {IFM_10G_LR, (1 << 6), "10GBASE-LR"},
2847 {0, (1 << 5), "10GBASE-ER"},
2848 {IFM_10G_LRM, (1 << 4), "10GBASE-LRM"},
2849 {0, (1 << 3), "10GBASE-SW"},
2850 {0, (1 << 2), "10GBASE-LW"},
2851 {0, (1 << 1), "10GBASE-EW"},
2852 {0, (1 << 0), "Reserved"}
2854 static struct mxge_media_type mxge_sfp_media_types[] =
2856 {IFM_10G_TWINAX, 0, "10GBASE-Twinax"},
2857 {0, (1 << 7), "Reserved"},
2858 {IFM_10G_LRM, (1 << 6), "10GBASE-LRM"},
2859 {IFM_10G_LR, (1 << 5), "10GBASE-LR"},
2860 {IFM_10G_SR, (1 << 4), "10GBASE-SR"},
2861 {IFM_10G_TWINAX,(1 << 0), "10GBASE-Twinax"}
2865 mxge_media_set(mxge_softc_t *sc, int media_type)
2868 ifmedia_add(&sc->media, IFM_ETHER | IFM_FDX | media_type,
2870 ifmedia_set(&sc->media, IFM_ETHER | IFM_FDX | media_type);
2871 sc->current_media = media_type;
2872 sc->media.ifm_media = sc->media.ifm_cur->ifm_media;
2876 mxge_media_init(mxge_softc_t *sc)
2881 ifmedia_removeall(&sc->media);
2882 mxge_media_set(sc, IFM_AUTO);
2885 * parse the product code to deterimine the interface type
2886 * (CX4, XFP, Quad Ribbon Fiber) by looking at the character
2887 * after the 3rd dash in the driver's cached copy of the
2888 * EEPROM's product code string.
2890 ptr = sc->product_code_string;
2892 device_printf(sc->dev, "Missing product code\n");
2896 for (i = 0; i < 3; i++, ptr++) {
2897 ptr = strchr(ptr, '-');
2899 device_printf(sc->dev,
2900 "only %d dashes in PC?!?\n", i);
2904 if (*ptr == 'C' || *(ptr +1) == 'C') {
2906 sc->connector = MXGE_CX4;
2907 mxge_media_set(sc, IFM_10G_CX4);
2908 } else if (*ptr == 'Q') {
2909 /* -Q is Quad Ribbon Fiber */
2910 sc->connector = MXGE_QRF;
2911 device_printf(sc->dev, "Quad Ribbon Fiber Media\n");
2912 /* FreeBSD has no media type for Quad ribbon fiber */
2913 } else if (*ptr == 'R') {
2915 sc->connector = MXGE_XFP;
2916 } else if (*ptr == 'S' || *(ptr +1) == 'S') {
2917 /* -S or -2S is SFP+ */
2918 sc->connector = MXGE_SFP;
2920 device_printf(sc->dev, "Unknown media type: %c\n", *ptr);
2925 * Determine the media type for a NIC. Some XFPs will identify
2926 * themselves only when their link is up, so this is initiated via a
2927 * link up interrupt. However, this can potentially take up to
2928 * several milliseconds, so it is run via the watchdog routine, rather
2929 * than in the interrupt handler itself.
2932 mxge_media_probe(mxge_softc_t *sc)
2937 struct mxge_media_type *mxge_media_types = NULL;
2938 int i, err, ms, mxge_media_type_entries;
2941 sc->need_media_probe = 0;
2943 if (sc->connector == MXGE_XFP) {
2945 mxge_media_types = mxge_xfp_media_types;
2946 mxge_media_type_entries =
2947 nitems(mxge_xfp_media_types);
2948 byte = MXGE_XFP_COMPLIANCE_BYTE;
2950 } else if (sc->connector == MXGE_SFP) {
2951 /* -S or -2S is SFP+ */
2952 mxge_media_types = mxge_sfp_media_types;
2953 mxge_media_type_entries =
2954 nitems(mxge_sfp_media_types);
2958 /* nothing to do; media type cannot change */
2963 * At this point we know the NIC has an XFP cage, so now we
2964 * try to determine what is in the cage by using the
2965 * firmware's XFP I2C commands to read the XFP 10GbE compilance
2966 * register. We read just one byte, which may take over
2970 cmd.data0 = 0; /* just fetch 1 byte, not all 256 */
2972 err = mxge_send_cmd(sc, MXGEFW_CMD_I2C_READ, &cmd);
2973 if (err == MXGEFW_CMD_ERROR_I2C_FAILURE) {
2974 device_printf(sc->dev, "failed to read XFP\n");
2976 if (err == MXGEFW_CMD_ERROR_I2C_ABSENT) {
2977 device_printf(sc->dev, "Type R/S with no XFP!?!?\n");
2979 if (err != MXGEFW_CMD_OK) {
2983 /* now we wait for the data to be cached */
2985 err = mxge_send_cmd(sc, MXGEFW_CMD_I2C_BYTE, &cmd);
2986 for (ms = 0; (err == EBUSY) && (ms < 50); ms++) {
2989 err = mxge_send_cmd(sc, MXGEFW_CMD_I2C_BYTE, &cmd);
2991 if (err != MXGEFW_CMD_OK) {
2992 device_printf(sc->dev, "failed to read %s (%d, %dms)\n",
2993 cage_type, err, ms);
2997 if (cmd.data0 == mxge_media_types[0].bitmask) {
2999 device_printf(sc->dev, "%s:%s\n", cage_type,
3000 mxge_media_types[0].name);
3001 if (sc->current_media != mxge_media_types[0].flag) {
3002 mxge_media_init(sc);
3003 mxge_media_set(sc, mxge_media_types[0].flag);
3007 for (i = 1; i < mxge_media_type_entries; i++) {
3008 if (cmd.data0 & mxge_media_types[i].bitmask) {
3010 device_printf(sc->dev, "%s:%s\n",
3012 mxge_media_types[i].name);
3014 if (sc->current_media != mxge_media_types[i].flag) {
3015 mxge_media_init(sc);
3016 mxge_media_set(sc, mxge_media_types[i].flag);
3022 device_printf(sc->dev, "%s media 0x%x unknown\n",
3023 cage_type, cmd.data0);
3029 mxge_intr(void *arg)
3031 struct mxge_slice_state *ss = arg;
3032 mxge_softc_t *sc = ss->sc;
3033 mcp_irq_data_t *stats = ss->fw_stats;
3034 mxge_tx_ring_t *tx = &ss->tx;
3035 mxge_rx_done_t *rx_done = &ss->rx_done;
3036 uint32_t send_done_count;
3039 #ifndef IFNET_BUF_RING
3040 /* an interrupt on a non-zero slice is implicitly valid
3041 since MSI-X irqs are not shared */
3043 mxge_clean_rx_done(ss);
3044 *ss->irq_claim = be32toh(3);
3049 /* make sure the DMA has finished */
3050 if (!stats->valid) {
3053 valid = stats->valid;
3055 if (sc->legacy_irq) {
3056 /* lower legacy IRQ */
3057 *sc->irq_deassert = 0;
3058 if (!mxge_deassert_wait)
3059 /* don't wait for conf. that irq is low */
3065 /* loop while waiting for legacy irq deassertion */
3067 /* check for transmit completes and receives */
3068 send_done_count = be32toh(stats->send_done_count);
3069 while ((send_done_count != tx->pkt_done) ||
3070 (rx_done->entry[rx_done->idx].length != 0)) {
3071 if (send_done_count != tx->pkt_done)
3072 mxge_tx_done(ss, (int)send_done_count);
3073 mxge_clean_rx_done(ss);
3074 send_done_count = be32toh(stats->send_done_count);
3076 if (sc->legacy_irq && mxge_deassert_wait)
3078 } while (*((volatile uint8_t *) &stats->valid));
3080 /* fw link & error stats meaningful only on the first slice */
3081 if (__predict_false((ss == sc->ss) && stats->stats_updated)) {
3082 if (sc->link_state != stats->link_up) {
3083 sc->link_state = stats->link_up;
3084 if (sc->link_state) {
3085 if_link_state_change(sc->ifp, LINK_STATE_UP);
3087 device_printf(sc->dev, "link up\n");
3089 if_link_state_change(sc->ifp, LINK_STATE_DOWN);
3091 device_printf(sc->dev, "link down\n");
3093 sc->need_media_probe = 1;
3095 if (sc->rdma_tags_available !=
3096 be32toh(stats->rdma_tags_available)) {
3097 sc->rdma_tags_available =
3098 be32toh(stats->rdma_tags_available);
3099 device_printf(sc->dev, "RDMA timed out! %d tags "
3100 "left\n", sc->rdma_tags_available);
3103 if (stats->link_down) {
3104 sc->down_cnt += stats->link_down;
3106 if_link_state_change(sc->ifp, LINK_STATE_DOWN);
3110 /* check to see if we have rx token to pass back */
3112 *ss->irq_claim = be32toh(3);
3113 *(ss->irq_claim + 1) = be32toh(3);
3117 mxge_init(void *arg)
3119 mxge_softc_t *sc = arg;
3120 struct ifnet *ifp = sc->ifp;
3122 mtx_lock(&sc->driver_mtx);
3123 if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
3124 (void) mxge_open(sc);
3125 mtx_unlock(&sc->driver_mtx);
3129 mxge_free_slice_mbufs(struct mxge_slice_state *ss)
3133 #if defined(INET) || defined(INET6)
3134 tcp_lro_free(&ss->lc);
3136 for (i = 0; i <= ss->rx_big.mask; i++) {
3137 if (ss->rx_big.info[i].m == NULL)
3139 bus_dmamap_unload(ss->rx_big.dmat,
3140 ss->rx_big.info[i].map);
3141 m_freem(ss->rx_big.info[i].m);
3142 ss->rx_big.info[i].m = NULL;
3145 for (i = 0; i <= ss->rx_small.mask; i++) {
3146 if (ss->rx_small.info[i].m == NULL)
3148 bus_dmamap_unload(ss->rx_small.dmat,
3149 ss->rx_small.info[i].map);
3150 m_freem(ss->rx_small.info[i].m);
3151 ss->rx_small.info[i].m = NULL;
3154 /* transmit ring used only on the first slice */
3155 if (ss->tx.info == NULL)
3158 for (i = 0; i <= ss->tx.mask; i++) {
3159 ss->tx.info[i].flag = 0;
3160 if (ss->tx.info[i].m == NULL)
3162 bus_dmamap_unload(ss->tx.dmat,
3163 ss->tx.info[i].map);
3164 m_freem(ss->tx.info[i].m);
3165 ss->tx.info[i].m = NULL;
3170 mxge_free_mbufs(mxge_softc_t *sc)
3174 for (slice = 0; slice < sc->num_slices; slice++)
3175 mxge_free_slice_mbufs(&sc->ss[slice]);
3179 mxge_free_slice_rings(struct mxge_slice_state *ss)
3183 if (ss->rx_done.entry != NULL)
3184 mxge_dma_free(&ss->rx_done.dma);
3185 ss->rx_done.entry = NULL;
3187 if (ss->tx.req_bytes != NULL)
3188 free(ss->tx.req_bytes, M_DEVBUF);
3189 ss->tx.req_bytes = NULL;
3191 if (ss->tx.seg_list != NULL)
3192 free(ss->tx.seg_list, M_DEVBUF);
3193 ss->tx.seg_list = NULL;
3195 if (ss->rx_small.shadow != NULL)
3196 free(ss->rx_small.shadow, M_DEVBUF);
3197 ss->rx_small.shadow = NULL;
3199 if (ss->rx_big.shadow != NULL)
3200 free(ss->rx_big.shadow, M_DEVBUF);
3201 ss->rx_big.shadow = NULL;
3203 if (ss->tx.info != NULL) {
3204 if (ss->tx.dmat != NULL) {
3205 for (i = 0; i <= ss->tx.mask; i++) {
3206 bus_dmamap_destroy(ss->tx.dmat,
3207 ss->tx.info[i].map);
3209 bus_dma_tag_destroy(ss->tx.dmat);
3211 free(ss->tx.info, M_DEVBUF);
3215 if (ss->rx_small.info != NULL) {
3216 if (ss->rx_small.dmat != NULL) {
3217 for (i = 0; i <= ss->rx_small.mask; i++) {
3218 bus_dmamap_destroy(ss->rx_small.dmat,
3219 ss->rx_small.info[i].map);
3221 bus_dmamap_destroy(ss->rx_small.dmat,
3222 ss->rx_small.extra_map);
3223 bus_dma_tag_destroy(ss->rx_small.dmat);
3225 free(ss->rx_small.info, M_DEVBUF);
3227 ss->rx_small.info = NULL;
3229 if (ss->rx_big.info != NULL) {
3230 if (ss->rx_big.dmat != NULL) {
3231 for (i = 0; i <= ss->rx_big.mask; i++) {
3232 bus_dmamap_destroy(ss->rx_big.dmat,
3233 ss->rx_big.info[i].map);
3235 bus_dmamap_destroy(ss->rx_big.dmat,
3236 ss->rx_big.extra_map);
3237 bus_dma_tag_destroy(ss->rx_big.dmat);
3239 free(ss->rx_big.info, M_DEVBUF);
3241 ss->rx_big.info = NULL;
3245 mxge_free_rings(mxge_softc_t *sc)
3249 for (slice = 0; slice < sc->num_slices; slice++)
3250 mxge_free_slice_rings(&sc->ss[slice]);
3254 mxge_alloc_slice_rings(struct mxge_slice_state *ss, int rx_ring_entries,
3255 int tx_ring_entries)
3257 mxge_softc_t *sc = ss->sc;
3261 /* allocate per-slice receive resources */
3263 ss->rx_small.mask = ss->rx_big.mask = rx_ring_entries - 1;
3264 ss->rx_done.mask = (2 * rx_ring_entries) - 1;
3266 /* allocate the rx shadow rings */
3267 bytes = rx_ring_entries * sizeof (*ss->rx_small.shadow);
3268 ss->rx_small.shadow = malloc(bytes, M_DEVBUF, M_ZERO|M_WAITOK);
3270 bytes = rx_ring_entries * sizeof (*ss->rx_big.shadow);
3271 ss->rx_big.shadow = malloc(bytes, M_DEVBUF, M_ZERO|M_WAITOK);
3273 /* allocate the rx host info rings */
3274 bytes = rx_ring_entries * sizeof (*ss->rx_small.info);
3275 ss->rx_small.info = malloc(bytes, M_DEVBUF, M_ZERO|M_WAITOK);
3277 bytes = rx_ring_entries * sizeof (*ss->rx_big.info);
3278 ss->rx_big.info = malloc(bytes, M_DEVBUF, M_ZERO|M_WAITOK);
3280 /* allocate the rx busdma resources */
3281 err = bus_dma_tag_create(sc->parent_dmat, /* parent */
3283 4096, /* boundary */
3284 BUS_SPACE_MAXADDR, /* low */
3285 BUS_SPACE_MAXADDR, /* high */
3286 NULL, NULL, /* filter */
3287 MHLEN, /* maxsize */
3289 MHLEN, /* maxsegsize */
3290 BUS_DMA_ALLOCNOW, /* flags */
3291 NULL, NULL, /* lock */
3292 &ss->rx_small.dmat); /* tag */
3294 device_printf(sc->dev, "Err %d allocating rx_small dmat\n",
3299 err = bus_dma_tag_create(sc->parent_dmat, /* parent */
3301 #if MXGE_VIRT_JUMBOS
3302 4096, /* boundary */
3306 BUS_SPACE_MAXADDR, /* low */
3307 BUS_SPACE_MAXADDR, /* high */
3308 NULL, NULL, /* filter */
3309 3*4096, /* maxsize */
3310 #if MXGE_VIRT_JUMBOS
3312 4096, /* maxsegsize*/
3315 MJUM9BYTES, /* maxsegsize*/
3317 BUS_DMA_ALLOCNOW, /* flags */
3318 NULL, NULL, /* lock */
3319 &ss->rx_big.dmat); /* tag */
3321 device_printf(sc->dev, "Err %d allocating rx_big dmat\n",
3325 for (i = 0; i <= ss->rx_small.mask; i++) {
3326 err = bus_dmamap_create(ss->rx_small.dmat, 0,
3327 &ss->rx_small.info[i].map);
3329 device_printf(sc->dev, "Err %d rx_small dmamap\n",
3334 err = bus_dmamap_create(ss->rx_small.dmat, 0,
3335 &ss->rx_small.extra_map);
3337 device_printf(sc->dev, "Err %d extra rx_small dmamap\n",
3342 for (i = 0; i <= ss->rx_big.mask; i++) {
3343 err = bus_dmamap_create(ss->rx_big.dmat, 0,
3344 &ss->rx_big.info[i].map);
3346 device_printf(sc->dev, "Err %d rx_big dmamap\n",
3351 err = bus_dmamap_create(ss->rx_big.dmat, 0,
3352 &ss->rx_big.extra_map);
3354 device_printf(sc->dev, "Err %d extra rx_big dmamap\n",
3359 /* now allocate TX resources */
3361 #ifndef IFNET_BUF_RING
3362 /* only use a single TX ring for now */
3363 if (ss != ss->sc->ss)
3367 ss->tx.mask = tx_ring_entries - 1;
3368 ss->tx.max_desc = MIN(MXGE_MAX_SEND_DESC, tx_ring_entries / 4);
3370 /* allocate the tx request copy block */
3372 sizeof (*ss->tx.req_list) * (ss->tx.max_desc + 4);
3373 ss->tx.req_bytes = malloc(bytes, M_DEVBUF, M_WAITOK);
3374 /* ensure req_list entries are aligned to 8 bytes */
3375 ss->tx.req_list = (mcp_kreq_ether_send_t *)
3376 ((uintptr_t)(ss->tx.req_bytes + 7) & ~7UL);
3378 /* allocate the tx busdma segment list */
3379 bytes = sizeof (*ss->tx.seg_list) * ss->tx.max_desc;
3380 ss->tx.seg_list = (bus_dma_segment_t *)
3381 malloc(bytes, M_DEVBUF, M_WAITOK);
3383 /* allocate the tx host info ring */
3384 bytes = tx_ring_entries * sizeof (*ss->tx.info);
3385 ss->tx.info = malloc(bytes, M_DEVBUF, M_ZERO|M_WAITOK);
3387 /* allocate the tx busdma resources */
3388 err = bus_dma_tag_create(sc->parent_dmat, /* parent */
3390 sc->tx_boundary, /* boundary */
3391 BUS_SPACE_MAXADDR, /* low */
3392 BUS_SPACE_MAXADDR, /* high */
3393 NULL, NULL, /* filter */
3394 65536 + 256, /* maxsize */
3395 ss->tx.max_desc - 2, /* num segs */
3396 sc->tx_boundary, /* maxsegsz */
3397 BUS_DMA_ALLOCNOW, /* flags */
3398 NULL, NULL, /* lock */
3399 &ss->tx.dmat); /* tag */
3402 device_printf(sc->dev, "Err %d allocating tx dmat\n",
3407 /* now use these tags to setup dmamaps for each slot
3409 for (i = 0; i <= ss->tx.mask; i++) {
3410 err = bus_dmamap_create(ss->tx.dmat, 0,
3411 &ss->tx.info[i].map);
3413 device_printf(sc->dev, "Err %d tx dmamap\n",
3423 mxge_alloc_rings(mxge_softc_t *sc)
3427 int tx_ring_entries, rx_ring_entries;
3430 /* get ring sizes */
3431 err = mxge_send_cmd(sc, MXGEFW_CMD_GET_SEND_RING_SIZE, &cmd);
3432 tx_ring_size = cmd.data0;
3434 device_printf(sc->dev, "Cannot determine tx ring sizes\n");
3438 tx_ring_entries = tx_ring_size / sizeof (mcp_kreq_ether_send_t);
3439 rx_ring_entries = sc->rx_ring_size / sizeof (mcp_dma_addr_t);
3440 IFQ_SET_MAXLEN(&sc->ifp->if_snd, tx_ring_entries - 1);
3441 sc->ifp->if_snd.ifq_drv_maxlen = sc->ifp->if_snd.ifq_maxlen;
3442 IFQ_SET_READY(&sc->ifp->if_snd);
3444 for (slice = 0; slice < sc->num_slices; slice++) {
3445 err = mxge_alloc_slice_rings(&sc->ss[slice],
3454 mxge_free_rings(sc);
3460 mxge_choose_params(int mtu, int *big_buf_size, int *cl_size, int *nbufs)
3462 int bufsize = mtu + ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN + MXGEFW_PAD;
3464 if (bufsize < MCLBYTES) {
3465 /* easy, everything fits in a single buffer */
3466 *big_buf_size = MCLBYTES;
3467 *cl_size = MCLBYTES;
3472 if (bufsize < MJUMPAGESIZE) {
3473 /* still easy, everything still fits in a single buffer */
3474 *big_buf_size = MJUMPAGESIZE;
3475 *cl_size = MJUMPAGESIZE;
3479 #if MXGE_VIRT_JUMBOS
3480 /* now we need to use virtually contiguous buffers */
3481 *cl_size = MJUM9BYTES;
3482 *big_buf_size = 4096;
3483 *nbufs = mtu / 4096 + 1;
3484 /* needs to be a power of two, so round up */
3488 *cl_size = MJUM9BYTES;
3489 *big_buf_size = MJUM9BYTES;
3495 mxge_slice_open(struct mxge_slice_state *ss, int nbufs, int cl_size)
3503 slice = ss - sc->ss;
3505 #if defined(INET) || defined(INET6)
3506 (void)tcp_lro_init(&ss->lc);
3508 ss->lc.ifp = sc->ifp;
3510 /* get the lanai pointers to the send and receive rings */
3513 #ifndef IFNET_BUF_RING
3514 /* We currently only send from the first slice */
3518 err = mxge_send_cmd(sc, MXGEFW_CMD_GET_SEND_OFFSET, &cmd);
3520 (volatile mcp_kreq_ether_send_t *)(sc->sram + cmd.data0);
3521 ss->tx.send_go = (volatile uint32_t *)
3522 (sc->sram + MXGEFW_ETH_SEND_GO + 64 * slice);
3523 ss->tx.send_stop = (volatile uint32_t *)
3524 (sc->sram + MXGEFW_ETH_SEND_STOP + 64 * slice);
3525 #ifndef IFNET_BUF_RING
3529 err |= mxge_send_cmd(sc,
3530 MXGEFW_CMD_GET_SMALL_RX_OFFSET, &cmd);
3531 ss->rx_small.lanai =
3532 (volatile mcp_kreq_ether_recv_t *)(sc->sram + cmd.data0);
3534 err |= mxge_send_cmd(sc, MXGEFW_CMD_GET_BIG_RX_OFFSET, &cmd);
3536 (volatile mcp_kreq_ether_recv_t *)(sc->sram + cmd.data0);
3539 device_printf(sc->dev,
3540 "failed to get ring sizes or locations\n");
3544 /* stock receive rings */
3545 for (i = 0; i <= ss->rx_small.mask; i++) {
3546 map = ss->rx_small.info[i].map;
3547 err = mxge_get_buf_small(ss, map, i);
3549 device_printf(sc->dev, "alloced %d/%d smalls\n",
3550 i, ss->rx_small.mask + 1);
3554 for (i = 0; i <= ss->rx_big.mask; i++) {
3555 ss->rx_big.shadow[i].addr_low = 0xffffffff;
3556 ss->rx_big.shadow[i].addr_high = 0xffffffff;
3558 ss->rx_big.nbufs = nbufs;
3559 ss->rx_big.cl_size = cl_size;
3560 ss->rx_big.mlen = ss->sc->ifp->if_mtu + ETHER_HDR_LEN +
3561 ETHER_VLAN_ENCAP_LEN + MXGEFW_PAD;
3562 for (i = 0; i <= ss->rx_big.mask; i += ss->rx_big.nbufs) {
3563 map = ss->rx_big.info[i].map;
3564 err = mxge_get_buf_big(ss, map, i);
3566 device_printf(sc->dev, "alloced %d/%d bigs\n",
3567 i, ss->rx_big.mask + 1);
3575 mxge_open(mxge_softc_t *sc)
3578 int err, big_bytes, nbufs, slice, cl_size, i;
3580 volatile uint8_t *itable;
3581 struct mxge_slice_state *ss;
3583 /* Copy the MAC address in case it was overridden */
3584 bcopy(IF_LLADDR(sc->ifp), sc->mac_addr, ETHER_ADDR_LEN);
3586 err = mxge_reset(sc, 1);
3588 device_printf(sc->dev, "failed to reset\n");
3592 if (sc->num_slices > 1) {
3593 /* setup the indirection table */
3594 cmd.data0 = sc->num_slices;
3595 err = mxge_send_cmd(sc, MXGEFW_CMD_SET_RSS_TABLE_SIZE,
3598 err |= mxge_send_cmd(sc, MXGEFW_CMD_GET_RSS_TABLE_OFFSET,
3601 device_printf(sc->dev,
3602 "failed to setup rss tables\n");
3606 /* just enable an identity mapping */
3607 itable = sc->sram + cmd.data0;
3608 for (i = 0; i < sc->num_slices; i++)
3609 itable[i] = (uint8_t)i;
3612 cmd.data1 = mxge_rss_hash_type;
3613 err = mxge_send_cmd(sc, MXGEFW_CMD_SET_RSS_ENABLE, &cmd);
3615 device_printf(sc->dev, "failed to enable slices\n");
3620 mxge_choose_params(sc->ifp->if_mtu, &big_bytes, &cl_size, &nbufs);
3623 err = mxge_send_cmd(sc, MXGEFW_CMD_ALWAYS_USE_N_BIG_BUFFERS,
3625 /* error is only meaningful if we're trying to set
3626 MXGEFW_CMD_ALWAYS_USE_N_BIG_BUFFERS > 1 */
3627 if (err && nbufs > 1) {
3628 device_printf(sc->dev,
3629 "Failed to set alway-use-n to %d\n",
3633 /* Give the firmware the mtu and the big and small buffer
3634 sizes. The firmware wants the big buf size to be a power
3635 of two. Luckily, FreeBSD's clusters are powers of two */
3636 cmd.data0 = sc->ifp->if_mtu + ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
3637 err = mxge_send_cmd(sc, MXGEFW_CMD_SET_MTU, &cmd);
3638 cmd.data0 = MHLEN - MXGEFW_PAD;
3639 err |= mxge_send_cmd(sc, MXGEFW_CMD_SET_SMALL_BUFFER_SIZE,
3641 cmd.data0 = big_bytes;
3642 err |= mxge_send_cmd(sc, MXGEFW_CMD_SET_BIG_BUFFER_SIZE, &cmd);
3645 device_printf(sc->dev, "failed to setup params\n");
3649 /* Now give him the pointer to the stats block */
3651 #ifdef IFNET_BUF_RING
3652 slice < sc->num_slices;
3657 ss = &sc->ss[slice];
3659 MXGE_LOWPART_TO_U32(ss->fw_stats_dma.bus_addr);
3661 MXGE_HIGHPART_TO_U32(ss->fw_stats_dma.bus_addr);
3662 cmd.data2 = sizeof(struct mcp_irq_data);
3663 cmd.data2 |= (slice << 16);
3664 err |= mxge_send_cmd(sc, MXGEFW_CMD_SET_STATS_DMA_V2, &cmd);
3668 bus = sc->ss->fw_stats_dma.bus_addr;
3669 bus += offsetof(struct mcp_irq_data, send_done_count);
3670 cmd.data0 = MXGE_LOWPART_TO_U32(bus);
3671 cmd.data1 = MXGE_HIGHPART_TO_U32(bus);
3672 err = mxge_send_cmd(sc,
3673 MXGEFW_CMD_SET_STATS_DMA_OBSOLETE,
3675 /* Firmware cannot support multicast without STATS_DMA_V2 */
3676 sc->fw_multicast_support = 0;
3678 sc->fw_multicast_support = 1;
3682 device_printf(sc->dev, "failed to setup params\n");
3686 for (slice = 0; slice < sc->num_slices; slice++) {
3687 err = mxge_slice_open(&sc->ss[slice], nbufs, cl_size);
3689 device_printf(sc->dev, "couldn't open slice %d\n",
3695 /* Finally, start the firmware running */
3696 err = mxge_send_cmd(sc, MXGEFW_CMD_ETHERNET_UP, &cmd);
3698 device_printf(sc->dev, "Couldn't bring up link\n");
3701 #ifdef IFNET_BUF_RING
3702 for (slice = 0; slice < sc->num_slices; slice++) {
3703 ss = &sc->ss[slice];
3704 ss->if_drv_flags |= IFF_DRV_RUNNING;
3705 ss->if_drv_flags &= ~IFF_DRV_OACTIVE;
3708 sc->ifp->if_drv_flags |= IFF_DRV_RUNNING;
3709 sc->ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;
3714 mxge_free_mbufs(sc);
3720 mxge_close(mxge_softc_t *sc, int down)
3723 int err, old_down_cnt;
3724 #ifdef IFNET_BUF_RING
3725 struct mxge_slice_state *ss;
3729 #ifdef IFNET_BUF_RING
3730 for (slice = 0; slice < sc->num_slices; slice++) {
3731 ss = &sc->ss[slice];
3732 ss->if_drv_flags &= ~IFF_DRV_RUNNING;
3735 sc->ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
3737 old_down_cnt = sc->down_cnt;
3739 err = mxge_send_cmd(sc, MXGEFW_CMD_ETHERNET_DOWN, &cmd);
3741 device_printf(sc->dev,
3742 "Couldn't bring down link\n");
3744 if (old_down_cnt == sc->down_cnt) {
3745 /* wait for down irq */
3746 DELAY(10 * sc->intr_coal_delay);
3749 if (old_down_cnt == sc->down_cnt) {
3750 device_printf(sc->dev, "never got down irq\n");
3753 mxge_free_mbufs(sc);
3759 mxge_setup_cfg_space(mxge_softc_t *sc)
3761 device_t dev = sc->dev;
3763 uint16_t lnk, pectl;
3765 /* find the PCIe link width and set max read request to 4KB*/
3766 if (pci_find_cap(dev, PCIY_EXPRESS, ®) == 0) {
3767 lnk = pci_read_config(dev, reg + 0x12, 2);
3768 sc->link_width = (lnk >> 4) & 0x3f;
3770 if (sc->pectl == 0) {
3771 pectl = pci_read_config(dev, reg + 0x8, 2);
3772 pectl = (pectl & ~0x7000) | (5 << 12);
3773 pci_write_config(dev, reg + 0x8, pectl, 2);
3776 /* restore saved pectl after watchdog reset */
3777 pci_write_config(dev, reg + 0x8, sc->pectl, 2);
3781 /* Enable DMA and Memory space access */
3782 pci_enable_busmaster(dev);
3786 mxge_read_reboot(mxge_softc_t *sc)
3788 device_t dev = sc->dev;
3791 /* find the vendor specific offset */
3792 if (pci_find_cap(dev, PCIY_VENDOR, &vs) != 0) {
3793 device_printf(sc->dev,
3794 "could not find vendor specific offset\n");
3795 return (uint32_t)-1;
3797 /* enable read32 mode */
3798 pci_write_config(dev, vs + 0x10, 0x3, 1);
3799 /* tell NIC which register to read */
3800 pci_write_config(dev, vs + 0x18, 0xfffffff0, 4);
3801 return (pci_read_config(dev, vs + 0x14, 4));
3805 mxge_watchdog_reset(mxge_softc_t *sc)
3807 struct pci_devinfo *dinfo;
3808 struct mxge_slice_state *ss;
3809 int err, running, s, num_tx_slices = 1;
3815 device_printf(sc->dev, "Watchdog reset!\n");
3818 * check to see if the NIC rebooted. If it did, then all of
3819 * PCI config space has been reset, and things like the
3820 * busmaster bit will be zero. If this is the case, then we
3821 * must restore PCI config space before the NIC can be used
3824 cmd = pci_read_config(sc->dev, PCIR_COMMAND, 2);
3825 if (cmd == 0xffff) {
3827 * maybe the watchdog caught the NIC rebooting; wait
3828 * up to 100ms for it to finish. If it does not come
3829 * back, then give up
3832 cmd = pci_read_config(sc->dev, PCIR_COMMAND, 2);
3833 if (cmd == 0xffff) {
3834 device_printf(sc->dev, "NIC disappeared!\n");
3837 if ((cmd & PCIM_CMD_BUSMASTEREN) == 0) {
3838 /* print the reboot status */
3839 reboot = mxge_read_reboot(sc);
3840 device_printf(sc->dev, "NIC rebooted, status = 0x%x\n",
3842 running = sc->ifp->if_drv_flags & IFF_DRV_RUNNING;
3845 * quiesce NIC so that TX routines will not try to
3846 * xmit after restoration of BAR
3849 /* Mark the link as down */
3850 if (sc->link_state) {
3852 if_link_state_change(sc->ifp,
3855 #ifdef IFNET_BUF_RING
3856 num_tx_slices = sc->num_slices;
3858 /* grab all TX locks to ensure no tx */
3859 for (s = 0; s < num_tx_slices; s++) {
3861 mtx_lock(&ss->tx.mtx);
3865 /* restore PCI configuration space */
3866 dinfo = device_get_ivars(sc->dev);
3867 pci_cfg_restore(sc->dev, dinfo);
3869 /* and redo any changes we made to our config space */
3870 mxge_setup_cfg_space(sc);
3873 err = mxge_load_firmware(sc, 0);
3875 device_printf(sc->dev,
3876 "Unable to re-load f/w\n");
3880 err = mxge_open(sc);
3881 /* release all TX locks */
3882 for (s = 0; s < num_tx_slices; s++) {
3884 #ifdef IFNET_BUF_RING
3885 mxge_start_locked(ss);
3887 mtx_unlock(&ss->tx.mtx);
3890 sc->watchdog_resets++;
3892 device_printf(sc->dev,
3893 "NIC did not reboot, not resetting\n");
3897 device_printf(sc->dev, "watchdog reset failed\n");
3901 callout_reset(&sc->co_hdl, mxge_ticks, mxge_tick, sc);
3906 mxge_watchdog_task(void *arg, int pending)
3908 mxge_softc_t *sc = arg;
3910 mtx_lock(&sc->driver_mtx);
3911 mxge_watchdog_reset(sc);
3912 mtx_unlock(&sc->driver_mtx);
3916 mxge_warn_stuck(mxge_softc_t *sc, mxge_tx_ring_t *tx, int slice)
3918 tx = &sc->ss[slice].tx;
3919 device_printf(sc->dev, "slice %d struck? ring state:\n", slice);
3920 device_printf(sc->dev,
3921 "tx.req=%d tx.done=%d, tx.queue_active=%d\n",
3922 tx->req, tx->done, tx->queue_active);
3923 device_printf(sc->dev, "tx.activate=%d tx.deactivate=%d\n",
3924 tx->activate, tx->deactivate);
3925 device_printf(sc->dev, "pkt_done=%d fw=%d\n",
3927 be32toh(sc->ss->fw_stats->send_done_count));
3931 mxge_watchdog(mxge_softc_t *sc)
3934 uint32_t rx_pause = be32toh(sc->ss->fw_stats->dropped_pause);
3937 /* see if we have outstanding transmits, which
3938 have been pending for more than mxge_ticks */
3940 #ifdef IFNET_BUF_RING
3941 (i < sc->num_slices) && (err == 0);
3943 (i < 1) && (err == 0);
3947 if (tx->req != tx->done &&
3948 tx->watchdog_req != tx->watchdog_done &&
3949 tx->done == tx->watchdog_done) {
3950 /* check for pause blocking before resetting */
3951 if (tx->watchdog_rx_pause == rx_pause) {
3952 mxge_warn_stuck(sc, tx, i);
3953 taskqueue_enqueue(sc->tq, &sc->watchdog_task);
3957 device_printf(sc->dev, "Flow control blocking "
3958 "xmits, check link partner\n");
3961 tx->watchdog_req = tx->req;
3962 tx->watchdog_done = tx->done;
3963 tx->watchdog_rx_pause = rx_pause;
3966 if (sc->need_media_probe)
3967 mxge_media_probe(sc);
3972 mxge_get_counter(struct ifnet *ifp, ift_counter cnt)
3974 struct mxge_softc *sc;
3977 sc = if_getsoftc(ifp);
3981 case IFCOUNTER_IPACKETS:
3982 for (int s = 0; s < sc->num_slices; s++)
3983 rv += sc->ss[s].ipackets;
3985 case IFCOUNTER_OPACKETS:
3986 for (int s = 0; s < sc->num_slices; s++)
3987 rv += sc->ss[s].opackets;
3989 case IFCOUNTER_OERRORS:
3990 for (int s = 0; s < sc->num_slices; s++)
3991 rv += sc->ss[s].oerrors;
3993 #ifdef IFNET_BUF_RING
3994 case IFCOUNTER_OBYTES:
3995 for (int s = 0; s < sc->num_slices; s++)
3996 rv += sc->ss[s].obytes;
3998 case IFCOUNTER_OMCASTS:
3999 for (int s = 0; s < sc->num_slices; s++)
4000 rv += sc->ss[s].omcasts;
4002 case IFCOUNTER_OQDROPS:
4003 for (int s = 0; s < sc->num_slices; s++)
4004 rv += sc->ss[s].tx.br->br_drops;
4008 return (if_get_counter_default(ifp, cnt));
4013 mxge_tick(void *arg)
4015 mxge_softc_t *sc = arg;
4022 running = sc->ifp->if_drv_flags & IFF_DRV_RUNNING;
4024 if (!sc->watchdog_countdown) {
4025 err = mxge_watchdog(sc);
4026 sc->watchdog_countdown = 4;
4028 sc->watchdog_countdown--;
4031 /* ensure NIC did not suffer h/w fault while idle */
4032 cmd = pci_read_config(sc->dev, PCIR_COMMAND, 2);
4033 if ((cmd & PCIM_CMD_BUSMASTEREN) == 0) {
4035 taskqueue_enqueue(sc->tq, &sc->watchdog_task);
4038 /* look less often if NIC is idle */
4043 callout_reset(&sc->co_hdl, ticks, mxge_tick, sc);
4048 mxge_media_change(struct ifnet *ifp)
4054 mxge_change_mtu(mxge_softc_t *sc, int mtu)
4056 struct ifnet *ifp = sc->ifp;
4057 int real_mtu, old_mtu;
4060 real_mtu = mtu + ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
4061 if ((real_mtu > sc->max_mtu) || real_mtu < 60)
4063 mtx_lock(&sc->driver_mtx);
4064 old_mtu = ifp->if_mtu;
4066 if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
4068 err = mxge_open(sc);
4070 ifp->if_mtu = old_mtu;
4072 (void) mxge_open(sc);
4075 mtx_unlock(&sc->driver_mtx);
4080 mxge_media_status(struct ifnet *ifp, struct ifmediareq *ifmr)
4082 mxge_softc_t *sc = ifp->if_softc;
4086 ifmr->ifm_status = IFM_AVALID;
4087 ifmr->ifm_active = IFM_ETHER | IFM_FDX;
4088 ifmr->ifm_status |= sc->link_state ? IFM_ACTIVE : 0;
4089 ifmr->ifm_active |= sc->current_media;
4093 mxge_fetch_i2c(mxge_softc_t *sc, struct ifi2creq *i2c)
4099 if (i2c->dev_addr != 0xA0 &&
4100 i2c->dev_addr != 0xA2)
4102 if (i2c->len > sizeof(i2c->data))
4105 for (i = 0; i < i2c->len; i++) {
4106 i2c_args = i2c->dev_addr << 0x8;
4107 i2c_args |= i2c->offset + i;
4108 cmd.data0 = 0; /* just fetch 1 byte, not all 256 */
4109 cmd.data1 = i2c_args;
4110 err = mxge_send_cmd(sc, MXGEFW_CMD_I2C_READ, &cmd);
4112 if (err != MXGEFW_CMD_OK)
4114 /* now we wait for the data to be cached */
4115 cmd.data0 = i2c_args & 0xff;
4116 err = mxge_send_cmd(sc, MXGEFW_CMD_I2C_BYTE, &cmd);
4117 for (ms = 0; (err == EBUSY) && (ms < 50); ms++) {
4118 cmd.data0 = i2c_args & 0xff;
4119 err = mxge_send_cmd(sc, MXGEFW_CMD_I2C_BYTE, &cmd);
4123 if (err != MXGEFW_CMD_OK)
4125 i2c->data[i] = cmd.data0;
4131 mxge_ioctl(struct ifnet *ifp, u_long command, caddr_t data)
4133 mxge_softc_t *sc = ifp->if_softc;
4134 struct ifreq *ifr = (struct ifreq *)data;
4135 struct ifi2creq i2c;
4141 err = mxge_change_mtu(sc, ifr->ifr_mtu);
4145 mtx_lock(&sc->driver_mtx);
4147 mtx_unlock(&sc->driver_mtx);
4150 if (ifp->if_flags & IFF_UP) {
4151 if (!(ifp->if_drv_flags & IFF_DRV_RUNNING)) {
4152 err = mxge_open(sc);
4154 /* take care of promis can allmulti
4156 mxge_change_promisc(sc,
4157 ifp->if_flags & IFF_PROMISC);
4158 mxge_set_multicast_list(sc);
4161 if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
4165 mtx_unlock(&sc->driver_mtx);
4170 mtx_lock(&sc->driver_mtx);
4172 mtx_unlock(&sc->driver_mtx);
4175 mxge_set_multicast_list(sc);
4176 mtx_unlock(&sc->driver_mtx);
4180 mtx_lock(&sc->driver_mtx);
4181 mask = ifr->ifr_reqcap ^ ifp->if_capenable;
4182 if (mask & IFCAP_TXCSUM) {
4183 if (IFCAP_TXCSUM & ifp->if_capenable) {
4184 mask &= ~IFCAP_TSO4;
4185 ifp->if_capenable &= ~(IFCAP_TXCSUM|IFCAP_TSO4);
4186 ifp->if_hwassist &= ~(CSUM_TCP | CSUM_UDP);
4188 ifp->if_capenable |= IFCAP_TXCSUM;
4189 ifp->if_hwassist |= (CSUM_TCP | CSUM_UDP);
4192 if (mask & IFCAP_RXCSUM) {
4193 if (IFCAP_RXCSUM & ifp->if_capenable) {
4194 ifp->if_capenable &= ~IFCAP_RXCSUM;
4196 ifp->if_capenable |= IFCAP_RXCSUM;
4199 if (mask & IFCAP_TSO4) {
4200 if (IFCAP_TSO4 & ifp->if_capenable) {
4201 ifp->if_capenable &= ~IFCAP_TSO4;
4202 } else if (IFCAP_TXCSUM & ifp->if_capenable) {
4203 ifp->if_capenable |= IFCAP_TSO4;
4204 ifp->if_hwassist |= CSUM_TSO;
4206 printf("mxge requires tx checksum offload"
4207 " be enabled to use TSO\n");
4212 if (mask & IFCAP_TXCSUM_IPV6) {
4213 if (IFCAP_TXCSUM_IPV6 & ifp->if_capenable) {
4214 mask &= ~IFCAP_TSO6;
4215 ifp->if_capenable &= ~(IFCAP_TXCSUM_IPV6
4217 ifp->if_hwassist &= ~(CSUM_TCP_IPV6
4220 ifp->if_capenable |= IFCAP_TXCSUM_IPV6;
4221 ifp->if_hwassist |= (CSUM_TCP_IPV6
4225 if (mask & IFCAP_RXCSUM_IPV6) {
4226 if (IFCAP_RXCSUM_IPV6 & ifp->if_capenable) {
4227 ifp->if_capenable &= ~IFCAP_RXCSUM_IPV6;
4229 ifp->if_capenable |= IFCAP_RXCSUM_IPV6;
4232 if (mask & IFCAP_TSO6) {
4233 if (IFCAP_TSO6 & ifp->if_capenable) {
4234 ifp->if_capenable &= ~IFCAP_TSO6;
4235 } else if (IFCAP_TXCSUM_IPV6 & ifp->if_capenable) {
4236 ifp->if_capenable |= IFCAP_TSO6;
4237 ifp->if_hwassist |= CSUM_TSO;
4239 printf("mxge requires tx checksum offload"
4240 " be enabled to use TSO\n");
4244 #endif /*IFCAP_TSO6 */
4246 if (mask & IFCAP_LRO)
4247 ifp->if_capenable ^= IFCAP_LRO;
4248 if (mask & IFCAP_VLAN_HWTAGGING)
4249 ifp->if_capenable ^= IFCAP_VLAN_HWTAGGING;
4250 if (mask & IFCAP_VLAN_HWTSO)
4251 ifp->if_capenable ^= IFCAP_VLAN_HWTSO;
4253 if (!(ifp->if_capabilities & IFCAP_VLAN_HWTSO) ||
4254 !(ifp->if_capenable & IFCAP_VLAN_HWTAGGING))
4255 ifp->if_capenable &= ~IFCAP_VLAN_HWTSO;
4257 mtx_unlock(&sc->driver_mtx);
4258 VLAN_CAPABILITIES(ifp);
4263 mtx_lock(&sc->driver_mtx);
4265 mtx_unlock(&sc->driver_mtx);
4268 mxge_media_probe(sc);
4269 mtx_unlock(&sc->driver_mtx);
4270 err = ifmedia_ioctl(ifp, (struct ifreq *)data,
4271 &sc->media, command);
4275 if (sc->connector != MXGE_XFP &&
4276 sc->connector != MXGE_SFP) {
4280 err = copyin(ifr_data_get_ptr(ifr), &i2c, sizeof(i2c));
4283 mtx_lock(&sc->driver_mtx);
4285 mtx_unlock(&sc->driver_mtx);
4288 err = mxge_fetch_i2c(sc, &i2c);
4289 mtx_unlock(&sc->driver_mtx);
4291 err = copyout(&i2c, ifr_data_get_ptr(ifr),
4295 err = ether_ioctl(ifp, command, data);
4302 mxge_fetch_tunables(mxge_softc_t *sc)
4305 TUNABLE_INT_FETCH("hw.mxge.max_slices", &mxge_max_slices);
4306 TUNABLE_INT_FETCH("hw.mxge.flow_control_enabled",
4307 &mxge_flow_control);
4308 TUNABLE_INT_FETCH("hw.mxge.intr_coal_delay",
4309 &mxge_intr_coal_delay);
4310 TUNABLE_INT_FETCH("hw.mxge.nvidia_ecrc_enable",
4311 &mxge_nvidia_ecrc_enable);
4312 TUNABLE_INT_FETCH("hw.mxge.force_firmware",
4313 &mxge_force_firmware);
4314 TUNABLE_INT_FETCH("hw.mxge.deassert_wait",
4315 &mxge_deassert_wait);
4316 TUNABLE_INT_FETCH("hw.mxge.verbose",
4318 TUNABLE_INT_FETCH("hw.mxge.ticks", &mxge_ticks);
4319 TUNABLE_INT_FETCH("hw.mxge.always_promisc", &mxge_always_promisc);
4320 TUNABLE_INT_FETCH("hw.mxge.rss_hash_type", &mxge_rss_hash_type);
4321 TUNABLE_INT_FETCH("hw.mxge.rss_hashtype", &mxge_rss_hash_type);
4322 TUNABLE_INT_FETCH("hw.mxge.initial_mtu", &mxge_initial_mtu);
4323 TUNABLE_INT_FETCH("hw.mxge.throttle", &mxge_throttle);
4327 if (mxge_intr_coal_delay < 0 || mxge_intr_coal_delay > 10*1000)
4328 mxge_intr_coal_delay = 30;
4329 if (mxge_ticks == 0)
4330 mxge_ticks = hz / 2;
4331 sc->pause = mxge_flow_control;
4332 if (mxge_rss_hash_type < MXGEFW_RSS_HASH_TYPE_IPV4
4333 || mxge_rss_hash_type > MXGEFW_RSS_HASH_TYPE_MAX) {
4334 mxge_rss_hash_type = MXGEFW_RSS_HASH_TYPE_SRC_DST_PORT;
4336 if (mxge_initial_mtu > ETHERMTU_JUMBO ||
4337 mxge_initial_mtu < ETHER_MIN_LEN)
4338 mxge_initial_mtu = ETHERMTU_JUMBO;
4340 if (mxge_throttle && mxge_throttle > MXGE_MAX_THROTTLE)
4341 mxge_throttle = MXGE_MAX_THROTTLE;
4342 if (mxge_throttle && mxge_throttle < MXGE_MIN_THROTTLE)
4343 mxge_throttle = MXGE_MIN_THROTTLE;
4344 sc->throttle = mxge_throttle;
4348 mxge_free_slices(mxge_softc_t *sc)
4350 struct mxge_slice_state *ss;
4356 for (i = 0; i < sc->num_slices; i++) {
4358 if (ss->fw_stats != NULL) {
4359 mxge_dma_free(&ss->fw_stats_dma);
4360 ss->fw_stats = NULL;
4361 #ifdef IFNET_BUF_RING
4362 if (ss->tx.br != NULL) {
4363 drbr_free(ss->tx.br, M_DEVBUF);
4367 mtx_destroy(&ss->tx.mtx);
4369 if (ss->rx_done.entry != NULL) {
4370 mxge_dma_free(&ss->rx_done.dma);
4371 ss->rx_done.entry = NULL;
4374 free(sc->ss, M_DEVBUF);
4379 mxge_alloc_slices(mxge_softc_t *sc)
4382 struct mxge_slice_state *ss;
4384 int err, i, max_intr_slots;
4386 err = mxge_send_cmd(sc, MXGEFW_CMD_GET_RX_RING_SIZE, &cmd);
4388 device_printf(sc->dev, "Cannot determine rx ring size\n");
4391 sc->rx_ring_size = cmd.data0;
4392 max_intr_slots = 2 * (sc->rx_ring_size / sizeof (mcp_dma_addr_t));
4394 bytes = sizeof (*sc->ss) * sc->num_slices;
4395 sc->ss = malloc(bytes, M_DEVBUF, M_NOWAIT | M_ZERO);
4398 for (i = 0; i < sc->num_slices; i++) {
4403 /* allocate per-slice rx interrupt queues */
4405 bytes = max_intr_slots * sizeof (*ss->rx_done.entry);
4406 err = mxge_dma_alloc(sc, &ss->rx_done.dma, bytes, 4096);
4409 ss->rx_done.entry = ss->rx_done.dma.addr;
4410 bzero(ss->rx_done.entry, bytes);
4413 * allocate the per-slice firmware stats; stats
4414 * (including tx) are used used only on the first
4417 #ifndef IFNET_BUF_RING
4422 bytes = sizeof (*ss->fw_stats);
4423 err = mxge_dma_alloc(sc, &ss->fw_stats_dma,
4424 sizeof (*ss->fw_stats), 64);
4427 ss->fw_stats = (mcp_irq_data_t *)ss->fw_stats_dma.addr;
4428 snprintf(ss->tx.mtx_name, sizeof(ss->tx.mtx_name),
4429 "%s:tx(%d)", device_get_nameunit(sc->dev), i);
4430 mtx_init(&ss->tx.mtx, ss->tx.mtx_name, NULL, MTX_DEF);
4431 #ifdef IFNET_BUF_RING
4432 ss->tx.br = buf_ring_alloc(2048, M_DEVBUF, M_WAITOK,
4440 mxge_free_slices(sc);
4445 mxge_slice_probe(mxge_softc_t *sc)
4449 int msix_cnt, status, max_intr_slots;
4453 * don't enable multiple slices if they are not enabled,
4454 * or if this is not an SMP system
4457 if (mxge_max_slices == 0 || mxge_max_slices == 1 || mp_ncpus < 2)
4460 /* see how many MSI-X interrupts are available */
4461 msix_cnt = pci_msix_count(sc->dev);
4465 /* now load the slice aware firmware see what it supports */
4466 old_fw = sc->fw_name;
4467 if (old_fw == mxge_fw_aligned)
4468 sc->fw_name = mxge_fw_rss_aligned;
4470 sc->fw_name = mxge_fw_rss_unaligned;
4471 status = mxge_load_firmware(sc, 0);
4473 device_printf(sc->dev, "Falling back to a single slice\n");
4477 /* try to send a reset command to the card to see if it
4479 memset(&cmd, 0, sizeof (cmd));
4480 status = mxge_send_cmd(sc, MXGEFW_CMD_RESET, &cmd);
4482 device_printf(sc->dev, "failed reset\n");
4486 /* get rx ring size */
4487 status = mxge_send_cmd(sc, MXGEFW_CMD_GET_RX_RING_SIZE, &cmd);
4489 device_printf(sc->dev, "Cannot determine rx ring size\n");
4492 max_intr_slots = 2 * (cmd.data0 / sizeof (mcp_dma_addr_t));
4494 /* tell it the size of the interrupt queues */
4495 cmd.data0 = max_intr_slots * sizeof (struct mcp_slot);
4496 status = mxge_send_cmd(sc, MXGEFW_CMD_SET_INTRQ_SIZE, &cmd);
4498 device_printf(sc->dev, "failed MXGEFW_CMD_SET_INTRQ_SIZE\n");
4502 /* ask the maximum number of slices it supports */
4503 status = mxge_send_cmd(sc, MXGEFW_CMD_GET_MAX_RSS_QUEUES, &cmd);
4505 device_printf(sc->dev,
4506 "failed MXGEFW_CMD_GET_MAX_RSS_QUEUES\n");
4509 sc->num_slices = cmd.data0;
4510 if (sc->num_slices > msix_cnt)
4511 sc->num_slices = msix_cnt;
4513 if (mxge_max_slices == -1) {
4514 /* cap to number of CPUs in system */
4515 if (sc->num_slices > mp_ncpus)
4516 sc->num_slices = mp_ncpus;
4518 if (sc->num_slices > mxge_max_slices)
4519 sc->num_slices = mxge_max_slices;
4521 /* make sure it is a power of two */
4522 while (sc->num_slices & (sc->num_slices - 1))
4526 device_printf(sc->dev, "using %d slices\n",
4532 sc->fw_name = old_fw;
4533 (void) mxge_load_firmware(sc, 0);
4537 mxge_add_msix_irqs(mxge_softc_t *sc)
4540 int count, err, i, rid;
4543 sc->msix_table_res = bus_alloc_resource_any(sc->dev, SYS_RES_MEMORY,
4546 if (sc->msix_table_res == NULL) {
4547 device_printf(sc->dev, "couldn't alloc MSIX table res\n");
4551 count = sc->num_slices;
4552 err = pci_alloc_msix(sc->dev, &count);
4554 device_printf(sc->dev, "pci_alloc_msix: failed, wanted %d"
4555 "err = %d \n", sc->num_slices, err);
4556 goto abort_with_msix_table;
4558 if (count < sc->num_slices) {
4559 device_printf(sc->dev, "pci_alloc_msix: need %d, got %d\n",
4560 count, sc->num_slices);
4561 device_printf(sc->dev,
4562 "Try setting hw.mxge.max_slices to %d\n",
4565 goto abort_with_msix;
4567 bytes = sizeof (*sc->msix_irq_res) * sc->num_slices;
4568 sc->msix_irq_res = malloc(bytes, M_DEVBUF, M_NOWAIT|M_ZERO);
4569 if (sc->msix_irq_res == NULL) {
4571 goto abort_with_msix;
4574 for (i = 0; i < sc->num_slices; i++) {
4576 sc->msix_irq_res[i] = bus_alloc_resource_any(sc->dev,
4579 if (sc->msix_irq_res[i] == NULL) {
4580 device_printf(sc->dev, "couldn't allocate IRQ res"
4581 " for message %d\n", i);
4583 goto abort_with_res;
4587 bytes = sizeof (*sc->msix_ih) * sc->num_slices;
4588 sc->msix_ih = malloc(bytes, M_DEVBUF, M_NOWAIT|M_ZERO);
4590 for (i = 0; i < sc->num_slices; i++) {
4591 err = bus_setup_intr(sc->dev, sc->msix_irq_res[i],
4592 INTR_TYPE_NET | INTR_MPSAFE,
4593 #if __FreeBSD_version > 700030
4596 mxge_intr, &sc->ss[i], &sc->msix_ih[i]);
4598 device_printf(sc->dev, "couldn't setup intr for "
4600 goto abort_with_intr;
4602 bus_describe_intr(sc->dev, sc->msix_irq_res[i],
4603 sc->msix_ih[i], "s%d", i);
4607 device_printf(sc->dev, "using %d msix IRQs:",
4609 for (i = 0; i < sc->num_slices; i++)
4610 printf(" %jd", rman_get_start(sc->msix_irq_res[i]));
4616 for (i = 0; i < sc->num_slices; i++) {
4617 if (sc->msix_ih[i] != NULL) {
4618 bus_teardown_intr(sc->dev, sc->msix_irq_res[i],
4620 sc->msix_ih[i] = NULL;
4623 free(sc->msix_ih, M_DEVBUF);
4626 for (i = 0; i < sc->num_slices; i++) {
4628 if (sc->msix_irq_res[i] != NULL)
4629 bus_release_resource(sc->dev, SYS_RES_IRQ, rid,
4630 sc->msix_irq_res[i]);
4631 sc->msix_irq_res[i] = NULL;
4633 free(sc->msix_irq_res, M_DEVBUF);
4636 pci_release_msi(sc->dev);
4638 abort_with_msix_table:
4639 bus_release_resource(sc->dev, SYS_RES_MEMORY, PCIR_BAR(2),
4640 sc->msix_table_res);
4646 mxge_add_single_irq(mxge_softc_t *sc)
4648 int count, err, rid;
4650 count = pci_msi_count(sc->dev);
4651 if (count == 1 && pci_alloc_msi(sc->dev, &count) == 0) {
4657 sc->irq_res = bus_alloc_resource_any(sc->dev, SYS_RES_IRQ, &rid,
4658 RF_SHAREABLE | RF_ACTIVE);
4659 if (sc->irq_res == NULL) {
4660 device_printf(sc->dev, "could not alloc interrupt\n");
4664 device_printf(sc->dev, "using %s irq %jd\n",
4665 sc->legacy_irq ? "INTx" : "MSI",
4666 rman_get_start(sc->irq_res));
4667 err = bus_setup_intr(sc->dev, sc->irq_res,
4668 INTR_TYPE_NET | INTR_MPSAFE,
4669 #if __FreeBSD_version > 700030
4672 mxge_intr, &sc->ss[0], &sc->ih);
4674 bus_release_resource(sc->dev, SYS_RES_IRQ,
4675 sc->legacy_irq ? 0 : 1, sc->irq_res);
4676 if (!sc->legacy_irq)
4677 pci_release_msi(sc->dev);
4683 mxge_rem_msix_irqs(mxge_softc_t *sc)
4687 for (i = 0; i < sc->num_slices; i++) {
4688 if (sc->msix_ih[i] != NULL) {
4689 bus_teardown_intr(sc->dev, sc->msix_irq_res[i],
4691 sc->msix_ih[i] = NULL;
4694 free(sc->msix_ih, M_DEVBUF);
4696 for (i = 0; i < sc->num_slices; i++) {
4698 if (sc->msix_irq_res[i] != NULL)
4699 bus_release_resource(sc->dev, SYS_RES_IRQ, rid,
4700 sc->msix_irq_res[i]);
4701 sc->msix_irq_res[i] = NULL;
4703 free(sc->msix_irq_res, M_DEVBUF);
4705 bus_release_resource(sc->dev, SYS_RES_MEMORY, PCIR_BAR(2),
4706 sc->msix_table_res);
4708 pci_release_msi(sc->dev);
4713 mxge_rem_single_irq(mxge_softc_t *sc)
4715 bus_teardown_intr(sc->dev, sc->irq_res, sc->ih);
4716 bus_release_resource(sc->dev, SYS_RES_IRQ,
4717 sc->legacy_irq ? 0 : 1, sc->irq_res);
4718 if (!sc->legacy_irq)
4719 pci_release_msi(sc->dev);
4723 mxge_rem_irq(mxge_softc_t *sc)
4725 if (sc->num_slices > 1)
4726 mxge_rem_msix_irqs(sc);
4728 mxge_rem_single_irq(sc);
4732 mxge_add_irq(mxge_softc_t *sc)
4736 if (sc->num_slices > 1)
4737 err = mxge_add_msix_irqs(sc);
4739 err = mxge_add_single_irq(sc);
4741 if (0 && err == 0 && sc->num_slices > 1) {
4742 mxge_rem_msix_irqs(sc);
4743 err = mxge_add_msix_irqs(sc);
4749 mxge_attach(device_t dev)
4752 mxge_softc_t *sc = device_get_softc(dev);
4757 mxge_fetch_tunables(sc);
4759 TASK_INIT(&sc->watchdog_task, 1, mxge_watchdog_task, sc);
4760 sc->tq = taskqueue_create("mxge_taskq", M_WAITOK,
4761 taskqueue_thread_enqueue, &sc->tq);
4762 if (sc->tq == NULL) {
4764 goto abort_with_nothing;
4767 err = bus_dma_tag_create(bus_get_dma_tag(dev), /* parent */
4770 BUS_SPACE_MAXADDR, /* low */
4771 BUS_SPACE_MAXADDR, /* high */
4772 NULL, NULL, /* filter */
4773 65536 + 256, /* maxsize */
4774 MXGE_MAX_SEND_DESC, /* num segs */
4775 65536, /* maxsegsize */
4777 NULL, NULL, /* lock */
4778 &sc->parent_dmat); /* tag */
4781 device_printf(sc->dev, "Err %d allocating parent dmat\n",
4786 ifp = sc->ifp = if_alloc(IFT_ETHER);
4788 device_printf(dev, "can not if_alloc()\n");
4790 goto abort_with_parent_dmat;
4792 if_initname(ifp, device_get_name(dev), device_get_unit(dev));
4794 snprintf(sc->cmd_mtx_name, sizeof(sc->cmd_mtx_name), "%s:cmd",
4795 device_get_nameunit(dev));
4796 mtx_init(&sc->cmd_mtx, sc->cmd_mtx_name, NULL, MTX_DEF);
4797 snprintf(sc->driver_mtx_name, sizeof(sc->driver_mtx_name),
4798 "%s:drv", device_get_nameunit(dev));
4799 mtx_init(&sc->driver_mtx, sc->driver_mtx_name,
4800 MTX_NETWORK_LOCK, MTX_DEF);
4802 callout_init_mtx(&sc->co_hdl, &sc->driver_mtx, 0);
4804 mxge_setup_cfg_space(sc);
4806 /* Map the board into the kernel */
4808 sc->mem_res = bus_alloc_resource_any(dev, SYS_RES_MEMORY, &rid,
4810 if (sc->mem_res == NULL) {
4811 device_printf(dev, "could not map memory\n");
4813 goto abort_with_lock;
4815 sc->sram = rman_get_virtual(sc->mem_res);
4816 sc->sram_size = 2*1024*1024 - (2*(48*1024)+(32*1024)) - 0x100;
4817 if (sc->sram_size > rman_get_size(sc->mem_res)) {
4818 device_printf(dev, "impossible memory region size %jd\n",
4819 rman_get_size(sc->mem_res));
4821 goto abort_with_mem_res;
4824 /* make NULL terminated copy of the EEPROM strings section of
4826 bzero(sc->eeprom_strings, MXGE_EEPROM_STRINGS_SIZE);
4827 bus_space_read_region_1(rman_get_bustag(sc->mem_res),
4828 rman_get_bushandle(sc->mem_res),
4829 sc->sram_size - MXGE_EEPROM_STRINGS_SIZE,
4831 MXGE_EEPROM_STRINGS_SIZE - 2);
4832 err = mxge_parse_strings(sc);
4834 goto abort_with_mem_res;
4836 /* Enable write combining for efficient use of PCIe bus */
4839 /* Allocate the out of band dma memory */
4840 err = mxge_dma_alloc(sc, &sc->cmd_dma,
4841 sizeof (mxge_cmd_t), 64);
4843 goto abort_with_mem_res;
4844 sc->cmd = (mcp_cmd_response_t *) sc->cmd_dma.addr;
4845 err = mxge_dma_alloc(sc, &sc->zeropad_dma, 64, 64);
4847 goto abort_with_cmd_dma;
4849 err = mxge_dma_alloc(sc, &sc->dmabench_dma, 4096, 4096);
4851 goto abort_with_zeropad_dma;
4853 /* select & load the firmware */
4854 err = mxge_select_firmware(sc);
4856 goto abort_with_dmabench;
4857 sc->intr_coal_delay = mxge_intr_coal_delay;
4859 mxge_slice_probe(sc);
4860 err = mxge_alloc_slices(sc);
4862 goto abort_with_dmabench;
4864 err = mxge_reset(sc, 0);
4866 goto abort_with_slices;
4868 err = mxge_alloc_rings(sc);
4870 device_printf(sc->dev, "failed to allocate rings\n");
4871 goto abort_with_slices;
4874 err = mxge_add_irq(sc);
4876 device_printf(sc->dev, "failed to add irq\n");
4877 goto abort_with_rings;
4880 ifp->if_baudrate = IF_Gbps(10);
4881 ifp->if_capabilities = IFCAP_RXCSUM | IFCAP_TXCSUM | IFCAP_TSO4 |
4882 IFCAP_VLAN_MTU | IFCAP_LINKSTATE | IFCAP_TXCSUM_IPV6 |
4884 #if defined(INET) || defined(INET6)
4885 ifp->if_capabilities |= IFCAP_LRO;
4888 #ifdef MXGE_NEW_VLAN_API
4889 ifp->if_capabilities |= IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_HWCSUM;
4891 /* Only FW 1.4.32 and newer can do TSO over vlans */
4892 if (sc->fw_ver_major == 1 && sc->fw_ver_minor == 4 &&
4893 sc->fw_ver_tiny >= 32)
4894 ifp->if_capabilities |= IFCAP_VLAN_HWTSO;
4896 sc->max_mtu = mxge_max_mtu(sc);
4897 if (sc->max_mtu >= 9000)
4898 ifp->if_capabilities |= IFCAP_JUMBO_MTU;
4900 device_printf(dev, "MTU limited to %d. Install "
4901 "latest firmware for 9000 byte jumbo support\n",
4902 sc->max_mtu - ETHER_HDR_LEN);
4903 ifp->if_hwassist = CSUM_TCP | CSUM_UDP | CSUM_TSO;
4904 ifp->if_hwassist |= CSUM_TCP_IPV6 | CSUM_UDP_IPV6;
4905 /* check to see if f/w supports TSO for IPv6 */
4906 if (!mxge_send_cmd(sc, MXGEFW_CMD_GET_MAX_TSO6_HDR_SIZE, &cmd)) {
4908 ifp->if_capabilities |= IFCAP_TSO6;
4909 sc->max_tso6_hlen = min(cmd.data0,
4910 sizeof (sc->ss[0].scratch));
4912 ifp->if_capenable = ifp->if_capabilities;
4913 if (sc->lro_cnt == 0)
4914 ifp->if_capenable &= ~IFCAP_LRO;
4915 ifp->if_init = mxge_init;
4917 ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
4918 ifp->if_ioctl = mxge_ioctl;
4919 ifp->if_start = mxge_start;
4920 ifp->if_get_counter = mxge_get_counter;
4921 ifp->if_hw_tsomax = IP_MAXPACKET - (ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN);
4922 ifp->if_hw_tsomaxsegcount = sc->ss[0].tx.max_desc;
4923 ifp->if_hw_tsomaxsegsize = IP_MAXPACKET;
4924 /* Initialise the ifmedia structure */
4925 ifmedia_init(&sc->media, 0, mxge_media_change,
4927 mxge_media_init(sc);
4928 mxge_media_probe(sc);
4930 ether_ifattach(ifp, sc->mac_addr);
4931 /* ether_ifattach sets mtu to ETHERMTU */
4932 if (mxge_initial_mtu != ETHERMTU)
4933 mxge_change_mtu(sc, mxge_initial_mtu);
4935 mxge_add_sysctls(sc);
4936 #ifdef IFNET_BUF_RING
4937 ifp->if_transmit = mxge_transmit;
4938 ifp->if_qflush = mxge_qflush;
4940 taskqueue_start_threads(&sc->tq, 1, PI_NET, "%s taskq",
4941 device_get_nameunit(sc->dev));
4942 callout_reset(&sc->co_hdl, mxge_ticks, mxge_tick, sc);
4946 mxge_free_rings(sc);
4948 mxge_free_slices(sc);
4949 abort_with_dmabench:
4950 mxge_dma_free(&sc->dmabench_dma);
4951 abort_with_zeropad_dma:
4952 mxge_dma_free(&sc->zeropad_dma);
4954 mxge_dma_free(&sc->cmd_dma);
4956 bus_release_resource(dev, SYS_RES_MEMORY, PCIR_BARS, sc->mem_res);
4958 pci_disable_busmaster(dev);
4959 mtx_destroy(&sc->cmd_mtx);
4960 mtx_destroy(&sc->driver_mtx);
4962 abort_with_parent_dmat:
4963 bus_dma_tag_destroy(sc->parent_dmat);
4965 if (sc->tq != NULL) {
4966 taskqueue_drain(sc->tq, &sc->watchdog_task);
4967 taskqueue_free(sc->tq);
4975 mxge_detach(device_t dev)
4977 mxge_softc_t *sc = device_get_softc(dev);
4979 if (mxge_vlans_active(sc)) {
4980 device_printf(sc->dev,
4981 "Detach vlans before removing module\n");
4984 mtx_lock(&sc->driver_mtx);
4986 if (sc->ifp->if_drv_flags & IFF_DRV_RUNNING)
4988 mtx_unlock(&sc->driver_mtx);
4989 ether_ifdetach(sc->ifp);
4990 if (sc->tq != NULL) {
4991 taskqueue_drain(sc->tq, &sc->watchdog_task);
4992 taskqueue_free(sc->tq);
4995 callout_drain(&sc->co_hdl);
4996 ifmedia_removeall(&sc->media);
4997 mxge_dummy_rdma(sc, 0);
4998 mxge_rem_sysctls(sc);
5000 mxge_free_rings(sc);
5001 mxge_free_slices(sc);
5002 mxge_dma_free(&sc->dmabench_dma);
5003 mxge_dma_free(&sc->zeropad_dma);
5004 mxge_dma_free(&sc->cmd_dma);
5005 bus_release_resource(dev, SYS_RES_MEMORY, PCIR_BARS, sc->mem_res);
5006 pci_disable_busmaster(dev);
5007 mtx_destroy(&sc->cmd_mtx);
5008 mtx_destroy(&sc->driver_mtx);
5010 bus_dma_tag_destroy(sc->parent_dmat);
5015 mxge_shutdown(device_t dev)
5021 This file uses Myri10GE driver indentation.
5024 c-file-style:"linux"