1 /******************************************************************************
2 SPDX-License-Identifier: BSD-2-Clause-FreeBSD
4 Copyright (c) 2006-2013, Myricom Inc.
7 Redistribution and use in source and binary forms, with or without
8 modification, are permitted provided that the following conditions are met:
10 1. Redistributions of source code must retain the above copyright notice,
11 this list of conditions and the following disclaimer.
13 2. Neither the name of the Myricom Inc, nor the names of its
14 contributors may be used to endorse or promote products derived from
15 this software without specific prior written permission.
17 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
18 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
21 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
22 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
23 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
24 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
25 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
26 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
27 POSSIBILITY OF SUCH DAMAGE.
29 ***************************************************************************/
31 #include <sys/cdefs.h>
32 __FBSDID("$FreeBSD$");
34 #include <sys/param.h>
35 #include <sys/systm.h>
36 #include <sys/linker.h>
37 #include <sys/firmware.h>
38 #include <sys/endian.h>
39 #include <sys/sockio.h>
41 #include <sys/malloc.h>
43 #include <sys/kernel.h>
45 #include <sys/module.h>
46 #include <sys/socket.h>
47 #include <sys/sysctl.h>
49 #include <sys/taskqueue.h>
50 #include <contrib/zlib/zlib.h>
51 #include <dev/zlib/zcalloc.h>
54 #include <net/if_var.h>
55 #include <net/if_arp.h>
56 #include <net/ethernet.h>
57 #include <net/if_dl.h>
58 #include <net/if_media.h>
62 #include <net/if_types.h>
63 #include <net/if_vlan_var.h>
65 #include <netinet/in_systm.h>
66 #include <netinet/in.h>
67 #include <netinet/ip.h>
68 #include <netinet/ip6.h>
69 #include <netinet/tcp.h>
70 #include <netinet/tcp_lro.h>
71 #include <netinet6/ip6_var.h>
73 #include <machine/bus.h>
74 #include <machine/in_cksum.h>
75 #include <machine/resource.h>
80 #include <dev/pci/pcireg.h>
81 #include <dev/pci/pcivar.h>
82 #include <dev/pci/pci_private.h> /* XXX for pci_cfg_restore */
84 #include <vm/vm.h> /* for pmap_mapdev() */
87 #if defined(__i386) || defined(__amd64)
88 #include <machine/specialreg.h>
91 #include <dev/mxge/mxge_mcp.h>
92 #include <dev/mxge/mcp_gen_header.h>
93 /*#define MXGE_FAKE_IFP*/
94 #include <dev/mxge/if_mxge_var.h>
96 #include <sys/buf_ring.h>
100 #include "opt_inet6.h"
103 static int mxge_nvidia_ecrc_enable = 1;
104 static int mxge_force_firmware = 0;
105 static int mxge_intr_coal_delay = 30;
106 static int mxge_deassert_wait = 1;
107 static int mxge_flow_control = 1;
108 static int mxge_verbose = 0;
109 static int mxge_ticks;
110 static int mxge_max_slices = 1;
111 static int mxge_rss_hash_type = MXGEFW_RSS_HASH_TYPE_SRC_DST_PORT;
112 static int mxge_always_promisc = 0;
113 static int mxge_initial_mtu = ETHERMTU_JUMBO;
114 static int mxge_throttle = 0;
115 static char *mxge_fw_unaligned = "mxge_ethp_z8e";
116 static char *mxge_fw_aligned = "mxge_eth_z8e";
117 static char *mxge_fw_rss_aligned = "mxge_rss_eth_z8e";
118 static char *mxge_fw_rss_unaligned = "mxge_rss_ethp_z8e";
120 static int mxge_probe(device_t dev);
121 static int mxge_attach(device_t dev);
122 static int mxge_detach(device_t dev);
123 static int mxge_shutdown(device_t dev);
124 static void mxge_intr(void *arg);
126 static device_method_t mxge_methods[] =
128 /* Device interface */
129 DEVMETHOD(device_probe, mxge_probe),
130 DEVMETHOD(device_attach, mxge_attach),
131 DEVMETHOD(device_detach, mxge_detach),
132 DEVMETHOD(device_shutdown, mxge_shutdown),
137 static driver_t mxge_driver =
141 sizeof(mxge_softc_t),
144 static devclass_t mxge_devclass;
146 /* Declare ourselves to be a child of the PCI bus.*/
147 DRIVER_MODULE(mxge, pci, mxge_driver, mxge_devclass, 0, 0);
148 MODULE_DEPEND(mxge, firmware, 1, 1, 1);
149 MODULE_DEPEND(mxge, zlib, 1, 1, 1);
151 static int mxge_load_firmware(mxge_softc_t *sc, int adopt);
152 static int mxge_send_cmd(mxge_softc_t *sc, uint32_t cmd, mxge_cmd_t *data);
153 static int mxge_close(mxge_softc_t *sc, int down);
154 static int mxge_open(mxge_softc_t *sc);
155 static void mxge_tick(void *arg);
158 mxge_probe(device_t dev)
163 if ((pci_get_vendor(dev) == MXGE_PCI_VENDOR_MYRICOM) &&
164 ((pci_get_device(dev) == MXGE_PCI_DEVICE_Z8E) ||
165 (pci_get_device(dev) == MXGE_PCI_DEVICE_Z8E_9))) {
166 rev = pci_get_revid(dev);
168 case MXGE_PCI_REV_Z8E:
169 device_set_desc(dev, "Myri10G-PCIE-8A");
171 case MXGE_PCI_REV_Z8ES:
172 device_set_desc(dev, "Myri10G-PCIE-8B");
175 device_set_desc(dev, "Myri10G-PCIE-8??");
176 device_printf(dev, "Unrecognized rev %d NIC\n",
186 mxge_enable_wc(mxge_softc_t *sc)
188 #if defined(__i386) || defined(__amd64)
193 len = rman_get_size(sc->mem_res);
194 err = pmap_change_attr((vm_offset_t) sc->sram,
195 len, PAT_WRITE_COMBINING);
197 device_printf(sc->dev, "pmap_change_attr failed, %d\n",
205 /* callback to get our DMA address */
207 mxge_dmamap_callback(void *arg, bus_dma_segment_t *segs, int nsegs,
211 *(bus_addr_t *) arg = segs->ds_addr;
216 mxge_dma_alloc(mxge_softc_t *sc, mxge_dma_t *dma, size_t bytes,
217 bus_size_t alignment)
220 device_t dev = sc->dev;
221 bus_size_t boundary, maxsegsize;
223 if (bytes > 4096 && alignment == 4096) {
231 /* allocate DMAable memory tags */
232 err = bus_dma_tag_create(sc->parent_dmat, /* parent */
233 alignment, /* alignment */
234 boundary, /* boundary */
235 BUS_SPACE_MAXADDR, /* low */
236 BUS_SPACE_MAXADDR, /* high */
237 NULL, NULL, /* filter */
240 maxsegsize, /* maxsegsize */
241 BUS_DMA_COHERENT, /* flags */
242 NULL, NULL, /* lock */
243 &dma->dmat); /* tag */
245 device_printf(dev, "couldn't alloc tag (err = %d)\n", err);
249 /* allocate DMAable memory & map */
250 err = bus_dmamem_alloc(dma->dmat, &dma->addr,
251 (BUS_DMA_WAITOK | BUS_DMA_COHERENT
252 | BUS_DMA_ZERO), &dma->map);
254 device_printf(dev, "couldn't alloc mem (err = %d)\n", err);
255 goto abort_with_dmat;
258 /* load the memory */
259 err = bus_dmamap_load(dma->dmat, dma->map, dma->addr, bytes,
260 mxge_dmamap_callback,
261 (void *)&dma->bus_addr, 0);
263 device_printf(dev, "couldn't load map (err = %d)\n", err);
269 bus_dmamem_free(dma->dmat, dma->addr, dma->map);
271 (void)bus_dma_tag_destroy(dma->dmat);
277 mxge_dma_free(mxge_dma_t *dma)
279 bus_dmamap_unload(dma->dmat, dma->map);
280 bus_dmamem_free(dma->dmat, dma->addr, dma->map);
281 (void)bus_dma_tag_destroy(dma->dmat);
285 * The eeprom strings on the lanaiX have the format
292 mxge_parse_strings(mxge_softc_t *sc)
295 int i, found_mac, found_sn2;
298 ptr = sc->eeprom_strings;
301 while (*ptr != '\0') {
302 if (strncmp(ptr, "MAC=", 4) == 0) {
305 sc->mac_addr[i] = strtoul(ptr, &endptr, 16);
306 if (endptr - ptr != 2)
315 } else if (strncmp(ptr, "PC=", 3) == 0) {
317 strlcpy(sc->product_code_string, ptr,
318 sizeof(sc->product_code_string));
319 } else if (!found_sn2 && (strncmp(ptr, "SN=", 3) == 0)) {
321 strlcpy(sc->serial_number_string, ptr,
322 sizeof(sc->serial_number_string));
323 } else if (strncmp(ptr, "SN2=", 4) == 0) {
324 /* SN2 takes precedence over SN */
327 strlcpy(sc->serial_number_string, ptr,
328 sizeof(sc->serial_number_string));
330 while (*ptr++ != '\0') {}
337 device_printf(sc->dev, "failed to parse eeprom_strings\n");
342 #if defined __i386 || defined i386 || defined __i386__ || defined __x86_64__
344 mxge_enable_nvidia_ecrc(mxge_softc_t *sc)
347 unsigned long base, off;
349 device_t pdev, mcp55;
350 uint16_t vendor_id, device_id, word;
351 uintptr_t bus, slot, func, ivend, idev;
355 if (!mxge_nvidia_ecrc_enable)
358 pdev = device_get_parent(device_get_parent(sc->dev));
360 device_printf(sc->dev, "could not find parent?\n");
363 vendor_id = pci_read_config(pdev, PCIR_VENDOR, 2);
364 device_id = pci_read_config(pdev, PCIR_DEVICE, 2);
366 if (vendor_id != 0x10de)
371 if (device_id == 0x005d) {
372 /* ck804, base address is magic */
374 } else if (device_id >= 0x0374 && device_id <= 0x378) {
375 /* mcp55, base address stored in chipset */
376 mcp55 = pci_find_bsf(0, 0, 0);
378 0x10de == pci_read_config(mcp55, PCIR_VENDOR, 2) &&
379 0x0369 == pci_read_config(mcp55, PCIR_DEVICE, 2)) {
380 word = pci_read_config(mcp55, 0x90, 2);
381 base = ((unsigned long)word & 0x7ffeU) << 25;
388 Test below is commented because it is believed that doing
389 config read/write beyond 0xff will access the config space
390 for the next larger function. Uncomment this and remove
391 the hacky pmap_mapdev() way of accessing config space when
392 FreeBSD grows support for extended pcie config space access
395 /* See if we can, by some miracle, access the extended
397 val = pci_read_config(pdev, 0x178, 4);
398 if (val != 0xffffffff) {
400 pci_write_config(pdev, 0x178, val, 4);
404 /* Rather than using normal pci config space writes, we must
405 * map the Nvidia config space ourselves. This is because on
406 * opteron/nvidia class machine the 0xe000000 mapping is
407 * handled by the nvidia chipset, that means the internal PCI
408 * device (the on-chip northbridge), or the amd-8131 bridge
409 * and things behind them are not visible by this method.
412 BUS_READ_IVAR(device_get_parent(pdev), pdev,
414 BUS_READ_IVAR(device_get_parent(pdev), pdev,
415 PCI_IVAR_SLOT, &slot);
416 BUS_READ_IVAR(device_get_parent(pdev), pdev,
417 PCI_IVAR_FUNCTION, &func);
418 BUS_READ_IVAR(device_get_parent(pdev), pdev,
419 PCI_IVAR_VENDOR, &ivend);
420 BUS_READ_IVAR(device_get_parent(pdev), pdev,
421 PCI_IVAR_DEVICE, &idev);
424 + 0x00100000UL * (unsigned long)bus
425 + 0x00001000UL * (unsigned long)(func
428 /* map it into the kernel */
429 va = pmap_mapdev(trunc_page((vm_paddr_t)off), PAGE_SIZE);
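/*
 * off points into the chipset's memory-mapped extended config window,
 * laid out ECAM-style: 0x00100000 (1MB) per bus and 0x00001000 (4KB)
 * per function, eight functions per slot.  A single page mapping is
 * therefore enough to reach this device's config registers.
 */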
433 device_printf(sc->dev, "pmap_mapdev() failed\n");
436 /* get a pointer to the config space mapped into the kernel */
437 cfgptr = va + (off & PAGE_MASK);
439 /* make sure that we can really access it */
440 vendor_id = *(uint16_t *)(cfgptr + PCIR_VENDOR);
441 device_id = *(uint16_t *)(cfgptr + PCIR_DEVICE);
442 if (! (vendor_id == ivend && device_id == idev)) {
443 device_printf(sc->dev, "mapping failed: 0x%x:0x%x\n",
444 vendor_id, device_id);
445 pmap_unmapdev((vm_offset_t)va, PAGE_SIZE);
449 ptr32 = (uint32_t*)(cfgptr + 0x178);
452 if (val == 0xffffffff) {
453 device_printf(sc->dev, "extended mapping failed\n");
454 pmap_unmapdev((vm_offset_t)va, PAGE_SIZE);
458 pmap_unmapdev((vm_offset_t)va, PAGE_SIZE);
460 device_printf(sc->dev,
461 "Enabled ECRC on upstream Nvidia bridge "
463 (int)bus, (int)slot, (int)func);
468 mxge_enable_nvidia_ecrc(mxge_softc_t *sc)
470 device_printf(sc->dev,
471 "Nforce 4 chipset on non-x86/amd64!?!?!\n");
478 mxge_dma_test(mxge_softc_t *sc, int test_type)
481 bus_addr_t dmatest_bus = sc->dmabench_dma.bus_addr;
487 /* Run a small DMA test.
488 * The magic multipliers to the length tell the firmware
489 * to do DMA read, write, or read+write tests. The
490 * results are returned in cmd.data0. The upper 16
491 * bits of the return is the number of transfers completed.
492 * The lower 16 bits is the time in 0.5us ticks that the
493 * transfers took to complete.
496 len = sc->tx_boundary;
498 cmd.data0 = MXGE_LOWPART_TO_U32(dmatest_bus);
499 cmd.data1 = MXGE_HIGHPART_TO_U32(dmatest_bus);
500 cmd.data2 = len * 0x10000;
501 status = mxge_send_cmd(sc, test_type, &cmd);
506 sc->read_dma = ((cmd.data0>>16) * len * 2) /
507 (cmd.data0 & 0xffff);
508 cmd.data0 = MXGE_LOWPART_TO_U32(dmatest_bus);
509 cmd.data1 = MXGE_HIGHPART_TO_U32(dmatest_bus);
510 cmd.data2 = len * 0x1;
511 status = mxge_send_cmd(sc, test_type, &cmd);
516 sc->write_dma = ((cmd.data0>>16) * len * 2) /
517 (cmd.data0 & 0xffff);
519 cmd.data0 = MXGE_LOWPART_TO_U32(dmatest_bus);
520 cmd.data1 = MXGE_HIGHPART_TO_U32(dmatest_bus);
521 cmd.data2 = len * 0x10001;
522 status = mxge_send_cmd(sc, test_type, &cmd);
527 sc->read_write_dma = ((cmd.data0>>16) * len * 2 * 2) /
528 (cmd.data0 & 0xffff);
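/*
 * Worked example of the encoding described above: if a read test with
 * len = 2048 returns cmd.data0 = (100 << 16) | 400, then 100 transfers
 * of 2048 bytes completed in 400 * 0.5us = 200us, so the expression
 * above yields 100 * 2048 * 2 / 400 = 1024 MB/s.  The read/write case
 * carries an extra factor of 2 for the two directions.
 */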
531 if (status != 0 && test_type != MXGEFW_CMD_UNALIGNED_TEST)
532 device_printf(sc->dev, "DMA %s benchmark failed: %d\n",
539 * The Lanai Z8E PCI-E interface achieves higher Read-DMA throughput
540 * when the PCI-E Completion packets are aligned on an 8-byte
541 * boundary. Some PCI-E chip sets always align Completion packets; on
542 * the ones that do not, the alignment can be enforced by enabling
543 * ECRC generation (if supported).
545 * When PCI-E Completion packets are not aligned, it is actually more
546 * efficient to limit Read-DMA transactions to 2KB, rather than 4KB.
548 * If the driver can neither enable ECRC nor verify that it has
549 * already been enabled, then it must use a firmware image which works
550 * around unaligned completion packets (ethp_z8e.dat), and it should
551 * also ensure that it never gives the device a Read-DMA which is
552 * larger than 2KB by setting the tx_boundary to 2KB. If ECRC is
553 * enabled, then the driver should use the aligned (eth_z8e.dat)
554 * firmware image, and set tx_boundary to 4KB.
558 mxge_firmware_probe(mxge_softc_t *sc)
560 device_t dev = sc->dev;
564 sc->tx_boundary = 4096;
566 * Verify the max read request size was set to 4KB
567 * before trying the test with 4KB.
569 if (pci_find_cap(dev, PCIY_EXPRESS, &reg) == 0) {
570 pectl = pci_read_config(dev, reg + 0x8, 2);
571 if ((pectl & (5 << 12)) != (5 << 12)) {
572 device_printf(dev, "Max Read Req. size != 4k (0x%x\n",
574 sc->tx_boundary = 2048;
579 * load the optimized firmware (which assumes aligned PCIe
580 * completions) in order to see if it works on this host.
582 sc->fw_name = mxge_fw_aligned;
583 status = mxge_load_firmware(sc, 1);
589 * Enable ECRC if possible
591 mxge_enable_nvidia_ecrc(sc);
594 * Run a DMA test which watches for unaligned completions and
595 * aborts on the first one seen. Not required on Z8ES or newer.
597 if (pci_get_revid(sc->dev) >= MXGE_PCI_REV_Z8ES)
599 status = mxge_dma_test(sc, MXGEFW_CMD_UNALIGNED_TEST);
601 return 0; /* keep the aligned firmware */
604 device_printf(dev, "DMA test failed: %d\n", status);
605 if (status == ENOSYS)
606 device_printf(dev, "Falling back to ethp! "
607 "Please install up to date fw\n");
612 mxge_select_firmware(mxge_softc_t *sc)
615 int force_firmware = mxge_force_firmware;
618 force_firmware = sc->throttle;
620 if (force_firmware != 0) {
621 if (force_firmware == 1)
626 device_printf(sc->dev,
627 "Assuming %s completions (forced)\n",
628 aligned ? "aligned" : "unaligned");
632 /* if the PCIe link width is 4 or less, we can use the aligned
633 firmware and skip any checks */
634 if (sc->link_width != 0 && sc->link_width <= 4) {
635 device_printf(sc->dev,
636 "PCIe x%d Link, expect reduced performance\n",
642 if (0 == mxge_firmware_probe(sc))
647 sc->fw_name = mxge_fw_aligned;
648 sc->tx_boundary = 4096;
650 sc->fw_name = mxge_fw_unaligned;
651 sc->tx_boundary = 2048;
653 return (mxge_load_firmware(sc, 0));
657 mxge_validate_firmware(mxge_softc_t *sc, const mcp_gen_header_t *hdr)
661 if (be32toh(hdr->mcp_type) != MCP_TYPE_ETH) {
662 device_printf(sc->dev, "Bad firmware type: 0x%x\n",
663 be32toh(hdr->mcp_type));
667 /* save firmware version for sysctl */
668 strlcpy(sc->fw_version, hdr->version, sizeof(sc->fw_version));
670 device_printf(sc->dev, "firmware id: %s\n", hdr->version);
672 sscanf(sc->fw_version, "%d.%d.%d", &sc->fw_ver_major,
673 &sc->fw_ver_minor, &sc->fw_ver_tiny);
675 if (!(sc->fw_ver_major == MXGEFW_VERSION_MAJOR
676 && sc->fw_ver_minor == MXGEFW_VERSION_MINOR)) {
677 device_printf(sc->dev, "Found firmware version %s\n",
679 device_printf(sc->dev, "Driver needs %d.%d\n",
680 MXGEFW_VERSION_MAJOR, MXGEFW_VERSION_MINOR);
688 mxge_load_firmware_helper(mxge_softc_t *sc, uint32_t *limit)
691 char *inflate_buffer;
692 const struct firmware *fw;
693 const mcp_gen_header_t *hdr;
700 fw = firmware_get(sc->fw_name);
702 device_printf(sc->dev, "Could not find firmware image %s\n",
709 /* setup zlib and decompress f/w */
710 bzero(&zs, sizeof (zs));
711 zs.zalloc = zcalloc_nowait;
713 status = inflateInit(&zs);
714 if (status != Z_OK) {
719 /* the uncompressed size is stored as the firmware version,
720 which would otherwise go unused */
721 fw_len = (size_t) fw->version;
722 inflate_buffer = malloc(fw_len, M_TEMP, M_NOWAIT);
723 if (inflate_buffer == NULL)
725 zs.avail_in = fw->datasize;
726 zs.next_in = __DECONST(char *, fw->data);
727 zs.avail_out = fw_len;
728 zs.next_out = inflate_buffer;
729 status = inflate(&zs, Z_FINISH);
730 if (status != Z_STREAM_END) {
731 device_printf(sc->dev, "zlib %d\n", status);
733 goto abort_with_buffer;
737 hdr_offset = htobe32(*(const uint32_t *)
738 (inflate_buffer + MCP_HEADER_PTR_OFFSET));
739 if ((hdr_offset & 3) || hdr_offset + sizeof(*hdr) > fw_len) {
740 device_printf(sc->dev, "Bad firmware file");
742 goto abort_with_buffer;
744 hdr = (const void*)(inflate_buffer + hdr_offset);
746 status = mxge_validate_firmware(sc, hdr);
748 goto abort_with_buffer;
750 /* Copy the inflated firmware to NIC SRAM. */
751 for (i = 0; i < fw_len; i += 256) {
752 mxge_pio_copy(sc->sram + MXGE_FW_OFFSET + i,
754 min(256U, (unsigned)(fw_len - i)));
763 free(inflate_buffer, M_TEMP);
767 firmware_put(fw, FIRMWARE_UNLOAD);
772 * Enable or disable periodic RDMAs from the host to make certain
773 * chipsets resend dropped PCIe messages
777 mxge_dummy_rdma(mxge_softc_t *sc, int enable)
780 volatile uint32_t *confirm;
781 volatile char *submit;
782 uint32_t *buf, dma_low, dma_high;
785 buf = (uint32_t *)((unsigned long)(buf_bytes + 7) & ~7UL);
787 /* clear confirmation addr */
788 confirm = (volatile uint32_t *)sc->cmd;
792 /* send an rdma command to the PCIe engine, and wait for the
793 response in the confirmation address. The firmware should
794 write a -1 there to indicate it is alive and well
797 dma_low = MXGE_LOWPART_TO_U32(sc->cmd_dma.bus_addr);
798 dma_high = MXGE_HIGHPART_TO_U32(sc->cmd_dma.bus_addr);
799 buf[0] = htobe32(dma_high); /* confirm addr MSW */
800 buf[1] = htobe32(dma_low); /* confirm addr LSW */
801 buf[2] = htobe32(0xffffffff); /* confirm data */
802 dma_low = MXGE_LOWPART_TO_U32(sc->zeropad_dma.bus_addr);
803 dma_high = MXGE_HIGHPART_TO_U32(sc->zeropad_dma.bus_addr);
804 buf[3] = htobe32(dma_high); /* dummy addr MSW */
805 buf[4] = htobe32(dma_low); /* dummy addr LSW */
806 buf[5] = htobe32(enable); /* enable? */
809 submit = (volatile char *)(sc->sram + MXGEFW_BOOT_DUMMY_RDMA);
811 mxge_pio_copy(submit, buf, 64);
816 while (*confirm != 0xffffffff && i < 20) {
820 if (*confirm != 0xffffffff) {
821 device_printf(sc->dev, "dummy rdma %s failed (%p = 0x%x)",
822 (enable ? "enable" : "disable"), confirm,
829 mxge_send_cmd(mxge_softc_t *sc, uint32_t cmd, mxge_cmd_t *data)
832 char buf_bytes[sizeof(*buf) + 8];
833 volatile mcp_cmd_response_t *response = sc->cmd;
834 volatile char *cmd_addr = sc->sram + MXGEFW_ETH_CMD;
835 uint32_t dma_low, dma_high;
836 int err, sleep_total = 0;
838 /* ensure buf is aligned to 8 bytes */
839 buf = (mcp_cmd_t *)((unsigned long)(buf_bytes + 7) & ~7UL);
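/* e.g. if buf_bytes happens to start at address 0x1003, then
 * 0x1003 + 7 = 0x100a and 0x100a & ~7 = 0x1008: the first 8-byte
 * aligned address inside the (sizeof(*buf) + 8)-byte scratch array */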
841 buf->data0 = htobe32(data->data0);
842 buf->data1 = htobe32(data->data1);
843 buf->data2 = htobe32(data->data2);
844 buf->cmd = htobe32(cmd);
845 dma_low = MXGE_LOWPART_TO_U32(sc->cmd_dma.bus_addr);
846 dma_high = MXGE_HIGHPART_TO_U32(sc->cmd_dma.bus_addr);
848 buf->response_addr.low = htobe32(dma_low);
849 buf->response_addr.high = htobe32(dma_high);
850 mtx_lock(&sc->cmd_mtx);
851 response->result = 0xffffffff;
853 mxge_pio_copy((volatile void *)cmd_addr, buf, sizeof (*buf));
855 /* wait up to 20ms */
857 for (sleep_total = 0; sleep_total < 20; sleep_total++) {
858 bus_dmamap_sync(sc->cmd_dma.dmat,
859 sc->cmd_dma.map, BUS_DMASYNC_POSTREAD);
861 switch (be32toh(response->result)) {
863 data->data0 = be32toh(response->data);
869 case MXGEFW_CMD_UNKNOWN:
872 case MXGEFW_CMD_ERROR_UNALIGNED:
875 case MXGEFW_CMD_ERROR_BUSY:
878 case MXGEFW_CMD_ERROR_I2C_ABSENT:
882 device_printf(sc->dev,
884 "failed, result = %d\n",
885 cmd, be32toh(response->result));
893 device_printf(sc->dev, "mxge: command %d timed out"
895 cmd, be32toh(response->result));
896 mtx_unlock(&sc->cmd_mtx);
901 mxge_adopt_running_firmware(mxge_softc_t *sc)
903 struct mcp_gen_header *hdr;
904 const size_t bytes = sizeof (struct mcp_gen_header);
908 /* find running firmware header */
909 hdr_offset = htobe32(*(volatile uint32_t *)
910 (sc->sram + MCP_HEADER_PTR_OFFSET));
912 if ((hdr_offset & 3) || hdr_offset + sizeof(*hdr) > sc->sram_size) {
913 device_printf(sc->dev,
914 "Running firmware has bad header offset (%d)\n",
919 /* copy header of running firmware from SRAM to host memory to
920 * validate firmware */
921 hdr = malloc(bytes, M_DEVBUF, M_NOWAIT);
923 device_printf(sc->dev, "could not malloc firmware hdr\n");
926 bus_space_read_region_1(rman_get_bustag(sc->mem_res),
927 rman_get_bushandle(sc->mem_res),
928 hdr_offset, (char *)hdr, bytes);
929 status = mxge_validate_firmware(sc, hdr);
933 * check to see if adopted firmware has bug where adopting
934 * it will cause broadcasts to be filtered unless the NIC
935 * is kept in ALLMULTI mode
937 if (sc->fw_ver_major == 1 && sc->fw_ver_minor == 4 &&
938 sc->fw_ver_tiny >= 4 && sc->fw_ver_tiny <= 11) {
939 sc->adopted_rx_filter_bug = 1;
940 device_printf(sc->dev, "Adopting fw %d.%d.%d: "
941 "working around rx filter bug\n",
942 sc->fw_ver_major, sc->fw_ver_minor,
951 mxge_load_firmware(mxge_softc_t *sc, int adopt)
953 volatile uint32_t *confirm;
954 volatile char *submit;
956 uint32_t *buf, size, dma_low, dma_high;
959 buf = (uint32_t *)((unsigned long)(buf_bytes + 7) & ~7UL);
961 size = sc->sram_size;
962 status = mxge_load_firmware_helper(sc, &size);
966 /* Try to use the currently running firmware, if
968 status = mxge_adopt_running_firmware(sc);
970 device_printf(sc->dev,
971 "failed to adopt running firmware\n");
974 device_printf(sc->dev,
975 "Successfully adopted running firmware\n");
976 if (sc->tx_boundary == 4096) {
977 device_printf(sc->dev,
978 "Using firmware currently running on NIC"
980 device_printf(sc->dev,
981 "performance consider loading optimized "
984 sc->fw_name = mxge_fw_unaligned;
985 sc->tx_boundary = 2048;
988 /* clear confirmation addr */
989 confirm = (volatile uint32_t *)sc->cmd;
992 /* send a reload command to the bootstrap MCP, and wait for the
993 response in the confirmation address. The firmware should
994 write a -1 there to indicate it is alive and well
997 dma_low = MXGE_LOWPART_TO_U32(sc->cmd_dma.bus_addr);
998 dma_high = MXGE_HIGHPART_TO_U32(sc->cmd_dma.bus_addr);
1000 buf[0] = htobe32(dma_high); /* confirm addr MSW */
1001 buf[1] = htobe32(dma_low); /* confirm addr LSW */
1002 buf[2] = htobe32(0xffffffff); /* confirm data */
1004 /* FIX: All newest firmware should un-protect the bottom of
1005 the sram before handoff. However, the very first interfaces
1006 do not. Therefore the handoff copy must skip the first 8 bytes
1008 /* where the code starts*/
1009 buf[3] = htobe32(MXGE_FW_OFFSET + 8);
1010 buf[4] = htobe32(size - 8); /* length of code */
1011 buf[5] = htobe32(8); /* where to copy to */
1012 buf[6] = htobe32(0); /* where to jump to */
1014 submit = (volatile char *)(sc->sram + MXGEFW_BOOT_HANDOFF);
1015 mxge_pio_copy(submit, buf, 64);
1020 while (*confirm != 0xffffffff && i < 20) {
1023 bus_dmamap_sync(sc->cmd_dma.dmat,
1024 sc->cmd_dma.map, BUS_DMASYNC_POSTREAD);
1026 if (*confirm != 0xffffffff) {
1027 device_printf(sc->dev,"handoff failed (%p = 0x%x)",
1036 mxge_update_mac_address(mxge_softc_t *sc)
1039 uint8_t *addr = sc->mac_addr;
1043 cmd.data0 = ((addr[0] << 24) | (addr[1] << 16)
1044 | (addr[2] << 8) | addr[3]);
1046 cmd.data1 = ((addr[4] << 8) | (addr[5]));
1048 status = mxge_send_cmd(sc, MXGEFW_SET_MAC_ADDRESS, &cmd);
1053 mxge_change_pause(mxge_softc_t *sc, int pause)
1059 status = mxge_send_cmd(sc, MXGEFW_ENABLE_FLOW_CONTROL,
1062 status = mxge_send_cmd(sc, MXGEFW_DISABLE_FLOW_CONTROL,
1066 device_printf(sc->dev, "Failed to set flow control mode\n");
1074 mxge_change_promisc(mxge_softc_t *sc, int promisc)
1079 if (mxge_always_promisc)
1083 status = mxge_send_cmd(sc, MXGEFW_ENABLE_PROMISC,
1086 status = mxge_send_cmd(sc, MXGEFW_DISABLE_PROMISC,
1090 device_printf(sc->dev, "Failed to set promisc mode\n");
1094 struct mxge_add_maddr_ctx {
1100 mxge_add_maddr(void *arg, struct sockaddr_dl *sdl, u_int cnt)
1102 struct mxge_add_maddr_ctx *ctx = arg;
1105 if (ctx->error != 0)
1107 bcopy(LLADDR(sdl), &cmd.data0, 4);
1108 bcopy(LLADDR(sdl) + 4, &cmd.data1, 2);
1109 cmd.data0 = htonl(cmd.data0);
1110 cmd.data1 = htonl(cmd.data1);
1112 ctx->error = mxge_send_cmd(ctx->sc, MXGEFW_JOIN_MULTICAST_GROUP, &cmd);
1118 mxge_set_multicast_list(mxge_softc_t *sc)
1120 struct mxge_add_maddr_ctx ctx;
1121 struct ifnet *ifp = sc->ifp;
1125 /* This firmware is known to not support multicast */
1126 if (!sc->fw_multicast_support)
1129 /* Disable multicast filtering while we play with the lists*/
1130 err = mxge_send_cmd(sc, MXGEFW_ENABLE_ALLMULTI, &cmd);
1132 device_printf(sc->dev, "Failed MXGEFW_ENABLE_ALLMULTI,"
1133 " error status: %d\n", err);
1137 if (sc->adopted_rx_filter_bug)
1140 if (ifp->if_flags & IFF_ALLMULTI)
1141 /* request to disable multicast filtering, so quit here */
1144 /* Flush all the filters */
1146 err = mxge_send_cmd(sc, MXGEFW_LEAVE_ALL_MULTICAST_GROUPS, &cmd);
1148 device_printf(sc->dev,
1149 "Failed MXGEFW_LEAVE_ALL_MULTICAST_GROUPS"
1150 ", error status: %d\n", err);
1154 /* Walk the multicast list, and add each address */
1157 if_foreach_llmaddr(ifp, mxge_add_maddr, &ctx);
1158 if (ctx.error != 0) {
1159 device_printf(sc->dev, "Failed MXGEFW_JOIN_MULTICAST_GROUP, "
1160 "error status:" "%d\t", ctx.error);
1161 /* abort, leaving multicast filtering off */
1165 /* Enable multicast filtering */
1166 err = mxge_send_cmd(sc, MXGEFW_DISABLE_ALLMULTI, &cmd);
1168 device_printf(sc->dev, "Failed MXGEFW_DISABLE_ALLMULTI"
1169 ", error status: %d\n", err);
1174 mxge_max_mtu(mxge_softc_t *sc)
1179 if (MJUMPAGESIZE - MXGEFW_PAD > MXGEFW_MAX_MTU)
1180 return MXGEFW_MAX_MTU - MXGEFW_PAD;
1182 /* try to set nbufs to see if we can
1183 use virtually contiguous jumbos */
1185 status = mxge_send_cmd(sc, MXGEFW_CMD_ALWAYS_USE_N_BIG_BUFFERS,
1188 return MXGEFW_MAX_MTU - MXGEFW_PAD;
1190 /* otherwise, we're limited to MJUMPAGESIZE */
1191 return MJUMPAGESIZE - MXGEFW_PAD;
1195 mxge_reset(mxge_softc_t *sc, int interrupts_setup)
1197 struct mxge_slice_state *ss;
1198 mxge_rx_done_t *rx_done;
1199 volatile uint32_t *irq_claim;
1203 /* try to send a reset command to the card to see if it
1205 memset(&cmd, 0, sizeof (cmd));
1206 status = mxge_send_cmd(sc, MXGEFW_CMD_RESET, &cmd);
1208 device_printf(sc->dev, "failed reset\n");
1212 mxge_dummy_rdma(sc, 1);
1215 /* set the intrq size */
1216 cmd.data0 = sc->rx_ring_size;
1217 status = mxge_send_cmd(sc, MXGEFW_CMD_SET_INTRQ_SIZE, &cmd);
1220 * Even though we already know how many slices are supported
1221 * via mxge_slice_probe(), MXGEFW_CMD_GET_MAX_RSS_QUEUES
1222 * has magic side effects, and must be called after a reset.
1223 * It must be called prior to calling any RSS related cmds,
1224 * including assigning an interrupt queue for anything but
1225 * slice 0. It must also be called *after*
1226 * MXGEFW_CMD_SET_INTRQ_SIZE, since the intrq size is used by
1227 * the firmware to compute offsets.
1230 if (sc->num_slices > 1) {
1231 /* ask the maximum number of slices it supports */
1232 status = mxge_send_cmd(sc, MXGEFW_CMD_GET_MAX_RSS_QUEUES,
1235 device_printf(sc->dev,
1236 "failed to get number of slices\n");
1240 * MXGEFW_CMD_ENABLE_RSS_QUEUES must be called prior
1241 * to setting up the interrupt queue DMA
1243 cmd.data0 = sc->num_slices;
1244 cmd.data1 = MXGEFW_SLICE_INTR_MODE_ONE_PER_SLICE;
1245 #ifdef IFNET_BUF_RING
1246 cmd.data1 |= MXGEFW_SLICE_ENABLE_MULTIPLE_TX_QUEUES;
1248 status = mxge_send_cmd(sc, MXGEFW_CMD_ENABLE_RSS_QUEUES,
1251 device_printf(sc->dev,
1252 "failed to set number of slices\n");
1258 if (interrupts_setup) {
1259 /* Now exchange information about interrupts */
1260 for (slice = 0; slice < sc->num_slices; slice++) {
1261 rx_done = &sc->ss[slice].rx_done;
1262 memset(rx_done->entry, 0, sc->rx_ring_size);
1263 cmd.data0 = MXGE_LOWPART_TO_U32(rx_done->dma.bus_addr);
1264 cmd.data1 = MXGE_HIGHPART_TO_U32(rx_done->dma.bus_addr);
1266 status |= mxge_send_cmd(sc,
1267 MXGEFW_CMD_SET_INTRQ_DMA,
1272 status |= mxge_send_cmd(sc,
1273 MXGEFW_CMD_GET_INTR_COAL_DELAY_OFFSET, &cmd);
1276 sc->intr_coal_delay_ptr = (volatile uint32_t *)(sc->sram + cmd.data0);
1278 status |= mxge_send_cmd(sc, MXGEFW_CMD_GET_IRQ_ACK_OFFSET, &cmd);
1279 irq_claim = (volatile uint32_t *)(sc->sram + cmd.data0);
1282 status |= mxge_send_cmd(sc, MXGEFW_CMD_GET_IRQ_DEASSERT_OFFSET,
1284 sc->irq_deassert = (volatile uint32_t *)(sc->sram + cmd.data0);
1286 device_printf(sc->dev, "failed set interrupt parameters\n");
1291 *sc->intr_coal_delay_ptr = htobe32(sc->intr_coal_delay);
1294 /* run a DMA benchmark */
1295 (void) mxge_dma_test(sc, MXGEFW_DMA_TEST);
1297 for (slice = 0; slice < sc->num_slices; slice++) {
1298 ss = &sc->ss[slice];
1300 ss->irq_claim = irq_claim + (2 * slice);
1301 /* reset mcp/driver shared state back to 0 */
1302 ss->rx_done.idx = 0;
1303 ss->rx_done.cnt = 0;
1306 ss->tx.pkt_done = 0;
1307 ss->tx.queue_active = 0;
1308 ss->tx.activate = 0;
1309 ss->tx.deactivate = 0;
1314 ss->rx_small.cnt = 0;
1315 ss->lc.lro_bad_csum = 0;
1316 ss->lc.lro_queued = 0;
1317 ss->lc.lro_flushed = 0;
1318 if (ss->fw_stats != NULL) {
1319 bzero(ss->fw_stats, sizeof *ss->fw_stats);
1322 sc->rdma_tags_available = 15;
1323 status = mxge_update_mac_address(sc);
1324 mxge_change_promisc(sc, sc->ifp->if_flags & IFF_PROMISC);
1325 mxge_change_pause(sc, sc->pause);
1326 mxge_set_multicast_list(sc);
1328 cmd.data0 = sc->throttle;
1329 if (mxge_send_cmd(sc, MXGEFW_CMD_SET_THROTTLE_FACTOR,
1331 device_printf(sc->dev,
1332 "can't enable throttle\n");
1339 mxge_change_throttle(SYSCTL_HANDLER_ARGS)
1344 unsigned int throttle;
1347 throttle = sc->throttle;
1348 err = sysctl_handle_int(oidp, &throttle, arg2, req);
1353 if (throttle == sc->throttle)
1356 if (throttle < MXGE_MIN_THROTTLE || throttle > MXGE_MAX_THROTTLE)
1359 mtx_lock(&sc->driver_mtx);
1360 cmd.data0 = throttle;
1361 err = mxge_send_cmd(sc, MXGEFW_CMD_SET_THROTTLE_FACTOR, &cmd);
1363 sc->throttle = throttle;
1364 mtx_unlock(&sc->driver_mtx);
1369 mxge_change_intr_coal(SYSCTL_HANDLER_ARGS)
1372 unsigned int intr_coal_delay;
1376 intr_coal_delay = sc->intr_coal_delay;
1377 err = sysctl_handle_int(oidp, &intr_coal_delay, arg2, req);
1381 if (intr_coal_delay == sc->intr_coal_delay)
1384 if (intr_coal_delay == 0 || intr_coal_delay > 1000*1000)
1387 mtx_lock(&sc->driver_mtx);
1388 *sc->intr_coal_delay_ptr = htobe32(intr_coal_delay);
1389 sc->intr_coal_delay = intr_coal_delay;
1391 mtx_unlock(&sc->driver_mtx);
1396 mxge_change_flow_control(SYSCTL_HANDLER_ARGS)
1399 unsigned int enabled;
1403 enabled = sc->pause;
1404 err = sysctl_handle_int(oidp, &enabled, arg2, req);
1408 if (enabled == sc->pause)
1411 mtx_lock(&sc->driver_mtx);
1412 err = mxge_change_pause(sc, enabled);
1413 mtx_unlock(&sc->driver_mtx);
1418 mxge_handle_be32(SYSCTL_HANDLER_ARGS)
1424 arg2 = be32toh(*(int *)arg1);
1426 err = sysctl_handle_int(oidp, arg1, arg2, req);
1432 mxge_rem_sysctls(mxge_softc_t *sc)
1434 struct mxge_slice_state *ss;
1437 if (sc->slice_sysctl_tree == NULL)
1440 for (slice = 0; slice < sc->num_slices; slice++) {
1441 ss = &sc->ss[slice];
1442 if (ss == NULL || ss->sysctl_tree == NULL)
1444 sysctl_ctx_free(&ss->sysctl_ctx);
1445 ss->sysctl_tree = NULL;
1447 sysctl_ctx_free(&sc->slice_sysctl_ctx);
1448 sc->slice_sysctl_tree = NULL;
1452 mxge_add_sysctls(mxge_softc_t *sc)
1454 struct sysctl_ctx_list *ctx;
1455 struct sysctl_oid_list *children;
1457 struct mxge_slice_state *ss;
1461 ctx = device_get_sysctl_ctx(sc->dev);
1462 children = SYSCTL_CHILDREN(device_get_sysctl_tree(sc->dev));
1463 fw = sc->ss[0].fw_stats;
1465 /* random information */
1466 SYSCTL_ADD_STRING(ctx, children, OID_AUTO,
1468 CTLFLAG_RD, sc->fw_version,
1469 0, "firmware version");
1470 SYSCTL_ADD_STRING(ctx, children, OID_AUTO,
1472 CTLFLAG_RD, sc->serial_number_string,
1473 0, "serial number");
1474 SYSCTL_ADD_STRING(ctx, children, OID_AUTO,
1476 CTLFLAG_RD, sc->product_code_string,
1478 SYSCTL_ADD_INT(ctx, children, OID_AUTO,
1480 CTLFLAG_RD, &sc->link_width,
1482 SYSCTL_ADD_INT(ctx, children, OID_AUTO,
1484 CTLFLAG_RD, &sc->tx_boundary,
1486 SYSCTL_ADD_INT(ctx, children, OID_AUTO,
1488 CTLFLAG_RD, &sc->wc,
1489 0, "write combining PIO?");
1490 SYSCTL_ADD_INT(ctx, children, OID_AUTO,
1492 CTLFLAG_RD, &sc->read_dma,
1493 0, "DMA Read speed in MB/s");
1494 SYSCTL_ADD_INT(ctx, children, OID_AUTO,
1496 CTLFLAG_RD, &sc->write_dma,
1497 0, "DMA Write speed in MB/s");
1498 SYSCTL_ADD_INT(ctx, children, OID_AUTO,
1499 "read_write_dma_MBs",
1500 CTLFLAG_RD, &sc->read_write_dma,
1501 0, "DMA concurrent Read/Write speed in MB/s");
1502 SYSCTL_ADD_INT(ctx, children, OID_AUTO,
1504 CTLFLAG_RD, &sc->watchdog_resets,
1505 0, "Number of times NIC was reset");
1508 /* performance related tunables */
1509 SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
1510 "intr_coal_delay", CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE,
1511 sc, 0, mxge_change_intr_coal, "I",
1512 "interrupt coalescing delay in usecs");
1514 SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
1515 "throttle", CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, 0,
1516 mxge_change_throttle, "I", "transmit throttling");
1518 SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
1519 "flow_control_enabled",
1520 CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, 0,
1521 mxge_change_flow_control, "I",
1522 "interrupt coalescing delay in usecs");
1524 SYSCTL_ADD_INT(ctx, children, OID_AUTO,
1526 CTLFLAG_RW, &mxge_deassert_wait,
1527 0, "Wait for IRQ line to go low in ihandler");
1529 /* stats block from firmware is in network byte order.
1531 SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
1532 "link_up", CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_MPSAFE,
1533 &fw->link_up, 0, mxge_handle_be32, "I", "link up");
1534 SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
1535 "rdma_tags_available", CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_MPSAFE,
1536 &fw->rdma_tags_available, 0, mxge_handle_be32, "I",
1537 "rdma_tags_available");
1538 SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
1539 "dropped_bad_crc32", CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_MPSAFE,
1540 &fw->dropped_bad_crc32, 0, mxge_handle_be32, "I",
1541 "dropped_bad_crc32");
1542 SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
1543 "dropped_bad_phy", CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_MPSAFE,
1544 &fw->dropped_bad_phy, 0, mxge_handle_be32, "I", "dropped_bad_phy");
1545 SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
1546 "dropped_link_error_or_filtered",
1547 CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_MPSAFE,
1548 &fw->dropped_link_error_or_filtered, 0, mxge_handle_be32, "I",
1549 "dropped_link_error_or_filtered");
1550 SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
1551 "dropped_link_overflow",
1552 CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_MPSAFE,
1553 &fw->dropped_link_overflow, 0, mxge_handle_be32, "I",
1554 "dropped_link_overflow");
1555 SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
1556 "dropped_multicast_filtered",
1557 CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_MPSAFE,
1558 &fw->dropped_multicast_filtered, 0, mxge_handle_be32, "I",
1559 "dropped_multicast_filtered");
1560 SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
1561 "dropped_no_big_buffer",
1562 CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_MPSAFE,
1563 &fw->dropped_no_big_buffer, 0, mxge_handle_be32, "I",
1564 "dropped_no_big_buffer");
1565 SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
1566 "dropped_no_small_buffer",
1567 CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_MPSAFE,
1568 &fw->dropped_no_small_buffer, 0, mxge_handle_be32, "I",
1569 "dropped_no_small_buffer");
1570 SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
1572 CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_MPSAFE,
1573 &fw->dropped_overrun, 0, mxge_handle_be32, "I",
1575 SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
1576 "dropped_pause", CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_MPSAFE,
1577 &fw->dropped_pause, 0, mxge_handle_be32, "I", "dropped_pause");
1578 SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
1579 "dropped_runt", CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_MPSAFE,
1580 &fw->dropped_runt, 0, mxge_handle_be32, "I", "dropped_runt");
1582 SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
1583 "dropped_unicast_filtered",
1584 CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_MPSAFE,
1585 &fw->dropped_unicast_filtered, 0, mxge_handle_be32, "I",
1586 "dropped_unicast_filtered");
1588 /* verbose printing? */
1589 SYSCTL_ADD_INT(ctx, children, OID_AUTO,
1591 CTLFLAG_RW, &mxge_verbose,
1592 0, "verbose printing");
1594 /* add counters exported for debugging from all slices */
1595 sysctl_ctx_init(&sc->slice_sysctl_ctx);
1596 sc->slice_sysctl_tree =
1597 SYSCTL_ADD_NODE(&sc->slice_sysctl_ctx, children, OID_AUTO,
1598 "slice", CTLFLAG_RD | CTLFLAG_MPSAFE, 0, "");
1600 for (slice = 0; slice < sc->num_slices; slice++) {
1601 ss = &sc->ss[slice];
1602 sysctl_ctx_init(&ss->sysctl_ctx);
1603 ctx = &ss->sysctl_ctx;
1604 children = SYSCTL_CHILDREN(sc->slice_sysctl_tree);
1605 sprintf(slice_num, "%d", slice);
1607 SYSCTL_ADD_NODE(ctx, children, OID_AUTO, slice_num,
1608 CTLFLAG_RD | CTLFLAG_MPSAFE, 0, "");
1609 children = SYSCTL_CHILDREN(ss->sysctl_tree);
1610 SYSCTL_ADD_INT(ctx, children, OID_AUTO,
1612 CTLFLAG_RD, &ss->rx_small.cnt,
1614 SYSCTL_ADD_INT(ctx, children, OID_AUTO,
1616 CTLFLAG_RD, &ss->rx_big.cnt,
1618 SYSCTL_ADD_U64(ctx, children, OID_AUTO,
1619 "lro_flushed", CTLFLAG_RD, &ss->lc.lro_flushed,
1620 0, "number of lro merge queues flushed");
1622 SYSCTL_ADD_U64(ctx, children, OID_AUTO,
1623 "lro_bad_csum", CTLFLAG_RD, &ss->lc.lro_bad_csum,
1624 0, "number of bad csums preventing LRO");
1626 SYSCTL_ADD_U64(ctx, children, OID_AUTO,
1627 "lro_queued", CTLFLAG_RD, &ss->lc.lro_queued,
1628 0, "number of frames appended to lro merge"
1631 #ifndef IFNET_BUF_RING
1632 /* only transmit from slice 0 for now */
1636 SYSCTL_ADD_INT(ctx, children, OID_AUTO,
1638 CTLFLAG_RD, &ss->tx.req,
1641 SYSCTL_ADD_INT(ctx, children, OID_AUTO,
1643 CTLFLAG_RD, &ss->tx.done,
1645 SYSCTL_ADD_INT(ctx, children, OID_AUTO,
1647 CTLFLAG_RD, &ss->tx.pkt_done,
1649 SYSCTL_ADD_INT(ctx, children, OID_AUTO,
1651 CTLFLAG_RD, &ss->tx.stall,
1653 SYSCTL_ADD_INT(ctx, children, OID_AUTO,
1655 CTLFLAG_RD, &ss->tx.wake,
1657 SYSCTL_ADD_INT(ctx, children, OID_AUTO,
1659 CTLFLAG_RD, &ss->tx.defrag,
1661 SYSCTL_ADD_INT(ctx, children, OID_AUTO,
1663 CTLFLAG_RD, &ss->tx.queue_active,
1664 0, "tx_queue_active");
1665 SYSCTL_ADD_INT(ctx, children, OID_AUTO,
1667 CTLFLAG_RD, &ss->tx.activate,
1669 SYSCTL_ADD_INT(ctx, children, OID_AUTO,
1671 CTLFLAG_RD, &ss->tx.deactivate,
1672 0, "tx_deactivate");
1676 /* copy an array of mcp_kreq_ether_send_t's to the mcp. Copy
1677 backwards one at a time and handle ring wraps */
1680 mxge_submit_req_backwards(mxge_tx_ring_t *tx,
1681 mcp_kreq_ether_send_t *src, int cnt)
1683 int idx, starting_slot;
1684 starting_slot = tx->req;
1687 idx = (starting_slot + cnt) & tx->mask;
1688 mxge_pio_copy(&tx->lanai[idx],
1689 &src[cnt], sizeof(*src));
1695 * copy an array of mcp_kreq_ether_send_t's to the mcp. Copy
1696 * at most 32 bytes at a time, so as to avoid involving the software
1697 * pio handler in the nic. We re-write the first segment's flags
1698 * to mark them valid only after writing the entire chain
1702 mxge_submit_req(mxge_tx_ring_t *tx, mcp_kreq_ether_send_t *src,
1707 volatile uint32_t *dst_ints;
1708 mcp_kreq_ether_send_t *srcp;
1709 volatile mcp_kreq_ether_send_t *dstp, *dst;
1712 idx = tx->req & tx->mask;
1714 last_flags = src->flags;
1717 dst = dstp = &tx->lanai[idx];
1720 if ((idx + cnt) < tx->mask) {
1721 for (i = 0; i < (cnt - 1); i += 2) {
1722 mxge_pio_copy(dstp, srcp, 2 * sizeof(*src));
1723 wmb(); /* force write every 32 bytes */
1728 /* submit all but the first request, and ensure
1729 that it is submitted below */
1730 mxge_submit_req_backwards(tx, src, cnt);
1734 /* submit the first request */
1735 mxge_pio_copy(dstp, srcp, sizeof(*src));
1736 wmb(); /* barrier before setting valid flag */
1739 /* re-write the last 32-bits with the valid flags */
1740 src->flags = last_flags;
1741 src_ints = (uint32_t *)src;
1743 dst_ints = (volatile uint32_t *)dst;
1745 *dst_ints = *src_ints;
1751 mxge_parse_tx(struct mxge_slice_state *ss, struct mbuf *m,
1752 struct mxge_pkt_info *pi)
1754 struct ether_vlan_header *eh;
1756 int tso = m->m_pkthdr.csum_flags & (CSUM_TSO);
1757 #if IFCAP_TSO6 && defined(INET6)
1761 eh = mtod(m, struct ether_vlan_header *);
1762 if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
1763 etype = ntohs(eh->evl_proto);
1764 pi->ip_off = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
1766 etype = ntohs(eh->evl_encap_proto);
1767 pi->ip_off = ETHER_HDR_LEN;
1773 * ensure ip header is in first mbuf, copy it to a
1774 * scratch buffer if not
1776 pi->ip = (struct ip *)(m->m_data + pi->ip_off);
1778 if (__predict_false(m->m_len < pi->ip_off + sizeof(*pi->ip))) {
1779 m_copydata(m, 0, pi->ip_off + sizeof(*pi->ip),
1781 pi->ip = (struct ip *)(ss->scratch + pi->ip_off);
1783 pi->ip_hlen = pi->ip->ip_hl << 2;
1787 if (__predict_false(m->m_len < pi->ip_off + pi->ip_hlen +
1788 sizeof(struct tcphdr))) {
1789 m_copydata(m, 0, pi->ip_off + pi->ip_hlen +
1790 sizeof(struct tcphdr), ss->scratch);
1791 pi->ip = (struct ip *)(ss->scratch + pi->ip_off);
1793 pi->tcp = (struct tcphdr *)((char *)pi->ip + pi->ip_hlen);
1795 #if IFCAP_TSO6 && defined(INET6)
1796 case ETHERTYPE_IPV6:
1797 pi->ip6 = (struct ip6_hdr *)(m->m_data + pi->ip_off);
1798 if (__predict_false(m->m_len < pi->ip_off + sizeof(*pi->ip6))) {
1799 m_copydata(m, 0, pi->ip_off + sizeof(*pi->ip6),
1801 pi->ip6 = (struct ip6_hdr *)(ss->scratch + pi->ip_off);
1804 pi->ip_hlen = ip6_lasthdr(m, pi->ip_off, IPPROTO_IPV6, &nxt);
1805 pi->ip_hlen -= pi->ip_off;
1806 if (nxt != IPPROTO_TCP && nxt != IPPROTO_UDP)
1812 if (pi->ip_off + pi->ip_hlen > ss->sc->max_tso6_hlen)
1815 if (__predict_false(m->m_len < pi->ip_off + pi->ip_hlen +
1816 sizeof(struct tcphdr))) {
1817 m_copydata(m, 0, pi->ip_off + pi->ip_hlen +
1818 sizeof(struct tcphdr), ss->scratch);
1819 pi->ip6 = (struct ip6_hdr *)(ss->scratch + pi->ip_off);
1821 pi->tcp = (struct tcphdr *)((char *)pi->ip6 + pi->ip_hlen);
1833 mxge_encap_tso(struct mxge_slice_state *ss, struct mbuf *m,
1834 int busdma_seg_cnt, struct mxge_pkt_info *pi)
1837 mcp_kreq_ether_send_t *req;
1838 bus_dma_segment_t *seg;
1839 uint32_t low, high_swapped;
1840 int len, seglen, cum_len, cum_len_next;
1841 int next_is_first, chop, cnt, rdma_count, small;
1842 uint16_t pseudo_hdr_offset, cksum_offset, mss, sum;
1843 uint8_t flags, flags_next;
1846 mss = m->m_pkthdr.tso_segsz;
1848 /* negative cum_len signifies to the
1849 * send loop that we are still in the
1850 * header portion of the TSO packet.
1853 cksum_offset = pi->ip_off + pi->ip_hlen;
1854 cum_len = -(cksum_offset + (pi->tcp->th_off << 2));
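/* e.g. for a plain IPv4/TCP frame with no options this is
 * -(14 + 20 + 20) = -54, so cum_len crosses zero exactly where the
 * TCP payload begins */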
1856 /* TSO implies checksum offload on this hardware */
1857 if (__predict_false((m->m_pkthdr.csum_flags & (CSUM_TCP|CSUM_TCP_IPV6)) == 0)) {
1859 * If packet has full TCP csum, replace it with pseudo hdr
1860 * sum that the NIC expects, otherwise the NIC will emit
1861 * packets with bad TCP checksums.
1863 m->m_pkthdr.csum_data = offsetof(struct tcphdr, th_sum);
1865 #if (CSUM_TCP_IPV6 != 0) && defined(INET6)
1866 m->m_pkthdr.csum_flags |= CSUM_TCP_IPV6;
1867 sum = in6_cksum_pseudo(pi->ip6,
1868 m->m_pkthdr.len - cksum_offset,
1873 m->m_pkthdr.csum_flags |= CSUM_TCP;
1874 sum = in_pseudo(pi->ip->ip_src.s_addr,
1875 pi->ip->ip_dst.s_addr,
1876 htons(IPPROTO_TCP + (m->m_pkthdr.len -
1880 m_copyback(m, offsetof(struct tcphdr, th_sum) +
1881 cksum_offset, sizeof(sum), (caddr_t)&sum);
1883 flags = MXGEFW_FLAGS_TSO_HDR | MXGEFW_FLAGS_FIRST;
1886 /* for TSO, pseudo_hdr_offset holds mss.
1887 * The firmware figures out where to put
1888 * the checksum by parsing the header. */
1889 pseudo_hdr_offset = htobe16(mss);
1893 * for IPv6 TSO, the "checksum offset" is re-purposed
1894 * to store the TCP header len
1896 cksum_offset = (pi->tcp->th_off << 2);
1904 /* "rdma_count" is the number of RDMAs belonging to the
1905 * current packet BEFORE the current send request. For
1906 * non-TSO packets, this is equal to "count".
1907 * For TSO packets, rdma_count needs to be reset
1908 * to 0 after a segment cut.
1910 * The rdma_count field of the send request is
1911 * the number of RDMAs of the packet starting at
1912 * that request. For TSO send requests with one or more cuts
1913 * in the middle, this is the number of RDMAs starting
1914 * after the last cut in the request. All previous
1915 * segments before the last cut implicitly have 1 RDMA.
1917 * Since the number of RDMAs is not known beforehand,
1918 * it must be filled-in retroactively - after each
1919 * segmentation cut or at the end of the entire packet.
1922 while (busdma_seg_cnt) {
1923 /* Break the busdma segment up into pieces*/
1924 low = MXGE_LOWPART_TO_U32(seg->ds_addr);
1925 high_swapped = htobe32(MXGE_HIGHPART_TO_U32(seg->ds_addr));
1929 flags_next = flags & ~MXGEFW_FLAGS_FIRST;
1931 cum_len_next = cum_len + seglen;
1932 (req-rdma_count)->rdma_count = rdma_count + 1;
1933 if (__predict_true(cum_len >= 0)) {
1935 chop = (cum_len_next > mss);
1936 cum_len_next = cum_len_next % mss;
1937 next_is_first = (cum_len_next == 0);
1938 flags |= chop * MXGEFW_FLAGS_TSO_CHOP;
1939 flags_next |= next_is_first *
1941 rdma_count |= -(chop | next_is_first);
1942 rdma_count += chop & !next_is_first;
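/*
 * Branchless form of the rules described above: -(x) is 0 or ~0 for
 * x in {0,1}, so the OR resets rdma_count to -1 whenever this piece
 * ends a TSO segment (chop) or the next piece starts a new one
 * (next_is_first); the second line then accounts for a mid-piece
 * cut's trailing bytes as one RDMA of the new segment.
 */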
1943 } else if (cum_len_next >= 0) {
1948 small = (mss <= MXGEFW_SEND_SMALL_SIZE);
1949 flags_next = MXGEFW_FLAGS_TSO_PLD |
1950 MXGEFW_FLAGS_FIRST |
1951 (small * MXGEFW_FLAGS_SMALL);
1954 req->addr_high = high_swapped;
1955 req->addr_low = htobe32(low);
1956 req->pseudo_hdr_offset = pseudo_hdr_offset;
1958 req->rdma_count = 1;
1959 req->length = htobe16(seglen);
1960 req->cksum_offset = cksum_offset;
1961 req->flags = flags | ((cum_len & 1) *
1962 MXGEFW_FLAGS_ALIGN_ODD);
1965 cum_len = cum_len_next;
1970 if (cksum_offset != 0 && !pi->ip6) {
1971 if (__predict_false(cksum_offset > seglen))
1972 cksum_offset -= seglen;
1976 if (__predict_false(cnt > tx->max_desc))
1982 (req-rdma_count)->rdma_count = rdma_count;
1986 req->flags |= MXGEFW_FLAGS_TSO_LAST;
1987 } while (!(req->flags & (MXGEFW_FLAGS_TSO_CHOP | MXGEFW_FLAGS_FIRST)));
1989 tx->info[((cnt - 1) + tx->req) & tx->mask].flag = 1;
1990 mxge_submit_req(tx, tx->req_list, cnt);
1991 #ifdef IFNET_BUF_RING
1992 if ((ss->sc->num_slices > 1) && tx->queue_active == 0) {
1993 /* tell the NIC to start polling this slice */
1995 tx->queue_active = 1;
2003 bus_dmamap_unload(tx->dmat, tx->info[tx->req & tx->mask].map);
2007 printf("tx->max_desc exceeded via TSO!\n");
2008 printf("mss = %d, %ld, %d!\n", mss,
2009 (long)seg - (long)tx->seg_list, tx->max_desc);
2016 #endif /* IFCAP_TSO4 */
2018 #ifdef MXGE_NEW_VLAN_API
2020 * We reproduce the software vlan tag insertion from
2021 * net/if_vlan.c:vlan_start() here so that we can advertise "hardware"
2022 * vlan tag insertion. We need to advertise this in order to have the
2023 * vlan interface respect our csum offload flags.
2025 static struct mbuf *
2026 mxge_vlan_tag_insert(struct mbuf *m)
2028 struct ether_vlan_header *evl;
2030 M_PREPEND(m, ETHER_VLAN_ENCAP_LEN, M_NOWAIT);
2031 if (__predict_false(m == NULL))
2033 if (m->m_len < sizeof(*evl)) {
2034 m = m_pullup(m, sizeof(*evl));
2035 if (__predict_false(m == NULL))
2039 * Transform the Ethernet header into an Ethernet header
2040 * with 802.1Q encapsulation.
2042 evl = mtod(m, struct ether_vlan_header *);
2043 bcopy((char *)evl + ETHER_VLAN_ENCAP_LEN,
2044 (char *)evl, ETHER_HDR_LEN - ETHER_TYPE_LEN);
2045 evl->evl_encap_proto = htons(ETHERTYPE_VLAN);
2046 evl->evl_tag = htons(m->m_pkthdr.ether_vtag);
2047 m->m_flags &= ~M_VLANTAG;
2050 #endif /* MXGE_NEW_VLAN_API */
2053 mxge_encap(struct mxge_slice_state *ss, struct mbuf *m)
2055 struct mxge_pkt_info pi = {0,0,0,0};
2057 mcp_kreq_ether_send_t *req;
2058 bus_dma_segment_t *seg;
2062 int cnt, cum_len, err, i, idx, odd_flag;
2063 uint16_t pseudo_hdr_offset;
2064 uint8_t flags, cksum_offset;
2071 #ifdef MXGE_NEW_VLAN_API
2072 if (m->m_flags & M_VLANTAG) {
2073 m = mxge_vlan_tag_insert(m);
2074 if (__predict_false(m == NULL))
2075 goto drop_without_m;
2078 if (m->m_pkthdr.csum_flags &
2079 (CSUM_TSO | CSUM_DELAY_DATA | CSUM_DELAY_DATA_IPV6)) {
2080 if (mxge_parse_tx(ss, m, &pi))
2084 /* (try to) map the frame for DMA */
2085 idx = tx->req & tx->mask;
2086 err = bus_dmamap_load_mbuf_sg(tx->dmat, tx->info[idx].map,
2087 m, tx->seg_list, &cnt,
2089 if (__predict_false(err == EFBIG)) {
2090 /* Too many segments in the chain. Try
2092 m_tmp = m_defrag(m, M_NOWAIT);
2093 if (m_tmp == NULL) {
2098 err = bus_dmamap_load_mbuf_sg(tx->dmat,
2100 m, tx->seg_list, &cnt,
2103 if (__predict_false(err != 0)) {
2104 device_printf(sc->dev, "bus_dmamap_load_mbuf_sg returned %d"
2105 " packet len = %d\n", err, m->m_pkthdr.len);
2108 bus_dmamap_sync(tx->dmat, tx->info[idx].map,
2109 BUS_DMASYNC_PREWRITE);
2110 tx->info[idx].m = m;
2113 /* TSO is different enough, we handle it in another routine */
2114 if (m->m_pkthdr.csum_flags & (CSUM_TSO)) {
2115 mxge_encap_tso(ss, m, cnt, &pi);
2122 pseudo_hdr_offset = 0;
2123 flags = MXGEFW_FLAGS_NO_TSO;
2125 /* checksum offloading? */
2126 if (m->m_pkthdr.csum_flags &
2127 (CSUM_DELAY_DATA | CSUM_DELAY_DATA_IPV6)) {
2128 /* ensure ip header is in first mbuf, copy
2129 it to a scratch buffer if not */
2130 cksum_offset = pi.ip_off + pi.ip_hlen;
2131 pseudo_hdr_offset = cksum_offset + m->m_pkthdr.csum_data;
2132 pseudo_hdr_offset = htobe16(pseudo_hdr_offset);
2133 req->cksum_offset = cksum_offset;
2134 flags |= MXGEFW_FLAGS_CKSUM;
2135 odd_flag = MXGEFW_FLAGS_ALIGN_ODD;
2139 if (m->m_pkthdr.len < MXGEFW_SEND_SMALL_SIZE)
2140 flags |= MXGEFW_FLAGS_SMALL;
2142 /* convert segments into a request list */
2145 req->flags = MXGEFW_FLAGS_FIRST;
2146 for (i = 0; i < cnt; i++) {
2148 htobe32(MXGE_LOWPART_TO_U32(seg->ds_addr));
2150 htobe32(MXGE_HIGHPART_TO_U32(seg->ds_addr));
2151 req->length = htobe16(seg->ds_len);
2152 req->cksum_offset = cksum_offset;
2153 if (cksum_offset > seg->ds_len)
2154 cksum_offset -= seg->ds_len;
2157 req->pseudo_hdr_offset = pseudo_hdr_offset;
2158 req->pad = 0; /* complete solid 16-byte block */
2159 req->rdma_count = 1;
2160 req->flags |= flags | ((cum_len & 1) * odd_flag);
2161 cum_len += seg->ds_len;
2167 /* pad runts to 60 bytes */
2171 htobe32(MXGE_LOWPART_TO_U32(sc->zeropad_dma.bus_addr));
2173 htobe32(MXGE_HIGHPART_TO_U32(sc->zeropad_dma.bus_addr));
2174 req->length = htobe16(60 - cum_len);
2175 req->cksum_offset = 0;
2176 req->pseudo_hdr_offset = pseudo_hdr_offset;
2177 req->pad = 0; /* complete solid 16-byte block */
2178 req->rdma_count = 1;
2179 req->flags |= flags | ((cum_len & 1) * odd_flag);
2183 tx->req_list[0].rdma_count = cnt;
2185 /* print what the firmware will see */
2186 for (i = 0; i < cnt; i++) {
2187 printf("%d: addr: 0x%x 0x%x len:%d pso%d,"
2188 "cso:%d, flags:0x%x, rdma:%d\n",
2189 i, (int)ntohl(tx->req_list[i].addr_high),
2190 (int)ntohl(tx->req_list[i].addr_low),
2191 (int)ntohs(tx->req_list[i].length),
2192 (int)ntohs(tx->req_list[i].pseudo_hdr_offset),
2193 tx->req_list[i].cksum_offset, tx->req_list[i].flags,
2194 tx->req_list[i].rdma_count);
2196 printf("--------------\n");
2198 tx->info[((cnt - 1) + tx->req) & tx->mask].flag = 1;
2199 mxge_submit_req(tx, tx->req_list, cnt);
2200 #ifdef IFNET_BUF_RING
2201 if ((ss->sc->num_slices > 1) && tx->queue_active == 0) {
2202 /* tell the NIC to start polling this slice */
2204 tx->queue_active = 1;
2218 #ifdef IFNET_BUF_RING
2220 mxge_qflush(struct ifnet *ifp)
2222 mxge_softc_t *sc = ifp->if_softc;
2227 for (slice = 0; slice < sc->num_slices; slice++) {
2228 tx = &sc->ss[slice].tx;
2230 while ((m = buf_ring_dequeue_sc(tx->br)) != NULL)
2232 mtx_unlock(&tx->mtx);
2238 mxge_start_locked(struct mxge_slice_state *ss)
2249 while ((tx->mask - (tx->req - tx->done)) > tx->max_desc) {
2250 m = drbr_dequeue(ifp, tx->br);
2254 /* let BPF see it */
2257 /* give it to the nic */
2260 /* ran out of transmit slots */
2261 if (((ss->if_drv_flags & IFF_DRV_OACTIVE) == 0)
2262 && (!drbr_empty(ifp, tx->br))) {
2263 ss->if_drv_flags |= IFF_DRV_OACTIVE;
2269 mxge_transmit_locked(struct mxge_slice_state *ss, struct mbuf *m)
2280 if ((ss->if_drv_flags & (IFF_DRV_RUNNING|IFF_DRV_OACTIVE)) !=
2282 err = drbr_enqueue(ifp, tx->br, m);
2286 if (!drbr_needs_enqueue(ifp, tx->br) &&
2287 ((tx->mask - (tx->req - tx->done)) > tx->max_desc)) {
2288 /* let BPF see it */
2290 /* give it to the nic */
2292 } else if ((err = drbr_enqueue(ifp, tx->br, m)) != 0) {
2295 if (!drbr_empty(ifp, tx->br))
2296 mxge_start_locked(ss);
2301 mxge_transmit(struct ifnet *ifp, struct mbuf *m)
2303 mxge_softc_t *sc = ifp->if_softc;
2304 struct mxge_slice_state *ss;
2309 slice = m->m_pkthdr.flowid;
2310 slice &= (sc->num_slices - 1); /* num_slices always power of 2 */
2312 ss = &sc->ss[slice];
2315 if (mtx_trylock(&tx->mtx)) {
2316 err = mxge_transmit_locked(ss, m);
2317 mtx_unlock(&tx->mtx);
2319 err = drbr_enqueue(ifp, tx->br, m);
2328 mxge_start_locked(struct mxge_slice_state *ss)
2338 while ((tx->mask - (tx->req - tx->done)) > tx->max_desc) {
2339 IFQ_DRV_DEQUEUE(&ifp->if_snd, m);
2343 /* let BPF see it */
2346 /* give it to the nic */
2349 /* ran out of transmit slots */
2350 if ((sc->ifp->if_drv_flags & IFF_DRV_OACTIVE) == 0) {
2351 sc->ifp->if_drv_flags |= IFF_DRV_OACTIVE;
2357 mxge_start(struct ifnet *ifp)
2359 mxge_softc_t *sc = ifp->if_softc;
2360 struct mxge_slice_state *ss;
2362 /* only use the first slice for now */
2364 mtx_lock(&ss->tx.mtx);
2365 mxge_start_locked(ss);
2366 mtx_unlock(&ss->tx.mtx);
2370 * copy an array of mcp_kreq_ether_recv_t's to the mcp. Copy
2371 * at most 32 bytes at a time, so as to avoid involving the software
2372 * pio handler in the nic. We re-write the first segment's low
2373 * DMA address to mark it valid only after we write the entire chunk
2377 mxge_submit_8rx(volatile mcp_kreq_ether_recv_t *dst,
2378 mcp_kreq_ether_recv_t *src)
2382 low = src->addr_low;
2383 src->addr_low = 0xffffffff;
2384 mxge_pio_copy(dst, src, 4 * sizeof (*src));
2386 mxge_pio_copy(dst + 4, src + 4, 4 * sizeof (*src));
2388 src->addr_low = low;
2389 dst->addr_low = low;
2394 mxge_get_buf_small(struct mxge_slice_state *ss, bus_dmamap_t map, int idx)
2396 bus_dma_segment_t seg;
2398 mxge_rx_ring_t *rx = &ss->rx_small;
2401 m = m_gethdr(M_NOWAIT, MT_DATA);
2408 err = bus_dmamap_load_mbuf_sg(rx->dmat, map, m,
2409 &seg, &cnt, BUS_DMA_NOWAIT);
2414 rx->info[idx].m = m;
2415 rx->shadow[idx].addr_low =
2416 htobe32(MXGE_LOWPART_TO_U32(seg.ds_addr));
2417 rx->shadow[idx].addr_high =
2418 htobe32(MXGE_HIGHPART_TO_U32(seg.ds_addr));
2422 mxge_submit_8rx(&rx->lanai[idx - 7], &rx->shadow[idx - 7]);
2427 mxge_get_buf_big(struct mxge_slice_state *ss, bus_dmamap_t map, int idx)
2429 bus_dma_segment_t seg[3];
2431 mxge_rx_ring_t *rx = &ss->rx_big;
2434 m = m_getjcl(M_NOWAIT, MT_DATA, M_PKTHDR, rx->cl_size);
2440 m->m_len = rx->mlen;
2441 err = bus_dmamap_load_mbuf_sg(rx->dmat, map, m,
2442 seg, &cnt, BUS_DMA_NOWAIT);
2447 rx->info[idx].m = m;
2448 rx->shadow[idx].addr_low =
2449 htobe32(MXGE_LOWPART_TO_U32(seg->ds_addr));
2450 rx->shadow[idx].addr_high =
2451 htobe32(MXGE_HIGHPART_TO_U32(seg->ds_addr));
2453 #if MXGE_VIRT_JUMBOS
2454 for (i = 1; i < cnt; i++) {
2455 rx->shadow[idx + i].addr_low =
2456 htobe32(MXGE_LOWPART_TO_U32(seg[i].ds_addr));
2457 rx->shadow[idx + i].addr_high =
2458 htobe32(MXGE_HIGHPART_TO_U32(seg[i].ds_addr));
2463 for (i = 0; i < rx->nbufs; i++) {
2464 if ((idx & 7) == 7) {
2465 mxge_submit_8rx(&rx->lanai[idx - 7],
2466 &rx->shadow[idx - 7]);
2476 mxge_csum_generic(uint16_t *raw, int len)
2487 csum = (csum >> 16) + (csum & 0xffff);
2488 csum = (csum >> 16) + (csum & 0xffff);
2489 return (uint16_t)csum;
2492 static inline uint16_t
2493 mxge_rx_csum6(void *p, struct mbuf *m, uint32_t csum)
2496 int nxt, cksum_offset;
2497 struct ip6_hdr *ip6 = p;
2501 cksum_offset = sizeof (*ip6) + ETHER_HDR_LEN;
2502 if (nxt != IPPROTO_TCP && nxt != IPPROTO_UDP) {
2503 cksum_offset = ip6_lasthdr(m, ETHER_HDR_LEN,
2504 IPPROTO_IPV6, &nxt);
2505 if (nxt != IPPROTO_TCP && nxt != IPPROTO_UDP)
2510 * IPv6 headers do not contain a checksum, and hence
2511 * do not checksum to zero, so they don't "fall out"
2512 * of the partial checksum calculation like IPv4
2513 * headers do. We need to fix the partial checksum by
2514 * subtracting the checksum of the IPv6 header.
2517 partial = mxge_csum_generic((uint16_t *)ip6, cksum_offset -
2520 csum += (csum < ~partial);
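/*
 * Subtracting in ones'-complement arithmetic: adding ~partial is the
 * same as subtracting partial, provided the end-around carry (the
 * "csum < ~partial" term) is folded back in; the two 16-bit folds
 * below then reduce the 32-bit accumulator to a 16-bit checksum.
 */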
2521 csum = (csum >> 16) + (csum & 0xFFFF);
2522 csum = (csum >> 16) + (csum & 0xFFFF);
2523 c = in6_cksum_pseudo(ip6, m->m_pkthdr.len - cksum_offset, nxt,
2530 * Myri10GE hardware checksums are not valid if the sender
2531 * padded the frame with non-zero padding. This is because
2532 * the firmware just does a simple 16-bit 1s complement
2533 * checksum across the entire frame, excluding the first 14
2534 bytes. It is best to simply check the checksum and
2535 * tell the stack about it only if the checksum is good
2538 static inline uint16_t
2539 mxge_rx_csum(struct mbuf *m, int csum)
2541 struct ether_header *eh;
2545 #if defined(INET) || defined(INET6)
2546 int cap = m->m_pkthdr.rcvif->if_capenable;
2551 eh = mtod(m, struct ether_header *);
2552 etype = ntohs(eh->ether_type);
2556 if ((cap & IFCAP_RXCSUM) == 0)
2558 ip = (struct ip *)(eh + 1);
2559 if (ip->ip_p != IPPROTO_TCP && ip->ip_p != IPPROTO_UDP)
2561 c = in_pseudo(ip->ip_src.s_addr, ip->ip_dst.s_addr,
2562 htonl(ntohs(csum) + ntohs(ip->ip_len) -
2563 (ip->ip_hl << 2) + ip->ip_p));
2568 case ETHERTYPE_IPV6:
2569 if ((cap & IFCAP_RXCSUM_IPV6) == 0)
2571 c = mxge_rx_csum6((eh + 1), m, csum);
2581 mxge_vlan_tag_remove(struct mbuf *m, uint32_t *csum)
2583 struct ether_vlan_header *evl;
2584 struct ether_header *eh;
2587 evl = mtod(m, struct ether_vlan_header *);
2588 eh = mtod(m, struct ether_header *);
2591 * fix checksum by subtracting ETHER_VLAN_ENCAP_LEN bytes
2592 after what the firmware thought was the end of the ethernet header.
2596 /* put checksum into host byte order */
2597 *csum = ntohs(*csum);
2598 partial = ntohl(*(uint32_t *)(mtod(m, char *) + ETHER_HDR_LEN));
2599 (*csum) += ~partial;
2600 (*csum) += ((*csum) < ~partial);
2601 (*csum) = ((*csum) >> 16) + ((*csum) & 0xFFFF);
2602 (*csum) = ((*csum) >> 16) + ((*csum) & 0xFFFF);
2604 /* restore checksum to network byte order;
2605 later consumers expect this */
2606 *csum = htons(*csum);
2609 #ifdef MXGE_NEW_VLAN_API
2610 m->m_pkthdr.ether_vtag = ntohs(evl->evl_tag);
2614 mtag = m_tag_alloc(MTAG_VLAN, MTAG_VLAN_TAG, sizeof(u_int),
2618 VLAN_TAG_VALUE(mtag) = ntohs(evl->evl_tag);
2619 m_tag_prepend(m, mtag);
2623 m->m_flags |= M_VLANTAG;
2626 * Remove the 802.1q header by copying the Ethernet
2627 * addresses over it and adjusting the beginning of
2628 * the data in the mbuf. The encapsulated Ethernet
2629 * type field is already in place.
2631 bcopy((char *)evl, (char *)evl + ETHER_VLAN_ENCAP_LEN,
2632 ETHER_HDR_LEN - ETHER_TYPE_LEN);
2633 m_adj(m, ETHER_VLAN_ENCAP_LEN);
2638 mxge_rx_done_big(struct mxge_slice_state *ss, uint32_t len,
2639 uint32_t csum, int lro)
2644 struct ether_header *eh;
2646 bus_dmamap_t old_map;
2652 idx = rx->cnt & rx->mask;
2653 rx->cnt += rx->nbufs;
2654 /* save a pointer to the received mbuf */
2655 m = rx->info[idx].m;
2656 /* try to replace the received mbuf */
2657 if (mxge_get_buf_big(ss, rx->extra_map, idx)) {
2658 /* drop the frame -- the old mbuf is re-cycled */
2659 if_inc_counter(ifp, IFCOUNTER_IERRORS, 1);
2663 /* unmap the received buffer */
2664 old_map = rx->info[idx].map;
2665 bus_dmamap_sync(rx->dmat, old_map, BUS_DMASYNC_POSTREAD);
2666 bus_dmamap_unload(rx->dmat, old_map);
2668 /* swap the bus_dmamap_t's */
2669 rx->info[idx].map = rx->extra_map;
2670 rx->extra_map = old_map;
2672 /* mcp implicitly skips 1st 2 bytes so that the packet is properly aligned */
2674 m->m_data += MXGEFW_PAD;
2676 m->m_pkthdr.rcvif = ifp;
2677 m->m_len = m->m_pkthdr.len = len;
2679 eh = mtod(m, struct ether_header *);
2680 if (eh->ether_type == htons(ETHERTYPE_VLAN)) {
2681 mxge_vlan_tag_remove(m, &csum);
2683 /* flowid only valid if RSS hashing is enabled */
2684 if (sc->num_slices > 1) {
2685 m->m_pkthdr.flowid = (ss - sc->ss);
2686 M_HASHTYPE_SET(m, M_HASHTYPE_OPAQUE);
2688 /* if the checksum is valid, mark it in the mbuf header */
2689 if ((ifp->if_capenable & (IFCAP_RXCSUM_IPV6 | IFCAP_RXCSUM)) &&
2690 (0 == mxge_rx_csum(m, csum))) {
2691 /* Tell the stack that the checksum is good */
2692 m->m_pkthdr.csum_data = 0xffff;
2693 m->m_pkthdr.csum_flags = CSUM_PSEUDO_HDR |
2696 #if defined(INET) || defined (INET6)
2697 if (lro && (0 == tcp_lro_rx(&ss->lc, m, 0)))
2701 /* pass the frame up the stack */
2702 (*ifp->if_input)(ifp, m);
2706 mxge_rx_done_small(struct mxge_slice_state *ss, uint32_t len,
2707 uint32_t csum, int lro)
2711 struct ether_header *eh;
2714 bus_dmamap_t old_map;
2720 idx = rx->cnt & rx->mask;
2722 /* save a pointer to the received mbuf */
2723 m = rx->info[idx].m;
2724 /* try to replace the received mbuf */
2725 if (mxge_get_buf_small(ss, rx->extra_map, idx)) {
2726 /* drop the frame -- the old mbuf is re-cycled */
2727 if_inc_counter(ifp, IFCOUNTER_IERRORS, 1);
2731 /* unmap the received buffer */
2732 old_map = rx->info[idx].map;
2733 bus_dmamap_sync(rx->dmat, old_map, BUS_DMASYNC_POSTREAD);
2734 bus_dmamap_unload(rx->dmat, old_map);
2736 /* swap the bus_dmamap_t's */
2737 rx->info[idx].map = rx->extra_map;
2738 rx->extra_map = old_map;
2740 /* mcp implicitly skips 1st 2 bytes so that the packet is properly aligned */
2742 m->m_data += MXGEFW_PAD;
2744 m->m_pkthdr.rcvif = ifp;
2745 m->m_len = m->m_pkthdr.len = len;
2747 eh = mtod(m, struct ether_header *);
2748 if (eh->ether_type == htons(ETHERTYPE_VLAN)) {
2749 mxge_vlan_tag_remove(m, &csum);
2751 /* flowid only valid if RSS hashing is enabled */
2752 if (sc->num_slices > 1) {
2753 m->m_pkthdr.flowid = (ss - sc->ss);
2754 M_HASHTYPE_SET(m, M_HASHTYPE_OPAQUE);
2756 /* if the checksum is valid, mark it in the mbuf header */
2757 if ((ifp->if_capenable & (IFCAP_RXCSUM_IPV6 | IFCAP_RXCSUM)) &&
2758 (0 == mxge_rx_csum(m, csum))) {
2759 /* Tell the stack that the checksum is good */
2760 m->m_pkthdr.csum_data = 0xffff;
2761 m->m_pkthdr.csum_flags = CSUM_PSEUDO_HDR |
2764 #if defined(INET) || defined (INET6)
2765 if (lro && (0 == tcp_lro_rx(&ss->lc, m, csum)))
2769 /* pass the frame up the stack */
2770 (*ifp->if_input)(ifp, m);
2774 mxge_clean_rx_done(struct mxge_slice_state *ss)
2776 mxge_rx_done_t *rx_done = &ss->rx_done;
2782 lro = ss->sc->ifp->if_capenable & IFCAP_LRO;
2783 while (rx_done->entry[rx_done->idx].length != 0) {
2784 length = ntohs(rx_done->entry[rx_done->idx].length);
2785 rx_done->entry[rx_done->idx].length = 0;
2786 checksum = rx_done->entry[rx_done->idx].checksum;
2787 if (length <= (MHLEN - MXGEFW_PAD))
2788 mxge_rx_done_small(ss, length, checksum, lro);
2790 mxge_rx_done_big(ss, length, checksum, lro);
2792 rx_done->idx = rx_done->cnt & rx_done->mask;
2794 /* limit potential for livelock */
2795 if (__predict_false(++limit > rx_done->mask / 2))
2798 #if defined(INET) || defined (INET6)
2799 tcp_lro_flush_all(&ss->lc);
2805 mxge_tx_done(struct mxge_slice_state *ss, uint32_t mcp_idx)
2816 while (tx->pkt_done != mcp_idx) {
2817 idx = tx->done & tx->mask;
2819 m = tx->info[idx].m;
2820 /* mbuf and DMA map are only attached to the first segment per-mbuf */
2823 ss->obytes += m->m_pkthdr.len;
2824 if (m->m_flags & M_MCAST)
2827 tx->info[idx].m = NULL;
2828 map = tx->info[idx].map;
2829 bus_dmamap_unload(tx->dmat, map);
2832 if (tx->info[idx].flag) {
2833 tx->info[idx].flag = 0;
2838 /* If we have space, clear IFF_DRV_OACTIVE to tell the stack that
2839 it's OK to send packets */
2840 #ifdef IFNET_BUF_RING
2841 flags = &ss->if_drv_flags;
2843 flags = &ifp->if_drv_flags;
2845 mtx_lock(&ss->tx.mtx);
2846 if ((*flags) & IFF_DRV_OACTIVE &&
2847 tx->req - tx->done < (tx->mask + 1)/4) {
2848 *(flags) &= ~IFF_DRV_OACTIVE;
2850 mxge_start_locked(ss);
2852 #ifdef IFNET_BUF_RING
2853 if ((ss->sc->num_slices > 1) && (tx->req == tx->done)) {
2854 /* let the NIC stop polling this queue, since there
2855 * are no more transmits pending */
2856 if (tx->req == tx->done) {
2858 tx->queue_active = 0;
2864 mtx_unlock(&ss->tx.mtx);
2868 static struct mxge_media_type mxge_xfp_media_types[] =
2870 {IFM_10G_CX4, 0x7f, "10GBASE-CX4 (module)"},
2871 {IFM_10G_SR, (1 << 7), "10GBASE-SR"},
2872 {IFM_10G_LR, (1 << 6), "10GBASE-LR"},
2873 {0, (1 << 5), "10GBASE-ER"},
2874 {IFM_10G_LRM, (1 << 4), "10GBASE-LRM"},
2875 {0, (1 << 3), "10GBASE-SW"},
2876 {0, (1 << 2), "10GBASE-LW"},
2877 {0, (1 << 1), "10GBASE-EW"},
2878 {0, (1 << 0), "Reserved"}
2880 static struct mxge_media_type mxge_sfp_media_types[] =
2882 {IFM_10G_TWINAX, 0, "10GBASE-Twinax"},
2883 {0, (1 << 7), "Reserved"},
2884 {IFM_10G_LRM, (1 << 6), "10GBASE-LRM"},
2885 {IFM_10G_LR, (1 << 5), "10GBASE-LR"},
2886 {IFM_10G_SR, (1 << 4), "10GBASE-SR"},
2887 {IFM_10G_TWINAX,(1 << 0), "10GBASE-Twinax"}
2891 mxge_media_set(mxge_softc_t *sc, int media_type)
2895 ifmedia_add(&sc->media, IFM_ETHER | IFM_FDX | media_type,
2897 ifmedia_set(&sc->media, IFM_ETHER | IFM_FDX | media_type);
2898 sc->current_media = media_type;
2899 sc->media.ifm_media = sc->media.ifm_cur->ifm_media;
2903 mxge_media_init(mxge_softc_t *sc)
2908 ifmedia_removeall(&sc->media);
2909 mxge_media_set(sc, IFM_AUTO);
2912 * parse the product code to determine the interface type
2913 * (CX4, XFP, Quad Ribbon Fiber) by looking at the character
2914 * after the 3rd dash in the driver's cached copy of the
2915 * EEPROM's product code string.
2917 ptr = sc->product_code_string;
2919 device_printf(sc->dev, "Missing product code\n");
2923 for (i = 0; i < 3; i++, ptr++) {
2924 ptr = strchr(ptr, '-');
2926 device_printf(sc->dev,
2927 "only %d dashes in PC?!?\n", i);
2931 if (*ptr == 'C' || *(ptr +1) == 'C') {
2933 sc->connector = MXGE_CX4;
2934 mxge_media_set(sc, IFM_10G_CX4);
2935 } else if (*ptr == 'Q') {
2936 /* -Q is Quad Ribbon Fiber */
2937 sc->connector = MXGE_QRF;
2938 device_printf(sc->dev, "Quad Ribbon Fiber Media\n");
2939 /* FreeBSD has no media type for Quad ribbon fiber */
2940 } else if (*ptr == 'R') {
2942 sc->connector = MXGE_XFP;
2943 } else if (*ptr == 'S' || *(ptr +1) == 'S') {
2944 /* -S or -2S is SFP+ */
2945 sc->connector = MXGE_SFP;
2947 device_printf(sc->dev, "Unknown media type: %c\n", *ptr);
2952 * Determine the media type for a NIC. Some XFPs will identify
2953 * themselves only when their link is up, so this is initiated via a
2954 * link up interrupt. However, this can potentially take up to
2955 * several milliseconds, so it is run via the watchdog routine, rather
2956 * than in the interrupt handler itself.
2959 mxge_media_probe(mxge_softc_t *sc)
2964 struct mxge_media_type *mxge_media_types = NULL;
2965 int i, err, ms, mxge_media_type_entries;
2968 sc->need_media_probe = 0;
2970 if (sc->connector == MXGE_XFP) {
2972 mxge_media_types = mxge_xfp_media_types;
2973 mxge_media_type_entries =
2974 nitems(mxge_xfp_media_types);
2975 byte = MXGE_XFP_COMPLIANCE_BYTE;
2977 } else if (sc->connector == MXGE_SFP) {
2978 /* -S or -2S is SFP+ */
2979 mxge_media_types = mxge_sfp_media_types;
2980 mxge_media_type_entries =
2981 nitems(mxge_sfp_media_types);
2985 /* nothing to do; media type cannot change */
2990 * At this point we know the NIC has an XFP cage, so now we
2991 * try to determine what is in the cage by using the
2992 * firmware's XFP I2C commands to read the XFP 10GbE compliance
2993 * register. We read just one byte, which may take over a millisecond before it is valid.
2997 cmd.data0 = 0; /* just fetch 1 byte, not all 256 */
2999 err = mxge_send_cmd(sc, MXGEFW_CMD_I2C_READ, &cmd);
3000 if (err == MXGEFW_CMD_ERROR_I2C_FAILURE) {
3001 device_printf(sc->dev, "failed to read XFP\n");
3003 if (err == MXGEFW_CMD_ERROR_I2C_ABSENT) {
3004 device_printf(sc->dev, "Type R/S with no XFP!?!?\n");
3006 if (err != MXGEFW_CMD_OK) {
3010 /* now we wait for the data to be cached */
3012 err = mxge_send_cmd(sc, MXGEFW_CMD_I2C_BYTE, &cmd);
3013 for (ms = 0; (err == EBUSY) && (ms < 50); ms++) {
3016 err = mxge_send_cmd(sc, MXGEFW_CMD_I2C_BYTE, &cmd);
3018 if (err != MXGEFW_CMD_OK) {
3019 device_printf(sc->dev, "failed to read %s (%d, %dms)\n",
3020 cage_type, err, ms);
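/* the first table entry is compared with == because it is a full-byte value rather than a single bit */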
3024 if (cmd.data0 == mxge_media_types[0].bitmask) {
3026 device_printf(sc->dev, "%s:%s\n", cage_type,
3027 mxge_media_types[0].name);
3028 if (sc->current_media != mxge_media_types[0].flag) {
3029 mxge_media_init(sc);
3030 mxge_media_set(sc, mxge_media_types[0].flag);
3034 for (i = 1; i < mxge_media_type_entries; i++) {
3035 if (cmd.data0 & mxge_media_types[i].bitmask) {
3037 device_printf(sc->dev, "%s:%s\n",
3039 mxge_media_types[i].name);
3041 if (sc->current_media != mxge_media_types[i].flag) {
3042 mxge_media_init(sc);
3043 mxge_media_set(sc, mxge_media_types[i].flag);
3049 device_printf(sc->dev, "%s media 0x%x unknown\n",
3050 cage_type, cmd.data0);
3056 mxge_intr(void *arg)
3058 struct mxge_slice_state *ss = arg;
3059 mxge_softc_t *sc = ss->sc;
3060 mcp_irq_data_t *stats = ss->fw_stats;
3061 mxge_tx_ring_t *tx = &ss->tx;
3062 mxge_rx_done_t *rx_done = &ss->rx_done;
3063 uint32_t send_done_count;
3067 #ifndef IFNET_BUF_RING
3068 /* an interrupt on a non-zero slice is implicitly valid
3069 since MSI-X irqs are not shared */
3071 mxge_clean_rx_done(ss);
3072 *ss->irq_claim = be32toh(3);
3077 /* make sure the DMA has finished */
3078 if (!stats->valid) {
3081 valid = stats->valid;
3083 if (sc->legacy_irq) {
3084 /* lower legacy IRQ */
3085 *sc->irq_deassert = 0;
3086 if (!mxge_deassert_wait)
3087 /* don't wait for conf. that irq is low */
3093 /* loop while waiting for legacy irq deassertion */
3095 /* check for transmit completes and receives */
3096 send_done_count = be32toh(stats->send_done_count);
3097 while ((send_done_count != tx->pkt_done) ||
3098 (rx_done->entry[rx_done->idx].length != 0)) {
3099 if (send_done_count != tx->pkt_done)
3100 mxge_tx_done(ss, (int)send_done_count);
3101 mxge_clean_rx_done(ss);
3102 send_done_count = be32toh(stats->send_done_count);
3104 if (sc->legacy_irq && mxge_deassert_wait)
3106 } while (*((volatile uint8_t *) &stats->valid));
3108 /* fw link & error stats meaningful only on the first slice */
3109 if (__predict_false((ss == sc->ss) && stats->stats_updated)) {
3110 if (sc->link_state != stats->link_up) {
3111 sc->link_state = stats->link_up;
3112 if (sc->link_state) {
3113 if_link_state_change(sc->ifp, LINK_STATE_UP);
3115 device_printf(sc->dev, "link up\n");
3117 if_link_state_change(sc->ifp, LINK_STATE_DOWN);
3119 device_printf(sc->dev, "link down\n");
3121 sc->need_media_probe = 1;
3123 if (sc->rdma_tags_available !=
3124 be32toh(stats->rdma_tags_available)) {
3125 sc->rdma_tags_available =
3126 be32toh(stats->rdma_tags_available);
3127 device_printf(sc->dev, "RDMA timed out! %d tags "
3128 "left\n", sc->rdma_tags_available);
3131 if (stats->link_down) {
3132 sc->down_cnt += stats->link_down;
3134 if_link_state_change(sc->ifp, LINK_STATE_DOWN);
3138 /* check to see if we have rx token to pass back */
3140 *ss->irq_claim = be32toh(3);
3141 *(ss->irq_claim + 1) = be32toh(3);
3145 mxge_init(void *arg)
3147 mxge_softc_t *sc = arg;
3148 struct ifnet *ifp = sc->ifp;
3151 mtx_lock(&sc->driver_mtx);
3152 if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
3153 (void) mxge_open(sc);
3154 mtx_unlock(&sc->driver_mtx);
3160 mxge_free_slice_mbufs(struct mxge_slice_state *ss)
3164 #if defined(INET) || defined(INET6)
3165 tcp_lro_free(&ss->lc);
3167 for (i = 0; i <= ss->rx_big.mask; i++) {
3168 if (ss->rx_big.info[i].m == NULL)
3170 bus_dmamap_unload(ss->rx_big.dmat,
3171 ss->rx_big.info[i].map);
3172 m_freem(ss->rx_big.info[i].m);
3173 ss->rx_big.info[i].m = NULL;
3176 for (i = 0; i <= ss->rx_small.mask; i++) {
3177 if (ss->rx_small.info[i].m == NULL)
3179 bus_dmamap_unload(ss->rx_small.dmat,
3180 ss->rx_small.info[i].map);
3181 m_freem(ss->rx_small.info[i].m);
3182 ss->rx_small.info[i].m = NULL;
3185 /* transmit ring used only on the first slice */
3186 if (ss->tx.info == NULL)
3189 for (i = 0; i <= ss->tx.mask; i++) {
3190 ss->tx.info[i].flag = 0;
3191 if (ss->tx.info[i].m == NULL)
3193 bus_dmamap_unload(ss->tx.dmat,
3194 ss->tx.info[i].map);
3195 m_freem(ss->tx.info[i].m);
3196 ss->tx.info[i].m = NULL;
3201 mxge_free_mbufs(mxge_softc_t *sc)
3205 for (slice = 0; slice < sc->num_slices; slice++)
3206 mxge_free_slice_mbufs(&sc->ss[slice]);
3210 mxge_free_slice_rings(struct mxge_slice_state *ss)
3215 if (ss->rx_done.entry != NULL)
3216 mxge_dma_free(&ss->rx_done.dma);
3217 ss->rx_done.entry = NULL;
3219 if (ss->tx.req_bytes != NULL)
3220 free(ss->tx.req_bytes, M_DEVBUF);
3221 ss->tx.req_bytes = NULL;
3223 if (ss->tx.seg_list != NULL)
3224 free(ss->tx.seg_list, M_DEVBUF);
3225 ss->tx.seg_list = NULL;
3227 if (ss->rx_small.shadow != NULL)
3228 free(ss->rx_small.shadow, M_DEVBUF);
3229 ss->rx_small.shadow = NULL;
3231 if (ss->rx_big.shadow != NULL)
3232 free(ss->rx_big.shadow, M_DEVBUF);
3233 ss->rx_big.shadow = NULL;
3235 if (ss->tx.info != NULL) {
3236 if (ss->tx.dmat != NULL) {
3237 for (i = 0; i <= ss->tx.mask; i++) {
3238 bus_dmamap_destroy(ss->tx.dmat,
3239 ss->tx.info[i].map);
3241 bus_dma_tag_destroy(ss->tx.dmat);
3243 free(ss->tx.info, M_DEVBUF);
3247 if (ss->rx_small.info != NULL) {
3248 if (ss->rx_small.dmat != NULL) {
3249 for (i = 0; i <= ss->rx_small.mask; i++) {
3250 bus_dmamap_destroy(ss->rx_small.dmat,
3251 ss->rx_small.info[i].map);
3253 bus_dmamap_destroy(ss->rx_small.dmat,
3254 ss->rx_small.extra_map);
3255 bus_dma_tag_destroy(ss->rx_small.dmat);
3257 free(ss->rx_small.info, M_DEVBUF);
3259 ss->rx_small.info = NULL;
3261 if (ss->rx_big.info != NULL) {
3262 if (ss->rx_big.dmat != NULL) {
3263 for (i = 0; i <= ss->rx_big.mask; i++) {
3264 bus_dmamap_destroy(ss->rx_big.dmat,
3265 ss->rx_big.info[i].map);
3267 bus_dmamap_destroy(ss->rx_big.dmat,
3268 ss->rx_big.extra_map);
3269 bus_dma_tag_destroy(ss->rx_big.dmat);
3271 free(ss->rx_big.info, M_DEVBUF);
3273 ss->rx_big.info = NULL;
3277 mxge_free_rings(mxge_softc_t *sc)
3281 for (slice = 0; slice < sc->num_slices; slice++)
3282 mxge_free_slice_rings(&sc->ss[slice]);
3286 mxge_alloc_slice_rings(struct mxge_slice_state *ss, int rx_ring_entries,
3287 int tx_ring_entries)
3289 mxge_softc_t *sc = ss->sc;
3293 /* allocate per-slice receive resources */
3295 ss->rx_small.mask = ss->rx_big.mask = rx_ring_entries - 1;
3296 ss->rx_done.mask = (2 * rx_ring_entries) - 1;
3298 /* allocate the rx shadow rings */
3299 bytes = rx_ring_entries * sizeof (*ss->rx_small.shadow);
3300 ss->rx_small.shadow = malloc(bytes, M_DEVBUF, M_ZERO|M_WAITOK);
3302 bytes = rx_ring_entries * sizeof (*ss->rx_big.shadow);
3303 ss->rx_big.shadow = malloc(bytes, M_DEVBUF, M_ZERO|M_WAITOK);
3305 /* allocate the rx host info rings */
3306 bytes = rx_ring_entries * sizeof (*ss->rx_small.info);
3307 ss->rx_small.info = malloc(bytes, M_DEVBUF, M_ZERO|M_WAITOK);
3309 bytes = rx_ring_entries * sizeof (*ss->rx_big.info);
3310 ss->rx_big.info = malloc(bytes, M_DEVBUF, M_ZERO|M_WAITOK);
3312 /* allocate the rx busdma resources */
3313 err = bus_dma_tag_create(sc->parent_dmat, /* parent */
3315 4096, /* boundary */
3316 BUS_SPACE_MAXADDR, /* low */
3317 BUS_SPACE_MAXADDR, /* high */
3318 NULL, NULL, /* filter */
3319 MHLEN, /* maxsize */
3321 MHLEN, /* maxsegsize */
3322 BUS_DMA_ALLOCNOW, /* flags */
3323 NULL, NULL, /* lock */
3324 &ss->rx_small.dmat); /* tag */
3326 device_printf(sc->dev, "Err %d allocating rx_small dmat\n",
3331 err = bus_dma_tag_create(sc->parent_dmat, /* parent */
3333 #if MXGE_VIRT_JUMBOS
3334 4096, /* boundary */
3338 BUS_SPACE_MAXADDR, /* low */
3339 BUS_SPACE_MAXADDR, /* high */
3340 NULL, NULL, /* filter */
3341 3*4096, /* maxsize */
3342 #if MXGE_VIRT_JUMBOS
3344 4096, /* maxsegsize*/
3347 MJUM9BYTES, /* maxsegsize*/
3349 BUS_DMA_ALLOCNOW, /* flags */
3350 NULL, NULL, /* lock */
3351 &ss->rx_big.dmat); /* tag */
3353 device_printf(sc->dev, "Err %d allocating rx_big dmat\n",
3357 for (i = 0; i <= ss->rx_small.mask; i++) {
3358 err = bus_dmamap_create(ss->rx_small.dmat, 0,
3359 &ss->rx_small.info[i].map);
3361 device_printf(sc->dev, "Err %d rx_small dmamap\n",
3366 err = bus_dmamap_create(ss->rx_small.dmat, 0,
3367 &ss->rx_small.extra_map);
3369 device_printf(sc->dev, "Err %d extra rx_small dmamap\n",
3374 for (i = 0; i <= ss->rx_big.mask; i++) {
3375 err = bus_dmamap_create(ss->rx_big.dmat, 0,
3376 &ss->rx_big.info[i].map);
3378 device_printf(sc->dev, "Err %d rx_big dmamap\n",
3383 err = bus_dmamap_create(ss->rx_big.dmat, 0,
3384 &ss->rx_big.extra_map);
3386 device_printf(sc->dev, "Err %d extra rx_big dmamap\n",
3391 /* now allocate TX resources */
3393 #ifndef IFNET_BUF_RING
3394 /* only use a single TX ring for now */
3395 if (ss != ss->sc->ss)
3399 ss->tx.mask = tx_ring_entries - 1;
3400 ss->tx.max_desc = MIN(MXGE_MAX_SEND_DESC, tx_ring_entries / 4);
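/* max_desc caps how many descriptors a single packet may consume, so one send can never occupy more than a quarter of the ring */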
3403 /* allocate the tx request copy block */
3405 sizeof (*ss->tx.req_list) * (ss->tx.max_desc + 4);
3406 ss->tx.req_bytes = malloc(bytes, M_DEVBUF, M_WAITOK);
3407 /* ensure req_list entries are aligned to 8 bytes */
3408 ss->tx.req_list = (mcp_kreq_ether_send_t *)
3409 ((unsigned long)(ss->tx.req_bytes + 7) & ~7UL);
3411 /* allocate the tx busdma segment list */
3412 bytes = sizeof (*ss->tx.seg_list) * ss->tx.max_desc;
3413 ss->tx.seg_list = (bus_dma_segment_t *)
3414 malloc(bytes, M_DEVBUF, M_WAITOK);
3416 /* allocate the tx host info ring */
3417 bytes = tx_ring_entries * sizeof (*ss->tx.info);
3418 ss->tx.info = malloc(bytes, M_DEVBUF, M_ZERO|M_WAITOK);
3420 /* allocate the tx busdma resources */
3421 err = bus_dma_tag_create(sc->parent_dmat, /* parent */
3423 sc->tx_boundary, /* boundary */
3424 BUS_SPACE_MAXADDR, /* low */
3425 BUS_SPACE_MAXADDR, /* high */
3426 NULL, NULL, /* filter */
3427 65536 + 256, /* maxsize */
3428 ss->tx.max_desc - 2, /* num segs */
3429 sc->tx_boundary, /* maxsegsz */
3430 BUS_DMA_ALLOCNOW, /* flags */
3431 NULL, NULL, /* lock */
3432 &ss->tx.dmat); /* tag */
3435 device_printf(sc->dev, "Err %d allocating tx dmat\n",
3440 /* now use these tags to set up dmamaps for each slot in the ring */
3442 for (i = 0; i <= ss->tx.mask; i++) {
3443 err = bus_dmamap_create(ss->tx.dmat, 0,
3444 &ss->tx.info[i].map);
3446 device_printf(sc->dev, "Err %d tx dmamap\n",
3456 mxge_alloc_rings(mxge_softc_t *sc)
3460 int tx_ring_entries, rx_ring_entries;
3463 /* get ring sizes */
3464 err = mxge_send_cmd(sc, MXGEFW_CMD_GET_SEND_RING_SIZE, &cmd);
3465 tx_ring_size = cmd.data0;
3467 device_printf(sc->dev, "Cannot determine tx ring sizes\n");
3471 tx_ring_entries = tx_ring_size / sizeof (mcp_kreq_ether_send_t);
3472 rx_ring_entries = sc->rx_ring_size / sizeof (mcp_dma_addr_t);
3473 IFQ_SET_MAXLEN(&sc->ifp->if_snd, tx_ring_entries - 1);
3474 sc->ifp->if_snd.ifq_drv_maxlen = sc->ifp->if_snd.ifq_maxlen;
3475 IFQ_SET_READY(&sc->ifp->if_snd);
3477 for (slice = 0; slice < sc->num_slices; slice++) {
3478 err = mxge_alloc_slice_rings(&sc->ss[slice],
3487 mxge_free_rings(sc);
3494 mxge_choose_params(int mtu, int *big_buf_size, int *cl_size, int *nbufs)
3496 int bufsize = mtu + ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN + MXGEFW_PAD;
3498 if (bufsize < MCLBYTES) {
3499 /* easy, everything fits in a single buffer */
3500 *big_buf_size = MCLBYTES;
3501 *cl_size = MCLBYTES;
3506 if (bufsize < MJUMPAGESIZE) {
3507 /* still easy, everything still fits in a single buffer */
3508 *big_buf_size = MJUMPAGESIZE;
3509 *cl_size = MJUMPAGESIZE;
3513 #if MXGE_VIRT_JUMBOS
3514 /* now we need to use virtually contiguous buffers */
3515 *cl_size = MJUM9BYTES;
3516 *big_buf_size = 4096;
3517 *nbufs = mtu / 4096 + 1;
3518 /* needs to be a power of two, so round up */
3522 *cl_size = MJUM9BYTES;
3523 *big_buf_size = MJUM9BYTES;
3529 mxge_slice_open(struct mxge_slice_state *ss, int nbufs, int cl_size)
3538 slice = ss - sc->ss;
3540 #if defined(INET) || defined(INET6)
3541 (void)tcp_lro_init(&ss->lc);
3543 ss->lc.ifp = sc->ifp;
3545 /* get the lanai pointers to the send and receive rings */
3548 #ifndef IFNET_BUF_RING
3549 /* We currently only send from the first slice */
3553 err = mxge_send_cmd(sc, MXGEFW_CMD_GET_SEND_OFFSET, &cmd);
3555 (volatile mcp_kreq_ether_send_t *)(sc->sram + cmd.data0);
3556 ss->tx.send_go = (volatile uint32_t *)
3557 (sc->sram + MXGEFW_ETH_SEND_GO + 64 * slice);
3558 ss->tx.send_stop = (volatile uint32_t *)
3559 (sc->sram + MXGEFW_ETH_SEND_STOP + 64 * slice);
3560 #ifndef IFNET_BUF_RING
3564 err |= mxge_send_cmd(sc,
3565 MXGEFW_CMD_GET_SMALL_RX_OFFSET, &cmd);
3566 ss->rx_small.lanai =
3567 (volatile mcp_kreq_ether_recv_t *)(sc->sram + cmd.data0);
3569 err |= mxge_send_cmd(sc, MXGEFW_CMD_GET_BIG_RX_OFFSET, &cmd);
3571 (volatile mcp_kreq_ether_recv_t *)(sc->sram + cmd.data0);
3574 device_printf(sc->dev,
3575 "failed to get ring sizes or locations\n");
3579 /* stock receive rings */
3580 for (i = 0; i <= ss->rx_small.mask; i++) {
3581 map = ss->rx_small.info[i].map;
3582 err = mxge_get_buf_small(ss, map, i);
3584 device_printf(sc->dev, "alloced %d/%d smalls\n",
3585 i, ss->rx_small.mask + 1);
3589 for (i = 0; i <= ss->rx_big.mask; i++) {
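/* an all-ones address marks a descriptor as not yet valid; the big ring is stocked below, one cluster per nbufs slots */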
3590 ss->rx_big.shadow[i].addr_low = 0xffffffff;
3591 ss->rx_big.shadow[i].addr_high = 0xffffffff;
3593 ss->rx_big.nbufs = nbufs;
3594 ss->rx_big.cl_size = cl_size;
3595 ss->rx_big.mlen = ss->sc->ifp->if_mtu + ETHER_HDR_LEN +
3596 ETHER_VLAN_ENCAP_LEN + MXGEFW_PAD;
3597 for (i = 0; i <= ss->rx_big.mask; i += ss->rx_big.nbufs) {
3598 map = ss->rx_big.info[i].map;
3599 err = mxge_get_buf_big(ss, map, i);
3601 device_printf(sc->dev, "alloced %d/%d bigs\n",
3602 i, ss->rx_big.mask + 1);
3610 mxge_open(mxge_softc_t *sc)
3613 int err, big_bytes, nbufs, slice, cl_size, i;
3615 volatile uint8_t *itable;
3616 struct mxge_slice_state *ss;
3618 /* Copy the MAC address in case it was overridden */
3619 bcopy(IF_LLADDR(sc->ifp), sc->mac_addr, ETHER_ADDR_LEN);
3621 err = mxge_reset(sc, 1);
3623 device_printf(sc->dev, "failed to reset\n");
3627 if (sc->num_slices > 1) {
3628 /* setup the indirection table */
3629 cmd.data0 = sc->num_slices;
3630 err = mxge_send_cmd(sc, MXGEFW_CMD_SET_RSS_TABLE_SIZE,
3633 err |= mxge_send_cmd(sc, MXGEFW_CMD_GET_RSS_TABLE_OFFSET,
3636 device_printf(sc->dev,
3637 "failed to setup rss tables\n");
3641 /* just enable an identity mapping */
3642 itable = sc->sram + cmd.data0;
3643 for (i = 0; i < sc->num_slices; i++)
3644 itable[i] = (uint8_t)i;
3647 cmd.data1 = mxge_rss_hash_type;
3648 err = mxge_send_cmd(sc, MXGEFW_CMD_SET_RSS_ENABLE, &cmd);
3650 device_printf(sc->dev, "failed to enable slices\n");
3656 mxge_choose_params(sc->ifp->if_mtu, &big_bytes, &cl_size, &nbufs);
3659 err = mxge_send_cmd(sc, MXGEFW_CMD_ALWAYS_USE_N_BIG_BUFFERS,
3661 /* error is only meaningful if we're trying to set
3662 MXGEFW_CMD_ALWAYS_USE_N_BIG_BUFFERS > 1 */
3663 if (err && nbufs > 1) {
3664 device_printf(sc->dev,
3665 "Failed to set alway-use-n to %d\n",
3669 /* Give the firmware the mtu and the big and small buffer
3670 sizes. The firmware wants the big buf size to be a power
3671 of two. Luckily, FreeBSD's clusters are powers of two */
3672 cmd.data0 = sc->ifp->if_mtu + ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
3673 err = mxge_send_cmd(sc, MXGEFW_CMD_SET_MTU, &cmd);
3674 cmd.data0 = MHLEN - MXGEFW_PAD;
3675 err |= mxge_send_cmd(sc, MXGEFW_CMD_SET_SMALL_BUFFER_SIZE,
3677 cmd.data0 = big_bytes;
3678 err |= mxge_send_cmd(sc, MXGEFW_CMD_SET_BIG_BUFFER_SIZE, &cmd);
3681 device_printf(sc->dev, "failed to setup params\n");
3685 /* Now give the firmware the pointer to the stats block */
3687 #ifdef IFNET_BUF_RING
3688 slice < sc->num_slices;
3693 ss = &sc->ss[slice];
3695 MXGE_LOWPART_TO_U32(ss->fw_stats_dma.bus_addr);
3697 MXGE_HIGHPART_TO_U32(ss->fw_stats_dma.bus_addr);
3698 cmd.data2 = sizeof(struct mcp_irq_data);
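/* the slice number rides in the upper 16 bits of data2 */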
3699 cmd.data2 |= (slice << 16);
3700 err |= mxge_send_cmd(sc, MXGEFW_CMD_SET_STATS_DMA_V2, &cmd);
3704 bus = sc->ss->fw_stats_dma.bus_addr;
3705 bus += offsetof(struct mcp_irq_data, send_done_count);
3706 cmd.data0 = MXGE_LOWPART_TO_U32(bus);
3707 cmd.data1 = MXGE_HIGHPART_TO_U32(bus);
3708 err = mxge_send_cmd(sc,
3709 MXGEFW_CMD_SET_STATS_DMA_OBSOLETE,
3711 /* Firmware cannot support multicast without STATS_DMA_V2 */
3712 sc->fw_multicast_support = 0;
3714 sc->fw_multicast_support = 1;
3718 device_printf(sc->dev, "failed to setup params\n");
3722 for (slice = 0; slice < sc->num_slices; slice++) {
3723 err = mxge_slice_open(&sc->ss[slice], nbufs, cl_size);
3725 device_printf(sc->dev, "couldn't open slice %d\n",
3731 /* Finally, start the firmware running */
3732 err = mxge_send_cmd(sc, MXGEFW_CMD_ETHERNET_UP, &cmd);
3734 device_printf(sc->dev, "Couldn't bring up link\n");
3737 #ifdef IFNET_BUF_RING
3738 for (slice = 0; slice < sc->num_slices; slice++) {
3739 ss = &sc->ss[slice];
3740 ss->if_drv_flags |= IFF_DRV_RUNNING;
3741 ss->if_drv_flags &= ~IFF_DRV_OACTIVE;
3744 sc->ifp->if_drv_flags |= IFF_DRV_RUNNING;
3745 sc->ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;
3751 mxge_free_mbufs(sc);
3757 mxge_close(mxge_softc_t *sc, int down)
3760 int err, old_down_cnt;
3761 #ifdef IFNET_BUF_RING
3762 struct mxge_slice_state *ss;
3766 #ifdef IFNET_BUF_RING
3767 for (slice = 0; slice < sc->num_slices; slice++) {
3768 ss = &sc->ss[slice];
3769 ss->if_drv_flags &= ~IFF_DRV_RUNNING;
3772 sc->ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
3774 old_down_cnt = sc->down_cnt;
3776 err = mxge_send_cmd(sc, MXGEFW_CMD_ETHERNET_DOWN, &cmd);
3778 device_printf(sc->dev,
3779 "Couldn't bring down link\n");
3781 if (old_down_cnt == sc->down_cnt) {
3782 /* wait for down irq */
3783 DELAY(10 * sc->intr_coal_delay);
3786 if (old_down_cnt == sc->down_cnt) {
3787 device_printf(sc->dev, "never got down irq\n");
3790 mxge_free_mbufs(sc);
3796 mxge_setup_cfg_space(mxge_softc_t *sc)
3798 device_t dev = sc->dev;
3800 uint16_t lnk, pectl;
3802 /* find the PCIe link width and set max read request to 4KB */
3803 if (pci_find_cap(dev, PCIY_EXPRESS, &reg) == 0) {
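/* offset 0x12 within the PCIe capability is the Link Status register; the negotiated link width is in bits 9:4 */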
3804 lnk = pci_read_config(dev, reg + 0x12, 2);
3805 sc->link_width = (lnk >> 4) & 0x3f;
3807 if (sc->pectl == 0) {
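/* offset 0x8 is the PCIe Device Control register; bits 14:12 select the max read request size, and 5 means 4096 bytes */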
3808 pectl = pci_read_config(dev, reg + 0x8, 2);
3809 pectl = (pectl & ~0x7000) | (5 << 12);
3810 pci_write_config(dev, reg + 0x8, pectl, 2);
3813 /* restore saved pectl after watchdog reset */
3814 pci_write_config(dev, reg + 0x8, sc->pectl, 2);
3818 /* Enable DMA and Memory space access */
3819 pci_enable_busmaster(dev);
3823 mxge_read_reboot(mxge_softc_t *sc)
3825 device_t dev = sc->dev;
3828 /* find the vendor specific offset */
3829 if (pci_find_cap(dev, PCIY_VENDOR, &vs) != 0) {
3830 device_printf(sc->dev,
3831 "could not find vendor specific offset\n");
3832 return (uint32_t)-1;
3834 /* enable read32 mode */
3835 pci_write_config(dev, vs + 0x10, 0x3, 1);
3836 /* tell NIC which register to read */
3837 pci_write_config(dev, vs + 0x18, 0xfffffff0, 4);
3838 return (pci_read_config(dev, vs + 0x14, 4));
3842 mxge_watchdog_reset(mxge_softc_t *sc)
3844 struct pci_devinfo *dinfo;
3845 struct mxge_slice_state *ss;
3846 int err, running, s, num_tx_slices = 1;
3852 device_printf(sc->dev, "Watchdog reset!\n");
3855 * check to see if the NIC rebooted. If it did, then all of
3856 * PCI config space has been reset, and things like the
3857 * busmaster bit will be zero. If this is the case, then we
3858 * must restore PCI config space before the NIC can be used
3861 cmd = pci_read_config(sc->dev, PCIR_COMMAND, 2);
3862 if (cmd == 0xffff) {
3864 * maybe the watchdog caught the NIC rebooting; wait
3865 * up to 100ms for it to finish. If it does not come
3866 * back, then give up
3869 cmd = pci_read_config(sc->dev, PCIR_COMMAND, 2);
3870 if (cmd == 0xffff) {
3871 device_printf(sc->dev, "NIC disappeared!\n");
3874 if ((cmd & PCIM_CMD_BUSMASTEREN) == 0) {
3875 /* print the reboot status */
3876 reboot = mxge_read_reboot(sc);
3877 device_printf(sc->dev, "NIC rebooted, status = 0x%x\n",
3879 running = sc->ifp->if_drv_flags & IFF_DRV_RUNNING;
3883 * quiesce NIC so that TX routines will not try to
3884 * xmit after restoration of BAR
3887 /* Mark the link as down */
3888 if (sc->link_state) {
3890 if_link_state_change(sc->ifp,
3893 #ifdef IFNET_BUF_RING
3894 num_tx_slices = sc->num_slices;
3896 /* grab all TX locks to ensure no tx */
3897 for (s = 0; s < num_tx_slices; s++) {
3899 mtx_lock(&ss->tx.mtx);
3903 /* restore PCI configuration space */
3904 dinfo = device_get_ivars(sc->dev);
3905 pci_cfg_restore(sc->dev, dinfo);
3907 /* and redo any changes we made to our config space */
3908 mxge_setup_cfg_space(sc);
3911 err = mxge_load_firmware(sc, 0);
3913 device_printf(sc->dev,
3914 "Unable to re-load f/w\n");
3918 err = mxge_open(sc);
3919 /* release all TX locks */
3920 for (s = 0; s < num_tx_slices; s++) {
3922 #ifdef IFNET_BUF_RING
3923 mxge_start_locked(ss);
3925 mtx_unlock(&ss->tx.mtx);
3928 sc->watchdog_resets++;
3930 device_printf(sc->dev,
3931 "NIC did not reboot, not resetting\n");
3935 device_printf(sc->dev, "watchdog reset failed\n");
3939 callout_reset(&sc->co_hdl, mxge_ticks, mxge_tick, sc);
3944 mxge_watchdog_task(void *arg, int pending)
3946 mxge_softc_t *sc = arg;
3949 mtx_lock(&sc->driver_mtx);
3950 mxge_watchdog_reset(sc);
3951 mtx_unlock(&sc->driver_mtx);
3955 mxge_warn_stuck(mxge_softc_t *sc, mxge_tx_ring_t *tx, int slice)
3957 tx = &sc->ss[slice].tx;
3958 device_printf(sc->dev, "slice %d stuck? ring state:\n", slice);
3959 device_printf(sc->dev,
3960 "tx.req=%d tx.done=%d, tx.queue_active=%d\n",
3961 tx->req, tx->done, tx->queue_active);
3962 device_printf(sc->dev, "tx.activate=%d tx.deactivate=%d\n",
3963 tx->activate, tx->deactivate);
3964 device_printf(sc->dev, "pkt_done=%d fw=%d\n",
3966 be32toh(sc->ss->fw_stats->send_done_count));
3970 mxge_watchdog(mxge_softc_t *sc)
3973 uint32_t rx_pause = be32toh(sc->ss->fw_stats->dropped_pause);
3976 /* see if we have outstanding transmits, which
3977 have been pending for more than mxge_ticks */
3979 #ifdef IFNET_BUF_RING
3980 (i < sc->num_slices) && (err == 0);
3982 (i < 1) && (err == 0);
3986 if (tx->req != tx->done &&
3987 tx->watchdog_req != tx->watchdog_done &&
3988 tx->done == tx->watchdog_done) {
3989 /* check for pause blocking before resetting */
3990 if (tx->watchdog_rx_pause == rx_pause) {
3991 mxge_warn_stuck(sc, tx, i);
3992 taskqueue_enqueue(sc->tq, &sc->watchdog_task);
3996 device_printf(sc->dev, "Flow control blocking "
3997 "xmits, check link partner\n");
4000 tx->watchdog_req = tx->req;
4001 tx->watchdog_done = tx->done;
4002 tx->watchdog_rx_pause = rx_pause;
4005 if (sc->need_media_probe)
4006 mxge_media_probe(sc);
4011 mxge_get_counter(struct ifnet *ifp, ift_counter cnt)
4013 struct mxge_softc *sc;
4016 sc = if_getsoftc(ifp);
4020 case IFCOUNTER_IPACKETS:
4021 for (int s = 0; s < sc->num_slices; s++)
4022 rv += sc->ss[s].ipackets;
4024 case IFCOUNTER_OPACKETS:
4025 for (int s = 0; s < sc->num_slices; s++)
4026 rv += sc->ss[s].opackets;
4028 case IFCOUNTER_OERRORS:
4029 for (int s = 0; s < sc->num_slices; s++)
4030 rv += sc->ss[s].oerrors;
4032 #ifdef IFNET_BUF_RING
4033 case IFCOUNTER_OBYTES:
4034 for (int s = 0; s < sc->num_slices; s++)
4035 rv += sc->ss[s].obytes;
4037 case IFCOUNTER_OMCASTS:
4038 for (int s = 0; s < sc->num_slices; s++)
4039 rv += sc->ss[s].omcasts;
4041 case IFCOUNTER_OQDROPS:
4042 for (int s = 0; s < sc->num_slices; s++)
4043 rv += sc->ss[s].tx.br->br_drops;
4047 return (if_get_counter_default(ifp, cnt));
4052 mxge_tick(void *arg)
4054 mxge_softc_t *sc = arg;
4061 running = sc->ifp->if_drv_flags & IFF_DRV_RUNNING;
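/* while the interface is running, the full watchdog check only runs when the countdown reaches zero, not on every tick */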
4063 if (!sc->watchdog_countdown) {
4064 err = mxge_watchdog(sc);
4065 sc->watchdog_countdown = 4;
4067 sc->watchdog_countdown--;
4070 /* ensure NIC did not suffer h/w fault while idle */
4071 cmd = pci_read_config(sc->dev, PCIR_COMMAND, 2);
4072 if ((cmd & PCIM_CMD_BUSMASTEREN) == 0) {
4074 taskqueue_enqueue(sc->tq, &sc->watchdog_task);
4077 /* look less often if NIC is idle */
4082 callout_reset(&sc->co_hdl, ticks, mxge_tick, sc);
4087 mxge_media_change(struct ifnet *ifp)
4093 mxge_change_mtu(mxge_softc_t *sc, int mtu)
4095 struct ifnet *ifp = sc->ifp;
4096 int real_mtu, old_mtu;
4100 real_mtu = mtu + ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
4101 if ((real_mtu > sc->max_mtu) || real_mtu < 60)
4103 mtx_lock(&sc->driver_mtx);
4104 old_mtu = ifp->if_mtu;
4106 if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
4108 err = mxge_open(sc);
4110 ifp->if_mtu = old_mtu;
4112 (void) mxge_open(sc);
4115 mtx_unlock(&sc->driver_mtx);
4120 mxge_media_status(struct ifnet *ifp, struct ifmediareq *ifmr)
4122 mxge_softc_t *sc = ifp->if_softc;
4127 ifmr->ifm_status = IFM_AVALID;
4128 ifmr->ifm_active = IFM_ETHER | IFM_FDX;
4129 ifmr->ifm_status |= sc->link_state ? IFM_ACTIVE : 0;
4130 ifmr->ifm_active |= sc->current_media;
4134 mxge_fetch_i2c(mxge_softc_t *sc, struct ifi2creq *i2c)
4141 if (i2c->dev_addr != 0xA0 &&
4142 i2c->dev_addr != 0xA2)
4144 if (i2c->len > sizeof(i2c->data))
4147 for (i = 0; i < i2c->len; i++) {
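/* the firmware exposes the module EEPROM one byte at a time: issue an I2C_READ for the byte, then poll I2C_BYTE until the value has been cached */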
4148 i2c_args = i2c->dev_addr << 0x8;
4149 i2c_args |= i2c->offset + i;
4150 cmd.data0 = 0; /* just fetch 1 byte, not all 256 */
4151 cmd.data1 = i2c_args;
4152 err = mxge_send_cmd(sc, MXGEFW_CMD_I2C_READ, &cmd);
4154 if (err != MXGEFW_CMD_OK)
4156 /* now we wait for the data to be cached */
4157 cmd.data0 = i2c_args & 0xff;
4158 err = mxge_send_cmd(sc, MXGEFW_CMD_I2C_BYTE, &cmd);
4159 for (ms = 0; (err == EBUSY) && (ms < 50); ms++) {
4160 cmd.data0 = i2c_args & 0xff;
4161 err = mxge_send_cmd(sc, MXGEFW_CMD_I2C_BYTE, &cmd);
4165 if (err != MXGEFW_CMD_OK)
4167 i2c->data[i] = cmd.data0;
4173 mxge_ioctl(struct ifnet *ifp, u_long command, caddr_t data)
4175 mxge_softc_t *sc = ifp->if_softc;
4176 struct ifreq *ifr = (struct ifreq *)data;
4177 struct ifi2creq i2c;
4183 err = mxge_change_mtu(sc, ifr->ifr_mtu);
4187 mtx_lock(&sc->driver_mtx);
4189 mtx_unlock(&sc->driver_mtx);
4192 if (ifp->if_flags & IFF_UP) {
4193 if (!(ifp->if_drv_flags & IFF_DRV_RUNNING)) {
4194 err = mxge_open(sc);
4196 /* take care of promisc and allmulti flag changes */
4198 mxge_change_promisc(sc,
4199 ifp->if_flags & IFF_PROMISC);
4200 mxge_set_multicast_list(sc);
4203 if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
4207 mtx_unlock(&sc->driver_mtx);
4212 mtx_lock(&sc->driver_mtx);
4214 mtx_unlock(&sc->driver_mtx);
4217 mxge_set_multicast_list(sc);
4218 mtx_unlock(&sc->driver_mtx);
4222 mtx_lock(&sc->driver_mtx);
4223 mask = ifr->ifr_reqcap ^ ifp->if_capenable;
4224 if (mask & IFCAP_TXCSUM) {
4225 if (IFCAP_TXCSUM & ifp->if_capenable) {
4226 ifp->if_capenable &= ~(IFCAP_TXCSUM|IFCAP_TSO4);
4227 ifp->if_hwassist &= ~(CSUM_TCP | CSUM_UDP);
4229 ifp->if_capenable |= IFCAP_TXCSUM;
4230 ifp->if_hwassist |= (CSUM_TCP | CSUM_UDP);
4232 } else if (mask & IFCAP_RXCSUM) {
4233 if (IFCAP_RXCSUM & ifp->if_capenable) {
4234 ifp->if_capenable &= ~IFCAP_RXCSUM;
4236 ifp->if_capenable |= IFCAP_RXCSUM;
4239 if (mask & IFCAP_TSO4) {
4240 if (IFCAP_TSO4 & ifp->if_capenable) {
4241 ifp->if_capenable &= ~IFCAP_TSO4;
4242 } else if (IFCAP_TXCSUM & ifp->if_capenable) {
4243 ifp->if_capenable |= IFCAP_TSO4;
4244 ifp->if_hwassist |= CSUM_TSO;
4246 printf("mxge requires tx checksum offload"
4247 " be enabled to use TSO\n");
4252 if (mask & IFCAP_TXCSUM_IPV6) {
4253 if (IFCAP_TXCSUM_IPV6 & ifp->if_capenable) {
4254 ifp->if_capenable &= ~(IFCAP_TXCSUM_IPV6
4256 ifp->if_hwassist &= ~(CSUM_TCP_IPV6
4259 ifp->if_capenable |= IFCAP_TXCSUM_IPV6;
4260 ifp->if_hwassist |= (CSUM_TCP_IPV6
4263 } else if (mask & IFCAP_RXCSUM_IPV6) {
4264 if (IFCAP_RXCSUM_IPV6 & ifp->if_capenable) {
4265 ifp->if_capenable &= ~IFCAP_RXCSUM_IPV6;
4267 ifp->if_capenable |= IFCAP_RXCSUM_IPV6;
4270 if (mask & IFCAP_TSO6) {
4271 if (IFCAP_TSO6 & ifp->if_capenable) {
4272 ifp->if_capenable &= ~IFCAP_TSO6;
4273 } else if (IFCAP_TXCSUM_IPV6 & ifp->if_capenable) {
4274 ifp->if_capenable |= IFCAP_TSO6;
4275 ifp->if_hwassist |= CSUM_TSO;
4277 printf("mxge requires tx checksum offload"
4278 " be enabled to use TSO\n");
4282 #endif /*IFCAP_TSO6 */
4284 if (mask & IFCAP_LRO)
4285 ifp->if_capenable ^= IFCAP_LRO;
4286 if (mask & IFCAP_VLAN_HWTAGGING)
4287 ifp->if_capenable ^= IFCAP_VLAN_HWTAGGING;
4288 if (mask & IFCAP_VLAN_HWTSO)
4289 ifp->if_capenable ^= IFCAP_VLAN_HWTSO;
4291 if (!(ifp->if_capabilities & IFCAP_VLAN_HWTSO) ||
4292 !(ifp->if_capenable & IFCAP_VLAN_HWTAGGING))
4293 ifp->if_capenable &= ~IFCAP_VLAN_HWTSO;
4295 mtx_unlock(&sc->driver_mtx);
4296 VLAN_CAPABILITIES(ifp);
4301 mtx_lock(&sc->driver_mtx);
4303 mtx_unlock(&sc->driver_mtx);
4306 mxge_media_probe(sc);
4307 mtx_unlock(&sc->driver_mtx);
4308 err = ifmedia_ioctl(ifp, (struct ifreq *)data,
4309 &sc->media, command);
4313 if (sc->connector != MXGE_XFP &&
4314 sc->connector != MXGE_SFP) {
4318 err = copyin(ifr_data_get_ptr(ifr), &i2c, sizeof(i2c));
4321 mtx_lock(&sc->driver_mtx);
4323 mtx_unlock(&sc->driver_mtx);
4326 err = mxge_fetch_i2c(sc, &i2c);
4327 mtx_unlock(&sc->driver_mtx);
4329 err = copyout(&i2c, ifr->ifr_ifru.ifru_data,
4333 err = ether_ioctl(ifp, command, data);
4340 mxge_fetch_tunables(mxge_softc_t *sc)
4343 TUNABLE_INT_FETCH("hw.mxge.max_slices", &mxge_max_slices);
4344 TUNABLE_INT_FETCH("hw.mxge.flow_control_enabled",
4345 &mxge_flow_control);
4346 TUNABLE_INT_FETCH("hw.mxge.intr_coal_delay",
4347 &mxge_intr_coal_delay);
4348 TUNABLE_INT_FETCH("hw.mxge.nvidia_ecrc_enable",
4349 &mxge_nvidia_ecrc_enable);
4350 TUNABLE_INT_FETCH("hw.mxge.force_firmware",
4351 &mxge_force_firmware);
4352 TUNABLE_INT_FETCH("hw.mxge.deassert_wait",
4353 &mxge_deassert_wait);
4354 TUNABLE_INT_FETCH("hw.mxge.verbose",
4356 TUNABLE_INT_FETCH("hw.mxge.ticks", &mxge_ticks);
4357 TUNABLE_INT_FETCH("hw.mxge.always_promisc", &mxge_always_promisc);
4358 TUNABLE_INT_FETCH("hw.mxge.rss_hash_type", &mxge_rss_hash_type);
4359 TUNABLE_INT_FETCH("hw.mxge.rss_hashtype", &mxge_rss_hash_type);
4360 TUNABLE_INT_FETCH("hw.mxge.initial_mtu", &mxge_initial_mtu);
4361 TUNABLE_INT_FETCH("hw.mxge.throttle", &mxge_throttle);
4365 if (mxge_intr_coal_delay < 0 || mxge_intr_coal_delay > 10*1000)
4366 mxge_intr_coal_delay = 30;
4367 if (mxge_ticks == 0)
4368 mxge_ticks = hz / 2;
4369 sc->pause = mxge_flow_control;
4370 if (mxge_rss_hash_type < MXGEFW_RSS_HASH_TYPE_IPV4
4371 || mxge_rss_hash_type > MXGEFW_RSS_HASH_TYPE_MAX) {
4372 mxge_rss_hash_type = MXGEFW_RSS_HASH_TYPE_SRC_DST_PORT;
4374 if (mxge_initial_mtu > ETHERMTU_JUMBO ||
4375 mxge_initial_mtu < ETHER_MIN_LEN)
4376 mxge_initial_mtu = ETHERMTU_JUMBO;
4378 if (mxge_throttle && mxge_throttle > MXGE_MAX_THROTTLE)
4379 mxge_throttle = MXGE_MAX_THROTTLE;
4380 if (mxge_throttle && mxge_throttle < MXGE_MIN_THROTTLE)
4381 mxge_throttle = MXGE_MIN_THROTTLE;
4382 sc->throttle = mxge_throttle;
4387 mxge_free_slices(mxge_softc_t *sc)
4389 struct mxge_slice_state *ss;
4396 for (i = 0; i < sc->num_slices; i++) {
4398 if (ss->fw_stats != NULL) {
4399 mxge_dma_free(&ss->fw_stats_dma);
4400 ss->fw_stats = NULL;
4401 #ifdef IFNET_BUF_RING
4402 if (ss->tx.br != NULL) {
4403 drbr_free(ss->tx.br, M_DEVBUF);
4407 mtx_destroy(&ss->tx.mtx);
4409 if (ss->rx_done.entry != NULL) {
4410 mxge_dma_free(&ss->rx_done.dma);
4411 ss->rx_done.entry = NULL;
4414 free(sc->ss, M_DEVBUF);
4419 mxge_alloc_slices(mxge_softc_t *sc)
4422 struct mxge_slice_state *ss;
4424 int err, i, max_intr_slots;
4426 err = mxge_send_cmd(sc, MXGEFW_CMD_GET_RX_RING_SIZE, &cmd);
4428 device_printf(sc->dev, "Cannot determine rx ring size\n");
4431 sc->rx_ring_size = cmd.data0;
4432 max_intr_slots = 2 * (sc->rx_ring_size / sizeof (mcp_dma_addr_t));
4434 bytes = sizeof (*sc->ss) * sc->num_slices;
4435 sc->ss = malloc(bytes, M_DEVBUF, M_NOWAIT | M_ZERO);
4438 for (i = 0; i < sc->num_slices; i++) {
4443 /* allocate per-slice rx interrupt queues */
4445 bytes = max_intr_slots * sizeof (*ss->rx_done.entry);
4446 err = mxge_dma_alloc(sc, &ss->rx_done.dma, bytes, 4096);
4449 ss->rx_done.entry = ss->rx_done.dma.addr;
4450 bzero(ss->rx_done.entry, bytes);
4453 * allocate the per-slice firmware stats; stats
4454 * (including tx) are used only on the first
4457 #ifndef IFNET_BUF_RING
4462 bytes = sizeof (*ss->fw_stats);
4463 err = mxge_dma_alloc(sc, &ss->fw_stats_dma,
4464 sizeof (*ss->fw_stats), 64);
4467 ss->fw_stats = (mcp_irq_data_t *)ss->fw_stats_dma.addr;
4468 snprintf(ss->tx.mtx_name, sizeof(ss->tx.mtx_name),
4469 "%s:tx(%d)", device_get_nameunit(sc->dev), i);
4470 mtx_init(&ss->tx.mtx, ss->tx.mtx_name, NULL, MTX_DEF);
4471 #ifdef IFNET_BUF_RING
4472 ss->tx.br = buf_ring_alloc(2048, M_DEVBUF, M_WAITOK,
4480 mxge_free_slices(sc);
4485 mxge_slice_probe(mxge_softc_t *sc)
4489 int msix_cnt, status, max_intr_slots;
4493 * don't enable multiple slices if they are disabled by the tunable,
4494 * or if this is not an SMP system
4497 if (mxge_max_slices == 0 || mxge_max_slices == 1 || mp_ncpus < 2)
4500 /* see how many MSI-X interrupts are available */
4501 msix_cnt = pci_msix_count(sc->dev);
4505 /* now load the slice-aware firmware and see what it supports */
4506 old_fw = sc->fw_name;
4507 if (old_fw == mxge_fw_aligned)
4508 sc->fw_name = mxge_fw_rss_aligned;
4510 sc->fw_name = mxge_fw_rss_unaligned;
4511 status = mxge_load_firmware(sc, 0);
4513 device_printf(sc->dev, "Falling back to a single slice\n");
4517 /* try to send a reset command to the card to see if it is alive */
4519 memset(&cmd, 0, sizeof (cmd));
4520 status = mxge_send_cmd(sc, MXGEFW_CMD_RESET, &cmd);
4522 device_printf(sc->dev, "failed reset\n");
4526 /* get rx ring size */
4527 status = mxge_send_cmd(sc, MXGEFW_CMD_GET_RX_RING_SIZE, &cmd);
4529 device_printf(sc->dev, "Cannot determine rx ring size\n");
4532 max_intr_slots = 2 * (cmd.data0 / sizeof (mcp_dma_addr_t));
4534 /* tell it the size of the interrupt queues */
4535 cmd.data0 = max_intr_slots * sizeof (struct mcp_slot);
4536 status = mxge_send_cmd(sc, MXGEFW_CMD_SET_INTRQ_SIZE, &cmd);
4538 device_printf(sc->dev, "failed MXGEFW_CMD_SET_INTRQ_SIZE\n");
4542 /* ask the maximum number of slices it supports */
4543 status = mxge_send_cmd(sc, MXGEFW_CMD_GET_MAX_RSS_QUEUES, &cmd);
4545 device_printf(sc->dev,
4546 "failed MXGEFW_CMD_GET_MAX_RSS_QUEUES\n");
4549 sc->num_slices = cmd.data0;
4550 if (sc->num_slices > msix_cnt)
4551 sc->num_slices = msix_cnt;
4553 if (mxge_max_slices == -1) {
4554 /* cap to number of CPUs in system */
4555 if (sc->num_slices > mp_ncpus)
4556 sc->num_slices = mp_ncpus;
4558 if (sc->num_slices > mxge_max_slices)
4559 sc->num_slices = mxge_max_slices;
4561 /* make sure it is a power of two */
4562 while (sc->num_slices & (sc->num_slices - 1))
4566 device_printf(sc->dev, "using %d slices\n",
4572 sc->fw_name = old_fw;
4573 (void) mxge_load_firmware(sc, 0);
4577 mxge_add_msix_irqs(mxge_softc_t *sc)
4580 int count, err, i, rid;
4583 sc->msix_table_res = bus_alloc_resource_any(sc->dev, SYS_RES_MEMORY,
4586 if (sc->msix_table_res == NULL) {
4587 device_printf(sc->dev, "couldn't alloc MSIX table res\n");
4591 count = sc->num_slices;
4592 err = pci_alloc_msix(sc->dev, &count);
4594 device_printf(sc->dev, "pci_alloc_msix: failed, wanted %d"
4595 "err = %d \n", sc->num_slices, err);
4596 goto abort_with_msix_table;
4598 if (count < sc->num_slices) {
4599 device_printf(sc->dev, "pci_alloc_msix: need %d, got %d\n",
4600 count, sc->num_slices);
4601 device_printf(sc->dev,
4602 "Try setting hw.mxge.max_slices to %d\n",
4605 goto abort_with_msix;
4607 bytes = sizeof (*sc->msix_irq_res) * sc->num_slices;
4608 sc->msix_irq_res = malloc(bytes, M_DEVBUF, M_NOWAIT|M_ZERO);
4609 if (sc->msix_irq_res == NULL) {
4611 goto abort_with_msix;
4614 for (i = 0; i < sc->num_slices; i++) {
4616 sc->msix_irq_res[i] = bus_alloc_resource_any(sc->dev,
4619 if (sc->msix_irq_res[i] == NULL) {
4620 device_printf(sc->dev, "couldn't allocate IRQ res"
4621 " for message %d\n", i);
4623 goto abort_with_res;
4627 bytes = sizeof (*sc->msix_ih) * sc->num_slices;
4628 sc->msix_ih = malloc(bytes, M_DEVBUF, M_NOWAIT|M_ZERO);
4630 for (i = 0; i < sc->num_slices; i++) {
4631 err = bus_setup_intr(sc->dev, sc->msix_irq_res[i],
4632 INTR_TYPE_NET | INTR_MPSAFE,
4633 #if __FreeBSD_version > 700030
4636 mxge_intr, &sc->ss[i], &sc->msix_ih[i]);
4638 device_printf(sc->dev, "couldn't setup intr for "
4640 goto abort_with_intr;
4642 bus_describe_intr(sc->dev, sc->msix_irq_res[i],
4643 sc->msix_ih[i], "s%d", i);
4647 device_printf(sc->dev, "using %d msix IRQs:",
4649 for (i = 0; i < sc->num_slices; i++)
4650 printf(" %jd", rman_get_start(sc->msix_irq_res[i]));
4656 for (i = 0; i < sc->num_slices; i++) {
4657 if (sc->msix_ih[i] != NULL) {
4658 bus_teardown_intr(sc->dev, sc->msix_irq_res[i],
4660 sc->msix_ih[i] = NULL;
4663 free(sc->msix_ih, M_DEVBUF);
4667 for (i = 0; i < sc->num_slices; i++) {
4669 if (sc->msix_irq_res[i] != NULL)
4670 bus_release_resource(sc->dev, SYS_RES_IRQ, rid,
4671 sc->msix_irq_res[i]);
4672 sc->msix_irq_res[i] = NULL;
4674 free(sc->msix_irq_res, M_DEVBUF);
4678 pci_release_msi(sc->dev);
4680 abort_with_msix_table:
4681 bus_release_resource(sc->dev, SYS_RES_MEMORY, PCIR_BAR(2),
4682 sc->msix_table_res);
4688 mxge_add_single_irq(mxge_softc_t *sc)
4690 int count, err, rid;
4692 count = pci_msi_count(sc->dev);
4693 if (count == 1 && pci_alloc_msi(sc->dev, &count) == 0) {
4699 sc->irq_res = bus_alloc_resource_any(sc->dev, SYS_RES_IRQ, &rid,
4700 RF_SHAREABLE | RF_ACTIVE);
4701 if (sc->irq_res == NULL) {
4702 device_printf(sc->dev, "could not alloc interrupt\n");
4706 device_printf(sc->dev, "using %s irq %jd\n",
4707 sc->legacy_irq ? "INTx" : "MSI",
4708 rman_get_start(sc->irq_res));
4709 err = bus_setup_intr(sc->dev, sc->irq_res,
4710 INTR_TYPE_NET | INTR_MPSAFE,
4711 #if __FreeBSD_version > 700030
4714 mxge_intr, &sc->ss[0], &sc->ih);
4716 bus_release_resource(sc->dev, SYS_RES_IRQ,
4717 sc->legacy_irq ? 0 : 1, sc->irq_res);
4718 if (!sc->legacy_irq)
4719 pci_release_msi(sc->dev);
4725 mxge_rem_msix_irqs(mxge_softc_t *sc)
4729 for (i = 0; i < sc->num_slices; i++) {
4730 if (sc->msix_ih[i] != NULL) {
4731 bus_teardown_intr(sc->dev, sc->msix_irq_res[i],
4733 sc->msix_ih[i] = NULL;
4736 free(sc->msix_ih, M_DEVBUF);
4738 for (i = 0; i < sc->num_slices; i++) {
4740 if (sc->msix_irq_res[i] != NULL)
4741 bus_release_resource(sc->dev, SYS_RES_IRQ, rid,
4742 sc->msix_irq_res[i]);
4743 sc->msix_irq_res[i] = NULL;
4745 free(sc->msix_irq_res, M_DEVBUF);
4747 bus_release_resource(sc->dev, SYS_RES_MEMORY, PCIR_BAR(2),
4748 sc->msix_table_res);
4750 pci_release_msi(sc->dev);
4755 mxge_rem_single_irq(mxge_softc_t *sc)
4757 bus_teardown_intr(sc->dev, sc->irq_res, sc->ih);
4758 bus_release_resource(sc->dev, SYS_RES_IRQ,
4759 sc->legacy_irq ? 0 : 1, sc->irq_res);
4760 if (!sc->legacy_irq)
4761 pci_release_msi(sc->dev);
4765 mxge_rem_irq(mxge_softc_t *sc)
4767 if (sc->num_slices > 1)
4768 mxge_rem_msix_irqs(sc);
4770 mxge_rem_single_irq(sc);
4774 mxge_add_irq(mxge_softc_t *sc)
4778 if (sc->num_slices > 1)
4779 err = mxge_add_msix_irqs(sc);
4781 err = mxge_add_single_irq(sc);
4783 if (0 && err == 0 && sc->num_slices > 1) {
4784 mxge_rem_msix_irqs(sc);
4785 err = mxge_add_msix_irqs(sc);
4792 mxge_attach(device_t dev)
4795 mxge_softc_t *sc = device_get_softc(dev);
4800 mxge_fetch_tunables(sc);
4802 TASK_INIT(&sc->watchdog_task, 1, mxge_watchdog_task, sc);
4803 sc->tq = taskqueue_create("mxge_taskq", M_WAITOK,
4804 taskqueue_thread_enqueue, &sc->tq);
4805 if (sc->tq == NULL) {
4807 goto abort_with_nothing;
4810 err = bus_dma_tag_create(bus_get_dma_tag(dev), /* parent */
4813 BUS_SPACE_MAXADDR, /* low */
4814 BUS_SPACE_MAXADDR, /* high */
4815 NULL, NULL, /* filter */
4816 65536 + 256, /* maxsize */
4817 MXGE_MAX_SEND_DESC, /* num segs */
4818 65536, /* maxsegsize */
4820 NULL, NULL, /* lock */
4821 &sc->parent_dmat); /* tag */
4824 device_printf(sc->dev, "Err %d allocating parent dmat\n",
4829 ifp = sc->ifp = if_alloc(IFT_ETHER);
4831 device_printf(dev, "can not if_alloc()\n");
4833 goto abort_with_parent_dmat;
4835 if_initname(ifp, device_get_name(dev), device_get_unit(dev));
4837 snprintf(sc->cmd_mtx_name, sizeof(sc->cmd_mtx_name), "%s:cmd",
4838 device_get_nameunit(dev));
4839 mtx_init(&sc->cmd_mtx, sc->cmd_mtx_name, NULL, MTX_DEF);
4840 snprintf(sc->driver_mtx_name, sizeof(sc->driver_mtx_name),
4841 "%s:drv", device_get_nameunit(dev));
4842 mtx_init(&sc->driver_mtx, sc->driver_mtx_name,
4843 MTX_NETWORK_LOCK, MTX_DEF);
4845 callout_init_mtx(&sc->co_hdl, &sc->driver_mtx, 0);
4847 mxge_setup_cfg_space(sc);
4849 /* Map the board into the kernel */
4851 sc->mem_res = bus_alloc_resource_any(dev, SYS_RES_MEMORY, &rid,
4853 if (sc->mem_res == NULL) {
4854 device_printf(dev, "could not map memory\n");
4856 goto abort_with_lock;
4858 sc->sram = rman_get_virtual(sc->mem_res);
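/* only the leading portion of the 2MB BAR is treated as usable SRAM; the EEPROM strings are read from just below sram_size further down */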
4859 sc->sram_size = 2*1024*1024 - (2*(48*1024)+(32*1024)) - 0x100;
4860 if (sc->sram_size > rman_get_size(sc->mem_res)) {
4861 device_printf(dev, "impossible memory region size %jd\n",
4862 rman_get_size(sc->mem_res));
4864 goto abort_with_mem_res;
4867 /* make a NULL-terminated copy of the EEPROM strings section of lanai SRAM */
4869 bzero(sc->eeprom_strings, MXGE_EEPROM_STRINGS_SIZE);
4870 bus_space_read_region_1(rman_get_bustag(sc->mem_res),
4871 rman_get_bushandle(sc->mem_res),
4872 sc->sram_size - MXGE_EEPROM_STRINGS_SIZE,
4874 MXGE_EEPROM_STRINGS_SIZE - 2);
4875 err = mxge_parse_strings(sc);
4877 goto abort_with_mem_res;
4879 /* Enable write combining for efficient use of PCIe bus */
4882 /* Allocate the out of band dma memory */
4883 err = mxge_dma_alloc(sc, &sc->cmd_dma,
4884 sizeof (mxge_cmd_t), 64);
4886 goto abort_with_mem_res;
4887 sc->cmd = (mcp_cmd_response_t *) sc->cmd_dma.addr;
4888 err = mxge_dma_alloc(sc, &sc->zeropad_dma, 64, 64);
4890 goto abort_with_cmd_dma;
4892 err = mxge_dma_alloc(sc, &sc->dmabench_dma, 4096, 4096);
4894 goto abort_with_zeropad_dma;
4896 /* select & load the firmware */
4897 err = mxge_select_firmware(sc);
4899 goto abort_with_dmabench;
4900 sc->intr_coal_delay = mxge_intr_coal_delay;
4902 mxge_slice_probe(sc);
4903 err = mxge_alloc_slices(sc);
4905 goto abort_with_dmabench;
4907 err = mxge_reset(sc, 0);
4909 goto abort_with_slices;
4911 err = mxge_alloc_rings(sc);
4913 device_printf(sc->dev, "failed to allocate rings\n");
4914 goto abort_with_slices;
4917 err = mxge_add_irq(sc);
4919 device_printf(sc->dev, "failed to add irq\n");
4920 goto abort_with_rings;
4923 ifp->if_baudrate = IF_Gbps(10);
4924 ifp->if_capabilities = IFCAP_RXCSUM | IFCAP_TXCSUM | IFCAP_TSO4 |
4925 IFCAP_VLAN_MTU | IFCAP_LINKSTATE | IFCAP_TXCSUM_IPV6 |
4927 #if defined(INET) || defined(INET6)
4928 ifp->if_capabilities |= IFCAP_LRO;
4931 #ifdef MXGE_NEW_VLAN_API
4932 ifp->if_capabilities |= IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_HWCSUM;
4934 /* Only FW 1.4.32 and newer can do TSO over vlans */
4935 if (sc->fw_ver_major == 1 && sc->fw_ver_minor == 4 &&
4936 sc->fw_ver_tiny >= 32)
4937 ifp->if_capabilities |= IFCAP_VLAN_HWTSO;
4939 sc->max_mtu = mxge_max_mtu(sc);
4940 if (sc->max_mtu >= 9000)
4941 ifp->if_capabilities |= IFCAP_JUMBO_MTU;
4943 device_printf(dev, "MTU limited to %d. Install "
4944 "latest firmware for 9000 byte jumbo support\n",
4945 sc->max_mtu - ETHER_HDR_LEN);
4946 ifp->if_hwassist = CSUM_TCP | CSUM_UDP | CSUM_TSO;
4947 ifp->if_hwassist |= CSUM_TCP_IPV6 | CSUM_UDP_IPV6;
4948 /* check to see if f/w supports TSO for IPv6 */
4949 if (!mxge_send_cmd(sc, MXGEFW_CMD_GET_MAX_TSO6_HDR_SIZE, &cmd)) {
4951 ifp->if_capabilities |= IFCAP_TSO6;
4952 sc->max_tso6_hlen = min(cmd.data0,
4953 sizeof (sc->ss[0].scratch));
4955 ifp->if_capenable = ifp->if_capabilities;
4956 if (sc->lro_cnt == 0)
4957 ifp->if_capenable &= ~IFCAP_LRO;
4958 ifp->if_init = mxge_init;
4960 ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
4961 ifp->if_ioctl = mxge_ioctl;
4962 ifp->if_start = mxge_start;
4963 ifp->if_get_counter = mxge_get_counter;
4964 ifp->if_hw_tsomax = IP_MAXPACKET - (ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN);
4965 ifp->if_hw_tsomaxsegcount = sc->ss[0].tx.max_desc;
4966 ifp->if_hw_tsomaxsegsize = IP_MAXPACKET;
4967 /* Initialise the ifmedia structure */
4968 ifmedia_init(&sc->media, 0, mxge_media_change,
4970 mxge_media_init(sc);
4971 mxge_media_probe(sc);
4973 ether_ifattach(ifp, sc->mac_addr);
4974 /* ether_ifattach sets mtu to ETHERMTU */
4975 if (mxge_initial_mtu != ETHERMTU)
4976 mxge_change_mtu(sc, mxge_initial_mtu);
4978 mxge_add_sysctls(sc);
4979 #ifdef IFNET_BUF_RING
4980 ifp->if_transmit = mxge_transmit;
4981 ifp->if_qflush = mxge_qflush;
4983 taskqueue_start_threads(&sc->tq, 1, PI_NET, "%s taskq",
4984 device_get_nameunit(sc->dev));
4985 callout_reset(&sc->co_hdl, mxge_ticks, mxge_tick, sc);
4989 mxge_free_rings(sc);
4991 mxge_free_slices(sc);
4992 abort_with_dmabench:
4993 mxge_dma_free(&sc->dmabench_dma);
4994 abort_with_zeropad_dma:
4995 mxge_dma_free(&sc->zeropad_dma);
4997 mxge_dma_free(&sc->cmd_dma);
4999 bus_release_resource(dev, SYS_RES_MEMORY, PCIR_BARS, sc->mem_res);
5001 pci_disable_busmaster(dev);
5002 mtx_destroy(&sc->cmd_mtx);
5003 mtx_destroy(&sc->driver_mtx);
5005 abort_with_parent_dmat:
5006 bus_dma_tag_destroy(sc->parent_dmat);
5008 if (sc->tq != NULL) {
5009 taskqueue_drain(sc->tq, &sc->watchdog_task);
5010 taskqueue_free(sc->tq);
5018 mxge_detach(device_t dev)
5020 mxge_softc_t *sc = device_get_softc(dev);
5022 if (mxge_vlans_active(sc)) {
5023 device_printf(sc->dev,
5024 "Detach vlans before removing module\n");
5027 mtx_lock(&sc->driver_mtx);
5029 if (sc->ifp->if_drv_flags & IFF_DRV_RUNNING)
5031 mtx_unlock(&sc->driver_mtx);
5032 ether_ifdetach(sc->ifp);
5033 if (sc->tq != NULL) {
5034 taskqueue_drain(sc->tq, &sc->watchdog_task);
5035 taskqueue_free(sc->tq);
5038 callout_drain(&sc->co_hdl);
5039 ifmedia_removeall(&sc->media);
5040 mxge_dummy_rdma(sc, 0);
5041 mxge_rem_sysctls(sc);
5043 mxge_free_rings(sc);
5044 mxge_free_slices(sc);
5045 mxge_dma_free(&sc->dmabench_dma);
5046 mxge_dma_free(&sc->zeropad_dma);
5047 mxge_dma_free(&sc->cmd_dma);
5048 bus_release_resource(dev, SYS_RES_MEMORY, PCIR_BARS, sc->mem_res);
5049 pci_disable_busmaster(dev);
5050 mtx_destroy(&sc->cmd_mtx);
5051 mtx_destroy(&sc->driver_mtx);
5053 bus_dma_tag_destroy(sc->parent_dmat);
5058 mxge_shutdown(device_t dev)
5064 This file uses Myri10GE driver indentation.
5067 c-file-style:"linux"