1 /******************************************************************************
2 SPDX-License-Identifier: BSD-2-Clause-FreeBSD
4 Copyright (c) 2006-2013, Myricom Inc.
7 Redistribution and use in source and binary forms, with or without
8 modification, are permitted provided that the following conditions are met:
10 1. Redistributions of source code must retain the above copyright notice,
11 this list of conditions and the following disclaimer.
13 2. Neither the name of the Myricom Inc, nor the names of its
14 contributors may be used to endorse or promote products derived from
15 this software without specific prior written permission.
17 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
18 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
21 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
22 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
23 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
24 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
25 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
26 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
27 POSSIBILITY OF SUCH DAMAGE.
29 ***************************************************************************/
31 #include <sys/cdefs.h>
32 __FBSDID("$FreeBSD$");
34 #include <sys/param.h>
35 #include <sys/systm.h>
36 #include <sys/linker.h>
37 #include <sys/firmware.h>
38 #include <sys/endian.h>
39 #include <sys/sockio.h>
41 #include <sys/malloc.h>
43 #include <sys/kernel.h>
45 #include <sys/module.h>
46 #include <sys/socket.h>
47 #include <sys/sysctl.h>
49 #include <sys/taskqueue.h>
50 #include <contrib/zlib/zlib.h>
51 #include <dev/zlib/zcalloc.h>
54 #include <net/if_var.h>
55 #include <net/if_arp.h>
56 #include <net/ethernet.h>
57 #include <net/if_dl.h>
58 #include <net/if_media.h>
62 #include <net/if_types.h>
63 #include <net/if_vlan_var.h>
65 #include <netinet/in_systm.h>
66 #include <netinet/in.h>
67 #include <netinet/ip.h>
68 #include <netinet/ip6.h>
69 #include <netinet/tcp.h>
70 #include <netinet/tcp_lro.h>
71 #include <netinet6/ip6_var.h>
73 #include <machine/bus.h>
74 #include <machine/in_cksum.h>
75 #include <machine/resource.h>
80 #include <dev/pci/pcireg.h>
81 #include <dev/pci/pcivar.h>
82 #include <dev/pci/pci_private.h> /* XXX for pci_cfg_restore */
84 #include <vm/vm.h> /* for pmap_mapdev() */
87 #if defined(__i386) || defined(__amd64)
88 #include <machine/specialreg.h>
91 #include <dev/mxge/mxge_mcp.h>
92 #include <dev/mxge/mcp_gen_header.h>
93 /*#define MXGE_FAKE_IFP*/
94 #include <dev/mxge/if_mxge_var.h>
96 #include <sys/buf_ring.h>
100 #include "opt_inet6.h"
/*
 * Module-wide tunables.  These hold load-time defaults for the driver;
 * several are later exposed through sysctl nodes.
 * NOTE(review): this listing is an excerpt with lines elided.
 */
103 static int mxge_nvidia_ecrc_enable = 1;
104 static int mxge_force_firmware = 0;
/* interrupt coalescing delay, in microseconds (see mxge_change_intr_coal) */
105 static int mxge_intr_coal_delay = 30;
106 static int mxge_deassert_wait = 1;
107 static int mxge_flow_control = 1;
108 static int mxge_verbose = 0;
109 static int mxge_ticks;
110 static int mxge_max_slices = 1;
111 static int mxge_rss_hash_type = MXGEFW_RSS_HASH_TYPE_SRC_DST_PORT;
112 static int mxge_always_promisc = 0;
113 static int mxge_initial_mtu = ETHERMTU_JUMBO;
114 static int mxge_throttle = 0;
/*
 * Firmware image names: "aligned" images assume the chipset delivers
 * 8-byte-aligned PCIe completions; "unaligned" (ethp) images work around
 * unaligned completions.  The rss variants support multiple slices.
 */
115 static char *mxge_fw_unaligned = "mxge_ethp_z8e";
116 static char *mxge_fw_aligned = "mxge_eth_z8e";
117 static char *mxge_fw_rss_aligned = "mxge_rss_eth_z8e";
118 static char *mxge_fw_rss_unaligned = "mxge_rss_ethp_z8e";
/* Forward declarations for the newbus device methods. */
120 static int mxge_probe(device_t dev);
121 static int mxge_attach(device_t dev);
122 static int mxge_detach(device_t dev);
123 static int mxge_shutdown(device_t dev);
124 static void mxge_intr(void *arg);
126 static device_method_t mxge_methods[] =
128 /* Device interface */
129 DEVMETHOD(device_probe, mxge_probe),
130 DEVMETHOD(device_attach, mxge_attach),
131 DEVMETHOD(device_detach, mxge_detach),
132 DEVMETHOD(device_shutdown, mxge_shutdown),
137 static driver_t mxge_driver =
141 sizeof(mxge_softc_t),
144 static devclass_t mxge_devclass;
146 /* Declare ourselves to be a child of the PCI bus.*/
147 DRIVER_MODULE(mxge, pci, mxge_driver, mxge_devclass, 0, 0);
/* We need the firmware(9) image loader and zlib to decompress the f/w. */
148 MODULE_DEPEND(mxge, firmware, 1, 1, 1);
149 MODULE_DEPEND(mxge, zlib, 1, 1, 1);
/* Forward declarations for helpers used before their definitions. */
151 static int mxge_load_firmware(mxge_softc_t *sc, int adopt);
152 static int mxge_send_cmd(mxge_softc_t *sc, uint32_t cmd, mxge_cmd_t *data);
153 static int mxge_close(mxge_softc_t *sc, int down);
154 static int mxge_open(mxge_softc_t *sc);
155 static void mxge_tick(void *arg);
/*
 * Device probe: match Myricom Z8E / Z8E_9 10GbE NICs and set the device
 * description from the PCI revision ID (8A vs. 8B boards).
 * NOTE(review): switch statement and braces are elided in this excerpt.
 */
158 mxge_probe(device_t dev)
163 	if ((pci_get_vendor(dev) == MXGE_PCI_VENDOR_MYRICOM) &&
164 	    ((pci_get_device(dev) == MXGE_PCI_DEVICE_Z8E) ||
165 	     (pci_get_device(dev) == MXGE_PCI_DEVICE_Z8E_9))) {
166 		rev = pci_get_revid(dev);
168 		case MXGE_PCI_REV_Z8E:
169 			device_set_desc(dev, "Myri10G-PCIE-8A");
171 		case MXGE_PCI_REV_Z8ES:
172 			device_set_desc(dev, "Myri10G-PCIE-8B");
/* Unknown revision: still attach, but warn. */
175 			device_set_desc(dev, "Myri10G-PCIE-8??");
176 			device_printf(dev, "Unrecognized rev %d NIC\n",
/*
 * Enable write-combining PIO to the NIC SRAM window (x86/amd64 only),
 * by changing the mapping attribute to PAT_WRITE_COMBINING.
 */
186 mxge_enable_wc(mxge_softc_t *sc)
188 #if defined(__i386) || defined(__amd64)
193 	len = rman_get_size(sc->mem_res);
194 	err = pmap_change_attr((vm_offset_t) sc->sram,
195 			       len, PAT_WRITE_COMBINING);
197 		device_printf(sc->dev, "pmap_change_attr failed, %d\n",
205 /* callback to get our DMA address */
/* bus_dmamap_load() callback: stash the first segment's bus address in *arg. */
207 mxge_dmamap_callback(void *arg, bus_dma_segment_t *segs, int nsegs,
211 	*(bus_addr_t *) arg = segs->ds_addr;
/*
 * Allocate a coherent, zeroed DMA region of `bytes` with the given
 * alignment: create a tag, allocate/map memory, and load the map to
 * obtain dma->bus_addr.  On error the partially-created resources are
 * torn down (goto-style cleanup, partially elided in this excerpt).
 */
216 mxge_dma_alloc(mxge_softc_t *sc, mxge_dma_t *dma, size_t bytes,
217 	       bus_size_t alignment)
220 	device_t dev = sc->dev;
221 	bus_size_t boundary, maxsegsize;
/* 4KB-aligned multi-page requests get special boundary/maxseg handling. */
223 	if (bytes > 4096 && alignment == 4096) {
231 	/* allocate DMAable memory tags */
232 	err = bus_dma_tag_create(sc->parent_dmat,	/* parent */
233 				 alignment,		/* alignment */
234 				 boundary,		/* boundary */
235 				 BUS_SPACE_MAXADDR,	/* low */
236 				 BUS_SPACE_MAXADDR,	/* high */
237 				 NULL, NULL,		/* filter */
240 				 maxsegsize,		/* maxsegsize */
241 				 BUS_DMA_COHERENT,	/* flags */
242 				 NULL, NULL,		/* lock */
243 				 &dma->dmat);		/* tag */
245 		device_printf(dev, "couldn't alloc tag (err = %d)\n", err);
249 	/* allocate DMAable memory & map */
250 	err = bus_dmamem_alloc(dma->dmat, &dma->addr,
251 			       (BUS_DMA_WAITOK | BUS_DMA_COHERENT
252 				| BUS_DMA_ZERO), &dma->map);
254 		device_printf(dev, "couldn't alloc mem (err = %d)\n", err);
255 		goto abort_with_dmat;
258 	/* load the memory */
259 	err = bus_dmamap_load(dma->dmat, dma->map, dma->addr, bytes,
260 			      mxge_dmamap_callback,
261 			      (void *)&dma->bus_addr, 0);
263 		device_printf(dev, "couldn't load map (err = %d)\n", err);
/* error unwind: free memory, then destroy the tag */
269 	bus_dmamem_free(dma->dmat, dma->addr, dma->map);
271 	(void)bus_dma_tag_destroy(dma->dmat);
/* Release a region allocated by mxge_dma_alloc(): unload, free, destroy tag. */
277 mxge_dma_free(mxge_dma_t *dma)
279 	bus_dmamap_unload(dma->dmat, dma->map);
280 	bus_dmamem_free(dma->dmat, dma->addr, dma->map);
281 	(void)bus_dma_tag_destroy(dma->dmat);
285  * The eeprom strings on the lanaiX have the format
/*
 * Parse the NUL-separated EEPROM strings into sc->mac_addr,
 * sc->product_code_string and sc->serial_number_string.
 * "SN2=" takes precedence over "SN=".
 */
292 mxge_parse_strings(mxge_softc_t *sc)
295 	int i, found_mac, found_sn2;
298 	ptr = sc->eeprom_strings;
301 	while (*ptr != '\0') {
302 		if (strncmp(ptr, "MAC=", 4) == 0) {
/* MAC address bytes are two hex digits each ("xx:xx:..."). */
305 				sc->mac_addr[i] = strtoul(ptr, &endptr, 16);
306 				if (endptr - ptr != 2)
315 		} else if (strncmp(ptr, "PC=", 3) == 0) {
317 			strlcpy(sc->product_code_string, ptr,
318 			    sizeof(sc->product_code_string));
319 		} else if (!found_sn2 && (strncmp(ptr, "SN=", 3) == 0)) {
321 			strlcpy(sc->serial_number_string, ptr,
322 			    sizeof(sc->serial_number_string));
323 		} else if (strncmp(ptr, "SN2=", 4) == 0) {
324 			/* SN2 takes precedence over SN */
327 			strlcpy(sc->serial_number_string, ptr,
328 			    sizeof(sc->serial_number_string));
/* advance past the current NUL-terminated string */
330 		while (*ptr++ != '\0') {}
337 	device_printf(sc->dev, "failed to parse eeprom_strings\n");
342 #if defined __i386 || defined i386 || defined __i386__ || defined __x86_64__
/*
 * Enable ECRC generation on an upstream Nvidia (ck804/mcp55) PCIe bridge
 * so that completion packets are 8-byte aligned, allowing the faster
 * "aligned" firmware.  Because the extended config space (offset 0x178)
 * is not reachable through normal config cycles on these chipsets, the
 * bridge's config space is mapped directly with pmap_mapdev().
 */
344 mxge_enable_nvidia_ecrc(mxge_softc_t *sc)
347 	unsigned long base, off;
349 	device_t pdev, mcp55;
350 	uint16_t vendor_id, device_id, word;
351 	uintptr_t bus, slot, func, ivend, idev;
355 	if (!mxge_nvidia_ecrc_enable)
/* Find the bridge above us (parent of parent). */
358 	pdev = device_get_parent(device_get_parent(sc->dev));
360 		device_printf(sc->dev, "could not find parent?\n");
363 	vendor_id = pci_read_config(pdev, PCIR_VENDOR, 2);
364 	device_id = pci_read_config(pdev, PCIR_DEVICE, 2);
/* Only Nvidia bridges are handled here. */
366 	if (vendor_id != 0x10de)
371 	if (device_id == 0x005d) {
372 		/* ck804, base address is magic */
374 	} else if (device_id >= 0x0374 && device_id <= 0x378) {
375 		/* mcp55, base address stored in chipset */
376 		mcp55 = pci_find_bsf(0, 0, 0);
378 		    0x10de == pci_read_config(mcp55, PCIR_VENDOR, 2) &&
379 		    0x0369 == pci_read_config(mcp55, PCIR_DEVICE, 2)) {
380 			word = pci_read_config(mcp55, 0x90, 2);
381 			base = ((unsigned long)word & 0x7ffeU) << 25;
388 	  Test below is commented because it is believed that doing
389 	  config read/write beyond 0xff will access the config space
390 	  for the next larger function. Uncomment this and remove
391 	  the hacky pmap_mapdev() way of accessing config space when
392 	  FreeBSD grows support for extended pcie config space access
395 	/* See if we can, by some miracle, access the extended
397 	val = pci_read_config(pdev, 0x178, 4);
398 	if (val != 0xffffffff) {
400 		pci_write_config(pdev, 0x178, val, 4);
404 	/* Rather than using normal pci config space writes, we must
405 	 * map the Nvidia config space ourselves. This is because on
406 	 * opteron/nvidia class machine the 0xe000000 mapping is
407 	 * handled by the nvidia chipset, that means the internal PCI
408 	 * device (the on-chip northbridge), or the amd-8131 bridge
409 	 * and things behind them are not visible by this method.
412 	BUS_READ_IVAR(device_get_parent(pdev), pdev,
414 	BUS_READ_IVAR(device_get_parent(pdev), pdev,
415 		      PCI_IVAR_SLOT, &slot);
416 	BUS_READ_IVAR(device_get_parent(pdev), pdev,
417 		      PCI_IVAR_FUNCTION, &func);
418 	BUS_READ_IVAR(device_get_parent(pdev), pdev,
419 		      PCI_IVAR_VENDOR, &ivend);
420 	BUS_READ_IVAR(device_get_parent(pdev), pdev,
421 		      PCI_IVAR_DEVICE, &idev);
/* Compute the config-space offset from bus/slot/function. */
424 		+ 0x00100000UL * (unsigned long)bus
425 		+ 0x00001000UL * (unsigned long)(func
428 	/* map it into the kernel */
429 	va = pmap_mapdev(trunc_page((vm_paddr_t)off), PAGE_SIZE);
433 		device_printf(sc->dev, "pmap_kenter_temporary didn't\n");
436 	/* get a pointer to the config space mapped into the kernel */
437 	cfgptr = va + (off & PAGE_MASK);
439 	/* make sure that we can really access it */
440 	vendor_id = *(uint16_t *)(cfgptr + PCIR_VENDOR);
441 	device_id = *(uint16_t *)(cfgptr + PCIR_DEVICE);
442 	if (! (vendor_id == ivend && device_id == idev)) {
443 		device_printf(sc->dev, "mapping failed: 0x%x:0x%x\n",
444 			      vendor_id, device_id);
445 		pmap_unmapdev((vm_offset_t)va, PAGE_SIZE);
/* 0x178 is the extended register that carries the ECRC enable bits. */
449 	ptr32 = (uint32_t*)(cfgptr + 0x178);
452 	if (val == 0xffffffff) {
453 		device_printf(sc->dev, "extended mapping failed\n");
454 		pmap_unmapdev((vm_offset_t)va, PAGE_SIZE);
458 	pmap_unmapdev((vm_offset_t)va, PAGE_SIZE);
460 		device_printf(sc->dev,
461 			      "Enabled ECRC on upstream Nvidia bridge "
463 			      (int)bus, (int)slot, (int)func);
/* Non-x86 stub: the Nvidia workaround only makes sense on x86/amd64. */
468 mxge_enable_nvidia_ecrc(mxge_softc_t *sc)
470 	device_printf(sc->dev,
471 		      "Nforce 4 chipset on non-x86/amd64!?!?!\n");
/*
 * Run the firmware DMA benchmark (or the unaligned-completion test when
 * test_type == MXGEFW_CMD_UNALIGNED_TEST) against the dmabench buffer,
 * recording read/write/read+write throughput in sc->{read,write,read_write}_dma.
 */
478 mxge_dma_test(mxge_softc_t *sc, int test_type)
481 	bus_addr_t dmatest_bus = sc->dmabench_dma.bus_addr;
487 	/* Run a small DMA test.
488 	 * The magic multipliers to the length tell the firmware
489 	 * to do DMA read, write, or read+write tests.  The
490 	 * results are returned in cmd.data0.  The upper 16
491 	 * bits of the return is the number of transfers completed.
492 	 * The lower 16 bits is the time in 0.5us ticks that the
493 	 * transfers took to complete.
496 	len = sc->tx_boundary;
/* 0x10000 multiplier: read test */
498 	cmd.data0 = MXGE_LOWPART_TO_U32(dmatest_bus);
499 	cmd.data1 = MXGE_HIGHPART_TO_U32(dmatest_bus);
500 	cmd.data2 = len * 0x10000;
501 	status = mxge_send_cmd(sc, test_type, &cmd);
/* MB/s = (transfers * len * 2) / (time in 0.5us ticks) */
506 	sc->read_dma = ((cmd.data0>>16) * len * 2) /
507 		(cmd.data0 & 0xffff);
/* 0x1 multiplier: write test */
508 	cmd.data0 = MXGE_LOWPART_TO_U32(dmatest_bus);
509 	cmd.data1 = MXGE_HIGHPART_TO_U32(dmatest_bus);
510 	cmd.data2 = len * 0x1;
511 	status = mxge_send_cmd(sc, test_type, &cmd);
516 	sc->write_dma = ((cmd.data0>>16) * len * 2) /
517 		(cmd.data0 & 0xffff);
/* 0x10001 multiplier: concurrent read+write test */
519 	cmd.data0 = MXGE_LOWPART_TO_U32(dmatest_bus);
520 	cmd.data1 = MXGE_HIGHPART_TO_U32(dmatest_bus);
521 	cmd.data2 = len * 0x10001;
522 	status = mxge_send_cmd(sc, test_type, &cmd);
527 	sc->read_write_dma = ((cmd.data0>>16) * len * 2 * 2) /
528 		(cmd.data0 & 0xffff);
/* A failing unaligned test is expected on some chipsets; don't warn then. */
531 	if (status != 0 && test_type != MXGEFW_CMD_UNALIGNED_TEST)
532 		device_printf(sc->dev, "DMA %s benchmark failed: %d\n",
539 * The Lanai Z8E PCI-E interface achieves higher Read-DMA throughput
540 * when the PCI-E Completion packets are aligned on an 8-byte
541 * boundary. Some PCI-E chip sets always align Completion packets; on
542 * the ones that do not, the alignment can be enforced by enabling
543 * ECRC generation (if supported).
545 * When PCI-E Completion packets are not aligned, it is actually more
546 * efficient to limit Read-DMA transactions to 2KB, rather than 4KB.
548 * If the driver can neither enable ECRC nor verify that it has
549 * already been enabled, then it must use a firmware image which works
550 * around unaligned completion packets (ethp_z8e.dat), and it should
551 * also ensure that it never gives the device a Read-DMA which is
552 * larger than 2KB by setting the tx_boundary to 2KB. If ECRC is
553 * enabled, then the driver should use the aligned (eth_z8e.dat)
554 * firmware image, and set tx_boundary to 4KB.
/*
 * Probe whether this host can use the "aligned" firmware: verify the
 * PCIe Max Read Request size, load the aligned image, try to enable
 * ECRC on an Nvidia bridge, and run the unaligned-completion DMA test.
 * Returns 0 if the aligned firmware is safe to keep.
 *
 * FIX: the third argument to pci_find_cap() had been corrupted to the
 * mojibake character '®'; it must be &reg (pointer to the capability
 * register offset, used below in pci_read_config(dev, reg + 0x8, 2)).
 */
558 mxge_firmware_probe(mxge_softc_t *sc)
560 	device_t dev = sc->dev;
564 	sc->tx_boundary = 4096;
566 	 * Verify the max read request size was set to 4KB
567 	 * before trying the test with 4KB.
569 	if (pci_find_cap(dev, PCIY_EXPRESS, &reg) == 0) {
570 		pectl = pci_read_config(dev, reg + 0x8, 2);
571 		if ((pectl & (5 << 12)) != (5 << 12)) {
572 			device_printf(dev, "Max Read Req. size != 4k (0x%x\n",
574 			sc->tx_boundary = 2048;
579 	 * load the optimized firmware (which assumes aligned PCIe
580 	 * completions) in order to see if it works on this host.
582 	sc->fw_name = mxge_fw_aligned;
583 	status = mxge_load_firmware(sc, 1);
589 	 * Enable ECRC if possible
591 	mxge_enable_nvidia_ecrc(sc);
594 	 * Run a DMA test which watches for unaligned completions and
595 	 * aborts on the first one seen.  Not required on Z8ES or newer.
597 	if (pci_get_revid(sc->dev) >= MXGE_PCI_REV_Z8ES)
599 	status = mxge_dma_test(sc, MXGEFW_CMD_UNALIGNED_TEST);
601 		return 0; /* keep the aligned firmware */
604 	device_printf(dev, "DMA test failed: %d\n", status);
605 	if (status == ENOSYS)
606 		device_printf(dev, "Falling back to ethp! "
607 			      "Please install up to date fw\n");
/*
 * Choose between the aligned and unaligned firmware images, honoring
 * mxge_force_firmware / throttling, the PCIe link width, and the result
 * of mxge_firmware_probe(); then load the selected image.
 */
612 mxge_select_firmware(mxge_softc_t *sc)
615 	int force_firmware = mxge_force_firmware;
/* throttling requires a specific firmware; it overrides the tunable */
618 		force_firmware = sc->throttle;
620 	if (force_firmware != 0) {
621 		if (force_firmware == 1)
626 		device_printf(sc->dev,
627 			      "Assuming %s completions (forced)\n",
628 			      aligned ? "aligned" : "unaligned");
632 	/* if the PCIe link width is 4 or less, we can use the aligned
633 	   firmware and skip any checks */
634 	if (sc->link_width != 0 && sc->link_width <= 4) {
635 		device_printf(sc->dev,
636 			      "PCIe x%d Link, expect reduced performance\n",
/* probe succeeded: aligned firmware already loaded, nothing more to do */
642 	if (0 == mxge_firmware_probe(sc))
647 		sc->fw_name = mxge_fw_aligned;
648 		sc->tx_boundary = 4096;
/* unaligned completions: cap Read-DMA at 2KB (see block comment above) */
650 		sc->fw_name = mxge_fw_unaligned;
651 		sc->tx_boundary = 2048;
653 	return (mxge_load_firmware(sc, 0));
/*
 * Validate a firmware header: check the MCP type, record the version
 * string for sysctl, and verify the major/minor version matches what
 * this driver requires.
 */
657 mxge_validate_firmware(mxge_softc_t *sc, const mcp_gen_header_t *hdr)
661 	if (be32toh(hdr->mcp_type) != MCP_TYPE_ETH) {
662 		device_printf(sc->dev, "Bad firmware type: 0x%x\n",
663 			      be32toh(hdr->mcp_type));
667 	/* save firmware version for sysctl */
668 	strlcpy(sc->fw_version, hdr->version, sizeof(sc->fw_version));
670 		device_printf(sc->dev, "firmware id: %s\n", hdr->version);
672 	sscanf(sc->fw_version, "%d.%d.%d", &sc->fw_ver_major,
673 	       &sc->fw_ver_minor, &sc->fw_ver_tiny);
/* only the exact required major.minor is accepted */
675 	if (!(sc->fw_ver_major == MXGEFW_VERSION_MAJOR
676 	      && sc->fw_ver_minor == MXGEFW_VERSION_MINOR)) {
677 		device_printf(sc->dev, "Found firmware version %s\n",
679 		device_printf(sc->dev, "Driver needs %d.%d\n",
680 			      MXGEFW_VERSION_MAJOR, MXGEFW_VERSION_MINOR);
/*
 * Fetch the firmware image via firmware(9), zlib-inflate it into a
 * temporary buffer, validate its header, and PIO-copy it into NIC SRAM
 * at MXGE_FW_OFFSET in 256-byte chunks.  The uncompressed size is
 * smuggled in fw->version.  Cleanup labels are partially elided here.
 */
688 mxge_load_firmware_helper(mxge_softc_t *sc, uint32_t *limit)
691 	char *inflate_buffer;
692 	const struct firmware *fw;
693 	const mcp_gen_header_t *hdr;
700 	fw = firmware_get(sc->fw_name);
702 		device_printf(sc->dev, "Could not find firmware image %s\n",
709 	/* setup zlib and decompress f/w */
710 	bzero(&zs, sizeof (zs));
/* allocator that is safe in this (non-sleepable) context */
711 	zs.zalloc = zcalloc_nowait;
713 	status = inflateInit(&zs);
714 	if (status != Z_OK) {
719 	/* the uncompressed size is stored as the firmware version,
720 	   which would otherwise go unused */
721 	fw_len = (size_t) fw->version;
722 	inflate_buffer = malloc(fw_len, M_TEMP, M_NOWAIT);
723 	if (inflate_buffer == NULL)
725 	zs.avail_in = fw->datasize;
726 	zs.next_in = __DECONST(char *, fw->data);
727 	zs.avail_out = fw_len;
728 	zs.next_out = inflate_buffer;
729 	status = inflate(&zs, Z_FINISH);
730 	if (status != Z_STREAM_END) {
731 		device_printf(sc->dev, "zlib %d\n", status);
733 		goto abort_with_buffer;
/* locate and sanity-check the embedded MCP header */
737 	hdr_offset = htobe32(*(const uint32_t *)
738 			     (inflate_buffer + MCP_HEADER_PTR_OFFSET));
739 	if ((hdr_offset & 3) || hdr_offset + sizeof(*hdr) > fw_len) {
740 		device_printf(sc->dev, "Bad firmware file");
742 		goto abort_with_buffer;
744 	hdr = (const void*)(inflate_buffer + hdr_offset);
746 	status = mxge_validate_firmware(sc, hdr);
748 		goto abort_with_buffer;
750 	/* Copy the inflated firmware to NIC SRAM. */
751 	for (i = 0; i < fw_len; i += 256) {
752 		mxge_pio_copy(sc->sram + MXGE_FW_OFFSET + i,
754 			      min(256U, (unsigned)(fw_len - i)));
763 	free(inflate_buffer, M_TEMP);
767 	firmware_put(fw, FIRMWARE_UNLOAD);
772  * Enable or disable periodic RDMAs from the host to make certain
773  * chipsets resend dropped PCIe messages
/*
 * Build an 8-byte-aligned command buffer, post it to the boot dummy-RDMA
 * mailbox in SRAM, and poll the confirmation word (written to -1 by the
 * firmware) for up to ~20 iterations.
 */
777 mxge_dummy_rdma(mxge_softc_t *sc, int enable)
780 	volatile uint32_t *confirm;
781 	volatile char *submit;
782 	uint32_t *buf, dma_low, dma_high;
/* round buf_bytes up to an 8-byte boundary */
785 	buf = (uint32_t *)((unsigned long)(buf_bytes + 7) & ~7UL);
787 	/* clear confirmation addr */
788 	confirm = (volatile uint32_t *)sc->cmd;
792 	/* send an rdma command to the PCIe engine, and wait for the
793 	   response in the confirmation address.  The firmware should
794 	   write a -1 there to indicate it is alive and well
797 	dma_low = MXGE_LOWPART_TO_U32(sc->cmd_dma.bus_addr);
798 	dma_high = MXGE_HIGHPART_TO_U32(sc->cmd_dma.bus_addr);
799 	buf[0] = htobe32(dma_high);		/* confirm addr MSW */
800 	buf[1] = htobe32(dma_low);		/* confirm addr LSW */
801 	buf[2] = htobe32(0xffffffff);		/* confirm data */
802 	dma_low = MXGE_LOWPART_TO_U32(sc->zeropad_dma.bus_addr);
803 	dma_high = MXGE_HIGHPART_TO_U32(sc->zeropad_dma.bus_addr);
804 	buf[3] = htobe32(dma_high); 		/* dummy addr MSW */
805 	buf[4] = htobe32(dma_low); 		/* dummy addr LSW */
806 	buf[5] = htobe32(enable);		/* enable? */
809 	submit = (volatile char *)(sc->sram + MXGEFW_BOOT_DUMMY_RDMA);
811 	mxge_pio_copy(submit, buf, 64);
/* poll for the firmware's -1 acknowledgement */
816 	while (*confirm != 0xffffffff && i < 20) {
820 	if (*confirm != 0xffffffff) {
821 		device_printf(sc->dev, "dummy rdma %s failed (%p = 0x%x)",
822 			      (enable ? "enable" : "disable"), confirm,
/*
 * Issue a command to the firmware through the SRAM command mailbox and
 * wait (up to ~20ms, under cmd_mtx) for the DMA'd response.  On success
 * the response data is returned in data->data0; firmware error codes
 * are translated to errno values (translation partially elided here).
 */
829 mxge_send_cmd(mxge_softc_t *sc, uint32_t cmd, mxge_cmd_t *data)
832 	char buf_bytes[sizeof(*buf) + 8];
833 	volatile mcp_cmd_response_t *response = sc->cmd;
834 	volatile char *cmd_addr = sc->sram + MXGEFW_ETH_CMD;
835 	uint32_t dma_low, dma_high;
836 	int err, sleep_total = 0;
838 	/* ensure buf is aligned to 8 bytes */
839 	buf = (mcp_cmd_t *)((unsigned long)(buf_bytes + 7) & ~7UL);
841 	buf->data0 = htobe32(data->data0);
842 	buf->data1 = htobe32(data->data1);
843 	buf->data2 = htobe32(data->data2);
844 	buf->cmd = htobe32(cmd);
845 	dma_low = MXGE_LOWPART_TO_U32(sc->cmd_dma.bus_addr);
846 	dma_high = MXGE_HIGHPART_TO_U32(sc->cmd_dma.bus_addr);
848 	buf->response_addr.low = htobe32(dma_low);
849 	buf->response_addr.high = htobe32(dma_high);
850 	mtx_lock(&sc->cmd_mtx);
/* sentinel: firmware overwrites this when it responds */
851 	response->result = 0xffffffff;
853 	mxge_pio_copy((volatile void *)cmd_addr, buf, sizeof (*buf));
855 	/* wait up to 20ms */
857 	for (sleep_total = 0; sleep_total < 20; sleep_total++) {
858 		bus_dmamap_sync(sc->cmd_dma.dmat,
859 				sc->cmd_dma.map, BUS_DMASYNC_POSTREAD);
861 		switch (be32toh(response->result)) {
863 			data->data0 = be32toh(response->data);
869 		case MXGEFW_CMD_UNKNOWN:
872 		case MXGEFW_CMD_ERROR_UNALIGNED:
875 		case MXGEFW_CMD_ERROR_BUSY:
878 		case MXGEFW_CMD_ERROR_I2C_ABSENT:
882 			device_printf(sc->dev,
884 				      "failed, result = %d\n",
885 				      cmd, be32toh(response->result));
/* fell out of the loop: no response within the timeout */
893 	device_printf(sc->dev, "mxge: command %d timed out"
895 		      cmd, be32toh(response->result));
896 	mtx_unlock(&sc->cmd_mtx);
/*
 * Adopt the firmware already running on the NIC: locate its header via
 * the pointer at MCP_HEADER_PTR_OFFSET in SRAM, copy it to host memory,
 * validate it, and flag the known 1.4.4–1.4.11 rx-filter bug.
 */
901 mxge_adopt_running_firmware(mxge_softc_t *sc)
903 	struct mcp_gen_header *hdr;
904 	const size_t bytes = sizeof (struct mcp_gen_header);
908 	/* find running firmware header */
909 	hdr_offset = htobe32(*(volatile uint32_t *)
910 			     (sc->sram + MCP_HEADER_PTR_OFFSET));
/* sanity-check the header offset before dereferencing SRAM */
912 	if ((hdr_offset & 3) || hdr_offset + sizeof(*hdr) > sc->sram_size) {
913 		device_printf(sc->dev,
914 			      "Running firmware has bad header offset (%d)\n",
919 	/* copy header of running firmware from SRAM to host memory to
920 	 * validate firmware */
921 	hdr = malloc(bytes, M_DEVBUF, M_NOWAIT);
923 		device_printf(sc->dev, "could not malloc firmware hdr\n");
926 	bus_space_read_region_1(rman_get_bustag(sc->mem_res),
927 				rman_get_bushandle(sc->mem_res),
928 				hdr_offset, (char *)hdr, bytes);
929 	status = mxge_validate_firmware(sc, hdr);
933 	 * check to see if adopted firmware has bug where adopting
934 	 * it will cause broadcasts to be filtered unless the NIC
935 	 * is kept in ALLMULTI mode
937 	if (sc->fw_ver_major == 1 && sc->fw_ver_minor == 4 &&
938 	    sc->fw_ver_tiny >= 4 && sc->fw_ver_tiny <= 11) {
939 		sc->adopted_rx_filter_bug = 1;
940 		device_printf(sc->dev, "Adopting fw %d.%d.%d: "
941 			      "working around rx filter bug\n",
942 			      sc->fw_ver_major, sc->fw_ver_minor,
/*
 * Load firmware into the NIC: inflate and copy the image via
 * mxge_load_firmware_helper(), or (if that fails and `adopt` is set)
 * adopt the firmware already running on the NIC.  Then hand off to the
 * bootstrap MCP and poll the confirmation word for the firmware's -1 ack.
 */
951 mxge_load_firmware(mxge_softc_t *sc, int adopt)
953 	volatile uint32_t *confirm;
954 	volatile char *submit;
956 	uint32_t *buf, size, dma_low, dma_high;
/* 8-byte-align the stack command buffer */
959 	buf = (uint32_t *)((unsigned long)(buf_bytes + 7) & ~7UL);
961 	size = sc->sram_size;
962 	status = mxge_load_firmware_helper(sc, &size);
966 		/* Try to use the currently running firmware, if
968 		status = mxge_adopt_running_firmware(sc);
970 			device_printf(sc->dev,
971 				      "failed to adopt running firmware\n");
974 		device_printf(sc->dev,
975 			      "Successfully adopted running firmware\n");
976 		if (sc->tx_boundary == 4096) {
977 			device_printf(sc->dev,
978 				      "Using firmware currently running on NIC"
980 			device_printf(sc->dev,
981 				      "performance consider loading optimized "
/* adopted firmware may be the unaligned variant; reflect that */
984 			sc->fw_name = mxge_fw_unaligned;
985 			sc->tx_boundary = 2048;
988 	/* clear confirmation addr */
989 	confirm = (volatile uint32_t *)sc->cmd;
992 	/* send a reload command to the bootstrap MCP, and wait for the
993 	   response in the confirmation address.  The firmware should
994 	   write a -1 there to indicate it is alive and well
997 	dma_low = MXGE_LOWPART_TO_U32(sc->cmd_dma.bus_addr);
998 	dma_high = MXGE_HIGHPART_TO_U32(sc->cmd_dma.bus_addr);
1000 	buf[0] = htobe32(dma_high);	/* confirm addr MSW */
1001 	buf[1] = htobe32(dma_low);	/* confirm addr LSW */
1002 	buf[2] = htobe32(0xffffffff);	/* confirm data */
1004 	/* FIX: All newest firmware should un-protect the bottom of
1005 	   the sram before handoff. However, the very first interfaces
1006 	   do not. Therefore the handoff copy must skip the first 8 bytes
1008 	/* where the code starts*/
1009 	buf[3] = htobe32(MXGE_FW_OFFSET + 8);
1010 	buf[4] = htobe32(size - 8); 	/* length of code */
1011 	buf[5] = htobe32(8);		/* where to copy to */
1012 	buf[6] = htobe32(0);		/* where to jump to */
1014 	submit = (volatile char *)(sc->sram + MXGEFW_BOOT_HANDOFF);
1015 	mxge_pio_copy(submit, buf, 64);
/* poll for the firmware's -1 acknowledgement (up to ~20 iterations) */
1020 	while (*confirm != 0xffffffff && i < 20) {
1023 		bus_dmamap_sync(sc->cmd_dma.dmat,
1024 				sc->cmd_dma.map, BUS_DMASYNC_POSTREAD);
1026 	if (*confirm != 0xffffffff) {
1027 		device_printf(sc->dev,"handoff failed (%p = 0x%x)",
/*
 * Push sc->mac_addr to the firmware: bytes 0-3 packed big-endian into
 * data0, bytes 4-5 into data1.
 */
1036 mxge_update_mac_address(mxge_softc_t *sc)
1039 	uint8_t *addr = sc->mac_addr;
1043 	cmd.data0 = ((addr[0] << 24) | (addr[1] << 16)
1044 		     | (addr[2] << 8) | addr[3]);
1046 	cmd.data1 = ((addr[4] << 8) | (addr[5]));
1048 	status = mxge_send_cmd(sc, MXGEFW_SET_MAC_ADDRESS, &cmd);
/* Enable or disable ethernet flow control (pause frames) in the firmware. */
1053 mxge_change_pause(mxge_softc_t *sc, int pause)
1059 		status = mxge_send_cmd(sc, MXGEFW_ENABLE_FLOW_CONTROL,
1062 		status = mxge_send_cmd(sc, MXGEFW_DISABLE_FLOW_CONTROL,
1066 		device_printf(sc->dev, "Failed to set flow control mode\n");
/*
 * Enable or disable promiscuous mode in the firmware; the
 * mxge_always_promisc tunable forces it on.
 */
1074 mxge_change_promisc(mxge_softc_t *sc, int promisc)
1079 	if (mxge_always_promisc)
1083 		status = mxge_send_cmd(sc, MXGEFW_ENABLE_PROMISC,
1086 		status = mxge_send_cmd(sc, MXGEFW_DISABLE_PROMISC,
1090 		device_printf(sc->dev, "Failed to set promisc mode\n");
/* Context carried through if_foreach_llmaddr() while joining groups. */
1094 struct mxge_add_maddr_ctx {
/*
 * if_foreach_llmaddr() callback: join one multicast group.  The 6-byte
 * link-level address is packed into cmd.data0/data1 (network order).
 * A prior error short-circuits the remaining addresses.
 */
1100 mxge_add_maddr(void *arg, struct sockaddr_dl *sdl, u_int cnt)
1102 	struct mxge_add_maddr_ctx *ctx = arg;
1105 	if (ctx->error != 0)
1107 	bcopy(LLADDR(sdl), &cmd.data0, 4);
1108 	bcopy(LLADDR(sdl) + 4, &cmd.data1, 2);
1109 	cmd.data0 = htonl(cmd.data0);
1110 	cmd.data1 = htonl(cmd.data1);
1112 	ctx->error = mxge_send_cmd(ctx->sc, MXGEFW_JOIN_MULTICAST_GROUP, &cmd);
/*
 * Reprogram the firmware multicast filter from the interface's address
 * list: temporarily go ALLMULTI, flush old filters, re-add each address
 * via mxge_add_maddr(), then re-enable filtering.  Stays in ALLMULTI if
 * IFF_ALLMULTI is set, the adopted-firmware bug is present, or any step fails.
 */
1118 mxge_set_multicast_list(mxge_softc_t *sc)
1120 	struct mxge_add_maddr_ctx ctx;
1121 	struct ifnet *ifp = sc->ifp;
1125 	/* This firmware is known to not support multicast */
1126 	if (!sc->fw_multicast_support)
1129 	/* Disable multicast filtering while we play with the lists*/
1130 	err = mxge_send_cmd(sc, MXGEFW_ENABLE_ALLMULTI, &cmd);
1132 		device_printf(sc->dev, "Failed MXGEFW_ENABLE_ALLMULTI,"
1133 			      " error status: %d\n", err);
/* see mxge_adopt_running_firmware(): buggy fw must stay in ALLMULTI */
1137 	if (sc->adopted_rx_filter_bug)
1140 	if (ifp->if_flags & IFF_ALLMULTI)
1141 		/* request to disable multicast filtering, so quit here */
1144 	/* Flush all the filters */
1146 	err = mxge_send_cmd(sc, MXGEFW_LEAVE_ALL_MULTICAST_GROUPS, &cmd);
1148 		device_printf(sc->dev,
1149 			      "Failed MXGEFW_LEAVE_ALL_MULTICAST_GROUPS"
1150 			      ", error status: %d\n", err);
1154 	/* Walk the multicast list, and add each address */
1157 	if_foreach_llmaddr(ifp, mxge_add_maddr, &ctx);
1158 	if (ctx.error != 0) {
1159 		device_printf(sc->dev, "Failed MXGEFW_JOIN_MULTICAST_GROUP, "
1160 		    "error status:" "%d\t", ctx.error);
1161 		/* abort, leaving multicast filtering off */
1165 	/* Enable multicast filtering */
1166 	err = mxge_send_cmd(sc, MXGEFW_DISABLE_ALLMULTI, &cmd);
1168 		device_printf(sc->dev, "Failed MXGEFW_DISABLE_ALLMULTI"
1169 			      ", error status: %d\n", err);
/*
 * Determine the maximum MTU: full MXGEFW_MAX_MTU when page-size jumbo
 * clusters are big enough or the firmware accepts the "always use N big
 * buffers" mode; otherwise limited by MJUMPAGESIZE.
 */
1174 mxge_max_mtu(mxge_softc_t *sc)
1179 	if (MJUMPAGESIZE - MXGEFW_PAD > MXGEFW_MAX_MTU)
1180 		return  MXGEFW_MAX_MTU - MXGEFW_PAD;
1182 	/* try to set nbufs to see if it we can
1183 	   use virtually contiguous jumbos */
1185 	status = mxge_send_cmd(sc, MXGEFW_CMD_ALWAYS_USE_N_BIG_BUFFERS,
1188 		return MXGEFW_MAX_MTU - MXGEFW_PAD;
1190 	/* otherwise, we're limited to MJUMPAGESIZE */
1191 	return MJUMPAGESIZE - MXGEFW_PAD;
/*
 * Reset the NIC and re-establish all host/firmware shared state:
 * send MXGEFW_CMD_RESET, restart dummy RDMAs, size the interrupt queue,
 * (re)configure RSS slices, exchange interrupt queue DMA addresses,
 * fetch coalescing/IRQ register offsets, re-run the DMA benchmark,
 * zero per-slice counters, and restore MAC/promisc/pause/multicast and
 * throttle settings.
 */
1195 mxge_reset(mxge_softc_t *sc, int interrupts_setup)
1197 	struct mxge_slice_state *ss;
1198 	mxge_rx_done_t *rx_done;
1199 	volatile uint32_t *irq_claim;
1203 	/* try to send a reset command to the card to see if it
1205 	memset(&cmd, 0, sizeof (cmd));
1206 	status = mxge_send_cmd(sc, MXGEFW_CMD_RESET, &cmd);
1208 		device_printf(sc->dev, "failed reset\n");
1212 	mxge_dummy_rdma(sc, 1);
1215 	/* set the intrq size */
1216 	cmd.data0 = sc->rx_ring_size;
1217 	status = mxge_send_cmd(sc, MXGEFW_CMD_SET_INTRQ_SIZE, &cmd);
1220 	 * Even though we already know how many slices are supported
1221 	 * via mxge_slice_probe(), MXGEFW_CMD_GET_MAX_RSS_QUEUES
1222 	 * has magic side effects, and must be called after a reset.
1223 	 * It must be called prior to calling any RSS related cmds,
1224 	 * including assigning an interrupt queue for anything but
1225 	 * slice 0.  It must also be called *after*
1226 	 * MXGEFW_CMD_SET_INTRQ_SIZE, since the intrq size is used by
1227 	 * the firmware to compute offsets.
1230 	if (sc->num_slices > 1) {
1231 		/* ask the maximum number of slices it supports */
1232 		status = mxge_send_cmd(sc, MXGEFW_CMD_GET_MAX_RSS_QUEUES,
1235 			device_printf(sc->dev,
1236 				      "failed to get number of slices\n");
1240 		 * MXGEFW_CMD_ENABLE_RSS_QUEUES must be called prior
1241 		 * to setting up the interrupt queue DMA
1243 		cmd.data0 = sc->num_slices;
1244 		cmd.data1 = MXGEFW_SLICE_INTR_MODE_ONE_PER_SLICE;
1245 #ifdef IFNET_BUF_RING
1246 		cmd.data1 |= MXGEFW_SLICE_ENABLE_MULTIPLE_TX_QUEUES;
1248 		status = mxge_send_cmd(sc, MXGEFW_CMD_ENABLE_RSS_QUEUES,
1251 			device_printf(sc->dev,
1252 				      "failed to set number of slices\n");
1258 	if (interrupts_setup) {
1259 		/* Now exchange information about interrupts  */
1260 		for (slice = 0; slice < sc->num_slices; slice++) {
1261 			rx_done = &sc->ss[slice].rx_done;
1262 			memset(rx_done->entry, 0, sc->rx_ring_size);
1263 			cmd.data0 = MXGE_LOWPART_TO_U32(rx_done->dma.bus_addr);
1264 			cmd.data1 = MXGE_HIGHPART_TO_U32(rx_done->dma.bus_addr);
1266 			status |= mxge_send_cmd(sc,
1267 						MXGEFW_CMD_SET_INTRQ_DMA,
/* fetch SRAM offsets of the coalescing and IRQ ack/deassert registers */
1272 	status |= mxge_send_cmd(sc,
1273 				MXGEFW_CMD_GET_INTR_COAL_DELAY_OFFSET, &cmd);
1276 	sc->intr_coal_delay_ptr = (volatile uint32_t *)(sc->sram + cmd.data0);
1278 	status |= mxge_send_cmd(sc, MXGEFW_CMD_GET_IRQ_ACK_OFFSET, &cmd);
1279 	irq_claim = (volatile uint32_t *)(sc->sram + cmd.data0);
1282 	status |= mxge_send_cmd(sc,  MXGEFW_CMD_GET_IRQ_DEASSERT_OFFSET,
1284 	sc->irq_deassert = (volatile uint32_t *)(sc->sram + cmd.data0);
1286 		device_printf(sc->dev, "failed set interrupt parameters\n");
1291 	*sc->intr_coal_delay_ptr = htobe32(sc->intr_coal_delay);
1294 	/* run a DMA benchmark */
1295 	(void) mxge_dma_test(sc, MXGEFW_DMA_TEST);
/* reset per-slice mcp/driver shared state */
1297 	for (slice = 0; slice < sc->num_slices; slice++) {
1298 		ss = &sc->ss[slice];
1300 		ss->irq_claim = irq_claim + (2 * slice);
1301 		/* reset mcp/driver shared state back to 0 */
1302 		ss->rx_done.idx = 0;
1303 		ss->rx_done.cnt = 0;
1306 		ss->tx.pkt_done = 0;
1307 		ss->tx.queue_active = 0;
1308 		ss->tx.activate = 0;
1309 		ss->tx.deactivate = 0;
1314 		ss->rx_small.cnt = 0;
1315 		ss->lc.lro_bad_csum = 0;
1316 		ss->lc.lro_queued = 0;
1317 		ss->lc.lro_flushed = 0;
1318 		if (ss->fw_stats != NULL) {
1319 			bzero(ss->fw_stats, sizeof *ss->fw_stats);
1322 	sc->rdma_tags_available = 15;
/* restore interface settings the reset wiped out */
1323 	status = mxge_update_mac_address(sc);
1324 	mxge_change_promisc(sc, sc->ifp->if_flags & IFF_PROMISC);
1325 	mxge_change_pause(sc, sc->pause);
1326 	mxge_set_multicast_list(sc);
1328 		cmd.data0 = sc->throttle;
1329 		if (mxge_send_cmd(sc, MXGEFW_CMD_SET_THROTTLE_FACTOR,
1331 			device_printf(sc->dev,
1332 				      "can't enable throttle\n");
/*
 * Sysctl handler for the transmit throttle factor: validate the new
 * value against MXGE_MIN/MAX_THROTTLE and push it to the firmware under
 * driver_mtx.
 */
1339 mxge_change_throttle(SYSCTL_HANDLER_ARGS)
1344 	unsigned int throttle;
1347 	throttle = sc->throttle;
1348 	err = sysctl_handle_int(oidp, &throttle, arg2, req);
/* unchanged value: nothing to do */
1353 	if (throttle == sc->throttle)
1356 	if (throttle < MXGE_MIN_THROTTLE || throttle > MXGE_MAX_THROTTLE)
1359 	mtx_lock(&sc->driver_mtx);
1360 	cmd.data0 = throttle;
1361 	err = mxge_send_cmd(sc, MXGEFW_CMD_SET_THROTTLE_FACTOR, &cmd);
1363 		sc->throttle = throttle;
1364 	mtx_unlock(&sc->driver_mtx);
/*
 * Sysctl handler for the interrupt coalescing delay (usecs): validate
 * (non-zero, <= 1s) and write it directly to the firmware register via
 * sc->intr_coal_delay_ptr under driver_mtx.
 */
1369 mxge_change_intr_coal(SYSCTL_HANDLER_ARGS)
1372 	unsigned int intr_coal_delay;
1376 	intr_coal_delay = sc->intr_coal_delay;
1377 	err = sysctl_handle_int(oidp, &intr_coal_delay, arg2, req);
1381 	if (intr_coal_delay == sc->intr_coal_delay)
1384 	if (intr_coal_delay == 0 || intr_coal_delay > 1000*1000)
1387 	mtx_lock(&sc->driver_mtx);
/* the register is big-endian in NIC SRAM */
1388 	*sc->intr_coal_delay_ptr = htobe32(intr_coal_delay);
1389 	sc->intr_coal_delay = intr_coal_delay;
1391 	mtx_unlock(&sc->driver_mtx);
/* Sysctl handler toggling flow control via mxge_change_pause(). */
1396 mxge_change_flow_control(SYSCTL_HANDLER_ARGS)
1399 	unsigned int enabled;
1403 	enabled = sc->pause;
1404 	err = sysctl_handle_int(oidp, &enabled, arg2, req);
1408 	if (enabled == sc->pause)
1411 	mtx_lock(&sc->driver_mtx);
1412 	err = mxge_change_pause(sc, enabled);
1413 	mtx_unlock(&sc->driver_mtx);
/* Read-only sysctl handler exposing a big-endian 32-bit counter as host order. */
1418 mxge_handle_be32(SYSCTL_HANDLER_ARGS)
1424 	arg2 = be32toh(*(int *)arg1);
1426 	err = sysctl_handle_int(oidp, arg1, arg2, req);
/*
 * Tear down the per-slice sysctl subtrees and the slice parent tree.
 * Safe to call when the trees were never created.
 */
1432 mxge_rem_sysctls(mxge_softc_t *sc)
1434 	struct mxge_slice_state *ss;
1437 	if (sc->slice_sysctl_tree == NULL)
1440 	for (slice = 0; slice < sc->num_slices; slice++) {
1441 		ss = &sc->ss[slice];
1442 		if (ss == NULL || ss->sysctl_tree == NULL)
1444 		sysctl_ctx_free(&ss->sysctl_ctx);
1445 		ss->sysctl_tree = NULL;
1447 	sysctl_ctx_free(&sc->slice_sysctl_ctx);
1448 	sc->slice_sysctl_tree = NULL;
/*
 * Register all of the driver's sysctl nodes: static device information,
 * performance tunables (coalescing, throttle, flow control), the
 * big-endian firmware statistics block (via mxge_handle_be32), and a
 * per-slice subtree of ring/LRO/transmit counters.
 * NOTE(review): fragmented listing — many OID name strings and some
 * arguments are on lines that were dropped from this chunk.
 */
1452 mxge_add_sysctls(mxge_softc_t *sc)
1454 struct sysctl_ctx_list *ctx;
1455 struct sysctl_oid_list *children;
1457 struct mxge_slice_state *ss;
1461 ctx = device_get_sysctl_ctx(sc->dev);
1462 children = SYSCTL_CHILDREN(device_get_sysctl_tree(sc->dev));
/* firmware stats live in slice 0's DMA-shared stats block */
1463 fw = sc->ss[0].fw_stats;
1465 /* random information */
1466 SYSCTL_ADD_STRING(ctx, children, OID_AUTO,
1468 CTLFLAG_RD, sc->fw_version,
1469 0, "firmware version");
1470 SYSCTL_ADD_STRING(ctx, children, OID_AUTO,
1472 CTLFLAG_RD, sc->serial_number_string,
1473 0, "serial number");
1474 SYSCTL_ADD_STRING(ctx, children, OID_AUTO,
1476 CTLFLAG_RD, sc->product_code_string,
1478 SYSCTL_ADD_INT(ctx, children, OID_AUTO,
1480 CTLFLAG_RD, &sc->link_width,
1482 SYSCTL_ADD_INT(ctx, children, OID_AUTO,
1484 CTLFLAG_RD, &sc->tx_boundary,
1486 SYSCTL_ADD_INT(ctx, children, OID_AUTO,
1488 CTLFLAG_RD, &sc->wc,
1489 0, "write combining PIO?");
1490 SYSCTL_ADD_INT(ctx, children, OID_AUTO,
1492 CTLFLAG_RD, &sc->read_dma,
1493 0, "DMA Read speed in MB/s");
1494 SYSCTL_ADD_INT(ctx, children, OID_AUTO,
1496 CTLFLAG_RD, &sc->write_dma,
1497 0, "DMA Write speed in MB/s");
1498 SYSCTL_ADD_INT(ctx, children, OID_AUTO,
1499 "read_write_dma_MBs",
1500 CTLFLAG_RD, &sc->read_write_dma,
1501 0, "DMA concurrent Read/Write speed in MB/s");
1502 SYSCTL_ADD_INT(ctx, children, OID_AUTO,
1504 CTLFLAG_RD, &sc->watchdog_resets,
1505 0, "Number of times NIC was reset");
1508 /* performance related tunables */
1509 SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
1511 CTLTYPE_INT|CTLFLAG_RW, sc,
1512 0, mxge_change_intr_coal,
1513 "I", "interrupt coalescing delay in usecs");
1515 SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
1517 CTLTYPE_INT|CTLFLAG_RW, sc,
1518 0, mxge_change_throttle,
1519 "I", "transmit throttling");
1521 SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
1522 "flow_control_enabled",
1523 CTLTYPE_INT|CTLFLAG_RW, sc,
1524 0, mxge_change_flow_control,
/* NOTE(review): description string looks copy-pasted from the
   intr_coal node; probably should say "flow control" — confirm
   against upstream before changing */
1525 "I", "interrupt coalescing delay in usecs");
1527 SYSCTL_ADD_INT(ctx, children, OID_AUTO,
1529 CTLFLAG_RW, &mxge_deassert_wait,
1530 0, "Wait for IRQ line to go low in ihandler");
/* firmware counters are big-endian; expose via mxge_handle_be32 */
1532 /* stats block from firmware is in network byte order.
1534 SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
1536 CTLTYPE_INT|CTLFLAG_RD, &fw->link_up,
1537 0, mxge_handle_be32,
1539 SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
1540 "rdma_tags_available",
1541 CTLTYPE_INT|CTLFLAG_RD, &fw->rdma_tags_available,
1542 0, mxge_handle_be32,
1543 "I", "rdma_tags_available");
1544 SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
1545 "dropped_bad_crc32",
1546 CTLTYPE_INT|CTLFLAG_RD,
1547 &fw->dropped_bad_crc32,
1548 0, mxge_handle_be32,
1549 "I", "dropped_bad_crc32");
1550 SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
1552 CTLTYPE_INT|CTLFLAG_RD,
1553 &fw->dropped_bad_phy,
1554 0, mxge_handle_be32,
1555 "I", "dropped_bad_phy");
1556 SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
1557 "dropped_link_error_or_filtered",
1558 CTLTYPE_INT|CTLFLAG_RD,
1559 &fw->dropped_link_error_or_filtered,
1560 0, mxge_handle_be32,
1561 "I", "dropped_link_error_or_filtered");
1562 SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
1563 "dropped_link_overflow",
1564 CTLTYPE_INT|CTLFLAG_RD, &fw->dropped_link_overflow,
1565 0, mxge_handle_be32,
1566 "I", "dropped_link_overflow");
1567 SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
1568 "dropped_multicast_filtered",
1569 CTLTYPE_INT|CTLFLAG_RD,
1570 &fw->dropped_multicast_filtered,
1571 0, mxge_handle_be32,
1572 "I", "dropped_multicast_filtered");
1573 SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
1574 "dropped_no_big_buffer",
1575 CTLTYPE_INT|CTLFLAG_RD, &fw->dropped_no_big_buffer,
1576 0, mxge_handle_be32,
1577 "I", "dropped_no_big_buffer");
1578 SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
1579 "dropped_no_small_buffer",
1580 CTLTYPE_INT|CTLFLAG_RD,
1581 &fw->dropped_no_small_buffer,
1582 0, mxge_handle_be32,
1583 "I", "dropped_no_small_buffer");
1584 SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
1586 CTLTYPE_INT|CTLFLAG_RD, &fw->dropped_overrun,
1587 0, mxge_handle_be32,
1588 "I", "dropped_overrun");
1589 SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
1591 CTLTYPE_INT|CTLFLAG_RD,
1593 0, mxge_handle_be32,
1594 "I", "dropped_pause");
1595 SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
1597 CTLTYPE_INT|CTLFLAG_RD, &fw->dropped_runt,
1598 0, mxge_handle_be32,
1599 "I", "dropped_runt");
1601 SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
1602 "dropped_unicast_filtered",
1603 CTLTYPE_INT|CTLFLAG_RD, &fw->dropped_unicast_filtered,
1604 0, mxge_handle_be32,
1605 "I", "dropped_unicast_filtered");
1607 /* verbose printing? */
1608 SYSCTL_ADD_INT(ctx, children, OID_AUTO,
1610 CTLFLAG_RW, &mxge_verbose,
1611 0, "verbose printing");
1613 /* add counters exported for debugging from all slices */
1614 sysctl_ctx_init(&sc->slice_sysctl_ctx);
1615 sc->slice_sysctl_tree =
1616 SYSCTL_ADD_NODE(&sc->slice_sysctl_ctx, children, OID_AUTO,
1617 "slice", CTLFLAG_RD, 0, "");
/* one numbered subtree per slice: rx ring counts, LRO stats, tx counters */
1619 for (slice = 0; slice < sc->num_slices; slice++) {
1620 ss = &sc->ss[slice];
1621 sysctl_ctx_init(&ss->sysctl_ctx);
1622 ctx = &ss->sysctl_ctx;
1623 children = SYSCTL_CHILDREN(sc->slice_sysctl_tree);
1624 sprintf(slice_num, "%d", slice);
1626 SYSCTL_ADD_NODE(ctx, children, OID_AUTO, slice_num,
1628 children = SYSCTL_CHILDREN(ss->sysctl_tree);
1629 SYSCTL_ADD_INT(ctx, children, OID_AUTO,
1631 CTLFLAG_RD, &ss->rx_small.cnt,
1633 SYSCTL_ADD_INT(ctx, children, OID_AUTO,
1635 CTLFLAG_RD, &ss->rx_big.cnt,
1637 SYSCTL_ADD_U64(ctx, children, OID_AUTO,
1638 "lro_flushed", CTLFLAG_RD, &ss->lc.lro_flushed,
1639 0, "number of lro merge queues flushed");
1641 SYSCTL_ADD_U64(ctx, children, OID_AUTO,
1642 "lro_bad_csum", CTLFLAG_RD, &ss->lc.lro_bad_csum,
1643 0, "number of bad csums preventing LRO");
1645 SYSCTL_ADD_U64(ctx, children, OID_AUTO,
1646 "lro_queued", CTLFLAG_RD, &ss->lc.lro_queued,
1647 0, "number of frames appended to lro merge"
1650 #ifndef IFNET_BUF_RING
1651 /* only transmit from slice 0 for now */
1655 SYSCTL_ADD_INT(ctx, children, OID_AUTO,
1657 CTLFLAG_RD, &ss->tx.req,
1660 SYSCTL_ADD_INT(ctx, children, OID_AUTO,
1662 CTLFLAG_RD, &ss->tx.done,
1664 SYSCTL_ADD_INT(ctx, children, OID_AUTO,
1666 CTLFLAG_RD, &ss->tx.pkt_done,
1668 SYSCTL_ADD_INT(ctx, children, OID_AUTO,
1670 CTLFLAG_RD, &ss->tx.stall,
1672 SYSCTL_ADD_INT(ctx, children, OID_AUTO,
1674 CTLFLAG_RD, &ss->tx.wake,
1676 SYSCTL_ADD_INT(ctx, children, OID_AUTO,
1678 CTLFLAG_RD, &ss->tx.defrag,
1680 SYSCTL_ADD_INT(ctx, children, OID_AUTO,
1682 CTLFLAG_RD, &ss->tx.queue_active,
1683 0, "tx_queue_active");
1684 SYSCTL_ADD_INT(ctx, children, OID_AUTO,
1686 CTLFLAG_RD, &ss->tx.activate,
1688 SYSCTL_ADD_INT(ctx, children, OID_AUTO,
1690 CTLFLAG_RD, &ss->tx.deactivate,
1691 0, "tx_deactivate");
1695 /* copy an array of mcp_kreq_ether_send_t's to the mcp. Copy
1696 backwards one at a time and handle ring wraps */
/*
 * Slow path used when a request chain would wrap the send ring:
 * copy descriptors to the NIC one at a time, from the last to the
 * first, masking the index for ring wrap-around.
 */
1699 mxge_submit_req_backwards(mxge_tx_ring_t *tx,
1700 mcp_kreq_ether_send_t *src, int cnt)
1702 int idx, starting_slot;
1703 starting_slot = tx->req;
/* mask handles the wrap at the end of the ring */
1706 idx = (starting_slot + cnt) & tx->mask;
1707 mxge_pio_copy(&tx->lanai[idx],
1708 &src[cnt], sizeof(*src));
1714 * copy an array of mcp_kreq_ether_send_t's to the mcp. Copy
1715 * at most 32 bytes at a time, so as to avoid involving the software
1716 * pio handler in the nic. We re-write the first segment's flags
1717 * to mark them valid only after writing the entire chain
1721 mxge_submit_req(mxge_tx_ring_t *tx, mcp_kreq_ether_send_t *src,
1726 volatile uint32_t *dst_ints;
1727 mcp_kreq_ether_send_t *srcp;
1728 volatile mcp_kreq_ether_send_t *dstp, *dst;
1731 idx = tx->req & tx->mask;
/* remember the real flags; we publish them last to mark the chain valid */
1733 last_flags = src->flags;
1736 dst = dstp = &tx->lanai[idx];
/* fast path: the whole chain fits without wrapping the ring */
1739 if ((idx + cnt) < tx->mask) {
1740 for (i = 0; i < (cnt - 1); i += 2) {
1741 mxge_pio_copy(dstp, srcp, 2 * sizeof(*src));
1742 wmb(); /* force write every 32 bytes */
1747 /* submit all but the first request, and ensure
1748 that it is submitted below */
1749 mxge_submit_req_backwards(tx, src, cnt);
1753 /* submit the first request */
1754 mxge_pio_copy(dstp, srcp, sizeof(*src));
1755 wmb(); /* barrier before setting valid flag */
1758 /* re-write the last 32-bits with the valid flags */
1759 src->flags = last_flags;
1760 src_ints = (uint32_t *)src;
1762 dst_ints = (volatile uint32_t *)dst;
/* single 32-bit store makes the first descriptor (and thus the chain) valid */
1764 *dst_ints = *src_ints;
/*
 * Parse the L2/L3/L4 headers of an outgoing frame into *pi so that the
 * checksum/TSO encap paths know the IP offset, IP header length, and
 * TCP header location.  Handles optional VLAN encapsulation, IPv4 and
 * (when IFCAP_TSO6/INET6) IPv6.  Headers that are not contiguous in the
 * first mbuf are copied into ss->scratch first.
 * NOTE(review): fragmented listing — switch labels, returns and error
 * paths are on dropped lines.
 */
1770 mxge_parse_tx(struct mxge_slice_state *ss, struct mbuf *m,
1771 struct mxge_pkt_info *pi)
1773 struct ether_vlan_header *eh;
1775 int tso = m->m_pkthdr.csum_flags & (CSUM_TSO);
1776 #if IFCAP_TSO6 && defined(INET6)
/* determine ethertype and the offset of the IP header (VLAN-aware) */
1780 eh = mtod(m, struct ether_vlan_header *);
1781 if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
1782 etype = ntohs(eh->evl_proto);
1783 pi->ip_off = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
1785 etype = ntohs(eh->evl_encap_proto);
1786 pi->ip_off = ETHER_HDR_LEN;
1792 * ensure ip header is in first mbuf, copy it to a
1793 * scratch buffer if not
1795 pi->ip = (struct ip *)(m->m_data + pi->ip_off);
1797 if (__predict_false(m->m_len < pi->ip_off + sizeof(*pi->ip))) {
1798 m_copydata(m, 0, pi->ip_off + sizeof(*pi->ip),
1800 pi->ip = (struct ip *)(ss->scratch + pi->ip_off);
1802 pi->ip_hlen = pi->ip->ip_hl << 2;
/* for TSO the TCP header must be readable too; copy if fragmented */
1806 if (__predict_false(m->m_len < pi->ip_off + pi->ip_hlen +
1807 sizeof(struct tcphdr))) {
1808 m_copydata(m, 0, pi->ip_off + pi->ip_hlen +
1809 sizeof(struct tcphdr), ss->scratch);
1810 pi->ip = (struct ip *)(ss->scratch + pi->ip_off);
1812 pi->tcp = (struct tcphdr *)((char *)pi->ip + pi->ip_hlen);
1814 #if IFCAP_TSO6 && defined(INET6)
1815 case ETHERTYPE_IPV6:
1816 pi->ip6 = (struct ip6_hdr *)(m->m_data + pi->ip_off);
1817 if (__predict_false(m->m_len < pi->ip_off + sizeof(*pi->ip6))) {
1818 m_copydata(m, 0, pi->ip_off + sizeof(*pi->ip6),
1820 pi->ip6 = (struct ip6_hdr *)(ss->scratch + pi->ip_off);
/* walk IPv6 extension headers to find the final (TCP/UDP) one */
1823 pi->ip_hlen = ip6_lasthdr(m, pi->ip_off, IPPROTO_IPV6, &nxt);
1824 pi->ip_hlen -= pi->ip_off;
1825 if (nxt != IPPROTO_TCP && nxt != IPPROTO_UDP)
/* headers too long for the firmware's TSO limits */
1831 if (pi->ip_off + pi->ip_hlen > ss->sc->max_tso6_hlen)
1834 if (__predict_false(m->m_len < pi->ip_off + pi->ip_hlen +
1835 sizeof(struct tcphdr))) {
1836 m_copydata(m, 0, pi->ip_off + pi->ip_hlen +
1837 sizeof(struct tcphdr), ss->scratch);
1838 pi->ip6 = (struct ip6_hdr *)(ss->scratch + pi->ip_off);
1840 pi->tcp = (struct tcphdr *)((char *)pi->ip6 + pi->ip_hlen);
/*
 * Build and submit the send-descriptor chain for a TSO packet.
 * Rewrites the TCP checksum to the pseudo-header sum the NIC expects,
 * then walks the busdma segment list, chopping segments at MSS
 * boundaries (MXGEFW_FLAGS_TSO_CHOP) and retroactively filling in
 * rdma_count as cut points are discovered.
 * NOTE(review): fragmented listing — loop braces, error paths and the
 * drop/stall handling are partly on dropped lines.
 */
1852 mxge_encap_tso(struct mxge_slice_state *ss, struct mbuf *m,
1853 int busdma_seg_cnt, struct mxge_pkt_info *pi)
1856 mcp_kreq_ether_send_t *req;
1857 bus_dma_segment_t *seg;
1858 uint32_t low, high_swapped;
1859 int len, seglen, cum_len, cum_len_next;
1860 int next_is_first, chop, cnt, rdma_count, small;
1861 uint16_t pseudo_hdr_offset, cksum_offset, mss, sum;
1862 uint8_t flags, flags_next;
1865 mss = m->m_pkthdr.tso_segsz;
1867 /* negative cum_len signifies to the
1868 * send loop that we are still in the
1869 * header portion of the TSO packet.
1872 cksum_offset = pi->ip_off + pi->ip_hlen;
1873 cum_len = -(cksum_offset + (pi->tcp->th_off << 2));
1875 /* TSO implies checksum offload on this hardware */
1876 if (__predict_false((m->m_pkthdr.csum_flags & (CSUM_TCP|CSUM_TCP_IPV6)) == 0)) {
1878 * If packet has full TCP csum, replace it with pseudo hdr
1879 * sum that the NIC expects, otherwise the NIC will emit
1880 * packets with bad TCP checksums.
1882 m->m_pkthdr.csum_data = offsetof(struct tcphdr, th_sum);
1884 #if (CSUM_TCP_IPV6 != 0) && defined(INET6)
1885 m->m_pkthdr.csum_flags |= CSUM_TCP_IPV6;
1886 sum = in6_cksum_pseudo(pi->ip6,
1887 m->m_pkthdr.len - cksum_offset,
1892 m->m_pkthdr.csum_flags |= CSUM_TCP;
1893 sum = in_pseudo(pi->ip->ip_src.s_addr,
1894 pi->ip->ip_dst.s_addr,
1895 htons(IPPROTO_TCP + (m->m_pkthdr.len -
/* write the pseudo-header sum back into the TCP header in the mbuf */
1899 m_copyback(m, offsetof(struct tcphdr, th_sum) +
1900 cksum_offset, sizeof(sum), (caddr_t)&sum);
1902 flags = MXGEFW_FLAGS_TSO_HDR | MXGEFW_FLAGS_FIRST;
1905 /* for TSO, pseudo_hdr_offset holds mss.
1906 * The firmware figures out where to put
1907 * the checksum by parsing the header. */
1908 pseudo_hdr_offset = htobe16(mss);
1912 * for IPv6 TSO, the "checksum offset" is re-purposed
1913 * to store the TCP header len
1915 cksum_offset = (pi->tcp->th_off << 2);
1923 /* "rdma_count" is the number of RDMAs belonging to the
1924 * current packet BEFORE the current send request. For
1925 * non-TSO packets, this is equal to "count".
1926 * For TSO packets, rdma_count needs to be reset
1927 * to 0 after a segment cut.
1929 * The rdma_count field of the send request is
1930 * the number of RDMAs of the packet starting at
1931 * that request. For TSO send requests with one ore more cuts
1932 * in the middle, this is the number of RDMAs starting
1933 * after the last cut in the request. All previous
1934 * segments before the last cut implicitly have 1 RDMA.
1936 * Since the number of RDMAs is not known beforehand,
1937 * it must be filled-in retroactively - after each
1938 * segmentation cut or at the end of the entire packet.
1941 while (busdma_seg_cnt) {
1942 /* Break the busdma segment up into pieces*/
1943 low = MXGE_LOWPART_TO_U32(seg->ds_addr);
1944 high_swapped = htobe32(MXGE_HIGHPART_TO_U32(seg->ds_addr));
1948 flags_next = flags & ~MXGEFW_FLAGS_FIRST;
1950 cum_len_next = cum_len + seglen;
/* retroactively patch the rdma_count of the request that started this RDMA run */
1951 (req-rdma_count)->rdma_count = rdma_count + 1;
1952 if (__predict_true(cum_len >= 0)) {
/* in payload: chop at MSS boundaries */
1954 chop = (cum_len_next > mss);
1955 cum_len_next = cum_len_next % mss;
1956 next_is_first = (cum_len_next == 0);
1957 flags |= chop * MXGEFW_FLAGS_TSO_CHOP;
1958 flags_next |= next_is_first *
/* branchless update: reset run length on a cut */
1960 rdma_count |= -(chop | next_is_first);
1961 rdma_count += chop & !next_is_first;
1962 } else if (cum_len_next >= 0) {
/* header/payload boundary crossed inside this segment */
1967 small = (mss <= MXGEFW_SEND_SMALL_SIZE);
1968 flags_next = MXGEFW_FLAGS_TSO_PLD |
1969 MXGEFW_FLAGS_FIRST |
1970 (small * MXGEFW_FLAGS_SMALL);
1973 req->addr_high = high_swapped;
1974 req->addr_low = htobe32(low);
1975 req->pseudo_hdr_offset = pseudo_hdr_offset;
1977 req->rdma_count = 1;
1978 req->length = htobe16(seglen);
1979 req->cksum_offset = cksum_offset;
1980 req->flags = flags | ((cum_len & 1) *
1981 MXGEFW_FLAGS_ALIGN_ODD);
1984 cum_len = cum_len_next;
/* for IPv4, cksum_offset counts down as header bytes are consumed */
1989 if (cksum_offset != 0 && !pi->ip6) {
1990 if (__predict_false(cksum_offset > seglen))
1991 cksum_offset -= seglen;
/* descriptor budget exceeded: bail to the drop path */
1995 if (__predict_false(cnt > tx->max_desc))
2001 (req-rdma_count)->rdma_count = rdma_count;
/* mark trailing requests of the final segment as TSO_LAST */
2005 req->flags |= MXGEFW_FLAGS_TSO_LAST;
2006 } while (!(req->flags & (MXGEFW_FLAGS_TSO_CHOP | MXGEFW_FLAGS_FIRST)));
2008 tx->info[((cnt - 1) + tx->req) & tx->mask].flag = 1;
2009 mxge_submit_req(tx, tx->req_list, cnt);
2010 #ifdef IFNET_BUF_RING
2011 if ((ss->sc->num_slices > 1) && tx->queue_active == 0) {
2012 /* tell the NIC to start polling this slice */
2014 tx->queue_active = 1;
/* error path: unload the DMA map and drop */
2022 bus_dmamap_unload(tx->dmat, tx->info[tx->req & tx->mask].map);
2026 printf("tx->max_desc exceeded via TSO!\n");
2027 printf("mss = %d, %ld, %d!\n", mss,
2028 (long)seg - (long)tx->seg_list, tx->max_desc);
2035 #endif /* IFCAP_TSO4 */
2037 #ifdef MXGE_NEW_VLAN_API
2039 * We reproduce the software vlan tag insertion from
2040 * net/if_vlan.c:vlan_start() here so that we can advertise "hardware"
2041 * vlan tag insertion. We need to advertise this in order to have the
2042 * vlan interface respect our csum offload flags.
2044 static struct mbuf *
2045 mxge_vlan_tag_insert(struct mbuf *m)
2047 struct ether_vlan_header *evl;
/* make room for the 4-byte 802.1Q tag in front of the frame */
2049 M_PREPEND(m, ETHER_VLAN_ENCAP_LEN, M_NOWAIT);
2050 if (__predict_false(m == NULL))
2052 if (m->m_len < sizeof(*evl)) {
2053 m = m_pullup(m, sizeof(*evl));
2054 if (__predict_false(m == NULL))
2058 * Transform the Ethernet header into an Ethernet header
2059 * with 802.1Q encapsulation.
2061 evl = mtod(m, struct ether_vlan_header *);
/* slide the dst/src MACs forward over the newly prepended bytes */
2062 bcopy((char *)evl + ETHER_VLAN_ENCAP_LEN,
2063 (char *)evl, ETHER_HDR_LEN - ETHER_TYPE_LEN);
2064 evl->evl_encap_proto = htons(ETHERTYPE_VLAN);
2065 evl->evl_tag = htons(m->m_pkthdr.ether_vtag);
/* tag is now in-band; clear the out-of-band flag */
2066 m->m_flags &= ~M_VLANTAG;
2069 #endif /* MXGE_NEW_VLAN_API */
/*
 * Encapsulate one outgoing mbuf chain into send descriptors and hand
 * it to the NIC: software-insert the VLAN tag if needed, DMA-map the
 * chain (defragmenting once on EFBIG), branch to mxge_encap_tso() for
 * TSO frames, otherwise build a plain (optionally checksum-offloaded)
 * request list, pad runts to 60 bytes, and submit.
 * NOTE(review): fragmented listing — labels, frees and several
 * assignments are on dropped lines.
 */
2072 mxge_encap(struct mxge_slice_state *ss, struct mbuf *m)
2074 struct mxge_pkt_info pi = {0,0,0,0};
2076 mcp_kreq_ether_send_t *req;
2077 bus_dma_segment_t *seg;
2081 int cnt, cum_len, err, i, idx, odd_flag;
2082 uint16_t pseudo_hdr_offset;
2083 uint8_t flags, cksum_offset;
2090 #ifdef MXGE_NEW_VLAN_API
2091 if (m->m_flags & M_VLANTAG) {
2092 m = mxge_vlan_tag_insert(m);
2093 if (__predict_false(m == NULL))
2094 goto drop_without_m;
/* parse headers only when some offload is requested */
2097 if (m->m_pkthdr.csum_flags &
2098 (CSUM_TSO | CSUM_DELAY_DATA | CSUM_DELAY_DATA_IPV6)) {
2099 if (mxge_parse_tx(ss, m, &pi))
2103 /* (try to) map the frame for DMA */
2104 idx = tx->req & tx->mask;
2105 err = bus_dmamap_load_mbuf_sg(tx->dmat, tx->info[idx].map,
2106 m, tx->seg_list, &cnt,
2108 if (__predict_false(err == EFBIG)) {
2109 /* Too many segments in the chain. Try
2111 m_tmp = m_defrag(m, M_NOWAIT);
2112 if (m_tmp == NULL) {
/* retry the mapping with the defragmented chain */
2117 err = bus_dmamap_load_mbuf_sg(tx->dmat,
2119 m, tx->seg_list, &cnt,
2122 if (__predict_false(err != 0)) {
2123 device_printf(sc->dev, "bus_dmamap_load_mbuf_sg returned %d"
2124 " packet len = %d\n", err, m->m_pkthdr.len);
2127 bus_dmamap_sync(tx->dmat, tx->info[idx].map,
2128 BUS_DMASYNC_PREWRITE);
2129 tx->info[idx].m = m;
2132 /* TSO is different enough, we handle it in another routine */
2133 if (m->m_pkthdr.csum_flags & (CSUM_TSO)) {
2134 mxge_encap_tso(ss, m, cnt, &pi);
2141 pseudo_hdr_offset = 0;
2142 flags = MXGEFW_FLAGS_NO_TSO;
2144 /* checksum offloading? */
2145 if (m->m_pkthdr.csum_flags &
2146 (CSUM_DELAY_DATA | CSUM_DELAY_DATA_IPV6)) {
2147 /* ensure ip header is in first mbuf, copy
2148 it to a scratch buffer if not */
2149 cksum_offset = pi.ip_off + pi.ip_hlen;
2150 pseudo_hdr_offset = cksum_offset + m->m_pkthdr.csum_data;
2151 pseudo_hdr_offset = htobe16(pseudo_hdr_offset);
2152 req->cksum_offset = cksum_offset;
2153 flags |= MXGEFW_FLAGS_CKSUM;
2154 odd_flag = MXGEFW_FLAGS_ALIGN_ODD;
2158 if (m->m_pkthdr.len < MXGEFW_SEND_SMALL_SIZE)
2159 flags |= MXGEFW_FLAGS_SMALL;
2161 /* convert segments into a request list */
2164 req->flags = MXGEFW_FLAGS_FIRST;
2165 for (i = 0; i < cnt; i++) {
2167 htobe32(MXGE_LOWPART_TO_U32(seg->ds_addr));
2169 htobe32(MXGE_HIGHPART_TO_U32(seg->ds_addr));
2170 req->length = htobe16(seg->ds_len);
2171 req->cksum_offset = cksum_offset;
/* checksum offset counts down as segments consume header bytes */
2172 if (cksum_offset > seg->ds_len)
2173 cksum_offset -= seg->ds_len;
2176 req->pseudo_hdr_offset = pseudo_hdr_offset;
2177 req->pad = 0; /* complete solid 16-byte block */
2178 req->rdma_count = 1;
2179 req->flags |= flags | ((cum_len & 1) * odd_flag);
2180 cum_len += seg->ds_len;
2186 /* pad runts to 60 bytes */
/* extra descriptor points at the driver's zero-filled pad buffer */
2190 htobe32(MXGE_LOWPART_TO_U32(sc->zeropad_dma.bus_addr));
2192 htobe32(MXGE_HIGHPART_TO_U32(sc->zeropad_dma.bus_addr));
2193 req->length = htobe16(60 - cum_len);
2194 req->cksum_offset = 0;
2195 req->pseudo_hdr_offset = pseudo_hdr_offset;
2196 req->pad = 0; /* complete solid 16-byte block */
2197 req->rdma_count = 1;
2198 req->flags |= flags | ((cum_len & 1) * odd_flag);
2202 tx->req_list[0].rdma_count = cnt;
2204 /* print what the firmware will see */
2205 for (i = 0; i < cnt; i++) {
2206 printf("%d: addr: 0x%x 0x%x len:%d pso%d,"
2207 "cso:%d, flags:0x%x, rdma:%d\n",
2208 i, (int)ntohl(tx->req_list[i].addr_high),
2209 (int)ntohl(tx->req_list[i].addr_low),
2210 (int)ntohs(tx->req_list[i].length),
2211 (int)ntohs(tx->req_list[i].pseudo_hdr_offset),
2212 tx->req_list[i].cksum_offset, tx->req_list[i].flags,
2213 tx->req_list[i].rdma_count);
2215 printf("--------------\n");
/* mark the slot holding the mbuf/map so mxge_tx_done frees it once */
2217 tx->info[((cnt - 1) + tx->req) & tx->mask].flag = 1;
2218 mxge_submit_req(tx, tx->req_list, cnt);
2219 #ifdef IFNET_BUF_RING
2220 if ((ss->sc->num_slices > 1) && tx->queue_active == 0) {
2221 /* tell the NIC to start polling this slice */
2223 tx->queue_active = 1;
2237 #ifdef IFNET_BUF_RING
/*
 * if_qflush method: drain and free every mbuf queued on each slice's
 * buf_ring, under the per-ring tx mutex.
 */
2239 mxge_qflush(struct ifnet *ifp)
2241 mxge_softc_t *sc = ifp->if_softc;
2246 for (slice = 0; slice < sc->num_slices; slice++) {
2247 tx = &sc->ss[slice].tx;
2249 while ((m = buf_ring_dequeue_sc(tx->br)) != NULL)
2251 mtx_unlock(&tx->mtx);
/*
 * buf_ring variant: drain the slice's drbr queue into the NIC while
 * there is descriptor space (tx mutex held by caller).  Sets
 * IFF_DRV_OACTIVE on this slice when the ring fills with work pending.
 */
2257 mxge_start_locked(struct mxge_slice_state *ss)
/* loop while there is room for a max-sized request chain */
2268 while ((tx->mask - (tx->req - tx->done)) > tx->max_desc) {
2269 m = drbr_dequeue(ifp, tx->br);
2273 /* let BPF see it */
2276 /* give it to the nic */
2279 /* ran out of transmit slots */
2280 if (((ss->if_drv_flags & IFF_DRV_OACTIVE) == 0)
2281 && (!drbr_empty(ifp, tx->br))) {
2282 ss->if_drv_flags |= IFF_DRV_OACTIVE;
/*
 * Transmit one mbuf on this slice (tx mutex held): if the interface
 * is not running, or the ring is busy, enqueue on the drbr; otherwise
 * send directly, then kick mxge_start_locked() if work remains queued.
 */
2288 mxge_transmit_locked(struct mxge_slice_state *ss, struct mbuf *m)
/* not RUNNING (or OACTIVE): just queue the packet */
2299 if ((ss->if_drv_flags & (IFF_DRV_RUNNING|IFF_DRV_OACTIVE)) !=
2301 err = drbr_enqueue(ifp, tx->br, m);
/* fast path: queue empty and descriptor space available */
2305 if (!drbr_needs_enqueue(ifp, tx->br) &&
2306 ((tx->mask - (tx->req - tx->done)) > tx->max_desc)) {
2307 /* let BPF see it */
2309 /* give it to the nic */
2311 } else if ((err = drbr_enqueue(ifp, tx->br, m)) != 0) {
2314 if (!drbr_empty(ifp, tx->br))
2315 mxge_start_locked(ss);
/*
 * if_transmit method: pick the slice from the mbuf's flowid (slice
 * count is a power of two, so a mask suffices) and transmit under the
 * per-slice tx lock, falling back to the drbr when the lock is busy.
 */
2320 mxge_transmit(struct ifnet *ifp, struct mbuf *m)
2322 mxge_softc_t *sc = ifp->if_softc;
2323 struct mxge_slice_state *ss;
2328 slice = m->m_pkthdr.flowid;
2329 slice &= (sc->num_slices - 1); /* num_slices always power of 2 */
2331 ss = &sc->ss[slice];
/* avoid blocking: try the lock, else queue for later */
2334 if (mtx_trylock(&tx->mtx)) {
2335 err = mxge_transmit_locked(ss, m);
2336 mtx_unlock(&tx->mtx);
2338 err = drbr_enqueue(ifp, tx->br, m);
/*
 * Legacy (non-buf_ring) variant: drain ifp->if_snd into the NIC while
 * descriptor space remains; set the interface-wide IFF_DRV_OACTIVE when
 * the ring fills.
 */
2347 mxge_start_locked(struct mxge_slice_state *ss)
2357 while ((tx->mask - (tx->req - tx->done)) > tx->max_desc) {
2358 IFQ_DRV_DEQUEUE(&ifp->if_snd, m);
2362 /* let BPF see it */
2365 /* give it to the nic */
2368 /* ran out of transmit slots */
2369 if ((sc->ifp->if_drv_flags & IFF_DRV_OACTIVE) == 0) {
2370 sc->ifp->if_drv_flags |= IFF_DRV_OACTIVE;
/*
 * if_start method (legacy path): transmit via slice 0 only, under its
 * tx mutex.
 */
2376 mxge_start(struct ifnet *ifp)
2378 mxge_softc_t *sc = ifp->if_softc;
2379 struct mxge_slice_state *ss;
2381 /* only use the first slice for now */
2383 mtx_lock(&ss->tx.mtx);
2384 mxge_start_locked(ss);
2385 mtx_unlock(&ss->tx.mtx);
2389 * copy an array of mcp_kreq_ether_recv_t's to the mcp. Copy
2390 * at most 32 bytes at a time, so as to avoid involving the software
2391 * pio handler in the nic. We re-write the first segment's low
2392 * DMA address to mark it valid only after we write the entire chunk
2396 mxge_submit_8rx(volatile mcp_kreq_ether_recv_t *dst,
2397 mcp_kreq_ether_recv_t *src)
/* stash the real low address; 0xffffffff marks the slot invalid
   until the whole 8-entry chunk has been PIO-copied */
2401 low = src->addr_low;
2402 src->addr_low = 0xffffffff;
2403 mxge_pio_copy(dst, src, 4 * sizeof (*src));
2405 mxge_pio_copy(dst + 4, src + 4, 4 * sizeof (*src));
/* publish: restoring the low address validates the chunk for the NIC */
2407 src->addr_low = low;
2408 dst->addr_low = low;
/*
 * Allocate and DMA-map a small (MHLEN) receive mbuf for slot idx of
 * the small rx ring, recording its bus address in the shadow ring; the
 * NIC-visible ring is updated 8 entries at a time via mxge_submit_8rx.
 * NOTE(review): fragmented listing — error returns are on dropped lines.
 */
2413 mxge_get_buf_small(struct mxge_slice_state *ss, bus_dmamap_t map, int idx)
2415 bus_dma_segment_t seg;
2417 mxge_rx_ring_t *rx = &ss->rx_small;
2420 m = m_gethdr(M_NOWAIT, MT_DATA);
2427 err = bus_dmamap_load_mbuf_sg(rx->dmat, map, m,
2428 &seg, &cnt, BUS_DMA_NOWAIT);
2433 rx->info[idx].m = m;
2434 rx->shadow[idx].addr_low =
2435 htobe32(MXGE_LOWPART_TO_U32(seg.ds_addr));
2436 rx->shadow[idx].addr_high =
2437 htobe32(MXGE_HIGHPART_TO_U32(seg.ds_addr));
/* flush a full batch of 8 descriptors to the NIC */
2441 mxge_submit_8rx(&rx->lanai[idx - 7], &rx->shadow[idx - 7]);
/*
 * Allocate and DMA-map a jumbo-cluster receive mbuf for slot idx of
 * the big rx ring.  With MXGE_VIRT_JUMBOS a single cluster may back
 * several consecutive ring entries (rx->nbufs); each filled group of 8
 * descriptors is pushed to the NIC via mxge_submit_8rx.
 * NOTE(review): fragmented listing — error returns are on dropped lines.
 */
2446 mxge_get_buf_big(struct mxge_slice_state *ss, bus_dmamap_t map, int idx)
2448 bus_dma_segment_t seg[3];
2450 mxge_rx_ring_t *rx = &ss->rx_big;
2453 m = m_getjcl(M_NOWAIT, MT_DATA, M_PKTHDR, rx->cl_size);
2459 m->m_len = rx->mlen;
2460 err = bus_dmamap_load_mbuf_sg(rx->dmat, map, m,
2461 seg, &cnt, BUS_DMA_NOWAIT);
2466 rx->info[idx].m = m;
2467 rx->shadow[idx].addr_low =
2468 htobe32(MXGE_LOWPART_TO_U32(seg->ds_addr));
2469 rx->shadow[idx].addr_high =
2470 htobe32(MXGE_HIGHPART_TO_U32(seg->ds_addr));
2472 #if MXGE_VIRT_JUMBOS
/* additional segments of the same cluster fill the following slots */
2473 for (i = 1; i < cnt; i++) {
2474 rx->shadow[idx + i].addr_low =
2475 htobe32(MXGE_LOWPART_TO_U32(seg[i].ds_addr));
2476 rx->shadow[idx + i].addr_high =
2477 htobe32(MXGE_HIGHPART_TO_U32(seg[i].ds_addr));
2482 for (i = 0; i < rx->nbufs; i++) {
2483 if ((idx & 7) == 7) {
2484 mxge_submit_8rx(&rx->lanai[idx - 7],
2485 &rx->shadow[idx - 7]);
/*
 * Plain 16-bit one's-complement sum over len bytes at raw, with the
 * usual double end-around-carry fold before returning.
 * NOTE(review): the summation loop itself is on lines dropped from
 * this fragmented listing.
 */
2495 mxge_csum_generic(uint16_t *raw, int len)
2506 csum = (csum >> 16) + (csum & 0xffff);
2507 csum = (csum >> 16) + (csum & 0xffff);
2508 return (uint16_t)csum;
2511 static inline uint16_t
/*
 * Validate the firmware's partial checksum for an IPv6 frame.
 * Subtracts the IPv6 header bytes (which, unlike IPv4, do not checksum
 * to zero) from the hardware partial sum, then compares against the
 * pseudo-header checksum for the final TCP/UDP header.
 */
2512 mxge_rx_csum6(void *p, struct mbuf *m, uint32_t csum)
2515 int nxt, cksum_offset;
2516 struct ip6_hdr *ip6 = p;
2520 cksum_offset = sizeof (*ip6) + ETHER_HDR_LEN;
/* walk extension headers when the next header is not already TCP/UDP */
2521 if (nxt != IPPROTO_TCP && nxt != IPPROTO_UDP) {
2522 cksum_offset = ip6_lasthdr(m, ETHER_HDR_LEN,
2523 IPPROTO_IPV6, &nxt);
2524 if (nxt != IPPROTO_TCP && nxt != IPPROTO_UDP)
2529 * IPv6 headers do not contain a checksum, and hence
2530 * do not checksum to zero, so they don't "fall out"
2531 * of the partial checksum calculation like IPv4
2532 * headers do. We need to fix the partial checksum by
2533 * subtracting the checksum of the IPv6 header.
2536 partial = mxge_csum_generic((uint16_t *)ip6, cksum_offset -
/* one's-complement subtraction with end-around carry and folding */
2539 csum += (csum < ~partial);
2540 csum = (csum >> 16) + (csum & 0xFFFF);
2541 csum = (csum >> 16) + (csum & 0xFFFF);
2542 c = in6_cksum_pseudo(ip6, m->m_pkthdr.len - cksum_offset, nxt,
2549 * Myri10GE hardware checksums are not valid if the sender
2550 * padded the frame with non-zero padding. This is because
2551 * the firmware just does a simple 16-bit 1s complement
2552 * checksum across the entire frame, excluding the first 14
2553 * bytes. It is best to simply to check the checksum and
2554 * tell the stack about it only if the checksum is good
2557 static inline uint16_t
/*
 * Verify the firmware's partial checksum for a received frame.
 * Returns 0 when the checksum proves good (the caller then marks the
 * mbuf CSUM_DATA_VALID).  Dispatches on ethertype: IPv4 is checked
 * against an in_pseudo() pseudo-header sum, IPv6 via mxge_rx_csum6().
 */
2558 mxge_rx_csum(struct mbuf *m, int csum)
2560 struct ether_header *eh;
2564 #if defined(INET) || defined(INET6)
/* respect the administratively-enabled RX checksum capabilities */
2565 int cap = m->m_pkthdr.rcvif->if_capenable;
2570 eh = mtod(m, struct ether_header *);
2571 etype = ntohs(eh->ether_type);
2575 if ((cap & IFCAP_RXCSUM) == 0)
2577 ip = (struct ip *)(eh + 1);
/* only TCP/UDP checksums are offloaded */
2578 if (ip->ip_p != IPPROTO_TCP && ip->ip_p != IPPROTO_UDP)
2580 c = in_pseudo(ip->ip_src.s_addr, ip->ip_dst.s_addr,
2581 htonl(ntohs(csum) + ntohs(ip->ip_len) -
2582 (ip->ip_hl << 2) + ip->ip_p));
2587 case ETHERTYPE_IPV6:
2588 if ((cap & IFCAP_RXCSUM_IPV6) == 0)
2590 c = mxge_rx_csum6((eh + 1), m, csum);
/*
 * Strip an in-band 802.1Q tag from a received frame, adjust the
 * firmware's partial checksum to account for the 4 removed bytes, and
 * record the tag out-of-band (m_pkthdr.ether_vtag with the new VLAN
 * API, otherwise an m_tag).
 */
2600 mxge_vlan_tag_remove(struct mbuf *m, uint32_t *csum)
2602 struct ether_vlan_header *evl;
2603 struct ether_header *eh;
2606 evl = mtod(m, struct ether_vlan_header *);
2607 eh = mtod(m, struct ether_header *);
2610 * fix checksum by subtracting ETHER_VLAN_ENCAP_LEN bytes
2611 * after what the firmware thought was the end of the ethernet
2615 /* put checksum into host byte order */
2616 *csum = ntohs(*csum);
/* the 4 tag bytes sit right after the 14-byte Ethernet header */
2617 partial = ntohl(*(uint32_t *)(mtod(m, char *) + ETHER_HDR_LEN));
/* one's-complement subtract the tag bytes, then fold the carries */
2618 (*csum) += ~partial;
2619 (*csum) += ((*csum) < ~partial);
2620 (*csum) = ((*csum) >> 16) + ((*csum) & 0xFFFF);
2621 (*csum) = ((*csum) >> 16) + ((*csum) & 0xFFFF);
2623 /* restore checksum to network byte order;
2624 later consumers expect this */
2625 *csum = htons(*csum);
2628 #ifdef MXGE_NEW_VLAN_API
2629 m->m_pkthdr.ether_vtag = ntohs(evl->evl_tag);
2633 mtag = m_tag_alloc(MTAG_VLAN, MTAG_VLAN_TAG, sizeof(u_int),
2637 VLAN_TAG_VALUE(mtag) = ntohs(evl->evl_tag);
2638 m_tag_prepend(m, mtag);
2642 m->m_flags |= M_VLANTAG;
2645 * Remove the 802.1q header by copying the Ethernet
2646 * addresses over it and adjusting the beginning of
2647 * the data in the mbuf. The encapsulated Ethernet
2648 * type field is already in place.
2650 bcopy((char *)evl, (char *)evl + ETHER_VLAN_ENCAP_LEN,
2651 ETHER_HDR_LEN - ETHER_TYPE_LEN);
2652 m_adj(m, ETHER_VLAN_ENCAP_LEN);
/*
 * Deliver one received frame from the big (jumbo) rx ring: replace the
 * ring buffer (dropping the frame if allocation fails), strip any VLAN
 * tag, validate the hardware checksum, attempt LRO, and finally pass
 * the mbuf up the stack.
 */
2657 mxge_rx_done_big(struct mxge_slice_state *ss, uint32_t len,
2658 uint32_t csum, int lro)
2663 struct ether_header *eh;
2665 bus_dmamap_t old_map;
2671 idx = rx->cnt & rx->mask;
/* big ring entries may span several slots (virt jumbos) */
2672 rx->cnt += rx->nbufs;
2673 /* save a pointer to the received mbuf */
2674 m = rx->info[idx].m;
2675 /* try to replace the received mbuf */
2676 if (mxge_get_buf_big(ss, rx->extra_map, idx)) {
2677 /* drop the frame -- the old mbuf is re-cycled */
2678 if_inc_counter(ifp, IFCOUNTER_IERRORS, 1);
2682 /* unmap the received buffer */
2683 old_map = rx->info[idx].map;
2684 bus_dmamap_sync(rx->dmat, old_map, BUS_DMASYNC_POSTREAD);
2685 bus_dmamap_unload(rx->dmat, old_map);
2687 /* swap the bus_dmamap_t's */
2688 rx->info[idx].map = rx->extra_map;
2689 rx->extra_map = old_map;
2691 /* mcp implicitly skips 1st 2 bytes so that packet is properly
2693 m->m_data += MXGEFW_PAD;
2695 m->m_pkthdr.rcvif = ifp;
2696 m->m_len = m->m_pkthdr.len = len;
2698 eh = mtod(m, struct ether_header *);
2699 if (eh->ether_type == htons(ETHERTYPE_VLAN)) {
2700 mxge_vlan_tag_remove(m, &csum);
2702 /* flowid only valid if RSS hashing is enabled */
2703 if (sc->num_slices > 1) {
2704 m->m_pkthdr.flowid = (ss - sc->ss);
2705 M_HASHTYPE_SET(m, M_HASHTYPE_OPAQUE);
2707 /* if the checksum is valid, mark it in the mbuf header */
2708 if ((ifp->if_capenable & (IFCAP_RXCSUM_IPV6 | IFCAP_RXCSUM)) &&
2709 (0 == mxge_rx_csum(m, csum))) {
2710 /* Tell the stack that the checksum is good */
2711 m->m_pkthdr.csum_data = 0xffff;
2712 m->m_pkthdr.csum_flags = CSUM_PSEUDO_HDR |
2715 #if defined(INET) || defined (INET6)
/* NOTE(review): big path passes csum 0 to tcp_lro_rx, unlike the
   small path which passes the hardware csum — confirm intentional */
2716 if (lro && (0 == tcp_lro_rx(&ss->lc, m, 0)))
2720 /* pass the frame up the stack */
2721 (*ifp->if_input)(ifp, m);
/*
 * Deliver one received frame from the small rx ring.  Mirrors
 * mxge_rx_done_big(): replace the buffer (drop on failure), strip VLAN,
 * verify checksum, try LRO (passing the hardware csum), then input.
 */
2725 mxge_rx_done_small(struct mxge_slice_state *ss, uint32_t len,
2726 uint32_t csum, int lro)
2730 struct ether_header *eh;
2733 bus_dmamap_t old_map;
2739 idx = rx->cnt & rx->mask;
2741 /* save a pointer to the received mbuf */
2742 m = rx->info[idx].m;
2743 /* try to replace the received mbuf */
2744 if (mxge_get_buf_small(ss, rx->extra_map, idx)) {
2745 /* drop the frame -- the old mbuf is re-cycled */
2746 if_inc_counter(ifp, IFCOUNTER_IERRORS, 1);
2750 /* unmap the received buffer */
2751 old_map = rx->info[idx].map;
2752 bus_dmamap_sync(rx->dmat, old_map, BUS_DMASYNC_POSTREAD);
2753 bus_dmamap_unload(rx->dmat, old_map);
2755 /* swap the bus_dmamap_t's */
2756 rx->info[idx].map = rx->extra_map;
2757 rx->extra_map = old_map;
2759 /* mcp implicitly skips 1st 2 bytes so that packet is properly
2761 m->m_data += MXGEFW_PAD;
2763 m->m_pkthdr.rcvif = ifp;
2764 m->m_len = m->m_pkthdr.len = len;
2766 eh = mtod(m, struct ether_header *);
2767 if (eh->ether_type == htons(ETHERTYPE_VLAN)) {
2768 mxge_vlan_tag_remove(m, &csum);
2770 /* flowid only valid if RSS hashing is enabled */
2771 if (sc->num_slices > 1) {
2772 m->m_pkthdr.flowid = (ss - sc->ss);
2773 M_HASHTYPE_SET(m, M_HASHTYPE_OPAQUE);
2775 /* if the checksum is valid, mark it in the mbuf header */
2776 if ((ifp->if_capenable & (IFCAP_RXCSUM_IPV6 | IFCAP_RXCSUM)) &&
2777 (0 == mxge_rx_csum(m, csum))) {
2778 /* Tell the stack that the checksum is good */
2779 m->m_pkthdr.csum_data = 0xffff;
2780 m->m_pkthdr.csum_flags = CSUM_PSEUDO_HDR |
2783 #if defined(INET) || defined (INET6)
2784 if (lro && (0 == tcp_lro_rx(&ss->lc, m, csum)))
2788 /* pass the frame up the stack */
2789 (*ifp->if_input)(ifp, m);
/*
 * Drain the slice's rx completion ring: for each non-zero-length entry
 * dispatch to the small or big receive path, clear the entry, and stop
 * after half the ring to avoid a receive livelock; flush LRO at the end.
 */
2793 mxge_clean_rx_done(struct mxge_slice_state *ss)
2795 mxge_rx_done_t *rx_done = &ss->rx_done;
2801 lro = ss->sc->ifp->if_capenable & IFCAP_LRO;
/* a non-zero length marks a valid completion entry */
2802 while (rx_done->entry[rx_done->idx].length != 0) {
2803 length = ntohs(rx_done->entry[rx_done->idx].length);
2804 rx_done->entry[rx_done->idx].length = 0;
2805 checksum = rx_done->entry[rx_done->idx].checksum;
/* frames that fit in an mbuf header came from the small ring */
2806 if (length <= (MHLEN - MXGEFW_PAD))
2807 mxge_rx_done_small(ss, length, checksum, lro);
2809 mxge_rx_done_big(ss, length, checksum, lro);
2811 rx_done->idx = rx_done->cnt & rx_done->mask;
2813 /* limit potential for livelock */
2814 if (__predict_false(++limit > rx_done->mask / 2))
2817 #if defined(INET) || defined (INET6)
2818 tcp_lro_flush_all(&ss->lc);
/*
 * Reclaim completed transmit descriptors up to the firmware's reported
 * index (mcp_idx): free mbufs/DMA maps attached to flagged slots,
 * update byte/multicast counters, clear OACTIVE and restart the queue
 * when space frees up, and (multi-slice) deactivate an idle queue.
 */
2824 mxge_tx_done(struct mxge_slice_state *ss, uint32_t mcp_idx)
2835 while (tx->pkt_done != mcp_idx) {
2836 idx = tx->done & tx->mask;
2838 m = tx->info[idx].m;
2839 /* mbuf and DMA map only attached to the first
2842 ss->obytes += m->m_pkthdr.len;
2843 if (m->m_flags & M_MCAST)
2846 tx->info[idx].m = NULL;
2847 map = tx->info[idx].map;
2848 bus_dmamap_unload(tx->dmat, map);
/* flagged slot marks the end of a packet's descriptor chain */
2851 if (tx->info[idx].flag) {
2852 tx->info[idx].flag = 0;
2857 /* If we have space, clear IFF_OACTIVE to tell the stack that
2858 its OK to send packets */
2859 #ifdef IFNET_BUF_RING
2860 flags = &ss->if_drv_flags;
2862 flags = &ifp->if_drv_flags;
2864 mtx_lock(&ss->tx.mtx);
/* resume transmit once at least 3/4 of the ring is free */
2865 if ((*flags) & IFF_DRV_OACTIVE &&
2866 tx->req - tx->done < (tx->mask + 1)/4) {
2867 *(flags) &= ~IFF_DRV_OACTIVE;
2869 mxge_start_locked(ss);
2871 #ifdef IFNET_BUF_RING
2872 if ((ss->sc->num_slices > 1) && (tx->req == tx->done)) {
2873 /* let the NIC stop polling this queue, since there
2874 * are no more transmits pending */
2875 if (tx->req == tx->done) {
2877 tx->queue_active = 0;
2883 mtx_unlock(&ss->tx.mtx);
/*
 * XFP module compliance-byte decode table: each entry maps one bit of
 * the XFP 10GbE compliance register to an ifmedia type (0 where FreeBSD
 * has no corresponding IFM_* media word) and a human-readable name.
 */
2887 static struct mxge_media_type mxge_xfp_media_types[] =
2889 	{IFM_10G_CX4,	0x7f,		"10GBASE-CX4 (module)"},
2890 	{IFM_10G_SR,	(1 << 7),	"10GBASE-SR"},
2891 	{IFM_10G_LR,	(1 << 6),	"10GBASE-LR"},
2892 	{0,		(1 << 5),	"10GBASE-ER"},
2893 	{IFM_10G_LRM,	(1 << 4),	"10GBASE-LRM"},
2894 	{0,		(1 << 3),	"10GBASE-SW"},
2895 	{0,		(1 << 2),	"10GBASE-LW"},
2896 	{0,		(1 << 1),	"10GBASE-EW"},
2897 	{0,		(1 << 0),	"Reserved"}
/*
 * SFP+ module compliance decode table, same layout as the XFP table
 * above.  The first entry (bitmask 0) is the Twinax default used when
 * no compliance bit is set.
 */
2899 static struct mxge_media_type mxge_sfp_media_types[] =
2901 	{IFM_10G_TWINAX,      0,	"10GBASE-Twinax"},
2902 	{0,		(1 << 7),	"Reserved"},
2903 	{IFM_10G_LRM,	(1 << 6),	"10GBASE-LRM"},
2904 	{IFM_10G_LR, 	(1 << 5),	"10GBASE-LR"},
2905 	{IFM_10G_SR,	(1 << 4),	"10GBASE-SR"},
2906 	{IFM_10G_TWINAX,(1 << 0),	"10GBASE-Twinax"}
/*
 * Install and select a single full-duplex ethernet media type on the
 * softc's ifmedia, remembering it in sc->current_media so later probes
 * can tell whether the media actually changed.
 */
2910 mxge_media_set(mxge_softc_t *sc, int media_type)
2914 	ifmedia_add(&sc->media, IFM_ETHER | IFM_FDX | media_type,
2916 	ifmedia_set(&sc->media, IFM_ETHER | IFM_FDX | media_type);
2917 	sc->current_media = media_type;
2918 	sc->media.ifm_media = sc->media.ifm_cur->ifm_media;
/*
 * Reset the ifmedia list to IFM_AUTO, then classify the NIC's connector
 * (CX4 / XFP / SFP+ / Quad Ribbon Fiber) by parsing the character after
 * the third dash of the cached EEPROM product code string, and record
 * the result in sc->connector.
 */
2922 mxge_media_init(mxge_softc_t *sc)
2927 	ifmedia_removeall(&sc->media);
2928 	mxge_media_set(sc, IFM_AUTO);
2931 	 * parse the product code to determine the interface type
2932 	 * (CX4, XFP, Quad Ribbon Fiber) by looking at the character
2933 	 * after the 3rd dash in the driver's cached copy of the
2934 	 * EEPROM's product code string.
2936 	ptr = sc->product_code_string;
2938 		device_printf(sc->dev, "Missing product code\n");
	/* advance past the third '-'; bail if the string has fewer dashes */
2942 	for (i = 0; i < 3; i++, ptr++) {
2943 		ptr = strchr(ptr, '-');
2945 			device_printf(sc->dev,
2946 				      "only %d dashes in PC?!?\n", i);
2950 	if (*ptr == 'C' || *(ptr +1) == 'C') {
2952 		sc->connector = MXGE_CX4;
2953 		mxge_media_set(sc, IFM_10G_CX4);
2954 	} else if (*ptr == 'Q') {
2955 		/* -Q is Quad Ribbon Fiber */
2956 		sc->connector = MXGE_QRF;
2957 		device_printf(sc->dev, "Quad Ribbon Fiber Media\n");
2958 		/* FreeBSD has no media type for Quad ribbon fiber */
2959 	} else if (*ptr == 'R') {
2961 		sc->connector = MXGE_XFP;
2963 	} else if (*ptr == 'S' || *(ptr +1) == 'S') {
2964 		/* -S or -2S is SFP+ */
2965 		sc->connector = MXGE_SFP;
2966 		device_printf(sc->dev, "Unknown media type: %c\n", *ptr);
2971 * Determine the media type for a NIC. Some XFPs will identify
2972 * themselves only when their link is up, so this is initiated via a
2973 * link up interrupt. However, this can potentially take up to
2974 * several milliseconds, so it is run via the watchdog routine, rather
2975 * than in the interrupt handler itself.
/*
 * Probe the pluggable module (XFP or SFP+ cage) via firmware I2C
 * commands: issue MXGEFW_CMD_I2C_READ for the compliance byte, poll
 * MXGEFW_CMD_I2C_BYTE (up to ~50ms) until the byte is cached, then
 * match it against the connector's decode table and reprogram the
 * ifmedia list if the detected media differs from sc->current_media.
 * Called from the watchdog path since the I2C read can take
 * milliseconds (see comment above this function).
 */
2978 mxge_media_probe(mxge_softc_t *sc)
2983 	struct mxge_media_type *mxge_media_types = NULL;
2984 	int i, err, ms, mxge_media_type_entries;
2987 	sc->need_media_probe = 0;
2989 	if (sc->connector == MXGE_XFP) {
2991 		mxge_media_types = mxge_xfp_media_types;
2992 		mxge_media_type_entries =
2993 			nitems(mxge_xfp_media_types);
2994 		byte = MXGE_XFP_COMPLIANCE_BYTE;
2996 	} else if (sc->connector == MXGE_SFP) {
2997 		/* -S or -2S is SFP+ */
2998 		mxge_media_types = mxge_sfp_media_types;
2999 		mxge_media_type_entries =
3000 			nitems(mxge_sfp_media_types);
3004 		/* nothing to do; media type cannot change */
3009 	 * At this point we know the NIC has an XFP cage, so now we
3010 	 * try to determine what is in the cage by using the
3011 	 * firmware's XFP I2C commands to read the XFP 10GbE compliance
3012 	 * register.  We read just one byte, which may take over
3016 	cmd.data0 = 0;	 /* just fetch 1 byte, not all 256 */
3018 	err = mxge_send_cmd(sc, MXGEFW_CMD_I2C_READ, &cmd);
3019 	if (err == MXGEFW_CMD_ERROR_I2C_FAILURE) {
3020 		device_printf(sc->dev, "failed to read XFP\n");
3022 	if (err == MXGEFW_CMD_ERROR_I2C_ABSENT) {
3023 		device_printf(sc->dev, "Type R/S with no XFP!?!?\n");
3025 	if (err != MXGEFW_CMD_OK) {
3029 	/* now we wait for the data to be cached */
3031 	err = mxge_send_cmd(sc, MXGEFW_CMD_I2C_BYTE, &cmd);
	/* firmware returns EBUSY until the I2C transaction completes */
3032 	for (ms = 0; (err == EBUSY) && (ms < 50); ms++) {
3035 		err = mxge_send_cmd(sc, MXGEFW_CMD_I2C_BYTE, &cmd);
3037 	if (err != MXGEFW_CMD_OK) {
3038 		device_printf(sc->dev, "failed to read %s (%d, %dms)\n",
3039 			      cage_type, err, ms);
	/* entry 0 is special-cased: matched by equality, not by bit test */
3043 	if (cmd.data0 == mxge_media_types[0].bitmask) {
3045 			device_printf(sc->dev, "%s:%s\n", cage_type,
3046 				      mxge_media_types[0].name);
3047 		if (sc->current_media != mxge_media_types[0].flag) {
3048 			mxge_media_init(sc);
3049 			mxge_media_set(sc, mxge_media_types[0].flag);
3053 	for (i = 1; i < mxge_media_type_entries; i++) {
3054 		if (cmd.data0 & mxge_media_types[i].bitmask) {
3056 				device_printf(sc->dev, "%s:%s\n",
3058 					      mxge_media_types[i].name);
3060 			if (sc->current_media != mxge_media_types[i].flag) {
3061 				mxge_media_init(sc);
3062 				mxge_media_set(sc, mxge_media_types[i].flag);
3068 	device_printf(sc->dev, "%s media 0x%x unknown\n",
3069 		      cage_type, cmd.data0);
/*
 * Per-slice interrupt handler.  For non-zero slices (MSI-X, not
 * compiled with IFNET_BUF_RING) the interrupt is implicitly valid, so
 * just clean the rx ring and write the irq claim register.  Otherwise:
 * wait for the DMA'd stats block to become valid, deassert a legacy
 * IRQ if configured, then loop reaping tx completions and rx frames
 * until the firmware marks the stats invalid again.  Link-state and
 * error statistics are only meaningful on slice 0.
 */
3075 mxge_intr(void *arg)
3077 	struct mxge_slice_state *ss = arg;
3078 	mxge_softc_t *sc = ss->sc;
3079 	mcp_irq_data_t *stats = ss->fw_stats;
3080 	mxge_tx_ring_t *tx = &ss->tx;
3081 	mxge_rx_done_t *rx_done = &ss->rx_done;
3082 	uint32_t send_done_count;
3086 #ifndef IFNET_BUF_RING
3087 	/* an interrupt on a non-zero slice is implicitly valid
3088 	   since MSI-X irqs are not shared */
3090 		mxge_clean_rx_done(ss);
3091 		*ss->irq_claim = be32toh(3);
3096 	/* make sure the DMA has finished */
3097 	if (!stats->valid) {
3100 	valid = stats->valid;
3102 	if (sc->legacy_irq) {
3103 		/* lower legacy IRQ */
3104 		*sc->irq_deassert = 0;
3105 		if (!mxge_deassert_wait)
3106 			/* don't wait for conf. that irq is low */
3112 	/* loop while waiting for legacy irq deassertion */
3114 		/* check for transmit completes and receives */
3115 		send_done_count = be32toh(stats->send_done_count);
3116 		while ((send_done_count != tx->pkt_done) ||
3117 		       (rx_done->entry[rx_done->idx].length != 0)) {
3118 			if (send_done_count != tx->pkt_done)
3119 				mxge_tx_done(ss, (int)send_done_count);
3120 			mxge_clean_rx_done(ss);
3121 			send_done_count = be32toh(stats->send_done_count);
3123 		if (sc->legacy_irq && mxge_deassert_wait)
	/* volatile read: firmware clears valid via DMA when irq is lowered */
3125 	} while (*((volatile uint8_t *) &stats->valid));
3127 	/* fw link & error stats meaningful only on the first slice */
3128 	if (__predict_false((ss == sc->ss) && stats->stats_updated)) {
3129 		if (sc->link_state != stats->link_up) {
3130 			sc->link_state = stats->link_up;
3131 			if (sc->link_state) {
3132 				if_link_state_change(sc->ifp, LINK_STATE_UP);
3134 					device_printf(sc->dev, "link up\n");
3136 				if_link_state_change(sc->ifp, LINK_STATE_DOWN);
3138 					device_printf(sc->dev, "link down\n");
			/* re-check pluggable media from the watchdog */
3140 			sc->need_media_probe = 1;
3142 		if (sc->rdma_tags_available !=
3143 		    be32toh(stats->rdma_tags_available)) {
3144 			sc->rdma_tags_available =
3145 				be32toh(stats->rdma_tags_available);
3146 			device_printf(sc->dev, "RDMA timed out! %d tags "
3147 				      "left\n", sc->rdma_tags_available);
3150 		if (stats->link_down) {
3151 			sc->down_cnt += stats->link_down;
3153 				if_link_state_change(sc->ifp, LINK_STATE_DOWN);
3157 	/* check to see if we have rx token to pass back */
3159 		*ss->irq_claim = be32toh(3);
3160 	*(ss->irq_claim + 1) = be32toh(3);
/*
 * if_init handler: bring the interface up (via mxge_open) if it is not
 * already marked running, under the driver mutex.
 */
3164 mxge_init(void *arg)
3166 	mxge_softc_t *sc = arg;
3167 	struct ifnet *ifp = sc->ifp;
3170 	mtx_lock(&sc->driver_mtx);
3171 	if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
3172 		(void) mxge_open(sc);
3173 	mtx_unlock(&sc->driver_mtx);
/*
 * Release every mbuf still attached to this slice's rx (big and small)
 * and tx rings, unloading the associated DMA maps first, and tear down
 * the slice's LRO state.  The tx ring is skipped when this slice has
 * none (tx.info == NULL; tx is only on the first slice without
 * IFNET_BUF_RING).
 */
3179 mxge_free_slice_mbufs(struct mxge_slice_state *ss)
3183 #if defined(INET) || defined(INET6)
3184 	tcp_lro_free(&ss->lc);
3186 	for (i = 0; i <= ss->rx_big.mask; i++) {
3187 		if (ss->rx_big.info[i].m == NULL)
3189 		bus_dmamap_unload(ss->rx_big.dmat,
3190 				  ss->rx_big.info[i].map);
3191 		m_freem(ss->rx_big.info[i].m);
3192 		ss->rx_big.info[i].m = NULL;
3195 	for (i = 0; i <= ss->rx_small.mask; i++) {
3196 		if (ss->rx_small.info[i].m == NULL)
3198 		bus_dmamap_unload(ss->rx_small.dmat,
3199 				  ss->rx_small.info[i].map);
3200 		m_freem(ss->rx_small.info[i].m);
3201 		ss->rx_small.info[i].m = NULL;
3204 	/* transmit ring used only on the first slice */
3205 	if (ss->tx.info == NULL)
3208 	for (i = 0; i <= ss->tx.mask; i++) {
3209 		ss->tx.info[i].flag = 0;
3210 		if (ss->tx.info[i].m == NULL)
3212 		bus_dmamap_unload(ss->tx.dmat,
3213 				  ss->tx.info[i].map);
3214 		m_freem(ss->tx.info[i].m);
3215 		ss->tx.info[i].m = NULL;
/* Free the mbufs of every slice (see mxge_free_slice_mbufs). */
3220 mxge_free_mbufs(mxge_softc_t *sc)
3224 	for (slice = 0; slice < sc->num_slices; slice++)
3225 		mxge_free_slice_mbufs(&sc->ss[slice]);
/*
 * Free all per-slice ring resources allocated by
 * mxge_alloc_slice_rings: the rx_done DMA block, the tx request copy
 * block and segment list, the rx shadow rings, and the host info
 * arrays together with their per-entry DMA maps and DMA tags.  Safe to
 * call on a partially-allocated slice: each pointer is checked before
 * being freed and NULLed afterwards.
 */
3229 mxge_free_slice_rings(struct mxge_slice_state *ss)
3234 	if (ss->rx_done.entry != NULL)
3235 		mxge_dma_free(&ss->rx_done.dma);
3236 	ss->rx_done.entry = NULL;
3238 	if (ss->tx.req_bytes != NULL)
3239 		free(ss->tx.req_bytes, M_DEVBUF);
3240 	ss->tx.req_bytes = NULL;
3242 	if (ss->tx.seg_list != NULL)
3243 		free(ss->tx.seg_list, M_DEVBUF);
3244 	ss->tx.seg_list = NULL;
3246 	if (ss->rx_small.shadow != NULL)
3247 		free(ss->rx_small.shadow, M_DEVBUF);
3248 	ss->rx_small.shadow = NULL;
3250 	if (ss->rx_big.shadow != NULL)
3251 		free(ss->rx_big.shadow, M_DEVBUF);
3252 	ss->rx_big.shadow = NULL;
3254 	if (ss->tx.info != NULL) {
3255 		if (ss->tx.dmat != NULL) {
3256 			for (i = 0; i <= ss->tx.mask; i++) {
3257 				bus_dmamap_destroy(ss->tx.dmat,
3258 						   ss->tx.info[i].map);
3260 			bus_dma_tag_destroy(ss->tx.dmat);
3262 		free(ss->tx.info, M_DEVBUF);
3266 	if (ss->rx_small.info != NULL) {
3267 		if (ss->rx_small.dmat != NULL) {
3268 			for (i = 0; i <= ss->rx_small.mask; i++) {
3269 				bus_dmamap_destroy(ss->rx_small.dmat,
3270 						   ss->rx_small.info[i].map);
			/* the extra map is the spare used during refill */
3272 			bus_dmamap_destroy(ss->rx_small.dmat,
3273 					   ss->rx_small.extra_map);
3274 			bus_dma_tag_destroy(ss->rx_small.dmat);
3276 		free(ss->rx_small.info, M_DEVBUF);
3278 	ss->rx_small.info = NULL;
3280 	if (ss->rx_big.info != NULL) {
3281 		if (ss->rx_big.dmat != NULL) {
3282 			for (i = 0; i <= ss->rx_big.mask; i++) {
3283 				bus_dmamap_destroy(ss->rx_big.dmat,
3284 						   ss->rx_big.info[i].map);
3286 			bus_dmamap_destroy(ss->rx_big.dmat,
3287 					   ss->rx_big.extra_map);
3288 			bus_dma_tag_destroy(ss->rx_big.dmat);
3290 		free(ss->rx_big.info, M_DEVBUF);
3292 	ss->rx_big.info = NULL;
/* Free the ring resources of every slice (see mxge_free_slice_rings). */
3296 mxge_free_rings(mxge_softc_t *sc)
3300 	for (slice = 0; slice < sc->num_slices; slice++)
3301 		mxge_free_slice_rings(&sc->ss[slice]);
/*
 * Allocate all per-slice ring resources: rx shadow rings and host info
 * arrays, busdma tags and per-entry maps for the small and big rx
 * rings (plus one spare "extra" map each), and — on slices that
 * transmit — the tx request copy block (8-byte aligned), the busdma
 * segment list, the tx host info ring, and the tx DMA tag and maps.
 * rx_ring_entries and tx_ring_entries must be powers of two; the masks
 * are set to entries - 1.  On failure the caller is expected to unwind
 * via mxge_free_slice_rings.
 */
3305 mxge_alloc_slice_rings(struct mxge_slice_state *ss, int rx_ring_entries,
3306 		       int tx_ring_entries)
3308 	mxge_softc_t *sc = ss->sc;
3312 	/* allocate per-slice receive resources */
3314 	ss->rx_small.mask = ss->rx_big.mask = rx_ring_entries - 1;
	/* rx_done ring is shared by both small and big rings, so 2x */
3315 	ss->rx_done.mask = (2 * rx_ring_entries) - 1;
3317 	/* allocate the rx shadow rings */
3318 	bytes = rx_ring_entries * sizeof (*ss->rx_small.shadow);
3319 	ss->rx_small.shadow = malloc(bytes, M_DEVBUF, M_ZERO|M_WAITOK);
3321 	bytes = rx_ring_entries * sizeof (*ss->rx_big.shadow);
3322 	ss->rx_big.shadow = malloc(bytes, M_DEVBUF, M_ZERO|M_WAITOK);
3324 	/* allocate the rx host info rings */
3325 	bytes = rx_ring_entries * sizeof (*ss->rx_small.info);
3326 	ss->rx_small.info = malloc(bytes, M_DEVBUF, M_ZERO|M_WAITOK);
3328 	bytes = rx_ring_entries * sizeof (*ss->rx_big.info);
3329 	ss->rx_big.info = malloc(bytes, M_DEVBUF, M_ZERO|M_WAITOK);
3331 	/* allocate the rx busdma resources */
3332 	err = bus_dma_tag_create(sc->parent_dmat,	/* parent */
3334 				 4096,			/* boundary */
3335 				 BUS_SPACE_MAXADDR,	/* low */
3336 				 BUS_SPACE_MAXADDR,	/* high */
3337 				 NULL, NULL,		/* filter */
3338 				 MHLEN,			/* maxsize */
3340 				 MHLEN,			/* maxsegsize */
3341 				 BUS_DMA_ALLOCNOW,	/* flags */
3342 				 NULL, NULL,		/* lock */
3343 				 &ss->rx_small.dmat);	/* tag */
3345 		device_printf(sc->dev, "Err %d allocating rx_small dmat\n",
3350 	err = bus_dma_tag_create(sc->parent_dmat,	/* parent */
3352 #if MXGE_VIRT_JUMBOS
3353 				 4096,			/* boundary */
3357 				 BUS_SPACE_MAXADDR,	/* low */
3358 				 BUS_SPACE_MAXADDR,	/* high */
3359 				 NULL, NULL,		/* filter */
3360 				 3*4096,		/* maxsize */
3361 #if MXGE_VIRT_JUMBOS
3363 				 4096,			/* maxsegsize*/
3366 				 MJUM9BYTES,		/* maxsegsize*/
3368 				 BUS_DMA_ALLOCNOW,	/* flags */
3369 				 NULL, NULL,		/* lock */
3370 				 &ss->rx_big.dmat);	/* tag */
3372 		device_printf(sc->dev, "Err %d allocating rx_big dmat\n",
3376 	for (i = 0; i <= ss->rx_small.mask; i++) {
3377 		err = bus_dmamap_create(ss->rx_small.dmat, 0,
3378 					&ss->rx_small.info[i].map);
3380 			device_printf(sc->dev, "Err %d  rx_small dmamap\n",
	/* spare map used to stage a replacement buffer during refill */
3385 	err = bus_dmamap_create(ss->rx_small.dmat, 0,
3386 				&ss->rx_small.extra_map);
3388 		device_printf(sc->dev, "Err %d extra rx_small dmamap\n",
3393 	for (i = 0; i <= ss->rx_big.mask; i++) {
3394 		err = bus_dmamap_create(ss->rx_big.dmat, 0,
3395 					&ss->rx_big.info[i].map);
3397 			device_printf(sc->dev, "Err %d  rx_big dmamap\n",
3402 	err = bus_dmamap_create(ss->rx_big.dmat, 0,
3403 				&ss->rx_big.extra_map);
3405 		device_printf(sc->dev, "Err %d extra rx_big dmamap\n",
3410 	/* now allocate TX resources */
3412 #ifndef IFNET_BUF_RING
3413 	/* only use a single TX ring for now */
3414 	if (ss != ss->sc->ss)
3418 	ss->tx.mask = tx_ring_entries - 1;
3419 	ss->tx.max_desc = MIN(MXGE_MAX_SEND_DESC, tx_ring_entries / 4);
3422 	/* allocate the tx request copy block */
3424 		sizeof (*ss->tx.req_list) * (ss->tx.max_desc + 4);
3425 	ss->tx.req_bytes = malloc(bytes, M_DEVBUF, M_WAITOK);
3426 	/* ensure req_list entries are aligned to 8 bytes */
3427 	ss->tx.req_list = (mcp_kreq_ether_send_t *)
3428 		((unsigned long)(ss->tx.req_bytes + 7) & ~7UL);
3430 	/* allocate the tx busdma segment list */
3431 	bytes = sizeof (*ss->tx.seg_list) * ss->tx.max_desc;
3432 	ss->tx.seg_list = (bus_dma_segment_t *)
3433 		malloc(bytes, M_DEVBUF, M_WAITOK);
3435 	/* allocate the tx host info ring */
3436 	bytes = tx_ring_entries * sizeof (*ss->tx.info);
3437 	ss->tx.info = malloc(bytes, M_DEVBUF, M_ZERO|M_WAITOK);
3439 	/* allocate the tx busdma resources */
3440 	err = bus_dma_tag_create(sc->parent_dmat,	/* parent */
3442 				 sc->tx_boundary,	/* boundary */
3443 				 BUS_SPACE_MAXADDR,	/* low */
3444 				 BUS_SPACE_MAXADDR,	/* high */
3445 				 NULL, NULL,		/* filter */
3446 				 65536 + 256,		/* maxsize */
3447 				 ss->tx.max_desc - 2,	/* num segs */
3448 				 sc->tx_boundary,	/* maxsegsz */
3449 				 BUS_DMA_ALLOCNOW,	/* flags */
3450 				 NULL, NULL,		/* lock */
3451 				 &ss->tx.dmat);		/* tag */
3454 		device_printf(sc->dev, "Err %d allocating tx dmat\n",
3459 	/* now use these tags to setup dmamaps for each slot
3461 	for (i = 0; i <= ss->tx.mask; i++) {
3462 		err = bus_dmamap_create(ss->tx.dmat, 0,
3463 					&ss->tx.info[i].map);
3465 			device_printf(sc->dev, "Err %d  tx dmamap\n",
/*
 * Query the firmware for the send-ring size, derive entry counts for
 * the tx and rx rings, size the ifnet send queue to tx entries - 1,
 * and allocate the per-slice rings.  On any failure the rings already
 * allocated are torn down via mxge_free_rings.
 */
3475 mxge_alloc_rings(mxge_softc_t *sc)
3479 	int tx_ring_entries, rx_ring_entries;
3482 	/* get ring sizes */
3483 	err = mxge_send_cmd(sc, MXGEFW_CMD_GET_SEND_RING_SIZE, &cmd);
3484 	tx_ring_size = cmd.data0;
3486 		device_printf(sc->dev, "Cannot determine tx ring sizes\n");
3490 	tx_ring_entries = tx_ring_size / sizeof (mcp_kreq_ether_send_t);
3491 	rx_ring_entries = sc->rx_ring_size / sizeof (mcp_dma_addr_t);
3492 	IFQ_SET_MAXLEN(&sc->ifp->if_snd, tx_ring_entries - 1);
3493 	sc->ifp->if_snd.ifq_drv_maxlen = sc->ifp->if_snd.ifq_maxlen;
3494 	IFQ_SET_READY(&sc->ifp->if_snd);
3496 	for (slice = 0; slice < sc->num_slices; slice++) {
3497 		err = mxge_alloc_slice_rings(&sc->ss[slice],
3506 	mxge_free_rings(sc);
/*
 * Choose big-buffer receive parameters for a given MTU: the firmware
 * big-buffer size, the mbuf cluster size to allocate, and how many
 * firmware buffers each cluster spans (nbufs).  Frames that fit a
 * 2K or page-sized cluster use one buffer; larger MTUs either carve a
 * 9K cluster into 4K chunks (MXGE_VIRT_JUMBOS) or use the whole 9K
 * cluster as a single buffer.
 */
3513 mxge_choose_params(int mtu, int *big_buf_size, int *cl_size, int *nbufs)
3515 	int bufsize = mtu + ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN + MXGEFW_PAD;
3517 	if (bufsize < MCLBYTES) {
3518 		/* easy, everything fits in a single buffer */
3519 		*big_buf_size = MCLBYTES;
3520 		*cl_size = MCLBYTES;
3525 	if (bufsize < MJUMPAGESIZE) {
3526 		/* still easy, everything still fits in a single buffer */
3527 		*big_buf_size = MJUMPAGESIZE;
3528 		*cl_size = MJUMPAGESIZE;
3532 #if MXGE_VIRT_JUMBOS
3533 	/* now we need to use virtually contiguous buffers */
3534 	*cl_size = MJUM9BYTES;
3535 	*big_buf_size = 4096;
3536 	*nbufs = mtu / 4096 + 1;
3537 	/* needs to be a power of two, so round up */
3541 	*cl_size = MJUM9BYTES;
3542 	*big_buf_size = MJUM9BYTES;
/*
 * Bring one slice online: initialize its LRO context, fetch the lanai
 * (NIC SRAM) pointers for the send ring, send go/stop doorbells, and
 * the small/big receive rings from the firmware, then pre-fill both
 * receive rings with buffers.  Big-ring shadow entries are poisoned
 * with 0xffffffff before stocking.  Returns non-zero on failure.
 */
3548 mxge_slice_open(struct mxge_slice_state *ss, int nbufs, int cl_size)
3557 	slice = ss - sc->ss;
3559 #if defined(INET) || defined(INET6)
3560 	(void)tcp_lro_init(&ss->lc);
3562 	ss->lc.ifp = sc->ifp;
3564 	/* get the lanai pointers to the send and receive rings */
3567 #ifndef IFNET_BUF_RING
3568 	/* We currently only send from the first slice */
3572 	err = mxge_send_cmd(sc, MXGEFW_CMD_GET_SEND_OFFSET, &cmd);
3574 		(volatile mcp_kreq_ether_send_t *)(sc->sram + cmd.data0);
3575 	ss->tx.send_go = (volatile uint32_t *)
3576 		(sc->sram + MXGEFW_ETH_SEND_GO + 64 * slice);
3577 	ss->tx.send_stop = (volatile uint32_t *)
3578 	(sc->sram + MXGEFW_ETH_SEND_STOP + 64 * slice);
3579 #ifndef IFNET_BUF_RING
3583 	err |= mxge_send_cmd(sc,
3584 			     MXGEFW_CMD_GET_SMALL_RX_OFFSET, &cmd);
3585 	ss->rx_small.lanai =
3586 		(volatile mcp_kreq_ether_recv_t *)(sc->sram + cmd.data0);
3588 	err |= mxge_send_cmd(sc, MXGEFW_CMD_GET_BIG_RX_OFFSET, &cmd);
3590 		(volatile mcp_kreq_ether_recv_t *)(sc->sram + cmd.data0);
3593 		device_printf(sc->dev,
3594 			      "failed to get ring sizes or locations\n");
3598 	/* stock receive rings */
3599 	for (i = 0; i <= ss->rx_small.mask; i++) {
3600 		map = ss->rx_small.info[i].map;
3601 		err = mxge_get_buf_small(ss, map, i);
3603 			device_printf(sc->dev, "alloced %d/%d smalls\n",
3604 				      i, ss->rx_small.mask + 1);
3608 	for (i = 0; i <= ss->rx_big.mask; i++) {
3609 		ss->rx_big.shadow[i].addr_low = 0xffffffff;
3610 		ss->rx_big.shadow[i].addr_high = 0xffffffff;
3612 	ss->rx_big.nbufs = nbufs;
3613 	ss->rx_big.cl_size = cl_size;
3614 	ss->rx_big.mlen = ss->sc->ifp->if_mtu + ETHER_HDR_LEN +
3615 		ETHER_VLAN_ENCAP_LEN + MXGEFW_PAD;
	/* one cluster covers nbufs ring slots, so step by nbufs */
3616 	for (i = 0; i <= ss->rx_big.mask; i += ss->rx_big.nbufs) {
3617 		map = ss->rx_big.info[i].map;
3618 		err = mxge_get_buf_big(ss, map, i);
3620 			device_printf(sc->dev, "alloced %d/%d bigs\n",
3621 				      i, ss->rx_big.mask + 1);
/*
 * Bring the interface fully up: reset the NIC, program the RSS
 * indirection table when multiple slices are enabled, pick and program
 * buffer sizes/MTU, point the firmware at the per-slice stats DMA
 * blocks (falling back to the obsolete single-stats command, which
 * also disables multicast support), open each slice, and finally issue
 * MXGEFW_CMD_ETHERNET_UP and mark the ifnet running.  On failure the
 * already-stocked mbufs are freed.  Called with the driver mutex held.
 */
3629 mxge_open(mxge_softc_t *sc)
3632 	int err, big_bytes, nbufs, slice, cl_size, i;
3634 	volatile uint8_t *itable;
3635 	struct mxge_slice_state *ss;
3637 	/* Copy the MAC address in case it was overridden */
3638 	bcopy(IF_LLADDR(sc->ifp), sc->mac_addr, ETHER_ADDR_LEN);
3640 	err = mxge_reset(sc, 1);
3642 		device_printf(sc->dev, "failed to reset\n");
3646 	if (sc->num_slices > 1) {
3647 		/* setup the indirection table */
3648 		cmd.data0 = sc->num_slices;
3649 		err = mxge_send_cmd(sc, MXGEFW_CMD_SET_RSS_TABLE_SIZE,
3652 		err |= mxge_send_cmd(sc, MXGEFW_CMD_GET_RSS_TABLE_OFFSET,
3655 			device_printf(sc->dev,
3656 				      "failed to setup rss tables\n");
3660 		/* just enable an identity mapping */
3661 		itable = sc->sram + cmd.data0;
3662 		for (i = 0; i < sc->num_slices; i++)
3663 			itable[i] = (uint8_t)i;
3666 		cmd.data1 = mxge_rss_hash_type;
3667 		err = mxge_send_cmd(sc, MXGEFW_CMD_SET_RSS_ENABLE, &cmd);
3669 			device_printf(sc->dev, "failed to enable slices\n");
3675 	mxge_choose_params(sc->ifp->if_mtu, &big_bytes, &cl_size, &nbufs);
3678 	err = mxge_send_cmd(sc, MXGEFW_CMD_ALWAYS_USE_N_BIG_BUFFERS,
3680 	/* error is only meaningful if we're trying to set
3681 	   MXGEFW_CMD_ALWAYS_USE_N_BIG_BUFFERS > 1 */
3682 	if (err && nbufs > 1) {
3683 		device_printf(sc->dev,
3684 			      "Failed to set alway-use-n to %d\n",
3688 	/* Give the firmware the mtu and the big and small buffer
3689 	   sizes.  The firmware wants the big buf size to be a power
3690 	   of two. Luckily, FreeBSD's clusters are powers of two */
3691 	cmd.data0 = sc->ifp->if_mtu + ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
3692 	err = mxge_send_cmd(sc, MXGEFW_CMD_SET_MTU, &cmd);
3693 	cmd.data0 = MHLEN - MXGEFW_PAD;
3694 	err |= mxge_send_cmd(sc, MXGEFW_CMD_SET_SMALL_BUFFER_SIZE,
3696 	cmd.data0 = big_bytes;
3697 	err |= mxge_send_cmd(sc, MXGEFW_CMD_SET_BIG_BUFFER_SIZE, &cmd);
3700 		device_printf(sc->dev, "failed to setup params\n");
3704 	/* Now give him the pointer to the stats block */
3706 #ifdef IFNET_BUF_RING
3707 	     slice < sc->num_slices;
3712 		ss = &sc->ss[slice];
3714 			MXGE_LOWPART_TO_U32(ss->fw_stats_dma.bus_addr);
3716 			MXGE_HIGHPART_TO_U32(ss->fw_stats_dma.bus_addr);
3717 		cmd.data2 = sizeof(struct mcp_irq_data);
		/* slice id is carried in the upper 16 bits of data2 */
3718 		cmd.data2 |= (slice << 16);
3719 		err |= mxge_send_cmd(sc, MXGEFW_CMD_SET_STATS_DMA_V2, &cmd);
3723 		bus = sc->ss->fw_stats_dma.bus_addr;
3724 		bus += offsetof(struct mcp_irq_data, send_done_count);
3725 		cmd.data0 = MXGE_LOWPART_TO_U32(bus);
3726 		cmd.data1 = MXGE_HIGHPART_TO_U32(bus);
3727 		err = mxge_send_cmd(sc,
3728 				    MXGEFW_CMD_SET_STATS_DMA_OBSOLETE,
3730 		/* Firmware cannot support multicast without STATS_DMA_V2 */
3731 		sc->fw_multicast_support = 0;
3733 		sc->fw_multicast_support = 1;
3737 		device_printf(sc->dev, "failed to setup params\n");
3741 	for (slice = 0; slice < sc->num_slices; slice++) {
3742 		err = mxge_slice_open(&sc->ss[slice], nbufs, cl_size);
3744 			device_printf(sc->dev, "couldn't open slice %d\n",
3750 	/* Finally, start the firmware running */
3751 	err = mxge_send_cmd(sc, MXGEFW_CMD_ETHERNET_UP, &cmd);
3753 		device_printf(sc->dev, "Couldn't bring up link\n");
3756 #ifdef IFNET_BUF_RING
3757 	for (slice = 0; slice < sc->num_slices; slice++) {
3758 		ss = &sc->ss[slice];
3759 		ss->if_drv_flags |= IFF_DRV_RUNNING;
3760 		ss->if_drv_flags &= ~IFF_DRV_OACTIVE;
3763 	sc->ifp->if_drv_flags |= IFF_DRV_RUNNING;
3764 	sc->ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;
3770 	mxge_free_mbufs(sc);
/*
 * Bring the interface down: clear IFF_DRV_RUNNING (per-slice with
 * IFNET_BUF_RING), send MXGEFW_CMD_ETHERNET_DOWN, then wait up to
 * 10 * intr_coal_delay for the "link down" interrupt to bump
 * sc->down_cnt before freeing all mbufs.  Called with the driver mutex
 * held.
 */
3776 mxge_close(mxge_softc_t *sc, int down)
3779 	int err, old_down_cnt;
3780 #ifdef IFNET_BUF_RING
3781 	struct mxge_slice_state *ss;
3785 #ifdef IFNET_BUF_RING
3786 	for (slice = 0; slice < sc->num_slices; slice++) {
3787 		ss = &sc->ss[slice];
3788 		ss->if_drv_flags &= ~IFF_DRV_RUNNING;
3791 	sc->ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
3793 		old_down_cnt = sc->down_cnt;
3795 		err = mxge_send_cmd(sc, MXGEFW_CMD_ETHERNET_DOWN, &cmd);
3797 			device_printf(sc->dev,
3798 				      "Couldn't bring down link\n");
3800 		if (old_down_cnt == sc->down_cnt) {
3801 			/* wait for down irq */
3802 			DELAY(10 * sc->intr_coal_delay);
3805 		if (old_down_cnt == sc->down_cnt) {
3806 			device_printf(sc->dev, "never got down irq\n");
3809 	mxge_free_mbufs(sc);
/*
 * Read the PCIe link width from config space and set the max read
 * request size to 4KB (device control, bits 14:12 = 5).  The first call
 * caches the original pectl value so a watchdog reset can restore it;
 * finally enables bus mastering and memory-space access.
 */
3815 mxge_setup_cfg_space(mxge_softc_t *sc)
3817 	device_t dev = sc->dev;
3819 	uint16_t lnk, pectl;
3821 	/* find the PCIe link width and set max read request to 4KB*/
3822 	if (pci_find_cap(dev, PCIY_EXPRESS, &reg) == 0) {
3823 		lnk = pci_read_config(dev, reg + 0x12, 2);
3824 		sc->link_width = (lnk >> 4) & 0x3f;
3826 		if (sc->pectl == 0) {
3827 			pectl = pci_read_config(dev, reg + 0x8, 2);
3828 			pectl = (pectl & ~0x7000) | (5 << 12);
3829 			pci_write_config(dev, reg + 0x8, pectl, 2);
3832 			/* restore saved pectl after watchdog reset */
3833 			pci_write_config(dev, reg + 0x8, sc->pectl, 2);
3837 	/* Enable DMA and Memory space access */
3838 	pci_enable_busmaster(dev);
/*
 * Fetch the NIC's reboot status register via the vendor-specific PCI
 * capability: enable read32 mode, point the window at 0xfffffff0, and
 * read the value back.  Returns (uint32_t)-1 if the capability is
 * missing.
 */
3842 mxge_read_reboot(mxge_softc_t *sc)
3844 	device_t dev = sc->dev;
3847 	/* find the vendor specific offset */
3848 	if (pci_find_cap(dev, PCIY_VENDOR, &vs) != 0) {
3849 		device_printf(sc->dev,
3850 			      "could not find vendor specific offset\n");
3851 		return (uint32_t)-1;
3853 	/* enable read32 mode */
3854 	pci_write_config(dev, vs + 0x10, 0x3, 1);
3855 	/* tell NIC which register to read */
3856 	pci_write_config(dev, vs + 0x18, 0xfffffff0, 4);
3857 	return (pci_read_config(dev, vs + 0x14, 4));
/*
 * Recover a hung or rebooted NIC from the watchdog task.  Detects a
 * NIC reboot by the busmaster-enable bit reading zero (waiting up to
 * ~100ms if config space reads back all-ones while the NIC restarts).
 * On reboot: mark link down, take every TX lock to quiesce transmit,
 * restore PCI config space, re-run our config-space setup, reload
 * firmware, reopen the interface if it was running, then drop the TX
 * locks.  Reschedules the tick callout at the end.  Called with the
 * driver mutex held (see mxge_watchdog_task).
 */
3861 mxge_watchdog_reset(mxge_softc_t *sc)
3863 	struct pci_devinfo *dinfo;
3864 	struct mxge_slice_state *ss;
3865 	int err, running, s, num_tx_slices = 1;
3871 	device_printf(sc->dev, "Watchdog reset!\n");
3874 	 * check to see if the NIC rebooted.  If it did, then all of
3875 	 * PCI config space has been reset, and things like the
3876 	 * busmaster bit will be zero.  If this is the case, then we
3877 	 * must restore PCI config space before the NIC can be used
3880 	cmd = pci_read_config(sc->dev, PCIR_COMMAND, 2);
3881 	if (cmd == 0xffff) {
3883 		 * maybe the watchdog caught the NIC rebooting; wait
3884 		 * up to 100ms for it to finish.  If it does not come
3885 		 * back, then give up
3888 		cmd = pci_read_config(sc->dev, PCIR_COMMAND, 2);
3889 		if (cmd == 0xffff) {
3890 			device_printf(sc->dev, "NIC disappeared!\n");
3893 	if ((cmd & PCIM_CMD_BUSMASTEREN) == 0) {
3894 		/* print the reboot status */
3895 		reboot = mxge_read_reboot(sc);
3896 		device_printf(sc->dev, "NIC rebooted, status = 0x%x\n",
3898 		running = sc->ifp->if_drv_flags & IFF_DRV_RUNNING;
3902 			 * quiesce NIC so that TX routines will not try to
3903 			 * xmit after restoration of BAR
3906 			/* Mark the link as down */
3907 			if (sc->link_state) {
3909 				if_link_state_change(sc->ifp,
3912 #ifdef IFNET_BUF_RING
3913 			num_tx_slices = sc->num_slices;
3915 			/* grab all TX locks to ensure no tx  */
3916 			for (s = 0; s < num_tx_slices; s++) {
3918 				mtx_lock(&ss->tx.mtx);
3922 		/* restore PCI configuration space */
3923 		dinfo = device_get_ivars(sc->dev);
3924 		pci_cfg_restore(sc->dev, dinfo);
3926 		/* and redo any changes we made to our config space */
3927 		mxge_setup_cfg_space(sc);
3930 			err = mxge_load_firmware(sc, 0);
3932 				device_printf(sc->dev,
3933 					      "Unable to re-load f/w\n");
3937 				err = mxge_open(sc);
3938 			/* release all TX locks */
3939 			for (s = 0; s < num_tx_slices; s++) {
3941 #ifdef IFNET_BUF_RING
3942 				mxge_start_locked(ss);
3944 				mtx_unlock(&ss->tx.mtx);
3947 		sc->watchdog_resets++;
3949 		device_printf(sc->dev,
3950 			      "NIC did not reboot, not resetting\n");
3954 		device_printf(sc->dev, "watchdog reset failed\n");
3958 	callout_reset(&sc->co_hdl, mxge_ticks, mxge_tick, sc);
/*
 * Taskqueue wrapper: run the watchdog reset with the driver mutex
 * held.  Enqueued from mxge_watchdog / mxge_tick when the NIC appears
 * hung.
 */
3963 mxge_watchdog_task(void *arg, int pending)
3965 	mxge_softc_t *sc = arg;
3968 	mtx_lock(&sc->driver_mtx);
3969 	mxge_watchdog_reset(sc);
3970 	mtx_unlock(&sc->driver_mtx);
/*
 * Log the transmit-ring state of a slice that appears stuck (req/done
 * counters, queue_active, activate/deactivate counts, and the
 * firmware's view of send_done_count) before a watchdog reset.
 */
3974 mxge_warn_stuck(mxge_softc_t *sc, mxge_tx_ring_t *tx, int slice)
3976 	tx = &sc->ss[slice].tx;
3977 	device_printf(sc->dev, "slice %d struck? ring state:\n", slice);
3978 	device_printf(sc->dev,
3979 		      "tx.req=%d tx.done=%d, tx.queue_active=%d\n",
3980 		      tx->req, tx->done, tx->queue_active);
3981 	device_printf(sc->dev, "tx.activate=%d tx.deactivate=%d\n",
3982 		      tx->activate, tx->deactivate);
3983 	device_printf(sc->dev, "pkt_done=%d fw=%d\n",
3985 		      be32toh(sc->ss->fw_stats->send_done_count));
/*
 * Periodic transmit-hang check, run from mxge_tick.  A slice is
 * considered stuck when it has pending transmits whose done counter
 * has not advanced since the previous check; if the dropped-pause
 * counter also did not move, the hang is real and the watchdog-reset
 * task is queued — otherwise flow control from the link partner is
 * blamed.  Snapshots of req/done/rx_pause are saved for the next
 * check, and a deferred media probe is run if requested.
 */
3989 mxge_watchdog(mxge_softc_t *sc)
3992 	uint32_t rx_pause = be32toh(sc->ss->fw_stats->dropped_pause);
3995 	/* see if we have outstanding transmits, which
3996 	   have been pending for more than mxge_ticks */
3998 #ifdef IFNET_BUF_RING
3999 	     (i < sc->num_slices) && (err == 0);
4001 	     (i < 1) && (err == 0);
4005 		if (tx->req != tx->done &&
4006 		    tx->watchdog_req != tx->watchdog_done &&
4007 		    tx->done == tx->watchdog_done) {
4008 			/* check for pause blocking before resetting */
4009 			if (tx->watchdog_rx_pause == rx_pause) {
4010 				mxge_warn_stuck(sc, tx, i);
4011 				taskqueue_enqueue(sc->tq, &sc->watchdog_task);
4015 				device_printf(sc->dev, "Flow control blocking "
4016 					      "xmits, check link partner\n");
4019 		tx->watchdog_req = tx->req;
4020 		tx->watchdog_done = tx->done;
4021 		tx->watchdog_rx_pause = rx_pause;
4024 	if (sc->need_media_probe)
4025 		mxge_media_probe(sc);
/*
 * if_get_counter handler: sum the requested statistic across all
 * slices.  The output-byte/mcast/qdrop counters are only tracked
 * per-slice when IFNET_BUF_RING is compiled in; everything else falls
 * through to the default ifnet counters.
 */
4030 mxge_get_counter(struct ifnet *ifp, ift_counter cnt)
4032 	struct mxge_softc *sc;
4035 	sc = if_getsoftc(ifp);
4039 	case IFCOUNTER_IPACKETS:
4040 		for (int s = 0; s < sc->num_slices; s++)
4041 			rv += sc->ss[s].ipackets;
4043 	case IFCOUNTER_OPACKETS:
4044 		for (int s = 0; s < sc->num_slices; s++)
4045 			rv += sc->ss[s].opackets;
4047 	case IFCOUNTER_OERRORS:
4048 		for (int s = 0; s < sc->num_slices; s++)
4049 			rv += sc->ss[s].oerrors;
4051 #ifdef IFNET_BUF_RING
4052 	case IFCOUNTER_OBYTES:
4053 		for (int s = 0; s < sc->num_slices; s++)
4054 			rv += sc->ss[s].obytes;
4056 	case IFCOUNTER_OMCASTS:
4057 		for (int s = 0; s < sc->num_slices; s++)
4058 			rv += sc->ss[s].omcasts;
4060 	case IFCOUNTER_OQDROPS:
4061 		for (int s = 0; s < sc->num_slices; s++)
4062 			rv += sc->ss[s].tx.br->br_drops;
4066 		return (if_get_counter_default(ifp, cnt));
/*
 * Periodic callout: while the interface is running, run the transmit
 * watchdog every 4th tick; while idle, just verify bus mastering is
 * still enabled (a cleared bit means the NIC took a hardware fault and
 * the reset task is queued) and poll less often.  Always re-arms
 * itself.
 */
4071 mxge_tick(void *arg)
4073 	mxge_softc_t *sc = arg;
4080 	running = sc->ifp->if_drv_flags & IFF_DRV_RUNNING;
4082 		if (!sc->watchdog_countdown) {
4083 			err = mxge_watchdog(sc);
4084 			sc->watchdog_countdown = 4;
4086 		sc->watchdog_countdown--;
4089 		/* ensure NIC did not suffer h/w fault while idle */
4090 		cmd = pci_read_config(sc->dev, PCIR_COMMAND, 2);
4091 		if ((cmd & PCIM_CMD_BUSMASTEREN) == 0) {
4093 			taskqueue_enqueue(sc->tq, &sc->watchdog_task);
4096 		/* look less often if NIC is idle */
4101 	callout_reset(&sc->co_hdl, ticks, mxge_tick, sc);
/* ifmedia change handler; media selection is fixed on this hardware. */
4106 mxge_media_change(struct ifnet *ifp)
/*
 * Change the interface MTU.  Rejects frame sizes above sc->max_mtu or
 * below 60 bytes (including ethernet + VLAN headers).  If the
 * interface is running it is closed and reopened so buffer parameters
 * are recomputed; on reopen failure the old MTU is restored and the
 * interface brought back up.  Driver mutex taken internally.
 */
4112 mxge_change_mtu(mxge_softc_t *sc, int mtu)
4114 	struct ifnet *ifp = sc->ifp;
4115 	int real_mtu, old_mtu;
4119 	real_mtu = mtu + ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
4120 	if ((real_mtu > sc->max_mtu) || real_mtu < 60)
4122 	mtx_lock(&sc->driver_mtx);
4123 	old_mtu = ifp->if_mtu;
4125 	if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
4127 		err = mxge_open(sc);
4129 			ifp->if_mtu = old_mtu;
4131 			(void) mxge_open(sc);
4134 	mtx_unlock(&sc->driver_mtx);
/*
 * ifmedia status handler: report full-duplex ethernet with the
 * currently detected media word, and IFM_ACTIVE when the cached link
 * state is up.
 */
4139 mxge_media_status(struct ifnet *ifp, struct ifmediareq *ifmr)
4141 	mxge_softc_t *sc = ifp->if_softc;
4146 	ifmr->ifm_status = IFM_AVALID;
4147 	ifmr->ifm_active = IFM_ETHER | IFM_FDX;
4148 	ifmr->ifm_status |= sc->link_state ? IFM_ACTIVE : 0;
4149 	ifmr->ifm_active |= sc->current_media;
/*
 * Service SIOCGI2C: read i2c->len bytes, one at a time, from the
 * module EEPROM/diagnostics at dev_addr 0xA0 or 0xA2 via the
 * firmware's I2C commands, polling up to ~50ms per byte until the
 * firmware has the value cached.  Results land in i2c->data.
 */
4153 mxge_fetch_i2c(mxge_softc_t *sc, struct ifi2creq *i2c)
4160 	if (i2c->dev_addr != 0xA0 &&
4161 	    i2c->dev_addr != 0xA2)
4163 	if (i2c->len > sizeof(i2c->data))
4166 	for (i = 0; i < i2c->len; i++) {
		/* encode device address (high byte) and offset (low byte) */
4167 		i2c_args = i2c->dev_addr << 0x8;
4168 		i2c_args |= i2c->offset + i;
4169 		cmd.data0 = 0;	 /* just fetch 1 byte, not all 256 */
4170 		cmd.data1 = i2c_args;
4171 		err = mxge_send_cmd(sc, MXGEFW_CMD_I2C_READ, &cmd);
4173 		if (err != MXGEFW_CMD_OK)
4175 		/* now we wait for the data to be cached */
4176 		cmd.data0 = i2c_args & 0xff;
4177 		err = mxge_send_cmd(sc, MXGEFW_CMD_I2C_BYTE, &cmd);
4178 		for (ms = 0; (err == EBUSY) && (ms < 50); ms++) {
4179 			cmd.data0 = i2c_args & 0xff;
4180 			err = mxge_send_cmd(sc, MXGEFW_CMD_I2C_BYTE, &cmd);
4184 		if (err != MXGEFW_CMD_OK)
4186 		i2c->data[i] = cmd.data0;
/*
 * Interface ioctl handler.  Handles MTU changes, interface up/down
 * (including promiscuous and multicast reprogramming), multicast list
 * updates, capability toggles (TX/RX checksum for v4 and v6, TSO4/6 —
 * TSO requires the matching TX checksum capability — LRO, VLAN
 * tagging/TSO), media get/set via ifmedia, and SIOCGI2C module reads
 * for XFP/SFP connectors.  Everything else falls through to
 * ether_ioctl.  The driver mutex guards all state changes.
 */
4192 mxge_ioctl(struct ifnet *ifp, u_long command, caddr_t data)
4194 	mxge_softc_t *sc = ifp->if_softc;
4195 	struct ifreq *ifr = (struct ifreq *)data;
4196 	struct ifi2creq i2c;
4202 		err = mxge_change_mtu(sc, ifr->ifr_mtu);
4206 		mtx_lock(&sc->driver_mtx);
4208 			mtx_unlock(&sc->driver_mtx);
4211 		if (ifp->if_flags & IFF_UP) {
4212 			if (!(ifp->if_drv_flags & IFF_DRV_RUNNING)) {
4213 				err = mxge_open(sc);
4215 				/* take care of promis can allmulti
4217 				mxge_change_promisc(sc,
4218 						    ifp->if_flags & IFF_PROMISC);
4219 				mxge_set_multicast_list(sc);
4222 			if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
4226 		mtx_unlock(&sc->driver_mtx);
4231 		mtx_lock(&sc->driver_mtx);
4233 			mtx_unlock(&sc->driver_mtx);
4236 		mxge_set_multicast_list(sc);
4237 		mtx_unlock(&sc->driver_mtx);
4241 		mtx_lock(&sc->driver_mtx);
		/* mask holds the capability bits the caller wants toggled */
4242 		mask = ifr->ifr_reqcap ^ ifp->if_capenable;
4243 		if (mask & IFCAP_TXCSUM) {
4244 			if (IFCAP_TXCSUM & ifp->if_capenable) {
				/* disabling TX csum also disables TSO4 */
4245 				ifp->if_capenable &= ~(IFCAP_TXCSUM|IFCAP_TSO4);
4246 				ifp->if_hwassist &= ~(CSUM_TCP | CSUM_UDP);
4248 				ifp->if_capenable |= IFCAP_TXCSUM;
4249 				ifp->if_hwassist |= (CSUM_TCP | CSUM_UDP);
4251 		} else if (mask & IFCAP_RXCSUM) {
4252 			if (IFCAP_RXCSUM & ifp->if_capenable) {
4253 				ifp->if_capenable &= ~IFCAP_RXCSUM;
4255 				ifp->if_capenable |= IFCAP_RXCSUM;
4258 		if (mask & IFCAP_TSO4) {
4259 			if (IFCAP_TSO4 & ifp->if_capenable) {
4260 				ifp->if_capenable &= ~IFCAP_TSO4;
4261 			} else if (IFCAP_TXCSUM & ifp->if_capenable) {
4262 				ifp->if_capenable |= IFCAP_TSO4;
4263 				ifp->if_hwassist |= CSUM_TSO;
4265 				printf("mxge requires tx checksum offload"
4266 				       " be enabled to use TSO\n");
4271 		if (mask & IFCAP_TXCSUM_IPV6) {
4272 			if (IFCAP_TXCSUM_IPV6 & ifp->if_capenable) {
4273 				ifp->if_capenable &= ~(IFCAP_TXCSUM_IPV6
4275 				ifp->if_hwassist &= ~(CSUM_TCP_IPV6
4278 				ifp->if_capenable |= IFCAP_TXCSUM_IPV6;
4279 				ifp->if_hwassist |= (CSUM_TCP_IPV6
4282 		} else if (mask & IFCAP_RXCSUM_IPV6) {
4283 			if (IFCAP_RXCSUM_IPV6 & ifp->if_capenable) {
4284 				ifp->if_capenable &= ~IFCAP_RXCSUM_IPV6;
4286 				ifp->if_capenable |= IFCAP_RXCSUM_IPV6;
4289 		if (mask & IFCAP_TSO6) {
4290 			if (IFCAP_TSO6 & ifp->if_capenable) {
4291 				ifp->if_capenable &= ~IFCAP_TSO6;
4292 			} else if (IFCAP_TXCSUM_IPV6 & ifp->if_capenable) {
4293 				ifp->if_capenable |= IFCAP_TSO6;
4294 				ifp->if_hwassist |= CSUM_TSO;
4296 				printf("mxge requires tx checksum offload"
4297 				       " be enabled to use TSO\n");
4301 #endif /*IFCAP_TSO6 */
4303 		if (mask & IFCAP_LRO)
4304 			ifp->if_capenable ^= IFCAP_LRO;
4305 		if (mask & IFCAP_VLAN_HWTAGGING)
4306 			ifp->if_capenable ^= IFCAP_VLAN_HWTAGGING;
4307 		if (mask & IFCAP_VLAN_HWTSO)
4308 			ifp->if_capenable ^= IFCAP_VLAN_HWTSO;
		/* VLAN TSO needs both the capability and HW tagging on */
4310 		if (!(ifp->if_capabilities & IFCAP_VLAN_HWTSO) ||
4311 		    !(ifp->if_capenable & IFCAP_VLAN_HWTAGGING))
4312 			ifp->if_capenable &= ~IFCAP_VLAN_HWTSO;
4314 		mtx_unlock(&sc->driver_mtx);
4315 		VLAN_CAPABILITIES(ifp);
4320 		mtx_lock(&sc->driver_mtx);
4322 			mtx_unlock(&sc->driver_mtx);
4325 		mxge_media_probe(sc);
4326 		mtx_unlock(&sc->driver_mtx);
4327 		err = ifmedia_ioctl(ifp, (struct ifreq *)data,
4328 				    &sc->media, command);
4332 		if (sc->connector != MXGE_XFP &&
4333 		    sc->connector != MXGE_SFP) {
4337 		err = copyin(ifr_data_get_ptr(ifr), &i2c, sizeof(i2c));
4340 		mtx_lock(&sc->driver_mtx);
4342 			mtx_unlock(&sc->driver_mtx);
4345 		err = mxge_fetch_i2c(sc, &i2c);
4346 		mtx_unlock(&sc->driver_mtx);
4348 			err = copyout(&i2c, ifr->ifr_ifru.ifru_data,
4352 		err = ether_ioctl(ifp, command, data);
/*
 * mxge_fetch_tunables: read the hw.mxge.* loader tunables into the
 * driver's global knobs, clamp each to a sane range, and cache the
 * per-device copies (pause, throttle) in the softc.
 * NOTE(review): this span is missing some original source lines
 * (e.g. the argument of the "hw.mxge.verbose" fetch and several
 * closing braces) — verify against the complete mxge.c.
 */
4359 mxge_fetch_tunables(mxge_softc_t *sc)
4362 TUNABLE_INT_FETCH("hw.mxge.max_slices", &mxge_max_slices);
4363 TUNABLE_INT_FETCH("hw.mxge.flow_control_enabled",
4364 &mxge_flow_control);
4365 TUNABLE_INT_FETCH("hw.mxge.intr_coal_delay",
4366 &mxge_intr_coal_delay);
4367 TUNABLE_INT_FETCH("hw.mxge.nvidia_ecrc_enable",
4368 &mxge_nvidia_ecrc_enable);
4369 TUNABLE_INT_FETCH("hw.mxge.force_firmware",
4370 &mxge_force_firmware);
4371 TUNABLE_INT_FETCH("hw.mxge.deassert_wait",
4372 &mxge_deassert_wait);
4373 TUNABLE_INT_FETCH("hw.mxge.verbose",
4375 TUNABLE_INT_FETCH("hw.mxge.ticks", &mxge_ticks);
4376 TUNABLE_INT_FETCH("hw.mxge.always_promisc", &mxge_always_promisc);
/* both spellings accepted for backward compatibility; same variable */
4377 TUNABLE_INT_FETCH("hw.mxge.rss_hash_type", &mxge_rss_hash_type);
4378 TUNABLE_INT_FETCH("hw.mxge.rss_hashtype", &mxge_rss_hash_type);
4379 TUNABLE_INT_FETCH("hw.mxge.initial_mtu", &mxge_initial_mtu);
4380 TUNABLE_INT_FETCH("hw.mxge.throttle", &mxge_throttle);
/* coalescing delay outside [0, 10ms] falls back to the 30us default */
4384 if (mxge_intr_coal_delay < 0 || mxge_intr_coal_delay > 10*1000)
4385 mxge_intr_coal_delay = 30;
/* a zero tick interval would stall the periodic timer; use hz/2 */
4386 if (mxge_ticks == 0)
4387 mxge_ticks = hz / 2;
4388 sc->pause = mxge_flow_control;
/* out-of-range RSS hash types revert to src/dst-port hashing */
4389 if (mxge_rss_hash_type < MXGEFW_RSS_HASH_TYPE_IPV4
4390 || mxge_rss_hash_type > MXGEFW_RSS_HASH_TYPE_MAX) {
4391 mxge_rss_hash_type = MXGEFW_RSS_HASH_TYPE_SRC_DST_PORT;
/* MTU must fit [ETHER_MIN_LEN, ETHERMTU_JUMBO]; default to jumbo */
4393 if (mxge_initial_mtu > ETHERMTU_JUMBO ||
4394 mxge_initial_mtu < ETHER_MIN_LEN)
4395 mxge_initial_mtu = ETHERMTU_JUMBO;
/* a non-zero throttle is bounded to [MXGE_MIN, MXGE_MAX]; 0 = disabled */
4397 if (mxge_throttle && mxge_throttle > MXGE_MAX_THROTTLE)
4398 mxge_throttle = MXGE_MAX_THROTTLE;
4399 if (mxge_throttle && mxge_throttle < MXGE_MIN_THROTTLE)
4400 mxge_throttle = MXGE_MIN_THROTTLE;
4401 sc->throttle = mxge_throttle;
/*
 * mxge_free_slices: release per-slice resources — firmware stats DMA,
 * the buf_ring and tx mutex (IFNET_BUF_RING builds), and the rx
 * completion ring DMA — then free the slice array itself.
 * NOTE(review): several original lines are missing from this span
 * (closing braces, the ss = &sc->ss[i] assignment, the NULL guard on
 * sc->ss); verify against the complete mxge.c.
 */
4406 mxge_free_slices(mxge_softc_t *sc)
4408 struct mxge_slice_state *ss;
4415 for (i = 0; i < sc->num_slices; i++) {
4417 if (ss->fw_stats != NULL) {
4418 mxge_dma_free(&ss->fw_stats_dma);
4419 ss->fw_stats = NULL;
4420 #ifdef IFNET_BUF_RING
4421 if (ss->tx.br != NULL) {
4422 drbr_free(ss->tx.br, M_DEVBUF);
4426 mtx_destroy(&ss->tx.mtx);
4428 if (ss->rx_done.entry != NULL) {
4429 mxge_dma_free(&ss->rx_done.dma);
4430 ss->rx_done.entry = NULL;
4433 free(sc->ss, M_DEVBUF);
/*
 * mxge_alloc_slices: query the firmware for the rx ring size, allocate
 * the sc->ss slice array, and for each slice set up the rx completion
 * ring DMA, the per-slice firmware stats DMA, the tx mutex, and (with
 * IFNET_BUF_RING) a 2048-entry buf_ring.  On any failure all partially
 * allocated state is torn down via mxge_free_slices().
 * NOTE(review): missing original lines include malloc-failure checks,
 * the ss = &sc->ss[i] assignment, and closing braces — verify against
 * the complete mxge.c.
 */
4438 mxge_alloc_slices(mxge_softc_t *sc)
4441 struct mxge_slice_state *ss;
4443 int err, i, max_intr_slots;
4445 err = mxge_send_cmd(sc, MXGEFW_CMD_GET_RX_RING_SIZE, &cmd);
4447 device_printf(sc->dev, "Cannot determine rx ring size\n");
4450 sc->rx_ring_size = cmd.data0;
/* two interrupt-queue slots per rx descriptor (small + big rings) */
4451 max_intr_slots = 2 * (sc->rx_ring_size / sizeof (mcp_dma_addr_t));
4453 bytes = sizeof (*sc->ss) * sc->num_slices;
4454 sc->ss = malloc(bytes, M_DEVBUF, M_NOWAIT | M_ZERO);
4457 for (i = 0; i < sc->num_slices; i++) {
4462 /* allocate per-slice rx interrupt queues */
4464 bytes = max_intr_slots * sizeof (*ss->rx_done.entry);
4465 err = mxge_dma_alloc(sc, &ss->rx_done.dma, bytes, 4096);
4468 ss->rx_done.entry = ss->rx_done.dma.addr;
4469 bzero(ss->rx_done.entry, bytes);
4472 * allocate the per-slice firmware stats; stats
4473 * (including tx) are used used only on the first
4476 #ifndef IFNET_BUF_RING
4481 bytes = sizeof (*ss->fw_stats);
4482 err = mxge_dma_alloc(sc, &ss->fw_stats_dma,
4483 sizeof (*ss->fw_stats), 64);
4486 ss->fw_stats = (mcp_irq_data_t *)ss->fw_stats_dma.addr;
4487 snprintf(ss->tx.mtx_name, sizeof(ss->tx.mtx_name),
4488 "%s:tx(%d)", device_get_nameunit(sc->dev), i);
4489 mtx_init(&ss->tx.mtx, ss->tx.mtx_name, NULL, MTX_DEF);
4490 #ifdef IFNET_BUF_RING
4491 ss->tx.br = buf_ring_alloc(2048, M_DEVBUF, M_WAITOK,
/* error path: undo everything allocated so far */
4499 mxge_free_slices(sc);
/*
 * mxge_slice_probe: decide how many slices (queues) to use.  Requires
 * multi-slice to be enabled via the tunable, an SMP system, and MSI-X
 * vectors.  Loads the RSS-capable firmware, resets the NIC, sizes the
 * interrupt queues, and asks the firmware for its maximum RSS queue
 * count; the result is capped by MSI-X vectors, CPUs / the tunable,
 * and rounded down to a power of two.  On any failure it falls back to
 * the original single-slice firmware.
 * NOTE(review): missing original lines include early returns, status
 * checks (if (status != 0)), the num_slices >>= 1 body of the
 * power-of-two loop, and closing braces — verify against mxge.c.
 */
4504 mxge_slice_probe(mxge_softc_t *sc)
4508 int msix_cnt, status, max_intr_slots;
4512 * don't enable multiple slices if they are not enabled,
4513 * or if this is not an SMP system
4516 if (mxge_max_slices == 0 || mxge_max_slices == 1 || mp_ncpus < 2)
4519 /* see how many MSI-X interrupts are available */
4520 msix_cnt = pci_msix_count(sc->dev);
4524 /* now load the slice aware firmware see what it supports */
4525 old_fw = sc->fw_name;
4526 if (old_fw == mxge_fw_aligned)
4527 sc->fw_name = mxge_fw_rss_aligned;
4529 sc->fw_name = mxge_fw_rss_unaligned;
4530 status = mxge_load_firmware(sc, 0);
4532 device_printf(sc->dev, "Falling back to a single slice\n");
4536 /* try to send a reset command to the card to see if it
4538 memset(&cmd, 0, sizeof (cmd));
4539 status = mxge_send_cmd(sc, MXGEFW_CMD_RESET, &cmd);
4541 device_printf(sc->dev, "failed reset\n");
4545 /* get rx ring size */
4546 status = mxge_send_cmd(sc, MXGEFW_CMD_GET_RX_RING_SIZE, &cmd);
4548 device_printf(sc->dev, "Cannot determine rx ring size\n");
4551 max_intr_slots = 2 * (cmd.data0 / sizeof (mcp_dma_addr_t));
4553 /* tell it the size of the interrupt queues */
4554 cmd.data0 = max_intr_slots * sizeof (struct mcp_slot);
4555 status = mxge_send_cmd(sc, MXGEFW_CMD_SET_INTRQ_SIZE, &cmd);
4557 device_printf(sc->dev, "failed MXGEFW_CMD_SET_INTRQ_SIZE\n");
4561 /* ask the maximum number of slices it supports */
4562 status = mxge_send_cmd(sc, MXGEFW_CMD_GET_MAX_RSS_QUEUES, &cmd);
4564 device_printf(sc->dev,
4565 "failed MXGEFW_CMD_GET_MAX_RSS_QUEUES\n");
4568 sc->num_slices = cmd.data0;
4569 if (sc->num_slices > msix_cnt)
4570 sc->num_slices = msix_cnt;
/* -1 means "auto": bound by CPU count; otherwise by the tunable */
4572 if (mxge_max_slices == -1) {
4573 /* cap to number of CPUs in system */
4574 if (sc->num_slices > mp_ncpus)
4575 sc->num_slices = mp_ncpus;
4577 if (sc->num_slices > mxge_max_slices)
4578 sc->num_slices = mxge_max_slices;
4580 /* make sure it is a power of two */
4581 while (sc->num_slices & (sc->num_slices - 1))
4585 device_printf(sc->dev, "using %d slices\n",
/* fallback: restore and reload the single-slice firmware */
4591 sc->fw_name = old_fw;
4592 (void) mxge_load_firmware(sc, 0);
/*
 * mxge_add_msix_irqs: allocate the MSI-X table BAR resource, one MSI-X
 * vector per slice, the per-vector IRQ resources, and hook up
 * mxge_intr for each slice.  Unwinds in reverse order through the
 * abort_with_* labels on failure.
 * NOTE(review): missing original lines include rid initialization,
 * malloc-failure checks, several label definitions (abort_with_intr,
 * abort_with_res, abort_with_msix), the return statements, and
 * closing braces — verify against the complete mxge.c.
 */
4596 mxge_add_msix_irqs(mxge_softc_t *sc)
4599 int count, err, i, rid;
/* the MSI-X table lives behind BAR(2) */
4602 sc->msix_table_res = bus_alloc_resource_any(sc->dev, SYS_RES_MEMORY,
4605 if (sc->msix_table_res == NULL) {
4606 device_printf(sc->dev, "couldn't alloc MSIX table res\n");
4610 count = sc->num_slices;
4611 err = pci_alloc_msix(sc->dev, &count);
4613 device_printf(sc->dev, "pci_alloc_msix: failed, wanted %d"
4614 "err = %d \n", sc->num_slices, err);
4615 goto abort_with_msix_table;
/* partial vector grants are not usable; tell the admin how to retune */
4617 if (count < sc->num_slices) {
4618 device_printf(sc->dev, "pci_alloc_msix: need %d, got %d\n",
4619 count, sc->num_slices);
4620 device_printf(sc->dev,
4621 "Try setting hw.mxge.max_slices to %d\n",
4624 goto abort_with_msix;
4626 bytes = sizeof (*sc->msix_irq_res) * sc->num_slices;
4627 sc->msix_irq_res = malloc(bytes, M_DEVBUF, M_NOWAIT|M_ZERO);
4628 if (sc->msix_irq_res == NULL) {
4630 goto abort_with_msix;
4633 for (i = 0; i < sc->num_slices; i++) {
4635 sc->msix_irq_res[i] = bus_alloc_resource_any(sc->dev,
4638 if (sc->msix_irq_res[i] == NULL) {
4639 device_printf(sc->dev, "couldn't allocate IRQ res"
4640 " for message %d\n", i);
4642 goto abort_with_res;
4646 bytes = sizeof (*sc->msix_ih) * sc->num_slices;
4647 sc->msix_ih = malloc(bytes, M_DEVBUF, M_NOWAIT|M_ZERO);
4649 for (i = 0; i < sc->num_slices; i++) {
4650 err = bus_setup_intr(sc->dev, sc->msix_irq_res[i],
4651 INTR_TYPE_NET | INTR_MPSAFE,
4652 #if __FreeBSD_version > 700030
/* each vector is dispatched with its own slice state as the arg */
4655 mxge_intr, &sc->ss[i], &sc->msix_ih[i]);
4657 device_printf(sc->dev, "couldn't setup intr for "
4659 goto abort_with_intr;
4661 bus_describe_intr(sc->dev, sc->msix_irq_res[i],
4662 sc->msix_ih[i], "s%d", i);
4666 device_printf(sc->dev, "using %d msix IRQs:",
4668 for (i = 0; i < sc->num_slices; i++)
4669 printf(" %jd", rman_get_start(sc->msix_irq_res[i]));
/* error unwind: teardown handlers, release IRQs, vectors, table BAR */
4675 for (i = 0; i < sc->num_slices; i++) {
4676 if (sc->msix_ih[i] != NULL) {
4677 bus_teardown_intr(sc->dev, sc->msix_irq_res[i],
4679 sc->msix_ih[i] = NULL;
4682 free(sc->msix_ih, M_DEVBUF);
4686 for (i = 0; i < sc->num_slices; i++) {
4688 if (sc->msix_irq_res[i] != NULL)
4689 bus_release_resource(sc->dev, SYS_RES_IRQ, rid,
4690 sc->msix_irq_res[i]);
4691 sc->msix_irq_res[i] = NULL;
4693 free(sc->msix_irq_res, M_DEVBUF);
4697 pci_release_msi(sc->dev);
4699 abort_with_msix_table:
4700 bus_release_resource(sc->dev, SYS_RES_MEMORY, PCIR_BAR(2),
4701 sc->msix_table_res);
/*
 * mxge_add_single_irq: single-queue interrupt setup.  Prefers one MSI
 * vector when available (rid 1); otherwise falls back to legacy INTx
 * (rid 0, sc->legacy_irq).  Allocates the IRQ resource and installs
 * mxge_intr with slice 0 as its argument; on setup failure the
 * resource and any MSI vector are released.
 * NOTE(review): missing original lines include the rid/legacy_irq
 * assignments, err check, return statements, and closing braces —
 * verify against the complete mxge.c.
 */
4707 mxge_add_single_irq(mxge_softc_t *sc)
4709 int count, err, rid;
4711 count = pci_msi_count(sc->dev);
4712 if (count == 1 && pci_alloc_msi(sc->dev, &count) == 0) {
4718 sc->irq_res = bus_alloc_resource_any(sc->dev, SYS_RES_IRQ, &rid,
4719 RF_SHAREABLE | RF_ACTIVE);
4720 if (sc->irq_res == NULL) {
4721 device_printf(sc->dev, "could not alloc interrupt\n");
4725 device_printf(sc->dev, "using %s irq %jd\n",
4726 sc->legacy_irq ? "INTx" : "MSI",
4727 rman_get_start(sc->irq_res));
4728 err = bus_setup_intr(sc->dev, sc->irq_res,
4729 INTR_TYPE_NET | INTR_MPSAFE,
4730 #if __FreeBSD_version > 700030
4733 mxge_intr, &sc->ss[0], &sc->ih);
/* failure path: release the IRQ resource and the MSI vector */
4735 bus_release_resource(sc->dev, SYS_RES_IRQ,
4736 sc->legacy_irq ? 0 : 1, sc->irq_res);
4737 if (!sc->legacy_irq)
4738 pci_release_msi(sc->dev);
/*
 * mxge_rem_msix_irqs: undo mxge_add_msix_irqs — tear down each slice's
 * interrupt handler, release each IRQ resource, free both arrays,
 * release the MSI-X table BAR, and release the MSI-X vectors.
 * NOTE(review): the rid initialization inside the second loop and
 * several closing braces are missing from this span — verify against
 * the complete mxge.c.
 */
4744 mxge_rem_msix_irqs(mxge_softc_t *sc)
4748 for (i = 0; i < sc->num_slices; i++) {
4749 if (sc->msix_ih[i] != NULL) {
4750 bus_teardown_intr(sc->dev, sc->msix_irq_res[i],
4752 sc->msix_ih[i] = NULL;
4755 free(sc->msix_ih, M_DEVBUF);
4757 for (i = 0; i < sc->num_slices; i++) {
4759 if (sc->msix_irq_res[i] != NULL)
4760 bus_release_resource(sc->dev, SYS_RES_IRQ, rid,
4761 sc->msix_irq_res[i]);
4762 sc->msix_irq_res[i] = NULL;
4764 free(sc->msix_irq_res, M_DEVBUF);
4766 bus_release_resource(sc->dev, SYS_RES_MEMORY, PCIR_BAR(2),
4767 sc->msix_table_res);
4769 pci_release_msi(sc->dev);
/*
 * mxge_rem_single_irq: undo mxge_add_single_irq — tear down the
 * handler, release the IRQ resource (rid 0 for legacy INTx, rid 1 for
 * MSI), and release the MSI vector when one was allocated.
 */
4774 mxge_rem_single_irq(mxge_softc_t *sc)
4776 bus_teardown_intr(sc->dev, sc->irq_res, sc->ih);
4777 bus_release_resource(sc->dev, SYS_RES_IRQ,
4778 sc->legacy_irq ? 0 : 1, sc->irq_res);
4779 if (!sc->legacy_irq)
4780 pci_release_msi(sc->dev);
/*
 * mxge_rem_irq: dispatch interrupt teardown — MSI-X path for
 * multi-slice, single-IRQ path otherwise.
 * NOTE(review): the "else" between the two calls is on a missing
 * original line — verify against the complete mxge.c.
 */
4784 mxge_rem_irq(mxge_softc_t *sc)
4786 if (sc->num_slices > 1)
4787 mxge_rem_msix_irqs(sc);
4789 mxge_rem_single_irq(sc);
/*
 * mxge_add_irq: dispatch interrupt setup — MSI-X for multi-slice,
 * single IRQ otherwise.  The trailing "if (0 && ...)" block is
 * deliberately dead code (kept, presumably, as a debugging aid that
 * re-adds the MSI-X IRQs — TODO confirm intent against upstream).
 */
4793 mxge_add_irq(mxge_softc_t *sc)
4797 if (sc->num_slices > 1)
4798 err = mxge_add_msix_irqs(sc);
4800 err = mxge_add_single_irq(sc);
4802 if (0 && err == 0 && sc->num_slices > 1) {
4803 mxge_rem_msix_irqs(sc);
4804 err = mxge_add_msix_irqs(sc);
/*
 * mxge_attach: device attach.  Sequence: fetch tunables, create the
 * watchdog taskqueue, create the parent busdma tag, allocate the
 * ifnet, initialize the cmd/driver mutexes and tick callout, map the
 * board's SRAM BAR, parse the EEPROM strings, allocate the
 * out-of-band DMA regions (command, zero pad, dma benchmark), select
 * and load firmware, probe/allocate slices, reset the NIC, allocate
 * rings and IRQs, populate ifnet capabilities and methods, attach
 * ethernet, and start the taskqueue and tick callout.  Failures
 * unwind through the abort_with_* labels in reverse order.
 * NOTE(review): many original lines are missing from this span
 * (variable declarations, err checks, else branches, some labels,
 * return statements, and braces) — verify against the full mxge.c.
 */
4811 mxge_attach(device_t dev)
4814 mxge_softc_t *sc = device_get_softc(dev);
4819 mxge_fetch_tunables(sc);
4821 TASK_INIT(&sc->watchdog_task, 1, mxge_watchdog_task, sc);
4822 sc->tq = taskqueue_create("mxge_taskq", M_WAITOK,
4823 taskqueue_thread_enqueue, &sc->tq);
4824 if (sc->tq == NULL) {
4826 goto abort_with_nothing;
/* parent DMA tag: all per-ring/per-slice tags derive from this */
4829 err = bus_dma_tag_create(bus_get_dma_tag(dev), /* parent */
4832 BUS_SPACE_MAXADDR, /* low */
4833 BUS_SPACE_MAXADDR, /* high */
4834 NULL, NULL, /* filter */
4835 65536 + 256, /* maxsize */
4836 MXGE_MAX_SEND_DESC, /* num segs */
4837 65536, /* maxsegsize */
4839 NULL, NULL, /* lock */
4840 &sc->parent_dmat); /* tag */
4843 device_printf(sc->dev, "Err %d allocating parent dmat\n",
4848 ifp = sc->ifp = if_alloc(IFT_ETHER);
4850 device_printf(dev, "can not if_alloc()\n");
4852 goto abort_with_parent_dmat;
4854 if_initname(ifp, device_get_name(dev), device_get_unit(dev));
4856 snprintf(sc->cmd_mtx_name, sizeof(sc->cmd_mtx_name), "%s:cmd",
4857 device_get_nameunit(dev));
4858 mtx_init(&sc->cmd_mtx, sc->cmd_mtx_name, NULL, MTX_DEF);
4859 snprintf(sc->driver_mtx_name, sizeof(sc->driver_mtx_name),
4860 "%s:drv", device_get_nameunit(dev));
4861 mtx_init(&sc->driver_mtx, sc->driver_mtx_name,
4862 MTX_NETWORK_LOCK, MTX_DEF);
/* tick callout runs under the driver mutex */
4864 callout_init_mtx(&sc->co_hdl, &sc->driver_mtx, 0);
4866 mxge_setup_cfg_space(sc);
4868 /* Map the board into the kernel */
4870 sc->mem_res = bus_alloc_resource_any(dev, SYS_RES_MEMORY, &rid,
4872 if (sc->mem_res == NULL) {
4873 device_printf(dev, "could not map memory\n");
4875 goto abort_with_lock;
4877 sc->sram = rman_get_virtual(sc->mem_res);
/* 2MB SRAM minus firmware/scratch reservations minus 0x100 guard */
4878 sc->sram_size = 2*1024*1024 - (2*(48*1024)+(32*1024)) - 0x100;
4879 if (sc->sram_size > rman_get_size(sc->mem_res)) {
4880 device_printf(dev, "impossible memory region size %jd\n",
4881 rman_get_size(sc->mem_res));
4883 goto abort_with_mem_res;
4886 /* make NULL terminated copy of the EEPROM strings section of
4888 bzero(sc->eeprom_strings, MXGE_EEPROM_STRINGS_SIZE);
4889 bus_space_read_region_1(rman_get_bustag(sc->mem_res),
4890 rman_get_bushandle(sc->mem_res),
4891 sc->sram_size - MXGE_EEPROM_STRINGS_SIZE,
4893 MXGE_EEPROM_STRINGS_SIZE - 2);
4894 err = mxge_parse_strings(sc);
4896 goto abort_with_mem_res;
4898 /* Enable write combining for efficient use of PCIe bus */
4901 /* Allocate the out of band dma memory */
4902 err = mxge_dma_alloc(sc, &sc->cmd_dma,
4903 sizeof (mxge_cmd_t), 64);
4905 goto abort_with_mem_res;
4906 sc->cmd = (mcp_cmd_response_t *) sc->cmd_dma.addr;
4907 err = mxge_dma_alloc(sc, &sc->zeropad_dma, 64, 64);
4909 goto abort_with_cmd_dma;
4911 err = mxge_dma_alloc(sc, &sc->dmabench_dma, 4096, 4096);
4913 goto abort_with_zeropad_dma;
4915 /* select & load the firmware */
4916 err = mxge_select_firmware(sc);
4918 goto abort_with_dmabench;
4919 sc->intr_coal_delay = mxge_intr_coal_delay;
4921 mxge_slice_probe(sc);
4922 err = mxge_alloc_slices(sc);
4924 goto abort_with_dmabench;
4926 err = mxge_reset(sc, 0);
4928 goto abort_with_slices;
4930 err = mxge_alloc_rings(sc);
4932 device_printf(sc->dev, "failed to allocate rings\n");
4933 goto abort_with_slices;
4936 err = mxge_add_irq(sc);
4938 device_printf(sc->dev, "failed to add irq\n");
4939 goto abort_with_rings;
/* publish ifnet capabilities and offload features */
4942 ifp->if_baudrate = IF_Gbps(10);
4943 ifp->if_capabilities = IFCAP_RXCSUM | IFCAP_TXCSUM | IFCAP_TSO4 |
4944 IFCAP_VLAN_MTU | IFCAP_LINKSTATE | IFCAP_TXCSUM_IPV6 |
4946 #if defined(INET) || defined(INET6)
4947 ifp->if_capabilities |= IFCAP_LRO;
4950 #ifdef MXGE_NEW_VLAN_API
4951 ifp->if_capabilities |= IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_HWCSUM;
4953 /* Only FW 1.4.32 and newer can do TSO over vlans */
4954 if (sc->fw_ver_major == 1 && sc->fw_ver_minor == 4 &&
4955 sc->fw_ver_tiny >= 32)
4956 ifp->if_capabilities |= IFCAP_VLAN_HWTSO;
4958 sc->max_mtu = mxge_max_mtu(sc);
4959 if (sc->max_mtu >= 9000)
4960 ifp->if_capabilities |= IFCAP_JUMBO_MTU;
4962 device_printf(dev, "MTU limited to %d. Install "
4963 "latest firmware for 9000 byte jumbo support\n",
4964 sc->max_mtu - ETHER_HDR_LEN);
4965 ifp->if_hwassist = CSUM_TCP | CSUM_UDP | CSUM_TSO;
4966 ifp->if_hwassist |= CSUM_TCP_IPV6 | CSUM_UDP_IPV6;
4967 /* check to see if f/w supports TSO for IPv6 */
4968 if (!mxge_send_cmd(sc, MXGEFW_CMD_GET_MAX_TSO6_HDR_SIZE, &cmd)) {
4970 ifp->if_capabilities |= IFCAP_TSO6;
4971 sc->max_tso6_hlen = min(cmd.data0,
4972 sizeof (sc->ss[0].scratch));
4974 ifp->if_capenable = ifp->if_capabilities;
4975 if (sc->lro_cnt == 0)
4976 ifp->if_capenable &= ~IFCAP_LRO;
4977 ifp->if_init = mxge_init;
4979 ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
4980 ifp->if_ioctl = mxge_ioctl;
4981 ifp->if_start = mxge_start;
4982 ifp->if_get_counter = mxge_get_counter;
4983 ifp->if_hw_tsomax = IP_MAXPACKET - (ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN);
4984 ifp->if_hw_tsomaxsegcount = sc->ss[0].tx.max_desc;
4985 ifp->if_hw_tsomaxsegsize = IP_MAXPACKET;
4986 /* Initialise the ifmedia structure */
4987 ifmedia_init(&sc->media, 0, mxge_media_change,
4989 mxge_media_init(sc);
4990 mxge_media_probe(sc);
4992 ether_ifattach(ifp, sc->mac_addr);
4993 /* ether_ifattach sets mtu to ETHERMTU */
4994 if (mxge_initial_mtu != ETHERMTU)
4995 mxge_change_mtu(sc, mxge_initial_mtu);
4997 mxge_add_sysctls(sc);
4998 #ifdef IFNET_BUF_RING
4999 ifp->if_transmit = mxge_transmit;
5000 ifp->if_qflush = mxge_qflush;
5002 taskqueue_start_threads(&sc->tq, 1, PI_NET, "%s taskq",
5003 device_get_nameunit(sc->dev));
5004 callout_reset(&sc->co_hdl, mxge_ticks, mxge_tick, sc);
/* error unwind: reverse order of the allocations above */
5008 mxge_free_rings(sc);
5010 mxge_free_slices(sc);
5011 abort_with_dmabench:
5012 mxge_dma_free(&sc->dmabench_dma);
5013 abort_with_zeropad_dma:
5014 mxge_dma_free(&sc->zeropad_dma);
5016 mxge_dma_free(&sc->cmd_dma);
5018 bus_release_resource(dev, SYS_RES_MEMORY, PCIR_BARS, sc->mem_res);
5020 pci_disable_busmaster(dev);
5021 mtx_destroy(&sc->cmd_mtx);
5022 mtx_destroy(&sc->driver_mtx);
5024 abort_with_parent_dmat:
5025 bus_dma_tag_destroy(sc->parent_dmat);
5027 if (sc->tq != NULL) {
5028 taskqueue_drain(sc->tq, &sc->watchdog_task);
5029 taskqueue_free(sc->tq);
/*
 * mxge_detach: device detach — the mirror of mxge_attach.  Refuses to
 * detach while vlans are configured, closes the interface under the
 * driver mutex if it is running, detaches from the network stack,
 * drains the watchdog taskqueue and tick callout, then releases media,
 * sysctls, IRQs (missing line — presumably mxge_rem_irq; TODO
 * confirm), rings, slices, DMA regions, the BAR, mutexes, and the
 * parent DMA tag.
 * NOTE(review): several original lines are missing from this span
 * (EBUSY return, mxge_close call, braces, return 0) — verify against
 * the complete mxge.c.
 */
5037 mxge_detach(device_t dev)
5039 mxge_softc_t *sc = device_get_softc(dev);
5041 if (mxge_vlans_active(sc)) {
5042 device_printf(sc->dev,
5043 "Detach vlans before removing module\n");
5046 mtx_lock(&sc->driver_mtx);
5048 if (sc->ifp->if_drv_flags & IFF_DRV_RUNNING)
5050 mtx_unlock(&sc->driver_mtx);
5051 ether_ifdetach(sc->ifp);
5052 if (sc->tq != NULL) {
5053 taskqueue_drain(sc->tq, &sc->watchdog_task);
5054 taskqueue_free(sc->tq);
5057 callout_drain(&sc->co_hdl);
5058 ifmedia_removeall(&sc->media);
/* quiesce the NIC's dummy RDMA engine before freeing its memory */
5059 mxge_dummy_rdma(sc, 0);
5060 mxge_rem_sysctls(sc);
5062 mxge_free_rings(sc);
5063 mxge_free_slices(sc);
5064 mxge_dma_free(&sc->dmabench_dma);
5065 mxge_dma_free(&sc->zeropad_dma);
5066 mxge_dma_free(&sc->cmd_dma);
5067 bus_release_resource(dev, SYS_RES_MEMORY, PCIR_BARS, sc->mem_res);
5068 pci_disable_busmaster(dev);
5069 mtx_destroy(&sc->cmd_mtx);
5070 mtx_destroy(&sc->driver_mtx);
5072 bus_dma_tag_destroy(sc->parent_dmat);
5077 mxge_shutdown(device_t dev)
5083 This file uses Myri10GE driver indentation.
5086 c-file-style:"linux"