1 /******************************************************************************
2 SPDX-License-Identifier: BSD-2-Clause-FreeBSD
4 Copyright (c) 2006-2013, Myricom Inc.
7 Redistribution and use in source and binary forms, with or without
8 modification, are permitted provided that the following conditions are met:
10 1. Redistributions of source code must retain the above copyright notice,
11 this list of conditions and the following disclaimer.
13 2. Neither the name of the Myricom Inc, nor the names of its
14 contributors may be used to endorse or promote products derived from
15 this software without specific prior written permission.
17 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
18 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
21 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
22 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
23 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
24 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
25 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
26 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
27 POSSIBILITY OF SUCH DAMAGE.
29 ***************************************************************************/
31 #include <sys/cdefs.h>
32 __FBSDID("$FreeBSD$");
34 #include <sys/param.h>
35 #include <sys/systm.h>
36 #include <sys/linker.h>
37 #include <sys/firmware.h>
38 #include <sys/endian.h>
39 #include <sys/sockio.h>
41 #include <sys/malloc.h>
43 #include <sys/kernel.h>
45 #include <sys/module.h>
46 #include <sys/socket.h>
47 #include <sys/sysctl.h>
49 #include <sys/taskqueue.h>
50 #include <contrib/zlib/zlib.h>
51 #include <dev/zlib/zcalloc.h>
54 #include <net/if_var.h>
55 #include <net/if_arp.h>
56 #include <net/ethernet.h>
57 #include <net/if_dl.h>
58 #include <net/if_media.h>
62 #include <net/if_types.h>
63 #include <net/if_vlan_var.h>
65 #include <netinet/in_systm.h>
66 #include <netinet/in.h>
67 #include <netinet/ip.h>
68 #include <netinet/ip6.h>
69 #include <netinet/tcp.h>
70 #include <netinet/tcp_lro.h>
71 #include <netinet6/ip6_var.h>
73 #include <machine/bus.h>
74 #include <machine/in_cksum.h>
75 #include <machine/resource.h>
80 #include <dev/pci/pcireg.h>
81 #include <dev/pci/pcivar.h>
82 #include <dev/pci/pci_private.h> /* XXX for pci_cfg_restore */
84 #include <vm/vm.h> /* for pmap_mapdev() */
87 #if defined(__i386) || defined(__amd64)
88 #include <machine/specialreg.h>
91 #include <dev/mxge/mxge_mcp.h>
92 #include <dev/mxge/mcp_gen_header.h>
93 /*#define MXGE_FAKE_IFP*/
94 #include <dev/mxge/if_mxge_var.h>
96 #include <sys/buf_ring.h>
100 #include "opt_inet6.h"
/*
 * Driver-wide tunable defaults.  NOTE(review): this excerpt appears to
 * be missing interleaved lines; only the values visible here are
 * documented.
 */
103 static int mxge_nvidia_ecrc_enable = 1;
104 static int mxge_force_firmware = 0;
105 static int mxge_intr_coal_delay = 30;
106 static int mxge_deassert_wait = 1;
107 static int mxge_flow_control = 1;
108 static int mxge_verbose = 0;
109 static int mxge_ticks;
110 static int mxge_max_slices = 1;
111 static int mxge_rss_hash_type = MXGEFW_RSS_HASH_TYPE_SRC_DST_PORT;
112 static int mxge_always_promisc = 0;
113 static int mxge_initial_mtu = ETHERMTU_JUMBO;
114 static int mxge_throttle = 0;
/* Firmware image names: "eth" = assumes aligned PCIe completions,
 * "ethp" = tolerates unaligned completions; "rss" variants support
 * multiple slices. */
115 static char *mxge_fw_unaligned = "mxge_ethp_z8e";
116 static char *mxge_fw_aligned = "mxge_eth_z8e";
117 static char *mxge_fw_rss_aligned = "mxge_rss_eth_z8e";
118 static char *mxge_fw_rss_unaligned = "mxge_rss_ethp_z8e";
/* Forward declarations for the newbus device interface methods. */
120 static int mxge_probe(device_t dev);
121 static int mxge_attach(device_t dev);
122 static int mxge_detach(device_t dev);
123 static int mxge_shutdown(device_t dev);
124 static void mxge_intr(void *arg);
/* Method table mapping the generic device interface onto this driver. */
126 static device_method_t mxge_methods[] =
128 /* Device interface */
129 DEVMETHOD(device_probe, mxge_probe),
130 DEVMETHOD(device_attach, mxge_attach),
131 DEVMETHOD(device_detach, mxge_detach),
132 DEVMETHOD(device_shutdown, mxge_shutdown),
137 static driver_t mxge_driver =
141 sizeof(mxge_softc_t),
144 static devclass_t mxge_devclass;
146 /* Declare ourselves to be a child of the PCI bus.*/
147 DRIVER_MODULE(mxge, pci, mxge_driver, mxge_devclass, 0, 0);
/* This driver requires the firmware(9) and zlib kernel modules. */
148 MODULE_DEPEND(mxge, firmware, 1, 1, 1);
149 MODULE_DEPEND(mxge, zlib, 1, 1, 1);
/* Internal helpers referenced before their definitions below. */
151 static int mxge_load_firmware(mxge_softc_t *sc, int adopt);
152 static int mxge_send_cmd(mxge_softc_t *sc, uint32_t cmd, mxge_cmd_t *data);
153 static int mxge_close(mxge_softc_t *sc, int down);
154 static int mxge_open(mxge_softc_t *sc);
155 static void mxge_tick(void *arg);
/*
 * Device probe: match the Myricom vendor ID with the Z8E / Z8E_9
 * device IDs and set a human-readable description derived from the
 * PCI revision ID.
 */
158 mxge_probe(device_t dev)
163 if ((pci_get_vendor(dev) == MXGE_PCI_VENDOR_MYRICOM) &&
164 ((pci_get_device(dev) == MXGE_PCI_DEVICE_Z8E) ||
165 (pci_get_device(dev) == MXGE_PCI_DEVICE_Z8E_9))) {
166 rev = pci_get_revid(dev);
168 case MXGE_PCI_REV_Z8E:
169 device_set_desc(dev, "Myri10G-PCIE-8A");
171 case MXGE_PCI_REV_Z8ES:
172 device_set_desc(dev, "Myri10G-PCIE-8B");
/* Unknown revision: keep a generic description and warn. */
175 device_set_desc(dev, "Myri10G-PCIE-8??");
176 device_printf(dev, "Unrecognized rev %d NIC\n",
/*
 * Make the mapping of the NIC SRAM write-combining (x86/amd64 only)
 * via pmap_change_attr(); failure is reported but presumably
 * non-fatal — TODO confirm against the missing surrounding lines.
 */
186 mxge_enable_wc(mxge_softc_t *sc)
188 #if defined(__i386) || defined(__amd64)
193 len = rman_get_size(sc->mem_res);
194 err = pmap_change_attr((vm_offset_t) sc->sram,
195 len, PAT_WRITE_COMBINING);
197 device_printf(sc->dev, "pmap_change_attr failed, %d\n",
205 /* callback to get our DMA address */
207 mxge_dmamap_callback(void *arg, bus_dma_segment_t *segs, int nsegs,
/* Single-segment mapping assumed: return the bus address via *arg. */
211 *(bus_addr_t *) arg = segs->ds_addr;
/*
 * Allocate a coherent, zeroed DMA buffer of 'bytes' bytes with the
 * requested alignment: create a tag, allocate+map memory, then load
 * the map; the bus address lands in dma->bus_addr via
 * mxge_dmamap_callback().  Failures unwind in goto-cleanup style.
 */
216 mxge_dma_alloc(mxge_softc_t *sc, mxge_dma_t *dma, size_t bytes,
217 bus_size_t alignment)
220 device_t dev = sc->dev;
221 bus_size_t boundary, maxsegsize;
/* 4KB-aligned allocations larger than a page get special boundary
 * handling — the exact values are on lines missing from this excerpt. */
223 if (bytes > 4096 && alignment == 4096) {
231 /* allocate DMAable memory tags */
232 err = bus_dma_tag_create(sc->parent_dmat, /* parent */
233 alignment, /* alignment */
234 boundary, /* boundary */
235 BUS_SPACE_MAXADDR, /* low */
236 BUS_SPACE_MAXADDR, /* high */
237 NULL, NULL, /* filter */
240 maxsegsize, /* maxsegsize */
241 BUS_DMA_COHERENT, /* flags */
242 NULL, NULL, /* lock */
243 &dma->dmat); /* tag */
245 device_printf(dev, "couldn't alloc tag (err = %d)\n", err);
249 /* allocate DMAable memory & map */
250 err = bus_dmamem_alloc(dma->dmat, &dma->addr,
251 (BUS_DMA_WAITOK | BUS_DMA_COHERENT
252 | BUS_DMA_ZERO), &dma->map);
254 device_printf(dev, "couldn't alloc mem (err = %d)\n", err);
255 goto abort_with_dmat;
258 /* load the memory */
259 err = bus_dmamap_load(dma->dmat, dma->map, dma->addr, bytes,
260 mxge_dmamap_callback,
261 (void *)&dma->bus_addr, 0);
263 device_printf(dev, "couldn't load map (err = %d)\n", err);
/* error unwind: release in reverse order of acquisition */
269 bus_dmamem_free(dma->dmat, dma->addr, dma->map);
271 (void)bus_dma_tag_destroy(dma->dmat);
/*
 * Release a buffer allocated by mxge_dma_alloc(): unload the map,
 * free the memory, destroy the tag — reverse order of acquisition.
 */
277 mxge_dma_free(mxge_dma_t *dma)
279 bus_dmamap_unload(dma->dmat, dma->map);
280 bus_dmamem_free(dma->dmat, dma->addr, dma->map);
281 (void)bus_dma_tag_destroy(dma->dmat);
285 * The eeprom strings on the lanaiX have the format
/*
 * Parse the NUL-separated EEPROM strings: "MAC=" (hex bytes), "PC="
 * (product code), and "SN="/"SN2=" (serial number, SN2 taking
 * precedence over SN).  Prints a diagnostic if parsing fails.
 */
292 mxge_parse_strings(mxge_softc_t *sc)
295 int i, found_mac, found_sn2;
298 ptr = sc->eeprom_strings;
301 while (*ptr != '\0') {
302 if (strncmp(ptr, "MAC=", 4) == 0) {
/* each MAC octet must be exactly two hex digits */
305 sc->mac_addr[i] = strtoul(ptr, &endptr, 16);
306 if (endptr - ptr != 2)
315 } else if (strncmp(ptr, "PC=", 3) == 0) {
317 strlcpy(sc->product_code_string, ptr,
318 sizeof(sc->product_code_string));
319 } else if (!found_sn2 && (strncmp(ptr, "SN=", 3) == 0)) {
321 strlcpy(sc->serial_number_string, ptr,
322 sizeof(sc->serial_number_string));
323 } else if (strncmp(ptr, "SN2=", 4) == 0) {
324 /* SN2 takes precedence over SN */
327 strlcpy(sc->serial_number_string, ptr,
328 sizeof(sc->serial_number_string));
/* advance past the current NUL-terminated string */
330 while (*ptr++ != '\0') {}
337 device_printf(sc->dev, "failed to parse eeprom_strings\n");
342 #if defined __i386 || defined i386 || defined __i386__ || defined __x86_64__
/*
 * Try to enable ECRC generation on an upstream Nvidia (ck804/mcp55)
 * bridge so PCIe completions arrive aligned.  The target register is
 * at extended config offset 0x178, which normal config accesses
 * cannot reach here, so the chipset's memory-mapped config space is
 * mapped directly with pmap_mapdev().
 */
344 mxge_enable_nvidia_ecrc(mxge_softc_t *sc)
347 unsigned long base, off;
349 device_t pdev, mcp55;
350 uint16_t vendor_id, device_id, word;
351 uintptr_t bus, slot, func, ivend, idev;
/* honor the mxge_nvidia_ecrc_enable tunable */
355 if (!mxge_nvidia_ecrc_enable)
358 pdev = device_get_parent(device_get_parent(sc->dev));
360 device_printf(sc->dev, "could not find parent?\n");
363 vendor_id = pci_read_config(pdev, PCIR_VENDOR, 2);
364 device_id = pci_read_config(pdev, PCIR_DEVICE, 2);
/* only Nvidia bridges are handled */
366 if (vendor_id != 0x10de)
371 if (device_id == 0x005d) {
372 /* ck804, base address is magic */
374 } else if (device_id >= 0x0374 && device_id <= 0x378) {
375 /* mcp55, base address stored in chipset */
376 mcp55 = pci_find_bsf(0, 0, 0);
378 0x10de == pci_read_config(mcp55, PCIR_VENDOR, 2) &&
379 0x0369 == pci_read_config(mcp55, PCIR_DEVICE, 2)) {
380 word = pci_read_config(mcp55, 0x90, 2);
381 base = ((unsigned long)word & 0x7ffeU) << 25;
388 Test below is commented because it is believed that doing
389 config read/write beyond 0xff will access the config space
390 for the next larger function. Uncomment this and remove
391 the hacky pmap_mapdev() way of accessing config space when
392 FreeBSD grows support for extended pcie config space access
395 /* See if we can, by some miracle, access the extended
397 val = pci_read_config(pdev, 0x178, 4);
398 if (val != 0xffffffff) {
400 pci_write_config(pdev, 0x178, val, 4);
404 /* Rather than using normal pci config space writes, we must
405 * map the Nvidia config space ourselves. This is because on
406 * opteron/nvidia class machine the 0xe000000 mapping is
407 * handled by the nvidia chipset, that means the internal PCI
408 * device (the on-chip northbridge), or the amd-8131 bridge
409 * and things behind them are not visible by this method.
/* gather bus/slot/function and IDs of the bridge to locate (and
 * later sanity-check) its slice of the mapped config space */
412 BUS_READ_IVAR(device_get_parent(pdev), pdev,
414 BUS_READ_IVAR(device_get_parent(pdev), pdev,
415 PCI_IVAR_SLOT, &slot);
416 BUS_READ_IVAR(device_get_parent(pdev), pdev,
417 PCI_IVAR_FUNCTION, &func);
418 BUS_READ_IVAR(device_get_parent(pdev), pdev,
419 PCI_IVAR_VENDOR, &ivend);
420 BUS_READ_IVAR(device_get_parent(pdev), pdev,
421 PCI_IVAR_DEVICE, &idev);
424 + 0x00100000UL * (unsigned long)bus
425 + 0x00001000UL * (unsigned long)(func
428 /* map it into the kernel */
429 va = pmap_mapdev(trunc_page((vm_paddr_t)off), PAGE_SIZE);
433 device_printf(sc->dev, "pmap_kenter_temporary didn't\n");
436 /* get a pointer to the config space mapped into the kernel */
437 cfgptr = va + (off & PAGE_MASK);
439 /* make sure that we can really access it */
440 vendor_id = *(uint16_t *)(cfgptr + PCIR_VENDOR);
441 device_id = *(uint16_t *)(cfgptr + PCIR_DEVICE);
442 if (! (vendor_id == ivend && device_id == idev)) {
443 device_printf(sc->dev, "mapping failed: 0x%x:0x%x\n",
444 vendor_id, device_id);
445 pmap_unmapdev((vm_offset_t)va, PAGE_SIZE);
/* read-modify-write of the ECRC enable register at 0x178 */
449 ptr32 = (uint32_t*)(cfgptr + 0x178);
452 if (val == 0xffffffff) {
453 device_printf(sc->dev, "extended mapping failed\n");
454 pmap_unmapdev((vm_offset_t)va, PAGE_SIZE);
458 pmap_unmapdev((vm_offset_t)va, PAGE_SIZE);
460 device_printf(sc->dev,
461 "Enabled ECRC on upstream Nvidia bridge "
463 (int)bus, (int)slot, (int)func);
/* Non-x86 stub: ECRC cannot be enabled this way; just complain. */
468 mxge_enable_nvidia_ecrc(mxge_softc_t *sc)
470 device_printf(sc->dev,
471 "Nforce 4 chipset on non-x86/amd64!?!?!\n");
/*
 * Run the firmware's DMA benchmark against the dmabench buffer and
 * record read, write, and concurrent read/write bandwidth (MB/s) in
 * sc->read_dma / sc->write_dma / sc->read_write_dma.  Also invoked
 * with MXGEFW_CMD_UNALIGNED_TEST to detect unaligned completions, in
 * which case failure is expected and not reported.
 */
478 mxge_dma_test(mxge_softc_t *sc, int test_type)
481 bus_addr_t dmatest_bus = sc->dmabench_dma.bus_addr;
487 /* Run a small DMA test.
488 * The magic multipliers to the length tell the firmware
489 * to do DMA read, write, or read+write tests. The
490 * results are returned in cmd.data0. The upper 16
491 * bits of the return is the number of transfers completed.
492 * The lower 16 bits is the time in 0.5us ticks that the
493 * transfers took to complete.
496 len = sc->tx_boundary;
/* read test (len * 0x10000) */
498 cmd.data0 = MXGE_LOWPART_TO_U32(dmatest_bus);
499 cmd.data1 = MXGE_HIGHPART_TO_U32(dmatest_bus);
500 cmd.data2 = len * 0x10000;
501 status = mxge_send_cmd(sc, test_type, &cmd);
506 sc->read_dma = ((cmd.data0>>16) * len * 2) /
507 (cmd.data0 & 0xffff);
/* write test (len * 0x1) */
508 cmd.data0 = MXGE_LOWPART_TO_U32(dmatest_bus);
509 cmd.data1 = MXGE_HIGHPART_TO_U32(dmatest_bus);
510 cmd.data2 = len * 0x1;
511 status = mxge_send_cmd(sc, test_type, &cmd);
516 sc->write_dma = ((cmd.data0>>16) * len * 2) /
517 (cmd.data0 & 0xffff);
/* concurrent read+write test (len * 0x10001) */
519 cmd.data0 = MXGE_LOWPART_TO_U32(dmatest_bus);
520 cmd.data1 = MXGE_HIGHPART_TO_U32(dmatest_bus);
521 cmd.data2 = len * 0x10001;
522 status = mxge_send_cmd(sc, test_type, &cmd);
527 sc->read_write_dma = ((cmd.data0>>16) * len * 2 * 2) /
528 (cmd.data0 & 0xffff);
531 if (status != 0 && test_type != MXGEFW_CMD_UNALIGNED_TEST)
532 device_printf(sc->dev, "DMA %s benchmark failed: %d\n",
539 * The Lanai Z8E PCI-E interface achieves higher Read-DMA throughput
540 * when the PCI-E Completion packets are aligned on an 8-byte
541 * boundary. Some PCI-E chip sets always align Completion packets; on
542 * the ones that do not, the alignment can be enforced by enabling
543 * ECRC generation (if supported).
545 * When PCI-E Completion packets are not aligned, it is actually more
546 * efficient to limit Read-DMA transactions to 2KB, rather than 4KB.
548 * If the driver can neither enable ECRC nor verify that it has
549 * already been enabled, then it must use a firmware image which works
550 * around unaligned completion packets (ethp_z8e.dat), and it should
551 * also ensure that it never gives the device a Read-DMA which is
552 * larger than 2KB by setting the tx_boundary to 2KB. If ECRC is
553 * enabled, then the driver should use the aligned (eth_z8e.dat)
554 * firmware image, and set tx_boundary to 4KB.
/*
 * Probe whether the aligned-completion firmware works on this host:
 * verify the PCIe Max Read Request size, load the aligned image, try
 * to enable ECRC on an Nvidia bridge, then run the unaligned-
 * completion DMA test (not required on Z8ES or newer revisions).
 * Returns 0 when the aligned firmware can be kept.
 */
558 mxge_firmware_probe(mxge_softc_t *sc)
560 device_t dev = sc->dev;
564 sc->tx_boundary = 4096;
566 * Verify the max read request size was set to 4KB
567 * before trying the test with 4KB.
/* NOTE(review): "®" below is a mojibake of "&reg" — restore before
 * compiling. */
569 if (pci_find_cap(dev, PCIY_EXPRESS, ®) == 0) {
570 pectl = pci_read_config(dev, reg + 0x8, 2);
571 if ((pectl & (5 << 12)) != (5 << 12)) {
572 device_printf(dev, "Max Read Req. size != 4k (0x%x\n",
574 sc->tx_boundary = 2048;
579 * load the optimized firmware (which assumes aligned PCIe
580 * completions) in order to see if it works on this host.
582 sc->fw_name = mxge_fw_aligned;
583 status = mxge_load_firmware(sc, 1);
589 * Enable ECRC if possible
591 mxge_enable_nvidia_ecrc(sc);
594 * Run a DMA test which watches for unaligned completions and
595 * aborts on the first one seen. Not required on Z8ES or newer.
597 if (pci_get_revid(sc->dev) >= MXGE_PCI_REV_Z8ES)
599 status = mxge_dma_test(sc, MXGEFW_CMD_UNALIGNED_TEST);
601 return 0; /* keep the aligned firmware */
604 device_printf(dev, "DMA test failed: %d\n", status);
605 if (status == ENOSYS)
606 device_printf(dev, "Falling back to ethp! "
607 "Please install up to date fw\n");
/*
 * Select aligned vs. unaligned firmware and the matching tx_boundary
 * (4KB vs. 2KB): honor the mxge_force_firmware / throttle overrides,
 * trust narrow (<= x4) links, otherwise probe the host with
 * mxge_firmware_probe().  Ends by loading the chosen image.
 */
612 mxge_select_firmware(mxge_softc_t *sc)
615 int force_firmware = mxge_force_firmware;
/* throttling forces a specific firmware choice */
618 force_firmware = sc->throttle;
620 if (force_firmware != 0) {
621 if (force_firmware == 1)
626 device_printf(sc->dev,
627 "Assuming %s completions (forced)\n",
628 aligned ? "aligned" : "unaligned");
632 /* if the PCIe link width is 4 or less, we can use the aligned
633 firmware and skip any checks */
634 if (sc->link_width != 0 && sc->link_width <= 4) {
635 device_printf(sc->dev,
636 "PCIe x%d Link, expect reduced performance\n",
642 if (0 == mxge_firmware_probe(sc))
/* probe passed: aligned firmware, 4KB read DMAs are safe */
647 sc->fw_name = mxge_fw_aligned;
648 sc->tx_boundary = 4096;
/* otherwise fall back to the unaligned workaround image, 2KB DMAs */
650 sc->fw_name = mxge_fw_unaligned;
651 sc->tx_boundary = 2048;
653 return (mxge_load_firmware(sc, 0));
/*
 * Sanity-check a firmware header: it must be an Ethernet MCP, and its
 * major.minor version must match what the driver was built against.
 * Records the version string (for sysctl) and parsed components in
 * the softc as a side effect.
 */
657 mxge_validate_firmware(mxge_softc_t *sc, const mcp_gen_header_t *hdr)
661 if (be32toh(hdr->mcp_type) != MCP_TYPE_ETH) {
662 device_printf(sc->dev, "Bad firmware type: 0x%x\n",
663 be32toh(hdr->mcp_type));
667 /* save firmware version for sysctl */
668 strlcpy(sc->fw_version, hdr->version, sizeof(sc->fw_version));
670 device_printf(sc->dev, "firmware id: %s\n", hdr->version);
672 sscanf(sc->fw_version, "%d.%d.%d", &sc->fw_ver_major,
673 &sc->fw_ver_minor, &sc->fw_ver_tiny);
675 if (!(sc->fw_ver_major == MXGEFW_VERSION_MAJOR
676 && sc->fw_ver_minor == MXGEFW_VERSION_MINOR)) {
677 device_printf(sc->dev, "Found firmware version %s\n",
679 device_printf(sc->dev, "Driver needs %d.%d\n",
680 MXGEFW_VERSION_MAJOR, MXGEFW_VERSION_MINOR);
/*
 * Fetch the firmware image via firmware(9), inflate it with zlib
 * (the uncompressed length is stashed in fw->version by the build),
 * validate its embedded header, then PIO-copy it into NIC SRAM at
 * MXGE_FW_OFFSET in 256-byte chunks.  Cleanup on error follows the
 * abort_with_* goto-label convention.
 */
688 mxge_load_firmware_helper(mxge_softc_t *sc, uint32_t *limit)
691 char *inflate_buffer;
692 const struct firmware *fw;
693 const mcp_gen_header_t *hdr;
700 fw = firmware_get(sc->fw_name);
702 device_printf(sc->dev, "Could not find firmware image %s\n",
709 /* setup zlib and decompress f/w */
710 bzero(&zs, sizeof (zs));
711 zs.zalloc = zcalloc_nowait;
713 status = inflateInit(&zs);
714 if (status != Z_OK) {
719 /* the uncompressed size is stored as the firmware version,
720 which would otherwise go unused */
721 fw_len = (size_t) fw->version;
722 inflate_buffer = malloc(fw_len, M_TEMP, M_NOWAIT);
723 if (inflate_buffer == NULL)
725 zs.avail_in = fw->datasize;
726 zs.next_in = __DECONST(char *, fw->data);
727 zs.avail_out = fw_len;
728 zs.next_out = inflate_buffer;
729 status = inflate(&zs, Z_FINISH);
730 if (status != Z_STREAM_END) {
731 device_printf(sc->dev, "zlib %d\n", status);
733 goto abort_with_buffer;
/* locate and validate the embedded MCP header */
737 hdr_offset = htobe32(*(const uint32_t *)
738 (inflate_buffer + MCP_HEADER_PTR_OFFSET));
739 if ((hdr_offset & 3) || hdr_offset + sizeof(*hdr) > fw_len) {
740 device_printf(sc->dev, "Bad firmware file");
742 goto abort_with_buffer;
744 hdr = (const void*)(inflate_buffer + hdr_offset);
746 status = mxge_validate_firmware(sc, hdr);
748 goto abort_with_buffer;
750 /* Copy the inflated firmware to NIC SRAM. */
751 for (i = 0; i < fw_len; i += 256) {
752 mxge_pio_copy(sc->sram + MXGE_FW_OFFSET + i,
754 min(256U, (unsigned)(fw_len - i)));
763 free(inflate_buffer, M_TEMP);
767 firmware_put(fw, FIRMWARE_UNLOAD);
772 * Enable or disable periodic RDMAs from the host to make certain
773 * chipsets resend dropped PCIe messages
/*
 * Builds an 8-byte-aligned command buffer on the stack, writes it to
 * the bootstrap MCP's dummy-RDMA mailbox via PIO, and polls the
 * confirmation word (firmware writes 0xffffffff on success) for up
 * to ~20 iterations before reporting failure.
 */
777 mxge_dummy_rdma(mxge_softc_t *sc, int enable)
780 volatile uint32_t *confirm;
781 volatile char *submit;
782 uint32_t *buf, dma_low, dma_high;
/* round the stack buffer up to an 8-byte boundary */
785 buf = (uint32_t *)((unsigned long)(buf_bytes + 7) & ~7UL);
787 /* clear confirmation addr */
788 confirm = (volatile uint32_t *)sc->cmd;
792 /* send an rdma command to the PCIe engine, and wait for the
793 response in the confirmation address. The firmware should
794 write a -1 there to indicate it is alive and well
797 dma_low = MXGE_LOWPART_TO_U32(sc->cmd_dma.bus_addr);
798 dma_high = MXGE_HIGHPART_TO_U32(sc->cmd_dma.bus_addr);
799 buf[0] = htobe32(dma_high); /* confirm addr MSW */
800 buf[1] = htobe32(dma_low); /* confirm addr LSW */
801 buf[2] = htobe32(0xffffffff); /* confirm data */
802 dma_low = MXGE_LOWPART_TO_U32(sc->zeropad_dma.bus_addr);
803 dma_high = MXGE_HIGHPART_TO_U32(sc->zeropad_dma.bus_addr);
804 buf[3] = htobe32(dma_high); /* dummy addr MSW */
805 buf[4] = htobe32(dma_low); /* dummy addr LSW */
806 buf[5] = htobe32(enable); /* enable? */
809 submit = (volatile char *)(sc->sram + MXGEFW_BOOT_DUMMY_RDMA);
811 mxge_pio_copy(submit, buf, 64);
/* poll for the firmware's 0xffffffff acknowledgement */
816 while (*confirm != 0xffffffff && i < 20) {
820 if (*confirm != 0xffffffff) {
821 device_printf(sc->dev, "dummy rdma %s failed (%p = 0x%x)",
822 (enable ? "enable" : "disable"), confirm,
/*
 * Send a command to the running firmware: marshal cmd/data into a
 * big-endian, 8-byte-aligned mcp_cmd_t, PIO it to the command
 * mailbox, and poll the DMA'd response block (under cmd_mtx) for up
 * to ~20ms.  The firmware result code is translated to an errno-style
 * return; on success the response data is copied back to data->data0.
 */
829 mxge_send_cmd(mxge_softc_t *sc, uint32_t cmd, mxge_cmd_t *data)
832 char buf_bytes[sizeof(*buf) + 8];
833 volatile mcp_cmd_response_t *response = sc->cmd;
834 volatile char *cmd_addr = sc->sram + MXGEFW_ETH_CMD;
835 uint32_t dma_low, dma_high;
836 int err, sleep_total = 0;
838 /* ensure buf is aligned to 8 bytes */
839 buf = (mcp_cmd_t *)((unsigned long)(buf_bytes + 7) & ~7UL);
841 buf->data0 = htobe32(data->data0);
842 buf->data1 = htobe32(data->data1);
843 buf->data2 = htobe32(data->data2);
844 buf->cmd = htobe32(cmd);
845 dma_low = MXGE_LOWPART_TO_U32(sc->cmd_dma.bus_addr);
846 dma_high = MXGE_HIGHPART_TO_U32(sc->cmd_dma.bus_addr);
848 buf->response_addr.low = htobe32(dma_low);
849 buf->response_addr.high = htobe32(dma_high);
/* cmd_mtx serializes access to the single command/response slot */
850 mtx_lock(&sc->cmd_mtx);
851 response->result = 0xffffffff;
853 mxge_pio_copy((volatile void *)cmd_addr, buf, sizeof (*buf));
855 /* wait up to 20ms */
857 for (sleep_total = 0; sleep_total < 20; sleep_total++) {
/* sync so the CPU sees the firmware's DMA'd response */
858 bus_dmamap_sync(sc->cmd_dma.dmat,
859 sc->cmd_dma.map, BUS_DMASYNC_POSTREAD);
861 switch (be32toh(response->result)) {
863 data->data0 = be32toh(response->data);
869 case MXGEFW_CMD_UNKNOWN:
872 case MXGEFW_CMD_ERROR_UNALIGNED:
875 case MXGEFW_CMD_ERROR_BUSY:
878 case MXGEFW_CMD_ERROR_I2C_ABSENT:
882 device_printf(sc->dev,
884 "failed, result = %d\n",
885 cmd, be32toh(response->result));
/* result never changed from the 0xffffffff sentinel: timed out */
893 device_printf(sc->dev, "mxge: command %d timed out"
895 cmd, be32toh(response->result));
896 mtx_unlock(&sc->cmd_mtx);
/*
 * Adopt the firmware already running on the NIC: read its header
 * pointer from SRAM, copy the header to host memory, validate it,
 * and flag the known 1.4.4–1.4.11 rx-filter bug so the driver keeps
 * the NIC in ALLMULTI mode as a workaround.
 */
901 mxge_adopt_running_firmware(mxge_softc_t *sc)
903 struct mcp_gen_header *hdr;
904 const size_t bytes = sizeof (struct mcp_gen_header);
908 /* find running firmware header */
909 hdr_offset = htobe32(*(volatile uint32_t *)
910 (sc->sram + MCP_HEADER_PTR_OFFSET));
912 if ((hdr_offset & 3) || hdr_offset + sizeof(*hdr) > sc->sram_size) {
913 device_printf(sc->dev,
914 "Running firmware has bad header offset (%d)\n",
919 /* copy header of running firmware from SRAM to host memory to
920 * validate firmware */
921 hdr = malloc(bytes, M_DEVBUF, M_NOWAIT);
923 device_printf(sc->dev, "could not malloc firmware hdr\n");
926 bus_space_read_region_1(rman_get_bustag(sc->mem_res),
927 rman_get_bushandle(sc->mem_res),
928 hdr_offset, (char *)hdr, bytes);
929 status = mxge_validate_firmware(sc, hdr);
933 * check to see if adopted firmware has bug where adopting
934 * it will cause broadcasts to be filtered unless the NIC
935 * is kept in ALLMULTI mode
937 if (sc->fw_ver_major == 1 && sc->fw_ver_minor == 4 &&
938 sc->fw_ver_tiny >= 4 && sc->fw_ver_tiny <= 11) {
939 sc->adopted_rx_filter_bug = 1;
940 device_printf(sc->dev, "Adopting fw %d.%d.%d: "
941 "working around rx filter bug\n",
942 sc->fw_ver_major, sc->fw_ver_minor,
/*
 * Load firmware into the NIC.  First tries mxge_load_firmware_helper()
 * (fetch + inflate + copy to SRAM); if that fails and 'adopt' is set,
 * falls back to adopting the firmware already running on the NIC
 * (forcing the unaligned/2KB settings since the running image cannot
 * be trusted to handle 4KB reads).  Finally hands off to the loaded
 * code via the bootstrap mailbox and polls the confirmation word.
 */
951 mxge_load_firmware(mxge_softc_t *sc, int adopt)
953 volatile uint32_t *confirm;
954 volatile char *submit;
956 uint32_t *buf, size, dma_low, dma_high;
/* align the stack command buffer to 8 bytes */
959 buf = (uint32_t *)((unsigned long)(buf_bytes + 7) & ~7UL);
961 size = sc->sram_size;
962 status = mxge_load_firmware_helper(sc, &size);
966 /* Try to use the currently running firmware, if
968 status = mxge_adopt_running_firmware(sc);
970 device_printf(sc->dev,
971 "failed to adopt running firmware\n");
974 device_printf(sc->dev,
975 "Successfully adopted running firmware\n");
976 if (sc->tx_boundary == 4096) {
977 device_printf(sc->dev,
978 "Using firmware currently running on NIC"
980 device_printf(sc->dev,
981 "performance consider loading optimized "
/* adopted image: assume it needs the unaligned workaround */
984 sc->fw_name = mxge_fw_unaligned;
985 sc->tx_boundary = 2048;
988 /* clear confirmation addr */
989 confirm = (volatile uint32_t *)sc->cmd;
992 /* send a reload command to the bootstrap MCP, and wait for the
993 response in the confirmation address. The firmware should
994 write a -1 there to indicate it is alive and well
997 dma_low = MXGE_LOWPART_TO_U32(sc->cmd_dma.bus_addr);
998 dma_high = MXGE_HIGHPART_TO_U32(sc->cmd_dma.bus_addr);
1000 buf[0] = htobe32(dma_high); /* confirm addr MSW */
1001 buf[1] = htobe32(dma_low); /* confirm addr LSW */
1002 buf[2] = htobe32(0xffffffff); /* confirm data */
1004 /* FIX: All newest firmware should un-protect the bottom of
1005 the sram before handoff. However, the very first interfaces
1006 do not. Therefore the handoff copy must skip the first 8 bytes
1008 /* where the code starts*/
1009 buf[3] = htobe32(MXGE_FW_OFFSET + 8);
1010 buf[4] = htobe32(size - 8); /* length of code */
1011 buf[5] = htobe32(8); /* where to copy to */
1012 buf[6] = htobe32(0); /* where to jump to */
1014 submit = (volatile char *)(sc->sram + MXGEFW_BOOT_HANDOFF);
1015 mxge_pio_copy(submit, buf, 64);
/* poll for the firmware's 0xffffffff acknowledgement */
1020 while (*confirm != 0xffffffff && i < 20) {
1023 bus_dmamap_sync(sc->cmd_dma.dmat,
1024 sc->cmd_dma.map, BUS_DMASYNC_POSTREAD);
1026 if (*confirm != 0xffffffff) {
1027 device_printf(sc->dev,"handoff failed (%p = 0x%x)",
/*
 * Push sc->mac_addr to the firmware: the six octets are packed
 * big-endian into data0 (first four) and data1 (last two).
 */
1036 mxge_update_mac_address(mxge_softc_t *sc)
1039 uint8_t *addr = sc->mac_addr;
1043 cmd.data0 = ((addr[0] << 24) | (addr[1] << 16)
1044 | (addr[2] << 8) | addr[3]);
1046 cmd.data1 = ((addr[4] << 8) | (addr[5]));
1048 status = mxge_send_cmd(sc, MXGEFW_SET_MAC_ADDRESS, &cmd);
/*
 * Enable or disable 802.3x flow control in the firmware according to
 * 'pause'; logs a diagnostic on failure.
 */
1053 mxge_change_pause(mxge_softc_t *sc, int pause)
1059 status = mxge_send_cmd(sc, MXGEFW_ENABLE_FLOW_CONTROL,
1062 status = mxge_send_cmd(sc, MXGEFW_DISABLE_FLOW_CONTROL,
1066 device_printf(sc->dev, "Failed to set flow control mode\n");
/*
 * Enable or disable promiscuous mode in the firmware; the
 * mxge_always_promisc tunable forces it on.
 */
1074 mxge_change_promisc(mxge_softc_t *sc, int promisc)
1079 if (mxge_always_promisc)
1083 status = mxge_send_cmd(sc, MXGEFW_ENABLE_PROMISC,
1086 status = mxge_send_cmd(sc, MXGEFW_DISABLE_PROMISC,
1090 device_printf(sc->dev, "Failed to set promisc mode\n");
/*
 * Reprogram the firmware's multicast filter from the ifnet multicast
 * list: go ALLMULTI while updating, flush the old filters, join each
 * AF_LINK group address, then re-enable filtering.  Any firmware
 * error leaves the NIC in ALLMULTI (safe, just less selective).
 */
1095 mxge_set_multicast_list(mxge_softc_t *sc)
1098 struct ifmultiaddr *ifma;
1099 struct ifnet *ifp = sc->ifp;
1102 /* This firmware is known to not support multicast */
1103 if (!sc->fw_multicast_support)
1106 /* Disable multicast filtering while we play with the lists*/
1107 err = mxge_send_cmd(sc, MXGEFW_ENABLE_ALLMULTI, &cmd);
1109 device_printf(sc->dev, "Failed MXGEFW_ENABLE_ALLMULTI,"
1110 " error status: %d\n", err);
/* adopted-firmware rx filter bug: must stay in ALLMULTI */
1114 if (sc->adopted_rx_filter_bug)
1117 if (ifp->if_flags & IFF_ALLMULTI)
1118 /* request to disable multicast filtering, so quit here */
1121 /* Flush all the filters */
1123 err = mxge_send_cmd(sc, MXGEFW_LEAVE_ALL_MULTICAST_GROUPS, &cmd);
1125 device_printf(sc->dev,
1126 "Failed MXGEFW_LEAVE_ALL_MULTICAST_GROUPS"
1127 ", error status: %d\n", err);
1131 /* Walk the multicast list, and add each address */
1133 if_maddr_rlock(ifp);
1134 CK_STAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
1135 if (ifma->ifma_addr->sa_family != AF_LINK)
/* split the 6-byte link-layer address across data0/data1 */
1137 bcopy(LLADDR((struct sockaddr_dl *)ifma->ifma_addr),
1139 bcopy(LLADDR((struct sockaddr_dl *)ifma->ifma_addr) + 4,
1141 cmd.data0 = htonl(cmd.data0);
1142 cmd.data1 = htonl(cmd.data1);
1143 err = mxge_send_cmd(sc, MXGEFW_JOIN_MULTICAST_GROUP, &cmd);
1145 device_printf(sc->dev, "Failed "
1146 "MXGEFW_JOIN_MULTICAST_GROUP, error status:"
1148 /* abort, leaving multicast filtering off */
1149 if_maddr_runlock(ifp);
1153 if_maddr_runlock(ifp);
1154 /* Enable multicast filtering */
1155 err = mxge_send_cmd(sc, MXGEFW_DISABLE_ALLMULTI, &cmd);
1157 device_printf(sc->dev, "Failed MXGEFW_DISABLE_ALLMULTI"
1158 ", error status: %d\n", err);
/*
 * Return the maximum usable MTU: MXGEFW_MAX_MTU if page-sized jumbo
 * clusters suffice or the firmware accepts virtually-contiguous
 * jumbos; otherwise limited by MJUMPAGESIZE.
 */
1163 mxge_max_mtu(mxge_softc_t *sc)
1168 if (MJUMPAGESIZE - MXGEFW_PAD > MXGEFW_MAX_MTU)
1169 return MXGEFW_MAX_MTU - MXGEFW_PAD;
1171 /* try to set nbufs to see if it we can
1172 use virtually contiguous jumbos */
1174 status = mxge_send_cmd(sc, MXGEFW_CMD_ALWAYS_USE_N_BIG_BUFFERS,
1177 return MXGEFW_MAX_MTU - MXGEFW_PAD;
1179 /* otherwise, we're limited to MJUMPAGESIZE */
1180 return MJUMPAGESIZE - MXGEFW_PAD;
/*
 * Reset the NIC and re-establish driver/firmware shared state:
 * firmware reset, dummy-RDMA enable, interrupt-queue sizing and (if
 * interrupts_setup) per-slice intrq DMA addresses, coalescing/IRQ
 * register offsets, a DMA benchmark, per-slice counter reset, and
 * finally MAC address / promisc / pause / multicast / throttle
 * reprogramming.
 */
1184 mxge_reset(mxge_softc_t *sc, int interrupts_setup)
1186 struct mxge_slice_state *ss;
1187 mxge_rx_done_t *rx_done;
1188 volatile uint32_t *irq_claim;
1192 /* try to send a reset command to the card to see if it
1194 memset(&cmd, 0, sizeof (cmd));
1195 status = mxge_send_cmd(sc, MXGEFW_CMD_RESET, &cmd);
1197 device_printf(sc->dev, "failed reset\n");
1201 mxge_dummy_rdma(sc, 1);
1204 /* set the intrq size */
1205 cmd.data0 = sc->rx_ring_size;
1206 status = mxge_send_cmd(sc, MXGEFW_CMD_SET_INTRQ_SIZE, &cmd);
1209 * Even though we already know how many slices are supported
1210 * via mxge_slice_probe(), MXGEFW_CMD_GET_MAX_RSS_QUEUES
1211 * has magic side effects, and must be called after a reset.
1212 * It must be called prior to calling any RSS related cmds,
1213 * including assigning an interrupt queue for anything but
1214 * slice 0. It must also be called *after*
1215 * MXGEFW_CMD_SET_INTRQ_SIZE, since the intrq size is used by
1216 * the firmware to compute offsets.
1219 if (sc->num_slices > 1) {
1220 /* ask the maximum number of slices it supports */
1221 status = mxge_send_cmd(sc, MXGEFW_CMD_GET_MAX_RSS_QUEUES,
1224 device_printf(sc->dev,
1225 "failed to get number of slices\n");
1229 * MXGEFW_CMD_ENABLE_RSS_QUEUES must be called prior
1230 * to setting up the interrupt queue DMA
1232 cmd.data0 = sc->num_slices;
1233 cmd.data1 = MXGEFW_SLICE_INTR_MODE_ONE_PER_SLICE;
1234 #ifdef IFNET_BUF_RING
1235 cmd.data1 |= MXGEFW_SLICE_ENABLE_MULTIPLE_TX_QUEUES;
1237 status = mxge_send_cmd(sc, MXGEFW_CMD_ENABLE_RSS_QUEUES,
1240 device_printf(sc->dev,
1241 "failed to set number of slices\n");
1247 if (interrupts_setup) {
1248 /* Now exchange information about interrupts */
1249 for (slice = 0; slice < sc->num_slices; slice++) {
1250 rx_done = &sc->ss[slice].rx_done;
1251 memset(rx_done->entry, 0, sc->rx_ring_size);
1252 cmd.data0 = MXGE_LOWPART_TO_U32(rx_done->dma.bus_addr);
1253 cmd.data1 = MXGE_HIGHPART_TO_U32(rx_done->dma.bus_addr);
1255 status |= mxge_send_cmd(sc,
1256 MXGEFW_CMD_SET_INTRQ_DMA,
/* fetch SRAM offsets of the coalescing / IRQ-ack / deassert regs */
1261 status |= mxge_send_cmd(sc,
1262 MXGEFW_CMD_GET_INTR_COAL_DELAY_OFFSET, &cmd);
1265 sc->intr_coal_delay_ptr = (volatile uint32_t *)(sc->sram + cmd.data0);
1267 status |= mxge_send_cmd(sc, MXGEFW_CMD_GET_IRQ_ACK_OFFSET, &cmd);
1268 irq_claim = (volatile uint32_t *)(sc->sram + cmd.data0);
1271 status |= mxge_send_cmd(sc, MXGEFW_CMD_GET_IRQ_DEASSERT_OFFSET,
1273 sc->irq_deassert = (volatile uint32_t *)(sc->sram + cmd.data0);
1275 device_printf(sc->dev, "failed set interrupt parameters\n");
1280 *sc->intr_coal_delay_ptr = htobe32(sc->intr_coal_delay);
1283 /* run a DMA benchmark */
1284 (void) mxge_dma_test(sc, MXGEFW_DMA_TEST);
/* reset all mcp/driver shared per-slice state */
1286 for (slice = 0; slice < sc->num_slices; slice++) {
1287 ss = &sc->ss[slice];
1289 ss->irq_claim = irq_claim + (2 * slice);
1290 /* reset mcp/driver shared state back to 0 */
1291 ss->rx_done.idx = 0;
1292 ss->rx_done.cnt = 0;
1295 ss->tx.pkt_done = 0;
1296 ss->tx.queue_active = 0;
1297 ss->tx.activate = 0;
1298 ss->tx.deactivate = 0;
1303 ss->rx_small.cnt = 0;
1304 ss->lc.lro_bad_csum = 0;
1305 ss->lc.lro_queued = 0;
1306 ss->lc.lro_flushed = 0;
1307 if (ss->fw_stats != NULL) {
1308 bzero(ss->fw_stats, sizeof *ss->fw_stats);
1311 sc->rdma_tags_available = 15;
/* re-apply addressing/filtering/flow-control settings */
1312 status = mxge_update_mac_address(sc);
1313 mxge_change_promisc(sc, sc->ifp->if_flags & IFF_PROMISC);
1314 mxge_change_pause(sc, sc->pause);
1315 mxge_set_multicast_list(sc);
1317 cmd.data0 = sc->throttle;
1318 if (mxge_send_cmd(sc, MXGEFW_CMD_SET_THROTTLE_FACTOR,
1320 device_printf(sc->dev,
1321 "can't enable throttle\n");
/*
 * Sysctl handler for the transmit throttle factor: validates the new
 * value against MXGE_MIN/MAX_THROTTLE and pushes it to the firmware
 * under driver_mtx before committing it to sc->throttle.
 */
1328 mxge_change_throttle(SYSCTL_HANDLER_ARGS)
1333 unsigned int throttle;
1336 throttle = sc->throttle;
1337 err = sysctl_handle_int(oidp, &throttle, arg2, req);
/* no-op if unchanged */
1342 if (throttle == sc->throttle)
1345 if (throttle < MXGE_MIN_THROTTLE || throttle > MXGE_MAX_THROTTLE)
1348 mtx_lock(&sc->driver_mtx);
1349 cmd.data0 = throttle;
1350 err = mxge_send_cmd(sc, MXGEFW_CMD_SET_THROTTLE_FACTOR, &cmd);
1352 sc->throttle = throttle;
1353 mtx_unlock(&sc->driver_mtx);
/*
 * Sysctl handler for the interrupt coalescing delay (usecs): rejects
 * 0 or values above 1,000,000, then writes the new value directly to
 * the firmware register via intr_coal_delay_ptr under driver_mtx.
 */
1358 mxge_change_intr_coal(SYSCTL_HANDLER_ARGS)
1361 unsigned int intr_coal_delay;
1365 intr_coal_delay = sc->intr_coal_delay;
1366 err = sysctl_handle_int(oidp, &intr_coal_delay, arg2, req);
1370 if (intr_coal_delay == sc->intr_coal_delay)
1373 if (intr_coal_delay == 0 || intr_coal_delay > 1000*1000)
1376 mtx_lock(&sc->driver_mtx);
1377 *sc->intr_coal_delay_ptr = htobe32(intr_coal_delay);
1378 sc->intr_coal_delay = intr_coal_delay;
1380 mtx_unlock(&sc->driver_mtx);
/*
 * Sysctl handler for flow control: delegates to mxge_change_pause()
 * under driver_mtx when the value actually changes.
 */
1385 mxge_change_flow_control(SYSCTL_HANDLER_ARGS)
1388 unsigned int enabled;
1392 enabled = sc->pause;
1393 err = sysctl_handle_int(oidp, &enabled, arg2, req);
1397 if (enabled == sc->pause)
1400 mtx_lock(&sc->driver_mtx);
1401 err = mxge_change_pause(sc, enabled);
1402 mtx_unlock(&sc->driver_mtx);
/*
 * Sysctl helper exporting a big-endian 32-bit firmware counter as a
 * host-order int.
 */
1407 mxge_handle_be32(SYSCTL_HANDLER_ARGS)
1413 arg2 = be32toh(*(int *)arg1);
1415 err = sysctl_handle_int(oidp, arg1, arg2, req);
/*
 * Tear down the per-slice sysctl trees and then the slice parent
 * tree; a no-op when the trees were never created.
 */
1421 mxge_rem_sysctls(mxge_softc_t *sc)
1423 struct mxge_slice_state *ss;
1426 if (sc->slice_sysctl_tree == NULL)
1429 for (slice = 0; slice < sc->num_slices; slice++) {
1430 ss = &sc->ss[slice];
1431 if (ss == NULL || ss->sysctl_tree == NULL)
1433 sysctl_ctx_free(&ss->sysctl_ctx);
1434 ss->sysctl_tree = NULL;
1436 sysctl_ctx_free(&sc->slice_sysctl_ctx);
1437 sc->slice_sysctl_tree = NULL;
/*
 * Register all of the driver's sysctl nodes:
 *  - static device info (firmware version, serial number, link width);
 *  - performance tunables routed through the handlers above
 *    (intr_coal_delay, throttle, flow_control_enabled);
 *  - firmware statistics, which live in network byte order and are
 *    therefore exported through mxge_handle_be32;
 *  - per-slice RX/TX/LRO debug counters under a "slice.N" subtree
 *    (torn down later by mxge_rem_sysctls).
 * NOTE(review): many lines (OID name strings, braces) are elided in
 * this excerpt.
 * Fix: the flow_control_enabled node's description was a copy-paste of
 * the intr_coal_delay one ("interrupt coalescing delay in usecs").
 */
1441 mxge_add_sysctls(mxge_softc_t *sc)
1443 struct sysctl_ctx_list *ctx;
1444 struct sysctl_oid_list *children;
1446 struct mxge_slice_state *ss;
1450 ctx = device_get_sysctl_ctx(sc->dev);
1451 children = SYSCTL_CHILDREN(device_get_sysctl_tree(sc->dev));
/* firmware stats block is shared; slice 0's copy holds the totals */
1452 fw = sc->ss[0].fw_stats;
1454 /* random information */
1455 SYSCTL_ADD_STRING(ctx, children, OID_AUTO,
1457 CTLFLAG_RD, sc->fw_version,
1458 0, "firmware version");
1459 SYSCTL_ADD_STRING(ctx, children, OID_AUTO,
1461 CTLFLAG_RD, sc->serial_number_string,
1462 0, "serial number");
1463 SYSCTL_ADD_STRING(ctx, children, OID_AUTO,
1465 CTLFLAG_RD, sc->product_code_string,
1467 SYSCTL_ADD_INT(ctx, children, OID_AUTO,
1469 CTLFLAG_RD, &sc->link_width,
1471 SYSCTL_ADD_INT(ctx, children, OID_AUTO,
1473 CTLFLAG_RD, &sc->tx_boundary,
1475 SYSCTL_ADD_INT(ctx, children, OID_AUTO,
1477 CTLFLAG_RD, &sc->wc,
1478 0, "write combining PIO?");
1479 SYSCTL_ADD_INT(ctx, children, OID_AUTO,
1481 CTLFLAG_RD, &sc->read_dma,
1482 0, "DMA Read speed in MB/s");
1483 SYSCTL_ADD_INT(ctx, children, OID_AUTO,
1485 CTLFLAG_RD, &sc->write_dma,
1486 0, "DMA Write speed in MB/s");
1487 SYSCTL_ADD_INT(ctx, children, OID_AUTO,
1488 "read_write_dma_MBs",
1489 CTLFLAG_RD, &sc->read_write_dma,
1490 0, "DMA concurrent Read/Write speed in MB/s");
1491 SYSCTL_ADD_INT(ctx, children, OID_AUTO,
1493 CTLFLAG_RD, &sc->watchdog_resets,
1494 0, "Number of times NIC was reset");
1497 /* performance related tunables */
1498 SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
1500 CTLTYPE_INT|CTLFLAG_RW, sc,
1501 0, mxge_change_intr_coal,
1502 "I", "interrupt coalescing delay in usecs");
1504 SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
1506 CTLTYPE_INT|CTLFLAG_RW, sc,
1507 0, mxge_change_throttle,
1508 "I", "transmit throttling");
1510 SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
1511 "flow_control_enabled",
1512 CTLTYPE_INT|CTLFLAG_RW, sc,
1513 0, mxge_change_flow_control,
1514 "I", "enable/disable link-level flow control");
1516 SYSCTL_ADD_INT(ctx, children, OID_AUTO,
1518 CTLFLAG_RW, &mxge_deassert_wait,
1519 0, "Wait for IRQ line to go low in ihandler");
1521 /* stats block from firmware is in network byte order.
1523 SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
1525 CTLTYPE_INT|CTLFLAG_RD, &fw->link_up,
1526 0, mxge_handle_be32,
1528 SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
1529 "rdma_tags_available",
1530 CTLTYPE_INT|CTLFLAG_RD, &fw->rdma_tags_available,
1531 0, mxge_handle_be32,
1532 "I", "rdma_tags_available");
1533 SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
1534 "dropped_bad_crc32",
1535 CTLTYPE_INT|CTLFLAG_RD,
1536 &fw->dropped_bad_crc32,
1537 0, mxge_handle_be32,
1538 "I", "dropped_bad_crc32");
1539 SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
1541 CTLTYPE_INT|CTLFLAG_RD,
1542 &fw->dropped_bad_phy,
1543 0, mxge_handle_be32,
1544 "I", "dropped_bad_phy");
1545 SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
1546 "dropped_link_error_or_filtered",
1547 CTLTYPE_INT|CTLFLAG_RD,
1548 &fw->dropped_link_error_or_filtered,
1549 0, mxge_handle_be32,
1550 "I", "dropped_link_error_or_filtered");
1551 SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
1552 "dropped_link_overflow",
1553 CTLTYPE_INT|CTLFLAG_RD, &fw->dropped_link_overflow,
1554 0, mxge_handle_be32,
1555 "I", "dropped_link_overflow");
1556 SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
1557 "dropped_multicast_filtered",
1558 CTLTYPE_INT|CTLFLAG_RD,
1559 &fw->dropped_multicast_filtered,
1560 0, mxge_handle_be32,
1561 "I", "dropped_multicast_filtered");
1562 SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
1563 "dropped_no_big_buffer",
1564 CTLTYPE_INT|CTLFLAG_RD, &fw->dropped_no_big_buffer,
1565 0, mxge_handle_be32,
1566 "I", "dropped_no_big_buffer");
1567 SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
1568 "dropped_no_small_buffer",
1569 CTLTYPE_INT|CTLFLAG_RD,
1570 &fw->dropped_no_small_buffer,
1571 0, mxge_handle_be32,
1572 "I", "dropped_no_small_buffer");
1573 SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
1575 CTLTYPE_INT|CTLFLAG_RD, &fw->dropped_overrun,
1576 0, mxge_handle_be32,
1577 "I", "dropped_overrun");
1578 SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
1580 CTLTYPE_INT|CTLFLAG_RD,
1582 0, mxge_handle_be32,
1583 "I", "dropped_pause");
1584 SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
1586 CTLTYPE_INT|CTLFLAG_RD, &fw->dropped_runt,
1587 0, mxge_handle_be32,
1588 "I", "dropped_runt");
1590 SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
1591 "dropped_unicast_filtered",
1592 CTLTYPE_INT|CTLFLAG_RD, &fw->dropped_unicast_filtered,
1593 0, mxge_handle_be32,
1594 "I", "dropped_unicast_filtered");
1596 /* verbose printing? */
1597 SYSCTL_ADD_INT(ctx, children, OID_AUTO,
1599 CTLFLAG_RW, &mxge_verbose,
1600 0, "verbose printing");
1602 /* add counters exported for debugging from all slices */
1603 sysctl_ctx_init(&sc->slice_sysctl_ctx);
1604 sc->slice_sysctl_tree =
1605 SYSCTL_ADD_NODE(&sc->slice_sysctl_ctx, children, OID_AUTO,
1606 "slice", CTLFLAG_RD, 0, "");
1608 for (slice = 0; slice < sc->num_slices; slice++) {
1609 ss = &sc->ss[slice];
/* each slice gets its own context so it can be freed individually */
1610 sysctl_ctx_init(&ss->sysctl_ctx);
1611 ctx = &ss->sysctl_ctx;
1612 children = SYSCTL_CHILDREN(sc->slice_sysctl_tree);
1613 sprintf(slice_num, "%d", slice);
1615 SYSCTL_ADD_NODE(ctx, children, OID_AUTO, slice_num,
1617 children = SYSCTL_CHILDREN(ss->sysctl_tree);
1618 SYSCTL_ADD_INT(ctx, children, OID_AUTO,
1620 CTLFLAG_RD, &ss->rx_small.cnt,
1622 SYSCTL_ADD_INT(ctx, children, OID_AUTO,
1624 CTLFLAG_RD, &ss->rx_big.cnt,
1626 SYSCTL_ADD_U64(ctx, children, OID_AUTO,
1627 "lro_flushed", CTLFLAG_RD, &ss->lc.lro_flushed,
1628 0, "number of lro merge queues flushed");
1630 SYSCTL_ADD_U64(ctx, children, OID_AUTO,
1631 "lro_bad_csum", CTLFLAG_RD, &ss->lc.lro_bad_csum,
1632 0, "number of bad csums preventing LRO");
1634 SYSCTL_ADD_U64(ctx, children, OID_AUTO,
1635 "lro_queued", CTLFLAG_RD, &ss->lc.lro_queued,
1636 0, "number of frames appended to lro merge"
1639 #ifndef IFNET_BUF_RING
1640 /* only transmit from slice 0 for now */
1644 SYSCTL_ADD_INT(ctx, children, OID_AUTO,
1646 CTLFLAG_RD, &ss->tx.req,
1649 SYSCTL_ADD_INT(ctx, children, OID_AUTO,
1651 CTLFLAG_RD, &ss->tx.done,
1653 SYSCTL_ADD_INT(ctx, children, OID_AUTO,
1655 CTLFLAG_RD, &ss->tx.pkt_done,
1657 SYSCTL_ADD_INT(ctx, children, OID_AUTO,
1659 CTLFLAG_RD, &ss->tx.stall,
1661 SYSCTL_ADD_INT(ctx, children, OID_AUTO,
1663 CTLFLAG_RD, &ss->tx.wake,
1665 SYSCTL_ADD_INT(ctx, children, OID_AUTO,
1667 CTLFLAG_RD, &ss->tx.defrag,
1669 SYSCTL_ADD_INT(ctx, children, OID_AUTO,
1671 CTLFLAG_RD, &ss->tx.queue_active,
1672 0, "tx_queue_active");
1673 SYSCTL_ADD_INT(ctx, children, OID_AUTO,
1675 CTLFLAG_RD, &ss->tx.activate,
1677 SYSCTL_ADD_INT(ctx, children, OID_AUTO,
1679 CTLFLAG_RD, &ss->tx.deactivate,
1680 0, "tx_deactivate");
1684 /* copy an array of mcp_kreq_ether_send_t's to the mcp. Copy
1685 backwards one at a time and handle ring wraps */
/*
 * Copy one send request at a time from src[] into the NIC's lanai
 * window, walking backwards from the end of the array so that the
 * first (valid-flag carrying) request is written last; the index is
 * masked so the copy follows ring wrap-around.
 */
1688 mxge_submit_req_backwards(mxge_tx_ring_t *tx,
1689 mcp_kreq_ether_send_t *src, int cnt)
1691 int idx, starting_slot;
1692 starting_slot = tx->req;
/* mask keeps the slot index inside the ring on wrap */
1695 idx = (starting_slot + cnt) & tx->mask;
1696 mxge_pio_copy(&tx->lanai[idx],
1697 &src[cnt], sizeof(*src));
1703 * copy an array of mcp_kreq_ether_send_t's to the mcp. Copy
1704 * at most 32 bytes at a time, so as to avoid involving the software
1705 * pio handler in the nic. We re-write the first segment's flags
1706 * to mark them valid only after writing the entire chain
/*
 * Fast path: when the request chain fits without wrapping the ring,
 * PIO-copy requests two at a time (32 bytes) with a write barrier
 * between bursts; otherwise fall back to the slower backwards copy.
 * In both cases the first request's flags are re-written last so the
 * NIC only sees a valid chain once it is complete.
 */
1710 mxge_submit_req(mxge_tx_ring_t *tx, mcp_kreq_ether_send_t *src,
1715 volatile uint32_t *dst_ints;
1716 mcp_kreq_ether_send_t *srcp;
1717 volatile mcp_kreq_ether_send_t *dstp, *dst;
1720 idx = tx->req & tx->mask;
/* remember the real flags; they are restored after the bulk copy */
1722 last_flags = src->flags;
1725 dst = dstp = &tx->lanai[idx];
/* chain fits without wrapping: burst-copy pairs of requests */
1728 if ((idx + cnt) < tx->mask) {
1729 for (i = 0; i < (cnt - 1); i += 2) {
1730 mxge_pio_copy(dstp, srcp, 2 * sizeof(*src));
1731 wmb(); /* force write every 32 bytes */
1736 /* submit all but the first request, and ensure
1737 that it is submitted below */
1738 mxge_submit_req_backwards(tx, src, cnt);
1742 /* submit the first request */
1743 mxge_pio_copy(dstp, srcp, sizeof(*src));
1744 wmb(); /* barrier before setting valid flag */
1747 /* re-write the last 32-bits with the valid flags */
1748 src->flags = last_flags;
1749 src_ints = (uint32_t *)src;
1751 dst_ints = (volatile uint32_t *)dst;
1753 *dst_ints = *src_ints;
/*
 * Parse the L2/L3/L4 headers of an outgoing frame and fill in *pi
 * (ip_off, ip/ip6 pointer, ip_hlen, tcp pointer) for the checksum/TSO
 * encap paths.  Headers that do not fit in the first mbuf are copied
 * into ss->scratch and the header pointers are redirected there.
 * NOTE(review): switch statement, return paths and some braces are
 * elided in this excerpt.
 */
1759 mxge_parse_tx(struct mxge_slice_state *ss, struct mbuf *m,
1760 struct mxge_pkt_info *pi)
1762 struct ether_vlan_header *eh;
1764 int tso = m->m_pkthdr.csum_flags & (CSUM_TSO);
1765 #if IFCAP_TSO6 && defined(INET6)
/* account for an 802.1Q tag when locating the IP header */
1769 eh = mtod(m, struct ether_vlan_header *);
1770 if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
1771 etype = ntohs(eh->evl_proto);
1772 pi->ip_off = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
1774 etype = ntohs(eh->evl_encap_proto);
1775 pi->ip_off = ETHER_HDR_LEN;
1781 * ensure ip header is in first mbuf, copy it to a
1782 * scratch buffer if not
1784 pi->ip = (struct ip *)(m->m_data + pi->ip_off);
1786 if (__predict_false(m->m_len < pi->ip_off + sizeof(*pi->ip))) {
1787 m_copydata(m, 0, pi->ip_off + sizeof(*pi->ip),
1789 pi->ip = (struct ip *)(ss->scratch + pi->ip_off);
1791 pi->ip_hlen = pi->ip->ip_hl << 2;
/* make sure the TCP header is contiguous too (IPv4 case) */
1795 if (__predict_false(m->m_len < pi->ip_off + pi->ip_hlen +
1796 sizeof(struct tcphdr))) {
1797 m_copydata(m, 0, pi->ip_off + pi->ip_hlen +
1798 sizeof(struct tcphdr), ss->scratch);
1799 pi->ip = (struct ip *)(ss->scratch + pi->ip_off);
1801 pi->tcp = (struct tcphdr *)((char *)pi->ip + pi->ip_hlen);
1803 #if IFCAP_TSO6 && defined(INET6)
1804 case ETHERTYPE_IPV6:
1805 pi->ip6 = (struct ip6_hdr *)(m->m_data + pi->ip_off);
1806 if (__predict_false(m->m_len < pi->ip_off + sizeof(*pi->ip6))) {
1807 m_copydata(m, 0, pi->ip_off + sizeof(*pi->ip6),
1809 pi->ip6 = (struct ip6_hdr *)(ss->scratch + pi->ip_off);
/* walk extension headers to find the transport header */
1812 pi->ip_hlen = ip6_lasthdr(m, pi->ip_off, IPPROTO_IPV6, &nxt);
1813 pi->ip_hlen -= pi->ip_off;
1814 if (nxt != IPPROTO_TCP && nxt != IPPROTO_UDP)
/* firmware cannot offload headers longer than max_tso6_hlen */
1820 if (pi->ip_off + pi->ip_hlen > ss->sc->max_tso6_hlen)
1823 if (__predict_false(m->m_len < pi->ip_off + pi->ip_hlen +
1824 sizeof(struct tcphdr))) {
1825 m_copydata(m, 0, pi->ip_off + pi->ip_hlen +
1826 sizeof(struct tcphdr), ss->scratch);
1827 pi->ip6 = (struct ip6_hdr *)(ss->scratch + pi->ip_off);
1829 pi->tcp = (struct tcphdr *)((char *)pi->ip6 + pi->ip_hlen);
/*
 * Build the firmware send-request chain for a TSO packet.  The DMA
 * segment list (busdma_seg_cnt entries) is cut into pieces so that no
 * request crosses an MSS boundary; rdma_count bookkeeping is patched
 * into previously emitted requests retroactively (see the long comment
 * below).  Also rewrites the TCP checksum to the pseudo-header sum the
 * NIC expects.
 * NOTE(review): numerous lines (locals, loop braces, error paths) are
 * elided in this excerpt; comments describe only what is visible.
 */
1841 mxge_encap_tso(struct mxge_slice_state *ss, struct mbuf *m,
1842 int busdma_seg_cnt, struct mxge_pkt_info *pi)
1845 mcp_kreq_ether_send_t *req;
1846 bus_dma_segment_t *seg;
1847 uint32_t low, high_swapped;
1848 int len, seglen, cum_len, cum_len_next;
1849 int next_is_first, chop, cnt, rdma_count, small;
1850 uint16_t pseudo_hdr_offset, cksum_offset, mss, sum;
1851 uint8_t flags, flags_next;
1854 mss = m->m_pkthdr.tso_segsz;
1856 /* negative cum_len signifies to the
1857 * send loop that we are still in the
1858 * header portion of the TSO packet.
1861 cksum_offset = pi->ip_off + pi->ip_hlen;
1862 cum_len = -(cksum_offset + (pi->tcp->th_off << 2));
1864 /* TSO implies checksum offload on this hardware */
1865 if (__predict_false((m->m_pkthdr.csum_flags & (CSUM_TCP|CSUM_TCP_IPV6)) == 0)) {
1867 * If packet has full TCP csum, replace it with pseudo hdr
1868 * sum that the NIC expects, otherwise the NIC will emit
1869 * packets with bad TCP checksums.
1871 m->m_pkthdr.csum_data = offsetof(struct tcphdr, th_sum);
1873 #if (CSUM_TCP_IPV6 != 0) && defined(INET6)
1874 m->m_pkthdr.csum_flags |= CSUM_TCP_IPV6;
1875 sum = in6_cksum_pseudo(pi->ip6,
1876 m->m_pkthdr.len - cksum_offset,
1881 m->m_pkthdr.csum_flags |= CSUM_TCP;
1882 sum = in_pseudo(pi->ip->ip_src.s_addr,
1883 pi->ip->ip_dst.s_addr,
1884 htons(IPPROTO_TCP + (m->m_pkthdr.len -
/* write the pseudo-header sum into the packet's th_sum field */
1888 m_copyback(m, offsetof(struct tcphdr, th_sum) +
1889 cksum_offset, sizeof(sum), (caddr_t)&sum);
1891 flags = MXGEFW_FLAGS_TSO_HDR | MXGEFW_FLAGS_FIRST;
1894 /* for TSO, pseudo_hdr_offset holds mss.
1895 * The firmware figures out where to put
1896 * the checksum by parsing the header. */
1897 pseudo_hdr_offset = htobe16(mss);
1901 * for IPv6 TSO, the "checksum offset" is re-purposed
1902 * to store the TCP header len
1904 cksum_offset = (pi->tcp->th_off << 2);
1912 /* "rdma_count" is the number of RDMAs belonging to the
1913 * current packet BEFORE the current send request. For
1914 * non-TSO packets, this is equal to "count".
1915 * For TSO packets, rdma_count needs to be reset
1916 * to 0 after a segment cut.
1918 * The rdma_count field of the send request is
1919 * the number of RDMAs of the packet starting at
1920 * that request. For TSO send requests with one ore more cuts
1921 * in the middle, this is the number of RDMAs starting
1922 * after the last cut in the request. All previous
1923 * segments before the last cut implicitly have 1 RDMA.
1925 * Since the number of RDMAs is not known beforehand,
1926 * it must be filled-in retroactively - after each
1927 * segmentation cut or at the end of the entire packet.
1930 while (busdma_seg_cnt) {
1931 /* Break the busdma segment up into pieces*/
1932 low = MXGE_LOWPART_TO_U32(seg->ds_addr);
1933 high_swapped = htobe32(MXGE_HIGHPART_TO_U32(seg->ds_addr));
1937 flags_next = flags & ~MXGEFW_FLAGS_FIRST;
1939 cum_len_next = cum_len + seglen;
/* retroactively fix the rdma_count of the previous cut */
1940 (req-rdma_count)->rdma_count = rdma_count + 1;
1941 if (__predict_true(cum_len >= 0)) {
/* payload region: cut whenever we would cross an MSS boundary */
1943 chop = (cum_len_next > mss);
1944 cum_len_next = cum_len_next % mss;
1945 next_is_first = (cum_len_next == 0);
1946 flags |= chop * MXGEFW_FLAGS_TSO_CHOP;
1947 flags_next |= next_is_first *
/* branch-free update: -1 resets rdma_count when a cut happens */
1949 rdma_count |= -(chop | next_is_first);
1950 rdma_count += chop & !next_is_first;
1951 } else if (cum_len_next >= 0) {
/* header just ended: first payload request of the packet */
1956 small = (mss <= MXGEFW_SEND_SMALL_SIZE);
1957 flags_next = MXGEFW_FLAGS_TSO_PLD |
1958 MXGEFW_FLAGS_FIRST |
1959 (small * MXGEFW_FLAGS_SMALL);
1962 req->addr_high = high_swapped;
1963 req->addr_low = htobe32(low);
1964 req->pseudo_hdr_offset = pseudo_hdr_offset;
1966 req->rdma_count = 1;
1967 req->length = htobe16(seglen);
1968 req->cksum_offset = cksum_offset;
1969 req->flags = flags | ((cum_len & 1) *
1970 MXGEFW_FLAGS_ALIGN_ODD);
1973 cum_len = cum_len_next;
/* cksum_offset only tracks into the first segment (IPv4 case) */
1978 if (cksum_offset != 0 && !pi->ip6) {
1979 if (__predict_false(cksum_offset > seglen))
1980 cksum_offset -= seglen;
1984 if (__predict_false(cnt > tx->max_desc))
/* patch the rdma_count of the final cut */
1990 (req-rdma_count)->rdma_count = rdma_count;
/* walk backwards, marking the last request of each segment */
1994 req->flags |= MXGEFW_FLAGS_TSO_LAST;
1995 } while (!(req->flags & (MXGEFW_FLAGS_TSO_CHOP | MXGEFW_FLAGS_FIRST)));
1997 tx->info[((cnt - 1) + tx->req) & tx->mask].flag = 1;
1998 mxge_submit_req(tx, tx->req_list, cnt);
1999 #ifdef IFNET_BUF_RING
2000 if ((ss->sc->num_slices > 1) && tx->queue_active == 0) {
2001 /* tell the NIC to start polling this slice */
2003 tx->queue_active = 1;
/* error path: drop the mbuf and unmap its DMA resources */
2011 bus_dmamap_unload(tx->dmat, tx->info[tx->req & tx->mask].map);
2015 printf("tx->max_desc exceeded via TSO!\n");
2016 printf("mss = %d, %ld, %d!\n", mss,
2017 (long)seg - (long)tx->seg_list, tx->max_desc);
2024 #endif /* IFCAP_TSO4 */
2026 #ifdef MXGE_NEW_VLAN_API
2028 * We reproduce the software vlan tag insertion from
2029 * net/if_vlan.c:vlan_start() here so that we can advertise "hardware"
2030 * vlan tag insertion. We need to advertise this in order to have the
2031 * vlan interface respect our csum offload flags.
/*
 * Prepend ETHER_VLAN_ENCAP_LEN bytes and rebuild the Ethernet header
 * as an 802.1Q-tagged one from m_pkthdr.ether_vtag.  Returns NULL
 * (with the mbuf consumed) if allocation or pullup fails.
 */
2033 static struct mbuf *
2034 mxge_vlan_tag_insert(struct mbuf *m)
2036 struct ether_vlan_header *evl;
2038 M_PREPEND(m, ETHER_VLAN_ENCAP_LEN, M_NOWAIT);
2039 if (__predict_false(m == NULL))
2041 if (m->m_len < sizeof(*evl)) {
2042 m = m_pullup(m, sizeof(*evl));
2043 if (__predict_false(m == NULL))
2047 * Transform the Ethernet header into an Ethernet header
2048 * with 802.1Q encapsulation.
2050 evl = mtod(m, struct ether_vlan_header *);
/* shift dst/src MACs down to make room for the tag */
2051 bcopy((char *)evl + ETHER_VLAN_ENCAP_LEN,
2052 (char *)evl, ETHER_HDR_LEN - ETHER_TYPE_LEN);
2053 evl->evl_encap_proto = htons(ETHERTYPE_VLAN);
2054 evl->evl_tag = htons(m->m_pkthdr.ether_vtag);
/* tag is now in-band; clear the out-of-band flag */
2055 m->m_flags &= ~M_VLANTAG;
2058 #endif /* MXGE_NEW_VLAN_API */
/*
 * Main transmit encapsulation path: software-insert a VLAN tag if
 * needed, parse headers for checksum offload, DMA-map the mbuf chain
 * (defragmenting once on EFBIG), then either hand off to
 * mxge_encap_tso() or build a plain send-request list, padding runts
 * to 60 bytes with the shared zeropad buffer.
 * NOTE(review): many lines (locals, braces, error paths) are elided in
 * this excerpt; comments describe only what is visible.
 */
2061 mxge_encap(struct mxge_slice_state *ss, struct mbuf *m)
2063 struct mxge_pkt_info pi = {0,0,0,0};
2065 mcp_kreq_ether_send_t *req;
2066 bus_dma_segment_t *seg;
2070 int cnt, cum_len, err, i, idx, odd_flag;
2071 uint16_t pseudo_hdr_offset;
2072 uint8_t flags, cksum_offset;
2079 #ifdef MXGE_NEW_VLAN_API
2080 if (m->m_flags & M_VLANTAG) {
2081 m = mxge_vlan_tag_insert(m);
2082 if (__predict_false(m == NULL))
2083 goto drop_without_m;
/* parse headers only when some offload is requested */
2086 if (m->m_pkthdr.csum_flags &
2087 (CSUM_TSO | CSUM_DELAY_DATA | CSUM_DELAY_DATA_IPV6)) {
2088 if (mxge_parse_tx(ss, m, &pi))
2092 /* (try to) map the frame for DMA */
2093 idx = tx->req & tx->mask;
2094 err = bus_dmamap_load_mbuf_sg(tx->dmat, tx->info[idx].map,
2095 m, tx->seg_list, &cnt,
2097 if (__predict_false(err == EFBIG)) {
2098 /* Too many segments in the chain. Try
2100 m_tmp = m_defrag(m, M_NOWAIT);
2101 if (m_tmp == NULL) {
/* retry the mapping with the defragmented chain */
2106 err = bus_dmamap_load_mbuf_sg(tx->dmat,
2108 m, tx->seg_list, &cnt,
2111 if (__predict_false(err != 0)) {
2112 device_printf(sc->dev, "bus_dmamap_load_mbuf_sg returned %d"
2113 " packet len = %d\n", err, m->m_pkthdr.len);
2116 bus_dmamap_sync(tx->dmat, tx->info[idx].map,
2117 BUS_DMASYNC_PREWRITE);
2118 tx->info[idx].m = m;
2121 /* TSO is different enough, we handle it in another routine */
2122 if (m->m_pkthdr.csum_flags & (CSUM_TSO)) {
2123 mxge_encap_tso(ss, m, cnt, &pi);
2130 pseudo_hdr_offset = 0;
2131 flags = MXGEFW_FLAGS_NO_TSO;
2133 /* checksum offloading? */
2134 if (m->m_pkthdr.csum_flags &
2135 (CSUM_DELAY_DATA | CSUM_DELAY_DATA_IPV6)) {
2136 /* ensure ip header is in first mbuf, copy
2137 it to a scratch buffer if not */
2138 cksum_offset = pi.ip_off + pi.ip_hlen;
2139 pseudo_hdr_offset = cksum_offset + m->m_pkthdr.csum_data;
2140 pseudo_hdr_offset = htobe16(pseudo_hdr_offset);
2141 req->cksum_offset = cksum_offset;
2142 flags |= MXGEFW_FLAGS_CKSUM;
2143 odd_flag = MXGEFW_FLAGS_ALIGN_ODD;
/* small frames take the firmware's low-latency path */
2147 if (m->m_pkthdr.len < MXGEFW_SEND_SMALL_SIZE)
2148 flags |= MXGEFW_FLAGS_SMALL;
2150 /* convert segments into a request list */
2153 req->flags = MXGEFW_FLAGS_FIRST;
2154 for (i = 0; i < cnt; i++) {
2156 htobe32(MXGE_LOWPART_TO_U32(seg->ds_addr));
2158 htobe32(MXGE_HIGHPART_TO_U32(seg->ds_addr));
2159 req->length = htobe16(seg->ds_len);
2160 req->cksum_offset = cksum_offset;
/* cksum_offset only meaningful within the first segment */
2161 if (cksum_offset > seg->ds_len)
2162 cksum_offset -= seg->ds_len;
2165 req->pseudo_hdr_offset = pseudo_hdr_offset;
2166 req->pad = 0; /* complete solid 16-byte block */
2167 req->rdma_count = 1;
2168 req->flags |= flags | ((cum_len & 1) * odd_flag);
2169 cum_len += seg->ds_len;
2175 /* pad runts to 60 bytes */
2179 htobe32(MXGE_LOWPART_TO_U32(sc->zeropad_dma.bus_addr));
2181 htobe32(MXGE_HIGHPART_TO_U32(sc->zeropad_dma.bus_addr));
2182 req->length = htobe16(60 - cum_len);
2183 req->cksum_offset = 0;
2184 req->pseudo_hdr_offset = pseudo_hdr_offset;
2185 req->pad = 0; /* complete solid 16-byte block */
2186 req->rdma_count = 1;
2187 req->flags |= flags | ((cum_len & 1) * odd_flag);
/* first request carries the total RDMA count for the packet */
2191 tx->req_list[0].rdma_count = cnt;
2193 /* print what the firmware will see */
2194 for (i = 0; i < cnt; i++) {
2195 printf("%d: addr: 0x%x 0x%x len:%d pso%d,"
2196 "cso:%d, flags:0x%x, rdma:%d\n",
2197 i, (int)ntohl(tx->req_list[i].addr_high),
2198 (int)ntohl(tx->req_list[i].addr_low),
2199 (int)ntohs(tx->req_list[i].length),
2200 (int)ntohs(tx->req_list[i].pseudo_hdr_offset),
2201 tx->req_list[i].cksum_offset, tx->req_list[i].flags,
2202 tx->req_list[i].rdma_count);
2204 printf("--------------\n");
/* flag the last descriptor so mxge_tx_done knows a packet ended */
2206 tx->info[((cnt - 1) + tx->req) & tx->mask].flag = 1;
2207 mxge_submit_req(tx, tx->req_list, cnt);
2208 #ifdef IFNET_BUF_RING
2209 if ((ss->sc->num_slices > 1) && tx->queue_active == 0) {
2210 /* tell the NIC to start polling this slice */
2212 tx->queue_active = 1;
2226 #ifdef IFNET_BUF_RING
/*
 * if_qflush method (buf-ring transmit path): drain and free every
 * mbuf queued on each slice's buf_ring.
 * NOTE(review): per-slice mtx_lock and m_freem lines are elided in
 * this excerpt.
 */
2228 mxge_qflush(struct ifnet *ifp)
2230 mxge_softc_t *sc = ifp->if_softc;
2235 for (slice = 0; slice < sc->num_slices; slice++) {
2236 tx = &sc->ss[slice].tx;
2238 while ((m = buf_ring_dequeue_sc(tx->br)) != NULL)
2240 mtx_unlock(&tx->mtx);
/*
 * Buf-ring transmit drain (tx mutex held by caller): dequeue frames
 * while descriptor space remains, tap BPF, and encap each one.  Marks
 * the slice OACTIVE when the ring fills while frames are still queued.
 */
2246 mxge_start_locked(struct mxge_slice_state *ss)
/* stop while fewer than max_desc free slots remain */
2257 while ((tx->mask - (tx->req - tx->done)) > tx->max_desc) {
2258 m = drbr_dequeue(ifp, tx->br);
2262 /* let BPF see it */
2265 /* give it to the nic */
2268 /* ran out of transmit slots */
2269 if (((ss->if_drv_flags & IFF_DRV_OACTIVE) == 0)
2270 && (!drbr_empty(ifp, tx->br))) {
2271 ss->if_drv_flags |= IFF_DRV_OACTIVE;
/*
 * Per-slice transmit with tx mutex held: if the interface is not up
 * and idle, just enqueue.  Otherwise transmit directly when the ring
 * has room and nothing is already queued; fall back to enqueue +
 * drain via mxge_start_locked().
 */
2277 mxge_transmit_locked(struct mxge_slice_state *ss, struct mbuf *m)
/* not RUNNING (or OACTIVE): queue the frame for later */
2288 if ((ss->if_drv_flags & (IFF_DRV_RUNNING|IFF_DRV_OACTIVE)) !=
2290 err = drbr_enqueue(ifp, tx->br, m);
/* fast path: nothing queued and descriptor space available */
2294 if (!drbr_needs_enqueue(ifp, tx->br) &&
2295 ((tx->mask - (tx->req - tx->done)) > tx->max_desc)) {
2296 /* let BPF see it */
2298 /* give it to the nic */
2300 } else if ((err = drbr_enqueue(ifp, tx->br, m)) != 0) {
2303 if (!drbr_empty(ifp, tx->br))
2304 mxge_start_locked(ss);
/*
 * if_transmit method: pick a slice from the mbuf's flowid (num_slices
 * is a power of two, so masking suffices), then either transmit under
 * the slice lock or, if it is contended, enqueue on its buf_ring.
 */
2309 mxge_transmit(struct ifnet *ifp, struct mbuf *m)
2311 mxge_softc_t *sc = ifp->if_softc;
2312 struct mxge_slice_state *ss;
2317 slice = m->m_pkthdr.flowid;
2318 slice &= (sc->num_slices - 1); /* num_slices always power of 2 */
2320 ss = &sc->ss[slice];
/* avoid blocking: defer to the ring if the lock is held */
2323 if (mtx_trylock(&tx->mtx)) {
2324 err = mxge_transmit_locked(ss, m);
2325 mtx_unlock(&tx->mtx);
2327 err = drbr_enqueue(ifp, tx->br, m);
/*
 * Legacy (non-buf-ring) transmit drain with tx mutex held: pull
 * frames off the if_snd queue while descriptor space remains; set
 * OACTIVE when the ring fills.
 */
2336 mxge_start_locked(struct mxge_slice_state *ss)
2346 while ((tx->mask - (tx->req - tx->done)) > tx->max_desc) {
2347 IFQ_DRV_DEQUEUE(&ifp->if_snd, m);
2351 /* let BPF see it */
2354 /* give it to the nic */
2357 /* ran out of transmit slots */
2358 if ((sc->ifp->if_drv_flags & IFF_DRV_OACTIVE) == 0) {
2359 sc->ifp->if_drv_flags |= IFF_DRV_OACTIVE;
/*
 * if_start method (legacy path): acquire slice 0's tx lock and drain
 * the send queue.  Multi-slice transmit is only used with
 * IFNET_BUF_RING.
 */
2365 mxge_start(struct ifnet *ifp)
2367 mxge_softc_t *sc = ifp->if_softc;
2368 struct mxge_slice_state *ss;
2370 /* only use the first slice for now */
2372 mtx_lock(&ss->tx.mtx);
2373 mxge_start_locked(ss);
2374 mtx_unlock(&ss->tx.mtx);
2378 * copy an array of mcp_kreq_ether_recv_t's to the mcp. Copy
2379 * at most 32 bytes at a time, so as to avoid involving the software
2380 * pio handler in the nic. We re-write the first segment's low
2381 * DMA address to mark it valid only after we write the entire chunk
/*
 * Post 8 receive buffers to the NIC in two 32-byte PIO bursts.  The
 * first request's addr_low is temporarily poisoned (0xffffffff) so the
 * NIC ignores the chunk until the final single-word store makes it
 * valid.
 */
2385 mxge_submit_8rx(volatile mcp_kreq_ether_recv_t *dst,
2386 mcp_kreq_ether_recv_t *src)
2390 low = src->addr_low;
/* invalid marker -- NIC skips the chunk until this is restored */
2391 src->addr_low = 0xffffffff;
2392 mxge_pio_copy(dst, src, 4 * sizeof (*src));
2394 mxge_pio_copy(dst + 4, src + 4, 4 * sizeof (*src));
2396 src->addr_low = low;
/* final store flips the chunk to valid */
2397 dst->addr_low = low;
/*
 * Allocate and DMA-map a small (MHLEN) receive mbuf for ring slot idx,
 * record its address in the shadow ring, and post the buffers to the
 * NIC in groups of 8 via mxge_submit_8rx.
 * NOTE(review): error-return lines are elided in this excerpt.
 */
2402 mxge_get_buf_small(struct mxge_slice_state *ss, bus_dmamap_t map, int idx)
2404 bus_dma_segment_t seg;
2406 mxge_rx_ring_t *rx = &ss->rx_small;
2409 m = m_gethdr(M_NOWAIT, MT_DATA);
2416 err = bus_dmamap_load_mbuf_sg(rx->dmat, map, m,
2417 &seg, &cnt, BUS_DMA_NOWAIT);
2422 rx->info[idx].m = m;
/* shadow ring holds the 64-bit DMA address, big-endian, split */
2423 rx->shadow[idx].addr_low =
2424 htobe32(MXGE_LOWPART_TO_U32(seg.ds_addr));
2425 rx->shadow[idx].addr_high =
2426 htobe32(MXGE_HIGHPART_TO_U32(seg.ds_addr));
/* post to the NIC once a full group of 8 slots is ready */
2430 mxge_submit_8rx(&rx->lanai[idx - 7], &rx->shadow[idx - 7]);
/*
 * Allocate and DMA-map a jumbo-cluster receive mbuf for ring slot idx.
 * With MXGE_VIRT_JUMBOS a cluster may span several DMA segments, each
 * occupying consecutive shadow slots (rx->nbufs of them); buffers are
 * posted to the NIC in groups of 8.
 * NOTE(review): error-return lines are elided in this excerpt.
 */
2435 mxge_get_buf_big(struct mxge_slice_state *ss, bus_dmamap_t map, int idx)
2437 bus_dma_segment_t seg[3];
2439 mxge_rx_ring_t *rx = &ss->rx_big;
2442 m = m_getjcl(M_NOWAIT, MT_DATA, M_PKTHDR, rx->cl_size);
2448 m->m_len = rx->mlen;
2449 err = bus_dmamap_load_mbuf_sg(rx->dmat, map, m,
2450 seg, &cnt, BUS_DMA_NOWAIT);
2455 rx->info[idx].m = m;
2456 rx->shadow[idx].addr_low =
2457 htobe32(MXGE_LOWPART_TO_U32(seg->ds_addr));
2458 rx->shadow[idx].addr_high =
2459 htobe32(MXGE_HIGHPART_TO_U32(seg->ds_addr));
2461 #if MXGE_VIRT_JUMBOS
/* additional DMA segments fill the following shadow slots */
2462 for (i = 1; i < cnt; i++) {
2463 rx->shadow[idx + i].addr_low =
2464 htobe32(MXGE_LOWPART_TO_U32(seg[i].ds_addr));
2465 rx->shadow[idx + i].addr_high =
2466 htobe32(MXGE_HIGHPART_TO_U32(seg[i].ds_addr));
2471 for (i = 0; i < rx->nbufs; i++) {
/* post to the NIC once a full group of 8 slots is ready */
2472 if ((idx & 7) == 7) {
2473 mxge_submit_8rx(&rx->lanai[idx - 7],
2474 &rx->shadow[idx - 7]);
/*
 * Plain 16-bit ones-complement sum over len bytes at raw.
 * NOTE(review): the accumulation loop is elided in this excerpt; only
 * the final carry folds are visible.
 */
2484 mxge_csum_generic(uint16_t *raw, int len)
/* fold carries twice so the result fits in 16 bits */
2495 csum = (csum >> 16) + (csum & 0xffff);
2496 csum = (csum >> 16) + (csum & 0xffff);
2497 return (uint16_t)csum;
/*
 * Validate a hardware receive checksum for an IPv6 frame.  The NIC
 * sums the whole frame past the Ethernet header; since IPv6 headers do
 * not checksum to zero, subtract the header bytes' partial sum, then
 * compare against the pseudo-header checksum.
 */
2500 static inline uint16_t
2501 mxge_rx_csum6(void *p, struct mbuf *m, uint32_t csum)
2504 int nxt, cksum_offset;
2505 struct ip6_hdr *ip6 = p;
2509 cksum_offset = sizeof (*ip6) + ETHER_HDR_LEN;
/* skip extension headers to find the transport protocol */
2510 if (nxt != IPPROTO_TCP && nxt != IPPROTO_UDP) {
2511 cksum_offset = ip6_lasthdr(m, ETHER_HDR_LEN,
2512 IPPROTO_IPV6, &nxt);
2513 if (nxt != IPPROTO_TCP && nxt != IPPROTO_UDP)
2518 * IPv6 headers do not contain a checksum, and hence
2519 * do not checksum to zero, so they don't "fall out"
2520 * of the partial checksum calculation like IPv4
2521 * headers do. We need to fix the partial checksum by
2522 * subtracting the checksum of the IPv6 header.
2525 partial = mxge_csum_generic((uint16_t *)ip6, cksum_offset -
/* ones-complement subtraction with end-around carry */
2528 csum += (csum < ~partial);
2529 csum = (csum >> 16) + (csum & 0xFFFF);
2530 csum = (csum >> 16) + (csum & 0xFFFF);
2531 c = in6_cksum_pseudo(ip6, m->m_pkthdr.len - cksum_offset, nxt,
2538 * Myri10GE hardware checksums are not valid if the sender
2539 * padded the frame with non-zero padding. This is because
2540 * the firmware just does a simple 16-bit 1s complement
2541 * checksum across the entire frame, excluding the first 14
2542 * bytes. It is best to simply to check the checksum and
2543 * tell the stack about it only if the checksum is good
/*
 * Verify a hardware receive checksum.  Dispatches on ethertype:
 * IPv4 TCP/UDP is checked against the pseudo-header sum here; IPv6 is
 * delegated to mxge_rx_csum6().  Returns 0 when the checksum is good.
 */
2546 static inline uint16_t
2547 mxge_rx_csum(struct mbuf *m, int csum)
2549 struct ether_header *eh;
2553 #if defined(INET) || defined(INET6)
2554 int cap = m->m_pkthdr.rcvif->if_capenable;
2559 eh = mtod(m, struct ether_header *);
2560 etype = ntohs(eh->ether_type);
/* only bother when RX csum offload is actually enabled */
2564 if ((cap & IFCAP_RXCSUM) == 0)
2566 ip = (struct ip *)(eh + 1);
2567 if (ip->ip_p != IPPROTO_TCP && ip->ip_p != IPPROTO_UDP)
2569 c = in_pseudo(ip->ip_src.s_addr, ip->ip_dst.s_addr,
2570 htonl(ntohs(csum) + ntohs(ip->ip_len) -
2571 (ip->ip_hl << 2) + ip->ip_p));
2576 case ETHERTYPE_IPV6:
2577 if ((cap & IFCAP_RXCSUM_IPV6) == 0)
2579 c = mxge_rx_csum6((eh + 1), m, csum);
/*
 * Strip an in-band 802.1Q tag from a received frame, move the tag into
 * mbuf metadata (ether_vtag or an m_tag on old APIs), and adjust the
 * hardware checksum to account for the 4 removed bytes.  *csum is kept
 * in network byte order on exit.
 */
2589 mxge_vlan_tag_remove(struct mbuf *m, uint32_t *csum)
2591 struct ether_vlan_header *evl;
2592 struct ether_header *eh;
2595 evl = mtod(m, struct ether_vlan_header *);
2596 eh = mtod(m, struct ether_header *);
2599 * fix checksum by subtracting ETHER_VLAN_ENCAP_LEN bytes
2600 * after what the firmware thought was the end of the ethernet
2604 /* put checksum into host byte order */
2605 *csum = ntohs(*csum);
2606 partial = ntohl(*(uint32_t *)(mtod(m, char *) + ETHER_HDR_LEN));
/* ones-complement subtraction of the tag bytes, with carry fold */
2607 (*csum) += ~partial;
2608 (*csum) += ((*csum) < ~partial);
2609 (*csum) = ((*csum) >> 16) + ((*csum) & 0xFFFF);
2610 (*csum) = ((*csum) >> 16) + ((*csum) & 0xFFFF);
2612 /* restore checksum to network byte order;
2613 later consumers expect this */
2614 *csum = htons(*csum);
2617 #ifdef MXGE_NEW_VLAN_API
2618 m->m_pkthdr.ether_vtag = ntohs(evl->evl_tag);
/* legacy API: carry the tag as an mbuf tag instead */
2622 mtag = m_tag_alloc(MTAG_VLAN, MTAG_VLAN_TAG, sizeof(u_int),
2626 VLAN_TAG_VALUE(mtag) = ntohs(evl->evl_tag);
2627 m_tag_prepend(m, mtag);
2631 m->m_flags |= M_VLANTAG;
2634 * Remove the 802.1q header by copying the Ethernet
2635 * addresses over it and adjusting the beginning of
2636 * the data in the mbuf. The encapsulated Ethernet
2637 * type field is already in place.
2639 bcopy((char *)evl, (char *)evl + ETHER_VLAN_ENCAP_LEN,
2640 ETHER_HDR_LEN - ETHER_TYPE_LEN);
2641 m_adj(m, ETHER_VLAN_ENCAP_LEN);
/*
 * Deliver one received frame from the big (jumbo) ring: replace the
 * filled mbuf with a fresh one (dropping the frame if allocation
 * fails), strip any VLAN tag, validate the hardware checksum, and
 * hand the frame to LRO or directly up the stack.
 */
2646 mxge_rx_done_big(struct mxge_slice_state *ss, uint32_t len,
2647 uint32_t csum, int lro)
2652 struct ether_header *eh;
2654 bus_dmamap_t old_map;
2660 idx = rx->cnt & rx->mask;
/* a jumbo frame consumes nbufs ring slots */
2661 rx->cnt += rx->nbufs;
2662 /* save a pointer to the received mbuf */
2663 m = rx->info[idx].m;
2664 /* try to replace the received mbuf */
2665 if (mxge_get_buf_big(ss, rx->extra_map, idx)) {
2666 /* drop the frame -- the old mbuf is re-cycled */
2667 if_inc_counter(ifp, IFCOUNTER_IERRORS, 1);
2671 /* unmap the received buffer */
2672 old_map = rx->info[idx].map;
2673 bus_dmamap_sync(rx->dmat, old_map, BUS_DMASYNC_POSTREAD);
2674 bus_dmamap_unload(rx->dmat, old_map);
2676 /* swap the bus_dmamap_t's */
2677 rx->info[idx].map = rx->extra_map;
2678 rx->extra_map = old_map;
2680 /* mcp implicitly skips 1st 2 bytes so that packet is properly
2682 m->m_data += MXGEFW_PAD;
2684 m->m_pkthdr.rcvif = ifp;
2685 m->m_len = m->m_pkthdr.len = len;
2687 eh = mtod(m, struct ether_header *);
2688 if (eh->ether_type == htons(ETHERTYPE_VLAN)) {
2689 mxge_vlan_tag_remove(m, &csum);
2691 /* flowid only valid if RSS hashing is enabled */
2692 if (sc->num_slices > 1) {
2693 m->m_pkthdr.flowid = (ss - sc->ss);
2694 M_HASHTYPE_SET(m, M_HASHTYPE_OPAQUE);
2696 /* if the checksum is valid, mark it in the mbuf header */
2697 if ((ifp->if_capenable & (IFCAP_RXCSUM_IPV6 | IFCAP_RXCSUM)) &&
2698 (0 == mxge_rx_csum(m, csum))) {
2699 /* Tell the stack that the checksum is good */
2700 m->m_pkthdr.csum_data = 0xffff;
2701 m->m_pkthdr.csum_flags = CSUM_PSEUDO_HDR |
2704 #if defined(INET) || defined (INET6)
/* LRO consumed the frame; skip direct input */
2705 if (lro && (0 == tcp_lro_rx(&ss->lc, m, 0)))
2709 /* pass the frame up the stack */
2710 (*ifp->if_input)(ifp, m);
/*
 * Deliver one received frame from the small ring.  Same flow as
 * mxge_rx_done_big but for MHLEN-sized buffers; note the hardware
 * csum is passed through to tcp_lro_rx here (the big path passes 0).
 */
2714 mxge_rx_done_small(struct mxge_slice_state *ss, uint32_t len,
2715 uint32_t csum, int lro)
2719 struct ether_header *eh;
2722 bus_dmamap_t old_map;
2728 idx = rx->cnt & rx->mask;
2730 /* save a pointer to the received mbuf */
2731 m = rx->info[idx].m;
2732 /* try to replace the received mbuf */
2733 if (mxge_get_buf_small(ss, rx->extra_map, idx)) {
2734 /* drop the frame -- the old mbuf is re-cycled */
2735 if_inc_counter(ifp, IFCOUNTER_IERRORS, 1);
2739 /* unmap the received buffer */
2740 old_map = rx->info[idx].map;
2741 bus_dmamap_sync(rx->dmat, old_map, BUS_DMASYNC_POSTREAD);
2742 bus_dmamap_unload(rx->dmat, old_map);
2744 /* swap the bus_dmamap_t's */
2745 rx->info[idx].map = rx->extra_map;
2746 rx->extra_map = old_map;
2748 /* mcp implicitly skips 1st 2 bytes so that packet is properly
2750 m->m_data += MXGEFW_PAD;
2752 m->m_pkthdr.rcvif = ifp;
2753 m->m_len = m->m_pkthdr.len = len;
2755 eh = mtod(m, struct ether_header *);
2756 if (eh->ether_type == htons(ETHERTYPE_VLAN)) {
2757 mxge_vlan_tag_remove(m, &csum);
2759 /* flowid only valid if RSS hashing is enabled */
2760 if (sc->num_slices > 1) {
2761 m->m_pkthdr.flowid = (ss - sc->ss);
2762 M_HASHTYPE_SET(m, M_HASHTYPE_OPAQUE);
2764 /* if the checksum is valid, mark it in the mbuf header */
2765 if ((ifp->if_capenable & (IFCAP_RXCSUM_IPV6 | IFCAP_RXCSUM)) &&
2766 (0 == mxge_rx_csum(m, csum))) {
2767 /* Tell the stack that the checksum is good */
2768 m->m_pkthdr.csum_data = 0xffff;
2769 m->m_pkthdr.csum_flags = CSUM_PSEUDO_HDR |
2772 #if defined(INET) || defined (INET6)
/* LRO consumed the frame; skip direct input */
2773 if (lro && (0 == tcp_lro_rx(&ss->lc, m, csum)))
2777 /* pass the frame up the stack */
2778 (*ifp->if_input)(ifp, m);
/*
 * Drain this slice's RX completion ring: dispatch each entry to the
 * small- or big-buffer handler based on length, with a livelock limit
 * of half the ring per invocation, then flush pending LRO state.
 */
2782 mxge_clean_rx_done(struct mxge_slice_state *ss)
2784 mxge_rx_done_t *rx_done = &ss->rx_done;
2790 lro = ss->sc->ifp->if_capenable & IFCAP_LRO;
/* length != 0 marks a valid completion entry */
2791 while (rx_done->entry[rx_done->idx].length != 0) {
2792 length = ntohs(rx_done->entry[rx_done->idx].length);
/* clear the entry so the firmware can reuse the slot */
2793 rx_done->entry[rx_done->idx].length = 0;
2794 checksum = rx_done->entry[rx_done->idx].checksum;
2795 if (length <= (MHLEN - MXGEFW_PAD))
2796 mxge_rx_done_small(ss, length, checksum, lro);
2798 mxge_rx_done_big(ss, length, checksum, lro);
2800 rx_done->idx = rx_done->cnt & rx_done->mask;
2802 /* limit potential for livelock */
2803 if (__predict_false(++limit > rx_done->mask / 2))
2806 #if defined(INET) || defined (INET6)
2807 tcp_lro_flush_all(&ss->lc);
/*
 * Reclaim transmit descriptors completed by the firmware up to
 * mcp_idx: free mbufs, unload DMA maps, update byte/multicast
 * counters, then clear OACTIVE and restart transmit if enough ring
 * space has been freed.  With IFNET_BUF_RING, also tell the NIC to
 * stop polling an empty multi-slice queue.
 */
2813 mxge_tx_done(struct mxge_slice_state *ss, uint32_t mcp_idx)
2824 while (tx->pkt_done != mcp_idx) {
2825 idx = tx->done & tx->mask;
2827 m = tx->info[idx].m;
2828 /* mbuf and DMA map only attached to the first
2831 ss->obytes += m->m_pkthdr.len;
2832 if (m->m_flags & M_MCAST)
2835 tx->info[idx].m = NULL;
2836 map = tx->info[idx].map;
2837 bus_dmamap_unload(tx->dmat, map);
/* flag marks the last descriptor of a packet */
2840 if (tx->info[idx].flag) {
2841 tx->info[idx].flag = 0;
2846 /* If we have space, clear IFF_OACTIVE to tell the stack that
2847 its OK to send packets */
2848 #ifdef IFNET_BUF_RING
2849 flags = &ss->if_drv_flags;
2851 flags = &ifp->if_drv_flags;
2853 mtx_lock(&ss->tx.mtx);
/* resume once at least 3/4 of the ring is free */
2854 if ((*flags) & IFF_DRV_OACTIVE &&
2855 tx->req - tx->done < (tx->mask + 1)/4) {
2856 *(flags) &= ~IFF_DRV_OACTIVE;
2858 mxge_start_locked(ss);
2860 #ifdef IFNET_BUF_RING
2861 if ((ss->sc->num_slices > 1) && (tx->req == tx->done)) {
2862 /* let the NIC stop polling this queue, since there
2863 * are no more transmits pending */
/* NOTE(review): this inner test duplicates the condition on the
 * enclosing if; redundant but harmless */
2864 if (tx->req == tx->done) {
2866 tx->queue_active = 0;
2872 mtx_unlock(&ss->tx.mtx);
/*
 * XFP compliance-byte decode table: maps each bit of the XFP module's
 * 10GbE compliance register to an ifmedia type (0 = no FreeBSD media
 * type exists for that technology).
 */
2876 static struct mxge_media_type mxge_xfp_media_types[] =
2878 {IFM_10G_CX4, 0x7f, "10GBASE-CX4 (module)"},
2879 {IFM_10G_SR, (1 << 7), "10GBASE-SR"},
2880 {IFM_10G_LR, (1 << 6), "10GBASE-LR"},
2881 {0, (1 << 5), "10GBASE-ER"},
2882 {IFM_10G_LRM, (1 << 4), "10GBASE-LRM"},
2883 {0, (1 << 3), "10GBASE-SW"},
2884 {0, (1 << 2), "10GBASE-LW"},
2885 {0, (1 << 1), "10GBASE-EW"},
2886 {0, (1 << 0), "Reserved"}
/*
 * SFP+ compliance-byte decode table; entry 0 (bitmask 0) is the
 * passive-copper/Twinax default when no compliance bit is set.
 */
2888 static struct mxge_media_type mxge_sfp_media_types[] =
2890 {IFM_10G_TWINAX, 0, "10GBASE-Twinax"},
2891 {0, (1 << 7), "Reserved"},
2892 {IFM_10G_LRM, (1 << 6), "10GBASE-LRM"},
2893 {IFM_10G_LR, (1 << 5), "10GBASE-LR"},
2894 {IFM_10G_SR, (1 << 4), "10GBASE-SR"},
2895 {IFM_10G_TWINAX,(1 << 0), "10GBASE-Twinax"}
/*
 * Register the given media type (always full-duplex Ethernet) with
 * ifmedia, make it the active selection, and cache it in the softc.
 */
2899 mxge_media_set(mxge_softc_t *sc, int media_type)
2903 ifmedia_add(&sc->media, IFM_ETHER | IFM_FDX | media_type,
2905 ifmedia_set(&sc->media, IFM_ETHER | IFM_FDX | media_type);
2906 sc->current_media = media_type;
2907 sc->media.ifm_media = sc->media.ifm_cur->ifm_media;
/*
 * Initialize media state from the EEPROM product code: the character
 * after the third dash identifies the connector (CX4, XFP, SFP+, or
 * Quad Ribbon Fiber).  Defaults to IFM_AUTO until probed.
 */
2911 mxge_media_init(mxge_softc_t *sc)
2916 ifmedia_removeall(&sc->media);
2917 mxge_media_set(sc, IFM_AUTO);
2920 * parse the product code to determine the interface type
2921 * (CX4, XFP, Quad Ribbon Fiber) by looking at the character
2922 * after the 3rd dash in the driver's cached copy of the
2923 * EEPROM's product code string.
2925 ptr = sc->product_code_string;
2927 device_printf(sc->dev, "Missing product code\n");
/* advance past the third dash; bail if the string is malformed */
2931 for (i = 0; i < 3; i++, ptr++) {
2932 ptr = strchr(ptr, '-');
2934 device_printf(sc->dev,
2935 "only %d dashes in PC?!?\n", i);
2939 if (*ptr == 'C' || *(ptr +1) == 'C') {
2941 sc->connector = MXGE_CX4;
2942 mxge_media_set(sc, IFM_10G_CX4);
2943 } else if (*ptr == 'Q') {
2944 /* -Q is Quad Ribbon Fiber */
2945 sc->connector = MXGE_QRF;
2946 device_printf(sc->dev, "Quad Ribbon Fiber Media\n");
2947 /* FreeBSD has no media type for Quad ribbon fiber */
2948 } else if (*ptr == 'R') {
2950 sc->connector = MXGE_XFP;
2951 } else if (*ptr == 'S' || *(ptr +1) == 'S') {
2952 /* -S or -2S is SFP+ */
2953 sc->connector = MXGE_SFP;
2955 device_printf(sc->dev, "Unknown media type: %c\n", *ptr);
2960 * Determine the media type for a NIC. Some XFPs will identify
2961 * themselves only when their link is up, so this is initiated via a
2962 * link up interrupt. However, this can potentially take up to
2963 * several milliseconds, so it is run via the watchdog routine, rather
2964 * than in the interrupt handler itself.
2967 mxge_media_probe(mxge_softc_t *sc)
2972 struct mxge_media_type *mxge_media_types = NULL;
2973 int i, err, ms, mxge_media_type_entries;
2976 sc->need_media_probe = 0;
/* select the decode table matching the connector found at init time */
2978 if (sc->connector == MXGE_XFP) {
2980 mxge_media_types = mxge_xfp_media_types;
2981 mxge_media_type_entries =
2982 nitems(mxge_xfp_media_types);
2983 byte = MXGE_XFP_COMPLIANCE_BYTE;
2985 } else if (sc->connector == MXGE_SFP) {
2986 /* -S or -2S is SFP+ */
2987 mxge_media_types = mxge_sfp_media_types;
2988 mxge_media_type_entries =
2989 nitems(mxge_sfp_media_types);
2993 /* nothing to do; media type cannot change */
2998 * At this point we know the NIC has an XFP cage, so now we
2999 * try to determine what is in the cage by using the
3000 * firmware's XFP I2C commands to read the XFP 10GbE compliance
3001 * register. We read just one byte, which may take over
3005 cmd.data0 = 0; /* just fetch 1 byte, not all 256 */
3007 err = mxge_send_cmd(sc, MXGEFW_CMD_I2C_READ, &cmd);
3008 if (err == MXGEFW_CMD_ERROR_I2C_FAILURE) {
3009 device_printf(sc->dev, "failed to read XFP\n");
3011 if (err == MXGEFW_CMD_ERROR_I2C_ABSENT) {
3012 device_printf(sc->dev, "Type R/S with no XFP!?!?\n");
3014 if (err != MXGEFW_CMD_OK) {
3018 /* now we wait for the data to be cached */
3020 err = mxge_send_cmd(sc, MXGEFW_CMD_I2C_BYTE, &cmd);
/* poll up to ~50ms for the firmware to cache the I2C byte */
3021 for (ms = 0; (err == EBUSY) && (ms < 50); ms++) {
3024 err = mxge_send_cmd(sc, MXGEFW_CMD_I2C_BYTE, &cmd);
3026 if (err != MXGEFW_CMD_OK) {
3027 device_printf(sc->dev, "failed to read %s (%d, %dms)\n",
3028 cage_type, err, ms);
/* entry 0 is matched by equality (its bitmask is a special case) */
3032 if (cmd.data0 == mxge_media_types[0].bitmask) {
3034 device_printf(sc->dev, "%s:%s\n", cage_type,
3035 mxge_media_types[0].name);
3036 if (sc->current_media != mxge_media_types[0].flag) {
3037 mxge_media_init(sc);
3038 mxge_media_set(sc, mxge_media_types[0].flag);
/* remaining entries are matched bit-by-bit against the compliance byte */
3042 for (i = 1; i < mxge_media_type_entries; i++) {
3043 if (cmd.data0 & mxge_media_types[i].bitmask) {
3045 device_printf(sc->dev, "%s:%s\n",
3047 mxge_media_types[i].name);
3049 if (sc->current_media != mxge_media_types[i].flag) {
3050 mxge_media_init(sc);
3051 mxge_media_set(sc, mxge_media_types[i].flag);
3057 device_printf(sc->dev, "%s media 0x%x unknown\n",
3058 cage_type, cmd.data0);
/*
 * Interrupt handler for one slice.  Non-zero slices (MSI-X) only clean
 * their rx ring; slice 0 additionally reaps tx completions, handles
 * legacy-IRQ deassertion, and processes firmware link/error stats.
 */
3064 mxge_intr(void *arg)
3066 struct mxge_slice_state *ss = arg;
3067 mxge_softc_t *sc = ss->sc;
3068 mcp_irq_data_t *stats = ss->fw_stats;
3069 mxge_tx_ring_t *tx = &ss->tx;
3070 mxge_rx_done_t *rx_done = &ss->rx_done;
3071 uint32_t send_done_count;
3075 #ifndef IFNET_BUF_RING
3076 /* an interrupt on a non-zero slice is implicitly valid
3077 since MSI-X irqs are not shared */
3079 mxge_clean_rx_done(ss);
3080 *ss->irq_claim = be32toh(3);
3085 /* make sure the DMA has finished */
3086 if (!stats->valid) {
3089 valid = stats->valid;
3091 if (sc->legacy_irq) {
3092 /* lower legacy IRQ */
3093 *sc->irq_deassert = 0;
3094 if (!mxge_deassert_wait)
3095 /* don't wait for conf. that irq is low */
3101 /* loop while waiting for legacy irq deassertion */
3103 /* check for transmit completes and receives */
3104 send_done_count = be32toh(stats->send_done_count);
3105 while ((send_done_count != tx->pkt_done) ||
3106 (rx_done->entry[rx_done->idx].length != 0)) {
3107 if (send_done_count != tx->pkt_done)
3108 mxge_tx_done(ss, (int)send_done_count);
3109 mxge_clean_rx_done(ss);
3110 send_done_count = be32toh(stats->send_done_count);
3112 if (sc->legacy_irq && mxge_deassert_wait)
3114 } while (*((volatile uint8_t *) &stats->valid));
3116 /* fw link & error stats meaningful only on the first slice */
3117 if (__predict_false((ss == sc->ss) && stats->stats_updated)) {
3118 if (sc->link_state != stats->link_up) {
3119 sc->link_state = stats->link_up;
3120 if (sc->link_state) {
3121 if_link_state_change(sc->ifp, LINK_STATE_UP);
3123 device_printf(sc->dev, "link up\n");
3125 if_link_state_change(sc->ifp, LINK_STATE_DOWN);
3127 device_printf(sc->dev, "link down\n");
/* a link transition may mean a new module; re-probe from watchdog */
3129 sc->need_media_probe = 1;
3131 if (sc->rdma_tags_available !=
3132 be32toh(stats->rdma_tags_available)) {
3133 sc->rdma_tags_available =
3134 be32toh(stats->rdma_tags_available);
3135 device_printf(sc->dev, "RDMA timed out! %d tags "
3136 "left\n", sc->rdma_tags_available);
3139 if (stats->link_down) {
3140 sc->down_cnt += stats->link_down;
3142 if_link_state_change(sc->ifp, LINK_STATE_DOWN);
3146 /* check to see if we have rx token to pass back */
3148 *ss->irq_claim = be32toh(3);
3149 *(ss->irq_claim + 1) = be32toh(3);
/*
 * if_init handler: bring the interface up (via mxge_open) if it is not
 * already running.  Serialized by the driver mutex.
 */
3153 mxge_init(void *arg)
3155 mxge_softc_t *sc = arg;
3156 struct ifnet *ifp = sc->ifp;
3159 mtx_lock(&sc->driver_mtx);
3160 if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
3161 (void) mxge_open(sc);
3162 mtx_unlock(&sc->driver_mtx);
/*
 * Release all mbufs held by one slice: LRO state, big and small rx
 * rings, and (on slices that have one) the tx ring.  DMA maps are
 * unloaded before each mbuf is freed.
 */
3168 mxge_free_slice_mbufs(struct mxge_slice_state *ss)
3172 #if defined(INET) || defined(INET6)
3173 tcp_lro_free(&ss->lc);
3175 for (i = 0; i <= ss->rx_big.mask; i++) {
3176 if (ss->rx_big.info[i].m == NULL)
3178 bus_dmamap_unload(ss->rx_big.dmat,
3179 ss->rx_big.info[i].map);
3180 m_freem(ss->rx_big.info[i].m);
3181 ss->rx_big.info[i].m = NULL;
3184 for (i = 0; i <= ss->rx_small.mask; i++) {
3185 if (ss->rx_small.info[i].m == NULL)
3187 bus_dmamap_unload(ss->rx_small.dmat,
3188 ss->rx_small.info[i].map);
3189 m_freem(ss->rx_small.info[i].m);
3190 ss->rx_small.info[i].m = NULL;
3193 /* transmit ring used only on the first slice */
3194 if (ss->tx.info == NULL)
3197 for (i = 0; i <= ss->tx.mask; i++) {
3198 ss->tx.info[i].flag = 0;
3199 if (ss->tx.info[i].m == NULL)
3201 bus_dmamap_unload(ss->tx.dmat,
3202 ss->tx.info[i].map);
3203 m_freem(ss->tx.info[i].m);
3204 ss->tx.info[i].m = NULL;
/* Free the mbufs of every slice. */
3209 mxge_free_mbufs(mxge_softc_t *sc)
3213 for (slice = 0; slice < sc->num_slices; slice++)
3214 mxge_free_slice_mbufs(&sc->ss[slice]);
/*
 * Tear down one slice's ring resources: completion-ring DMA memory,
 * tx scratch buffers, rx shadow rings, and all per-slot busdma maps
 * and tags.  Safe to call on partially-initialized slices (every
 * pointer is NULL-checked and cleared).
 */
3218 mxge_free_slice_rings(struct mxge_slice_state *ss)
3223 if (ss->rx_done.entry != NULL)
3224 mxge_dma_free(&ss->rx_done.dma);
3225 ss->rx_done.entry = NULL;
3227 if (ss->tx.req_bytes != NULL)
3228 free(ss->tx.req_bytes, M_DEVBUF);
3229 ss->tx.req_bytes = NULL;
3231 if (ss->tx.seg_list != NULL)
3232 free(ss->tx.seg_list, M_DEVBUF);
3233 ss->tx.seg_list = NULL;
3235 if (ss->rx_small.shadow != NULL)
3236 free(ss->rx_small.shadow, M_DEVBUF);
3237 ss->rx_small.shadow = NULL;
3239 if (ss->rx_big.shadow != NULL)
3240 free(ss->rx_big.shadow, M_DEVBUF);
3241 ss->rx_big.shadow = NULL;
/* destroy per-slot maps before the tag that created them */
3243 if (ss->tx.info != NULL) {
3244 if (ss->tx.dmat != NULL) {
3245 for (i = 0; i <= ss->tx.mask; i++) {
3246 bus_dmamap_destroy(ss->tx.dmat,
3247 ss->tx.info[i].map);
3249 bus_dma_tag_destroy(ss->tx.dmat);
3251 free(ss->tx.info, M_DEVBUF);
3255 if (ss->rx_small.info != NULL) {
3256 if (ss->rx_small.dmat != NULL) {
3257 for (i = 0; i <= ss->rx_small.mask; i++) {
3258 bus_dmamap_destroy(ss->rx_small.dmat,
3259 ss->rx_small.info[i].map);
3261 bus_dmamap_destroy(ss->rx_small.dmat,
3262 ss->rx_small.extra_map);
3263 bus_dma_tag_destroy(ss->rx_small.dmat);
3265 free(ss->rx_small.info, M_DEVBUF);
3267 ss->rx_small.info = NULL;
3269 if (ss->rx_big.info != NULL) {
3270 if (ss->rx_big.dmat != NULL) {
3271 for (i = 0; i <= ss->rx_big.mask; i++) {
3272 bus_dmamap_destroy(ss->rx_big.dmat,
3273 ss->rx_big.info[i].map);
3275 bus_dmamap_destroy(ss->rx_big.dmat,
3276 ss->rx_big.extra_map);
3277 bus_dma_tag_destroy(ss->rx_big.dmat);
3279 free(ss->rx_big.info, M_DEVBUF);
3281 ss->rx_big.info = NULL;
/* Free ring resources for every slice. */
3285 mxge_free_rings(mxge_softc_t *sc)
3289 for (slice = 0; slice < sc->num_slices; slice++)
3290 mxge_free_slice_rings(&sc->ss[slice]);
/*
 * Allocate one slice's host-side ring state: shadow and info rings for
 * the small/big rx rings, busdma tags and per-slot maps, plus (on
 * tx-capable slices) the tx request block, segment list, info ring,
 * and tx dma maps.  Ring sizes must be powers of two (masks are
 * entries - 1).  On error, callers are expected to unwind via
 * mxge_free_slice_rings().
 */
3294 mxge_alloc_slice_rings(struct mxge_slice_state *ss, int rx_ring_entries,
3295 int tx_ring_entries)
3297 mxge_softc_t *sc = ss->sc;
3301 /* allocate per-slice receive resources */
3303 ss->rx_small.mask = ss->rx_big.mask = rx_ring_entries - 1;
/* completion ring holds entries from both rx rings, hence 2x */
3304 ss->rx_done.mask = (2 * rx_ring_entries) - 1;
3306 /* allocate the rx shadow rings */
3307 bytes = rx_ring_entries * sizeof (*ss->rx_small.shadow);
3308 ss->rx_small.shadow = malloc(bytes, M_DEVBUF, M_ZERO|M_WAITOK);
3310 bytes = rx_ring_entries * sizeof (*ss->rx_big.shadow);
3311 ss->rx_big.shadow = malloc(bytes, M_DEVBUF, M_ZERO|M_WAITOK);
3313 /* allocate the rx host info rings */
3314 bytes = rx_ring_entries * sizeof (*ss->rx_small.info);
3315 ss->rx_small.info = malloc(bytes, M_DEVBUF, M_ZERO|M_WAITOK);
3317 bytes = rx_ring_entries * sizeof (*ss->rx_big.info);
3318 ss->rx_big.info = malloc(bytes, M_DEVBUF, M_ZERO|M_WAITOK);
3320 /* allocate the rx busdma resources */
3321 err = bus_dma_tag_create(sc->parent_dmat, /* parent */
3323 4096, /* boundary */
3324 BUS_SPACE_MAXADDR, /* low */
3325 BUS_SPACE_MAXADDR, /* high */
3326 NULL, NULL, /* filter */
3327 MHLEN, /* maxsize */
3329 MHLEN, /* maxsegsize */
3330 BUS_DMA_ALLOCNOW, /* flags */
3331 NULL, NULL, /* lock */
3332 &ss->rx_small.dmat); /* tag */
3334 device_printf(sc->dev, "Err %d allocating rx_small dmat\n",
3339 err = bus_dma_tag_create(sc->parent_dmat, /* parent */
3341 #if MXGE_VIRT_JUMBOS
3342 4096, /* boundary */
3346 BUS_SPACE_MAXADDR, /* low */
3347 BUS_SPACE_MAXADDR, /* high */
3348 NULL, NULL, /* filter */
3349 3*4096, /* maxsize */
3350 #if MXGE_VIRT_JUMBOS
3352 4096, /* maxsegsize*/
3355 MJUM9BYTES, /* maxsegsize*/
3357 BUS_DMA_ALLOCNOW, /* flags */
3358 NULL, NULL, /* lock */
3359 &ss->rx_big.dmat); /* tag */
3361 device_printf(sc->dev, "Err %d allocating rx_big dmat\n",
3365 for (i = 0; i <= ss->rx_small.mask; i++) {
3366 err = bus_dmamap_create(ss->rx_small.dmat, 0,
3367 &ss->rx_small.info[i].map);
3369 device_printf(sc->dev, "Err %d rx_small dmamap\n",
/* spare map used for atomic buffer replacement on the rx path */
3374 err = bus_dmamap_create(ss->rx_small.dmat, 0,
3375 &ss->rx_small.extra_map);
3377 device_printf(sc->dev, "Err %d extra rx_small dmamap\n",
3382 for (i = 0; i <= ss->rx_big.mask; i++) {
3383 err = bus_dmamap_create(ss->rx_big.dmat, 0,
3384 &ss->rx_big.info[i].map);
3386 device_printf(sc->dev, "Err %d rx_big dmamap\n",
3391 err = bus_dmamap_create(ss->rx_big.dmat, 0,
3392 &ss->rx_big.extra_map);
3394 device_printf(sc->dev, "Err %d extra rx_big dmamap\n",
3399 /* now allocate TX resources */
3401 #ifndef IFNET_BUF_RING
3402 /* only use a single TX ring for now */
3403 if (ss != ss->sc->ss)
3407 ss->tx.mask = tx_ring_entries - 1;
3408 ss->tx.max_desc = MIN(MXGE_MAX_SEND_DESC, tx_ring_entries / 4);
3411 /* allocate the tx request copy block */
3413 sizeof (*ss->tx.req_list) * (ss->tx.max_desc + 4);
3414 ss->tx.req_bytes = malloc(bytes, M_DEVBUF, M_WAITOK);
3415 /* ensure req_list entries are aligned to 8 bytes */
3416 ss->tx.req_list = (mcp_kreq_ether_send_t *)
3417 ((unsigned long)(ss->tx.req_bytes + 7) & ~7UL);
3419 /* allocate the tx busdma segment list */
3420 bytes = sizeof (*ss->tx.seg_list) * ss->tx.max_desc;
3421 ss->tx.seg_list = (bus_dma_segment_t *)
3422 malloc(bytes, M_DEVBUF, M_WAITOK);
3424 /* allocate the tx host info ring */
3425 bytes = tx_ring_entries * sizeof (*ss->tx.info);
3426 ss->tx.info = malloc(bytes, M_DEVBUF, M_ZERO|M_WAITOK);
3428 /* allocate the tx busdma resources */
3429 err = bus_dma_tag_create(sc->parent_dmat, /* parent */
3431 sc->tx_boundary, /* boundary */
3432 BUS_SPACE_MAXADDR, /* low */
3433 BUS_SPACE_MAXADDR, /* high */
3434 NULL, NULL, /* filter */
3435 65536 + 256, /* maxsize */
3436 ss->tx.max_desc - 2, /* num segs */
3437 sc->tx_boundary, /* maxsegsz */
3438 BUS_DMA_ALLOCNOW, /* flags */
3439 NULL, NULL, /* lock */
3440 &ss->tx.dmat); /* tag */
3443 device_printf(sc->dev, "Err %d allocating tx dmat\n",
3448 /* now use these tags to setup dmamaps for each slot
3450 for (i = 0; i <= ss->tx.mask; i++) {
3451 err = bus_dmamap_create(ss->tx.dmat, 0,
3452 &ss->tx.info[i].map);
3454 device_printf(sc->dev, "Err %d tx dmamap\n",
/*
 * Query the firmware for the send-ring size, derive entry counts for
 * tx and rx, size the ifnet send queue accordingly, and allocate ring
 * resources for every slice (freeing everything on failure).
 */
3464 mxge_alloc_rings(mxge_softc_t *sc)
3468 int tx_ring_entries, rx_ring_entries;
3471 /* get ring sizes */
3472 err = mxge_send_cmd(sc, MXGEFW_CMD_GET_SEND_RING_SIZE, &cmd);
3473 tx_ring_size = cmd.data0;
3475 device_printf(sc->dev, "Cannot determine tx ring sizes\n");
3479 tx_ring_entries = tx_ring_size / sizeof (mcp_kreq_ether_send_t);
3480 rx_ring_entries = sc->rx_ring_size / sizeof (mcp_dma_addr_t);
3481 IFQ_SET_MAXLEN(&sc->ifp->if_snd, tx_ring_entries - 1);
3482 sc->ifp->if_snd.ifq_drv_maxlen = sc->ifp->if_snd.ifq_maxlen;
3483 IFQ_SET_READY(&sc->ifp->if_snd);
3485 for (slice = 0; slice < sc->num_slices; slice++) {
3486 err = mxge_alloc_slice_rings(&sc->ss[slice],
3495 mxge_free_rings(sc);
/*
 * Choose big-rx-buffer parameters for a given MTU: the firmware buffer
 * size, the mbuf cluster size to allocate, and how many firmware
 * buffers each cluster is carved into (only >1 with MXGE_VIRT_JUMBOS).
 */
3502 mxge_choose_params(int mtu, int *big_buf_size, int *cl_size, int *nbufs)
3504 int bufsize = mtu + ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN + MXGEFW_PAD;
3506 if (bufsize < MCLBYTES) {
3507 /* easy, everything fits in a single buffer */
3508 *big_buf_size = MCLBYTES;
3509 *cl_size = MCLBYTES;
3514 if (bufsize < MJUMPAGESIZE) {
3515 /* still easy, everything still fits in a single buffer */
3516 *big_buf_size = MJUMPAGESIZE;
3517 *cl_size = MJUMPAGESIZE;
3521 #if MXGE_VIRT_JUMBOS
3522 /* now we need to use virtually contiguous buffers */
3523 *cl_size = MJUM9BYTES;
3524 *big_buf_size = 4096;
3525 *nbufs = mtu / 4096 + 1;
3526 /* needs to be a power of two, so round up */
3530 *cl_size = MJUM9BYTES;
3531 *big_buf_size = MJUM9BYTES;
/*
 * Per-slice open: initialize LRO, fetch the lanai (NIC SRAM) pointers
 * for the send/receive rings from the firmware, and pre-stock both rx
 * rings with buffers.
 */
3537 mxge_slice_open(struct mxge_slice_state *ss, int nbufs, int cl_size)
3546 slice = ss - sc->ss;
3548 #if defined(INET) || defined(INET6)
3549 (void)tcp_lro_init(&ss->lc);
3551 ss->lc.ifp = sc->ifp;
3553 /* get the lanai pointers to the send and receive rings */
3556 #ifndef IFNET_BUF_RING
3557 /* We currently only send from the first slice */
3561 err = mxge_send_cmd(sc, MXGEFW_CMD_GET_SEND_OFFSET, &cmd);
3563 (volatile mcp_kreq_ether_send_t *)(sc->sram + cmd.data0);
3564 ss->tx.send_go = (volatile uint32_t *)
3565 (sc->sram + MXGEFW_ETH_SEND_GO + 64 * slice);
3566 ss->tx.send_stop = (volatile uint32_t *)
3567 (sc->sram + MXGEFW_ETH_SEND_STOP + 64 * slice);
3568 #ifndef IFNET_BUF_RING
3572 err |= mxge_send_cmd(sc,
3573 MXGEFW_CMD_GET_SMALL_RX_OFFSET, &cmd);
3574 ss->rx_small.lanai =
3575 (volatile mcp_kreq_ether_recv_t *)(sc->sram + cmd.data0);
3577 err |= mxge_send_cmd(sc, MXGEFW_CMD_GET_BIG_RX_OFFSET, &cmd);
3579 (volatile mcp_kreq_ether_recv_t *)(sc->sram + cmd.data0);
3582 device_printf(sc->dev,
3583 "failed to get ring sizes or locations\n");
3587 /* stock receive rings */
3588 for (i = 0; i <= ss->rx_small.mask; i++) {
3589 map = ss->rx_small.info[i].map;
3590 err = mxge_get_buf_small(ss, map, i);
3592 device_printf(sc->dev, "alloced %d/%d smalls\n",
3593 i, ss->rx_small.mask + 1);
/* poison big-ring shadow addresses so unfilled slots are obvious */
3597 for (i = 0; i <= ss->rx_big.mask; i++) {
3598 ss->rx_big.shadow[i].addr_low = 0xffffffff;
3599 ss->rx_big.shadow[i].addr_high = 0xffffffff;
3601 ss->rx_big.nbufs = nbufs;
3602 ss->rx_big.cl_size = cl_size;
3603 ss->rx_big.mlen = ss->sc->ifp->if_mtu + ETHER_HDR_LEN +
3604 ETHER_VLAN_ENCAP_LEN + MXGEFW_PAD;
3605 for (i = 0; i <= ss->rx_big.mask; i += ss->rx_big.nbufs) {
3606 map = ss->rx_big.info[i].map;
3607 err = mxge_get_buf_big(ss, map, i);
3609 device_printf(sc->dev, "alloced %d/%d bigs\n",
3610 i, ss->rx_big.mask + 1);
/*
 * Bring the interface up: reset the NIC, program RSS tables when
 * multiple slices are in use, tell the firmware the MTU and buffer
 * sizes, point it at the per-slice stats blocks, open each slice,
 * and finally issue ETHERNET_UP and mark the ifnet running.
 */
3618 mxge_open(mxge_softc_t *sc)
3621 int err, big_bytes, nbufs, slice, cl_size, i;
3623 volatile uint8_t *itable;
3624 struct mxge_slice_state *ss;
3626 /* Copy the MAC address in case it was overridden */
3627 bcopy(IF_LLADDR(sc->ifp), sc->mac_addr, ETHER_ADDR_LEN);
3629 err = mxge_reset(sc, 1);
3631 device_printf(sc->dev, "failed to reset\n");
3635 if (sc->num_slices > 1) {
3636 /* setup the indirection table */
3637 cmd.data0 = sc->num_slices;
3638 err = mxge_send_cmd(sc, MXGEFW_CMD_SET_RSS_TABLE_SIZE,
3641 err |= mxge_send_cmd(sc, MXGEFW_CMD_GET_RSS_TABLE_OFFSET,
3644 device_printf(sc->dev,
3645 "failed to setup rss tables\n");
3649 /* just enable an identity mapping */
3650 itable = sc->sram + cmd.data0;
3651 for (i = 0; i < sc->num_slices; i++)
3652 itable[i] = (uint8_t)i;
3655 cmd.data1 = mxge_rss_hash_type;
3656 err = mxge_send_cmd(sc, MXGEFW_CMD_SET_RSS_ENABLE, &cmd);
3658 device_printf(sc->dev, "failed to enable slices\n");
3664 mxge_choose_params(sc->ifp->if_mtu, &big_bytes, &cl_size, &nbufs);
3667 err = mxge_send_cmd(sc, MXGEFW_CMD_ALWAYS_USE_N_BIG_BUFFERS,
3669 /* error is only meaningful if we're trying to set
3670 MXGEFW_CMD_ALWAYS_USE_N_BIG_BUFFERS > 1 */
3671 if (err && nbufs > 1) {
3672 device_printf(sc->dev,
3673 "Failed to set alway-use-n to %d\n",
3677 /* Give the firmware the mtu and the big and small buffer
3678 sizes. The firmware wants the big buf size to be a power
3679 of two. Luckily, FreeBSD's clusters are powers of two */
3680 cmd.data0 = sc->ifp->if_mtu + ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
3681 err = mxge_send_cmd(sc, MXGEFW_CMD_SET_MTU, &cmd);
3682 cmd.data0 = MHLEN - MXGEFW_PAD;
3683 err |= mxge_send_cmd(sc, MXGEFW_CMD_SET_SMALL_BUFFER_SIZE,
3685 cmd.data0 = big_bytes;
3686 err |= mxge_send_cmd(sc, MXGEFW_CMD_SET_BIG_BUFFER_SIZE, &cmd);
3689 device_printf(sc->dev, "failed to setup params\n");
3693 /* Now give him the pointer to the stats block */
3695 #ifdef IFNET_BUF_RING
3696 slice < sc->num_slices;
3701 ss = &sc->ss[slice];
3703 MXGE_LOWPART_TO_U32(ss->fw_stats_dma.bus_addr);
3705 MXGE_HIGHPART_TO_U32(ss->fw_stats_dma.bus_addr);
3706 cmd.data2 = sizeof(struct mcp_irq_data);
/* slice index is encoded in the upper half of data2 */
3707 cmd.data2 |= (slice << 16);
3708 err |= mxge_send_cmd(sc, MXGEFW_CMD_SET_STATS_DMA_V2, &cmd);
/* fall back to the obsolete stats DMA command for old firmware */
3712 bus = sc->ss->fw_stats_dma.bus_addr;
3713 bus += offsetof(struct mcp_irq_data, send_done_count);
3714 cmd.data0 = MXGE_LOWPART_TO_U32(bus);
3715 cmd.data1 = MXGE_HIGHPART_TO_U32(bus);
3716 err = mxge_send_cmd(sc,
3717 MXGEFW_CMD_SET_STATS_DMA_OBSOLETE,
3719 /* Firmware cannot support multicast without STATS_DMA_V2 */
3720 sc->fw_multicast_support = 0;
3722 sc->fw_multicast_support = 1;
3726 device_printf(sc->dev, "failed to setup params\n");
3730 for (slice = 0; slice < sc->num_slices; slice++) {
3731 err = mxge_slice_open(&sc->ss[slice], nbufs, cl_size);
3733 device_printf(sc->dev, "couldn't open slice %d\n",
3739 /* Finally, start the firmware running */
3740 err = mxge_send_cmd(sc, MXGEFW_CMD_ETHERNET_UP, &cmd);
3742 device_printf(sc->dev, "Couldn't bring up link\n");
3745 #ifdef IFNET_BUF_RING
3746 for (slice = 0; slice < sc->num_slices; slice++) {
3747 ss = &sc->ss[slice];
3748 ss->if_drv_flags |= IFF_DRV_RUNNING;
3749 ss->if_drv_flags &= ~IFF_DRV_OACTIVE;
3752 sc->ifp->if_drv_flags |= IFF_DRV_RUNNING;
3753 sc->ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;
3759 mxge_free_mbufs(sc);
/*
 * Bring the interface down: clear the running flag(s), issue
 * ETHERNET_DOWN, wait briefly for the firmware's "down" interrupt
 * (tracked via sc->down_cnt), then release all mbufs.
 */
3765 mxge_close(mxge_softc_t *sc, int down)
3768 int err, old_down_cnt;
3769 #ifdef IFNET_BUF_RING
3770 struct mxge_slice_state *ss;
3774 #ifdef IFNET_BUF_RING
3775 for (slice = 0; slice < sc->num_slices; slice++) {
3776 ss = &sc->ss[slice];
3777 ss->if_drv_flags &= ~IFF_DRV_RUNNING;
3780 sc->ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
3782 old_down_cnt = sc->down_cnt;
3784 err = mxge_send_cmd(sc, MXGEFW_CMD_ETHERNET_DOWN, &cmd);
3786 device_printf(sc->dev,
3787 "Couldn't bring down link\n");
3789 if (old_down_cnt == sc->down_cnt) {
3790 /* wait for down irq */
3791 DELAY(10 * sc->intr_coal_delay);
3794 if (old_down_cnt == sc->down_cnt) {
3795 device_printf(sc->dev, "never got down irq\n");
3798 mxge_free_mbufs(sc);
/*
 * Program PCI config space: record the PCIe link width, set the max
 * read request size to 4KB (or restore the value saved before a
 * watchdog reset), and enable bus mastering.
 */
3804 mxge_setup_cfg_space(mxge_softc_t *sc)
3806 device_t dev = sc->dev;
3808 uint16_t lnk, pectl;
3810 /* find the PCIe link width and set max read request to 4KB*/
3811 if (pci_find_cap(dev, PCIY_EXPRESS, &reg) == 0) {
3812 lnk = pci_read_config(dev, reg + 0x12, 2);
3813 sc->link_width = (lnk >> 4) & 0x3f;
3815 if (sc->pectl == 0) {
3816 pectl = pci_read_config(dev, reg + 0x8, 2);
/* max read request field is bits 14:12; 5 selects 4KB */
3817 pectl = (pectl & ~0x7000) | (5 << 12);
3818 pci_write_config(dev, reg + 0x8, pectl, 2);
3821 /* restore saved pectl after watchdog reset */
3822 pci_write_config(dev, reg + 0x8, sc->pectl, 2);
3826 /* Enable DMA and Memory space access */
3827 pci_enable_busmaster(dev);
/*
 * Read the NIC's reboot status register through the vendor-specific
 * PCI capability window.  Returns (uint32_t)-1 if the capability
 * cannot be located.
 */
3831 mxge_read_reboot(mxge_softc_t *sc)
3833 device_t dev = sc->dev;
3836 /* find the vendor specific offset */
3837 if (pci_find_cap(dev, PCIY_VENDOR, &vs) != 0) {
3838 device_printf(sc->dev,
3839 "could not find vendor specific offset\n");
3840 return (uint32_t)-1;
3842 /* enable read32 mode */
3843 pci_write_config(dev, vs + 0x10, 0x3, 1);
3844 /* tell NIC which register to read */
3845 pci_write_config(dev, vs + 0x18, 0xfffffff0, 4);
3846 return (pci_read_config(dev, vs + 0x14, 4));
/*
 * Recover from a firmware hang or NIC reboot detected by the watchdog:
 * if PCI config space was wiped (busmaster bit cleared), quiesce TX,
 * restore config space, reload firmware, and reopen the interface.
 */
3850 mxge_watchdog_reset(mxge_softc_t *sc)
3852 struct pci_devinfo *dinfo;
3853 struct mxge_slice_state *ss;
3854 int err, running, s, num_tx_slices = 1;
3860 device_printf(sc->dev, "Watchdog reset!\n");
3863 * check to see if the NIC rebooted. If it did, then all of
3864 * PCI config space has been reset, and things like the
3865 * busmaster bit will be zero. If this is the case, then we
3866 * must restore PCI config space before the NIC can be used
3869 cmd = pci_read_config(sc->dev, PCIR_COMMAND, 2);
3870 if (cmd == 0xffff) {
3872 * maybe the watchdog caught the NIC rebooting; wait
3873 * up to 100ms for it to finish. If it does not come
3874 * back, then give up
3877 cmd = pci_read_config(sc->dev, PCIR_COMMAND, 2);
3878 if (cmd == 0xffff) {
3879 device_printf(sc->dev, "NIC disappeared!\n");
3882 if ((cmd & PCIM_CMD_BUSMASTEREN) == 0) {
3883 /* print the reboot status */
3884 reboot = mxge_read_reboot(sc);
3885 device_printf(sc->dev, "NIC rebooted, status = 0x%x\n",
3887 running = sc->ifp->if_drv_flags & IFF_DRV_RUNNING;
3891 * quiesce NIC so that TX routines will not try to
3892 * xmit after restoration of BAR
3895 /* Mark the link as down */
3896 if (sc->link_state) {
3898 if_link_state_change(sc->ifp,
3901 #ifdef IFNET_BUF_RING
3902 num_tx_slices = sc->num_slices;
3904 /* grab all TX locks to ensure no tx */
3905 for (s = 0; s < num_tx_slices; s++) {
3907 mtx_lock(&ss->tx.mtx);
3911 /* restore PCI configuration space */
3912 dinfo = device_get_ivars(sc->dev);
3913 pci_cfg_restore(sc->dev, dinfo);
3915 /* and redo any changes we made to our config space */
3916 mxge_setup_cfg_space(sc);
3919 err = mxge_load_firmware(sc, 0);
3921 device_printf(sc->dev,
3922 "Unable to re-load f/w\n");
3926 err = mxge_open(sc);
3927 /* release all TX locks */
3928 for (s = 0; s < num_tx_slices; s++) {
3930 #ifdef IFNET_BUF_RING
3931 mxge_start_locked(ss);
3933 mtx_unlock(&ss->tx.mtx);
3936 sc->watchdog_resets++;
3938 device_printf(sc->dev,
3939 "NIC did not reboot, not resetting\n");
3943 device_printf(sc->dev, "watchdog reset failed\n");
/* re-arm the periodic tick after the reset attempt */
3947 callout_reset(&sc->co_hdl, mxge_ticks, mxge_tick, sc);
/*
 * Taskqueue wrapper so the (sleeping-capable) watchdog reset runs
 * outside interrupt context, under the driver mutex.
 */
3952 mxge_watchdog_task(void *arg, int pending)
3954 mxge_softc_t *sc = arg;
3957 mtx_lock(&sc->driver_mtx);
3958 mxge_watchdog_reset(sc);
3959 mtx_unlock(&sc->driver_mtx);
/*
 * Dump the transmit ring state of a slice that appears wedged.
 * NOTE(review): "struck" in the message below is a long-standing
 * upstream typo for "stuck"; the string is left unchanged here.
 */
3963 mxge_warn_stuck(mxge_softc_t *sc, mxge_tx_ring_t *tx, int slice)
3965 tx = &sc->ss[slice].tx;
3966 device_printf(sc->dev, "slice %d struck? ring state:\n", slice);
3967 device_printf(sc->dev,
3968 "tx.req=%d tx.done=%d, tx.queue_active=%d\n",
3969 tx->req, tx->done, tx->queue_active);
3970 device_printf(sc->dev, "tx.activate=%d tx.deactivate=%d\n",
3971 tx->activate, tx->deactivate);
3972 device_printf(sc->dev, "pkt_done=%d fw=%d\n",
3974 be32toh(sc->ss->fw_stats->send_done_count));
/*
 * Periodic watchdog: detect transmit rings that have made no progress
 * since the last tick.  If the stall is not explained by flow-control
 * pause frames, schedule a full watchdog reset.  Also kicks off a
 * deferred media probe when one has been requested.
 */
3978 mxge_watchdog(mxge_softc_t *sc)
3981 uint32_t rx_pause = be32toh(sc->ss->fw_stats->dropped_pause);
3984 /* see if we have outstanding transmits, which
3985 have been pending for more than mxge_ticks */
3987 #ifdef IFNET_BUF_RING
3988 (i < sc->num_slices) && (err == 0);
3990 (i < 1) && (err == 0);
/* stalled: requests outstanding, but done index unchanged since last tick */
3994 if (tx->req != tx->done &&
3995 tx->watchdog_req != tx->watchdog_done &&
3996 tx->done == tx->watchdog_done) {
3997 /* check for pause blocking before resetting */
3998 if (tx->watchdog_rx_pause == rx_pause) {
3999 mxge_warn_stuck(sc, tx, i);
4000 taskqueue_enqueue(sc->tq, &sc->watchdog_task);
4004 device_printf(sc->dev, "Flow control blocking "
4005 "xmits, check link partner\n");
/* snapshot ring indices for comparison on the next tick */
4008 tx->watchdog_req = tx->req;
4009 tx->watchdog_done = tx->done;
4010 tx->watchdog_rx_pause = rx_pause;
4013 if (sc->need_media_probe)
4014 mxge_media_probe(sc);
/*
 * if_get_counter handler: sum the requested statistic across all
 * slices; anything not tracked per-slice falls through to the
 * stack's default counters.
 */
4019 mxge_get_counter(struct ifnet *ifp, ift_counter cnt)
4021 struct mxge_softc *sc;
4024 sc = if_getsoftc(ifp);
4028 case IFCOUNTER_IPACKETS:
4029 for (int s = 0; s < sc->num_slices; s++)
4030 rv += sc->ss[s].ipackets;
4032 case IFCOUNTER_OPACKETS:
4033 for (int s = 0; s < sc->num_slices; s++)
4034 rv += sc->ss[s].opackets;
4036 case IFCOUNTER_OERRORS:
4037 for (int s = 0; s < sc->num_slices; s++)
4038 rv += sc->ss[s].oerrors;
4040 #ifdef IFNET_BUF_RING
4041 case IFCOUNTER_OBYTES:
4042 for (int s = 0; s < sc->num_slices; s++)
4043 rv += sc->ss[s].obytes;
4045 case IFCOUNTER_OMCASTS:
4046 for (int s = 0; s < sc->num_slices; s++)
4047 rv += sc->ss[s].omcasts;
4049 case IFCOUNTER_OQDROPS:
4050 for (int s = 0; s < sc->num_slices; s++)
4051 rv += sc->ss[s].tx.br->br_drops;
4055 return (if_get_counter_default(ifp, cnt));
/*
 * Periodic callout: when the interface is running, run the watchdog
 * every few ticks; when idle, just verify the NIC has not lost its
 * busmaster bit (hardware fault) and poll less often.
 */
4060 mxge_tick(void *arg)
4062 mxge_softc_t *sc = arg;
4069 running = sc->ifp->if_drv_flags & IFF_DRV_RUNNING;
4071 if (!sc->watchdog_countdown) {
4072 err = mxge_watchdog(sc);
4073 sc->watchdog_countdown = 4;
4075 sc->watchdog_countdown--;
4078 /* ensure NIC did not suffer h/w fault while idle */
4079 cmd = pci_read_config(sc->dev, PCIR_COMMAND, 2);
4080 if ((cmd & PCIM_CMD_BUSMASTEREN) == 0) {
4082 taskqueue_enqueue(sc->tq, &sc->watchdog_task);
4085 /* look less often if NIC is idle */
4090 callout_reset(&sc->co_hdl, ticks, mxge_tick, sc);
/* ifmedia change handler; media selection is fixed for this hardware. */
4095 mxge_media_change(struct ifnet *ifp)
/*
 * Validate and apply a new MTU.  If the interface is running it is
 * closed and reopened so buffer sizing matches; on reopen failure the
 * old MTU is restored and the interface reopened with it.
 */
4101 mxge_change_mtu(mxge_softc_t *sc, int mtu)
4103 struct ifnet *ifp = sc->ifp;
4104 int real_mtu, old_mtu;
/* account for Ethernet + VLAN header overhead when range-checking */
4108 real_mtu = mtu + ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
4109 if ((real_mtu > sc->max_mtu) || real_mtu < 60)
4111 mtx_lock(&sc->driver_mtx);
4112 old_mtu = ifp->if_mtu;
4114 if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
4116 err = mxge_open(sc);
4118 ifp->if_mtu = old_mtu;
4120 (void) mxge_open(sc);
4123 mtx_unlock(&sc->driver_mtx);
/*
 * ifmedia status handler: report full-duplex Ethernet with the cached
 * current media, active iff the driver believes the link is up.
 */
4128 mxge_media_status(struct ifnet *ifp, struct ifmediareq *ifmr)
4130 mxge_softc_t *sc = ifp->if_softc;
4135 ifmr->ifm_status = IFM_AVALID;
4136 ifmr->ifm_active = IFM_ETHER | IFM_FDX;
4137 ifmr->ifm_status |= sc->link_state ? IFM_ACTIVE : 0;
4138 ifmr->ifm_active |= sc->current_media;
/*
 * Service an SIOCGI2C request: read i2c->len bytes, one at a time,
 * from the transceiver module via firmware I2C commands.  Only the
 * standard SFP addresses 0xA0/0xA2 are accepted.
 */
4142 mxge_fetch_i2c(mxge_softc_t *sc, struct ifi2creq *i2c)
4149 if (i2c->dev_addr != 0xA0 &&
4150 i2c->dev_addr != 0xA2)
4152 if (i2c->len > sizeof(i2c->data))
4155 for (i = 0; i < i2c->len; i++) {
/* encode device address (high byte) and register offset (low byte) */
4156 i2c_args = i2c->dev_addr << 0x8;
4157 i2c_args |= i2c->offset + i;
4158 cmd.data0 = 0; /* just fetch 1 byte, not all 256 */
4159 cmd.data1 = i2c_args;
4160 err = mxge_send_cmd(sc, MXGEFW_CMD_I2C_READ, &cmd);
4162 if (err != MXGEFW_CMD_OK)
4164 /* now we wait for the data to be cached */
4165 cmd.data0 = i2c_args & 0xff;
4166 err = mxge_send_cmd(sc, MXGEFW_CMD_I2C_BYTE, &cmd);
/* poll up to ~50ms for the firmware to cache the byte */
4167 for (ms = 0; (err == EBUSY) && (ms < 50); ms++) {
4168 cmd.data0 = i2c_args & 0xff;
4169 err = mxge_send_cmd(sc, MXGEFW_CMD_I2C_BYTE, &cmd);
4173 if (err != MXGEFW_CMD_OK)
4175 i2c->data[i] = cmd.data0;
/*
 * ioctl handler: MTU changes, up/down flag handling, multicast list
 * updates, capability toggles (checksum offload, TSO, LRO, VLAN),
 * media queries, and transceiver I2C reads.  Anything unrecognized is
 * passed to ether_ioctl().  State changes run under the driver mutex.
 */
4181 mxge_ioctl(struct ifnet *ifp, u_long command, caddr_t data)
4183 mxge_softc_t *sc = ifp->if_softc;
4184 struct ifreq *ifr = (struct ifreq *)data;
4185 struct ifi2creq i2c;
4191 err = mxge_change_mtu(sc, ifr->ifr_mtu);
4195 mtx_lock(&sc->driver_mtx);
4197 mtx_unlock(&sc->driver_mtx);
4200 if (ifp->if_flags & IFF_UP) {
4201 if (!(ifp->if_drv_flags & IFF_DRV_RUNNING)) {
4202 err = mxge_open(sc);
4204 /* take care of promis can allmulti
4206 mxge_change_promisc(sc,
4207 ifp->if_flags & IFF_PROMISC);
4208 mxge_set_multicast_list(sc);
4211 if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
4215 mtx_unlock(&sc->driver_mtx);
4220 mtx_lock(&sc->driver_mtx);
4222 mtx_unlock(&sc->driver_mtx);
4225 mxge_set_multicast_list(sc);
4226 mtx_unlock(&sc->driver_mtx);
4230 mtx_lock(&sc->driver_mtx);
/* mask holds the capability bits the caller wants toggled */
4231 mask = ifr->ifr_reqcap ^ ifp->if_capenable;
4232 if (mask & IFCAP_TXCSUM) {
4233 if (IFCAP_TXCSUM & ifp->if_capenable) {
/* disabling tx checksum also forces TSO4 off, since TSO needs it */
4234 ifp->if_capenable &= ~(IFCAP_TXCSUM|IFCAP_TSO4);
4235 ifp->if_hwassist &= ~(CSUM_TCP | CSUM_UDP);
4237 ifp->if_capenable |= IFCAP_TXCSUM;
4238 ifp->if_hwassist |= (CSUM_TCP | CSUM_UDP);
4240 } else if (mask & IFCAP_RXCSUM) {
4241 if (IFCAP_RXCSUM & ifp->if_capenable) {
4242 ifp->if_capenable &= ~IFCAP_RXCSUM;
4244 ifp->if_capenable |= IFCAP_RXCSUM;
4247 if (mask & IFCAP_TSO4) {
4248 if (IFCAP_TSO4 & ifp->if_capenable) {
4249 ifp->if_capenable &= ~IFCAP_TSO4;
4250 } else if (IFCAP_TXCSUM & ifp->if_capenable) {
4251 ifp->if_capenable |= IFCAP_TSO4;
4252 ifp->if_hwassist |= CSUM_TSO;
4254 printf("mxge requires tx checksum offload"
4255 " be enabled to use TSO\n");
4260 if (mask & IFCAP_TXCSUM_IPV6) {
4261 if (IFCAP_TXCSUM_IPV6 & ifp->if_capenable) {
4262 ifp->if_capenable &= ~(IFCAP_TXCSUM_IPV6
4264 ifp->if_hwassist &= ~(CSUM_TCP_IPV6
4267 ifp->if_capenable |= IFCAP_TXCSUM_IPV6;
4268 ifp->if_hwassist |= (CSUM_TCP_IPV6
4271 } else if (mask & IFCAP_RXCSUM_IPV6) {
4272 if (IFCAP_RXCSUM_IPV6 & ifp->if_capenable) {
4273 ifp->if_capenable &= ~IFCAP_RXCSUM_IPV6;
4275 ifp->if_capenable |= IFCAP_RXCSUM_IPV6;
4278 if (mask & IFCAP_TSO6) {
4279 if (IFCAP_TSO6 & ifp->if_capenable) {
4280 ifp->if_capenable &= ~IFCAP_TSO6;
4281 } else if (IFCAP_TXCSUM_IPV6 & ifp->if_capenable) {
4282 ifp->if_capenable |= IFCAP_TSO6;
4283 ifp->if_hwassist |= CSUM_TSO;
4285 printf("mxge requires tx checksum offload"
4286 " be enabled to use TSO\n");
4290 #endif /*IFCAP_TSO6 */
4292 if (mask & IFCAP_LRO)
4293 ifp->if_capenable ^= IFCAP_LRO;
4294 if (mask & IFCAP_VLAN_HWTAGGING)
4295 ifp->if_capenable ^= IFCAP_VLAN_HWTAGGING;
4296 if (mask & IFCAP_VLAN_HWTSO)
4297 ifp->if_capenable ^= IFCAP_VLAN_HWTSO;
/* VLAN TSO only makes sense with hw tagging enabled and supported */
4299 if (!(ifp->if_capabilities & IFCAP_VLAN_HWTSO) ||
4300 !(ifp->if_capenable & IFCAP_VLAN_HWTAGGING))
4301 ifp->if_capenable &= ~IFCAP_VLAN_HWTSO;
4303 mtx_unlock(&sc->driver_mtx);
4304 VLAN_CAPABILITIES(ifp);
4309 mtx_lock(&sc->driver_mtx);
4311 mtx_unlock(&sc->driver_mtx);
4314 mxge_media_probe(sc);
4315 mtx_unlock(&sc->driver_mtx);
4316 err = ifmedia_ioctl(ifp, (struct ifreq *)data,
4317 &sc->media, command);
/* I2C reads only make sense on pluggable XFP/SFP connectors */
4321 if (sc->connector != MXGE_XFP &&
4322 sc->connector != MXGE_SFP) {
4326 err = copyin(ifr_data_get_ptr(ifr), &i2c, sizeof(i2c));
4329 mtx_lock(&sc->driver_mtx);
4331 mtx_unlock(&sc->driver_mtx);
4334 err = mxge_fetch_i2c(sc, &i2c);
4335 mtx_unlock(&sc->driver_mtx);
4337 err = copyout(&i2c, ifr->ifr_ifru.ifru_data,
4341 err = ether_ioctl(ifp, command, data);
/*
 * mxge_fetch_tunables: pull the hw.mxge.* loader tunables into the
 * driver's module-level knobs, then sanity-clamp the values before the
 * rest of attach relies on them.
 * NOTE(review): this extract elides some original lines between the
 * numbered statements below (function return type, braces, etc.).
 */
4348 mxge_fetch_tunables(mxge_softc_t *sc)
4351 	TUNABLE_INT_FETCH("hw.mxge.max_slices", &mxge_max_slices);
4352 	TUNABLE_INT_FETCH("hw.mxge.flow_control_enabled",
4353 			  &mxge_flow_control);
4354 	TUNABLE_INT_FETCH("hw.mxge.intr_coal_delay",
4355 			  &mxge_intr_coal_delay);
4356 	TUNABLE_INT_FETCH("hw.mxge.nvidia_ecrc_enable",
4357 			  &mxge_nvidia_ecrc_enable);
4358 	TUNABLE_INT_FETCH("hw.mxge.force_firmware",
4359 			  &mxge_force_firmware);
4360 	TUNABLE_INT_FETCH("hw.mxge.deassert_wait",
4361 			  &mxge_deassert_wait);
4362 	TUNABLE_INT_FETCH("hw.mxge.verbose",
4364 	TUNABLE_INT_FETCH("hw.mxge.ticks", &mxge_ticks);
4365 	TUNABLE_INT_FETCH("hw.mxge.always_promisc", &mxge_always_promisc);
	/*
	 * NOTE(review): both "rss_hash_type" and "rss_hashtype" are fetched
	 * into the same variable; if both are set, the second wins.
	 * Presumably one spelling is a legacy alias — confirm which is
	 * documented before removing either.
	 */
4366 	TUNABLE_INT_FETCH("hw.mxge.rss_hash_type", &mxge_rss_hash_type);
4367 	TUNABLE_INT_FETCH("hw.mxge.rss_hashtype", &mxge_rss_hash_type);
4368 	TUNABLE_INT_FETCH("hw.mxge.initial_mtu", &mxge_initial_mtu);
4369 	TUNABLE_INT_FETCH("hw.mxge.throttle", &mxge_throttle);
	/* reject nonsensical coalescing delays; fall back to 30us default */
4373 	if (mxge_intr_coal_delay < 0 || mxge_intr_coal_delay > 10*1000)
4374 		mxge_intr_coal_delay = 30;
	/* default watchdog/tick period: half a second */
4375 	if (mxge_ticks == 0)
4376 		mxge_ticks = hz / 2;
4377 	sc->pause = mxge_flow_control;
	/* clamp the RSS hash type to a value the firmware understands */
4378 	if (mxge_rss_hash_type < MXGEFW_RSS_HASH_TYPE_IPV4
4379 	    || mxge_rss_hash_type > MXGEFW_RSS_HASH_TYPE_MAX) {
4380 		mxge_rss_hash_type = MXGEFW_RSS_HASH_TYPE_SRC_DST_PORT;
	/*
	 * NOTE(review): the lower bound compares an MTU against
	 * ETHER_MIN_LEN (a frame length) — looks intentional as a loose
	 * floor, but verify the intended minimum.
	 */
4382 	if (mxge_initial_mtu > ETHERMTU_JUMBO ||
4383 	    mxge_initial_mtu < ETHER_MIN_LEN)
4384 		mxge_initial_mtu = ETHERMTU_JUMBO;
	/* clamp a non-zero throttle request into the supported range */
4386 	if (mxge_throttle && mxge_throttle > MXGE_MAX_THROTTLE)
4387 		mxge_throttle = MXGE_MAX_THROTTLE;
4388 	if (mxge_throttle && mxge_throttle < MXGE_MIN_THROTTLE)
4389 		mxge_throttle = MXGE_MIN_THROTTLE;
4390 	sc->throttle = mxge_throttle;
/*
 * mxge_free_slices: release all per-slice state — firmware stats DMA,
 * the (optional) buf_ring, the tx mutex, and the rx completion queue
 * DMA — then free the slice array itself.
 * NOTE(review): this extract elides some original lines (braces,
 * NULL checks, the early-return when sc->ss is NULL).
 */
4395 mxge_free_slices(mxge_softc_t *sc)
4397 	struct mxge_slice_state *ss;
4404 	for (i = 0; i < sc->num_slices; i++) {
4406 		if (ss->fw_stats != NULL) {
			/* fw_stats lives in its own DMA region */
4407 			mxge_dma_free(&ss->fw_stats_dma);
4408 			ss->fw_stats = NULL;
4409 #ifdef IFNET_BUF_RING
4410 			if (ss->tx.br != NULL) {
4411 				drbr_free(ss->tx.br, M_DEVBUF);
4415 			mtx_destroy(&ss->tx.mtx);
4417 		if (ss->rx_done.entry != NULL) {
4418 			mxge_dma_free(&ss->rx_done.dma);
4419 			ss->rx_done.entry = NULL;
4422 	free(sc->ss, M_DEVBUF);
/*
 * mxge_alloc_slices: allocate the per-slice state array plus, for each
 * slice, its rx completion (interrupt) queue DMA region, firmware stats
 * DMA region, tx mutex, and (optionally) a buf_ring.  The rx interrupt
 * queue is sized from the firmware's reported rx ring size.
 * On failure the partially-built state is torn down via
 * mxge_free_slices().
 * NOTE(review): this extract elides some original lines (error-label
 * jumps, braces, the ss = &sc->ss[i] assignments).
 */
4427 mxge_alloc_slices(mxge_softc_t *sc)
4430 	struct mxge_slice_state *ss;
4432 	int err, i, max_intr_slots;
4434 	err = mxge_send_cmd(sc, MXGEFW_CMD_GET_RX_RING_SIZE, &cmd);
4436 		device_printf(sc->dev, "Cannot determine rx ring size\n");
4439 	sc->rx_ring_size = cmd.data0;
	/* two interrupt-queue slots per rx descriptor (big + small rings) —
	 * TODO confirm the rationale for the factor of 2 */
4440 	max_intr_slots = 2 * (sc->rx_ring_size / sizeof (mcp_dma_addr_t));
4442 	bytes = sizeof (*sc->ss) * sc->num_slices;
4443 	sc->ss = malloc(bytes, M_DEVBUF, M_NOWAIT | M_ZERO);
4446 	for (i = 0; i < sc->num_slices; i++) {
4451 		/* allocate per-slice rx interrupt queues */
4453 		bytes = max_intr_slots * sizeof (*ss->rx_done.entry);
		/* page-aligned so the NIC can DMA completions into it */
4454 		err = mxge_dma_alloc(sc, &ss->rx_done.dma, bytes, 4096);
4457 		ss->rx_done.entry = ss->rx_done.dma.addr;
4458 		bzero(ss->rx_done.entry, bytes);
4461 		 * allocate the per-slice firmware stats; stats
4462 		 * (including tx) are used used only on the first
4465 #ifndef IFNET_BUF_RING
4470 		bytes = sizeof (*ss->fw_stats);
4471 		err = mxge_dma_alloc(sc, &ss->fw_stats_dma,
4472 				     sizeof (*ss->fw_stats), 64);
4475 		ss->fw_stats = (mcp_irq_data_t *)ss->fw_stats_dma.addr;
		/* per-slice tx lock, named after the device + slice index */
4476 		snprintf(ss->tx.mtx_name, sizeof(ss->tx.mtx_name),
4477 			 "%s:tx(%d)", device_get_nameunit(sc->dev), i);
4478 		mtx_init(&ss->tx.mtx, ss->tx.mtx_name, NULL, MTX_DEF);
4479 #ifdef IFNET_BUF_RING
4480 		ss->tx.br = buf_ring_alloc(2048, M_DEVBUF, M_WAITOK,
	/* error path: unwind everything allocated so far */
4488 	mxge_free_slices(sc);
/*
 * mxge_slice_probe: decide how many slices (rx/tx queue pairs) to use.
 * Requires the multi-slice tunable, an SMP system, and enough MSI-X
 * vectors; then loads the RSS-capable firmware variant and negotiates
 * the slice count with it.  Any failure falls back to a single slice
 * and restores the original firmware.
 * NOTE(review): this extract elides some original lines (early returns,
 * braces, the fallback label).
 */
4493 mxge_slice_probe(mxge_softc_t *sc)
4497 	int msix_cnt, status, max_intr_slots;
4501 	 * don't enable multiple slices if they are not enabled,
4502 	 * or if this is not an SMP system
4505 	if (mxge_max_slices == 0 || mxge_max_slices == 1 || mp_ncpus < 2)
4508 	/* see how many MSI-X interrupts are available */
4509 	msix_cnt = pci_msix_count(sc->dev);
4513 	/* now load the slice aware firmware see what it supports */
4514 	old_fw = sc->fw_name;
	/* pick the RSS firmware matching the current alignment variant */
4515 	if (old_fw == mxge_fw_aligned)
4516 		sc->fw_name = mxge_fw_rss_aligned;
4518 		sc->fw_name = mxge_fw_rss_unaligned;
4519 	status = mxge_load_firmware(sc, 0);
4521 		device_printf(sc->dev, "Falling back to a single slice\n");
4525 	/* try to send a reset command to the card to see if it
4527 	memset(&cmd, 0, sizeof (cmd));
4528 	status = mxge_send_cmd(sc, MXGEFW_CMD_RESET, &cmd);
4530 		device_printf(sc->dev, "failed reset\n");
4534 	/* get rx ring size */
4535 	status = mxge_send_cmd(sc, MXGEFW_CMD_GET_RX_RING_SIZE, &cmd);
4537 		device_printf(sc->dev, "Cannot determine rx ring size\n");
4540 	max_intr_slots = 2 * (cmd.data0 / sizeof (mcp_dma_addr_t));
4542 	/* tell it the size of the interrupt queues */
4543 	cmd.data0 = max_intr_slots * sizeof (struct mcp_slot);
4544 	status = mxge_send_cmd(sc, MXGEFW_CMD_SET_INTRQ_SIZE, &cmd);
4546 		device_printf(sc->dev, "failed MXGEFW_CMD_SET_INTRQ_SIZE\n");
4550 	/* ask the maximum number of slices it supports */
4551 	status = mxge_send_cmd(sc, MXGEFW_CMD_GET_MAX_RSS_QUEUES, &cmd);
4553 		device_printf(sc->dev,
4554 			      "failed MXGEFW_CMD_GET_MAX_RSS_QUEUES\n");
	/* cap firmware's slice count by available MSI-X vectors ... */
4557 	sc->num_slices = cmd.data0;
4558 	if (sc->num_slices > msix_cnt)
4559 		sc->num_slices = msix_cnt;
	/* ... then by CPU count (tunable -1) or the explicit tunable */
4561 	if (mxge_max_slices == -1) {
4562 		/* cap to number of CPUs in system */
4563 		if (sc->num_slices > mp_ncpus)
4564 			sc->num_slices = mp_ncpus;
4566 		if (sc->num_slices > mxge_max_slices)
4567 			sc->num_slices = mxge_max_slices;
4569 	/* make sure it is a power of two */
4570 	while (sc->num_slices & (sc->num_slices - 1))
4574 		device_printf(sc->dev, "using %d slices\n",
	/* fallback: restore the original (non-RSS) firmware */
4580 	sc->fw_name = old_fw;
4581 	(void) mxge_load_firmware(sc, 0);
/*
 * mxge_add_msix_irqs: set up one MSI-X interrupt per slice — map the
 * MSI-X table BAR, allocate the vectors, allocate an IRQ resource and
 * install mxge_intr() for each slice.  Uses cascading goto labels to
 * unwind on failure.
 * NOTE(review): this extract elides some original lines (rid setup,
 * braces, some error assignments); the unwind order below is partial.
 */
4585 mxge_add_msix_irqs(mxge_softc_t *sc)
4588 	int count, err, i, rid;
	/* the MSI-X table lives in BAR(2) on this device */
4591 	sc->msix_table_res = bus_alloc_resource_any(sc->dev, SYS_RES_MEMORY,
4594 	if (sc->msix_table_res == NULL) {
4595 		device_printf(sc->dev, "couldn't alloc MSIX table res\n");
4599 	count = sc->num_slices;
4600 	err = pci_alloc_msix(sc->dev, &count);
4602 		device_printf(sc->dev, "pci_alloc_msix: failed, wanted %d"
4603 			      "err = %d \n", sc->num_slices, err);
4604 		goto abort_with_msix_table;
	/* partial vector grants are not usable — bail and suggest a cap */
4606 	if (count < sc->num_slices) {
4607 		device_printf(sc->dev, "pci_alloc_msix: need %d, got %d\n",
4608 			      count, sc->num_slices);
4609 		device_printf(sc->dev,
4610 			      "Try setting hw.mxge.max_slices to %d\n",
4613 		goto abort_with_msix;
4615 	bytes = sizeof (*sc->msix_irq_res) * sc->num_slices;
4616 	sc->msix_irq_res = malloc(bytes, M_DEVBUF, M_NOWAIT|M_ZERO);
4617 	if (sc->msix_irq_res == NULL) {
4619 		goto abort_with_msix;
	/* allocate one IRQ resource per slice */
4622 	for (i = 0; i < sc->num_slices; i++) {
4624 		sc->msix_irq_res[i] = bus_alloc_resource_any(sc->dev,
4627 		if (sc->msix_irq_res[i] == NULL) {
4628 			device_printf(sc->dev, "couldn't allocate IRQ res"
4629 				      " for message %d\n", i);
4631 			goto abort_with_res;
4635 	bytes = sizeof (*sc->msix_ih) * sc->num_slices;
4636 	sc->msix_ih = malloc(bytes, M_DEVBUF, M_NOWAIT|M_ZERO);
	/* install the interrupt handler for each slice */
4638 	for (i = 0; i < sc->num_slices; i++) {
4639 		err = bus_setup_intr(sc->dev, sc->msix_irq_res[i],
4640 				     INTR_TYPE_NET | INTR_MPSAFE,
4641 #if __FreeBSD_version > 700030
4644 				     mxge_intr, &sc->ss[i], &sc->msix_ih[i]);
4646 			device_printf(sc->dev, "couldn't setup intr for "
4648 			goto abort_with_intr;
4650 		bus_describe_intr(sc->dev, sc->msix_irq_res[i],
4651 		    sc->msix_ih[i], "s%d", i);
4655 		device_printf(sc->dev, "using %d msix IRQs:",
4657 		for (i = 0; i < sc->num_slices; i++)
4658 			printf(" %jd", rman_get_start(sc->msix_irq_res[i]));
	/* --- error unwind: tear down in reverse order of setup --- */
4664 	for (i = 0; i < sc->num_slices; i++) {
4665 		if (sc->msix_ih[i] != NULL) {
4666 			bus_teardown_intr(sc->dev, sc->msix_irq_res[i],
4668 			sc->msix_ih[i] = NULL;
4671 	free(sc->msix_ih, M_DEVBUF);
4675 	for (i = 0; i < sc->num_slices; i++) {
4677 		if (sc->msix_irq_res[i] != NULL)
4678 			bus_release_resource(sc->dev, SYS_RES_IRQ, rid,
4679 					     sc->msix_irq_res[i]);
4680 		sc->msix_irq_res[i] = NULL;
4682 	free(sc->msix_irq_res, M_DEVBUF);
4686 	pci_release_msi(sc->dev);
4688 abort_with_msix_table:
4689 	bus_release_resource(sc->dev, SYS_RES_MEMORY, PCIR_BAR(2),
4690 			     sc->msix_table_res);
/*
 * mxge_add_single_irq: single-queue interrupt setup.  Prefer MSI if
 * exactly one message is available; otherwise fall back to legacy INTx
 * (sc->legacy_irq — rid selection elided from this extract).  Allocates
 * the IRQ resource and installs mxge_intr() for slice 0.
 */
4696 mxge_add_single_irq(mxge_softc_t *sc)
4698 	int count, err, rid;
4700 	count = pci_msi_count(sc->dev);
4701 	if (count == 1 && pci_alloc_msi(sc->dev, &count) == 0) {
4707 	sc->irq_res = bus_alloc_resource_any(sc->dev, SYS_RES_IRQ, &rid,
4708 					     RF_SHAREABLE | RF_ACTIVE);
4709 	if (sc->irq_res == NULL) {
4710 		device_printf(sc->dev, "could not alloc interrupt\n");
4714 		device_printf(sc->dev, "using %s irq %jd\n",
4715 			      sc->legacy_irq ? "INTx" : "MSI",
4716 			      rman_get_start(sc->irq_res));
4717 	err = bus_setup_intr(sc->dev, sc->irq_res,
4718 			     INTR_TYPE_NET | INTR_MPSAFE,
4719 #if __FreeBSD_version > 700030
4722 			     mxge_intr, &sc->ss[0], &sc->ih);
	/* on failure, release the IRQ resource (rid 0 = INTx, 1 = MSI)
	 * and give the MSI message back */
4724 		bus_release_resource(sc->dev, SYS_RES_IRQ,
4725 				     sc->legacy_irq ? 0 : 1, sc->irq_res);
4726 		if (!sc->legacy_irq)
4727 			pci_release_msi(sc->dev);
/*
 * mxge_rem_msix_irqs: tear down everything mxge_add_msix_irqs() set up,
 * in reverse order — handlers, IRQ resources, the MSI-X table BAR
 * mapping, and the MSI-X vectors themselves.
 * NOTE(review): rid setup inside the second loop is elided from this
 * extract.
 */
4733 mxge_rem_msix_irqs(mxge_softc_t *sc)
4737 	for (i = 0; i < sc->num_slices; i++) {
4738 		if (sc->msix_ih[i] != NULL) {
4739 			bus_teardown_intr(sc->dev, sc->msix_irq_res[i],
4741 			sc->msix_ih[i] = NULL;
4744 	free(sc->msix_ih, M_DEVBUF);
4746 	for (i = 0; i < sc->num_slices; i++) {
4748 		if (sc->msix_irq_res[i] != NULL)
4749 			bus_release_resource(sc->dev, SYS_RES_IRQ, rid,
4750 					     sc->msix_irq_res[i]);
4751 		sc->msix_irq_res[i] = NULL;
4753 	free(sc->msix_irq_res, M_DEVBUF);
4755 	bus_release_resource(sc->dev, SYS_RES_MEMORY, PCIR_BAR(2),
4756 			     sc->msix_table_res);
4758 	pci_release_msi(sc->dev);
/*
 * mxge_rem_single_irq: undo mxge_add_single_irq() — remove the handler,
 * release the IRQ resource (rid 0 = legacy INTx, rid 1 = MSI), and
 * release the MSI message if one was allocated.
 */
4763 mxge_rem_single_irq(mxge_softc_t *sc)
4765 	bus_teardown_intr(sc->dev, sc->irq_res, sc->ih);
4766 	bus_release_resource(sc->dev, SYS_RES_IRQ,
4767 			     sc->legacy_irq ? 0 : 1, sc->irq_res);
4768 	if (!sc->legacy_irq)
4769 		pci_release_msi(sc->dev);
/*
 * mxge_rem_irq: dispatch interrupt teardown to the MSI-X or
 * single-IRQ path based on the negotiated slice count.
 */
4773 mxge_rem_irq(mxge_softc_t *sc)
4775 	if (sc->num_slices > 1)
4776 		mxge_rem_msix_irqs(sc);
4778 		mxge_rem_single_irq(sc);
/*
 * mxge_add_irq: dispatch interrupt setup to the MSI-X or single-IRQ
 * path based on the negotiated slice count.
 * NOTE(review): the `0 && ...` condition below is deliberately
 * disabled code (a retry of MSI-X setup) — confirm before removing.
 */
4782 mxge_add_irq(mxge_softc_t *sc)
4786 	if (sc->num_slices > 1)
4787 		err = mxge_add_msix_irqs(sc);
4789 		err = mxge_add_single_irq(sc);
4791 	if (0 && err == 0 && sc->num_slices > 1) {
4792 		mxge_rem_msix_irqs(sc);
4793 		err = mxge_add_msix_irqs(sc);
/*
 * mxge_attach: device attach entry point.  In order: fetch tunables,
 * create the watchdog taskqueue, build the parent busdma tag, allocate
 * and configure the ifnet, initialize locks, map the board's memory
 * BAR, read the EEPROM strings, allocate command/zeropad/dmabench DMA
 * regions, select and load firmware, probe/allocate slices, reset the
 * NIC, allocate rings, hook up interrupts, advertise capabilities,
 * attach ethernet, and start the tick callout.  Failures unwind through
 * the cascading abort_with_* labels at the bottom in reverse order.
 * NOTE(review): this extract elides many original lines (error checks,
 * braces, several labels) — the numbered lines below are a partial
 * view of the function.
 */
4800 mxge_attach(device_t dev)
4803 	mxge_softc_t *sc = device_get_softc(dev);
4808 	mxge_fetch_tunables(sc);
	/* taskqueue that runs the watchdog outside interrupt context */
4810 	TASK_INIT(&sc->watchdog_task, 1, mxge_watchdog_task, sc);
4811 	sc->tq = taskqueue_create("mxge_taskq", M_WAITOK,
4812 				  taskqueue_thread_enqueue, &sc->tq);
4813 	if (sc->tq == NULL) {
4815 		goto abort_with_nothing;
	/* parent DMA tag all other tags/allocations derive from */
4818 	err = bus_dma_tag_create(bus_get_dma_tag(dev),	/* parent */
4821 				 BUS_SPACE_MAXADDR,	/* low */
4822 				 BUS_SPACE_MAXADDR,	/* high */
4823 				 NULL, NULL,		/* filter */
4824 				 65536 + 256,		/* maxsize */
4825 				 MXGE_MAX_SEND_DESC, 	/* num segs */
4826 				 65536,			/* maxsegsize */
4828 				 NULL, NULL,		/* lock */
4829 				 &sc->parent_dmat);	/* tag */
4832 		device_printf(sc->dev, "Err %d allocating parent dmat\n",
4837 	ifp = sc->ifp = if_alloc(IFT_ETHER);
4839 		device_printf(dev, "can not if_alloc()\n");
4841 		goto abort_with_parent_dmat;
4843 	if_initname(ifp, device_get_name(dev), device_get_unit(dev));
	/* per-device command and driver locks, named after the unit */
4845 	snprintf(sc->cmd_mtx_name, sizeof(sc->cmd_mtx_name), "%s:cmd",
4846 		 device_get_nameunit(dev));
4847 	mtx_init(&sc->cmd_mtx, sc->cmd_mtx_name, NULL, MTX_DEF);
4848 	snprintf(sc->driver_mtx_name, sizeof(sc->driver_mtx_name),
4849 		 "%s:drv", device_get_nameunit(dev));
4850 	mtx_init(&sc->driver_mtx, sc->driver_mtx_name,
4851 		 MTX_NETWORK_LOCK, MTX_DEF);
4853 	callout_init_mtx(&sc->co_hdl, &sc->driver_mtx, 0);
4855 	mxge_setup_cfg_space(sc);
4857 	/* Map the board into the kernel */
4859 	sc->mem_res = bus_alloc_resource_any(dev, SYS_RES_MEMORY, &rid,
4861 	if (sc->mem_res == NULL) {
4862 		device_printf(dev, "could not map memory\n");
4864 		goto abort_with_lock;
4866 	sc->sram = rman_get_virtual(sc->mem_res);
	/* usable SRAM = 2MB minus reserved regions minus 0x100 —
	 * the exact reservation layout is firmware-defined; see the
	 * Myri10GE firmware docs before changing these constants */
4867 	sc->sram_size = 2*1024*1024 - (2*(48*1024)+(32*1024)) - 0x100;
4868 	if (sc->sram_size > rman_get_size(sc->mem_res)) {
4869 		device_printf(dev, "impossible memory region size %jd\n",
4870 			      rman_get_size(sc->mem_res));
4872 		goto abort_with_mem_res;
4875 	/* make NULL terminated copy of the EEPROM strings section of
4877 	bzero(sc->eeprom_strings, MXGE_EEPROM_STRINGS_SIZE);
4878 	bus_space_read_region_1(rman_get_bustag(sc->mem_res),
4879 				rman_get_bushandle(sc->mem_res),
4880 				sc->sram_size - MXGE_EEPROM_STRINGS_SIZE,
4882 				MXGE_EEPROM_STRINGS_SIZE - 2);
4883 	err = mxge_parse_strings(sc);
4885 		goto abort_with_mem_res;
4887 	/* Enable write combining for efficient use of PCIe bus */
4890 	/* Allocate the out of band dma memory */
4891 	err = mxge_dma_alloc(sc, &sc->cmd_dma,
4892 			     sizeof (mxge_cmd_t), 64);
4894 		goto abort_with_mem_res;
4895 	sc->cmd = (mcp_cmd_response_t *) sc->cmd_dma.addr;
4896 	err = mxge_dma_alloc(sc, &sc->zeropad_dma, 64, 64);
4898 		goto abort_with_cmd_dma;
4900 	err = mxge_dma_alloc(sc, &sc->dmabench_dma, 4096, 4096);
4902 		goto abort_with_zeropad_dma;
4904 	/* select & load the firmware */
4905 	err = mxge_select_firmware(sc);
4907 		goto abort_with_dmabench;
4908 	sc->intr_coal_delay = mxge_intr_coal_delay;
4910 	mxge_slice_probe(sc);
4911 	err = mxge_alloc_slices(sc);
4913 		goto abort_with_dmabench;
4915 	err = mxge_reset(sc, 0);
4917 		goto abort_with_slices;
4919 	err = mxge_alloc_rings(sc);
4921 		device_printf(sc->dev, "failed to allocate rings\n");
4922 		goto abort_with_slices;
4925 	err = mxge_add_irq(sc);
4927 		device_printf(sc->dev, "failed to add irq\n");
4928 		goto abort_with_rings;
	/* advertise hardware capabilities to the stack */
4931 	ifp->if_baudrate = IF_Gbps(10);
4932 	ifp->if_capabilities = IFCAP_RXCSUM | IFCAP_TXCSUM | IFCAP_TSO4 |
4933 		IFCAP_VLAN_MTU | IFCAP_LINKSTATE | IFCAP_TXCSUM_IPV6 |
4935 #if defined(INET) || defined(INET6)
4936 	ifp->if_capabilities |= IFCAP_LRO;
4939 #ifdef MXGE_NEW_VLAN_API
4940 	ifp->if_capabilities |= IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_HWCSUM;
4942 	/* Only FW 1.4.32 and newer can do TSO over vlans */
4943 	if (sc->fw_ver_major == 1 && sc->fw_ver_minor == 4 &&
4944 	    sc->fw_ver_tiny >= 32)
4945 		ifp->if_capabilities |= IFCAP_VLAN_HWTSO;
4947 	sc->max_mtu = mxge_max_mtu(sc);
4948 	if (sc->max_mtu >= 9000)
4949 		ifp->if_capabilities |= IFCAP_JUMBO_MTU;
4951 		device_printf(dev, "MTU limited to %d.  Install "
4952 			      "latest firmware for 9000 byte jumbo support\n",
4953 			      sc->max_mtu - ETHER_HDR_LEN);
4954 	ifp->if_hwassist = CSUM_TCP | CSUM_UDP | CSUM_TSO;
4955 	ifp->if_hwassist |= CSUM_TCP_IPV6 | CSUM_UDP_IPV6;
4956 	/* check to see if f/w supports TSO for IPv6 */
4957 	if (!mxge_send_cmd(sc, MXGEFW_CMD_GET_MAX_TSO6_HDR_SIZE, &cmd)) {
4959 		ifp->if_capabilities |= IFCAP_TSO6;
4960 		sc->max_tso6_hlen = min(cmd.data0,
4961 					sizeof (sc->ss[0].scratch));
4963 	ifp->if_capenable = ifp->if_capabilities;
4964 	if (sc->lro_cnt == 0)
4965 		ifp->if_capenable &= ~IFCAP_LRO;
4966 	ifp->if_init = mxge_init;
4968 	ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
4969 	ifp->if_ioctl = mxge_ioctl;
4970 	ifp->if_start = mxge_start;
4971 	ifp->if_get_counter = mxge_get_counter;
4972 	ifp->if_hw_tsomax = IP_MAXPACKET - (ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN);
4973 	ifp->if_hw_tsomaxsegcount = sc->ss[0].tx.max_desc;
4974 	ifp->if_hw_tsomaxsegsize = IP_MAXPACKET;
4975 	/* Initialise the ifmedia structure */
4976 	ifmedia_init(&sc->media, 0, mxge_media_change,
4978 	mxge_media_init(sc);
4979 	mxge_media_probe(sc);
4981 	ether_ifattach(ifp, sc->mac_addr);
4982 	/* ether_ifattach sets mtu to ETHERMTU */
4983 	if (mxge_initial_mtu != ETHERMTU)
4984 		mxge_change_mtu(sc, mxge_initial_mtu);
4986 	mxge_add_sysctls(sc);
4987 #ifdef IFNET_BUF_RING
4988 	ifp->if_transmit = mxge_transmit;
4989 	ifp->if_qflush = mxge_qflush;
4991 	taskqueue_start_threads(&sc->tq, 1, PI_NET, "%s taskq",
4992 				device_get_nameunit(sc->dev));
4993 	callout_reset(&sc->co_hdl, mxge_ticks, mxge_tick, sc);
	/* --- error unwind: release resources in reverse attach order --- */
4997 	mxge_free_rings(sc);
4999 	mxge_free_slices(sc);
5000 abort_with_dmabench:
5001 	mxge_dma_free(&sc->dmabench_dma);
5002 abort_with_zeropad_dma:
5003 	mxge_dma_free(&sc->zeropad_dma);
5005 	mxge_dma_free(&sc->cmd_dma);
5007 	bus_release_resource(dev, SYS_RES_MEMORY, PCIR_BARS, sc->mem_res);
5009 	pci_disable_busmaster(dev);
5010 	mtx_destroy(&sc->cmd_mtx);
5011 	mtx_destroy(&sc->driver_mtx);
5013 abort_with_parent_dmat:
5014 	bus_dma_tag_destroy(sc->parent_dmat);
5016 	if (sc->tq != NULL) {
5017 		taskqueue_drain(sc->tq, &sc->watchdog_task);
5018 		taskqueue_free(sc->tq);
/*
 * mxge_detach: device detach entry point.  Refuses to detach while
 * vlans are configured; otherwise stops the interface if running,
 * detaches from the network stack, drains the taskqueue and callout,
 * and releases all resources acquired by mxge_attach() in reverse
 * order.
 * NOTE(review): this extract elides some original lines (the early
 * return, mxge_close() call, mxge_rem_irq()).
 */
5026 mxge_detach(device_t dev)
5028 	mxge_softc_t *sc = device_get_softc(dev);
5030 	if (mxge_vlans_active(sc)) {
5031 		device_printf(sc->dev,
5032 			      "Detach vlans before removing module\n");
5035 	mtx_lock(&sc->driver_mtx);
5037 	if (sc->ifp->if_drv_flags & IFF_DRV_RUNNING)
5039 	mtx_unlock(&sc->driver_mtx);
5040 	ether_ifdetach(sc->ifp);
	/* stop the watchdog before freeing anything it might touch */
5041 	if (sc->tq != NULL) {
5042 		taskqueue_drain(sc->tq, &sc->watchdog_task);
5043 		taskqueue_free(sc->tq);
5046 	callout_drain(&sc->co_hdl);
5047 	ifmedia_removeall(&sc->media);
	/* quiesce the NIC's rdma engine before tearing down DMA memory */
5048 	mxge_dummy_rdma(sc, 0);
5049 	mxge_rem_sysctls(sc);
5051 	mxge_free_rings(sc);
5052 	mxge_free_slices(sc);
5053 	mxge_dma_free(&sc->dmabench_dma);
5054 	mxge_dma_free(&sc->zeropad_dma);
5055 	mxge_dma_free(&sc->cmd_dma);
5056 	bus_release_resource(dev, SYS_RES_MEMORY, PCIR_BARS, sc->mem_res);
5057 	pci_disable_busmaster(dev);
5058 	mtx_destroy(&sc->cmd_mtx);
5059 	mtx_destroy(&sc->driver_mtx);
5061 	bus_dma_tag_destroy(sc->parent_dmat);
5066 mxge_shutdown(device_t dev)
5072 This file uses Myri10GE driver indentation.
5075 c-file-style:"linux"