1 /******************************************************************************
3 Copyright (c) 2006-2013, Myricom Inc.
6 Redistribution and use in source and binary forms, with or without
7 modification, are permitted provided that the following conditions are met:
9 1. Redistributions of source code must retain the above copyright notice,
10 this list of conditions and the following disclaimer.
12 2. Neither the name of the Myricom Inc, nor the names of its
13 contributors may be used to endorse or promote products derived from
14 this software without specific prior written permission.
16 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
17 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
20 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
21 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
22 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
23 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
24 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
25 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
26 POSSIBILITY OF SUCH DAMAGE.
28 ***************************************************************************/
30 #include <sys/cdefs.h>
31 __FBSDID("$FreeBSD$");
33 #include <sys/param.h>
34 #include <sys/systm.h>
35 #include <sys/linker.h>
36 #include <sys/firmware.h>
37 #include <sys/endian.h>
38 #include <sys/sockio.h>
40 #include <sys/malloc.h>
42 #include <sys/kernel.h>
44 #include <sys/module.h>
45 #include <sys/socket.h>
46 #include <sys/sysctl.h>
48 #include <sys/taskqueue.h>
51 #include <net/if_arp.h>
52 #include <net/ethernet.h>
53 #include <net/if_dl.h>
54 #include <net/if_media.h>
58 #include <net/if_types.h>
59 #include <net/if_vlan_var.h>
62 #include <netinet/in_systm.h>
63 #include <netinet/in.h>
64 #include <netinet/ip.h>
65 #include <netinet/ip6.h>
66 #include <netinet/tcp.h>
67 #include <netinet/tcp_lro.h>
68 #include <netinet6/ip6_var.h>
70 #include <machine/bus.h>
71 #include <machine/in_cksum.h>
72 #include <machine/resource.h>
77 #include <dev/pci/pcireg.h>
78 #include <dev/pci/pcivar.h>
79 #include <dev/pci/pci_private.h> /* XXX for pci_cfg_restore */
81 #include <vm/vm.h> /* for pmap_mapdev() */
84 #if defined(__i386) || defined(__amd64)
85 #include <machine/specialreg.h>
88 #include <dev/mxge/mxge_mcp.h>
89 #include <dev/mxge/mcp_gen_header.h>
90 /*#define MXGE_FAKE_IFP*/
91 #include <dev/mxge/if_mxge_var.h>
93 #include <sys/buf_ring.h>
97 #include "opt_inet6.h"
/*
 * Driver-wide settings and their compiled-in initial values.
 * Uses visible in this file: mxge_nvidia_ecrc_enable gates
 * mxge_enable_nvidia_ecrc(), mxge_force_firmware seeds
 * mxge_select_firmware(), mxge_always_promisc is tested in
 * mxge_change_promisc(), and mxge_deassert_wait is exported
 * read/write through a sysctl in mxge_add_sysctls().
 */
100 static int mxge_nvidia_ecrc_enable = 1;
101 static int mxge_force_firmware = 0;
102 static int mxge_intr_coal_delay = 30;
103 static int mxge_deassert_wait = 1;
104 static int mxge_flow_control = 1;
105 static int mxge_verbose = 0;
106 static int mxge_ticks;
107 static int mxge_max_slices = 1;
108 static int mxge_rss_hash_type = MXGEFW_RSS_HASH_TYPE_SRC_DST_PORT;
109 static int mxge_always_promisc = 0;
110 static int mxge_initial_mtu = ETHERMTU_JUMBO;
111 static int mxge_throttle = 0;
/*
 * Firmware image names.  The aligned/unaligned names are assigned to
 * sc->fw_name, which mxge_load_firmware_helper() passes to
 * firmware_get().
 */
112 static char *mxge_fw_unaligned = "mxge_ethp_z8e";
113 static char *mxge_fw_aligned = "mxge_eth_z8e";
114 static char *mxge_fw_rss_aligned = "mxge_rss_eth_z8e";
115 static char *mxge_fw_rss_unaligned = "mxge_rss_ethp_z8e";
/*
 * Forward declarations for the handlers referenced by mxge_methods[]
 * below, plus mxge_intr().
 */
117 static int mxge_probe(device_t dev);
118 static int mxge_attach(device_t dev);
119 static int mxge_detach(device_t dev);
120 static int mxge_shutdown(device_t dev);
121 static void mxge_intr(void *arg);
/*
 * Device method table: binds the probe, attach, detach and shutdown
 * device methods to this driver's handlers.
 */
123 static device_method_t mxge_methods[] =
125 /* Device interface */
126 DEVMETHOD(device_probe, mxge_probe),
127 DEVMETHOD(device_attach, mxge_attach),
128 DEVMETHOD(device_detach, mxge_detach),
129 DEVMETHOD(device_shutdown, mxge_shutdown),
/*
 * Driver description (it carries sizeof(mxge_softc_t) as the softc
 * size) followed by the module registration: the driver attaches
 * under the pci bus and declares dependencies on the firmware and
 * zlib modules.
 */
134 static driver_t mxge_driver =
138 sizeof(mxge_softc_t),
141 static devclass_t mxge_devclass;
143 /* Declare ourselves to be a child of the PCI bus.*/
144 DRIVER_MODULE(mxge, pci, mxge_driver, mxge_devclass, 0, 0);
145 MODULE_DEPEND(mxge, firmware, 1, 1, 1);
146 MODULE_DEPEND(mxge, zlib, 1, 1, 1);
/*
 * Forward declarations of internal helpers.  mxge_load_firmware() and
 * mxge_send_cmd() are called earlier in the file than they are
 * defined.
 */
148 static int mxge_load_firmware(mxge_softc_t *sc, int adopt);
149 static int mxge_send_cmd(mxge_softc_t *sc, uint32_t cmd, mxge_cmd_t *data);
150 static int mxge_close(mxge_softc_t *sc, int down);
151 static int mxge_open(mxge_softc_t *sc);
152 static void mxge_tick(void *arg);
/*
 * Device probe method.  Matches the Myricom vendor id combined with
 * either the Z8E or the Z8E_9 device id, then sets the device
 * description according to the PCI revision id.  An unrecognised
 * revision gets a generic description plus a diagnostic message.
 */
155 mxge_probe(device_t dev)
160 if ((pci_get_vendor(dev) == MXGE_PCI_VENDOR_MYRICOM) &&
161 ((pci_get_device(dev) == MXGE_PCI_DEVICE_Z8E) ||
162 (pci_get_device(dev) == MXGE_PCI_DEVICE_Z8E_9))) {
/* the revision id selects the description string */
163 rev = pci_get_revid(dev);
165 case MXGE_PCI_REV_Z8E:
166 device_set_desc(dev, "Myri10G-PCIE-8A");
168 case MXGE_PCI_REV_Z8ES:
169 device_set_desc(dev, "Myri10G-PCIE-8B");
172 device_set_desc(dev, "Myri10G-PCIE-8??");
173 device_printf(dev, "Unrecognized rev %d NIC\n",
/*
 * Request the PAT_WRITE_COMBINING attribute for the mapping at
 * sc->sram, using the size of the sc->mem_res resource as the length.
 * The pmap_change_attr() call is only compiled on i386/amd64; its
 * failure is reported through device_printf().
 */
183 mxge_enable_wc(mxge_softc_t *sc)
185 #if defined(__i386) || defined(__amd64)
190 len = rman_get_size(sc->mem_res);
191 err = pmap_change_attr((vm_offset_t) sc->sram,
192 len, PAT_WRITE_COMBINING);
194 device_printf(sc->dev, "pmap_change_attr failed, %d\n",
/*
 * Callback passed to bus_dmamap_load() by mxge_dma_alloc(): stores
 * the bus address of the first segment (segs->ds_addr) into the
 * bus_addr_t that arg points to.
 */
202 /* callback to get our DMA address */
204 mxge_dmamap_callback(void *arg, bus_dma_segment_t *segs, int nsegs,
208 *(bus_addr_t *) arg = segs->ds_addr;
/*
 * Allocate a DMA-able buffer of `bytes` bytes with the requested
 * alignment and describe it in *dma.
 *
 * Three steps, each of which logs a message on failure:
 *   1. create a DMA tag under sc->parent_dmat,
 *   2. allocate zeroed, coherent memory for it (BUS_DMA_WAITOK),
 *   3. load the map; mxge_dmamap_callback() records the resulting
 *      bus address in dma->bus_addr.
 * boundary and maxsegsize depend on the bytes/alignment test at the
 * top (the assignments themselves are not visible in this listing).
 * The trailing bus_dmamem_free()/bus_dma_tag_destroy() calls appear
 * to be the unwind path reached through the abort_with_* labels.
 */
213 mxge_dma_alloc(mxge_softc_t *sc, mxge_dma_t *dma, size_t bytes,
214 bus_size_t alignment)
217 device_t dev = sc->dev;
218 bus_size_t boundary, maxsegsize;
220 if (bytes > 4096 && alignment == 4096) {
228 /* allocate DMAable memory tags */
229 err = bus_dma_tag_create(sc->parent_dmat, /* parent */
230 alignment, /* alignment */
231 boundary, /* boundary */
232 BUS_SPACE_MAXADDR, /* low */
233 BUS_SPACE_MAXADDR, /* high */
234 NULL, NULL, /* filter */
237 maxsegsize, /* maxsegsize */
238 BUS_DMA_COHERENT, /* flags */
239 NULL, NULL, /* lock */
240 &dma->dmat); /* tag */
242 device_printf(dev, "couldn't alloc tag (err = %d)\n", err);
246 /* allocate DMAable memory & map */
247 err = bus_dmamem_alloc(dma->dmat, &dma->addr,
248 (BUS_DMA_WAITOK | BUS_DMA_COHERENT
249 | BUS_DMA_ZERO), &dma->map);
251 device_printf(dev, "couldn't alloc mem (err = %d)\n", err);
252 goto abort_with_dmat;
255 /* load the memory */
256 err = bus_dmamap_load(dma->dmat, dma->map, dma->addr, bytes,
257 mxge_dmamap_callback,
258 (void *)&dma->bus_addr, 0);
260 device_printf(dev, "couldn't load map (err = %d)\n", err);
266 bus_dmamem_free(dma->dmat, dma->addr, dma->map);
268 (void)bus_dma_tag_destroy(dma->dmat);
/*
 * Release a buffer set up by mxge_dma_alloc(), undoing its steps in
 * reverse order: unload the map, free the memory, destroy the tag.
 */
274 mxge_dma_free(mxge_dma_t *dma)
276 bus_dmamap_unload(dma->dmat, dma->map);
277 bus_dmamem_free(dma->dmat, dma->addr, dma->map);
278 (void)bus_dma_tag_destroy(dma->dmat);
282 * The eeprom strings on the lanaiX have the format
/*
 * Walk the NUL-terminated records stored back to back in
 * sc->eeprom_strings, stopping at the first empty record, and pick
 * out four kinds of entry:
 *   "MAC=" - octets parsed as hex with strtoul() into sc->mac_addr[];
 *            each parsed field is checked to be two characters long,
 *   "PC="  - copied into sc->product_code_string,
 *   "SN="  - copied into sc->serial_number_string, but only while no
 *            SN2 record has been seen,
 *   "SN2=" - copied into sc->serial_number_string, overriding SN.
 * A parse failure is reported through device_printf().
 */
289 mxge_parse_strings(mxge_softc_t *sc)
292 int i, found_mac, found_sn2;
295 ptr = sc->eeprom_strings;
298 while (*ptr != '\0') {
299 if (strncmp(ptr, "MAC=", 4) == 0) {
302 sc->mac_addr[i] = strtoul(ptr, &endptr, 16);
303 if (endptr - ptr != 2)
312 } else if (strncmp(ptr, "PC=", 3) == 0) {
314 strlcpy(sc->product_code_string, ptr,
315 sizeof(sc->product_code_string));
316 } else if (!found_sn2 && (strncmp(ptr, "SN=", 3) == 0)) {
318 strlcpy(sc->serial_number_string, ptr,
319 sizeof(sc->serial_number_string));
320 } else if (strncmp(ptr, "SN2=", 4) == 0) {
321 /* SN2 takes precedence over SN */
324 strlcpy(sc->serial_number_string, ptr,
325 sizeof(sc->serial_number_string));
/* skip past the terminating NUL of the current record */
327 while (*ptr++ != '\0') {}
334 device_printf(sc->dev, "failed to parse eeprom_strings\n");
/*
 * x86 variant: try to enable ECRC on an upstream Nvidia bridge.
 *
 * Gated by the mxge_nvidia_ecrc_enable setting.  The bridge examined
 * is the grandparent of sc->dev in the device tree and its vendor id
 * is compared against 0x10de.  The base address used to reach its
 * config space is either fixed (device id 0x005d, "ck804") or, for
 * device ids 0x0374-0x0378 ("mcp55"), derived from the 16-bit
 * register at offset 0x90 of the device at bus 0, slot 0, function 0.
 *
 * The page containing the bridge's config space is mapped with
 * pmap_mapdev(), the mapping is checked by comparing the vendor and
 * device ids read through it with the bus ivars, register 0x178 is
 * accessed through the mapping, and the page is unmapped again on
 * every visible exit path.
 */
339 #if defined __i386 || defined i386 || defined __i386__ || defined __x86_64__
341 mxge_enable_nvidia_ecrc(mxge_softc_t *sc)
344 unsigned long base, off;
346 device_t pdev, mcp55;
347 uint16_t vendor_id, device_id, word;
348 uintptr_t bus, slot, func, ivend, idev;
352 if (!mxge_nvidia_ecrc_enable)
355 pdev = device_get_parent(device_get_parent(sc->dev));
357 device_printf(sc->dev, "could not find parent?\n");
360 vendor_id = pci_read_config(pdev, PCIR_VENDOR, 2);
361 device_id = pci_read_config(pdev, PCIR_DEVICE, 2);
363 if (vendor_id != 0x10de)
368 if (device_id == 0x005d) {
369 /* ck804, base address is magic */
371 } else if (device_id >= 0x0374 && device_id <= 0x378) {
372 /* mcp55, base address stored in chipset */
373 mcp55 = pci_find_bsf(0, 0, 0);
375 0x10de == pci_read_config(mcp55, PCIR_VENDOR, 2) &&
376 0x0369 == pci_read_config(mcp55, PCIR_DEVICE, 2)) {
377 word = pci_read_config(mcp55, 0x90, 2);
378 base = ((unsigned long)word & 0x7ffeU) << 25;
385 Test below is commented because it is believed that doing
386 config read/write beyond 0xff will access the config space
387 for the next larger function. Uncomment this and remove
388 the hacky pmap_mapdev() way of accessing config space when
389 FreeBSD grows support for extended pcie config space access
392 /* See if we can, by some miracle, access the extended
394 val = pci_read_config(pdev, 0x178, 4);
395 if (val != 0xffffffff) {
397 pci_write_config(pdev, 0x178, val, 4);
401 /* Rather than using normal pci config space writes, we must
402 * map the Nvidia config space ourselves. This is because on
403 * opteron/nvidia class machine the 0xe000000 mapping is
404 * handled by the nvidia chipset, that means the internal PCI
405 * device (the on-chip northbridge), or the amd-8131 bridge
406 * and things behind them are not visible by this method.
409 BUS_READ_IVAR(device_get_parent(pdev), pdev,
411 BUS_READ_IVAR(device_get_parent(pdev), pdev,
412 PCI_IVAR_SLOT, &slot);
413 BUS_READ_IVAR(device_get_parent(pdev), pdev,
414 PCI_IVAR_FUNCTION, &func);
415 BUS_READ_IVAR(device_get_parent(pdev), pdev,
416 PCI_IVAR_VENDOR, &ivend);
417 BUS_READ_IVAR(device_get_parent(pdev), pdev,
418 PCI_IVAR_DEVICE, &idev);
421 + 0x00100000UL * (unsigned long)bus
422 + 0x00001000UL * (unsigned long)(func
425 /* map it into the kernel */
426 va = pmap_mapdev(trunc_page((vm_paddr_t)off), PAGE_SIZE);
430 device_printf(sc->dev, "pmap_kenter_temporary didn't\n");
433 /* get a pointer to the config space mapped into the kernel */
434 cfgptr = va + (off & PAGE_MASK);
436 /* make sure that we can really access it */
437 vendor_id = *(uint16_t *)(cfgptr + PCIR_VENDOR);
438 device_id = *(uint16_t *)(cfgptr + PCIR_DEVICE);
439 if (! (vendor_id == ivend && device_id == idev)) {
440 device_printf(sc->dev, "mapping failed: 0x%x:0x%x\n",
441 vendor_id, device_id);
442 pmap_unmapdev((vm_offset_t)va, PAGE_SIZE);
446 ptr32 = (uint32_t*)(cfgptr + 0x178);
449 if (val == 0xffffffff) {
450 device_printf(sc->dev, "extended mapping failed\n");
451 pmap_unmapdev((vm_offset_t)va, PAGE_SIZE);
455 pmap_unmapdev((vm_offset_t)va, PAGE_SIZE);
457 device_printf(sc->dev,
458 "Enabled ECRC on upstream Nvidia bridge "
460 (int)bus, (int)slot, (int)func);
/*
 * Second definition of mxge_enable_nvidia_ecrc(), presumably the
 * alternative branch of the architecture #if that guards the x86
 * version.  The only visible action is a diagnostic message.
 */
465 mxge_enable_nvidia_ecrc(mxge_softc_t *sc)
467 device_printf(sc->dev,
468 "Nforce 4 chipset on non-x86/amd64!?!?!\n");
/*
 * Have the firmware run DMA tests against the sc->dmabench_dma buffer
 * using the command given in test_type.
 *
 * Three commands are issued, all with the buffer's bus address in
 * data0/data1 and len = sc->tx_boundary:
 *   data2 = len * 0x10000  -> result scaled into sc->read_dma
 *   data2 = len * 0x1      -> result scaled into sc->write_dma
 *   data2 = len * 0x10001  -> result scaled into sc->read_write_dma
 * Each result is (transfers * len * 2) / ticks, with an extra factor
 * of 2 for the combined test.  A non-zero status is logged unless
 * test_type is MXGEFW_CMD_UNALIGNED_TEST.
 */
475 mxge_dma_test(mxge_softc_t *sc, int test_type)
478 bus_addr_t dmatest_bus = sc->dmabench_dma.bus_addr;
484 /* Run a small DMA test.
485 * The magic multipliers to the length tell the firmware
486 * to do DMA read, write, or read+write tests. The
487 * results are returned in cmd.data0. The upper 16
488 * bits of the return is the number of transfers completed.
489 * The lower 16 bits is the time in 0.5us ticks that the
490 * transfers took to complete.
493 len = sc->tx_boundary;
495 cmd.data0 = MXGE_LOWPART_TO_U32(dmatest_bus);
496 cmd.data1 = MXGE_HIGHPART_TO_U32(dmatest_bus);
497 cmd.data2 = len * 0x10000;
498 status = mxge_send_cmd(sc, test_type, &cmd);
503 sc->read_dma = ((cmd.data0>>16) * len * 2) /
504 (cmd.data0 & 0xffff);
505 cmd.data0 = MXGE_LOWPART_TO_U32(dmatest_bus);
506 cmd.data1 = MXGE_HIGHPART_TO_U32(dmatest_bus);
507 cmd.data2 = len * 0x1;
508 status = mxge_send_cmd(sc, test_type, &cmd);
513 sc->write_dma = ((cmd.data0>>16) * len * 2) /
514 (cmd.data0 & 0xffff);
516 cmd.data0 = MXGE_LOWPART_TO_U32(dmatest_bus);
517 cmd.data1 = MXGE_HIGHPART_TO_U32(dmatest_bus);
518 cmd.data2 = len * 0x10001;
519 status = mxge_send_cmd(sc, test_type, &cmd);
524 sc->read_write_dma = ((cmd.data0>>16) * len * 2 * 2) /
525 (cmd.data0 & 0xffff);
528 if (status != 0 && test_type != MXGEFW_CMD_UNALIGNED_TEST)
529 device_printf(sc->dev, "DMA %s benchmark failed: %d\n",
536 * The Lanai Z8E PCI-E interface achieves higher Read-DMA throughput
537 * when the PCI-E Completion packets are aligned on an 8-byte
538 * boundary. Some PCI-E chip sets always align Completion packets; on
539 * the ones that do not, the alignment can be enforced by enabling
540 * ECRC generation (if supported).
542 * When PCI-E Completion packets are not aligned, it is actually more
543 * efficient to limit Read-DMA transactions to 2KB, rather than 4KB.
545 * If the driver can neither enable ECRC nor verify that it has
546 * already been enabled, then it must use a firmware image which works
547 * around unaligned completion packets (ethp_z8e.dat), and it should
548 * also ensure that it never gives the device a Read-DMA which is
549 * larger than 2KB by setting the tx_boundary to 2KB. If ECRC is
550 * enabled, then the driver should use the aligned (eth_z8e.dat)
551 * firmware image, and set tx_boundary to 4KB.
/*
 * Probe whether the aligned firmware image can be used on this host.
 *
 * Starts from a 4KB tx_boundary and drops it to 2KB when the PCIe
 * device control register (capability offset + 0x8) does not have
 * the value 5 in bits 12 and up, i.e. the max read request size is
 * not 4KB.  The aligned image is then loaded with adopt = 1, ECRC
 * enabling is attempted, and, for revisions older than
 * MXGE_PCI_REV_Z8ES, MXGEFW_CMD_UNALIGNED_TEST is run through
 * mxge_dma_test().  A failing test is logged; ENOSYS additionally
 * prints a hint to install newer firmware.
 *
 * The third argument of pci_find_cap() must be the address of `reg`,
 * the variable that receives the capability offset read back on the
 * following line.
 */
555 mxge_firmware_probe(mxge_softc_t *sc)
557 device_t dev = sc->dev;
561 sc->tx_boundary = 4096;
563 * Verify the max read request size was set to 4KB
564 * before trying the test with 4KB.
566 if (pci_find_cap(dev, PCIY_EXPRESS, &reg) == 0) {
567 pectl = pci_read_config(dev, reg + 0x8, 2);
568 if ((pectl & (5 << 12)) != (5 << 12)) {
569 device_printf(dev, "Max Read Req. size != 4k (0x%x\n",
571 sc->tx_boundary = 2048;
576 * load the optimized firmware (which assumes aligned PCIe
577 * completions) in order to see if it works on this host.
579 sc->fw_name = mxge_fw_aligned;
580 status = mxge_load_firmware(sc, 1);
586 * Enable ECRC if possible
588 mxge_enable_nvidia_ecrc(sc);
591 * Run a DMA test which watches for unaligned completions and
592 * aborts on the first one seen. Not required on Z8ES or newer.
594 if (pci_get_revid(sc->dev) >= MXGE_PCI_REV_Z8ES)
596 status = mxge_dma_test(sc, MXGEFW_CMD_UNALIGNED_TEST);
598 return 0; /* keep the aligned firmware */
601 device_printf(dev, "DMA test failed: %d\n", status);
602 if (status == ENOSYS)
603 device_printf(dev, "Falling back to ethp! "
604 "Please install up to date fw\n");
/*
 * Choose between the aligned and unaligned firmware images, set the
 * matching tx_boundary (4096 for aligned, 2048 for unaligned), and
 * load the chosen image with mxge_load_firmware(sc, 0).
 *
 * Inputs to the decision, in the order they are examined:
 *   - force_firmware, initialised from mxge_force_firmware and
 *     possibly replaced by sc->throttle (the guarding condition is
 *     not visible in this listing); a non-zero value forces the
 *     choice and logs it,
 *   - a known PCIe link width of 4 or less, which is logged with a
 *     reduced-performance warning,
 *   - the result of mxge_firmware_probe().
 */
609 mxge_select_firmware(mxge_softc_t *sc)
612 int force_firmware = mxge_force_firmware;
615 force_firmware = sc->throttle;
617 if (force_firmware != 0) {
618 if (force_firmware == 1)
623 device_printf(sc->dev,
624 "Assuming %s completions (forced)\n",
625 aligned ? "aligned" : "unaligned");
629 /* if the PCIe link width is 4 or less, we can use the aligned
630 firmware and skip any checks */
631 if (sc->link_width != 0 && sc->link_width <= 4) {
632 device_printf(sc->dev,
633 "PCIe x%d Link, expect reduced performance\n",
639 if (0 == mxge_firmware_probe(sc))
644 sc->fw_name = mxge_fw_aligned;
645 sc->tx_boundary = 4096;
647 sc->fw_name = mxge_fw_unaligned;
648 sc->tx_boundary = 2048;
650 return (mxge_load_firmware(sc, 0));
/*
 * Check a firmware header and record its version in the softc.
 *
 * The header's mcp_type (big-endian) must be MCP_TYPE_ETH.  The
 * version string is copied to sc->fw_version and parsed as
 * "major.minor.tiny" into sc->fw_ver_major/minor/tiny.  Major and
 * minor are compared with MXGEFW_VERSION_MAJOR/MINOR; on a mismatch
 * both the found and the required versions are logged.
 */
654 mxge_validate_firmware(mxge_softc_t *sc, const mcp_gen_header_t *hdr)
658 if (be32toh(hdr->mcp_type) != MCP_TYPE_ETH) {
659 device_printf(sc->dev, "Bad firmware type: 0x%x\n",
660 be32toh(hdr->mcp_type));
664 /* save firmware version for sysctl */
665 strlcpy(sc->fw_version, hdr->version, sizeof(sc->fw_version));
667 device_printf(sc->dev, "firmware id: %s\n", hdr->version);
669 sscanf(sc->fw_version, "%d.%d.%d", &sc->fw_ver_major,
670 &sc->fw_ver_minor, &sc->fw_ver_tiny);
672 if (!(sc->fw_ver_major == MXGEFW_VERSION_MAJOR
673 && sc->fw_ver_minor == MXGEFW_VERSION_MINOR)) {
674 device_printf(sc->dev, "Found firmware version %s\n",
676 device_printf(sc->dev, "Driver needs %d.%d\n",
677 MXGEFW_VERSION_MAJOR, MXGEFW_VERSION_MINOR);
/*
 * Allocation hook: obtains items * size bytes of M_TEMP memory
 * without sleeping (M_NOWAIT).  The first argument is unused by the
 * visible code.
 * NOTE(review): items * size is computed in u_int with no overflow
 * check - confirm callers keep the product in range.
 */
685 z_alloc(void *nil, u_int items, u_int size)
689 ptr = malloc(items * size, M_TEMP, M_NOWAIT);
/*
 * Presumably the release counterpart of z_alloc(); the body is not
 * visible in this listing.
 */
694 z_free(void *nil, void *ptr)
/*
 * Fetch, decompress, validate and copy a firmware image to the NIC.
 *
 * Steps visible here:
 *   1. firmware_get(sc->fw_name); a missing image is logged.
 *   2. Set up a zlib stream and allocate an output buffer whose size
 *      is taken from fw->version (M_TEMP, M_NOWAIT).
 *   3. inflate() with Z_FINISH; anything but Z_STREAM_END is logged
 *      and aborts.
 *   4. Read the header offset stored at MCP_HEADER_PTR_OFFSET in the
 *      inflated image; it must be 4-byte aligned and the header must
 *      fit inside the image.
 *   5. mxge_validate_firmware() on that header.
 *   6. Copy the image to sc->sram + MXGE_FW_OFFSET with
 *      mxge_pio_copy(), at most 256 bytes per call.
 * The inflate buffer is freed and the firmware reference is dropped
 * with FIRMWARE_UNLOAD at the end.
 */
701 mxge_load_firmware_helper(mxge_softc_t *sc, uint32_t *limit)
704 char *inflate_buffer;
705 const struct firmware *fw;
706 const mcp_gen_header_t *hdr;
713 fw = firmware_get(sc->fw_name);
715 device_printf(sc->dev, "Could not find firmware image %s\n",
722 /* setup zlib and decompress f/w */
723 bzero(&zs, sizeof (zs));
726 status = inflateInit(&zs);
727 if (status != Z_OK) {
732 /* the uncompressed size is stored as the firmware version,
733 which would otherwise go unused */
734 fw_len = (size_t) fw->version;
735 inflate_buffer = malloc(fw_len, M_TEMP, M_NOWAIT);
736 if (inflate_buffer == NULL)
738 zs.avail_in = fw->datasize;
739 zs.next_in = __DECONST(char *, fw->data);
740 zs.avail_out = fw_len;
741 zs.next_out = inflate_buffer;
742 status = inflate(&zs, Z_FINISH);
743 if (status != Z_STREAM_END) {
744 device_printf(sc->dev, "zlib %d\n", status);
746 goto abort_with_buffer;
/* locate the firmware header inside the inflated image */
750 hdr_offset = htobe32(*(const uint32_t *)
751 (inflate_buffer + MCP_HEADER_PTR_OFFSET));
752 if ((hdr_offset & 3) || hdr_offset + sizeof(*hdr) > fw_len) {
753 device_printf(sc->dev, "Bad firmware file");
755 goto abort_with_buffer;
757 hdr = (const void*)(inflate_buffer + hdr_offset);
759 status = mxge_validate_firmware(sc, hdr);
761 goto abort_with_buffer;
763 /* Copy the inflated firmware to NIC SRAM. */
764 for (i = 0; i < fw_len; i += 256) {
765 mxge_pio_copy(sc->sram + MXGE_FW_OFFSET + i,
767 min(256U, (unsigned)(fw_len - i)));
776 free(inflate_buffer, M_TEMP);
780 firmware_put(fw, FIRMWARE_UNLOAD);
785 * Enable or disable periodic RDMAs from the host to make certain
786 * chipsets resend dropped PCIe messages
/*
 * Send the boot-time "dummy RDMA" request to the NIC.
 *
 * A request is built in an 8-byte-aligned scratch buffer as
 * big-endian 32-bit words: confirmation address (bus address of
 * sc->cmd_dma, high then low), confirmation data 0xffffffff, dummy
 * address (bus address of sc->zeropad_dma, high then low) and the
 * enable flag.  64 bytes are PIO-copied to
 * sc->sram + MXGEFW_BOOT_DUMMY_RDMA, after which the confirmation
 * word at sc->cmd is polled (bounded by i < 20) for 0xffffffff.
 * A missing confirmation is logged.
 */
790 mxge_dummy_rdma(mxge_softc_t *sc, int enable)
793 volatile uint32_t *confirm;
794 volatile char *submit;
795 uint32_t *buf, dma_low, dma_high;
798 buf = (uint32_t *)((unsigned long)(buf_bytes + 7) & ~7UL);
800 /* clear confirmation addr */
801 confirm = (volatile uint32_t *)sc->cmd;
805 /* send an rdma command to the PCIe engine, and wait for the
806 response in the confirmation address. The firmware should
807 write a -1 there to indicate it is alive and well
810 dma_low = MXGE_LOWPART_TO_U32(sc->cmd_dma.bus_addr);
811 dma_high = MXGE_HIGHPART_TO_U32(sc->cmd_dma.bus_addr);
812 buf[0] = htobe32(dma_high); /* confirm addr MSW */
813 buf[1] = htobe32(dma_low); /* confirm addr LSW */
814 buf[2] = htobe32(0xffffffff); /* confirm data */
815 dma_low = MXGE_LOWPART_TO_U32(sc->zeropad_dma.bus_addr);
816 dma_high = MXGE_HIGHPART_TO_U32(sc->zeropad_dma.bus_addr);
817 buf[3] = htobe32(dma_high); /* dummy addr MSW */
818 buf[4] = htobe32(dma_low); /* dummy addr LSW */
819 buf[5] = htobe32(enable); /* enable? */
822 submit = (volatile char *)(sc->sram + MXGEFW_BOOT_DUMMY_RDMA);
824 mxge_pio_copy(submit, buf, 64);
829 while (*confirm != 0xffffffff && i < 20) {
833 if (*confirm != 0xffffffff) {
834 device_printf(sc->dev, "dummy rdma %s failed (%p = 0x%x)",
835 (enable ? "enable" : "disable"), confirm,
/*
 * Issue one command to the running firmware and wait for its reply.
 *
 * sc   - device state; sc->cmd is the response area and
 *        sc->cmd_dma.bus_addr its bus address.
 * cmd  - firmware command code.
 * data - in: data0..data2 arguments; out: data0 receives the
 *        response data on the success path.
 *
 * The command is assembled big-endian in an 8-byte-aligned stack
 * buffer.  With sc->cmd_mtx held, response->result is preset to
 * 0xffffffff, the buffer is PIO-copied to sc->sram + MXGEFW_ETH_CMD,
 * and the result word is polled up to 20 times with a POSTREAD sync
 * before each look.  Distinct cases exist for MXGEFW_CMD_UNKNOWN,
 * MXGEFW_CMD_ERROR_UNALIGNED, MXGEFW_CMD_ERROR_BUSY and
 * MXGEFW_CMD_ERROR_I2C_ABSENT; other failures and a timeout are
 * logged.
 */
842 mxge_send_cmd(mxge_softc_t *sc, uint32_t cmd, mxge_cmd_t *data)
845 char buf_bytes[sizeof(*buf) + 8];
846 volatile mcp_cmd_response_t *response = sc->cmd;
847 volatile char *cmd_addr = sc->sram + MXGEFW_ETH_CMD;
848 uint32_t dma_low, dma_high;
849 int err, sleep_total = 0;
851 /* ensure buf is aligned to 8 bytes */
852 buf = (mcp_cmd_t *)((unsigned long)(buf_bytes + 7) & ~7UL);
854 buf->data0 = htobe32(data->data0);
855 buf->data1 = htobe32(data->data1);
856 buf->data2 = htobe32(data->data2);
857 buf->cmd = htobe32(cmd);
858 dma_low = MXGE_LOWPART_TO_U32(sc->cmd_dma.bus_addr);
859 dma_high = MXGE_HIGHPART_TO_U32(sc->cmd_dma.bus_addr);
861 buf->response_addr.low = htobe32(dma_low);
862 buf->response_addr.high = htobe32(dma_high);
863 mtx_lock(&sc->cmd_mtx);
/* sentinel: polled below until the reply replaces it */
864 response->result = 0xffffffff;
866 mxge_pio_copy((volatile void *)cmd_addr, buf, sizeof (*buf));
868 /* wait up to 20ms */
870 for (sleep_total = 0; sleep_total < 20; sleep_total++) {
871 bus_dmamap_sync(sc->cmd_dma.dmat,
872 sc->cmd_dma.map, BUS_DMASYNC_POSTREAD);
874 switch (be32toh(response->result)) {
876 data->data0 = be32toh(response->data);
882 case MXGEFW_CMD_UNKNOWN:
885 case MXGEFW_CMD_ERROR_UNALIGNED:
888 case MXGEFW_CMD_ERROR_BUSY:
891 case MXGEFW_CMD_ERROR_I2C_ABSENT:
895 device_printf(sc->dev,
897 "failed, result = %d\n",
898 cmd, be32toh(response->result));
906 device_printf(sc->dev, "mxge: command %d timed out"
908 cmd, be32toh(response->result));
909 mtx_unlock(&sc->cmd_mtx);
/*
 * Validate the firmware that is already running on the NIC so that
 * it can be used instead of loading an image.
 *
 * The header offset is read from sc->sram + MCP_HEADER_PTR_OFFSET;
 * it must be 4-byte aligned and the header must lie inside
 * sc->sram_size.  The header is copied from SRAM into a temporary
 * M_DEVBUF allocation (M_NOWAIT) and passed to
 * mxge_validate_firmware().  Firmware versions 1.4.4 through 1.4.11
 * set sc->adopted_rx_filter_bug and log that the workaround is
 * active.
 */
914 mxge_adopt_running_firmware(mxge_softc_t *sc)
916 struct mcp_gen_header *hdr;
917 const size_t bytes = sizeof (struct mcp_gen_header);
921 /* find running firmware header */
922 hdr_offset = htobe32(*(volatile uint32_t *)
923 (sc->sram + MCP_HEADER_PTR_OFFSET));
925 if ((hdr_offset & 3) || hdr_offset + sizeof(*hdr) > sc->sram_size) {
926 device_printf(sc->dev,
927 "Running firmware has bad header offset (%d)\n",
932 /* copy header of running firmware from SRAM to host memory to
933 * validate firmware */
934 hdr = malloc(bytes, M_DEVBUF, M_NOWAIT);
936 device_printf(sc->dev, "could not malloc firmware hdr\n");
939 bus_space_read_region_1(rman_get_bustag(sc->mem_res),
940 rman_get_bushandle(sc->mem_res),
941 hdr_offset, (char *)hdr, bytes);
942 status = mxge_validate_firmware(sc, hdr);
946 * check to see if adopted firmware has bug where adopting
947 * it will cause broadcasts to be filtered unless the NIC
948 * is kept in ALLMULTI mode
950 if (sc->fw_ver_major == 1 && sc->fw_ver_minor == 4 &&
951 sc->fw_ver_tiny >= 4 && sc->fw_ver_tiny <= 11) {
952 sc->adopted_rx_filter_bug = 1;
953 device_printf(sc->dev, "Adopting fw %d.%d.%d: "
954 "working around rx filter bug\n",
955 sc->fw_ver_major, sc->fw_ver_minor,
/*
 * Get firmware running on the NIC.
 *
 * mxge_load_firmware_helper() is called with size preset to
 * sc->sram_size.  The adoption path calls
 * mxge_adopt_running_firmware(), logs the outcome and, after a
 * successful adoption, sets sc->fw_name to the unaligned image name
 * and sc->tx_boundary to 2048.
 *
 * A handoff request is then built in an 8-byte-aligned scratch
 * buffer as big-endian words: confirmation address (bus address of
 * sc->cmd_dma), confirmation data 0xffffffff, code start
 * MXGE_FW_OFFSET + 8, code length size - 8, copy destination 8 and
 * jump target 0.  64 bytes are PIO-copied to
 * sc->sram + MXGEFW_BOOT_HANDOFF and the confirmation word at sc->cmd
 * is polled (bounded by i < 20) for 0xffffffff; a missing
 * confirmation is logged.
 */
964 mxge_load_firmware(mxge_softc_t *sc, int adopt)
966 volatile uint32_t *confirm;
967 volatile char *submit;
969 uint32_t *buf, size, dma_low, dma_high;
972 buf = (uint32_t *)((unsigned long)(buf_bytes + 7) & ~7UL);
974 size = sc->sram_size;
975 status = mxge_load_firmware_helper(sc, &size);
979 /* Try to use the currently running firmware, if
981 status = mxge_adopt_running_firmware(sc);
983 device_printf(sc->dev,
984 "failed to adopt running firmware\n");
987 device_printf(sc->dev,
988 "Successfully adopted running firmware\n");
989 if (sc->tx_boundary == 4096) {
990 device_printf(sc->dev,
991 "Using firmware currently running on NIC"
993 device_printf(sc->dev,
994 "performance consider loading optimized "
997 sc->fw_name = mxge_fw_unaligned;
998 sc->tx_boundary = 2048;
1001 /* clear confirmation addr */
1002 confirm = (volatile uint32_t *)sc->cmd;
1005 /* send a reload command to the bootstrap MCP, and wait for the
1006 response in the confirmation address. The firmware should
1007 write a -1 there to indicate it is alive and well
1010 dma_low = MXGE_LOWPART_TO_U32(sc->cmd_dma.bus_addr);
1011 dma_high = MXGE_HIGHPART_TO_U32(sc->cmd_dma.bus_addr);
1013 buf[0] = htobe32(dma_high); /* confirm addr MSW */
1014 buf[1] = htobe32(dma_low); /* confirm addr LSW */
1015 buf[2] = htobe32(0xffffffff); /* confirm data */
1017 /* FIX: All newest firmware should un-protect the bottom of
1018 the sram before handoff. However, the very first interfaces
1019 do not. Therefore the handoff copy must skip the first 8 bytes
1021 /* where the code starts*/
1022 buf[3] = htobe32(MXGE_FW_OFFSET + 8);
1023 buf[4] = htobe32(size - 8); /* length of code */
1024 buf[5] = htobe32(8); /* where to copy to */
1025 buf[6] = htobe32(0); /* where to jump to */
1027 submit = (volatile char *)(sc->sram + MXGEFW_BOOT_HANDOFF);
1028 mxge_pio_copy(submit, buf, 64);
1033 while (*confirm != 0xffffffff && i < 20) {
1036 bus_dmamap_sync(sc->cmd_dma.dmat,
1037 sc->cmd_dma.map, BUS_DMASYNC_POSTREAD);
1039 if (*confirm != 0xffffffff) {
1040 device_printf(sc->dev,"handoff failed (%p = 0x%x)",
/*
 * Program the station address held in sc->mac_addr into the firmware.
 *
 * The six octets are packed most-significant first: octets 0-3 into
 * cmd.data0 and octets 4-5 into the low 16 bits of cmd.data1, then
 * sent with MXGEFW_SET_MAC_ADDRESS.
 *
 * Octets are widened to uint32_t before shifting: a uint8_t promotes
 * to int, and shifting a value >= 0x80 left by 24 would shift into
 * the sign bit, which is undefined behaviour.
 */
1049 mxge_update_mac_address(mxge_softc_t *sc)
1052 uint8_t *addr = sc->mac_addr;
1056 cmd.data0 = (((uint32_t)addr[0] << 24) | ((uint32_t)addr[1] << 16)
1057 | ((uint32_t)addr[2] << 8) | addr[3]);
1059 cmd.data1 = ((addr[4] << 8) | (addr[5]));
1061 status = mxge_send_cmd(sc, MXGEFW_SET_MAC_ADDRESS, &cmd);
/*
 * Switch flow control on or off in the firmware by sending either
 * MXGEFW_ENABLE_FLOW_CONTROL or MXGEFW_DISABLE_FLOW_CONTROL
 * (presumably selected by `pause`; the test itself is not visible in
 * this listing).  A failure is logged.
 */
1066 mxge_change_pause(mxge_softc_t *sc, int pause)
1072 status = mxge_send_cmd(sc, MXGEFW_ENABLE_FLOW_CONTROL,
1075 status = mxge_send_cmd(sc, MXGEFW_DISABLE_FLOW_CONTROL,
1079 device_printf(sc->dev, "Failed to set flow control mode\n");
/*
 * Switch promiscuous mode on or off in the firmware by sending
 * either MXGEFW_ENABLE_PROMISC or MXGEFW_DISABLE_PROMISC.  The
 * mxge_always_promisc setting is consulted first (its effect is not
 * visible in this listing).  A failure is logged.
 */
1087 mxge_change_promisc(mxge_softc_t *sc, int promisc)
1092 if (mxge_always_promisc)
1096 status = mxge_send_cmd(sc, MXGEFW_ENABLE_PROMISC,
1099 status = mxge_send_cmd(sc, MXGEFW_DISABLE_PROMISC,
1103 device_printf(sc->dev, "Failed to set promisc mode\n");
/*
 * Reprogram the firmware's multicast filter from the interface's
 * multicast address list.
 *
 * Sequence:
 *   1. Nothing is sent when sc->fw_multicast_support is clear.
 *   2. MXGEFW_ENABLE_ALLMULTI turns filtering off while the list is
 *      rebuilt.
 *   3. sc->adopted_rx_filter_bug and IFF_ALLMULTI are then checked;
 *      per the existing comment, IFF_ALLMULTI ends the work here.
 *   4. MXGEFW_LEAVE_ALL_MULTICAST_GROUPS flushes the old filters.
 *   5. With the multicast list read-locked, every AF_LINK entry is
 *      copied into cmd.data0/data1, converted with htonl() and sent
 *      with MXGEFW_JOIN_MULTICAST_GROUP; a failure drops the lock
 *      and aborts, leaving filtering off.
 *   6. MXGEFW_DISABLE_ALLMULTI turns filtering back on.
 * Every failing command is logged with its error status.
 */
1108 mxge_set_multicast_list(mxge_softc_t *sc)
1111 struct ifmultiaddr *ifma;
1112 struct ifnet *ifp = sc->ifp;
1115 /* This firmware is known to not support multicast */
1116 if (!sc->fw_multicast_support)
1119 /* Disable multicast filtering while we play with the lists*/
1120 err = mxge_send_cmd(sc, MXGEFW_ENABLE_ALLMULTI, &cmd);
1122 device_printf(sc->dev, "Failed MXGEFW_ENABLE_ALLMULTI,"
1123 " error status: %d\n", err);
1127 if (sc->adopted_rx_filter_bug)
1130 if (ifp->if_flags & IFF_ALLMULTI)
1131 /* request to disable multicast filtering, so quit here */
1134 /* Flush all the filters */
1136 err = mxge_send_cmd(sc, MXGEFW_LEAVE_ALL_MULTICAST_GROUPS, &cmd);
1138 device_printf(sc->dev,
1139 "Failed MXGEFW_LEAVE_ALL_MULTICAST_GROUPS"
1140 ", error status: %d\n", err);
1144 /* Walk the multicast list, and add each address */
1146 if_maddr_rlock(ifp);
1147 TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
1148 if (ifma->ifma_addr->sa_family != AF_LINK)
1150 bcopy(LLADDR((struct sockaddr_dl *)ifma->ifma_addr),
1152 bcopy(LLADDR((struct sockaddr_dl *)ifma->ifma_addr) + 4,
1154 cmd.data0 = htonl(cmd.data0);
1155 cmd.data1 = htonl(cmd.data1);
1156 err = mxge_send_cmd(sc, MXGEFW_JOIN_MULTICAST_GROUP, &cmd);
1158 device_printf(sc->dev, "Failed "
1159 "MXGEFW_JOIN_MULTICAST_GROUP, error status:"
1161 /* abort, leaving multicast filtering off */
1162 if_maddr_runlock(ifp);
1166 if_maddr_runlock(ifp);
1167 /* Enable multicast filtering */
1168 err = mxge_send_cmd(sc, MXGEFW_DISABLE_ALLMULTI, &cmd);
1170 device_printf(sc->dev, "Failed MXGEFW_DISABLE_ALLMULTI"
1171 ", error status: %d\n", err);
/*
 * Work out the largest MTU the driver can offer.
 *
 * Returns MXGEFW_MAX_MTU - MXGEFW_PAD when a page-sized jumbo buffer
 * (MJUMPAGESIZE - MXGEFW_PAD) already exceeds MXGEFW_MAX_MTU, or
 * when the MXGEFW_CMD_ALWAYS_USE_N_BIG_BUFFERS probe allows it
 * (presumably on success; the status test is not visible in this
 * listing).  Otherwise returns MJUMPAGESIZE - MXGEFW_PAD.
 */
1176 mxge_max_mtu(mxge_softc_t *sc)
1181 if (MJUMPAGESIZE - MXGEFW_PAD > MXGEFW_MAX_MTU)
1182 return MXGEFW_MAX_MTU - MXGEFW_PAD;
1184 /* try to set nbufs to see if we can
1185 use virtually contiguous jumbos */
1187 status = mxge_send_cmd(sc, MXGEFW_CMD_ALWAYS_USE_N_BIG_BUFFERS,
1190 return MXGEFW_MAX_MTU - MXGEFW_PAD;
1192 /* otherwise, we're limited to MJUMPAGESIZE */
1193 return MJUMPAGESIZE - MXGEFW_PAD;
/*
 * Reset the firmware and rebuild the state shared between driver and
 * firmware.
 *
 * Order of operations visible here:
 *   1. MXGEFW_CMD_RESET (failure is logged).
 *   2. mxge_dummy_rdma(sc, 1).
 *   3. MXGEFW_CMD_SET_INTRQ_SIZE with sc->rx_ring_size.
 *   4. With more than one slice: MXGEFW_CMD_GET_MAX_RSS_QUEUES, then
 *      MXGEFW_CMD_ENABLE_RSS_QUEUES with one interrupt per slice
 *      (plus multiple TX queues when IFNET_BUF_RING is defined).
 *   5. When interrupts_setup is set: clear each slice's rx_done ring
 *      and pass its bus address with MXGEFW_CMD_SET_INTRQ_DMA.
 *   6. Fetch the SRAM offsets of the interrupt coalescing delay, the
 *      IRQ ack and the IRQ deassert words, then write the current
 *      coalescing delay (big-endian).
 *   7. Run the DMA benchmark (result ignored).
 *   8. Per slice: set irq_claim to irq_claim + 2 * slice and zero the
 *      rx_done, tx, rx_small and LRO counters and the firmware stats
 *      block if present.
 *   9. Set sc->rdma_tags_available to 15 and restore MAC address,
 *      promiscuous mode, pause setting, multicast list and throttle
 *      factor.
 */
1197 mxge_reset(mxge_softc_t *sc, int interrupts_setup)
1199 struct mxge_slice_state *ss;
1200 mxge_rx_done_t *rx_done;
1201 volatile uint32_t *irq_claim;
1205 /* try to send a reset command to the card to see if it
1207 memset(&cmd, 0, sizeof (cmd));
1208 status = mxge_send_cmd(sc, MXGEFW_CMD_RESET, &cmd);
1210 device_printf(sc->dev, "failed reset\n");
1214 mxge_dummy_rdma(sc, 1);
1217 /* set the intrq size */
1218 cmd.data0 = sc->rx_ring_size;
1219 status = mxge_send_cmd(sc, MXGEFW_CMD_SET_INTRQ_SIZE, &cmd);
1222 * Even though we already know how many slices are supported
1223 * via mxge_slice_probe(), MXGEFW_CMD_GET_MAX_RSS_QUEUES
1224 * has magic side effects, and must be called after a reset.
1225 * It must be called prior to calling any RSS related cmds,
1226 * including assigning an interrupt queue for anything but
1227 * slice 0. It must also be called *after*
1228 * MXGEFW_CMD_SET_INTRQ_SIZE, since the intrq size is used by
1229 * the firmware to compute offsets.
1232 if (sc->num_slices > 1) {
1233 /* ask the maximum number of slices it supports */
1234 status = mxge_send_cmd(sc, MXGEFW_CMD_GET_MAX_RSS_QUEUES,
1237 device_printf(sc->dev,
1238 "failed to get number of slices\n");
1242 * MXGEFW_CMD_ENABLE_RSS_QUEUES must be called prior
1243 * to setting up the interrupt queue DMA
1245 cmd.data0 = sc->num_slices;
1246 cmd.data1 = MXGEFW_SLICE_INTR_MODE_ONE_PER_SLICE;
1247 #ifdef IFNET_BUF_RING
1248 cmd.data1 |= MXGEFW_SLICE_ENABLE_MULTIPLE_TX_QUEUES;
1250 status = mxge_send_cmd(sc, MXGEFW_CMD_ENABLE_RSS_QUEUES,
1253 device_printf(sc->dev,
1254 "failed to set number of slices\n");
1260 if (interrupts_setup) {
1261 /* Now exchange information about interrupts */
1262 for (slice = 0; slice < sc->num_slices; slice++) {
1263 rx_done = &sc->ss[slice].rx_done;
1264 memset(rx_done->entry, 0, sc->rx_ring_size);
1265 cmd.data0 = MXGE_LOWPART_TO_U32(rx_done->dma.bus_addr);
1266 cmd.data1 = MXGE_HIGHPART_TO_U32(rx_done->dma.bus_addr);
1268 status |= mxge_send_cmd(sc,
1269 MXGEFW_CMD_SET_INTRQ_DMA,
1274 status |= mxge_send_cmd(sc,
1275 MXGEFW_CMD_GET_INTR_COAL_DELAY_OFFSET, &cmd);
1278 sc->intr_coal_delay_ptr = (volatile uint32_t *)(sc->sram + cmd.data0);
1280 status |= mxge_send_cmd(sc, MXGEFW_CMD_GET_IRQ_ACK_OFFSET, &cmd);
1281 irq_claim = (volatile uint32_t *)(sc->sram + cmd.data0);
1284 status |= mxge_send_cmd(sc, MXGEFW_CMD_GET_IRQ_DEASSERT_OFFSET,
1286 sc->irq_deassert = (volatile uint32_t *)(sc->sram + cmd.data0);
1288 device_printf(sc->dev, "failed set interrupt parameters\n");
1293 *sc->intr_coal_delay_ptr = htobe32(sc->intr_coal_delay);
1296 /* run a DMA benchmark */
1297 (void) mxge_dma_test(sc, MXGEFW_DMA_TEST);
1299 for (slice = 0; slice < sc->num_slices; slice++) {
1300 ss = &sc->ss[slice];
1302 ss->irq_claim = irq_claim + (2 * slice);
1303 /* reset mcp/driver shared state back to 0 */
1304 ss->rx_done.idx = 0;
1305 ss->rx_done.cnt = 0;
1308 ss->tx.pkt_done = 0;
1309 ss->tx.queue_active = 0;
1310 ss->tx.activate = 0;
1311 ss->tx.deactivate = 0;
1316 ss->rx_small.cnt = 0;
1317 ss->lc.lro_bad_csum = 0;
1318 ss->lc.lro_queued = 0;
1319 ss->lc.lro_flushed = 0;
1320 if (ss->fw_stats != NULL) {
1321 bzero(ss->fw_stats, sizeof *ss->fw_stats);
1324 sc->rdma_tags_available = 15;
1325 status = mxge_update_mac_address(sc);
1326 mxge_change_promisc(sc, sc->ifp->if_flags & IFF_PROMISC);
1327 mxge_change_pause(sc, sc->pause);
1328 mxge_set_multicast_list(sc);
1330 cmd.data0 = sc->throttle;
1331 if (mxge_send_cmd(sc, MXGEFW_CMD_SET_THROTTLE_FACTOR,
1333 device_printf(sc->dev,
1334 "can't enable throttle\n");
/*
 * Sysctl handler for the transmit throttle factor.
 *
 * A local copy of sc->throttle is handed to sysctl_handle_int().
 * The new value is compared with the current one and checked against
 * the range MXGE_MIN_THROTTLE..MXGE_MAX_THROTTLE.  With
 * sc->driver_mtx held, it is sent to the firmware with
 * MXGEFW_CMD_SET_THROTTLE_FACTOR and stored in sc->throttle.
 */
1341 mxge_change_throttle(SYSCTL_HANDLER_ARGS)
1346 unsigned int throttle;
1349 throttle = sc->throttle;
1350 err = sysctl_handle_int(oidp, &throttle, arg2, req);
1355 if (throttle == sc->throttle)
1358 if (throttle < MXGE_MIN_THROTTLE || throttle > MXGE_MAX_THROTTLE)
1361 mtx_lock(&sc->driver_mtx);
1362 cmd.data0 = throttle;
1363 err = mxge_send_cmd(sc, MXGEFW_CMD_SET_THROTTLE_FACTOR, &cmd);
1365 sc->throttle = throttle;
1366 mtx_unlock(&sc->driver_mtx);
/*
 * Sysctl handler for the interrupt coalescing delay.
 *
 * A local copy of sc->intr_coal_delay is handed to
 * sysctl_handle_int().  The new value is compared with the current
 * one and checked against the bounds 0 and 1000*1000.  With
 * sc->driver_mtx held, it is written big-endian through
 * sc->intr_coal_delay_ptr and stored in sc->intr_coal_delay.
 */
1371 mxge_change_intr_coal(SYSCTL_HANDLER_ARGS)
1374 unsigned int intr_coal_delay;
1378 intr_coal_delay = sc->intr_coal_delay;
1379 err = sysctl_handle_int(oidp, &intr_coal_delay, arg2, req);
1383 if (intr_coal_delay == sc->intr_coal_delay)
1386 if (intr_coal_delay == 0 || intr_coal_delay > 1000*1000)
1389 mtx_lock(&sc->driver_mtx);
1390 *sc->intr_coal_delay_ptr = htobe32(intr_coal_delay);
1391 sc->intr_coal_delay = intr_coal_delay;
1393 mtx_unlock(&sc->driver_mtx);
/*
 * Sysctl handler for flow control.
 *
 * A local copy of sc->pause is handed to sysctl_handle_int().  The
 * new value is compared with the current one; the change is applied
 * by calling mxge_change_pause() with sc->driver_mtx held.
 */
1398 mxge_change_flow_control(SYSCTL_HANDLER_ARGS)
1401 unsigned int enabled;
1405 enabled = sc->pause;
1406 err = sysctl_handle_int(oidp, &enabled, arg2, req);
1410 if (enabled == sc->pause)
1413 mtx_lock(&sc->driver_mtx);
1414 err = mxge_change_pause(sc, enabled);
1415 mtx_unlock(&sc->driver_mtx);
/*
 * Sysctl handler for 32-bit values stored big-endian: the int that
 * arg1 points to is converted with be32toh() into arg2 before
 * sysctl_handle_int() is called.
 */
1420 mxge_handle_be32(SYSCTL_HANDLER_ARGS)
1426 arg2 = be32toh(*(int *)arg1);
1428 err = sysctl_handle_int(oidp, arg1, arg2, req);
/*
 * Tear down the per-slice sysctl nodes.
 *
 * sc->slice_sysctl_tree is checked for NULL first.  For every slice
 * that has a sysctl tree, its context is freed and the tree pointer
 * cleared; finally the shared slice context is freed and
 * sc->slice_sysctl_tree cleared.
 */
1434 mxge_rem_sysctls(mxge_softc_t *sc)
1436 struct mxge_slice_state *ss;
1439 if (sc->slice_sysctl_tree == NULL)
1442 for (slice = 0; slice < sc->num_slices; slice++) {
1443 ss = &sc->ss[slice];
1444 if (ss == NULL || ss->sysctl_tree == NULL)
1446 sysctl_ctx_free(&ss->sysctl_ctx);
1447 ss->sysctl_tree = NULL;
1449 sysctl_ctx_free(&sc->slice_sysctl_ctx);
1450 sc->slice_sysctl_tree = NULL;
/*
 * Register the driver's sysctl nodes.  Device-wide entries are attached
 * to the device's own sysctl context/tree: informational strings and
 * integers, tunables backed by the mxge_change_* handlers, and firmware
 * statistics (taken from slice 0's fw_stats) exported through
 * mxge_handle_be32().  Per-slice counters are placed under a "slice"
 * node, each slice using its own sysctl context so that
 * mxge_rem_sysctls() can free them.
 * NOTE(review): many argument lines (mostly the node names) are missing
 * from this listing.
 */
1454 mxge_add_sysctls(mxge_softc_t *sc)
1456 struct sysctl_ctx_list *ctx;
1457 struct sysctl_oid_list *children;
1459 struct mxge_slice_state *ss;
1463 ctx = device_get_sysctl_ctx(sc->dev);
1464 children = SYSCTL_CHILDREN(device_get_sysctl_tree(sc->dev));
1465 fw = sc->ss[0].fw_stats;
1467 /* random information */
1468 SYSCTL_ADD_STRING(ctx, children, OID_AUTO,
1470 CTLFLAG_RD, &sc->fw_version,
1471 0, "firmware version");
1472 SYSCTL_ADD_STRING(ctx, children, OID_AUTO,
1474 CTLFLAG_RD, &sc->serial_number_string,
1475 0, "serial number");
1476 SYSCTL_ADD_STRING(ctx, children, OID_AUTO,
1478 CTLFLAG_RD, &sc->product_code_string,
1480 SYSCTL_ADD_INT(ctx, children, OID_AUTO,
1482 CTLFLAG_RD, &sc->link_width,
1484 SYSCTL_ADD_INT(ctx, children, OID_AUTO,
1486 CTLFLAG_RD, &sc->tx_boundary,
1488 SYSCTL_ADD_INT(ctx, children, OID_AUTO,
1490 CTLFLAG_RD, &sc->wc,
1491 0, "write combining PIO?");
1492 SYSCTL_ADD_INT(ctx, children, OID_AUTO,
1494 CTLFLAG_RD, &sc->read_dma,
1495 0, "DMA Read speed in MB/s");
1496 SYSCTL_ADD_INT(ctx, children, OID_AUTO,
1498 CTLFLAG_RD, &sc->write_dma,
1499 0, "DMA Write speed in MB/s");
1500 SYSCTL_ADD_INT(ctx, children, OID_AUTO,
1501 "read_write_dma_MBs",
1502 CTLFLAG_RD, &sc->read_write_dma,
1503 0, "DMA concurrent Read/Write speed in MB/s");
1504 SYSCTL_ADD_INT(ctx, children, OID_AUTO,
1506 CTLFLAG_RD, &sc->watchdog_resets,
1507 0, "Number of times NIC was reset");
1510 /* performance related tunables */
1511 SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
1513 CTLTYPE_INT|CTLFLAG_RW, sc,
1514 0, mxge_change_intr_coal,
1515 "I", "interrupt coalescing delay in usecs");
1517 SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
1519 CTLTYPE_INT|CTLFLAG_RW, sc,
1520 0, mxge_change_throttle,
1521 "I", "transmit throttling");
/*
 * This node is served by mxge_change_flow_control(); its description
 * used to be a copy of the interrupt coalescing one.
 */
1523 SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
1524 "flow_control_enabled",
1525 CTLTYPE_INT|CTLFLAG_RW, sc,
1526 0, mxge_change_flow_control,
1527 "I", "flow control (pause frames) enabled");
1529 SYSCTL_ADD_INT(ctx, children, OID_AUTO,
1531 CTLFLAG_RW, &mxge_deassert_wait,
1532 0, "Wait for IRQ line to go low in ihandler");
1534 /* stats block from firmware is in network byte order.
1536 SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
1538 CTLTYPE_INT|CTLFLAG_RD, &fw->link_up,
1539 0, mxge_handle_be32,
1541 SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
1542 "rdma_tags_available",
1543 CTLTYPE_INT|CTLFLAG_RD, &fw->rdma_tags_available,
1544 0, mxge_handle_be32,
1545 "I", "rdma_tags_available");
1546 SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
1547 "dropped_bad_crc32",
1548 CTLTYPE_INT|CTLFLAG_RD,
1549 &fw->dropped_bad_crc32,
1550 0, mxge_handle_be32,
1551 "I", "dropped_bad_crc32");
1552 SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
1554 CTLTYPE_INT|CTLFLAG_RD,
1555 &fw->dropped_bad_phy,
1556 0, mxge_handle_be32,
1557 "I", "dropped_bad_phy");
1558 SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
1559 "dropped_link_error_or_filtered",
1560 CTLTYPE_INT|CTLFLAG_RD,
1561 &fw->dropped_link_error_or_filtered,
1562 0, mxge_handle_be32,
1563 "I", "dropped_link_error_or_filtered");
1564 SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
1565 "dropped_link_overflow",
1566 CTLTYPE_INT|CTLFLAG_RD, &fw->dropped_link_overflow,
1567 0, mxge_handle_be32,
1568 "I", "dropped_link_overflow");
1569 SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
1570 "dropped_multicast_filtered",
1571 CTLTYPE_INT|CTLFLAG_RD,
1572 &fw->dropped_multicast_filtered,
1573 0, mxge_handle_be32,
1574 "I", "dropped_multicast_filtered");
1575 SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
1576 "dropped_no_big_buffer",
1577 CTLTYPE_INT|CTLFLAG_RD, &fw->dropped_no_big_buffer,
1578 0, mxge_handle_be32,
1579 "I", "dropped_no_big_buffer");
1580 SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
1581 "dropped_no_small_buffer",
1582 CTLTYPE_INT|CTLFLAG_RD,
1583 &fw->dropped_no_small_buffer,
1584 0, mxge_handle_be32,
1585 "I", "dropped_no_small_buffer");
1586 SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
1588 CTLTYPE_INT|CTLFLAG_RD, &fw->dropped_overrun,
1589 0, mxge_handle_be32,
1590 "I", "dropped_overrun");
1591 SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
1593 CTLTYPE_INT|CTLFLAG_RD,
1595 0, mxge_handle_be32,
1596 "I", "dropped_pause");
1597 SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
1599 CTLTYPE_INT|CTLFLAG_RD, &fw->dropped_runt,
1600 0, mxge_handle_be32,
1601 "I", "dropped_runt");
1603 SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
1604 "dropped_unicast_filtered",
1605 CTLTYPE_INT|CTLFLAG_RD, &fw->dropped_unicast_filtered,
1606 0, mxge_handle_be32,
1607 "I", "dropped_unicast_filtered");
1609 /* verbose printing? */
1610 SYSCTL_ADD_INT(ctx, children, OID_AUTO,
1612 CTLFLAG_RW, &mxge_verbose,
1613 0, "verbose printing");
1615 /* add counters exported for debugging from all slices */
1616 sysctl_ctx_init(&sc->slice_sysctl_ctx);
1617 sc->slice_sysctl_tree =
1618 SYSCTL_ADD_NODE(&sc->slice_sysctl_ctx, children, OID_AUTO,
1619 "slice", CTLFLAG_RD, 0, "");
/* each slice gets its own context and a child node named by its index */
1621 for (slice = 0; slice < sc->num_slices; slice++) {
1622 ss = &sc->ss[slice];
1623 sysctl_ctx_init(&ss->sysctl_ctx);
1624 ctx = &ss->sysctl_ctx;
1625 children = SYSCTL_CHILDREN(sc->slice_sysctl_tree);
1626 sprintf(slice_num, "%d", slice);
1628 SYSCTL_ADD_NODE(ctx, children, OID_AUTO, slice_num,
1630 children = SYSCTL_CHILDREN(ss->sysctl_tree);
1631 SYSCTL_ADD_INT(ctx, children, OID_AUTO,
1633 CTLFLAG_RD, &ss->rx_small.cnt,
1635 SYSCTL_ADD_INT(ctx, children, OID_AUTO,
1637 CTLFLAG_RD, &ss->rx_big.cnt,
1639 SYSCTL_ADD_INT(ctx, children, OID_AUTO,
1640 "lro_flushed", CTLFLAG_RD, &ss->lc.lro_flushed,
1641 0, "number of lro merge queues flushed");
1643 SYSCTL_ADD_INT(ctx, children, OID_AUTO,
1644 "lro_bad_csum", CTLFLAG_RD, &ss->lc.lro_bad_csum,
1645 0, "number of bad csums preventing LRO");
1647 SYSCTL_ADD_INT(ctx, children, OID_AUTO,
1648 "lro_queued", CTLFLAG_RD, &ss->lc.lro_queued,
1649 0, "number of frames appended to lro merge"
1652 #ifndef IFNET_BUF_RING
1653 /* only transmit from slice 0 for now */
1657 SYSCTL_ADD_INT(ctx, children, OID_AUTO,
1659 CTLFLAG_RD, &ss->tx.req,
1662 SYSCTL_ADD_INT(ctx, children, OID_AUTO,
1664 CTLFLAG_RD, &ss->tx.done,
1666 SYSCTL_ADD_INT(ctx, children, OID_AUTO,
1668 CTLFLAG_RD, &ss->tx.pkt_done,
1670 SYSCTL_ADD_INT(ctx, children, OID_AUTO,
1672 CTLFLAG_RD, &ss->tx.stall,
1674 SYSCTL_ADD_INT(ctx, children, OID_AUTO,
1676 CTLFLAG_RD, &ss->tx.wake,
1678 SYSCTL_ADD_INT(ctx, children, OID_AUTO,
1680 CTLFLAG_RD, &ss->tx.defrag,
1682 SYSCTL_ADD_INT(ctx, children, OID_AUTO,
1684 CTLFLAG_RD, &ss->tx.queue_active,
1685 0, "tx_queue_active");
1686 SYSCTL_ADD_INT(ctx, children, OID_AUTO,
1688 CTLFLAG_RD, &ss->tx.activate,
1690 SYSCTL_ADD_INT(ctx, children, OID_AUTO,
1692 CTLFLAG_RD, &ss->tx.deactivate,
1693 0, "tx_deactivate");
1697 /* copy an array of mcp_kreq_ether_send_t's to the mcp. Copy
1698 backwards one at a time and handle ring wraps */
1701 mxge_submit_req_backwards(mxge_tx_ring_t *tx,
1702 mcp_kreq_ether_send_t *src, int cnt)
1704 int idx, starting_slot;
/* slots are counted from the ring's current request index */
1705 starting_slot = tx->req;
/*
 * Masking with tx->mask wraps the slot index around the ring; each
 * copy moves exactly one request (sizeof(*src)).
 * NOTE(review): the loop that steps cnt is not visible in this listing.
 */
1708 idx = (starting_slot + cnt) & tx->mask;
1709 mxge_pio_copy(&tx->lanai[idx],
1710 &src[cnt], sizeof(*src));
1716 * copy an array of mcp_kreq_ether_send_t's to the mcp. Copy
1717 * at most 32 bytes at a time, so as to avoid involving the software
1718 * pio handler in the nic. We re-write the first segment's flags
1719 * to mark them valid only after writing the entire chain
/*
 * Copy a chain of cnt send requests from src into the tx->lanai ring.
 * The first request's flags are saved in last_flags and written back as
 * the final 32-bit store, after the rest of the chain has been copied.
 * NOTE(review): several statements (including the one that changes
 * src->flags after it is saved) are missing from this listing.
 */
1723 mxge_submit_req(mxge_tx_ring_t *tx, mcp_kreq_ether_send_t *src,
1728 volatile uint32_t *dst_ints;
1729 mcp_kreq_ether_send_t *srcp;
1730 volatile mcp_kreq_ether_send_t *dstp, *dst;
/* ring slot that receives the first request of the chain */
1733 idx = tx->req & tx->mask;
/* remember the first request's flags; they are restored further down */
1735 last_flags = src->flags;
1738 dst = dstp = &tx->lanai[idx];
/* chain stays short of the ring end: copy two requests per PIO burst */
1741 if ((idx + cnt) < tx->mask) {
1742 for (i = 0; i < (cnt - 1); i += 2) {
1743 mxge_pio_copy(dstp, srcp, 2 * sizeof(*src));
1744 wmb(); /* force write every 32 bytes */
1749 /* submit all but the first request, and ensure
1750 that it is submitted below */
1751 mxge_submit_req_backwards(tx, src, cnt);
1755 /* submit the first request */
1756 mxge_pio_copy(dstp, srcp, sizeof(*src));
1757 wmb(); /* barrier before setting valid flag */
1760 /* re-write the last 32-bits with the valid flags */
1761 src->flags = last_flags;
1762 src_ints = (uint32_t *)src;
1764 dst_ints = (volatile uint32_t *)dst;
1766 *dst_ints = *src_ints;
/*
 * Parse the headers of an outgoing frame into *pi: the offset of the
 * network-layer header (pi->ip_off, which accounts for an 802.1Q tag),
 * that header's length (pi->ip_hlen) and pointers to the IPv4/IPv6 and
 * TCP headers.  When the required bytes are not all contained in the
 * first mbuf they are copied out with m_copydata() and the pointers are
 * redirected into ss->scratch.
 * NOTE(review): the dispatch on the ethertype, the return statements and
 * some declarations are missing from this listing.
 */
1772 mxge_parse_tx(struct mxge_slice_state *ss, struct mbuf *m,
1773 struct mxge_pkt_info *pi)
1775 struct ether_vlan_header *eh;
1777 int tso = m->m_pkthdr.csum_flags & (CSUM_TSO);
1778 #if IFCAP_TSO6 && defined(INET6)
/* determine the ethertype and where the network-layer header starts */
1782 eh = mtod(m, struct ether_vlan_header *);
1783 if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
1784 etype = ntohs(eh->evl_proto);
1785 pi->ip_off = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
1787 etype = ntohs(eh->evl_encap_proto);
1788 pi->ip_off = ETHER_HDR_LEN;
1794 * ensure ip header is in first mbuf, copy it to a
1795 * scratch buffer if not
1797 pi->ip = (struct ip *)(m->m_data + pi->ip_off);
1799 if (__predict_false(m->m_len < pi->ip_off + sizeof(*pi->ip))) {
1800 m_copydata(m, 0, pi->ip_off + sizeof(*pi->ip),
1802 pi->ip = (struct ip *)(ss->scratch + pi->ip_off);
/* the ip_hl field is scaled by 4 to obtain the header length */
1804 pi->ip_hlen = pi->ip->ip_hl << 2;
/* same first-mbuf test, now extended to cover a TCP header as well */
1808 if (__predict_false(m->m_len < pi->ip_off + pi->ip_hlen +
1809 sizeof(struct tcphdr))) {
1810 m_copydata(m, 0, pi->ip_off + pi->ip_hlen +
1811 sizeof(struct tcphdr), ss->scratch);
1812 pi->ip = (struct ip *)(ss->scratch + pi->ip_off);
1814 pi->tcp = (struct tcphdr *)((char *)pi->ip + pi->ip_hlen);
1816 #if IFCAP_TSO6 && defined(INET6)
1817 case ETHERTYPE_IPV6:
1818 pi->ip6 = (struct ip6_hdr *)(m->m_data + pi->ip_off);
1819 if (__predict_false(m->m_len < pi->ip_off + sizeof(*pi->ip6))) {
1820 m_copydata(m, 0, pi->ip_off + sizeof(*pi->ip6),
1822 pi->ip6 = (struct ip6_hdr *)(ss->scratch + pi->ip_off);
/* ip6_lasthdr()'s result is made relative to ip_off to give a length */
1825 pi->ip_hlen = ip6_lasthdr(m, pi->ip_off, IPPROTO_IPV6, &nxt);
1826 pi->ip_hlen -= pi->ip_off;
1827 if (nxt != IPPROTO_TCP && nxt != IPPROTO_UDP)
1833 if (pi->ip_off + pi->ip_hlen > ss->sc->max_tso6_hlen)
1836 if (__predict_false(m->m_len < pi->ip_off + pi->ip_hlen +
1837 sizeof(struct tcphdr))) {
1838 m_copydata(m, 0, pi->ip_off + pi->ip_hlen +
1839 sizeof(struct tcphdr), ss->scratch);
1840 pi->ip6 = (struct ip6_hdr *)(ss->scratch + pi->ip_off);
1842 pi->tcp = (struct tcphdr *)((char *)pi->ip6 + pi->ip_hlen);
/*
 * Build and submit the send-request chain for a TSO frame.  The caller
 * (mxge_encap()) has already loaded the frame's busdma_seg_cnt DMA
 * segments and filled *pi via mxge_parse_tx().
 * NOTE(review): a large part of this routine is missing from this
 * listing; apart from the few notes added below, the inline comments are
 * the original author's.
 */
1854 mxge_encap_tso(struct mxge_slice_state *ss, struct mbuf *m,
1855 int busdma_seg_cnt, struct mxge_pkt_info *pi)
1858 mcp_kreq_ether_send_t *req;
1859 bus_dma_segment_t *seg;
1860 uint32_t low, high_swapped;
1861 int len, seglen, cum_len, cum_len_next;
1862 int next_is_first, chop, cnt, rdma_count, small;
1863 uint16_t pseudo_hdr_offset, cksum_offset, mss, sum;
1864 uint8_t flags, flags_next;
/* segment size taken from the packet header's tso_segsz field */
1867 mss = m->m_pkthdr.tso_segsz;
1869 /* negative cum_len signifies to the
1870 * send loop that we are still in the
1871 * header portion of the TSO packet.
1874 cksum_offset = pi->ip_off + pi->ip_hlen;
1875 cum_len = -(cksum_offset + (pi->tcp->th_off << 2));
1877 /* TSO implies checksum offload on this hardware */
1878 if (__predict_false((m->m_pkthdr.csum_flags & (CSUM_TCP|CSUM_TCP_IPV6)) == 0)) {
1880 * If packet has full TCP csum, replace it with pseudo hdr
1881 * sum that the NIC expects, otherwise the NIC will emit
1882 * packets with bad TCP checksums.
1884 m->m_pkthdr.csum_data = offsetof(struct tcphdr, th_sum);
1886 #if (CSUM_TCP_IPV6 != 0) && defined(INET6)
1887 m->m_pkthdr.csum_flags |= CSUM_TCP_IPV6;
1888 sum = in6_cksum_pseudo(pi->ip6,
1889 m->m_pkthdr.len - cksum_offset,
1894 m->m_pkthdr.csum_flags |= CSUM_TCP;
1895 sum = in_pseudo(pi->ip->ip_src.s_addr,
1896 pi->ip->ip_dst.s_addr,
1897 htons(IPPROTO_TCP + (m->m_pkthdr.len -
/* write the pseudo-header sum into the frame's TCP checksum field */
1901 m_copyback(m, offsetof(struct tcphdr, th_sum) +
1902 cksum_offset, sizeof(sum), (caddr_t)&sum);
1904 flags = MXGEFW_FLAGS_TSO_HDR | MXGEFW_FLAGS_FIRST;
1907 /* for TSO, pseudo_hdr_offset holds mss.
1908 * The firmware figures out where to put
1909 * the checksum by parsing the header. */
1910 pseudo_hdr_offset = htobe16(mss);
1914 * for IPv6 TSO, the "checksum offset" is re-purposed
1915 * to store the TCP header len
1917 cksum_offset = (pi->tcp->th_off << 2);
1925 /* "rdma_count" is the number of RDMAs belonging to the
1926 * current packet BEFORE the current send request. For
1927 * non-TSO packets, this is equal to "count".
1928 * For TSO packets, rdma_count needs to be reset
1929 * to 0 after a segment cut.
1931 * The rdma_count field of the send request is
1932 * the number of RDMAs of the packet starting at
1933 * that request. For TSO send requests with one or more cuts
1934 * in the middle, this is the number of RDMAs starting
1935 * after the last cut in the request. All previous
1936 * segments before the last cut implicitly have 1 RDMA.
1938 * Since the number of RDMAs is not known beforehand,
1939 * it must be filled-in retroactively - after each
1940 * segmentation cut or at the end of the entire packet.
1943 while (busdma_seg_cnt) {
1944 /* Break the busdma segment up into pieces*/
1945 low = MXGE_LOWPART_TO_U32(seg->ds_addr);
1946 high_swapped = htobe32(MXGE_HIGHPART_TO_U32(seg->ds_addr));
1950 flags_next = flags & ~MXGEFW_FLAGS_FIRST;
1952 cum_len_next = cum_len + seglen;
1953 (req-rdma_count)->rdma_count = rdma_count + 1;
1954 if (__predict_true(cum_len >= 0)) {
1956 chop = (cum_len_next > mss);
1957 cum_len_next = cum_len_next % mss;
1958 next_is_first = (cum_len_next == 0);
1959 flags |= chop * MXGEFW_FLAGS_TSO_CHOP;
1960 flags_next |= next_is_first *
1962 rdma_count |= -(chop | next_is_first);
1963 rdma_count += chop & !next_is_first;
1964 } else if (cum_len_next >= 0) {
1969 small = (mss <= MXGEFW_SEND_SMALL_SIZE);
1970 flags_next = MXGEFW_FLAGS_TSO_PLD |
1971 MXGEFW_FLAGS_FIRST |
1972 (small * MXGEFW_FLAGS_SMALL);
1975 req->addr_high = high_swapped;
1976 req->addr_low = htobe32(low);
1977 req->pseudo_hdr_offset = pseudo_hdr_offset;
1979 req->rdma_count = 1;
1980 req->length = htobe16(seglen);
1981 req->cksum_offset = cksum_offset;
1982 req->flags = flags | ((cum_len & 1) *
1983 MXGEFW_FLAGS_ALIGN_ODD);
1986 cum_len = cum_len_next;
1991 if (cksum_offset != 0 && !pi->ip6) {
1992 if (__predict_false(cksum_offset > seglen))
1993 cksum_offset -= seglen;
1997 if (__predict_false(cnt > tx->max_desc))
2003 (req-rdma_count)->rdma_count = rdma_count;
2007 req->flags |= MXGEFW_FLAGS_TSO_LAST;
2008 } while (!(req->flags & (MXGEFW_FLAGS_TSO_CHOP | MXGEFW_FLAGS_FIRST)));
/* flag the slot of the chain's last request, then submit the chain */
2010 tx->info[((cnt - 1) + tx->req) & tx->mask].flag = 1;
2011 mxge_submit_req(tx, tx->req_list, cnt);
2012 #ifdef IFNET_BUF_RING
2013 if ((ss->sc->num_slices > 1) && tx->queue_active == 0) {
2014 /* tell the NIC to start polling this slice */
2016 tx->queue_active = 1;
2024 bus_dmamap_unload(tx->dmat, tx->info[tx->req & tx->mask].map);
2028 printf("tx->max_desc exceeded via TSO!\n");
2029 printf("mss = %d, %ld, %d!\n", mss,
2030 (long)seg - (long)tx->seg_list, tx->max_desc);
2037 #endif /* IFCAP_TSO4 */
2039 #ifdef MXGE_NEW_VLAN_API
2041 * We reproduce the software vlan tag insertion from
2042 * net/if_vlan.c:vlan_start() here so that we can advertise "hardware"
2043 * vlan tag insertion. We need to advertise this in order to have the
2044 * vlan interface respect our csum offload flags.
2046 static struct mbuf *
2047 mxge_vlan_tag_insert(struct mbuf *m)
2049 struct ether_vlan_header *evl;
/* grow the front of the chain by ETHER_VLAN_ENCAP_LEN bytes (no sleeping) */
2051 M_PREPEND(m, ETHER_VLAN_ENCAP_LEN, M_NOWAIT);
2052 if (__predict_false(m == NULL))
/* the whole 802.1Q header must be contiguous in the first mbuf */
2054 if (m->m_len < sizeof(*evl)) {
2055 m = m_pullup(m, sizeof(*evl));
2056 if (__predict_false(m == NULL))
2060 * Transform the Ethernet header into an Ethernet header
2061 * with 802.1Q encapsulation.
2063 evl = mtod(m, struct ether_vlan_header *);
/* slide the bytes ahead of the type field down to the new frame start */
2064 bcopy((char *)evl + ETHER_VLAN_ENCAP_LEN,
2065 (char *)evl, ETHER_HDR_LEN - ETHER_TYPE_LEN);
2066 evl->evl_encap_proto = htons(ETHERTYPE_VLAN);
2067 evl->evl_tag = htons(m->m_pkthdr.ether_vtag);
/* the tag now lives in the frame itself, so drop the out-of-band marker */
2068 m->m_flags &= ~M_VLANTAG;
2071 #endif /* MXGE_NEW_VLAN_API */
/*
 * Map an outgoing mbuf chain for DMA and turn it into firmware send
 * requests on the slice's transmit ring.  With MXGE_NEW_VLAN_API,
 * frames flagged M_VLANTAG get the tag inserted in software first;
 * frames asking for TSO or delayed checksums are parsed with
 * mxge_parse_tx(); TSO frames are handed to mxge_encap_tso().  Frames
 * are padded from sc->zeropad_dma up to 60 bytes.
 * NOTE(review): the error/drop paths, some declarations and several
 * conditions are missing from this listing.
 */
2074 mxge_encap(struct mxge_slice_state *ss, struct mbuf *m)
2076 struct mxge_pkt_info pi = {0,0,0,0};
2078 mcp_kreq_ether_send_t *req;
2079 bus_dma_segment_t *seg;
2083 int cnt, cum_len, err, i, idx, odd_flag;
2084 uint16_t pseudo_hdr_offset;
2085 uint8_t flags, cksum_offset;
2092 #ifdef MXGE_NEW_VLAN_API
2093 if (m->m_flags & M_VLANTAG) {
2094 m = mxge_vlan_tag_insert(m);
2095 if (__predict_false(m == NULL))
2096 goto drop_without_m;
/* header offsets are only needed when an offload was requested */
2099 if (m->m_pkthdr.csum_flags &
2100 (CSUM_TSO | CSUM_DELAY_DATA | CSUM_DELAY_DATA_IPV6)) {
2101 if (mxge_parse_tx(ss, m, &pi))
2105 /* (try to) map the frame for DMA */
2106 idx = tx->req & tx->mask;
2107 err = bus_dmamap_load_mbuf_sg(tx->dmat, tx->info[idx].map,
2108 m, tx->seg_list, &cnt,
2110 if (__predict_false(err == EFBIG)) {
2111 /* Too many segments in the chain. Try
2113 m_tmp = m_defrag(m, M_NOWAIT);
2114 if (m_tmp == NULL) {
2119 err = bus_dmamap_load_mbuf_sg(tx->dmat,
2121 m, tx->seg_list, &cnt,
2124 if (__predict_false(err != 0)) {
2125 device_printf(sc->dev, "bus_dmamap_load_mbuf_sg returned %d"
2126 " packet len = %d\n", err, m->m_pkthdr.len);
2129 bus_dmamap_sync(tx->dmat, tx->info[idx].map,
2130 BUS_DMASYNC_PREWRITE);
2131 tx->info[idx].m = m;
2134 /* TSO is different enough, we handle it in another routine */
2135 if (m->m_pkthdr.csum_flags & (CSUM_TSO)) {
2136 mxge_encap_tso(ss, m, cnt, &pi);
2143 pseudo_hdr_offset = 0;
2144 flags = MXGEFW_FLAGS_NO_TSO;
2146 /* checksum offloading? */
2147 if (m->m_pkthdr.csum_flags &
2148 (CSUM_DELAY_DATA | CSUM_DELAY_DATA_IPV6)) {
2149 /* ensure ip header is in first mbuf, copy
2150 it to a scratch buffer if not */
2151 cksum_offset = pi.ip_off + pi.ip_hlen;
2152 pseudo_hdr_offset = cksum_offset + m->m_pkthdr.csum_data;
2153 pseudo_hdr_offset = htobe16(pseudo_hdr_offset);
2154 req->cksum_offset = cksum_offset;
2155 flags |= MXGEFW_FLAGS_CKSUM;
2156 odd_flag = MXGEFW_FLAGS_ALIGN_ODD;
2160 if (m->m_pkthdr.len < MXGEFW_SEND_SMALL_SIZE)
2161 flags |= MXGEFW_FLAGS_SMALL;
2163 /* convert segments into a request list */
2166 req->flags = MXGEFW_FLAGS_FIRST;
2167 for (i = 0; i < cnt; i++) {
2169 htobe32(MXGE_LOWPART_TO_U32(seg->ds_addr));
2171 htobe32(MXGE_HIGHPART_TO_U32(seg->ds_addr));
2172 req->length = htobe16(seg->ds_len);
2173 req->cksum_offset = cksum_offset;
2174 if (cksum_offset > seg->ds_len)
2175 cksum_offset -= seg->ds_len;
2178 req->pseudo_hdr_offset = pseudo_hdr_offset;
2179 req->pad = 0; /* complete solid 16-byte block */
2180 req->rdma_count = 1;
2181 req->flags |= flags | ((cum_len & 1) * odd_flag);
2182 cum_len += seg->ds_len;
2188 /* pad runts to 60 bytes */
2192 htobe32(MXGE_LOWPART_TO_U32(sc->zeropad_dma.bus_addr));
2194 htobe32(MXGE_HIGHPART_TO_U32(sc->zeropad_dma.bus_addr));
2195 req->length = htobe16(60 - cum_len);
2196 req->cksum_offset = 0;
2197 req->pseudo_hdr_offset = pseudo_hdr_offset;
2198 req->pad = 0; /* complete solid 16-byte block */
2199 req->rdma_count = 1;
2200 req->flags |= flags | ((cum_len & 1) * odd_flag);
/* the first request carries the request count of the whole chain */
2204 tx->req_list[0].rdma_count = cnt;
2206 /* print what the firmware will see */
2207 for (i = 0; i < cnt; i++) {
2208 printf("%d: addr: 0x%x 0x%x len:%d pso%d,"
2209 "cso:%d, flags:0x%x, rdma:%d\n",
2210 i, (int)ntohl(tx->req_list[i].addr_high),
2211 (int)ntohl(tx->req_list[i].addr_low),
2212 (int)ntohs(tx->req_list[i].length),
2213 (int)ntohs(tx->req_list[i].pseudo_hdr_offset),
2214 tx->req_list[i].cksum_offset, tx->req_list[i].flags,
2215 tx->req_list[i].rdma_count);
2217 printf("--------------\n");
/* flag the slot of the chain's last request, then submit the chain */
2219 tx->info[((cnt - 1) + tx->req) & tx->mask].flag = 1;
2220 mxge_submit_req(tx, tx->req_list, cnt);
2221 #ifdef IFNET_BUF_RING
2222 if ((ss->sc->num_slices > 1) && tx->queue_active == 0) {
2223 /* tell the NIC to start polling this slice */
2225 tx->queue_active = 1;
2239 #ifdef IFNET_BUF_RING
/*
 * Drain the transmit buf_ring of every slice by dequeuing from tx->br
 * until it is empty.
 * NOTE(review): what happens to each dequeued mbuf, and the matching
 * lock acquisition for the visible mtx_unlock(), are missing from this
 * listing.
 */
2241 mxge_qflush(struct ifnet *ifp)
2243 mxge_softc_t *sc = ifp->if_softc;
2248 for (slice = 0; slice < sc->num_slices; slice++) {
2249 tx = &sc->ss[slice].tx;
2251 while ((m = buf_ring_dequeue_sc(tx->br)) != NULL)
2253 mtx_unlock(&tx->mtx);
/*
 * Transmit loop working from the slice's buf_ring (tx->br): while more
 * than tx->max_desc ring slots are free, dequeue an mbuf with
 * drbr_dequeue() and pass it on.  If the loop ends with mbufs still
 * queued, the slice is marked IFF_DRV_OACTIVE.
 * NOTE(review): presumably called with tx->mtx held, as the name and the
 * callers visible in this file suggest; the loop's exit statements are
 * missing from this listing.
 */
2259 mxge_start_locked(struct mxge_slice_state *ss)
/* free slots = ring mask less the requests that are still outstanding */
2270 while ((tx->mask - (tx->req - tx->done)) > tx->max_desc) {
2271 m = drbr_dequeue(ifp, tx->br);
2275 /* let BPF see it */
2278 /* give it to the nic */
2281 /* ran out of transmit slots */
2282 if (((ss->if_drv_flags & IFF_DRV_OACTIVE) == 0)
2283 && (!drbr_empty(ifp, tx->br))) {
2284 ss->if_drv_flags |= IFF_DRV_OACTIVE;
/*
 * Transmit a single mbuf on a slice; mxge_transmit() calls this with
 * tx->mtx held.  The frame goes straight to the NIC when nothing has to
 * be queued ahead of it and more than tx->max_desc ring slots are free;
 * otherwise it is enqueued on tx->br.  Any backlog left in the buf_ring
 * is then worked off through mxge_start_locked().
 * NOTE(review): the right-hand side of the first flag test and the
 * return statements are missing from this listing.
 */
2290 mxge_transmit_locked(struct mxge_slice_state *ss, struct mbuf *m)
2301 if ((ss->if_drv_flags & (IFF_DRV_RUNNING|IFF_DRV_OACTIVE)) !=
2303 err = drbr_enqueue(ifp, tx->br, m);
2307 if (!drbr_needs_enqueue(ifp, tx->br) &&
2308 ((tx->mask - (tx->req - tx->done)) > tx->max_desc)) {
2309 /* let BPF see it */
2311 /* give it to the nic */
2313 } else if ((err = drbr_enqueue(ifp, tx->br, m)) != 0) {
2316 if (!drbr_empty(ifp, tx->br))
2317 mxge_start_locked(ss);
/*
 * Queue or transmit an mbuf for the interface.  The slice is chosen from
 * the mbuf's flowid, masked with (num_slices - 1).  If the slice's
 * tx->mtx can be taken without blocking, the frame is passed to
 * mxge_transmit_locked(); otherwise it is only enqueued on the slice's
 * buf_ring.
 */
2322 mxge_transmit(struct ifnet *ifp, struct mbuf *m)
2324 mxge_softc_t *sc = ifp->if_softc;
2325 struct mxge_slice_state *ss;
2330 slice = m->m_pkthdr.flowid;
2331 slice &= (sc->num_slices - 1); /* num_slices always power of 2 */
2333 ss = &sc->ss[slice];
/* never block here: fall back to enqueueing when the lock is contended */
2336 if (mtx_trylock(&tx->mtx)) {
2337 err = mxge_transmit_locked(ss, m);
2338 mtx_unlock(&tx->mtx);
2340 err = drbr_enqueue(ifp, tx->br, m);
/*
 * Transmit loop working from the interface send queue (ifp->if_snd):
 * while more than tx->max_desc ring slots are free, dequeue an mbuf with
 * IFQ_DRV_DEQUEUE() and pass it on.  When the loop runs out of slots the
 * interface is marked IFF_DRV_OACTIVE.
 * NOTE(review): mxge_start() calls this with ss->tx.mtx held; the loop's
 * exit statements are missing from this listing.
 */
2349 mxge_start_locked(struct mxge_slice_state *ss)
/* free slots = ring mask less the requests that are still outstanding */
2359 while ((tx->mask - (tx->req - tx->done)) > tx->max_desc) {
2360 IFQ_DRV_DEQUEUE(&ifp->if_snd, m);
2364 /* let BPF see it */
2367 /* give it to the nic */
2370 /* ran out of transmit slots */
2371 if ((sc->ifp->if_drv_flags & IFF_DRV_OACTIVE) == 0) {
2372 sc->ifp->if_drv_flags |= IFF_DRV_OACTIVE;
/*
 * Start transmission for the interface: runs mxge_start_locked() on a
 * slice while holding that slice's tx mutex.
 * NOTE(review): the assignment that selects the slice is missing from
 * this listing; the original comment says only the first slice is used.
 */
2378 mxge_start(struct ifnet *ifp)
2380 mxge_softc_t *sc = ifp->if_softc;
2381 struct mxge_slice_state *ss;
2383 /* only use the first slice for now */
2385 mtx_lock(&ss->tx.mtx);
2386 mxge_start_locked(ss);
2387 mtx_unlock(&ss->tx.mtx);
2391 * copy an array of mcp_kreq_ether_recv_t's to the mcp. Copy
2392 * at most 32 bytes at a time, so as to avoid involving the software
2393 * pio handler in the nic. We re-write the first segment's low
2394 * DMA address to mark it valid only after we write the entire chunk
/*
 * Copy eight receive descriptors from src to dst as two 4-entry PIO
 * copies.  The first entry's addr_low is replaced by 0xffffffff while
 * the copies are made and its real value is stored to dst only at the
 * very end.
 */
2398 mxge_submit_8rx(volatile mcp_kreq_ether_recv_t *dst,
2399 mcp_kreq_ether_recv_t *src)
/* stash the real low address and substitute a placeholder for the copy */
2403 low = src->addr_low;
2404 src->addr_low = 0xffffffff;
2405 mxge_pio_copy(dst, src, 4 * sizeof (*src));
2407 mxge_pio_copy(dst + 4, src + 4, 4 * sizeof (*src));
/* restore the source entry, then write the real address to dst last */
2409 src->addr_low = low;
2410 dst->addr_low = low;
/*
 * Provide a fresh buffer for slot idx of the small receive ring: a
 * packet-header mbuf is allocated without sleeping, DMA-loaded with the
 * supplied map, recorded in rx->info[idx] and its bus address written,
 * big-endian and split into low/high halves, to rx->shadow[idx].  The
 * eight shadow entries ending at idx are pushed with mxge_submit_8rx().
 * NOTE(review): the failure paths and the condition guarding the final
 * submit are missing from this listing; callers treat a non-zero return
 * as failure.
 */
2415 mxge_get_buf_small(struct mxge_slice_state *ss, bus_dmamap_t map, int idx)
2417 bus_dma_segment_t seg;
2419 mxge_rx_ring_t *rx = &ss->rx_small;
2422 m = m_gethdr(M_NOWAIT, MT_DATA);
2429 err = bus_dmamap_load_mbuf_sg(rx->dmat, map, m,
2430 &seg, &cnt, BUS_DMA_NOWAIT);
2435 rx->info[idx].m = m;
2436 rx->shadow[idx].addr_low =
2437 htobe32(MXGE_LOWPART_TO_U32(seg.ds_addr));
2438 rx->shadow[idx].addr_high =
2439 htobe32(MXGE_HIGHPART_TO_U32(seg.ds_addr));
2443 mxge_submit_8rx(&rx->lanai[idx - 7], &rx->shadow[idx - 7]);
/*
 * Provide a fresh buffer for slot idx of the big receive ring: a cluster
 * mbuf of rx->cl_size is allocated without sleeping, DMA-loaded with the
 * supplied map (up to three segments), recorded in rx->info[idx] and its
 * bus address written to the shadow ring.  Completed groups of eight
 * shadow entries are pushed with mxge_submit_8rx().
 * NOTE(review): the failure paths and the stepping of idx inside the
 * final loop are missing from this listing; callers treat a non-zero
 * return as failure.
 */
2448 mxge_get_buf_big(struct mxge_slice_state *ss, bus_dmamap_t map, int idx)
2450 bus_dma_segment_t seg[3];
2452 mxge_rx_ring_t *rx = &ss->rx_big;
2455 m = m_getjcl(M_NOWAIT, MT_DATA, M_PKTHDR, rx->cl_size);
2461 m->m_len = rx->mlen;
2462 err = bus_dmamap_load_mbuf_sg(rx->dmat, map, m,
2463 seg, &cnt, BUS_DMA_NOWAIT);
2468 rx->info[idx].m = m;
2469 rx->shadow[idx].addr_low =
2470 htobe32(MXGE_LOWPART_TO_U32(seg->ds_addr));
2471 rx->shadow[idx].addr_high =
2472 htobe32(MXGE_HIGHPART_TO_U32(seg->ds_addr));
2474 #if MXGE_VIRT_JUMBOS
/* further DMA segments occupy the shadow slots that follow idx */
2475 for (i = 1; i < cnt; i++) {
2476 rx->shadow[idx + i].addr_low =
2477 htobe32(MXGE_LOWPART_TO_U32(seg[i].ds_addr));
2478 rx->shadow[idx + i].addr_high =
2479 htobe32(MXGE_HIGHPART_TO_U32(seg[i].ds_addr));
/* submit whenever an index completes a group of eight entries */
2484 for (i = 0; i < rx->nbufs; i++) {
2485 if ((idx & 7) == 7) {
2486 mxge_submit_8rx(&rx->lanai[idx - 7],
2487 &rx->shadow[idx - 7]);
/*
 * Checksum helper over the buffer described by raw/len, returning a
 * 16-bit result.  Only the final step is visible here: the accumulator
 * is folded from 32 to 16 bits twice, the second pass absorbing a carry
 * produced by the first.
 * NOTE(review): the accumulation loop is missing from this listing.
 */
2497 mxge_csum_generic(uint16_t *raw, int len)
2508 csum = (csum >> 16) + (csum & 0xffff);
2509 csum = (csum >> 16) + (csum & 0xffff);
2510 return (uint16_t)csum;
/*
 * IPv6 receive checksum check.  p points at the IPv6 header of the frame
 * held in m, and csum is the partial checksum to be corrected.  The
 * offset of the TCP/UDP header is found (through ip6_lasthdr() when the
 * next header is neither TCP nor UDP), the checksum of the bytes in
 * front of it is taken out of csum, and the result is combined with the
 * IPv6 pseudo-header via in6_cksum_pseudo().
 * NOTE(review): the initialisation of nxt, the early returns and the
 * final return are missing from this listing.
 */
2513 static inline uint16_t
2514 mxge_rx_csum6(void *p, struct mbuf *m, uint32_t csum)
2517 int nxt, cksum_offset;
2518 struct ip6_hdr *ip6 = p;
2522 cksum_offset = sizeof (*ip6) + ETHER_HDR_LEN;
2523 if (nxt != IPPROTO_TCP && nxt != IPPROTO_UDP) {
2524 cksum_offset = ip6_lasthdr(m, ETHER_HDR_LEN,
2525 IPPROTO_IPV6, &nxt);
2526 if (nxt != IPPROTO_TCP && nxt != IPPROTO_UDP)
2531 * IPv6 headers do not contain a checksum, and hence
2532 * do not checksum to zero, so they don't "fall out"
2533 * of the partial checksum calculation like IPv4
2534 * headers do. We need to fix the partial checksum by
2535 * subtracting the checksum of the IPv6 header.
2538 partial = mxge_csum_generic((uint16_t *)ip6, cksum_offset -
2541 csum += (csum < ~partial);
2542 csum = (csum >> 16) + (csum & 0xFFFF);
2543 csum = (csum >> 16) + (csum & 0xFFFF);
2544 c = in6_cksum_pseudo(ip6, m->m_pkthdr.len - cksum_offset, nxt,
2551 * Myri10GE hardware checksums are not valid if the sender
2552 * padded the frame with non-zero padding. This is because
2553 * the firmware just does a simple 16-bit 1s complement
2554 * checksum across the entire frame, excluding the first 14
2555 * bytes. It is best simply to check the checksum and
2556 * tell the stack about it only if the checksum is good
/*
 * Validate the checksum of a received frame.  The ethertype selects the
 * path: for IPv4 the checksum is combined with the pseudo-header via
 * in_pseudo(); for IPv6 the work is delegated to mxge_rx_csum6().  Each
 * path first tests the matching receive-checksum capability bit of the
 * receiving interface.  Callers in this file treat a result of 0 as a
 * good checksum.
 * NOTE(review): the switch statement, the early returns and the final
 * return are missing from this listing.
 */
2559 static inline uint16_t
2560 mxge_rx_csum(struct mbuf *m, int csum)
2562 struct ether_header *eh;
2566 #if defined(INET) || defined(INET6)
2567 int cap = m->m_pkthdr.rcvif->if_capenable;
2572 eh = mtod(m, struct ether_header *);
2573 etype = ntohs(eh->ether_type);
2577 if ((cap & IFCAP_RXCSUM) == 0)
/* the IPv4 header follows the Ethernet header directly */
2579 ip = (struct ip *)(eh + 1);
2580 if (ip->ip_p != IPPROTO_TCP && ip->ip_p != IPPROTO_UDP)
2582 c = in_pseudo(ip->ip_src.s_addr, ip->ip_dst.s_addr,
2583 htonl(ntohs(csum) + ntohs(ip->ip_len) -
2584 (ip->ip_hl << 2) + ip->ip_p));
2589 case ETHERTYPE_IPV6:
2590 if ((cap & IFCAP_RXCSUM_IPV6) == 0)
2592 c = mxge_rx_csum6((eh + 1), m, csum);
/*
 * Strip the 802.1Q header from a received frame.  The checksum in *csum
 * is corrected for the 32 bits that follow a plain Ethernet header, the
 * tag is saved (in m_pkthdr.ether_vtag with MXGE_NEW_VLAN_API, otherwise
 * in an mbuf tag), M_VLANTAG is set, and the encapsulation bytes are
 * removed from the front of the frame.  *csum is in network byte order
 * on entry and on exit.
 * NOTE(review): some declarations and the handling of a failed
 * m_tag_alloc() are missing from this listing.
 */
2602 mxge_vlan_tag_remove(struct mbuf *m, uint32_t *csum)
2604 struct ether_vlan_header *evl;
2605 struct ether_header *eh;
2608 evl = mtod(m, struct ether_vlan_header *);
2609 eh = mtod(m, struct ether_header *);
2612 * fix checksum by subtracting ETHER_VLAN_ENCAP_LEN bytes
2613 * after what the firmware thought was the end of the ethernet
2617 /* put checksum into host byte order */
2618 *csum = ntohs(*csum);
/* the 32 bits located right after a plain Ethernet header */
2619 partial = ntohl(*(uint32_t *)(mtod(m, char *) + ETHER_HDR_LEN));
/* add the complement, then the end-around carry, then fold to 16 bits */
2620 (*csum) += ~partial;
2621 (*csum) += ((*csum) < ~partial);
2622 (*csum) = ((*csum) >> 16) + ((*csum) & 0xFFFF);
2623 (*csum) = ((*csum) >> 16) + ((*csum) & 0xFFFF);
2625 /* restore checksum to network byte order;
2626 later consumers expect this */
2627 *csum = htons(*csum);
2630 #ifdef MXGE_NEW_VLAN_API
2631 m->m_pkthdr.ether_vtag = ntohs(evl->evl_tag);
2635 mtag = m_tag_alloc(MTAG_VLAN, MTAG_VLAN_TAG, sizeof(u_int),
2639 VLAN_TAG_VALUE(mtag) = ntohs(evl->evl_tag);
2640 m_tag_prepend(m, mtag);
2644 m->m_flags |= M_VLANTAG;
2647 * Remove the 802.1q header by copying the Ethernet
2648 * addresses over it and adjusting the beginning of
2649 * the data in the mbuf. The encapsulated Ethernet
2650 * type field is already in place.
2652 bcopy((char *)evl, (char *)evl + ETHER_VLAN_ENCAP_LEN,
2653 ETHER_HDR_LEN - ETHER_TYPE_LEN);
2654 m_adj(m, ETHER_VLAN_ENCAP_LEN);
/*
 * Handle one frame received on the big receive ring.  The ring slot is
 * given a replacement buffer (on failure the frame is dropped and the
 * old mbuf stays in the ring), the filled mbuf is synced and unmapped
 * and the DMA maps are swapped.  The mbuf's header fields are then set,
 * a VLAN tag is stripped if present, the checksum is validated, and the
 * frame is offered to LRO or passed to the interface's input routine.
 * NOTE(review): several declarations, early returns and closing braces
 * are missing from this listing.
 */
2659 mxge_rx_done_big(struct mxge_slice_state *ss, uint32_t len,
2660 uint32_t csum, int lro)
2665 struct ether_header *eh;
2667 bus_dmamap_t old_map;
/* slot of this frame; the ring count advances by rx->nbufs per frame */
2673 idx = rx->cnt & rx->mask;
2674 rx->cnt += rx->nbufs;
2675 /* save a pointer to the received mbuf */
2676 m = rx->info[idx].m;
2677 /* try to replace the received mbuf */
2678 if (mxge_get_buf_big(ss, rx->extra_map, idx)) {
2679 /* drop the frame -- the old mbuf is re-cycled */
2684 /* unmap the received buffer */
2685 old_map = rx->info[idx].map;
2686 bus_dmamap_sync(rx->dmat, old_map, BUS_DMASYNC_POSTREAD);
2687 bus_dmamap_unload(rx->dmat, old_map);
2689 /* swap the bus_dmamap_t's */
2690 rx->info[idx].map = rx->extra_map;
2691 rx->extra_map = old_map;
2693 /* mcp implicitly skips 1st 2 bytes so that packet is properly
2695 m->m_data += MXGEFW_PAD;
2697 m->m_pkthdr.rcvif = ifp;
2698 m->m_len = m->m_pkthdr.len = len;
2700 eh = mtod(m, struct ether_header *);
2701 if (eh->ether_type == htons(ETHERTYPE_VLAN)) {
2702 mxge_vlan_tag_remove(m, &csum);
2704 /* if the checksum is valid, mark it in the mbuf header */
2706 if ((ifp->if_capenable & (IFCAP_RXCSUM_IPV6 | IFCAP_RXCSUM)) &&
2707 (0 == mxge_rx_csum(m, csum))) {
2708 /* Tell the stack that the checksum is good */
2709 m->m_pkthdr.csum_data = 0xffff;
2710 m->m_pkthdr.csum_flags = CSUM_PSEUDO_HDR |
2713 #if defined(INET) || defined (INET6)
2714 if (lro && (0 == tcp_lro_rx(&ss->lc, m, 0)))
2718 /* flowid only valid if RSS hashing is enabled */
2719 if (sc->num_slices > 1) {
2720 m->m_pkthdr.flowid = (ss - sc->ss);
2721 m->m_flags |= M_FLOWID;
2723 /* pass the frame up the stack */
2724 (*ifp->if_input)(ifp, m);
/*
 * Handle one frame received on the small receive ring.  The steps match
 * mxge_rx_done_big(): replace the ring buffer (dropping the frame when
 * that fails), sync/unmap the filled mbuf and swap the DMA maps, set the
 * mbuf's header fields, strip a VLAN tag if present, validate the
 * checksum, then offer the frame to LRO or pass it to the interface's
 * input routine.
 * NOTE(review): several declarations, the ring-count update, early
 * returns and closing braces are missing from this listing.
 */
2728 mxge_rx_done_small(struct mxge_slice_state *ss, uint32_t len,
2729 uint32_t csum, int lro)
2733 struct ether_header *eh;
2736 bus_dmamap_t old_map;
2742 idx = rx->cnt & rx->mask;
2744 /* save a pointer to the received mbuf */
2745 m = rx->info[idx].m;
2746 /* try to replace the received mbuf */
2747 if (mxge_get_buf_small(ss, rx->extra_map, idx)) {
2748 /* drop the frame -- the old mbuf is re-cycled */
2753 /* unmap the received buffer */
2754 old_map = rx->info[idx].map;
2755 bus_dmamap_sync(rx->dmat, old_map, BUS_DMASYNC_POSTREAD);
2756 bus_dmamap_unload(rx->dmat, old_map);
2758 /* swap the bus_dmamap_t's */
2759 rx->info[idx].map = rx->extra_map;
2760 rx->extra_map = old_map;
2762 /* mcp implicitly skips 1st 2 bytes so that packet is properly
2764 m->m_data += MXGEFW_PAD;
2766 m->m_pkthdr.rcvif = ifp;
2767 m->m_len = m->m_pkthdr.len = len;
2769 eh = mtod(m, struct ether_header *);
2770 if (eh->ether_type == htons(ETHERTYPE_VLAN)) {
2771 mxge_vlan_tag_remove(m, &csum);
2773 /* if the checksum is valid, mark it in the mbuf header */
2774 if ((ifp->if_capenable & (IFCAP_RXCSUM_IPV6 | IFCAP_RXCSUM)) &&
2775 (0 == mxge_rx_csum(m, csum))) {
2776 /* Tell the stack that the checksum is good */
2777 m->m_pkthdr.csum_data = 0xffff;
2778 m->m_pkthdr.csum_flags = CSUM_PSEUDO_HDR |
2781 #if defined(INET) || defined (INET6)
2782 if (lro && (0 == tcp_lro_rx(&ss->lc, m, csum)))
2786 /* flowid only valid if RSS hashing is enabled */
2787 if (sc->num_slices > 1) {
2788 m->m_pkthdr.flowid = (ss - sc->ss);
2789 m->m_flags |= M_FLOWID;
2791 /* pass the frame up the stack */
2792 (*ifp->if_input)(ifp, m);
/*
 * Process the slice's receive-completion ring.  Entries with a non-zero
 * length are consumed one by one and dispatched to the small or big
 * receive handler according to the frame length; the number handled per
 * call is bounded.  Afterwards every LRO entry still active is flushed.
 * NOTE(review): the update of rx_done->cnt and the loop exit taken when
 * the limit is hit are missing from this listing.
 */
2796 mxge_clean_rx_done(struct mxge_slice_state *ss)
2798 mxge_rx_done_t *rx_done = &ss->rx_done;
2804 lro = ss->sc->ifp->if_capenable & IFCAP_LRO;
2805 while (rx_done->entry[rx_done->idx].length != 0) {
2806 length = ntohs(rx_done->entry[rx_done->idx].length);
/* a zero length is what the loop condition treats as "no entry" */
2807 rx_done->entry[rx_done->idx].length = 0;
2808 checksum = rx_done->entry[rx_done->idx].checksum;
/* frames that fit MHLEN less the firmware pad go to the small handler */
2809 if (length <= (MHLEN - MXGEFW_PAD))
2810 mxge_rx_done_small(ss, length, checksum, lro);
2812 mxge_rx_done_big(ss, length, checksum, lro);
2814 rx_done->idx = rx_done->cnt & rx_done->mask;
2816 /* limit potential for livelock */
2817 if (__predict_false(++limit > rx_done->mask / 2))
2820 #if defined(INET) || defined (INET6)
/* flush whatever LRO still holds */
2821 while (!SLIST_EMPTY(&ss->lc.lro_active)) {
2822 struct lro_entry *lro = SLIST_FIRST(&ss->lc.lro_active);
2823 SLIST_REMOVE_HEAD(&ss->lc.lro_active, next);
2824 tcp_lro_flush(&ss->lc, lro);
/*
 * Reclaim completed transmit slots until tx->pkt_done reaches mcp_idx.
 * For slots that carry an mbuf, the byte count is accounted, the slot is
 * cleared and its DMA map unloaded.  Then, with the tx mutex held,
 * IFF_DRV_OACTIVE is cleared and transmission restarted once fewer than
 * a quarter of the ring's slots are outstanding.  With IFNET_BUF_RING
 * and more than one slice, an idle queue is marked inactive.
 * NOTE(review): the advancing of tx->done and tx->pkt_done, the freeing
 * of the mbuf and several braces are missing from this listing.
 */
2831 mxge_tx_done(struct mxge_slice_state *ss, uint32_t mcp_idx)
2842 while (tx->pkt_done != mcp_idx) {
2843 idx = tx->done & tx->mask;
2845 m = tx->info[idx].m;
2846 /* mbuf and DMA map only attached to the first
2849 ss->obytes += m->m_pkthdr.len;
2850 if (m->m_flags & M_MCAST)
2853 tx->info[idx].m = NULL;
2854 map = tx->info[idx].map;
2855 bus_dmamap_unload(tx->dmat, map);
2858 if (tx->info[idx].flag) {
2859 tx->info[idx].flag = 0;
2864 /* If we have space, clear IFF_OACTIVE to tell the stack that
2865 it's OK to send packets */
2866 #ifdef IFNET_BUF_RING
2867 flags = &ss->if_drv_flags;
2869 flags = &ifp->if_drv_flags;
/* restart only when under a quarter of the ring is still in flight */
2871 mtx_lock(&ss->tx.mtx);
2872 if ((*flags) & IFF_DRV_OACTIVE &&
2873 tx->req - tx->done < (tx->mask + 1)/4) {
2874 *(flags) &= ~IFF_DRV_OACTIVE;
2876 mxge_start_locked(ss);
2878 #ifdef IFNET_BUF_RING
2879 if ((ss->sc->num_slices > 1) && (tx->req == tx->done)) {
2880 /* let the NIC stop polling this queue, since there
2881 * are no more transmits pending */
2882 if (tx->req == tx->done) {
2884 tx->queue_active = 0;
2890 mtx_unlock(&ss->tx.mtx);
/*
 * Media table for XFP modules.  Each row is { ifmedia flag, compliance
 * bitmask, printable name }.  mxge_media_probe() compares row 0 for
 * equality against the whole compliance byte and tests the remaining
 * rows bitwise, in order.  Rows whose flag is 0 carry no ifmedia type
 * in this table.
 */
2894 static struct mxge_media_type mxge_xfp_media_types[] =
2896 {IFM_10G_CX4, 0x7f, "10GBASE-CX4 (module)"},
2897 {IFM_10G_SR, (1 << 7), "10GBASE-SR"},
2898 {IFM_10G_LR, (1 << 6), "10GBASE-LR"},
2899 {0, (1 << 5), "10GBASE-ER"},
2900 {IFM_10G_LRM, (1 << 4), "10GBASE-LRM"},
2901 {0, (1 << 3), "10GBASE-SW"},
2902 {0, (1 << 2), "10GBASE-LW"},
2903 {0, (1 << 1), "10GBASE-EW"},
2904 {0, (1 << 0), "Reserved"}
/*
 * Media table for SFP+ modules, same row layout as the XFP table:
 * { ifmedia flag, compliance bitmask, printable name }.  Row 0 has a
 * bitmask of 0, so the equality test in mxge_media_probe() selects
 * Twinax when the compliance byte reads as zero; the last row also
 * maps bit 0 to Twinax.
 */
2906 static struct mxge_media_type mxge_sfp_media_types[] =
2908 {IFM_10G_TWINAX, 0, "10GBASE-Twinax"},
2909 {0, (1 << 7), "Reserved"},
2910 {IFM_10G_LRM, (1 << 6), "10GBASE-LRM"},
2911 {IFM_10G_LR, (1 << 5), "10GBASE-LR"},
2912 {IFM_10G_SR, (1 << 4), "10GBASE-SR"},
2913 {IFM_10G_TWINAX,(1 << 0), "10GBASE-Twinax"}
/*
 * mxge_media_set: add media_type (combined with IFM_ETHER | IFM_FDX) to
 * the ifmedia list of sc, select it as the current entry, remember it in
 * sc->current_media, and copy the media word of the selected entry into
 * sc->media.ifm_media.
 */
2917 mxge_media_set(mxge_softc_t *sc, int media_type)
2921 ifmedia_add(&sc->media, IFM_ETHER | IFM_FDX | media_type,
2923 ifmedia_set(&sc->media, IFM_ETHER | IFM_FDX | media_type);
2924 sc->current_media = media_type;
2925 sc->media.ifm_media = sc->media.ifm_cur->ifm_media;
/*
 * mxge_media_init: reset the ifmedia list and classify the connector.
 *
 * All media entries are removed and IFM_AUTO is installed.  The product
 * code string is then scanned past its third dash and the character(s)
 * found there select sc->connector:
 *   C (first or second character) - MXGE_CX4, media set to IFM_10G_CX4
 *   Q                             - MXGE_QRF (no media word is set)
 *   R                             - MXGE_XFP
 *   S (first or second character) - MXGE_SFP
 * Anything else is reported as an unknown media type.  A missing product
 * code, or one with fewer than three dashes, is reported as well.
 */
2929 mxge_media_init(mxge_softc_t *sc)
2934 ifmedia_removeall(&sc->media);
2935 mxge_media_set(sc, IFM_AUTO);
2938 * parse the product code to deterimine the interface type
2939 * (CX4, XFP, Quad Ribbon Fiber) by looking at the character
2940 * after the 3rd dash in the driver's cached copy of the
2941 * EEPROM's product code string.
2943 ptr = sc->product_code_string;
2945 device_printf(sc->dev, "Missing product code\n");
/* the ptr++ in the loop header steps past each dash that strchr finds */
2949 for (i = 0; i < 3; i++, ptr++) {
2950 ptr = strchr(ptr, '-');
2952 device_printf(sc->dev,
2953 "only %d dashes in PC?!?\n", i);
2957 if (*ptr == 'C' || *(ptr +1) == 'C') {
2959 sc->connector = MXGE_CX4;
2960 mxge_media_set(sc, IFM_10G_CX4);
2961 } else if (*ptr == 'Q') {
2962 /* -Q is Quad Ribbon Fiber */
2963 sc->connector = MXGE_QRF;
2964 device_printf(sc->dev, "Quad Ribbon Fiber Media\n");
2965 /* FreeBSD has no media type for Quad ribbon fiber */
2966 } else if (*ptr == 'R') {
2968 sc->connector = MXGE_XFP;
2969 } else if (*ptr == 'S' || *(ptr +1) == 'S') {
2970 /* -S or -2S is SFP+ */
2971 sc->connector = MXGE_SFP;
2973 device_printf(sc->dev, "Unknown media type: %c\n", *ptr);
2978 * Determine the media type for a NIC. Some XFPs will identify
2979 * themselves only when their link is up, so this is initiated via a
2980 * link up interrupt. However, this can potentially take up to
2981 * several milliseconds, so it is run via the watchdog routine, rather
2982 * than in the interrupt handler itself.
/*
 * mxge_media_probe: identify the pluggable module and update the media.
 *
 * Clears sc->need_media_probe, then picks the XFP or SFP media table
 * according to sc->connector; other connectors need no probing.  The
 * compliance byte is requested with MXGEFW_CMD_I2C_READ and fetched with
 * MXGEFW_CMD_I2C_BYTE, retrying while the command reports EBUSY for at
 * most 50 iterations.  The byte returned in cmd.data0 is compared for
 * equality with table row 0 and then bitwise with the remaining rows;
 * on a match whose flag differs from sc->current_media the media list is
 * rebuilt with mxge_media_init() and mxge_media_set().  When nothing
 * matches, the raw value is reported as unknown.
 */
2985 mxge_media_probe(mxge_softc_t *sc)
2990 struct mxge_media_type *mxge_media_types = NULL;
2991 int i, err, ms, mxge_media_type_entries;
2994 sc->need_media_probe = 0;
2996 if (sc->connector == MXGE_XFP) {
2998 mxge_media_types = mxge_xfp_media_types;
2999 mxge_media_type_entries =
3000 sizeof (mxge_xfp_media_types) /
3001 sizeof (mxge_xfp_media_types[0]);
3002 byte = MXGE_XFP_COMPLIANCE_BYTE;
3004 } else if (sc->connector == MXGE_SFP) {
3005 /* -S or -2S is SFP+ */
3006 mxge_media_types = mxge_sfp_media_types;
3007 mxge_media_type_entries =
3008 sizeof (mxge_sfp_media_types) /
3009 sizeof (mxge_sfp_media_types[0]);
3013 /* nothing to do; media type cannot change */
3018 * At this point we know the NIC has an XFP cage, so now we
3019 * try to determine what is in the cage by using the
3020 * firmware's XFP I2C commands to read the XFP 10GbE compilance
3021 * register. We read just one byte, which may take over
3025 cmd.data0 = 0; /* just fetch 1 byte, not all 256 */
3027 err = mxge_send_cmd(sc, MXGEFW_CMD_I2C_READ, &cmd);
3028 if (err == MXGEFW_CMD_ERROR_I2C_FAILURE) {
3029 device_printf(sc->dev, "failed to read XFP\n");
3031 if (err == MXGEFW_CMD_ERROR_I2C_ABSENT) {
3032 device_printf(sc->dev, "Type R/S with no XFP!?!?\n");
3034 if (err != MXGEFW_CMD_OK) {
3038 /* now we wait for the data to be cached */
3040 err = mxge_send_cmd(sc, MXGEFW_CMD_I2C_BYTE, &cmd);
3041 for (ms = 0; (err == EBUSY) && (ms < 50); ms++) {
3044 err = mxge_send_cmd(sc, MXGEFW_CMD_I2C_BYTE, &cmd);
3046 if (err != MXGEFW_CMD_OK) {
3047 device_printf(sc->dev, "failed to read %s (%d, %dms)\n",
3048 cage_type, err, ms);
/* row 0 is matched by equality, unlike the bitwise test used below */
3052 if (cmd.data0 == mxge_media_types[0].bitmask) {
3054 device_printf(sc->dev, "%s:%s\n", cage_type,
3055 mxge_media_types[0].name);
3056 if (sc->current_media != mxge_media_types[0].flag) {
3057 mxge_media_init(sc);
3058 mxge_media_set(sc, mxge_media_types[0].flag);
3062 for (i = 1; i < mxge_media_type_entries; i++) {
3063 if (cmd.data0 & mxge_media_types[i].bitmask) {
3065 device_printf(sc->dev, "%s:%s\n",
3067 mxge_media_types[i].name);
3069 if (sc->current_media != mxge_media_types[i].flag) {
3070 mxge_media_init(sc);
3071 mxge_media_set(sc, mxge_media_types[i].flag);
3077 device_printf(sc->dev, "%s media 0x%x unknown\n",
3078 cage_type, cmd.data0);
/*
 * mxge_intr: interrupt handler for one slice.
 *
 * arg - the struct mxge_slice_state of the interrupting slice.
 *
 * Without IFNET_BUF_RING there is a short path that only cleans receive
 * completions and writes the irq claim word (the guarding condition is
 * not visible in this listing; the comment there speaks of non-zero
 * slices).  On the main path the handler checks stats->valid (the
 * action taken when it is zero is not visible), deasserts a legacy irq
 * by writing 0 to *sc->irq_deassert, and then loops handling
 * transmit completions (stats->send_done_count, converted with
 * be32toh) and receive completions until stats->valid reads as zero.
 * On the first slice, when stats_updated is set, it also propagates link
 * state changes to the stack, logs a change in rdma_tags_available and
 * accumulates link_down events into sc->down_cnt.  Finally it writes
 * be32toh(3) to the irq claim word(s).
 */
3084 mxge_intr(void *arg)
3086 struct mxge_slice_state *ss = arg;
3087 mxge_softc_t *sc = ss->sc;
3088 mcp_irq_data_t *stats = ss->fw_stats;
3089 mxge_tx_ring_t *tx = &ss->tx;
3090 mxge_rx_done_t *rx_done = &ss->rx_done;
3091 uint32_t send_done_count;
3095 #ifndef IFNET_BUF_RING
3096 /* an interrupt on a non-zero slice is implicitly valid
3097 since MSI-X irqs are not shared */
3099 mxge_clean_rx_done(ss);
3100 *ss->irq_claim = be32toh(3);
3105 /* make sure the DMA has finished */
3106 if (!stats->valid) {
3109 valid = stats->valid;
3111 if (sc->legacy_irq) {
3112 /* lower legacy IRQ */
3113 *sc->irq_deassert = 0;
3114 if (!mxge_deassert_wait)
3115 /* don't wait for conf. that irq is low */
3121 /* loop while waiting for legacy irq deassertion */
3123 /* check for transmit completes and receives */
3124 send_done_count = be32toh(stats->send_done_count);
3125 while ((send_done_count != tx->pkt_done) ||
3126 (rx_done->entry[rx_done->idx].length != 0)) {
3127 if (send_done_count != tx->pkt_done)
3128 mxge_tx_done(ss, (int)send_done_count);
3129 mxge_clean_rx_done(ss);
/* re-read the counter so completions that arrived meanwhile are seen */
3130 send_done_count = be32toh(stats->send_done_count);
3132 if (sc->legacy_irq && mxge_deassert_wait)
3134 } while (*((volatile uint8_t *) &stats->valid));
3136 /* fw link & error stats meaningful only on the first slice */
3137 if (__predict_false((ss == sc->ss) && stats->stats_updated)) {
3138 if (sc->link_state != stats->link_up) {
3139 sc->link_state = stats->link_up;
3140 if (sc->link_state) {
3141 if_link_state_change(sc->ifp, LINK_STATE_UP);
3142 if_initbaudrate(sc->ifp, IF_Gbps(10));
3144 device_printf(sc->dev, "link up\n");
3146 if_link_state_change(sc->ifp, LINK_STATE_DOWN);
3147 sc->ifp->if_baudrate = 0;
3149 device_printf(sc->dev, "link down\n");
/* a link transition schedules a media re-probe */
3151 sc->need_media_probe = 1;
3153 if (sc->rdma_tags_available !=
3154 be32toh(stats->rdma_tags_available)) {
3155 sc->rdma_tags_available =
3156 be32toh(stats->rdma_tags_available);
3157 device_printf(sc->dev, "RDMA timed out! %d tags "
3158 "left\n", sc->rdma_tags_available);
3161 if (stats->link_down) {
3162 sc->down_cnt += stats->link_down;
3164 if_link_state_change(sc->ifp, LINK_STATE_DOWN);
3168 /* check to see if we have rx token to pass back */
3170 *ss->irq_claim = be32toh(3);
3171 *(ss->irq_claim + 1) = be32toh(3);
/*
 * mxge_init: bring the interface up if it is not already running.
 *
 * arg - the mxge_softc_t of the device.
 *
 * Under sc->driver_mtx, calls mxge_open() when IFF_DRV_RUNNING is clear.
 * The result of mxge_open() is explicitly discarded.
 */
3175 mxge_init(void *arg)
3177 mxge_softc_t *sc = arg;
3178 struct ifnet *ifp = sc->ifp;
3181 mtx_lock(&sc->driver_mtx);
3182 if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
3183 (void) mxge_open(sc);
3184 mtx_unlock(&sc->driver_mtx);
/*
 * mxge_free_slice_mbufs: release every mbuf still held by the rings of
 * one slice.
 *
 * When INET or INET6 is configured the LRO state is freed first.  Then,
 * for each slot of rx_big, rx_small and tx that holds an mbuf, the DMA
 * map is unloaded, the mbuf chain is freed and the slot pointer is set
 * to NULL.  The tx pass is guarded by a test of ss->tx.info against NULL
 * and also clears the flag of every tx slot.
 */
3190 mxge_free_slice_mbufs(struct mxge_slice_state *ss)
3194 #if defined(INET) || defined(INET6)
3195 tcp_lro_free(&ss->lc);
/* mask is entries - 1, hence the inclusive upper bound */
3197 for (i = 0; i <= ss->rx_big.mask; i++) {
3198 if (ss->rx_big.info[i].m == NULL)
3200 bus_dmamap_unload(ss->rx_big.dmat,
3201 ss->rx_big.info[i].map);
3202 m_freem(ss->rx_big.info[i].m);
3203 ss->rx_big.info[i].m = NULL;
3206 for (i = 0; i <= ss->rx_small.mask; i++) {
3207 if (ss->rx_small.info[i].m == NULL)
3209 bus_dmamap_unload(ss->rx_small.dmat,
3210 ss->rx_small.info[i].map);
3211 m_freem(ss->rx_small.info[i].m);
3212 ss->rx_small.info[i].m = NULL;
3215 /* transmit ring used only on the first slice */
3216 if (ss->tx.info == NULL)
3219 for (i = 0; i <= ss->tx.mask; i++) {
3220 ss->tx.info[i].flag = 0;
3221 if (ss->tx.info[i].m == NULL)
3223 bus_dmamap_unload(ss->tx.dmat,
3224 ss->tx.info[i].map);
3225 m_freem(ss->tx.info[i].m);
3226 ss->tx.info[i].m = NULL;
/*
 * mxge_free_mbufs: call mxge_free_slice_mbufs() on each of the
 * sc->num_slices slices of the device.
 */
3231 mxge_free_mbufs(mxge_softc_t *sc)
3235 for (slice = 0; slice < sc->num_slices; slice++)
3236 mxge_free_slice_mbufs(&sc->ss[slice]);
/*
 * mxge_free_slice_rings: tear down the ring bookkeeping of one slice.
 *
 * Frees the rx_done DMA area, the tx request copy block, the tx segment
 * list and both rx shadow rings, then for tx, rx_small and rx_big
 * destroys the per-slot DMA maps (plus the extra map of each rx ring),
 * destroys the DMA tag and frees the host info array.  Each resource is
 * tested against NULL before release, and the pointers visible here are
 * set to NULL afterwards.
 */
3240 mxge_free_slice_rings(struct mxge_slice_state *ss)
3245 if (ss->rx_done.entry != NULL)
3246 mxge_dma_free(&ss->rx_done.dma);
3247 ss->rx_done.entry = NULL;
3249 if (ss->tx.req_bytes != NULL)
3250 free(ss->tx.req_bytes, M_DEVBUF);
3251 ss->tx.req_bytes = NULL;
3253 if (ss->tx.seg_list != NULL)
3254 free(ss->tx.seg_list, M_DEVBUF);
3255 ss->tx.seg_list = NULL;
3257 if (ss->rx_small.shadow != NULL)
3258 free(ss->rx_small.shadow, M_DEVBUF);
3259 ss->rx_small.shadow = NULL;
3261 if (ss->rx_big.shadow != NULL)
3262 free(ss->rx_big.shadow, M_DEVBUF);
3263 ss->rx_big.shadow = NULL;
/* maps must be destroyed before the tag they were created from */
3265 if (ss->tx.info != NULL) {
3266 if (ss->tx.dmat != NULL) {
3267 for (i = 0; i <= ss->tx.mask; i++) {
3268 bus_dmamap_destroy(ss->tx.dmat,
3269 ss->tx.info[i].map);
3271 bus_dma_tag_destroy(ss->tx.dmat);
3273 free(ss->tx.info, M_DEVBUF);
3277 if (ss->rx_small.info != NULL) {
3278 if (ss->rx_small.dmat != NULL) {
3279 for (i = 0; i <= ss->rx_small.mask; i++) {
3280 bus_dmamap_destroy(ss->rx_small.dmat,
3281 ss->rx_small.info[i].map);
3283 bus_dmamap_destroy(ss->rx_small.dmat,
3284 ss->rx_small.extra_map);
3285 bus_dma_tag_destroy(ss->rx_small.dmat);
3287 free(ss->rx_small.info, M_DEVBUF);
3289 ss->rx_small.info = NULL;
3291 if (ss->rx_big.info != NULL) {
3292 if (ss->rx_big.dmat != NULL) {
3293 for (i = 0; i <= ss->rx_big.mask; i++) {
3294 bus_dmamap_destroy(ss->rx_big.dmat,
3295 ss->rx_big.info[i].map);
3297 bus_dmamap_destroy(ss->rx_big.dmat,
3298 ss->rx_big.extra_map);
3299 bus_dma_tag_destroy(ss->rx_big.dmat);
3301 free(ss->rx_big.info, M_DEVBUF);
3303 ss->rx_big.info = NULL;
/*
 * mxge_free_rings: call mxge_free_slice_rings() on each of the
 * sc->num_slices slices of the device.
 */
3307 mxge_free_rings(mxge_softc_t *sc)
3311 for (slice = 0; slice < sc->num_slices; slice++)
3312 mxge_free_slice_rings(&sc->ss[slice]);
/*
 * mxge_alloc_slice_rings: allocate the host-side ring state of one slice.
 *
 * ss              - slice to set up.
 * rx_ring_entries - slot count of each receive ring; the masks are
 *                   computed as entries - 1, so a power of two is
 *                   presumably expected (TODO confirm).
 * tx_ring_entries - slot count of the transmit ring, same remark.
 *
 * Receive side: sets the ring masks (rx_done gets twice the entries),
 * allocates zeroed shadow and info arrays, creates the rx_small and
 * rx_big DMA tags and one DMA map per slot plus one extra map per ring.
 * Transmit side: without IFNET_BUF_RING there is a test that ss is the
 * first slice before this part (its action is not visible here).  Sets
 * mask and max_desc, allocates the request copy block (with room to
 * align req_list to 8 bytes), the segment list and the zeroed info
 * array, then creates the tx DMA tag and one map per slot.  Failures of
 * the busdma calls are reported with device_printf().
 */
3316 mxge_alloc_slice_rings(struct mxge_slice_state *ss, int rx_ring_entries,
3317 int tx_ring_entries)
3319 mxge_softc_t *sc = ss->sc;
3323 /* allocate per-slice receive resources */
3325 ss->rx_small.mask = ss->rx_big.mask = rx_ring_entries - 1;
3326 ss->rx_done.mask = (2 * rx_ring_entries) - 1;
3328 /* allocate the rx shadow rings */
3329 bytes = rx_ring_entries * sizeof (*ss->rx_small.shadow);
3330 ss->rx_small.shadow = malloc(bytes, M_DEVBUF, M_ZERO|M_WAITOK);
3332 bytes = rx_ring_entries * sizeof (*ss->rx_big.shadow);
3333 ss->rx_big.shadow = malloc(bytes, M_DEVBUF, M_ZERO|M_WAITOK);
3335 /* allocate the rx host info rings */
3336 bytes = rx_ring_entries * sizeof (*ss->rx_small.info);
3337 ss->rx_small.info = malloc(bytes, M_DEVBUF, M_ZERO|M_WAITOK);
3339 bytes = rx_ring_entries * sizeof (*ss->rx_big.info);
3340 ss->rx_big.info = malloc(bytes, M_DEVBUF, M_ZERO|M_WAITOK);
3342 /* allocate the rx busdma resources */
3343 err = bus_dma_tag_create(sc->parent_dmat, /* parent */
3345 4096, /* boundary */
3346 BUS_SPACE_MAXADDR, /* low */
3347 BUS_SPACE_MAXADDR, /* high */
3348 NULL, NULL, /* filter */
3349 MHLEN, /* maxsize */
3351 MHLEN, /* maxsegsize */
3352 BUS_DMA_ALLOCNOW, /* flags */
3353 NULL, NULL, /* lock */
3354 &ss->rx_small.dmat); /* tag */
3356 device_printf(sc->dev, "Err %d allocating rx_small dmat\n",
3361 err = bus_dma_tag_create(sc->parent_dmat, /* parent */
3363 #if MXGE_VIRT_JUMBOS
3364 4096, /* boundary */
3368 BUS_SPACE_MAXADDR, /* low */
3369 BUS_SPACE_MAXADDR, /* high */
3370 NULL, NULL, /* filter */
3371 3*4096, /* maxsize */
3372 #if MXGE_VIRT_JUMBOS
3374 4096, /* maxsegsize*/
3377 MJUM9BYTES, /* maxsegsize*/
3379 BUS_DMA_ALLOCNOW, /* flags */
3380 NULL, NULL, /* lock */
3381 &ss->rx_big.dmat); /* tag */
3383 device_printf(sc->dev, "Err %d allocating rx_big dmat\n",
3387 for (i = 0; i <= ss->rx_small.mask; i++) {
3388 err = bus_dmamap_create(ss->rx_small.dmat, 0,
3389 &ss->rx_small.info[i].map);
3391 device_printf(sc->dev, "Err %d rx_small dmamap\n",
3396 err = bus_dmamap_create(ss->rx_small.dmat, 0,
3397 &ss->rx_small.extra_map);
3399 device_printf(sc->dev, "Err %d extra rx_small dmamap\n",
3404 for (i = 0; i <= ss->rx_big.mask; i++) {
3405 err = bus_dmamap_create(ss->rx_big.dmat, 0,
3406 &ss->rx_big.info[i].map);
3408 device_printf(sc->dev, "Err %d rx_big dmamap\n",
3413 err = bus_dmamap_create(ss->rx_big.dmat, 0,
3414 &ss->rx_big.extra_map);
3416 device_printf(sc->dev, "Err %d extra rx_big dmamap\n",
3421 /* now allocate TX resources */
3423 #ifndef IFNET_BUF_RING
3424 /* only use a single TX ring for now */
3425 if (ss != ss->sc->ss)
3429 ss->tx.mask = tx_ring_entries - 1;
3430 ss->tx.max_desc = MIN(MXGE_MAX_SEND_DESC, tx_ring_entries / 4);
3433 /* allocate the tx request copy block */
3435 sizeof (*ss->tx.req_list) * (ss->tx.max_desc + 4);
3436 ss->tx.req_bytes = malloc(bytes, M_DEVBUF, M_WAITOK);
3437 /* ensure req_list entries are aligned to 8 bytes */
3438 ss->tx.req_list = (mcp_kreq_ether_send_t *)
3439 ((unsigned long)(ss->tx.req_bytes + 7) & ~7UL);
3441 /* allocate the tx busdma segment list */
3442 bytes = sizeof (*ss->tx.seg_list) * ss->tx.max_desc;
3443 ss->tx.seg_list = (bus_dma_segment_t *)
3444 malloc(bytes, M_DEVBUF, M_WAITOK);
3446 /* allocate the tx host info ring */
3447 bytes = tx_ring_entries * sizeof (*ss->tx.info);
3448 ss->tx.info = malloc(bytes, M_DEVBUF, M_ZERO|M_WAITOK);
3450 /* allocate the tx busdma resources */
3451 err = bus_dma_tag_create(sc->parent_dmat, /* parent */
3453 sc->tx_boundary, /* boundary */
3454 BUS_SPACE_MAXADDR, /* low */
3455 BUS_SPACE_MAXADDR, /* high */
3456 NULL, NULL, /* filter */
3457 65536 + 256, /* maxsize */
3458 ss->tx.max_desc - 2, /* num segs */
3459 sc->tx_boundary, /* maxsegsz */
3460 BUS_DMA_ALLOCNOW, /* flags */
3461 NULL, NULL, /* lock */
3462 &ss->tx.dmat); /* tag */
3465 device_printf(sc->dev, "Err %d allocating tx dmat\n",
3470 /* now use these tags to setup dmamaps for each slot
3472 for (i = 0; i <= ss->tx.mask; i++) {
3473 err = bus_dmamap_create(ss->tx.dmat, 0,
3474 &ss->tx.info[i].map);
3476 device_printf(sc->dev, "Err %d tx dmamap\n",
/*
 * mxge_alloc_rings: size and allocate the rings of every slice.
 *
 * Asks the firmware for the send ring size in bytes
 * (MXGEFW_CMD_GET_SEND_RING_SIZE) and derives the entry counts by
 * dividing the tx size by sizeof (mcp_kreq_ether_send_t) and
 * sc->rx_ring_size by sizeof (mcp_dma_addr_t).  The interface send queue
 * length is set to tx_ring_entries - 1 and marked ready, then
 * mxge_alloc_slice_rings() runs for each slice.  mxge_free_rings() is
 * called at the end of the visible body, presumably on the error path
 * (the surrounding control flow is not visible in this listing).
 */
3486 mxge_alloc_rings(mxge_softc_t *sc)
3490 int tx_ring_entries, rx_ring_entries;
3493 /* get ring sizes */
3494 err = mxge_send_cmd(sc, MXGEFW_CMD_GET_SEND_RING_SIZE, &cmd);
3495 tx_ring_size = cmd.data0;
3497 device_printf(sc->dev, "Cannot determine tx ring sizes\n");
3501 tx_ring_entries = tx_ring_size / sizeof (mcp_kreq_ether_send_t);
3502 rx_ring_entries = sc->rx_ring_size / sizeof (mcp_dma_addr_t);
3503 IFQ_SET_MAXLEN(&sc->ifp->if_snd, tx_ring_entries - 1);
3504 sc->ifp->if_snd.ifq_drv_maxlen = sc->ifp->if_snd.ifq_maxlen;
3505 IFQ_SET_READY(&sc->ifp->if_snd);
3507 for (slice = 0; slice < sc->num_slices; slice++) {
3508 err = mxge_alloc_slice_rings(&sc->ss[slice],
3517 mxge_free_rings(sc);
/*
 * mxge_choose_params: pick receive buffer geometry for a given MTU.
 *
 * mtu          - interface MTU.
 * big_buf_size - out: buffer size reported to the firmware.
 * cl_size      - out: mbuf cluster size to allocate.
 * nbufs        - out: buffers per frame (only the MXGE_VIRT_JUMBOS
 *                assignment is visible in this listing).
 *
 * The frame size is mtu plus Ethernet header, VLAN encapsulation and
 * MXGEFW_PAD.  Below MCLBYTES both sizes are MCLBYTES; below
 * MJUMPAGESIZE both are MJUMPAGESIZE; otherwise MJUM9BYTES clusters are
 * used, with 4096 byte buffers when MXGE_VIRT_JUMBOS is set and
 * MJUM9BYTES buffers in the other preprocessor branch.
 */
3524 mxge_choose_params(int mtu, int *big_buf_size, int *cl_size, int *nbufs)
3526 int bufsize = mtu + ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN + MXGEFW_PAD;
3528 if (bufsize < MCLBYTES) {
3529 /* easy, everything fits in a single buffer */
3530 *big_buf_size = MCLBYTES;
3531 *cl_size = MCLBYTES;
3536 if (bufsize < MJUMPAGESIZE) {
3537 /* still easy, everything still fits in a single buffer */
3538 *big_buf_size = MJUMPAGESIZE;
3539 *cl_size = MJUMPAGESIZE;
3543 #if MXGE_VIRT_JUMBOS
3544 /* now we need to use virtually contiguous buffers */
3545 *cl_size = MJUM9BYTES;
3546 *big_buf_size = 4096;
3547 *nbufs = mtu / 4096 + 1;
3548 /* needs to be a power of two, so round up */
3552 *cl_size = MJUM9BYTES;
3553 *big_buf_size = MJUM9BYTES;
/*
 * mxge_slice_open: prepare one slice for operation.
 *
 * ss      - slice to open.
 * nbufs   - big receive buffers per frame, stored in ss->rx_big.nbufs and
 *           used as the stride when stocking the big ring.
 * cl_size - cluster size stored in ss->rx_big.cl_size.
 *
 * Initialises LRO (INET or INET6 only), then queries the firmware for the
 * send, small-receive and big-receive ring offsets and turns them into
 * pointers inside sc->sram.  The send go/stop doorbells are located at
 * MXGEFW_ETH_SEND_GO / MXGEFW_ETH_SEND_STOP plus 64 bytes per slice
 * index.  The small ring is stocked slot by slot with
 * mxge_get_buf_small(); the big ring shadow is first filled with
 * 0xffffffff addresses and then stocked with mxge_get_buf_big() every
 * nbufs slots.  Failures are reported with device_printf().
 */
3559 mxge_slice_open(struct mxge_slice_state *ss, int nbufs, int cl_size)
/* index of this slice within the sc->ss array */
3568 slice = ss - sc->ss;
3570 #if defined(INET) || defined(INET6)
3571 (void)tcp_lro_init(&ss->lc);
3573 ss->lc.ifp = sc->ifp;
3575 /* get the lanai pointers to the send and receive rings */
3578 #ifndef IFNET_BUF_RING
3579 /* We currently only send from the first slice */
3583 err = mxge_send_cmd(sc, MXGEFW_CMD_GET_SEND_OFFSET, &cmd);
3585 (volatile mcp_kreq_ether_send_t *)(sc->sram + cmd.data0);
3586 ss->tx.send_go = (volatile uint32_t *)
3587 (sc->sram + MXGEFW_ETH_SEND_GO + 64 * slice);
3588 ss->tx.send_stop = (volatile uint32_t *)
3589 (sc->sram + MXGEFW_ETH_SEND_STOP + 64 * slice);
3590 #ifndef IFNET_BUF_RING
3594 err |= mxge_send_cmd(sc,
3595 MXGEFW_CMD_GET_SMALL_RX_OFFSET, &cmd);
3596 ss->rx_small.lanai =
3597 (volatile mcp_kreq_ether_recv_t *)(sc->sram + cmd.data0);
3599 err |= mxge_send_cmd(sc, MXGEFW_CMD_GET_BIG_RX_OFFSET, &cmd);
3601 (volatile mcp_kreq_ether_recv_t *)(sc->sram + cmd.data0);
3604 device_printf(sc->dev,
3605 "failed to get ring sizes or locations\n");
3609 /* stock receive rings */
3610 for (i = 0; i <= ss->rx_small.mask; i++) {
3611 map = ss->rx_small.info[i].map;
3612 err = mxge_get_buf_small(ss, map, i);
3614 device_printf(sc->dev, "alloced %d/%d smalls\n",
3615 i, ss->rx_small.mask + 1);
3619 for (i = 0; i <= ss->rx_big.mask; i++) {
3620 ss->rx_big.shadow[i].addr_low = 0xffffffff;
3621 ss->rx_big.shadow[i].addr_high = 0xffffffff;
3623 ss->rx_big.nbufs = nbufs;
3624 ss->rx_big.cl_size = cl_size;
3625 ss->rx_big.mlen = ss->sc->ifp->if_mtu + ETHER_HDR_LEN +
3626 ETHER_VLAN_ENCAP_LEN + MXGEFW_PAD;
/* one call per frame-sized group of nbufs slots */
3627 for (i = 0; i <= ss->rx_big.mask; i += ss->rx_big.nbufs) {
3628 map = ss->rx_big.info[i].map;
3629 err = mxge_get_buf_big(ss, map, i);
3631 device_printf(sc->dev, "alloced %d/%d bigs\n",
3632 i, ss->rx_big.mask + 1);
/*
 * mxge_open: reset the NIC and bring the interface to the running state.
 *
 * Steps visible in this listing, in order:
 *  - copy the current link-level address into sc->mac_addr;
 *  - mxge_reset(sc, 1);
 *  - with more than one slice, size and fetch the RSS indirection table,
 *    fill it with an identity mapping and enable RSS with
 *    mxge_rss_hash_type;
 *  - choose buffer geometry from the MTU and send
 *    MXGEFW_CMD_ALWAYS_USE_N_BIG_BUFFERS (a failure is reported only when
 *    nbufs > 1);
 *  - send the MTU and the small and big buffer sizes to the firmware;
 *  - hand each slice stats block to the firmware with
 *    MXGEFW_CMD_SET_STATS_DMA_V2; there is also a path using
 *    MXGEFW_CMD_SET_STATS_DMA_OBSOLETE that clears
 *    sc->fw_multicast_support (presumably the fallback when V2 fails;
 *    the condition is not visible here);
 *  - mxge_slice_open() for every slice;
 *  - MXGEFW_CMD_ETHERNET_UP;
 *  - set IFF_DRV_RUNNING and clear IFF_DRV_OACTIVE (per slice with
 *    IFNET_BUF_RING, otherwise on the ifnet).
 * mxge_free_mbufs() is called at the end of the visible body, presumably
 * on the error path.
 */
3640 mxge_open(mxge_softc_t *sc)
3643 int err, big_bytes, nbufs, slice, cl_size, i;
3645 volatile uint8_t *itable;
3646 struct mxge_slice_state *ss;
3648 /* Copy the MAC address in case it was overridden */
3649 bcopy(IF_LLADDR(sc->ifp), sc->mac_addr, ETHER_ADDR_LEN);
3651 err = mxge_reset(sc, 1);
3653 device_printf(sc->dev, "failed to reset\n");
3657 if (sc->num_slices > 1) {
3658 /* setup the indirection table */
3659 cmd.data0 = sc->num_slices;
3660 err = mxge_send_cmd(sc, MXGEFW_CMD_SET_RSS_TABLE_SIZE,
3663 err |= mxge_send_cmd(sc, MXGEFW_CMD_GET_RSS_TABLE_OFFSET,
3666 device_printf(sc->dev,
3667 "failed to setup rss tables\n");
3671 /* just enable an identity mapping */
3672 itable = sc->sram + cmd.data0;
3673 for (i = 0; i < sc->num_slices; i++)
3674 itable[i] = (uint8_t)i;
3677 cmd.data1 = mxge_rss_hash_type;
3678 err = mxge_send_cmd(sc, MXGEFW_CMD_SET_RSS_ENABLE, &cmd);
3680 device_printf(sc->dev, "failed to enable slices\n");
3686 mxge_choose_params(sc->ifp->if_mtu, &big_bytes, &cl_size, &nbufs);
3689 err = mxge_send_cmd(sc, MXGEFW_CMD_ALWAYS_USE_N_BIG_BUFFERS,
3691 /* error is only meaningful if we're trying to set
3692 MXGEFW_CMD_ALWAYS_USE_N_BIG_BUFFERS > 1 */
3693 if (err && nbufs > 1) {
3694 device_printf(sc->dev,
3695 "Failed to set alway-use-n to %d\n",
3699 /* Give the firmware the mtu and the big and small buffer
3700 sizes. The firmware wants the big buf size to be a power
3701 of two. Luckily, FreeBSD's clusters are powers of two */
3702 cmd.data0 = sc->ifp->if_mtu + ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
3703 err = mxge_send_cmd(sc, MXGEFW_CMD_SET_MTU, &cmd);
3704 cmd.data0 = MHLEN - MXGEFW_PAD;
3705 err |= mxge_send_cmd(sc, MXGEFW_CMD_SET_SMALL_BUFFER_SIZE,
3707 cmd.data0 = big_bytes;
3708 err |= mxge_send_cmd(sc, MXGEFW_CMD_SET_BIG_BUFFER_SIZE, &cmd);
3711 device_printf(sc->dev, "failed to setup params\n");
3715 /* Now give him the pointer to the stats block */
3717 #ifdef IFNET_BUF_RING
3718 slice < sc->num_slices;
3723 ss = &sc->ss[slice];
3725 MXGE_LOWPART_TO_U32(ss->fw_stats_dma.bus_addr);
3727 MXGE_HIGHPART_TO_U32(ss->fw_stats_dma.bus_addr);
/* data2 packs the block size in the low bits and the slice index above bit 16 */
3728 cmd.data2 = sizeof(struct mcp_irq_data);
3729 cmd.data2 |= (slice << 16);
3730 err |= mxge_send_cmd(sc, MXGEFW_CMD_SET_STATS_DMA_V2, &cmd);
3734 bus = sc->ss->fw_stats_dma.bus_addr;
3735 bus += offsetof(struct mcp_irq_data, send_done_count);
3736 cmd.data0 = MXGE_LOWPART_TO_U32(bus);
3737 cmd.data1 = MXGE_HIGHPART_TO_U32(bus);
3738 err = mxge_send_cmd(sc,
3739 MXGEFW_CMD_SET_STATS_DMA_OBSOLETE,
3741 /* Firmware cannot support multicast without STATS_DMA_V2 */
3742 sc->fw_multicast_support = 0;
3744 sc->fw_multicast_support = 1;
3748 device_printf(sc->dev, "failed to setup params\n");
3752 for (slice = 0; slice < sc->num_slices; slice++) {
3753 err = mxge_slice_open(&sc->ss[slice], nbufs, cl_size);
3755 device_printf(sc->dev, "couldn't open slice %d\n",
3761 /* Finally, start the firmware running */
3762 err = mxge_send_cmd(sc, MXGEFW_CMD_ETHERNET_UP, &cmd);
3764 device_printf(sc->dev, "Couldn't bring up link\n");
3767 #ifdef IFNET_BUF_RING
3768 for (slice = 0; slice < sc->num_slices; slice++) {
3769 ss = &sc->ss[slice];
3770 ss->if_drv_flags |= IFF_DRV_RUNNING;
3771 ss->if_drv_flags &= ~IFF_DRV_OACTIVE;
3774 sc->ifp->if_drv_flags |= IFF_DRV_RUNNING;
3775 sc->ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;
3781 mxge_free_mbufs(sc);
/*
 * mxge_close: stop the interface and release its mbufs.
 *
 * sc   - device state.
 * down - NOTE(review): how this flag is used is not visible in this
 *        listing; confirm against the full file.
 *
 * Clears IFF_DRV_RUNNING (per slice with IFNET_BUF_RING, and on the
 * ifnet), snapshots sc->down_cnt, sends MXGEFW_CMD_ETHERNET_DOWN and, if
 * the counter has not moved, waits 10 * sc->intr_coal_delay microseconds
 * for the link-down interrupt to bump it.  A counter that still has not
 * moved is reported.  Finally all ring mbufs are freed.
 */
3787 mxge_close(mxge_softc_t *sc, int down)
3790 int err, old_down_cnt;
3791 #ifdef IFNET_BUF_RING
3792 struct mxge_slice_state *ss;
3796 #ifdef IFNET_BUF_RING
3797 for (slice = 0; slice < sc->num_slices; slice++) {
3798 ss = &sc->ss[slice];
3799 ss->if_drv_flags &= ~IFF_DRV_RUNNING;
3802 sc->ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
/* sc->down_cnt is advanced by mxge_intr() when the firmware reports link_down */
3804 old_down_cnt = sc->down_cnt;
3806 err = mxge_send_cmd(sc, MXGEFW_CMD_ETHERNET_DOWN, &cmd);
3808 device_printf(sc->dev,
3809 "Couldn't bring down link\n");
3811 if (old_down_cnt == sc->down_cnt) {
3812 /* wait for down irq */
3813 DELAY(10 * sc->intr_coal_delay);
3816 if (old_down_cnt == sc->down_cnt) {
3817 device_printf(sc->dev, "never got down irq\n");
3820 mxge_free_mbufs(sc);
/*
 * mxge_setup_cfg_space: apply the driver PCI configuration tweaks.
 *
 * When the PCI Express capability is found, reads the register at
 * capability offset 0x12 and stores bits 4..9 of it in sc->link_width.
 * If sc->pectl is zero, the register at offset 0x8 is read, bits 12..14
 * are replaced by the value 5 (the existing comment calls this a 4KB max
 * read request) and the result is written back; there is also a path
 * that writes the saved sc->pectl back instead.  Bus mastering is
 * enabled at the end.
 *
 * NOTE(review): the third argument of pci_find_cap() below is garbled in
 * this copy of the file (a registered-trademark sign where the address
 * of the local reg is expected); restore it from the pristine source.
 */
3826 mxge_setup_cfg_space(mxge_softc_t *sc)
3828 device_t dev = sc->dev;
3830 uint16_t lnk, pectl;
3832 /* find the PCIe link width and set max read request to 4KB*/
3833 if (pci_find_cap(dev, PCIY_EXPRESS, ®) == 0) {
3834 lnk = pci_read_config(dev, reg + 0x12, 2);
3835 sc->link_width = (lnk >> 4) & 0x3f;
3837 if (sc->pectl == 0) {
3838 pectl = pci_read_config(dev, reg + 0x8, 2);
3839 pectl = (pectl & ~0x7000) | (5 << 12);
3840 pci_write_config(dev, reg + 0x8, pectl, 2);
3843 /* restore saved pectl after watchdog reset */
3844 pci_write_config(dev, reg + 0x8, sc->pectl, 2);
3848 /* Enable DMA and Memory space access */
3849 pci_enable_busmaster(dev);
/*
 * mxge_read_reboot: read the NIC reboot status through the vendor
 * specific PCI capability.
 *
 * Returns (uint32_t)-1 when the vendor capability cannot be found.
 * Otherwise writes 0x3 to capability offset 0x10 (read32 mode), writes
 * the register address 0xfffffff0 to offset 0x18, and returns the 32 bit
 * value read back from offset 0x14.
 */
3853 mxge_read_reboot(mxge_softc_t *sc)
3855 device_t dev = sc->dev;
3858 /* find the vendor specific offset */
3859 if (pci_find_cap(dev, PCIY_VENDOR, &vs) != 0) {
3860 device_printf(sc->dev,
3861 "could not find vendor specific offset\n");
3862 return (uint32_t)-1;
3864 /* enable read32 mode */
3865 pci_write_config(dev, vs + 0x10, 0x3, 1);
3866 /* tell NIC which register to read */
3867 pci_write_config(dev, vs + 0x18, 0xfffffff0, 4);
3868 return (pci_read_config(dev, vs + 0x14, 4));
/*
 * mxge_watchdog_reset: recover the NIC after the watchdog fired.
 *
 * Reads the PCI command register.  A value of 0xffff is re-read once
 * (the wait between the reads is not visible here) and, if unchanged,
 * the NIC is reported as gone.  If the bus-master bit is clear the NIC is
 * taken to have rebooted: the reboot status is printed, the link is
 * marked down, the tx mutex of every transmitting slice is taken, PCI
 * config space is restored with pci_cfg_restore(), the driver tweaks are
 * reapplied with mxge_setup_cfg_space(), firmware is reloaded and the
 * interface is reopened with mxge_open(); the tx locks are then released
 * and sc->watchdog_resets is incremented.  If the bus-master bit is still
 * set, the NIC did not reboot and no reset is done.  A failure is
 * reported; the callout is re-armed with mxge_ticks on the last
 * visible line.
 */
3872 mxge_watchdog_reset(mxge_softc_t *sc)
3874 struct pci_devinfo *dinfo;
3875 struct mxge_slice_state *ss;
3876 int err, running, s, num_tx_slices = 1;
3882 device_printf(sc->dev, "Watchdog reset!\n");
3885 * check to see if the NIC rebooted. If it did, then all of
3886 * PCI config space has been reset, and things like the
3887 * busmaster bit will be zero. If this is the case, then we
3888 * must restore PCI config space before the NIC can be used
3891 cmd = pci_read_config(sc->dev, PCIR_COMMAND, 2);
3892 if (cmd == 0xffff) {
3894 * maybe the watchdog caught the NIC rebooting; wait
3895 * up to 100ms for it to finish. If it does not come
3896 * back, then give up
3899 cmd = pci_read_config(sc->dev, PCIR_COMMAND, 2);
3900 if (cmd == 0xffff) {
3901 device_printf(sc->dev, "NIC disappeared!\n");
3904 if ((cmd & PCIM_CMD_BUSMASTEREN) == 0) {
3905 /* print the reboot status */
3906 reboot = mxge_read_reboot(sc);
3907 device_printf(sc->dev, "NIC rebooted, status = 0x%x\n",
3909 running = sc->ifp->if_drv_flags & IFF_DRV_RUNNING;
3913 * quiesce NIC so that TX routines will not try to
3914 * xmit after restoration of BAR
3917 /* Mark the link as down */
3918 if (sc->link_state) {
3920 if_link_state_change(sc->ifp,
3923 #ifdef IFNET_BUF_RING
3924 num_tx_slices = sc->num_slices;
3926 /* grab all TX locks to ensure no tx */
3927 for (s = 0; s < num_tx_slices; s++) {
3929 mtx_lock(&ss->tx.mtx);
3933 /* restore PCI configuration space */
3934 dinfo = device_get_ivars(sc->dev);
3935 pci_cfg_restore(sc->dev, dinfo);
3937 /* and redo any changes we made to our config space */
3938 mxge_setup_cfg_space(sc);
3941 err = mxge_load_firmware(sc, 0);
3943 device_printf(sc->dev,
3944 "Unable to re-load f/w\n");
3948 err = mxge_open(sc);
3949 /* release all TX locks */
3950 for (s = 0; s < num_tx_slices; s++) {
3952 #ifdef IFNET_BUF_RING
3953 mxge_start_locked(ss);
3955 mtx_unlock(&ss->tx.mtx);
3958 sc->watchdog_resets++;
3960 device_printf(sc->dev,
3961 "NIC did not reboot, not resetting\n");
3965 device_printf(sc->dev, "watchdog reset failed\n");
3969 callout_reset(&sc->co_hdl, mxge_ticks, mxge_tick, sc);
/*
 * mxge_watchdog_task: taskqueue handler that runs mxge_watchdog_reset()
 * with sc->driver_mtx held.
 *
 * arg     - the mxge_softc_t of the device.
 * pending - not used in the lines visible here.
 */
3974 mxge_watchdog_task(void *arg, int pending)
3976 mxge_softc_t *sc = arg;
3979 mtx_lock(&sc->driver_mtx);
3980 mxge_watchdog_reset(sc);
3981 mtx_unlock(&sc->driver_mtx);
/*
 * mxge_warn_stuck: log the transmit ring state of a slice that looks
 * hung.
 *
 * sc    - device state.
 * tx    - overwritten on the first line with the ring of sc->ss[slice],
 *         so the value passed by the caller is not used.
 * slice - index of the slice being reported.
 *
 * Prints req, done, queue_active, activate and deactivate of the ring,
 * followed by pkt_done and the firmware send_done_count of the first
 * slice (sc->ss), converted with be32toh.
 *
 * NOTE(review): the word struck in the first message looks like a typo
 * for stuck; it is a runtime string and is left unchanged here.
 */
3985 mxge_warn_stuck(mxge_softc_t *sc, mxge_tx_ring_t *tx, int slice)
3987 tx = &sc->ss[slice].tx;
3988 device_printf(sc->dev, "slice %d struck? ring state:\n", slice);
3989 device_printf(sc->dev,
3990 "tx.req=%d tx.done=%d, tx.queue_active=%d\n",
3991 tx->req, tx->done, tx->queue_active);
3992 device_printf(sc->dev, "tx.activate=%d tx.deactivate=%d\n",
3993 tx->activate, tx->deactivate);
3994 device_printf(sc->dev, "pkt_done=%d fw=%d\n",
3996 be32toh(sc->ss->fw_stats->send_done_count));
/*
 * mxge_watchdog: detect transmit rings that have stopped making progress.
 *
 * Reads the firmware dropped_pause counter of the first slice, then
 * examines the tx ring of each slice (only the first without
 * IFNET_BUF_RING), stopping once err becomes non-zero.  A ring is
 * considered stuck when work is outstanding now (req != done), work was
 * outstanding at the previous check (watchdog_req != watchdog_done) and
 * done has not advanced since then.  If the pause counter is also
 * unchanged, the ring state is logged and the watchdog reset task is
 * queued; the flow-control message is printed in the other case
 * (presumably the else branch).  The ring snapshot fields are then
 * refreshed.  A pending media probe is run at the end.
 */
4000 mxge_watchdog(mxge_softc_t *sc)
4003 uint32_t rx_pause = be32toh(sc->ss->fw_stats->dropped_pause);
4006 /* see if we have outstanding transmits, which
4007 have been pending for more than mxge_ticks */
4009 #ifdef IFNET_BUF_RING
4010 (i < sc->num_slices) && (err == 0);
4012 (i < 1) && (err == 0);
4016 if (tx->req != tx->done &&
4017 tx->watchdog_req != tx->watchdog_done &&
4018 tx->done == tx->watchdog_done) {
4019 /* check for pause blocking before resetting */
4020 if (tx->watchdog_rx_pause == rx_pause) {
4021 mxge_warn_stuck(sc, tx, i);
4022 taskqueue_enqueue(sc->tq, &sc->watchdog_task);
4026 device_printf(sc->dev, "Flow control blocking "
4027 "xmits, check link partner\n");
/* snapshot used as the baseline by the next watchdog pass */
4030 tx->watchdog_req = tx->req;
4031 tx->watchdog_done = tx->done;
4032 tx->watchdog_rx_pause = rx_pause;
4035 if (sc->need_media_probe)
4036 mxge_media_probe(sc);
/*
 * mxge_update_stats: fold the per-slice counters into the ifnet counters.
 *
 * Sums ipackets, opackets and oerrors over all slices and, with
 * IFNET_BUF_RING, also obytes, omcasts and the buf_ring drop count.
 * pkts is computed as the growth of ipackets plus opackets relative to
 * the values currently stored in the ifnet, before those are overwritten
 * with the new totals.  mxge_tick() uses the return value as a packet
 * count, so pkts is presumably what is returned (the return statement is
 * not visible in this listing).
 */
4041 mxge_update_stats(mxge_softc_t *sc)
4043 struct mxge_slice_state *ss;
4045 u_long ipackets = 0;
4046 u_long opackets = 0;
4047 #ifdef IFNET_BUF_RING
4055 for (slice = 0; slice < sc->num_slices; slice++) {
4056 ss = &sc->ss[slice];
4057 ipackets += ss->ipackets;
4058 opackets += ss->opackets;
4059 #ifdef IFNET_BUF_RING
4060 obytes += ss->obytes;
4061 omcasts += ss->omcasts;
4062 odrops += ss->tx.br->br_drops;
4064 oerrors += ss->oerrors;
/* delta must be taken before the ifnet totals are overwritten below */
4066 pkts = (ipackets - sc->ifp->if_ipackets);
4067 pkts += (opackets - sc->ifp->if_opackets);
4068 sc->ifp->if_ipackets = ipackets;
4069 sc->ifp->if_opackets = opackets;
4070 #ifdef IFNET_BUF_RING
4071 sc->ifp->if_obytes = obytes;
4072 sc->ifp->if_omcasts = omcasts;
4073 sc->ifp->if_snd.ifq_drops = odrops;
4075 sc->ifp->if_oerrors = oerrors;
/*
 * mxge_tick: periodic callout of the driver.
 *
 * arg - the mxge_softc_t of the device.
 *
 * Samples IFF_DRV_RUNNING, aggregates statistics with
 * mxge_update_stats(), and runs mxge_watchdog() when
 * sc->watchdog_countdown has reached zero, reloading the countdown with
 * 4; the decrement presumably belongs to the else branch.  It also reads
 * the PCI command register and queues the watchdog reset task when the
 * bus-master bit is clear.  The callout is re-armed with the local ticks
 * value; how that value is chosen is not visible in this listing.
 */
4080 mxge_tick(void *arg)
4082 mxge_softc_t *sc = arg;
4089 running = sc->ifp->if_drv_flags & IFF_DRV_RUNNING;
4091 /* aggregate stats from different slices */
4092 pkts = mxge_update_stats(sc);
4093 if (!sc->watchdog_countdown) {
4094 err = mxge_watchdog(sc);
4095 sc->watchdog_countdown = 4;
4097 sc->watchdog_countdown--;
4100 /* ensure NIC did not suffer h/w fault while idle */
4101 cmd = pci_read_config(sc->dev, PCIR_COMMAND, 2);
4102 if ((cmd & PCIM_CMD_BUSMASTEREN) == 0) {
4104 taskqueue_enqueue(sc->tq, &sc->watchdog_task);
4107 /* look less often if NIC is idle */
4112 callout_reset(&sc->co_hdl, ticks, mxge_tick, sc);
/*
 * mxge_media_change: media change handler taking the ifnet.
 * NOTE(review): only the signature is present in this listing; the body
 * and return value must be checked against the full file.
 */
4117 mxge_media_change(struct ifnet *ifp)
/*
 * mxge_change_mtu: apply a new MTU to the interface.
 *
 * sc  - device state.
 * mtu - requested MTU, excluding Ethernet and VLAN headers.
 *
 * The on-wire size (mtu + ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN) is
 * checked against sc->max_mtu and a minimum of 60; the result for an
 * out-of-range value is not visible in this listing.  Under
 * sc->driver_mtx the old MTU is saved and, if the interface is running,
 * it is reopened with mxge_open(); if that fails the old MTU is restored
 * and the interface is reopened again, ignoring the second result.
 */
4123 mxge_change_mtu(mxge_softc_t *sc, int mtu)
4125 struct ifnet *ifp = sc->ifp;
4126 int real_mtu, old_mtu;
4130 real_mtu = mtu + ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
4131 if ((real_mtu > sc->max_mtu) || real_mtu < 60)
4133 mtx_lock(&sc->driver_mtx);
4134 old_mtu = ifp->if_mtu;
4136 if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
4138 err = mxge_open(sc);
4140 ifp->if_mtu = old_mtu;
4142 (void) mxge_open(sc);
4145 mtx_unlock(&sc->driver_mtx);
/*
 * mxge_media_status: fill an ifmediareq with the current media state.
 *
 * ifm_status is IFM_AVALID, plus IFM_ACTIVE when sc->link_state is
 * non-zero.  ifm_active is IFM_ETHER | IFM_FDX combined with
 * sc->current_media.
 */
4150 mxge_media_status(struct ifnet *ifp, struct ifmediareq *ifmr)
4152 mxge_softc_t *sc = ifp->if_softc;
4157 ifmr->ifm_status = IFM_AVALID;
4158 ifmr->ifm_active = IFM_ETHER | IFM_FDX;
4159 ifmr->ifm_status |= sc->link_state ? IFM_ACTIVE : 0;
4160 ifmr->ifm_active |= sc->current_media;
/*
 * mxge_ioctl: ioctl entry point of the interface.
 *
 * ifp     - interface.
 * command - ioctl request code.
 * data    - request payload, viewed as a struct ifreq.
 *
 * The case labels are not visible in this listing; the visible bodies do
 * the following:
 *  - pass the request to ether_ioctl();
 *  - change the MTU through mxge_change_mtu();
 *  - on an interface flags change, open the NIC when IFF_UP is set and it
 *    is not running, otherwise refresh promiscuous mode and the multicast
 *    list; a running interface without IFF_UP is handled in a further
 *    branch whose action is not visible;
 *  - rebuild the multicast list under sc->driver_mtx;
 *  - toggle capabilities from the XOR of ifr_reqcap and if_capenable:
 *    TXCSUM (disabling it also drops TSO4), RXCSUM, TSO4 and TSO6 (each
 *    requires the matching tx checksum capability), the IPv6 checksum
 *    pair, LRO, VLAN_HWTAGGING and VLAN_HWTSO, then VLAN_CAPABILITIES();
 *  - for media requests, probe the media under the lock and forward to
 *    ifmedia_ioctl().
 * NOTE(review): RXCSUM is tested in an else-if after TXCSUM, so a single
 * request that flips both only applies the TXCSUM change; the same holds
 * for the IPv6 pair.
 */
4164 mxge_ioctl(struct ifnet *ifp, u_long command, caddr_t data)
4166 mxge_softc_t *sc = ifp->if_softc;
4167 struct ifreq *ifr = (struct ifreq *)data;
4174 err = ether_ioctl(ifp, command, data);
4178 err = mxge_change_mtu(sc, ifr->ifr_mtu);
4182 mtx_lock(&sc->driver_mtx);
4184 mtx_unlock(&sc->driver_mtx);
4187 if (ifp->if_flags & IFF_UP) {
4188 if (!(ifp->if_drv_flags & IFF_DRV_RUNNING)) {
4189 err = mxge_open(sc);
4191 /* take care of promis can allmulti
4193 mxge_change_promisc(sc,
4194 ifp->if_flags & IFF_PROMISC);
4195 mxge_set_multicast_list(sc);
4198 if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
4202 mtx_unlock(&sc->driver_mtx);
4207 mtx_lock(&sc->driver_mtx);
4208 mxge_set_multicast_list(sc);
4209 mtx_unlock(&sc->driver_mtx);
4213 mtx_lock(&sc->driver_mtx);
4214 mask = ifr->ifr_reqcap ^ ifp->if_capenable;
4215 if (mask & IFCAP_TXCSUM) {
4216 if (IFCAP_TXCSUM & ifp->if_capenable) {
4217 ifp->if_capenable &= ~(IFCAP_TXCSUM|IFCAP_TSO4);
4218 ifp->if_hwassist &= ~(CSUM_TCP | CSUM_UDP);
4220 ifp->if_capenable |= IFCAP_TXCSUM;
4221 ifp->if_hwassist |= (CSUM_TCP | CSUM_UDP);
4223 } else if (mask & IFCAP_RXCSUM) {
4224 if (IFCAP_RXCSUM & ifp->if_capenable) {
4225 ifp->if_capenable &= ~IFCAP_RXCSUM;
4227 ifp->if_capenable |= IFCAP_RXCSUM;
4230 if (mask & IFCAP_TSO4) {
4231 if (IFCAP_TSO4 & ifp->if_capenable) {
4232 ifp->if_capenable &= ~IFCAP_TSO4;
4233 } else if (IFCAP_TXCSUM & ifp->if_capenable) {
4234 ifp->if_capenable |= IFCAP_TSO4;
4235 ifp->if_hwassist |= CSUM_TSO;
4237 printf("mxge requires tx checksum offload"
4238 " be enabled to use TSO\n");
4243 if (mask & IFCAP_TXCSUM_IPV6) {
4244 if (IFCAP_TXCSUM_IPV6 & ifp->if_capenable) {
4245 ifp->if_capenable &= ~(IFCAP_TXCSUM_IPV6
4247 ifp->if_hwassist &= ~(CSUM_TCP_IPV6
4250 ifp->if_capenable |= IFCAP_TXCSUM_IPV6;
4251 ifp->if_hwassist |= (CSUM_TCP_IPV6
4254 } else if (mask & IFCAP_RXCSUM_IPV6) {
4255 if (IFCAP_RXCSUM_IPV6 & ifp->if_capenable) {
4256 ifp->if_capenable &= ~IFCAP_RXCSUM_IPV6;
4258 ifp->if_capenable |= IFCAP_RXCSUM_IPV6;
4261 if (mask & IFCAP_TSO6) {
4262 if (IFCAP_TSO6 & ifp->if_capenable) {
4263 ifp->if_capenable &= ~IFCAP_TSO6;
4264 } else if (IFCAP_TXCSUM_IPV6 & ifp->if_capenable) {
4265 ifp->if_capenable |= IFCAP_TSO6;
4266 ifp->if_hwassist |= CSUM_TSO;
4268 printf("mxge requires tx checksum offload"
4269 " be enabled to use TSO\n");
4273 #endif /*IFCAP_TSO6 */
4275 if (mask & IFCAP_LRO)
4276 ifp->if_capenable ^= IFCAP_LRO;
4277 if (mask & IFCAP_VLAN_HWTAGGING)
4278 ifp->if_capenable ^= IFCAP_VLAN_HWTAGGING;
4279 if (mask & IFCAP_VLAN_HWTSO)
4280 ifp->if_capenable ^= IFCAP_VLAN_HWTSO;
4282 if (!(ifp->if_capabilities & IFCAP_VLAN_HWTSO) ||
4283 !(ifp->if_capenable & IFCAP_VLAN_HWTAGGING))
4284 ifp->if_capenable &= ~IFCAP_VLAN_HWTSO;
4286 mtx_unlock(&sc->driver_mtx);
4287 VLAN_CAPABILITIES(ifp);
4292 mtx_lock(&sc->driver_mtx);
4293 mxge_media_probe(sc);
4294 mtx_unlock(&sc->driver_mtx);
4295 err = ifmedia_ioctl(ifp, (struct ifreq *)data,
4296 &sc->media, command);
/*
 * mxge_fetch_tunables: load the hw.mxge.* loader tunables into the
 * driver globals and sanitise them.
 *
 * After fetching:
 *  - mxge_intr_coal_delay outside 0..10000 is replaced by 30;
 *  - mxge_ticks of 0 is replaced by hz / 2;
 *  - sc->pause takes mxge_flow_control;
 *  - mxge_rss_hash_type outside MXGEFW_RSS_HASH_TYPE_IPV4 ..
 *    MXGEFW_RSS_HASH_TYPE_MAX becomes MXGEFW_RSS_HASH_TYPE_SRC_DST_PORT;
 *  - mxge_initial_mtu outside ETHER_MIN_LEN .. ETHERMTU_JUMBO becomes
 *    ETHERMTU_JUMBO;
 *  - a non-zero mxge_throttle is clamped to MXGE_MIN_THROTTLE ..
 *    MXGE_MAX_THROTTLE and copied to sc->throttle.
 */
4306 mxge_fetch_tunables(mxge_softc_t *sc)
4309 TUNABLE_INT_FETCH("hw.mxge.max_slices", &mxge_max_slices);
4310 TUNABLE_INT_FETCH("hw.mxge.flow_control_enabled",
4311 &mxge_flow_control);
4312 TUNABLE_INT_FETCH("hw.mxge.intr_coal_delay",
4313 &mxge_intr_coal_delay);
4314 TUNABLE_INT_FETCH("hw.mxge.nvidia_ecrc_enable",
4315 &mxge_nvidia_ecrc_enable);
4316 TUNABLE_INT_FETCH("hw.mxge.force_firmware",
4317 &mxge_force_firmware);
4318 TUNABLE_INT_FETCH("hw.mxge.deassert_wait",
4319 &mxge_deassert_wait);
4320 TUNABLE_INT_FETCH("hw.mxge.verbose",
4322 TUNABLE_INT_FETCH("hw.mxge.ticks", &mxge_ticks);
4323 TUNABLE_INT_FETCH("hw.mxge.always_promisc", &mxge_always_promisc);
/* two spellings of the tunable name feed the same variable; the later fetch wins */
4324 TUNABLE_INT_FETCH("hw.mxge.rss_hash_type", &mxge_rss_hash_type);
4325 TUNABLE_INT_FETCH("hw.mxge.rss_hashtype", &mxge_rss_hash_type);
4326 TUNABLE_INT_FETCH("hw.mxge.initial_mtu", &mxge_initial_mtu);
4327 TUNABLE_INT_FETCH("hw.mxge.throttle", &mxge_throttle);
4331 if (mxge_intr_coal_delay < 0 || mxge_intr_coal_delay > 10*1000)
4332 mxge_intr_coal_delay = 30;
4333 if (mxge_ticks == 0)
4334 mxge_ticks = hz / 2;
4335 sc->pause = mxge_flow_control;
4336 if (mxge_rss_hash_type < MXGEFW_RSS_HASH_TYPE_IPV4
4337 || mxge_rss_hash_type > MXGEFW_RSS_HASH_TYPE_MAX) {
4338 mxge_rss_hash_type = MXGEFW_RSS_HASH_TYPE_SRC_DST_PORT;
4340 if (mxge_initial_mtu > ETHERMTU_JUMBO ||
4341 mxge_initial_mtu < ETHER_MIN_LEN)
4342 mxge_initial_mtu = ETHERMTU_JUMBO;
/* a throttle of 0 is left alone; only non-zero values are clamped */
4344 if (mxge_throttle && mxge_throttle > MXGE_MAX_THROTTLE)
4345 mxge_throttle = MXGE_MAX_THROTTLE;
4346 if (mxge_throttle && mxge_throttle < MXGE_MIN_THROTTLE)
4347 mxge_throttle = MXGE_MIN_THROTTLE;
4348 sc->throttle = mxge_throttle;
4353 mxge_free_slices(mxge_softc_t *sc)
4355 struct mxge_slice_state *ss;
4362 for (i = 0; i < sc->num_slices; i++) {
4364 if (ss->fw_stats != NULL) {
4365 mxge_dma_free(&ss->fw_stats_dma);
4366 ss->fw_stats = NULL;
4367 #ifdef IFNET_BUF_RING
4368 if (ss->tx.br != NULL) {
4369 drbr_free(ss->tx.br, M_DEVBUF);
4373 mtx_destroy(&ss->tx.mtx);
4375 if (ss->rx_done.entry != NULL) {
4376 mxge_dma_free(&ss->rx_done.dma);
4377 ss->rx_done.entry = NULL;
4380 free(sc->ss, M_DEVBUF);
4385 mxge_alloc_slices(mxge_softc_t *sc)
4388 struct mxge_slice_state *ss;
4390 int err, i, max_intr_slots;
4392 err = mxge_send_cmd(sc, MXGEFW_CMD_GET_RX_RING_SIZE, &cmd);
4394 device_printf(sc->dev, "Cannot determine rx ring size\n");
4397 sc->rx_ring_size = cmd.data0;
4398 max_intr_slots = 2 * (sc->rx_ring_size / sizeof (mcp_dma_addr_t));
4400 bytes = sizeof (*sc->ss) * sc->num_slices;
4401 sc->ss = malloc(bytes, M_DEVBUF, M_NOWAIT | M_ZERO);
4404 for (i = 0; i < sc->num_slices; i++) {
4409 /* allocate per-slice rx interrupt queues */
4411 bytes = max_intr_slots * sizeof (*ss->rx_done.entry);
4412 err = mxge_dma_alloc(sc, &ss->rx_done.dma, bytes, 4096);
4415 ss->rx_done.entry = ss->rx_done.dma.addr;
4416 bzero(ss->rx_done.entry, bytes);
4419 * allocate the per-slice firmware stats; stats
4420 * (including tx) are used used only on the first
4423 #ifndef IFNET_BUF_RING
4428 bytes = sizeof (*ss->fw_stats);
4429 err = mxge_dma_alloc(sc, &ss->fw_stats_dma,
4430 sizeof (*ss->fw_stats), 64);
4433 ss->fw_stats = (mcp_irq_data_t *)ss->fw_stats_dma.addr;
4434 snprintf(ss->tx.mtx_name, sizeof(ss->tx.mtx_name),
4435 "%s:tx(%d)", device_get_nameunit(sc->dev), i);
4436 mtx_init(&ss->tx.mtx, ss->tx.mtx_name, NULL, MTX_DEF);
4437 #ifdef IFNET_BUF_RING
4438 ss->tx.br = buf_ring_alloc(2048, M_DEVBUF, M_WAITOK,
4446 mxge_free_slices(sc);
4451 mxge_slice_probe(mxge_softc_t *sc)
4455 int msix_cnt, status, max_intr_slots;
4459 * don't enable multiple slices if they are not enabled,
4460 * or if this is not an SMP system
4463 if (mxge_max_slices == 0 || mxge_max_slices == 1 || mp_ncpus < 2)
4466 /* see how many MSI-X interrupts are available */
4467 msix_cnt = pci_msix_count(sc->dev);
4471 /* now load the slice aware firmware see what it supports */
4472 old_fw = sc->fw_name;
4473 if (old_fw == mxge_fw_aligned)
4474 sc->fw_name = mxge_fw_rss_aligned;
4476 sc->fw_name = mxge_fw_rss_unaligned;
4477 status = mxge_load_firmware(sc, 0);
4479 device_printf(sc->dev, "Falling back to a single slice\n");
4483 /* try to send a reset command to the card to see if it
4485 memset(&cmd, 0, sizeof (cmd));
4486 status = mxge_send_cmd(sc, MXGEFW_CMD_RESET, &cmd);
4488 device_printf(sc->dev, "failed reset\n");
4492 /* get rx ring size */
4493 status = mxge_send_cmd(sc, MXGEFW_CMD_GET_RX_RING_SIZE, &cmd);
4495 device_printf(sc->dev, "Cannot determine rx ring size\n");
4498 max_intr_slots = 2 * (cmd.data0 / sizeof (mcp_dma_addr_t));
4500 /* tell it the size of the interrupt queues */
4501 cmd.data0 = max_intr_slots * sizeof (struct mcp_slot);
4502 status = mxge_send_cmd(sc, MXGEFW_CMD_SET_INTRQ_SIZE, &cmd);
4504 device_printf(sc->dev, "failed MXGEFW_CMD_SET_INTRQ_SIZE\n");
4508 /* ask the maximum number of slices it supports */
4509 status = mxge_send_cmd(sc, MXGEFW_CMD_GET_MAX_RSS_QUEUES, &cmd);
4511 device_printf(sc->dev,
4512 "failed MXGEFW_CMD_GET_MAX_RSS_QUEUES\n");
4515 sc->num_slices = cmd.data0;
4516 if (sc->num_slices > msix_cnt)
4517 sc->num_slices = msix_cnt;
4519 if (mxge_max_slices == -1) {
4520 /* cap to number of CPUs in system */
4521 if (sc->num_slices > mp_ncpus)
4522 sc->num_slices = mp_ncpus;
4524 if (sc->num_slices > mxge_max_slices)
4525 sc->num_slices = mxge_max_slices;
4527 /* make sure it is a power of two */
4528 while (sc->num_slices & (sc->num_slices - 1))
4532 device_printf(sc->dev, "using %d slices\n",
4538 sc->fw_name = old_fw;
4539 (void) mxge_load_firmware(sc, 0);
4543 mxge_add_msix_irqs(mxge_softc_t *sc)
4546 int count, err, i, rid;
4549 sc->msix_table_res = bus_alloc_resource_any(sc->dev, SYS_RES_MEMORY,
4552 if (sc->msix_table_res == NULL) {
4553 device_printf(sc->dev, "couldn't alloc MSIX table res\n");
4557 count = sc->num_slices;
4558 err = pci_alloc_msix(sc->dev, &count);
4560 device_printf(sc->dev, "pci_alloc_msix: failed, wanted %d"
4561 "err = %d \n", sc->num_slices, err);
4562 goto abort_with_msix_table;
4564 if (count < sc->num_slices) {
4565 device_printf(sc->dev, "pci_alloc_msix: need %d, got %d\n",
4566 count, sc->num_slices);
4567 device_printf(sc->dev,
4568 "Try setting hw.mxge.max_slices to %d\n",
4571 goto abort_with_msix;
4573 bytes = sizeof (*sc->msix_irq_res) * sc->num_slices;
4574 sc->msix_irq_res = malloc(bytes, M_DEVBUF, M_NOWAIT|M_ZERO);
4575 if (sc->msix_irq_res == NULL) {
4577 goto abort_with_msix;
4580 for (i = 0; i < sc->num_slices; i++) {
4582 sc->msix_irq_res[i] = bus_alloc_resource_any(sc->dev,
4585 if (sc->msix_irq_res[i] == NULL) {
4586 device_printf(sc->dev, "couldn't allocate IRQ res"
4587 " for message %d\n", i);
4589 goto abort_with_res;
4593 bytes = sizeof (*sc->msix_ih) * sc->num_slices;
4594 sc->msix_ih = malloc(bytes, M_DEVBUF, M_NOWAIT|M_ZERO);
4596 for (i = 0; i < sc->num_slices; i++) {
4597 err = bus_setup_intr(sc->dev, sc->msix_irq_res[i],
4598 INTR_TYPE_NET | INTR_MPSAFE,
4599 #if __FreeBSD_version > 700030
4602 mxge_intr, &sc->ss[i], &sc->msix_ih[i]);
4604 device_printf(sc->dev, "couldn't setup intr for "
4606 goto abort_with_intr;
4608 bus_describe_intr(sc->dev, sc->msix_irq_res[i],
4609 sc->msix_ih[i], "s%d", i);
4613 device_printf(sc->dev, "using %d msix IRQs:",
4615 for (i = 0; i < sc->num_slices; i++)
4616 printf(" %ld", rman_get_start(sc->msix_irq_res[i]));
4622 for (i = 0; i < sc->num_slices; i++) {
4623 if (sc->msix_ih[i] != NULL) {
4624 bus_teardown_intr(sc->dev, sc->msix_irq_res[i],
4626 sc->msix_ih[i] = NULL;
4629 free(sc->msix_ih, M_DEVBUF);
4633 for (i = 0; i < sc->num_slices; i++) {
4635 if (sc->msix_irq_res[i] != NULL)
4636 bus_release_resource(sc->dev, SYS_RES_IRQ, rid,
4637 sc->msix_irq_res[i]);
4638 sc->msix_irq_res[i] = NULL;
4640 free(sc->msix_irq_res, M_DEVBUF);
4644 pci_release_msi(sc->dev);
4646 abort_with_msix_table:
4647 bus_release_resource(sc->dev, SYS_RES_MEMORY, PCIR_BAR(2),
4648 sc->msix_table_res);
4654 mxge_add_single_irq(mxge_softc_t *sc)
4656 int count, err, rid;
4658 count = pci_msi_count(sc->dev);
4659 if (count == 1 && pci_alloc_msi(sc->dev, &count) == 0) {
4665 sc->irq_res = bus_alloc_resource(sc->dev, SYS_RES_IRQ, &rid, 0, ~0,
4666 1, RF_SHAREABLE | RF_ACTIVE);
4667 if (sc->irq_res == NULL) {
4668 device_printf(sc->dev, "could not alloc interrupt\n");
4672 device_printf(sc->dev, "using %s irq %ld\n",
4673 sc->legacy_irq ? "INTx" : "MSI",
4674 rman_get_start(sc->irq_res));
4675 err = bus_setup_intr(sc->dev, sc->irq_res,
4676 INTR_TYPE_NET | INTR_MPSAFE,
4677 #if __FreeBSD_version > 700030
4680 mxge_intr, &sc->ss[0], &sc->ih);
4682 bus_release_resource(sc->dev, SYS_RES_IRQ,
4683 sc->legacy_irq ? 0 : 1, sc->irq_res);
4684 if (!sc->legacy_irq)
4685 pci_release_msi(sc->dev);
4691 mxge_rem_msix_irqs(mxge_softc_t *sc)
4695 for (i = 0; i < sc->num_slices; i++) {
4696 if (sc->msix_ih[i] != NULL) {
4697 bus_teardown_intr(sc->dev, sc->msix_irq_res[i],
4699 sc->msix_ih[i] = NULL;
4702 free(sc->msix_ih, M_DEVBUF);
4704 for (i = 0; i < sc->num_slices; i++) {
4706 if (sc->msix_irq_res[i] != NULL)
4707 bus_release_resource(sc->dev, SYS_RES_IRQ, rid,
4708 sc->msix_irq_res[i]);
4709 sc->msix_irq_res[i] = NULL;
4711 free(sc->msix_irq_res, M_DEVBUF);
4713 bus_release_resource(sc->dev, SYS_RES_MEMORY, PCIR_BAR(2),
4714 sc->msix_table_res);
4716 pci_release_msi(sc->dev);
4721 mxge_rem_single_irq(mxge_softc_t *sc)
4723 bus_teardown_intr(sc->dev, sc->irq_res, sc->ih);
4724 bus_release_resource(sc->dev, SYS_RES_IRQ,
4725 sc->legacy_irq ? 0 : 1, sc->irq_res);
4726 if (!sc->legacy_irq)
4727 pci_release_msi(sc->dev);
4731 mxge_rem_irq(mxge_softc_t *sc)
4733 if (sc->num_slices > 1)
4734 mxge_rem_msix_irqs(sc);
4736 mxge_rem_single_irq(sc);
4740 mxge_add_irq(mxge_softc_t *sc)
4744 if (sc->num_slices > 1)
4745 err = mxge_add_msix_irqs(sc);
4747 err = mxge_add_single_irq(sc);
4749 if (0 && err == 0 && sc->num_slices > 1) {
4750 mxge_rem_msix_irqs(sc);
4751 err = mxge_add_msix_irqs(sc);
/*
 * Attach routine for one mxge device.
 *
 * In order, the visible steps are: fetch the loader tunables, create
 * the watchdog task and its taskqueue, create the parent DMA tag,
 * allocate and name the ifnet, initialise the command and driver
 * mutexes and the callout, set up PCI config space, map the board
 * memory and copy out the EEPROM strings, allocate the command,
 * zero-pad and DMA-benchmark areas, select and load firmware, probe
 * and allocate slices, reset the NIC, allocate rings, install
 * interrupts, advertise interface capabilities, attach the Ethernet
 * interface, add sysctls, and start the taskqueue thread and the
 * periodic callout.
 *
 * Failures jump to the abort_* labels at the bottom, which release
 * resources in the reverse order of their acquisition.
 */
4758 mxge_attach(device_t dev)
4761 mxge_softc_t *sc = device_get_softc(dev);
4766 mxge_fetch_tunables(sc);
/* watchdog work is queued on a taskqueue private to this device */
4768 TASK_INIT(&sc->watchdog_task, 1, mxge_watchdog_task, sc);
4769 sc->tq = taskqueue_create("mxge_taskq", M_WAITOK,
4770 taskqueue_thread_enqueue, &sc->tq);
4771 if (sc->tq == NULL) {
4773 goto abort_with_nothing;
/* parent DMA tag, stored in sc->parent_dmat */
4776 err = bus_dma_tag_create(bus_get_dma_tag(dev), /* parent */
4779 BUS_SPACE_MAXADDR, /* low */
4780 BUS_SPACE_MAXADDR, /* high */
4781 NULL, NULL, /* filter */
4782 65536 + 256, /* maxsize */
4783 MXGE_MAX_SEND_DESC, /* num segs */
4784 65536, /* maxsegsize */
4786 NULL, NULL, /* lock */
4787 &sc->parent_dmat); /* tag */
4790 device_printf(sc->dev, "Err %d allocating parent dmat\n",
4795 ifp = sc->ifp = if_alloc(IFT_ETHER);
4797 device_printf(dev, "can not if_alloc()\n");
4799 goto abort_with_parent_dmat;
4801 if_initname(ifp, device_get_name(dev), device_get_unit(dev));
/* mutex names are derived from the device name ("<dev>:cmd", "<dev>:drv") */
4803 snprintf(sc->cmd_mtx_name, sizeof(sc->cmd_mtx_name), "%s:cmd",
4804 device_get_nameunit(dev));
4805 mtx_init(&sc->cmd_mtx, sc->cmd_mtx_name, NULL, MTX_DEF);
4806 snprintf(sc->driver_mtx_name, sizeof(sc->driver_mtx_name),
4807 "%s:drv", device_get_nameunit(dev));
4808 mtx_init(&sc->driver_mtx, sc->driver_mtx_name,
4809 MTX_NETWORK_LOCK, MTX_DEF);
/* the periodic callout is associated with the driver mutex */
4811 callout_init_mtx(&sc->co_hdl, &sc->driver_mtx, 0);
4813 mxge_setup_cfg_space(sc);
4815 /* Map the board into the kernel */
4817 sc->mem_res = bus_alloc_resource(dev, SYS_RES_MEMORY, &rid, 0,
4819 if (sc->mem_res == NULL) {
4820 device_printf(dev, "could not map memory\n");
4822 goto abort_with_lock;
4824 sc->sram = rman_get_virtual(sc->mem_res);
/* fixed SRAM size; a mapped region smaller than this is rejected below */
4825 sc->sram_size = 2*1024*1024 - (2*(48*1024)+(32*1024)) - 0x100;
4826 if (sc->sram_size > rman_get_size(sc->mem_res)) {
4827 device_printf(dev, "impossible memory region size %ld\n",
4828 rman_get_size(sc->mem_res));
4830 goto abort_with_mem_res;
4833 /* make NULL terminated copy of the EEPROM strings section of
4835 bzero(sc->eeprom_strings, MXGE_EEPROM_STRINGS_SIZE);
4836 bus_space_read_region_1(rman_get_bustag(sc->mem_res),
4837 rman_get_bushandle(sc->mem_res),
4838 sc->sram_size - MXGE_EEPROM_STRINGS_SIZE,
4840 MXGE_EEPROM_STRINGS_SIZE - 2);
4841 err = mxge_parse_strings(sc);
4843 goto abort_with_mem_res;
4845 /* Enable write combining for efficient use of PCIe bus */
4848 /* Allocate the out of band dma memory */
4849 err = mxge_dma_alloc(sc, &sc->cmd_dma,
4850 sizeof (mxge_cmd_t), 64);
4852 goto abort_with_mem_res;
4853 sc->cmd = (mcp_cmd_response_t *) sc->cmd_dma.addr;
4854 err = mxge_dma_alloc(sc, &sc->zeropad_dma, 64, 64);
4856 goto abort_with_cmd_dma;
4858 err = mxge_dma_alloc(sc, &sc->dmabench_dma, 4096, 4096);
4860 goto abort_with_zeropad_dma;
4862 /* select & load the firmware */
4863 err = mxge_select_firmware(sc);
4865 goto abort_with_dmabench;
4866 sc->intr_coal_delay = mxge_intr_coal_delay;
/* decide how many slices to use, then allocate the per-slice state */
4868 mxge_slice_probe(sc);
4869 err = mxge_alloc_slices(sc);
4871 goto abort_with_dmabench;
4873 err = mxge_reset(sc, 0);
4875 goto abort_with_slices;
4877 err = mxge_alloc_rings(sc);
4879 device_printf(sc->dev, "failed to allocate rings\n");
4880 goto abort_with_slices;
4883 err = mxge_add_irq(sc);
4885 device_printf(sc->dev, "failed to add irq\n");
4886 goto abort_with_rings;
/* advertise a 10 Gb/s link and the capability set */
4889 if_initbaudrate(ifp, IF_Gbps(10));
4890 ifp->if_capabilities = IFCAP_RXCSUM | IFCAP_TXCSUM | IFCAP_TSO4 |
4891 IFCAP_VLAN_MTU | IFCAP_LINKSTATE | IFCAP_TXCSUM_IPV6 |
4893 #if defined(INET) || defined(INET6)
4894 ifp->if_capabilities |= IFCAP_LRO;
4897 #ifdef MXGE_NEW_VLAN_API
4898 ifp->if_capabilities |= IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_HWCSUM;
4900 /* Only FW 1.4.32 and newer can do TSO over vlans */
/*
 * NOTE(review): the test below matches only major 1, minor 4 with
 * tiny >= 32; a firmware with a higher major or minor number would
 * not get IFCAP_VLAN_HWTSO -- confirm that is intended.
 */
4901 if (sc->fw_ver_major == 1 && sc->fw_ver_minor == 4 &&
4902 sc->fw_ver_tiny >= 32)
4903 ifp->if_capabilities |= IFCAP_VLAN_HWTSO;
4905 sc->max_mtu = mxge_max_mtu(sc);
4906 if (sc->max_mtu >= 9000)
4907 ifp->if_capabilities |= IFCAP_JUMBO_MTU;
4909 device_printf(dev, "MTU limited to %d. Install "
4910 "latest firmware for 9000 byte jumbo support\n",
4911 sc->max_mtu - ETHER_HDR_LEN);
4912 ifp->if_hwassist = CSUM_TCP | CSUM_UDP | CSUM_TSO;
4913 ifp->if_hwassist |= CSUM_TCP_IPV6 | CSUM_UDP_IPV6;
4914 /* check to see if f/w supports TSO for IPv6 */
4915 if (!mxge_send_cmd(sc, MXGEFW_CMD_GET_MAX_TSO6_HDR_SIZE, &cmd)) {
4917 ifp->if_capabilities |= IFCAP_TSO6;
4918 sc->max_tso6_hlen = min(cmd.data0,
4919 sizeof (sc->ss[0].scratch));
/* enable everything advertised, except LRO when lro_cnt is zero */
4921 ifp->if_capenable = ifp->if_capabilities;
4922 if (sc->lro_cnt == 0)
4923 ifp->if_capenable &= ~IFCAP_LRO;
4924 ifp->if_init = mxge_init;
4926 ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
4927 ifp->if_ioctl = mxge_ioctl;
4928 ifp->if_start = mxge_start;
4929 /* Initialise the ifmedia structure */
4930 ifmedia_init(&sc->media, 0, mxge_media_change,
4932 mxge_media_init(sc);
4933 mxge_media_probe(sc);
4935 ether_ifattach(ifp, sc->mac_addr);
4936 /* ether_ifattach sets mtu to ETHERMTU */
4937 if (mxge_initial_mtu != ETHERMTU)
4938 mxge_change_mtu(sc, mxge_initial_mtu);
4940 mxge_add_sysctls(sc);
4941 #ifdef IFNET_BUF_RING
4942 ifp->if_transmit = mxge_transmit;
4943 ifp->if_qflush = mxge_qflush;
/* start the taskqueue thread and arm the periodic tick */
4945 taskqueue_start_threads(&sc->tq, 1, PI_NET, "%s taskq",
4946 device_get_nameunit(sc->dev));
4947 callout_reset(&sc->co_hdl, mxge_ticks, mxge_tick, sc);
/* error unwinding: later labels release earlier acquisitions */
4951 mxge_free_rings(sc);
4953 mxge_free_slices(sc);
4954 abort_with_dmabench:
4955 mxge_dma_free(&sc->dmabench_dma);
4956 abort_with_zeropad_dma:
4957 mxge_dma_free(&sc->zeropad_dma);
4959 mxge_dma_free(&sc->cmd_dma);
4961 bus_release_resource(dev, SYS_RES_MEMORY, PCIR_BARS, sc->mem_res);
4963 pci_disable_busmaster(dev);
4964 mtx_destroy(&sc->cmd_mtx);
4965 mtx_destroy(&sc->driver_mtx);
4967 abort_with_parent_dmat:
4968 bus_dma_tag_destroy(sc->parent_dmat);
4970 if (sc->tq != NULL) {
4971 taskqueue_drain(sc->tq, &sc->watchdog_task);
4972 taskqueue_free(sc->tq);
4980 mxge_detach(device_t dev)
4982 mxge_softc_t *sc = device_get_softc(dev);
4984 if (mxge_vlans_active(sc)) {
4985 device_printf(sc->dev,
4986 "Detach vlans before removing module\n");
4989 mtx_lock(&sc->driver_mtx);
4991 if (sc->ifp->if_drv_flags & IFF_DRV_RUNNING)
4993 mtx_unlock(&sc->driver_mtx);
4994 ether_ifdetach(sc->ifp);
4995 if (sc->tq != NULL) {
4996 taskqueue_drain(sc->tq, &sc->watchdog_task);
4997 taskqueue_free(sc->tq);
5000 callout_drain(&sc->co_hdl);
5001 ifmedia_removeall(&sc->media);
5002 mxge_dummy_rdma(sc, 0);
5003 mxge_rem_sysctls(sc);
5005 mxge_free_rings(sc);
5006 mxge_free_slices(sc);
5007 mxge_dma_free(&sc->dmabench_dma);
5008 mxge_dma_free(&sc->zeropad_dma);
5009 mxge_dma_free(&sc->cmd_dma);
5010 bus_release_resource(dev, SYS_RES_MEMORY, PCIR_BARS, sc->mem_res);
5011 pci_disable_busmaster(dev);
5012 mtx_destroy(&sc->cmd_mtx);
5013 mtx_destroy(&sc->driver_mtx);
5015 bus_dma_tag_destroy(sc->parent_dmat);
5020 mxge_shutdown(device_t dev)
5026 This file uses Myri10GE driver indentation.
5029 c-file-style:"linux"