1 /******************************************************************************
2 SPDX-License-Identifier: BSD-2-Clause-FreeBSD
4 Copyright (c) 2006-2013, Myricom Inc.
7 Redistribution and use in source and binary forms, with or without
8 modification, are permitted provided that the following conditions are met:
10 1. Redistributions of source code must retain the above copyright notice,
11 this list of conditions and the following disclaimer.
13 2. Neither the name of the Myricom Inc, nor the names of its
14 contributors may be used to endorse or promote products derived from
15 this software without specific prior written permission.
17 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
18 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
21 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
22 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
23 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
24 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
25 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
26 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
27 POSSIBILITY OF SUCH DAMAGE.
29 ***************************************************************************/
31 #include <sys/cdefs.h>
32 __FBSDID("$FreeBSD$");
34 #include <sys/param.h>
35 #include <sys/systm.h>
36 #include <sys/linker.h>
37 #include <sys/firmware.h>
38 #include <sys/endian.h>
39 #include <sys/sockio.h>
41 #include <sys/malloc.h>
43 #include <sys/kernel.h>
45 #include <sys/module.h>
46 #include <sys/socket.h>
47 #include <sys/sysctl.h>
49 #include <sys/taskqueue.h>
53 #include <net/if_var.h>
54 #include <net/if_arp.h>
55 #include <net/ethernet.h>
56 #include <net/if_dl.h>
57 #include <net/if_media.h>
61 #include <net/if_types.h>
62 #include <net/if_vlan_var.h>
64 #include <netinet/in_systm.h>
65 #include <netinet/in.h>
66 #include <netinet/ip.h>
67 #include <netinet/ip6.h>
68 #include <netinet/tcp.h>
69 #include <netinet/tcp_lro.h>
70 #include <netinet6/ip6_var.h>
72 #include <machine/bus.h>
73 #include <machine/in_cksum.h>
74 #include <machine/resource.h>
79 #include <dev/pci/pcireg.h>
80 #include <dev/pci/pcivar.h>
81 #include <dev/pci/pci_private.h> /* XXX for pci_cfg_restore */
83 #include <vm/vm.h> /* for pmap_mapdev() */
86 #if defined(__i386) || defined(__amd64)
87 #include <machine/specialreg.h>
90 #include <dev/mxge/mxge_mcp.h>
91 #include <dev/mxge/mcp_gen_header.h>
92 /*#define MXGE_FAKE_IFP*/
93 #include <dev/mxge/if_mxge_var.h>
95 #include <sys/buf_ring.h>
99 #include "opt_inet6.h"
/*
 * Module-wide tunable defaults.  These control ECRC enabling on Nvidia
 * bridges, firmware selection, interrupt coalescing, flow control,
 * RSS slice/hash behavior, initial MTU and transmit throttling.
 */
102 static int mxge_nvidia_ecrc_enable = 1;
103 static int mxge_force_firmware = 0;
104 static int mxge_intr_coal_delay = 30;
105 static int mxge_deassert_wait = 1;
106 static int mxge_flow_control = 1;
107 static int mxge_verbose = 0;
108 static int mxge_ticks;
109 static int mxge_max_slices = 1;
110 static int mxge_rss_hash_type = MXGEFW_RSS_HASH_TYPE_SRC_DST_PORT;
111 static int mxge_always_promisc = 0;
112 static int mxge_initial_mtu = ETHERMTU_JUMBO;
113 static int mxge_throttle = 0;
/* Firmware image names: "aligned" images assume 8-byte-aligned PCIe
 * completions; "ethp"/unaligned images work around unaligned completions. */
114 static char *mxge_fw_unaligned = "mxge_ethp_z8e";
115 static char *mxge_fw_aligned = "mxge_eth_z8e";
116 static char *mxge_fw_rss_aligned = "mxge_rss_eth_z8e";
117 static char *mxge_fw_rss_unaligned = "mxge_rss_ethp_z8e";
/* Forward declarations of the newbus device entry points. */
119 static int mxge_probe(device_t dev);
120 static int mxge_attach(device_t dev);
121 static int mxge_detach(device_t dev);
122 static int mxge_shutdown(device_t dev);
123 static void mxge_intr(void *arg);
/* newbus method table wiring the entry points above into the PCI bus.
 * NOTE(review): initializer braces / DEVMETHOD_END appear to have been
 * dropped from this excerpt. */
125 static device_method_t mxge_methods[] =
127 /* Device interface */
128 DEVMETHOD(device_probe, mxge_probe),
129 DEVMETHOD(device_attach, mxge_attach),
130 DEVMETHOD(device_detach, mxge_detach),
131 DEVMETHOD(device_shutdown, mxge_shutdown),
/* Driver description; softc is a mxge_softc_t. */
136 static driver_t mxge_driver =
140 sizeof(mxge_softc_t),
143 static devclass_t mxge_devclass;
145 /* Declare ourselves to be a child of the PCI bus.*/
146 DRIVER_MODULE(mxge, pci, mxge_driver, mxge_devclass, 0, 0);
/* The driver requires the firmware(9) loader and in-kernel zlib. */
147 MODULE_DEPEND(mxge, firmware, 1, 1, 1);
148 MODULE_DEPEND(mxge, zlib, 1, 1, 1);
/* Forward declarations of internal helpers used before definition. */
150 static int mxge_load_firmware(mxge_softc_t *sc, int adopt);
151 static int mxge_send_cmd(mxge_softc_t *sc, uint32_t cmd, mxge_cmd_t *data);
152 static int mxge_close(mxge_softc_t *sc, int down);
153 static int mxge_open(mxge_softc_t *sc);
154 static void mxge_tick(void *arg);
/*
 * Probe: match Myricom Z8E / Z8E_9 PCI IDs and set a human-readable
 * description based on the PCI revision ID.
 * NOTE(review): the function header, switch statement, break/return
 * lines and closing brace are missing from this excerpt.
 */
157 mxge_probe(device_t dev)
162 if ((pci_get_vendor(dev) == MXGE_PCI_VENDOR_MYRICOM) &&
163 ((pci_get_device(dev) == MXGE_PCI_DEVICE_Z8E) ||
164 (pci_get_device(dev) == MXGE_PCI_DEVICE_Z8E_9))) {
165 rev = pci_get_revid(dev);
167 case MXGE_PCI_REV_Z8E:
168 device_set_desc(dev, "Myri10G-PCIE-8A");
170 case MXGE_PCI_REV_Z8ES:
171 device_set_desc(dev, "Myri10G-PCIE-8B");
/* Unknown revision: still attach, but warn. */
174 device_set_desc(dev, "Myri10G-PCIE-8??");
175 device_printf(dev, "Unrecognized rev %d NIC\n",
/*
 * Enable write-combining on the mapping of the NIC SRAM (x86/amd64
 * only) to speed up PIO writes.  On failure the device falls back to
 * uncached PIO.
 */
185 mxge_enable_wc(mxge_softc_t *sc)
187 #if defined(__i386) || defined(__amd64)
192 len = rman_get_size(sc->mem_res);
/* Re-attribute the whole SRAM BAR mapping as write-combining. */
193 err = pmap_change_attr((vm_offset_t) sc->sram,
194 len, PAT_WRITE_COMBINING);
196 device_printf(sc->dev, "pmap_change_attr failed, %d\n",
204 /* callback to get our DMA address */
/* busdma load callback: stores the (single) segment's bus address
 * through the opaque arg pointer. */
206 mxge_dmamap_callback(void *arg, bus_dma_segment_t *segs, int nsegs,
210 *(bus_addr_t *) arg = segs->ds_addr;
/*
 * Allocate a coherent DMA region: create a tag, allocate zeroed
 * DMA-able memory, and load the map, recording the bus address in
 * dma->bus_addr.  Unwinds with gotos on failure.
 */
215 mxge_dma_alloc(mxge_softc_t *sc, mxge_dma_t *dma, size_t bytes,
216 bus_size_t alignment)
219 device_t dev = sc->dev;
220 bus_size_t boundary, maxsegsize;
/* Large page-aligned allocations get a 4KB boundary/maxsegsize;
 * NOTE(review): the else branch setting boundary/maxsegsize is
 * missing from this excerpt. */
222 if (bytes > 4096 && alignment == 4096) {
230 /* allocate DMAable memory tags */
231 err = bus_dma_tag_create(sc->parent_dmat, /* parent */
232 alignment, /* alignment */
233 boundary, /* boundary */
234 BUS_SPACE_MAXADDR, /* low */
235 BUS_SPACE_MAXADDR, /* high */
236 NULL, NULL, /* filter */
239 maxsegsize, /* maxsegsize */
240 BUS_DMA_COHERENT, /* flags */
241 NULL, NULL, /* lock */
242 &dma->dmat); /* tag */
244 device_printf(dev, "couldn't alloc tag (err = %d)\n", err);
248 /* allocate DMAable memory & map */
249 err = bus_dmamem_alloc(dma->dmat, &dma->addr,
250 (BUS_DMA_WAITOK | BUS_DMA_COHERENT
251 | BUS_DMA_ZERO), &dma->map);
253 device_printf(dev, "couldn't alloc mem (err = %d)\n", err);
254 goto abort_with_dmat;
257 /* load the memory */
258 err = bus_dmamap_load(dma->dmat, dma->map, dma->addr, bytes,
259 mxge_dmamap_callback,
260 (void *)&dma->bus_addr, 0);
262 device_printf(dev, "couldn't load map (err = %d)\n", err);
/* error unwind: free memory then destroy the tag */
268 bus_dmamem_free(dma->dmat, dma->addr, dma->map);
270 (void)bus_dma_tag_destroy(dma->dmat);
/* Release a region allocated by mxge_dma_alloc(): unload, free, destroy. */
276 mxge_dma_free(mxge_dma_t *dma)
278 bus_dmamap_unload(dma->dmat, dma->map);
279 bus_dmamem_free(dma->dmat, dma->addr, dma->map);
280 (void)bus_dma_tag_destroy(dma->dmat);
284 * The eeprom strings on the lanaiX have the format
/*
 * Parse the NUL-separated EEPROM strings: extract the MAC address
 * ("MAC="), product code ("PC="), and serial number ("SN=" /
 * "SN2=", with SN2 taking precedence).  Fails if no MAC is found.
 */
291 mxge_parse_strings(mxge_softc_t *sc)
294 int i, found_mac, found_sn2;
297 ptr = sc->eeprom_strings;
300 while (*ptr != '\0') {
301 if (strncmp(ptr, "MAC=", 4) == 0) {
/* Each MAC octet must be exactly two hex digits. */
304 sc->mac_addr[i] = strtoul(ptr, &endptr, 16);
305 if (endptr - ptr != 2)
314 } else if (strncmp(ptr, "PC=", 3) == 0) {
316 strlcpy(sc->product_code_string, ptr,
317 sizeof(sc->product_code_string));
318 } else if (!found_sn2 && (strncmp(ptr, "SN=", 3) == 0)) {
320 strlcpy(sc->serial_number_string, ptr,
321 sizeof(sc->serial_number_string));
322 } else if (strncmp(ptr, "SN2=", 4) == 0) {
323 /* SN2 takes precedence over SN */
326 strlcpy(sc->serial_number_string, ptr,
327 sizeof(sc->serial_number_string));
/* advance past the current NUL-terminated string */
329 while (*ptr++ != '\0') {}
336 device_printf(sc->dev, "failed to parse eeprom_strings\n");
341 #if defined __i386 || defined i386 || defined __i386__ || defined __x86_64__
/*
 * Enable ECRC generation on an upstream Nvidia (CK804/MCP55) PCIe
 * bridge so that PCIe completions are 8-byte aligned.  Because the
 * extended config space is not reachable through normal config
 * cycles on these chipsets, the config space is mapped directly via
 * pmap_mapdev() and poked through a pointer.
 */
343 mxge_enable_nvidia_ecrc(mxge_softc_t *sc)
346 unsigned long base, off;
348 device_t pdev, mcp55;
349 uint16_t vendor_id, device_id, word;
350 uintptr_t bus, slot, func, ivend, idev;
/* administratively disabled via tunable */
354 if (!mxge_nvidia_ecrc_enable)
/* Walk up to the bridge above our PCI parent. */
357 pdev = device_get_parent(device_get_parent(sc->dev));
359 device_printf(sc->dev, "could not find parent?\n");
362 vendor_id = pci_read_config(pdev, PCIR_VENDOR, 2);
363 device_id = pci_read_config(pdev, PCIR_DEVICE, 2);
/* only Nvidia bridges are handled */
365 if (vendor_id != 0x10de)
370 if (device_id == 0x005d) {
371 /* ck804, base address is magic */
373 } else if (device_id >= 0x0374 && device_id <= 0x378) {
374 /* mcp55, base address stored in chipset */
375 mcp55 = pci_find_bsf(0, 0, 0);
377 0x10de == pci_read_config(mcp55, PCIR_VENDOR, 2) &&
378 0x0369 == pci_read_config(mcp55, PCIR_DEVICE, 2)) {
/* config-space base lives in register 0x90 of device 0:0:0 */
379 word = pci_read_config(mcp55, 0x90, 2);
380 base = ((unsigned long)word & 0x7ffeU) << 25;
387 Test below is commented because it is believed that doing
388 config read/write beyond 0xff will access the config space
389 for the next larger function. Uncomment this and remove
390 the hacky pmap_mapdev() way of accessing config space when
391 FreeBSD grows support for extended pcie config space access
394 /* See if we can, by some miracle, access the extended
396 val = pci_read_config(pdev, 0x178, 4);
397 if (val != 0xffffffff) {
399 pci_write_config(pdev, 0x178, val, 4);
403 /* Rather than using normal pci config space writes, we must
404 * map the Nvidia config space ourselves. This is because on
405 * opteron/nvidia class machine the 0xe000000 mapping is
406 * handled by the nvidia chipset, that means the internal PCI
407 * device (the on-chip northbridge), or the amd-8131 bridge
408 * and things behind them are not visible by this method.
/* Gather bus/slot/function and IDs of the bridge via bus IVARs. */
411 BUS_READ_IVAR(device_get_parent(pdev), pdev,
413 BUS_READ_IVAR(device_get_parent(pdev), pdev,
414 PCI_IVAR_SLOT, &slot);
415 BUS_READ_IVAR(device_get_parent(pdev), pdev,
416 PCI_IVAR_FUNCTION, &func);
417 BUS_READ_IVAR(device_get_parent(pdev), pdev,
418 PCI_IVAR_VENDOR, &ivend);
419 BUS_READ_IVAR(device_get_parent(pdev), pdev,
420 PCI_IVAR_DEVICE, &idev);
/* compute the physical address of this function's config space */
423 + 0x00100000UL * (unsigned long)bus
424 + 0x00001000UL * (unsigned long)(func
427 /* map it into the kernel */
428 va = pmap_mapdev(trunc_page((vm_paddr_t)off), PAGE_SIZE);
432 device_printf(sc->dev, "pmap_kenter_temporary didn't\n");
435 /* get a pointer to the config space mapped into the kernel */
436 cfgptr = va + (off & PAGE_MASK);
438 /* make sure that we can really access it */
439 vendor_id = *(uint16_t *)(cfgptr + PCIR_VENDOR);
440 device_id = *(uint16_t *)(cfgptr + PCIR_DEVICE);
441 if (! (vendor_id == ivend && device_id == idev)) {
442 device_printf(sc->dev, "mapping failed: 0x%x:0x%x\n",
443 vendor_id, device_id);
444 pmap_unmapdev((vm_offset_t)va, PAGE_SIZE);
/* set the ECRC-generation-enable bit at extended offset 0x178 */
448 ptr32 = (uint32_t*)(cfgptr + 0x178);
451 if (val == 0xffffffff) {
452 device_printf(sc->dev, "extended mapping failed\n");
453 pmap_unmapdev((vm_offset_t)va, PAGE_SIZE);
457 pmap_unmapdev((vm_offset_t)va, PAGE_SIZE);
459 device_printf(sc->dev,
460 "Enabled ECRC on upstream Nvidia bridge "
462 (int)bus, (int)slot, (int)func);
/* Non-x86 stub: the Nvidia ECRC hack is only meaningful on x86/amd64. */
467 mxge_enable_nvidia_ecrc(mxge_softc_t *sc)
469 device_printf(sc->dev,
470 "Nforce 4 chipset on non-x86/amd64!?!?!\n");
/*
 * Ask the firmware to benchmark DMA against the dmabench buffer and
 * record the resulting read, write and read/write bandwidths (MB/s)
 * in the softc.  Also used with MXGEFW_CMD_UNALIGNED_TEST to detect
 * unaligned PCIe completions.
 */
477 mxge_dma_test(mxge_softc_t *sc, int test_type)
480 bus_addr_t dmatest_bus = sc->dmabench_dma.bus_addr;
486 /* Run a small DMA test.
487 * The magic multipliers to the length tell the firmware
488 * to do DMA read, write, or read+write tests. The
489 * results are returned in cmd.data0. The upper 16
490 * bits of the return is the number of transfers completed.
491 * The lower 16 bits is the time in 0.5us ticks that the
492 * transfers took to complete.
495 len = sc->tx_boundary;
/* read test (len * 0x10000) */
497 cmd.data0 = MXGE_LOWPART_TO_U32(dmatest_bus);
498 cmd.data1 = MXGE_HIGHPART_TO_U32(dmatest_bus);
499 cmd.data2 = len * 0x10000;
500 status = mxge_send_cmd(sc, test_type, &cmd);
505 sc->read_dma = ((cmd.data0>>16) * len * 2) /
506 (cmd.data0 & 0xffff);
/* write test (len * 0x1) */
507 cmd.data0 = MXGE_LOWPART_TO_U32(dmatest_bus);
508 cmd.data1 = MXGE_HIGHPART_TO_U32(dmatest_bus);
509 cmd.data2 = len * 0x1;
510 status = mxge_send_cmd(sc, test_type, &cmd);
515 sc->write_dma = ((cmd.data0>>16) * len * 2) /
516 (cmd.data0 & 0xffff);
/* concurrent read+write test (len * 0x10001); doubled byte count */
518 cmd.data0 = MXGE_LOWPART_TO_U32(dmatest_bus);
519 cmd.data1 = MXGE_HIGHPART_TO_U32(dmatest_bus);
520 cmd.data2 = len * 0x10001;
521 status = mxge_send_cmd(sc, test_type, &cmd);
526 sc->read_write_dma = ((cmd.data0>>16) * len * 2 * 2) /
527 (cmd.data0 & 0xffff);
/* unaligned-test failures are expected on some chipsets; stay quiet */
530 if (status != 0 && test_type != MXGEFW_CMD_UNALIGNED_TEST)
531 device_printf(sc->dev, "DMA %s benchmark failed: %d\n",
538 * The Lanai Z8E PCI-E interface achieves higher Read-DMA throughput
539 * when the PCI-E Completion packets are aligned on an 8-byte
540 * boundary. Some PCI-E chip sets always align Completion packets; on
541 * the ones that do not, the alignment can be enforced by enabling
542 * ECRC generation (if supported).
544 * When PCI-E Completion packets are not aligned, it is actually more
545 * efficient to limit Read-DMA transactions to 2KB, rather than 4KB.
547 * If the driver can neither enable ECRC nor verify that it has
548 * already been enabled, then it must use a firmware image which works
549 * around unaligned completion packets (ethp_z8e.dat), and it should
550 * also ensure that it never gives the device a Read-DMA which is
551 * larger than 2KB by setting the tx_boundary to 2KB. If ECRC is
552 * enabled, then the driver should use the aligned (eth_z8e.dat)
553 * firmware image, and set tx_boundary to 4KB.
/*
 * Try the aligned firmware: verify MRRS, load the aligned image,
 * enable ECRC if possible, then run the firmware's unaligned-
 * completion test.  Returns 0 if the aligned firmware is usable.
 */
557 mxge_firmware_probe(mxge_softc_t *sc)
559 device_t dev = sc->dev;
563 sc->tx_boundary = 4096;
565 * Verify the max read request size was set to 4KB
566 * before trying the test with 4KB.
568 if (pci_find_cap(dev, PCIY_EXPRESS, &reg) == 0) {
569 pectl = pci_read_config(dev, reg + 0x8, 2);
/* MRRS encoding 5 == 4096 bytes; otherwise limit reads to 2KB */
570 if ((pectl & (5 << 12)) != (5 << 12)) {
571 device_printf(dev, "Max Read Req. size != 4k (0x%x\n",
573 sc->tx_boundary = 2048;
578 * load the optimized firmware (which assumes aligned PCIe
579 * completions) in order to see if it works on this host.
581 sc->fw_name = mxge_fw_aligned;
582 status = mxge_load_firmware(sc, 1);
588 * Enable ECRC if possible
590 mxge_enable_nvidia_ecrc(sc);
593 * Run a DMA test which watches for unaligned completions and
594 * aborts on the first one seen. Not required on Z8ES or newer.
596 if (pci_get_revid(sc->dev) >= MXGE_PCI_REV_Z8ES)
598 status = mxge_dma_test(sc, MXGEFW_CMD_UNALIGNED_TEST);
600 return 0; /* keep the aligned firmware */
603 device_printf(dev, "DMA test failed: %d\n", status);
604 if (status == ENOSYS)
605 device_printf(dev, "Falling back to ethp! "
606 "Please install up to date fw\n");
/*
 * Choose aligned vs. unaligned firmware (and the matching
 * tx_boundary) based on the force tunable, throttling, link width,
 * and the result of mxge_firmware_probe(), then load it.
 */
611 mxge_select_firmware(mxge_softc_t *sc)
614 int force_firmware = mxge_force_firmware;
/* throttling requires the (forced) firmware path */
617 force_firmware = sc->throttle;
619 if (force_firmware != 0) {
620 if (force_firmware == 1)
625 device_printf(sc->dev,
626 "Assuming %s completions (forced)\n",
627 aligned ? "aligned" : "unaligned");
631 /* if the PCIe link width is 4 or less, we can use the aligned
632 firmware and skip any checks */
633 if (sc->link_width != 0 && sc->link_width <= 4) {
634 device_printf(sc->dev,
635 "PCIe x%d Link, expect reduced performance\n",
/* aligned firmware survived the probe; keep it */
641 if (0 == mxge_firmware_probe(sc))
646 sc->fw_name = mxge_fw_aligned;
647 sc->tx_boundary = 4096;
/* fall back: unaligned image and 2KB Read-DMA limit */
649 sc->fw_name = mxge_fw_unaligned;
650 sc->tx_boundary = 2048;
652 return (mxge_load_firmware(sc, 0));
/*
 * Validate a firmware image header: check the MCP type, save the
 * version string for sysctl, parse major/minor/tiny, and reject
 * images whose major.minor does not match the driver's headers.
 */
656 mxge_validate_firmware(mxge_softc_t *sc, const mcp_gen_header_t *hdr)
660 if (be32toh(hdr->mcp_type) != MCP_TYPE_ETH) {
661 device_printf(sc->dev, "Bad firmware type: 0x%x\n",
662 be32toh(hdr->mcp_type));
666 /* save firmware version for sysctl */
667 strlcpy(sc->fw_version, hdr->version, sizeof(sc->fw_version));
669 device_printf(sc->dev, "firmware id: %s\n", hdr->version);
671 sscanf(sc->fw_version, "%d.%d.%d", &sc->fw_ver_major,
672 &sc->fw_ver_minor, &sc->fw_ver_tiny);
674 if (!(sc->fw_ver_major == MXGEFW_VERSION_MAJOR
675 && sc->fw_ver_minor == MXGEFW_VERSION_MINOR)) {
676 device_printf(sc->dev, "Found firmware version %s\n",
678 device_printf(sc->dev, "Driver needs %d.%d\n",
679 MXGEFW_VERSION_MAJOR, MXGEFW_VERSION_MINOR);
/* zlib allocator callbacks: plain malloc/free in M_TEMP. */
687 z_alloc(void *nil, u_int items, u_int size)
691 ptr = malloc(items * size, M_TEMP, M_NOWAIT);
696 z_free(void *nil, void *ptr)
/*
 * Fetch the gzip-compressed firmware via firmware(9), inflate it
 * with zlib, validate the embedded MCP header, and copy the image
 * into NIC SRAM in 256-byte PIO chunks.  *limit receives the image
 * size.  Cleanup is via goto-unwind (buffer, zlib state, fw ref).
 */
703 mxge_load_firmware_helper(mxge_softc_t *sc, uint32_t *limit)
706 char *inflate_buffer;
707 const struct firmware *fw;
708 const mcp_gen_header_t *hdr;
715 fw = firmware_get(sc->fw_name);
717 device_printf(sc->dev, "Could not find firmware image %s\n",
724 /* setup zlib and decompress f/w */
725 bzero(&zs, sizeof (zs));
728 status = inflateInit(&zs);
729 if (status != Z_OK) {
734 /* the uncompressed size is stored as the firmware version,
735 which would otherwise go unused */
736 fw_len = (size_t) fw->version;
737 inflate_buffer = malloc(fw_len, M_TEMP, M_NOWAIT);
738 if (inflate_buffer == NULL)
740 zs.avail_in = fw->datasize;
741 zs.next_in = __DECONST(char *, fw->data);
742 zs.avail_out = fw_len;
743 zs.next_out = inflate_buffer;
744 status = inflate(&zs, Z_FINISH);
745 if (status != Z_STREAM_END) {
746 device_printf(sc->dev, "zlib %d\n", status);
748 goto abort_with_buffer;
/* check the MCP header embedded at MCP_HEADER_PTR_OFFSET:
 * must be 4-byte aligned and fit inside the inflated image */
752 hdr_offset = htobe32(*(const uint32_t *)
753 (inflate_buffer + MCP_HEADER_PTR_OFFSET));
754 if ((hdr_offset & 3) || hdr_offset + sizeof(*hdr) > fw_len) {
755 device_printf(sc->dev, "Bad firmware file");
757 goto abort_with_buffer;
759 hdr = (const void*)(inflate_buffer + hdr_offset);
761 status = mxge_validate_firmware(sc, hdr);
763 goto abort_with_buffer;
765 /* Copy the inflated firmware to NIC SRAM. */
766 for (i = 0; i < fw_len; i += 256) {
767 mxge_pio_copy(sc->sram + MXGE_FW_OFFSET + i,
769 min(256U, (unsigned)(fw_len - i)));
778 free(inflate_buffer, M_TEMP);
782 firmware_put(fw, FIRMWARE_UNLOAD);
787 * Enable or disable periodic RDMAs from the host to make certain
788 * chipsets resend dropped PCIe messages
792 mxge_dummy_rdma(mxge_softc_t *sc, int enable)
795 volatile uint32_t *confirm;
796 volatile char *submit;
797 uint32_t *buf, dma_low, dma_high;
/* align the on-stack command buffer to 8 bytes */
800 buf = (uint32_t *)((unsigned long)(buf_bytes + 7) & ~7UL);
802 /* clear confirmation addr */
803 confirm = (volatile uint32_t *)sc->cmd;
807 /* send an rdma command to the PCIe engine, and wait for the
808 response in the confirmation address. The firmware should
809 write a -1 there to indicate it is alive and well
812 dma_low = MXGE_LOWPART_TO_U32(sc->cmd_dma.bus_addr);
813 dma_high = MXGE_HIGHPART_TO_U32(sc->cmd_dma.bus_addr);
814 buf[0] = htobe32(dma_high); /* confirm addr MSW */
815 buf[1] = htobe32(dma_low); /* confirm addr LSW */
816 buf[2] = htobe32(0xffffffff); /* confirm data */
817 dma_low = MXGE_LOWPART_TO_U32(sc->zeropad_dma.bus_addr);
818 dma_high = MXGE_HIGHPART_TO_U32(sc->zeropad_dma.bus_addr);
819 buf[3] = htobe32(dma_high); /* dummy addr MSW */
820 buf[4] = htobe32(dma_low); /* dummy addr LSW */
821 buf[5] = htobe32(enable); /* enable? */
824 submit = (volatile char *)(sc->sram + MXGEFW_BOOT_DUMMY_RDMA);
826 mxge_pio_copy(submit, buf, 64);
/* poll the confirmation word for up to 20 iterations */
831 while (*confirm != 0xffffffff && i < 20) {
835 if (*confirm != 0xffffffff) {
836 device_printf(sc->dev, "dummy rdma %s failed (%p = 0x%x)",
837 (enable ? "enable" : "disable"), confirm,
/*
 * Send a command to the firmware through the ETH_CMD SRAM mailbox
 * and poll the DMA'd response block for up to 20ms.  The response
 * result is translated into an errno; cmd results are returned in
 * data->data0.  Serialized by sc->cmd_mtx.
 */
844 mxge_send_cmd(mxge_softc_t *sc, uint32_t cmd, mxge_cmd_t *data)
847 char buf_bytes[sizeof(*buf) + 8];
848 volatile mcp_cmd_response_t *response = sc->cmd;
849 volatile char *cmd_addr = sc->sram + MXGEFW_ETH_CMD;
850 uint32_t dma_low, dma_high;
851 int err, sleep_total = 0;
853 /* ensure buf is aligned to 8 bytes */
854 buf = (mcp_cmd_t *)((unsigned long)(buf_bytes + 7) & ~7UL);
856 buf->data0 = htobe32(data->data0);
857 buf->data1 = htobe32(data->data1);
858 buf->data2 = htobe32(data->data2);
859 buf->cmd = htobe32(cmd);
860 dma_low = MXGE_LOWPART_TO_U32(sc->cmd_dma.bus_addr);
861 dma_high = MXGE_HIGHPART_TO_U32(sc->cmd_dma.bus_addr);
863 buf->response_addr.low = htobe32(dma_low);
864 buf->response_addr.high = htobe32(dma_high);
865 mtx_lock(&sc->cmd_mtx);
/* 0xffffffff marks "no response yet" */
866 response->result = 0xffffffff;
868 mxge_pio_copy((volatile void *)cmd_addr, buf, sizeof (*buf));
870 /* wait up to 20ms */
872 for (sleep_total = 0; sleep_total < 20; sleep_total++) {
873 bus_dmamap_sync(sc->cmd_dma.dmat,
874 sc->cmd_dma.map, BUS_DMASYNC_POSTREAD);
/* translate firmware result codes into errno values */
876 switch (be32toh(response->result)) {
878 data->data0 = be32toh(response->data);
884 case MXGEFW_CMD_UNKNOWN:
887 case MXGEFW_CMD_ERROR_UNALIGNED:
890 case MXGEFW_CMD_ERROR_BUSY:
893 case MXGEFW_CMD_ERROR_I2C_ABSENT:
897 device_printf(sc->dev,
899 "failed, result = %d\n",
900 cmd, be32toh(response->result));
/* timed out waiting for a response */
908 device_printf(sc->dev, "mxge: command %d timed out"
910 cmd, be32toh(response->result));
911 mtx_unlock(&sc->cmd_mtx);
/*
 * Adopt the firmware already running on the NIC: locate its header
 * in SRAM, copy it to host memory, validate it, and flag the
 * known rx-filter bug present in adopted fw 1.4.4 through 1.4.11
 * (worked around by staying in ALLMULTI).
 */
916 mxge_adopt_running_firmware(mxge_softc_t *sc)
918 struct mcp_gen_header *hdr;
919 const size_t bytes = sizeof (struct mcp_gen_header);
923 /* find running firmware header */
924 hdr_offset = htobe32(*(volatile uint32_t *)
925 (sc->sram + MCP_HEADER_PTR_OFFSET));
927 if ((hdr_offset & 3) || hdr_offset + sizeof(*hdr) > sc->sram_size) {
928 device_printf(sc->dev,
929 "Running firmware has bad header offset (%d)\n",
934 /* copy header of running firmware from SRAM to host memory to
935 * validate firmware */
936 hdr = malloc(bytes, M_DEVBUF, M_NOWAIT);
938 device_printf(sc->dev, "could not malloc firmware hdr\n");
941 bus_space_read_region_1(rman_get_bustag(sc->mem_res),
942 rman_get_bushandle(sc->mem_res),
943 hdr_offset, (char *)hdr, bytes);
944 status = mxge_validate_firmware(sc, hdr);
948 * check to see if adopted firmware has bug where adopting
949 * it will cause broadcasts to be filtered unless the NIC
950 * is kept in ALLMULTI mode
952 if (sc->fw_ver_major == 1 && sc->fw_ver_minor == 4 &&
953 sc->fw_ver_tiny >= 4 && sc->fw_ver_tiny <= 11) {
954 sc->adopted_rx_filter_bug = 1;
955 device_printf(sc->dev, "Adopting fw %d.%d.%d: "
956 "working around rx filter bug\n",
957 sc->fw_ver_major, sc->fw_ver_minor,
/*
 * Load firmware into the NIC (or, if loading fails and 'adopt' is
 * set, adopt the already-running image), then hand off control to
 * the bootstrap MCP and poll the confirmation word.
 */
966 mxge_load_firmware(mxge_softc_t *sc, int adopt)
968 volatile uint32_t *confirm;
969 volatile char *submit;
971 uint32_t *buf, size, dma_low, dma_high;
/* align the on-stack handoff buffer to 8 bytes */
974 buf = (uint32_t *)((unsigned long)(buf_bytes + 7) & ~7UL);
976 size = sc->sram_size;
977 status = mxge_load_firmware_helper(sc, &size);
981 /* Try to use the currently running firmware, if
983 status = mxge_adopt_running_firmware(sc);
985 device_printf(sc->dev,
986 "failed to adopt running firmware\n");
989 device_printf(sc->dev,
990 "Successfully adopted running firmware\n");
/* adopted aligned firmware cannot be trusted: drop to 2KB reads */
991 if (sc->tx_boundary == 4096) {
992 device_printf(sc->dev,
993 "Using firmware currently running on NIC"
995 device_printf(sc->dev,
996 "performance consider loading optimized "
999 sc->fw_name = mxge_fw_unaligned;
1000 sc->tx_boundary = 2048;
1003 /* clear confirmation addr */
1004 confirm = (volatile uint32_t *)sc->cmd;
1007 /* send a reload command to the bootstrap MCP, and wait for the
1008 response in the confirmation address. The firmware should
1009 write a -1 there to indicate it is alive and well
1012 dma_low = MXGE_LOWPART_TO_U32(sc->cmd_dma.bus_addr);
1013 dma_high = MXGE_HIGHPART_TO_U32(sc->cmd_dma.bus_addr);
1015 buf[0] = htobe32(dma_high); /* confirm addr MSW */
1016 buf[1] = htobe32(dma_low); /* confirm addr LSW */
1017 buf[2] = htobe32(0xffffffff); /* confirm data */
1019 /* FIX: All newest firmware should un-protect the bottom of
1020 the sram before handoff. However, the very first interfaces
1021 do not. Therefore the handoff copy must skip the first 8 bytes
1023 /* where the code starts*/
1024 buf[3] = htobe32(MXGE_FW_OFFSET + 8);
1025 buf[4] = htobe32(size - 8); /* length of code */
1026 buf[5] = htobe32(8); /* where to copy to */
1027 buf[6] = htobe32(0); /* where to jump to */
1029 submit = (volatile char *)(sc->sram + MXGEFW_BOOT_HANDOFF);
1030 mxge_pio_copy(submit, buf, 64);
/* poll the confirmation word for up to 20 iterations */
1035 while (*confirm != 0xffffffff && i < 20) {
1038 bus_dmamap_sync(sc->cmd_dma.dmat,
1039 sc->cmd_dma.map, BUS_DMASYNC_POSTREAD);
1041 if (*confirm != 0xffffffff) {
1042 device_printf(sc->dev,"handoff failed (%p = 0x%x)",
/* Push sc->mac_addr to the firmware: first 4 octets in data0,
 * last 2 in data1. */
1051 mxge_update_mac_address(mxge_softc_t *sc)
1054 uint8_t *addr = sc->mac_addr;
1058 cmd.data0 = ((addr[0] << 24) | (addr[1] << 16)
1059 | (addr[2] << 8) | addr[3]);
1061 cmd.data1 = ((addr[4] << 8) | (addr[5]));
1063 status = mxge_send_cmd(sc, MXGEFW_SET_MAC_ADDRESS, &cmd);
/* Enable or disable link-level flow control in the firmware. */
1068 mxge_change_pause(mxge_softc_t *sc, int pause)
1074 status = mxge_send_cmd(sc, MXGEFW_ENABLE_FLOW_CONTROL,
1077 status = mxge_send_cmd(sc, MXGEFW_DISABLE_FLOW_CONTROL,
1081 device_printf(sc->dev, "Failed to set flow control mode\n");
/* Enable or disable promiscuous mode (forced on by the
 * mxge_always_promisc tunable). */
1089 mxge_change_promisc(mxge_softc_t *sc, int promisc)
1094 if (mxge_always_promisc)
1098 status = mxge_send_cmd(sc, MXGEFW_ENABLE_PROMISC,
1101 status = mxge_send_cmd(sc, MXGEFW_DISABLE_PROMISC,
1105 device_printf(sc->dev, "Failed to set promisc mode\n");
/*
 * Program the firmware multicast filter from the interface's
 * multicast list: go ALLMULTI while updating, flush old filters,
 * join each group, then re-enable filtering.  Stays in ALLMULTI
 * for IFF_ALLMULTI, the adopted-fw rx filter bug, or on any error.
 */
1110 mxge_set_multicast_list(mxge_softc_t *sc)
1113 struct ifmultiaddr *ifma;
1114 struct ifnet *ifp = sc->ifp;
1117 /* This firmware is known to not support multicast */
1118 if (!sc->fw_multicast_support)
1121 /* Disable multicast filtering while we play with the lists*/
1122 err = mxge_send_cmd(sc, MXGEFW_ENABLE_ALLMULTI, &cmd);
1124 device_printf(sc->dev, "Failed MXGEFW_ENABLE_ALLMULTI,"
1125 " error status: %d\n", err);
/* adopted firmware bug: must stay in ALLMULTI */
1129 if (sc->adopted_rx_filter_bug)
1132 if (ifp->if_flags & IFF_ALLMULTI)
1133 /* request to disable multicast filtering, so quit here */
1136 /* Flush all the filters */
1138 err = mxge_send_cmd(sc, MXGEFW_LEAVE_ALL_MULTICAST_GROUPS, &cmd);
1140 device_printf(sc->dev,
1141 "Failed MXGEFW_LEAVE_ALL_MULTICAST_GROUPS"
1142 ", error status: %d\n", err);
1146 /* Walk the multicast list, and add each address */
1148 if_maddr_rlock(ifp);
1149 TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
1150 if (ifma->ifma_addr->sa_family != AF_LINK)
/* split the 6-byte link address across data0/data1 */
1152 bcopy(LLADDR((struct sockaddr_dl *)ifma->ifma_addr),
1154 bcopy(LLADDR((struct sockaddr_dl *)ifma->ifma_addr) + 4,
1156 cmd.data0 = htonl(cmd.data0);
1157 cmd.data1 = htonl(cmd.data1);
1158 err = mxge_send_cmd(sc, MXGEFW_JOIN_MULTICAST_GROUP, &cmd);
1160 device_printf(sc->dev, "Failed "
1161 "MXGEFW_JOIN_MULTICAST_GROUP, error status:"
1163 /* abort, leaving multicast filtering off */
1164 if_maddr_runlock(ifp);
1168 if_maddr_runlock(ifp);
1169 /* Enable multicast filtering */
1170 err = mxge_send_cmd(sc, MXGEFW_DISABLE_ALLMULTI, &cmd);
1172 device_printf(sc->dev, "Failed MXGEFW_DISABLE_ALLMULTI"
1173 ", error status: %d\n", err);
/*
 * Return the largest MTU the hardware/buffer layout supports:
 * either MXGEFW_MAX_MTU, or MJUMPAGESIZE if virtually-contiguous
 * jumbo buffers cannot be used.
 */
1178 mxge_max_mtu(mxge_softc_t *sc)
1183 if (MJUMPAGESIZE - MXGEFW_PAD > MXGEFW_MAX_MTU)
1184 return MXGEFW_MAX_MTU - MXGEFW_PAD;
1186 /* try to set nbufs to see if it we can
1187 use virtually contiguous jumbos */
1189 status = mxge_send_cmd(sc, MXGEFW_CMD_ALWAYS_USE_N_BIG_BUFFERS,
1192 return MXGEFW_MAX_MTU - MXGEFW_PAD;
1194 /* otherwise, we're limited to MJUMPAGESIZE */
1195 return MJUMPAGESIZE - MXGEFW_PAD;
/*
 * Reset the NIC and re-establish firmware state: reset command,
 * dummy RDMAs, intrq sizing, RSS slice setup, interrupt queue DMA
 * addresses, coalescing/IRQ pointers, DMA benchmark, per-slice
 * counter reset, and restore of MAC/promisc/pause/multicast state.
 */
1199 mxge_reset(mxge_softc_t *sc, int interrupts_setup)
1201 struct mxge_slice_state *ss;
1202 mxge_rx_done_t *rx_done;
1203 volatile uint32_t *irq_claim;
1207 /* try to send a reset command to the card to see if it
1209 memset(&cmd, 0, sizeof (cmd));
1210 status = mxge_send_cmd(sc, MXGEFW_CMD_RESET, &cmd);
1212 device_printf(sc->dev, "failed reset\n");
/* re-enable dummy RDMAs after the reset */
1216 mxge_dummy_rdma(sc, 1);
1219 /* set the intrq size */
1220 cmd.data0 = sc->rx_ring_size;
1221 status = mxge_send_cmd(sc, MXGEFW_CMD_SET_INTRQ_SIZE, &cmd);
1224 * Even though we already know how many slices are supported
1225 * via mxge_slice_probe(), MXGEFW_CMD_GET_MAX_RSS_QUEUES
1226 * has magic side effects, and must be called after a reset.
1227 * It must be called prior to calling any RSS related cmds,
1228 * including assigning an interrupt queue for anything but
1229 * slice 0. It must also be called *after*
1230 * MXGEFW_CMD_SET_INTRQ_SIZE, since the intrq size is used by
1231 * the firmware to compute offsets.
1234 if (sc->num_slices > 1) {
1235 /* ask the maximum number of slices it supports */
1236 status = mxge_send_cmd(sc, MXGEFW_CMD_GET_MAX_RSS_QUEUES,
1239 device_printf(sc->dev,
1240 "failed to get number of slices\n");
1244 * MXGEFW_CMD_ENABLE_RSS_QUEUES must be called prior
1245 * to setting up the interrupt queue DMA
1247 cmd.data0 = sc->num_slices;
1248 cmd.data1 = MXGEFW_SLICE_INTR_MODE_ONE_PER_SLICE;
1249 #ifdef IFNET_BUF_RING
1250 cmd.data1 |= MXGEFW_SLICE_ENABLE_MULTIPLE_TX_QUEUES;
1252 status = mxge_send_cmd(sc, MXGEFW_CMD_ENABLE_RSS_QUEUES,
1255 device_printf(sc->dev,
1256 "failed to set number of slices\n");
1262 if (interrupts_setup) {
1263 /* Now exchange information about interrupts */
1264 for (slice = 0; slice < sc->num_slices; slice++) {
1265 rx_done = &sc->ss[slice].rx_done;
1266 memset(rx_done->entry, 0, sc->rx_ring_size);
1267 cmd.data0 = MXGE_LOWPART_TO_U32(rx_done->dma.bus_addr);
1268 cmd.data1 = MXGE_HIGHPART_TO_U32(rx_done->dma.bus_addr);
1270 status |= mxge_send_cmd(sc,
1271 MXGEFW_CMD_SET_INTRQ_DMA,
/* fetch SRAM offsets for coalescing / IRQ ack / deassert */
1276 status |= mxge_send_cmd(sc,
1277 MXGEFW_CMD_GET_INTR_COAL_DELAY_OFFSET, &cmd);
1280 sc->intr_coal_delay_ptr = (volatile uint32_t *)(sc->sram + cmd.data0);
1282 status |= mxge_send_cmd(sc, MXGEFW_CMD_GET_IRQ_ACK_OFFSET, &cmd);
1283 irq_claim = (volatile uint32_t *)(sc->sram + cmd.data0);
1286 status |= mxge_send_cmd(sc, MXGEFW_CMD_GET_IRQ_DEASSERT_OFFSET,
1288 sc->irq_deassert = (volatile uint32_t *)(sc->sram + cmd.data0);
1290 device_printf(sc->dev, "failed set interrupt parameters\n");
1295 *sc->intr_coal_delay_ptr = htobe32(sc->intr_coal_delay);
1298 /* run a DMA benchmark */
1299 (void) mxge_dma_test(sc, MXGEFW_DMA_TEST);
1301 for (slice = 0; slice < sc->num_slices; slice++) {
1302 ss = &sc->ss[slice];
1304 ss->irq_claim = irq_claim + (2 * slice);
1305 /* reset mcp/driver shared state back to 0 */
1306 ss->rx_done.idx = 0;
1307 ss->rx_done.cnt = 0;
1310 ss->tx.pkt_done = 0;
1311 ss->tx.queue_active = 0;
1312 ss->tx.activate = 0;
1313 ss->tx.deactivate = 0;
1318 ss->rx_small.cnt = 0;
1319 ss->lc.lro_bad_csum = 0;
1320 ss->lc.lro_queued = 0;
1321 ss->lc.lro_flushed = 0;
1322 if (ss->fw_stats != NULL) {
1323 bzero(ss->fw_stats, sizeof *ss->fw_stats);
1326 sc->rdma_tags_available = 15;
/* restore addressing / filtering / pause state after reset */
1327 status = mxge_update_mac_address(sc);
1328 mxge_change_promisc(sc, sc->ifp->if_flags & IFF_PROMISC);
1329 mxge_change_pause(sc, sc->pause);
1330 mxge_set_multicast_list(sc);
1332 cmd.data0 = sc->throttle;
1333 if (mxge_send_cmd(sc, MXGEFW_CMD_SET_THROTTLE_FACTOR,
1335 device_printf(sc->dev,
1336 "can't enable throttle\n");
/*
 * Sysctl handler: validate and apply a new transmit throttle factor
 * via MXGEFW_CMD_SET_THROTTLE_FACTOR, under the driver mutex.
 */
1343 mxge_change_throttle(SYSCTL_HANDLER_ARGS)
1348 unsigned int throttle;
1351 throttle = sc->throttle;
1352 err = sysctl_handle_int(oidp, &throttle, arg2, req);
/* unchanged value: nothing to do */
1357 if (throttle == sc->throttle)
1360 if (throttle < MXGE_MIN_THROTTLE || throttle > MXGE_MAX_THROTTLE)
1363 mtx_lock(&sc->driver_mtx);
1364 cmd.data0 = throttle;
1365 err = mxge_send_cmd(sc, MXGEFW_CMD_SET_THROTTLE_FACTOR, &cmd);
1367 sc->throttle = throttle;
1368 mtx_unlock(&sc->driver_mtx);
/*
 * Sysctl handler: validate (1..1000000 usecs) and write the new
 * interrupt coalescing delay directly to the firmware's SRAM slot.
 */
1373 mxge_change_intr_coal(SYSCTL_HANDLER_ARGS)
1376 unsigned int intr_coal_delay;
1380 intr_coal_delay = sc->intr_coal_delay;
1381 err = sysctl_handle_int(oidp, &intr_coal_delay, arg2, req);
1385 if (intr_coal_delay == sc->intr_coal_delay)
1388 if (intr_coal_delay == 0 || intr_coal_delay > 1000*1000)
1391 mtx_lock(&sc->driver_mtx);
/* firmware reads the delay big-endian from SRAM */
1392 *sc->intr_coal_delay_ptr = htobe32(intr_coal_delay);
1393 sc->intr_coal_delay = intr_coal_delay;
1395 mtx_unlock(&sc->driver_mtx);
/* Sysctl handler: toggle link flow control via mxge_change_pause(). */
1400 mxge_change_flow_control(SYSCTL_HANDLER_ARGS)
1403 unsigned int enabled;
1407 enabled = sc->pause;
1408 err = sysctl_handle_int(oidp, &enabled, arg2, req);
1412 if (enabled == sc->pause)
1415 mtx_lock(&sc->driver_mtx);
1416 err = mxge_change_pause(sc, enabled);
1417 mtx_unlock(&sc->driver_mtx);
/* Sysctl helper: expose a big-endian 32-bit counter as a host-order int. */
1422 mxge_handle_be32(SYSCTL_HANDLER_ARGS)
1428 arg2 = be32toh(*(int *)arg1);
1430 err = sysctl_handle_int(oidp, arg1, arg2, req);
/* Tear down the per-slice sysctl trees and the slice parent tree. */
1436 mxge_rem_sysctls(mxge_softc_t *sc)
1438 struct mxge_slice_state *ss;
/* nothing was ever registered */
1441 if (sc->slice_sysctl_tree == NULL)
1444 for (slice = 0; slice < sc->num_slices; slice++) {
1445 ss = &sc->ss[slice];
1446 if (ss == NULL || ss->sysctl_tree == NULL)
1448 sysctl_ctx_free(&ss->sysctl_ctx);
1449 ss->sysctl_tree = NULL;
1451 sysctl_ctx_free(&sc->slice_sysctl_ctx);
1452 sc->slice_sysctl_tree = NULL;
/*
 * Register the driver's sysctl tree: static device information
 * (firmware version, serial number, link width, DMA benchmark results),
 * tunables (interrupt coalescing, throttling, flow control), the
 * firmware's big-endian stats block (exposed through mxge_handle_be32),
 * and a per-slice subtree of ring/LRO/tx debug counters.
 *
 * Fix vs. original: the "flow_control_enabled" sysctl's description was
 * a copy-paste of the intr-coal one ("interrupt coalescing delay in
 * usecs"); it now reads "flow control enabled".  No other byte changed.
 * NOTE(review): this excerpt elides many original source lines (oid name
 * strings, some descriptions); comments describe only visible code.
 */
1456 mxge_add_sysctls(mxge_softc_t *sc)
1458 struct sysctl_ctx_list *ctx;
1459 struct sysctl_oid_list *children;
1461 struct mxge_slice_state *ss;
1465 ctx = device_get_sysctl_ctx(sc->dev);
1466 children = SYSCTL_CHILDREN(device_get_sysctl_tree(sc->dev));
/* firmware stats block lives in slice 0 */
1467 fw = sc->ss[0].fw_stats;
1469 /* random information */
1470 SYSCTL_ADD_STRING(ctx, children, OID_AUTO,
1472 CTLFLAG_RD, sc->fw_version,
1473 0, "firmware version");
1474 SYSCTL_ADD_STRING(ctx, children, OID_AUTO,
1476 CTLFLAG_RD, sc->serial_number_string,
1477 0, "serial number");
1478 SYSCTL_ADD_STRING(ctx, children, OID_AUTO,
1480 CTLFLAG_RD, sc->product_code_string,
1482 SYSCTL_ADD_INT(ctx, children, OID_AUTO,
1484 CTLFLAG_RD, &sc->link_width,
1486 SYSCTL_ADD_INT(ctx, children, OID_AUTO,
1488 CTLFLAG_RD, &sc->tx_boundary,
1490 SYSCTL_ADD_INT(ctx, children, OID_AUTO,
1492 CTLFLAG_RD, &sc->wc,
1493 0, "write combining PIO?");
1494 SYSCTL_ADD_INT(ctx, children, OID_AUTO,
1496 CTLFLAG_RD, &sc->read_dma,
1497 0, "DMA Read speed in MB/s");
1498 SYSCTL_ADD_INT(ctx, children, OID_AUTO,
1500 CTLFLAG_RD, &sc->write_dma,
1501 0, "DMA Write speed in MB/s");
1502 SYSCTL_ADD_INT(ctx, children, OID_AUTO,
1503 "read_write_dma_MBs",
1504 CTLFLAG_RD, &sc->read_write_dma,
1505 0, "DMA concurrent Read/Write speed in MB/s");
1506 SYSCTL_ADD_INT(ctx, children, OID_AUTO,
1508 CTLFLAG_RD, &sc->watchdog_resets,
1509 0, "Number of times NIC was reset");
1512 /* performance related tunables */
1513 SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
1515 CTLTYPE_INT|CTLFLAG_RW, sc,
1516 0, mxge_change_intr_coal,
1517 "I", "interrupt coalescing delay in usecs");
1519 SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
1521 CTLTYPE_INT|CTLFLAG_RW, sc,
1522 0, mxge_change_throttle,
1523 "I", "transmit throttling");
1525 SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
1526 "flow_control_enabled",
1527 CTLTYPE_INT|CTLFLAG_RW, sc,
1528 0, mxge_change_flow_control,
/* fixed: description previously copy-pasted from intr_coal */
1529 "I", "flow control enabled");
1531 SYSCTL_ADD_INT(ctx, children, OID_AUTO,
1533 CTLFLAG_RW, &mxge_deassert_wait,
1534 0, "Wait for IRQ line to go low in ihandler");
1536 /* stats block from firmware is in network byte order.
1538 SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
1540 CTLTYPE_INT|CTLFLAG_RD, &fw->link_up,
1541 0, mxge_handle_be32,
1543 SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
1544 "rdma_tags_available",
1545 CTLTYPE_INT|CTLFLAG_RD, &fw->rdma_tags_available,
1546 0, mxge_handle_be32,
1547 "I", "rdma_tags_available");
1548 SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
1549 "dropped_bad_crc32",
1550 CTLTYPE_INT|CTLFLAG_RD,
1551 &fw->dropped_bad_crc32,
1552 0, mxge_handle_be32,
1553 "I", "dropped_bad_crc32");
1554 SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
1556 CTLTYPE_INT|CTLFLAG_RD,
1557 &fw->dropped_bad_phy,
1558 0, mxge_handle_be32,
1559 "I", "dropped_bad_phy");
1560 SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
1561 "dropped_link_error_or_filtered",
1562 CTLTYPE_INT|CTLFLAG_RD,
1563 &fw->dropped_link_error_or_filtered,
1564 0, mxge_handle_be32,
1565 "I", "dropped_link_error_or_filtered");
1566 SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
1567 "dropped_link_overflow",
1568 CTLTYPE_INT|CTLFLAG_RD, &fw->dropped_link_overflow,
1569 0, mxge_handle_be32,
1570 "I", "dropped_link_overflow");
1571 SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
1572 "dropped_multicast_filtered",
1573 CTLTYPE_INT|CTLFLAG_RD,
1574 &fw->dropped_multicast_filtered,
1575 0, mxge_handle_be32,
1576 "I", "dropped_multicast_filtered");
1577 SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
1578 "dropped_no_big_buffer",
1579 CTLTYPE_INT|CTLFLAG_RD, &fw->dropped_no_big_buffer,
1580 0, mxge_handle_be32,
1581 "I", "dropped_no_big_buffer");
1582 SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
1583 "dropped_no_small_buffer",
1584 CTLTYPE_INT|CTLFLAG_RD,
1585 &fw->dropped_no_small_buffer,
1586 0, mxge_handle_be32,
1587 "I", "dropped_no_small_buffer");
1588 SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
1590 CTLTYPE_INT|CTLFLAG_RD, &fw->dropped_overrun,
1591 0, mxge_handle_be32,
1592 "I", "dropped_overrun");
1593 SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
1595 CTLTYPE_INT|CTLFLAG_RD,
1597 0, mxge_handle_be32,
1598 "I", "dropped_pause");
1599 SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
1601 CTLTYPE_INT|CTLFLAG_RD, &fw->dropped_runt,
1602 0, mxge_handle_be32,
1603 "I", "dropped_runt");
1605 SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
1606 "dropped_unicast_filtered",
1607 CTLTYPE_INT|CTLFLAG_RD, &fw->dropped_unicast_filtered,
1608 0, mxge_handle_be32,
1609 "I", "dropped_unicast_filtered");
1611 /* verbose printing? */
1612 SYSCTL_ADD_INT(ctx, children, OID_AUTO,
1614 CTLFLAG_RW, &mxge_verbose,
1615 0, "verbose printing");
1617 /* add counters exported for debugging from all slices */
1618 sysctl_ctx_init(&sc->slice_sysctl_ctx);
1619 sc->slice_sysctl_tree =
1620 SYSCTL_ADD_NODE(&sc->slice_sysctl_ctx, children, OID_AUTO,
1621 "slice", CTLFLAG_RD, 0, "");
/* one numbered child node per slice, each with its own ctx so
 * mxge_rem_sysctls() can free them independently */
1623 for (slice = 0; slice < sc->num_slices; slice++) {
1624 ss = &sc->ss[slice];
1625 sysctl_ctx_init(&ss->sysctl_ctx);
1626 ctx = &ss->sysctl_ctx;
1627 children = SYSCTL_CHILDREN(sc->slice_sysctl_tree);
1628 sprintf(slice_num, "%d", slice);
1630 SYSCTL_ADD_NODE(ctx, children, OID_AUTO, slice_num,
1632 children = SYSCTL_CHILDREN(ss->sysctl_tree);
1633 SYSCTL_ADD_INT(ctx, children, OID_AUTO,
1635 CTLFLAG_RD, &ss->rx_small.cnt,
1637 SYSCTL_ADD_INT(ctx, children, OID_AUTO,
1639 CTLFLAG_RD, &ss->rx_big.cnt,
1641 SYSCTL_ADD_U64(ctx, children, OID_AUTO,
1642 "lro_flushed", CTLFLAG_RD, &ss->lc.lro_flushed,
1643 0, "number of lro merge queues flushed");
1645 SYSCTL_ADD_U64(ctx, children, OID_AUTO,
1646 "lro_bad_csum", CTLFLAG_RD, &ss->lc.lro_bad_csum,
1647 0, "number of bad csums preventing LRO");
1649 SYSCTL_ADD_U64(ctx, children, OID_AUTO,
1650 "lro_queued", CTLFLAG_RD, &ss->lc.lro_queued,
1651 0, "number of frames appended to lro merge"
1654 #ifndef IFNET_BUF_RING
1655 /* only transmit from slice 0 for now */
1659 SYSCTL_ADD_INT(ctx, children, OID_AUTO,
1661 CTLFLAG_RD, &ss->tx.req,
1664 SYSCTL_ADD_INT(ctx, children, OID_AUTO,
1666 CTLFLAG_RD, &ss->tx.done,
1668 SYSCTL_ADD_INT(ctx, children, OID_AUTO,
1670 CTLFLAG_RD, &ss->tx.pkt_done,
1672 SYSCTL_ADD_INT(ctx, children, OID_AUTO,
1674 CTLFLAG_RD, &ss->tx.stall,
1676 SYSCTL_ADD_INT(ctx, children, OID_AUTO,
1678 CTLFLAG_RD, &ss->tx.wake,
1680 SYSCTL_ADD_INT(ctx, children, OID_AUTO,
1682 CTLFLAG_RD, &ss->tx.defrag,
1684 SYSCTL_ADD_INT(ctx, children, OID_AUTO,
1686 CTLFLAG_RD, &ss->tx.queue_active,
1687 0, "tx_queue_active");
1688 SYSCTL_ADD_INT(ctx, children, OID_AUTO,
1690 CTLFLAG_RD, &ss->tx.activate,
1692 SYSCTL_ADD_INT(ctx, children, OID_AUTO,
1694 CTLFLAG_RD, &ss->tx.deactivate,
1695 0, "tx_deactivate");
1699 /* copy an array of mcp_kreq_ether_send_t's to the mcp. Copy
1700 backwards one at a time and handle ring wraps */
/*
 * Slow path used by mxge_submit_req() when the request list wraps the
 * ring: copy each descriptor individually, last-to-first, masking the
 * slot index so each write lands in the correct wrapped position.
 */
1703 mxge_submit_req_backwards(mxge_tx_ring_t *tx,
1704 mcp_kreq_ether_send_t *src, int cnt)
1706 int idx, starting_slot;
1707 starting_slot = tx->req;
/* mask handles wrap-around of the ring index */
1710 idx = (starting_slot + cnt) & tx->mask;
1711 mxge_pio_copy(&tx->lanai[idx],
1712 &src[cnt], sizeof(*src));
1718 * copy an array of mcp_kreq_ether_send_t's to the mcp. Copy
1719 * at most 32 bytes at a time, so as to avoid involving the software
1720 * pio handler in the nic. We re-write the first segment's flags
1721 * to mark them valid only after writing the entire chain
/*
 * Fast path: when the chain fits without wrapping, PIO-copy descriptors
 * two at a time (32 bytes) with a write barrier between bursts;
 * otherwise fall back to mxge_submit_req_backwards().  The first
 * descriptor's flags are withheld and written last so the NIC never
 * sees a partially-written chain as valid.
 */
1725 mxge_submit_req(mxge_tx_ring_t *tx, mcp_kreq_ether_send_t *src,
1730 volatile uint32_t *dst_ints;
1731 mcp_kreq_ether_send_t *srcp;
1732 volatile mcp_kreq_ether_send_t *dstp, *dst;
1735 idx = tx->req & tx->mask;
/* remember the real flags; they are restored and written last */
1737 last_flags = src->flags;
1740 dst = dstp = &tx->lanai[idx];
/* no wrap: burst-copy pairs of descriptors */
1743 if ((idx + cnt) < tx->mask) {
1744 for (i = 0; i < (cnt - 1); i += 2) {
1745 mxge_pio_copy(dstp, srcp, 2 * sizeof(*src));
1746 wmb(); /* force write every 32 bytes */
1751 /* submit all but the first request, and ensure
1752 that it is submitted below */
1753 mxge_submit_req_backwards(tx, src, cnt);
1757 /* submit the first request */
1758 mxge_pio_copy(dstp, srcp, sizeof(*src));
1759 wmb(); /* barrier before setting valid flag */
1762 /* re-write the last 32-bits with the valid flags */
1763 src->flags = last_flags;
1764 src_ints = (uint32_t *)src;
1766 dst_ints = (volatile uint32_t *)dst;
/* single 32-bit store publishes the valid flags to the NIC */
1768 *dst_ints = *src_ints;
/*
 * Parse the headers of an outgoing frame for checksum/TSO offload:
 * locate the IP/IPv6 header (after any 802.1Q tag) and the TCP header,
 * filling pi->ip_off, pi->ip/pi->ip6, pi->ip_hlen and pi->tcp.  Headers
 * that do not fit in the first mbuf are copied into ss->scratch so they
 * can be read contiguously.
 * NOTE(review): excerpt elides switch/return lines; IPv6 support is
 * compiled only when IFCAP_TSO6 && INET6.
 */
1774 mxge_parse_tx(struct mxge_slice_state *ss, struct mbuf *m,
1775 struct mxge_pkt_info *pi)
1777 struct ether_vlan_header *eh;
1779 int tso = m->m_pkthdr.csum_flags & (CSUM_TSO);
1780 #if IFCAP_TSO6 && defined(INET6)
/* determine ethertype and IP header offset, skipping a VLAN tag */
1784 eh = mtod(m, struct ether_vlan_header *);
1785 if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
1786 etype = ntohs(eh->evl_proto);
1787 pi->ip_off = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
1789 etype = ntohs(eh->evl_encap_proto);
1790 pi->ip_off = ETHER_HDR_LEN;
1796 * ensure ip header is in first mbuf, copy it to a
1797 * scratch buffer if not
1799 pi->ip = (struct ip *)(m->m_data + pi->ip_off);
1801 if (__predict_false(m->m_len < pi->ip_off + sizeof(*pi->ip))) {
1802 m_copydata(m, 0, pi->ip_off + sizeof(*pi->ip),
1804 pi->ip = (struct ip *)(ss->scratch + pi->ip_off);
1806 pi->ip_hlen = pi->ip->ip_hl << 2;
/* likewise ensure the TCP header is contiguous */
1810 if (__predict_false(m->m_len < pi->ip_off + pi->ip_hlen +
1811 sizeof(struct tcphdr))) {
1812 m_copydata(m, 0, pi->ip_off + pi->ip_hlen +
1813 sizeof(struct tcphdr), ss->scratch);
1814 pi->ip = (struct ip *)(ss->scratch + pi->ip_off);
1816 pi->tcp = (struct tcphdr *)((char *)pi->ip + pi->ip_hlen);
1818 #if IFCAP_TSO6 && defined(INET6)
1819 case ETHERTYPE_IPV6:
1820 pi->ip6 = (struct ip6_hdr *)(m->m_data + pi->ip_off);
1821 if (__predict_false(m->m_len < pi->ip_off + sizeof(*pi->ip6))) {
1822 m_copydata(m, 0, pi->ip_off + sizeof(*pi->ip6),
1824 pi->ip6 = (struct ip6_hdr *)(ss->scratch + pi->ip_off);
/* walk IPv6 extension headers to the transport header */
1827 pi->ip_hlen = ip6_lasthdr(m, pi->ip_off, IPPROTO_IPV6, &nxt);
1828 pi->ip_hlen -= pi->ip_off;
1829 if (nxt != IPPROTO_TCP && nxt != IPPROTO_UDP)
/* firmware has a limit on the total TSO6 header length */
1835 if (pi->ip_off + pi->ip_hlen > ss->sc->max_tso6_hlen)
1838 if (__predict_false(m->m_len < pi->ip_off + pi->ip_hlen +
1839 sizeof(struct tcphdr))) {
1840 m_copydata(m, 0, pi->ip_off + pi->ip_hlen +
1841 sizeof(struct tcphdr), ss->scratch);
1842 pi->ip6 = (struct ip6_hdr *)(ss->scratch + pi->ip_off);
1844 pi->tcp = (struct tcphdr *)((char *)pi->ip6 + pi->ip_hlen);
/*
 * Build and submit the send-descriptor chain for a TSO packet.  Walks
 * the busdma segment list, chopping segments at MSS boundaries
 * (MXGEFW_FLAGS_TSO_CHOP) and retroactively filling in rdma_count for
 * each sub-chain, since the number of RDMAs per cut is only known after
 * the fact.  Negative cum_len means we are still inside the TSO header.
 * NOTE(review): excerpt elides several lines (loop heads, error paths);
 * comments describe only visible code.
 */
1856 mxge_encap_tso(struct mxge_slice_state *ss, struct mbuf *m,
1857 int busdma_seg_cnt, struct mxge_pkt_info *pi)
1860 mcp_kreq_ether_send_t *req;
1861 bus_dma_segment_t *seg;
1862 uint32_t low, high_swapped;
1863 int len, seglen, cum_len, cum_len_next;
1864 int next_is_first, chop, cnt, rdma_count, small;
1865 uint16_t pseudo_hdr_offset, cksum_offset, mss, sum;
1866 uint8_t flags, flags_next;
1869 mss = m->m_pkthdr.tso_segsz;
1871 /* negative cum_len signifies to the
1872 * send loop that we are still in the
1873 * header portion of the TSO packet.
1876 cksum_offset = pi->ip_off + pi->ip_hlen;
1877 cum_len = -(cksum_offset + (pi->tcp->th_off << 2));
1879 /* TSO implies checksum offload on this hardware */
1880 if (__predict_false((m->m_pkthdr.csum_flags & (CSUM_TCP|CSUM_TCP_IPV6)) == 0)) {
1882 * If packet has full TCP csum, replace it with pseudo hdr
1883 * sum that the NIC expects, otherwise the NIC will emit
1884 * packets with bad TCP checksums.
1886 m->m_pkthdr.csum_data = offsetof(struct tcphdr, th_sum);
1888 #if (CSUM_TCP_IPV6 != 0) && defined(INET6)
1889 m->m_pkthdr.csum_flags |= CSUM_TCP_IPV6;
1890 sum = in6_cksum_pseudo(pi->ip6,
1891 m->m_pkthdr.len - cksum_offset,
1896 m->m_pkthdr.csum_flags |= CSUM_TCP;
1897 sum = in_pseudo(pi->ip->ip_src.s_addr,
1898 pi->ip->ip_dst.s_addr,
1899 htons(IPPROTO_TCP + (m->m_pkthdr.len -
/* patch the pseudo-header sum into the TCP checksum field */
1903 m_copyback(m, offsetof(struct tcphdr, th_sum) +
1904 cksum_offset, sizeof(sum), (caddr_t)&sum);
1906 flags = MXGEFW_FLAGS_TSO_HDR | MXGEFW_FLAGS_FIRST;
1909 /* for TSO, pseudo_hdr_offset holds mss.
1910 * The firmware figures out where to put
1911 * the checksum by parsing the header. */
1912 pseudo_hdr_offset = htobe16(mss);
1916 * for IPv6 TSO, the "checksum offset" is re-purposed
1917 * to store the TCP header len
1919 cksum_offset = (pi->tcp->th_off << 2);
1927 /* "rdma_count" is the number of RDMAs belonging to the
1928 * current packet BEFORE the current send request. For
1929 * non-TSO packets, this is equal to "count".
1930 * For TSO packets, rdma_count needs to be reset
1931 * to 0 after a segment cut.
1933 * The rdma_count field of the send request is
1934 * the number of RDMAs of the packet starting at
1935 * that request. For TSO send requests with one ore more cuts
1936 * in the middle, this is the number of RDMAs starting
1937 * after the last cut in the request. All previous
1938 * segments before the last cut implicitly have 1 RDMA.
1940 * Since the number of RDMAs is not known beforehand,
1941 * it must be filled-in retroactively - after each
1942 * segmentation cut or at the end of the entire packet.
1945 while (busdma_seg_cnt) {
1946 /* Break the busdma segment up into pieces*/
1947 low = MXGE_LOWPART_TO_U32(seg->ds_addr);
1948 high_swapped = htobe32(MXGE_HIGHPART_TO_U32(seg->ds_addr));
1952 flags_next = flags & ~MXGEFW_FLAGS_FIRST;
1954 cum_len_next = cum_len + seglen;
/* retroactively record RDMA count for the preceding sub-chain */
1955 (req-rdma_count)->rdma_count = rdma_count + 1;
1956 if (__predict_true(cum_len >= 0)) {
/* payload: chop at MSS boundaries (branch-free flag math) */
1958 chop = (cum_len_next > mss);
1959 cum_len_next = cum_len_next % mss;
1960 next_is_first = (cum_len_next == 0);
1961 flags |= chop * MXGEFW_FLAGS_TSO_CHOP;
1962 flags_next |= next_is_first *
1964 rdma_count |= -(chop | next_is_first);
1965 rdma_count += chop & !next_is_first;
1966 } else if (cum_len_next >= 0) {
/* header/payload boundary falls inside this segment */
1971 small = (mss <= MXGEFW_SEND_SMALL_SIZE);
1972 flags_next = MXGEFW_FLAGS_TSO_PLD |
1973 MXGEFW_FLAGS_FIRST |
1974 (small * MXGEFW_FLAGS_SMALL);
1977 req->addr_high = high_swapped;
1978 req->addr_low = htobe32(low);
1979 req->pseudo_hdr_offset = pseudo_hdr_offset;
1981 req->rdma_count = 1;
1982 req->length = htobe16(seglen);
1983 req->cksum_offset = cksum_offset;
1984 req->flags = flags | ((cum_len & 1) *
1985 MXGEFW_FLAGS_ALIGN_ODD);
1988 cum_len = cum_len_next;
/* for IPv4, the checksum offset only applies while still in headers */
1993 if (cksum_offset != 0 && !pi->ip6) {
1994 if (__predict_false(cksum_offset > seglen))
1995 cksum_offset -= seglen;
/* too many descriptors for one ring submission -> error path */
1999 if (__predict_false(cnt > tx->max_desc))
2005 (req-rdma_count)->rdma_count = rdma_count;
/* walk back and mark the tail descriptors of the final segment */
2009 req->flags |= MXGEFW_FLAGS_TSO_LAST;
2010 } while (!(req->flags & (MXGEFW_FLAGS_TSO_CHOP | MXGEFW_FLAGS_FIRST)));
2012 tx->info[((cnt - 1) + tx->req) & tx->mask].flag = 1;
2013 mxge_submit_req(tx, tx->req_list, cnt);
2014 #ifdef IFNET_BUF_RING
2015 if ((ss->sc->num_slices > 1) && tx->queue_active == 0) {
2016 /* tell the NIC to start polling this slice */
2018 tx->queue_active = 1;
/* error path: release the DMA mapping and drop the packet */
2026 bus_dmamap_unload(tx->dmat, tx->info[tx->req & tx->mask].map);
2030 printf("tx->max_desc exceeded via TSO!\n");
2031 printf("mss = %d, %ld, %d!\n", mss,
2032 (long)seg - (long)tx->seg_list, tx->max_desc);
2039 #endif /* IFCAP_TSO4 */
2041 #ifdef MXGE_NEW_VLAN_API
2043 * We reproduce the software vlan tag insertion from
2044 * net/if_vlan.c:vlan_start() here so that we can advertise "hardware"
2045 * vlan tag insertion. We need to advertise this in order to have the
2046 * vlan interface respect our csum offload flags.
/*
 * Prepend ETHER_VLAN_ENCAP_LEN bytes, shift the Ethernet addresses up,
 * and materialize the 802.1Q header from m_pkthdr.ether_vtag.  Returns
 * the (possibly reallocated) mbuf, or NULL on allocation failure —
 * caller must not touch the original mbuf in that case.
 */
2048 static struct mbuf *
2049 mxge_vlan_tag_insert(struct mbuf *m)
2051 struct ether_vlan_header *evl;
2053 M_PREPEND(m, ETHER_VLAN_ENCAP_LEN, M_NOWAIT);
2054 if (__predict_false(m == NULL))
/* make the whole VLAN header contiguous */
2056 if (m->m_len < sizeof(*evl)) {
2057 m = m_pullup(m, sizeof(*evl));
2058 if (__predict_false(m == NULL))
2062 * Transform the Ethernet header into an Ethernet header
2063 * with 802.1Q encapsulation.
2065 evl = mtod(m, struct ether_vlan_header *);
2066 bcopy((char *)evl + ETHER_VLAN_ENCAP_LEN,
2067 (char *)evl, ETHER_HDR_LEN - ETHER_TYPE_LEN);
2068 evl->evl_encap_proto = htons(ETHERTYPE_VLAN);
2069 evl->evl_tag = htons(m->m_pkthdr.ether_vtag);
/* tag is now in the frame itself; clear the out-of-band flag */
2070 m->m_flags &= ~M_VLANTAG;
2073 #endif /* MXGE_NEW_VLAN_API */
/*
 * Map one outgoing mbuf chain for DMA and submit it to the NIC's send
 * ring.  Handles software VLAN-tag insertion, header parsing for
 * checksum offload, EFBIG recovery via m_defrag(), runt padding with a
 * zeroed DMA buffer, and dispatches TSO frames to mxge_encap_tso().
 * NOTE(review): excerpt elides several lines (error labels, loop
 * increments, final bookkeeping); comments describe only visible code.
 */
2076 mxge_encap(struct mxge_slice_state *ss, struct mbuf *m)
2078 struct mxge_pkt_info pi = {0,0,0,0};
2080 mcp_kreq_ether_send_t *req;
2081 bus_dma_segment_t *seg;
2085 int cnt, cum_len, err, i, idx, odd_flag;
2086 uint16_t pseudo_hdr_offset;
2087 uint8_t flags, cksum_offset;
2094 #ifdef MXGE_NEW_VLAN_API
/* materialize the VLAN tag into the frame before mapping */
2095 if (m->m_flags & M_VLANTAG) {
2096 m = mxge_vlan_tag_insert(m);
2097 if (__predict_false(m == NULL))
2098 goto drop_without_m;
/* parse headers once if any offload is requested */
2101 if (m->m_pkthdr.csum_flags &
2102 (CSUM_TSO | CSUM_DELAY_DATA | CSUM_DELAY_DATA_IPV6)) {
2103 if (mxge_parse_tx(ss, m, &pi))
2107 /* (try to) map the frame for DMA */
2108 idx = tx->req & tx->mask;
2109 err = bus_dmamap_load_mbuf_sg(tx->dmat, tx->info[idx].map,
2110 m, tx->seg_list, &cnt,
2112 if (__predict_false(err == EFBIG)) {
2113 /* Too many segments in the chain. Try
2115 m_tmp = m_defrag(m, M_NOWAIT);
2116 if (m_tmp == NULL) {
/* retry the DMA load on the defragmented chain */
2121 err = bus_dmamap_load_mbuf_sg(tx->dmat,
2123 m, tx->seg_list, &cnt,
2126 if (__predict_false(err != 0)) {
2127 device_printf(sc->dev, "bus_dmamap_load_mbuf_sg returned %d"
2128 " packet len = %d\n", err, m->m_pkthdr.len);
2131 bus_dmamap_sync(tx->dmat, tx->info[idx].map,
2132 BUS_DMASYNC_PREWRITE);
2133 tx->info[idx].m = m;
2136 /* TSO is different enough, we handle it in another routine */
2137 if (m->m_pkthdr.csum_flags & (CSUM_TSO)) {
2138 mxge_encap_tso(ss, m, cnt, &pi);
2145 pseudo_hdr_offset = 0;
2146 flags = MXGEFW_FLAGS_NO_TSO;
2148 /* checksum offloading? */
2149 if (m->m_pkthdr.csum_flags &
2150 (CSUM_DELAY_DATA | CSUM_DELAY_DATA_IPV6)) {
2151 /* ensure ip header is in first mbuf, copy
2152 it to a scratch buffer if not */
2153 cksum_offset = pi.ip_off + pi.ip_hlen;
2154 pseudo_hdr_offset = cksum_offset + m->m_pkthdr.csum_data;
2155 pseudo_hdr_offset = htobe16(pseudo_hdr_offset);
2156 req->cksum_offset = cksum_offset;
2157 flags |= MXGEFW_FLAGS_CKSUM;
2158 odd_flag = MXGEFW_FLAGS_ALIGN_ODD;
2162 if (m->m_pkthdr.len < MXGEFW_SEND_SMALL_SIZE)
2163 flags |= MXGEFW_FLAGS_SMALL;
2165 /* convert segments into a request list */
2168 req->flags = MXGEFW_FLAGS_FIRST;
2169 for (i = 0; i < cnt; i++) {
2171 htobe32(MXGE_LOWPART_TO_U32(seg->ds_addr));
2173 htobe32(MXGE_HIGHPART_TO_U32(seg->ds_addr));
2174 req->length = htobe16(seg->ds_len);
2175 req->cksum_offset = cksum_offset;
/* checksum offset only meaningful while inside the headers */
2176 if (cksum_offset > seg->ds_len)
2177 cksum_offset -= seg->ds_len;
2180 req->pseudo_hdr_offset = pseudo_hdr_offset;
2181 req->pad = 0; /* complete solid 16-byte block */
2182 req->rdma_count = 1;
2183 req->flags |= flags | ((cum_len & 1) * odd_flag);
2184 cum_len += seg->ds_len;
2190 /* pad runts to 60 bytes */
/* extra descriptor pointing at a shared zeroed buffer */
2194 htobe32(MXGE_LOWPART_TO_U32(sc->zeropad_dma.bus_addr));
2196 htobe32(MXGE_HIGHPART_TO_U32(sc->zeropad_dma.bus_addr));
2197 req->length = htobe16(60 - cum_len);
2198 req->cksum_offset = 0;
2199 req->pseudo_hdr_offset = pseudo_hdr_offset;
2200 req->pad = 0; /* complete solid 16-byte block */
2201 req->rdma_count = 1;
2202 req->flags |= flags | ((cum_len & 1) * odd_flag);
2206 tx->req_list[0].rdma_count = cnt;
2208 /* print what the firmware will see */
2209 for (i = 0; i < cnt; i++) {
2210 printf("%d: addr: 0x%x 0x%x len:%d pso%d,"
2211 "cso:%d, flags:0x%x, rdma:%d\n",
2212 i, (int)ntohl(tx->req_list[i].addr_high),
2213 (int)ntohl(tx->req_list[i].addr_low),
2214 (int)ntohs(tx->req_list[i].length),
2215 (int)ntohs(tx->req_list[i].pseudo_hdr_offset),
2216 tx->req_list[i].cksum_offset, tx->req_list[i].flags,
2217 tx->req_list[i].rdma_count);
2219 printf("--------------\n");
/* mark the last descriptor so mxge_tx_done knows a packet completed */
2221 tx->info[((cnt - 1) + tx->req) & tx->mask].flag = 1;
2222 mxge_submit_req(tx, tx->req_list, cnt);
2223 #ifdef IFNET_BUF_RING
2224 if ((ss->sc->num_slices > 1) && tx->queue_active == 0) {
2225 /* tell the NIC to start polling this slice */
2227 tx->queue_active = 1;
2241 #ifdef IFNET_BUF_RING
/*
 * if_qflush method (multi-queue build): drain and free every mbuf
 * queued in each slice's buf_ring, under that slice's tx mutex.
 */
2243 mxge_qflush(struct ifnet *ifp)
2245 mxge_softc_t *sc = ifp->if_softc;
2250 for (slice = 0; slice < sc->num_slices; slice++) {
2251 tx = &sc->ss[slice].tx;
2253 while ((m = buf_ring_dequeue_sc(tx->br)) != NULL)
2255 mtx_unlock(&tx->mtx);
/*
 * Multi-queue (buf_ring) transmit pump: while the ring has more than
 * max_desc free slots, dequeue from the slice's drbr and hand each
 * frame to the NIC.  Sets the per-slice OACTIVE flag when it stops with
 * work still queued.  Caller must hold the slice tx mutex.
 */
2261 mxge_start_locked(struct mxge_slice_state *ss)
/* free slots = mask - (req - done); leave headroom of max_desc */
2272 while ((tx->mask - (tx->req - tx->done)) > tx->max_desc) {
2273 m = drbr_dequeue(ifp, tx->br);
2277 /* let BPF see it */
2280 /* give it to the nic */
2283 /* ran out of transmit slots */
2284 if (((ss->if_drv_flags & IFF_DRV_OACTIVE) == 0)
2285 && (!drbr_empty(ifp, tx->br))) {
2286 ss->if_drv_flags |= IFF_DRV_OACTIVE;
/*
 * Enqueue-or-send for the multi-queue path (tx mutex held by caller):
 * if the interface is not up+active, just enqueue; if the drbr is empty
 * and the ring has room, bypass the queue and transmit directly;
 * otherwise enqueue and kick mxge_start_locked().
 */
2292 mxge_transmit_locked(struct mxge_slice_state *ss, struct mbuf *m)
/* interface not RUNNING (or OACTIVE): queue only */
2303 if ((ss->if_drv_flags & (IFF_DRV_RUNNING|IFF_DRV_OACTIVE)) !=
2305 err = drbr_enqueue(ifp, tx->br, m);
/* fast path: nothing queued ahead of us and ring has room */
2309 if (!drbr_needs_enqueue(ifp, tx->br) &&
2310 ((tx->mask - (tx->req - tx->done)) > tx->max_desc)) {
2311 /* let BPF see it */
2313 /* give it to the nic */
2315 } else if ((err = drbr_enqueue(ifp, tx->br, m)) != 0) {
2318 if (!drbr_empty(ifp, tx->br))
2319 mxge_start_locked(ss);
/*
 * if_transmit method: pick a slice from the mbuf's flowid (num_slices
 * is a power of two, so masking suffices), then either transmit under
 * the slice lock or, if the lock is contended, enqueue to that slice's
 * drbr for the current lock holder to drain.
 */
2324 mxge_transmit(struct ifnet *ifp, struct mbuf *m)
2326 mxge_softc_t *sc = ifp->if_softc;
2327 struct mxge_slice_state *ss;
2332 slice = m->m_pkthdr.flowid;
2333 slice &= (sc->num_slices - 1); /* num_slices always power of 2 */
2335 ss = &sc->ss[slice];
/* avoid blocking: fall back to the ring if the lock is busy */
2338 if (mtx_trylock(&tx->mtx)) {
2339 err = mxge_transmit_locked(ss, m);
2340 mtx_unlock(&tx->mtx);
2342 err = drbr_enqueue(ifp, tx->br, m);
/*
 * Legacy (single-queue if_snd) transmit pump: while the ring has more
 * than max_desc free slots, dequeue from ifp->if_snd and hand each
 * frame to the NIC; set IFF_DRV_OACTIVE when slots run out.
 * Caller must hold the tx mutex.
 */
2351 mxge_start_locked(struct mxge_slice_state *ss)
2361 while ((tx->mask - (tx->req - tx->done)) > tx->max_desc) {
2362 IFQ_DRV_DEQUEUE(&ifp->if_snd, m);
2366 /* let BPF see it */
2369 /* give it to the nic */
2372 /* ran out of transmit slots */
2373 if ((sc->ifp->if_drv_flags & IFF_DRV_OACTIVE) == 0) {
2374 sc->ifp->if_drv_flags |= IFF_DRV_OACTIVE;
/*
 * if_start method (legacy path): lock slice 0's tx ring and drain the
 * if_snd queue via mxge_start_locked().
 */
2380 mxge_start(struct ifnet *ifp)
2382 mxge_softc_t *sc = ifp->if_softc;
2383 struct mxge_slice_state *ss;
2385 /* only use the first slice for now */
2387 mtx_lock(&ss->tx.mtx);
2388 mxge_start_locked(ss);
2389 mtx_unlock(&ss->tx.mtx);
2393 * copy an array of mcp_kreq_ether_recv_t's to the mcp. Copy
2394 * at most 32 bytes at a time, so as to avoid involving the software
2395 * pio handler in the nic. We re-write the first segment's low
2396 * DMA address to mark it valid only after we write the entire chunk
/*
 * PIO-copy 8 receive descriptors in two 32-byte bursts.  The first
 * descriptor's addr_low is temporarily poisoned to 0xffffffff so the
 * NIC ignores the chunk until the final single-word store publishes
 * the real address (validity marker).
 */
2400 mxge_submit_8rx(volatile mcp_kreq_ether_recv_t *dst,
2401 mcp_kreq_ether_recv_t *src)
2405 low = src->addr_low;
/* poison: NIC treats all-ones addr_low as not-yet-valid */
2406 src->addr_low = 0xffffffff;
2407 mxge_pio_copy(dst, src, 4 * sizeof (*src));
2409 mxge_pio_copy(dst + 4, src + 4, 4 * sizeof (*src));
/* restore the host copy, then publish the real address to the NIC */
2411 src->addr_low = low;
2412 dst->addr_low = low;
/*
 * Allocate and DMA-map a small (mbuf-header sized) receive buffer for
 * ring slot idx; record it in rx->info/rx->shadow.  Every 8th slot,
 * push the previous 8 shadow descriptors to the NIC via
 * mxge_submit_8rx().  Returns non-zero on allocation/mapping failure
 * (caller then recycles the old buffer).
 */
2417 mxge_get_buf_small(struct mxge_slice_state *ss, bus_dmamap_t map, int idx)
2419 bus_dma_segment_t seg;
2421 mxge_rx_ring_t *rx = &ss->rx_small;
2424 m = m_gethdr(M_NOWAIT, MT_DATA);
2431 err = bus_dmamap_load_mbuf_sg(rx->dmat, map, m,
2432 &seg, &cnt, BUS_DMA_NOWAIT);
2437 rx->info[idx].m = m;
/* shadow descriptors hold the big-endian DMA address for the NIC */
2438 rx->shadow[idx].addr_low =
2439 htobe32(MXGE_LOWPART_TO_U32(seg.ds_addr));
2440 rx->shadow[idx].addr_high =
2441 htobe32(MXGE_HIGHPART_TO_U32(seg.ds_addr));
/* batch-submit descriptors 8 at a time */
2445 mxge_submit_8rx(&rx->lanai[idx - 7], &rx->shadow[idx - 7]);
/*
 * Allocate and DMA-map a big (jumbo-cluster) receive buffer for ring
 * slot idx.  With MXGE_VIRT_JUMBOS the cluster may map to several
 * segments, each filling a consecutive shadow slot; descriptors are
 * pushed to the NIC in groups of 8.  Returns non-zero on failure.
 */
2450 mxge_get_buf_big(struct mxge_slice_state *ss, bus_dmamap_t map, int idx)
2452 bus_dma_segment_t seg[3];
2454 mxge_rx_ring_t *rx = &ss->rx_big;
2457 m = m_getjcl(M_NOWAIT, MT_DATA, M_PKTHDR, rx->cl_size);
2463 m->m_len = rx->mlen;
2464 err = bus_dmamap_load_mbuf_sg(rx->dmat, map, m,
2465 seg, &cnt, BUS_DMA_NOWAIT);
2470 rx->info[idx].m = m;
2471 rx->shadow[idx].addr_low =
2472 htobe32(MXGE_LOWPART_TO_U32(seg->ds_addr));
2473 rx->shadow[idx].addr_high =
2474 htobe32(MXGE_HIGHPART_TO_U32(seg->ds_addr));
2476 #if MXGE_VIRT_JUMBOS
/* additional segments of the same cluster occupy following slots */
2477 for (i = 1; i < cnt; i++) {
2478 rx->shadow[idx + i].addr_low =
2479 htobe32(MXGE_LOWPART_TO_U32(seg[i].ds_addr));
2480 rx->shadow[idx + i].addr_high =
2481 htobe32(MXGE_HIGHPART_TO_U32(seg[i].ds_addr));
/* submit any slot group that just completed a block of 8 */
2486 for (i = 0; i < rx->nbufs; i++) {
2487 if ((idx & 7) == 7) {
2488 mxge_submit_8rx(&rx->lanai[idx - 7],
2489 &rx->shadow[idx - 7]);
/*
 * Plain 16-bit ones-complement sum over a buffer (Internet checksum
 * style).  The two fold steps collapse carries out of the low 16 bits.
 * NOTE(review): the summation loop itself is elided from this excerpt.
 */
2499 mxge_csum_generic(uint16_t *raw, int len)
2510 csum = (csum >> 16) + (csum & 0xffff);
2511 csum = (csum >> 16) + (csum & 0xffff);
2512 return (uint16_t)csum;
/*
 * Validate the firmware's partial checksum for an IPv6 frame.  The
 * firmware sums the whole frame past the Ethernet header; since IPv6
 * headers carry no checksum of their own they do not cancel out, so we
 * subtract their contribution before comparing against the TCP/UDP
 * pseudo-header checksum.  Returns the comparison result used by
 * mxge_rx_csum() (0 == checksum good).
 */
2515 static inline uint16_t
2516 mxge_rx_csum6(void *p, struct mbuf *m, uint32_t csum)
2519 int nxt, cksum_offset;
2520 struct ip6_hdr *ip6 = p;
2524 cksum_offset = sizeof (*ip6) + ETHER_HDR_LEN;
/* walk extension headers if the next header is not already TCP/UDP */
2525 if (nxt != IPPROTO_TCP && nxt != IPPROTO_UDP) {
2526 cksum_offset = ip6_lasthdr(m, ETHER_HDR_LEN,
2527 IPPROTO_IPV6, &nxt);
2528 if (nxt != IPPROTO_TCP && nxt != IPPROTO_UDP)
2533 * IPv6 headers do not contain a checksum, and hence
2534 * do not checksum to zero, so they don't "fall out"
2535 * of the partial checksum calculation like IPv4
2536 * headers do. We need to fix the partial checksum by
2537 * subtracting the checksum of the IPv6 header.
2540 partial = mxge_csum_generic((uint16_t *)ip6, cksum_offset -
/* ones-complement subtraction: add ~partial with end-around carry */
2543 csum += (csum < ~partial);
2544 csum = (csum >> 16) + (csum & 0xFFFF);
2545 csum = (csum >> 16) + (csum & 0xFFFF);
2546 c = in6_cksum_pseudo(ip6, m->m_pkthdr.len - cksum_offset, nxt,
2553 * Myri10GE hardware checksums are not valid if the sender
2554 * padded the frame with non-zero padding. This is because
2555 * the firmware just does a simple 16-bit 1s complement
2556 * checksum across the entire frame, excluding the first 14
2557 * bytes. It is best to simply to check the checksum and
2558 * tell the stack about it only if the checksum is good
/*
 * Verify the firmware's partial checksum for a received frame.
 * Dispatches on ethertype: IPv4 frames are checked against the
 * pseudo-header sum inline (only TCP/UDP, and only when IFCAP_RXCSUM
 * is enabled); IPv6 frames are delegated to mxge_rx_csum6().
 * Returns 0 when the checksum is good.
 */
2561 static inline uint16_t
2562 mxge_rx_csum(struct mbuf *m, int csum)
2564 struct ether_header *eh;
2568 #if defined(INET) || defined(INET6)
2569 int cap = m->m_pkthdr.rcvif->if_capenable;
2574 eh = mtod(m, struct ether_header *);
2575 etype = ntohs(eh->ether_type);
/* IPv4 rx checksum offload disabled: report not-validated */
2579 if ((cap & IFCAP_RXCSUM) == 0)
2581 ip = (struct ip *)(eh + 1);
2582 if (ip->ip_p != IPPROTO_TCP && ip->ip_p != IPPROTO_UDP)
2584 c = in_pseudo(ip->ip_src.s_addr, ip->ip_dst.s_addr,
2585 htonl(ntohs(csum) + ntohs(ip->ip_len) -
2586 (ip->ip_hl << 2) + ip->ip_p));
2591 case ETHERTYPE_IPV6:
2592 if ((cap & IFCAP_RXCSUM_IPV6) == 0)
2594 c = mxge_rx_csum6((eh + 1), m, csum);
/*
 * Strip the 802.1Q header from a received frame, move the tag into
 * mbuf metadata (ether_vtag + M_VLANTAG, or an m_tag on the old API),
 * and adjust the firmware's partial checksum to account for the
 * removed 4 bytes so checksum validation still works.
 */
2604 mxge_vlan_tag_remove(struct mbuf *m, uint32_t *csum)
2606 struct ether_vlan_header *evl;
2607 struct ether_header *eh;
2610 evl = mtod(m, struct ether_vlan_header *);
2611 eh = mtod(m, struct ether_header *);
2614 * fix checksum by subtracting ETHER_VLAN_ENCAP_LEN bytes
2615 * after what the firmware thought was the end of the ethernet
2619 /* put checksum into host byte order */
2620 *csum = ntohs(*csum);
2621 partial = ntohl(*(uint32_t *)(mtod(m, char *) + ETHER_HDR_LEN));
/* ones-complement subtraction of the tag bytes, with carry folds */
2622 (*csum) += ~partial;
2623 (*csum) += ((*csum) < ~partial);
2624 (*csum) = ((*csum) >> 16) + ((*csum) & 0xFFFF);
2625 (*csum) = ((*csum) >> 16) + ((*csum) & 0xFFFF);
2627 /* restore checksum to network byte order;
2628 later consumers expect this */
2629 *csum = htons(*csum);
2632 #ifdef MXGE_NEW_VLAN_API
2633 m->m_pkthdr.ether_vtag = ntohs(evl->evl_tag);
/* legacy API: attach the tag as an m_tag instead */
2637 mtag = m_tag_alloc(MTAG_VLAN, MTAG_VLAN_TAG, sizeof(u_int),
2641 VLAN_TAG_VALUE(mtag) = ntohs(evl->evl_tag);
2642 m_tag_prepend(m, mtag);
2646 m->m_flags |= M_VLANTAG;
2649 * Remove the 802.1q header by copying the Ethernet
2650 * addresses over it and adjusting the beginning of
2651 * the data in the mbuf. The encapsulated Ethernet
2652 * type field is already in place.
2654 bcopy((char *)evl, (char *)evl + ETHER_VLAN_ENCAP_LEN,
2655 ETHER_HDR_LEN - ETHER_TYPE_LEN);
2656 m_adj(m, ETHER_VLAN_ENCAP_LEN);
/*
 * Process one completed big-buffer receive: replace the ring buffer,
 * unmap the old one, strip any VLAN tag, set flowid/checksum metadata,
 * optionally hand the frame to LRO, and otherwise pass it to if_input.
 * If a replacement buffer cannot be allocated, the frame is dropped
 * and the old mbuf is left in the ring.
 */
2661 mxge_rx_done_big(struct mxge_slice_state *ss, uint32_t len,
2662 uint32_t csum, int lro)
2667 struct ether_header *eh;
2669 bus_dmamap_t old_map;
2675 idx = rx->cnt & rx->mask;
/* big buffers may span several ring slots (nbufs) */
2676 rx->cnt += rx->nbufs;
2677 /* save a pointer to the received mbuf */
2678 m = rx->info[idx].m;
2679 /* try to replace the received mbuf */
2680 if (mxge_get_buf_big(ss, rx->extra_map, idx)) {
2681 /* drop the frame -- the old mbuf is re-cycled */
2682 if_inc_counter(ifp, IFCOUNTER_IERRORS, 1);
2686 /* unmap the received buffer */
2687 old_map = rx->info[idx].map;
2688 bus_dmamap_sync(rx->dmat, old_map, BUS_DMASYNC_POSTREAD);
2689 bus_dmamap_unload(rx->dmat, old_map);
2691 /* swap the bus_dmamap_t's */
2692 rx->info[idx].map = rx->extra_map;
2693 rx->extra_map = old_map;
2695 /* mcp implicitly skips 1st 2 bytes so that packet is properly
2697 m->m_data += MXGEFW_PAD;
2699 m->m_pkthdr.rcvif = ifp;
2700 m->m_len = m->m_pkthdr.len = len;
2702 eh = mtod(m, struct ether_header *);
2703 if (eh->ether_type == htons(ETHERTYPE_VLAN)) {
2704 mxge_vlan_tag_remove(m, &csum);
2706 /* flowid only valid if RSS hashing is enabled */
2707 if (sc->num_slices > 1) {
2708 m->m_pkthdr.flowid = (ss - sc->ss);
2709 M_HASHTYPE_SET(m, M_HASHTYPE_OPAQUE);
2711 /* if the checksum is valid, mark it in the mbuf header */
2712 if ((ifp->if_capenable & (IFCAP_RXCSUM_IPV6 | IFCAP_RXCSUM)) &&
2713 (0 == mxge_rx_csum(m, csum))) {
2714 /* Tell the stack that the checksum is good */
2715 m->m_pkthdr.csum_data = 0xffff;
2716 m->m_pkthdr.csum_flags = CSUM_PSEUDO_HDR |
2719 #if defined(INET) || defined (INET6)
/* LRO consumed the frame if tcp_lro_rx returns 0 */
2720 if (lro && (0 == tcp_lro_rx(&ss->lc, m, 0)))
2724 /* pass the frame up the stack */
2725 (*ifp->if_input)(ifp, m);
/*
 * Small-buffer twin of mxge_rx_done_big(): same replace/unmap/VLAN/
 * checksum/LRO/if_input flow, but operates on the rx_small ring (one
 * slot per frame) and passes the firmware checksum to tcp_lro_rx().
 */
2729 mxge_rx_done_small(struct mxge_slice_state *ss, uint32_t len,
2730 uint32_t csum, int lro)
2734 struct ether_header *eh;
2737 bus_dmamap_t old_map;
2743 idx = rx->cnt & rx->mask;
2745 /* save a pointer to the received mbuf */
2746 m = rx->info[idx].m;
2747 /* try to replace the received mbuf */
2748 if (mxge_get_buf_small(ss, rx->extra_map, idx)) {
2749 /* drop the frame -- the old mbuf is re-cycled */
2750 if_inc_counter(ifp, IFCOUNTER_IERRORS, 1);
2754 /* unmap the received buffer */
2755 old_map = rx->info[idx].map;
2756 bus_dmamap_sync(rx->dmat, old_map, BUS_DMASYNC_POSTREAD);
2757 bus_dmamap_unload(rx->dmat, old_map);
2759 /* swap the bus_dmamap_t's */
2760 rx->info[idx].map = rx->extra_map;
2761 rx->extra_map = old_map;
2763 /* mcp implicitly skips 1st 2 bytes so that packet is properly
2765 m->m_data += MXGEFW_PAD;
2767 m->m_pkthdr.rcvif = ifp;
2768 m->m_len = m->m_pkthdr.len = len;
2770 eh = mtod(m, struct ether_header *);
2771 if (eh->ether_type == htons(ETHERTYPE_VLAN)) {
2772 mxge_vlan_tag_remove(m, &csum);
2774 /* flowid only valid if RSS hashing is enabled */
2775 if (sc->num_slices > 1) {
2776 m->m_pkthdr.flowid = (ss - sc->ss);
2777 M_HASHTYPE_SET(m, M_HASHTYPE_OPAQUE);
2779 /* if the checksum is valid, mark it in the mbuf header */
2780 if ((ifp->if_capenable & (IFCAP_RXCSUM_IPV6 | IFCAP_RXCSUM)) &&
2781 (0 == mxge_rx_csum(m, csum))) {
2782 /* Tell the stack that the checksum is good */
2783 m->m_pkthdr.csum_data = 0xffff;
2784 m->m_pkthdr.csum_flags = CSUM_PSEUDO_HDR |
2787 #if defined(INET) || defined (INET6)
/* LRO consumed the frame if tcp_lro_rx returns 0 */
2788 if (lro && (0 == tcp_lro_rx(&ss->lc, m, csum)))
2792 /* pass the frame up the stack */
2793 (*ifp->if_input)(ifp, m);
/*
 * Drain the slice's rx completion ring: for each non-zero-length
 * entry, dispatch to the small- or big-buffer handler based on whether
 * the frame fits in an mbuf header (MHLEN - MXGEFW_PAD), clearing each
 * entry as it is consumed.  Processing is capped at half the ring per
 * call to bound time spent in the handler; queued LRO data is flushed
 * at the end.
 */
2797 mxge_clean_rx_done(struct mxge_slice_state *ss)
2799 mxge_rx_done_t *rx_done = &ss->rx_done;
2805 lro = ss->sc->ifp->if_capenable & IFCAP_LRO;
/* length == 0 marks an empty (already-consumed) entry */
2806 while (rx_done->entry[rx_done->idx].length != 0) {
2807 length = ntohs(rx_done->entry[rx_done->idx].length);
2808 rx_done->entry[rx_done->idx].length = 0;
2809 checksum = rx_done->entry[rx_done->idx].checksum;
2810 if (length <= (MHLEN - MXGEFW_PAD))
2811 mxge_rx_done_small(ss, length, checksum, lro);
2813 mxge_rx_done_big(ss, length, checksum, lro);
2815 rx_done->idx = rx_done->cnt & rx_done->mask;
2817 /* limit potential for livelock */
2818 if (__predict_false(++limit > rx_done->mask / 2))
2821 #if defined(INET) || defined (INET6)
2822 tcp_lro_flush_all(&ss->lc);
/*
 * Reap completed transmit descriptors up to the firmware's completion
 * index (mcp_idx): free mbufs and DMA maps attached to the first
 * descriptor of each packet, update byte/multicast counters, clear
 * OACTIVE and restart transmission when a quarter of the ring is free,
 * and (multi-queue build) tell the NIC to stop polling an idle slice.
 */
2828 mxge_tx_done(struct mxge_slice_state *ss, uint32_t mcp_idx)
2839 while (tx->pkt_done != mcp_idx) {
2840 idx = tx->done & tx->mask;
2842 m = tx->info[idx].m;
2843 /* mbuf and DMA map only attached to the first
2846 ss->obytes += m->m_pkthdr.len;
2847 if (m->m_flags & M_MCAST)
2850 tx->info[idx].m = NULL;
2851 map = tx->info[idx].map;
2852 bus_dmamap_unload(tx->dmat, map);
/* flag marks the last descriptor of a packet (set in mxge_encap) */
2855 if (tx->info[idx].flag) {
2856 tx->info[idx].flag = 0;
2861 /* If we have space, clear IFF_OACTIVE to tell the stack that
2862 its OK to send packets */
2863 #ifdef IFNET_BUF_RING
2864 flags = &ss->if_drv_flags;
2866 flags = &ifp->if_drv_flags;
2868 mtx_lock(&ss->tx.mtx);
/* resume once at least 3/4 of the ring is free */
2869 if ((*flags) & IFF_DRV_OACTIVE &&
2870 tx->req - tx->done < (tx->mask + 1)/4) {
2871 *(flags) &= ~IFF_DRV_OACTIVE;
2873 mxge_start_locked(ss);
2875 #ifdef IFNET_BUF_RING
2876 if ((ss->sc->num_slices > 1) && (tx->req == tx->done)) {
2877 /* let the NIC stop polling this queue, since there
2878 * are no more transmits pending */
2879 if (tx->req == tx->done) {
2881 tx->queue_active = 0;
2887 mtx_unlock(&ss->tx.mtx);
/*
 * XFP module compliance-byte decode table: maps each bit of the XFP
 * 10GbE compliance register to an ifmedia type and a printable name.
 * Entries with flag 0 have no corresponding FreeBSD media type.
 */
2891 static struct mxge_media_type mxge_xfp_media_types[] =
2893 	{IFM_10G_CX4,	0x7f,		"10GBASE-CX4 (module)"},
2894 	{IFM_10G_SR, 	(1 << 7),	"10GBASE-SR"},
2895 	{IFM_10G_LR, 	(1 << 6),	"10GBASE-LR"},
2896 	{0,		(1 << 5),	"10GBASE-ER"},
2897 	{IFM_10G_LRM,	(1 << 4),	"10GBASE-LRM"},
2898 	{0,		(1 << 3),	"10GBASE-SW"},
2899 	{0,		(1 << 2),	"10GBASE-LW"},
2900 	{0,		(1 << 1),	"10GBASE-EW"},
2901 	{0,		(1 << 0),	"Reserved"}
/*
 * SFP+ module compliance-byte decode table (same layout as the XFP
 * table above); the bitmask-0 entry matches passive Twinax cables.
 */
2903 static struct mxge_media_type mxge_sfp_media_types[] =
2905 	{IFM_10G_TWINAX,      0,	"10GBASE-Twinax"},
2906 	{0,		(1 << 7),	"Reserved"},
2907 	{IFM_10G_LRM,	(1 << 6),	"10GBASE-LRM"},
2908 	{IFM_10G_LR, 	(1 << 5),	"10GBASE-LR"},
2909 	{IFM_10G_SR,	(1 << 4),	"10GBASE-SR"},
2910 	{IFM_10G_TWINAX,(1 << 0),	"10GBASE-Twinax"}
/*
 * Register a single full-duplex Ethernet media type with ifmedia,
 * select it as current, and cache it in sc->current_media so
 * mxge_media_probe() can detect module changes.
 */
2914 mxge_media_set(mxge_softc_t *sc, int media_type)
2918 	ifmedia_add(&sc->media, IFM_ETHER | IFM_FDX | media_type,
2920 	ifmedia_set(&sc->media, IFM_ETHER | IFM_FDX | media_type);
2921 	sc->current_media = media_type;
/* Keep the exported ifm_media word in sync with the selection. */
2922 	sc->media.ifm_media = sc->media.ifm_cur->ifm_media;
/*
 * Determine the board's connector type (CX4 / XFP / SFP+ / Quad
 * Ribbon Fiber) from the EEPROM product-code string and seed the
 * ifmedia state accordingly.
 * NOTE(review): intervening source lines are elided in this extract.
 */
2926 mxge_media_init(mxge_softc_t *sc)
2931 	ifmedia_removeall(&sc->media);
/* Default to autoselect until the connector is identified. */
2932 	mxge_media_set(sc, IFM_AUTO);
2935 	 * parse the product code to determine the interface type
2936 	 * (CX4, XFP, Quad Ribbon Fiber) by looking at the character
2937 	 * after the 3rd dash in the driver's cached copy of the
2938 	 * EEPROM's product code string.
2940 	ptr = sc->product_code_string;
2942 		device_printf(sc->dev, "Missing product code\n");
/* Walk past three '-' separators; bail out if any is missing. */
2946 	for (i = 0; i < 3; i++, ptr++) {
2947 		ptr = strchr(ptr, '-');
2949 			device_printf(sc->dev,
2950 				      "only %d dashes in PC?!?\n", i);
/* '-C' or '-2C' denotes a CX4 board. */
2954 	if (*ptr == 'C' || *(ptr +1) == 'C') {
2956 		sc->connector = MXGE_CX4;
2957 		mxge_media_set(sc, IFM_10G_CX4);
2958 	} else if (*ptr == 'Q') {
2959 		/* -Q is Quad Ribbon Fiber */
2960 		sc->connector = MXGE_QRF;
2961 		device_printf(sc->dev, "Quad Ribbon Fiber Media\n");
2962 		/* FreeBSD has no media type for Quad ribbon fiber */
2963 	} else if (*ptr == 'R') {
/* '-R' is an XFP cage; actual module type probed later. */
2965 		sc->connector = MXGE_XFP;
2966 	} else if (*ptr == 'S' || *(ptr +1) == 'S') {
2967 		/* -S or -2S is SFP+ */
2968 		sc->connector = MXGE_SFP;
2970 		device_printf(sc->dev, "Unknown media type: %c\n", *ptr);
2975 * Determine the media type for a NIC. Some XFPs will identify
2976 * themselves only when their link is up, so this is initiated via a
2977 * link up interrupt. However, this can potentially take up to
2978 * several milliseconds, so it is run via the watchdog routine, rather
2979 * than in the interrupt handler itself.
/*
 * Identify the pluggable module in an XFP/SFP+ cage by reading its
 * I2C compliance byte through firmware commands, then map the byte to
 * an ifmedia type via the decode tables above.  Runs from the
 * watchdog (not interrupt context) because the I2C read can take ms.
 * NOTE(review): intervening source lines are elided in this extract.
 */
2982 mxge_media_probe(mxge_softc_t *sc)
2987 	struct mxge_media_type *mxge_media_types = NULL;
2988 	int i, err, ms, mxge_media_type_entries;
2991 	sc->need_media_probe = 0;
2993 	if (sc->connector == MXGE_XFP) {
2995 		mxge_media_types = mxge_xfp_media_types;
2996 		mxge_media_type_entries =
2997 			nitems(mxge_xfp_media_types);
2998 		byte = MXGE_XFP_COMPLIANCE_BYTE;
3000 	} else if (sc->connector == MXGE_SFP) {
3001 		/* -S or -2S is SFP+ */
3002 		mxge_media_types = mxge_sfp_media_types;
3003 		mxge_media_type_entries =
3004 			nitems(mxge_sfp_media_types);
/* CX4/QRF connectors: fixed media, nothing to probe. */
3008 		/* nothing to do; media type cannot change */
3013 	 * At this point we know the NIC has an XFP cage, so now we
3014 	 * try to determine what is in the cage by using the
3015 	 * firmware's XFP I2C commands to read the XFP 10GbE compliance
3016 	 * register.  We read just one byte, which may take over
/* Kick off the asynchronous firmware-side I2C read. */
3020 	cmd.data0 = 0;	 /* just fetch 1 byte, not all 256 */
3022 	err = mxge_send_cmd(sc, MXGEFW_CMD_I2C_READ, &cmd);
3023 	if (err == MXGEFW_CMD_ERROR_I2C_FAILURE) {
3024 		device_printf(sc->dev, "failed to read XFP\n");
3026 	if (err == MXGEFW_CMD_ERROR_I2C_ABSENT) {
3027 		device_printf(sc->dev, "Type R/S with no XFP!?!?\n");
3029 	if (err != MXGEFW_CMD_OK) {
3033 	/* now we wait for the data to be cached */
3035 	err = mxge_send_cmd(sc, MXGEFW_CMD_I2C_BYTE, &cmd);
/* Poll up to ~50 iterations while firmware reports EBUSY. */
3036 	for (ms = 0; (err == EBUSY) && (ms < 50); ms++) {
3039 		err = mxge_send_cmd(sc, MXGEFW_CMD_I2C_BYTE, &cmd);
3041 	if (err != MXGEFW_CMD_OK) {
3042 		device_printf(sc->dev, "failed to read %s (%d, %dms)\n",
3043 			      cage_type, err, ms);
/* Entry 0 matches the whole byte (e.g. CX4 module mask 0x7f). */
3047 	if (cmd.data0 == mxge_media_types[0].bitmask) {
3049 			device_printf(sc->dev, "%s:%s\n", cage_type,
3050 				      mxge_media_types[0].name);
3051 		if (sc->current_media != mxge_media_types[0].flag) {
3052 			mxge_media_init(sc);
3053 			mxge_media_set(sc, mxge_media_types[0].flag);
/* Remaining entries match individual compliance bits. */
3057 	for (i = 1; i < mxge_media_type_entries; i++) {
3058 		if (cmd.data0 & mxge_media_types[i].bitmask) {
3060 				device_printf(sc->dev, "%s:%s\n",
3062 					      mxge_media_types[i].name);
/* Only reprogram ifmedia when the module actually changed. */
3064 			if (sc->current_media != mxge_media_types[i].flag) {
3065 				mxge_media_init(sc);
3066 				mxge_media_set(sc, mxge_media_types[i].flag);
3072 		device_printf(sc->dev, "%s media 0x%x unknown\n",
3073 			      cage_type, cmd.data0);
/*
 * Interrupt handler for one slice.  Processes TX completions and RX
 * completions, handles legacy-IRQ deassertion, propagates firmware
 * link/error statistics (slice 0 only), then returns the IRQ credit
 * to the NIC by writing irq_claim.
 * NOTE(review): intervening source lines are elided in this extract.
 */
3079 mxge_intr(void *arg)
3081 	struct mxge_slice_state *ss = arg;
3082 	mxge_softc_t *sc = ss->sc;
3083 	mcp_irq_data_t *stats = ss->fw_stats;
3084 	mxge_tx_ring_t *tx = &ss->tx;
3085 	mxge_rx_done_t *rx_done = &ss->rx_done;
3086 	uint32_t send_done_count;
3090 #ifndef IFNET_BUF_RING
3091 	/* an interrupt on a non-zero slice is implicitly valid
3092 	   since MSI-X irqs are not shared */
/* Single-TX-queue build: non-zero slices only clean RX and return. */
3094 		mxge_clean_rx_done(ss);
3095 		*ss->irq_claim = be32toh(3);
3100 	/* make sure the DMA has finished */
3101 	if (!stats->valid) {
3104 	valid = stats->valid;
3106 	if (sc->legacy_irq) {
3107 		/* lower legacy IRQ */
3108 		*sc->irq_deassert = 0;
3109 		if (!mxge_deassert_wait)
3110 			/* don't wait for conf. that irq is low */
3116 	/* loop while waiting for legacy irq deassertion */
3118 		/* check for transmit completes and receives */
3119 		send_done_count = be32toh(stats->send_done_count);
3120 		while ((send_done_count != tx->pkt_done) ||
3121 		       (rx_done->entry[rx_done->idx].length != 0)) {
3122 			if (send_done_count != tx->pkt_done)
3123 				mxge_tx_done(ss, (int)send_done_count);
3124 			mxge_clean_rx_done(ss);
3125 			send_done_count = be32toh(stats->send_done_count);
/* For legacy IRQs, re-read stats->valid until the NIC drops it. */
3127 		if (sc->legacy_irq && mxge_deassert_wait)
3129 	} while (*((volatile uint8_t *) &stats->valid));
3131 	/* fw link & error stats meaningful only on the first slice */
3132 	if (__predict_false((ss == sc->ss) && stats->stats_updated)) {
3133 		if (sc->link_state != stats->link_up) {
3134 			sc->link_state = stats->link_up;
3135 			if (sc->link_state) {
3136 				if_link_state_change(sc->ifp, LINK_STATE_UP);
3138 					device_printf(sc->dev, "link up\n");
3140 				if_link_state_change(sc->ifp, LINK_STATE_DOWN);
3142 					device_printf(sc->dev, "link down\n");
/* Schedule a media re-probe from the watchdog on link change. */
3144 			sc->need_media_probe = 1;
3146 		if (sc->rdma_tags_available !=
3147 		    be32toh(stats->rdma_tags_available)) {
3148 			sc->rdma_tags_available =
3149 				be32toh(stats->rdma_tags_available);
3150 			device_printf(sc->dev, "RDMA timed out! %d tags "
3151 				      "left\n", sc->rdma_tags_available);
3154 		if (stats->link_down) {
3155 			sc->down_cnt += stats->link_down;
3157 			if_link_state_change(sc->ifp, LINK_STATE_DOWN);
3161 	/* check to see if we have rx token to pass back */
3163 		*ss->irq_claim = be32toh(3);
3164 	*(ss->irq_claim + 1) = be32toh(3);
/*
 * if_init handler: bring the interface up (under the driver mutex)
 * if it is not already running.
 */
3168 mxge_init(void *arg)
3170 	mxge_softc_t *sc = arg;
3171 	struct ifnet *ifp = sc->ifp;
3174 	mtx_lock(&sc->driver_mtx);
3175 	if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
3176 		(void) mxge_open(sc);
3177 	mtx_unlock(&sc->driver_mtx);
/*
 * Release every mbuf still attached to a slice's RX (big and small)
 * and TX rings, unloading the associated DMA maps, and tear down the
 * slice's LRO state.  Used on close/teardown.
 */
3183 mxge_free_slice_mbufs(struct mxge_slice_state *ss)
3187 #if defined(INET) || defined(INET6)
3188 	tcp_lro_free(&ss->lc);
/* Big receive ring: unload and free any posted buffers. */
3190 	for (i = 0; i <= ss->rx_big.mask; i++) {
3191 		if (ss->rx_big.info[i].m == NULL)
3193 		bus_dmamap_unload(ss->rx_big.dmat,
3194 				  ss->rx_big.info[i].map);
3195 		m_freem(ss->rx_big.info[i].m);
3196 		ss->rx_big.info[i].m = NULL;
/* Small receive ring: same treatment. */
3199 	for (i = 0; i <= ss->rx_small.mask; i++) {
3200 		if (ss->rx_small.info[i].m == NULL)
3202 		bus_dmamap_unload(ss->rx_small.dmat,
3203 				  ss->rx_small.info[i].map);
3204 		m_freem(ss->rx_small.info[i].m);
3205 		ss->rx_small.info[i].m = NULL;
3208 	/* transmit ring used only on the first slice */
3209 	if (ss->tx.info == NULL)
/* Clear completion flags and free pending transmit mbufs. */
3212 	for (i = 0; i <= ss->tx.mask; i++) {
3213 		ss->tx.info[i].flag = 0;
3214 		if (ss->tx.info[i].m == NULL)
3216 		bus_dmamap_unload(ss->tx.dmat,
3217 				  ss->tx.info[i].map);
3218 		m_freem(ss->tx.info[i].m);
3219 		ss->tx.info[i].m = NULL;
/* Free ring-attached mbufs for every slice of the device. */
3224 mxge_free_mbufs(mxge_softc_t *sc)
3228 	for (slice = 0; slice < sc->num_slices; slice++)
3229 		mxge_free_slice_mbufs(&sc->ss[slice]);
/*
 * Free all per-slice ring resources allocated by
 * mxge_alloc_slice_rings(): completion-ring DMA memory, shadow
 * rings, host info rings, per-slot DMA maps, and the busdma tags.
 * Safe to call on partially-initialized slices (NULL checks
 * throughout).
 */
3233 mxge_free_slice_rings(struct mxge_slice_state *ss)
3238 	if (ss->rx_done.entry != NULL)
3239 		mxge_dma_free(&ss->rx_done.dma);
3240 	ss->rx_done.entry = NULL;
3242 	if (ss->tx.req_bytes != NULL)
3243 		free(ss->tx.req_bytes, M_DEVBUF);
3244 	ss->tx.req_bytes = NULL;
3246 	if (ss->tx.seg_list != NULL)
3247 		free(ss->tx.seg_list, M_DEVBUF);
3248 	ss->tx.seg_list = NULL;
3250 	if (ss->rx_small.shadow != NULL)
3251 		free(ss->rx_small.shadow, M_DEVBUF);
3252 	ss->rx_small.shadow = NULL;
3254 	if (ss->rx_big.shadow != NULL)
3255 		free(ss->rx_big.shadow, M_DEVBUF);
3256 	ss->rx_big.shadow = NULL;
/* TX: destroy each slot's dmamap before the tag and info array. */
3258 	if (ss->tx.info != NULL) {
3259 		if (ss->tx.dmat != NULL) {
3260 			for (i = 0; i <= ss->tx.mask; i++) {
3261 				bus_dmamap_destroy(ss->tx.dmat,
3262 						   ss->tx.info[i].map);
3264 			bus_dma_tag_destroy(ss->tx.dmat);
3266 		free(ss->tx.info, M_DEVBUF);
/* Small RX: per-slot maps plus the spare "extra" map. */
3270 	if (ss->rx_small.info != NULL) {
3271 		if (ss->rx_small.dmat != NULL) {
3272 			for (i = 0; i <= ss->rx_small.mask; i++) {
3273 				bus_dmamap_destroy(ss->rx_small.dmat,
3274 						   ss->rx_small.info[i].map);
3276 			bus_dmamap_destroy(ss->rx_small.dmat,
3277 					   ss->rx_small.extra_map);
3278 			bus_dma_tag_destroy(ss->rx_small.dmat);
3280 		free(ss->rx_small.info, M_DEVBUF);
3282 	ss->rx_small.info = NULL;
/* Big RX: same pattern as small RX. */
3284 	if (ss->rx_big.info != NULL) {
3285 		if (ss->rx_big.dmat != NULL) {
3286 			for (i = 0; i <= ss->rx_big.mask; i++) {
3287 				bus_dmamap_destroy(ss->rx_big.dmat,
3288 						   ss->rx_big.info[i].map);
3290 			bus_dmamap_destroy(ss->rx_big.dmat,
3291 					   ss->rx_big.extra_map);
3292 			bus_dma_tag_destroy(ss->rx_big.dmat);
3294 		free(ss->rx_big.info, M_DEVBUF);
3296 	ss->rx_big.info = NULL;
/* Free ring resources for every slice of the device. */
3300 mxge_free_rings(mxge_softc_t *sc)
3304 	for (slice = 0; slice < sc->num_slices; slice++)
3305 		mxge_free_slice_rings(&sc->ss[slice]);
/*
 * Allocate all host-side ring state for one slice: shadow rings,
 * info rings, busdma tags, and per-slot DMA maps for the small and
 * big RX rings, plus (first slice, or all slices with IFNET_BUF_RING)
 * the TX request copy block, segment list, info ring, tag and maps.
 * Ring sizes must be powers of two (masks are size-1).
 * NOTE(review): intervening source lines (error paths, #else arms)
 * are elided in this extract.
 */
3309 mxge_alloc_slice_rings(struct mxge_slice_state *ss, int rx_ring_entries,
3310 		       int tx_ring_entries)
3312 	mxge_softc_t *sc = ss->sc;
3316 	/* allocate per-slice receive resources */
3318 	ss->rx_small.mask = ss->rx_big.mask = rx_ring_entries - 1;
/* Completion ring holds entries for both RX rings, hence 2x. */
3319 	ss->rx_done.mask = (2 * rx_ring_entries) - 1;
3321 	/* allocate the rx shadow rings */
3322 	bytes = rx_ring_entries * sizeof (*ss->rx_small.shadow);
3323 	ss->rx_small.shadow = malloc(bytes, M_DEVBUF, M_ZERO|M_WAITOK);
3325 	bytes = rx_ring_entries * sizeof (*ss->rx_big.shadow);
3326 	ss->rx_big.shadow = malloc(bytes, M_DEVBUF, M_ZERO|M_WAITOK);
3328 	/* allocate the rx host info rings */
3329 	bytes = rx_ring_entries * sizeof (*ss->rx_small.info);
3330 	ss->rx_small.info = malloc(bytes, M_DEVBUF, M_ZERO|M_WAITOK);
3332 	bytes = rx_ring_entries * sizeof (*ss->rx_big.info);
3333 	ss->rx_big.info = malloc(bytes, M_DEVBUF, M_ZERO|M_WAITOK);
3335 	/* allocate the rx busdma resources */
3336 	err = bus_dma_tag_create(sc->parent_dmat,	/* parent */
3338 				 4096,			/* boundary */
3339 				 BUS_SPACE_MAXADDR,	/* low */
3340 				 BUS_SPACE_MAXADDR,	/* high */
3341 				 NULL, NULL,		/* filter */
3342 				 MHLEN,			/* maxsize */
3344 				 MHLEN,			/* maxsegsize */
3345 				 BUS_DMA_ALLOCNOW,	/* flags */
3346 				 NULL, NULL,		/* lock */
3347 				 &ss->rx_small.dmat);	/* tag */
3349 		device_printf(sc->dev, "Err %d allocating rx_small dmat\n",
/* Big RX tag: segmenting depends on MXGE_VIRT_JUMBOS (page-sized
   virtually-contiguous buffers vs one 9K jumbo cluster). */
3354 	err = bus_dma_tag_create(sc->parent_dmat,	/* parent */
3356 #if MXGE_VIRT_JUMBOS
3357 				 4096,			/* boundary */
3361 				 BUS_SPACE_MAXADDR,	/* low */
3362 				 BUS_SPACE_MAXADDR,	/* high */
3363 				 NULL, NULL,		/* filter */
3364 				 3*4096,		/* maxsize */
3365 #if MXGE_VIRT_JUMBOS
3367 				 4096,			/* maxsegsize*/
3370 				 MJUM9BYTES,		/* maxsegsize*/
3372 				 BUS_DMA_ALLOCNOW,	/* flags */
3373 				 NULL, NULL,		/* lock */
3374 				 &ss->rx_big.dmat);	/* tag */
3376 		device_printf(sc->dev, "Err %d allocating rx_big dmat\n",
/* One dmamap per small-RX slot, plus a spare for buffer swapping. */
3380 	for (i = 0; i <= ss->rx_small.mask; i++) {
3381 		err = bus_dmamap_create(ss->rx_small.dmat, 0,
3382 					&ss->rx_small.info[i].map);
3384 			device_printf(sc->dev, "Err %d  rx_small dmamap\n",
3389 	err = bus_dmamap_create(ss->rx_small.dmat, 0,
3390 				&ss->rx_small.extra_map);
3392 		device_printf(sc->dev, "Err %d extra rx_small dmamap\n",
/* Same for the big-RX ring. */
3397 	for (i = 0; i <= ss->rx_big.mask; i++) {
3398 		err = bus_dmamap_create(ss->rx_big.dmat, 0,
3399 					&ss->rx_big.info[i].map);
3401 			device_printf(sc->dev, "Err %d  rx_big dmamap\n",
3406 	err = bus_dmamap_create(ss->rx_big.dmat, 0,
3407 				&ss->rx_big.extra_map);
3409 		device_printf(sc->dev, "Err %d extra rx_big dmamap\n",
3414 	/* now allocate TX resources */
3416 #ifndef IFNET_BUF_RING
3417 	/* only use a single TX ring for now */
3418 	if (ss != ss->sc->ss)
3422 	ss->tx.mask = tx_ring_entries - 1;
3423 	ss->tx.max_desc = MIN(MXGE_MAX_SEND_DESC, tx_ring_entries / 4);
3426 	/* allocate the tx request copy block */
3428 		sizeof (*ss->tx.req_list) * (ss->tx.max_desc + 4);
3429 	ss->tx.req_bytes = malloc(bytes, M_DEVBUF, M_WAITOK);
3430 	/* ensure req_list entries are aligned to 8 bytes */
3431 	ss->tx.req_list = (mcp_kreq_ether_send_t *)
3432 		((unsigned long)(ss->tx.req_bytes + 7) & ~7UL);
3434 	/* allocate the tx busdma segment list */
3435 	bytes = sizeof (*ss->tx.seg_list) * ss->tx.max_desc;
3436 	ss->tx.seg_list = (bus_dma_segment_t *)
3437 		malloc(bytes, M_DEVBUF, M_WAITOK);
3439 	/* allocate the tx host info ring */
3440 	bytes = tx_ring_entries * sizeof (*ss->tx.info);
3441 	ss->tx.info = malloc(bytes, M_DEVBUF, M_ZERO|M_WAITOK);
3443 	/* allocate the tx busdma resources */
3444 	err = bus_dma_tag_create(sc->parent_dmat,	/* parent */
3446 				 sc->tx_boundary,	/* boundary */
3447 				 BUS_SPACE_MAXADDR,	/* low */
3448 				 BUS_SPACE_MAXADDR,	/* high */
3449 				 NULL, NULL,		/* filter */
3450 				 65536 + 256,		/* maxsize */
3451 				 ss->tx.max_desc - 2,	/* num segs */
3452 				 sc->tx_boundary,	/* maxsegsz */
3453 				 BUS_DMA_ALLOCNOW,	/* flags */
3454 				 NULL, NULL,		/* lock */
3455 				 &ss->tx.dmat);		/* tag */
3458 		device_printf(sc->dev, "Err %d allocating tx dmat\n",
3463 	/* now use these tags to setup dmamaps for each slot
3465 	for (i = 0; i <= ss->tx.mask; i++) {
3466 		err = bus_dmamap_create(ss->tx.dmat, 0,
3467 					&ss->tx.info[i].map);
3469 			device_printf(sc->dev, "Err %d  tx dmamap\n",
/*
 * Query the firmware for the send-ring size, derive TX/RX ring entry
 * counts, size the ifnet send queue, then allocate rings for every
 * slice; on failure, unwind with mxge_free_rings().
 * NOTE(review): intervening source lines are elided in this extract.
 */
3479 mxge_alloc_rings(mxge_softc_t *sc)
3483 	int tx_ring_entries, rx_ring_entries;
3486 	/* get ring sizes */
3487 	err = mxge_send_cmd(sc, MXGEFW_CMD_GET_SEND_RING_SIZE, &cmd);
3488 	tx_ring_size = cmd.data0;
3490 		device_printf(sc->dev, "Cannot determine tx ring sizes\n");
/* Convert byte sizes to descriptor counts. */
3494 	tx_ring_entries = tx_ring_size / sizeof (mcp_kreq_ether_send_t);
3495 	rx_ring_entries = sc->rx_ring_size / sizeof (mcp_dma_addr_t);
3496 	IFQ_SET_MAXLEN(&sc->ifp->if_snd, tx_ring_entries - 1);
3497 	sc->ifp->if_snd.ifq_drv_maxlen = sc->ifp->if_snd.ifq_maxlen;
3498 	IFQ_SET_READY(&sc->ifp->if_snd);
3500 	for (slice = 0; slice < sc->num_slices; slice++) {
3501 		err = mxge_alloc_slice_rings(&sc->ss[slice],
/* Error path: release anything already allocated. */
3510 	mxge_free_rings(sc);
/*
 * Choose big-receive buffer parameters for a given MTU: the firmware
 * buffer size (*big_buf_size), the mbuf cluster size to allocate
 * (*cl_size), and how many firmware buffers one cluster spans
 * (*nbufs).  Sizes escalate MCLBYTES -> MJUMPAGESIZE -> MJUM9BYTES
 * (or page-chunked 9K clusters with MXGE_VIRT_JUMBOS).
 */
3517 mxge_choose_params(int mtu, int *big_buf_size, int *cl_size, int *nbufs)
/* Worst-case frame: MTU + Ethernet + VLAN headers + firmware pad. */
3519 	int bufsize = mtu + ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN + MXGEFW_PAD;
3521 	if (bufsize < MCLBYTES) {
3522 		/* easy, everything fits in a single buffer */
3523 		*big_buf_size = MCLBYTES;
3524 		*cl_size = MCLBYTES;
3529 	if (bufsize < MJUMPAGESIZE) {
3530 		/* still easy, everything still fits in a single buffer */
3531 		*big_buf_size = MJUMPAGESIZE;
3532 		*cl_size = MJUMPAGESIZE;
3536 #if MXGE_VIRT_JUMBOS
3537 	/* now we need to use virtually contiguous buffers */
3538 	*cl_size = MJUM9BYTES;
3539 	*big_buf_size = 4096;
3540 	*nbufs = mtu / 4096 + 1;
3541 	/* needs to be a power of two, so round up */
3545 	*cl_size = MJUM9BYTES;
3546 	*big_buf_size = MJUM9BYTES;
/*
 * Bring one slice online: initialize LRO, fetch the lanai (NIC SRAM)
 * pointers for the send/receive rings from firmware, and stock both
 * receive rings with buffers.
 * NOTE(review): intervening source lines are elided in this extract.
 */
3552 mxge_slice_open(struct mxge_slice_state *ss, int nbufs, int cl_size)
3561 	slice = ss - sc->ss;
3563 #if defined(INET) || defined(INET6)
3564 	(void)tcp_lro_init(&ss->lc);
3566 	ss->lc.ifp = sc->ifp;
3568 	/* get the lanai pointers to the send and receive rings */
3571 #ifndef IFNET_BUF_RING
3572 	/* We currently only send from the first slice */
3576 	err = mxge_send_cmd(sc, MXGEFW_CMD_GET_SEND_OFFSET, &cmd);
/* Map the send ring and its doorbell registers in NIC SRAM. */
3578 		(volatile mcp_kreq_ether_send_t *)(sc->sram + cmd.data0);
3579 	ss->tx.send_go = (volatile uint32_t *)
3580 		(sc->sram + MXGEFW_ETH_SEND_GO + 64 * slice);
3581 	ss->tx.send_stop = (volatile uint32_t *)
3582 	(sc->sram + MXGEFW_ETH_SEND_STOP + 64 * slice);
3583 #ifndef IFNET_BUF_RING
3587 	err |= mxge_send_cmd(sc,
3588 			     MXGEFW_CMD_GET_SMALL_RX_OFFSET, &cmd);
3589 	ss->rx_small.lanai =
3590 		(volatile mcp_kreq_ether_recv_t *)(sc->sram + cmd.data0);
3592 	err |= mxge_send_cmd(sc, MXGEFW_CMD_GET_BIG_RX_OFFSET, &cmd);
3594 		(volatile mcp_kreq_ether_recv_t *)(sc->sram + cmd.data0);
3597 		device_printf(sc->dev,
3598 			      "failed to get ring sizes or locations\n");
3602 	/* stock receive rings */
3603 	for (i = 0; i <= ss->rx_small.mask; i++) {
3604 		map = ss->rx_small.info[i].map;
3605 		err = mxge_get_buf_small(ss, map, i);
3607 			device_printf(sc->dev, "alloced %d/%d smalls\n",
3608 				      i, ss->rx_small.mask + 1);
/* Poison the big shadow ring so unfilled slots are detectable. */
3612 	for (i = 0; i <= ss->rx_big.mask; i++) {
3613 		ss->rx_big.shadow[i].addr_low = 0xffffffff;
3614 		ss->rx_big.shadow[i].addr_high = 0xffffffff;
3616 	ss->rx_big.nbufs = nbufs;
3617 	ss->rx_big.cl_size = cl_size;
3618 	ss->rx_big.mlen = ss->sc->ifp->if_mtu + ETHER_HDR_LEN +
3619 		ETHER_VLAN_ENCAP_LEN + MXGEFW_PAD;
/* Step by nbufs: one cluster may back several firmware buffers. */
3620 	for (i = 0; i <= ss->rx_big.mask; i += ss->rx_big.nbufs) {
3621 		map = ss->rx_big.info[i].map;
3622 		err = mxge_get_buf_big(ss, map, i);
3624 			device_printf(sc->dev, "alloced %d/%d bigs\n",
3625 				      i, ss->rx_big.mask + 1);
/*
 * Full bring-up of the NIC: reset, program RSS indirection when
 * multi-slice, negotiate buffer sizes and MTU with firmware, point
 * the firmware at the per-slice stats blocks, open every slice, and
 * finally issue ETHERNET_UP and mark the interface running.
 * NOTE(review): intervening source lines (error handling, #else
 * arms) are elided in this extract.
 */
3633 mxge_open(mxge_softc_t *sc)
3636 	int err, big_bytes, nbufs, slice, cl_size, i;
3638 	volatile uint8_t *itable;
3639 	struct mxge_slice_state *ss;
3641 	/* Copy the MAC address in case it was overridden */
3642 	bcopy(IF_LLADDR(sc->ifp), sc->mac_addr, ETHER_ADDR_LEN);
3644 	err = mxge_reset(sc, 1);
3646 		device_printf(sc->dev, "failed to reset\n");
3650 	if (sc->num_slices > 1) {
3651 		/* setup the indirection table */
3652 		cmd.data0 = sc->num_slices;
3653 		err = mxge_send_cmd(sc, MXGEFW_CMD_SET_RSS_TABLE_SIZE,
3656 		err |= mxge_send_cmd(sc, MXGEFW_CMD_GET_RSS_TABLE_OFFSET,
3659 			device_printf(sc->dev,
3660 				      "failed to setup rss tables\n");
3664 		/* just enable an identity mapping */
3665 		itable = sc->sram + cmd.data0;
3666 		for (i = 0; i < sc->num_slices; i++)
3667 			itable[i] = (uint8_t)i;
3670 		cmd.data1 = mxge_rss_hash_type;
3671 		err = mxge_send_cmd(sc, MXGEFW_CMD_SET_RSS_ENABLE, &cmd);
3673 			device_printf(sc->dev, "failed to enable slices\n");
3679 	mxge_choose_params(sc->ifp->if_mtu, &big_bytes, &cl_size, &nbufs);
3682 	err = mxge_send_cmd(sc, MXGEFW_CMD_ALWAYS_USE_N_BIG_BUFFERS,
3684 	/* error is only meaningful if we're trying to set
3685 	   MXGEFW_CMD_ALWAYS_USE_N_BIG_BUFFERS > 1 */
3686 	if (err && nbufs > 1) {
3687 		device_printf(sc->dev,
3688 			      "Failed to set alway-use-n to %d\n",
3692 	/* Give the firmware the mtu and the big and small buffer
3693 	   sizes.  The firmware wants the big buf size to be a power
3694 	   of two. Luckily, FreeBSD's clusters are powers of two */
3695 	cmd.data0 = sc->ifp->if_mtu + ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
3696 	err = mxge_send_cmd(sc, MXGEFW_CMD_SET_MTU, &cmd);
3697 	cmd.data0 = MHLEN - MXGEFW_PAD;
3698 	err |= mxge_send_cmd(sc, MXGEFW_CMD_SET_SMALL_BUFFER_SIZE,
3700 	cmd.data0 = big_bytes;
3701 	err |= mxge_send_cmd(sc, MXGEFW_CMD_SET_BIG_BUFFER_SIZE, &cmd);
3704 		device_printf(sc->dev, "failed to setup params\n");
3708 	/* Now give him the pointer to the stats block */
3710 #ifdef IFNET_BUF_RING
3711 	     slice < sc->num_slices;
3716 		ss = &sc->ss[slice];
/* Hand the firmware each slice's stats DMA address (V2 command). */
3718 			MXGE_LOWPART_TO_U32(ss->fw_stats_dma.bus_addr);
3720 			MXGE_HIGHPART_TO_U32(ss->fw_stats_dma.bus_addr);
3721 		cmd.data2 = sizeof(struct mcp_irq_data);
/* Encode the slice index in the upper half of data2. */
3722 		cmd.data2 |= (slice << 16);
3723 		err |= mxge_send_cmd(sc, MXGEFW_CMD_SET_STATS_DMA_V2, &cmd);
/* Fallback for old firmware: obsolete stats-DMA command, which
   cannot support multicast filtering. */
3727 		bus = sc->ss->fw_stats_dma.bus_addr;
3728 		bus += offsetof(struct mcp_irq_data, send_done_count);
3729 		cmd.data0 = MXGE_LOWPART_TO_U32(bus);
3730 		cmd.data1 = MXGE_HIGHPART_TO_U32(bus);
3731 		err = mxge_send_cmd(sc,
3732 				    MXGEFW_CMD_SET_STATS_DMA_OBSOLETE,
3734 		/* Firmware cannot support multicast without STATS_DMA_V2 */
3735 		sc->fw_multicast_support = 0;
3737 		sc->fw_multicast_support = 1;
3741 		device_printf(sc->dev, "failed to setup params\n");
3745 	for (slice = 0; slice < sc->num_slices; slice++) {
3746 		err = mxge_slice_open(&sc->ss[slice], nbufs, cl_size);
3748 			device_printf(sc->dev, "couldn't open slice %d\n",
3754 	/* Finally, start the firmware running */
3755 	err = mxge_send_cmd(sc, MXGEFW_CMD_ETHERNET_UP, &cmd);
3757 		device_printf(sc->dev, "Couldn't bring up link\n");
3760 #ifdef IFNET_BUF_RING
3761 	for (slice = 0; slice < sc->num_slices; slice++) {
3762 		ss = &sc->ss[slice];
3763 		ss->if_drv_flags |= IFF_DRV_RUNNING;
3764 		ss->if_drv_flags &= ~IFF_DRV_OACTIVE;
3767 	sc->ifp->if_drv_flags |= IFF_DRV_RUNNING;
3768 	sc->ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;
/* Error unwind: free any mbufs posted to the rings. */
3774 	mxge_free_mbufs(sc);
/*
 * Take the interface down: clear RUNNING flags, issue ETHERNET_DOWN
 * (unless 'down' says the NIC is already down), wait briefly for the
 * down interrupt (observed via sc->down_cnt), then free ring mbufs.
 * NOTE(review): intervening source lines are elided in this extract.
 */
3780 mxge_close(mxge_softc_t *sc, int down)
3783 	int err, old_down_cnt;
3784 #ifdef IFNET_BUF_RING
3785 	struct mxge_slice_state *ss;
3789 #ifdef IFNET_BUF_RING
3790 	for (slice = 0; slice < sc->num_slices; slice++) {
3791 		ss = &sc->ss[slice];
3792 		ss->if_drv_flags &= ~IFF_DRV_RUNNING;
3795 	sc->ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
3797 		old_down_cnt = sc->down_cnt;
3799 		err = mxge_send_cmd(sc, MXGEFW_CMD_ETHERNET_DOWN, &cmd);
3801 			device_printf(sc->dev,
3802 				      "Couldn't bring down link\n");
/* The down IRQ bumps sc->down_cnt; give it time to arrive. */
3804 		if (old_down_cnt == sc->down_cnt) {
3805 			/* wait for down irq */
3806 			DELAY(10 * sc->intr_coal_delay);
3809 		if (old_down_cnt == sc->down_cnt) {
3810 			device_printf(sc->dev, "never got down irq\n");
3813 	mxge_free_mbufs(sc);
/*
 * Program the device's PCI config space: record PCIe link width, set
 * the max read request size to 4KB (or restore the value saved
 * before a watchdog reset), and enable bus mastering.
 */
3819 mxge_setup_cfg_space(mxge_softc_t *sc)
3821 	device_t dev = sc->dev;
3823 	uint16_t lnk, pectl;
3825 	/* find the PCIe link width and set max read request to 4KB*/
3826 	if (pci_find_cap(dev, PCIY_EXPRESS, &reg) == 0) {
/* Link status register lives at capability offset 0x12. */
3827 		lnk = pci_read_config(dev, reg + 0x12, 2);
3828 		sc->link_width = (lnk >> 4) & 0x3f;
3830 		if (sc->pectl == 0) {
/* Device control register at offset 0x8: MRRS field is bits 14:12;
   value 5 selects a 4096-byte max read request. */
3831 			pectl = pci_read_config(dev, reg + 0x8, 2);
3832 			pectl = (pectl & ~0x7000) | (5 << 12);
3833 			pci_write_config(dev, reg + 0x8, pectl, 2);
3836 			/* restore saved pectl after watchdog reset */
3837 			pci_write_config(dev, reg + 0x8, sc->pectl, 2);
3841 	/* Enable DMA and Memory space access */
3842 	pci_enable_busmaster(dev);
/*
 * Read the NIC's reboot-status register through the vendor-specific
 * PCI capability's indirect window.  Returns (uint32_t)-1 if the
 * capability cannot be found.
 */
3846 mxge_read_reboot(mxge_softc_t *sc)
3848 	device_t dev = sc->dev;
3851 	/* find the vendor specific offset */
3852 	if (pci_find_cap(dev, PCIY_VENDOR, &vs) != 0) {
3853 		device_printf(sc->dev,
3854 			      "could not find vendor specific offset\n");
3855 		return (uint32_t)-1;
3857 	/* enable read32 mode */
3858 	pci_write_config(dev, vs + 0x10, 0x3, 1);
3859 	/* tell NIC which register to read */
3860 	pci_write_config(dev, vs + 0x18, 0xfffffff0, 4);
/* Data window at vs+0x14 returns the selected register's value. */
3861 	return (pci_read_config(dev, vs + 0x14, 4));
/*
 * Recover from a firmware hang or NIC reboot detected by the
 * watchdog: verify the device is still present, quiesce transmit,
 * restore PCI config space, reload firmware, reopen the interface,
 * and re-arm the tick callout.  Called with the driver mutex held
 * (from mxge_watchdog_task).
 * NOTE(review): intervening source lines are elided in this extract.
 */
3865 mxge_watchdog_reset(mxge_softc_t *sc)
3867 	struct pci_devinfo *dinfo;
3868 	struct mxge_slice_state *ss;
3869 	int err, running, s, num_tx_slices = 1;
3875 	device_printf(sc->dev, "Watchdog reset!\n");
3878 	 * check to see if the NIC rebooted.  If it did, then all of
3879 	 * PCI config space has been reset, and things like the
3880 	 * busmaster bit will be zero.  If this is the case, then we
3881 	 * must restore PCI config space before the NIC can be used
/* 0xffff from PCIR_COMMAND means the device is not responding. */
3884 	cmd = pci_read_config(sc->dev, PCIR_COMMAND, 2);
3885 	if (cmd == 0xffff) {
3887 		 * maybe the watchdog caught the NIC rebooting; wait
3888 		 * up to 100ms for it to finish.  If it does not come
3889 		 * back, then give up
3892 		cmd = pci_read_config(sc->dev, PCIR_COMMAND, 2);
3893 		if (cmd == 0xffff) {
3894 			device_printf(sc->dev, "NIC disappeared!\n");
/* Busmaster cleared implies config space was wiped by a reboot. */
3897 	if ((cmd & PCIM_CMD_BUSMASTEREN) == 0) {
3898 		/* print the reboot status */
3899 		reboot = mxge_read_reboot(sc);
3900 		device_printf(sc->dev, "NIC rebooted, status = 0x%x\n",
3902 		running = sc->ifp->if_drv_flags & IFF_DRV_RUNNING;
3906 			 * quiesce NIC so that TX routines will not try to
3907 			 * xmit after restoration of BAR
3910 			/* Mark the link as down */
3911 			if (sc->link_state) {
3913 				if_link_state_change(sc->ifp,
3916 #ifdef IFNET_BUF_RING
3917 			num_tx_slices = sc->num_slices;
3919 			/* grab all TX locks to ensure no tx  */
3920 			for (s = 0; s < num_tx_slices; s++) {
3922 				mtx_lock(&ss->tx.mtx);
3926 		/* restore PCI configuration space */
3927 		dinfo = device_get_ivars(sc->dev);
3928 		pci_cfg_restore(sc->dev, dinfo);
3930 		/* and redo any changes we made to our config space */
3931 		mxge_setup_cfg_space(sc);
3934 			err = mxge_load_firmware(sc, 0);
3936 				device_printf(sc->dev,
3937 					      "Unable to re-load f/w\n");
3941 			err = mxge_open(sc);
3942 			/* release all TX locks */
3943 			for (s = 0; s < num_tx_slices; s++) {
3945 #ifdef IFNET_BUF_RING
/* Restart any queued transmits before dropping the lock. */
3946 				mxge_start_locked(ss);
3948 				mtx_unlock(&ss->tx.mtx);
3951 		sc->watchdog_resets++;
3953 		device_printf(sc->dev,
3954 			      "NIC did not reboot, not resetting\n");
3958 		device_printf(sc->dev, "watchdog reset failed\n");
/* Re-arm the periodic tick regardless of recovery outcome. */
3962 	callout_reset(&sc->co_hdl, mxge_ticks, mxge_tick, sc);
/*
 * Taskqueue wrapper: run the watchdog reset with the driver mutex
 * held (the reset sleeps, so it cannot run from the tick callout).
 */
3967 mxge_watchdog_task(void *arg, int pending)
3969 	mxge_softc_t *sc = arg;
3972 	mtx_lock(&sc->driver_mtx);
3973 	mxge_watchdog_reset(sc);
3974 	mtx_unlock(&sc->driver_mtx);
/*
 * Log the transmit-ring state of a slice that appears wedged, to aid
 * post-mortem debugging before the watchdog resets the NIC.
 */
3978 mxge_warn_stuck(mxge_softc_t *sc, mxge_tx_ring_t *tx, int slice)
3980 	tx = &sc->ss[slice].tx;
3981 	device_printf(sc->dev, "slice %d struck? ring state:\n", slice);
3982 	device_printf(sc->dev,
3983 		      "tx.req=%d tx.done=%d, tx.queue_active=%d\n",
3984 		      tx->req, tx->done, tx->queue_active);
3985 	device_printf(sc->dev, "tx.activate=%d tx.deactivate=%d\n",
3986 		      tx->activate, tx->deactivate);
/* Compare host-side pkt_done against the firmware's counter. */
3987 	device_printf(sc->dev, "pkt_done=%d fw=%d\n",
3989 		      be32toh(sc->ss->fw_stats->send_done_count));
/*
 * Periodic TX-hang detector (run from mxge_tick): if a slice has had
 * pending transmits with no completion progress since the last tick,
 * and the stall is not explained by flow-control pause frames,
 * schedule a watchdog reset.
 * NOTE(review): intervening source lines are elided in this extract.
 */
3993 mxge_watchdog(mxge_softc_t *sc)
3996 	uint32_t rx_pause = be32toh(sc->ss->fw_stats->dropped_pause);
3999 	/* see if we have outstanding transmits, which
4000 	   have been pending for more than mxge_ticks */
4002 #ifdef IFNET_BUF_RING
4003 	     (i < sc->num_slices) && (err == 0);
4005 	     (i < 1) && (err == 0);
/* Stalled: requests outstanding, and done hasn't moved since the
   snapshot taken at the previous tick. */
4009 		if (tx->req != tx->done &&
4010 		    tx->watchdog_req != tx->watchdog_done &&
4011 		    tx->done == tx->watchdog_done) {
4012 			/* check for pause blocking before resetting */
4013 			if (tx->watchdog_rx_pause == rx_pause) {
4014 				mxge_warn_stuck(sc, tx, i);
4015 				taskqueue_enqueue(sc->tq, &sc->watchdog_task);
4019 				device_printf(sc->dev, "Flow control blocking "
4020 					      "xmits, check link partner\n");
/* Snapshot counters for the next tick's comparison. */
4023 		tx->watchdog_req = tx->req;
4024 		tx->watchdog_done = tx->done;
4025 		tx->watchdog_rx_pause = rx_pause;
4028 	if (sc->need_media_probe)
4029 		mxge_media_probe(sc);
/*
 * if_get_counter handler: sum the requested statistic across all
 * slices; anything not tracked per-slice falls through to the
 * stack's default counters.
 */
4034 mxge_get_counter(struct ifnet *ifp, ift_counter cnt)
4036 	struct mxge_softc *sc;
4039 	sc = if_getsoftc(ifp);
4043 	case IFCOUNTER_IPACKETS:
4044 		for (int s = 0; s < sc->num_slices; s++)
4045 			rv += sc->ss[s].ipackets;
4047 	case IFCOUNTER_OPACKETS:
4048 		for (int s = 0; s < sc->num_slices; s++)
4049 			rv += sc->ss[s].opackets;
4051 	case IFCOUNTER_OERRORS:
4052 		for (int s = 0; s < sc->num_slices; s++)
4053 			rv += sc->ss[s].oerrors;
/* These output counters exist per-slice only in multiqueue builds. */
4055 #ifdef IFNET_BUF_RING
4056 	case IFCOUNTER_OBYTES:
4057 		for (int s = 0; s < sc->num_slices; s++)
4058 			rv += sc->ss[s].obytes;
4060 	case IFCOUNTER_OMCASTS:
4061 		for (int s = 0; s < sc->num_slices; s++)
4062 			rv += sc->ss[s].omcasts;
4064 	case IFCOUNTER_OQDROPS:
4065 		for (int s = 0; s < sc->num_slices; s++)
4066 			rv += sc->ss[s].tx.br->br_drops;
4070 		return (if_get_counter_default(ifp, cnt));
/*
 * Periodic callout: run the TX watchdog every few ticks while the
 * interface is up; while down, just verify the NIC has not dropped
 * off the bus (busmaster bit still set).  Always re-arms itself.
 * NOTE(review): intervening source lines are elided in this extract.
 */
4075 mxge_tick(void *arg)
4077 	mxge_softc_t *sc = arg;
4084 	running = sc->ifp->if_drv_flags & IFF_DRV_RUNNING;
/* Only run the (heavier) watchdog every 5th tick. */
4086 		if (!sc->watchdog_countdown) {
4087 			err = mxge_watchdog(sc);
4088 			sc->watchdog_countdown = 4;
4090 		sc->watchdog_countdown--;
4093 		/* ensure NIC did not suffer h/w fault while idle */
4094 		cmd = pci_read_config(sc->dev, PCIR_COMMAND, 2);
4095 		if ((cmd & PCIM_CMD_BUSMASTEREN) == 0) {
4097 			taskqueue_enqueue(sc->tq, &sc->watchdog_task);
4100 		/* look less often if NIC is idle */
4105 		callout_reset(&sc->co_hdl, ticks, mxge_tick, sc);
/* ifmedia change handler; body elided in this extract. */
4110 mxge_media_change(struct ifnet *ifp)
/*
 * Validate and apply a new MTU.  Rejects frame sizes above the NIC
 * maximum or below 60 bytes; if the interface is running, it is
 * closed and reopened so buffer parameters are renegotiated, rolling
 * back to the old MTU if the reopen fails.
 * NOTE(review): intervening source lines are elided in this extract.
 */
4116 mxge_change_mtu(mxge_softc_t *sc, int mtu)
4118 	struct ifnet *ifp = sc->ifp;
4119 	int real_mtu, old_mtu;
/* Wire-frame size including Ethernet + VLAN headers. */
4123 	real_mtu = mtu + ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
4124 	if ((real_mtu > sc->max_mtu) || real_mtu < 60)
4126 	mtx_lock(&sc->driver_mtx);
4127 	old_mtu = ifp->if_mtu;
4129 	if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
4131 		err = mxge_open(sc);
/* Reopen failed: restore the previous MTU and retry the open. */
4133 			ifp->if_mtu = old_mtu;
4135 			(void) mxge_open(sc);
4138 	mtx_unlock(&sc->driver_mtx);
/*
 * ifmedia status handler: report link validity/activity and the
 * currently-detected full-duplex media type.
 */
4143 mxge_media_status(struct ifnet *ifp, struct ifmediareq *ifmr)
4145 	mxge_softc_t *sc = ifp->if_softc;
4150 	ifmr->ifm_status = IFM_AVALID;
4151 	ifmr->ifm_active = IFM_ETHER | IFM_FDX;
4152 	ifmr->ifm_status |= sc->link_state ? IFM_ACTIVE : 0;
4153 	ifmr->ifm_active |= sc->current_media;
/*
 * Interface ioctl handler: MTU changes, up/down flag handling,
 * multicast list updates, capability toggles (checksum offload, TSO,
 * LRO, VLAN offloads) with their interdependencies enforced, media
 * ioctls, and a fallthrough to ether_ioctl().
 * NOTE(review): case labels and some lines are elided in this
 * extract.
 */
4157 mxge_ioctl(struct ifnet *ifp, u_long command, caddr_t data)
4159 	mxge_softc_t *sc = ifp->if_softc;
4160 	struct ifreq *ifr = (struct ifreq *)data;
4166 		err = mxge_change_mtu(sc, ifr->ifr_mtu);
4170 		mtx_lock(&sc->driver_mtx);
4172 			mtx_unlock(&sc->driver_mtx);
4175 		if (ifp->if_flags & IFF_UP) {
4176 			if (!(ifp->if_drv_flags & IFF_DRV_RUNNING)) {
4177 				err = mxge_open(sc);
4179 				/* take care of promis can allmulti
/* Already running: just refresh promiscuous/multicast state. */
4181 				mxge_change_promisc(sc,
4182 						    ifp->if_flags & IFF_PROMISC);
4183 				mxge_set_multicast_list(sc);
4186 			if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
4190 		mtx_unlock(&sc->driver_mtx);
4195 		mtx_lock(&sc->driver_mtx);
4196 		mxge_set_multicast_list(sc);
4197 		mtx_unlock(&sc->driver_mtx);
/* SIOCSIFCAP: toggle only the capabilities that changed. */
4201 		mtx_lock(&sc->driver_mtx);
4202 		mask = ifr->ifr_reqcap ^ ifp->if_capenable;
4203 		if (mask & IFCAP_TXCSUM) {
4204 			if (IFCAP_TXCSUM & ifp->if_capenable) {
/* Disabling TX csum also disables TSO4, which depends on it. */
4205 				ifp->if_capenable &= ~(IFCAP_TXCSUM|IFCAP_TSO4);
4206 				ifp->if_hwassist &= ~(CSUM_TCP | CSUM_UDP);
4208 				ifp->if_capenable |= IFCAP_TXCSUM;
4209 				ifp->if_hwassist |= (CSUM_TCP | CSUM_UDP);
4211 		} else if (mask & IFCAP_RXCSUM) {
4212 			if (IFCAP_RXCSUM & ifp->if_capenable) {
4213 				ifp->if_capenable &= ~IFCAP_RXCSUM;
4215 				ifp->if_capenable |= IFCAP_RXCSUM;
4218 		if (mask & IFCAP_TSO4) {
4219 			if (IFCAP_TSO4 & ifp->if_capenable) {
4220 				ifp->if_capenable &= ~IFCAP_TSO4;
4221 			} else if (IFCAP_TXCSUM & ifp->if_capenable) {
4222 				ifp->if_capenable |= IFCAP_TSO4;
4223 				ifp->if_hwassist |= CSUM_TSO;
4225 				printf("mxge requires tx checksum offload"
4226 				       " be enabled to use TSO\n");
/* IPv6 offload toggles mirror the IPv4 logic above. */
4231 		if (mask & IFCAP_TXCSUM_IPV6) {
4232 			if (IFCAP_TXCSUM_IPV6 & ifp->if_capenable) {
4233 				ifp->if_capenable &= ~(IFCAP_TXCSUM_IPV6
4235 				ifp->if_hwassist &= ~(CSUM_TCP_IPV6
4238 				ifp->if_capenable |= IFCAP_TXCSUM_IPV6;
4239 				ifp->if_hwassist |= (CSUM_TCP_IPV6
4242 		} else if (mask & IFCAP_RXCSUM_IPV6) {
4243 			if (IFCAP_RXCSUM_IPV6 & ifp->if_capenable) {
4244 				ifp->if_capenable &= ~IFCAP_RXCSUM_IPV6;
4246 				ifp->if_capenable |= IFCAP_RXCSUM_IPV6;
4249 		if (mask & IFCAP_TSO6) {
4250 			if (IFCAP_TSO6 & ifp->if_capenable) {
4251 				ifp->if_capenable &= ~IFCAP_TSO6;
4252 			} else if (IFCAP_TXCSUM_IPV6 & ifp->if_capenable) {
4253 				ifp->if_capenable |= IFCAP_TSO6;
4254 				ifp->if_hwassist |= CSUM_TSO;
4256 				printf("mxge requires tx checksum offload"
4257 				       " be enabled to use TSO\n");
4261 #endif /*IFCAP_TSO6 */
4263 		if (mask & IFCAP_LRO)
4264 			ifp->if_capenable ^= IFCAP_LRO;
4265 		if (mask & IFCAP_VLAN_HWTAGGING)
4266 			ifp->if_capenable ^= IFCAP_VLAN_HWTAGGING;
4267 		if (mask & IFCAP_VLAN_HWTSO)
4268 			ifp->if_capenable ^= IFCAP_VLAN_HWTSO;
/* VLAN TSO requires both the capability and HW VLAN tagging. */
4270 		if (!(ifp->if_capabilities & IFCAP_VLAN_HWTSO) ||
4271 		    !(ifp->if_capenable & IFCAP_VLAN_HWTAGGING))
4272 			ifp->if_capenable &= ~IFCAP_VLAN_HWTSO;
4274 		mtx_unlock(&sc->driver_mtx);
4275 		VLAN_CAPABILITIES(ifp);
/* Media ioctls: re-probe the module before reporting. */
4280 		mtx_lock(&sc->driver_mtx);
4281 		mxge_media_probe(sc);
4282 		mtx_unlock(&sc->driver_mtx);
4283 		err = ifmedia_ioctl(ifp, (struct ifreq *)data,
4284 				    &sc->media, command);
4288 		err = ether_ioctl(ifp, command, data);
/*
 * Load the hw.mxge.* loader tunables into the module globals, then
 * clamp each to its valid range and seed per-device defaults
 * (flow-control pause, throttle).
 */
4295 mxge_fetch_tunables(mxge_softc_t *sc)
4298 	TUNABLE_INT_FETCH("hw.mxge.max_slices", &mxge_max_slices);
4299 	TUNABLE_INT_FETCH("hw.mxge.flow_control_enabled",
4300 			  &mxge_flow_control);
4301 	TUNABLE_INT_FETCH("hw.mxge.intr_coal_delay",
4302 			  &mxge_intr_coal_delay);
4303 	TUNABLE_INT_FETCH("hw.mxge.nvidia_ecrc_enable",
4304 			  &mxge_nvidia_ecrc_enable);
4305 	TUNABLE_INT_FETCH("hw.mxge.force_firmware",
4306 			  &mxge_force_firmware);
4307 	TUNABLE_INT_FETCH("hw.mxge.deassert_wait",
4308 			  &mxge_deassert_wait);
4309 	TUNABLE_INT_FETCH("hw.mxge.verbose",
4311 	TUNABLE_INT_FETCH("hw.mxge.ticks", &mxge_ticks);
4312 	TUNABLE_INT_FETCH("hw.mxge.always_promisc", &mxge_always_promisc);
/* Both spellings of the RSS hash tunable are accepted. */
4313 	TUNABLE_INT_FETCH("hw.mxge.rss_hash_type", &mxge_rss_hash_type);
4314 	TUNABLE_INT_FETCH("hw.mxge.rss_hashtype", &mxge_rss_hash_type);
4315 	TUNABLE_INT_FETCH("hw.mxge.initial_mtu", &mxge_initial_mtu);
4316 	TUNABLE_INT_FETCH("hw.mxge.throttle", &mxge_throttle);
/* Sanity-clamp each tunable to its supported range. */
4320 	if (mxge_intr_coal_delay < 0 || mxge_intr_coal_delay > 10*1000)
4321 		mxge_intr_coal_delay = 30;
4322 	if (mxge_ticks == 0)
4323 		mxge_ticks = hz / 2;
4324 	sc->pause = mxge_flow_control;
4325 	if (mxge_rss_hash_type < MXGEFW_RSS_HASH_TYPE_IPV4
4326 	    || mxge_rss_hash_type > MXGEFW_RSS_HASH_TYPE_MAX) {
4327 		mxge_rss_hash_type = MXGEFW_RSS_HASH_TYPE_SRC_DST_PORT;
4329 	if (mxge_initial_mtu > ETHERMTU_JUMBO ||
4330 	    mxge_initial_mtu < ETHER_MIN_LEN)
4331 		mxge_initial_mtu = ETHERMTU_JUMBO;
4333 	if (mxge_throttle && mxge_throttle > MXGE_MAX_THROTTLE)
4334 		mxge_throttle = MXGE_MAX_THROTTLE;
4335 	if (mxge_throttle && mxge_throttle < MXGE_MIN_THROTTLE)
4336 		mxge_throttle = MXGE_MIN_THROTTLE;
4337 	sc->throttle = mxge_throttle;
/*
 * Tear down all per-slice state allocated by mxge_alloc_slices():
 * the firmware-stats DMA block, the optional IFNET_BUF_RING tx
 * buf_ring and its mutex, the rx completion ring DMA, and finally
 * the slice array itself.  Safe to call on partially-built state
 * (each resource is NULL-checked before freeing).
 */
4342 mxge_free_slices(mxge_softc_t *sc)
4344 struct mxge_slice_state *ss;
4351 for (i = 0; i < sc->num_slices; i++) {
/* fw_stats doubles as the "this slice was initialized" marker. */
4353 if (ss->fw_stats != NULL) {
4354 mxge_dma_free(&ss->fw_stats_dma);
4355 ss->fw_stats = NULL;
4356 #ifdef IFNET_BUF_RING
4357 if (ss->tx.br != NULL) {
4358 drbr_free(ss->tx.br, M_DEVBUF);
4362 mtx_destroy(&ss->tx.mtx);
4364 if (ss->rx_done.entry != NULL) {
4365 mxge_dma_free(&ss->rx_done.dma);
4366 ss->rx_done.entry = NULL;
4369 free(sc->ss, M_DEVBUF);
/*
 * Allocate the sc->ss slice array and, for each slice, its rx
 * completion ("rx_done") DMA ring sized from the firmware's reported
 * rx ring size, the per-slice firmware stats block, the tx mutex,
 * and (under IFNET_BUF_RING) a 2048-entry buf_ring.  On failure,
 * everything built so far is released via mxge_free_slices().
 */
4374 mxge_alloc_slices(mxge_softc_t *sc)
4377 struct mxge_slice_state *ss;
4379 int err, i, max_intr_slots;
4381 err = mxge_send_cmd(sc, MXGEFW_CMD_GET_RX_RING_SIZE, &cmd);
4383 device_printf(sc->dev, "Cannot determine rx ring size\n");
4386 sc->rx_ring_size = cmd.data0;
/* Two interrupt-queue slots per rx descriptor (small + big rings). */
4387 max_intr_slots = 2 * (sc->rx_ring_size / sizeof (mcp_dma_addr_t));
4389 bytes = sizeof (*sc->ss) * sc->num_slices;
4390 sc->ss = malloc(bytes, M_DEVBUF, M_NOWAIT | M_ZERO);
4393 for (i = 0; i < sc->num_slices; i++) {
4398 /* allocate per-slice rx interrupt queues */
4400 bytes = max_intr_slots * sizeof (*ss->rx_done.entry);
4401 err = mxge_dma_alloc(sc, &ss->rx_done.dma, bytes, 4096);
4404 ss->rx_done.entry = ss->rx_done.dma.addr;
4405 bzero(ss->rx_done.entry, bytes);
4408 * allocate the per-slice firmware stats; stats
4409 * (including tx) are used used only on the first
4412 #ifndef IFNET_BUF_RING
4417 bytes = sizeof (*ss->fw_stats);
4418 err = mxge_dma_alloc(sc, &ss->fw_stats_dma,
4419 sizeof (*ss->fw_stats), 64);
4422 ss->fw_stats = (mcp_irq_data_t *)ss->fw_stats_dma.addr;
/* Per-slice tx lock, named "<dev>:tx(<slice>)" for lock profiling. */
4423 snprintf(ss->tx.mtx_name, sizeof(ss->tx.mtx_name),
4424 "%s:tx(%d)", device_get_nameunit(sc->dev), i);
4425 mtx_init(&ss->tx.mtx, ss->tx.mtx_name, NULL, MTX_DEF);
4426 #ifdef IFNET_BUF_RING
4427 ss->tx.br = buf_ring_alloc(2048, M_DEVBUF, M_WAITOK,
/* Unwind path: release all slices built so far. */
4435 mxge_free_slices(sc);
/*
 * Decide how many slices (queues) this device will use.  Multi-slice
 * operation requires: the hw.mxge.max_slices tunable to allow it, an
 * SMP system, available MSI-X vectors, and the RSS-capable firmware
 * image accepting a reset and reporting its queue limit.  num_slices
 * is then capped by the MSI-X vector count and the CPU count (or the
 * tunable) and rounded down to a power of two.  Any failure falls
 * back to the original single-slice firmware.
 */
4440 mxge_slice_probe(mxge_softc_t *sc)
4444 int msix_cnt, status, max_intr_slots;
4448 * don't enable multiple slices if they are not enabled,
4449 * or if this is not an SMP system
4452 if (mxge_max_slices == 0 || mxge_max_slices == 1 || mp_ncpus < 2)
4455 /* see how many MSI-X interrupts are available */
4456 msix_cnt = pci_msix_count(sc->dev);
4460 /* now load the slice aware firmware see what it supports */
4461 old_fw = sc->fw_name;
/* Choose the RSS firmware variant matching the current alignment. */
4462 if (old_fw == mxge_fw_aligned)
4463 sc->fw_name = mxge_fw_rss_aligned;
4465 sc->fw_name = mxge_fw_rss_unaligned;
4466 status = mxge_load_firmware(sc, 0);
4468 device_printf(sc->dev, "Falling back to a single slice\n");
4472 /* try to send a reset command to the card to see if it
4474 memset(&cmd, 0, sizeof (cmd));
4475 status = mxge_send_cmd(sc, MXGEFW_CMD_RESET, &cmd);
4477 device_printf(sc->dev, "failed reset\n");
4481 /* get rx ring size */
4482 status = mxge_send_cmd(sc, MXGEFW_CMD_GET_RX_RING_SIZE, &cmd);
4484 device_printf(sc->dev, "Cannot determine rx ring size\n");
4487 max_intr_slots = 2 * (cmd.data0 / sizeof (mcp_dma_addr_t));
4489 /* tell it the size of the interrupt queues */
4490 cmd.data0 = max_intr_slots * sizeof (struct mcp_slot);
4491 status = mxge_send_cmd(sc, MXGEFW_CMD_SET_INTRQ_SIZE, &cmd);
4493 device_printf(sc->dev, "failed MXGEFW_CMD_SET_INTRQ_SIZE\n");
4497 /* ask the maximum number of slices it supports */
4498 status = mxge_send_cmd(sc, MXGEFW_CMD_GET_MAX_RSS_QUEUES, &cmd);
4500 device_printf(sc->dev,
4501 "failed MXGEFW_CMD_GET_MAX_RSS_QUEUES\n");
4504 sc->num_slices = cmd.data0;
/* Never use more slices than we have MSI-X vectors for. */
4505 if (sc->num_slices > msix_cnt)
4506 sc->num_slices = msix_cnt;
/* max_slices == -1 means "auto": cap by CPUs; else cap by tunable. */
4508 if (mxge_max_slices == -1) {
4509 /* cap to number of CPUs in system */
4510 if (sc->num_slices > mp_ncpus)
4511 sc->num_slices = mp_ncpus;
4513 if (sc->num_slices > mxge_max_slices)
4514 sc->num_slices = mxge_max_slices;
4516 /* make sure it is a power of two */
4517 while (sc->num_slices & (sc->num_slices - 1))
4521 device_printf(sc->dev, "using %d slices\n",
/* Fallback: restore and reload the single-slice firmware. */
4527 sc->fw_name = old_fw;
4528 (void) mxge_load_firmware(sc, 0);
/*
 * Set up one MSI-X interrupt per slice: map the MSI-X table BAR
 * (BAR 2), allocate num_slices vectors, then a SYS_RES_IRQ resource
 * and an interrupt handler (mxge_intr, argument &sc->ss[i]) for each
 * slice.  Errors unwind in reverse order through the goto labels
 * below (intr -> res -> msix -> msix_table).
 */
4532 mxge_add_msix_irqs(mxge_softc_t *sc)
4535 int count, err, i, rid;
4538 sc->msix_table_res = bus_alloc_resource_any(sc->dev, SYS_RES_MEMORY,
4541 if (sc->msix_table_res == NULL) {
4542 device_printf(sc->dev, "couldn't alloc MSIX table res\n");
4546 count = sc->num_slices;
4547 err = pci_alloc_msix(sc->dev, &count);
4549 device_printf(sc->dev, "pci_alloc_msix: failed, wanted %d"
4550 "err = %d \n", sc->num_slices, err);
4551 goto abort_with_msix_table;
/* Partial vector grants are not usable; tell the admin how to shrink. */
4553 if (count < sc->num_slices) {
4554 device_printf(sc->dev, "pci_alloc_msix: need %d, got %d\n",
4555 count, sc->num_slices);
4556 device_printf(sc->dev,
4557 "Try setting hw.mxge.max_slices to %d\n",
4560 goto abort_with_msix;
4562 bytes = sizeof (*sc->msix_irq_res) * sc->num_slices;
4563 sc->msix_irq_res = malloc(bytes, M_DEVBUF, M_NOWAIT|M_ZERO);
4564 if (sc->msix_irq_res == NULL) {
4566 goto abort_with_msix;
/* One IRQ resource per slice. */
4569 for (i = 0; i < sc->num_slices; i++) {
4571 sc->msix_irq_res[i] = bus_alloc_resource_any(sc->dev,
4574 if (sc->msix_irq_res[i] == NULL) {
4575 device_printf(sc->dev, "couldn't allocate IRQ res"
4576 " for message %d\n", i);
4578 goto abort_with_res;
4582 bytes = sizeof (*sc->msix_ih) * sc->num_slices;
4583 sc->msix_ih = malloc(bytes, M_DEVBUF, M_NOWAIT|M_ZERO);
/* Hook up the handler for each vector; all use mxge_intr. */
4585 for (i = 0; i < sc->num_slices; i++) {
4586 err = bus_setup_intr(sc->dev, sc->msix_irq_res[i],
4587 INTR_TYPE_NET | INTR_MPSAFE,
4588 #if __FreeBSD_version > 700030
4591 mxge_intr, &sc->ss[i], &sc->msix_ih[i]);
4593 device_printf(sc->dev, "couldn't setup intr for "
4595 goto abort_with_intr;
4597 bus_describe_intr(sc->dev, sc->msix_irq_res[i],
4598 sc->msix_ih[i], "s%d", i);
4602 device_printf(sc->dev, "using %d msix IRQs:",
4604 for (i = 0; i < sc->num_slices; i++)
4605 printf(" %jd", rman_get_start(sc->msix_irq_res[i]));
/* --- error unwind: tear down handlers set up so far --- */
4611 for (i = 0; i < sc->num_slices; i++) {
4612 if (sc->msix_ih[i] != NULL) {
4613 bus_teardown_intr(sc->dev, sc->msix_irq_res[i],
4615 sc->msix_ih[i] = NULL;
4618 free(sc->msix_ih, M_DEVBUF);
/* --- release IRQ resources --- */
4622 for (i = 0; i < sc->num_slices; i++) {
4624 if (sc->msix_irq_res[i] != NULL)
4625 bus_release_resource(sc->dev, SYS_RES_IRQ, rid,
4626 sc->msix_irq_res[i]);
4627 sc->msix_irq_res[i] = NULL;
4629 free(sc->msix_irq_res, M_DEVBUF);
/* --- give back the MSI-X vectors and the table BAR mapping --- */
4633 pci_release_msi(sc->dev);
4635 abort_with_msix_table:
4636 bus_release_resource(sc->dev, SYS_RES_MEMORY, PCIR_BAR(2),
4637 sc->msix_table_res);
/*
 * Single-queue interrupt setup: prefer one MSI vector if the device
 * offers exactly one, otherwise fall back to the legacy INTx line
 * (sc->legacy_irq selects rid 0 vs 1).  On handler-setup failure the
 * IRQ resource and any MSI allocation are released.
 */
4643 mxge_add_single_irq(mxge_softc_t *sc)
4645 int count, err, rid;
4647 count = pci_msi_count(sc->dev);
4648 if (count == 1 && pci_alloc_msi(sc->dev, &count) == 0) {
4654 sc->irq_res = bus_alloc_resource_any(sc->dev, SYS_RES_IRQ, &rid,
4655 RF_SHAREABLE | RF_ACTIVE);
4656 if (sc->irq_res == NULL) {
4657 device_printf(sc->dev, "could not alloc interrupt\n");
4661 device_printf(sc->dev, "using %s irq %jd\n",
4662 sc->legacy_irq ? "INTx" : "MSI",
4663 rman_get_start(sc->irq_res));
/* Single handler services slice 0 only. */
4664 err = bus_setup_intr(sc->dev, sc->irq_res,
4665 INTR_TYPE_NET | INTR_MPSAFE,
4666 #if __FreeBSD_version > 700030
4669 mxge_intr, &sc->ss[0], &sc->ih);
/* Unwind: release the IRQ resource, and the MSI vector if used. */
4671 bus_release_resource(sc->dev, SYS_RES_IRQ,
4672 sc->legacy_irq ? 0 : 1, sc->irq_res);
4673 if (!sc->legacy_irq)
4674 pci_release_msi(sc->dev);
/*
 * Undo mxge_add_msix_irqs(): tear down every per-slice interrupt
 * handler, release every IRQ resource, free both bookkeeping arrays,
 * release the BAR 2 MSI-X table mapping, and give back the vectors.
 * Mirrors the success path of mxge_add_msix_irqs() in reverse.
 */
4680 mxge_rem_msix_irqs(mxge_softc_t *sc)
4684 for (i = 0; i < sc->num_slices; i++) {
4685 if (sc->msix_ih[i] != NULL) {
4686 bus_teardown_intr(sc->dev, sc->msix_irq_res[i],
4688 sc->msix_ih[i] = NULL;
4691 free(sc->msix_ih, M_DEVBUF);
4693 for (i = 0; i < sc->num_slices; i++) {
4695 if (sc->msix_irq_res[i] != NULL)
4696 bus_release_resource(sc->dev, SYS_RES_IRQ, rid,
4697 sc->msix_irq_res[i]);
4698 sc->msix_irq_res[i] = NULL;
4700 free(sc->msix_irq_res, M_DEVBUF);
4702 bus_release_resource(sc->dev, SYS_RES_MEMORY, PCIR_BAR(2),
4703 sc->msix_table_res);
4705 pci_release_msi(sc->dev);
/*
 * Undo mxge_add_single_irq(): tear down the handler, release the
 * IRQ resource (rid 0 for legacy INTx, rid 1 for MSI), and release
 * the MSI vector when one was allocated.
 */
4710 mxge_rem_single_irq(mxge_softc_t *sc)
4712 bus_teardown_intr(sc->dev, sc->irq_res, sc->ih);
4713 bus_release_resource(sc->dev, SYS_RES_IRQ,
4714 sc->legacy_irq ? 0 : 1, sc->irq_res);
4715 if (!sc->legacy_irq)
4716 pci_release_msi(sc->dev);
/*
 * Dispatch interrupt teardown to the MSI-X or single-IRQ path based
 * on how many slices were configured at setup time.
 */
4720 mxge_rem_irq(mxge_softc_t *sc)
4722 if (sc->num_slices > 1)
4723 mxge_rem_msix_irqs(sc);
4725 mxge_rem_single_irq(sc);
/*
 * Dispatch interrupt setup: MSI-X when multiple slices are in use,
 * otherwise a single MSI/INTx interrupt.
 */
4729 mxge_add_irq(mxge_softc_t *sc)
4733 if (sc->num_slices > 1)
4734 err = mxge_add_msix_irqs(sc);
4736 err = mxge_add_single_irq(sc);
/*
 * NOTE(review): the "0 &&" below makes this remove-and-retry branch
 * unreachable dead code -- apparently disabled on purpose.  Confirm
 * intent before either re-enabling or deleting it.
 */
4738 if (0 && err == 0 && sc->num_slices > 1) {
4739 mxge_rem_msix_irqs(sc);
4740 err = mxge_add_msix_irqs(sc);
/*
 * Device attach: bring the NIC from cold PCI device to a registered
 * ethernet interface.  Order of operations: tunables -> watchdog
 * taskqueue -> parent DMA tag -> ifnet allocation and mutexes ->
 * BAR mapping and EEPROM-string parsing -> command/zeropad/dmabench
 * DMA blocks -> firmware selection -> slice probe/alloc -> reset ->
 * rings -> interrupts -> ifnet capabilities -> ether_ifattach ->
 * sysctls and the periodic tick callout.  Failures unwind through
 * the abort_with_* labels at the bottom, in reverse order.
 */
4747 mxge_attach(device_t dev)
4750 mxge_softc_t *sc = device_get_softc(dev);
4755 mxge_fetch_tunables(sc);
/* Watchdog runs from a private taskqueue so it can sleep. */
4757 TASK_INIT(&sc->watchdog_task, 1, mxge_watchdog_task, sc);
4758 sc->tq = taskqueue_create("mxge_taskq", M_WAITOK,
4759 taskqueue_thread_enqueue, &sc->tq);
4760 if (sc->tq == NULL) {
4762 goto abort_with_nothing;
/* Parent DMA tag: bounds for every child allocation (64KB+256 max). */
4765 err = bus_dma_tag_create(bus_get_dma_tag(dev), /* parent */
4768 BUS_SPACE_MAXADDR, /* low */
4769 BUS_SPACE_MAXADDR, /* high */
4770 NULL, NULL, /* filter */
4771 65536 + 256, /* maxsize */
4772 MXGE_MAX_SEND_DESC, /* num segs */
4773 65536, /* maxsegsize */
4775 NULL, NULL, /* lock */
4776 &sc->parent_dmat); /* tag */
4779 device_printf(sc->dev, "Err %d allocating parent dmat\n",
4784 ifp = sc->ifp = if_alloc(IFT_ETHER);
4786 device_printf(dev, "can not if_alloc()\n");
4788 goto abort_with_parent_dmat;
4790 if_initname(ifp, device_get_name(dev), device_get_unit(dev));
/* Command mutex serializes firmware commands; driver mutex is the
 * main softc lock (also backs the tick callout below). */
4792 snprintf(sc->cmd_mtx_name, sizeof(sc->cmd_mtx_name), "%s:cmd",
4793 device_get_nameunit(dev));
4794 mtx_init(&sc->cmd_mtx, sc->cmd_mtx_name, NULL, MTX_DEF);
4795 snprintf(sc->driver_mtx_name, sizeof(sc->driver_mtx_name),
4796 "%s:drv", device_get_nameunit(dev));
4797 mtx_init(&sc->driver_mtx, sc->driver_mtx_name,
4798 MTX_NETWORK_LOCK, MTX_DEF);
4800 callout_init_mtx(&sc->co_hdl, &sc->driver_mtx, 0);
4802 mxge_setup_cfg_space(sc);
4804 /* Map the board into the kernel */
4806 sc->mem_res = bus_alloc_resource_any(dev, SYS_RES_MEMORY, &rid,
4808 if (sc->mem_res == NULL) {
4809 device_printf(dev, "could not map memory\n");
4811 goto abort_with_lock;
4813 sc->sram = rman_get_virtual(sc->mem_res);
/* 2MB SRAM minus firmware/scratch reservations -- must fit the BAR. */
4814 sc->sram_size = 2*1024*1024 - (2*(48*1024)+(32*1024)) - 0x100;
4815 if (sc->sram_size > rman_get_size(sc->mem_res)) {
4816 device_printf(dev, "impossible memory region size %jd\n",
4817 rman_get_size(sc->mem_res));
4819 goto abort_with_mem_res;
4822 /* make NULL terminated copy of the EEPROM strings section of
4824 bzero(sc->eeprom_strings, MXGE_EEPROM_STRINGS_SIZE);
4825 bus_space_read_region_1(rman_get_bustag(sc->mem_res),
4826 rman_get_bushandle(sc->mem_res),
4827 sc->sram_size - MXGE_EEPROM_STRINGS_SIZE,
4829 MXGE_EEPROM_STRINGS_SIZE - 2);
4830 err = mxge_parse_strings(sc);
4832 goto abort_with_mem_res;
4834 /* Enable write combining for efficient use of PCIe bus */
4837 /* Allocate the out of band dma memory */
4838 err = mxge_dma_alloc(sc, &sc->cmd_dma,
4839 sizeof (mxge_cmd_t), 64);
4841 goto abort_with_mem_res;
4842 sc->cmd = (mcp_cmd_response_t *) sc->cmd_dma.addr;
/* 64-byte zero pad used to round out short tx frames. */
4843 err = mxge_dma_alloc(sc, &sc->zeropad_dma, 64, 64);
4845 goto abort_with_cmd_dma;
4847 err = mxge_dma_alloc(sc, &sc->dmabench_dma, 4096, 4096);
4849 goto abort_with_zeropad_dma;
4851 /* select & load the firmware */
4852 err = mxge_select_firmware(sc);
4854 goto abort_with_dmabench;
4855 sc->intr_coal_delay = mxge_intr_coal_delay;
4857 mxge_slice_probe(sc);
4858 err = mxge_alloc_slices(sc);
4860 goto abort_with_dmabench;
4862 err = mxge_reset(sc, 0);
4864 goto abort_with_slices;
4866 err = mxge_alloc_rings(sc);
4868 device_printf(sc->dev, "failed to allocate rings\n");
4869 goto abort_with_slices;
4872 err = mxge_add_irq(sc);
4874 device_printf(sc->dev, "failed to add irq\n");
4875 goto abort_with_rings;
/* Advertise hardware offload capabilities to the stack. */
4878 ifp->if_baudrate = IF_Gbps(10);
4879 ifp->if_capabilities = IFCAP_RXCSUM | IFCAP_TXCSUM | IFCAP_TSO4 |
4880 IFCAP_VLAN_MTU | IFCAP_LINKSTATE | IFCAP_TXCSUM_IPV6 |
4882 #if defined(INET) || defined(INET6)
4883 ifp->if_capabilities |= IFCAP_LRO;
4886 #ifdef MXGE_NEW_VLAN_API
4887 ifp->if_capabilities |= IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_HWCSUM;
4889 /* Only FW 1.4.32 and newer can do TSO over vlans */
4890 if (sc->fw_ver_major == 1 && sc->fw_ver_minor == 4 &&
4891 sc->fw_ver_tiny >= 32)
4892 ifp->if_capabilities |= IFCAP_VLAN_HWTSO;
4894 sc->max_mtu = mxge_max_mtu(sc);
4895 if (sc->max_mtu >= 9000)
4896 ifp->if_capabilities |= IFCAP_JUMBO_MTU;
4898 device_printf(dev, "MTU limited to %d. Install "
4899 "latest firmware for 9000 byte jumbo support\n",
4900 sc->max_mtu - ETHER_HDR_LEN);
4901 ifp->if_hwassist = CSUM_TCP | CSUM_UDP | CSUM_TSO;
4902 ifp->if_hwassist |= CSUM_TCP_IPV6 | CSUM_UDP_IPV6;
4903 /* check to see if f/w supports TSO for IPv6 */
4904 if (!mxge_send_cmd(sc, MXGEFW_CMD_GET_MAX_TSO6_HDR_SIZE, &cmd)) {
4906 ifp->if_capabilities |= IFCAP_TSO6;
4907 sc->max_tso6_hlen = min(cmd.data0,
4908 sizeof (sc->ss[0].scratch));
4910 ifp->if_capenable = ifp->if_capabilities;
4911 if (sc->lro_cnt == 0)
4912 ifp->if_capenable &= ~IFCAP_LRO;
/* Wire up the ifnet method table. */
4913 ifp->if_init = mxge_init;
4915 ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
4916 ifp->if_ioctl = mxge_ioctl;
4917 ifp->if_start = mxge_start;
4918 ifp->if_get_counter = mxge_get_counter;
4919 /* Initialise the ifmedia structure */
4920 ifmedia_init(&sc->media, 0, mxge_media_change,
4922 mxge_media_init(sc);
4923 mxge_media_probe(sc);
4925 ether_ifattach(ifp, sc->mac_addr);
4926 /* ether_ifattach sets mtu to ETHERMTU */
4927 if (mxge_initial_mtu != ETHERMTU)
4928 mxge_change_mtu(sc, mxge_initial_mtu);
4930 mxge_add_sysctls(sc);
4931 #ifdef IFNET_BUF_RING
4932 ifp->if_transmit = mxge_transmit;
4933 ifp->if_qflush = mxge_qflush;
/* Start the taskqueue thread and the periodic watchdog tick. */
4935 taskqueue_start_threads(&sc->tq, 1, PI_NET, "%s taskq",
4936 device_get_nameunit(sc->dev));
4937 callout_reset(&sc->co_hdl, mxge_ticks, mxge_tick, sc);
/* --- error unwind labels: reverse order of acquisition --- */
4941 mxge_free_rings(sc);
4943 mxge_free_slices(sc);
4944 abort_with_dmabench:
4945 mxge_dma_free(&sc->dmabench_dma);
4946 abort_with_zeropad_dma:
4947 mxge_dma_free(&sc->zeropad_dma);
4949 mxge_dma_free(&sc->cmd_dma);
4951 bus_release_resource(dev, SYS_RES_MEMORY, PCIR_BARS, sc->mem_res);
4953 pci_disable_busmaster(dev);
4954 mtx_destroy(&sc->cmd_mtx);
4955 mtx_destroy(&sc->driver_mtx);
4957 abort_with_parent_dmat:
4958 bus_dma_tag_destroy(sc->parent_dmat);
4960 if (sc->tq != NULL) {
4961 taskqueue_drain(sc->tq, &sc->watchdog_task);
4962 taskqueue_free(sc->tq);
/*
 * Device detach: refuse while vlans are still attached, stop the
 * running interface under the driver lock, then release everything
 * mxge_attach() acquired -- ifnet, taskqueue, callout, media,
 * sysctls, rings, slices, DMA blocks, BAR, mutexes, and the parent
 * DMA tag -- in roughly reverse order of acquisition.
 */
4970 mxge_detach(device_t dev)
4972 mxge_softc_t *sc = device_get_softc(dev);
4974 if (mxge_vlans_active(sc)) {
4975 device_printf(sc->dev,
4976 "Detach vlans before removing module\n");
4979 mtx_lock(&sc->driver_mtx);
4981 if (sc->ifp->if_drv_flags & IFF_DRV_RUNNING)
4983 mtx_unlock(&sc->driver_mtx);
4984 ether_ifdetach(sc->ifp);
/* Drain the watchdog before freeing the queue it runs on. */
4985 if (sc->tq != NULL) {
4986 taskqueue_drain(sc->tq, &sc->watchdog_task);
4987 taskqueue_free(sc->tq);
4990 callout_drain(&sc->co_hdl);
4991 ifmedia_removeall(&sc->media);
/* Tell the firmware to stop its dummy RDMA traffic. */
4992 mxge_dummy_rdma(sc, 0);
4993 mxge_rem_sysctls(sc);
4995 mxge_free_rings(sc);
4996 mxge_free_slices(sc);
4997 mxge_dma_free(&sc->dmabench_dma);
4998 mxge_dma_free(&sc->zeropad_dma);
4999 mxge_dma_free(&sc->cmd_dma);
5000 bus_release_resource(dev, SYS_RES_MEMORY, PCIR_BARS, sc->mem_res);
5001 pci_disable_busmaster(dev);
5002 mtx_destroy(&sc->cmd_mtx);
5003 mtx_destroy(&sc->driver_mtx);
5005 bus_dma_tag_destroy(sc->parent_dmat);
5010 mxge_shutdown(device_t dev)
5016 This file uses Myri10GE driver indentation.
5019 c-file-style:"linux"