1 /******************************************************************************
3 Copyright (c) 2006-2009, Myricom Inc.
6 Redistribution and use in source and binary forms, with or without
7 modification, are permitted provided that the following conditions are met:
9 1. Redistributions of source code must retain the above copyright notice,
10 this list of conditions and the following disclaimer.
12 2. Neither the name of the Myricom Inc, nor the names of its
13 contributors may be used to endorse or promote products derived from
14 this software without specific prior written permission.
16 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
17 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
20 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
21 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
22 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
23 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
24 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
25 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
26 POSSIBILITY OF SUCH DAMAGE.
28 ***************************************************************************/
30 #include <sys/cdefs.h>
31 __FBSDID("$FreeBSD$");
33 #include <sys/param.h>
34 #include <sys/systm.h>
35 #include <sys/linker.h>
36 #include <sys/firmware.h>
37 #include <sys/endian.h>
38 #include <sys/sockio.h>
40 #include <sys/malloc.h>
42 #include <sys/kernel.h>
44 #include <sys/module.h>
45 #include <sys/socket.h>
46 #include <sys/sysctl.h>
50 #include <net/if_arp.h>
51 #include <net/ethernet.h>
52 #include <net/if_dl.h>
53 #include <net/if_media.h>
57 #include <net/if_types.h>
58 #include <net/if_vlan_var.h>
61 #include <netinet/in_systm.h>
62 #include <netinet/in.h>
63 #include <netinet/ip.h>
64 #include <netinet/tcp.h>
66 #include <machine/bus.h>
67 #include <machine/in_cksum.h>
68 #include <machine/resource.h>
73 #include <dev/pci/pcireg.h>
74 #include <dev/pci/pcivar.h>
75 #include <dev/pci/pci_private.h> /* XXX for pci_cfg_restore */
77 #include <vm/vm.h> /* for pmap_mapdev() */
80 #if defined(__i386) || defined(__amd64)
81 #include <machine/specialreg.h>
84 #include <dev/mxge/mxge_mcp.h>
85 #include <dev/mxge/mcp_gen_header.h>
86 /*#define MXGE_FAKE_IFP*/
87 #include <dev/mxge/if_mxge_var.h>
/*
 * Module-scope tunables and driver registration glue.
 * NOTE(review): this view of the file is fragmentary — original source
 * lines are missing between the numbered lines below (e.g. the braces of
 * the mxge_methods[] and mxge_driver initializers are not visible).
 */
/* Tunable defaults; presumably overridable via loader/sysctl elsewhere in
 * the file — TODO confirm against the missing sysctl/TUNABLE lines. */
90 static int mxge_nvidia_ecrc_enable = 1;
91 static int mxge_force_firmware = 0;
92 static int mxge_intr_coal_delay = 30;
93 static int mxge_deassert_wait = 1;
94 static int mxge_flow_control = 1;
95 static int mxge_verbose = 0;
96 static int mxge_lro_cnt = 8;
97 static int mxge_ticks;
98 static int mxge_max_slices = 1;
99 static int mxge_rss_hash_type = MXGEFW_RSS_HASH_TYPE_SRC_PORT;
100 static int mxge_always_promisc = 0;
/* Firmware image names: "eth" variants assume aligned PCIe completions,
 * "ethp" variants work around unaligned completions; "rss" variants
 * support multiple slices. */
101 static char *mxge_fw_unaligned = "mxge_ethp_z8e";
102 static char *mxge_fw_aligned = "mxge_eth_z8e";
103 static char *mxge_fw_rss_aligned = "mxge_rss_eth_z8e";
104 static char *mxge_fw_rss_unaligned = "mxge_rss_ethp_z8e";
/* newbus device interface entry points */
106 static int mxge_probe(device_t dev);
107 static int mxge_attach(device_t dev);
108 static int mxge_detach(device_t dev);
109 static int mxge_shutdown(device_t dev);
110 static void mxge_intr(void *arg);
112 static device_method_t mxge_methods[] =
114 /* Device interface */
115 DEVMETHOD(device_probe, mxge_probe),
116 DEVMETHOD(device_attach, mxge_attach),
117 DEVMETHOD(device_detach, mxge_detach),
118 DEVMETHOD(device_shutdown, mxge_shutdown),
122 static driver_t mxge_driver =
126 sizeof(mxge_softc_t),
129 static devclass_t mxge_devclass;
131 /* Declare ourselves to be a child of the PCI bus.*/
132 DRIVER_MODULE(mxge, pci, mxge_driver, mxge_devclass, 0, 0);
/* The driver needs the firmware(9) and zlib in-kernel modules. */
133 MODULE_DEPEND(mxge, firmware, 1, 1, 1);
134 MODULE_DEPEND(mxge, zlib, 1, 1, 1);
/* Forward declarations for routines used before their definitions. */
136 static int mxge_load_firmware(mxge_softc_t *sc, int adopt);
137 static int mxge_send_cmd(mxge_softc_t *sc, uint32_t cmd, mxge_cmd_t *data);
138 static int mxge_close(mxge_softc_t *sc);
139 static int mxge_open(mxge_softc_t *sc);
140 static void mxge_tick(void *arg);
/*
 * mxge_probe: newbus probe — match Myricom Z8E / Z8E_9 PCI IDs and set a
 * human-readable description from the revision id.  (Fragmentary view:
 * the switch statement and return values are partially missing.)
 */
143 mxge_probe(device_t dev)
148 if ((pci_get_vendor(dev) == MXGE_PCI_VENDOR_MYRICOM) &&
149 ((pci_get_device(dev) == MXGE_PCI_DEVICE_Z8E) ||
150 (pci_get_device(dev) == MXGE_PCI_DEVICE_Z8E_9))) {
151 rev = pci_get_revid(dev);
153 case MXGE_PCI_REV_Z8E:
154 device_set_desc(dev, "Myri10G-PCIE-8A");
156 case MXGE_PCI_REV_Z8ES:
157 device_set_desc(dev, "Myri10G-PCIE-8B");
/* unknown revision: still attach, but warn */
160 device_set_desc(dev, "Myri10G-PCIE-8??");
161 device_printf(dev, "Unrecognized rev %d NIC\n",
/*
 * mxge_enable_wc: mark the NIC SRAM aperture write-combining (x86/amd64
 * only) so PIO bursts to the card are coalesced.
 */
171 mxge_enable_wc(mxge_softc_t *sc)
173 #if defined(__i386) || defined(__amd64)
178 len = rman_get_size(sc->mem_res);
179 err = pmap_change_attr((vm_offset_t) sc->sram,
180 len, PAT_WRITE_COMBINING);
182 device_printf(sc->dev, "pmap_change_attr failed, %d\n",
190 /* callback to get our DMA address */
/* bus_dmamap_load() callback: stores the single segment's bus address
 * into the bus_addr_t pointed to by arg. */
192 mxge_dmamap_callback(void *arg, bus_dma_segment_t *segs, int nsegs,
196 *(bus_addr_t *) arg = segs->ds_addr;
/*
 * mxge_dma_alloc: create a DMA tag, allocate coherent zeroed memory and
 * load it, recording the bus address in dma->bus_addr.  On failure the
 * partially-acquired resources are released via the goto labels below
 * (some error-branch lines are missing from this view).
 */
201 mxge_dma_alloc(mxge_softc_t *sc, mxge_dma_t *dma, size_t bytes,
202 bus_size_t alignment)
205 device_t dev = sc->dev;
206 bus_size_t boundary, maxsegsize;
/* Large 4KB-aligned allocations are split at 4KB boundaries;
 * presumably the else-branch (not visible) leaves boundary = 0. */
208 if (bytes > 4096 && alignment == 4096) {
216 /* allocate DMAable memory tags */
217 err = bus_dma_tag_create(sc->parent_dmat, /* parent */
218 alignment, /* alignment */
219 boundary, /* boundary */
220 BUS_SPACE_MAXADDR, /* low */
221 BUS_SPACE_MAXADDR, /* high */
222 NULL, NULL, /* filter */
225 maxsegsize, /* maxsegsize */
226 BUS_DMA_COHERENT, /* flags */
227 NULL, NULL, /* lock */
228 &dma->dmat); /* tag */
230 device_printf(dev, "couldn't alloc tag (err = %d)\n", err);
234 /* allocate DMAable memory & map */
235 err = bus_dmamem_alloc(dma->dmat, &dma->addr,
236 (BUS_DMA_WAITOK | BUS_DMA_COHERENT
237 | BUS_DMA_ZERO), &dma->map);
239 device_printf(dev, "couldn't alloc mem (err = %d)\n", err);
240 goto abort_with_dmat;
243 /* load the memory */
244 err = bus_dmamap_load(dma->dmat, dma->map, dma->addr, bytes,
245 mxge_dmamap_callback,
246 (void *)&dma->bus_addr, 0);
248 device_printf(dev, "couldn't load map (err = %d)\n", err);
/* error unwind: free memory, then destroy the tag */
254 bus_dmamem_free(dma->dmat, dma->addr, dma->map);
256 (void)bus_dma_tag_destroy(dma->dmat);
/*
 * mxge_dma_free: release everything mxge_dma_alloc() acquired, in
 * reverse order — unload, free, destroy tag.
 */
262 mxge_dma_free(mxge_dma_t *dma)
264 bus_dmamap_unload(dma->dmat, dma->map);
265 bus_dmamem_free(dma->dmat, dma->addr, dma->map);
266 (void)bus_dma_tag_destroy(dma->dmat);
270 * The eeprom strings on the lanaiX have the format
/*
 * mxge_parse_strings: walk the NUL-separated "KEY=value" strings in
 * EEPROM, extracting the MAC address ("MAC=xx:xx:..."), product code
 * ("PC=") and serial number ("SN=").  Falls through to an error printf
 * if required fields were not found (check not visible in this view).
 */
277 mxge_parse_strings(mxge_softc_t *sc)
/* advance ptr past the current NUL-terminated string */
279 #define MXGE_NEXT_STRING(p) while(ptr < limit && *ptr++)
284 ptr = sc->eeprom_strings;
285 limit = sc->eeprom_strings + MXGE_EEPROM_STRINGS_SIZE;
287 while (ptr < limit && *ptr != '\0') {
288 if (memcmp(ptr, "MAC=", 4) == 0) {
290 sc->mac_addr_string = ptr;
/* six colon-separated hex octets */
291 for (i = 0; i < 6; i++) {
293 if ((ptr + 2) > limit)
295 sc->mac_addr[i] = strtoul(ptr, NULL, 16);
298 } else if (memcmp(ptr, "PC=", 3) == 0) {
300 strncpy(sc->product_code_string, ptr,
301 sizeof (sc->product_code_string) - 1);
302 } else if (memcmp(ptr, "SN=", 3) == 0) {
304 strncpy(sc->serial_number_string, ptr,
305 sizeof (sc->serial_number_string) - 1);
307 MXGE_NEXT_STRING(ptr);
314 device_printf(sc->dev, "failed to parse eeprom_strings\n");
/*
 * mxge_enable_nvidia_ecrc (x86/amd64 build): enable ECRC generation on
 * an upstream NVIDIA (0x10de) bridge so PCIe completions arrive 8-byte
 * aligned.  Because FreeBSD (at the time) lacked extended config space
 * access, the bridge's extended config registers are reached by
 * pmap_mapdev()'ing the chipset's memory-mapped config aperture.
 * NOTE(review): several lines (return paths, base for ck804, the off
 * computation's first term) are missing from this view.
 */
319 #if #cpu(i386) || defined __i386 || defined i386 || defined __i386__ || #cpu(x86_64) || defined __x86_64__
321 mxge_enable_nvidia_ecrc(mxge_softc_t *sc)
324 unsigned long base, off;
326 device_t pdev, mcp55;
327 uint16_t vendor_id, device_id, word;
328 uintptr_t bus, slot, func, ivend, idev;
332 if (!mxge_nvidia_ecrc_enable)
/* the bridge is the grandparent of our device */
335 pdev = device_get_parent(device_get_parent(sc->dev));
337 device_printf(sc->dev, "could not find parent?\n");
340 vendor_id = pci_read_config(pdev, PCIR_VENDOR, 2);
341 device_id = pci_read_config(pdev, PCIR_DEVICE, 2);
/* only NVIDIA bridges are handled */
343 if (vendor_id != 0x10de)
348 if (device_id == 0x005d) {
349 /* ck804, base address is magic */
351 } else if (device_id >= 0x0374 && device_id <= 0x378) {
352 /* mcp55, base address stored in chipset */
353 mcp55 = pci_find_bsf(0, 0, 0);
355 0x10de == pci_read_config(mcp55, PCIR_VENDOR, 2) &&
356 0x0369 == pci_read_config(mcp55, PCIR_DEVICE, 2)) {
357 word = pci_read_config(mcp55, 0x90, 2);
358 base = ((unsigned long)word & 0x7ffeU) << 25;
365 Test below is commented because it is believed that doing
366 config read/write beyond 0xff will access the config space
367 for the next larger function. Uncomment this and remove
368 the hacky pmap_mapdev() way of accessing config space when
369 FreeBSD grows support for extended pcie config space access
372 /* See if we can, by some miracle, access the extended
374 val = pci_read_config(pdev, 0x178, 4);
375 if (val != 0xffffffff) {
377 pci_write_config(pdev, 0x178, val, 4);
381 /* Rather than using normal pci config space writes, we must
382 * map the Nvidia config space ourselves. This is because on
383 * opteron/nvidia class machine the 0xe000000 mapping is
384 * handled by the nvidia chipset, that means the internal PCI
385 * device (the on-chip northbridge), or the amd-8131 bridge
386 * and things behind them are not visible by this method.
/* fetch the bridge's bus/slot/function and IDs via bus IVARs */
389 BUS_READ_IVAR(device_get_parent(pdev), pdev,
391 BUS_READ_IVAR(device_get_parent(pdev), pdev,
392 PCI_IVAR_SLOT, &slot);
393 BUS_READ_IVAR(device_get_parent(pdev), pdev,
394 PCI_IVAR_FUNCTION, &func);
395 BUS_READ_IVAR(device_get_parent(pdev), pdev,
396 PCI_IVAR_VENDOR, &ivend);
397 BUS_READ_IVAR(device_get_parent(pdev), pdev,
398 PCI_IVAR_DEVICE, &idev);
/* compute the config-space offset of the bridge within the aperture */
401 + 0x00100000UL * (unsigned long)bus
402 + 0x00001000UL * (unsigned long)(func
405 /* map it into the kernel */
406 va = pmap_mapdev(trunc_page((vm_paddr_t)off), PAGE_SIZE);
410 device_printf(sc->dev, "pmap_kenter_temporary didn't\n");
413 /* get a pointer to the config space mapped into the kernel */
414 cfgptr = va + (off & PAGE_MASK);
416 /* make sure that we can really access it */
417 vendor_id = *(uint16_t *)(cfgptr + PCIR_VENDOR);
418 device_id = *(uint16_t *)(cfgptr + PCIR_DEVICE);
419 if (! (vendor_id == ivend && device_id == idev)) {
420 device_printf(sc->dev, "mapping failed: 0x%x:0x%x\n",
421 vendor_id, device_id);
422 pmap_unmapdev((vm_offset_t)va, PAGE_SIZE);
/* 0x178 is the extended control register holding the ECRC enable bit */
426 ptr32 = (uint32_t*)(cfgptr + 0x178);
429 if (val == 0xffffffff) {
430 device_printf(sc->dev, "extended mapping failed\n");
431 pmap_unmapdev((vm_offset_t)va, PAGE_SIZE);
435 pmap_unmapdev((vm_offset_t)va, PAGE_SIZE);
437 device_printf(sc->dev,
438 "Enabled ECRC on upstream Nvidia bridge "
440 (int)bus, (int)slot, (int)func);
/* Non-x86 build of mxge_enable_nvidia_ecrc: should be unreachable, since
 * NForce4 chipsets only exist on x86/amd64 — just complain. */
445 mxge_enable_nvidia_ecrc(mxge_softc_t *sc)
447 device_printf(sc->dev,
448 "Nforce 4 chipset on non-x86/amd64!?!?!\n");
/*
 * mxge_dma_test: run the firmware's DMA benchmark against the dmabench
 * buffer three times (read, write, read+write), storing MB/s results in
 * sc->read_dma, sc->write_dma and sc->read_write_dma.  test_type is
 * either MXGEFW_DMA_TEST (benchmark) or MXGEFW_CMD_UNALIGNED_TEST
 * (abort on first unaligned completion).
 */
455 mxge_dma_test(mxge_softc_t *sc, int test_type)
458 bus_addr_t dmatest_bus = sc->dmabench_dma.bus_addr;
464 /* Run a small DMA test.
465 * The magic multipliers to the length tell the firmware
466 * to do DMA read, write, or read+write tests. The
467 * results are returned in cmd.data0. The upper 16
468 * bits of the return is the number of transfers completed.
469 * The lower 16 bits is the time in 0.5us ticks that the
470 * transfers took to complete.
473 len = sc->tx_boundary;
/* read test: multiplier 0x10000 */
475 cmd.data0 = MXGE_LOWPART_TO_U32(dmatest_bus);
476 cmd.data1 = MXGE_HIGHPART_TO_U32(dmatest_bus);
477 cmd.data2 = len * 0x10000;
478 status = mxge_send_cmd(sc, test_type, &cmd);
/* MB/s = (transfers * len * 2 bytes-per-tick-unit) / ticks */
483 sc->read_dma = ((cmd.data0>>16) * len * 2) /
484 (cmd.data0 & 0xffff);
/* write test: multiplier 0x1 */
485 cmd.data0 = MXGE_LOWPART_TO_U32(dmatest_bus);
486 cmd.data1 = MXGE_HIGHPART_TO_U32(dmatest_bus);
487 cmd.data2 = len * 0x1;
488 status = mxge_send_cmd(sc, test_type, &cmd);
493 sc->write_dma = ((cmd.data0>>16) * len * 2) /
494 (cmd.data0 & 0xffff);
/* simultaneous read+write test: multiplier 0x10001 */
496 cmd.data0 = MXGE_LOWPART_TO_U32(dmatest_bus);
497 cmd.data1 = MXGE_HIGHPART_TO_U32(dmatest_bus);
498 cmd.data2 = len * 0x10001;
499 status = mxge_send_cmd(sc, test_type, &cmd);
504 sc->read_write_dma = ((cmd.data0>>16) * len * 2 * 2) /
505 (cmd.data0 & 0xffff);
/* unaligned-test failures are expected; only report others */
508 if (status != 0 && test_type != MXGEFW_CMD_UNALIGNED_TEST)
509 device_printf(sc->dev, "DMA %s benchmark failed: %d\n",
516 * The Lanai Z8E PCI-E interface achieves higher Read-DMA throughput
517 * when the PCI-E Completion packets are aligned on an 8-byte
518 * boundary. Some PCI-E chip sets always align Completion packets; on
519 * the ones that do not, the alignment can be enforced by enabling
520 * ECRC generation (if supported).
522 * When PCI-E Completion packets are not aligned, it is actually more
523 * efficient to limit Read-DMA transactions to 2KB, rather than 4KB.
525 * If the driver can neither enable ECRC nor verify that it has
526 * already been enabled, then it must use a firmware image which works
527 * around unaligned completion packets (ethp_z8e.dat), and it should
528 * also ensure that it never gives the device a Read-DMA which is
529 * larger than 2KB by setting the tx_boundary to 2KB. If ECRC is
530 * enabled, then the driver should use the aligned (eth_z8e.dat)
531 * firmware image, and set tx_boundary to 4KB.
/*
 * mxge_firmware_probe: try to qualify the aligned-completion firmware on
 * this host.  Verifies the PCIe MaxReadReq is 4KB, loads the aligned
 * image, enables ECRC on NVIDIA bridges if possible, then runs the
 * unaligned-completion detector.  Returns 0 if the aligned firmware is
 * safe to keep; non-zero return paths are partially missing from view.
 */
535 mxge_firmware_probe(mxge_softc_t *sc)
537 device_t dev = sc->dev;
541 sc->tx_boundary = 4096;
543 * Verify the max read request size was set to 4KB
544 * before trying the test with 4KB.
546 if (pci_find_extcap(dev, PCIY_EXPRESS, &reg) == 0) {
/* Device Control register lives at cap + 0x8; 5<<12 == MRRS 4KB */
547 pectl = pci_read_config(dev, reg + 0x8, 2);
548 if ((pectl & (5 << 12)) != (5 << 12)) {
549 device_printf(dev, "Max Read Req. size != 4k (0x%x\n",
551 sc->tx_boundary = 2048;
556 * load the optimized firmware (which assumes aligned PCIe
557 * completions) in order to see if it works on this host.
559 sc->fw_name = mxge_fw_aligned;
560 status = mxge_load_firmware(sc, 1);
566 * Enable ECRC if possible
568 mxge_enable_nvidia_ecrc(sc);
571 * Run a DMA test which watches for unaligned completions and
572 * aborts on the first one seen.
575 status = mxge_dma_test(sc, MXGEFW_CMD_UNALIGNED_TEST);
577 return 0; /* keep the aligned firmware */
580 device_printf(dev, "DMA test failed: %d\n", status);
581 if (status == ENOSYS)
582 device_printf(dev, "Falling back to ethp! "
583 "Please install up to date fw\n");
/*
 * mxge_select_firmware: pick aligned vs. unaligned firmware.  Honors the
 * mxge_force_firmware tunable, assumes aligned on narrow (<= x4) links,
 * otherwise probes with mxge_firmware_probe(), then loads the choice.
 */
588 mxge_select_firmware(mxge_softc_t *sc)
593 if (mxge_force_firmware != 0) {
594 if (mxge_force_firmware == 1)
599 device_printf(sc->dev,
600 "Assuming %s completions (forced)\n",
601 aligned ? "aligned" : "unaligned");
605 /* if the PCIe link width is 4 or less, we can use the aligned
606 firmware and skip any checks */
607 if (sc->link_width != 0 && sc->link_width <= 4) {
608 device_printf(sc->dev,
609 "PCIe x%d Link, expect reduced performance\n",
/* probe succeeded: aligned firmware already loaded and validated */
615 if (0 == mxge_firmware_probe(sc))
620 sc->fw_name = mxge_fw_aligned;
621 sc->tx_boundary = 4096;
623 sc->fw_name = mxge_fw_unaligned;
624 sc->tx_boundary = 2048;
626 return (mxge_load_firmware(sc, 0));
/*
 * mxge_validate_firmware: sanity-check an MCP image header — correct
 * type, and a major.minor version matching what this driver requires.
 * Also records the version string/numbers in the softc for sysctl.
 */
636 mxge_validate_firmware(mxge_softc_t *sc, const mcp_gen_header_t *hdr)
640 if (be32toh(hdr->mcp_type) != MCP_TYPE_ETH) {
641 device_printf(sc->dev, "Bad firmware type: 0x%x\n",
642 be32toh(hdr->mcp_type));
646 /* save firmware version for sysctl */
647 strncpy(sc->fw_version, hdr->version, sizeof (sc->fw_version));
649 device_printf(sc->dev, "firmware id: %s\n", hdr->version);
651 sscanf(sc->fw_version, "%d.%d.%d", &sc->fw_ver_major,
652 &sc->fw_ver_minor, &sc->fw_ver_tiny);
/* tiny version may differ; major.minor must match exactly */
654 if (!(sc->fw_ver_major == MXGEFW_VERSION_MAJOR
655 && sc->fw_ver_minor == MXGEFW_VERSION_MINOR)) {
656 device_printf(sc->dev, "Found firmware version %s\n",
658 device_printf(sc->dev, "Driver needs %d.%d\n",
659 MXGEFW_VERSION_MAJOR, MXGEFW_VERSION_MINOR);
/* zlib allocator callback: plain kernel malloc, opaque arg unused. */
667 z_alloc(void *nil, u_int items, u_int size)
671 ptr = malloc(items * size, M_TEMP, M_NOWAIT);
/* zlib deallocator callback: frees memory obtained by z_alloc(). */
676 z_free(void *nil, void *ptr)
/*
 * mxge_load_firmware_helper: fetch the (zlib-compressed) firmware image
 * via firmware(9), inflate it, validate its MCP header, and copy it into
 * NIC SRAM at MXGE_FW_OFFSET.  Cleanup runs through the labels at the
 * bottom (free buffer, then firmware_put).
 */
683 mxge_load_firmware_helper(mxge_softc_t *sc, uint32_t *limit)
686 char *inflate_buffer;
687 const struct firmware *fw;
688 const mcp_gen_header_t *hdr;
695 fw = firmware_get(sc->fw_name);
697 device_printf(sc->dev, "Could not find firmware image %s\n",
704 /* setup zlib and decompress f/w */
705 bzero(&zs, sizeof (zs));
708 status = inflateInit(&zs);
709 if (status != Z_OK) {
714 /* the uncompressed size is stored as the firmware version,
715 which would otherwise go unused */
716 fw_len = (size_t) fw->version;
717 inflate_buffer = malloc(fw_len, M_TEMP, M_NOWAIT);
718 if (inflate_buffer == NULL)
720 zs.avail_in = fw->datasize;
721 zs.next_in = __DECONST(char *, fw->data);
722 zs.avail_out = fw_len;
723 zs.next_out = inflate_buffer;
724 status = inflate(&zs, Z_FINISH);
725 if (status != Z_STREAM_END) {
726 device_printf(sc->dev, "zlib %d\n", status);
728 goto abort_with_buffer;
/* locate and validate the MCP generic header inside the image */
732 hdr_offset = htobe32(*(const uint32_t *)
733 (inflate_buffer + MCP_HEADER_PTR_OFFSET));
734 if ((hdr_offset & 3) || hdr_offset + sizeof(*hdr) > fw_len) {
735 device_printf(sc->dev, "Bad firmware file");
737 goto abort_with_buffer;
739 hdr = (const void*)(inflate_buffer + hdr_offset);
741 status = mxge_validate_firmware(sc, hdr);
743 goto abort_with_buffer;
745 /* Copy the inflated firmware to NIC SRAM. */
746 for (i = 0; i < fw_len; i += 256) {
747 mxge_pio_copy(sc->sram + MXGE_FW_OFFSET + i,
749 min(256U, (unsigned)(fw_len - i)));
758 free(inflate_buffer, M_TEMP);
762 firmware_put(fw, FIRMWARE_UNLOAD);
767 * Enable or disable periodic RDMAs from the host to make certain
768 * chipsets resend dropped PCIe messages
/*
 * mxge_dummy_rdma: build a 6-word boot command (confirmation address,
 * confirmation data, dummy RDMA target, enable flag) in an 8-byte-
 * aligned stack buffer, PIO it to the boot-RDMA doorbell, then poll the
 * confirmation word for the firmware's 0xffffffff acknowledgement.
 */
772 mxge_dummy_rdma(mxge_softc_t *sc, int enable)
775 volatile uint32_t *confirm;
776 volatile char *submit;
777 uint32_t *buf, dma_low, dma_high;
/* round buf_bytes up to an 8-byte boundary */
780 buf = (uint32_t *)((unsigned long)(buf_bytes + 7) & ~7UL);
782 /* clear confirmation addr */
783 confirm = (volatile uint32_t *)sc->cmd;
787 /* send an rdma command to the PCIe engine, and wait for the
788 response in the confirmation address. The firmware should
789 write a -1 there to indicate it is alive and well
792 dma_low = MXGE_LOWPART_TO_U32(sc->cmd_dma.bus_addr);
793 dma_high = MXGE_HIGHPART_TO_U32(sc->cmd_dma.bus_addr);
794 buf[0] = htobe32(dma_high); /* confirm addr MSW */
795 buf[1] = htobe32(dma_low); /* confirm addr LSW */
796 buf[2] = htobe32(0xffffffff); /* confirm data */
797 dma_low = MXGE_LOWPART_TO_U32(sc->zeropad_dma.bus_addr);
798 dma_high = MXGE_HIGHPART_TO_U32(sc->zeropad_dma.bus_addr);
799 buf[3] = htobe32(dma_high); /* dummy addr MSW */
800 buf[4] = htobe32(dma_low); /* dummy addr LSW */
801 buf[5] = htobe32(enable); /* enable? */
804 submit = (volatile char *)(sc->sram + MXGEFW_BOOT_DUMMY_RDMA);
806 mxge_pio_copy(submit, buf, 64);
/* poll (bounded) for firmware to write -1 to the confirm word */
811 while (*confirm != 0xffffffff && i < 20) {
815 if (*confirm != 0xffffffff) {
816 device_printf(sc->dev, "dummy rdma %s failed (%p = 0x%x)",
817 (enable ? "enable" : "disable"), confirm,
/*
 * mxge_send_cmd: issue a command to the running firmware.  The command
 * block is byte-swapped to big-endian, PIO'd to the MXGEFW_ETH_CMD
 * doorbell under cmd_mtx, and the DMA'd response is polled for up to
 * ~20ms.  On success, response->data is returned in data->data0.
 * Error-code mapping lines between the switch cases are missing from
 * this view.
 */
824 mxge_send_cmd(mxge_softc_t *sc, uint32_t cmd, mxge_cmd_t *data)
827 char buf_bytes[sizeof(*buf) + 8];
828 volatile mcp_cmd_response_t *response = sc->cmd;
829 volatile char *cmd_addr = sc->sram + MXGEFW_ETH_CMD;
830 uint32_t dma_low, dma_high;
831 int err, sleep_total = 0;
833 /* ensure buf is aligned to 8 bytes */
834 buf = (mcp_cmd_t *)((unsigned long)(buf_bytes + 7) & ~7UL);
/* firmware expects all fields big-endian */
836 buf->data0 = htobe32(data->data0);
837 buf->data1 = htobe32(data->data1);
838 buf->data2 = htobe32(data->data2);
839 buf->cmd = htobe32(cmd);
840 dma_low = MXGE_LOWPART_TO_U32(sc->cmd_dma.bus_addr);
841 dma_high = MXGE_HIGHPART_TO_U32(sc->cmd_dma.bus_addr);
843 buf->response_addr.low = htobe32(dma_low);
844 buf->response_addr.high = htobe32(dma_high);
/* serialize command submissions and mark response as pending */
845 mtx_lock(&sc->cmd_mtx);
846 response->result = 0xffffffff;
848 mxge_pio_copy((volatile void *)cmd_addr, buf, sizeof (*buf));
850 /* wait up to 20ms */
852 for (sleep_total = 0; sleep_total < 20; sleep_total++) {
853 bus_dmamap_sync(sc->cmd_dma.dmat,
854 sc->cmd_dma.map, BUS_DMASYNC_POSTREAD);
856 switch (be32toh(response->result)) {
858 data->data0 = be32toh(response->data);
864 case MXGEFW_CMD_UNKNOWN:
867 case MXGEFW_CMD_ERROR_UNALIGNED:
870 case MXGEFW_CMD_ERROR_BUSY:
874 device_printf(sc->dev,
876 "failed, result = %d\n",
877 cmd, be32toh(response->result));
/* timed out: result never left the pending sentinel */
885 device_printf(sc->dev, "mxge: command %d timed out"
887 cmd, be32toh(response->result));
888 mtx_unlock(&sc->cmd_mtx);
/*
 * mxge_adopt_running_firmware: instead of loading a new image, validate
 * the firmware already running on the NIC by copying its header out of
 * SRAM and checking it.  Also flags the known 1.4.4–1.4.11 rx-filter
 * bug so the driver can work around it with ALLMULTI.
 */
893 mxge_adopt_running_firmware(mxge_softc_t *sc)
895 struct mcp_gen_header *hdr;
896 const size_t bytes = sizeof (struct mcp_gen_header);
900 /* find running firmware header */
901 hdr_offset = htobe32(*(volatile uint32_t *)
902 (sc->sram + MCP_HEADER_PTR_OFFSET));
904 if ((hdr_offset & 3) || hdr_offset + sizeof(*hdr) > sc->sram_size) {
905 device_printf(sc->dev,
906 "Running firmware has bad header offset (%d)\n",
911 /* copy header of running firmware from SRAM to host memory to
912 * validate firmware */
913 hdr = malloc(bytes, M_DEVBUF, M_NOWAIT);
915 device_printf(sc->dev, "could not malloc firmware hdr\n");
918 bus_space_read_region_1(rman_get_bustag(sc->mem_res),
919 rman_get_bushandle(sc->mem_res),
920 hdr_offset, (char *)hdr, bytes);
921 status = mxge_validate_firmware(sc, hdr);
925 * check to see if adopted firmware has bug where adopting
926 * it will cause broadcasts to be filtered unless the NIC
927 * is kept in ALLMULTI mode
929 if (sc->fw_ver_major == 1 && sc->fw_ver_minor == 4 &&
930 sc->fw_ver_tiny >= 4 && sc->fw_ver_tiny <= 11) {
931 sc->adopted_rx_filter_bug = 1;
932 device_printf(sc->dev, "Adopting fw %d.%d.%d: "
933 "working around rx filter bug\n",
934 sc->fw_ver_major, sc->fw_ver_minor,
/*
 * mxge_load_firmware: load firmware into SRAM (or, if that fails and
 * adopt is set, fall back to the firmware already running on the NIC),
 * then hand off execution to it via the boot-handoff doorbell and wait
 * for the confirmation word.
 */
943 mxge_load_firmware(mxge_softc_t *sc, int adopt)
945 volatile uint32_t *confirm;
946 volatile char *submit;
948 uint32_t *buf, size, dma_low, dma_high;
/* 8-byte-align the stack command buffer */
951 buf = (uint32_t *)((unsigned long)(buf_bytes + 7) & ~7UL);
953 size = sc->sram_size;
954 status = mxge_load_firmware_helper(sc, &size);
958 /* Try to use the currently running firmware, if
960 status = mxge_adopt_running_firmware(sc);
962 device_printf(sc->dev,
963 "failed to adopt running firmware\n");
966 device_printf(sc->dev,
967 "Successfully adopted running firmware\n");
968 if (sc->tx_boundary == 4096) {
969 device_printf(sc->dev,
970 "Using firmware currently running on NIC"
972 device_printf(sc->dev,
973 "performance consider loading optimized "
/* adopted firmware must be treated as unaligned-capable only */
976 sc->fw_name = mxge_fw_unaligned;
977 sc->tx_boundary = 2048;
980 /* clear confirmation addr */
981 confirm = (volatile uint32_t *)sc->cmd;
984 /* send a reload command to the bootstrap MCP, and wait for the
985 response in the confirmation address. The firmware should
986 write a -1 there to indicate it is alive and well
989 dma_low = MXGE_LOWPART_TO_U32(sc->cmd_dma.bus_addr);
990 dma_high = MXGE_HIGHPART_TO_U32(sc->cmd_dma.bus_addr);
992 buf[0] = htobe32(dma_high); /* confirm addr MSW */
993 buf[1] = htobe32(dma_low); /* confirm addr LSW */
994 buf[2] = htobe32(0xffffffff); /* confirm data */
996 /* FIX: All newest firmware should un-protect the bottom of
997 the sram before handoff. However, the very first interfaces
998 do not. Therefore the handoff copy must skip the first 8 bytes
1000 /* where the code starts*/
1001 buf[3] = htobe32(MXGE_FW_OFFSET + 8);
1002 buf[4] = htobe32(size - 8); /* length of code */
1003 buf[5] = htobe32(8); /* where to copy to */
1004 buf[6] = htobe32(0); /* where to jump to */
1006 submit = (volatile char *)(sc->sram + MXGEFW_BOOT_HANDOFF);
1007 mxge_pio_copy(submit, buf, 64);
/* bounded poll for the firmware's -1 acknowledgement */
1012 while (*confirm != 0xffffffff && i < 20) {
1015 bus_dmamap_sync(sc->cmd_dma.dmat,
1016 sc->cmd_dma.map, BUS_DMASYNC_POSTREAD);
1018 if (*confirm != 0xffffffff) {
1019 device_printf(sc->dev,"handoff failed (%p = 0x%x)",
/*
 * mxge_update_mac_address: pack sc->mac_addr into two command words
 * (first 4 octets, then last 2) and push it to the firmware.
 */
1028 mxge_update_mac_address(mxge_softc_t *sc)
1031 uint8_t *addr = sc->mac_addr;
1035 cmd.data0 = ((addr[0] << 24) | (addr[1] << 16)
1036 | (addr[2] << 8) | addr[3]);
1038 cmd.data1 = ((addr[4] << 8) | (addr[5]));
1040 status = mxge_send_cmd(sc, MXGEFW_SET_MAC_ADDRESS, &cmd);
/*
 * mxge_change_pause: enable or disable link-level flow control in the
 * firmware; logs on failure.  (Softc pause-state update not visible.)
 */
1045 mxge_change_pause(mxge_softc_t *sc, int pause)
1051 status = mxge_send_cmd(sc, MXGEFW_ENABLE_FLOW_CONTROL,
1054 status = mxge_send_cmd(sc, MXGEFW_DISABLE_FLOW_CONTROL,
1058 device_printf(sc->dev, "Failed to set flow control mode\n");
/*
 * mxge_change_promisc: toggle firmware promiscuous mode; the
 * mxge_always_promisc tunable forces it on unconditionally.
 */
1066 mxge_change_promisc(mxge_softc_t *sc, int promisc)
1071 if (mxge_always_promisc)
1075 status = mxge_send_cmd(sc, MXGEFW_ENABLE_PROMISC,
1078 status = mxge_send_cmd(sc, MXGEFW_DISABLE_PROMISC,
1082 device_printf(sc->dev, "Failed to set promisc mode\n");
/*
 * mxge_set_multicast_list: reprogram the firmware multicast filter from
 * the interface's address list.  Sequence: go ALLMULTI, bail out early
 * if ALLMULTI is requested (or the adopted-firmware rx-filter bug is
 * present), flush old filters, join each AF_LINK group address, then
 * re-enable filtering.  On any join error, filtering stays off.
 */
1087 mxge_set_multicast_list(mxge_softc_t *sc)
1090 struct ifmultiaddr *ifma;
1091 struct ifnet *ifp = sc->ifp;
1094 /* This firmware is known to not support multicast */
1095 if (!sc->fw_multicast_support)
1098 /* Disable multicast filtering while we play with the lists*/
1099 err = mxge_send_cmd(sc, MXGEFW_ENABLE_ALLMULTI, &cmd);
1101 device_printf(sc->dev, "Failed MXGEFW_ENABLE_ALLMULTI,"
1102 " error status: %d\n", err);
/* adopted 1.4.4-1.4.11 firmware: stay in ALLMULTI (rx filter bug) */
1106 if (sc->adopted_rx_filter_bug)
1109 if (ifp->if_flags & IFF_ALLMULTI)
1110 /* request to disable multicast filtering, so quit here */
1113 /* Flush all the filters */
1115 err = mxge_send_cmd(sc, MXGEFW_LEAVE_ALL_MULTICAST_GROUPS, &cmd);
1117 device_printf(sc->dev,
1118 "Failed MXGEFW_LEAVE_ALL_MULTICAST_GROUPS"
1119 ", error status: %d\n", err);
1123 /* Walk the multicast list, and add each address */
1126 TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
1127 if (ifma->ifma_addr->sa_family != AF_LINK)
/* split the 6-byte link address across data0 (4 bytes) / data1 (2) */
1129 bcopy(LLADDR((struct sockaddr_dl *)ifma->ifma_addr),
1131 bcopy(LLADDR((struct sockaddr_dl *)ifma->ifma_addr) + 4,
1133 cmd.data0 = htonl(cmd.data0);
1134 cmd.data1 = htonl(cmd.data1);
1135 err = mxge_send_cmd(sc, MXGEFW_JOIN_MULTICAST_GROUP, &cmd);
1137 device_printf(sc->dev, "Failed "
1138 "MXGEFW_JOIN_MULTICAST_GROUP, error status:"
1140 /* abort, leaving multicast filtering off */
1141 IF_ADDR_UNLOCK(ifp);
1145 IF_ADDR_UNLOCK(ifp);
1146 /* Enable multicast filtering */
1147 err = mxge_send_cmd(sc, MXGEFW_DISABLE_ALLMULTI, &cmd);
1149 device_printf(sc->dev, "Failed MXGEFW_DISABLE_ALLMULTI"
1150 ", error status: %d\n", err);
/*
 * mxge_max_mtu: largest MTU supportable with the available jumbo
 * cluster size; probes whether the firmware can chain multiple buffers
 * per frame, otherwise limits to one MJUMPAGESIZE cluster.
 */
1155 mxge_max_mtu(mxge_softc_t *sc)
1160 if (MJUMPAGESIZE - MXGEFW_PAD > MXGEFW_MAX_MTU)
1161 return MXGEFW_MAX_MTU - MXGEFW_PAD;
1163 /* try to set nbufs to see if it we can
1164 use virtually contiguous jumbos */
1166 status = mxge_send_cmd(sc, MXGEFW_CMD_ALWAYS_USE_N_BIG_BUFFERS,
1169 return MXGEFW_MAX_MTU - MXGEFW_PAD;
1171 /* otherwise, we're limited to MJUMPAGESIZE */
1172 return MJUMPAGESIZE - MXGEFW_PAD;
/*
 * mxge_reset: reset the firmware and re-establish all shared state —
 * intrq size, slice count/RSS queues, per-slice interrupt-queue DMA
 * addresses, coalescing/IRQ-ack/deassert register pointers — then rerun
 * the DMA benchmark, zero per-slice counters, and reapply MAC address,
 * promisc, pause and multicast settings.
 */
1176 mxge_reset(mxge_softc_t *sc, int interrupts_setup)
1178 struct mxge_slice_state *ss;
1179 mxge_rx_done_t *rx_done;
1180 volatile uint32_t *irq_claim;
1184 /* try to send a reset command to the card to see if it
1186 memset(&cmd, 0, sizeof (cmd));
1187 status = mxge_send_cmd(sc, MXGEFW_CMD_RESET, &cmd);
1189 device_printf(sc->dev, "failed reset\n");
/* (re)start the periodic dummy RDMAs */
1193 mxge_dummy_rdma(sc, 1);
1196 /* set the intrq size */
1197 cmd.data0 = sc->rx_ring_size;
1198 status = mxge_send_cmd(sc, MXGEFW_CMD_SET_INTRQ_SIZE, &cmd);
1201 * Even though we already know how many slices are supported
1202 * via mxge_slice_probe(), MXGEFW_CMD_GET_MAX_RSS_QUEUES
1203 * has magic side effects, and must be called after a reset.
1204 * It must be called prior to calling any RSS related cmds,
1205 * including assigning an interrupt queue for anything but
1206 * slice 0. It must also be called *after*
1207 * MXGEFW_CMD_SET_INTRQ_SIZE, since the intrq size is used by
1208 * the firmware to compute offsets.
1211 if (sc->num_slices > 1) {
1212 /* ask the maximum number of slices it supports */
1213 status = mxge_send_cmd(sc, MXGEFW_CMD_GET_MAX_RSS_QUEUES,
1216 device_printf(sc->dev,
1217 "failed to get number of slices\n");
1221 * MXGEFW_CMD_ENABLE_RSS_QUEUES must be called prior
1222 * to setting up the interrupt queue DMA
1224 cmd.data0 = sc->num_slices;
1225 cmd.data1 = MXGEFW_SLICE_INTR_MODE_ONE_PER_SLICE;
1226 status = mxge_send_cmd(sc,
1229 device_printf(sc->dev,
1230 "failed to set number of slices\n");
1236 if (interrupts_setup) {
1237 /* Now exchange information about interrupts */
1238 for (slice = 0; slice < sc->num_slices; slice++) {
1239 rx_done = &sc->ss[slice].rx_done;
1240 memset(rx_done->entry, 0, sc->rx_ring_size);
1241 cmd.data0 = MXGE_LOWPART_TO_U32(rx_done->dma.bus_addr);
1242 cmd.data1 = MXGE_HIGHPART_TO_U32(rx_done->dma.bus_addr);
1244 status |= mxge_send_cmd(sc,
1245 MXGEFW_CMD_SET_INTRQ_DMA,
/* fetch SRAM offsets of the interrupt control registers */
1250 status |= mxge_send_cmd(sc,
1251 MXGEFW_CMD_GET_INTR_COAL_DELAY_OFFSET, &cmd);
1254 sc->intr_coal_delay_ptr = (volatile uint32_t *)(sc->sram + cmd.data0);
1256 status |= mxge_send_cmd(sc, MXGEFW_CMD_GET_IRQ_ACK_OFFSET, &cmd);
1257 irq_claim = (volatile uint32_t *)(sc->sram + cmd.data0);
1260 status |= mxge_send_cmd(sc, MXGEFW_CMD_GET_IRQ_DEASSERT_OFFSET,
1262 sc->irq_deassert = (volatile uint32_t *)(sc->sram + cmd.data0);
1264 device_printf(sc->dev, "failed set interrupt parameters\n");
1269 *sc->intr_coal_delay_ptr = htobe32(sc->intr_coal_delay);
1272 /* run a DMA benchmark */
1273 (void) mxge_dma_test(sc, MXGEFW_DMA_TEST);
1275 for (slice = 0; slice < sc->num_slices; slice++) {
1276 ss = &sc->ss[slice];
/* each slice claims a pair of IRQ-ack registers */
1278 ss->irq_claim = irq_claim + (2 * slice);
1279 /* reset mcp/driver shared state back to 0 */
1280 ss->rx_done.idx = 0;
1281 ss->rx_done.cnt = 0;
1284 ss->tx.pkt_done = 0;
1289 ss->rx_small.cnt = 0;
1290 ss->lro_bad_csum = 0;
1292 ss->lro_flushed = 0;
1293 if (ss->fw_stats != NULL) {
1294 ss->fw_stats->valid = 0;
1295 ss->fw_stats->send_done_count = 0;
1298 sc->rdma_tags_available = 15;
/* reapply host-side configuration the reset wiped out */
1299 status = mxge_update_mac_address(sc);
1300 mxge_change_promisc(sc, 0);
1301 mxge_change_pause(sc, sc->pause);
1302 mxge_set_multicast_list(sc);
/*
 * Sysctl handler: get/set the interrupt coalescing delay (usecs).
 * Validates the new value (non-zero, <= 1s) and writes it to the
 * firmware's coalescing register under driver_mtx.
 */
1307 mxge_change_intr_coal(SYSCTL_HANDLER_ARGS)
1310 unsigned int intr_coal_delay;
1314 intr_coal_delay = sc->intr_coal_delay;
1315 err = sysctl_handle_int(oidp, &intr_coal_delay, arg2, req);
1319 if (intr_coal_delay == sc->intr_coal_delay)
1322 if (intr_coal_delay == 0 || intr_coal_delay > 1000*1000)
1325 mtx_lock(&sc->driver_mtx);
1326 *sc->intr_coal_delay_ptr = htobe32(intr_coal_delay);
1327 sc->intr_coal_delay = intr_coal_delay;
1329 mtx_unlock(&sc->driver_mtx);
/*
 * Sysctl handler: get/set flow control (pause) enablement, applying a
 * change through mxge_change_pause() under driver_mtx.
 */
1334 mxge_change_flow_control(SYSCTL_HANDLER_ARGS)
1337 unsigned int enabled;
1341 enabled = sc->pause;
1342 err = sysctl_handle_int(oidp, &enabled, arg2, req);
1346 if (enabled == sc->pause)
1349 mtx_lock(&sc->driver_mtx);
1350 err = mxge_change_pause(sc, enabled);
1351 mtx_unlock(&sc->driver_mtx);
/*
 * mxge_change_lro_locked: set the LRO segment count and the IFCAP_LRO
 * capability bit; if the interface is running, bounce it (close/open)
 * so the change takes effect.  Caller presumably holds driver_mtx —
 * TODO confirm against callers.
 */
1356 mxge_change_lro_locked(mxge_softc_t *sc, int lro_cnt)
1363 ifp->if_capenable &= ~IFCAP_LRO;
1365 ifp->if_capenable |= IFCAP_LRO;
1366 sc->lro_cnt = lro_cnt;
1367 if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
1369 err = mxge_open(sc);
/*
 * Sysctl handler: get/set the LRO count, delegating the actual change
 * to mxge_change_lro_locked() under driver_mtx.
 */
1375 mxge_change_lro(SYSCTL_HANDLER_ARGS)
1378 unsigned int lro_cnt;
1382 lro_cnt = sc->lro_cnt;
1383 err = sysctl_handle_int(oidp, &lro_cnt, arg2, req);
1387 if (lro_cnt == sc->lro_cnt)
1393 mtx_lock(&sc->driver_mtx);
1394 err = mxge_change_lro_locked(sc, lro_cnt);
1395 mtx_unlock(&sc->driver_mtx);
/*
 * Sysctl handler for big-endian firmware counters: byte-swap the value
 * at arg1 for presentation to userland.
 */
1400 mxge_handle_be32(SYSCTL_HANDLER_ARGS)
1406 arg2 = be32toh(*(int *)arg1);
1408 err = sysctl_handle_int(oidp, arg1, arg2, req);
/*
 * mxge_rem_sysctls: tear down the per-slice sysctl contexts and the
 * slice tree itself; safe no-op if they were never created.
 */
1414 mxge_rem_sysctls(mxge_softc_t *sc)
1416 struct mxge_slice_state *ss;
1419 if (sc->slice_sysctl_tree == NULL)
1422 for (slice = 0; slice < sc->num_slices; slice++) {
1423 ss = &sc->ss[slice];
1424 if (ss == NULL || ss->sysctl_tree == NULL)
1426 sysctl_ctx_free(&ss->sysctl_ctx);
1427 ss->sysctl_tree = NULL;
1429 sysctl_ctx_free(&sc->slice_sysctl_ctx);
1430 sc->slice_sysctl_tree = NULL;
/*
 * Register all of the driver's sysctl nodes: static device information,
 * performance tunables, the big-endian firmware drop counters (exported
 * via mxge_handle_be32), and a per-slice subtree of debug counters.
 *
 * Fix: the "flow_control_enabled" description was a copy-paste of the
 * intr_coal_delay description ("interrupt coalescing delay in usecs");
 * it now describes flow control.
 */
1434 mxge_add_sysctls(mxge_softc_t *sc)
1436 struct sysctl_ctx_list *ctx;
1437 struct sysctl_oid_list *children;
1439 struct mxge_slice_state *ss;
1443 ctx = device_get_sysctl_ctx(sc->dev);
1444 children = SYSCTL_CHILDREN(device_get_sysctl_tree(sc->dev));
/* firmware stats live in slice 0's DMA-shared stats block */
1445 fw = sc->ss[0].fw_stats;
1447 /* random information */
1448 SYSCTL_ADD_STRING(ctx, children, OID_AUTO,
1450 CTLFLAG_RD, &sc->fw_version,
1451 0, "firmware version");
1452 SYSCTL_ADD_STRING(ctx, children, OID_AUTO,
1454 CTLFLAG_RD, &sc->serial_number_string,
1455 0, "serial number");
1456 SYSCTL_ADD_STRING(ctx, children, OID_AUTO,
1458 CTLFLAG_RD, &sc->product_code_string,
1460 SYSCTL_ADD_INT(ctx, children, OID_AUTO,
1462 CTLFLAG_RD, &sc->link_width,
1464 SYSCTL_ADD_INT(ctx, children, OID_AUTO,
1466 CTLFLAG_RD, &sc->tx_boundary,
1468 SYSCTL_ADD_INT(ctx, children, OID_AUTO,
1470 CTLFLAG_RD, &sc->wc,
1471 0, "write combining PIO?");
1472 SYSCTL_ADD_INT(ctx, children, OID_AUTO,
1474 CTLFLAG_RD, &sc->read_dma,
1475 0, "DMA Read speed in MB/s");
1476 SYSCTL_ADD_INT(ctx, children, OID_AUTO,
1478 CTLFLAG_RD, &sc->write_dma,
1479 0, "DMA Write speed in MB/s");
1480 SYSCTL_ADD_INT(ctx, children, OID_AUTO,
1481 "read_write_dma_MBs",
1482 CTLFLAG_RD, &sc->read_write_dma,
1483 0, "DMA concurrent Read/Write speed in MB/s");
1486 /* performance related tunables */
1487 SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
1489 CTLTYPE_INT|CTLFLAG_RW, sc,
1490 0, mxge_change_intr_coal,
1491 "I", "interrupt coalescing delay in usecs");
1493 SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
1494 "flow_control_enabled",
1495 CTLTYPE_INT|CTLFLAG_RW, sc,
1496 0, mxge_change_flow_control,
/* was a copy-paste of the intr_coal_delay description */
1497 "I", "flow control on the nic");
1499 SYSCTL_ADD_INT(ctx, children, OID_AUTO,
1501 CTLFLAG_RW, &mxge_deassert_wait,
1502 0, "Wait for IRQ line to go low in ihandler");
1504 /* stats block from firmware is in network byte order.
1506 SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
1508 CTLTYPE_INT|CTLFLAG_RD, &fw->link_up,
1509 0, mxge_handle_be32,
1511 SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
1512 "rdma_tags_available",
1513 CTLTYPE_INT|CTLFLAG_RD, &fw->rdma_tags_available,
1514 0, mxge_handle_be32,
1515 "I", "rdma_tags_available");
1516 SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
1517 "dropped_bad_crc32",
1518 CTLTYPE_INT|CTLFLAG_RD,
1519 &fw->dropped_bad_crc32,
1520 0, mxge_handle_be32,
1521 "I", "dropped_bad_crc32");
1522 SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
1524 CTLTYPE_INT|CTLFLAG_RD,
1525 &fw->dropped_bad_phy,
1526 0, mxge_handle_be32,
1527 "I", "dropped_bad_phy");
1528 SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
1529 "dropped_link_error_or_filtered",
1530 CTLTYPE_INT|CTLFLAG_RD,
1531 &fw->dropped_link_error_or_filtered,
1532 0, mxge_handle_be32,
1533 "I", "dropped_link_error_or_filtered");
1534 SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
1535 "dropped_link_overflow",
1536 CTLTYPE_INT|CTLFLAG_RD, &fw->dropped_link_overflow,
1537 0, mxge_handle_be32,
1538 "I", "dropped_link_overflow");
1539 SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
1540 "dropped_multicast_filtered",
1541 CTLTYPE_INT|CTLFLAG_RD,
1542 &fw->dropped_multicast_filtered,
1543 0, mxge_handle_be32,
1544 "I", "dropped_multicast_filtered");
1545 SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
1546 "dropped_no_big_buffer",
1547 CTLTYPE_INT|CTLFLAG_RD, &fw->dropped_no_big_buffer,
1548 0, mxge_handle_be32,
1549 "I", "dropped_no_big_buffer");
1550 SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
1551 "dropped_no_small_buffer",
1552 CTLTYPE_INT|CTLFLAG_RD,
1553 &fw->dropped_no_small_buffer,
1554 0, mxge_handle_be32,
1555 "I", "dropped_no_small_buffer");
1556 SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
1558 CTLTYPE_INT|CTLFLAG_RD, &fw->dropped_overrun,
1559 0, mxge_handle_be32,
1560 "I", "dropped_overrun");
1561 SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
1563 CTLTYPE_INT|CTLFLAG_RD,
1565 0, mxge_handle_be32,
1566 "I", "dropped_pause");
1567 SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
1569 CTLTYPE_INT|CTLFLAG_RD, &fw->dropped_runt,
1570 0, mxge_handle_be32,
1571 "I", "dropped_runt");
1573 SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
1574 "dropped_unicast_filtered",
1575 CTLTYPE_INT|CTLFLAG_RD, &fw->dropped_unicast_filtered,
1576 0, mxge_handle_be32,
1577 "I", "dropped_unicast_filtered");
1579 /* verbose printing? */
1580 SYSCTL_ADD_INT(ctx, children, OID_AUTO,
1582 CTLFLAG_RW, &mxge_verbose,
1583 0, "verbose printing");
1586 SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
1588 CTLTYPE_INT|CTLFLAG_RW, sc,
1590 "I", "number of lro merge queues");
1593 /* add counters exported for debugging from all slices */
1594 sysctl_ctx_init(&sc->slice_sysctl_ctx);
1595 sc->slice_sysctl_tree =
1596 SYSCTL_ADD_NODE(&sc->slice_sysctl_ctx, children, OID_AUTO,
1597 "slice", CTLFLAG_RD, 0, "");
/* one numbered child node per slice, each with its own ctx so it
   can be torn down independently by mxge_rem_sysctls() */
1599 for (slice = 0; slice < sc->num_slices; slice++) {
1600 ss = &sc->ss[slice];
1601 sysctl_ctx_init(&ss->sysctl_ctx);
1602 ctx = &ss->sysctl_ctx;
1603 children = SYSCTL_CHILDREN(sc->slice_sysctl_tree);
1604 sprintf(slice_num, "%d", slice);
1606 SYSCTL_ADD_NODE(ctx, children, OID_AUTO, slice_num,
1608 children = SYSCTL_CHILDREN(ss->sysctl_tree);
1609 SYSCTL_ADD_INT(ctx, children, OID_AUTO,
1611 CTLFLAG_RD, &ss->rx_small.cnt,
1613 SYSCTL_ADD_INT(ctx, children, OID_AUTO,
1615 CTLFLAG_RD, &ss->rx_big.cnt,
1617 SYSCTL_ADD_INT(ctx, children, OID_AUTO,
1619 CTLFLAG_RD, &ss->tx.req,
1621 SYSCTL_ADD_INT(ctx, children, OID_AUTO,
1622 "lro_flushed", CTLFLAG_RD, &ss->lro_flushed,
1623 0, "number of lro merge queues flushed");
1625 SYSCTL_ADD_INT(ctx, children, OID_AUTO,
1626 "lro_queued", CTLFLAG_RD, &ss->lro_queued,
1627 0, "number of frames appended to lro merge"
1630 /* only transmit from slice 0 for now */
1634 SYSCTL_ADD_INT(ctx, children, OID_AUTO,
1636 CTLFLAG_RD, &ss->tx.done,
1638 SYSCTL_ADD_INT(ctx, children, OID_AUTO,
1640 CTLFLAG_RD, &ss->tx.pkt_done,
1642 SYSCTL_ADD_INT(ctx, children, OID_AUTO,
1644 CTLFLAG_RD, &ss->tx.stall,
1646 SYSCTL_ADD_INT(ctx, children, OID_AUTO,
1648 CTLFLAG_RD, &ss->tx.wake,
1650 SYSCTL_ADD_INT(ctx, children, OID_AUTO,
1652 CTLFLAG_RD, &ss->tx.defrag,
1657 /* copy an array of mcp_kreq_ether_send_t's to the mcp. Copy
1658 backwards one at a time and handle ring wraps */
/*
 * Fallback submit path: writes the send requests one at a time from the
 * highest slot down, masking each index so ring wrap-around is handled.
 */
1661 mxge_submit_req_backwards(mxge_tx_ring_t *tx,
1662 mcp_kreq_ether_send_t *src, int cnt)
1664 int idx, starting_slot;
1665 starting_slot = tx->req;
/* mask keeps the slot index inside the ring on wrap */
1668 idx = (starting_slot + cnt) & tx->mask;
1669 mxge_pio_copy(&tx->lanai[idx],
1670 &src[cnt], sizeof(*src));
1676 * copy an array of mcp_kreq_ether_send_t's to the mcp. Copy
1677 * at most 32 bytes at a time, so as to avoid involving the software
1678 * pio handler in the nic. We re-write the first segment's flags
1679 * to mark them valid only after writing the entire chain
/*
 * Fast submit path: PIO-copies the request chain to the NIC two
 * requests (32 bytes) at a time when no ring wrap occurs, otherwise
 * falls back to mxge_submit_req_backwards().  The first request's
 * flags word is written last so the NIC only sees a valid chain.
 * NOTE(review): interior lines are missing from this extract.
 */
1683 mxge_submit_req(mxge_tx_ring_t *tx, mcp_kreq_ether_send_t *src,
1688 volatile uint32_t *dst_ints;
1689 mcp_kreq_ether_send_t *srcp;
1690 volatile mcp_kreq_ether_send_t *dstp, *dst;
1693 idx = tx->req & tx->mask;
/* remember the real flags; they are restored and written last */
1695 last_flags = src->flags;
1698 dst = dstp = &tx->lanai[idx];
/* no wrap: stream pairs of requests with a barrier per 32 bytes */
1701 if ((idx + cnt) < tx->mask) {
1702 for (i = 0; i < (cnt - 1); i += 2) {
1703 mxge_pio_copy(dstp, srcp, 2 * sizeof(*src));
1704 mb(); /* force write every 32 bytes */
1709 /* submit all but the first request, and ensure
1710 that it is submitted below */
1711 mxge_submit_req_backwards(tx, src, cnt);
1715 /* submit the first request */
1716 mxge_pio_copy(dstp, srcp, sizeof(*src));
1717 mb(); /* barrier before setting valid flag */
1720 /* re-write the last 32-bits with the valid flags */
1721 src->flags = last_flags;
1722 src_ints = (uint32_t *)src;
1724 dst_ints = (volatile uint32_t *)dst;
1726 *dst_ints = *src_ints;
/*
 * Build and submit the send-request chain for a TSO packet.
 * Parses the IP/TCP headers (copying them to a scratch buffer if they
 * straddle the first mbuf), then walks the busdma segment list,
 * chopping segments at MSS boundaries and maintaining the firmware's
 * retroactively-filled rdma_count fields (see comment below).
 * On descriptor overflow the mapping is unloaded and the packet is
 * dropped with a diagnostic.
 * NOTE(review): interior lines are missing from this extract.
 */
1734 mxge_encap_tso(struct mxge_slice_state *ss, struct mbuf *m,
1735 int busdma_seg_cnt, int ip_off)
1738 mcp_kreq_ether_send_t *req;
1739 bus_dma_segment_t *seg;
1742 uint32_t low, high_swapped;
1743 int len, seglen, cum_len, cum_len_next;
1744 int next_is_first, chop, cnt, rdma_count, small;
1745 uint16_t pseudo_hdr_offset, cksum_offset, mss;
1746 uint8_t flags, flags_next;
1749 mss = m->m_pkthdr.tso_segsz;
1751 /* negative cum_len signifies to the
1752 * send loop that we are still in the
1753 * header portion of the TSO packet.
1756 /* ensure we have the ethernet, IP and TCP
1757 header together in the first mbuf, copy
1758 it to a scratch buffer if not */
1759 if (__predict_false(m->m_len < ip_off + sizeof (*ip))) {
1760 m_copydata(m, 0, ip_off + sizeof (*ip),
1762 ip = (struct ip *)(ss->scratch + ip_off);
1764 ip = (struct ip *)(mtod(m, char *) + ip_off);
1766 if (__predict_false(m->m_len < ip_off + (ip->ip_hl << 2)
1768 m_copydata(m, 0, ip_off + (ip->ip_hl << 2)
1769 + sizeof (*tcp), ss->scratch);
1770 ip = (struct ip *)(mtod(m, char *) + ip_off);
1773 tcp = (struct tcphdr *)((char *)ip + (ip->ip_hl << 2));
/* cum_len starts at -(total header length); it turns non-negative
   once the payload portion of the packet is reached */
1774 cum_len = -(ip_off + ((ip->ip_hl + tcp->th_off) << 2));
1776 /* TSO implies checksum offload on this hardware */
1777 cksum_offset = ip_off + (ip->ip_hl << 2);
1778 flags = MXGEFW_FLAGS_TSO_HDR | MXGEFW_FLAGS_FIRST;
1781 /* for TSO, pseudo_hdr_offset holds mss.
1782 * The firmware figures out where to put
1783 * the checksum by parsing the header. */
1784 pseudo_hdr_offset = htobe16(mss);
1791 /* "rdma_count" is the number of RDMAs belonging to the
1792 * current packet BEFORE the current send request. For
1793 * non-TSO packets, this is equal to "count".
1794 * For TSO packets, rdma_count needs to be reset
1795 * to 0 after a segment cut.
1797 * The rdma_count field of the send request is
1798 * the number of RDMAs of the packet starting at
1799 * that request. For TSO send requests with one ore more cuts
1800 * in the middle, this is the number of RDMAs starting
1801 * after the last cut in the request. All previous
1802 * segments before the last cut implicitly have 1 RDMA.
1804 * Since the number of RDMAs is not known beforehand,
1805 * it must be filled-in retroactively - after each
1806 * segmentation cut or at the end of the entire packet.
1809 while (busdma_seg_cnt) {
1810 /* Break the busdma segment up into pieces*/
1811 low = MXGE_LOWPART_TO_U32(seg->ds_addr);
1812 high_swapped = htobe32(MXGE_HIGHPART_TO_U32(seg->ds_addr));
1816 flags_next = flags & ~MXGEFW_FLAGS_FIRST;
1818 cum_len_next = cum_len + seglen;
/* retroactively patch the rdma_count of the request that
   started the current RDMA run */
1819 (req-rdma_count)->rdma_count = rdma_count + 1;
1820 if (__predict_true(cum_len >= 0)) {
/* payload: chop at MSS boundaries */
1822 chop = (cum_len_next > mss);
1823 cum_len_next = cum_len_next % mss;
1824 next_is_first = (cum_len_next == 0);
1825 flags |= chop * MXGEFW_FLAGS_TSO_CHOP;
1826 flags_next |= next_is_first *
1828 rdma_count |= -(chop | next_is_first);
1829 rdma_count += chop & !next_is_first;
1830 } else if (cum_len_next >= 0) {
/* header/payload boundary crossed in this piece */
1835 small = (mss <= MXGEFW_SEND_SMALL_SIZE);
1836 flags_next = MXGEFW_FLAGS_TSO_PLD |
1837 MXGEFW_FLAGS_FIRST |
1838 (small * MXGEFW_FLAGS_SMALL);
/* emit one send request for this piece */
1841 req->addr_high = high_swapped;
1842 req->addr_low = htobe32(low);
1843 req->pseudo_hdr_offset = pseudo_hdr_offset;
1845 req->rdma_count = 1;
1846 req->length = htobe16(seglen);
1847 req->cksum_offset = cksum_offset;
1848 req->flags = flags | ((cum_len & 1) *
1849 MXGEFW_FLAGS_ALIGN_ODD);
1852 cum_len = cum_len_next;
1857 if (__predict_false(cksum_offset > seglen))
1858 cksum_offset -= seglen;
/* bail to the drop path if we overrun the descriptor budget */
1861 if (__predict_false(cnt > tx->max_desc))
1867 (req-rdma_count)->rdma_count = rdma_count;
/* walk backwards marking TSO_LAST until the last cut boundary */
1871 req->flags |= MXGEFW_FLAGS_TSO_LAST;
1872 } while (!(req->flags & (MXGEFW_FLAGS_TSO_CHOP | MXGEFW_FLAGS_FIRST)));
1874 tx->info[((cnt - 1) + tx->req) & tx->mask].flag = 1;
1875 mxge_submit_req(tx, tx->req_list, cnt);
/* drop path: too many descriptors needed for this TSO packet */
1879 bus_dmamap_unload(tx->dmat, tx->info[tx->req & tx->mask].map);
1881 ss->sc->ifp->if_oerrors++;
1883 printf("tx->max_desc exceeded via TSO!\n");
1884 printf("mss = %d, %ld, %d!\n", mss,
1885 (long)seg - (long)tx->seg_list, tx->max_desc);
1892 #endif /* IFCAP_TSO4 */
1894 #ifdef MXGE_NEW_VLAN_API
1896 * We reproduce the software vlan tag insertion from
1897 * net/if_vlan.c:vlan_start() here so that we can advertise "hardware"
1898 * vlan tag insertion. We need to advertise this in order to have the
1899 * vlan interface respect our csum offload flags.
/*
 * Prepend an 802.1Q header carrying m_pkthdr.ether_vtag to the mbuf.
 * Returns the (possibly reallocated) mbuf, or NULL on allocation
 * failure; on success M_VLANTAG is cleared since the tag is now
 * in-band.
 */
1901 static struct mbuf *
1902 mxge_vlan_tag_insert(struct mbuf *m)
1904 struct ether_vlan_header *evl;
/* make room for the 4-byte VLAN encapsulation */
1906 M_PREPEND(m, ETHER_VLAN_ENCAP_LEN, M_DONTWAIT);
1907 if (__predict_false(m == NULL))
/* header must be contiguous for the pointer arithmetic below */
1909 if (m->m_len < sizeof(*evl)) {
1910 m = m_pullup(m, sizeof(*evl));
1911 if (__predict_false(m == NULL))
1915 * Transform the Ethernet header into an Ethernet header
1916 * with 802.1Q encapsulation.
1918 evl = mtod(m, struct ether_vlan_header *);
1919 bcopy((char *)evl + ETHER_VLAN_ENCAP_LEN,
1920 (char *)evl, ETHER_HDR_LEN - ETHER_TYPE_LEN);
1921 evl->evl_encap_proto = htons(ETHERTYPE_VLAN);
1922 evl->evl_tag = htons(m->m_pkthdr.ether_vtag);
1923 m->m_flags &= ~M_VLANTAG;
1926 #endif /* MXGE_NEW_VLAN_API */
/*
 * Map a non-TSO mbuf chain for DMA and build its send-request chain.
 * Handles software VLAN tag insertion, an EFBIG retry via m_defrag(),
 * optional TCP/UDP checksum offload, runt padding to 60 bytes using
 * the zero-pad DMA buffer, and finally submits the chain to the NIC.
 * TSO packets are diverted to mxge_encap_tso().
 * NOTE(review): interior lines are missing from this extract.
 */
1929 mxge_encap(struct mxge_slice_state *ss, struct mbuf *m)
1932 mcp_kreq_ether_send_t *req;
1933 bus_dma_segment_t *seg;
1938 int cnt, cum_len, err, i, idx, odd_flag, ip_off;
1939 uint16_t pseudo_hdr_offset;
1940 uint8_t flags, cksum_offset;
1947 ip_off = sizeof (struct ether_header);
1948 #ifdef MXGE_NEW_VLAN_API
/* fold an out-of-band VLAN tag into the frame before mapping */
1949 if (m->m_flags & M_VLANTAG) {
1950 m = mxge_vlan_tag_insert(m);
1951 if (__predict_false(m == NULL))
1953 ip_off += ETHER_VLAN_ENCAP_LEN;
1956 /* (try to) map the frame for DMA */
1957 idx = tx->req & tx->mask;
1958 err = bus_dmamap_load_mbuf_sg(tx->dmat, tx->info[idx].map,
1959 m, tx->seg_list, &cnt,
1961 if (__predict_false(err == EFBIG)) {
1962 /* Too many segments in the chain. Try
1964 m_tmp = m_defrag(m, M_NOWAIT);
1965 if (m_tmp == NULL) {
/* retry the mapping with the defragmented chain */
1970 err = bus_dmamap_load_mbuf_sg(tx->dmat,
1972 m, tx->seg_list, &cnt,
1975 if (__predict_false(err != 0)) {
1976 device_printf(sc->dev, "bus_dmamap_load_mbuf_sg returned %d"
1977 " packet len = %d\n", err, m->m_pkthdr.len);
1980 bus_dmamap_sync(tx->dmat, tx->info[idx].map,
1981 BUS_DMASYNC_PREWRITE);
1982 tx->info[idx].m = m;
1985 /* TSO is different enough, we handle it in another routine */
1986 if (m->m_pkthdr.csum_flags & (CSUM_TSO)) {
1987 mxge_encap_tso(ss, m, cnt, ip_off);
1994 pseudo_hdr_offset = 0;
1995 flags = MXGEFW_FLAGS_NO_TSO;
1997 /* checksum offloading? */
1998 if (m->m_pkthdr.csum_flags & (CSUM_DELAY_DATA)) {
1999 /* ensure ip header is in first mbuf, copy
2000 it to a scratch buffer if not */
2001 if (__predict_false(m->m_len < ip_off + sizeof (*ip))) {
2002 m_copydata(m, 0, ip_off + sizeof (*ip),
2004 ip = (struct ip *)(ss->scratch + ip_off);
2006 ip = (struct ip *)(mtod(m, char *) + ip_off);
2008 cksum_offset = ip_off + (ip->ip_hl << 2);
2009 pseudo_hdr_offset = cksum_offset + m->m_pkthdr.csum_data;
2010 pseudo_hdr_offset = htobe16(pseudo_hdr_offset);
2011 req->cksum_offset = cksum_offset;
2012 flags |= MXGEFW_FLAGS_CKSUM;
2013 odd_flag = MXGEFW_FLAGS_ALIGN_ODD;
2017 if (m->m_pkthdr.len < MXGEFW_SEND_SMALL_SIZE)
2018 flags |= MXGEFW_FLAGS_SMALL;
2020 /* convert segments into a request list */
2023 req->flags = MXGEFW_FLAGS_FIRST;
2024 for (i = 0; i < cnt; i++) {
2026 htobe32(MXGE_LOWPART_TO_U32(seg->ds_addr));
2028 htobe32(MXGE_HIGHPART_TO_U32(seg->ds_addr));
2029 req->length = htobe16(seg->ds_len);
2030 req->cksum_offset = cksum_offset;
/* cksum_offset tracks how far into remaining segments the
   checksum start lies */
2031 if (cksum_offset > seg->ds_len)
2032 cksum_offset -= seg->ds_len;
2035 req->pseudo_hdr_offset = pseudo_hdr_offset;
2036 req->pad = 0; /* complete solid 16-byte block */
2037 req->rdma_count = 1;
2038 req->flags |= flags | ((cum_len & 1) * odd_flag);
2039 cum_len += seg->ds_len;
2045 /* pad runts to 60 bytes */
/* extra descriptor points at the shared zero-pad buffer */
2049 htobe32(MXGE_LOWPART_TO_U32(sc->zeropad_dma.bus_addr));
2051 htobe32(MXGE_HIGHPART_TO_U32(sc->zeropad_dma.bus_addr));
2052 req->length = htobe16(60 - cum_len);
2053 req->cksum_offset = 0;
2054 req->pseudo_hdr_offset = pseudo_hdr_offset;
2055 req->pad = 0; /* complete solid 16-byte block */
2056 req->rdma_count = 1;
2057 req->flags |= flags | ((cum_len & 1) * odd_flag);
2061 tx->req_list[0].rdma_count = cnt;
2063 /* print what the firmware will see */
2064 for (i = 0; i < cnt; i++) {
2065 printf("%d: addr: 0x%x 0x%x len:%d pso%d,"
2066 "cso:%d, flags:0x%x, rdma:%d\n",
2067 i, (int)ntohl(tx->req_list[i].addr_high),
2068 (int)ntohl(tx->req_list[i].addr_low),
2069 (int)ntohs(tx->req_list[i].length),
2070 (int)ntohs(tx->req_list[i].pseudo_hdr_offset),
2071 tx->req_list[i].cksum_offset, tx->req_list[i].flags,
2072 tx->req_list[i].rdma_count);
2074 printf("--------------\n");
2076 tx->info[((cnt - 1) + tx->req) & tx->mask].flag = 1;
2077 mxge_submit_req(tx, tx->req_list, cnt);
/*
 * Drain the interface send queue while transmit descriptors remain,
 * encapsulating each dequeued mbuf; sets IFF_DRV_OACTIVE when the
 * ring fills so the stack stops handing us packets.
 * Caller holds the slice tx mutex.
 * NOTE(review): interior lines are missing from this extract.
 */
2090 mxge_start_locked(struct mxge_slice_state *ss)
/* keep going while at least max_desc free slots remain */
2100 while ((tx->mask - (tx->req - tx->done)) > tx->max_desc) {
2101 IFQ_DRV_DEQUEUE(&ifp->if_snd, m);
2105 /* let BPF see it */
2108 /* give it to the nic */
2111 /* ran out of transmit slots */
2112 if ((sc->ifp->if_drv_flags & IFF_DRV_OACTIVE) == 0) {
2113 sc->ifp->if_drv_flags |= IFF_DRV_OACTIVE;
/*
 * if_start entry point: grabs slice 0's tx lock and drains the send
 * queue via mxge_start_locked().
 */
2119 mxge_start(struct ifnet *ifp)
2121 mxge_softc_t *sc = ifp->if_softc;
2122 struct mxge_slice_state *ss;
2124 /* only use the first slice for now */
2126 mtx_lock(&ss->tx.mtx);
2127 mxge_start_locked(ss);
2128 mtx_unlock(&ss->tx.mtx);
2132 * copy an array of mcp_kreq_ether_recv_t's to the mcp. Copy
2133 * at most 32 bytes at a time, so as to avoid involving the software
2134 * pio handler in the nic. We re-write the first segment's low
2135 * DMA address to mark it valid only after we write the entire chunk
/*
 * Submit 8 receive descriptors to the NIC in two 32-byte PIO copies.
 * The first descriptor's addr_low is temporarily poisoned to
 * 0xffffffff so the NIC ignores the batch until the final store of
 * the real address marks it valid.
 */
2139 mxge_submit_8rx(volatile mcp_kreq_ether_recv_t *dst,
2140 mcp_kreq_ether_recv_t *src)
2144 low = src->addr_low;
2145 src->addr_low = 0xffffffff;
2146 mxge_pio_copy(dst, src, 4 * sizeof (*src));
2148 mxge_pio_copy(dst + 4, src + 4, 4 * sizeof (*src));
/* restore the host copy, then validate the batch on the NIC */
2150 src->addr_low = low;
2151 dst->addr_low = low;
/*
 * Allocate and DMA-map a small receive mbuf for ring slot idx, record
 * its bus address in the shadow ring, and submit descriptors to the
 * NIC in batches of 8 (every time idx hits a slot-of-8 boundary).
 * NOTE(review): interior lines (error paths, return) are missing
 * from this extract.
 */
2156 mxge_get_buf_small(struct mxge_slice_state *ss, bus_dmamap_t map, int idx)
2158 bus_dma_segment_t seg;
2160 mxge_rx_ring_t *rx = &ss->rx_small;
2163 m = m_gethdr(M_DONTWAIT, MT_DATA);
2170 err = bus_dmamap_load_mbuf_sg(rx->dmat, map, m,
2171 &seg, &cnt, BUS_DMA_NOWAIT);
2176 rx->info[idx].m = m;
2177 rx->shadow[idx].addr_low =
2178 htobe32(MXGE_LOWPART_TO_U32(seg.ds_addr));
2179 rx->shadow[idx].addr_high =
2180 htobe32(MXGE_HIGHPART_TO_U32(seg.ds_addr));
/* hand the last 8 shadow descriptors to the NIC as one batch */
2184 mxge_submit_8rx(&rx->lanai[idx - 7], &rx->shadow[idx - 7]);
/*
 * Allocate and DMA-map a big (cluster or jumbo-cluster) receive mbuf
 * for ring slot idx and record its address(es) in the shadow ring.
 * With MXGE_VIRT_JUMBOS a jumbo buffer may span several segments, each
 * getting its own shadow slot.  Descriptors are submitted to the NIC
 * in batches of 8.
 * NOTE(review): interior lines are missing from this extract.
 */
2189 mxge_get_buf_big(struct mxge_slice_state *ss, bus_dmamap_t map, int idx)
2191 bus_dma_segment_t seg[3];
2193 mxge_rx_ring_t *rx = &ss->rx_big;
/* standard cluster vs jumbo cluster allocation */
2196 if (rx->cl_size == MCLBYTES)
2197 m = m_getcl(M_DONTWAIT, MT_DATA, M_PKTHDR);
2199 m = m_getjcl(M_DONTWAIT, MT_DATA, M_PKTHDR, rx->cl_size);
2205 m->m_len = rx->cl_size;
2206 err = bus_dmamap_load_mbuf_sg(rx->dmat, map, m,
2207 seg, &cnt, BUS_DMA_NOWAIT);
2212 rx->info[idx].m = m;
2213 rx->shadow[idx].addr_low =
2214 htobe32(MXGE_LOWPART_TO_U32(seg->ds_addr));
2215 rx->shadow[idx].addr_high =
2216 htobe32(MXGE_HIGHPART_TO_U32(seg->ds_addr));
2218 #if MXGE_VIRT_JUMBOS
/* additional segments of a virtually-contiguous jumbo buffer */
2219 for (i = 1; i < cnt; i++) {
2220 rx->shadow[idx + i].addr_low =
2221 htobe32(MXGE_LOWPART_TO_U32(seg[i].ds_addr));
2222 rx->shadow[idx + i].addr_high =
2223 htobe32(MXGE_HIGHPART_TO_U32(seg[i].ds_addr));
2228 for (i = 0; i < rx->nbufs; i++) {
2229 if ((idx & 7) == 7) {
2230 mxge_submit_8rx(&rx->lanai[idx - 7],
2231 &rx->shadow[idx - 7]);
2239 * Myri10GE hardware checksums are not valid if the sender
2240 * padded the frame with non-zero padding. This is because
2241 * the firmware just does a simple 16-bit 1s complement
2242 * checksum across the entire frame, excluding the first 14
2243 * bytes. It is best to simply to check the checksum and
2244 * tell the stack about it only if the checksum is good
/*
 * Validate the firmware's partial checksum for IPv4 TCP/UDP frames by
 * folding in the pseudo-header; returns 0 when the checksum verifies.
 * Non-IPv4 or non-TCP/UDP frames are rejected (return paths are
 * missing from this extract).
 */
2247 static inline uint16_t
2248 mxge_rx_csum(struct mbuf *m, int csum)
2250 struct ether_header *eh;
2254 eh = mtod(m, struct ether_header *);
2256 /* only deal with IPv4 TCP & UDP for now */
2257 if (__predict_false(eh->ether_type != htons(ETHERTYPE_IP)))
2259 ip = (struct ip *)(eh + 1);
2260 if (__predict_false(ip->ip_p != IPPROTO_TCP &&
2261 ip->ip_p != IPPROTO_UDP))
/* add the pseudo-header to the firmware's raw frame checksum */
2264 c = in_pseudo(ip->ip_src.s_addr, ip->ip_dst.s_addr,
2265 htonl(ntohs(csum) + ntohs(ip->ip_len) +
2266 - (ip->ip_hl << 2) + ip->ip_p));
/*
 * Strip an in-band 802.1Q header from a received frame: adjust the
 * firmware's partial checksum to exclude the 4 encapsulation bytes,
 * record the tag in the mbuf (ether_vtag or an m_tag depending on
 * API), then slide the Ethernet addresses over the 802.1Q header.
 */
2272 mxge_vlan_tag_remove(struct mbuf *m, uint32_t *csum)
2274 struct ether_vlan_header *evl;
2275 struct ether_header *eh;
2278 evl = mtod(m, struct ether_vlan_header *);
2279 eh = mtod(m, struct ether_header *);
2282 * fix checksum by subtracting ETHER_VLAN_ENCAP_LEN bytes
2283 * after what the firmware thought was the end of the ethernet
2287 /* put checksum into host byte order */
2288 *csum = ntohs(*csum);
2289 partial = ntohl(*(uint32_t *)(mtod(m, char *) + ETHER_HDR_LEN));
/* ones-complement subtraction of the encapsulation bytes, with
   end-around carry folding */
2290 (*csum) += ~partial;
2291 (*csum) += ((*csum) < ~partial);
2292 (*csum) = ((*csum) >> 16) + ((*csum) & 0xFFFF);
2293 (*csum) = ((*csum) >> 16) + ((*csum) & 0xFFFF);
2295 /* restore checksum to network byte order;
2296 later consumers expect this */
2297 *csum = htons(*csum);
2300 #ifdef MXGE_NEW_VLAN_API
2301 m->m_pkthdr.ether_vtag = ntohs(evl->evl_tag);
/* legacy API: attach the tag as an mbuf tag instead */
2305 mtag = m_tag_alloc(MTAG_VLAN, MTAG_VLAN_TAG, sizeof(u_int),
2309 VLAN_TAG_VALUE(mtag) = ntohs(evl->evl_tag);
2310 m_tag_prepend(m, mtag);
2314 m->m_flags |= M_VLANTAG;
2317 * Remove the 802.1q header by copying the Ethernet
2318 * addresses over it and adjusting the beginning of
2319 * the data in the mbuf. The encapsulated Ethernet
2320 * type field is already in place.
2322 bcopy((char *)evl, (char *)evl + ETHER_VLAN_ENCAP_LEN,
2323 ETHER_HDR_LEN - ETHER_TYPE_LEN);
2324 m_adj(m, ETHER_VLAN_ENCAP_LEN);
/*
 * Process one received frame from the big-buffer ring: refill the
 * slot (dropping the frame if refill fails), unmap and swap dmamaps,
 * strip any VLAN tag, verify the checksum (optionally feeding good
 * TCP frames to LRO), and pass the mbuf up the stack.
 * NOTE(review): interior lines are missing from this extract.
 */
2329 mxge_rx_done_big(struct mxge_slice_state *ss, uint32_t len, uint32_t csum)
2334 struct ether_header *eh;
2336 bus_dmamap_t old_map;
2338 uint16_t tcpudp_csum;
2343 idx = rx->cnt & rx->mask;
2344 rx->cnt += rx->nbufs;
2345 /* save a pointer to the received mbuf */
2346 m = rx->info[idx].m;
2347 /* try to replace the received mbuf */
2348 if (mxge_get_buf_big(ss, rx->extra_map, idx)) {
2349 /* drop the frame -- the old mbuf is re-cycled */
2354 /* unmap the received buffer */
2355 old_map = rx->info[idx].map;
2356 bus_dmamap_sync(rx->dmat, old_map, BUS_DMASYNC_POSTREAD);
2357 bus_dmamap_unload(rx->dmat, old_map);
2359 /* swap the bus_dmamap_t's */
2360 rx->info[idx].map = rx->extra_map;
2361 rx->extra_map = old_map;
2363 /* mcp implicitly skips 1st 2 bytes so that packet is properly
2365 m->m_data += MXGEFW_PAD;
2367 m->m_pkthdr.rcvif = ifp;
2368 m->m_len = m->m_pkthdr.len = len;
2370 eh = mtod(m, struct ether_header *);
2371 if (eh->ether_type == htons(ETHERTYPE_VLAN)) {
2372 mxge_vlan_tag_remove(m, &csum);
2374 /* if the checksum is valid, mark it in the mbuf header */
2375 if (sc->csum_flag && (0 == (tcpudp_csum = mxge_rx_csum(m, csum)))) {
/* good TCP frames may be merged by LRO instead of going up now */
2376 if (sc->lro_cnt && (0 == mxge_lro_rx(ss, m, csum)))
2378 /* otherwise, it was a UDP frame, or a TCP frame which
2379 we could not do LRO on. Tell the stack that the
2381 m->m_pkthdr.csum_data = 0xffff;
2382 m->m_pkthdr.csum_flags = CSUM_PSEUDO_HDR | CSUM_DATA_VALID;
2384 /* pass the frame up the stack */
2385 (*ifp->if_input)(ifp, m);
/*
 * Small-buffer counterpart of mxge_rx_done_big(): identical flow
 * (refill-or-drop, dmamap swap, VLAN strip, checksum/LRO, if_input)
 * but operates on ss->rx_small.
 * NOTE(review): interior lines are missing from this extract.
 */
2389 mxge_rx_done_small(struct mxge_slice_state *ss, uint32_t len, uint32_t csum)
2393 struct ether_header *eh;
2396 bus_dmamap_t old_map;
2398 uint16_t tcpudp_csum;
2403 idx = rx->cnt & rx->mask;
2405 /* save a pointer to the received mbuf */
2406 m = rx->info[idx].m;
2407 /* try to replace the received mbuf */
2408 if (mxge_get_buf_small(ss, rx->extra_map, idx)) {
2409 /* drop the frame -- the old mbuf is re-cycled */
2414 /* unmap the received buffer */
2415 old_map = rx->info[idx].map;
2416 bus_dmamap_sync(rx->dmat, old_map, BUS_DMASYNC_POSTREAD);
2417 bus_dmamap_unload(rx->dmat, old_map);
2419 /* swap the bus_dmamap_t's */
2420 rx->info[idx].map = rx->extra_map;
2421 rx->extra_map = old_map;
2423 /* mcp implicitly skips 1st 2 bytes so that packet is properly
2425 m->m_data += MXGEFW_PAD;
2427 m->m_pkthdr.rcvif = ifp;
2428 m->m_len = m->m_pkthdr.len = len;
2430 eh = mtod(m, struct ether_header *);
2431 if (eh->ether_type == htons(ETHERTYPE_VLAN)) {
2432 mxge_vlan_tag_remove(m, &csum);
2434 /* if the checksum is valid, mark it in the mbuf header */
2435 if (sc->csum_flag && (0 == (tcpudp_csum = mxge_rx_csum(m, csum)))) {
2436 if (sc->lro_cnt && (0 == mxge_lro_rx(ss, m, csum)))
2438 /* otherwise, it was a UDP frame, or a TCP frame which
2439 we could not do LRO on. Tell the stack that the
2441 m->m_pkthdr.csum_data = 0xffff;
2442 m->m_pkthdr.csum_flags = CSUM_PSEUDO_HDR | CSUM_DATA_VALID;
2444 /* pass the frame up the stack */
2445 (*ifp->if_input)(ifp, m);
/*
 * Drain the slice's rx_done ring: dispatch each completed entry to the
 * small or big receive path based on length, with a livelock limit,
 * then flush all active LRO sessions.
 */
2449 mxge_clean_rx_done(struct mxge_slice_state *ss)
2451 mxge_rx_done_t *rx_done = &ss->rx_done;
2452 struct lro_entry *lro;
/* a zero length marks an empty (not yet DMA'ed) entry */
2458 while (rx_done->entry[rx_done->idx].length != 0) {
2459 length = ntohs(rx_done->entry[rx_done->idx].length);
2460 rx_done->entry[rx_done->idx].length = 0;
2461 checksum = rx_done->entry[rx_done->idx].checksum;
/* frames that fit in an mbuf header came from the small ring */
2462 if (length <= (MHLEN - MXGEFW_PAD))
2463 mxge_rx_done_small(ss, length, checksum);
2465 mxge_rx_done_big(ss, length, checksum);
2467 rx_done->idx = rx_done->cnt & rx_done->mask;
2469 /* limit potential for livelock */
2470 if (__predict_false(++limit > rx_done->mask / 2))
/* push any frames held by LRO up the stack */
2473 while (!SLIST_EMPTY(&ss->lro_active)) {
2474 lro = SLIST_FIRST(&ss->lro_active);
2475 SLIST_REMOVE_HEAD(&ss->lro_active, next);
2476 mxge_lro_flush(ss, lro);
/*
 * Reap completed transmit descriptors up to the firmware's reported
 * packet-done index: free mbufs and unload dmamaps from flagged slots,
 * then clear IFF_DRV_OACTIVE and restart transmission once the ring
 * drains below a quarter full.
 * NOTE(review): interior lines are missing from this extract.
 */
2482 mxge_tx_done(struct mxge_slice_state *ss, uint32_t mcp_idx)
2492 while (tx->pkt_done != mcp_idx) {
2493 idx = tx->done & tx->mask;
2495 m = tx->info[idx].m;
2496 /* mbuf and DMA map only attached to the first
2500 tx->info[idx].m = NULL;
2501 map = tx->info[idx].map;
2502 bus_dmamap_unload(tx->dmat, map);
/* flag marks the last descriptor of a packet */
2505 if (tx->info[idx].flag) {
2506 tx->info[idx].flag = 0;
2511 /* If we have space, clear IFF_OACTIVE to tell the stack that
2512 its OK to send packets */
2514 if (ifp->if_drv_flags & IFF_DRV_OACTIVE &&
2515 tx->req - tx->done < (tx->mask + 1)/4) {
2516 mtx_lock(&ss->tx.mtx);
2517 ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;
2519 mxge_start_locked(ss);
2520 mtx_unlock(&ss->tx.mtx);
/*
 * XFP 10GbE compliance-byte decode table: maps each compliance bit to
 * an ifmedia type (0 where FreeBSD has no matching media type) and a
 * human-readable name.  Entry 0 (0x7f mask) is the CX4 special case
 * checked first by mxge_media_probe().
 */
2524 static struct mxge_media_type mxge_xfp_media_types[] =
2526 {IFM_10G_CX4, 0x7f, "10GBASE-CX4 (module)"},
2527 {IFM_10G_SR, (1 << 7), "10GBASE-SR"},
2528 {IFM_10G_LR, (1 << 6), "10GBASE-LR"},
2529 {0, (1 << 5), "10GBASE-ER"},
2530 {IFM_10G_LRM, (1 << 4), "10GBASE-LRM"},
2531 {0, (1 << 3), "10GBASE-SW"},
2532 {0, (1 << 2), "10GBASE-LW"},
2533 {0, (1 << 1), "10GBASE-EW"},
2534 {0, (1 << 0), "Reserved"}
/* SFP+ compliance-byte decode table (same layout as the XFP table) */
2536 static struct mxge_media_type mxge_sfp_media_types[] =
2538 {0, (1 << 7), "Reserved"},
2539 {IFM_10G_LRM, (1 << 6), "10GBASE-LRM"},
2540 {IFM_10G_LR, (1 << 5), "10GBASE-LR"},
2541 {IFM_10G_SR, (1 << 4), "10GBASE-SR"}
/*
 * Record the detected media type in the softc's media flags and
 * register/select it with the ifmedia layer.
 */
2545 mxge_set_media(mxge_softc_t *sc, int type)
2547 sc->media_flags |= type;
2548 ifmedia_add(&sc->media, sc->media_flags, 0, NULL);
2549 ifmedia_set(&sc->media, sc->media_flags);
2554 * Determine the media type for a NIC. Some XFPs will identify
2555 * themselves only when their link is up, so this is initiated via a
2556 * link up interrupt. However, this can potentially take up to
2557 * several milliseconds, so it is run via the watchdog routine, rather
2558 * than in the interrupt handler itself. This need only be done
2559 * once, not each time the link is up.
/*
 * Classifies the cage from the product-code string (CX4 / Quad Ribbon
 * / XFP / SFP+), then uses the firmware's I2C commands to read the
 * module's compliance byte and match it against the decode tables.
 * NOTE(review): interior lines are missing from this extract.
 */
2562 mxge_media_probe(mxge_softc_t *sc)
2567 struct mxge_media_type *mxge_media_types = NULL;
2568 int i, err, ms, mxge_media_type_entries;
2571 sc->need_media_probe = 0;
2573 /* if we've already set a media type, we're done */
2574 if (sc->media_flags != (IFM_ETHER | IFM_AUTO))
2578 * parse the product code to deterimine the interface type
2579 * (CX4, XFP, Quad Ribbon Fiber) by looking at the character
2580 * after the 3rd dash in the driver's cached copy of the
2581 * EEPROM's product code string.
2583 ptr = sc->product_code_string;
2585 device_printf(sc->dev, "Missing product code\n");
/* advance past the third '-' in the product code */
2588 for (i = 0; i < 3; i++, ptr++) {
2589 ptr = index(ptr, '-');
2591 device_printf(sc->dev,
2592 "only %d dashes in PC?!?\n", i);
2598 mxge_set_media(sc, IFM_10G_CX4);
2601 else if (*ptr == 'Q') {
2602 /* -Q is Quad Ribbon Fiber */
2603 device_printf(sc->dev, "Quad Ribbon Fiber Media\n");
2604 /* FreeBSD has no media type for Quad ribbon fiber */
2610 mxge_media_types = mxge_xfp_media_types;
2611 mxge_media_type_entries =
2612 sizeof (mxge_xfp_media_types) /
2613 sizeof (mxge_xfp_media_types[0]);
2614 byte = MXGE_XFP_COMPLIANCE_BYTE;
2618 if (*ptr == 'S' || *(ptr +1) == 'S') {
2619 /* -S or -2S is SFP+ */
2620 mxge_media_types = mxge_sfp_media_types;
2621 mxge_media_type_entries =
2622 sizeof (mxge_sfp_media_types) /
2623 sizeof (mxge_sfp_media_types[0]);
2628 if (mxge_media_types == NULL) {
2629 device_printf(sc->dev, "Unknown media type: %c\n", *ptr);
2634 * At this point we know the NIC has an XFP cage, so now we
2635 * try to determine what is in the cage by using the
2636 * firmware's XFP I2C commands to read the XFP 10GbE compilance
2637 * register. We read just one byte, which may take over
2641 cmd.data0 = 0; /* just fetch 1 byte, not all 256 */
2643 err = mxge_send_cmd(sc, MXGEFW_CMD_I2C_READ, &cmd);
2644 if (err == MXGEFW_CMD_ERROR_I2C_FAILURE) {
2645 device_printf(sc->dev, "failed to read XFP\n");
2647 if (err == MXGEFW_CMD_ERROR_I2C_ABSENT) {
2648 device_printf(sc->dev, "Type R/S with no XFP!?!?\n");
2650 if (err != MXGEFW_CMD_OK) {
2654 /* now we wait for the data to be cached */
/* poll up to ~50 iterations while the firmware caches the byte */
2656 err = mxge_send_cmd(sc, MXGEFW_CMD_I2C_BYTE, &cmd);
2657 for (ms = 0; (err == EBUSY) && (ms < 50); ms++) {
2660 err = mxge_send_cmd(sc, MXGEFW_CMD_I2C_BYTE, &cmd);
2662 if (err != MXGEFW_CMD_OK) {
2663 device_printf(sc->dev, "failed to read %s (%d, %dms)\n",
2664 cage_type, err, ms);
/* entry 0 is an exact-match special case (CX4 module) */
2668 if (cmd.data0 == mxge_media_types[0].bitmask) {
2670 device_printf(sc->dev, "%s:%s\n", cage_type,
2671 mxge_media_types[0].name);
2672 mxge_set_media(sc, IFM_10G_CX4);
2675 for (i = 1; i < mxge_media_type_entries; i++) {
2676 if (cmd.data0 & mxge_media_types[i].bitmask) {
2678 device_printf(sc->dev, "%s:%s\n",
2680 mxge_media_types[i].name);
2682 mxge_set_media(sc, mxge_media_types[i].flag);
2686 device_printf(sc->dev, "%s media 0x%x unknown\n", cage_type,
/*
 * Interrupt handler (legacy or MSI-X, one instance per slice).
 * Non-zero slices only clean their rx ring.  Slice 0 additionally
 * deasserts legacy IRQs, loops reaping tx/rx completions until the
 * firmware's valid byte clears, processes stats updates (link state
 * changes, RDMA timeouts), and returns the irq claim tokens.
 * NOTE(review): interior lines are missing from this extract.
 */
2693 mxge_intr(void *arg)
2695 struct mxge_slice_state *ss = arg;
2696 mxge_softc_t *sc = ss->sc;
2697 mcp_irq_data_t *stats = ss->fw_stats;
2698 mxge_tx_ring_t *tx = &ss->tx;
2699 mxge_rx_done_t *rx_done = &ss->rx_done;
2700 uint32_t send_done_count;
2704 /* an interrupt on a non-zero slice is implicitly valid
2705 since MSI-X irqs are not shared */
2707 mxge_clean_rx_done(ss);
2708 *ss->irq_claim = be32toh(3);
2712 /* make sure the DMA has finished */
2713 if (!stats->valid) {
2716 valid = stats->valid;
2718 if (sc->legacy_irq) {
2719 /* lower legacy IRQ */
2720 *sc->irq_deassert = 0;
2721 if (!mxge_deassert_wait)
2722 /* don't wait for conf. that irq is low */
2728 /* loop while waiting for legacy irq deassertion */
2730 /* check for transmit completes and receives */
2731 send_done_count = be32toh(stats->send_done_count);
2732 while ((send_done_count != tx->pkt_done) ||
2733 (rx_done->entry[rx_done->idx].length != 0)) {
2734 mxge_tx_done(ss, (int)send_done_count);
2735 mxge_clean_rx_done(ss);
2736 send_done_count = be32toh(stats->send_done_count);
2738 if (sc->legacy_irq && mxge_deassert_wait)
/* re-read valid through a volatile pointer so the DMA'd
   update is observed */
2740 } while (*((volatile uint8_t *) &stats->valid));
2742 if (__predict_false(stats->stats_updated)) {
2743 if (sc->link_state != stats->link_up) {
2744 sc->link_state = stats->link_up;
2745 if (sc->link_state) {
2746 if_link_state_change(sc->ifp, LINK_STATE_UP);
2748 device_printf(sc->dev, "link up\n");
2750 if_link_state_change(sc->ifp, LINK_STATE_DOWN);
2752 device_printf(sc->dev, "link down\n");
/* media probe is deferred to the watchdog (see above) */
2754 sc->need_media_probe = 1;
2756 if (sc->rdma_tags_available !=
2757 be32toh(stats->rdma_tags_available)) {
2758 sc->rdma_tags_available =
2759 be32toh(stats->rdma_tags_available);
2760 device_printf(sc->dev, "RDMA timed out! %d tags "
2761 "left\n", sc->rdma_tags_available);
2764 if (stats->link_down) {
2765 sc->down_cnt += stats->link_down;
2767 if_link_state_change(sc->ifp, LINK_STATE_DOWN);
2771 /* check to see if we have rx token to pass back */
2773 *ss->irq_claim = be32toh(3);
2774 *(ss->irq_claim + 1) = be32toh(3);
/* if_init entry point; body missing from this extract */
2778 mxge_init(void *arg)
/*
 * Release every mbuf a slice holds: the LRO free list, all small and
 * big receive ring buffers (unloading their dmamaps), and - on the
 * slice that has one - the transmit ring's mbufs.
 */
2785 mxge_free_slice_mbufs(struct mxge_slice_state *ss)
2787 struct lro_entry *lro_entry;
2790 while (!SLIST_EMPTY(&ss->lro_free)) {
2791 lro_entry = SLIST_FIRST(&ss->lro_free);
2792 SLIST_REMOVE_HEAD(&ss->lro_free, next);
2793 free(lro_entry, M_DEVBUF);
2796 for (i = 0; i <= ss->rx_big.mask; i++) {
2797 if (ss->rx_big.info[i].m == NULL)
2799 bus_dmamap_unload(ss->rx_big.dmat,
2800 ss->rx_big.info[i].map);
2801 m_freem(ss->rx_big.info[i].m);
2802 ss->rx_big.info[i].m = NULL;
2805 for (i = 0; i <= ss->rx_small.mask; i++) {
2806 if (ss->rx_small.info[i].m == NULL)
2808 bus_dmamap_unload(ss->rx_small.dmat,
2809 ss->rx_small.info[i].map);
2810 m_freem(ss->rx_small.info[i].m);
2811 ss->rx_small.info[i].m = NULL;
2814 /* transmit ring used only on the first slice */
2815 if (ss->tx.info == NULL)
2818 for (i = 0; i <= ss->tx.mask; i++) {
2819 ss->tx.info[i].flag = 0;
2820 if (ss->tx.info[i].m == NULL)
2822 bus_dmamap_unload(ss->tx.dmat,
2823 ss->tx.info[i].map);
2824 m_freem(ss->tx.info[i].m);
2825 ss->tx.info[i].m = NULL;
/*
 * mxge_free_mbufs -- free the mbufs of every slice by iterating
 * mxge_free_slice_mbufs() over sc->ss[0 .. num_slices-1].
 */
2830 mxge_free_mbufs(mxge_softc_t *sc)
2834 for (slice = 0; slice < sc->num_slices; slice++)
2835 mxge_free_slice_mbufs(&sc->ss[slice]);
/*
 * mxge_free_slice_rings -- tear down one slice's ring bookkeeping, in the
 * reverse order of mxge_alloc_slice_rings(): rx_done DMA block, tx
 * request-copy and segment lists, rx shadow rings, then the per-slot
 * dmamaps / dma tags / info arrays for tx, rx_small and rx_big.  Each
 * pointer is NULLed after freeing so the function is safe to call on a
 * partially allocated slice.
 * NOTE(review): line-sampled fragment; closing braces are elided.
 */
2839 mxge_free_slice_rings(struct mxge_slice_state *ss)
2844 if (ss->rx_done.entry != NULL)
2845 mxge_dma_free(&ss->rx_done.dma);
2846 ss->rx_done.entry = NULL;
2848 if (ss->tx.req_bytes != NULL)
2849 free(ss->tx.req_bytes, M_DEVBUF);
2850 ss->tx.req_bytes = NULL;
2852 if (ss->tx.seg_list != NULL)
2853 free(ss->tx.seg_list, M_DEVBUF);
2854 ss->tx.seg_list = NULL;
2856 if (ss->rx_small.shadow != NULL)
2857 free(ss->rx_small.shadow, M_DEVBUF);
2858 ss->rx_small.shadow = NULL;
2860 if (ss->rx_big.shadow != NULL)
2861 free(ss->rx_big.shadow, M_DEVBUF);
2862 ss->rx_big.shadow = NULL;
2864 if (ss->tx.info != NULL) {
2865 if (ss->tx.dmat != NULL) {
2866 for (i = 0; i <= ss->tx.mask; i++) {
2867 bus_dmamap_destroy(ss->tx.dmat,
2868 ss->tx.info[i].map);
2870 bus_dma_tag_destroy(ss->tx.dmat);
2872 free(ss->tx.info, M_DEVBUF);
2876 if (ss->rx_small.info != NULL) {
2877 if (ss->rx_small.dmat != NULL) {
2878 for (i = 0; i <= ss->rx_small.mask; i++) {
2879 bus_dmamap_destroy(ss->rx_small.dmat,
2880 ss->rx_small.info[i].map);
2882 bus_dmamap_destroy(ss->rx_small.dmat,
2883 ss->rx_small.extra_map);
2884 bus_dma_tag_destroy(ss->rx_small.dmat);
2886 free(ss->rx_small.info, M_DEVBUF);
2888 ss->rx_small.info = NULL;
2890 if (ss->rx_big.info != NULL) {
2891 if (ss->rx_big.dmat != NULL) {
2892 for (i = 0; i <= ss->rx_big.mask; i++) {
2893 bus_dmamap_destroy(ss->rx_big.dmat,
2894 ss->rx_big.info[i].map);
2896 bus_dmamap_destroy(ss->rx_big.dmat,
2897 ss->rx_big.extra_map);
2898 bus_dma_tag_destroy(ss->rx_big.dmat);
2900 free(ss->rx_big.info, M_DEVBUF);
2902 ss->rx_big.info = NULL;
/*
 * mxge_free_rings -- free the ring resources of every slice by calling
 * mxge_free_slice_rings() for sc->ss[0 .. num_slices-1].
 */
2906 mxge_free_rings(mxge_softc_t *sc)
2910 for (slice = 0; slice < sc->num_slices; slice++)
2911 mxge_free_slice_rings(&sc->ss[slice]);
/*
 * mxge_alloc_slice_rings -- allocate one slice's host-side ring state:
 * rx shadow rings and info arrays sized by rx_ring_entries, busdma tags
 * and per-slot dmamaps for rx_small (MHLEN buffers) and rx_big (page or
 * 9K-cluster buffers, chosen by MXGE_VIRT_JUMBOS), and -- on the first
 * slice only -- the tx request-copy block, segment list, info array, tx
 * dma tag and per-slot tx dmamaps.  Ring masks are set to entries-1
 * (ring sizes are powers of two).
 * NOTE(review): line-sampled fragment; error-path gotos, #else/#endif
 * lines and closing braces are elided, and the comment opened at
 * original line 3083 closes on an elided line.
 */
2915 mxge_alloc_slice_rings(struct mxge_slice_state *ss, int rx_ring_entries,
2916 int tx_ring_entries)
2918 mxge_softc_t *sc = ss->sc;
2924 /* allocate per-slice receive resources */
2926 ss->rx_small.mask = ss->rx_big.mask = rx_ring_entries - 1;
2927 ss->rx_done.mask = (2 * rx_ring_entries) - 1;
2929 /* allocate the rx shadow rings */
2930 bytes = rx_ring_entries * sizeof (*ss->rx_small.shadow);
2931 ss->rx_small.shadow = malloc(bytes, M_DEVBUF, M_ZERO|M_WAITOK);
2932 if (ss->rx_small.shadow == NULL)
2935 bytes = rx_ring_entries * sizeof (*ss->rx_big.shadow);
2936 ss->rx_big.shadow = malloc(bytes, M_DEVBUF, M_ZERO|M_WAITOK);
2937 if (ss->rx_big.shadow == NULL)
2940 /* allocate the rx host info rings */
2941 bytes = rx_ring_entries * sizeof (*ss->rx_small.info);
2942 ss->rx_small.info = malloc(bytes, M_DEVBUF, M_ZERO|M_WAITOK);
2943 if (ss->rx_small.info == NULL)
2946 bytes = rx_ring_entries * sizeof (*ss->rx_big.info);
2947 ss->rx_big.info = malloc(bytes, M_DEVBUF, M_ZERO|M_WAITOK);
2948 if (ss->rx_big.info == NULL)
2951 /* allocate the rx busdma resources */
2952 err = bus_dma_tag_create(sc->parent_dmat, /* parent */
2954 4096, /* boundary */
2955 BUS_SPACE_MAXADDR, /* low */
2956 BUS_SPACE_MAXADDR, /* high */
2957 NULL, NULL, /* filter */
2958 MHLEN, /* maxsize */
2960 MHLEN, /* maxsegsize */
2961 BUS_DMA_ALLOCNOW, /* flags */
2962 NULL, NULL, /* lock */
2963 &ss->rx_small.dmat); /* tag */
2965 device_printf(sc->dev, "Err %d allocating rx_small dmat\n",
2970 err = bus_dma_tag_create(sc->parent_dmat, /* parent */
2972 #if MXGE_VIRT_JUMBOS
2973 4096, /* boundary */
2977 BUS_SPACE_MAXADDR, /* low */
2978 BUS_SPACE_MAXADDR, /* high */
2979 NULL, NULL, /* filter */
2980 3*4096, /* maxsize */
2981 #if MXGE_VIRT_JUMBOS
2983 4096, /* maxsegsize*/
2986 MJUM9BYTES, /* maxsegsize*/
2988 BUS_DMA_ALLOCNOW, /* flags */
2989 NULL, NULL, /* lock */
2990 &ss->rx_big.dmat); /* tag */
2992 device_printf(sc->dev, "Err %d allocating rx_big dmat\n",
2996 for (i = 0; i <= ss->rx_small.mask; i++) {
2997 err = bus_dmamap_create(ss->rx_small.dmat, 0,
2998 &ss->rx_small.info[i].map);
3000 device_printf(sc->dev, "Err %d rx_small dmamap\n",
3005 err = bus_dmamap_create(ss->rx_small.dmat, 0,
3006 &ss->rx_small.extra_map);
3008 device_printf(sc->dev, "Err %d extra rx_small dmamap\n",
3013 for (i = 0; i <= ss->rx_big.mask; i++) {
3014 err = bus_dmamap_create(ss->rx_big.dmat, 0,
3015 &ss->rx_big.info[i].map);
3017 device_printf(sc->dev, "Err %d rx_big dmamap\n",
3022 err = bus_dmamap_create(ss->rx_big.dmat, 0,
3023 &ss->rx_big.extra_map);
3025 device_printf(sc->dev, "Err %d extra rx_big dmamap\n",
3030 /* now allocate TX resouces */
3032 /* only use a single TX ring for now */
3033 if (ss != ss->sc->ss)
3036 ss->tx.mask = tx_ring_entries - 1;
3037 ss->tx.max_desc = MIN(MXGE_MAX_SEND_DESC, tx_ring_entries / 4);
3040 /* allocate the tx request copy block */
3042 sizeof (*ss->tx.req_list) * (ss->tx.max_desc + 4);
3043 ss->tx.req_bytes = malloc(bytes, M_DEVBUF, M_WAITOK);
3044 if (ss->tx.req_bytes == NULL)
3046 /* ensure req_list entries are aligned to 8 bytes */
3047 ss->tx.req_list = (mcp_kreq_ether_send_t *)
3048 ((unsigned long)(ss->tx.req_bytes + 7) & ~7UL);
3050 /* allocate the tx busdma segment list */
3051 bytes = sizeof (*ss->tx.seg_list) * ss->tx.max_desc;
3052 ss->tx.seg_list = (bus_dma_segment_t *)
3053 malloc(bytes, M_DEVBUF, M_WAITOK);
3054 if (ss->tx.seg_list == NULL)
3057 /* allocate the tx host info ring */
3058 bytes = tx_ring_entries * sizeof (*ss->tx.info);
3059 ss->tx.info = malloc(bytes, M_DEVBUF, M_ZERO|M_WAITOK);
3060 if (ss->tx.info == NULL)
3063 /* allocate the tx busdma resources */
3064 err = bus_dma_tag_create(sc->parent_dmat, /* parent */
3066 sc->tx_boundary, /* boundary */
3067 BUS_SPACE_MAXADDR, /* low */
3068 BUS_SPACE_MAXADDR, /* high */
3069 NULL, NULL, /* filter */
3070 65536 + 256, /* maxsize */
3071 ss->tx.max_desc - 2, /* num segs */
3072 sc->tx_boundary, /* maxsegsz */
3073 BUS_DMA_ALLOCNOW, /* flags */
3074 NULL, NULL, /* lock */
3075 &ss->tx.dmat); /* tag */
3078 device_printf(sc->dev, "Err %d allocating tx dmat\n",
3083 /* now use these tags to setup dmamaps for each slot
3085 for (i = 0; i <= ss->tx.mask; i++) {
3086 err = bus_dmamap_create(ss->tx.dmat, 0,
3087 &ss->tx.info[i].map);
3089 device_printf(sc->dev, "Err %d tx dmamap\n",
/*
 * mxge_alloc_rings -- query the firmware for the send ring size, derive
 * tx/rx entry counts from the mcp descriptor sizes, size the ifnet send
 * queue to tx_ring_entries-1, and allocate per-slice rings via
 * mxge_alloc_slice_rings(); on failure, everything allocated so far is
 * released with mxge_free_rings().
 * NOTE(review): line-sampled fragment; error checks between the sampled
 * lines are elided.
 */
3103 mxge_alloc_rings(mxge_softc_t *sc)
3106 /* get ring sizes */
3107 err = mxge_send_cmd(sc, MXGEFW_CMD_GET_SEND_RING_SIZE, &cmd);
3108 tx_ring_size = cmd.data0;
3110 device_printf(sc->dev, "Cannot determine tx ring sizes\n");
3114 tx_ring_entries = tx_ring_size / sizeof (mcp_kreq_ether_send_t);
3115 rx_ring_entries = sc->rx_ring_size / sizeof (mcp_dma_addr_t);
3116 IFQ_SET_MAXLEN(&sc->ifp->if_snd, tx_ring_entries - 1);
3117 sc->ifp->if_snd.ifq_drv_maxlen = sc->ifp->if_snd.ifq_maxlen;
3118 IFQ_SET_READY(&sc->ifp->if_snd);
3120 for (slice = 0; slice < sc->num_slices; slice++) {
3121 err = mxge_alloc_slice_rings(&sc->ss[slice],
3130 mxge_free_rings(sc);
/*
 * mxge_choose_params -- given the interface MTU, pick the receive buffer
 * strategy: the cluster size (*cl_size), the big-buffer size advertised
 * to the firmware (*big_buf_size), and how many buffers a frame spans
 * (*nbufs).  Frames that fit in MCLBYTES or MJUMPAGESIZE use one buffer;
 * larger MTUs use 9K clusters, either carved into 4K virtual chunks
 * (MXGE_VIRT_JUMBOS) or as a single buffer.
 * NOTE(review): fragment -- the `*nbufs = 1; return;` lines of the first
 * two cases and the #else/#endif are elided.
 */
3137 mxge_choose_params(int mtu, int *big_buf_size, int *cl_size, int *nbufs)
3139 int bufsize = mtu + ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN + MXGEFW_PAD;
3141 if (bufsize < MCLBYTES) {
3142 /* easy, everything fits in a single buffer */
3143 *big_buf_size = MCLBYTES;
3144 *cl_size = MCLBYTES;
3149 if (bufsize < MJUMPAGESIZE) {
3150 /* still easy, everything still fits in a single buffer */
3151 *big_buf_size = MJUMPAGESIZE;
3152 *cl_size = MJUMPAGESIZE;
3156 #if MXGE_VIRT_JUMBOS
3157 /* now we need to use virtually contiguous buffers */
3158 *cl_size = MJUM9BYTES;
3159 *big_buf_size = 4096;
3160 *nbufs = mtu / 4096 + 1;
3161 /* needs to be a power of two, so round up */
3165 *cl_size = MJUM9BYTES;
3166 *big_buf_size = MJUM9BYTES;
/*
 * mxge_slice_open -- prepare one slice for traffic: build its LRO free
 * list (sc->lro_cnt entries), fetch the lanai (NIC SRAM) pointers for
 * the send and small/big receive rings from the firmware, then stock the
 * receive rings -- every rx_small slot, and every nbufs-th rx_big slot
 * after poisoning the big shadow ring addresses with 0xffffffff.
 * NOTE(review): line-sampled fragment; error-handling branches between
 * the sampled lines are elided.
 */
3172 mxge_slice_open(struct mxge_slice_state *ss, int nbufs, int cl_size)
3177 struct lro_entry *lro_entry;
3182 slice = ss - sc->ss;
3184 SLIST_INIT(&ss->lro_free);
3185 SLIST_INIT(&ss->lro_active);
3187 for (i = 0; i < sc->lro_cnt; i++) {
3188 lro_entry = (struct lro_entry *)
3189 malloc(sizeof (*lro_entry), M_DEVBUF,
3191 if (lro_entry == NULL) {
3195 SLIST_INSERT_HEAD(&ss->lro_free, lro_entry, next);
3197 /* get the lanai pointers to the send and receive rings */
3200 /* We currently only send from the first slice */
3203 err = mxge_send_cmd(sc, MXGEFW_CMD_GET_SEND_OFFSET, &cmd);
3205 (volatile mcp_kreq_ether_send_t *)(sc->sram + cmd.data0);
3208 err |= mxge_send_cmd(sc,
3209 MXGEFW_CMD_GET_SMALL_RX_OFFSET, &cmd);
3210 ss->rx_small.lanai =
3211 (volatile mcp_kreq_ether_recv_t *)(sc->sram + cmd.data0);
3213 err |= mxge_send_cmd(sc, MXGEFW_CMD_GET_BIG_RX_OFFSET, &cmd);
3215 (volatile mcp_kreq_ether_recv_t *)(sc->sram + cmd.data0);
3218 device_printf(sc->dev,
3219 "failed to get ring sizes or locations\n");
3223 /* stock receive rings */
3224 for (i = 0; i <= ss->rx_small.mask; i++) {
3225 map = ss->rx_small.info[i].map;
3226 err = mxge_get_buf_small(ss, map, i);
3228 device_printf(sc->dev, "alloced %d/%d smalls\n",
3229 i, ss->rx_small.mask + 1);
3233 for (i = 0; i <= ss->rx_big.mask; i++) {
3234 ss->rx_big.shadow[i].addr_low = 0xffffffff;
3235 ss->rx_big.shadow[i].addr_high = 0xffffffff;
3237 ss->rx_big.nbufs = nbufs;
3238 ss->rx_big.cl_size = cl_size;
3239 for (i = 0; i <= ss->rx_big.mask; i += ss->rx_big.nbufs) {
3240 map = ss->rx_big.info[i].map;
3241 err = mxge_get_buf_big(ss, map, i);
3243 device_printf(sc->dev, "alloced %d/%d bigs\n",
3244 i, ss->rx_big.mask + 1);
/*
 * mxge_open -- bring the interface up: reset the NIC, program the RSS
 * indirection table and hash type when running multiple slices, choose
 * and push buffer/MTU parameters to the firmware, hand it the stats DMA
 * block (V2, falling back to the obsolete command and disabling
 * multicast support when V2 is unavailable), open every slice, issue
 * ETHERNET_UP, mark the ifnet RUNNING and start the periodic tick
 * callout.  The trailing mxge_free_mbufs() call is part of the elided
 * error-unwind path.
 * NOTE(review): line-sampled fragment; error checks, gotos and closing
 * braces between the sampled lines are elided.
 */
3252 mxge_open(mxge_softc_t *sc)
3255 int err, big_bytes, nbufs, slice, cl_size, i;
3257 volatile uint8_t *itable;
3259 /* Copy the MAC address in case it was overridden */
3260 bcopy(IF_LLADDR(sc->ifp), sc->mac_addr, ETHER_ADDR_LEN);
3262 err = mxge_reset(sc, 1);
3264 device_printf(sc->dev, "failed to reset\n");
3268 if (sc->num_slices > 1) {
3269 /* setup the indirection table */
3270 cmd.data0 = sc->num_slices;
3271 err = mxge_send_cmd(sc, MXGEFW_CMD_SET_RSS_TABLE_SIZE,
3274 err |= mxge_send_cmd(sc, MXGEFW_CMD_GET_RSS_TABLE_OFFSET,
3277 device_printf(sc->dev,
3278 "failed to setup rss tables\n");
3282 /* just enable an identity mapping */
3283 itable = sc->sram + cmd.data0;
3284 for (i = 0; i < sc->num_slices; i++)
3285 itable[i] = (uint8_t)i;
3288 cmd.data1 = mxge_rss_hash_type;
3289 err = mxge_send_cmd(sc, MXGEFW_CMD_SET_RSS_ENABLE, &cmd);
3291 device_printf(sc->dev, "failed to enable slices\n");
3297 mxge_choose_params(sc->ifp->if_mtu, &big_bytes, &cl_size, &nbufs);
3300 err = mxge_send_cmd(sc, MXGEFW_CMD_ALWAYS_USE_N_BIG_BUFFERS,
3302 /* error is only meaningful if we're trying to set
3303 MXGEFW_CMD_ALWAYS_USE_N_BIG_BUFFERS > 1 */
3304 if (err && nbufs > 1) {
3305 device_printf(sc->dev,
3306 "Failed to set alway-use-n to %d\n",
3310 /* Give the firmware the mtu and the big and small buffer
3311 sizes. The firmware wants the big buf size to be a power
3312 of two. Luckily, FreeBSD's clusters are powers of two */
3313 cmd.data0 = sc->ifp->if_mtu + ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
3314 err = mxge_send_cmd(sc, MXGEFW_CMD_SET_MTU, &cmd);
3315 cmd.data0 = MHLEN - MXGEFW_PAD;
3316 err |= mxge_send_cmd(sc, MXGEFW_CMD_SET_SMALL_BUFFER_SIZE,
3318 cmd.data0 = big_bytes;
3319 err |= mxge_send_cmd(sc, MXGEFW_CMD_SET_BIG_BUFFER_SIZE, &cmd);
3322 device_printf(sc->dev, "failed to setup params\n");
3326 /* Now give him the pointer to the stats block */
3327 cmd.data0 = MXGE_LOWPART_TO_U32(sc->ss->fw_stats_dma.bus_addr);
3328 cmd.data1 = MXGE_HIGHPART_TO_U32(sc->ss->fw_stats_dma.bus_addr);
3329 cmd.data2 = sizeof(struct mcp_irq_data);
3330 err = mxge_send_cmd(sc, MXGEFW_CMD_SET_STATS_DMA_V2, &cmd);
3333 bus = sc->ss->fw_stats_dma.bus_addr;
3334 bus += offsetof(struct mcp_irq_data, send_done_count);
3335 cmd.data0 = MXGE_LOWPART_TO_U32(bus);
3336 cmd.data1 = MXGE_HIGHPART_TO_U32(bus);
3337 err = mxge_send_cmd(sc,
3338 MXGEFW_CMD_SET_STATS_DMA_OBSOLETE,
3340 /* Firmware cannot support multicast without STATS_DMA_V2 */
3341 sc->fw_multicast_support = 0;
3343 sc->fw_multicast_support = 1;
3347 device_printf(sc->dev, "failed to setup params\n");
3351 for (slice = 0; slice < sc->num_slices; slice++) {
3352 err = mxge_slice_open(&sc->ss[slice], nbufs, cl_size);
3354 device_printf(sc->dev, "couldn't open slice %d\n",
3360 /* Finally, start the firmware running */
3361 err = mxge_send_cmd(sc, MXGEFW_CMD_ETHERNET_UP, &cmd);
3363 device_printf(sc->dev, "Couldn't bring up link\n");
3366 sc->ifp->if_drv_flags |= IFF_DRV_RUNNING;
3367 sc->ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;
3368 callout_reset(&sc->co_hdl, mxge_ticks, mxge_tick, sc);
3374 mxge_free_mbufs(sc);
/*
 * mxge_close -- bring the interface down: stop the tick callout, clear
 * IFF_DRV_RUNNING, issue ETHERNET_DOWN, then wait (via DELAY, up to
 * 10 * intr_coal_delay per attempt) for the "link down" interrupt to
 * bump sc->down_cnt before freeing all mbufs.
 * NOTE(review): line-sampled fragment; the retry loop around the DELAY
 * is elided.
 */
3380 mxge_close(mxge_softc_t *sc)
3383 int err, old_down_cnt;
3385 callout_stop(&sc->co_hdl);
3386 sc->ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
3387 old_down_cnt = sc->down_cnt;
3389 err = mxge_send_cmd(sc, MXGEFW_CMD_ETHERNET_DOWN, &cmd);
3391 device_printf(sc->dev, "Couldn't bring down link\n");
3393 if (old_down_cnt == sc->down_cnt) {
3394 /* wait for down irq */
3395 DELAY(10 * sc->intr_coal_delay);
3398 if (old_down_cnt == sc->down_cnt) {
3399 device_printf(sc->dev, "never got down irq\n");
3402 mxge_free_mbufs(sc);
/*
 * mxge_setup_cfg_space -- program the device's PCI config space: read
 * the PCIe link width from the Link Status register (offset 0x12 in the
 * express capability), set the Device Control max read request field
 * (bits 14:12 at offset 0x8) to 5 (4KB), and enable bus mastering plus
 * memory-space decoding in the command register.
 */
3408 mxge_setup_cfg_space(mxge_softc_t *sc)
3410 device_t dev = sc->dev;
3412 uint16_t cmd, lnk, pectl;
3414 /* find the PCIe link width and set max read request to 4KB*/
3415 if (pci_find_extcap(dev, PCIY_EXPRESS, &reg) == 0) {
3416 lnk = pci_read_config(dev, reg + 0x12, 2);
3417 sc->link_width = (lnk >> 4) & 0x3f;
3419 pectl = pci_read_config(dev, reg + 0x8, 2);
3420 pectl = (pectl & ~0x7000) | (5 << 12);
3421 pci_write_config(dev, reg + 0x8, pectl, 2);
3424 /* Enable DMA and Memory space access */
3425 pci_enable_busmaster(dev);
3426 cmd = pci_read_config(dev, PCIR_COMMAND, 2);
3427 cmd |= PCIM_CMD_MEMEN;
3428 pci_write_config(dev, PCIR_COMMAND, cmd, 2);
/*
 * mxge_read_reboot -- fetch the NIC's reboot-status word through the PCI
 * vendor-specific capability: enable read32 mode (cap+0x10), point the
 * window at register 0xfffffff0 (cap+0x18), and read the data register
 * (cap+0x14).  Returns (uint32_t)-1 when the capability is absent.
 */
3432 mxge_read_reboot(mxge_softc_t *sc)
3434 device_t dev = sc->dev;
3437 /* find the vendor specific offset */
3438 if (pci_find_extcap(dev, PCIY_VENDOR, &vs) != 0) {
3439 device_printf(sc->dev,
3440 "could not find vendor specific offset\n");
3441 return (uint32_t)-1;
3443 /* enable read32 mode */
3444 pci_write_config(dev, vs + 0x10, 0x3, 1);
3445 /* tell NIC which register to read */
3446 pci_write_config(dev, vs + 0x18, 0xfffffff0, 4);
3447 return (pci_read_config(dev, vs + 0x14, 4));
/*
 * mxge_watchdog_reset -- recover from a transmit hang.  Reads
 * PCIR_COMMAND: all-ones means the device has vanished (possibly still
 * rebooting -- retried after a delay on the elided lines); a cleared
 * busmaster bit means the NIC rebooted and wiped config space, so the
 * saved config is restored (pci_cfg_restore + mxge_setup_cfg_space) and
 * the interface reopened if it was running.  Otherwise the ring state is
 * dumped and no reset is performed.
 * NOTE(review): line-sampled fragment; the retry loop, close/open
 * sequence and return path are elided.
 */
3451 mxge_watchdog_reset(mxge_softc_t *sc)
3453 struct pci_devinfo *dinfo;
3460 device_printf(sc->dev, "Watchdog reset!\n");
3463 * check to see if the NIC rebooted. If it did, then all of
3464 * PCI config space has been reset, and things like the
3465 * busmaster bit will be zero. If this is the case, then we
3466 * must restore PCI config space before the NIC can be used
3469 cmd = pci_read_config(sc->dev, PCIR_COMMAND, 2);
3470 if (cmd == 0xffff) {
3472 * maybe the watchdog caught the NIC rebooting; wait
3473 * up to 100ms for it to finish. If it does not come
3474 * back, then give up
3477 cmd = pci_read_config(sc->dev, PCIR_COMMAND, 2);
3478 if (cmd == 0xffff) {
3479 device_printf(sc->dev, "NIC disappeared!\n");
3483 if ((cmd & PCIM_CMD_BUSMASTEREN) == 0) {
3484 /* print the reboot status */
3485 reboot = mxge_read_reboot(sc);
3486 device_printf(sc->dev, "NIC rebooted, status = 0x%x\n",
3488 /* restore PCI configuration space */
3489 dinfo = device_get_ivars(sc->dev);
3490 pci_cfg_restore(sc->dev, dinfo);
3492 /* and redo any changes we made to our config space */
3493 mxge_setup_cfg_space(sc);
3495 if (sc->ifp->if_drv_flags & IFF_DRV_RUNNING) {
3497 err = mxge_open(sc);
3500 device_printf(sc->dev, "NIC did not reboot, ring state:\n");
3501 device_printf(sc->dev, "tx.req=%d tx.done=%d\n",
3502 sc->ss->tx.req, sc->ss->tx.done);
3503 device_printf(sc->dev, "pkt_done=%d fw=%d\n",
3504 sc->ss->tx.pkt_done,
3505 be32toh(sc->ss->fw_stats->send_done_count));
3506 device_printf(sc->dev, "not resetting\n");
/*
 * mxge_watchdog -- periodic hang check on the first slice's tx ring.  A
 * hang is declared when requests are outstanding (req != done) and no
 * progress was made since the previous tick (done == watchdog_done); if
 * the dropped-pause counter also did not move, the stall is real and
 * mxge_watchdog_reset() is invoked, otherwise flow control from the link
 * partner is blamed.  Snapshots req/done/rx_pause for the next tick and
 * triggers a media probe when one was requested by the interrupt path.
 */
3512 mxge_watchdog(mxge_softc_t *sc)
3514 mxge_tx_ring_t *tx = &sc->ss->tx;
3515 uint32_t rx_pause = be32toh(sc->ss->fw_stats->dropped_pause);
3518 /* see if we have outstanding transmits, which
3519 have been pending for more than mxge_ticks */
3520 if (tx->req != tx->done &&
3521 tx->watchdog_req != tx->watchdog_done &&
3522 tx->done == tx->watchdog_done) {
3523 /* check for pause blocking before resetting */
3524 if (tx->watchdog_rx_pause == rx_pause)
3525 err = mxge_watchdog_reset(sc);
3527 device_printf(sc->dev, "Flow control blocking "
3528 "xmits, check link partner\n");
3531 tx->watchdog_req = tx->req;
3532 tx->watchdog_done = tx->done;
3533 tx->watchdog_rx_pause = rx_pause;
3535 if (sc->need_media_probe)
3536 mxge_media_probe(sc);
/*
 * mxge_update_stats -- sum the per-slice input packet counters and
 * publish the total in the ifnet's if_ipackets.
 */
3541 mxge_update_stats(mxge_softc_t *sc)
3543 struct mxge_slice_state *ss;
3544 u_long ipackets = 0;
3547 for(slice = 0; slice < sc->num_slices; slice++) {
3548 ss = &sc->ss[slice];
3549 ipackets += ss->ipackets;
3551 sc->ifp->if_ipackets = ipackets;
/*
 * mxge_tick -- callout handler armed by mxge_open().  Aggregates slice
 * statistics every tick and runs the tx watchdog every 5th tick (the
 * countdown reloads to 4), then re-arms itself for mxge_ticks.
 * NOTE(review): fragment -- the guard that skips re-arming after a
 * watchdog error, if any, is on elided lines.
 */
3555 mxge_tick(void *arg)
3557 mxge_softc_t *sc = arg;
3560 /* aggregate stats from different slices */
3561 mxge_update_stats(sc);
3562 if (!sc->watchdog_countdown) {
3563 err = mxge_watchdog(sc);
3564 sc->watchdog_countdown = 4;
3566 sc->watchdog_countdown--;
3568 callout_reset(&sc->co_hdl, mxge_ticks, mxge_tick, sc);
/*
 * mxge_media_change -- ifmedia change callback registered with
 * ifmedia_init() in mxge_attach.  NOTE(review): only the signature is
 * visible in this fragment; the body is on elided lines.
 */
3573 mxge_media_change(struct ifnet *ifp)
/*
 * mxge_change_mtu -- validate and apply a new MTU.  Rejects frames whose
 * on-wire size (mtu + ethernet + VLAN headers) exceeds sc->max_mtu or
 * falls below 60 bytes; under the driver mutex, a running interface is
 * closed and reopened, reverting to the old MTU if the reopen fails.
 * NOTE(review): line-sampled fragment; the mxge_close() call and error
 * return paths are elided.
 */
3579 mxge_change_mtu(mxge_softc_t *sc, int mtu)
3581 struct ifnet *ifp = sc->ifp;
3582 int real_mtu, old_mtu;
3586 real_mtu = mtu + ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
3587 if ((real_mtu > sc->max_mtu) || real_mtu < 60)
3589 mtx_lock(&sc->driver_mtx);
3590 old_mtu = ifp->if_mtu;
3592 if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
3594 err = mxge_open(sc);
3596 ifp->if_mtu = old_mtu;
3598 (void) mxge_open(sc);
3601 mtx_unlock(&sc->driver_mtx);
/*
 * mxge_media_status -- ifmedia status callback: always reports a valid
 * autoselect ethernet medium, adding ACTIVE and FDX flags when
 * sc->link_state indicates link up.
 */
3606 mxge_media_status(struct ifnet *ifp, struct ifmediareq *ifmr)
3608 mxge_softc_t *sc = ifp->if_softc;
3613 ifmr->ifm_status = IFM_AVALID;
3614 ifmr->ifm_status |= sc->link_state ? IFM_ACTIVE : 0;
3615 ifmr->ifm_active = IFM_AUTO | IFM_ETHER;
3616 ifmr->ifm_active |= sc->link_state ? IFM_FDX : 0;
/*
 * mxge_ioctl -- ifnet ioctl handler.  Visible cases: delegate to
 * ether_ioctl, change MTU, toggle IFF_UP (open/close plus promisc and
 * multicast refresh), refresh the multicast filter, edit capabilities
 * (TXCSUM implies TSO4 removal when disabled; TSO4 requires TXCSUM;
 * LRO toggled via mxge_change_lro_locked; VLAN hw tagging XORed), and
 * media ioctls via ifmedia_ioctl.  Everything except ether_ioctl runs
 * under sc->driver_mtx.
 * NOTE(review): line-sampled fragment; the case labels, mxge_close()
 * calls and closing braces are on elided lines.
 */
3620 mxge_ioctl(struct ifnet *ifp, u_long command, caddr_t data)
3622 mxge_softc_t *sc = ifp->if_softc;
3623 struct ifreq *ifr = (struct ifreq *)data;
3630 err = ether_ioctl(ifp, command, data);
3634 err = mxge_change_mtu(sc, ifr->ifr_mtu);
3638 mtx_lock(&sc->driver_mtx);
3639 if (ifp->if_flags & IFF_UP) {
3640 if (!(ifp->if_drv_flags & IFF_DRV_RUNNING)) {
3641 err = mxge_open(sc);
3643 /* take care of promis can allmulti
3645 mxge_change_promisc(sc,
3646 ifp->if_flags & IFF_PROMISC);
3647 mxge_set_multicast_list(sc);
3650 if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
3654 mtx_unlock(&sc->driver_mtx);
3659 mtx_lock(&sc->driver_mtx);
3660 mxge_set_multicast_list(sc);
3661 mtx_unlock(&sc->driver_mtx);
3665 mtx_lock(&sc->driver_mtx);
3666 mask = ifr->ifr_reqcap ^ ifp->if_capenable;
3667 if (mask & IFCAP_TXCSUM) {
3668 if (IFCAP_TXCSUM & ifp->if_capenable) {
3669 ifp->if_capenable &= ~(IFCAP_TXCSUM|IFCAP_TSO4);
3670 ifp->if_hwassist &= ~(CSUM_TCP | CSUM_UDP
3673 ifp->if_capenable |= IFCAP_TXCSUM;
3674 ifp->if_hwassist |= (CSUM_TCP | CSUM_UDP);
3676 } else if (mask & IFCAP_RXCSUM) {
3677 if (IFCAP_RXCSUM & ifp->if_capenable) {
3678 ifp->if_capenable &= ~IFCAP_RXCSUM;
3681 ifp->if_capenable |= IFCAP_RXCSUM;
3685 if (mask & IFCAP_TSO4) {
3686 if (IFCAP_TSO4 & ifp->if_capenable) {
3687 ifp->if_capenable &= ~IFCAP_TSO4;
3688 ifp->if_hwassist &= ~CSUM_TSO;
3689 } else if (IFCAP_TXCSUM & ifp->if_capenable) {
3690 ifp->if_capenable |= IFCAP_TSO4;
3691 ifp->if_hwassist |= CSUM_TSO;
3693 printf("mxge requires tx checksum offload"
3694 " be enabled to use TSO\n");
3698 if (mask & IFCAP_LRO) {
3699 if (IFCAP_LRO & ifp->if_capenable)
3700 err = mxge_change_lro_locked(sc, 0);
3702 err = mxge_change_lro_locked(sc, mxge_lro_cnt);
3704 if (mask & IFCAP_VLAN_HWTAGGING)
3705 ifp->if_capenable ^= IFCAP_VLAN_HWTAGGING;
3706 mtx_unlock(&sc->driver_mtx);
3707 VLAN_CAPABILITIES(ifp);
3712 err = ifmedia_ioctl(ifp, (struct ifreq *)data,
3713 &sc->media, command);
/*
 * mxge_fetch_tunables -- read the hw.mxge.* loader tunables into the
 * driver's globals (and sc->lro_cnt), then sanitize them: clamp
 * intr_coal_delay to [0, 10000] (default 30), default ticks to hz/2,
 * copy flow control into sc->pause, and force an out-of-range RSS hash
 * type to MXGEFW_RSS_HASH_TYPE_SRC_PORT.
 */
3723 mxge_fetch_tunables(mxge_softc_t *sc)
3726 TUNABLE_INT_FETCH("hw.mxge.max_slices", &mxge_max_slices);
3727 TUNABLE_INT_FETCH("hw.mxge.flow_control_enabled",
3728 &mxge_flow_control);
3729 TUNABLE_INT_FETCH("hw.mxge.intr_coal_delay",
3730 &mxge_intr_coal_delay);
3731 TUNABLE_INT_FETCH("hw.mxge.nvidia_ecrc_enable",
3732 &mxge_nvidia_ecrc_enable);
3733 TUNABLE_INT_FETCH("hw.mxge.force_firmware",
3734 &mxge_force_firmware);
3735 TUNABLE_INT_FETCH("hw.mxge.deassert_wait",
3736 &mxge_deassert_wait);
3737 TUNABLE_INT_FETCH("hw.mxge.verbose",
3739 TUNABLE_INT_FETCH("hw.mxge.ticks", &mxge_ticks);
3740 TUNABLE_INT_FETCH("hw.mxge.lro_cnt", &sc->lro_cnt);
3741 TUNABLE_INT_FETCH("hw.mxge.always_promisc", &mxge_always_promisc);
3742 TUNABLE_INT_FETCH("hw.mxge.rss_hash_type", &mxge_rss_hash_type);
3743 if (sc->lro_cnt != 0)
3744 mxge_lro_cnt = sc->lro_cnt;
3748 if (mxge_intr_coal_delay < 0 || mxge_intr_coal_delay > 10*1000)
3749 mxge_intr_coal_delay = 30;
3750 if (mxge_ticks == 0)
3751 mxge_ticks = hz / 2;
3752 sc->pause = mxge_flow_control;
3753 if (mxge_rss_hash_type < MXGEFW_RSS_HASH_TYPE_IPV4
3754 || mxge_rss_hash_type > MXGEFW_RSS_HASH_TYPE_SRC_PORT) {
3755 mxge_rss_hash_type = MXGEFW_RSS_HASH_TYPE_SRC_PORT;
/*
 * mxge_free_slices -- undo mxge_alloc_slices(): for each slice, free the
 * firmware-stats DMA block (destroying the tx mutex that was created
 * alongside it) and the rx_done interrupt queue, then free the slice
 * array itself.
 * NOTE(review): fragment -- the NULL-check guard on sc->ss and the
 * `ss = &sc->ss[i]` assignment are on elided lines.
 */
3763 mxge_free_slices(mxge_softc_t *sc)
3770 for (i = 0; i < sc->num_slices; i++) {
3772 if (ss->fw_stats != NULL) {
3773 mxge_dma_free(&ss->fw_stats_dma);
3774 ss->fw_stats = NULL;
3775 mtx_destroy(&ss->tx.mtx);
3777 if (ss->rx_done.entry != NULL) {
3778 mxge_dma_free(&ss->rx_done.dma);
3779 ss->rx_done.entry = NULL;
3782 free(sc->ss, M_DEVBUF);
/*
 * mxge_alloc_slices -- query the firmware rx ring size, size the
 * interrupt queue at two slots per rx descriptor, allocate the zeroed
 * sc->ss slice array, and for each slice allocate + zero its rx_done
 * DMA queue and its 64-byte-aligned firmware-stats DMA block, creating
 * the per-slice tx mutex named "<dev>:tx(<i>)".  On any failure
 * everything is unwound via mxge_free_slices().
 * NOTE(review): line-sampled fragment; per-step error checks are
 * elided.
 */
3787 mxge_alloc_slices(mxge_softc_t *sc)
3790 struct mxge_slice_state *ss;
3792 int err, i, max_intr_slots;
3794 err = mxge_send_cmd(sc, MXGEFW_CMD_GET_RX_RING_SIZE, &cmd);
3796 device_printf(sc->dev, "Cannot determine rx ring size\n");
3799 sc->rx_ring_size = cmd.data0;
3800 max_intr_slots = 2 * (sc->rx_ring_size / sizeof (mcp_dma_addr_t));
3802 bytes = sizeof (*sc->ss) * sc->num_slices;
3803 sc->ss = malloc(bytes, M_DEVBUF, M_NOWAIT | M_ZERO);
3806 for (i = 0; i < sc->num_slices; i++) {
3811 /* allocate per-slice rx interrupt queues */
3813 bytes = max_intr_slots * sizeof (*ss->rx_done.entry);
3814 err = mxge_dma_alloc(sc, &ss->rx_done.dma, bytes, 4096);
3817 ss->rx_done.entry = ss->rx_done.dma.addr;
3818 bzero(ss->rx_done.entry, bytes);
3821 * allocate the per-slice firmware stats; stats
3822 * (including tx) are used used only on the first
3828 bytes = sizeof (*ss->fw_stats);
3829 err = mxge_dma_alloc(sc, &ss->fw_stats_dma,
3830 sizeof (*ss->fw_stats), 64);
3833 ss->fw_stats = (mcp_irq_data_t *)ss->fw_stats_dma.addr;
3834 snprintf(ss->tx.mtx_name, sizeof(ss->tx.mtx_name),
3835 "%s:tx(%d)", device_get_nameunit(sc->dev), i);
3836 mtx_init(&ss->tx.mtx, ss->tx.mtx_name, NULL, MTX_DEF);
3842 mxge_free_slices(sc);
/*
 * mxge_slice_probe -- decide how many RSS slices to use.  Bails out to a
 * single slice unless multi-slice is enabled, the system is SMP and
 * MSI-X vectors are available.  Loads the RSS-aware firmware variant
 * matching the current (aligned/unaligned) choice, resets the NIC,
 * reports the interrupt queue size, and queries the firmware's maximum
 * RSS queues; the result is capped by the MSI-X count, by mxge_max_slices
 * (or mp_ncpus when -1), and rounded down to a power of two.  On any
 * failure the original firmware is reloaded (abort path at the end).
 * NOTE(review): line-sampled fragment; early returns and the power-of-
 * two rounding body are elided.
 */
3847 mxge_slice_probe(mxge_softc_t *sc)
3851 int msix_cnt, status, max_intr_slots;
3855 * don't enable multiple slices if they are not enabled,
3856 * or if this is not an SMP system
3859 if (mxge_max_slices == 0 || mxge_max_slices == 1 || mp_ncpus < 2)
3862 /* see how many MSI-X interrupts are available */
3863 msix_cnt = pci_msix_count(sc->dev);
3867 /* now load the slice aware firmware see what it supports */
3868 old_fw = sc->fw_name;
3869 if (old_fw == mxge_fw_aligned)
3870 sc->fw_name = mxge_fw_rss_aligned;
3872 sc->fw_name = mxge_fw_rss_unaligned;
3873 status = mxge_load_firmware(sc, 0);
3875 device_printf(sc->dev, "Falling back to a single slice\n");
3879 /* try to send a reset command to the card to see if it
3881 memset(&cmd, 0, sizeof (cmd));
3882 status = mxge_send_cmd(sc, MXGEFW_CMD_RESET, &cmd);
3884 device_printf(sc->dev, "failed reset\n");
3888 /* get rx ring size */
3889 status = mxge_send_cmd(sc, MXGEFW_CMD_GET_RX_RING_SIZE, &cmd);
3891 device_printf(sc->dev, "Cannot determine rx ring size\n");
3894 max_intr_slots = 2 * (cmd.data0 / sizeof (mcp_dma_addr_t));
3896 /* tell it the size of the interrupt queues */
3897 cmd.data0 = max_intr_slots * sizeof (struct mcp_slot);
3898 status = mxge_send_cmd(sc, MXGEFW_CMD_SET_INTRQ_SIZE, &cmd);
3900 device_printf(sc->dev, "failed MXGEFW_CMD_SET_INTRQ_SIZE\n");
3904 /* ask the maximum number of slices it supports */
3905 status = mxge_send_cmd(sc, MXGEFW_CMD_GET_MAX_RSS_QUEUES, &cmd);
3907 device_printf(sc->dev,
3908 "failed MXGEFW_CMD_GET_MAX_RSS_QUEUES\n");
3911 sc->num_slices = cmd.data0;
3912 if (sc->num_slices > msix_cnt)
3913 sc->num_slices = msix_cnt;
3915 if (mxge_max_slices == -1) {
3916 /* cap to number of CPUs in system */
3917 if (sc->num_slices > mp_ncpus)
3918 sc->num_slices = mp_ncpus;
3920 if (sc->num_slices > mxge_max_slices)
3921 sc->num_slices = mxge_max_slices;
3923 /* make sure it is a power of two */
3924 while (sc->num_slices & (sc->num_slices - 1))
3928 device_printf(sc->dev, "using %d slices\n",
3934 sc->fw_name = old_fw;
3935 (void) mxge_load_firmware(sc, 0);
/*
 * mxge_add_msix_irqs -- set up one MSI-X interrupt per slice: map the
 * MSI-X table BAR (PCIR_BAR(2)), allocate num_slices message vectors
 * (suggesting a smaller hw.mxge.max_slices if fewer are granted),
 * allocate an IRQ resource and a bus_setup_intr handler (mxge_intr with
 * the slice as argument) for each, and log the assigned IRQ numbers.
 * Labels abort_with_intr / abort_with_res / abort_with_msix /
 * abort_with_msix_table unwind in reverse order of acquisition.
 * NOTE(review): line-sampled fragment; rid computations, some error
 * checks and closing braces are elided.
 */
3939 mxge_add_msix_irqs(mxge_softc_t *sc)
3942 int count, err, i, rid;
3945 sc->msix_table_res = bus_alloc_resource_any(sc->dev, SYS_RES_MEMORY,
3948 if (sc->msix_table_res == NULL) {
3949 device_printf(sc->dev, "couldn't alloc MSIX table res\n");
3953 count = sc->num_slices;
3954 err = pci_alloc_msix(sc->dev, &count);
3956 device_printf(sc->dev, "pci_alloc_msix: failed, wanted %d"
3957 "err = %d \n", sc->num_slices, err);
3958 goto abort_with_msix_table;
3960 if (count < sc->num_slices) {
3961 device_printf(sc->dev, "pci_alloc_msix: need %d, got %d\n",
3962 count, sc->num_slices);
3963 device_printf(sc->dev,
3964 "Try setting hw.mxge.max_slices to %d\n",
3967 goto abort_with_msix;
3969 bytes = sizeof (*sc->msix_irq_res) * sc->num_slices;
3970 sc->msix_irq_res = malloc(bytes, M_DEVBUF, M_NOWAIT|M_ZERO);
3971 if (sc->msix_irq_res == NULL) {
3973 goto abort_with_msix;
3976 for (i = 0; i < sc->num_slices; i++) {
3978 sc->msix_irq_res[i] = bus_alloc_resource_any(sc->dev,
3981 if (sc->msix_irq_res[i] == NULL) {
3982 device_printf(sc->dev, "couldn't allocate IRQ res"
3983 " for message %d\n", i);
3985 goto abort_with_res;
3989 bytes = sizeof (*sc->msix_ih) * sc->num_slices;
3990 sc->msix_ih = malloc(bytes, M_DEVBUF, M_NOWAIT|M_ZERO);
3992 for (i = 0; i < sc->num_slices; i++) {
3993 err = bus_setup_intr(sc->dev, sc->msix_irq_res[i],
3994 INTR_TYPE_NET | INTR_MPSAFE,
3995 #if __FreeBSD_version > 700030
3998 mxge_intr, &sc->ss[i], &sc->msix_ih[i]);
4000 device_printf(sc->dev, "couldn't setup intr for "
4002 goto abort_with_intr;
4007 device_printf(sc->dev, "using %d msix IRQs:",
4009 for (i = 0; i < sc->num_slices; i++)
4010 printf(" %ld", rman_get_start(sc->msix_irq_res[i]));
4016 for (i = 0; i < sc->num_slices; i++) {
4017 if (sc->msix_ih[i] != NULL) {
4018 bus_teardown_intr(sc->dev, sc->msix_irq_res[i],
4020 sc->msix_ih[i] = NULL;
4023 free(sc->msix_ih, M_DEVBUF);
4027 for (i = 0; i < sc->num_slices; i++) {
4029 if (sc->msix_irq_res[i] != NULL)
4030 bus_release_resource(sc->dev, SYS_RES_IRQ, rid,
4031 sc->msix_irq_res[i]);
4032 sc->msix_irq_res[i] = NULL;
4034 free(sc->msix_irq_res, M_DEVBUF);
4038 pci_release_msi(sc->dev);
4040 abort_with_msix_table:
4041 bus_release_resource(sc->dev, SYS_RES_MEMORY, PCIR_BAR(2),
4042 sc->msix_table_res);
/*
 * mxge_add_single_irq -- fall-back interrupt setup when not using MSI-X:
 * try to allocate a single MSI vector, otherwise use the legacy INTx
 * line, then allocate the shareable IRQ resource and attach mxge_intr
 * with slice 0 as its argument.  On setup failure the resource (and MSI,
 * if allocated) are released; rid 0 denotes legacy INTx, rid 1 MSI.
 * NOTE(review): fragment -- the legacy/MSI rid selection and
 * sc->legacy_irq assignment are on elided lines.
 */
4048 mxge_add_single_irq(mxge_softc_t *sc)
4050 int count, err, rid;
4052 count = pci_msi_count(sc->dev);
4053 if (count == 1 && pci_alloc_msi(sc->dev, &count) == 0) {
4059 sc->irq_res = bus_alloc_resource(sc->dev, SYS_RES_IRQ, &rid, 0, ~0,
4060 1, RF_SHAREABLE | RF_ACTIVE);
4061 if (sc->irq_res == NULL) {
4062 device_printf(sc->dev, "could not alloc interrupt\n");
4066 device_printf(sc->dev, "using %s irq %ld\n",
4067 sc->legacy_irq ? "INTx" : "MSI",
4068 rman_get_start(sc->irq_res));
4069 err = bus_setup_intr(sc->dev, sc->irq_res,
4070 INTR_TYPE_NET | INTR_MPSAFE,
4071 #if __FreeBSD_version > 700030
4074 mxge_intr, &sc->ss[0], &sc->ih);
4076 bus_release_resource(sc->dev, SYS_RES_IRQ,
4077 sc->legacy_irq ? 0 : 1, sc->irq_res);
4078 if (!sc->legacy_irq)
4079 pci_release_msi(sc->dev);
/*
 * mxge_rem_msix_irqs -- tear down MSI-X interrupts: for each slice,
 * detach the handler and release the IRQ resource, free the handler and
 * resource arrays, release the MSI-X table BAR mapping, and return the
 * MSI-X vectors to the system.  Mirrors the unwind labels of
 * mxge_add_msix_irqs().
 * NOTE(review): fragment -- rid computation and closing braces are on
 * elided lines.
 */
4085 mxge_rem_msix_irqs(mxge_softc_t *sc)
4089 for (i = 0; i < sc->num_slices; i++) {
4090 if (sc->msix_ih[i] != NULL) {
4091 bus_teardown_intr(sc->dev, sc->msix_irq_res[i],
4093 sc->msix_ih[i] = NULL;
4096 free(sc->msix_ih, M_DEVBUF);
4098 for (i = 0; i < sc->num_slices; i++) {
4100 if (sc->msix_irq_res[i] != NULL)
4101 bus_release_resource(sc->dev, SYS_RES_IRQ, rid,
4102 sc->msix_irq_res[i]);
4103 sc->msix_irq_res[i] = NULL;
4105 free(sc->msix_irq_res, M_DEVBUF);
4107 bus_release_resource(sc->dev, SYS_RES_MEMORY, PCIR_BAR(2),
4108 sc->msix_table_res);
4110 pci_release_msi(sc->dev);
/*
 * mxge_rem_single_irq -- tear down the single INTx/MSI interrupt:
 * detach the handler, release the IRQ resource (rid 0 for legacy INTx,
 * 1 for MSI), and release the MSI vector when one was allocated.
 */
4115 mxge_rem_single_irq(mxge_softc_t *sc)
4117 bus_teardown_intr(sc->dev, sc->irq_res, sc->ih);
4118 bus_release_resource(sc->dev, SYS_RES_IRQ,
4119 sc->legacy_irq ? 0 : 1, sc->irq_res);
4120 if (!sc->legacy_irq)
4121 pci_release_msi(sc->dev);
/*
 * mxge_rem_irq -- dispatch interrupt teardown: MSI-X path when running
 * multiple slices, otherwise the single INTx/MSI path.
 */
4125 mxge_rem_irq(mxge_softc_t *sc)
4127 if (sc->num_slices > 1)
4128 mxge_rem_msix_irqs(sc);
4130 mxge_rem_single_irq(sc);
/*
 * mxge_add_irq -- dispatch interrupt setup: MSI-X per-slice when
 * num_slices > 1, otherwise a single INTx/MSI vector.  The `0 &&`
 * block below is deliberately disabled (dead) re-setup code.
 */
4134 mxge_add_irq(mxge_softc_t *sc)
4138 if (sc->num_slices > 1)
4139 err = mxge_add_msix_irqs(sc);
4141 err = mxge_add_single_irq(sc);
4143 if (0 && err == 0 && sc->num_slices > 1) {
4144 mxge_rem_msix_irqs(sc);
4145 err = mxge_add_msix_irqs(sc);
4152 mxge_attach(device_t dev)
4154 mxge_softc_t *sc = device_get_softc(dev);
4159 mxge_fetch_tunables(sc);
4161 err = bus_dma_tag_create(NULL, /* parent */
4164 BUS_SPACE_MAXADDR, /* low */
4165 BUS_SPACE_MAXADDR, /* high */
4166 NULL, NULL, /* filter */
4167 65536 + 256, /* maxsize */
4168 MXGE_MAX_SEND_DESC, /* num segs */
4169 65536, /* maxsegsize */
4171 NULL, NULL, /* lock */
4172 &sc->parent_dmat); /* tag */
4175 device_printf(sc->dev, "Err %d allocating parent dmat\n",
4177 goto abort_with_nothing;
4180 ifp = sc->ifp = if_alloc(IFT_ETHER);
4182 device_printf(dev, "can not if_alloc()\n");
4184 goto abort_with_parent_dmat;
4186 if_initname(ifp, device_get_name(dev), device_get_unit(dev));
4188 snprintf(sc->cmd_mtx_name, sizeof(sc->cmd_mtx_name), "%s:cmd",
4189 device_get_nameunit(dev));
4190 mtx_init(&sc->cmd_mtx, sc->cmd_mtx_name, NULL, MTX_DEF);
4191 snprintf(sc->driver_mtx_name, sizeof(sc->driver_mtx_name),
4192 "%s:drv", device_get_nameunit(dev));
4193 mtx_init(&sc->driver_mtx, sc->driver_mtx_name,
4194 MTX_NETWORK_LOCK, MTX_DEF);
4196 callout_init_mtx(&sc->co_hdl, &sc->driver_mtx, 0);
4198 mxge_setup_cfg_space(sc);
4200 /* Map the board into the kernel */
4202 sc->mem_res = bus_alloc_resource(dev, SYS_RES_MEMORY, &rid, 0,
4204 if (sc->mem_res == NULL) {
4205 device_printf(dev, "could not map memory\n");
4207 goto abort_with_lock;
4209 sc->sram = rman_get_virtual(sc->mem_res);
4210 sc->sram_size = 2*1024*1024 - (2*(48*1024)+(32*1024)) - 0x100;
4211 if (sc->sram_size > rman_get_size(sc->mem_res)) {
4212 device_printf(dev, "impossible memory region size %ld\n",
4213 rman_get_size(sc->mem_res));
4215 goto abort_with_mem_res;
4218 /* make NULL terminated copy of the EEPROM strings section of
4220 bzero(sc->eeprom_strings, MXGE_EEPROM_STRINGS_SIZE);
4221 bus_space_read_region_1(rman_get_bustag(sc->mem_res),
4222 rman_get_bushandle(sc->mem_res),
4223 sc->sram_size - MXGE_EEPROM_STRINGS_SIZE,
4225 MXGE_EEPROM_STRINGS_SIZE - 2);
4226 err = mxge_parse_strings(sc);
4228 goto abort_with_mem_res;
4230 /* Enable write combining for efficient use of PCIe bus */
4233 /* Allocate the out of band dma memory */
4234 err = mxge_dma_alloc(sc, &sc->cmd_dma,
4235 sizeof (mxge_cmd_t), 64);
4237 goto abort_with_mem_res;
4238 sc->cmd = (mcp_cmd_response_t *) sc->cmd_dma.addr;
4239 err = mxge_dma_alloc(sc, &sc->zeropad_dma, 64, 64);
4241 goto abort_with_cmd_dma;
4243 err = mxge_dma_alloc(sc, &sc->dmabench_dma, 4096, 4096);
4245 goto abort_with_zeropad_dma;
4247 /* select & load the firmware */
4248 err = mxge_select_firmware(sc);
4250 goto abort_with_dmabench;
4251 sc->intr_coal_delay = mxge_intr_coal_delay;
4253 mxge_slice_probe(sc);
4254 err = mxge_alloc_slices(sc);
4256 goto abort_with_dmabench;
4258 err = mxge_reset(sc, 0);
4260 goto abort_with_slices;
4262 err = mxge_alloc_rings(sc);
4264 device_printf(sc->dev, "failed to allocate rings\n");
4265 goto abort_with_dmabench;
4268 err = mxge_add_irq(sc);
4270 device_printf(sc->dev, "failed to add irq\n");
4271 goto abort_with_rings;
4274 ifp->if_baudrate = IF_Gbps(10UL);
4275 ifp->if_capabilities = IFCAP_RXCSUM | IFCAP_TXCSUM | IFCAP_TSO4 |
4276 IFCAP_VLAN_MTU | IFCAP_LRO;
4278 #ifdef MXGE_NEW_VLAN_API
4279 ifp->if_capabilities |= IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_HWCSUM;
4282 sc->max_mtu = mxge_max_mtu(sc);
4283 if (sc->max_mtu >= 9000)
4284 ifp->if_capabilities |= IFCAP_JUMBO_MTU;
4286 device_printf(dev, "MTU limited to %d. Install "
4287 "latest firmware for 9000 byte jumbo support\n",
4288 sc->max_mtu - ETHER_HDR_LEN);
4289 ifp->if_hwassist = CSUM_TCP | CSUM_UDP | CSUM_TSO;
4290 ifp->if_capenable = ifp->if_capabilities;
4291 if (sc->lro_cnt == 0)
4292 ifp->if_capenable &= ~IFCAP_LRO;
4294 ifp->if_init = mxge_init;
4296 ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
4297 ifp->if_ioctl = mxge_ioctl;
4298 ifp->if_start = mxge_start;
4299 /* Initialise the ifmedia structure */
4300 ifmedia_init(&sc->media, 0, mxge_media_change,
4302 mxge_set_media(sc, IFM_ETHER | IFM_AUTO);
4303 mxge_media_probe(sc);
4304 ether_ifattach(ifp, sc->mac_addr);
4305 /* ether_ifattach sets mtu to 1500 */
4306 if (ifp->if_capabilities & IFCAP_JUMBO_MTU)
4309 mxge_add_sysctls(sc);
4313 mxge_free_rings(sc);
4315 mxge_free_slices(sc);
4316 abort_with_dmabench:
4317 mxge_dma_free(&sc->dmabench_dma);
4318 abort_with_zeropad_dma:
4319 mxge_dma_free(&sc->zeropad_dma);
4321 mxge_dma_free(&sc->cmd_dma);
4323 bus_release_resource(dev, SYS_RES_MEMORY, PCIR_BARS, sc->mem_res);
4325 pci_disable_busmaster(dev);
4326 mtx_destroy(&sc->cmd_mtx);
4327 mtx_destroy(&sc->driver_mtx);
4329 abort_with_parent_dmat:
4330 bus_dma_tag_destroy(sc->parent_dmat);
/*
 * mxge_detach() - device_detach(9) method: unhook the interface from the
 * network stack and release, in roughly reverse order of mxge_attach(),
 * every resource the attach path acquired (sysctls, rings, slices,
 * out-of-band DMA areas, the BAR mapping, mutexes, the parent DMA tag).
 *
 * NOTE(review): this excerpt appears lossy -- the return-type line, the
 * function braces, and several statements (the early-return after the
 * vlan check, the body of the IFF_DRV_RUNNING branch, the final return)
 * are not visible here.  Comments below describe only visible lines.
 */
4337 mxge_detach(device_t dev)
4339 	mxge_softc_t *sc = device_get_softc(dev);
	/* Refuse to detach while vlan interfaces are still stacked on this
	 * device; presumably an EBUSY early-return follows in the elided
	 * lines -- TODO confirm against full source. */
4341 	if (mxge_vlans_active(sc)) {
4342 		device_printf(sc->dev,
4343 			      "Detach vlans before removing module\n");
	/* Under the driver mutex: if the interface is up, stop it (the
	 * branch body, presumably mxge_close(sc), is elided here). */
4346 	mtx_lock(&sc->driver_mtx);
4347 	if (sc->ifp->if_drv_flags & IFF_DRV_RUNNING)
4349 	mtx_unlock(&sc->driver_mtx);
	/* Detach from the ethernet layer before freeing any state it
	 * might still reference. */
4350 	ether_ifdetach(sc->ifp);
	/* Wait for any in-flight callout (tick/watchdog handler) to
	 * finish before tearing down the softc. */
4351 	callout_drain(&sc->co_hdl);
4352 	ifmedia_removeall(&sc->media);
	/* Quiesce the firmware's dummy RDMA engine (0 = disable). */
4353 	mxge_dummy_rdma(sc, 0);
4354 	mxge_rem_sysctls(sc);
	/* Free rings, per-slice state, and the three out-of-band DMA
	 * regions (dmabench, zeropad, command) allocated at attach. */
4356 	mxge_free_rings(sc);
4357 	mxge_free_slices(sc);
4358 	mxge_dma_free(&sc->dmabench_dma);
4359 	mxge_dma_free(&sc->zeropad_dma);
4360 	mxge_dma_free(&sc->cmd_dma);
	/* Release the memory BAR and stop PCI bus mastering. */
4361 	bus_release_resource(dev, SYS_RES_MEMORY, PCIR_BARS, sc->mem_res);
4362 	pci_disable_busmaster(dev);
4363 	mtx_destroy(&sc->cmd_mtx);
4364 	mtx_destroy(&sc->driver_mtx);
	/* Destroy the parent DMA tag last, after all child allocations
	 * above have been freed. */
4366 	bus_dma_tag_destroy(sc->parent_dmat);
4371 mxge_shutdown(device_t dev)
4377 This file uses Myri10GE driver indentation.
4380 c-file-style:"linux"