1 /******************************************************************************
3 Copyright (c) 2006-2008, Myricom Inc.
6 Redistribution and use in source and binary forms, with or without
7 modification, are permitted provided that the following conditions are met:
9 1. Redistributions of source code must retain the above copyright notice,
10 this list of conditions and the following disclaimer.
12 2. Neither the name of the Myricom Inc, nor the names of its
13 contributors may be used to endorse or promote products derived from
14 this software without specific prior written permission.
16 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
17 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
20 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
21 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
22 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
23 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
24 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
25 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
26 POSSIBILITY OF SUCH DAMAGE.
28 ***************************************************************************/
30 #include <sys/cdefs.h>
31 __FBSDID("$FreeBSD$");
33 #include <sys/param.h>
34 #include <sys/systm.h>
35 #include <sys/linker.h>
36 #include <sys/firmware.h>
37 #include <sys/endian.h>
38 #include <sys/sockio.h>
40 #include <sys/malloc.h>
42 #include <sys/kernel.h>
44 #include <sys/module.h>
45 #include <sys/socket.h>
46 #include <sys/sysctl.h>
50 #include <net/if_arp.h>
51 #include <net/ethernet.h>
52 #include <net/if_dl.h>
53 #include <net/if_media.h>
57 #include <net/if_types.h>
58 #include <net/if_vlan_var.h>
61 #include <netinet/in_systm.h>
62 #include <netinet/in.h>
63 #include <netinet/ip.h>
64 #include <netinet/tcp.h>
66 #include <machine/bus.h>
67 #include <machine/in_cksum.h>
68 #include <machine/resource.h>
73 #include <dev/pci/pcireg.h>
74 #include <dev/pci/pcivar.h>
75 #include <dev/pci/pci_private.h> /* XXX for pci_cfg_restore */
77 #include <vm/vm.h> /* for pmap_mapdev() */
80 #if defined(__i386) || defined(__amd64)
81 #include <machine/specialreg.h>
84 #include <dev/mxge/mxge_mcp.h>
85 #include <dev/mxge/mcp_gen_header.h>
86 /*#define MXGE_FAKE_IFP*/
87 #include <dev/mxge/if_mxge_var.h>
90 static int mxge_nvidia_ecrc_enable = 1;
91 static int mxge_force_firmware = 0;
92 static int mxge_intr_coal_delay = 30;
93 static int mxge_deassert_wait = 1;
94 static int mxge_flow_control = 1;
95 static int mxge_verbose = 0;
96 static int mxge_lro_cnt = 8;
97 static int mxge_ticks;
98 static int mxge_max_slices = 1;
99 static int mxge_rss_hash_type = MXGEFW_RSS_HASH_TYPE_SRC_PORT;
100 static int mxge_always_promisc = 0;
101 static char *mxge_fw_unaligned = "mxge_ethp_z8e";
102 static char *mxge_fw_aligned = "mxge_eth_z8e";
103 static char *mxge_fw_rss_aligned = "mxge_rss_eth_z8e";
104 static char *mxge_fw_rss_unaligned = "mxge_rss_ethp_z8e";
106 static int mxge_probe(device_t dev);
107 static int mxge_attach(device_t dev);
108 static int mxge_detach(device_t dev);
109 static int mxge_shutdown(device_t dev);
110 static void mxge_intr(void *arg);
112 static device_method_t mxge_methods[] =
114 /* Device interface */
115 DEVMETHOD(device_probe, mxge_probe),
116 DEVMETHOD(device_attach, mxge_attach),
117 DEVMETHOD(device_detach, mxge_detach),
118 DEVMETHOD(device_shutdown, mxge_shutdown),
122 static driver_t mxge_driver =
126 sizeof(mxge_softc_t),
129 static devclass_t mxge_devclass;
131 /* Declare ourselves to be a child of the PCI bus.*/
132 DRIVER_MODULE(mxge, pci, mxge_driver, mxge_devclass, 0, 0);
133 MODULE_DEPEND(mxge, firmware, 1, 1, 1);
134 MODULE_DEPEND(mxge, zlib, 1, 1, 1);
136 static int mxge_load_firmware(mxge_softc_t *sc, int adopt);
137 static int mxge_send_cmd(mxge_softc_t *sc, uint32_t cmd, mxge_cmd_t *data);
138 static int mxge_close(mxge_softc_t *sc);
139 static int mxge_open(mxge_softc_t *sc);
140 static void mxge_tick(void *arg);
143 mxge_probe(device_t dev)
145 if ((pci_get_vendor(dev) == MXGE_PCI_VENDOR_MYRICOM) &&
146 ((pci_get_device(dev) == MXGE_PCI_DEVICE_Z8E) ||
147 (pci_get_device(dev) == MXGE_PCI_DEVICE_Z8E_9))) {
148 device_set_desc(dev, "Myri10G-PCIE-8A");
155 mxge_enable_wc(mxge_softc_t *sc)
157 #if defined(__i386) || defined(__amd64)
162 len = rman_get_size(sc->mem_res);
163 err = pmap_change_attr((vm_offset_t) sc->sram,
164 len, PAT_WRITE_COMBINING);
166 device_printf(sc->dev, "pmap_change_attr failed, %d\n",
174 /* callback to get our DMA address */
176 mxge_dmamap_callback(void *arg, bus_dma_segment_t *segs, int nsegs,
180 *(bus_addr_t *) arg = segs->ds_addr;
185 mxge_dma_alloc(mxge_softc_t *sc, mxge_dma_t *dma, size_t bytes,
186 bus_size_t alignment)
189 device_t dev = sc->dev;
190 bus_size_t boundary, maxsegsize;
192 if (bytes > 4096 && alignment == 4096) {
200 /* allocate DMAable memory tags */
201 err = bus_dma_tag_create(sc->parent_dmat, /* parent */
202 alignment, /* alignment */
203 boundary, /* boundary */
204 BUS_SPACE_MAXADDR, /* low */
205 BUS_SPACE_MAXADDR, /* high */
206 NULL, NULL, /* filter */
209 maxsegsize, /* maxsegsize */
210 BUS_DMA_COHERENT, /* flags */
211 NULL, NULL, /* lock */
212 &dma->dmat); /* tag */
214 device_printf(dev, "couldn't alloc tag (err = %d)\n", err);
218 /* allocate DMAable memory & map */
219 err = bus_dmamem_alloc(dma->dmat, &dma->addr,
220 (BUS_DMA_WAITOK | BUS_DMA_COHERENT
221 | BUS_DMA_ZERO), &dma->map);
223 device_printf(dev, "couldn't alloc mem (err = %d)\n", err);
224 goto abort_with_dmat;
227 /* load the memory */
228 err = bus_dmamap_load(dma->dmat, dma->map, dma->addr, bytes,
229 mxge_dmamap_callback,
230 (void *)&dma->bus_addr, 0);
232 device_printf(dev, "couldn't load map (err = %d)\n", err);
238 bus_dmamem_free(dma->dmat, dma->addr, dma->map);
240 (void)bus_dma_tag_destroy(dma->dmat);
246 mxge_dma_free(mxge_dma_t *dma)
248 bus_dmamap_unload(dma->dmat, dma->map);
249 bus_dmamem_free(dma->dmat, dma->addr, dma->map);
250 (void)bus_dma_tag_destroy(dma->dmat);
254 * The eeprom strings on the lanaiX have the format 'MAC=x:x:x:x:x:x\0', 'PC=text\0', 'SN=text\0'
261 mxge_parse_strings(mxge_softc_t *sc)
263 #define MXGE_NEXT_STRING(p) while(ptr < limit && *ptr++)
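/* Note: MXGE_NEXT_STRING ignores its argument and instead advances the
   enclosing function's ptr past the current NUL-terminated string
   (stopping early at limit). */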
268 ptr = sc->eeprom_strings;
269 limit = sc->eeprom_strings + MXGE_EEPROM_STRINGS_SIZE;
271 while (ptr < limit && *ptr != '\0') {
272 if (memcmp(ptr, "MAC=", 4) == 0) {
274 sc->mac_addr_string = ptr;
275 for (i = 0; i < 6; i++) {
277 if ((ptr + 2) > limit)
279 sc->mac_addr[i] = strtoul(ptr, NULL, 16);
282 } else if (memcmp(ptr, "PC=", 3) == 0) {
284 strncpy(sc->product_code_string, ptr,
285 sizeof (sc->product_code_string) - 1);
286 } else if (memcmp(ptr, "SN=", 3) == 0) {
288 strncpy(sc->serial_number_string, ptr,
289 sizeof (sc->serial_number_string) - 1);
291 MXGE_NEXT_STRING(ptr);
298 device_printf(sc->dev, "failed to parse eeprom_strings\n");
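/*
 * Illustrative example (hypothetical contents): given the string block
 * "MAC=00:60:dd:47:ad:2c\0PC=M3F-ETH-X\0SN=12345\0", the parser above
 * leaves sc->mac_addr = {0x00,0x60,0xdd,0x47,0xad,0x2c} and copies
 * "M3F-ETH-X" and "12345" into sc->product_code_string and
 * sc->serial_number_string respectively.
 */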
303 #if #cpu(i386) || defined __i386 || defined i386 || defined __i386__ || #cpu(x86_64) || defined __x86_64__
305 mxge_enable_nvidia_ecrc(mxge_softc_t *sc)
308 unsigned long base, off;
310 device_t pdev, mcp55;
311 uint16_t vendor_id, device_id, word;
312 uintptr_t bus, slot, func, ivend, idev;
316 if (!mxge_nvidia_ecrc_enable)
319 pdev = device_get_parent(device_get_parent(sc->dev));
321 device_printf(sc->dev, "could not find parent?\n");
324 vendor_id = pci_read_config(pdev, PCIR_VENDOR, 2);
325 device_id = pci_read_config(pdev, PCIR_DEVICE, 2);
327 if (vendor_id != 0x10de)
332 if (device_id == 0x005d) {
333 /* ck804, base address is magic */
335 } else if (device_id >= 0x0374 && device_id <= 0x378) {
336 /* mcp55, base address stored in chipset */
337 mcp55 = pci_find_bsf(0, 0, 0);
339 0x10de == pci_read_config(mcp55, PCIR_VENDOR, 2) &&
340 0x0369 == pci_read_config(mcp55, PCIR_DEVICE, 2)) {
341 word = pci_read_config(mcp55, 0x90, 2);
342 base = ((unsigned long)word & 0x7ffeU) << 25;
349 The test below is commented out because it is believed that doing
350 a config read/write beyond 0xff will access the config space
351 of the next larger function. Uncomment this and remove
352 the hacky pmap_mapdev() way of accessing config space when
353 FreeBSD grows support for extended pcie config space access.
356 /* See if we can, by some miracle, access the extended
358 val = pci_read_config(pdev, 0x178, 4);
359 if (val != 0xffffffff) {
361 pci_write_config(pdev, 0x178, val, 4);
365 /* Rather than using normal pci config space writes, we must
366 * map the Nvidia config space ourselves. This is because on
367 * opteron/nvidia class machines the 0xe000000 mapping is
368 * handled by the nvidia chipset, which means the internal PCI
369 * device (the on-chip northbridge), or the amd-8131 bridge
370 * and things behind them are not visible by this method.
373 BUS_READ_IVAR(device_get_parent(pdev), pdev,
375 BUS_READ_IVAR(device_get_parent(pdev), pdev,
376 PCI_IVAR_SLOT, &slot);
377 BUS_READ_IVAR(device_get_parent(pdev), pdev,
378 PCI_IVAR_FUNCTION, &func);
379 BUS_READ_IVAR(device_get_parent(pdev), pdev,
380 PCI_IVAR_VENDOR, &ivend);
381 BUS_READ_IVAR(device_get_parent(pdev), pdev,
382 PCI_IVAR_DEVICE, &idev);
385 + 0x00100000UL * (unsigned long)bus
386 + 0x00001000UL * (unsigned long)(func
389 /* map it into the kernel */
390 va = pmap_mapdev(trunc_page((vm_paddr_t)off), PAGE_SIZE);
394 device_printf(sc->dev, "pmap_mapdev failed\n");
397 /* get a pointer to the config space mapped into the kernel */
398 cfgptr = va + (off & PAGE_MASK);
400 /* make sure that we can really access it */
401 vendor_id = *(uint16_t *)(cfgptr + PCIR_VENDOR);
402 device_id = *(uint16_t *)(cfgptr + PCIR_DEVICE);
403 if (! (vendor_id == ivend && device_id == idev)) {
404 device_printf(sc->dev, "mapping failed: 0x%x:0x%x\n",
405 vendor_id, device_id);
406 pmap_unmapdev((vm_offset_t)va, PAGE_SIZE);
410 ptr32 = (uint32_t*)(cfgptr + 0x178);
413 if (val == 0xffffffff) {
414 device_printf(sc->dev, "extended mapping failed\n");
415 pmap_unmapdev((vm_offset_t)va, PAGE_SIZE);
419 pmap_unmapdev((vm_offset_t)va, PAGE_SIZE);
421 device_printf(sc->dev,
422 "Enabled ECRC on upstream Nvidia bridge "
424 (int)bus, (int)slot, (int)func);
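/*
 * Illustrative example (hypothetical device): with the mcp55 base
 * computed above and an upstream bridge at bus 2, slot 0, func 0, the
 * extended config window for that function sits at
 * off = base + 0x00100000UL * 2, and register 0x178 within it is then
 * reached through the pmap_mapdev() mapping of the enclosing page.
 */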
429 mxge_enable_nvidia_ecrc(mxge_softc_t *sc)
431 device_printf(sc->dev,
432 "Nforce 4 chipset on non-x86/amd64!?!?!\n");
439 mxge_dma_test(mxge_softc_t *sc, int test_type)
442 bus_addr_t dmatest_bus = sc->dmabench_dma.bus_addr;
448 /* Run a small DMA test.
449 * The magic multipliers to the length tell the firmware
450 * to do DMA read, write, or read+write tests. The
451 * results are returned in cmd.data0. The upper 16
452 * bits of the return is the number of transfers completed.
453 * The lower 16 bits is the time in 0.5us ticks that the
454 * transfers took to complete.
457 len = sc->tx_boundary;
459 cmd.data0 = MXGE_LOWPART_TO_U32(dmatest_bus);
460 cmd.data1 = MXGE_HIGHPART_TO_U32(dmatest_bus);
461 cmd.data2 = len * 0x10000;
462 status = mxge_send_cmd(sc, test_type, &cmd);
467 sc->read_dma = ((cmd.data0>>16) * len * 2) /
468 (cmd.data0 & 0xffff);
469 cmd.data0 = MXGE_LOWPART_TO_U32(dmatest_bus);
470 cmd.data1 = MXGE_HIGHPART_TO_U32(dmatest_bus);
471 cmd.data2 = len * 0x1;
472 status = mxge_send_cmd(sc, test_type, &cmd);
477 sc->write_dma = ((cmd.data0>>16) * len * 2) /
478 (cmd.data0 & 0xffff);
480 cmd.data0 = MXGE_LOWPART_TO_U32(dmatest_bus);
481 cmd.data1 = MXGE_HIGHPART_TO_U32(dmatest_bus);
482 cmd.data2 = len * 0x10001;
483 status = mxge_send_cmd(sc, test_type, &cmd);
488 sc->read_write_dma = ((cmd.data0>>16) * len * 2 * 2) /
489 (cmd.data0 & 0xffff);
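/*
 * Worked example of the conversion above: the firmware returns the
 * number of transfers in the upper 16 bits of cmd.data0 and the
 * elapsed time in 0.5us ticks in the lower 16 bits, so
 *	MB/s = (transfers * len bytes) / (ticks * 0.5us)
 *	     = (transfers * len * 2) / ticks	bytes per microsecond,
 * which is the expression used for read_dma and write_dma; the
 * read+write test moves data both ways, hence its extra factor of 2.
 */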
492 if (status != 0 && test_type != MXGEFW_CMD_UNALIGNED_TEST)
493 device_printf(sc->dev, "DMA %s benchmark failed: %d\n",
500 * The Lanai Z8E PCI-E interface achieves higher Read-DMA throughput
501 * when the PCI-E Completion packets are aligned on an 8-byte
502 * boundary. Some PCI-E chip sets always align Completion packets; on
503 * the ones that do not, the alignment can be enforced by enabling
504 * ECRC generation (if supported).
506 * When PCI-E Completion packets are not aligned, it is actually more
507 * efficient to limit Read-DMA transactions to 2KB, rather than 4KB.
509 * If the driver can neither enable ECRC nor verify that it has
510 * already been enabled, then it must use a firmware image which works
511 * around unaligned completion packets (ethp_z8e.dat), and it should
512 * also ensure that it never gives the device a Read-DMA which is
513 * larger than 2KB by setting the tx_boundary to 2KB. If ECRC is
514 * enabled, then the driver should use the aligned (eth_z8e.dat)
515 * firmware image, and set tx_boundary to 4KB.
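/*
 * Net effect, as a quick reference (restating the text above):
 *
 *	aligned completions guaranteed (ECRC or verified by probe)
 *		-> fw_name = eth_z8e,  tx_boundary = 4096
 *	otherwise
 *		-> fw_name = ethp_z8e, tx_boundary = 2048
 */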
519 mxge_firmware_probe(mxge_softc_t *sc)
521 device_t dev = sc->dev;
525 sc->tx_boundary = 4096;
527 * Verify the max read request size was set to 4KB
528 * before trying the test with 4KB.
530 if (pci_find_extcap(dev, PCIY_EXPRESS, &reg) == 0) {
531 pectl = pci_read_config(dev, reg + 0x8, 2);
532 if ((pectl & (5 << 12)) != (5 << 12)) {
533 device_printf(dev, "Max Read Req. size != 4k (0x%x)\n",
535 sc->tx_boundary = 2048;
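/* The check above looks at bits 14:12 of the PCIe Device Control
   register (offset 0x8 into the PCIe capability); the encoding 5
   means a 4096-byte Max Read Request Size, so anything else forces
   the 2048-byte tx_boundary. */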
540 * load the optimized firmware (which assumes aligned PCIe
541 * completions) in order to see if it works on this host.
543 sc->fw_name = mxge_fw_aligned;
544 status = mxge_load_firmware(sc, 1);
550 * Enable ECRC if possible
552 mxge_enable_nvidia_ecrc(sc);
555 * Run a DMA test which watches for unaligned completions and
556 * aborts on the first one seen.
559 status = mxge_dma_test(sc, MXGEFW_CMD_UNALIGNED_TEST);
561 return 0; /* keep the aligned firmware */
564 device_printf(dev, "DMA test failed: %d\n", status);
565 if (status == ENOSYS)
566 device_printf(dev, "Falling back to ethp! "
567 "Please install up to date fw\n");
572 mxge_select_firmware(mxge_softc_t *sc)
577 if (mxge_force_firmware != 0) {
578 if (mxge_force_firmware == 1)
583 device_printf(sc->dev,
584 "Assuming %s completions (forced)\n",
585 aligned ? "aligned" : "unaligned");
589 /* if the PCIe link width is 4 or less, we can use the aligned
590 firmware and skip any checks */
591 if (sc->link_width != 0 && sc->link_width <= 4) {
592 device_printf(sc->dev,
593 "PCIe x%d Link, expect reduced performance\n",
599 if (0 == mxge_firmware_probe(sc))
604 sc->fw_name = mxge_fw_aligned;
605 sc->tx_boundary = 4096;
607 sc->fw_name = mxge_fw_unaligned;
608 sc->tx_boundary = 2048;
610 return (mxge_load_firmware(sc, 0));
620 mxge_validate_firmware(mxge_softc_t *sc, const mcp_gen_header_t *hdr)
624 if (be32toh(hdr->mcp_type) != MCP_TYPE_ETH) {
625 device_printf(sc->dev, "Bad firmware type: 0x%x\n",
626 be32toh(hdr->mcp_type));
630 /* save firmware version for sysctl */
631 strncpy(sc->fw_version, hdr->version, sizeof (sc->fw_version));
633 device_printf(sc->dev, "firmware id: %s\n", hdr->version);
635 sscanf(sc->fw_version, "%d.%d.%d", &sc->fw_ver_major,
636 &sc->fw_ver_minor, &sc->fw_ver_tiny);
638 if (!(sc->fw_ver_major == MXGEFW_VERSION_MAJOR
639 && sc->fw_ver_minor == MXGEFW_VERSION_MINOR)) {
640 device_printf(sc->dev, "Found firmware version %s\n",
642 device_printf(sc->dev, "Driver needs %d.%d\n",
643 MXGEFW_VERSION_MAJOR, MXGEFW_VERSION_MINOR);
651 z_alloc(void *nil, u_int items, u_int size)
655 ptr = malloc(items * size, M_TEMP, M_NOWAIT);
660 z_free(void *nil, void *ptr)
667 mxge_load_firmware_helper(mxge_softc_t *sc, uint32_t *limit)
670 char *inflate_buffer;
671 const struct firmware *fw;
672 const mcp_gen_header_t *hdr;
679 fw = firmware_get(sc->fw_name);
681 device_printf(sc->dev, "Could not find firmware image %s\n",
688 /* setup zlib and decompress f/w */
689 bzero(&zs, sizeof (zs));
692 status = inflateInit(&zs);
693 if (status != Z_OK) {
698 /* the uncompressed size is stored as the firmware version,
699 which would otherwise go unused */
700 fw_len = (size_t) fw->version;
701 inflate_buffer = malloc(fw_len, M_TEMP, M_NOWAIT);
702 if (inflate_buffer == NULL)
704 zs.avail_in = fw->datasize;
705 zs.next_in = __DECONST(char *, fw->data);
706 zs.avail_out = fw_len;
707 zs.next_out = inflate_buffer;
708 status = inflate(&zs, Z_FINISH);
709 if (status != Z_STREAM_END) {
710 device_printf(sc->dev, "zlib %d\n", status);
712 goto abort_with_buffer;
716 hdr_offset = htobe32(*(const uint32_t *)
717 (inflate_buffer + MCP_HEADER_PTR_OFFSET));
718 if ((hdr_offset & 3) || hdr_offset + sizeof(*hdr) > fw_len) {
719 device_printf(sc->dev, "Bad firmware file\n");
721 goto abort_with_buffer;
723 hdr = (const void*)(inflate_buffer + hdr_offset);
725 status = mxge_validate_firmware(sc, hdr);
727 goto abort_with_buffer;
729 /* Copy the inflated firmware to NIC SRAM. */
730 for (i = 0; i < fw_len; i += 256) {
731 mxge_pio_copy(sc->sram + MXGE_FW_OFFSET + i,
733 min(256U, (unsigned)(fw_len - i)));
742 free(inflate_buffer, M_TEMP);
746 firmware_put(fw, FIRMWARE_UNLOAD);
751 * Enable or disable periodic RDMAs from the host to make certain
752 * chipsets resend dropped PCIe messages
756 mxge_dummy_rdma(mxge_softc_t *sc, int enable)
759 volatile uint32_t *confirm;
760 volatile char *submit;
761 uint32_t *buf, dma_low, dma_high;
764 buf = (uint32_t *)((unsigned long)(buf_bytes + 7) & ~7UL);
766 /* clear confirmation addr */
767 confirm = (volatile uint32_t *)sc->cmd;
771 /* send an rdma command to the PCIe engine, and wait for the
772 response in the confirmation address. The firmware should
773 write a -1 there to indicate it is alive and well
776 dma_low = MXGE_LOWPART_TO_U32(sc->cmd_dma.bus_addr);
777 dma_high = MXGE_HIGHPART_TO_U32(sc->cmd_dma.bus_addr);
778 buf[0] = htobe32(dma_high); /* confirm addr MSW */
779 buf[1] = htobe32(dma_low); /* confirm addr LSW */
780 buf[2] = htobe32(0xffffffff); /* confirm data */
781 dma_low = MXGE_LOWPART_TO_U32(sc->zeropad_dma.bus_addr);
782 dma_high = MXGE_HIGHPART_TO_U32(sc->zeropad_dma.bus_addr);
783 buf[3] = htobe32(dma_high); /* dummy addr MSW */
784 buf[4] = htobe32(dma_low); /* dummy addr LSW */
785 buf[5] = htobe32(enable); /* enable? */
788 submit = (volatile char *)(sc->sram + MXGEFW_BOOT_DUMMY_RDMA);
790 mxge_pio_copy(submit, buf, 64);
795 while (*confirm != 0xffffffff && i < 20) {
799 if (*confirm != 0xffffffff) {
800 device_printf(sc->dev, "dummy rdma %s failed (%p = 0x%x)\n",
801 (enable ? "enable" : "disable"), confirm,
808 mxge_send_cmd(mxge_softc_t *sc, uint32_t cmd, mxge_cmd_t *data)
811 char buf_bytes[sizeof(*buf) + 8];
812 volatile mcp_cmd_response_t *response = sc->cmd;
813 volatile char *cmd_addr = sc->sram + MXGEFW_ETH_CMD;
814 uint32_t dma_low, dma_high;
815 int err, sleep_total = 0;
817 /* ensure buf is aligned to 8 bytes */
818 buf = (mcp_cmd_t *)((unsigned long)(buf_bytes + 7) & ~7UL);
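/* Align-up example: if buf_bytes starts at ...f005, then
   (...f005 + 7) & ~7 = ...f008, the next 8-byte boundary; an
   already-aligned ...f008 maps to itself. */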
820 buf->data0 = htobe32(data->data0);
821 buf->data1 = htobe32(data->data1);
822 buf->data2 = htobe32(data->data2);
823 buf->cmd = htobe32(cmd);
824 dma_low = MXGE_LOWPART_TO_U32(sc->cmd_dma.bus_addr);
825 dma_high = MXGE_HIGHPART_TO_U32(sc->cmd_dma.bus_addr);
827 buf->response_addr.low = htobe32(dma_low);
828 buf->response_addr.high = htobe32(dma_high);
829 mtx_lock(&sc->cmd_mtx);
830 response->result = 0xffffffff;
832 mxge_pio_copy((volatile void *)cmd_addr, buf, sizeof (*buf));
834 /* wait up to 20ms */
836 for (sleep_total = 0; sleep_total < 20; sleep_total++) {
837 bus_dmamap_sync(sc->cmd_dma.dmat,
838 sc->cmd_dma.map, BUS_DMASYNC_POSTREAD);
840 switch (be32toh(response->result)) {
842 data->data0 = be32toh(response->data);
848 case MXGEFW_CMD_UNKNOWN:
851 case MXGEFW_CMD_ERROR_UNALIGNED:
854 case MXGEFW_CMD_ERROR_BUSY:
858 device_printf(sc->dev,
860 "failed, result = %d\n",
861 cmd, be32toh(response->result));
869 device_printf(sc->dev, "mxge: command %d timed out, result = %d\n",
871 cmd, be32toh(response->result));
872 mtx_unlock(&sc->cmd_mtx);
877 mxge_adopt_running_firmware(mxge_softc_t *sc)
879 struct mcp_gen_header *hdr;
880 const size_t bytes = sizeof (struct mcp_gen_header);
884 /* find running firmware header */
885 hdr_offset = htobe32(*(volatile uint32_t *)
886 (sc->sram + MCP_HEADER_PTR_OFFSET));
888 if ((hdr_offset & 3) || hdr_offset + sizeof(*hdr) > sc->sram_size) {
889 device_printf(sc->dev,
890 "Running firmware has bad header offset (%d)\n",
895 /* copy header of running firmware from SRAM to host memory to
896 * validate firmware */
897 hdr = malloc(bytes, M_DEVBUF, M_NOWAIT);
899 device_printf(sc->dev, "could not malloc firmware hdr\n");
902 bus_space_read_region_1(rman_get_bustag(sc->mem_res),
903 rman_get_bushandle(sc->mem_res),
904 hdr_offset, (char *)hdr, bytes);
905 status = mxge_validate_firmware(sc, hdr);
909 * check to see if the adopted firmware has the bug where adopting
910 * it will cause broadcasts to be filtered unless the NIC
911 * is kept in ALLMULTI mode
913 if (sc->fw_ver_major == 1 && sc->fw_ver_minor == 4 &&
914 sc->fw_ver_tiny >= 4 && sc->fw_ver_tiny <= 11) {
915 sc->adopted_rx_filter_bug = 1;
916 device_printf(sc->dev, "Adopting fw %d.%d.%d: "
917 "working around rx filter bug\n",
918 sc->fw_ver_major, sc->fw_ver_minor,
927 mxge_load_firmware(mxge_softc_t *sc, int adopt)
929 volatile uint32_t *confirm;
930 volatile char *submit;
932 uint32_t *buf, size, dma_low, dma_high;
935 buf = (uint32_t *)((unsigned long)(buf_bytes + 7) & ~7UL);
937 size = sc->sram_size;
938 status = mxge_load_firmware_helper(sc, &size);
942 /* Try to use the currently running firmware, if
944 status = mxge_adopt_running_firmware(sc);
946 device_printf(sc->dev,
947 "failed to adopt running firmware\n");
950 device_printf(sc->dev,
951 "Successfully adopted running firmware\n");
952 if (sc->tx_boundary == 4096) {
953 device_printf(sc->dev,
954 "Using firmware currently running on NIC"
956 device_printf(sc->dev,
957 "performance consider loading optimized "
960 sc->fw_name = mxge_fw_unaligned;
961 sc->tx_boundary = 2048;
964 /* clear confirmation addr */
965 confirm = (volatile uint32_t *)sc->cmd;
968 /* send a reload command to the bootstrap MCP, and wait for the
969 response in the confirmation address. The firmware should
970 write a -1 there to indicate it is alive and well
973 dma_low = MXGE_LOWPART_TO_U32(sc->cmd_dma.bus_addr);
974 dma_high = MXGE_HIGHPART_TO_U32(sc->cmd_dma.bus_addr);
976 buf[0] = htobe32(dma_high); /* confirm addr MSW */
977 buf[1] = htobe32(dma_low); /* confirm addr LSW */
978 buf[2] = htobe32(0xffffffff); /* confirm data */
980 /* FIX: All newest firmware should un-protect the bottom of
981 the sram before handoff. However, the very first interfaces
982 do not. Therefore the handoff copy must skip the first 8 bytes
984 /* where the code starts*/
985 buf[3] = htobe32(MXGE_FW_OFFSET + 8);
986 buf[4] = htobe32(size - 8); /* length of code */
987 buf[5] = htobe32(8); /* where to copy to */
988 buf[6] = htobe32(0); /* where to jump to */
990 submit = (volatile char *)(sc->sram + MXGEFW_BOOT_HANDOFF);
991 mxge_pio_copy(submit, buf, 64);
996 while (*confirm != 0xffffffff && i < 20) {
999 bus_dmamap_sync(sc->cmd_dma.dmat,
1000 sc->cmd_dma.map, BUS_DMASYNC_POSTREAD);
1002 if (*confirm != 0xffffffff) {
1003 device_printf(sc->dev, "handoff failed (%p = 0x%x)\n",
1012 mxge_update_mac_address(mxge_softc_t *sc)
1015 uint8_t *addr = sc->mac_addr;
1019 cmd.data0 = ((addr[0] << 24) | (addr[1] << 16)
1020 | (addr[2] << 8) | addr[3]);
1022 cmd.data1 = ((addr[4] << 8) | (addr[5]));
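/* Example with a hypothetical address: for MAC 00:60:dd:47:ad:2c the
   packing above yields cmd.data0 = 0x0060dd47 and
   cmd.data1 = 0x0000ad2c; mxge_send_cmd() then converts each word to
   big-endian for the firmware. */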
1024 status = mxge_send_cmd(sc, MXGEFW_SET_MAC_ADDRESS, &cmd);
1029 mxge_change_pause(mxge_softc_t *sc, int pause)
1035 status = mxge_send_cmd(sc, MXGEFW_ENABLE_FLOW_CONTROL,
1038 status = mxge_send_cmd(sc, MXGEFW_DISABLE_FLOW_CONTROL,
1042 device_printf(sc->dev, "Failed to set flow control mode\n");
1050 mxge_change_promisc(mxge_softc_t *sc, int promisc)
1055 if (mxge_always_promisc)
1059 status = mxge_send_cmd(sc, MXGEFW_ENABLE_PROMISC,
1062 status = mxge_send_cmd(sc, MXGEFW_DISABLE_PROMISC,
1066 device_printf(sc->dev, "Failed to set promisc mode\n");
1071 mxge_set_multicast_list(mxge_softc_t *sc)
1074 struct ifmultiaddr *ifma;
1075 struct ifnet *ifp = sc->ifp;
1078 /* This firmware is known to not support multicast */
1079 if (!sc->fw_multicast_support)
1082 /* Disable multicast filtering while we play with the lists */
1083 err = mxge_send_cmd(sc, MXGEFW_ENABLE_ALLMULTI, &cmd);
1085 device_printf(sc->dev, "Failed MXGEFW_ENABLE_ALLMULTI,"
1086 " error status: %d\n", err);
1090 if (sc->adopted_rx_filter_bug)
1093 if (ifp->if_flags & IFF_ALLMULTI)
1094 /* request to disable multicast filtering, so quit here */
1097 /* Flush all the filters */
1099 err = mxge_send_cmd(sc, MXGEFW_LEAVE_ALL_MULTICAST_GROUPS, &cmd);
1101 device_printf(sc->dev,
1102 "Failed MXGEFW_LEAVE_ALL_MULTICAST_GROUPS"
1103 ", error status: %d\n", err);
1107 /* Walk the multicast list, and add each address */
1110 TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
1111 if (ifma->ifma_addr->sa_family != AF_LINK)
1113 bcopy(LLADDR((struct sockaddr_dl *)ifma->ifma_addr),
1115 bcopy(LLADDR((struct sockaddr_dl *)ifma->ifma_addr) + 4,
1117 cmd.data0 = htonl(cmd.data0);
1118 cmd.data1 = htonl(cmd.data1);
1119 err = mxge_send_cmd(sc, MXGEFW_JOIN_MULTICAST_GROUP, &cmd);
1121 device_printf(sc->dev, "Failed "
1122 "MXGEFW_JOIN_MULTICAST_GROUP, error status:"
1124 /* abort, leaving multicast filtering off */
1125 IF_ADDR_UNLOCK(ifp);
1129 IF_ADDR_UNLOCK(ifp);
1130 /* Enable multicast filtering */
1131 err = mxge_send_cmd(sc, MXGEFW_DISABLE_ALLMULTI, &cmd);
1133 device_printf(sc->dev, "Failed MXGEFW_DISABLE_ALLMULTI"
1134 ", error status: %d\n", err);
1139 mxge_max_mtu(mxge_softc_t *sc)
1144 if (MJUMPAGESIZE - MXGEFW_PAD > MXGEFW_MAX_MTU)
1145 return MXGEFW_MAX_MTU - MXGEFW_PAD;
1147 /* try to set nbufs to see if we can
1148 use virtually contiguous jumbos */
1150 status = mxge_send_cmd(sc, MXGEFW_CMD_ALWAYS_USE_N_BIG_BUFFERS,
1153 return MXGEFW_MAX_MTU - MXGEFW_PAD;
1155 /* otherwise, we're limited to MJUMPAGESIZE */
1156 return MJUMPAGESIZE - MXGEFW_PAD;
1160 mxge_reset(mxge_softc_t *sc, int interrupts_setup)
1162 struct mxge_slice_state *ss;
1163 mxge_rx_done_t *rx_done;
1164 volatile uint32_t *irq_claim;
1168 /* try to send a reset command to the card to see if it is alive */
1170 memset(&cmd, 0, sizeof (cmd));
1171 status = mxge_send_cmd(sc, MXGEFW_CMD_RESET, &cmd);
1173 device_printf(sc->dev, "failed reset\n");
1177 mxge_dummy_rdma(sc, 1);
1180 /* set the intrq size */
1181 cmd.data0 = sc->rx_ring_size;
1182 status = mxge_send_cmd(sc, MXGEFW_CMD_SET_INTRQ_SIZE, &cmd);
1185 * Even though we already know how many slices are supported
1186 * via mxge_slice_probe(), MXGEFW_CMD_GET_MAX_RSS_QUEUES
1187 * has magic side effects, and must be called after a reset.
1188 * It must be called prior to calling any RSS related cmds,
1189 * including assigning an interrupt queue for anything but
1190 * slice 0. It must also be called *after*
1191 * MXGEFW_CMD_SET_INTRQ_SIZE, since the intrq size is used by
1192 * the firmware to compute offsets.
1195 if (sc->num_slices > 1) {
1196 /* ask the maximum number of slices it supports */
1197 status = mxge_send_cmd(sc, MXGEFW_CMD_GET_MAX_RSS_QUEUES,
1200 device_printf(sc->dev,
1201 "failed to get number of slices\n");
1205 * MXGEFW_CMD_ENABLE_RSS_QUEUES must be called prior
1206 * to setting up the interrupt queue DMA
1208 cmd.data0 = sc->num_slices;
1209 cmd.data1 = MXGEFW_SLICE_INTR_MODE_ONE_PER_SLICE;
1210 status = mxge_send_cmd(sc, MXGEFW_CMD_ENABLE_RSS_QUEUES,
1213 device_printf(sc->dev,
1214 "failed to set number of slices\n");
1220 if (interrupts_setup) {
1221 /* Now exchange information about interrupts */
1222 for (slice = 0; slice < sc->num_slices; slice++) {
1223 rx_done = &sc->ss[slice].rx_done;
1224 memset(rx_done->entry, 0, sc->rx_ring_size);
1225 cmd.data0 = MXGE_LOWPART_TO_U32(rx_done->dma.bus_addr);
1226 cmd.data1 = MXGE_HIGHPART_TO_U32(rx_done->dma.bus_addr);
1228 status |= mxge_send_cmd(sc,
1229 MXGEFW_CMD_SET_INTRQ_DMA,
1234 status |= mxge_send_cmd(sc,
1235 MXGEFW_CMD_GET_INTR_COAL_DELAY_OFFSET, &cmd);
1238 sc->intr_coal_delay_ptr = (volatile uint32_t *)(sc->sram + cmd.data0);
1240 status |= mxge_send_cmd(sc, MXGEFW_CMD_GET_IRQ_ACK_OFFSET, &cmd);
1241 irq_claim = (volatile uint32_t *)(sc->sram + cmd.data0);
1244 status |= mxge_send_cmd(sc, MXGEFW_CMD_GET_IRQ_DEASSERT_OFFSET,
1246 sc->irq_deassert = (volatile uint32_t *)(sc->sram + cmd.data0);
1248 device_printf(sc->dev, "failed set interrupt parameters\n");
1253 *sc->intr_coal_delay_ptr = htobe32(sc->intr_coal_delay);
1256 /* run a DMA benchmark */
1257 (void) mxge_dma_test(sc, MXGEFW_DMA_TEST);
1259 for (slice = 0; slice < sc->num_slices; slice++) {
1260 ss = &sc->ss[slice];
1262 ss->irq_claim = irq_claim + (2 * slice);
1263 /* reset mcp/driver shared state back to 0 */
1264 ss->rx_done.idx = 0;
1265 ss->rx_done.cnt = 0;
1268 ss->tx.pkt_done = 0;
1273 ss->rx_small.cnt = 0;
1274 ss->lro_bad_csum = 0;
1276 ss->lro_flushed = 0;
1277 if (ss->fw_stats != NULL) {
1278 ss->fw_stats->valid = 0;
1279 ss->fw_stats->send_done_count = 0;
1282 sc->rdma_tags_available = 15;
1283 status = mxge_update_mac_address(sc);
1284 mxge_change_promisc(sc, 0);
1285 mxge_change_pause(sc, sc->pause);
1286 mxge_set_multicast_list(sc);
1291 mxge_change_intr_coal(SYSCTL_HANDLER_ARGS)
1294 unsigned int intr_coal_delay;
1298 intr_coal_delay = sc->intr_coal_delay;
1299 err = sysctl_handle_int(oidp, &intr_coal_delay, arg2, req);
1303 if (intr_coal_delay == sc->intr_coal_delay)
1306 if (intr_coal_delay == 0 || intr_coal_delay > 1000*1000)
1309 mtx_lock(&sc->driver_mtx);
1310 *sc->intr_coal_delay_ptr = htobe32(intr_coal_delay);
1311 sc->intr_coal_delay = intr_coal_delay;
1313 mtx_unlock(&sc->driver_mtx);
1318 mxge_change_flow_control(SYSCTL_HANDLER_ARGS)
1321 unsigned int enabled;
1325 enabled = sc->pause;
1326 err = sysctl_handle_int(oidp, &enabled, arg2, req);
1330 if (enabled == sc->pause)
1333 mtx_lock(&sc->driver_mtx);
1334 err = mxge_change_pause(sc, enabled);
1335 mtx_unlock(&sc->driver_mtx);
1340 mxge_change_lro_locked(mxge_softc_t *sc, int lro_cnt)
1347 ifp->if_capenable &= ~IFCAP_LRO;
1349 ifp->if_capenable |= IFCAP_LRO;
1350 sc->lro_cnt = lro_cnt;
1351 if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
1353 err = mxge_open(sc);
1359 mxge_change_lro(SYSCTL_HANDLER_ARGS)
1362 unsigned int lro_cnt;
1366 lro_cnt = sc->lro_cnt;
1367 err = sysctl_handle_int(oidp, &lro_cnt, arg2, req);
1371 if (lro_cnt == sc->lro_cnt)
1377 mtx_lock(&sc->driver_mtx);
1378 err = mxge_change_lro_locked(sc, lro_cnt);
1379 mtx_unlock(&sc->driver_mtx);
1384 mxge_handle_be32(SYSCTL_HANDLER_ARGS)
1390 arg2 = be32toh(*(int *)arg1);
1392 err = sysctl_handle_int(oidp, arg1, arg2, req);
1398 mxge_rem_sysctls(mxge_softc_t *sc)
1400 struct mxge_slice_state *ss;
1403 if (sc->slice_sysctl_tree == NULL)
1406 for (slice = 0; slice < sc->num_slices; slice++) {
1407 ss = &sc->ss[slice];
1408 if (ss == NULL || ss->sysctl_tree == NULL)
1410 sysctl_ctx_free(&ss->sysctl_ctx);
1411 ss->sysctl_tree = NULL;
1413 sysctl_ctx_free(&sc->slice_sysctl_ctx);
1414 sc->slice_sysctl_tree = NULL;
1418 mxge_add_sysctls(mxge_softc_t *sc)
1420 struct sysctl_ctx_list *ctx;
1421 struct sysctl_oid_list *children;
1423 struct mxge_slice_state *ss;
1427 ctx = device_get_sysctl_ctx(sc->dev);
1428 children = SYSCTL_CHILDREN(device_get_sysctl_tree(sc->dev));
1429 fw = sc->ss[0].fw_stats;
1431 /* random information */
1432 SYSCTL_ADD_STRING(ctx, children, OID_AUTO,
1434 CTLFLAG_RD, &sc->fw_version,
1435 0, "firmware version");
1436 SYSCTL_ADD_STRING(ctx, children, OID_AUTO,
1438 CTLFLAG_RD, &sc->serial_number_string,
1439 0, "serial number");
1440 SYSCTL_ADD_STRING(ctx, children, OID_AUTO,
1442 CTLFLAG_RD, &sc->product_code_string,
1444 SYSCTL_ADD_INT(ctx, children, OID_AUTO,
1446 CTLFLAG_RD, &sc->link_width,
1448 SYSCTL_ADD_INT(ctx, children, OID_AUTO,
1450 CTLFLAG_RD, &sc->tx_boundary,
1452 SYSCTL_ADD_INT(ctx, children, OID_AUTO,
1454 CTLFLAG_RD, &sc->wc,
1455 0, "write combining PIO?");
1456 SYSCTL_ADD_INT(ctx, children, OID_AUTO,
1458 CTLFLAG_RD, &sc->read_dma,
1459 0, "DMA Read speed in MB/s");
1460 SYSCTL_ADD_INT(ctx, children, OID_AUTO,
1462 CTLFLAG_RD, &sc->write_dma,
1463 0, "DMA Write speed in MB/s");
1464 SYSCTL_ADD_INT(ctx, children, OID_AUTO,
1465 "read_write_dma_MBs",
1466 CTLFLAG_RD, &sc->read_write_dma,
1467 0, "DMA concurrent Read/Write speed in MB/s");
1470 /* performance related tunables */
1471 SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
1473 CTLTYPE_INT|CTLFLAG_RW, sc,
1474 0, mxge_change_intr_coal,
1475 "I", "interrupt coalescing delay in usecs");
1477 SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
1478 "flow_control_enabled",
1479 CTLTYPE_INT|CTLFLAG_RW, sc,
1480 0, mxge_change_flow_control,
1481 "I", "interrupt coalescing delay in usecs");
1483 SYSCTL_ADD_INT(ctx, children, OID_AUTO,
1485 CTLFLAG_RW, &mxge_deassert_wait,
1486 0, "Wait for IRQ line to go low in ihandler");
1488 /* stats block from firmware is in network byte order.
1490 SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
1492 CTLTYPE_INT|CTLFLAG_RD, &fw->link_up,
1493 0, mxge_handle_be32,
1495 SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
1496 "rdma_tags_available",
1497 CTLTYPE_INT|CTLFLAG_RD, &fw->rdma_tags_available,
1498 0, mxge_handle_be32,
1499 "I", "rdma_tags_available");
1500 SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
1501 "dropped_bad_crc32",
1502 CTLTYPE_INT|CTLFLAG_RD,
1503 &fw->dropped_bad_crc32,
1504 0, mxge_handle_be32,
1505 "I", "dropped_bad_crc32");
1506 SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
1508 CTLTYPE_INT|CTLFLAG_RD,
1509 &fw->dropped_bad_phy,
1510 0, mxge_handle_be32,
1511 "I", "dropped_bad_phy");
1512 SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
1513 "dropped_link_error_or_filtered",
1514 CTLTYPE_INT|CTLFLAG_RD,
1515 &fw->dropped_link_error_or_filtered,
1516 0, mxge_handle_be32,
1517 "I", "dropped_link_error_or_filtered");
1518 SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
1519 "dropped_link_overflow",
1520 CTLTYPE_INT|CTLFLAG_RD, &fw->dropped_link_overflow,
1521 0, mxge_handle_be32,
1522 "I", "dropped_link_overflow");
1523 SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
1524 "dropped_multicast_filtered",
1525 CTLTYPE_INT|CTLFLAG_RD,
1526 &fw->dropped_multicast_filtered,
1527 0, mxge_handle_be32,
1528 "I", "dropped_multicast_filtered");
1529 SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
1530 "dropped_no_big_buffer",
1531 CTLTYPE_INT|CTLFLAG_RD, &fw->dropped_no_big_buffer,
1532 0, mxge_handle_be32,
1533 "I", "dropped_no_big_buffer");
1534 SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
1535 "dropped_no_small_buffer",
1536 CTLTYPE_INT|CTLFLAG_RD,
1537 &fw->dropped_no_small_buffer,
1538 0, mxge_handle_be32,
1539 "I", "dropped_no_small_buffer");
1540 SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
1542 CTLTYPE_INT|CTLFLAG_RD, &fw->dropped_overrun,
1543 0, mxge_handle_be32,
1544 "I", "dropped_overrun");
1545 SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
1547 CTLTYPE_INT|CTLFLAG_RD,
1549 0, mxge_handle_be32,
1550 "I", "dropped_pause");
1551 SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
1553 CTLTYPE_INT|CTLFLAG_RD, &fw->dropped_runt,
1554 0, mxge_handle_be32,
1555 "I", "dropped_runt");
1557 SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
1558 "dropped_unicast_filtered",
1559 CTLTYPE_INT|CTLFLAG_RD, &fw->dropped_unicast_filtered,
1560 0, mxge_handle_be32,
1561 "I", "dropped_unicast_filtered");
1563 /* verbose printing? */
1564 SYSCTL_ADD_INT(ctx, children, OID_AUTO,
1566 CTLFLAG_RW, &mxge_verbose,
1567 0, "verbose printing");
1570 SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
1572 CTLTYPE_INT|CTLFLAG_RW, sc,
1574 "I", "number of lro merge queues");
1577 /* add counters exported for debugging from all slices */
1578 sysctl_ctx_init(&sc->slice_sysctl_ctx);
1579 sc->slice_sysctl_tree =
1580 SYSCTL_ADD_NODE(&sc->slice_sysctl_ctx, children, OID_AUTO,
1581 "slice", CTLFLAG_RD, 0, "");
1583 for (slice = 0; slice < sc->num_slices; slice++) {
1584 ss = &sc->ss[slice];
1585 sysctl_ctx_init(&ss->sysctl_ctx);
1586 ctx = &ss->sysctl_ctx;
1587 children = SYSCTL_CHILDREN(sc->slice_sysctl_tree);
1588 sprintf(slice_num, "%d", slice);
1590 SYSCTL_ADD_NODE(ctx, children, OID_AUTO, slice_num,
1592 children = SYSCTL_CHILDREN(ss->sysctl_tree);
1593 SYSCTL_ADD_INT(ctx, children, OID_AUTO,
1595 CTLFLAG_RD, &ss->rx_small.cnt,
1597 SYSCTL_ADD_INT(ctx, children, OID_AUTO,
1599 CTLFLAG_RD, &ss->rx_big.cnt,
1601 SYSCTL_ADD_INT(ctx, children, OID_AUTO,
1603 CTLFLAG_RD, &ss->tx.req,
1605 SYSCTL_ADD_INT(ctx, children, OID_AUTO,
1606 "lro_flushed", CTLFLAG_RD, &ss->lro_flushed,
1607 0, "number of lro merge queues flushed");
1609 SYSCTL_ADD_INT(ctx, children, OID_AUTO,
1610 "lro_queued", CTLFLAG_RD, &ss->lro_queued,
1611 0, "number of frames appended to lro merge"
1614 /* only transmit from slice 0 for now */
1618 SYSCTL_ADD_INT(ctx, children, OID_AUTO,
1620 CTLFLAG_RD, &ss->tx.done,
1622 SYSCTL_ADD_INT(ctx, children, OID_AUTO,
1624 CTLFLAG_RD, &ss->tx.pkt_done,
1626 SYSCTL_ADD_INT(ctx, children, OID_AUTO,
1628 CTLFLAG_RD, &ss->tx.stall,
1630 SYSCTL_ADD_INT(ctx, children, OID_AUTO,
1632 CTLFLAG_RD, &ss->tx.wake,
1634 SYSCTL_ADD_INT(ctx, children, OID_AUTO,
1636 CTLFLAG_RD, &ss->tx.defrag,
1641 /* copy an array of mcp_kreq_ether_send_t's to the mcp. Copy
1642 backwards one at a time and handle ring wraps */
1645 mxge_submit_req_backwards(mxge_tx_ring_t *tx,
1646 mcp_kreq_ether_send_t *src, int cnt)
1648 int idx, starting_slot;
1649 starting_slot = tx->req;
1652 idx = (starting_slot + cnt) & tx->mask;
1653 mxge_pio_copy(&tx->lanai[idx],
1654 &src[cnt], sizeof(*src));
1660 * copy an array of mcp_kreq_ether_send_t's to the mcp. Copy
1661 * at most 32 bytes at a time, so as to avoid involving the software
1662 * pio handler in the nic. We re-write the first segment's flags
1663 * to mark them valid only after writing the entire chain
1667 mxge_submit_req(mxge_tx_ring_t *tx, mcp_kreq_ether_send_t *src,
1672 volatile uint32_t *dst_ints;
1673 mcp_kreq_ether_send_t *srcp;
1674 volatile mcp_kreq_ether_send_t *dstp, *dst;
1677 idx = tx->req & tx->mask;
1679 last_flags = src->flags;
1682 dst = dstp = &tx->lanai[idx];
1685 if ((idx + cnt) < tx->mask) {
1686 for (i = 0; i < (cnt - 1); i += 2) {
1687 mxge_pio_copy(dstp, srcp, 2 * sizeof(*src));
1688 mb(); /* force write every 32 bytes */
1693 /* submit all but the first request, and ensure
1694 that it is submitted below */
1695 mxge_submit_req_backwards(tx, src, cnt);
1699 /* submit the first request */
1700 mxge_pio_copy(dstp, srcp, sizeof(*src));
1701 mb(); /* barrier before setting valid flag */
1704 /* re-write the last 32-bits with the valid flags */
1705 src->flags = last_flags;
1706 src_ints = (uint32_t *)src;
1708 dst_ints = (volatile uint32_t *)dst;
1710 *dst_ints = *src_ints;
1718 mxge_encap_tso(struct mxge_slice_state *ss, struct mbuf *m,
1719 int busdma_seg_cnt, int ip_off)
1722 mcp_kreq_ether_send_t *req;
1723 bus_dma_segment_t *seg;
1726 uint32_t low, high_swapped;
1727 int len, seglen, cum_len, cum_len_next;
1728 int next_is_first, chop, cnt, rdma_count, small;
1729 uint16_t pseudo_hdr_offset, cksum_offset, mss;
1730 uint8_t flags, flags_next;
1733 mss = m->m_pkthdr.tso_segsz;
1735 /* negative cum_len signifies to the
1736 * send loop that we are still in the
1737 * header portion of the TSO packet.
1740 /* ensure we have the ethernet, IP and TCP
1741 header together in the first mbuf, copy
1742 it to a scratch buffer if not */
1743 if (__predict_false(m->m_len < ip_off + sizeof (*ip))) {
1744 m_copydata(m, 0, ip_off + sizeof (*ip),
1746 ip = (struct ip *)(ss->scratch + ip_off);
1748 ip = (struct ip *)(mtod(m, char *) + ip_off);
1750 if (__predict_false(m->m_len < ip_off + (ip->ip_hl << 2)
1752 m_copydata(m, 0, ip_off + (ip->ip_hl << 2)
1753 + sizeof (*tcp), ss->scratch);
1754 ip = (struct ip *)(mtod(m, char *) + ip_off);
1757 tcp = (struct tcphdr *)((char *)ip + (ip->ip_hl << 2));
1758 cum_len = -(ip_off + ((ip->ip_hl + tcp->th_off) << 2));
1760 /* TSO implies checksum offload on this hardware */
1761 cksum_offset = ip_off + (ip->ip_hl << 2);
1762 flags = MXGEFW_FLAGS_TSO_HDR | MXGEFW_FLAGS_FIRST;
1765 /* for TSO, pseudo_hdr_offset holds mss.
1766 * The firmware figures out where to put
1767 * the checksum by parsing the header. */
1768 pseudo_hdr_offset = htobe16(mss);
1775 /* "rdma_count" is the number of RDMAs belonging to the
1776 * current packet BEFORE the current send request. For
1777 * non-TSO packets, this is equal to "count".
1778 * For TSO packets, rdma_count needs to be reset
1779 * to 0 after a segment cut.
1781 * The rdma_count field of the send request is
1782 * the number of RDMAs of the packet starting at
1783 * that request. For TSO send requests with one or more cuts
1784 * in the middle, this is the number of RDMAs starting
1785 * after the last cut in the request. All previous
1786 * segments before the last cut implicitly have 1 RDMA.
1788 * Since the number of RDMAs is not known beforehand,
1789 * it must be filled-in retroactively - after each
1790 * segmentation cut or at the end of the entire packet.
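/*
 * Illustrative walk-through (hypothetical sizes): with mss = 1448, a
 * busdma segment that crosses one segmentation cut is chopped; the
 * descriptor carrying the cut gets MXGEFW_FLAGS_TSO_CHOP, the one
 * starting the next TCP segment gets MXGEFW_FLAGS_FIRST, and the
 * (req - rdma_count)->rdma_count back-patch in the loop below records
 * how many RDMAs accumulated since the previous cut.
 */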
1793 while (busdma_seg_cnt) {
1794 /* Break the busdma segment up into pieces*/
1795 low = MXGE_LOWPART_TO_U32(seg->ds_addr);
1796 high_swapped = htobe32(MXGE_HIGHPART_TO_U32(seg->ds_addr));
1800 flags_next = flags & ~MXGEFW_FLAGS_FIRST;
1802 cum_len_next = cum_len + seglen;
1803 (req-rdma_count)->rdma_count = rdma_count + 1;
1804 if (__predict_true(cum_len >= 0)) {
1806 chop = (cum_len_next > mss);
1807 cum_len_next = cum_len_next % mss;
1808 next_is_first = (cum_len_next == 0);
1809 flags |= chop * MXGEFW_FLAGS_TSO_CHOP;
1810 flags_next |= next_is_first *
1812 rdma_count |= -(chop | next_is_first);
1813 rdma_count += chop & !next_is_first;
1814 } else if (cum_len_next >= 0) {
1819 small = (mss <= MXGEFW_SEND_SMALL_SIZE);
1820 flags_next = MXGEFW_FLAGS_TSO_PLD |
1821 MXGEFW_FLAGS_FIRST |
1822 (small * MXGEFW_FLAGS_SMALL);
1825 req->addr_high = high_swapped;
1826 req->addr_low = htobe32(low);
1827 req->pseudo_hdr_offset = pseudo_hdr_offset;
1829 req->rdma_count = 1;
1830 req->length = htobe16(seglen);
1831 req->cksum_offset = cksum_offset;
1832 req->flags = flags | ((cum_len & 1) *
1833 MXGEFW_FLAGS_ALIGN_ODD);
1836 cum_len = cum_len_next;
1841 if (__predict_false(cksum_offset > seglen))
1842 cksum_offset -= seglen;
1845 if (__predict_false(cnt > tx->max_desc))
1851 (req-rdma_count)->rdma_count = rdma_count;
1855 req->flags |= MXGEFW_FLAGS_TSO_LAST;
1856 } while (!(req->flags & (MXGEFW_FLAGS_TSO_CHOP | MXGEFW_FLAGS_FIRST)));
1858 tx->info[((cnt - 1) + tx->req) & tx->mask].flag = 1;
1859 mxge_submit_req(tx, tx->req_list, cnt);
1863 bus_dmamap_unload(tx->dmat, tx->info[tx->req & tx->mask].map);
1865 ss->sc->ifp->if_oerrors++;
1867 printf("tx->max_desc exceeded via TSO!\n");
1868 printf("mss = %d, %ld, %d!\n", mss,
1869 (long)seg - (long)tx->seg_list, tx->max_desc);
1876 #endif /* IFCAP_TSO4 */
1878 #ifdef MXGE_NEW_VLAN_API
1880 * We reproduce the software vlan tag insertion from
1881 * net/if_vlan.c:vlan_start() here so that we can advertise "hardware"
1882 * vlan tag insertion. We need to advertise this in order to have the
1883 * vlan interface respect our csum offload flags.
1885 static struct mbuf *
1886 mxge_vlan_tag_insert(struct mbuf *m)
1888 struct ether_vlan_header *evl;
1890 M_PREPEND(m, ETHER_VLAN_ENCAP_LEN, M_DONTWAIT);
1891 if (__predict_false(m == NULL))
1893 if (m->m_len < sizeof(*evl)) {
1894 m = m_pullup(m, sizeof(*evl));
1895 if (__predict_false(m == NULL))
1899 * Transform the Ethernet header into an Ethernet header
1900 * with 802.1Q encapsulation.
1902 evl = mtod(m, struct ether_vlan_header *);
1903 bcopy((char *)evl + ETHER_VLAN_ENCAP_LEN,
1904 (char *)evl, ETHER_HDR_LEN - ETHER_TYPE_LEN);
1905 evl->evl_encap_proto = htons(ETHERTYPE_VLAN);
1906 evl->evl_tag = htons(m->m_pkthdr.ether_vtag);
1907 m->m_flags &= ~M_VLANTAG;
1910 #endif /* MXGE_NEW_VLAN_API */
1913 mxge_encap(struct mxge_slice_state *ss, struct mbuf *m)
1916 mcp_kreq_ether_send_t *req;
1917 bus_dma_segment_t *seg;
1922 int cnt, cum_len, err, i, idx, odd_flag, ip_off;
1923 uint16_t pseudo_hdr_offset;
1924 uint8_t flags, cksum_offset;
1931 ip_off = sizeof (struct ether_header);
1932 #ifdef MXGE_NEW_VLAN_API
1933 if (m->m_flags & M_VLANTAG) {
1934 m = mxge_vlan_tag_insert(m);
1935 if (__predict_false(m == NULL))
1937 ip_off += ETHER_VLAN_ENCAP_LEN;
1940 /* (try to) map the frame for DMA */
1941 idx = tx->req & tx->mask;
1942 err = bus_dmamap_load_mbuf_sg(tx->dmat, tx->info[idx].map,
1943 m, tx->seg_list, &cnt,
1945 if (__predict_false(err == EFBIG)) {
1946 /* Too many segments in the chain. Try to defrag */
1948 m_tmp = m_defrag(m, M_NOWAIT);
1949 if (m_tmp == NULL) {
1954 err = bus_dmamap_load_mbuf_sg(tx->dmat,
1956 m, tx->seg_list, &cnt,
1959 if (__predict_false(err != 0)) {
1960 device_printf(sc->dev, "bus_dmamap_load_mbuf_sg returned %d"
1961 " packet len = %d\n", err, m->m_pkthdr.len);
1964 bus_dmamap_sync(tx->dmat, tx->info[idx].map,
1965 BUS_DMASYNC_PREWRITE);
1966 tx->info[idx].m = m;
1969 /* TSO is different enough, we handle it in another routine */
1970 if (m->m_pkthdr.csum_flags & (CSUM_TSO)) {
1971 mxge_encap_tso(ss, m, cnt, ip_off);
1978 pseudo_hdr_offset = 0;
1979 flags = MXGEFW_FLAGS_NO_TSO;
1981 /* checksum offloading? */
1982 if (m->m_pkthdr.csum_flags & (CSUM_DELAY_DATA)) {
1983 /* ensure ip header is in first mbuf, copy
1984 it to a scratch buffer if not */
1985 if (__predict_false(m->m_len < ip_off + sizeof (*ip))) {
1986 m_copydata(m, 0, ip_off + sizeof (*ip),
1988 ip = (struct ip *)(ss->scratch + ip_off);
1990 ip = (struct ip *)(mtod(m, char *) + ip_off);
1992 cksum_offset = ip_off + (ip->ip_hl << 2);
1993 pseudo_hdr_offset = cksum_offset + m->m_pkthdr.csum_data;
1994 pseudo_hdr_offset = htobe16(pseudo_hdr_offset);
1995 req->cksum_offset = cksum_offset;
1996 flags |= MXGEFW_FLAGS_CKSUM;
1997 odd_flag = MXGEFW_FLAGS_ALIGN_ODD;
2001 if (m->m_pkthdr.len < MXGEFW_SEND_SMALL_SIZE)
2002 flags |= MXGEFW_FLAGS_SMALL;
2004 /* convert segments into a request list */
2007 req->flags = MXGEFW_FLAGS_FIRST;
2008 for (i = 0; i < cnt; i++) {
2010 htobe32(MXGE_LOWPART_TO_U32(seg->ds_addr));
2012 htobe32(MXGE_HIGHPART_TO_U32(seg->ds_addr));
2013 req->length = htobe16(seg->ds_len);
2014 req->cksum_offset = cksum_offset;
2015 if (cksum_offset > seg->ds_len)
2016 cksum_offset -= seg->ds_len;
2019 req->pseudo_hdr_offset = pseudo_hdr_offset;
2020 req->pad = 0; /* complete solid 16-byte block */
2021 req->rdma_count = 1;
2022 req->flags |= flags | ((cum_len & 1) * odd_flag);
2023 cum_len += seg->ds_len;
2029 /* pad runts to 60 bytes */
2033 htobe32(MXGE_LOWPART_TO_U32(sc->zeropad_dma.bus_addr));
2035 htobe32(MXGE_HIGHPART_TO_U32(sc->zeropad_dma.bus_addr));
2036 req->length = htobe16(60 - cum_len);
2037 req->cksum_offset = 0;
2038 req->pseudo_hdr_offset = pseudo_hdr_offset;
2039 req->pad = 0; /* complete solid 16-byte block */
2040 req->rdma_count = 1;
2041 req->flags |= flags | ((cum_len & 1) * odd_flag);
2045 tx->req_list[0].rdma_count = cnt;
2047 /* print what the firmware will see */
2048 for (i = 0; i < cnt; i++) {
2049 printf("%d: addr: 0x%x 0x%x len:%d pso%d,"
2050 "cso:%d, flags:0x%x, rdma:%d\n",
2051 i, (int)ntohl(tx->req_list[i].addr_high),
2052 (int)ntohl(tx->req_list[i].addr_low),
2053 (int)ntohs(tx->req_list[i].length),
2054 (int)ntohs(tx->req_list[i].pseudo_hdr_offset),
2055 tx->req_list[i].cksum_offset, tx->req_list[i].flags,
2056 tx->req_list[i].rdma_count);
2058 printf("--------------\n");
2060 tx->info[((cnt - 1) + tx->req) & tx->mask].flag = 1;
2061 mxge_submit_req(tx, tx->req_list, cnt);
2074 mxge_start_locked(struct mxge_slice_state *ss)
2084 while ((tx->mask - (tx->req - tx->done)) > tx->max_desc) {
2085 IFQ_DRV_DEQUEUE(&ifp->if_snd, m);
2089 /* let BPF see it */
2092 /* give it to the nic */
2095 /* ran out of transmit slots */
2096 if ((sc->ifp->if_drv_flags & IFF_DRV_OACTIVE) == 0) {
2097 sc->ifp->if_drv_flags |= IFF_DRV_OACTIVE;
2103 mxge_start(struct ifnet *ifp)
2105 mxge_softc_t *sc = ifp->if_softc;
2106 struct mxge_slice_state *ss;
2108 /* only use the first slice for now */
2110 mtx_lock(&ss->tx.mtx);
2111 mxge_start_locked(ss);
2112 mtx_unlock(&ss->tx.mtx);
2116 * copy an array of mcp_kreq_ether_recv_t's to the mcp. Copy
2117 * at most 32 bytes at a time, so as to avoid involving the software
2118 * pio handler in the nic. We re-write the first segment's low
2119 * DMA address to mark it valid only after we write the entire chunk
2123 mxge_submit_8rx(volatile mcp_kreq_ether_recv_t *dst,
2124 mcp_kreq_ether_recv_t *src)
2128 low = src->addr_low;
2129 src->addr_low = 0xffffffff;
2130 mxge_pio_copy(dst, src, 4 * sizeof (*src));
2132 mxge_pio_copy(dst + 4, src + 4, 4 * sizeof (*src));
2134 src->addr_low = low;
2135 dst->addr_low = low;
2140 mxge_get_buf_small(struct mxge_slice_state *ss, bus_dmamap_t map, int idx)
2142 bus_dma_segment_t seg;
2144 mxge_rx_ring_t *rx = &ss->rx_small;
2147 m = m_gethdr(M_DONTWAIT, MT_DATA);
2154 err = bus_dmamap_load_mbuf_sg(rx->dmat, map, m,
2155 &seg, &cnt, BUS_DMA_NOWAIT);
2160 rx->info[idx].m = m;
2161 rx->shadow[idx].addr_low =
2162 htobe32(MXGE_LOWPART_TO_U32(seg.ds_addr));
2163 rx->shadow[idx].addr_high =
2164 htobe32(MXGE_HIGHPART_TO_U32(seg.ds_addr));
2168 mxge_submit_8rx(&rx->lanai[idx - 7], &rx->shadow[idx - 7]);
2173 mxge_get_buf_big(struct mxge_slice_state *ss, bus_dmamap_t map, int idx)
2175 bus_dma_segment_t seg[3];
2177 mxge_rx_ring_t *rx = &ss->rx_big;
2180 if (rx->cl_size == MCLBYTES)
2181 m = m_getcl(M_DONTWAIT, MT_DATA, M_PKTHDR);
2183 m = m_getjcl(M_DONTWAIT, MT_DATA, M_PKTHDR, rx->cl_size);
2189 m->m_len = rx->cl_size;
2190 err = bus_dmamap_load_mbuf_sg(rx->dmat, map, m,
2191 seg, &cnt, BUS_DMA_NOWAIT);
2196 rx->info[idx].m = m;
2197 rx->shadow[idx].addr_low =
2198 htobe32(MXGE_LOWPART_TO_U32(seg->ds_addr));
2199 rx->shadow[idx].addr_high =
2200 htobe32(MXGE_HIGHPART_TO_U32(seg->ds_addr));
2202 #if MXGE_VIRT_JUMBOS
2203 for (i = 1; i < cnt; i++) {
2204 rx->shadow[idx + i].addr_low =
2205 htobe32(MXGE_LOWPART_TO_U32(seg[i].ds_addr));
2206 rx->shadow[idx + i].addr_high =
2207 htobe32(MXGE_HIGHPART_TO_U32(seg[i].ds_addr));
2212 for (i = 0; i < rx->nbufs; i++) {
2213 if ((idx & 7) == 7) {
2214 mxge_submit_8rx(&rx->lanai[idx - 7],
2215 &rx->shadow[idx - 7]);
2223 * Myri10GE hardware checksums are not valid if the sender
2224 * padded the frame with non-zero padding. This is because
2225 * the firmware just does a simple 16-bit 1s complement
2226 * checksum across the entire frame, excluding the first 14
2227 * bytes. It is best to simply check the checksum and
2228 * tell the stack about it only if the checksum is good
2231 static inline uint16_t
2232 mxge_rx_csum(struct mbuf *m, int csum)
2234 struct ether_header *eh;
2238 eh = mtod(m, struct ether_header *);
2240 /* only deal with IPv4 TCP & UDP for now */
2241 if (__predict_false(eh->ether_type != htons(ETHERTYPE_IP)))
2243 ip = (struct ip *)(eh + 1);
2244 if (__predict_false(ip->ip_p != IPPROTO_TCP &&
2245 ip->ip_p != IPPROTO_UDP))
2248 c = in_pseudo(ip->ip_src.s_addr, ip->ip_dst.s_addr,
2249 htonl(ntohs(csum) + ntohs(ip->ip_len) -
2250 (ip->ip_hl << 2) + ip->ip_p));
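/* Note (restating the comment above the function): a zero return
   from mxge_rx_csum() means the firmware's one's complement sum over
   the frame, folded with the pseudo-header here, checked out, and the
   callers then mark the mbuf CSUM_DATA_VALID | CSUM_PSEUDO_HDR. */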
2256 mxge_vlan_tag_remove(struct mbuf *m, uint32_t *csum)
2258 struct ether_vlan_header *evl;
2259 struct ether_header *eh;
2262 evl = mtod(m, struct ether_vlan_header *);
2263 eh = mtod(m, struct ether_header *);
2266 * fix checksum by subtracting ETHER_VLAN_ENCAP_LEN bytes
2267 * after what the firmware thought was the end of the ethernet
2271 /* put checksum into host byte order */
2272 *csum = ntohs(*csum);
2273 partial = ntohl(*(uint32_t *)(mtod(m, char *) + ETHER_HDR_LEN));
2274 (*csum) += ~partial;
2275 (*csum) += ((*csum) < ~partial);
2276 (*csum) = ((*csum) >> 16) + ((*csum) & 0xFFFF);
2277 (*csum) = ((*csum) >> 16) + ((*csum) & 0xFFFF);
2279 /* restore checksum to network byte order;
2280 later consumers expect this */
2281 *csum = htons(*csum);
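/*
 * Illustrative note on the one's complement math above: adding
 * ~partial plus the end-around carry, then folding the 32-bit sum
 * into 16 bits twice, performs 1's complement subtraction of the four
 * 802.1q bytes, so the checksum now describes the frame as it looks
 * after m_adj() strips ETHER_VLAN_ENCAP_LEN bytes below.
 */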
2284 #ifdef MXGE_NEW_VLAN_API
2285 m->m_pkthdr.ether_vtag = ntohs(evl->evl_tag);
2289 mtag = m_tag_alloc(MTAG_VLAN, MTAG_VLAN_TAG, sizeof(u_int),
2293 VLAN_TAG_VALUE(mtag) = ntohs(evl->evl_tag);
2294 m_tag_prepend(m, mtag);
2298 m->m_flags |= M_VLANTAG;
2301 * Remove the 802.1q header by copying the Ethernet
2302 * addresses over it and adjusting the beginning of
2303 * the data in the mbuf. The encapsulated Ethernet
2304 * type field is already in place.
2306 bcopy((char *)evl, (char *)evl + ETHER_VLAN_ENCAP_LEN,
2307 ETHER_HDR_LEN - ETHER_TYPE_LEN);
2308 m_adj(m, ETHER_VLAN_ENCAP_LEN);
2313 mxge_rx_done_big(struct mxge_slice_state *ss, uint32_t len, uint32_t csum)
2318 struct ether_header *eh;
2320 bus_dmamap_t old_map;
2322 uint16_t tcpudp_csum;
2327 idx = rx->cnt & rx->mask;
2328 rx->cnt += rx->nbufs;
2329 /* save a pointer to the received mbuf */
2330 m = rx->info[idx].m;
2331 /* try to replace the received mbuf */
2332 if (mxge_get_buf_big(ss, rx->extra_map, idx)) {
2333 /* drop the frame -- the old mbuf is re-cycled */
2338 /* unmap the received buffer */
2339 old_map = rx->info[idx].map;
2340 bus_dmamap_sync(rx->dmat, old_map, BUS_DMASYNC_POSTREAD);
2341 bus_dmamap_unload(rx->dmat, old_map);
2343 /* swap the bus_dmamap_t's */
2344 rx->info[idx].map = rx->extra_map;
2345 rx->extra_map = old_map;
2347 /* mcp implicitly skips 1st 2 bytes so that packet is properly aligned */
2349 m->m_data += MXGEFW_PAD;
2351 m->m_pkthdr.rcvif = ifp;
2352 m->m_len = m->m_pkthdr.len = len;
2354 eh = mtod(m, struct ether_header *);
2355 if (eh->ether_type == htons(ETHERTYPE_VLAN)) {
2356 mxge_vlan_tag_remove(m, &csum);
2358 /* if the checksum is valid, mark it in the mbuf header */
2359 if (sc->csum_flag && (0 == (tcpudp_csum = mxge_rx_csum(m, csum)))) {
2360 if (sc->lro_cnt && (0 == mxge_lro_rx(ss, m, csum)))
2362 /* otherwise, it was a UDP frame, or a TCP frame which
2363 we could not do LRO on. Tell the stack that the checksum is good */
2365 m->m_pkthdr.csum_data = 0xffff;
2366 m->m_pkthdr.csum_flags = CSUM_PSEUDO_HDR | CSUM_DATA_VALID;
2368 /* pass the frame up the stack */
2369 (*ifp->if_input)(ifp, m);
2373 mxge_rx_done_small(struct mxge_slice_state *ss, uint32_t len, uint32_t csum)
2377 struct ether_header *eh;
2380 bus_dmamap_t old_map;
2382 uint16_t tcpudp_csum;
2387 idx = rx->cnt & rx->mask;
2389 /* save a pointer to the received mbuf */
2390 m = rx->info[idx].m;
2391 /* try to replace the received mbuf */
2392 if (mxge_get_buf_small(ss, rx->extra_map, idx)) {
2393 /* drop the frame -- the old mbuf is re-cycled */
2398 /* unmap the received buffer */
2399 old_map = rx->info[idx].map;
2400 bus_dmamap_sync(rx->dmat, old_map, BUS_DMASYNC_POSTREAD);
2401 bus_dmamap_unload(rx->dmat, old_map);
2403 /* swap the bus_dmamap_t's */
2404 rx->info[idx].map = rx->extra_map;
2405 rx->extra_map = old_map;
2407 /* mcp implicitly skips 1st 2 bytes so that packet is properly aligned */
2409 m->m_data += MXGEFW_PAD;
2411 m->m_pkthdr.rcvif = ifp;
2412 m->m_len = m->m_pkthdr.len = len;
2414 eh = mtod(m, struct ether_header *);
2415 if (eh->ether_type == htons(ETHERTYPE_VLAN)) {
2416 mxge_vlan_tag_remove(m, &csum);
2418 /* if the checksum is valid, mark it in the mbuf header */
2419 if (sc->csum_flag && (0 == (tcpudp_csum = mxge_rx_csum(m, csum)))) {
2420 if (sc->lro_cnt && (0 == mxge_lro_rx(ss, m, csum)))
2422 /* otherwise, it was a UDP frame, or a TCP frame which
2423 we could not do LRO on. Tell the stack that the checksum is good */
2425 m->m_pkthdr.csum_data = 0xffff;
2426 m->m_pkthdr.csum_flags = CSUM_PSEUDO_HDR | CSUM_DATA_VALID;
2428 /* pass the frame up the stack */
2429 (*ifp->if_input)(ifp, m);
2433 mxge_clean_rx_done(struct mxge_slice_state *ss)
2435 mxge_rx_done_t *rx_done = &ss->rx_done;
2436 struct lro_entry *lro;
2442 while (rx_done->entry[rx_done->idx].length != 0) {
2443 length = ntohs(rx_done->entry[rx_done->idx].length);
2444 rx_done->entry[rx_done->idx].length = 0;
2445 checksum = rx_done->entry[rx_done->idx].checksum;
2446 if (length <= (MHLEN - MXGEFW_PAD))
2447 mxge_rx_done_small(ss, length, checksum);
2449 mxge_rx_done_big(ss, length, checksum);
2451 rx_done->idx = rx_done->cnt & rx_done->mask;
2453 /* limit potential for livelock */
2454 if (__predict_false(++limit > rx_done->mask / 2))
2457 while (!SLIST_EMPTY(&ss->lro_active)) {
2458 lro = SLIST_FIRST(&ss->lro_active);
2459 SLIST_REMOVE_HEAD(&ss->lro_active, next);
2460 mxge_lro_flush(ss, lro);
2466 mxge_tx_done(struct mxge_slice_state *ss, uint32_t mcp_idx)
2476 while (tx->pkt_done != mcp_idx) {
2477 idx = tx->done & tx->mask;
2479 m = tx->info[idx].m;
2480 /* mbuf and DMA map only attached to the first segment per-mbuf */
2484 tx->info[idx].m = NULL;
2485 map = tx->info[idx].map;
2486 bus_dmamap_unload(tx->dmat, map);
2489 if (tx->info[idx].flag) {
2490 tx->info[idx].flag = 0;
2495 /* If we have space, clear IFF_OACTIVE to tell the stack that
2496 it's OK to send packets */
2498 if (ifp->if_drv_flags & IFF_DRV_OACTIVE &&
2499 tx->req - tx->done < (tx->mask + 1)/4) {
2500 mtx_lock(&ss->tx.mtx);
2501 ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;
2503 mxge_start_locked(ss);
2504 mtx_unlock(&ss->tx.mtx);
2508 static struct mxge_media_type mxge_media_types[] =
2510 {IFM_10G_CX4, 0x7f, "10GBASE-CX4 (module)"},
2511 {IFM_10G_SR, (1 << 7), "10GBASE-SR"},
2512 {IFM_10G_LR, (1 << 6), "10GBASE-LR"},
2513 {0, (1 << 5), "10GBASE-ER"},
2514 {0, (1 << 4), "10GBASE-LRM"},
2515 {0, (1 << 3), "10GBASE-SW"},
2516 {0, (1 << 2), "10GBASE-LW"},
2517 {0, (1 << 1), "10GBASE-EW"},
2518 {0, (1 << 0), "Reserved"}
2522 mxge_set_media(mxge_softc_t *sc, int type)
2524 sc->media_flags |= type;
2525 ifmedia_add(&sc->media, sc->media_flags, 0, NULL);
2526 ifmedia_set(&sc->media, sc->media_flags);
2531 * Determine the media type for a NIC. Some XFPs will identify
2532 * themselves only when their link is up, so this is initiated via a
2533 * link up interrupt. However, this can potentially take up to
2534 * several milliseconds, so it is run via the watchdog routine, rather
2535 * than in the interrupt handler itself. This need only be done
2536 * once, not each time the link is up.
2539 mxge_media_probe(mxge_softc_t *sc)
2545 sc->need_media_probe = 0;
2547 /* if we've already set a media type, we're done */
2548 if (sc->media_flags != (IFM_ETHER | IFM_AUTO))
2552 * parse the product code to deterimine the interface type
2553 * (CX4, XFP, Quad Ribbon Fiber) by looking at the character
2554 * after the 3rd dash in the driver's cached copy of the
2555 * EEPROM's product code string.
2557 ptr = sc->product_code_string;
2559 device_printf(sc->dev, "Missing product code\n");
2562 for (i = 0; i < 3; i++, ptr++) {
2563 ptr = index(ptr, '-');
2565 device_printf(sc->dev,
2566 "only %d dashes in PC?!?\n", i);
2571 mxge_set_media(sc, IFM_10G_CX4);
2574 else if (*ptr == 'Q') {
2575 device_printf(sc->dev, "Quad Ribbon Fiber Media\n");
2576 /* FreeBSD has no media type for Quad ribbon fiber */
2581 device_printf(sc->dev, "Unknown media type: %c\n", *ptr);
2586 * At this point we know the NIC has an XFP cage, so now we
2587 * try to determine what is in the cage by using the
2588 * firmware's XFP I2C commands to read the XFP 10GbE compilance
2589 * register. We read just one byte, which may take over
2593 cmd.data0 = 0; /* just fetch 1 byte, not all 256 */
2594 cmd.data1 = MXGE_XFP_COMPLIANCE_BYTE; /* the byte we want */
2595 err = mxge_send_cmd(sc, MXGEFW_CMD_XFP_I2C_READ, &cmd);
2596 if (err == MXGEFW_CMD_ERROR_XFP_FAILURE) {
2597 device_printf(sc->dev, "failed to read XFP\n");
2599 if (err == MXGEFW_CMD_ERROR_XFP_ABSENT) {
2600 device_printf(sc->dev, "Type R with no XFP!?!?\n");
2602 if (err != MXGEFW_CMD_OK) {
2606 /* now we wait for the data to be cached */
2607 cmd.data0 = MXGE_XFP_COMPLIANCE_BYTE;
2608 err = mxge_send_cmd(sc, MXGEFW_CMD_XFP_BYTE, &cmd);
2609 for (ms = 0; (err == EBUSY) && (ms < 50); ms++) {
2611 cmd.data0 = MXGE_XFP_COMPLIANCE_BYTE;
2612 err = mxge_send_cmd(sc, MXGEFW_CMD_XFP_BYTE, &cmd);
2614 if (err != MXGEFW_CMD_OK) {
2615 device_printf(sc->dev, "failed to read XFP (%d, %dms)\n",
2620 if (cmd.data0 == mxge_media_types[0].bitmask) {
2622 device_printf(sc->dev, "XFP:%s\n",
2623 mxge_media_types[0].name);
2624 mxge_set_media(sc, IFM_10G_CX4);
2628 i < sizeof (mxge_media_types) / sizeof (mxge_media_types[0]);
2630 if (cmd.data0 & mxge_media_types[i].bitmask) {
2632 device_printf(sc->dev, "XFP:%s\n",
2633 mxge_media_types[i].name);
2635 mxge_set_media(sc, mxge_media_types[i].flag);
2639 device_printf(sc->dev, "XFP media 0x%x unknown\n", cmd.data0);
2645 mxge_intr(void *arg)
2647 struct mxge_slice_state *ss = arg;
2648 mxge_softc_t *sc = ss->sc;
2649 mcp_irq_data_t *stats = ss->fw_stats;
2650 mxge_tx_ring_t *tx = &ss->tx;
2651 mxge_rx_done_t *rx_done = &ss->rx_done;
2652 uint32_t send_done_count;
2656 /* an interrupt on a non-zero slice is implicitly valid
2657 since MSI-X irqs are not shared */
2659 mxge_clean_rx_done(ss);
2660 *ss->irq_claim = be32toh(3);
2664 /* make sure the DMA has finished */
2665 if (!stats->valid) {
2668 valid = stats->valid;
2670 if (sc->legacy_irq) {
2671 /* lower legacy IRQ */
2672 *sc->irq_deassert = 0;
2673 if (!mxge_deassert_wait)
2674 /* don't wait for conf. that irq is low */
2680 /* loop while waiting for legacy irq deassertion */
2682 /* check for transmit completes and receives */
2683 send_done_count = be32toh(stats->send_done_count);
2684 while ((send_done_count != tx->pkt_done) ||
2685 (rx_done->entry[rx_done->idx].length != 0)) {
2686 mxge_tx_done(ss, (int)send_done_count);
2687 mxge_clean_rx_done(ss);
2688 send_done_count = be32toh(stats->send_done_count);
2690 if (sc->legacy_irq && mxge_deassert_wait)
2692 } while (*((volatile uint8_t *) &stats->valid));
2694 if (__predict_false(stats->stats_updated)) {
2695 if (sc->link_state != stats->link_up) {
2696 sc->link_state = stats->link_up;
2697 if (sc->link_state) {
2698 if_link_state_change(sc->ifp, LINK_STATE_UP);
2700 device_printf(sc->dev, "link up\n");
2702 if_link_state_change(sc->ifp, LINK_STATE_DOWN);
2704 device_printf(sc->dev, "link down\n");
2706 sc->need_media_probe = 1;
2708 if (sc->rdma_tags_available !=
2709 be32toh(stats->rdma_tags_available)) {
2710 sc->rdma_tags_available =
2711 be32toh(stats->rdma_tags_available);
2712 device_printf(sc->dev, "RDMA timed out! %d tags "
2713 "left\n", sc->rdma_tags_available);
2716 if (stats->link_down) {
2717 sc->down_cnt += stats->link_down;
2719 if_link_state_change(sc->ifp, LINK_STATE_DOWN);
2723 /* check to see if we have rx token to pass back */
2725 *ss->irq_claim = be32toh(3);
2726 *(ss->irq_claim + 1) = be32toh(3);
2730 mxge_init(void *arg)
2737 mxge_free_slice_mbufs(struct mxge_slice_state *ss)
2739 struct lro_entry *lro_entry;
2742 while (!SLIST_EMPTY(&ss->lro_free)) {
2743 lro_entry = SLIST_FIRST(&ss->lro_free);
2744 SLIST_REMOVE_HEAD(&ss->lro_free, next);
2745 free(lro_entry, M_DEVBUF);
2748 for (i = 0; i <= ss->rx_big.mask; i++) {
2749 if (ss->rx_big.info[i].m == NULL)
2751 bus_dmamap_unload(ss->rx_big.dmat,
2752 ss->rx_big.info[i].map);
2753 m_freem(ss->rx_big.info[i].m);
2754 ss->rx_big.info[i].m = NULL;
2757 for (i = 0; i <= ss->rx_small.mask; i++) {
2758 if (ss->rx_small.info[i].m == NULL)
2760 bus_dmamap_unload(ss->rx_small.dmat,
2761 ss->rx_small.info[i].map);
2762 m_freem(ss->rx_small.info[i].m);
2763 ss->rx_small.info[i].m = NULL;
2766 /* transmit ring used only on the first slice */
2767 if (ss->tx.info == NULL)
2770 for (i = 0; i <= ss->tx.mask; i++) {
2771 ss->tx.info[i].flag = 0;
2772 if (ss->tx.info[i].m == NULL)
2774 bus_dmamap_unload(ss->tx.dmat,
2775 ss->tx.info[i].map);
2776 m_freem(ss->tx.info[i].m);
2777 ss->tx.info[i].m = NULL;
2782 mxge_free_mbufs(mxge_softc_t *sc)
2786 for (slice = 0; slice < sc->num_slices; slice++)
2787 mxge_free_slice_mbufs(&sc->ss[slice]);
2791 mxge_free_slice_rings(struct mxge_slice_state *ss)
2796 if (ss->rx_done.entry != NULL)
2797 mxge_dma_free(&ss->rx_done.dma);
2798 ss->rx_done.entry = NULL;
2800 if (ss->tx.req_bytes != NULL)
2801 free(ss->tx.req_bytes, M_DEVBUF);
2802 ss->tx.req_bytes = NULL;
2804 if (ss->tx.seg_list != NULL)
2805 free(ss->tx.seg_list, M_DEVBUF);
2806 ss->tx.seg_list = NULL;
2808 if (ss->rx_small.shadow != NULL)
2809 free(ss->rx_small.shadow, M_DEVBUF);
2810 ss->rx_small.shadow = NULL;
2812 if (ss->rx_big.shadow != NULL)
2813 free(ss->rx_big.shadow, M_DEVBUF);
2814 ss->rx_big.shadow = NULL;
2816 if (ss->tx.info != NULL) {
2817 if (ss->tx.dmat != NULL) {
2818 for (i = 0; i <= ss->tx.mask; i++) {
2819 bus_dmamap_destroy(ss->tx.dmat,
2820 ss->tx.info[i].map);
2822 bus_dma_tag_destroy(ss->tx.dmat);
2824 free(ss->tx.info, M_DEVBUF);
2828 if (ss->rx_small.info != NULL) {
2829 if (ss->rx_small.dmat != NULL) {
2830 for (i = 0; i <= ss->rx_small.mask; i++) {
2831 bus_dmamap_destroy(ss->rx_small.dmat,
2832 ss->rx_small.info[i].map);
2834 bus_dmamap_destroy(ss->rx_small.dmat,
2835 ss->rx_small.extra_map);
2836 bus_dma_tag_destroy(ss->rx_small.dmat);
2838 free(ss->rx_small.info, M_DEVBUF);
2840 ss->rx_small.info = NULL;
2842 if (ss->rx_big.info != NULL) {
2843 if (ss->rx_big.dmat != NULL) {
2844 for (i = 0; i <= ss->rx_big.mask; i++) {
2845 bus_dmamap_destroy(ss->rx_big.dmat,
2846 ss->rx_big.info[i].map);
2848 bus_dmamap_destroy(ss->rx_big.dmat,
2849 ss->rx_big.extra_map);
2850 bus_dma_tag_destroy(ss->rx_big.dmat);
2852 free(ss->rx_big.info, M_DEVBUF);
2854 ss->rx_big.info = NULL;
2858 mxge_free_rings(mxge_softc_t *sc)
2862 for (slice = 0; slice < sc->num_slices; slice++)
2863 mxge_free_slice_rings(&sc->ss[slice]);
2867 mxge_alloc_slice_rings(struct mxge_slice_state *ss, int rx_ring_entries,
2868 int tx_ring_entries)
2870 mxge_softc_t *sc = ss->sc;
2876 /* allocate per-slice receive resources */
2878 ss->rx_small.mask = ss->rx_big.mask = rx_ring_entries - 1;
2879 ss->rx_done.mask = (2 * rx_ring_entries) - 1;
2881 /* allocate the rx shadow rings */
2882 bytes = rx_ring_entries * sizeof (*ss->rx_small.shadow);
2883 ss->rx_small.shadow = malloc(bytes, M_DEVBUF, M_ZERO|M_WAITOK);
2884 if (ss->rx_small.shadow == NULL)
2887 bytes = rx_ring_entries * sizeof (*ss->rx_big.shadow);
2888 ss->rx_big.shadow = malloc(bytes, M_DEVBUF, M_ZERO|M_WAITOK);
2889 if (ss->rx_big.shadow == NULL)
2892 /* allocate the rx host info rings */
2893 bytes = rx_ring_entries * sizeof (*ss->rx_small.info);
2894 ss->rx_small.info = malloc(bytes, M_DEVBUF, M_ZERO|M_WAITOK);
2895 if (ss->rx_small.info == NULL)
2898 bytes = rx_ring_entries * sizeof (*ss->rx_big.info);
2899 ss->rx_big.info = malloc(bytes, M_DEVBUF, M_ZERO|M_WAITOK);
2900 if (ss->rx_big.info == NULL)
2903 /* allocate the rx busdma resources */
2904 err = bus_dma_tag_create(sc->parent_dmat, /* parent */
2906 4096, /* boundary */
2907 BUS_SPACE_MAXADDR, /* low */
2908 BUS_SPACE_MAXADDR, /* high */
2909 NULL, NULL, /* filter */
2910 MHLEN, /* maxsize */
2912 MHLEN, /* maxsegsize */
2913 BUS_DMA_ALLOCNOW, /* flags */
2914 NULL, NULL, /* lock */
2915 &ss->rx_small.dmat); /* tag */
2917 device_printf(sc->dev, "Err %d allocating rx_small dmat\n",
2922 err = bus_dma_tag_create(sc->parent_dmat, /* parent */
2924 #if MXGE_VIRT_JUMBOS
2925 4096, /* boundary */
2929 BUS_SPACE_MAXADDR, /* low */
2930 BUS_SPACE_MAXADDR, /* high */
2931 NULL, NULL, /* filter */
2932 3*4096, /* maxsize */
2933 #if MXGE_VIRT_JUMBOS
2935 4096, /* maxsegsize*/
2938 MJUM9BYTES, /* maxsegsize*/
2940 BUS_DMA_ALLOCNOW, /* flags */
2941 NULL, NULL, /* lock */
2942 &ss->rx_big.dmat); /* tag */
2944 device_printf(sc->dev, "Err %d allocating rx_big dmat\n",
2948 for (i = 0; i <= ss->rx_small.mask; i++) {
2949 err = bus_dmamap_create(ss->rx_small.dmat, 0,
2950 &ss->rx_small.info[i].map);
2952 device_printf(sc->dev, "Err %d rx_small dmamap\n",
2957 err = bus_dmamap_create(ss->rx_small.dmat, 0,
2958 &ss->rx_small.extra_map);
2960 device_printf(sc->dev, "Err %d extra rx_small dmamap\n",
2965 for (i = 0; i <= ss->rx_big.mask; i++) {
2966 err = bus_dmamap_create(ss->rx_big.dmat, 0,
2967 &ss->rx_big.info[i].map);
2969 device_printf(sc->dev, "Err %d rx_big dmamap\n",
2974 err = bus_dmamap_create(ss->rx_big.dmat, 0,
2975 &ss->rx_big.extra_map);
2977 device_printf(sc->dev, "Err %d extra rx_big dmamap\n",
2982 /* now allocate TX resouces */
2984 /* only use a single TX ring for now */
2985 if (ss != ss->sc->ss)
2988 ss->tx.mask = tx_ring_entries - 1;
2989 ss->tx.max_desc = MIN(MXGE_MAX_SEND_DESC, tx_ring_entries / 4);
2992 /* allocate the tx request copy block */
2994 sizeof (*ss->tx.req_list) * (ss->tx.max_desc + 4);
2995 ss->tx.req_bytes = malloc(bytes, M_DEVBUF, M_WAITOK);
2996 if (ss->tx.req_bytes == NULL)
2998 /* ensure req_list entries are aligned to 8 bytes */
2999 ss->tx.req_list = (mcp_kreq_ether_send_t *)
3000 ((unsigned long)(ss->tx.req_bytes + 7) & ~7UL);
3002 /* allocate the tx busdma segment list */
3003 bytes = sizeof (*ss->tx.seg_list) * ss->tx.max_desc;
3004 ss->tx.seg_list = (bus_dma_segment_t *)
3005 malloc(bytes, M_DEVBUF, M_WAITOK);
3006 if (ss->tx.seg_list == NULL)
3009 /* allocate the tx host info ring */
3010 bytes = tx_ring_entries * sizeof (*ss->tx.info);
3011 ss->tx.info = malloc(bytes, M_DEVBUF, M_ZERO|M_WAITOK);
3012 if (ss->tx.info == NULL)
3015 /* allocate the tx busdma resources */
3016 err = bus_dma_tag_create(sc->parent_dmat, /* parent */
3018 sc->tx_boundary, /* boundary */
3019 BUS_SPACE_MAXADDR, /* low */
3020 BUS_SPACE_MAXADDR, /* high */
3021 NULL, NULL, /* filter */
3022 65536 + 256, /* maxsize */
3023 ss->tx.max_desc - 2, /* num segs */
3024 sc->tx_boundary, /* maxsegsz */
3025 BUS_DMA_ALLOCNOW, /* flags */
3026 NULL, NULL, /* lock */
3027 &ss->tx.dmat); /* tag */
3030 device_printf(sc->dev, "Err %d allocating tx dmat\n",
3035 /* now use these tags to setup dmamaps for each slot
3037 for (i = 0; i <= ss->tx.mask; i++) {
3038 err = bus_dmamap_create(ss->tx.dmat, 0,
3039 &ss->tx.info[i].map);
3041 device_printf(sc->dev, "Err %d tx dmamap\n",
3051 mxge_alloc_rings(mxge_softc_t *sc)
3055 int tx_ring_entries, rx_ring_entries;
3058 /* get ring sizes */
3059 err = mxge_send_cmd(sc, MXGEFW_CMD_GET_SEND_RING_SIZE, &cmd);
3060 tx_ring_size = cmd.data0;
3062 device_printf(sc->dev, "Cannot determine tx ring sizes\n");
3066 tx_ring_entries = tx_ring_size / sizeof (mcp_kreq_ether_send_t);
3067 rx_ring_entries = sc->rx_ring_size / sizeof (mcp_dma_addr_t);
3068 IFQ_SET_MAXLEN(&sc->ifp->if_snd, tx_ring_entries - 1);
3069 sc->ifp->if_snd.ifq_drv_maxlen = sc->ifp->if_snd.ifq_maxlen;
3070 IFQ_SET_READY(&sc->ifp->if_snd);
3072 for (slice = 0; slice < sc->num_slices; slice++) {
3073 err = mxge_alloc_slice_rings(&sc->ss[slice],
3082 mxge_free_rings(sc);
3089 mxge_choose_params(int mtu, int *big_buf_size, int *cl_size, int *nbufs)
3091 int bufsize = mtu + ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN + MXGEFW_PAD;
3093 if (bufsize < MCLBYTES) {
3094 /* easy, everything fits in a single buffer */
3095 *big_buf_size = MCLBYTES;
3096 *cl_size = MCLBYTES;
3101 if (bufsize < MJUMPAGESIZE) {
3102 /* still easy, everything still fits in a single buffer */
3103 *big_buf_size = MJUMPAGESIZE;
3104 *cl_size = MJUMPAGESIZE;
3108 #if MXGE_VIRT_JUMBOS
3109 /* now we need to use virtually contiguous buffers */
3110 *cl_size = MJUM9BYTES;
3111 *big_buf_size = 4096;
3112 *nbufs = mtu / 4096 + 1;
3113 /* needs to be a power of two, so round up */
3117 *cl_size = MJUM9BYTES;
3118 *big_buf_size = MJUM9BYTES;
3124 mxge_slice_open(struct mxge_slice_state *ss, int nbufs, int cl_size)
3129 struct lro_entry *lro_entry;
3134 slice = ss - sc->ss;
3136 SLIST_INIT(&ss->lro_free);
3137 SLIST_INIT(&ss->lro_active);
3139 for (i = 0; i < sc->lro_cnt; i++) {
3140 lro_entry = (struct lro_entry *)
3141 malloc(sizeof (*lro_entry), M_DEVBUF,
3143 if (lro_entry == NULL) {
3147 SLIST_INSERT_HEAD(&ss->lro_free, lro_entry, next);
3149 /* get the lanai pointers to the send and receive rings */
3152 /* We currently only send from the first slice */
3155 err = mxge_send_cmd(sc, MXGEFW_CMD_GET_SEND_OFFSET, &cmd);
3157 (volatile mcp_kreq_ether_send_t *)(sc->sram + cmd.data0);
3160 err |= mxge_send_cmd(sc,
3161 MXGEFW_CMD_GET_SMALL_RX_OFFSET, &cmd);
3162 ss->rx_small.lanai =
3163 (volatile mcp_kreq_ether_recv_t *)(sc->sram + cmd.data0);
3165 err |= mxge_send_cmd(sc, MXGEFW_CMD_GET_BIG_RX_OFFSET, &cmd);
3167 (volatile mcp_kreq_ether_recv_t *)(sc->sram + cmd.data0);
3170 device_printf(sc->dev,
3171 "failed to get ring sizes or locations\n");
3175 /* stock receive rings */
3176 for (i = 0; i <= ss->rx_small.mask; i++) {
3177 map = ss->rx_small.info[i].map;
3178 err = mxge_get_buf_small(ss, map, i);
3180 device_printf(sc->dev, "alloced %d/%d smalls\n",
3181 i, ss->rx_small.mask + 1);
3185 for (i = 0; i <= ss->rx_big.mask; i++) {
3186 ss->rx_big.shadow[i].addr_low = 0xffffffff;
3187 ss->rx_big.shadow[i].addr_high = 0xffffffff;
3189 ss->rx_big.nbufs = nbufs;
3190 ss->rx_big.cl_size = cl_size;
3191 for (i = 0; i <= ss->rx_big.mask; i += ss->rx_big.nbufs) {
3192 map = ss->rx_big.info[i].map;
3193 err = mxge_get_buf_big(ss, map, i);
3195 device_printf(sc->dev, "alloced %d/%d bigs\n",
3196 i, ss->rx_big.mask + 1);
3204 mxge_open(mxge_softc_t *sc)
3207 int err, big_bytes, nbufs, slice, cl_size, i;
3209 volatile uint8_t *itable;
3211 /* Copy the MAC address in case it was overridden */
3212 bcopy(IF_LLADDR(sc->ifp), sc->mac_addr, ETHER_ADDR_LEN);
3214 err = mxge_reset(sc, 1);
3216 device_printf(sc->dev, "failed to reset\n");
3220 if (sc->num_slices > 1) {
3221 /* setup the indirection table */
3222 cmd.data0 = sc->num_slices;
3223 err = mxge_send_cmd(sc, MXGEFW_CMD_SET_RSS_TABLE_SIZE,
3226 err |= mxge_send_cmd(sc, MXGEFW_CMD_GET_RSS_TABLE_OFFSET,
3229 device_printf(sc->dev,
3230 "failed to setup rss tables\n");
3234 /* just enable an identity mapping */
3235 itable = sc->sram + cmd.data0;
3236 for (i = 0; i < sc->num_slices; i++)
3237 itable[i] = (uint8_t)i;
3240 cmd.data1 = mxge_rss_hash_type;
3241 err = mxge_send_cmd(sc, MXGEFW_CMD_SET_RSS_ENABLE, &cmd);
3243 device_printf(sc->dev, "failed to enable slices\n");
3249 mxge_choose_params(sc->ifp->if_mtu, &big_bytes, &cl_size, &nbufs);
3252 err = mxge_send_cmd(sc, MXGEFW_CMD_ALWAYS_USE_N_BIG_BUFFERS,
3254 /* error is only meaningful if we're trying to set
3255 MXGEFW_CMD_ALWAYS_USE_N_BIG_BUFFERS > 1 */
3256 if (err && nbufs > 1) {
3257 device_printf(sc->dev,
3258 "Failed to set alway-use-n to %d\n",
3262 /* Give the firmware the mtu and the big and small buffer
3263 sizes. The firmware wants the big buf size to be a power
3264 of two. Luckily, FreeBSD's clusters are powers of two */
3265 cmd.data0 = sc->ifp->if_mtu + ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
3266 err = mxge_send_cmd(sc, MXGEFW_CMD_SET_MTU, &cmd);
3267 cmd.data0 = MHLEN - MXGEFW_PAD;
3268 err |= mxge_send_cmd(sc, MXGEFW_CMD_SET_SMALL_BUFFER_SIZE,
3270 cmd.data0 = big_bytes;
3271 err |= mxge_send_cmd(sc, MXGEFW_CMD_SET_BIG_BUFFER_SIZE, &cmd);
3274 device_printf(sc->dev, "failed to setup params\n");
3278 /* Now give him the pointer to the stats block */
3279 cmd.data0 = MXGE_LOWPART_TO_U32(sc->ss->fw_stats_dma.bus_addr);
3280 cmd.data1 = MXGE_HIGHPART_TO_U32(sc->ss->fw_stats_dma.bus_addr);
3281 cmd.data2 = sizeof(struct mcp_irq_data);
3282 err = mxge_send_cmd(sc, MXGEFW_CMD_SET_STATS_DMA_V2, &cmd);
3285 bus = sc->ss->fw_stats_dma.bus_addr;
3286 bus += offsetof(struct mcp_irq_data, send_done_count);
3287 cmd.data0 = MXGE_LOWPART_TO_U32(bus);
3288 cmd.data1 = MXGE_HIGHPART_TO_U32(bus);
3289 err = mxge_send_cmd(sc,
3290 MXGEFW_CMD_SET_STATS_DMA_OBSOLETE,
3292 /* Firmware cannot support multicast without STATS_DMA_V2 */
3293 sc->fw_multicast_support = 0;
3295 sc->fw_multicast_support = 1;
3299 device_printf(sc->dev, "failed to setup params\n");
3303 for (slice = 0; slice < sc->num_slices; slice++) {
3304 err = mxge_slice_open(&sc->ss[slice], nbufs, cl_size);
3306 device_printf(sc->dev, "couldn't open slice %d\n",
3312 /* Finally, start the firmware running */
3313 err = mxge_send_cmd(sc, MXGEFW_CMD_ETHERNET_UP, &cmd);
3315 device_printf(sc->dev, "Couldn't bring up link\n");
3318 sc->ifp->if_drv_flags |= IFF_DRV_RUNNING;
3319 sc->ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;
3320 callout_reset(&sc->co_hdl, mxge_ticks, mxge_tick, sc);
3326 mxge_free_mbufs(sc);
3332 mxge_close(mxge_softc_t *sc)
3335 int err, old_down_cnt;
3337 callout_stop(&sc->co_hdl);
3338 sc->ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
3339 old_down_cnt = sc->down_cnt;
3341 err = mxge_send_cmd(sc, MXGEFW_CMD_ETHERNET_DOWN, &cmd);
3343 device_printf(sc->dev, "Couldn't bring down link\n");
3345 if (old_down_cnt == sc->down_cnt) {
3346 /* wait for down irq */
3347 DELAY(10 * sc->intr_coal_delay);
3350 if (old_down_cnt == sc->down_cnt) {
3351 device_printf(sc->dev, "never got down irq\n");
3354 mxge_free_mbufs(sc);
3360 mxge_setup_cfg_space(mxge_softc_t *sc)
3362 device_t dev = sc->dev;
3364 uint16_t cmd, lnk, pectl;
3366 /* find the PCIe link width and set max read request to 4KB*/
3367 if (pci_find_extcap(dev, PCIY_EXPRESS, ®) == 0) {
3368 lnk = pci_read_config(dev, reg + 0x12, 2);
3369 sc->link_width = (lnk >> 4) & 0x3f;
3371 pectl = pci_read_config(dev, reg + 0x8, 2);
3372 pectl = (pectl & ~0x7000) | (5 << 12);
3373 pci_write_config(dev, reg + 0x8, pectl, 2);
3376 /* Enable DMA and Memory space access */
3377 pci_enable_busmaster(dev);
3378 cmd = pci_read_config(dev, PCIR_COMMAND, 2);
3379 cmd |= PCIM_CMD_MEMEN;
3380 pci_write_config(dev, PCIR_COMMAND, cmd, 2);
3384 mxge_read_reboot(mxge_softc_t *sc)
3386 device_t dev = sc->dev;
3389 /* find the vendor specific offset */
3390 if (pci_find_extcap(dev, PCIY_VENDOR, &vs) != 0) {
3391 device_printf(sc->dev,
3392 "could not find vendor specific offset\n");
3393 return (uint32_t)-1;
3395 /* enable read32 mode */
3396 pci_write_config(dev, vs + 0x10, 0x3, 1);
3397 /* tell NIC which register to read */
3398 pci_write_config(dev, vs + 0x18, 0xfffffff0, 4);
3399 return (pci_read_config(dev, vs + 0x14, 4));
3403 mxge_watchdog_reset(mxge_softc_t *sc)
3405 struct pci_devinfo *dinfo;
3412 device_printf(sc->dev, "Watchdog reset!\n");
3415 * check to see if the NIC rebooted. If it did, then all of
3416 * PCI config space has been reset, and things like the
3417 * busmaster bit will be zero. If this is the case, then we
3418 * must restore PCI config space before the NIC can be used
3421 cmd = pci_read_config(sc->dev, PCIR_COMMAND, 2);
3422 if (cmd == 0xffff) {
3424 * maybe the watchdog caught the NIC rebooting; wait
3425 * up to 100ms for it to finish. If it does not come
3426 * back, then give up
3429 cmd = pci_read_config(sc->dev, PCIR_COMMAND, 2);
3430 if (cmd == 0xffff) {
3431 device_printf(sc->dev, "NIC disappeared!\n");
3435 if ((cmd & PCIM_CMD_BUSMASTEREN) == 0) {
3436 /* print the reboot status */
3437 reboot = mxge_read_reboot(sc);
3438 device_printf(sc->dev, "NIC rebooted, status = 0x%x\n",
3440 /* restore PCI configuration space */
3441 dinfo = device_get_ivars(sc->dev);
3442 pci_cfg_restore(sc->dev, dinfo);
3444 /* and redo any changes we made to our config space */
3445 mxge_setup_cfg_space(sc);
3447 if (sc->ifp->if_drv_flags & IFF_DRV_RUNNING) {
3449 err = mxge_open(sc);
3452 device_printf(sc->dev, "NIC did not reboot, ring state:\n");
3453 device_printf(sc->dev, "tx.req=%d tx.done=%d\n",
3454 sc->ss->tx.req, sc->ss->tx.done);
3455 device_printf(sc->dev, "pkt_done=%d fw=%d\n",
3456 sc->ss->tx.pkt_done,
3457 be32toh(sc->ss->fw_stats->send_done_count));
3458 device_printf(sc->dev, "not resetting\n");
3464 mxge_watchdog(mxge_softc_t *sc)
3466 mxge_tx_ring_t *tx = &sc->ss->tx;
3467 uint32_t rx_pause = be32toh(sc->ss->fw_stats->dropped_pause);
3470 /* see if we have outstanding transmits, which
3471 have been pending for more than mxge_ticks */
3472 if (tx->req != tx->done &&
3473 tx->watchdog_req != tx->watchdog_done &&
3474 tx->done == tx->watchdog_done) {
3475 /* check for pause blocking before resetting */
3476 if (tx->watchdog_rx_pause == rx_pause)
3477 err = mxge_watchdog_reset(sc);
3479 device_printf(sc->dev, "Flow control blocking "
3480 "xmits, check link partner\n");
3483 tx->watchdog_req = tx->req;
3484 tx->watchdog_done = tx->done;
3485 tx->watchdog_rx_pause = rx_pause;
3487 if (sc->need_media_probe)
3488 mxge_media_probe(sc);
3493 mxge_update_stats(mxge_softc_t *sc)
3495 struct mxge_slice_state *ss;
3496 u_long ipackets = 0;
3499 for(slice = 0; slice < sc->num_slices; slice++) {
3500 ss = &sc->ss[slice];
3501 ipackets += ss->ipackets;
3503 sc->ifp->if_ipackets = ipackets;
3507 mxge_tick(void *arg)
3509 mxge_softc_t *sc = arg;
3512 /* aggregate stats from different slices */
3513 mxge_update_stats(sc);
3514 if (!sc->watchdog_countdown) {
3515 err = mxge_watchdog(sc);
3516 sc->watchdog_countdown = 4;
3518 sc->watchdog_countdown--;
3520 callout_reset(&sc->co_hdl, mxge_ticks, mxge_tick, sc);
3525 mxge_media_change(struct ifnet *ifp)
3531 mxge_change_mtu(mxge_softc_t *sc, int mtu)
3533 struct ifnet *ifp = sc->ifp;
3534 int real_mtu, old_mtu;
3538 real_mtu = mtu + ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
3539 if ((real_mtu > sc->max_mtu) || real_mtu < 60)
3541 mtx_lock(&sc->driver_mtx);
3542 old_mtu = ifp->if_mtu;
3544 if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
3546 err = mxge_open(sc);
3548 ifp->if_mtu = old_mtu;
3550 (void) mxge_open(sc);
3553 mtx_unlock(&sc->driver_mtx);
3558 mxge_media_status(struct ifnet *ifp, struct ifmediareq *ifmr)
3560 mxge_softc_t *sc = ifp->if_softc;
3565 ifmr->ifm_status = IFM_AVALID;
3566 ifmr->ifm_status |= sc->link_state ? IFM_ACTIVE : 0;
3567 ifmr->ifm_active = IFM_AUTO | IFM_ETHER;
3568 ifmr->ifm_active |= sc->link_state ? IFM_FDX : 0;
3572 mxge_ioctl(struct ifnet *ifp, u_long command, caddr_t data)
3574 mxge_softc_t *sc = ifp->if_softc;
3575 struct ifreq *ifr = (struct ifreq *)data;
3582 err = ether_ioctl(ifp, command, data);
3586 err = mxge_change_mtu(sc, ifr->ifr_mtu);
3590 mtx_lock(&sc->driver_mtx);
3591 if (ifp->if_flags & IFF_UP) {
3592 if (!(ifp->if_drv_flags & IFF_DRV_RUNNING)) {
3593 err = mxge_open(sc);
3595 /* take care of promis can allmulti
3597 mxge_change_promisc(sc,
3598 ifp->if_flags & IFF_PROMISC);
3599 mxge_set_multicast_list(sc);
3602 if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
3606 mtx_unlock(&sc->driver_mtx);
3611 mtx_lock(&sc->driver_mtx);
3612 mxge_set_multicast_list(sc);
3613 mtx_unlock(&sc->driver_mtx);
3617 mtx_lock(&sc->driver_mtx);
3618 mask = ifr->ifr_reqcap ^ ifp->if_capenable;
3619 if (mask & IFCAP_TXCSUM) {
3620 if (IFCAP_TXCSUM & ifp->if_capenable) {
3621 ifp->if_capenable &= ~(IFCAP_TXCSUM|IFCAP_TSO4);
3622 ifp->if_hwassist &= ~(CSUM_TCP | CSUM_UDP
3625 ifp->if_capenable |= IFCAP_TXCSUM;
3626 ifp->if_hwassist |= (CSUM_TCP | CSUM_UDP);
3628 } else if (mask & IFCAP_RXCSUM) {
3629 if (IFCAP_RXCSUM & ifp->if_capenable) {
3630 ifp->if_capenable &= ~IFCAP_RXCSUM;
3633 ifp->if_capenable |= IFCAP_RXCSUM;
3637 if (mask & IFCAP_TSO4) {
3638 if (IFCAP_TSO4 & ifp->if_capenable) {
3639 ifp->if_capenable &= ~IFCAP_TSO4;
3640 ifp->if_hwassist &= ~CSUM_TSO;
3641 } else if (IFCAP_TXCSUM & ifp->if_capenable) {
3642 ifp->if_capenable |= IFCAP_TSO4;
3643 ifp->if_hwassist |= CSUM_TSO;
3645 printf("mxge requires tx checksum offload"
3646 " be enabled to use TSO\n");
3650 if (mask & IFCAP_LRO) {
3651 if (IFCAP_LRO & ifp->if_capenable)
3652 err = mxge_change_lro_locked(sc, 0);
3654 err = mxge_change_lro_locked(sc, mxge_lro_cnt);
3656 if (mask & IFCAP_VLAN_HWTAGGING)
3657 ifp->if_capenable ^= IFCAP_VLAN_HWTAGGING;
3658 mtx_unlock(&sc->driver_mtx);
3659 VLAN_CAPABILITIES(ifp);
3664 err = ifmedia_ioctl(ifp, (struct ifreq *)data,
3665 &sc->media, command);
3675 mxge_fetch_tunables(mxge_softc_t *sc)
3678 TUNABLE_INT_FETCH("hw.mxge.max_slices", &mxge_max_slices);
3679 TUNABLE_INT_FETCH("hw.mxge.flow_control_enabled",
3680 &mxge_flow_control);
3681 TUNABLE_INT_FETCH("hw.mxge.intr_coal_delay",
3682 &mxge_intr_coal_delay);
3683 TUNABLE_INT_FETCH("hw.mxge.nvidia_ecrc_enable",
3684 &mxge_nvidia_ecrc_enable);
3685 TUNABLE_INT_FETCH("hw.mxge.force_firmware",
3686 &mxge_force_firmware);
3687 TUNABLE_INT_FETCH("hw.mxge.deassert_wait",
3688 &mxge_deassert_wait);
3689 TUNABLE_INT_FETCH("hw.mxge.verbose",
3691 TUNABLE_INT_FETCH("hw.mxge.ticks", &mxge_ticks);
3692 TUNABLE_INT_FETCH("hw.mxge.lro_cnt", &sc->lro_cnt);
3693 TUNABLE_INT_FETCH("hw.mxge.always_promisc", &mxge_always_promisc);
3694 TUNABLE_INT_FETCH("hw.mxge.rss_hash_type", &mxge_rss_hash_type);
3695 if (sc->lro_cnt != 0)
3696 mxge_lro_cnt = sc->lro_cnt;
3700 if (mxge_intr_coal_delay < 0 || mxge_intr_coal_delay > 10*1000)
3701 mxge_intr_coal_delay = 30;
3702 if (mxge_ticks == 0)
3703 mxge_ticks = hz / 2;
3704 sc->pause = mxge_flow_control;
3705 if (mxge_rss_hash_type < MXGEFW_RSS_HASH_TYPE_IPV4
3706 || mxge_rss_hash_type > MXGEFW_RSS_HASH_TYPE_SRC_PORT) {
3707 mxge_rss_hash_type = MXGEFW_RSS_HASH_TYPE_SRC_PORT;
3713 mxge_free_slices(mxge_softc_t *sc)
3715 struct mxge_slice_state *ss;
3722 for (i = 0; i < sc->num_slices; i++) {
3724 if (ss->fw_stats != NULL) {
3725 mxge_dma_free(&ss->fw_stats_dma);
3726 ss->fw_stats = NULL;
3727 mtx_destroy(&ss->tx.mtx);
3729 if (ss->rx_done.entry != NULL) {
3730 mxge_dma_free(&ss->rx_done.dma);
3731 ss->rx_done.entry = NULL;
3734 free(sc->ss, M_DEVBUF);
3739 mxge_alloc_slices(mxge_softc_t *sc)
3742 struct mxge_slice_state *ss;
3744 int err, i, max_intr_slots;
3746 err = mxge_send_cmd(sc, MXGEFW_CMD_GET_RX_RING_SIZE, &cmd);
3748 device_printf(sc->dev, "Cannot determine rx ring size\n");
3751 sc->rx_ring_size = cmd.data0;
3752 max_intr_slots = 2 * (sc->rx_ring_size / sizeof (mcp_dma_addr_t));
3754 bytes = sizeof (*sc->ss) * sc->num_slices;
3755 sc->ss = malloc(bytes, M_DEVBUF, M_NOWAIT | M_ZERO);
3758 for (i = 0; i < sc->num_slices; i++) {
3763 /* allocate per-slice rx interrupt queues */
3765 bytes = max_intr_slots * sizeof (*ss->rx_done.entry);
3766 err = mxge_dma_alloc(sc, &ss->rx_done.dma, bytes, 4096);
3769 ss->rx_done.entry = ss->rx_done.dma.addr;
3770 bzero(ss->rx_done.entry, bytes);
3773 * allocate the per-slice firmware stats; stats
3774 * (including tx) are used used only on the first
3780 bytes = sizeof (*ss->fw_stats);
3781 err = mxge_dma_alloc(sc, &ss->fw_stats_dma,
3782 sizeof (*ss->fw_stats), 64);
3785 ss->fw_stats = (mcp_irq_data_t *)ss->fw_stats_dma.addr;
3786 snprintf(ss->tx.mtx_name, sizeof(ss->tx.mtx_name),
3787 "%s:tx(%d)", device_get_nameunit(sc->dev), i);
3788 mtx_init(&ss->tx.mtx, ss->tx.mtx_name, NULL, MTX_DEF);
3794 mxge_free_slices(sc);
3799 mxge_slice_probe(mxge_softc_t *sc)
3803 int msix_cnt, status, max_intr_slots;
3807 * don't enable multiple slices if they are not enabled,
3808 * or if this is not an SMP system
3811 if (mxge_max_slices == 0 || mxge_max_slices == 1 || mp_ncpus < 2)
3814 /* see how many MSI-X interrupts are available */
3815 msix_cnt = pci_msix_count(sc->dev);
3819 /* now load the slice aware firmware see what it supports */
3820 old_fw = sc->fw_name;
3821 if (old_fw == mxge_fw_aligned)
3822 sc->fw_name = mxge_fw_rss_aligned;
3824 sc->fw_name = mxge_fw_rss_unaligned;
3825 status = mxge_load_firmware(sc, 0);
3827 device_printf(sc->dev, "Falling back to a single slice\n");
3831 /* try to send a reset command to the card to see if it
3833 memset(&cmd, 0, sizeof (cmd));
3834 status = mxge_send_cmd(sc, MXGEFW_CMD_RESET, &cmd);
3836 device_printf(sc->dev, "failed reset\n");
3840 /* get rx ring size */
3841 status = mxge_send_cmd(sc, MXGEFW_CMD_GET_RX_RING_SIZE, &cmd);
3843 device_printf(sc->dev, "Cannot determine rx ring size\n");
3846 max_intr_slots = 2 * (cmd.data0 / sizeof (mcp_dma_addr_t));
3848 /* tell it the size of the interrupt queues */
3849 cmd.data0 = max_intr_slots * sizeof (struct mcp_slot);
3850 status = mxge_send_cmd(sc, MXGEFW_CMD_SET_INTRQ_SIZE, &cmd);
3852 device_printf(sc->dev, "failed MXGEFW_CMD_SET_INTRQ_SIZE\n");
3856 /* ask the maximum number of slices it supports */
3857 status = mxge_send_cmd(sc, MXGEFW_CMD_GET_MAX_RSS_QUEUES, &cmd);
3859 device_printf(sc->dev,
3860 "failed MXGEFW_CMD_GET_MAX_RSS_QUEUES\n");
3863 sc->num_slices = cmd.data0;
3864 if (sc->num_slices > msix_cnt)
3865 sc->num_slices = msix_cnt;
3867 if (mxge_max_slices == -1) {
3868 /* cap to number of CPUs in system */
3869 if (sc->num_slices > mp_ncpus)
3870 sc->num_slices = mp_ncpus;
3872 if (sc->num_slices > mxge_max_slices)
3873 sc->num_slices = mxge_max_slices;
3875 /* make sure it is a power of two */
3876 while (sc->num_slices & (sc->num_slices - 1))
3880 device_printf(sc->dev, "using %d slices\n",
3886 sc->fw_name = old_fw;
3887 (void) mxge_load_firmware(sc, 0);
3891 mxge_add_msix_irqs(mxge_softc_t *sc)
3894 int count, err, i, rid;
3897 sc->msix_table_res = bus_alloc_resource_any(sc->dev, SYS_RES_MEMORY,
3900 if (sc->msix_table_res == NULL) {
3901 device_printf(sc->dev, "couldn't alloc MSIX table res\n");
3905 count = sc->num_slices;
3906 err = pci_alloc_msix(sc->dev, &count);
3908 device_printf(sc->dev, "pci_alloc_msix: failed, wanted %d"
3909 "err = %d \n", sc->num_slices, err);
3910 goto abort_with_msix_table;
3912 if (count < sc->num_slices) {
3913 device_printf(sc->dev, "pci_alloc_msix: need %d, got %d\n",
3914 count, sc->num_slices);
3915 device_printf(sc->dev,
3916 "Try setting hw.mxge.max_slices to %d\n",
3919 goto abort_with_msix;
3921 bytes = sizeof (*sc->msix_irq_res) * sc->num_slices;
3922 sc->msix_irq_res = malloc(bytes, M_DEVBUF, M_NOWAIT|M_ZERO);
3923 if (sc->msix_irq_res == NULL) {
3925 goto abort_with_msix;
3928 for (i = 0; i < sc->num_slices; i++) {
3930 sc->msix_irq_res[i] = bus_alloc_resource_any(sc->dev,
3933 if (sc->msix_irq_res[i] == NULL) {
3934 device_printf(sc->dev, "couldn't allocate IRQ res"
3935 " for message %d\n", i);
3937 goto abort_with_res;
3941 bytes = sizeof (*sc->msix_ih) * sc->num_slices;
3942 sc->msix_ih = malloc(bytes, M_DEVBUF, M_NOWAIT|M_ZERO);
3944 for (i = 0; i < sc->num_slices; i++) {
3945 err = bus_setup_intr(sc->dev, sc->msix_irq_res[i],
3946 INTR_TYPE_NET | INTR_MPSAFE,
3947 #if __FreeBSD_version > 700030
3950 mxge_intr, &sc->ss[i], &sc->msix_ih[i]);
3952 device_printf(sc->dev, "couldn't setup intr for "
3954 goto abort_with_intr;
3959 device_printf(sc->dev, "using %d msix IRQs:",
3961 for (i = 0; i < sc->num_slices; i++)
3962 printf(" %ld", rman_get_start(sc->msix_irq_res[i]));
3968 for (i = 0; i < sc->num_slices; i++) {
3969 if (sc->msix_ih[i] != NULL) {
3970 bus_teardown_intr(sc->dev, sc->msix_irq_res[i],
3972 sc->msix_ih[i] = NULL;
3975 free(sc->msix_ih, M_DEVBUF);
3979 for (i = 0; i < sc->num_slices; i++) {
3981 if (sc->msix_irq_res[i] != NULL)
3982 bus_release_resource(sc->dev, SYS_RES_IRQ, rid,
3983 sc->msix_irq_res[i]);
3984 sc->msix_irq_res[i] = NULL;
3986 free(sc->msix_irq_res, M_DEVBUF);
3990 pci_release_msi(sc->dev);
3992 abort_with_msix_table:
3993 bus_release_resource(sc->dev, SYS_RES_MEMORY, PCIR_BAR(2),
3994 sc->msix_table_res);
4000 mxge_add_single_irq(mxge_softc_t *sc)
4002 int count, err, rid;
4004 count = pci_msi_count(sc->dev);
4005 if (count == 1 && pci_alloc_msi(sc->dev, &count) == 0) {
4011 sc->irq_res = bus_alloc_resource(sc->dev, SYS_RES_IRQ, &rid, 0, ~0,
4012 1, RF_SHAREABLE | RF_ACTIVE);
4013 if (sc->irq_res == NULL) {
4014 device_printf(sc->dev, "could not alloc interrupt\n");
4018 device_printf(sc->dev, "using %s irq %ld\n",
4019 sc->legacy_irq ? "INTx" : "MSI",
4020 rman_get_start(sc->irq_res));
4021 err = bus_setup_intr(sc->dev, sc->irq_res,
4022 INTR_TYPE_NET | INTR_MPSAFE,
4023 #if __FreeBSD_version > 700030
4026 mxge_intr, &sc->ss[0], &sc->ih);
4028 bus_release_resource(sc->dev, SYS_RES_IRQ,
4029 sc->legacy_irq ? 0 : 1, sc->irq_res);
4030 if (!sc->legacy_irq)
4031 pci_release_msi(sc->dev);
4037 mxge_rem_msix_irqs(mxge_softc_t *sc)
4041 for (i = 0; i < sc->num_slices; i++) {
4042 if (sc->msix_ih[i] != NULL) {
4043 bus_teardown_intr(sc->dev, sc->msix_irq_res[i],
4045 sc->msix_ih[i] = NULL;
4048 free(sc->msix_ih, M_DEVBUF);
4050 for (i = 0; i < sc->num_slices; i++) {
4052 if (sc->msix_irq_res[i] != NULL)
4053 bus_release_resource(sc->dev, SYS_RES_IRQ, rid,
4054 sc->msix_irq_res[i]);
4055 sc->msix_irq_res[i] = NULL;
4057 free(sc->msix_irq_res, M_DEVBUF);
4059 bus_release_resource(sc->dev, SYS_RES_MEMORY, PCIR_BAR(2),
4060 sc->msix_table_res);
4062 pci_release_msi(sc->dev);
4067 mxge_rem_single_irq(mxge_softc_t *sc)
4069 bus_teardown_intr(sc->dev, sc->irq_res, sc->ih);
4070 bus_release_resource(sc->dev, SYS_RES_IRQ,
4071 sc->legacy_irq ? 0 : 1, sc->irq_res);
4072 if (!sc->legacy_irq)
4073 pci_release_msi(sc->dev);
4077 mxge_rem_irq(mxge_softc_t *sc)
4079 if (sc->num_slices > 1)
4080 mxge_rem_msix_irqs(sc);
4082 mxge_rem_single_irq(sc);
4086 mxge_add_irq(mxge_softc_t *sc)
4090 if (sc->num_slices > 1)
4091 err = mxge_add_msix_irqs(sc);
4093 err = mxge_add_single_irq(sc);
4095 if (0 && err == 0 && sc->num_slices > 1) {
4096 mxge_rem_msix_irqs(sc);
4097 err = mxge_add_msix_irqs(sc);
4104 mxge_attach(device_t dev)
4106 mxge_softc_t *sc = device_get_softc(dev);
4111 mxge_fetch_tunables(sc);
4113 err = bus_dma_tag_create(NULL, /* parent */
4116 BUS_SPACE_MAXADDR, /* low */
4117 BUS_SPACE_MAXADDR, /* high */
4118 NULL, NULL, /* filter */
4119 65536 + 256, /* maxsize */
4120 MXGE_MAX_SEND_DESC, /* num segs */
4121 65536, /* maxsegsize */
4123 NULL, NULL, /* lock */
4124 &sc->parent_dmat); /* tag */
4127 device_printf(sc->dev, "Err %d allocating parent dmat\n",
4129 goto abort_with_nothing;
4132 ifp = sc->ifp = if_alloc(IFT_ETHER);
4134 device_printf(dev, "can not if_alloc()\n");
4136 goto abort_with_parent_dmat;
4138 if_initname(ifp, device_get_name(dev), device_get_unit(dev));
4140 snprintf(sc->cmd_mtx_name, sizeof(sc->cmd_mtx_name), "%s:cmd",
4141 device_get_nameunit(dev));
4142 mtx_init(&sc->cmd_mtx, sc->cmd_mtx_name, NULL, MTX_DEF);
4143 snprintf(sc->driver_mtx_name, sizeof(sc->driver_mtx_name),
4144 "%s:drv", device_get_nameunit(dev));
4145 mtx_init(&sc->driver_mtx, sc->driver_mtx_name,
4146 MTX_NETWORK_LOCK, MTX_DEF);
4148 callout_init_mtx(&sc->co_hdl, &sc->driver_mtx, 0);
4150 mxge_setup_cfg_space(sc);
4152 /* Map the board into the kernel */
4154 sc->mem_res = bus_alloc_resource(dev, SYS_RES_MEMORY, &rid, 0,
4156 if (sc->mem_res == NULL) {
4157 device_printf(dev, "could not map memory\n");
4159 goto abort_with_lock;
4161 sc->sram = rman_get_virtual(sc->mem_res);
4162 sc->sram_size = 2*1024*1024 - (2*(48*1024)+(32*1024)) - 0x100;
4163 if (sc->sram_size > rman_get_size(sc->mem_res)) {
4164 device_printf(dev, "impossible memory region size %ld\n",
4165 rman_get_size(sc->mem_res));
4167 goto abort_with_mem_res;
4170 /* make NULL terminated copy of the EEPROM strings section of
4172 bzero(sc->eeprom_strings, MXGE_EEPROM_STRINGS_SIZE);
4173 bus_space_read_region_1(rman_get_bustag(sc->mem_res),
4174 rman_get_bushandle(sc->mem_res),
4175 sc->sram_size - MXGE_EEPROM_STRINGS_SIZE,
4177 MXGE_EEPROM_STRINGS_SIZE - 2);
4178 err = mxge_parse_strings(sc);
4180 goto abort_with_mem_res;
4182 /* Enable write combining for efficient use of PCIe bus */
4185 /* Allocate the out of band dma memory */
4186 err = mxge_dma_alloc(sc, &sc->cmd_dma,
4187 sizeof (mxge_cmd_t), 64);
4189 goto abort_with_mem_res;
4190 sc->cmd = (mcp_cmd_response_t *) sc->cmd_dma.addr;
4191 err = mxge_dma_alloc(sc, &sc->zeropad_dma, 64, 64);
4193 goto abort_with_cmd_dma;
4195 err = mxge_dma_alloc(sc, &sc->dmabench_dma, 4096, 4096);
4197 goto abort_with_zeropad_dma;
4199 /* select & load the firmware */
4200 err = mxge_select_firmware(sc);
4202 goto abort_with_dmabench;
4203 sc->intr_coal_delay = mxge_intr_coal_delay;
4205 mxge_slice_probe(sc);
4206 err = mxge_alloc_slices(sc);
4208 goto abort_with_dmabench;
4210 err = mxge_reset(sc, 0);
4212 goto abort_with_slices;
4214 err = mxge_alloc_rings(sc);
4216 device_printf(sc->dev, "failed to allocate rings\n");
4217 goto abort_with_dmabench;
4220 err = mxge_add_irq(sc);
4222 device_printf(sc->dev, "failed to add irq\n");
4223 goto abort_with_rings;
4226 ifp->if_baudrate = IF_Gbps(10UL);
4227 ifp->if_capabilities = IFCAP_RXCSUM | IFCAP_TXCSUM | IFCAP_TSO4 |
4228 IFCAP_VLAN_MTU | IFCAP_LRO;
4230 #ifdef MXGE_NEW_VLAN_API
4231 ifp->if_capabilities |= IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_HWCSUM;
4234 sc->max_mtu = mxge_max_mtu(sc);
4235 if (sc->max_mtu >= 9000)
4236 ifp->if_capabilities |= IFCAP_JUMBO_MTU;
4238 device_printf(dev, "MTU limited to %d. Install "
4239 "latest firmware for 9000 byte jumbo support\n",
4240 sc->max_mtu - ETHER_HDR_LEN);
4241 ifp->if_hwassist = CSUM_TCP | CSUM_UDP | CSUM_TSO;
4242 ifp->if_capenable = ifp->if_capabilities;
4243 if (sc->lro_cnt == 0)
4244 ifp->if_capenable &= ~IFCAP_LRO;
4246 ifp->if_init = mxge_init;
4248 ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
4249 ifp->if_ioctl = mxge_ioctl;
4250 ifp->if_start = mxge_start;
4251 /* Initialise the ifmedia structure */
4252 ifmedia_init(&sc->media, 0, mxge_media_change,
4254 mxge_set_media(sc, IFM_ETHER | IFM_AUTO);
4255 mxge_media_probe(sc);
4256 ether_ifattach(ifp, sc->mac_addr);
4257 /* ether_ifattach sets mtu to 1500 */
4258 if (ifp->if_capabilities & IFCAP_JUMBO_MTU)
4261 mxge_add_sysctls(sc);
4265 mxge_free_rings(sc);
4267 mxge_free_slices(sc);
4268 abort_with_dmabench:
4269 mxge_dma_free(&sc->dmabench_dma);
4270 abort_with_zeropad_dma:
4271 mxge_dma_free(&sc->zeropad_dma);
4273 mxge_dma_free(&sc->cmd_dma);
4275 bus_release_resource(dev, SYS_RES_MEMORY, PCIR_BARS, sc->mem_res);
4277 pci_disable_busmaster(dev);
4278 mtx_destroy(&sc->cmd_mtx);
4279 mtx_destroy(&sc->driver_mtx);
4281 abort_with_parent_dmat:
4282 bus_dma_tag_destroy(sc->parent_dmat);
4289 mxge_detach(device_t dev)
4291 mxge_softc_t *sc = device_get_softc(dev);
4293 if (mxge_vlans_active(sc)) {
4294 device_printf(sc->dev,
4295 "Detach vlans before removing module\n");
4298 mtx_lock(&sc->driver_mtx);
4299 if (sc->ifp->if_drv_flags & IFF_DRV_RUNNING)
4301 mtx_unlock(&sc->driver_mtx);
4302 ether_ifdetach(sc->ifp);
4303 callout_drain(&sc->co_hdl);
4304 ifmedia_removeall(&sc->media);
4305 mxge_dummy_rdma(sc, 0);
4306 mxge_rem_sysctls(sc);
4308 mxge_free_rings(sc);
4309 mxge_free_slices(sc);
4310 mxge_dma_free(&sc->dmabench_dma);
4311 mxge_dma_free(&sc->zeropad_dma);
4312 mxge_dma_free(&sc->cmd_dma);
4313 bus_release_resource(dev, SYS_RES_MEMORY, PCIR_BARS, sc->mem_res);
4314 pci_disable_busmaster(dev);
4315 mtx_destroy(&sc->cmd_mtx);
4316 mtx_destroy(&sc->driver_mtx);
4318 bus_dma_tag_destroy(sc->parent_dmat);
4323 mxge_shutdown(device_t dev)
4329 This file uses Myri10GE driver indentation.
4332 c-file-style:"linux"