2 * Copyright (c) 2016 Alexander Motin <mav@FreeBSD.org>
3 * Copyright (c) 2015 Peter Grehan <grehan@freebsd.org>
4 * Copyright (c) 2013 Jeremiah Lott, Avere Systems
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
10 * 1. Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer
12 * in this position and unchanged.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in the
15 * documentation and/or other materials provided with the distribution.
17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
18 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
21 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
30 #include <sys/cdefs.h>
31 __FBSDID("$FreeBSD$");
33 #include <sys/types.h>
34 #ifndef WITHOUT_CAPSICUM
35 #include <sys/capsicum.h>
37 #include <sys/limits.h>
38 #include <sys/ioctl.h>
40 #include <net/ethernet.h>
41 #include <netinet/in.h>
42 #include <netinet/tcp.h>
54 #include <pthread_np.h>
56 #include "e1000_regs.h"
57 #include "e1000_defines.h"
/*
 * Device identity, MDIC/EECD bit masks, BAR numbers and lengths, I/O
 * window offsets and the highest indices of the RAR/MTA/VFTA arrays.
 */
64 /* Hardware/register definitions XXX: move some to common code. */
65 #define E82545_VENDOR_ID_INTEL 0x8086
66 #define E82545_DEV_ID_82545EM_COPPER 0x100F
67 #define E82545_SUBDEV_ID 0x1008
69 #define E82545_REVISION_4 4
71 #define E82545_MDIC_DATA_MASK 0x0000FFFF
72 #define E82545_MDIC_OP_MASK 0x0c000000
73 #define E82545_MDIC_IE 0x20000000
75 #define E82545_EECD_FWE_DIS 0x00000010 /* Flash writes disabled */
76 #define E82545_EECD_FWE_EN 0x00000020 /* Flash writes enabled */
77 #define E82545_EECD_FWE_MASK 0x00000030 /* Flash writes mask */
79 #define E82545_BAR_REGISTER 0
80 #define E82545_BAR_REGISTER_LEN (128*1024)
81 #define E82545_BAR_FLASH 1
82 #define E82545_BAR_FLASH_LEN (64*1024)
83 #define E82545_BAR_IO 2
84 #define E82545_BAR_IO_LEN 8
86 #define E82545_IOADDR 0x00000000
87 #define E82545_IODATA 0x00000004
88 #define E82545_IO_REGISTER_MAX 0x0001FFFF
89 #define E82545_IO_FLASH_BASE 0x00080000
90 #define E82545_IO_FLASH_MAX 0x000FFFFF
/*
 * Address of 32-bit entry number offset in a register array based at reg.
 * NOTE(review): the macro arguments are not parenthesized, so callers
 * should only pass simple expressions.
 */
92 #define E82545_ARRAY_ENTRY(reg, offset) (reg + (offset<<2))
93 #define E82545_RAR_MAX 15
94 #define E82545_MTA_MAX 127
95 #define E82545_VFTA_MAX 127
97 /* Slightly modified from the driver versions, hardcoded for 3 opcode bits,
98 * followed by 6 address bits.
99 * TODO: make opcode bits and addr bits configurable?
100 * NVM Commands - Microwire */
101 #define E82545_NVM_OPCODE_BITS 3
102 #define E82545_NVM_ADDR_BITS 6
103 #define E82545_NVM_DATA_BITS 16
104 #define E82545_NVM_OPADDR_BITS (E82545_NVM_OPCODE_BITS + E82545_NVM_ADDR_BITS)
/* The address occupies the low bits; the opcode sits directly above it. */
105 #define E82545_NVM_ADDR_MASK ((1 << E82545_NVM_ADDR_BITS)-1)
106 #define E82545_NVM_OPCODE_MASK \
107 (((1 << E82545_NVM_OPCODE_BITS) - 1) << E82545_NVM_ADDR_BITS)
/* Opcode values are pre-shifted so they compare directly after masking. */
108 #define E82545_NVM_OPCODE_READ (0x6 << E82545_NVM_ADDR_BITS) /* read */
109 #define E82545_NVM_OPCODE_WRITE (0x5 << E82545_NVM_ADDR_BITS) /* write */
110 #define E82545_NVM_OPCODE_ERASE (0x7 << E82545_NVM_ADDR_BITS) /* erase */
111 #define E82545_NVM_OPCODE_EWEN (0x4 << E82545_NVM_ADDR_BITS) /* wr-enable */
113 #define E82545_NVM_EEPROM_SIZE 64 /* 64 * 16-bit values == 128 bytes */
115 #define E1000_ICR_SRPD 0x00010000
117 /* This is an arbitrary number. There is no hard limit on the chip. */
118 #define I82545_MAX_TXSEGS 64
/*
 * Receive descriptor as read from and written back to the guest ring.
 * The receive path reads buffer_addr and fills in length and status.
 */
120 /* Legacy receive descriptor */
121 struct e1000_rx_desc {
122 uint64_t buffer_addr; /* Address of the descriptor's data buffer */
123 uint16_t length; /* Length of data DMAed into data buffer */
124 uint16_t csum; /* Packet checksum */
125 uint8_t status; /* Descriptor status */
126 uint8_t errors; /* Descriptor Errors */
/*
 * Values returned by e82545_txdesc_type(): legacy descriptors have the
 * DEXT bit clear and map to 0; context and data descriptors carry DEXT
 * plus their DTYP bits.
 */
130 /* Transmit descriptor types */
131 #define E1000_TXD_MASK (E1000_TXD_CMD_DEXT | 0x00F00000)
132 #define E1000_TXD_TYP_L (0)
133 #define E1000_TXD_TYP_C (E1000_TXD_CMD_DEXT | E1000_TXD_DTYP_C)
134 #define E1000_TXD_TYP_D (E1000_TXD_CMD_DEXT | E1000_TXD_DTYP_D)
/*
 * Transmit descriptor in its legacy layout; also used as the generic
 * view when the transmit path first inspects a descriptor.
 */
136 /* Legacy transmit descriptor */
137 struct e1000_tx_desc {
138 uint64_t buffer_addr; /* Address of the descriptor's data buffer */
142 uint16_t length; /* Data buffer length */
143 uint8_t cso; /* Checksum offset */
144 uint8_t cmd; /* Descriptor control */
150 uint8_t status; /* Descriptor status */
151 uint8_t css; /* Checksum start */
/*
 * Offload context: IP and TCP/UDP checksum ranges plus segmentation
 * parameters.  The transmit path saves the most recent one in the softc
 * and applies it to the data descriptors that follow.
 */
157 /* Context descriptor */
158 struct e1000_context_desc {
162 uint8_t ipcss; /* IP checksum start */
163 uint8_t ipcso; /* IP checksum offset */
164 uint16_t ipcse; /* IP checksum end */
170 uint8_t tucss; /* TCP checksum start */
171 uint8_t tucso; /* TCP checksum offset */
172 uint16_t tucse; /* TCP checksum end */
175 uint32_t cmd_and_length;
179 uint8_t status; /* Descriptor status */
180 uint8_t hdr_len; /* Header length */
181 uint16_t mss; /* Maximum segment size */
/*
 * Extended data descriptor; popts selects which of the checksums set up
 * by the saved context descriptor are inserted.
 */
186 /* Data descriptor */
187 struct e1000_data_desc {
188 uint64_t buffer_addr; /* Address of the descriptor's data buffer */
192 uint16_t length; /* Data buffer length */
200 uint8_t status; /* Descriptor status */
201 uint8_t popts; /* Packet Options */
/* One transmit ring slot viewed as a legacy, context or data descriptor. */
207 union e1000_tx_udesc {
208 struct e1000_tx_desc td;
209 struct e1000_context_desc cd;
210 struct e1000_data_desc dd;
213 /* Tx checksum info for a packet. */
215 int ck_valid; /* ck_info is valid */
216 uint8_t ck_start; /* start byte of cksum calculation */
217 uint8_t ck_off; /* offset of cksum insertion */
218 uint16_t ck_len; /* length of cksum calc: 0 is to packet-end */
/* Set non-zero to enable DPRINTF debug output on stderr. */
static int e82545_debug = 0;

/*
 * DPRINTF prints only when e82545_debug is set; WPRINTF always prints.
 * Both prefix the message and need at least one argument after the format.
 * DPRINTF is wrapped in do/while (0) so that it behaves as a single
 * statement and cannot capture an else that follows the call site.
 */
#define DPRINTF(msg,params...) do {					\
	if (e82545_debug)						\
		fprintf(stderr, "e82545: " msg, params);		\
} while (0)
#define WPRINTF(msg,params...) fprintf(stderr, "e82545: " msg, params)

/* Note: both macros evaluate their arguments more than once. */
#define MIN(a,b) (((a)<(b))?(a):(b))
#define MAX(a,b) (((a)>(b))?(a):(b))
231 /* s/w representation of the RAL/RAH regs */
/* Ethernet address held by this receive-address entry. */
235 struct ether_addr eu_eth;
/*
 * Per-device state: host-side handles (PCI instance, VM context, events,
 * mutex, MAC address) followed by shadow copies of the emulated registers
 * (register offsets are noted per field), transmit and receive ring
 * bookkeeping, address filters, MDIC/EECD shadows, statistics counters and
 * the EEPROM image with its Microwire state.
 */
239 struct e82545_softc {
240 struct pci_devinst *esc_pi;
241 struct vmctx *esc_ctx;
242 struct mevent *esc_mevp;
243 struct mevent *esc_mevpitr;
244 pthread_mutex_t esc_mtx;
245 struct ether_addr esc_mac;
249 uint32_t esc_CTRL; /* x0000 device ctl */
250 uint32_t esc_FCAL; /* x0028 flow ctl addr lo */
251 uint32_t esc_FCAH; /* x002C flow ctl addr hi */
252 uint32_t esc_FCT; /* x0030 flow ctl type */
253 uint32_t esc_VET; /* x0038 VLAN eth type */
254 uint32_t esc_FCTTV; /* x0170 flow ctl tx timer */
255 uint32_t esc_LEDCTL; /* x0E00 LED control */
256 uint32_t esc_PBA; /* x1000 pkt buffer allocation */
258 /* Interrupt control */
259 int esc_irq_asserted;
260 uint32_t esc_ICR; /* x00C0 cause read/clear */
261 uint32_t esc_ITR; /* x00C4 intr throttling */
262 uint32_t esc_ICS; /* x00C8 cause set */
263 uint32_t esc_IMS; /* x00D0 mask set/read */
264 uint32_t esc_IMC; /* x00D8 mask clear */
/* Transmit: cached host mapping of the ring, last context descriptor. */
267 union e1000_tx_udesc *esc_txdesc;
268 struct e1000_context_desc esc_txctx;
269 pthread_t esc_tx_tid;
270 pthread_cond_t esc_tx_cond;
273 uint32_t esc_TXCW; /* x0178 transmit config */
274 uint32_t esc_TCTL; /* x0400 transmit ctl */
275 uint32_t esc_TIPG; /* x0410 inter-packet gap */
276 uint16_t esc_AIT; /* x0458 Adaptive Interframe Throttle */
277 uint64_t esc_tdba; /* verified 64-bit desc table addr */
278 uint32_t esc_TDBAL; /* x3800 desc table addr, low bits */
279 uint32_t esc_TDBAH; /* x3804 desc table addr, hi 32-bits */
280 uint32_t esc_TDLEN; /* x3808 # descriptors in bytes */
281 uint16_t esc_TDH; /* x3810 desc table head idx */
282 uint16_t esc_TDHr; /* internal read version of TDH */
283 uint16_t esc_TDT; /* x3818 desc table tail idx */
284 uint32_t esc_TIDV; /* x3820 intr delay */
285 uint32_t esc_TXDCTL; /* x3828 desc control */
286 uint32_t esc_TADV; /* x382C intr absolute delay */
288 /* L2 frame acceptance */
289 struct eth_uni esc_uni[16]; /* 16 x unicast MAC addresses */
290 uint32_t esc_fmcast[128]; /* Multicast filter bit-match */
291 uint32_t esc_fvlan[128]; /* VLAN 4096-bit filter */
/* Receive: cached host mapping of the ring. */
294 struct e1000_rx_desc *esc_rxdesc;
295 pthread_cond_t esc_rx_cond;
299 uint32_t esc_RCTL; /* x0100 receive ctl */
300 uint32_t esc_FCRTL; /* x2160 flow cntl thresh, low */
301 uint32_t esc_FCRTH; /* x2168 flow cntl thresh, hi */
302 uint64_t esc_rdba; /* verified 64-bit desc table addr */
303 uint32_t esc_RDBAL; /* x2800 desc table addr, low bits */
304 uint32_t esc_RDBAH; /* x2804 desc table addr, hi 32-bits*/
305 uint32_t esc_RDLEN; /* x2808 #descriptors */
306 uint16_t esc_RDH; /* x2810 desc table head idx */
307 uint16_t esc_RDT; /* x2818 desc table tail idx */
308 uint32_t esc_RDTR; /* x2820 intr delay */
309 uint32_t esc_RXDCTL; /* x2828 desc control */
310 uint32_t esc_RADV; /* x282C intr absolute delay */
311 uint32_t esc_RSRPD; /* x2C00 recv small packet detect */
312 uint32_t esc_RXCSUM; /* x5000 receive cksum ctl */
314 /* IO Port register access */
317 /* Shadow copy of MDIC */
318 uint32_t mdi_control;
319 /* Shadow copy of EECD */
320 uint32_t eeprom_control;
321 /* Latest NVM in/out */
/* Statistics counters. */
325 uint32_t missed_pkt_count; /* dropped for no room in rx queue */
326 uint32_t pkt_rx_by_size[6];
327 uint32_t pkt_tx_by_size[6];
328 uint32_t good_pkt_rx_count;
329 uint32_t bcast_pkt_rx_count;
330 uint32_t mcast_pkt_rx_count;
331 uint32_t good_pkt_tx_count;
332 uint32_t bcast_pkt_tx_count;
333 uint32_t mcast_pkt_tx_count;
334 uint32_t oversize_rx_count;
335 uint32_t tso_tx_count;
336 uint64_t good_octets_rx;
337 uint64_t good_octets_tx;
338 uint64_t missed_octets; /* counts missed and oversized */
/* Microwire EEPROM state machine and backing store. */
340 uint8_t nvm_bits:6; /* number of bits remaining in/out */
342 #define E82545_NVM_MODE_OPADDR 0x0
343 #define E82545_NVM_MODE_DATAIN 0x1
344 #define E82545_NVM_MODE_DATAOUT 0x2
346 uint16_t eeprom_data[E82545_NVM_EEPROM_SIZE];
/* Forward declarations for routines that are used before they are defined. */
349 static void e82545_reset(struct e82545_softc *sc, int dev);
350 static void e82545_rx_enable(struct e82545_softc *sc);
351 static void e82545_rx_disable(struct e82545_softc *sc);
352 static void e82545_tap_callback(int fd, enum ev_type type, void *param);
353 static void e82545_tx_start(struct e82545_softc *sc);
354 static void e82545_tx_enable(struct e82545_softc *sc);
355 static void e82545_tx_disable(struct e82545_softc *sc);
/*
 * Map a frame size in bytes to one of the six packet-size statistics
 * buckets:
 *   0: up to 64      1: 65-127      2: 128-255
 *   3: 256-511       4: 512-1023    5: 1024 and above
 */
static inline int
e82545_size_stat_index(uint32_t size)
{
	int msb;

	if (size <= 64)
		return (0);
	if (size >= 1024)
		return (5);

	/*
	 * 65..1023: the bucket is determined by the position of the highest
	 * set bit (bit 6 -> 1 ... bit 9 -> 4).  The lowest set bit, which is
	 * what ffs() reports, does not identify the size range and can even
	 * yield a negative index.
	 */
	for (msb = 0; size > 1; size >>= 1)
		msb++;
	return (msb - 5);
}
/*
 * Fill in the EEPROM image.  The MAC address is stored as three 16-bit
 * words with the even octet in the low byte, the PCI subsystem, device
 * and vendor IDs go into their NVM slots, and the checksum word is chosen
 * so that it plus the sum of the preceding words equals NVM_SUM.
 */
371 e82545_init_eeprom(struct e82545_softc *sc)
373 uint16_t checksum, i;
376 sc->eeprom_data[NVM_MAC_ADDR] = ((uint16_t)sc->esc_mac.octet[0]) |
377 (((uint16_t)sc->esc_mac.octet[1]) << 8);
378 sc->eeprom_data[NVM_MAC_ADDR+1] = ((uint16_t)sc->esc_mac.octet[2]) |
379 (((uint16_t)sc->esc_mac.octet[3]) << 8);
380 sc->eeprom_data[NVM_MAC_ADDR+2] = ((uint16_t)sc->esc_mac.octet[4]) |
381 (((uint16_t)sc->esc_mac.octet[5]) << 8);
384 sc->eeprom_data[NVM_SUB_DEV_ID] = E82545_SUBDEV_ID;
385 sc->eeprom_data[NVM_SUB_VEN_ID] = E82545_VENDOR_ID_INTEL;
386 sc->eeprom_data[NVM_DEV_ID] = E82545_DEV_ID_82545EM_COPPER;
387 sc->eeprom_data[NVM_VEN_ID] = E82545_VENDOR_ID_INTEL;
389 /* fill in the checksum */
391 for (i = 0; i < NVM_CHECKSUM_REG; i++) {
392 checksum += sc->eeprom_data[i];
/* 16-bit arithmetic wraps, which is what the checksum relies on. */
394 checksum = NVM_SUM - checksum;
395 sc->eeprom_data[NVM_CHECKSUM_REG] = checksum;
396 DPRINTF("eeprom checksum: 0x%x\r\n", checksum);
/* Log the register, PHY address and data of an MDI (PHY) register write. */
400 e82545_write_mdi(struct e82545_softc *sc, uint8_t reg_addr,
401 uint8_t phy_addr, uint32_t data)
403 DPRINTF("Write mdi reg:0x%x phy:0x%x data: 0x%x\r\n", reg_addr, phy_addr, data);
/*
 * Return the emulated value of a PHY register read over MDI.  Known
 * registers yield fixed values: link up with autonegotiation complete,
 * a fixed advertisement, 1000BASE-T status bits, and the M88E1011 PHY
 * identifier split into its upper and lower 16 bits (the lower half
 * combined with the revision).  Reads of other registers are logged.
 */
407 e82545_read_mdi(struct e82545_softc *sc, uint8_t reg_addr,
410 //DPRINTF("Read mdi reg:0x%x phy:0x%x\r\n", reg_addr, phy_addr);
413 return (MII_SR_LINK_STATUS | MII_SR_AUTONEG_CAPS |
414 MII_SR_AUTONEG_COMPLETE);
415 case PHY_AUTONEG_ADV:
416 return NWAY_AR_SELECTOR_FIELD;
419 case PHY_1000T_STATUS:
420 return (SR_1000T_LP_FD_CAPS | SR_1000T_REMOTE_RX_STATUS |
421 SR_1000T_LOCAL_RX_STATUS);
423 return (M88E1011_I_PHY_ID >> 16) & 0xFFFF;
425 return (M88E1011_I_PHY_ID | E82545_REVISION_4) & 0xFFFF;
427 DPRINTF("Unknown mdi read reg:0x%x phy:0x%x\r\n", reg_addr, phy_addr);
/*
 * Advance the Microwire EEPROM state machine by one bit.  nvm_mode selects
 * between shifting in an opcode and address, shifting data in for a write,
 * and shifting data out for a read; nvm_bits counts the bits left in the
 * current phase.  When a phase completes the machine either dispatches on
 * the opcode (write-enable, read, write) or returns to opcode/address mode.
 */
434 e82545_eecd_strobe(struct e82545_softc *sc)
436 /* Microwire state machine */
438 DPRINTF("eeprom state machine srtobe "
439 "0x%x 0x%x 0x%x 0x%x\r\n",
440 sc->nvm_mode, sc->nvm_bits,
441 sc->nvm_opaddr, sc->nvm_data);*/
443 if (sc->nvm_bits == 0) {
444 DPRINTF("eeprom state machine not expecting data! "
445 "0x%x 0x%x 0x%x 0x%x\r\n",
446 sc->nvm_mode, sc->nvm_bits,
447 sc->nvm_opaddr, sc->nvm_data);
/* Read phase: present the top bit of nvm_data on the DO line. */
451 if (sc->nvm_mode == E82545_NVM_MODE_DATAOUT) {
453 if (sc->nvm_data & 0x8000) {
454 sc->eeprom_control |= E1000_EECD_DO;
456 sc->eeprom_control &= ~E1000_EECD_DO;
459 if (sc->nvm_bits == 0) {
460 /* read done, back to opcode mode. */
462 sc->nvm_mode = E82545_NVM_MODE_OPADDR;
463 sc->nvm_bits = E82545_NVM_OPADDR_BITS;
/* Write phase: sample the DI line; commit the word once all bits are in. */
465 } else if (sc->nvm_mode == E82545_NVM_MODE_DATAIN) {
468 if (sc->eeprom_control & E1000_EECD_DI) {
471 if (sc->nvm_bits == 0) {
473 uint16_t op = sc->nvm_opaddr & E82545_NVM_OPCODE_MASK;
474 uint16_t addr = sc->nvm_opaddr & E82545_NVM_ADDR_MASK;
475 if (op != E82545_NVM_OPCODE_WRITE) {
476 DPRINTF("Illegal eeprom write op 0x%x\r\n",
478 } else if (addr >= E82545_NVM_EEPROM_SIZE) {
479 DPRINTF("Illegal eeprom write addr 0x%x\r\n",
482 DPRINTF("eeprom write eeprom[0x%x] = 0x%x\r\n",
484 sc->eeprom_data[addr] = sc->nvm_data;
486 /* back to opcode mode */
488 sc->nvm_mode = E82545_NVM_MODE_OPADDR;
489 sc->nvm_bits = E82545_NVM_OPADDR_BITS;
/* Opcode/address phase: shift DI into nvm_opaddr, then dispatch. */
491 } else if (sc->nvm_mode == E82545_NVM_MODE_OPADDR) {
492 sc->nvm_opaddr <<= 1;
493 if (sc->eeprom_control & E1000_EECD_DI) {
496 if (sc->nvm_bits == 0) {
497 uint16_t op = sc->nvm_opaddr & E82545_NVM_OPCODE_MASK;
499 case E82545_NVM_OPCODE_EWEN:
500 DPRINTF("eeprom write enable: 0x%x\r\n",
502 /* back to opcode mode */
504 sc->nvm_mode = E82545_NVM_MODE_OPADDR;
505 sc->nvm_bits = E82545_NVM_OPADDR_BITS;
507 case E82545_NVM_OPCODE_READ:
509 uint16_t addr = sc->nvm_opaddr &
510 E82545_NVM_ADDR_MASK;
511 sc->nvm_mode = E82545_NVM_MODE_DATAOUT;
512 sc->nvm_bits = E82545_NVM_DATA_BITS;
513 if (addr < E82545_NVM_EEPROM_SIZE) {
514 sc->nvm_data = sc->eeprom_data[addr];
515 DPRINTF("eeprom read: eeprom[0x%x] = 0x%x\r\n",
518 DPRINTF("eeprom illegal read: 0x%x\r\n",
524 case E82545_NVM_OPCODE_WRITE:
525 sc->nvm_mode = E82545_NVM_MODE_DATAIN;
526 sc->nvm_bits = E82545_NVM_DATA_BITS;
/*
 * NOTE(review): the message below ends in CR CR, while every other
 * message in this file ends in CR LF -- likely a typo.
 */
530 DPRINTF("eeprom unknown op: 0x%x\r\r",
532 /* back to opcode mode */
534 sc->nvm_mode = E82545_NVM_MODE_OPADDR;
535 sc->nvm_bits = E82545_NVM_OPADDR_BITS;
539 DPRINTF("eeprom state machine wrong state! "
540 "0x%x 0x%x 0x%x 0x%x\r\n",
541 sc->nvm_mode, sc->nvm_bits,
542 sc->nvm_opaddr, sc->nvm_data);
/*
 * Timer callback that ends an interrupt-throttling interval.  With the
 * softc mutex held it asserts the legacy interrupt if enabled causes are
 * pending and no interrupt is currently asserted, then deletes the timer
 * event and clears esc_mevpitr.
 */
547 e82545_itr_callback(int fd, enum ev_type type, void *param)
550 struct e82545_softc *sc = param;
552 pthread_mutex_lock(&sc->esc_mtx);
553 new = sc->esc_ICR & sc->esc_IMS;
554 if (new && !sc->esc_irq_asserted) {
555 DPRINTF("itr callback: lintr assert %x\r\n", new);
556 sc->esc_irq_asserted = 1;
557 pci_lintr_assert(sc->esc_pi);
559 mevent_delete(sc->esc_mevpitr);
560 sc->esc_mevpitr = NULL;
562 pthread_mutex_unlock(&sc->esc_mtx);
/*
 * Raise an interrupt for newly set cause bits.  Only bits that are not
 * already present in ICR and that are enabled in IMS count as new.  No
 * interrupt is raised while a throttle timer is pending or while one is
 * already asserted.  After asserting, a throttle timer is armed when ITR
 * is non-zero; the ITR value is rounded up to whole milliseconds.
 */
566 e82545_icr_assert(struct e82545_softc *sc, uint32_t bits)
570 DPRINTF("icr assert: 0x%x\r\n", bits);
573 * An interrupt is only generated if bits are set that
574 * aren't already in the ICR, these bits are unmasked,
575 * and there isn't an interrupt already pending.
577 new = bits & ~sc->esc_ICR & sc->esc_IMS;
581 DPRINTF("icr assert: masked %x, ims %x\r\n", new, sc->esc_IMS);
582 } else if (sc->esc_mevpitr != NULL) {
583 DPRINTF("icr assert: throttled %x, ims %x\r\n", new, sc->esc_IMS);
584 } else if (!sc->esc_irq_asserted) {
585 DPRINTF("icr assert: lintr assert %x\r\n", new);
586 sc->esc_irq_asserted = 1;
587 pci_lintr_assert(sc->esc_pi);
588 if (sc->esc_ITR != 0) {
589 sc->esc_mevpitr = mevent_add(
590 (sc->esc_ITR + 3905) / 3906, /* 256ns -> 1ms */
591 EVF_TIMER, e82545_itr_callback, sc);
/*
 * Handle newly enabled interrupt mask bits.  Causes that are already set
 * in ICR and become enabled by this change may raise an interrupt, under
 * the same throttling and already-asserted rules as e82545_icr_assert.
 */
597 e82545_ims_change(struct e82545_softc *sc, uint32_t bits)
602 * Changing the mask may allow previously asserted
603 * but masked interrupt requests to generate an interrupt.
605 new = bits & sc->esc_ICR & ~sc->esc_IMS;
609 DPRINTF("ims change: masked %x, ims %x\r\n", new, sc->esc_IMS);
610 } else if (sc->esc_mevpitr != NULL) {
611 DPRINTF("ims change: throttled %x, ims %x\r\n", new, sc->esc_IMS);
612 } else if (!sc->esc_irq_asserted) {
613 DPRINTF("ims change: lintr assert %x\n\r", new);
614 sc->esc_irq_asserted = 1;
615 pci_lintr_assert(sc->esc_pi);
616 if (sc->esc_ITR != 0) {
617 sc->esc_mevpitr = mevent_add(
618 (sc->esc_ITR + 3905) / 3906, /* 256ns -> 1ms */
619 EVF_TIMER, e82545_itr_callback, sc);
/*
 * Clear the given cause bits from ICR and, if an interrupt was asserted
 * and no enabled cause remains, deassert the legacy interrupt.
 */
625 e82545_icr_deassert(struct e82545_softc *sc, uint32_t bits)
628 DPRINTF("icr deassert: 0x%x\r\n", bits);
629 sc->esc_ICR &= ~bits;
632 * If there are no longer any interrupt sources and there
633 * was an asserted interrupt, clear it
635 if (sc->esc_irq_asserted && !(sc->esc_ICR & sc->esc_IMS)) {
636 DPRINTF("icr deassert: lintr deassert %x\r\n", bits);
637 pci_lintr_deassert(sc->esc_pi);
638 sc->esc_irq_asserted = 0;
/*
 * Handle a guest write to one of the interrupt registers.  Depending on
 * the register the value clears causes (e82545_icr_deassert), sets causes
 * (e82545_icr_assert), enables mask bits (e82545_ims_change) or clears
 * mask bits directly in esc_IMS.
 */
643 e82545_intr_write(struct e82545_softc *sc, uint32_t offset, uint32_t value)
646 DPRINTF("intr_write: off %x, val %x\n\r", offset, value);
650 e82545_icr_deassert(sc, value);
656 sc->esc_ICS = value; /* not used: store for debug */
657 e82545_icr_assert(sc, value);
660 e82545_ims_change(sc, value);
663 sc->esc_IMC = value; /* for debug */
664 sc->esc_IMS &= ~value;
665 // XXX clear interrupts if all ICR bits now masked
666 // and interrupt was pending ?
/*
 * Handle a guest read of one of the interrupt registers.  ICR, ITR and
 * IMS return their shadow values; reading ICR also clears every cause
 * bit through e82545_icr_deassert.
 */
674 e82545_intr_read(struct e82545_softc *sc, uint32_t offset)
680 DPRINTF("intr_read: off %x\n\r", offset);
684 retval = sc->esc_ICR;
686 e82545_icr_deassert(sc, ~0);
689 retval = sc->esc_ITR;
692 /* write-only register */
695 retval = sc->esc_IMS;
698 /* write-only register */
/*
 * Handle a write to the device control register.  The value is stored
 * with the RST bit masked off; a set RST bit is treated as a software
 * reset request and logged.
 */
708 e82545_devctl(struct e82545_softc *sc, uint32_t val)
711 sc->esc_CTRL = val & ~E1000_CTRL_RST;
713 if (val & E1000_CTRL_RST) {
714 DPRINTF("e1k: s/w reset, ctl %x\n", val);
717 /* XXX check for phy reset ? */
/*
 * Recompute the 64-bit receive descriptor ring base and cache a host
 * pointer to RDLEN bytes of guest memory at that address.
 * NOTE(review): the result of paddr_guest2host is stored without a
 * check here -- confirm how an unmappable range is handled.
 */
721 e82545_rx_update_rdba(struct e82545_softc *sc)
724 /* XXX verify desc base/len within phys mem range */
725 sc->esc_rdba = (uint64_t)sc->esc_RDBAH << 32 |
728 /* Cache host mapping of guest descriptor array */
729 sc->esc_rxdesc = paddr_guest2host(sc->esc_ctx,
730 sc->esc_rdba, sc->esc_RDLEN);
/*
 * Handle a write to RCTL.  The value is saved with reserved bits
 * stripped.  When the enable bit changes state, reception is either
 * enabled (after refreshing the cached ring mapping and recording whether
 * transceiver loopback is selected) or disabled, in which case loopback
 * is cleared and the cached ring pointer is dropped.
 */
734 e82545_rx_ctl(struct e82545_softc *sc, uint32_t val)
738 on = ((val & E1000_RCTL_EN) == E1000_RCTL_EN);
740 /* Save RCTL after stripping reserved bits 31:27,24,21,14,11:10,0 */
741 sc->esc_RCTL = val & ~0xF9204c01;
743 DPRINTF("rx_ctl - %s RCTL %x, val %x\n",
744 on ? "on" : "off", sc->esc_RCTL, val);
746 /* state change requested */
747 if (on != sc->esc_rx_enabled) {
749 /* Catch disallowed/unimplemented settings */
750 //assert(!(val & E1000_RCTL_LBM_TCVR));
752 if (sc->esc_RCTL & E1000_RCTL_LBM_TCVR) {
753 sc->esc_rx_loopback = 1;
755 sc->esc_rx_loopback = 0;
758 e82545_rx_update_rdba(sc);
759 e82545_rx_enable(sc);
761 e82545_rx_disable(sc);
762 sc->esc_rx_loopback = 0;
764 sc->esc_rxdesc = NULL;
/*
 * Recompute the 64-bit transmit descriptor ring base from TDBAH and
 * TDBAL and cache a host pointer to the ring in guest memory.
 */
770 e82545_tx_update_tdba(struct e82545_softc *sc)
773 /* XXX verify desc base/len within phys mem range */
774 sc->esc_tdba = (uint64_t)sc->esc_TDBAH << 32 | sc->esc_TDBAL;
776 /* Cache host mapping of guest descriptor array */
777 sc->esc_txdesc = paddr_guest2host(sc->esc_ctx, sc->esc_tdba,
/*
 * Handle a write to TCTL.  A change of the enable bit either refreshes
 * the cached ring mapping and enables transmission, or disables it and
 * drops the cached ring pointer.  The value is saved with reserved bits
 * stripped.
 */
782 e82545_tx_ctl(struct e82545_softc *sc, uint32_t val)
786 on = ((val & E1000_TCTL_EN) == E1000_TCTL_EN);
788 /* ignore TCTL_EN settings that don't change state */
789 if (on == sc->esc_tx_enabled)
793 e82545_tx_update_tdba(sc);
794 e82545_tx_enable(sc);
796 e82545_tx_disable(sc);
798 sc->esc_txdesc = NULL;
801 /* Save TCTL value after stripping reserved bits 31:25,23,2,0 */
802 sc->esc_TCTL = val & ~0xFE800005;
/*
 * Decode the receive buffer size in bytes from the BSEX and size bits of
 * an RCTL value.  Combinations that match no case yield 256.
 */
806 e82545_bufsz(uint32_t rctl)
809 switch (rctl & (E1000_RCTL_BSEX | E1000_RCTL_SZ_256)) {
810 case (E1000_RCTL_SZ_2048): return (2048);
811 case (E1000_RCTL_SZ_1024): return (1024);
812 case (E1000_RCTL_SZ_512): return (512);
813 case (E1000_RCTL_SZ_256): return (256);
814 case (E1000_RCTL_BSEX|E1000_RCTL_SZ_16384): return (16384);
815 case (E1000_RCTL_BSEX|E1000_RCTL_SZ_8192): return (8192);
816 case (E1000_RCTL_BSEX|E1000_RCTL_SZ_4096): return (4096);
818 return (256); /* Forbidden value. */
/* Scratch buffer used to read and discard frames from the tap device. */
821 static uint8_t dummybuf[2048];
/*
 * Event callback for the tap descriptor.  Frames are read from the tap
 * device directly into the guest receive buffers named by the descriptor
 * ring, the consumed descriptors are completed, and the accumulated
 * receive interrupt causes are asserted at the end.  Frames are read and
 * discarded when reception is disabled, when loopback is selected, or
 * when fewer than maxpktdesc descriptors are available.  The softc mutex
 * is released while the ring is being filled, with esc_rx_active set;
 * a waiter on esc_rx_cond is signalled afterwards if reception was
 * disabled in the meantime.
 */
823 /* XXX one packet at a time until this is debugged */
825 e82545_tap_callback(int fd, enum ev_type type, void *param)
827 struct e82545_softc *sc = param;
828 struct e1000_rx_desc *rxd;
829 struct iovec vec[64];
830 int left, len, lim, maxpktsz, maxpktdesc, bufsz, i, n, size;
832 uint16_t *tp, tag, head;
834 pthread_mutex_lock(&sc->esc_mtx);
835 DPRINTF("rx_run: head %x, tail %x\r\n", sc->esc_RDH, sc->esc_RDT);
837 if (!sc->esc_rx_enabled || sc->esc_rx_loopback) {
838 DPRINTF("rx disabled (!%d || %d) -- packet(s) dropped\r\n",
839 sc->esc_rx_enabled, sc->esc_rx_loopback);
840 while (read(sc->esc_tapfd, dummybuf, sizeof(dummybuf)) > 0) {
/* Worst-case number of descriptors one frame can occupy. */
844 bufsz = e82545_bufsz(sc->esc_RCTL);
845 maxpktsz = (sc->esc_RCTL & E1000_RCTL_LPE) ? 16384 : 1522;
846 maxpktdesc = (maxpktsz + bufsz - 1) / bufsz;
/*
 * NOTE(review): size is RDLEN / 16; if the guest leaves RDLEN below 16
 * the modulo operations below divide by zero -- confirm RDLEN is
 * validated elsewhere.
 */
847 size = sc->esc_RDLEN / 16;
849 left = (size + sc->esc_RDT - head) % size;
850 if (left < maxpktdesc) {
851 DPRINTF("rx overflow (%d < %d) -- packet(s) dropped\r\n",
853 while (read(sc->esc_tapfd, dummybuf, sizeof(dummybuf)) > 0) {
858 sc->esc_rx_active = 1;
859 pthread_mutex_unlock(&sc->esc_mtx);
/* Process at most a quarter of the ring per invocation. */
861 for (lim = size / 4; lim > 0 && left >= maxpktdesc; lim -= n) {
863 /* Grab rx descriptor pointed to by the head pointer */
864 for (i = 0; i < maxpktdesc; i++) {
865 rxd = &sc->esc_rxdesc[(head + i) % size];
866 vec[i].iov_base = paddr_guest2host(sc->esc_ctx,
867 rxd->buffer_addr, bufsz);
868 vec[i].iov_len = bufsz;
870 len = readv(sc->esc_tapfd, vec, maxpktdesc);
872 DPRINTF("tap: readv() returned %d\n", len);
877 * Adjust the packet length based on whether the CRC needs
878 * to be stripped or if the packet is less than the minimum
881 if (len < ETHER_MIN_LEN - ETHER_CRC_LEN)
882 len = ETHER_MIN_LEN - ETHER_CRC_LEN;
883 if (!(sc->esc_RCTL & E1000_RCTL_SECRC))
884 len += ETHER_CRC_LEN;
885 n = (len + bufsz - 1) / bufsz;
887 DPRINTF("packet read %d bytes, %d segs, head %d\r\n",
890 /* Apply VLAN filter. */
891 tp = (uint16_t *)vec[0].iov_base + 6;
892 if ((sc->esc_RCTL & E1000_RCTL_VFE) &&
893 (ntohs(tp[0]) == sc->esc_VET)) {
894 tag = ntohs(tp[1]) & 0x0fff;
895 if ((sc->esc_fvlan[tag >> 5] &
896 (1 << (tag & 0x1f))) != 0) {
897 DPRINTF("known VLAN %d\r\n", tag);
899 DPRINTF("unknown VLAN %d\r\n", tag);
905 /* Update all consumed descriptors. */
906 for (i = 0; i < n - 1; i++) {
907 rxd = &sc->esc_rxdesc[(head + i) % size];
912 rxd->status = E1000_RXD_STAT_DD;
914 rxd = &sc->esc_rxdesc[(head + i) % size];
/*
 * NOTE(review): when len is an exact multiple of bufsz this stores 0 as
 * the length of the final descriptor -- confirm this is intended.
 */
915 rxd->length = len % bufsz;
919 /* XXX signal no checksum for now */
920 rxd->status = E1000_RXD_STAT_PIF | E1000_RXD_STAT_IXSM |
921 E1000_RXD_STAT_EOP | E1000_RXD_STAT_DD;
923 /* Schedule receive interrupts. */
924 if (len <= sc->esc_RSRPD) {
925 cause |= E1000_ICR_SRPD | E1000_ICR_RXT0;
927 /* XXX: RDTR and RADV timers should be here. */
928 cause |= E1000_ICR_RXT0;
931 head = (head + n) % size;
936 pthread_mutex_lock(&sc->esc_mtx);
937 sc->esc_rx_active = 0;
938 if (sc->esc_rx_enabled == 0)
939 pthread_cond_signal(&sc->esc_rx_cond);
942 /* Respect E1000_RCTL_RDMTS */
943 left = (size + sc->esc_RDT - head) % size;
944 if (left < (size >> (((sc->esc_RCTL >> 8) & 3) + 1)))
945 cause |= E1000_ICR_RXDMT0;
946 /* Assert all accumulated interrupts. */
948 e82545_icr_assert(sc, cause);
950 DPRINTF("rx_run done: head %x, tail %x\r\n", sc->esc_RDH, sc->esc_RDT);
951 pthread_mutex_unlock(&sc->esc_mtx);
/* Fold the upper 16 bits of a checksum accumulator into the lower 16. */
955 e82545_carry(uint32_t sum)
958 sum = (sum & 0xFFFF) + (sum >> 16);
/*
 * Sum a buffer as a sequence of 16-bit words read in memory order, add a
 * trailing odd byte if present, and return the folded result from
 * e82545_carry.
 * NOTE(review): the word loads cast buf to a 16-bit pointer, which
 * assumes the platform tolerates unaligned 16-bit reads.
 */
965 e82545_buf_checksum(uint8_t *buf, int len)
970 /* Checksum all the pairs of bytes first... */
971 for (i = 0; i < (len & ~1U); i += 2)
972 sum += *((u_int16_t *)(buf + i));
975 * If there's a single byte left over, checksum it, too.
976 * Network byte order is big-endian, so the remaining byte is
980 sum += htons(buf[i] << 8);
982 return (e82545_carry(sum));
/*
 * Checksum len bytes that start off bytes into an iovec array.  Vectors
 * lying entirely before off are skipped; each remaining vector is summed
 * with e82545_buf_checksum, and a partial sum is shifted left by 8 bits
 * when the odd flag is set.  The folded total is returned.
 */
986 e82545_iov_checksum(struct iovec *iov, int iovcnt, int off, int len)
991 /* Skip completely unneeded vectors. */
992 while (iovcnt > 0 && iov->iov_len <= off && off > 0) {
998 /* Calculate checksum of requested range. */
1000 while (len > 0 && iovcnt > 0) {
1001 now = MIN(len, iov->iov_len - off);
1002 s = e82545_buf_checksum(iov->iov_base + off, now);
1003 sum += odd ? (s << 8) : s;
1011 return (e82545_carry(sum));
/*
 * When the DEXT bit is set in the lower descriptor word, the type is the
 * subset of that word covered by E1000_TXD_MASK.
 */
1015 * Return the transmit descriptor type.
1018 e82545_txdesc_type(uint32_t lower)
1024 if (lower & E1000_TXD_CMD_DEXT)
1025 type = lower & E1000_TXD_MASK;
/*
 * Compute the checksum described by ck over the packet held in iov and
 * store its complement at byte offset ck_off of the first vector.  A
 * ck_len of zero means the checksum runs to the end of the packet, which
 * is expressed by passing INT_MAX as the length; otherwise ck_len is
 * treated as the inclusive end offset.
 * The store assumes ck_off + 2 bytes lie within the first vector.
 */
1031 e82545_transmit_checksum(struct iovec *iov, int iovcnt, struct ck_info *ck)
1036 DPRINTF("tx cksum: iovcnt/s/off/len %d/%d/%d/%d\r\n",
1037 iovcnt, ck->ck_start, ck->ck_off, ck->ck_len);
1038 cklen = ck->ck_len ? ck->ck_len - ck->ck_start + 1 : INT_MAX;
1039 cksum = e82545_iov_checksum(iov, iovcnt, ck->ck_start, cklen);
1040 *(uint16_t *)((uint8_t *)iov[0].iov_base + ck->ck_off) = ~cksum;
/*
 * Hand a frame to the tap device with writev.  The tap descriptor is
 * checked against -1 first, and the result of writev is deliberately
 * ignored.
 */
1044 e82545_transmit_backend(struct e82545_softc *sc, struct iovec *iov, int iovcnt)
1047 if (sc->esc_tapfd == -1)
1050 (void) writev(sc->esc_tapfd, iov, iovcnt);
/*
 * Walk the transmit ring from head up to but not including tail, wrapping
 * at dsize, and set the descriptor-done status bit on every descriptor
 * that requested status reporting (RS).
 */
1054 e82545_transmit_done(struct e82545_softc *sc, uint16_t head, uint16_t tail,
1055 uint16_t dsize, int *tdwb)
1057 union e1000_tx_udesc *dsc;
1059 for ( ; head != tail; head = (head + 1) % dsize) {
1060 dsc = &sc->esc_txdesc[head];
1061 if (dsc->td.lower.data & E1000_TXD_CMD_RS) {
1062 dsc->td.upper.data |= E1000_TXD_STAT_DD;
/*
 * Process the transmit descriptors of one packet, starting at head.
 *
 * The descriptor loop gathers data buffers into an iovec, saves context
 * descriptors into esc_txctx, and on the end-of-packet descriptor collects
 * the checksum-offload parameters, either from the legacy descriptor or
 * from the saved context.  A writable copy of the headers is then built
 * in front of the payload, a VLAN tag is inserted when requested, and the
 * frame is either sent as is or split into MSS-sized segments with the
 * IP and TCP/UDP headers and checksums patched per segment.  Finally the
 * consumed descriptors are marked done through e82545_transmit_done.
 */
1069 e82545_transmit(struct e82545_softc *sc, uint16_t head, uint16_t tail,
1070 uint16_t dsize, uint16_t *rhead, int *tdwb)
1072 uint8_t *hdr, *hdrp;
1073 struct iovec iovb[I82545_MAX_TXSEGS + 2];
1074 struct iovec tiov[I82545_MAX_TXSEGS + 2];
1075 struct e1000_context_desc *cd;
1076 struct ck_info ckinfo[2];
1078 union e1000_tx_udesc *dsc;
1079 int desc, dtype, len, ntype, iovcnt, tlen, hdrlen, vlen, tcp, tso;
1080 int mss, paylen, seg, tiovcnt, left, now, nleft, nnow, pv, pvoff;
1081 uint32_t tcpsum, tcpseq;
1082 uint16_t ipcs, tcpcs, ipid, ohead;
/* ckinfo[0] describes the IP checksum, ckinfo[1] the TCP/UDP checksum. */
1084 ckinfo[0].ck_valid = ckinfo[1].ck_valid = 0;
1091 /* iovb[0/1] may be used for writable copy of headers. */
1094 for (desc = 0; ; desc++, head = (head + 1) % dsize) {
1099 dsc = &sc->esc_txdesc[head];
1100 dtype = e82545_txdesc_type(dsc->td.lower.data);
1104 case E1000_TXD_TYP_C:
1105 DPRINTF("tx ctxt desc idx %d: %016jx "
1107 head, dsc->td.buffer_addr,
1108 dsc->td.upper.data, dsc->td.lower.data);
1109 /* Save context and return */
1110 sc->esc_txctx = dsc->cd;
1112 case E1000_TXD_TYP_L:
1113 DPRINTF("tx legacy desc idx %d: %08x%08x\r\n",
1114 head, dsc->td.upper.data, dsc->td.lower.data);
1116 * legacy cksum start valid in first descriptor
1119 ckinfo[0].ck_start = dsc->td.upper.fields.css;
1121 case E1000_TXD_TYP_D:
1122 DPRINTF("tx data desc idx %d: %08x%08x\r\n",
1123 head, dsc->td.upper.data, dsc->td.lower.data);
1130 /* Descriptor type must be consistent */
1131 assert(dtype == ntype);
1132 DPRINTF("tx next desc idx %d: %08x%08x\r\n",
1133 head, dsc->td.upper.data, dsc->td.lower.data);
/* Legacy descriptors carry a 16-bit length, data descriptors 20 bits. */
1136 len = (dtype == E1000_TXD_TYP_L) ? dsc->td.lower.flags.length :
1137 dsc->dd.lower.data & 0xFFFFF;
1140 /* Strip checksum supplied by guest. */
1141 if ((dsc->td.lower.data & E1000_TXD_CMD_EOP) != 0 &&
1142 (dsc->td.lower.data & E1000_TXD_CMD_IFCS) == 0)
1145 if (iovcnt < I82545_MAX_TXSEGS) {
1146 iov[iovcnt].iov_base = paddr_guest2host(
1147 sc->esc_ctx, dsc->td.buffer_addr, len);
1148 iov[iovcnt].iov_len = len;
1154 * Pull out info that is valid in the final descriptor
1155 * and exit descriptor loop.
1157 if (dsc->td.lower.data & E1000_TXD_CMD_EOP) {
1158 if (dtype == E1000_TXD_TYP_L) {
1159 if (dsc->td.lower.data & E1000_TXD_CMD_IC) {
1160 ckinfo[0].ck_valid = 1;
1162 dsc->td.lower.flags.cso;
1163 ckinfo[0].ck_len = 0;
1166 cd = &sc->esc_txctx;
1167 if (dsc->dd.lower.data & E1000_TXD_CMD_TSE)
1169 if (dsc->dd.upper.fields.popts &
1170 E1000_TXD_POPTS_IXSM)
1171 ckinfo[0].ck_valid = 1;
1172 if (dsc->dd.upper.fields.popts &
1173 E1000_TXD_POPTS_IXSM || tso) {
1174 ckinfo[0].ck_start =
1175 cd->lower_setup.ip_fields.ipcss;
1177 cd->lower_setup.ip_fields.ipcso;
1179 cd->lower_setup.ip_fields.ipcse;
1181 if (dsc->dd.upper.fields.popts &
1182 E1000_TXD_POPTS_TXSM)
1183 ckinfo[1].ck_valid = 1;
1184 if (dsc->dd.upper.fields.popts &
1185 E1000_TXD_POPTS_TXSM || tso) {
1186 ckinfo[1].ck_start =
1187 cd->upper_setup.tcp_fields.tucss;
1189 cd->upper_setup.tcp_fields.tucso;
1191 cd->upper_setup.tcp_fields.tucse;
1198 if (iovcnt > I82545_MAX_TXSEGS) {
1199 WPRINTF("tx too many descriptors (%d > %d) -- dropped\r\n",
1200 iovcnt, I82545_MAX_TXSEGS);
1205 /* Estimate writable space for VLAN header insertion. */
1206 if ((sc->esc_CTRL & E1000_CTRL_VME) &&
1207 (dsc->td.lower.data & E1000_TXD_CMD_VLE)) {
1208 hdrlen = ETHER_ADDR_LEN*2;
1209 vlen = ETHER_VLAN_ENCAP_LEN;
1212 /* Estimate required writable space for checksums. */
1213 if (ckinfo[0].ck_valid)
1214 hdrlen = MAX(hdrlen, ckinfo[0].ck_off + 2);
1215 if (ckinfo[1].ck_valid)
1216 hdrlen = MAX(hdrlen, ckinfo[1].ck_off + 2);
1217 /* Round up writable space to the first vector. */
1218 if (hdrlen != 0 && iov[0].iov_len > hdrlen &&
1219 iov[0].iov_len < hdrlen + 100)
1220 hdrlen = iov[0].iov_len;
1222 /* In case of TSO header length provided by software. */
1223 hdrlen = sc->esc_txctx.tcp_seg_setup.fields.hdr_len;
1226 /* Allocate, fill and prepend writable header vector. */
/* The header copy lives on the stack, with room for a VLAN tag. */
1228 hdr = __builtin_alloca(hdrlen + vlen);
1230 for (left = hdrlen, hdrp = hdr; left > 0;
1231 left -= now, hdrp += now) {
1232 now = MIN(left, iov->iov_len);
1233 memcpy(hdrp, iov->iov_base, now);
1234 iov->iov_base += now;
1235 iov->iov_len -= now;
1236 if (iov->iov_len == 0) {
1243 iov->iov_base = hdr;
1244 iov->iov_len = hdrlen;
1247 /* Insert VLAN tag. */
/* Shift both MAC addresses down to open a gap for the tag. */
1249 hdr -= ETHER_VLAN_ENCAP_LEN;
1250 memmove(hdr, hdr + ETHER_VLAN_ENCAP_LEN, ETHER_ADDR_LEN*2);
1251 hdrlen += ETHER_VLAN_ENCAP_LEN;
1252 hdr[ETHER_ADDR_LEN*2 + 0] = sc->esc_VET >> 8;
1253 hdr[ETHER_ADDR_LEN*2 + 1] = sc->esc_VET & 0xff;
1254 hdr[ETHER_ADDR_LEN*2 + 2] = dsc->td.upper.fields.special >> 8;
1255 hdr[ETHER_ADDR_LEN*2 + 3] = dsc->td.upper.fields.special & 0xff;
1256 iov->iov_base = hdr;
1257 iov->iov_len += ETHER_VLAN_ENCAP_LEN;
1258 /* Correct checksum offsets after VLAN tag insertion. */
1259 ckinfo[0].ck_start += ETHER_VLAN_ENCAP_LEN;
1260 ckinfo[0].ck_off += ETHER_VLAN_ENCAP_LEN;
1261 if (ckinfo[0].ck_len != 0)
1262 ckinfo[0].ck_len += ETHER_VLAN_ENCAP_LEN;
1263 ckinfo[1].ck_start += ETHER_VLAN_ENCAP_LEN;
1264 ckinfo[1].ck_off += ETHER_VLAN_ENCAP_LEN;
1265 if (ckinfo[1].ck_len != 0)
1266 ckinfo[1].ck_len += ETHER_VLAN_ENCAP_LEN;
1269 /* Simple non-TSO case. */
1271 /* Calculate checksums and transmit. */
1272 if (ckinfo[0].ck_valid)
1273 e82545_transmit_checksum(iov, iovcnt, &ckinfo[0]);
1274 if (ckinfo[1].ck_valid)
1275 e82545_transmit_checksum(iov, iovcnt, &ckinfo[1]);
1276 e82545_transmit_backend(sc, iov, iovcnt);
/* Segmentation offload: remember the original header fields. */
1281 tcp = (sc->esc_txctx.cmd_and_length & E1000_TXD_CMD_TCP) != 0;
1282 mss = sc->esc_txctx.tcp_seg_setup.fields.mss;
1283 paylen = (sc->esc_txctx.cmd_and_length & 0x000fffff);
1284 DPRINTF("tx %s segmentation offload %d+%d/%d bytes %d iovs\r\n",
1285 tcp ? "TCP" : "UDP", hdrlen, paylen, mss, iovcnt);
1286 ipid = ntohs(*(uint16_t *)&hdr[ckinfo[0].ck_start + 4]);
1287 tcpseq = ntohl(*(uint32_t *)&hdr[ckinfo[1].ck_start + 4]);
1288 ipcs = *(uint16_t *)&hdr[ckinfo[0].ck_off];
1290 if (ckinfo[1].ck_valid) /* Save partial pseudo-header checksum. */
1291 tcpcs = *(uint16_t *)&hdr[ckinfo[1].ck_off];
/*
 * NOTE(review): an mss of zero taken from the context descriptor would
 * make now zero and this loop would not terminate -- confirm mss is
 * validated.
 */
1294 for (seg = 0, left = paylen; left > 0; seg++, left -= now) {
1295 now = MIN(left, mss);
1297 /* Construct IOVs for the segment. */
1298 /* Include whole original header. */
1299 tiov[0].iov_base = hdr;
1300 tiov[0].iov_len = hdrlen;
1302 /* Include respective part of payload IOV. */
1303 for (nleft = now; pv < iovcnt && nleft > 0; nleft -= nnow) {
1304 nnow = MIN(nleft, iov[pv].iov_len - pvoff);
1305 tiov[tiovcnt].iov_base = iov[pv].iov_base + pvoff;
1306 tiov[tiovcnt++].iov_len = nnow;
1307 if (pvoff + nnow == iov[pv].iov_len) {
1313 DPRINTF("tx segment %d %d+%d bytes %d iovs\r\n",
1314 seg, hdrlen, now, tiovcnt);
1316 /* Update IP header. */
1317 if (sc->esc_txctx.cmd_and_length & E1000_TXD_CMD_IP) {
1318 /* IPv4 -- set length and ID */
1319 *(uint16_t *)&hdr[ckinfo[0].ck_start + 2] =
1320 htons(hdrlen - ckinfo[0].ck_start + now);
1321 *(uint16_t *)&hdr[ckinfo[0].ck_start + 4] =
1324 /* IPv6 -- set length */
1325 *(uint16_t *)&hdr[ckinfo[0].ck_start + 4] =
1326 htons(hdrlen - ckinfo[0].ck_start - 40 +
1330 /* Update pseudo-header checksum. */
1332 tcpsum += htons(hdrlen - ckinfo[1].ck_start + now);
1334 /* Update TCP/UDP headers. */
1336 /* Update sequence number and FIN/PUSH flags. */
1337 *(uint32_t *)&hdr[ckinfo[1].ck_start + 4] =
1338 htonl(tcpseq + paylen - left);
1340 hdr[ckinfo[1].ck_start + 13] &=
1341 ~(TH_FIN | TH_PUSH);
1344 /* Update payload length. */
/*
 * NOTE(review): this stores a 32-bit value in host byte order at offset
 * 4 of the transport header, whereas the TCP branch uses htonl and the
 * IP length updates use a 16-bit store with htons -- confirm the width
 * and byte order intended for the UDP length field.
 */
1345 *(uint32_t *)&hdr[ckinfo[1].ck_start + 4] =
1346 hdrlen - ckinfo[1].ck_start + now;
1349 /* Calculate checksums and transmit. */
/* Restore the saved checksum seed before recomputing per segment. */
1350 if (ckinfo[0].ck_valid) {
1351 *(uint16_t *)&hdr[ckinfo[0].ck_off] = ipcs;
1352 e82545_transmit_checksum(tiov, tiovcnt, &ckinfo[0]);
1354 if (ckinfo[1].ck_valid) {
1355 *(uint16_t *)&hdr[ckinfo[1].ck_off] =
1356 e82545_carry(tcpsum);
1357 e82545_transmit_checksum(tiov, tiovcnt, &ckinfo[1]);
1359 e82545_transmit_backend(sc, tiov, tiovcnt);
/* Step past the last descriptor and complete everything consumed. */
1363 head = (head + 1) % dsize;
1364 e82545_transmit_done(sc, ohead, head, dsize, tdwb);
/*
 * Process a bounded batch of transmit descriptors for the device `sc`.
 *
 * The softc mutex (esc_mtx) is released before the e82545_transmit() loop
 * and re-acquired afterwards, so the caller is expected to hold it on entry.
 * After the loop the processed head (esc_TDHr) is stored, an interrupt cause
 * is accumulated and handed to e82545_icr_assert().
 */
1371 e82545_tx_run(struct e82545_softc *sc)
1374 uint16_t head, rhead, tail, size;
1375 int lim, tdwb, sent;
/* Ring size handed to e82545_transmit(): TDLEN divided by 16. */
1379 size = sc->esc_TDLEN / 16;
1380 DPRINTF("tx_run: head %x, rhead %x, tail %x\r\n",
1381 sc->esc_TDH, sc->esc_TDHr, sc->esc_TDT);
1383 pthread_mutex_unlock(&sc->esc_mtx);
/*
 * Budget of size/4 per run, decremented by whatever each
 * e82545_transmit() call reports as sent.
 * NOTE(review): esc_tx_enabled is read here while esc_mtx is not held.
 */
1386 for (lim = size / 4; sc->esc_tx_enabled && lim > 0; lim -= sent) {
1387 sent = e82545_transmit(sc, head, tail, size, &rhead, &tdwb);
1392 pthread_mutex_lock(&sc->esc_mtx);
1395 sc->esc_TDHr = rhead;
1398 cause |= E1000_ICR_TXDW;
/*
 * TXQE is added only when part of the budget was consumed in this run
 * (lim differs from its start value) and TDH has caught up with TDT.
 */
1399 if (lim != size / 4 && sc->esc_TDH == sc->esc_TDT)
1400 cause |= E1000_ICR_TXQE;
1402 e82545_icr_assert(sc, cause);
1404 DPRINTF("tx_run done: head %x, rhead %x, tail %x\r\n",
1405 sc->esc_TDH, sc->esc_TDHr, sc->esc_TDT);
/*
 * Transmit worker thread entry point; never returns (_Noreturn).
 * `param` is the device softc.
 *
 * Takes esc_mtx, then waits on esc_tx_cond for as long as transmit is
 * disabled or esc_TDHr equals esc_TDT.  esc_tx_active is cleared while
 * waiting and set again before descriptors are processed.
 */
1408 static _Noreturn void *
1409 e82545_tx_thread(void *param)
1411 struct e82545_softc *sc = param;
1413 pthread_mutex_lock(&sc->esc_mtx);
1415 while (!sc->esc_tx_enabled || sc->esc_TDHr == sc->esc_TDT) {
1416 if (sc->esc_tx_enabled && sc->esc_TDHr != sc->esc_TDT)
1418 sc->esc_tx_active = 0;
/*
 * When transmit is disabled, signal esc_tx_cond: e82545_tx_disable()
 * waits on that same condition until esc_tx_active reads zero.
 */
1419 if (sc->esc_tx_enabled == 0)
1420 pthread_cond_signal(&sc->esc_tx_cond);
1421 pthread_cond_wait(&sc->esc_tx_cond, &sc->esc_mtx);
1423 sc->esc_tx_active = 1;
1425 /* Process some tx descriptors. Lock dropped inside. */
/*
 * Kick the transmit thread: signal esc_tx_cond, but only when the thread
 * is currently marked inactive (esc_tx_active == 0).
 */
1431 e82545_tx_start(struct e82545_softc *sc)
1434 if (sc->esc_tx_active == 0)
1435 pthread_cond_signal(&sc->esc_tx_cond);
/* Mark the transmit path of `sc` as enabled (sets esc_tx_enabled). */
1439 e82545_tx_enable(struct e82545_softc *sc)
1442 sc->esc_tx_enabled = 1;
/*
 * Disable transmit and wait for the transmit thread to go idle.
 *
 * Clears esc_tx_enabled, then blocks on esc_tx_cond until esc_tx_active
 * is zero.  pthread_cond_wait() is given esc_mtx, so the caller must hold
 * that mutex on entry.
 */
1446 e82545_tx_disable(struct e82545_softc *sc)
1449 sc->esc_tx_enabled = 0;
1450 while (sc->esc_tx_active)
1451 pthread_cond_wait(&sc->esc_tx_cond, &sc->esc_mtx);
/* Mark the receive path of `sc` as enabled (sets esc_rx_enabled). */
1455 e82545_rx_enable(struct e82545_softc *sc)
1458 sc->esc_rx_enabled = 1;
/*
 * Disable receive and wait for in-progress receive work to finish.
 *
 * Clears esc_rx_enabled, then blocks on esc_rx_cond until esc_rx_active
 * is zero.  pthread_cond_wait() is given esc_mtx, so the caller must hold
 * that mutex on entry.
 */
1462 e82545_rx_disable(struct e82545_softc *sc)
1465 sc->esc_rx_enabled = 0;
1466 while (sc->esc_rx_active)
1467 pthread_cond_wait(&sc->esc_rx_cond, &sc->esc_mtx);
/*
 * Store a receive-address register write into the matching esc_uni[] entry.
 *
 * `reg` is a 32-bit register index (callers pass
 * (offset - E1000_RAL(0)) >> 2) and `wval` is the value written.
 * NOTE(review): presumably `reg` selects both the esc_uni[] slot (idx) and
 * whether the high or the low word is being written -- confirm.
 */
1471 e82545_write_ra(struct e82545_softc *sc, int reg, uint32_t wval)
1479 eu = &sc->esc_uni[idx];
/*
 * High word: valid flag (E1000_RAH_AV), 2-bit address select taken from
 * bits 17:16, and MAC octets 4 and 5 from the low 16 bits.
 */
1483 eu->eu_valid = ((wval & E1000_RAH_AV) == E1000_RAH_AV);
1484 eu->eu_addrsel = (wval >> 16) & 0x3;
1485 eu->eu_eth.octet[5] = wval >> 8;
1486 eu->eu_eth.octet[4] = wval;
/* Low word: MAC octets 0-3, octet 0 in the least significant byte. */
1489 eu->eu_eth.octet[3] = wval >> 24;
1490 eu->eu_eth.octet[2] = wval >> 16;
1491 eu->eu_eth.octet[1] = wval >> 8;
1492 eu->eu_eth.octet[0] = wval;
/*
 * Rebuild the value of a receive-address register from esc_uni[].
 *
 * `reg` is a 32-bit register index (callers pass
 * (offset - E1000_RAL(0)) >> 2).  The packing is the inverse of
 * e82545_write_ra().
 * NOTE(review): presumably `reg` selects both the esc_uni[] slot (idx) and
 * which of the two words is returned -- confirm.
 */
1497 e82545_read_ra(struct e82545_softc *sc, int reg)
1506 eu = &sc->esc_uni[idx];
/* High word: valid flag in bit 31, address select at bit 16, octets 5/4. */
1510 retval = (eu->eu_valid << 31) |
1511 (eu->eu_addrsel << 16) |
1512 (eu->eu_eth.octet[5] << 8) |
1513 eu->eu_eth.octet[4];
/* Low word: octets 3..0, octet 0 in the least significant byte. */
1516 retval = (eu->eu_eth.octet[3] << 24) |
1517 (eu->eu_eth.octet[2] << 16) |
1518 (eu->eu_eth.octet[1] << 8) |
1519 eu->eu_eth.octet[0];
/*
 * Handle a 32-bit guest write of `value` to the device register at byte
 * `offset`.
 *
 * Most registers are stored into the matching esc_* softc field after
 * masking off bits with a per-register mask.  Some writes have side
 * effects: device control, interrupt registers, RX/TX control, descriptor
 * ring base updates, the transmit tail (which can wake the tx thread),
 * EEPROM control and MDI control.  Unaligned and unknown offsets are only
 * logged through DPRINTF.
 */
1526 e82545_write_register(struct e82545_softc *sc, uint32_t offset, uint32_t value)
1531 DPRINTF("Unaligned register write offset:0x%x value:0x%x\r\n", offset, value);
1534 DPRINTF("Register write: 0x%x value: 0x%x\r\n", offset, value);
1538 case E1000_CTRL_DUP:
1539 e82545_devctl(sc, value);
1542 sc->esc_FCAL = value;
/* The "& ~0xFFFF0000" masks below keep only the low 16 bits. */
1545 sc->esc_FCAH = value & ~0xFFFF0000;
1548 sc->esc_FCT = value & ~0xFFFF0000;
1551 sc->esc_VET = value & ~0xFFFF0000;
1554 sc->esc_FCTTV = value & ~0xFFFF0000;
1557 sc->esc_LEDCTL = value & ~0x30303000;
1560 sc->esc_PBA = value & 0x0000FF80;
1567 e82545_intr_write(sc, offset, value);
1570 e82545_rx_ctl(sc, value);
1573 sc->esc_FCRTL = value & ~0xFFFF0007;
1576 sc->esc_FCRTH = value & ~0xFFFF0007;
1578 case E1000_RDBAL(0):
/* Low 4 bits of the receive ring base are dropped. */
1579 sc->esc_RDBAL = value & ~0xF;
1580 if (sc->esc_rx_enabled) {
1581 /* Apparently legal: update cached address */
1582 e82545_rx_update_rdba(sc);
1585 case E1000_RDBAH(0):
/* Unlike RDBAL, changing RDBAH/RDLEN while RX is enabled trips an assert. */
1586 assert(!sc->esc_rx_enabled);
1587 sc->esc_RDBAH = value;
1589 case E1000_RDLEN(0):
1590 assert(!sc->esc_rx_enabled);
1591 sc->esc_RDLEN = value & ~0xFFF0007F;
1594 /* XXX should only ever be zero ? Range check ? */
1595 sc->esc_RDH = value;
1598 /* XXX if this opens up the rx ring, do something ? */
1599 sc->esc_RDT = value;
1602 /* ignore FPD bit 31 */
1603 sc->esc_RDTR = value & ~0xFFFF0000;
1605 case E1000_RXDCTL(0):
1606 sc->esc_RXDCTL = value & ~0xFEC0C0C0;
1609 sc->esc_RADV = value & ~0xFFFF0000;
1612 sc->esc_RSRPD = value & ~0xFFFFF000;
1615 sc->esc_RXCSUM = value & ~0xFFFFF800;
1618 sc->esc_TXCW = value & ~0x3FFF0000;
1621 e82545_tx_ctl(sc, value);
1624 sc->esc_TIPG = value;
1627 sc->esc_AIT = value;
1629 case E1000_TDBAL(0):
/* Low 4 bits of the transmit ring base are dropped. */
1630 sc->esc_TDBAL = value & ~0xF;
1631 if (sc->esc_tx_enabled) {
1632 /* Apparently legal */
1633 e82545_tx_update_tdba(sc);
1636 case E1000_TDBAH(0):
1637 //assert(!sc->esc_tx_enabled);
1638 sc->esc_TDBAH = value;
1640 case E1000_TDLEN(0):
1641 //assert(!sc->esc_tx_enabled);
1642 sc->esc_TDLEN = value & ~0xFFF0007F;
1645 //assert(!sc->esc_tx_enabled);
1646 /* XXX should only ever be zero ? Range check ? */
/* A head write resets both the guest-visible head and the processed head. */
1647 sc->esc_TDHr = sc->esc_TDH = value;
1650 /* XXX range check ? */
/* A tail write stores the new tail and, if TX is enabled, kicks the tx thread. */
1651 sc->esc_TDT = value;
1652 if (sc->esc_tx_enabled)
1653 e82545_tx_start(sc);
1656 sc->esc_TIDV = value & ~0xFFFF0000;
1658 case E1000_TXDCTL(0):
1659 //assert(!sc->esc_tx_enabled);
1660 sc->esc_TXDCTL = value & ~0xC0C0C0;
1663 sc->esc_TADV = value & ~0xFFFF0000;
/* The "lo ... hi" case labels below are GNU C case ranges. */
1665 case E1000_RAL(0) ... E1000_RAH(15):
1666 /* convert to u32 offset */
1667 ridx = (offset - E1000_RAL(0)) >> 2;
1668 e82545_write_ra(sc, ridx, value);
/* 128 dwords each for the multicast and VLAN tables, indexed by dword. */
1670 case E1000_MTA ... (E1000_MTA + (127*4)):
1671 sc->esc_fmcast[(offset - E1000_MTA) >> 2] = value;
1673 case E1000_VFTA ... (E1000_VFTA + (127*4)):
1674 sc->esc_fvlan[(offset - E1000_VFTA) >> 2] = value;
1678 //DPRINTF("EECD write 0x%x -> 0x%x\r\n", sc->eeprom_control, value);
1679 /* edge triggered low->high */
/* eecd_strobe is non-zero only if SK was clear before and is set in value. */
1680 uint32_t eecd_strobe = ((sc->eeprom_control & E1000_EECD_SK) ?
1681 0 : (value & E1000_EECD_SK));
/* Only the SK, CS, DI and REQ bits are taken from the written value. */
1682 uint32_t eecd_mask = (E1000_EECD_SK|E1000_EECD_CS|
1683 E1000_EECD_DI|E1000_EECD_REQ);
1684 sc->eeprom_control &= ~eecd_mask;
1685 sc->eeprom_control |= (value & eecd_mask);
1686 /* grant/revoke immediately */
1687 if (value & E1000_EECD_REQ) {
1688 sc->eeprom_control |= E1000_EECD_GNT;
1690 sc->eeprom_control &= ~E1000_EECD_GNT;
/* Process the strobe only while CS is set in the updated control word. */
1692 if (eecd_strobe && (sc->eeprom_control & E1000_EECD_CS)) {
1693 e82545_eecd_strobe(sc);
/* MDI control: extract the register and PHY address fields from value. */
1699 uint8_t reg_addr = (uint8_t)((value & E1000_MDIC_REG_MASK) >>
1700 E1000_MDIC_REG_SHIFT);
1701 uint8_t phy_addr = (uint8_t)((value & E1000_MDIC_PHY_MASK) >>
1702 E1000_MDIC_PHY_SHIFT);
1704 (value & ~(E1000_MDIC_ERROR|E1000_MDIC_DEST));
/* A write that already has READY set is reported as incorrect. */
1705 if ((value & E1000_MDIC_READY) != 0) {
1706 DPRINTF("Incorrect MDIC ready bit: 0x%x\r\n", value);
1709 switch (value & E82545_MDIC_OP_MASK) {
1710 case E1000_MDIC_OP_READ:
/* Read: replace the data field with the value from e82545_read_mdi(). */
1711 sc->mdi_control &= ~E82545_MDIC_DATA_MASK;
1712 sc->mdi_control |= e82545_read_mdi(sc, reg_addr, phy_addr);
1714 case E1000_MDIC_OP_WRITE:
1715 e82545_write_mdi(sc, reg_addr, phy_addr,
1716 value & E82545_MDIC_DATA_MASK);
1719 DPRINTF("Unknown MDIC op: 0x%x\r\n", value);
1722 /* TODO: barrier? */
/* READY is set in mdi_control once the operation has been handled. */
1723 sc->mdi_control |= E1000_MDIC_READY;
1724 if (value & E82545_MDIC_IE) {
1725 // TODO: generate interrupt
1733 DPRINTF("Unknown write register: 0x%x value:%x\r\n", offset, value);
/*
 * Handle a 32-bit guest read of the device register at byte `offset` and
 * produce its value in `retval`.
 *
 * Most registers are returned straight from the matching esc_* softc
 * field.  Interrupt registers go through e82545_intr_read(), receive
 * address registers through e82545_read_ra(), and a set of statistics
 * registers are served from software counters.  Unaligned and unknown
 * offsets are logged through DPRINTF.
 */
1739 e82545_read_register(struct e82545_softc *sc, uint32_t offset)
1745 DPRINTF("Unaligned register read offset:0x%x\r\n", offset);
1749 DPRINTF("Register read: 0x%x\r\n", offset);
1753 retval = sc->esc_CTRL;
/* Fixed value built from the FD, LU and SPEED_1000 status flags. */
1756 retval = E1000_STATUS_FD | E1000_STATUS_LU |
1757 E1000_STATUS_SPEED_1000;
1760 retval = sc->esc_FCAL;
1763 retval = sc->esc_FCAH;
1766 retval = sc->esc_FCT;
1769 retval = sc->esc_VET;
1772 retval = sc->esc_FCTTV;
1775 retval = sc->esc_LEDCTL;
1778 retval = sc->esc_PBA;
1785 retval = e82545_intr_read(sc, offset);
1788 retval = sc->esc_RCTL;
1791 retval = sc->esc_FCRTL;
1794 retval = sc->esc_FCRTH;
1796 case E1000_RDBAL(0):
1797 retval = sc->esc_RDBAL;
1799 case E1000_RDBAH(0):
1800 retval = sc->esc_RDBAH;
1802 case E1000_RDLEN(0):
1803 retval = sc->esc_RDLEN;
1806 retval = sc->esc_RDH;
1809 retval = sc->esc_RDT;
1812 retval = sc->esc_RDTR;
1814 case E1000_RXDCTL(0):
1815 retval = sc->esc_RXDCTL;
1818 retval = sc->esc_RADV;
1821 retval = sc->esc_RSRPD;
1824 retval = sc->esc_RXCSUM;
1827 retval = sc->esc_TXCW;
1830 retval = sc->esc_TCTL;
1833 retval = sc->esc_TIPG;
1836 retval = sc->esc_AIT;
1838 case E1000_TDBAL(0):
1839 retval = sc->esc_TDBAL;
1841 case E1000_TDBAH(0):
1842 retval = sc->esc_TDBAH;
1844 case E1000_TDLEN(0):
1845 retval = sc->esc_TDLEN;
/* Returns esc_TDH, not the internally processed head esc_TDHr. */
1848 retval = sc->esc_TDH;
1851 retval = sc->esc_TDT;
1854 retval = sc->esc_TIDV;
1856 case E1000_TXDCTL(0):
1857 retval = sc->esc_TXDCTL;
1860 retval = sc->esc_TADV;
/* The "lo ... hi" case labels below are GNU C case ranges. */
1862 case E1000_RAL(0) ... E1000_RAH(15):
1863 /* convert to u32 offset */
1864 ridx = (offset - E1000_RAL(0)) >> 2;
1865 retval = e82545_read_ra(sc, ridx);
/* 128 dwords each for the multicast and VLAN tables, indexed by dword. */
1867 case E1000_MTA ... (E1000_MTA + (127*4)):
1868 retval = sc->esc_fmcast[(offset - E1000_MTA) >> 2];
1870 case E1000_VFTA ... (E1000_VFTA + (127*4)):
1871 retval = sc->esc_fvlan[(offset - E1000_VFTA) >> 2];
1874 //DPRINTF("EECD read %x\r\n", sc->eeprom_control);
1875 retval = sc->eeprom_control;
1878 retval = sc->mdi_control;
1883 /* stats that we emulate. */
1885 retval = sc->missed_pkt_count;
/* Six per-size receive packet counters, buckets 0..5. */
1888 retval = sc->pkt_rx_by_size[0];
1891 retval = sc->pkt_rx_by_size[1];
1894 retval = sc->pkt_rx_by_size[2];
1897 retval = sc->pkt_rx_by_size[3];
1900 retval = sc->pkt_rx_by_size[4];
1903 retval = sc->pkt_rx_by_size[5];
1906 retval = sc->good_pkt_rx_count;
1909 retval = sc->bcast_pkt_rx_count;
1912 retval = sc->mcast_pkt_rx_count;
1916 retval = sc->good_pkt_tx_count;
/* Octet counters are exposed as two 32-bit halves: low word, then high. */
1919 retval = (uint32_t)sc->good_octets_rx;
1922 retval = (uint32_t)(sc->good_octets_rx >> 32);
1926 retval = (uint32_t)sc->good_octets_tx;
1930 retval = (uint32_t)(sc->good_octets_tx >> 32);
1933 retval = sc->oversize_rx_count;
/* Sum of good and missed receive octets, again as low/high halves. */
1936 retval = (uint32_t)(sc->good_octets_rx + sc->missed_octets);
1939 retval = (uint32_t)((sc->good_octets_rx +
1940 sc->missed_octets) >> 32);
/* Sum of the good, missed and oversize receive packet counters. */
1943 retval = sc->good_pkt_rx_count + sc->missed_pkt_count +
1944 sc->oversize_rx_count;
/* Six per-size transmit packet counters, buckets 0..5. */
1947 retval = sc->pkt_tx_by_size[0];
1950 retval = sc->pkt_tx_by_size[1];
1953 retval = sc->pkt_tx_by_size[2];
1956 retval = sc->pkt_tx_by_size[3];
1959 retval = sc->pkt_tx_by_size[4];
1962 retval = sc->pkt_tx_by_size[5];
1965 retval = sc->mcast_pkt_tx_count;
1968 retval = sc->bcast_pkt_tx_count;
1971 retval = sc->tso_tx_count;
1973 /* stats that are always 0. */
1975 case E1000_ALGNERRC:
2004 DPRINTF("Unknown read register: 0x%x\r\n", offset);
/*
 * BAR write handler (installed as .pe_barwrite in pci_de_e82545).
 *
 * Holds esc_mtx for the duration of the access.  Through the I/O BAR a
 * write either latches a register address into io_addr or writes data to
 * the register previously latched there; latched addresses above
 * E82545_IO_REGISTER_MAX are rejected.  Through the register BAR the write
 * goes directly to e82545_write_register().  Bad sizes, unknown offsets
 * and unknown BARs are only logged through DPRINTF.
 * NOTE(review): presumably only 4-byte accesses are accepted -- confirm
 * against the size checks that guard the "Wrong ... size" messages.
 */
2013 e82545_write(struct vmctx *ctx, int vcpu, struct pci_devinst *pi, int baridx,
2014 uint64_t offset, int size, uint64_t value)
2016 struct e82545_softc *sc;
2018 //DPRINTF("Write bar:%d offset:0x%lx value:0x%lx size:%d\r\n", baridx, offset, value, size);
2022 pthread_mutex_lock(&sc->esc_mtx);
2029 DPRINTF("Wrong io addr write sz:%d value:0x%lx\r\n", size, value);
/* Latch the register address used by later I/O data accesses. */
2031 sc->io_addr = (uint32_t)value;
2035 DPRINTF("Wrong io data write size:%d value:0x%lx\r\n", size, value);
2036 } else if (sc->io_addr > E82545_IO_REGISTER_MAX) {
2037 DPRINTF("Non-register io write addr:0x%x value:0x%lx\r\n", sc->io_addr, value);
2039 e82545_write_register(sc, sc->io_addr,
2043 DPRINTF("Unknown io bar write offset:0x%lx value:0x%lx size:%d\r\n", offset, value, size);
2047 case E82545_BAR_REGISTER:
2049 DPRINTF("Wrong register write size:%d offset:0x%lx value:0x%lx\r\n", size, offset, value);
2051 e82545_write_register(sc, (uint32_t)offset,
2055 DPRINTF("Unknown write bar:%d off:0x%lx val:0x%lx size:%d\r\n",
2056 baridx, offset, value, size);
2059 pthread_mutex_unlock(&sc->esc_mtx);
/*
 * BAR read handler (installed as .pe_barread in pci_de_e82545).
 *
 * Holds esc_mtx for the duration of the access.  Through the I/O BAR a
 * read returns either the latched register address (io_addr) or the
 * contents of the register it selects; latched addresses above
 * E82545_IO_REGISTER_MAX are not forwarded.  Through the register BAR the
 * read goes directly to e82545_read_register().  Bad sizes, unknown
 * offsets and unknown BARs are logged through DPRINTF.
 * NOTE(review): the value returned on the error paths is not established
 * by the statements here -- confirm retval's initial value.
 */
2063 e82545_read(struct vmctx *ctx, int vcpu, struct pci_devinst *pi, int baridx,
2064 uint64_t offset, int size)
2066 struct e82545_softc *sc;
2069 //DPRINTF("Read bar:%d offset:0x%lx size:%d\r\n", baridx, offset, size);
2073 pthread_mutex_lock(&sc->esc_mtx);
2080 DPRINTF("Wrong io addr read sz:%d\r\n", size);
/* Read back the currently latched register address. */
2082 retval = sc->io_addr;
2086 DPRINTF("Wrong io data read sz:%d\r\n", size);
2088 if (sc->io_addr > E82545_IO_REGISTER_MAX) {
2089 DPRINTF("Non-register io read addr:0x%x\r\n",
2092 retval = e82545_read_register(sc, sc->io_addr);
2095 DPRINTF("Unknown io bar read offset:0x%lx size:%d\r\n",
2100 case E82545_BAR_REGISTER:
2102 DPRINTF("Wrong register read size:%d offset:0x%lx\r\n",
2105 retval = e82545_read_register(sc, (uint32_t)offset);
2108 DPRINTF("Unknown read bar:%d offset:0x%lx size:%d\r\n",
2109 baridx, offset, size);
2113 pthread_mutex_unlock(&sc->esc_mtx);
/*
 * Reset the emulated device state held in `sc`.
 *
 * Receive and transmit are disabled first (both helpers wait for their
 * worker to go idle), any asserted legacy interrupt is deasserted, and
 * register defaults, EEPROM/NVM state, filter tables and the transmit
 * ring state are reinitialized.
 * NOTE(review): `drvr` presumably distinguishes a driver-initiated reset
 * from a hardware one (e82545_init() passes 0 for "H/w initiated reset")
 * and selects between fully clearing the filter tables and only clearing
 * the receive-address valid bits -- confirm the branch structure.
 */
2119 e82545_reset(struct e82545_softc *sc, int drvr)
2123 e82545_rx_disable(sc);
2124 e82545_tx_disable(sc);
2126 /* clear outstanding interrupts */
2127 if (sc->esc_irq_asserted)
2128 pci_lintr_deassert(sc->esc_pi);
2138 sc->esc_LEDCTL = 0x07061302;
2139 sc->esc_PBA = 0x00100030;
2141 /* start nvm in opcode mode. */
2143 sc->nvm_mode = E82545_NVM_MODE_OPADDR;
2144 sc->nvm_bits = E82545_NVM_OPADDR_BITS;
2145 sc->eeprom_control = E1000_EECD_PRES | E82545_EECD_FWE_EN;
2146 e82545_init_eeprom(sc);
/* Zero the VLAN filter, multicast filter and unicast address tables. */
2157 memset(sc->esc_fvlan, 0, sizeof(sc->esc_fvlan));
2158 memset(sc->esc_fmcast, 0, sizeof(sc->esc_fmcast));
2159 memset(sc->esc_uni, 0, sizeof(sc->esc_uni));
2161 /* XXX not necessary on 82545 ?? */
/* Seed unicast slot 0 with the device MAC and mark it valid. */
2162 sc->esc_uni[0].eu_valid = 1;
2163 memcpy(sc->esc_uni[0].eu_eth.octet, sc->esc_mac.octet,
2166 /* Clear RAH valid bits */
2167 for (i = 0; i < 16; i++)
2168 sc->esc_uni[i].eu_valid = 0;
2183 sc->esc_RXDCTL = (1 << 24) | (1 << 16); /* default GRAN/WTHRESH */
/* Drop the cached transmit descriptor pointer and zero both head copies. */
2197 sc->esc_txdesc = NULL;
2202 sc->esc_TDHr = sc->esc_TDH = 0;
/*
 * Open the backing network device "/dev/<opts>" and attach it to `sc`.
 *
 * The descriptor is opened read-write, switched to non-blocking mode with
 * the FIONBIO ioctl, limited (unless WITHOUT_CAPSICUM is defined) to the
 * CAP_EVENT, CAP_READ and CAP_WRITE rights, and registered with mevent_add()
 * using e82545_tap_callback.  If the ioctl or the mevent registration
 * fails, the descriptor is closed again.
 */
2207 e82545_open_tap(struct e82545_softc *sc, char *opts)
2210 #ifndef WITHOUT_CAPSICUM
2211 cap_rights_t rights;
/* Build the device path; strlcat() bounds the append by sizeof(tbuf). */
2219 strcpy(tbuf, "/dev/");
2220 strlcat(tbuf, opts, sizeof(tbuf));
2222 sc->esc_tapfd = open(tbuf, O_RDWR);
2223 if (sc->esc_tapfd == -1) {
2224 DPRINTF("unable to open tap device %s\n", opts);
2229 * Set non-blocking and register for read
2230 * notifications with the event loop
2233 if (ioctl(sc->esc_tapfd, FIONBIO, &opt) < 0) {
2234 WPRINTF("tap device O_NONBLOCK failed: %d\n", errno);
2235 close(sc->esc_tapfd);
2239 #ifndef WITHOUT_CAPSICUM
2240 cap_rights_init(&rights, CAP_EVENT, CAP_READ, CAP_WRITE);
/* ENOSYS is tolerated when limiting rights; any other failure exits via errx(). */
2241 if (cap_rights_limit(sc->esc_tapfd, &rights) == -1 && errno != ENOSYS)
2242 errx(EX_OSERR, "Unable to apply rights for sandbox");
2245 sc->esc_mevp = mevent_add(sc->esc_tapfd,
2247 e82545_tap_callback,
2249 if (sc->esc_mevp == NULL) {
2250 DPRINTF("Could not register mevent %d\n", EVF_READ);
2251 close(sc->esc_tapfd);
/*
 * Parse a "mac=<address>" option string into a 6-byte MAC address.
 *
 * `mac_str` is split in place at the first '=' by strsep(), so the caller's
 * buffer is modified.  The key must be exactly "mac" and the remainder must
 * be accepted by ether_aton().  Multicast and all-zero addresses are
 * rejected with a message on stderr.  On success the address is copied into
 * `mac_addr`, which must have room for ETHER_ADDR_LEN bytes.
 * NOTE(review): the return value presumably reports success/failure --
 * confirm the convention against the caller in e82545_init().
 */
2257 e82545_parsemac(char *mac_str, uint8_t *mac_addr)
2259 struct ether_addr *ea;
2261 char zero_addr[ETHER_ADDR_LEN] = { 0, 0, 0, 0, 0, 0 };
/* After strsep(), tmpstr is the key and mac_str is the text after '='. */
2263 tmpstr = strsep(&mac_str,"=");
2264 if ((mac_str != NULL) && (!strcmp(tmpstr,"mac"))) {
2265 ea = ether_aton(mac_str);
2266 if (ea == NULL || ETHER_IS_MULTICAST(ea->octet) ||
2267 memcmp(ea->octet, zero_addr, ETHER_ADDR_LEN) == 0) {
2268 fprintf(stderr, "Invalid MAC %s\n", mac_str);
2271 memcpy(mac_addr, ea->octet, ETHER_ADDR_LEN);
/*
 * Device-emulation init hook (installed as .pe_init in pci_de_e82545).
 *
 * Allocates and zeroes the softc, creates the mutex, the condition
 * variables and the transmit thread, fills in PCI config space (IDs,
 * class, header type, interrupt pin), requests a legacy interrupt and
 * allocates the register, flash and I/O BARs.  `opts` is then parsed as
 * "<devname>[,<mac option>]": a device name starting with "tap" or "vmnet"
 * is opened as the backend, and a MAC is either taken from the option or
 * derived from an MD5 digest.  Finally the device is reset.
 */
2277 e82545_init(struct vmctx *ctx, struct pci_devinst *pi, char *opts)
2279 DPRINTF("Loading with options: %s\r\n", opts);
2282 unsigned char digest[16];
2284 struct e82545_softc *sc;
2289 /* Setup our softc */
2290 sc = calloc(1, sizeof(*sc));
2296 pthread_mutex_init(&sc->esc_mtx, NULL);
2297 pthread_cond_init(&sc->esc_rx_cond, NULL);
2298 pthread_cond_init(&sc->esc_tx_cond, NULL);
/* NOTE(review): the return value of pthread_create() is not checked. */
2299 pthread_create(&sc->esc_tx_tid, NULL, e82545_tx_thread, sc);
2300 snprintf(nstr, sizeof(nstr), "e82545-%d:%d tx", pi->pi_slot,
2302 pthread_set_name_np(sc->esc_tx_tid, nstr);
2304 pci_set_cfgdata16(pi, PCIR_DEVICE, E82545_DEV_ID_82545EM_COPPER);
2305 pci_set_cfgdata16(pi, PCIR_VENDOR, E82545_VENDOR_ID_INTEL);
2306 pci_set_cfgdata8(pi, PCIR_CLASS, PCIC_NETWORK);
2307 pci_set_cfgdata8(pi, PCIR_SUBCLASS, PCIS_NETWORK_ETHERNET);
2308 pci_set_cfgdata16(pi, PCIR_SUBDEV_0, E82545_SUBDEV_ID);
2309 pci_set_cfgdata16(pi, PCIR_SUBVEND_0, E82545_VENDOR_ID_INTEL);
2311 pci_set_cfgdata8(pi, PCIR_HDRTYPE, PCIM_HDRTYPE_NORMAL);
2312 pci_set_cfgdata8(pi, PCIR_INTPIN, 0x1);
2314 /* TODO: this card also supports msi, but the freebsd driver for it
2315 * does not, so I have not implemented it. */
2316 pci_lintr_request(pi);
2318 pci_emul_alloc_bar(pi, E82545_BAR_REGISTER, PCIBAR_MEM32,
2319 E82545_BAR_REGISTER_LEN);
2320 pci_emul_alloc_bar(pi, E82545_BAR_FLASH, PCIBAR_MEM32,
2321 E82545_BAR_FLASH_LEN);
2322 pci_emul_alloc_bar(pi, E82545_BAR_IO, PCIBAR_IO,
2326 * Attempt to open the tap device and read the MAC address
2327 * if specified. Copied from virtio-net, slightly modified.
/*
 * Work on a private copy of opts: strsep() cuts it at the first ',' so
 * devname is the device name and vtopts the remainder (NULL if none).
 */
2334 devname = vtopts = strdup(opts);
2335 (void) strsep(&vtopts, ",");
2337 if (vtopts != NULL) {
2338 err = e82545_parsemac(vtopts, sc->esc_mac.octet);
2346 if (strncmp(devname, "tap", 3) == 0 ||
2347 strncmp(devname, "vmnet", 5) == 0)
2348 e82545_open_tap(sc, devname);
2354 * The default MAC address is the standard NetApp OUI of 00-a0-98,
2355 * followed by an MD5 of the PCI slot/func number and dev name
2357 if (!mac_provided) {
/* The hashed string is "<slot>-<func>-<vmname>". */
2358 snprintf(nstr, sizeof(nstr), "%d-%d-%s", pi->pi_slot,
2359 pi->pi_func, vmname);
2362 MD5Update(&mdctx, nstr, strlen(nstr));
2363 MD5Final(digest, &mdctx);
/* Fixed 00:a0:98 prefix, then the first three digest bytes. */
2365 sc->esc_mac.octet[0] = 0x00;
2366 sc->esc_mac.octet[1] = 0xa0;
2367 sc->esc_mac.octet[2] = 0x98;
2368 sc->esc_mac.octet[3] = digest[0];
2369 sc->esc_mac.octet[4] = digest[1];
2370 sc->esc_mac.octet[5] = digest[2];
2373 /* H/w initiated reset */
2374 e82545_reset(sc, 0);
/*
 * PCI device-emulation descriptor for the 82545 model: wires up the init
 * hook and the BAR write/read handlers defined in this file.
 * PCI_EMUL_SET presumably registers the descriptor with the PCI emulation
 * core -- confirm against the macro's definition.
 */
2379 struct pci_devemu pci_de_e82545 = {
2381 .pe_init = e82545_init,
2382 .pe_barwrite = e82545_write,
2383 .pe_barread = e82545_read
2385 PCI_EMUL_SET(pci_de_e82545);