]> CyberLeo.Net >> Repos - FreeBSD/FreeBSD.git/blob - sys/dev/re/if_re.c
MFV r357163:
[FreeBSD/FreeBSD.git] / sys / dev / re / if_re.c
1 /*-
2  * SPDX-License-Identifier: BSD-4-Clause
3  *
4  * Copyright (c) 1997, 1998-2003
5  *      Bill Paul <wpaul@windriver.com>.  All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice, this list of conditions and the following disclaimer.
12  * 2. Redistributions in binary form must reproduce the above copyright
13  *    notice, this list of conditions and the following disclaimer in the
14  *    documentation and/or other materials provided with the distribution.
15  * 3. All advertising materials mentioning features or use of this software
16  *    must display the following acknowledgement:
17  *      This product includes software developed by Bill Paul.
18  * 4. Neither the name of the author nor the names of any co-contributors
19  *    may be used to endorse or promote products derived from this software
20  *    without specific prior written permission.
21  *
22  * THIS SOFTWARE IS PROVIDED BY Bill Paul AND CONTRIBUTORS ``AS IS'' AND
23  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
24  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
25  * ARE DISCLAIMED.  IN NO EVENT SHALL Bill Paul OR THE VOICES IN HIS HEAD
26  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
27  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
28  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
29  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
30  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
31  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
32  * THE POSSIBILITY OF SUCH DAMAGE.
33  */
34
35 #include <sys/cdefs.h>
36 __FBSDID("$FreeBSD$");
37
38 /*
39  * RealTek 8139C+/8169/8169S/8110S/8168/8111/8101E PCI NIC driver
40  *
41  * Written by Bill Paul <wpaul@windriver.com>
42  * Senior Networking Software Engineer
43  * Wind River Systems
44  */
45
46 /*
47  * This driver is designed to support RealTek's next generation of
48  * 10/100 and 10/100/1000 PCI ethernet controllers. There are currently
49  * seven devices in this family: the RTL8139C+, the RTL8169, the RTL8169S,
50  * RTL8110S, the RTL8168, the RTL8111 and the RTL8101E.
51  *
52  * The 8139C+ is a 10/100 ethernet chip. It is backwards compatible
53  * with the older 8139 family, however it also supports a special
54  * C+ mode of operation that provides several new performance enhancing
55  * features. These include:
56  *
57  *      o Descriptor based DMA mechanism. Each descriptor represents
58  *        a single packet fragment. Data buffers may be aligned on
59  *        any byte boundary.
60  *
61  *      o 64-bit DMA
62  *
63  *      o TCP/IP checksum offload for both RX and TX
64  *
65  *      o High and normal priority transmit DMA rings
66  *
67  *      o VLAN tag insertion and extraction
68  *
69  *      o TCP large send (segmentation offload)
70  *
71  * Like the 8139, the 8139C+ also has a built-in 10/100 PHY. The C+
72  * programming API is fairly straightforward. The RX filtering, EEPROM
73  * access and PHY access is the same as it is on the older 8139 series
74  * chips.
75  *
76  * The 8169 is a 64-bit 10/100/1000 gigabit ethernet MAC. It has almost the
77  * same programming API and feature set as the 8139C+ with the following
78  * differences and additions:
79  *
80  *      o 1000Mbps mode
81  *
82  *      o Jumbo frames
83  *
84  *      o GMII and TBI ports/registers for interfacing with copper
85  *        or fiber PHYs
86  *
87  *      o RX and TX DMA rings can have up to 1024 descriptors
88  *        (the 8139C+ allows a maximum of 64)
89  *
90  *      o Slight differences in register layout from the 8139C+
91  *
92  * The TX start and timer interrupt registers are at different locations
93  * on the 8169 than they are on the 8139C+. Also, the status word in the
94  * RX descriptor has a slightly different bit layout. The 8169 does not
95  * have a built-in PHY. Most reference boards use a Marvell 88E1000 'Alaska'
96  * copper gigE PHY.
97  *
98  * The 8169S/8110S 10/100/1000 devices have built-in copper gigE PHYs
99  * (the 'S' stands for 'single-chip'). These devices have the same
100  * programming API as the older 8169, but also have some vendor-specific
101  * registers for the on-board PHY. The 8110S is a LAN-on-motherboard
102  * part designed to be pin-compatible with the RealTek 8100 10/100 chip.
103  *
104  * This driver takes advantage of the RX and TX checksum offload and
105  * VLAN tag insertion/extraction features. It also implements TX
106  * interrupt moderation using the timer interrupt registers, which
107  * significantly reduces TX interrupt load. There is also support
108  * for jumbo frames, however the 8169/8169S/8110S can not transmit
109  * jumbo frames larger than 7440, so the max MTU possible with this
110  * driver is 7422 bytes.
111  */
112
113 #ifdef HAVE_KERNEL_OPTION_HEADERS
114 #include "opt_device_polling.h"
115 #endif
116
117 #include <sys/param.h>
118 #include <sys/endian.h>
119 #include <sys/systm.h>
120 #include <sys/sockio.h>
121 #include <sys/mbuf.h>
122 #include <sys/malloc.h>
123 #include <sys/module.h>
124 #include <sys/kernel.h>
125 #include <sys/socket.h>
126 #include <sys/lock.h>
127 #include <sys/mutex.h>
128 #include <sys/sysctl.h>
129 #include <sys/taskqueue.h>
130
131 #include <net/debugnet.h>
132 #include <net/if.h>
133 #include <net/if_var.h>
134 #include <net/if_arp.h>
135 #include <net/ethernet.h>
136 #include <net/if_dl.h>
137 #include <net/if_media.h>
138 #include <net/if_types.h>
139 #include <net/if_vlan_var.h>
140
141 #include <net/bpf.h>
142
143 #include <machine/bus.h>
144 #include <machine/resource.h>
145 #include <sys/bus.h>
146 #include <sys/rman.h>
147
148 #include <dev/mii/mii.h>
149 #include <dev/mii/miivar.h>
150
151 #include <dev/pci/pcireg.h>
152 #include <dev/pci/pcivar.h>
153
154 #include <dev/rl/if_rlreg.h>
155
156 MODULE_DEPEND(re, pci, 1, 1, 1);
157 MODULE_DEPEND(re, ether, 1, 1, 1);
158 MODULE_DEPEND(re, miibus, 1, 1, 1);
159
160 /* "device miibus" required.  See GENERIC if you get errors here. */
161 #include "miibus_if.h"
162
163 /* Tunables. */
/*
 * Each knob below is a file-local int that defaults to 0 and is bound to
 * the tunable name given in the TUNABLE_INT() line that follows it.
 * NOTE(review): the code that consumes these values is outside this part
 * of the file; the names suggest interrupt-filter selection, MSI/MSI-X
 * opt-out and I/O-space register mapping preference -- confirm in
 * re_attach().
 */
164 static int intr_filter = 0;
165 TUNABLE_INT("hw.re.intr_filter", &intr_filter);
166 static int msi_disable = 0;
167 TUNABLE_INT("hw.re.msi_disable", &msi_disable);
168 static int msix_disable = 0;
169 TUNABLE_INT("hw.re.msix_disable", &msix_disable);
170 static int prefer_iomap = 0;
171 TUNABLE_INT("hw.re.prefer_iomap", &prefer_iomap);
172
/* Mask combining the IP, TCP and UDP checksum flags (CSUM_*). */
173 #define RE_CSUM_FEATURES    (CSUM_IP | CSUM_TCP | CSUM_UDP)
174
175 /*
176  * Various supported device vendors/types and their names.
177  */
/*
 * Each entry is { vendor ID, device ID, 0, description string }.  The
 * array carries no terminating sentinel entry.
 * NOTE(review): the third member is 0 in every entry; its meaning is
 * defined by struct rl_type, which is declared outside this file --
 * confirm in if_rlreg.h.
 */
178 static const struct rl_type re_devs[] = {
179         { DLINK_VENDORID, DLINK_DEVICEID_528T, 0,
180             "D-Link DGE-528(T) Gigabit Ethernet Adapter" },
181         { DLINK_VENDORID, DLINK_DEVICEID_530T_REVC, 0,
182             "D-Link DGE-530(T) Gigabit Ethernet Adapter" },
183         { RT_VENDORID, RT_DEVICEID_8139, 0,
184             "RealTek 8139C+ 10/100BaseTX" },
185         { RT_VENDORID, RT_DEVICEID_8101E, 0,
186             "RealTek 810xE PCIe 10/100baseTX" },
187         { RT_VENDORID, RT_DEVICEID_8168, 0,
188             "RealTek 8168/8111 B/C/CP/D/DP/E/F/G PCIe Gigabit Ethernet" },
189         { NCUBE_VENDORID, RT_DEVICEID_8168, 0,
190             "TP-Link TG-3468 v2 (RTL8168) Gigabit Ethernet" },
191         { RT_VENDORID, RT_DEVICEID_8169, 0,
192             "RealTek 8169/8169S/8169SB(L)/8110S/8110SB(L) Gigabit Ethernet" },
193         { RT_VENDORID, RT_DEVICEID_8169SC, 0,
194             "RealTek 8169SC/8110SC Single-chip Gigabit Ethernet" },
195         { COREGA_VENDORID, COREGA_DEVICEID_CGLAPCIGT, 0,
196             "Corega CG-LAPCIGT (RTL8169S) Gigabit Ethernet" },
197         { LINKSYS_VENDORID, LINKSYS_DEVICEID_EG1032, 0,
198             "Linksys EG1032 (RTL8169S) Gigabit Ethernet" },
199         { USR_VENDORID, USR_DEVICEID_997902, 0,
200             "US Robotics 997902 (RTL8169S) Gigabit Ethernet" }
201 };
202
/*
 * Table of known hardware revisions.  Each entry is
 * { revision ID (RL_HWREV_*), chip family (RL_8139, RL_8139CPLUS or
 * RL_8169), revision name string, MTU constant }.  The list is terminated
 * by an all-zero entry whose name is NULL.
 * NOTE(review): the last member is presumably the largest MTU the
 * revision supports (RL_MTU vs. the RL_JUMBO_MTU* values) -- confirm
 * against struct rl_hwrev and the lookup code in re_attach().
 */
203 static const struct rl_hwrev re_hwrevs[] = {
204         { RL_HWREV_8139, RL_8139, "", RL_MTU },
205         { RL_HWREV_8139A, RL_8139, "A", RL_MTU },
206         { RL_HWREV_8139AG, RL_8139, "A-G", RL_MTU },
207         { RL_HWREV_8139B, RL_8139, "B", RL_MTU },
208         { RL_HWREV_8130, RL_8139, "8130", RL_MTU },
209         { RL_HWREV_8139C, RL_8139, "C", RL_MTU },
210         { RL_HWREV_8139D, RL_8139, "8139D/8100B/8100C", RL_MTU },
211         { RL_HWREV_8139CPLUS, RL_8139CPLUS, "C+", RL_MTU },
212         { RL_HWREV_8168B_SPIN1, RL_8169, "8168", RL_JUMBO_MTU },
213         { RL_HWREV_8169, RL_8169, "8169", RL_JUMBO_MTU },
214         { RL_HWREV_8169S, RL_8169, "8169S", RL_JUMBO_MTU },
215         { RL_HWREV_8110S, RL_8169, "8110S", RL_JUMBO_MTU },
216         { RL_HWREV_8169_8110SB, RL_8169, "8169SB/8110SB", RL_JUMBO_MTU },
217         { RL_HWREV_8169_8110SC, RL_8169, "8169SC/8110SC", RL_JUMBO_MTU },
218         { RL_HWREV_8169_8110SBL, RL_8169, "8169SBL/8110SBL", RL_JUMBO_MTU },
219         { RL_HWREV_8169_8110SCE, RL_8169, "8169SC/8110SC", RL_JUMBO_MTU },
220         { RL_HWREV_8100, RL_8139, "8100", RL_MTU },
221         { RL_HWREV_8101, RL_8139, "8101", RL_MTU },
222         { RL_HWREV_8100E, RL_8169, "8100E", RL_MTU },
223         { RL_HWREV_8101E, RL_8169, "8101E", RL_MTU },
224         { RL_HWREV_8102E, RL_8169, "8102E", RL_MTU },
225         { RL_HWREV_8102EL, RL_8169, "8102EL", RL_MTU },
226         { RL_HWREV_8102EL_SPIN1, RL_8169, "8102EL", RL_MTU },
227         { RL_HWREV_8103E, RL_8169, "8103E", RL_MTU },
228         { RL_HWREV_8401E, RL_8169, "8401E", RL_MTU },
229         { RL_HWREV_8402, RL_8169, "8402", RL_MTU },
230         { RL_HWREV_8105E, RL_8169, "8105E", RL_MTU },
231         { RL_HWREV_8105E_SPIN1, RL_8169, "8105E", RL_MTU },
232         { RL_HWREV_8106E, RL_8169, "8106E", RL_MTU },
233         { RL_HWREV_8168B_SPIN2, RL_8169, "8168", RL_JUMBO_MTU },
234         { RL_HWREV_8168B_SPIN3, RL_8169, "8168", RL_JUMBO_MTU },
235         { RL_HWREV_8168C, RL_8169, "8168C/8111C", RL_JUMBO_MTU_6K },
236         { RL_HWREV_8168C_SPIN2, RL_8169, "8168C/8111C", RL_JUMBO_MTU_6K },
237         { RL_HWREV_8168CP, RL_8169, "8168CP/8111CP", RL_JUMBO_MTU_6K },
238         { RL_HWREV_8168D, RL_8169, "8168D/8111D", RL_JUMBO_MTU_9K },
239         { RL_HWREV_8168DP, RL_8169, "8168DP/8111DP", RL_JUMBO_MTU_9K },
240         { RL_HWREV_8168E, RL_8169, "8168E/8111E", RL_JUMBO_MTU_9K},
241         { RL_HWREV_8168E_VL, RL_8169, "8168E/8111E-VL", RL_JUMBO_MTU_6K},
242         { RL_HWREV_8168EP, RL_8169, "8168EP/8111EP", RL_JUMBO_MTU_9K},
243         { RL_HWREV_8168F, RL_8169, "8168F/8111F", RL_JUMBO_MTU_9K},
244         { RL_HWREV_8168G, RL_8169, "8168G/8111G", RL_JUMBO_MTU_9K},
245         { RL_HWREV_8168GU, RL_8169, "8168GU/8111GU", RL_JUMBO_MTU_9K},
246         { RL_HWREV_8168H, RL_8169, "8168H/8111H", RL_JUMBO_MTU_9K},
247         { RL_HWREV_8411, RL_8169, "8411", RL_JUMBO_MTU_9K},
248         { RL_HWREV_8411B, RL_8169, "8411B", RL_JUMBO_MTU_9K},
249         { 0, 0, NULL, 0 }
250 };
251
252 static int re_probe             (device_t);
253 static int re_attach            (device_t);
254 static int re_detach            (device_t);
255
256 static int re_encap             (struct rl_softc *, struct mbuf **);
257
258 static void re_dma_map_addr     (void *, bus_dma_segment_t *, int, int);
259 static int re_allocmem          (device_t, struct rl_softc *);
260 static __inline void re_discard_rxbuf
261                                 (struct rl_softc *, int);
262 static int re_newbuf            (struct rl_softc *, int);
263 static int re_jumbo_newbuf      (struct rl_softc *, int);
264 static int re_rx_list_init      (struct rl_softc *);
265 static int re_jrx_list_init     (struct rl_softc *);
266 static int re_tx_list_init      (struct rl_softc *);
267 #ifdef RE_FIXUP_RX
268 static __inline void re_fixup_rx
269                                 (struct mbuf *);
270 #endif
271 static int re_rxeof             (struct rl_softc *, int *);
272 static void re_txeof            (struct rl_softc *);
273 #ifdef DEVICE_POLLING
274 static int re_poll              (struct ifnet *, enum poll_cmd, int);
275 static int re_poll_locked       (struct ifnet *, enum poll_cmd, int);
276 #endif
277 static int re_intr              (void *);
278 static void re_intr_msi         (void *);
279 static void re_tick             (void *);
280 static void re_int_task         (void *, int);
281 static void re_start            (struct ifnet *);
282 static void re_start_locked     (struct ifnet *);
283 static void re_start_tx         (struct rl_softc *);
284 static int re_ioctl             (struct ifnet *, u_long, caddr_t);
285 static void re_init             (void *);
286 static void re_init_locked      (struct rl_softc *);
287 static void re_stop             (struct rl_softc *);
288 static void re_watchdog         (struct rl_softc *);
289 static int re_suspend           (device_t);
290 static int re_resume            (device_t);
291 static int re_shutdown          (device_t);
292 static int re_ifmedia_upd       (struct ifnet *);
293 static void re_ifmedia_sts      (struct ifnet *, struct ifmediareq *);
294
295 static void re_eeprom_putbyte   (struct rl_softc *, int);
296 static void re_eeprom_getword   (struct rl_softc *, int, u_int16_t *);
297 static void re_read_eeprom      (struct rl_softc *, caddr_t, int, int);
298 static int re_gmii_readreg      (device_t, int, int);
299 static int re_gmii_writereg     (device_t, int, int, int);
300
301 static int re_miibus_readreg    (device_t, int, int);
302 static int re_miibus_writereg   (device_t, int, int, int);
303 static void re_miibus_statchg   (device_t);
304
305 static void re_set_jumbo        (struct rl_softc *, int);
306 static void re_set_rxmode               (struct rl_softc *);
307 static void re_reset            (struct rl_softc *);
308 static void re_setwol           (struct rl_softc *);
309 static void re_clrwol           (struct rl_softc *);
310 static void re_set_linkspeed    (struct rl_softc *);
311
312 DEBUGNET_DEFINE(re);
313
314 #ifdef DEV_NETMAP       /* see ixgbe.c for details */
315 #include <dev/netmap/if_re_netmap.h>
316 MODULE_DEPEND(re, netmap, 1, 1, 1);
317 #endif /* DEV_NETMAP */
318
319 #ifdef RE_DIAG
320 static int re_diag              (struct rl_softc *);
321 #endif
322
323 static void re_add_sysctls      (struct rl_softc *);
324 static int re_sysctl_stats      (SYSCTL_HANDLER_ARGS);
325 static int sysctl_int_range     (SYSCTL_HANDLER_ARGS, int, int);
326 static int sysctl_hw_re_int_mod (SYSCTL_HANDLER_ARGS);
327
/*
 * Method table for the "re" driver: binds the generic device methods
 * (probe/attach/detach/suspend/resume/shutdown) and the miibus register
 * access and status-change methods to the re_* implementations in this
 * file.  DEVMETHOD_END terminates the table.
 */
328 static device_method_t re_methods[] = {
329         /* Device interface */
330         DEVMETHOD(device_probe,         re_probe),
331         DEVMETHOD(device_attach,        re_attach),
332         DEVMETHOD(device_detach,        re_detach),
333         DEVMETHOD(device_suspend,       re_suspend),
334         DEVMETHOD(device_resume,        re_resume),
335         DEVMETHOD(device_shutdown,      re_shutdown),
336
337         /* MII interface */
338         DEVMETHOD(miibus_readreg,       re_miibus_readreg),
339         DEVMETHOD(miibus_writereg,      re_miibus_writereg),
340         DEVMETHOD(miibus_statchg,       re_miibus_statchg),
341
342         DEVMETHOD_END
343 };
344
/*
 * Driver description: the driver name "re", its method table, and the
 * size of the per-device softc (struct rl_softc).
 */
345 static driver_t re_driver = {
346         "re",
347         re_methods,
348         sizeof(struct rl_softc)
349 };
350
351 static devclass_t re_devclass;
352
/*
 * Register "re" under the pci bus, and register miibus under "re" so the
 * MII layer can be used with this driver.
 */
353 DRIVER_MODULE(re, pci, re_driver, re_devclass, 0, 0);
354 DRIVER_MODULE(miibus, re, miibus_driver, miibus_devclass, 0, 0);
355
/*
 * Set (EE_SET) or clear (EE_CLR) the bit(s) 'x' in the RL_EECMD register
 * using a read-modify-write cycle.  Both macros expect a variable named
 * 'sc' to be in scope at the point of use.  The argument is parenthesized
 * so that a compound expression such as (A | B) is OR-ed in, or inverted,
 * as a single unit.
 */
#define EE_SET(x)                                       \
        CSR_WRITE_1(sc, RL_EECMD,                       \
                CSR_READ_1(sc, RL_EECMD) | (x))

#define EE_CLR(x)                                       \
        CSR_WRITE_1(sc, RL_EECMD,                       \
                CSR_READ_1(sc, RL_EECMD) & ~(x))
363
364 /*
365  * Send a read command and address to the EEPROM, check for ACK.
366  */
367 static void
368 re_eeprom_putbyte(struct rl_softc *sc, int addr)
369 {
370         int                     d, i;
371
372         d = addr | (RL_9346_READ << sc->rl_eewidth);
373
374         /*
375          * Feed in each bit and strobe the clock.
376          */
377
378         for (i = 1 << (sc->rl_eewidth + 3); i; i >>= 1) {
379                 if (d & i) {
380                         EE_SET(RL_EE_DATAIN);
381                 } else {
382                         EE_CLR(RL_EE_DATAIN);
383                 }
384                 DELAY(100);
385                 EE_SET(RL_EE_CLK);
386                 DELAY(150);
387                 EE_CLR(RL_EE_CLK);
388                 DELAY(100);
389         }
390 }
391
392 /*
393  * Read a word of data stored in the EEPROM at address 'addr.'
394  */
395 static void
396 re_eeprom_getword(struct rl_softc *sc, int addr, u_int16_t *dest)
397 {
398         int                     i;
399         u_int16_t               word = 0;
400
401         /*
402          * Send address of word we want to read.
403          */
404         re_eeprom_putbyte(sc, addr);
405
406         /*
407          * Start reading bits from EEPROM.
408          */
409         for (i = 0x8000; i; i >>= 1) {
410                 EE_SET(RL_EE_CLK);
411                 DELAY(100);
412                 if (CSR_READ_1(sc, RL_EECMD) & RL_EE_DATAOUT)
413                         word |= i;
414                 EE_CLR(RL_EE_CLK);
415                 DELAY(100);
416         }
417
418         *dest = word;
419 }
420
421 /*
422  * Read a sequence of words from the EEPROM.
423  */
424 static void
425 re_read_eeprom(struct rl_softc *sc, caddr_t dest, int off, int cnt)
426 {
427         int                     i;
428         u_int16_t               word = 0, *ptr;
429
430         CSR_SETBIT_1(sc, RL_EECMD, RL_EEMODE_PROGRAM);
431
432         DELAY(100);
433
434         for (i = 0; i < cnt; i++) {
435                 CSR_SETBIT_1(sc, RL_EECMD, RL_EE_SEL);
436                 re_eeprom_getword(sc, off + i, &word);
437                 CSR_CLRBIT_1(sc, RL_EECMD, RL_EE_SEL);
438                 ptr = (u_int16_t *)(dest + (i * 2));
439                 *ptr = word;
440         }
441
442         CSR_CLRBIT_1(sc, RL_EECMD, RL_EEMODE_PROGRAM);
443 }
444
445 static int
446 re_gmii_readreg(device_t dev, int phy, int reg)
447 {
448         struct rl_softc         *sc;
449         u_int32_t               rval;
450         int                     i;
451
452         sc = device_get_softc(dev);
453
454         /* Let the rgephy driver read the GMEDIASTAT register */
455
456         if (reg == RL_GMEDIASTAT) {
457                 rval = CSR_READ_1(sc, RL_GMEDIASTAT);
458                 return (rval);
459         }
460
461         CSR_WRITE_4(sc, RL_PHYAR, reg << 16);
462
463         for (i = 0; i < RL_PHY_TIMEOUT; i++) {
464                 rval = CSR_READ_4(sc, RL_PHYAR);
465                 if (rval & RL_PHYAR_BUSY)
466                         break;
467                 DELAY(25);
468         }
469
470         if (i == RL_PHY_TIMEOUT) {
471                 device_printf(sc->rl_dev, "PHY read failed\n");
472                 return (0);
473         }
474
475         /*
476          * Controller requires a 20us delay to process next MDIO request.
477          */
478         DELAY(20);
479
480         return (rval & RL_PHYAR_PHYDATA);
481 }
482
483 static int
484 re_gmii_writereg(device_t dev, int phy, int reg, int data)
485 {
486         struct rl_softc         *sc;
487         u_int32_t               rval;
488         int                     i;
489
490         sc = device_get_softc(dev);
491
492         CSR_WRITE_4(sc, RL_PHYAR, (reg << 16) |
493             (data & RL_PHYAR_PHYDATA) | RL_PHYAR_BUSY);
494
495         for (i = 0; i < RL_PHY_TIMEOUT; i++) {
496                 rval = CSR_READ_4(sc, RL_PHYAR);
497                 if (!(rval & RL_PHYAR_BUSY))
498                         break;
499                 DELAY(25);
500         }
501
502         if (i == RL_PHY_TIMEOUT) {
503                 device_printf(sc->rl_dev, "PHY write failed\n");
504                 return (0);
505         }
506
507         /*
508          * Controller requires a 20us delay to process next MDIO request.
509          */
510         DELAY(20);
511
512         return (0);
513 }
514
515 static int
516 re_miibus_readreg(device_t dev, int phy, int reg)
517 {
518         struct rl_softc         *sc;
519         u_int16_t               rval = 0;
520         u_int16_t               re8139_reg = 0;
521
522         sc = device_get_softc(dev);
523
524         if (sc->rl_type == RL_8169) {
525                 rval = re_gmii_readreg(dev, phy, reg);
526                 return (rval);
527         }
528
529         switch (reg) {
530         case MII_BMCR:
531                 re8139_reg = RL_BMCR;
532                 break;
533         case MII_BMSR:
534                 re8139_reg = RL_BMSR;
535                 break;
536         case MII_ANAR:
537                 re8139_reg = RL_ANAR;
538                 break;
539         case MII_ANER:
540                 re8139_reg = RL_ANER;
541                 break;
542         case MII_ANLPAR:
543                 re8139_reg = RL_LPAR;
544                 break;
545         case MII_PHYIDR1:
546         case MII_PHYIDR2:
547                 return (0);
548         /*
549          * Allow the rlphy driver to read the media status
550          * register. If we have a link partner which does not
551          * support NWAY, this is the register which will tell
552          * us the results of parallel detection.
553          */
554         case RL_MEDIASTAT:
555                 rval = CSR_READ_1(sc, RL_MEDIASTAT);
556                 return (rval);
557         default:
558                 device_printf(sc->rl_dev, "bad phy register\n");
559                 return (0);
560         }
561         rval = CSR_READ_2(sc, re8139_reg);
562         if (sc->rl_type == RL_8139CPLUS && re8139_reg == RL_BMCR) {
563                 /* 8139C+ has different bit layout. */
564                 rval &= ~(BMCR_LOOP | BMCR_ISO);
565         }
566         return (rval);
567 }
568
569 static int
570 re_miibus_writereg(device_t dev, int phy, int reg, int data)
571 {
572         struct rl_softc         *sc;
573         u_int16_t               re8139_reg = 0;
574         int                     rval = 0;
575
576         sc = device_get_softc(dev);
577
578         if (sc->rl_type == RL_8169) {
579                 rval = re_gmii_writereg(dev, phy, reg, data);
580                 return (rval);
581         }
582
583         switch (reg) {
584         case MII_BMCR:
585                 re8139_reg = RL_BMCR;
586                 if (sc->rl_type == RL_8139CPLUS) {
587                         /* 8139C+ has different bit layout. */
588                         data &= ~(BMCR_LOOP | BMCR_ISO);
589                 }
590                 break;
591         case MII_BMSR:
592                 re8139_reg = RL_BMSR;
593                 break;
594         case MII_ANAR:
595                 re8139_reg = RL_ANAR;
596                 break;
597         case MII_ANER:
598                 re8139_reg = RL_ANER;
599                 break;
600         case MII_ANLPAR:
601                 re8139_reg = RL_LPAR;
602                 break;
603         case MII_PHYIDR1:
604         case MII_PHYIDR2:
605                 return (0);
606                 break;
607         default:
608                 device_printf(sc->rl_dev, "bad phy register\n");
609                 return (0);
610         }
611         CSR_WRITE_2(sc, re8139_reg, data);
612         return (0);
613 }
614
615 static void
616 re_miibus_statchg(device_t dev)
617 {
618         struct rl_softc         *sc;
619         struct ifnet            *ifp;
620         struct mii_data         *mii;
621
622         sc = device_get_softc(dev);
623         mii = device_get_softc(sc->rl_miibus);
624         ifp = sc->rl_ifp;
625         if (mii == NULL || ifp == NULL ||
626             (ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
627                 return;
628
629         sc->rl_flags &= ~RL_FLAG_LINK;
630         if ((mii->mii_media_status & (IFM_ACTIVE | IFM_AVALID)) ==
631             (IFM_ACTIVE | IFM_AVALID)) {
632                 switch (IFM_SUBTYPE(mii->mii_media_active)) {
633                 case IFM_10_T:
634                 case IFM_100_TX:
635                         sc->rl_flags |= RL_FLAG_LINK;
636                         break;
637                 case IFM_1000_T:
638                         if ((sc->rl_flags & RL_FLAG_FASTETHER) != 0)
639                                 break;
640                         sc->rl_flags |= RL_FLAG_LINK;
641                         break;
642                 default:
643                         break;
644                 }
645         }
646         /*
647          * RealTek controllers do not provide any interface to the RX/TX
648          * MACs for resolved speed, duplex and flow-control parameters.
649          */
650 }
651
652 static u_int
653 re_hash_maddr(void *arg, struct sockaddr_dl *sdl, u_int cnt)
654 {
655         uint32_t h, *hashes = arg;
656
657         h = ether_crc32_be(LLADDR(sdl), ETHER_ADDR_LEN) >> 26;
658         if (h < 32)
659                 hashes[0] |= (1 << h);
660         else
661                 hashes[1] |= (1 << (h - 32));
662
663         return (1);
664 }
665
666 /*
667  * Set the RX configuration and 64-bit multicast hash filter.
668  */
669 static void
670 re_set_rxmode(struct rl_softc *sc)
671 {
672         struct ifnet            *ifp;
673         uint32_t                h, hashes[2] = { 0, 0 };
674         uint32_t                rxfilt;
675
676         RL_LOCK_ASSERT(sc);
677
678         ifp = sc->rl_ifp;
679
680         rxfilt = RL_RXCFG_CONFIG | RL_RXCFG_RX_INDIV | RL_RXCFG_RX_BROAD;
681         if ((sc->rl_flags & RL_FLAG_EARLYOFF) != 0)
682                 rxfilt |= RL_RXCFG_EARLYOFF;
683         else if ((sc->rl_flags & RL_FLAG_8168G_PLUS) != 0)
684                 rxfilt |= RL_RXCFG_EARLYOFFV2;
685
686         if (ifp->if_flags & (IFF_ALLMULTI | IFF_PROMISC)) {
687                 if (ifp->if_flags & IFF_PROMISC)
688                         rxfilt |= RL_RXCFG_RX_ALLPHYS;
689                 /*
690                  * Unlike other hardwares, we have to explicitly set
691                  * RL_RXCFG_RX_MULTI to receive multicast frames in
692                  * promiscuous mode.
693                  */
694                 rxfilt |= RL_RXCFG_RX_MULTI;
695                 hashes[0] = hashes[1] = 0xffffffff;
696                 goto done;
697         }
698
699         if_foreach_llmaddr(ifp, re_hash_maddr, hashes);
700
701         if (hashes[0] != 0 || hashes[1] != 0) {
702                 /*
703                  * For some unfathomable reason, RealTek decided to
704                  * reverse the order of the multicast hash registers
705                  * in the PCI Express parts.  This means we have to
706                  * write the hash pattern in reverse order for those
707                  * devices.
708                  */
709                 if ((sc->rl_flags & RL_FLAG_PCIE) != 0) {
710                         h = bswap32(hashes[0]);
711                         hashes[0] = bswap32(hashes[1]);
712                         hashes[1] = h;
713                 }
714                 rxfilt |= RL_RXCFG_RX_MULTI;
715         }
716
717         if  (sc->rl_hwrev->rl_rev == RL_HWREV_8168F) {
718                 /* Disable multicast filtering due to silicon bug. */
719                 hashes[0] = 0xffffffff;
720                 hashes[1] = 0xffffffff;
721         }
722
723 done:
724         CSR_WRITE_4(sc, RL_MAR0, hashes[0]);
725         CSR_WRITE_4(sc, RL_MAR4, hashes[1]);
726         CSR_WRITE_4(sc, RL_RXCFG, rxfilt);
727 }
728
729 static void
730 re_reset(struct rl_softc *sc)
731 {
732         int                     i;
733
734         RL_LOCK_ASSERT(sc);
735
736         CSR_WRITE_1(sc, RL_COMMAND, RL_CMD_RESET);
737
738         for (i = 0; i < RL_TIMEOUT; i++) {
739                 DELAY(10);
740                 if (!(CSR_READ_1(sc, RL_COMMAND) & RL_CMD_RESET))
741                         break;
742         }
743         if (i == RL_TIMEOUT)
744                 device_printf(sc->rl_dev, "reset never completed!\n");
745
746         if ((sc->rl_flags & RL_FLAG_MACRESET) != 0)
747                 CSR_WRITE_1(sc, 0x82, 1);
748         if (sc->rl_hwrev->rl_rev == RL_HWREV_8169S)
749                 re_gmii_writereg(sc->rl_dev, 1, 0x0b, 0);
750 }
751
752 #ifdef RE_DIAG
753
754 /*
755  * The following routine is designed to test for a defect on some
756  * 32-bit 8169 cards. Some of these NICs have the REQ64# and ACK64#
757  * lines connected to the bus, however for a 32-bit only card, they
758  * should be pulled high. The result of this defect is that the
759  * NIC will not work right if you plug it into a 64-bit slot: DMA
760  * operations will be done with 64-bit transfers, which will fail
761  * because the 64-bit data lines aren't connected.
762  *
763  * There's no way to work around this (short of taking a soldering
764  * iron to the board), however we can detect it. The method we use
765  * here is to put the NIC into digital loopback mode, set the receiver
766  * to promiscuous mode, and then try to send a frame. We then compare
767  * the frame data we sent to what was received. If the data matches,
768  * then the NIC is working correctly, otherwise we know the user has
769  * a defective NIC which has been mistakenly plugged into a 64-bit PCI
770  * slot. In the latter case, there's no way the NIC can work correctly,
771  * so we print out a message on the console and abort the device attach.
772  */
773
774 static int
775 re_diag(struct rl_softc *sc)
776 {
777         struct ifnet            *ifp = sc->rl_ifp;
778         struct mbuf             *m0;
779         struct ether_header     *eh;
780         struct rl_desc          *cur_rx;
781         u_int16_t               status;
782         u_int32_t               rxstat;
783         int                     total_len, i, error = 0, phyaddr;
784         u_int8_t                dst[] = { 0x00, 'h', 'e', 'l', 'l', 'o' };
785         u_int8_t                src[] = { 0x00, 'w', 'o', 'r', 'l', 'd' };
786
787         /* Allocate a single mbuf */
788         MGETHDR(m0, M_NOWAIT, MT_DATA);
789         if (m0 == NULL)
790                 return (ENOBUFS);
791
792         RL_LOCK(sc);
793
794         /*
795          * Initialize the NIC in test mode. This sets the chip up
796          * so that it can send and receive frames, but performs the
797          * following special functions:
798          * - Puts receiver in promiscuous mode
799          * - Enables digital loopback mode
800          * - Leaves interrupts turned off
801          */
802
803         ifp->if_flags |= IFF_PROMISC;
804         sc->rl_testmode = 1;
805         ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
806         re_init_locked(sc);
807         sc->rl_flags |= RL_FLAG_LINK;
808         if (sc->rl_type == RL_8169)
809                 phyaddr = 1;
810         else
811                 phyaddr = 0;
812
813         re_miibus_writereg(sc->rl_dev, phyaddr, MII_BMCR, BMCR_RESET);
814         for (i = 0; i < RL_TIMEOUT; i++) {
815                 status = re_miibus_readreg(sc->rl_dev, phyaddr, MII_BMCR);
816                 if (!(status & BMCR_RESET))
817                         break;
818         }
819
820         re_miibus_writereg(sc->rl_dev, phyaddr, MII_BMCR, BMCR_LOOP);
821         CSR_WRITE_2(sc, RL_ISR, RL_INTRS);
822
823         DELAY(100000);
824
825         /* Put some data in the mbuf */
826
827         eh = mtod(m0, struct ether_header *);
828         bcopy ((char *)&dst, eh->ether_dhost, ETHER_ADDR_LEN);
829         bcopy ((char *)&src, eh->ether_shost, ETHER_ADDR_LEN);
830         eh->ether_type = htons(ETHERTYPE_IP);
831         m0->m_pkthdr.len = m0->m_len = ETHER_MIN_LEN - ETHER_CRC_LEN;
832
833         /*
834          * Queue the packet, start transmission.
835          * Note: IF_HANDOFF() ultimately calls re_start() for us.
836          */
837
838         CSR_WRITE_2(sc, RL_ISR, 0xFFFF);
839         RL_UNLOCK(sc);
840         /* XXX: re_diag must not be called when in ALTQ mode */
841         IF_HANDOFF(&ifp->if_snd, m0, ifp);
842         RL_LOCK(sc);
843         m0 = NULL;
844
845         /* Wait for it to propagate through the chip */
846
847         DELAY(100000);
848         for (i = 0; i < RL_TIMEOUT; i++) {
849                 status = CSR_READ_2(sc, RL_ISR);
850                 CSR_WRITE_2(sc, RL_ISR, status);
851                 if ((status & (RL_ISR_TIMEOUT_EXPIRED|RL_ISR_RX_OK)) ==
852                     (RL_ISR_TIMEOUT_EXPIRED|RL_ISR_RX_OK))
853                         break;
854                 DELAY(10);
855         }
856
857         if (i == RL_TIMEOUT) {
858                 device_printf(sc->rl_dev,
859                     "diagnostic failed, failed to receive packet in"
860                     " loopback mode\n");
861                 error = EIO;
862                 goto done;
863         }
864
865         /*
866          * The packet should have been dumped into the first
867          * entry in the RX DMA ring. Grab it from there.
868          */
869
870         bus_dmamap_sync(sc->rl_ldata.rl_rx_list_tag,
871             sc->rl_ldata.rl_rx_list_map,
872             BUS_DMASYNC_POSTREAD);
873         bus_dmamap_sync(sc->rl_ldata.rl_rx_mtag,
874             sc->rl_ldata.rl_rx_desc[0].rx_dmamap,
875             BUS_DMASYNC_POSTREAD);
876         bus_dmamap_unload(sc->rl_ldata.rl_rx_mtag,
877             sc->rl_ldata.rl_rx_desc[0].rx_dmamap);
878
879         m0 = sc->rl_ldata.rl_rx_desc[0].rx_m;
880         sc->rl_ldata.rl_rx_desc[0].rx_m = NULL;
881         eh = mtod(m0, struct ether_header *);
882
883         cur_rx = &sc->rl_ldata.rl_rx_list[0];
884         total_len = RL_RXBYTES(cur_rx);
885         rxstat = le32toh(cur_rx->rl_cmdstat);
886
887         if (total_len != ETHER_MIN_LEN) {
888                 device_printf(sc->rl_dev,
889                     "diagnostic failed, received short packet\n");
890                 error = EIO;
891                 goto done;
892         }
893
894         /* Test that the received packet data matches what we sent. */
895
896         if (bcmp((char *)&eh->ether_dhost, (char *)&dst, ETHER_ADDR_LEN) ||
897             bcmp((char *)&eh->ether_shost, (char *)&src, ETHER_ADDR_LEN) ||
898             ntohs(eh->ether_type) != ETHERTYPE_IP) {
899                 device_printf(sc->rl_dev, "WARNING, DMA FAILURE!\n");
900                 device_printf(sc->rl_dev, "expected TX data: %6D/%6D/0x%x\n",
901                     dst, ":", src, ":", ETHERTYPE_IP);
902                 device_printf(sc->rl_dev, "received RX data: %6D/%6D/0x%x\n",
903                     eh->ether_dhost, ":", eh->ether_shost, ":",
904                     ntohs(eh->ether_type));
905                 device_printf(sc->rl_dev, "You may have a defective 32-bit "
906                     "NIC plugged into a 64-bit PCI slot.\n");
907                 device_printf(sc->rl_dev, "Please re-install the NIC in a "
908                     "32-bit slot for proper operation.\n");
909                 device_printf(sc->rl_dev, "Read the re(4) man page for more "
910                     "details.\n");
911                 error = EIO;
912         }
913
914 done:
915         /* Turn interface off, release resources */
916
917         sc->rl_testmode = 0;
918         sc->rl_flags &= ~RL_FLAG_LINK;
919         ifp->if_flags &= ~IFF_PROMISC;
920         re_stop(sc);
921         if (m0 != NULL)
922                 m_freem(m0);
923
924         RL_UNLOCK(sc);
925
926         return (error);
927 }
928
929 #endif
930
931 /*
932  * Probe for a RealTek 8139C+/8169/8110 chip. Check the PCI vendor and device
933  * IDs against our list and return a device name if we find a match.
934  */
935 static int
936 re_probe(device_t dev)
937 {
938         const struct rl_type    *t;
939         uint16_t                devid, vendor;
940         uint16_t                revid, sdevid;
941         int                     i;
942
943         vendor = pci_get_vendor(dev);
944         devid = pci_get_device(dev);
945         revid = pci_get_revid(dev);
946         sdevid = pci_get_subdevice(dev);
947
948         if (vendor == LINKSYS_VENDORID && devid == LINKSYS_DEVICEID_EG1032) {
949                 if (sdevid != LINKSYS_SUBDEVICE_EG1032_REV3) {
950                         /*
951                          * Only attach to rev. 3 of the Linksys EG1032 adapter.
952                          * Rev. 2 is supported by sk(4).
953                          */
954                         return (ENXIO);
955                 }
956         }
957
958         if (vendor == RT_VENDORID && devid == RT_DEVICEID_8139) {
959                 if (revid != 0x20) {
960                         /* 8139, let rl(4) take care of this device. */
961                         return (ENXIO);
962                 }
963         }
964
965         t = re_devs;
966         for (i = 0; i < nitems(re_devs); i++, t++) {
967                 if (vendor == t->rl_vid && devid == t->rl_did) {
968                         device_set_desc(dev, t->rl_name);
969                         return (BUS_PROBE_DEFAULT);
970                 }
971         }
972
973         return (ENXIO);
974 }
975
976 /*
977  * Map a single buffer address.
978  */
979
980 static void
981 re_dma_map_addr(void *arg, bus_dma_segment_t *segs, int nseg, int error)
982 {
983         bus_addr_t              *addr;
984
985         if (error)
986                 return;
987
988         KASSERT(nseg == 1, ("too many DMA segments, %d should be 1", nseg));
989         addr = arg;
990         *addr = segs->ds_addr;
991 }
992
993 static int
994 re_allocmem(device_t dev, struct rl_softc *sc)
995 {
996         bus_addr_t              lowaddr;
997         bus_size_t              rx_list_size, tx_list_size;
998         int                     error;
999         int                     i;
1000
1001         rx_list_size = sc->rl_ldata.rl_rx_desc_cnt * sizeof(struct rl_desc);
1002         tx_list_size = sc->rl_ldata.rl_tx_desc_cnt * sizeof(struct rl_desc);
1003
1004         /*
1005          * Allocate the parent bus DMA tag appropriate for PCI.
1006          * In order to use DAC, RL_CPLUSCMD_PCI_DAC bit of RL_CPLUS_CMD
1007          * register should be set. However some RealTek chips are known
1008          * to be buggy on DAC handling, therefore disable DAC by limiting
1009          * DMA address space to 32bit. PCIe variants of RealTek chips
1010          * may not have the limitation.
1011          */
1012         lowaddr = BUS_SPACE_MAXADDR;
1013         if ((sc->rl_flags & RL_FLAG_PCIE) == 0)
1014                 lowaddr = BUS_SPACE_MAXADDR_32BIT;
1015         error = bus_dma_tag_create(bus_get_dma_tag(dev), 1, 0,
1016             lowaddr, BUS_SPACE_MAXADDR, NULL, NULL,
1017             BUS_SPACE_MAXSIZE_32BIT, 0, BUS_SPACE_MAXSIZE_32BIT, 0,
1018             NULL, NULL, &sc->rl_parent_tag);
1019         if (error) {
1020                 device_printf(dev, "could not allocate parent DMA tag\n");
1021                 return (error);
1022         }
1023
1024         /*
1025          * Allocate map for TX mbufs.
1026          */
1027         error = bus_dma_tag_create(sc->rl_parent_tag, 1, 0,
1028             BUS_SPACE_MAXADDR, BUS_SPACE_MAXADDR, NULL,
1029             NULL, MCLBYTES * RL_NTXSEGS, RL_NTXSEGS, 4096, 0,
1030             NULL, NULL, &sc->rl_ldata.rl_tx_mtag);
1031         if (error) {
1032                 device_printf(dev, "could not allocate TX DMA tag\n");
1033                 return (error);
1034         }
1035
1036         /*
1037          * Allocate map for RX mbufs.
1038          */
1039
1040         if ((sc->rl_flags & RL_FLAG_JUMBOV2) != 0) {
1041                 error = bus_dma_tag_create(sc->rl_parent_tag, sizeof(uint64_t),
1042                     0, BUS_SPACE_MAXADDR, BUS_SPACE_MAXADDR, NULL, NULL,
1043                     MJUM9BYTES, 1, MJUM9BYTES, 0, NULL, NULL,
1044                     &sc->rl_ldata.rl_jrx_mtag);
1045                 if (error) {
1046                         device_printf(dev,
1047                             "could not allocate jumbo RX DMA tag\n");
1048                         return (error);
1049                 }
1050         }
1051         error = bus_dma_tag_create(sc->rl_parent_tag, sizeof(uint64_t), 0,
1052             BUS_SPACE_MAXADDR, BUS_SPACE_MAXADDR, NULL, NULL,
1053             MCLBYTES, 1, MCLBYTES, 0, NULL, NULL, &sc->rl_ldata.rl_rx_mtag);
1054         if (error) {
1055                 device_printf(dev, "could not allocate RX DMA tag\n");
1056                 return (error);
1057         }
1058
1059         /*
1060          * Allocate map for TX descriptor list.
1061          */
1062         error = bus_dma_tag_create(sc->rl_parent_tag, RL_RING_ALIGN,
1063             0, BUS_SPACE_MAXADDR_32BIT, BUS_SPACE_MAXADDR, NULL,
1064             NULL, tx_list_size, 1, tx_list_size, 0,
1065             NULL, NULL, &sc->rl_ldata.rl_tx_list_tag);
1066         if (error) {
1067                 device_printf(dev, "could not allocate TX DMA ring tag\n");
1068                 return (error);
1069         }
1070
1071         /* Allocate DMA'able memory for the TX ring */
1072
1073         error = bus_dmamem_alloc(sc->rl_ldata.rl_tx_list_tag,
1074             (void **)&sc->rl_ldata.rl_tx_list,
1075             BUS_DMA_WAITOK | BUS_DMA_COHERENT | BUS_DMA_ZERO,
1076             &sc->rl_ldata.rl_tx_list_map);
1077         if (error) {
1078                 device_printf(dev, "could not allocate TX DMA ring\n");
1079                 return (error);
1080         }
1081
1082         /* Load the map for the TX ring. */
1083
1084         sc->rl_ldata.rl_tx_list_addr = 0;
1085         error = bus_dmamap_load(sc->rl_ldata.rl_tx_list_tag,
1086              sc->rl_ldata.rl_tx_list_map, sc->rl_ldata.rl_tx_list,
1087              tx_list_size, re_dma_map_addr,
1088              &sc->rl_ldata.rl_tx_list_addr, BUS_DMA_NOWAIT);
1089         if (error != 0 || sc->rl_ldata.rl_tx_list_addr == 0) {
1090                 device_printf(dev, "could not load TX DMA ring\n");
1091                 return (ENOMEM);
1092         }
1093
1094         /* Create DMA maps for TX buffers */
1095
1096         for (i = 0; i < sc->rl_ldata.rl_tx_desc_cnt; i++) {
1097                 error = bus_dmamap_create(sc->rl_ldata.rl_tx_mtag, 0,
1098                     &sc->rl_ldata.rl_tx_desc[i].tx_dmamap);
1099                 if (error) {
1100                         device_printf(dev, "could not create DMA map for TX\n");
1101                         return (error);
1102                 }
1103         }
1104
1105         /*
1106          * Allocate map for RX descriptor list.
1107          */
1108         error = bus_dma_tag_create(sc->rl_parent_tag, RL_RING_ALIGN,
1109             0, BUS_SPACE_MAXADDR_32BIT, BUS_SPACE_MAXADDR, NULL,
1110             NULL, rx_list_size, 1, rx_list_size, 0,
1111             NULL, NULL, &sc->rl_ldata.rl_rx_list_tag);
1112         if (error) {
1113                 device_printf(dev, "could not create RX DMA ring tag\n");
1114                 return (error);
1115         }
1116
1117         /* Allocate DMA'able memory for the RX ring */
1118
1119         error = bus_dmamem_alloc(sc->rl_ldata.rl_rx_list_tag,
1120             (void **)&sc->rl_ldata.rl_rx_list,
1121             BUS_DMA_WAITOK | BUS_DMA_COHERENT | BUS_DMA_ZERO,
1122             &sc->rl_ldata.rl_rx_list_map);
1123         if (error) {
1124                 device_printf(dev, "could not allocate RX DMA ring\n");
1125                 return (error);
1126         }
1127
1128         /* Load the map for the RX ring. */
1129
1130         sc->rl_ldata.rl_rx_list_addr = 0;
1131         error = bus_dmamap_load(sc->rl_ldata.rl_rx_list_tag,
1132              sc->rl_ldata.rl_rx_list_map, sc->rl_ldata.rl_rx_list,
1133              rx_list_size, re_dma_map_addr,
1134              &sc->rl_ldata.rl_rx_list_addr, BUS_DMA_NOWAIT);
1135         if (error != 0 || sc->rl_ldata.rl_rx_list_addr == 0) {
1136                 device_printf(dev, "could not load RX DMA ring\n");
1137                 return (ENOMEM);
1138         }
1139
1140         /* Create DMA maps for RX buffers */
1141
1142         if ((sc->rl_flags & RL_FLAG_JUMBOV2) != 0) {
1143                 error = bus_dmamap_create(sc->rl_ldata.rl_jrx_mtag, 0,
1144                     &sc->rl_ldata.rl_jrx_sparemap);
1145                 if (error) {
1146                         device_printf(dev,
1147                             "could not create spare DMA map for jumbo RX\n");
1148                         return (error);
1149                 }
1150                 for (i = 0; i < sc->rl_ldata.rl_rx_desc_cnt; i++) {
1151                         error = bus_dmamap_create(sc->rl_ldata.rl_jrx_mtag, 0,
1152                             &sc->rl_ldata.rl_jrx_desc[i].rx_dmamap);
1153                         if (error) {
1154                                 device_printf(dev,
1155                                     "could not create DMA map for jumbo RX\n");
1156                                 return (error);
1157                         }
1158                 }
1159         }
1160         error = bus_dmamap_create(sc->rl_ldata.rl_rx_mtag, 0,
1161             &sc->rl_ldata.rl_rx_sparemap);
1162         if (error) {
1163                 device_printf(dev, "could not create spare DMA map for RX\n");
1164                 return (error);
1165         }
1166         for (i = 0; i < sc->rl_ldata.rl_rx_desc_cnt; i++) {
1167                 error = bus_dmamap_create(sc->rl_ldata.rl_rx_mtag, 0,
1168                     &sc->rl_ldata.rl_rx_desc[i].rx_dmamap);
1169                 if (error) {
1170                         device_printf(dev, "could not create DMA map for RX\n");
1171                         return (error);
1172                 }
1173         }
1174
1175         /* Create DMA map for statistics. */
1176         error = bus_dma_tag_create(sc->rl_parent_tag, RL_DUMP_ALIGN, 0,
1177             BUS_SPACE_MAXADDR, BUS_SPACE_MAXADDR, NULL, NULL,
1178             sizeof(struct rl_stats), 1, sizeof(struct rl_stats), 0, NULL, NULL,
1179             &sc->rl_ldata.rl_stag);
1180         if (error) {
1181                 device_printf(dev, "could not create statistics DMA tag\n");
1182                 return (error);
1183         }
1184         /* Allocate DMA'able memory for statistics. */
1185         error = bus_dmamem_alloc(sc->rl_ldata.rl_stag,
1186             (void **)&sc->rl_ldata.rl_stats,
1187             BUS_DMA_WAITOK | BUS_DMA_COHERENT | BUS_DMA_ZERO,
1188             &sc->rl_ldata.rl_smap);
1189         if (error) {
1190                 device_printf(dev,
1191                     "could not allocate statistics DMA memory\n");
1192                 return (error);
1193         }
1194         /* Load the map for statistics. */
1195         sc->rl_ldata.rl_stats_addr = 0;
1196         error = bus_dmamap_load(sc->rl_ldata.rl_stag, sc->rl_ldata.rl_smap,
1197             sc->rl_ldata.rl_stats, sizeof(struct rl_stats), re_dma_map_addr,
1198              &sc->rl_ldata.rl_stats_addr, BUS_DMA_NOWAIT);
1199         if (error != 0 || sc->rl_ldata.rl_stats_addr == 0) {
1200                 device_printf(dev, "could not load statistics DMA memory\n");
1201                 return (ENOMEM);
1202         }
1203
1204         return (0);
1205 }
1206
1207 /*
1208  * Attach the interface. Allocate softc structures, do ifmedia
1209  * setup and ethernet/BPF attach.
1210  */
1211 static int
1212 re_attach(device_t dev)
1213 {
1214         u_char                  eaddr[ETHER_ADDR_LEN];
1215         u_int16_t               as[ETHER_ADDR_LEN / 2];
1216         struct rl_softc         *sc;
1217         struct ifnet            *ifp;
1218         const struct rl_hwrev   *hw_rev;
1219         int                     capmask, error = 0, hwrev, i, msic, msixc,
1220                                 phy, reg, rid;
1221         u_int32_t               cap, ctl;
1222         u_int16_t               devid, re_did = 0;
1223         uint8_t                 cfg;
1224
1225         sc = device_get_softc(dev);
1226         sc->rl_dev = dev;
1227
1228         mtx_init(&sc->rl_mtx, device_get_nameunit(dev), MTX_NETWORK_LOCK,
1229             MTX_DEF);
1230         callout_init_mtx(&sc->rl_stat_callout, &sc->rl_mtx, 0);
1231
1232         /*
1233          * Map control/status registers.
1234          */
1235         pci_enable_busmaster(dev);
1236
1237         devid = pci_get_device(dev);
1238         /*
1239          * Prefer memory space register mapping over IO space.
1240          * Because RTL8169SC does not seem to work when memory mapping
1241          * is used always activate io mapping.
1242          */
1243         if (devid == RT_DEVICEID_8169SC)
1244                 prefer_iomap = 1;
1245         if (prefer_iomap == 0) {
1246                 sc->rl_res_id = PCIR_BAR(1);
1247                 sc->rl_res_type = SYS_RES_MEMORY;
1248                 /* RTL8168/8101E seems to use different BARs. */
1249                 if (devid == RT_DEVICEID_8168 || devid == RT_DEVICEID_8101E)
1250                         sc->rl_res_id = PCIR_BAR(2);
1251         } else {
1252                 sc->rl_res_id = PCIR_BAR(0);
1253                 sc->rl_res_type = SYS_RES_IOPORT;
1254         }
1255         sc->rl_res = bus_alloc_resource_any(dev, sc->rl_res_type,
1256             &sc->rl_res_id, RF_ACTIVE);
1257         if (sc->rl_res == NULL && prefer_iomap == 0) {
1258                 sc->rl_res_id = PCIR_BAR(0);
1259                 sc->rl_res_type = SYS_RES_IOPORT;
1260                 sc->rl_res = bus_alloc_resource_any(dev, sc->rl_res_type,
1261                     &sc->rl_res_id, RF_ACTIVE);
1262         }
1263         if (sc->rl_res == NULL) {
1264                 device_printf(dev, "couldn't map ports/memory\n");
1265                 error = ENXIO;
1266                 goto fail;
1267         }
1268
1269         sc->rl_btag = rman_get_bustag(sc->rl_res);
1270         sc->rl_bhandle = rman_get_bushandle(sc->rl_res);
1271
1272         msic = pci_msi_count(dev);
1273         msixc = pci_msix_count(dev);
1274         if (pci_find_cap(dev, PCIY_EXPRESS, &reg) == 0) {
1275                 sc->rl_flags |= RL_FLAG_PCIE;
1276                 sc->rl_expcap = reg;
1277         }
1278         if (bootverbose) {
1279                 device_printf(dev, "MSI count : %d\n", msic);
1280                 device_printf(dev, "MSI-X count : %d\n", msixc);
1281         }
1282         if (msix_disable > 0)
1283                 msixc = 0;
1284         if (msi_disable > 0)
1285                 msic = 0;
1286         /* Prefer MSI-X to MSI. */
1287         if (msixc > 0) {
1288                 msixc = RL_MSI_MESSAGES;
1289                 rid = PCIR_BAR(4);
1290                 sc->rl_res_pba = bus_alloc_resource_any(dev, SYS_RES_MEMORY,
1291                     &rid, RF_ACTIVE);
1292                 if (sc->rl_res_pba == NULL) {
1293                         device_printf(sc->rl_dev,
1294                             "could not allocate MSI-X PBA resource\n");
1295                 }
1296                 if (sc->rl_res_pba != NULL &&
1297                     pci_alloc_msix(dev, &msixc) == 0) {
1298                         if (msixc == RL_MSI_MESSAGES) {
1299                                 device_printf(dev, "Using %d MSI-X message\n",
1300                                     msixc);
1301                                 sc->rl_flags |= RL_FLAG_MSIX;
1302                         } else
1303                                 pci_release_msi(dev);
1304                 }
1305                 if ((sc->rl_flags & RL_FLAG_MSIX) == 0) {
1306                         if (sc->rl_res_pba != NULL)
1307                                 bus_release_resource(dev, SYS_RES_MEMORY, rid,
1308                                     sc->rl_res_pba);
1309                         sc->rl_res_pba = NULL;
1310                         msixc = 0;
1311                 }
1312         }
1313         /* Prefer MSI to INTx. */
1314         if (msixc == 0 && msic > 0) {
1315                 msic = RL_MSI_MESSAGES;
1316                 if (pci_alloc_msi(dev, &msic) == 0) {
1317                         if (msic == RL_MSI_MESSAGES) {
1318                                 device_printf(dev, "Using %d MSI message\n",
1319                                     msic);
1320                                 sc->rl_flags |= RL_FLAG_MSI;
1321                                 /* Explicitly set MSI enable bit. */
1322                                 CSR_WRITE_1(sc, RL_EECMD, RL_EE_MODE);
1323                                 cfg = CSR_READ_1(sc, RL_CFG2);
1324                                 cfg |= RL_CFG2_MSI;
1325                                 CSR_WRITE_1(sc, RL_CFG2, cfg);
1326                                 CSR_WRITE_1(sc, RL_EECMD, RL_EEMODE_OFF);
1327                         } else
1328                                 pci_release_msi(dev);
1329                 }
1330                 if ((sc->rl_flags & RL_FLAG_MSI) == 0)
1331                         msic = 0;
1332         }
1333
1334         /* Allocate interrupt */
1335         if ((sc->rl_flags & (RL_FLAG_MSI | RL_FLAG_MSIX)) == 0) {
1336                 rid = 0;
1337                 sc->rl_irq[0] = bus_alloc_resource_any(dev, SYS_RES_IRQ, &rid,
1338                     RF_SHAREABLE | RF_ACTIVE);
1339                 if (sc->rl_irq[0] == NULL) {
1340                         device_printf(dev, "couldn't allocate IRQ resources\n");
1341                         error = ENXIO;
1342                         goto fail;
1343                 }
1344         } else {
1345                 for (i = 0, rid = 1; i < RL_MSI_MESSAGES; i++, rid++) {
1346                         sc->rl_irq[i] = bus_alloc_resource_any(dev,
1347                             SYS_RES_IRQ, &rid, RF_ACTIVE);
1348                         if (sc->rl_irq[i] == NULL) {
1349                                 device_printf(dev,
1350                                     "couldn't allocate IRQ resources for "
1351                                     "message %d\n", rid);
1352                                 error = ENXIO;
1353                                 goto fail;
1354                         }
1355                 }
1356         }
1357
1358         if ((sc->rl_flags & RL_FLAG_MSI) == 0) {
1359                 CSR_WRITE_1(sc, RL_EECMD, RL_EE_MODE);
1360                 cfg = CSR_READ_1(sc, RL_CFG2);
1361                 if ((cfg & RL_CFG2_MSI) != 0) {
1362                         device_printf(dev, "turning off MSI enable bit.\n");
1363                         cfg &= ~RL_CFG2_MSI;
1364                         CSR_WRITE_1(sc, RL_CFG2, cfg);
1365                 }
1366                 CSR_WRITE_1(sc, RL_EECMD, RL_EEMODE_OFF);
1367         }
1368
1369         /* Disable ASPM L0S/L1 and CLKREQ. */
1370         if (sc->rl_expcap != 0) {
1371                 cap = pci_read_config(dev, sc->rl_expcap +
1372                     PCIER_LINK_CAP, 2);
1373                 if ((cap & PCIEM_LINK_CAP_ASPM) != 0) {
1374                         ctl = pci_read_config(dev, sc->rl_expcap +
1375                             PCIER_LINK_CTL, 2);
1376                         if ((ctl & (PCIEM_LINK_CTL_ECPM |
1377                             PCIEM_LINK_CTL_ASPMC))!= 0) {
1378                                 ctl &= ~(PCIEM_LINK_CTL_ECPM |
1379                                     PCIEM_LINK_CTL_ASPMC);
1380                                 pci_write_config(dev, sc->rl_expcap +
1381                                     PCIER_LINK_CTL, ctl, 2);
1382                                 device_printf(dev, "ASPM disabled\n");
1383                         }
1384                 } else
1385                         device_printf(dev, "no ASPM capability\n");
1386         }
1387
1388         hw_rev = re_hwrevs;
1389         hwrev = CSR_READ_4(sc, RL_TXCFG);
1390         switch (hwrev & 0x70000000) {
1391         case 0x00000000:
1392         case 0x10000000:
1393                 device_printf(dev, "Chip rev. 0x%08x\n", hwrev & 0xfc800000);
1394                 hwrev &= (RL_TXCFG_HWREV | 0x80000000);
1395                 break;
1396         default:
1397                 device_printf(dev, "Chip rev. 0x%08x\n", hwrev & 0x7c800000);
1398                 sc->rl_macrev = hwrev & 0x00700000;
1399                 hwrev &= RL_TXCFG_HWREV;
1400                 break;
1401         }
1402         device_printf(dev, "MAC rev. 0x%08x\n", sc->rl_macrev);
1403         while (hw_rev->rl_desc != NULL) {
1404                 if (hw_rev->rl_rev == hwrev) {
1405                         sc->rl_type = hw_rev->rl_type;
1406                         sc->rl_hwrev = hw_rev;
1407                         break;
1408                 }
1409                 hw_rev++;
1410         }
1411         if (hw_rev->rl_desc == NULL) {
1412                 device_printf(dev, "Unknown H/W revision: 0x%08x\n", hwrev);
1413                 error = ENXIO;
1414                 goto fail;
1415         }
1416
1417         switch (hw_rev->rl_rev) {
1418         case RL_HWREV_8139CPLUS:
1419                 sc->rl_flags |= RL_FLAG_FASTETHER | RL_FLAG_AUTOPAD;
1420                 break;
1421         case RL_HWREV_8100E:
1422         case RL_HWREV_8101E:
1423                 sc->rl_flags |= RL_FLAG_PHYWAKE | RL_FLAG_FASTETHER;
1424                 break;
1425         case RL_HWREV_8102E:
1426         case RL_HWREV_8102EL:
1427         case RL_HWREV_8102EL_SPIN1:
1428                 sc->rl_flags |= RL_FLAG_PHYWAKE | RL_FLAG_PAR | RL_FLAG_DESCV2 |
1429                     RL_FLAG_MACSTAT | RL_FLAG_FASTETHER | RL_FLAG_CMDSTOP |
1430                     RL_FLAG_AUTOPAD;
1431                 break;
1432         case RL_HWREV_8103E:
1433                 sc->rl_flags |= RL_FLAG_PHYWAKE | RL_FLAG_PAR | RL_FLAG_DESCV2 |
1434                     RL_FLAG_MACSTAT | RL_FLAG_FASTETHER | RL_FLAG_CMDSTOP |
1435                     RL_FLAG_AUTOPAD | RL_FLAG_MACSLEEP;
1436                 break;
1437         case RL_HWREV_8401E:
1438         case RL_HWREV_8105E:
1439         case RL_HWREV_8105E_SPIN1:
1440         case RL_HWREV_8106E:
1441                 sc->rl_flags |= RL_FLAG_PHYWAKE | RL_FLAG_PHYWAKE_PM |
1442                     RL_FLAG_PAR | RL_FLAG_DESCV2 | RL_FLAG_MACSTAT |
1443                     RL_FLAG_FASTETHER | RL_FLAG_CMDSTOP | RL_FLAG_AUTOPAD;
1444                 break;
1445         case RL_HWREV_8402:
1446                 sc->rl_flags |= RL_FLAG_PHYWAKE | RL_FLAG_PHYWAKE_PM |
1447                     RL_FLAG_PAR | RL_FLAG_DESCV2 | RL_FLAG_MACSTAT |
1448                     RL_FLAG_FASTETHER | RL_FLAG_CMDSTOP | RL_FLAG_AUTOPAD |
1449                     RL_FLAG_CMDSTOP_WAIT_TXQ;
1450                 break;
1451         case RL_HWREV_8168B_SPIN1:
1452         case RL_HWREV_8168B_SPIN2:
1453                 sc->rl_flags |= RL_FLAG_WOLRXENB;
1454                 /* FALLTHROUGH */
1455         case RL_HWREV_8168B_SPIN3:
1456                 sc->rl_flags |= RL_FLAG_PHYWAKE | RL_FLAG_MACSTAT;
1457                 break;
1458         case RL_HWREV_8168C_SPIN2:
1459                 sc->rl_flags |= RL_FLAG_MACSLEEP;
1460                 /* FALLTHROUGH */
1461         case RL_HWREV_8168C:
1462                 if (sc->rl_macrev == 0x00200000)
1463                         sc->rl_flags |= RL_FLAG_MACSLEEP;
1464                 /* FALLTHROUGH */
1465         case RL_HWREV_8168CP:
1466                 sc->rl_flags |= RL_FLAG_PHYWAKE | RL_FLAG_PAR |
1467                     RL_FLAG_DESCV2 | RL_FLAG_MACSTAT | RL_FLAG_CMDSTOP |
1468                     RL_FLAG_AUTOPAD | RL_FLAG_JUMBOV2 | RL_FLAG_WOL_MANLINK;
1469                 break;
1470         case RL_HWREV_8168D:
1471                 sc->rl_flags |= RL_FLAG_PHYWAKE | RL_FLAG_PHYWAKE_PM |
1472                     RL_FLAG_PAR | RL_FLAG_DESCV2 | RL_FLAG_MACSTAT |
1473                     RL_FLAG_CMDSTOP | RL_FLAG_AUTOPAD | RL_FLAG_JUMBOV2 |
1474                     RL_FLAG_WOL_MANLINK;
1475                 break;
1476         case RL_HWREV_8168DP:
1477                 sc->rl_flags |= RL_FLAG_PHYWAKE | RL_FLAG_PAR |
1478                     RL_FLAG_DESCV2 | RL_FLAG_MACSTAT | RL_FLAG_AUTOPAD |
1479                     RL_FLAG_JUMBOV2 | RL_FLAG_WAIT_TXPOLL | RL_FLAG_WOL_MANLINK;
1480                 break;
1481         case RL_HWREV_8168E:
1482                 sc->rl_flags |= RL_FLAG_PHYWAKE | RL_FLAG_PHYWAKE_PM |
1483                     RL_FLAG_PAR | RL_FLAG_DESCV2 | RL_FLAG_MACSTAT |
1484                     RL_FLAG_CMDSTOP | RL_FLAG_AUTOPAD | RL_FLAG_JUMBOV2 |
1485                     RL_FLAG_WOL_MANLINK;
1486                 break;
1487         case RL_HWREV_8168E_VL:
1488         case RL_HWREV_8168F:
1489                 sc->rl_flags |= RL_FLAG_EARLYOFF;
1490                 /* FALLTHROUGH */
1491         case RL_HWREV_8411:
1492                 sc->rl_flags |= RL_FLAG_PHYWAKE | RL_FLAG_PAR |
1493                     RL_FLAG_DESCV2 | RL_FLAG_MACSTAT | RL_FLAG_CMDSTOP |
1494                     RL_FLAG_AUTOPAD | RL_FLAG_JUMBOV2 |
1495                     RL_FLAG_CMDSTOP_WAIT_TXQ | RL_FLAG_WOL_MANLINK;
1496                 break;
1497         case RL_HWREV_8168EP:
1498         case RL_HWREV_8168G:
1499         case RL_HWREV_8411B:
1500                 sc->rl_flags |= RL_FLAG_PHYWAKE | RL_FLAG_PAR |
1501                     RL_FLAG_DESCV2 | RL_FLAG_MACSTAT | RL_FLAG_CMDSTOP |
1502                     RL_FLAG_AUTOPAD | RL_FLAG_JUMBOV2 |
1503                     RL_FLAG_CMDSTOP_WAIT_TXQ | RL_FLAG_WOL_MANLINK |
1504                     RL_FLAG_8168G_PLUS;
1505                 break;
1506         case RL_HWREV_8168GU:
1507         case RL_HWREV_8168H:
1508                 if (pci_get_device(dev) == RT_DEVICEID_8101E) {
1509                         /* RTL8106E(US), RTL8107E */
1510                         sc->rl_flags |= RL_FLAG_FASTETHER;
1511                 } else
1512                         sc->rl_flags |= RL_FLAG_JUMBOV2 | RL_FLAG_WOL_MANLINK;
1513
1514                 sc->rl_flags |= RL_FLAG_PHYWAKE | RL_FLAG_PAR |
1515                     RL_FLAG_DESCV2 | RL_FLAG_MACSTAT | RL_FLAG_CMDSTOP |
1516                     RL_FLAG_AUTOPAD | RL_FLAG_CMDSTOP_WAIT_TXQ |
1517                     RL_FLAG_8168G_PLUS;
1518                 break;
1519         case RL_HWREV_8169_8110SB:
1520         case RL_HWREV_8169_8110SBL:
1521         case RL_HWREV_8169_8110SC:
1522         case RL_HWREV_8169_8110SCE:
1523                 sc->rl_flags |= RL_FLAG_PHYWAKE;
1524                 /* FALLTHROUGH */
1525         case RL_HWREV_8169:
1526         case RL_HWREV_8169S:
1527         case RL_HWREV_8110S:
1528                 sc->rl_flags |= RL_FLAG_MACRESET;
1529                 break;
1530         default:
1531                 break;
1532         }
1533
1534         if (sc->rl_hwrev->rl_rev == RL_HWREV_8139CPLUS) {
1535                 sc->rl_cfg0 = RL_8139_CFG0;
1536                 sc->rl_cfg1 = RL_8139_CFG1;
1537                 sc->rl_cfg2 = 0;
1538                 sc->rl_cfg3 = RL_8139_CFG3;
1539                 sc->rl_cfg4 = RL_8139_CFG4;
1540                 sc->rl_cfg5 = RL_8139_CFG5;
1541         } else {
1542                 sc->rl_cfg0 = RL_CFG0;
1543                 sc->rl_cfg1 = RL_CFG1;
1544                 sc->rl_cfg2 = RL_CFG2;
1545                 sc->rl_cfg3 = RL_CFG3;
1546                 sc->rl_cfg4 = RL_CFG4;
1547                 sc->rl_cfg5 = RL_CFG5;
1548         }
1549
1550         /* Reset the adapter. */
1551         RL_LOCK(sc);
1552         re_reset(sc);
1553         RL_UNLOCK(sc);
1554
1555         /* Enable PME. */
1556         CSR_WRITE_1(sc, RL_EECMD, RL_EE_MODE);
1557         cfg = CSR_READ_1(sc, sc->rl_cfg1);
1558         cfg |= RL_CFG1_PME;
1559         CSR_WRITE_1(sc, sc->rl_cfg1, cfg);
1560         cfg = CSR_READ_1(sc, sc->rl_cfg5);
1561         cfg &= RL_CFG5_PME_STS;
1562         CSR_WRITE_1(sc, sc->rl_cfg5, cfg);
1563         CSR_WRITE_1(sc, RL_EECMD, RL_EEMODE_OFF);
1564
1565         if ((sc->rl_flags & RL_FLAG_PAR) != 0) {
1566                 /*
1567                  * XXX Should have a better way to extract station
1568                  * address from EEPROM.
1569                  */
1570                 for (i = 0; i < ETHER_ADDR_LEN; i++)
1571                         eaddr[i] = CSR_READ_1(sc, RL_IDR0 + i);
1572         } else {
1573                 sc->rl_eewidth = RL_9356_ADDR_LEN;
1574                 re_read_eeprom(sc, (caddr_t)&re_did, 0, 1);
1575                 if (re_did != 0x8129)
1576                         sc->rl_eewidth = RL_9346_ADDR_LEN;
1577
1578                 /*
1579                  * Get station address from the EEPROM.
1580                  */
1581                 re_read_eeprom(sc, (caddr_t)as, RL_EE_EADDR, 3);
1582                 for (i = 0; i < ETHER_ADDR_LEN / 2; i++)
1583                         as[i] = le16toh(as[i]);
1584                 bcopy(as, eaddr, ETHER_ADDR_LEN);
1585         }
1586
1587         if (sc->rl_type == RL_8169) {
1588                 /* Set RX length mask and number of descriptors. */
1589                 sc->rl_rxlenmask = RL_RDESC_STAT_GFRAGLEN;
1590                 sc->rl_txstart = RL_GTXSTART;
1591                 sc->rl_ldata.rl_tx_desc_cnt = RL_8169_TX_DESC_CNT;
1592                 sc->rl_ldata.rl_rx_desc_cnt = RL_8169_RX_DESC_CNT;
1593         } else {
1594                 /* Set RX length mask and number of descriptors. */
1595                 sc->rl_rxlenmask = RL_RDESC_STAT_FRAGLEN;
1596                 sc->rl_txstart = RL_TXSTART;
1597                 sc->rl_ldata.rl_tx_desc_cnt = RL_8139_TX_DESC_CNT;
1598                 sc->rl_ldata.rl_rx_desc_cnt = RL_8139_RX_DESC_CNT;
1599         }
1600
1601         error = re_allocmem(dev, sc);
1602         if (error)
1603                 goto fail;
1604         re_add_sysctls(sc);
1605
1606         ifp = sc->rl_ifp = if_alloc(IFT_ETHER);
1607         if (ifp == NULL) {
1608                 device_printf(dev, "can not if_alloc()\n");
1609                 error = ENOSPC;
1610                 goto fail;
1611         }
1612
1613         /* Take controller out of deep sleep mode. */
1614         if ((sc->rl_flags & RL_FLAG_MACSLEEP) != 0) {
1615                 if ((CSR_READ_1(sc, RL_MACDBG) & 0x80) == 0x80)
1616                         CSR_WRITE_1(sc, RL_GPIO,
1617                             CSR_READ_1(sc, RL_GPIO) | 0x01);
1618                 else
1619                         CSR_WRITE_1(sc, RL_GPIO,
1620                             CSR_READ_1(sc, RL_GPIO) & ~0x01);
1621         }
1622
1623         /* Take PHY out of power down mode. */
1624         if ((sc->rl_flags & RL_FLAG_PHYWAKE_PM) != 0) {
1625                 CSR_WRITE_1(sc, RL_PMCH, CSR_READ_1(sc, RL_PMCH) | 0x80);
1626                 if (hw_rev->rl_rev == RL_HWREV_8401E)
1627                         CSR_WRITE_1(sc, 0xD1, CSR_READ_1(sc, 0xD1) & ~0x08);
1628         }
1629         if ((sc->rl_flags & RL_FLAG_PHYWAKE) != 0) {
1630                 re_gmii_writereg(dev, 1, 0x1f, 0);
1631                 re_gmii_writereg(dev, 1, 0x0e, 0);
1632         }
1633
1634         ifp->if_softc = sc;
1635         if_initname(ifp, device_get_name(dev), device_get_unit(dev));
1636         ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
1637         ifp->if_ioctl = re_ioctl;
1638         ifp->if_start = re_start;
1639         /*
1640          * RTL8168/8111C generates wrong IP checksummed frame if the
1641          * packet has IP options so disable TX checksum offloading.
1642          */
1643         if (sc->rl_hwrev->rl_rev == RL_HWREV_8168C ||
1644             sc->rl_hwrev->rl_rev == RL_HWREV_8168C_SPIN2 ||
1645             sc->rl_hwrev->rl_rev == RL_HWREV_8168CP) {
1646                 ifp->if_hwassist = 0;
1647                 ifp->if_capabilities = IFCAP_RXCSUM | IFCAP_TSO4;
1648         } else {
1649                 ifp->if_hwassist = CSUM_IP | CSUM_TCP | CSUM_UDP;
1650                 ifp->if_capabilities = IFCAP_HWCSUM | IFCAP_TSO4;
1651         }
1652         ifp->if_hwassist |= CSUM_TSO;
1653         ifp->if_capenable = ifp->if_capabilities;
1654         ifp->if_init = re_init;
1655         IFQ_SET_MAXLEN(&ifp->if_snd, RL_IFQ_MAXLEN);
1656         ifp->if_snd.ifq_drv_maxlen = RL_IFQ_MAXLEN;
1657         IFQ_SET_READY(&ifp->if_snd);
1658
1659         TASK_INIT(&sc->rl_inttask, 0, re_int_task, sc);
1660
1661 #define RE_PHYAD_INTERNAL        0
1662
1663         /* Do MII setup. */
1664         phy = RE_PHYAD_INTERNAL;
1665         if (sc->rl_type == RL_8169)
1666                 phy = 1;
1667         capmask = BMSR_DEFCAPMASK;
1668         if ((sc->rl_flags & RL_FLAG_FASTETHER) != 0)
1669                  capmask &= ~BMSR_EXTSTAT;
1670         error = mii_attach(dev, &sc->rl_miibus, ifp, re_ifmedia_upd,
1671             re_ifmedia_sts, capmask, phy, MII_OFFSET_ANY, MIIF_DOPAUSE);
1672         if (error != 0) {
1673                 device_printf(dev, "attaching PHYs failed\n");
1674                 goto fail;
1675         }
1676
1677         /*
1678          * Call MI attach routine.
1679          */
1680         ether_ifattach(ifp, eaddr);
1681
1682         /* VLAN capability setup */
1683         ifp->if_capabilities |= IFCAP_VLAN_MTU | IFCAP_VLAN_HWTAGGING;
1684         if (ifp->if_capabilities & IFCAP_HWCSUM)
1685                 ifp->if_capabilities |= IFCAP_VLAN_HWCSUM;
1686         /* Enable WOL if PM is supported. */
1687         if (pci_find_cap(sc->rl_dev, PCIY_PMG, &reg) == 0)
1688                 ifp->if_capabilities |= IFCAP_WOL;
1689         ifp->if_capenable = ifp->if_capabilities;
1690         ifp->if_capenable &= ~(IFCAP_WOL_UCAST | IFCAP_WOL_MCAST);
1691         /*
1692          * Don't enable TSO by default.  It is known to generate
1693          * corrupted TCP segments(bad TCP options) under certain
1694          * circumstances.
1695          */
1696         ifp->if_hwassist &= ~CSUM_TSO;
1697         ifp->if_capenable &= ~(IFCAP_TSO4 | IFCAP_VLAN_HWTSO);
1698 #ifdef DEVICE_POLLING
1699         ifp->if_capabilities |= IFCAP_POLLING;
1700 #endif
1701         /*
1702          * Tell the upper layer(s) we support long frames.
1703          * Must appear after the call to ether_ifattach() because
1704          * ether_ifattach() sets ifi_hdrlen to the default value.
1705          */
1706         ifp->if_hdrlen = sizeof(struct ether_vlan_header);
1707
1708 #ifdef DEV_NETMAP
1709         re_netmap_attach(sc);
1710 #endif /* DEV_NETMAP */
1711
1712 #ifdef RE_DIAG
1713         /*
1714          * Perform hardware diagnostic on the original RTL8169.
1715          * Some 32-bit cards were incorrectly wired and would
1716          * malfunction if plugged into a 64-bit slot.
1717          */
1718         if (hwrev == RL_HWREV_8169) {
1719                 error = re_diag(sc);
1720                 if (error) {
1721                         device_printf(dev,
1722                         "attach aborted due to hardware diag failure\n");
1723                         ether_ifdetach(ifp);
1724                         goto fail;
1725                 }
1726         }
1727 #endif
1728
1729 #ifdef RE_TX_MODERATION
1730         intr_filter = 1;
1731 #endif
1732         /* Hook interrupt last to avoid having to lock softc */
1733         if ((sc->rl_flags & (RL_FLAG_MSI | RL_FLAG_MSIX)) != 0 &&
1734             intr_filter == 0) {
1735                 error = bus_setup_intr(dev, sc->rl_irq[0],
1736                     INTR_TYPE_NET | INTR_MPSAFE, NULL, re_intr_msi, sc,
1737                     &sc->rl_intrhand[0]);
1738         } else {
1739                 error = bus_setup_intr(dev, sc->rl_irq[0],
1740                     INTR_TYPE_NET | INTR_MPSAFE, re_intr, NULL, sc,
1741                     &sc->rl_intrhand[0]);
1742         }
1743         if (error) {
1744                 device_printf(dev, "couldn't set up irq\n");
1745                 ether_ifdetach(ifp);
1746                 goto fail;
1747         }
1748
1749         DEBUGNET_SET(ifp, re);
1750
1751 fail:
1752         if (error)
1753                 re_detach(dev);
1754
1755         return (error);
1756 }
1757
1758 /*
1759  * Shutdown hardware and free up resources. This can be called any
1760  * time after the mutex has been initialized. It is called in both
1761  * the error case in attach and the normal detach case so it needs
1762  * to be careful about only freeing resources that have actually been
1763  * allocated.
1764  */
1765 static int
1766 re_detach(device_t dev)
1767 {
1768         struct rl_softc         *sc;
1769         struct ifnet            *ifp;
1770         int                     i, rid;
1771
1772         sc = device_get_softc(dev);
1773         ifp = sc->rl_ifp;
1774         KASSERT(mtx_initialized(&sc->rl_mtx), ("re mutex not initialized"));
1775
1776         /* These should only be active if attach succeeded */
1777         if (device_is_attached(dev)) {
1778 #ifdef DEVICE_POLLING
1779                 if (ifp->if_capenable & IFCAP_POLLING)
1780                         ether_poll_deregister(ifp);
1781 #endif
1782                 RL_LOCK(sc);
1783 #if 0
1784                 sc->suspended = 1;
1785 #endif
1786                 re_stop(sc);
1787                 RL_UNLOCK(sc);
1788                 callout_drain(&sc->rl_stat_callout);
1789                 taskqueue_drain(taskqueue_fast, &sc->rl_inttask);
1790                 /*
1791                  * Force off the IFF_UP flag here, in case someone
1792                  * still had a BPF descriptor attached to this
1793                  * interface. If they do, ether_ifdetach() will cause
1794                  * the BPF code to try and clear the promisc mode
1795                  * flag, which will bubble down to re_ioctl(),
1796                  * which will try to call re_init() again. This will
1797                  * turn the NIC back on and restart the MII ticker,
1798                  * which will panic the system when the kernel tries
1799                  * to invoke the re_tick() function that isn't there
1800                  * anymore.
1801                  */
1802                 ifp->if_flags &= ~IFF_UP;
1803                 ether_ifdetach(ifp);
1804         }
1805         if (sc->rl_miibus)
1806                 device_delete_child(dev, sc->rl_miibus);
1807         bus_generic_detach(dev);
1808
1809         /*
1810          * The rest is resource deallocation, so we should already be
1811          * stopped here.
1812          */
1813
1814         if (sc->rl_intrhand[0] != NULL) {
1815                 bus_teardown_intr(dev, sc->rl_irq[0], sc->rl_intrhand[0]);
1816                 sc->rl_intrhand[0] = NULL;
1817         }
1818         if (ifp != NULL) {
1819 #ifdef DEV_NETMAP
1820                 netmap_detach(ifp);
1821 #endif /* DEV_NETMAP */
1822                 if_free(ifp);
1823         }
1824         if ((sc->rl_flags & (RL_FLAG_MSI | RL_FLAG_MSIX)) == 0)
1825                 rid = 0;
1826         else
1827                 rid = 1;
1828         if (sc->rl_irq[0] != NULL) {
1829                 bus_release_resource(dev, SYS_RES_IRQ, rid, sc->rl_irq[0]);
1830                 sc->rl_irq[0] = NULL;
1831         }
1832         if ((sc->rl_flags & (RL_FLAG_MSI | RL_FLAG_MSIX)) != 0)
1833                 pci_release_msi(dev);
1834         if (sc->rl_res_pba) {
1835                 rid = PCIR_BAR(4);
1836                 bus_release_resource(dev, SYS_RES_MEMORY, rid, sc->rl_res_pba);
1837         }
1838         if (sc->rl_res)
1839                 bus_release_resource(dev, sc->rl_res_type, sc->rl_res_id,
1840                     sc->rl_res);
1841
1842         /* Unload and free the RX DMA ring memory and map */
1843
1844         if (sc->rl_ldata.rl_rx_list_tag) {
1845                 if (sc->rl_ldata.rl_rx_list_addr)
1846                         bus_dmamap_unload(sc->rl_ldata.rl_rx_list_tag,
1847                             sc->rl_ldata.rl_rx_list_map);
1848                 if (sc->rl_ldata.rl_rx_list)
1849                         bus_dmamem_free(sc->rl_ldata.rl_rx_list_tag,
1850                             sc->rl_ldata.rl_rx_list,
1851                             sc->rl_ldata.rl_rx_list_map);
1852                 bus_dma_tag_destroy(sc->rl_ldata.rl_rx_list_tag);
1853         }
1854
1855         /* Unload and free the TX DMA ring memory and map */
1856
1857         if (sc->rl_ldata.rl_tx_list_tag) {
1858                 if (sc->rl_ldata.rl_tx_list_addr)
1859                         bus_dmamap_unload(sc->rl_ldata.rl_tx_list_tag,
1860                             sc->rl_ldata.rl_tx_list_map);
1861                 if (sc->rl_ldata.rl_tx_list)
1862                         bus_dmamem_free(sc->rl_ldata.rl_tx_list_tag,
1863                             sc->rl_ldata.rl_tx_list,
1864                             sc->rl_ldata.rl_tx_list_map);
1865                 bus_dma_tag_destroy(sc->rl_ldata.rl_tx_list_tag);
1866         }
1867
1868         /* Destroy all the RX and TX buffer maps */
1869
1870         if (sc->rl_ldata.rl_tx_mtag) {
1871                 for (i = 0; i < sc->rl_ldata.rl_tx_desc_cnt; i++) {
1872                         if (sc->rl_ldata.rl_tx_desc[i].tx_dmamap)
1873                                 bus_dmamap_destroy(sc->rl_ldata.rl_tx_mtag,
1874                                     sc->rl_ldata.rl_tx_desc[i].tx_dmamap);
1875                 }
1876                 bus_dma_tag_destroy(sc->rl_ldata.rl_tx_mtag);
1877         }
1878         if (sc->rl_ldata.rl_rx_mtag) {
1879                 for (i = 0; i < sc->rl_ldata.rl_rx_desc_cnt; i++) {
1880                         if (sc->rl_ldata.rl_rx_desc[i].rx_dmamap)
1881                                 bus_dmamap_destroy(sc->rl_ldata.rl_rx_mtag,
1882                                     sc->rl_ldata.rl_rx_desc[i].rx_dmamap);
1883                 }
1884                 if (sc->rl_ldata.rl_rx_sparemap)
1885                         bus_dmamap_destroy(sc->rl_ldata.rl_rx_mtag,
1886                             sc->rl_ldata.rl_rx_sparemap);
1887                 bus_dma_tag_destroy(sc->rl_ldata.rl_rx_mtag);
1888         }
1889         if (sc->rl_ldata.rl_jrx_mtag) {
1890                 for (i = 0; i < sc->rl_ldata.rl_rx_desc_cnt; i++) {
1891                         if (sc->rl_ldata.rl_jrx_desc[i].rx_dmamap)
1892                                 bus_dmamap_destroy(sc->rl_ldata.rl_jrx_mtag,
1893                                     sc->rl_ldata.rl_jrx_desc[i].rx_dmamap);
1894                 }
1895                 if (sc->rl_ldata.rl_jrx_sparemap)
1896                         bus_dmamap_destroy(sc->rl_ldata.rl_jrx_mtag,
1897                             sc->rl_ldata.rl_jrx_sparemap);
1898                 bus_dma_tag_destroy(sc->rl_ldata.rl_jrx_mtag);
1899         }
1900         /* Unload and free the stats buffer and map */
1901
1902         if (sc->rl_ldata.rl_stag) {
1903                 if (sc->rl_ldata.rl_stats_addr)
1904                         bus_dmamap_unload(sc->rl_ldata.rl_stag,
1905                             sc->rl_ldata.rl_smap);
1906                 if (sc->rl_ldata.rl_stats)
1907                         bus_dmamem_free(sc->rl_ldata.rl_stag,
1908                             sc->rl_ldata.rl_stats, sc->rl_ldata.rl_smap);
1909                 bus_dma_tag_destroy(sc->rl_ldata.rl_stag);
1910         }
1911
1912         if (sc->rl_parent_tag)
1913                 bus_dma_tag_destroy(sc->rl_parent_tag);
1914
1915         mtx_destroy(&sc->rl_mtx);
1916
1917         return (0);
1918 }
1919
1920 static __inline void
1921 re_discard_rxbuf(struct rl_softc *sc, int idx)
1922 {
1923         struct rl_desc          *desc;
1924         struct rl_rxdesc        *rxd;
1925         uint32_t                cmdstat;
1926
1927         if (sc->rl_ifp->if_mtu > RL_MTU &&
1928             (sc->rl_flags & RL_FLAG_JUMBOV2) != 0)
1929                 rxd = &sc->rl_ldata.rl_jrx_desc[idx];
1930         else
1931                 rxd = &sc->rl_ldata.rl_rx_desc[idx];
1932         desc = &sc->rl_ldata.rl_rx_list[idx];
1933         desc->rl_vlanctl = 0;
1934         cmdstat = rxd->rx_size;
1935         if (idx == sc->rl_ldata.rl_rx_desc_cnt - 1)
1936                 cmdstat |= RL_RDESC_CMD_EOR;
1937         desc->rl_cmdstat = htole32(cmdstat | RL_RDESC_CMD_OWN);
1938 }
1939
1940 static int
1941 re_newbuf(struct rl_softc *sc, int idx)
1942 {
1943         struct mbuf             *m;
1944         struct rl_rxdesc        *rxd;
1945         bus_dma_segment_t       segs[1];
1946         bus_dmamap_t            map;
1947         struct rl_desc          *desc;
1948         uint32_t                cmdstat;
1949         int                     error, nsegs;
1950
1951         m = m_getcl(M_NOWAIT, MT_DATA, M_PKTHDR);
1952         if (m == NULL)
1953                 return (ENOBUFS);
1954
1955         m->m_len = m->m_pkthdr.len = MCLBYTES;
1956 #ifdef RE_FIXUP_RX
1957         /*
1958          * This is part of an evil trick to deal with non-x86 platforms.
1959          * The RealTek chip requires RX buffers to be aligned on 64-bit
1960          * boundaries, but that will hose non-x86 machines. To get around
1961          * this, we leave some empty space at the start of each buffer
1962          * and for non-x86 hosts, we copy the buffer back six bytes
1963          * to achieve word alignment. This is slightly more efficient
1964          * than allocating a new buffer, copying the contents, and
1965          * discarding the old buffer.
1966          */
1967         m_adj(m, RE_ETHER_ALIGN);
1968 #endif
1969         error = bus_dmamap_load_mbuf_sg(sc->rl_ldata.rl_rx_mtag,
1970             sc->rl_ldata.rl_rx_sparemap, m, segs, &nsegs, BUS_DMA_NOWAIT);
1971         if (error != 0) {
1972                 m_freem(m);
1973                 return (ENOBUFS);
1974         }
1975         KASSERT(nsegs == 1, ("%s: %d segment returned!", __func__, nsegs));
1976
1977         rxd = &sc->rl_ldata.rl_rx_desc[idx];
1978         if (rxd->rx_m != NULL) {
1979                 bus_dmamap_sync(sc->rl_ldata.rl_rx_mtag, rxd->rx_dmamap,
1980                     BUS_DMASYNC_POSTREAD);
1981                 bus_dmamap_unload(sc->rl_ldata.rl_rx_mtag, rxd->rx_dmamap);
1982         }
1983
1984         rxd->rx_m = m;
1985         map = rxd->rx_dmamap;
1986         rxd->rx_dmamap = sc->rl_ldata.rl_rx_sparemap;
1987         rxd->rx_size = segs[0].ds_len;
1988         sc->rl_ldata.rl_rx_sparemap = map;
1989         bus_dmamap_sync(sc->rl_ldata.rl_rx_mtag, rxd->rx_dmamap,
1990             BUS_DMASYNC_PREREAD);
1991
1992         desc = &sc->rl_ldata.rl_rx_list[idx];
1993         desc->rl_vlanctl = 0;
1994         desc->rl_bufaddr_lo = htole32(RL_ADDR_LO(segs[0].ds_addr));
1995         desc->rl_bufaddr_hi = htole32(RL_ADDR_HI(segs[0].ds_addr));
1996         cmdstat = segs[0].ds_len;
1997         if (idx == sc->rl_ldata.rl_rx_desc_cnt - 1)
1998                 cmdstat |= RL_RDESC_CMD_EOR;
1999         desc->rl_cmdstat = htole32(cmdstat | RL_RDESC_CMD_OWN);
2000
2001         return (0);
2002 }
2003
2004 static int
2005 re_jumbo_newbuf(struct rl_softc *sc, int idx)
2006 {
2007         struct mbuf             *m;
2008         struct rl_rxdesc        *rxd;
2009         bus_dma_segment_t       segs[1];
2010         bus_dmamap_t            map;
2011         struct rl_desc          *desc;
2012         uint32_t                cmdstat;
2013         int                     error, nsegs;
2014
2015         m = m_getjcl(M_NOWAIT, MT_DATA, M_PKTHDR, MJUM9BYTES);
2016         if (m == NULL)
2017                 return (ENOBUFS);
2018         m->m_len = m->m_pkthdr.len = MJUM9BYTES;
2019 #ifdef RE_FIXUP_RX
2020         m_adj(m, RE_ETHER_ALIGN);
2021 #endif
2022         error = bus_dmamap_load_mbuf_sg(sc->rl_ldata.rl_jrx_mtag,
2023             sc->rl_ldata.rl_jrx_sparemap, m, segs, &nsegs, BUS_DMA_NOWAIT);
2024         if (error != 0) {
2025                 m_freem(m);
2026                 return (ENOBUFS);
2027         }
2028         KASSERT(nsegs == 1, ("%s: %d segment returned!", __func__, nsegs));
2029
2030         rxd = &sc->rl_ldata.rl_jrx_desc[idx];
2031         if (rxd->rx_m != NULL) {
2032                 bus_dmamap_sync(sc->rl_ldata.rl_jrx_mtag, rxd->rx_dmamap,
2033                     BUS_DMASYNC_POSTREAD);
2034                 bus_dmamap_unload(sc->rl_ldata.rl_jrx_mtag, rxd->rx_dmamap);
2035         }
2036
2037         rxd->rx_m = m;
2038         map = rxd->rx_dmamap;
2039         rxd->rx_dmamap = sc->rl_ldata.rl_jrx_sparemap;
2040         rxd->rx_size = segs[0].ds_len;
2041         sc->rl_ldata.rl_jrx_sparemap = map;
2042         bus_dmamap_sync(sc->rl_ldata.rl_jrx_mtag, rxd->rx_dmamap,
2043             BUS_DMASYNC_PREREAD);
2044
2045         desc = &sc->rl_ldata.rl_rx_list[idx];
2046         desc->rl_vlanctl = 0;
2047         desc->rl_bufaddr_lo = htole32(RL_ADDR_LO(segs[0].ds_addr));
2048         desc->rl_bufaddr_hi = htole32(RL_ADDR_HI(segs[0].ds_addr));
2049         cmdstat = segs[0].ds_len;
2050         if (idx == sc->rl_ldata.rl_rx_desc_cnt - 1)
2051                 cmdstat |= RL_RDESC_CMD_EOR;
2052         desc->rl_cmdstat = htole32(cmdstat | RL_RDESC_CMD_OWN);
2053
2054         return (0);
2055 }
2056
#ifdef RE_FIXUP_RX
/*
 * Slide the contents of mbuf 'm' towards lower addresses by
 * (RE_ETHER_ALIGN - ETHER_ALIGN) bytes, one 16-bit word at a time, and
 * move m_data back by the same amount.
 */
static __inline void
re_fixup_rx(struct mbuf *m)
{
	uint16_t		*from, *to;
	int			w;

	from = mtod(m, uint16_t *);
	to = from - (RE_ETHER_ALIGN - ETHER_ALIGN) / sizeof *from;

	/*
	 * The destination lies below the source, so an ascending copy
	 * is safe although the two regions overlap.  One word more
	 * than m_len / 2 is copied.
	 */
	for (w = 0; w < (m->m_len / sizeof(uint16_t) + 1); w++)
		to[w] = from[w];

	m->m_data -= RE_ETHER_ALIGN - ETHER_ALIGN;
}
#endif
2073
2074 static int
2075 re_tx_list_init(struct rl_softc *sc)
2076 {
2077         struct rl_desc          *desc;
2078         int                     i;
2079
2080         RL_LOCK_ASSERT(sc);
2081
2082         bzero(sc->rl_ldata.rl_tx_list,
2083             sc->rl_ldata.rl_tx_desc_cnt * sizeof(struct rl_desc));
2084         for (i = 0; i < sc->rl_ldata.rl_tx_desc_cnt; i++)
2085                 sc->rl_ldata.rl_tx_desc[i].tx_m = NULL;
2086 #ifdef DEV_NETMAP
2087         re_netmap_tx_init(sc);
2088 #endif /* DEV_NETMAP */
2089         /* Set EOR. */
2090         desc = &sc->rl_ldata.rl_tx_list[sc->rl_ldata.rl_tx_desc_cnt - 1];
2091         desc->rl_cmdstat |= htole32(RL_TDESC_CMD_EOR);
2092
2093         bus_dmamap_sync(sc->rl_ldata.rl_tx_list_tag,
2094             sc->rl_ldata.rl_tx_list_map,
2095             BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
2096
2097         sc->rl_ldata.rl_tx_prodidx = 0;
2098         sc->rl_ldata.rl_tx_considx = 0;
2099         sc->rl_ldata.rl_tx_free = sc->rl_ldata.rl_tx_desc_cnt;
2100
2101         return (0);
2102 }
2103
2104 static int
2105 re_rx_list_init(struct rl_softc *sc)
2106 {
2107         int                     error, i;
2108
2109         bzero(sc->rl_ldata.rl_rx_list,
2110             sc->rl_ldata.rl_rx_desc_cnt * sizeof(struct rl_desc));
2111         for (i = 0; i < sc->rl_ldata.rl_rx_desc_cnt; i++) {
2112                 sc->rl_ldata.rl_rx_desc[i].rx_m = NULL;
2113                 if ((error = re_newbuf(sc, i)) != 0)
2114                         return (error);
2115         }
2116 #ifdef DEV_NETMAP
2117         re_netmap_rx_init(sc);
2118 #endif /* DEV_NETMAP */
2119
2120         /* Flush the RX descriptors */
2121
2122         bus_dmamap_sync(sc->rl_ldata.rl_rx_list_tag,
2123             sc->rl_ldata.rl_rx_list_map,
2124             BUS_DMASYNC_PREWRITE|BUS_DMASYNC_PREREAD);
2125
2126         sc->rl_ldata.rl_rx_prodidx = 0;
2127         sc->rl_head = sc->rl_tail = NULL;
2128         sc->rl_int_rx_act = 0;
2129
2130         return (0);
2131 }
2132
2133 static int
2134 re_jrx_list_init(struct rl_softc *sc)
2135 {
2136         int                     error, i;
2137
2138         bzero(sc->rl_ldata.rl_rx_list,
2139             sc->rl_ldata.rl_rx_desc_cnt * sizeof(struct rl_desc));
2140         for (i = 0; i < sc->rl_ldata.rl_rx_desc_cnt; i++) {
2141                 sc->rl_ldata.rl_jrx_desc[i].rx_m = NULL;
2142                 if ((error = re_jumbo_newbuf(sc, i)) != 0)
2143                         return (error);
2144         }
2145
2146         bus_dmamap_sync(sc->rl_ldata.rl_rx_list_tag,
2147             sc->rl_ldata.rl_rx_list_map,
2148             BUS_DMASYNC_PREWRITE | BUS_DMASYNC_PREREAD);
2149
2150         sc->rl_ldata.rl_rx_prodidx = 0;
2151         sc->rl_head = sc->rl_tail = NULL;
2152         sc->rl_int_rx_act = 0;
2153
2154         return (0);
2155 }
2156
2157 /*
2158  * RX handler for C+ and 8169. For the gigE chips, we support
2159  * the reception of jumbo frames that have been fragmented
2160  * across multiple 2K mbuf cluster buffers.
2161  */
2162 static int
2163 re_rxeof(struct rl_softc *sc, int *rx_npktsp)
2164 {
2165         struct mbuf             *m;
2166         struct ifnet            *ifp;
2167         int                     i, rxerr, total_len;
2168         struct rl_desc          *cur_rx;
2169         u_int32_t               rxstat, rxvlan;
2170         int                     jumbo, maxpkt = 16, rx_npkts = 0;
2171
2172         RL_LOCK_ASSERT(sc);
2173
2174         ifp = sc->rl_ifp;
2175 #ifdef DEV_NETMAP
2176         if (netmap_rx_irq(ifp, 0, &rx_npkts))
2177                 return 0;
2178 #endif /* DEV_NETMAP */
2179         if (ifp->if_mtu > RL_MTU && (sc->rl_flags & RL_FLAG_JUMBOV2) != 0)
2180                 jumbo = 1;
2181         else
2182                 jumbo = 0;
2183
2184         /* Invalidate the descriptor memory */
2185
2186         bus_dmamap_sync(sc->rl_ldata.rl_rx_list_tag,
2187             sc->rl_ldata.rl_rx_list_map,
2188             BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
2189
2190         for (i = sc->rl_ldata.rl_rx_prodidx; maxpkt > 0;
2191             i = RL_RX_DESC_NXT(sc, i)) {
2192                 if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
2193                         break;
2194                 cur_rx = &sc->rl_ldata.rl_rx_list[i];
2195                 rxstat = le32toh(cur_rx->rl_cmdstat);
2196                 if ((rxstat & RL_RDESC_STAT_OWN) != 0)
2197                         break;
2198                 total_len = rxstat & sc->rl_rxlenmask;
2199                 rxvlan = le32toh(cur_rx->rl_vlanctl);
2200                 if (jumbo != 0)
2201                         m = sc->rl_ldata.rl_jrx_desc[i].rx_m;
2202                 else
2203                         m = sc->rl_ldata.rl_rx_desc[i].rx_m;
2204
2205                 if ((sc->rl_flags & RL_FLAG_JUMBOV2) != 0 &&
2206                     (rxstat & (RL_RDESC_STAT_SOF | RL_RDESC_STAT_EOF)) !=
2207                     (RL_RDESC_STAT_SOF | RL_RDESC_STAT_EOF)) {
2208                         /*
2209                          * RTL8168C or later controllers do not
2210                          * support multi-fragment packet.
2211                          */
2212                         re_discard_rxbuf(sc, i);
2213                         continue;
2214                 } else if ((rxstat & RL_RDESC_STAT_EOF) == 0) {
2215                         if (re_newbuf(sc, i) != 0) {
2216                                 /*
2217                                  * If this is part of a multi-fragment packet,
2218                                  * discard all the pieces.
2219                                  */
2220                                 if (sc->rl_head != NULL) {
2221                                         m_freem(sc->rl_head);
2222                                         sc->rl_head = sc->rl_tail = NULL;
2223                                 }
2224                                 re_discard_rxbuf(sc, i);
2225                                 continue;
2226                         }
2227                         m->m_len = RE_RX_DESC_BUFLEN;
2228                         if (sc->rl_head == NULL)
2229                                 sc->rl_head = sc->rl_tail = m;
2230                         else {
2231                                 m->m_flags &= ~M_PKTHDR;
2232                                 sc->rl_tail->m_next = m;
2233                                 sc->rl_tail = m;
2234                         }
2235                         continue;
2236                 }
2237
2238                 /*
2239                  * NOTE: for the 8139C+, the frame length field
2240                  * is always 12 bits in size, but for the gigE chips,
2241                  * it is 13 bits (since the max RX frame length is 16K).
2242                  * Unfortunately, all 32 bits in the status word
2243                  * were already used, so to make room for the extra
2244                  * length bit, RealTek took out the 'frame alignment
2245                  * error' bit and shifted the other status bits
2246                  * over one slot. The OWN, EOR, FS and LS bits are
2247                  * still in the same places. We have already extracted
2248                  * the frame length and checked the OWN bit, so rather
2249                  * than using an alternate bit mapping, we shift the
2250                  * status bits one space to the right so we can evaluate
2251                  * them using the 8169 status as though it was in the
2252                  * same format as that of the 8139C+.
2253                  */
2254                 if (sc->rl_type == RL_8169)
2255                         rxstat >>= 1;
2256
2257                 /*
2258                  * if total_len > 2^13-1, both _RXERRSUM and _GIANT will be
2259                  * set, but if CRC is clear, it will still be a valid frame.
2260                  */
2261                 if ((rxstat & RL_RDESC_STAT_RXERRSUM) != 0) {
2262                         rxerr = 1;
2263                         if ((sc->rl_flags & RL_FLAG_JUMBOV2) == 0 &&
2264                             total_len > 8191 &&
2265                             (rxstat & RL_RDESC_STAT_ERRS) == RL_RDESC_STAT_GIANT)
2266                                 rxerr = 0;
2267                         if (rxerr != 0) {
2268                                 if_inc_counter(ifp, IFCOUNTER_IERRORS, 1);
2269                                 /*
2270                                  * If this is part of a multi-fragment packet,
2271                                  * discard all the pieces.
2272                                  */
2273                                 if (sc->rl_head != NULL) {
2274                                         m_freem(sc->rl_head);
2275                                         sc->rl_head = sc->rl_tail = NULL;
2276                                 }
2277                                 re_discard_rxbuf(sc, i);
2278                                 continue;
2279                         }
2280                 }
2281
2282                 /*
2283                  * If allocating a replacement mbuf fails,
2284                  * reload the current one.
2285                  */
2286                 if (jumbo != 0)
2287                         rxerr = re_jumbo_newbuf(sc, i);
2288                 else
2289                         rxerr = re_newbuf(sc, i);
2290                 if (rxerr != 0) {
2291                         if_inc_counter(ifp, IFCOUNTER_IQDROPS, 1);
2292                         if (sc->rl_head != NULL) {
2293                                 m_freem(sc->rl_head);
2294                                 sc->rl_head = sc->rl_tail = NULL;
2295                         }
2296                         re_discard_rxbuf(sc, i);
2297                         continue;
2298                 }
2299
2300                 if (sc->rl_head != NULL) {
2301                         if (jumbo != 0)
2302                                 m->m_len = total_len;
2303                         else {
2304                                 m->m_len = total_len % RE_RX_DESC_BUFLEN;
2305                                 if (m->m_len == 0)
2306                                         m->m_len = RE_RX_DESC_BUFLEN;
2307                         }
2308                         /*
2309                          * Special case: if there's 4 bytes or less
2310                          * in this buffer, the mbuf can be discarded:
2311                          * the last 4 bytes is the CRC, which we don't
2312                          * care about anyway.
2313                          */
2314                         if (m->m_len <= ETHER_CRC_LEN) {
2315                                 sc->rl_tail->m_len -=
2316                                     (ETHER_CRC_LEN - m->m_len);
2317                                 m_freem(m);
2318                         } else {
2319                                 m->m_len -= ETHER_CRC_LEN;
2320                                 m->m_flags &= ~M_PKTHDR;
2321                                 sc->rl_tail->m_next = m;
2322                         }
2323                         m = sc->rl_head;
2324                         sc->rl_head = sc->rl_tail = NULL;
2325                         m->m_pkthdr.len = total_len - ETHER_CRC_LEN;
2326                 } else
2327                         m->m_pkthdr.len = m->m_len =
2328                             (total_len - ETHER_CRC_LEN);
2329
2330 #ifdef RE_FIXUP_RX
2331                 re_fixup_rx(m);
2332 #endif
2333                 if_inc_counter(ifp, IFCOUNTER_IPACKETS, 1);
2334                 m->m_pkthdr.rcvif = ifp;
2335
2336                 /* Do RX checksumming if enabled */
2337
2338                 if (ifp->if_capenable & IFCAP_RXCSUM) {
2339                         if ((sc->rl_flags & RL_FLAG_DESCV2) == 0) {
2340                                 /* Check IP header checksum */
2341                                 if (rxstat & RL_RDESC_STAT_PROTOID)
2342                                         m->m_pkthdr.csum_flags |=
2343                                             CSUM_IP_CHECKED;
2344                                 if (!(rxstat & RL_RDESC_STAT_IPSUMBAD))
2345                                         m->m_pkthdr.csum_flags |=
2346                                             CSUM_IP_VALID;
2347
2348                                 /* Check TCP/UDP checksum */
2349                                 if ((RL_TCPPKT(rxstat) &&
2350                                     !(rxstat & RL_RDESC_STAT_TCPSUMBAD)) ||
2351                                     (RL_UDPPKT(rxstat) &&
2352                                      !(rxstat & RL_RDESC_STAT_UDPSUMBAD))) {
2353                                         m->m_pkthdr.csum_flags |=
2354                                                 CSUM_DATA_VALID|CSUM_PSEUDO_HDR;
2355                                         m->m_pkthdr.csum_data = 0xffff;
2356                                 }
2357                         } else {
2358                                 /*
2359                                  * RTL8168C/RTL816CP/RTL8111C/RTL8111CP
2360                                  */
2361                                 if ((rxstat & RL_RDESC_STAT_PROTOID) &&
2362                                     (rxvlan & RL_RDESC_IPV4))
2363                                         m->m_pkthdr.csum_flags |=
2364                                             CSUM_IP_CHECKED;
2365                                 if (!(rxstat & RL_RDESC_STAT_IPSUMBAD) &&
2366                                     (rxvlan & RL_RDESC_IPV4))
2367                                         m->m_pkthdr.csum_flags |=
2368                                             CSUM_IP_VALID;
2369                                 if (((rxstat & RL_RDESC_STAT_TCP) &&
2370                                     !(rxstat & RL_RDESC_STAT_TCPSUMBAD)) ||
2371                                     ((rxstat & RL_RDESC_STAT_UDP) &&
2372                                     !(rxstat & RL_RDESC_STAT_UDPSUMBAD))) {
2373                                         m->m_pkthdr.csum_flags |=
2374                                                 CSUM_DATA_VALID|CSUM_PSEUDO_HDR;
2375                                         m->m_pkthdr.csum_data = 0xffff;
2376                                 }
2377                         }
2378                 }
2379                 maxpkt--;
2380                 if (rxvlan & RL_RDESC_VLANCTL_TAG) {
2381                         m->m_pkthdr.ether_vtag =
2382                             bswap16((rxvlan & RL_RDESC_VLANCTL_DATA));
2383                         m->m_flags |= M_VLANTAG;
2384                 }
2385                 RL_UNLOCK(sc);
2386                 (*ifp->if_input)(ifp, m);
2387                 RL_LOCK(sc);
2388                 rx_npkts++;
2389         }
2390
2391         /* Flush the RX DMA ring */
2392
2393         bus_dmamap_sync(sc->rl_ldata.rl_rx_list_tag,
2394             sc->rl_ldata.rl_rx_list_map,
2395             BUS_DMASYNC_PREWRITE|BUS_DMASYNC_PREREAD);
2396
2397         sc->rl_ldata.rl_rx_prodidx = i;
2398
2399         if (rx_npktsp != NULL)
2400                 *rx_npktsp = rx_npkts;
2401         if (maxpkt)
2402                 return (EAGAIN);
2403
2404         return (0);
2405 }
2406
2407 static void
2408 re_txeof(struct rl_softc *sc)
2409 {
2410         struct ifnet            *ifp;
2411         struct rl_txdesc        *txd;
2412         u_int32_t               txstat;
2413         int                     cons;
2414
2415         cons = sc->rl_ldata.rl_tx_considx;
2416         if (cons == sc->rl_ldata.rl_tx_prodidx)
2417                 return;
2418
2419         ifp = sc->rl_ifp;
2420 #ifdef DEV_NETMAP
2421         if (netmap_tx_irq(ifp, 0))
2422                 return;
2423 #endif /* DEV_NETMAP */
2424         /* Invalidate the TX descriptor list */
2425         bus_dmamap_sync(sc->rl_ldata.rl_tx_list_tag,
2426             sc->rl_ldata.rl_tx_list_map,
2427             BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
2428
2429         for (; cons != sc->rl_ldata.rl_tx_prodidx;
2430             cons = RL_TX_DESC_NXT(sc, cons)) {
2431                 txstat = le32toh(sc->rl_ldata.rl_tx_list[cons].rl_cmdstat);
2432                 if (txstat & RL_TDESC_STAT_OWN)
2433                         break;
2434                 /*
2435                  * We only stash mbufs in the last descriptor
2436                  * in a fragment chain, which also happens to
2437                  * be the only place where the TX status bits
2438                  * are valid.
2439                  */
2440                 if (txstat & RL_TDESC_CMD_EOF) {
2441                         txd = &sc->rl_ldata.rl_tx_desc[cons];
2442                         bus_dmamap_sync(sc->rl_ldata.rl_tx_mtag,
2443                             txd->tx_dmamap, BUS_DMASYNC_POSTWRITE);
2444                         bus_dmamap_unload(sc->rl_ldata.rl_tx_mtag,
2445                             txd->tx_dmamap);
2446                         KASSERT(txd->tx_m != NULL,
2447                             ("%s: freeing NULL mbufs!", __func__));
2448                         m_freem(txd->tx_m);
2449                         txd->tx_m = NULL;
2450                         if (txstat & (RL_TDESC_STAT_EXCESSCOL|
2451                             RL_TDESC_STAT_COLCNT))
2452                                 if_inc_counter(ifp, IFCOUNTER_COLLISIONS, 1);
2453                         if (txstat & RL_TDESC_STAT_TXERRSUM)
2454                                 if_inc_counter(ifp, IFCOUNTER_OERRORS, 1);
2455                         else
2456                                 if_inc_counter(ifp, IFCOUNTER_OPACKETS, 1);
2457                 }
2458                 sc->rl_ldata.rl_tx_free++;
2459                 ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;
2460         }
2461         sc->rl_ldata.rl_tx_considx = cons;
2462
2463         /* No changes made to the TX ring, so no flush needed */
2464
2465         if (sc->rl_ldata.rl_tx_free != sc->rl_ldata.rl_tx_desc_cnt) {
2466 #ifdef RE_TX_MODERATION
2467                 /*
2468                  * If not all descriptors have been reaped yet, reload
2469                  * the timer so that we will eventually get another
2470                  * interrupt that will cause us to re-enter this routine.
2471                  * This is done in case the transmitter has gone idle.
2472                  */
2473                 CSR_WRITE_4(sc, RL_TIMERCNT, 1);
2474 #endif
2475         } else
2476                 sc->rl_watchdog_timer = 0;
2477 }
2478
2479 static void
2480 re_tick(void *xsc)
2481 {
2482         struct rl_softc         *sc;
2483         struct mii_data         *mii;
2484
2485         sc = xsc;
2486
2487         RL_LOCK_ASSERT(sc);
2488
2489         mii = device_get_softc(sc->rl_miibus);
2490         mii_tick(mii);
2491         if ((sc->rl_flags & RL_FLAG_LINK) == 0)
2492                 re_miibus_statchg(sc->rl_dev);
2493         /*
2494          * Reclaim transmitted frames here. Technically it is not
2495          * necessary to do here but it ensures periodic reclamation
2496          * regardless of Tx completion interrupt which seems to be
2497          * lost on PCIe based controllers under certain situations.
2498          */
2499         re_txeof(sc);
2500         re_watchdog(sc);
2501         callout_reset(&sc->rl_stat_callout, hz, re_tick, sc);
2502 }
2503
2504 #ifdef DEVICE_POLLING
2505 static int
2506 re_poll(struct ifnet *ifp, enum poll_cmd cmd, int count)
2507 {
2508         struct rl_softc *sc = ifp->if_softc;
2509         int rx_npkts = 0;
2510
2511         RL_LOCK(sc);
2512         if (ifp->if_drv_flags & IFF_DRV_RUNNING)
2513                 rx_npkts = re_poll_locked(ifp, cmd, count);
2514         RL_UNLOCK(sc);
2515         return (rx_npkts);
2516 }
2517
2518 static int
2519 re_poll_locked(struct ifnet *ifp, enum poll_cmd cmd, int count)
2520 {
2521         struct rl_softc *sc = ifp->if_softc;
2522         int rx_npkts;
2523
2524         RL_LOCK_ASSERT(sc);
2525
2526         sc->rxcycles = count;
2527         re_rxeof(sc, &rx_npkts);
2528         re_txeof(sc);
2529
2530         if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
2531                 re_start_locked(ifp);
2532
2533         if (cmd == POLL_AND_CHECK_STATUS) { /* also check status register */
2534                 u_int16_t       status;
2535
2536                 status = CSR_READ_2(sc, RL_ISR);
2537                 if (status == 0xffff)
2538                         return (rx_npkts);
2539                 if (status)
2540                         CSR_WRITE_2(sc, RL_ISR, status);
2541                 if ((status & (RL_ISR_TX_OK | RL_ISR_TX_DESC_UNAVAIL)) &&
2542                     (sc->rl_flags & RL_FLAG_PCIE))
2543                         CSR_WRITE_1(sc, sc->rl_txstart, RL_TXSTART_START);
2544
2545                 /*
2546                  * XXX check behaviour on receiver stalls.
2547                  */
2548
2549                 if (status & RL_ISR_SYSTEM_ERR) {
2550                         ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
2551                         re_init_locked(sc);
2552                 }
2553         }
2554         return (rx_npkts);
2555 }
2556 #endif /* DEVICE_POLLING */
2557
2558 static int
2559 re_intr(void *arg)
2560 {
2561         struct rl_softc         *sc;
2562         uint16_t                status;
2563
2564         sc = arg;
2565
2566         status = CSR_READ_2(sc, RL_ISR);
2567         if (status == 0xFFFF || (status & RL_INTRS_CPLUS) == 0)
2568                 return (FILTER_STRAY);
2569         CSR_WRITE_2(sc, RL_IMR, 0);
2570
2571         taskqueue_enqueue(taskqueue_fast, &sc->rl_inttask);
2572
2573         return (FILTER_HANDLED);
2574 }
2575
2576 static void
2577 re_int_task(void *arg, int npending)
2578 {
2579         struct epoch_tracker    et;
2580         struct rl_softc         *sc;
2581         struct ifnet            *ifp;
2582         u_int16_t               status;
2583         int                     rval = 0;
2584
2585         sc = arg;
2586         ifp = sc->rl_ifp;
2587
2588         RL_LOCK(sc);
2589
2590         status = CSR_READ_2(sc, RL_ISR);
2591         CSR_WRITE_2(sc, RL_ISR, status);
2592
2593         if (sc->suspended ||
2594             (ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) {
2595                 RL_UNLOCK(sc);
2596                 return;
2597         }
2598
2599 #ifdef DEVICE_POLLING
2600         if  (ifp->if_capenable & IFCAP_POLLING) {
2601                 RL_UNLOCK(sc);
2602                 return;
2603         }
2604 #endif
2605
2606         if (status & (RL_ISR_RX_OK|RL_ISR_RX_ERR|RL_ISR_FIFO_OFLOW)) {
2607                 NET_EPOCH_ENTER(et);
2608                 rval = re_rxeof(sc, NULL);
2609                 NET_EPOCH_EXIT(et);
2610         }
2611
2612         /*
2613          * Some chips will ignore a second TX request issued
2614          * while an existing transmission is in progress. If
2615          * the transmitter goes idle but there are still
2616          * packets waiting to be sent, we need to restart the
2617          * channel here to flush them out. This only seems to
2618          * be required with the PCIe devices.
2619          */
2620         if ((status & (RL_ISR_TX_OK | RL_ISR_TX_DESC_UNAVAIL)) &&
2621             (sc->rl_flags & RL_FLAG_PCIE))
2622                 CSR_WRITE_1(sc, sc->rl_txstart, RL_TXSTART_START);
2623         if (status & (
2624 #ifdef RE_TX_MODERATION
2625             RL_ISR_TIMEOUT_EXPIRED|
2626 #else
2627             RL_ISR_TX_OK|
2628 #endif
2629             RL_ISR_TX_ERR|RL_ISR_TX_DESC_UNAVAIL))
2630                 re_txeof(sc);
2631
2632         if (status & RL_ISR_SYSTEM_ERR) {
2633                 ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
2634                 re_init_locked(sc);
2635         }
2636
2637         if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
2638                 re_start_locked(ifp);
2639
2640         RL_UNLOCK(sc);
2641
2642         if ((CSR_READ_2(sc, RL_ISR) & RL_INTRS_CPLUS) || rval) {
2643                 taskqueue_enqueue(taskqueue_fast, &sc->rl_inttask);
2644                 return;
2645         }
2646
2647         CSR_WRITE_2(sc, RL_IMR, RL_INTRS_CPLUS);
2648 }
2649
2650 static void
2651 re_intr_msi(void *xsc)
2652 {
2653         struct rl_softc         *sc;
2654         struct ifnet            *ifp;
2655         uint16_t                intrs, status;
2656
2657         sc = xsc;
2658         RL_LOCK(sc);
2659
2660         ifp = sc->rl_ifp;
2661 #ifdef DEVICE_POLLING
2662         if (ifp->if_capenable & IFCAP_POLLING) {
2663                 RL_UNLOCK(sc);
2664                 return;
2665         }
2666 #endif
2667         /* Disable interrupts. */
2668         CSR_WRITE_2(sc, RL_IMR, 0);
2669         if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) {
2670                 RL_UNLOCK(sc);
2671                 return;
2672         }
2673
2674         intrs = RL_INTRS_CPLUS;
2675         status = CSR_READ_2(sc, RL_ISR);
2676         CSR_WRITE_2(sc, RL_ISR, status);
2677         if (sc->rl_int_rx_act > 0) {
2678                 intrs &= ~(RL_ISR_RX_OK | RL_ISR_RX_ERR | RL_ISR_FIFO_OFLOW |
2679                     RL_ISR_RX_OVERRUN);
2680                 status &= ~(RL_ISR_RX_OK | RL_ISR_RX_ERR | RL_ISR_FIFO_OFLOW |
2681                     RL_ISR_RX_OVERRUN);
2682         }
2683
2684         if (status & (RL_ISR_TIMEOUT_EXPIRED | RL_ISR_RX_OK | RL_ISR_RX_ERR |
2685             RL_ISR_FIFO_OFLOW | RL_ISR_RX_OVERRUN)) {
2686                 re_rxeof(sc, NULL);
2687                 if ((ifp->if_drv_flags & IFF_DRV_RUNNING) != 0) {
2688                         if (sc->rl_int_rx_mod != 0 &&
2689                             (status & (RL_ISR_RX_OK | RL_ISR_RX_ERR |
2690                             RL_ISR_FIFO_OFLOW | RL_ISR_RX_OVERRUN)) != 0) {
2691                                 /* Rearm one-shot timer. */
2692                                 CSR_WRITE_4(sc, RL_TIMERCNT, 1);
2693                                 intrs &= ~(RL_ISR_RX_OK | RL_ISR_RX_ERR |
2694                                     RL_ISR_FIFO_OFLOW | RL_ISR_RX_OVERRUN);
2695                                 sc->rl_int_rx_act = 1;
2696                         } else {
2697                                 intrs |= RL_ISR_RX_OK | RL_ISR_RX_ERR |
2698                                     RL_ISR_FIFO_OFLOW | RL_ISR_RX_OVERRUN;
2699                                 sc->rl_int_rx_act = 0;
2700                         }
2701                 }
2702         }
2703
2704         /*
2705          * Some chips will ignore a second TX request issued
2706          * while an existing transmission is in progress. If
2707          * the transmitter goes idle but there are still
2708          * packets waiting to be sent, we need to restart the
2709          * channel here to flush them out. This only seems to
2710          * be required with the PCIe devices.
2711          */
2712         if ((status & (RL_ISR_TX_OK | RL_ISR_TX_DESC_UNAVAIL)) &&
2713             (sc->rl_flags & RL_FLAG_PCIE))
2714                 CSR_WRITE_1(sc, sc->rl_txstart, RL_TXSTART_START);
2715         if (status & (RL_ISR_TX_OK | RL_ISR_TX_ERR | RL_ISR_TX_DESC_UNAVAIL))
2716                 re_txeof(sc);
2717
2718         if (status & RL_ISR_SYSTEM_ERR) {
2719                 ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
2720                 re_init_locked(sc);
2721         }
2722
2723         if ((ifp->if_drv_flags & IFF_DRV_RUNNING) != 0) {
2724                 if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
2725                         re_start_locked(ifp);
2726                 CSR_WRITE_2(sc, RL_IMR, intrs);
2727         }
2728         RL_UNLOCK(sc);
2729 }
2730
/*
 * Map the mbuf chain at *m_head for DMA and queue it on the TX descriptor
 * ring, starting at the current producer index.  Must be called with the
 * softc lock held (see RL_LOCK_ASSERT below).
 *
 * Returns 0 on success, after which the chain is recorded in the software
 * slot of the last descriptor used.  On failure:
 *   - ENOBUFS with *m_head == NULL: copying, defragmenting or collapsing
 *     the chain failed and the chain has been freed;
 *   - ENOBUFS with *m_head intact: too few free descriptors; the DMA map
 *     has been unloaded and the chain stays with the caller;
 *   - EIO with *m_head == NULL: the mapping produced no segments;
 *   - any other bus_dmamap_load_mbuf_sg() error: *m_head is left intact
 *     if the first load failed, or freed and set to NULL if the retry
 *     after m_collapse() failed.
 */
2731 static int
2732 re_encap(struct rl_softc *sc, struct mbuf **m_head)
2733 {
2734         struct rl_txdesc        *txd, *txd_last;
2735         bus_dma_segment_t       segs[RL_NTXSEGS];
2736         bus_dmamap_t            map;
2737         struct mbuf             *m_new;
2738         struct rl_desc          *desc;
2739         int                     nsegs, prod;
2740         int                     i, error, ei, si;
2741         int                     padlen;
2742         uint32_t                cmdstat, csum_flags, vlanctl;
2743
2744         RL_LOCK_ASSERT(sc);
2745         M_ASSERTPKTHDR((*m_head));
2746
2747         /*
2748          * With some of the RealTek chips, using the checksum offload
2749          * support in conjunction with the autopadding feature results
2750          * in the transmission of corrupt frames. For example, if we
2751          * need to send a really small IP fragment that's less than 60
2752          * bytes in size, and IP header checksumming is enabled, the
2753          * resulting ethernet frame that appears on the wire will
2754          * have garbled payload. To work around this, if TX IP checksum
2755          * offload is enabled, we always manually pad short frames out
2756          * to the minimum ethernet frame size.
2757          */
2758         if ((sc->rl_flags & RL_FLAG_AUTOPAD) == 0 &&
2759             (*m_head)->m_pkthdr.len < RL_IP4CSUMTX_PADLEN &&
2760             ((*m_head)->m_pkthdr.csum_flags & CSUM_IP) != 0) {
2761                 padlen = RL_MIN_FRAMELEN - (*m_head)->m_pkthdr.len;
2762                 if (M_WRITABLE(*m_head) == 0) {
2763                         /* Get a writable copy. */
2764                         m_new = m_dup(*m_head, M_NOWAIT);
                        /* The original is released whether or not the copy succeeded. */
2765                         m_freem(*m_head);
2766                         if (m_new == NULL) {
2767                                 *m_head = NULL;
2768                                 return (ENOBUFS);
2769                         }
2770                         *m_head = m_new;
2771                 }
                /*
                 * The pad bytes are written into a single mbuf below, so
                 * the frame must be one mbuf with enough trailing space.
                 */
2772                 if ((*m_head)->m_next != NULL ||
2773                     M_TRAILINGSPACE(*m_head) < padlen) {
2774                         m_new = m_defrag(*m_head, M_NOWAIT);
2775                         if (m_new == NULL) {
2776                                 m_freem(*m_head);
2777                                 *m_head = NULL;
2778                                 return (ENOBUFS);
2779                         }
2780                 } else
2781                         m_new = *m_head;
2782
2783                 /*
2784                  * Manually pad short frames, and zero the pad space
2785                  * to avoid leaking data.
2786                  */
2787                 bzero(mtod(m_new, char *) + m_new->m_pkthdr.len, padlen);
2788                 m_new->m_pkthdr.len += padlen;
2789                 m_new->m_len = m_new->m_pkthdr.len;
2790                 *m_head = m_new;
2791         }
2792
        /* Load the chain using the DMA map of the slot at the producer index. */
2793         prod = sc->rl_ldata.rl_tx_prodidx;
2794         txd = &sc->rl_ldata.rl_tx_desc[prod];
2795         error = bus_dmamap_load_mbuf_sg(sc->rl_ldata.rl_tx_mtag, txd->tx_dmamap,
2796             *m_head, segs, &nsegs, BUS_DMA_NOWAIT);
2797         if (error == EFBIG) {
                /* Too many segments: squeeze the chain and retry once. */
2798                 m_new = m_collapse(*m_head, M_NOWAIT, RL_NTXSEGS);
2799                 if (m_new == NULL) {
2800                         m_freem(*m_head);
2801                         *m_head = NULL;
2802                         return (ENOBUFS);
2803                 }
2804                 *m_head = m_new;
2805                 error = bus_dmamap_load_mbuf_sg(sc->rl_ldata.rl_tx_mtag,
2806                     txd->tx_dmamap, *m_head, segs, &nsegs, BUS_DMA_NOWAIT);
2807                 if (error != 0) {
2808                         m_freem(*m_head);
2809                         *m_head = NULL;
2810                         return (error);
2811                 }
2812         } else if (error != 0)
2813                 return (error);
2814         if (nsegs == 0) {
2815                 m_freem(*m_head);
2816                 *m_head = NULL;
2817                 return (EIO);
2818         }
2819
2820         /* Check for number of available descriptors. */
        /* More than one descriptor must remain free after this packet. */
2821         if (sc->rl_ldata.rl_tx_free - nsegs <= 1) {
2822                 bus_dmamap_unload(sc->rl_ldata.rl_tx_mtag, txd->tx_dmamap);
2823                 return (ENOBUFS);
2824         }
2825
2826         bus_dmamap_sync(sc->rl_ldata.rl_tx_mtag, txd->tx_dmamap,
2827             BUS_DMASYNC_PREWRITE);
2828
2829         /*
2830          * Set up checksum offload. Note: checksum offload bits must
2831          * appear in all descriptors of a multi-descriptor transmit
2832          * attempt. This is according to testing done with an 8169
2833          * chip. This is a requirement.
2834          */
2835         vlanctl = 0;
2836         csum_flags = 0;
2837         if (((*m_head)->m_pkthdr.csum_flags & CSUM_TSO) != 0) {
                /*
                 * With RL_FLAG_DESCV2 the segment size goes into the
                 * vlanctl word, otherwise into the command/status word.
                 */
2838                 if ((sc->rl_flags & RL_FLAG_DESCV2) != 0) {
2839                         csum_flags |= RL_TDESC_CMD_LGSEND;
2840                         vlanctl |= ((uint32_t)(*m_head)->m_pkthdr.tso_segsz <<
2841                             RL_TDESC_CMD_MSSVALV2_SHIFT);
2842                 } else {
2843                         csum_flags |= RL_TDESC_CMD_LGSEND |
2844                             ((uint32_t)(*m_head)->m_pkthdr.tso_segsz <<
2845                             RL_TDESC_CMD_MSSVAL_SHIFT);
2846                 }
2847         } else {
2848                 /*
2849                  * Unconditionally enable IP checksum if TCP or UDP
2850                  * checksum is required. Otherwise, the TCP/UDP checksum
2851                  * request has no effect.
2852                  */
2853                 if (((*m_head)->m_pkthdr.csum_flags & RE_CSUM_FEATURES) != 0) {
2854                         if ((sc->rl_flags & RL_FLAG_DESCV2) == 0) {
2855                                 csum_flags |= RL_TDESC_CMD_IPCSUM;
2856                                 if (((*m_head)->m_pkthdr.csum_flags &
2857                                     CSUM_TCP) != 0)
2858                                         csum_flags |= RL_TDESC_CMD_TCPCSUM;
2859                                 if (((*m_head)->m_pkthdr.csum_flags &
2860                                     CSUM_UDP) != 0)
2861                                         csum_flags |= RL_TDESC_CMD_UDPCSUM;
2862                         } else {
2863                                 vlanctl |= RL_TDESC_CMD_IPCSUMV2;
2864                                 if (((*m_head)->m_pkthdr.csum_flags &
2865                                     CSUM_TCP) != 0)
2866                                         vlanctl |= RL_TDESC_CMD_TCPCSUMV2;
2867                                 if (((*m_head)->m_pkthdr.csum_flags &
2868                                     CSUM_UDP) != 0)
2869                                         vlanctl |= RL_TDESC_CMD_UDPCSUMV2;
2870                         }
2871                 }
2872         }
2873
2874         /*
2875          * Set up hardware VLAN tagging. Note: vlan tag info must
2876          * appear in all descriptors of a multi-descriptor
2877          * transmission attempt.
2878          */
2879         if ((*m_head)->m_flags & M_VLANTAG)
2880                 vlanctl |= bswap16((*m_head)->m_pkthdr.ether_vtag) |
2881                     RL_TDESC_VLANCTL_TAG;
2882
        /* Fill one descriptor per DMA segment; si remembers the first one. */
2883         si = prod;
2884         for (i = 0; i < nsegs; i++, prod = RL_TX_DESC_NXT(sc, prod)) {
2885                 desc = &sc->rl_ldata.rl_tx_list[prod];
2886                 desc->rl_vlanctl = htole32(vlanctl);
2887                 desc->rl_bufaddr_lo = htole32(RL_ADDR_LO(segs[i].ds_addr));
2888                 desc->rl_bufaddr_hi = htole32(RL_ADDR_HI(segs[i].ds_addr));
2889                 cmdstat = segs[i].ds_len;
                /* The first descriptor gets its OWN bit last, after the loop. */
2890                 if (i != 0)
2891                         cmdstat |= RL_TDESC_CMD_OWN;
                /* The last slot of the ring carries the EOR flag. */
2892                 if (prod == sc->rl_ldata.rl_tx_desc_cnt - 1)
2893                         cmdstat |= RL_TDESC_CMD_EOR;
2894                 desc->rl_cmdstat = htole32(cmdstat | csum_flags);
2895                 sc->rl_ldata.rl_tx_free--;
2896         }
2897         /* Update producer index. */
2898         sc->rl_ldata.rl_tx_prodidx = prod;
2899
2900         /* Set EOF on the last descriptor. */
2901         ei = RL_TX_DESC_PRV(sc, prod);
2902         desc = &sc->rl_ldata.rl_tx_list[ei];
2903         desc->rl_cmdstat |= htole32(RL_TDESC_CMD_EOF);
2904
2905         desc = &sc->rl_ldata.rl_tx_list[si];
2906         /* Set SOF and transfer ownership of packet to the chip. */
2907         desc->rl_cmdstat |= htole32(RL_TDESC_CMD_OWN | RL_TDESC_CMD_SOF);
2908
2909         /*
2910          * Ensure that the map for this transmission
2911          * is placed at the array index of the last descriptor
2912          * in this chain.  (Swap last and first dmamaps.)
2913          */
2914         txd_last = &sc->rl_ldata.rl_tx_desc[ei];
2915         map = txd->tx_dmamap;
2916         txd->tx_dmamap = txd_last->tx_dmamap;
2917         txd_last->tx_dmamap = map;
2918         txd_last->tx_m = *m_head;
2919
2920         return (0);
2921 }
2922
2923 static void
2924 re_start(struct ifnet *ifp)
2925 {
2926         struct rl_softc         *sc;
2927
2928         sc = ifp->if_softc;
2929         RL_LOCK(sc);
2930         re_start_locked(ifp);
2931         RL_UNLOCK(sc);
2932 }
2933
2934 /*
2935  * Main transmit routine for C+ and gigE NICs.
2936  */
2937 static void
2938 re_start_locked(struct ifnet *ifp)
2939 {
2940         struct rl_softc         *sc;
2941         struct mbuf             *m_head;
2942         int                     queued;
2943
2944         sc = ifp->if_softc;
2945
2946 #ifdef DEV_NETMAP
2947         /* XXX is this necessary ? */
2948         if (ifp->if_capenable & IFCAP_NETMAP) {
2949                 struct netmap_kring *kring = NA(ifp)->tx_rings[0];
2950                 if (sc->rl_ldata.rl_tx_prodidx != kring->nr_hwcur) {
2951                         /* kick the tx unit */
2952                         CSR_WRITE_1(sc, sc->rl_txstart, RL_TXSTART_START);
2953 #ifdef RE_TX_MODERATION
2954                         CSR_WRITE_4(sc, RL_TIMERCNT, 1);
2955 #endif
2956                         sc->rl_watchdog_timer = 5;
2957                 }
2958                 return;
2959         }
2960 #endif /* DEV_NETMAP */
2961
2962         if ((ifp->if_drv_flags & (IFF_DRV_RUNNING | IFF_DRV_OACTIVE)) !=
2963             IFF_DRV_RUNNING || (sc->rl_flags & RL_FLAG_LINK) == 0)
2964                 return;
2965
2966         for (queued = 0; !IFQ_DRV_IS_EMPTY(&ifp->if_snd) &&
2967             sc->rl_ldata.rl_tx_free > 1;) {
2968                 IFQ_DRV_DEQUEUE(&ifp->if_snd, m_head);
2969                 if (m_head == NULL)
2970                         break;
2971
2972                 if (re_encap(sc, &m_head) != 0) {
2973                         if (m_head == NULL)
2974                                 break;
2975                         IFQ_DRV_PREPEND(&ifp->if_snd, m_head);
2976                         ifp->if_drv_flags |= IFF_DRV_OACTIVE;
2977                         break;
2978                 }
2979
2980                 /*
2981                  * If there's a BPF listener, bounce a copy of this frame
2982                  * to him.
2983                  */
2984                 ETHER_BPF_MTAP(ifp, m_head);
2985
2986                 queued++;
2987         }
2988
2989         if (queued == 0) {
2990 #ifdef RE_TX_MODERATION
2991                 if (sc->rl_ldata.rl_tx_free != sc->rl_ldata.rl_tx_desc_cnt)
2992                         CSR_WRITE_4(sc, RL_TIMERCNT, 1);
2993 #endif
2994                 return;
2995         }
2996
2997         re_start_tx(sc);
2998 }
2999
3000 static void
3001 re_start_tx(struct rl_softc *sc)
3002 {
3003
3004         /* Flush the TX descriptors */
3005         bus_dmamap_sync(sc->rl_ldata.rl_tx_list_tag,
3006             sc->rl_ldata.rl_tx_list_map,
3007             BUS_DMASYNC_PREWRITE|BUS_DMASYNC_PREREAD);
3008
3009         CSR_WRITE_1(sc, sc->rl_txstart, RL_TXSTART_START);
3010
3011 #ifdef RE_TX_MODERATION
3012         /*
3013          * Use the countdown timer for interrupt moderation.
3014          * 'TX done' interrupts are disabled. Instead, we reset the
3015          * countdown timer, which will begin counting until it hits
3016          * the value in the TIMERINT register, and then trigger an
3017          * interrupt. Each time we write to the TIMERCNT register,
3018          * the timer count is reset to 0.
3019          */
3020         CSR_WRITE_4(sc, RL_TIMERCNT, 1);
3021 #endif
3022
3023         /*
3024          * Set a timeout in case the chip goes out to lunch.
3025          */
3026         sc->rl_watchdog_timer = 5;
3027 }
3028
3029 static void
3030 re_set_jumbo(struct rl_softc *sc, int jumbo)
3031 {
3032
3033         if (sc->rl_hwrev->rl_rev == RL_HWREV_8168E_VL) {
3034                 pci_set_max_read_req(sc->rl_dev, 4096);
3035                 return;
3036         }
3037
3038         CSR_WRITE_1(sc, RL_EECMD, RL_EEMODE_WRITECFG);
3039         if (jumbo != 0) {
3040                 CSR_WRITE_1(sc, sc->rl_cfg3, CSR_READ_1(sc, sc->rl_cfg3) |
3041                     RL_CFG3_JUMBO_EN0);
3042                 switch (sc->rl_hwrev->rl_rev) {
3043                 case RL_HWREV_8168DP:
3044                         break;
3045                 case RL_HWREV_8168E:
3046                         CSR_WRITE_1(sc, sc->rl_cfg4,
3047                             CSR_READ_1(sc, sc->rl_cfg4) | 0x01);
3048                         break;
3049                 default:
3050                         CSR_WRITE_1(sc, sc->rl_cfg4,
3051                             CSR_READ_1(sc, sc->rl_cfg4) | RL_CFG4_JUMBO_EN1);
3052                 }
3053         } else {
3054                 CSR_WRITE_1(sc, sc->rl_cfg3, CSR_READ_1(sc, sc->rl_cfg3) &
3055                     ~RL_CFG3_JUMBO_EN0);
3056                 switch (sc->rl_hwrev->rl_rev) {
3057                 case RL_HWREV_8168DP:
3058                         break;
3059                 case RL_HWREV_8168E:
3060                         CSR_WRITE_1(sc, sc->rl_cfg4,
3061                             CSR_READ_1(sc, sc->rl_cfg4) & ~0x01);
3062                         break;
3063                 default:
3064                         CSR_WRITE_1(sc, sc->rl_cfg4,
3065                             CSR_READ_1(sc, sc->rl_cfg4) & ~RL_CFG4_JUMBO_EN1);
3066                 }
3067         }
3068         CSR_WRITE_1(sc, RL_EECMD, RL_EEMODE_OFF);
3069
3070         switch (sc->rl_hwrev->rl_rev) {
3071         case RL_HWREV_8168DP:
3072                 pci_set_max_read_req(sc->rl_dev, 4096);
3073                 break;
3074         default:
3075                 if (jumbo != 0)
3076                         pci_set_max_read_req(sc->rl_dev, 512);
3077                 else
3078                         pci_set_max_read_req(sc->rl_dev, 4096);
3079         }
3080 }
3081
/*
 * Unlocked entry point for interface initialization: takes the softc
 * lock around re_init_locked().  The argument is the driver softc,
 * passed as an untyped pointer.
 */
static void
re_init(void *xsc)
{
        struct rl_softc         *sc;

        sc = xsc;

        RL_LOCK(sc);
        re_init_locked(sc);
        RL_UNLOCK(sc);
}
3091
3092 static void
3093 re_init_locked(struct rl_softc *sc)
3094 {
3095         struct ifnet            *ifp = sc->rl_ifp;
3096         struct mii_data         *mii;
3097         uint32_t                reg;
3098         uint16_t                cfg;
3099         union {
3100                 uint32_t align_dummy;
3101                 u_char eaddr[ETHER_ADDR_LEN];
3102         } eaddr;
3103
3104         RL_LOCK_ASSERT(sc);
3105
3106         mii = device_get_softc(sc->rl_miibus);
3107
3108         if ((ifp->if_drv_flags & IFF_DRV_RUNNING) != 0)
3109                 return;
3110
3111         /*
3112          * Cancel pending I/O and free all RX/TX buffers.
3113          */
3114         re_stop(sc);
3115
3116         /* Put controller into known state. */
3117         re_reset(sc);
3118
3119         /*
3120          * For C+ mode, initialize the RX descriptors and mbufs.
3121          */
3122         if ((sc->rl_flags & RL_FLAG_JUMBOV2) != 0) {
3123                 if (ifp->if_mtu > RL_MTU) {
3124                         if (re_jrx_list_init(sc) != 0) {
3125                                 device_printf(sc->rl_dev,
3126                                     "no memory for jumbo RX buffers\n");
3127                                 re_stop(sc);
3128                                 return;
3129                         }
3130                         /* Disable checksum offloading for jumbo frames. */
3131                         ifp->if_capenable &= ~(IFCAP_HWCSUM | IFCAP_TSO4);
3132                         ifp->if_hwassist &= ~(RE_CSUM_FEATURES | CSUM_TSO);
3133                 } else {
3134                         if (re_rx_list_init(sc) != 0) {
3135                                 device_printf(sc->rl_dev,
3136                                     "no memory for RX buffers\n");
3137                                 re_stop(sc);
3138                                 return;
3139                         }
3140                 }
3141                 re_set_jumbo(sc, ifp->if_mtu > RL_MTU);
3142         } else {
3143                 if (re_rx_list_init(sc) != 0) {
3144                         device_printf(sc->rl_dev, "no memory for RX buffers\n");
3145                         re_stop(sc);
3146                         return;
3147                 }
3148                 if ((sc->rl_flags & RL_FLAG_PCIE) != 0 &&
3149                     pci_get_device(sc->rl_dev) != RT_DEVICEID_8101E) {
3150                         if (ifp->if_mtu > RL_MTU)
3151                                 pci_set_max_read_req(sc->rl_dev, 512);
3152                         else
3153                                 pci_set_max_read_req(sc->rl_dev, 4096);
3154                 }
3155         }
3156         re_tx_list_init(sc);
3157
3158         /*
3159          * Enable C+ RX and TX mode, as well as VLAN stripping and
3160          * RX checksum offload. We must configure the C+ register
3161          * before all others.
3162          */
3163         cfg = RL_CPLUSCMD_PCI_MRW;
3164         if ((ifp->if_capenable & IFCAP_RXCSUM) != 0)
3165                 cfg |= RL_CPLUSCMD_RXCSUM_ENB;
3166         if ((ifp->if_capenable & IFCAP_VLAN_HWTAGGING) != 0)
3167                 cfg |= RL_CPLUSCMD_VLANSTRIP;
3168         if ((sc->rl_flags & RL_FLAG_MACSTAT) != 0) {
3169                 cfg |= RL_CPLUSCMD_MACSTAT_DIS;
3170                 /* XXX magic. */
3171                 cfg |= 0x0001;
3172         } else
3173                 cfg |= RL_CPLUSCMD_RXENB | RL_CPLUSCMD_TXENB;
3174         CSR_WRITE_2(sc, RL_CPLUS_CMD, cfg);
3175         if (sc->rl_hwrev->rl_rev == RL_HWREV_8169_8110SC ||
3176             sc->rl_hwrev->rl_rev == RL_HWREV_8169_8110SCE) {
3177                 reg = 0x000fff00;
3178                 if ((CSR_READ_1(sc, sc->rl_cfg2) & RL_CFG2_PCI66MHZ) != 0)
3179                         reg |= 0x000000ff;
3180                 if (sc->rl_hwrev->rl_rev == RL_HWREV_8169_8110SCE)
3181                         reg |= 0x00f00000;
3182                 CSR_WRITE_4(sc, 0x7c, reg);
3183                 /* Disable interrupt mitigation. */
3184                 CSR_WRITE_2(sc, 0xe2, 0);
3185         }
3186         /*
3187          * Disable TSO if interface MTU size is greater than MSS
3188          * allowed in controller.
3189          */
3190         if (ifp->if_mtu > RL_TSO_MTU && (ifp->if_capenable & IFCAP_TSO4) != 0) {
3191                 ifp->if_capenable &= ~IFCAP_TSO4;
3192                 ifp->if_hwassist &= ~CSUM_TSO;
3193         }
3194
3195         /*
3196          * Init our MAC address.  Even though the chipset
3197          * documentation doesn't mention it, we need to enter "Config
3198          * register write enable" mode to modify the ID registers.
3199          */
3200         /* Copy MAC address on stack to align. */
3201         bcopy(IF_LLADDR(ifp), eaddr.eaddr, ETHER_ADDR_LEN);
3202         CSR_WRITE_1(sc, RL_EECMD, RL_EEMODE_WRITECFG);
3203         CSR_WRITE_4(sc, RL_IDR0,
3204             htole32(*(u_int32_t *)(&eaddr.eaddr[0])));
3205         CSR_WRITE_4(sc, RL_IDR4,
3206             htole32(*(u_int32_t *)(&eaddr.eaddr[4])));
3207         CSR_WRITE_1(sc, RL_EECMD, RL_EEMODE_OFF);
3208
3209         /*
3210          * Load the addresses of the RX and TX lists into the chip.
3211          */
3212
3213         CSR_WRITE_4(sc, RL_RXLIST_ADDR_HI,
3214             RL_ADDR_HI(sc->rl_ldata.rl_rx_list_addr));
3215         CSR_WRITE_4(sc, RL_RXLIST_ADDR_LO,
3216             RL_ADDR_LO(sc->rl_ldata.rl_rx_list_addr));
3217
3218         CSR_WRITE_4(sc, RL_TXLIST_ADDR_HI,
3219             RL_ADDR_HI(sc->rl_ldata.rl_tx_list_addr));
3220         CSR_WRITE_4(sc, RL_TXLIST_ADDR_LO,
3221             RL_ADDR_LO(sc->rl_ldata.rl_tx_list_addr));
3222
3223         if ((sc->rl_flags & RL_FLAG_8168G_PLUS) != 0) {
3224                 /* Disable RXDV gate. */
3225                 CSR_WRITE_4(sc, RL_MISC, CSR_READ_4(sc, RL_MISC) &
3226                     ~0x00080000);
3227         }
3228
3229         /*
3230          * Enable transmit and receive for pre-RTL8168G controllers.
3231          * RX/TX MACs should be enabled before RX/TX configuration.
3232          */
3233         if ((sc->rl_flags & RL_FLAG_8168G_PLUS) == 0)
3234                 CSR_WRITE_1(sc, RL_COMMAND, RL_CMD_TX_ENB | RL_CMD_RX_ENB);
3235
3236         /*
3237          * Set the initial TX configuration.
3238          */
3239         if (sc->rl_testmode) {
3240                 if (sc->rl_type == RL_8169)
3241                         CSR_WRITE_4(sc, RL_TXCFG,
3242                             RL_TXCFG_CONFIG|RL_LOOPTEST_ON);
3243                 else
3244                         CSR_WRITE_4(sc, RL_TXCFG,
3245                             RL_TXCFG_CONFIG|RL_LOOPTEST_ON_CPLUS);
3246         } else
3247                 CSR_WRITE_4(sc, RL_TXCFG, RL_TXCFG_CONFIG);
3248
3249         CSR_WRITE_1(sc, RL_EARLY_TX_THRESH, 16);
3250
3251         /*
3252          * Set the initial RX configuration.
3253          */
3254         re_set_rxmode(sc);
3255
3256         /* Configure interrupt moderation. */
3257         if (sc->rl_type == RL_8169) {
3258                 /* Magic from vendor. */
3259                 CSR_WRITE_2(sc, RL_INTRMOD, 0x5100);
3260         }
3261
3262         /*
3263          * Enable transmit and receive for RTL8168G and later controllers.
3264          * RX/TX MACs should be enabled after RX/TX configuration.
3265          */
3266         if ((sc->rl_flags & RL_FLAG_8168G_PLUS) != 0)
3267                 CSR_WRITE_1(sc, RL_COMMAND, RL_CMD_TX_ENB | RL_CMD_RX_ENB);
3268
3269 #ifdef DEVICE_POLLING
3270         /*
3271          * Disable interrupts if we are polling.
3272          */
3273         if (ifp->if_capenable & IFCAP_POLLING)
3274                 CSR_WRITE_2(sc, RL_IMR, 0);
3275         else    /* otherwise ... */
3276 #endif
3277
3278         /*
3279          * Enable interrupts.
3280          */
3281         if (sc->rl_testmode)
3282                 CSR_WRITE_2(sc, RL_IMR, 0);
3283         else
3284                 CSR_WRITE_2(sc, RL_IMR, RL_INTRS_CPLUS);
3285         CSR_WRITE_2(sc, RL_ISR, RL_INTRS_CPLUS);
3286
3287         /* Set initial TX threshold */
3288         sc->rl_txthresh = RL_TX_THRESH_INIT;
3289
3290         /* Start RX/TX process. */
3291         CSR_WRITE_4(sc, RL_MISSEDPKT, 0);
3292
3293         /*
3294          * Initialize the timer interrupt register so that
3295          * a timer interrupt will be generated once the timer
3296          * reaches a certain number of ticks. The timer is
3297          * reloaded on each transmit.
3298          */
3299 #ifdef RE_TX_MODERATION
3300         /*
3301          * Use timer interrupt register to moderate TX interrupt
3302          * moderation, which dramatically improves TX frame rate.
3303          */
3304         if (sc->rl_type == RL_8169)
3305                 CSR_WRITE_4(sc, RL_TIMERINT_8169, 0x800);
3306         else
3307                 CSR_WRITE_4(sc, RL_TIMERINT, 0x400);
3308 #else
3309         /*
3310          * Use timer interrupt register to moderate RX interrupt
3311          * moderation.
3312          */
3313         if ((sc->rl_flags & (RL_FLAG_MSI | RL_FLAG_MSIX)) != 0 &&
3314             intr_filter == 0) {
3315                 if (sc->rl_type == RL_8169)
3316                         CSR_WRITE_4(sc, RL_TIMERINT_8169,
3317                             RL_USECS(sc->rl_int_rx_mod));
3318         } else {
3319                 if (sc->rl_type == RL_8169)
3320                         CSR_WRITE_4(sc, RL_TIMERINT_8169, RL_USECS(0));
3321         }
3322 #endif
3323
3324         /*
3325          * For 8169 gigE NICs, set the max allowed RX packet
3326          * size so we can receive jumbo frames.
3327          */
3328         if (sc->rl_type == RL_8169) {
3329                 if ((sc->rl_flags & RL_FLAG_JUMBOV2) != 0) {
3330                         /*
3331                          * For controllers that use new jumbo frame scheme,
3332                          * set maximum size of jumbo frame depending on
3333                          * controller revisions.
3334                          */
3335                         if (ifp->if_mtu > RL_MTU)
3336                                 CSR_WRITE_2(sc, RL_MAXRXPKTLEN,
3337                                     sc->rl_hwrev->rl_max_mtu +
3338                                     ETHER_VLAN_ENCAP_LEN + ETHER_HDR_LEN +
3339                                     ETHER_CRC_LEN);
3340                         else
3341                                 CSR_WRITE_2(sc, RL_MAXRXPKTLEN,
3342                                     RE_RX_DESC_BUFLEN);
3343                 } else if ((sc->rl_flags & RL_FLAG_PCIE) != 0 &&
3344                     sc->rl_hwrev->rl_max_mtu == RL_MTU) {
3345                         /* RTL810x has no jumbo frame support. */
3346                         CSR_WRITE_2(sc, RL_MAXRXPKTLEN, RE_RX_DESC_BUFLEN);
3347                 } else
3348                         CSR_WRITE_2(sc, RL_MAXRXPKTLEN, 16383);
3349         }
3350
3351         if (sc->rl_testmode)
3352                 return;
3353
3354         CSR_WRITE_1(sc, sc->rl_cfg1, CSR_READ_1(sc, sc->rl_cfg1) |
3355             RL_CFG1_DRVLOAD);
3356
3357         ifp->if_drv_flags |= IFF_DRV_RUNNING;
3358         ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;
3359
3360         sc->rl_flags &= ~RL_FLAG_LINK;
3361         mii_mediachg(mii);
3362
3363         sc->rl_watchdog_timer = 0;
3364         callout_reset(&sc->rl_stat_callout, hz, re_tick, sc);
3365 }
3366
3367 /*
3368  * Set media options.
3369  */
3370 static int
3371 re_ifmedia_upd(struct ifnet *ifp)
3372 {
3373         struct rl_softc         *sc;
3374         struct mii_data         *mii;
3375         int                     error;
3376
3377         sc = ifp->if_softc;
3378         mii = device_get_softc(sc->rl_miibus);
3379         RL_LOCK(sc);
3380         error = mii_mediachg(mii);
3381         RL_UNLOCK(sc);
3382
3383         return (error);
3384 }
3385
3386 /*
3387  * Report current media status.
3388  */
3389 static void
3390 re_ifmedia_sts(struct ifnet *ifp, struct ifmediareq *ifmr)
3391 {
3392         struct rl_softc         *sc;
3393         struct mii_data         *mii;
3394
3395         sc = ifp->if_softc;
3396         mii = device_get_softc(sc->rl_miibus);
3397
3398         RL_LOCK(sc);
3399         mii_pollstat(mii);
3400         ifmr->ifm_active = mii->mii_media_active;
3401         ifmr->ifm_status = mii->mii_media_status;
3402         RL_UNLOCK(sc);
3403 }
3404
3405 static int
3406 re_ioctl(struct ifnet *ifp, u_long command, caddr_t data)
3407 {
3408         struct rl_softc         *sc = ifp->if_softc;
3409         struct ifreq            *ifr = (struct ifreq *) data;
3410         struct mii_data         *mii;
3411         int                     error = 0;
3412
3413         switch (command) {
3414         case SIOCSIFMTU:
3415                 if (ifr->ifr_mtu < ETHERMIN ||
3416                     ifr->ifr_mtu > sc->rl_hwrev->rl_max_mtu ||
3417                     ((sc->rl_flags & RL_FLAG_FASTETHER) != 0 &&
3418                     ifr->ifr_mtu > RL_MTU)) {
3419                         error = EINVAL;
3420                         break;
3421                 }
3422                 RL_LOCK(sc);
3423                 if (ifp->if_mtu != ifr->ifr_mtu) {
3424                         ifp->if_mtu = ifr->ifr_mtu;
3425                         if ((sc->rl_flags & RL_FLAG_JUMBOV2) != 0 &&
3426                             (ifp->if_drv_flags & IFF_DRV_RUNNING) != 0) {
3427                                 ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
3428                                 re_init_locked(sc);
3429                         }
3430                         if (ifp->if_mtu > RL_TSO_MTU &&
3431                             (ifp->if_capenable & IFCAP_TSO4) != 0) {
3432                                 ifp->if_capenable &= ~(IFCAP_TSO4 |
3433                                     IFCAP_VLAN_HWTSO);
3434                                 ifp->if_hwassist &= ~CSUM_TSO;
3435                         }
3436                         VLAN_CAPABILITIES(ifp);
3437                 }
3438                 RL_UNLOCK(sc);
3439                 break;
3440         case SIOCSIFFLAGS:
3441                 RL_LOCK(sc);
3442                 if ((ifp->if_flags & IFF_UP) != 0) {
3443                         if ((ifp->if_drv_flags & IFF_DRV_RUNNING) != 0) {
3444                                 if (((ifp->if_flags ^ sc->rl_if_flags)
3445                                     & (IFF_PROMISC | IFF_ALLMULTI)) != 0)
3446                                         re_set_rxmode(sc);
3447                         } else
3448                                 re_init_locked(sc);
3449                 } else {
3450                         if ((ifp->if_drv_flags & IFF_DRV_RUNNING) != 0)
3451                                 re_stop(sc);
3452                 }
3453                 sc->rl_if_flags = ifp->if_flags;
3454                 RL_UNLOCK(sc);
3455                 break;
3456         case SIOCADDMULTI:
3457         case SIOCDELMULTI:
3458                 RL_LOCK(sc);
3459                 if ((ifp->if_drv_flags & IFF_DRV_RUNNING) != 0)
3460                         re_set_rxmode(sc);
3461                 RL_UNLOCK(sc);
3462                 break;
3463         case SIOCGIFMEDIA:
3464         case SIOCSIFMEDIA:
3465                 mii = device_get_softc(sc->rl_miibus);
3466                 error = ifmedia_ioctl(ifp, ifr, &mii->mii_media, command);
3467                 break;
3468         case SIOCSIFCAP:
3469             {
3470                 int mask, reinit;
3471
3472                 mask = ifr->ifr_reqcap ^ ifp->if_capenable;
3473                 reinit = 0;
3474 #ifdef DEVICE_POLLING
3475                 if (mask & IFCAP_POLLING) {
3476                         if (ifr->ifr_reqcap & IFCAP_POLLING) {
3477                                 error = ether_poll_register(re_poll, ifp);
3478                                 if (error)
3479                                         return (error);
3480                                 RL_LOCK(sc);
3481                                 /* Disable interrupts */
3482                                 CSR_WRITE_2(sc, RL_IMR, 0x0000);
3483                                 ifp->if_capenable |= IFCAP_POLLING;
3484                                 RL_UNLOCK(sc);
3485                         } else {
3486                                 error = ether_poll_deregister(ifp);
3487                                 /* Enable interrupts. */
3488                                 RL_LOCK(sc);
3489                                 CSR_WRITE_2(sc, RL_IMR, RL_INTRS_CPLUS);
3490                                 ifp->if_capenable &= ~IFCAP_POLLING;
3491                                 RL_UNLOCK(sc);
3492                         }
3493                 }
3494 #endif /* DEVICE_POLLING */
3495                 RL_LOCK(sc);
3496                 if ((mask & IFCAP_TXCSUM) != 0 &&
3497                     (ifp->if_capabilities & IFCAP_TXCSUM) != 0) {
3498                         ifp->if_capenable ^= IFCAP_TXCSUM;
3499                         if ((ifp->if_capenable & IFCAP_TXCSUM) != 0)
3500                                 ifp->if_hwassist |= RE_CSUM_FEATURES;
3501                         else
3502                                 ifp->if_hwassist &= ~RE_CSUM_FEATURES;
3503                         reinit = 1;
3504                 }
3505                 if ((mask & IFCAP_RXCSUM) != 0 &&
3506                     (ifp->if_capabilities & IFCAP_RXCSUM) != 0) {
3507                         ifp->if_capenable ^= IFCAP_RXCSUM;
3508                         reinit = 1;
3509                 }
3510                 if ((mask & IFCAP_TSO4) != 0 &&
3511                     (ifp->if_capabilities & IFCAP_TSO4) != 0) {
3512                         ifp->if_capenable ^= IFCAP_TSO4;
3513                         if ((IFCAP_TSO4 & ifp->if_capenable) != 0)
3514                                 ifp->if_hwassist |= CSUM_TSO;
3515                         else
3516                                 ifp->if_hwassist &= ~CSUM_TSO;
3517                         if (ifp->if_mtu > RL_TSO_MTU &&
3518                             (ifp->if_capenable & IFCAP_TSO4) != 0) {
3519                                 ifp->if_capenable &= ~IFCAP_TSO4;
3520                                 ifp->if_hwassist &= ~CSUM_TSO;
3521                         }
3522                 }
3523                 if ((mask & IFCAP_VLAN_HWTSO) != 0 &&
3524                     (ifp->if_capabilities & IFCAP_VLAN_HWTSO) != 0)
3525                         ifp->if_capenable ^= IFCAP_VLAN_HWTSO;
3526                 if ((mask & IFCAP_VLAN_HWTAGGING) != 0 &&
3527                     (ifp->if_capabilities & IFCAP_VLAN_HWTAGGING) != 0) {
3528                         ifp->if_capenable ^= IFCAP_VLAN_HWTAGGING;
3529                         /* TSO over VLAN requires VLAN hardware tagging. */
3530                         if ((ifp->if_capenable & IFCAP_VLAN_HWTAGGING) == 0)
3531                                 ifp->if_capenable &= ~IFCAP_VLAN_HWTSO;
3532                         reinit = 1;
3533                 }
3534                 if ((sc->rl_flags & RL_FLAG_JUMBOV2) != 0 &&
3535                     (mask & (IFCAP_HWCSUM | IFCAP_TSO4 |
3536                     IFCAP_VLAN_HWTSO)) != 0)
3537                                 reinit = 1;
3538                 if ((mask & IFCAP_WOL) != 0 &&
3539                     (ifp->if_capabilities & IFCAP_WOL) != 0) {
3540                         if ((mask & IFCAP_WOL_UCAST) != 0)
3541                                 ifp->if_capenable ^= IFCAP_WOL_UCAST;
3542                         if ((mask & IFCAP_WOL_MCAST) != 0)
3543                                 ifp->if_capenable ^= IFCAP_WOL_MCAST;
3544                         if ((mask & IFCAP_WOL_MAGIC) != 0)
3545                                 ifp->if_capenable ^= IFCAP_WOL_MAGIC;
3546                 }
3547                 if (reinit && ifp->if_drv_flags & IFF_DRV_RUNNING) {
3548                         ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
3549                         re_init_locked(sc);
3550                 }
3551                 RL_UNLOCK(sc);
3552                 VLAN_CAPABILITIES(ifp);
3553             }
3554                 break;
3555         default:
3556                 error = ether_ioctl(ifp, command, data);
3557                 break;
3558         }
3559
3560         return (error);
3561 }
3562
3563 static void
3564 re_watchdog(struct rl_softc *sc)
3565 {
3566         struct ifnet            *ifp;
3567
3568         RL_LOCK_ASSERT(sc);
3569
3570         if (sc->rl_watchdog_timer == 0 || --sc->rl_watchdog_timer != 0)
3571                 return;
3572
3573         ifp = sc->rl_ifp;
3574         re_txeof(sc);
3575         if (sc->rl_ldata.rl_tx_free == sc->rl_ldata.rl_tx_desc_cnt) {
3576                 if_printf(ifp, "watchdog timeout (missed Tx interrupts) "
3577                     "-- recovering\n");
3578                 if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
3579                         re_start_locked(ifp);
3580                 return;
3581         }
3582
3583         if_printf(ifp, "watchdog timeout\n");
3584         if_inc_counter(ifp, IFCOUNTER_OERRORS, 1);
3585
3586         re_rxeof(sc, NULL);
3587         ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
3588         re_init_locked(sc);
3589         if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
3590                 re_start_locked(ifp);
3591 }
3592
3593 /*
3594  * Stop the adapter and free any mbufs allocated to the
3595  * RX and TX lists.
3596  */
3597 static void
3598 re_stop(struct rl_softc *sc)
3599 {
3600         int                     i;
3601         struct ifnet            *ifp;
3602         struct rl_txdesc        *txd;
3603         struct rl_rxdesc        *rxd;
3604
3605         RL_LOCK_ASSERT(sc);
3606
3607         ifp = sc->rl_ifp;
3608
3609         sc->rl_watchdog_timer = 0;
3610         callout_stop(&sc->rl_stat_callout);
3611         ifp->if_drv_flags &= ~(IFF_DRV_RUNNING | IFF_DRV_OACTIVE);
3612
3613         /*
3614          * Disable accepting frames to put RX MAC into idle state.
3615          * Otherwise it's possible to get frames while stop command
3616          * execution is in progress and controller can DMA the frame
3617          * to already freed RX buffer during that period.
3618          */
3619         CSR_WRITE_4(sc, RL_RXCFG, CSR_READ_4(sc, RL_RXCFG) &
3620             ~(RL_RXCFG_RX_ALLPHYS | RL_RXCFG_RX_INDIV | RL_RXCFG_RX_MULTI |
3621             RL_RXCFG_RX_BROAD));
3622
3623         if ((sc->rl_flags & RL_FLAG_8168G_PLUS) != 0) {
3624                 /* Enable RXDV gate. */
3625                 CSR_WRITE_4(sc, RL_MISC, CSR_READ_4(sc, RL_MISC) |
3626                     0x00080000);
3627         }
3628
3629         if ((sc->rl_flags & RL_FLAG_WAIT_TXPOLL) != 0) {
3630                 for (i = RL_TIMEOUT; i > 0; i--) {
3631                         if ((CSR_READ_1(sc, sc->rl_txstart) &
3632                             RL_TXSTART_START) == 0)
3633                                 break;
3634                         DELAY(20);
3635                 }
3636                 if (i == 0)
3637                         device_printf(sc->rl_dev,
3638                             "stopping TX poll timed out!\n");
3639                 CSR_WRITE_1(sc, RL_COMMAND, 0x00);
3640         } else if ((sc->rl_flags & RL_FLAG_CMDSTOP) != 0) {
3641                 CSR_WRITE_1(sc, RL_COMMAND, RL_CMD_STOPREQ | RL_CMD_TX_ENB |
3642                     RL_CMD_RX_ENB);
3643                 if ((sc->rl_flags & RL_FLAG_CMDSTOP_WAIT_TXQ) != 0) {
3644                         for (i = RL_TIMEOUT; i > 0; i--) {
3645                                 if ((CSR_READ_4(sc, RL_TXCFG) &
3646                                     RL_TXCFG_QUEUE_EMPTY) != 0)
3647                                         break;
3648                                 DELAY(100);
3649                         }
3650                         if (i == 0)
3651                                 device_printf(sc->rl_dev,
3652                                    "stopping TXQ timed out!\n");
3653                 }
3654         } else
3655                 CSR_WRITE_1(sc, RL_COMMAND, 0x00);
3656         DELAY(1000);
3657         CSR_WRITE_2(sc, RL_IMR, 0x0000);
3658         CSR_WRITE_2(sc, RL_ISR, 0xFFFF);
3659
3660         if (sc->rl_head != NULL) {
3661                 m_freem(sc->rl_head);
3662                 sc->rl_head = sc->rl_tail = NULL;
3663         }
3664
3665         /* Free the TX list buffers. */
3666         for (i = 0; i < sc->rl_ldata.rl_tx_desc_cnt; i++) {
3667                 txd = &sc->rl_ldata.rl_tx_desc[i];
3668                 if (txd->tx_m != NULL) {
3669                         bus_dmamap_sync(sc->rl_ldata.rl_tx_mtag,
3670                             txd->tx_dmamap, BUS_DMASYNC_POSTWRITE);
3671                         bus_dmamap_unload(sc->rl_ldata.rl_tx_mtag,
3672                             txd->tx_dmamap);
3673                         m_freem(txd->tx_m);
3674                         txd->tx_m = NULL;
3675                 }
3676         }
3677
3678         /* Free the RX list buffers. */
3679         for (i = 0; i < sc->rl_ldata.rl_rx_desc_cnt; i++) {
3680                 rxd = &sc->rl_ldata.rl_rx_desc[i];
3681                 if (rxd->rx_m != NULL) {
3682                         bus_dmamap_sync(sc->rl_ldata.rl_rx_mtag,
3683                             rxd->rx_dmamap, BUS_DMASYNC_POSTREAD);
3684                         bus_dmamap_unload(sc->rl_ldata.rl_rx_mtag,
3685                             rxd->rx_dmamap);
3686                         m_freem(rxd->rx_m);
3687                         rxd->rx_m = NULL;
3688                 }
3689         }
3690
3691         if ((sc->rl_flags & RL_FLAG_JUMBOV2) != 0) {
3692                 for (i = 0; i < sc->rl_ldata.rl_rx_desc_cnt; i++) {
3693                         rxd = &sc->rl_ldata.rl_jrx_desc[i];
3694                         if (rxd->rx_m != NULL) {
3695                                 bus_dmamap_sync(sc->rl_ldata.rl_jrx_mtag,
3696                                     rxd->rx_dmamap, BUS_DMASYNC_POSTREAD);
3697                                 bus_dmamap_unload(sc->rl_ldata.rl_jrx_mtag,
3698                                     rxd->rx_dmamap);
3699                                 m_freem(rxd->rx_m);
3700                                 rxd->rx_m = NULL;
3701                         }
3702                 }
3703         }
3704 }
3705
3706 /*
3707  * Device suspend routine.  Stop the interface and save some PCI
3708  * settings in case the BIOS doesn't restore them properly on
3709  * resume.
3710  */
3711 static int
3712 re_suspend(device_t dev)
3713 {
3714         struct rl_softc         *sc;
3715
3716         sc = device_get_softc(dev);
3717
3718         RL_LOCK(sc);
3719         re_stop(sc);
3720         re_setwol(sc);
3721         sc->suspended = 1;
3722         RL_UNLOCK(sc);
3723
3724         return (0);
3725 }
3726
3727 /*
3728  * Device resume routine.  Restore some PCI settings in case the BIOS
3729  * doesn't, re-enable busmastering, and restart the interface if
3730  * appropriate.
3731  */
3732 static int
3733 re_resume(device_t dev)
3734 {
3735         struct rl_softc         *sc;
3736         struct ifnet            *ifp;
3737
3738         sc = device_get_softc(dev);
3739
3740         RL_LOCK(sc);
3741
3742         ifp = sc->rl_ifp;
3743         /* Take controller out of sleep mode. */
3744         if ((sc->rl_flags & RL_FLAG_MACSLEEP) != 0) {
3745                 if ((CSR_READ_1(sc, RL_MACDBG) & 0x80) == 0x80)
3746                         CSR_WRITE_1(sc, RL_GPIO,
3747                             CSR_READ_1(sc, RL_GPIO) | 0x01);
3748         }
3749
3750         /*
3751          * Clear WOL matching such that normal Rx filtering
3752          * wouldn't interfere with WOL patterns.
3753          */
3754         re_clrwol(sc);
3755
3756         /* reinitialize interface if necessary */
3757         if (ifp->if_flags & IFF_UP)
3758                 re_init_locked(sc);
3759
3760         sc->suspended = 0;
3761         RL_UNLOCK(sc);
3762
3763         return (0);
3764 }
3765
3766 /*
3767  * Stop all chip I/O so that the kernel's probe routines don't
3768  * get confused by errant DMAs when rebooting.
3769  */
3770 static int
3771 re_shutdown(device_t dev)
3772 {
3773         struct rl_softc         *sc;
3774
3775         sc = device_get_softc(dev);
3776
3777         RL_LOCK(sc);
3778         re_stop(sc);
3779         /*
3780          * Mark interface as down since otherwise we will panic if
3781          * interrupt comes in later on, which can happen in some
3782          * cases.
3783          */
3784         sc->rl_ifp->if_flags &= ~IFF_UP;
3785         re_setwol(sc);
3786         RL_UNLOCK(sc);
3787
3788         return (0);
3789 }
3790
3791 static void
3792 re_set_linkspeed(struct rl_softc *sc)
3793 {
3794         struct mii_softc *miisc;
3795         struct mii_data *mii;
3796         int aneg, i, phyno;
3797
3798         RL_LOCK_ASSERT(sc);
3799
3800         mii = device_get_softc(sc->rl_miibus);
3801         mii_pollstat(mii);
3802         aneg = 0;
3803         if ((mii->mii_media_status & (IFM_ACTIVE | IFM_AVALID)) ==
3804             (IFM_ACTIVE | IFM_AVALID)) {
3805                 switch IFM_SUBTYPE(mii->mii_media_active) {
3806                 case IFM_10_T:
3807                 case IFM_100_TX:
3808                         return;
3809                 case IFM_1000_T:
3810                         aneg++;
3811                         break;
3812                 default:
3813                         break;
3814                 }
3815         }
3816         miisc = LIST_FIRST(&mii->mii_phys);
3817         phyno = miisc->mii_phy;
3818         LIST_FOREACH(miisc, &mii->mii_phys, mii_list)
3819                 PHY_RESET(miisc);
3820         re_miibus_writereg(sc->rl_dev, phyno, MII_100T2CR, 0);
3821         re_miibus_writereg(sc->rl_dev, phyno,
3822             MII_ANAR, ANAR_TX_FD | ANAR_TX | ANAR_10_FD | ANAR_10 | ANAR_CSMA);
3823         re_miibus_writereg(sc->rl_dev, phyno,
3824             MII_BMCR, BMCR_AUTOEN | BMCR_STARTNEG);
3825         DELAY(1000);
3826         if (aneg != 0) {
3827                 /*
3828                  * Poll link state until re(4) get a 10/100Mbps link.
3829                  */
3830                 for (i = 0; i < MII_ANEGTICKS_GIGE; i++) {
3831                         mii_pollstat(mii);
3832                         if ((mii->mii_media_status & (IFM_ACTIVE | IFM_AVALID))
3833                             == (IFM_ACTIVE | IFM_AVALID)) {
3834                                 switch (IFM_SUBTYPE(mii->mii_media_active)) {
3835                                 case IFM_10_T:
3836                                 case IFM_100_TX:
3837                                         return;
3838                                 default:
3839                                         break;
3840                                 }
3841                         }
3842                         RL_UNLOCK(sc);
3843                         pause("relnk", hz);
3844                         RL_LOCK(sc);
3845                 }
3846                 if (i == MII_ANEGTICKS_GIGE)
3847                         device_printf(sc->rl_dev,
3848                             "establishing a link failed, WOL may not work!");
3849         }
3850         /*
3851          * No link, force MAC to have 100Mbps, full-duplex link.
3852          * MAC does not require reprogramming on resolved speed/duplex,
3853          * so this is just for completeness.
3854          */
3855         mii->mii_media_status = IFM_AVALID | IFM_ACTIVE;
3856         mii->mii_media_active = IFM_ETHER | IFM_100_TX | IFM_FDX;
3857 }
3858
3859 static void
3860 re_setwol(struct rl_softc *sc)
3861 {
3862         struct ifnet            *ifp;
3863         int                     pmc;
3864         uint16_t                pmstat;
3865         uint8_t                 v;
3866
3867         RL_LOCK_ASSERT(sc);
3868
3869         if (pci_find_cap(sc->rl_dev, PCIY_PMG, &pmc) != 0)
3870                 return;
3871
3872         ifp = sc->rl_ifp;
3873         /* Put controller into sleep mode. */
3874         if ((sc->rl_flags & RL_FLAG_MACSLEEP) != 0) {
3875                 if ((CSR_READ_1(sc, RL_MACDBG) & 0x80) == 0x80)
3876                         CSR_WRITE_1(sc, RL_GPIO,
3877                             CSR_READ_1(sc, RL_GPIO) & ~0x01);
3878         }
3879         if ((ifp->if_capenable & IFCAP_WOL) != 0) {
3880                 if ((sc->rl_flags & RL_FLAG_8168G_PLUS) != 0) {
3881                         /* Disable RXDV gate. */
3882                         CSR_WRITE_4(sc, RL_MISC, CSR_READ_4(sc, RL_MISC) &
3883                             ~0x00080000);
3884                 }
3885                 re_set_rxmode(sc);
3886                 if ((sc->rl_flags & RL_FLAG_WOL_MANLINK) != 0)
3887                         re_set_linkspeed(sc);
3888                 if ((sc->rl_flags & RL_FLAG_WOLRXENB) != 0)
3889                         CSR_WRITE_1(sc, RL_COMMAND, RL_CMD_RX_ENB);
3890         }
3891         /* Enable config register write. */
3892         CSR_WRITE_1(sc, RL_EECMD, RL_EE_MODE);
3893
3894         /* Enable PME. */
3895         v = CSR_READ_1(sc, sc->rl_cfg1);
3896         v &= ~RL_CFG1_PME;
3897         if ((ifp->if_capenable & IFCAP_WOL) != 0)
3898                 v |= RL_CFG1_PME;
3899         CSR_WRITE_1(sc, sc->rl_cfg1, v);
3900
3901         v = CSR_READ_1(sc, sc->rl_cfg3);
3902         v &= ~(RL_CFG3_WOL_LINK | RL_CFG3_WOL_MAGIC);
3903         if ((ifp->if_capenable & IFCAP_WOL_MAGIC) != 0)
3904                 v |= RL_CFG3_WOL_MAGIC;
3905         CSR_WRITE_1(sc, sc->rl_cfg3, v);
3906
3907         v = CSR_READ_1(sc, sc->rl_cfg5);
3908         v &= ~(RL_CFG5_WOL_BCAST | RL_CFG5_WOL_MCAST | RL_CFG5_WOL_UCAST |
3909             RL_CFG5_WOL_LANWAKE);
3910         if ((ifp->if_capenable & IFCAP_WOL_UCAST) != 0)
3911                 v |= RL_CFG5_WOL_UCAST;
3912         if ((ifp->if_capenable & IFCAP_WOL_MCAST) != 0)
3913                 v |= RL_CFG5_WOL_MCAST | RL_CFG5_WOL_BCAST;
3914         if ((ifp->if_capenable & IFCAP_WOL) != 0)
3915                 v |= RL_CFG5_WOL_LANWAKE;
3916         CSR_WRITE_1(sc, sc->rl_cfg5, v);
3917
3918         /* Config register write done. */
3919         CSR_WRITE_1(sc, RL_EECMD, RL_EEMODE_OFF);
3920
3921         if ((ifp->if_capenable & IFCAP_WOL) == 0 &&
3922             (sc->rl_flags & RL_FLAG_PHYWAKE_PM) != 0)
3923                 CSR_WRITE_1(sc, RL_PMCH, CSR_READ_1(sc, RL_PMCH) & ~0x80);
3924         /*
3925          * It seems that hardware resets its link speed to 100Mbps in
3926          * power down mode so switching to 100Mbps in driver is not
3927          * needed.
3928          */
3929
3930         /* Request PME if WOL is requested. */
3931         pmstat = pci_read_config(sc->rl_dev, pmc + PCIR_POWER_STATUS, 2);
3932         pmstat &= ~(PCIM_PSTAT_PME | PCIM_PSTAT_PMEENABLE);
3933         if ((ifp->if_capenable & IFCAP_WOL) != 0)
3934                 pmstat |= PCIM_PSTAT_PME | PCIM_PSTAT_PMEENABLE;
3935         pci_write_config(sc->rl_dev, pmc + PCIR_POWER_STATUS, pmstat, 2);
3936 }
3937
3938 static void
3939 re_clrwol(struct rl_softc *sc)
3940 {
3941         int                     pmc;
3942         uint8_t                 v;
3943
3944         RL_LOCK_ASSERT(sc);
3945
3946         if (pci_find_cap(sc->rl_dev, PCIY_PMG, &pmc) != 0)
3947                 return;
3948
3949         /* Enable config register write. */
3950         CSR_WRITE_1(sc, RL_EECMD, RL_EE_MODE);
3951
3952         v = CSR_READ_1(sc, sc->rl_cfg3);
3953         v &= ~(RL_CFG3_WOL_LINK | RL_CFG3_WOL_MAGIC);
3954         CSR_WRITE_1(sc, sc->rl_cfg3, v);
3955
3956         /* Config register write done. */
3957         CSR_WRITE_1(sc, RL_EECMD, RL_EEMODE_OFF);
3958
3959         v = CSR_READ_1(sc, sc->rl_cfg5);
3960         v &= ~(RL_CFG5_WOL_BCAST | RL_CFG5_WOL_MCAST | RL_CFG5_WOL_UCAST);
3961         v &= ~RL_CFG5_WOL_LANWAKE;
3962         CSR_WRITE_1(sc, sc->rl_cfg5, v);
3963 }
3964
3965 static void
3966 re_add_sysctls(struct rl_softc *sc)
3967 {
3968         struct sysctl_ctx_list  *ctx;
3969         struct sysctl_oid_list  *children;
3970         int                     error;
3971
3972         ctx = device_get_sysctl_ctx(sc->rl_dev);
3973         children = SYSCTL_CHILDREN(device_get_sysctl_tree(sc->rl_dev));
3974
3975         SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "stats",
3976             CTLTYPE_INT | CTLFLAG_RW, sc, 0, re_sysctl_stats, "I",
3977             "Statistics Information");
3978         if ((sc->rl_flags & (RL_FLAG_MSI | RL_FLAG_MSIX)) == 0)
3979                 return;
3980
3981         SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "int_rx_mod",
3982             CTLTYPE_INT | CTLFLAG_RW, &sc->rl_int_rx_mod, 0,
3983             sysctl_hw_re_int_mod, "I", "re RX interrupt moderation");
3984         /* Pull in device tunables. */
3985         sc->rl_int_rx_mod = RL_TIMER_DEFAULT;
3986         error = resource_int_value(device_get_name(sc->rl_dev),
3987             device_get_unit(sc->rl_dev), "int_rx_mod", &sc->rl_int_rx_mod);
3988         if (error == 0) {
3989                 if (sc->rl_int_rx_mod < RL_TIMER_MIN ||
3990                     sc->rl_int_rx_mod > RL_TIMER_MAX) {
3991                         device_printf(sc->rl_dev, "int_rx_mod value out of "
3992                             "range; using default: %d\n",
3993                             RL_TIMER_DEFAULT);
3994                         sc->rl_int_rx_mod = RL_TIMER_DEFAULT;
3995                 }
3996         }
3997 }
3998
3999 static int
4000 re_sysctl_stats(SYSCTL_HANDLER_ARGS)
4001 {
4002         struct rl_softc         *sc;
4003         struct rl_stats         *stats;
4004         int                     error, i, result;
4005
4006         result = -1;
4007         error = sysctl_handle_int(oidp, &result, 0, req);
4008         if (error || req->newptr == NULL)
4009                 return (error);
4010
4011         if (result == 1) {
4012                 sc = (struct rl_softc *)arg1;
4013                 RL_LOCK(sc);
4014                 if ((sc->rl_ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) {
4015                         RL_UNLOCK(sc);
4016                         goto done;
4017                 }
4018                 bus_dmamap_sync(sc->rl_ldata.rl_stag,
4019                     sc->rl_ldata.rl_smap, BUS_DMASYNC_PREREAD);
4020                 CSR_WRITE_4(sc, RL_DUMPSTATS_HI,
4021                     RL_ADDR_HI(sc->rl_ldata.rl_stats_addr));
4022                 CSR_WRITE_4(sc, RL_DUMPSTATS_LO,
4023                     RL_ADDR_LO(sc->rl_ldata.rl_stats_addr));
4024                 CSR_WRITE_4(sc, RL_DUMPSTATS_LO,
4025                     RL_ADDR_LO(sc->rl_ldata.rl_stats_addr |
4026                     RL_DUMPSTATS_START));
4027                 for (i = RL_TIMEOUT; i > 0; i--) {
4028                         if ((CSR_READ_4(sc, RL_DUMPSTATS_LO) &
4029                             RL_DUMPSTATS_START) == 0)
4030                                 break;
4031                         DELAY(1000);
4032                 }
4033                 bus_dmamap_sync(sc->rl_ldata.rl_stag,
4034                     sc->rl_ldata.rl_smap, BUS_DMASYNC_POSTREAD);
4035                 RL_UNLOCK(sc);
4036                 if (i == 0) {
4037                         device_printf(sc->rl_dev,
4038                             "DUMP statistics request timed out\n");
4039                         return (ETIMEDOUT);
4040                 }
4041 done:
4042                 stats = sc->rl_ldata.rl_stats;
4043                 printf("%s statistics:\n", device_get_nameunit(sc->rl_dev));
4044                 printf("Tx frames : %ju\n",
4045                     (uintmax_t)le64toh(stats->rl_tx_pkts));
4046                 printf("Rx frames : %ju\n",
4047                     (uintmax_t)le64toh(stats->rl_rx_pkts));
4048                 printf("Tx errors : %ju\n",
4049                     (uintmax_t)le64toh(stats->rl_tx_errs));
4050                 printf("Rx errors : %u\n",
4051                     le32toh(stats->rl_rx_errs));
4052                 printf("Rx missed frames : %u\n",
4053                     (uint32_t)le16toh(stats->rl_missed_pkts));
4054                 printf("Rx frame alignment errs : %u\n",
4055                     (uint32_t)le16toh(stats->rl_rx_framealign_errs));
4056                 printf("Tx single collisions : %u\n",
4057                     le32toh(stats->rl_tx_onecoll));
4058                 printf("Tx multiple collisions : %u\n",
4059                     le32toh(stats->rl_tx_multicolls));
4060                 printf("Rx unicast frames : %ju\n",
4061                     (uintmax_t)le64toh(stats->rl_rx_ucasts));
4062                 printf("Rx broadcast frames : %ju\n",
4063                     (uintmax_t)le64toh(stats->rl_rx_bcasts));
4064                 printf("Rx multicast frames : %u\n",
4065                     le32toh(stats->rl_rx_mcasts));
4066                 printf("Tx aborts : %u\n",
4067                     (uint32_t)le16toh(stats->rl_tx_aborts));
4068                 printf("Tx underruns : %u\n",
4069                     (uint32_t)le16toh(stats->rl_rx_underruns));
4070         }
4071
4072         return (error);
4073 }
4074
4075 static int
4076 sysctl_int_range(SYSCTL_HANDLER_ARGS, int low, int high)
4077 {
4078         int error, value;
4079
4080         if (arg1 == NULL)
4081                 return (EINVAL);
4082         value = *(int *)arg1;
4083         error = sysctl_handle_int(oidp, &value, 0, req);
4084         if (error || req->newptr == NULL)
4085                 return (error);
4086         if (value < low || value > high)
4087                 return (EINVAL);
4088         *(int *)arg1 = value;
4089
4090         return (0);
4091 }
4092
4093 static int
4094 sysctl_hw_re_int_mod(SYSCTL_HANDLER_ARGS)
4095 {
4096
4097         return (sysctl_int_range(oidp, arg1, arg2, req, RL_TIMER_MIN,
4098             RL_TIMER_MAX));
4099 }
4100
4101 #ifdef DEBUGNET
4102 static void
4103 re_debugnet_init(struct ifnet *ifp, int *nrxr, int *ncl, int *clsize)
4104 {
4105         struct rl_softc *sc;
4106
4107         sc = if_getsoftc(ifp);
4108         RL_LOCK(sc);
4109         *nrxr = sc->rl_ldata.rl_rx_desc_cnt;
4110         *ncl = DEBUGNET_MAX_IN_FLIGHT;
4111         *clsize = (ifp->if_mtu > RL_MTU &&
4112             (sc->rl_flags & RL_FLAG_JUMBOV2) != 0) ? MJUM9BYTES : MCLBYTES;
4113         RL_UNLOCK(sc);
4114 }
4115
4116 static void
4117 re_debugnet_event(struct ifnet *ifp __unused, enum debugnet_ev event __unused)
4118 {
4119 }
4120
4121 static int
4122 re_debugnet_transmit(struct ifnet *ifp, struct mbuf *m)
4123 {
4124         struct rl_softc *sc;
4125         int error;
4126
4127         sc = if_getsoftc(ifp);
4128         if ((if_getdrvflags(ifp) & (IFF_DRV_RUNNING | IFF_DRV_OACTIVE)) !=
4129             IFF_DRV_RUNNING || (sc->rl_flags & RL_FLAG_LINK) == 0)
4130                 return (EBUSY);
4131
4132         error = re_encap(sc, &m);
4133         if (error == 0)
4134                 re_start_tx(sc);
4135         return (error);
4136 }
4137
4138 static int
4139 re_debugnet_poll(struct ifnet *ifp, int count)
4140 {
4141         struct rl_softc *sc;
4142         int error;
4143
4144         sc = if_getsoftc(ifp);
4145         if ((if_getdrvflags(ifp) & IFF_DRV_RUNNING) == 0 ||
4146             (sc->rl_flags & RL_FLAG_LINK) == 0)
4147                 return (EBUSY);
4148
4149         re_txeof(sc);
4150         error = re_rxeof(sc, NULL);
4151         if (error != 0 && error != EAGAIN)
4152                 return (error);
4153         return (0);
4154 }
4155 #endif /* DEBUGNET */