]> CyberLeo.Net >> Repos - FreeBSD/releng/7.2.git/blob - sys/dev/re/if_re.c
Create releng/7.2 from stable/7 in preparation for 7.2-RELEASE.
[FreeBSD/releng/7.2.git] / sys / dev / re / if_re.c
1 /*-
2  * Copyright (c) 1997, 1998-2003
3  *      Bill Paul <wpaul@windriver.com>.  All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice, this list of conditions and the following disclaimer.
10  * 2. Redistributions in binary form must reproduce the above copyright
11  *    notice, this list of conditions and the following disclaimer in the
12  *    documentation and/or other materials provided with the distribution.
13  * 3. All advertising materials mentioning features or use of this software
14  *    must display the following acknowledgement:
15  *      This product includes software developed by Bill Paul.
16  * 4. Neither the name of the author nor the names of any co-contributors
17  *    may be used to endorse or promote products derived from this software
18  *    without specific prior written permission.
19  *
20  * THIS SOFTWARE IS PROVIDED BY Bill Paul AND CONTRIBUTORS ``AS IS'' AND
21  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23  * ARE DISCLAIMED.  IN NO EVENT SHALL Bill Paul OR THE VOICES IN HIS HEAD
24  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
25  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
26  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
27  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
28  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
29  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
30  * THE POSSIBILITY OF SUCH DAMAGE.
31  */
32
33 #include <sys/cdefs.h>
34 __FBSDID("$FreeBSD$");
35
36 /*
37  * RealTek 8139C+/8169/8169S/8110S/8168/8111/8101E PCI NIC driver
38  *
39  * Written by Bill Paul <wpaul@windriver.com>
40  * Senior Networking Software Engineer
41  * Wind River Systems
42  */
43
44 /*
45  * This driver is designed to support RealTek's next generation of
46  * 10/100 and 10/100/1000 PCI ethernet controllers. There are currently
47  * seven devices in this family: the RTL8139C+, the RTL8169, the RTL8169S,
48  * RTL8110S, the RTL8168, the RTL8111 and the RTL8101E.
49  *
50  * The 8139C+ is a 10/100 ethernet chip. It is backwards compatible
51  * with the older 8139 family, however it also supports a special
52  * C+ mode of operation that provides several new performance enhancing
53  * features. These include:
54  *
55  *      o Descriptor based DMA mechanism. Each descriptor represents
56  *        a single packet fragment. Data buffers may be aligned on
57  *        any byte boundary.
58  *
59  *      o 64-bit DMA
60  *
61  *      o TCP/IP checksum offload for both RX and TX
62  *
63  *      o High and normal priority transmit DMA rings
64  *
65  *      o VLAN tag insertion and extraction
66  *
67  *      o TCP large send (segmentation offload)
68  *
69  * Like the 8139, the 8139C+ also has a built-in 10/100 PHY. The C+
70  * programming API is fairly straightforward. The RX filtering, EEPROM
71  * access and PHY access is the same as it is on the older 8139 series
72  * chips.
73  *
74  * The 8169 is a 64-bit 10/100/1000 gigabit ethernet MAC. It has almost the
75  * same programming API and feature set as the 8139C+ with the following
76  * differences and additions:
77  *
78  *      o 1000Mbps mode
79  *
80  *      o Jumbo frames
81  *
82  *      o GMII and TBI ports/registers for interfacing with copper
83  *        or fiber PHYs
84  *
85  *      o RX and TX DMA rings can have up to 1024 descriptors
86  *        (the 8139C+ allows a maximum of 64)
87  *
88  *      o Slight differences in register layout from the 8139C+
89  *
90  * The TX start and timer interrupt registers are at different locations
91  * on the 8169 than they are on the 8139C+. Also, the status word in the
92  * RX descriptor has a slightly different bit layout. The 8169 does not
93  * have a built-in PHY. Most reference boards use a Marvell 88E1000 'Alaska'
94  * copper gigE PHY.
95  *
96  * The 8169S/8110S 10/100/1000 devices have built-in copper gigE PHYs
97  * (the 'S' stands for 'single-chip'). These devices have the same
98  * programming API as the older 8169, but also have some vendor-specific
99  * registers for the on-board PHY. The 8110S is a LAN-on-motherboard
100  * part designed to be pin-compatible with the RealTek 8100 10/100 chip.
101  *
102  * This driver takes advantage of the RX and TX checksum offload and
103  * VLAN tag insertion/extraction features. It also implements TX
104  * interrupt moderation using the timer interrupt registers, which
105  * significantly reduces TX interrupt load. There is also support
106  * for jumbo frames, however the 8169/8169S/8110S can not transmit
107  * jumbo frames larger than 7440, so the max MTU possible with this
108  * driver is 7422 bytes.
109  */
110
111 #ifdef HAVE_KERNEL_OPTION_HEADERS
112 #include "opt_device_polling.h"
113 #endif
114
115 #include <sys/param.h>
116 #include <sys/endian.h>
117 #include <sys/systm.h>
118 #include <sys/sockio.h>
119 #include <sys/mbuf.h>
120 #include <sys/malloc.h>
121 #include <sys/module.h>
122 #include <sys/kernel.h>
123 #include <sys/socket.h>
124 #include <sys/lock.h>
125 #include <sys/mutex.h>
126 #include <sys/taskqueue.h>
127
128 #include <net/if.h>
129 #include <net/if_arp.h>
130 #include <net/ethernet.h>
131 #include <net/if_dl.h>
132 #include <net/if_media.h>
133 #include <net/if_types.h>
134 #include <net/if_vlan_var.h>
135
136 #include <net/bpf.h>
137
138 #include <machine/bus.h>
139 #include <machine/resource.h>
140 #include <sys/bus.h>
141 #include <sys/rman.h>
142
143 #include <dev/mii/mii.h>
144 #include <dev/mii/miivar.h>
145
146 #include <dev/pci/pcireg.h>
147 #include <dev/pci/pcivar.h>
148
149 #include <pci/if_rlreg.h>
150
/* This driver depends on the PCI bus, ethernet and MII bus modules. */
MODULE_DEPEND(re, pci, 1, 1, 1);
MODULE_DEPEND(re, ether, 1, 1, 1);
MODULE_DEPEND(re, miibus, 1, 1, 1);
154
155 /* "device miibus" required.  See GENERIC if you get errors here. */
156 #include "miibus_if.h"
157
/* Tunables. */
static int msi_disable = 0;	/* hw.re.msi_disable: 1 forces legacy INTx instead of MSI */
TUNABLE_INT("hw.re.msi_disable", &msi_disable);
static int prefer_iomap = 0;	/* hw.re.prefer_iomap: 1 prefers I/O port over memory-mapped access */
TUNABLE_INT("hw.re.prefer_iomap", &prefer_iomap);

/* Checksum-offload capabilities handled by the hardware (IP/TCP/UDP). */
#define RE_CSUM_FEATURES    (CSUM_IP | CSUM_TCP | CSUM_UDP)
165
/*
 * Various supported device vendors/types and their names.
 * Each entry is { vendor ID, device ID, match flags, description };
 * the description string is reported at probe time.
 */
static struct rl_type re_devs[] = {
	{ DLINK_VENDORID, DLINK_DEVICEID_528T, 0,
	    "D-Link DGE-528(T) Gigabit Ethernet Adapter" },
	{ RT_VENDORID, RT_DEVICEID_8139, 0,
	    "RealTek 8139C+ 10/100BaseTX" },
	{ RT_VENDORID, RT_DEVICEID_8101E, 0,
	    "RealTek 8101E/8102E/8102EL PCIe 10/100baseTX" },
	{ RT_VENDORID, RT_DEVICEID_8168, 0,
	    "RealTek 8168/8168B/8168C/8168CP/8168D/8111B/8111C/8111CP PCIe "
	    "Gigabit Ethernet" },
	{ RT_VENDORID, RT_DEVICEID_8169, 0,
	    "RealTek 8169/8169S/8169SB(L)/8110S/8110SB(L) Gigabit Ethernet" },
	{ RT_VENDORID, RT_DEVICEID_8169SC, 0,
	    "RealTek 8169SC/8110SC Single-chip Gigabit Ethernet" },
	{ COREGA_VENDORID, COREGA_DEVICEID_CGLAPCIGT, 0,
	    "Corega CG-LAPCIGT (RTL8169S) Gigabit Ethernet" },
	{ LINKSYS_VENDORID, LINKSYS_DEVICEID_EG1032, 0,
	    "Linksys EG1032 (RTL8169S) Gigabit Ethernet" },
	{ USR_VENDORID, USR_DEVICEID_997902, 0,
	    "US Robotics 997902 (RTL8169S) Gigabit Ethernet" }
};
190
/*
 * Map hardware revision codes (RL_HWREV_*) to a chip family
 * (RL_8139, RL_8139CPLUS or RL_8169) and a human-readable revision
 * name.  The table is terminated by an all-zero sentinel entry.
 */
static struct rl_hwrev re_hwrevs[] = {
	{ RL_HWREV_8139, RL_8139,  "" },
	{ RL_HWREV_8139A, RL_8139, "A" },
	{ RL_HWREV_8139AG, RL_8139, "A-G" },
	{ RL_HWREV_8139B, RL_8139, "B" },
	{ RL_HWREV_8130, RL_8139, "8130" },
	{ RL_HWREV_8139C, RL_8139, "C" },
	{ RL_HWREV_8139D, RL_8139, "8139D/8100B/8100C" },
	{ RL_HWREV_8139CPLUS, RL_8139CPLUS, "C+"},
	{ RL_HWREV_8168_SPIN1, RL_8169, "8168"},
	{ RL_HWREV_8169, RL_8169, "8169"},
	{ RL_HWREV_8169S, RL_8169, "8169S"},
	{ RL_HWREV_8110S, RL_8169, "8110S"},
	{ RL_HWREV_8169_8110SB, RL_8169, "8169SB/8110SB"},
	{ RL_HWREV_8169_8110SC, RL_8169, "8169SC/8110SC"},
	{ RL_HWREV_8169_8110SBL, RL_8169, "8169SBL/8110SBL"},
	{ RL_HWREV_8169_8110SCE, RL_8169, "8169SC/8110SC"},
	{ RL_HWREV_8100, RL_8139, "8100"},
	{ RL_HWREV_8101, RL_8139, "8101"},
	{ RL_HWREV_8100E, RL_8169, "8100E"},
	{ RL_HWREV_8101E, RL_8169, "8101E"},
	{ RL_HWREV_8102E, RL_8169, "8102E"},
	{ RL_HWREV_8102EL, RL_8169, "8102EL"},
	{ RL_HWREV_8168_SPIN2, RL_8169, "8168"},
	{ RL_HWREV_8168_SPIN3, RL_8169, "8168"},
	{ RL_HWREV_8168C, RL_8169, "8168C/8111C"},
	{ RL_HWREV_8168C_SPIN2, RL_8169, "8168C/8111C"},
	{ RL_HWREV_8168CP, RL_8169, "8168CP/8111CP"},
	{ RL_HWREV_8168D, RL_8169, "8168D"},
	{ 0, 0, NULL }
};
222
/* Device interface (newbus entry points). */
static int re_probe		(device_t);
static int re_attach		(device_t);
static int re_detach		(device_t);

/* Transmit path. */
static int re_encap		(struct rl_softc *, struct mbuf **);

/* DMA resource and descriptor ring management. */
static void re_dma_map_addr	(void *, bus_dma_segment_t *, int, int);
static int re_allocmem		(device_t, struct rl_softc *);
static __inline void re_discard_rxbuf
				(struct rl_softc *, int);
static int re_newbuf		(struct rl_softc *, int);
static int re_rx_list_init	(struct rl_softc *);
static int re_tx_list_init	(struct rl_softc *);
#ifdef RE_FIXUP_RX
static __inline void re_fixup_rx
				(struct mbuf *);
#endif

/* RX/TX completion handling. */
static int re_rxeof		(struct rl_softc *);
static void re_txeof		(struct rl_softc *);
#ifdef DEVICE_POLLING
static void re_poll		(struct ifnet *, enum poll_cmd, int);
static void re_poll_locked	(struct ifnet *, enum poll_cmd, int);
#endif

/* Interrupt, taskqueue and ifnet entry points. */
static int re_intr		(void *);
static void re_tick		(void *);
static void re_tx_task		(void *, int);
static void re_int_task		(void *, int);
static void re_start		(struct ifnet *);
static int re_ioctl		(struct ifnet *, u_long, caddr_t);
static void re_init		(void *);
static void re_init_locked	(struct rl_softc *);
static void re_stop		(struct rl_softc *);
static void re_watchdog		(struct rl_softc *);
static int re_suspend		(device_t);
static int re_resume		(device_t);
static int re_shutdown		(device_t);
static int re_ifmedia_upd	(struct ifnet *);
static void re_ifmedia_sts	(struct ifnet *, struct ifmediareq *);

/* EEPROM and PHY register access helpers. */
static void re_eeprom_putbyte	(struct rl_softc *, int);
static void re_eeprom_getword	(struct rl_softc *, int, u_int16_t *);
static void re_read_eeprom	(struct rl_softc *, caddr_t, int, int);
static int re_gmii_readreg	(device_t, int, int);
static int re_gmii_writereg	(device_t, int, int, int);

/* MII bus interface methods. */
static int re_miibus_readreg	(device_t, int, int);
static int re_miibus_writereg	(device_t, int, int, int);
static void re_miibus_statchg	(device_t);

/* Chip configuration helpers. */
static void re_set_rxmode		(struct rl_softc *);
static void re_reset		(struct rl_softc *);
static void re_setwol		(struct rl_softc *);
static void re_clrwol		(struct rl_softc *);

#ifdef RE_DIAG
static int re_diag		(struct rl_softc *);
#endif
280
/* Newbus method dispatch table for the re(4) driver. */
static device_method_t re_methods[] = {
	/* Device interface */
	DEVMETHOD(device_probe,		re_probe),
	DEVMETHOD(device_attach,	re_attach),
	DEVMETHOD(device_detach,	re_detach),
	DEVMETHOD(device_suspend,	re_suspend),
	DEVMETHOD(device_resume,	re_resume),
	DEVMETHOD(device_shutdown,	re_shutdown),

	/* bus interface */
	DEVMETHOD(bus_print_child,	bus_generic_print_child),
	DEVMETHOD(bus_driver_added,	bus_generic_driver_added),

	/* MII interface */
	DEVMETHOD(miibus_readreg,	re_miibus_readreg),
	DEVMETHOD(miibus_writereg,	re_miibus_writereg),
	DEVMETHOD(miibus_statchg,	re_miibus_statchg),

	{ 0, 0 }
};

/* Driver declaration: name, methods and per-device softc size. */
static driver_t re_driver = {
	"re",
	re_methods,
	sizeof(struct rl_softc)
};

static devclass_t re_devclass;

/* Attach on both PCI and CardBus; also export a miibus child bus. */
DRIVER_MODULE(re, pci, re_driver, re_devclass, 0, 0);
DRIVER_MODULE(re, cardbus, re_driver, re_devclass, 0, 0);
DRIVER_MODULE(miibus, re, miibus_driver, miibus_devclass, 0, 0);
313
/* Set bit(s) x in the EEPROM command register (read-modify-write). */
#define EE_SET(x)					\
	CSR_WRITE_1(sc, RL_EECMD,			\
		CSR_READ_1(sc, RL_EECMD) | x)

/* Clear bit(s) x in the EEPROM command register (read-modify-write). */
#define EE_CLR(x)					\
	CSR_WRITE_1(sc, RL_EECMD,			\
		CSR_READ_1(sc, RL_EECMD) & ~x)
/*
 * Send a read command and address to the EEPROM, check for ACK.
 * This bit-bangs the serial EEPROM interface through the EECMD
 * register; the DELAY() calls provide the required signal timing.
 */
static void
re_eeprom_putbyte(struct rl_softc *sc, int addr)
{
	int			d, i;

	/* Prepend the serial-EEPROM READ opcode above the address bits. */
	d = addr | (RL_9346_READ << sc->rl_eewidth);

	/*
	 * Feed in each bit and strobe the clock.
	 * 'i' walks a one-bit mask down from the MSB of the
	 * opcode+address field, so bits go out MSB first.
	 */

	for (i = 1 << (sc->rl_eewidth + 3); i; i >>= 1) {
		if (d & i) {
			EE_SET(RL_EE_DATAIN);
		} else {
			EE_CLR(RL_EE_DATAIN);
		}
		DELAY(100);
		EE_SET(RL_EE_CLK);	/* Rising edge latches the data bit. */
		DELAY(150);
		EE_CLR(RL_EE_CLK);
		DELAY(100);
	}
}
349
/*
 * Read a word of data stored in the EEPROM at address 'addr.'
 * The result is stored through 'dest'.
 */
static void
re_eeprom_getword(struct rl_softc *sc, int addr, u_int16_t *dest)
{
	int			i;
	u_int16_t		word = 0;

	/*
	 * Send address of word we want to read.
	 */
	re_eeprom_putbyte(sc, addr);

	/*
	 * Start reading bits from EEPROM.
	 * The device shifts the word out MSB first; each clock pulse
	 * makes the next bit available on the DATAOUT line.
	 */
	for (i = 0x8000; i; i >>= 1) {
		EE_SET(RL_EE_CLK);
		DELAY(100);
		if (CSR_READ_1(sc, RL_EECMD) & RL_EE_DATAOUT)
			word |= i;
		EE_CLR(RL_EE_CLK);
		DELAY(100);
	}

	*dest = word;
}
378
379 /*
380  * Read a sequence of words from the EEPROM.
381  */
382 static void
383 re_read_eeprom(struct rl_softc *sc, caddr_t dest, int off, int cnt)
384 {
385         int                     i;
386         u_int16_t               word = 0, *ptr;
387
388         CSR_SETBIT_1(sc, RL_EECMD, RL_EEMODE_PROGRAM);
389
390         DELAY(100);
391
392         for (i = 0; i < cnt; i++) {
393                 CSR_SETBIT_1(sc, RL_EECMD, RL_EE_SEL);
394                 re_eeprom_getword(sc, off + i, &word);
395                 CSR_CLRBIT_1(sc, RL_EECMD, RL_EE_SEL);
396                 ptr = (u_int16_t *)(dest + (i * 2));
397                 *ptr = word;
398         }
399
400         CSR_CLRBIT_1(sc, RL_EECMD, RL_EEMODE_PROGRAM);
401 }
402
403 static int
404 re_gmii_readreg(device_t dev, int phy, int reg)
405 {
406         struct rl_softc         *sc;
407         u_int32_t               rval;
408         int                     i;
409
410         if (phy != 1)
411                 return (0);
412
413         sc = device_get_softc(dev);
414
415         /* Let the rgephy driver read the GMEDIASTAT register */
416
417         if (reg == RL_GMEDIASTAT) {
418                 rval = CSR_READ_1(sc, RL_GMEDIASTAT);
419                 return (rval);
420         }
421
422         CSR_WRITE_4(sc, RL_PHYAR, reg << 16);
423         DELAY(1000);
424
425         for (i = 0; i < RL_PHY_TIMEOUT; i++) {
426                 rval = CSR_READ_4(sc, RL_PHYAR);
427                 if (rval & RL_PHYAR_BUSY)
428                         break;
429                 DELAY(100);
430         }
431
432         if (i == RL_PHY_TIMEOUT) {
433                 device_printf(sc->rl_dev, "PHY read failed\n");
434                 return (0);
435         }
436
437         return (rval & RL_PHYAR_PHYDATA);
438 }
439
440 static int
441 re_gmii_writereg(device_t dev, int phy, int reg, int data)
442 {
443         struct rl_softc         *sc;
444         u_int32_t               rval;
445         int                     i;
446
447         sc = device_get_softc(dev);
448
449         CSR_WRITE_4(sc, RL_PHYAR, (reg << 16) |
450             (data & RL_PHYAR_PHYDATA) | RL_PHYAR_BUSY);
451         DELAY(1000);
452
453         for (i = 0; i < RL_PHY_TIMEOUT; i++) {
454                 rval = CSR_READ_4(sc, RL_PHYAR);
455                 if (!(rval & RL_PHYAR_BUSY))
456                         break;
457                 DELAY(100);
458         }
459
460         if (i == RL_PHY_TIMEOUT) {
461                 device_printf(sc->rl_dev, "PHY write failed\n");
462                 return (0);
463         }
464
465         return (0);
466 }
467
468 static int
469 re_miibus_readreg(device_t dev, int phy, int reg)
470 {
471         struct rl_softc         *sc;
472         u_int16_t               rval = 0;
473         u_int16_t               re8139_reg = 0;
474
475         sc = device_get_softc(dev);
476
477         if (sc->rl_type == RL_8169) {
478                 rval = re_gmii_readreg(dev, phy, reg);
479                 return (rval);
480         }
481
482         /* Pretend the internal PHY is only at address 0 */
483         if (phy) {
484                 return (0);
485         }
486         switch (reg) {
487         case MII_BMCR:
488                 re8139_reg = RL_BMCR;
489                 break;
490         case MII_BMSR:
491                 re8139_reg = RL_BMSR;
492                 break;
493         case MII_ANAR:
494                 re8139_reg = RL_ANAR;
495                 break;
496         case MII_ANER:
497                 re8139_reg = RL_ANER;
498                 break;
499         case MII_ANLPAR:
500                 re8139_reg = RL_LPAR;
501                 break;
502         case MII_PHYIDR1:
503         case MII_PHYIDR2:
504                 return (0);
505         /*
506          * Allow the rlphy driver to read the media status
507          * register. If we have a link partner which does not
508          * support NWAY, this is the register which will tell
509          * us the results of parallel detection.
510          */
511         case RL_MEDIASTAT:
512                 rval = CSR_READ_1(sc, RL_MEDIASTAT);
513                 return (rval);
514         default:
515                 device_printf(sc->rl_dev, "bad phy register\n");
516                 return (0);
517         }
518         rval = CSR_READ_2(sc, re8139_reg);
519         if (sc->rl_type == RL_8139CPLUS && re8139_reg == RL_BMCR) {
520                 /* 8139C+ has different bit layout. */
521                 rval &= ~(BMCR_LOOP | BMCR_ISO);
522         }
523         return (rval);
524 }
525
526 static int
527 re_miibus_writereg(device_t dev, int phy, int reg, int data)
528 {
529         struct rl_softc         *sc;
530         u_int16_t               re8139_reg = 0;
531         int                     rval = 0;
532
533         sc = device_get_softc(dev);
534
535         if (sc->rl_type == RL_8169) {
536                 rval = re_gmii_writereg(dev, phy, reg, data);
537                 return (rval);
538         }
539
540         /* Pretend the internal PHY is only at address 0 */
541         if (phy)
542                 return (0);
543
544         switch (reg) {
545         case MII_BMCR:
546                 re8139_reg = RL_BMCR;
547                 if (sc->rl_type == RL_8139CPLUS) {
548                         /* 8139C+ has different bit layout. */
549                         data &= ~(BMCR_LOOP | BMCR_ISO);
550                 }
551                 break;
552         case MII_BMSR:
553                 re8139_reg = RL_BMSR;
554                 break;
555         case MII_ANAR:
556                 re8139_reg = RL_ANAR;
557                 break;
558         case MII_ANER:
559                 re8139_reg = RL_ANER;
560                 break;
561         case MII_ANLPAR:
562                 re8139_reg = RL_LPAR;
563                 break;
564         case MII_PHYIDR1:
565         case MII_PHYIDR2:
566                 return (0);
567                 break;
568         default:
569                 device_printf(sc->rl_dev, "bad phy register\n");
570                 return (0);
571         }
572         CSR_WRITE_2(sc, re8139_reg, data);
573         return (0);
574 }
575
576 static void
577 re_miibus_statchg(device_t dev)
578 {
579         struct rl_softc         *sc;
580         struct ifnet            *ifp;
581         struct mii_data         *mii;
582
583         sc = device_get_softc(dev);
584         mii = device_get_softc(sc->rl_miibus);
585         ifp = sc->rl_ifp;
586         if (mii == NULL || ifp == NULL ||
587             (ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
588                 return;
589
590         sc->rl_flags &= ~RL_FLAG_LINK;
591         if ((mii->mii_media_status & (IFM_ACTIVE | IFM_AVALID)) ==
592             (IFM_ACTIVE | IFM_AVALID)) {
593                 switch (IFM_SUBTYPE(mii->mii_media_active)) {
594                 case IFM_10_T:
595                 case IFM_100_TX:
596                         sc->rl_flags |= RL_FLAG_LINK;
597                         break;
598                 case IFM_1000_T:
599                         if ((sc->rl_flags & RL_FLAG_FASTETHER) != 0)
600                                 break;
601                         sc->rl_flags |= RL_FLAG_LINK;
602                         break;
603                 default:
604                         break;
605                 }
606         }
607         /*
608          * RealTek controllers does not provide any interface to
609          * Tx/Rx MACs for resolved speed, duplex and flow-control
610          * parameters.
611          */
612 }
613
614 /*
615  * Set the RX configuration and 64-bit multicast hash filter.
616  */
617 static void
618 re_set_rxmode(struct rl_softc *sc)
619 {
620         struct ifnet            *ifp;
621         struct ifmultiaddr      *ifma;
622         uint32_t                hashes[2] = { 0, 0 };
623         uint32_t                h, rxfilt;
624
625         RL_LOCK_ASSERT(sc);
626
627         ifp = sc->rl_ifp;
628
629         rxfilt = RL_RXCFG_CONFIG | RL_RXCFG_RX_INDIV | RL_RXCFG_RX_BROAD;
630
631         if (ifp->if_flags & (IFF_ALLMULTI | IFF_PROMISC)) {
632                 if (ifp->if_flags & IFF_PROMISC)
633                         rxfilt |= RL_RXCFG_RX_ALLPHYS;
634                 /*
635                  * Unlike other hardwares, we have to explicitly set
636                  * RL_RXCFG_RX_MULTI to receive multicast frames in
637                  * promiscuous mode.
638                  */
639                 rxfilt |= RL_RXCFG_RX_MULTI;
640                 hashes[0] = hashes[1] = 0xffffffff;
641                 goto done;
642         }
643
644         IF_ADDR_LOCK(ifp);
645         TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
646                 if (ifma->ifma_addr->sa_family != AF_LINK)
647                         continue;
648                 h = ether_crc32_be(LLADDR((struct sockaddr_dl *)
649                     ifma->ifma_addr), ETHER_ADDR_LEN) >> 26;
650                 if (h < 32)
651                         hashes[0] |= (1 << h);
652                 else
653                         hashes[1] |= (1 << (h - 32));
654         }
655         IF_ADDR_UNLOCK(ifp);
656
657         if (hashes[0] != 0 || hashes[1] != 0) {
658                 /*
659                  * For some unfathomable reason, RealTek decided to
660                  * reverse the order of the multicast hash registers
661                  * in the PCI Express parts.  This means we have to
662                  * write the hash pattern in reverse order for those
663                  * devices.
664                  */
665                 if ((sc->rl_flags & RL_FLAG_PCIE) != 0) {
666                         h = bswap32(hashes[0]);
667                         hashes[0] = bswap32(hashes[1]);
668                         hashes[1] = h;
669                 }
670                 rxfilt |= RL_RXCFG_RX_MULTI;
671         }
672
673 done:
674         CSR_WRITE_4(sc, RL_MAR0, hashes[0]);
675         CSR_WRITE_4(sc, RL_MAR4, hashes[1]);
676         CSR_WRITE_4(sc, RL_RXCFG, rxfilt);
677 }
678
679 static void
680 re_reset(struct rl_softc *sc)
681 {
682         int                     i;
683
684         RL_LOCK_ASSERT(sc);
685
686         CSR_WRITE_1(sc, RL_COMMAND, RL_CMD_RESET);
687
688         for (i = 0; i < RL_TIMEOUT; i++) {
689                 DELAY(10);
690                 if (!(CSR_READ_1(sc, RL_COMMAND) & RL_CMD_RESET))
691                         break;
692         }
693         if (i == RL_TIMEOUT)
694                 device_printf(sc->rl_dev, "reset never completed!\n");
695
696         if ((sc->rl_flags & RL_FLAG_MACRESET) != 0)
697                 CSR_WRITE_1(sc, 0x82, 1);
698         if (sc->rl_hwrev == RL_HWREV_8169S)
699                 re_gmii_writereg(sc->rl_dev, 1, 0x0b, 0);
700 }
701
702 #ifdef RE_DIAG
703
704 /*
705  * The following routine is designed to test for a defect on some
706  * 32-bit 8169 cards. Some of these NICs have the REQ64# and ACK64#
707  * lines connected to the bus, however for a 32-bit only card, they
708  * should be pulled high. The result of this defect is that the
709  * NIC will not work right if you plug it into a 64-bit slot: DMA
710  * operations will be done with 64-bit transfers, which will fail
711  * because the 64-bit data lines aren't connected.
712  *
713  * There's no way to work around this (short of talking a soldering
714  * iron to the board), however we can detect it. The method we use
715  * here is to put the NIC into digital loopback mode, set the receiver
716  * to promiscuous mode, and then try to send a frame. We then compare
717  * the frame data we sent to what was received. If the data matches,
718  * then the NIC is working correctly, otherwise we know the user has
719  * a defective NIC which has been mistakenly plugged into a 64-bit PCI
720  * slot. In the latter case, there's no way the NIC can work correctly,
721  * so we print out a message on the console and abort the device attach.
722  */
723
/*
 * Loopback self-test: send one frame in digital loopback mode and
 * verify it arrives intact in the first RX descriptor.  Returns 0 on
 * success or EIO if the frame is lost, truncated or corrupted (the
 * signature of a 32-bit card in a 64-bit slot; see the comment above).
 */
static int
re_diag(struct rl_softc *sc)
{
	struct ifnet		*ifp = sc->rl_ifp;
	struct mbuf		*m0;
	struct ether_header	*eh;
	struct rl_desc		*cur_rx;
	u_int16_t		status;
	u_int32_t		rxstat;
	int			total_len, i, error = 0, phyaddr;
	u_int8_t		dst[] = { 0x00, 'h', 'e', 'l', 'l', 'o' };
	u_int8_t		src[] = { 0x00, 'w', 'o', 'r', 'l', 'd' };

	/* Allocate a single mbuf */
	MGETHDR(m0, M_DONTWAIT, MT_DATA);
	if (m0 == NULL)
		return (ENOBUFS);

	RL_LOCK(sc);

	/*
	 * Initialize the NIC in test mode. This sets the chip up
	 * so that it can send and receive frames, but performs the
	 * following special functions:
	 * - Puts receiver in promiscuous mode
	 * - Enables digital loopback mode
	 * - Leaves interrupts turned off
	 */

	ifp->if_flags |= IFF_PROMISC;
	sc->rl_testmode = 1;
	re_init_locked(sc);
	sc->rl_flags |= RL_FLAG_LINK;
	/* The internal PHY lives at address 1 on 8169-class chips. */
	if (sc->rl_type == RL_8169)
		phyaddr = 1;
	else
		phyaddr = 0;

	/* Reset the PHY and wait for the reset bit to self-clear. */
	re_miibus_writereg(sc->rl_dev, phyaddr, MII_BMCR, BMCR_RESET);
	for (i = 0; i < RL_TIMEOUT; i++) {
		status = re_miibus_readreg(sc->rl_dev, phyaddr, MII_BMCR);
		if (!(status & BMCR_RESET))
			break;
	}

	/* Put the PHY into loopback mode. */
	re_miibus_writereg(sc->rl_dev, phyaddr, MII_BMCR, BMCR_LOOP);
	CSR_WRITE_2(sc, RL_ISR, RL_INTRS);

	DELAY(100000);

	/* Put some data in the mbuf */

	eh = mtod(m0, struct ether_header *);
	bcopy ((char *)&dst, eh->ether_dhost, ETHER_ADDR_LEN);
	bcopy ((char *)&src, eh->ether_shost, ETHER_ADDR_LEN);
	eh->ether_type = htons(ETHERTYPE_IP);
	m0->m_pkthdr.len = m0->m_len = ETHER_MIN_LEN - ETHER_CRC_LEN;

	/*
	 * Queue the packet, start transmission.
	 * Note: IF_HANDOFF() ultimately calls re_start() for us.
	 * The lock must be dropped here because re_start() will
	 * re-acquire it.
	 */

	CSR_WRITE_2(sc, RL_ISR, 0xFFFF);
	RL_UNLOCK(sc);
	/* XXX: re_diag must not be called when in ALTQ mode */
	IF_HANDOFF(&ifp->if_snd, m0, ifp);
	RL_LOCK(sc);
	/* Ownership passed to the queue; don't free it below. */
	m0 = NULL;

	/* Wait for it to propagate through the chip */

	DELAY(100000);
	for (i = 0; i < RL_TIMEOUT; i++) {
		status = CSR_READ_2(sc, RL_ISR);
		/* Acknowledge whatever interrupts are pending. */
		CSR_WRITE_2(sc, RL_ISR, status);
		if ((status & (RL_ISR_TIMEOUT_EXPIRED|RL_ISR_RX_OK)) ==
		    (RL_ISR_TIMEOUT_EXPIRED|RL_ISR_RX_OK))
			break;
		DELAY(10);
	}

	if (i == RL_TIMEOUT) {
		device_printf(sc->rl_dev,
		    "diagnostic failed, failed to receive packet in"
		    " loopback mode\n");
		error = EIO;
		goto done;
	}

	/*
	 * The packet should have been dumped into the first
	 * entry in the RX DMA ring. Grab it from there.
	 * Sync the descriptor list and the buffer before the CPU
	 * reads them, then unload the buffer's DMA map.
	 */

	bus_dmamap_sync(sc->rl_ldata.rl_rx_list_tag,
	    sc->rl_ldata.rl_rx_list_map,
	    BUS_DMASYNC_POSTREAD);
	bus_dmamap_sync(sc->rl_ldata.rl_rx_mtag,
	    sc->rl_ldata.rl_rx_desc[0].rx_dmamap,
	    BUS_DMASYNC_POSTREAD);
	bus_dmamap_unload(sc->rl_ldata.rl_rx_mtag,
	    sc->rl_ldata.rl_rx_desc[0].rx_dmamap);

	/* Take ownership of the received mbuf; freed at 'done'. */
	m0 = sc->rl_ldata.rl_rx_desc[0].rx_m;
	sc->rl_ldata.rl_rx_desc[0].rx_m = NULL;
	eh = mtod(m0, struct ether_header *);

	cur_rx = &sc->rl_ldata.rl_rx_list[0];
	total_len = RL_RXBYTES(cur_rx);
	/* NOTE(review): rxstat is read but never checked below. */
	rxstat = le32toh(cur_rx->rl_cmdstat);

	if (total_len != ETHER_MIN_LEN) {
		device_printf(sc->rl_dev,
		    "diagnostic failed, received short packet\n");
		error = EIO;
		goto done;
	}

	/* Test that the received packet data matches what we sent. */

	if (bcmp((char *)&eh->ether_dhost, (char *)&dst, ETHER_ADDR_LEN) ||
	    bcmp((char *)&eh->ether_shost, (char *)&src, ETHER_ADDR_LEN) ||
	    ntohs(eh->ether_type) != ETHERTYPE_IP) {
		device_printf(sc->rl_dev, "WARNING, DMA FAILURE!\n");
		device_printf(sc->rl_dev, "expected TX data: %6D/%6D/0x%x\n",
		    dst, ":", src, ":", ETHERTYPE_IP);
		device_printf(sc->rl_dev, "received RX data: %6D/%6D/0x%x\n",
		    eh->ether_dhost, ":",  eh->ether_shost, ":",
		    ntohs(eh->ether_type));
		device_printf(sc->rl_dev, "You may have a defective 32-bit "
		    "NIC plugged into a 64-bit PCI slot.\n");
		device_printf(sc->rl_dev, "Please re-install the NIC in a "
		    "32-bit slot for proper operation.\n");
		device_printf(sc->rl_dev, "Read the re(4) man page for more "
		    "details.\n");
		error = EIO;
	}

done:
	/* Turn interface off, release resources */

	sc->rl_testmode = 0;
	sc->rl_flags &= ~RL_FLAG_LINK;
	ifp->if_flags &= ~IFF_PROMISC;
	re_stop(sc);
	if (m0 != NULL)
		m_freem(m0);

	RL_UNLOCK(sc);

	return (error);
}
877
878 #endif
879
880 /*
881  * Probe for a RealTek 8139C+/8169/8110 chip. Check the PCI vendor and device
882  * IDs against our list and return a device name if we find a match.
883  */
884 static int
885 re_probe(device_t dev)
886 {
887         struct rl_type          *t;
888         uint16_t                devid, vendor;
889         uint16_t                revid, sdevid;
890         int                     i;
891         
892         vendor = pci_get_vendor(dev);
893         devid = pci_get_device(dev);
894         revid = pci_get_revid(dev);
895         sdevid = pci_get_subdevice(dev);
896
897         if (vendor == LINKSYS_VENDORID && devid == LINKSYS_DEVICEID_EG1032) {
898                 if (sdevid != LINKSYS_SUBDEVICE_EG1032_REV3) {
899                         /*
900                          * Only attach to rev. 3 of the Linksys EG1032 adapter.
901                          * Rev. 2 is supported by sk(4).
902                          */
903                         return (ENXIO);
904                 }
905         }
906
907         if (vendor == RT_VENDORID && devid == RT_DEVICEID_8139) {
908                 if (revid != 0x20) {
909                         /* 8139, let rl(4) take care of this device. */
910                         return (ENXIO);
911                 }
912         }
913
914         t = re_devs;
915         for (i = 0; i < sizeof(re_devs) / sizeof(re_devs[0]); i++, t++) {
916                 if (vendor == t->rl_vid && devid == t->rl_did) {
917                         device_set_desc(dev, t->rl_name);
918                         return (BUS_PROBE_DEFAULT);
919                 }
920         }
921
922         return (ENXIO);
923 }
924
925 /*
926  * Map a single buffer address.
927  */
928
929 static void
930 re_dma_map_addr(void *arg, bus_dma_segment_t *segs, int nseg, int error)
931 {
932         bus_addr_t              *addr;
933
934         if (error)
935                 return;
936
937         KASSERT(nseg == 1, ("too many DMA segments, %d should be 1", nseg));
938         addr = arg;
939         *addr = segs->ds_addr;
940 }
941
/*
 * Allocate all DMA resources for the controller: the parent tag, the
 * TX/RX descriptor rings (tag + memory + loaded map each), and per-buffer
 * mbuf DMA maps (plus one spare RX map used for buffer swapping).
 *
 * Returns 0 on success or a bus_dma error code on failure.  On failure,
 * any resources already allocated are left for re_detach() to release.
 */
static int
re_allocmem(device_t dev, struct rl_softc *sc)
{
	bus_size_t		rx_list_size, tx_list_size;
	int			error;
	int			i;

	/* Ring sizes depend on the per-chip descriptor counts set in attach. */
	rx_list_size = sc->rl_ldata.rl_rx_desc_cnt * sizeof(struct rl_desc);
	tx_list_size = sc->rl_ldata.rl_tx_desc_cnt * sizeof(struct rl_desc);

	/*
	 * Allocate the parent bus DMA tag appropriate for PCI.
	 * In order to use DAC, RL_CPLUSCMD_PCI_DAC bit of RL_CPLUS_CMD
	 * register should be set. However some RealTek chips are known
	 * to be buggy on DAC handling, therefore disable DAC by limiting
	 * DMA address space to 32bit. PCIe variants of RealTek chips
	 * may not have the limitation but I took safer path.
	 */
	error = bus_dma_tag_create(bus_get_dma_tag(dev), 1, 0,
	    BUS_SPACE_MAXADDR_32BIT, BUS_SPACE_MAXADDR, NULL, NULL,
	    BUS_SPACE_MAXSIZE_32BIT, 0, BUS_SPACE_MAXSIZE_32BIT, 0,
	    NULL, NULL, &sc->rl_parent_tag);
	if (error) {
		device_printf(dev, "could not allocate parent DMA tag\n");
		return (error);
	}

	/*
	 * Allocate map for TX mbufs.
	 * Up to RL_NTXSEGS segments per packet; the 4096 boundary keeps a
	 * segment from spanning a 4K page boundary (NOTE(review): presumably
	 * a chip DMA restriction — confirm against the datasheet).
	 */
	error = bus_dma_tag_create(sc->rl_parent_tag, 1, 0,
	    BUS_SPACE_MAXADDR, BUS_SPACE_MAXADDR, NULL,
	    NULL, MCLBYTES * RL_NTXSEGS, RL_NTXSEGS, 4096, 0,
	    NULL, NULL, &sc->rl_ldata.rl_tx_mtag);
	if (error) {
		device_printf(dev, "could not allocate TX DMA tag\n");
		return (error);
	}

	/*
	 * Allocate map for RX mbufs.
	 * One segment per buffer, 8-byte aligned (sizeof(uint64_t)).
	 */

	error = bus_dma_tag_create(sc->rl_parent_tag, sizeof(uint64_t), 0,
	    BUS_SPACE_MAXADDR, BUS_SPACE_MAXADDR, NULL, NULL,
	    MCLBYTES, 1, MCLBYTES, 0, NULL, NULL, &sc->rl_ldata.rl_rx_mtag);
	if (error) {
		device_printf(dev, "could not allocate RX DMA tag\n");
		return (error);
	}

	/*
	 * Allocate map for TX descriptor list.
	 * The ring must be physically contiguous (1 segment) and aligned
	 * to RL_RING_ALIGN as the hardware requires.
	 */
	error = bus_dma_tag_create(sc->rl_parent_tag, RL_RING_ALIGN,
	    0, BUS_SPACE_MAXADDR_32BIT, BUS_SPACE_MAXADDR, NULL,
	    NULL, tx_list_size, 1, tx_list_size, 0,
	    NULL, NULL, &sc->rl_ldata.rl_tx_list_tag);
	if (error) {
		device_printf(dev, "could not allocate TX DMA ring tag\n");
		return (error);
	}

	/* Allocate DMA'able memory for the TX ring */

	error = bus_dmamem_alloc(sc->rl_ldata.rl_tx_list_tag,
	    (void **)&sc->rl_ldata.rl_tx_list,
	    BUS_DMA_WAITOK | BUS_DMA_COHERENT | BUS_DMA_ZERO,
	    &sc->rl_ldata.rl_tx_list_map);
	if (error) {
		device_printf(dev, "could not allocate TX DMA ring\n");
		return (error);
	}

	/* Load the map for the TX ring. */

	sc->rl_ldata.rl_tx_list_addr = 0;
	error = bus_dmamap_load(sc->rl_ldata.rl_tx_list_tag,
	     sc->rl_ldata.rl_tx_list_map, sc->rl_ldata.rl_tx_list,
	     tx_list_size, re_dma_map_addr,
	     &sc->rl_ldata.rl_tx_list_addr, BUS_DMA_NOWAIT);
	/* re_dma_map_addr() leaves the address 0 if the load callback failed. */
	if (error != 0 || sc->rl_ldata.rl_tx_list_addr == 0) {
		device_printf(dev, "could not load TX DMA ring\n");
		return (ENOMEM);
	}

	/* Create DMA maps for TX buffers */

	for (i = 0; i < sc->rl_ldata.rl_tx_desc_cnt; i++) {
		error = bus_dmamap_create(sc->rl_ldata.rl_tx_mtag, 0,
		    &sc->rl_ldata.rl_tx_desc[i].tx_dmamap);
		if (error) {
			device_printf(dev, "could not create DMA map for TX\n");
			return (error);
		}
	}

	/*
	 * Allocate map for RX descriptor list.
	 * Same contiguity/alignment constraints as the TX ring above.
	 */
	error = bus_dma_tag_create(sc->rl_parent_tag, RL_RING_ALIGN,
	    0, BUS_SPACE_MAXADDR_32BIT, BUS_SPACE_MAXADDR, NULL,
	    NULL, rx_list_size, 1, rx_list_size, 0,
	    NULL, NULL, &sc->rl_ldata.rl_rx_list_tag);
	if (error) {
		device_printf(dev, "could not create RX DMA ring tag\n");
		return (error);
	}

	/* Allocate DMA'able memory for the RX ring */

	error = bus_dmamem_alloc(sc->rl_ldata.rl_rx_list_tag,
	    (void **)&sc->rl_ldata.rl_rx_list,
	    BUS_DMA_WAITOK | BUS_DMA_COHERENT | BUS_DMA_ZERO,
	    &sc->rl_ldata.rl_rx_list_map);
	if (error) {
		device_printf(dev, "could not allocate RX DMA ring\n");
		return (error);
	}

	/* Load the map for the RX ring. */

	sc->rl_ldata.rl_rx_list_addr = 0;
	error = bus_dmamap_load(sc->rl_ldata.rl_rx_list_tag,
	     sc->rl_ldata.rl_rx_list_map, sc->rl_ldata.rl_rx_list,
	     rx_list_size, re_dma_map_addr,
	     &sc->rl_ldata.rl_rx_list_addr, BUS_DMA_NOWAIT);
	if (error != 0 || sc->rl_ldata.rl_rx_list_addr == 0) {
		device_printf(dev, "could not load RX DMA ring\n");
		return (ENOMEM);
	}

	/*
	 * Create DMA maps for RX buffers.  The spare map is used by
	 * re_newbuf() so a failed buffer swap never leaves a descriptor
	 * without a mapped mbuf.
	 */

	error = bus_dmamap_create(sc->rl_ldata.rl_rx_mtag, 0,
	    &sc->rl_ldata.rl_rx_sparemap);
	if (error) {
		device_printf(dev, "could not create spare DMA map for RX\n");
		return (error);
	}
	for (i = 0; i < sc->rl_ldata.rl_rx_desc_cnt; i++) {
		error = bus_dmamap_create(sc->rl_ldata.rl_rx_mtag, 0,
		    &sc->rl_ldata.rl_rx_desc[i].rx_dmamap);
		if (error) {
			device_printf(dev, "could not create DMA map for RX\n");
			return (error);
		}
	}

	return (0);
}
1093
1094 /*
1095  * Attach the interface. Allocate softc structures, do ifmedia
1096  * setup and ethernet/BPF attach.
1097  */
1098 static int
1099 re_attach(device_t dev)
1100 {
1101         u_char                  eaddr[ETHER_ADDR_LEN];
1102         u_int16_t               as[ETHER_ADDR_LEN / 2];
1103         struct rl_softc         *sc;
1104         struct ifnet            *ifp;
1105         struct rl_hwrev         *hw_rev;
1106         int                     hwrev;
1107         u_int16_t               devid, re_did = 0;
1108         int                     error = 0, rid, i;
1109         int                     msic, reg;
1110         uint8_t                 cfg;
1111
1112         sc = device_get_softc(dev);
1113         sc->rl_dev = dev;
1114
1115         mtx_init(&sc->rl_mtx, device_get_nameunit(dev), MTX_NETWORK_LOCK,
1116             MTX_DEF);
1117         callout_init_mtx(&sc->rl_stat_callout, &sc->rl_mtx, 0);
1118
1119         /*
1120          * Map control/status registers.
1121          */
1122         pci_enable_busmaster(dev);
1123
1124         devid = pci_get_device(dev);
1125         /*
1126          * Prefer memory space register mapping over IO space.
1127          * Because RTL8169SC does not seem to work when memory mapping
1128          * is used always activate io mapping. 
1129          */
1130         if (devid == RT_DEVICEID_8169SC)
1131                 prefer_iomap = 1;
1132         if (prefer_iomap == 0) {
1133                 sc->rl_res_id = PCIR_BAR(1);
1134                 sc->rl_res_type = SYS_RES_MEMORY;
1135                 /* RTL8168/8101E seems to use different BARs. */
1136                 if (devid == RT_DEVICEID_8168 || devid == RT_DEVICEID_8101E)
1137                         sc->rl_res_id = PCIR_BAR(2);
1138         } else {
1139                 sc->rl_res_id = PCIR_BAR(0);
1140                 sc->rl_res_type = SYS_RES_IOPORT;
1141         }
1142         sc->rl_res = bus_alloc_resource_any(dev, sc->rl_res_type,
1143             &sc->rl_res_id, RF_ACTIVE);
1144         if (sc->rl_res == NULL && prefer_iomap == 0) {
1145                 sc->rl_res_id = PCIR_BAR(0);
1146                 sc->rl_res_type = SYS_RES_IOPORT;
1147                 sc->rl_res = bus_alloc_resource_any(dev, sc->rl_res_type,
1148                     &sc->rl_res_id, RF_ACTIVE);
1149         }
1150         if (sc->rl_res == NULL) {
1151                 device_printf(dev, "couldn't map ports/memory\n");
1152                 error = ENXIO;
1153                 goto fail;
1154         }
1155
1156         sc->rl_btag = rman_get_bustag(sc->rl_res);
1157         sc->rl_bhandle = rman_get_bushandle(sc->rl_res);
1158
1159         msic = 0;
1160         if (pci_find_extcap(dev, PCIY_EXPRESS, &reg) == 0) {
1161                 sc->rl_flags |= RL_FLAG_PCIE;
1162                 msic = pci_msi_count(dev);
1163                 if (bootverbose)
1164                         device_printf(dev, "MSI count : %d\n", msic);
1165         }
1166         if (msic > 0 && msi_disable == 0) {
1167                 msic = 1;
1168                 if (pci_alloc_msi(dev, &msic) == 0) {
1169                         if (msic == RL_MSI_MESSAGES) {
1170                                 device_printf(dev, "Using %d MSI messages\n",
1171                                     msic);
1172                                 sc->rl_flags |= RL_FLAG_MSI;
1173                                 /* Explicitly set MSI enable bit. */
1174                                 CSR_WRITE_1(sc, RL_EECMD, RL_EE_MODE);
1175                                 cfg = CSR_READ_1(sc, RL_CFG2);
1176                                 cfg |= RL_CFG2_MSI;
1177                                 CSR_WRITE_1(sc, RL_CFG2, cfg);
1178                                 CSR_WRITE_1(sc, RL_EECMD, RL_EEMODE_OFF);
1179                         } else
1180                                 pci_release_msi(dev);
1181                 }
1182         }
1183
1184         /* Allocate interrupt */
1185         if ((sc->rl_flags & RL_FLAG_MSI) == 0) {
1186                 rid = 0;
1187                 sc->rl_irq[0] = bus_alloc_resource_any(dev, SYS_RES_IRQ, &rid,
1188                     RF_SHAREABLE | RF_ACTIVE);
1189                 if (sc->rl_irq[0] == NULL) {
1190                         device_printf(dev, "couldn't allocate IRQ resources\n");
1191                         error = ENXIO;
1192                         goto fail;
1193                 }
1194         } else {
1195                 for (i = 0, rid = 1; i < RL_MSI_MESSAGES; i++, rid++) {
1196                         sc->rl_irq[i] = bus_alloc_resource_any(dev,
1197                             SYS_RES_IRQ, &rid, RF_ACTIVE);
1198                         if (sc->rl_irq[i] == NULL) {
1199                                 device_printf(dev,
1200                                     "couldn't llocate IRQ resources for "
1201                                     "message %d\n", rid);
1202                                 error = ENXIO;
1203                                 goto fail;
1204                         }
1205                 }
1206         }
1207
1208         if ((sc->rl_flags & RL_FLAG_MSI) == 0) {
1209                 CSR_WRITE_1(sc, RL_EECMD, RL_EE_MODE);
1210                 cfg = CSR_READ_1(sc, RL_CFG2);
1211                 if ((cfg & RL_CFG2_MSI) != 0) {
1212                         device_printf(dev, "turning off MSI enable bit.\n");
1213                         cfg &= ~RL_CFG2_MSI;
1214                         CSR_WRITE_1(sc, RL_CFG2, cfg);
1215                 }
1216                 CSR_WRITE_1(sc, RL_EECMD, RL_EEMODE_OFF);
1217         }
1218
1219         /* Reset the adapter. */
1220         RL_LOCK(sc);
1221         re_reset(sc);
1222         RL_UNLOCK(sc);
1223
1224         hw_rev = re_hwrevs;
1225         hwrev = CSR_READ_4(sc, RL_TXCFG);
1226         switch (hwrev & 0x70000000) {
1227         case 0x00000000:
1228         case 0x10000000:
1229                 device_printf(dev, "Chip rev. 0x%08x\n", hwrev & 0xfc800000);
1230                 hwrev &= (RL_TXCFG_HWREV | 0x80000000);
1231                 break;
1232         default:
1233                 device_printf(dev, "Chip rev. 0x%08x\n", hwrev & 0x7c800000);
1234                 hwrev &= RL_TXCFG_HWREV;
1235                 break;
1236         }
1237         device_printf(dev, "MAC rev. 0x%08x\n", hwrev & 0x00700000);
1238         while (hw_rev->rl_desc != NULL) {
1239                 if (hw_rev->rl_rev == hwrev) {
1240                         sc->rl_type = hw_rev->rl_type;
1241                         sc->rl_hwrev = hw_rev->rl_rev;
1242                         break;
1243                 }
1244                 hw_rev++;
1245         }
1246         if (hw_rev->rl_desc == NULL) {
1247                 device_printf(dev, "Unknown H/W revision: 0x%08x\n", hwrev);
1248                 error = ENXIO;
1249                 goto fail;
1250         }
1251
1252         switch (hw_rev->rl_rev) {
1253         case RL_HWREV_8139CPLUS:
1254                 sc->rl_flags |= RL_FLAG_NOJUMBO | RL_FLAG_FASTETHER;
1255                 break;
1256         case RL_HWREV_8100E:
1257         case RL_HWREV_8101E:
1258                 sc->rl_flags |= RL_FLAG_NOJUMBO | RL_FLAG_PHYWAKE |
1259                     RL_FLAG_FASTETHER;
1260                 break;
1261         case RL_HWREV_8102E:
1262         case RL_HWREV_8102EL:
1263                 sc->rl_flags |= RL_FLAG_NOJUMBO | RL_FLAG_PHYWAKE |
1264                     RL_FLAG_PAR | RL_FLAG_DESCV2 | RL_FLAG_MACSTAT |
1265                     RL_FLAG_FASTETHER | RL_FLAG_CMDSTOP;
1266                 break;
1267         case RL_HWREV_8168_SPIN1:
1268         case RL_HWREV_8168_SPIN2:
1269                 sc->rl_flags |= RL_FLAG_WOLRXENB;
1270                 /* FALLTHROUGH */
1271         case RL_HWREV_8168_SPIN3:
1272                 sc->rl_flags |= RL_FLAG_PHYWAKE | RL_FLAG_MACSTAT;
1273                 break;
1274         case RL_HWREV_8168C_SPIN2:
1275                 sc->rl_flags |= RL_FLAG_MACSLEEP;
1276                 /* FALLTHROUGH */
1277         case RL_HWREV_8168C:
1278                 if ((hwrev & 0x00700000) == 0x00200000)
1279                         sc->rl_flags |= RL_FLAG_MACSLEEP;
1280                 /* FALLTHROUGH */
1281         case RL_HWREV_8168CP:
1282         case RL_HWREV_8168D:
1283                 sc->rl_flags |= RL_FLAG_PHYWAKE | RL_FLAG_PAR |
1284                     RL_FLAG_DESCV2 | RL_FLAG_MACSTAT | RL_FLAG_CMDSTOP;
1285                 /*
1286                  * These controllers support jumbo frame but it seems
1287                  * that enabling it requires touching additional magic
1288                  * registers. Depending on MAC revisions some
1289                  * controllers need to disable checksum offload. So
1290                  * disable jumbo frame until I have better idea what
1291                  * it really requires to make it support.
1292                  * RTL8168C/CP : supports up to 6KB jumbo frame.
1293                  * RTL8111C/CP : supports up to 9KB jumbo frame.
1294                  */
1295                 sc->rl_flags |= RL_FLAG_NOJUMBO;
1296                 break;
1297         case RL_HWREV_8169_8110SB:
1298         case RL_HWREV_8169_8110SBL:
1299         case RL_HWREV_8169_8110SC:
1300         case RL_HWREV_8169_8110SCE:
1301                 sc->rl_flags |= RL_FLAG_PHYWAKE;
1302                 /* FALLTHROUGH */
1303         case RL_HWREV_8169:
1304         case RL_HWREV_8169S:
1305         case RL_HWREV_8110S:
1306                 sc->rl_flags |= RL_FLAG_MACRESET;
1307                 break;
1308         default:
1309                 break;
1310         }
1311
1312         /* Enable PME. */
1313         CSR_WRITE_1(sc, RL_EECMD, RL_EE_MODE);
1314         cfg = CSR_READ_1(sc, RL_CFG1);
1315         cfg |= RL_CFG1_PME;
1316         CSR_WRITE_1(sc, RL_CFG1, cfg);
1317         cfg = CSR_READ_1(sc, RL_CFG5);
1318         cfg &= RL_CFG5_PME_STS;
1319         CSR_WRITE_1(sc, RL_CFG5, cfg);
1320         CSR_WRITE_1(sc, RL_EECMD, RL_EEMODE_OFF);
1321
1322         if ((sc->rl_flags & RL_FLAG_PAR) != 0) {
1323                 /*
1324                  * XXX Should have a better way to extract station
1325                  * address from EEPROM.
1326                  */
1327                 for (i = 0; i < ETHER_ADDR_LEN; i++)
1328                         eaddr[i] = CSR_READ_1(sc, RL_IDR0 + i);
1329         } else {
1330                 sc->rl_eewidth = RL_9356_ADDR_LEN;
1331                 re_read_eeprom(sc, (caddr_t)&re_did, 0, 1);
1332                 if (re_did != 0x8129)
1333                         sc->rl_eewidth = RL_9346_ADDR_LEN;
1334
1335                 /*
1336                  * Get station address from the EEPROM.
1337                  */
1338                 re_read_eeprom(sc, (caddr_t)as, RL_EE_EADDR, 3);
1339                 for (i = 0; i < ETHER_ADDR_LEN / 2; i++)
1340                         as[i] = le16toh(as[i]);
1341                 bcopy(as, eaddr, sizeof(eaddr));
1342         }
1343
1344         if (sc->rl_type == RL_8169) {
1345                 /* Set RX length mask and number of descriptors. */
1346                 sc->rl_rxlenmask = RL_RDESC_STAT_GFRAGLEN;
1347                 sc->rl_txstart = RL_GTXSTART;
1348                 sc->rl_ldata.rl_tx_desc_cnt = RL_8169_TX_DESC_CNT;
1349                 sc->rl_ldata.rl_rx_desc_cnt = RL_8169_RX_DESC_CNT;
1350         } else {
1351                 /* Set RX length mask and number of descriptors. */
1352                 sc->rl_rxlenmask = RL_RDESC_STAT_FRAGLEN;
1353                 sc->rl_txstart = RL_TXSTART;
1354                 sc->rl_ldata.rl_tx_desc_cnt = RL_8139_TX_DESC_CNT;
1355                 sc->rl_ldata.rl_rx_desc_cnt = RL_8139_RX_DESC_CNT;
1356         }
1357
1358         error = re_allocmem(dev, sc);
1359         if (error)
1360                 goto fail;
1361
1362         ifp = sc->rl_ifp = if_alloc(IFT_ETHER);
1363         if (ifp == NULL) {
1364                 device_printf(dev, "can not if_alloc()\n");
1365                 error = ENOSPC;
1366                 goto fail;
1367         }
1368
1369         /* Take controller out of deep sleep mode. */
1370         if ((sc->rl_flags & RL_FLAG_MACSLEEP) != 0) {
1371                 if ((CSR_READ_1(sc, RL_MACDBG) & 0x80) == 0x80)
1372                         CSR_WRITE_1(sc, RL_GPIO,
1373                             CSR_READ_1(sc, RL_GPIO) | 0x01);
1374                 else
1375                         CSR_WRITE_1(sc, RL_GPIO,
1376                             CSR_READ_1(sc, RL_GPIO) & ~0x01);
1377         }
1378
1379         /* Take PHY out of power down mode. */
1380         if ((sc->rl_flags & RL_FLAG_PHYWAKE) != 0) {
1381                 re_gmii_writereg(dev, 1, 0x1f, 0);
1382                 re_gmii_writereg(dev, 1, 0x0e, 0);
1383         }
1384
1385         /* Do MII setup */
1386         if (mii_phy_probe(dev, &sc->rl_miibus,
1387             re_ifmedia_upd, re_ifmedia_sts)) {
1388                 device_printf(dev, "MII without any phy!\n");
1389                 error = ENXIO;
1390                 goto fail;
1391         }
1392
1393         ifp->if_softc = sc;
1394         if_initname(ifp, device_get_name(dev), device_get_unit(dev));
1395         ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
1396         ifp->if_ioctl = re_ioctl;
1397         ifp->if_start = re_start;
1398         ifp->if_hwassist = RE_CSUM_FEATURES;
1399         ifp->if_capabilities = IFCAP_HWCSUM;
1400         ifp->if_capenable = ifp->if_capabilities;
1401         ifp->if_init = re_init;
1402         IFQ_SET_MAXLEN(&ifp->if_snd, RL_IFQ_MAXLEN);
1403         ifp->if_snd.ifq_drv_maxlen = RL_IFQ_MAXLEN;
1404         IFQ_SET_READY(&ifp->if_snd);
1405
1406         TASK_INIT(&sc->rl_txtask, 1, re_tx_task, ifp);
1407         TASK_INIT(&sc->rl_inttask, 0, re_int_task, sc);
1408
1409         /*
1410          * XXX
1411          * Still have no idea how to make TSO work on 8168C, 8168CP,
1412          * 8111C and 8111CP.
1413          */
1414         if ((sc->rl_flags & RL_FLAG_DESCV2) == 0) {
1415                 ifp->if_hwassist |= CSUM_TSO;
1416                 ifp->if_capabilities |= IFCAP_TSO4;
1417         }
1418
1419         /*
1420          * Call MI attach routine.
1421          */
1422         ether_ifattach(ifp, eaddr);
1423
1424         /* VLAN capability setup */
1425         ifp->if_capabilities |= IFCAP_VLAN_MTU | IFCAP_VLAN_HWTAGGING;
1426         if (ifp->if_capabilities & IFCAP_HWCSUM)
1427                 ifp->if_capabilities |= IFCAP_VLAN_HWCSUM;
1428         /* Enable WOL if PM is supported. */
1429         if (pci_find_extcap(sc->rl_dev, PCIY_PMG, &reg) == 0)
1430                 ifp->if_capabilities |= IFCAP_WOL;
1431         ifp->if_capenable = ifp->if_capabilities;
1432         /*
1433          * Don't enable TSO by default. Under certain
1434          * circumtances the controller generated corrupted
1435          * packets in TSO size.
1436          */
1437         ifp->if_hwassist &= ~CSUM_TSO;
1438         ifp->if_capenable &= ~IFCAP_TSO4;
1439 #ifdef DEVICE_POLLING
1440         ifp->if_capabilities |= IFCAP_POLLING;
1441 #endif
1442         /*
1443          * Tell the upper layer(s) we support long frames.
1444          * Must appear after the call to ether_ifattach() because
1445          * ether_ifattach() sets ifi_hdrlen to the default value.
1446          */
1447         ifp->if_data.ifi_hdrlen = sizeof(struct ether_vlan_header);
1448
1449 #ifdef RE_DIAG
1450         /*
1451          * Perform hardware diagnostic on the original RTL8169.
1452          * Some 32-bit cards were incorrectly wired and would
1453          * malfunction if plugged into a 64-bit slot.
1454          */
1455
1456         if (hwrev == RL_HWREV_8169) {
1457                 error = re_diag(sc);
1458                 if (error) {
1459                         device_printf(dev,
1460                         "attach aborted due to hardware diag failure\n");
1461                         ether_ifdetach(ifp);
1462                         goto fail;
1463                 }
1464         }
1465 #endif
1466
1467         /* Hook interrupt last to avoid having to lock softc */
1468         if ((sc->rl_flags & RL_FLAG_MSI) == 0)
1469                 error = bus_setup_intr(dev, sc->rl_irq[0],
1470                     INTR_TYPE_NET | INTR_MPSAFE, re_intr, NULL, sc,
1471                     &sc->rl_intrhand[0]);
1472         else {
1473                 for (i = 0; i < RL_MSI_MESSAGES; i++) {
1474                         error = bus_setup_intr(dev, sc->rl_irq[i],
1475                             INTR_TYPE_NET | INTR_MPSAFE, re_intr, NULL, sc,
1476                             &sc->rl_intrhand[i]);
1477                         if (error != 0)
1478                                 break;
1479                 }
1480         }
1481         if (error) {
1482                 device_printf(dev, "couldn't set up irq\n");
1483                 ether_ifdetach(ifp);
1484         }
1485
1486 fail:
1487
1488         if (error)
1489                 re_detach(dev);
1490
1491         return (error);
1492 }
1493
1494 /*
1495  * Shutdown hardware and free up resources. This can be called any
1496  * time after the mutex has been initialized. It is called in both
1497  * the error case in attach and the normal detach case so it needs
1498  * to be careful about only freeing resources that have actually been
1499  * allocated.
1500  */
1501 static int
1502 re_detach(device_t dev)
1503 {
1504         struct rl_softc         *sc;
1505         struct ifnet            *ifp;
1506         int                     i, rid;
1507
1508         sc = device_get_softc(dev);
1509         ifp = sc->rl_ifp;
1510         KASSERT(mtx_initialized(&sc->rl_mtx), ("re mutex not initialized"));
1511
1512         /* These should only be active if attach succeeded */
1513         if (device_is_attached(dev)) {
1514 #ifdef DEVICE_POLLING
1515                 if (ifp->if_capenable & IFCAP_POLLING)
1516                         ether_poll_deregister(ifp);
1517 #endif
1518                 RL_LOCK(sc);
1519 #if 0
1520                 sc->suspended = 1;
1521 #endif
1522                 re_stop(sc);
1523                 RL_UNLOCK(sc);
1524                 callout_drain(&sc->rl_stat_callout);
1525                 taskqueue_drain(taskqueue_fast, &sc->rl_inttask);
1526                 taskqueue_drain(taskqueue_fast, &sc->rl_txtask);
1527                 /*
1528                  * Force off the IFF_UP flag here, in case someone
1529                  * still had a BPF descriptor attached to this
1530                  * interface. If they do, ether_ifdetach() will cause
1531                  * the BPF code to try and clear the promisc mode
1532                  * flag, which will bubble down to re_ioctl(),
1533                  * which will try to call re_init() again. This will
1534                  * turn the NIC back on and restart the MII ticker,
1535                  * which will panic the system when the kernel tries
1536                  * to invoke the re_tick() function that isn't there
1537                  * anymore.
1538                  */
1539                 ifp->if_flags &= ~IFF_UP;
1540                 ether_ifdetach(ifp);
1541         }
1542         if (sc->rl_miibus)
1543                 device_delete_child(dev, sc->rl_miibus);
1544         bus_generic_detach(dev);
1545
1546         /*
1547          * The rest is resource deallocation, so we should already be
1548          * stopped here.
1549          */
1550
1551         for (i = 0; i < RL_MSI_MESSAGES; i++) {
1552                 if (sc->rl_intrhand[i] != NULL) {
1553                         bus_teardown_intr(dev, sc->rl_irq[i],
1554                             sc->rl_intrhand[i]);
1555                         sc->rl_intrhand[i] = NULL;
1556                 }
1557         }
1558         if (ifp != NULL)
1559                 if_free(ifp);
1560         if ((sc->rl_flags & RL_FLAG_MSI) == 0) {
1561                 if (sc->rl_irq[0] != NULL) {
1562                         bus_release_resource(dev, SYS_RES_IRQ, 0,
1563                             sc->rl_irq[0]);
1564                         sc->rl_irq[0] = NULL;
1565                 }
1566         } else {
1567                 for (i = 0, rid = 1; i < RL_MSI_MESSAGES; i++, rid++) {
1568                         if (sc->rl_irq[i] != NULL) {
1569                                 bus_release_resource(dev, SYS_RES_IRQ, rid,
1570                                     sc->rl_irq[i]);
1571                                 sc->rl_irq[i] = NULL;
1572                         }
1573                 }
1574                 pci_release_msi(dev);
1575         }
1576         if (sc->rl_res)
1577                 bus_release_resource(dev, sc->rl_res_type, sc->rl_res_id,
1578                     sc->rl_res);
1579
1580         /* Unload and free the RX DMA ring memory and map */
1581
1582         if (sc->rl_ldata.rl_rx_list_tag) {
1583                 bus_dmamap_unload(sc->rl_ldata.rl_rx_list_tag,
1584                     sc->rl_ldata.rl_rx_list_map);
1585                 bus_dmamem_free(sc->rl_ldata.rl_rx_list_tag,
1586                     sc->rl_ldata.rl_rx_list,
1587                     sc->rl_ldata.rl_rx_list_map);
1588                 bus_dma_tag_destroy(sc->rl_ldata.rl_rx_list_tag);
1589         }
1590
1591         /* Unload and free the TX DMA ring memory and map */
1592
1593         if (sc->rl_ldata.rl_tx_list_tag) {
1594                 bus_dmamap_unload(sc->rl_ldata.rl_tx_list_tag,
1595                     sc->rl_ldata.rl_tx_list_map);
1596                 bus_dmamem_free(sc->rl_ldata.rl_tx_list_tag,
1597                     sc->rl_ldata.rl_tx_list,
1598                     sc->rl_ldata.rl_tx_list_map);
1599                 bus_dma_tag_destroy(sc->rl_ldata.rl_tx_list_tag);
1600         }
1601
1602         /* Destroy all the RX and TX buffer maps */
1603
1604         if (sc->rl_ldata.rl_tx_mtag) {
1605                 for (i = 0; i < sc->rl_ldata.rl_tx_desc_cnt; i++)
1606                         bus_dmamap_destroy(sc->rl_ldata.rl_tx_mtag,
1607                             sc->rl_ldata.rl_tx_desc[i].tx_dmamap);
1608                 bus_dma_tag_destroy(sc->rl_ldata.rl_tx_mtag);
1609         }
1610         if (sc->rl_ldata.rl_rx_mtag) {
1611                 for (i = 0; i < sc->rl_ldata.rl_rx_desc_cnt; i++)
1612                         bus_dmamap_destroy(sc->rl_ldata.rl_rx_mtag,
1613                             sc->rl_ldata.rl_rx_desc[i].rx_dmamap);
1614                 if (sc->rl_ldata.rl_rx_sparemap)
1615                         bus_dmamap_destroy(sc->rl_ldata.rl_rx_mtag,
1616                             sc->rl_ldata.rl_rx_sparemap);
1617                 bus_dma_tag_destroy(sc->rl_ldata.rl_rx_mtag);
1618         }
1619
1620         /* Unload and free the stats buffer and map */
1621
1622         if (sc->rl_ldata.rl_stag) {
1623                 bus_dmamap_unload(sc->rl_ldata.rl_stag,
1624                     sc->rl_ldata.rl_rx_list_map);
1625                 bus_dmamem_free(sc->rl_ldata.rl_stag,
1626                     sc->rl_ldata.rl_stats,
1627                     sc->rl_ldata.rl_smap);
1628                 bus_dma_tag_destroy(sc->rl_ldata.rl_stag);
1629         }
1630
1631         if (sc->rl_parent_tag)
1632                 bus_dma_tag_destroy(sc->rl_parent_tag);
1633
1634         mtx_destroy(&sc->rl_mtx);
1635
1636         return (0);
1637 }
1638
1639 static __inline void
1640 re_discard_rxbuf(struct rl_softc *sc, int idx)
1641 {
1642         struct rl_desc          *desc;
1643         struct rl_rxdesc        *rxd;
1644         uint32_t                cmdstat;
1645
1646         rxd = &sc->rl_ldata.rl_rx_desc[idx];
1647         desc = &sc->rl_ldata.rl_rx_list[idx];
1648         desc->rl_vlanctl = 0;
1649         cmdstat = rxd->rx_size;
1650         if (idx == sc->rl_ldata.rl_rx_desc_cnt - 1)
1651                 cmdstat |= RL_RDESC_CMD_EOR;
1652         desc->rl_cmdstat = htole32(cmdstat | RL_RDESC_CMD_OWN);
1653 }
1654
/*
 * Attach a fresh mbuf cluster to RX ring slot 'idx' and arm the
 * descriptor for the chip.  On success the previous mbuf (if any) has
 * been unloaded from its DMA map and the slot owns the new buffer.
 * On failure the slot is left exactly as it was and ENOBUFS is
 * returned, so the caller can recycle the old buffer instead.
 */
static int
re_newbuf(struct rl_softc *sc, int idx)
{
	struct mbuf		*m;
	struct rl_rxdesc	*rxd;
	bus_dma_segment_t	segs[1];
	bus_dmamap_t		map;
	struct rl_desc		*desc;
	uint32_t		cmdstat;
	int			error, nsegs;

	m = m_getcl(M_DONTWAIT, MT_DATA, M_PKTHDR);
	if (m == NULL)
		return (ENOBUFS);

	m->m_len = m->m_pkthdr.len = MCLBYTES;
#ifdef RE_FIXUP_RX
	/*
	 * This is part of an evil trick to deal with non-x86 platforms.
	 * The RealTek chip requires RX buffers to be aligned on 64-bit
	 * boundaries, but that will hose non-x86 machines. To get around
	 * this, we leave some empty space at the start of each buffer
	 * and for non-x86 hosts, we copy the buffer back six bytes
	 * to achieve word alignment. This is slightly more efficient
	 * than allocating a new buffer, copying the contents, and
	 * discarding the old buffer.
	 */
	m_adj(m, RE_ETHER_ALIGN);
#endif
	/*
	 * Load the new mbuf into the spare map first; the slot's own map
	 * still holds the old buffer and must not be disturbed until the
	 * load is known to have succeeded.
	 */
	error = bus_dmamap_load_mbuf_sg(sc->rl_ldata.rl_rx_mtag,
	    sc->rl_ldata.rl_rx_sparemap, m, segs, &nsegs, BUS_DMA_NOWAIT);
	if (error != 0) {
		m_freem(m);
		return (ENOBUFS);
	}
	/* A single cluster must map to exactly one DMA segment. */
	KASSERT(nsegs == 1, ("%s: %d segment returned!", __func__, nsegs));

	rxd = &sc->rl_ldata.rl_rx_desc[idx];
	if (rxd->rx_m != NULL) {
		/* Tear down the old buffer's DMA state before replacing it. */
		bus_dmamap_sync(sc->rl_ldata.rl_rx_mtag, rxd->rx_dmamap,
		    BUS_DMASYNC_POSTREAD);
		bus_dmamap_unload(sc->rl_ldata.rl_rx_mtag, rxd->rx_dmamap);
	}

	/*
	 * Swap maps: the spare map (now loaded) becomes the slot's map,
	 * and the slot's old map becomes the new spare.
	 */
	rxd->rx_m = m;
	map = rxd->rx_dmamap;
	rxd->rx_dmamap = sc->rl_ldata.rl_rx_sparemap;
	rxd->rx_size = segs[0].ds_len;
	sc->rl_ldata.rl_rx_sparemap = map;
	bus_dmamap_sync(sc->rl_ldata.rl_rx_mtag, rxd->rx_dmamap,
	    BUS_DMASYNC_PREREAD);

	/*
	 * Program the descriptor.  The OWN bit is written together with
	 * the command word as the final store, handing the slot to the
	 * chip only after the buffer address and length are in place.
	 */
	desc = &sc->rl_ldata.rl_rx_list[idx];
	desc->rl_vlanctl = 0;
	desc->rl_bufaddr_lo = htole32(RL_ADDR_LO(segs[0].ds_addr));
	desc->rl_bufaddr_hi = htole32(RL_ADDR_HI(segs[0].ds_addr));
	cmdstat = segs[0].ds_len;
	if (idx == sc->rl_ldata.rl_rx_desc_cnt - 1)
		cmdstat |= RL_RDESC_CMD_EOR;
	desc->rl_cmdstat = htole32(cmdstat | RL_RDESC_CMD_OWN);

	return (0);
}
1718
#ifdef RE_FIXUP_RX
/*
 * Slide the received frame back by (RE_ETHER_ALIGN - ETHER_ALIGN)
 * bytes so the IP header ends up 32-bit aligned on strict-alignment
 * platforms.  See the comment in re_newbuf(): the buffer was
 * deliberately over-aligned with m_adj() at allocation time to leave
 * room in front for this copy-back.
 */
static __inline void
re_fixup_rx(struct mbuf *m)
{
	int			i;
	uint16_t		*src, *dst;

	src = mtod(m, uint16_t *);
	/* Destination starts the fixup offset (in 16-bit words) earlier. */
	dst = src - (RE_ETHER_ALIGN - ETHER_ALIGN) / sizeof *src;

	/* The "+ 1" rounds up so an odd trailing byte is also copied. */
	for (i = 0; i < (m->m_len / sizeof(uint16_t) + 1); i++)
		*dst++ = *src++;

	/* Point the mbuf data at the relocated, now-aligned frame. */
	m->m_data -= RE_ETHER_ALIGN - ETHER_ALIGN;
}
#endif
1735
1736 static int
1737 re_tx_list_init(struct rl_softc *sc)
1738 {
1739         struct rl_desc          *desc;
1740         int                     i;
1741
1742         RL_LOCK_ASSERT(sc);
1743
1744         bzero(sc->rl_ldata.rl_tx_list,
1745             sc->rl_ldata.rl_tx_desc_cnt * sizeof(struct rl_desc));
1746         for (i = 0; i < sc->rl_ldata.rl_tx_desc_cnt; i++)
1747                 sc->rl_ldata.rl_tx_desc[i].tx_m = NULL;
1748         /* Set EOR. */
1749         desc = &sc->rl_ldata.rl_tx_list[sc->rl_ldata.rl_tx_desc_cnt - 1];
1750         desc->rl_cmdstat |= htole32(RL_TDESC_CMD_EOR);
1751
1752         bus_dmamap_sync(sc->rl_ldata.rl_tx_list_tag,
1753             sc->rl_ldata.rl_tx_list_map,
1754             BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
1755
1756         sc->rl_ldata.rl_tx_prodidx = 0;
1757         sc->rl_ldata.rl_tx_considx = 0;
1758         sc->rl_ldata.rl_tx_free = sc->rl_ldata.rl_tx_desc_cnt;
1759
1760         return (0);
1761 }
1762
1763 static int
1764 re_rx_list_init(struct rl_softc *sc)
1765 {
1766         int                     error, i;
1767
1768         bzero(sc->rl_ldata.rl_rx_list,
1769             sc->rl_ldata.rl_rx_desc_cnt * sizeof(struct rl_desc));
1770         for (i = 0; i < sc->rl_ldata.rl_rx_desc_cnt; i++) {
1771                 sc->rl_ldata.rl_rx_desc[i].rx_m = NULL;
1772                 if ((error = re_newbuf(sc, i)) != 0)
1773                         return (error);
1774         }
1775
1776         /* Flush the RX descriptors */
1777
1778         bus_dmamap_sync(sc->rl_ldata.rl_rx_list_tag,
1779             sc->rl_ldata.rl_rx_list_map,
1780             BUS_DMASYNC_PREWRITE|BUS_DMASYNC_PREREAD);
1781
1782         sc->rl_ldata.rl_rx_prodidx = 0;
1783         sc->rl_head = sc->rl_tail = NULL;
1784
1785         return (0);
1786 }
1787
/*
 * RX handler for C+ and 8169. For the gigE chips, we support
 * the reception of jumbo frames that have been fragmented
 * across multiple 2K mbuf cluster buffers.
 *
 * Processes up to 16 completed descriptors per call.  Returns EAGAIN
 * when the ring was drained within that budget, 0 when the budget ran
 * out (i.e. more work may be pending, so the caller should re-poll).
 * Called with the softc lock held; the lock is dropped around each
 * if_input() call.
 */
static int
re_rxeof(struct rl_softc *sc)
{
	struct mbuf		*m;
	struct ifnet		*ifp;
	int			i, total_len;
	struct rl_desc		*cur_rx;
	u_int32_t		rxstat, rxvlan;
	int			maxpkt = 16;	/* per-call descriptor budget */

	RL_LOCK_ASSERT(sc);

	ifp = sc->rl_ifp;

	/* Invalidate the descriptor memory */

	bus_dmamap_sync(sc->rl_ldata.rl_rx_list_tag,
	    sc->rl_ldata.rl_rx_list_map,
	    BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);

	for (i = sc->rl_ldata.rl_rx_prodidx; maxpkt > 0;
	    i = RL_RX_DESC_NXT(sc, i)) {
		cur_rx = &sc->rl_ldata.rl_rx_list[i];
		rxstat = le32toh(cur_rx->rl_cmdstat);
		/* OWN still set means the chip hasn't filled this slot yet. */
		if ((rxstat & RL_RDESC_STAT_OWN) != 0)
			break;
		total_len = rxstat & sc->rl_rxlenmask;
		rxvlan = le32toh(cur_rx->rl_vlanctl);
		m = sc->rl_ldata.rl_rx_desc[i].rx_m;

		/*
		 * No EOF: this is a middle fragment of a frame that spans
		 * multiple descriptors; chain it onto rl_head/rl_tail.
		 */
		if (!(rxstat & RL_RDESC_STAT_EOF)) {
			if (re_newbuf(sc, i) != 0) {
				/*
				 * If this is part of a multi-fragment packet,
				 * discard all the pieces.
				 */
				if (sc->rl_head != NULL) {
					m_freem(sc->rl_head);
					sc->rl_head = sc->rl_tail = NULL;
				}
				re_discard_rxbuf(sc, i);
				continue;
			}
			m->m_len = RE_RX_DESC_BUFLEN;
			if (sc->rl_head == NULL)
				sc->rl_head = sc->rl_tail = m;
			else {
				m->m_flags &= ~M_PKTHDR;
				sc->rl_tail->m_next = m;
				sc->rl_tail = m;
			}
			continue;
		}

		/*
		 * NOTE: for the 8139C+, the frame length field
		 * is always 12 bits in size, but for the gigE chips,
		 * it is 13 bits (since the max RX frame length is 16K).
		 * Unfortunately, all 32 bits in the status word
		 * were already used, so to make room for the extra
		 * length bit, RealTek took out the 'frame alignment
		 * error' bit and shifted the other status bits
		 * over one slot. The OWN, EOR, FS and LS bits are
		 * still in the same places. We have already extracted
		 * the frame length and checked the OWN bit, so rather
		 * than using an alternate bit mapping, we shift the
		 * status bits one space to the right so we can evaluate
		 * them using the 8169 status as though it was in the
		 * same format as that of the 8139C+.
		 */
		if (sc->rl_type == RL_8169)
			rxstat >>= 1;

		/*
		 * if total_len > 2^13-1, both _RXERRSUM and _GIANT will be
		 * set, but if CRC is clear, it will still be a valid frame.
		 */
		if (rxstat & RL_RDESC_STAT_RXERRSUM && !(total_len > 8191 &&
		    (rxstat & RL_RDESC_STAT_ERRS) == RL_RDESC_STAT_GIANT)) {
			ifp->if_ierrors++;
			/*
			 * If this is part of a multi-fragment packet,
			 * discard all the pieces.
			 */
			if (sc->rl_head != NULL) {
				m_freem(sc->rl_head);
				sc->rl_head = sc->rl_tail = NULL;
			}
			re_discard_rxbuf(sc, i);
			continue;
		}

		/*
		 * If allocating a replacement mbuf fails,
		 * reload the current one.
		 */

		if (re_newbuf(sc, i) != 0) {
			ifp->if_iqdrops++;
			if (sc->rl_head != NULL) {
				m_freem(sc->rl_head);
				sc->rl_head = sc->rl_tail = NULL;
			}
			re_discard_rxbuf(sc, i);
			continue;
		}

		if (sc->rl_head != NULL) {
			/* Final fragment: length is the remainder. */
			m->m_len = total_len % RE_RX_DESC_BUFLEN;
			if (m->m_len == 0)
				m->m_len = RE_RX_DESC_BUFLEN;
			/*
			 * Special case: if there's 4 bytes or less
			 * in this buffer, the mbuf can be discarded:
			 * the last 4 bytes is the CRC, which we don't
			 * care about anyway.
			 */
			if (m->m_len <= ETHER_CRC_LEN) {
				sc->rl_tail->m_len -=
				    (ETHER_CRC_LEN - m->m_len);
				m_freem(m);
			} else {
				m->m_len -= ETHER_CRC_LEN;
				m->m_flags &= ~M_PKTHDR;
				sc->rl_tail->m_next = m;
			}
			m = sc->rl_head;
			sc->rl_head = sc->rl_tail = NULL;
			m->m_pkthdr.len = total_len - ETHER_CRC_LEN;
		} else
			/* Single-descriptor frame; strip the trailing CRC. */
			m->m_pkthdr.len = m->m_len =
			    (total_len - ETHER_CRC_LEN);

#ifdef RE_FIXUP_RX
		re_fixup_rx(m);
#endif
		ifp->if_ipackets++;
		m->m_pkthdr.rcvif = ifp;

		/* Do RX checksumming if enabled */

		if (ifp->if_capenable & IFCAP_RXCSUM) {
			if ((sc->rl_flags & RL_FLAG_DESCV2) == 0) {
				/* Check IP header checksum */
				if (rxstat & RL_RDESC_STAT_PROTOID)
					m->m_pkthdr.csum_flags |=
					    CSUM_IP_CHECKED;
				if (!(rxstat & RL_RDESC_STAT_IPSUMBAD))
					m->m_pkthdr.csum_flags |=
					    CSUM_IP_VALID;

				/* Check TCP/UDP checksum */
				if ((RL_TCPPKT(rxstat) &&
				    !(rxstat & RL_RDESC_STAT_TCPSUMBAD)) ||
				    (RL_UDPPKT(rxstat) &&
				     !(rxstat & RL_RDESC_STAT_UDPSUMBAD))) {
					m->m_pkthdr.csum_flags |=
						CSUM_DATA_VALID|CSUM_PSEUDO_HDR;
					m->m_pkthdr.csum_data = 0xffff;
				}
			} else {
				/*
				 * RTL8168C/RTL816CP/RTL8111C/RTL8111CP
				 * (descriptor-format-v2 chips report the
				 * protocol type in the vlanctl word)
				 */
				if ((rxstat & RL_RDESC_STAT_PROTOID) &&
				    (rxvlan & RL_RDESC_IPV4))
					m->m_pkthdr.csum_flags |=
					    CSUM_IP_CHECKED;
				if (!(rxstat & RL_RDESC_STAT_IPSUMBAD) &&
				    (rxvlan & RL_RDESC_IPV4))
					m->m_pkthdr.csum_flags |=
					    CSUM_IP_VALID;
				if (((rxstat & RL_RDESC_STAT_TCP) &&
				    !(rxstat & RL_RDESC_STAT_TCPSUMBAD)) ||
				    ((rxstat & RL_RDESC_STAT_UDP) &&
				    !(rxstat & RL_RDESC_STAT_UDPSUMBAD))) {
					m->m_pkthdr.csum_flags |=
						CSUM_DATA_VALID|CSUM_PSEUDO_HDR;
					m->m_pkthdr.csum_data = 0xffff;
				}
			}
		}
		maxpkt--;
		if (rxvlan & RL_RDESC_VLANCTL_TAG) {
			/* Tag is big-endian on the wire; swap to host order. */
			m->m_pkthdr.ether_vtag =
			    bswap16((rxvlan & RL_RDESC_VLANCTL_DATA));
			m->m_flags |= M_VLANTAG;
		}
		/* Drop the lock across the stack input call. */
		RL_UNLOCK(sc);
		(*ifp->if_input)(ifp, m);
		RL_LOCK(sc);
	}

	/* Flush the RX DMA ring */

	bus_dmamap_sync(sc->rl_ldata.rl_rx_list_tag,
	    sc->rl_ldata.rl_rx_list_map,
	    BUS_DMASYNC_PREWRITE|BUS_DMASYNC_PREREAD);

	sc->rl_ldata.rl_rx_prodidx = i;

	/* Budget left over means the ring was drained. */
	if (maxpkt)
		return(EAGAIN);

	return(0);
}
1999
/*
 * Reclaim completed TX descriptors: walk the ring from the consumer
 * index until a descriptor still owned by the chip is found, freeing
 * transmitted mbufs and updating interface statistics.  Clears the
 * watchdog timer once the ring is fully drained.
 */
static void
re_txeof(struct rl_softc *sc)
{
	struct ifnet		*ifp;
	struct rl_txdesc	*txd;
	u_int32_t		txstat;
	int			cons;

	cons = sc->rl_ldata.rl_tx_considx;
	/* Nothing in flight — nothing to reclaim. */
	if (cons == sc->rl_ldata.rl_tx_prodidx)
		return;

	ifp = sc->rl_ifp;
	/* Invalidate the TX descriptor list */
	bus_dmamap_sync(sc->rl_ldata.rl_tx_list_tag,
	    sc->rl_ldata.rl_tx_list_map,
	    BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);

	for (; cons != sc->rl_ldata.rl_tx_prodidx;
	    cons = RL_TX_DESC_NXT(sc, cons)) {
		txstat = le32toh(sc->rl_ldata.rl_tx_list[cons].rl_cmdstat);
		/* Chip still owns this descriptor; stop here. */
		if (txstat & RL_TDESC_STAT_OWN)
			break;
		/*
		 * We only stash mbufs in the last descriptor
		 * in a fragment chain, which also happens to
		 * be the only place where the TX status bits
		 * are valid.
		 */
		if (txstat & RL_TDESC_CMD_EOF) {
			txd = &sc->rl_ldata.rl_tx_desc[cons];
			bus_dmamap_sync(sc->rl_ldata.rl_tx_mtag,
			    txd->tx_dmamap, BUS_DMASYNC_POSTWRITE);
			bus_dmamap_unload(sc->rl_ldata.rl_tx_mtag,
			    txd->tx_dmamap);
			KASSERT(txd->tx_m != NULL,
			    ("%s: freeing NULL mbufs!", __func__));
			m_freem(txd->tx_m);
			txd->tx_m = NULL;
			if (txstat & (RL_TDESC_STAT_EXCESSCOL|
			    RL_TDESC_STAT_COLCNT))
				ifp->if_collisions++;
			if (txstat & RL_TDESC_STAT_TXERRSUM)
				ifp->if_oerrors++;
			else
				ifp->if_opackets++;
		}
		/* One descriptor freed; the queue can accept work again. */
		sc->rl_ldata.rl_tx_free++;
		ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;
	}
	sc->rl_ldata.rl_tx_considx = cons;

	/* No changes made to the TX ring, so no flush needed */

	if (sc->rl_ldata.rl_tx_free != sc->rl_ldata.rl_tx_desc_cnt) {
#ifdef RE_TX_MODERATION
		/*
		 * If not all descriptors have been reaped yet, reload
		 * the timer so that we will eventually get another
		 * interrupt that will cause us to re-enter this routine.
		 * This is done in case the transmitter has gone idle.
		 */
		CSR_WRITE_4(sc, RL_TIMERCNT, 1);
#endif
	} else
		sc->rl_watchdog_timer = 0;
}
2067
2068 static void
2069 re_tick(void *xsc)
2070 {
2071         struct rl_softc         *sc;
2072         struct mii_data         *mii;
2073
2074         sc = xsc;
2075
2076         RL_LOCK_ASSERT(sc);
2077
2078         mii = device_get_softc(sc->rl_miibus);
2079         mii_tick(mii);
2080         if ((sc->rl_flags & RL_FLAG_LINK) == 0)
2081                 re_miibus_statchg(sc->rl_dev);
2082         /*
2083          * Reclaim transmitted frames here. Technically it is not
2084          * necessary to do here but it ensures periodic reclamation
2085          * regardless of Tx completion interrupt which seems to be
2086          * lost on PCIe based controllers under certain situations. 
2087          */
2088         re_txeof(sc);
2089         re_watchdog(sc);
2090         callout_reset(&sc->rl_stat_callout, hz, re_tick, sc);
2091 }
2092
2093 #ifdef DEVICE_POLLING
2094 static void
2095 re_poll(struct ifnet *ifp, enum poll_cmd cmd, int count)
2096 {
2097         struct rl_softc *sc = ifp->if_softc;
2098
2099         RL_LOCK(sc);
2100         if (ifp->if_drv_flags & IFF_DRV_RUNNING)
2101                 re_poll_locked(ifp, cmd, count);
2102         RL_UNLOCK(sc);
2103 }
2104
/*
 * Locked body of the polling handler: service RX and TX rings, kick
 * the transmit task if output is queued, and on POLL_AND_CHECK_STATUS
 * also read/ack the interrupt status register and recover from error
 * conditions.  Caller holds the softc lock.
 */
static void
re_poll_locked(struct ifnet *ifp, enum poll_cmd cmd, int count)
{
	struct rl_softc *sc = ifp->if_softc;

	RL_LOCK_ASSERT(sc);

	sc->rxcycles = count;
	re_rxeof(sc);
	re_txeof(sc);

	if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
		taskqueue_enqueue_fast(taskqueue_fast, &sc->rl_txtask);

	if (cmd == POLL_AND_CHECK_STATUS) { /* also check status register */
		u_int16_t	status;

		status = CSR_READ_2(sc, RL_ISR);
		/* All-ones means the device has been unplugged. */
		if (status == 0xffff)
			return;
		if (status)
			CSR_WRITE_2(sc, RL_ISR, status);
		/*
		 * Restart a stalled transmitter; only PCIe parts appear
		 * to need this (see the comment in re_int_task()).
		 */
		if ((status & (RL_ISR_TX_OK | RL_ISR_TX_DESC_UNAVAIL)) &&
		    (sc->rl_flags & RL_FLAG_PCIE))
			CSR_WRITE_1(sc, sc->rl_txstart, RL_TXSTART_START);

		/*
		 * XXX check behaviour on receiver stalls.
		 */

		if (status & RL_ISR_SYSTEM_ERR)
			re_init_locked(sc);
	}
}
2139 #endif /* DEVICE_POLLING */
2140
2141 static int
2142 re_intr(void *arg)
2143 {
2144         struct rl_softc         *sc;
2145         uint16_t                status;
2146
2147         sc = arg;
2148
2149         status = CSR_READ_2(sc, RL_ISR);
2150         if (status == 0xFFFF || (status & RL_INTRS_CPLUS) == 0)
2151                 return (FILTER_STRAY);
2152         CSR_WRITE_2(sc, RL_IMR, 0);
2153
2154         taskqueue_enqueue_fast(taskqueue_fast, &sc->rl_inttask);
2155
2156         return (FILTER_HANDLED);
2157 }
2158
/*
 * Deferred interrupt handler (runs from the fast taskqueue).  Reads
 * and acknowledges the interrupt status, services RX/TX, restarts a
 * stalled PCIe transmitter, reinitializes on system error, and either
 * re-queues itself if more work is pending or re-enables interrupts
 * (which re_intr() masked).
 */
static void
re_int_task(void *arg, int npending)
{
	struct rl_softc		*sc;
	struct ifnet		*ifp;
	u_int16_t		status;
	int			rval = 0;

	sc = arg;
	ifp = sc->rl_ifp;

	RL_LOCK(sc);

	/* Read and acknowledge all pending interrupt causes. */
	status = CSR_READ_2(sc, RL_ISR);
	CSR_WRITE_2(sc, RL_ISR, status);

	/* Bail out if the device is suspended or not up. */
	if (sc->suspended ||
	    (ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) {
		RL_UNLOCK(sc);
		return;
	}

#ifdef DEVICE_POLLING
	/* When polling is active, the poll loop does this work instead. */
	if  (ifp->if_capenable & IFCAP_POLLING) {
		RL_UNLOCK(sc);
		return;
	}
#endif

	if (status & (RL_ISR_RX_OK|RL_ISR_RX_ERR|RL_ISR_FIFO_OFLOW))
		rval = re_rxeof(sc);

	/*
	 * Some chips will ignore a second TX request issued
	 * while an existing transmission is in progress. If
	 * the transmitter goes idle but there are still
	 * packets waiting to be sent, we need to restart the
	 * channel here to flush them out. This only seems to
	 * be required with the PCIe devices.
	 */
	if ((status & (RL_ISR_TX_OK | RL_ISR_TX_DESC_UNAVAIL)) &&
	    (sc->rl_flags & RL_FLAG_PCIE))
		CSR_WRITE_1(sc, sc->rl_txstart, RL_TXSTART_START);
	if (status & (
#ifdef RE_TX_MODERATION
	    RL_ISR_TIMEOUT_EXPIRED|
#else
	    RL_ISR_TX_OK|
#endif
	    RL_ISR_TX_ERR|RL_ISR_TX_DESC_UNAVAIL))
		re_txeof(sc);

	if (status & RL_ISR_SYSTEM_ERR)
		re_init_locked(sc);

	if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
		taskqueue_enqueue_fast(taskqueue_fast, &sc->rl_txtask);

	RL_UNLOCK(sc);

	/*
	 * More events arrived while we worked (or re_rxeof() ran out of
	 * budget): re-queue ourselves instead of unmasking interrupts.
	 */
	if ((CSR_READ_2(sc, RL_ISR) & RL_INTRS_CPLUS) || rval) {
		taskqueue_enqueue_fast(taskqueue_fast, &sc->rl_inttask);
		return;
	}

	/* All caught up — re-enable the interrupts re_intr() masked. */
	CSR_WRITE_2(sc, RL_IMR, RL_INTRS_CPLUS);
}
2226
2227 static int
2228 re_encap(struct rl_softc *sc, struct mbuf **m_head)
2229 {
2230         struct rl_txdesc        *txd, *txd_last;
2231         bus_dma_segment_t       segs[RL_NTXSEGS];
2232         bus_dmamap_t            map;
2233         struct mbuf             *m_new;
2234         struct rl_desc          *desc;
2235         int                     nsegs, prod;
2236         int                     i, error, ei, si;
2237         int                     padlen;
2238         uint32_t                cmdstat, csum_flags, vlanctl;
2239
2240         RL_LOCK_ASSERT(sc);
2241         M_ASSERTPKTHDR((*m_head));
2242
2243         /*
2244          * With some of the RealTek chips, using the checksum offload
2245          * support in conjunction with the autopadding feature results
2246          * in the transmission of corrupt frames. For example, if we
2247          * need to send a really small IP fragment that's less than 60
2248          * bytes in size, and IP header checksumming is enabled, the
2249          * resulting ethernet frame that appears on the wire will
2250          * have garbled payload. To work around this, if TX IP checksum
2251          * offload is enabled, we always manually pad short frames out
2252          * to the minimum ethernet frame size.
2253          */
2254         if ((sc->rl_flags & RL_FLAG_DESCV2) == 0 &&
2255             (*m_head)->m_pkthdr.len < RL_IP4CSUMTX_PADLEN &&
2256             ((*m_head)->m_pkthdr.csum_flags & CSUM_IP) != 0) {
2257                 padlen = RL_MIN_FRAMELEN - (*m_head)->m_pkthdr.len;
2258                 if (M_WRITABLE(*m_head) == 0) {
2259                         /* Get a writable copy. */
2260                         m_new = m_dup(*m_head, M_DONTWAIT);
2261                         m_freem(*m_head);
2262                         if (m_new == NULL) {
2263                                 *m_head = NULL;
2264                                 return (ENOBUFS);
2265                         }
2266                         *m_head = m_new;
2267                 }
2268                 if ((*m_head)->m_next != NULL ||
2269                     M_TRAILINGSPACE(*m_head) < padlen) {
2270                         m_new = m_defrag(*m_head, M_DONTWAIT);
2271                         if (m_new == NULL) {
2272                                 m_freem(*m_head);
2273                                 *m_head = NULL;
2274                                 return (ENOBUFS);
2275                         }
2276                 } else
2277                         m_new = *m_head;
2278
2279                 /*
2280                  * Manually pad short frames, and zero the pad space
2281                  * to avoid leaking data.
2282                  */
2283                 bzero(mtod(m_new, char *) + m_new->m_pkthdr.len, padlen);
2284                 m_new->m_pkthdr.len += padlen;
2285                 m_new->m_len = m_new->m_pkthdr.len;
2286                 *m_head = m_new;
2287         }
2288
2289         prod = sc->rl_ldata.rl_tx_prodidx;
2290         txd = &sc->rl_ldata.rl_tx_desc[prod];
2291         error = bus_dmamap_load_mbuf_sg(sc->rl_ldata.rl_tx_mtag, txd->tx_dmamap,
2292             *m_head, segs, &nsegs, BUS_DMA_NOWAIT);
2293         if (error == EFBIG) {
2294                 m_new = m_collapse(*m_head, M_DONTWAIT, RL_NTXSEGS);
2295                 if (m_new == NULL) {
2296                         m_freem(*m_head);
2297                         *m_head = NULL;
2298                         return (ENOBUFS);
2299                 }
2300                 *m_head = m_new;
2301                 error = bus_dmamap_load_mbuf_sg(sc->rl_ldata.rl_tx_mtag,
2302                     txd->tx_dmamap, *m_head, segs, &nsegs, BUS_DMA_NOWAIT);
2303                 if (error != 0) {
2304                         m_freem(*m_head);
2305                         *m_head = NULL;
2306                         return (error);
2307                 }
2308         } else if (error != 0)
2309                 return (error);
2310         if (nsegs == 0) {
2311                 m_freem(*m_head);
2312                 *m_head = NULL;
2313                 return (EIO);
2314         }
2315
2316         /* Check for number of available descriptors. */
2317         if (sc->rl_ldata.rl_tx_free - nsegs <= 1) {
2318                 bus_dmamap_unload(sc->rl_ldata.rl_tx_mtag, txd->tx_dmamap);
2319                 return (ENOBUFS);
2320         }
2321
2322         bus_dmamap_sync(sc->rl_ldata.rl_tx_mtag, txd->tx_dmamap,
2323             BUS_DMASYNC_PREWRITE);
2324
2325         /*
2326          * Set up checksum offload. Note: checksum offload bits must
2327          * appear in all descriptors of a multi-descriptor transmit
2328          * attempt. This is according to testing done with an 8169
2329          * chip. This is a requirement.
2330          */
2331         vlanctl = 0;
2332         csum_flags = 0;
2333         if (((*m_head)->m_pkthdr.csum_flags & CSUM_TSO) != 0)
2334                 csum_flags = RL_TDESC_CMD_LGSEND |
2335                     ((uint32_t)(*m_head)->m_pkthdr.tso_segsz <<
2336                     RL_TDESC_CMD_MSSVAL_SHIFT);
2337         else {
2338                 /*
2339                  * Unconditionally enable IP checksum if TCP or UDP
2340                  * checksum is required. Otherwise, TCP/UDP checksum
2341                  * does't make effects.
2342                  */
2343                 if (((*m_head)->m_pkthdr.csum_flags & RE_CSUM_FEATURES) != 0) {
2344                         if ((sc->rl_flags & RL_FLAG_DESCV2) == 0) {
2345                                 csum_flags |= RL_TDESC_CMD_IPCSUM;
2346                                 if (((*m_head)->m_pkthdr.csum_flags &
2347                                     CSUM_TCP) != 0)
2348                                         csum_flags |= RL_TDESC_CMD_TCPCSUM;
2349                                 if (((*m_head)->m_pkthdr.csum_flags &
2350                                     CSUM_UDP) != 0)
2351                                         csum_flags |= RL_TDESC_CMD_UDPCSUM;
2352                         } else {
2353                                 vlanctl |= RL_TDESC_CMD_IPCSUMV2;
2354                                 if (((*m_head)->m_pkthdr.csum_flags &
2355                                     CSUM_TCP) != 0)
2356                                         vlanctl |= RL_TDESC_CMD_TCPCSUMV2;
2357                                 if (((*m_head)->m_pkthdr.csum_flags &
2358                                     CSUM_UDP) != 0)
2359                                         vlanctl |= RL_TDESC_CMD_UDPCSUMV2;
2360                         }
2361                 }
2362         }
2363
2364         /*
2365          * Set up hardware VLAN tagging. Note: vlan tag info must
2366          * appear in all descriptors of a multi-descriptor
2367          * transmission attempt.
2368          */
2369         if ((*m_head)->m_flags & M_VLANTAG)
2370                 vlanctl |= bswap16((*m_head)->m_pkthdr.ether_vtag) |
2371                     RL_TDESC_VLANCTL_TAG;
2372
2373         si = prod;
2374         for (i = 0; i < nsegs; i++, prod = RL_TX_DESC_NXT(sc, prod)) {
2375                 desc = &sc->rl_ldata.rl_tx_list[prod];
2376                 desc->rl_vlanctl = htole32(vlanctl);
2377                 desc->rl_bufaddr_lo = htole32(RL_ADDR_LO(segs[i].ds_addr));
2378                 desc->rl_bufaddr_hi = htole32(RL_ADDR_HI(segs[i].ds_addr));
2379                 cmdstat = segs[i].ds_len;
2380                 if (i != 0)
2381                         cmdstat |= RL_TDESC_CMD_OWN;
2382                 if (prod == sc->rl_ldata.rl_tx_desc_cnt - 1)
2383                         cmdstat |= RL_TDESC_CMD_EOR;
2384                 desc->rl_cmdstat = htole32(cmdstat | csum_flags);
2385                 sc->rl_ldata.rl_tx_free--;
2386         }
2387         /* Update producer index. */
2388         sc->rl_ldata.rl_tx_prodidx = prod;
2389
2390         /* Set EOF on the last descriptor. */
2391         ei = RL_TX_DESC_PRV(sc, prod);
2392         desc = &sc->rl_ldata.rl_tx_list[ei];
2393         desc->rl_cmdstat |= htole32(RL_TDESC_CMD_EOF);
2394
2395         desc = &sc->rl_ldata.rl_tx_list[si];
2396         /* Set SOF and transfer ownership of packet to the chip. */
2397         desc->rl_cmdstat |= htole32(RL_TDESC_CMD_OWN | RL_TDESC_CMD_SOF);
2398
2399         /*
2400          * Insure that the map for this transmission
2401          * is placed at the array index of the last descriptor
2402          * in this chain.  (Swap last and first dmamaps.)
2403          */
2404         txd_last = &sc->rl_ldata.rl_tx_desc[ei];
2405         map = txd->tx_dmamap;
2406         txd->tx_dmamap = txd_last->tx_dmamap;
2407         txd_last->tx_dmamap = map;
2408         txd_last->tx_m = *m_head;
2409
2410         return (0);
2411 }
2412
/*
 * Deferred transmit handler: runs from the fast taskqueue and simply
 * kicks the main transmit routine for the interface passed as 'arg'.
 * The 'npending' count is unused.
 */
static void
re_tx_task(void *arg, int npending)
{

	re_start((struct ifnet *)arg);
}
2421
/*
 * Main transmit routine for C+ and gigE NICs.
 *
 * Drains the interface send queue into the TX descriptor ring via
 * re_encap() until the queue empties or the ring is nearly full,
 * then kicks the transmitter.  Called unlocked (ifnet if_start hook);
 * takes and releases the softc lock internally.
 */
static void
re_start(struct ifnet *ifp)
{
	struct rl_softc		*sc;
	struct mbuf		*m_head;
	int			queued;

	sc = ifp->if_softc;

	RL_LOCK(sc);

	/* Nothing to do unless running, not stalled, and link is up. */
	if ((ifp->if_drv_flags & (IFF_DRV_RUNNING | IFF_DRV_OACTIVE)) !=
	    IFF_DRV_RUNNING || (sc->rl_flags & RL_FLAG_LINK) == 0) {
		RL_UNLOCK(sc);
		return;
	}

	/* Keep at least one descriptor in reserve (rl_tx_free > 1). */
	for (queued = 0; !IFQ_DRV_IS_EMPTY(&ifp->if_snd) &&
	    sc->rl_ldata.rl_tx_free > 1;) {
		IFQ_DRV_DEQUEUE(&ifp->if_snd, m_head);
		if (m_head == NULL)
			break;

		if (re_encap(sc, &m_head) != 0) {
			/*
			 * re_encap() may have freed the mbuf on a hard
			 * failure (m_head NULL); otherwise requeue it and
			 * mark OACTIVE so we retry after TX completions.
			 */
			if (m_head == NULL)
				break;
			IFQ_DRV_PREPEND(&ifp->if_snd, m_head);
			ifp->if_drv_flags |= IFF_DRV_OACTIVE;
			break;
		}

		/*
		 * If there's a BPF listener, bounce a copy of this frame
		 * to him.
		 */
		ETHER_BPF_MTAP(ifp, m_head);

		queued++;
	}

	if (queued == 0) {
#ifdef RE_TX_MODERATION
		/*
		 * Nothing queued this pass; if transmissions are still
		 * outstanding, re-arm the moderation timer so their
		 * completions eventually raise an interrupt.
		 */
		if (sc->rl_ldata.rl_tx_free != sc->rl_ldata.rl_tx_desc_cnt)
			CSR_WRITE_4(sc, RL_TIMERCNT, 1);
#endif
		RL_UNLOCK(sc);
		return;
	}

	/* Flush the TX descriptors */

	bus_dmamap_sync(sc->rl_ldata.rl_tx_list_tag,
	    sc->rl_ldata.rl_tx_list_map,
	    BUS_DMASYNC_PREWRITE|BUS_DMASYNC_PREREAD);

	/* Poke the chip to start fetching/sending the new descriptors. */
	CSR_WRITE_1(sc, sc->rl_txstart, RL_TXSTART_START);

#ifdef RE_TX_MODERATION
	/*
	 * Use the countdown timer for interrupt moderation.
	 * 'TX done' interrupts are disabled. Instead, we reset the
	 * countdown timer, which will begin counting until it hits
	 * the value in the TIMERINT register, and then trigger an
	 * interrupt. Each time we write to the TIMERCNT register,
	 * the timer count is reset to 0.
	 */
	CSR_WRITE_4(sc, RL_TIMERCNT, 1);
#endif

	/*
	 * Set a timeout in case the chip goes out to lunch.
	 */
	sc->rl_watchdog_timer = 5;

	RL_UNLOCK(sc);
}
2501
/*
 * Unlocked init entry point (ifnet if_init hook): acquire the softc
 * lock and defer to re_init_locked() for the real work.
 */
static void
re_init(void *xsc)
{
	struct rl_softc		*sc;

	sc = xsc;
	RL_LOCK(sc);
	re_init_locked(sc);
	RL_UNLOCK(sc);
}
2511
/*
 * Bring the hardware up with the softc lock held: stop/reset the chip,
 * program C+ command mode, the station address, descriptor ring base
 * addresses and RX/TX configuration, then mark the interface running.
 * The register write ordering below is deliberate -- do not reorder.
 */
static void
re_init_locked(struct rl_softc *sc)
{
	struct ifnet		*ifp = sc->rl_ifp;
	struct mii_data		*mii;
	uint32_t		reg;
	uint16_t		cfg;
	/* Union forces 32-bit alignment for the IDR register writes. */
	union {
		uint32_t align_dummy;
		u_char eaddr[ETHER_ADDR_LEN];
	} eaddr;

	RL_LOCK_ASSERT(sc);

	mii = device_get_softc(sc->rl_miibus);

	/*
	 * Cancel pending I/O and free all RX/TX buffers.
	 */
	re_stop(sc);

	/* Put controller into known state. */
	re_reset(sc);

	/*
	 * Enable C+ RX and TX mode, as well as VLAN stripping and
	 * RX checksum offload. We must configure the C+ register
	 * before all others.
	 */
	cfg = RL_CPLUSCMD_PCI_MRW;
	if ((ifp->if_capenable & IFCAP_RXCSUM) != 0)
		cfg |= RL_CPLUSCMD_RXCSUM_ENB;
	if ((ifp->if_capenable & IFCAP_VLAN_HWTAGGING) != 0)
		cfg |= RL_CPLUSCMD_VLANSTRIP;
	if ((sc->rl_flags & RL_FLAG_MACSTAT) != 0) {
		cfg |= RL_CPLUSCMD_MACSTAT_DIS;
		/* XXX magic. */
		cfg |= 0x0001;
	} else
		cfg |= RL_CPLUSCMD_RXENB | RL_CPLUSCMD_TXENB;
	CSR_WRITE_2(sc, RL_CPLUS_CMD, cfg);
	if (sc->rl_hwrev == RL_HWREV_8169_8110SC ||
	    sc->rl_hwrev == RL_HWREV_8169_8110SCE) {
		/*
		 * Undocumented tuning for 8110SC/SCE parts; the values
		 * and register offsets (0x7c, 0xe2) are magic -- TODO
		 * confirm against vendor reference code.
		 */
		reg = 0x000fff00;
		if ((CSR_READ_1(sc, RL_CFG2) & RL_CFG2_PCI66MHZ) != 0)
			reg |= 0x000000ff;
		if (sc->rl_hwrev == RL_HWREV_8169_8110SCE)
			reg |= 0x00f00000;
		CSR_WRITE_4(sc, 0x7c, reg);
		/* Disable interrupt mitigation. */
		CSR_WRITE_2(sc, 0xe2, 0);
	}
	/*
	 * Disable TSO if interface MTU size is greater than MSS
	 * allowed in controller.
	 */
	if (ifp->if_mtu > RL_TSO_MTU && (ifp->if_capenable & IFCAP_TSO4) != 0) {
		ifp->if_capenable &= ~IFCAP_TSO4;
		ifp->if_hwassist &= ~CSUM_TSO;
	}

	/*
	 * Init our MAC address.  Even though the chipset
	 * documentation doesn't mention it, we need to enter "Config
	 * register write enable" mode to modify the ID registers.
	 */
	/* Copy MAC address on stack to align. */
	bcopy(IF_LLADDR(ifp), eaddr.eaddr, ETHER_ADDR_LEN);
	CSR_WRITE_1(sc, RL_EECMD, RL_EEMODE_WRITECFG);
	CSR_WRITE_4(sc, RL_IDR0,
	    htole32(*(u_int32_t *)(&eaddr.eaddr[0])));
	CSR_WRITE_4(sc, RL_IDR4,
	    htole32(*(u_int32_t *)(&eaddr.eaddr[4])));
	CSR_WRITE_1(sc, RL_EECMD, RL_EEMODE_OFF);

	/*
	 * For C+ mode, initialize the RX descriptors and mbufs.
	 */
	re_rx_list_init(sc);
	re_tx_list_init(sc);

	/*
	 * Load the addresses of the RX and TX lists into the chip.
	 */

	CSR_WRITE_4(sc, RL_RXLIST_ADDR_HI,
	    RL_ADDR_HI(sc->rl_ldata.rl_rx_list_addr));
	CSR_WRITE_4(sc, RL_RXLIST_ADDR_LO,
	    RL_ADDR_LO(sc->rl_ldata.rl_rx_list_addr));

	CSR_WRITE_4(sc, RL_TXLIST_ADDR_HI,
	    RL_ADDR_HI(sc->rl_ldata.rl_tx_list_addr));
	CSR_WRITE_4(sc, RL_TXLIST_ADDR_LO,
	    RL_ADDR_LO(sc->rl_ldata.rl_tx_list_addr));

	/*
	 * Enable transmit and receive.
	 */
	CSR_WRITE_1(sc, RL_COMMAND, RL_CMD_TX_ENB|RL_CMD_RX_ENB);

	/*
	 * Set the initial TX configuration.  In test mode the MAC is
	 * put into loopback; the loopback constant differs between
	 * the 8169 and the C+ family.
	 */
	if (sc->rl_testmode) {
		if (sc->rl_type == RL_8169)
			CSR_WRITE_4(sc, RL_TXCFG,
			    RL_TXCFG_CONFIG|RL_LOOPTEST_ON);
		else
			CSR_WRITE_4(sc, RL_TXCFG,
			    RL_TXCFG_CONFIG|RL_LOOPTEST_ON_CPLUS);
	} else
		CSR_WRITE_4(sc, RL_TXCFG, RL_TXCFG_CONFIG);

	CSR_WRITE_1(sc, RL_EARLY_TX_THRESH, 16);

	/*
	 * Set the initial RX configuration.
	 */
	re_set_rxmode(sc);

#ifdef DEVICE_POLLING
	/*
	 * Disable interrupts if we are polling.
	 */
	if (ifp->if_capenable & IFCAP_POLLING)
		CSR_WRITE_2(sc, RL_IMR, 0);
	else	/* otherwise ... */
#endif

	/*
	 * Enable interrupts.  Note: with DEVICE_POLLING compiled in,
	 * the entire if/else statement below is the body of the
	 * dangling 'else' above -- the binding is intentional.
	 */
	if (sc->rl_testmode)
		CSR_WRITE_2(sc, RL_IMR, 0);
	else
		CSR_WRITE_2(sc, RL_IMR, RL_INTRS_CPLUS);
	CSR_WRITE_2(sc, RL_ISR, RL_INTRS_CPLUS);

	/* Set initial TX threshold */
	sc->rl_txthresh = RL_TX_THRESH_INIT;

	/* Start RX/TX process. */
	CSR_WRITE_4(sc, RL_MISSEDPKT, 0);
#ifdef notdef
	/* Enable receiver and transmitter. */
	CSR_WRITE_1(sc, RL_COMMAND, RL_CMD_TX_ENB|RL_CMD_RX_ENB);
#endif

#ifdef RE_TX_MODERATION
	/*
	 * Initialize the timer interrupt register so that
	 * a timer interrupt will be generated once the timer
	 * reaches a certain number of ticks. The timer is
	 * reloaded on each transmit. This gives us TX interrupt
	 * moderation, which dramatically improves TX frame rate.
	 */
	if (sc->rl_type == RL_8169)
		CSR_WRITE_4(sc, RL_TIMERINT_8169, 0x800);
	else
		CSR_WRITE_4(sc, RL_TIMERINT, 0x400);
#endif

	/*
	 * For 8169 gigE NICs, set the max allowed RX packet
	 * size so we can receive jumbo frames.
	 */
	if (sc->rl_type == RL_8169)
		CSR_WRITE_2(sc, RL_MAXRXPKTLEN, 16383);

	/* In test mode, stop here: no media change, no callouts. */
	if (sc->rl_testmode)
		return;

	mii_mediachg(mii);

	/* Advertise to the chip that a driver is loaded. */
	CSR_WRITE_1(sc, RL_CFG1, CSR_READ_1(sc, RL_CFG1) | RL_CFG1_DRVLOAD);

	ifp->if_drv_flags |= IFF_DRV_RUNNING;
	ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;

	/* Link state is re-learned; arm the periodic tick callout. */
	sc->rl_flags &= ~RL_FLAG_LINK;
	sc->rl_watchdog_timer = 0;
	callout_reset(&sc->rl_stat_callout, hz, re_tick, sc);
}
2695
2696 /*
2697  * Set media options.
2698  */
2699 static int
2700 re_ifmedia_upd(struct ifnet *ifp)
2701 {
2702         struct rl_softc         *sc;
2703         struct mii_data         *mii;
2704         int                     error;
2705
2706         sc = ifp->if_softc;
2707         mii = device_get_softc(sc->rl_miibus);
2708         RL_LOCK(sc);
2709         error = mii_mediachg(mii);
2710         RL_UNLOCK(sc);
2711
2712         return (error);
2713 }
2714
2715 /*
2716  * Report current media status.
2717  */
2718 static void
2719 re_ifmedia_sts(struct ifnet *ifp, struct ifmediareq *ifmr)
2720 {
2721         struct rl_softc         *sc;
2722         struct mii_data         *mii;
2723
2724         sc = ifp->if_softc;
2725         mii = device_get_softc(sc->rl_miibus);
2726
2727         RL_LOCK(sc);
2728         mii_pollstat(mii);
2729         RL_UNLOCK(sc);
2730         ifmr->ifm_active = mii->mii_media_active;
2731         ifmr->ifm_status = mii->mii_media_status;
2732 }
2733
/*
 * ifnet ioctl handler: MTU, interface flags, multicast filter, media
 * and capability changes; anything else falls through to ether_ioctl().
 * The softc lock is taken and dropped per command as needed.
 */
static int
re_ioctl(struct ifnet *ifp, u_long command, caddr_t data)
{
	struct rl_softc		*sc = ifp->if_softc;
	struct ifreq		*ifr = (struct ifreq *) data;
	struct mii_data		*mii;
	int			error = 0;

	switch (command) {
	case SIOCSIFMTU:
		if (ifr->ifr_mtu < ETHERMIN || ifr->ifr_mtu > RL_JUMBO_MTU) {
			error = EINVAL;
			break;
		}
		/* Chips without jumbo support cap out at RL_MAX_FRAMELEN. */
		if ((sc->rl_flags & RL_FLAG_NOJUMBO) != 0 &&
		    ifr->ifr_mtu > RL_MAX_FRAMELEN) {
			error = EINVAL;
			break;
		}
		RL_LOCK(sc);
		if (ifp->if_mtu != ifr->ifr_mtu)
			ifp->if_mtu = ifr->ifr_mtu;
		/* TSO is unusable above RL_TSO_MTU; drop the capability. */
		if (ifp->if_mtu > RL_TSO_MTU &&
		    (ifp->if_capenable & IFCAP_TSO4) != 0) {
			ifp->if_capenable &= ~IFCAP_TSO4;
			ifp->if_hwassist &= ~CSUM_TSO;
		}
		RL_UNLOCK(sc);
		break;
	case SIOCSIFFLAGS:
		RL_LOCK(sc);
		if ((ifp->if_flags & IFF_UP) != 0) {
			if ((ifp->if_drv_flags & IFF_DRV_RUNNING) != 0) {
				/*
				 * While running, only PROMISC/ALLMULTI
				 * changes need action: reprogram the RX
				 * filter rather than doing a full reinit.
				 */
				if (((ifp->if_flags ^ sc->rl_if_flags)
				    & (IFF_PROMISC | IFF_ALLMULTI)) != 0)
					re_set_rxmode(sc);
			} else
				re_init_locked(sc);
		} else {
			if ((ifp->if_drv_flags & IFF_DRV_RUNNING) != 0)
				re_stop(sc);
		}
		sc->rl_if_flags = ifp->if_flags;
		RL_UNLOCK(sc);
		break;
	case SIOCADDMULTI:
	case SIOCDELMULTI:
		RL_LOCK(sc);
		re_set_rxmode(sc);
		RL_UNLOCK(sc);
		break;
	case SIOCGIFMEDIA:
	case SIOCSIFMEDIA:
		mii = device_get_softc(sc->rl_miibus);
		error = ifmedia_ioctl(ifp, ifr, &mii->mii_media, command);
		break;
	case SIOCSIFCAP:
	    {
		int mask, reinit;

		/* 'mask' holds only the capability bits being toggled. */
		mask = ifr->ifr_reqcap ^ ifp->if_capenable;
		reinit = 0;
#ifdef DEVICE_POLLING
		if (mask & IFCAP_POLLING) {
			if (ifr->ifr_reqcap & IFCAP_POLLING) {
				error = ether_poll_register(re_poll, ifp);
				if (error)
					return(error);
				RL_LOCK(sc);
				/* Disable interrupts */
				CSR_WRITE_2(sc, RL_IMR, 0x0000);
				ifp->if_capenable |= IFCAP_POLLING;
				RL_UNLOCK(sc);
			} else {
				error = ether_poll_deregister(ifp);
				/* Enable interrupts. */
				RL_LOCK(sc);
				CSR_WRITE_2(sc, RL_IMR, RL_INTRS_CPLUS);
				ifp->if_capenable &= ~IFCAP_POLLING;
				RL_UNLOCK(sc);
			}
		}
#endif /* DEVICE_POLLING */
		if (mask & IFCAP_HWCSUM) {
			ifp->if_capenable ^= IFCAP_HWCSUM;
			if (ifp->if_capenable & IFCAP_TXCSUM)
				ifp->if_hwassist |= RE_CSUM_FEATURES;
			else
				ifp->if_hwassist &= ~RE_CSUM_FEATURES;
			/* Checksum offload lives in C+ cmd: needs reinit. */
			reinit = 1;
		}
		if (mask & IFCAP_VLAN_HWTAGGING) {
			ifp->if_capenable ^= IFCAP_VLAN_HWTAGGING;
			reinit = 1;
		}
		if (mask & IFCAP_TSO4) {
			ifp->if_capenable ^= IFCAP_TSO4;
			if ((IFCAP_TSO4 & ifp->if_capenable) &&
			    (IFCAP_TSO4 & ifp->if_capabilities))
				ifp->if_hwassist |= CSUM_TSO;
			else
				ifp->if_hwassist &= ~CSUM_TSO;
			/* TSO incompatible with large MTU; see SIOCSIFMTU. */
			if (ifp->if_mtu > RL_TSO_MTU &&
			    (ifp->if_capenable & IFCAP_TSO4) != 0) {
				ifp->if_capenable &= ~IFCAP_TSO4;
				ifp->if_hwassist &= ~CSUM_TSO;
			}
		}
		if ((mask & IFCAP_WOL) != 0 &&
		    (ifp->if_capabilities & IFCAP_WOL) != 0) {
			if ((mask & IFCAP_WOL_UCAST) != 0)
				ifp->if_capenable ^= IFCAP_WOL_UCAST;
			if ((mask & IFCAP_WOL_MCAST) != 0)
				ifp->if_capenable ^= IFCAP_WOL_MCAST;
			if ((mask & IFCAP_WOL_MAGIC) != 0)
				ifp->if_capenable ^= IFCAP_WOL_MAGIC;
		}
		if (reinit && ifp->if_drv_flags & IFF_DRV_RUNNING)
			re_init(sc);
		VLAN_CAPABILITIES(ifp);
	    }
		break;
	default:
		error = ether_ioctl(ifp, command, data);
		break;
	}

	return (error);
}
2863
2864 static void
2865 re_watchdog(struct rl_softc *sc)
2866 {
2867         struct ifnet            *ifp;
2868
2869         RL_LOCK_ASSERT(sc);
2870
2871         if (sc->rl_watchdog_timer == 0 || --sc->rl_watchdog_timer != 0)
2872                 return;
2873
2874         ifp = sc->rl_ifp;
2875         re_txeof(sc);
2876         if (sc->rl_ldata.rl_tx_free == sc->rl_ldata.rl_tx_desc_cnt) {
2877                 if_printf(ifp, "watchdog timeout (missed Tx interrupts) "
2878                     "-- recovering\n");
2879                 if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
2880                         taskqueue_enqueue_fast(taskqueue_fast, &sc->rl_txtask);
2881                 return;
2882         }
2883
2884         if_printf(ifp, "watchdog timeout\n");
2885         ifp->if_oerrors++;
2886
2887         re_rxeof(sc);
2888         re_init_locked(sc);
2889         if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
2890                 taskqueue_enqueue_fast(taskqueue_fast, &sc->rl_txtask);
2891 }
2892
2893 /*
2894  * Stop the adapter and free any mbufs allocated to the
2895  * RX and TX lists.
2896  */
2897 static void
2898 re_stop(struct rl_softc *sc)
2899 {
2900         int                     i;
2901         struct ifnet            *ifp;
2902         struct rl_txdesc        *txd;
2903         struct rl_rxdesc        *rxd;
2904
2905         RL_LOCK_ASSERT(sc);
2906
2907         ifp = sc->rl_ifp;
2908
2909         sc->rl_watchdog_timer = 0;
2910         callout_stop(&sc->rl_stat_callout);
2911         ifp->if_drv_flags &= ~(IFF_DRV_RUNNING | IFF_DRV_OACTIVE);
2912
2913         if ((sc->rl_flags & RL_FLAG_CMDSTOP) != 0)
2914                 CSR_WRITE_1(sc, RL_COMMAND, RL_CMD_STOPREQ | RL_CMD_TX_ENB |
2915                     RL_CMD_RX_ENB);
2916         else
2917                 CSR_WRITE_1(sc, RL_COMMAND, 0x00);
2918         DELAY(1000);
2919         CSR_WRITE_2(sc, RL_IMR, 0x0000);
2920         CSR_WRITE_2(sc, RL_ISR, 0xFFFF);
2921
2922         if (sc->rl_head != NULL) {
2923                 m_freem(sc->rl_head);
2924                 sc->rl_head = sc->rl_tail = NULL;
2925         }
2926
2927         /* Free the TX list buffers. */
2928
2929         for (i = 0; i < sc->rl_ldata.rl_tx_desc_cnt; i++) {
2930                 txd = &sc->rl_ldata.rl_tx_desc[i];
2931                 if (txd->tx_m != NULL) {
2932                         bus_dmamap_sync(sc->rl_ldata.rl_tx_mtag,
2933                             txd->tx_dmamap, BUS_DMASYNC_POSTWRITE);
2934                         bus_dmamap_unload(sc->rl_ldata.rl_tx_mtag,
2935                             txd->tx_dmamap);
2936                         m_freem(txd->tx_m);
2937                         txd->tx_m = NULL;
2938                 }
2939         }
2940
2941         /* Free the RX list buffers. */
2942
2943         for (i = 0; i < sc->rl_ldata.rl_rx_desc_cnt; i++) {
2944                 rxd = &sc->rl_ldata.rl_rx_desc[i];
2945                 if (rxd->rx_m != NULL) {
2946                         bus_dmamap_sync(sc->rl_ldata.rl_tx_mtag,
2947                             rxd->rx_dmamap, BUS_DMASYNC_POSTREAD);
2948                         bus_dmamap_unload(sc->rl_ldata.rl_rx_mtag,
2949                             rxd->rx_dmamap);
2950                         m_freem(rxd->rx_m);
2951                         rxd->rx_m = NULL;
2952                 }
2953         }
2954 }
2955
2956 /*
2957  * Device suspend routine.  Stop the interface and save some PCI
2958  * settings in case the BIOS doesn't restore them properly on
2959  * resume.
2960  */
2961 static int
2962 re_suspend(device_t dev)
2963 {
2964         struct rl_softc         *sc;
2965
2966         sc = device_get_softc(dev);
2967
2968         RL_LOCK(sc);
2969         re_stop(sc);
2970         re_setwol(sc);
2971         sc->suspended = 1;
2972         RL_UNLOCK(sc);
2973
2974         return (0);
2975 }
2976
/*
 * Device resume routine.  Wake the MAC if it was put to sleep by
 * re_setwol(), reinitialize the interface if it was up, and clear
 * leftover WOL matching so normal RX filtering works again.
 */
static int
re_resume(device_t dev)
{
	struct rl_softc		*sc;
	struct ifnet		*ifp;

	sc = device_get_softc(dev);

	RL_LOCK(sc);

	ifp = sc->rl_ifp;
	/* Take controller out of sleep mode. */
	if ((sc->rl_flags & RL_FLAG_MACSLEEP) != 0) {
		/*
		 * NOTE(review): MACDBG bit 0x80 appears to report MAC
		 * sleep state and GPIO bit 0x01 wakes it -- this mirrors
		 * the inverse sequence in re_setwol(); confirm against
		 * vendor documentation.
		 */
		if ((CSR_READ_1(sc, RL_MACDBG) & 0x80) == 0x80)
			CSR_WRITE_1(sc, RL_GPIO,
			    CSR_READ_1(sc, RL_GPIO) | 0x01);
	}

	/* reinitialize interface if necessary */
	if (ifp->if_flags & IFF_UP)
		re_init_locked(sc);

	/*
	 * Clear WOL matching such that normal Rx filtering
	 * wouldn't interfere with WOL patterns.
	 */
	re_clrwol(sc);
	sc->suspended = 0;
	RL_UNLOCK(sc);

	return (0);
}
3014
3015 /*
3016  * Stop all chip I/O so that the kernel's probe routines don't
3017  * get confused by errant DMAs when rebooting.
3018  */
3019 static int
3020 re_shutdown(device_t dev)
3021 {
3022         struct rl_softc         *sc;
3023
3024         sc = device_get_softc(dev);
3025
3026         RL_LOCK(sc);
3027         re_stop(sc);
3028         /*
3029          * Mark interface as down since otherwise we will panic if
3030          * interrupt comes in later on, which can happen in some
3031          * cases.
3032          */
3033         sc->rl_ifp->if_flags &= ~IFF_UP;
3034         re_setwol(sc);
3035         RL_UNLOCK(sc);
3036
3037         return (0);
3038 }
3039
/*
 * Program the Wake On LAN configuration registers from the
 * interface's enabled WOL capabilities and request PME through the
 * PCI power-management capability.  Called with the softc lock held
 * after the chip has been stopped (suspend/shutdown paths).
 */
static void
re_setwol(struct rl_softc *sc)
{
	struct ifnet		*ifp;
	int			pmc;
	uint16_t		pmstat;
	uint8_t			v;

	RL_LOCK_ASSERT(sc);

	/* Without a PCI power-management capability, WOL cannot work. */
	if (pci_find_extcap(sc->rl_dev, PCIY_PMG, &pmc) != 0)
		return;

	ifp = sc->rl_ifp;
	/* Put controller into sleep mode. */
	if ((sc->rl_flags & RL_FLAG_MACSLEEP) != 0) {
		/*
		 * NOTE(review): MACDBG bit 0x80 appears to report the MAC
		 * awake; clearing GPIO bit 0x01 presumably sleeps it.
		 * re_resume() performs the inverse -- confirm against
		 * vendor documentation.
		 */
		if ((CSR_READ_1(sc, RL_MACDBG) & 0x80) == 0x80)
			CSR_WRITE_1(sc, RL_GPIO,
			    CSR_READ_1(sc, RL_GPIO) & ~0x01);
	}
	/*
	 * RL_FLAG_WOLRXENB chips keep the receiver enabled across
	 * suspend -- presumably required to detect wakeup frames.
	 */
	if ((ifp->if_capenable & IFCAP_WOL) != 0 &&
	    (sc->rl_flags & RL_FLAG_WOLRXENB) != 0)
		CSR_WRITE_1(sc, RL_COMMAND, RL_CMD_RX_ENB);
	/* Enable config register write. */
	CSR_WRITE_1(sc, RL_EECMD, RL_EE_MODE);

	/* Enable PME. */
	v = CSR_READ_1(sc, RL_CFG1);
	v &= ~RL_CFG1_PME;
	if ((ifp->if_capenable & IFCAP_WOL) != 0)
		v |= RL_CFG1_PME;
	CSR_WRITE_1(sc, RL_CFG1, v);

	/* Magic-packet wakeup (link wakeup is always cleared). */
	v = CSR_READ_1(sc, RL_CFG3);
	v &= ~(RL_CFG3_WOL_LINK | RL_CFG3_WOL_MAGIC);
	if ((ifp->if_capenable & IFCAP_WOL_MAGIC) != 0)
		v |= RL_CFG3_WOL_MAGIC;
	CSR_WRITE_1(sc, RL_CFG3, v);

	/* Config register write done. */
	CSR_WRITE_1(sc, RL_EECMD, RL_EEMODE_OFF);

	/* Unicast/multicast/broadcast frame wakeup and LANWAKE. */
	v = CSR_READ_1(sc, RL_CFG5);
	v &= ~(RL_CFG5_WOL_BCAST | RL_CFG5_WOL_MCAST | RL_CFG5_WOL_UCAST);
	v &= ~RL_CFG5_WOL_LANWAKE;
	if ((ifp->if_capenable & IFCAP_WOL_UCAST) != 0)
		v |= RL_CFG5_WOL_UCAST;
	if ((ifp->if_capenable & IFCAP_WOL_MCAST) != 0)
		v |= RL_CFG5_WOL_MCAST | RL_CFG5_WOL_BCAST;
	if ((ifp->if_capenable & IFCAP_WOL) != 0)
		v |= RL_CFG5_WOL_LANWAKE;
	CSR_WRITE_1(sc, RL_CFG5, v);

	/*
	 * It seems that hardware resets its link speed to 100Mbps in
	 * power down mode so switching to 100Mbps in driver is not
	 * needed.
	 */

	/* Request PME if WOL is requested. */
	pmstat = pci_read_config(sc->rl_dev, pmc + PCIR_POWER_STATUS, 2);
	pmstat &= ~(PCIM_PSTAT_PME | PCIM_PSTAT_PMEENABLE);
	if ((ifp->if_capenable & IFCAP_WOL) != 0)
		pmstat |= PCIM_PSTAT_PME | PCIM_PSTAT_PMEENABLE;
	pci_write_config(sc->rl_dev, pmc + PCIR_POWER_STATUS, pmstat, 2);
}
3106
3107 static void
3108 re_clrwol(struct rl_softc *sc)
3109 {
3110         int                     pmc;
3111         uint8_t                 v;
3112
3113         RL_LOCK_ASSERT(sc);
3114
3115         if (pci_find_extcap(sc->rl_dev, PCIY_PMG, &pmc) != 0)
3116                 return;
3117
3118         /* Enable config register write. */
3119         CSR_WRITE_1(sc, RL_EECMD, RL_EE_MODE);
3120
3121         v = CSR_READ_1(sc, RL_CFG3);
3122         v &= ~(RL_CFG3_WOL_LINK | RL_CFG3_WOL_MAGIC);
3123         CSR_WRITE_1(sc, RL_CFG3, v);
3124
3125         /* Config register write done. */
3126         CSR_WRITE_1(sc, RL_EECMD, RL_EEMODE_OFF);
3127
3128         v = CSR_READ_1(sc, RL_CFG5);
3129         v &= ~(RL_CFG5_WOL_BCAST | RL_CFG5_WOL_MCAST | RL_CFG5_WOL_UCAST);
3130         v &= ~RL_CFG5_WOL_LANWAKE;
3131         CSR_WRITE_1(sc, RL_CFG5, v);
3132 }