/*-
 * Copyright (c) 1997, 1998-2003
 *      Bill Paul <wpaul@windriver.com>.  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *      This product includes software developed by Bill Paul.
 * 4. Neither the name of the author nor the names of any co-contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY Bill Paul AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL Bill Paul OR THE VOICES IN HIS HEAD
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
 * THE POSSIBILITY OF SUCH DAMAGE.
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

/*
 * RealTek 8139C+/8169/8169S/8110S/8168/8111/8101E PCI NIC driver
 *
 * Written by Bill Paul <wpaul@windriver.com>
 * Senior Networking Software Engineer
 * Wind River Systems
 */

/*
 * This driver is designed to support RealTek's next generation of
 * 10/100 and 10/100/1000 PCI ethernet controllers. There are currently
 * seven devices in this family: the RTL8139C+, the RTL8169, the RTL8169S,
 * RTL8110S, the RTL8168, the RTL8111 and the RTL8101E.
 *
 * The 8139C+ is a 10/100 ethernet chip. It is backward compatible
 * with the older 8139 family; however, it also supports a special
 * C+ mode of operation that provides several new performance-enhancing
 * features. These include:
 *
 *      o Descriptor based DMA mechanism. Each descriptor represents
 *        a single packet fragment. Data buffers may be aligned on
 *        any byte boundary.
 *
 *      o 64-bit DMA
 *
 *      o TCP/IP checksum offload for both RX and TX
 *
 *      o High and normal priority transmit DMA rings
 *
 *      o VLAN tag insertion and extraction
 *
 *      o TCP large send (segmentation offload)
 *
 * Like the 8139, the 8139C+ also has a built-in 10/100 PHY. The C+
 * programming API is fairly straightforward. The RX filtering, EEPROM
 * access and PHY access are the same as they are on the older 8139 series
 * chips.
 *
 * The 8169 is a 64-bit 10/100/1000 gigabit ethernet MAC. It has almost the
 * same programming API and feature set as the 8139C+ with the following
 * differences and additions:
 *
 *      o 1000Mbps mode
 *
 *      o Jumbo frames
 *
 *      o GMII and TBI ports/registers for interfacing with copper
 *        or fiber PHYs
 *
 *      o RX and TX DMA rings can have up to 1024 descriptors
 *        (the 8139C+ allows a maximum of 64)
 *
 *      o Slight differences in register layout from the 8139C+
 *
 * The TX start and timer interrupt registers are at different locations
 * on the 8169 than they are on the 8139C+. Also, the status word in the
 * RX descriptor has a slightly different bit layout. The 8169 does not
 * have a built-in PHY. Most reference boards use a Marvell 88E1000 'Alaska'
 * copper gigE PHY.
 *
 * The 8169S/8110S 10/100/1000 devices have built-in copper gigE PHYs
 * (the 'S' stands for 'single-chip'). These devices have the same
 * programming API as the older 8169, but also have some vendor-specific
 * registers for the on-board PHY. The 8110S is a LAN-on-motherboard
 * part designed to be pin-compatible with the RealTek 8100 10/100 chip.
 *
 * This driver takes advantage of the RX and TX checksum offload and
 * VLAN tag insertion/extraction features. It also implements TX
 * interrupt moderation using the timer interrupt registers, which
 * significantly reduces TX interrupt load. There is also support
 * for jumbo frames; however, the 8169/8169S/8110S cannot transmit
 * jumbo frames larger than 7440 bytes, so the maximum MTU possible
 * with this driver is 7422 bytes.
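 * (A 7440-byte frame less the 14-byte Ethernet header and the 4-byte
 * CRC leaves 7422 bytes for payload, hence the MTU limit.)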
 */

#ifdef HAVE_KERNEL_OPTION_HEADERS
#include "opt_device_polling.h"
#endif

#include <sys/param.h>
#include <sys/endian.h>
#include <sys/systm.h>
#include <sys/sockio.h>
#include <sys/mbuf.h>
#include <sys/malloc.h>
#include <sys/module.h>
#include <sys/kernel.h>
#include <sys/socket.h>
#include <sys/lock.h>
#include <sys/mutex.h>
#include <sys/taskqueue.h>

#include <net/if.h>
#include <net/if_arp.h>
#include <net/ethernet.h>
#include <net/if_dl.h>
#include <net/if_media.h>
#include <net/if_types.h>
#include <net/if_vlan_var.h>

#include <net/bpf.h>

#include <machine/bus.h>
#include <machine/resource.h>
#include <sys/bus.h>
#include <sys/rman.h>

#include <dev/mii/mii.h>
#include <dev/mii/miivar.h>

#include <dev/pci/pcireg.h>
#include <dev/pci/pcivar.h>

#include <pci/if_rlreg.h>

MODULE_DEPEND(re, pci, 1, 1, 1);
MODULE_DEPEND(re, ether, 1, 1, 1);
MODULE_DEPEND(re, miibus, 1, 1, 1);

/* "device miibus" required.  See GENERIC if you get errors here. */
#include "miibus_if.h"

/* Tunables. */
static int msi_disable = 0;
TUNABLE_INT("hw.re.msi_disable", &msi_disable);
static int prefer_iomap = 0;
TUNABLE_INT("hw.re.prefer_iomap", &prefer_iomap);
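
/*
 * Both are boot-time loader tunables; for example, to disable MSI and
 * force I/O-space register mapping, one could set in /boot/loader.conf:
 *
 *      hw.re.msi_disable="1"
 *      hw.re.prefer_iomap="1"
 */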

#define RE_CSUM_FEATURES    (CSUM_IP | CSUM_TCP | CSUM_UDP)

/*
 * Various supported device vendors/types and their names.
 */
static struct rl_type re_devs[] = {
        { DLINK_VENDORID, DLINK_DEVICEID_528T, 0,
            "D-Link DGE-528(T) Gigabit Ethernet Adapter" },
        { RT_VENDORID, RT_DEVICEID_8139, 0,
            "RealTek 8139C+ 10/100BaseTX" },
        { RT_VENDORID, RT_DEVICEID_8101E, 0,
            "RealTek 8101E/8102E/8102EL PCIe 10/100baseTX" },
        { RT_VENDORID, RT_DEVICEID_8168, 0,
            "RealTek 8168/8168B/8168C/8168CP/8168D/8111B/8111C/8111CP PCIe "
            "Gigabit Ethernet" },
        { RT_VENDORID, RT_DEVICEID_8169, 0,
            "RealTek 8169/8169S/8169SB(L)/8110S/8110SB(L) Gigabit Ethernet" },
        { RT_VENDORID, RT_DEVICEID_8169SC, 0,
            "RealTek 8169SC/8110SC Single-chip Gigabit Ethernet" },
        { COREGA_VENDORID, COREGA_DEVICEID_CGLAPCIGT, 0,
            "Corega CG-LAPCIGT (RTL8169S) Gigabit Ethernet" },
        { LINKSYS_VENDORID, LINKSYS_DEVICEID_EG1032, 0,
            "Linksys EG1032 (RTL8169S) Gigabit Ethernet" },
        { USR_VENDORID, USR_DEVICEID_997902, 0,
            "US Robotics 997902 (RTL8169S) Gigabit Ethernet" }
};

static struct rl_hwrev re_hwrevs[] = {
        { RL_HWREV_8139, RL_8139,  "" },
        { RL_HWREV_8139A, RL_8139, "A" },
        { RL_HWREV_8139AG, RL_8139, "A-G" },
        { RL_HWREV_8139B, RL_8139, "B" },
        { RL_HWREV_8130, RL_8139, "8130" },
        { RL_HWREV_8139C, RL_8139, "C" },
        { RL_HWREV_8139D, RL_8139, "8139D/8100B/8100C" },
        { RL_HWREV_8139CPLUS, RL_8139CPLUS, "C+"},
        { RL_HWREV_8168_SPIN1, RL_8169, "8168"},
        { RL_HWREV_8169, RL_8169, "8169"},
        { RL_HWREV_8169S, RL_8169, "8169S"},
        { RL_HWREV_8110S, RL_8169, "8110S"},
        { RL_HWREV_8169_8110SB, RL_8169, "8169SB/8110SB"},
        { RL_HWREV_8169_8110SC, RL_8169, "8169SC/8110SC"},
        { RL_HWREV_8169_8110SBL, RL_8169, "8169SBL/8110SBL"},
        { RL_HWREV_8169_8110SCE, RL_8169, "8169SC/8110SC"},
        { RL_HWREV_8100, RL_8139, "8100"},
        { RL_HWREV_8101, RL_8139, "8101"},
        { RL_HWREV_8100E, RL_8169, "8100E"},
        { RL_HWREV_8101E, RL_8169, "8101E"},
        { RL_HWREV_8102E, RL_8169, "8102E"},
        { RL_HWREV_8102EL, RL_8169, "8102EL"},
        { RL_HWREV_8102EL_SPIN1, RL_8169, "8102EL"},
        { RL_HWREV_8168_SPIN2, RL_8169, "8168"},
        { RL_HWREV_8168_SPIN3, RL_8169, "8168"},
        { RL_HWREV_8168C, RL_8169, "8168C/8111C"},
        { RL_HWREV_8168C_SPIN2, RL_8169, "8168C/8111C"},
        { RL_HWREV_8168CP, RL_8169, "8168CP/8111CP"},
        { RL_HWREV_8168D, RL_8169, "8168D"},
        { 0, 0, NULL }
};

static int re_probe             (device_t);
static int re_attach            (device_t);
static int re_detach            (device_t);

static int re_encap             (struct rl_softc *, struct mbuf **);

static void re_dma_map_addr     (void *, bus_dma_segment_t *, int, int);
static int re_allocmem          (device_t, struct rl_softc *);
static __inline void re_discard_rxbuf
                                (struct rl_softc *, int);
static int re_newbuf            (struct rl_softc *, int);
static int re_rx_list_init      (struct rl_softc *);
static int re_tx_list_init      (struct rl_softc *);
#ifdef RE_FIXUP_RX
static __inline void re_fixup_rx
                                (struct mbuf *);
#endif
static int re_rxeof             (struct rl_softc *, int *);
static void re_txeof            (struct rl_softc *);
#ifdef DEVICE_POLLING
static int re_poll              (struct ifnet *, enum poll_cmd, int);
static int re_poll_locked       (struct ifnet *, enum poll_cmd, int);
#endif
static int re_intr              (void *);
static void re_tick             (void *);
static void re_tx_task          (void *, int);
static void re_int_task         (void *, int);
static void re_start            (struct ifnet *);
static int re_ioctl             (struct ifnet *, u_long, caddr_t);
static void re_init             (void *);
static void re_init_locked      (struct rl_softc *);
static void re_stop             (struct rl_softc *);
static void re_watchdog         (struct rl_softc *);
static int re_suspend           (device_t);
static int re_resume            (device_t);
static int re_shutdown          (device_t);
static int re_ifmedia_upd       (struct ifnet *);
static void re_ifmedia_sts      (struct ifnet *, struct ifmediareq *);

static void re_eeprom_putbyte   (struct rl_softc *, int);
static void re_eeprom_getword   (struct rl_softc *, int, u_int16_t *);
static void re_read_eeprom      (struct rl_softc *, caddr_t, int, int);
static int re_gmii_readreg      (device_t, int, int);
static int re_gmii_writereg     (device_t, int, int, int);

static int re_miibus_readreg    (device_t, int, int);
static int re_miibus_writereg   (device_t, int, int, int);
static void re_miibus_statchg   (device_t);

static void re_set_rxmode               (struct rl_softc *);
static void re_reset            (struct rl_softc *);
static void re_setwol           (struct rl_softc *);
static void re_clrwol           (struct rl_softc *);

#ifdef RE_DIAG
static int re_diag              (struct rl_softc *);
#endif

static device_method_t re_methods[] = {
        /* Device interface */
        DEVMETHOD(device_probe,         re_probe),
        DEVMETHOD(device_attach,        re_attach),
        DEVMETHOD(device_detach,        re_detach),
        DEVMETHOD(device_suspend,       re_suspend),
        DEVMETHOD(device_resume,        re_resume),
        DEVMETHOD(device_shutdown,      re_shutdown),

        /* bus interface */
        DEVMETHOD(bus_print_child,      bus_generic_print_child),
        DEVMETHOD(bus_driver_added,     bus_generic_driver_added),

        /* MII interface */
        DEVMETHOD(miibus_readreg,       re_miibus_readreg),
        DEVMETHOD(miibus_writereg,      re_miibus_writereg),
        DEVMETHOD(miibus_statchg,       re_miibus_statchg),

        { 0, 0 }
};

static driver_t re_driver = {
        "re",
        re_methods,
        sizeof(struct rl_softc)
};

static devclass_t re_devclass;

DRIVER_MODULE(re, pci, re_driver, re_devclass, 0, 0);
DRIVER_MODULE(miibus, re, miibus_driver, miibus_devclass, 0, 0);

#define EE_SET(x)                                       \
        CSR_WRITE_1(sc, RL_EECMD,                       \
                CSR_READ_1(sc, RL_EECMD) | x)

#define EE_CLR(x)                                       \
        CSR_WRITE_1(sc, RL_EECMD,                       \
                CSR_READ_1(sc, RL_EECMD) & ~x)

/*
 * Send a read command and address to the EEPROM, check for ACK.
 */
static void
re_eeprom_putbyte(struct rl_softc *sc, int addr)
{
        int                     d, i;

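        /*
         * Build the command word: the RL_9346_READ opcode sits just
         * above the address bits, whose width (sc->rl_eewidth) depends
         * on the EEPROM type detected at attach time.
         */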
        d = addr | (RL_9346_READ << sc->rl_eewidth);

        /*
         * Feed in each bit and strobe the clock.
         */

        for (i = 1 << (sc->rl_eewidth + 3); i; i >>= 1) {
                if (d & i) {
                        EE_SET(RL_EE_DATAIN);
                } else {
                        EE_CLR(RL_EE_DATAIN);
                }
                DELAY(100);
                EE_SET(RL_EE_CLK);
                DELAY(150);
                EE_CLR(RL_EE_CLK);
                DELAY(100);
        }
}

/*
 * Read a word of data stored in the EEPROM at address 'addr.'
 */
static void
re_eeprom_getword(struct rl_softc *sc, int addr, u_int16_t *dest)
{
        int                     i;
        u_int16_t               word = 0;

        /*
         * Send address of word we want to read.
         */
        re_eeprom_putbyte(sc, addr);

        /*
         * Start reading bits from EEPROM.
         */
        for (i = 0x8000; i; i >>= 1) {
                EE_SET(RL_EE_CLK);
                DELAY(100);
                if (CSR_READ_1(sc, RL_EECMD) & RL_EE_DATAOUT)
                        word |= i;
                EE_CLR(RL_EE_CLK);
                DELAY(100);
        }

        *dest = word;
}

/*
 * Read a sequence of words from the EEPROM.
 */
static void
re_read_eeprom(struct rl_softc *sc, caddr_t dest, int off, int cnt)
{
        int                     i;
        u_int16_t               word = 0, *ptr;

        CSR_SETBIT_1(sc, RL_EECMD, RL_EEMODE_PROGRAM);

        DELAY(100);

        for (i = 0; i < cnt; i++) {
                CSR_SETBIT_1(sc, RL_EECMD, RL_EE_SEL);
                re_eeprom_getword(sc, off + i, &word);
                CSR_CLRBIT_1(sc, RL_EECMD, RL_EE_SEL);
                ptr = (u_int16_t *)(dest + (i * 2));
                *ptr = word;
        }

        CSR_CLRBIT_1(sc, RL_EECMD, RL_EEMODE_PROGRAM);
}

static int
re_gmii_readreg(device_t dev, int phy, int reg)
{
        struct rl_softc         *sc;
        u_int32_t               rval;
        int                     i;

        if (phy != 1)
                return (0);

        sc = device_get_softc(dev);

        /* Let the rgephy driver read the GMEDIASTAT register */

        if (reg == RL_GMEDIASTAT) {
                rval = CSR_READ_1(sc, RL_GMEDIASTAT);
                return (rval);
        }

        CSR_WRITE_4(sc, RL_PHYAR, reg << 16);
        DELAY(1000);

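        /*
         * Wait for the read to complete; the chip raises the
         * RL_PHYAR_BUSY flag once the read data is valid.
         */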
        for (i = 0; i < RL_PHY_TIMEOUT; i++) {
                rval = CSR_READ_4(sc, RL_PHYAR);
                if (rval & RL_PHYAR_BUSY)
                        break;
                DELAY(100);
        }

        if (i == RL_PHY_TIMEOUT) {
                device_printf(sc->rl_dev, "PHY read failed\n");
                return (0);
        }

        return (rval & RL_PHYAR_PHYDATA);
}

static int
re_gmii_writereg(device_t dev, int phy, int reg, int data)
{
        struct rl_softc         *sc;
        u_int32_t               rval;
        int                     i;

        sc = device_get_softc(dev);

        CSR_WRITE_4(sc, RL_PHYAR, (reg << 16) |
            (data & RL_PHYAR_PHYDATA) | RL_PHYAR_BUSY);
        DELAY(1000);

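        /*
         * Wait for the write to complete; RL_PHYAR_BUSY clears once
         * the chip has latched the data.
         */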
        for (i = 0; i < RL_PHY_TIMEOUT; i++) {
                rval = CSR_READ_4(sc, RL_PHYAR);
                if (!(rval & RL_PHYAR_BUSY))
                        break;
                DELAY(100);
        }

        if (i == RL_PHY_TIMEOUT) {
                device_printf(sc->rl_dev, "PHY write failed\n");
                return (0);
        }

        return (0);
}

static int
re_miibus_readreg(device_t dev, int phy, int reg)
{
        struct rl_softc         *sc;
        u_int16_t               rval = 0;
        u_int16_t               re8139_reg = 0;

        sc = device_get_softc(dev);

        if (sc->rl_type == RL_8169) {
                rval = re_gmii_readreg(dev, phy, reg);
                return (rval);
        }

        /* Pretend the internal PHY is only at address 0 */
        if (phy) {
                return (0);
        }
        switch (reg) {
        case MII_BMCR:
                re8139_reg = RL_BMCR;
                break;
        case MII_BMSR:
                re8139_reg = RL_BMSR;
                break;
        case MII_ANAR:
                re8139_reg = RL_ANAR;
                break;
        case MII_ANER:
                re8139_reg = RL_ANER;
                break;
        case MII_ANLPAR:
                re8139_reg = RL_LPAR;
                break;
        case MII_PHYIDR1:
        case MII_PHYIDR2:
                return (0);
        /*
         * Allow the rlphy driver to read the media status
         * register. If we have a link partner which does not
         * support NWAY, this is the register which will tell
         * us the results of parallel detection.
         */
        case RL_MEDIASTAT:
                rval = CSR_READ_1(sc, RL_MEDIASTAT);
                return (rval);
        default:
                device_printf(sc->rl_dev, "bad phy register\n");
                return (0);
        }
        rval = CSR_READ_2(sc, re8139_reg);
        if (sc->rl_type == RL_8139CPLUS && re8139_reg == RL_BMCR) {
                /* 8139C+ has different bit layout. */
                rval &= ~(BMCR_LOOP | BMCR_ISO);
        }
        return (rval);
}

static int
re_miibus_writereg(device_t dev, int phy, int reg, int data)
{
        struct rl_softc         *sc;
        u_int16_t               re8139_reg = 0;
        int                     rval = 0;

        sc = device_get_softc(dev);

        if (sc->rl_type == RL_8169) {
                rval = re_gmii_writereg(dev, phy, reg, data);
                return (rval);
        }

        /* Pretend the internal PHY is only at address 0 */
        if (phy)
                return (0);

        switch (reg) {
        case MII_BMCR:
                re8139_reg = RL_BMCR;
                if (sc->rl_type == RL_8139CPLUS) {
                        /* 8139C+ has different bit layout. */
                        data &= ~(BMCR_LOOP | BMCR_ISO);
                }
                break;
        case MII_BMSR:
                re8139_reg = RL_BMSR;
                break;
        case MII_ANAR:
                re8139_reg = RL_ANAR;
                break;
        case MII_ANER:
                re8139_reg = RL_ANER;
                break;
        case MII_ANLPAR:
                re8139_reg = RL_LPAR;
                break;
        case MII_PHYIDR1:
        case MII_PHYIDR2:
                return (0);
        default:
                device_printf(sc->rl_dev, "bad phy register\n");
                return (0);
        }
        CSR_WRITE_2(sc, re8139_reg, data);
        return (0);
}

static void
re_miibus_statchg(device_t dev)
{
        struct rl_softc         *sc;
        struct ifnet            *ifp;
        struct mii_data         *mii;

        sc = device_get_softc(dev);
        mii = device_get_softc(sc->rl_miibus);
        ifp = sc->rl_ifp;
        if (mii == NULL || ifp == NULL ||
            (ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
                return;

        sc->rl_flags &= ~RL_FLAG_LINK;
        if ((mii->mii_media_status & (IFM_ACTIVE | IFM_AVALID)) ==
            (IFM_ACTIVE | IFM_AVALID)) {
                switch (IFM_SUBTYPE(mii->mii_media_active)) {
                case IFM_10_T:
                case IFM_100_TX:
                        sc->rl_flags |= RL_FLAG_LINK;
                        break;
                case IFM_1000_T:
                        if ((sc->rl_flags & RL_FLAG_FASTETHER) != 0)
                                break;
                        sc->rl_flags |= RL_FLAG_LINK;
                        break;
                default:
                        break;
                }
        }
        /*
         * RealTek controllers do not provide any interface for
         * programming the Tx/Rx MACs with the resolved speed,
         * duplex and flow-control parameters.
         */
}

/*
 * Set the RX configuration and 64-bit multicast hash filter.
 */
static void
re_set_rxmode(struct rl_softc *sc)
{
        struct ifnet            *ifp;
        struct ifmultiaddr      *ifma;
        uint32_t                hashes[2] = { 0, 0 };
        uint32_t                h, rxfilt;

        RL_LOCK_ASSERT(sc);

        ifp = sc->rl_ifp;

        rxfilt = RL_RXCFG_CONFIG | RL_RXCFG_RX_INDIV | RL_RXCFG_RX_BROAD;

        if (ifp->if_flags & (IFF_ALLMULTI | IFF_PROMISC)) {
                if (ifp->if_flags & IFF_PROMISC)
                        rxfilt |= RL_RXCFG_RX_ALLPHYS;
                /*
                 * Unlike other hardware, we have to explicitly set
                 * RL_RXCFG_RX_MULTI to receive multicast frames in
                 * promiscuous mode.
                 */
                rxfilt |= RL_RXCFG_RX_MULTI;
                hashes[0] = hashes[1] = 0xffffffff;
                goto done;
        }

        if_maddr_rlock(ifp);
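        /*
         * Hash each multicast address down to a 6-bit index (the top
         * bits of its big-endian CRC32); the index selects one of the
         * 64 filter bits split across the MAR0/MAR4 registers.
         */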
        TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
                if (ifma->ifma_addr->sa_family != AF_LINK)
                        continue;
                h = ether_crc32_be(LLADDR((struct sockaddr_dl *)
                    ifma->ifma_addr), ETHER_ADDR_LEN) >> 26;
                if (h < 32)
                        hashes[0] |= (1 << h);
                else
                        hashes[1] |= (1 << (h - 32));
        }
        if_maddr_runlock(ifp);

        if (hashes[0] != 0 || hashes[1] != 0) {
                /*
                 * For some unfathomable reason, RealTek decided to
                 * reverse the order of the multicast hash registers
                 * in the PCI Express parts.  This means we have to
                 * write the hash pattern in reverse order for those
                 * devices.
                 */
                if ((sc->rl_flags & RL_FLAG_PCIE) != 0) {
                        h = bswap32(hashes[0]);
                        hashes[0] = bswap32(hashes[1]);
                        hashes[1] = h;
                }
                rxfilt |= RL_RXCFG_RX_MULTI;
        }

done:
        CSR_WRITE_4(sc, RL_MAR0, hashes[0]);
        CSR_WRITE_4(sc, RL_MAR4, hashes[1]);
        CSR_WRITE_4(sc, RL_RXCFG, rxfilt);
}

static void
re_reset(struct rl_softc *sc)
{
        int                     i;

        RL_LOCK_ASSERT(sc);

        CSR_WRITE_1(sc, RL_COMMAND, RL_CMD_RESET);

        for (i = 0; i < RL_TIMEOUT; i++) {
                DELAY(10);
                if (!(CSR_READ_1(sc, RL_COMMAND) & RL_CMD_RESET))
                        break;
        }
        if (i == RL_TIMEOUT)
                device_printf(sc->rl_dev, "reset never completed!\n");

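        /*
         * Some MAC revisions additionally require this undocumented
         * register write after a reset.
         */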
        if ((sc->rl_flags & RL_FLAG_MACRESET) != 0)
                CSR_WRITE_1(sc, 0x82, 1);
        if (sc->rl_hwrev == RL_HWREV_8169S)
                re_gmii_writereg(sc->rl_dev, 1, 0x0b, 0);
}

#ifdef RE_DIAG

/*
 * The following routine is designed to test for a defect on some
 * 32-bit 8169 cards. Some of these NICs have the REQ64# and ACK64#
 * lines connected to the bus, however for a 32-bit only card, they
 * should be pulled high. The result of this defect is that the
 * NIC will not work right if you plug it into a 64-bit slot: DMA
 * operations will be done with 64-bit transfers, which will fail
 * because the 64-bit data lines aren't connected.
 *
 * There's no way to work around this (short of taking a soldering
 * iron to the board); however, we can detect it. The method we use
 * here is to put the NIC into digital loopback mode, set the receiver
 * to promiscuous mode, and then try to send a frame. We then compare
 * the frame data we sent to what was received. If the data matches,
 * then the NIC is working correctly, otherwise we know the user has
 * a defective NIC which has been mistakenly plugged into a 64-bit PCI
 * slot. In the latter case, there's no way the NIC can work correctly,
 * so we print out a message on the console and abort the device attach.
 */

static int
re_diag(struct rl_softc *sc)
{
        struct ifnet            *ifp = sc->rl_ifp;
        struct mbuf             *m0;
        struct ether_header     *eh;
        struct rl_desc          *cur_rx;
        u_int16_t               status;
        u_int32_t               rxstat;
        int                     total_len, i, error = 0, phyaddr;
        u_int8_t                dst[] = { 0x00, 'h', 'e', 'l', 'l', 'o' };
        u_int8_t                src[] = { 0x00, 'w', 'o', 'r', 'l', 'd' };

        /* Allocate a single mbuf */
        MGETHDR(m0, M_DONTWAIT, MT_DATA);
        if (m0 == NULL)
                return (ENOBUFS);

        RL_LOCK(sc);

        /*
         * Initialize the NIC in test mode. This sets the chip up
         * so that it can send and receive frames, but performs the
         * following special functions:
         * - Puts receiver in promiscuous mode
         * - Enables digital loopback mode
         * - Leaves interrupts turned off
         */

        ifp->if_flags |= IFF_PROMISC;
        sc->rl_testmode = 1;
        re_init_locked(sc);
        sc->rl_flags |= RL_FLAG_LINK;
        if (sc->rl_type == RL_8169)
                phyaddr = 1;
        else
                phyaddr = 0;

        re_miibus_writereg(sc->rl_dev, phyaddr, MII_BMCR, BMCR_RESET);
        for (i = 0; i < RL_TIMEOUT; i++) {
                status = re_miibus_readreg(sc->rl_dev, phyaddr, MII_BMCR);
                if (!(status & BMCR_RESET))
                        break;
        }

        re_miibus_writereg(sc->rl_dev, phyaddr, MII_BMCR, BMCR_LOOP);
        CSR_WRITE_2(sc, RL_ISR, RL_INTRS);

        DELAY(100000);

        /* Put some data in the mbuf */

        eh = mtod(m0, struct ether_header *);
        bcopy ((char *)&dst, eh->ether_dhost, ETHER_ADDR_LEN);
        bcopy ((char *)&src, eh->ether_shost, ETHER_ADDR_LEN);
        eh->ether_type = htons(ETHERTYPE_IP);
        m0->m_pkthdr.len = m0->m_len = ETHER_MIN_LEN - ETHER_CRC_LEN;

        /*
         * Queue the packet, start transmission.
         * Note: IF_HANDOFF() ultimately calls re_start() for us.
         */

        CSR_WRITE_2(sc, RL_ISR, 0xFFFF);
        RL_UNLOCK(sc);
        /* XXX: re_diag must not be called when in ALTQ mode */
        IF_HANDOFF(&ifp->if_snd, m0, ifp);
        RL_LOCK(sc);
        m0 = NULL;

        /* Wait for it to propagate through the chip */

        DELAY(100000);
        for (i = 0; i < RL_TIMEOUT; i++) {
                status = CSR_READ_2(sc, RL_ISR);
                CSR_WRITE_2(sc, RL_ISR, status);
                if ((status & (RL_ISR_TIMEOUT_EXPIRED|RL_ISR_RX_OK)) ==
                    (RL_ISR_TIMEOUT_EXPIRED|RL_ISR_RX_OK))
                        break;
                DELAY(10);
        }

        if (i == RL_TIMEOUT) {
                device_printf(sc->rl_dev,
                    "diagnostic failed, failed to receive packet in"
                    " loopback mode\n");
                error = EIO;
                goto done;
        }

        /*
         * The packet should have been dumped into the first
         * entry in the RX DMA ring. Grab it from there.
         */

        bus_dmamap_sync(sc->rl_ldata.rl_rx_list_tag,
            sc->rl_ldata.rl_rx_list_map,
            BUS_DMASYNC_POSTREAD);
        bus_dmamap_sync(sc->rl_ldata.rl_rx_mtag,
            sc->rl_ldata.rl_rx_desc[0].rx_dmamap,
            BUS_DMASYNC_POSTREAD);
        bus_dmamap_unload(sc->rl_ldata.rl_rx_mtag,
            sc->rl_ldata.rl_rx_desc[0].rx_dmamap);

        m0 = sc->rl_ldata.rl_rx_desc[0].rx_m;
        sc->rl_ldata.rl_rx_desc[0].rx_m = NULL;
        eh = mtod(m0, struct ether_header *);

        cur_rx = &sc->rl_ldata.rl_rx_list[0];
        total_len = RL_RXBYTES(cur_rx);
        rxstat = le32toh(cur_rx->rl_cmdstat);

        if (total_len != ETHER_MIN_LEN) {
                device_printf(sc->rl_dev,
                    "diagnostic failed, received short packet\n");
                error = EIO;
                goto done;
        }

        /* Test that the received packet data matches what we sent. */

        if (bcmp((char *)&eh->ether_dhost, (char *)&dst, ETHER_ADDR_LEN) ||
            bcmp((char *)&eh->ether_shost, (char *)&src, ETHER_ADDR_LEN) ||
            ntohs(eh->ether_type) != ETHERTYPE_IP) {
                device_printf(sc->rl_dev, "WARNING, DMA FAILURE!\n");
                device_printf(sc->rl_dev, "expected TX data: %6D/%6D/0x%x\n",
                    dst, ":", src, ":", ETHERTYPE_IP);
                device_printf(sc->rl_dev, "received RX data: %6D/%6D/0x%x\n",
                    eh->ether_dhost, ":",  eh->ether_shost, ":",
                    ntohs(eh->ether_type));
                device_printf(sc->rl_dev, "You may have a defective 32-bit "
                    "NIC plugged into a 64-bit PCI slot.\n");
                device_printf(sc->rl_dev, "Please re-install the NIC in a "
                    "32-bit slot for proper operation.\n");
                device_printf(sc->rl_dev, "Read the re(4) man page for more "
                    "details.\n");
                error = EIO;
        }

done:
        /* Turn interface off, release resources */

        sc->rl_testmode = 0;
        sc->rl_flags &= ~RL_FLAG_LINK;
        ifp->if_flags &= ~IFF_PROMISC;
        re_stop(sc);
        if (m0 != NULL)
                m_freem(m0);

        RL_UNLOCK(sc);

        return (error);
}

#endif

/*
 * Probe for a RealTek 8139C+/8169/8110 chip. Check the PCI vendor and device
 * IDs against our list and return a device name if we find a match.
 */
static int
re_probe(device_t dev)
{
        struct rl_type          *t;
        uint16_t                devid, vendor;
        uint16_t                revid, sdevid;
        int                     i;

        vendor = pci_get_vendor(dev);
        devid = pci_get_device(dev);
        revid = pci_get_revid(dev);
        sdevid = pci_get_subdevice(dev);

        if (vendor == LINKSYS_VENDORID && devid == LINKSYS_DEVICEID_EG1032) {
                if (sdevid != LINKSYS_SUBDEVICE_EG1032_REV3) {
                        /*
                         * Only attach to rev. 3 of the Linksys EG1032 adapter.
                         * Rev. 2 is supported by sk(4).
                         */
                        return (ENXIO);
                }
        }

        if (vendor == RT_VENDORID && devid == RT_DEVICEID_8139) {
                if (revid != 0x20) {
                        /* 8139, let rl(4) take care of this device. */
                        return (ENXIO);
                }
        }

        t = re_devs;
        for (i = 0; i < sizeof(re_devs) / sizeof(re_devs[0]); i++, t++) {
                if (vendor == t->rl_vid && devid == t->rl_did) {
                        device_set_desc(dev, t->rl_name);
                        return (BUS_PROBE_DEFAULT);
                }
        }

        return (ENXIO);
}

/*
 * Map a single buffer address.
 */

static void
re_dma_map_addr(void *arg, bus_dma_segment_t *segs, int nseg, int error)
{
        bus_addr_t              *addr;

        if (error)
                return;

        KASSERT(nseg == 1, ("too many DMA segments, %d should be 1", nseg));
        addr = arg;
        *addr = segs->ds_addr;
}

static int
re_allocmem(device_t dev, struct rl_softc *sc)
{
        bus_size_t              rx_list_size, tx_list_size;
        int                     error;
        int                     i;

        rx_list_size = sc->rl_ldata.rl_rx_desc_cnt * sizeof(struct rl_desc);
        tx_list_size = sc->rl_ldata.rl_tx_desc_cnt * sizeof(struct rl_desc);

        /*
         * Allocate the parent bus DMA tag appropriate for PCI.
         * In order to use DAC, the RL_CPLUSCMD_PCI_DAC bit of the
         * RL_CPLUS_CMD register should be set. However, some RealTek
         * chips are known to be buggy in their DAC handling, so
         * disable DAC by limiting the DMA address space to 32 bits.
         * PCIe variants of the RealTek chips may not have this
         * limitation, but I took the safer path.
         */
        error = bus_dma_tag_create(bus_get_dma_tag(dev), 1, 0,
            BUS_SPACE_MAXADDR_32BIT, BUS_SPACE_MAXADDR, NULL, NULL,
            BUS_SPACE_MAXSIZE_32BIT, 0, BUS_SPACE_MAXSIZE_32BIT, 0,
            NULL, NULL, &sc->rl_parent_tag);
        if (error) {
                device_printf(dev, "could not allocate parent DMA tag\n");
                return (error);
        }

        /*
         * Allocate map for TX mbufs.
         */
        error = bus_dma_tag_create(sc->rl_parent_tag, 1, 0,
            BUS_SPACE_MAXADDR, BUS_SPACE_MAXADDR, NULL,
            NULL, MCLBYTES * RL_NTXSEGS, RL_NTXSEGS, 4096, 0,
            NULL, NULL, &sc->rl_ldata.rl_tx_mtag);
        if (error) {
                device_printf(dev, "could not allocate TX DMA tag\n");
                return (error);
        }

        /*
         * Allocate map for RX mbufs.
         */

        error = bus_dma_tag_create(sc->rl_parent_tag, sizeof(uint64_t), 0,
            BUS_SPACE_MAXADDR, BUS_SPACE_MAXADDR, NULL, NULL,
            MCLBYTES, 1, MCLBYTES, 0, NULL, NULL, &sc->rl_ldata.rl_rx_mtag);
        if (error) {
                device_printf(dev, "could not allocate RX DMA tag\n");
                return (error);
        }

        /*
         * Allocate map for TX descriptor list.
         */
        error = bus_dma_tag_create(sc->rl_parent_tag, RL_RING_ALIGN,
            0, BUS_SPACE_MAXADDR_32BIT, BUS_SPACE_MAXADDR, NULL,
            NULL, tx_list_size, 1, tx_list_size, 0,
            NULL, NULL, &sc->rl_ldata.rl_tx_list_tag);
        if (error) {
                device_printf(dev, "could not allocate TX DMA ring tag\n");
                return (error);
        }

        /* Allocate DMA'able memory for the TX ring */

        error = bus_dmamem_alloc(sc->rl_ldata.rl_tx_list_tag,
            (void **)&sc->rl_ldata.rl_tx_list,
            BUS_DMA_WAITOK | BUS_DMA_COHERENT | BUS_DMA_ZERO,
            &sc->rl_ldata.rl_tx_list_map);
        if (error) {
                device_printf(dev, "could not allocate TX DMA ring\n");
                return (error);
        }

        /* Load the map for the TX ring. */

        sc->rl_ldata.rl_tx_list_addr = 0;
        error = bus_dmamap_load(sc->rl_ldata.rl_tx_list_tag,
             sc->rl_ldata.rl_tx_list_map, sc->rl_ldata.rl_tx_list,
             tx_list_size, re_dma_map_addr,
             &sc->rl_ldata.rl_tx_list_addr, BUS_DMA_NOWAIT);
        if (error != 0 || sc->rl_ldata.rl_tx_list_addr == 0) {
                device_printf(dev, "could not load TX DMA ring\n");
                return (ENOMEM);
        }

        /* Create DMA maps for TX buffers */

        for (i = 0; i < sc->rl_ldata.rl_tx_desc_cnt; i++) {
                error = bus_dmamap_create(sc->rl_ldata.rl_tx_mtag, 0,
                    &sc->rl_ldata.rl_tx_desc[i].tx_dmamap);
                if (error) {
                        device_printf(dev, "could not create DMA map for TX\n");
                        return (error);
                }
        }

        /*
         * Allocate map for RX descriptor list.
         */
        error = bus_dma_tag_create(sc->rl_parent_tag, RL_RING_ALIGN,
            0, BUS_SPACE_MAXADDR_32BIT, BUS_SPACE_MAXADDR, NULL,
            NULL, rx_list_size, 1, rx_list_size, 0,
            NULL, NULL, &sc->rl_ldata.rl_rx_list_tag);
        if (error) {
                device_printf(dev, "could not create RX DMA ring tag\n");
                return (error);
        }

        /* Allocate DMA'able memory for the RX ring */

        error = bus_dmamem_alloc(sc->rl_ldata.rl_rx_list_tag,
            (void **)&sc->rl_ldata.rl_rx_list,
            BUS_DMA_WAITOK | BUS_DMA_COHERENT | BUS_DMA_ZERO,
            &sc->rl_ldata.rl_rx_list_map);
        if (error) {
                device_printf(dev, "could not allocate RX DMA ring\n");
                return (error);
        }

        /* Load the map for the RX ring. */

        sc->rl_ldata.rl_rx_list_addr = 0;
        error = bus_dmamap_load(sc->rl_ldata.rl_rx_list_tag,
             sc->rl_ldata.rl_rx_list_map, sc->rl_ldata.rl_rx_list,
             rx_list_size, re_dma_map_addr,
             &sc->rl_ldata.rl_rx_list_addr, BUS_DMA_NOWAIT);
        if (error != 0 || sc->rl_ldata.rl_rx_list_addr == 0) {
                device_printf(dev, "could not load RX DMA ring\n");
                return (ENOMEM);
        }

        /* Create DMA maps for RX buffers */

        error = bus_dmamap_create(sc->rl_ldata.rl_rx_mtag, 0,
            &sc->rl_ldata.rl_rx_sparemap);
        if (error) {
                device_printf(dev, "could not create spare DMA map for RX\n");
                return (error);
        }
        for (i = 0; i < sc->rl_ldata.rl_rx_desc_cnt; i++) {
                error = bus_dmamap_create(sc->rl_ldata.rl_rx_mtag, 0,
                    &sc->rl_ldata.rl_rx_desc[i].rx_dmamap);
                if (error) {
                        device_printf(dev, "could not create DMA map for RX\n");
                        return (error);
                }
        }

        return (0);
}

/*
 * Attach the interface. Allocate softc structures, do ifmedia
 * setup and ethernet/BPF attach.
 */
static int
re_attach(device_t dev)
{
        u_char                  eaddr[ETHER_ADDR_LEN];
        u_int16_t               as[ETHER_ADDR_LEN / 2];
        struct rl_softc         *sc;
        struct ifnet            *ifp;
        struct rl_hwrev         *hw_rev;
        int                     hwrev;
        u_int16_t               devid, re_did = 0;
        int                     error = 0, rid, i;
        int                     msic, reg;
        uint8_t                 cfg;

        sc = device_get_softc(dev);
        sc->rl_dev = dev;

        mtx_init(&sc->rl_mtx, device_get_nameunit(dev), MTX_NETWORK_LOCK,
            MTX_DEF);
        callout_init_mtx(&sc->rl_stat_callout, &sc->rl_mtx, 0);

        /*
         * Map control/status registers.
         */
        pci_enable_busmaster(dev);

        devid = pci_get_device(dev);
        /*
         * Prefer memory space register mapping over I/O space.
         * Because the RTL8169SC does not seem to work when memory
         * mapping is used, always activate I/O mapping for it.
         */
        if (devid == RT_DEVICEID_8169SC)
                prefer_iomap = 1;
        if (prefer_iomap == 0) {
                sc->rl_res_id = PCIR_BAR(1);
                sc->rl_res_type = SYS_RES_MEMORY;
                /* RTL8168/8101E seems to use different BARs. */
                if (devid == RT_DEVICEID_8168 || devid == RT_DEVICEID_8101E)
                        sc->rl_res_id = PCIR_BAR(2);
        } else {
                sc->rl_res_id = PCIR_BAR(0);
                sc->rl_res_type = SYS_RES_IOPORT;
        }
        sc->rl_res = bus_alloc_resource_any(dev, sc->rl_res_type,
            &sc->rl_res_id, RF_ACTIVE);
        if (sc->rl_res == NULL && prefer_iomap == 0) {
                sc->rl_res_id = PCIR_BAR(0);
                sc->rl_res_type = SYS_RES_IOPORT;
                sc->rl_res = bus_alloc_resource_any(dev, sc->rl_res_type,
                    &sc->rl_res_id, RF_ACTIVE);
        }
        if (sc->rl_res == NULL) {
                device_printf(dev, "couldn't map ports/memory\n");
                error = ENXIO;
                goto fail;
        }

        sc->rl_btag = rman_get_bustag(sc->rl_res);
        sc->rl_bhandle = rman_get_bushandle(sc->rl_res);

        msic = 0;
        if (pci_find_extcap(dev, PCIY_EXPRESS, &reg) == 0) {
                sc->rl_flags |= RL_FLAG_PCIE;
                msic = pci_msi_count(dev);
                if (bootverbose)
                        device_printf(dev, "MSI count : %d\n", msic);
        }
        if (msic > 0 && msi_disable == 0) {
                msic = 1;
                if (pci_alloc_msi(dev, &msic) == 0) {
                        if (msic == RL_MSI_MESSAGES) {
                                device_printf(dev, "Using %d MSI messages\n",
                                    msic);
                                sc->rl_flags |= RL_FLAG_MSI;
                                /* Explicitly set MSI enable bit. */
                                CSR_WRITE_1(sc, RL_EECMD, RL_EE_MODE);
                                cfg = CSR_READ_1(sc, RL_CFG2);
                                cfg |= RL_CFG2_MSI;
                                CSR_WRITE_1(sc, RL_CFG2, cfg);
                                CSR_WRITE_1(sc, RL_EECMD, RL_EEMODE_OFF);
                        } else
                                pci_release_msi(dev);
                }
        }

        /* Allocate interrupt */
        if ((sc->rl_flags & RL_FLAG_MSI) == 0) {
                rid = 0;
                sc->rl_irq[0] = bus_alloc_resource_any(dev, SYS_RES_IRQ, &rid,
                    RF_SHAREABLE | RF_ACTIVE);
                if (sc->rl_irq[0] == NULL) {
                        device_printf(dev, "couldn't allocate IRQ resources\n");
                        error = ENXIO;
                        goto fail;
                }
        } else {
                for (i = 0, rid = 1; i < RL_MSI_MESSAGES; i++, rid++) {
                        sc->rl_irq[i] = bus_alloc_resource_any(dev,
                            SYS_RES_IRQ, &rid, RF_ACTIVE);
                        if (sc->rl_irq[i] == NULL) {
                                device_printf(dev,
                                    "couldn't allocate IRQ resources for "
                                    "message %d\n", rid);
                                error = ENXIO;
                                goto fail;
                        }
                }
        }

        if ((sc->rl_flags & RL_FLAG_MSI) == 0) {
                CSR_WRITE_1(sc, RL_EECMD, RL_EE_MODE);
                cfg = CSR_READ_1(sc, RL_CFG2);
                if ((cfg & RL_CFG2_MSI) != 0) {
                        device_printf(dev, "turning off MSI enable bit.\n");
                        cfg &= ~RL_CFG2_MSI;
                        CSR_WRITE_1(sc, RL_CFG2, cfg);
                }
                CSR_WRITE_1(sc, RL_EECMD, RL_EEMODE_OFF);
        }

        /* Reset the adapter. */
        RL_LOCK(sc);
        re_reset(sc);
        RL_UNLOCK(sc);

        hw_rev = re_hwrevs;
        hwrev = CSR_READ_4(sc, RL_TXCFG);
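        /*
         * The hardware revision is encoded in the high bits of the
         * TX configuration register.
         */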
        switch (hwrev & 0x70000000) {
        case 0x00000000:
        case 0x10000000:
                device_printf(dev, "Chip rev. 0x%08x\n", hwrev & 0xfc800000);
                hwrev &= (RL_TXCFG_HWREV | 0x80000000);
                break;
        default:
                device_printf(dev, "Chip rev. 0x%08x\n", hwrev & 0x7c800000);
                hwrev &= RL_TXCFG_HWREV;
                break;
        }
        device_printf(dev, "MAC rev. 0x%08x\n", hwrev & 0x00700000);
        while (hw_rev->rl_desc != NULL) {
                if (hw_rev->rl_rev == hwrev) {
                        sc->rl_type = hw_rev->rl_type;
                        sc->rl_hwrev = hw_rev->rl_rev;
                        break;
                }
                hw_rev++;
        }
        if (hw_rev->rl_desc == NULL) {
                device_printf(dev, "Unknown H/W revision: 0x%08x\n", hwrev);
                error = ENXIO;
                goto fail;
        }

        switch (hw_rev->rl_rev) {
        case RL_HWREV_8139CPLUS:
                sc->rl_flags |= RL_FLAG_NOJUMBO | RL_FLAG_FASTETHER |
                    RL_FLAG_AUTOPAD;
                break;
        case RL_HWREV_8100E:
        case RL_HWREV_8101E:
                sc->rl_flags |= RL_FLAG_NOJUMBO | RL_FLAG_PHYWAKE |
                    RL_FLAG_FASTETHER;
                break;
        case RL_HWREV_8102E:
        case RL_HWREV_8102EL:
        case RL_HWREV_8102EL_SPIN1:
                sc->rl_flags |= RL_FLAG_NOJUMBO | RL_FLAG_PHYWAKE |
                    RL_FLAG_PAR | RL_FLAG_DESCV2 | RL_FLAG_MACSTAT |
                    RL_FLAG_FASTETHER | RL_FLAG_CMDSTOP | RL_FLAG_AUTOPAD;
                break;
        case RL_HWREV_8168_SPIN1:
        case RL_HWREV_8168_SPIN2:
                sc->rl_flags |= RL_FLAG_WOLRXENB;
                /* FALLTHROUGH */
        case RL_HWREV_8168_SPIN3:
                sc->rl_flags |= RL_FLAG_PHYWAKE | RL_FLAG_MACSTAT;
                break;
        case RL_HWREV_8168C_SPIN2:
                sc->rl_flags |= RL_FLAG_MACSLEEP;
                /* FALLTHROUGH */
        case RL_HWREV_8168C:
                if ((hwrev & 0x00700000) == 0x00200000)
                        sc->rl_flags |= RL_FLAG_MACSLEEP;
                /* FALLTHROUGH */
        case RL_HWREV_8168CP:
        case RL_HWREV_8168D:
                sc->rl_flags |= RL_FLAG_PHYWAKE | RL_FLAG_PAR |
                    RL_FLAG_DESCV2 | RL_FLAG_MACSTAT | RL_FLAG_CMDSTOP |
                    RL_FLAG_AUTOPAD;
                /*
                 * These controllers support jumbo frames, but it seems
                 * that enabling them requires touching additional magic
                 * registers. Depending on the MAC revision, some
                 * controllers need to disable checksum offload. So
                 * disable jumbo frames until I have a better idea of
                 * what is really required to support them.
                 * RTL8168C/CP : supports up to 6KB jumbo frames.
                 * RTL8111C/CP : supports up to 9KB jumbo frames.
                 */
                sc->rl_flags |= RL_FLAG_NOJUMBO;
                break;
        case RL_HWREV_8169_8110SB:
        case RL_HWREV_8169_8110SBL:
        case RL_HWREV_8169_8110SC:
        case RL_HWREV_8169_8110SCE:
                sc->rl_flags |= RL_FLAG_PHYWAKE;
                /* FALLTHROUGH */
        case RL_HWREV_8169:
        case RL_HWREV_8169S:
        case RL_HWREV_8110S:
                sc->rl_flags |= RL_FLAG_MACRESET;
                break;
        default:
                break;
        }

        /* Enable PME. */
        CSR_WRITE_1(sc, RL_EECMD, RL_EE_MODE);
        cfg = CSR_READ_1(sc, RL_CFG1);
        cfg |= RL_CFG1_PME;
        CSR_WRITE_1(sc, RL_CFG1, cfg);
        cfg = CSR_READ_1(sc, RL_CFG5);
        cfg &= RL_CFG5_PME_STS;
        CSR_WRITE_1(sc, RL_CFG5, cfg);
        CSR_WRITE_1(sc, RL_EECMD, RL_EEMODE_OFF);

        if ((sc->rl_flags & RL_FLAG_PAR) != 0) {
                /*
                 * XXX Should have a better way to extract station
                 * address from EEPROM.
                 */
                for (i = 0; i < ETHER_ADDR_LEN; i++)
                        eaddr[i] = CSR_READ_1(sc, RL_IDR0 + i);
        } else {
                sc->rl_eewidth = RL_9356_ADDR_LEN;
                re_read_eeprom(sc, (caddr_t)&re_did, 0, 1);
                if (re_did != 0x8129)
                        sc->rl_eewidth = RL_9346_ADDR_LEN;

                /*
                 * Get station address from the EEPROM.
                 */
                re_read_eeprom(sc, (caddr_t)as, RL_EE_EADDR, 3);
                for (i = 0; i < ETHER_ADDR_LEN / 2; i++)
                        as[i] = le16toh(as[i]);
                bcopy(as, eaddr, sizeof(eaddr));
        }

        if (sc->rl_type == RL_8169) {
                /* Set RX length mask and number of descriptors. */
                sc->rl_rxlenmask = RL_RDESC_STAT_GFRAGLEN;
                sc->rl_txstart = RL_GTXSTART;
                sc->rl_ldata.rl_tx_desc_cnt = RL_8169_TX_DESC_CNT;
                sc->rl_ldata.rl_rx_desc_cnt = RL_8169_RX_DESC_CNT;
        } else {
                /* Set RX length mask and number of descriptors. */
                sc->rl_rxlenmask = RL_RDESC_STAT_FRAGLEN;
                sc->rl_txstart = RL_TXSTART;
                sc->rl_ldata.rl_tx_desc_cnt = RL_8139_TX_DESC_CNT;
                sc->rl_ldata.rl_rx_desc_cnt = RL_8139_RX_DESC_CNT;
        }

        error = re_allocmem(dev, sc);
        if (error)
                goto fail;

        ifp = sc->rl_ifp = if_alloc(IFT_ETHER);
        if (ifp == NULL) {
                device_printf(dev, "can not if_alloc()\n");
                error = ENOSPC;
                goto fail;
        }

        /* Take controller out of deep sleep mode. */
        if ((sc->rl_flags & RL_FLAG_MACSLEEP) != 0) {
                if ((CSR_READ_1(sc, RL_MACDBG) & 0x80) == 0x80)
                        CSR_WRITE_1(sc, RL_GPIO,
                            CSR_READ_1(sc, RL_GPIO) | 0x01);
                else
                        CSR_WRITE_1(sc, RL_GPIO,
                            CSR_READ_1(sc, RL_GPIO) & ~0x01);
        }

        /* Take PHY out of power down mode. */
        if ((sc->rl_flags & RL_FLAG_PHYWAKE) != 0) {
                re_gmii_writereg(dev, 1, 0x1f, 0);
                re_gmii_writereg(dev, 1, 0x0e, 0);
        }

        /* Do MII setup */
        if (mii_phy_probe(dev, &sc->rl_miibus,
            re_ifmedia_upd, re_ifmedia_sts)) {
                device_printf(dev, "MII without any phy!\n");
                error = ENXIO;
                goto fail;
        }

        ifp->if_softc = sc;
        if_initname(ifp, device_get_name(dev), device_get_unit(dev));
        ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
        ifp->if_ioctl = re_ioctl;
        ifp->if_start = re_start;
        ifp->if_hwassist = RE_CSUM_FEATURES;
        ifp->if_capabilities = IFCAP_HWCSUM;
        ifp->if_capenable = ifp->if_capabilities;
        ifp->if_init = re_init;
        IFQ_SET_MAXLEN(&ifp->if_snd, RL_IFQ_MAXLEN);
        ifp->if_snd.ifq_drv_maxlen = RL_IFQ_MAXLEN;
        IFQ_SET_READY(&ifp->if_snd);

        TASK_INIT(&sc->rl_txtask, 1, re_tx_task, ifp);
        TASK_INIT(&sc->rl_inttask, 0, re_int_task, sc);

        /*
         * XXX
         * Still have no idea how to make TSO work on 8168C, 8168CP,
         * 8111C and 8111CP.
         */
        if ((sc->rl_flags & RL_FLAG_DESCV2) == 0) {
                ifp->if_hwassist |= CSUM_TSO;
                ifp->if_capabilities |= IFCAP_TSO4;
        }

        /*
         * Call MI attach routine.
         */
        ether_ifattach(ifp, eaddr);

        /* VLAN capability setup */
        ifp->if_capabilities |= IFCAP_VLAN_MTU | IFCAP_VLAN_HWTAGGING;
        if (ifp->if_capabilities & IFCAP_HWCSUM)
                ifp->if_capabilities |= IFCAP_VLAN_HWCSUM;
        /* Enable WOL if PM is supported. */
        if (pci_find_extcap(sc->rl_dev, PCIY_PMG, &reg) == 0)
                ifp->if_capabilities |= IFCAP_WOL;
        ifp->if_capenable = ifp->if_capabilities;
1435         /*
1436          * Don't enable TSO by default. Under certain
1437          * circumstances the controller generates corrupted
1438          * packets when TSO is enabled.
1439          */
1440         ifp->if_hwassist &= ~CSUM_TSO;
1441         ifp->if_capenable &= ~IFCAP_TSO4;
1442 #ifdef DEVICE_POLLING
1443         ifp->if_capabilities |= IFCAP_POLLING;
1444 #endif
1445         /*
1446          * Tell the upper layer(s) we support long frames.
1447          * Must appear after the call to ether_ifattach() because
1448          * ether_ifattach() sets ifi_hdrlen to the default value.
1449          */
1450         ifp->if_data.ifi_hdrlen = sizeof(struct ether_vlan_header);
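        /*
         * Worked example (standard header sizes): the line above sets
         * the header length to sizeof(struct ether_vlan_header), i.e.
         * ETHER_HDR_LEN (14) + ETHER_VLAN_ENCAP_LEN (4) = 18 bytes,
         * instead of the 14-byte default from ether_ifattach().
         */
#ifdef notdef
        /* Equivalent explicit form of the assignment above. */
        ifp->if_data.ifi_hdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
#endif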
1451
1452 #ifdef RE_DIAG
1453         /*
1454          * Perform hardware diagnostic on the original RTL8169.
1455          * Some 32-bit cards were incorrectly wired and would
1456          * malfunction if plugged into a 64-bit slot.
1457          */
1458
1459         if (hwrev == RL_HWREV_8169) {
1460                 error = re_diag(sc);
1461                 if (error) {
1462                         device_printf(dev,
1463                         "attach aborted due to hardware diag failure\n");
1464                         ether_ifdetach(ifp);
1465                         goto fail;
1466                 }
1467         }
1468 #endif
1469
1470         /* Hook interrupt last to avoid having to lock softc */
1471         if ((sc->rl_flags & RL_FLAG_MSI) == 0)
1472                 error = bus_setup_intr(dev, sc->rl_irq[0],
1473                     INTR_TYPE_NET | INTR_MPSAFE, re_intr, NULL, sc,
1474                     &sc->rl_intrhand[0]);
1475         else {
1476                 for (i = 0; i < RL_MSI_MESSAGES; i++) {
1477                         error = bus_setup_intr(dev, sc->rl_irq[i],
1478                             INTR_TYPE_NET | INTR_MPSAFE, re_intr, NULL, sc,
1479                             &sc->rl_intrhand[i]);
1480                         if (error != 0)
1481                                 break;
1482                 }
1483         }
1484         if (error) {
1485                 device_printf(dev, "couldn't set up irq\n");
1486                 ether_ifdetach(ifp);
1487         }
1488
1489 fail:
1490
1491         if (error)
1492                 re_detach(dev);
1493
1494         return (error);
1495 }
1496
1497 /*
1498  * Shut down the hardware and free up resources. This can be called any
1499  * time after the mutex has been initialized. It is called in both
1500  * the error case in attach and the normal detach case so it needs
1501  * to be careful about only freeing resources that have actually been
1502  * allocated.
1503  */
1504 static int
1505 re_detach(device_t dev)
1506 {
1507         struct rl_softc         *sc;
1508         struct ifnet            *ifp;
1509         int                     i, rid;
1510
1511         sc = device_get_softc(dev);
1512         ifp = sc->rl_ifp;
1513         KASSERT(mtx_initialized(&sc->rl_mtx), ("re mutex not initialized"));
1514
1515         /* These should only be active if attach succeeded */
1516         if (device_is_attached(dev)) {
1517 #ifdef DEVICE_POLLING
1518                 if (ifp->if_capenable & IFCAP_POLLING)
1519                         ether_poll_deregister(ifp);
1520 #endif
1521                 RL_LOCK(sc);
1522 #if 0
1523                 sc->suspended = 1;
1524 #endif
1525                 re_stop(sc);
1526                 RL_UNLOCK(sc);
1527                 callout_drain(&sc->rl_stat_callout);
1528                 taskqueue_drain(taskqueue_fast, &sc->rl_inttask);
1529                 taskqueue_drain(taskqueue_fast, &sc->rl_txtask);
1530                 /*
1531                  * Force off the IFF_UP flag here, in case someone
1532                  * still had a BPF descriptor attached to this
1533                  * interface. If they do, ether_ifdetach() will cause
1534                  * the BPF code to try and clear the promisc mode
1535                  * flag, which will bubble down to re_ioctl(),
1536                  * which will try to call re_init() again. This will
1537                  * turn the NIC back on and restart the MII ticker,
1538                  * which will panic the system when the kernel tries
1539                  * to invoke the re_tick() function that isn't there
1540                  * anymore.
1541                  */
1542                 ifp->if_flags &= ~IFF_UP;
1543                 ether_ifdetach(ifp);
1544         }
1545         if (sc->rl_miibus)
1546                 device_delete_child(dev, sc->rl_miibus);
1547         bus_generic_detach(dev);
1548
1549         /*
1550          * The rest is resource deallocation, so we should already be
1551          * stopped here.
1552          */
1553
1554         for (i = 0; i < RL_MSI_MESSAGES; i++) {
1555                 if (sc->rl_intrhand[i] != NULL) {
1556                         bus_teardown_intr(dev, sc->rl_irq[i],
1557                             sc->rl_intrhand[i]);
1558                         sc->rl_intrhand[i] = NULL;
1559                 }
1560         }
1561         if (ifp != NULL)
1562                 if_free(ifp);
1563         if ((sc->rl_flags & RL_FLAG_MSI) == 0) {
1564                 if (sc->rl_irq[0] != NULL) {
1565                         bus_release_resource(dev, SYS_RES_IRQ, 0,
1566                             sc->rl_irq[0]);
1567                         sc->rl_irq[0] = NULL;
1568                 }
1569         } else {
1570                 for (i = 0, rid = 1; i < RL_MSI_MESSAGES; i++, rid++) {
1571                         if (sc->rl_irq[i] != NULL) {
1572                                 bus_release_resource(dev, SYS_RES_IRQ, rid,
1573                                     sc->rl_irq[i]);
1574                                 sc->rl_irq[i] = NULL;
1575                         }
1576                 }
1577                 pci_release_msi(dev);
1578         }
1579         if (sc->rl_res)
1580                 bus_release_resource(dev, sc->rl_res_type, sc->rl_res_id,
1581                     sc->rl_res);
1582
1583         /* Unload and free the RX DMA ring memory and map */
1584
1585         if (sc->rl_ldata.rl_rx_list_tag) {
1586                 bus_dmamap_unload(sc->rl_ldata.rl_rx_list_tag,
1587                     sc->rl_ldata.rl_rx_list_map);
1588                 bus_dmamem_free(sc->rl_ldata.rl_rx_list_tag,
1589                     sc->rl_ldata.rl_rx_list,
1590                     sc->rl_ldata.rl_rx_list_map);
1591                 bus_dma_tag_destroy(sc->rl_ldata.rl_rx_list_tag);
1592         }
1593
1594         /* Unload and free the TX DMA ring memory and map */
1595
1596         if (sc->rl_ldata.rl_tx_list_tag) {
1597                 bus_dmamap_unload(sc->rl_ldata.rl_tx_list_tag,
1598                     sc->rl_ldata.rl_tx_list_map);
1599                 bus_dmamem_free(sc->rl_ldata.rl_tx_list_tag,
1600                     sc->rl_ldata.rl_tx_list,
1601                     sc->rl_ldata.rl_tx_list_map);
1602                 bus_dma_tag_destroy(sc->rl_ldata.rl_tx_list_tag);
1603         }
1604
1605         /* Destroy all the RX and TX buffer maps */
1606
1607         if (sc->rl_ldata.rl_tx_mtag) {
1608                 for (i = 0; i < sc->rl_ldata.rl_tx_desc_cnt; i++)
1609                         bus_dmamap_destroy(sc->rl_ldata.rl_tx_mtag,
1610                             sc->rl_ldata.rl_tx_desc[i].tx_dmamap);
1611                 bus_dma_tag_destroy(sc->rl_ldata.rl_tx_mtag);
1612         }
1613         if (sc->rl_ldata.rl_rx_mtag) {
1614                 for (i = 0; i < sc->rl_ldata.rl_rx_desc_cnt; i++)
1615                         bus_dmamap_destroy(sc->rl_ldata.rl_rx_mtag,
1616                             sc->rl_ldata.rl_rx_desc[i].rx_dmamap);
1617                 if (sc->rl_ldata.rl_rx_sparemap)
1618                         bus_dmamap_destroy(sc->rl_ldata.rl_rx_mtag,
1619                             sc->rl_ldata.rl_rx_sparemap);
1620                 bus_dma_tag_destroy(sc->rl_ldata.rl_rx_mtag);
1621         }
1622
1623         /* Unload and free the stats buffer and map */
1624
1625         if (sc->rl_ldata.rl_stag) {
1626                 bus_dmamap_unload(sc->rl_ldata.rl_stag,
1627                     sc->rl_ldata.rl_smap);
1628                 bus_dmamem_free(sc->rl_ldata.rl_stag,
1629                     sc->rl_ldata.rl_stats,
1630                     sc->rl_ldata.rl_smap);
1631                 bus_dma_tag_destroy(sc->rl_ldata.rl_stag);
1632         }
1633
1634         if (sc->rl_parent_tag)
1635                 bus_dma_tag_destroy(sc->rl_parent_tag);
1636
1637         mtx_destroy(&sc->rl_mtx);
1638
1639         return (0);
1640 }
1641
1642 static __inline void
1643 re_discard_rxbuf(struct rl_softc *sc, int idx)
1644 {
1645         struct rl_desc          *desc;
1646         struct rl_rxdesc        *rxd;
1647         uint32_t                cmdstat;
1648
1649         rxd = &sc->rl_ldata.rl_rx_desc[idx];
1650         desc = &sc->rl_ldata.rl_rx_list[idx];
1651         desc->rl_vlanctl = 0;
1652         cmdstat = rxd->rx_size;
1653         if (idx == sc->rl_ldata.rl_rx_desc_cnt - 1)
1654                 cmdstat |= RL_RDESC_CMD_EOR;
1655         desc->rl_cmdstat = htole32(cmdstat | RL_RDESC_CMD_OWN);
1656 }
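/*
 * Ring layout illustration (a sketch; the actual count depends on the
 * chip type chosen in re_attach()): with rl_rx_desc_cnt == N, every
 * recycled descriptor gets RL_RDESC_CMD_OWN, and descriptor N - 1
 * additionally gets RL_RDESC_CMD_EOR so the chip wraps from the last
 * ring entry back to descriptor 0 rather than running off the end.
 */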
1657
1658 static int
1659 re_newbuf(struct rl_softc *sc, int idx)
1660 {
1661         struct mbuf             *m;
1662         struct rl_rxdesc        *rxd;
1663         bus_dma_segment_t       segs[1];
1664         bus_dmamap_t            map;
1665         struct rl_desc          *desc;
1666         uint32_t                cmdstat;
1667         int                     error, nsegs;
1668
1669         m = m_getcl(M_DONTWAIT, MT_DATA, M_PKTHDR);
1670         if (m == NULL)
1671                 return (ENOBUFS);
1672
1673         m->m_len = m->m_pkthdr.len = MCLBYTES;
1674 #ifdef RE_FIXUP_RX
1675         /*
1676          * This is part of an evil trick to deal with non-x86 platforms.
1677          * The RealTek chip requires RX buffers to be aligned on 64-bit
1678          * boundaries, but that will hose non-x86 machines. To get around
1679          * this, we leave some empty space at the start of each buffer
1680          * and for non-x86 hosts, we copy the buffer back six bytes
1681          * to achieve word alignment. This is slightly more efficient
1682          * than allocating a new buffer, copying the contents, and
1683          * discarding the old buffer.
1684          */
1685         m_adj(m, RE_ETHER_ALIGN);
1686 #endif
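        /*
         * The arithmetic behind the fixup (illustrative, assuming the
         * usual RE_ETHER_ALIGN of 8 and ETHER_ALIGN of 2): the cluster
         * start is 64-bit aligned, and m_adj(m, 8) above preserves
         * that for the chip's DMA.  re_fixup_rx() later copies the
         * received frame back by 8 - 2 = 6 bytes, leaving a 2-byte
         * offset so that the IP header, which follows the 14-byte
         * Ethernet header, lands on a 4-byte boundary.
         */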
1687         error = bus_dmamap_load_mbuf_sg(sc->rl_ldata.rl_rx_mtag,
1688             sc->rl_ldata.rl_rx_sparemap, m, segs, &nsegs, BUS_DMA_NOWAIT);
1689         if (error != 0) {
1690                 m_freem(m);
1691                 return (ENOBUFS);
1692         }
1693         KASSERT(nsegs == 1, ("%s: %d segments returned!", __func__, nsegs));
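        /*
         * Note on the spare-map trick: the new mbuf was loaded into
         * rl_rx_sparemap above, so a load failure leaves the mbuf and
         * mapping currently posted at this ring slot untouched.  Only
         * after a successful load are the maps swapped below and the
         * old map recycled as the new spare.
         */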
1694
1695         rxd = &sc->rl_ldata.rl_rx_desc[idx];
1696         if (rxd->rx_m != NULL) {
1697                 bus_dmamap_sync(sc->rl_ldata.rl_rx_mtag, rxd->rx_dmamap,
1698                     BUS_DMASYNC_POSTREAD);
1699                 bus_dmamap_unload(sc->rl_ldata.rl_rx_mtag, rxd->rx_dmamap);
1700         }
1701
1702         rxd->rx_m = m;
1703         map = rxd->rx_dmamap;
1704         rxd->rx_dmamap = sc->rl_ldata.rl_rx_sparemap;
1705         rxd->rx_size = segs[0].ds_len;
1706         sc->rl_ldata.rl_rx_sparemap = map;
1707         bus_dmamap_sync(sc->rl_ldata.rl_rx_mtag, rxd->rx_dmamap,
1708             BUS_DMASYNC_PREREAD);
1709
1710         desc = &sc->rl_ldata.rl_rx_list[idx];
1711         desc->rl_vlanctl = 0;
1712         desc->rl_bufaddr_lo = htole32(RL_ADDR_LO(segs[0].ds_addr));
1713         desc->rl_bufaddr_hi = htole32(RL_ADDR_HI(segs[0].ds_addr));
1714         cmdstat = segs[0].ds_len;
1715         if (idx == sc->rl_ldata.rl_rx_desc_cnt - 1)
1716                 cmdstat |= RL_RDESC_CMD_EOR;
1717         desc->rl_cmdstat = htole32(cmdstat | RL_RDESC_CMD_OWN);
1718
1719         return (0);
1720 }
1721
1722 #ifdef RE_FIXUP_RX
1723 static __inline void
1724 re_fixup_rx(struct mbuf *m)
1725 {
1726         int                     i;
1727         uint16_t                *src, *dst;
1728
1729         src = mtod(m, uint16_t *);
1730         dst = src - (RE_ETHER_ALIGN - ETHER_ALIGN) / sizeof *src;
1731
1732         for (i = 0; i < (m->m_len / sizeof(uint16_t) + 1); i++)
1733                 *dst++ = *src++;
1734
1735         m->m_data -= RE_ETHER_ALIGN - ETHER_ALIGN;
1736 }
1737 #endif
1738
1739 static int
1740 re_tx_list_init(struct rl_softc *sc)
1741 {
1742         struct rl_desc          *desc;
1743         int                     i;
1744
1745         RL_LOCK_ASSERT(sc);
1746
1747         bzero(sc->rl_ldata.rl_tx_list,
1748             sc->rl_ldata.rl_tx_desc_cnt * sizeof(struct rl_desc));
1749         for (i = 0; i < sc->rl_ldata.rl_tx_desc_cnt; i++)
1750                 sc->rl_ldata.rl_tx_desc[i].tx_m = NULL;
1751         /* Set EOR. */
1752         desc = &sc->rl_ldata.rl_tx_list[sc->rl_ldata.rl_tx_desc_cnt - 1];
1753         desc->rl_cmdstat |= htole32(RL_TDESC_CMD_EOR);
1754
1755         bus_dmamap_sync(sc->rl_ldata.rl_tx_list_tag,
1756             sc->rl_ldata.rl_tx_list_map,
1757             BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
1758
1759         sc->rl_ldata.rl_tx_prodidx = 0;
1760         sc->rl_ldata.rl_tx_considx = 0;
1761         sc->rl_ldata.rl_tx_free = sc->rl_ldata.rl_tx_desc_cnt;
1762
1763         return (0);
1764 }
1765
1766 static int
1767 re_rx_list_init(struct rl_softc *sc)
1768 {
1769         int                     error, i;
1770
1771         bzero(sc->rl_ldata.rl_rx_list,
1772             sc->rl_ldata.rl_rx_desc_cnt * sizeof(struct rl_desc));
1773         for (i = 0; i < sc->rl_ldata.rl_rx_desc_cnt; i++) {
1774                 sc->rl_ldata.rl_rx_desc[i].rx_m = NULL;
1775                 if ((error = re_newbuf(sc, i)) != 0)
1776                         return (error);
1777         }
1778
1779         /* Flush the RX descriptors */
1780
1781         bus_dmamap_sync(sc->rl_ldata.rl_rx_list_tag,
1782             sc->rl_ldata.rl_rx_list_map,
1783             BUS_DMASYNC_PREWRITE|BUS_DMASYNC_PREREAD);
1784
1785         sc->rl_ldata.rl_rx_prodidx = 0;
1786         sc->rl_head = sc->rl_tail = NULL;
1787
1788         return (0);
1789 }
1790
1791 /*
1792  * RX handler for C+ and 8169. For the gigE chips, we support
1793  * the reception of jumbo frames that have been fragmented
1794  * across multiple 2K mbuf cluster buffers.
1795  */
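/*
 * Reassembly sketch (buffer size illustrative): a 5000-byte jumbo
 * frame arrives as three descriptors, two full buffers without
 * RL_RDESC_STAT_EOF followed by a final fragment with EOF set.  The
 * non-EOF pieces are chained onto rl_head/rl_tail, and the EOF piece
 * completes the chain before the packet is passed to if_input().
 */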
1796 static int
1797 re_rxeof(struct rl_softc *sc, int *rx_npktsp)
1798 {
1799         struct mbuf             *m;
1800         struct ifnet            *ifp;
1801         int                     i, total_len;
1802         struct rl_desc          *cur_rx;
1803         u_int32_t               rxstat, rxvlan;
1804         int                     maxpkt = 16, rx_npkts = 0;
1805
1806         RL_LOCK_ASSERT(sc);
1807
1808         ifp = sc->rl_ifp;
1809
1810         /* Invalidate the descriptor memory */
1811
1812         bus_dmamap_sync(sc->rl_ldata.rl_rx_list_tag,
1813             sc->rl_ldata.rl_rx_list_map,
1814             BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
1815
1816         for (i = sc->rl_ldata.rl_rx_prodidx; maxpkt > 0;
1817             i = RL_RX_DESC_NXT(sc, i)) {
1818                 cur_rx = &sc->rl_ldata.rl_rx_list[i];
1819                 rxstat = le32toh(cur_rx->rl_cmdstat);
1820                 if ((rxstat & RL_RDESC_STAT_OWN) != 0)
1821                         break;
1822                 total_len = rxstat & sc->rl_rxlenmask;
1823                 rxvlan = le32toh(cur_rx->rl_vlanctl);
1824                 m = sc->rl_ldata.rl_rx_desc[i].rx_m;
1825
1826                 if (!(rxstat & RL_RDESC_STAT_EOF)) {
1827                         if (re_newbuf(sc, i) != 0) {
1828                                 /*
1829                                  * If this is part of a multi-fragment packet,
1830                                  * discard all the pieces.
1831                                  */
1832                                 if (sc->rl_head != NULL) {
1833                                         m_freem(sc->rl_head);
1834                                         sc->rl_head = sc->rl_tail = NULL;
1835                                 }
1836                                 re_discard_rxbuf(sc, i);
1837                                 continue;
1838                         }
1839                         m->m_len = RE_RX_DESC_BUFLEN;
1840                         if (sc->rl_head == NULL)
1841                                 sc->rl_head = sc->rl_tail = m;
1842                         else {
1843                                 m->m_flags &= ~M_PKTHDR;
1844                                 sc->rl_tail->m_next = m;
1845                                 sc->rl_tail = m;
1846                         }
1847                         continue;
1848                 }
1849
1850                 /*
1851                  * NOTE: for the 8139C+, the frame length field
1852                  * is always 12 bits in size, but for the gigE chips,
1853                  * it is 13 bits (since the max RX frame length is 16K).
1854                  * Unfortunately, all 32 bits in the status word
1855                  * were already used, so to make room for the extra
1856                  * length bit, RealTek took out the 'frame alignment
1857                  * error' bit and shifted the other status bits
1858                  * over one slot. The OWN, EOR, FS and LS bits are
1859                  * still in the same places. We have already extracted
1860                  * the frame length and checked the OWN bit, so rather
1861                  * than using an alternate bit mapping, we shift the
1862                  * status bits one space to the right so we can evaluate
1863                  * them using the 8169 status as though it was in the
1864                  * same format as that of the 8139C+.
1865                  */
1866                 if (sc->rl_type == RL_8169)
1867                         rxstat >>= 1;
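                /*
                 * Illustration (bit positions hypothetical): a status
                 * flag at bit k in the 8139C+ layout sits at bit k + 1
                 * on the 8169 because of the extra length bit.  Since
                 * total_len and OWN were extracted before the shift,
                 * the 8139C+ RL_RDESC_STAT_* masks used below apply
                 * unchanged to the shifted 8169 status word.
                 */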
1868
1869                 /*
1870                  * if total_len > 2^13-1, both _RXERRSUM and _GIANT will be
1871                  * set, but if CRC is clear, it will still be a valid frame.
1872                  */
1873                 if (rxstat & RL_RDESC_STAT_RXERRSUM && !(total_len > 8191 &&
1874                     (rxstat & RL_RDESC_STAT_ERRS) == RL_RDESC_STAT_GIANT)) {
1875                         ifp->if_ierrors++;
1876                         /*
1877                          * If this is part of a multi-fragment packet,
1878                          * discard all the pieces.
1879                          */
1880                         if (sc->rl_head != NULL) {
1881                                 m_freem(sc->rl_head);
1882                                 sc->rl_head = sc->rl_tail = NULL;
1883                         }
1884                         re_discard_rxbuf(sc, i);
1885                         continue;
1886                 }
1887
1888                 /*
1889                  * If allocating a replacement mbuf fails,
1890                  * reload the current one.
1891                  */
1892
1893                 if (re_newbuf(sc, i) != 0) {
1894                         ifp->if_iqdrops++;
1895                         if (sc->rl_head != NULL) {
1896                                 m_freem(sc->rl_head);
1897                                 sc->rl_head = sc->rl_tail = NULL;
1898                         }
1899                         re_discard_rxbuf(sc, i);
1900                         continue;
1901                 }
1902
1903                 if (sc->rl_head != NULL) {
1904                         m->m_len = total_len % RE_RX_DESC_BUFLEN;
1905                         if (m->m_len == 0)
1906                                 m->m_len = RE_RX_DESC_BUFLEN;
1907                         /*
1908                          * Special case: if there are 4 bytes or fewer
1909                          * in this buffer, the mbuf can be discarded:
1910                          * the last 4 bytes are the CRC, which we don't
1911                          * care about anyway.
1912                          */
1913                         if (m->m_len <= ETHER_CRC_LEN) {
1914                                 sc->rl_tail->m_len -=
1915                                     (ETHER_CRC_LEN - m->m_len);
1916                                 m_freem(m);
1917                         } else {
1918                                 m->m_len -= ETHER_CRC_LEN;
1919                                 m->m_flags &= ~M_PKTHDR;
1920                                 sc->rl_tail->m_next = m;
1921                         }
1922                         m = sc->rl_head;
1923                         sc->rl_head = sc->rl_tail = NULL;
1924                         m->m_pkthdr.len = total_len - ETHER_CRC_LEN;
1925                 } else
1926                         m->m_pkthdr.len = m->m_len =
1927                             (total_len - ETHER_CRC_LEN);
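                /*
                 * Worked example for the multi-fragment path above
                 * (assuming RE_RX_DESC_BUFLEN is 2048 for the sake of
                 * illustration): for total_len = 4099 the final
                 * fragment holds 4099 % 2048 = 3 bytes, all CRC, so
                 * that mbuf is freed, the tail is trimmed by
                 * ETHER_CRC_LEN - 3 = 1 byte, and the completed packet
                 * carries 4099 - 4 = 4095 bytes of frame data.
                 */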
1928
1929 #ifdef RE_FIXUP_RX
1930                 re_fixup_rx(m);
1931 #endif
1932                 ifp->if_ipackets++;
1933                 m->m_pkthdr.rcvif = ifp;
1934
1935                 /* Do RX checksumming if enabled */
1936
1937                 if (ifp->if_capenable & IFCAP_RXCSUM) {
1938                         if ((sc->rl_flags & RL_FLAG_DESCV2) == 0) {
1939                                 /* Check IP header checksum */
1940                                 if (rxstat & RL_RDESC_STAT_PROTOID)
1941                                         m->m_pkthdr.csum_flags |=
1942                                             CSUM_IP_CHECKED;
1943                                 if (!(rxstat & RL_RDESC_STAT_IPSUMBAD))
1944                                         m->m_pkthdr.csum_flags |=
1945                                             CSUM_IP_VALID;
1946
1947                                 /* Check TCP/UDP checksum */
1948                                 if ((RL_TCPPKT(rxstat) &&
1949                                     !(rxstat & RL_RDESC_STAT_TCPSUMBAD)) ||
1950                                     (RL_UDPPKT(rxstat) &&
1951                                      !(rxstat & RL_RDESC_STAT_UDPSUMBAD))) {
1952                                         m->m_pkthdr.csum_flags |=
1953                                                 CSUM_DATA_VALID|CSUM_PSEUDO_HDR;
1954                                         m->m_pkthdr.csum_data = 0xffff;
1955                                 }
1956                         } else {
1957                                 /*
1958                                  * RTL8168C/RTL8168CP/RTL8111C/RTL8111CP
1959                                  */
1960                                 if ((rxstat & RL_RDESC_STAT_PROTOID) &&
1961                                     (rxvlan & RL_RDESC_IPV4))
1962                                         m->m_pkthdr.csum_flags |=
1963                                             CSUM_IP_CHECKED;
1964                                 if (!(rxstat & RL_RDESC_STAT_IPSUMBAD) &&
1965                                     (rxvlan & RL_RDESC_IPV4))
1966                                         m->m_pkthdr.csum_flags |=
1967                                             CSUM_IP_VALID;
1968                                 if (((rxstat & RL_RDESC_STAT_TCP) &&
1969                                     !(rxstat & RL_RDESC_STAT_TCPSUMBAD)) ||
1970                                     ((rxstat & RL_RDESC_STAT_UDP) &&
1971                                     !(rxstat & RL_RDESC_STAT_UDPSUMBAD))) {
1972                                         m->m_pkthdr.csum_flags |=
1973                                                 CSUM_DATA_VALID|CSUM_PSEUDO_HDR;
1974                                         m->m_pkthdr.csum_data = 0xffff;
1975                                 }
1976                         }
1977                 }
1978                 maxpkt--;
1979                 if (rxvlan & RL_RDESC_VLANCTL_TAG) {
1980                         m->m_pkthdr.ether_vtag =
1981                             bswap16((rxvlan & RL_RDESC_VLANCTL_DATA));
1982                         m->m_flags |= M_VLANTAG;
1983                 }
1984                 RL_UNLOCK(sc);
1985                 (*ifp->if_input)(ifp, m);
1986                 RL_LOCK(sc);
1987                 rx_npkts++;
1988         }
1989
1990         /* Flush the RX DMA ring */
1991
1992         bus_dmamap_sync(sc->rl_ldata.rl_rx_list_tag,
1993             sc->rl_ldata.rl_rx_list_map,
1994             BUS_DMASYNC_PREWRITE|BUS_DMASYNC_PREREAD);
1995
1996         sc->rl_ldata.rl_rx_prodidx = i;
1997
1998         if (rx_npktsp != NULL)
1999                 *rx_npktsp = rx_npkts;
2000         if (maxpkt)
2001                 return (EAGAIN);
2002
2003         return (0);
2004 }
2005
2006 static void
2007 re_txeof(struct rl_softc *sc)
2008 {
2009         struct ifnet            *ifp;
2010         struct rl_txdesc        *txd;
2011         u_int32_t               txstat;
2012         int                     cons;
2013
2014         cons = sc->rl_ldata.rl_tx_considx;
2015         if (cons == sc->rl_ldata.rl_tx_prodidx)
2016                 return;
2017
2018         ifp = sc->rl_ifp;
2019         /* Invalidate the TX descriptor list */
2020         bus_dmamap_sync(sc->rl_ldata.rl_tx_list_tag,
2021             sc->rl_ldata.rl_tx_list_map,
2022             BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
2023
2024         for (; cons != sc->rl_ldata.rl_tx_prodidx;
2025             cons = RL_TX_DESC_NXT(sc, cons)) {
2026                 txstat = le32toh(sc->rl_ldata.rl_tx_list[cons].rl_cmdstat);
2027                 if (txstat & RL_TDESC_STAT_OWN)
2028                         break;
2029                 /*
2030                  * We only stash mbufs in the last descriptor
2031                  * in a fragment chain, which also happens to
2032                  * be the only place where the TX status bits
2033                  * are valid.
2034                  */
2035                 if (txstat & RL_TDESC_CMD_EOF) {
2036                         txd = &sc->rl_ldata.rl_tx_desc[cons];
2037                         bus_dmamap_sync(sc->rl_ldata.rl_tx_mtag,
2038                             txd->tx_dmamap, BUS_DMASYNC_POSTWRITE);
2039                         bus_dmamap_unload(sc->rl_ldata.rl_tx_mtag,
2040                             txd->tx_dmamap);
2041                         KASSERT(txd->tx_m != NULL,
2042                             ("%s: freeing NULL mbufs!", __func__));
2043                         m_freem(txd->tx_m);
2044                         txd->tx_m = NULL;
2045                         if (txstat & (RL_TDESC_STAT_EXCESSCOL|
2046                             RL_TDESC_STAT_COLCNT))
2047                                 ifp->if_collisions++;
2048                         if (txstat & RL_TDESC_STAT_TXERRSUM)
2049                                 ifp->if_oerrors++;
2050                         else
2051                                 ifp->if_opackets++;
2052                 }
2053                 sc->rl_ldata.rl_tx_free++;
2054                 ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;
2055         }
2056         sc->rl_ldata.rl_tx_considx = cons;
2057
2058         /* No changes made to the TX ring, so no flush needed */
2059
2060         if (sc->rl_ldata.rl_tx_free != sc->rl_ldata.rl_tx_desc_cnt) {
2061 #ifdef RE_TX_MODERATION
2062                 /*
2063                  * If not all descriptors have been reaped yet, reload
2064                  * the timer so that we will eventually get another
2065                  * interrupt that will cause us to re-enter this routine.
2066                  * This is done in case the transmitter has gone idle.
2067                  */
2068                 CSR_WRITE_4(sc, RL_TIMERCNT, 1);
2069 #endif
2070         } else
2071                 sc->rl_watchdog_timer = 0;
2072 }
2073
2074 static void
2075 re_tick(void *xsc)
2076 {
2077         struct rl_softc         *sc;
2078         struct mii_data         *mii;
2079
2080         sc = xsc;
2081
2082         RL_LOCK_ASSERT(sc);
2083
2084         mii = device_get_softc(sc->rl_miibus);
2085         mii_tick(mii);
2086         if ((sc->rl_flags & RL_FLAG_LINK) == 0)
2087                 re_miibus_statchg(sc->rl_dev);
2088         /*
2089          * Reclaim transmitted frames here. Technically it is not
2090          * necessary to do this here, but it ensures periodic
2091          * reclamation even when the Tx completion interrupt is lost,
2092          * as seems to happen on PCIe based controllers at times.
2093          */
2094         re_txeof(sc);
2095         re_watchdog(sc);
2096         callout_reset(&sc->rl_stat_callout, hz, re_tick, sc);
2097 }
2098
2099 #ifdef DEVICE_POLLING
2100 static int
2101 re_poll(struct ifnet *ifp, enum poll_cmd cmd, int count)
2102 {
2103         struct rl_softc *sc = ifp->if_softc;
2104         int rx_npkts = 0;
2105
2106         RL_LOCK(sc);
2107         if (ifp->if_drv_flags & IFF_DRV_RUNNING)
2108                 rx_npkts = re_poll_locked(ifp, cmd, count);
2109         RL_UNLOCK(sc);
2110         return (rx_npkts);
2111 }
2112
2113 static int
2114 re_poll_locked(struct ifnet *ifp, enum poll_cmd cmd, int count)
2115 {
2116         struct rl_softc *sc = ifp->if_softc;
2117         int rx_npkts;
2118
2119         RL_LOCK_ASSERT(sc);
2120
2121         sc->rxcycles = count;
2122         re_rxeof(sc, &rx_npkts);
2123         re_txeof(sc);
2124
2125         if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
2126                 taskqueue_enqueue_fast(taskqueue_fast, &sc->rl_txtask);
2127
2128         if (cmd == POLL_AND_CHECK_STATUS) { /* also check status register */
2129                 u_int16_t       status;
2130
2131                 status = CSR_READ_2(sc, RL_ISR);
2132                 if (status == 0xffff)
2133                         return (rx_npkts);
2134                 if (status)
2135                         CSR_WRITE_2(sc, RL_ISR, status);
2136                 if ((status & (RL_ISR_TX_OK | RL_ISR_TX_DESC_UNAVAIL)) &&
2137                     (sc->rl_flags & RL_FLAG_PCIE))
2138                         CSR_WRITE_1(sc, sc->rl_txstart, RL_TXSTART_START);
2139
2140                 /*
2141                  * XXX check behaviour on receiver stalls.
2142                  */
2143
2144                 if (status & RL_ISR_SYSTEM_ERR)
2145                         re_init_locked(sc);
2146         }
2147         return (rx_npkts);
2148 }
2149 #endif /* DEVICE_POLLING */
2150
2151 static int
2152 re_intr(void *arg)
2153 {
2154         struct rl_softc         *sc;
2155         uint16_t                status;
2156
2157         sc = arg;
2158
2159         status = CSR_READ_2(sc, RL_ISR);
2160         if (status == 0xFFFF || (status & RL_INTRS_CPLUS) == 0)
2161                 return (FILTER_STRAY);
2162         CSR_WRITE_2(sc, RL_IMR, 0);
2163
2164         taskqueue_enqueue_fast(taskqueue_fast, &sc->rl_inttask);
2165
2166         return (FILTER_HANDLED);
2167 }
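/*
 * Summary of the interrupt hand-off used here (restating the existing
 * code, not new behavior): re_intr() runs as a filter in interrupt
 * context, masks further interrupts by zeroing RL_IMR, and defers the
 * real work to the fast taskqueue.  re_int_task() below services the
 * RX/TX rings and re-arms RL_IMR only once RL_ISR shows no pending
 * RL_INTRS_CPLUS events; otherwise it re-queues itself.
 */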
2168
2169 static void
2170 re_int_task(void *arg, int npending)
2171 {
2172         struct rl_softc         *sc;
2173         struct ifnet            *ifp;
2174         u_int16_t               status;
2175         int                     rval = 0;
2176
2177         sc = arg;
2178         ifp = sc->rl_ifp;
2179
2180         RL_LOCK(sc);
2181
2182         status = CSR_READ_2(sc, RL_ISR);
2183         CSR_WRITE_2(sc, RL_ISR, status);
2184
2185         if (sc->suspended ||
2186             (ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) {
2187                 RL_UNLOCK(sc);
2188                 return;
2189         }
2190
2191 #ifdef DEVICE_POLLING
2192         if (ifp->if_capenable & IFCAP_POLLING) {
2193                 RL_UNLOCK(sc);
2194                 return;
2195         }
2196 #endif
2197
2198         if (status & (RL_ISR_RX_OK|RL_ISR_RX_ERR|RL_ISR_FIFO_OFLOW))
2199                 rval = re_rxeof(sc, NULL);
2200
2201         /*
2202          * Some chips will ignore a second TX request issued
2203          * while an existing transmission is in progress. If
2204          * the transmitter goes idle but there are still
2205          * packets waiting to be sent, we need to restart the
2206          * channel here to flush them out. This only seems to
2207          * be required with the PCIe devices.
2208          */
2209         if ((status & (RL_ISR_TX_OK | RL_ISR_TX_DESC_UNAVAIL)) &&
2210             (sc->rl_flags & RL_FLAG_PCIE))
2211                 CSR_WRITE_1(sc, sc->rl_txstart, RL_TXSTART_START);
2212         if (status & (
2213 #ifdef RE_TX_MODERATION
2214             RL_ISR_TIMEOUT_EXPIRED|
2215 #else
2216             RL_ISR_TX_OK|
2217 #endif
2218             RL_ISR_TX_ERR|RL_ISR_TX_DESC_UNAVAIL))
2219                 re_txeof(sc);
2220
2221         if (status & RL_ISR_SYSTEM_ERR)
2222                 re_init_locked(sc);
2223
2224         if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
2225                 taskqueue_enqueue_fast(taskqueue_fast, &sc->rl_txtask);
2226
2227         RL_UNLOCK(sc);
2228
2229         if ((CSR_READ_2(sc, RL_ISR) & RL_INTRS_CPLUS) || rval) {
2230                 taskqueue_enqueue_fast(taskqueue_fast, &sc->rl_inttask);
2231                 return;
2232         }
2233
2234         CSR_WRITE_2(sc, RL_IMR, RL_INTRS_CPLUS);
2235 }
2236
2237 static int
2238 re_encap(struct rl_softc *sc, struct mbuf **m_head)
2239 {
2240         struct rl_txdesc        *txd, *txd_last;
2241         bus_dma_segment_t       segs[RL_NTXSEGS];
2242         bus_dmamap_t            map;
2243         struct mbuf             *m_new;
2244         struct rl_desc          *desc;
2245         int                     nsegs, prod;
2246         int                     i, error, ei, si;
2247         int                     padlen;
2248         uint32_t                cmdstat, csum_flags, vlanctl;
2249
2250         RL_LOCK_ASSERT(sc);
2251         M_ASSERTPKTHDR((*m_head));
2252
2253         /*
2254          * With some of the RealTek chips, using the checksum offload
2255          * support in conjunction with the autopadding feature results
2256          * in the transmission of corrupt frames. For example, if we
2257          * need to send a really small IP fragment that's less than 60
2258          * bytes in size, and IP header checksumming is enabled, the
2259          * resulting ethernet frame that appears on the wire will
2260          * have garbled payload. To work around this, if TX IP checksum
2261          * offload is enabled, we always manually pad short frames out
2262          * to the minimum ethernet frame size.
2263          */
2264         if ((sc->rl_flags & RL_FLAG_AUTOPAD) == 0 &&
2265             (*m_head)->m_pkthdr.len < RL_IP4CSUMTX_PADLEN &&
2266             ((*m_head)->m_pkthdr.csum_flags & CSUM_IP) != 0) {
2267                 padlen = RL_MIN_FRAMELEN - (*m_head)->m_pkthdr.len;
2268                 if (M_WRITABLE(*m_head) == 0) {
2269                         /* Get a writable copy. */
2270                         m_new = m_dup(*m_head, M_DONTWAIT);
2271                         m_freem(*m_head);
2272                         if (m_new == NULL) {
2273                                 *m_head = NULL;
2274                                 return (ENOBUFS);
2275                         }
2276                         *m_head = m_new;
2277                 }
2278                 if ((*m_head)->m_next != NULL ||
2279                     M_TRAILINGSPACE(*m_head) < padlen) {
2280                         m_new = m_defrag(*m_head, M_DONTWAIT);
2281                         if (m_new == NULL) {
2282                                 m_freem(*m_head);
2283                                 *m_head = NULL;
2284                                 return (ENOBUFS);
2285                         }
2286                 } else
2287                         m_new = *m_head;
2288
2289                 /*
2290                  * Manually pad short frames, and zero the pad space
2291                  * to avoid leaking data.
2292                  */
2293                 bzero(mtod(m_new, char *) + m_new->m_pkthdr.len, padlen);
2294                 m_new->m_pkthdr.len += padlen;
2295                 m_new->m_len = m_new->m_pkthdr.len;
2296                 *m_head = m_new;
2297         }
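        /*
         * Padding example (taking RL_MIN_FRAMELEN as the conventional
         * 60 bytes, the Ethernet minimum less CRC, for illustration):
         * a frame short enough to pass the length check above, say
         * 40 bytes with CSUM_IP set, is extended by padlen = 60 - 40 =
         * 20 zeroed bytes before being mapped, so the autopad engine
         * never touches the checksummed frame.
         */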
2298
2299         prod = sc->rl_ldata.rl_tx_prodidx;
2300         txd = &sc->rl_ldata.rl_tx_desc[prod];
2301         error = bus_dmamap_load_mbuf_sg(sc->rl_ldata.rl_tx_mtag, txd->tx_dmamap,
2302             *m_head, segs, &nsegs, BUS_DMA_NOWAIT);
2303         if (error == EFBIG) {
2304                 m_new = m_collapse(*m_head, M_DONTWAIT, RL_NTXSEGS);
2305                 if (m_new == NULL) {
2306                         m_freem(*m_head);
2307                         *m_head = NULL;
2308                         return (ENOBUFS);
2309                 }
2310                 *m_head = m_new;
2311                 error = bus_dmamap_load_mbuf_sg(sc->rl_ldata.rl_tx_mtag,
2312                     txd->tx_dmamap, *m_head, segs, &nsegs, BUS_DMA_NOWAIT);
2313                 if (error != 0) {
2314                         m_freem(*m_head);
2315                         *m_head = NULL;
2316                         return (error);
2317                 }
2318         } else if (error != 0)
2319                 return (error);
2320         if (nsegs == 0) {
2321                 m_freem(*m_head);
2322                 *m_head = NULL;
2323                 return (EIO);
2324         }
2325
2326         /* Check for number of available descriptors. */
2327         if (sc->rl_ldata.rl_tx_free - nsegs <= 1) {
2328                 bus_dmamap_unload(sc->rl_ldata.rl_tx_mtag, txd->tx_dmamap);
2329                 return (ENOBUFS);
2330         }
2331
2332         bus_dmamap_sync(sc->rl_ldata.rl_tx_mtag, txd->tx_dmamap,
2333             BUS_DMASYNC_PREWRITE);
2334
2335         /*
2336          * Set up checksum offload. Note: checksum offload bits must
2337          * appear in all descriptors of a multi-descriptor transmit
2338                 * attempt. This is a hard requirement, confirmed
2339                 * by testing with an 8169 chip.
2340          */
2341         vlanctl = 0;
2342         csum_flags = 0;
2343         if (((*m_head)->m_pkthdr.csum_flags & CSUM_TSO) != 0)
2344                 csum_flags = RL_TDESC_CMD_LGSEND |
2345                     ((uint32_t)(*m_head)->m_pkthdr.tso_segsz <<
2346                     RL_TDESC_CMD_MSSVAL_SHIFT);
2347         else {
2348                 /*
2349                  * Unconditionally enable IP checksum if TCP or UDP
2350                  * checksum is required; otherwise, the TCP/UDP
2351                  * checksum bits have no effect.
2352                  */
2353                 if (((*m_head)->m_pkthdr.csum_flags & RE_CSUM_FEATURES) != 0) {
2354                         if ((sc->rl_flags & RL_FLAG_DESCV2) == 0) {
2355                                 csum_flags |= RL_TDESC_CMD_IPCSUM;
2356                                 if (((*m_head)->m_pkthdr.csum_flags &
2357                                     CSUM_TCP) != 0)
2358                                         csum_flags |= RL_TDESC_CMD_TCPCSUM;
2359                                 if (((*m_head)->m_pkthdr.csum_flags &
2360                                     CSUM_UDP) != 0)
2361                                         csum_flags |= RL_TDESC_CMD_UDPCSUM;
2362                         } else {
2363                                 vlanctl |= RL_TDESC_CMD_IPCSUMV2;
2364                                 if (((*m_head)->m_pkthdr.csum_flags &
2365                                     CSUM_TCP) != 0)
2366                                         vlanctl |= RL_TDESC_CMD_TCPCSUMV2;
2367                                 if (((*m_head)->m_pkthdr.csum_flags &
2368                                     CSUM_UDP) != 0)
2369                                         vlanctl |= RL_TDESC_CMD_UDPCSUMV2;
2370                         }
2371                 }
2372         }
2373
2374         /*
2375          * Set up hardware VLAN tagging. Note: vlan tag info must
2376          * appear in all descriptors of a multi-descriptor
2377          * transmission attempt.
2378          */
2379         if ((*m_head)->m_flags & M_VLANTAG)
2380                 vlanctl |= bswap16((*m_head)->m_pkthdr.ether_vtag) |
2381                     RL_TDESC_VLANCTL_TAG;
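        /*
         * Tag byte order (illustrative): the descriptor wants the tag
         * byte-swapped relative to host order, matching the bswap16()
         * performed on receive in re_rxeof(); e.g. VLAN 100 (0x0064)
         * is written into the descriptor as 0x6400.
         */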
2382
2383         si = prod;
2384         for (i = 0; i < nsegs; i++, prod = RL_TX_DESC_NXT(sc, prod)) {
2385                 desc = &sc->rl_ldata.rl_tx_list[prod];
2386                 desc->rl_vlanctl = htole32(vlanctl);
2387                 desc->rl_bufaddr_lo = htole32(RL_ADDR_LO(segs[i].ds_addr));
2388                 desc->rl_bufaddr_hi = htole32(RL_ADDR_HI(segs[i].ds_addr));
2389                 cmdstat = segs[i].ds_len;
2390                 if (i != 0)
2391                         cmdstat |= RL_TDESC_CMD_OWN;
2392                 if (prod == sc->rl_ldata.rl_tx_desc_cnt - 1)
2393                         cmdstat |= RL_TDESC_CMD_EOR;
2394                 desc->rl_cmdstat = htole32(cmdstat | csum_flags);
2395                 sc->rl_ldata.rl_tx_free--;
2396         }
2397         /* Update producer index. */
2398         sc->rl_ldata.rl_tx_prodidx = prod;
2399
2400         /* Set EOF on the last descriptor. */
2401         ei = RL_TX_DESC_PRV(sc, prod);
2402         desc = &sc->rl_ldata.rl_tx_list[ei];
2403         desc->rl_cmdstat |= htole32(RL_TDESC_CMD_EOF);
2404
2405         desc = &sc->rl_ldata.rl_tx_list[si];
2406         /* Set SOF and transfer ownership of packet to the chip. */
2407         desc->rl_cmdstat |= htole32(RL_TDESC_CMD_OWN | RL_TDESC_CMD_SOF);
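        /*
         * Ownership handoff illustration: for a 3-segment packet
         * starting at descriptor si, the loop above set OWN on
         * descriptors si + 1 and si + 2 but deliberately skipped it
         * for i == 0.  EOF was then set on the last descriptor, and
         * only the store above hands OWN (plus SOF) to descriptor si,
         * so the chip never sees a partially built chain.
         */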
2408
2409         /*
2410          * Ensure that the map for this transmission
2411          * is placed at the array index of the last descriptor
2412          * in this chain.  (Swap last and first dmamaps.)
2413          */
2414         txd_last = &sc->rl_ldata.rl_tx_desc[ei];
2415         map = txd->tx_dmamap;
2416         txd->tx_dmamap = txd_last->tx_dmamap;
2417         txd_last->tx_dmamap = map;
2418         txd_last->tx_m = *m_head;
2419
2420         return (0);
2421 }
2422
2423 static void
2424 re_tx_task(void *arg, int npending)
2425 {
2426         struct ifnet            *ifp;
2427
2428         ifp = arg;
2429         re_start(ifp);
2430 }
2431
2432 /*
2433  * Main transmit routine for C+ and gigE NICs.
2434  */
2435 static void
2436 re_start(struct ifnet *ifp)
2437 {
2438         struct rl_softc         *sc;
2439         struct mbuf             *m_head;
2440         int                     queued;
2441
2442         sc = ifp->if_softc;
2443
2444         RL_LOCK(sc);
2445
2446         if ((ifp->if_drv_flags & (IFF_DRV_RUNNING | IFF_DRV_OACTIVE)) !=
2447             IFF_DRV_RUNNING || (sc->rl_flags & RL_FLAG_LINK) == 0) {
2448                 RL_UNLOCK(sc);
2449                 return;
2450         }
2451
2452         for (queued = 0; !IFQ_DRV_IS_EMPTY(&ifp->if_snd) &&
2453             sc->rl_ldata.rl_tx_free > 1;) {
2454                 IFQ_DRV_DEQUEUE(&ifp->if_snd, m_head);
2455                 if (m_head == NULL)
2456                         break;
2457
2458                 if (re_encap(sc, &m_head) != 0) {
2459                         if (m_head == NULL)
2460                                 break;
2461                         IFQ_DRV_PREPEND(&ifp->if_snd, m_head);
2462                         ifp->if_drv_flags |= IFF_DRV_OACTIVE;
2463                         break;
2464                 }
2465
2466                 /*
2467                  * If there's a BPF listener, bounce a copy of this frame
2468                  * to him.
2469                  */
2470                 ETHER_BPF_MTAP(ifp, m_head);
2471
2472                 queued++;
2473         }
2474
2475         if (queued == 0) {
2476 #ifdef RE_TX_MODERATION
2477                 if (sc->rl_ldata.rl_tx_free != sc->rl_ldata.rl_tx_desc_cnt)
2478                         CSR_WRITE_4(sc, RL_TIMERCNT, 1);
2479 #endif
2480                 RL_UNLOCK(sc);
2481                 return;
2482         }
2483
2484         /* Flush the TX descriptors */
2485
2486         bus_dmamap_sync(sc->rl_ldata.rl_tx_list_tag,
2487             sc->rl_ldata.rl_tx_list_map,
2488             BUS_DMASYNC_PREWRITE|BUS_DMASYNC_PREREAD);
2489
2490         CSR_WRITE_1(sc, sc->rl_txstart, RL_TXSTART_START);
2491
2492 #ifdef RE_TX_MODERATION
2493         /*
2494          * Use the countdown timer for interrupt moderation.
2495          * 'TX done' interrupts are disabled. Instead, we reset the
2496          * countdown timer, which will begin counting until it hits
2497          * the value in the TIMERINT register, and then trigger an
2498          * interrupt. Each time we write to the TIMERCNT register,
2499          * the timer count is reset to 0.
2500          */
2501         CSR_WRITE_4(sc, RL_TIMERCNT, 1);
2502 #endif
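        /*
         * Timing sketch (tick rate is chip dependent, so the numbers
         * are illustrative only): with RL_TIMERINT loaded at init,
         * every write of 1 to RL_TIMERCNT above restarts the count,
         * so a burst of re_start() calls yields one deferred timer
         * interrupt rather than one interrupt per transmitted frame.
         */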
2503
2504         /*
2505          * Set a timeout in case the chip goes out to lunch.
2506          */
2507         sc->rl_watchdog_timer = 5;
2508
2509         RL_UNLOCK(sc);
2510 }
2511
2512 static void
2513 re_init(void *xsc)
2514 {
2515         struct rl_softc         *sc = xsc;
2516
2517         RL_LOCK(sc);
2518         re_init_locked(sc);
2519         RL_UNLOCK(sc);
2520 }
2521
2522 static void
2523 re_init_locked(struct rl_softc *sc)
2524 {
2525         struct ifnet            *ifp = sc->rl_ifp;
2526         struct mii_data         *mii;
2527         uint32_t                reg;
2528         uint16_t                cfg;
2529         union {
2530                 uint32_t align_dummy;
2531                 u_char eaddr[ETHER_ADDR_LEN];
2532         } eaddr;
2533
2534         RL_LOCK_ASSERT(sc);
2535
2536         mii = device_get_softc(sc->rl_miibus);
2537
2538         /*
2539          * Cancel pending I/O and free all RX/TX buffers.
2540          */
2541         re_stop(sc);
2542
2543         /* Put controller into known state. */
2544         re_reset(sc);
2545
2546         /*
2547          * Enable C+ RX and TX mode, as well as VLAN stripping and
2548          * RX checksum offload. We must configure the C+ register
2549          * before all others.
2550          */
2551         cfg = RL_CPLUSCMD_PCI_MRW;
2552         if ((ifp->if_capenable & IFCAP_RXCSUM) != 0)
2553                 cfg |= RL_CPLUSCMD_RXCSUM_ENB;
2554         if ((ifp->if_capenable & IFCAP_VLAN_HWTAGGING) != 0)
2555                 cfg |= RL_CPLUSCMD_VLANSTRIP;
2556         if ((sc->rl_flags & RL_FLAG_MACSTAT) != 0) {
2557                 cfg |= RL_CPLUSCMD_MACSTAT_DIS;
2558                 /* XXX magic. */
2559                 cfg |= 0x0001;
2560         } else
2561                 cfg |= RL_CPLUSCMD_RXENB | RL_CPLUSCMD_TXENB;
2562         CSR_WRITE_2(sc, RL_CPLUS_CMD, cfg);
2563         if (sc->rl_hwrev == RL_HWREV_8169_8110SC ||
2564             sc->rl_hwrev == RL_HWREV_8169_8110SCE) {
2565                 reg = 0x000fff00;
2566                 if ((CSR_READ_1(sc, RL_CFG2) & RL_CFG2_PCI66MHZ) != 0)
2567                         reg |= 0x000000ff;
2568                 if (sc->rl_hwrev == RL_HWREV_8169_8110SCE)
2569                         reg |= 0x00f00000;
2570                 CSR_WRITE_4(sc, 0x7c, reg);
2571                 /* Disable interrupt mitigation. */
2572                 CSR_WRITE_2(sc, 0xe2, 0);
2573         }
2574         /*
2575          * Disable TSO if the interface MTU is greater than the MSS
2576          * the controller allows.
2577          */
2578         if (ifp->if_mtu > RL_TSO_MTU && (ifp->if_capenable & IFCAP_TSO4) != 0) {
2579                 ifp->if_capenable &= ~IFCAP_TSO4;
2580                 ifp->if_hwassist &= ~CSUM_TSO;
2581         }
2582
2583         /*
2584          * Init our MAC address.  Even though the chipset
2585          * documentation doesn't mention it, we need to enter "Config
2586          * register write enable" mode to modify the ID registers.
2587          */
2588         /* Copy MAC address on stack to align. */
2589         bcopy(IF_LLADDR(ifp), eaddr.eaddr, ETHER_ADDR_LEN);
2590         CSR_WRITE_1(sc, RL_EECMD, RL_EEMODE_WRITECFG);
2591         CSR_WRITE_4(sc, RL_IDR0,
2592             htole32(*(u_int32_t *)(&eaddr.eaddr[0])));
2593         CSR_WRITE_4(sc, RL_IDR4,
2594             htole32(*(u_int32_t *)(&eaddr.eaddr[4])));
2595         CSR_WRITE_1(sc, RL_EECMD, RL_EEMODE_OFF);
2596
2597         /*
2598          * For C+ mode, initialize the RX descriptors and mbufs.
2599          */
2600         re_rx_list_init(sc);
2601         re_tx_list_init(sc);
2602
2603         /*
2604          * Load the addresses of the RX and TX lists into the chip.
2605          */
2606
2607         CSR_WRITE_4(sc, RL_RXLIST_ADDR_HI,
2608             RL_ADDR_HI(sc->rl_ldata.rl_rx_list_addr));
2609         CSR_WRITE_4(sc, RL_RXLIST_ADDR_LO,
2610             RL_ADDR_LO(sc->rl_ldata.rl_rx_list_addr));
2611
2612         CSR_WRITE_4(sc, RL_TXLIST_ADDR_HI,
2613             RL_ADDR_HI(sc->rl_ldata.rl_tx_list_addr));
2614         CSR_WRITE_4(sc, RL_TXLIST_ADDR_LO,
2615             RL_ADDR_LO(sc->rl_ldata.rl_tx_list_addr));
2616
2617         /*
2618          * Enable transmit and receive.
2619          */
2620         CSR_WRITE_1(sc, RL_COMMAND, RL_CMD_TX_ENB|RL_CMD_RX_ENB);
2621
2622         /*
2623          * Set the initial TX configuration.
2624          */
2625         if (sc->rl_testmode) {
2626                 if (sc->rl_type == RL_8169)
2627                         CSR_WRITE_4(sc, RL_TXCFG,
2628                             RL_TXCFG_CONFIG|RL_LOOPTEST_ON);
2629                 else
2630                         CSR_WRITE_4(sc, RL_TXCFG,
2631                             RL_TXCFG_CONFIG|RL_LOOPTEST_ON_CPLUS);
2632         } else
2633                 CSR_WRITE_4(sc, RL_TXCFG, RL_TXCFG_CONFIG);
2634
2635         CSR_WRITE_1(sc, RL_EARLY_TX_THRESH, 16);
2636
2637         /*
2638          * Set the initial RX configuration.
2639          */
2640         re_set_rxmode(sc);
2641
2642 #ifdef DEVICE_POLLING
2643         /*
2644          * Disable interrupts if we are polling.
2645          */
2646         if (ifp->if_capenable & IFCAP_POLLING)
2647                 CSR_WRITE_2(sc, RL_IMR, 0);
2648         else    /* otherwise ... */
2649 #endif
2650
2651         /*
2652          * Enable interrupts.
2653          */
2654         if (sc->rl_testmode)
2655                 CSR_WRITE_2(sc, RL_IMR, 0);
2656         else
2657                 CSR_WRITE_2(sc, RL_IMR, RL_INTRS_CPLUS);
2658         CSR_WRITE_2(sc, RL_ISR, RL_INTRS_CPLUS);
2659
2660         /* Set initial TX threshold */
2661         sc->rl_txthresh = RL_TX_THRESH_INIT;
2662
2663         /* Start RX/TX process. */
2664         CSR_WRITE_4(sc, RL_MISSEDPKT, 0);
2665 #ifdef notdef
2666         /* Enable receiver and transmitter. */
2667         CSR_WRITE_1(sc, RL_COMMAND, RL_CMD_TX_ENB|RL_CMD_RX_ENB);
2668 #endif
2669
2670 #ifdef RE_TX_MODERATION
2671         /*
2672          * Initialize the timer interrupt register so that
2673          * a timer interrupt will be generated once the timer
2674          * reaches a certain number of ticks. The timer is
2675          * reloaded on each transmit. This gives us TX interrupt
2676          * moderation, which dramatically improves TX frame rate.
2677          */
2678         if (sc->rl_type == RL_8169)
2679                 CSR_WRITE_4(sc, RL_TIMERINT_8169, 0x800);
2680         else
2681                 CSR_WRITE_4(sc, RL_TIMERINT, 0x400);
2682 #endif
2683
2684         /*
2685          * For 8169 gigE NICs, set the max allowed RX packet
2686          * size so we can receive jumbo frames.
2687          */
2688         if (sc->rl_type == RL_8169)
2689                 CSR_WRITE_2(sc, RL_MAXRXPKTLEN, 16383);
2690
2691         if (sc->rl_testmode)
2692                 return;
2693
2694         mii_mediachg(mii);
2695
2696         CSR_WRITE_1(sc, RL_CFG1, CSR_READ_1(sc, RL_CFG1) | RL_CFG1_DRVLOAD);
2697
2698         ifp->if_drv_flags |= IFF_DRV_RUNNING;
2699         ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;
2700
2701         sc->rl_flags &= ~RL_FLAG_LINK;
2702         sc->rl_watchdog_timer = 0;
2703         callout_reset(&sc->rl_stat_callout, hz, re_tick, sc);
2704 }
2705
2706 /*
2707  * Set media options.
2708  */
2709 static int
2710 re_ifmedia_upd(struct ifnet *ifp)
2711 {
2712         struct rl_softc         *sc;
2713         struct mii_data         *mii;
2714         int                     error;
2715
2716         sc = ifp->if_softc;
2717         mii = device_get_softc(sc->rl_miibus);
2718         RL_LOCK(sc);
2719         error = mii_mediachg(mii);
2720         RL_UNLOCK(sc);
2721
2722         return (error);
2723 }
2724
2725 /*
2726  * Report current media status.
2727  */
2728 static void
2729 re_ifmedia_sts(struct ifnet *ifp, struct ifmediareq *ifmr)
2730 {
2731         struct rl_softc         *sc;
2732         struct mii_data         *mii;
2733
2734         sc = ifp->if_softc;
2735         mii = device_get_softc(sc->rl_miibus);
2736
2737         RL_LOCK(sc);
2738         mii_pollstat(mii);
2739         RL_UNLOCK(sc);
2740         ifmr->ifm_active = mii->mii_media_active;
2741         ifmr->ifm_status = mii->mii_media_status;
2742 }
2743
2744 static int
2745 re_ioctl(struct ifnet *ifp, u_long command, caddr_t data)
2746 {
2747         struct rl_softc         *sc = ifp->if_softc;
2748         struct ifreq            *ifr = (struct ifreq *) data;
2749         struct mii_data         *mii;
2750         int                     error = 0;
2751
2752         switch (command) {
2753         case SIOCSIFMTU:
2754                 if (ifr->ifr_mtu < ETHERMIN || ifr->ifr_mtu > RL_JUMBO_MTU) {
2755                         error = EINVAL;
2756                         break;
2757                 }
2758                 if ((sc->rl_flags & RL_FLAG_NOJUMBO) != 0 &&
2759                     ifr->ifr_mtu > RL_MAX_FRAMELEN) {
2760                         error = EINVAL;
2761                         break;
2762                 }
2763                 RL_LOCK(sc);
2764                 if (ifp->if_mtu != ifr->ifr_mtu)
2765                         ifp->if_mtu = ifr->ifr_mtu;
2766                 if (ifp->if_mtu > RL_TSO_MTU &&
2767                     (ifp->if_capenable & IFCAP_TSO4) != 0) {
2768                         ifp->if_capenable &= ~IFCAP_TSO4;
2769                         ifp->if_hwassist &= ~CSUM_TSO;
2770                 }
2771                 RL_UNLOCK(sc);
2772                 break;
2773         case SIOCSIFFLAGS:
2774                 RL_LOCK(sc);
2775                 if ((ifp->if_flags & IFF_UP) != 0) {
2776                         if ((ifp->if_drv_flags & IFF_DRV_RUNNING) != 0) {
2777                                 if (((ifp->if_flags ^ sc->rl_if_flags)
2778                                     & (IFF_PROMISC | IFF_ALLMULTI)) != 0)
2779                                         re_set_rxmode(sc);
2780                         } else
2781                                 re_init_locked(sc);
2782                 } else {
2783                         if ((ifp->if_drv_flags & IFF_DRV_RUNNING) != 0)
2784                                 re_stop(sc);
2785                 }
2786                 sc->rl_if_flags = ifp->if_flags;
2787                 RL_UNLOCK(sc);
2788                 break;
2789         case SIOCADDMULTI:
2790         case SIOCDELMULTI:
2791                 RL_LOCK(sc);
2792                 re_set_rxmode(sc);
2793                 RL_UNLOCK(sc);
2794                 break;
2795         case SIOCGIFMEDIA:
2796         case SIOCSIFMEDIA:
2797                 mii = device_get_softc(sc->rl_miibus);
2798                 error = ifmedia_ioctl(ifp, ifr, &mii->mii_media, command);
2799                 break;
2800         case SIOCSIFCAP:
2801             {
2802                 int mask, reinit;
2803
2804                 mask = ifr->ifr_reqcap ^ ifp->if_capenable;
2805                 reinit = 0;
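                /*
                 * Example of the XOR toggle logic above: if
                 * IFCAP_HWCSUM is currently enabled and the caller's
                 * ifr_reqcap has it cleared, that bit is set in mask
                 * and the "^=" operations below flip if_capenable to
                 * the requested state; capability bits the caller left
                 * alone never appear in mask and stay untouched.
                 */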
2806 #ifdef DEVICE_POLLING
2807                 if (mask & IFCAP_POLLING) {
2808                         if (ifr->ifr_reqcap & IFCAP_POLLING) {
2809                                 error = ether_poll_register(re_poll, ifp);
2810                                 if (error)
2811                                         return (error);
2812                                 RL_LOCK(sc);
2813                                 /* Disable interrupts */
2814                                 CSR_WRITE_2(sc, RL_IMR, 0x0000);
2815                                 ifp->if_capenable |= IFCAP_POLLING;
2816                                 RL_UNLOCK(sc);
2817                         } else {
2818                                 error = ether_poll_deregister(ifp);
2819                                 /* Enable interrupts. */
2820                                 RL_LOCK(sc);
2821                                 CSR_WRITE_2(sc, RL_IMR, RL_INTRS_CPLUS);
2822                                 ifp->if_capenable &= ~IFCAP_POLLING;
2823                                 RL_UNLOCK(sc);
2824                         }
2825                 }
2826 #endif /* DEVICE_POLLING */
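                /*
                 * Keep if_hwassist in step with the TX checksum capability
                 * so the stack only requests offload the chip will perform.
                 */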
                if (mask & IFCAP_HWCSUM) {
                        ifp->if_capenable ^= IFCAP_HWCSUM;
                        if (ifp->if_capenable & IFCAP_TXCSUM)
                                ifp->if_hwassist |= RE_CSUM_FEATURES;
                        else
                                ifp->if_hwassist &= ~RE_CSUM_FEATURES;
                        reinit = 1;
                }
                if (mask & IFCAP_VLAN_HWTAGGING) {
                        ifp->if_capenable ^= IFCAP_VLAN_HWTAGGING;
                        reinit = 1;
                }
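                /*
                 * TSO4 may be toggled freely, but it is never left enabled
                 * while the MTU exceeds RL_TSO_MTU (see SIOCSIFMTU above).
                 */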
                if (mask & IFCAP_TSO4) {
                        ifp->if_capenable ^= IFCAP_TSO4;
                        if ((IFCAP_TSO4 & ifp->if_capenable) &&
                            (IFCAP_TSO4 & ifp->if_capabilities))
                                ifp->if_hwassist |= CSUM_TSO;
                        else
                                ifp->if_hwassist &= ~CSUM_TSO;
                        if (ifp->if_mtu > RL_TSO_MTU &&
                            (ifp->if_capenable & IFCAP_TSO4) != 0) {
                                ifp->if_capenable &= ~IFCAP_TSO4;
                                ifp->if_hwassist &= ~CSUM_TSO;
                        }
                }
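                /* WOL sub-capabilities require IFCAP_WOL in if_capabilities. */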
                if ((mask & IFCAP_WOL) != 0 &&
                    (ifp->if_capabilities & IFCAP_WOL) != 0) {
                        if ((mask & IFCAP_WOL_UCAST) != 0)
                                ifp->if_capenable ^= IFCAP_WOL_UCAST;
                        if ((mask & IFCAP_WOL_MCAST) != 0)
                                ifp->if_capenable ^= IFCAP_WOL_MCAST;
                        if ((mask & IFCAP_WOL_MAGIC) != 0)
                                ifp->if_capenable ^= IFCAP_WOL_MAGIC;
                }
                if (reinit && (ifp->if_drv_flags & IFF_DRV_RUNNING) != 0)
                        re_init(sc);
                VLAN_CAPABILITIES(ifp);
            }
                break;
        default:
                error = ether_ioctl(ifp, command, data);
                break;
        }

        return (error);
}

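/*
 * Watchdog timer, counted down once a second.  On expiry, first try to
 * recover by reclaiming completed TX descriptors; only reset the chip
 * if transmissions are genuinely stuck.
 */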
static void
re_watchdog(struct rl_softc *sc)
{
        struct ifnet            *ifp;

        RL_LOCK_ASSERT(sc);

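        /* Tick down; do nothing unless an armed timer just expired. */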
        if (sc->rl_watchdog_timer == 0 || --sc->rl_watchdog_timer != 0)
                return;

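        /*
         * Reclaim completed TX descriptors.  If that empties the ring,
         * the timeout was due to missed TX completion interrupts and
         * rescheduling the transmit task is sufficient recovery.
         */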
        ifp = sc->rl_ifp;
        re_txeof(sc);
        if (sc->rl_ldata.rl_tx_free == sc->rl_ldata.rl_tx_desc_cnt) {
                if_printf(ifp, "watchdog timeout (missed Tx interrupts) "
                    "-- recovering\n");
                if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
                        taskqueue_enqueue_fast(taskqueue_fast, &sc->rl_txtask);
                return;
        }

        if_printf(ifp, "watchdog timeout\n");
        ifp->if_oerrors++;

        re_rxeof(sc, NULL);
        re_init_locked(sc);
        if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
                taskqueue_enqueue_fast(taskqueue_fast, &sc->rl_txtask);
}

/*
 * Stop the adapter and free any mbufs allocated to the
 * RX and TX lists.
 */
static void
re_stop(struct rl_softc *sc)
{
        int                     i;
        struct ifnet            *ifp;
        struct rl_txdesc        *txd;
        struct rl_rxdesc        *rxd;

        RL_LOCK_ASSERT(sc);

        ifp = sc->rl_ifp;

        sc->rl_watchdog_timer = 0;
        callout_stop(&sc->rl_stat_callout);
        ifp->if_drv_flags &= ~(IFF_DRV_RUNNING | IFF_DRV_OACTIVE);

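        /*
         * Chips with RL_FLAG_CMDSTOP are stopped with a stop request
         * while the TX/RX enable bits stay set; other chips are stopped
         * by clearing the command register outright.
         */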
        if ((sc->rl_flags & RL_FLAG_CMDSTOP) != 0)
                CSR_WRITE_1(sc, RL_COMMAND, RL_CMD_STOPREQ | RL_CMD_TX_ENB |
                    RL_CMD_RX_ENB);
        else
                CSR_WRITE_1(sc, RL_COMMAND, 0x00);
        DELAY(1000);
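        /* Mask all interrupts and ack any that are still pending. */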
        CSR_WRITE_2(sc, RL_IMR, 0x0000);
        CSR_WRITE_2(sc, RL_ISR, 0xFFFF);

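        /* Discard any partially reassembled RX packet. */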
        if (sc->rl_head != NULL) {
                m_freem(sc->rl_head);
                sc->rl_head = sc->rl_tail = NULL;
        }

        /* Free the TX list buffers. */

        for (i = 0; i < sc->rl_ldata.rl_tx_desc_cnt; i++) {
                txd = &sc->rl_ldata.rl_tx_desc[i];
                if (txd->tx_m != NULL) {
                        bus_dmamap_sync(sc->rl_ldata.rl_tx_mtag,
                            txd->tx_dmamap, BUS_DMASYNC_POSTWRITE);
                        bus_dmamap_unload(sc->rl_ldata.rl_tx_mtag,
                            txd->tx_dmamap);
                        m_freem(txd->tx_m);
                        txd->tx_m = NULL;
                }
        }

        /* Free the RX list buffers. */

        for (i = 0; i < sc->rl_ldata.rl_rx_desc_cnt; i++) {
                rxd = &sc->rl_ldata.rl_rx_desc[i];
                if (rxd->rx_m != NULL) {
                        bus_dmamap_sync(sc->rl_ldata.rl_rx_mtag,
                            rxd->rx_dmamap, BUS_DMASYNC_POSTREAD);
                        bus_dmamap_unload(sc->rl_ldata.rl_rx_mtag,
                            rxd->rx_dmamap);
                        m_freem(rxd->rx_m);
                        rxd->rx_m = NULL;
                }
        }
}

/*
 * Device suspend routine.  Stop the interface and save some PCI
 * settings in case the BIOS doesn't restore them properly on
 * resume.
 */
static int
re_suspend(device_t dev)
{
        struct rl_softc         *sc;

        sc = device_get_softc(dev);

        RL_LOCK(sc);
        re_stop(sc);
        re_setwol(sc);
        sc->suspended = 1;
        RL_UNLOCK(sc);

        return (0);
}

/*
 * Device resume routine.  Restore some PCI settings in case the BIOS
 * doesn't, re-enable busmastering, and restart the interface if
 * appropriate.
 */
static int
re_resume(device_t dev)
{
        struct rl_softc         *sc;
        struct ifnet            *ifp;

        sc = device_get_softc(dev);

        RL_LOCK(sc);

        ifp = sc->rl_ifp;
        /* Take controller out of sleep mode. */
        if ((sc->rl_flags & RL_FLAG_MACSLEEP) != 0) {
                if ((CSR_READ_1(sc, RL_MACDBG) & 0x80) == 0x80)
                        CSR_WRITE_1(sc, RL_GPIO,
                            CSR_READ_1(sc, RL_GPIO) | 0x01);
        }

        /* Reinitialize the interface if necessary. */
        if (ifp->if_flags & IFF_UP)
                re_init_locked(sc);

        /*
         * Clear WOL matching so that normal Rx filtering
         * doesn't interfere with WOL patterns.
         */
        re_clrwol(sc);
        sc->suspended = 0;
        RL_UNLOCK(sc);

        return (0);
}

/*
 * Stop all chip I/O so that the kernel's probe routines don't
 * get confused by errant DMAs when rebooting.
 */
static int
re_shutdown(device_t dev)
{
        struct rl_softc         *sc;

        sc = device_get_softc(dev);

        RL_LOCK(sc);
        re_stop(sc);
        /*
         * Mark the interface as down; otherwise we would panic if an
         * interrupt came in later, which can happen in some cases.
         */
        sc->rl_ifp->if_flags &= ~IFF_UP;
        re_setwol(sc);
        RL_UNLOCK(sc);

        return (0);
}

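/*
 * Program the chip's Wake On LAN registers from the interface's
 * enabled WOL capabilities and arm PME generation in PCI config space.
 */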
static void
re_setwol(struct rl_softc *sc)
{
        struct ifnet            *ifp;
        int                     pmc;
        uint16_t                pmstat;
        uint8_t                 v;

        RL_LOCK_ASSERT(sc);

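        /* Nothing to do without a PCI power management capability. */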
        if (pci_find_extcap(sc->rl_dev, PCIY_PMG, &pmc) != 0)
                return;

        ifp = sc->rl_ifp;
        /* Put controller into sleep mode. */
        if ((sc->rl_flags & RL_FLAG_MACSLEEP) != 0) {
                if ((CSR_READ_1(sc, RL_MACDBG) & 0x80) == 0x80)
                        CSR_WRITE_1(sc, RL_GPIO,
                            CSR_READ_1(sc, RL_GPIO) & ~0x01);
        }
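        /* Some chips need the receiver kept enabled for WOL wakeup. */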
        if ((ifp->if_capenable & IFCAP_WOL) != 0 &&
            (sc->rl_flags & RL_FLAG_WOLRXENB) != 0)
                CSR_WRITE_1(sc, RL_COMMAND, RL_CMD_RX_ENB);
        /* Enable config register write. */
        CSR_WRITE_1(sc, RL_EECMD, RL_EE_MODE);

        /* Enable PME. */
        v = CSR_READ_1(sc, RL_CFG1);
        v &= ~RL_CFG1_PME;
        if ((ifp->if_capenable & IFCAP_WOL) != 0)
                v |= RL_CFG1_PME;
        CSR_WRITE_1(sc, RL_CFG1, v);

        v = CSR_READ_1(sc, RL_CFG3);
        v &= ~(RL_CFG3_WOL_LINK | RL_CFG3_WOL_MAGIC);
        if ((ifp->if_capenable & IFCAP_WOL_MAGIC) != 0)
                v |= RL_CFG3_WOL_MAGIC;
        CSR_WRITE_1(sc, RL_CFG3, v);

        /* Config register write done. */
        CSR_WRITE_1(sc, RL_EECMD, RL_EEMODE_OFF);

        v = CSR_READ_1(sc, RL_CFG5);
        v &= ~(RL_CFG5_WOL_BCAST | RL_CFG5_WOL_MCAST | RL_CFG5_WOL_UCAST);
        v &= ~RL_CFG5_WOL_LANWAKE;
        if ((ifp->if_capenable & IFCAP_WOL_UCAST) != 0)
                v |= RL_CFG5_WOL_UCAST;
        if ((ifp->if_capenable & IFCAP_WOL_MCAST) != 0)
                v |= RL_CFG5_WOL_MCAST | RL_CFG5_WOL_BCAST;
        if ((ifp->if_capenable & IFCAP_WOL) != 0)
                v |= RL_CFG5_WOL_LANWAKE;
        CSR_WRITE_1(sc, RL_CFG5, v);

        /*
         * The hardware appears to reset its link speed to 100Mbps in
         * power-down mode, so there is no need for the driver to switch
         * to 100Mbps explicitly.
         */

        /* Request PME if WOL is requested. */
        pmstat = pci_read_config(sc->rl_dev, pmc + PCIR_POWER_STATUS, 2);
        pmstat &= ~(PCIM_PSTAT_PME | PCIM_PSTAT_PMEENABLE);
        if ((ifp->if_capenable & IFCAP_WOL) != 0)
                pmstat |= PCIM_PSTAT_PME | PCIM_PSTAT_PMEENABLE;
        pci_write_config(sc->rl_dev, pmc + PCIR_POWER_STATUS, pmstat, 2);
}

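/*
 * Clear the WOL configuration programmed by re_setwol() so stale
 * wakeup filters do not disturb normal receive filtering.
 */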
static void
re_clrwol(struct rl_softc *sc)
{
        int                     pmc;
        uint8_t                 v;

        RL_LOCK_ASSERT(sc);

        if (pci_find_extcap(sc->rl_dev, PCIY_PMG, &pmc) != 0)
                return;

        /* Enable config register write. */
        CSR_WRITE_1(sc, RL_EECMD, RL_EE_MODE);

        v = CSR_READ_1(sc, RL_CFG3);
        v &= ~(RL_CFG3_WOL_LINK | RL_CFG3_WOL_MAGIC);
        CSR_WRITE_1(sc, RL_CFG3, v);

        /* Config register write done. */
        CSR_WRITE_1(sc, RL_EECMD, RL_EEMODE_OFF);

        v = CSR_READ_1(sc, RL_CFG5);
        v &= ~(RL_CFG5_WOL_BCAST | RL_CFG5_WOL_MCAST | RL_CFG5_WOL_UCAST);
        v &= ~RL_CFG5_WOL_LANWAKE;
        CSR_WRITE_1(sc, RL_CFG5, v);
}