]> CyberLeo.Net >> Repos - FreeBSD/releng/8.0.git/blob - sys/dev/re/if_re.c
Adjust to reflect 8.0-RELEASE.
[FreeBSD/releng/8.0.git] / sys / dev / re / if_re.c
1 /*-
2  * Copyright (c) 1997, 1998-2003
3  *      Bill Paul <wpaul@windriver.com>.  All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice, this list of conditions and the following disclaimer.
10  * 2. Redistributions in binary form must reproduce the above copyright
11  *    notice, this list of conditions and the following disclaimer in the
12  *    documentation and/or other materials provided with the distribution.
13  * 3. All advertising materials mentioning features or use of this software
14  *    must display the following acknowledgement:
15  *      This product includes software developed by Bill Paul.
16  * 4. Neither the name of the author nor the names of any co-contributors
17  *    may be used to endorse or promote products derived from this software
18  *    without specific prior written permission.
19  *
20  * THIS SOFTWARE IS PROVIDED BY Bill Paul AND CONTRIBUTORS ``AS IS'' AND
21  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23  * ARE DISCLAIMED.  IN NO EVENT SHALL Bill Paul OR THE VOICES IN HIS HEAD
24  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
25  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
26  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
27  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
28  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
29  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
30  * THE POSSIBILITY OF SUCH DAMAGE.
31  */
32
33 #include <sys/cdefs.h>
34 __FBSDID("$FreeBSD$");
35
36 /*
37  * RealTek 8139C+/8169/8169S/8110S/8168/8111/8101E PCI NIC driver
38  *
39  * Written by Bill Paul <wpaul@windriver.com>
40  * Senior Networking Software Engineer
41  * Wind River Systems
42  */
43
44 /*
45  * This driver is designed to support RealTek's next generation of
46  * 10/100 and 10/100/1000 PCI ethernet controllers. There are currently
47  * seven devices in this family: the RTL8139C+, the RTL8169, the RTL8169S,
48  * RTL8110S, the RTL8168, the RTL8111 and the RTL8101E.
49  *
50  * The 8139C+ is a 10/100 ethernet chip. It is backwards compatible
51  * with the older 8139 family, however it also supports a special
52  * C+ mode of operation that provides several new performance enhancing
53  * features. These include:
54  *
55  *      o Descriptor based DMA mechanism. Each descriptor represents
56  *        a single packet fragment. Data buffers may be aligned on
57  *        any byte boundary.
58  *
59  *      o 64-bit DMA
60  *
61  *      o TCP/IP checksum offload for both RX and TX
62  *
63  *      o High and normal priority transmit DMA rings
64  *
65  *      o VLAN tag insertion and extraction
66  *
67  *      o TCP large send (segmentation offload)
68  *
69  * Like the 8139, the 8139C+ also has a built-in 10/100 PHY. The C+
70  * programming API is fairly straightforward. The RX filtering, EEPROM
71  * access and PHY access is the same as it is on the older 8139 series
72  * chips.
73  *
74  * The 8169 is a 64-bit 10/100/1000 gigabit ethernet MAC. It has almost the
75  * same programming API and feature set as the 8139C+ with the following
76  * differences and additions:
77  *
78  *      o 1000Mbps mode
79  *
80  *      o Jumbo frames
81  *
82  *      o GMII and TBI ports/registers for interfacing with copper
83  *        or fiber PHYs
84  *
85  *      o RX and TX DMA rings can have up to 1024 descriptors
86  *        (the 8139C+ allows a maximum of 64)
87  *
88  *      o Slight differences in register layout from the 8139C+
89  *
90  * The TX start and timer interrupt registers are at different locations
91  * on the 8169 than they are on the 8139C+. Also, the status word in the
92  * RX descriptor has a slightly different bit layout. The 8169 does not
93  * have a built-in PHY. Most reference boards use a Marvell 88E1000 'Alaska'
94  * copper gigE PHY.
95  *
96  * The 8169S/8110S 10/100/1000 devices have built-in copper gigE PHYs
97  * (the 'S' stands for 'single-chip'). These devices have the same
98  * programming API as the older 8169, but also have some vendor-specific
99  * registers for the on-board PHY. The 8110S is a LAN-on-motherboard
100  * part designed to be pin-compatible with the RealTek 8100 10/100 chip.
101  *
102  * This driver takes advantage of the RX and TX checksum offload and
103  * VLAN tag insertion/extraction features. It also implements TX
104  * interrupt moderation using the timer interrupt registers, which
105  * significantly reduces TX interrupt load. There is also support
106  * for jumbo frames, however the 8169/8169S/8110S can not transmit
107  * jumbo frames larger than 7440, so the max MTU possible with this
108  * driver is 7422 bytes.
109  */
110
111 #ifdef HAVE_KERNEL_OPTION_HEADERS
112 #include "opt_device_polling.h"
113 #endif
114
115 #include <sys/param.h>
116 #include <sys/endian.h>
117 #include <sys/systm.h>
118 #include <sys/sockio.h>
119 #include <sys/mbuf.h>
120 #include <sys/malloc.h>
121 #include <sys/module.h>
122 #include <sys/kernel.h>
123 #include <sys/socket.h>
124 #include <sys/lock.h>
125 #include <sys/mutex.h>
126 #include <sys/taskqueue.h>
127
128 #include <net/if.h>
129 #include <net/if_arp.h>
130 #include <net/ethernet.h>
131 #include <net/if_dl.h>
132 #include <net/if_media.h>
133 #include <net/if_types.h>
134 #include <net/if_vlan_var.h>
135
136 #include <net/bpf.h>
137
138 #include <machine/bus.h>
139 #include <machine/resource.h>
140 #include <sys/bus.h>
141 #include <sys/rman.h>
142
143 #include <dev/mii/mii.h>
144 #include <dev/mii/miivar.h>
145
146 #include <dev/pci/pcireg.h>
147 #include <dev/pci/pcivar.h>
148
149 #include <pci/if_rlreg.h>
150
151 MODULE_DEPEND(re, pci, 1, 1, 1);
152 MODULE_DEPEND(re, ether, 1, 1, 1);
153 MODULE_DEPEND(re, miibus, 1, 1, 1);
154
155 /* "device miibus" required.  See GENERIC if you get errors here. */
156 #include "miibus_if.h"
157
158 /* Tunables. */
159 static int msi_disable = 0;
160 TUNABLE_INT("hw.re.msi_disable", &msi_disable);
161 static int prefer_iomap = 0;
162 TUNABLE_INT("hw.re.prefer_iomap", &prefer_iomap);
163
164 #define RE_CSUM_FEATURES    (CSUM_IP | CSUM_TCP | CSUM_UDP)
165
166 /*
167  * Various supported device vendors/types and their names.
168  */
169 static struct rl_type re_devs[] = {
170         { DLINK_VENDORID, DLINK_DEVICEID_528T, 0,
171             "D-Link DGE-528(T) Gigabit Ethernet Adapter" },
172         { RT_VENDORID, RT_DEVICEID_8139, 0,
173             "RealTek 8139C+ 10/100BaseTX" },
174         { RT_VENDORID, RT_DEVICEID_8101E, 0,
175             "RealTek 8101E/8102E/8102EL PCIe 10/100baseTX" },
176         { RT_VENDORID, RT_DEVICEID_8168, 0,
177             "RealTek 8168/8168B/8168C/8168CP/8168D/8168DP/"
178             "8111B/8111C/8111CP/8111DP PCIe Gigabit Ethernet" },
179         { RT_VENDORID, RT_DEVICEID_8169, 0,
180             "RealTek 8169/8169S/8169SB(L)/8110S/8110SB(L) Gigabit Ethernet" },
181         { RT_VENDORID, RT_DEVICEID_8169SC, 0,
182             "RealTek 8169SC/8110SC Single-chip Gigabit Ethernet" },
183         { COREGA_VENDORID, COREGA_DEVICEID_CGLAPCIGT, 0,
184             "Corega CG-LAPCIGT (RTL8169S) Gigabit Ethernet" },
185         { LINKSYS_VENDORID, LINKSYS_DEVICEID_EG1032, 0,
186             "Linksys EG1032 (RTL8169S) Gigabit Ethernet" },
187         { USR_VENDORID, USR_DEVICEID_997902, 0,
188             "US Robotics 997902 (RTL8169S) Gigabit Ethernet" }
189 };
190
191 static struct rl_hwrev re_hwrevs[] = {
192         { RL_HWREV_8139, RL_8139,  "" },
193         { RL_HWREV_8139A, RL_8139, "A" },
194         { RL_HWREV_8139AG, RL_8139, "A-G" },
195         { RL_HWREV_8139B, RL_8139, "B" },
196         { RL_HWREV_8130, RL_8139, "8130" },
197         { RL_HWREV_8139C, RL_8139, "C" },
198         { RL_HWREV_8139D, RL_8139, "8139D/8100B/8100C" },
199         { RL_HWREV_8139CPLUS, RL_8139CPLUS, "C+"},
200         { RL_HWREV_8168_SPIN1, RL_8169, "8168"},
201         { RL_HWREV_8169, RL_8169, "8169"},
202         { RL_HWREV_8169S, RL_8169, "8169S"},
203         { RL_HWREV_8110S, RL_8169, "8110S"},
204         { RL_HWREV_8169_8110SB, RL_8169, "8169SB/8110SB"},
205         { RL_HWREV_8169_8110SC, RL_8169, "8169SC/8110SC"},
206         { RL_HWREV_8169_8110SBL, RL_8169, "8169SBL/8110SBL"},
207         { RL_HWREV_8169_8110SCE, RL_8169, "8169SC/8110SC"},
208         { RL_HWREV_8100, RL_8139, "8100"},
209         { RL_HWREV_8101, RL_8139, "8101"},
210         { RL_HWREV_8100E, RL_8169, "8100E"},
211         { RL_HWREV_8101E, RL_8169, "8101E"},
212         { RL_HWREV_8102E, RL_8169, "8102E"},
213         { RL_HWREV_8102EL, RL_8169, "8102EL"},
214         { RL_HWREV_8102EL_SPIN1, RL_8169, "8102EL"},
215         { RL_HWREV_8168_SPIN2, RL_8169, "8168"},
216         { RL_HWREV_8168_SPIN3, RL_8169, "8168"},
217         { RL_HWREV_8168C, RL_8169, "8168C/8111C"},
218         { RL_HWREV_8168C_SPIN2, RL_8169, "8168C/8111C"},
219         { RL_HWREV_8168CP, RL_8169, "8168CP/8111CP"},
220         { RL_HWREV_8168D, RL_8169, "8168D/8111D"},
221         { RL_HWREV_8168DP, RL_8169, "8168DP/8111DP"},
222         { 0, 0, NULL }
223 };
224
225 static int re_probe             (device_t);
226 static int re_attach            (device_t);
227 static int re_detach            (device_t);
228
229 static int re_encap             (struct rl_softc *, struct mbuf **);
230
231 static void re_dma_map_addr     (void *, bus_dma_segment_t *, int, int);
232 static int re_allocmem          (device_t, struct rl_softc *);
233 static __inline void re_discard_rxbuf
234                                 (struct rl_softc *, int);
235 static int re_newbuf            (struct rl_softc *, int);
236 static int re_rx_list_init      (struct rl_softc *);
237 static int re_tx_list_init      (struct rl_softc *);
238 #ifdef RE_FIXUP_RX
239 static __inline void re_fixup_rx
240                                 (struct mbuf *);
241 #endif
242 static int re_rxeof             (struct rl_softc *, int *);
243 static void re_txeof            (struct rl_softc *);
244 #ifdef DEVICE_POLLING
245 static int re_poll              (struct ifnet *, enum poll_cmd, int);
246 static int re_poll_locked       (struct ifnet *, enum poll_cmd, int);
247 #endif
248 static int re_intr              (void *);
249 static void re_tick             (void *);
250 static void re_tx_task          (void *, int);
251 static void re_int_task         (void *, int);
252 static void re_start            (struct ifnet *);
253 static int re_ioctl             (struct ifnet *, u_long, caddr_t);
254 static void re_init             (void *);
255 static void re_init_locked      (struct rl_softc *);
256 static void re_stop             (struct rl_softc *);
257 static void re_watchdog         (struct rl_softc *);
258 static int re_suspend           (device_t);
259 static int re_resume            (device_t);
260 static int re_shutdown          (device_t);
261 static int re_ifmedia_upd       (struct ifnet *);
262 static void re_ifmedia_sts      (struct ifnet *, struct ifmediareq *);
263
264 static void re_eeprom_putbyte   (struct rl_softc *, int);
265 static void re_eeprom_getword   (struct rl_softc *, int, u_int16_t *);
266 static void re_read_eeprom      (struct rl_softc *, caddr_t, int, int);
267 static int re_gmii_readreg      (device_t, int, int);
268 static int re_gmii_writereg     (device_t, int, int, int);
269
270 static int re_miibus_readreg    (device_t, int, int);
271 static int re_miibus_writereg   (device_t, int, int, int);
272 static void re_miibus_statchg   (device_t);
273
274 static void re_set_rxmode               (struct rl_softc *);
275 static void re_reset            (struct rl_softc *);
276 static void re_setwol           (struct rl_softc *);
277 static void re_clrwol           (struct rl_softc *);
278
279 #ifdef RE_DIAG
280 static int re_diag              (struct rl_softc *);
281 #endif
282
283 static device_method_t re_methods[] = {
284         /* Device interface */
285         DEVMETHOD(device_probe,         re_probe),
286         DEVMETHOD(device_attach,        re_attach),
287         DEVMETHOD(device_detach,        re_detach),
288         DEVMETHOD(device_suspend,       re_suspend),
289         DEVMETHOD(device_resume,        re_resume),
290         DEVMETHOD(device_shutdown,      re_shutdown),
291
292         /* bus interface */
293         DEVMETHOD(bus_print_child,      bus_generic_print_child),
294         DEVMETHOD(bus_driver_added,     bus_generic_driver_added),
295
296         /* MII interface */
297         DEVMETHOD(miibus_readreg,       re_miibus_readreg),
298         DEVMETHOD(miibus_writereg,      re_miibus_writereg),
299         DEVMETHOD(miibus_statchg,       re_miibus_statchg),
300
301         { 0, 0 }
302 };
303
304 static driver_t re_driver = {
305         "re",
306         re_methods,
307         sizeof(struct rl_softc)
308 };
309
310 static devclass_t re_devclass;
311
312 DRIVER_MODULE(re, pci, re_driver, re_devclass, 0, 0);
313 DRIVER_MODULE(miibus, re, miibus_driver, miibus_devclass, 0, 0);
314
/*
 * Set (EE_SET) or clear (EE_CLR) the bits given by 'x' in the RL_EECMD
 * register using a read-modify-write cycle.  Both macros rely on a
 * variable named 'sc' being in scope where they are used.
 *
 * The argument is parenthesized so that a compound expression such as
 * (A | B) is applied as a whole; without that, EE_CLR(A | B) would
 * expand to "reg & ~A | B" and set B instead of clearing it.
 */
#define EE_SET(x)                                       \
        CSR_WRITE_1(sc, RL_EECMD,                       \
                CSR_READ_1(sc, RL_EECMD) | (x))

#define EE_CLR(x)                                       \
        CSR_WRITE_1(sc, RL_EECMD,                       \
                CSR_READ_1(sc, RL_EECMD) & ~(x))
322
323 /*
324  * Send a read command and address to the EEPROM, check for ACK.
325  */
326 static void
327 re_eeprom_putbyte(struct rl_softc *sc, int addr)
328 {
329         int                     d, i;
330
331         d = addr | (RL_9346_READ << sc->rl_eewidth);
332
333         /*
334          * Feed in each bit and strobe the clock.
335          */
336
337         for (i = 1 << (sc->rl_eewidth + 3); i; i >>= 1) {
338                 if (d & i) {
339                         EE_SET(RL_EE_DATAIN);
340                 } else {
341                         EE_CLR(RL_EE_DATAIN);
342                 }
343                 DELAY(100);
344                 EE_SET(RL_EE_CLK);
345                 DELAY(150);
346                 EE_CLR(RL_EE_CLK);
347                 DELAY(100);
348         }
349 }
350
351 /*
352  * Read a word of data stored in the EEPROM at address 'addr.'
353  */
354 static void
355 re_eeprom_getword(struct rl_softc *sc, int addr, u_int16_t *dest)
356 {
357         int                     i;
358         u_int16_t               word = 0;
359
360         /*
361          * Send address of word we want to read.
362          */
363         re_eeprom_putbyte(sc, addr);
364
365         /*
366          * Start reading bits from EEPROM.
367          */
368         for (i = 0x8000; i; i >>= 1) {
369                 EE_SET(RL_EE_CLK);
370                 DELAY(100);
371                 if (CSR_READ_1(sc, RL_EECMD) & RL_EE_DATAOUT)
372                         word |= i;
373                 EE_CLR(RL_EE_CLK);
374                 DELAY(100);
375         }
376
377         *dest = word;
378 }
379
380 /*
381  * Read a sequence of words from the EEPROM.
382  */
383 static void
384 re_read_eeprom(struct rl_softc *sc, caddr_t dest, int off, int cnt)
385 {
386         int                     i;
387         u_int16_t               word = 0, *ptr;
388
389         CSR_SETBIT_1(sc, RL_EECMD, RL_EEMODE_PROGRAM);
390
391         DELAY(100);
392
393         for (i = 0; i < cnt; i++) {
394                 CSR_SETBIT_1(sc, RL_EECMD, RL_EE_SEL);
395                 re_eeprom_getword(sc, off + i, &word);
396                 CSR_CLRBIT_1(sc, RL_EECMD, RL_EE_SEL);
397                 ptr = (u_int16_t *)(dest + (i * 2));
398                 *ptr = word;
399         }
400
401         CSR_CLRBIT_1(sc, RL_EECMD, RL_EEMODE_PROGRAM);
402 }
403
404 static int
405 re_gmii_readreg(device_t dev, int phy, int reg)
406 {
407         struct rl_softc         *sc;
408         u_int32_t               rval;
409         int                     i;
410
411         if (phy != 1)
412                 return (0);
413
414         sc = device_get_softc(dev);
415
416         /* Let the rgephy driver read the GMEDIASTAT register */
417
418         if (reg == RL_GMEDIASTAT) {
419                 rval = CSR_READ_1(sc, RL_GMEDIASTAT);
420                 return (rval);
421         }
422
423         CSR_WRITE_4(sc, RL_PHYAR, reg << 16);
424         DELAY(1000);
425
426         for (i = 0; i < RL_PHY_TIMEOUT; i++) {
427                 rval = CSR_READ_4(sc, RL_PHYAR);
428                 if (rval & RL_PHYAR_BUSY)
429                         break;
430                 DELAY(100);
431         }
432
433         if (i == RL_PHY_TIMEOUT) {
434                 device_printf(sc->rl_dev, "PHY read failed\n");
435                 return (0);
436         }
437
438         return (rval & RL_PHYAR_PHYDATA);
439 }
440
441 static int
442 re_gmii_writereg(device_t dev, int phy, int reg, int data)
443 {
444         struct rl_softc         *sc;
445         u_int32_t               rval;
446         int                     i;
447
448         sc = device_get_softc(dev);
449
450         CSR_WRITE_4(sc, RL_PHYAR, (reg << 16) |
451             (data & RL_PHYAR_PHYDATA) | RL_PHYAR_BUSY);
452         DELAY(1000);
453
454         for (i = 0; i < RL_PHY_TIMEOUT; i++) {
455                 rval = CSR_READ_4(sc, RL_PHYAR);
456                 if (!(rval & RL_PHYAR_BUSY))
457                         break;
458                 DELAY(100);
459         }
460
461         if (i == RL_PHY_TIMEOUT) {
462                 device_printf(sc->rl_dev, "PHY write failed\n");
463                 return (0);
464         }
465
466         return (0);
467 }
468
469 static int
470 re_miibus_readreg(device_t dev, int phy, int reg)
471 {
472         struct rl_softc         *sc;
473         u_int16_t               rval = 0;
474         u_int16_t               re8139_reg = 0;
475
476         sc = device_get_softc(dev);
477
478         if (sc->rl_type == RL_8169) {
479                 rval = re_gmii_readreg(dev, phy, reg);
480                 return (rval);
481         }
482
483         /* Pretend the internal PHY is only at address 0 */
484         if (phy) {
485                 return (0);
486         }
487         switch (reg) {
488         case MII_BMCR:
489                 re8139_reg = RL_BMCR;
490                 break;
491         case MII_BMSR:
492                 re8139_reg = RL_BMSR;
493                 break;
494         case MII_ANAR:
495                 re8139_reg = RL_ANAR;
496                 break;
497         case MII_ANER:
498                 re8139_reg = RL_ANER;
499                 break;
500         case MII_ANLPAR:
501                 re8139_reg = RL_LPAR;
502                 break;
503         case MII_PHYIDR1:
504         case MII_PHYIDR2:
505                 return (0);
506         /*
507          * Allow the rlphy driver to read the media status
508          * register. If we have a link partner which does not
509          * support NWAY, this is the register which will tell
510          * us the results of parallel detection.
511          */
512         case RL_MEDIASTAT:
513                 rval = CSR_READ_1(sc, RL_MEDIASTAT);
514                 return (rval);
515         default:
516                 device_printf(sc->rl_dev, "bad phy register\n");
517                 return (0);
518         }
519         rval = CSR_READ_2(sc, re8139_reg);
520         if (sc->rl_type == RL_8139CPLUS && re8139_reg == RL_BMCR) {
521                 /* 8139C+ has different bit layout. */
522                 rval &= ~(BMCR_LOOP | BMCR_ISO);
523         }
524         return (rval);
525 }
526
527 static int
528 re_miibus_writereg(device_t dev, int phy, int reg, int data)
529 {
530         struct rl_softc         *sc;
531         u_int16_t               re8139_reg = 0;
532         int                     rval = 0;
533
534         sc = device_get_softc(dev);
535
536         if (sc->rl_type == RL_8169) {
537                 rval = re_gmii_writereg(dev, phy, reg, data);
538                 return (rval);
539         }
540
541         /* Pretend the internal PHY is only at address 0 */
542         if (phy)
543                 return (0);
544
545         switch (reg) {
546         case MII_BMCR:
547                 re8139_reg = RL_BMCR;
548                 if (sc->rl_type == RL_8139CPLUS) {
549                         /* 8139C+ has different bit layout. */
550                         data &= ~(BMCR_LOOP | BMCR_ISO);
551                 }
552                 break;
553         case MII_BMSR:
554                 re8139_reg = RL_BMSR;
555                 break;
556         case MII_ANAR:
557                 re8139_reg = RL_ANAR;
558                 break;
559         case MII_ANER:
560                 re8139_reg = RL_ANER;
561                 break;
562         case MII_ANLPAR:
563                 re8139_reg = RL_LPAR;
564                 break;
565         case MII_PHYIDR1:
566         case MII_PHYIDR2:
567                 return (0);
568                 break;
569         default:
570                 device_printf(sc->rl_dev, "bad phy register\n");
571                 return (0);
572         }
573         CSR_WRITE_2(sc, re8139_reg, data);
574         return (0);
575 }
576
577 static void
578 re_miibus_statchg(device_t dev)
579 {
580         struct rl_softc         *sc;
581         struct ifnet            *ifp;
582         struct mii_data         *mii;
583
584         sc = device_get_softc(dev);
585         mii = device_get_softc(sc->rl_miibus);
586         ifp = sc->rl_ifp;
587         if (mii == NULL || ifp == NULL ||
588             (ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
589                 return;
590
591         sc->rl_flags &= ~RL_FLAG_LINK;
592         if ((mii->mii_media_status & (IFM_ACTIVE | IFM_AVALID)) ==
593             (IFM_ACTIVE | IFM_AVALID)) {
594                 switch (IFM_SUBTYPE(mii->mii_media_active)) {
595                 case IFM_10_T:
596                 case IFM_100_TX:
597                         sc->rl_flags |= RL_FLAG_LINK;
598                         break;
599                 case IFM_1000_T:
600                         if ((sc->rl_flags & RL_FLAG_FASTETHER) != 0)
601                                 break;
602                         sc->rl_flags |= RL_FLAG_LINK;
603                         break;
604                 default:
605                         break;
606                 }
607         }
608         /*
609          * RealTek controllers does not provide any interface to
610          * Tx/Rx MACs for resolved speed, duplex and flow-control
611          * parameters.
612          */
613 }
614
615 /*
616  * Set the RX configuration and 64-bit multicast hash filter.
617  */
618 static void
619 re_set_rxmode(struct rl_softc *sc)
620 {
621         struct ifnet            *ifp;
622         struct ifmultiaddr      *ifma;
623         uint32_t                hashes[2] = { 0, 0 };
624         uint32_t                h, rxfilt;
625
626         RL_LOCK_ASSERT(sc);
627
628         ifp = sc->rl_ifp;
629
630         rxfilt = RL_RXCFG_CONFIG | RL_RXCFG_RX_INDIV | RL_RXCFG_RX_BROAD;
631
632         if (ifp->if_flags & (IFF_ALLMULTI | IFF_PROMISC)) {
633                 if (ifp->if_flags & IFF_PROMISC)
634                         rxfilt |= RL_RXCFG_RX_ALLPHYS;
635                 /*
636                  * Unlike other hardwares, we have to explicitly set
637                  * RL_RXCFG_RX_MULTI to receive multicast frames in
638                  * promiscuous mode.
639                  */
640                 rxfilt |= RL_RXCFG_RX_MULTI;
641                 hashes[0] = hashes[1] = 0xffffffff;
642                 goto done;
643         }
644
645         if_maddr_rlock(ifp);
646         TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
647                 if (ifma->ifma_addr->sa_family != AF_LINK)
648                         continue;
649                 h = ether_crc32_be(LLADDR((struct sockaddr_dl *)
650                     ifma->ifma_addr), ETHER_ADDR_LEN) >> 26;
651                 if (h < 32)
652                         hashes[0] |= (1 << h);
653                 else
654                         hashes[1] |= (1 << (h - 32));
655         }
656         if_maddr_runlock(ifp);
657
658         if (hashes[0] != 0 || hashes[1] != 0) {
659                 /*
660                  * For some unfathomable reason, RealTek decided to
661                  * reverse the order of the multicast hash registers
662                  * in the PCI Express parts.  This means we have to
663                  * write the hash pattern in reverse order for those
664                  * devices.
665                  */
666                 if ((sc->rl_flags & RL_FLAG_PCIE) != 0) {
667                         h = bswap32(hashes[0]);
668                         hashes[0] = bswap32(hashes[1]);
669                         hashes[1] = h;
670                 }
671                 rxfilt |= RL_RXCFG_RX_MULTI;
672         }
673
674 done:
675         CSR_WRITE_4(sc, RL_MAR0, hashes[0]);
676         CSR_WRITE_4(sc, RL_MAR4, hashes[1]);
677         CSR_WRITE_4(sc, RL_RXCFG, rxfilt);
678 }
679
680 static void
681 re_reset(struct rl_softc *sc)
682 {
683         int                     i;
684
685         RL_LOCK_ASSERT(sc);
686
687         CSR_WRITE_1(sc, RL_COMMAND, RL_CMD_RESET);
688
689         for (i = 0; i < RL_TIMEOUT; i++) {
690                 DELAY(10);
691                 if (!(CSR_READ_1(sc, RL_COMMAND) & RL_CMD_RESET))
692                         break;
693         }
694         if (i == RL_TIMEOUT)
695                 device_printf(sc->rl_dev, "reset never completed!\n");
696
697         if ((sc->rl_flags & RL_FLAG_MACRESET) != 0)
698                 CSR_WRITE_1(sc, 0x82, 1);
699         if (sc->rl_hwrev == RL_HWREV_8169S)
700                 re_gmii_writereg(sc->rl_dev, 1, 0x0b, 0);
701 }
702
703 #ifdef RE_DIAG
704
705 /*
706  * The following routine is designed to test for a defect on some
707  * 32-bit 8169 cards. Some of these NICs have the REQ64# and ACK64#
708  * lines connected to the bus, however for a 32-bit only card, they
709  * should be pulled high. The result of this defect is that the
710  * NIC will not work right if you plug it into a 64-bit slot: DMA
711  * operations will be done with 64-bit transfers, which will fail
712  * because the 64-bit data lines aren't connected.
713  *
714  * There's no way to work around this (short of talking a soldering
715  * iron to the board), however we can detect it. The method we use
716  * here is to put the NIC into digital loopback mode, set the receiver
717  * to promiscuous mode, and then try to send a frame. We then compare
718  * the frame data we sent to what was received. If the data matches,
719  * then the NIC is working correctly, otherwise we know the user has
720  * a defective NIC which has been mistakenly plugged into a 64-bit PCI
721  * slot. In the latter case, there's no way the NIC can work correctly,
722  * so we print out a message on the console and abort the device attach.
723  */
724
725 static int
726 re_diag(struct rl_softc *sc)
727 {
728         struct ifnet            *ifp = sc->rl_ifp;
729         struct mbuf             *m0;
730         struct ether_header     *eh;
731         struct rl_desc          *cur_rx;
732         u_int16_t               status;
733         u_int32_t               rxstat;
734         int                     total_len, i, error = 0, phyaddr;
735         u_int8_t                dst[] = { 0x00, 'h', 'e', 'l', 'l', 'o' };
736         u_int8_t                src[] = { 0x00, 'w', 'o', 'r', 'l', 'd' };
737
738         /* Allocate a single mbuf */
739         MGETHDR(m0, M_DONTWAIT, MT_DATA);
740         if (m0 == NULL)
741                 return (ENOBUFS);
742
743         RL_LOCK(sc);
744
745         /*
746          * Initialize the NIC in test mode. This sets the chip up
747          * so that it can send and receive frames, but performs the
748          * following special functions:
749          * - Puts receiver in promiscuous mode
750          * - Enables digital loopback mode
751          * - Leaves interrupts turned off
752          */
753
754         ifp->if_flags |= IFF_PROMISC;
755         sc->rl_testmode = 1;
756         re_init_locked(sc);
757         sc->rl_flags |= RL_FLAG_LINK;
758         if (sc->rl_type == RL_8169)
759                 phyaddr = 1;
760         else
761                 phyaddr = 0;
762
763         re_miibus_writereg(sc->rl_dev, phyaddr, MII_BMCR, BMCR_RESET);
764         for (i = 0; i < RL_TIMEOUT; i++) {
765                 status = re_miibus_readreg(sc->rl_dev, phyaddr, MII_BMCR);
766                 if (!(status & BMCR_RESET))
767                         break;
768         }
769
770         re_miibus_writereg(sc->rl_dev, phyaddr, MII_BMCR, BMCR_LOOP);
771         CSR_WRITE_2(sc, RL_ISR, RL_INTRS);
772
773         DELAY(100000);
774
775         /* Put some data in the mbuf */
776
777         eh = mtod(m0, struct ether_header *);
778         bcopy ((char *)&dst, eh->ether_dhost, ETHER_ADDR_LEN);
779         bcopy ((char *)&src, eh->ether_shost, ETHER_ADDR_LEN);
780         eh->ether_type = htons(ETHERTYPE_IP);
781         m0->m_pkthdr.len = m0->m_len = ETHER_MIN_LEN - ETHER_CRC_LEN;
782
783         /*
784          * Queue the packet, start transmission.
785          * Note: IF_HANDOFF() ultimately calls re_start() for us.
786          */
787
788         CSR_WRITE_2(sc, RL_ISR, 0xFFFF);
789         RL_UNLOCK(sc);
790         /* XXX: re_diag must not be called when in ALTQ mode */
791         IF_HANDOFF(&ifp->if_snd, m0, ifp);
792         RL_LOCK(sc);
793         m0 = NULL;
794
795         /* Wait for it to propagate through the chip */
796
797         DELAY(100000);
798         for (i = 0; i < RL_TIMEOUT; i++) {
799                 status = CSR_READ_2(sc, RL_ISR);
800                 CSR_WRITE_2(sc, RL_ISR, status);
801                 if ((status & (RL_ISR_TIMEOUT_EXPIRED|RL_ISR_RX_OK)) ==
802                     (RL_ISR_TIMEOUT_EXPIRED|RL_ISR_RX_OK))
803                         break;
804                 DELAY(10);
805         }
806
807         if (i == RL_TIMEOUT) {
808                 device_printf(sc->rl_dev,
809                     "diagnostic failed, failed to receive packet in"
810                     " loopback mode\n");
811                 error = EIO;
812                 goto done;
813         }
814
815         /*
816          * The packet should have been dumped into the first
817          * entry in the RX DMA ring. Grab it from there.
818          */
819
820         bus_dmamap_sync(sc->rl_ldata.rl_rx_list_tag,
821             sc->rl_ldata.rl_rx_list_map,
822             BUS_DMASYNC_POSTREAD);
823         bus_dmamap_sync(sc->rl_ldata.rl_rx_mtag,
824             sc->rl_ldata.rl_rx_desc[0].rx_dmamap,
825             BUS_DMASYNC_POSTREAD);
826         bus_dmamap_unload(sc->rl_ldata.rl_rx_mtag,
827             sc->rl_ldata.rl_rx_desc[0].rx_dmamap);
828
829         m0 = sc->rl_ldata.rl_rx_desc[0].rx_m;
830         sc->rl_ldata.rl_rx_desc[0].rx_m = NULL;
831         eh = mtod(m0, struct ether_header *);
832
833         cur_rx = &sc->rl_ldata.rl_rx_list[0];
834         total_len = RL_RXBYTES(cur_rx);
835         rxstat = le32toh(cur_rx->rl_cmdstat);
836
837         if (total_len != ETHER_MIN_LEN) {
838                 device_printf(sc->rl_dev,
839                     "diagnostic failed, received short packet\n");
840                 error = EIO;
841                 goto done;
842         }
843
844         /* Test that the received packet data matches what we sent. */
845
846         if (bcmp((char *)&eh->ether_dhost, (char *)&dst, ETHER_ADDR_LEN) ||
847             bcmp((char *)&eh->ether_shost, (char *)&src, ETHER_ADDR_LEN) ||
848             ntohs(eh->ether_type) != ETHERTYPE_IP) {
849                 device_printf(sc->rl_dev, "WARNING, DMA FAILURE!\n");
850                 device_printf(sc->rl_dev, "expected TX data: %6D/%6D/0x%x\n",
851                     dst, ":", src, ":", ETHERTYPE_IP);
852                 device_printf(sc->rl_dev, "received RX data: %6D/%6D/0x%x\n",
853                     eh->ether_dhost, ":",  eh->ether_shost, ":",
854                     ntohs(eh->ether_type));
855                 device_printf(sc->rl_dev, "You may have a defective 32-bit "
856                     "NIC plugged into a 64-bit PCI slot.\n");
857                 device_printf(sc->rl_dev, "Please re-install the NIC in a "
858                     "32-bit slot for proper operation.\n");
859                 device_printf(sc->rl_dev, "Read the re(4) man page for more "
860                     "details.\n");
861                 error = EIO;
862         }
863
864 done:
865         /* Turn interface off, release resources */
866
867         sc->rl_testmode = 0;
868         sc->rl_flags &= ~RL_FLAG_LINK;
869         ifp->if_flags &= ~IFF_PROMISC;
870         re_stop(sc);
871         if (m0 != NULL)
872                 m_freem(m0);
873
874         RL_UNLOCK(sc);
875
876         return (error);
877 }
878
879 #endif
880
881 /*
882  * Probe for a RealTek 8139C+/8169/8110 chip. Check the PCI vendor and device
883  * IDs against our list and return a device name if we find a match.
884  */
885 static int
886 re_probe(device_t dev)
887 {
888         struct rl_type          *t;
889         uint16_t                devid, vendor;
890         uint16_t                revid, sdevid;
891         int                     i;
892         
893         vendor = pci_get_vendor(dev);
894         devid = pci_get_device(dev);
895         revid = pci_get_revid(dev);
896         sdevid = pci_get_subdevice(dev);
897
898         if (vendor == LINKSYS_VENDORID && devid == LINKSYS_DEVICEID_EG1032) {
899                 if (sdevid != LINKSYS_SUBDEVICE_EG1032_REV3) {
900                         /*
901                          * Only attach to rev. 3 of the Linksys EG1032 adapter.
902                          * Rev. 2 is supported by sk(4).
903                          */
904                         return (ENXIO);
905                 }
906         }
907
908         if (vendor == RT_VENDORID && devid == RT_DEVICEID_8139) {
909                 if (revid != 0x20) {
910                         /* 8139, let rl(4) take care of this device. */
911                         return (ENXIO);
912                 }
913         }
914
915         t = re_devs;
916         for (i = 0; i < sizeof(re_devs) / sizeof(re_devs[0]); i++, t++) {
917                 if (vendor == t->rl_vid && devid == t->rl_did) {
918                         device_set_desc(dev, t->rl_name);
919                         return (BUS_PROBE_DEFAULT);
920                 }
921         }
922
923         return (ENXIO);
924 }
925
926 /*
927  * Map a single buffer address.
928  */
929
930 static void
931 re_dma_map_addr(void *arg, bus_dma_segment_t *segs, int nseg, int error)
932 {
933         bus_addr_t              *addr;
934
935         if (error)
936                 return;
937
938         KASSERT(nseg == 1, ("too many DMA segments, %d should be 1", nseg));
939         addr = arg;
940         *addr = segs->ds_addr;
941 }
942
943 static int
944 re_allocmem(device_t dev, struct rl_softc *sc)
945 {
946         bus_size_t              rx_list_size, tx_list_size;
947         int                     error;
948         int                     i;
949
950         rx_list_size = sc->rl_ldata.rl_rx_desc_cnt * sizeof(struct rl_desc);
951         tx_list_size = sc->rl_ldata.rl_tx_desc_cnt * sizeof(struct rl_desc);
952
953         /*
954          * Allocate the parent bus DMA tag appropriate for PCI.
955          * In order to use DAC, RL_CPLUSCMD_PCI_DAC bit of RL_CPLUS_CMD
956          * register should be set. However some RealTek chips are known
957          * to be buggy on DAC handling, therefore disable DAC by limiting
958          * DMA address space to 32bit. PCIe variants of RealTek chips
959          * may not have the limitation but I took safer path.
960          */
961         error = bus_dma_tag_create(bus_get_dma_tag(dev), 1, 0,
962             BUS_SPACE_MAXADDR_32BIT, BUS_SPACE_MAXADDR, NULL, NULL,
963             BUS_SPACE_MAXSIZE_32BIT, 0, BUS_SPACE_MAXSIZE_32BIT, 0,
964             NULL, NULL, &sc->rl_parent_tag);
965         if (error) {
966                 device_printf(dev, "could not allocate parent DMA tag\n");
967                 return (error);
968         }
969
970         /*
971          * Allocate map for TX mbufs.
972          */
973         error = bus_dma_tag_create(sc->rl_parent_tag, 1, 0,
974             BUS_SPACE_MAXADDR, BUS_SPACE_MAXADDR, NULL,
975             NULL, MCLBYTES * RL_NTXSEGS, RL_NTXSEGS, 4096, 0,
976             NULL, NULL, &sc->rl_ldata.rl_tx_mtag);
977         if (error) {
978                 device_printf(dev, "could not allocate TX DMA tag\n");
979                 return (error);
980         }
981
982         /*
983          * Allocate map for RX mbufs.
984          */
985
986         error = bus_dma_tag_create(sc->rl_parent_tag, sizeof(uint64_t), 0,
987             BUS_SPACE_MAXADDR, BUS_SPACE_MAXADDR, NULL, NULL,
988             MCLBYTES, 1, MCLBYTES, 0, NULL, NULL, &sc->rl_ldata.rl_rx_mtag);
989         if (error) {
990                 device_printf(dev, "could not allocate RX DMA tag\n");
991                 return (error);
992         }
993
994         /*
995          * Allocate map for TX descriptor list.
996          */
997         error = bus_dma_tag_create(sc->rl_parent_tag, RL_RING_ALIGN,
998             0, BUS_SPACE_MAXADDR_32BIT, BUS_SPACE_MAXADDR, NULL,
999             NULL, tx_list_size, 1, tx_list_size, 0,
1000             NULL, NULL, &sc->rl_ldata.rl_tx_list_tag);
1001         if (error) {
1002                 device_printf(dev, "could not allocate TX DMA ring tag\n");
1003                 return (error);
1004         }
1005
1006         /* Allocate DMA'able memory for the TX ring */
1007
1008         error = bus_dmamem_alloc(sc->rl_ldata.rl_tx_list_tag,
1009             (void **)&sc->rl_ldata.rl_tx_list,
1010             BUS_DMA_WAITOK | BUS_DMA_COHERENT | BUS_DMA_ZERO,
1011             &sc->rl_ldata.rl_tx_list_map);
1012         if (error) {
1013                 device_printf(dev, "could not allocate TX DMA ring\n");
1014                 return (error);
1015         }
1016
1017         /* Load the map for the TX ring. */
1018
1019         sc->rl_ldata.rl_tx_list_addr = 0;
1020         error = bus_dmamap_load(sc->rl_ldata.rl_tx_list_tag,
1021              sc->rl_ldata.rl_tx_list_map, sc->rl_ldata.rl_tx_list,
1022              tx_list_size, re_dma_map_addr,
1023              &sc->rl_ldata.rl_tx_list_addr, BUS_DMA_NOWAIT);
1024         if (error != 0 || sc->rl_ldata.rl_tx_list_addr == 0) {
1025                 device_printf(dev, "could not load TX DMA ring\n");
1026                 return (ENOMEM);
1027         }
1028
1029         /* Create DMA maps for TX buffers */
1030
1031         for (i = 0; i < sc->rl_ldata.rl_tx_desc_cnt; i++) {
1032                 error = bus_dmamap_create(sc->rl_ldata.rl_tx_mtag, 0,
1033                     &sc->rl_ldata.rl_tx_desc[i].tx_dmamap);
1034                 if (error) {
1035                         device_printf(dev, "could not create DMA map for TX\n");
1036                         return (error);
1037                 }
1038         }
1039
1040         /*
1041          * Allocate map for RX descriptor list.
1042          */
1043         error = bus_dma_tag_create(sc->rl_parent_tag, RL_RING_ALIGN,
1044             0, BUS_SPACE_MAXADDR_32BIT, BUS_SPACE_MAXADDR, NULL,
1045             NULL, rx_list_size, 1, rx_list_size, 0,
1046             NULL, NULL, &sc->rl_ldata.rl_rx_list_tag);
1047         if (error) {
1048                 device_printf(dev, "could not create RX DMA ring tag\n");
1049                 return (error);
1050         }
1051
1052         /* Allocate DMA'able memory for the RX ring */
1053
1054         error = bus_dmamem_alloc(sc->rl_ldata.rl_rx_list_tag,
1055             (void **)&sc->rl_ldata.rl_rx_list,
1056             BUS_DMA_WAITOK | BUS_DMA_COHERENT | BUS_DMA_ZERO,
1057             &sc->rl_ldata.rl_rx_list_map);
1058         if (error) {
1059                 device_printf(dev, "could not allocate RX DMA ring\n");
1060                 return (error);
1061         }
1062
1063         /* Load the map for the RX ring. */
1064
1065         sc->rl_ldata.rl_rx_list_addr = 0;
1066         error = bus_dmamap_load(sc->rl_ldata.rl_rx_list_tag,
1067              sc->rl_ldata.rl_rx_list_map, sc->rl_ldata.rl_rx_list,
1068              rx_list_size, re_dma_map_addr,
1069              &sc->rl_ldata.rl_rx_list_addr, BUS_DMA_NOWAIT);
1070         if (error != 0 || sc->rl_ldata.rl_rx_list_addr == 0) {
1071                 device_printf(dev, "could not load RX DMA ring\n");
1072                 return (ENOMEM);
1073         }
1074
1075         /* Create DMA maps for RX buffers */
1076
1077         error = bus_dmamap_create(sc->rl_ldata.rl_rx_mtag, 0,
1078             &sc->rl_ldata.rl_rx_sparemap);
1079         if (error) {
1080                 device_printf(dev, "could not create spare DMA map for RX\n");
1081                 return (error);
1082         }
1083         for (i = 0; i < sc->rl_ldata.rl_rx_desc_cnt; i++) {
1084                 error = bus_dmamap_create(sc->rl_ldata.rl_rx_mtag, 0,
1085                     &sc->rl_ldata.rl_rx_desc[i].rx_dmamap);
1086                 if (error) {
1087                         device_printf(dev, "could not create DMA map for RX\n");
1088                         return (error);
1089                 }
1090         }
1091
1092         return (0);
1093 }
1094
1095 /*
1096  * Attach the interface. Allocate softc structures, do ifmedia
1097  * setup and ethernet/BPF attach.
1098  */
1099 static int
1100 re_attach(device_t dev)
1101 {
1102         u_char                  eaddr[ETHER_ADDR_LEN];
1103         u_int16_t               as[ETHER_ADDR_LEN / 2];
1104         struct rl_softc         *sc;
1105         struct ifnet            *ifp;
1106         struct rl_hwrev         *hw_rev;
1107         int                     hwrev;
1108         u_int16_t               devid, re_did = 0;
1109         int                     error = 0, rid, i;
1110         int                     msic, reg;
1111         uint8_t                 cfg;
1112
1113         sc = device_get_softc(dev);
1114         sc->rl_dev = dev;
1115
1116         mtx_init(&sc->rl_mtx, device_get_nameunit(dev), MTX_NETWORK_LOCK,
1117             MTX_DEF);
1118         callout_init_mtx(&sc->rl_stat_callout, &sc->rl_mtx, 0);
1119
1120         /*
1121          * Map control/status registers.
1122          */
1123         pci_enable_busmaster(dev);
1124
1125         devid = pci_get_device(dev);
1126         /*
1127          * Prefer memory space register mapping over IO space.
1128          * Because RTL8169SC does not seem to work when memory mapping
1129          * is used always activate io mapping. 
1130          */
1131         if (devid == RT_DEVICEID_8169SC)
1132                 prefer_iomap = 1;
1133         if (prefer_iomap == 0) {
1134                 sc->rl_res_id = PCIR_BAR(1);
1135                 sc->rl_res_type = SYS_RES_MEMORY;
1136                 /* RTL8168/8101E seems to use different BARs. */
1137                 if (devid == RT_DEVICEID_8168 || devid == RT_DEVICEID_8101E)
1138                         sc->rl_res_id = PCIR_BAR(2);
1139         } else {
1140                 sc->rl_res_id = PCIR_BAR(0);
1141                 sc->rl_res_type = SYS_RES_IOPORT;
1142         }
1143         sc->rl_res = bus_alloc_resource_any(dev, sc->rl_res_type,
1144             &sc->rl_res_id, RF_ACTIVE);
1145         if (sc->rl_res == NULL && prefer_iomap == 0) {
1146                 sc->rl_res_id = PCIR_BAR(0);
1147                 sc->rl_res_type = SYS_RES_IOPORT;
1148                 sc->rl_res = bus_alloc_resource_any(dev, sc->rl_res_type,
1149                     &sc->rl_res_id, RF_ACTIVE);
1150         }
1151         if (sc->rl_res == NULL) {
1152                 device_printf(dev, "couldn't map ports/memory\n");
1153                 error = ENXIO;
1154                 goto fail;
1155         }
1156
1157         sc->rl_btag = rman_get_bustag(sc->rl_res);
1158         sc->rl_bhandle = rman_get_bushandle(sc->rl_res);
1159
1160         msic = 0;
1161         if (pci_find_extcap(dev, PCIY_EXPRESS, &reg) == 0) {
1162                 sc->rl_flags |= RL_FLAG_PCIE;
1163                 msic = pci_msi_count(dev);
1164                 if (bootverbose)
1165                         device_printf(dev, "MSI count : %d\n", msic);
1166         }
1167         if (msic > 0 && msi_disable == 0) {
1168                 msic = 1;
1169                 if (pci_alloc_msi(dev, &msic) == 0) {
1170                         if (msic == RL_MSI_MESSAGES) {
1171                                 device_printf(dev, "Using %d MSI messages\n",
1172                                     msic);
1173                                 sc->rl_flags |= RL_FLAG_MSI;
1174                                 /* Explicitly set MSI enable bit. */
1175                                 CSR_WRITE_1(sc, RL_EECMD, RL_EE_MODE);
1176                                 cfg = CSR_READ_1(sc, RL_CFG2);
1177                                 cfg |= RL_CFG2_MSI;
1178                                 CSR_WRITE_1(sc, RL_CFG2, cfg);
1179                                 CSR_WRITE_1(sc, RL_EECMD, RL_EEMODE_OFF);
1180                         } else
1181                                 pci_release_msi(dev);
1182                 }
1183         }
1184
1185         /* Allocate interrupt */
1186         if ((sc->rl_flags & RL_FLAG_MSI) == 0) {
1187                 rid = 0;
1188                 sc->rl_irq[0] = bus_alloc_resource_any(dev, SYS_RES_IRQ, &rid,
1189                     RF_SHAREABLE | RF_ACTIVE);
1190                 if (sc->rl_irq[0] == NULL) {
1191                         device_printf(dev, "couldn't allocate IRQ resources\n");
1192                         error = ENXIO;
1193                         goto fail;
1194                 }
1195         } else {
1196                 for (i = 0, rid = 1; i < RL_MSI_MESSAGES; i++, rid++) {
1197                         sc->rl_irq[i] = bus_alloc_resource_any(dev,
1198                             SYS_RES_IRQ, &rid, RF_ACTIVE);
1199                         if (sc->rl_irq[i] == NULL) {
1200                                 device_printf(dev,
1201                                     "couldn't llocate IRQ resources for "
1202                                     "message %d\n", rid);
1203                                 error = ENXIO;
1204                                 goto fail;
1205                         }
1206                 }
1207         }
1208
1209         if ((sc->rl_flags & RL_FLAG_MSI) == 0) {
1210                 CSR_WRITE_1(sc, RL_EECMD, RL_EE_MODE);
1211                 cfg = CSR_READ_1(sc, RL_CFG2);
1212                 if ((cfg & RL_CFG2_MSI) != 0) {
1213                         device_printf(dev, "turning off MSI enable bit.\n");
1214                         cfg &= ~RL_CFG2_MSI;
1215                         CSR_WRITE_1(sc, RL_CFG2, cfg);
1216                 }
1217                 CSR_WRITE_1(sc, RL_EECMD, RL_EEMODE_OFF);
1218         }
1219
1220         /* Reset the adapter. */
1221         RL_LOCK(sc);
1222         re_reset(sc);
1223         RL_UNLOCK(sc);
1224
1225         hw_rev = re_hwrevs;
1226         hwrev = CSR_READ_4(sc, RL_TXCFG);
1227         switch (hwrev & 0x70000000) {
1228         case 0x00000000:
1229         case 0x10000000:
1230                 device_printf(dev, "Chip rev. 0x%08x\n", hwrev & 0xfc800000);
1231                 hwrev &= (RL_TXCFG_HWREV | 0x80000000);
1232                 break;
1233         default:
1234                 device_printf(dev, "Chip rev. 0x%08x\n", hwrev & 0x7c800000);
1235                 hwrev &= RL_TXCFG_HWREV;
1236                 break;
1237         }
1238         device_printf(dev, "MAC rev. 0x%08x\n", hwrev & 0x00700000);
1239         while (hw_rev->rl_desc != NULL) {
1240                 if (hw_rev->rl_rev == hwrev) {
1241                         sc->rl_type = hw_rev->rl_type;
1242                         sc->rl_hwrev = hw_rev->rl_rev;
1243                         break;
1244                 }
1245                 hw_rev++;
1246         }
1247         if (hw_rev->rl_desc == NULL) {
1248                 device_printf(dev, "Unknown H/W revision: 0x%08x\n", hwrev);
1249                 error = ENXIO;
1250                 goto fail;
1251         }
1252
1253         switch (hw_rev->rl_rev) {
1254         case RL_HWREV_8139CPLUS:
1255                 sc->rl_flags |= RL_FLAG_NOJUMBO | RL_FLAG_FASTETHER |
1256                     RL_FLAG_AUTOPAD;
1257                 break;
1258         case RL_HWREV_8100E:
1259         case RL_HWREV_8101E:
1260                 sc->rl_flags |= RL_FLAG_NOJUMBO | RL_FLAG_PHYWAKE |
1261                     RL_FLAG_FASTETHER;
1262                 break;
1263         case RL_HWREV_8102E:
1264         case RL_HWREV_8102EL:
1265         case RL_HWREV_8102EL_SPIN1:
1266                 sc->rl_flags |= RL_FLAG_NOJUMBO | RL_FLAG_PHYWAKE |
1267                     RL_FLAG_PAR | RL_FLAG_DESCV2 | RL_FLAG_MACSTAT |
1268                     RL_FLAG_FASTETHER | RL_FLAG_CMDSTOP | RL_FLAG_AUTOPAD;
1269                 break;
1270         case RL_HWREV_8168_SPIN1:
1271         case RL_HWREV_8168_SPIN2:
1272                 sc->rl_flags |= RL_FLAG_WOLRXENB;
1273                 /* FALLTHROUGH */
1274         case RL_HWREV_8168_SPIN3:
1275                 sc->rl_flags |= RL_FLAG_PHYWAKE | RL_FLAG_MACSTAT;
1276                 break;
1277         case RL_HWREV_8168C_SPIN2:
1278                 sc->rl_flags |= RL_FLAG_MACSLEEP;
1279                 /* FALLTHROUGH */
1280         case RL_HWREV_8168C:
1281                 if ((hwrev & 0x00700000) == 0x00200000)
1282                         sc->rl_flags |= RL_FLAG_MACSLEEP;
1283                 /* FALLTHROUGH */
1284         case RL_HWREV_8168CP:
1285         case RL_HWREV_8168D:
1286         case RL_HWREV_8168DP:
1287                 sc->rl_flags |= RL_FLAG_PHYWAKE | RL_FLAG_PAR |
1288                     RL_FLAG_DESCV2 | RL_FLAG_MACSTAT | RL_FLAG_CMDSTOP |
1289                     RL_FLAG_AUTOPAD;
1290                 /*
1291                  * These controllers support jumbo frame but it seems
1292                  * that enabling it requires touching additional magic
1293                  * registers. Depending on MAC revisions some
1294                  * controllers need to disable checksum offload. So
1295                  * disable jumbo frame until I have better idea what
1296                  * it really requires to make it support.
1297                  * RTL8168C/CP : supports up to 6KB jumbo frame.
1298                  * RTL8111C/CP : supports up to 9KB jumbo frame.
1299                  */
1300                 sc->rl_flags |= RL_FLAG_NOJUMBO;
1301                 break;
1302         case RL_HWREV_8169_8110SB:
1303         case RL_HWREV_8169_8110SBL:
1304         case RL_HWREV_8169_8110SC:
1305         case RL_HWREV_8169_8110SCE:
1306                 sc->rl_flags |= RL_FLAG_PHYWAKE;
1307                 /* FALLTHROUGH */
1308         case RL_HWREV_8169:
1309         case RL_HWREV_8169S:
1310         case RL_HWREV_8110S:
1311                 sc->rl_flags |= RL_FLAG_MACRESET;
1312                 break;
1313         default:
1314                 break;
1315         }
1316
1317         /* Enable PME. */
1318         CSR_WRITE_1(sc, RL_EECMD, RL_EE_MODE);
1319         cfg = CSR_READ_1(sc, RL_CFG1);
1320         cfg |= RL_CFG1_PME;
1321         CSR_WRITE_1(sc, RL_CFG1, cfg);
1322         cfg = CSR_READ_1(sc, RL_CFG5);
1323         cfg &= RL_CFG5_PME_STS;
1324         CSR_WRITE_1(sc, RL_CFG5, cfg);
1325         CSR_WRITE_1(sc, RL_EECMD, RL_EEMODE_OFF);
1326
1327         if ((sc->rl_flags & RL_FLAG_PAR) != 0) {
1328                 /*
1329                  * XXX Should have a better way to extract station
1330                  * address from EEPROM.
1331                  */
1332                 for (i = 0; i < ETHER_ADDR_LEN; i++)
1333                         eaddr[i] = CSR_READ_1(sc, RL_IDR0 + i);
1334         } else {
1335                 sc->rl_eewidth = RL_9356_ADDR_LEN;
1336                 re_read_eeprom(sc, (caddr_t)&re_did, 0, 1);
1337                 if (re_did != 0x8129)
1338                         sc->rl_eewidth = RL_9346_ADDR_LEN;
1339
1340                 /*
1341                  * Get station address from the EEPROM.
1342                  */
1343                 re_read_eeprom(sc, (caddr_t)as, RL_EE_EADDR, 3);
1344                 for (i = 0; i < ETHER_ADDR_LEN / 2; i++)
1345                         as[i] = le16toh(as[i]);
1346                 bcopy(as, eaddr, sizeof(eaddr));
1347         }
1348
1349         if (sc->rl_type == RL_8169) {
1350                 /* Set RX length mask and number of descriptors. */
1351                 sc->rl_rxlenmask = RL_RDESC_STAT_GFRAGLEN;
1352                 sc->rl_txstart = RL_GTXSTART;
1353                 sc->rl_ldata.rl_tx_desc_cnt = RL_8169_TX_DESC_CNT;
1354                 sc->rl_ldata.rl_rx_desc_cnt = RL_8169_RX_DESC_CNT;
1355         } else {
1356                 /* Set RX length mask and number of descriptors. */
1357                 sc->rl_rxlenmask = RL_RDESC_STAT_FRAGLEN;
1358                 sc->rl_txstart = RL_TXSTART;
1359                 sc->rl_ldata.rl_tx_desc_cnt = RL_8139_TX_DESC_CNT;
1360                 sc->rl_ldata.rl_rx_desc_cnt = RL_8139_RX_DESC_CNT;
1361         }
1362
1363         error = re_allocmem(dev, sc);
1364         if (error)
1365                 goto fail;
1366
1367         ifp = sc->rl_ifp = if_alloc(IFT_ETHER);
1368         if (ifp == NULL) {
1369                 device_printf(dev, "can not if_alloc()\n");
1370                 error = ENOSPC;
1371                 goto fail;
1372         }
1373
1374         /* Take controller out of deep sleep mode. */
1375         if ((sc->rl_flags & RL_FLAG_MACSLEEP) != 0) {
1376                 if ((CSR_READ_1(sc, RL_MACDBG) & 0x80) == 0x80)
1377                         CSR_WRITE_1(sc, RL_GPIO,
1378                             CSR_READ_1(sc, RL_GPIO) | 0x01);
1379                 else
1380                         CSR_WRITE_1(sc, RL_GPIO,
1381                             CSR_READ_1(sc, RL_GPIO) & ~0x01);
1382         }
1383
1384         /* Take PHY out of power down mode. */
1385         if ((sc->rl_flags & RL_FLAG_PHYWAKE) != 0) {
1386                 re_gmii_writereg(dev, 1, 0x1f, 0);
1387                 re_gmii_writereg(dev, 1, 0x0e, 0);
1388         }
1389
1390         /* Do MII setup */
1391         if (mii_phy_probe(dev, &sc->rl_miibus,
1392             re_ifmedia_upd, re_ifmedia_sts)) {
1393                 device_printf(dev, "MII without any phy!\n");
1394                 error = ENXIO;
1395                 goto fail;
1396         }
1397
1398         ifp->if_softc = sc;
1399         if_initname(ifp, device_get_name(dev), device_get_unit(dev));
1400         ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
1401         ifp->if_ioctl = re_ioctl;
1402         ifp->if_start = re_start;
1403         ifp->if_hwassist = RE_CSUM_FEATURES;
1404         ifp->if_capabilities = IFCAP_HWCSUM;
1405         ifp->if_capenable = ifp->if_capabilities;
1406         ifp->if_init = re_init;
1407         IFQ_SET_MAXLEN(&ifp->if_snd, RL_IFQ_MAXLEN);
1408         ifp->if_snd.ifq_drv_maxlen = RL_IFQ_MAXLEN;
1409         IFQ_SET_READY(&ifp->if_snd);
1410
1411         TASK_INIT(&sc->rl_txtask, 1, re_tx_task, ifp);
1412         TASK_INIT(&sc->rl_inttask, 0, re_int_task, sc);
1413
1414         /*
1415          * XXX
1416          * Still have no idea how to make TSO work on 8168C, 8168CP,
1417          * 8111C and 8111CP.
1418          */
1419         if ((sc->rl_flags & RL_FLAG_DESCV2) == 0) {
1420                 ifp->if_hwassist |= CSUM_TSO;
1421                 ifp->if_capabilities |= IFCAP_TSO4;
1422         }
1423
1424         /*
1425          * Call MI attach routine.
1426          */
1427         ether_ifattach(ifp, eaddr);
1428
1429         /* VLAN capability setup */
1430         ifp->if_capabilities |= IFCAP_VLAN_MTU | IFCAP_VLAN_HWTAGGING;
1431         if (ifp->if_capabilities & IFCAP_HWCSUM)
1432                 ifp->if_capabilities |= IFCAP_VLAN_HWCSUM;
1433         /* Enable WOL if PM is supported. */
1434         if (pci_find_extcap(sc->rl_dev, PCIY_PMG, &reg) == 0)
1435                 ifp->if_capabilities |= IFCAP_WOL;
1436         ifp->if_capenable = ifp->if_capabilities;
1437         /*
1438          * Don't enable TSO by default. Under certain
1439          * circumtances the controller generated corrupted
1440          * packets in TSO size.
1441          */
1442         ifp->if_hwassist &= ~CSUM_TSO;
1443         ifp->if_capenable &= ~IFCAP_TSO4;
1444 #ifdef DEVICE_POLLING
1445         ifp->if_capabilities |= IFCAP_POLLING;
1446 #endif
1447         /*
1448          * Tell the upper layer(s) we support long frames.
1449          * Must appear after the call to ether_ifattach() because
1450          * ether_ifattach() sets ifi_hdrlen to the default value.
1451          */
1452         ifp->if_data.ifi_hdrlen = sizeof(struct ether_vlan_header);
1453
1454 #ifdef RE_DIAG
1455         /*
1456          * Perform hardware diagnostic on the original RTL8169.
1457          * Some 32-bit cards were incorrectly wired and would
1458          * malfunction if plugged into a 64-bit slot.
1459          */
1460
1461         if (hwrev == RL_HWREV_8169) {
1462                 error = re_diag(sc);
1463                 if (error) {
1464                         device_printf(dev,
1465                         "attach aborted due to hardware diag failure\n");
1466                         ether_ifdetach(ifp);
1467                         goto fail;
1468                 }
1469         }
1470 #endif
1471
1472         /* Hook interrupt last to avoid having to lock softc */
1473         if ((sc->rl_flags & RL_FLAG_MSI) == 0)
1474                 error = bus_setup_intr(dev, sc->rl_irq[0],
1475                     INTR_TYPE_NET | INTR_MPSAFE, re_intr, NULL, sc,
1476                     &sc->rl_intrhand[0]);
1477         else {
1478                 for (i = 0; i < RL_MSI_MESSAGES; i++) {
1479                         error = bus_setup_intr(dev, sc->rl_irq[i],
1480                             INTR_TYPE_NET | INTR_MPSAFE, re_intr, NULL, sc,
1481                             &sc->rl_intrhand[i]);
1482                         if (error != 0)
1483                                 break;
1484                 }
1485         }
1486         if (error) {
1487                 device_printf(dev, "couldn't set up irq\n");
1488                 ether_ifdetach(ifp);
1489         }
1490
1491 fail:
1492
1493         if (error)
1494                 re_detach(dev);
1495
1496         return (error);
1497 }
1498
1499 /*
1500  * Shutdown hardware and free up resources. This can be called any
1501  * time after the mutex has been initialized. It is called in both
1502  * the error case in attach and the normal detach case so it needs
1503  * to be careful about only freeing resources that have actually been
1504  * allocated.
1505  */
1506 static int
1507 re_detach(device_t dev)
1508 {
1509         struct rl_softc         *sc;
1510         struct ifnet            *ifp;
1511         int                     i, rid;
1512
1513         sc = device_get_softc(dev);
1514         ifp = sc->rl_ifp;
1515         KASSERT(mtx_initialized(&sc->rl_mtx), ("re mutex not initialized"));
1516
1517         /* These should only be active if attach succeeded */
1518         if (device_is_attached(dev)) {
1519 #ifdef DEVICE_POLLING
1520                 if (ifp->if_capenable & IFCAP_POLLING)
1521                         ether_poll_deregister(ifp);
1522 #endif
1523                 RL_LOCK(sc);
1524 #if 0
1525                 sc->suspended = 1;
1526 #endif
1527                 re_stop(sc);
1528                 RL_UNLOCK(sc);
1529                 callout_drain(&sc->rl_stat_callout);
1530                 taskqueue_drain(taskqueue_fast, &sc->rl_inttask);
1531                 taskqueue_drain(taskqueue_fast, &sc->rl_txtask);
1532                 /*
1533                  * Force off the IFF_UP flag here, in case someone
1534                  * still had a BPF descriptor attached to this
1535                  * interface. If they do, ether_ifdetach() will cause
1536                  * the BPF code to try and clear the promisc mode
1537                  * flag, which will bubble down to re_ioctl(),
1538                  * which will try to call re_init() again. This will
1539                  * turn the NIC back on and restart the MII ticker,
1540                  * which will panic the system when the kernel tries
1541                  * to invoke the re_tick() function that isn't there
1542                  * anymore.
1543                  */
1544                 ifp->if_flags &= ~IFF_UP;
1545                 ether_ifdetach(ifp);
1546         }
1547         if (sc->rl_miibus)
1548                 device_delete_child(dev, sc->rl_miibus);
1549         bus_generic_detach(dev);
1550
1551         /*
1552          * The rest is resource deallocation, so we should already be
1553          * stopped here.
1554          */
1555
1556         for (i = 0; i < RL_MSI_MESSAGES; i++) {
1557                 if (sc->rl_intrhand[i] != NULL) {
1558                         bus_teardown_intr(dev, sc->rl_irq[i],
1559                             sc->rl_intrhand[i]);
1560                         sc->rl_intrhand[i] = NULL;
1561                 }
1562         }
1563         if (ifp != NULL)
1564                 if_free(ifp);
1565         if ((sc->rl_flags & RL_FLAG_MSI) == 0) {
1566                 if (sc->rl_irq[0] != NULL) {
1567                         bus_release_resource(dev, SYS_RES_IRQ, 0,
1568                             sc->rl_irq[0]);
1569                         sc->rl_irq[0] = NULL;
1570                 }
1571         } else {
1572                 for (i = 0, rid = 1; i < RL_MSI_MESSAGES; i++, rid++) {
1573                         if (sc->rl_irq[i] != NULL) {
1574                                 bus_release_resource(dev, SYS_RES_IRQ, rid,
1575                                     sc->rl_irq[i]);
1576                                 sc->rl_irq[i] = NULL;
1577                         }
1578                 }
1579                 pci_release_msi(dev);
1580         }
1581         if (sc->rl_res)
1582                 bus_release_resource(dev, sc->rl_res_type, sc->rl_res_id,
1583                     sc->rl_res);
1584
1585         /* Unload and free the RX DMA ring memory and map */
1586
1587         if (sc->rl_ldata.rl_rx_list_tag) {
1588                 bus_dmamap_unload(sc->rl_ldata.rl_rx_list_tag,
1589                     sc->rl_ldata.rl_rx_list_map);
1590                 bus_dmamem_free(sc->rl_ldata.rl_rx_list_tag,
1591                     sc->rl_ldata.rl_rx_list,
1592                     sc->rl_ldata.rl_rx_list_map);
1593                 bus_dma_tag_destroy(sc->rl_ldata.rl_rx_list_tag);
1594         }
1595
1596         /* Unload and free the TX DMA ring memory and map */
1597
1598         if (sc->rl_ldata.rl_tx_list_tag) {
1599                 bus_dmamap_unload(sc->rl_ldata.rl_tx_list_tag,
1600                     sc->rl_ldata.rl_tx_list_map);
1601                 bus_dmamem_free(sc->rl_ldata.rl_tx_list_tag,
1602                     sc->rl_ldata.rl_tx_list,
1603                     sc->rl_ldata.rl_tx_list_map);
1604                 bus_dma_tag_destroy(sc->rl_ldata.rl_tx_list_tag);
1605         }
1606
1607         /* Destroy all the RX and TX buffer maps */
1608
1609         if (sc->rl_ldata.rl_tx_mtag) {
1610                 for (i = 0; i < sc->rl_ldata.rl_tx_desc_cnt; i++)
1611                         bus_dmamap_destroy(sc->rl_ldata.rl_tx_mtag,
1612                             sc->rl_ldata.rl_tx_desc[i].tx_dmamap);
1613                 bus_dma_tag_destroy(sc->rl_ldata.rl_tx_mtag);
1614         }
1615         if (sc->rl_ldata.rl_rx_mtag) {
1616                 for (i = 0; i < sc->rl_ldata.rl_rx_desc_cnt; i++)
1617                         bus_dmamap_destroy(sc->rl_ldata.rl_rx_mtag,
1618                             sc->rl_ldata.rl_rx_desc[i].rx_dmamap);
1619                 if (sc->rl_ldata.rl_rx_sparemap)
1620                         bus_dmamap_destroy(sc->rl_ldata.rl_rx_mtag,
1621                             sc->rl_ldata.rl_rx_sparemap);
1622                 bus_dma_tag_destroy(sc->rl_ldata.rl_rx_mtag);
1623         }
1624
1625         /* Unload and free the stats buffer and map */
1626
1627         if (sc->rl_ldata.rl_stag) {
1628                 bus_dmamap_unload(sc->rl_ldata.rl_stag,
1629                     sc->rl_ldata.rl_rx_list_map);
1630                 bus_dmamem_free(sc->rl_ldata.rl_stag,
1631                     sc->rl_ldata.rl_stats,
1632                     sc->rl_ldata.rl_smap);
1633                 bus_dma_tag_destroy(sc->rl_ldata.rl_stag);
1634         }
1635
1636         if (sc->rl_parent_tag)
1637                 bus_dma_tag_destroy(sc->rl_parent_tag);
1638
1639         mtx_destroy(&sc->rl_mtx);
1640
1641         return (0);
1642 }
1643
1644 static __inline void
1645 re_discard_rxbuf(struct rl_softc *sc, int idx)
1646 {
1647         struct rl_desc          *desc;
1648         struct rl_rxdesc        *rxd;
1649         uint32_t                cmdstat;
1650
1651         rxd = &sc->rl_ldata.rl_rx_desc[idx];
1652         desc = &sc->rl_ldata.rl_rx_list[idx];
1653         desc->rl_vlanctl = 0;
1654         cmdstat = rxd->rx_size;
1655         if (idx == sc->rl_ldata.rl_rx_desc_cnt - 1)
1656                 cmdstat |= RL_RDESC_CMD_EOR;
1657         desc->rl_cmdstat = htole32(cmdstat | RL_RDESC_CMD_OWN);
1658 }
1659
1660 static int
1661 re_newbuf(struct rl_softc *sc, int idx)
1662 {
1663         struct mbuf             *m;
1664         struct rl_rxdesc        *rxd;
1665         bus_dma_segment_t       segs[1];
1666         bus_dmamap_t            map;
1667         struct rl_desc          *desc;
1668         uint32_t                cmdstat;
1669         int                     error, nsegs;
1670
1671         m = m_getcl(M_DONTWAIT, MT_DATA, M_PKTHDR);
1672         if (m == NULL)
1673                 return (ENOBUFS);
1674
1675         m->m_len = m->m_pkthdr.len = MCLBYTES;
1676 #ifdef RE_FIXUP_RX
1677         /*
1678          * This is part of an evil trick to deal with non-x86 platforms.
1679          * The RealTek chip requires RX buffers to be aligned on 64-bit
1680          * boundaries, but that will hose non-x86 machines. To get around
1681          * this, we leave some empty space at the start of each buffer
1682          * and for non-x86 hosts, we copy the buffer back six bytes
1683          * to achieve word alignment. This is slightly more efficient
1684          * than allocating a new buffer, copying the contents, and
1685          * discarding the old buffer.
1686          */
1687         m_adj(m, RE_ETHER_ALIGN);
1688 #endif
1689         error = bus_dmamap_load_mbuf_sg(sc->rl_ldata.rl_rx_mtag,
1690             sc->rl_ldata.rl_rx_sparemap, m, segs, &nsegs, BUS_DMA_NOWAIT);
1691         if (error != 0) {
1692                 m_freem(m);
1693                 return (ENOBUFS);
1694         }
1695         KASSERT(nsegs == 1, ("%s: %d segment returned!", __func__, nsegs));
1696
1697         rxd = &sc->rl_ldata.rl_rx_desc[idx];
1698         if (rxd->rx_m != NULL) {
1699                 bus_dmamap_sync(sc->rl_ldata.rl_rx_mtag, rxd->rx_dmamap,
1700                     BUS_DMASYNC_POSTREAD);
1701                 bus_dmamap_unload(sc->rl_ldata.rl_rx_mtag, rxd->rx_dmamap);
1702         }
1703
1704         rxd->rx_m = m;
1705         map = rxd->rx_dmamap;
1706         rxd->rx_dmamap = sc->rl_ldata.rl_rx_sparemap;
1707         rxd->rx_size = segs[0].ds_len;
1708         sc->rl_ldata.rl_rx_sparemap = map;
1709         bus_dmamap_sync(sc->rl_ldata.rl_rx_mtag, rxd->rx_dmamap,
1710             BUS_DMASYNC_PREREAD);
1711
1712         desc = &sc->rl_ldata.rl_rx_list[idx];
1713         desc->rl_vlanctl = 0;
1714         desc->rl_bufaddr_lo = htole32(RL_ADDR_LO(segs[0].ds_addr));
1715         desc->rl_bufaddr_hi = htole32(RL_ADDR_HI(segs[0].ds_addr));
1716         cmdstat = segs[0].ds_len;
1717         if (idx == sc->rl_ldata.rl_rx_desc_cnt - 1)
1718                 cmdstat |= RL_RDESC_CMD_EOR;
1719         desc->rl_cmdstat = htole32(cmdstat | RL_RDESC_CMD_OWN);
1720
1721         return (0);
1722 }
1723
#ifdef RE_FIXUP_RX
/*
 * Slide the data of mbuf `m' towards the start of its buffer by
 * (RE_ETHER_ALIGN - ETHER_ALIGN) bytes, one 16-bit word at a time, and
 * move m_data back by the same amount.  Only the first mbuf is touched.
 * The copy runs front to back, which is safe because the destination
 * lies below the source.
 */
static __inline void
re_fixup_rx(struct mbuf *m)
{
	uint16_t		*from, *to;
	size_t			nwords, n;

	from = mtod(m, uint16_t *);
	to = from - (RE_ETHER_ALIGN - ETHER_ALIGN) / sizeof *from;

	/* One word more than m_len / 2, covering an odd trailing byte. */
	nwords = m->m_len / sizeof(uint16_t) + 1;
	for (n = 0; n < nwords; n++)
		to[n] = from[n];

	m->m_data -= RE_ETHER_ALIGN - ETHER_ALIGN;
}
#endif
1740
1741 static int
1742 re_tx_list_init(struct rl_softc *sc)
1743 {
1744         struct rl_desc          *desc;
1745         int                     i;
1746
1747         RL_LOCK_ASSERT(sc);
1748
1749         bzero(sc->rl_ldata.rl_tx_list,
1750             sc->rl_ldata.rl_tx_desc_cnt * sizeof(struct rl_desc));
1751         for (i = 0; i < sc->rl_ldata.rl_tx_desc_cnt; i++)
1752                 sc->rl_ldata.rl_tx_desc[i].tx_m = NULL;
1753         /* Set EOR. */
1754         desc = &sc->rl_ldata.rl_tx_list[sc->rl_ldata.rl_tx_desc_cnt - 1];
1755         desc->rl_cmdstat |= htole32(RL_TDESC_CMD_EOR);
1756
1757         bus_dmamap_sync(sc->rl_ldata.rl_tx_list_tag,
1758             sc->rl_ldata.rl_tx_list_map,
1759             BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
1760
1761         sc->rl_ldata.rl_tx_prodidx = 0;
1762         sc->rl_ldata.rl_tx_considx = 0;
1763         sc->rl_ldata.rl_tx_free = sc->rl_ldata.rl_tx_desc_cnt;
1764
1765         return (0);
1766 }
1767
1768 static int
1769 re_rx_list_init(struct rl_softc *sc)
1770 {
1771         int                     error, i;
1772
1773         bzero(sc->rl_ldata.rl_rx_list,
1774             sc->rl_ldata.rl_rx_desc_cnt * sizeof(struct rl_desc));
1775         for (i = 0; i < sc->rl_ldata.rl_rx_desc_cnt; i++) {
1776                 sc->rl_ldata.rl_rx_desc[i].rx_m = NULL;
1777                 if ((error = re_newbuf(sc, i)) != 0)
1778                         return (error);
1779         }
1780
1781         /* Flush the RX descriptors */
1782
1783         bus_dmamap_sync(sc->rl_ldata.rl_rx_list_tag,
1784             sc->rl_ldata.rl_rx_list_map,
1785             BUS_DMASYNC_PREWRITE|BUS_DMASYNC_PREREAD);
1786
1787         sc->rl_ldata.rl_rx_prodidx = 0;
1788         sc->rl_head = sc->rl_tail = NULL;
1789
1790         return (0);
1791 }
1792
1793 /*
1794  * RX handler for C+ and 8169. For the gigE chips, we support
1795  * the reception of jumbo frames that have been fragmented
1796  * across multiple 2K mbuf cluster buffers.
1797  */
1798 static int
1799 re_rxeof(struct rl_softc *sc, int *rx_npktsp)
1800 {
1801         struct mbuf             *m;
1802         struct ifnet            *ifp;
1803         int                     i, total_len;
1804         struct rl_desc          *cur_rx;
1805         u_int32_t               rxstat, rxvlan;
1806         int                     maxpkt = 16, rx_npkts = 0;
1807
1808         RL_LOCK_ASSERT(sc);
1809
1810         ifp = sc->rl_ifp;
1811
1812         /* Invalidate the descriptor memory */
1813
1814         bus_dmamap_sync(sc->rl_ldata.rl_rx_list_tag,
1815             sc->rl_ldata.rl_rx_list_map,
1816             BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
1817
1818         for (i = sc->rl_ldata.rl_rx_prodidx; maxpkt > 0;
1819             i = RL_RX_DESC_NXT(sc, i)) {
1820                 cur_rx = &sc->rl_ldata.rl_rx_list[i];
1821                 rxstat = le32toh(cur_rx->rl_cmdstat);
1822                 if ((rxstat & RL_RDESC_STAT_OWN) != 0)
1823                         break;
1824                 total_len = rxstat & sc->rl_rxlenmask;
1825                 rxvlan = le32toh(cur_rx->rl_vlanctl);
1826                 m = sc->rl_ldata.rl_rx_desc[i].rx_m;
1827
1828                 if (!(rxstat & RL_RDESC_STAT_EOF)) {
1829                         if (re_newbuf(sc, i) != 0) {
1830                                 /*
1831                                  * If this is part of a multi-fragment packet,
1832                                  * discard all the pieces.
1833                                  */
1834                                 if (sc->rl_head != NULL) {
1835                                         m_freem(sc->rl_head);
1836                                         sc->rl_head = sc->rl_tail = NULL;
1837                                 }
1838                                 re_discard_rxbuf(sc, i);
1839                                 continue;
1840                         }
1841                         m->m_len = RE_RX_DESC_BUFLEN;
1842                         if (sc->rl_head == NULL)
1843                                 sc->rl_head = sc->rl_tail = m;
1844                         else {
1845                                 m->m_flags &= ~M_PKTHDR;
1846                                 sc->rl_tail->m_next = m;
1847                                 sc->rl_tail = m;
1848                         }
1849                         continue;
1850                 }
1851
1852                 /*
1853                  * NOTE: for the 8139C+, the frame length field
1854                  * is always 12 bits in size, but for the gigE chips,
1855                  * it is 13 bits (since the max RX frame length is 16K).
1856                  * Unfortunately, all 32 bits in the status word
1857                  * were already used, so to make room for the extra
1858                  * length bit, RealTek took out the 'frame alignment
1859                  * error' bit and shifted the other status bits
1860                  * over one slot. The OWN, EOR, FS and LS bits are
1861                  * still in the same places. We have already extracted
1862                  * the frame length and checked the OWN bit, so rather
1863                  * than using an alternate bit mapping, we shift the
1864                  * status bits one space to the right so we can evaluate
1865                  * them using the 8169 status as though it was in the
1866                  * same format as that of the 8139C+.
1867                  */
1868                 if (sc->rl_type == RL_8169)
1869                         rxstat >>= 1;
1870
1871                 /*
1872                  * if total_len > 2^13-1, both _RXERRSUM and _GIANT will be
1873                  * set, but if CRC is clear, it will still be a valid frame.
1874                  */
1875                 if (rxstat & RL_RDESC_STAT_RXERRSUM && !(total_len > 8191 &&
1876                     (rxstat & RL_RDESC_STAT_ERRS) == RL_RDESC_STAT_GIANT)) {
1877                         ifp->if_ierrors++;
1878                         /*
1879                          * If this is part of a multi-fragment packet,
1880                          * discard all the pieces.
1881                          */
1882                         if (sc->rl_head != NULL) {
1883                                 m_freem(sc->rl_head);
1884                                 sc->rl_head = sc->rl_tail = NULL;
1885                         }
1886                         re_discard_rxbuf(sc, i);
1887                         continue;
1888                 }
1889
1890                 /*
1891                  * If allocating a replacement mbuf fails,
1892                  * reload the current one.
1893                  */
1894
1895                 if (re_newbuf(sc, i) != 0) {
1896                         ifp->if_iqdrops++;
1897                         if (sc->rl_head != NULL) {
1898                                 m_freem(sc->rl_head);
1899                                 sc->rl_head = sc->rl_tail = NULL;
1900                         }
1901                         re_discard_rxbuf(sc, i);
1902                         continue;
1903                 }
1904
1905                 if (sc->rl_head != NULL) {
1906                         m->m_len = total_len % RE_RX_DESC_BUFLEN;
1907                         if (m->m_len == 0)
1908                                 m->m_len = RE_RX_DESC_BUFLEN;
1909                         /*
1910                          * Special case: if there's 4 bytes or less
1911                          * in this buffer, the mbuf can be discarded:
1912                          * the last 4 bytes is the CRC, which we don't
1913                          * care about anyway.
1914                          */
1915                         if (m->m_len <= ETHER_CRC_LEN) {
1916                                 sc->rl_tail->m_len -=
1917                                     (ETHER_CRC_LEN - m->m_len);
1918                                 m_freem(m);
1919                         } else {
1920                                 m->m_len -= ETHER_CRC_LEN;
1921                                 m->m_flags &= ~M_PKTHDR;
1922                                 sc->rl_tail->m_next = m;
1923                         }
1924                         m = sc->rl_head;
1925                         sc->rl_head = sc->rl_tail = NULL;
1926                         m->m_pkthdr.len = total_len - ETHER_CRC_LEN;
1927                 } else
1928                         m->m_pkthdr.len = m->m_len =
1929                             (total_len - ETHER_CRC_LEN);
1930
1931 #ifdef RE_FIXUP_RX
1932                 re_fixup_rx(m);
1933 #endif
1934                 ifp->if_ipackets++;
1935                 m->m_pkthdr.rcvif = ifp;
1936
1937                 /* Do RX checksumming if enabled */
1938
1939                 if (ifp->if_capenable & IFCAP_RXCSUM) {
1940                         if ((sc->rl_flags & RL_FLAG_DESCV2) == 0) {
1941                                 /* Check IP header checksum */
1942                                 if (rxstat & RL_RDESC_STAT_PROTOID)
1943                                         m->m_pkthdr.csum_flags |=
1944                                             CSUM_IP_CHECKED;
1945                                 if (!(rxstat & RL_RDESC_STAT_IPSUMBAD))
1946                                         m->m_pkthdr.csum_flags |=
1947                                             CSUM_IP_VALID;
1948
1949                                 /* Check TCP/UDP checksum */
1950                                 if ((RL_TCPPKT(rxstat) &&
1951                                     !(rxstat & RL_RDESC_STAT_TCPSUMBAD)) ||
1952                                     (RL_UDPPKT(rxstat) &&
1953                                      !(rxstat & RL_RDESC_STAT_UDPSUMBAD))) {
1954                                         m->m_pkthdr.csum_flags |=
1955                                                 CSUM_DATA_VALID|CSUM_PSEUDO_HDR;
1956                                         m->m_pkthdr.csum_data = 0xffff;
1957                                 }
1958                         } else {
1959                                 /*
1960                                  * RTL8168C/RTL816CP/RTL8111C/RTL8111CP
1961                                  */
1962                                 if ((rxstat & RL_RDESC_STAT_PROTOID) &&
1963                                     (rxvlan & RL_RDESC_IPV4))
1964                                         m->m_pkthdr.csum_flags |=
1965                                             CSUM_IP_CHECKED;
1966                                 if (!(rxstat & RL_RDESC_STAT_IPSUMBAD) &&
1967                                     (rxvlan & RL_RDESC_IPV4))
1968                                         m->m_pkthdr.csum_flags |=
1969                                             CSUM_IP_VALID;
1970                                 if (((rxstat & RL_RDESC_STAT_TCP) &&
1971                                     !(rxstat & RL_RDESC_STAT_TCPSUMBAD)) ||
1972                                     ((rxstat & RL_RDESC_STAT_UDP) &&
1973                                     !(rxstat & RL_RDESC_STAT_UDPSUMBAD))) {
1974                                         m->m_pkthdr.csum_flags |=
1975                                                 CSUM_DATA_VALID|CSUM_PSEUDO_HDR;
1976                                         m->m_pkthdr.csum_data = 0xffff;
1977                                 }
1978                         }
1979                 }
1980                 maxpkt--;
1981                 if (rxvlan & RL_RDESC_VLANCTL_TAG) {
1982                         m->m_pkthdr.ether_vtag =
1983                             bswap16((rxvlan & RL_RDESC_VLANCTL_DATA));
1984                         m->m_flags |= M_VLANTAG;
1985                 }
1986                 RL_UNLOCK(sc);
1987                 (*ifp->if_input)(ifp, m);
1988                 RL_LOCK(sc);
1989                 rx_npkts++;
1990         }
1991
1992         /* Flush the RX DMA ring */
1993
1994         bus_dmamap_sync(sc->rl_ldata.rl_rx_list_tag,
1995             sc->rl_ldata.rl_rx_list_map,
1996             BUS_DMASYNC_PREWRITE|BUS_DMASYNC_PREREAD);
1997
1998         sc->rl_ldata.rl_rx_prodidx = i;
1999
2000         if (rx_npktsp != NULL)
2001                 *rx_npktsp = rx_npkts;
2002         if (maxpkt)
2003                 return(EAGAIN);
2004
2005         return(0);
2006 }
2007
2008 static void
2009 re_txeof(struct rl_softc *sc)
2010 {
2011         struct ifnet            *ifp;
2012         struct rl_txdesc        *txd;
2013         u_int32_t               txstat;
2014         int                     cons;
2015
2016         cons = sc->rl_ldata.rl_tx_considx;
2017         if (cons == sc->rl_ldata.rl_tx_prodidx)
2018                 return;
2019
2020         ifp = sc->rl_ifp;
2021         /* Invalidate the TX descriptor list */
2022         bus_dmamap_sync(sc->rl_ldata.rl_tx_list_tag,
2023             sc->rl_ldata.rl_tx_list_map,
2024             BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
2025
2026         for (; cons != sc->rl_ldata.rl_tx_prodidx;
2027             cons = RL_TX_DESC_NXT(sc, cons)) {
2028                 txstat = le32toh(sc->rl_ldata.rl_tx_list[cons].rl_cmdstat);
2029                 if (txstat & RL_TDESC_STAT_OWN)
2030                         break;
2031                 /*
2032                  * We only stash mbufs in the last descriptor
2033                  * in a fragment chain, which also happens to
2034                  * be the only place where the TX status bits
2035                  * are valid.
2036                  */
2037                 if (txstat & RL_TDESC_CMD_EOF) {
2038                         txd = &sc->rl_ldata.rl_tx_desc[cons];
2039                         bus_dmamap_sync(sc->rl_ldata.rl_tx_mtag,
2040                             txd->tx_dmamap, BUS_DMASYNC_POSTWRITE);
2041                         bus_dmamap_unload(sc->rl_ldata.rl_tx_mtag,
2042                             txd->tx_dmamap);
2043                         KASSERT(txd->tx_m != NULL,
2044                             ("%s: freeing NULL mbufs!", __func__));
2045                         m_freem(txd->tx_m);
2046                         txd->tx_m = NULL;
2047                         if (txstat & (RL_TDESC_STAT_EXCESSCOL|
2048                             RL_TDESC_STAT_COLCNT))
2049                                 ifp->if_collisions++;
2050                         if (txstat & RL_TDESC_STAT_TXERRSUM)
2051                                 ifp->if_oerrors++;
2052                         else
2053                                 ifp->if_opackets++;
2054                 }
2055                 sc->rl_ldata.rl_tx_free++;
2056                 ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;
2057         }
2058         sc->rl_ldata.rl_tx_considx = cons;
2059
2060         /* No changes made to the TX ring, so no flush needed */
2061
2062         if (sc->rl_ldata.rl_tx_free != sc->rl_ldata.rl_tx_desc_cnt) {
2063 #ifdef RE_TX_MODERATION
2064                 /*
2065                  * If not all descriptors have been reaped yet, reload
2066                  * the timer so that we will eventually get another
2067                  * interrupt that will cause us to re-enter this routine.
2068                  * This is done in case the transmitter has gone idle.
2069                  */
2070                 CSR_WRITE_4(sc, RL_TIMERCNT, 1);
2071 #endif
2072         } else
2073                 sc->rl_watchdog_timer = 0;
2074 }
2075
2076 static void
2077 re_tick(void *xsc)
2078 {
2079         struct rl_softc         *sc;
2080         struct mii_data         *mii;
2081
2082         sc = xsc;
2083
2084         RL_LOCK_ASSERT(sc);
2085
2086         mii = device_get_softc(sc->rl_miibus);
2087         mii_tick(mii);
2088         if ((sc->rl_flags & RL_FLAG_LINK) == 0)
2089                 re_miibus_statchg(sc->rl_dev);
2090         /*
2091          * Reclaim transmitted frames here. Technically it is not
2092          * necessary to do here but it ensures periodic reclamation
2093          * regardless of Tx completion interrupt which seems to be
2094          * lost on PCIe based controllers under certain situations. 
2095          */
2096         re_txeof(sc);
2097         re_watchdog(sc);
2098         callout_reset(&sc->rl_stat_callout, hz, re_tick, sc);
2099 }
2100
2101 #ifdef DEVICE_POLLING
2102 static int
2103 re_poll(struct ifnet *ifp, enum poll_cmd cmd, int count)
2104 {
2105         struct rl_softc *sc = ifp->if_softc;
2106         int rx_npkts = 0;
2107
2108         RL_LOCK(sc);
2109         if (ifp->if_drv_flags & IFF_DRV_RUNNING)
2110                 rx_npkts = re_poll_locked(ifp, cmd, count);
2111         RL_UNLOCK(sc);
2112         return (rx_npkts);
2113 }
2114
2115 static int
2116 re_poll_locked(struct ifnet *ifp, enum poll_cmd cmd, int count)
2117 {
2118         struct rl_softc *sc = ifp->if_softc;
2119         int rx_npkts;
2120
2121         RL_LOCK_ASSERT(sc);
2122
2123         sc->rxcycles = count;
2124         re_rxeof(sc, &rx_npkts);
2125         re_txeof(sc);
2126
2127         if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
2128                 taskqueue_enqueue_fast(taskqueue_fast, &sc->rl_txtask);
2129
2130         if (cmd == POLL_AND_CHECK_STATUS) { /* also check status register */
2131                 u_int16_t       status;
2132
2133                 status = CSR_READ_2(sc, RL_ISR);
2134                 if (status == 0xffff)
2135                         return (rx_npkts);
2136                 if (status)
2137                         CSR_WRITE_2(sc, RL_ISR, status);
2138                 if ((status & (RL_ISR_TX_OK | RL_ISR_TX_DESC_UNAVAIL)) &&
2139                     (sc->rl_flags & RL_FLAG_PCIE))
2140                         CSR_WRITE_1(sc, sc->rl_txstart, RL_TXSTART_START);
2141
2142                 /*
2143                  * XXX check behaviour on receiver stalls.
2144                  */
2145
2146                 if (status & RL_ISR_SYSTEM_ERR)
2147                         re_init_locked(sc);
2148         }
2149         return (rx_npkts);
2150 }
2151 #endif /* DEVICE_POLLING */
2152
2153 static int
2154 re_intr(void *arg)
2155 {
2156         struct rl_softc         *sc;
2157         uint16_t                status;
2158
2159         sc = arg;
2160
2161         status = CSR_READ_2(sc, RL_ISR);
2162         if (status == 0xFFFF || (status & RL_INTRS_CPLUS) == 0)
2163                 return (FILTER_STRAY);
2164         CSR_WRITE_2(sc, RL_IMR, 0);
2165
2166         taskqueue_enqueue_fast(taskqueue_fast, &sc->rl_inttask);
2167
2168         return (FILTER_HANDLED);
2169 }
2170
2171 static void
2172 re_int_task(void *arg, int npending)
2173 {
2174         struct rl_softc         *sc;
2175         struct ifnet            *ifp;
2176         u_int16_t               status;
2177         int                     rval = 0;
2178
2179         sc = arg;
2180         ifp = sc->rl_ifp;
2181
2182         RL_LOCK(sc);
2183
2184         status = CSR_READ_2(sc, RL_ISR);
2185         CSR_WRITE_2(sc, RL_ISR, status);
2186
2187         if (sc->suspended ||
2188             (ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) {
2189                 RL_UNLOCK(sc);
2190                 return;
2191         }
2192
2193 #ifdef DEVICE_POLLING
2194         if  (ifp->if_capenable & IFCAP_POLLING) {
2195                 RL_UNLOCK(sc);
2196                 return;
2197         }
2198 #endif
2199
2200         if (status & (RL_ISR_RX_OK|RL_ISR_RX_ERR|RL_ISR_FIFO_OFLOW))
2201                 rval = re_rxeof(sc, NULL);
2202
2203         /*
2204          * Some chips will ignore a second TX request issued
2205          * while an existing transmission is in progress. If
2206          * the transmitter goes idle but there are still
2207          * packets waiting to be sent, we need to restart the
2208          * channel here to flush them out. This only seems to
2209          * be required with the PCIe devices.
2210          */
2211         if ((status & (RL_ISR_TX_OK | RL_ISR_TX_DESC_UNAVAIL)) &&
2212             (sc->rl_flags & RL_FLAG_PCIE))
2213                 CSR_WRITE_1(sc, sc->rl_txstart, RL_TXSTART_START);
2214         if (status & (
2215 #ifdef RE_TX_MODERATION
2216             RL_ISR_TIMEOUT_EXPIRED|
2217 #else
2218             RL_ISR_TX_OK|
2219 #endif
2220             RL_ISR_TX_ERR|RL_ISR_TX_DESC_UNAVAIL))
2221                 re_txeof(sc);
2222
2223         if (status & RL_ISR_SYSTEM_ERR)
2224                 re_init_locked(sc);
2225
2226         if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
2227                 taskqueue_enqueue_fast(taskqueue_fast, &sc->rl_txtask);
2228
2229         RL_UNLOCK(sc);
2230
2231         if ((CSR_READ_2(sc, RL_ISR) & RL_INTRS_CPLUS) || rval) {
2232                 taskqueue_enqueue_fast(taskqueue_fast, &sc->rl_inttask);
2233                 return;
2234         }
2235
2236         CSR_WRITE_2(sc, RL_IMR, RL_INTRS_CPLUS);
2237 }
2238
2239 static int
2240 re_encap(struct rl_softc *sc, struct mbuf **m_head)
2241 {
2242         struct rl_txdesc        *txd, *txd_last;
2243         bus_dma_segment_t       segs[RL_NTXSEGS];
2244         bus_dmamap_t            map;
2245         struct mbuf             *m_new;
2246         struct rl_desc          *desc;
2247         int                     nsegs, prod;
2248         int                     i, error, ei, si;
2249         int                     padlen;
2250         uint32_t                cmdstat, csum_flags, vlanctl;
2251
2252         RL_LOCK_ASSERT(sc);
2253         M_ASSERTPKTHDR((*m_head));
2254
2255         /*
2256          * With some of the RealTek chips, using the checksum offload
2257          * support in conjunction with the autopadding feature results
2258          * in the transmission of corrupt frames. For example, if we
2259          * need to send a really small IP fragment that's less than 60
2260          * bytes in size, and IP header checksumming is enabled, the
2261          * resulting ethernet frame that appears on the wire will
2262          * have garbled payload. To work around this, if TX IP checksum
2263          * offload is enabled, we always manually pad short frames out
2264          * to the minimum ethernet frame size.
2265          */
2266         if ((sc->rl_flags & RL_FLAG_AUTOPAD) == 0 &&
2267             (*m_head)->m_pkthdr.len < RL_IP4CSUMTX_PADLEN &&
2268             ((*m_head)->m_pkthdr.csum_flags & CSUM_IP) != 0) {
2269                 padlen = RL_MIN_FRAMELEN - (*m_head)->m_pkthdr.len;
2270                 if (M_WRITABLE(*m_head) == 0) {
2271                         /* Get a writable copy. */
2272                         m_new = m_dup(*m_head, M_DONTWAIT);
2273                         m_freem(*m_head);
2274                         if (m_new == NULL) {
2275                                 *m_head = NULL;
2276                                 return (ENOBUFS);
2277                         }
2278                         *m_head = m_new;
2279                 }
2280                 if ((*m_head)->m_next != NULL ||
2281                     M_TRAILINGSPACE(*m_head) < padlen) {
2282                         m_new = m_defrag(*m_head, M_DONTWAIT);
2283                         if (m_new == NULL) {
2284                                 m_freem(*m_head);
2285                                 *m_head = NULL;
2286                                 return (ENOBUFS);
2287                         }
2288                 } else
2289                         m_new = *m_head;
2290
2291                 /*
2292                  * Manually pad short frames, and zero the pad space
2293                  * to avoid leaking data.
2294                  */
2295                 bzero(mtod(m_new, char *) + m_new->m_pkthdr.len, padlen);
2296                 m_new->m_pkthdr.len += padlen;
2297                 m_new->m_len = m_new->m_pkthdr.len;
2298                 *m_head = m_new;
2299         }
2300
2301         prod = sc->rl_ldata.rl_tx_prodidx;
2302         txd = &sc->rl_ldata.rl_tx_desc[prod];
2303         error = bus_dmamap_load_mbuf_sg(sc->rl_ldata.rl_tx_mtag, txd->tx_dmamap,
2304             *m_head, segs, &nsegs, BUS_DMA_NOWAIT);
2305         if (error == EFBIG) {
2306                 m_new = m_collapse(*m_head, M_DONTWAIT, RL_NTXSEGS);
2307                 if (m_new == NULL) {
2308                         m_freem(*m_head);
2309                         *m_head = NULL;
2310                         return (ENOBUFS);
2311                 }
2312                 *m_head = m_new;
2313                 error = bus_dmamap_load_mbuf_sg(sc->rl_ldata.rl_tx_mtag,
2314                     txd->tx_dmamap, *m_head, segs, &nsegs, BUS_DMA_NOWAIT);
2315                 if (error != 0) {
2316                         m_freem(*m_head);
2317                         *m_head = NULL;
2318                         return (error);
2319                 }
2320         } else if (error != 0)
2321                 return (error);
2322         if (nsegs == 0) {
2323                 m_freem(*m_head);
2324                 *m_head = NULL;
2325                 return (EIO);
2326         }
2327
2328         /* Check for number of available descriptors. */
2329         if (sc->rl_ldata.rl_tx_free - nsegs <= 1) {
2330                 bus_dmamap_unload(sc->rl_ldata.rl_tx_mtag, txd->tx_dmamap);
2331                 return (ENOBUFS);
2332         }
2333
2334         bus_dmamap_sync(sc->rl_ldata.rl_tx_mtag, txd->tx_dmamap,
2335             BUS_DMASYNC_PREWRITE);
2336
2337         /*
2338          * Set up checksum offload. Note: checksum offload bits must
2339          * appear in all descriptors of a multi-descriptor transmit
2340          * attempt. This is according to testing done with an 8169
2341          * chip. This is a requirement.
2342          */
2343         vlanctl = 0;
2344         csum_flags = 0;
2345         if (((*m_head)->m_pkthdr.csum_flags & CSUM_TSO) != 0)
2346                 csum_flags = RL_TDESC_CMD_LGSEND |
2347                     ((uint32_t)(*m_head)->m_pkthdr.tso_segsz <<
2348                     RL_TDESC_CMD_MSSVAL_SHIFT);
2349         else {
2350                 /*
2351                  * Unconditionally enable IP checksum if TCP or UDP
2352                  * checksum is required. Otherwise, TCP/UDP checksum
2353                  * does't make effects.
2354                  */
2355                 if (((*m_head)->m_pkthdr.csum_flags & RE_CSUM_FEATURES) != 0) {
2356                         if ((sc->rl_flags & RL_FLAG_DESCV2) == 0) {
2357                                 csum_flags |= RL_TDESC_CMD_IPCSUM;
2358                                 if (((*m_head)->m_pkthdr.csum_flags &
2359                                     CSUM_TCP) != 0)
2360                                         csum_flags |= RL_TDESC_CMD_TCPCSUM;
2361                                 if (((*m_head)->m_pkthdr.csum_flags &
2362                                     CSUM_UDP) != 0)
2363                                         csum_flags |= RL_TDESC_CMD_UDPCSUM;
2364                         } else {
2365                                 vlanctl |= RL_TDESC_CMD_IPCSUMV2;
2366                                 if (((*m_head)->m_pkthdr.csum_flags &
2367                                     CSUM_TCP) != 0)
2368                                         vlanctl |= RL_TDESC_CMD_TCPCSUMV2;
2369                                 if (((*m_head)->m_pkthdr.csum_flags &
2370                                     CSUM_UDP) != 0)
2371                                         vlanctl |= RL_TDESC_CMD_UDPCSUMV2;
2372                         }
2373                 }
2374         }
2375
2376         /*
2377          * Set up hardware VLAN tagging. Note: vlan tag info must
2378          * appear in all descriptors of a multi-descriptor
2379          * transmission attempt.
2380          */
2381         if ((*m_head)->m_flags & M_VLANTAG)
2382                 vlanctl |= bswap16((*m_head)->m_pkthdr.ether_vtag) |
2383                     RL_TDESC_VLANCTL_TAG;
2384
2385         si = prod;
2386         for (i = 0; i < nsegs; i++, prod = RL_TX_DESC_NXT(sc, prod)) {
2387                 desc = &sc->rl_ldata.rl_tx_list[prod];
2388                 desc->rl_vlanctl = htole32(vlanctl);
2389                 desc->rl_bufaddr_lo = htole32(RL_ADDR_LO(segs[i].ds_addr));
2390                 desc->rl_bufaddr_hi = htole32(RL_ADDR_HI(segs[i].ds_addr));
2391                 cmdstat = segs[i].ds_len;
2392                 if (i != 0)
2393                         cmdstat |= RL_TDESC_CMD_OWN;
2394                 if (prod == sc->rl_ldata.rl_tx_desc_cnt - 1)
2395                         cmdstat |= RL_TDESC_CMD_EOR;
2396                 desc->rl_cmdstat = htole32(cmdstat | csum_flags);
2397                 sc->rl_ldata.rl_tx_free--;
2398         }
2399         /* Update producer index. */
2400         sc->rl_ldata.rl_tx_prodidx = prod;
2401
2402         /* Set EOF on the last descriptor. */
2403         ei = RL_TX_DESC_PRV(sc, prod);
2404         desc = &sc->rl_ldata.rl_tx_list[ei];
2405         desc->rl_cmdstat |= htole32(RL_TDESC_CMD_EOF);
2406
2407         desc = &sc->rl_ldata.rl_tx_list[si];
2408         /* Set SOF and transfer ownership of packet to the chip. */
2409         desc->rl_cmdstat |= htole32(RL_TDESC_CMD_OWN | RL_TDESC_CMD_SOF);
2410
2411         /*
2412          * Ensure that the map for this transmission
2413          * is placed at the array index of the last descriptor
2414          * in this chain.  (Swap last and first dmamaps.)
2415          */
2416         txd_last = &sc->rl_ldata.rl_tx_desc[ei];
2417         map = txd->tx_dmamap;
2418         txd->tx_dmamap = txd_last->tx_dmamap;
2419         txd_last->tx_dmamap = map;
2420         txd_last->tx_m = *m_head;
2421
2422         return (0);
2423 }
2424
/*
 * Deferred-transmit task handler.  'arg' is the interface whose send
 * queue should be serviced; the pending count is not used.
 */
static void
re_tx_task(void *arg, int npending)
{

	re_start((struct ifnet *)arg);
}
2433
2434 /*
2435  * Main transmit routine for C+ and gigE NICs.
2436  */
2437 static void
2438 re_start(struct ifnet *ifp)
2439 {
2440         struct rl_softc         *sc;
2441         struct mbuf             *m_head;
2442         int                     queued;
2443
2444         sc = ifp->if_softc;
2445
2446         RL_LOCK(sc);
2447
2448         if ((ifp->if_drv_flags & (IFF_DRV_RUNNING | IFF_DRV_OACTIVE)) !=
2449             IFF_DRV_RUNNING || (sc->rl_flags & RL_FLAG_LINK) == 0) {
2450                 RL_UNLOCK(sc);
2451                 return;
2452         }
2453
2454         for (queued = 0; !IFQ_DRV_IS_EMPTY(&ifp->if_snd) &&
2455             sc->rl_ldata.rl_tx_free > 1;) {
2456                 IFQ_DRV_DEQUEUE(&ifp->if_snd, m_head);
2457                 if (m_head == NULL)
2458                         break;
2459
2460                 if (re_encap(sc, &m_head) != 0) {
2461                         if (m_head == NULL)
2462                                 break;
2463                         IFQ_DRV_PREPEND(&ifp->if_snd, m_head);
2464                         ifp->if_drv_flags |= IFF_DRV_OACTIVE;
2465                         break;
2466                 }
2467
2468                 /*
2469                  * If there's a BPF listener, bounce a copy of this frame
2470                  * to him.
2471                  */
2472                 ETHER_BPF_MTAP(ifp, m_head);
2473
2474                 queued++;
2475         }
2476
2477         if (queued == 0) {
2478 #ifdef RE_TX_MODERATION
2479                 if (sc->rl_ldata.rl_tx_free != sc->rl_ldata.rl_tx_desc_cnt)
2480                         CSR_WRITE_4(sc, RL_TIMERCNT, 1);
2481 #endif
2482                 RL_UNLOCK(sc);
2483                 return;
2484         }
2485
2486         /* Flush the TX descriptors */
2487
2488         bus_dmamap_sync(sc->rl_ldata.rl_tx_list_tag,
2489             sc->rl_ldata.rl_tx_list_map,
2490             BUS_DMASYNC_PREWRITE|BUS_DMASYNC_PREREAD);
2491
2492         CSR_WRITE_1(sc, sc->rl_txstart, RL_TXSTART_START);
2493
2494 #ifdef RE_TX_MODERATION
2495         /*
2496          * Use the countdown timer for interrupt moderation.
2497          * 'TX done' interrupts are disabled. Instead, we reset the
2498          * countdown timer, which will begin counting until it hits
2499          * the value in the TIMERINT register, and then trigger an
2500          * interrupt. Each time we write to the TIMERCNT register,
2501          * the timer count is reset to 0.
2502          */
2503         CSR_WRITE_4(sc, RL_TIMERCNT, 1);
2504 #endif
2505
2506         /*
2507          * Set a timeout in case the chip goes out to lunch.
2508          */
2509         sc->rl_watchdog_timer = 5;
2510
2511         RL_UNLOCK(sc);
2512 }
2513
/*
 * Unlocked entry point for (re)initializing the interface: acquires the
 * softc lock around re_init_locked().  'xsc' is the driver softc.
 */
static void
re_init(void *xsc)
{
	struct rl_softc		*sc;

	sc = xsc;

	RL_LOCK(sc);
	re_init_locked(sc);
	RL_UNLOCK(sc);
}
2523
2524 static void
2525 re_init_locked(struct rl_softc *sc)
2526 {
2527         struct ifnet            *ifp = sc->rl_ifp;
2528         struct mii_data         *mii;
2529         uint32_t                reg;
2530         uint16_t                cfg;
2531         union {
2532                 uint32_t align_dummy;
2533                 u_char eaddr[ETHER_ADDR_LEN];
2534         } eaddr;
2535
2536         RL_LOCK_ASSERT(sc);
2537
2538         mii = device_get_softc(sc->rl_miibus);
2539
2540         /*
2541          * Cancel pending I/O and free all RX/TX buffers.
2542          */
2543         re_stop(sc);
2544
2545         /* Put controller into known state. */
2546         re_reset(sc);
2547
2548         /*
2549          * Enable C+ RX and TX mode, as well as VLAN stripping and
2550          * RX checksum offload. We must configure the C+ register
2551          * before all others.
2552          */
2553         cfg = RL_CPLUSCMD_PCI_MRW;
2554         if ((ifp->if_capenable & IFCAP_RXCSUM) != 0)
2555                 cfg |= RL_CPLUSCMD_RXCSUM_ENB;
2556         if ((ifp->if_capenable & IFCAP_VLAN_HWTAGGING) != 0)
2557                 cfg |= RL_CPLUSCMD_VLANSTRIP;
2558         if ((sc->rl_flags & RL_FLAG_MACSTAT) != 0) {
2559                 cfg |= RL_CPLUSCMD_MACSTAT_DIS;
2560                 /* XXX magic. */
2561                 cfg |= 0x0001;
2562         } else
2563                 cfg |= RL_CPLUSCMD_RXENB | RL_CPLUSCMD_TXENB;
2564         CSR_WRITE_2(sc, RL_CPLUS_CMD, cfg);
2565         if (sc->rl_hwrev == RL_HWREV_8169_8110SC ||
2566             sc->rl_hwrev == RL_HWREV_8169_8110SCE) {
2567                 reg = 0x000fff00;
2568                 if ((CSR_READ_1(sc, RL_CFG2) & RL_CFG2_PCI66MHZ) != 0)
2569                         reg |= 0x000000ff;
2570                 if (sc->rl_hwrev == RL_HWREV_8169_8110SCE)
2571                         reg |= 0x00f00000;
2572                 CSR_WRITE_4(sc, 0x7c, reg);
2573                 /* Disable interrupt mitigation. */
2574                 CSR_WRITE_2(sc, 0xe2, 0);
2575         }
2576         /*
2577          * Disable TSO if interface MTU size is greater than MSS
2578          * allowed in controller.
2579          */
2580         if (ifp->if_mtu > RL_TSO_MTU && (ifp->if_capenable & IFCAP_TSO4) != 0) {
2581                 ifp->if_capenable &= ~IFCAP_TSO4;
2582                 ifp->if_hwassist &= ~CSUM_TSO;
2583         }
2584
2585         /*
2586          * Init our MAC address.  Even though the chipset
2587          * documentation doesn't mention it, we need to enter "Config
2588          * register write enable" mode to modify the ID registers.
2589          */
2590         /* Copy MAC address on stack to align. */
2591         bcopy(IF_LLADDR(ifp), eaddr.eaddr, ETHER_ADDR_LEN);
2592         CSR_WRITE_1(sc, RL_EECMD, RL_EEMODE_WRITECFG);
2593         CSR_WRITE_4(sc, RL_IDR0,
2594             htole32(*(u_int32_t *)(&eaddr.eaddr[0])));
2595         CSR_WRITE_4(sc, RL_IDR4,
2596             htole32(*(u_int32_t *)(&eaddr.eaddr[4])));
2597         CSR_WRITE_1(sc, RL_EECMD, RL_EEMODE_OFF);
2598
2599         /*
2600          * For C+ mode, initialize the RX descriptors and mbufs.
2601          */
2602         re_rx_list_init(sc);
2603         re_tx_list_init(sc);
2604
2605         /*
2606          * Load the addresses of the RX and TX lists into the chip.
2607          */
2608
2609         CSR_WRITE_4(sc, RL_RXLIST_ADDR_HI,
2610             RL_ADDR_HI(sc->rl_ldata.rl_rx_list_addr));
2611         CSR_WRITE_4(sc, RL_RXLIST_ADDR_LO,
2612             RL_ADDR_LO(sc->rl_ldata.rl_rx_list_addr));
2613
2614         CSR_WRITE_4(sc, RL_TXLIST_ADDR_HI,
2615             RL_ADDR_HI(sc->rl_ldata.rl_tx_list_addr));
2616         CSR_WRITE_4(sc, RL_TXLIST_ADDR_LO,
2617             RL_ADDR_LO(sc->rl_ldata.rl_tx_list_addr));
2618
2619         /*
2620          * Enable transmit and receive.
2621          */
2622         CSR_WRITE_1(sc, RL_COMMAND, RL_CMD_TX_ENB|RL_CMD_RX_ENB);
2623
2624         /*
2625          * Set the initial TX configuration.
2626          */
2627         if (sc->rl_testmode) {
2628                 if (sc->rl_type == RL_8169)
2629                         CSR_WRITE_4(sc, RL_TXCFG,
2630                             RL_TXCFG_CONFIG|RL_LOOPTEST_ON);
2631                 else
2632                         CSR_WRITE_4(sc, RL_TXCFG,
2633                             RL_TXCFG_CONFIG|RL_LOOPTEST_ON_CPLUS);
2634         } else
2635                 CSR_WRITE_4(sc, RL_TXCFG, RL_TXCFG_CONFIG);
2636
2637         CSR_WRITE_1(sc, RL_EARLY_TX_THRESH, 16);
2638
2639         /*
2640          * Set the initial RX configuration.
2641          */
2642         re_set_rxmode(sc);
2643
2644 #ifdef DEVICE_POLLING
2645         /*
2646          * Disable interrupts if we are polling.
2647          */
2648         if (ifp->if_capenable & IFCAP_POLLING)
2649                 CSR_WRITE_2(sc, RL_IMR, 0);
2650         else    /* otherwise ... */
2651 #endif
2652
2653         /*
2654          * Enable interrupts.
2655          */
2656         if (sc->rl_testmode)
2657                 CSR_WRITE_2(sc, RL_IMR, 0);
2658         else
2659                 CSR_WRITE_2(sc, RL_IMR, RL_INTRS_CPLUS);
2660         CSR_WRITE_2(sc, RL_ISR, RL_INTRS_CPLUS);
2661
2662         /* Set initial TX threshold */
2663         sc->rl_txthresh = RL_TX_THRESH_INIT;
2664
2665         /* Start RX/TX process. */
2666         CSR_WRITE_4(sc, RL_MISSEDPKT, 0);
2667 #ifdef notdef
2668         /* Enable receiver and transmitter. */
2669         CSR_WRITE_1(sc, RL_COMMAND, RL_CMD_TX_ENB|RL_CMD_RX_ENB);
2670 #endif
2671
2672 #ifdef RE_TX_MODERATION
2673         /*
2674          * Initialize the timer interrupt register so that
2675          * a timer interrupt will be generated once the timer
2676          * reaches a certain number of ticks. The timer is
2677          * reloaded on each transmit. This gives us TX interrupt
2678          * moderation, which dramatically improves TX frame rate.
2679          */
2680         if (sc->rl_type == RL_8169)
2681                 CSR_WRITE_4(sc, RL_TIMERINT_8169, 0x800);
2682         else
2683                 CSR_WRITE_4(sc, RL_TIMERINT, 0x400);
2684 #endif
2685
2686         /*
2687          * For 8169 gigE NICs, set the max allowed RX packet
2688          * size so we can receive jumbo frames.
2689          */
2690         if (sc->rl_type == RL_8169)
2691                 CSR_WRITE_2(sc, RL_MAXRXPKTLEN, 16383);
2692
2693         if (sc->rl_testmode)
2694                 return;
2695
2696         mii_mediachg(mii);
2697
2698         CSR_WRITE_1(sc, RL_CFG1, CSR_READ_1(sc, RL_CFG1) | RL_CFG1_DRVLOAD);
2699
2700         ifp->if_drv_flags |= IFF_DRV_RUNNING;
2701         ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;
2702
2703         sc->rl_flags &= ~RL_FLAG_LINK;
2704         sc->rl_watchdog_timer = 0;
2705         callout_reset(&sc->rl_stat_callout, hz, re_tick, sc);
2706 }
2707
2708 /*
2709  * Set media options.
2710  */
2711 static int
2712 re_ifmedia_upd(struct ifnet *ifp)
2713 {
2714         struct rl_softc         *sc;
2715         struct mii_data         *mii;
2716         int                     error;
2717
2718         sc = ifp->if_softc;
2719         mii = device_get_softc(sc->rl_miibus);
2720         RL_LOCK(sc);
2721         error = mii_mediachg(mii);
2722         RL_UNLOCK(sc);
2723
2724         return (error);
2725 }
2726
2727 /*
2728  * Report current media status.
2729  */
2730 static void
2731 re_ifmedia_sts(struct ifnet *ifp, struct ifmediareq *ifmr)
2732 {
2733         struct rl_softc         *sc;
2734         struct mii_data         *mii;
2735
2736         sc = ifp->if_softc;
2737         mii = device_get_softc(sc->rl_miibus);
2738
2739         RL_LOCK(sc);
2740         mii_pollstat(mii);
2741         RL_UNLOCK(sc);
2742         ifmr->ifm_active = mii->mii_media_active;
2743         ifmr->ifm_status = mii->mii_media_status;
2744 }
2745
2746 static int
2747 re_ioctl(struct ifnet *ifp, u_long command, caddr_t data)
2748 {
2749         struct rl_softc         *sc = ifp->if_softc;
2750         struct ifreq            *ifr = (struct ifreq *) data;
2751         struct mii_data         *mii;
2752         int                     error = 0;
2753
2754         switch (command) {
2755         case SIOCSIFMTU:
2756                 if (ifr->ifr_mtu < ETHERMIN || ifr->ifr_mtu > RL_JUMBO_MTU) {
2757                         error = EINVAL;
2758                         break;
2759                 }
2760                 if ((sc->rl_flags & RL_FLAG_NOJUMBO) != 0 &&
2761                     ifr->ifr_mtu > RL_MAX_FRAMELEN) {
2762                         error = EINVAL;
2763                         break;
2764                 }
2765                 RL_LOCK(sc);
2766                 if (ifp->if_mtu != ifr->ifr_mtu)
2767                         ifp->if_mtu = ifr->ifr_mtu;
2768                 if (ifp->if_mtu > RL_TSO_MTU &&
2769                     (ifp->if_capenable & IFCAP_TSO4) != 0) {
2770                         ifp->if_capenable &= ~IFCAP_TSO4;
2771                         ifp->if_hwassist &= ~CSUM_TSO;
2772                 }
2773                 RL_UNLOCK(sc);
2774                 break;
2775         case SIOCSIFFLAGS:
2776                 RL_LOCK(sc);
2777                 if ((ifp->if_flags & IFF_UP) != 0) {
2778                         if ((ifp->if_drv_flags & IFF_DRV_RUNNING) != 0) {
2779                                 if (((ifp->if_flags ^ sc->rl_if_flags)
2780                                     & (IFF_PROMISC | IFF_ALLMULTI)) != 0)
2781                                         re_set_rxmode(sc);
2782                         } else
2783                                 re_init_locked(sc);
2784                 } else {
2785                         if ((ifp->if_drv_flags & IFF_DRV_RUNNING) != 0)
2786                                 re_stop(sc);
2787                 }
2788                 sc->rl_if_flags = ifp->if_flags;
2789                 RL_UNLOCK(sc);
2790                 break;
2791         case SIOCADDMULTI:
2792         case SIOCDELMULTI:
2793                 RL_LOCK(sc);
2794                 re_set_rxmode(sc);
2795                 RL_UNLOCK(sc);
2796                 break;
2797         case SIOCGIFMEDIA:
2798         case SIOCSIFMEDIA:
2799                 mii = device_get_softc(sc->rl_miibus);
2800                 error = ifmedia_ioctl(ifp, ifr, &mii->mii_media, command);
2801                 break;
2802         case SIOCSIFCAP:
2803             {
2804                 int mask, reinit;
2805
2806                 mask = ifr->ifr_reqcap ^ ifp->if_capenable;
2807                 reinit = 0;
2808 #ifdef DEVICE_POLLING
2809                 if (mask & IFCAP_POLLING) {
2810                         if (ifr->ifr_reqcap & IFCAP_POLLING) {
2811                                 error = ether_poll_register(re_poll, ifp);
2812                                 if (error)
2813                                         return(error);
2814                                 RL_LOCK(sc);
2815                                 /* Disable interrupts */
2816                                 CSR_WRITE_2(sc, RL_IMR, 0x0000);
2817                                 ifp->if_capenable |= IFCAP_POLLING;
2818                                 RL_UNLOCK(sc);
2819                         } else {
2820                                 error = ether_poll_deregister(ifp);
2821                                 /* Enable interrupts. */
2822                                 RL_LOCK(sc);
2823                                 CSR_WRITE_2(sc, RL_IMR, RL_INTRS_CPLUS);
2824                                 ifp->if_capenable &= ~IFCAP_POLLING;
2825                                 RL_UNLOCK(sc);
2826                         }
2827                 }
2828 #endif /* DEVICE_POLLING */
2829                 if (mask & IFCAP_HWCSUM) {
2830                         ifp->if_capenable ^= IFCAP_HWCSUM;
2831                         if (ifp->if_capenable & IFCAP_TXCSUM)
2832                                 ifp->if_hwassist |= RE_CSUM_FEATURES;
2833                         else
2834                                 ifp->if_hwassist &= ~RE_CSUM_FEATURES;
2835                         reinit = 1;
2836                 }
2837                 if (mask & IFCAP_VLAN_HWTAGGING) {
2838                         ifp->if_capenable ^= IFCAP_VLAN_HWTAGGING;
2839                         reinit = 1;
2840                 }
2841                 if (mask & IFCAP_TSO4) {
2842                         ifp->if_capenable ^= IFCAP_TSO4;
2843                         if ((IFCAP_TSO4 & ifp->if_capenable) &&
2844                             (IFCAP_TSO4 & ifp->if_capabilities))
2845                                 ifp->if_hwassist |= CSUM_TSO;
2846                         else
2847                                 ifp->if_hwassist &= ~CSUM_TSO;
2848                         if (ifp->if_mtu > RL_TSO_MTU &&
2849                             (ifp->if_capenable & IFCAP_TSO4) != 0) {
2850                                 ifp->if_capenable &= ~IFCAP_TSO4;
2851                                 ifp->if_hwassist &= ~CSUM_TSO;
2852                         }
2853                 }
2854                 if ((mask & IFCAP_WOL) != 0 &&
2855                     (ifp->if_capabilities & IFCAP_WOL) != 0) {
2856                         if ((mask & IFCAP_WOL_UCAST) != 0)
2857                                 ifp->if_capenable ^= IFCAP_WOL_UCAST;
2858                         if ((mask & IFCAP_WOL_MCAST) != 0)
2859                                 ifp->if_capenable ^= IFCAP_WOL_MCAST;
2860                         if ((mask & IFCAP_WOL_MAGIC) != 0)
2861                                 ifp->if_capenable ^= IFCAP_WOL_MAGIC;
2862                 }
2863                 if (reinit && ifp->if_drv_flags & IFF_DRV_RUNNING)
2864                         re_init(sc);
2865                 VLAN_CAPABILITIES(ifp);
2866             }
2867                 break;
2868         default:
2869                 error = ether_ioctl(ifp, command, data);
2870                 break;
2871         }
2872
2873         return (error);
2874 }
2875
2876 static void
2877 re_watchdog(struct rl_softc *sc)
2878 {
2879         struct ifnet            *ifp;
2880
2881         RL_LOCK_ASSERT(sc);
2882
2883         if (sc->rl_watchdog_timer == 0 || --sc->rl_watchdog_timer != 0)
2884                 return;
2885
2886         ifp = sc->rl_ifp;
2887         re_txeof(sc);
2888         if (sc->rl_ldata.rl_tx_free == sc->rl_ldata.rl_tx_desc_cnt) {
2889                 if_printf(ifp, "watchdog timeout (missed Tx interrupts) "
2890                     "-- recovering\n");
2891                 if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
2892                         taskqueue_enqueue_fast(taskqueue_fast, &sc->rl_txtask);
2893                 return;
2894         }
2895
2896         if_printf(ifp, "watchdog timeout\n");
2897         ifp->if_oerrors++;
2898
2899         re_rxeof(sc, NULL);
2900         re_init_locked(sc);
2901         if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
2902                 taskqueue_enqueue_fast(taskqueue_fast, &sc->rl_txtask);
2903 }
2904
2905 /*
2906  * Stop the adapter and free any mbufs allocated to the
2907  * RX and TX lists.
2908  */
2909 static void
2910 re_stop(struct rl_softc *sc)
2911 {
2912         int                     i;
2913         struct ifnet            *ifp;
2914         struct rl_txdesc        *txd;
2915         struct rl_rxdesc        *rxd;
2916
2917         RL_LOCK_ASSERT(sc);
2918
2919         ifp = sc->rl_ifp;
2920
2921         sc->rl_watchdog_timer = 0;
2922         callout_stop(&sc->rl_stat_callout);
2923         ifp->if_drv_flags &= ~(IFF_DRV_RUNNING | IFF_DRV_OACTIVE);
2924
2925         if ((sc->rl_flags & RL_FLAG_CMDSTOP) != 0)
2926                 CSR_WRITE_1(sc, RL_COMMAND, RL_CMD_STOPREQ | RL_CMD_TX_ENB |
2927                     RL_CMD_RX_ENB);
2928         else
2929                 CSR_WRITE_1(sc, RL_COMMAND, 0x00);
2930         DELAY(1000);
2931         CSR_WRITE_2(sc, RL_IMR, 0x0000);
2932         CSR_WRITE_2(sc, RL_ISR, 0xFFFF);
2933
2934         if (sc->rl_head != NULL) {
2935                 m_freem(sc->rl_head);
2936                 sc->rl_head = sc->rl_tail = NULL;
2937         }
2938
2939         /* Free the TX list buffers. */
2940
2941         for (i = 0; i < sc->rl_ldata.rl_tx_desc_cnt; i++) {
2942                 txd = &sc->rl_ldata.rl_tx_desc[i];
2943                 if (txd->tx_m != NULL) {
2944                         bus_dmamap_sync(sc->rl_ldata.rl_tx_mtag,
2945                             txd->tx_dmamap, BUS_DMASYNC_POSTWRITE);
2946                         bus_dmamap_unload(sc->rl_ldata.rl_tx_mtag,
2947                             txd->tx_dmamap);
2948                         m_freem(txd->tx_m);
2949                         txd->tx_m = NULL;
2950                 }
2951         }
2952
2953         /* Free the RX list buffers. */
2954
2955         for (i = 0; i < sc->rl_ldata.rl_rx_desc_cnt; i++) {
2956                 rxd = &sc->rl_ldata.rl_rx_desc[i];
2957                 if (rxd->rx_m != NULL) {
2958                         bus_dmamap_sync(sc->rl_ldata.rl_tx_mtag,
2959                             rxd->rx_dmamap, BUS_DMASYNC_POSTREAD);
2960                         bus_dmamap_unload(sc->rl_ldata.rl_rx_mtag,
2961                             rxd->rx_dmamap);
2962                         m_freem(rxd->rx_m);
2963                         rxd->rx_m = NULL;
2964                 }
2965         }
2966 }
2967
2968 /*
2969  * Device suspend routine.  Stop the interface and save some PCI
2970  * settings in case the BIOS doesn't restore them properly on
2971  * resume.
2972  */
2973 static int
2974 re_suspend(device_t dev)
2975 {
2976         struct rl_softc         *sc;
2977
2978         sc = device_get_softc(dev);
2979
2980         RL_LOCK(sc);
2981         re_stop(sc);
2982         re_setwol(sc);
2983         sc->suspended = 1;
2984         RL_UNLOCK(sc);
2985
2986         return (0);
2987 }
2988
2989 /*
2990  * Device resume routine.  Restore some PCI settings in case the BIOS
2991  * doesn't, re-enable busmastering, and restart the interface if
2992  * appropriate.
2993  */
2994 static int
2995 re_resume(device_t dev)
2996 {
2997         struct rl_softc         *sc;
2998         struct ifnet            *ifp;
2999
3000         sc = device_get_softc(dev);
3001
3002         RL_LOCK(sc);
3003
3004         ifp = sc->rl_ifp;
3005         /* Take controller out of sleep mode. */
3006         if ((sc->rl_flags & RL_FLAG_MACSLEEP) != 0) {
3007                 if ((CSR_READ_1(sc, RL_MACDBG) & 0x80) == 0x80)
3008                         CSR_WRITE_1(sc, RL_GPIO,
3009                             CSR_READ_1(sc, RL_GPIO) | 0x01);
3010         }
3011
3012         /* reinitialize interface if necessary */
3013         if (ifp->if_flags & IFF_UP)
3014                 re_init_locked(sc);
3015
3016         /*
3017          * Clear WOL matching such that normal Rx filtering
3018          * wouldn't interfere with WOL patterns.
3019          */
3020         re_clrwol(sc);
3021         sc->suspended = 0;
3022         RL_UNLOCK(sc);
3023
3024         return (0);
3025 }
3026
3027 /*
3028  * Stop all chip I/O so that the kernel's probe routines don't
3029  * get confused by errant DMAs when rebooting.
3030  */
3031 static int
3032 re_shutdown(device_t dev)
3033 {
3034         struct rl_softc         *sc;
3035
3036         sc = device_get_softc(dev);
3037
3038         RL_LOCK(sc);
3039         re_stop(sc);
3040         /*
3041          * Mark interface as down since otherwise we will panic if
3042          * interrupt comes in later on, which can happen in some
3043          * cases.
3044          */
3045         sc->rl_ifp->if_flags &= ~IFF_UP;
3046         re_setwol(sc);
3047         RL_UNLOCK(sc);
3048
3049         return (0);
3050 }
3051
3052 static void
3053 re_setwol(struct rl_softc *sc)
3054 {
3055         struct ifnet            *ifp;
3056         int                     pmc;
3057         uint16_t                pmstat;
3058         uint8_t                 v;
3059
3060         RL_LOCK_ASSERT(sc);
3061
3062         if (pci_find_extcap(sc->rl_dev, PCIY_PMG, &pmc) != 0)
3063                 return;
3064
3065         ifp = sc->rl_ifp;
3066         /* Put controller into sleep mode. */
3067         if ((sc->rl_flags & RL_FLAG_MACSLEEP) != 0) {
3068                 if ((CSR_READ_1(sc, RL_MACDBG) & 0x80) == 0x80)
3069                         CSR_WRITE_1(sc, RL_GPIO,
3070                             CSR_READ_1(sc, RL_GPIO) & ~0x01);
3071         }
3072         if ((ifp->if_capenable & IFCAP_WOL) != 0 &&
3073             (sc->rl_flags & RL_FLAG_WOLRXENB) != 0)
3074                 CSR_WRITE_1(sc, RL_COMMAND, RL_CMD_RX_ENB);
3075         /* Enable config register write. */
3076         CSR_WRITE_1(sc, RL_EECMD, RL_EE_MODE);
3077
3078         /* Enable PME. */
3079         v = CSR_READ_1(sc, RL_CFG1);
3080         v &= ~RL_CFG1_PME;
3081         if ((ifp->if_capenable & IFCAP_WOL) != 0)
3082                 v |= RL_CFG1_PME;
3083         CSR_WRITE_1(sc, RL_CFG1, v);
3084
3085         v = CSR_READ_1(sc, RL_CFG3);
3086         v &= ~(RL_CFG3_WOL_LINK | RL_CFG3_WOL_MAGIC);
3087         if ((ifp->if_capenable & IFCAP_WOL_MAGIC) != 0)
3088                 v |= RL_CFG3_WOL_MAGIC;
3089         CSR_WRITE_1(sc, RL_CFG3, v);
3090
3091         /* Config register write done. */
3092         CSR_WRITE_1(sc, RL_EECMD, RL_EEMODE_OFF);
3093
3094         v = CSR_READ_1(sc, RL_CFG5);
3095         v &= ~(RL_CFG5_WOL_BCAST | RL_CFG5_WOL_MCAST | RL_CFG5_WOL_UCAST);
3096         v &= ~RL_CFG5_WOL_LANWAKE;
3097         if ((ifp->if_capenable & IFCAP_WOL_UCAST) != 0)
3098                 v |= RL_CFG5_WOL_UCAST;
3099         if ((ifp->if_capenable & IFCAP_WOL_MCAST) != 0)
3100                 v |= RL_CFG5_WOL_MCAST | RL_CFG5_WOL_BCAST;
3101         if ((ifp->if_capenable & IFCAP_WOL) != 0)
3102                 v |= RL_CFG5_WOL_LANWAKE;
3103         CSR_WRITE_1(sc, RL_CFG5, v);
3104
3105         /*
3106          * It seems that hardware resets its link speed to 100Mbps in
3107          * power down mode so switching to 100Mbps in driver is not
3108          * needed.
3109          */
3110
3111         /* Request PME if WOL is requested. */
3112         pmstat = pci_read_config(sc->rl_dev, pmc + PCIR_POWER_STATUS, 2);
3113         pmstat &= ~(PCIM_PSTAT_PME | PCIM_PSTAT_PMEENABLE);
3114         if ((ifp->if_capenable & IFCAP_WOL) != 0)
3115                 pmstat |= PCIM_PSTAT_PME | PCIM_PSTAT_PMEENABLE;
3116         pci_write_config(sc->rl_dev, pmc + PCIR_POWER_STATUS, pmstat, 2);
3117 }
3118
3119 static void
3120 re_clrwol(struct rl_softc *sc)
3121 {
3122         int                     pmc;
3123         uint8_t                 v;
3124
3125         RL_LOCK_ASSERT(sc);
3126
3127         if (pci_find_extcap(sc->rl_dev, PCIY_PMG, &pmc) != 0)
3128                 return;
3129
3130         /* Enable config register write. */
3131         CSR_WRITE_1(sc, RL_EECMD, RL_EE_MODE);
3132
3133         v = CSR_READ_1(sc, RL_CFG3);
3134         v &= ~(RL_CFG3_WOL_LINK | RL_CFG3_WOL_MAGIC);
3135         CSR_WRITE_1(sc, RL_CFG3, v);
3136
3137         /* Config register write done. */
3138         CSR_WRITE_1(sc, RL_EECMD, RL_EEMODE_OFF);
3139
3140         v = CSR_READ_1(sc, RL_CFG5);
3141         v &= ~(RL_CFG5_WOL_BCAST | RL_CFG5_WOL_MCAST | RL_CFG5_WOL_UCAST);
3142         v &= ~RL_CFG5_WOL_LANWAKE;
3143         CSR_WRITE_1(sc, RL_CFG5, v);
3144 }