1 /*-
2  * Copyright (c) 1997, 1998-2003
3  *      Bill Paul <wpaul@windriver.com>.  All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice, this list of conditions and the following disclaimer.
10  * 2. Redistributions in binary form must reproduce the above copyright
11  *    notice, this list of conditions and the following disclaimer in the
12  *    documentation and/or other materials provided with the distribution.
13  * 3. All advertising materials mentioning features or use of this software
14  *    must display the following acknowledgement:
15  *      This product includes software developed by Bill Paul.
16  * 4. Neither the name of the author nor the names of any co-contributors
17  *    may be used to endorse or promote products derived from this software
18  *    without specific prior written permission.
19  *
20  * THIS SOFTWARE IS PROVIDED BY Bill Paul AND CONTRIBUTORS ``AS IS'' AND
21  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23  * ARE DISCLAIMED.  IN NO EVENT SHALL Bill Paul OR THE VOICES IN HIS HEAD
24  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
25  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
26  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
27  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
28  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
29  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
30  * THE POSSIBILITY OF SUCH DAMAGE.
31  */
32
33 #include <sys/cdefs.h>
34 __FBSDID("$FreeBSD$");
35
36 /*
37  * RealTek 8139C+/8169/8169S/8110S/8168/8111/8101E PCI NIC driver
38  *
39  * Written by Bill Paul <wpaul@windriver.com>
40  * Senior Networking Software Engineer
41  * Wind River Systems
42  */
43
44 /*
45  * This driver is designed to support RealTek's next generation of
46  * 10/100 and 10/100/1000 PCI ethernet controllers. There are currently
47  * seven devices in this family: the RTL8139C+, the RTL8169, the
48  * RTL8169S, the RTL8110S, the RTL8168, the RTL8111 and the RTL8101E.
49  *
50  * The 8139C+ is a 10/100 ethernet chip. It is backwards compatible
51  * with the older 8139 family, however it also supports a special
52  * C+ mode of operation that provides several new performance enhancing
53  * features. These include:
54  *
55  *      o Descriptor based DMA mechanism. Each descriptor represents
56  *        a single packet fragment. Data buffers may be aligned on
57  *        any byte boundary.
58  *
59  *      o 64-bit DMA
60  *
61  *      o TCP/IP checksum offload for both RX and TX
62  *
63  *      o High and normal priority transmit DMA rings
64  *
65  *      o VLAN tag insertion and extraction
66  *
67  *      o TCP large send (segmentation offload)
68  *
69  * Like the 8139, the 8139C+ also has a built-in 10/100 PHY. The C+
70  * programming API is fairly straightforward. The RX filtering, EEPROM
71  * access and PHY access is the same as it is on the older 8139 series
72  * chips.
73  *
74  * The 8169 is a 64-bit 10/100/1000 gigabit ethernet MAC. It has almost the
75  * same programming API and feature set as the 8139C+ with the following
76  * differences and additions:
77  *
78  *      o 1000Mbps mode
79  *
80  *      o Jumbo frames
81  *
82  *      o GMII and TBI ports/registers for interfacing with copper
83  *        or fiber PHYs
84  *
85  *      o RX and TX DMA rings can have up to 1024 descriptors
86  *        (the 8139C+ allows a maximum of 64)
87  *
88  *      o Slight differences in register layout from the 8139C+
89  *
90  * The TX start and timer interrupt registers are at different locations
91  * on the 8169 than they are on the 8139C+. Also, the status word in the
92  * RX descriptor has a slightly different bit layout. The 8169 does not
93  * have a built-in PHY. Most reference boards use a Marvell 88E1000 'Alaska'
94  * copper gigE PHY.
95  *
96  * The 8169S/8110S 10/100/1000 devices have built-in copper gigE PHYs
97  * (the 'S' stands for 'single-chip'). These devices have the same
98  * programming API as the older 8169, but also have some vendor-specific
99  * registers for the on-board PHY. The 8110S is a LAN-on-motherboard
100  * part designed to be pin-compatible with the RealTek 8100 10/100 chip.
101  *
102  * This driver takes advantage of the RX and TX checksum offload and
103  * VLAN tag insertion/extraction features. It also implements TX
104  * interrupt moderation using the timer interrupt registers, which
105  * significantly reduces TX interrupt load. There is also support
106  * for jumbo frames, however the 8169/8169S/8110S cannot transmit
107  * jumbo frames larger than 7440 bytes, so the max MTU possible with
108  * this driver is 7422 bytes.
109  */
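
/*
 * For reference, the 7422 byte figure above is just the 7440 byte
 * hardware frame limit less the Ethernet header and CRC that the
 * chip must also transmit:
 *
 *      7440 - ETHER_HDR_LEN (14) - ETHER_CRC_LEN (4) = 7422
 */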
110
111 #ifdef HAVE_KERNEL_OPTION_HEADERS
112 #include "opt_device_polling.h"
113 #endif
114
115 #include <sys/param.h>
116 #include <sys/endian.h>
117 #include <sys/systm.h>
118 #include <sys/sockio.h>
119 #include <sys/mbuf.h>
120 #include <sys/malloc.h>
121 #include <sys/module.h>
122 #include <sys/kernel.h>
123 #include <sys/socket.h>
124 #include <sys/lock.h>
125 #include <sys/mutex.h>
126 #include <sys/taskqueue.h>
127
128 #include <net/if.h>
129 #include <net/if_arp.h>
130 #include <net/ethernet.h>
131 #include <net/if_dl.h>
132 #include <net/if_media.h>
133 #include <net/if_types.h>
134 #include <net/if_vlan_var.h>
135
136 #include <net/bpf.h>
137
138 #include <machine/bus.h>
139 #include <machine/resource.h>
140 #include <sys/bus.h>
141 #include <sys/rman.h>
142
143 #include <dev/mii/mii.h>
144 #include <dev/mii/miivar.h>
145
146 #include <dev/pci/pcireg.h>
147 #include <dev/pci/pcivar.h>
148
149 #include <pci/if_rlreg.h>
150
151 MODULE_DEPEND(re, pci, 1, 1, 1);
152 MODULE_DEPEND(re, ether, 1, 1, 1);
153 MODULE_DEPEND(re, miibus, 1, 1, 1);
154
155 /* "device miibus" required.  See GENERIC if you get errors here. */
156 #include "miibus_if.h"
157
158 /* Tunables. */
159 static int msi_disable = 0;
160 TUNABLE_INT("hw.re.msi_disable", &msi_disable);
161 static int prefer_iomap = 0;
162 TUNABLE_INT("hw.re.prefer_iomap", &prefer_iomap);
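
/*
 * Both knobs are boot-time tunables; for example, in /boot/loader.conf:
 *
 *      hw.re.msi_disable="1"           # fall back to legacy INTx
 *      hw.re.prefer_iomap="1"          # map registers via I/O space
 */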
163
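/* Checksum-offload features handed to if_hwassist in re_attach() below. */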
164 #define RE_CSUM_FEATURES    (CSUM_IP | CSUM_TCP | CSUM_UDP)
165
166 /*
167  * Various supported device vendors/types and their names.
168  */
169 static struct rl_type re_devs[] = {
170         { DLINK_VENDORID, DLINK_DEVICEID_528T, 0,
171             "D-Link DGE-528(T) Gigabit Ethernet Adapter" },
172         { RT_VENDORID, RT_DEVICEID_8139, 0,
173             "RealTek 8139C+ 10/100BaseTX" },
174         { RT_VENDORID, RT_DEVICEID_8101E, 0,
175             "RealTek 8101E/8102E/8102EL/8103E PCIe 10/100baseTX" },
176         { RT_VENDORID, RT_DEVICEID_8168, 0,
177             "RealTek 8168/8111 B/C/CP/D/DP/E PCIe Gigabit Ethernet" },
178         { RT_VENDORID, RT_DEVICEID_8169, 0,
179             "RealTek 8169/8169S/8169SB(L)/8110S/8110SB(L) Gigabit Ethernet" },
180         { RT_VENDORID, RT_DEVICEID_8169SC, 0,
181             "RealTek 8169SC/8110SC Single-chip Gigabit Ethernet" },
182         { COREGA_VENDORID, COREGA_DEVICEID_CGLAPCIGT, 0,
183             "Corega CG-LAPCIGT (RTL8169S) Gigabit Ethernet" },
184         { LINKSYS_VENDORID, LINKSYS_DEVICEID_EG1032, 0,
185             "Linksys EG1032 (RTL8169S) Gigabit Ethernet" },
186         { USR_VENDORID, USR_DEVICEID_997902, 0,
187             "US Robotics 997902 (RTL8169S) Gigabit Ethernet" }
188 };
189
190 static struct rl_hwrev re_hwrevs[] = {
191         { RL_HWREV_8139, RL_8139,  "" },
192         { RL_HWREV_8139A, RL_8139, "A" },
193         { RL_HWREV_8139AG, RL_8139, "A-G" },
194         { RL_HWREV_8139B, RL_8139, "B" },
195         { RL_HWREV_8130, RL_8139, "8130" },
196         { RL_HWREV_8139C, RL_8139, "C" },
197         { RL_HWREV_8139D, RL_8139, "8139D/8100B/8100C" },
198         { RL_HWREV_8139CPLUS, RL_8139CPLUS, "C+"},
199         { RL_HWREV_8168_SPIN1, RL_8169, "8168"},
200         { RL_HWREV_8169, RL_8169, "8169"},
201         { RL_HWREV_8169S, RL_8169, "8169S"},
202         { RL_HWREV_8110S, RL_8169, "8110S"},
203         { RL_HWREV_8169_8110SB, RL_8169, "8169SB/8110SB"},
204         { RL_HWREV_8169_8110SC, RL_8169, "8169SC/8110SC"},
205         { RL_HWREV_8169_8110SBL, RL_8169, "8169SBL/8110SBL"},
206         { RL_HWREV_8169_8110SCE, RL_8169, "8169SC/8110SC"},
207         { RL_HWREV_8100, RL_8139, "8100"},
208         { RL_HWREV_8101, RL_8139, "8101"},
209         { RL_HWREV_8100E, RL_8169, "8100E"},
210         { RL_HWREV_8101E, RL_8169, "8101E"},
211         { RL_HWREV_8102E, RL_8169, "8102E"},
212         { RL_HWREV_8102EL, RL_8169, "8102EL"},
213         { RL_HWREV_8102EL_SPIN1, RL_8169, "8102EL"},
214         { RL_HWREV_8103E, RL_8169, "8103E"},
215         { RL_HWREV_8168_SPIN2, RL_8169, "8168"},
216         { RL_HWREV_8168_SPIN3, RL_8169, "8168"},
217         { RL_HWREV_8168C, RL_8169, "8168C/8111C"},
218         { RL_HWREV_8168C_SPIN2, RL_8169, "8168C/8111C"},
219         { RL_HWREV_8168CP, RL_8169, "8168CP/8111CP"},
220         { RL_HWREV_8168D, RL_8169, "8168D/8111D"},
221         { RL_HWREV_8168DP, RL_8169, "8168DP/8111DP"},
222         { RL_HWREV_8168E, RL_8169, "8168E/8111E"},
223         { 0, 0, NULL }
224 };
225
226 static int re_probe             (device_t);
227 static int re_attach            (device_t);
228 static int re_detach            (device_t);
229
230 static int re_encap             (struct rl_softc *, struct mbuf **);
231
232 static void re_dma_map_addr     (void *, bus_dma_segment_t *, int, int);
233 static int re_allocmem          (device_t, struct rl_softc *);
234 static __inline void re_discard_rxbuf
235                                 (struct rl_softc *, int);
236 static int re_newbuf            (struct rl_softc *, int);
237 static int re_rx_list_init      (struct rl_softc *);
238 static int re_tx_list_init      (struct rl_softc *);
239 #ifdef RE_FIXUP_RX
240 static __inline void re_fixup_rx
241                                 (struct mbuf *);
242 #endif
243 static int re_rxeof             (struct rl_softc *, int *);
244 static void re_txeof            (struct rl_softc *);
245 #ifdef DEVICE_POLLING
246 static int re_poll              (struct ifnet *, enum poll_cmd, int);
247 static int re_poll_locked       (struct ifnet *, enum poll_cmd, int);
248 #endif
249 static int re_intr              (void *);
250 static void re_tick             (void *);
251 static void re_tx_task          (void *, int);
252 static void re_int_task         (void *, int);
253 static void re_start            (struct ifnet *);
254 static int re_ioctl             (struct ifnet *, u_long, caddr_t);
255 static void re_init             (void *);
256 static void re_init_locked      (struct rl_softc *);
257 static void re_stop             (struct rl_softc *);
258 static void re_watchdog         (struct rl_softc *);
259 static int re_suspend           (device_t);
260 static int re_resume            (device_t);
261 static int re_shutdown          (device_t);
262 static int re_ifmedia_upd       (struct ifnet *);
263 static void re_ifmedia_sts      (struct ifnet *, struct ifmediareq *);
264
265 static void re_eeprom_putbyte   (struct rl_softc *, int);
266 static void re_eeprom_getword   (struct rl_softc *, int, u_int16_t *);
267 static void re_read_eeprom      (struct rl_softc *, caddr_t, int, int);
268 static int re_gmii_readreg      (device_t, int, int);
269 static int re_gmii_writereg     (device_t, int, int, int);
270
271 static int re_miibus_readreg    (device_t, int, int);
272 static int re_miibus_writereg   (device_t, int, int, int);
273 static void re_miibus_statchg   (device_t);
274
275 static void re_set_rxmode               (struct rl_softc *);
276 static void re_reset            (struct rl_softc *);
277 static void re_setwol           (struct rl_softc *);
278 static void re_clrwol           (struct rl_softc *);
279
280 #ifdef RE_DIAG
281 static int re_diag              (struct rl_softc *);
282 #endif
283
284 static device_method_t re_methods[] = {
285         /* Device interface */
286         DEVMETHOD(device_probe,         re_probe),
287         DEVMETHOD(device_attach,        re_attach),
288         DEVMETHOD(device_detach,        re_detach),
289         DEVMETHOD(device_suspend,       re_suspend),
290         DEVMETHOD(device_resume,        re_resume),
291         DEVMETHOD(device_shutdown,      re_shutdown),
292
293         /* bus interface */
294         DEVMETHOD(bus_print_child,      bus_generic_print_child),
295         DEVMETHOD(bus_driver_added,     bus_generic_driver_added),
296
297         /* MII interface */
298         DEVMETHOD(miibus_readreg,       re_miibus_readreg),
299         DEVMETHOD(miibus_writereg,      re_miibus_writereg),
300         DEVMETHOD(miibus_statchg,       re_miibus_statchg),
301
302         { 0, 0 }
303 };
304
305 static driver_t re_driver = {
306         "re",
307         re_methods,
308         sizeof(struct rl_softc)
309 };
310
311 static devclass_t re_devclass;
312
313 DRIVER_MODULE(re, pci, re_driver, re_devclass, 0, 0);
314 DRIVER_MODULE(miibus, re, miibus_driver, miibus_devclass, 0, 0);
315
316 #define EE_SET(x)                                       \
317         CSR_WRITE_1(sc, RL_EECMD,                       \
318                 CSR_READ_1(sc, RL_EECMD) | x)
319
320 #define EE_CLR(x)                                       \
321         CSR_WRITE_1(sc, RL_EECMD,                       \
322                 CSR_READ_1(sc, RL_EECMD) & ~x)
323
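/*
 * EE_SET() and EE_CLR() above are read-modify-write helpers for the
 * EEPROM command register; the routines below use them to bit-bang
 * the serial EEPROM interface one signal at a time.
 */
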
324 /*
325  * Send a read command and address to the EEPROM, check for ACK.
326  */
327 static void
328 re_eeprom_putbyte(struct rl_softc *sc, int addr)
329 {
330         int                     d, i;
331
332         d = addr | (RL_9346_READ << sc->rl_eewidth);
333
334         /*
335          * Feed in each bit and strobe the clock.
336          */
337
338         for (i = 1 << (sc->rl_eewidth + 3); i; i >>= 1) {
339                 if (d & i) {
340                         EE_SET(RL_EE_DATAIN);
341                 } else {
342                         EE_CLR(RL_EE_DATAIN);
343                 }
344                 DELAY(100);
345                 EE_SET(RL_EE_CLK);
346                 DELAY(150);
347                 EE_CLR(RL_EE_CLK);
348                 DELAY(100);
349         }
350 }
351
352 /*
353  * Read a word of data stored in the EEPROM at address 'addr.'
354  */
355 static void
356 re_eeprom_getword(struct rl_softc *sc, int addr, u_int16_t *dest)
357 {
358         int                     i;
359         u_int16_t               word = 0;
360
361         /*
362          * Send address of word we want to read.
363          */
364         re_eeprom_putbyte(sc, addr);
365
366         /*
367          * Start reading bits from EEPROM.
368          */
369         for (i = 0x8000; i; i >>= 1) {
370                 EE_SET(RL_EE_CLK);
371                 DELAY(100);
372                 if (CSR_READ_1(sc, RL_EECMD) & RL_EE_DATAOUT)
373                         word |= i;
374                 EE_CLR(RL_EE_CLK);
375                 DELAY(100);
376         }
377
378         *dest = word;
379 }
380
381 /*
382  * Read a sequence of words from the EEPROM.
383  */
384 static void
385 re_read_eeprom(struct rl_softc *sc, caddr_t dest, int off, int cnt)
386 {
387         int                     i;
388         u_int16_t               word = 0, *ptr;
389
390         CSR_SETBIT_1(sc, RL_EECMD, RL_EEMODE_PROGRAM);
391
392         DELAY(100);
393
394         for (i = 0; i < cnt; i++) {
395                 CSR_SETBIT_1(sc, RL_EECMD, RL_EE_SEL);
396                 re_eeprom_getword(sc, off + i, &word);
397                 CSR_CLRBIT_1(sc, RL_EECMD, RL_EE_SEL);
398                 ptr = (u_int16_t *)(dest + (i * 2));
399                 *ptr = word;
400         }
401
402         CSR_CLRBIT_1(sc, RL_EECMD, RL_EEMODE_PROGRAM);
403 }
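
/*
 * Usage sketch for re_read_eeprom() above; this mirrors the station
 * address read in re_attach(), where the words are then converted
 * from EEPROM (little-endian) byte order with le16toh():
 *
 *      u_int16_t as[ETHER_ADDR_LEN / 2];
 *
 *      re_read_eeprom(sc, (caddr_t)as, RL_EE_EADDR, 3);
 */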
404
405 static int
406 re_gmii_readreg(device_t dev, int phy, int reg)
407 {
408         struct rl_softc         *sc;
409         u_int32_t               rval;
410         int                     i;
411
412         if (phy != 1)
413                 return (0);
414
415         sc = device_get_softc(dev);
416
417         /* Let the rgephy driver read the GMEDIASTAT register */
418
419         if (reg == RL_GMEDIASTAT) {
420                 rval = CSR_READ_1(sc, RL_GMEDIASTAT);
421                 return (rval);
422         }
423
424         CSR_WRITE_4(sc, RL_PHYAR, reg << 16);
425         DELAY(1000);
426
427         for (i = 0; i < RL_PHY_TIMEOUT; i++) {
428                 rval = CSR_READ_4(sc, RL_PHYAR);
429                 if (rval & RL_PHYAR_BUSY)
430                         break;
431                 DELAY(100);
432         }
433
434         if (i == RL_PHY_TIMEOUT) {
435                 device_printf(sc->rl_dev, "PHY read failed\n");
436                 return (0);
437         }
438
439         return (rval & RL_PHYAR_PHYDATA);
440 }
441
442 static int
443 re_gmii_writereg(device_t dev, int phy, int reg, int data)
444 {
445         struct rl_softc         *sc;
446         u_int32_t               rval;
447         int                     i;
448
449         sc = device_get_softc(dev);
450
451         CSR_WRITE_4(sc, RL_PHYAR, (reg << 16) |
452             (data & RL_PHYAR_PHYDATA) | RL_PHYAR_BUSY);
453         DELAY(1000);
454
455         for (i = 0; i < RL_PHY_TIMEOUT; i++) {
456                 rval = CSR_READ_4(sc, RL_PHYAR);
457                 if (!(rval & RL_PHYAR_BUSY))
458                         break;
459                 DELAY(100);
460         }
461
462         if (i == RL_PHY_TIMEOUT) {
463                 device_printf(sc->rl_dev, "PHY write failed\n");
464                 return (0);
465         }
466
467         return (0);
468 }
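
/*
 * Both routines above reach the GMII PHY through the PHYAR register:
 * the PHY register number goes in bits 20:16 (reg << 16) and the
 * data in bits 15:0 (RL_PHYAR_PHYDATA). A read starts with
 * RL_PHYAR_BUSY clear and polls until the chip sets it; a write sets
 * RL_PHYAR_BUSY and polls until the chip clears it.
 */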
469
470 static int
471 re_miibus_readreg(device_t dev, int phy, int reg)
472 {
473         struct rl_softc         *sc;
474         u_int16_t               rval = 0;
475         u_int16_t               re8139_reg = 0;
476
477         sc = device_get_softc(dev);
478
479         if (sc->rl_type == RL_8169) {
480                 rval = re_gmii_readreg(dev, phy, reg);
481                 return (rval);
482         }
483
484         /* Pretend the internal PHY is only at address 0 */
485         if (phy) {
486                 return (0);
487         }
488         switch (reg) {
489         case MII_BMCR:
490                 re8139_reg = RL_BMCR;
491                 break;
492         case MII_BMSR:
493                 re8139_reg = RL_BMSR;
494                 break;
495         case MII_ANAR:
496                 re8139_reg = RL_ANAR;
497                 break;
498         case MII_ANER:
499                 re8139_reg = RL_ANER;
500                 break;
501         case MII_ANLPAR:
502                 re8139_reg = RL_LPAR;
503                 break;
504         case MII_PHYIDR1:
505         case MII_PHYIDR2:
506                 return (0);
507         /*
508          * Allow the rlphy driver to read the media status
509          * register. If we have a link partner which does not
510          * support NWAY, this is the register which will tell
511          * us the results of parallel detection.
512          */
513         case RL_MEDIASTAT:
514                 rval = CSR_READ_1(sc, RL_MEDIASTAT);
515                 return (rval);
516         default:
517                 device_printf(sc->rl_dev, "bad phy register\n");
518                 return (0);
519         }
520         rval = CSR_READ_2(sc, re8139_reg);
521         if (sc->rl_type == RL_8139CPLUS && re8139_reg == RL_BMCR) {
522                 /* 8139C+ has different bit layout. */
523                 rval &= ~(BMCR_LOOP | BMCR_ISO);
524         }
525         return (rval);
526 }
527
528 static int
529 re_miibus_writereg(device_t dev, int phy, int reg, int data)
530 {
531         struct rl_softc         *sc;
532         u_int16_t               re8139_reg = 0;
533         int                     rval = 0;
534
535         sc = device_get_softc(dev);
536
537         if (sc->rl_type == RL_8169) {
538                 rval = re_gmii_writereg(dev, phy, reg, data);
539                 return (rval);
540         }
541
542         /* Pretend the internal PHY is only at address 0 */
543         if (phy)
544                 return (0);
545
546         switch (reg) {
547         case MII_BMCR:
548                 re8139_reg = RL_BMCR;
549                 if (sc->rl_type == RL_8139CPLUS) {
550                         /* 8139C+ has different bit layout. */
551                         data &= ~(BMCR_LOOP | BMCR_ISO);
552                 }
553                 break;
554         case MII_BMSR:
555                 re8139_reg = RL_BMSR;
556                 break;
557         case MII_ANAR:
558                 re8139_reg = RL_ANAR;
559                 break;
560         case MII_ANER:
561                 re8139_reg = RL_ANER;
562                 break;
563         case MII_ANLPAR:
564                 re8139_reg = RL_LPAR;
565                 break;
566         case MII_PHYIDR1:
567         case MII_PHYIDR2:
568                 return (0);
570         default:
571                 device_printf(sc->rl_dev, "bad phy register\n");
572                 return (0);
573         }
574         CSR_WRITE_2(sc, re8139_reg, data);
575         return (0);
576 }
577
578 static void
579 re_miibus_statchg(device_t dev)
580 {
581         struct rl_softc         *sc;
582         struct ifnet            *ifp;
583         struct mii_data         *mii;
584
585         sc = device_get_softc(dev);
586         mii = device_get_softc(sc->rl_miibus);
587         ifp = sc->rl_ifp;
588         if (mii == NULL || ifp == NULL ||
589             (ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
590                 return;
591
592         sc->rl_flags &= ~RL_FLAG_LINK;
593         if ((mii->mii_media_status & (IFM_ACTIVE | IFM_AVALID)) ==
594             (IFM_ACTIVE | IFM_AVALID)) {
595                 switch (IFM_SUBTYPE(mii->mii_media_active)) {
596                 case IFM_10_T:
597                 case IFM_100_TX:
598                         sc->rl_flags |= RL_FLAG_LINK;
599                         break;
600                 case IFM_1000_T:
601                         if ((sc->rl_flags & RL_FLAG_FASTETHER) != 0)
602                                 break;
603                         sc->rl_flags |= RL_FLAG_LINK;
604                         break;
605                 default:
606                         break;
607                 }
608         }
609         /*
610          * RealTek controllers do not provide any interface to the
611          * Tx/Rx MACs for the resolved speed, duplex and flow-control
612          * parameters.
613          */
614 }
615
616 /*
617  * Set the RX configuration and 64-bit multicast hash filter.
618  */
619 static void
620 re_set_rxmode(struct rl_softc *sc)
621 {
622         struct ifnet            *ifp;
623         struct ifmultiaddr      *ifma;
624         uint32_t                hashes[2] = { 0, 0 };
625         uint32_t                h, rxfilt;
626
627         RL_LOCK_ASSERT(sc);
628
629         ifp = sc->rl_ifp;
630
631         rxfilt = RL_RXCFG_CONFIG | RL_RXCFG_RX_INDIV | RL_RXCFG_RX_BROAD;
632
633         if (ifp->if_flags & (IFF_ALLMULTI | IFF_PROMISC)) {
634                 if (ifp->if_flags & IFF_PROMISC)
635                         rxfilt |= RL_RXCFG_RX_ALLPHYS;
636                 /*
637                  * Unlike other hardware, we have to explicitly set
638                  * RL_RXCFG_RX_MULTI to receive multicast frames in
639                  * promiscuous mode.
640                  */
641                 rxfilt |= RL_RXCFG_RX_MULTI;
642                 hashes[0] = hashes[1] = 0xffffffff;
643                 goto done;
644         }
645
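        /*
         * Build the 64-bit multicast hash: the top 6 bits of the
         * big-endian CRC of each address select one bit out of 64,
         * split across RL_MAR0 (bits 0-31) and RL_MAR4 (bits 32-63).
         */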
646         if_maddr_rlock(ifp);
647         TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
648                 if (ifma->ifma_addr->sa_family != AF_LINK)
649                         continue;
650                 h = ether_crc32_be(LLADDR((struct sockaddr_dl *)
651                     ifma->ifma_addr), ETHER_ADDR_LEN) >> 26;
652                 if (h < 32)
653                         hashes[0] |= (1 << h);
654                 else
655                         hashes[1] |= (1 << (h - 32));
656         }
657         if_maddr_runlock(ifp);
658
659         if (hashes[0] != 0 || hashes[1] != 0) {
660                 /*
661                  * For some unfathomable reason, RealTek decided to
662                  * reverse the order of the multicast hash registers
663                  * in the PCI Express parts.  This means we have to
664                  * write the hash pattern in reverse order for those
665                  * devices.
666                  */
667                 if ((sc->rl_flags & RL_FLAG_PCIE) != 0) {
668                         h = bswap32(hashes[0]);
669                         hashes[0] = bswap32(hashes[1]);
670                         hashes[1] = h;
671                 }
672                 rxfilt |= RL_RXCFG_RX_MULTI;
673         }
674
675 done:
676         CSR_WRITE_4(sc, RL_MAR0, hashes[0]);
677         CSR_WRITE_4(sc, RL_MAR4, hashes[1]);
678         CSR_WRITE_4(sc, RL_RXCFG, rxfilt);
679 }
680
681 static void
682 re_reset(struct rl_softc *sc)
683 {
684         int                     i;
685
686         RL_LOCK_ASSERT(sc);
687
688         CSR_WRITE_1(sc, RL_COMMAND, RL_CMD_RESET);
689
690         for (i = 0; i < RL_TIMEOUT; i++) {
691                 DELAY(10);
692                 if (!(CSR_READ_1(sc, RL_COMMAND) & RL_CMD_RESET))
693                         break;
694         }
695         if (i == RL_TIMEOUT)
696                 device_printf(sc->rl_dev, "reset never completed!\n");
697
698         if ((sc->rl_flags & RL_FLAG_MACRESET) != 0)
699                 CSR_WRITE_1(sc, 0x82, 1);
700         if (sc->rl_hwrev == RL_HWREV_8169S)
701                 re_gmii_writereg(sc->rl_dev, 1, 0x0b, 0);
702 }
703
704 #ifdef RE_DIAG
705
706 /*
707  * The following routine is designed to test for a defect on some
708  * 32-bit 8169 cards. Some of these NICs have the REQ64# and ACK64#
709  * lines connected to the bus, however for a 32-bit only card, they
710  * should be pulled high. The result of this defect is that the
711  * NIC will not work right if you plug it into a 64-bit slot: DMA
712  * operations will be done with 64-bit transfers, which will fail
713  * because the 64-bit data lines aren't connected.
714  *
715  * There's no way to work around this (short of taking a soldering
716  * iron to the board), however we can detect it. The method we use
717  * here is to put the NIC into digital loopback mode, set the receiver
718  * to promiscuous mode, and then try to send a frame. We then compare
719  * the frame data we sent to what was received. If the data matches,
720  * then the NIC is working correctly, otherwise we know the user has
721  * a defective NIC which has been mistakenly plugged into a 64-bit PCI
722  * slot. In the latter case, there's no way the NIC can work correctly,
723  * so we print out a message on the console and abort the device attach.
724  */
725
726 static int
727 re_diag(struct rl_softc *sc)
728 {
729         struct ifnet            *ifp = sc->rl_ifp;
730         struct mbuf             *m0;
731         struct ether_header     *eh;
732         struct rl_desc          *cur_rx;
733         u_int16_t               status;
734         u_int32_t               rxstat;
735         int                     total_len, i, error = 0, phyaddr;
736         u_int8_t                dst[] = { 0x00, 'h', 'e', 'l', 'l', 'o' };
737         u_int8_t                src[] = { 0x00, 'w', 'o', 'r', 'l', 'd' };
738
739         /* Allocate a single mbuf */
740         MGETHDR(m0, M_DONTWAIT, MT_DATA);
741         if (m0 == NULL)
742                 return (ENOBUFS);
743
744         RL_LOCK(sc);
745
746         /*
747          * Initialize the NIC in test mode. This sets the chip up
748          * so that it can send and receive frames, but performs the
749          * following special functions:
750          * - Puts receiver in promiscuous mode
751          * - Enables digital loopback mode
752          * - Leaves interrupts turned off
753          */
754
755         ifp->if_flags |= IFF_PROMISC;
756         sc->rl_testmode = 1;
757         ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
758         re_init_locked(sc);
759         sc->rl_flags |= RL_FLAG_LINK;
760         if (sc->rl_type == RL_8169)
761                 phyaddr = 1;
762         else
763                 phyaddr = 0;
764
765         re_miibus_writereg(sc->rl_dev, phyaddr, MII_BMCR, BMCR_RESET);
766         for (i = 0; i < RL_TIMEOUT; i++) {
767                 status = re_miibus_readreg(sc->rl_dev, phyaddr, MII_BMCR);
768                 if (!(status & BMCR_RESET))
769                         break;
770         }
771
772         re_miibus_writereg(sc->rl_dev, phyaddr, MII_BMCR, BMCR_LOOP);
773         CSR_WRITE_2(sc, RL_ISR, RL_INTRS);
774
775         DELAY(100000);
776
777         /* Put some data in the mbuf */
778
779         eh = mtod(m0, struct ether_header *);
780         bcopy ((char *)&dst, eh->ether_dhost, ETHER_ADDR_LEN);
781         bcopy ((char *)&src, eh->ether_shost, ETHER_ADDR_LEN);
782         eh->ether_type = htons(ETHERTYPE_IP);
783         m0->m_pkthdr.len = m0->m_len = ETHER_MIN_LEN - ETHER_CRC_LEN;
784
785         /*
786          * Queue the packet, start transmission.
787          * Note: IF_HANDOFF() ultimately calls re_start() for us.
788          */
789
790         CSR_WRITE_2(sc, RL_ISR, 0xFFFF);
791         RL_UNLOCK(sc);
792         /* XXX: re_diag must not be called when in ALTQ mode */
793         IF_HANDOFF(&ifp->if_snd, m0, ifp);
794         RL_LOCK(sc);
795         m0 = NULL;
796
797         /* Wait for it to propagate through the chip */
798
799         DELAY(100000);
800         for (i = 0; i < RL_TIMEOUT; i++) {
801                 status = CSR_READ_2(sc, RL_ISR);
802                 CSR_WRITE_2(sc, RL_ISR, status);
803                 if ((status & (RL_ISR_TIMEOUT_EXPIRED|RL_ISR_RX_OK)) ==
804                     (RL_ISR_TIMEOUT_EXPIRED|RL_ISR_RX_OK))
805                         break;
806                 DELAY(10);
807         }
808
809         if (i == RL_TIMEOUT) {
810                 device_printf(sc->rl_dev,
811                     "diagnostic failed, failed to receive packet in"
812                     " loopback mode\n");
813                 error = EIO;
814                 goto done;
815         }
816
817         /*
818          * The packet should have been dumped into the first
819          * entry in the RX DMA ring. Grab it from there.
820          */
821
822         bus_dmamap_sync(sc->rl_ldata.rl_rx_list_tag,
823             sc->rl_ldata.rl_rx_list_map,
824             BUS_DMASYNC_POSTREAD);
825         bus_dmamap_sync(sc->rl_ldata.rl_rx_mtag,
826             sc->rl_ldata.rl_rx_desc[0].rx_dmamap,
827             BUS_DMASYNC_POSTREAD);
828         bus_dmamap_unload(sc->rl_ldata.rl_rx_mtag,
829             sc->rl_ldata.rl_rx_desc[0].rx_dmamap);
830
831         m0 = sc->rl_ldata.rl_rx_desc[0].rx_m;
832         sc->rl_ldata.rl_rx_desc[0].rx_m = NULL;
833         eh = mtod(m0, struct ether_header *);
834
835         cur_rx = &sc->rl_ldata.rl_rx_list[0];
836         total_len = RL_RXBYTES(cur_rx);
837         rxstat = le32toh(cur_rx->rl_cmdstat);
838
839         if (total_len != ETHER_MIN_LEN) {
840                 device_printf(sc->rl_dev,
841                     "diagnostic failed, received short packet\n");
842                 error = EIO;
843                 goto done;
844         }
845
846         /* Test that the received packet data matches what we sent. */
847
848         if (bcmp((char *)&eh->ether_dhost, (char *)&dst, ETHER_ADDR_LEN) ||
849             bcmp((char *)&eh->ether_shost, (char *)&src, ETHER_ADDR_LEN) ||
850             ntohs(eh->ether_type) != ETHERTYPE_IP) {
851                 device_printf(sc->rl_dev, "WARNING, DMA FAILURE!\n");
852                 device_printf(sc->rl_dev, "expected TX data: %6D/%6D/0x%x\n",
853                     dst, ":", src, ":", ETHERTYPE_IP);
854                 device_printf(sc->rl_dev, "received RX data: %6D/%6D/0x%x\n",
855                     eh->ether_dhost, ":",  eh->ether_shost, ":",
856                     ntohs(eh->ether_type));
857                 device_printf(sc->rl_dev, "You may have a defective 32-bit "
858                     "NIC plugged into a 64-bit PCI slot.\n");
859                 device_printf(sc->rl_dev, "Please re-install the NIC in a "
860                     "32-bit slot for proper operation.\n");
861                 device_printf(sc->rl_dev, "Read the re(4) man page for more "
862                     "details.\n");
863                 error = EIO;
864         }
865
866 done:
867         /* Turn interface off, release resources */
868
869         sc->rl_testmode = 0;
870         sc->rl_flags &= ~RL_FLAG_LINK;
871         ifp->if_flags &= ~IFF_PROMISC;
872         re_stop(sc);
873         if (m0 != NULL)
874                 m_freem(m0);
875
876         RL_UNLOCK(sc);
877
878         return (error);
879 }
880
881 #endif
882
883 /*
884  * Probe for a RealTek 8139C+/8169/8110 chip. Check the PCI vendor and device
885  * IDs against our list and return a device name if we find a match.
886  */
887 static int
888 re_probe(device_t dev)
889 {
890         struct rl_type          *t;
891         uint16_t                devid, vendor;
892         uint16_t                revid, sdevid;
893         int                     i;
894         
895         vendor = pci_get_vendor(dev);
896         devid = pci_get_device(dev);
897         revid = pci_get_revid(dev);
898         sdevid = pci_get_subdevice(dev);
899
900         if (vendor == LINKSYS_VENDORID && devid == LINKSYS_DEVICEID_EG1032) {
901                 if (sdevid != LINKSYS_SUBDEVICE_EG1032_REV3) {
902                         /*
903                          * Only attach to rev. 3 of the Linksys EG1032 adapter.
904                          * Rev. 2 is supported by sk(4).
905                          */
906                         return (ENXIO);
907                 }
908         }
909
910         if (vendor == RT_VENDORID && devid == RT_DEVICEID_8139) {
911                 if (revid != 0x20) {
912                         /* 8139, let rl(4) take care of this device. */
913                         return (ENXIO);
914                 }
915         }
916
917         t = re_devs;
918         for (i = 0; i < sizeof(re_devs) / sizeof(re_devs[0]); i++, t++) {
919                 if (vendor == t->rl_vid && devid == t->rl_did) {
920                         device_set_desc(dev, t->rl_name);
921                         return (BUS_PROBE_DEFAULT);
922                 }
923         }
924
925         return (ENXIO);
926 }
927
928 /*
929  * Map a single buffer address.
930  */
931
932 static void
933 re_dma_map_addr(void *arg, bus_dma_segment_t *segs, int nseg, int error)
934 {
935         bus_addr_t              *addr;
936
937         if (error)
938                 return;
939
940         KASSERT(nseg == 1, ("too many DMA segments, %d should be 1", nseg));
941         addr = arg;
942         *addr = segs->ds_addr;
943 }
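
/*
 * re_dma_map_addr() is the callback handed to bus_dmamap_load() in
 * re_allocmem() below: the load argument points to a bus_addr_t and
 * the single segment's bus address is stored through it.
 */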
944
945 static int
946 re_allocmem(device_t dev, struct rl_softc *sc)
947 {
948         bus_size_t              rx_list_size, tx_list_size;
949         int                     error;
950         int                     i;
951
952         rx_list_size = sc->rl_ldata.rl_rx_desc_cnt * sizeof(struct rl_desc);
953         tx_list_size = sc->rl_ldata.rl_tx_desc_cnt * sizeof(struct rl_desc);
954
955         /*
956          * Allocate the parent bus DMA tag appropriate for PCI.
957          * In order to use DAC, RL_CPLUSCMD_PCI_DAC bit of RL_CPLUS_CMD
958          * register should be set. However some RealTek chips are known
959          * to be buggy on DAC handling, therefore disable DAC by limiting
960          * DMA address space to 32bit. PCIe variants of RealTek chips
961          * may not have the limitation but I took safer path.
962          */
963         error = bus_dma_tag_create(bus_get_dma_tag(dev), 1, 0,
964             BUS_SPACE_MAXADDR_32BIT, BUS_SPACE_MAXADDR, NULL, NULL,
965             BUS_SPACE_MAXSIZE_32BIT, 0, BUS_SPACE_MAXSIZE_32BIT, 0,
966             NULL, NULL, &sc->rl_parent_tag);
967         if (error) {
968                 device_printf(dev, "could not allocate parent DMA tag\n");
969                 return (error);
970         }
971
972         /*
973          * Allocate map for TX mbufs.
974          */
975         error = bus_dma_tag_create(sc->rl_parent_tag, 1, 0,
976             BUS_SPACE_MAXADDR, BUS_SPACE_MAXADDR, NULL,
977             NULL, MCLBYTES * RL_NTXSEGS, RL_NTXSEGS, 4096, 0,
978             NULL, NULL, &sc->rl_ldata.rl_tx_mtag);
979         if (error) {
980                 device_printf(dev, "could not allocate TX DMA tag\n");
981                 return (error);
982         }
983
984         /*
985          * Allocate map for RX mbufs.
986          */
987
988         error = bus_dma_tag_create(sc->rl_parent_tag, sizeof(uint64_t), 0,
989             BUS_SPACE_MAXADDR, BUS_SPACE_MAXADDR, NULL, NULL,
990             MCLBYTES, 1, MCLBYTES, 0, NULL, NULL, &sc->rl_ldata.rl_rx_mtag);
991         if (error) {
992                 device_printf(dev, "could not allocate RX DMA tag\n");
993                 return (error);
994         }
995
996         /*
997          * Allocate map for TX descriptor list.
998          */
999         error = bus_dma_tag_create(sc->rl_parent_tag, RL_RING_ALIGN,
1000             0, BUS_SPACE_MAXADDR_32BIT, BUS_SPACE_MAXADDR, NULL,
1001             NULL, tx_list_size, 1, tx_list_size, 0,
1002             NULL, NULL, &sc->rl_ldata.rl_tx_list_tag);
1003         if (error) {
1004                 device_printf(dev, "could not allocate TX DMA ring tag\n");
1005                 return (error);
1006         }
1007
1008         /* Allocate DMA'able memory for the TX ring */
1009
1010         error = bus_dmamem_alloc(sc->rl_ldata.rl_tx_list_tag,
1011             (void **)&sc->rl_ldata.rl_tx_list,
1012             BUS_DMA_WAITOK | BUS_DMA_COHERENT | BUS_DMA_ZERO,
1013             &sc->rl_ldata.rl_tx_list_map);
1014         if (error) {
1015                 device_printf(dev, "could not allocate TX DMA ring\n");
1016                 return (error);
1017         }
1018
1019         /* Load the map for the TX ring. */
1020
1021         sc->rl_ldata.rl_tx_list_addr = 0;
1022         error = bus_dmamap_load(sc->rl_ldata.rl_tx_list_tag,
1023              sc->rl_ldata.rl_tx_list_map, sc->rl_ldata.rl_tx_list,
1024              tx_list_size, re_dma_map_addr,
1025              &sc->rl_ldata.rl_tx_list_addr, BUS_DMA_NOWAIT);
1026         if (error != 0 || sc->rl_ldata.rl_tx_list_addr == 0) {
1027                 device_printf(dev, "could not load TX DMA ring\n");
1028                 return (ENOMEM);
1029         }
1030
1031         /* Create DMA maps for TX buffers */
1032
1033         for (i = 0; i < sc->rl_ldata.rl_tx_desc_cnt; i++) {
1034                 error = bus_dmamap_create(sc->rl_ldata.rl_tx_mtag, 0,
1035                     &sc->rl_ldata.rl_tx_desc[i].tx_dmamap);
1036                 if (error) {
1037                         device_printf(dev, "could not create DMA map for TX\n");
1038                         return (error);
1039                 }
1040         }
1041
1042         /*
1043          * Allocate map for RX descriptor list.
1044          */
1045         error = bus_dma_tag_create(sc->rl_parent_tag, RL_RING_ALIGN,
1046             0, BUS_SPACE_MAXADDR_32BIT, BUS_SPACE_MAXADDR, NULL,
1047             NULL, rx_list_size, 1, rx_list_size, 0,
1048             NULL, NULL, &sc->rl_ldata.rl_rx_list_tag);
1049         if (error) {
1050                 device_printf(dev, "could not create RX DMA ring tag\n");
1051                 return (error);
1052         }
1053
1054         /* Allocate DMA'able memory for the RX ring */
1055
1056         error = bus_dmamem_alloc(sc->rl_ldata.rl_rx_list_tag,
1057             (void **)&sc->rl_ldata.rl_rx_list,
1058             BUS_DMA_WAITOK | BUS_DMA_COHERENT | BUS_DMA_ZERO,
1059             &sc->rl_ldata.rl_rx_list_map);
1060         if (error) {
1061                 device_printf(dev, "could not allocate RX DMA ring\n");
1062                 return (error);
1063         }
1064
1065         /* Load the map for the RX ring. */
1066
1067         sc->rl_ldata.rl_rx_list_addr = 0;
1068         error = bus_dmamap_load(sc->rl_ldata.rl_rx_list_tag,
1069              sc->rl_ldata.rl_rx_list_map, sc->rl_ldata.rl_rx_list,
1070              rx_list_size, re_dma_map_addr,
1071              &sc->rl_ldata.rl_rx_list_addr, BUS_DMA_NOWAIT);
1072         if (error != 0 || sc->rl_ldata.rl_rx_list_addr == 0) {
1073                 device_printf(dev, "could not load RX DMA ring\n");
1074                 return (ENOMEM);
1075         }
1076
1077         /* Create DMA maps for RX buffers */
1078
1079         error = bus_dmamap_create(sc->rl_ldata.rl_rx_mtag, 0,
1080             &sc->rl_ldata.rl_rx_sparemap);
1081         if (error) {
1082                 device_printf(dev, "could not create spare DMA map for RX\n");
1083                 return (error);
1084         }
1085         for (i = 0; i < sc->rl_ldata.rl_rx_desc_cnt; i++) {
1086                 error = bus_dmamap_create(sc->rl_ldata.rl_rx_mtag, 0,
1087                     &sc->rl_ldata.rl_rx_desc[i].rx_dmamap);
1088                 if (error) {
1089                         device_printf(dev, "could not create DMA map for RX\n");
1090                         return (error);
1091                 }
1092         }
1093
1094         return (0);
1095 }
1096
1097 /*
1098  * Attach the interface. Allocate softc structures, do ifmedia
1099  * setup and ethernet/BPF attach.
1100  */
1101 static int
1102 re_attach(device_t dev)
1103 {
1104         u_char                  eaddr[ETHER_ADDR_LEN];
1105         u_int16_t               as[ETHER_ADDR_LEN / 2];
1106         struct rl_softc         *sc;
1107         struct ifnet            *ifp;
1108         struct rl_hwrev         *hw_rev;
1109         int                     hwrev;
1110         u_int16_t               devid, re_did = 0;
1111         int                     error = 0, rid, i;
1112         int                     msic, reg;
1113         uint8_t                 cfg;
1114
1115         sc = device_get_softc(dev);
1116         sc->rl_dev = dev;
1117
1118         mtx_init(&sc->rl_mtx, device_get_nameunit(dev), MTX_NETWORK_LOCK,
1119             MTX_DEF);
1120         callout_init_mtx(&sc->rl_stat_callout, &sc->rl_mtx, 0);
1121
1122         /*
1123          * Map control/status registers.
1124          */
1125         pci_enable_busmaster(dev);
1126
1127         devid = pci_get_device(dev);
1128         /*
1129          * Prefer memory space register mapping over IO space.
1130          * Because the RTL8169SC does not seem to work when memory
1131          * mapping is used, always activate I/O mapping for it.
1132          */
1133         if (devid == RT_DEVICEID_8169SC)
1134                 prefer_iomap = 1;
1135         if (prefer_iomap == 0) {
1136                 sc->rl_res_id = PCIR_BAR(1);
1137                 sc->rl_res_type = SYS_RES_MEMORY;
1138                 /* RTL8168/8101E seems to use different BARs. */
1139                 if (devid == RT_DEVICEID_8168 || devid == RT_DEVICEID_8101E)
1140                         sc->rl_res_id = PCIR_BAR(2);
1141         } else {
1142                 sc->rl_res_id = PCIR_BAR(0);
1143                 sc->rl_res_type = SYS_RES_IOPORT;
1144         }
1145         sc->rl_res = bus_alloc_resource_any(dev, sc->rl_res_type,
1146             &sc->rl_res_id, RF_ACTIVE);
1147         if (sc->rl_res == NULL && prefer_iomap == 0) {
1148                 sc->rl_res_id = PCIR_BAR(0);
1149                 sc->rl_res_type = SYS_RES_IOPORT;
1150                 sc->rl_res = bus_alloc_resource_any(dev, sc->rl_res_type,
1151                     &sc->rl_res_id, RF_ACTIVE);
1152         }
1153         if (sc->rl_res == NULL) {
1154                 device_printf(dev, "couldn't map ports/memory\n");
1155                 error = ENXIO;
1156                 goto fail;
1157         }
1158
1159         sc->rl_btag = rman_get_bustag(sc->rl_res);
1160         sc->rl_bhandle = rman_get_bushandle(sc->rl_res);
1161
1162         msic = 0;
1163         if (pci_find_extcap(dev, PCIY_EXPRESS, &reg) == 0) {
1164                 sc->rl_flags |= RL_FLAG_PCIE;
1165                 if (devid != RT_DEVICEID_8101E) {
1166                         /* Set PCIe maximum read request size to 2048. */
1167                         if (pci_get_max_read_req(dev) < 2048)
1168                                 pci_set_max_read_req(dev, 2048);
1169                 }
1170                 msic = pci_msi_count(dev);
1171                 if (bootverbose)
1172                         device_printf(dev, "MSI count : %d\n", msic);
1173         }
1174         if (msic > 0 && msi_disable == 0) {
1175                 msic = 1;
1176                 if (pci_alloc_msi(dev, &msic) == 0) {
1177                         if (msic == RL_MSI_MESSAGES) {
1178                                 device_printf(dev, "Using %d MSI messages\n",
1179                                     msic);
1180                                 sc->rl_flags |= RL_FLAG_MSI;
1181                                 /* Explicitly set MSI enable bit. */
1182                                 CSR_WRITE_1(sc, RL_EECMD, RL_EE_MODE);
1183                                 cfg = CSR_READ_1(sc, RL_CFG2);
1184                                 cfg |= RL_CFG2_MSI;
1185                                 CSR_WRITE_1(sc, RL_CFG2, cfg);
1186                                 CSR_WRITE_1(sc, RL_EECMD, RL_EEMODE_OFF);
1187                         } else
1188                                 pci_release_msi(dev);
1189                 }
1190         }
1191
1192         /* Allocate interrupt */
1193         if ((sc->rl_flags & RL_FLAG_MSI) == 0) {
1194                 rid = 0;
1195                 sc->rl_irq[0] = bus_alloc_resource_any(dev, SYS_RES_IRQ, &rid,
1196                     RF_SHAREABLE | RF_ACTIVE);
1197                 if (sc->rl_irq[0] == NULL) {
1198                         device_printf(dev, "couldn't allocate IRQ resources\n");
1199                         error = ENXIO;
1200                         goto fail;
1201                 }
1202         } else {
1203                 for (i = 0, rid = 1; i < RL_MSI_MESSAGES; i++, rid++) {
1204                         sc->rl_irq[i] = bus_alloc_resource_any(dev,
1205                             SYS_RES_IRQ, &rid, RF_ACTIVE);
1206                         if (sc->rl_irq[i] == NULL) {
1207                                 device_printf(dev,
1208                                     "couldn't allocate IRQ resources for "
1209                                     "message %d\n", rid);
1210                                 error = ENXIO;
1211                                 goto fail;
1212                         }
1213                 }
1214         }
1215
1216         if ((sc->rl_flags & RL_FLAG_MSI) == 0) {
1217                 CSR_WRITE_1(sc, RL_EECMD, RL_EE_MODE);
1218                 cfg = CSR_READ_1(sc, RL_CFG2);
1219                 if ((cfg & RL_CFG2_MSI) != 0) {
1220                         device_printf(dev, "turning off MSI enable bit.\n");
1221                         cfg &= ~RL_CFG2_MSI;
1222                         CSR_WRITE_1(sc, RL_CFG2, cfg);
1223                 }
1224                 CSR_WRITE_1(sc, RL_EECMD, RL_EEMODE_OFF);
1225         }
1226
1227         /* Reset the adapter. */
1228         RL_LOCK(sc);
1229         re_reset(sc);
1230         RL_UNLOCK(sc);
1231
1232         hw_rev = re_hwrevs;
1233         hwrev = CSR_READ_4(sc, RL_TXCFG);
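        /*
         * The hardware revision is encoded in the upper bits of the
         * TX config register; old 8139-family parts (cases 0x0/0x1 of
         * the top nibble below) also use bit 31 to tell revisions
         * apart, so that bit is kept in the mask for them.
         */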
1234         switch (hwrev & 0x70000000) {
1235         case 0x00000000:
1236         case 0x10000000:
1237                 device_printf(dev, "Chip rev. 0x%08x\n", hwrev & 0xfc800000);
1238                 hwrev &= (RL_TXCFG_HWREV | 0x80000000);
1239                 break;
1240         default:
1241                 device_printf(dev, "Chip rev. 0x%08x\n", hwrev & 0x7c800000);
1242                 hwrev &= RL_TXCFG_HWREV;
1243                 break;
1244         }
1245         device_printf(dev, "MAC rev. 0x%08x\n", hwrev & 0x00700000);
1246         while (hw_rev->rl_desc != NULL) {
1247                 if (hw_rev->rl_rev == hwrev) {
1248                         sc->rl_type = hw_rev->rl_type;
1249                         sc->rl_hwrev = hw_rev->rl_rev;
1250                         break;
1251                 }
1252                 hw_rev++;
1253         }
1254         if (hw_rev->rl_desc == NULL) {
1255                 device_printf(dev, "Unknown H/W revision: 0x%08x\n", hwrev);
1256                 error = ENXIO;
1257                 goto fail;
1258         }
1259
1260         switch (hw_rev->rl_rev) {
1261         case RL_HWREV_8139CPLUS:
1262                 sc->rl_flags |= RL_FLAG_NOJUMBO | RL_FLAG_FASTETHER |
1263                     RL_FLAG_AUTOPAD;
1264                 break;
1265         case RL_HWREV_8100E:
1266         case RL_HWREV_8101E:
1267                 sc->rl_flags |= RL_FLAG_NOJUMBO | RL_FLAG_PHYWAKE |
1268                     RL_FLAG_FASTETHER;
1269                 break;
1270         case RL_HWREV_8102E:
1271         case RL_HWREV_8102EL:
1272         case RL_HWREV_8102EL_SPIN1:
1273                 sc->rl_flags |= RL_FLAG_NOJUMBO | RL_FLAG_PHYWAKE |
1274                     RL_FLAG_PAR | RL_FLAG_DESCV2 | RL_FLAG_MACSTAT |
1275                     RL_FLAG_FASTETHER | RL_FLAG_CMDSTOP | RL_FLAG_AUTOPAD;
1276                 break;
1277         case RL_HWREV_8103E:
1278                 sc->rl_flags |= RL_FLAG_NOJUMBO | RL_FLAG_PHYWAKE |
1279                     RL_FLAG_PAR | RL_FLAG_DESCV2 | RL_FLAG_MACSTAT |
1280                     RL_FLAG_FASTETHER | RL_FLAG_CMDSTOP | RL_FLAG_AUTOPAD |
1281                     RL_FLAG_MACSLEEP;
1282                 break;
1283         case RL_HWREV_8168_SPIN1:
1284         case RL_HWREV_8168_SPIN2:
1285                 sc->rl_flags |= RL_FLAG_WOLRXENB;
1286                 /* FALLTHROUGH */
1287         case RL_HWREV_8168_SPIN3:
1288                 sc->rl_flags |= RL_FLAG_PHYWAKE | RL_FLAG_MACSTAT;
1289                 break;
1290         case RL_HWREV_8168C_SPIN2:
1291                 sc->rl_flags |= RL_FLAG_MACSLEEP;
1292                 /* FALLTHROUGH */
1293         case RL_HWREV_8168C:
1294                 if ((hwrev & 0x00700000) == 0x00200000)
1295                         sc->rl_flags |= RL_FLAG_MACSLEEP;
1296                 /* FALLTHROUGH */
1297         case RL_HWREV_8168CP:
1298         case RL_HWREV_8168D:
1299         case RL_HWREV_8168DP:
1300                 sc->rl_flags |= RL_FLAG_PHYWAKE | RL_FLAG_PAR |
1301                     RL_FLAG_DESCV2 | RL_FLAG_MACSTAT | RL_FLAG_CMDSTOP |
1302                     RL_FLAG_AUTOPAD;
1303                 /*
1304                  * These controllers support jumbo frames, but enabling
1305                  * them seems to require touching additional magic
1306                  * registers. Depending on the MAC revision, some
1307                  * controllers need checksum offload disabled. So
1308                  * disable jumbo frames until I have a better idea of
1309                  * what is really required to support them.
1310                  * RTL8168C/CP : supports up to 6KB jumbo frame.
1311                  * RTL8111C/CP : supports up to 9KB jumbo frame.
1312                  */
1313                 sc->rl_flags |= RL_FLAG_NOJUMBO;
1314                 break;
1315         case RL_HWREV_8168E:
1316                 sc->rl_flags |= RL_FLAG_PHYWAKE | RL_FLAG_PHYWAKE_PM |
1317                     RL_FLAG_PAR | RL_FLAG_DESCV2 | RL_FLAG_MACSTAT |
1318                     RL_FLAG_CMDSTOP | RL_FLAG_AUTOPAD | RL_FLAG_NOJUMBO;
1319                 break;
1320         case RL_HWREV_8169_8110SB:
1321         case RL_HWREV_8169_8110SBL:
1322         case RL_HWREV_8169_8110SC:
1323         case RL_HWREV_8169_8110SCE:
1324                 sc->rl_flags |= RL_FLAG_PHYWAKE;
1325                 /* FALLTHROUGH */
1326         case RL_HWREV_8169:
1327         case RL_HWREV_8169S:
1328         case RL_HWREV_8110S:
1329                 sc->rl_flags |= RL_FLAG_MACRESET;
1330                 break;
1331         default:
1332                 break;
1333         }
1334
1335         /* Enable PME. */
1336         CSR_WRITE_1(sc, RL_EECMD, RL_EE_MODE);
1337         cfg = CSR_READ_1(sc, RL_CFG1);
1338         cfg |= RL_CFG1_PME;
1339         CSR_WRITE_1(sc, RL_CFG1, cfg);
1340         cfg = CSR_READ_1(sc, RL_CFG5);
1341         cfg &= RL_CFG5_PME_STS;
1342         CSR_WRITE_1(sc, RL_CFG5, cfg);
1343         CSR_WRITE_1(sc, RL_EECMD, RL_EEMODE_OFF);
1344
1345         if ((sc->rl_flags & RL_FLAG_PAR) != 0) {
1346                 /*
1347                  * XXX Should have a better way to extract station
1348                  * address from EEPROM.
1349                  */
1350                 for (i = 0; i < ETHER_ADDR_LEN; i++)
1351                         eaddr[i] = CSR_READ_1(sc, RL_IDR0 + i);
1352         } else {
1353                 sc->rl_eewidth = RL_9356_ADDR_LEN;
1354                 re_read_eeprom(sc, (caddr_t)&re_did, 0, 1);
1355                 if (re_did != 0x8129)
1356                         sc->rl_eewidth = RL_9346_ADDR_LEN;
1357
1358                 /*
1359                  * Get station address from the EEPROM.
1360                  */
1361                 re_read_eeprom(sc, (caddr_t)as, RL_EE_EADDR, 3);
1362                 for (i = 0; i < ETHER_ADDR_LEN / 2; i++)
1363                         as[i] = le16toh(as[i]);
1364                 bcopy(as, eaddr, sizeof(eaddr));
1365         }
1366
1367         if (sc->rl_type == RL_8169) {
1368                 /* Set RX length mask and number of descriptors. */
1369                 sc->rl_rxlenmask = RL_RDESC_STAT_GFRAGLEN;
1370                 sc->rl_txstart = RL_GTXSTART;
1371                 sc->rl_ldata.rl_tx_desc_cnt = RL_8169_TX_DESC_CNT;
1372                 sc->rl_ldata.rl_rx_desc_cnt = RL_8169_RX_DESC_CNT;
1373         } else {
1374                 /* Set RX length mask and number of descriptors. */
1375                 sc->rl_rxlenmask = RL_RDESC_STAT_FRAGLEN;
1376                 sc->rl_txstart = RL_TXSTART;
1377                 sc->rl_ldata.rl_tx_desc_cnt = RL_8139_TX_DESC_CNT;
1378                 sc->rl_ldata.rl_rx_desc_cnt = RL_8139_RX_DESC_CNT;
1379         }
1380
1381         error = re_allocmem(dev, sc);
1382         if (error)
1383                 goto fail;
1384
1385         ifp = sc->rl_ifp = if_alloc(IFT_ETHER);
1386         if (ifp == NULL) {
1387                 device_printf(dev, "can not if_alloc()\n");
1388                 error = ENOSPC;
1389                 goto fail;
1390         }
1391
1392         /* Take controller out of deep sleep mode. */
1393         if ((sc->rl_flags & RL_FLAG_MACSLEEP) != 0) {
1394                 if ((CSR_READ_1(sc, RL_MACDBG) & 0x80) == 0x80)
1395                         CSR_WRITE_1(sc, RL_GPIO,
1396                             CSR_READ_1(sc, RL_GPIO) | 0x01);
1397                 else
1398                         CSR_WRITE_1(sc, RL_GPIO,
1399                             CSR_READ_1(sc, RL_GPIO) & ~0x01);
1400         }
1401
1402         /* Take PHY out of power down mode. */
1403         if ((sc->rl_flags & RL_FLAG_PHYWAKE_PM) != 0)
1404                 CSR_WRITE_1(sc, RL_PMCH, CSR_READ_1(sc, RL_PMCH) | 0x80);
1405         if ((sc->rl_flags & RL_FLAG_PHYWAKE) != 0) {
1406                 re_gmii_writereg(dev, 1, 0x1f, 0);
1407                 re_gmii_writereg(dev, 1, 0x0e, 0);
1408         }
1409
1410         /* Do MII setup */
1411         if (mii_phy_probe(dev, &sc->rl_miibus,
1412             re_ifmedia_upd, re_ifmedia_sts)) {
1413                 device_printf(dev, "MII without any phy!\n");
1414                 error = ENXIO;
1415                 goto fail;
1416         }
1417
1418         ifp->if_softc = sc;
1419         if_initname(ifp, device_get_name(dev), device_get_unit(dev));
1420         ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
1421         ifp->if_ioctl = re_ioctl;
1422         ifp->if_start = re_start;
1423         ifp->if_hwassist = RE_CSUM_FEATURES;
1424         ifp->if_capabilities = IFCAP_HWCSUM;
1425         ifp->if_capenable = ifp->if_capabilities;
1426         ifp->if_init = re_init;
1427         IFQ_SET_MAXLEN(&ifp->if_snd, RL_IFQ_MAXLEN);
1428         ifp->if_snd.ifq_drv_maxlen = RL_IFQ_MAXLEN;
1429         IFQ_SET_READY(&ifp->if_snd);
1430
1431         TASK_INIT(&sc->rl_txtask, 1, re_tx_task, ifp);
1432         TASK_INIT(&sc->rl_inttask, 0, re_int_task, sc);
1433
1434         /*
1435          * XXX
1436          * Still have no idea how to make TSO work on 8168C, 8168CP,
1437          * 8111C and 8111CP.
1438          */
1439         if ((sc->rl_flags & RL_FLAG_DESCV2) == 0) {
1440                 ifp->if_hwassist |= CSUM_TSO;
1441                 ifp->if_capabilities |= IFCAP_TSO4 | IFCAP_VLAN_HWTSO;
1442         }
1443
1444         /*
1445          * Call MI attach routine.
1446          */
1447         ether_ifattach(ifp, eaddr);
1448
1449         /* VLAN capability setup */
1450         ifp->if_capabilities |= IFCAP_VLAN_MTU | IFCAP_VLAN_HWTAGGING;
1451         if (ifp->if_capabilities & IFCAP_HWCSUM)
1452                 ifp->if_capabilities |= IFCAP_VLAN_HWCSUM;
1453         /* Enable WOL if PM is supported. */
1454         if (pci_find_extcap(sc->rl_dev, PCIY_PMG, &reg) == 0)
1455                 ifp->if_capabilities |= IFCAP_WOL;
1456         ifp->if_capenable = ifp->if_capabilities;
1457         /*
1458          * Don't enable TSO by default. Under certain
1459          * circumstances the controller generates corrupted
1460          * packets at TSO sizes.
1461          */
1462         ifp->if_hwassist &= ~CSUM_TSO;
1463         ifp->if_capenable &= ~(IFCAP_TSO4 | IFCAP_VLAN_HWTSO);
1464 #ifdef DEVICE_POLLING
1465         ifp->if_capabilities |= IFCAP_POLLING;
1466 #endif
1467         /*
1468          * Tell the upper layer(s) we support long frames.
1469          * Must appear after the call to ether_ifattach() because
1470          * ether_ifattach() sets ifi_hdrlen to the default value.
1471          */
1472         ifp->if_data.ifi_hdrlen = sizeof(struct ether_vlan_header);
1473
1474 #ifdef RE_DIAG
1475         /*
1476          * Perform hardware diagnostic on the original RTL8169.
1477          * Some 32-bit cards were incorrectly wired and would
1478          * malfunction if plugged into a 64-bit slot.
1479          */
1480
1481         if (hwrev == RL_HWREV_8169) {
1482                 error = re_diag(sc);
1483                 if (error) {
1484                         device_printf(dev,
1485                         "attach aborted due to hardware diag failure\n");
1486                         ether_ifdetach(ifp);
1487                         goto fail;
1488                 }
1489         }
1490 #endif
1491
1492         /* Hook interrupt last to avoid having to lock softc */
1493         if ((sc->rl_flags & RL_FLAG_MSI) == 0)
1494                 error = bus_setup_intr(dev, sc->rl_irq[0],
1495                     INTR_TYPE_NET | INTR_MPSAFE, re_intr, NULL, sc,
1496                     &sc->rl_intrhand[0]);
1497         else {
1498                 for (i = 0; i < RL_MSI_MESSAGES; i++) {
1499                         error = bus_setup_intr(dev, sc->rl_irq[i],
1500                             INTR_TYPE_NET | INTR_MPSAFE, re_intr, NULL, sc,
1501                             &sc->rl_intrhand[i]);
1502                         if (error != 0)
1503                                 break;
1504                 }
1505         }
1506         if (error) {
1507                 device_printf(dev, "couldn't set up irq\n");
1508                 ether_ifdetach(ifp);
1509         }
1510
1511 fail:
1512
1513         if (error)
1514                 re_detach(dev);
1515
1516         return (error);
1517 }
1518
1519 /*
1520  * Shutdown hardware and free up resources. This can be called any
1521  * time after the mutex has been initialized. It is called in both
1522  * the error case in attach and the normal detach case so it needs
1523  * to be careful about only freeing resources that have actually been
1524  * allocated.
1525  */
1526 static int
1527 re_detach(device_t dev)
1528 {
1529         struct rl_softc         *sc;
1530         struct ifnet            *ifp;
1531         int                     i, rid;
1532
1533         sc = device_get_softc(dev);
1534         ifp = sc->rl_ifp;
1535         KASSERT(mtx_initialized(&sc->rl_mtx), ("re mutex not initialized"));
1536
1537         /* These should only be active if attach succeeded */
1538         if (device_is_attached(dev)) {
1539 #ifdef DEVICE_POLLING
1540                 if (ifp->if_capenable & IFCAP_POLLING)
1541                         ether_poll_deregister(ifp);
1542 #endif
1543                 RL_LOCK(sc);
1544 #if 0
1545                 sc->suspended = 1;
1546 #endif
1547                 re_stop(sc);
1548                 RL_UNLOCK(sc);
1549                 callout_drain(&sc->rl_stat_callout);
1550                 taskqueue_drain(taskqueue_fast, &sc->rl_inttask);
1551                 taskqueue_drain(taskqueue_fast, &sc->rl_txtask);
1552                 /*
1553                  * Force off the IFF_UP flag here, in case someone
1554                  * still had a BPF descriptor attached to this
1555                  * interface. If they do, ether_ifdetach() will cause
1556                  * the BPF code to try and clear the promisc mode
1557                  * flag, which will bubble down to re_ioctl(),
1558                  * which will try to call re_init() again. This will
1559                  * turn the NIC back on and restart the MII ticker,
1560                  * which will panic the system when the kernel tries
1561                  * to invoke the re_tick() function that isn't there
1562                  * anymore.
1563                  */
1564                 ifp->if_flags &= ~IFF_UP;
1565                 ether_ifdetach(ifp);
1566         }
1567         if (sc->rl_miibus)
1568                 device_delete_child(dev, sc->rl_miibus);
1569         bus_generic_detach(dev);
1570
1571         /*
1572          * The rest is resource deallocation, so we should already be
1573          * stopped here.
1574          */
1575
1576         for (i = 0; i < RL_MSI_MESSAGES; i++) {
1577                 if (sc->rl_intrhand[i] != NULL) {
1578                         bus_teardown_intr(dev, sc->rl_irq[i],
1579                             sc->rl_intrhand[i]);
1580                         sc->rl_intrhand[i] = NULL;
1581                 }
1582         }
1583         if (ifp != NULL)
1584                 if_free(ifp);
1585         if ((sc->rl_flags & RL_FLAG_MSI) == 0) {
1586                 if (sc->rl_irq[0] != NULL) {
1587                         bus_release_resource(dev, SYS_RES_IRQ, 0,
1588                             sc->rl_irq[0]);
1589                         sc->rl_irq[0] = NULL;
1590                 }
1591         } else {
1592                 for (i = 0, rid = 1; i < RL_MSI_MESSAGES; i++, rid++) {
1593                         if (sc->rl_irq[i] != NULL) {
1594                                 bus_release_resource(dev, SYS_RES_IRQ, rid,
1595                                     sc->rl_irq[i]);
1596                                 sc->rl_irq[i] = NULL;
1597                         }
1598                 }
1599                 pci_release_msi(dev);
1600         }
1601         if (sc->rl_res)
1602                 bus_release_resource(dev, sc->rl_res_type, sc->rl_res_id,
1603                     sc->rl_res);
1604
1605         /* Unload and free the RX DMA ring memory and map */
1606
1607         if (sc->rl_ldata.rl_rx_list_tag) {
1608                 bus_dmamap_unload(sc->rl_ldata.rl_rx_list_tag,
1609                     sc->rl_ldata.rl_rx_list_map);
1610                 bus_dmamem_free(sc->rl_ldata.rl_rx_list_tag,
1611                     sc->rl_ldata.rl_rx_list,
1612                     sc->rl_ldata.rl_rx_list_map);
1613                 bus_dma_tag_destroy(sc->rl_ldata.rl_rx_list_tag);
1614         }
1615
1616         /* Unload and free the TX DMA ring memory and map */
1617
1618         if (sc->rl_ldata.rl_tx_list_tag) {
1619                 bus_dmamap_unload(sc->rl_ldata.rl_tx_list_tag,
1620                     sc->rl_ldata.rl_tx_list_map);
1621                 bus_dmamem_free(sc->rl_ldata.rl_tx_list_tag,
1622                     sc->rl_ldata.rl_tx_list,
1623                     sc->rl_ldata.rl_tx_list_map);
1624                 bus_dma_tag_destroy(sc->rl_ldata.rl_tx_list_tag);
1625         }
1626
1627         /* Destroy all the RX and TX buffer maps */
1628
1629         if (sc->rl_ldata.rl_tx_mtag) {
1630                 for (i = 0; i < sc->rl_ldata.rl_tx_desc_cnt; i++)
1631                         bus_dmamap_destroy(sc->rl_ldata.rl_tx_mtag,
1632                             sc->rl_ldata.rl_tx_desc[i].tx_dmamap);
1633                 bus_dma_tag_destroy(sc->rl_ldata.rl_tx_mtag);
1634         }
1635         if (sc->rl_ldata.rl_rx_mtag) {
1636                 for (i = 0; i < sc->rl_ldata.rl_rx_desc_cnt; i++)
1637                         bus_dmamap_destroy(sc->rl_ldata.rl_rx_mtag,
1638                             sc->rl_ldata.rl_rx_desc[i].rx_dmamap);
1639                 if (sc->rl_ldata.rl_rx_sparemap)
1640                         bus_dmamap_destroy(sc->rl_ldata.rl_rx_mtag,
1641                             sc->rl_ldata.rl_rx_sparemap);
1642                 bus_dma_tag_destroy(sc->rl_ldata.rl_rx_mtag);
1643         }
1644
1645         /* Unload and free the stats buffer and map */
1646
1647         if (sc->rl_ldata.rl_stag) {
1648                 bus_dmamap_unload(sc->rl_ldata.rl_stag,
1649                     sc->rl_ldata.rl_smap);
1650                 bus_dmamem_free(sc->rl_ldata.rl_stag,
1651                     sc->rl_ldata.rl_stats,
1652                     sc->rl_ldata.rl_smap);
1653                 bus_dma_tag_destroy(sc->rl_ldata.rl_stag);
1654         }
1655
1656         if (sc->rl_parent_tag)
1657                 bus_dma_tag_destroy(sc->rl_parent_tag);
1658
1659         mtx_destroy(&sc->rl_mtx);
1660
1661         return (0);
1662 }
1663
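     /*
      * Recycle the mbuf already present in RX slot 'idx': rewrite the
      * descriptor's command/status word (preserving EOR on the last slot)
      * and hand ownership back to the chip.
      */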
1664 static __inline void
1665 re_discard_rxbuf(struct rl_softc *sc, int idx)
1666 {
1667         struct rl_desc          *desc;
1668         struct rl_rxdesc        *rxd;
1669         uint32_t                cmdstat;
1670
1671         rxd = &sc->rl_ldata.rl_rx_desc[idx];
1672         desc = &sc->rl_ldata.rl_rx_list[idx];
1673         desc->rl_vlanctl = 0;
1674         cmdstat = rxd->rx_size;
1675         if (idx == sc->rl_ldata.rl_rx_desc_cnt - 1)
1676                 cmdstat |= RL_RDESC_CMD_EOR;
1677         desc->rl_cmdstat = htole32(cmdstat | RL_RDESC_CMD_OWN);
1678 }
1679
1680 static int
1681 re_newbuf(struct rl_softc *sc, int idx)
1682 {
1683         struct mbuf             *m;
1684         struct rl_rxdesc        *rxd;
1685         bus_dma_segment_t       segs[1];
1686         bus_dmamap_t            map;
1687         struct rl_desc          *desc;
1688         uint32_t                cmdstat;
1689         int                     error, nsegs;
1690
1691         m = m_getcl(M_DONTWAIT, MT_DATA, M_PKTHDR);
1692         if (m == NULL)
1693                 return (ENOBUFS);
1694
1695         m->m_len = m->m_pkthdr.len = MCLBYTES;
1696 #ifdef RE_FIXUP_RX
1697         /*
1698          * This is part of an evil trick to deal with non-x86 platforms.
1699          * The RealTek chip requires RX buffers to be aligned on 64-bit
1700          * boundaries, but that leaves the IP header misaligned, which
1701          * hoses strict-alignment (non-x86) machines. To get around this,
1702          * we leave some empty space at the start of each buffer and, for
1703          * non-x86 hosts, copy the buffer back six bytes to achieve word
1704          * alignment. This is slightly more efficient than allocating a
1705          * new buffer, copying the contents, and discarding the old one.
1706          */
1707         m_adj(m, RE_ETHER_ALIGN);
1708 #endif
1709         error = bus_dmamap_load_mbuf_sg(sc->rl_ldata.rl_rx_mtag,
1710             sc->rl_ldata.rl_rx_sparemap, m, segs, &nsegs, BUS_DMA_NOWAIT);
1711         if (error != 0) {
1712                 m_freem(m);
1713                 return (ENOBUFS);
1714         }
1715         KASSERT(nsegs == 1, ("%s: %d segments returned!", __func__, nsegs));
1716
1717         rxd = &sc->rl_ldata.rl_rx_desc[idx];
1718         if (rxd->rx_m != NULL) {
1719                 bus_dmamap_sync(sc->rl_ldata.rl_rx_mtag, rxd->rx_dmamap,
1720                     BUS_DMASYNC_POSTREAD);
1721                 bus_dmamap_unload(sc->rl_ldata.rl_rx_mtag, rxd->rx_dmamap);
1722         }
1723
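             /*
              * The new mbuf was loaded into the spare map above, so a DMA
              * load failure never disturbs a live descriptor; now swap the
              * spare map with this slot's map.
              */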
1724         rxd->rx_m = m;
1725         map = rxd->rx_dmamap;
1726         rxd->rx_dmamap = sc->rl_ldata.rl_rx_sparemap;
1727         rxd->rx_size = segs[0].ds_len;
1728         sc->rl_ldata.rl_rx_sparemap = map;
1729         bus_dmamap_sync(sc->rl_ldata.rl_rx_mtag, rxd->rx_dmamap,
1730             BUS_DMASYNC_PREREAD);
1731
1732         desc = &sc->rl_ldata.rl_rx_list[idx];
1733         desc->rl_vlanctl = 0;
1734         desc->rl_bufaddr_lo = htole32(RL_ADDR_LO(segs[0].ds_addr));
1735         desc->rl_bufaddr_hi = htole32(RL_ADDR_HI(segs[0].ds_addr));
1736         cmdstat = segs[0].ds_len;
1737         if (idx == sc->rl_ldata.rl_rx_desc_cnt - 1)
1738                 cmdstat |= RL_RDESC_CMD_EOR;
1739         desc->rl_cmdstat = htole32(cmdstat | RL_RDESC_CMD_OWN);
1740
1741         return (0);
1742 }
1743
1744 #ifdef RE_FIXUP_RX
1745 static __inline void
1746 re_fixup_rx(struct mbuf *m)
1747 {
1748         int                     i;
1749         uint16_t                *src, *dst;
1750
1751         src = mtod(m, uint16_t *);
1752         dst = src - (RE_ETHER_ALIGN - ETHER_ALIGN) / sizeof *src;
1753
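             /*
              * dst sits below src, so this forward word-by-word copy is
              * safe even though the two regions overlap.
              */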
1754         for (i = 0; i < (m->m_len / sizeof(uint16_t) + 1); i++)
1755                 *dst++ = *src++;
1756
1757         m->m_data -= RE_ETHER_ALIGN - ETHER_ALIGN;
1758 }
1759 #endif
1760
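     /*
      * Initialize the TX ring: clear all descriptors, set EOR on the
      * last one so the chip wraps, and reset the ring indexes.
      */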
1761 static int
1762 re_tx_list_init(struct rl_softc *sc)
1763 {
1764         struct rl_desc          *desc;
1765         int                     i;
1766
1767         RL_LOCK_ASSERT(sc);
1768
1769         bzero(sc->rl_ldata.rl_tx_list,
1770             sc->rl_ldata.rl_tx_desc_cnt * sizeof(struct rl_desc));
1771         for (i = 0; i < sc->rl_ldata.rl_tx_desc_cnt; i++)
1772                 sc->rl_ldata.rl_tx_desc[i].tx_m = NULL;
1773         /* Set EOR. */
1774         desc = &sc->rl_ldata.rl_tx_list[sc->rl_ldata.rl_tx_desc_cnt - 1];
1775         desc->rl_cmdstat |= htole32(RL_TDESC_CMD_EOR);
1776
1777         bus_dmamap_sync(sc->rl_ldata.rl_tx_list_tag,
1778             sc->rl_ldata.rl_tx_list_map,
1779             BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
1780
1781         sc->rl_ldata.rl_tx_prodidx = 0;
1782         sc->rl_ldata.rl_tx_considx = 0;
1783         sc->rl_ldata.rl_tx_free = sc->rl_ldata.rl_tx_desc_cnt;
1784
1785         return (0);
1786 }
1787
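     /*
      * Initialize the RX ring: clear it, then give every slot a fresh
      * mbuf via re_newbuf(), which also passes ownership to the chip.
      */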
1788 static int
1789 re_rx_list_init(struct rl_softc *sc)
1790 {
1791         int                     error, i;
1792
1793         bzero(sc->rl_ldata.rl_rx_list,
1794             sc->rl_ldata.rl_rx_desc_cnt * sizeof(struct rl_desc));
1795         for (i = 0; i < sc->rl_ldata.rl_rx_desc_cnt; i++) {
1796                 sc->rl_ldata.rl_rx_desc[i].rx_m = NULL;
1797                 if ((error = re_newbuf(sc, i)) != 0)
1798                         return (error);
1799         }
1800
1801         /* Flush the RX descriptors */
1802
1803         bus_dmamap_sync(sc->rl_ldata.rl_rx_list_tag,
1804             sc->rl_ldata.rl_rx_list_map,
1805             BUS_DMASYNC_PREWRITE|BUS_DMASYNC_PREREAD);
1806
1807         sc->rl_ldata.rl_rx_prodidx = 0;
1808         sc->rl_head = sc->rl_tail = NULL;
1809
1810         return (0);
1811 }
1812
1813 /*
1814  * RX handler for C+ and 8169. For the gigE chips, we support
1815  * the reception of jumbo frames that have been fragmented
1816  * across multiple 2K mbuf cluster buffers.
1817  */
1818 static int
1819 re_rxeof(struct rl_softc *sc, int *rx_npktsp)
1820 {
1821         struct mbuf             *m;
1822         struct ifnet            *ifp;
1823         int                     i, total_len;
1824         struct rl_desc          *cur_rx;
1825         u_int32_t               rxstat, rxvlan;
1826         int                     maxpkt = 16, rx_npkts = 0;
1827
1828         RL_LOCK_ASSERT(sc);
1829
1830         ifp = sc->rl_ifp;
1831
1832         /* Invalidate the descriptor memory */
1833
1834         bus_dmamap_sync(sc->rl_ldata.rl_rx_list_tag,
1835             sc->rl_ldata.rl_rx_list_map,
1836             BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
1837
1838         for (i = sc->rl_ldata.rl_rx_prodidx; maxpkt > 0;
1839             i = RL_RX_DESC_NXT(sc, i)) {
1840                 if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
1841                         break;
1842                 cur_rx = &sc->rl_ldata.rl_rx_list[i];
1843                 rxstat = le32toh(cur_rx->rl_cmdstat);
1844                 if ((rxstat & RL_RDESC_STAT_OWN) != 0)
1845                         break;
1846                 total_len = rxstat & sc->rl_rxlenmask;
1847                 rxvlan = le32toh(cur_rx->rl_vlanctl);
1848                 m = sc->rl_ldata.rl_rx_desc[i].rx_m;
1849
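                     /*
                      * EOF clear means this buffer holds only part of a
                      * fragmented jumbo frame; chain it onto rl_head/rl_tail
                      * and keep collecting fragments.
                      */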
1850                 if (!(rxstat & RL_RDESC_STAT_EOF)) {
1851                         if (re_newbuf(sc, i) != 0) {
1852                                 /*
1853                                  * If this is part of a multi-fragment packet,
1854                                  * discard all the pieces.
1855                                  */
1856                                 if (sc->rl_head != NULL) {
1857                                         m_freem(sc->rl_head);
1858                                         sc->rl_head = sc->rl_tail = NULL;
1859                                 }
1860                                 re_discard_rxbuf(sc, i);
1861                                 continue;
1862                         }
1863                         m->m_len = RE_RX_DESC_BUFLEN;
1864                         if (sc->rl_head == NULL)
1865                                 sc->rl_head = sc->rl_tail = m;
1866                         else {
1867                                 m->m_flags &= ~M_PKTHDR;
1868                                 sc->rl_tail->m_next = m;
1869                                 sc->rl_tail = m;
1870                         }
1871                         continue;
1872                 }
1873
1874                 /*
1875                  * NOTE: for the 8139C+, the frame length field
1876                  * is always 12 bits in size, but for the gigE chips,
1877                  * it is 13 bits (since the max RX frame length is 16K).
1878                  * Unfortunately, all 32 bits in the status word
1879                  * were already used, so to make room for the extra
1880                  * length bit, RealTek took out the 'frame alignment
1881                  * error' bit and shifted the other status bits
1882                  * over one slot. The OWN, EOR, FS and LS bits are
1883                  * still in the same places. We have already extracted
1884                  * the frame length and checked the OWN bit, so rather
1885                  * than using an alternate bit mapping, we shift the
1886                  * status bits one space to the right so we can evaluate
1887                  * them using the 8169 status as though it was in the
1888                  * same format as that of the 8139C+.
1889                  */
1890                 if (sc->rl_type == RL_8169)
1891                         rxstat >>= 1;
1892
1893                 /*
1894                  * If total_len > 2^13-1, both _RXERRSUM and _GIANT will be
1895                  * set; if the CRC error bit is clear, the frame is still valid.
1896                  */
1897                 if (rxstat & RL_RDESC_STAT_RXERRSUM && !(total_len > 8191 &&
1898                     (rxstat & RL_RDESC_STAT_ERRS) == RL_RDESC_STAT_GIANT)) {
1899                         ifp->if_ierrors++;
1900                         /*
1901                          * If this is part of a multi-fragment packet,
1902                          * discard all the pieces.
1903                          */
1904                         if (sc->rl_head != NULL) {
1905                                 m_freem(sc->rl_head);
1906                                 sc->rl_head = sc->rl_tail = NULL;
1907                         }
1908                         re_discard_rxbuf(sc, i);
1909                         continue;
1910                 }
1911
1912                 /*
1913                  * If allocating a replacement mbuf fails,
1914                  * reload the current one.
1915                  */
1916
1917                 if (re_newbuf(sc, i) != 0) {
1918                         ifp->if_iqdrops++;
1919                         if (sc->rl_head != NULL) {
1920                                 m_freem(sc->rl_head);
1921                                 sc->rl_head = sc->rl_tail = NULL;
1922                         }
1923                         re_discard_rxbuf(sc, i);
1924                         continue;
1925                 }
1926
1927                 if (sc->rl_head != NULL) {
1928                         m->m_len = total_len % RE_RX_DESC_BUFLEN;
1929                         if (m->m_len == 0)
1930                                 m->m_len = RE_RX_DESC_BUFLEN;
1931                         /*
1932                          * Special case: if there are 4 bytes or fewer
1933                          * in this buffer, the mbuf can be discarded:
1934                          * the last 4 bytes are the CRC, which we don't
1935                          * care about anyway.
1936                          */
1937                         if (m->m_len <= ETHER_CRC_LEN) {
1938                                 sc->rl_tail->m_len -=
1939                                     (ETHER_CRC_LEN - m->m_len);
1940                                 m_freem(m);
1941                         } else {
1942                                 m->m_len -= ETHER_CRC_LEN;
1943                                 m->m_flags &= ~M_PKTHDR;
1944                                 sc->rl_tail->m_next = m;
1945                         }
1946                         m = sc->rl_head;
1947                         sc->rl_head = sc->rl_tail = NULL;
1948                         m->m_pkthdr.len = total_len - ETHER_CRC_LEN;
1949                 } else
1950                         m->m_pkthdr.len = m->m_len =
1951                             (total_len - ETHER_CRC_LEN);
1952
1953 #ifdef RE_FIXUP_RX
1954                 re_fixup_rx(m);
1955 #endif
1956                 ifp->if_ipackets++;
1957                 m->m_pkthdr.rcvif = ifp;
1958
1959                 /* Do RX checksumming if enabled */
1960
1961                 if (ifp->if_capenable & IFCAP_RXCSUM) {
1962                         if ((sc->rl_flags & RL_FLAG_DESCV2) == 0) {
1963                                 /* Check IP header checksum */
1964                                 if (rxstat & RL_RDESC_STAT_PROTOID)
1965                                         m->m_pkthdr.csum_flags |=
1966                                             CSUM_IP_CHECKED;
1967                                 if (!(rxstat & RL_RDESC_STAT_IPSUMBAD))
1968                                         m->m_pkthdr.csum_flags |=
1969                                             CSUM_IP_VALID;
1970
1971                                 /* Check TCP/UDP checksum */
1972                                 if ((RL_TCPPKT(rxstat) &&
1973                                     !(rxstat & RL_RDESC_STAT_TCPSUMBAD)) ||
1974                                     (RL_UDPPKT(rxstat) &&
1975                                      !(rxstat & RL_RDESC_STAT_UDPSUMBAD))) {
1976                                         m->m_pkthdr.csum_flags |=
1977                                                 CSUM_DATA_VALID|CSUM_PSEUDO_HDR;
1978                                         m->m_pkthdr.csum_data = 0xffff;
1979                                 }
1980                         } else {
1981                                 /*
1982                                  * RTL8168C/RTL816CP/RTL8111C/RTL8111CP
1983                                  */
1984                                 if ((rxstat & RL_RDESC_STAT_PROTOID) &&
1985                                     (rxvlan & RL_RDESC_IPV4))
1986                                         m->m_pkthdr.csum_flags |=
1987                                             CSUM_IP_CHECKED;
1988                                 if (!(rxstat & RL_RDESC_STAT_IPSUMBAD) &&
1989                                     (rxvlan & RL_RDESC_IPV4))
1990                                         m->m_pkthdr.csum_flags |=
1991                                             CSUM_IP_VALID;
1992                                 if (((rxstat & RL_RDESC_STAT_TCP) &&
1993                                     !(rxstat & RL_RDESC_STAT_TCPSUMBAD)) ||
1994                                     ((rxstat & RL_RDESC_STAT_UDP) &&
1995                                     !(rxstat & RL_RDESC_STAT_UDPSUMBAD))) {
1996                                         m->m_pkthdr.csum_flags |=
1997                                                 CSUM_DATA_VALID|CSUM_PSEUDO_HDR;
1998                                         m->m_pkthdr.csum_data = 0xffff;
1999                                 }
2000                         }
2001                 }
2002                 maxpkt--;
2003                 if (rxvlan & RL_RDESC_VLANCTL_TAG) {
2004                         m->m_pkthdr.ether_vtag =
2005                             bswap16((rxvlan & RL_RDESC_VLANCTL_DATA));
2006                         m->m_flags |= M_VLANTAG;
2007                 }
2008                 RL_UNLOCK(sc);
2009                 (*ifp->if_input)(ifp, m);
2010                 RL_LOCK(sc);
2011                 rx_npkts++;
2012         }
2013
2014         /* Flush the RX DMA ring */
2015
2016         bus_dmamap_sync(sc->rl_ldata.rl_rx_list_tag,
2017             sc->rl_ldata.rl_rx_list_map,
2018             BUS_DMASYNC_PREWRITE|BUS_DMASYNC_PREREAD);
2019
2020         sc->rl_ldata.rl_rx_prodidx = i;
2021
2022         if (rx_npktsp != NULL)
2023                 *rx_npktsp = rx_npkts;
2024         if (maxpkt)
2025                 return (EAGAIN);
2026
2027         return (0);
2028 }
2029
2030 static void
2031 re_txeof(struct rl_softc *sc)
2032 {
2033         struct ifnet            *ifp;
2034         struct rl_txdesc        *txd;
2035         u_int32_t               txstat;
2036         int                     cons;
2037
2038         cons = sc->rl_ldata.rl_tx_considx;
2039         if (cons == sc->rl_ldata.rl_tx_prodidx)
2040                 return;
2041
2042         ifp = sc->rl_ifp;
2043         /* Invalidate the TX descriptor list */
2044         bus_dmamap_sync(sc->rl_ldata.rl_tx_list_tag,
2045             sc->rl_ldata.rl_tx_list_map,
2046             BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
2047
2048         for (; cons != sc->rl_ldata.rl_tx_prodidx;
2049             cons = RL_TX_DESC_NXT(sc, cons)) {
2050                 txstat = le32toh(sc->rl_ldata.rl_tx_list[cons].rl_cmdstat);
2051                 if (txstat & RL_TDESC_STAT_OWN)
2052                         break;
2053                 /*
2054                  * We only stash mbufs in the last descriptor
2055                  * in a fragment chain, which also happens to
2056                  * be the only place where the TX status bits
2057                  * are valid.
2058                  */
2059                 if (txstat & RL_TDESC_CMD_EOF) {
2060                         txd = &sc->rl_ldata.rl_tx_desc[cons];
2061                         bus_dmamap_sync(sc->rl_ldata.rl_tx_mtag,
2062                             txd->tx_dmamap, BUS_DMASYNC_POSTWRITE);
2063                         bus_dmamap_unload(sc->rl_ldata.rl_tx_mtag,
2064                             txd->tx_dmamap);
2065                         KASSERT(txd->tx_m != NULL,
2066                             ("%s: freeing NULL mbufs!", __func__));
2067                         m_freem(txd->tx_m);
2068                         txd->tx_m = NULL;
2069                         if (txstat & (RL_TDESC_STAT_EXCESSCOL|
2070                             RL_TDESC_STAT_COLCNT))
2071                                 ifp->if_collisions++;
2072                         if (txstat & RL_TDESC_STAT_TXERRSUM)
2073                                 ifp->if_oerrors++;
2074                         else
2075                                 ifp->if_opackets++;
2076                 }
2077                 sc->rl_ldata.rl_tx_free++;
2078                 ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;
2079         }
2080         sc->rl_ldata.rl_tx_considx = cons;
2081
2082         /* No changes made to the TX ring, so no flush needed */
2083
2084         if (sc->rl_ldata.rl_tx_free != sc->rl_ldata.rl_tx_desc_cnt) {
2085 #ifdef RE_TX_MODERATION
2086                 /*
2087                  * If not all descriptors have been reaped yet, reload
2088                  * the timer so that we will eventually get another
2089                  * interrupt that will cause us to re-enter this routine.
2090                  * This is done in case the transmitter has gone idle.
2091                  */
2092                 CSR_WRITE_4(sc, RL_TIMERCNT, 1);
2093 #endif
2094         } else
2095                 sc->rl_watchdog_timer = 0;
2096 }
2097
2098 static void
2099 re_tick(void *xsc)
2100 {
2101         struct rl_softc         *sc;
2102         struct mii_data         *mii;
2103
2104         sc = xsc;
2105
2106         RL_LOCK_ASSERT(sc);
2107
2108         mii = device_get_softc(sc->rl_miibus);
2109         mii_tick(mii);
2110         if ((sc->rl_flags & RL_FLAG_LINK) == 0)
2111                 re_miibus_statchg(sc->rl_dev);
2112         /*
2113          * Reclaim transmitted frames here. Technically it is not
2114          * necessary to do this here, but it ensures periodic reclamation
2115          * regardless of Tx completion interrupts, which seem to be
2116          * lost on PCIe based controllers under certain situations.
2117          */
2118         re_txeof(sc);
2119         re_watchdog(sc);
2120         callout_reset(&sc->rl_stat_callout, hz, re_tick, sc);
2121 }
2122
2123 #ifdef DEVICE_POLLING
2124 static int
2125 re_poll(struct ifnet *ifp, enum poll_cmd cmd, int count)
2126 {
2127         struct rl_softc *sc = ifp->if_softc;
2128         int rx_npkts = 0;
2129
2130         RL_LOCK(sc);
2131         if (ifp->if_drv_flags & IFF_DRV_RUNNING)
2132                 rx_npkts = re_poll_locked(ifp, cmd, count);
2133         RL_UNLOCK(sc);
2134         return (rx_npkts);
2135 }
2136
2137 static int
2138 re_poll_locked(struct ifnet *ifp, enum poll_cmd cmd, int count)
2139 {
2140         struct rl_softc *sc = ifp->if_softc;
2141         int rx_npkts;
2142
2143         RL_LOCK_ASSERT(sc);
2144
2145         sc->rxcycles = count;
2146         re_rxeof(sc, &rx_npkts);
2147         re_txeof(sc);
2148
2149         if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
2150                 taskqueue_enqueue_fast(taskqueue_fast, &sc->rl_txtask);
2151
2152         if (cmd == POLL_AND_CHECK_STATUS) { /* also check status register */
2153                 u_int16_t       status;
2154
2155                 status = CSR_READ_2(sc, RL_ISR);
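                     /* A read of all 1s means the card is likely gone. */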
2156                 if (status == 0xffff)
2157                         return (rx_npkts);
2158                 if (status)
2159                         CSR_WRITE_2(sc, RL_ISR, status);
2160                 if ((status & (RL_ISR_TX_OK | RL_ISR_TX_DESC_UNAVAIL)) &&
2161                     (sc->rl_flags & RL_FLAG_PCIE))
2162                         CSR_WRITE_1(sc, sc->rl_txstart, RL_TXSTART_START);
2163
2164                 /*
2165                  * XXX check behaviour on receiver stalls.
2166                  */
2167
2168                 if (status & RL_ISR_SYSTEM_ERR) {
2169                         ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
2170                         re_init_locked(sc);
2171                 }
2172         }
2173         return (rx_npkts);
2174 }
2175 #endif /* DEVICE_POLLING */
2176
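     /*
      * Interrupt filter. This runs in primary interrupt context, so it
      * only masks further interrupts and defers the real work to the
      * fast taskqueue.
      */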
2177 static int
2178 re_intr(void *arg)
2179 {
2180         struct rl_softc         *sc;
2181         uint16_t                status;
2182
2183         sc = arg;
2184
2185         status = CSR_READ_2(sc, RL_ISR);
2186         if (status == 0xFFFF || (status & RL_INTRS_CPLUS) == 0)
2187                 return (FILTER_STRAY);
2188         CSR_WRITE_2(sc, RL_IMR, 0);
2189
2190         taskqueue_enqueue_fast(taskqueue_fast, &sc->rl_inttask);
2191
2192         return (FILTER_HANDLED);
2193 }
2194
2195 static void
2196 re_int_task(void *arg, int npending)
2197 {
2198         struct rl_softc         *sc;
2199         struct ifnet            *ifp;
2200         u_int16_t               status;
2201         int                     rval = 0;
2202
2203         sc = arg;
2204         ifp = sc->rl_ifp;
2205
2206         RL_LOCK(sc);
2207
2208         status = CSR_READ_2(sc, RL_ISR);
2209         CSR_WRITE_2(sc, RL_ISR, status);
2210
2211         if (sc->suspended ||
2212             (ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) {
2213                 RL_UNLOCK(sc);
2214                 return;
2215         }
2216
2217 #ifdef DEVICE_POLLING
2218         if  (ifp->if_capenable & IFCAP_POLLING) {
2219                 RL_UNLOCK(sc);
2220                 return;
2221         }
2222 #endif
2223
2224         if (status & (RL_ISR_RX_OK|RL_ISR_RX_ERR|RL_ISR_FIFO_OFLOW))
2225                 rval = re_rxeof(sc, NULL);
2226
2227         /*
2228          * Some chips will ignore a second TX request issued
2229          * while an existing transmission is in progress. If
2230          * the transmitter goes idle but there are still
2231          * packets waiting to be sent, we need to restart the
2232          * channel here to flush them out. This only seems to
2233          * be required with the PCIe devices.
2234          */
2235         if ((status & (RL_ISR_TX_OK | RL_ISR_TX_DESC_UNAVAIL)) &&
2236             (sc->rl_flags & RL_FLAG_PCIE))
2237                 CSR_WRITE_1(sc, sc->rl_txstart, RL_TXSTART_START);
2238         if (status & (
2239 #ifdef RE_TX_MODERATION
2240             RL_ISR_TIMEOUT_EXPIRED|
2241 #else
2242             RL_ISR_TX_OK|
2243 #endif
2244             RL_ISR_TX_ERR|RL_ISR_TX_DESC_UNAVAIL))
2245                 re_txeof(sc);
2246
2247         if (status & RL_ISR_SYSTEM_ERR) {
2248                 ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
2249                 re_init_locked(sc);
2250         }
2251
2252         if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
2253                 taskqueue_enqueue_fast(taskqueue_fast, &sc->rl_txtask);
2254
2255         RL_UNLOCK(sc);
2256
2257         if ((CSR_READ_2(sc, RL_ISR) & RL_INTRS_CPLUS) || rval) {
2258                 taskqueue_enqueue_fast(taskqueue_fast, &sc->rl_inttask);
2259                 return;
2260         }
2261
2262         CSR_WRITE_2(sc, RL_IMR, RL_INTRS_CPLUS);
2263 }
2264
2265 static int
2266 re_encap(struct rl_softc *sc, struct mbuf **m_head)
2267 {
2268         struct rl_txdesc        *txd, *txd_last;
2269         bus_dma_segment_t       segs[RL_NTXSEGS];
2270         bus_dmamap_t            map;
2271         struct mbuf             *m_new;
2272         struct rl_desc          *desc;
2273         int                     nsegs, prod;
2274         int                     i, error, ei, si;
2275         int                     padlen;
2276         uint32_t                cmdstat, csum_flags, vlanctl;
2277
2278         RL_LOCK_ASSERT(sc);
2279         M_ASSERTPKTHDR((*m_head));
2280
2281         /*
2282          * With some of the RealTek chips, using the checksum offload
2283          * support in conjunction with the autopadding feature results
2284          * in the transmission of corrupt frames. For example, if we
2285          * need to send a really small IP fragment that's less than 60
2286          * bytes in size, and IP header checksumming is enabled, the
2287          * resulting ethernet frame that appears on the wire will
2288          * have a garbled payload. To work around this, if TX IP checksum
2289          * offload is enabled, we always manually pad short frames out
2290          * to the minimum ethernet frame size.
2291          */
2292         if ((sc->rl_flags & RL_FLAG_AUTOPAD) == 0 &&
2293             (*m_head)->m_pkthdr.len < RL_IP4CSUMTX_PADLEN &&
2294             ((*m_head)->m_pkthdr.csum_flags & CSUM_IP) != 0) {
2295                 padlen = RL_MIN_FRAMELEN - (*m_head)->m_pkthdr.len;
2296                 if (M_WRITABLE(*m_head) == 0) {
2297                         /* Get a writable copy. */
2298                         m_new = m_dup(*m_head, M_DONTWAIT);
2299                         m_freem(*m_head);
2300                         if (m_new == NULL) {
2301                                 *m_head = NULL;
2302                                 return (ENOBUFS);
2303                         }
2304                         *m_head = m_new;
2305                 }
2306                 if ((*m_head)->m_next != NULL ||
2307                     M_TRAILINGSPACE(*m_head) < padlen) {
2308                         m_new = m_defrag(*m_head, M_DONTWAIT);
2309                         if (m_new == NULL) {
2310                                 m_freem(*m_head);
2311                                 *m_head = NULL;
2312                                 return (ENOBUFS);
2313                         }
2314                 } else
2315                         m_new = *m_head;
2316
2317                 /*
2318                  * Manually pad short frames, and zero the pad space
2319                  * to avoid leaking data.
2320                  */
2321                 bzero(mtod(m_new, char *) + m_new->m_pkthdr.len, padlen);
2322                 m_new->m_pkthdr.len += padlen;
2323                 m_new->m_len = m_new->m_pkthdr.len;
2324                 *m_head = m_new;
2325         }
2326
2327         prod = sc->rl_ldata.rl_tx_prodidx;
2328         txd = &sc->rl_ldata.rl_tx_desc[prod];
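             /*
              * Try to map the frame for DMA; if it spans too many
              * scatter/gather segments (EFBIG), collapse it into at most
              * RL_NTXSEGS mbufs and retry once.
              */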
2329         error = bus_dmamap_load_mbuf_sg(sc->rl_ldata.rl_tx_mtag, txd->tx_dmamap,
2330             *m_head, segs, &nsegs, BUS_DMA_NOWAIT);
2331         if (error == EFBIG) {
2332                 m_new = m_collapse(*m_head, M_DONTWAIT, RL_NTXSEGS);
2333                 if (m_new == NULL) {
2334                         m_freem(*m_head);
2335                         *m_head = NULL;
2336                         return (ENOBUFS);
2337                 }
2338                 *m_head = m_new;
2339                 error = bus_dmamap_load_mbuf_sg(sc->rl_ldata.rl_tx_mtag,
2340                     txd->tx_dmamap, *m_head, segs, &nsegs, BUS_DMA_NOWAIT);
2341                 if (error != 0) {
2342                         m_freem(*m_head);
2343                         *m_head = NULL;
2344                         return (error);
2345                 }
2346         } else if (error != 0)
2347                 return (error);
2348         if (nsegs == 0) {
2349                 m_freem(*m_head);
2350                 *m_head = NULL;
2351                 return (EIO);
2352         }
2353
2354         /* Check for number of available descriptors. */
2355         if (sc->rl_ldata.rl_tx_free - nsegs <= 1) {
2356                 bus_dmamap_unload(sc->rl_ldata.rl_tx_mtag, txd->tx_dmamap);
2357                 return (ENOBUFS);
2358         }
2359
2360         bus_dmamap_sync(sc->rl_ldata.rl_tx_mtag, txd->tx_dmamap,
2361             BUS_DMASYNC_PREWRITE);
2362
2363         /*
2364          * Set up checksum offload. Note: checksum offload bits must
2365          * appear in all descriptors of a multi-descriptor transmit
2366          * attempt; this is a hard requirement, according to testing
2367          * done with an 8169 chip.
2368          */
2369         vlanctl = 0;
2370         csum_flags = 0;
2371         if (((*m_head)->m_pkthdr.csum_flags & CSUM_TSO) != 0)
2372                 csum_flags = RL_TDESC_CMD_LGSEND |
2373                     ((uint32_t)(*m_head)->m_pkthdr.tso_segsz <<
2374                     RL_TDESC_CMD_MSSVAL_SHIFT);
2375         else {
2376                 /*
2377                  * Unconditionally enable IP checksum if TCP or UDP
2378                  * checksum is required. Otherwise, a TCP/UDP checksum
2379                  * request alone has no effect.
2380                  */
2381                 if (((*m_head)->m_pkthdr.csum_flags & RE_CSUM_FEATURES) != 0) {
2382                         if ((sc->rl_flags & RL_FLAG_DESCV2) == 0) {
2383                                 csum_flags |= RL_TDESC_CMD_IPCSUM;
2384                                 if (((*m_head)->m_pkthdr.csum_flags &
2385                                     CSUM_TCP) != 0)
2386                                         csum_flags |= RL_TDESC_CMD_TCPCSUM;
2387                                 if (((*m_head)->m_pkthdr.csum_flags &
2388                                     CSUM_UDP) != 0)
2389                                         csum_flags |= RL_TDESC_CMD_UDPCSUM;
2390                         } else {
2391                                 vlanctl |= RL_TDESC_CMD_IPCSUMV2;
2392                                 if (((*m_head)->m_pkthdr.csum_flags &
2393                                     CSUM_TCP) != 0)
2394                                         vlanctl |= RL_TDESC_CMD_TCPCSUMV2;
2395                                 if (((*m_head)->m_pkthdr.csum_flags &
2396                                     CSUM_UDP) != 0)
2397                                         vlanctl |= RL_TDESC_CMD_UDPCSUMV2;
2398                         }
2399                 }
2400         }
2401
2402         /*
2403          * Set up hardware VLAN tagging. Note: vlan tag info must
2404          * appear in all descriptors of a multi-descriptor
2405          * transmission attempt.
2406          */
2407         if ((*m_head)->m_flags & M_VLANTAG)
2408                 vlanctl |= bswap16((*m_head)->m_pkthdr.ether_vtag) |
2409                     RL_TDESC_VLANCTL_TAG;
2410
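             /*
              * Fill in every descriptor, setting OWN on all but the first;
              * ownership of the first (SOF) descriptor is transferred last,
              * below, so the chip never sees a half-built chain.
              */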
2411         si = prod;
2412         for (i = 0; i < nsegs; i++, prod = RL_TX_DESC_NXT(sc, prod)) {
2413                 desc = &sc->rl_ldata.rl_tx_list[prod];
2414                 desc->rl_vlanctl = htole32(vlanctl);
2415                 desc->rl_bufaddr_lo = htole32(RL_ADDR_LO(segs[i].ds_addr));
2416                 desc->rl_bufaddr_hi = htole32(RL_ADDR_HI(segs[i].ds_addr));
2417                 cmdstat = segs[i].ds_len;
2418                 if (i != 0)
2419                         cmdstat |= RL_TDESC_CMD_OWN;
2420                 if (prod == sc->rl_ldata.rl_tx_desc_cnt - 1)
2421                         cmdstat |= RL_TDESC_CMD_EOR;
2422                 desc->rl_cmdstat = htole32(cmdstat | csum_flags);
2423                 sc->rl_ldata.rl_tx_free--;
2424         }
2425         /* Update producer index. */
2426         sc->rl_ldata.rl_tx_prodidx = prod;
2427
2428         /* Set EOF on the last descriptor. */
2429         ei = RL_TX_DESC_PRV(sc, prod);
2430         desc = &sc->rl_ldata.rl_tx_list[ei];
2431         desc->rl_cmdstat |= htole32(RL_TDESC_CMD_EOF);
2432
2433         desc = &sc->rl_ldata.rl_tx_list[si];
2434         /* Set SOF and transfer ownership of packet to the chip. */
2435         desc->rl_cmdstat |= htole32(RL_TDESC_CMD_OWN | RL_TDESC_CMD_SOF);
2436
2437         /*
2438          * Ensure that the map for this transmission
2439          * is placed at the array index of the last descriptor
2440          * in this chain.  (Swap last and first dmamaps.)
2441          */
2442         txd_last = &sc->rl_ldata.rl_tx_desc[ei];
2443         map = txd->tx_dmamap;
2444         txd->tx_dmamap = txd_last->tx_dmamap;
2445         txd_last->tx_dmamap = map;
2446         txd_last->tx_m = *m_head;
2447
2448         return (0);
2449 }
2450
2451 static void
2452 re_tx_task(void *arg, int npending)
2453 {
2454         struct ifnet            *ifp;
2455
2456         ifp = arg;
2457         re_start(ifp);
2458 }
2459
2460 /*
2461  * Main transmit routine for C+ and gigE NICs.
2462  */
2463 static void
2464 re_start(struct ifnet *ifp)
2465 {
2466         struct rl_softc         *sc;
2467         struct mbuf             *m_head;
2468         int                     queued;
2469
2470         sc = ifp->if_softc;
2471
2472         RL_LOCK(sc);
2473
2474         if ((ifp->if_drv_flags & (IFF_DRV_RUNNING | IFF_DRV_OACTIVE)) !=
2475             IFF_DRV_RUNNING || (sc->rl_flags & RL_FLAG_LINK) == 0) {
2476                 RL_UNLOCK(sc);
2477                 return;
2478         }
2479
2480         for (queued = 0; !IFQ_DRV_IS_EMPTY(&ifp->if_snd) &&
2481             sc->rl_ldata.rl_tx_free > 1;) {
2482                 IFQ_DRV_DEQUEUE(&ifp->if_snd, m_head);
2483                 if (m_head == NULL)
2484                         break;
2485
2486                 if (re_encap(sc, &m_head) != 0) {
2487                         if (m_head == NULL)
2488                                 break;
2489                         IFQ_DRV_PREPEND(&ifp->if_snd, m_head);
2490                         ifp->if_drv_flags |= IFF_DRV_OACTIVE;
2491                         break;
2492                 }
2493
2494                 /*
2495                  * If there's a BPF listener, bounce a copy of this frame
2496                  * to him.
2497                  */
2498                 ETHER_BPF_MTAP(ifp, m_head);
2499
2500                 queued++;
2501         }
2502
2503         if (queued == 0) {
2504 #ifdef RE_TX_MODERATION
2505                 if (sc->rl_ldata.rl_tx_free != sc->rl_ldata.rl_tx_desc_cnt)
2506                         CSR_WRITE_4(sc, RL_TIMERCNT, 1);
2507 #endif
2508                 RL_UNLOCK(sc);
2509                 return;
2510         }
2511
2512         /* Flush the TX descriptors */
2513
2514         bus_dmamap_sync(sc->rl_ldata.rl_tx_list_tag,
2515             sc->rl_ldata.rl_tx_list_map,
2516             BUS_DMASYNC_PREWRITE|BUS_DMASYNC_PREREAD);
2517
2518         CSR_WRITE_1(sc, sc->rl_txstart, RL_TXSTART_START);
2519
2520 #ifdef RE_TX_MODERATION
2521         /*
2522          * Use the countdown timer for interrupt moderation.
2523          * 'TX done' interrupts are disabled. Instead, we reset the
2524          * countdown timer, which will begin counting until it hits
2525          * the value in the TIMERINT register, and then trigger an
2526          * interrupt. Each time we write to the TIMERCNT register,
2527          * the timer count is reset to 0.
2528          */
2529         CSR_WRITE_4(sc, RL_TIMERCNT, 1);
2530 #endif
2531
2532         /*
2533          * Set a timeout in case the chip goes out to lunch.
2534          */
2535         sc->rl_watchdog_timer = 5;
2536
2537         RL_UNLOCK(sc);
2538 }
2539
2540 static void
2541 re_init(void *xsc)
2542 {
2543         struct rl_softc         *sc = xsc;
2544
2545         RL_LOCK(sc);
2546         re_init_locked(sc);
2547         RL_UNLOCK(sc);
2548 }
2549
2550 static void
2551 re_init_locked(struct rl_softc *sc)
2552 {
2553         struct ifnet            *ifp = sc->rl_ifp;
2554         struct mii_data         *mii;
2555         uint32_t                reg;
2556         uint16_t                cfg;
2557         union {
2558                 uint32_t align_dummy;
2559                 u_char eaddr[ETHER_ADDR_LEN];
2560         } eaddr;
2561
2562         RL_LOCK_ASSERT(sc);
2563
2564         mii = device_get_softc(sc->rl_miibus);
2565
2566         if ((ifp->if_drv_flags & IFF_DRV_RUNNING) != 0)
2567                 return;
2568
2569         /*
2570          * Cancel pending I/O and free all RX/TX buffers.
2571          */
2572         re_stop(sc);
2573
2574         /* Put controller into known state. */
2575         re_reset(sc);
2576
2577         /*
2578          * Enable C+ RX and TX mode, as well as VLAN stripping and
2579          * RX checksum offload. We must configure the C+ register
2580          * before all others.
2581          */
2582         cfg = RL_CPLUSCMD_PCI_MRW;
2583         if ((ifp->if_capenable & IFCAP_RXCSUM) != 0)
2584                 cfg |= RL_CPLUSCMD_RXCSUM_ENB;
2585         if ((ifp->if_capenable & IFCAP_VLAN_HWTAGGING) != 0)
2586                 cfg |= RL_CPLUSCMD_VLANSTRIP;
2587         if ((sc->rl_flags & RL_FLAG_MACSTAT) != 0) {
2588                 cfg |= RL_CPLUSCMD_MACSTAT_DIS;
2589                 /* XXX magic. */
2590                 cfg |= 0x0001;
2591         } else
2592                 cfg |= RL_CPLUSCMD_RXENB | RL_CPLUSCMD_TXENB;
2593         CSR_WRITE_2(sc, RL_CPLUS_CMD, cfg);
2594         if (sc->rl_hwrev == RL_HWREV_8169_8110SC ||
2595             sc->rl_hwrev == RL_HWREV_8169_8110SCE) {
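                     /*
                      * XXX Undocumented register (0x7c); the values below are
                      * presumably taken from RealTek's reference driver.
                      */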
2596                 reg = 0x000fff00;
2597                 if ((CSR_READ_1(sc, RL_CFG2) & RL_CFG2_PCI66MHZ) != 0)
2598                         reg |= 0x000000ff;
2599                 if (sc->rl_hwrev == RL_HWREV_8169_8110SCE)
2600                         reg |= 0x00f00000;
2601                 CSR_WRITE_4(sc, 0x7c, reg);
2602                 /* Disable interrupt mitigation. */
2603                 CSR_WRITE_2(sc, 0xe2, 0);
2604         }
2605         /*
2606          * Disable TSO if the interface MTU is greater than the
2607          * maximum MSS the controller supports.
2608          */
2609         if (ifp->if_mtu > RL_TSO_MTU && (ifp->if_capenable & IFCAP_TSO4) != 0) {
2610                 ifp->if_capenable &= ~IFCAP_TSO4;
2611                 ifp->if_hwassist &= ~CSUM_TSO;
2612         }
2613
2614         /*
2615          * Init our MAC address.  Even though the chipset
2616          * documentation doesn't mention it, we need to enter "Config
2617          * register write enable" mode to modify the ID registers.
2618          */
2619         /* Copy MAC address on stack to align. */
2620         bcopy(IF_LLADDR(ifp), eaddr.eaddr, ETHER_ADDR_LEN);
2621         CSR_WRITE_1(sc, RL_EECMD, RL_EEMODE_WRITECFG);
2622         CSR_WRITE_4(sc, RL_IDR0,
2623             htole32(*(u_int32_t *)(&eaddr.eaddr[0])));
2624         CSR_WRITE_4(sc, RL_IDR4,
2625             htole32(*(u_int32_t *)(&eaddr.eaddr[4])));
2626         CSR_WRITE_1(sc, RL_EECMD, RL_EEMODE_OFF);
2627
2628         /*
2629          * For C+ mode, initialize the RX descriptors and mbufs.
2630          */
2631         re_rx_list_init(sc);
2632         re_tx_list_init(sc);
2633
2634         /*
2635          * Load the addresses of the RX and TX lists into the chip.
2636          */
2637
2638         CSR_WRITE_4(sc, RL_RXLIST_ADDR_HI,
2639             RL_ADDR_HI(sc->rl_ldata.rl_rx_list_addr));
2640         CSR_WRITE_4(sc, RL_RXLIST_ADDR_LO,
2641             RL_ADDR_LO(sc->rl_ldata.rl_rx_list_addr));
2642
2643         CSR_WRITE_4(sc, RL_TXLIST_ADDR_HI,
2644             RL_ADDR_HI(sc->rl_ldata.rl_tx_list_addr));
2645         CSR_WRITE_4(sc, RL_TXLIST_ADDR_LO,
2646             RL_ADDR_LO(sc->rl_ldata.rl_tx_list_addr));
2647
2648         /*
2649          * Enable transmit and receive.
2650          */
2651         CSR_WRITE_1(sc, RL_COMMAND, RL_CMD_TX_ENB|RL_CMD_RX_ENB);
2652
2653         /*
2654          * Set the initial TX configuration.
2655          */
2656         if (sc->rl_testmode) {
2657                 if (sc->rl_type == RL_8169)
2658                         CSR_WRITE_4(sc, RL_TXCFG,
2659                             RL_TXCFG_CONFIG|RL_LOOPTEST_ON);
2660                 else
2661                         CSR_WRITE_4(sc, RL_TXCFG,
2662                             RL_TXCFG_CONFIG|RL_LOOPTEST_ON_CPLUS);
2663         } else
2664                 CSR_WRITE_4(sc, RL_TXCFG, RL_TXCFG_CONFIG);
2665
2666         CSR_WRITE_1(sc, RL_EARLY_TX_THRESH, 16);
2667
2668         /*
2669          * Set the initial RX configuration.
2670          */
2671         re_set_rxmode(sc);
2672
2673 #ifdef DEVICE_POLLING
2674         /*
2675          * Disable interrupts if we are polling.
2676          */
2677         if (ifp->if_capenable & IFCAP_POLLING)
2678                 CSR_WRITE_2(sc, RL_IMR, 0);
2679         else    /* otherwise ... */
2680 #endif
2681
2682         /*
2683          * Enable interrupts.
2684          */
2685         if (sc->rl_testmode)
2686                 CSR_WRITE_2(sc, RL_IMR, 0);
2687         else
2688                 CSR_WRITE_2(sc, RL_IMR, RL_INTRS_CPLUS);
2689         CSR_WRITE_2(sc, RL_ISR, RL_INTRS_CPLUS);
2690
2691         /* Set initial TX threshold */
2692         sc->rl_txthresh = RL_TX_THRESH_INIT;
2693
2694         /* Start RX/TX process. */
2695         CSR_WRITE_4(sc, RL_MISSEDPKT, 0);
2696 #ifdef notdef
2697         /* Enable receiver and transmitter. */
2698         CSR_WRITE_1(sc, RL_COMMAND, RL_CMD_TX_ENB|RL_CMD_RX_ENB);
2699 #endif
2700
2701 #ifdef RE_TX_MODERATION
2702         /*
2703          * Initialize the timer interrupt register so that
2704          * a timer interrupt will be generated once the timer
2705          * reaches a certain number of ticks. The timer is
2706          * reloaded on each transmit. This gives us TX interrupt
2707          * moderation, which dramatically improves TX frame rate.
2708          */
2709         if (sc->rl_type == RL_8169)
2710                 CSR_WRITE_4(sc, RL_TIMERINT_8169, 0x800);
2711         else
2712                 CSR_WRITE_4(sc, RL_TIMERINT, 0x400);
2713 #endif
2714
2715         /*
2716          * For 8169 gigE NICs, set the max allowed RX packet
2717          * size so we can receive jumbo frames.
2718          */
2719         if (sc->rl_type == RL_8169)
2720                 CSR_WRITE_2(sc, RL_MAXRXPKTLEN, 16383);
2721
2722         if (sc->rl_testmode)
2723                 return;
2724
2725         mii_mediachg(mii);
2726
2727         CSR_WRITE_1(sc, RL_CFG1, CSR_READ_1(sc, RL_CFG1) | RL_CFG1_DRVLOAD);
2728
2729         ifp->if_drv_flags |= IFF_DRV_RUNNING;
2730         ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;
2731
2732         sc->rl_flags &= ~RL_FLAG_LINK;
2733         sc->rl_watchdog_timer = 0;
2734         callout_reset(&sc->rl_stat_callout, hz, re_tick, sc);
2735 }
2736
2737 /*
2738  * Set media options.
2739  */
2740 static int
2741 re_ifmedia_upd(struct ifnet *ifp)
2742 {
2743         struct rl_softc         *sc;
2744         struct mii_data         *mii;
2745         int                     error;
2746
2747         sc = ifp->if_softc;
2748         mii = device_get_softc(sc->rl_miibus);
2749         RL_LOCK(sc);
2750         error = mii_mediachg(mii);
2751         RL_UNLOCK(sc);
2752
2753         return (error);
2754 }
2755
2756 /*
2757  * Report current media status.
2758  */
2759 static void
2760 re_ifmedia_sts(struct ifnet *ifp, struct ifmediareq *ifmr)
2761 {
2762         struct rl_softc         *sc;
2763         struct mii_data         *mii;
2764
2765         sc = ifp->if_softc;
2766         mii = device_get_softc(sc->rl_miibus);
2767
2768         RL_LOCK(sc);
2769         mii_pollstat(mii);
2770         RL_UNLOCK(sc);
2771         ifmr->ifm_active = mii->mii_media_active;
2772         ifmr->ifm_status = mii->mii_media_status;
2773 }
2774
2775 static int
2776 re_ioctl(struct ifnet *ifp, u_long command, caddr_t data)
2777 {
2778         struct rl_softc         *sc = ifp->if_softc;
2779         struct ifreq            *ifr = (struct ifreq *) data;
2780         struct mii_data         *mii;
2781         int                     error = 0;
2782
2783         switch (command) {
2784         case SIOCSIFMTU:
2785                 if (ifr->ifr_mtu < ETHERMIN || ifr->ifr_mtu > RL_JUMBO_MTU) {
2786                         error = EINVAL;
2787                         break;
2788                 }
2789                 if ((sc->rl_flags & RL_FLAG_NOJUMBO) != 0 &&
2790                     ifr->ifr_mtu > RL_MAX_FRAMELEN) {
2791                         error = EINVAL;
2792                         break;
2793                 }
2794                 RL_LOCK(sc);
2795                 if (ifp->if_mtu != ifr->ifr_mtu)
2796                         ifp->if_mtu = ifr->ifr_mtu;
2797                 if (ifp->if_mtu > RL_TSO_MTU &&
2798                     (ifp->if_capenable & IFCAP_TSO4) != 0) {
2799                         ifp->if_capenable &= ~IFCAP_TSO4;
2800                         ifp->if_hwassist &= ~CSUM_TSO;
2801                         VLAN_CAPABILITIES(ifp);
2802                 }
2803                 RL_UNLOCK(sc);
2804                 break;
2805         case SIOCSIFFLAGS:
2806                 RL_LOCK(sc);
                if ((ifp->if_flags & IFF_UP) != 0) {
                        if ((ifp->if_drv_flags & IFF_DRV_RUNNING) != 0) {
                                if (((ifp->if_flags ^ sc->rl_if_flags)
                                    & (IFF_PROMISC | IFF_ALLMULTI)) != 0)
                                        re_set_rxmode(sc);
                        } else
                                re_init_locked(sc);
                } else {
                        if ((ifp->if_drv_flags & IFF_DRV_RUNNING) != 0)
                                re_stop(sc);
                }
                sc->rl_if_flags = ifp->if_flags;
                RL_UNLOCK(sc);
                break;
        case SIOCADDMULTI:
        case SIOCDELMULTI:
                RL_LOCK(sc);
                if ((ifp->if_drv_flags & IFF_DRV_RUNNING) != 0)
                        re_set_rxmode(sc);
                RL_UNLOCK(sc);
                break;
        case SIOCGIFMEDIA:
        case SIOCSIFMEDIA:
                mii = device_get_softc(sc->rl_miibus);
                error = ifmedia_ioctl(ifp, ifr, &mii->mii_media, command);
                break;
        case SIOCSIFCAP:
            {
                int mask, reinit;

                mask = ifr->ifr_reqcap ^ ifp->if_capenable;
                reinit = 0;
#ifdef DEVICE_POLLING
                if (mask & IFCAP_POLLING) {
                        if (ifr->ifr_reqcap & IFCAP_POLLING) {
                                error = ether_poll_register(re_poll, ifp);
                                if (error)
                                        return (error);
                                RL_LOCK(sc);
                                /* Disable interrupts. */
                                CSR_WRITE_2(sc, RL_IMR, 0x0000);
                                ifp->if_capenable |= IFCAP_POLLING;
                                RL_UNLOCK(sc);
                        } else {
                                error = ether_poll_deregister(ifp);
                                /* Enable interrupts. */
                                RL_LOCK(sc);
                                CSR_WRITE_2(sc, RL_IMR, RL_INTRS_CPLUS);
                                ifp->if_capenable &= ~IFCAP_POLLING;
                                RL_UNLOCK(sc);
                        }
                }
#endif /* DEVICE_POLLING */
                if (mask & IFCAP_HWCSUM) {
                        ifp->if_capenable ^= IFCAP_HWCSUM;
                        if (ifp->if_capenable & IFCAP_TXCSUM)
                                ifp->if_hwassist |= RE_CSUM_FEATURES;
                        else
                                ifp->if_hwassist &= ~RE_CSUM_FEATURES;
                        reinit = 1;
                }
                if ((mask & IFCAP_TSO4) != 0 &&
                    (ifp->if_capabilities & IFCAP_TSO) != 0) {
                        ifp->if_capenable ^= IFCAP_TSO4;
                        if ((IFCAP_TSO4 & ifp->if_capenable) != 0)
                                ifp->if_hwassist |= CSUM_TSO;
                        else
                                ifp->if_hwassist &= ~CSUM_TSO;
                        if (ifp->if_mtu > RL_TSO_MTU &&
                            (ifp->if_capenable & IFCAP_TSO4) != 0) {
                                ifp->if_capenable &= ~IFCAP_TSO4;
                                ifp->if_hwassist &= ~CSUM_TSO;
                        }
                }
                if ((mask & IFCAP_VLAN_HWTSO) != 0 &&
                    (ifp->if_capabilities & IFCAP_VLAN_HWTSO) != 0)
                        ifp->if_capenable ^= IFCAP_VLAN_HWTSO;
                if ((mask & IFCAP_VLAN_HWTAGGING) != 0 &&
                    (ifp->if_capabilities & IFCAP_VLAN_HWTAGGING) != 0) {
                        ifp->if_capenable ^= IFCAP_VLAN_HWTAGGING;
                        /* TSO over VLAN requires VLAN hardware tagging. */
                        if ((ifp->if_capenable & IFCAP_VLAN_HWTAGGING) == 0)
                                ifp->if_capenable &= ~IFCAP_VLAN_HWTSO;
                        reinit = 1;
                }
                if ((mask & IFCAP_WOL) != 0 &&
                    (ifp->if_capabilities & IFCAP_WOL) != 0) {
                        if ((mask & IFCAP_WOL_UCAST) != 0)
                                ifp->if_capenable ^= IFCAP_WOL_UCAST;
                        if ((mask & IFCAP_WOL_MCAST) != 0)
                                ifp->if_capenable ^= IFCAP_WOL_MCAST;
                        if ((mask & IFCAP_WOL_MAGIC) != 0)
                                ifp->if_capenable ^= IFCAP_WOL_MAGIC;
                }
                if (reinit != 0 &&
                    (ifp->if_drv_flags & IFF_DRV_RUNNING) != 0) {
                        ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
                        re_init(sc);
                }
                VLAN_CAPABILITIES(ifp);
            }
                break;
        default:
                error = ether_ioctl(ifp, command, data);
                break;
        }

        return (error);
}

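/*
 * Watchdog timer handler.  When the timer expires, reclaim completed
 * transmit descriptors; if all of them have completed, the timeout was
 * caused by missed TX completion interrupts and restarting the transmit
 * task is enough to recover.  Otherwise reinitialize the controller.
 */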
static void
re_watchdog(struct rl_softc *sc)
{
        struct ifnet            *ifp;

        RL_LOCK_ASSERT(sc);

        if (sc->rl_watchdog_timer == 0 || --sc->rl_watchdog_timer != 0)
                return;

        ifp = sc->rl_ifp;
        re_txeof(sc);
        if (sc->rl_ldata.rl_tx_free == sc->rl_ldata.rl_tx_desc_cnt) {
                if_printf(ifp, "watchdog timeout (missed Tx interrupts) "
                    "-- recovering\n");
                if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
                        taskqueue_enqueue_fast(taskqueue_fast, &sc->rl_txtask);
                return;
        }

        if_printf(ifp, "watchdog timeout\n");
        ifp->if_oerrors++;

        re_rxeof(sc, NULL);
        ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
        re_init_locked(sc);
        if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
                taskqueue_enqueue_fast(taskqueue_fast, &sc->rl_txtask);
}

/*
 * Stop the adapter and free any mbufs allocated to the
 * RX and TX lists.
 */
static void
re_stop(struct rl_softc *sc)
{
        int                     i;
        struct ifnet            *ifp;
        struct rl_txdesc        *txd;
        struct rl_rxdesc        *rxd;

        RL_LOCK_ASSERT(sc);

        ifp = sc->rl_ifp;

        sc->rl_watchdog_timer = 0;
        callout_stop(&sc->rl_stat_callout);
        ifp->if_drv_flags &= ~(IFF_DRV_RUNNING | IFF_DRV_OACTIVE);

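        /*
         * Controllers with the CMDSTOP quirk appear to want a stop
         * request with TX/RX left enabled, presumably so that any
         * in-flight DMA can drain before the channels shut down.
         */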
        if ((sc->rl_flags & RL_FLAG_CMDSTOP) != 0)
                CSR_WRITE_1(sc, RL_COMMAND, RL_CMD_STOPREQ | RL_CMD_TX_ENB |
                    RL_CMD_RX_ENB);
        else
                CSR_WRITE_1(sc, RL_COMMAND, 0x00);
        DELAY(1000);
        CSR_WRITE_2(sc, RL_IMR, 0x0000);
        CSR_WRITE_2(sc, RL_ISR, 0xFFFF);

        if (sc->rl_head != NULL) {
                m_freem(sc->rl_head);
                sc->rl_head = sc->rl_tail = NULL;
        }

        /* Free the TX list buffers. */
        for (i = 0; i < sc->rl_ldata.rl_tx_desc_cnt; i++) {
                txd = &sc->rl_ldata.rl_tx_desc[i];
                if (txd->tx_m != NULL) {
                        bus_dmamap_sync(sc->rl_ldata.rl_tx_mtag,
                            txd->tx_dmamap, BUS_DMASYNC_POSTWRITE);
                        bus_dmamap_unload(sc->rl_ldata.rl_tx_mtag,
                            txd->tx_dmamap);
                        m_freem(txd->tx_m);
                        txd->tx_m = NULL;
                }
        }

        /* Free the RX list buffers. */
        for (i = 0; i < sc->rl_ldata.rl_rx_desc_cnt; i++) {
                rxd = &sc->rl_ldata.rl_rx_desc[i];
                if (rxd->rx_m != NULL) {
                        bus_dmamap_sync(sc->rl_ldata.rl_rx_mtag,
                            rxd->rx_dmamap, BUS_DMASYNC_POSTREAD);
                        bus_dmamap_unload(sc->rl_ldata.rl_rx_mtag,
                            rxd->rx_dmamap);
                        m_freem(rxd->rx_m);
                        rxd->rx_m = NULL;
                }
        }
}

/*
 * Device suspend routine.  Stop the interface and save some PCI
 * settings in case the BIOS doesn't restore them properly on
 * resume.
 */
static int
re_suspend(device_t dev)
{
        struct rl_softc         *sc;

        sc = device_get_softc(dev);

        RL_LOCK(sc);
        re_stop(sc);
        re_setwol(sc);
        sc->suspended = 1;
        RL_UNLOCK(sc);

        return (0);
}

/*
 * Device resume routine.  Restore some PCI settings in case the BIOS
 * doesn't, re-enable busmastering, and restart the interface if
 * appropriate.
 */
static int
re_resume(device_t dev)
{
        struct rl_softc         *sc;
        struct ifnet            *ifp;

        sc = device_get_softc(dev);

        RL_LOCK(sc);

        ifp = sc->rl_ifp;
        /* Take controller out of sleep mode. */
        if ((sc->rl_flags & RL_FLAG_MACSLEEP) != 0) {
                if ((CSR_READ_1(sc, RL_MACDBG) & 0x80) == 0x80)
                        CSR_WRITE_1(sc, RL_GPIO,
                            CSR_READ_1(sc, RL_GPIO) | 0x01);
        }

        /*
         * Clear the WOL matching options so that leftover WOL state
         * doesn't interfere with normal RX filtering.
         */
        re_clrwol(sc);

        /* Reinitialize the interface if necessary. */
        if (ifp->if_flags & IFF_UP)
                re_init_locked(sc);

        sc->suspended = 0;
        RL_UNLOCK(sc);

        return (0);
}

/*
 * Stop all chip I/O so that the kernel's probe routines don't
 * get confused by errant DMAs when rebooting.
 */
static int
re_shutdown(device_t dev)
{
        struct rl_softc         *sc;

        sc = device_get_softc(dev);

        RL_LOCK(sc);
        re_stop(sc);
        /*
         * Mark the interface as down since otherwise we will panic if
         * an interrupt comes in later, which can happen in some cases.
         */
        sc->rl_ifp->if_flags &= ~IFF_UP;
        re_setwol(sc);
        RL_UNLOCK(sc);

        return (0);
}

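/*
 * Program the hardware Wake On LAN configuration registers to match
 * the WOL capabilities currently enabled on the interface and, if any
 * are set, request PME through the PCI power management registers.
 */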
static void
re_setwol(struct rl_softc *sc)
{
        struct ifnet            *ifp;
        int                     pmc;
        uint16_t                pmstat;
        uint8_t                 v;

        RL_LOCK_ASSERT(sc);

        if (pci_find_extcap(sc->rl_dev, PCIY_PMG, &pmc) != 0)
                return;

        ifp = sc->rl_ifp;
        /* Put controller into sleep mode. */
        if ((sc->rl_flags & RL_FLAG_MACSLEEP) != 0) {
                if ((CSR_READ_1(sc, RL_MACDBG) & 0x80) == 0x80)
                        CSR_WRITE_1(sc, RL_GPIO,
                            CSR_READ_1(sc, RL_GPIO) & ~0x01);
        }
        if ((ifp->if_capenable & IFCAP_WOL) != 0 &&
            (sc->rl_flags & RL_FLAG_WOLRXENB) != 0)
                CSR_WRITE_1(sc, RL_COMMAND, RL_CMD_RX_ENB);
        /* Enable config register write. */
        CSR_WRITE_1(sc, RL_EECMD, RL_EE_MODE);

        /* Enable PME. */
        v = CSR_READ_1(sc, RL_CFG1);
        v &= ~RL_CFG1_PME;
        if ((ifp->if_capenable & IFCAP_WOL) != 0)
                v |= RL_CFG1_PME;
        CSR_WRITE_1(sc, RL_CFG1, v);

        v = CSR_READ_1(sc, RL_CFG3);
        v &= ~(RL_CFG3_WOL_LINK | RL_CFG3_WOL_MAGIC);
        if ((ifp->if_capenable & IFCAP_WOL_MAGIC) != 0)
                v |= RL_CFG3_WOL_MAGIC;
        CSR_WRITE_1(sc, RL_CFG3, v);

        /* Config register write done. */
        CSR_WRITE_1(sc, RL_EECMD, RL_EEMODE_OFF);

        v = CSR_READ_1(sc, RL_CFG5);
        v &= ~(RL_CFG5_WOL_BCAST | RL_CFG5_WOL_MCAST | RL_CFG5_WOL_UCAST);
        v &= ~RL_CFG5_WOL_LANWAKE;
        if ((ifp->if_capenable & IFCAP_WOL_UCAST) != 0)
                v |= RL_CFG5_WOL_UCAST;
        if ((ifp->if_capenable & IFCAP_WOL_MCAST) != 0)
                v |= RL_CFG5_WOL_MCAST | RL_CFG5_WOL_BCAST;
        if ((ifp->if_capenable & IFCAP_WOL) != 0)
                v |= RL_CFG5_WOL_LANWAKE;
        CSR_WRITE_1(sc, RL_CFG5, v);

        if ((ifp->if_capenable & IFCAP_WOL) != 0 &&
            (sc->rl_flags & RL_FLAG_PHYWAKE_PM) != 0)
                CSR_WRITE_1(sc, RL_PMCH, CSR_READ_1(sc, RL_PMCH) & ~0x80);
        /*
         * The hardware seems to reset its link speed to 100Mbps in
         * power-down mode, so there is no need for the driver to
         * switch to 100Mbps itself.
         */

        /* Request PME if WOL is requested. */
        pmstat = pci_read_config(sc->rl_dev, pmc + PCIR_POWER_STATUS, 2);
        pmstat &= ~(PCIM_PSTAT_PME | PCIM_PSTAT_PMEENABLE);
        if ((ifp->if_capenable & IFCAP_WOL) != 0)
                pmstat |= PCIM_PSTAT_PME | PCIM_PSTAT_PMEENABLE;
        pci_write_config(sc->rl_dev, pmc + PCIR_POWER_STATUS, pmstat, 2);
}

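/*
 * Disable all Wake On LAN pattern matching in the configuration
 * registers.
 */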
static void
re_clrwol(struct rl_softc *sc)
{
        int                     pmc;
        uint8_t                 v;

        RL_LOCK_ASSERT(sc);

        if (pci_find_extcap(sc->rl_dev, PCIY_PMG, &pmc) != 0)
                return;

        /* Enable config register write. */
        CSR_WRITE_1(sc, RL_EECMD, RL_EE_MODE);

        v = CSR_READ_1(sc, RL_CFG3);
        v &= ~(RL_CFG3_WOL_LINK | RL_CFG3_WOL_MAGIC);
        CSR_WRITE_1(sc, RL_CFG3, v);

        /* Config register write done. */
        CSR_WRITE_1(sc, RL_EECMD, RL_EEMODE_OFF);

        v = CSR_READ_1(sc, RL_CFG5);
        v &= ~(RL_CFG5_WOL_BCAST | RL_CFG5_WOL_MCAST | RL_CFG5_WOL_UCAST);
        v &= ~RL_CFG5_WOL_LANWAKE;
        CSR_WRITE_1(sc, RL_CFG5, v);
}