/******************************************************************************

  Copyright (c) 2001-2015, Intel Corporation
  All rights reserved.

  Redistribution and use in source and binary forms, with or without
  modification, are permitted provided that the following conditions are met:

   1. Redistributions of source code must retain the above copyright notice,
      this list of conditions and the following disclaimer.

   2. Redistributions in binary form must reproduce the above copyright
      notice, this list of conditions and the following disclaimer in the
      documentation and/or other materials provided with the distribution.

   3. Neither the name of the Intel Corporation nor the names of its
      contributors may be used to endorse or promote products derived from
      this software without specific prior written permission.

  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
  AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
  LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
  INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
  CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
  ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
  POSSIBILITY OF SUCH DAMAGE.

******************************************************************************/
/*$FreeBSD$*/

#include "opt_em.h"
#include "opt_ddb.h"
#include "opt_inet.h"
#include "opt_inet6.h"

#ifdef HAVE_KERNEL_OPTION_HEADERS
#include "opt_device_polling.h"
#endif

#include <sys/param.h>
#include <sys/systm.h>
#ifdef DDB
#include <sys/types.h>
#include <ddb/ddb.h>
#endif
#if __FreeBSD_version >= 800000
#include <sys/buf_ring.h>
#endif
#include <sys/bus.h>
#include <sys/endian.h>
#include <sys/kernel.h>
#include <sys/kthread.h>
#include <sys/malloc.h>
#include <sys/mbuf.h>
#include <sys/module.h>
#include <sys/rman.h>
#include <sys/smp.h>
#include <sys/socket.h>
#include <sys/sockio.h>
#include <sys/sysctl.h>
#include <sys/taskqueue.h>
#include <sys/eventhandler.h>
#include <machine/bus.h>
#include <machine/resource.h>

#include <net/bpf.h>
#include <net/ethernet.h>
#include <net/if.h>
#include <net/if_arp.h>
#include <net/if_dl.h>
#include <net/if_media.h>

#include <net/if_types.h>
#include <net/if_vlan_var.h>

#include <netinet/in_systm.h>
#include <netinet/in.h>
#include <netinet/if_ether.h>
#include <netinet/ip.h>
#include <netinet/ip6.h>
#include <netinet/tcp.h>
#include <netinet/udp.h>

#include <machine/in_cksum.h>
#include <dev/led/led.h>
#include <dev/pci/pcivar.h>
#include <dev/pci/pcireg.h>

#include "e1000_api.h"
#include "e1000_82571.h"
#include "if_em.h"

/*********************************************************************
 *  Driver version:
 *********************************************************************/
char em_driver_version[] = "7.6.1-k";

/*********************************************************************
 *  PCI Device ID Table
 *
 *  Used by probe to select devices to load on
 *  Last field stores an index into e1000_strings
 *  Last entry must be all 0s
 *
 *  { Vendor ID, Device ID, SubVendor ID, SubDevice ID, String Index }
 *********************************************************************/

static em_vendor_info_t em_vendor_info_array[] =
{
        /* Intel(R) PRO/1000 Network Connection */
        { 0x8086, E1000_DEV_ID_82571EB_COPPER,  PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82571EB_FIBER,   PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82571EB_SERDES,  PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82571EB_SERDES_DUAL,
                                                PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82571EB_SERDES_QUAD,
                                                PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82571EB_QUAD_COPPER,
                                                PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82571EB_QUAD_COPPER_LP,
                                                PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82571EB_QUAD_FIBER,
                                                PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82571PT_QUAD_COPPER,
                                                PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82572EI_COPPER,  PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82572EI_FIBER,   PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82572EI_SERDES,  PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82572EI,         PCI_ANY_ID, PCI_ANY_ID, 0},

        { 0x8086, E1000_DEV_ID_82573E,          PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82573E_IAMT,     PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82573L,          PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82583V,          PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_80003ES2LAN_COPPER_SPT,
                                                PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_80003ES2LAN_SERDES_SPT,
                                                PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_80003ES2LAN_COPPER_DPT,
                                                PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_80003ES2LAN_SERDES_DPT,
                                                PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_ICH8_IGP_M_AMT,  PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_ICH8_IGP_AMT,    PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_ICH8_IGP_C,      PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_ICH8_IFE,        PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_ICH8_IFE_GT,     PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_ICH8_IFE_G,      PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_ICH8_IGP_M,      PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_ICH8_82567V_3,   PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_ICH9_IGP_M_AMT,  PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_ICH9_IGP_AMT,    PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_ICH9_IGP_C,      PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_ICH9_IGP_M,      PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_ICH9_IGP_M_V,    PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_ICH9_IFE,        PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_ICH9_IFE_GT,     PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_ICH9_IFE_G,      PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_ICH9_BM,         PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82574L,          PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82574LA,         PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_ICH10_R_BM_LM,   PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_ICH10_R_BM_LF,   PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_ICH10_R_BM_V,    PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_ICH10_D_BM_LM,   PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_ICH10_D_BM_LF,   PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_ICH10_D_BM_V,    PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_PCH_M_HV_LM,     PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_PCH_M_HV_LC,     PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_PCH_D_HV_DM,     PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_PCH_D_HV_DC,     PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_PCH2_LV_LM,      PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_PCH2_LV_V,       PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_PCH_LPT_I217_LM, PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_PCH_LPT_I217_V,  PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_PCH_LPTLP_I218_LM,
                                                PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_PCH_LPTLP_I218_V,
                                                PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_PCH_I218_LM2,    PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_PCH_I218_V2,     PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_PCH_I218_LM3,    PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_PCH_I218_V3,     PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_PCH_SPT_I219_LM, PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_PCH_SPT_I219_V,  PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_PCH_SPT_I219_LM2,
                                                PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_PCH_SPT_I219_V2, PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_PCH_LBG_I219_LM3,
                                                PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_PCH_SPT_I219_LM4,
                                                PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_PCH_SPT_I219_V4, PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_PCH_SPT_I219_LM5,
                                                PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_PCH_SPT_I219_V5, PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_PCH_CNP_I219_LM6,
                                                PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_PCH_CNP_I219_V6, PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_PCH_CNP_I219_LM7,
                                                PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_PCH_CNP_I219_V7, PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_PCH_ICP_I219_LM8,
                                                PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_PCH_ICP_I219_V8, PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_PCH_ICP_I219_LM9,
                                                PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_PCH_ICP_I219_V9, PCI_ANY_ID, PCI_ANY_ID, 0},
        /* required last entry */
        { 0, 0, 0, 0, 0}
};

/*********************************************************************
 *  Table of branding strings for all supported NICs.
 *********************************************************************/

static char *em_strings[] = {
        "Intel(R) PRO/1000 Network Connection"
};

/*********************************************************************
 *  Function prototypes
 *********************************************************************/
static int      em_probe(device_t);
static int      em_attach(device_t);
static int      em_detach(device_t);
static int      em_shutdown(device_t);
static int      em_suspend(device_t);
static int      em_resume(device_t);
#ifdef EM_MULTIQUEUE
static int      em_mq_start(struct ifnet *, struct mbuf *);
static int      em_mq_start_locked(struct ifnet *,
                    struct tx_ring *);
static void     em_qflush(struct ifnet *);
#else
static void     em_start(struct ifnet *);
static void     em_start_locked(struct ifnet *, struct tx_ring *);
#endif
static int      em_ioctl(struct ifnet *, u_long, caddr_t);
static void     em_init(void *);
static void     em_init_locked(struct adapter *);
static void     em_stop(void *);
static void     em_media_status(struct ifnet *, struct ifmediareq *);
static int      em_media_change(struct ifnet *);
static void     em_identify_hardware(struct adapter *);
static int      em_allocate_pci_resources(struct adapter *);
static int      em_allocate_legacy(struct adapter *);
static int      em_allocate_msix(struct adapter *);
static int      em_allocate_queues(struct adapter *);
static int      em_setup_msix(struct adapter *);
static void     em_free_pci_resources(struct adapter *);
static void     em_local_timer(void *);
static void     em_reset(struct adapter *);
static int      em_setup_interface(device_t, struct adapter *);
static void     em_flush_desc_rings(struct adapter *);

static void     em_setup_transmit_structures(struct adapter *);
static void     em_initialize_transmit_unit(struct adapter *);
static int      em_allocate_transmit_buffers(struct tx_ring *);
static void     em_free_transmit_structures(struct adapter *);
static void     em_free_transmit_buffers(struct tx_ring *);

static int      em_setup_receive_structures(struct adapter *);
static int      em_allocate_receive_buffers(struct rx_ring *);
static void     em_initialize_receive_unit(struct adapter *);
static void     em_free_receive_structures(struct adapter *);
static void     em_free_receive_buffers(struct rx_ring *);

static void     em_enable_intr(struct adapter *);
static void     em_disable_intr(struct adapter *);
static void     em_update_stats_counters(struct adapter *);
static void     em_add_hw_stats(struct adapter *adapter);
static void     em_txeof(struct tx_ring *);
static bool     em_rxeof(struct rx_ring *, int, int *);
#ifndef __NO_STRICT_ALIGNMENT
static int      em_fixup_rx(struct rx_ring *);
#endif
static void     em_setup_rxdesc(union e1000_rx_desc_extended *,
                    const struct em_rxbuffer *rxbuf);
static void     em_receive_checksum(uint32_t status, struct mbuf *);
static void     em_transmit_checksum_setup(struct tx_ring *, struct mbuf *, int,
                    struct ip *, u32 *, u32 *);
static void     em_tso_setup(struct tx_ring *, struct mbuf *, int, struct ip *,
                    struct tcphdr *, u32 *, u32 *);
static void     em_set_promisc(struct adapter *);
static void     em_disable_promisc(struct adapter *);
static void     em_set_multi(struct adapter *);
static void     em_update_link_status(struct adapter *);
static void     em_refresh_mbufs(struct rx_ring *, int);
static void     em_register_vlan(void *, struct ifnet *, u16);
static void     em_unregister_vlan(void *, struct ifnet *, u16);
static void     em_setup_vlan_hw_support(struct adapter *);
static int      em_xmit(struct tx_ring *, struct mbuf **);
static int      em_dma_malloc(struct adapter *, bus_size_t,
                    struct em_dma_alloc *, int);
static void     em_dma_free(struct adapter *, struct em_dma_alloc *);
static int      em_sysctl_nvm_info(SYSCTL_HANDLER_ARGS);
static void     em_print_nvm_info(struct adapter *);
static int      em_sysctl_debug_info(SYSCTL_HANDLER_ARGS);
static void     em_print_debug_info(struct adapter *);
static int      em_is_valid_ether_addr(u8 *);
static int      em_sysctl_int_delay(SYSCTL_HANDLER_ARGS);
static void     em_add_int_delay_sysctl(struct adapter *, const char *,
                    const char *, struct em_int_delay_info *, int, int);
/* Management and WOL Support */
static void     em_init_manageability(struct adapter *);
static void     em_release_manageability(struct adapter *);
static void     em_get_hw_control(struct adapter *);
static void     em_release_hw_control(struct adapter *);
static void     em_get_wakeup(device_t);
static void     em_enable_wakeup(device_t);
static int      em_enable_phy_wakeup(struct adapter *);
static void     em_led_func(void *, int);
static void     em_disable_aspm(struct adapter *);

static int      em_irq_fast(void *);

/* MSIX handlers */
static void     em_msix_tx(void *);
static void     em_msix_rx(void *);
static void     em_msix_link(void *);
static void     em_handle_tx(void *context, int pending);
static void     em_handle_rx(void *context, int pending);
static void     em_handle_link(void *context, int pending);

#ifdef EM_MULTIQUEUE
static void     em_enable_vectors_82574(struct adapter *);
#endif

static void     em_set_sysctl_value(struct adapter *, const char *,
                    const char *, int *, int);
static int      em_set_flowcntl(SYSCTL_HANDLER_ARGS);
static int      em_sysctl_eee(SYSCTL_HANDLER_ARGS);

static __inline void em_rx_discard(struct rx_ring *, int);

#ifdef DEVICE_POLLING
static poll_handler_t em_poll;
#endif /* DEVICE_POLLING */

/*********************************************************************
 *  FreeBSD Device Interface Entry Points
 *********************************************************************/

static device_method_t em_methods[] = {
        /* Device interface */
        DEVMETHOD(device_probe, em_probe),
        DEVMETHOD(device_attach, em_attach),
        DEVMETHOD(device_detach, em_detach),
        DEVMETHOD(device_shutdown, em_shutdown),
        DEVMETHOD(device_suspend, em_suspend),
        DEVMETHOD(device_resume, em_resume),
        DEVMETHOD_END
};

static driver_t em_driver = {
        "em", em_methods, sizeof(struct adapter),
};

devclass_t em_devclass;
DRIVER_MODULE(em, pci, em_driver, em_devclass, 0, 0);
MODULE_DEPEND(em, pci, 1, 1, 1);
MODULE_DEPEND(em, ether, 1, 1, 1);
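
/*
 * Usage note (editorial): with the DRIVER_MODULE/MODULE_DEPEND
 * declarations above, this driver builds as the if_em(4) kernel
 * module.  On a stock FreeBSD system it can typically be loaded at
 * boot by adding if_em_load="YES" to /boot/loader.conf, or at runtime
 * with "kldload if_em"; this is the general module convention, not
 * something this file itself enforces.
 */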

/*********************************************************************
 *  Tunable default values.
 *********************************************************************/

#define EM_TICKS_TO_USECS(ticks)        ((1024 * (ticks) + 500) / 1000)
#define EM_USECS_TO_TICKS(usecs)        ((1000 * (usecs) + 512) / 1024)
#define M_TSO_LEN                       66

#define MAX_INTS_PER_SEC        8000
#define DEFAULT_ITR             (1000000000/(MAX_INTS_PER_SEC * 256))
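
/*
 * Worked example (editorial): the hardware delay registers count in
 * units of 1.024 usec, which is what the two macros above convert.
 * For instance EM_USECS_TO_TICKS(100) = (1000 * 100 + 512) / 1024 = 98
 * ticks, and EM_TICKS_TO_USECS(98) = (1024 * 98 + 500) / 1000 = 100
 * usec again; the +512 and +500 terms just round to nearest.
 * Likewise DEFAULT_ITR = 1000000000 / (8000 * 256) = 488: the ITR
 * register counts 256 ns units, so 488 * 256 ns is roughly a 125 usec
 * minimum interval, i.e. MAX_INTS_PER_SEC = 8000 interrupts/sec.
 */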

#define TSO_WORKAROUND  4

static SYSCTL_NODE(_hw, OID_AUTO, em, CTLFLAG_RD, 0, "EM driver parameters");

static int em_disable_crc_stripping = 0;
SYSCTL_INT(_hw_em, OID_AUTO, disable_crc_stripping, CTLFLAG_RDTUN,
    &em_disable_crc_stripping, 0, "Disable CRC Stripping");

static int em_tx_int_delay_dflt = EM_TICKS_TO_USECS(EM_TIDV);
static int em_rx_int_delay_dflt = EM_TICKS_TO_USECS(EM_RDTR);
TUNABLE_INT("hw.em.tx_int_delay", &em_tx_int_delay_dflt);
TUNABLE_INT("hw.em.rx_int_delay", &em_rx_int_delay_dflt);
SYSCTL_INT(_hw_em, OID_AUTO, tx_int_delay, CTLFLAG_RDTUN, &em_tx_int_delay_dflt,
    0, "Default transmit interrupt delay in usecs");
SYSCTL_INT(_hw_em, OID_AUTO, rx_int_delay, CTLFLAG_RDTUN, &em_rx_int_delay_dflt,
    0, "Default receive interrupt delay in usecs");

static int em_tx_abs_int_delay_dflt = EM_TICKS_TO_USECS(EM_TADV);
static int em_rx_abs_int_delay_dflt = EM_TICKS_TO_USECS(EM_RADV);
TUNABLE_INT("hw.em.tx_abs_int_delay", &em_tx_abs_int_delay_dflt);
TUNABLE_INT("hw.em.rx_abs_int_delay", &em_rx_abs_int_delay_dflt);
SYSCTL_INT(_hw_em, OID_AUTO, tx_abs_int_delay, CTLFLAG_RDTUN,
    &em_tx_abs_int_delay_dflt, 0,
    "Default transmit interrupt delay limit in usecs");
SYSCTL_INT(_hw_em, OID_AUTO, rx_abs_int_delay, CTLFLAG_RDTUN,
    &em_rx_abs_int_delay_dflt, 0,
    "Default receive interrupt delay limit in usecs");

static int em_rxd = EM_DEFAULT_RXD;
static int em_txd = EM_DEFAULT_TXD;
TUNABLE_INT("hw.em.rxd", &em_rxd);
TUNABLE_INT("hw.em.txd", &em_txd);
SYSCTL_INT(_hw_em, OID_AUTO, rxd, CTLFLAG_RDTUN, &em_rxd, 0,
    "Number of receive descriptors per queue");
SYSCTL_INT(_hw_em, OID_AUTO, txd, CTLFLAG_RDTUN, &em_txd, 0,
    "Number of transmit descriptors per queue");
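
/*
 * Example (editorial, hypothetical values): since the knobs above are
 * CTLFLAG_RDTUN tunables, they are set before the driver initializes,
 * e.g. in /boot/loader.conf:
 *
 *      hw.em.txd="2048"
 *      hw.em.rxd="2048"
 *      hw.em.rx_int_delay="0"
 *
 * Values outside the hardware limits are rejected in em_attach()
 * below, which falls back to EM_DEFAULT_TXD/EM_DEFAULT_RXD.
 */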

static int em_smart_pwr_down = FALSE;
TUNABLE_INT("hw.em.smart_pwr_down", &em_smart_pwr_down);
SYSCTL_INT(_hw_em, OID_AUTO, smart_pwr_down, CTLFLAG_RDTUN, &em_smart_pwr_down,
    0, "Set to true to leave smart power down enabled on newer adapters");

/* Controls whether promiscuous also shows bad packets */
static int em_debug_sbp = FALSE;
TUNABLE_INT("hw.em.sbp", &em_debug_sbp);
SYSCTL_INT(_hw_em, OID_AUTO, sbp, CTLFLAG_RDTUN, &em_debug_sbp, 0,
    "Show bad packets in promiscuous mode");

static int em_enable_msix = TRUE;
TUNABLE_INT("hw.em.enable_msix", &em_enable_msix);
SYSCTL_INT(_hw_em, OID_AUTO, enable_msix, CTLFLAG_RDTUN, &em_enable_msix, 0,
    "Enable MSI-X interrupts");

#ifdef EM_MULTIQUEUE
static int em_num_queues = 1;
TUNABLE_INT("hw.em.num_queues", &em_num_queues);
SYSCTL_INT(_hw_em, OID_AUTO, num_queues, CTLFLAG_RDTUN, &em_num_queues, 0,
    "82574 only: Number of queues to configure, 0 indicates autoconfigure");
#endif

/*
** Global variable to store last used CPU when binding queues
** to CPUs in em_allocate_msix.  Starts at CPU_FIRST and increments when a
** queue is bound to a cpu.
*/
static int em_last_bind_cpu = -1;

/* How many packets rxeof tries to clean at a time */
static int em_rx_process_limit = 100;
TUNABLE_INT("hw.em.rx_process_limit", &em_rx_process_limit);
SYSCTL_INT(_hw_em, OID_AUTO, rx_process_limit, CTLFLAG_RDTUN,
    &em_rx_process_limit, 0,
    "Maximum number of received packets to process "
    "at a time, -1 means unlimited");

/* Energy efficient ethernet - default to OFF */
static int eee_setting = 1;
TUNABLE_INT("hw.em.eee_setting", &eee_setting);
SYSCTL_INT(_hw_em, OID_AUTO, eee_setting, CTLFLAG_RDTUN, &eee_setting, 0,
    "Disable Energy Efficient Ethernet");

/* Global used in WOL setup with multiport cards */
static int global_quad_port_a = 0;

#ifdef DEV_NETMAP       /* see ixgbe.c for details */
#include <dev/netmap/if_em_netmap.h>
#endif /* DEV_NETMAP */

/*********************************************************************
 *  Device identification routine
 *
 *  em_probe determines if the driver should be loaded on an
 *  adapter based on the PCI vendor/device ID of the adapter.
 *
 *  return BUS_PROBE_DEFAULT on success, positive on failure
 *********************************************************************/

static int
em_probe(device_t dev)
{
        char            adapter_name[60];
        uint16_t        pci_vendor_id = 0;
        uint16_t        pci_device_id = 0;
        uint16_t        pci_subvendor_id = 0;
        uint16_t        pci_subdevice_id = 0;
        em_vendor_info_t *ent;

        INIT_DEBUGOUT("em_probe: begin");

        pci_vendor_id = pci_get_vendor(dev);
        if (pci_vendor_id != EM_VENDOR_ID)
                return (ENXIO);

        pci_device_id = pci_get_device(dev);
        pci_subvendor_id = pci_get_subvendor(dev);
        pci_subdevice_id = pci_get_subdevice(dev);

        ent = em_vendor_info_array;
        while (ent->vendor_id != 0) {
                if ((pci_vendor_id == ent->vendor_id) &&
                    (pci_device_id == ent->device_id) &&

                    ((pci_subvendor_id == ent->subvendor_id) ||
                    (ent->subvendor_id == PCI_ANY_ID)) &&

                    ((pci_subdevice_id == ent->subdevice_id) ||
                    (ent->subdevice_id == PCI_ANY_ID))) {
                        sprintf(adapter_name, "%s %s",
                                em_strings[ent->index],
                                em_driver_version);
                        device_set_desc_copy(dev, adapter_name);
                        return (BUS_PROBE_DEFAULT);
                }
                ent++;
        }

        return (ENXIO);
}
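
/*
 * For example (editorial): an 82574L NIC reporting PCI vendor 0x8086
 * and device E1000_DEV_ID_82574L walks the table above until that pair
 * matches; the PCI_ANY_ID wildcards accept any subvendor/subdevice,
 * and probe returns BUS_PROBE_DEFAULT with the "Intel(R) PRO/1000
 * Network Connection 7.6.1-k" description.
 */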

/*********************************************************************
 *  Device initialization routine
 *
 *  The attach entry point is called when the driver is being loaded.
 *  This routine identifies the type of hardware, allocates all resources
 *  and initializes the hardware.
 *
 *  return 0 on success, positive on failure
 *********************************************************************/

static int
em_attach(device_t dev)
{
        struct adapter  *adapter;
        struct e1000_hw *hw;
        int             error = 0;

        INIT_DEBUGOUT("em_attach: begin");

        if (resource_disabled("em", device_get_unit(dev))) {
                device_printf(dev, "Disabled by device hint\n");
                return (ENXIO);
        }

        adapter = device_get_softc(dev);
        adapter->dev = adapter->osdep.dev = dev;
        hw = &adapter->hw;
        EM_CORE_LOCK_INIT(adapter, device_get_nameunit(dev));

        /* SYSCTL stuff */
        SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
            SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
            OID_AUTO, "nvm", CTLTYPE_INT|CTLFLAG_RW, adapter, 0,
            em_sysctl_nvm_info, "I", "NVM Information");

        SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
            SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
            OID_AUTO, "debug", CTLTYPE_INT|CTLFLAG_RW, adapter, 0,
            em_sysctl_debug_info, "I", "Debug Information");

        SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
            SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
            OID_AUTO, "fc", CTLTYPE_INT|CTLFLAG_RW, adapter, 0,
            em_set_flowcntl, "I", "Flow Control");

        callout_init_mtx(&adapter->timer, &adapter->core_mtx, 0);

        /* Determine hardware and mac info */
        em_identify_hardware(adapter);

        /* Setup PCI resources */
        if (em_allocate_pci_resources(adapter)) {
                device_printf(dev, "Allocation of PCI resources failed\n");
                error = ENXIO;
                goto err_pci;
        }

        /*
        ** For ICH8 and family we need to
        ** map the flash memory, and this
        ** must happen after the MAC is
        ** identified
        */
        if ((hw->mac.type == e1000_ich8lan) ||
            (hw->mac.type == e1000_ich9lan) ||
            (hw->mac.type == e1000_ich10lan) ||
            (hw->mac.type == e1000_pchlan) ||
            (hw->mac.type == e1000_pch2lan) ||
            (hw->mac.type == e1000_pch_lpt)) {
                int rid = EM_BAR_TYPE_FLASH;
                adapter->flash = bus_alloc_resource_any(dev,
                    SYS_RES_MEMORY, &rid, RF_ACTIVE);
                if (adapter->flash == NULL) {
                        device_printf(dev, "Mapping of Flash failed\n");
                        error = ENXIO;
                        goto err_pci;
                }
                /* This is used in the shared code */
                hw->flash_address = (u8 *)adapter->flash;
                adapter->osdep.flash_bus_space_tag =
                    rman_get_bustag(adapter->flash);
                adapter->osdep.flash_bus_space_handle =
                    rman_get_bushandle(adapter->flash);
        }
        /*
        ** In the new SPT device flash is not a
        ** separate BAR, rather it is also in BAR0,
        ** so use the same tag and an offset handle for the
        ** FLASH read/write macros in the shared code.
        */
        else if (hw->mac.type >= e1000_pch_spt) {
                adapter->osdep.flash_bus_space_tag =
                    adapter->osdep.mem_bus_space_tag;
                adapter->osdep.flash_bus_space_handle =
                    adapter->osdep.mem_bus_space_handle
                    + E1000_FLASH_BASE_ADDR;
        }

        /* Do Shared Code initialization */
        error = e1000_setup_init_funcs(hw, TRUE);
        if (error) {
                device_printf(dev, "Setup of Shared code failed, error %d\n",
                    error);
                error = ENXIO;
                goto err_pci;
        }

        /*
         * Setup MSI/X or MSI if PCI Express
         */
        adapter->msix = em_setup_msix(adapter);

        e1000_get_bus_info(hw);

        /* Set up some sysctls for the tunable interrupt delays */
        em_add_int_delay_sysctl(adapter, "rx_int_delay",
            "receive interrupt delay in usecs", &adapter->rx_int_delay,
            E1000_REGISTER(hw, E1000_RDTR), em_rx_int_delay_dflt);
        em_add_int_delay_sysctl(adapter, "tx_int_delay",
            "transmit interrupt delay in usecs", &adapter->tx_int_delay,
            E1000_REGISTER(hw, E1000_TIDV), em_tx_int_delay_dflt);
        em_add_int_delay_sysctl(adapter, "rx_abs_int_delay",
            "receive interrupt delay limit in usecs",
            &adapter->rx_abs_int_delay,
            E1000_REGISTER(hw, E1000_RADV),
            em_rx_abs_int_delay_dflt);
        em_add_int_delay_sysctl(adapter, "tx_abs_int_delay",
            "transmit interrupt delay limit in usecs",
            &adapter->tx_abs_int_delay,
            E1000_REGISTER(hw, E1000_TADV),
            em_tx_abs_int_delay_dflt);
        em_add_int_delay_sysctl(adapter, "itr",
            "interrupt delay limit in usecs/4",
            &adapter->tx_itr,
            E1000_REGISTER(hw, E1000_ITR),
            DEFAULT_ITR);

        /* Sysctl for limiting the amount of work done in the taskqueue */
        em_set_sysctl_value(adapter, "rx_processing_limit",
            "max number of rx packets to process", &adapter->rx_process_limit,
            em_rx_process_limit);

        /*
         * Validate number of transmit and receive descriptors. It
         * must not exceed hardware maximum, and must be a multiple
         * of EM_DBA_ALIGN.
         */
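        /*
         * Worked example (editorial, assuming sizeof(struct
         * e1000_tx_desc) == 16 and EM_DBA_ALIGN == 128): the default
         * em_txd = 1024 gives a 16384-byte ring and 16384 % 128 == 0,
         * so the value is accepted; a hypothetical em_txd = 100 would
         * give 1600 bytes, 1600 % 128 != 0, and the driver would fall
         * back to EM_DEFAULT_TXD with the message below.
         */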
        if (((em_txd * sizeof(struct e1000_tx_desc)) % EM_DBA_ALIGN) != 0 ||
            (em_txd > EM_MAX_TXD) || (em_txd < EM_MIN_TXD)) {
                device_printf(dev, "Using %d TX descriptors instead of %d!\n",
                    EM_DEFAULT_TXD, em_txd);
                adapter->num_tx_desc = EM_DEFAULT_TXD;
        } else
                adapter->num_tx_desc = em_txd;

        if (((em_rxd * sizeof(union e1000_rx_desc_extended)) % EM_DBA_ALIGN) != 0 ||
            (em_rxd > EM_MAX_RXD) || (em_rxd < EM_MIN_RXD)) {
                device_printf(dev, "Using %d RX descriptors instead of %d!\n",
                    EM_DEFAULT_RXD, em_rxd);
                adapter->num_rx_desc = EM_DEFAULT_RXD;
        } else
                adapter->num_rx_desc = em_rxd;

        hw->mac.autoneg = DO_AUTO_NEG;
        hw->phy.autoneg_wait_to_complete = FALSE;
        hw->phy.autoneg_advertised = AUTONEG_ADV_DEFAULT;

        /* Copper options */
        if (hw->phy.media_type == e1000_media_type_copper) {
                hw->phy.mdix = AUTO_ALL_MODES;
                hw->phy.disable_polarity_correction = FALSE;
                hw->phy.ms_type = EM_MASTER_SLAVE;
        }

        /*
         * Set the frame limits assuming
         * standard ethernet sized frames.
         */
        adapter->hw.mac.max_frame_size =
            ETHERMTU + ETHER_HDR_LEN + ETHERNET_FCS_SIZE;
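
        /*
         * Editorial note: for a standard untagged Ethernet frame this
         * works out to 1500 (ETHERMTU) + 14 (ETHER_HDR_LEN) + 4 (FCS)
         * = 1518 bytes; the SIOCSIFMTU handler in em_ioctl()
         * recomputes it whenever the MTU changes.
         */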

        /*
         * This controls when hardware reports transmit completion
         * status.
         */
        hw->mac.report_tx_early = 1;

        /*
        ** Get queue/ring memory
        */
        if (em_allocate_queues(adapter)) {
                error = ENOMEM;
                goto err_pci;
        }

        /* Allocate multicast array memory. */
        adapter->mta = malloc(sizeof(u8) * ETH_ADDR_LEN *
            MAX_NUM_MULTICAST_ADDRESSES, M_DEVBUF, M_NOWAIT);
        if (adapter->mta == NULL) {
                device_printf(dev, "Can not allocate multicast setup array\n");
                error = ENOMEM;
                goto err_late;
        }

        /* Check SOL/IDER usage */
        if (e1000_check_reset_block(hw))
                device_printf(dev, "PHY reset is blocked"
                    " due to SOL/IDER session.\n");

        /* Sysctl for setting Energy Efficient Ethernet */
        hw->dev_spec.ich8lan.eee_disable = eee_setting;
        SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
            SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
            OID_AUTO, "eee_control", CTLTYPE_INT|CTLFLAG_RW,
            adapter, 0, em_sysctl_eee, "I",
            "Disable Energy Efficient Ethernet");

        /*
        ** Start from a known state; this is
        ** important for reading the NVM and
        ** MAC address afterwards.
        */
        e1000_reset_hw(hw);

        /* Make sure we have a good EEPROM before we read from it */
        if (e1000_validate_nvm_checksum(hw) < 0) {
                /*
                ** Some PCI-E parts fail the first check due to
                ** the link being in a sleep state; call it again,
                ** and if it fails a second time it's a real issue.
                */
                if (e1000_validate_nvm_checksum(hw) < 0) {
                        device_printf(dev,
                            "The EEPROM Checksum Is Not Valid\n");
                        error = EIO;
                        goto err_late;
                }
        }

        /* Copy the permanent MAC address out of the EEPROM */
        if (e1000_read_mac_addr(hw) < 0) {
                device_printf(dev, "EEPROM read error while reading MAC"
                    " address\n");
                error = EIO;
                goto err_late;
        }

        if (!em_is_valid_ether_addr(hw->mac.addr)) {
                device_printf(dev, "Invalid MAC address\n");
                error = EIO;
                goto err_late;
        }

        /* Disable ULP support */
        e1000_disable_ulp_lpt_lp(hw, TRUE);

        /*
        **  Do interrupt configuration
        */
        if (adapter->msix > 1) /* Do MSIX */
                error = em_allocate_msix(adapter);
        else  /* MSI or Legacy */
                error = em_allocate_legacy(adapter);
        if (error)
                goto err_late;

        /*
         * Get Wake-on-Lan and Management info for later use
         */
        em_get_wakeup(dev);

        /* Setup OS specific network interface */
        if (em_setup_interface(dev, adapter) != 0)
                goto err_late;

        em_reset(adapter);

        /* Initialize statistics */
        em_update_stats_counters(adapter);

        hw->mac.get_link_status = 1;
        em_update_link_status(adapter);

        /* Register for VLAN events */
        adapter->vlan_attach = EVENTHANDLER_REGISTER(vlan_config,
            em_register_vlan, adapter, EVENTHANDLER_PRI_FIRST);
        adapter->vlan_detach = EVENTHANDLER_REGISTER(vlan_unconfig,
            em_unregister_vlan, adapter, EVENTHANDLER_PRI_FIRST);

        em_add_hw_stats(adapter);

        /* Non-AMT based hardware can now take control from firmware */
        if (adapter->has_manage && !adapter->has_amt)
                em_get_hw_control(adapter);

        /* Tell the stack that the interface is not active */
        adapter->ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
        adapter->ifp->if_drv_flags |= IFF_DRV_OACTIVE;

        adapter->led_dev = led_create(em_led_func, adapter,
            device_get_nameunit(dev));
#ifdef DEV_NETMAP
        em_netmap_attach(adapter);
#endif /* DEV_NETMAP */

        INIT_DEBUGOUT("em_attach: end");

        return (0);

err_late:
        em_free_transmit_structures(adapter);
        em_free_receive_structures(adapter);
        em_release_hw_control(adapter);
        if (adapter->ifp != NULL)
                if_free(adapter->ifp);
err_pci:
        em_free_pci_resources(adapter);
        free(adapter->mta, M_DEVBUF);
        EM_CORE_LOCK_DESTROY(adapter);

        return (error);
}

/*********************************************************************
 *  Device removal routine
 *
 *  The detach entry point is called when the driver is being removed.
 *  This routine stops the adapter and deallocates all the resources
 *  that were allocated for driver operation.
 *
 *  return 0 on success, positive on failure
 *********************************************************************/

static int
em_detach(device_t dev)
{
        struct adapter  *adapter = device_get_softc(dev);
        struct ifnet    *ifp = adapter->ifp;

        INIT_DEBUGOUT("em_detach: begin");

        /* Make sure VLANS are not using driver */
        if (adapter->ifp->if_vlantrunk != NULL) {
                device_printf(dev,"Vlan in use, detach first\n");
                return (EBUSY);
        }

#ifdef DEVICE_POLLING
        if (ifp->if_capenable & IFCAP_POLLING)
                ether_poll_deregister(ifp);
#endif

        if (adapter->led_dev != NULL)
                led_destroy(adapter->led_dev);

        EM_CORE_LOCK(adapter);
        adapter->in_detach = 1;
        em_stop(adapter);
        EM_CORE_UNLOCK(adapter);
        EM_CORE_LOCK_DESTROY(adapter);

        e1000_phy_hw_reset(&adapter->hw);

        em_release_manageability(adapter);
        em_release_hw_control(adapter);

        /* Unregister VLAN events */
        if (adapter->vlan_attach != NULL)
                EVENTHANDLER_DEREGISTER(vlan_config, adapter->vlan_attach);
        if (adapter->vlan_detach != NULL)
                EVENTHANDLER_DEREGISTER(vlan_unconfig, adapter->vlan_detach);

        ether_ifdetach(adapter->ifp);
        callout_drain(&adapter->timer);

#ifdef DEV_NETMAP
        netmap_detach(ifp);
#endif /* DEV_NETMAP */

        em_free_pci_resources(adapter);
        bus_generic_detach(dev);
        if_free(ifp);

        em_free_transmit_structures(adapter);
        em_free_receive_structures(adapter);

        em_release_hw_control(adapter);
        free(adapter->mta, M_DEVBUF);

        return (0);
}

/*********************************************************************
 *
 *  Shutdown entry point
 *
 **********************************************************************/

static int
em_shutdown(device_t dev)
{
        return em_suspend(dev);
}

/*
 * Suspend/resume device methods.
 */
static int
em_suspend(device_t dev)
{
        struct adapter *adapter = device_get_softc(dev);

        EM_CORE_LOCK(adapter);

        em_release_manageability(adapter);
        em_release_hw_control(adapter);
        em_enable_wakeup(dev);

        EM_CORE_UNLOCK(adapter);

        return bus_generic_suspend(dev);
}

static int
em_resume(device_t dev)
{
        struct adapter *adapter = device_get_softc(dev);
        struct tx_ring  *txr = adapter->tx_rings;
        struct ifnet *ifp = adapter->ifp;

        EM_CORE_LOCK(adapter);
        if (adapter->hw.mac.type == e1000_pch2lan)
                e1000_resume_workarounds_pchlan(&adapter->hw);
        em_init_locked(adapter);
        em_init_manageability(adapter);

        if ((ifp->if_flags & IFF_UP) &&
            (ifp->if_drv_flags & IFF_DRV_RUNNING) && adapter->link_active) {
                for (int i = 0; i < adapter->num_queues; i++, txr++) {
                        EM_TX_LOCK(txr);
#ifdef EM_MULTIQUEUE
                        if (!drbr_empty(ifp, txr->br))
                                em_mq_start_locked(ifp, txr);
#else
                        if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
                                em_start_locked(ifp, txr);
#endif
                        EM_TX_UNLOCK(txr);
                }
        }
        EM_CORE_UNLOCK(adapter);

        return bus_generic_resume(dev);
}


#ifndef EM_MULTIQUEUE
static void
em_start_locked(struct ifnet *ifp, struct tx_ring *txr)
{
        struct adapter  *adapter = ifp->if_softc;
        struct mbuf     *m_head;

        EM_TX_LOCK_ASSERT(txr);

        if ((ifp->if_drv_flags & (IFF_DRV_RUNNING|IFF_DRV_OACTIVE)) !=
            IFF_DRV_RUNNING)
                return;

        if (!adapter->link_active)
                return;

        while (!IFQ_DRV_IS_EMPTY(&ifp->if_snd)) {
                /* Call cleanup if number of TX descriptors low */
                if (txr->tx_avail <= EM_TX_CLEANUP_THRESHOLD)
                        em_txeof(txr);
                if (txr->tx_avail < EM_MAX_SCATTER) {
                        ifp->if_drv_flags |= IFF_DRV_OACTIVE;
                        break;
                }
                IFQ_DRV_DEQUEUE(&ifp->if_snd, m_head);
                if (m_head == NULL)
                        break;
                /*
                 *  Encapsulation can modify our pointer, and/or make it
                 *  NULL on failure.  In that event, we can't requeue.
                 */
                if (em_xmit(txr, &m_head)) {
                        if (m_head == NULL)
                                break;
                        IFQ_DRV_PREPEND(&ifp->if_snd, m_head);
                        break;
                }

                /* Mark the queue as having work */
                if (txr->busy == EM_TX_IDLE)
                        txr->busy = EM_TX_BUSY;

                /* Send a copy of the frame to the BPF listener */
                ETHER_BPF_MTAP(ifp, m_head);

        }

        return;
}

static void
em_start(struct ifnet *ifp)
{
        struct adapter  *adapter = ifp->if_softc;
        struct tx_ring  *txr = adapter->tx_rings;

        if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
                EM_TX_LOCK(txr);
                em_start_locked(ifp, txr);
                EM_TX_UNLOCK(txr);
        }
        return;
}
#else /* EM_MULTIQUEUE */
/*********************************************************************
 *  Multiqueue Transmit routines
 *
 *  em_mq_start is called by the stack to initiate a transmit.
 *  However, if the ring is busy the driver can queue the request
 *  rather than doing an immediate send.  That buffering, as much as
 *  having multiple tx queues, is where this driver gains.
 **********************************************************************/
/*
** Multiqueue capable stack interface
*/
static int
em_mq_start(struct ifnet *ifp, struct mbuf *m)
{
        struct adapter  *adapter = ifp->if_softc;
        struct tx_ring  *txr = adapter->tx_rings;
        unsigned int    i, error;

        if (M_HASHTYPE_GET(m) != M_HASHTYPE_NONE)
                i = m->m_pkthdr.flowid % adapter->num_queues;
        else
                i = curcpu % adapter->num_queues;
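
        /*
         * Editorial example: with adapter->num_queues == 2, a flow the
         * stack hashed to m_pkthdr.flowid == 5 always lands on
         * txr[1] (5 % 2 == 1), keeping one flow on one ring; unhashed
         * traffic is spread by the sending CPU instead.
         */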

        txr = &adapter->tx_rings[i];

        error = drbr_enqueue(ifp, txr->br, m);
        if (error)
                return (error);

        if (EM_TX_TRYLOCK(txr)) {
                em_mq_start_locked(ifp, txr);
                EM_TX_UNLOCK(txr);
        } else
                taskqueue_enqueue(txr->tq, &txr->tx_task);

        return (0);
}

static int
em_mq_start_locked(struct ifnet *ifp, struct tx_ring *txr)
{
        struct adapter  *adapter = txr->adapter;
        struct mbuf     *next;
        int             err = 0, enq = 0;

        if ((ifp->if_drv_flags & (IFF_DRV_RUNNING | IFF_DRV_OACTIVE)) !=
            IFF_DRV_RUNNING || adapter->link_active == 0) {
                return (ENETDOWN);
        }

        /* Process the queue */
        while ((next = drbr_peek(ifp, txr->br)) != NULL) {
                if ((err = em_xmit(txr, &next)) != 0) {
                        if (next == NULL) {
                                /* It was freed, move forward */
                                drbr_advance(ifp, txr->br);
                        } else {
                                /*
                                 * Still have one left, it may not be
                                 * the same since the transmit function
                                 * may have changed it.
                                 */
                                drbr_putback(ifp, txr->br, next);
                        }
                        break;
                }
                drbr_advance(ifp, txr->br);
                enq++;
                ifp->if_obytes += next->m_pkthdr.len;
                if (next->m_flags & M_MCAST)
                        ifp->if_omcasts++;
                ETHER_BPF_MTAP(ifp, next);
                if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
                        break;
        }

        /* Mark the queue as having work */
        if ((enq > 0) && (txr->busy == EM_TX_IDLE))
                txr->busy = EM_TX_BUSY;

        if (txr->tx_avail < EM_MAX_SCATTER)
                em_txeof(txr);
        if (txr->tx_avail < EM_MAX_SCATTER) {
                ifp->if_drv_flags |= IFF_DRV_OACTIVE;
        }
        return (err);
}

/*
** Flush all ring buffers
*/
static void
em_qflush(struct ifnet *ifp)
{
        struct adapter  *adapter = ifp->if_softc;
        struct tx_ring  *txr = adapter->tx_rings;
        struct mbuf     *m;

        for (int i = 0; i < adapter->num_queues; i++, txr++) {
                EM_TX_LOCK(txr);
                while ((m = buf_ring_dequeue_sc(txr->br)) != NULL)
                        m_freem(m);
                EM_TX_UNLOCK(txr);
        }
        if_qflush(ifp);
}
#endif /* EM_MULTIQUEUE */

/*********************************************************************
 *  Ioctl entry point
 *
 *  em_ioctl is called when the user wants to configure the
 *  interface.
 *
 *  return 0 on success, positive on failure
 **********************************************************************/

static int
em_ioctl(struct ifnet *ifp, u_long command, caddr_t data)
{
        struct adapter  *adapter = ifp->if_softc;
        struct ifreq    *ifr = (struct ifreq *)data;
#if defined(INET) || defined(INET6)
        struct ifaddr   *ifa = (struct ifaddr *)data;
#endif
        bool            avoid_reset = FALSE;
        int             error = 0;

        if (adapter->in_detach)
                return (error);

        switch (command) {
        case SIOCSIFADDR:
#ifdef INET
                if (ifa->ifa_addr->sa_family == AF_INET)
                        avoid_reset = TRUE;
#endif
#ifdef INET6
                if (ifa->ifa_addr->sa_family == AF_INET6)
                        avoid_reset = TRUE;
#endif
                /*
                ** Calling init results in link renegotiation,
                ** so we avoid doing it when possible.
                */
                if (avoid_reset) {
                        ifp->if_flags |= IFF_UP;
                        if (!(ifp->if_drv_flags & IFF_DRV_RUNNING))
                                em_init(adapter);
#ifdef INET
                        if (!(ifp->if_flags & IFF_NOARP))
                                arp_ifinit(ifp, ifa);
#endif
                } else
                        error = ether_ioctl(ifp, command, data);
                break;
        case SIOCSIFMTU:
            {
                int max_frame_size;

                IOCTL_DEBUGOUT("ioctl rcv'd: SIOCSIFMTU (Set Interface MTU)");

                EM_CORE_LOCK(adapter);
                switch (adapter->hw.mac.type) {
                case e1000_82571:
                case e1000_82572:
                case e1000_ich9lan:
                case e1000_ich10lan:
                case e1000_pch2lan:
                case e1000_pch_lpt:
                case e1000_pch_spt:
                case e1000_pch_cnp:
                case e1000_82574:
                case e1000_82583:
                case e1000_80003es2lan: /* 9K Jumbo Frame size */
                        max_frame_size = 9234;
                        break;
                case e1000_pchlan:
                        max_frame_size = 4096;
                        break;
                        /* Adapters that do not support jumbo frames */
                case e1000_ich8lan:
                        max_frame_size = ETHER_MAX_LEN;
                        break;
                default:
                        max_frame_size = MAX_JUMBO_FRAME_SIZE;
                }
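                /*
                 * Editorial note: the check below subtracts the
                 * Ethernet header and CRC, so e.g. a 9234-byte
                 * max_frame_size allows an MTU of up to
                 * 9234 - 14 - 4 = 9216 bytes.
                 */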
                if (ifr->ifr_mtu > max_frame_size - ETHER_HDR_LEN -
                    ETHER_CRC_LEN) {
                        EM_CORE_UNLOCK(adapter);
                        error = EINVAL;
                        break;
                }

                ifp->if_mtu = ifr->ifr_mtu;
                adapter->hw.mac.max_frame_size =
                    ifp->if_mtu + ETHER_HDR_LEN + ETHER_CRC_LEN;
                if (ifp->if_drv_flags & IFF_DRV_RUNNING)
                        em_init_locked(adapter);
                EM_CORE_UNLOCK(adapter);
                break;
            }
        case SIOCSIFFLAGS:
                IOCTL_DEBUGOUT("ioctl rcv'd:\
                    SIOCSIFFLAGS (Set Interface Flags)");
                EM_CORE_LOCK(adapter);
                if (ifp->if_flags & IFF_UP) {
                        if ((ifp->if_drv_flags & IFF_DRV_RUNNING)) {
                                if ((ifp->if_flags ^ adapter->if_flags) &
                                    (IFF_PROMISC | IFF_ALLMULTI)) {
                                        em_disable_promisc(adapter);
                                        em_set_promisc(adapter);
                                }
                        } else
                                em_init_locked(adapter);
                } else
                        if (ifp->if_drv_flags & IFF_DRV_RUNNING)
                                em_stop(adapter);
                adapter->if_flags = ifp->if_flags;
                EM_CORE_UNLOCK(adapter);
                break;
        case SIOCADDMULTI:
        case SIOCDELMULTI:
                IOCTL_DEBUGOUT("ioctl rcv'd: SIOC(ADD|DEL)MULTI");
                if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
                        EM_CORE_LOCK(adapter);
                        em_disable_intr(adapter);
                        em_set_multi(adapter);
#ifdef DEVICE_POLLING
                        if (!(ifp->if_capenable & IFCAP_POLLING))
#endif
                                em_enable_intr(adapter);
                        EM_CORE_UNLOCK(adapter);
                }
                break;
        case SIOCSIFMEDIA:
                /* Check SOL/IDER usage */
                EM_CORE_LOCK(adapter);
                if (e1000_check_reset_block(&adapter->hw)) {
                        EM_CORE_UNLOCK(adapter);
                        device_printf(adapter->dev, "Media change is"
                            " blocked due to SOL/IDER session.\n");
                        break;
                }
                EM_CORE_UNLOCK(adapter);
                /* falls thru */
        case SIOCGIFMEDIA:
                IOCTL_DEBUGOUT("ioctl rcv'd: \
                    SIOCxIFMEDIA (Get/Set Interface Media)");
                error = ifmedia_ioctl(ifp, ifr, &adapter->media, command);
                break;
        case SIOCSIFCAP:
            {
                int mask, reinit;

                IOCTL_DEBUGOUT("ioctl rcv'd: SIOCSIFCAP (Set Capabilities)");
                reinit = 0;
                mask = ifr->ifr_reqcap ^ ifp->if_capenable;
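                /*
                 * Editorial example: the XOR leaves only the bits the
                 * caller is toggling.  If if_capenable has IFCAP_TSO4
                 * set and ifr_reqcap clears it while adding
                 * IFCAP_VLAN_HWTSO, mask contains exactly those two
                 * bits and both of the corresponding branches below
                 * run.
                 */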
1304 #ifdef DEVICE_POLLING
1305                 if (mask & IFCAP_POLLING) {
1306                         if (ifr->ifr_reqcap & IFCAP_POLLING) {
1307                                 error = ether_poll_register(em_poll, ifp);
1308                                 if (error)
1309                                         return (error);
1310                                 EM_CORE_LOCK(adapter);
1311                                 em_disable_intr(adapter);
1312                                 ifp->if_capenable |= IFCAP_POLLING;
1313                                 EM_CORE_UNLOCK(adapter);
1314                         } else {
1315                                 error = ether_poll_deregister(ifp);
1316                                 /* Enable interrupt even in error case */
1317                                 EM_CORE_LOCK(adapter);
1318                                 em_enable_intr(adapter);
1319                                 ifp->if_capenable &= ~IFCAP_POLLING;
1320                                 EM_CORE_UNLOCK(adapter);
1321                         }
1322                 }
1323 #endif
1324                 if (mask & IFCAP_HWCSUM) {
1325                         ifp->if_capenable ^= IFCAP_HWCSUM;
1326                         reinit = 1;
1327                 }
1328                 if (mask & IFCAP_TSO4) {
1329                         ifp->if_capenable ^= IFCAP_TSO4;
1330                         reinit = 1;
1331                 }
1332                 if (mask & IFCAP_VLAN_HWTAGGING) {
1333                         ifp->if_capenable ^= IFCAP_VLAN_HWTAGGING;
1334                         reinit = 1;
1335                 }
1336                 if (mask & IFCAP_VLAN_HWFILTER) {
1337                         ifp->if_capenable ^= IFCAP_VLAN_HWFILTER;
1338                         reinit = 1;
1339                 }
1340                 if (mask & IFCAP_VLAN_HWTSO) {
1341                         ifp->if_capenable ^= IFCAP_VLAN_HWTSO;
1342                         reinit = 1;
1343                 }
1344                 if ((mask & IFCAP_WOL) &&
1345                     (ifp->if_capabilities & IFCAP_WOL) != 0) {
1346                         if (mask & IFCAP_WOL_MCAST)
1347                                 ifp->if_capenable ^= IFCAP_WOL_MCAST;
1348                         if (mask & IFCAP_WOL_MAGIC)
1349                                 ifp->if_capenable ^= IFCAP_WOL_MAGIC;
1350                 }
1351                 if (reinit && (ifp->if_drv_flags & IFF_DRV_RUNNING))
1352                         em_init(adapter);
1353                 VLAN_CAPABILITIES(ifp);
1354                 break;
1355             }
1356
1357         default:
1358                 error = ether_ioctl(ifp, command, data);
1359                 break;
1360         }
1361
1362         return (error);
1363 }
1364
1365
1366 /*********************************************************************
1367  *  Init entry point
1368  *
1369  *  This routine is used in two ways: by the stack as the init entry
1370  *  point in the network interface structure, and by the driver as a
1371  *  hw/sw initialization routine to bring the adapter to a consistent
1372  *  state.
1373  *
1374  *  (void; failures are reported via device_printf and em_stop)
1375  **********************************************************************/
1376
1377 static void
1378 em_init_locked(struct adapter *adapter)
1379 {
1380         struct ifnet    *ifp = adapter->ifp;
1381         device_t        dev = adapter->dev;
1382
1383         INIT_DEBUGOUT("em_init: begin");
1384
1385         EM_CORE_LOCK_ASSERT(adapter);
1386
1387         em_disable_intr(adapter);
1388         callout_stop(&adapter->timer);
1389
1390         /* Get the latest mac address; the user may have set a LAA */
1391         bcopy(IF_LLADDR(adapter->ifp), adapter->hw.mac.addr,
1392               ETHER_ADDR_LEN);
1393
1394         /* Put the address into the Receive Address Array */
1395         e1000_rar_set(&adapter->hw, adapter->hw.mac.addr, 0);
1396
1397         /*
1398          * With the 82571 adapter, RAR[0] may be overwritten
1399          * when the other port is reset, so we keep a duplicate
1400          * in the last RAR entry for that eventuality; this
1401          * assures the interface continues to function.
1402          */
1403         if (adapter->hw.mac.type == e1000_82571) {
1404                 e1000_set_laa_state_82571(&adapter->hw, TRUE);
1405                 e1000_rar_set(&adapter->hw, adapter->hw.mac.addr,
1406                     E1000_RAR_ENTRIES - 1);
1407         }
1408
1409         /* Initialize the hardware */
1410         em_reset(adapter);
1411         em_update_link_status(adapter);
1412
1413         /* Setup VLAN support, basic and offload if available */
1414         E1000_WRITE_REG(&adapter->hw, E1000_VET, ETHERTYPE_VLAN);
1415
1416         /* Set hardware offload abilities */
1417         if (ifp->if_capenable & IFCAP_TXCSUM)
1418                 ifp->if_hwassist |= (CSUM_TCP | CSUM_UDP);
1419         else
1420                 ifp->if_hwassist &= ~(CSUM_TCP | CSUM_UDP);
1421
1422         /* Configure for OS presence */
1423         em_init_manageability(adapter);
1424
1425         /* Prepare transmit descriptors and buffers */
1426         em_setup_transmit_structures(adapter);
1427         em_initialize_transmit_unit(adapter);
1428
1429         /* Setup Multicast table */
1430         em_set_multi(adapter);
1431
1432         /*
1433         ** Figure out the desired mbuf pool for doing jumbos;
1434         ** without working contigmalloc, cap at page-size clusters
1435         */
1436         if (adapter->hw.mac.max_frame_size <= 2048)
1437                 adapter->rx_mbuf_sz = MCLBYTES;
1438 #ifndef CONTIGMALLOC_WORKS
1439         else
1440                 adapter->rx_mbuf_sz = MJUMPAGESIZE;
1441 #else
1442         else if (adapter->hw.mac.max_frame_size <= 4096)
1443                 adapter->rx_mbuf_sz = MJUMPAGESIZE;
1444         else
1445                 adapter->rx_mbuf_sz = MJUM9BYTES;
1446 #endif
1447
1448         /* Prepare receive descriptors and buffers */
1449         if (em_setup_receive_structures(adapter)) {
1450                 device_printf(dev, "Could not setup receive structures\n");
1451                 em_stop(adapter);
1452                 return;
1453         }
1454         em_initialize_receive_unit(adapter);
1455
1456         /* Use real VLAN Filter support? */
1457         if (ifp->if_capenable & IFCAP_VLAN_HWTAGGING) {
1458                 if (ifp->if_capenable & IFCAP_VLAN_HWFILTER)
1459                         /* Use real VLAN Filter support */
1460                         em_setup_vlan_hw_support(adapter);
1461                 else {
1462                         u32 ctrl;
1463                         ctrl = E1000_READ_REG(&adapter->hw, E1000_CTRL);
1464                         ctrl |= E1000_CTRL_VME;
1465                         E1000_WRITE_REG(&adapter->hw, E1000_CTRL, ctrl);
1466                 }
1467         }
1468
1469         /* Don't lose promiscuous settings */
1470         em_set_promisc(adapter);
1471
1472         /* Set the interface as ACTIVE */
1473         ifp->if_drv_flags |= IFF_DRV_RUNNING;
1474         ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;
1475
1476         callout_reset(&adapter->timer, hz, em_local_timer, adapter);
1477         e1000_clear_hw_cntrs_base_generic(&adapter->hw);
1478
1479         /* MSI/X configuration for 82574 */
1480         if (adapter->hw.mac.type == e1000_82574) {
1481                 int tmp;
1482                 tmp = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
1483                 tmp |= E1000_CTRL_EXT_PBA_CLR;
1484                 E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT, tmp);
1485                 /* Set the IVAR - interrupt vector routing. */
1486                 E1000_WRITE_REG(&adapter->hw, E1000_IVAR, adapter->ivars);
1487         }
1488
1489 #ifdef DEVICE_POLLING
1490         /*
1491          * Only enable interrupts if we are not polling;
1492          * make sure they are off otherwise.
1493          */
1494         if (ifp->if_capenable & IFCAP_POLLING)
1495                 em_disable_intr(adapter);
1496         else
1497 #endif /* DEVICE_POLLING */
1498                 em_enable_intr(adapter);
1499
1500         /* AMT based hardware can now take control from firmware */
1501         if (adapter->has_manage && adapter->has_amt)
1502                 em_get_hw_control(adapter);
1503 }
1504
1505 static void
1506 em_init(void *arg)
1507 {
1508         struct adapter *adapter = arg;
1509
1510         EM_CORE_LOCK(adapter);
1511         em_init_locked(adapter);
1512         EM_CORE_UNLOCK(adapter);
1513 }
1514
1515
1516 #ifdef DEVICE_POLLING
1517 /*********************************************************************
1518  *
1519  *  Legacy polling routine: note this only works with single queue
1520  *
1521  *********************************************************************/
1522 static int
1523 em_poll(struct ifnet *ifp, enum poll_cmd cmd, int count)
1524 {
1525         struct adapter *adapter = ifp->if_softc;
1526         struct tx_ring  *txr = adapter->tx_rings;
1527         struct rx_ring  *rxr = adapter->rx_rings;
1528         u32             reg_icr;
1529         int             rx_done;
1530
1531         EM_CORE_LOCK(adapter);
1532         if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) {
1533                 EM_CORE_UNLOCK(adapter);
1534                 return (0);
1535         }
1536
1537         if (cmd == POLL_AND_CHECK_STATUS) {
1538                 reg_icr = E1000_READ_REG(&adapter->hw, E1000_ICR);
1539                 if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
1540                         callout_stop(&adapter->timer);
1541                         adapter->hw.mac.get_link_status = 1;
1542                         em_update_link_status(adapter);
1543                         callout_reset(&adapter->timer, hz,
1544                             em_local_timer, adapter);
1545                 }
1546         }
1547         EM_CORE_UNLOCK(adapter);
1548
1549         em_rxeof(rxr, count, &rx_done);
1550
1551         EM_TX_LOCK(txr);
1552         em_txeof(txr);
1553 #ifdef EM_MULTIQUEUE
1554         if (!drbr_empty(ifp, txr->br))
1555                 em_mq_start_locked(ifp, txr);
1556 #else
1557         if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
1558                 em_start_locked(ifp, txr);
1559 #endif
1560         EM_TX_UNLOCK(txr);
1561
1562         return (rx_done);
1563 }
1564 #endif /* DEVICE_POLLING */
1565
1566
1567 /*********************************************************************
1568  *
1569  *  Fast Legacy/MSI Combined Interrupt Service routine  
1570  *
1571  *********************************************************************/
1572 static int
1573 em_irq_fast(void *arg)
1574 {
1575         struct adapter  *adapter = arg;
1576         struct ifnet    *ifp;
1577         u32             reg_icr;
1578
1579         ifp = adapter->ifp;
1580
1581         reg_icr = E1000_READ_REG(&adapter->hw, E1000_ICR);
1582
1583         /* Hot eject?  */
1584         if (reg_icr == 0xffffffff)
1585                 return FILTER_STRAY;
1586
1587         /* Definitely not our interrupt.  */
1588         if (reg_icr == 0x0)
1589                 return FILTER_STRAY;
1590
1591         /*
1592          * Starting with the 82571 chip, bit 31 should be used to
1593          * determine whether the interrupt belongs to us.
1594          */
1595         if (adapter->hw.mac.type >= e1000_82571 &&
1596             (reg_icr & E1000_ICR_INT_ASSERTED) == 0)
1597                 return FILTER_STRAY;
1598
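             /*
              * Filter (interrupt) context: mask the device and defer
              * the actual RX/TX processing to the taskqueue.
              */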
1599         em_disable_intr(adapter);
1600         taskqueue_enqueue(adapter->tq, &adapter->que_task);
1601
1602         /* Link status change */
1603         if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
1604                 adapter->hw.mac.get_link_status = 1;
1605                 taskqueue_enqueue(taskqueue_fast, &adapter->link_task);
1606         }
1607
1608         if (reg_icr & E1000_ICR_RXO)
1609                 adapter->rx_overruns++;
1610         return FILTER_HANDLED;
1611 }
1612
1613 /* Combined RX/TX handler, used by Legacy and MSI */
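     /*
      * If RX work remains we requeue ourselves with interrupts still
      * masked; they are re-enabled only once the rings are drained.
      */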
1614 static void
1615 em_handle_que(void *context, int pending)
1616 {
1617         struct adapter  *adapter = context;
1618         struct ifnet    *ifp = adapter->ifp;
1619         struct tx_ring  *txr = adapter->tx_rings;
1620         struct rx_ring  *rxr = adapter->rx_rings;
1621
1622         if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
1623                 bool more = em_rxeof(rxr, adapter->rx_process_limit, NULL);
1624
1625                 EM_TX_LOCK(txr);
1626                 em_txeof(txr);
1627 #ifdef EM_MULTIQUEUE
1628                 if (!drbr_empty(ifp, txr->br))
1629                         em_mq_start_locked(ifp, txr);
1630 #else
1631                 if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
1632                         em_start_locked(ifp, txr);
1633 #endif
1634                 EM_TX_UNLOCK(txr);
1635                 if (more) {
1636                         taskqueue_enqueue(adapter->tq, &adapter->que_task);
1637                         return;
1638                 }
1639         }
1640
1641         em_enable_intr(adapter);
1642         return;
1643 }
1644
1645
1646 /*********************************************************************
1647  *
1648  *  MSIX Interrupt Service Routines
1649  *
1650  **********************************************************************/
1651 static void
1652 em_msix_tx(void *arg)
1653 {
1654         struct tx_ring *txr = arg;
1655         struct adapter *adapter = txr->adapter;
1656         struct ifnet    *ifp = adapter->ifp;
1657
1658         ++txr->tx_irq;
1659         EM_TX_LOCK(txr);
1660         em_txeof(txr);
1661 #ifdef EM_MULTIQUEUE
1662         if (!drbr_empty(ifp, txr->br))
1663                 em_mq_start_locked(ifp, txr);
1664 #else
1665         if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
1666                 em_start_locked(ifp, txr);
1667 #endif
1668
1669         /* Reenable this interrupt */
1670         E1000_WRITE_REG(&adapter->hw, E1000_IMS, txr->ims);
1671         EM_TX_UNLOCK(txr);
1672         return;
1673 }
1674
1675 /*********************************************************************
1676  *
1677  *  MSIX RX Interrupt Service routine
1678  *
1679  **********************************************************************/
1680
1681 static void
1682 em_msix_rx(void *arg)
1683 {
1684         struct rx_ring  *rxr = arg;
1685         struct adapter  *adapter = rxr->adapter;
1686         bool            more;
1687
1688         ++rxr->rx_irq;
1689         if (!(adapter->ifp->if_drv_flags & IFF_DRV_RUNNING))
1690                 return;
1691         more = em_rxeof(rxr, adapter->rx_process_limit, NULL);
1692         if (more)
1693                 taskqueue_enqueue(rxr->tq, &rxr->rx_task);
1694         else {
1695                 /* Reenable this interrupt */
1696                 E1000_WRITE_REG(&adapter->hw, E1000_IMS, rxr->ims);
1697         }
1698         return;
1699 }
1700
1701 /*********************************************************************
1702  *
1703  *  MSIX Link Fast Interrupt Service routine
1704  *
1705  **********************************************************************/
1706 static void
1707 em_msix_link(void *arg)
1708 {
1709         struct adapter  *adapter = arg;
1710         u32             reg_icr;
1711
1712         ++adapter->link_irq;
1713         reg_icr = E1000_READ_REG(&adapter->hw, E1000_ICR);
1714
1715         if (reg_icr & E1000_ICR_RXO)
1716                 adapter->rx_overruns++;
1717
1718         if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
1719                 adapter->hw.mac.get_link_status = 1;
1720                 em_handle_link(adapter, 0);
1721         } else
1722                 E1000_WRITE_REG(&adapter->hw, E1000_IMS,
1723                     EM_MSIX_LINK | E1000_IMS_LSC);
1724         /*
1725         ** Because we must read the ICR for this interrupt,
1726         ** it may clear other causes using autoclear; for
1727         ** this reason we simply create a soft interrupt
1728         ** for all these vectors.
1729         */
1730         if (reg_icr) {
1731                 E1000_WRITE_REG(&adapter->hw,
1732                         E1000_ICS, adapter->ims);
1733         }
1734         return;
1735 }
1736
1737 static void
1738 em_handle_rx(void *context, int pending)
1739 {
1740         struct rx_ring  *rxr = context;
1741         struct adapter  *adapter = rxr->adapter;
1742         bool            more;
1743
1744         more = em_rxeof(rxr, adapter->rx_process_limit, NULL);
1745         if (more)
1746                 taskqueue_enqueue(rxr->tq, &rxr->rx_task);
1747         else {
1748                 /* Reenable this interrupt */
1749                 E1000_WRITE_REG(&adapter->hw, E1000_IMS, rxr->ims);
1750         }
1751 }
1752
1753 static void
1754 em_handle_tx(void *context, int pending)
1755 {
1756         struct tx_ring  *txr = context;
1757         struct adapter  *adapter = txr->adapter;
1758         struct ifnet    *ifp = adapter->ifp;
1759
1760         EM_TX_LOCK(txr);
1761         em_txeof(txr);
1762 #ifdef EM_MULTIQUEUE
1763         if (!drbr_empty(ifp, txr->br))
1764                 em_mq_start_locked(ifp, txr);
1765 #else
1766         if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
1767                 em_start_locked(ifp, txr);
1768 #endif
1769         E1000_WRITE_REG(&adapter->hw, E1000_IMS, txr->ims);
1770         EM_TX_UNLOCK(txr);
1771 }
1772
1773 static void
1774 em_handle_link(void *context, int pending)
1775 {
1776         struct adapter  *adapter = context;
1777         struct e1000_hw *hw = &adapter->hw;
1778         struct tx_ring  *txr = adapter->tx_rings;
1779         struct ifnet *ifp = adapter->ifp;
1780
1781         if (!(ifp->if_drv_flags & IFF_DRV_RUNNING))
1782                 return;
1783
1784         EM_CORE_LOCK(adapter);
1785         callout_stop(&adapter->timer);
1786         em_update_link_status(adapter);
1787         callout_reset(&adapter->timer, hz, em_local_timer, adapter);
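             /* On 82574 with MSIX, re-arm the link/LSC interrupt bits */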
1788         if (hw->mac.type == e1000_82574 && adapter->msix_mem != NULL)
1789                 E1000_WRITE_REG(hw, E1000_IMS, EM_MSIX_LINK | E1000_IMS_LSC);
1790         if (adapter->link_active) {
1791                 for (int i = 0; i < adapter->num_queues; i++, txr++) {
1792                         EM_TX_LOCK(txr);
1793 #ifdef EM_MULTIQUEUE
1794                         if (!drbr_empty(ifp, txr->br))
1795                                 em_mq_start_locked(ifp, txr);
1796 #else
1797                         if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
1798                                 em_start_locked(ifp, txr);
1799 #endif
1800                         EM_TX_UNLOCK(txr);
1801                 }
1802         }
1803         EM_CORE_UNLOCK(adapter);
1804 }
1805
1806
1807 /*********************************************************************
1808  *
1809  *  Media Ioctl callback
1810  *
1811  *  This routine is called whenever the user queries the status of
1812  *  the interface using ifconfig.
1813  *
1814  **********************************************************************/
1815 static void
1816 em_media_status(struct ifnet *ifp, struct ifmediareq *ifmr)
1817 {
1818         struct adapter *adapter = ifp->if_softc;
1819         u_char fiber_type = IFM_1000_SX;
1820
1821         INIT_DEBUGOUT("em_media_status: begin");
1822
1823         EM_CORE_LOCK(adapter);
1824         em_update_link_status(adapter);
1825
1826         ifmr->ifm_status = IFM_AVALID;
1827         ifmr->ifm_active = IFM_ETHER;
1828
1829         if (!adapter->link_active) {
1830                 EM_CORE_UNLOCK(adapter);
1831                 return;
1832         }
1833
1834         ifmr->ifm_status |= IFM_ACTIVE;
1835
1836         if ((adapter->hw.phy.media_type == e1000_media_type_fiber) ||
1837             (adapter->hw.phy.media_type == e1000_media_type_internal_serdes)) {
1838                 ifmr->ifm_active |= fiber_type | IFM_FDX;
1839         } else {
1840                 switch (adapter->link_speed) {
1841                 case 10:
1842                         ifmr->ifm_active |= IFM_10_T;
1843                         break;
1844                 case 100:
1845                         ifmr->ifm_active |= IFM_100_TX;
1846                         break;
1847                 case 1000:
1848                         ifmr->ifm_active |= IFM_1000_T;
1849                         break;
1850                 }
1851                 if (adapter->link_duplex == FULL_DUPLEX)
1852                         ifmr->ifm_active |= IFM_FDX;
1853                 else
1854                         ifmr->ifm_active |= IFM_HDX;
1855         }
1856         EM_CORE_UNLOCK(adapter);
1857 }
1858
1859 /*********************************************************************
1860  *
1861  *  Media Ioctl callback
1862  *
1863  *  This routine is called when the user changes speed/duplex using
1864  *  the media/mediaopt options with ifconfig.
1865  *
1866  **********************************************************************/
1867 static int
1868 em_media_change(struct ifnet *ifp)
1869 {
1870         struct adapter *adapter = ifp->if_softc;
1871         struct ifmedia  *ifm = &adapter->media;
1872
1873         INIT_DEBUGOUT("em_media_change: begin");
1874
1875         if (IFM_TYPE(ifm->ifm_media) != IFM_ETHER)
1876                 return (EINVAL);
1877
1878         EM_CORE_LOCK(adapter);
1879         switch (IFM_SUBTYPE(ifm->ifm_media)) {
1880         case IFM_AUTO:
1881                 adapter->hw.mac.autoneg = DO_AUTO_NEG;
1882                 adapter->hw.phy.autoneg_advertised = AUTONEG_ADV_DEFAULT;
1883                 break;
1884         case IFM_1000_LX:
1885         case IFM_1000_SX:
1886         case IFM_1000_T:
1887                 adapter->hw.mac.autoneg = DO_AUTO_NEG;
1888                 adapter->hw.phy.autoneg_advertised = ADVERTISE_1000_FULL;
1889                 break;
1890         case IFM_100_TX:
1891                 adapter->hw.mac.autoneg = FALSE;
1892                 adapter->hw.phy.autoneg_advertised = 0;
1893                 if ((ifm->ifm_media & IFM_GMASK) == IFM_FDX)
1894                         adapter->hw.mac.forced_speed_duplex = ADVERTISE_100_FULL;
1895                 else
1896                         adapter->hw.mac.forced_speed_duplex = ADVERTISE_100_HALF;
1897                 break;
1898         case IFM_10_T:
1899                 adapter->hw.mac.autoneg = FALSE;
1900                 adapter->hw.phy.autoneg_advertised = 0;
1901                 if ((ifm->ifm_media & IFM_GMASK) == IFM_FDX)
1902                         adapter->hw.mac.forced_speed_duplex = ADVERTISE_10_FULL;
1903                 else
1904                         adapter->hw.mac.forced_speed_duplex = ADVERTISE_10_HALF;
1905                 break;
1906         default:
1907                 device_printf(adapter->dev, "Unsupported media type\n");
1908         }
1909
1910         em_init_locked(adapter);
1911         EM_CORE_UNLOCK(adapter);
1912
1913         return (0);
1914 }
1915
1916 /*********************************************************************
1917  *
1918  *  This routine maps the mbufs to tx descriptors.
1919  *
1920  *  return 0 on success, positive on failure
1921  **********************************************************************/
1922
1923 static int
1924 em_xmit(struct tx_ring *txr, struct mbuf **m_headp)
1925 {
1926         struct adapter          *adapter = txr->adapter;
1927         bus_dma_segment_t       segs[EM_MAX_SCATTER];
1928         bus_dmamap_t            map;
1929         struct em_txbuffer      *tx_buffer, *tx_buffer_mapped;
1930         struct e1000_tx_desc    *ctxd = NULL;
1931         struct mbuf             *m_head;
1932         struct ether_header     *eh;
1933         struct ip               *ip = NULL;
1934         struct tcphdr           *tp = NULL;
1935         u32                     txd_upper = 0, txd_lower = 0;
1936         int                     ip_off, poff;
1937         int                     nsegs, i, j, first, last = 0;
1938         int                     error;
1939         bool                    do_tso, tso_desc, remap = TRUE;
1940
1941         m_head = *m_headp;
1942         do_tso = m_head->m_pkthdr.csum_flags & CSUM_IP_TSO;
1943         tso_desc = FALSE;
1944         ip_off = poff = 0;
1945
1946         /*
1947          * Intel recommends entire IP/TCP header length reside in a single
1948          * buffer. If multiple descriptors are used to describe the IP and
1949          * TCP header, each descriptor should describe one or more
1950          * complete headers; descriptors referencing only parts of headers
1951          * are not supported. If all layer headers are not coalesced into
1952          * a single buffer, each buffer should not cross a 4KB boundary,
1953          * or be larger than the maximum read request size.
1954  * Controller also requires modifying the IP/TCP header to make
1955  * TSO work, so we first get a writable mbuf chain, then coalesce
1956  * the ethernet/IP/TCP header into a single buffer to meet the
1957  * requirement of the controller. This also simplifies IP/TCP/UDP
1958  * checksum offloading, which has similar restrictions.
1959          */
1960         if (do_tso || m_head->m_pkthdr.csum_flags & CSUM_OFFLOAD) {
1961                 if (do_tso || (m_head->m_next != NULL && 
1962                     m_head->m_pkthdr.csum_flags & CSUM_OFFLOAD)) {
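                             /*
                              * The header fixups below modify the mbuf
                              * data, so work on a writable copy of the
                              * chain.
                              */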
1963                         if (M_WRITABLE(*m_headp) == 0) {
1964                                 m_head = m_dup(*m_headp, M_NOWAIT);
1965                                 m_freem(*m_headp);
1966                                 if (m_head == NULL) {
1967                                         *m_headp = NULL;
1968                                         return (ENOBUFS);
1969                                 }
1970                                 *m_headp = m_head;
1971                         }
1972                 }
1973                 /*
1974                  * XXX
1975                  * Assume IPv4, we don't have TSO/checksum offload support
1976                  * for IPv6 yet.
1977                  */
1978                 ip_off = sizeof(struct ether_header);
1979                 if (m_head->m_len < ip_off) {
1980                         m_head = m_pullup(m_head, ip_off);
1981                         if (m_head == NULL) {
1982                                 *m_headp = NULL;
1983                                 return (ENOBUFS);
1984                         }
1985                 }
1986                 eh = mtod(m_head, struct ether_header *);
1987                 if (eh->ether_type == htons(ETHERTYPE_VLAN)) {
1988                         ip_off = sizeof(struct ether_vlan_header);
1989                         if (m_head->m_len < ip_off) {
1990                                 m_head = m_pullup(m_head, ip_off);
1991                                 if (m_head == NULL) {
1992                                         *m_headp = NULL;
1993                                         return (ENOBUFS);
1994                                 }
1995                         }
1996                 }
1997                 if (m_head->m_len < ip_off + sizeof(struct ip)) {
1998                         m_head = m_pullup(m_head, ip_off + sizeof(struct ip));
1999                         if (m_head == NULL) {
2000                                 *m_headp = NULL;
2001                                 return (ENOBUFS);
2002                         }
2003                 }
2004                 ip = (struct ip *)(mtod(m_head, char *) + ip_off);
2005                 poff = ip_off + (ip->ip_hl << 2);
2006
2007                 if (do_tso || (m_head->m_pkthdr.csum_flags & CSUM_TCP)) {
2008                         if (m_head->m_len < poff + sizeof(struct tcphdr)) {
2009                                 m_head = m_pullup(m_head, poff +
2010                                     sizeof(struct tcphdr));
2011                                 if (m_head == NULL) {
2012                                         *m_headp = NULL;
2013                                         return (ENOBUFS);
2014                                 }
2015                         }
2016                         tp = (struct tcphdr *)(mtod(m_head, char *) + poff);
2017                         /*
2018                          * TSO workaround: pull 4 more bytes of
2019                          * data beyond the TCP header into the mbuf.
2020                          */
2021                         if (m_head->m_len < poff + (tp->th_off << 2)) {
2022                                 m_head = m_pullup(m_head, poff +
2023                                                  (tp->th_off << 2) +
2024                                                  TSO_WORKAROUND);
2025                                 if (m_head == NULL) {
2026                                         *m_headp = NULL;
2027                                         return (ENOBUFS);
2028                                 }
2029                         }
2030                         ip = (struct ip *)(mtod(m_head, char *) + ip_off);
2031                         tp = (struct tcphdr *)(mtod(m_head, char *) + poff);
2032                         if (do_tso) {
2033                                 ip->ip_len = htons(m_head->m_pkthdr.tso_segsz +
2034                                                   (ip->ip_hl << 2) +
2035                                                   (tp->th_off << 2));
2036                                 ip->ip_sum = 0;
2037                                 /*
2038                                  * The pseudo TCP checksum does not include
2039                                  * the TCP payload length, so the driver must
2040                                  * recompute the checksum here to what the
2041                                  * hardware expects to see. This is in
2042                                  * adherence to Microsoft's Large Send spec.
2043                                  */
2044                                 tp->th_sum = in_pseudo(ip->ip_src.s_addr,
2045                                     ip->ip_dst.s_addr, htons(IPPROTO_TCP));
2046                         }
2047                 } else if (m_head->m_pkthdr.csum_flags & CSUM_UDP) {
2048                         if (m_head->m_len < poff + sizeof(struct udphdr)) {
2049                                 m_head = m_pullup(m_head, poff +
2050                                     sizeof(struct udphdr));
2051                                 if (m_head == NULL) {
2052                                         *m_headp = NULL;
2053                                         return (ENOBUFS);
2054                                 }
2055                         }
2056                         ip = (struct ip *)(mtod(m_head, char *) + ip_off);
2057                 }
2058                 *m_headp = m_head;
2059         }
2060
2061         /*
2062          * Map the packet for DMA
2063          *
2064          * Capture the first descriptor index;
2065          * this descriptor will have the index
2066          * of the EOP, which is the only one
2067          * that now gets a DONE bit writeback.
2068          */
2069         first = txr->next_avail_desc;
2070         tx_buffer = &txr->tx_buffers[first];
2071         tx_buffer_mapped = tx_buffer;
2072         map = tx_buffer->map;
2073
2074 retry:
2075         error = bus_dmamap_load_mbuf_sg(txr->txtag, map,
2076             *m_headp, segs, &nsegs, BUS_DMA_NOWAIT);
2077
2078         /*
2079          * There are two types of errors we can (try) to handle:
2080          * - EFBIG means the mbuf chain was too long and bus_dma ran
2081          *   out of segments.  Defragment the mbuf chain and try again.
2082          * - ENOMEM means bus_dma could not obtain enough bounce buffers
2083          *   at this point in time.  Defer sending and try again later.
2084          * All other errors, in particular EINVAL, are fatal and prevent the
2085          * mbuf chain from ever going through.  Drop it and report error.
2086          */
2087         if (error == EFBIG && remap) {
2088                 struct mbuf *m;
2089
2090                 m = m_collapse(*m_headp, M_NOWAIT, EM_MAX_SCATTER);
2091                 if (m == NULL) {
2092                         adapter->mbuf_defrag_failed++;
2093                         m_freem(*m_headp);
2094                         *m_headp = NULL;
2095                         return (ENOBUFS);
2096                 }
2097                 *m_headp = m;
2098
2099                 /* Try it again, but only once */
2100                 remap = FALSE;
2101                 goto retry;
2102         } else if (error != 0) {
2103                 adapter->no_tx_dma_setup++;
2104                 m_freem(*m_headp);
2105                 *m_headp = NULL;
2106                 return (error);
2107         }
2108
2109         /*
2110          * TSO Hardware workaround: if this packet is not
2111          * TSO, and is only a single descriptor long, and
2112          * it follows a TSO burst, then we need to add a
2113          * sentinel descriptor to prevent premature writeback.
2114          */
2115         if ((!do_tso) && (txr->tx_tso == TRUE)) {
2116                 if (nsegs == 1)
2117                         tso_desc = TRUE;
2118                 txr->tx_tso = FALSE;
2119         }
2120
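             /*
              * Require EM_MAX_SCATTER descriptors of slack beyond the
              * segment count so context/sentinel descriptors always fit
              * and the ring is never completely consumed.
              */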
2121         if (txr->tx_avail < (nsegs + EM_MAX_SCATTER)) {
2122                 txr->no_desc_avail++;
2123                 bus_dmamap_unload(txr->txtag, map);
2124                 return (ENOBUFS);
2125         }
2126         m_head = *m_headp;
2127
2128         /* Do hardware assists */
2129         if (m_head->m_pkthdr.csum_flags & CSUM_IP_TSO) {
2130                 em_tso_setup(txr, m_head, ip_off, ip, tp,
2131                     &txd_upper, &txd_lower);
2132                 /* we need to make a final sentinel transmit desc */
2133                 tso_desc = TRUE;
2134         } else if (m_head->m_pkthdr.csum_flags & CSUM_OFFLOAD)
2135                 em_transmit_checksum_setup(txr, m_head,
2136                     ip_off, ip, &txd_upper, &txd_lower);
2137
2138         if (m_head->m_flags & M_VLANTAG) {
2139                 /* Set the vlan id. */
2140                 txd_upper |=
2141                     (htole16(m_head->m_pkthdr.ether_vtag) << 16);
2142                 /* Tell hardware to add tag */
2143                 txd_lower |= htole32(E1000_TXD_CMD_VLE);
2144         }
2145
2146         i = txr->next_avail_desc;
2147
2148         /* Set up our transmit descriptors */
2149         for (j = 0; j < nsegs; j++) {
2150                 bus_size_t seg_len;
2151                 bus_addr_t seg_addr;
2152
2153                 tx_buffer = &txr->tx_buffers[i];
2154                 ctxd = &txr->tx_base[i];
2155                 seg_addr = segs[j].ds_addr;
2156                 seg_len  = segs[j].ds_len;
2157                 /*
2158                 ** TSO Workaround:
2159                 ** If this is the last descriptor, we want to
2160                 ** split it so we have a small final sentinel
2161                 */
2162                 if (tso_desc && (j == (nsegs - 1)) && (seg_len > 8)) {
2163                         seg_len -= TSO_WORKAROUND;
2164                         ctxd->buffer_addr = htole64(seg_addr);
2165                         ctxd->lower.data = htole32(
2166                                 adapter->txd_cmd | txd_lower | seg_len);
2167                         ctxd->upper.data = htole32(txd_upper);
2168                         if (++i == adapter->num_tx_desc)
2169                                 i = 0;
2170
2171                         /* Now make the sentinel */
2172                         txr->tx_avail--;
2173                         ctxd = &txr->tx_base[i];
2174                         tx_buffer = &txr->tx_buffers[i];
2175                         ctxd->buffer_addr =
2176                             htole64(seg_addr + seg_len);
2177                         ctxd->lower.data = htole32(
2178                             adapter->txd_cmd | txd_lower | TSO_WORKAROUND);
2179                         ctxd->upper.data =
2180                             htole32(txd_upper);
2181                         last = i;
2182                         if (++i == adapter->num_tx_desc)
2183                                 i = 0;
2184                 } else {
2185                         ctxd->buffer_addr = htole64(seg_addr);
2186                         ctxd->lower.data = htole32(
2187                             adapter->txd_cmd | txd_lower | seg_len);
2188                         ctxd->upper.data = htole32(txd_upper);
2189                         last = i;
2190                         if (++i == adapter->num_tx_desc)
2191                                 i = 0;
2192                 }
2193                 tx_buffer->m_head = NULL;
2194                 tx_buffer->next_eop = -1;
2195         }
2196
2197         txr->next_avail_desc = i;
2198         txr->tx_avail -= nsegs;
2199
2200         tx_buffer->m_head = m_head;
2201         /*
2202         ** Here we swap the map so the last descriptor,
2203         ** which gets the completion interrupt, has the
2204         ** real map, and the first descriptor gets the
2205         ** unused map from this descriptor.
2206         */
2207         tx_buffer_mapped->map = tx_buffer->map;
2208         tx_buffer->map = map;
2209         bus_dmamap_sync(txr->txtag, map, BUS_DMASYNC_PREWRITE);
2210
2211         /*
2212          * Last Descriptor of Packet
2213          * needs End Of Packet (EOP)
2214          * and Report Status (RS)
2215          */
2216         ctxd->lower.data |=
2217             htole32(E1000_TXD_CMD_EOP | E1000_TXD_CMD_RS);
2218         /*
2219          * Keep track in the first buffer which
2220          * descriptor will be written back
2221          */
2222         tx_buffer = &txr->tx_buffers[first];
2223         tx_buffer->next_eop = last;
2224
2225         /*
2226          * Advance the Transmit Descriptor Tail (TDT); this tells the E1000
2227          * that this frame is available to transmit.
2228          */
2229         bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
2230             BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
2231         E1000_WRITE_REG(&adapter->hw, E1000_TDT(txr->me), i);
2232
2233         return (0);
2234 }
2235
2236 static void
2237 em_set_promisc(struct adapter *adapter)
2238 {
2239         struct ifnet    *ifp = adapter->ifp;
2240         u32             reg_rctl;
2241
2242         reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
2243
2244         if (ifp->if_flags & IFF_PROMISC) {
2245                 reg_rctl |= (E1000_RCTL_UPE | E1000_RCTL_MPE);
2246                 /* Turn this on if you want to see bad packets */
2247                 if (em_debug_sbp)
2248                         reg_rctl |= E1000_RCTL_SBP;
2249                 E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2250         } else if (ifp->if_flags & IFF_ALLMULTI) {
2251                 reg_rctl |= E1000_RCTL_MPE;
2252                 reg_rctl &= ~E1000_RCTL_UPE;
2253                 E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2254         }
2255 }
2256
2257 static void
2258 em_disable_promisc(struct adapter *adapter)
2259 {
2260         struct ifnet    *ifp = adapter->ifp;
2261         u32             reg_rctl;
2262         int             mcnt = 0;
2263
2264         reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
2265         reg_rctl &=  (~E1000_RCTL_UPE);
2266         if (ifp->if_flags & IFF_ALLMULTI)
2267                 mcnt = MAX_NUM_MULTICAST_ADDRESSES;
2268         else {
2269                 struct  ifmultiaddr *ifma;
2270 #if __FreeBSD_version < 800000
2271                 IF_ADDR_LOCK(ifp);
2272 #else   
2273                 if_maddr_rlock(ifp);
2274 #endif
2275                 TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
2276                         if (ifma->ifma_addr->sa_family != AF_LINK)
2277                                 continue;
2278                         if (mcnt == MAX_NUM_MULTICAST_ADDRESSES)
2279                                 break;
2280                         mcnt++;
2281                 }
2282 #if __FreeBSD_version < 800000
2283                 IF_ADDR_UNLOCK(ifp);
2284 #else
2285                 if_maddr_runlock(ifp);
2286 #endif
2287         }
2288         /* Don't disable if in MAX groups */
2289         if (mcnt < MAX_NUM_MULTICAST_ADDRESSES)
2290                 reg_rctl &=  (~E1000_RCTL_MPE);
2291         reg_rctl &=  (~E1000_RCTL_SBP);
2292         E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2293 }
2294
2295
2296 /*********************************************************************
2297  *  Multicast Update
2298  *
2299  *  This routine is called whenever the multicast address list is updated.
2300  *
2301  **********************************************************************/
2302
2303 static void
2304 em_set_multi(struct adapter *adapter)
2305 {
2306         struct ifnet    *ifp = adapter->ifp;
2307         struct ifmultiaddr *ifma;
2308         u32 reg_rctl = 0;
2309         u8  *mta; /* Multicast array memory */
2310         int mcnt = 0;
2311
2312         IOCTL_DEBUGOUT("em_set_multi: begin");
2313
2314         mta = adapter->mta;
2315         bzero(mta, sizeof(u8) * ETH_ADDR_LEN * MAX_NUM_MULTICAST_ADDRESSES);
2316
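             /*
              * 82542 rev 2.0 workaround: hold the receiver in reset (with
              * MWI disabled) while the multicast table is being updated.
              */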
2317         if (adapter->hw.mac.type == e1000_82542 && 
2318             adapter->hw.revision_id == E1000_REVISION_2) {
2319                 reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
2320                 if (adapter->hw.bus.pci_cmd_word & CMD_MEM_WRT_INVALIDATE)
2321                         e1000_pci_clear_mwi(&adapter->hw);
2322                 reg_rctl |= E1000_RCTL_RST;
2323                 E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2324                 msec_delay(5);
2325         }
2326
2327 #if __FreeBSD_version < 800000
2328         IF_ADDR_LOCK(ifp);
2329 #else
2330         if_maddr_rlock(ifp);
2331 #endif
2332         TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
2333                 if (ifma->ifma_addr->sa_family != AF_LINK)
2334                         continue;
2335
2336                 if (mcnt == MAX_NUM_MULTICAST_ADDRESSES)
2337                         break;
2338
2339                 bcopy(LLADDR((struct sockaddr_dl *)ifma->ifma_addr),
2340                     &mta[mcnt * ETH_ADDR_LEN], ETH_ADDR_LEN);
2341                 mcnt++;
2342         }
2343 #if __FreeBSD_version < 800000
2344         IF_ADDR_UNLOCK(ifp);
2345 #else
2346         if_maddr_runlock(ifp);
2347 #endif
2348         if (mcnt >= MAX_NUM_MULTICAST_ADDRESSES) {
2349                 reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
2350                 reg_rctl |= E1000_RCTL_MPE;
2351                 E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2352         } else
2353                 e1000_update_mc_addr_list(&adapter->hw, mta, mcnt);
2354
2355         if (adapter->hw.mac.type == e1000_82542 && 
2356             adapter->hw.revision_id == E1000_REVISION_2) {
2357                 reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
2358                 reg_rctl &= ~E1000_RCTL_RST;
2359                 E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2360                 msec_delay(5);
2361                 if (adapter->hw.bus.pci_cmd_word & CMD_MEM_WRT_INVALIDATE)
2362                         e1000_pci_set_mwi(&adapter->hw);
2363         }
2364 }
2365
2366
2367 /*********************************************************************
2368  *  Timer routine
2369  *
2370  *  This routine checks for link status and updates statistics.
2371  *
2372  **********************************************************************/
2373
2374 static void
2375 em_local_timer(void *arg)
2376 {
2377         struct adapter  *adapter = arg;
2378         struct ifnet    *ifp = adapter->ifp;
2379         struct tx_ring  *txr = adapter->tx_rings;
2380         struct rx_ring  *rxr = adapter->rx_rings;
2381         u32             trigger = 0;
2382
2383         EM_CORE_LOCK_ASSERT(adapter);
2384
2385         em_update_link_status(adapter);
2386         em_update_stats_counters(adapter);
2387
2388         /* Reset LAA into RAR[0] on 82571 */
2389         if ((adapter->hw.mac.type == e1000_82571) &&
2390             e1000_get_laa_state_82571(&adapter->hw))
2391                 e1000_rar_set(&adapter->hw, adapter->hw.mac.addr, 0);
2392
2393         /* Mask to use in the irq trigger */
2394         if (adapter->msix_mem) {
2395                 for (int i = 0; i < adapter->num_queues; i++, rxr++)
2396                         trigger |= rxr->ims;
2397                 rxr = adapter->rx_rings;
2398         } else
2399                 trigger = E1000_ICS_RXDMT0;
2400
2401         /*
2402         ** Check on the state of the TX queue(s); this
2403         ** can be done without the lock because it is RO
2404         ** and the HUNG state will be static if set.
2405         */
2406         for (int i = 0; i < adapter->num_queues; i++, txr++) {
2407                 if (txr->busy == EM_TX_HUNG)
2408                         goto hung;
2409                 if (txr->busy >= EM_TX_MAXTRIES)
2410                         txr->busy = EM_TX_HUNG;
2411                 /* Schedule a TX tasklet if needed */
2412                 if (txr->tx_avail <= EM_MAX_SCATTER)
2413                         taskqueue_enqueue(txr->tq, &txr->tx_task);
2414         }
2415         
2416         callout_reset(&adapter->timer, hz, em_local_timer, adapter);
2417 #ifndef DEVICE_POLLING
2418         /* Trigger an RX interrupt to guarantee mbuf refresh */
2419         E1000_WRITE_REG(&adapter->hw, E1000_ICS, trigger);
2420 #endif
2421         return;
2422 hung:
2423         /* Looks like we're hung */
2424         device_printf(adapter->dev, "Watchdog timeout Queue[%d]-- resetting\n",
2425                         txr->me);
2426         em_print_debug_info(adapter);
2427         ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
2428         adapter->watchdog_events++;
2429         em_init_locked(adapter);
2430 }
2431
2432
2433 static void
2434 em_update_link_status(struct adapter *adapter)
2435 {
2436         struct e1000_hw *hw = &adapter->hw;
2437         struct ifnet *ifp = adapter->ifp;
2438         device_t dev = adapter->dev;
2439         struct tx_ring *txr = adapter->tx_rings;
2440         u32 link_check = 0;
2441
2442         /* Get the cached link value or read phy for real */
2443         switch (hw->phy.media_type) {
2444         case e1000_media_type_copper:
2445                 if (hw->mac.get_link_status) {
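                             /* I219 (pch_spt) wants a short pause before
                              * the PHY is polled for link state */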
2446                         if (hw->mac.type == e1000_pch_spt)
2447                                 msec_delay(50);
2448                         /* Do the work to read phy */
2449                         e1000_check_for_link(hw);
2450                         link_check = !hw->mac.get_link_status;
2451                         if (link_check) /* ESB2 fix */
2452                                 e1000_cfg_on_link_up(hw);
2453                 } else
2454                         link_check = TRUE;
2455                 break;
2456         case e1000_media_type_fiber:
2457                 e1000_check_for_link(hw);
2458                 link_check = (E1000_READ_REG(hw, E1000_STATUS) &
2459                                  E1000_STATUS_LU);
2460                 break;
2461         case e1000_media_type_internal_serdes:
2462                 e1000_check_for_link(hw);
2463                 link_check = adapter->hw.mac.serdes_has_link;
2464                 break;
2465         default:
2466         case e1000_media_type_unknown:
2467                 break;
2468         }
2469
2470         /* Now check for a transition */
2471         if (link_check && (adapter->link_active == 0)) {
2472                 e1000_get_speed_and_duplex(hw, &adapter->link_speed,
2473                     &adapter->link_duplex);
2474
2475                 /*
2476                 ** There have proven to be problems with TSO when not at full
2477                 ** gigabit speed, so disable the assist automatically when at
2478                 ** lower speeds.  -jfv
2479                 */
2480                 if (ifp->if_capenable & IFCAP_TSO4) {
2481                         if (adapter->link_speed == SPEED_1000)
2482                                 ifp->if_hwassist |= CSUM_IP_TSO;
2483                         else
2484                                 ifp->if_hwassist &= ~CSUM_IP_TSO;
2485                 }
2486
2487                 /* Check if we must disable SPEED_MODE bit on PCI-E */
2488                 if ((adapter->link_speed != SPEED_1000) &&
2489                     ((hw->mac.type == e1000_82571) ||
2490                     (hw->mac.type == e1000_82572))) {
2491                         int tarc0;
2492                         tarc0 = E1000_READ_REG(hw, E1000_TARC(0));
2493                         tarc0 &= ~TARC_SPEED_MODE_BIT;
2494                         E1000_WRITE_REG(hw, E1000_TARC(0), tarc0);
2495                 }
2496                 if (bootverbose)
2497                         device_printf(dev, "Link is up %d Mbps %s\n",
2498                             adapter->link_speed,
2499                             ((adapter->link_duplex == FULL_DUPLEX) ?
2500                             "Full Duplex" : "Half Duplex"));
2501                 adapter->link_active = 1;
2502                 adapter->smartspeed = 0;
2503                 ifp->if_baudrate = adapter->link_speed * 1000000;
2504                 if_link_state_change(ifp, LINK_STATE_UP);
2505         } else if (!link_check && (adapter->link_active == 1)) {
2506                 ifp->if_baudrate = adapter->link_speed = 0;
2507                 adapter->link_duplex = 0;
2508                 if (bootverbose)
2509                         device_printf(dev, "Link is Down\n");
2510                 adapter->link_active = 0;
2511                 /* Link down, disable hang detection */
2512                 for (int i = 0; i < adapter->num_queues; i++, txr++)
2513                         txr->busy = EM_TX_IDLE;
2514                 if_link_state_change(ifp, LINK_STATE_DOWN);
2515         }
2516 }
2517
2518 /*********************************************************************
2519  *
2520  *  This routine disables all traffic on the adapter by issuing a
2521  *  global reset on the MAC and deallocates TX/RX buffers.
2522  *
2523  *  This routine should always be called with BOTH the CORE
2524  *  and TX locks.
2525  **********************************************************************/
2526
2527 static void
2528 em_stop(void *arg)
2529 {
2530         struct adapter  *adapter = arg;
2531         struct ifnet    *ifp = adapter->ifp;
2532         struct tx_ring  *txr = adapter->tx_rings;
2533
2534         EM_CORE_LOCK_ASSERT(adapter);
2535
2536         INIT_DEBUGOUT("em_stop: begin");
2537
2538         em_disable_intr(adapter);
2539         callout_stop(&adapter->timer);
2540
2541         /* Tell the stack that the interface is no longer active */
2542         ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
2543         ifp->if_drv_flags |= IFF_DRV_OACTIVE;
2544
2545         /* Disarm Hang Detection. */
2546         for (int i = 0; i < adapter->num_queues; i++, txr++) {
2547                 EM_TX_LOCK(txr);
2548                 txr->busy = EM_TX_IDLE;
2549                 EM_TX_UNLOCK(txr);
2550         }
2551
2552         /* I219 needs some special flushing to avoid hangs */
2553         if (adapter->hw.mac.type == e1000_pch_spt)
2554                 em_flush_desc_rings(adapter);
2555
2556         e1000_reset_hw(&adapter->hw);
2557         E1000_WRITE_REG(&adapter->hw, E1000_WUC, 0);
2558
2559         e1000_led_off(&adapter->hw);
2560         e1000_cleanup_led(&adapter->hw);
2561 }
2562
2563
2564 /*********************************************************************
2565  *
2566  *  Determine hardware revision.
2567  *
2568  **********************************************************************/
2569 static void
2570 em_identify_hardware(struct adapter *adapter)
2571 {
2572         device_t dev = adapter->dev;
2573
2574         /* Make sure our PCI config space has the necessary stuff set */
2575         pci_enable_busmaster(dev);
2576         adapter->hw.bus.pci_cmd_word = pci_read_config(dev, PCIR_COMMAND, 2);
2577
2578         /* Save off the information about this board */
2579         adapter->hw.vendor_id = pci_get_vendor(dev);
2580         adapter->hw.device_id = pci_get_device(dev);
2581         adapter->hw.revision_id = pci_read_config(dev, PCIR_REVID, 1);
2582         adapter->hw.subsystem_vendor_id =
2583             pci_read_config(dev, PCIR_SUBVEND_0, 2);
2584         adapter->hw.subsystem_device_id =
2585             pci_read_config(dev, PCIR_SUBDEV_0, 2);
2586
2587         /* Do Shared Code Init and Setup */
2588         if (e1000_set_mac_type(&adapter->hw)) {
2589                 device_printf(dev, "Setup init failure\n");
2590                 return;
2591         }
2592 }
2593
2594 static int
2595 em_allocate_pci_resources(struct adapter *adapter)
2596 {
2597         device_t        dev = adapter->dev;
2598         int             rid;
2599
2600         rid = PCIR_BAR(0);
2601         adapter->memory = bus_alloc_resource_any(dev, SYS_RES_MEMORY,
2602             &rid, RF_ACTIVE);
2603         if (adapter->memory == NULL) {
2604                 device_printf(dev, "Unable to allocate bus resource: memory\n");
2605                 return (ENXIO);
2606         }
2607         adapter->osdep.mem_bus_space_tag =
2608             rman_get_bustag(adapter->memory);
2609         adapter->osdep.mem_bus_space_handle =
2610             rman_get_bushandle(adapter->memory);
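             /*
              * The shared code's register macros resolve accesses through
              * the osdep bus-space tag/handle saved above; hw_addr is kept
              * only as a back-pointer to that handle.
              */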
2611         adapter->hw.hw_addr = (u8 *)&adapter->osdep.mem_bus_space_handle;
2612
2613         adapter->hw.back = &adapter->osdep;
2614
2615         return (0);
2616 }
2617
2618 /*********************************************************************
2619  *
2620  *  Setup the Legacy or MSI Interrupt handler
2621  *
2622  **********************************************************************/
2623 static int
2624 em_allocate_legacy(struct adapter *adapter)
2625 {
2626         device_t dev = adapter->dev;
2627         struct tx_ring  *txr = adapter->tx_rings;
2628         int error, rid = 0;
2629
2630         /* Manually turn off all interrupts */
2631         E1000_WRITE_REG(&adapter->hw, E1000_IMC, 0xffffffff);
2632
2633         if (adapter->msix == 1) /* using MSI */
2634                 rid = 1;
2635         /* We allocate a single interrupt resource */
2636         adapter->res = bus_alloc_resource_any(dev,
2637             SYS_RES_IRQ, &rid, RF_SHAREABLE | RF_ACTIVE);
2638         if (adapter->res == NULL) {
2639                 device_printf(dev, "Unable to allocate bus resource: "
2640                     "interrupt\n");
2641                 return (ENXIO);
2642         }
2643
2644         /*
2645          * Allocate a fast interrupt and the associated
2646          * deferred processing contexts.
2647          */
2648         TASK_INIT(&adapter->que_task, 0, em_handle_que, adapter);
2649         adapter->tq = taskqueue_create_fast("em_taskq", M_NOWAIT,
2650             taskqueue_thread_enqueue, &adapter->tq);
2651         taskqueue_start_threads(&adapter->tq, 1, PI_NET, "%s que",
2652             device_get_nameunit(adapter->dev));
2653         /* Use a TX only tasklet for local timer */
2654         TASK_INIT(&txr->tx_task, 0, em_handle_tx, txr);
2655         txr->tq = taskqueue_create_fast("em_txq", M_NOWAIT,
2656             taskqueue_thread_enqueue, &txr->tq);
2657         taskqueue_start_threads(&txr->tq, 1, PI_NET, "%s txq",
2658             device_get_nameunit(adapter->dev));
2659         TASK_INIT(&adapter->link_task, 0, em_handle_link, adapter);
2660         if ((error = bus_setup_intr(dev, adapter->res, INTR_TYPE_NET,
2661             em_irq_fast, NULL, adapter, &adapter->tag)) != 0) {
2662                 device_printf(dev, "Failed to register fast interrupt "
2663                             "handler: %d\n", error);
2664                 taskqueue_free(adapter->tq);
2665                 adapter->tq = NULL;
2666                 return (error);
2667         }
2668         
2669         return (0);
2670 }
2671
2672 /*********************************************************************
2673  *
2674  *  Setup the MSIX Interrupt handlers
2675  *   This is not really Multiqueue, rather
2676  *   it's just separate interrupt vectors
2677  *   for TX, RX, and Link.
2678  *
2679  **********************************************************************/
2680 static int
2681 em_allocate_msix(struct adapter *adapter)
2682 {
2683         device_t        dev = adapter->dev;
2684         struct          tx_ring *txr = adapter->tx_rings;
2685         struct          rx_ring *rxr = adapter->rx_rings;
2686         int             error, rid, vector = 0;
2687         int             cpu_id = 0;
2688
2689
2690         /* Make sure all interrupts are disabled */
2691         E1000_WRITE_REG(&adapter->hw, E1000_IMC, 0xffffffff);
2692
2693         /* First set up ring resources */
2694         for (int i = 0; i < adapter->num_queues; i++, rxr++, vector++) {
2695
2696                 /* RX ring */
2697                 rid = vector + 1;
2698
2699                 rxr->res = bus_alloc_resource_any(dev,
2700                     SYS_RES_IRQ, &rid, RF_ACTIVE);
2701                 if (rxr->res == NULL) {
2702                         device_printf(dev,
2703                             "Unable to allocate bus resource: "
2704                             "RX MSIX Interrupt %d\n", i);
2705                         return (ENXIO);
2706                 }
2707                 if ((error = bus_setup_intr(dev, rxr->res,
2708                     INTR_TYPE_NET | INTR_MPSAFE, NULL, em_msix_rx,
2709                     rxr, &rxr->tag)) != 0) {
2710                         device_printf(dev, "Failed to register RX handler");
2711                         return (error);
2712                 }
2713 #if __FreeBSD_version >= 800504
2714                 bus_describe_intr(dev, rxr->res, rxr->tag, "rx%d", i);
2715 #endif
2716                 rxr->msix = vector;
2717
2718                 if (em_last_bind_cpu < 0)
2719                         em_last_bind_cpu = CPU_FIRST();
2720                 cpu_id = em_last_bind_cpu;
2721                 bus_bind_intr(dev, rxr->res, cpu_id);
2722
2723                 TASK_INIT(&rxr->rx_task, 0, em_handle_rx, rxr);
2724                 rxr->tq = taskqueue_create_fast("em_rxq", M_NOWAIT,
2725                     taskqueue_thread_enqueue, &rxr->tq);
2726                 taskqueue_start_threads(&rxr->tq, 1, PI_NET, "%s rxq (cpuid %d)",
2727                     device_get_nameunit(adapter->dev), cpu_id);
2728                 /*
2729                 ** Set the bit to enable interrupt
2730                 ** in E1000_IMS -- bits 20 and 21
2731                 ** are for RX0 and RX1, note this has
2732                 ** NOTHING to do with the MSIX vector
2733                 */
2734                 rxr->ims = 1 << (20 + i);
2735                 adapter->ims |= rxr->ims;
2736                 adapter->ivars |= (8 | rxr->msix) << (i * 4);
2737
2738                 em_last_bind_cpu = CPU_NEXT(em_last_bind_cpu);
2739         }
2740
2741         for (int i = 0; i < adapter->num_queues; i++, txr++, vector++) {
2742                 /* TX ring */
2743                 rid = vector + 1;
2744                 txr->res = bus_alloc_resource_any(dev,
2745                     SYS_RES_IRQ, &rid, RF_ACTIVE);
2746                 if (txr->res == NULL) {
2747                         device_printf(dev,
2748                             "Unable to allocate bus resource: "
2749                             "TX MSIX Interrupt %d\n", i);
2750                         return (ENXIO);
2751                 }
2752                 if ((error = bus_setup_intr(dev, txr->res,
2753                     INTR_TYPE_NET | INTR_MPSAFE, NULL, em_msix_tx,
2754                     txr, &txr->tag)) != 0) {
2755                         device_printf(dev, "Failed to register TX handler\n");
2756                         return (error);
2757                 }
2758 #if __FreeBSD_version >= 800504
2759                 bus_describe_intr(dev, txr->res, txr->tag, "tx%d", i);
2760 #endif
2761                 txr->msix = vector;
2762
2763                 if (em_last_bind_cpu < 0)
2764                         em_last_bind_cpu = CPU_FIRST();
2765                 cpu_id = em_last_bind_cpu;
2766                 bus_bind_intr(dev, txr->res, cpu_id);
2767
2768                 TASK_INIT(&txr->tx_task, 0, em_handle_tx, txr);
2769                 txr->tq = taskqueue_create_fast("em_txq", M_NOWAIT,
2770                     taskqueue_thread_enqueue, &txr->tq);
2771                 taskqueue_start_threads(&txr->tq, 1, PI_NET, "%s txq (cpuid %d)",
2772                     device_get_nameunit(adapter->dev), cpu_id);
2773                 /*
2774                 ** Set the bit to enable interrupt
2775                 ** in E1000_IMS -- bits 22 and 23
2776                 ** are for TX0 and TX1, note this has
2777                 ** NOTHING to do with the MSIX vector
2778                 */
2779                 txr->ims = 1 << (22 + i);
2780                 adapter->ims |= txr->ims;
2781                 adapter->ivars |= (8 | txr->msix) << (8 + (i * 4));
2782
2783                 em_last_bind_cpu = CPU_NEXT(em_last_bind_cpu);
2784         }
2785
2786         /* Link interrupt */
2787         rid = vector + 1;
2788         adapter->res = bus_alloc_resource_any(dev,
2789             SYS_RES_IRQ, &rid, RF_SHAREABLE | RF_ACTIVE);
2790         if (adapter->res == NULL) {
2791                 device_printf(dev, "Unable to allocate "
2792                     "bus resource: Link interrupt [%d]\n", rid);
2793                 return (ENXIO);
2794         }
2795         /* Set the link handler function */
2796         error = bus_setup_intr(dev, adapter->res,
2797             INTR_TYPE_NET | INTR_MPSAFE, NULL,
2798             em_msix_link, adapter, &adapter->tag);
2799         if (error) {
2800                 adapter->res = NULL;
2801                 device_printf(dev, "Failed to register LINK handler\n");
2802                 return (error);
2803         }
2804 #if __FreeBSD_version >= 800504
2805         bus_describe_intr(dev, adapter->res, adapter->tag, "link");
2806 #endif
2807         adapter->linkvec = vector;
2808         adapter->ivars |= (8 | vector) << 16;
2809         adapter->ivars |= 0x80000000;
2810
2811         return (0);
2812 }
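
/*
 * Illustrative sketch (not compiled): the 82574 IVAR value assembled
 * above packs one 4-bit field per interrupt cause -- bit 3 is the
 * "valid" flag (the 8 OR'ed in above) and bits 0-2 select the MSIX
 * vector.  The helper below is hypothetical and assumes a single
 * RX/TX queue pair.
 */
#if 0
static u32
example_ivar(u8 rx0_vec, u8 tx0_vec, u8 link_vec)
{
        u32 ivar = 0;

        ivar |= (8 | rx0_vec) << 0;     /* RX queue 0 cause, bits 0-3 */
        ivar |= (8 | tx0_vec) << 8;     /* TX queue 0 cause, bits 8-11 */
        ivar |= (8 | link_vec) << 16;   /* link/other cause, bits 16-19 */
        ivar |= 0x80000000;             /* as set by em_allocate_msix() */
        return (ivar);
}
#endif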
2813
2814
2815 static void
2816 em_free_pci_resources(struct adapter *adapter)
2817 {
2818         device_t        dev = adapter->dev;
2819         struct tx_ring  *txr;
2820         struct rx_ring  *rxr;
2821         int             rid;
2822
2823
2824         /*
2825         ** Release all the queue interrupt resources:
2826         */
2827         for (int i = 0; i < adapter->num_queues; i++) {
2828                 txr = &adapter->tx_rings[i];
2829                 /* an early abort? */
2830                 if (adapter->tx_rings == NULL)
2831                         break;
2832                 rid = txr->msix + 1;
2833                 if (txr->tag != NULL) {
2834                         bus_teardown_intr(dev, txr->res, txr->tag);
2835                         txr->tag = NULL;
2836                 }
2837                 if (txr->res != NULL)
2838                         bus_release_resource(dev, SYS_RES_IRQ,
2839                             rid, txr->res);
2840
2841                 rxr = &adapter->rx_rings[i];
2842                 /* an early abort? */
2843                 if (adapter->rx_rings == NULL)
2844                         break;
2845                 rid = rxr->msix + 1;
2846                 if (rxr->tag != NULL) {
2847                         bus_teardown_intr(dev, rxr->res, rxr->tag);
2848                         rxr->tag = NULL;
2849                 }
2850                 if (rxr->res != NULL)
2851                         bus_release_resource(dev, SYS_RES_IRQ,
2852                             rid, rxr->res);
2853         }
2854
2855         if (adapter->linkvec) /* we are doing MSIX */
2856                 rid = adapter->linkvec + 1;
2857         else
2858                 rid = (adapter->msix != 0) ? 1 : 0;
2859
2860         if (adapter->tag != NULL) {
2861                 bus_teardown_intr(dev, adapter->res, adapter->tag);
2862                 adapter->tag = NULL;
2863         }
2864
2865         if (adapter->res != NULL)
2866                 bus_release_resource(dev, SYS_RES_IRQ, rid, adapter->res);
2867
2868
2869         if (adapter->msix)
2870                 pci_release_msi(dev);
2871
2872         if (adapter->msix_mem != NULL)
2873                 bus_release_resource(dev, SYS_RES_MEMORY,
2874                     adapter->memrid, adapter->msix_mem);
2875
2876         if (adapter->memory != NULL)
2877                 bus_release_resource(dev, SYS_RES_MEMORY,
2878                     PCIR_BAR(0), adapter->memory);
2879
2880         if (adapter->flash != NULL)
2881                 bus_release_resource(dev, SYS_RES_MEMORY,
2882                     EM_FLASH, adapter->flash);
2883 }
2884
2885 /*
2886  * Setup MSI or MSI/X
2887  */
2888 static int
2889 em_setup_msix(struct adapter *adapter)
2890 {
2891         device_t dev = adapter->dev;
2892         int val;
2893
2894         /* Nearly always going to use one queue */
2895         adapter->num_queues = 1;
2896
2897         /*
2898         ** Try using MSI-X for Hartwell adapters
2899         */
2900         if ((adapter->hw.mac.type == e1000_82574) &&
2901             (em_enable_msix == TRUE)) {
2902 #ifdef EM_MULTIQUEUE
2903                 adapter->num_queues = (em_num_queues == 1) ? 1 : 2;
2904                 if (adapter->num_queues > 1)
2905                         em_enable_vectors_82574(adapter);
2906 #endif
2907                 /* Map the MSIX BAR */
2908                 adapter->memrid = PCIR_BAR(EM_MSIX_BAR);
2909                 adapter->msix_mem = bus_alloc_resource_any(dev,
2910                     SYS_RES_MEMORY, &adapter->memrid, RF_ACTIVE);
2911                 if (adapter->msix_mem == NULL) {
2912                         /* May not be enabled */
2913                         device_printf(adapter->dev,
2914                             "Unable to map MSIX table\n");
2915                         goto msi;
2916                 }
2917                 val = pci_msix_count(dev); 
2918
2919 #ifdef EM_MULTIQUEUE
2920                 /* We need 5 vectors in the multiqueue case */
2921                 if (adapter->num_queues > 1) {
2922                         if (val >= 5)
2923                                 val = 5;
2924                         else {
2925                                 adapter->num_queues = 1;
2926                                 device_printf(adapter->dev,
2927                                     "Insufficient MSIX vectors for >1 queue, "
2928                                     "using single queue...\n");
2929                                 goto msix_one;
2930                         }
2931                 } else {
2932 msix_one:
2933 #endif
2934                         if (val >= 3)
2935                                 val = 3;
2936                         else {
2937                                 device_printf(adapter->dev,
2938                                     "Insufficient MSIX vectors, using MSI\n");
2939                                 goto msi;
2940                         }
2941 #ifdef EM_MULTIQUEUE
2942                 }
2943 #endif
2944
2945                 if (pci_alloc_msix(dev, &val) == 0) {
2946                         device_printf(adapter->dev,
2947                             "Using MSIX interrupts "
2948                             "with %d vectors\n", val);
2949                         return (val);
2950                 }
2951
2952                 /*
2953                 ** If MSIX alloc failed or provided us with
2954                 ** less than needed, free and fall through to MSI
2955                 */
2956                 pci_release_msi(dev);
2957         }
2958 msi:
2959         if (adapter->msix_mem != NULL) {
2960                 bus_release_resource(dev, SYS_RES_MEMORY,
2961                     adapter->memrid, adapter->msix_mem);
2962                 adapter->msix_mem = NULL;
2963         }
2964         val = 1;
2965         if (pci_alloc_msi(dev, &val) == 0) {
2966                 device_printf(adapter->dev, "Using an MSI interrupt\n");
2967                 return (val);
2968         } 
2969         /* Should only happen due to manual configuration */
2970         device_printf(adapter->dev, "No MSI/MSIX, using a Legacy IRQ\n");
2971         return (0);
2972 }
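
/*
 * Sketch of the vector budget applied above (illustration only): each
 * RX and TX queue needs its own MSIX vector plus one more for link,
 * hence 5 vectors for the two-queue case and 3 for a single queue.
 */
#if 0
static int
example_msix_vectors_needed(int num_queues)
{
        return (num_queues * 2 + 1);    /* per-queue RX + TX, plus link */
}
#endif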
2973
2974
2975 /*
2976 ** The 3 following flush routines are used as a workaround in the
2977 ** I219 client parts and only for them.
2978 **
2979 ** em_flush_tx_ring - remove all descriptors from the tx_ring
2980 **
2981 ** We want to clear all pending descriptors from the TX ring.
2982  ** Zeroing happens when the HW reads the regs.  We assign the ring itself
2983  ** as the data of the next descriptor.  We don't care about the data,
2984  ** since we are about to reset the HW.
2985 */
2986 static void
2987 em_flush_tx_ring(struct adapter *adapter)
2988 {
2989         struct e1000_hw         *hw = &adapter->hw;
2990         struct tx_ring          *txr = adapter->tx_rings;
2991         struct e1000_tx_desc    *txd;
2992         u32                     tctl, txd_lower = E1000_TXD_CMD_IFCS;
2993         u16                     size = 512;
2994
2995         tctl = E1000_READ_REG(hw, E1000_TCTL);
2996         E1000_WRITE_REG(hw, E1000_TCTL, tctl | E1000_TCTL_EN);
2997
2998         txd = &txr->tx_base[txr->next_avail_desc++];
2999         if (txr->next_avail_desc == adapter->num_tx_desc)
3000                 txr->next_avail_desc = 0;
3001
3002         /* Just use the ring as a dummy buffer addr */
3003         txd->buffer_addr = txr->txdma.dma_paddr;
3004         txd->lower.data = htole32(txd_lower | size);
3005         txd->upper.data = 0;
3006
3007         /* flush descriptors to memory before notifying the HW */
3008         wmb();
3009
3010         E1000_WRITE_REG(hw, E1000_TDT(0), txr->next_avail_desc);
3011         mb();
3012         usec_delay(250);
3013 }
3014
3015 /*
3016 ** em_flush_rx_ring - remove all descriptors from the rx_ring
3017 **
3018 ** Mark all descriptors in the RX ring as consumed and disable the rx ring
3019 */
3020 static void
3021 em_flush_rx_ring(struct adapter *adapter)
3022 {
3023         struct e1000_hw *hw = &adapter->hw;
3024         u32             rctl, rxdctl;
3025
3026         rctl = E1000_READ_REG(hw, E1000_RCTL);
3027         E1000_WRITE_REG(hw, E1000_RCTL, rctl & ~E1000_RCTL_EN);
3028         E1000_WRITE_FLUSH(hw);
3029         usec_delay(150);
3030
3031         rxdctl = E1000_READ_REG(hw, E1000_RXDCTL(0));
3032         /* zero the lower 14 bits (prefetch and host thresholds) */
3033         rxdctl &= 0xffffc000;
3034         /*
3035          * update thresholds: prefetch threshold to 31, host threshold to 1
3036          * and make sure the granularity is "descriptors" and not "cache lines"
3037          */
3038         rxdctl |= (0x1F | (1 << 8) | E1000_RXDCTL_THRESH_UNIT_DESC);
3039         E1000_WRITE_REG(hw, E1000_RXDCTL(0), rxdctl);
3040
3041         /* momentarily enable the RX ring for the changes to take effect */
3042         E1000_WRITE_REG(hw, E1000_RCTL, rctl | E1000_RCTL_EN);
3043         E1000_WRITE_FLUSH(hw);
3044         usec_delay(150);
3045         E1000_WRITE_REG(hw, E1000_RCTL, rctl & ~E1000_RCTL_EN);
3046 }
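
/*
 * Worked value for the RXDCTL write above (illustration): with the
 * lower 14 bits cleared, OR'ing in 0x1F (prefetch threshold 31) and
 * 1 << 8 (host threshold 1) yields 0x011F in the threshold fields,
 * plus E1000_RXDCTL_THRESH_UNIT_DESC for descriptor granularity.
 */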
3047
3048 /*
3049 ** em_flush_desc_rings - remove all descriptors from the descriptor rings
3050 **
3051  ** In I219, the descriptor rings must be emptied before resetting the HW
3052  ** or before changing the device state to D3 during runtime (runtime PM).
3053  **
3054  ** Failure to do this will cause the HW to enter a unit hang state which
3055  ** can only be released by a PCI reset of the device.
3056 **
3057 */
3058 static void
3059 em_flush_desc_rings(struct adapter *adapter)
3060 {
3061         struct e1000_hw *hw = &adapter->hw;
3062         device_t        dev = adapter->dev;
3063         u16             hang_state;
3064         u32             fext_nvm11, tdlen;
3065  
3066         /* First, disable MULR fix in FEXTNVM11 */
3067         fext_nvm11 = E1000_READ_REG(hw, E1000_FEXTNVM11);
3068         fext_nvm11 |= E1000_FEXTNVM11_DISABLE_MULR_FIX;
3069         E1000_WRITE_REG(hw, E1000_FEXTNVM11, fext_nvm11);
3070         
3071         /* do nothing if we're not in faulty state, or if the queue is empty */
3072         tdlen = E1000_READ_REG(hw, E1000_TDLEN(0));
3073         hang_state = pci_read_config(dev, PCICFG_DESC_RING_STATUS, 2);
3074         if (!(hang_state & FLUSH_DESC_REQUIRED) || !tdlen)
3075                 return;
3076         em_flush_tx_ring(adapter);
3077
3078         /* recheck, maybe the fault is caused by the rx ring */
3079         hang_state = pci_read_config(dev, PCICFG_DESC_RING_STATUS, 2);
3080         if (hang_state & FLUSH_DESC_REQUIRED)
3081                 em_flush_rx_ring(adapter);
3082 }
3083
3084
3085 /*********************************************************************
3086  *
3087  *  Initialize the hardware to a configuration
3088  *  as specified by the adapter structure.
3089  *
3090  **********************************************************************/
3091 static void
3092 em_reset(struct adapter *adapter)
3093 {
3094         device_t        dev = adapter->dev;
3095         struct ifnet    *ifp = adapter->ifp;
3096         struct e1000_hw *hw = &adapter->hw;
3097         u16             rx_buffer_size;
3098         u32             pba;
3099
3100         INIT_DEBUGOUT("em_reset: begin");
3101
3102         /* Set up smart power down as default off on newer adapters. */
3103         if (!em_smart_pwr_down && (hw->mac.type == e1000_82571 ||
3104             hw->mac.type == e1000_82572)) {
3105                 u16 phy_tmp = 0;
3106
3107                 /* Speed up time to link by disabling smart power down. */
3108                 e1000_read_phy_reg(hw, IGP02E1000_PHY_POWER_MGMT, &phy_tmp);
3109                 phy_tmp &= ~IGP02E1000_PM_SPD;
3110                 e1000_write_phy_reg(hw, IGP02E1000_PHY_POWER_MGMT, phy_tmp);
3111         }
3112
3113         /*
3114          * Packet Buffer Allocation (PBA)
3115          * Writing PBA sets the receive portion of the buffer;
3116          * the remainder is used for the transmit buffer.
3117          */
3118         switch (hw->mac.type) {
3119         /* Total Packet Buffer on these is 48K */
3120         case e1000_82571:
3121         case e1000_82572:
3122         case e1000_80003es2lan:
3123                 pba = E1000_PBA_32K; /* 32K for Rx, 16K for Tx */
3124                 break;
3125         case e1000_82573: /* 82573: Total Packet Buffer is 32K */
3126                 pba = E1000_PBA_12K; /* 12K for Rx, 20K for Tx */
3127                 break;
3128         case e1000_82574:
3129         case e1000_82583:
3130                 pba = E1000_PBA_20K; /* 20K for Rx, 20K for Tx */
3131                 break;
3132         case e1000_ich8lan:
3133                 pba = E1000_PBA_8K;
3134                 break;
3135         case e1000_ich9lan:
3136         case e1000_ich10lan:
3137                 /* Boost Receive side for jumbo frames */
3138                 if (adapter->hw.mac.max_frame_size > 4096)
3139                         pba = E1000_PBA_14K;
3140                 else
3141                         pba = E1000_PBA_10K;
3142                 break;
3143         case e1000_pchlan:
3144         case e1000_pch2lan:
3145         case e1000_pch_lpt:
3146         case e1000_pch_spt:
3147         case e1000_pch_cnp:
3148                 pba = E1000_PBA_26K;
3149                 break;
3150         default:
3151                 if (adapter->hw.mac.max_frame_size > 8192)
3152                         pba = E1000_PBA_40K; /* 40K for Rx, 24K for Tx */
3153                 else
3154                         pba = E1000_PBA_48K; /* 48K for Rx, 16K for Tx */
3155         }
3156         E1000_WRITE_REG(&adapter->hw, E1000_PBA, pba);
3157
3158         /*
3159          * These parameters control the automatic generation (Tx) and
3160          * response (Rx) to Ethernet PAUSE frames.
3161          * - High water mark should allow for at least two frames to be
3162          *   received after sending an XOFF.
3163          * - Low water mark works best when it is very near the high water mark.
3164          *   This allows the receiver to restart by sending XON when it has
3165          *   drained a bit. Here we use an arbitrary value of 1500 which will
3166          *   restart after one full frame is pulled from the buffer. There
3167          *   could be several smaller frames in the buffer and if so they will
3168          *   not trigger the XON until their total number reduces the buffer
3169          *   by 1500.
3170          * - The pause time is fairly large at 1000 x 512ns = 512 usec.
3171          */
3172         rx_buffer_size = ((E1000_READ_REG(hw, E1000_PBA) & 0xffff) << 10);
3173         hw->fc.high_water = rx_buffer_size -
3174             roundup2(adapter->hw.mac.max_frame_size, 1024);
3175         hw->fc.low_water = hw->fc.high_water - 1500;
3176
3177         if (adapter->fc) /* locally set flow control value? */
3178                 hw->fc.requested_mode = adapter->fc;
3179         else
3180                 hw->fc.requested_mode = e1000_fc_full;
3181
3182         if (hw->mac.type == e1000_80003es2lan)
3183                 hw->fc.pause_time = 0xFFFF;
3184         else
3185                 hw->fc.pause_time = EM_FC_PAUSE_TIME;
3186
3187         hw->fc.send_xon = TRUE;
3188
3189         /* Device specific overrides/settings */
3190         switch (hw->mac.type) {
3191         case e1000_pchlan:
3192                 /* Workaround: no TX flow ctrl for PCH */
3193                 hw->fc.requested_mode = e1000_fc_rx_pause;
3194                 hw->fc.pause_time = 0xFFFF; /* override */
3195                 if (ifp->if_mtu > ETHERMTU) {
3196                         hw->fc.high_water = 0x3500;
3197                         hw->fc.low_water = 0x1500;
3198                 } else {
3199                         hw->fc.high_water = 0x5000;
3200                         hw->fc.low_water = 0x3000;
3201                 }
3202                 hw->fc.refresh_time = 0x1000;
3203                 break;
3204         case e1000_pch2lan:
3205         case e1000_pch_lpt:
3206         case e1000_pch_spt:
3207         case e1000_pch_cnp:
3208                 hw->fc.high_water = 0x5C20;
3209                 hw->fc.low_water = 0x5048;
3210                 hw->fc.pause_time = 0x0650;
3211                 hw->fc.refresh_time = 0x0400;
3212                 /* Jumbos need adjusted PBA */
3213                 if (ifp->if_mtu > ETHERMTU)
3214                         E1000_WRITE_REG(hw, E1000_PBA, 12);
3215                 else
3216                         E1000_WRITE_REG(hw, E1000_PBA, 26);
3217                 break;
3218         case e1000_ich9lan:
3219         case e1000_ich10lan:
3220                 if (ifp->if_mtu > ETHERMTU) {
3221                         hw->fc.high_water = 0x2800;
3222                         hw->fc.low_water = hw->fc.high_water - 8;
3223                         break;
3224                 } 
3225                 /* else fall thru */
3226         default:
3227                 if (hw->mac.type == e1000_80003es2lan)
3228                         hw->fc.pause_time = 0xFFFF;
3229                 break;
3230         }
3231
3232         /* I219 needs some special flushing to avoid hangs */
3233         if (hw->mac.type == e1000_pch_spt)
3234                 em_flush_desc_rings(adapter);
3235
3236         /* Issue a global reset */
3237         e1000_reset_hw(hw);
3238         E1000_WRITE_REG(hw, E1000_WUC, 0);
3239         em_disable_aspm(adapter);
3240         /* and a re-init */
3241         if (e1000_init_hw(hw) < 0) {
3242                 device_printf(dev, "Hardware Initialization Failed\n");
3243                 return;
3244         }
3245
3246         E1000_WRITE_REG(hw, E1000_VET, ETHERTYPE_VLAN);
3247         e1000_get_phy_info(hw);
3248         e1000_check_for_link(hw);
3249         return;
3250 }
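
/*
 * Worked example of the flow-control watermark math in em_reset()
 * (values hypothetical): with a 32KB RX packet buffer, the PBA read
 * yields rx_buffer_size = 32 << 10 = 32768.  For a 1518-byte max
 * frame, roundup2(1518, 1024) = 2048, so high_water = 32768 - 2048 =
 * 30720 and low_water = 30720 - 1500 = 29220.
 */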
3251
3252 /*********************************************************************
3253  *
3254  *  Setup networking device structure and register an interface.
3255  *
3256  **********************************************************************/
3257 static int
3258 em_setup_interface(device_t dev, struct adapter *adapter)
3259 {
3260         struct ifnet   *ifp;
3261
3262         INIT_DEBUGOUT("em_setup_interface: begin");
3263
3264         ifp = adapter->ifp = if_alloc(IFT_ETHER);
3265         if (ifp == NULL) {
3266                 device_printf(dev, "can not allocate ifnet structure\n");
3267                 return (-1);
3268         }
3269         if_initname(ifp, device_get_name(dev), device_get_unit(dev));
3270         ifp->if_init = em_init;
3271         ifp->if_softc = adapter;
3272         ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
3273         ifp->if_ioctl = em_ioctl;
3274
3275         /* TSO parameters */
3276         ifp->if_hw_tsomax = IP_MAXPACKET;
3277         /* Take m_pullup(9)'s in em_xmit() w/ TSO into account. */
3278         ifp->if_hw_tsomaxsegcount = EM_MAX_SCATTER - 5;
3279         ifp->if_hw_tsomaxsegsize = EM_TSO_SEG_SIZE;
3280
3281 #ifdef EM_MULTIQUEUE
3282         /* Multiqueue stack interface */
3283         ifp->if_transmit = em_mq_start;
3284         ifp->if_qflush = em_qflush;
3285 #else
3286         ifp->if_start = em_start;
3287         IFQ_SET_MAXLEN(&ifp->if_snd, adapter->num_tx_desc - 1);
3288         ifp->if_snd.ifq_drv_maxlen = adapter->num_tx_desc - 1;
3289         IFQ_SET_READY(&ifp->if_snd);
3290 #endif  
3291
3292         ether_ifattach(ifp, adapter->hw.mac.addr);
3293
3294         ifp->if_capabilities = IFCAP_HWCSUM | IFCAP_VLAN_HWCSUM;
3295         ifp->if_capenable = ifp->if_capabilities;
3296
3297         /*
3298          * Tell the upper layer(s) we
3299          * support full VLAN capability
3300          */
3301         ifp->if_data.ifi_hdrlen = sizeof(struct ether_vlan_header);
3302         ifp->if_capabilities |= IFCAP_VLAN_HWTAGGING
3303                              |  IFCAP_VLAN_HWTSO
3304                              |  IFCAP_VLAN_MTU;
3305         ifp->if_capenable |= IFCAP_VLAN_HWTAGGING
3306                           |  IFCAP_VLAN_MTU;
3307
3308         /*
3309          * We don't enable IFCAP_{TSO4,VLAN_HWTSO} by default because:
3310          * - Although the silicon bug of TSO only working at gigabit speed is
3311          *   worked around in em_update_link_status() by selectively setting
3312          *   CSUM_IP_TSO, we cannot atomically flush already queued TSO-using
3313          *   descriptors.  Thus, such descriptors may still cause the MAC to
3314          *   hang and, consequently, TSO is only safe to be used in setups
3315          *   where the link isn't expected to switch from gigabit to lower
3316          *   speeds.
3317          * - Similarly, there's currently no way to trigger a reconfiguration
3318          *   of vlan(4) when the state of IFCAP_VLAN_HWTSO support changes at
3319          *   runtime.  Therefore, IFCAP_VLAN_HWTSO also only is safe to use
3320          *   when link speed changes are not to be expected.
3321          * - Despite all the workarounds for TSO-related silicon bugs, at
3322          *   least 82579 still may hang at gigabit speed with IFCAP_TSO4.
3323          */
3324         ifp->if_capabilities |= IFCAP_TSO4 | IFCAP_VLAN_HWTSO;
3325
3326         /*
3327         ** Don't turn this on by default: if vlans are
3328         ** created on another pseudo device (e.g. lagg)
3329         ** then vlan events are not passed through, breaking
3330         ** operation, but with HW FILTER off it works. If
3331         ** using vlans directly on the em driver you can
3332         ** enable this and get full hardware tag filtering.
3333         */
3334         ifp->if_capabilities |= IFCAP_VLAN_HWFILTER;
3335
3336 #ifdef DEVICE_POLLING
3337         ifp->if_capabilities |= IFCAP_POLLING;
3338 #endif
3339
3340         /* Enable only WOL MAGIC by default */
3341         if (adapter->wol) {
3342                 ifp->if_capabilities |= IFCAP_WOL;
3343                 ifp->if_capenable |= IFCAP_WOL_MAGIC;
3344         }
3345                 
3346         /*
3347          * Specify the media types supported by this adapter and register
3348          * callbacks to update media and link information
3349          */
3350         ifmedia_init(&adapter->media, IFM_IMASK,
3351             em_media_change, em_media_status);
3352         if ((adapter->hw.phy.media_type == e1000_media_type_fiber) ||
3353             (adapter->hw.phy.media_type == e1000_media_type_internal_serdes)) {
3354                 u_char fiber_type = IFM_1000_SX;        /* default type */
3355
3356                 ifmedia_add(&adapter->media, IFM_ETHER | fiber_type | IFM_FDX, 
3357                             0, NULL);
3358                 ifmedia_add(&adapter->media, IFM_ETHER | fiber_type, 0, NULL);
3359         } else {
3360                 ifmedia_add(&adapter->media, IFM_ETHER | IFM_10_T, 0, NULL);
3361                 ifmedia_add(&adapter->media, IFM_ETHER | IFM_10_T | IFM_FDX,
3362                             0, NULL);
3363                 ifmedia_add(&adapter->media, IFM_ETHER | IFM_100_TX,
3364                             0, NULL);
3365                 ifmedia_add(&adapter->media, IFM_ETHER | IFM_100_TX | IFM_FDX,
3366                             0, NULL);
3367                 if (adapter->hw.phy.type != e1000_phy_ife) {
3368                         ifmedia_add(&adapter->media,
3369                                 IFM_ETHER | IFM_1000_T | IFM_FDX, 0, NULL);
3370                         ifmedia_add(&adapter->media,
3371                                 IFM_ETHER | IFM_1000_T, 0, NULL);
3372                 }
3373         }
3374         ifmedia_add(&adapter->media, IFM_ETHER | IFM_AUTO, 0, NULL);
3375         ifmedia_set(&adapter->media, IFM_ETHER | IFM_AUTO);
3376         return (0);
3377 }
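
/*
 * Example only: with the media table registered above, media can be
 * forced from userland in the usual ifmedia(4) way, e.g.:
 *
 *      ifconfig em0 media 100baseTX mediaopt full-duplex
 *      ifconfig em0 media autoselect
 */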
3378
3379
3380 /*
3381  * Manage DMA'able memory.
3382  */
3383 static void
3384 em_dmamap_cb(void *arg, bus_dma_segment_t *segs, int nseg, int error)
3385 {
3386         if (error)
3387                 return;
3388         *(bus_addr_t *) arg = segs[0].ds_addr;
3389 }
3390
3391 static int
3392 em_dma_malloc(struct adapter *adapter, bus_size_t size,
3393         struct em_dma_alloc *dma, int mapflags)
3394 {
3395         int error;
3396
3397         error = bus_dma_tag_create(bus_get_dma_tag(adapter->dev), /* parent */
3398                                 EM_DBA_ALIGN, 0,        /* alignment, bounds */
3399                                 BUS_SPACE_MAXADDR,      /* lowaddr */
3400                                 BUS_SPACE_MAXADDR,      /* highaddr */
3401                                 NULL, NULL,             /* filter, filterarg */
3402                                 size,                   /* maxsize */
3403                                 1,                      /* nsegments */
3404                                 size,                   /* maxsegsize */
3405                                 0,                      /* flags */
3406                                 NULL,                   /* lockfunc */
3407                                 NULL,                   /* lockarg */
3408                                 &dma->dma_tag);
3409         if (error) {
3410                 device_printf(adapter->dev,
3411                     "%s: bus_dma_tag_create failed: %d\n",
3412                     __func__, error);
3413                 goto fail_0;
3414         }
3415
3416         error = bus_dmamem_alloc(dma->dma_tag, (void**) &dma->dma_vaddr,
3417             BUS_DMA_NOWAIT | BUS_DMA_COHERENT, &dma->dma_map);
3418         if (error) {
3419                 device_printf(adapter->dev,
3420                     "%s: bus_dmamem_alloc(%ju) failed: %d\n",
3421                     __func__, (uintmax_t)size, error);
3422                 goto fail_2;
3423         }
3424
3425         dma->dma_paddr = 0;
3426         error = bus_dmamap_load(dma->dma_tag, dma->dma_map, dma->dma_vaddr,
3427             size, em_dmamap_cb, &dma->dma_paddr, mapflags | BUS_DMA_NOWAIT);
3428         if (error || dma->dma_paddr == 0) {
3429                 device_printf(adapter->dev,
3430                     "%s: bus_dmamap_load failed: %d\n",
3431                     __func__, error);
3432                 goto fail_3;
3433         }
3434
3435         return (0);
3436
3437 fail_3:
3438         bus_dmamap_unload(dma->dma_tag, dma->dma_map);
3439 fail_2:
3440         bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map);
3441         bus_dma_tag_destroy(dma->dma_tag);
3442 fail_0:
3443         dma->dma_tag = NULL;
3444
3445         return (error);
3446 }
3447
3448 static void
3449 em_dma_free(struct adapter *adapter, struct em_dma_alloc *dma)
3450 {
3451         if (dma->dma_tag == NULL)
3452                 return;
3453         if (dma->dma_paddr != 0) {
3454                 bus_dmamap_sync(dma->dma_tag, dma->dma_map,
3455                     BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
3456                 bus_dmamap_unload(dma->dma_tag, dma->dma_map);
3457                 dma->dma_paddr = 0;
3458         }
3459         if (dma->dma_vaddr != NULL) {
3460                 bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map);
3461                 dma->dma_vaddr = NULL;
3462         }
3463         bus_dma_tag_destroy(dma->dma_tag);
3464         dma->dma_tag = NULL;
3465 }
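
/*
 * Usage sketch for the DMA helpers above (illustration only; a code
 * fragment that assumes an "adapter" softc in scope -- the real
 * callers are em_allocate_queues() and the ring setup paths):
 */
#if 0
        struct em_dma_alloc dma;

        if (em_dma_malloc(adapter, PAGE_SIZE, &dma, BUS_DMA_NOWAIT) == 0) {
                /* dma.dma_vaddr and dma.dma_paddr are now valid */
                em_dma_free(adapter, &dma);
        }
#endif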
3466
3467
3468 /*********************************************************************
3469  *
3470  *  Allocate memory for the transmit and receive rings, and then
3471  *  the descriptors associated with each, called only once at attach.
3472  *
3473  **********************************************************************/
3474 static int
3475 em_allocate_queues(struct adapter *adapter)
3476 {
3477         device_t                dev = adapter->dev;
3478         struct tx_ring          *txr = NULL;
3479         struct rx_ring          *rxr = NULL;
3480         int rsize, tsize, error = E1000_SUCCESS;
3481         int txconf = 0, rxconf = 0;
3482
3483
3484         /* Allocate the TX ring struct memory */
3485         if (!(adapter->tx_rings =
3486             (struct tx_ring *) malloc(sizeof(struct tx_ring) *
3487             adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
3488                 device_printf(dev, "Unable to allocate TX ring memory\n");
3489                 error = ENOMEM;
3490                 goto fail;
3491         }
3492
3493         /* Now allocate the RX */
3494         if (!(adapter->rx_rings =
3495             (struct rx_ring *) malloc(sizeof(struct rx_ring) *
3496             adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
3497                 device_printf(dev, "Unable to allocate RX ring memory\n");
3498                 error = ENOMEM;
3499                 goto rx_fail;
3500         }
3501
3502         tsize = roundup2(adapter->num_tx_desc *
3503             sizeof(struct e1000_tx_desc), EM_DBA_ALIGN);
3504         /*
3505          * Now set up the TX queues, txconf is needed to handle the
3506          * possibility that things fail midcourse and we need to
3507          * undo memory gracefully
3508          */ 
3509         for (int i = 0; i < adapter->num_queues; i++, txconf++) {
3510                 /* Set up some basics */
3511                 txr = &adapter->tx_rings[i];
3512                 txr->adapter = adapter;
3513                 txr->me = i;
3514
3515                 /* Initialize the TX lock */
3516                 snprintf(txr->mtx_name, sizeof(txr->mtx_name), "%s:tx(%d)",
3517                     device_get_nameunit(dev), txr->me);
3518                 mtx_init(&txr->tx_mtx, txr->mtx_name, NULL, MTX_DEF);
3519
3520                 if (em_dma_malloc(adapter, tsize,
3521                         &txr->txdma, BUS_DMA_NOWAIT)) {
3522                         device_printf(dev,
3523                             "Unable to allocate TX Descriptor memory\n");
3524                         error = ENOMEM;
3525                         goto err_tx_desc;
3526                 }
3527                 txr->tx_base = (struct e1000_tx_desc *)txr->txdma.dma_vaddr;
3528                 bzero((void *)txr->tx_base, tsize);
3529
3530                 if (em_allocate_transmit_buffers(txr)) {
3531                         device_printf(dev,
3532                             "Critical Failure setting up transmit buffers\n");
3533                         error = ENOMEM;
3534                         goto err_tx_desc;
3535                 }
3536 #if __FreeBSD_version >= 800000
3537                 /* Allocate a buf ring */
3538                 txr->br = buf_ring_alloc(4096, M_DEVBUF,
3539                     M_WAITOK, &txr->tx_mtx);
3540 #endif
3541         }
3542
3543         /*
3544          * Next the RX queues...
3545          */ 
3546         rsize = roundup2(adapter->num_rx_desc *
3547             sizeof(union e1000_rx_desc_extended), EM_DBA_ALIGN);
3548         for (int i = 0; i < adapter->num_queues; i++, rxconf++) {
3549                 rxr = &adapter->rx_rings[i];
3550                 rxr->adapter = adapter;
3551                 rxr->me = i;
3552
3553                 /* Initialize the RX lock */
3554                 snprintf(rxr->mtx_name, sizeof(rxr->mtx_name), "%s:rx(%d)",
3555                     device_get_nameunit(dev), rxr->me);
3556                 mtx_init(&rxr->rx_mtx, rxr->mtx_name, NULL, MTX_DEF);
3557
3558                 if (em_dma_malloc(adapter, rsize,
3559                         &rxr->rxdma, BUS_DMA_NOWAIT)) {
3560                         device_printf(dev,
3561                             "Unable to allocate RX Descriptor memory\n");
3562                         error = ENOMEM;
3563                         goto err_rx_desc;
3564                 }
3565                 rxr->rx_base = (union e1000_rx_desc_extended *)rxr->rxdma.dma_vaddr;
3566                 bzero((void *)rxr->rx_base, rsize);
3567
3568                 /* Allocate receive buffers for the ring */
3569                 if (em_allocate_receive_buffers(rxr)) {
3570                         device_printf(dev,
3571                             "Critical Failure setting up receive buffers\n");
3572                         error = ENOMEM;
3573                         goto err_rx_desc;
3574                 }
3575         }
3576
3577         return (0);
3578
3579 err_rx_desc:
3580         for (rxr = adapter->rx_rings; rxconf > 0; rxr++, rxconf--)
3581                 em_dma_free(adapter, &rxr->rxdma);
3582 err_tx_desc:
3583         for (txr = adapter->tx_rings; txconf > 0; txr++, txconf--) {
3584 #if __FreeBSD_version >= 800000
3585                 buf_ring_free(txr->br, M_DEVBUF);
3586 #endif
3587                 em_dma_free(adapter, &txr->txdma);
3588         }
3589         free(adapter->rx_rings, M_DEVBUF);
3590 rx_fail:
3591         free(adapter->tx_rings, M_DEVBUF);
3591 fail:
3592         return (error);
3593 }
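
/*
 * Worked example of the descriptor-ring sizing above (hypothetical
 * values): with num_tx_desc = 1024 and a 16-byte e1000_tx_desc,
 * tsize = roundup2(1024 * 16, EM_DBA_ALIGN) = 16384, which is already
 * a multiple of the 128-byte descriptor alignment.
 */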
3594
3595
3596 /*********************************************************************
3597  *
3598  *  Allocate memory for tx_buffer structures. The tx_buffer stores all
3599  *  the information needed to transmit a packet on the wire. This is
3600  *  called only once at attach; setup is done on every reset.
3601  *
3602  **********************************************************************/
3603 static int
3604 em_allocate_transmit_buffers(struct tx_ring *txr)
3605 {
3606         struct adapter *adapter = txr->adapter;
3607         device_t dev = adapter->dev;
3608         struct em_txbuffer *txbuf;
3609         int error, i;
3610
3611         /*
3612          * Setup DMA descriptor areas.
3613          */
3614         if ((error = bus_dma_tag_create(bus_get_dma_tag(dev),
3615                                1, 0,                    /* alignment, bounds */
3616                                BUS_SPACE_MAXADDR,       /* lowaddr */
3617                                BUS_SPACE_MAXADDR,       /* highaddr */
3618                                NULL, NULL,              /* filter, filterarg */
3619                                EM_TSO_SIZE,             /* maxsize */
3620                                EM_MAX_SCATTER,          /* nsegments */
3621                                PAGE_SIZE,               /* maxsegsize */
3622                                0,                       /* flags */
3623                                NULL,                    /* lockfunc */
3624                                NULL,                    /* lockfuncarg */
3625                                &txr->txtag))) {
3626                 device_printf(dev, "Unable to allocate TX DMA tag\n");
3627                 goto fail;
3628         }
3629
3630         if (!(txr->tx_buffers =
3631             (struct em_txbuffer *) malloc(sizeof(struct em_txbuffer) *
3632             adapter->num_tx_desc, M_DEVBUF, M_NOWAIT | M_ZERO))) {
3633                 device_printf(dev, "Unable to allocate tx_buffer memory\n");
3634                 error = ENOMEM;
3635                 goto fail;
3636         }
3637
3638         /* Create the descriptor buffer dma maps */
3639         txbuf = txr->tx_buffers;
3640         for (i = 0; i < adapter->num_tx_desc; i++, txbuf++) {
3641                 error = bus_dmamap_create(txr->txtag, 0, &txbuf->map);
3642                 if (error != 0) {
3643                         device_printf(dev, "Unable to create TX DMA map\n");
3644                         goto fail;
3645                 }
3646         }
3647
3648         return (0);
3649 fail:
3650         /* Free everything; this handles the case where we failed midway */
3651         em_free_transmit_structures(adapter);
3652         return (error);
3653 }
3654
3655 /*********************************************************************
3656  *
3657  *  Initialize a transmit ring.
3658  *
3659  **********************************************************************/
3660 static void
3661 em_setup_transmit_ring(struct tx_ring *txr)
3662 {
3663         struct adapter *adapter = txr->adapter;
3664         struct em_txbuffer *txbuf;
3665         int i;
3666 #ifdef DEV_NETMAP
3667         struct netmap_adapter *na = NA(adapter->ifp);
3668         struct netmap_slot *slot;
3669 #endif /* DEV_NETMAP */
3670
3671         /* Clear the old descriptor contents */
3672         EM_TX_LOCK(txr);
3673 #ifdef DEV_NETMAP
3674         slot = netmap_reset(na, NR_TX, txr->me, 0);
3675 #endif /* DEV_NETMAP */
3676
3677         bzero((void *)txr->tx_base,
3678               (sizeof(struct e1000_tx_desc)) * adapter->num_tx_desc);
3679         /* Reset indices */
3680         txr->next_avail_desc = 0;
3681         txr->next_to_clean = 0;
3682
3683         /* Free any existing tx buffers. */
3684         txbuf = txr->tx_buffers;
3685         for (i = 0; i < adapter->num_tx_desc; i++, txbuf++) {
3686                 if (txbuf->m_head != NULL) {
3687                         bus_dmamap_sync(txr->txtag, txbuf->map,
3688                             BUS_DMASYNC_POSTWRITE);
3689                         bus_dmamap_unload(txr->txtag, txbuf->map);
3690                         m_freem(txbuf->m_head);
3691                         txbuf->m_head = NULL;
3692                 }
3693 #ifdef DEV_NETMAP
3694                 if (slot) {
3695                         int si = netmap_idx_n2k(&na->tx_rings[txr->me], i);
3696                         uint64_t paddr;
3697                         void *addr;
3698
3699                         addr = PNMB(na, slot + si, &paddr);
3700                         txr->tx_base[i].buffer_addr = htole64(paddr);
3701                         /* reload the map for netmap mode */
3702                         netmap_load_map(na, txr->txtag, txbuf->map, addr);
3703                 }
3704 #endif /* DEV_NETMAP */
3705
3706                 /* clear the watch index */
3707                 txbuf->next_eop = -1;
3708         }
3709
3710         /* Set number of descriptors available */
3711         txr->tx_avail = adapter->num_tx_desc;
3712         txr->busy = EM_TX_IDLE;
3713
3714         /* Clear checksum offload context. */
3715         txr->last_hw_offload = 0;
3716         txr->last_hw_ipcss = 0;
3717         txr->last_hw_ipcso = 0;
3718         txr->last_hw_tucss = 0;
3719         txr->last_hw_tucso = 0;
3720
3721         bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
3722             BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
3723         EM_TX_UNLOCK(txr);
3724 }
3725
3726 /*********************************************************************
3727  *
3728  *  Initialize all transmit rings.
3729  *
3730  **********************************************************************/
3731 static void
3732 em_setup_transmit_structures(struct adapter *adapter)
3733 {
3734         struct tx_ring *txr = adapter->tx_rings;
3735
3736         for (int i = 0; i < adapter->num_queues; i++, txr++)
3737                 em_setup_transmit_ring(txr);
3738
3739         return;
3740 }
3741
3742 /*********************************************************************
3743  *
3744  *  Enable transmit unit.
3745  *
3746  **********************************************************************/
3747 static void
3748 em_initialize_transmit_unit(struct adapter *adapter)
3749 {
3750         struct tx_ring  *txr = adapter->tx_rings;
3751         struct e1000_hw *hw = &adapter->hw;
3752         u32     tctl, txdctl = 0, tarc, tipg = 0;
3753
3754         INIT_DEBUGOUT("em_initialize_transmit_unit: begin");
3755
3756         for (int i = 0; i < adapter->num_queues; i++, txr++) {
3757                 u64 bus_addr = txr->txdma.dma_paddr;
3758                 /* Base and Len of TX Ring */
3759                 E1000_WRITE_REG(hw, E1000_TDLEN(i),
3760                     adapter->num_tx_desc * sizeof(struct e1000_tx_desc));
3761                 E1000_WRITE_REG(hw, E1000_TDBAH(i),
3762                     (u32)(bus_addr >> 32));
3763                 E1000_WRITE_REG(hw, E1000_TDBAL(i),
3764                     (u32)bus_addr);
3765                 /* Init the HEAD/TAIL indices */
3766                 E1000_WRITE_REG(hw, E1000_TDT(i), 0);
3767                 E1000_WRITE_REG(hw, E1000_TDH(i), 0);
3768
3769                 HW_DEBUGOUT2("Base = %x, Length = %x\n",
3770                     E1000_READ_REG(&adapter->hw, E1000_TDBAL(i)),
3771                     E1000_READ_REG(&adapter->hw, E1000_TDLEN(i)));
3772
3773                 txr->busy = EM_TX_IDLE;
3774                 txdctl = 0; /* clear txdctl */
3775                 txdctl |= 0x1f; /* PTHRESH */
3776                 txdctl |= 1 << 8; /* HTHRESH */
3777                 txdctl |= 1 << 16; /* WTHRESH */
3778                 txdctl |= 1 << 22; /* Reserved bit 22 must always be 1 */
3779                 txdctl |= E1000_TXDCTL_GRAN;
3780                 txdctl |= 1 << 25; /* LWTHRESH */
3781
3782                 E1000_WRITE_REG(hw, E1000_TXDCTL(i), txdctl);
3783         }
3784
3785         /* Set the default values for the Tx Inter Packet Gap timer */
3786         switch (adapter->hw.mac.type) {
3787         case e1000_80003es2lan:
3788                 tipg = DEFAULT_82543_TIPG_IPGR1;
3789                 tipg |= DEFAULT_80003ES2LAN_TIPG_IPGR2 <<
3790                     E1000_TIPG_IPGR2_SHIFT;
3791                 break;
3792         default:
3793                 if ((adapter->hw.phy.media_type == e1000_media_type_fiber) ||
3794                     (adapter->hw.phy.media_type ==
3795                     e1000_media_type_internal_serdes))
3796                         tipg = DEFAULT_82543_TIPG_IPGT_FIBER;
3797                 else
3798                         tipg = DEFAULT_82543_TIPG_IPGT_COPPER;
3799                 tipg |= DEFAULT_82543_TIPG_IPGR1 << E1000_TIPG_IPGR1_SHIFT;
3800                 tipg |= DEFAULT_82543_TIPG_IPGR2 << E1000_TIPG_IPGR2_SHIFT;
3801         }
3802
3803         E1000_WRITE_REG(&adapter->hw, E1000_TIPG, tipg);
3804         E1000_WRITE_REG(&adapter->hw, E1000_TIDV, adapter->tx_int_delay.value);
3805
3806         if (adapter->hw.mac.type >= e1000_82540)
3807                 E1000_WRITE_REG(&adapter->hw, E1000_TADV,
3808                     adapter->tx_abs_int_delay.value);
3809
3810         if ((adapter->hw.mac.type == e1000_82571) ||
3811             (adapter->hw.mac.type == e1000_82572)) {
3812                 tarc = E1000_READ_REG(&adapter->hw, E1000_TARC(0));
3813                 tarc |= TARC_SPEED_MODE_BIT;
3814                 E1000_WRITE_REG(&adapter->hw, E1000_TARC(0), tarc);
3815         } else if (adapter->hw.mac.type == e1000_80003es2lan) {
3816                 /* errata: program both queues to unweighted RR */
3817                 tarc = E1000_READ_REG(&adapter->hw, E1000_TARC(0));
3818                 tarc |= 1;
3819                 E1000_WRITE_REG(&adapter->hw, E1000_TARC(0), tarc);
3820                 tarc = E1000_READ_REG(&adapter->hw, E1000_TARC(1));
3821                 tarc |= 1;
3822                 E1000_WRITE_REG(&adapter->hw, E1000_TARC(1), tarc);
3823         } else if (adapter->hw.mac.type == e1000_82574) {
3824                 tarc = E1000_READ_REG(&adapter->hw, E1000_TARC(0));
3825                 tarc |= TARC_ERRATA_BIT;
3826                 if (adapter->num_queues > 1) {
3827                         tarc |= (TARC_COMPENSATION_MODE | TARC_MQ_FIX);
3828                         E1000_WRITE_REG(&adapter->hw, E1000_TARC(0), tarc);
3829                         E1000_WRITE_REG(&adapter->hw, E1000_TARC(1), tarc);
3830                 } else
3831                         E1000_WRITE_REG(&adapter->hw, E1000_TARC(0), tarc);
3832         }
3833
3834         adapter->txd_cmd = E1000_TXD_CMD_IFCS;
3835         if (adapter->tx_int_delay.value > 0)
3836                 adapter->txd_cmd |= E1000_TXD_CMD_IDE;
3837
3838         /* Program the Transmit Control Register */
3839         tctl = E1000_READ_REG(&adapter->hw, E1000_TCTL);
3840         tctl &= ~E1000_TCTL_CT;
3841         tctl |= (E1000_TCTL_PSP | E1000_TCTL_RTLC | E1000_TCTL_EN |
3842                    (E1000_COLLISION_THRESHOLD << E1000_CT_SHIFT));
3843
3844         if (adapter->hw.mac.type >= e1000_82571)
3845                 tctl |= E1000_TCTL_MULR;
3846
3847         /* This write will effectively turn on the transmit unit. */
3848         E1000_WRITE_REG(&adapter->hw, E1000_TCTL, tctl);
3849
3850         /* SPT and KBL errata workarounds */
3851         if (hw->mac.type == e1000_pch_spt) {
3852                 u32 reg;
3853                 reg = E1000_READ_REG(hw, E1000_IOSFPC);
3854                 reg |= E1000_RCTL_RDMTS_HEX;
3855                 E1000_WRITE_REG(hw, E1000_IOSFPC, reg);
3856                 /* i218-i219 Specification Update 1.5.4.5 */
3857                 reg = E1000_READ_REG(hw, E1000_TARC(0));
3858                 reg &= ~E1000_TARC0_CB_MULTIQ_3_REQ;
3859                 reg |= E1000_TARC0_CB_MULTIQ_2_REQ;
3860                 E1000_WRITE_REG(hw, E1000_TARC(0), reg);
3861         }
3862 }
3863
3864
3865 /*********************************************************************
3866  *
3867  *  Free all transmit rings.
3868  *
3869  **********************************************************************/
3870 static void
3871 em_free_transmit_structures(struct adapter *adapter)
3872 {
3873         struct tx_ring *txr = adapter->tx_rings;
3874
3875         for (int i = 0; i < adapter->num_queues; i++, txr++) {
3876                 EM_TX_LOCK(txr);
3877                 em_free_transmit_buffers(txr);
3878                 em_dma_free(adapter, &txr->txdma);
3879                 EM_TX_UNLOCK(txr);
3880                 EM_TX_LOCK_DESTROY(txr);
3881         }
3882
3883         free(adapter->tx_rings, M_DEVBUF);
3884 }
3885
3886 /*********************************************************************
3887  *
3888  *  Free transmit ring related data structures.
3889  *
3890  **********************************************************************/
3891 static void
3892 em_free_transmit_buffers(struct tx_ring *txr)
3893 {
3894         struct adapter          *adapter = txr->adapter;
3895         struct em_txbuffer      *txbuf;
3896
3897         INIT_DEBUGOUT("free_transmit_ring: begin");
3898
3899         if (txr->tx_buffers == NULL)
3900                 return;
3901
3902         for (int i = 0; i < adapter->num_tx_desc; i++) {
3903                 txbuf = &txr->tx_buffers[i];
3904                 if (txbuf->m_head != NULL) {
3905                         bus_dmamap_sync(txr->txtag, txbuf->map,
3906                             BUS_DMASYNC_POSTWRITE);
3907                         bus_dmamap_unload(txr->txtag,
3908                             txbuf->map);
3909                         m_freem(txbuf->m_head);
3910                         txbuf->m_head = NULL;
3911                         if (txbuf->map != NULL) {
3912                                 bus_dmamap_destroy(txr->txtag,
3913                                     txbuf->map);
3914                                 txbuf->map = NULL;
3915                         }
3916                 } else if (txbuf->map != NULL) {
3917                         bus_dmamap_unload(txr->txtag,
3918                             txbuf->map);
3919                         bus_dmamap_destroy(txr->txtag,
3920                             txbuf->map);
3921                         txbuf->map = NULL;
3922                 }
3923         }
3924 #if __FreeBSD_version >= 800000
3925         if (txr->br != NULL)
3926                 buf_ring_free(txr->br, M_DEVBUF);
3927 #endif
3928         if (txr->tx_buffers != NULL) {
3929                 free(txr->tx_buffers, M_DEVBUF);
3930                 txr->tx_buffers = NULL;
3931         }
3932         if (txr->txtag != NULL) {
3933                 bus_dma_tag_destroy(txr->txtag);
3934                 txr->txtag = NULL;
3935         }
3936         return;
3937 }
3938
3939
3940 /*********************************************************************
3941  *  The offload context is protocol specific (TCP/UDP) and thus
3942  *  only needs to be set when the protocol changes. The occasion
3943  *  of a context change can be a performance detriment, so it
3944  *  might be better left disabled. The reason arises in the way
3945  *  in which the controller supports pipelined requests from the
3946  *  Tx data DMA. Up to four requests can be pipelined, and they may
3947  *  belong to the same packet or to multiple packets. However all
3948  *  requests for one packet are issued before a request is issued
3949  *  for a subsequent packet and if a request for the next packet
3950  *  requires a context change, that request will be stalled
3951  *  until the previous request completes. This means setting up
3952  *  a new context effectively disables pipelined Tx data DMA which
3953  *  in turn greatly slows down performance when sending small-sized
3954  *  frames.
3955  **********************************************************************/
3956 static void
3957 em_transmit_checksum_setup(struct tx_ring *txr, struct mbuf *mp, int ip_off,
3958     struct ip *ip, u32 *txd_upper, u32 *txd_lower)
3959 {
3960         struct adapter                  *adapter = txr->adapter;
3961         struct e1000_context_desc       *TXD = NULL;
3962         struct em_txbuffer              *tx_buffer;
3963         int                             cur, hdr_len;
3964         u32                             cmd = 0;
3965         u16                             offload = 0;
3966         u8                              ipcso, ipcss, tucso, tucss;
3967
3968         ipcss = ipcso = tucss = tucso = 0;
3969         hdr_len = ip_off + (ip->ip_hl << 2);
3970         cur = txr->next_avail_desc;
3971
3972         /* Setup of IP header checksum. */
3973         if (mp->m_pkthdr.csum_flags & CSUM_IP) {
3974                 *txd_upper |= E1000_TXD_POPTS_IXSM << 8;
3975                 offload |= CSUM_IP;
3976                 ipcss = ip_off;
3977                 ipcso = ip_off + offsetof(struct ip, ip_sum);
3978                 /*
3979                  * Start offset for header checksum calculation.
3980                  * End offset for header checksum calculation.
3981                  * Offset of place to put the checksum.
3982                  */
3983                 TXD = (struct e1000_context_desc *)&txr->tx_base[cur];
3984                 TXD->lower_setup.ip_fields.ipcss = ipcss;
3985                 TXD->lower_setup.ip_fields.ipcse = htole16(hdr_len);
3986                 TXD->lower_setup.ip_fields.ipcso = ipcso;
3987                 cmd |= E1000_TXD_CMD_IP;
3988         }
3989
3990         if (mp->m_pkthdr.csum_flags & CSUM_TCP) {
3991                 *txd_lower = E1000_TXD_CMD_DEXT | E1000_TXD_DTYP_D;
3992                 *txd_upper |= E1000_TXD_POPTS_TXSM << 8;
3993                 offload |= CSUM_TCP;
3994                 tucss = hdr_len;
3995                 tucso = hdr_len + offsetof(struct tcphdr, th_sum);
3996                 /*
3997                  * The 82574L can only remember the *last* context used
3998                  * regardless of the queue it was used for.  We cannot reuse
3999                  * contexts on this hardware platform and must generate a new
4000                  * context every time.  82574L hardware spec, section 7.2.6,
4001                  * second note.
4002                  */
4003                 if (adapter->num_queues < 2) {
4004                         /*
4005                         * Setting up a new checksum offload context for every
4006                         * frame takes a lot of processing time for the hardware.
4007                         * This also reduces performance a lot for small-sized
4008                         * frames, so avoid it if the driver can use a previously
4009                         * configured checksum offload context.
4010                         */
4011                         if (txr->last_hw_offload == offload) {
4012                                 if (offload & CSUM_IP) {
4013                                         if (txr->last_hw_ipcss == ipcss &&
4014                                             txr->last_hw_ipcso == ipcso &&
4015                                             txr->last_hw_tucss == tucss &&
4016                                             txr->last_hw_tucso == tucso)
4017                                                 return;
4018                                 } else {
4019                                         if (txr->last_hw_tucss == tucss &&
4020                                             txr->last_hw_tucso == tucso)
4021                                                 return;
4022                                 }
4023                         }
4024                         txr->last_hw_offload = offload;
4025                         txr->last_hw_tucss = tucss;
4026                         txr->last_hw_tucso = tucso;
4027                 }
4028                 /*
4029                  * Start offset for payload checksum calculation.
4030                  * End offset for payload checksum calculation.
4031                  * Offset of place to put the checksum.
4032                  */
4033                 TXD = (struct e1000_context_desc *)&txr->tx_base[cur];
4034         TXD->upper_setup.tcp_fields.tucss = tucss;
4035                 TXD->upper_setup.tcp_fields.tucse = htole16(0);
4036                 TXD->upper_setup.tcp_fields.tucso = tucso;
4037                 cmd |= E1000_TXD_CMD_TCP;
4038         } else if (mp->m_pkthdr.csum_flags & CSUM_UDP) {
4039                 *txd_lower = E1000_TXD_CMD_DEXT | E1000_TXD_DTYP_D;
4040                 *txd_upper |= E1000_TXD_POPTS_TXSM << 8;
                offload |= CSUM_UDP; /* track for the context-reuse check below */
4041                 tucss = hdr_len;
4042                 tucso = hdr_len + offsetof(struct udphdr, uh_sum);
4043                 /*
4044                  * The 82574L can only remember the *last* context used
4045                  * regardless of the queue it was used for.  We cannot reuse
4046                  * contexts on this hardware platform and must generate a new
4047                  * context every time.  82574L hardware spec, section 7.2.6,
4048                  * second note.
4049                  */
4050                 if (adapter->num_queues < 2) {
4051                         /*
4052                         * Setting up a new checksum offload context for
4053                         * every frame takes a lot of processing time in
4054                         * hardware and hurts performance badly for small
4055                         * frames, so avoid it if the driver can reuse the
4056                         * previously configured checksum offload context.
4057                         */
4058                         if (txr->last_hw_offload == offload) {
4059                                 if (offload & CSUM_IP) {
4060                                         if (txr->last_hw_ipcss == ipcss &&
4061                                         txr->last_hw_ipcso == ipcso &&
4062                                         txr->last_hw_tucss == tucss &&
4063                                         txr->last_hw_tucso == tucso)
4064                                                 return;
4065                                 } else {
4066                                         if (txr->last_hw_tucss == tucss &&
4067                                         txr->last_hw_tucso == tucso)
4068                                                 return;
4069                                 }
4070                         }
4071                         txr->last_hw_offload = offload;
4072                         txr->last_hw_tucss = tucss;
4073                         txr->last_hw_tucso = tucso;
4074                 }
4075                 /*
4076                  * Start offset for payload checksum calculation.
4077                  * End offset for payload checksum calculation.
4078                  * Offset of place to put the checksum.
4079                  */
4080                 TXD = (struct e1000_context_desc *)&txr->tx_base[cur];
4081                 TXD->upper_setup.tcp_fields.tucss = tucss;
4082                 TXD->upper_setup.tcp_fields.tucse = htole16(0);
4083                 TXD->upper_setup.tcp_fields.tucso = tucso;
4084         }
4085   
4086         if (offload & CSUM_IP) {
4087                 txr->last_hw_ipcss = ipcss;
4088                 txr->last_hw_ipcso = ipcso;
4089         }
4090
4091         TXD->tcp_seg_setup.data = htole32(0);
4092         TXD->cmd_and_length =
4093             htole32(adapter->txd_cmd | E1000_TXD_CMD_DEXT | cmd);
4094         tx_buffer = &txr->tx_buffers[cur];
4095         tx_buffer->m_head = NULL;
4096         tx_buffer->next_eop = -1;
4097
4098         if (++cur == adapter->num_tx_desc)
4099                 cur = 0;
4100
4101         txr->tx_avail--;
4102         txr->next_avail_desc = cur;
4103 }
4104
4105
4106 /**********************************************************************
4107  *
4108  *  Setup work for hardware segmentation offload (TSO)
4109  *
4110  **********************************************************************/
4111 static void
4112 em_tso_setup(struct tx_ring *txr, struct mbuf *mp, int ip_off,
4113     struct ip *ip, struct tcphdr *tp, u32 *txd_upper, u32 *txd_lower)
4114 {
4115         struct adapter                  *adapter = txr->adapter;
4116         struct e1000_context_desc       *TXD;
4117         struct em_txbuffer              *tx_buffer;
4118         int cur, hdr_len;
4119
4120         /*
4121          * In theory we could reuse the same TSO context if and only if
4122          * the frame is the same type (IP/TCP) and has the same MSS.
4123          * However, checking whether a frame has the same IP/TCP
4124          * structure is hard, so just ignore that and always establish
4125          * a new TSO context.
4126          */
4127         hdr_len = ip_off + (ip->ip_hl << 2) + (tp->th_off << 2);
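        /*
         * e.g. an option-less IPv4/TCP frame has ip_off == 14,
         * ip_hl == 5 and th_off == 5, giving hdr_len == 54; the
         * hardware then replicates those 54 bytes of headers in
         * front of each MSS-sized chunk of payload it carves off.
         */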
4128         *txd_lower = (E1000_TXD_CMD_DEXT |      /* Extended descr type */
4129                       E1000_TXD_DTYP_D |        /* Data descr type */
4130                       E1000_TXD_CMD_TSE);       /* Do TSE on this packet */
4131
4132         /* IP and/or TCP header checksum calculation and insertion. */
4133         *txd_upper = (E1000_TXD_POPTS_IXSM | E1000_TXD_POPTS_TXSM) << 8;
4134
4135         cur = txr->next_avail_desc;
4136         tx_buffer = &txr->tx_buffers[cur];
4137         TXD = (struct e1000_context_desc *) &txr->tx_base[cur];
4138
4139         /*
4140          * Start offset for header checksum calculation.
4141          * End offset for header checksum calculation.
4142          * Offset of place to put the checksum.
4143          */
4144         TXD->lower_setup.ip_fields.ipcss = ip_off;
4145         TXD->lower_setup.ip_fields.ipcse =
4146             htole16(ip_off + (ip->ip_hl << 2) - 1);
4147         TXD->lower_setup.ip_fields.ipcso = ip_off + offsetof(struct ip, ip_sum);
4148         /*
4149          * Start offset for payload checksum calculation.
4150          * End offset for payload checksum calculation.
4151          * Offset of place to put the checksum.
4152          */
4153         TXD->upper_setup.tcp_fields.tucss = ip_off + (ip->ip_hl << 2);
4154         TXD->upper_setup.tcp_fields.tucse = 0;
4155         TXD->upper_setup.tcp_fields.tucso =
4156             ip_off + (ip->ip_hl << 2) + offsetof(struct tcphdr, th_sum);
4157         /*
4158          * Payload size per packet w/o any headers.
4159          * Length of all headers up to payload.
4160          */
4161         TXD->tcp_seg_setup.fields.mss = htole16(mp->m_pkthdr.tso_segsz);
4162         TXD->tcp_seg_setup.fields.hdr_len = hdr_len;
4163
4164         TXD->cmd_and_length = htole32(adapter->txd_cmd |
4165                                 E1000_TXD_CMD_DEXT |    /* Extended descr */
4166                                 E1000_TXD_CMD_TSE |     /* TSE context */
4167                                 E1000_TXD_CMD_IP |      /* Do IP csum */
4168                                 E1000_TXD_CMD_TCP |     /* Do TCP checksum */
4169                                 (mp->m_pkthdr.len - (hdr_len))); /* Total len */
4170
4171         tx_buffer->m_head = NULL;
4172         tx_buffer->next_eop = -1;
4173
4174         if (++cur == adapter->num_tx_desc)
4175                 cur = 0;
4176
4177         txr->tx_avail--;
4178         txr->next_avail_desc = cur;
4179         txr->tx_tso = TRUE;
4180 }
4181
4182
4183 /**********************************************************************
4184  *
4185  *  Examine each tx_buffer in the used queue. If the hardware is done
4186  *  processing the packet then free associated resources. The
4187  *  tx_buffer is put back on the free queue.
4188  *
4189  **********************************************************************/
4190 static void
4191 em_txeof(struct tx_ring *txr)
4192 {
4193         struct adapter  *adapter = txr->adapter;
4194         int first, last, done, processed;
4195         struct em_txbuffer *tx_buffer;
4196         struct e1000_tx_desc   *tx_desc, *eop_desc;
4197         struct ifnet   *ifp = adapter->ifp;
4198
4199         EM_TX_LOCK_ASSERT(txr);
4200 #ifdef DEV_NETMAP
4201         if (netmap_tx_irq(ifp, txr->me))
4202                 return;
4203 #endif /* DEV_NETMAP */
4204
4205         /* No work, make sure hang detection is disabled */
4206         if (txr->tx_avail == adapter->num_tx_desc) {
4207                 txr->busy = EM_TX_IDLE;
4208                 return;
4209         }
4210
4211         processed = 0;
4212         first = txr->next_to_clean;
4213         tx_desc = &txr->tx_base[first];
4214         tx_buffer = &txr->tx_buffers[first];
4215         last = tx_buffer->next_eop;
4216         eop_desc = &txr->tx_base[last];
4217
4218         /*
4219          * Get the index of the first descriptor
4220          * AFTER the EOP of the first packet, so
4221          * that the inner while loop can use a
4222          * simple comparison.
4223          */
4224         if (++last == adapter->num_tx_desc)
4225                 last = 0;
4226         done = last;
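        /*
         * e.g. with num_tx_desc == 1024: first == 1020 with the EOP
         * at 1022 gives done == 1023, while an EOP in the last slot
         * (1023) wraps done around to 0.
         */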
4227
4228         bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
4229             BUS_DMASYNC_POSTREAD);
4230
4231         while (eop_desc->upper.fields.status & E1000_TXD_STAT_DD) {
4232                 /* We clean the range of the packet */
4233                 while (first != done) {
4234                         tx_desc->upper.data = 0;
4235                         tx_desc->lower.data = 0;
4236                         tx_desc->buffer_addr = 0;
4237                         ++txr->tx_avail;
4238                         ++processed;
4239
4240                         if (tx_buffer->m_head) {
4241                                 bus_dmamap_sync(txr->txtag,
4242                                     tx_buffer->map,
4243                                     BUS_DMASYNC_POSTWRITE);
4244                                 bus_dmamap_unload(txr->txtag,
4245                                     tx_buffer->map);
4246                                 m_freem(tx_buffer->m_head);
4247                                 tx_buffer->m_head = NULL;
4248                         }
4249                         tx_buffer->next_eop = -1;
4250
4251                         if (++first == adapter->num_tx_desc)
4252                                 first = 0;
4253
4254                         tx_buffer = &txr->tx_buffers[first];
4255                         tx_desc = &txr->tx_base[first];
4256                 }
4257                 ++ifp->if_opackets;
4258                 /* See if we can continue to the next packet */
4259                 last = tx_buffer->next_eop;
4260                 if (last != -1) {
4261                         eop_desc = &txr->tx_base[last];
4262                         /* Get new done point */
4263                         if (++last == adapter->num_tx_desc) last = 0;
4264                         done = last;
4265                 } else
4266                         break;
4267         }
4268         bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
4269             BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
4270
4271         txr->next_to_clean = first;
4272
4273         /*
4274         ** Hang detection: we know there's work outstanding
4275         ** or the entry return would have been taken, so no
4276         ** descriptor processed here indicates a potential hang.
4277         ** The local timer will examine this and do a reset if needed.
4278         */
4279         if (processed == 0) {
4280                 if (txr->busy != EM_TX_HUNG)
4281                         ++txr->busy;
4282         } else /* At least one descriptor was cleaned */
4283                 txr->busy = EM_TX_BUSY; /* note this clears HUNG */
4284
4285         /*
4286          * If we have a minimum free, clear IFF_DRV_OACTIVE
4287          * to tell the stack that it is OK to send packets.
4288          * Notice that all writes of OACTIVE happen under the
4289          * TX lock which, with a single queue, guarantees 
4290          * sanity.
4291          */
4292         if (txr->tx_avail >= EM_MAX_SCATTER) {
4293                 ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;
4294         }
4295
4296         /* Disable hang detection if all clean */
4297         if (txr->tx_avail == adapter->num_tx_desc)
4298                 txr->busy = EM_TX_IDLE;
4299 }
4300
4301 /*********************************************************************
4302  *
4303  *  Refresh RX descriptor mbufs from system mbuf buffer pool.
4304  *
4305  **********************************************************************/
4306 static void
4307 em_refresh_mbufs(struct rx_ring *rxr, int limit)
4308 {
4309         struct adapter          *adapter = rxr->adapter;
4310         struct mbuf             *m;
4311         bus_dma_segment_t       segs;
4312         struct em_rxbuffer      *rxbuf;
4313         int                     i, j, error, nsegs;
4314         bool                    cleaned = FALSE;
4315
4316         i = j = rxr->next_to_refresh;
4317         /*
4318         ** Get one descriptor beyond
4319         ** our work mark to control
4320         ** the loop.
4321         */
4322         if (++j == adapter->num_rx_desc)
4323                 j = 0;
4324
4325         while (j != limit) {
4326                 rxbuf = &rxr->rx_buffers[i];
4327                 if (rxbuf->m_head == NULL) {
4328                         m = m_getjcl(M_NOWAIT, MT_DATA,
4329                             M_PKTHDR, adapter->rx_mbuf_sz);
4330                         /*
4331                         ** If we have a temporary resource shortage
4332                         ** that causes a failure, just abort refresh
4333                         ** for now; we will return to this point when
4334                         ** reinvoked from em_rxeof.
4335                         */
4336                         if (m == NULL)
4337                                 goto update;
4338                 } else
4339                         m = rxbuf->m_head;
4340
4341                 m->m_len = m->m_pkthdr.len = adapter->rx_mbuf_sz;
4342                 m->m_flags |= M_PKTHDR;
4343                 m->m_data = m->m_ext.ext_buf;
4344
4345                 /* Use bus_dma machinery to setup the memory mapping  */
4346                 error = bus_dmamap_load_mbuf_sg(rxr->rxtag, rxbuf->map,
4347                     m, &segs, &nsegs, BUS_DMA_NOWAIT);
4348                 if (error != 0) {
4349                         printf("Refresh mbufs: hdr dmamap load"
4350                             " failure - %d\n", error);
4351                         m_free(m);
4352                         rxbuf->m_head = NULL;
4353                         goto update;
4354                 }
4355                 rxbuf->m_head = m;
4356                 rxbuf->paddr = segs.ds_addr;
4357                 bus_dmamap_sync(rxr->rxtag,
4358                     rxbuf->map, BUS_DMASYNC_PREREAD);
4359                 em_setup_rxdesc(&rxr->rx_base[i], rxbuf);
4360                 cleaned = TRUE;
4361
4362                 i = j; /* Next is precalculated for us */
4363                 rxr->next_to_refresh = i;
4364                 /* Calculate next controlling index */
4365                 if (++j == adapter->num_rx_desc)
4366                         j = 0;
4367         }
4368 update:
4369         /*
4370         ** Update the tail pointer only if, and only
4371         ** as far as, we have refreshed.
4372         */
4373         if (cleaned)
4374                 E1000_WRITE_REG(&adapter->hw,
4375                     E1000_RDT(rxr->me), rxr->next_to_refresh);
4376
4377         return;
4378 }
4379
4380
4381 /*********************************************************************
4382  *
4383  *  Allocate memory for rx_buffer structures. Since we use one
4384  *  rx_buffer per received packet, the maximum number of rx_buffer's
4385  *  that we'll need is equal to the number of receive descriptors
4386  *  that we've allocated.
4387  *
4388  **********************************************************************/
4389 static int
4390 em_allocate_receive_buffers(struct rx_ring *rxr)
4391 {
4392         struct adapter          *adapter = rxr->adapter;
4393         device_t                dev = adapter->dev;
4394         struct em_rxbuffer      *rxbuf;
4395         int                     error;
4396
4397         rxr->rx_buffers = malloc(sizeof(struct em_rxbuffer) *
4398             adapter->num_rx_desc, M_DEVBUF, M_NOWAIT | M_ZERO);
4399         if (rxr->rx_buffers == NULL) {
4400                 device_printf(dev, "Unable to allocate rx_buffer memory\n");
4401                 return (ENOMEM);
4402         }
4403
4404         error = bus_dma_tag_create(bus_get_dma_tag(dev), /* parent */
4405                                 1, 0,                   /* alignment, bounds */
4406                                 BUS_SPACE_MAXADDR,      /* lowaddr */
4407                                 BUS_SPACE_MAXADDR,      /* highaddr */
4408                                 NULL, NULL,             /* filter, filterarg */
4409                                 MJUM9BYTES,             /* maxsize */
4410                                 1,                      /* nsegments */
4411                                 MJUM9BYTES,             /* maxsegsize */
4412                                 0,                      /* flags */
4413                                 NULL,                   /* lockfunc */
4414                                 NULL,                   /* lockarg */
4415                                 &rxr->rxtag);
4416         if (error) {
4417                 device_printf(dev, "%s: bus_dma_tag_create failed %d\n",
4418                     __func__, error);
4419                 goto fail;
4420         }
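        /*
         * Note: the tag allows a single contiguous segment of up to
         * MJUM9BYTES (9KB), so one receive cluster always maps to
         * exactly one descriptor regardless of the configured MTU.
         */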
4421
4422         rxbuf = rxr->rx_buffers;
4423         for (int i = 0; i < adapter->num_rx_desc; i++) {
4424                 rxbuf = &rxr->rx_buffers[i];
4425                 error = bus_dmamap_create(rxr->rxtag, 0, &rxbuf->map);
4426                 if (error) {
4427                         device_printf(dev, "%s: bus_dmamap_create failed: %d\n",
4428                             __func__, error);
4429                         goto fail;
4430                 }
4431         }
4432
4433         return (0);
4434
4435 fail:
4436         em_free_receive_structures(adapter);
4437         return (error);
4438 }
4439
4440
4441 /*********************************************************************
4442  *
4443  *  Initialize a receive ring and its buffers.
4444  *
4445  **********************************************************************/
4446 static int
4447 em_setup_receive_ring(struct rx_ring *rxr)
4448 {
4449         struct  adapter         *adapter = rxr->adapter;
4450         struct em_rxbuffer      *rxbuf;
4451         bus_dma_segment_t       seg[1];
4452         int                     rsize, nsegs, error = 0;
4453 #ifdef DEV_NETMAP
4454         struct netmap_adapter *na = NA(adapter->ifp);
4455         struct netmap_slot *slot;
4456 #endif
4457
4458
4459         /* Clear the ring contents */
4460         EM_RX_LOCK(rxr);
4461         rsize = roundup2(adapter->num_rx_desc *
4462             sizeof(union e1000_rx_desc_extended), EM_DBA_ALIGN);
4463         bzero((void *)rxr->rx_base, rsize);
4464 #ifdef DEV_NETMAP
4465         slot = netmap_reset(na, NR_RX, 0, 0);
4466 #endif
4467
4468         /*
4469         ** Free current RX buffer structs and their mbufs
4470         */
4471         for (int i = 0; i < adapter->num_rx_desc; i++) {
4472                 rxbuf = &rxr->rx_buffers[i];
4473                 if (rxbuf->m_head != NULL) {
4474                         bus_dmamap_sync(rxr->rxtag, rxbuf->map,
4475                             BUS_DMASYNC_POSTREAD);
4476                         bus_dmamap_unload(rxr->rxtag, rxbuf->map);
4477                         m_freem(rxbuf->m_head);
4478                         rxbuf->m_head = NULL; /* mark as freed */
4479                 }
4480         }
4481
4482         /* Now replenish the mbufs */
4483         for (int j = 0; j != adapter->num_rx_desc; ++j) {
4484                 rxbuf = &rxr->rx_buffers[j];
4485 #ifdef DEV_NETMAP
4486                 if (slot) {
4487                         int si = netmap_idx_n2k(&na->rx_rings[rxr->me], j);
4488                         uint64_t paddr;
4489                         void *addr;
4490
4491                         addr = PNMB(na, slot + si, &paddr);
4492                         netmap_load_map(na, rxr->rxtag, rxbuf->map, addr);
4493                         rxbuf->paddr = paddr;
4494                         em_setup_rxdesc(&rxr->rx_base[j], rxbuf);
4495                         continue;
4496                 }
4497 #endif /* DEV_NETMAP */
4498                 rxbuf->m_head = m_getjcl(M_NOWAIT, MT_DATA,
4499                     M_PKTHDR, adapter->rx_mbuf_sz);
4500                 if (rxbuf->m_head == NULL) {
4501                         error = ENOBUFS;
4502                         goto fail;
4503                 }
4504                 rxbuf->m_head->m_len = adapter->rx_mbuf_sz;
4505                 rxbuf->m_head->m_flags &= ~M_HASFCS; /* we strip it */
4506                 rxbuf->m_head->m_pkthdr.len = adapter->rx_mbuf_sz;
4507
4508                 /* Get the memory mapping */
4509                 error = bus_dmamap_load_mbuf_sg(rxr->rxtag,
4510                     rxbuf->map, rxbuf->m_head, seg,
4511                     &nsegs, BUS_DMA_NOWAIT);
4512                 if (error != 0) {
4513                         m_freem(rxbuf->m_head);
4514                         rxbuf->m_head = NULL;
4515                         goto fail;
4516                 }
4517                 bus_dmamap_sync(rxr->rxtag,
4518                     rxbuf->map, BUS_DMASYNC_PREREAD);
4519
4520                 rxbuf->paddr = seg[0].ds_addr;
4521                 em_setup_rxdesc(&rxr->rx_base[j], rxbuf);
4522         }
4523         rxr->next_to_check = 0;
4524         rxr->next_to_refresh = 0;
4525         bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
4526             BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
4527
4528 fail:
4529         EM_RX_UNLOCK(rxr);
4530         return (error);
4531 }
4532
4533 /*********************************************************************
4534  *
4535  *  Initialize all receive rings.
4536  *
4537  **********************************************************************/
4538 static int
4539 em_setup_receive_structures(struct adapter *adapter)
4540 {
4541         struct rx_ring *rxr = adapter->rx_rings;
4542         int q;
4543
4544         for (q = 0; q < adapter->num_queues; q++, rxr++)
4545                 if (em_setup_receive_ring(rxr))
4546                         goto fail;
4547
4548         return (0);
4549 fail:
4550         /*
4551          * Free the RX buffers allocated so far; we only handle
4552          * the rings that completed, since the failing ring will
4553          * have cleaned up after itself. 'q' failed, so it's the terminus.
4554          */
4555         for (int i = 0; i < q; ++i) {
4556                 rxr = &adapter->rx_rings[i];
4557                 for (int n = 0; n < adapter->num_rx_desc; n++) {
4558                         struct em_rxbuffer *rxbuf;
4559                         rxbuf = &rxr->rx_buffers[n];
4560                         if (rxbuf->m_head != NULL) {
4561                                 bus_dmamap_sync(rxr->rxtag, rxbuf->map,
4562                                   BUS_DMASYNC_POSTREAD);
4563                                 bus_dmamap_unload(rxr->rxtag, rxbuf->map);
4564                                 m_freem(rxbuf->m_head);
4565                                 rxbuf->m_head = NULL;
4566                         }
4567                 }
4568                 rxr->next_to_check = 0;
4569                 rxr->next_to_refresh = 0;
4570         }
4571
4572         return (ENOBUFS);
4573 }
4574
4575 /*********************************************************************
4576  *
4577  *  Free all receive rings.
4578  *
4579  **********************************************************************/
4580 static void
4581 em_free_receive_structures(struct adapter *adapter)
4582 {
4583         struct rx_ring *rxr = adapter->rx_rings;
4584
4585         for (int i = 0; i < adapter->num_queues; i++, rxr++) {
4586                 em_free_receive_buffers(rxr);
4587                 /* Free the ring memory as well */
4588                 em_dma_free(adapter, &rxr->rxdma);
4589                 EM_RX_LOCK_DESTROY(rxr);
4590         }
4591
4592         free(adapter->rx_rings, M_DEVBUF);
4593 }
4594
4595
4596 /*********************************************************************
4597  *
4598  *  Free receive ring data structures
4599  *
4600  **********************************************************************/
4601 static void
4602 em_free_receive_buffers(struct rx_ring *rxr)
4603 {
4604         struct adapter          *adapter = rxr->adapter;
4605         struct em_rxbuffer      *rxbuf = NULL;
4606
4607         INIT_DEBUGOUT("free_receive_buffers: begin");
4608
4609         if (rxr->rx_buffers != NULL) {
4610                 for (int i = 0; i < adapter->num_rx_desc; i++) {
4611                         rxbuf = &rxr->rx_buffers[i];
4612                         if (rxbuf->map != NULL) {
4613                                 bus_dmamap_sync(rxr->rxtag, rxbuf->map,
4614                                     BUS_DMASYNC_POSTREAD);
4615                                 bus_dmamap_unload(rxr->rxtag, rxbuf->map);
4616                                 bus_dmamap_destroy(rxr->rxtag, rxbuf->map);
4617                         }
4618                         if (rxbuf->m_head != NULL) {
4619                                 m_freem(rxbuf->m_head);
4620                                 rxbuf->m_head = NULL;
4621                         }
4622                 }
4623                 free(rxr->rx_buffers, M_DEVBUF);
4624                 rxr->rx_buffers = NULL;
4625                 rxr->next_to_check = 0;
4626                 rxr->next_to_refresh = 0;
4627         }
4628
4629         if (rxr->rxtag != NULL) {
4630                 bus_dma_tag_destroy(rxr->rxtag);
4631                 rxr->rxtag = NULL;
4632         }
4633
4634         return;
4635 }
4636
4637
4638 /*********************************************************************
4639  *
4640  *  Enable receive unit.
4641  *
4642  **********************************************************************/
4643
4644 static void
4645 em_initialize_receive_unit(struct adapter *adapter)
4646 {
4647         struct rx_ring *rxr = adapter->rx_rings;
4648         struct ifnet    *ifp = adapter->ifp;
4649         struct e1000_hw *hw = &adapter->hw;
4650         u32     rctl, rxcsum, rfctl;
4651
4652         INIT_DEBUGOUT("em_initialize_receive_units: begin");
4653
4654         /*
4655          * Make sure receives are disabled while setting
4656          * up the descriptor ring
4657          */
4658         rctl = E1000_READ_REG(hw, E1000_RCTL);
4659         /* On 82574/82583, never disable the receiver once it has been enabled */
4660         if ((hw->mac.type != e1000_82574) && (hw->mac.type != e1000_82583))
4661                 E1000_WRITE_REG(hw, E1000_RCTL, rctl & ~E1000_RCTL_EN);
4662
4663         /* Setup the Receive Control Register */
4664         rctl &= ~(3 << E1000_RCTL_MO_SHIFT);
4665         rctl |= E1000_RCTL_EN | E1000_RCTL_BAM |
4666             E1000_RCTL_LBM_NO | E1000_RCTL_RDMTS_HALF |
4667             (hw->mac.mc_filter_type << E1000_RCTL_MO_SHIFT);
4668
4669         /* Do not store bad packets */
4670         rctl &= ~E1000_RCTL_SBP;
4671
4672         /* Enable Long Packet receive */
4673         if (ifp->if_mtu > ETHERMTU)
4674                 rctl |= E1000_RCTL_LPE;
4675         else
4676                 rctl &= ~E1000_RCTL_LPE;
4677
4678         /* Strip the CRC */
4679         if (!em_disable_crc_stripping)
4680                 rctl |= E1000_RCTL_SECRC;
4681
4682         E1000_WRITE_REG(&adapter->hw, E1000_RADV,
4683             adapter->rx_abs_int_delay.value);
4684
4685         E1000_WRITE_REG(&adapter->hw, E1000_RDTR,
4686             adapter->rx_int_delay.value);
4687         /*
4688          * Set the interrupt throttling rate. Value is calculated
4689          * as DEFAULT_ITR = 1/(MAX_INTS_PER_SEC * 256ns)
4690          */
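        /*
         * Illustrative: with the usual MAX_INTS_PER_SEC of 8000 this
         * works out to 1000000000 / (8000 * 256) ~= 488, i.e. the
         * hardware enforces at least 488 * 256ns ~= 125us between
         * interrupts, or roughly 8000 interrupts per second.
         */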
4691         E1000_WRITE_REG(hw, E1000_ITR, DEFAULT_ITR);
4692
4693         /* Use extended rx descriptor formats */
4694         rfctl = E1000_READ_REG(hw, E1000_RFCTL);
4695         rfctl |= E1000_RFCTL_EXTEN;
4696         /*
4697         ** When using MSIX interrupts we need to throttle
4698         ** using the EITR register (82574 only)
4699         */
4700         if (hw->mac.type == e1000_82574) {
4701                 for (int i = 0; i < 4; i++)
4702                         E1000_WRITE_REG(hw, E1000_EITR_82574(i),
4703                             DEFAULT_ITR);
4704                 /* Disable accelerated acknowledge */
4705                 rfctl |= E1000_RFCTL_ACK_DIS;
4706         }
4707         E1000_WRITE_REG(hw, E1000_RFCTL, rfctl);
4708
4709         rxcsum = E1000_READ_REG(hw, E1000_RXCSUM);
4710         if (ifp->if_capenable & IFCAP_RXCSUM) {
4711 #ifdef EM_MULTIQUEUE
4712                 rxcsum |= E1000_RXCSUM_TUOFL |
4713                           E1000_RXCSUM_IPOFL |
4714                           E1000_RXCSUM_PCSD;
4715 #else
4716                 rxcsum |= E1000_RXCSUM_TUOFL;
4717 #endif
4718         } else
4719                 rxcsum &= ~E1000_RXCSUM_TUOFL;
4720
4721         E1000_WRITE_REG(hw, E1000_RXCSUM, rxcsum);
4722
4723 #ifdef EM_MULTIQUEUE
4724 #define RSSKEYLEN 10
4725         if (adapter->num_queues > 1) {
4726                 uint8_t  rss_key[4 * RSSKEYLEN];
4727                 uint32_t reta = 0;
4728                 int i;
4729
4730                 /*
4731                 * Configure RSS key
4732                 */
4733                 arc4rand(rss_key, sizeof(rss_key), 0);
4734                 for (i = 0; i < RSSKEYLEN; ++i) {
4735                         uint32_t rssrk = 0;
4736
4737                         rssrk = EM_RSSRK_VAL(rss_key, i);
4738                         E1000_WRITE_REG(hw,E1000_RSSRK(i), rssrk);
4739                 }
4740
4741                 /*
4742                 * Configure the RSS redirect table in the following fashion:
4743                 * (hash & ring_cnt_mask) == rdr_table[(hash & rdr_table_mask)]
4744                 */
4745                 for (i = 0; i < sizeof(reta); ++i) {
4746                         uint32_t q;
4747
4748                         q = (i % adapter->num_queues) << 7;
4749                         reta |= q << (8 * i);
4750                 }
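                /*
                 * Illustrative: with num_queues == 2 the loop above
                 * yields reta == 0x80008000; bit 7 of each RETA byte
                 * is the queue select, so consecutive hash values
                 * alternate between RX queues 0 and 1 across all 32
                 * RETA registers written below.
                 */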
4751
4752                 for (i = 0; i < 32; ++i) {
4753                         E1000_WRITE_REG(hw, E1000_RETA(i), reta);
4754                 }
4755
4756                 E1000_WRITE_REG(hw, E1000_MRQC, E1000_MRQC_RSS_ENABLE_2Q | 
4757                                 E1000_MRQC_RSS_FIELD_IPV4_TCP |
4758                                 E1000_MRQC_RSS_FIELD_IPV4 |
4759                                 E1000_MRQC_RSS_FIELD_IPV6_TCP_EX |
4760                                 E1000_MRQC_RSS_FIELD_IPV6_EX |
4761                                 E1000_MRQC_RSS_FIELD_IPV6);
4762         }
4763 #endif
4764         /*
4765         ** XXX TEMPORARY WORKAROUND: on some systems with the 82573,
4766         ** such as the Lenovo X60, long latencies are observed. This
4767         ** change eliminates the problem, but since having positive
4768         ** values in RDTR is a known source of problems on other
4769         ** platforms another solution is being sought.
4770         */
4771         if (hw->mac.type == e1000_82573)
4772                 E1000_WRITE_REG(hw, E1000_RDTR, 0x20);
4773
4774         for (int i = 0; i < adapter->num_queues; i++, rxr++) {
4775                 /* Setup the Base and Length of the Rx Descriptor Ring */
4776                 u64 bus_addr = rxr->rxdma.dma_paddr;
4777                 u32 rdt = adapter->num_rx_desc - 1; /* default */
4778
4779                 E1000_WRITE_REG(hw, E1000_RDLEN(i),
4780                     adapter->num_rx_desc * sizeof(union e1000_rx_desc_extended));
4781                 E1000_WRITE_REG(hw, E1000_RDBAH(i), (u32)(bus_addr >> 32));
4782                 E1000_WRITE_REG(hw, E1000_RDBAL(i), (u32)bus_addr);
4783                 /* Setup the Head and Tail Descriptor Pointers */
4784                 E1000_WRITE_REG(hw, E1000_RDH(i), 0);
4785 #ifdef DEV_NETMAP
4786                 /*
4787                  * An init() while a netmap client is active must
4788                  * preserve the rx buffers passed to userspace.
4789                  */
4790                 if (ifp->if_capenable & IFCAP_NETMAP)
4791                         rdt -= nm_kr_rxspace(&NA(adapter->ifp)->rx_rings[i]);
4792 #endif /* DEV_NETMAP */
4793                 E1000_WRITE_REG(hw, E1000_RDT(i), rdt);
4794         }
4795
4796         /*
4797          * Set PTHRESH for improved jumbo performance
4798          * According to 10.2.5.11 of Intel 82574 Datasheet,
4799          * RXDCTL(1) is written whenever RXDCTL(0) is written.
4800          * Only write to RXDCTL(1) if there is a need for different
4801          * settings.
4802          */
4803         if (((adapter->hw.mac.type == e1000_ich9lan) ||
4804             (adapter->hw.mac.type == e1000_pch2lan) ||
4805             (adapter->hw.mac.type == e1000_ich10lan)) &&
4806             (ifp->if_mtu > ETHERMTU)) {
4807                 u32 rxdctl = E1000_READ_REG(hw, E1000_RXDCTL(0));
4808                 E1000_WRITE_REG(hw, E1000_RXDCTL(0), rxdctl | 3);
4809         } else if (adapter->hw.mac.type == e1000_82574) {
4810                 for (int i = 0; i < adapter->num_queues; i++) {
4811                         u32 rxdctl = E1000_READ_REG(hw, E1000_RXDCTL(i));
4812
4813                         rxdctl |= 0x20; /* PTHRESH */
4814                         rxdctl |= 4 << 8; /* HTHRESH */
4815                         rxdctl |= 4 << 16;/* WTHRESH */
4816                         rxdctl |= 1 << 24; /* Switch to granularity */
4817                         E1000_WRITE_REG(hw, E1000_RXDCTL(i), rxdctl);
4818                 }
4819         }
4820                 
4821         if (adapter->hw.mac.type >= e1000_pch2lan) {
4822                 if (ifp->if_mtu > ETHERMTU)
4823                         e1000_lv_jumbo_workaround_ich8lan(hw, TRUE);
4824                 else
4825                         e1000_lv_jumbo_workaround_ich8lan(hw, FALSE);
4826         }
4827
4828         /* Make sure VLAN Filters are off */
4829         rctl &= ~E1000_RCTL_VFE;
4830
4831         if (adapter->rx_mbuf_sz == MCLBYTES)
4832                 rctl |= E1000_RCTL_SZ_2048;
4833         else if (adapter->rx_mbuf_sz == MJUMPAGESIZE)
4834                 rctl |= E1000_RCTL_SZ_4096 | E1000_RCTL_BSEX;
4835         else if (adapter->rx_mbuf_sz > MJUMPAGESIZE)
4836                 rctl |= E1000_RCTL_SZ_8192 | E1000_RCTL_BSEX;
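        /*
         * With BSEX set, the BSIZE encodings are scaled by 16, so the
         * same bit patterns that select 256/512/1024 bytes instead
         * select 4096/8192/16384-byte buffers.
         */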
4837
4838         /* Ensure we use a DTYPE of 00 here by clearing the descriptor type bits */
4839         rctl &= ~0x00000C00;
4840         /* Write out the settings */
4841         E1000_WRITE_REG(hw, E1000_RCTL, rctl);
4842
4843         return;
4844 }
4845
4846
4847 /*********************************************************************
4848  *
4849  *  This routine executes in interrupt context. It replenishes
4850  *  the mbufs in the descriptor ring and sends data which has been
4851  *  dma'ed into host memory to the upper layer.
4852  *
4853  *  We loop at most count times if count is > 0, or until done if
4854  *  count < 0.
4855  *  
4856  *  For polling we also now return the number of cleaned packets
4857  *********************************************************************/
4858 static bool
4859 em_rxeof(struct rx_ring *rxr, int count, int *done)
4860 {
4861         struct adapter          *adapter = rxr->adapter;
4862         struct ifnet            *ifp = adapter->ifp;
4863         struct mbuf             *mp, *sendmp;
4864         u32                     status = 0;
4865         u16                     len;
4866         int                     i, processed, rxdone = 0;
4867         bool                    eop;
4868         union e1000_rx_desc_extended    *cur;
4869
4870         EM_RX_LOCK(rxr);
4871
4872         /* Sync the ring */
4873         bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
4874             BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
4875
4876
4877 #ifdef DEV_NETMAP
4878         if (netmap_rx_irq(ifp, rxr->me, &processed)) {
4879                 EM_RX_UNLOCK(rxr);
4880                 return (FALSE);
4881         }
4882 #endif /* DEV_NETMAP */
4883
4884         for (i = rxr->next_to_check, processed = 0; count != 0;) {
4885                 if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
4886                         break;
4887
4888                 cur = &rxr->rx_base[i];
4889                 status = le32toh(cur->wb.upper.status_error);
4890                 mp = sendmp = NULL;
4891
4892                 if ((status & E1000_RXD_STAT_DD) == 0)
4893                         break;
4894
4895                 len = le16toh(cur->wb.upper.length);
4896                 eop = (status & E1000_RXD_STAT_EOP) != 0;
4897
4898                 if ((status & E1000_RXDEXT_ERR_FRAME_ERR_MASK) ||
4899                     (rxr->discard == TRUE)) {
4900                         adapter->dropped_pkts++;
4901                         ++rxr->rx_discarded;
4902                         if (!eop) /* Catch subsequent segs */
4903                                 rxr->discard = TRUE;
4904                         else
4905                                 rxr->discard = FALSE;
4906                         em_rx_discard(rxr, i);
4907                         goto next_desc;
4908                 }
4909                 bus_dmamap_unload(rxr->rxtag, rxr->rx_buffers[i].map);
4910
4911                 /* Assign correct length to the current fragment */
4912                 mp = rxr->rx_buffers[i].m_head;
4913                 mp->m_len = len;
4914
4915                 /* Trigger for refresh */
4916                 rxr->rx_buffers[i].m_head = NULL;
4917
4918                 /* First segment? */
4919                 if (rxr->fmp == NULL) {
4920                         mp->m_pkthdr.len = len;
4921                         rxr->fmp = rxr->lmp = mp;
4922                 } else {
4923                         /* Chain mbuf's together */
4924                         mp->m_flags &= ~M_PKTHDR;
4925                         rxr->lmp->m_next = mp;
4926                         rxr->lmp = mp;
4927                         rxr->fmp->m_pkthdr.len += len;
4928                 }
4929
4930                 if (eop) {
4931                         --count;
4932                         sendmp = rxr->fmp;
4933                         sendmp->m_pkthdr.rcvif = ifp;
4934                         ifp->if_ipackets++;
4935                         em_receive_checksum(status, sendmp);
4936 #ifndef __NO_STRICT_ALIGNMENT
4937                         if (adapter->hw.mac.max_frame_size >
4938                             (MCLBYTES - ETHER_ALIGN) &&
4939                             em_fixup_rx(rxr) != 0)
4940                                 goto skip;
4941 #endif
4942                         if (status & E1000_RXD_STAT_VP) {
4943                                 sendmp->m_pkthdr.ether_vtag =
4944                                     le16toh(cur->wb.upper.vlan);
4945                                 sendmp->m_flags |= M_VLANTAG;
4946                         }
4947 #ifndef __NO_STRICT_ALIGNMENT
4948 skip:
4949 #endif
4950                         rxr->fmp = rxr->lmp = NULL;
4951                 }
4952 next_desc:
4953                 /* Sync the ring */
4954                 bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
4955                         BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
4956
4957                 /* Zero out the receive descriptors status. */
4958                 cur->wb.upper.status_error &= htole32(~0xFF);
4959                 ++rxdone;       /* cumulative for POLL */
4960                 ++processed;
4961
4962                 /* Advance our pointers to the next descriptor. */
4963                 if (++i == adapter->num_rx_desc)
4964                         i = 0;
4965
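                /*
                 * The RX lock is dropped around if_input() below so
                 * the stack can process (and possibly transmit)
                 * without holding it; next_to_check is saved first
                 * and re-read afterwards in case it moved while
                 * unlocked.
                 */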
4966                 /* Send to the stack */
4967                 if (sendmp != NULL) {
4968                         rxr->next_to_check = i;
4969                         EM_RX_UNLOCK(rxr);
4970                         (*ifp->if_input)(ifp, sendmp);
4971                         EM_RX_LOCK(rxr);
4972                         i = rxr->next_to_check;
4973                 }
4974
4975                 /* Only refresh mbufs every 8 descriptors */
4976                 if (processed == 8) {
4977                         em_refresh_mbufs(rxr, i);
4978                         processed = 0;
4979                 }
4980         }
4981
4982         /* Catch any remaining refresh work */
4983         if (e1000_rx_unrefreshed(rxr))
4984                 em_refresh_mbufs(rxr, i);
4985
4986         rxr->next_to_check = i;
4987         if (done != NULL)
4988                 *done = rxdone;
4989         EM_RX_UNLOCK(rxr);
4990
4991         return ((status & E1000_RXD_STAT_DD) ? TRUE : FALSE);
4992 }
4993
4994 static __inline void
4995 em_rx_discard(struct rx_ring *rxr, int i)
4996 {
4997         struct em_rxbuffer      *rbuf;
4998
4999         rbuf = &rxr->rx_buffers[i];
5000         bus_dmamap_unload(rxr->rxtag, rbuf->map);
5001
5002         /* Free any previous pieces */
5003         if (rxr->fmp != NULL) {
5004                 rxr->fmp->m_flags |= M_PKTHDR;
5005                 m_freem(rxr->fmp);
5006                 rxr->fmp = NULL;
5007                 rxr->lmp = NULL;
5008         }
5009         /*
5010         ** Free buffer and allow em_refresh_mbufs()
5011         ** to clean up and recharge buffer.
5012         */
5013         if (rbuf->m_head) {
5014                 m_free(rbuf->m_head);
5015                 rbuf->m_head = NULL;
5016         }
5017         return;
5018 }
5019
5020 #ifndef __NO_STRICT_ALIGNMENT
5021 /*
5022  * When jumbo frames are enabled we should realign the entire payload on
5023  * architectures with strict alignment. This is a serious design mistake of
5024  * the 8254x, as it nullifies the benefit of DMA. The 8254x only allows the
5025  * RX buffer size to be 2048/4096/8192/16384. What we really want is
5026  * 2048 - ETHER_ALIGN, so that the payload would be aligned. On architectures
5027  * without strict alignment restrictions the 8254x still performs unaligned
5028  * memory accesses, which reduce performance too. To avoid copying an entire
5029  * frame just to align it, we allocate a new mbuf, copy the ethernet header
5030  * into it, and prepend the new mbuf to the existing mbuf chain.
5031  *
5032  * Be aware that the best performance of the 8254x is achieved only when
5033  * jumbo frames are not used at all on architectures with strict alignment.
5034  */
5035 static int
5036 em_fixup_rx(struct rx_ring *rxr)
5037 {
5038         struct adapter *adapter = rxr->adapter;
5039         struct mbuf *m, *n;
5040         int error;
5041
5042         error = 0;
5043         m = rxr->fmp;
5044         if (m->m_len <= (MCLBYTES - ETHER_HDR_LEN)) {
5045                 bcopy(m->m_data, m->m_data + ETHER_HDR_LEN, m->m_len);
5046                 m->m_data += ETHER_HDR_LEN;
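                /*
                 * The frame starts at a cluster boundary, so shifting
                 * it by ETHER_HDR_LEN (14) lands the IP header at
                 * offset 28, a 4-byte aligned address.
                 */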
5047         } else {
5048                 MGETHDR(n, M_NOWAIT, MT_DATA);
5049                 if (n != NULL) {
5050                         bcopy(m->m_data, n->m_data, ETHER_HDR_LEN);
5051                         m->m_data += ETHER_HDR_LEN;
5052                         m->m_len -= ETHER_HDR_LEN;
5053                         n->m_len = ETHER_HDR_LEN;
5054                         M_MOVE_PKTHDR(n, m);
5055                         n->m_next = m;
5056                         rxr->fmp = n;
5057                 } else {
5058                         adapter->dropped_pkts++;
5059                         m_freem(rxr->fmp);
5060                         rxr->fmp = NULL;
5061                         error = ENOMEM;
5062                 }
5063         }
5064
5065         return (error);
5066 }
5067 #endif
5068
5069 static void
5070 em_setup_rxdesc(union e1000_rx_desc_extended *rxd, const struct em_rxbuffer *rxbuf)
5071 {
5072         rxd->read.buffer_addr = htole64(rxbuf->paddr);
5073         /* DD bits must be cleared */
5074         rxd->wb.upper.status_error = 0;
5075 }
5076
5077 /*********************************************************************
5078  *
5079  *  Verify that the hardware indicated that the checksum is valid.
5080  *  Inform the stack about the status of checksum so that stack
5081  *  doesn't spend time verifying the checksum.
5082  *
5083  *********************************************************************/
5084 static void
5085 em_receive_checksum(uint32_t status, struct mbuf *mp)
5086 {
5087         mp->m_pkthdr.csum_flags = 0;
5088
5089         /* Ignore Checksum bit is set */
5090         if (status & E1000_RXD_STAT_IXSM)
5091                 return;
5092
5093         /* If the IP checksum exists and there is no IP Checksum error */
5094         if ((status & (E1000_RXD_STAT_IPCS | E1000_RXDEXT_STATERR_IPE)) ==
5095                 E1000_RXD_STAT_IPCS) {
5096                 mp->m_pkthdr.csum_flags = (CSUM_IP_CHECKED | CSUM_IP_VALID);
5097         }
5098
5099         /* TCP or UDP checksum */
5100         if ((status & (E1000_RXD_STAT_TCPCS | E1000_RXDEXT_STATERR_TCPE)) ==
5101             E1000_RXD_STAT_TCPCS) {
5102                 mp->m_pkthdr.csum_flags |= (CSUM_DATA_VALID | CSUM_PSEUDO_HDR);
5103                 mp->m_pkthdr.csum_data = htons(0xffff);
5104         }
5105         if (status & E1000_RXD_STAT_UDPCS) {
5106                 mp->m_pkthdr.csum_flags |= (CSUM_DATA_VALID | CSUM_PSEUDO_HDR);
5107                 mp->m_pkthdr.csum_data = htons(0xffff);
5108         }
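        /*
         * csum_data of 0xffff combined with CSUM_DATA_VALID and
         * CSUM_PSEUDO_HDR tells the stack that the L4 checksum,
         * including the pseudo-header, was verified in hardware,
         * so it can skip its own check.
         */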
5109 }
5110
5111 /*
5112  * This routine is run via a vlan
5113  * config EVENT.
5114  */
5115 static void
5116 em_register_vlan(void *arg, struct ifnet *ifp, u16 vtag)
5117 {
5118         struct adapter  *adapter = ifp->if_softc;
5119         u32             index, bit;
5120
5121         if (ifp->if_softc !=  arg)   /* Not our event */
5122                 return;
5123
5124         if ((vtag == 0) || (vtag > 4095))       /* Invalid ID */
5125                 return;
5126
5127         EM_CORE_LOCK(adapter);
5128         index = (vtag >> 5) & 0x7F;
5129         bit = vtag & 0x1F;
5130         adapter->shadow_vfta[index] |= (1 << bit);
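        /*
         * e.g. vtag 1234: index = (1234 >> 5) & 0x7F = 38 and
         * bit = 1234 & 0x1F = 18, so bit 18 of shadow_vfta[38] is
         * set; the shadow VFTA mirrors the hardware's 128 x 32-bit
         * filter bitmap covering all 4096 VLAN IDs.
         */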
5131         ++adapter->num_vlans;
5132         /* Re-init to load the changes */
5133         if (ifp->if_capenable & IFCAP_VLAN_HWFILTER)
5134                 em_init_locked(adapter);
5135         EM_CORE_UNLOCK(adapter);
5136 }
5137
5138 /*
5139  * This routine is run via a vlan
5140  * unconfig EVENT.
5141  */
5142 static void
5143 em_unregister_vlan(void *arg, struct ifnet *ifp, u16 vtag)
5144 {
5145         struct adapter  *adapter = ifp->if_softc;
5146         u32             index, bit;
5147
5148         if (ifp->if_softc !=  arg)
5149                 return;
5150
5151         if ((vtag == 0) || (vtag > 4095))       /* Invalid */
5152                 return;
5153
5154         EM_CORE_LOCK(adapter);
5155         index = (vtag >> 5) & 0x7F;
5156         bit = vtag & 0x1F;
5157         adapter->shadow_vfta[index] &= ~(1 << bit);
5158         --adapter->num_vlans;
5159         /* Re-init to load the changes */
5160         if (ifp->if_capenable & IFCAP_VLAN_HWFILTER)
5161                 em_init_locked(adapter);
5162         EM_CORE_UNLOCK(adapter);
5163 }
5164
5165 static void
5166 em_setup_vlan_hw_support(struct adapter *adapter)
5167 {
5168         struct e1000_hw *hw = &adapter->hw;
5169         u32             reg;
5170
5171         /*
5172         ** We get here thru init_locked, meaning
5173         ** a soft reset; this has already cleared
5174         ** the VFTA and other state, so if no vlans
5175         ** have been registered, do nothing.
5176         */
5177         if (adapter->num_vlans == 0)
5178                 return;
5179
5180         /*
5181         ** A soft reset zeroes out the VFTA, so
5182         ** we need to repopulate it now.
5183         */
5184         for (int i = 0; i < EM_VFTA_SIZE; i++)
5185                 if (adapter->shadow_vfta[i] != 0)
5186                         E1000_WRITE_REG_ARRAY(hw, E1000_VFTA,
5187                             i, adapter->shadow_vfta[i]);
5188
5189         reg = E1000_READ_REG(hw, E1000_CTRL);
5190         reg |= E1000_CTRL_VME;
5191         E1000_WRITE_REG(hw, E1000_CTRL, reg);
5192
5193         /* Enable the Filter Table */
5194         reg = E1000_READ_REG(hw, E1000_RCTL);
5195         reg &= ~E1000_RCTL_CFIEN;
5196         reg |= E1000_RCTL_VFE;
5197         E1000_WRITE_REG(hw, E1000_RCTL, reg);
5198 }
5199
5200 static void
5201 em_enable_intr(struct adapter *adapter)
5202 {
5203         struct e1000_hw *hw = &adapter->hw;
5204         u32 ims_mask = IMS_ENABLE_MASK;
5205
5206         if (hw->mac.type == e1000_82574) {
5207                 E1000_WRITE_REG(hw, EM_EIAC, EM_MSIX_MASK);
5208                 ims_mask |= EM_MSIX_MASK;
5209         } 
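        /*
         * On the 82574, EIAC selects which cause bits auto-clear when
         * the matching MSI-X vector fires, so the MSI-X causes are
         * unmasked in IMS along with the standard ones.
         */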
5210         E1000_WRITE_REG(hw, E1000_IMS, ims_mask);
5211 }
5212
5213 static void
5214 em_disable_intr(struct adapter *adapter)
5215 {
5216         struct e1000_hw *hw = &adapter->hw;
5217
5218         if (hw->mac.type == e1000_82574)
5219                 E1000_WRITE_REG(hw, EM_EIAC, 0);
5220         E1000_WRITE_REG(&adapter->hw, E1000_IMC, 0xffffffff);
5221 }
5222
5223 /*
5224  * Bit of a misnomer, what this really means is
5225  * to enable OS management of the system... aka
5226  * to disable special hardware management features 
5227  */
5228 static void
5229 em_init_manageability(struct adapter *adapter)
5230 {
5231         /* A shared code workaround */
5232 #define E1000_82542_MANC2H E1000_MANC2H
5233         if (adapter->has_manage) {
5234                 int manc2h = E1000_READ_REG(&adapter->hw, E1000_MANC2H);
5235                 int manc = E1000_READ_REG(&adapter->hw, E1000_MANC);
5236
5237                 /* disable hardware interception of ARP */
5238                 manc &= ~(E1000_MANC_ARP_EN);
5239
5240                 /* enable receiving management packets to the host */
5241                 manc |= E1000_MANC_EN_MNG2HOST;
5242 #define E1000_MNG2HOST_PORT_623 (1 << 5)
5243 #define E1000_MNG2HOST_PORT_664 (1 << 6)
5244                 manc2h |= E1000_MNG2HOST_PORT_623;
5245                 manc2h |= E1000_MNG2HOST_PORT_664;
5246                 E1000_WRITE_REG(&adapter->hw, E1000_MANC2H, manc2h);
5247                 E1000_WRITE_REG(&adapter->hw, E1000_MANC, manc);
5248         }
5249 }
5250
5251 /*
5252  * Give control back to hardware management
5253  * controller if there is one.
5254  */
5255 static void
5256 em_release_manageability(struct adapter *adapter)
5257 {
5258         if (adapter->has_manage) {
5259                 int manc = E1000_READ_REG(&adapter->hw, E1000_MANC);
5260
5261                 /* re-enable hardware interception of ARP */
5262                 manc |= E1000_MANC_ARP_EN;
5263                 manc &= ~E1000_MANC_EN_MNG2HOST;
5264
5265                 E1000_WRITE_REG(&adapter->hw, E1000_MANC, manc);
5266         }
5267 }
5268
5269 /*
5270  * em_get_hw_control sets the {CTRL_EXT|FWSM}:DRV_LOAD bit.
5271  * For ASF and Pass Through versions of f/w this means
5272  * that the driver is loaded. For AMT version type f/w
5273  * this means that the network i/f is open.
5274  */
5275 static void
5276 em_get_hw_control(struct adapter *adapter)
5277 {
5278         u32 ctrl_ext, swsm;
5279
5280         if (adapter->hw.mac.type == e1000_82573) {
5281                 swsm = E1000_READ_REG(&adapter->hw, E1000_SWSM);
5282                 E1000_WRITE_REG(&adapter->hw, E1000_SWSM,
5283                     swsm | E1000_SWSM_DRV_LOAD);
5284                 return;
5285         }
5286         /* else */
5287         ctrl_ext = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
5288         E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT,
5289             ctrl_ext | E1000_CTRL_EXT_DRV_LOAD);
5290         return;
5291 }
5292
5293 /*
5294  * em_release_hw_control resets {CTRL_EXT|FWSM}:DRV_LOAD bit.
5295  * For ASF and Pass Through versions of f/w this means that
5296  * the driver is no longer loaded. For AMT versions of the
5297  * f/w this means that the network i/f is closed.
5298  */
5299 static void
5300 em_release_hw_control(struct adapter *adapter)
5301 {
5302         u32 ctrl_ext, swsm;
5303
5304         if (!adapter->has_manage)
5305                 return;
5306
5307         if (adapter->hw.mac.type == e1000_82573) {
5308                 swsm = E1000_READ_REG(&adapter->hw, E1000_SWSM);
5309                 E1000_WRITE_REG(&adapter->hw, E1000_SWSM,
5310                     swsm & ~E1000_SWSM_DRV_LOAD);
5311                 return;
5312         }
5313         /* else */
5314         ctrl_ext = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
5315         E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT,
5316             ctrl_ext & ~E1000_CTRL_EXT_DRV_LOAD);
5317         return;
5318 }
5319
5320 static int
5321 em_is_valid_ether_addr(u8 *addr)
5322 {
5323         char zero_addr[6] = { 0, 0, 0, 0, 0, 0 };
5324
5325         if ((addr[0] & 1) || (!bcmp(addr, zero_addr, ETHER_ADDR_LEN))) {
5326                 return (FALSE);
5327         }
5328
5329         return (TRUE);
5330 }
5331
5332 /*
5333 ** Parse the interface capabilities with regard
5334 ** to both system management and wake-on-lan for
5335 ** later use.
5336 */
5337 static void
5338 em_get_wakeup(device_t dev)
5339 {
5340         struct adapter  *adapter = device_get_softc(dev);
5341         u16             eeprom_data = 0, device_id, apme_mask;
5342
5343         adapter->has_manage = e1000_enable_mng_pass_thru(&adapter->hw);
5344         apme_mask = EM_EEPROM_APME;
5345
5346         switch (adapter->hw.mac.type) {
5347         case e1000_82573:
5348         case e1000_82583:
5349                 adapter->has_amt = TRUE;
5350                 /* Falls thru */
5351         case e1000_82571:
5352         case e1000_82572:
5353         case e1000_80003es2lan:
5354                 if (adapter->hw.bus.func == 1) {
5355                         e1000_read_nvm(&adapter->hw,
5356                             NVM_INIT_CONTROL3_PORT_B, 1, &eeprom_data);
5357                         break;
5358                 } else
5359                         e1000_read_nvm(&adapter->hw,
5360                             NVM_INIT_CONTROL3_PORT_A, 1, &eeprom_data);
5361                 break;
5362         case e1000_ich8lan:
5363         case e1000_ich9lan:
5364         case e1000_ich10lan:
5365         case e1000_pchlan:
5366         case e1000_pch2lan:
5367         case e1000_pch_lpt:
5368         case e1000_pch_spt:
5369         case e1000_pch_cnp:
5370                 apme_mask = E1000_WUC_APME;
5371                 adapter->has_amt = TRUE;
5372                 eeprom_data = E1000_READ_REG(&adapter->hw, E1000_WUC);
5373                 break;
5374         default:
5375                 e1000_read_nvm(&adapter->hw,
5376                     NVM_INIT_CONTROL3_PORT_A, 1, &eeprom_data);
5377                 break;
5378         }
5379         if (eeprom_data & apme_mask)
5380                 adapter->wol = (E1000_WUFC_MAG | E1000_WUFC_MC);
5381         /*
5382          * We have the eeprom settings, now apply the special cases
5383          * where the eeprom may be wrong or the board won't support
5384          * wake on lan on a particular port
5385          */
5386         device_id = pci_get_device(dev);
5387         switch (device_id) {
5388         case E1000_DEV_ID_82571EB_FIBER:
5389                 /* Wake events only supported on port A for dual fiber
5390                  * regardless of eeprom setting */
5391                 if (E1000_READ_REG(&adapter->hw, E1000_STATUS) &
5392                     E1000_STATUS_FUNC_1)
5393                         adapter->wol = 0;
5394                 break;
5395         case E1000_DEV_ID_82571EB_QUAD_COPPER:
5396         case E1000_DEV_ID_82571EB_QUAD_FIBER:
5397         case E1000_DEV_ID_82571EB_QUAD_COPPER_LP:
5398                 /* if quad port adapter, disable WoL on all but port A */
5399                 if (global_quad_port_a != 0)
5400                         adapter->wol = 0;
5401                 /* Reset for multiple quad port adapters */
5402                 if (++global_quad_port_a == 4)
5403                         global_quad_port_a = 0;
5404                 break;
5405         }
5406         return;
5407 }
5408
5409
5410 /*
5411  * Enable PCI Wake On LAN capability
5412  */
5413 static void
5414 em_enable_wakeup(device_t dev)
5415 {
5416         struct adapter  *adapter = device_get_softc(dev);
5417         struct ifnet    *ifp = adapter->ifp;
5418         int             error = 0;
5419         u32             pmc, ctrl, ctrl_ext, rctl;
5420         u16             status;
5421
5422         if (pci_find_cap(dev, PCIY_PMG, &pmc) != 0)
5423                 return;
5424
5425         /*
5426         ** Determine the type of wakeup: note that wol defaults to
5427         ** the magic + multicast set chosen in em_get_wakeup().
5428         */
5429         if ((ifp->if_capenable & IFCAP_WOL_MAGIC) == 0)
5430                 adapter->wol &= ~E1000_WUFC_MAG;
5431
5432         if ((ifp->if_capenable & IFCAP_WOL_MCAST) == 0)
5433                 adapter->wol &= ~E1000_WUFC_MC;
5434         else {
5435                 rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
5436                 rctl |= E1000_RCTL_MPE;
5437                 E1000_WRITE_REG(&adapter->hw, E1000_RCTL, rctl);
5438         }
5439
5440         if (!(adapter->wol & (E1000_WUFC_EX | E1000_WUFC_MAG | E1000_WUFC_MC)))
5441                 goto pme;
5442
5443         /* Advertise the wakeup capability */
5444         ctrl = E1000_READ_REG(&adapter->hw, E1000_CTRL);
5445         ctrl |= (E1000_CTRL_SWDPIN2 | E1000_CTRL_SWDPIN3);
5446         E1000_WRITE_REG(&adapter->hw, E1000_CTRL, ctrl);
5447
5448         /* Keep the laser running on Fiber adapters */
5449         if (adapter->hw.phy.media_type == e1000_media_type_fiber ||
5450             adapter->hw.phy.media_type == e1000_media_type_internal_serdes) {
5451                 ctrl_ext = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
5452                 ctrl_ext |= E1000_CTRL_EXT_SDP3_DATA;
5453                 E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT, ctrl_ext);
5454         }
5455
5456         if ((adapter->hw.mac.type == e1000_ich8lan) ||
5457             (adapter->hw.mac.type == e1000_pchlan) ||
5458             (adapter->hw.mac.type == e1000_ich9lan) ||
5459             (adapter->hw.mac.type == e1000_ich10lan))
5460                 e1000_suspend_workarounds_ich8lan(&adapter->hw);
5461
5462         if ((adapter->hw.mac.type == e1000_pchlan)  ||
5463             (adapter->hw.mac.type == e1000_pch2lan) ||
5464             (adapter->hw.mac.type == e1000_pch_lpt) ||
5465             (adapter->hw.mac.type == e1000_pch_spt) ||
5466             (adapter->hw.mac.type == e1000_pch_cnp)) {
5467                 error = em_enable_phy_wakeup(adapter);
5468                 if (error)
5469                         goto pme;
5470         } else {
5471                 /* Enable wakeup by the MAC */
5472                 E1000_WRITE_REG(&adapter->hw, E1000_WUC, E1000_WUC_PME_EN);
5473                 E1000_WRITE_REG(&adapter->hw, E1000_WUFC, adapter->wol);
5474         }
5475
5476         if (adapter->hw.phy.type == e1000_phy_igp_3)
5477                 e1000_igp3_phy_powerdown_workaround_ich8lan(&adapter->hw);
5478
5479 pme:
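        /*
        ** Program the PCI power-management status/enable register:
        ** assert PME_EN only when WOL is configured and setup above
        ** succeeded.
        */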
5480         status = pci_read_config(dev, pmc + PCIR_POWER_STATUS, 2);
5481         status &= ~(PCIM_PSTAT_PME | PCIM_PSTAT_PMEENABLE);
5482         if (!error && (ifp->if_capenable & IFCAP_WOL))
5483                 status |= PCIM_PSTAT_PME | PCIM_PSTAT_PMEENABLE;
5484         pci_write_config(dev, pmc + PCIR_POWER_STATUS, status, 2);
5485
5486         return;
5487 }
5488
5489 /*
5490 ** WOL in the newer chipset interfaces (pchlan)
5491 ** requires things to be copied into the PHY
5492 */
5493 static int
5494 em_enable_phy_wakeup(struct adapter *adapter)
5495 {
5496         struct e1000_hw *hw = &adapter->hw;
5497         u32 mreg, ret = 0;
5498         u16 preg;
5499
5500         /* copy MAC RARs to PHY RARs */
5501         e1000_copy_rx_addrs_to_phy_ich8lan(hw);
5502
5503         /* copy MAC MTA to PHY MTA */
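        /*
        ** Each 32-bit MTA entry spans two 16-bit PHY registers: the
        ** low word at BM_MTA(i) and the high word at BM_MTA(i) + 1.
        */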
5504         for (int i = 0; i < adapter->hw.mac.mta_reg_count; i++) {
5505                 mreg = E1000_READ_REG_ARRAY(hw, E1000_MTA, i);
5506                 e1000_write_phy_reg(hw, BM_MTA(i), (u16)(mreg & 0xFFFF));
5507                 e1000_write_phy_reg(hw, BM_MTA(i) + 1,
5508                     (u16)((mreg >> 16) & 0xFFFF));
5509         }
5510
5511         /* configure PHY Rx Control register */
5512         e1000_read_phy_reg(hw, BM_RCTL, &preg);
5513         mreg = E1000_READ_REG(hw, E1000_RCTL);
5514         if (mreg & E1000_RCTL_UPE)
5515                 preg |= BM_RCTL_UPE;
5516         if (mreg & E1000_RCTL_MPE)
5517                 preg |= BM_RCTL_MPE;
5518         preg &= ~(BM_RCTL_MO_MASK);
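        /* Re-encode RCTL's multicast offset field at the PHY's bit position. */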
5519         if (mreg & E1000_RCTL_MO_3)
5520                 preg |= (((mreg & E1000_RCTL_MO_3) >> E1000_RCTL_MO_SHIFT)
5521                                 << BM_RCTL_MO_SHIFT);
5522         if (mreg & E1000_RCTL_BAM)
5523                 preg |= BM_RCTL_BAM;
5524         if (mreg & E1000_RCTL_PMCF)
5525                 preg |= BM_RCTL_PMCF;
5526         mreg = E1000_READ_REG(hw, E1000_CTRL);
5527         if (mreg & E1000_CTRL_RFCE)
5528                 preg |= BM_RCTL_RFCE;
5529         e1000_write_phy_reg(hw, BM_RCTL, preg);
5530
5531         /* enable PHY wakeup in MAC register */
5532         E1000_WRITE_REG(hw, E1000_WUC,
5533             E1000_WUC_PHY_WAKE | E1000_WUC_PME_EN);
5534         E1000_WRITE_REG(hw, E1000_WUFC, adapter->wol);
5535
5536         /* configure and enable PHY wakeup in PHY registers */
5537         e1000_write_phy_reg(hw, BM_WUFC, adapter->wol);
5538         e1000_write_phy_reg(hw, BM_WUC, E1000_WUC_PME_EN);
5539
5540         /* activate PHY wakeup */
5541         ret = hw->phy.ops.acquire(hw);
5542         if (ret) {
5543                 printf("Could not acquire PHY\n");
5544                 return ret;
5545         }
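        /*
        ** The wakeup enable bits live on BM PHY page 769
        ** (BM_WUC_ENABLE_PAGE); select that page, then set the enable
        ** and host wakeup bits in BM_WUC_ENABLE_REG.
        */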
5546         e1000_write_phy_reg_mdic(hw, IGP01E1000_PHY_PAGE_SELECT,
5547                                  (BM_WUC_ENABLE_PAGE << IGP_PAGE_SHIFT));
5548         ret = e1000_read_phy_reg_mdic(hw, BM_WUC_ENABLE_REG, &preg);
5549         if (ret) {
5550                 printf("Could not read PHY page 769\n");
5551                 goto out;
5552         }
5553         preg |= BM_WUC_ENABLE_BIT | BM_WUC_HOST_WU_BIT;
5554         ret = e1000_write_phy_reg_mdic(hw, BM_WUC_ENABLE_REG, preg);
5555         if (ret)
5556                 printf("Could not set PHY Host Wakeup bit\n");
5557 out:
5558         hw->phy.ops.release(hw);
5559
5560         return ret;
5561 }
5562
5563 static void
5564 em_led_func(void *arg, int onoff)
5565 {
5566         struct adapter  *adapter = arg;
5567  
5568         EM_CORE_LOCK(adapter);
5569         if (onoff) {
5570                 e1000_setup_led(&adapter->hw);
5571                 e1000_led_on(&adapter->hw);
5572         } else {
5573                 e1000_led_off(&adapter->hw);
5574                 e1000_cleanup_led(&adapter->hw);
5575         }
5576         EM_CORE_UNLOCK(adapter);
5577 }
5578
5579 /*
5580 ** Disable the L0s and L1 link states
5581 */
5582 static void
5583 em_disable_aspm(struct adapter *adapter)
5584 {
5585         int             base, reg;
5586         u16             link_cap, link_ctrl;
5587         device_t        dev = adapter->dev;
5588
5589         switch (adapter->hw.mac.type) {
5590                 case e1000_82573:
5591                 case e1000_82574:
5592                 case e1000_82583:
5593                         break;
5594                 default:
5595                         return;
5596         }
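        /*
        ** Locate the PCIe capability, confirm ASPM support in the
        ** link capabilities, then clear the ASPM control (L0s/L1)
        ** bits in the link control register.
        */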
5597         if (pci_find_cap(dev, PCIY_EXPRESS, &base) != 0)
5598                 return;
5599         reg = base + PCIER_LINK_CAP;
5600         link_cap = pci_read_config(dev, reg, 2);
5601         if ((link_cap & PCIEM_LINK_CAP_ASPM) == 0)
5602                 return;
5603         reg = base + PCIER_LINK_CTL;
5604         link_ctrl = pci_read_config(dev, reg, 2);
5605         link_ctrl &= ~PCIEM_LINK_CTL_ASPMC;
5606         pci_write_config(dev, reg, link_ctrl, 2);
5607         return;
5608 }
5609
5610 /**********************************************************************
5611  *
5612  *  Update the board statistics counters.
5613  *
5614  **********************************************************************/
5615 static void
5616 em_update_stats_counters(struct adapter *adapter)
5617 {
5618         struct ifnet   *ifp;
5619
5620         if (adapter->hw.phy.media_type == e1000_media_type_copper ||
5621            (E1000_READ_REG(&adapter->hw, E1000_STATUS) & E1000_STATUS_LU)) {
5622                 adapter->stats.symerrs += E1000_READ_REG(&adapter->hw, E1000_SYMERRS);
5623                 adapter->stats.sec += E1000_READ_REG(&adapter->hw, E1000_SEC);
5624         }
5625         adapter->stats.crcerrs += E1000_READ_REG(&adapter->hw, E1000_CRCERRS);
5626         adapter->stats.mpc += E1000_READ_REG(&adapter->hw, E1000_MPC);
5627         adapter->stats.scc += E1000_READ_REG(&adapter->hw, E1000_SCC);
5628         adapter->stats.ecol += E1000_READ_REG(&adapter->hw, E1000_ECOL);
5629
5630         adapter->stats.mcc += E1000_READ_REG(&adapter->hw, E1000_MCC);
5631         adapter->stats.latecol += E1000_READ_REG(&adapter->hw, E1000_LATECOL);
5632         adapter->stats.colc += E1000_READ_REG(&adapter->hw, E1000_COLC);
5633         adapter->stats.dc += E1000_READ_REG(&adapter->hw, E1000_DC);
5634         adapter->stats.rlec += E1000_READ_REG(&adapter->hw, E1000_RLEC);
5635         adapter->stats.xonrxc += E1000_READ_REG(&adapter->hw, E1000_XONRXC);
5636         adapter->stats.xontxc += E1000_READ_REG(&adapter->hw, E1000_XONTXC);
5637         adapter->stats.xoffrxc += E1000_READ_REG(&adapter->hw, E1000_XOFFRXC);
5638         adapter->stats.xofftxc += E1000_READ_REG(&adapter->hw, E1000_XOFFTXC);
5639         adapter->stats.fcruc += E1000_READ_REG(&adapter->hw, E1000_FCRUC);
5640         adapter->stats.prc64 += E1000_READ_REG(&adapter->hw, E1000_PRC64);
5641         adapter->stats.prc127 += E1000_READ_REG(&adapter->hw, E1000_PRC127);
5642         adapter->stats.prc255 += E1000_READ_REG(&adapter->hw, E1000_PRC255);
5643         adapter->stats.prc511 += E1000_READ_REG(&adapter->hw, E1000_PRC511);
5644         adapter->stats.prc1023 += E1000_READ_REG(&adapter->hw, E1000_PRC1023);
5645         adapter->stats.prc1522 += E1000_READ_REG(&adapter->hw, E1000_PRC1522);
5646         adapter->stats.gprc += E1000_READ_REG(&adapter->hw, E1000_GPRC);
5647         adapter->stats.bprc += E1000_READ_REG(&adapter->hw, E1000_BPRC);
5648         adapter->stats.mprc += E1000_READ_REG(&adapter->hw, E1000_MPRC);
5649         adapter->stats.gptc += E1000_READ_REG(&adapter->hw, E1000_GPTC);
5650
5651         /* For the 64-bit byte counters the low dword must be read first. */
5652         /* Both registers clear on the read of the high dword */
5653
5654         adapter->stats.gorc += E1000_READ_REG(&adapter->hw, E1000_GORCL) +
5655             ((u64)E1000_READ_REG(&adapter->hw, E1000_GORCH) << 32);
5656         adapter->stats.gotc += E1000_READ_REG(&adapter->hw, E1000_GOTCL) +
5657             ((u64)E1000_READ_REG(&adapter->hw, E1000_GOTCH) << 32);
5658
5659         adapter->stats.rnbc += E1000_READ_REG(&adapter->hw, E1000_RNBC);
5660         adapter->stats.ruc += E1000_READ_REG(&adapter->hw, E1000_RUC);
5661         adapter->stats.rfc += E1000_READ_REG(&adapter->hw, E1000_RFC);
5662         adapter->stats.roc += E1000_READ_REG(&adapter->hw, E1000_ROC);
5663         adapter->stats.rjc += E1000_READ_REG(&adapter->hw, E1000_RJC);
5664
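        /*
        ** XXX: TOR/TOT accumulate only the high dwords here; the low
        ** dwords are not read first as the note above prescribes.
        */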
5665         adapter->stats.tor += E1000_READ_REG(&adapter->hw, E1000_TORH);
5666         adapter->stats.tot += E1000_READ_REG(&adapter->hw, E1000_TOTH);
5667
5668         adapter->stats.tpr += E1000_READ_REG(&adapter->hw, E1000_TPR);
5669         adapter->stats.tpt += E1000_READ_REG(&adapter->hw, E1000_TPT);
5670         adapter->stats.ptc64 += E1000_READ_REG(&adapter->hw, E1000_PTC64);
5671         adapter->stats.ptc127 += E1000_READ_REG(&adapter->hw, E1000_PTC127);
5672         adapter->stats.ptc255 += E1000_READ_REG(&adapter->hw, E1000_PTC255);
5673         adapter->stats.ptc511 += E1000_READ_REG(&adapter->hw, E1000_PTC511);
5674         adapter->stats.ptc1023 += E1000_READ_REG(&adapter->hw, E1000_PTC1023);
5675         adapter->stats.ptc1522 += E1000_READ_REG(&adapter->hw, E1000_PTC1522);
5676         adapter->stats.mptc += E1000_READ_REG(&adapter->hw, E1000_MPTC);
5677         adapter->stats.bptc += E1000_READ_REG(&adapter->hw, E1000_BPTC);
5678
5679         /* Interrupt Counts */
5680
5681         adapter->stats.iac += E1000_READ_REG(&adapter->hw, E1000_IAC);
5682         adapter->stats.icrxptc += E1000_READ_REG(&adapter->hw, E1000_ICRXPTC);
5683         adapter->stats.icrxatc += E1000_READ_REG(&adapter->hw, E1000_ICRXATC);
5684         adapter->stats.ictxptc += E1000_READ_REG(&adapter->hw, E1000_ICTXPTC);
5685         adapter->stats.ictxatc += E1000_READ_REG(&adapter->hw, E1000_ICTXATC);
5686         adapter->stats.ictxqec += E1000_READ_REG(&adapter->hw, E1000_ICTXQEC);
5687         adapter->stats.ictxqmtc += E1000_READ_REG(&adapter->hw, E1000_ICTXQMTC);
5688         adapter->stats.icrxdmtc += E1000_READ_REG(&adapter->hw, E1000_ICRXDMTC);
5689         adapter->stats.icrxoc += E1000_READ_REG(&adapter->hw, E1000_ICRXOC);
5690
5691         if (adapter->hw.mac.type >= e1000_82543) {
5692                 adapter->stats.algnerrc +=
5693                     E1000_READ_REG(&adapter->hw, E1000_ALGNERRC);
5694                 adapter->stats.rxerrc +=
5695                     E1000_READ_REG(&adapter->hw, E1000_RXERRC);
5696                 adapter->stats.tncrs +=
5697                     E1000_READ_REG(&adapter->hw, E1000_TNCRS);
5698                 adapter->stats.cexterr +=
5699                     E1000_READ_REG(&adapter->hw, E1000_CEXTERR);
5700                 adapter->stats.tsctc +=
5701                     E1000_READ_REG(&adapter->hw, E1000_TSCTC);
5702                 adapter->stats.tsctfc +=
5703                     E1000_READ_REG(&adapter->hw, E1000_TSCTFC);
5704         }
5705         ifp = adapter->ifp;
5706
5707         ifp->if_collisions = adapter->stats.colc;
5708
5709         /* Rx Errors */
5710         ifp->if_ierrors = adapter->dropped_pkts + adapter->stats.rxerrc +
5711             adapter->stats.crcerrs + adapter->stats.algnerrc +
5712             adapter->stats.ruc + adapter->stats.roc +
5713             adapter->stats.mpc + adapter->stats.cexterr;
5714
5715         /* Tx Errors */
5716         ifp->if_oerrors = adapter->stats.ecol +
5717             adapter->stats.latecol + adapter->watchdog_events;
5718 }
5719
5720 /* Export a single 32-bit register via a read-only sysctl. */
5721 static int
5722 em_sysctl_reg_handler(SYSCTL_HANDLER_ARGS)
5723 {
5724         struct adapter *adapter;
5725         u_int val;
5726
5727         adapter = oidp->oid_arg1;
5728         val = E1000_READ_REG(&adapter->hw, oidp->oid_arg2);
5729         return (sysctl_handle_int(oidp, &val, 0, req));
5730 }
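/*
** Example (assuming unit 0): the register OIDs added below in
** em_add_hw_stats() read through this handler, e.g.:
**   # sysctl dev.em.0.device_control
** returns the live contents of E1000_CTRL.
*/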
5731
5732 /*
5733  * Add sysctl variables, one per statistic, to the system.
5734  */
5735 static void
5736 em_add_hw_stats(struct adapter *adapter)
5737 {
5738         device_t dev = adapter->dev;
5739
5740         struct tx_ring *txr = adapter->tx_rings;
5741         struct rx_ring *rxr = adapter->rx_rings;
5742
5743         struct sysctl_ctx_list *ctx = device_get_sysctl_ctx(dev);
5744         struct sysctl_oid *tree = device_get_sysctl_tree(dev);
5745         struct sysctl_oid_list *child = SYSCTL_CHILDREN(tree);
5746         struct e1000_hw_stats *stats = &adapter->stats;
5747
5748         struct sysctl_oid *stat_node, *queue_node, *int_node;
5749         struct sysctl_oid_list *stat_list, *queue_list, *int_list;
5750
5751 #define QUEUE_NAME_LEN 32
5752         char namebuf[QUEUE_NAME_LEN];
5753         
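        /*
        ** Everything below hangs off this device's sysctl tree
        ** (dev.em.<unit>): flat driver counters first, per-queue
        ** tx/rx nodes next, then the mac_stats and interrupts subtrees.
        */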
5754         /* Driver Statistics */
5755         SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "dropped", 
5756                         CTLFLAG_RD, &adapter->dropped_pkts,
5757                         "Driver dropped packets");
5758         SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "link_irq",
5759                         CTLFLAG_RD, &adapter->link_irq,
5760                         "Link MSIX IRQ Handled");
5761         SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "mbuf_defrag_fail", 
5762                          CTLFLAG_RD, &adapter->mbuf_defrag_failed,
5763                          "Defragmenting mbuf chain failed");
5764         SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "tx_dma_fail", 
5765                         CTLFLAG_RD, &adapter->no_tx_dma_setup,
5766                         "Driver tx dma failure in xmit");
5767         SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "rx_overruns",
5768                         CTLFLAG_RD, &adapter->rx_overruns,
5769                         "RX overruns");
5770         SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "watchdog_timeouts",
5771                         CTLFLAG_RD, &adapter->watchdog_events,
5772                         "Watchdog timeouts");
5773         
5774         SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "device_control",
5775                         CTLTYPE_UINT | CTLFLAG_RD, adapter, E1000_CTRL,
5776                         em_sysctl_reg_handler, "IU",
5777                         "Device Control Register");
5778         SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "rx_control",
5779                         CTLTYPE_UINT | CTLFLAG_RD, adapter, E1000_RCTL,
5780                         em_sysctl_reg_handler, "IU",
5781                         "Receiver Control Register");
5782         SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "fc_high_water",
5783                         CTLFLAG_RD, &adapter->hw.fc.high_water, 0,
5784                         "Flow Control High Watermark");
5785         SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "fc_low_water", 
5786                         CTLFLAG_RD, &adapter->hw.fc.low_water, 0,
5787                         "Flow Control Low Watermark");
5788
5789         for (int i = 0; i < adapter->num_queues; i++, txr++, rxr++) {
5790                 snprintf(namebuf, QUEUE_NAME_LEN, "queue_tx_%d", i);
5791                 queue_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, namebuf,
5792                                             CTLFLAG_RD, NULL, "TX Queue Name");
5793                 queue_list = SYSCTL_CHILDREN(queue_node);
5794
5795                 SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "txd_head", 
5796                                 CTLTYPE_UINT | CTLFLAG_RD, adapter,
5797                                 E1000_TDH(txr->me),
5798                                 em_sysctl_reg_handler, "IU",
5799                                 "Transmit Descriptor Head");
5800                 SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "txd_tail", 
5801                                 CTLTYPE_UINT | CTLFLAG_RD, adapter,
5802                                 E1000_TDT(txr->me),
5803                                 em_sysctl_reg_handler, "IU",
5804                                 "Transmit Descriptor Tail");
5805                 SYSCTL_ADD_ULONG(ctx, queue_list, OID_AUTO, "tx_irq",
5806                                 CTLFLAG_RD, &txr->tx_irq,
5807                                 "Queue MSI-X Transmit Interrupts");
5808                 SYSCTL_ADD_ULONG(ctx, queue_list, OID_AUTO, "no_desc_avail", 
5809                                 CTLFLAG_RD, &txr->no_desc_avail,
5810                                 "Queue No Descriptor Available");
5811
5812                 snprintf(namebuf, QUEUE_NAME_LEN, "queue_rx_%d", i);
5813                 queue_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, namebuf,
5814                                             CTLFLAG_RD, NULL, "RX Queue Name");
5815                 queue_list = SYSCTL_CHILDREN(queue_node);
5816
5817                 SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "rxd_head", 
5818                                 CTLTYPE_UINT | CTLFLAG_RD, adapter,
5819                                 E1000_RDH(rxr->me),
5820                                 em_sysctl_reg_handler, "IU",
5821                                 "Receive Descriptor Head");
5822                 SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "rxd_tail", 
5823                                 CTLTYPE_UINT | CTLFLAG_RD, adapter,
5824                                 E1000_RDT(rxr->me),
5825                                 em_sysctl_reg_handler, "IU",
5826                                 "Receive Descriptor Tail");
5827                 SYSCTL_ADD_ULONG(ctx, queue_list, OID_AUTO, "rx_irq",
5828                                 CTLFLAG_RD, &rxr->rx_irq,
5829                                 "Queue MSI-X Receive Interrupts");
5830         }
5831
5832         /* MAC stats get their own sub node */
5833
5834         stat_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "mac_stats", 
5835                                     CTLFLAG_RD, NULL, "Statistics");
5836         stat_list = SYSCTL_CHILDREN(stat_node);
5837
5838         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "excess_coll",
5839                         CTLFLAG_RD, &stats->ecol,
5840                         "Excessive collisions");
5841         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "single_coll",
5842                         CTLFLAG_RD, &stats->scc,
5843                         "Single collisions");
5844         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "multiple_coll",
5845                         CTLFLAG_RD, &stats->mcc,
5846                         "Multiple collisions");
5847         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "late_coll",
5848                         CTLFLAG_RD, &stats->latecol,
5849                         "Late collisions");
5850         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "collision_count",
5851                         CTLFLAG_RD, &stats->colc,
5852                         "Collision Count");
5853         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "symbol_errors",
5854                         CTLFLAG_RD, &adapter->stats.symerrs,
5855                         "Symbol Errors");
5856         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "sequence_errors",
5857                         CTLFLAG_RD, &adapter->stats.sec,
5858                         "Sequence Errors");
5859         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "defer_count",
5860                         CTLFLAG_RD, &adapter->stats.dc,
5861                         "Defer Count");
5862         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "missed_packets",
5863                         CTLFLAG_RD, &adapter->stats.mpc,
5864                         "Missed Packets");
5865         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_no_buff",
5866                         CTLFLAG_RD, &adapter->stats.rnbc,
5867                         "Receive No Buffers");
5868         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_undersize",
5869                         CTLFLAG_RD, &adapter->stats.ruc,
5870                         "Receive Undersize");
5871         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_fragmented",
5872                         CTLFLAG_RD, &adapter->stats.rfc,
5873                         "Fragmented Packets Received");
5874         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_oversize",
5875                         CTLFLAG_RD, &adapter->stats.roc,
5876                         "Oversized Packets Received");
5877         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_jabber",
5878                         CTLFLAG_RD, &adapter->stats.rjc,
5879                         "Received Jabber");
5880         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_errs",
5881                         CTLFLAG_RD, &adapter->stats.rxerrc,
5882                         "Receive Errors");
5883         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "crc_errs",
5884                         CTLFLAG_RD, &adapter->stats.crcerrs,
5885                         "CRC errors");
5886         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "alignment_errs",
5887                         CTLFLAG_RD, &adapter->stats.algnerrc,
5888                         "Alignment Errors");
5889         /* On 82575 these are collision counts */
5890         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "coll_ext_errs",
5891                         CTLFLAG_RD, &adapter->stats.cexterr,
5892                         "Collision/Carrier extension errors");
5893         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "xon_recvd",
5894                         CTLFLAG_RD, &adapter->stats.xonrxc,
5895                         "XON Received");
5896         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "xon_txd",
5897                         CTLFLAG_RD, &adapter->stats.xontxc,
5898                         "XON Transmitted");
5899         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "xoff_recvd",
5900                         CTLFLAG_RD, &adapter->stats.xoffrxc,
5901                         "XOFF Received");
5902         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "xoff_txd",
5903                         CTLFLAG_RD, &adapter->stats.xofftxc,
5904                         "XOFF Transmitted");
5905
5906         /* Packet Reception Stats */
5907         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "total_pkts_recvd",
5908                         CTLFLAG_RD, &adapter->stats.tpr,
5909                         "Total Packets Received");
5910         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_pkts_recvd",
5911                         CTLFLAG_RD, &adapter->stats.gprc,
5912                         "Good Packets Received");
5913         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "bcast_pkts_recvd",
5914                         CTLFLAG_RD, &adapter->stats.bprc,
5915                         "Broadcast Packets Received");
5916         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "mcast_pkts_recvd",
5917                         CTLFLAG_RD, &adapter->stats.mprc,
5918                         "Multicast Packets Received");
5919         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_64",
5920                         CTLFLAG_RD, &adapter->stats.prc64,
5921                         "64 byte frames received");
5922         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_65_127",
5923                         CTLFLAG_RD, &adapter->stats.prc127,
5924                         "65-127 byte frames received");
5925         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_128_255",
5926                         CTLFLAG_RD, &adapter->stats.prc255,
5927                         "128-255 byte frames received");
5928         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_256_511",
5929                         CTLFLAG_RD, &adapter->stats.prc511,
5930                         "256-511 byte frames received");
5931         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_512_1023",
5932                         CTLFLAG_RD, &adapter->stats.prc1023,
5933                         "512-1023 byte frames received");
5934         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_1024_1522",
5935                         CTLFLAG_RD, &adapter->stats.prc1522,
5936                         "1024-1522 byte frames received");
5937         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_octets_recvd",
5938                         CTLFLAG_RD, &adapter->stats.gorc, 
5939                         "Good Octets Received"); 
5940
5941         /* Packet Transmission Stats */
5942         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_octets_txd",
5943                         CTLFLAG_RD, &adapter->stats.gotc, 
5944                         "Good Octets Transmitted"); 
5945         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "total_pkts_txd",
5946                         CTLFLAG_RD, &adapter->stats.tpt,
5947                         "Total Packets Transmitted");
5948         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_pkts_txd",
5949                         CTLFLAG_RD, &adapter->stats.gptc,
5950                         "Good Packets Transmitted");
5951         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "bcast_pkts_txd",
5952                         CTLFLAG_RD, &adapter->stats.bptc,
5953                         "Broadcast Packets Transmitted");
5954         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "mcast_pkts_txd",
5955                         CTLFLAG_RD, &adapter->stats.mptc,
5956                         "Multicast Packets Transmitted");
5957         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_64",
5958                         CTLFLAG_RD, &adapter->stats.ptc64,
5959                         "64 byte frames transmitted");
5960         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_65_127",
5961                         CTLFLAG_RD, &adapter->stats.ptc127,
5962                         "65-127 byte frames transmitted");
5963         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_128_255",
5964                         CTLFLAG_RD, &adapter->stats.ptc255,
5965                         "128-255 byte frames transmitted");
5966         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_256_511",
5967                         CTLFLAG_RD, &adapter->stats.ptc511,
5968                         "256-511 byte frames transmitted");
5969         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_512_1023",
5970                         CTLFLAG_RD, &adapter->stats.ptc1023,
5971                         "512-1023 byte frames transmitted");
5972         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_1024_1522",
5973                         CTLFLAG_RD, &adapter->stats.ptc1522,
5974                         "1024-1522 byte frames transmitted");
5975         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tso_txd",
5976                         CTLFLAG_RD, &adapter->stats.tsctc,
5977                         "TSO Contexts Transmitted");
5978         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tso_ctx_fail",
5979                         CTLFLAG_RD, &adapter->stats.tsctfc,
5980                         "TSO Contexts Failed");
5981
5982
5983         /* Interrupt Stats */
5984
5985         int_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "interrupts", 
5986                                     CTLFLAG_RD, NULL, "Interrupt Statistics");
5987         int_list = SYSCTL_CHILDREN(int_node);
5988
5989         SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "asserts",
5990                         CTLFLAG_RD, &adapter->stats.iac,
5991                         "Interrupt Assertion Count");
5992
5993         SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "rx_pkt_timer",
5994                         CTLFLAG_RD, &adapter->stats.icrxptc,
5995                         "Interrupt Cause Rx Pkt Timer Expire Count");
5996
5997         SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "rx_abs_timer",
5998                         CTLFLAG_RD, &adapter->stats.icrxatc,
5999                         "Interrupt Cause Rx Abs Timer Expire Count");
6000
6001         SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "tx_pkt_timer",
6002                         CTLFLAG_RD, &adapter->stats.ictxptc,
6003                         "Interrupt Cause Tx Pkt Timer Expire Count");
6004
6005         SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "tx_abs_timer",
6006                         CTLFLAG_RD, &adapter->stats.ictxatc,
6007                         "Interrupt Cause Tx Abs Timer Expire Count");
6008
6009         SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "tx_queue_empty",
6010                         CTLFLAG_RD, &adapter->stats.ictxqec,
6011                         "Interrupt Cause Tx Queue Empty Count");
6012
6013         SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "tx_queue_min_thresh",
6014                         CTLFLAG_RD, &adapter->stats.ictxqmtc,
6015                         "Interrupt Cause Tx Queue Min Thresh Count");
6016
6017         SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "rx_desc_min_thresh",
6018                         CTLFLAG_RD, &adapter->stats.icrxdmtc,
6019                         "Interrupt Cause Rx Desc Min Thresh Count");
6020
6021         SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "rx_overrun",
6022                         CTLFLAG_RD, &adapter->stats.icrxoc,
6023                         "Interrupt Cause Receiver Overrun Count");
6024 }
6025
6026 /**********************************************************************
6027  *
6028  *  This routine provides a way to dump out the adapter eeprom,
6029  *  often a useful debug/service tool. This only dumps the first
6030  *  32 words; the data that matters lives within that range.
6031  *
6032  **********************************************************************/
6033 static int
6034 em_sysctl_nvm_info(SYSCTL_HANDLER_ARGS)
6035 {
6036         struct adapter *adapter = (struct adapter *)arg1;
6037         int error;
6038         int result;
6039
6040         result = -1;
6041         error = sysctl_handle_int(oidp, &result, 0, req);
6042
6043         if (error || !req->newptr)
6044                 return (error);
6045
6046         /*
6047          * This value will cause a hex dump of the
6048          * first 32 16-bit words of the EEPROM to
6049          * the screen.
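         *
         * Example (assuming the OID is dev.em.0.nvm):
         *   # sysctl dev.em.0.nvm=1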
6050          */
6051         if (result == 1)
6052                 em_print_nvm_info(adapter);
6053
6054         return (error);
6055 }
6056
6057 static void
6058 em_print_nvm_info(struct adapter *adapter)
6059 {
6060         u16     eeprom_data;
6061         int     i, j, row = 0;
6062
6063         /* It's a bit crude, but it gets the job done */
6064         printf("\nInterface EEPROM Dump:\n");
6065         printf("Offset\n0x0000  ");
6066         for (i = 0, j = 0; i < 32; i++, j++) {
6067                 if (j == 8) { /* Make the offset block */
6068                         j = 0; ++row;
6069                         printf("\n0x00%x0  ", row);
6070                 }
6071                 e1000_read_nvm(&adapter->hw, i, 1, &eeprom_data);
6072                 printf("%04x ", eeprom_data);
6073         }
6074         printf("\n");
6075 }
6076
6077 static int
6078 em_sysctl_int_delay(SYSCTL_HANDLER_ARGS)
6079 {
6080         struct em_int_delay_info *info;
6081         struct adapter *adapter;
6082         u32 regval;
6083         int error, usecs, ticks;
6084
6085         info = (struct em_int_delay_info *)arg1;
6086         usecs = info->value;
6087         error = sysctl_handle_int(oidp, &usecs, 0, req);
6088         if (error != 0 || req->newptr == NULL)
6089                 return (error);
6090         if (usecs < 0 || usecs > EM_TICKS_TO_USECS(65535))
6091                 return (EINVAL);
6092         info->value = usecs;
6093         ticks = EM_USECS_TO_TICKS(usecs);
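        /*
        ** EM_USECS_TO_TICKS() yields 1.024us hardware ticks; the ITR
        ** register instead counts 256ns units, hence the multiply by
        ** four below.
        */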
6094         if (info->offset == E1000_ITR)  /* units are 256ns here */
6095                 ticks *= 4;
6096
6097         adapter = info->adapter;
6098         
6099         EM_CORE_LOCK(adapter);
6100         regval = E1000_READ_OFFSET(&adapter->hw, info->offset);
6101         regval = (regval & ~0xffff) | (ticks & 0xffff);
6102         /* Handle a few special cases. */
6103         switch (info->offset) {
6104         case E1000_RDTR:
6105                 break;
6106         case E1000_TIDV:
6107                 if (ticks == 0) {
6108                         adapter->txd_cmd &= ~E1000_TXD_CMD_IDE;
6109                         /* Don't write 0 into the TIDV register. */
6110                         regval++;
6111                 } else
6112                         adapter->txd_cmd |= E1000_TXD_CMD_IDE;
6113                 break;
6114         }
6115         E1000_WRITE_OFFSET(&adapter->hw, info->offset, regval);
6116         EM_CORE_UNLOCK(adapter);
6117         return (0);
6118 }
6119
6120 static void
6121 em_add_int_delay_sysctl(struct adapter *adapter, const char *name,
6122         const char *description, struct em_int_delay_info *info,
6123         int offset, int value)
6124 {
6125         info->adapter = adapter;
6126         info->offset = offset;
6127         info->value = value;
6128         SYSCTL_ADD_PROC(device_get_sysctl_ctx(adapter->dev),
6129             SYSCTL_CHILDREN(device_get_sysctl_tree(adapter->dev)),
6130             OID_AUTO, name, CTLTYPE_INT|CTLFLAG_RW,
6131             info, 0, em_sysctl_int_delay, "I", description);
6132 }
6133
6134 static void
6135 em_set_sysctl_value(struct adapter *adapter, const char *name,
6136         const char *description, int *limit, int value)
6137 {
6138         *limit = value;
6139         SYSCTL_ADD_INT(device_get_sysctl_ctx(adapter->dev),
6140             SYSCTL_CHILDREN(device_get_sysctl_tree(adapter->dev)),
6141             OID_AUTO, name, CTLFLAG_RW, limit, value, description);
6142 }
6143
6144
6145 /*
6146 ** Set flow control using sysctl:
6147 ** Flow control values:
6148 **      0 - off
6149 **      1 - rx pause
6150 **      2 - tx pause
6151 **      3 - full
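**
** These values track the e1000_fc_mode enum (none/rx/tx/full).
** Example (assuming the OID is dev.em.0.fc):
**      # sysctl dev.em.0.fc=3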
6152 */
6153 static int
6154 em_set_flowcntl(SYSCTL_HANDLER_ARGS)
6155 {       
6156         int             error;
6157         static int      input = 3; /* default is full */
6158         struct adapter  *adapter = (struct adapter *) arg1;
6159                     
6160         error = sysctl_handle_int(oidp, &input, 0, req);
6161     
6162         if ((error) || (req->newptr == NULL))
6163                 return (error);
6164                 
6165         if (input == adapter->fc) /* no change? */
6166                 return (error);
6167
6168         switch (input) {
6169                 case e1000_fc_rx_pause:
6170                 case e1000_fc_tx_pause:
6171                 case e1000_fc_full:
6172                 case e1000_fc_none:
6173                         adapter->hw.fc.requested_mode = input;
6174                         adapter->fc = input;
6175                         break;
6176                 default:
6177                         /* Do nothing */
6178                         return (error);
6179         }
6180
6181         adapter->hw.fc.current_mode = adapter->hw.fc.requested_mode;
6182         e1000_force_mac_fc(&adapter->hw);
6183         return (error);
6184 }
6185
6186 /*
6187 ** Manage Energy Efficient Ethernet:
6188 ** Control values:
6189 **     0 - EEE enabled, 1 - EEE disabled
6190 */
6191 static int
6192 em_sysctl_eee(SYSCTL_HANDLER_ARGS)
6193 {
6194         struct adapter *adapter = (struct adapter *) arg1;
6195         int             error, value;
6196
6197         value = adapter->hw.dev_spec.ich8lan.eee_disable;
6198         error = sysctl_handle_int(oidp, &value, 0, req);
6199         if (error || req->newptr == NULL)
6200                 return (error);
6201         EM_CORE_LOCK(adapter);
6202         adapter->hw.dev_spec.ich8lan.eee_disable = (value != 0);
6203         em_init_locked(adapter);
6204         EM_CORE_UNLOCK(adapter);
6205         return (0);
6206 }
6207
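/*
** Trigger the dump below from userland, e.g. (assuming the OID is
** dev.em.0.debug):
**      # sysctl dev.em.0.debug=1
*/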
6208 static int
6209 em_sysctl_debug_info(SYSCTL_HANDLER_ARGS)
6210 {
6211         struct adapter *adapter;
6212         int error;
6213         int result;
6214
6215         result = -1;
6216         error = sysctl_handle_int(oidp, &result, 0, req);
6217
6218         if (error || !req->newptr)
6219                 return (error);
6220
6221         if (result == 1) {
6222                 adapter = (struct adapter *)arg1;
6223                 em_print_debug_info(adapter);
6224         }
6225
6226         return (error);
6227 }
6228
6229 /*
6230 ** This routine is meant to be fluid; add whatever is
6231 ** needed for debugging a problem.  -jfv
6232 */
6233 static void
6234 em_print_debug_info(struct adapter *adapter)
6235 {
6236         device_t dev = adapter->dev;
6237         struct tx_ring *txr = adapter->tx_rings;
6238         struct rx_ring *rxr = adapter->rx_rings;
6239
6240         if (adapter->ifp->if_drv_flags & IFF_DRV_RUNNING)
6241                 printf("Interface is RUNNING ");
6242         else
6243                 printf("Interface is NOT RUNNING\n");
6244
6245         if (adapter->ifp->if_drv_flags & IFF_DRV_OACTIVE)
6246                 printf("and INACTIVE\n");
6247         else
6248                 printf("and ACTIVE\n");
6249
6250         for (int i = 0; i < adapter->num_queues; i++, txr++, rxr++) {
6251                 device_printf(dev, "TX Queue %d ------\n", i);
6252                 device_printf(dev, "hw tdh = %d, hw tdt = %d\n",
6253                         E1000_READ_REG(&adapter->hw, E1000_TDH(i)),
6254                         E1000_READ_REG(&adapter->hw, E1000_TDT(i)));
6255                 device_printf(dev, "Tx Queue Status = %d\n", txr->busy);
6256                 device_printf(dev, "TX descriptors avail = %d\n",
6257                         txr->tx_avail);
6258                 device_printf(dev, "Tx Descriptors avail failure = %ld\n",
6259                         txr->no_desc_avail);
6260                 device_printf(dev, "RX Queue %d ------\n", i);
6261                 device_printf(dev, "hw rdh = %d, hw rdt = %d\n",
6262                         E1000_READ_REG(&adapter->hw, E1000_RDH(i)),
6263                         E1000_READ_REG(&adapter->hw, E1000_RDT(i)));
6264                 device_printf(dev, "RX discarded packets = %ld\n",
6265                         rxr->rx_discarded);
6266                 device_printf(dev, "RX Next to Check = %d\n", rxr->next_to_check);
6267                 device_printf(dev, "RX Next to Refresh = %d\n", rxr->next_to_refresh);
6268         }
6269 }
6270
6271 #ifdef EM_MULTIQUEUE
6272 /*
6273  * 82574 only:
6274  * Write a new value to the EEPROM increasing the number of MSIX
6275  * vectors from 3 to 5, for proper multiqueue support.
6276  */
6277 static void
6278 em_enable_vectors_82574(struct adapter *adapter)
6279 {
6280         struct e1000_hw *hw = &adapter->hw;
6281         device_t dev = adapter->dev;
6282         u16 edata;
6283
6284         e1000_read_nvm(hw, EM_NVM_PCIE_CTRL, 1, &edata);
6285         device_printf(dev, "Current cap: %#06x\n", edata);
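        /*
        ** The NVM field appears to encode the vector count minus one,
        ** so writing 4 yields the 5 vectors needed for multiqueue.
        */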
6286         if (((edata & EM_NVM_MSIX_N_MASK) >> EM_NVM_MSIX_N_SHIFT) != 4) {
6287                 device_printf(dev, "Writing to eeprom: increasing "
6288                     "reported MSIX vectors from 3 to 5...\n");
6289                 edata &= ~(EM_NVM_MSIX_N_MASK);
6290                 edata |= 4 << EM_NVM_MSIX_N_SHIFT;
6291                 e1000_write_nvm(hw, EM_NVM_PCIE_CTRL, 1, &edata);
6292                 e1000_update_nvm_checksum(hw);
6293                 device_printf(dev, "Writing to eeprom: done\n");
6294         }
6295 }
6296 #endif
6297
6298 #ifdef DDB
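/*
** DDB helpers: walk every unit in the em devclass. Note that
** devclass_get_maxunit() returns one past the highest unit number and
** that a unit slot may be empty, hence the NULL checks below.
*/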
6299 DB_COMMAND(em_reset_dev, em_ddb_reset_dev)
6300 {
6301         devclass_t      dc;
6302         int max_em;
6303
6304         dc = devclass_find("em");
6305         max_em = devclass_get_maxunit(dc);
6306
6307         for (int index = 0; index < max_em; index++) {
6308                 device_t dev;
6309                 dev = devclass_get_device(dc, index);
6310                 if (dev != NULL && device_get_driver(dev) == &em_driver) {
6311                         struct adapter *adapter = device_get_softc(dev);
6312                         EM_CORE_LOCK(adapter);
6313                         em_init_locked(adapter);
6314                         EM_CORE_UNLOCK(adapter);
6315                 }
6316         }
6317 }
6318 DB_COMMAND(em_dump_queue, em_ddb_dump_queue)
6319 {
6320         devclass_t      dc;
6321         int max_em;
6322
6323         dc = devclass_find("em");
6324         max_em = devclass_get_maxunit(dc);
6325
6326         for (int index = 0; index < max_em; index++) {
6327                 device_t dev;
6328                 dev = devclass_get_device(dc, index);
6329                 if (dev != NULL && device_get_driver(dev) == &em_driver)
6330                         em_print_debug_info(device_get_softc(dev));
6331         }
6332
6333 }
6334 #endif