/******************************************************************************

  Copyright (c) 2001-2015, Intel Corporation
  All rights reserved.

  Redistribution and use in source and binary forms, with or without
  modification, are permitted provided that the following conditions are met:

   1. Redistributions of source code must retain the above copyright notice,
      this list of conditions and the following disclaimer.

   2. Redistributions in binary form must reproduce the above copyright
      notice, this list of conditions and the following disclaimer in the
      documentation and/or other materials provided with the distribution.

   3. Neither the name of the Intel Corporation nor the names of its
      contributors may be used to endorse or promote products derived from
      this software without specific prior written permission.

  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
  AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
  LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
  INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
  CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
  ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
  POSSIBILITY OF SUCH DAMAGE.

******************************************************************************/
/*$FreeBSD$*/

#include "opt_em.h"
#include "opt_ddb.h"
#include "opt_inet.h"
#include "opt_inet6.h"

#ifdef HAVE_KERNEL_OPTION_HEADERS
#include "opt_device_polling.h"
#endif

#include <sys/param.h>
#include <sys/systm.h>
#ifdef DDB
#include <sys/types.h>
#include <ddb/ddb.h>
#endif
#if __FreeBSD_version >= 800000
#include <sys/buf_ring.h>
#endif
#include <sys/bus.h>
#include <sys/endian.h>
#include <sys/kernel.h>
#include <sys/kthread.h>
#include <sys/malloc.h>
#include <sys/mbuf.h>
#include <sys/module.h>
#include <sys/rman.h>
#include <sys/smp.h>
#include <sys/socket.h>
#include <sys/sockio.h>
#include <sys/sysctl.h>
#include <sys/taskqueue.h>
#include <sys/eventhandler.h>
#include <machine/bus.h>
#include <machine/resource.h>

#include <net/bpf.h>
#include <net/ethernet.h>
#include <net/if.h>
#include <net/if_arp.h>
#include <net/if_dl.h>
#include <net/if_media.h>

#include <net/if_types.h>
#include <net/if_vlan_var.h>

#include <netinet/in_systm.h>
#include <netinet/in.h>
#include <netinet/if_ether.h>
#include <netinet/ip.h>
#include <netinet/ip6.h>
#include <netinet/tcp.h>
#include <netinet/udp.h>

#include <machine/in_cksum.h>
#include <dev/led/led.h>
#include <dev/pci/pcivar.h>
#include <dev/pci/pcireg.h>

#include "e1000_api.h"
#include "e1000_82571.h"
#include "if_em.h"

/*********************************************************************
 *  Driver version:
 *********************************************************************/
char em_driver_version[] = "7.6.1-k";

/*********************************************************************
 *  PCI Device ID Table
 *
 *  Used by em_probe to select which devices this driver supports.
 *  The last field stores an index into em_strings.
 *  The last entry must be all zeros.
 *
 *  { Vendor ID, Device ID, SubVendor ID, SubDevice ID, String Index }
 *********************************************************************/

static em_vendor_info_t em_vendor_info_array[] =
{
        /* Intel(R) PRO/1000 Network Connection */
        { 0x8086, E1000_DEV_ID_82571EB_COPPER,  PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82571EB_FIBER,   PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82571EB_SERDES,  PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82571EB_SERDES_DUAL,
                                                PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82571EB_SERDES_QUAD,
                                                PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82571EB_QUAD_COPPER,
                                                PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82571EB_QUAD_COPPER_LP,
                                                PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82571EB_QUAD_FIBER,
                                                PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82571PT_QUAD_COPPER,
                                                PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82572EI_COPPER,  PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82572EI_FIBER,   PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82572EI_SERDES,  PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82572EI,         PCI_ANY_ID, PCI_ANY_ID, 0},

        { 0x8086, E1000_DEV_ID_82573E,          PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82573E_IAMT,     PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82573L,          PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82583V,          PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_80003ES2LAN_COPPER_SPT,
                                                PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_80003ES2LAN_SERDES_SPT,
                                                PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_80003ES2LAN_COPPER_DPT,
                                                PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_80003ES2LAN_SERDES_DPT,
                                                PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_ICH8_IGP_M_AMT,  PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_ICH8_IGP_AMT,    PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_ICH8_IGP_C,      PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_ICH8_IFE,        PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_ICH8_IFE_GT,     PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_ICH8_IFE_G,      PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_ICH8_IGP_M,      PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_ICH8_82567V_3,   PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_ICH9_IGP_M_AMT,  PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_ICH9_IGP_AMT,    PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_ICH9_IGP_C,      PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_ICH9_IGP_M,      PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_ICH9_IGP_M_V,    PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_ICH9_IFE,        PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_ICH9_IFE_GT,     PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_ICH9_IFE_G,      PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_ICH9_BM,         PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82574L,          PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82574LA,         PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_ICH10_R_BM_LM,   PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_ICH10_R_BM_LF,   PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_ICH10_R_BM_V,    PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_ICH10_D_BM_LM,   PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_ICH10_D_BM_LF,   PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_ICH10_D_BM_V,    PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_PCH_M_HV_LM,     PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_PCH_M_HV_LC,     PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_PCH_D_HV_DM,     PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_PCH_D_HV_DC,     PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_PCH2_LV_LM,      PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_PCH2_LV_V,       PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_PCH_LPT_I217_LM, PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_PCH_LPT_I217_V,  PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_PCH_LPTLP_I218_LM,
                                                PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_PCH_LPTLP_I218_V,
                                                PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_PCH_I218_LM2,    PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_PCH_I218_V2,     PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_PCH_I218_LM3,    PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_PCH_I218_V3,     PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_PCH_SPT_I219_LM, PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_PCH_SPT_I219_V,  PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_PCH_SPT_I219_LM2,
                                                PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_PCH_SPT_I219_V2, PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_PCH_LBG_I219_LM3,
                                                PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_PCH_SPT_I219_LM4,
                                                PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_PCH_SPT_I219_V4, PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_PCH_SPT_I219_LM5,
                                                PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_PCH_SPT_I219_V5, PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_PCH_CNP_I219_LM6,
                                                PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_PCH_CNP_I219_V6, PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_PCH_CNP_I219_LM7,
                                                PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_PCH_CNP_I219_V7, PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_PCH_ICP_I219_LM8,
                                                PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_PCH_ICP_I219_V8, PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_PCH_ICP_I219_LM9,
                                                PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_PCH_ICP_I219_V9, PCI_ANY_ID, PCI_ANY_ID, 0},
        /* required last entry */
        { 0, 0, 0, 0, 0}
};
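
/*
 * To add support for a new adapter, append a matching entry just before
 * the all-zeros terminator above.  A hypothetical new I219 variant, for
 * example, would look like (the device-ID macro here is illustrative
 * only, not a real definition from e1000_hw.h):
 *
 *	{ 0x8086, E1000_DEV_ID_PCH_XXX_I219_LM10, PCI_ANY_ID, PCI_ANY_ID, 0},
 */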

/*********************************************************************
 *  Table of branding strings for all supported NICs.
 *********************************************************************/

static char *em_strings[] = {
        "Intel(R) PRO/1000 Network Connection"
};

/*********************************************************************
 *  Function prototypes
 *********************************************************************/
static int      em_probe(device_t);
static int      em_attach(device_t);
static int      em_detach(device_t);
static int      em_shutdown(device_t);
static int      em_suspend(device_t);
static int      em_resume(device_t);
#ifdef EM_MULTIQUEUE
static int      em_mq_start(struct ifnet *, struct mbuf *);
static int      em_mq_start_locked(struct ifnet *,
                    struct tx_ring *);
static void     em_qflush(struct ifnet *);
#else
static void     em_start(struct ifnet *);
static void     em_start_locked(struct ifnet *, struct tx_ring *);
#endif
static int      em_ioctl(struct ifnet *, u_long, caddr_t);
static void     em_init(void *);
static void     em_init_locked(struct adapter *);
static void     em_stop(void *);
static void     em_media_status(struct ifnet *, struct ifmediareq *);
static int      em_media_change(struct ifnet *);
static void     em_identify_hardware(struct adapter *);
static int      em_allocate_pci_resources(struct adapter *);
static int      em_allocate_legacy(struct adapter *);
static int      em_allocate_msix(struct adapter *);
static int      em_allocate_queues(struct adapter *);
static int      em_setup_msix(struct adapter *);
static void     em_free_pci_resources(struct adapter *);
static void     em_local_timer(void *);
static void     em_reset(struct adapter *);
static int      em_setup_interface(device_t, struct adapter *);
static void     em_flush_desc_rings(struct adapter *);

static void     em_setup_transmit_structures(struct adapter *);
static void     em_initialize_transmit_unit(struct adapter *);
static int      em_allocate_transmit_buffers(struct tx_ring *);
static void     em_free_transmit_structures(struct adapter *);
static void     em_free_transmit_buffers(struct tx_ring *);

static int      em_setup_receive_structures(struct adapter *);
static int      em_allocate_receive_buffers(struct rx_ring *);
static void     em_initialize_receive_unit(struct adapter *);
static void     em_free_receive_structures(struct adapter *);
static void     em_free_receive_buffers(struct rx_ring *);

static void     em_enable_intr(struct adapter *);
static void     em_disable_intr(struct adapter *);
static void     em_update_stats_counters(struct adapter *);
static void     em_add_hw_stats(struct adapter *adapter);
static void     em_txeof(struct tx_ring *);
static bool     em_rxeof(struct rx_ring *, int, int *);
#ifndef __NO_STRICT_ALIGNMENT
static int      em_fixup_rx(struct rx_ring *);
#endif
static void     em_setup_rxdesc(union e1000_rx_desc_extended *,
                    const struct em_rxbuffer *rxbuf);
static void     em_receive_checksum(uint32_t status, struct mbuf *);
static void     em_transmit_checksum_setup(struct tx_ring *, struct mbuf *, int,
                    struct ip *, u32 *, u32 *);
static void     em_tso_setup(struct tx_ring *, struct mbuf *, int, struct ip *,
                    struct tcphdr *, u32 *, u32 *);
static void     em_set_promisc(struct adapter *);
static void     em_disable_promisc(struct adapter *);
static void     em_set_multi(struct adapter *);
static void     em_update_link_status(struct adapter *);
static void     em_refresh_mbufs(struct rx_ring *, int);
static void     em_register_vlan(void *, struct ifnet *, u16);
static void     em_unregister_vlan(void *, struct ifnet *, u16);
static void     em_setup_vlan_hw_support(struct adapter *);
static int      em_xmit(struct tx_ring *, struct mbuf **);
static int      em_dma_malloc(struct adapter *, bus_size_t,
                    struct em_dma_alloc *, int);
static void     em_dma_free(struct adapter *, struct em_dma_alloc *);
static int      em_sysctl_nvm_info(SYSCTL_HANDLER_ARGS);
static void     em_print_nvm_info(struct adapter *);
static int      em_sysctl_debug_info(SYSCTL_HANDLER_ARGS);
static void     em_print_debug_info(struct adapter *);
static int      em_is_valid_ether_addr(u8 *);
static int      em_sysctl_int_delay(SYSCTL_HANDLER_ARGS);
static void     em_add_int_delay_sysctl(struct adapter *, const char *,
                    const char *, struct em_int_delay_info *, int, int);
/* Management and WOL Support */
static void     em_init_manageability(struct adapter *);
static void     em_release_manageability(struct adapter *);
static void     em_get_hw_control(struct adapter *);
static void     em_release_hw_control(struct adapter *);
static void     em_get_wakeup(device_t);
static void     em_enable_wakeup(device_t);
static int      em_enable_phy_wakeup(struct adapter *);
static void     em_led_func(void *, int);
static void     em_disable_aspm(struct adapter *);

static int      em_irq_fast(void *);

/* MSIX handlers */
static void     em_msix_tx(void *);
static void     em_msix_rx(void *);
static void     em_msix_link(void *);
static void     em_handle_tx(void *context, int pending);
static void     em_handle_rx(void *context, int pending);
static void     em_handle_link(void *context, int pending);

#ifdef EM_MULTIQUEUE
static void     em_enable_vectors_82574(struct adapter *);
#endif

static void     em_set_sysctl_value(struct adapter *, const char *,
                    const char *, int *, int);
static int      em_set_flowcntl(SYSCTL_HANDLER_ARGS);
static int      em_sysctl_eee(SYSCTL_HANDLER_ARGS);

static __inline void em_rx_discard(struct rx_ring *, int);

#ifdef DEVICE_POLLING
static poll_handler_t em_poll;
#endif /* DEVICE_POLLING */

/*********************************************************************
 *  FreeBSD Device Interface Entry Points
 *********************************************************************/

static device_method_t em_methods[] = {
        /* Device interface */
        DEVMETHOD(device_probe, em_probe),
        DEVMETHOD(device_attach, em_attach),
        DEVMETHOD(device_detach, em_detach),
        DEVMETHOD(device_shutdown, em_shutdown),
        DEVMETHOD(device_suspend, em_suspend),
        DEVMETHOD(device_resume, em_resume),
        DEVMETHOD_END
};

static driver_t em_driver = {
        "em", em_methods, sizeof(struct adapter),
};

devclass_t em_devclass;
DRIVER_MODULE(em, pci, em_driver, em_devclass, 0, 0);
MODULE_DEPEND(em, pci, 1, 1, 1);
MODULE_DEPEND(em, ether, 1, 1, 1);

/*********************************************************************
 *  Tunable default values.
 *********************************************************************/

#define EM_TICKS_TO_USECS(ticks)        ((1024 * (ticks) + 500) / 1000)
#define EM_USECS_TO_TICKS(usecs)        ((1000 * (usecs) + 512) / 1024)
#define M_TSO_LEN                       66

#define MAX_INTS_PER_SEC        8000
#define DEFAULT_ITR             (1000000000/(MAX_INTS_PER_SEC * 256))
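
/*
 * A worked example of the conversions above (a sketch for the reader,
 * not used by the code): the hardware delay registers count in 1.024
 * usec ticks, so EM_TICKS_TO_USECS(64) = (1024 * 64 + 500) / 1000 = 66
 * usecs, and EM_USECS_TO_TICKS(66) = (1000 * 66 + 512) / 1024 = 64
 * ticks, each rounded to the nearest unit.  The ITR register counts in
 * 256 nsec increments, so DEFAULT_ITR = 1000000000 / (8000 * 256) = 488,
 * which caps the device at roughly MAX_INTS_PER_SEC interrupts/sec.
 */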

#define TSO_WORKAROUND  4

static SYSCTL_NODE(_hw, OID_AUTO, em, CTLFLAG_RD, 0, "EM driver parameters");

static int em_disable_crc_stripping = 0;
SYSCTL_INT(_hw_em, OID_AUTO, disable_crc_stripping, CTLFLAG_RDTUN,
    &em_disable_crc_stripping, 0, "Disable CRC Stripping");

static int em_tx_int_delay_dflt = EM_TICKS_TO_USECS(EM_TIDV);
static int em_rx_int_delay_dflt = EM_TICKS_TO_USECS(EM_RDTR);
TUNABLE_INT("hw.em.tx_int_delay", &em_tx_int_delay_dflt);
TUNABLE_INT("hw.em.rx_int_delay", &em_rx_int_delay_dflt);
SYSCTL_INT(_hw_em, OID_AUTO, tx_int_delay, CTLFLAG_RDTUN, &em_tx_int_delay_dflt,
    0, "Default transmit interrupt delay in usecs");
SYSCTL_INT(_hw_em, OID_AUTO, rx_int_delay, CTLFLAG_RDTUN, &em_rx_int_delay_dflt,
    0, "Default receive interrupt delay in usecs");

static int em_tx_abs_int_delay_dflt = EM_TICKS_TO_USECS(EM_TADV);
static int em_rx_abs_int_delay_dflt = EM_TICKS_TO_USECS(EM_RADV);
TUNABLE_INT("hw.em.tx_abs_int_delay", &em_tx_abs_int_delay_dflt);
TUNABLE_INT("hw.em.rx_abs_int_delay", &em_rx_abs_int_delay_dflt);
SYSCTL_INT(_hw_em, OID_AUTO, tx_abs_int_delay, CTLFLAG_RDTUN,
    &em_tx_abs_int_delay_dflt, 0,
    "Default transmit interrupt delay limit in usecs");
SYSCTL_INT(_hw_em, OID_AUTO, rx_abs_int_delay, CTLFLAG_RDTUN,
    &em_rx_abs_int_delay_dflt, 0,
    "Default receive interrupt delay limit in usecs");

static int em_rxd = EM_DEFAULT_RXD;
static int em_txd = EM_DEFAULT_TXD;
TUNABLE_INT("hw.em.rxd", &em_rxd);
TUNABLE_INT("hw.em.txd", &em_txd);
SYSCTL_INT(_hw_em, OID_AUTO, rxd, CTLFLAG_RDTUN, &em_rxd, 0,
    "Number of receive descriptors per queue");
SYSCTL_INT(_hw_em, OID_AUTO, txd, CTLFLAG_RDTUN, &em_txd, 0,
    "Number of transmit descriptors per queue");

static int em_smart_pwr_down = FALSE;
TUNABLE_INT("hw.em.smart_pwr_down", &em_smart_pwr_down);
SYSCTL_INT(_hw_em, OID_AUTO, smart_pwr_down, CTLFLAG_RDTUN, &em_smart_pwr_down,
    0, "Set to true to leave smart power down enabled on newer adapters");

/* Controls whether promiscuous also shows bad packets */
static int em_debug_sbp = FALSE;
TUNABLE_INT("hw.em.sbp", &em_debug_sbp);
SYSCTL_INT(_hw_em, OID_AUTO, sbp, CTLFLAG_RDTUN, &em_debug_sbp, 0,
    "Show bad packets in promiscuous mode");

static int em_enable_msix = TRUE;
TUNABLE_INT("hw.em.enable_msix", &em_enable_msix);
SYSCTL_INT(_hw_em, OID_AUTO, enable_msix, CTLFLAG_RDTUN, &em_enable_msix, 0,
    "Enable MSI-X interrupts");

#ifdef EM_MULTIQUEUE
static int em_num_queues = 1;
TUNABLE_INT("hw.em.num_queues", &em_num_queues);
SYSCTL_INT(_hw_em, OID_AUTO, num_queues, CTLFLAG_RDTUN, &em_num_queues, 0,
    "82574 only: Number of queues to configure, 0 indicates autoconfigure");
#endif
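
/*
 * All of the tunables above can be set at boot time.  A sketch of how
 * they might appear in /boot/loader.conf (the values are illustrative,
 * not recommendations):
 *
 *	hw.em.txd="2048"
 *	hw.em.rxd="2048"
 *	hw.em.enable_msix="1"
 *	hw.em.rx_int_delay="32"
 */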

/*
** Global variable to store last used CPU when binding queues
** to CPUs in em_allocate_msix.  Starts at CPU_FIRST and increments when a
** queue is bound to a cpu.
*/
static int em_last_bind_cpu = -1;

/* How many packets rxeof tries to clean at a time */
static int em_rx_process_limit = 100;
TUNABLE_INT("hw.em.rx_process_limit", &em_rx_process_limit);
SYSCTL_INT(_hw_em, OID_AUTO, rx_process_limit, CTLFLAG_RDTUN,
    &em_rx_process_limit, 0,
    "Maximum number of received packets to process "
    "at a time, -1 means unlimited");

/* Energy efficient ethernet - default to OFF (a value of 1 disables EEE) */
static int eee_setting = 1;
TUNABLE_INT("hw.em.eee_setting", &eee_setting);
SYSCTL_INT(_hw_em, OID_AUTO, eee_setting, CTLFLAG_RDTUN, &eee_setting, 0,
    "Disable Energy Efficient Ethernet");

/* Global used in WOL setup with multiport cards */
static int global_quad_port_a = 0;

#ifdef DEV_NETMAP       /* see ixgbe.c for details */
#include <dev/netmap/if_em_netmap.h>
#endif /* DEV_NETMAP */

/*********************************************************************
 *  Device identification routine
 *
 *  em_probe determines whether the driver should be loaded on an
 *  adapter, based on the PCI vendor/device ID of the adapter.
 *
 *  return BUS_PROBE_DEFAULT on success, positive on failure
 *********************************************************************/

static int
em_probe(device_t dev)
{
        char            adapter_name[60];
        uint16_t        pci_vendor_id = 0;
        uint16_t        pci_device_id = 0;
        uint16_t        pci_subvendor_id = 0;
        uint16_t        pci_subdevice_id = 0;
        em_vendor_info_t *ent;

        INIT_DEBUGOUT("em_probe: begin");

        pci_vendor_id = pci_get_vendor(dev);
        if (pci_vendor_id != EM_VENDOR_ID)
                return (ENXIO);

        pci_device_id = pci_get_device(dev);
        pci_subvendor_id = pci_get_subvendor(dev);
        pci_subdevice_id = pci_get_subdevice(dev);

        ent = em_vendor_info_array;
        while (ent->vendor_id != 0) {
                if ((pci_vendor_id == ent->vendor_id) &&
                    (pci_device_id == ent->device_id) &&
                    ((pci_subvendor_id == ent->subvendor_id) ||
                    (ent->subvendor_id == PCI_ANY_ID)) &&
                    ((pci_subdevice_id == ent->subdevice_id) ||
                    (ent->subdevice_id == PCI_ANY_ID))) {
                        sprintf(adapter_name, "%s %s",
                                em_strings[ent->index],
                                em_driver_version);
                        device_set_desc_copy(dev, adapter_name);
                        return (BUS_PROBE_DEFAULT);
                }
                ent++;
        }

        return (ENXIO);
}

/*********************************************************************
 *  Device initialization routine
 *
 *  The attach entry point is called when the driver is being loaded.
 *  This routine identifies the type of hardware, allocates all resources
 *  and initializes the hardware.
 *
 *  return 0 on success, positive on failure
 *********************************************************************/

static int
em_attach(device_t dev)
{
        struct adapter  *adapter;
        struct e1000_hw *hw;
        int             error = 0;

        INIT_DEBUGOUT("em_attach: begin");

        if (resource_disabled("em", device_get_unit(dev))) {
                device_printf(dev, "Disabled by device hint\n");
                return (ENXIO);
        }

        adapter = device_get_softc(dev);
        adapter->dev = adapter->osdep.dev = dev;
        hw = &adapter->hw;
        EM_CORE_LOCK_INIT(adapter, device_get_nameunit(dev));

        /* SYSCTL stuff */
        SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
            SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
            OID_AUTO, "nvm", CTLTYPE_INT|CTLFLAG_RW, adapter, 0,
            em_sysctl_nvm_info, "I", "NVM Information");

        SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
            SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
            OID_AUTO, "debug", CTLTYPE_INT|CTLFLAG_RW, adapter, 0,
            em_sysctl_debug_info, "I", "Debug Information");

        SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
            SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
            OID_AUTO, "fc", CTLTYPE_INT|CTLFLAG_RW, adapter, 0,
            em_set_flowcntl, "I", "Flow Control");

        callout_init_mtx(&adapter->timer, &adapter->core_mtx, 0);

        /* Determine hardware and mac info */
        em_identify_hardware(adapter);

        /* Setup PCI resources */
        if (em_allocate_pci_resources(adapter)) {
                device_printf(dev, "Allocation of PCI resources failed\n");
                error = ENXIO;
                goto err_pci;
        }

        /*
        ** For ICH8 and family we need to
        ** map the flash memory, and this
        ** must happen after the MAC is
        ** identified
        */
        if ((hw->mac.type == e1000_ich8lan) ||
            (hw->mac.type == e1000_ich9lan) ||
            (hw->mac.type == e1000_ich10lan) ||
            (hw->mac.type == e1000_pchlan) ||
            (hw->mac.type == e1000_pch2lan) ||
            (hw->mac.type == e1000_pch_lpt)) {
                int rid = EM_BAR_TYPE_FLASH;
                adapter->flash = bus_alloc_resource_any(dev,
                    SYS_RES_MEMORY, &rid, RF_ACTIVE);
                if (adapter->flash == NULL) {
                        device_printf(dev, "Mapping of Flash failed\n");
                        error = ENXIO;
                        goto err_pci;
                }
                /* This is used in the shared code */
                hw->flash_address = (u8 *)adapter->flash;
                adapter->osdep.flash_bus_space_tag =
                    rman_get_bustag(adapter->flash);
                adapter->osdep.flash_bus_space_handle =
                    rman_get_bushandle(adapter->flash);
        }
        /*
        ** In the new SPT device flash is not a
        ** separate BAR, rather it is also in BAR0,
        ** so use the same tag and an offset handle for the
        ** FLASH read/write macros in the shared code.
        */
        else if (hw->mac.type >= e1000_pch_spt) {
                adapter->osdep.flash_bus_space_tag =
                    adapter->osdep.mem_bus_space_tag;
                adapter->osdep.flash_bus_space_handle =
                    adapter->osdep.mem_bus_space_handle
                    + E1000_FLASH_BASE_ADDR;
        }

        /* Do Shared Code initialization */
        error = e1000_setup_init_funcs(hw, TRUE);
        if (error) {
                device_printf(dev, "Setup of Shared code failed, error %d\n",
                    error);
                error = ENXIO;
                goto err_pci;
        }

        /*
         * Setup MSI/X or MSI if PCI Express
         */
        adapter->msix = em_setup_msix(adapter);

        e1000_get_bus_info(hw);

        /* Set up some sysctls for the tunable interrupt delays */
        em_add_int_delay_sysctl(adapter, "rx_int_delay",
            "receive interrupt delay in usecs", &adapter->rx_int_delay,
            E1000_REGISTER(hw, E1000_RDTR), em_rx_int_delay_dflt);
        em_add_int_delay_sysctl(adapter, "tx_int_delay",
            "transmit interrupt delay in usecs", &adapter->tx_int_delay,
            E1000_REGISTER(hw, E1000_TIDV), em_tx_int_delay_dflt);
        em_add_int_delay_sysctl(adapter, "rx_abs_int_delay",
            "receive interrupt delay limit in usecs",
            &adapter->rx_abs_int_delay,
            E1000_REGISTER(hw, E1000_RADV),
            em_rx_abs_int_delay_dflt);
        em_add_int_delay_sysctl(adapter, "tx_abs_int_delay",
            "transmit interrupt delay limit in usecs",
            &adapter->tx_abs_int_delay,
            E1000_REGISTER(hw, E1000_TADV),
            em_tx_abs_int_delay_dflt);
        em_add_int_delay_sysctl(adapter, "itr",
            "interrupt delay limit in usecs/4",
            &adapter->tx_itr,
            E1000_REGISTER(hw, E1000_ITR),
            DEFAULT_ITR);

        /* Sysctl for limiting the amount of work done in the taskqueue */
        em_set_sysctl_value(adapter, "rx_processing_limit",
            "max number of rx packets to process", &adapter->rx_process_limit,
            em_rx_process_limit);

        /*
         * Validate the number of transmit and receive descriptors.  It
         * must not exceed the hardware maximum, and must be a multiple
         * of EM_DBA_ALIGN.
         */
        if (((em_txd * sizeof(struct e1000_tx_desc)) % EM_DBA_ALIGN) != 0 ||
            (em_txd > EM_MAX_TXD) || (em_txd < EM_MIN_TXD)) {
                device_printf(dev, "Using %d TX descriptors instead of %d!\n",
                    EM_DEFAULT_TXD, em_txd);
                adapter->num_tx_desc = EM_DEFAULT_TXD;
        } else
                adapter->num_tx_desc = em_txd;

        if (((em_rxd * sizeof(union e1000_rx_desc_extended)) % EM_DBA_ALIGN) != 0 ||
            (em_rxd > EM_MAX_RXD) || (em_rxd < EM_MIN_RXD)) {
                device_printf(dev, "Using %d RX descriptors instead of %d!\n",
                    EM_DEFAULT_RXD, em_rxd);
                adapter->num_rx_desc = EM_DEFAULT_RXD;
        } else
                adapter->num_rx_desc = em_rxd;

        hw->mac.autoneg = DO_AUTO_NEG;
        hw->phy.autoneg_wait_to_complete = FALSE;
        hw->phy.autoneg_advertised = AUTONEG_ADV_DEFAULT;

        /* Copper options */
        if (hw->phy.media_type == e1000_media_type_copper) {
                hw->phy.mdix = AUTO_ALL_MODES;
                hw->phy.disable_polarity_correction = FALSE;
                hw->phy.ms_type = EM_MASTER_SLAVE;
        }

        /*
         * Set the frame limits assuming
         * standard ethernet sized frames.
         */
        adapter->hw.mac.max_frame_size =
            ETHERMTU + ETHER_HDR_LEN + ETHERNET_FCS_SIZE;
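
        /*
         * Worked out (illustrative comment only): 1500 (ETHERMTU) +
         * 14 (ETHER_HDR_LEN) + 4 (ETHERNET_FCS_SIZE) = 1518 bytes,
         * the classic maximum Ethernet frame size.
         */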

        /*
         * This controls when hardware reports transmit completion
         * status.
         */
        hw->mac.report_tx_early = 1;

        /*
        ** Get queue/ring memory
        */
        if (em_allocate_queues(adapter)) {
                error = ENOMEM;
                goto err_pci;
        }

        /* Allocate multicast array memory. */
        adapter->mta = malloc(sizeof(u8) * ETH_ADDR_LEN *
            MAX_NUM_MULTICAST_ADDRESSES, M_DEVBUF, M_NOWAIT);
        if (adapter->mta == NULL) {
                device_printf(dev, "Can not allocate multicast setup array\n");
                error = ENOMEM;
                goto err_late;
        }

        /* Check SOL/IDER usage */
        if (e1000_check_reset_block(hw))
                device_printf(dev, "PHY reset is blocked"
                    " due to SOL/IDER session.\n");

        /* Sysctl for setting Energy Efficient Ethernet */
        hw->dev_spec.ich8lan.eee_disable = eee_setting;
        SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
            SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
            OID_AUTO, "eee_control", CTLTYPE_INT|CTLFLAG_RW,
            adapter, 0, em_sysctl_eee, "I",
            "Disable Energy Efficient Ethernet");

        /*
        ** Start from a known state: this is
        ** important for reading the nvm and
        ** mac address.
        */
        e1000_reset_hw(hw);

        /* Make sure we have a good EEPROM before we read from it */
        if (e1000_validate_nvm_checksum(hw) < 0) {
                /*
                ** Some PCI-E parts fail the first check due to
                ** the link being in sleep state; call it again.
                ** If it fails a second time, it's a real issue.
                */
                if (e1000_validate_nvm_checksum(hw) < 0) {
                        device_printf(dev,
                            "The EEPROM Checksum Is Not Valid\n");
                        error = EIO;
                        goto err_late;
                }
        }

        /* Copy the permanent MAC address out of the EEPROM */
        if (e1000_read_mac_addr(hw) < 0) {
                device_printf(dev, "EEPROM read error while reading MAC"
                    " address\n");
                error = EIO;
                goto err_late;
        }

        if (!em_is_valid_ether_addr(hw->mac.addr)) {
                device_printf(dev, "Invalid MAC address\n");
                error = EIO;
                goto err_late;
        }

        /* Disable ULP support */
        e1000_disable_ulp_lpt_lp(hw, TRUE);

        /*
        **  Do interrupt configuration
        */
        if (adapter->msix > 1) /* Do MSIX */
                error = em_allocate_msix(adapter);
        else  /* MSI or Legacy */
                error = em_allocate_legacy(adapter);
        if (error)
                goto err_late;

        /*
         * Get Wake-on-Lan and Management info for later use
         */
        em_get_wakeup(dev);

        /* Setup OS specific network interface */
        if (em_setup_interface(dev, adapter) != 0)
                goto err_late;

        em_reset(adapter);

        /* Initialize statistics */
        em_update_stats_counters(adapter);

        hw->mac.get_link_status = 1;
        em_update_link_status(adapter);

        /* Register for VLAN events */
        adapter->vlan_attach = EVENTHANDLER_REGISTER(vlan_config,
            em_register_vlan, adapter, EVENTHANDLER_PRI_FIRST);
        adapter->vlan_detach = EVENTHANDLER_REGISTER(vlan_unconfig,
            em_unregister_vlan, adapter, EVENTHANDLER_PRI_FIRST);

        em_add_hw_stats(adapter);

        /* Non-AMT based hardware can now take control from firmware */
        if (adapter->has_manage && !adapter->has_amt)
                em_get_hw_control(adapter);

        /* Tell the stack that the interface is not active */
        adapter->ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
        adapter->ifp->if_drv_flags |= IFF_DRV_OACTIVE;

        adapter->led_dev = led_create(em_led_func, adapter,
            device_get_nameunit(dev));
#ifdef DEV_NETMAP
        em_netmap_attach(adapter);
#endif /* DEV_NETMAP */

        INIT_DEBUGOUT("em_attach: end");

        return (0);

err_late:
        em_free_transmit_structures(adapter);
        em_free_receive_structures(adapter);
        em_release_hw_control(adapter);
        if (adapter->ifp != NULL)
                if_free(adapter->ifp);
err_pci:
        em_free_pci_resources(adapter);
        free(adapter->mta, M_DEVBUF);
        EM_CORE_LOCK_DESTROY(adapter);

        return (error);
}

/*********************************************************************
 *  Device removal routine
 *
 *  The detach entry point is called when the driver is being removed.
 *  This routine stops the adapter and deallocates all the resources
 *  that were allocated for driver operation.
 *
 *  return 0 on success, positive on failure
 *********************************************************************/

static int
em_detach(device_t dev)
{
        struct adapter  *adapter = device_get_softc(dev);
        struct ifnet    *ifp = adapter->ifp;

        INIT_DEBUGOUT("em_detach: begin");

        /* Make sure VLANs are not using the driver */
        if (adapter->ifp->if_vlantrunk != NULL) {
                device_printf(dev,"Vlan in use, detach first\n");
                return (EBUSY);
        }

#ifdef DEVICE_POLLING
        if (ifp->if_capenable & IFCAP_POLLING)
                ether_poll_deregister(ifp);
#endif

        if (adapter->led_dev != NULL)
                led_destroy(adapter->led_dev);

        EM_CORE_LOCK(adapter);
        adapter->in_detach = 1;
        em_stop(adapter);
        EM_CORE_UNLOCK(adapter);
        EM_CORE_LOCK_DESTROY(adapter);

        e1000_phy_hw_reset(&adapter->hw);

        em_release_manageability(adapter);
        em_release_hw_control(adapter);

        /* Unregister VLAN events */
        if (adapter->vlan_attach != NULL)
                EVENTHANDLER_DEREGISTER(vlan_config, adapter->vlan_attach);
        if (adapter->vlan_detach != NULL)
                EVENTHANDLER_DEREGISTER(vlan_unconfig, adapter->vlan_detach);

        ether_ifdetach(adapter->ifp);
        callout_drain(&adapter->timer);

#ifdef DEV_NETMAP
        netmap_detach(ifp);
#endif /* DEV_NETMAP */

        em_free_pci_resources(adapter);
        bus_generic_detach(dev);
        if_free(ifp);

        em_free_transmit_structures(adapter);
        em_free_receive_structures(adapter);

        em_release_hw_control(adapter);
        free(adapter->mta, M_DEVBUF);

        return (0);
}

/*********************************************************************
 *
 *  Shutdown entry point
 *
 **********************************************************************/

static int
em_shutdown(device_t dev)
{
        return em_suspend(dev);
}

/*
 * Suspend/resume device methods.
 */
static int
em_suspend(device_t dev)
{
        struct adapter *adapter = device_get_softc(dev);

        EM_CORE_LOCK(adapter);

        em_release_manageability(adapter);
        em_release_hw_control(adapter);
        em_enable_wakeup(dev);

        EM_CORE_UNLOCK(adapter);

        return bus_generic_suspend(dev);
}

static int
em_resume(device_t dev)
{
        struct adapter *adapter = device_get_softc(dev);
        struct tx_ring  *txr = adapter->tx_rings;
        struct ifnet *ifp = adapter->ifp;

        EM_CORE_LOCK(adapter);
        if (adapter->hw.mac.type == e1000_pch2lan)
                e1000_resume_workarounds_pchlan(&adapter->hw);
        em_init_locked(adapter);
        em_init_manageability(adapter);

        if ((ifp->if_flags & IFF_UP) &&
            (ifp->if_drv_flags & IFF_DRV_RUNNING) && adapter->link_active) {
                for (int i = 0; i < adapter->num_queues; i++, txr++) {
                        EM_TX_LOCK(txr);
#ifdef EM_MULTIQUEUE
                        if (!drbr_empty(ifp, txr->br))
                                em_mq_start_locked(ifp, txr);
#else
                        if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
                                em_start_locked(ifp, txr);
#endif
                        EM_TX_UNLOCK(txr);
                }
        }
        EM_CORE_UNLOCK(adapter);

        return bus_generic_resume(dev);
}

#ifndef EM_MULTIQUEUE
static void
em_start_locked(struct ifnet *ifp, struct tx_ring *txr)
{
        struct adapter  *adapter = ifp->if_softc;
        struct mbuf     *m_head;

        EM_TX_LOCK_ASSERT(txr);

        if ((ifp->if_drv_flags & (IFF_DRV_RUNNING|IFF_DRV_OACTIVE)) !=
            IFF_DRV_RUNNING)
                return;

        if (!adapter->link_active)
                return;

        while (!IFQ_DRV_IS_EMPTY(&ifp->if_snd)) {
                /* Call cleanup if number of TX descriptors low */
                if (txr->tx_avail <= EM_TX_CLEANUP_THRESHOLD)
                        em_txeof(txr);
                if (txr->tx_avail < EM_MAX_SCATTER) {
                        ifp->if_drv_flags |= IFF_DRV_OACTIVE;
                        break;
                }
                IFQ_DRV_DEQUEUE(&ifp->if_snd, m_head);
                if (m_head == NULL)
                        break;
                /*
                 *  Encapsulation can modify our pointer, and/or make it
                 *  NULL on failure.  In that event, we can't requeue.
                 */
                if (em_xmit(txr, &m_head)) {
                        if (m_head == NULL)
                                break;
                        IFQ_DRV_PREPEND(&ifp->if_snd, m_head);
                        break;
                }

                /* Mark the queue as having work */
                if (txr->busy == EM_TX_IDLE)
                        txr->busy = EM_TX_BUSY;

                /* Send a copy of the frame to the BPF listener */
                ETHER_BPF_MTAP(ifp, m_head);
        }

        return;
}

static void
em_start(struct ifnet *ifp)
{
        struct adapter  *adapter = ifp->if_softc;
        struct tx_ring  *txr = adapter->tx_rings;

        if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
                EM_TX_LOCK(txr);
                em_start_locked(ifp, txr);
                EM_TX_UNLOCK(txr);
        }
        return;
}
#else /* EM_MULTIQUEUE */
/*********************************************************************
 *  Multiqueue Transmit routines
 *
 *  em_mq_start is called by the stack to initiate a transmit.
 *  However, if the ring is busy the driver can queue the request
 *  rather than do an immediate send.  That queueing, rather than
 *  the multiple tx queues themselves, is the main advantage in
 *  this driver.
 **********************************************************************/
/*
** Multiqueue capable stack interface
*/
static int
em_mq_start(struct ifnet *ifp, struct mbuf *m)
{
        struct adapter  *adapter = ifp->if_softc;
        struct tx_ring  *txr = adapter->tx_rings;
        unsigned int    i, error;

        if (M_HASHTYPE_GET(m) != M_HASHTYPE_NONE)
                i = m->m_pkthdr.flowid % adapter->num_queues;
        else
                i = curcpu % adapter->num_queues;

        txr = &adapter->tx_rings[i];

        error = drbr_enqueue(ifp, txr->br, m);
        if (error)
                return (error);

        if (EM_TX_TRYLOCK(txr)) {
                em_mq_start_locked(ifp, txr);
                EM_TX_UNLOCK(txr);
        } else
                taskqueue_enqueue(txr->tq, &txr->tx_task);

        return (0);
}
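
/*
 * A worked example of the ring selection above (illustrative only):
 * with two TX queues configured on an 82574, a packet carrying a
 * stack-supplied flowid of 7 maps to ring 7 % 2 = 1, so every packet
 * of that flow lands on the same ring; packets without a flowid fall
 * back to curcpu % num_queues.
 */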

static int
em_mq_start_locked(struct ifnet *ifp, struct tx_ring *txr)
{
        struct adapter  *adapter = txr->adapter;
        struct mbuf     *next;
        int             err = 0, enq = 0;

        if ((ifp->if_drv_flags & (IFF_DRV_RUNNING | IFF_DRV_OACTIVE)) !=
            IFF_DRV_RUNNING || adapter->link_active == 0) {
                return (ENETDOWN);
        }

        /* Process the queue */
        while ((next = drbr_peek(ifp, txr->br)) != NULL) {
                if ((err = em_xmit(txr, &next)) != 0) {
                        if (next == NULL) {
                                /* It was freed, move forward */
                                drbr_advance(ifp, txr->br);
                        } else {
                                /*
                                 * Still have one left, it may not be
                                 * the same since the transmit function
                                 * may have changed it.
                                 */
                                drbr_putback(ifp, txr->br, next);
                        }
                        break;
                }
                drbr_advance(ifp, txr->br);
                enq++;
                ifp->if_obytes += next->m_pkthdr.len;
                if (next->m_flags & M_MCAST)
                        ifp->if_omcasts++;
                ETHER_BPF_MTAP(ifp, next);
                if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
                        break;
        }

        /* Mark the queue as having work */
        if ((enq > 0) && (txr->busy == EM_TX_IDLE))
                txr->busy = EM_TX_BUSY;

        if (txr->tx_avail < EM_MAX_SCATTER)
                em_txeof(txr);
        if (txr->tx_avail < EM_MAX_SCATTER) {
                ifp->if_drv_flags |= IFF_DRV_OACTIVE;
        }
        return (err);
}

/*
** Flush all ring buffers
*/
static void
em_qflush(struct ifnet *ifp)
{
        struct adapter  *adapter = ifp->if_softc;
        struct tx_ring  *txr = adapter->tx_rings;
        struct mbuf     *m;

        for (int i = 0; i < adapter->num_queues; i++, txr++) {
                EM_TX_LOCK(txr);
                while ((m = buf_ring_dequeue_sc(txr->br)) != NULL)
                        m_freem(m);
                EM_TX_UNLOCK(txr);
        }
        if_qflush(ifp);
}
#endif /* EM_MULTIQUEUE */

/*********************************************************************
 *  Ioctl entry point
 *
 *  em_ioctl is called when the user wants to configure the
 *  interface.
 *
 *  return 0 on success, positive on failure
 **********************************************************************/

static int
em_ioctl(struct ifnet *ifp, u_long command, caddr_t data)
{
        struct adapter  *adapter = ifp->if_softc;
        struct ifreq    *ifr = (struct ifreq *)data;
#if defined(INET) || defined(INET6)
        struct ifaddr   *ifa = (struct ifaddr *)data;
#endif
        bool            avoid_reset = FALSE;
        int             error = 0;

        if (adapter->in_detach)
                return (error);

        switch (command) {
        case SIOCSIFADDR:
#ifdef INET
                if (ifa->ifa_addr->sa_family == AF_INET)
                        avoid_reset = TRUE;
#endif
#ifdef INET6
                if (ifa->ifa_addr->sa_family == AF_INET6)
                        avoid_reset = TRUE;
#endif
                /*
                ** Calling init results in link renegotiation,
                ** so we avoid doing it when possible.
                */
                if (avoid_reset) {
                        ifp->if_flags |= IFF_UP;
                        if (!(ifp->if_drv_flags & IFF_DRV_RUNNING))
                                em_init(adapter);
#ifdef INET
                        if (!(ifp->if_flags & IFF_NOARP))
                                arp_ifinit(ifp, ifa);
#endif
                } else
                        error = ether_ioctl(ifp, command, data);
                break;
        case SIOCSIFMTU:
            {
                int max_frame_size;

                IOCTL_DEBUGOUT("ioctl rcv'd: SIOCSIFMTU (Set Interface MTU)");

                EM_CORE_LOCK(adapter);
                switch (adapter->hw.mac.type) {
                case e1000_82571:
                case e1000_82572:
                case e1000_ich9lan:
                case e1000_ich10lan:
                case e1000_pch2lan:
                case e1000_pch_lpt:
                case e1000_pch_spt:
                case e1000_pch_cnp:
                case e1000_82574:
                case e1000_82583:
                case e1000_80003es2lan: /* 9K Jumbo Frame size */
                        max_frame_size = 9234;
                        break;
                case e1000_pchlan:
                        max_frame_size = 4096;
                        break;
                        /* Adapters that do not support jumbo frames */
                case e1000_ich8lan:
                        max_frame_size = ETHER_MAX_LEN;
                        break;
                default:
                        max_frame_size = MAX_JUMBO_FRAME_SIZE;
                }
                if (ifr->ifr_mtu > max_frame_size - ETHER_HDR_LEN -
                    ETHER_CRC_LEN) {
                        EM_CORE_UNLOCK(adapter);
                        error = EINVAL;
                        break;
                }
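
                /*
                 * For example (illustrative comment only): with a
                 * max_frame_size of 9234, the largest MTU accepted here
                 * is 9234 - 14 (ETHER_HDR_LEN) - 4 (ETHER_CRC_LEN) =
                 * 9216 bytes.
                 */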
1239
1240                 ifp->if_mtu = ifr->ifr_mtu;
1241                 adapter->hw.mac.max_frame_size =
1242                     ifp->if_mtu + ETHER_HDR_LEN + ETHER_CRC_LEN;
1243                 if (ifp->if_drv_flags & IFF_DRV_RUNNING)
1244                         em_init_locked(adapter);
1245                 EM_CORE_UNLOCK(adapter);
1246                 break;
1247             }
1248         case SIOCSIFFLAGS:
1249                 IOCTL_DEBUGOUT("ioctl rcv'd:\
1250                     SIOCSIFFLAGS (Set Interface Flags)");
1251                 EM_CORE_LOCK(adapter);
1252                 if (ifp->if_flags & IFF_UP) {
1253                         if ((ifp->if_drv_flags & IFF_DRV_RUNNING)) {
1254                                 if ((ifp->if_flags ^ adapter->if_flags) &
1255                                     (IFF_PROMISC | IFF_ALLMULTI)) {
1256                                         em_disable_promisc(adapter);
1257                                         em_set_promisc(adapter);
1258                                 }
1259                         } else
1260                                 em_init_locked(adapter);
1261                 } else
1262                         if (ifp->if_drv_flags & IFF_DRV_RUNNING)
1263                                 em_stop(adapter);
1264                 adapter->if_flags = ifp->if_flags;
1265                 EM_CORE_UNLOCK(adapter);
1266                 break;
1267         case SIOCADDMULTI:
1268         case SIOCDELMULTI:
1269                 IOCTL_DEBUGOUT("ioctl rcv'd: SIOC(ADD|DEL)MULTI");
1270                 if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
1271                         EM_CORE_LOCK(adapter);
1272                         em_disable_intr(adapter);
1273                         em_set_multi(adapter);
1274 #ifdef DEVICE_POLLING
1275                         if (!(ifp->if_capenable & IFCAP_POLLING))
1276 #endif
1277                                 em_enable_intr(adapter);
1278                         EM_CORE_UNLOCK(adapter);
1279                 }
1280                 break;
1281         case SIOCSIFMEDIA:
1282                 /* Check SOL/IDER usage */
1283                 EM_CORE_LOCK(adapter);
1284                 if (e1000_check_reset_block(&adapter->hw)) {
1285                         EM_CORE_UNLOCK(adapter);
1286                         device_printf(adapter->dev, "Media change is"
1287                             " blocked due to SOL/IDER session.\n");
1288                         break;
1289                 }
1290                 EM_CORE_UNLOCK(adapter);
1291                 /* falls thru */
1292         case SIOCGIFMEDIA:
1293                 IOCTL_DEBUGOUT("ioctl rcv'd: \
1294                     SIOCxIFMEDIA (Get/Set Interface Media)");
1295                 error = ifmedia_ioctl(ifp, ifr, &adapter->media, command);
1296                 break;
1297         case SIOCSIFCAP:
1298             {
1299                 int mask, reinit;
1300
1301                 IOCTL_DEBUGOUT("ioctl rcv'd: SIOCSIFCAP (Set Capabilities)");
1302                 reinit = 0;
1303                 mask = ifr->ifr_reqcap ^ ifp->if_capenable;
1304 #ifdef DEVICE_POLLING
1305                 if (mask & IFCAP_POLLING) {
1306                         if (ifr->ifr_reqcap & IFCAP_POLLING) {
1307                                 error = ether_poll_register(em_poll, ifp);
1308                                 if (error)
1309                                         return (error);
1310                                 EM_CORE_LOCK(adapter);
1311                                 em_disable_intr(adapter);
1312                                 ifp->if_capenable |= IFCAP_POLLING;
1313                                 EM_CORE_UNLOCK(adapter);
1314                         } else {
1315                                 error = ether_poll_deregister(ifp);
1316                                 /* Enable interrupt even in error case */
1317                                 EM_CORE_LOCK(adapter);
1318                                 em_enable_intr(adapter);
1319                                 ifp->if_capenable &= ~IFCAP_POLLING;
1320                                 EM_CORE_UNLOCK(adapter);
1321                         }
1322                 }
1323 #endif
1324                 if (mask & IFCAP_HWCSUM) {
1325                         ifp->if_capenable ^= IFCAP_HWCSUM;
1326                         reinit = 1;
1327                 }
1328                 if (mask & IFCAP_TSO4) {
1329                         ifp->if_capenable ^= IFCAP_TSO4;
1330                         reinit = 1;
1331                 }
1332                 if (mask & IFCAP_VLAN_HWTAGGING) {
1333                         ifp->if_capenable ^= IFCAP_VLAN_HWTAGGING;
1334                         reinit = 1;
1335                 }
1336                 if (mask & IFCAP_VLAN_HWFILTER) {
1337                         ifp->if_capenable ^= IFCAP_VLAN_HWFILTER;
1338                         reinit = 1;
1339                 }
1340                 if (mask & IFCAP_VLAN_HWTSO) {
1341                         ifp->if_capenable ^= IFCAP_VLAN_HWTSO;
1342                         reinit = 1;
1343                 }
1344                 if ((mask & IFCAP_WOL) &&
1345                     (ifp->if_capabilities & IFCAP_WOL) != 0) {
1346                         if (mask & IFCAP_WOL_MCAST)
1347                                 ifp->if_capenable ^= IFCAP_WOL_MCAST;
1348                         if (mask & IFCAP_WOL_MAGIC)
1349                                 ifp->if_capenable ^= IFCAP_WOL_MAGIC;
1350                 }
1351                 if (reinit && (ifp->if_drv_flags & IFF_DRV_RUNNING))
1352                         em_init(adapter);
1353                 VLAN_CAPABILITIES(ifp);
1354                 break;
1355             }
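        /*
         * Editorial example (added): requesting "ifconfig em0 -tso4"
         * clears IFCAP_TSO4 in ifr_reqcap, so the XOR above leaves
         * IFCAP_TSO4 set in mask; the handler then toggles if_capenable
         * and, if the interface is running, reinitializes it.
         */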
1356
1357         default:
1358                 error = ether_ioctl(ifp, command, data);
1359                 break;
1360         }
1361
1362         return (error);
1363 }
1364
1365
1366 /*********************************************************************
1367  *  Init entry point
1368  *
1369  *  This routine is used in two ways: by the stack, as the init entry
1370  *  point in the network interface structure, and by the driver, as a
1371  *  hw/sw initialization routine to bring the adapter to a
1372  *  consistent state.
1373  *
1374  *  Both em_init() and em_init_locked() return nothing.
1375  **********************************************************************/
1376
1377 static void
1378 em_init_locked(struct adapter *adapter)
1379 {
1380         struct ifnet    *ifp = adapter->ifp;
1381         device_t        dev = adapter->dev;
1382
1383         INIT_DEBUGOUT("em_init: begin");
1384
1385         EM_CORE_LOCK_ASSERT(adapter);
1386
1387         em_disable_intr(adapter);
1388         callout_stop(&adapter->timer);
1389
1390         /* Get the latest MAC address; the user may have set a LAA */
1391         bcopy(IF_LLADDR(adapter->ifp), adapter->hw.mac.addr,
1392               ETHER_ADDR_LEN);
1393
1394         /* Put the address into the Receive Address Array */
1395         e1000_rar_set(&adapter->hw, adapter->hw.mac.addr, 0);
1396
1397         /*
1398          * With the 82571 adapter, RAR[0] may be overwritten
1399          * when the other port is reset, so we keep a duplicate
1400          * in RAR[14] for that eventuality; this ensures that
1401          * the interface continues to function.
1402          */
1403         if (adapter->hw.mac.type == e1000_82571) {
1404                 e1000_set_laa_state_82571(&adapter->hw, TRUE);
1405                 e1000_rar_set(&adapter->hw, adapter->hw.mac.addr,
1406                     E1000_RAR_ENTRIES - 1);
1407         }
1408
1409         /* Initialize the hardware */
1410         em_reset(adapter);
1411         em_update_link_status(adapter);
1412
1413         /* Setup VLAN support, basic and offload if available */
1414         E1000_WRITE_REG(&adapter->hw, E1000_VET, ETHERTYPE_VLAN);
1415
1416         /* Set hardware offload abilities */
1417         if (ifp->if_capenable & IFCAP_TXCSUM)
1418                 ifp->if_hwassist |= (CSUM_TCP | CSUM_UDP);
1419         else
1420                 ifp->if_hwassist &= ~(CSUM_TCP | CSUM_UDP);
1421
1422         /* Configure for OS presence */
1423         em_init_manageability(adapter);
1424
1425         /* Prepare transmit descriptors and buffers */
1426         em_setup_transmit_structures(adapter);
1427         em_initialize_transmit_unit(adapter);
1428
1429         /* Setup Multicast table */
1430         em_set_multi(adapter);
1431
1432         /*
1433         ** Figure out the desired mbuf
1434         ** pool for receiving jumbo frames
1435         */
1436         if (adapter->hw.mac.max_frame_size <= 2048)
1437                 adapter->rx_mbuf_sz = MCLBYTES;
1438 #ifndef CONTIGMALLOC_WORKS
1439         else
1440                 adapter->rx_mbuf_sz = MJUMPAGESIZE;
1441 #else
1442         else if (adapter->hw.mac.max_frame_size <= 4096)
1443                 adapter->rx_mbuf_sz = MJUMPAGESIZE;
1444         else
1445                 adapter->rx_mbuf_sz = MJUM9BYTES;
1446 #endif
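        /*
         * Editorial example (added): with a 9018-byte jumbo frame,
         * max_frame_size exceeds 4096, so rx_mbuf_sz becomes MJUM9BYTES
         * (9KB clusters) when CONTIGMALLOC_WORKS is defined; otherwise
         * the driver caps at MJUMPAGESIZE, presumably because 9KB
         * clusters need physically contiguous multi-page allocations.
         */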
1447
1448         /* Prepare receive descriptors and buffers */
1449         if (em_setup_receive_structures(adapter)) {
1450                 device_printf(dev, "Could not setup receive structures\n");
1451                 em_stop(adapter);
1452                 return;
1453         }
1454         em_initialize_receive_unit(adapter);
1455
1456         /* Use real VLAN Filter support? */
1457         if (ifp->if_capenable & IFCAP_VLAN_HWTAGGING) {
1458                 if (ifp->if_capenable & IFCAP_VLAN_HWFILTER)
1459                         /* Use real VLAN Filter support */
1460                         em_setup_vlan_hw_support(adapter);
1461                 else {
1462                         u32 ctrl;
1463                         ctrl = E1000_READ_REG(&adapter->hw, E1000_CTRL);
1464                         ctrl |= E1000_CTRL_VME;
1465                         E1000_WRITE_REG(&adapter->hw, E1000_CTRL, ctrl);
1466                 }
1467         }
1468
1469         /* Don't lose promiscuous settings */
1470         em_set_promisc(adapter);
1471
1472         /* Set the interface as ACTIVE */
1473         ifp->if_drv_flags |= IFF_DRV_RUNNING;
1474         ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;
1475
1476         callout_reset(&adapter->timer, hz, em_local_timer, adapter);
1477         e1000_clear_hw_cntrs_base_generic(&adapter->hw);
1478
1479         /* MSI/X configuration for 82574 */
1480         if (adapter->hw.mac.type == e1000_82574) {
1481                 int tmp;
1482                 tmp = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
1483                 tmp |= E1000_CTRL_EXT_PBA_CLR;
1484                 E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT, tmp);
1485                 /* Set the IVAR - interrupt vector routing. */
1486                 E1000_WRITE_REG(&adapter->hw, E1000_IVAR, adapter->ivars);
1487         }
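        /*
         * Editorial note (added): the IVAR value written above is built
         * in em_allocate_msix(); each queue contributes a 4-bit field of
         * (0x8 | vector).  E.g. with one queue and vectors 0 (RX) and
         * 1 (TX), ivars = ((8|0) << 0) | ((8|1) << 8) = 0x908, where
         * the 0x8 is assumed to be the per-field enable bit.
         */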
1488
1489 #ifdef DEVICE_POLLING
1490         /*
1491          * Only enable interrupts if we are not polling; make sure
1492          * they are off otherwise.
1493          */
1494         if (ifp->if_capenable & IFCAP_POLLING)
1495                 em_disable_intr(adapter);
1496         else
1497 #endif /* DEVICE_POLLING */
1498                 em_enable_intr(adapter);
1499
1500         /* AMT based hardware can now take control from firmware */
1501         if (adapter->has_manage && adapter->has_amt)
1502                 em_get_hw_control(adapter);
1503 }
1504
1505 static void
1506 em_init(void *arg)
1507 {
1508         struct adapter *adapter = arg;
1509
1510         EM_CORE_LOCK(adapter);
1511         em_init_locked(adapter);
1512         EM_CORE_UNLOCK(adapter);
1513 }
1514
1515
1516 #ifdef DEVICE_POLLING
1517 /*********************************************************************
1518  *
1519  *  Legacy polling routine: note this only works with a single queue
1520  *
1521  *********************************************************************/
1522 static int
1523 em_poll(struct ifnet *ifp, enum poll_cmd cmd, int count)
1524 {
1525         struct adapter *adapter = ifp->if_softc;
1526         struct tx_ring  *txr = adapter->tx_rings;
1527         struct rx_ring  *rxr = adapter->rx_rings;
1528         u32             reg_icr;
1529         int             rx_done;
1530
1531         EM_CORE_LOCK(adapter);
1532         if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) {
1533                 EM_CORE_UNLOCK(adapter);
1534                 return (0);
1535         }
1536
1537         if (cmd == POLL_AND_CHECK_STATUS) {
1538                 reg_icr = E1000_READ_REG(&adapter->hw, E1000_ICR);
1539                 if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
1540                         callout_stop(&adapter->timer);
1541                         adapter->hw.mac.get_link_status = 1;
1542                         em_update_link_status(adapter);
1543                         callout_reset(&adapter->timer, hz,
1544                             em_local_timer, adapter);
1545                 }
1546         }
1547         EM_CORE_UNLOCK(adapter);
1548
1549         em_rxeof(rxr, count, &rx_done);
1550
1551         EM_TX_LOCK(txr);
1552         em_txeof(txr);
1553 #ifdef EM_MULTIQUEUE
1554         if (!drbr_empty(ifp, txr->br))
1555                 em_mq_start_locked(ifp, txr);
1556 #else
1557         if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
1558                 em_start_locked(ifp, txr);
1559 #endif
1560         EM_TX_UNLOCK(txr);
1561
1562         return (rx_done);
1563 }
1564 #endif /* DEVICE_POLLING */
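/*
 * Editorial usage note (added): with "options DEVICE_POLLING" in the
 * kernel configuration, polling is toggled per interface from userland,
 * e.g.:
 *
 *      ifconfig em0 polling
 *      ifconfig em0 -polling
 *
 * which arrives via the SIOCSIFCAP handler above and calls
 * ether_poll_register() / ether_poll_deregister().
 */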
1565
1566
1567 /*********************************************************************
1568  *
1569  *  Fast Legacy/MSI Combined Interrupt Service routine  
1570  *
1571  *********************************************************************/
1572 static int
1573 em_irq_fast(void *arg)
1574 {
1575         struct adapter  *adapter = arg;
1576         struct ifnet    *ifp;
1577         u32             reg_icr;
1578
1579         ifp = adapter->ifp;
1580
1581         reg_icr = E1000_READ_REG(&adapter->hw, E1000_ICR);
1582
1583         /* Hot eject?  */
1584         if (reg_icr == 0xffffffff)
1585                 return (FILTER_STRAY);
1586
1587         /* Definitely not our interrupt.  */
1588         if (reg_icr == 0x0)
1589                 return (FILTER_STRAY);
1590
1591         /*
1592          * Starting with the 82571 chip, bit 31 should be used to
1593          * determine whether the interrupt belongs to us.
1594          */
1595         if (adapter->hw.mac.type >= e1000_82571 &&
1596             (reg_icr & E1000_ICR_INT_ASSERTED) == 0)
1597                 return (FILTER_STRAY);
1598
1599         em_disable_intr(adapter);
1600         taskqueue_enqueue(adapter->tq, &adapter->que_task);
1601
1602         /* Link status change */
1603         if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
1604                 adapter->hw.mac.get_link_status = 1;
1605                 taskqueue_enqueue(taskqueue_fast, &adapter->link_task);
1606         }
1607
1608         if (reg_icr & E1000_ICR_RXO)
1609                 adapter->rx_overruns++;
1610         return (FILTER_HANDLED);
1611 }
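/*
 * Editorial note (added): em_irq_fast() runs as an interrupt filter, so
 * it may not sleep or take sleepable locks; it only classifies the
 * interrupt (FILTER_STRAY vs. FILTER_HANDLED) and defers the real work
 * to the que_task taskqueue enqueued above.
 */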
1612
1613 /* Combined RX/TX handler, used by Legacy and MSI */
1614 static void
1615 em_handle_que(void *context, int pending)
1616 {
1617         struct adapter  *adapter = context;
1618         struct ifnet    *ifp = adapter->ifp;
1619         struct tx_ring  *txr = adapter->tx_rings;
1620         struct rx_ring  *rxr = adapter->rx_rings;
1621
1622         if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
1623                 bool more = em_rxeof(rxr, adapter->rx_process_limit, NULL);
1624
1625                 EM_TX_LOCK(txr);
1626                 em_txeof(txr);
1627 #ifdef EM_MULTIQUEUE
1628                 if (!drbr_empty(ifp, txr->br))
1629                         em_mq_start_locked(ifp, txr);
1630 #else
1631                 if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
1632                         em_start_locked(ifp, txr);
1633 #endif
1634                 EM_TX_UNLOCK(txr);
1635                 if (more) {
1636                         taskqueue_enqueue(adapter->tq, &adapter->que_task);
1637                         return;
1638                 }
1639         }
1640
1641         em_enable_intr(adapter);
1642         return;
1643 }
1644
1645
1646 /*********************************************************************
1647  *
1648  *  MSIX Interrupt Service Routines
1649  *
1650  **********************************************************************/
1651 static void
1652 em_msix_tx(void *arg)
1653 {
1654         struct tx_ring *txr = arg;
1655         struct adapter *adapter = txr->adapter;
1656         struct ifnet    *ifp = adapter->ifp;
1657
1658         ++txr->tx_irq;
1659         EM_TX_LOCK(txr);
1660         em_txeof(txr);
1661 #ifdef EM_MULTIQUEUE
1662         if (!drbr_empty(ifp, txr->br))
1663                 em_mq_start_locked(ifp, txr);
1664 #else
1665         if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
1666                 em_start_locked(ifp, txr);
1667 #endif
1668
1669         /* Reenable this interrupt */
1670         E1000_WRITE_REG(&adapter->hw, E1000_IMS, txr->ims);
1671         EM_TX_UNLOCK(txr);
1672         return;
1673 }
1674
1675 /*********************************************************************
1676  *
1677  *  MSIX RX Interrupt Service routine
1678  *
1679  **********************************************************************/
1680
1681 static void
1682 em_msix_rx(void *arg)
1683 {
1684         struct rx_ring  *rxr = arg;
1685         struct adapter  *adapter = rxr->adapter;
1686         bool            more;
1687
1688         ++rxr->rx_irq;
1689         if (!(adapter->ifp->if_drv_flags & IFF_DRV_RUNNING))
1690                 return;
1691         more = em_rxeof(rxr, adapter->rx_process_limit, NULL);
1692         if (more)
1693                 taskqueue_enqueue(rxr->tq, &rxr->rx_task);
1694         else {
1695                 /* Reenable this interrupt */
1696                 E1000_WRITE_REG(&adapter->hw, E1000_IMS, rxr->ims);
1697         }
1698         return;
1699 }
1700
1701 /*********************************************************************
1702  *
1703  *  MSIX Link Fast Interrupt Service routine
1704  *
1705  **********************************************************************/
1706 static void
1707 em_msix_link(void *arg)
1708 {
1709         struct adapter  *adapter = arg;
1710         u32             reg_icr;
1711
1712         ++adapter->link_irq;
1713         reg_icr = E1000_READ_REG(&adapter->hw, E1000_ICR);
1714
1715         if (reg_icr & E1000_ICR_RXO)
1716                 adapter->rx_overruns++;
1717
1718         if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
1719                 adapter->hw.mac.get_link_status = 1;
1720                 em_handle_link(adapter, 0);
1721         } else
1722                 E1000_WRITE_REG(&adapter->hw, E1000_IMS,
1723                     EM_MSIX_LINK | E1000_IMS_LSC);
1724         /*
1725         ** Because we must read the ICR for this interrupt,
1726         ** autoclear may clear other pending causes as a side
1727         ** effect; for this reason we simply raise a soft
1728         ** interrupt (via ICS) for all of these vectors.
1729         */
1730         if (reg_icr) {
1731                 E1000_WRITE_REG(&adapter->hw,
1732                         E1000_ICS, adapter->ims);
1733         }
1734         return;
1735 }
1736
1737 static void
1738 em_handle_rx(void *context, int pending)
1739 {
1740         struct rx_ring  *rxr = context;
1741         struct adapter  *adapter = rxr->adapter;
1742         bool            more;
1743
1744         more = em_rxeof(rxr, adapter->rx_process_limit, NULL);
1745         if (more)
1746                 taskqueue_enqueue(rxr->tq, &rxr->rx_task);
1747         else {
1748                 /* Reenable this interrupt */
1749                 E1000_WRITE_REG(&adapter->hw, E1000_IMS, rxr->ims);
1750         }
1751 }
1752
1753 static void
1754 em_handle_tx(void *context, int pending)
1755 {
1756         struct tx_ring  *txr = context;
1757         struct adapter  *adapter = txr->adapter;
1758         struct ifnet    *ifp = adapter->ifp;
1759
1760         EM_TX_LOCK(txr);
1761         em_txeof(txr);
1762 #ifdef EM_MULTIQUEUE
1763         if (!drbr_empty(ifp, txr->br))
1764                 em_mq_start_locked(ifp, txr);
1765 #else
1766         if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
1767                 em_start_locked(ifp, txr);
1768 #endif
1769         E1000_WRITE_REG(&adapter->hw, E1000_IMS, txr->ims);
1770         EM_TX_UNLOCK(txr);
1771 }
1772
1773 static void
1774 em_handle_link(void *context, int pending)
1775 {
1776         struct adapter  *adapter = context;
1777         struct tx_ring  *txr = adapter->tx_rings;
1778         struct ifnet *ifp = adapter->ifp;
1779
1780         if (!(ifp->if_drv_flags & IFF_DRV_RUNNING))
1781                 return;
1782
1783         EM_CORE_LOCK(adapter);
1784         callout_stop(&adapter->timer);
1785         em_update_link_status(adapter);
1786         callout_reset(&adapter->timer, hz, em_local_timer, adapter);
1787         E1000_WRITE_REG(&adapter->hw, E1000_IMS,
1788             EM_MSIX_LINK | E1000_IMS_LSC);
1789         if (adapter->link_active) {
1790                 for (int i = 0; i < adapter->num_queues; i++, txr++) {
1791                         EM_TX_LOCK(txr);
1792 #ifdef EM_MULTIQUEUE
1793                         if (!drbr_empty(ifp, txr->br))
1794                                 em_mq_start_locked(ifp, txr);
1795 #else
1796                         if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
1797                                 em_start_locked(ifp, txr);
1798 #endif
1799                         EM_TX_UNLOCK(txr);
1800                 }
1801         }
1802         EM_CORE_UNLOCK(adapter);
1803 }
1804
1805
1806 /*********************************************************************
1807  *
1808  *  Media Ioctl callback
1809  *
1810  *  This routine is called whenever the user queries the status of
1811  *  the interface using ifconfig.
1812  *
1813  **********************************************************************/
1814 static void
1815 em_media_status(struct ifnet *ifp, struct ifmediareq *ifmr)
1816 {
1817         struct adapter *adapter = ifp->if_softc;
1818         u_char fiber_type = IFM_1000_SX;
1819
1820         INIT_DEBUGOUT("em_media_status: begin");
1821
1822         EM_CORE_LOCK(adapter);
1823         em_update_link_status(adapter);
1824
1825         ifmr->ifm_status = IFM_AVALID;
1826         ifmr->ifm_active = IFM_ETHER;
1827
1828         if (!adapter->link_active) {
1829                 EM_CORE_UNLOCK(adapter);
1830                 return;
1831         }
1832
1833         ifmr->ifm_status |= IFM_ACTIVE;
1834
1835         if ((adapter->hw.phy.media_type == e1000_media_type_fiber) ||
1836             (adapter->hw.phy.media_type == e1000_media_type_internal_serdes)) {
1837                 ifmr->ifm_active |= fiber_type | IFM_FDX;
1838         } else {
1839                 switch (adapter->link_speed) {
1840                 case 10:
1841                         ifmr->ifm_active |= IFM_10_T;
1842                         break;
1843                 case 100:
1844                         ifmr->ifm_active |= IFM_100_TX;
1845                         break;
1846                 case 1000:
1847                         ifmr->ifm_active |= IFM_1000_T;
1848                         break;
1849                 }
1850                 if (adapter->link_duplex == FULL_DUPLEX)
1851                         ifmr->ifm_active |= IFM_FDX;
1852                 else
1853                         ifmr->ifm_active |= IFM_HDX;
1854         }
1855         EM_CORE_UNLOCK(adapter);
1856 }
1857
1858 /*********************************************************************
1859  *
1860  *  Media Ioctl callback
1861  *
1862  *  This routine is called when the user changes speed/duplex using
1863  *  media/mediaopt options with ifconfig.
1864  *
1865  **********************************************************************/
1866 static int
1867 em_media_change(struct ifnet *ifp)
1868 {
1869         struct adapter *adapter = ifp->if_softc;
1870         struct ifmedia  *ifm = &adapter->media;
1871
1872         INIT_DEBUGOUT("em_media_change: begin");
1873
1874         if (IFM_TYPE(ifm->ifm_media) != IFM_ETHER)
1875                 return (EINVAL);
1876
1877         EM_CORE_LOCK(adapter);
1878         switch (IFM_SUBTYPE(ifm->ifm_media)) {
1879         case IFM_AUTO:
1880                 adapter->hw.mac.autoneg = DO_AUTO_NEG;
1881                 adapter->hw.phy.autoneg_advertised = AUTONEG_ADV_DEFAULT;
1882                 break;
1883         case IFM_1000_LX:
1884         case IFM_1000_SX:
1885         case IFM_1000_T:
1886                 adapter->hw.mac.autoneg = DO_AUTO_NEG;
1887                 adapter->hw.phy.autoneg_advertised = ADVERTISE_1000_FULL;
1888                 break;
1889         case IFM_100_TX:
1890                 adapter->hw.mac.autoneg = FALSE;
1891                 adapter->hw.phy.autoneg_advertised = 0;
1892                 if ((ifm->ifm_media & IFM_GMASK) == IFM_FDX)
1893                         adapter->hw.mac.forced_speed_duplex = ADVERTISE_100_FULL;
1894                 else
1895                         adapter->hw.mac.forced_speed_duplex = ADVERTISE_100_HALF;
1896                 break;
1897         case IFM_10_T:
1898                 adapter->hw.mac.autoneg = FALSE;
1899                 adapter->hw.phy.autoneg_advertised = 0;
1900                 if ((ifm->ifm_media & IFM_GMASK) == IFM_FDX)
1901                         adapter->hw.mac.forced_speed_duplex = ADVERTISE_10_FULL;
1902                 else
1903                         adapter->hw.mac.forced_speed_duplex = ADVERTISE_10_HALF;
1904                 break;
1905         default:
1906                 device_printf(adapter->dev, "Unsupported media type\n");
1907         }
1908
1909         em_init_locked(adapter);
1910         EM_CORE_UNLOCK(adapter);
1911
1912         return (0);
1913 }
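/*
 * Editorial usage note (added): this routine is reached via
 * SIOCSIFMEDIA, e.g.:
 *
 *      ifconfig em0 media 100baseTX mediaopt full-duplex
 *      ifconfig em0 media autoselect
 */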
1914
1915 /*********************************************************************
1916  *
1917  *  This routine maps the mbufs to tx descriptors.
1918  *
1919  *  return 0 on success, positive on failure
1920  **********************************************************************/
1921
1922 static int
1923 em_xmit(struct tx_ring *txr, struct mbuf **m_headp)
1924 {
1925         struct adapter          *adapter = txr->adapter;
1926         bus_dma_segment_t       segs[EM_MAX_SCATTER];
1927         bus_dmamap_t            map;
1928         struct em_txbuffer      *tx_buffer, *tx_buffer_mapped;
1929         struct e1000_tx_desc    *ctxd = NULL;
1930         struct mbuf             *m_head;
1931         struct ether_header     *eh;
1932         struct ip               *ip = NULL;
1933         struct tcphdr           *tp = NULL;
1934         u32                     txd_upper = 0, txd_lower = 0;
1935         int                     ip_off, poff;
1936         int                     nsegs, i, j, first, last = 0;
1937         int                     error;
1938         bool                    do_tso, tso_desc, remap = TRUE;
1939
1940         m_head = *m_headp;
1941         do_tso = m_head->m_pkthdr.csum_flags & CSUM_IP_TSO;
1942         tso_desc = FALSE;
1943         ip_off = poff = 0;
1944
1945         /*
1946          * Intel recommends entire IP/TCP header length reside in a single
1947          * buffer. If multiple descriptors are used to describe the IP and
1948          * TCP header, each descriptor should describe one or more
1949          * complete headers; descriptors referencing only parts of headers
1950          * are not supported. If all layer headers are not coalesced into
1951          * a single buffer, each buffer should not cross a 4KB boundary,
1952          * or be larger than the maximum read request size.
1953          * The controller also requires modifying the IP/TCP header to
1954          * make TSO work, so we first get a writable mbuf chain, then
1955          * coalesce the ethernet/IP/TCP headers into a single buffer to
1956          * meet the controller's requirement. This also simplifies
1957          * IP/TCP/UDP checksum offloading, which has similar restrictions.
1958          */
1959         if (do_tso || m_head->m_pkthdr.csum_flags & CSUM_OFFLOAD) {
1960                 if (do_tso || (m_head->m_next != NULL && 
1961                     m_head->m_pkthdr.csum_flags & CSUM_OFFLOAD)) {
1962                         if (M_WRITABLE(*m_headp) == 0) {
1963                                 m_head = m_dup(*m_headp, M_NOWAIT);
1964                                 m_freem(*m_headp);
1965                                 if (m_head == NULL) {
1966                                         *m_headp = NULL;
1967                                         return (ENOBUFS);
1968                                 }
1969                                 *m_headp = m_head;
1970                         }
1971                 }
1972                 /*
1973                  * XXX
1974                  * Assume IPv4, we don't have TSO/checksum offload support
1975                  * for IPv6 yet.
1976                  */
1977                 ip_off = sizeof(struct ether_header);
1978                 if (m_head->m_len < ip_off) {
1979                         m_head = m_pullup(m_head, ip_off);
1980                         if (m_head == NULL) {
1981                                 *m_headp = NULL;
1982                                 return (ENOBUFS);
1983                         }
1984                 }
1985                 eh = mtod(m_head, struct ether_header *);
1986                 if (eh->ether_type == htons(ETHERTYPE_VLAN)) {
1987                         ip_off = sizeof(struct ether_vlan_header);
1988                         if (m_head->m_len < ip_off) {
1989                                 m_head = m_pullup(m_head, ip_off);
1990                                 if (m_head == NULL) {
1991                                         *m_headp = NULL;
1992                                         return (ENOBUFS);
1993                                 }
1994                         }
1995                 }
1996                 if (m_head->m_len < ip_off + sizeof(struct ip)) {
1997                         m_head = m_pullup(m_head, ip_off + sizeof(struct ip));
1998                         if (m_head == NULL) {
1999                                 *m_headp = NULL;
2000                                 return (ENOBUFS);
2001                         }
2002                 }
2003                 ip = (struct ip *)(mtod(m_head, char *) + ip_off);
2004                 poff = ip_off + (ip->ip_hl << 2);
2005
2006                 if (do_tso || (m_head->m_pkthdr.csum_flags & CSUM_TCP)) {
2007                         if (m_head->m_len < poff + sizeof(struct tcphdr)) {
2008                                 m_head = m_pullup(m_head, poff +
2009                                     sizeof(struct tcphdr));
2010                                 if (m_head == NULL) {
2011                                         *m_headp = NULL;
2012                                         return (ENOBUFS);
2013                                 }
2014                         }
2015                         tp = (struct tcphdr *)(mtod(m_head, char *) + poff);
2016                         /*
2017                          * TSO workaround: pull TSO_WORKAROUND (4) more
2018                          * bytes of data into the header mbuf.
2019                          */
2020                         if (m_head->m_len < poff + (tp->th_off << 2)) {
2021                                 m_head = m_pullup(m_head, poff +
2022                                                  (tp->th_off << 2) +
2023                                                  TSO_WORKAROUND);
2024                                 if (m_head == NULL) {
2025                                         *m_headp = NULL;
2026                                         return (ENOBUFS);
2027                                 }
2028                         }
2029                         ip = (struct ip *)(mtod(m_head, char *) + ip_off);
2030                         tp = (struct tcphdr *)(mtod(m_head, char *) + poff);
2031                         if (do_tso) {
2032                                 ip->ip_len = htons(m_head->m_pkthdr.tso_segsz +
2033                                                   (ip->ip_hl << 2) +
2034                                                   (tp->th_off << 2));
2035                                 ip->ip_sum = 0;
2036                                 /*
2037                                  * The pseudo TCP checksum does not include
2038                                  * the TCP payload length, so the driver must
2039                                  * recompute it here as the hardware expects
2040                                  * to see it. This is required by Microsoft's
2041                                  * Large Send specification.
2042                                  */
2043                                 tp->th_sum = in_pseudo(ip->ip_src.s_addr,
2044                                     ip->ip_dst.s_addr, htons(IPPROTO_TCP));
2045                         }
2046                 } else if (m_head->m_pkthdr.csum_flags & CSUM_UDP) {
2047                         if (m_head->m_len < poff + sizeof(struct udphdr)) {
2048                                 m_head = m_pullup(m_head, poff +
2049                                     sizeof(struct udphdr));
2050                                 if (m_head == NULL) {
2051                                         *m_headp = NULL;
2052                                         return (ENOBUFS);
2053                                 }
2054                         }
2055                         ip = (struct ip *)(mtod(m_head, char *) + ip_off);
2056                 }
2057                 *m_headp = m_head;
2058         }
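        /*
         * Editorial note (added): each m_pullup() above may allocate a
         * new first mbuf and frees the chain on failure, which is why
         * the ip/tp pointers are re-fetched from m_head after every
         * pullup and why a NULL result simply returns ENOBUFS.
         */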
2059
2060         /*
2061          * Map the packet for DMA
2062          *
2063          * Capture the first descriptor index,
2064          * this descriptor will have the index
2065          * of the EOP which is the only one that
2066          * now gets a DONE bit writeback.
2067          */
2068         first = txr->next_avail_desc;
2069         tx_buffer = &txr->tx_buffers[first];
2070         tx_buffer_mapped = tx_buffer;
2071         map = tx_buffer->map;
2072
2073 retry:
2074         error = bus_dmamap_load_mbuf_sg(txr->txtag, map,
2075             *m_headp, segs, &nsegs, BUS_DMA_NOWAIT);
2076
2077         /*
2078          * There are two types of errors we can (try) to handle:
2079          * - EFBIG means the mbuf chain was too long and bus_dma ran
2080          *   out of segments.  Defragment the mbuf chain and try again.
2081          * - ENOMEM means bus_dma could not obtain enough bounce buffers
2082          *   at this point in time.  Defer sending and try again later.
2083          * All other errors, in particular EINVAL, are fatal and prevent the
2084          * mbuf chain from ever going through.  Drop it and report error.
2085          */
2086         if (error == EFBIG && remap) {
2087                 struct mbuf *m;
2088
2089                 m = m_collapse(*m_headp, M_NOWAIT, EM_MAX_SCATTER);
2090                 if (m == NULL) {
2091                         adapter->mbuf_defrag_failed++;
2092                         m_freem(*m_headp);
2093                         *m_headp = NULL;
2094                         return (ENOBUFS);
2095                 }
2096                 *m_headp = m;
2097
2098                 /* Try it again, but only once */
2099                 remap = FALSE;
2100                 goto retry;
2101         } else if (error != 0) {
2102                 adapter->no_tx_dma_setup++;
2103                 m_freem(*m_headp);
2104                 *m_headp = NULL;
2105                 return (error);
2106         }
2107
2108         /*
2109          * TSO Hardware workaround: if this packet is not
2110          * TSO, and is only a single descriptor long, and
2111          * it follows a TSO burst, then we need to add a
2112          * sentinel descriptor to prevent premature writeback.
2113          */
2114         if ((!do_tso) && (txr->tx_tso == TRUE)) {
2115                 if (nsegs == 1)
2116                         tso_desc = TRUE;
2117                 txr->tx_tso = FALSE;
2118         }
2119
2120         if (txr->tx_avail < (nsegs + EM_MAX_SCATTER)) {
2121                 txr->no_desc_avail++;
2122                 bus_dmamap_unload(txr->txtag, map);
2123                 return (ENOBUFS);
2124         }
2125         m_head = *m_headp;
2126
2127         /* Do hardware assists */
2128         if (m_head->m_pkthdr.csum_flags & CSUM_IP_TSO) {
2129                 em_tso_setup(txr, m_head, ip_off, ip, tp,
2130                     &txd_upper, &txd_lower);
2131                 /* we need to make a final sentinel transmit desc */
2132                 tso_desc = TRUE;
2133         } else if (m_head->m_pkthdr.csum_flags & CSUM_OFFLOAD)
2134                 em_transmit_checksum_setup(txr, m_head,
2135                     ip_off, ip, &txd_upper, &txd_lower);
2136
2137         if (m_head->m_flags & M_VLANTAG) {
2138                 /* Set the vlan id. */
2139                 txd_upper |=
2140                     (htole16(m_head->m_pkthdr.ether_vtag) << 16);
2141                 /* Tell hardware to add tag */
2142                 txd_lower |= htole32(E1000_TXD_CMD_VLE);
2143         }
2144
2145         i = txr->next_avail_desc;
2146
2147         /* Set up our transmit descriptors */
2148         for (j = 0; j < nsegs; j++) {
2149                 bus_size_t seg_len;
2150                 bus_addr_t seg_addr;
2151
2152                 tx_buffer = &txr->tx_buffers[i];
2153                 ctxd = &txr->tx_base[i];
2154                 seg_addr = segs[j].ds_addr;
2155                 seg_len  = segs[j].ds_len;
2156                 /*
2157                 ** TSO Workaround:
2158                 ** If this is the last descriptor, we want to
2159                 ** split it so we have a small final sentinel
2160                 */
2161                 if (tso_desc && (j == (nsegs - 1)) && (seg_len > 8)) {
2162                         seg_len -= TSO_WORKAROUND;
2163                         ctxd->buffer_addr = htole64(seg_addr);
2164                         ctxd->lower.data = htole32(
2165                                 adapter->txd_cmd | txd_lower | seg_len);
2166                         ctxd->upper.data = htole32(txd_upper);
2167                         if (++i == adapter->num_tx_desc)
2168                                 i = 0;
2169
2170                         /* Now make the sentinel */     
2171                         txr->tx_avail--;
2172                         ctxd = &txr->tx_base[i];
2173                         tx_buffer = &txr->tx_buffers[i];
2174                         ctxd->buffer_addr =
2175                             htole64(seg_addr + seg_len);
2176                         ctxd->lower.data = htole32(
2177                             adapter->txd_cmd | txd_lower | TSO_WORKAROUND);
2178                         ctxd->upper.data =
2179                             htole32(txd_upper);
2180                         last = i;
2181                         if (++i == adapter->num_tx_desc)
2182                                 i = 0;
2183                 } else {
2184                         ctxd->buffer_addr = htole64(seg_addr);
2185                         ctxd->lower.data = htole32(
2186                             adapter->txd_cmd | txd_lower | seg_len);
2187                         ctxd->upper.data = htole32(txd_upper);
2188                         last = i;
2189                         if (++i == adapter->num_tx_desc)
2190                                 i = 0;
2191                 }
2192                 tx_buffer->m_head = NULL;
2193                 tx_buffer->next_eop = -1;
2194         }
2195
2196         txr->next_avail_desc = i;
2197         txr->tx_avail -= nsegs;
2198
2199         tx_buffer->m_head = m_head;
2200         /*
2201         ** Here we swap the map so the last descriptor,
2202         ** which gets the completion interrupt, has the
2203         ** real map, and the first descriptor gets the
2204         ** unused map from this descriptor.
2205         */
2206         tx_buffer_mapped->map = tx_buffer->map;
2207         tx_buffer->map = map;
2208         bus_dmamap_sync(txr->txtag, map, BUS_DMASYNC_PREWRITE);
2209
2210         /*
2211          * Last Descriptor of Packet
2212          * needs End Of Packet (EOP)
2213          * and Report Status (RS)
2214          */
2215         ctxd->lower.data |=
2216             htole32(E1000_TXD_CMD_EOP | E1000_TXD_CMD_RS);
2217         /*
2218          * Record in the first buffer which
2219          * descriptor will be written back.
2220          */
2221         tx_buffer = &txr->tx_buffers[first];
2222         tx_buffer->next_eop = last;
2223
2224         /*
2225          * Advance the Transmit Descriptor Tail (TDT), this tells the E1000
2226          * that this frame is available to transmit.
2227          */
2228         bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
2229             BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
2230         E1000_WRITE_REG(&adapter->hw, E1000_TDT(txr->me), i);
2231
2232         return (0);
2233 }
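/*
 * Editorial note (added) on descriptor accounting in em_xmit():
 * tx_avail is checked against nsegs + EM_MAX_SCATTER before any
 * descriptor is written, only the last descriptor of a frame gets
 * EOP|RS (one writeback per packet), and the final TDT write is what
 * hands the frame to the hardware.
 */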
2234
2235 static void
2236 em_set_promisc(struct adapter *adapter)
2237 {
2238         struct ifnet    *ifp = adapter->ifp;
2239         u32             reg_rctl;
2240
2241         reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
2242
2243         if (ifp->if_flags & IFF_PROMISC) {
2244                 reg_rctl |= (E1000_RCTL_UPE | E1000_RCTL_MPE);
2245                 /* Turn this on if you want to see bad packets */
2246                 if (em_debug_sbp)
2247                         reg_rctl |= E1000_RCTL_SBP;
2248                 E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2249         } else if (ifp->if_flags & IFF_ALLMULTI) {
2250                 reg_rctl |= E1000_RCTL_MPE;
2251                 reg_rctl &= ~E1000_RCTL_UPE;
2252                 E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2253         }
2254 }
2255
2256 static void
2257 em_disable_promisc(struct adapter *adapter)
2258 {
2259         struct ifnet    *ifp = adapter->ifp;
2260         u32             reg_rctl;
2261         int             mcnt = 0;
2262
2263         reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
2264         reg_rctl &=  (~E1000_RCTL_UPE);
2265         if (ifp->if_flags & IFF_ALLMULTI)
2266                 mcnt = MAX_NUM_MULTICAST_ADDRESSES;
2267         else {
2268                 struct  ifmultiaddr *ifma;
2269 #if __FreeBSD_version < 800000
2270                 IF_ADDR_LOCK(ifp);
2271 #else   
2272                 if_maddr_rlock(ifp);
2273 #endif
2274                 TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
2275                         if (ifma->ifma_addr->sa_family != AF_LINK)
2276                                 continue;
2277                         if (mcnt == MAX_NUM_MULTICAST_ADDRESSES)
2278                                 break;
2279                         mcnt++;
2280                 }
2281 #if __FreeBSD_version < 800000
2282                 IF_ADDR_UNLOCK(ifp);
2283 #else
2284                 if_maddr_runlock(ifp);
2285 #endif
2286         }
2287         /* Don't clear MPE if we are at the multicast group limit */
2288         if (mcnt < MAX_NUM_MULTICAST_ADDRESSES)
2289                 reg_rctl &=  (~E1000_RCTL_MPE);
2290         reg_rctl &=  (~E1000_RCTL_SBP);
2291         E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2292 }
2293
2294
2295 /*********************************************************************
2296  *  Multicast Update
2297  *
2298  *  This routine is called whenever the multicast address list is updated.
2299  *
2300  **********************************************************************/
2301
2302 static void
2303 em_set_multi(struct adapter *adapter)
2304 {
2305         struct ifnet    *ifp = adapter->ifp;
2306         struct ifmultiaddr *ifma;
2307         u32 reg_rctl = 0;
2308         u8  *mta; /* Multicast array memory */
2309         int mcnt = 0;
2310
2311         IOCTL_DEBUGOUT("em_set_multi: begin");
2312
2313         mta = adapter->mta;
2314         bzero(mta, sizeof(u8) * ETH_ADDR_LEN * MAX_NUM_MULTICAST_ADDRESSES);
2315
2316         if (adapter->hw.mac.type == e1000_82542 && 
2317             adapter->hw.revision_id == E1000_REVISION_2) {
2318                 reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
2319                 if (adapter->hw.bus.pci_cmd_word & CMD_MEM_WRT_INVALIDATE)
2320                         e1000_pci_clear_mwi(&adapter->hw);
2321                 reg_rctl |= E1000_RCTL_RST;
2322                 E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2323                 msec_delay(5);
2324         }
2325
2326 #if __FreeBSD_version < 800000
2327         IF_ADDR_LOCK(ifp);
2328 #else
2329         if_maddr_rlock(ifp);
2330 #endif
2331         TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
2332                 if (ifma->ifma_addr->sa_family != AF_LINK)
2333                         continue;
2334
2335                 if (mcnt == MAX_NUM_MULTICAST_ADDRESSES)
2336                         break;
2337
2338                 bcopy(LLADDR((struct sockaddr_dl *)ifma->ifma_addr),
2339                     &mta[mcnt * ETH_ADDR_LEN], ETH_ADDR_LEN);
2340                 mcnt++;
2341         }
2342 #if __FreeBSD_version < 800000
2343         IF_ADDR_UNLOCK(ifp);
2344 #else
2345         if_maddr_runlock(ifp);
2346 #endif
2347         if (mcnt >= MAX_NUM_MULTICAST_ADDRESSES) {
2348                 reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
2349                 reg_rctl |= E1000_RCTL_MPE;
2350                 E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2351         } else
2352                 e1000_update_mc_addr_list(&adapter->hw, mta, mcnt);
2353
2354         if (adapter->hw.mac.type == e1000_82542 && 
2355             adapter->hw.revision_id == E1000_REVISION_2) {
2356                 reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
2357                 reg_rctl &= ~E1000_RCTL_RST;
2358                 E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2359                 msec_delay(5);
2360                 if (adapter->hw.bus.pci_cmd_word & CMD_MEM_WRT_INVALIDATE)
2361                         e1000_pci_set_mwi(&adapter->hw);
2362         }
2363 }
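/*
 * Editorial note (added): if MAX_NUM_MULTICAST_ADDRESSES or more groups
 * are joined, em_set_multi() gives up on exact filtering and sets
 * multicast promiscuous (MPE) instead; em_disable_promisc() counts the
 * same way, so it will not clear MPE while in that state.
 */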
2364
2365
2366 /*********************************************************************
2367  *  Timer routine
2368  *
2369  *  This routine checks for link status and updates statistics.
2370  *
2371  **********************************************************************/
2372
2373 static void
2374 em_local_timer(void *arg)
2375 {
2376         struct adapter  *adapter = arg;
2377         struct ifnet    *ifp = adapter->ifp;
2378         struct tx_ring  *txr = adapter->tx_rings;
2379         struct rx_ring  *rxr = adapter->rx_rings;
2380         u32             trigger = 0;
2381
2382         EM_CORE_LOCK_ASSERT(adapter);
2383
2384         em_update_link_status(adapter);
2385         em_update_stats_counters(adapter);
2386
2387         /* Reset LAA into RAR[0] on 82571 */
2388         if ((adapter->hw.mac.type == e1000_82571) &&
2389             e1000_get_laa_state_82571(&adapter->hw))
2390                 e1000_rar_set(&adapter->hw, adapter->hw.mac.addr, 0);
2391
2392         /* Mask to use in the irq trigger */
2393         if (adapter->msix_mem) {
2394                 for (int i = 0; i < adapter->num_queues; i++, rxr++)
2395                         trigger |= rxr->ims;
2396                 rxr = adapter->rx_rings;
2397         } else
2398                 trigger = E1000_ICS_RXDMT0;
2399
2400         /*
2401         ** Check on the state of the TX queue(s); this
2402         ** can be done without the lock because it's RO
2403         ** and the HUNG state will be static if set.
2404         */
2405         for (int i = 0; i < adapter->num_queues; i++, txr++) {
2406                 if (txr->busy == EM_TX_HUNG)
2407                         goto hung;
2408                 if (txr->busy >= EM_TX_MAXTRIES)
2409                         txr->busy = EM_TX_HUNG;
2410                 /* Schedule a TX task if needed */
2411                 if (txr->tx_avail <= EM_MAX_SCATTER)
2412                         taskqueue_enqueue(txr->tq, &txr->tx_task);
2413         }
2414         
2415         callout_reset(&adapter->timer, hz, em_local_timer, adapter);
2416 #ifndef DEVICE_POLLING
2417         /* Trigger an RX interrupt to guarantee mbuf refresh */
2418         E1000_WRITE_REG(&adapter->hw, E1000_ICS, trigger);
2419 #endif
2420         return;
2421 hung:
2422         /* Looks like we're hung */
2423         device_printf(adapter->dev, "Watchdog timeout Queue[%d]-- resetting\n",
2424                         txr->me);
2425         em_print_debug_info(adapter);
2426         ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
2427         adapter->watchdog_events++;
2428         em_init_locked(adapter);
2429 }
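/*
 * Editorial note (added): the watchdog above is a simple escalation --
 * txr->busy is advanced elsewhere (assumed: in the TX completion path)
 * while work is outstanding; once it reaches EM_TX_MAXTRIES the queue
 * is marked EM_TX_HUNG, and the next timer tick resets the interface
 * via em_init_locked().
 */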
2430
2431
2432 static void
2433 em_update_link_status(struct adapter *adapter)
2434 {
2435         struct e1000_hw *hw = &adapter->hw;
2436         struct ifnet *ifp = adapter->ifp;
2437         device_t dev = adapter->dev;
2438         struct tx_ring *txr = adapter->tx_rings;
2439         u32 link_check = 0;
2440
2441         /* Get the cached link value or read phy for real */
2442         switch (hw->phy.media_type) {
2443         case e1000_media_type_copper:
2444                 if (hw->mac.get_link_status) {
2445                         if (hw->mac.type == e1000_pch_spt)
2446                                 msec_delay(50);
2447                         /* Do the work to read phy */
2448                         e1000_check_for_link(hw);
2449                         link_check = !hw->mac.get_link_status;
2450                         if (link_check) /* ESB2 fix */
2451                                 e1000_cfg_on_link_up(hw);
2452                 } else
2453                         link_check = TRUE;
2454                 break;
2455         case e1000_media_type_fiber:
2456                 e1000_check_for_link(hw);
2457                 link_check = (E1000_READ_REG(hw, E1000_STATUS) &
2458                                  E1000_STATUS_LU);
2459                 break;
2460         case e1000_media_type_internal_serdes:
2461                 e1000_check_for_link(hw);
2462                 link_check = adapter->hw.mac.serdes_has_link;
2463                 break;
2464         default:
2465         case e1000_media_type_unknown:
2466                 break;
2467         }
2468
2469         /* Now check for a transition */
2470         if (link_check && (adapter->link_active == 0)) {
2471                 e1000_get_speed_and_duplex(hw, &adapter->link_speed,
2472                     &adapter->link_duplex);
2473
2474                 /*
2475                 ** There have proven to be problems with TSO when not at full
2476                 ** gigabit speed, so disable the assist automatically when at
2477                 ** lower speeds.  -jfv
2478                 */
2479                 if (ifp->if_capenable & IFCAP_TSO4) {
2480                         if (adapter->link_speed == SPEED_1000)
2481                                 ifp->if_hwassist |= CSUM_IP_TSO;
2482                         else
2483                                 ifp->if_hwassist &= ~CSUM_IP_TSO;
2484                 }
2485
2486                 /* Check if we must disable SPEED_MODE bit on PCI-E */
2487                 if ((adapter->link_speed != SPEED_1000) &&
2488                     ((hw->mac.type == e1000_82571) ||
2489                     (hw->mac.type == e1000_82572))) {
2490                         int tarc0;
2491                         tarc0 = E1000_READ_REG(hw, E1000_TARC(0));
2492                         tarc0 &= ~TARC_SPEED_MODE_BIT;
2493                         E1000_WRITE_REG(hw, E1000_TARC(0), tarc0);
2494                 }
2495                 if (bootverbose)
2496                         device_printf(dev, "Link is up %d Mbps %s\n",
2497                             adapter->link_speed,
2498                             ((adapter->link_duplex == FULL_DUPLEX) ?
2499                             "Full Duplex" : "Half Duplex"));
2500                 adapter->link_active = 1;
2501                 adapter->smartspeed = 0;
2502                 ifp->if_baudrate = adapter->link_speed * 1000000;
2503                 if_link_state_change(ifp, LINK_STATE_UP);
2504         } else if (!link_check && (adapter->link_active == 1)) {
2505                 ifp->if_baudrate = adapter->link_speed = 0;
2506                 adapter->link_duplex = 0;
2507                 if (bootverbose)
2508                         device_printf(dev, "Link is Down\n");
2509                 adapter->link_active = 0;
2510                 /* Link down, disable hang detection */
2511                 for (int i = 0; i < adapter->num_queues; i++, txr++)
2512                         txr->busy = EM_TX_IDLE;
2513                 if_link_state_change(ifp, LINK_STATE_DOWN);
2514         }
2515 }
2516
2517 /*********************************************************************
2518  *
2519  *  This routine disables all traffic on the adapter by issuing a
2520  *  global reset on the MAC and deallocates TX/RX buffers.
2521  *
2522  *  This routine must be called with the CORE lock held; it takes
2523  *  each TX lock itself as needed.
2524  **********************************************************************/
2525
2526 static void
2527 em_stop(void *arg)
2528 {
2529         struct adapter  *adapter = arg;
2530         struct ifnet    *ifp = adapter->ifp;
2531         struct tx_ring  *txr = adapter->tx_rings;
2532
2533         EM_CORE_LOCK_ASSERT(adapter);
2534
2535         INIT_DEBUGOUT("em_stop: begin");
2536
2537         em_disable_intr(adapter);
2538         callout_stop(&adapter->timer);
2539
2540         /* Tell the stack that the interface is no longer active */
2541         ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
2542         ifp->if_drv_flags |= IFF_DRV_OACTIVE;
2543
2544         /* Disarm Hang Detection. */
2545         for (int i = 0; i < adapter->num_queues; i++, txr++) {
2546                 EM_TX_LOCK(txr);
2547                 txr->busy = EM_TX_IDLE;
2548                 EM_TX_UNLOCK(txr);
2549         }
2550
2551         /* I219 needs some special flushing to avoid hangs */
2552         if (adapter->hw.mac.type == e1000_pch_spt)
2553                 em_flush_desc_rings(adapter);
2554
2555         e1000_reset_hw(&adapter->hw);
2556         E1000_WRITE_REG(&adapter->hw, E1000_WUC, 0);
2557
2558         e1000_led_off(&adapter->hw);
2559         e1000_cleanup_led(&adapter->hw);
2560 }
2561
2562
2563 /*********************************************************************
2564  *
2565  *  Determine hardware revision.
2566  *
2567  **********************************************************************/
2568 static void
2569 em_identify_hardware(struct adapter *adapter)
2570 {
2571         device_t dev = adapter->dev;
2572
2573         /* Make sure bus mastering is enabled in our PCI config space */
2574         pci_enable_busmaster(dev);
2575         adapter->hw.bus.pci_cmd_word = pci_read_config(dev, PCIR_COMMAND, 2);
2576
2577         /* Save off the information about this board */
2578         adapter->hw.vendor_id = pci_get_vendor(dev);
2579         adapter->hw.device_id = pci_get_device(dev);
2580         adapter->hw.revision_id = pci_read_config(dev, PCIR_REVID, 1);
2581         adapter->hw.subsystem_vendor_id =
2582             pci_read_config(dev, PCIR_SUBVEND_0, 2);
2583         adapter->hw.subsystem_device_id =
2584             pci_read_config(dev, PCIR_SUBDEV_0, 2);
2585
2586         /* Do Shared Code Init and Setup */
2587         if (e1000_set_mac_type(&adapter->hw)) {
2588                 device_printf(dev, "Setup init failure\n");
2589                 return;
2590         }
2591 }
2592
2593 static int
2594 em_allocate_pci_resources(struct adapter *adapter)
2595 {
2596         device_t        dev = adapter->dev;
2597         int             rid;
2598
2599         rid = PCIR_BAR(0);
2600         adapter->memory = bus_alloc_resource_any(dev, SYS_RES_MEMORY,
2601             &rid, RF_ACTIVE);
2602         if (adapter->memory == NULL) {
2603                 device_printf(dev, "Unable to allocate bus resource: memory\n");
2604                 return (ENXIO);
2605         }
2606         adapter->osdep.mem_bus_space_tag =
2607             rman_get_bustag(adapter->memory);
2608         adapter->osdep.mem_bus_space_handle =
2609             rman_get_bushandle(adapter->memory);
2610         adapter->hw.hw_addr = (u8 *)&adapter->osdep.mem_bus_space_handle;
2611
2612         adapter->hw.back = &adapter->osdep;
2613
2614         return (0);
2615 }
2616
2617 /*********************************************************************
2618  *
2619  *  Setup the Legacy or MSI Interrupt handler
2620  *
2621  **********************************************************************/
2622 static int
2623 em_allocate_legacy(struct adapter *adapter)
2624 {
2625         device_t dev = adapter->dev;
2626         struct tx_ring  *txr = adapter->tx_rings;
2627         int error, rid = 0;
2628
2629         /* Manually turn off all interrupts */
2630         E1000_WRITE_REG(&adapter->hw, E1000_IMC, 0xffffffff);
2631
2632         if (adapter->msix == 1) /* using MSI */
2633                 rid = 1;
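        /*
         * Editorial note (added): SYS_RES_IRQ rid 0 is the legacy INTx
         * line; MSI messages are numbered from rid 1, hence this bump.
         */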
2634         /* We allocate a single interrupt resource */
2635         adapter->res = bus_alloc_resource_any(dev,
2636             SYS_RES_IRQ, &rid, RF_SHAREABLE | RF_ACTIVE);
2637         if (adapter->res == NULL) {
2638                 device_printf(dev, "Unable to allocate bus resource: "
2639                     "interrupt\n");
2640                 return (ENXIO);
2641         }
2642
2643         /*
2644          * Allocate a fast interrupt and the associated
2645          * deferred processing contexts.
2646          */
2647         TASK_INIT(&adapter->que_task, 0, em_handle_que, adapter);
2648         adapter->tq = taskqueue_create_fast("em_taskq", M_NOWAIT,
2649             taskqueue_thread_enqueue, &adapter->tq);
2650         taskqueue_start_threads(&adapter->tq, 1, PI_NET, "%s que",
2651             device_get_nameunit(adapter->dev));
2652         /* Use a TX-only task for the local timer */
2653         TASK_INIT(&txr->tx_task, 0, em_handle_tx, txr);
2654         txr->tq = taskqueue_create_fast("em_txq", M_NOWAIT,
2655             taskqueue_thread_enqueue, &txr->tq);
2656         taskqueue_start_threads(&txr->tq, 1, PI_NET, "%s txq",
2657             device_get_nameunit(adapter->dev));
2658         TASK_INIT(&adapter->link_task, 0, em_handle_link, adapter);
2659         if ((error = bus_setup_intr(dev, adapter->res, INTR_TYPE_NET,
2660             em_irq_fast, NULL, adapter, &adapter->tag)) != 0) {
2661                 device_printf(dev, "Failed to register fast interrupt "
2662                             "handler: %d\n", error);
2663                 taskqueue_free(adapter->tq);
2664                 adapter->tq = NULL;
2665                 return (error);
2666         }
2667         
2668         return (0);
2669 }
2670
2671 /*********************************************************************
2672  *
2673  *  Setup the MSIX Interrupt handlers
2674  *   This is not really multiqueue; rather,
2675  *   it is just separate interrupt vectors
2676  *   for TX, RX, and Link.
2677  *
2678  **********************************************************************/
2679 static int
2680 em_allocate_msix(struct adapter *adapter)
2681 {
2682         device_t        dev = adapter->dev;
2683         struct          tx_ring *txr = adapter->tx_rings;
2684         struct          rx_ring *rxr = adapter->rx_rings;
2685         int             error, rid, vector = 0;
2686         int             cpu_id = 0;
2687
2688
2689         /* Make sure all interrupts are disabled */
2690         E1000_WRITE_REG(&adapter->hw, E1000_IMC, 0xffffffff);
2691
2692         /* First set up ring resources */
2693         for (int i = 0; i < adapter->num_queues; i++, rxr++, vector++) {
2694
2695                 /* RX ring */
2696                 rid = vector + 1;
2697
2698                 rxr->res = bus_alloc_resource_any(dev,
2699                     SYS_RES_IRQ, &rid, RF_ACTIVE);
2700                 if (rxr->res == NULL) {
2701                         device_printf(dev,
2702                             "Unable to allocate bus resource: "
2703                             "RX MSIX Interrupt %d\n", i);
2704                         return (ENXIO);
2705                 }
2706                 if ((error = bus_setup_intr(dev, rxr->res,
2707                     INTR_TYPE_NET | INTR_MPSAFE, NULL, em_msix_rx,
2708                     rxr, &rxr->tag)) != 0) {
2709                         device_printf(dev, "Failed to register RX handler");
2710                         return (error);
2711                 }
2712 #if __FreeBSD_version >= 800504
2713                 bus_describe_intr(dev, rxr->res, rxr->tag, "rx%d", i);
2714 #endif
2715                 rxr->msix = vector;
2716
2717                 if (em_last_bind_cpu < 0)
2718                         em_last_bind_cpu = CPU_FIRST();
2719                 cpu_id = em_last_bind_cpu;
2720                 bus_bind_intr(dev, rxr->res, cpu_id);
2721
2722                 TASK_INIT(&rxr->rx_task, 0, em_handle_rx, rxr);
2723                 rxr->tq = taskqueue_create_fast("em_rxq", M_NOWAIT,
2724                     taskqueue_thread_enqueue, &rxr->tq);
2725                 taskqueue_start_threads(&rxr->tq, 1, PI_NET, "%s rxq (cpuid %d)",
2726                     device_get_nameunit(adapter->dev), cpu_id);
2727                 /*
2728                 ** Set the bit to enable interrupt
2729                 ** in E1000_IMS -- bits 20 and 21
2730                 ** are for RX0 and RX1, note this has
2731                 ** NOTHING to do with the MSIX vector
2732                 */
2733                 rxr->ims = 1 << (20 + i);
2734                 adapter->ims |= rxr->ims;
2735                 adapter->ivars |= (8 | rxr->msix) << (i * 4);
2736
2737                 em_last_bind_cpu = CPU_NEXT(em_last_bind_cpu);
2738         }
2739
2740         for (int i = 0; i < adapter->num_queues; i++, txr++, vector++) {
2741                 /* TX ring */
2742                 rid = vector + 1;
2743                 txr->res = bus_alloc_resource_any(dev,
2744                     SYS_RES_IRQ, &rid, RF_ACTIVE);
2745                 if (txr->res == NULL) {
2746                         device_printf(dev,
2747                             "Unable to allocate bus resource: "
2748                             "TX MSIX Interrupt %d\n", i);
2749                         return (ENXIO);
2750                 }
2751                 if ((error = bus_setup_intr(dev, txr->res,
2752                     INTR_TYPE_NET | INTR_MPSAFE, NULL, em_msix_tx,
2753                     txr, &txr->tag)) != 0) {
2754                         device_printf(dev, "Failed to register TX handler\n");
2755                         return (error);
2756                 }
2757 #if __FreeBSD_version >= 800504
2758                 bus_describe_intr(dev, txr->res, txr->tag, "tx%d", i);
2759 #endif
2760                 txr->msix = vector;
2761
2762                 if (em_last_bind_cpu < 0)
2763                         em_last_bind_cpu = CPU_FIRST();
2764                 cpu_id = em_last_bind_cpu;
2765                 bus_bind_intr(dev, txr->res, cpu_id);
2766
2767                 TASK_INIT(&txr->tx_task, 0, em_handle_tx, txr);
2768                 txr->tq = taskqueue_create_fast("em_txq", M_NOWAIT,
2769                     taskqueue_thread_enqueue, &txr->tq);
2770                 taskqueue_start_threads(&txr->tq, 1, PI_NET, "%s txq (cpuid %d)",
2771                     device_get_nameunit(adapter->dev), cpu_id);
2772                 /*
2773                 ** Set the bit to enable interrupt
2774                 ** in E1000_IMS -- bits 22 and 23
2775                 ** are for TX0 and TX1, note this has
2776                 ** NOTHING to do with the MSIX vector
2777                 */
2778                 txr->ims = 1 << (22 + i);
2779                 adapter->ims |= txr->ims;
2780                 adapter->ivars |= (8 | txr->msix) << (8 + (i * 4));
2781
2782                 em_last_bind_cpu = CPU_NEXT(em_last_bind_cpu);
2783         }
2784
2785         /* Link interrupt */
2786         rid = vector + 1;
2787         adapter->res = bus_alloc_resource_any(dev,
2788             SYS_RES_IRQ, &rid, RF_SHAREABLE | RF_ACTIVE);
2789         if (adapter->res == NULL) {
2790                 device_printf(dev, "Unable to allocate "
2791                     "bus resource: Link interrupt [%d]\n", rid);
2792                 return (ENXIO);
2793         }
2794         /* Set the link handler function */
2795         error = bus_setup_intr(dev, adapter->res,
2796             INTR_TYPE_NET | INTR_MPSAFE, NULL,
2797             em_msix_link, adapter, &adapter->tag);
2798         if (error) {
2799                 adapter->res = NULL;
2800                 device_printf(dev, "Failed to register LINK handler\n");
2801                 return (error);
2802         }
2803 #if __FreeBSD_version >= 800504
2804         bus_describe_intr(dev, adapter->res, adapter->tag, "link");
2805 #endif
2806         adapter->linkvec = vector;
2807         adapter->ivars |=  (8 | vector) << 16;
2808         adapter->ivars |= 0x80000000;
2809
2810         return (0);
2811 }
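/*
 * Worked example (illustrative): with two queues on the 82574 the loops
 * above assign vectors 0-1 to RX, 2-3 to TX and 4 to link, yielding:
 *
 *	rxr[0]: ims = 1 << 20;  ivars |= (8 | 0) << 0;
 *	rxr[1]: ims = 1 << 21;  ivars |= (8 | 1) << 4;
 *	txr[0]: ims = 1 << 22;  ivars |= (8 | 2) << 8;
 *	txr[1]: ims = 1 << 23;  ivars |= (8 | 3) << 12;
 *	link:                   ivars |= (8 | 4) << 16 | 0x80000000;
 *
 * In each 4-bit IVAR field the low bits select the MSIX vector and the
 * OR-ed 8 appears to serve as the entry's valid bit.
 */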
2812
2813
2814 static void
2815 em_free_pci_resources(struct adapter *adapter)
2816 {
2817         device_t        dev = adapter->dev;
2818         struct tx_ring  *txr;
2819         struct rx_ring  *rxr;
2820         int             rid;
2821
2822
2823         /*
2824         ** Release all the queue interrupt resources:
2825         */
2826         for (int i = 0; i < adapter->num_queues; i++) {
2827                 txr = &adapter->tx_rings[i];
2828                 /* an early abort? */
2829                 if (txr == NULL)
2830                         break;
2831                 rid = txr->msix + 1;
2832                 if (txr->tag != NULL) {
2833                         bus_teardown_intr(dev, txr->res, txr->tag);
2834                         txr->tag = NULL;
2835                 }
2836                 if (txr->res != NULL)
2837                         bus_release_resource(dev, SYS_RES_IRQ,
2838                             rid, txr->res);
2839
2840                 rxr = &adapter->rx_rings[i];
2841                 /* an early abort? */
2842                 if (rxr == NULL)
2843                         break;
2844                 rid = rxr->msix + 1;
2845                 if (rxr->tag != NULL) {
2846                         bus_teardown_intr(dev, rxr->res, rxr->tag);
2847                         rxr->tag = NULL;
2848                 }
2849                 if (rxr->res != NULL)
2850                         bus_release_resource(dev, SYS_RES_IRQ,
2851                             rid, rxr->res);
2852         }
2853
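        /*
         * Pick the rid matching how the interrupt was allocated: the
         * MSIX link vector uses linkvec + 1, plain MSI uses rid 1, and
         * a legacy INTx line uses rid 0.
         */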
2854         if (adapter->linkvec) /* we are doing MSIX */
2855                 rid = adapter->linkvec + 1;
2856         else
2857                 rid = (adapter->msix != 0) ? 1 : 0;
2858
2859         if (adapter->tag != NULL) {
2860                 bus_teardown_intr(dev, adapter->res, adapter->tag);
2861                 adapter->tag = NULL;
2862         }
2863
2864         if (adapter->res != NULL)
2865                 bus_release_resource(dev, SYS_RES_IRQ, rid, adapter->res);
2866
2867
2868         if (adapter->msix)
2869                 pci_release_msi(dev);
2870
2871         if (adapter->msix_mem != NULL)
2872                 bus_release_resource(dev, SYS_RES_MEMORY,
2873                     adapter->memrid, adapter->msix_mem);
2874
2875         if (adapter->memory != NULL)
2876                 bus_release_resource(dev, SYS_RES_MEMORY,
2877                     PCIR_BAR(0), adapter->memory);
2878
2879         if (adapter->flash != NULL)
2880                 bus_release_resource(dev, SYS_RES_MEMORY,
2881                     EM_FLASH, adapter->flash);
2882 }
2883
2884 /*
2885  * Setup MSI or MSI/X
2886  */
2887 static int
2888 em_setup_msix(struct adapter *adapter)
2889 {
2890         device_t dev = adapter->dev;
2891         int val;
2892
2893         /* Nearly always going to use one queue */
2894         adapter->num_queues = 1;
2895
2896         /*
2897         ** Try using MSI-X for Hartwell adapters
2898         */
2899         if ((adapter->hw.mac.type == e1000_82574) &&
2900             (em_enable_msix == TRUE)) {
2901 #ifdef EM_MULTIQUEUE
2902                 adapter->num_queues = (em_num_queues == 1) ? 1 : 2;
2903                 if (adapter->num_queues > 1)
2904                         em_enable_vectors_82574(adapter);
2905 #endif
2906                 /* Map the MSIX BAR */
2907                 adapter->memrid = PCIR_BAR(EM_MSIX_BAR);
2908                 adapter->msix_mem = bus_alloc_resource_any(dev,
2909                     SYS_RES_MEMORY, &adapter->memrid, RF_ACTIVE);
2910                 if (adapter->msix_mem == NULL) {
2911                         /* May not be enabled */
2912                         device_printf(adapter->dev,
2913                             "Unable to map MSIX table\n");
2914                         goto msi;
2915                 }
2916                 val = pci_msix_count(dev); 
2917
2918 #ifdef EM_MULTIQUEUE
2919                 /* We need 5 vectors in the multiqueue case */
2920                 if (adapter->num_queues > 1) {
2921                         if (val >= 5)
2922                                 val = 5;
2923                         else {
2924                                 adapter->num_queues = 1;
2925                                 device_printf(adapter->dev,
2926                                     "Insufficient MSIX vectors for >1 queue, "
2927                                     "using single queue...\n");
2928                                 goto msix_one;
2929                         }
2930                 } else {
2931 msix_one:
2932 #endif
2933                         if (val >= 3)
2934                                 val = 3;
2935                         else {
2936                                 device_printf(adapter->dev,
2937                                 "Insufficient MSIX vectors, using MSI\n");
2938                                 goto msi;
2939                         }
2940 #ifdef EM_MULTIQUEUE
2941                 }
2942 #endif
2943
2944                 if ((pci_alloc_msix(dev, &val) == 0)) {
2945                         device_printf(adapter->dev,
2946                             "Using MSIX interrupts "
2947                             "with %d vectors\n", val);
2948                         return (val);
2949                 }
2950
2951                 /*
2952                 ** If MSIX alloc failed or provided us with
2953                 ** less than needed, free and fall through to MSI
2954                 */
2955                 pci_release_msi(dev);
2956         }
2957 msi:
2958         if (adapter->msix_mem != NULL) {
2959                 bus_release_resource(dev, SYS_RES_MEMORY,
2960                     adapter->memrid, adapter->msix_mem);
2961                 adapter->msix_mem = NULL;
2962         }
2963         val = 1;
2964         if (pci_alloc_msi(dev, &val) == 0) {
2965                 device_printf(adapter->dev, "Using an MSI interrupt\n");
2966                 return (val);
2967         } 
2968         /* Should only happen due to manual configuration */
2969         device_printf(adapter->dev, "No MSI/MSIX, using a Legacy IRQ\n");
2970         return (0);
2971 }
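/*
 * Vector budget, for reference: the multiqueue configuration above needs
 * five MSIX vectors (two RX, two TX, one link) while the single queue
 * case needs three (RX, TX, link).  Anything less falls back to MSI and,
 * failing that, to a legacy INTx interrupt.
 */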
2972
2973
2974 /*
2975 ** The 3 following flush routines are used as a workaround in the
2976 ** I219 client parts and only for them.
2977 **
2978 ** em_flush_tx_ring - remove all descriptors from the tx_ring
2979 **
2980 ** We want to clear all pending descriptors from the TX ring.
2981 ** zeroing happens when the HW reads the regs. We assign the ring itself as
2982 ** the data of the next descriptor. We don't care about the data since we are
2983 ** about to reset the HW anyway.
2984 */
2985 static void
2986 em_flush_tx_ring(struct adapter *adapter)
2987 {
2988         struct e1000_hw         *hw = &adapter->hw;
2989         struct tx_ring          *txr = adapter->tx_rings;
2990         struct e1000_tx_desc    *txd;
2991         u32                     tctl, txd_lower = E1000_TXD_CMD_IFCS;
2992         u16                     size = 512;
2993
2994         tctl = E1000_READ_REG(hw, E1000_TCTL);
2995         E1000_WRITE_REG(hw, E1000_TCTL, tctl | E1000_TCTL_EN);
2996
2997         txd = &txr->tx_base[txr->next_avail_desc++];
2998         if (txr->next_avail_desc == adapter->num_tx_desc)
2999                 txr->next_avail_desc = 0;
3000
3001         /* Just use the ring as a dummy buffer addr */
3002         txd->buffer_addr = txr->txdma.dma_paddr;
3003         txd->lower.data = htole32(txd_lower | size);
3004         txd->upper.data = 0;
3005
3006         /* flush descriptors to memory before notifying the HW */
3007         wmb();
3008
3009         E1000_WRITE_REG(hw, E1000_TDT(0), txr->next_avail_desc);
3010         mb();
3011         usec_delay(250);
3012 }
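/*
 * Worked example (illustrative): assuming E1000_TXD_CMD_IFCS is bit 25
 * of lower.data, the dummy descriptor above is written with
 * lower.data = htole32(0x02000000 | 512) = 0x02000200, i.e. a 512-byte
 * "transmit" whose payload is the ring memory itself.
 */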
3013
3014 /*
3015 ** em_flush_rx_ring - remove all descriptors from the rx_ring
3016 **
3017 ** Mark all descriptors in the RX ring as consumed and disable the rx ring
3018 */
3019 static void
3020 em_flush_rx_ring(struct adapter *adapter)
3021 {
3022         struct e1000_hw *hw = &adapter->hw;
3023         u32             rctl, rxdctl;
3024
3025         rctl = E1000_READ_REG(hw, E1000_RCTL);
3026         E1000_WRITE_REG(hw, E1000_RCTL, rctl & ~E1000_RCTL_EN);
3027         E1000_WRITE_FLUSH(hw);
3028         usec_delay(150);
3029
3030         rxdctl = E1000_READ_REG(hw, E1000_RXDCTL(0));
3031         /* zero the lower 14 bits (prefetch and host thresholds) */
3032         rxdctl &= 0xffffc000;
3033         /*
3034          * update thresholds: prefetch threshold to 31, host threshold to 1
3035          * and make sure the granularity is "descriptors" and not "cache lines"
3036          */
3037         rxdctl |= (0x1F | (1 << 8) | E1000_RXDCTL_THRESH_UNIT_DESC);
3038         E1000_WRITE_REG(hw, E1000_RXDCTL(0), rxdctl);
3039
3040         /* momentarily enable the RX ring for the changes to take effect */
3041         E1000_WRITE_REG(hw, E1000_RCTL, rctl | E1000_RCTL_EN);
3042         E1000_WRITE_FLUSH(hw);
3043         usec_delay(150);
3044         E1000_WRITE_REG(hw, E1000_RCTL, rctl & ~E1000_RCTL_EN);
3045 }
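/*
 * RXDCTL layout relied on above (for reference): the prefetch threshold
 * occupies the low bits and the host threshold starts at bit 8, so
 * clearing the low 14 bits and OR-ing in (0x1F | (1 << 8)) yields
 * PTHRESH = 31 and HTHRESH = 1 before the descriptor-granularity bit
 * is applied.
 */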
3046
3047 /*
3048 ** em_flush_desc_rings - remove all descriptors from the descriptor rings
3049 **
3050 ** On I219, the descriptor rings must be emptied before resetting the HW
3051 ** or before changing the device state to D3 during runtime (runtime PM).
3052 **
3053 ** Failure to do this will cause the HW to enter a unit hang state which can
3054 ** only be released by a PCI reset on the device.
3055 **
3056 */
3057 static void
3058 em_flush_desc_rings(struct adapter *adapter)
3059 {
3060         struct e1000_hw *hw = &adapter->hw;
3061         device_t        dev = adapter->dev;
3062         u16             hang_state;
3063         u32             fext_nvm11, tdlen;
3064  
3065         /* First, disable MULR fix in FEXTNVM11 */
3066         fext_nvm11 = E1000_READ_REG(hw, E1000_FEXTNVM11);
3067         fext_nvm11 |= E1000_FEXTNVM11_DISABLE_MULR_FIX;
3068         E1000_WRITE_REG(hw, E1000_FEXTNVM11, fext_nvm11);
3069         
3070         /* do nothing if we're not in a faulty state, or if the queue is empty */
3071         tdlen = E1000_READ_REG(hw, E1000_TDLEN(0));
3072         hang_state = pci_read_config(dev, PCICFG_DESC_RING_STATUS, 2);
3073         if (!(hang_state & FLUSH_DESC_REQUIRED) || !tdlen)
3074                 return;
3075         em_flush_tx_ring(adapter);
3076
3077         /* recheck, maybe the fault is caused by the rx ring */
3078         hang_state = pci_read_config(dev, PCICFG_DESC_RING_STATUS, 2);
3079         if (hang_state & FLUSH_DESC_REQUIRED)
3080                 em_flush_rx_ring(adapter);
3081 }
3082
3083
3084 /*********************************************************************
3085  *
3086  *  Initialize the hardware to a configuration
3087  *  as specified by the adapter structure.
3088  *
3089  **********************************************************************/
3090 static void
3091 em_reset(struct adapter *adapter)
3092 {
3093         device_t        dev = adapter->dev;
3094         struct ifnet    *ifp = adapter->ifp;
3095         struct e1000_hw *hw = &adapter->hw;
3096         u16             rx_buffer_size;
3097         u32             pba;
3098
3099         INIT_DEBUGOUT("em_reset: begin");
3100
3101         /* Set up smart power down as default off on newer adapters. */
3102         if (!em_smart_pwr_down && (hw->mac.type == e1000_82571 ||
3103             hw->mac.type == e1000_82572)) {
3104                 u16 phy_tmp = 0;
3105
3106                 /* Speed up time to link by disabling smart power down. */
3107                 e1000_read_phy_reg(hw, IGP02E1000_PHY_POWER_MGMT, &phy_tmp);
3108                 phy_tmp &= ~IGP02E1000_PM_SPD;
3109                 e1000_write_phy_reg(hw, IGP02E1000_PHY_POWER_MGMT, phy_tmp);
3110         }
3111
3112         /*
3113          * Packet Buffer Allocation (PBA)
3114          * Writing PBA sets the receive portion of the buffer;
3115          * the remainder is used for the transmit buffer.
3116          */
3117         switch (hw->mac.type) {
3118         /* Total Packet Buffer on these is 48K */
3119         case e1000_82571:
3120         case e1000_82572:
3121         case e1000_80003es2lan:
3122                 pba = E1000_PBA_32K; /* 32K for Rx, 16K for Tx */
3123                 break;
3124         case e1000_82573: /* 82573: Total Packet Buffer is 32K */
3125                 pba = E1000_PBA_12K; /* 12K for Rx, 20K for Tx */
3126                 break;
3127         case e1000_82574:
3128         case e1000_82583:
3129                 pba = E1000_PBA_20K; /* 20K for Rx, 20K for Tx */
3130                 break;
3131         case e1000_ich8lan:
3132                 pba = E1000_PBA_8K;
3133                 break;
3134         case e1000_ich9lan:
3135         case e1000_ich10lan:
3136                 /* Boost Receive side for jumbo frames */
3137                 if (adapter->hw.mac.max_frame_size > 4096)
3138                         pba = E1000_PBA_14K;
3139                 else
3140                         pba = E1000_PBA_10K;
3141                 break;
3142         case e1000_pchlan:
3143         case e1000_pch2lan:
3144         case e1000_pch_lpt:
3145         case e1000_pch_spt:
3146         case e1000_pch_cnp:
3147                 pba = E1000_PBA_26K;
3148                 break;
3149         default:
3150                 if (adapter->hw.mac.max_frame_size > 8192)
3151                         pba = E1000_PBA_40K; /* 40K for Rx, 24K for Tx */
3152                 else
3153                         pba = E1000_PBA_48K; /* 48K for Rx, 16K for Tx */
3154         }
3155         E1000_WRITE_REG(&adapter->hw, E1000_PBA, pba);
3156
3157         /*
3158          * These parameters control the automatic generation (Tx) and
3159          * response (Rx) to Ethernet PAUSE frames.
3160          * - High water mark should allow for at least two frames to be
3161          *   received after sending an XOFF.
3162          * - Low water mark works best when it is very near the high water mark.
3163          *   This allows the receiver to restart by sending XON when it has
3164          *   drained a bit. Here we use an arbitrary value of 1500 which will
3165          *   restart after one full frame is pulled from the buffer. There
3166          *   could be several smaller frames in the buffer and if so they will
3167          *   not trigger the XON until their total number reduces the buffer
3168          *   by 1500.
3169          * - The pause time is fairly large at 1000 x 512ns = 512 usec.
3170          */
3171         rx_buffer_size = ((E1000_READ_REG(hw, E1000_PBA) & 0xffff) << 10);
3172         hw->fc.high_water = rx_buffer_size -
3173             roundup2(adapter->hw.mac.max_frame_size, 1024);
3174         hw->fc.low_water = hw->fc.high_water - 1500;
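        /*
         * Worked example (illustrative): with a 48K PBA and a standard
         * 1518-byte max frame, rx_buffer_size = 48 * 1024 = 49152, so
         * high_water = 49152 - roundup2(1518, 1024) = 47104 and
         * low_water = 47104 - 1500 = 45604.
         */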
3175
3176         if (adapter->fc) /* locally set flow control value? */
3177                 hw->fc.requested_mode = adapter->fc;
3178         else
3179                 hw->fc.requested_mode = e1000_fc_full;
3180
3181         if (hw->mac.type == e1000_80003es2lan)
3182                 hw->fc.pause_time = 0xFFFF;
3183         else
3184                 hw->fc.pause_time = EM_FC_PAUSE_TIME;
3185
3186         hw->fc.send_xon = TRUE;
3187
3188         /* Device specific overrides/settings */
3189         switch (hw->mac.type) {
3190         case e1000_pchlan:
3191                 /* Workaround: no TX flow ctrl for PCH */
3192                 hw->fc.requested_mode = e1000_fc_rx_pause;
3193                 hw->fc.pause_time = 0xFFFF; /* override */
3194                 if (ifp->if_mtu > ETHERMTU) {
3195                         hw->fc.high_water = 0x3500;
3196                         hw->fc.low_water = 0x1500;
3197                 } else {
3198                         hw->fc.high_water = 0x5000;
3199                         hw->fc.low_water = 0x3000;
3200                 }
3201                 hw->fc.refresh_time = 0x1000;
3202                 break;
3203         case e1000_pch2lan:
3204         case e1000_pch_lpt:
3205         case e1000_pch_spt:
3206         case e1000_pch_cnp:
3207                 hw->fc.high_water = 0x5C20;
3208                 hw->fc.low_water = 0x5048;
3209                 hw->fc.pause_time = 0x0650;
3210                 hw->fc.refresh_time = 0x0400;
3211                 /* Jumbos need adjusted PBA */
3212                 if (ifp->if_mtu > ETHERMTU)
3213                         E1000_WRITE_REG(hw, E1000_PBA, 12);
3214                 else
3215                         E1000_WRITE_REG(hw, E1000_PBA, 26);
3216                 break;
3217         case e1000_ich9lan:
3218         case e1000_ich10lan:
3219                 if (ifp->if_mtu > ETHERMTU) {
3220                         hw->fc.high_water = 0x2800;
3221                         hw->fc.low_water = hw->fc.high_water - 8;
3222                         break;
3223                 } 
3224                 /* else fall thru */
3225         default:
3226                 if (hw->mac.type == e1000_80003es2lan)
3227                         hw->fc.pause_time = 0xFFFF;
3228                 break;
3229         }
3230
3231         /* I219 needs some special flushing to avoid hangs */
3232         if (hw->mac.type == e1000_pch_spt)
3233                 em_flush_desc_rings(adapter);
3234
3235         /* Issue a global reset */
3236         e1000_reset_hw(hw);
3237         E1000_WRITE_REG(hw, E1000_WUC, 0);
3238         em_disable_aspm(adapter);
3239         /* and a re-init */
3240         if (e1000_init_hw(hw) < 0) {
3241                 device_printf(dev, "Hardware Initialization Failed\n");
3242                 return;
3243         }
3244
3245         E1000_WRITE_REG(hw, E1000_VET, ETHERTYPE_VLAN);
3246         e1000_get_phy_info(hw);
3247         e1000_check_for_link(hw);
3248         return;
3249 }
3250
3251 /*********************************************************************
3252  *
3253  *  Setup networking device structure and register an interface.
3254  *
3255  **********************************************************************/
3256 static int
3257 em_setup_interface(device_t dev, struct adapter *adapter)
3258 {
3259         struct ifnet   *ifp;
3260
3261         INIT_DEBUGOUT("em_setup_interface: begin");
3262
3263         ifp = adapter->ifp = if_alloc(IFT_ETHER);
3264         if (ifp == NULL) {
3265                 device_printf(dev, "can not allocate ifnet structure\n");
3266                 return (-1);
3267         }
3268         if_initname(ifp, device_get_name(dev), device_get_unit(dev));
3269         ifp->if_init =  em_init;
3270         ifp->if_softc = adapter;
3271         ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
3272         ifp->if_ioctl = em_ioctl;
3273
3274         /* TSO parameters */
3275         ifp->if_hw_tsomax = IP_MAXPACKET;
3276         /* Take m_pullup(9)'s in em_xmit() w/ TSO into account. */
3277         ifp->if_hw_tsomaxsegcount = EM_MAX_SCATTER - 5;
3278         ifp->if_hw_tsomaxsegsize = EM_TSO_SEG_SIZE;
3279
3280 #ifdef EM_MULTIQUEUE
3281         /* Multiqueue stack interface */
3282         ifp->if_transmit = em_mq_start;
3283         ifp->if_qflush = em_qflush;
3284 #else
3285         ifp->if_start = em_start;
3286         IFQ_SET_MAXLEN(&ifp->if_snd, adapter->num_tx_desc - 1);
3287         ifp->if_snd.ifq_drv_maxlen = adapter->num_tx_desc - 1;
3288         IFQ_SET_READY(&ifp->if_snd);
3289 #endif  
3290
3291         ether_ifattach(ifp, adapter->hw.mac.addr);
3292
3293         ifp->if_capabilities = IFCAP_HWCSUM | IFCAP_VLAN_HWCSUM;
3294         ifp->if_capenable = ifp->if_capabilities;
3295
3296         /*
3297          * Tell the upper layer(s) we
3298          * support full VLAN capability
3299          */
3300         ifp->if_data.ifi_hdrlen = sizeof(struct ether_vlan_header);
3301         ifp->if_capabilities |= IFCAP_VLAN_HWTAGGING
3302                              |  IFCAP_VLAN_HWTSO
3303                              |  IFCAP_VLAN_MTU;
3304         ifp->if_capenable |= IFCAP_VLAN_HWTAGGING
3305                           |  IFCAP_VLAN_MTU;
3306
3307         /*
3308          * We don't enable IFCAP_{TSO4,VLAN_HWTSO} by default because:
3309          * - Although the silicon bug of TSO only working at gigabit speed is
3310          *   worked around in em_update_link_status() by selectively setting
3311          *   CSUM_IP_TSO, we cannot atomically flush already queued TSO-using
3312          *   descriptors.  Thus, such descriptors may still cause the MAC to
3313          *   hang and, consequently, TSO is only safe to be used in setups
3314          *   where the link isn't expected to switch from gigabit to lower
3315          *   speeds.
3316          * - Similarly, there's currently no way to trigger a reconfiguration
3317          *   of vlan(4) when the state of IFCAP_VLAN_HWTSO support changes at
3318          *   runtime.  Therefore, IFCAP_VLAN_HWTSO also only is safe to use
3319          *   when link speed changes are not to be expected.
3320          * - Despite all the workarounds for TSO-related silicon bugs, at
3321          *   least 82579 still may hang at gigabit speed with IFCAP_TSO4.
3322          */
3323         ifp->if_capabilities |= IFCAP_TSO4 | IFCAP_VLAN_HWTSO;
3324
3325         /*
3326         ** Don't turn this on by default: if vlans are
3327         ** created on another pseudo device (e.g. lagg),
3328         ** vlan events are not passed through, breaking
3329         ** operation, whereas with HW FILTER off it works.
3330         ** If using vlans directly on the em driver you
3331         ** can enable this and get full hardware tag filtering.
3332         */
3333         ifp->if_capabilities |= IFCAP_VLAN_HWFILTER;
3334
3335 #ifdef DEVICE_POLLING
3336         ifp->if_capabilities |= IFCAP_POLLING;
3337 #endif
3338
3339         /* Enable only WOL MAGIC by default */
3340         if (adapter->wol) {
3341                 ifp->if_capabilities |= IFCAP_WOL;
3342                 ifp->if_capenable |= IFCAP_WOL_MAGIC;
3343         }
3344                 
3345         /*
3346          * Specify the media types supported by this adapter and register
3347          * callbacks to update media and link information
3348          */
3349         ifmedia_init(&adapter->media, IFM_IMASK,
3350             em_media_change, em_media_status);
3351         if ((adapter->hw.phy.media_type == e1000_media_type_fiber) ||
3352             (adapter->hw.phy.media_type == e1000_media_type_internal_serdes)) {
3353                 u_char fiber_type = IFM_1000_SX;        /* default type */
3354
3355                 ifmedia_add(&adapter->media, IFM_ETHER | fiber_type | IFM_FDX, 
3356                             0, NULL);
3357                 ifmedia_add(&adapter->media, IFM_ETHER | fiber_type, 0, NULL);
3358         } else {
3359                 ifmedia_add(&adapter->media, IFM_ETHER | IFM_10_T, 0, NULL);
3360                 ifmedia_add(&adapter->media, IFM_ETHER | IFM_10_T | IFM_FDX,
3361                             0, NULL);
3362                 ifmedia_add(&adapter->media, IFM_ETHER | IFM_100_TX,
3363                             0, NULL);
3364                 ifmedia_add(&adapter->media, IFM_ETHER | IFM_100_TX | IFM_FDX,
3365                             0, NULL);
3366                 if (adapter->hw.phy.type != e1000_phy_ife) {
3367                         ifmedia_add(&adapter->media,
3368                                 IFM_ETHER | IFM_1000_T | IFM_FDX, 0, NULL);
3369                         ifmedia_add(&adapter->media,
3370                                 IFM_ETHER | IFM_1000_T, 0, NULL);
3371                 }
3372         }
3373         ifmedia_add(&adapter->media, IFM_ETHER | IFM_AUTO, 0, NULL);
3374         ifmedia_set(&adapter->media, IFM_ETHER | IFM_AUTO);
3375         return (0);
3376 }
3377
3378
3379 /*
3380  * Manage DMA'able memory.
3381  */
3382 static void
3383 em_dmamap_cb(void *arg, bus_dma_segment_t *segs, int nseg, int error)
3384 {
3385         if (error)
3386                 return;
3387         *(bus_addr_t *) arg = segs[0].ds_addr;
3388 }
3389
3390 static int
3391 em_dma_malloc(struct adapter *adapter, bus_size_t size,
3392         struct em_dma_alloc *dma, int mapflags)
3393 {
3394         int error;
3395
3396         error = bus_dma_tag_create(bus_get_dma_tag(adapter->dev), /* parent */
3397                                 EM_DBA_ALIGN, 0,        /* alignment, bounds */
3398                                 BUS_SPACE_MAXADDR,      /* lowaddr */
3399                                 BUS_SPACE_MAXADDR,      /* highaddr */
3400                                 NULL, NULL,             /* filter, filterarg */
3401                                 size,                   /* maxsize */
3402                                 1,                      /* nsegments */
3403                                 size,                   /* maxsegsize */
3404                                 0,                      /* flags */
3405                                 NULL,                   /* lockfunc */
3406                                 NULL,                   /* lockarg */
3407                                 &dma->dma_tag);
3408         if (error) {
3409                 device_printf(adapter->dev,
3410                     "%s: bus_dma_tag_create failed: %d\n",
3411                     __func__, error);
3412                 goto fail_0;
3413         }
3414
3415         error = bus_dmamem_alloc(dma->dma_tag, (void**) &dma->dma_vaddr,
3416             BUS_DMA_NOWAIT | BUS_DMA_COHERENT, &dma->dma_map);
3417         if (error) {
3418                 device_printf(adapter->dev,
3419                     "%s: bus_dmamem_alloc(%ju) failed: %d\n",
3420                     __func__, (uintmax_t)size, error);
3421                 goto fail_2;
3422         }
3423
3424         dma->dma_paddr = 0;
3425         error = bus_dmamap_load(dma->dma_tag, dma->dma_map, dma->dma_vaddr,
3426             size, em_dmamap_cb, &dma->dma_paddr, mapflags | BUS_DMA_NOWAIT);
3427         if (error || dma->dma_paddr == 0) {
3428                 device_printf(adapter->dev,
3429                     "%s: bus_dmamap_load failed: %d\n",
3430                     __func__, error);
3431                 goto fail_3;
3432         }
3433
3434         return (0);
3435
3436 fail_3:
3437         bus_dmamap_unload(dma->dma_tag, dma->dma_map);
3438 fail_2:
3439         bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map);
3440         bus_dma_tag_destroy(dma->dma_tag);
3441 fail_0:
3442         dma->dma_tag = NULL;
3443
3444         return (error);
3445 }
3446
3447 static void
3448 em_dma_free(struct adapter *adapter, struct em_dma_alloc *dma)
3449 {
3450         if (dma->dma_tag == NULL)
3451                 return;
3452         if (dma->dma_paddr != 0) {
3453                 bus_dmamap_sync(dma->dma_tag, dma->dma_map,
3454                     BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
3455                 bus_dmamap_unload(dma->dma_tag, dma->dma_map);
3456                 dma->dma_paddr = 0;
3457         }
3458         if (dma->dma_vaddr != NULL) {
3459                 bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map);
3460                 dma->dma_vaddr = NULL;
3461         }
3462         bus_dma_tag_destroy(dma->dma_tag);
3463         dma->dma_tag = NULL;
3464 }
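/*
 * Usage sketch (illustrative only): em_dma_malloc() and em_dma_free()
 * pair up as below for a hypothetical 4KB DMA area.
 *
 *	struct em_dma_alloc dma;
 *
 *	if (em_dma_malloc(adapter, 4096, &dma, BUS_DMA_NOWAIT) == 0) {
 *		... use dma.dma_vaddr and dma.dma_paddr ...
 *		em_dma_free(adapter, &dma);
 *	}
 */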
3465
3466
3467 /*********************************************************************
3468  *
3469  *  Allocate memory for the transmit and receive rings, and then
3470  *  the descriptors associated with each, called only once at attach.
3471  *
3472  **********************************************************************/
3473 static int
3474 em_allocate_queues(struct adapter *adapter)
3475 {
3476         device_t                dev = adapter->dev;
3477         struct tx_ring          *txr = NULL;
3478         struct rx_ring          *rxr = NULL;
3479         int rsize, tsize, error = E1000_SUCCESS;
3480         int txconf = 0, rxconf = 0;
3481
3482
3483         /* Allocate the TX ring struct memory */
3484         if (!(adapter->tx_rings =
3485             (struct tx_ring *) malloc(sizeof(struct tx_ring) *
3486             adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
3487                 device_printf(dev, "Unable to allocate TX ring memory\n");
3488                 error = ENOMEM;
3489                 goto fail;
3490         }
3491
3492         /* Now allocate the RX */
3493         if (!(adapter->rx_rings =
3494             (struct rx_ring *) malloc(sizeof(struct rx_ring) *
3495             adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
3496                 device_printf(dev, "Unable to allocate RX ring memory\n");
3497                 error = ENOMEM;
3498                 goto rx_fail;
3499         }
3500
3501         tsize = roundup2(adapter->num_tx_desc *
3502             sizeof(struct e1000_tx_desc), EM_DBA_ALIGN);
3503         /*
3504          * Now set up the TX queues, txconf is needed to handle the
3505          * possibility that things fail midcourse and we need to
3506          * undo memory gracefully
3507          */ 
3508         for (int i = 0; i < adapter->num_queues; i++, txconf++) {
3509                 /* Set up some basics */
3510                 txr = &adapter->tx_rings[i];
3511                 txr->adapter = adapter;
3512                 txr->me = i;
3513
3514                 /* Initialize the TX lock */
3515                 snprintf(txr->mtx_name, sizeof(txr->mtx_name), "%s:tx(%d)",
3516                     device_get_nameunit(dev), txr->me);
3517                 mtx_init(&txr->tx_mtx, txr->mtx_name, NULL, MTX_DEF);
3518
3519                 if (em_dma_malloc(adapter, tsize,
3520                         &txr->txdma, BUS_DMA_NOWAIT)) {
3521                         device_printf(dev,
3522                             "Unable to allocate TX Descriptor memory\n");
3523                         error = ENOMEM;
3524                         goto err_tx_desc;
3525                 }
3526                 txr->tx_base = (struct e1000_tx_desc *)txr->txdma.dma_vaddr;
3527                 bzero((void *)txr->tx_base, tsize);
3528
3529                 if (em_allocate_transmit_buffers(txr)) {
3530                         device_printf(dev,
3531                             "Critical Failure setting up transmit buffers\n");
3532                         error = ENOMEM;
3533                         goto err_tx_desc;
3534                 }
3535 #if __FreeBSD_version >= 800000
3536                 /* Allocate a buf ring */
3537                 txr->br = buf_ring_alloc(4096, M_DEVBUF,
3538                     M_WAITOK, &txr->tx_mtx);
3539 #endif
3540         }
3541
3542         /*
3543          * Next the RX queues...
3544          */ 
3545         rsize = roundup2(adapter->num_rx_desc *
3546             sizeof(union e1000_rx_desc_extended), EM_DBA_ALIGN);
3547         for (int i = 0; i < adapter->num_queues; i++, rxconf++) {
3548                 rxr = &adapter->rx_rings[i];
3549                 rxr->adapter = adapter;
3550                 rxr->me = i;
3551
3552                 /* Initialize the RX lock */
3553                 snprintf(rxr->mtx_name, sizeof(rxr->mtx_name), "%s:rx(%d)",
3554                     device_get_nameunit(dev), rxr->me);
3555                 mtx_init(&rxr->rx_mtx, rxr->mtx_name, NULL, MTX_DEF);
3556
3557                 if (em_dma_malloc(adapter, rsize,
3558                         &rxr->rxdma, BUS_DMA_NOWAIT)) {
3559                         device_printf(dev,
3560                             "Unable to allocate RxDescriptor memory\n");
3561                         error = ENOMEM;
3562                         goto err_rx_desc;
3563                 }
3564                 rxr->rx_base = (union e1000_rx_desc_extended *)rxr->rxdma.dma_vaddr;
3565                 bzero((void *)rxr->rx_base, rsize);
3566
3567                 /* Allocate receive buffers for the ring*/
3568                 if (em_allocate_receive_buffers(rxr)) {
3569                         device_printf(dev,
3570                             "Critical Failure setting up receive buffers\n");
3571                         error = ENOMEM;
3572                         goto err_rx_desc;
3573                 }
3574         }
3575
3576         return (0);
3577
3578 err_rx_desc:
3579         for (rxr = adapter->rx_rings; rxconf > 0; rxr++, rxconf--)
3580                 em_dma_free(adapter, &rxr->rxdma);
3581 err_tx_desc:
3582         for (txr = adapter->tx_rings; txconf > 0; txr++, txconf--)
3583                 em_dma_free(adapter, &txr->txdma);
3584         free(adapter->rx_rings, M_DEVBUF);
3585 rx_fail:
3586 #if __FreeBSD_version >= 800000
3587         buf_ring_free(txr->br, M_DEVBUF);
3588 #endif
3589         free(adapter->tx_rings, M_DEVBUF);
3590 fail:
3591         return (error);
3592 }
3593
3594
3595 /*********************************************************************
3596  *
3597  *  Allocate memory for tx_buffer structures. The tx_buffer stores all
3598  *  the information needed to transmit a packet on the wire. This is
3599  *  called only once at attach; setup is done on every reset.
3600  *
3601  **********************************************************************/
3602 static int
3603 em_allocate_transmit_buffers(struct tx_ring *txr)
3604 {
3605         struct adapter *adapter = txr->adapter;
3606         device_t dev = adapter->dev;
3607         struct em_txbuffer *txbuf;
3608         int error, i;
3609
3610         /*
3611          * Setup DMA descriptor areas.
3612          */
3613         if ((error = bus_dma_tag_create(bus_get_dma_tag(dev),
3614                                1, 0,                    /* alignment, bounds */
3615                                BUS_SPACE_MAXADDR,       /* lowaddr */
3616                                BUS_SPACE_MAXADDR,       /* highaddr */
3617                                NULL, NULL,              /* filter, filterarg */
3618                                EM_TSO_SIZE,             /* maxsize */
3619                                EM_MAX_SCATTER,          /* nsegments */
3620                                PAGE_SIZE,               /* maxsegsize */
3621                                0,                       /* flags */
3622                                NULL,                    /* lockfunc */
3623                                NULL,                    /* lockfuncarg */
3624                                &txr->txtag))) {
3625                 device_printf(dev, "Unable to allocate TX DMA tag\n");
3626                 goto fail;
3627         }
3628
3629         if (!(txr->tx_buffers =
3630             (struct em_txbuffer *) malloc(sizeof(struct em_txbuffer) *
3631             adapter->num_tx_desc, M_DEVBUF, M_NOWAIT | M_ZERO))) {
3632                 device_printf(dev, "Unable to allocate tx_buffer memory\n");
3633                 error = ENOMEM;
3634                 goto fail;
3635         }
3636
3637         /* Create the descriptor buffer dma maps */
3638         txbuf = txr->tx_buffers;
3639         for (i = 0; i < adapter->num_tx_desc; i++, txbuf++) {
3640                 error = bus_dmamap_create(txr->txtag, 0, &txbuf->map);
3641                 if (error != 0) {
3642                         device_printf(dev, "Unable to create TX DMA map\n");
3643                         goto fail;
3644                 }
3645         }
3646
3647         return 0;
3648 fail:
3649         /* We free all, it handles case where we are in the middle */
3650         /* We free all; this handles the case where we fail midway */
3651         return (error);
3652 }
3653
3654 /*********************************************************************
3655  *
3656  *  Initialize a transmit ring.
3657  *
3658  **********************************************************************/
3659 static void
3660 em_setup_transmit_ring(struct tx_ring *txr)
3661 {
3662         struct adapter *adapter = txr->adapter;
3663         struct em_txbuffer *txbuf;
3664         int i;
3665 #ifdef DEV_NETMAP
3666         struct netmap_adapter *na = NA(adapter->ifp);
3667         struct netmap_slot *slot;
3668 #endif /* DEV_NETMAP */
3669
3670         /* Clear the old descriptor contents */
3671         EM_TX_LOCK(txr);
3672 #ifdef DEV_NETMAP
3673         slot = netmap_reset(na, NR_TX, txr->me, 0);
3674 #endif /* DEV_NETMAP */
3675
3676         bzero((void *)txr->tx_base,
3677               (sizeof(struct e1000_tx_desc)) * adapter->num_tx_desc);
3678         /* Reset indices */
3679         txr->next_avail_desc = 0;
3680         txr->next_to_clean = 0;
3681
3682         /* Free any existing tx buffers. */
3683         txbuf = txr->tx_buffers;
3684         for (i = 0; i < adapter->num_tx_desc; i++, txbuf++) {
3685                 if (txbuf->m_head != NULL) {
3686                         bus_dmamap_sync(txr->txtag, txbuf->map,
3687                             BUS_DMASYNC_POSTWRITE);
3688                         bus_dmamap_unload(txr->txtag, txbuf->map);
3689                         m_freem(txbuf->m_head);
3690                         txbuf->m_head = NULL;
3691                 }
3692 #ifdef DEV_NETMAP
3693                 if (slot) {
3694                         int si = netmap_idx_n2k(&na->tx_rings[txr->me], i);
3695                         uint64_t paddr;
3696                         void *addr;
3697
3698                         addr = PNMB(na, slot + si, &paddr);
3699                         txr->tx_base[i].buffer_addr = htole64(paddr);
3700                         /* reload the map for netmap mode */
3701                         netmap_load_map(na, txr->txtag, txbuf->map, addr);
3702                 }
3703 #endif /* DEV_NETMAP */
3704
3705                 /* clear the watch index */
3706                 txbuf->next_eop = -1;
3707         }
3708
3709         /* Set number of descriptors available */
3710         txr->tx_avail = adapter->num_tx_desc;
3711         txr->busy = EM_TX_IDLE;
3712
3713         /* Clear checksum offload context. */
3714         txr->last_hw_offload = 0;
3715         txr->last_hw_ipcss = 0;
3716         txr->last_hw_ipcso = 0;
3717         txr->last_hw_tucss = 0;
3718         txr->last_hw_tucso = 0;
3719
3720         bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
3721             BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
3722         EM_TX_UNLOCK(txr);
3723 }
3724
3725 /*********************************************************************
3726  *
3727  *  Initialize all transmit rings.
3728  *
3729  **********************************************************************/
3730 static void
3731 em_setup_transmit_structures(struct adapter *adapter)
3732 {
3733         struct tx_ring *txr = adapter->tx_rings;
3734
3735         for (int i = 0; i < adapter->num_queues; i++, txr++)
3736                 em_setup_transmit_ring(txr);
3737
3738         return;
3739 }
3740
3741 /*********************************************************************
3742  *
3743  *  Enable transmit unit.
3744  *
3745  **********************************************************************/
3746 static void
3747 em_initialize_transmit_unit(struct adapter *adapter)
3748 {
3749         struct tx_ring  *txr = adapter->tx_rings;
3750         struct e1000_hw *hw = &adapter->hw;
3751         u32     tctl, txdctl = 0, tarc, tipg = 0;
3752
3753         INIT_DEBUGOUT("em_initialize_transmit_unit: begin");
3754
3755         for (int i = 0; i < adapter->num_queues; i++, txr++) {
3756                 u64 bus_addr = txr->txdma.dma_paddr;
3757                 /* Base and Len of TX Ring */
3758                 E1000_WRITE_REG(hw, E1000_TDLEN(i),
3759                     adapter->num_tx_desc * sizeof(struct e1000_tx_desc));
3760                 E1000_WRITE_REG(hw, E1000_TDBAH(i),
3761                     (u32)(bus_addr >> 32));
3762                 E1000_WRITE_REG(hw, E1000_TDBAL(i),
3763                     (u32)bus_addr);
3764                 /* Init the HEAD/TAIL indices */
3765                 E1000_WRITE_REG(hw, E1000_TDT(i), 0);
3766                 E1000_WRITE_REG(hw, E1000_TDH(i), 0);
3767
3768                 HW_DEBUGOUT2("Base = %x, Length = %x\n",
3769                     E1000_READ_REG(&adapter->hw, E1000_TDBAL(i)),
3770                     E1000_READ_REG(&adapter->hw, E1000_TDLEN(i)));
3771
3772                 txr->busy = EM_TX_IDLE;
3773                 txdctl = 0; /* clear txdctl */
3774                 txdctl |= 0x1f; /* PTHRESH */
3775                 txdctl |= 1 << 8; /* HTHRESH */
3776                 txdctl |= 1 << 16;/* WTHRESH */
3777                 txdctl |= 1 << 22; /* Reserved bit 22 must always be 1 */
3778                 txdctl |= E1000_TXDCTL_GRAN;
3779                 txdctl |= 1 << 25; /* LWTHRESH */
3780
3781                 E1000_WRITE_REG(hw, E1000_TXDCTL(i), txdctl);
3782         }
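        /*
         * Worked example (illustrative): assuming E1000_TXDCTL_GRAN is
         * bit 24, the value assembled above is 0x1F | (1 << 8) |
         * (1 << 16) | (1 << 22) | (1 << 24) | (1 << 25) = 0x0341011F.
         */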
3783
3784         /* Set the default values for the Tx Inter Packet Gap timer */
3785         switch (adapter->hw.mac.type) {
3786         case e1000_80003es2lan:
3787                 tipg = DEFAULT_82543_TIPG_IPGR1;
3788                 tipg |= DEFAULT_80003ES2LAN_TIPG_IPGR2 <<
3789                     E1000_TIPG_IPGR2_SHIFT;
3790                 break;
3791         default:
3792                 if ((adapter->hw.phy.media_type == e1000_media_type_fiber) ||
3793                     (adapter->hw.phy.media_type ==
3794                     e1000_media_type_internal_serdes))
3795                         tipg = DEFAULT_82543_TIPG_IPGT_FIBER;
3796                 else
3797                         tipg = DEFAULT_82543_TIPG_IPGT_COPPER;
3798                 tipg |= DEFAULT_82543_TIPG_IPGR1 << E1000_TIPG_IPGR1_SHIFT;
3799                 tipg |= DEFAULT_82543_TIPG_IPGR2 << E1000_TIPG_IPGR2_SHIFT;
3800         }
3801
3802         E1000_WRITE_REG(&adapter->hw, E1000_TIPG, tipg);
3803         E1000_WRITE_REG(&adapter->hw, E1000_TIDV, adapter->tx_int_delay.value);
3804
3805         if (adapter->hw.mac.type >= e1000_82540)
3806                 E1000_WRITE_REG(&adapter->hw, E1000_TADV,
3807                     adapter->tx_abs_int_delay.value);
3808
3809         if ((adapter->hw.mac.type == e1000_82571) ||
3810             (adapter->hw.mac.type == e1000_82572)) {
3811                 tarc = E1000_READ_REG(&adapter->hw, E1000_TARC(0));
3812                 tarc |= TARC_SPEED_MODE_BIT;
3813                 E1000_WRITE_REG(&adapter->hw, E1000_TARC(0), tarc);
3814         } else if (adapter->hw.mac.type == e1000_80003es2lan) {
3815                 /* errata: program both queues to unweighted RR */
3816                 tarc = E1000_READ_REG(&adapter->hw, E1000_TARC(0));
3817                 tarc |= 1;
3818                 E1000_WRITE_REG(&adapter->hw, E1000_TARC(0), tarc);
3819                 tarc = E1000_READ_REG(&adapter->hw, E1000_TARC(1));
3820                 tarc |= 1;
3821                 E1000_WRITE_REG(&adapter->hw, E1000_TARC(1), tarc);
3822         } else if (adapter->hw.mac.type == e1000_82574) {
3823                 tarc = E1000_READ_REG(&adapter->hw, E1000_TARC(0));
3824                 tarc |= TARC_ERRATA_BIT;
3825                 if (adapter->num_queues > 1) {
3826                         tarc |= (TARC_COMPENSATION_MODE | TARC_MQ_FIX);
3827                         E1000_WRITE_REG(&adapter->hw, E1000_TARC(0), tarc);
3828                         E1000_WRITE_REG(&adapter->hw, E1000_TARC(1), tarc);
3829                 } else
3830                         E1000_WRITE_REG(&adapter->hw, E1000_TARC(0), tarc);
3831         }
3832
3833         adapter->txd_cmd = E1000_TXD_CMD_IFCS;
3834         if (adapter->tx_int_delay.value > 0)
3835                 adapter->txd_cmd |= E1000_TXD_CMD_IDE;
3836
3837         /* Program the Transmit Control Register */
3838         tctl = E1000_READ_REG(&adapter->hw, E1000_TCTL);
3839         tctl &= ~E1000_TCTL_CT;
3840         tctl |= (E1000_TCTL_PSP | E1000_TCTL_RTLC | E1000_TCTL_EN |
3841                    (E1000_COLLISION_THRESHOLD << E1000_CT_SHIFT));
3842
3843         if (adapter->hw.mac.type >= e1000_82571)
3844                 tctl |= E1000_TCTL_MULR;
3845
3846         /* This write will effectively turn on the transmit unit. */
3847         E1000_WRITE_REG(&adapter->hw, E1000_TCTL, tctl);
3848
3849         /* SPT and KBL errata workarounds */
3850         if (hw->mac.type == e1000_pch_spt) {
3851                 u32 reg;
3852                 reg = E1000_READ_REG(hw, E1000_IOSFPC);
3853                 reg |= E1000_RCTL_RDMTS_HEX;
3854                 E1000_WRITE_REG(hw, E1000_IOSFPC, reg);
3855                 /* i218-i219 Specification Update 1.5.4.5 */
3856                 reg = E1000_READ_REG(hw, E1000_TARC(0));
3857                 reg &= ~E1000_TARC0_CB_MULTIQ_3_REQ;
3858                 reg |= E1000_TARC0_CB_MULTIQ_2_REQ;
3859                 E1000_WRITE_REG(hw, E1000_TARC(0), reg);
3860         }
3861 }
3862
3863
3864 /*********************************************************************
3865  *
3866  *  Free all transmit rings.
3867  *
3868  **********************************************************************/
3869 static void
3870 em_free_transmit_structures(struct adapter *adapter)
3871 {
3872         struct tx_ring *txr = adapter->tx_rings;
3873
3874         for (int i = 0; i < adapter->num_queues; i++, txr++) {
3875                 EM_TX_LOCK(txr);
3876                 em_free_transmit_buffers(txr);
3877                 em_dma_free(adapter, &txr->txdma);
3878                 EM_TX_UNLOCK(txr);
3879                 EM_TX_LOCK_DESTROY(txr);
3880         }
3881
3882         free(adapter->tx_rings, M_DEVBUF);
3883 }
3884
3885 /*********************************************************************
3886  *
3887  *  Free transmit ring related data structures.
3888  *
3889  **********************************************************************/
3890 static void
3891 em_free_transmit_buffers(struct tx_ring *txr)
3892 {
3893         struct adapter          *adapter = txr->adapter;
3894         struct em_txbuffer      *txbuf;
3895
3896         INIT_DEBUGOUT("free_transmit_ring: begin");
3897
3898         if (txr->tx_buffers == NULL)
3899                 return;
3900
3901         for (int i = 0; i < adapter->num_tx_desc; i++) {
3902                 txbuf = &txr->tx_buffers[i];
3903                 if (txbuf->m_head != NULL) {
3904                         bus_dmamap_sync(txr->txtag, txbuf->map,
3905                             BUS_DMASYNC_POSTWRITE);
3906                         bus_dmamap_unload(txr->txtag,
3907                             txbuf->map);
3908                         m_freem(txbuf->m_head);
3909                         txbuf->m_head = NULL;
3910                         if (txbuf->map != NULL) {
3911                                 bus_dmamap_destroy(txr->txtag,
3912                                     txbuf->map);
3913                                 txbuf->map = NULL;
3914                         }
3915                 } else if (txbuf->map != NULL) {
3916                         bus_dmamap_unload(txr->txtag,
3917                             txbuf->map);
3918                         bus_dmamap_destroy(txr->txtag,
3919                             txbuf->map);
3920                         txbuf->map = NULL;
3921                 }
3922         }
3923 #if __FreeBSD_version >= 800000
3924         if (txr->br != NULL)
3925                 buf_ring_free(txr->br, M_DEVBUF);
3926 #endif
3927         if (txr->tx_buffers != NULL) {
3928                 free(txr->tx_buffers, M_DEVBUF);
3929                 txr->tx_buffers = NULL;
3930         }
3931         if (txr->txtag != NULL) {
3932                 bus_dma_tag_destroy(txr->txtag);
3933                 txr->txtag = NULL;
3934         }
3935         return;
3936 }
3937
3938
3939 /*********************************************************************
3940  *  The offload context is protocol specific (TCP/UDP) and thus
3941  *  only needs to be set when the protocol changes. The occasion
3942  *  of a context change can be a performance detriment, and
3943  *  might be better just disabled. The reason arises in the way
3944  *  in which the controller supports pipelined requests from the
3945  *  Tx data DMA. Up to four requests can be pipelined, and they may
3946  *  belong to the same packet or to multiple packets. However all
3947  *  requests for one packet are issued before a request is issued
3948  *  for a subsequent packet and if a request for the next packet
3949  *  requires a context change, that request will be stalled
3950  *  until the previous request completes. This means setting up
3951  *  a new context effectively disables pipelined Tx data DMA, which
3952  *  in turn greatly slows down performance when sending small-sized
3953  *  frames.
3954  **********************************************************************/
3955 static void
3956 em_transmit_checksum_setup(struct tx_ring *txr, struct mbuf *mp, int ip_off,
3957     struct ip *ip, u32 *txd_upper, u32 *txd_lower)
3958 {
3959         struct adapter                  *adapter = txr->adapter;
3960         struct e1000_context_desc       *TXD = NULL;
3961         struct em_txbuffer              *tx_buffer;
3962         int                             cur, hdr_len;
3963         u32                             cmd = 0;
3964         u16                             offload = 0;
3965         u8                              ipcso, ipcss, tucso, tucss;
3966
3967         ipcss = ipcso = tucss = tucso = 0;
3968         hdr_len = ip_off + (ip->ip_hl << 2);
3969         cur = txr->next_avail_desc;
3970
3971         /* Setup of IP header checksum. */
3972         if (mp->m_pkthdr.csum_flags & CSUM_IP) {
3973                 *txd_upper |= E1000_TXD_POPTS_IXSM << 8;
3974                 offload |= CSUM_IP;
3975                 ipcss = ip_off;
3976                 ipcso = ip_off + offsetof(struct ip, ip_sum);
3977                 /*
3978                  * Start offset for header checksum calculation.
3979                  * End offset for header checksum calculation.
3980                  * Offset of place to put the checksum.
3981                  */
3982                 TXD = (struct e1000_context_desc *)&txr->tx_base[cur];
3983                 TXD->lower_setup.ip_fields.ipcss = ipcss;
3984                 TXD->lower_setup.ip_fields.ipcse = htole16(hdr_len);
3985                 TXD->lower_setup.ip_fields.ipcso = ipcso;
3986                 cmd |= E1000_TXD_CMD_IP;
3987         }
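        /*
         * Worked example (illustrative): for a plain Ethernet + IPv4 +
         * TCP frame, ip_off = 14 and ip_hl = 5, so ipcss = 14, ipcso =
         * 14 + offsetof(struct ip, ip_sum) = 24 and hdr_len = 34; the
         * TCP case below then puts the checksum at tucso = 34 +
         * offsetof(struct tcphdr, th_sum) = 50.
         */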
3988
3989         if (mp->m_pkthdr.csum_flags & CSUM_TCP) {
3990                 *txd_lower = E1000_TXD_CMD_DEXT | E1000_TXD_DTYP_D;
3991                 *txd_upper |= E1000_TXD_POPTS_TXSM << 8;
3992                 offload |= CSUM_TCP;
3993                 tucss = hdr_len;
3994                 tucso = hdr_len + offsetof(struct tcphdr, th_sum);
3995                 /*
3996                  * The 82574L can only remember the *last* context used
3997                  * regardless of the queue that it was used for.  We cannot reuse
3998                  * contexts on this hardware platform and must generate a new
3999                  * context every time.  82574L hardware spec, section 7.2.6,
4000                  * second note.
4001                  */
4002                 if (adapter->num_queues < 2) {
4003                         /*
4004                         * Setting up a new checksum offload context for
4005                         * every frame takes a lot of processing time in
4006                         * hardware.  It also hurts performance a lot for
4007                         * small-sized frames, so avoid it if the driver
4008                         * can reuse the previously configured context.
4009                         */
4010                         if (txr->last_hw_offload == offload) {
4011                                 if (offload & CSUM_IP) {
4012                                         if (txr->last_hw_ipcss == ipcss &&
4013                                         txr->last_hw_ipcso == ipcso &&
4014                                         txr->last_hw_tucss == tucss &&
4015                                         txr->last_hw_tucso == tucso)
4016                                                 return;
4017                                 } else {
4018                                         if (txr->last_hw_tucss == tucss &&
4019                                         txr->last_hw_tucso == tucso)
4020                                                 return;
4021                                 }
4022                         }
4023                         txr->last_hw_offload = offload;
4024                         txr->last_hw_tucss = tucss;
4025                         txr->last_hw_tucso = tucso;
4026                 }
4027                 /*
4028                  * Start offset for payload checksum calculation.
4029                  * End offset for payload checksum calculation.
4030                  * Offset of place to put the checksum.
4031                  */
4032                 TXD = (struct e1000_context_desc *)&txr->tx_base[cur];
4033                 TXD->upper_setup.tcp_fields.tucss = tucss;
4034                 TXD->upper_setup.tcp_fields.tucse = htole16(0);
4035                 TXD->upper_setup.tcp_fields.tucso = tucso;
4036                 cmd |= E1000_TXD_CMD_TCP;
4037         } else if (mp->m_pkthdr.csum_flags & CSUM_UDP) {
4038                 *txd_lower = E1000_TXD_CMD_DEXT | E1000_TXD_DTYP_D;
4039                 *txd_upper |= E1000_TXD_POPTS_TXSM << 8;
4040                 offload |= CSUM_UDP;
4040                 tucss = hdr_len;
4041                 tucso = hdr_len + offsetof(struct udphdr, uh_sum);
4042                 /*
4043                  * The 82574L can only remember the *last* context used,
4044                  * regardless of the queue it was used for.  We cannot reuse
4045                  * contexts on this hardware platform and must generate a new
4046                  * context every time.  82574L hardware spec, section 7.2.6,
4047                  * second note.
4048                  */
4049                 if (adapter->num_queues < 2) {
4050                         /*
4051                          * Setting up a new checksum offload context for every
4052                          * frame takes a lot of processing time for hardware.
4053                          * This also reduces performance a lot for small sized
4054                          * frames, so avoid it if the driver can reuse a
4055                          * previously configured checksum offload context.
4056                         */
4057                         if (txr->last_hw_offload == offload) {
4058                                 if (offload & CSUM_IP) {
4059                                         if (txr->last_hw_ipcss == ipcss &&
4060                                         txr->last_hw_ipcso == ipcso &&
4061                                         txr->last_hw_tucss == tucss &&
4062                                         txr->last_hw_tucso == tucso)
4063                                                 return;
4064                                 } else {
4065                                         if (txr->last_hw_tucss == tucss &&
4066                                         txr->last_hw_tucso == tucso)
4067                                                 return;
4068                                 }
4069                         }
4070                         txr->last_hw_offload = offload;
4071                         txr->last_hw_tucss = tucss;
4072                         txr->last_hw_tucso = tucso;
4073                 }
4074                 /*
4075                  * Start offset for payload checksum calculation.
4076                  * End offset for payload checksum calculation.
4077                  * Offset of place to put the checksum.
4078                  */
4079                 TXD = (struct e1000_context_desc *)&txr->tx_base[cur];
4080                 TXD->upper_setup.tcp_fields.tucss = tucss;
4081                 TXD->upper_setup.tcp_fields.tucse = htole16(0);
4082                 TXD->upper_setup.tcp_fields.tucso = tucso;
4083         }
4084   
4085         if (offload & CSUM_IP) {
4086                 txr->last_hw_ipcss = ipcss;
4087                 txr->last_hw_ipcso = ipcso;
4088         }
4089
4090         TXD->tcp_seg_setup.data = htole32(0);
4091         TXD->cmd_and_length =
4092             htole32(adapter->txd_cmd | E1000_TXD_CMD_DEXT | cmd);
4093         tx_buffer = &txr->tx_buffers[cur];
4094         tx_buffer->m_head = NULL;
4095         tx_buffer->next_eop = -1;
4096
4097         if (++cur == adapter->num_tx_desc)
4098                 cur = 0;
4099
4100         txr->tx_avail--;
4101         txr->next_avail_desc = cur;
4102 }
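
/*
 * Worked example (illustrative only, not from the datasheet): for a plain
 * Ethernet + IPv4 frame with no options, ip_off = 14 and ip->ip_hl = 5,
 * so hdr_len = 14 + 20 = 34.  The context fields then come out as:
 *
 *   ipcss = 14                (IP header start)
 *   ipcso = 14 + 10 = 24      (offsetof(struct ip, ip_sum))
 *   ipcse = 34                (as programmed above)
 *   tucss = 34                (TCP/UDP header start)
 *   tucso = 34 + 16 = 50      (offsetof(struct tcphdr, th_sum)), or
 *   tucso = 34 +  6 = 40      (offsetof(struct udphdr, uh_sum)) for UDP
 */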
4103
4104
4105 /**********************************************************************
4106  *
4107  *  Setup work for hardware segmentation offload (TSO)
4108  *
4109  **********************************************************************/
4110 static void
4111 em_tso_setup(struct tx_ring *txr, struct mbuf *mp, int ip_off,
4112     struct ip *ip, struct tcphdr *tp, u32 *txd_upper, u32 *txd_lower)
4113 {
4114         struct adapter                  *adapter = txr->adapter;
4115         struct e1000_context_desc       *TXD;
4116         struct em_txbuffer              *tx_buffer;
4117         int cur, hdr_len;
4118
4119         /*
4120          * In theory we can reuse the same TSO context if and only if
4121          * the frame is the same type (IP/TCP) and has the same MSS.
4122          * However, checking whether a frame has the same IP/TCP
4123          * structure is a hard thing to do, so just ignore that and
4124          * always establish a new TSO context.
4125          */
4126         hdr_len = ip_off + (ip->ip_hl << 2) + (tp->th_off << 2);
4127         *txd_lower = (E1000_TXD_CMD_DEXT |      /* Extended descr type */
4128                       E1000_TXD_DTYP_D |        /* Data descr type */
4129                       E1000_TXD_CMD_TSE);       /* Do TSE on this packet */
4130
4131         /* IP and/or TCP header checksum calculation and insertion. */
4132         *txd_upper = (E1000_TXD_POPTS_IXSM | E1000_TXD_POPTS_TXSM) << 8;
4133
4134         cur = txr->next_avail_desc;
4135         tx_buffer = &txr->tx_buffers[cur];
4136         TXD = (struct e1000_context_desc *) &txr->tx_base[cur];
4137
4138         /*
4139          * Start offset for header checksum calculation.
4140          * End offset for header checksum calculation.
4141          * Offset of place to put the checksum.
4142          */
4143         TXD->lower_setup.ip_fields.ipcss = ip_off;
4144         TXD->lower_setup.ip_fields.ipcse =
4145             htole16(ip_off + (ip->ip_hl << 2) - 1);
4146         TXD->lower_setup.ip_fields.ipcso = ip_off + offsetof(struct ip, ip_sum);
4147         /*
4148          * Start offset for payload checksum calculation.
4149          * End offset for payload checksum calculation.
4150          * Offset of place to put the checksum.
4151          */
4152         TXD->upper_setup.tcp_fields.tucss = ip_off + (ip->ip_hl << 2);
4153         TXD->upper_setup.tcp_fields.tucse = 0;
4154         TXD->upper_setup.tcp_fields.tucso =
4155             ip_off + (ip->ip_hl << 2) + offsetof(struct tcphdr, th_sum);
4156         /*
4157          * Payload size per packet w/o any headers.
4158          * Length of all headers up to payload.
4159          */
4160         TXD->tcp_seg_setup.fields.mss = htole16(mp->m_pkthdr.tso_segsz);
4161         TXD->tcp_seg_setup.fields.hdr_len = hdr_len;
4162
4163         TXD->cmd_and_length = htole32(adapter->txd_cmd |
4164                                 E1000_TXD_CMD_DEXT |    /* Extended descr */
4165                                 E1000_TXD_CMD_TSE |     /* TSE context */
4166                                 E1000_TXD_CMD_IP |      /* Do IP csum */
4167                                 E1000_TXD_CMD_TCP |     /* Do TCP checksum */
4168                                 (mp->m_pkthdr.len - (hdr_len))); /* Total len */
4169
4170         tx_buffer->m_head = NULL;
4171         tx_buffer->next_eop = -1;
4172
4173         if (++cur == adapter->num_tx_desc)
4174                 cur = 0;
4175
4176         txr->tx_avail--;
4177         txr->next_avail_desc = cur;
4178         txr->tx_tso = TRUE;
4179 }
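
/*
 * Worked example (illustrative only): for an Ethernet + IPv4 + TCP frame
 * with no options, hdr_len = 14 + 20 + 20 = 54.  With tso_segsz (MSS) of
 * 1448 and m_pkthdr.len of 7254, the payload length programmed into
 * cmd_and_length is 7254 - 54 = 7200, and the hardware emits
 * howmany(7200, 1448) = 5 segments, the first four carrying 1448 bytes
 * of payload and the last carrying 1408, each with a rewritten header.
 */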
4180
4181
4182 /**********************************************************************
4183  *
4184  *  Examine each tx_buffer in the used queue. If the hardware is done
4185  *  processing the packet then free associated resources. The
4186  *  tx_buffer is put back on the free queue.
4187  *
4188  **********************************************************************/
4189 static void
4190 em_txeof(struct tx_ring *txr)
4191 {
4192         struct adapter  *adapter = txr->adapter;
4193         int first, last, done, processed;
4194         struct em_txbuffer *tx_buffer;
4195         struct e1000_tx_desc   *tx_desc, *eop_desc;
4196         struct ifnet   *ifp = adapter->ifp;
4197
4198         EM_TX_LOCK_ASSERT(txr);
4199 #ifdef DEV_NETMAP
4200         if (netmap_tx_irq(ifp, txr->me))
4201                 return;
4202 #endif /* DEV_NETMAP */
4203
4204         /* No work, make sure hang detection is disabled */
4205         if (txr->tx_avail == adapter->num_tx_desc) {
4206                 txr->busy = EM_TX_IDLE;
4207                 return;
4208         }
4209
4210         processed = 0;
4211         first = txr->next_to_clean;
4212         tx_desc = &txr->tx_base[first];
4213         tx_buffer = &txr->tx_buffers[first];
4214         last = tx_buffer->next_eop;
4215         eop_desc = &txr->tx_base[last];
4216
4217         /*
4218          * Get the index of the first descriptor
4219          * AFTER the EOP of the first packet, so
4220          * that we can do the simple comparison
4221          * in the inner while loop.
4222          */
4223         if (++last == adapter->num_tx_desc)
4224                 last = 0;
4225         done = last;
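        /*
         * Illustrative example (values assumed): with num_tx_desc = 1024,
         * first = 1020 and next_eop = 1023, "last" wraps around to 0, so
         * done = 0 and the inner loop below cleans descriptors 1020..1023,
         * stopping once "first" wraps around to equal "done".
         */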
4226
4227         bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
4228             BUS_DMASYNC_POSTREAD);
4229
4230         while (eop_desc->upper.fields.status & E1000_TXD_STAT_DD) {
4231                 /* We clean the range of the packet */
4232                 while (first != done) {
4233                         tx_desc->upper.data = 0;
4234                         tx_desc->lower.data = 0;
4235                         tx_desc->buffer_addr = 0;
4236                         ++txr->tx_avail;
4237                         ++processed;
4238
4239                         if (tx_buffer->m_head) {
4240                                 bus_dmamap_sync(txr->txtag,
4241                                     tx_buffer->map,
4242                                     BUS_DMASYNC_POSTWRITE);
4243                                 bus_dmamap_unload(txr->txtag,
4244                                     tx_buffer->map);
4245                                 m_freem(tx_buffer->m_head);
4246                                 tx_buffer->m_head = NULL;
4247                         }
4248                         tx_buffer->next_eop = -1;
4249
4250                         if (++first == adapter->num_tx_desc)
4251                                 first = 0;
4252
4253                         tx_buffer = &txr->tx_buffers[first];
4254                         tx_desc = &txr->tx_base[first];
4255                 }
4256                 ++ifp->if_opackets;
4257                 /* See if we can continue to the next packet */
4258                 last = tx_buffer->next_eop;
4259                 if (last != -1) {
4260                         eop_desc = &txr->tx_base[last];
4261                         /* Get new done point */
4262                         if (++last == adapter->num_tx_desc) last = 0;
4263                         done = last;
4264                 } else
4265                         break;
4266         }
4267         bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
4268             BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
4269
4270         txr->next_to_clean = first;
4271
4272         /*
4273         ** Hang detection: we know there's work outstanding
4274         ** or the early return above would have been taken, so if no
4275         ** descriptor was processed here, that indicates a potential hang.
4276         ** The local timer will examine this and do a reset if needed.
4277         */
4278         if (processed == 0) {
4279                 if (txr->busy != EM_TX_HUNG)
4280                         ++txr->busy;
4281         } else /* At least one descriptor was cleaned */
4282                 txr->busy = EM_TX_BUSY; /* note this clears HUNG */
4283
4284         /*
4285          * If we have a minimum free, clear IFF_DRV_OACTIVE
4286          * to tell the stack that it is OK to send packets.
4287          * Notice that all writes of OACTIVE happen under the
4288          * TX lock which, with a single queue, guarantees 
4289          * sanity.
4290          */
4291         if (txr->tx_avail >= EM_MAX_SCATTER) {
4292                 ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;
4293         }
4294
4295         /* Disable hang detection if all clean */
4296         if (txr->tx_avail == adapter->num_tx_desc)
4297                 txr->busy = EM_TX_IDLE;
4298 }
4299
4300 /*********************************************************************
4301  *
4302  *  Refresh RX descriptor mbufs from system mbuf buffer pool.
4303  *
4304  **********************************************************************/
4305 static void
4306 em_refresh_mbufs(struct rx_ring *rxr, int limit)
4307 {
4308         struct adapter          *adapter = rxr->adapter;
4309         struct mbuf             *m;
4310         bus_dma_segment_t       segs;
4311         struct em_rxbuffer      *rxbuf;
4312         int                     i, j, error, nsegs;
4313         bool                    cleaned = FALSE;
4314
4315         i = j = rxr->next_to_refresh;
4316         /*
4317         ** Get one descriptor beyond
4318         ** our work mark to control
4319         ** the loop.
4320         */
4321         if (++j == adapter->num_rx_desc)
4322                 j = 0;
4323
4324         while (j != limit) {
4325                 rxbuf = &rxr->rx_buffers[i];
4326                 if (rxbuf->m_head == NULL) {
4327                         m = m_getjcl(M_NOWAIT, MT_DATA,
4328                             M_PKTHDR, adapter->rx_mbuf_sz);
4329                         /*
4330                         ** If we have a temporary resource shortage
4331                         ** that causes a failure, just abort the refresh
4332                         ** for now; we will return to this point when
4333                         ** reinvoked from em_rxeof.
4334                         */
4335                         if (m == NULL)
4336                                 goto update;
4337                 } else
4338                         m = rxbuf->m_head;
4339
4340                 m->m_len = m->m_pkthdr.len = adapter->rx_mbuf_sz;
4341                 m->m_flags |= M_PKTHDR;
4342                 m->m_data = m->m_ext.ext_buf;
4343
4344                 /* Use bus_dma machinery to setup the memory mapping  */
4345                 error = bus_dmamap_load_mbuf_sg(rxr->rxtag, rxbuf->map,
4346                     m, &segs, &nsegs, BUS_DMA_NOWAIT);
4347                 if (error != 0) {
4348                         printf("Refresh mbufs: dmamap load"
4349                             " failure - %d\n", error);
4350                         m_free(m);
4351                         rxbuf->m_head = NULL;
4352                         goto update;
4353                 }
4354                 rxbuf->m_head = m;
4355                 rxbuf->paddr = segs.ds_addr;
4356                 bus_dmamap_sync(rxr->rxtag,
4357                     rxbuf->map, BUS_DMASYNC_PREREAD);
4358                 em_setup_rxdesc(&rxr->rx_base[i], rxbuf);
4359                 cleaned = TRUE;
4360
4361                 i = j; /* Next is precalculated for us */
4362                 rxr->next_to_refresh = i;
4363                 /* Calculate next controlling index */
4364                 if (++j == adapter->num_rx_desc)
4365                         j = 0;
4366         }
4367 update:
4368         /*
4369         ** Update the tail pointer only if, and
4370         ** only as far as, we have refreshed.
4371         */
4372         if (cleaned)
4373                 E1000_WRITE_REG(&adapter->hw,
4374                     E1000_RDT(rxr->me), rxr->next_to_refresh);
4375
4376         return;
4377 }
4378
4379
4380 /*********************************************************************
4381  *
4382  *  Allocate memory for rx_buffer structures. Since we use one
4383  *  rx_buffer per received packet, the maximum number of rx_buffer's
4384  *  that we'll need is equal to the number of receive descriptors
4385  *  that we've allocated.
4386  *
4387  **********************************************************************/
4388 static int
4389 em_allocate_receive_buffers(struct rx_ring *rxr)
4390 {
4391         struct adapter          *adapter = rxr->adapter;
4392         device_t                dev = adapter->dev;
4393         struct em_rxbuffer      *rxbuf;
4394         int                     error;
4395
4396         rxr->rx_buffers = malloc(sizeof(struct em_rxbuffer) *
4397             adapter->num_rx_desc, M_DEVBUF, M_NOWAIT | M_ZERO);
4398         if (rxr->rx_buffers == NULL) {
4399                 device_printf(dev, "Unable to allocate rx_buffer memory\n");
4400                 return (ENOMEM);
4401         }
4402
4403         error = bus_dma_tag_create(bus_get_dma_tag(dev), /* parent */
4404                                 1, 0,                   /* alignment, bounds */
4405                                 BUS_SPACE_MAXADDR,      /* lowaddr */
4406                                 BUS_SPACE_MAXADDR,      /* highaddr */
4407                                 NULL, NULL,             /* filter, filterarg */
4408                                 MJUM9BYTES,             /* maxsize */
4409                                 1,                      /* nsegments */
4410                                 MJUM9BYTES,             /* maxsegsize */
4411                                 0,                      /* flags */
4412                                 NULL,                   /* lockfunc */
4413                                 NULL,                   /* lockarg */
4414                                 &rxr->rxtag);
4415         if (error) {
4416                 device_printf(dev, "%s: bus_dma_tag_create failed %d\n",
4417                     __func__, error);
4418                 goto fail;
4419         }
4420
4421         for (int i = 0; i < adapter->num_rx_desc; i++) {
4422                 rxbuf = &rxr->rx_buffers[i];
4424                 error = bus_dmamap_create(rxr->rxtag, 0, &rxbuf->map);
4425                 if (error) {
4426                         device_printf(dev, "%s: bus_dmamap_create failed: %d\n",
4427                             __func__, error);
4428                         goto fail;
4429                 }
4430         }
4431
4432         return (0);
4433
4434 fail:
4435         em_free_receive_structures(adapter);
4436         return (error);
4437 }
4438
4439
4440 /*********************************************************************
4441  *
4442  *  Initialize a receive ring and its buffers.
4443  *
4444  **********************************************************************/
4445 static int
4446 em_setup_receive_ring(struct rx_ring *rxr)
4447 {
4448         struct  adapter         *adapter = rxr->adapter;
4449         struct em_rxbuffer      *rxbuf;
4450         bus_dma_segment_t       seg[1];
4451         int                     rsize, nsegs, error = 0;
4452 #ifdef DEV_NETMAP
4453         struct netmap_adapter *na = NA(adapter->ifp);
4454         struct netmap_slot *slot;
4455 #endif
4456
4457
4458         /* Clear the ring contents */
4459         EM_RX_LOCK(rxr);
4460         rsize = roundup2(adapter->num_rx_desc *
4461             sizeof(union e1000_rx_desc_extended), EM_DBA_ALIGN);
4462         bzero((void *)rxr->rx_base, rsize);
4463 #ifdef DEV_NETMAP
4464         slot = netmap_reset(na, NR_RX, 0, 0);
4465 #endif
4466
4467         /*
4468         ** Free current RX buffer structs and their mbufs
4469         */
4470         for (int i = 0; i < adapter->num_rx_desc; i++) {
4471                 rxbuf = &rxr->rx_buffers[i];
4472                 if (rxbuf->m_head != NULL) {
4473                         bus_dmamap_sync(rxr->rxtag, rxbuf->map,
4474                             BUS_DMASYNC_POSTREAD);
4475                         bus_dmamap_unload(rxr->rxtag, rxbuf->map);
4476                         m_freem(rxbuf->m_head);
4477                         rxbuf->m_head = NULL; /* mark as freed */
4478                 }
4479         }
4480
4481         /* Now replenish the mbufs */
4482         for (int j = 0; j != adapter->num_rx_desc; ++j) {
4483                 rxbuf = &rxr->rx_buffers[j];
4484 #ifdef DEV_NETMAP
4485                 if (slot) {
4486                         int si = netmap_idx_n2k(&na->rx_rings[rxr->me], j);
4487                         uint64_t paddr;
4488                         void *addr;
4489
4490                         addr = PNMB(na, slot + si, &paddr);
4491                         netmap_load_map(na, rxr->rxtag, rxbuf->map, addr);
4492                         rxbuf->paddr = paddr;
4493                         em_setup_rxdesc(&rxr->rx_base[j], rxbuf);
4494                         continue;
4495                 }
4496 #endif /* DEV_NETMAP */
4497                 rxbuf->m_head = m_getjcl(M_NOWAIT, MT_DATA,
4498                     M_PKTHDR, adapter->rx_mbuf_sz);
4499                 if (rxbuf->m_head == NULL) {
4500                         error = ENOBUFS;
4501                         goto fail;
4502                 }
4503                 rxbuf->m_head->m_len = adapter->rx_mbuf_sz;
4504                 rxbuf->m_head->m_flags &= ~M_HASFCS; /* we strip it */
4505                 rxbuf->m_head->m_pkthdr.len = adapter->rx_mbuf_sz;
4506
4507                 /* Get the memory mapping */
4508                 error = bus_dmamap_load_mbuf_sg(rxr->rxtag,
4509                     rxbuf->map, rxbuf->m_head, seg,
4510                     &nsegs, BUS_DMA_NOWAIT);
4511                 if (error != 0) {
4512                         m_freem(rxbuf->m_head);
4513                         rxbuf->m_head = NULL;
4514                         goto fail;
4515                 }
4516                 bus_dmamap_sync(rxr->rxtag,
4517                     rxbuf->map, BUS_DMASYNC_PREREAD);
4518
4519                 rxbuf->paddr = seg[0].ds_addr;
4520                 em_setup_rxdesc(&rxr->rx_base[j], rxbuf);
4521         }
4522         rxr->next_to_check = 0;
4523         rxr->next_to_refresh = 0;
4524         bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
4525             BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
4526
4527 fail:
4528         EM_RX_UNLOCK(rxr);
4529         return (error);
4530 }
4531
4532 /*********************************************************************
4533  *
4534  *  Initialize all receive rings.
4535  *
4536  **********************************************************************/
4537 static int
4538 em_setup_receive_structures(struct adapter *adapter)
4539 {
4540         struct rx_ring *rxr = adapter->rx_rings;
4541         int q;
4542
4543         for (q = 0; q < adapter->num_queues; q++, rxr++)
4544                 if (em_setup_receive_ring(rxr))
4545                         goto fail;
4546
4547         return (0);
4548 fail:
4549         /*
4550          * Free the RX buffers allocated so far; we only handle
4551          * the rings that completed, since the failing case will have
4552          * cleaned up after itself. 'q' failed, so it is the terminus.
4553          */
4554         for (int i = 0; i < q; ++i) {
4555                 rxr = &adapter->rx_rings[i];
4556                 for (int n = 0; n < adapter->num_rx_desc; n++) {
4557                         struct em_rxbuffer *rxbuf;
4558                         rxbuf = &rxr->rx_buffers[n];
4559                         if (rxbuf->m_head != NULL) {
4560                                 bus_dmamap_sync(rxr->rxtag, rxbuf->map,
4561                                   BUS_DMASYNC_POSTREAD);
4562                                 bus_dmamap_unload(rxr->rxtag, rxbuf->map);
4563                                 m_freem(rxbuf->m_head);
4564                                 rxbuf->m_head = NULL;
4565                         }
4566                 }
4567                 rxr->next_to_check = 0;
4568                 rxr->next_to_refresh = 0;
4569         }
4570
4571         return (ENOBUFS);
4572 }
4573
4574 /*********************************************************************
4575  *
4576  *  Free all receive rings.
4577  *
4578  **********************************************************************/
4579 static void
4580 em_free_receive_structures(struct adapter *adapter)
4581 {
4582         struct rx_ring *rxr = adapter->rx_rings;
4583
4584         for (int i = 0; i < adapter->num_queues; i++, rxr++) {
4585                 em_free_receive_buffers(rxr);
4586                 /* Free the ring memory as well */
4587                 em_dma_free(adapter, &rxr->rxdma);
4588                 EM_RX_LOCK_DESTROY(rxr);
4589         }
4590
4591         free(adapter->rx_rings, M_DEVBUF);
4592 }
4593
4594
4595 /*********************************************************************
4596  *
4597  *  Free receive ring data structures
4598  *
4599  **********************************************************************/
4600 static void
4601 em_free_receive_buffers(struct rx_ring *rxr)
4602 {
4603         struct adapter          *adapter = rxr->adapter;
4604         struct em_rxbuffer      *rxbuf = NULL;
4605
4606         INIT_DEBUGOUT("free_receive_buffers: begin");
4607
4608         if (rxr->rx_buffers != NULL) {
4609                 for (int i = 0; i < adapter->num_rx_desc; i++) {
4610                         rxbuf = &rxr->rx_buffers[i];
4611                         if (rxbuf->map != NULL) {
4612                                 bus_dmamap_sync(rxr->rxtag, rxbuf->map,
4613                                     BUS_DMASYNC_POSTREAD);
4614                                 bus_dmamap_unload(rxr->rxtag, rxbuf->map);
4615                                 bus_dmamap_destroy(rxr->rxtag, rxbuf->map);
4616                         }
4617                         if (rxbuf->m_head != NULL) {
4618                                 m_freem(rxbuf->m_head);
4619                                 rxbuf->m_head = NULL;
4620                         }
4621                 }
4622                 free(rxr->rx_buffers, M_DEVBUF);
4623                 rxr->rx_buffers = NULL;
4624                 rxr->next_to_check = 0;
4625                 rxr->next_to_refresh = 0;
4626         }
4627
4628         if (rxr->rxtag != NULL) {
4629                 bus_dma_tag_destroy(rxr->rxtag);
4630                 rxr->rxtag = NULL;
4631         }
4632
4633         return;
4634 }
4635
4636
4637 /*********************************************************************
4638  *
4639  *  Enable receive unit.
4640  *
4641  **********************************************************************/
4642
4643 static void
4644 em_initialize_receive_unit(struct adapter *adapter)
4645 {
4646         struct rx_ring *rxr = adapter->rx_rings;
4647         struct ifnet    *ifp = adapter->ifp;
4648         struct e1000_hw *hw = &adapter->hw;
4649         u32     rctl, rxcsum, rfctl;
4650
4651         INIT_DEBUGOUT("em_initialize_receive_units: begin");
4652
4653         /*
4654          * Make sure receives are disabled while setting
4655          * up the descriptor ring
4656          */
4657         rctl = E1000_READ_REG(hw, E1000_RCTL);
4658         /* Do not disable if ever enabled on this hardware */
4659         if ((hw->mac.type != e1000_82574) && (hw->mac.type != e1000_82583))
4660                 E1000_WRITE_REG(hw, E1000_RCTL, rctl & ~E1000_RCTL_EN);
4661
4662         /* Setup the Receive Control Register */
4663         rctl &= ~(3 << E1000_RCTL_MO_SHIFT);
4664         rctl |= E1000_RCTL_EN | E1000_RCTL_BAM |
4665             E1000_RCTL_LBM_NO | E1000_RCTL_RDMTS_HALF |
4666             (hw->mac.mc_filter_type << E1000_RCTL_MO_SHIFT);
4667
4668         /* Do not store bad packets */
4669         rctl &= ~E1000_RCTL_SBP;
4670
4671         /* Enable Long Packet receive */
4672         if (ifp->if_mtu > ETHERMTU)
4673                 rctl |= E1000_RCTL_LPE;
4674         else
4675                 rctl &= ~E1000_RCTL_LPE;
4676
4677         /* Strip the CRC */
4678         if (!em_disable_crc_stripping)
4679                 rctl |= E1000_RCTL_SECRC;
4680
4681         E1000_WRITE_REG(&adapter->hw, E1000_RADV,
4682             adapter->rx_abs_int_delay.value);
4683
4684         E1000_WRITE_REG(&adapter->hw, E1000_RDTR,
4685             adapter->rx_int_delay.value);
4686         /*
4687          * Set the interrupt throttling rate. Value is calculated
4688          * as DEFAULT_ITR = 1/(MAX_INTS_PER_SEC * 256ns)
4689          */
4690         E1000_WRITE_REG(hw, E1000_ITR, DEFAULT_ITR);
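        /*
         * Worked example (assuming the driver's usual default of
         * MAX_INTS_PER_SEC = 8000): DEFAULT_ITR = 10^9 / (8000 * 256)
         * = ~488 units of 256ns, i.e. the hardware waits at least
         * 488 * 256ns = ~125us between interrupts, capping the rate
         * at roughly 8000 interrupts per second.
         */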
4691
4692         /* Use extended rx descriptor formats */
4693         rfctl = E1000_READ_REG(hw, E1000_RFCTL);
4694         rfctl |= E1000_RFCTL_EXTEN;
4695         /*
4696         ** When using MSIX interrupts we need to throttle
4697         ** using the EITR register (82574 only)
4698         */
4699         if (hw->mac.type == e1000_82574) {
4700                 for (int i = 0; i < 4; i++)
4701                         E1000_WRITE_REG(hw, E1000_EITR_82574(i),
4702                             DEFAULT_ITR);
4703                 /* Disable accelerated acknowledge */
4704                 rfctl |= E1000_RFCTL_ACK_DIS;
4705         }
4706         E1000_WRITE_REG(hw, E1000_RFCTL, rfctl);
4707
4708         rxcsum = E1000_READ_REG(hw, E1000_RXCSUM);
4709         if (ifp->if_capenable & IFCAP_RXCSUM) {
4710 #ifdef EM_MULTIQUEUE
4711                 rxcsum |= E1000_RXCSUM_TUOFL |
4712                           E1000_RXCSUM_IPOFL |
4713                           E1000_RXCSUM_PCSD;
4714 #else
4715                 rxcsum |= E1000_RXCSUM_TUOFL;
4716 #endif
4717         } else
4718                 rxcsum &= ~E1000_RXCSUM_TUOFL;
4719
4720         E1000_WRITE_REG(hw, E1000_RXCSUM, rxcsum);
4721
4722 #ifdef EM_MULTIQUEUE
4723 #define RSSKEYLEN 10
4724         if (adapter->num_queues > 1) {
4725                 uint8_t  rss_key[4 * RSSKEYLEN];
4726                 uint32_t reta = 0;
4727                 int i;
4728
4729                 /*
4730                  * Configure the RSS key.
4731                  */
4732                 arc4rand(rss_key, sizeof(rss_key), 0);
4733                 for (i = 0; i < RSSKEYLEN; ++i) {
4734                         uint32_t rssrk = 0;
4735
4736                         rssrk = EM_RSSRK_VAL(rss_key, i);
4737                         E1000_WRITE_REG(hw,E1000_RSSRK(i), rssrk);
4738                 }
4739
4740                 /*
4741                  * Configure the RSS redirect table in the following fashion:
4742                  * (hash & ring_cnt_mask) == rdr_table[(hash & rdr_table_mask)]
4743                  */
4744                 for (i = 0; i < sizeof(reta); ++i) {
4745                         uint32_t q;
4746
4747                         q = (i % adapter->num_queues) << 7;
4748                         reta |= q << (8 * i);
4749                 }
4750
4751                 for (i = 0; i < 32; ++i) {
4752                         E1000_WRITE_REG(hw, E1000_RETA(i), reta);
4753                 }
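                /*
                 * Worked example (illustrative only): with num_queues = 2,
                 * the loop above packs q = 0x00, 0x80, 0x00, 0x80 into the
                 * four bytes of reta (0x80008000); bit 7 of each byte
                 * selects the queue.  Replicated across the 32 RETA
                 * registers this yields 128 entries alternating between
                 * queue 0 and queue 1 based on the low bits of the hash.
                 */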
4754
4755                 E1000_WRITE_REG(hw, E1000_MRQC, E1000_MRQC_RSS_ENABLE_2Q | 
4756                                 E1000_MRQC_RSS_FIELD_IPV4_TCP |
4757                                 E1000_MRQC_RSS_FIELD_IPV4 |
4758                                 E1000_MRQC_RSS_FIELD_IPV6_TCP_EX |
4759                                 E1000_MRQC_RSS_FIELD_IPV6_EX |
4760                                 E1000_MRQC_RSS_FIELD_IPV6);
4761         }
4762 #endif
4763         /*
4764         ** XXX TEMPORARY WORKAROUND: on some systems with 82573
4765         ** long latencies are observed, like Lenovo X60. This
4766         ** change eliminates the problem, but since having positive
4767         ** values in RDTR is a known source of problems on other
4768         ** platforms another solution is being sought.
4769         */
4770         if (hw->mac.type == e1000_82573)
4771                 E1000_WRITE_REG(hw, E1000_RDTR, 0x20);
4772
4773         for (int i = 0; i < adapter->num_queues; i++, rxr++) {
4774                 /* Setup the Base and Length of the Rx Descriptor Ring */
4775                 u64 bus_addr = rxr->rxdma.dma_paddr;
4776                 u32 rdt = adapter->num_rx_desc - 1; /* default */
4777
4778                 E1000_WRITE_REG(hw, E1000_RDLEN(i),
4779                     adapter->num_rx_desc * sizeof(union e1000_rx_desc_extended));
4780                 E1000_WRITE_REG(hw, E1000_RDBAH(i), (u32)(bus_addr >> 32));
4781                 E1000_WRITE_REG(hw, E1000_RDBAL(i), (u32)bus_addr);
4782                 /* Setup the Head and Tail Descriptor Pointers */
4783                 E1000_WRITE_REG(hw, E1000_RDH(i), 0);
4784 #ifdef DEV_NETMAP
4785                 /*
4786                  * an init() while a netmap client is active must
4787                  * preserve the rx buffers passed to userspace.
4788                  */
4789                 if (ifp->if_capenable & IFCAP_NETMAP)
4790                         rdt -= nm_kr_rxspace(&NA(adapter->ifp)->rx_rings[i]);
4791 #endif /* DEV_NETMAP */
4792                 E1000_WRITE_REG(hw, E1000_RDT(i), rdt);
4793         }
4794
4795         /*
4796          * Set PTHRESH for improved jumbo performance
4797          * According to 10.2.5.11 of Intel 82574 Datasheet,
4798          * RXDCTL(1) is written whenever RXDCTL(0) is written.
4799          * Only write to RXDCTL(1) if there is a need for different
4800          * settings.
4801          */
4802         if (((adapter->hw.mac.type == e1000_ich9lan) ||
4803             (adapter->hw.mac.type == e1000_pch2lan) ||
4804             (adapter->hw.mac.type == e1000_ich10lan)) &&
4805             (ifp->if_mtu > ETHERMTU)) {
4806                 u32 rxdctl = E1000_READ_REG(hw, E1000_RXDCTL(0));
4807                 E1000_WRITE_REG(hw, E1000_RXDCTL(0), rxdctl | 3);
4808         } else if (adapter->hw.mac.type == e1000_82574) {
4809                 for (int i = 0; i < adapter->num_queues; i++) {
4810                         u32 rxdctl = E1000_READ_REG(hw, E1000_RXDCTL(i));
4811
4812                         rxdctl |= 0x20; /* PTHRESH */
4813                         rxdctl |= 4 << 8; /* HTHRESH */
4814                         rxdctl |= 4 << 16;/* WTHRESH */
4815                         rxdctl |= 1 << 24; /* Switch to granularity */
4816                         E1000_WRITE_REG(hw, E1000_RXDCTL(i), rxdctl);
4817                 }
4818         }
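        /*
         * For reference, the 82574 bits OR'd in above decode as
         * 0x20 | (4 << 8) | (4 << 16) | (1 << 24) = 0x01040420:
         * PTHRESH = 32 (prefetch), HTHRESH = 4 (host), WTHRESH = 4
         * (write-back), with bit 24 selecting descriptor granularity
         * for the thresholds.
         */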
4819                 
4820         if (adapter->hw.mac.type >= e1000_pch2lan) {
4821                 if (ifp->if_mtu > ETHERMTU)
4822                         e1000_lv_jumbo_workaround_ich8lan(hw, TRUE);
4823                 else
4824                         e1000_lv_jumbo_workaround_ich8lan(hw, FALSE);
4825         }
4826
4827         /* Make sure VLAN Filters are off */
4828         rctl &= ~E1000_RCTL_VFE;
4829
4830         if (adapter->rx_mbuf_sz == MCLBYTES)
4831                 rctl |= E1000_RCTL_SZ_2048;
4832         else if (adapter->rx_mbuf_sz == MJUMPAGESIZE)
4833                 rctl |= E1000_RCTL_SZ_4096 | E1000_RCTL_BSEX;
4834         else if (adapter->rx_mbuf_sz > MJUMPAGESIZE)
4835                 rctl |= E1000_RCTL_SZ_8192 | E1000_RCTL_BSEX;
4836
4837         /* Clear these bits to ensure we use a DTYPE of 00 here */
4838         rctl &= ~0x00000C00;
4839         /* Write out the settings */
4840         E1000_WRITE_REG(hw, E1000_RCTL, rctl);
4841
4842         return;
4843 }
4844
4845
4846 /*********************************************************************
4847  *
4848  *  This routine executes in interrupt context. It replenishes
4849  *  the mbufs in the descriptor ring and sends data which has been
4850  *  dma'ed into host memory to the upper layer.
4851  *
4852  *  We loop at most count times if count is > 0, or until done if
4853  *  count < 0.
4854  *  
4855  *  For polling we also now return the number of cleaned packets
4856  *********************************************************************/
4857 static bool
4858 em_rxeof(struct rx_ring *rxr, int count, int *done)
4859 {
4860         struct adapter          *adapter = rxr->adapter;
4861         struct ifnet            *ifp = adapter->ifp;
4862         struct mbuf             *mp, *sendmp;
4863         u32                     status = 0;
4864         u16                     len;
4865         int                     i, processed, rxdone = 0;
4866         bool                    eop;
4867         union e1000_rx_desc_extended    *cur;
4868
4869         EM_RX_LOCK(rxr);
4870
4871         /* Sync the ring */
4872         bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
4873             BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
4874
4875
4876 #ifdef DEV_NETMAP
4877         if (netmap_rx_irq(ifp, rxr->me, &processed)) {
4878                 EM_RX_UNLOCK(rxr);
4879                 return (FALSE);
4880         }
4881 #endif /* DEV_NETMAP */
4882
4883         for (i = rxr->next_to_check, processed = 0; count != 0;) {
4884                 if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
4885                         break;
4886
4887                 cur = &rxr->rx_base[i];
4888                 status = le32toh(cur->wb.upper.status_error);
4889                 mp = sendmp = NULL;
4890
4891                 if ((status & E1000_RXD_STAT_DD) == 0)
4892                         break;
4893
4894                 len = le16toh(cur->wb.upper.length);
4895                 eop = (status & E1000_RXD_STAT_EOP) != 0;
4896
4897                 if ((status & E1000_RXDEXT_ERR_FRAME_ERR_MASK) ||
4898                     (rxr->discard == TRUE)) {
4899                         adapter->dropped_pkts++;
4900                         ++rxr->rx_discarded;
4901                         if (!eop) /* Catch subsequent segs */
4902                                 rxr->discard = TRUE;
4903                         else
4904                                 rxr->discard = FALSE;
4905                         em_rx_discard(rxr, i);
4906                         goto next_desc;
4907                 }
4908                 bus_dmamap_unload(rxr->rxtag, rxr->rx_buffers[i].map);
4909
4910                 /* Assign correct length to the current fragment */
4911                 mp = rxr->rx_buffers[i].m_head;
4912                 mp->m_len = len;
4913
4914                 /* Trigger for refresh */
4915                 rxr->rx_buffers[i].m_head = NULL;
4916
4917                 /* First segment? */
4918                 if (rxr->fmp == NULL) {
4919                         mp->m_pkthdr.len = len;
4920                         rxr->fmp = rxr->lmp = mp;
4921                 } else {
4922                         /* Chain mbuf's together */
4923                         mp->m_flags &= ~M_PKTHDR;
4924                         rxr->lmp->m_next = mp;
4925                         rxr->lmp = mp;
4926                         rxr->fmp->m_pkthdr.len += len;
4927                 }
4928
4929                 if (eop) {
4930                         --count;
4931                         sendmp = rxr->fmp;
4932                         sendmp->m_pkthdr.rcvif = ifp;
4933                         ifp->if_ipackets++;
4934                         em_receive_checksum(status, sendmp);
4935 #ifndef __NO_STRICT_ALIGNMENT
4936                         if (adapter->hw.mac.max_frame_size >
4937                             (MCLBYTES - ETHER_ALIGN) &&
4938                             em_fixup_rx(rxr) != 0)
4939                                 goto skip;
4940 #endif
4941                         if (status & E1000_RXD_STAT_VP) {
4942                                 sendmp->m_pkthdr.ether_vtag =
4943                                     le16toh(cur->wb.upper.vlan);
4944                                 sendmp->m_flags |= M_VLANTAG;
4945                         }
4946 #ifndef __NO_STRICT_ALIGNMENT
4947 skip:
4948 #endif
4949                         rxr->fmp = rxr->lmp = NULL;
4950                 }
4951 next_desc:
4952                 /* Sync the ring */
4953                 bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
4954                         BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
4955
4956                 /* Zero out the receive descriptors status. */
4957                 cur->wb.upper.status_error &= htole32(~0xFF);
4958                 ++rxdone;       /* cumulative for POLL */
4959                 ++processed;
4960
4961                 /* Advance our pointers to the next descriptor. */
4962                 if (++i == adapter->num_rx_desc)
4963                         i = 0;
4964
4965                 /* Send to the stack */
4966                 if (sendmp != NULL) {
4967                         rxr->next_to_check = i;
4968                         EM_RX_UNLOCK(rxr);
4969                         (*ifp->if_input)(ifp, sendmp);
4970                         EM_RX_LOCK(rxr);
4971                         i = rxr->next_to_check;
4972                 }
4973
4974                 /* Only refresh mbufs every 8 descriptors */
4975                 if (processed == 8) {
4976                         em_refresh_mbufs(rxr, i);
4977                         processed = 0;
4978                 }
4979         }
4980
4981         /* Catch any remaining refresh work */
4982         if (e1000_rx_unrefreshed(rxr))
4983                 em_refresh_mbufs(rxr, i);
4984
4985         rxr->next_to_check = i;
4986         if (done != NULL)
4987                 *done = rxdone;
4988         EM_RX_UNLOCK(rxr);
4989
4990         return ((status & E1000_RXD_STAT_DD) ? TRUE : FALSE);
4991 }
4992
4993 static __inline void
4994 em_rx_discard(struct rx_ring *rxr, int i)
4995 {
4996         struct em_rxbuffer      *rbuf;
4997
4998         rbuf = &rxr->rx_buffers[i];
4999         bus_dmamap_unload(rxr->rxtag, rbuf->map);
5000
5001         /* Free any previous pieces */
5002         if (rxr->fmp != NULL) {
5003                 rxr->fmp->m_flags |= M_PKTHDR;
5004                 m_freem(rxr->fmp);
5005                 rxr->fmp = NULL;
5006                 rxr->lmp = NULL;
5007         }
5008         /*
5009         ** Free buffer and allow em_refresh_mbufs()
5010         ** to clean up and recharge buffer.
5011         */
5012         if (rbuf->m_head) {
5013                 m_free(rbuf->m_head);
5014                 rbuf->m_head = NULL;
5015         }
5016         return;
5017 }
5018
5019 #ifndef __NO_STRICT_ALIGNMENT
5020 /*
5021  * When jumbo frames are enabled we should realign the entire payload on
5022  * architectures with strict alignment. This is a serious design mistake of
5023  * the 8254x as it nullifies the benefit of DMA operations. The 8254x only
5024  * allows the RX buffer size to be 2048/4096/8192/16384. What we really want
5025  * is 2048 - ETHER_ALIGN to align its payload. On architectures without strict
5026  * alignment restrictions the 8254x still performs unaligned memory accesses,
5027  * which reduce performance too. To avoid copying over an entire frame to
5028  * align it, we allocate a new mbuf and copy the ethernet header to the new
5029  * mbuf. The new mbuf is then prepended to the existing mbuf chain.
5030  *
5031  * Be aware, best performance of the 8254x is achieved only when jumbo frames
5032  * are not used at all on architectures with strict alignment.
5033  */
5034 static int
5035 em_fixup_rx(struct rx_ring *rxr)
5036 {
5037         struct adapter *adapter = rxr->adapter;
5038         struct mbuf *m, *n;
5039         int error;
5040
5041         error = 0;
5042         m = rxr->fmp;
5043         if (m->m_len <= (MCLBYTES - ETHER_HDR_LEN)) {
5044                 bcopy(m->m_data, m->m_data + ETHER_HDR_LEN, m->m_len);
5045                 m->m_data += ETHER_HDR_LEN;
5046         } else {
5047                 MGETHDR(n, M_NOWAIT, MT_DATA);
5048                 if (n != NULL) {
5049                         bcopy(m->m_data, n->m_data, ETHER_HDR_LEN);
5050                         m->m_data += ETHER_HDR_LEN;
5051                         m->m_len -= ETHER_HDR_LEN;
5052                         n->m_len = ETHER_HDR_LEN;
5053                         M_MOVE_PKTHDR(n, m);
5054                         n->m_next = m;
5055                         rxr->fmp = n;
5056                 } else {
5057                         adapter->dropped_pkts++;
5058                         m_freem(rxr->fmp);
5059                         rxr->fmp = NULL;
5060                         error = ENOMEM;
5061                 }
5062         }
5063
5064         return (error);
5065 }
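
/*
 * Alignment arithmetic for the copy-forward case above (illustrative):
 * the receive cluster is at least 4-byte aligned, so with the Ethernet
 * header at offset 0 the IP header lands at offset 14, which is only
 * 2-byte aligned.  Advancing the frame by ETHER_HDR_LEN (14) moves the
 * IP header to offset 28, a multiple of 4, which satisfies
 * strict-alignment architectures.
 */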
5066 #endif
5067
5068 static void
5069 em_setup_rxdesc(union e1000_rx_desc_extended *rxd, const struct em_rxbuffer *rxbuf)
5070 {
5071         rxd->read.buffer_addr = htole64(rxbuf->paddr);
5072         /* DD bits must be cleared */
5073         rxd->wb.upper.status_error = 0;
5074 }
5075
5076 /*********************************************************************
5077  *
5078  *  Verify that the hardware indicated that the checksum is valid.
5079  *  Inform the stack about the status of checksum so that stack
5080  *  doesn't spend time verifying the checksum.
5081  *
5082  *********************************************************************/
5083 static void
5084 em_receive_checksum(uint32_t status, struct mbuf *mp)
5085 {
5086         mp->m_pkthdr.csum_flags = 0;
5087
5088         /* If the Ignore Checksum bit is set, do nothing */
5089         if (status & E1000_RXD_STAT_IXSM)
5090                 return;
5091
5092         /* If the IP checksum exists and there is no IP Checksum error */
5093         if ((status & (E1000_RXD_STAT_IPCS | E1000_RXDEXT_STATERR_IPE)) ==
5094                 E1000_RXD_STAT_IPCS) {
5095                 mp->m_pkthdr.csum_flags = (CSUM_IP_CHECKED | CSUM_IP_VALID);
5096         }
5097
5098         /* TCP or UDP checksum */
5099         if ((status & (E1000_RXD_STAT_TCPCS | E1000_RXDEXT_STATERR_TCPE)) ==
5100             E1000_RXD_STAT_TCPCS) {
5101                 mp->m_pkthdr.csum_flags |= (CSUM_DATA_VALID | CSUM_PSEUDO_HDR);
5102                 mp->m_pkthdr.csum_data = htons(0xffff);
5103         }
5104         if (status & E1000_RXD_STAT_UDPCS) {
5105                 mp->m_pkthdr.csum_flags |= (CSUM_DATA_VALID | CSUM_PSEUDO_HDR);
5106                 mp->m_pkthdr.csum_data = htons(0xffff);
5107         }
5108 }
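
/*
 * Example outcomes (illustrative only): a descriptor with IPCS set and
 * IPE clear marks the mbuf CSUM_IP_CHECKED | CSUM_IP_VALID; one with
 * TCPCS set and TCPE clear additionally gets CSUM_DATA_VALID |
 * CSUM_PSEUDO_HDR with csum_data = 0xffff, which tells the stack the
 * TCP/UDP checksum (including pseudo-header) needs no further checking.
 */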
5109
5110 /*
5111  * This routine is run via a vlan
5112  * config EVENT.
5113  */
5114 static void
5115 em_register_vlan(void *arg, struct ifnet *ifp, u16 vtag)
5116 {
5117         struct adapter  *adapter = ifp->if_softc;
5118         u32             index, bit;
5119
5120         if (ifp->if_softc != arg)       /* Not our event */
5121                 return;
5122
5123         if ((vtag == 0) || (vtag > 4095))       /* Invalid ID */
5124                 return;
5125
5126         EM_CORE_LOCK(adapter);
5127         index = (vtag >> 5) & 0x7F;
5128         bit = vtag & 0x1F;
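        /*
         * Illustrative example: vtag 1000 gives index = (1000 >> 5) & 0x7F
         * = 31 and bit = 1000 & 0x1F = 8, i.e. bit 8 of shadow_vfta[31].
         * The 4096 possible VLAN IDs map onto 128 32-bit VFTA words.
         */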
5129         adapter->shadow_vfta[index] |= (1 << bit);
5130         ++adapter->num_vlans;
5131         /* Re-init to load the changes */
5132         if (ifp->if_capenable & IFCAP_VLAN_HWFILTER)
5133                 em_init_locked(adapter);
5134         EM_CORE_UNLOCK(adapter);
5135 }
5136
5137 /*
5138  * This routine is run via a vlan
5139  * unconfig EVENT.
5140  */
5141 static void
5142 em_unregister_vlan(void *arg, struct ifnet *ifp, u16 vtag)
5143 {
5144         struct adapter  *adapter = ifp->if_softc;
5145         u32             index, bit;
5146
5147         if (ifp->if_softc != arg)
5148                 return;
5149
5150         if ((vtag == 0) || (vtag > 4095))       /* Invalid */
5151                 return;
5152
5153         EM_CORE_LOCK(adapter);
5154         index = (vtag >> 5) & 0x7F;
5155         bit = vtag & 0x1F;
5156         adapter->shadow_vfta[index] &= ~(1 << bit);
5157         --adapter->num_vlans;
5158         /* Re-init to load the changes */
5159         if (ifp->if_capenable & IFCAP_VLAN_HWFILTER)
5160                 em_init_locked(adapter);
5161         EM_CORE_UNLOCK(adapter);
5162 }
5163
5164 static void
5165 em_setup_vlan_hw_support(struct adapter *adapter)
5166 {
5167         struct e1000_hw *hw = &adapter->hw;
5168         u32             reg;
5169
5170         /*
5171         ** We get here thru init_locked, meaning
5172         ** a soft reset; this has already cleared
5173         ** the VFTA and other state, so if no
5174         ** vlans have been registered, do nothing.
5175         */
5176         if (adapter->num_vlans == 0)
5177                 return;
5178
5179         /*
5180         ** A soft reset zeroes out the VFTA, so
5181         ** we need to repopulate it now.
5182         */
5183         for (int i = 0; i < EM_VFTA_SIZE; i++)
5184                 if (adapter->shadow_vfta[i] != 0)
5185                         E1000_WRITE_REG_ARRAY(hw, E1000_VFTA,
5186                             i, adapter->shadow_vfta[i]);
5187
5188         reg = E1000_READ_REG(hw, E1000_CTRL);
5189         reg |= E1000_CTRL_VME;
5190         E1000_WRITE_REG(hw, E1000_CTRL, reg);
5191
5192         /* Enable the Filter Table */
5193         reg = E1000_READ_REG(hw, E1000_RCTL);
5194         reg &= ~E1000_RCTL_CFIEN;
5195         reg |= E1000_RCTL_VFE;
5196         E1000_WRITE_REG(hw, E1000_RCTL, reg);
5197 }
5198
5199 static void
5200 em_enable_intr(struct adapter *adapter)
5201 {
5202         struct e1000_hw *hw = &adapter->hw;
5203         u32 ims_mask = IMS_ENABLE_MASK;
5204
5205         if (hw->mac.type == e1000_82574) {
5206                 E1000_WRITE_REG(hw, EM_EIAC, EM_MSIX_MASK);
5207                 ims_mask |= EM_MSIX_MASK;
5208         } 
5209         E1000_WRITE_REG(hw, E1000_IMS, ims_mask);
5210 }
5211
5212 static void
5213 em_disable_intr(struct adapter *adapter)
5214 {
5215         struct e1000_hw *hw = &adapter->hw;
5216
5217         if (hw->mac.type == e1000_82574)
5218                 E1000_WRITE_REG(hw, EM_EIAC, 0);
5219         E1000_WRITE_REG(&adapter->hw, E1000_IMC, 0xffffffff);
5220 }
5221
5222 /*
5223  * Bit of a misnomer: what this really means is
5224  * to enable OS management of the system... aka
5225  * to disable special hardware management features.
5226  */
5227 static void
5228 em_init_manageability(struct adapter *adapter)
5229 {
5230         /* A shared code workaround */
5231 #define E1000_82542_MANC2H E1000_MANC2H
5232         if (adapter->has_manage) {
5233                 int manc2h = E1000_READ_REG(&adapter->hw, E1000_MANC2H);
5234                 int manc = E1000_READ_REG(&adapter->hw, E1000_MANC);
5235
5236                 /* disable hardware interception of ARP */
5237                 manc &= ~(E1000_MANC_ARP_EN);
5238
5239                 /* enable receiving management packets to the host */
5240                 manc |= E1000_MANC_EN_MNG2HOST;
5241 #define E1000_MNG2HOST_PORT_623 (1 << 5)
5242 #define E1000_MNG2HOST_PORT_664 (1 << 6)
5243                 manc2h |= E1000_MNG2HOST_PORT_623;
5244                 manc2h |= E1000_MNG2HOST_PORT_664;
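                /*
                 * Bits 5 and 6 of MANC2H correspond to UDP ports 623 and
                 * 664, the RMCP and secure-RMCP ports commonly used by
                 * ASF/IPMI-style remote management.
                 */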
5245                 E1000_WRITE_REG(&adapter->hw, E1000_MANC2H, manc2h);
5246                 E1000_WRITE_REG(&adapter->hw, E1000_MANC, manc);
5247         }
5248 }
5249
5250 /*
5251  * Give control back to hardware management
5252  * controller if there is one.
5253  */
5254 static void
5255 em_release_manageability(struct adapter *adapter)
5256 {
5257         if (adapter->has_manage) {
5258                 int manc = E1000_READ_REG(&adapter->hw, E1000_MANC);
5259
5260                 /* re-enable hardware interception of ARP */
5261                 manc |= E1000_MANC_ARP_EN;
5262                 manc &= ~E1000_MANC_EN_MNG2HOST;
5263
5264                 E1000_WRITE_REG(&adapter->hw, E1000_MANC, manc);
5265         }
5266 }
5267
5268 /*
5269  * em_get_hw_control sets the {CTRL_EXT|FWSM}:DRV_LOAD bit.
5270  * For ASF and Pass Through versions of f/w this means
5271  * that the driver is loaded. For AMT version type f/w
5272  * this means that the network i/f is open.
5273  */
5274 static void
5275 em_get_hw_control(struct adapter *adapter)
5276 {
5277         u32 ctrl_ext, swsm;
5278
5279         if (adapter->hw.mac.type == e1000_82573) {
5280                 swsm = E1000_READ_REG(&adapter->hw, E1000_SWSM);
5281                 E1000_WRITE_REG(&adapter->hw, E1000_SWSM,
5282                     swsm | E1000_SWSM_DRV_LOAD);
5283                 return;
5284         }
5285         /* else */
5286         ctrl_ext = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
5287         E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT,
5288             ctrl_ext | E1000_CTRL_EXT_DRV_LOAD);
5289         return;
5290 }
5291
5292 /*
5293  * em_release_hw_control resets {CTRL_EXT|FWSM}:DRV_LOAD bit.
5294  * For ASF and Pass Through versions of f/w this means that
5295  * the driver is no longer loaded. For AMT versions of the
5296  * f/w this means that the network i/f is closed.
5297  */
5298 static void
5299 em_release_hw_control(struct adapter *adapter)
5300 {
5301         u32 ctrl_ext, swsm;
5302
5303         if (!adapter->has_manage)
5304                 return;
5305
5306         if (adapter->hw.mac.type == e1000_82573) {
5307                 swsm = E1000_READ_REG(&adapter->hw, E1000_SWSM);
5308                 E1000_WRITE_REG(&adapter->hw, E1000_SWSM,
5309                     swsm & ~E1000_SWSM_DRV_LOAD);
5310                 return;
5311         }
5312         /* else */
5313         ctrl_ext = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
5314         E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT,
5315             ctrl_ext & ~E1000_CTRL_EXT_DRV_LOAD);
5316         return;
5317 }
5318
5319 static int
5320 em_is_valid_ether_addr(u8 *addr)
5321 {
5322         char zero_addr[6] = { 0, 0, 0, 0, 0, 0 };
5323
5324         if ((addr[0] & 1) || (!bcmp(addr, zero_addr, ETHER_ADDR_LEN))) {
5325                 return (FALSE);
5326         }
5327
5328         return (TRUE);
5329 }
5330
5331 /*
5332 ** Parse the interface capabilities with regard
5333 ** to both system management and wake-on-lan for
5334 ** later use.
5335 */
5336 static void
5337 em_get_wakeup(device_t dev)
5338 {
5339         struct adapter  *adapter = device_get_softc(dev);
5340         u16             eeprom_data = 0, device_id, apme_mask;
5341
5342         adapter->has_manage = e1000_enable_mng_pass_thru(&adapter->hw);
5343         apme_mask = EM_EEPROM_APME;
5344
5345         switch (adapter->hw.mac.type) {
5346         case e1000_82573:
5347         case e1000_82583:
5348                 adapter->has_amt = TRUE;
5349                 /* Falls thru */
5350         case e1000_82571:
5351         case e1000_82572:
5352         case e1000_80003es2lan:
5353                 if (adapter->hw.bus.func == 1) {
5354                         e1000_read_nvm(&adapter->hw,
5355                             NVM_INIT_CONTROL3_PORT_B, 1, &eeprom_data);
5356                         break;
5357                 } else
5358                         e1000_read_nvm(&adapter->hw,
5359                             NVM_INIT_CONTROL3_PORT_A, 1, &eeprom_data);
5360                 break;
5361         case e1000_ich8lan:
5362         case e1000_ich9lan:
5363         case e1000_ich10lan:
5364         case e1000_pchlan:
5365         case e1000_pch2lan:
5366         case e1000_pch_lpt:
5367         case e1000_pch_spt:
5368         case e1000_pch_cnp:
5369                 apme_mask = E1000_WUC_APME;
5370                 adapter->has_amt = TRUE;
5371                 eeprom_data = E1000_READ_REG(&adapter->hw, E1000_WUC);
5372                 break;
5373         default:
5374                 e1000_read_nvm(&adapter->hw,
5375                     NVM_INIT_CONTROL3_PORT_A, 1, &eeprom_data);
5376                 break;
5377         }
5378         if (eeprom_data & apme_mask)
5379                 adapter->wol = (E1000_WUFC_MAG | E1000_WUFC_MC);
5380         /*
5381          * We have the eeprom settings, now apply the special cases
5382          * where the eeprom may be wrong or the board won't support
5383          * wake on lan on a particular port
5384          */
5385         device_id = pci_get_device(dev);
5386         switch (device_id) {
5387         case E1000_DEV_ID_82571EB_FIBER:
5388                 /* Wake events only supported on port A for dual fiber
5389                  * regardless of eeprom setting */
5390                 if (E1000_READ_REG(&adapter->hw, E1000_STATUS) &
5391                     E1000_STATUS_FUNC_1)
5392                         adapter->wol = 0;
5393                 break;
5394         case E1000_DEV_ID_82571EB_QUAD_COPPER:
5395         case E1000_DEV_ID_82571EB_QUAD_FIBER:
5396         case E1000_DEV_ID_82571EB_QUAD_COPPER_LP:
5397                 /* if quad port adapter, disable WoL on all but port A */
5398                 if (global_quad_port_a != 0)
5399                         adapter->wol = 0;
5400                 /* Reset for multiple quad port adapters */
5401                 if (++global_quad_port_a == 4)
5402                         global_quad_port_a = 0;
5403                 break;
5404         }
5405         return;
5406 }
5407
5408
5409 /*
5410  * Enable PCI Wake On Lan capability
5411  */
5412 static void
5413 em_enable_wakeup(device_t dev)
5414 {
5415         struct adapter  *adapter = device_get_softc(dev);
5416         struct ifnet    *ifp = adapter->ifp;
5417         int             error = 0;
5418         u32             pmc, ctrl, ctrl_ext, rctl;
5419         u16             status;
5420
5421         if (pci_find_cap(dev, PCIY_PMG, &pmc) != 0)
5422                 return;
5423
5424         /*
5425         ** Determine type of Wakeup: note that wol
5426         ** is set with all bits on by default.
5427         */
5428         if ((ifp->if_capenable & IFCAP_WOL_MAGIC) == 0)
5429                 adapter->wol &= ~E1000_WUFC_MAG;
5430
5431         if ((ifp->if_capenable & IFCAP_WOL_MCAST) == 0)
5432                 adapter->wol &= ~E1000_WUFC_MC;
5433         else {
5434                 rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
5435                 rctl |= E1000_RCTL_MPE;
5436                 E1000_WRITE_REG(&adapter->hw, E1000_RCTL, rctl);
5437         }
5438
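        /*
         * If no wake events remain enabled, skip programming the wakeup
         * registers and just update the PME bits in PCI config space.
         */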
5439         if (!(adapter->wol & (E1000_WUFC_EX | E1000_WUFC_MAG | E1000_WUFC_MC)))
5440                 goto pme;
5441
5442         /* Advertise the wakeup capability */
5443         ctrl = E1000_READ_REG(&adapter->hw, E1000_CTRL);
5444         ctrl |= (E1000_CTRL_SWDPIN2 | E1000_CTRL_SWDPIN3);
5445         E1000_WRITE_REG(&adapter->hw, E1000_CTRL, ctrl);
5446
5447         /* Keep the laser running on Fiber adapters */
5448         if (adapter->hw.phy.media_type == e1000_media_type_fiber ||
5449             adapter->hw.phy.media_type == e1000_media_type_internal_serdes) {
5450                 ctrl_ext = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
5451                 ctrl_ext |= E1000_CTRL_EXT_SDP3_DATA;
5452                 E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT, ctrl_ext);
5453         }
5454
5455         if ((adapter->hw.mac.type == e1000_ich8lan) ||
5456             (adapter->hw.mac.type == e1000_pchlan) ||
5457             (adapter->hw.mac.type == e1000_ich9lan) ||
5458             (adapter->hw.mac.type == e1000_ich10lan))
5459                 e1000_suspend_workarounds_ich8lan(&adapter->hw);
5460
5461         if ((adapter->hw.mac.type == e1000_pchlan)  ||
5462             (adapter->hw.mac.type == e1000_pch2lan) ||
5463             (adapter->hw.mac.type == e1000_pch_lpt) ||
5464             (adapter->hw.mac.type == e1000_pch_spt) ||
5465             (adapter->hw.mac.type == e1000_pch_cnp)) {
5466                 error = em_enable_phy_wakeup(adapter);
5467                 if (error)
5468                         goto pme;
5469         } else {
5470                 /* Enable wakeup by the MAC */
5471                 E1000_WRITE_REG(&adapter->hw, E1000_WUC, E1000_WUC_PME_EN);
5472                 E1000_WRITE_REG(&adapter->hw, E1000_WUFC, adapter->wol);
5473         }
5474
5475         if (adapter->hw.phy.type == e1000_phy_igp_3)
5476                 e1000_igp3_phy_powerdown_workaround_ich8lan(&adapter->hw);
5477
5478 pme:
5479         status = pci_read_config(dev, pmc + PCIR_POWER_STATUS, 2);
5480         status &= ~(PCIM_PSTAT_PME | PCIM_PSTAT_PMEENABLE);
5481         if (!error && (ifp->if_capenable & IFCAP_WOL))
5482                 status |= PCIM_PSTAT_PME | PCIM_PSTAT_PMEENABLE;
5483         pci_write_config(dev, pmc + PCIR_POWER_STATUS, status, 2);
5484
5485         return;
5486 }
5487
5488 /*
5489 ** WOL in the newer chipset interfaces (pchlan)
5490 ** requires settings to be copied into the PHY
5491 */
5492 static int
5493 em_enable_phy_wakeup(struct adapter *adapter)
5494 {
5495         struct e1000_hw *hw = &adapter->hw;
5496         u32 mreg, ret = 0;
5497         u16 preg;
5498
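        /*
         * With PHY-based wakeup the PHY, not the MAC, screens incoming
         * packets while the part is in a low-power state, so the MAC's
         * receive address and multicast filters are mirrored into the
         * corresponding BM PHY registers first.
         */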
5499         /* copy MAC RARs to PHY RARs */
5500         e1000_copy_rx_addrs_to_phy_ich8lan(hw);
5501
5502         /* copy MAC MTA to PHY MTA */
5503         for (int i = 0; i < adapter->hw.mac.mta_reg_count; i++) {
5504                 mreg = E1000_READ_REG_ARRAY(hw, E1000_MTA, i);
5505                 e1000_write_phy_reg(hw, BM_MTA(i), (u16)(mreg & 0xFFFF));
5506                 e1000_write_phy_reg(hw, BM_MTA(i) + 1,
5507                     (u16)((mreg >> 16) & 0xFFFF));
5508         }
5509
5510         /* configure PHY Rx Control register */
5511         e1000_read_phy_reg(&adapter->hw, BM_RCTL, &preg);
5512         mreg = E1000_READ_REG(hw, E1000_RCTL);
5513         if (mreg & E1000_RCTL_UPE)
5514                 preg |= BM_RCTL_UPE;
5515         if (mreg & E1000_RCTL_MPE)
5516                 preg |= BM_RCTL_MPE;
5517         preg &= ~(BM_RCTL_MO_MASK);
5518         if (mreg & E1000_RCTL_MO_3)
5519                 preg |= (((mreg & E1000_RCTL_MO_3) >> E1000_RCTL_MO_SHIFT)
5520                                 << BM_RCTL_MO_SHIFT);
5521         if (mreg & E1000_RCTL_BAM)
5522                 preg |= BM_RCTL_BAM;
5523         if (mreg & E1000_RCTL_PMCF)
5524                 preg |= BM_RCTL_PMCF;
5525         mreg = E1000_READ_REG(hw, E1000_CTRL);
5526         if (mreg & E1000_CTRL_RFCE)
5527                 preg |= BM_RCTL_RFCE;
5528         e1000_write_phy_reg(&adapter->hw, BM_RCTL, preg);
5529
5530         /* enable PHY wakeup in MAC register */
5531         E1000_WRITE_REG(hw, E1000_WUC,
5532             E1000_WUC_PHY_WAKE | E1000_WUC_PME_EN);
5533         E1000_WRITE_REG(hw, E1000_WUFC, adapter->wol);
5534
5535         /* configure and enable PHY wakeup in PHY registers */
5536         e1000_write_phy_reg(&adapter->hw, BM_WUFC, adapter->wol);
5537         e1000_write_phy_reg(&adapter->hw, BM_WUC, E1000_WUC_PME_EN);
5538
5539         /* activate PHY wakeup */
5540         ret = hw->phy.ops.acquire(hw);
5541         if (ret) {
5542                 printf("Could not acquire PHY\n");
5543                 return ret;
5544         }
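        /*
         * The wakeup-enable bits live on BM PHY page 769
         * (BM_WUC_ENABLE_PAGE); select that page before the
         * read-modify-write of BM_WUC_ENABLE_REG below.
         */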
5545         e1000_write_phy_reg_mdic(hw, IGP01E1000_PHY_PAGE_SELECT,
5546                                  (BM_WUC_ENABLE_PAGE << IGP_PAGE_SHIFT));
5547         ret = e1000_read_phy_reg_mdic(hw, BM_WUC_ENABLE_REG, &preg);
5548         if (ret) {
5549                 printf("Could not read PHY page 769\n");
5550                 goto out;
5551         }
5552         preg |= BM_WUC_ENABLE_BIT | BM_WUC_HOST_WU_BIT;
5553         ret = e1000_write_phy_reg_mdic(hw, BM_WUC_ENABLE_REG, preg);
5554         if (ret)
5555                 printf("Could not set PHY Host Wakeup bit\n");
5556 out:
5557         hw->phy.ops.release(hw);
5558
5559         return ret;
5560 }
5561
5562 static void
5563 em_led_func(void *arg, int onoff)
5564 {
5565         struct adapter  *adapter = arg;
5566  
5567         EM_CORE_LOCK(adapter);
5568         if (onoff) {
5569                 e1000_setup_led(&adapter->hw);
5570                 e1000_led_on(&adapter->hw);
5571         } else {
5572                 e1000_led_off(&adapter->hw);
5573                 e1000_cleanup_led(&adapter->hw);
5574         }
5575         EM_CORE_UNLOCK(adapter);
5576 }
5577
5578 /*
5579 ** Disable the L0s and L1 link states
5580 */
5581 static void
5582 em_disable_aspm(struct adapter *adapter)
5583 {
5584         int             base, reg;
5585         u16             link_cap, link_ctrl;
5586         device_t        dev = adapter->dev;
5587
5588         switch (adapter->hw.mac.type) {
5589                 case e1000_82573:
5590                 case e1000_82574:
5591                 case e1000_82583:
5592                         break;
5593                 default:
5594                         return;
5595         }
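        /*
         * Only the 82573/82574/82583 class parts reach this point;
         * ASPM L0s/L1 entry is reported to cause hangs and performance
         * problems on these controllers, so force both states off.
         */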
5596         if (pci_find_cap(dev, PCIY_EXPRESS, &base) != 0)
5597                 return;
5598         reg = base + PCIER_LINK_CAP;
5599         link_cap = pci_read_config(dev, reg, 2);
5600         if ((link_cap & PCIEM_LINK_CAP_ASPM) == 0)
5601                 return;
5602         reg = base + PCIER_LINK_CTL;
5603         link_ctrl = pci_read_config(dev, reg, 2);
5604         link_ctrl &= ~PCIEM_LINK_CTL_ASPMC;
5605         pci_write_config(dev, reg, link_ctrl, 2);
5606         return;
5607 }
5608
5609 /**********************************************************************
5610  *
5611  *  Update the board statistics counters.
5612  *
5613  **********************************************************************/
5614 static void
5615 em_update_stats_counters(struct adapter *adapter)
5616 {
5617         struct ifnet   *ifp;
5618
5619         if (adapter->hw.phy.media_type == e1000_media_type_copper ||
5620            (E1000_READ_REG(&adapter->hw, E1000_STATUS) & E1000_STATUS_LU)) {
5621                 adapter->stats.symerrs += E1000_READ_REG(&adapter->hw, E1000_SYMERRS);
5622                 adapter->stats.sec += E1000_READ_REG(&adapter->hw, E1000_SEC);
5623         }
5624         adapter->stats.crcerrs += E1000_READ_REG(&adapter->hw, E1000_CRCERRS);
5625         adapter->stats.mpc += E1000_READ_REG(&adapter->hw, E1000_MPC);
5626         adapter->stats.scc += E1000_READ_REG(&adapter->hw, E1000_SCC);
5627         adapter->stats.ecol += E1000_READ_REG(&adapter->hw, E1000_ECOL);
5628
5629         adapter->stats.mcc += E1000_READ_REG(&adapter->hw, E1000_MCC);
5630         adapter->stats.latecol += E1000_READ_REG(&adapter->hw, E1000_LATECOL);
5631         adapter->stats.colc += E1000_READ_REG(&adapter->hw, E1000_COLC);
5632         adapter->stats.dc += E1000_READ_REG(&adapter->hw, E1000_DC);
5633         adapter->stats.rlec += E1000_READ_REG(&adapter->hw, E1000_RLEC);
5634         adapter->stats.xonrxc += E1000_READ_REG(&adapter->hw, E1000_XONRXC);
5635         adapter->stats.xontxc += E1000_READ_REG(&adapter->hw, E1000_XONTXC);
5636         adapter->stats.xoffrxc += E1000_READ_REG(&adapter->hw, E1000_XOFFRXC);
5637         adapter->stats.xofftxc += E1000_READ_REG(&adapter->hw, E1000_XOFFTXC);
5638         adapter->stats.fcruc += E1000_READ_REG(&adapter->hw, E1000_FCRUC);
5639         adapter->stats.prc64 += E1000_READ_REG(&adapter->hw, E1000_PRC64);
5640         adapter->stats.prc127 += E1000_READ_REG(&adapter->hw, E1000_PRC127);
5641         adapter->stats.prc255 += E1000_READ_REG(&adapter->hw, E1000_PRC255);
5642         adapter->stats.prc511 += E1000_READ_REG(&adapter->hw, E1000_PRC511);
5643         adapter->stats.prc1023 += E1000_READ_REG(&adapter->hw, E1000_PRC1023);
5644         adapter->stats.prc1522 += E1000_READ_REG(&adapter->hw, E1000_PRC1522);
5645         adapter->stats.gprc += E1000_READ_REG(&adapter->hw, E1000_GPRC);
5646         adapter->stats.bprc += E1000_READ_REG(&adapter->hw, E1000_BPRC);
5647         adapter->stats.mprc += E1000_READ_REG(&adapter->hw, E1000_MPRC);
5648         adapter->stats.gptc += E1000_READ_REG(&adapter->hw, E1000_GPTC);
5649
5650         /* For the 64-bit byte counters the low dword must be read first. */
5651         /* Both registers clear on the read of the high dword */
5652
5653         adapter->stats.gorc += E1000_READ_REG(&adapter->hw, E1000_GORCL) +
5654             ((u64)E1000_READ_REG(&adapter->hw, E1000_GORCH) << 32);
5655         adapter->stats.gotc += E1000_READ_REG(&adapter->hw, E1000_GOTCL) +
5656             ((u64)E1000_READ_REG(&adapter->hw, E1000_GOTCH) << 32);
5657
5658         adapter->stats.rnbc += E1000_READ_REG(&adapter->hw, E1000_RNBC);
5659         adapter->stats.ruc += E1000_READ_REG(&adapter->hw, E1000_RUC);
5660         adapter->stats.rfc += E1000_READ_REG(&adapter->hw, E1000_RFC);
5661         adapter->stats.roc += E1000_READ_REG(&adapter->hw, E1000_ROC);
5662         adapter->stats.rjc += E1000_READ_REG(&adapter->hw, E1000_RJC);
5663
5664         adapter->stats.tor += E1000_READ_REG(&adapter->hw, E1000_TORH);
5665         adapter->stats.tot += E1000_READ_REG(&adapter->hw, E1000_TOTH);
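        /*
         * Note that only the high dwords of TOR/TOT are accumulated
         * here; per the comment above, reading the high dword clears
         * the pair, so the low 32 bits of each sample are not folded in.
         */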
5666
5667         adapter->stats.tpr += E1000_READ_REG(&adapter->hw, E1000_TPR);
5668         adapter->stats.tpt += E1000_READ_REG(&adapter->hw, E1000_TPT);
5669         adapter->stats.ptc64 += E1000_READ_REG(&adapter->hw, E1000_PTC64);
5670         adapter->stats.ptc127 += E1000_READ_REG(&adapter->hw, E1000_PTC127);
5671         adapter->stats.ptc255 += E1000_READ_REG(&adapter->hw, E1000_PTC255);
5672         adapter->stats.ptc511 += E1000_READ_REG(&adapter->hw, E1000_PTC511);
5673         adapter->stats.ptc1023 += E1000_READ_REG(&adapter->hw, E1000_PTC1023);
5674         adapter->stats.ptc1522 += E1000_READ_REG(&adapter->hw, E1000_PTC1522);
5675         adapter->stats.mptc += E1000_READ_REG(&adapter->hw, E1000_MPTC);
5676         adapter->stats.bptc += E1000_READ_REG(&adapter->hw, E1000_BPTC);
5677
5678         /* Interrupt Counts */
5679
5680         adapter->stats.iac += E1000_READ_REG(&adapter->hw, E1000_IAC);
5681         adapter->stats.icrxptc += E1000_READ_REG(&adapter->hw, E1000_ICRXPTC);
5682         adapter->stats.icrxatc += E1000_READ_REG(&adapter->hw, E1000_ICRXATC);
5683         adapter->stats.ictxptc += E1000_READ_REG(&adapter->hw, E1000_ICTXPTC);
5684         adapter->stats.ictxatc += E1000_READ_REG(&adapter->hw, E1000_ICTXATC);
5685         adapter->stats.ictxqec += E1000_READ_REG(&adapter->hw, E1000_ICTXQEC);
5686         adapter->stats.ictxqmtc += E1000_READ_REG(&adapter->hw, E1000_ICTXQMTC);
5687         adapter->stats.icrxdmtc += E1000_READ_REG(&adapter->hw, E1000_ICRXDMTC);
5688         adapter->stats.icrxoc += E1000_READ_REG(&adapter->hw, E1000_ICRXOC);
5689
5690         if (adapter->hw.mac.type >= e1000_82543) {
5691                 adapter->stats.algnerrc +=
5692                     E1000_READ_REG(&adapter->hw, E1000_ALGNERRC);
5693                 adapter->stats.rxerrc +=
5694                     E1000_READ_REG(&adapter->hw, E1000_RXERRC);
5695                 adapter->stats.tncrs +=
5696                     E1000_READ_REG(&adapter->hw, E1000_TNCRS);
5697                 adapter->stats.cexterr +=
5698                     E1000_READ_REG(&adapter->hw, E1000_CEXTERR);
5699                 adapter->stats.tsctc +=
5700                     E1000_READ_REG(&adapter->hw, E1000_TSCTC);
5701                 adapter->stats.tsctfc +=
5702                     E1000_READ_REG(&adapter->hw, E1000_TSCTFC);
5703         }
5704         ifp = adapter->ifp;
5705
5706         ifp->if_collisions = adapter->stats.colc;
5707
5708         /* Rx Errors */
5709         ifp->if_ierrors = adapter->dropped_pkts + adapter->stats.rxerrc +
5710             adapter->stats.crcerrs + adapter->stats.algnerrc +
5711             adapter->stats.ruc + adapter->stats.roc +
5712             adapter->stats.mpc + adapter->stats.cexterr;
5713
5714         /* Tx Errors */
5715         ifp->if_oerrors = adapter->stats.ecol +
5716             adapter->stats.latecol + adapter->watchdog_events;
5717 }
5718
5719 /* Export a single 32-bit register via a read-only sysctl. */
5720 static int
5721 em_sysctl_reg_handler(SYSCTL_HANDLER_ARGS)
5722 {
5723         struct adapter *adapter;
5724         u_int val;
5725
5726         adapter = oidp->oid_arg1;
5727         val = E1000_READ_REG(&adapter->hw, oidp->oid_arg2);
5728         return (sysctl_handle_int(oidp, &val, 0, req));
5729 }
5730
5731 /*
5732  * Add sysctl variables, one per statistic, to the system.
5733  */
5734 static void
5735 em_add_hw_stats(struct adapter *adapter)
5736 {
5737         device_t dev = adapter->dev;
5738
5739         struct tx_ring *txr = adapter->tx_rings;
5740         struct rx_ring *rxr = adapter->rx_rings;
5741
5742         struct sysctl_ctx_list *ctx = device_get_sysctl_ctx(dev);
5743         struct sysctl_oid *tree = device_get_sysctl_tree(dev);
5744         struct sysctl_oid_list *child = SYSCTL_CHILDREN(tree);
5745         struct e1000_hw_stats *stats = &adapter->stats;
5746
5747         struct sysctl_oid *stat_node, *queue_node, *int_node;
5748         struct sysctl_oid_list *stat_list, *queue_list, *int_list;
5749
5750 #define QUEUE_NAME_LEN 32
5751         char namebuf[QUEUE_NAME_LEN];
5752         
5753         /* Driver Statistics */
5754         SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "dropped", 
5755                         CTLFLAG_RD, &adapter->dropped_pkts,
5756                         "Driver dropped packets");
5757         SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "link_irq",
5758                         CTLFLAG_RD, &adapter->link_irq,
5759                         "Link MSIX IRQ Handled");
5760         SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "mbuf_defrag_fail", 
5761                          CTLFLAG_RD, &adapter->mbuf_defrag_failed,
5762                          "Defragmenting mbuf chain failed");
5763         SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "tx_dma_fail", 
5764                         CTLFLAG_RD, &adapter->no_tx_dma_setup,
5765                         "Driver tx dma failure in xmit");
5766         SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "rx_overruns",
5767                         CTLFLAG_RD, &adapter->rx_overruns,
5768                         "RX overruns");
5769         SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "watchdog_timeouts",
5770                         CTLFLAG_RD, &adapter->watchdog_events,
5771                         "Watchdog timeouts");
5772         
5773         SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "device_control",
5774                         CTLTYPE_UINT | CTLFLAG_RD, adapter, E1000_CTRL,
5775                         em_sysctl_reg_handler, "IU",
5776                         "Device Control Register");
5777         SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "rx_control",
5778                         CTLTYPE_UINT | CTLFLAG_RD, adapter, E1000_RCTL,
5779                         em_sysctl_reg_handler, "IU",
5780                         "Receiver Control Register");
5781         SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "fc_high_water",
5782                         CTLFLAG_RD, &adapter->hw.fc.high_water, 0,
5783                         "Flow Control High Watermark");
5784         SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "fc_low_water", 
5785                         CTLFLAG_RD, &adapter->hw.fc.low_water, 0,
5786                         "Flow Control Low Watermark");
5787
5788         for (int i = 0; i < adapter->num_queues; i++, txr++, rxr++) {
5789                 snprintf(namebuf, QUEUE_NAME_LEN, "queue_tx_%d", i);
5790                 queue_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, namebuf,
5791                                             CTLFLAG_RD, NULL, "TX Queue Name");
5792                 queue_list = SYSCTL_CHILDREN(queue_node);
5793
5794                 SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "txd_head", 
5795                                 CTLTYPE_UINT | CTLFLAG_RD, adapter,
5796                                 E1000_TDH(txr->me),
5797                                 em_sysctl_reg_handler, "IU",
5798                                 "Transmit Descriptor Head");
5799                 SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "txd_tail", 
5800                                 CTLTYPE_UINT | CTLFLAG_RD, adapter,
5801                                 E1000_TDT(txr->me),
5802                                 em_sysctl_reg_handler, "IU",
5803                                 "Transmit Descriptor Tail");
5804                 SYSCTL_ADD_ULONG(ctx, queue_list, OID_AUTO, "tx_irq",
5805                                 CTLFLAG_RD, &txr->tx_irq,
5806                                 "Queue MSI-X Transmit Interrupts");
5807                 SYSCTL_ADD_ULONG(ctx, queue_list, OID_AUTO, "no_desc_avail", 
5808                                 CTLFLAG_RD, &txr->no_desc_avail,
5809                                 "Queue No Descriptor Available");
5810
5811                 snprintf(namebuf, QUEUE_NAME_LEN, "queue_rx_%d", i);
5812                 queue_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, namebuf,
5813                                             CTLFLAG_RD, NULL, "RX Queue Name");
5814                 queue_list = SYSCTL_CHILDREN(queue_node);
5815
5816                 SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "rxd_head", 
5817                                 CTLTYPE_UINT | CTLFLAG_RD, adapter,
5818                                 E1000_RDH(rxr->me),
5819                                 em_sysctl_reg_handler, "IU",
5820                                 "Receive Descriptor Head");
5821                 SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "rxd_tail", 
5822                                 CTLTYPE_UINT | CTLFLAG_RD, adapter,
5823                                 E1000_RDT(rxr->me),
5824                                 em_sysctl_reg_handler, "IU",
5825                                 "Receive Descriptor Tail");
5826                 SYSCTL_ADD_ULONG(ctx, queue_list, OID_AUTO, "rx_irq",
5827                                 CTLFLAG_RD, &rxr->rx_irq,
5828                                 "Queue MSI-X Receive Interrupts");
5829         }
5830
5831         /* MAC stats get their own sub node */
5832
5833         stat_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "mac_stats", 
5834                                     CTLFLAG_RD, NULL, "Statistics");
5835         stat_list = SYSCTL_CHILDREN(stat_node);
5836
5837         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "excess_coll",
5838                         CTLFLAG_RD, &stats->ecol,
5839                         "Excessive collisions");
5840         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "single_coll",
5841                         CTLFLAG_RD, &stats->scc,
5842                         "Single collisions");
5843         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "multiple_coll",
5844                         CTLFLAG_RD, &stats->mcc,
5845                         "Multiple collisions");
5846         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "late_coll",
5847                         CTLFLAG_RD, &stats->latecol,
5848                         "Late collisions");
5849         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "collision_count",
5850                         CTLFLAG_RD, &stats->colc,
5851                         "Collision Count");
5852         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "symbol_errors",
5853                         CTLFLAG_RD, &adapter->stats.symerrs,
5854                         "Symbol Errors");
5855         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "sequence_errors",
5856                         CTLFLAG_RD, &adapter->stats.sec,
5857                         "Sequence Errors");
5858         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "defer_count",
5859                         CTLFLAG_RD, &adapter->stats.dc,
5860                         "Defer Count");
5861         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "missed_packets",
5862                         CTLFLAG_RD, &adapter->stats.mpc,
5863                         "Missed Packets");
5864         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_no_buff",
5865                         CTLFLAG_RD, &adapter->stats.rnbc,
5866                         "Receive No Buffers");
5867         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_undersize",
5868                         CTLFLAG_RD, &adapter->stats.ruc,
5869                         "Receive Undersize");
5870         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_fragmented",
5871                         CTLFLAG_RD, &adapter->stats.rfc,
5872                         "Fragmented Packets Received ");
5873         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_oversize",
5874                         CTLFLAG_RD, &adapter->stats.roc,
5875                         "Oversized Packets Received");
5876         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_jabber",
5877                         CTLFLAG_RD, &adapter->stats.rjc,
5878                         "Recevied Jabber");
5879         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_errs",
5880                         CTLFLAG_RD, &adapter->stats.rxerrc,
5881                         "Receive Errors");
5882         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "crc_errs",
5883                         CTLFLAG_RD, &adapter->stats.crcerrs,
5884                         "CRC errors");
5885         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "alignment_errs",
5886                         CTLFLAG_RD, &adapter->stats.algnerrc,
5887                         "Alignment Errors");
5888         /* On 82575 these are collision counts */
5889         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "coll_ext_errs",
5890                         CTLFLAG_RD, &adapter->stats.cexterr,
5891                         "Collision/Carrier extension errors");
5892         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "xon_recvd",
5893                         CTLFLAG_RD, &adapter->stats.xonrxc,
5894                         "XON Received");
5895         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "xon_txd",
5896                         CTLFLAG_RD, &adapter->stats.xontxc,
5897                         "XON Transmitted");
5898         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "xoff_recvd",
5899                         CTLFLAG_RD, &adapter->stats.xoffrxc,
5900                         "XOFF Received");
5901         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "xoff_txd",
5902                         CTLFLAG_RD, &adapter->stats.xofftxc,
5903                         "XOFF Transmitted");
5904
5905         /* Packet Reception Stats */
5906         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "total_pkts_recvd",
5907                         CTLFLAG_RD, &adapter->stats.tpr,
5908                         "Total Packets Received ");
5909         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_pkts_recvd",
5910                         CTLFLAG_RD, &adapter->stats.gprc,
5911                         "Good Packets Received");
5912         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "bcast_pkts_recvd",
5913                         CTLFLAG_RD, &adapter->stats.bprc,
5914                         "Broadcast Packets Received");
5915         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "mcast_pkts_recvd",
5916                         CTLFLAG_RD, &adapter->stats.mprc,
5917                         "Multicast Packets Received");
5918         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_64",
5919                         CTLFLAG_RD, &adapter->stats.prc64,
5920                         "64 byte frames received ");
5921         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_65_127",
5922                         CTLFLAG_RD, &adapter->stats.prc127,
5923                         "65-127 byte frames received");
5924         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_128_255",
5925                         CTLFLAG_RD, &adapter->stats.prc255,
5926                         "128-255 byte frames received");
5927         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_256_511",
5928                         CTLFLAG_RD, &adapter->stats.prc511,
5929                         "256-511 byte frames received");
5930         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_512_1023",
5931                         CTLFLAG_RD, &adapter->stats.prc1023,
5932                         "512-1023 byte frames received");
5933         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_1024_1522",
5934                         CTLFLAG_RD, &adapter->stats.prc1522,
5935                         "1023-1522 byte frames received");
5936         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_octets_recvd",
5937                         CTLFLAG_RD, &adapter->stats.gorc, 
5938                         "Good Octets Received"); 
5939
5940         /* Packet Transmission Stats */
5941         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_octets_txd",
5942                         CTLFLAG_RD, &adapter->stats.gotc, 
5943                         "Good Octets Transmitted"); 
5944         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "total_pkts_txd",
5945                         CTLFLAG_RD, &adapter->stats.tpt,
5946                         "Total Packets Transmitted");
5947         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_pkts_txd",
5948                         CTLFLAG_RD, &adapter->stats.gptc,
5949                         "Good Packets Transmitted");
5950         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "bcast_pkts_txd",
5951                         CTLFLAG_RD, &adapter->stats.bptc,
5952                         "Broadcast Packets Transmitted");
5953         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "mcast_pkts_txd",
5954                         CTLFLAG_RD, &adapter->stats.mptc,
5955                         "Multicast Packets Transmitted");
5956         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_64",
5957                         CTLFLAG_RD, &adapter->stats.ptc64,
5958                         "64 byte frames transmitted ");
5959         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_65_127",
5960                         CTLFLAG_RD, &adapter->stats.ptc127,
5961                         "65-127 byte frames transmitted");
5962         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_128_255",
5963                         CTLFLAG_RD, &adapter->stats.ptc255,
5964                         "128-255 byte frames transmitted");
5965         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_256_511",
5966                         CTLFLAG_RD, &adapter->stats.ptc511,
5967                         "256-511 byte frames transmitted");
5968         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_512_1023",
5969                         CTLFLAG_RD, &adapter->stats.ptc1023,
5970                         "512-1023 byte frames transmitted");
5971         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_1024_1522",
5972                         CTLFLAG_RD, &adapter->stats.ptc1522,
5973                         "1024-1522 byte frames transmitted");
5974         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tso_txd",
5975                         CTLFLAG_RD, &adapter->stats.tsctc,
5976                         "TSO Contexts Transmitted");
5977         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tso_ctx_fail",
5978                         CTLFLAG_RD, &adapter->stats.tsctfc,
5979                         "TSO Contexts Failed");
5980
5981
5982         /* Interrupt Stats */
5983
5984         int_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "interrupts", 
5985                                     CTLFLAG_RD, NULL, "Interrupt Statistics");
5986         int_list = SYSCTL_CHILDREN(int_node);
5987
5988         SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "asserts",
5989                         CTLFLAG_RD, &adapter->stats.iac,
5990                         "Interrupt Assertion Count");
5991
5992         SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "rx_pkt_timer",
5993                         CTLFLAG_RD, &adapter->stats.icrxptc,
5994                         "Interrupt Cause Rx Pkt Timer Expire Count");
5995
5996         SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "rx_abs_timer",
5997                         CTLFLAG_RD, &adapter->stats.icrxatc,
5998                         "Interrupt Cause Rx Abs Timer Expire Count");
5999
6000         SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "tx_pkt_timer",
6001                         CTLFLAG_RD, &adapter->stats.ictxptc,
6002                         "Interrupt Cause Tx Pkt Timer Expire Count");
6003
6004         SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "tx_abs_timer",
6005                         CTLFLAG_RD, &adapter->stats.ictxatc,
6006                         "Interrupt Cause Tx Abs Timer Expire Count");
6007
6008         SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "tx_queue_empty",
6009                         CTLFLAG_RD, &adapter->stats.ictxqec,
6010                         "Interrupt Cause Tx Queue Empty Count");
6011
6012         SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "tx_queue_min_thresh",
6013                         CTLFLAG_RD, &adapter->stats.ictxqmtc,
6014                         "Interrupt Cause Tx Queue Min Thresh Count");
6015
6016         SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "rx_desc_min_thresh",
6017                         CTLFLAG_RD, &adapter->stats.icrxdmtc,
6018                         "Interrupt Cause Rx Desc Min Thresh Count");
6019
6020         SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "rx_overrun",
6021                         CTLFLAG_RD, &adapter->stats.icrxoc,
6022                         "Interrupt Cause Receiver Overrun Count");
6023 }
6024
6025 /**********************************************************************
6026  *
6027  *  This routine provides a way to dump out the adapter eeprom,
6028  *  often a useful debug/service tool. Only the first 32 words are
6029  *  dumped; the data that matters lives within that range.
6030  *
6031  **********************************************************************/
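/*
 * Typically exposed as a dev.em.<unit> sysctl; the oid name is assigned
 * where this handler is registered (outside this excerpt). Writing 1
 * triggers the dump.
 */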
6032 static int
6033 em_sysctl_nvm_info(SYSCTL_HANDLER_ARGS)
6034 {
6035         struct adapter *adapter = (struct adapter *)arg1;
6036         int error;
6037         int result;
6038
6039         result = -1;
6040         error = sysctl_handle_int(oidp, &result, 0, req);
6041
6042         if (error || !req->newptr)
6043                 return (error);
6044
6045         /*
6046          * This value will cause a hex dump of the
6047          * first 32 16-bit words of the EEPROM to
6048          * the screen.
6049          */
6050         if (result == 1)
6051                 em_print_nvm_info(adapter);
6052
6053         return (error);
6054 }
6055
6056 static void
6057 em_print_nvm_info(struct adapter *adapter)
6058 {
6059         u16     eeprom_data;
6060         int     i, j, row = 0;
6061
6062         /* It's a bit crude, but it gets the job done */
6063         printf("\nInterface EEPROM Dump:\n");
6064         printf("Offset\n0x0000  ");
6065         for (i = 0, j = 0; i < 32; i++, j++) {
6066                 if (j == 8) { /* Make the offset block */
6067                         j = 0; ++row;
6068                         printf("\n0x00%x0  ", row);
6069                 }
6070                 e1000_read_nvm(&adapter->hw, i, 1, &eeprom_data);
6071                 printf("%04x ", eeprom_data);
6072         }
6073         printf("\n");
6074 }
6075
6076 static int
6077 em_sysctl_int_delay(SYSCTL_HANDLER_ARGS)
6078 {
6079         struct em_int_delay_info *info;
6080         struct adapter *adapter;
6081         u32 regval;
6082         int error, usecs, ticks;
6083
6084         info = (struct em_int_delay_info *)arg1;
6085         usecs = info->value;
6086         error = sysctl_handle_int(oidp, &usecs, 0, req);
6087         if (error != 0 || req->newptr == NULL)
6088                 return (error);
6089         if (usecs < 0 || usecs > EM_TICKS_TO_USECS(65535))
6090                 return (EINVAL);
6091         info->value = usecs;
6092         ticks = EM_USECS_TO_TICKS(usecs);
6093         if (info->offset == E1000_ITR)  /* units are 256ns here */
6094                 ticks *= 4;
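        /*
         * Worked example (assuming EM_USECS_TO_TICKS() converts to the
         * hardware's 1.024us granularity, as defined in if_em.h):
         * usecs = 100 gives ticks = 102, and for E1000_ITR, whose unit
         * is 256ns (a quarter tick), that is scaled up to 408.
         */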
6095
6096         adapter = info->adapter;
6097         
6098         EM_CORE_LOCK(adapter);
6099         regval = E1000_READ_OFFSET(&adapter->hw, info->offset);
6100         regval = (regval & ~0xffff) | (ticks & 0xffff);
6101         /* Handle a few special cases. */
6102         switch (info->offset) {
6103         case E1000_RDTR:
6104                 break;
6105         case E1000_TIDV:
6106                 if (ticks == 0) {
6107                         adapter->txd_cmd &= ~E1000_TXD_CMD_IDE;
6108                         /* Don't write 0 into the TIDV register. */
6109                         regval++;
6110                 } else
6111                         adapter->txd_cmd |= E1000_TXD_CMD_IDE;
6112                 break;
6113         }
6114         E1000_WRITE_OFFSET(&adapter->hw, info->offset, regval);
6115         EM_CORE_UNLOCK(adapter);
6116         return (0);
6117 }
6118
6119 static void
6120 em_add_int_delay_sysctl(struct adapter *adapter, const char *name,
6121         const char *description, struct em_int_delay_info *info,
6122         int offset, int value)
6123 {
6124         info->adapter = adapter;
6125         info->offset = offset;
6126         info->value = value;
6127         SYSCTL_ADD_PROC(device_get_sysctl_ctx(adapter->dev),
6128             SYSCTL_CHILDREN(device_get_sysctl_tree(adapter->dev)),
6129             OID_AUTO, name, CTLTYPE_INT|CTLFLAG_RW,
6130             info, 0, em_sysctl_int_delay, "I", description);
6131 }
6132
6133 static void
6134 em_set_sysctl_value(struct adapter *adapter, const char *name,
6135         const char *description, int *limit, int value)
6136 {
6137         *limit = value;
6138         SYSCTL_ADD_INT(device_get_sysctl_ctx(adapter->dev),
6139             SYSCTL_CHILDREN(device_get_sysctl_tree(adapter->dev)),
6140             OID_AUTO, name, CTLFLAG_RW, limit, value, description);
6141 }
6142
6143
6144 /*
6145 ** Set flow control using sysctl:
6146 ** Flow control values:
6147 **      0 - off
6148 **      1 - rx pause
6149 **      2 - tx pause
6150 **      3 - full
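**
** Example (assuming this handler is registered as the "fc" oid):
**      # sysctl dev.em.0.fc=3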
6151 */
6152 static int
6153 em_set_flowcntl(SYSCTL_HANDLER_ARGS)
6154 {
6155         int             error;
6156         struct adapter  *adapter = (struct adapter *) arg1;
6157         int             input = adapter->fc; /* report the current mode */
6158
6159         error = sysctl_handle_int(oidp, &input, 0, req);
6160     
6161         if ((error) || (req->newptr == NULL))
6162                 return (error);
6163                 
6164         if (input == adapter->fc) /* no change? */
6165                 return (error);
6166
6167         switch (input) {
6168                 case e1000_fc_rx_pause:
6169                 case e1000_fc_tx_pause:
6170                 case e1000_fc_full:
6171                 case e1000_fc_none:
6172                         adapter->hw.fc.requested_mode = input;
6173                         adapter->fc = input;
6174                         break;
6175                 default:
6176                         /* Do nothing */
6177                         return (error);
6178         }
6179
6180         adapter->hw.fc.current_mode = adapter->hw.fc.requested_mode;
6181         e1000_force_mac_fc(&adapter->hw);
6182         return (error);
6183 }
6184
6185 /*
6186 ** Manage Energy Efficient Ethernet:
6187 ** Control values:
6188 **     0/1 - enabled/disabled
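**
** Example (assuming this handler is registered as the "eee_control"
** oid): # sysctl dev.em.0.eee_control=1  (writing 1 disables EEE)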
6189 */
6190 static int
6191 em_sysctl_eee(SYSCTL_HANDLER_ARGS)
6192 {
6193         struct adapter  *adapter = (struct adapter *) arg1;
6194         int             error, value;
6195
6196         value = adapter->hw.dev_spec.ich8lan.eee_disable;
6197         error = sysctl_handle_int(oidp, &value, 0, req);
6198         if (error || req->newptr == NULL)
6199                 return (error);
6200         EM_CORE_LOCK(adapter);
6201         adapter->hw.dev_spec.ich8lan.eee_disable = (value != 0);
6202         em_init_locked(adapter);
6203         EM_CORE_UNLOCK(adapter);
6204         return (0);
6205 }
6206
6207 static int
6208 em_sysctl_debug_info(SYSCTL_HANDLER_ARGS)
6209 {
6210         struct adapter *adapter;
6211         int error;
6212         int result;
6213
6214         result = -1;
6215         error = sysctl_handle_int(oidp, &result, 0, req);
6216
6217         if (error || !req->newptr)
6218                 return (error);
6219
6220         if (result == 1) {
6221                 adapter = (struct adapter *)arg1;
6222                 em_print_debug_info(adapter);
6223         }
6224
6225         return (error);
6226 }
6227
6228 /*
6229 ** This routine is meant to be fluid, add whatever is
6230 ** needed for debugging a problem.  -jfv
6231 */
6232 static void
6233 em_print_debug_info(struct adapter *adapter)
6234 {
6235         device_t dev = adapter->dev;
6236         struct tx_ring *txr = adapter->tx_rings;
6237         struct rx_ring *rxr = adapter->rx_rings;
6238
6239         if (adapter->ifp->if_drv_flags & IFF_DRV_RUNNING) {
6240                 printf("Interface is RUNNING ");
6241                 if (adapter->ifp->if_drv_flags & IFF_DRV_OACTIVE)
6242                         printf("and OACTIVE (output queue full)\n");
6243                 else
6244                         printf("and ACTIVE\n");
6245         } else
6246                 printf("Interface is NOT RUNNING\n");
6247
6248
6249         for (int i = 0; i < adapter->num_queues; i++, txr++, rxr++) {
6250                 device_printf(dev, "TX Queue %d ------\n", i);
6251                 device_printf(dev, "hw tdh = %d, hw tdt = %d\n",
6252                         E1000_READ_REG(&adapter->hw, E1000_TDH(i)),
6253                         E1000_READ_REG(&adapter->hw, E1000_TDT(i)));
6254                 device_printf(dev, "Tx Queue Status = %d\n", txr->busy);
6255                 device_printf(dev, "TX descriptors avail = %d\n",
6256                         txr->tx_avail);
6257                 device_printf(dev, "Tx Descriptors avail failure = %ld\n",
6258                         txr->no_desc_avail);
6259                 device_printf(dev, "RX Queue %d ------\n", i);
6260                 device_printf(dev, "hw rdh = %d, hw rdt = %d\n",
6261                         E1000_READ_REG(&adapter->hw, E1000_RDH(i)),
6262                         E1000_READ_REG(&adapter->hw, E1000_RDT(i)));
6263                 device_printf(dev, "RX discarded packets = %ld\n",
6264                         rxr->rx_discarded);
6265                 device_printf(dev, "RX Next to Check = %d\n", rxr->next_to_check);
6266                 device_printf(dev, "RX Next to Refresh = %d\n", rxr->next_to_refresh);
6267         }
6268 }
6269
6270 #ifdef EM_MULTIQUEUE
6271 /*
6272  * 82574 only:
6273  * Write a new value to the EEPROM increasing the number of MSIX
6274  * vectors from 3 to 5, for proper multiqueue support.
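 * The vector count is latched from the NVM at power-up, so a change
 * written here should only take effect after the next power cycle.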
6275  */
6276 static void
6277 em_enable_vectors_82574(struct adapter *adapter)
6278 {
6279         struct e1000_hw *hw = &adapter->hw;
6280         device_t dev = adapter->dev;
6281         u16 edata;
6282
6283         e1000_read_nvm(hw, EM_NVM_PCIE_CTRL, 1, &edata);
6284         device_printf(dev, "Current cap: %#06x\n", edata);
6285         if (((edata & EM_NVM_MSIX_N_MASK) >> EM_NVM_MSIX_N_SHIFT) != 4) {
6286                 device_printf(dev, "Writing to eeprom: increasing "
6287                     "reported MSIX vectors from 3 to 5...\n");
6288                 edata &= ~(EM_NVM_MSIX_N_MASK);
6289                 edata |= 4 << EM_NVM_MSIX_N_SHIFT;
6290                 e1000_write_nvm(hw, EM_NVM_PCIE_CTRL, 1, &edata);
6291                 e1000_update_nvm_checksum(hw);
6292                 device_printf(dev, "Writing to eeprom: done\n");
6293         }
6294 }
6295 #endif
6296
6297 #ifdef DDB
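/*
 * Debugger convenience commands: from the ddb prompt, "em_reset_dev"
 * reinitializes every attached em(4) instance and "em_dump_queue"
 * prints each instance's per-queue debug state.
 */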
6298 DB_COMMAND(em_reset_dev, em_ddb_reset_dev)
6299 {
6300         devclass_t      dc;
6301         int max_em;
6302
6303         dc = devclass_find("em");
6304         max_em = devclass_get_maxunit(dc);
6305
6306         for (int index = 0; index < max_em; index++) {
6307                 device_t dev;
6308                 dev = devclass_get_device(dc, index);
6309                 if (dev != NULL && device_get_driver(dev) == &em_driver) {
6310                         struct adapter *adapter = device_get_softc(dev);
6311                         EM_CORE_LOCK(adapter);
6312                         em_init_locked(adapter);
6313                         EM_CORE_UNLOCK(adapter);
6314                 }
6315         }
6316 }
6317 DB_COMMAND(em_dump_queue, em_ddb_dump_queue)
6318 {
6319         devclass_t      dc;
6320         int max_em;
6321
6322         dc = devclass_find("em");
6323         max_em = devclass_get_maxunit(dc);
6324
6325         for (int index = 0; index < max_em; index++) {
6326                 device_t dev;
6327                 dev = devclass_get_device(dc, index);
6328                 if (dev != NULL && device_get_driver(dev) == &em_driver)
6329                         em_print_debug_info(device_get_softc(dev));
6330         }
6331
6332 }
6333 #endif