/******************************************************************************

  Copyright (c) 2001-2015, Intel Corporation
  All rights reserved.

  Redistribution and use in source and binary forms, with or without
  modification, are permitted provided that the following conditions are met:

   1. Redistributions of source code must retain the above copyright notice,
      this list of conditions and the following disclaimer.

   2. Redistributions in binary form must reproduce the above copyright
      notice, this list of conditions and the following disclaimer in the
      documentation and/or other materials provided with the distribution.

   3. Neither the name of the Intel Corporation nor the names of its
      contributors may be used to endorse or promote products derived from
      this software without specific prior written permission.

  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
  AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
  LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
  INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
  CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
  ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
  POSSIBILITY OF SUCH DAMAGE.

******************************************************************************/
/*$FreeBSD$*/
34
35 #include "opt_em.h"
36 #include "opt_ddb.h"
37 #include "opt_inet.h"
38 #include "opt_inet6.h"
39
40 #ifdef HAVE_KERNEL_OPTION_HEADERS
41 #include "opt_device_polling.h"
42 #endif
43
44 #include <sys/param.h>
45 #include <sys/systm.h>
46 #ifdef DDB
47 #include <sys/types.h>
48 #include <ddb/ddb.h>
49 #endif
50 #if __FreeBSD_version >= 800000
51 #include <sys/buf_ring.h>
52 #endif
53 #include <sys/bus.h>
54 #include <sys/endian.h>
55 #include <sys/kernel.h>
56 #include <sys/kthread.h>
57 #include <sys/malloc.h>
58 #include <sys/mbuf.h>
59 #include <sys/module.h>
60 #include <sys/rman.h>
61 #include <sys/smp.h>
62 #include <sys/socket.h>
63 #include <sys/sockio.h>
64 #include <sys/sysctl.h>
65 #include <sys/taskqueue.h>
66 #include <sys/eventhandler.h>
67 #include <machine/bus.h>
68 #include <machine/resource.h>
69
70 #include <net/bpf.h>
71 #include <net/ethernet.h>
72 #include <net/if.h>
73 #include <net/if_arp.h>
74 #include <net/if_dl.h>
75 #include <net/if_media.h>
76
77 #include <net/if_types.h>
78 #include <net/if_vlan_var.h>
79
80 #include <netinet/in_systm.h>
81 #include <netinet/in.h>
82 #include <netinet/if_ether.h>
83 #include <netinet/ip.h>
84 #include <netinet/ip6.h>
85 #include <netinet/tcp.h>
86 #include <netinet/udp.h>
87
88 #include <machine/in_cksum.h>
89 #include <dev/led/led.h>
90 #include <dev/pci/pcivar.h>
91 #include <dev/pci/pcireg.h>
92
93 #include "e1000_api.h"
94 #include "e1000_82571.h"
95 #include "if_em.h"
96
/*********************************************************************
 *  Driver version:
 *********************************************************************/
char em_driver_version[] = "7.6.1-k";

/*********************************************************************
 *  PCI Device ID Table
 *
 *  Used by probe to select the devices to attach to.
 *  The last field stores an index into em_strings.
 *  The last entry must be all 0s.
 *
 *  { Vendor ID, Device ID, SubVendor ID, SubDevice ID, String Index }
 *********************************************************************/

static em_vendor_info_t em_vendor_info_array[] =
{
        /* Intel(R) PRO/1000 Network Connection */
        { 0x8086, E1000_DEV_ID_82571EB_COPPER,  PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82571EB_FIBER,   PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82571EB_SERDES,  PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82571EB_SERDES_DUAL,
                                                PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82571EB_SERDES_QUAD,
                                                PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82571EB_QUAD_COPPER,
                                                PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82571EB_QUAD_COPPER_LP,
                                                PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82571EB_QUAD_FIBER,
                                                PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82571PT_QUAD_COPPER,
                                                PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82572EI_COPPER,  PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82572EI_FIBER,   PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82572EI_SERDES,  PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82572EI,         PCI_ANY_ID, PCI_ANY_ID, 0},

        { 0x8086, E1000_DEV_ID_82573E,          PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82573E_IAMT,     PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82573L,          PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82583V,          PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_80003ES2LAN_COPPER_SPT,
                                                PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_80003ES2LAN_SERDES_SPT,
                                                PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_80003ES2LAN_COPPER_DPT,
                                                PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_80003ES2LAN_SERDES_DPT,
                                                PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_ICH8_IGP_M_AMT,  PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_ICH8_IGP_AMT,    PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_ICH8_IGP_C,      PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_ICH8_IFE,        PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_ICH8_IFE_GT,     PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_ICH8_IFE_G,      PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_ICH8_IGP_M,      PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_ICH8_82567V_3,   PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_ICH9_IGP_M_AMT,  PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_ICH9_IGP_AMT,    PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_ICH9_IGP_C,      PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_ICH9_IGP_M,      PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_ICH9_IGP_M_V,    PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_ICH9_IFE,        PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_ICH9_IFE_GT,     PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_ICH9_IFE_G,      PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_ICH9_BM,         PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82574L,          PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82574LA,         PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_ICH10_R_BM_LM,   PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_ICH10_R_BM_LF,   PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_ICH10_R_BM_V,    PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_ICH10_D_BM_LM,   PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_ICH10_D_BM_LF,   PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_ICH10_D_BM_V,    PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_PCH_M_HV_LM,     PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_PCH_M_HV_LC,     PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_PCH_D_HV_DM,     PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_PCH_D_HV_DC,     PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_PCH2_LV_LM,      PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_PCH2_LV_V,       PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_PCH_LPT_I217_LM, PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_PCH_LPT_I217_V,  PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_PCH_LPTLP_I218_LM,
                                                PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_PCH_LPTLP_I218_V,
                                                PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_PCH_I218_LM2,    PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_PCH_I218_V2,     PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_PCH_I218_LM3,    PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_PCH_I218_V3,     PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_PCH_SPT_I219_LM, PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_PCH_SPT_I219_V,  PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_PCH_SPT_I219_LM2,
                                                PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_PCH_SPT_I219_V2, PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_PCH_LBG_I219_LM3,
                                                PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_PCH_SPT_I219_LM4,
                                                PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_PCH_SPT_I219_V4, PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_PCH_SPT_I219_LM5,
                                                PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_PCH_SPT_I219_V5, PCI_ANY_ID, PCI_ANY_ID, 0},
        /* required last entry */
        { 0, 0, 0, 0, 0}
};

/*********************************************************************
 *  Table of branding strings for all supported NICs.
 *********************************************************************/

static char *em_strings[] = {
        "Intel(R) PRO/1000 Network Connection"
};

/*********************************************************************
 *  Function prototypes
 *********************************************************************/
static int      em_probe(device_t);
static int      em_attach(device_t);
static int      em_detach(device_t);
static int      em_shutdown(device_t);
static int      em_suspend(device_t);
static int      em_resume(device_t);
#ifdef EM_MULTIQUEUE
static int      em_mq_start(struct ifnet *, struct mbuf *);
static int      em_mq_start_locked(struct ifnet *,
                    struct tx_ring *);
static void     em_qflush(struct ifnet *);
#else
static void     em_start(struct ifnet *);
static void     em_start_locked(struct ifnet *, struct tx_ring *);
#endif
static int      em_ioctl(struct ifnet *, u_long, caddr_t);
static void     em_init(void *);
static void     em_init_locked(struct adapter *);
static void     em_stop(void *);
static void     em_media_status(struct ifnet *, struct ifmediareq *);
static int      em_media_change(struct ifnet *);
static void     em_identify_hardware(struct adapter *);
static int      em_allocate_pci_resources(struct adapter *);
static int      em_allocate_legacy(struct adapter *);
static int      em_allocate_msix(struct adapter *);
static int      em_allocate_queues(struct adapter *);
static int      em_setup_msix(struct adapter *);
static void     em_free_pci_resources(struct adapter *);
static void     em_local_timer(void *);
static void     em_reset(struct adapter *);
static int      em_setup_interface(device_t, struct adapter *);
static void     em_flush_desc_rings(struct adapter *);

static void     em_setup_transmit_structures(struct adapter *);
static void     em_initialize_transmit_unit(struct adapter *);
static int      em_allocate_transmit_buffers(struct tx_ring *);
static void     em_free_transmit_structures(struct adapter *);
static void     em_free_transmit_buffers(struct tx_ring *);

static int      em_setup_receive_structures(struct adapter *);
static int      em_allocate_receive_buffers(struct rx_ring *);
static void     em_initialize_receive_unit(struct adapter *);
static void     em_free_receive_structures(struct adapter *);
static void     em_free_receive_buffers(struct rx_ring *);

static void     em_enable_intr(struct adapter *);
static void     em_disable_intr(struct adapter *);
static void     em_update_stats_counters(struct adapter *);
static void     em_add_hw_stats(struct adapter *adapter);
static void     em_txeof(struct tx_ring *);
static bool     em_rxeof(struct rx_ring *, int, int *);
#ifndef __NO_STRICT_ALIGNMENT
static int      em_fixup_rx(struct rx_ring *);
#endif
static void     em_setup_rxdesc(union e1000_rx_desc_extended *,
                    const struct em_rxbuffer *rxbuf);
static void     em_receive_checksum(uint32_t status, struct mbuf *);
static void     em_transmit_checksum_setup(struct tx_ring *, struct mbuf *, int,
                    struct ip *, u32 *, u32 *);
static void     em_tso_setup(struct tx_ring *, struct mbuf *, int, struct ip *,
                    struct tcphdr *, u32 *, u32 *);
static void     em_set_promisc(struct adapter *);
static void     em_disable_promisc(struct adapter *);
static void     em_set_multi(struct adapter *);
static void     em_update_link_status(struct adapter *);
static void     em_refresh_mbufs(struct rx_ring *, int);
static void     em_register_vlan(void *, struct ifnet *, u16);
static void     em_unregister_vlan(void *, struct ifnet *, u16);
static void     em_setup_vlan_hw_support(struct adapter *);
static int      em_xmit(struct tx_ring *, struct mbuf **);
static int      em_dma_malloc(struct adapter *, bus_size_t,
                    struct em_dma_alloc *, int);
static void     em_dma_free(struct adapter *, struct em_dma_alloc *);
static int      em_sysctl_nvm_info(SYSCTL_HANDLER_ARGS);
static void     em_print_nvm_info(struct adapter *);
static int      em_sysctl_debug_info(SYSCTL_HANDLER_ARGS);
static void     em_print_debug_info(struct adapter *);
static int      em_is_valid_ether_addr(u8 *);
static int      em_sysctl_int_delay(SYSCTL_HANDLER_ARGS);
static void     em_add_int_delay_sysctl(struct adapter *, const char *,
                    const char *, struct em_int_delay_info *, int, int);
/* Management and WOL Support */
static void     em_init_manageability(struct adapter *);
static void     em_release_manageability(struct adapter *);
static void     em_get_hw_control(struct adapter *);
static void     em_release_hw_control(struct adapter *);
static void     em_get_wakeup(device_t);
static void     em_enable_wakeup(device_t);
static int      em_enable_phy_wakeup(struct adapter *);
static void     em_led_func(void *, int);
static void     em_disable_aspm(struct adapter *);

static int      em_irq_fast(void *);

/* MSIX handlers */
static void     em_msix_tx(void *);
static void     em_msix_rx(void *);
static void     em_msix_link(void *);
static void     em_handle_tx(void *context, int pending);
static void     em_handle_rx(void *context, int pending);
static void     em_handle_link(void *context, int pending);

#ifdef EM_MULTIQUEUE
static void     em_enable_vectors_82574(struct adapter *);
#endif

static void     em_set_sysctl_value(struct adapter *, const char *,
                    const char *, int *, int);
static int      em_set_flowcntl(SYSCTL_HANDLER_ARGS);
static int      em_sysctl_eee(SYSCTL_HANDLER_ARGS);

static __inline void em_rx_discard(struct rx_ring *, int);

#ifdef DEVICE_POLLING
static poll_handler_t em_poll;
#endif /* DEVICE_POLLING */

/*********************************************************************
 *  FreeBSD Device Interface Entry Points
 *********************************************************************/

static device_method_t em_methods[] = {
        /* Device interface */
        DEVMETHOD(device_probe, em_probe),
        DEVMETHOD(device_attach, em_attach),
        DEVMETHOD(device_detach, em_detach),
        DEVMETHOD(device_shutdown, em_shutdown),
        DEVMETHOD(device_suspend, em_suspend),
        DEVMETHOD(device_resume, em_resume),
        DEVMETHOD_END
};

static driver_t em_driver = {
        "em", em_methods, sizeof(struct adapter),
};

devclass_t em_devclass;
DRIVER_MODULE(em, pci, em_driver, em_devclass, 0, 0);
MODULE_DEPEND(em, pci, 1, 1, 1);
MODULE_DEPEND(em, ether, 1, 1, 1);

/*********************************************************************
 *  Tunable default values.
 *********************************************************************/

#define EM_TICKS_TO_USECS(ticks)        ((1024 * (ticks) + 500) / 1000)
#define EM_USECS_TO_TICKS(usecs)        ((1000 * (usecs) + 512) / 1024)
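/*
 * The interrupt delay timers tick in 1.024 usec units; these macros
 * convert between ticks and microseconds, rounding to nearest.  For
 * example, EM_TICKS_TO_USECS(64) = (1024 * 64 + 500) / 1000 = 66 usecs,
 * and EM_USECS_TO_TICKS(66) = (1000 * 66 + 512) / 1024 = 64 ticks.
 */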
#define M_TSO_LEN                       66

#define MAX_INTS_PER_SEC        8000
#define DEFAULT_ITR             (1000000000/(MAX_INTS_PER_SEC * 256))
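/*
 * The ITR register counts in 256 nanosecond units, so DEFAULT_ITR =
 * 1000000000 / (8000 * 256) = 488, an interval of 488 * 256 ns ~= 125
 * usecs, which caps the rate at MAX_INTS_PER_SEC (8000) interrupts
 * per second.
 */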

#define TSO_WORKAROUND  4

static SYSCTL_NODE(_hw, OID_AUTO, em, CTLFLAG_RD, 0, "EM driver parameters");

static int em_disable_crc_stripping = 0;
SYSCTL_INT(_hw_em, OID_AUTO, disable_crc_stripping, CTLFLAG_RDTUN,
    &em_disable_crc_stripping, 0, "Disable CRC Stripping");

static int em_tx_int_delay_dflt = EM_TICKS_TO_USECS(EM_TIDV);
static int em_rx_int_delay_dflt = EM_TICKS_TO_USECS(EM_RDTR);
TUNABLE_INT("hw.em.tx_int_delay", &em_tx_int_delay_dflt);
TUNABLE_INT("hw.em.rx_int_delay", &em_rx_int_delay_dflt);
SYSCTL_INT(_hw_em, OID_AUTO, tx_int_delay, CTLFLAG_RDTUN, &em_tx_int_delay_dflt,
    0, "Default transmit interrupt delay in usecs");
SYSCTL_INT(_hw_em, OID_AUTO, rx_int_delay, CTLFLAG_RDTUN, &em_rx_int_delay_dflt,
    0, "Default receive interrupt delay in usecs");

static int em_tx_abs_int_delay_dflt = EM_TICKS_TO_USECS(EM_TADV);
static int em_rx_abs_int_delay_dflt = EM_TICKS_TO_USECS(EM_RADV);
TUNABLE_INT("hw.em.tx_abs_int_delay", &em_tx_abs_int_delay_dflt);
TUNABLE_INT("hw.em.rx_abs_int_delay", &em_rx_abs_int_delay_dflt);
SYSCTL_INT(_hw_em, OID_AUTO, tx_abs_int_delay, CTLFLAG_RDTUN,
    &em_tx_abs_int_delay_dflt, 0,
    "Default transmit interrupt delay limit in usecs");
SYSCTL_INT(_hw_em, OID_AUTO, rx_abs_int_delay, CTLFLAG_RDTUN,
    &em_rx_abs_int_delay_dflt, 0,
    "Default receive interrupt delay limit in usecs");

static int em_rxd = EM_DEFAULT_RXD;
static int em_txd = EM_DEFAULT_TXD;
TUNABLE_INT("hw.em.rxd", &em_rxd);
TUNABLE_INT("hw.em.txd", &em_txd);
SYSCTL_INT(_hw_em, OID_AUTO, rxd, CTLFLAG_RDTUN, &em_rxd, 0,
    "Number of receive descriptors per queue");
SYSCTL_INT(_hw_em, OID_AUTO, txd, CTLFLAG_RDTUN, &em_txd, 0,
    "Number of transmit descriptors per queue");

static int em_smart_pwr_down = FALSE;
TUNABLE_INT("hw.em.smart_pwr_down", &em_smart_pwr_down);
SYSCTL_INT(_hw_em, OID_AUTO, smart_pwr_down, CTLFLAG_RDTUN, &em_smart_pwr_down,
    0, "Set to true to leave smart power down enabled on newer adapters");

/* Controls whether promiscuous also shows bad packets */
static int em_debug_sbp = FALSE;
TUNABLE_INT("hw.em.sbp", &em_debug_sbp);
SYSCTL_INT(_hw_em, OID_AUTO, sbp, CTLFLAG_RDTUN, &em_debug_sbp, 0,
    "Show bad packets in promiscuous mode");

static int em_enable_msix = TRUE;
TUNABLE_INT("hw.em.enable_msix", &em_enable_msix);
SYSCTL_INT(_hw_em, OID_AUTO, enable_msix, CTLFLAG_RDTUN, &em_enable_msix, 0,
    "Enable MSI-X interrupts");

#ifdef EM_MULTIQUEUE
static int em_num_queues = 1;
TUNABLE_INT("hw.em.num_queues", &em_num_queues);
SYSCTL_INT(_hw_em, OID_AUTO, num_queues, CTLFLAG_RDTUN, &em_num_queues, 0,
    "82574 only: Number of queues to configure, 0 indicates autoconfigure");
#endif

/*
** Global variable to store the last CPU used when binding queues
** to CPUs in em_allocate_msix.  Starts at CPU_FIRST and increments
** each time a queue is bound to a CPU.
*/
static int em_last_bind_cpu = -1;

/* How many packets rxeof tries to clean at a time */
static int em_rx_process_limit = 100;
TUNABLE_INT("hw.em.rx_process_limit", &em_rx_process_limit);
SYSCTL_INT(_hw_em, OID_AUTO, rx_process_limit, CTLFLAG_RDTUN,
    &em_rx_process_limit, 0,
    "Maximum number of received packets to process "
    "at a time, -1 means unlimited");

/* Energy efficient ethernet - default to OFF */
static int eee_setting = 1;
TUNABLE_INT("hw.em.eee_setting", &eee_setting);
SYSCTL_INT(_hw_em, OID_AUTO, eee_setting, CTLFLAG_RDTUN, &eee_setting, 0,
    "Disable Energy Efficient Ethernet");

/* Global used in WOL setup with multiport cards */
static int global_quad_port_a = 0;

#ifdef DEV_NETMAP       /* see ixgbe.c for details */
#include <dev/netmap/if_em_netmap.h>
#endif /* DEV_NETMAP */

/*********************************************************************
 *  Device identification routine
 *
 *  em_probe determines if the driver should be loaded on an
 *  adapter based on the PCI vendor/device ID of the adapter.
 *
 *  return BUS_PROBE_DEFAULT on success, positive on failure
 *********************************************************************/

static int
em_probe(device_t dev)
{
        char            adapter_name[60];
        uint16_t        pci_vendor_id = 0;
        uint16_t        pci_device_id = 0;
        uint16_t        pci_subvendor_id = 0;
        uint16_t        pci_subdevice_id = 0;
        em_vendor_info_t *ent;

        INIT_DEBUGOUT("em_probe: begin");

        pci_vendor_id = pci_get_vendor(dev);
        if (pci_vendor_id != EM_VENDOR_ID)
                return (ENXIO);

        pci_device_id = pci_get_device(dev);
        pci_subvendor_id = pci_get_subvendor(dev);
        pci_subdevice_id = pci_get_subdevice(dev);

        ent = em_vendor_info_array;
        while (ent->vendor_id != 0) {
                if ((pci_vendor_id == ent->vendor_id) &&
                    (pci_device_id == ent->device_id) &&
                    ((pci_subvendor_id == ent->subvendor_id) ||
                    (ent->subvendor_id == PCI_ANY_ID)) &&
                    ((pci_subdevice_id == ent->subdevice_id) ||
                    (ent->subdevice_id == PCI_ANY_ID))) {
                        sprintf(adapter_name, "%s %s",
                                em_strings[ent->index],
                                em_driver_version);
                        device_set_desc_copy(dev, adapter_name);
                        return (BUS_PROBE_DEFAULT);
                }
                ent++;
        }

        return (ENXIO);
}

/*********************************************************************
 *  Device initialization routine
 *
 *  The attach entry point is called when the driver is being loaded.
 *  This routine identifies the type of hardware, allocates all resources
 *  and initializes the hardware.
 *
 *  return 0 on success, positive on failure
 *********************************************************************/

static int
em_attach(device_t dev)
{
        struct adapter  *adapter;
        struct e1000_hw *hw;
        int             error = 0;

        INIT_DEBUGOUT("em_attach: begin");

        if (resource_disabled("em", device_get_unit(dev))) {
                device_printf(dev, "Disabled by device hint\n");
                return (ENXIO);
        }

        adapter = device_get_softc(dev);
        adapter->dev = adapter->osdep.dev = dev;
        hw = &adapter->hw;
        EM_CORE_LOCK_INIT(adapter, device_get_nameunit(dev));

        /* SYSCTL stuff */
        SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
            SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
            OID_AUTO, "nvm", CTLTYPE_INT|CTLFLAG_RW, adapter, 0,
            em_sysctl_nvm_info, "I", "NVM Information");

        SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
            SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
            OID_AUTO, "debug", CTLTYPE_INT|CTLFLAG_RW, adapter, 0,
            em_sysctl_debug_info, "I", "Debug Information");

        SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
            SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
            OID_AUTO, "fc", CTLTYPE_INT|CTLFLAG_RW, adapter, 0,
            em_set_flowcntl, "I", "Flow Control");

        callout_init_mtx(&adapter->timer, &adapter->core_mtx, 0);

        /* Determine hardware and mac info */
        em_identify_hardware(adapter);

        /* Setup PCI resources */
        if (em_allocate_pci_resources(adapter)) {
                device_printf(dev, "Allocation of PCI resources failed\n");
                error = ENXIO;
                goto err_pci;
        }

        /*
        ** For ICH8 and family we need to
        ** map the flash memory, and this
        ** must happen after the MAC is
        ** identified
        */
        if ((hw->mac.type == e1000_ich8lan) ||
            (hw->mac.type == e1000_ich9lan) ||
            (hw->mac.type == e1000_ich10lan) ||
            (hw->mac.type == e1000_pchlan) ||
            (hw->mac.type == e1000_pch2lan) ||
            (hw->mac.type == e1000_pch_lpt)) {
                int rid = EM_BAR_TYPE_FLASH;
                adapter->flash = bus_alloc_resource_any(dev,
                    SYS_RES_MEMORY, &rid, RF_ACTIVE);
                if (adapter->flash == NULL) {
                        device_printf(dev, "Mapping of Flash failed\n");
                        error = ENXIO;
                        goto err_pci;
                }
                /* This is used in the shared code */
                hw->flash_address = (u8 *)adapter->flash;
                adapter->osdep.flash_bus_space_tag =
                    rman_get_bustag(adapter->flash);
                adapter->osdep.flash_bus_space_handle =
                    rman_get_bushandle(adapter->flash);
        }
        /*
        ** In the new SPT device the flash is not a
        ** separate BAR; rather, it is also in BAR0,
        ** so use the same tag and an offset handle for the
        ** FLASH read/write macros in the shared code.
        */
        else if (hw->mac.type == e1000_pch_spt) {
                adapter->osdep.flash_bus_space_tag =
                    adapter->osdep.mem_bus_space_tag;
                adapter->osdep.flash_bus_space_handle =
                    adapter->osdep.mem_bus_space_handle
                    + E1000_FLASH_BASE_ADDR;
        }

        /* Do Shared Code initialization */
        error = e1000_setup_init_funcs(hw, TRUE);
        if (error) {
                device_printf(dev, "Setup of Shared code failed, error %d\n",
                    error);
                error = ENXIO;
                goto err_pci;
        }

        /*
         * Setup MSI/X or MSI if PCI Express
         */
        adapter->msix = em_setup_msix(adapter);

        e1000_get_bus_info(hw);

        /* Set up some sysctls for the tunable interrupt delays */
        em_add_int_delay_sysctl(adapter, "rx_int_delay",
            "receive interrupt delay in usecs", &adapter->rx_int_delay,
            E1000_REGISTER(hw, E1000_RDTR), em_rx_int_delay_dflt);
        em_add_int_delay_sysctl(adapter, "tx_int_delay",
            "transmit interrupt delay in usecs", &adapter->tx_int_delay,
            E1000_REGISTER(hw, E1000_TIDV), em_tx_int_delay_dflt);
        em_add_int_delay_sysctl(adapter, "rx_abs_int_delay",
            "receive interrupt delay limit in usecs",
            &adapter->rx_abs_int_delay,
            E1000_REGISTER(hw, E1000_RADV),
            em_rx_abs_int_delay_dflt);
        em_add_int_delay_sysctl(adapter, "tx_abs_int_delay",
            "transmit interrupt delay limit in usecs",
            &adapter->tx_abs_int_delay,
            E1000_REGISTER(hw, E1000_TADV),
            em_tx_abs_int_delay_dflt);
        em_add_int_delay_sysctl(adapter, "itr",
            "interrupt delay limit in usecs/4",
            &adapter->tx_itr,
            E1000_REGISTER(hw, E1000_ITR),
            DEFAULT_ITR);

        /* Sysctl for limiting the amount of work done in the taskqueue */
        em_set_sysctl_value(adapter, "rx_processing_limit",
            "max number of rx packets to process", &adapter->rx_process_limit,
            em_rx_process_limit);

        /*
         * Validate the number of transmit and receive descriptors.  They
         * must not exceed the hardware maximum and must be a multiple
         * of EM_DBA_ALIGN.
         */
        if (((em_txd * sizeof(struct e1000_tx_desc)) % EM_DBA_ALIGN) != 0 ||
            (em_txd > EM_MAX_TXD) || (em_txd < EM_MIN_TXD)) {
                device_printf(dev, "Using %d TX descriptors instead of %d!\n",
                    EM_DEFAULT_TXD, em_txd);
                adapter->num_tx_desc = EM_DEFAULT_TXD;
        } else
                adapter->num_tx_desc = em_txd;

        if (((em_rxd * sizeof(union e1000_rx_desc_extended)) % EM_DBA_ALIGN) != 0 ||
            (em_rxd > EM_MAX_RXD) || (em_rxd < EM_MIN_RXD)) {
                device_printf(dev, "Using %d RX descriptors instead of %d!\n",
                    EM_DEFAULT_RXD, em_rxd);
                adapter->num_rx_desc = EM_DEFAULT_RXD;
        } else
                adapter->num_rx_desc = em_rxd;

        hw->mac.autoneg = DO_AUTO_NEG;
        hw->phy.autoneg_wait_to_complete = FALSE;
        hw->phy.autoneg_advertised = AUTONEG_ADV_DEFAULT;

        /* Copper options */
        if (hw->phy.media_type == e1000_media_type_copper) {
                hw->phy.mdix = AUTO_ALL_MODES;
                hw->phy.disable_polarity_correction = FALSE;
                hw->phy.ms_type = EM_MASTER_SLAVE;
        }

        /*
         * Set the frame limits assuming
         * standard ethernet sized frames.
         */
        adapter->hw.mac.max_frame_size =
            ETHERMTU + ETHER_HDR_LEN + ETHERNET_FCS_SIZE;
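        /*
         * With the default MTU this is ETHERMTU (1500) + ETHER_HDR_LEN (14)
         * + ETHERNET_FCS_SIZE (4) = 1518 bytes; SIOCSIFMTU in em_ioctl
         * recomputes it when the MTU changes.
         */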

        /*
         * This controls when hardware reports transmit completion
         * status.
         */
        hw->mac.report_tx_early = 1;

        /*
        ** Get queue/ring memory
        */
        if (em_allocate_queues(adapter)) {
                error = ENOMEM;
                goto err_pci;
        }

        /* Allocate multicast array memory. */
        adapter->mta = malloc(sizeof(u8) * ETH_ADDR_LEN *
            MAX_NUM_MULTICAST_ADDRESSES, M_DEVBUF, M_NOWAIT);
        if (adapter->mta == NULL) {
                device_printf(dev, "Can not allocate multicast setup array\n");
                error = ENOMEM;
                goto err_late;
        }

        /* Check SOL/IDER usage */
        if (e1000_check_reset_block(hw))
                device_printf(dev, "PHY reset is blocked"
                    " due to SOL/IDER session.\n");

        /* Sysctl for setting Energy Efficient Ethernet */
        hw->dev_spec.ich8lan.eee_disable = eee_setting;
        SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
            SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
            OID_AUTO, "eee_control", CTLTYPE_INT|CTLFLAG_RW,
            adapter, 0, em_sysctl_eee, "I",
            "Disable Energy Efficient Ethernet");

        /*
        ** Start from a known state; this is
        ** important for reading the NVM and
        ** MAC address from it.
        */
        e1000_reset_hw(hw);

        /* Make sure we have a good EEPROM before we read from it */
        if (e1000_validate_nvm_checksum(hw) < 0) {
                /*
                ** Some PCI-E parts fail the first check due to
                ** the link being in a sleep state; call it again,
                ** and if it fails a second time it's a real issue.
                */
                if (e1000_validate_nvm_checksum(hw) < 0) {
                        device_printf(dev,
                            "The EEPROM Checksum Is Not Valid\n");
                        error = EIO;
                        goto err_late;
                }
        }

        /* Copy the permanent MAC address out of the EEPROM */
        if (e1000_read_mac_addr(hw) < 0) {
                device_printf(dev, "EEPROM read error while reading MAC"
                    " address\n");
                error = EIO;
                goto err_late;
        }

        if (!em_is_valid_ether_addr(hw->mac.addr)) {
                device_printf(dev, "Invalid MAC address\n");
                error = EIO;
                goto err_late;
        }

        /* Disable ULP support */
        e1000_disable_ulp_lpt_lp(hw, TRUE);

        /*
        ** Do interrupt configuration
        */
        if (adapter->msix > 1) /* Do MSIX */
                error = em_allocate_msix(adapter);
        else  /* MSI or Legacy */
                error = em_allocate_legacy(adapter);
        if (error)
                goto err_late;

        /*
         * Get Wake-on-Lan and Management info for later use
         */
        em_get_wakeup(dev);

        /* Setup OS specific network interface */
        if (em_setup_interface(dev, adapter) != 0)
                goto err_late;

        em_reset(adapter);

        /* Initialize statistics */
        em_update_stats_counters(adapter);

        hw->mac.get_link_status = 1;
        em_update_link_status(adapter);

        /* Register for VLAN events */
        adapter->vlan_attach = EVENTHANDLER_REGISTER(vlan_config,
            em_register_vlan, adapter, EVENTHANDLER_PRI_FIRST);
        adapter->vlan_detach = EVENTHANDLER_REGISTER(vlan_unconfig,
            em_unregister_vlan, adapter, EVENTHANDLER_PRI_FIRST);

        em_add_hw_stats(adapter);

        /* Non-AMT based hardware can now take control from firmware */
        if (adapter->has_manage && !adapter->has_amt)
                em_get_hw_control(adapter);

        /* Tell the stack that the interface is not active */
        adapter->ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
        adapter->ifp->if_drv_flags |= IFF_DRV_OACTIVE;

        adapter->led_dev = led_create(em_led_func, adapter,
            device_get_nameunit(dev));
#ifdef DEV_NETMAP
        em_netmap_attach(adapter);
#endif /* DEV_NETMAP */

        INIT_DEBUGOUT("em_attach: end");

        return (0);

err_late:
        em_free_transmit_structures(adapter);
        em_free_receive_structures(adapter);
        em_release_hw_control(adapter);
        if (adapter->ifp != NULL)
                if_free(adapter->ifp);
err_pci:
        em_free_pci_resources(adapter);
        free(adapter->mta, M_DEVBUF);
        EM_CORE_LOCK_DESTROY(adapter);

        return (error);
}

/*********************************************************************
 *  Device removal routine
 *
 *  The detach entry point is called when the driver is being removed.
 *  This routine stops the adapter and deallocates all the resources
 *  that were allocated for driver operation.
 *
 *  return 0 on success, positive on failure
 *********************************************************************/

static int
em_detach(device_t dev)
{
        struct adapter  *adapter = device_get_softc(dev);
        struct ifnet    *ifp = adapter->ifp;

        INIT_DEBUGOUT("em_detach: begin");

        /* Make sure VLANs are not using the driver */
        if (adapter->ifp->if_vlantrunk != NULL) {
                device_printf(dev,"Vlan in use, detach first\n");
                return (EBUSY);
        }

#ifdef DEVICE_POLLING
        if (ifp->if_capenable & IFCAP_POLLING)
                ether_poll_deregister(ifp);
#endif

        if (adapter->led_dev != NULL)
                led_destroy(adapter->led_dev);

        EM_CORE_LOCK(adapter);
        adapter->in_detach = 1;
        em_stop(adapter);
        EM_CORE_UNLOCK(adapter);
        EM_CORE_LOCK_DESTROY(adapter);

        e1000_phy_hw_reset(&adapter->hw);

        em_release_manageability(adapter);
        em_release_hw_control(adapter);

        /* Unregister VLAN events */
        if (adapter->vlan_attach != NULL)
                EVENTHANDLER_DEREGISTER(vlan_config, adapter->vlan_attach);
        if (adapter->vlan_detach != NULL)
                EVENTHANDLER_DEREGISTER(vlan_unconfig, adapter->vlan_detach);

        ether_ifdetach(adapter->ifp);
        callout_drain(&adapter->timer);

#ifdef DEV_NETMAP
        netmap_detach(ifp);
#endif /* DEV_NETMAP */

        em_free_pci_resources(adapter);
        bus_generic_detach(dev);
        if_free(ifp);

        em_free_transmit_structures(adapter);
        em_free_receive_structures(adapter);

        em_release_hw_control(adapter);
        free(adapter->mta, M_DEVBUF);

        return (0);
}

/*********************************************************************
 *
 *  Shutdown entry point
 *
 **********************************************************************/

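/*
 * Shutdown reuses the suspend path: release manageability and
 * hardware control, arm the wakeup logic, and let the bus code
 * do the generic work.
 */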
static int
em_shutdown(device_t dev)
{
        return em_suspend(dev);
}

/*
 * Suspend/resume device methods.
 */
static int
em_suspend(device_t dev)
{
        struct adapter *adapter = device_get_softc(dev);

        EM_CORE_LOCK(adapter);

        em_release_manageability(adapter);
        em_release_hw_control(adapter);
        em_enable_wakeup(dev);

        EM_CORE_UNLOCK(adapter);

        return bus_generic_suspend(dev);
}

static int
em_resume(device_t dev)
{
        struct adapter *adapter = device_get_softc(dev);
        struct tx_ring  *txr = adapter->tx_rings;
        struct ifnet *ifp = adapter->ifp;

        EM_CORE_LOCK(adapter);
        if (adapter->hw.mac.type == e1000_pch2lan)
                e1000_resume_workarounds_pchlan(&adapter->hw);
        em_init_locked(adapter);
        em_init_manageability(adapter);

        if ((ifp->if_flags & IFF_UP) &&
            (ifp->if_drv_flags & IFF_DRV_RUNNING) && adapter->link_active) {
                for (int i = 0; i < adapter->num_queues; i++, txr++) {
                        EM_TX_LOCK(txr);
#ifdef EM_MULTIQUEUE
                        if (!drbr_empty(ifp, txr->br))
                                em_mq_start_locked(ifp, txr);
#else
                        if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
                                em_start_locked(ifp, txr);
#endif
                        EM_TX_UNLOCK(txr);
                }
        }
        EM_CORE_UNLOCK(adapter);

        return bus_generic_resume(dev);
}

#ifndef EM_MULTIQUEUE
static void
em_start_locked(struct ifnet *ifp, struct tx_ring *txr)
{
        struct adapter  *adapter = ifp->if_softc;
        struct mbuf     *m_head;

        EM_TX_LOCK_ASSERT(txr);

        if ((ifp->if_drv_flags & (IFF_DRV_RUNNING|IFF_DRV_OACTIVE)) !=
            IFF_DRV_RUNNING)
                return;

        if (!adapter->link_active)
                return;

        while (!IFQ_DRV_IS_EMPTY(&ifp->if_snd)) {
                /* Call cleanup if number of TX descriptors low */
                if (txr->tx_avail <= EM_TX_CLEANUP_THRESHOLD)
                        em_txeof(txr);
                if (txr->tx_avail < EM_MAX_SCATTER) {
                        ifp->if_drv_flags |= IFF_DRV_OACTIVE;
                        break;
                }
                IFQ_DRV_DEQUEUE(&ifp->if_snd, m_head);
                if (m_head == NULL)
                        break;
                /*
                 *  Encapsulation can modify our pointer, and/or make it
                 *  NULL on failure.  In that event, we can't requeue.
                 */
                if (em_xmit(txr, &m_head)) {
                        if (m_head == NULL)
                                break;
                        IFQ_DRV_PREPEND(&ifp->if_snd, m_head);
                        break;
                }

                /* Mark the queue as having work */
                if (txr->busy == EM_TX_IDLE)
                        txr->busy = EM_TX_BUSY;

                /* Send a copy of the frame to the BPF listener */
                ETHER_BPF_MTAP(ifp, m_head);
        }

        return;
}

static void
em_start(struct ifnet *ifp)
{
        struct adapter  *adapter = ifp->if_softc;
        struct tx_ring  *txr = adapter->tx_rings;

        if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
                EM_TX_LOCK(txr);
                em_start_locked(ifp, txr);
                EM_TX_UNLOCK(txr);
        }
        return;
}
#else /* EM_MULTIQUEUE */
/*********************************************************************
 *  Multiqueue Transmit routines
 *
 *  em_mq_start is called by the stack to initiate a transmit.
 *  However, if the ring is busy the driver can queue the request
 *  rather than doing an immediate send; it is this deferral, more
 *  than having multiple tx queues, that is the advantage in this
 *  driver.
 **********************************************************************/
/*
** Multiqueue capable stack interface
*/
static int
em_mq_start(struct ifnet *ifp, struct mbuf *m)
{
        struct adapter  *adapter = ifp->if_softc;
        struct tx_ring  *txr = adapter->tx_rings;
        unsigned int    i, error;

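        /*
         * Select a TX ring: if the mbuf carries a flow id (e.g. an RSS
         * hash) use it, so that a given flow always maps to the same
         * ring; otherwise fall back on the current CPU, spreading the
         * load across queues.
         */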
        if (M_HASHTYPE_GET(m) != M_HASHTYPE_NONE)
                i = m->m_pkthdr.flowid % adapter->num_queues;
        else
                i = curcpu % adapter->num_queues;

        txr = &adapter->tx_rings[i];

        error = drbr_enqueue(ifp, txr->br, m);
        if (error)
                return (error);

        if (EM_TX_TRYLOCK(txr)) {
                em_mq_start_locked(ifp, txr);
                EM_TX_UNLOCK(txr);
        } else
                taskqueue_enqueue(txr->tq, &txr->tx_task);

        return (0);
}

static int
em_mq_start_locked(struct ifnet *ifp, struct tx_ring *txr)
{
        struct adapter  *adapter = txr->adapter;
        struct mbuf     *next;
        int             err = 0, enq = 0;

        if ((ifp->if_drv_flags & (IFF_DRV_RUNNING | IFF_DRV_OACTIVE)) !=
            IFF_DRV_RUNNING || adapter->link_active == 0) {
                return (ENETDOWN);
        }

        /* Process the queue */
        while ((next = drbr_peek(ifp, txr->br)) != NULL) {
                if ((err = em_xmit(txr, &next)) != 0) {
                        if (next == NULL) {
                                /* It was freed, move forward */
                                drbr_advance(ifp, txr->br);
                        } else {
                                /*
                                 * Still have one left; it may not be
                                 * the same mbuf, since the transmit
                                 * function may have changed it.
                                 */
                                drbr_putback(ifp, txr->br, next);
                        }
                        break;
                }
                drbr_advance(ifp, txr->br);
                enq++;
                ifp->if_obytes += next->m_pkthdr.len;
                if (next->m_flags & M_MCAST)
                        ifp->if_omcasts++;
                ETHER_BPF_MTAP(ifp, next);
                if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
                        break;
        }

        /* Mark the queue as having work */
        if ((enq > 0) && (txr->busy == EM_TX_IDLE))
                txr->busy = EM_TX_BUSY;

        if (txr->tx_avail < EM_MAX_SCATTER)
                em_txeof(txr);
        if (txr->tx_avail < EM_MAX_SCATTER) {
                ifp->if_drv_flags |= IFF_DRV_OACTIVE;
        }
        return (err);
}

/*
** Flush all ring buffers
*/
static void
em_qflush(struct ifnet *ifp)
{
        struct adapter  *adapter = ifp->if_softc;
        struct tx_ring  *txr = adapter->tx_rings;
        struct mbuf     *m;

        for (int i = 0; i < adapter->num_queues; i++, txr++) {
                EM_TX_LOCK(txr);
                while ((m = buf_ring_dequeue_sc(txr->br)) != NULL)
                        m_freem(m);
                EM_TX_UNLOCK(txr);
        }
        if_qflush(ifp);
}
#endif /* EM_MULTIQUEUE */
1141
1142 /*********************************************************************
1143  *  Ioctl entry point
1144  *
1145  *  em_ioctl is called when the user wants to configure the
1146  *  interface.
1147  *
1148  *  return 0 on success, positive on failure
1149  **********************************************************************/
1150
1151 static int
1152 em_ioctl(struct ifnet *ifp, u_long command, caddr_t data)
1153 {
1154         struct adapter  *adapter = ifp->if_softc;
1155         struct ifreq    *ifr = (struct ifreq *)data;
1156 #if defined(INET) || defined(INET6)
1157         struct ifaddr   *ifa = (struct ifaddr *)data;
1158 #endif
1159         bool            avoid_reset = FALSE;
1160         int             error = 0;
1161
1162         if (adapter->in_detach)
1163                 return (error);
1164
1165         switch (command) {
1166         case SIOCSIFADDR:
1167 #ifdef INET
1168                 if (ifa->ifa_addr->sa_family == AF_INET)
1169                         avoid_reset = TRUE;
1170 #endif
1171 #ifdef INET6
1172                 if (ifa->ifa_addr->sa_family == AF_INET6)
1173                         avoid_reset = TRUE;
1174 #endif
1175                 /*
1176                 ** Calling init results in link renegotiation,
1177                 ** so we avoid doing it when possible.
1178                 */
1179                 if (avoid_reset) {
1180                         ifp->if_flags |= IFF_UP;
1181                         if (!(ifp->if_drv_flags & IFF_DRV_RUNNING))
1182                                 em_init(adapter);
1183 #ifdef INET
1184                         if (!(ifp->if_flags & IFF_NOARP))
1185                                 arp_ifinit(ifp, ifa);
1186 #endif
1187                 } else
1188                         error = ether_ioctl(ifp, command, data);
1189                 break;
1190         case SIOCSIFMTU:
1191             {
1192                 int max_frame_size;
1193
1194                 IOCTL_DEBUGOUT("ioctl rcv'd: SIOCSIFMTU (Set Interface MTU)");
1195
1196                 EM_CORE_LOCK(adapter);
1197                 switch (adapter->hw.mac.type) {
1198                 case e1000_82571:
1199                 case e1000_82572:
1200                 case e1000_ich9lan:
1201                 case e1000_ich10lan:
1202                 case e1000_pch2lan:
1203                 case e1000_pch_lpt:
1204                 case e1000_pch_spt:
1205                 case e1000_82574:
1206                 case e1000_82583:
1207                 case e1000_80003es2lan: /* 9K Jumbo Frame size */
1208                         max_frame_size = 9234;
1209                         break;
1210                 case e1000_pchlan:
1211                         max_frame_size = 4096;
1212                         break;
1213                         /* Adapters that do not support jumbo frames */
1214                 case e1000_ich8lan:
1215                         max_frame_size = ETHER_MAX_LEN;
1216                         break;
1217                 default:
1218                         max_frame_size = MAX_JUMBO_FRAME_SIZE;
1219                 }
1220                 if (ifr->ifr_mtu > max_frame_size - ETHER_HDR_LEN -
1221                     ETHER_CRC_LEN) {
1222                         EM_CORE_UNLOCK(adapter);
1223                         error = EINVAL;
1224                         break;
1225                 }
1226
1227                 ifp->if_mtu = ifr->ifr_mtu;
1228                 adapter->hw.mac.max_frame_size =
1229                     ifp->if_mtu + ETHER_HDR_LEN + ETHER_CRC_LEN;
1230                 if (ifp->if_drv_flags & IFF_DRV_RUNNING)
1231                         em_init_locked(adapter);
1232                 EM_CORE_UNLOCK(adapter);
1233                 break;
1234             }
1235         case SIOCSIFFLAGS:
1236                 IOCTL_DEBUGOUT("ioctl rcv'd:\
1237                     SIOCSIFFLAGS (Set Interface Flags)");
1238                 EM_CORE_LOCK(adapter);
1239                 if (ifp->if_flags & IFF_UP) {
1240                         if ((ifp->if_drv_flags & IFF_DRV_RUNNING)) {
1241                                 if ((ifp->if_flags ^ adapter->if_flags) &
1242                                     (IFF_PROMISC | IFF_ALLMULTI)) {
1243                                         em_disable_promisc(adapter);
1244                                         em_set_promisc(adapter);
1245                                 }
1246                         } else
1247                                 em_init_locked(adapter);
1248                 } else
1249                         if (ifp->if_drv_flags & IFF_DRV_RUNNING)
1250                                 em_stop(adapter);
1251                 adapter->if_flags = ifp->if_flags;
1252                 EM_CORE_UNLOCK(adapter);
1253                 break;
1254         case SIOCADDMULTI:
1255         case SIOCDELMULTI:
1256                 IOCTL_DEBUGOUT("ioctl rcv'd: SIOC(ADD|DEL)MULTI");
1257                 if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
1258                         EM_CORE_LOCK(adapter);
1259                         em_disable_intr(adapter);
1260                         em_set_multi(adapter);
1261 #ifdef DEVICE_POLLING
1262                         if (!(ifp->if_capenable & IFCAP_POLLING))
1263 #endif
1264                                 em_enable_intr(adapter);
1265                         EM_CORE_UNLOCK(adapter);
1266                 }
1267                 break;
1268         case SIOCSIFMEDIA:
1269                 /* Check SOL/IDER usage */
1270                 EM_CORE_LOCK(adapter);
1271                 if (e1000_check_reset_block(&adapter->hw)) {
1272                         EM_CORE_UNLOCK(adapter);
1273                         device_printf(adapter->dev, "Media change is"
1274                             " blocked due to SOL/IDER session.\n");
1275                         break;
1276                 }
1277                 EM_CORE_UNLOCK(adapter);
1278                 /* falls thru */
1279         case SIOCGIFMEDIA:
1280                 IOCTL_DEBUGOUT("ioctl rcv'd: \
1281                     SIOCxIFMEDIA (Get/Set Interface Media)");
1282                 error = ifmedia_ioctl(ifp, ifr, &adapter->media, command);
1283                 break;
1284         case SIOCSIFCAP:
1285             {
1286                 int mask, reinit;
1287
1288                 IOCTL_DEBUGOUT("ioctl rcv'd: SIOCSIFCAP (Set Capabilities)");
1289                 reinit = 0;
1290                 mask = ifr->ifr_reqcap ^ ifp->if_capenable;
1291 #ifdef DEVICE_POLLING
1292                 if (mask & IFCAP_POLLING) {
1293                         if (ifr->ifr_reqcap & IFCAP_POLLING) {
1294                                 error = ether_poll_register(em_poll, ifp);
1295                                 if (error)
1296                                         return (error);
1297                                 EM_CORE_LOCK(adapter);
1298                                 em_disable_intr(adapter);
1299                                 ifp->if_capenable |= IFCAP_POLLING;
1300                                 EM_CORE_UNLOCK(adapter);
1301                         } else {
1302                                 error = ether_poll_deregister(ifp);
1303                                 /* Enable interrupt even in error case */
1304                                 EM_CORE_LOCK(adapter);
1305                                 em_enable_intr(adapter);
1306                                 ifp->if_capenable &= ~IFCAP_POLLING;
1307                                 EM_CORE_UNLOCK(adapter);
1308                         }
1309                 }
1310 #endif
1311                 if (mask & IFCAP_HWCSUM) {
1312                         ifp->if_capenable ^= IFCAP_HWCSUM;
1313                         reinit = 1;
1314                 }
1315                 if (mask & IFCAP_TSO4) {
1316                         ifp->if_capenable ^= IFCAP_TSO4;
1317                         reinit = 1;
1318                 }
1319                 if (mask & IFCAP_VLAN_HWTAGGING) {
1320                         ifp->if_capenable ^= IFCAP_VLAN_HWTAGGING;
1321                         reinit = 1;
1322                 }
1323                 if (mask & IFCAP_VLAN_HWFILTER) {
1324                         ifp->if_capenable ^= IFCAP_VLAN_HWFILTER;
1325                         reinit = 1;
1326                 }
1327                 if (mask & IFCAP_VLAN_HWTSO) {
1328                         ifp->if_capenable ^= IFCAP_VLAN_HWTSO;
1329                         reinit = 1;
1330                 }
1331                 if ((mask & IFCAP_WOL) &&
1332                     (ifp->if_capabilities & IFCAP_WOL) != 0) {
1333                         if (mask & IFCAP_WOL_MCAST)
1334                                 ifp->if_capenable ^= IFCAP_WOL_MCAST;
1335                         if (mask & IFCAP_WOL_MAGIC)
1336                                 ifp->if_capenable ^= IFCAP_WOL_MAGIC;
1337                 }
1338                 if (reinit && (ifp->if_drv_flags & IFF_DRV_RUNNING))
1339                         em_init(adapter);
1340                 VLAN_CAPABILITIES(ifp);
1341                 break;
1342             }
1343
1344         default:
1345                 error = ether_ioctl(ifp, command, data);
1346                 break;
1347         }
1348
1349         return (error);
1350 }
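/*
 * Illustration of the SIOCSIFCAP logic above (a sketch with made-up
 * values, not part of the driver): mask is the XOR of the requested
 * and currently enabled capability bits, so exactly the capabilities
 * that change state get toggled.
 */
#if 0   /* illustrative only */
        int capenable = IFCAP_HWCSUM | IFCAP_TSO4;      /* currently on */
        int reqcap    = IFCAP_HWCSUM;                   /* user request */
        int mask      = reqcap ^ capenable;             /* == IFCAP_TSO4 */
        /* (mask & IFCAP_TSO4) is set, so TSO4 is toggled off and, if the
           interface is running, em_init() reinitializes the hardware. */
#endif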
1351
1352
1353 /*********************************************************************
1354  *  Init entry point
1355  *
1356  *  This routine is used in two ways. It is used by the stack as
1357  *  the init entry point in the network interface structure. It is
1358  *  also used by the driver as a hw/sw initialization routine to get
1359  *  to a consistent state.
1360  *
1361  *  Returns nothing; on failure the interface is stopped.
1362  **********************************************************************/
1363
1364 static void
1365 em_init_locked(struct adapter *adapter)
1366 {
1367         struct ifnet    *ifp = adapter->ifp;
1368         device_t        dev = adapter->dev;
1369
1370         INIT_DEBUGOUT("em_init: begin");
1371
1372         EM_CORE_LOCK_ASSERT(adapter);
1373
1374         em_disable_intr(adapter);
1375         callout_stop(&adapter->timer);
1376
1377         /* Get the latest mac address; the user can use a LAA */
1378         bcopy(IF_LLADDR(adapter->ifp), adapter->hw.mac.addr,
1379               ETHER_ADDR_LEN);
1380
1381         /* Put the address into the Receive Address Array */
1382         e1000_rar_set(&adapter->hw, adapter->hw.mac.addr, 0);
1383
1384         /*
1385          * With the 82571 adapter, RAR[0] may be overwritten
1386          * when the other port is reset; we make a duplicate
1387          * in RAR[14] for that eventuality, which ensures
1388          * the interface continues to function.
1389          */
1390         if (adapter->hw.mac.type == e1000_82571) {
1391                 e1000_set_laa_state_82571(&adapter->hw, TRUE);
1392                 e1000_rar_set(&adapter->hw, adapter->hw.mac.addr,
1393                     E1000_RAR_ENTRIES - 1);
1394         }
1395
1396         /* Initialize the hardware */
1397         em_reset(adapter);
1398         em_update_link_status(adapter);
1399
1400         /* Setup VLAN support, basic and offload if available */
1401         E1000_WRITE_REG(&adapter->hw, E1000_VET, ETHERTYPE_VLAN);
1402
1403         /* Set hardware offload abilities */
1404         if (ifp->if_capenable & IFCAP_TXCSUM)
1405                 ifp->if_hwassist |= (CSUM_TCP | CSUM_UDP);
1406         else
1407                 ifp->if_hwassist &= ~(CSUM_TCP | CSUM_UDP);
1408
1409         /* Configure for OS presence */
1410         em_init_manageability(adapter);
1411
1412         /* Prepare transmit descriptors and buffers */
1413         em_setup_transmit_structures(adapter);
1414         em_initialize_transmit_unit(adapter);
1415
1416         /* Setup Multicast table */
1417         em_set_multi(adapter);
1418
1419         /*
1420         ** Figure out the desired mbuf
1421         ** pool for doing jumbos
1422         */
1423         if (adapter->hw.mac.max_frame_size <= 2048)
1424                 adapter->rx_mbuf_sz = MCLBYTES;
1425         else if (adapter->hw.mac.max_frame_size <= 4096)
1426                 adapter->rx_mbuf_sz = MJUMPAGESIZE;
1427         else
1428                 adapter->rx_mbuf_sz = MJUM9BYTES;
1429
1430         /* Prepare receive descriptors and buffers */
1431         if (em_setup_receive_structures(adapter)) {
1432                 device_printf(dev, "Could not setup receive structures\n");
1433                 em_stop(adapter);
1434                 return;
1435         }
1436         em_initialize_receive_unit(adapter);
1437
1438         /* Use real VLAN Filter support? */
1439         if (ifp->if_capenable & IFCAP_VLAN_HWTAGGING) {
1440                 if (ifp->if_capenable & IFCAP_VLAN_HWFILTER)
1441                         /* Use real VLAN Filter support */
1442                         em_setup_vlan_hw_support(adapter);
1443                 else {
1444                         u32 ctrl;
1445                         ctrl = E1000_READ_REG(&adapter->hw, E1000_CTRL);
1446                         ctrl |= E1000_CTRL_VME;
1447                         E1000_WRITE_REG(&adapter->hw, E1000_CTRL, ctrl);
1448                 }
1449         }
1450
1451         /* Don't lose promiscuous settings */
1452         em_set_promisc(adapter);
1453
1454         /* Set the interface as ACTIVE */
1455         ifp->if_drv_flags |= IFF_DRV_RUNNING;
1456         ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;
1457
1458         callout_reset(&adapter->timer, hz, em_local_timer, adapter);
1459         e1000_clear_hw_cntrs_base_generic(&adapter->hw);
1460
1461         /* MSI/X configuration for 82574 */
1462         if (adapter->hw.mac.type == e1000_82574) {
1463                 int tmp;
1464                 tmp = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
1465                 tmp |= E1000_CTRL_EXT_PBA_CLR;
1466                 E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT, tmp);
1467                 /* Set the IVAR - interrupt vector routing. */
1468                 E1000_WRITE_REG(&adapter->hw, E1000_IVAR, adapter->ivars);
1469         }
1470
1471 #ifdef DEVICE_POLLING
1472         /*
1473          * Only enable interrupts if we are not polling; make sure
1474          * they are off otherwise.
1475          */
1476         if (ifp->if_capenable & IFCAP_POLLING)
1477                 em_disable_intr(adapter);
1478         else
1479 #endif /* DEVICE_POLLING */
1480                 em_enable_intr(adapter);
1481
1482         /* AMT based hardware can now take control from firmware */
1483         if (adapter->has_manage && adapter->has_amt)
1484                 em_get_hw_control(adapter);
1485 }
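/*
 * The rx mbuf pool selection above picks the smallest cluster size
 * that fits the configured maximum frame; with the standard FreeBSD
 * constants this works out to (sizes assumed, shown for illustration):
 *
 *   max_frame_size <= 2048 (e.g. 1518, standard MTU) -> MCLBYTES (2k)
 *   max_frame_size <= 4096                           -> MJUMPAGESIZE
 *   anything larger (e.g. 9018, 9k jumbo)            -> MJUM9BYTES (9k)
 */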
1486
1487 static void
1488 em_init(void *arg)
1489 {
1490         struct adapter *adapter = arg;
1491
1492         EM_CORE_LOCK(adapter);
1493         em_init_locked(adapter);
1494         EM_CORE_UNLOCK(adapter);
1495 }
1496
1497
1498 #ifdef DEVICE_POLLING
1499 /*********************************************************************
1500  *
1501  *  Legacy polling routine: note this only works with a single queue
1502  *
1503  *********************************************************************/
1504 static int
1505 em_poll(struct ifnet *ifp, enum poll_cmd cmd, int count)
1506 {
1507         struct adapter *adapter = ifp->if_softc;
1508         struct tx_ring  *txr = adapter->tx_rings;
1509         struct rx_ring  *rxr = adapter->rx_rings;
1510         u32             reg_icr;
1511         int             rx_done;
1512
1513         EM_CORE_LOCK(adapter);
1514         if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) {
1515                 EM_CORE_UNLOCK(adapter);
1516                 return (0);
1517         }
1518
1519         if (cmd == POLL_AND_CHECK_STATUS) {
1520                 reg_icr = E1000_READ_REG(&adapter->hw, E1000_ICR);
1521                 if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
1522                         callout_stop(&adapter->timer);
1523                         adapter->hw.mac.get_link_status = 1;
1524                         em_update_link_status(adapter);
1525                         callout_reset(&adapter->timer, hz,
1526                             em_local_timer, adapter);
1527                 }
1528         }
1529         EM_CORE_UNLOCK(adapter);
1530
1531         em_rxeof(rxr, count, &rx_done);
1532
1533         EM_TX_LOCK(txr);
1534         em_txeof(txr);
1535 #ifdef EM_MULTIQUEUE
1536         if (!drbr_empty(ifp, txr->br))
1537                 em_mq_start_locked(ifp, txr);
1538 #else
1539         if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
1540                 em_start_locked(ifp, txr);
1541 #endif
1542         EM_TX_UNLOCK(txr);
1543
1544         return (rx_done);
1545 }
1546 #endif /* DEVICE_POLLING */
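/*
 * Usage sketch (interface name assumed): with "options DEVICE_POLLING"
 * compiled in, polling is toggled from userland through the
 * IFCAP_POLLING path in em_ioctl():
 *
 *   ifconfig em0 polling     # ether_poll_register(em_poll, ifp)
 *   ifconfig em0 -polling    # deregister, re-enable interrupts
 */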
1547
1548
1549 /*********************************************************************
1550  *
1551  *  Fast Legacy/MSI Combined Interrupt Service routine  
1552  *
1553  *********************************************************************/
1554 static int
1555 em_irq_fast(void *arg)
1556 {
1557         struct adapter  *adapter = arg;
1558         struct ifnet    *ifp;
1559         u32             reg_icr;
1560
1561         ifp = adapter->ifp;
1562
1563         reg_icr = E1000_READ_REG(&adapter->hw, E1000_ICR);
1564
1565         /* Hot eject?  */
1566         if (reg_icr == 0xffffffff)
1567                 return FILTER_STRAY;
1568
1569         /* Definitely not our interrupt.  */
1570         if (reg_icr == 0x0)
1571                 return FILTER_STRAY;
1572
1573         /*
1574          * Starting with the 82571 chip, bit 31 should be used to
1575          * determine whether the interrupt belongs to us.
1576          */
1577         if (adapter->hw.mac.type >= e1000_82571 &&
1578             (reg_icr & E1000_ICR_INT_ASSERTED) == 0)
1579                 return FILTER_STRAY;
1580
1581         em_disable_intr(adapter);
1582         taskqueue_enqueue(adapter->tq, &adapter->que_task);
1583
1584         /* Link status change */
1585         if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
1586                 adapter->hw.mac.get_link_status = 1;
1587                 taskqueue_enqueue(taskqueue_fast, &adapter->link_task);
1588         }
1589
1590         if (reg_icr & E1000_ICR_RXO)
1591                 adapter->rx_overruns++;
1592         return FILTER_HANDLED;
1593 }
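/*
 * Design note: em_irq_fast() is a filter-style handler, so it only
 * reads ICR, masks further interrupts and returns FILTER_HANDLED;
 * the actual mbuf processing is deferred to em_handle_que() below,
 * which re-enables interrupts once the rings have been drained.
 */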
1594
1595 /* Combined RX/TX handler, used by Legacy and MSI */
1596 static void
1597 em_handle_que(void *context, int pending)
1598 {
1599         struct adapter  *adapter = context;
1600         struct ifnet    *ifp = adapter->ifp;
1601         struct tx_ring  *txr = adapter->tx_rings;
1602         struct rx_ring  *rxr = adapter->rx_rings;
1603
1604         if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
1605                 bool more = em_rxeof(rxr, adapter->rx_process_limit, NULL);
1606
1607                 EM_TX_LOCK(txr);
1608                 em_txeof(txr);
1609 #ifdef EM_MULTIQUEUE
1610                 if (!drbr_empty(ifp, txr->br))
1611                         em_mq_start_locked(ifp, txr);
1612 #else
1613                 if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
1614                         em_start_locked(ifp, txr);
1615 #endif
1616                 EM_TX_UNLOCK(txr);
1617                 if (more) {
1618                         taskqueue_enqueue(adapter->tq, &adapter->que_task);
1619                         return;
1620                 }
1621         }
1622
1623         em_enable_intr(adapter);
1624         return;
1625 }
1626
1627
1628 /*********************************************************************
1629  *
1630  *  MSIX Interrupt Service Routines
1631  *
1632  **********************************************************************/
1633 static void
1634 em_msix_tx(void *arg)
1635 {
1636         struct tx_ring *txr = arg;
1637         struct adapter *adapter = txr->adapter;
1638         struct ifnet    *ifp = adapter->ifp;
1639
1640         ++txr->tx_irq;
1641         EM_TX_LOCK(txr);
1642         em_txeof(txr);
1643 #ifdef EM_MULTIQUEUE
1644         if (!drbr_empty(ifp, txr->br))
1645                 em_mq_start_locked(ifp, txr);
1646 #else
1647         if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
1648                 em_start_locked(ifp, txr);
1649 #endif
1650
1651         /* Reenable this interrupt */
1652         E1000_WRITE_REG(&adapter->hw, E1000_IMS, txr->ims);
1653         EM_TX_UNLOCK(txr);
1654         return;
1655 }
1656
1657 /*********************************************************************
1658  *
1659  *  MSIX RX Interrupt Service routine
1660  *
1661  **********************************************************************/
1662
1663 static void
1664 em_msix_rx(void *arg)
1665 {
1666         struct rx_ring  *rxr = arg;
1667         struct adapter  *adapter = rxr->adapter;
1668         bool            more;
1669
1670         ++rxr->rx_irq;
1671         if (!(adapter->ifp->if_drv_flags & IFF_DRV_RUNNING))
1672                 return;
1673         more = em_rxeof(rxr, adapter->rx_process_limit, NULL);
1674         if (more)
1675                 taskqueue_enqueue(rxr->tq, &rxr->rx_task);
1676         else {
1677                 /* Reenable this interrupt */
1678                 E1000_WRITE_REG(&adapter->hw, E1000_IMS, rxr->ims);
1679         }
1680         return;
1681 }
1682
1683 /*********************************************************************
1684  *
1685  *  MSIX Link Fast Interrupt Service routine
1686  *
1687  **********************************************************************/
1688 static void
1689 em_msix_link(void *arg)
1690 {
1691         struct adapter  *adapter = arg;
1692         u32             reg_icr;
1693
1694         ++adapter->link_irq;
1695         reg_icr = E1000_READ_REG(&adapter->hw, E1000_ICR);
1696
1697         if (reg_icr & E1000_ICR_RXO)
1698                 adapter->rx_overruns++;
1699
1700         if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
1701                 adapter->hw.mac.get_link_status = 1;
1702                 em_handle_link(adapter, 0);
1703         } else
1704                 E1000_WRITE_REG(&adapter->hw, E1000_IMS,
1705                     EM_MSIX_LINK | E1000_IMS_LSC);
1706         /*
1707         ** Because we must read the ICR for this interrupt,
1708         ** it may clear other causes using autoclear; for
1709         ** this reason we simply create a soft interrupt
1710         ** for all these vectors.
1711         */
1712         if (reg_icr) {
1713                 E1000_WRITE_REG(&adapter->hw,
1714                         E1000_ICS, adapter->ims);
1715         }
1716         return;
1717 }
1718
1719 static void
1720 em_handle_rx(void *context, int pending)
1721 {
1722         struct rx_ring  *rxr = context;
1723         struct adapter  *adapter = rxr->adapter;
1724         bool            more;
1725
1726         more = em_rxeof(rxr, adapter->rx_process_limit, NULL);
1727         if (more)
1728                 taskqueue_enqueue(rxr->tq, &rxr->rx_task);
1729         else {
1730                 /* Reenable this interrupt */
1731                 E1000_WRITE_REG(&adapter->hw, E1000_IMS, rxr->ims);
1732         }
1733 }
1734
1735 static void
1736 em_handle_tx(void *context, int pending)
1737 {
1738         struct tx_ring  *txr = context;
1739         struct adapter  *adapter = txr->adapter;
1740         struct ifnet    *ifp = adapter->ifp;
1741
1742         EM_TX_LOCK(txr);
1743         em_txeof(txr);
1744 #ifdef EM_MULTIQUEUE
1745         if (!drbr_empty(ifp, txr->br))
1746                 em_mq_start_locked(ifp, txr);
1747 #else
1748         if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
1749                 em_start_locked(ifp, txr);
1750 #endif
1751         E1000_WRITE_REG(&adapter->hw, E1000_IMS, txr->ims);
1752         EM_TX_UNLOCK(txr);
1753 }
1754
1755 static void
1756 em_handle_link(void *context, int pending)
1757 {
1758         struct adapter  *adapter = context;
1759         struct tx_ring  *txr = adapter->tx_rings;
1760         struct ifnet *ifp = adapter->ifp;
1761
1762         if (!(ifp->if_drv_flags & IFF_DRV_RUNNING))
1763                 return;
1764
1765         EM_CORE_LOCK(adapter);
1766         callout_stop(&adapter->timer);
1767         em_update_link_status(adapter);
1768         callout_reset(&adapter->timer, hz, em_local_timer, adapter);
1769         E1000_WRITE_REG(&adapter->hw, E1000_IMS,
1770             EM_MSIX_LINK | E1000_IMS_LSC);
1771         if (adapter->link_active) {
1772                 for (int i = 0; i < adapter->num_queues; i++, txr++) {
1773                         EM_TX_LOCK(txr);
1774 #ifdef EM_MULTIQUEUE
1775                         if (!drbr_empty(ifp, txr->br))
1776                                 em_mq_start_locked(ifp, txr);
1777 #else
1778                         if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
1779                                 em_start_locked(ifp, txr);
1780 #endif
1781                         EM_TX_UNLOCK(txr);
1782                 }
1783         }
1784         EM_CORE_UNLOCK(adapter);
1785 }
1786
1787
1788 /*********************************************************************
1789  *
1790  *  Media Ioctl callback
1791  *
1792  *  This routine is called whenever the user queries the status of
1793  *  the interface using ifconfig.
1794  *
1795  **********************************************************************/
1796 static void
1797 em_media_status(struct ifnet *ifp, struct ifmediareq *ifmr)
1798 {
1799         struct adapter *adapter = ifp->if_softc;
1800         u_char fiber_type = IFM_1000_SX;
1801
1802         INIT_DEBUGOUT("em_media_status: begin");
1803
1804         EM_CORE_LOCK(adapter);
1805         em_update_link_status(adapter);
1806
1807         ifmr->ifm_status = IFM_AVALID;
1808         ifmr->ifm_active = IFM_ETHER;
1809
1810         if (!adapter->link_active) {
1811                 EM_CORE_UNLOCK(adapter);
1812                 return;
1813         }
1814
1815         ifmr->ifm_status |= IFM_ACTIVE;
1816
1817         if ((adapter->hw.phy.media_type == e1000_media_type_fiber) ||
1818             (adapter->hw.phy.media_type == e1000_media_type_internal_serdes)) {
1819                 ifmr->ifm_active |= fiber_type | IFM_FDX;
1820         } else {
1821                 switch (adapter->link_speed) {
1822                 case 10:
1823                         ifmr->ifm_active |= IFM_10_T;
1824                         break;
1825                 case 100:
1826                         ifmr->ifm_active |= IFM_100_TX;
1827                         break;
1828                 case 1000:
1829                         ifmr->ifm_active |= IFM_1000_T;
1830                         break;
1831                 }
1832                 if (adapter->link_duplex == FULL_DUPLEX)
1833                         ifmr->ifm_active |= IFM_FDX;
1834                 else
1835                         ifmr->ifm_active |= IFM_HDX;
1836         }
1837         EM_CORE_UNLOCK(adapter);
1838 }
1839
1840 /*********************************************************************
1841  *
1842  *  Media Ioctl callback
1843  *
1844  *  This routine is called when the user changes speed/duplex using
1845  *  media/mediaopt options with ifconfig.
1846  *
1847  **********************************************************************/
1848 static int
1849 em_media_change(struct ifnet *ifp)
1850 {
1851         struct adapter *adapter = ifp->if_softc;
1852         struct ifmedia  *ifm = &adapter->media;
1853
1854         INIT_DEBUGOUT("em_media_change: begin");
1855
1856         if (IFM_TYPE(ifm->ifm_media) != IFM_ETHER)
1857                 return (EINVAL);
1858
1859         EM_CORE_LOCK(adapter);
1860         switch (IFM_SUBTYPE(ifm->ifm_media)) {
1861         case IFM_AUTO:
1862                 adapter->hw.mac.autoneg = DO_AUTO_NEG;
1863                 adapter->hw.phy.autoneg_advertised = AUTONEG_ADV_DEFAULT;
1864                 break;
1865         case IFM_1000_LX:
1866         case IFM_1000_SX:
1867         case IFM_1000_T:
1868                 adapter->hw.mac.autoneg = DO_AUTO_NEG;
1869                 adapter->hw.phy.autoneg_advertised = ADVERTISE_1000_FULL;
1870                 break;
1871         case IFM_100_TX:
1872                 adapter->hw.mac.autoneg = FALSE;
1873                 adapter->hw.phy.autoneg_advertised = 0;
1874                 if ((ifm->ifm_media & IFM_GMASK) == IFM_FDX)
1875                         adapter->hw.mac.forced_speed_duplex = ADVERTISE_100_FULL;
1876                 else
1877                         adapter->hw.mac.forced_speed_duplex = ADVERTISE_100_HALF;
1878                 break;
1879         case IFM_10_T:
1880                 adapter->hw.mac.autoneg = FALSE;
1881                 adapter->hw.phy.autoneg_advertised = 0;
1882                 if ((ifm->ifm_media & IFM_GMASK) == IFM_FDX)
1883                         adapter->hw.mac.forced_speed_duplex = ADVERTISE_10_FULL;
1884                 else
1885                         adapter->hw.mac.forced_speed_duplex = ADVERTISE_10_HALF;
1886                 break;
1887         default:
1888                 device_printf(adapter->dev, "Unsupported media type\n");
1889         }
1890
1891         em_init_locked(adapter);
1892         EM_CORE_UNLOCK(adapter);
1893
1894         return (0);
1895 }
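/*
 * Usage sketch for the media change path (interface name assumed):
 *
 *   ifconfig em0 media autoselect
 *   ifconfig em0 media 100baseTX mediaopt full-duplex
 *
 * The second form lands in the IFM_100_TX case above, turning
 * autonegotiation off and forcing ADVERTISE_100_FULL.
 */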
1896
1897 /*********************************************************************
1898  *
1899  *  This routine maps the mbufs to tx descriptors.
1900  *
1901  *  return 0 on success, positive on failure
1902  **********************************************************************/
1903
1904 static int
1905 em_xmit(struct tx_ring *txr, struct mbuf **m_headp)
1906 {
1907         struct adapter          *adapter = txr->adapter;
1908         bus_dma_segment_t       segs[EM_MAX_SCATTER];
1909         bus_dmamap_t            map;
1910         struct em_txbuffer      *tx_buffer, *tx_buffer_mapped;
1911         struct e1000_tx_desc    *ctxd = NULL;
1912         struct mbuf             *m_head;
1913         struct ether_header     *eh;
1914         struct ip               *ip = NULL;
1915         struct tcphdr           *tp = NULL;
1916         u32                     txd_upper = 0, txd_lower = 0;
1917         int                     ip_off, poff;
1918         int                     nsegs, i, j, first, last = 0;
1919         int                     error;
1920         bool                    do_tso, tso_desc, remap = TRUE;
1921
1922         m_head = *m_headp;
1923         do_tso = m_head->m_pkthdr.csum_flags & CSUM_IP_TSO;
1924         tso_desc = FALSE;
1925         ip_off = poff = 0;
1926
1927         /*
1928          * Intel recommends entire IP/TCP header length reside in a single
1929          * buffer. If multiple descriptors are used to describe the IP and
1930          * TCP header, each descriptor should describe one or more
1931          * complete headers; descriptors referencing only parts of headers
1932          * are not supported. If all layer headers are not coalesced into
1933          * a single buffer, each buffer should not cross a 4KB boundary,
1934          * or be larger than the maximum read request size.
1935          * The controller also requires modifying the IP/TCP header to
1936          * make TSO work, so we first get a writable mbuf chain, then
1937          * coalesce the ethernet/IP/TCP headers into a single buffer to
1938          * meet the controller's requirement. This also simplifies
1939          * IP/TCP/UDP checksum offloading, which has similar restrictions.
1940          */
1941         if (do_tso || m_head->m_pkthdr.csum_flags & CSUM_OFFLOAD) {
1942                 if (do_tso || (m_head->m_next != NULL && 
1943                     m_head->m_pkthdr.csum_flags & CSUM_OFFLOAD)) {
1944                         if (M_WRITABLE(*m_headp) == 0) {
1945                                 m_head = m_dup(*m_headp, M_NOWAIT);
1946                                 m_freem(*m_headp);
1947                                 if (m_head == NULL) {
1948                                         *m_headp = NULL;
1949                                         return (ENOBUFS);
1950                                 }
1951                                 *m_headp = m_head;
1952                         }
1953                 }
1954                 /*
1955                  * XXX
1956                  * Assume IPv4, we don't have TSO/checksum offload support
1957                  * for IPv6 yet.
1958                  */
1959                 ip_off = sizeof(struct ether_header);
1960                 if (m_head->m_len < ip_off) {
1961                         m_head = m_pullup(m_head, ip_off);
1962                         if (m_head == NULL) {
1963                                 *m_headp = NULL;
1964                                 return (ENOBUFS);
1965                         }
1966                 }
1967                 eh = mtod(m_head, struct ether_header *);
1968                 if (eh->ether_type == htons(ETHERTYPE_VLAN)) {
1969                         ip_off = sizeof(struct ether_vlan_header);
1970                         if (m_head->m_len < ip_off) {
1971                                 m_head = m_pullup(m_head, ip_off);
1972                                 if (m_head == NULL) {
1973                                         *m_headp = NULL;
1974                                         return (ENOBUFS);
1975                                 }
1976                         }
1977                 }
1978                 if (m_head->m_len < ip_off + sizeof(struct ip)) {
1979                         m_head = m_pullup(m_head, ip_off + sizeof(struct ip));
1980                         if (m_head == NULL) {
1981                                 *m_headp = NULL;
1982                                 return (ENOBUFS);
1983                         }
1984                 }
1985                 ip = (struct ip *)(mtod(m_head, char *) + ip_off);
1986                 poff = ip_off + (ip->ip_hl << 2);
1987
1988                 if (do_tso || (m_head->m_pkthdr.csum_flags & CSUM_TCP)) {
1989                         if (m_head->m_len < poff + sizeof(struct tcphdr)) {
1990                                 m_head = m_pullup(m_head, poff +
1991                                     sizeof(struct tcphdr));
1992                                 if (m_head == NULL) {
1993                                         *m_headp = NULL;
1994                                         return (ENOBUFS);
1995                                 }
1996                         }
1997                         tp = (struct tcphdr *)(mtod(m_head, char *) + poff);
1998                         /*
1999                          * TSO workaround:
2000                          *   pull TSO_WORKAROUND (4) more bytes of data into the first mbuf.
2001                          */
2002                         if (m_head->m_len < poff + (tp->th_off << 2)) {
2003                                 m_head = m_pullup(m_head, poff +
2004                                                  (tp->th_off << 2) +
2005                                                  TSO_WORKAROUND);
2006                                 if (m_head == NULL) {
2007                                         *m_headp = NULL;
2008                                         return (ENOBUFS);
2009                                 }
2010                         }
2011                         ip = (struct ip *)(mtod(m_head, char *) + ip_off);
2012                         tp = (struct tcphdr *)(mtod(m_head, char *) + poff);
2013                         if (do_tso) {
2014                                 ip->ip_len = htons(m_head->m_pkthdr.tso_segsz +
2015                                                   (ip->ip_hl << 2) +
2016                                                   (tp->th_off << 2));
2017                                 ip->ip_sum = 0;
2018                                 /*
2019                                  * The pseudo TCP checksum does not include the
2020                                  * TCP payload length, so the driver must
2021                                  * recompute the checksum here as the hardware
2022                                  * expects to see it, in adherence to
2023                                  * Microsoft's Large Send specification.
2024                                  */
2025                                 tp->th_sum = in_pseudo(ip->ip_src.s_addr,
2026                                     ip->ip_dst.s_addr, htons(IPPROTO_TCP));
2027                         }
2028                 } else if (m_head->m_pkthdr.csum_flags & CSUM_UDP) {
2029                         if (m_head->m_len < poff + sizeof(struct udphdr)) {
2030                                 m_head = m_pullup(m_head, poff +
2031                                     sizeof(struct udphdr));
2032                                 if (m_head == NULL) {
2033                                         *m_headp = NULL;
2034                                         return (ENOBUFS);
2035                                 }
2036                         }
2037                         ip = (struct ip *)(mtod(m_head, char *) + ip_off);
2038                 }
2039                 *m_headp = m_head;
2040         }
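        /*
         * Offset arithmetic illustrated for a plain untagged IPv4/TCP
         * frame without options: ip_off = sizeof(struct ether_header)
         * = 14, ip_hl = 5 so poff = 14 + (5 << 2) = 34, and the TCP
         * header spans (th_off << 2) = 20 bytes, so the m_pullup()
         * calls above leave the first 54 bytes in one contiguous mbuf.
         */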
2041
2042         /*
2043          * Map the packet for DMA
2044          *
2045          * Capture the first descriptor index;
2046          * this descriptor will have the index
2047          * of the EOP, which is the only one that
2048          * now gets a DONE bit writeback.
2049          */
2050         first = txr->next_avail_desc;
2051         tx_buffer = &txr->tx_buffers[first];
2052         tx_buffer_mapped = tx_buffer;
2053         map = tx_buffer->map;
2054
2055 retry:
2056         error = bus_dmamap_load_mbuf_sg(txr->txtag, map,
2057             *m_headp, segs, &nsegs, BUS_DMA_NOWAIT);
2058
2059         /*
2060          * There are two types of errors we can (try) to handle:
2061          * - EFBIG means the mbuf chain was too long and bus_dma ran
2062          *   out of segments.  Defragment the mbuf chain and try again.
2063          * - ENOMEM means bus_dma could not obtain enough bounce buffers
2064          *   at this point in time.  Defer sending and try again later.
2065          * All other errors, in particular EINVAL, are fatal and prevent the
2066          * mbuf chain from ever going through.  Drop it and report error.
2067          */
2068         if (error == EFBIG && remap) {
2069                 struct mbuf *m;
2070
2071                 m = m_collapse(*m_headp, M_NOWAIT, EM_MAX_SCATTER);
2072                 if (m == NULL) {
2073                         adapter->mbuf_defrag_failed++;
2074                         m_freem(*m_headp);
2075                         *m_headp = NULL;
2076                         return (ENOBUFS);
2077                 }
2078                 *m_headp = m;
2079
2080                 /* Try it again, but only once */
2081                 remap = FALSE;
2082                 goto retry;
2083         } else if (error != 0) {
2084                 adapter->no_tx_dma_setup++;
2085                 m_freem(*m_headp);
2086                 *m_headp = NULL;
2087                 return (error);
2088         }
2089
2090         /*
2091          * TSO hardware workaround: if this packet is not
2092          * TSO, and is only a single descriptor long, and
2093          * it follows a TSO burst, then we need to add a
2094          * sentinel descriptor to prevent premature writeback.
2095          */
2096         if ((!do_tso) && (txr->tx_tso == TRUE)) {
2097                 if (nsegs == 1)
2098                         tso_desc = TRUE;
2099                 txr->tx_tso = FALSE;
2100         }
2101
2102         if (txr->tx_avail < (nsegs + EM_MAX_SCATTER)) {
2103                 txr->no_desc_avail++;
2104                 bus_dmamap_unload(txr->txtag, map);
2105                 return (ENOBUFS);
2106         }
2107         m_head = *m_headp;
2108
2109         /* Do hardware assists */
2110         if (m_head->m_pkthdr.csum_flags & CSUM_IP_TSO) {
2111                 em_tso_setup(txr, m_head, ip_off, ip, tp,
2112                     &txd_upper, &txd_lower);
2113                 /* we need to make a final sentinel transmit desc */
2114                 tso_desc = TRUE;
2115         } else if (m_head->m_pkthdr.csum_flags & CSUM_OFFLOAD)
2116                 em_transmit_checksum_setup(txr, m_head,
2117                     ip_off, ip, &txd_upper, &txd_lower);
2118
2119         if (m_head->m_flags & M_VLANTAG) {
2120                 /* Set the vlan id. */
2121                 txd_upper |=
2122                     (htole16(m_head->m_pkthdr.ether_vtag) << 16);
2123                 /* Tell hardware to add tag */
2124                 txd_lower |= htole32(E1000_TXD_CMD_VLE);
2125         }
2126
2127         i = txr->next_avail_desc;
2128
2129         /* Set up our transmit descriptors */
2130         for (j = 0; j < nsegs; j++) {
2131                 bus_size_t seg_len;
2132                 bus_addr_t seg_addr;
2133
2134                 tx_buffer = &txr->tx_buffers[i];
2135                 ctxd = &txr->tx_base[i];
2136                 seg_addr = segs[j].ds_addr;
2137                 seg_len  = segs[j].ds_len;
2138                 /*
2139                 ** TSO Workaround:
2140                 ** If this is the last descriptor, we want to
2141                 ** split it so we have a small final sentinel
2142                 */
2143                 if (tso_desc && (j == (nsegs - 1)) && (seg_len > 8)) {
2144                         seg_len -= TSO_WORKAROUND;
2145                         ctxd->buffer_addr = htole64(seg_addr);
2146                         ctxd->lower.data = htole32(
2147                                 adapter->txd_cmd | txd_lower | seg_len);
2148                         ctxd->upper.data = htole32(txd_upper);
2149                         if (++i == adapter->num_tx_desc)
2150                                 i = 0;
2151
2152                         /* Now make the sentinel */     
2153                         txr->tx_avail--;
2154                         ctxd = &txr->tx_base[i];
2155                         tx_buffer = &txr->tx_buffers[i];
2156                         ctxd->buffer_addr =
2157                             htole64(seg_addr + seg_len);
2158                         ctxd->lower.data = htole32(
2159                             adapter->txd_cmd | txd_lower | TSO_WORKAROUND);
2160                         ctxd->upper.data =
2161                             htole32(txd_upper);
2162                         last = i;
2163                         if (++i == adapter->num_tx_desc)
2164                                 i = 0;
2165                 } else {
2166                         ctxd->buffer_addr = htole64(seg_addr);
2167                         ctxd->lower.data = htole32(
2168                             adapter->txd_cmd | txd_lower | seg_len);
2169                         ctxd->upper.data = htole32(txd_upper);
2170                         last = i;
2171                         if (++i == adapter->num_tx_desc)
2172                                 i = 0;
2173                 }
2174                 tx_buffer->m_head = NULL;
2175                 tx_buffer->next_eop = -1;
2176         }
2177
2178         txr->next_avail_desc = i;
2179         txr->tx_avail -= nsegs;
2180
2181         tx_buffer->m_head = m_head;
2182         /*
2183         ** Here we swap the map so the last descriptor,
2184         ** which gets the completion interrupt, has the
2185         ** real map, and the first descriptor gets the
2186         ** unused map from this descriptor.
2187         */
2188         tx_buffer_mapped->map = tx_buffer->map;
2189         tx_buffer->map = map;
2190         bus_dmamap_sync(txr->txtag, map, BUS_DMASYNC_PREWRITE);
2191
2192         /*
2193          * Last Descriptor of Packet
2194          * needs End Of Packet (EOP)
2195          * and Report Status (RS)
2196          */
2197         ctxd->lower.data |=
2198             htole32(E1000_TXD_CMD_EOP | E1000_TXD_CMD_RS);
2199         /*
2200          * Keep track in the first buffer which
2201          * descriptor will be written back
2202          */
2203         tx_buffer = &txr->tx_buffers[first];
2204         tx_buffer->next_eop = last;
2205
2206         /*
2207          * Advance the Transmit Descriptor Tail (TDT); this tells the E1000
2208          * that this frame is available to transmit.
2209          */
2210         bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
2211             BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
2212         E1000_WRITE_REG(&adapter->hw, E1000_TDT(txr->me), i);
2213
2214         return (0);
2215 }
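/*
 * Ring accounting in em_xmit() illustrated with assumed numbers (not
 * from a real trace): with num_tx_desc = 1024, next_avail_desc = 1022
 * and a 3-segment chain (no TSO sentinel), the loop fills descriptors
 * 1022, 1023 and 0 (wrapping via "++i == num_tx_desc"), records
 * last = 0 in the first buffer's next_eop, leaves next_avail_desc = 1
 * and drops tx_avail by 3.
 */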
2216
2217 static void
2218 em_set_promisc(struct adapter *adapter)
2219 {
2220         struct ifnet    *ifp = adapter->ifp;
2221         u32             reg_rctl;
2222
2223         reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
2224
2225         if (ifp->if_flags & IFF_PROMISC) {
2226                 reg_rctl |= (E1000_RCTL_UPE | E1000_RCTL_MPE);
2227                 /* Turn this on if you want to see bad packets */
2228                 if (em_debug_sbp)
2229                         reg_rctl |= E1000_RCTL_SBP;
2230                 E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2231         } else if (ifp->if_flags & IFF_ALLMULTI) {
2232                 reg_rctl |= E1000_RCTL_MPE;
2233                 reg_rctl &= ~E1000_RCTL_UPE;
2234                 E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2235         }
2236 }
2237
2238 static void
2239 em_disable_promisc(struct adapter *adapter)
2240 {
2241         struct ifnet    *ifp = adapter->ifp;
2242         u32             reg_rctl;
2243         int             mcnt = 0;
2244
2245         reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
2246         reg_rctl &=  (~E1000_RCTL_UPE);
2247         if (ifp->if_flags & IFF_ALLMULTI)
2248                 mcnt = MAX_NUM_MULTICAST_ADDRESSES;
2249         else {
2250                 struct  ifmultiaddr *ifma;
2251 #if __FreeBSD_version < 800000
2252                 IF_ADDR_LOCK(ifp);
2253 #else   
2254                 if_maddr_rlock(ifp);
2255 #endif
2256                 TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
2257                         if (ifma->ifma_addr->sa_family != AF_LINK)
2258                                 continue;
2259                         if (mcnt == MAX_NUM_MULTICAST_ADDRESSES)
2260                                 break;
2261                         mcnt++;
2262                 }
2263 #if __FreeBSD_version < 800000
2264                 IF_ADDR_UNLOCK(ifp);
2265 #else
2266                 if_maddr_runlock(ifp);
2267 #endif
2268         }
2269         /* Don't disable if in MAX groups */
2270         if (mcnt < MAX_NUM_MULTICAST_ADDRESSES)
2271                 reg_rctl &=  (~E1000_RCTL_MPE);
2272         reg_rctl &=  (~E1000_RCTL_SBP);
2273         E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2274 }
2275
2276
2277 /*********************************************************************
2278  *  Multicast Update
2279  *
2280  *  This routine is called whenever the multicast address list is updated.
2281  *
2282  **********************************************************************/
2283
2284 static void
2285 em_set_multi(struct adapter *adapter)
2286 {
2287         struct ifnet    *ifp = adapter->ifp;
2288         struct ifmultiaddr *ifma;
2289         u32 reg_rctl = 0;
2290         u8  *mta; /* Multicast array memory */
2291         int mcnt = 0;
2292
2293         IOCTL_DEBUGOUT("em_set_multi: begin");
2294
2295         mta = adapter->mta;
2296         bzero(mta, sizeof(u8) * ETH_ADDR_LEN * MAX_NUM_MULTICAST_ADDRESSES);
2297
2298         if (adapter->hw.mac.type == e1000_82542 && 
2299             adapter->hw.revision_id == E1000_REVISION_2) {
2300                 reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
2301                 if (adapter->hw.bus.pci_cmd_word & CMD_MEM_WRT_INVALIDATE)
2302                         e1000_pci_clear_mwi(&adapter->hw);
2303                 reg_rctl |= E1000_RCTL_RST;
2304                 E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2305                 msec_delay(5);
2306         }
2307
2308 #if __FreeBSD_version < 800000
2309         IF_ADDR_LOCK(ifp);
2310 #else
2311         if_maddr_rlock(ifp);
2312 #endif
2313         TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
2314                 if (ifma->ifma_addr->sa_family != AF_LINK)
2315                         continue;
2316
2317                 if (mcnt == MAX_NUM_MULTICAST_ADDRESSES)
2318                         break;
2319
2320                 bcopy(LLADDR((struct sockaddr_dl *)ifma->ifma_addr),
2321                     &mta[mcnt * ETH_ADDR_LEN], ETH_ADDR_LEN);
2322                 mcnt++;
2323         }
2324 #if __FreeBSD_version < 800000
2325         IF_ADDR_UNLOCK(ifp);
2326 #else
2327         if_maddr_runlock(ifp);
2328 #endif
2329         if (mcnt >= MAX_NUM_MULTICAST_ADDRESSES) {
2330                 reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
2331                 reg_rctl |= E1000_RCTL_MPE;
2332                 E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2333         } else
2334                 e1000_update_mc_addr_list(&adapter->hw, mta, mcnt);
2335
2336         if (adapter->hw.mac.type == e1000_82542 && 
2337             adapter->hw.revision_id == E1000_REVISION_2) {
2338                 reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
2339                 reg_rctl &= ~E1000_RCTL_RST;
2340                 E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2341                 msec_delay(5);
2342                 if (adapter->hw.bus.pci_cmd_word & CMD_MEM_WRT_INVALIDATE)
2343                         e1000_pci_set_mwi(&adapter->hw);
2344         }
2345 }
2346
2347
2348 /*********************************************************************
2349  *  Timer routine
2350  *
2351  *  This routine checks for link status and updates statistics.
2352  *
2353  **********************************************************************/
2354
2355 static void
2356 em_local_timer(void *arg)
2357 {
2358         struct adapter  *adapter = arg;
2359         struct ifnet    *ifp = adapter->ifp;
2360         struct tx_ring  *txr = adapter->tx_rings;
2361         struct rx_ring  *rxr = adapter->rx_rings;
2362         u32             trigger = 0;
2363
2364         EM_CORE_LOCK_ASSERT(adapter);
2365
2366         em_update_link_status(adapter);
2367         em_update_stats_counters(adapter);
2368
2369         /* Reset LAA into RAR[0] on 82571 */
2370         if ((adapter->hw.mac.type == e1000_82571) &&
2371             e1000_get_laa_state_82571(&adapter->hw))
2372                 e1000_rar_set(&adapter->hw, adapter->hw.mac.addr, 0);
2373
2374         /* Mask to use in the irq trigger */
2375         if (adapter->msix_mem) {
2376                 for (int i = 0; i < adapter->num_queues; i++, rxr++)
2377                         trigger |= rxr->ims;
2378                 rxr = adapter->rx_rings;
2379         } else
2380                 trigger = E1000_ICS_RXDMT0;
2381
2382         /*
2383         ** Check on the state of the TX queue(s); this
2384         ** can be done without the lock because it's RO
2385         ** and the HUNG state will be static if set.
2386         */
2387         for (int i = 0; i < adapter->num_queues; i++, txr++) {
2388                 if (txr->busy == EM_TX_HUNG)
2389                         goto hung;
2390                 if (txr->busy >= EM_TX_MAXTRIES)
2391                         txr->busy = EM_TX_HUNG;
2392                 /* Schedule a TX tasklet if needed */
2393                 if (txr->tx_avail <= EM_MAX_SCATTER)
2394                         taskqueue_enqueue(txr->tq, &txr->tx_task);
2395         }
2396         
2397         callout_reset(&adapter->timer, hz, em_local_timer, adapter);
2398 #ifndef DEVICE_POLLING
2399         /* Trigger an RX interrupt to guarantee mbuf refresh */
2400         E1000_WRITE_REG(&adapter->hw, E1000_ICS, trigger);
2401 #endif
2402         return;
2403 hung:
2404         /* Looks like we're hung */
2405         device_printf(adapter->dev, "Watchdog timeout Queue[%d]-- resetting\n",
2406                         txr->me);
2407         em_print_debug_info(adapter);
2408         ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
2409         adapter->watchdog_events++;
2410         em_init_locked(adapter);
2411 }
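/*
 * Sketch of the trigger mask composed above: with MSI-X and a single
 * queue, trigger is just rxr->ims (1 << 20, set in em_allocate_msix()),
 * so the E1000_ICS write fires the RX0 vector to refresh mbufs; in
 * legacy/MSI mode the fixed E1000_ICS_RXDMT0 cause is used instead.
 */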
2412
2413
2414 static void
2415 em_update_link_status(struct adapter *adapter)
2416 {
2417         struct e1000_hw *hw = &adapter->hw;
2418         struct ifnet *ifp = adapter->ifp;
2419         device_t dev = adapter->dev;
2420         struct tx_ring *txr = adapter->tx_rings;
2421         u32 link_check = 0;
2422
2423         /* Get the cached link value or read phy for real */
2424         switch (hw->phy.media_type) {
2425         case e1000_media_type_copper:
2426                 if (hw->mac.get_link_status) {
2427                         if (hw->mac.type == e1000_pch_spt)
2428                                 msec_delay(50);
2429                         /* Do the work to read phy */
2430                         e1000_check_for_link(hw);
2431                         link_check = !hw->mac.get_link_status;
2432                         if (link_check) /* ESB2 fix */
2433                                 e1000_cfg_on_link_up(hw);
2434                 } else
2435                         link_check = TRUE;
2436                 break;
2437         case e1000_media_type_fiber:
2438                 e1000_check_for_link(hw);
2439                 link_check = (E1000_READ_REG(hw, E1000_STATUS) &
2440                                  E1000_STATUS_LU);
2441                 break;
2442         case e1000_media_type_internal_serdes:
2443                 e1000_check_for_link(hw);
2444                 link_check = adapter->hw.mac.serdes_has_link;
2445                 break;
2446         default:
2447         case e1000_media_type_unknown:
2448                 break;
2449         }
2450
2451         /* Now check for a transition */
2452         if (link_check && (adapter->link_active == 0)) {
2453                 e1000_get_speed_and_duplex(hw, &adapter->link_speed,
2454                     &adapter->link_duplex);
2455
2456                 /*
2457                 ** There have proven to be problems with TSO when not at full
2458                 ** gigabit speed, so disable the assist automatically when at
2459                 ** lower speeds.  -jfv
2460                 */
2461                 if (ifp->if_capenable & IFCAP_TSO4) {
2462                         if (adapter->link_speed == SPEED_1000)
2463                                 ifp->if_hwassist |= CSUM_IP_TSO;
2464                         else
2465                                 ifp->if_hwassist &= ~CSUM_IP_TSO;
2466                 }
2467
2468                 /* Check if we must disable SPEED_MODE bit on PCI-E */
2469                 if ((adapter->link_speed != SPEED_1000) &&
2470                     ((hw->mac.type == e1000_82571) ||
2471                     (hw->mac.type == e1000_82572))) {
2472                         int tarc0;
2473                         tarc0 = E1000_READ_REG(hw, E1000_TARC(0));
2474                         tarc0 &= ~TARC_SPEED_MODE_BIT;
2475                         E1000_WRITE_REG(hw, E1000_TARC(0), tarc0);
2476                 }
2477                 if (bootverbose)
2478                         device_printf(dev, "Link is up %d Mbps %s\n",
2479                             adapter->link_speed,
2480                             ((adapter->link_duplex == FULL_DUPLEX) ?
2481                             "Full Duplex" : "Half Duplex"));
2482                 adapter->link_active = 1;
2483                 adapter->smartspeed = 0;
2484                 ifp->if_baudrate = adapter->link_speed * 1000000;
2485                 if_link_state_change(ifp, LINK_STATE_UP);
2486         } else if (!link_check && (adapter->link_active == 1)) {
2487                 ifp->if_baudrate = adapter->link_speed = 0;
2488                 adapter->link_duplex = 0;
2489                 if (bootverbose)
2490                         device_printf(dev, "Link is Down\n");
2491                 adapter->link_active = 0;
2492                 /* Link down, disable hang detection */
2493                 for (int i = 0; i < adapter->num_queues; i++, txr++)
2494                         txr->busy = EM_TX_IDLE;
2495                 if_link_state_change(ifp, LINK_STATE_DOWN);
2496         }
2497 }
2498
2499 /*********************************************************************
2500  *
2501  *  This routine disables all traffic on the adapter by issuing a
2502  *  global reset on the MAC and deallocates TX/RX buffers.
2503  *
2504  *  This routine should always be called with BOTH the CORE
2505  *  and TX locks.
2506  **********************************************************************/
2507
2508 static void
2509 em_stop(void *arg)
2510 {
2511         struct adapter  *adapter = arg;
2512         struct ifnet    *ifp = adapter->ifp;
2513         struct tx_ring  *txr = adapter->tx_rings;
2514
2515         EM_CORE_LOCK_ASSERT(adapter);
2516
2517         INIT_DEBUGOUT("em_stop: begin");
2518
2519         em_disable_intr(adapter);
2520         callout_stop(&adapter->timer);
2521
2522         /* Tell the stack that the interface is no longer active */
2523         ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
2524         ifp->if_drv_flags |= IFF_DRV_OACTIVE;
2525
2526         /* Disarm Hang Detection. */
2527         for (int i = 0; i < adapter->num_queues; i++, txr++) {
2528                 EM_TX_LOCK(txr);
2529                 txr->busy = EM_TX_IDLE;
2530                 EM_TX_UNLOCK(txr);
2531         }
2532
2533         /* I219 needs some special flushing to avoid hangs */
2534         if (adapter->hw.mac.type == e1000_pch_spt)
2535                 em_flush_desc_rings(adapter);
2536
2537         e1000_reset_hw(&adapter->hw);
2538         E1000_WRITE_REG(&adapter->hw, E1000_WUC, 0);
2539
2540         e1000_led_off(&adapter->hw);
2541         e1000_cleanup_led(&adapter->hw);
2542 }
2543
2544
2545 /*********************************************************************
2546  *
2547  *  Determine hardware revision.
2548  *
2549  **********************************************************************/
2550 static void
2551 em_identify_hardware(struct adapter *adapter)
2552 {
2553         device_t dev = adapter->dev;
2554
2555         /* Make sure our PCI config space has the necessary stuff set */
2556         pci_enable_busmaster(dev);
2557         adapter->hw.bus.pci_cmd_word = pci_read_config(dev, PCIR_COMMAND, 2);
2558
2559         /* Save off the information about this board */
2560         adapter->hw.vendor_id = pci_get_vendor(dev);
2561         adapter->hw.device_id = pci_get_device(dev);
2562         adapter->hw.revision_id = pci_read_config(dev, PCIR_REVID, 1);
2563         adapter->hw.subsystem_vendor_id =
2564             pci_read_config(dev, PCIR_SUBVEND_0, 2);
2565         adapter->hw.subsystem_device_id =
2566             pci_read_config(dev, PCIR_SUBDEV_0, 2);
2567
2568         /* Do Shared Code Init and Setup */
2569         if (e1000_set_mac_type(&adapter->hw)) {
2570                 device_printf(dev, "Setup init failure\n");
2571                 return;
2572         }
2573 }
2574
2575 static int
2576 em_allocate_pci_resources(struct adapter *adapter)
2577 {
2578         device_t        dev = adapter->dev;
2579         int             rid;
2580
2581         rid = PCIR_BAR(0);
2582         adapter->memory = bus_alloc_resource_any(dev, SYS_RES_MEMORY,
2583             &rid, RF_ACTIVE);
2584         if (adapter->memory == NULL) {
2585                 device_printf(dev, "Unable to allocate bus resource: memory\n");
2586                 return (ENXIO);
2587         }
2588         adapter->osdep.mem_bus_space_tag =
2589             rman_get_bustag(adapter->memory);
2590         adapter->osdep.mem_bus_space_handle =
2591             rman_get_bushandle(adapter->memory);
2592         adapter->hw.hw_addr = (u8 *)&adapter->osdep.mem_bus_space_handle;
2593
2594         adapter->hw.back = &adapter->osdep;
2595
2596         return (0);
2597 }
2598
2599 /*********************************************************************
2600  *
2601  *  Setup the Legacy or MSI Interrupt handler
2602  *
2603  **********************************************************************/
2604 static int
2605 em_allocate_legacy(struct adapter *adapter)
2606 {
2607         device_t dev = adapter->dev;
2608         struct tx_ring  *txr = adapter->tx_rings;
2609         int error, rid = 0;
2610
2611         /* Manually turn off all interrupts */
2612         E1000_WRITE_REG(&adapter->hw, E1000_IMC, 0xffffffff);
2613
2614         if (adapter->msix == 1) /* using MSI */
2615                 rid = 1;
2616         /* We allocate a single interrupt resource */
2617         adapter->res = bus_alloc_resource_any(dev,
2618             SYS_RES_IRQ, &rid, RF_SHAREABLE | RF_ACTIVE);
2619         if (adapter->res == NULL) {
2620                 device_printf(dev, "Unable to allocate bus resource: "
2621                     "interrupt\n");
2622                 return (ENXIO);
2623         }
2624
2625         /*
2626          * Allocate a fast interrupt and the associated
2627          * deferred processing contexts.
2628          */
2629         TASK_INIT(&adapter->que_task, 0, em_handle_que, adapter);
2630         adapter->tq = taskqueue_create_fast("em_taskq", M_NOWAIT,
2631             taskqueue_thread_enqueue, &adapter->tq);
2632         taskqueue_start_threads(&adapter->tq, 1, PI_NET, "%s que",
2633             device_get_nameunit(adapter->dev));
2634         /* Use a TX only tasklet for local timer */
2635         TASK_INIT(&txr->tx_task, 0, em_handle_tx, txr);
2636         txr->tq = taskqueue_create_fast("em_txq", M_NOWAIT,
2637             taskqueue_thread_enqueue, &txr->tq);
2638         taskqueue_start_threads(&txr->tq, 1, PI_NET, "%s txq",
2639             device_get_nameunit(adapter->dev));
2640         TASK_INIT(&adapter->link_task, 0, em_handle_link, adapter);
2641         if ((error = bus_setup_intr(dev, adapter->res, INTR_TYPE_NET,
2642             em_irq_fast, NULL, adapter, &adapter->tag)) != 0) {
2643                 device_printf(dev, "Failed to register fast interrupt "
2644                             "handler: %d\n", error);
2645                 taskqueue_free(adapter->tq);
2646                 adapter->tq = NULL;
2647                 return (error);
2648         }
2649         
2650         return (0);
2651 }
2652
2653 /*********************************************************************
2654  *
2655  *  Setup the MSIX Interrupt handlers
2656  *   This is not really Multiqueue, rather
2657  *   it's just separate interrupt vectors
2658  *   for TX, RX, and Link.
2659  *
2660  **********************************************************************/
2661 static int
2662 em_allocate_msix(struct adapter *adapter)
2663 {
2664         device_t        dev = adapter->dev;
2665         struct          tx_ring *txr = adapter->tx_rings;
2666         struct          rx_ring *rxr = adapter->rx_rings;
2667         int             error, rid, vector = 0;
2668         int             cpu_id = 0;
2669
2670
2671         /* Make sure all interrupts are disabled */
2672         E1000_WRITE_REG(&adapter->hw, E1000_IMC, 0xffffffff);
2673
2674         /* First set up ring resources */
2675         for (int i = 0; i < adapter->num_queues; i++, rxr++, vector++) {
2676
2677                 /* RX ring */
2678                 rid = vector + 1;
2679
2680                 rxr->res = bus_alloc_resource_any(dev,
2681                     SYS_RES_IRQ, &rid, RF_ACTIVE);
2682                 if (rxr->res == NULL) {
2683                         device_printf(dev,
2684                             "Unable to allocate bus resource: "
2685                             "RX MSIX Interrupt %d\n", i);
2686                         return (ENXIO);
2687                 }
2688                 if ((error = bus_setup_intr(dev, rxr->res,
2689                     INTR_TYPE_NET | INTR_MPSAFE, NULL, em_msix_rx,
2690                     rxr, &rxr->tag)) != 0) {
2691                         device_printf(dev, "Failed to register RX handler");
2692                         return (error);
2693                 }
2694 #if __FreeBSD_version >= 800504
2695                 bus_describe_intr(dev, rxr->res, rxr->tag, "rx%d", i);
2696 #endif
2697                 rxr->msix = vector;
2698
2699                 if (em_last_bind_cpu < 0)
2700                         em_last_bind_cpu = CPU_FIRST();
2701                 cpu_id = em_last_bind_cpu;
2702                 bus_bind_intr(dev, rxr->res, cpu_id);
2703
2704                 TASK_INIT(&rxr->rx_task, 0, em_handle_rx, rxr);
2705                 rxr->tq = taskqueue_create_fast("em_rxq", M_NOWAIT,
2706                     taskqueue_thread_enqueue, &rxr->tq);
2707                 taskqueue_start_threads(&rxr->tq, 1, PI_NET, "%s rxq (cpuid %d)",
2708                     device_get_nameunit(adapter->dev), cpu_id);
2709                 /*
2710                 ** Set the bit to enable interrupt
2711                 ** in E1000_IMS -- bits 20 and 21
2712                 ** are for RX0 and RX1; note this has
2713                 ** NOTHING to do with the MSIX vector
2714                 */
2715                 rxr->ims = 1 << (20 + i);
2716                 adapter->ims |= rxr->ims;
2717                 adapter->ivars |= (8 | rxr->msix) << (i * 4);
2718
2719                 em_last_bind_cpu = CPU_NEXT(em_last_bind_cpu);
2720         }
2721
2722         for (int i = 0; i < adapter->num_queues; i++, txr++, vector++) {
2723                 /* TX ring */
2724                 rid = vector + 1;
2725                 txr->res = bus_alloc_resource_any(dev,
2726                     SYS_RES_IRQ, &rid, RF_ACTIVE);
2727                 if (txr->res == NULL) {
2728                         device_printf(dev,
2729                             "Unable to allocate bus resource: "
2730                             "TX MSIX Interrupt %d\n", i);
2731                         return (ENXIO);
2732                 }
2733                 if ((error = bus_setup_intr(dev, txr->res,
2734                     INTR_TYPE_NET | INTR_MPSAFE, NULL, em_msix_tx,
2735                     txr, &txr->tag)) != 0) {
2736                         device_printf(dev, "Failed to register TX handler\n");
2737                         return (error);
2738                 }
2739 #if __FreeBSD_version >= 800504
2740                 bus_describe_intr(dev, txr->res, txr->tag, "tx%d", i);
2741 #endif
2742                 txr->msix = vector;
2743
2744                 if (em_last_bind_cpu < 0)
2745                         em_last_bind_cpu = CPU_FIRST();
2746                 cpu_id = em_last_bind_cpu;
2747                 bus_bind_intr(dev, txr->res, cpu_id);
2748
2749                 TASK_INIT(&txr->tx_task, 0, em_handle_tx, txr);
2750                 txr->tq = taskqueue_create_fast("em_txq", M_NOWAIT,
2751                     taskqueue_thread_enqueue, &txr->tq);
2752                 taskqueue_start_threads(&txr->tq, 1, PI_NET, "%s txq (cpuid %d)",
2753                     device_get_nameunit(adapter->dev), cpu_id);
2754                 /*
2755                 ** Set the bit to enable the interrupt
2756                 ** in E1000_IMS -- bits 22 and 23
2757                 ** are for TX0 and TX1; note this has
2758                 ** NOTHING to do with the MSIX vector
2759                 */
2760                 txr->ims = 1 << (22 + i);
2761                 adapter->ims |= txr->ims;
2762                 adapter->ivars |= (8 | txr->msix) << (8 + (i * 4));
2763
2764                 em_last_bind_cpu = CPU_NEXT(em_last_bind_cpu);
2765         }
2766
2767         /* Link interrupt */
2768         rid = vector + 1;
2769         adapter->res = bus_alloc_resource_any(dev,
2770             SYS_RES_IRQ, &rid, RF_SHAREABLE | RF_ACTIVE);
2771         if (!adapter->res) {
2772                 device_printf(dev,"Unable to allocate "
2773                     "bus resource: Link interrupt [%d]\n", rid);
2774                 return (ENXIO);
2775         }
2776         /* Set the link handler function */
2777         error = bus_setup_intr(dev, adapter->res,
2778             INTR_TYPE_NET | INTR_MPSAFE, NULL,
2779             em_msix_link, adapter, &adapter->tag);
2780         if (error) {
2781                 adapter->res = NULL;
2782                 device_printf(dev, "Failed to register LINK handler\n");
2783                 return (error);
2784         }
2785 #if __FreeBSD_version >= 800504
2786         bus_describe_intr(dev, adapter->res, adapter->tag, "link");
2787 #endif
2788         adapter->linkvec = vector;
2789         adapter->ivars |= (8 | vector) << 16;
2790         adapter->ivars |= 0x80000000;
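             /*
             ** Worked example (a sketch, assuming the two-queue
             ** EM_MULTIQUEUE layout): the loops above hand out vectors in
             ** order, so RX0/RX1 get vectors 0/1, TX0/TX1 get 2/3 and the
             ** link gets 4.  Each 4-bit IVAR field is written as
             ** (8 | vector): bits 0-2 carry the MSIX vector and bit 3
             ** flags the entry as valid in the 82574 IVAR layout this
             ** code targets, giving:
             **
             **   ivars = 0x8 | (0x9 << 4) | (0xa << 8) | (0xb << 12) |
             **       (0xc << 16) | 0x80000000 = 0x800cba98
             */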
2791
2792         return (0);
2793 }
2794
2795
2796 static void
2797 em_free_pci_resources(struct adapter *adapter)
2798 {
2799         device_t        dev = adapter->dev;
2800         struct tx_ring  *txr;
2801         struct rx_ring  *rxr;
2802         int             rid;
2803
2804
2805         /*
2806         ** Release all the queue interrupt resources:
2807         */
2808         for (int i = 0; i < adapter->num_queues; i++) {
2809                 txr = &adapter->tx_rings[i];
2810                 /* an early abort? */
2811                 if (txr == NULL)
2812                         break;
2813                 rid = txr->msix + 1;
2814                 if (txr->tag != NULL) {
2815                         bus_teardown_intr(dev, txr->res, txr->tag);
2816                         txr->tag = NULL;
2817                 }
2818                 if (txr->res != NULL)
2819                         bus_release_resource(dev, SYS_RES_IRQ,
2820                             rid, txr->res);
2821
2822                 rxr = &adapter->rx_rings[i];
2823                 /* an early abort? */
2824                 if (rxr == NULL)
2825                         break;
2826                 rid = rxr->msix + 1;
2827                 if (rxr->tag != NULL) {
2828                         bus_teardown_intr(dev, rxr->res, rxr->tag);
2829                         rxr->tag = NULL;
2830                 }
2831                 if (rxr->res != NULL)
2832                         bus_release_resource(dev, SYS_RES_IRQ,
2833                             rid, rxr->res);
2834         }
2835
2836         if (adapter->linkvec) /* we are doing MSIX */
2837                 rid = adapter->linkvec + 1;
2838         else
2839                 rid = (adapter->msix != 0) ? 1 : 0;
2840
2841         if (adapter->tag != NULL) {
2842                 bus_teardown_intr(dev, adapter->res, adapter->tag);
2843                 adapter->tag = NULL;
2844         }
2845
2846         if (adapter->res != NULL)
2847                 bus_release_resource(dev, SYS_RES_IRQ, rid, adapter->res);
2848
2849
2850         if (adapter->msix)
2851                 pci_release_msi(dev);
2852
2853         if (adapter->msix_mem != NULL)
2854                 bus_release_resource(dev, SYS_RES_MEMORY,
2855                     adapter->memrid, adapter->msix_mem);
2856
2857         if (adapter->memory != NULL)
2858                 bus_release_resource(dev, SYS_RES_MEMORY,
2859                     PCIR_BAR(0), adapter->memory);
2860
2861         if (adapter->flash != NULL)
2862                 bus_release_resource(dev, SYS_RES_MEMORY,
2863                     EM_FLASH, adapter->flash);
2864 }
2865
2866 /*
2867  * Setup MSI or MSI/X
2868  */
2869 static int
2870 em_setup_msix(struct adapter *adapter)
2871 {
2872         device_t dev = adapter->dev;
2873         int val;
2874
2875         /* Nearly always going to use one queue */
2876         adapter->num_queues = 1;
2877
2878         /*
2879         ** Try using MSI-X for Hartwell adapters
2880         */
2881         if ((adapter->hw.mac.type == e1000_82574) &&
2882             (em_enable_msix == TRUE)) {
2883 #ifdef EM_MULTIQUEUE
2884                 adapter->num_queues = (em_num_queues == 1) ? 1 : 2;
2885                 if (adapter->num_queues > 1)
2886                         em_enable_vectors_82574(adapter);
2887 #endif
2888                 /* Map the MSIX BAR */
2889                 adapter->memrid = PCIR_BAR(EM_MSIX_BAR);
2890                 adapter->msix_mem = bus_alloc_resource_any(dev,
2891                     SYS_RES_MEMORY, &adapter->memrid, RF_ACTIVE);
2892                 if (adapter->msix_mem == NULL) {
2893                         /* May not be enabled */
2894                         device_printf(adapter->dev,
2895                             "Unable to map MSIX table\n");
2896                         goto msi;
2897                 }
2898                 val = pci_msix_count(dev); 
2899
2900 #ifdef EM_MULTIQUEUE
2901                 /* We need 5 vectors in the multiqueue case */
2902                 if (adapter->num_queues > 1) {
2903                         if (val >= 5)
2904                                 val = 5;
2905                         else {
2906                                 adapter->num_queues = 1;
2907                                 device_printf(adapter->dev,
2908                                     "Insufficient MSIX vectors for >1 queue, "
2909                                     "using single queue...\n");
2910                                 goto msix_one;
2911                         }
2912                 } else {
2913 msix_one:
2914 #endif
2915                         if (val >= 3)
2916                                 val = 3;
2917                         else {
2918                                 device_printf(adapter->dev,
2919                                 "Insufficient MSIX vectors, using MSI\n");
2920                                 goto msi;
2921                         }
2922 #ifdef EM_MULTIQUEUE
2923                 }
2924 #endif
2925
2926                 if ((pci_alloc_msix(dev, &val) == 0)) {
2927                         device_printf(adapter->dev,
2928                             "Using MSIX interrupts "
2929                             "with %d vectors\n", val);
2930                         return (val);
2931                 }
2932
2933                 /*
2934                 ** If the MSIX allocation failed or provided us with
2935                 ** fewer vectors than needed, free and fall through to MSI
2936                 */
2937                 pci_release_msi(dev);
2938         }
2939 msi:
2940         if (adapter->msix_mem != NULL) {
2941                 bus_release_resource(dev, SYS_RES_MEMORY,
2942                     adapter->memrid, adapter->msix_mem);
2943                 adapter->msix_mem = NULL;
2944         }
2945         val = 1;
2946         if (pci_alloc_msi(dev, &val) == 0) {
2947                 device_printf(adapter->dev, "Using an MSI interrupt\n");
2948                 return (val);
2949         } 
2950         /* Should only happen due to manual configuration */
2951         device_printf(adapter->dev, "No MSI/MSIX, using a Legacy IRQ\n");
2952         return (0);
2953 }
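
/*
** A quick accounting of the logic above (a summary sketch): with
** EM_MULTIQUEUE and two queues the 82574 needs five MSIX vectors
** (RX0, RX1, TX0, TX1 and link); a single queue needs three
** (RX, TX and link).  Anything less falls back to MSI, and failing
** that to a legacy IRQ.
*/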
2954
2955
2956 /*
2957 ** The following three flush routines are used as a workaround for the
2958 ** I219 client parts, and only for them.
2959 **
2960 ** em_flush_tx_ring - remove all descriptors from the tx_ring
2961 **
2962 ** We want to clear all pending descriptors from the TX ring.
2963 ** Zeroing happens when the HW reads the registers.  The ring itself is
2964 ** used as the dummy data of the next descriptor; we don't care about
2965 ** the data since we are about to reset the HW.
2966 */
2967 static void
2968 em_flush_tx_ring(struct adapter *adapter)
2969 {
2970         struct e1000_hw         *hw = &adapter->hw;
2971         struct tx_ring          *txr = adapter->tx_rings;
2972         struct e1000_tx_desc    *txd;
2973         u32                     tctl, txd_lower = E1000_TXD_CMD_IFCS;
2974         u16                     size = 512;
2975
2976         tctl = E1000_READ_REG(hw, E1000_TCTL);
2977         E1000_WRITE_REG(hw, E1000_TCTL, tctl | E1000_TCTL_EN);
2978
2979         txd = &txr->tx_base[txr->next_avail_desc++];
2980         if (txr->next_avail_desc == adapter->num_tx_desc)
2981                 txr->next_avail_desc = 0;
2982
2983         /* Just use the ring as a dummy buffer addr */
2984         txd->buffer_addr = txr->txdma.dma_paddr;
2985         txd->lower.data = htole32(txd_lower | size);
2986         txd->upper.data = 0;
2987
2988         /* flush descriptors to memory before notifying the HW */
2989         wmb();
2990
2991         E1000_WRITE_REG(hw, E1000_TDT(0), txr->next_avail_desc);
2992         mb();
2993         usec_delay(250);
2994 }
2995
2996 /*
2997 ** em_flush_rx_ring - remove all descriptors from the rx_ring
2998 **
2999 ** Mark all descriptors in the RX ring as consumed and disable the rx ring
3000 */
3001 static void
3002 em_flush_rx_ring(struct adapter *adapter)
3003 {
3004         struct e1000_hw *hw = &adapter->hw;
3005         u32             rctl, rxdctl;
3006
3007         rctl = E1000_READ_REG(hw, E1000_RCTL);
3008         E1000_WRITE_REG(hw, E1000_RCTL, rctl & ~E1000_RCTL_EN);
3009         E1000_WRITE_FLUSH(hw);
3010         usec_delay(150);
3011
3012         rxdctl = E1000_READ_REG(hw, E1000_RXDCTL(0));
3013         /* zero the lower 14 bits (prefetch and host thresholds) */
3014         rxdctl &= 0xffffc000;
3015         /*
3016          * update thresholds: prefetch threshold to 31, host threshold to 1
3017          * and make sure the granularity is "descriptors" and not "cache lines"
3018          */
3019         rxdctl |= (0x1F | (1 << 8) | E1000_RXDCTL_THRESH_UNIT_DESC);
3020         E1000_WRITE_REG(hw, E1000_RXDCTL(0), rxdctl);
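             /*
             ** Worked example of the value ORed in above (a sketch,
             ** assuming E1000_RXDCTL_THRESH_UNIT_DESC is bit 24, as in
             ** the shared e1000 headers):
             **   0x1f | (1 << 8) | (1 << 24) = 0x0100011f
             */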
3021
3022         /* momentarily enable the RX ring for the changes to take effect */
3023         E1000_WRITE_REG(hw, E1000_RCTL, rctl | E1000_RCTL_EN);
3024         E1000_WRITE_FLUSH(hw);
3025         usec_delay(150);
3026         E1000_WRITE_REG(hw, E1000_RCTL, rctl & ~E1000_RCTL_EN);
3027 }
3028
3029 /*
3030 ** em_flush_desc_rings - remove all descriptors from the descriptor rings
3031 **
3032 ** On the I219, the descriptor rings must be emptied before resetting the HW
3033 ** or before changing the device state to D3 during runtime (runtime PM).
3034 **
3035 ** Failure to do this will cause the HW to enter a unit hang state which can
3036 ** only be released by a PCI reset of the device.
3037 **
3038 */
3039 static void
3040 em_flush_desc_rings(struct adapter *adapter)
3041 {
3042         struct e1000_hw *hw = &adapter->hw;
3043         device_t        dev = adapter->dev;
3044         u16             hang_state;
3045         u32             fext_nvm11, tdlen;
3046  
3047         /* First, disable MULR fix in FEXTNVM11 */
3048         fext_nvm11 = E1000_READ_REG(hw, E1000_FEXTNVM11);
3049         fext_nvm11 |= E1000_FEXTNVM11_DISABLE_MULR_FIX;
3050         E1000_WRITE_REG(hw, E1000_FEXTNVM11, fext_nvm11);
3051         
3052         /* do nothing if we're not in a faulty state, or if the queue is empty */
3053         tdlen = E1000_READ_REG(hw, E1000_TDLEN(0));
3054         hang_state = pci_read_config(dev, PCICFG_DESC_RING_STATUS, 2);
3055         if (!(hang_state & FLUSH_DESC_REQUIRED) || !tdlen)
3056                 return;
3057         em_flush_tx_ring(adapter);
3058
3059         /* recheck, maybe the fault is caused by the rx ring */
3060         hang_state = pci_read_config(dev, PCICFG_DESC_RING_STATUS, 2);
3061         if (hang_state & FLUSH_DESC_REQUIRED)
3062                 em_flush_rx_ring(adapter);
3063 }
3064
3065
3066 /*********************************************************************
3067  *
3068  *  Initialize the hardware to a configuration
3069  *  as specified by the adapter structure.
3070  *
3071  **********************************************************************/
3072 static void
3073 em_reset(struct adapter *adapter)
3074 {
3075         device_t        dev = adapter->dev;
3076         struct ifnet    *ifp = adapter->ifp;
3077         struct e1000_hw *hw = &adapter->hw;
3078         u16             rx_buffer_size;
3079         u32             pba;
3080
3081         INIT_DEBUGOUT("em_reset: begin");
3082
3083         /* Set up smart power down as default off on newer adapters. */
3084         if (!em_smart_pwr_down && (hw->mac.type == e1000_82571 ||
3085             hw->mac.type == e1000_82572)) {
3086                 u16 phy_tmp = 0;
3087
3088                 /* Speed up time to link by disabling smart power down. */
3089                 e1000_read_phy_reg(hw, IGP02E1000_PHY_POWER_MGMT, &phy_tmp);
3090                 phy_tmp &= ~IGP02E1000_PM_SPD;
3091                 e1000_write_phy_reg(hw, IGP02E1000_PHY_POWER_MGMT, phy_tmp);
3092         }
3093
3094         /*
3095          * Packet Buffer Allocation (PBA)
3096          * Writing PBA sets the receive portion of the buffer;
3097          * the remainder is used for the transmit buffer.
3098          */
3099         switch (hw->mac.type) {
3100         /* Total Packet Buffer on these is 48K */
3101         case e1000_82571:
3102         case e1000_82572:
3103         case e1000_80003es2lan:
3104                 pba = E1000_PBA_32K; /* 32K for Rx, 16K for Tx */
3105                 break;
3106         case e1000_82573: /* 82573: Total Packet Buffer is 32K */
3107                 pba = E1000_PBA_12K; /* 12K for Rx, 20K for Tx */
3108                 break;
3109         case e1000_82574:
3110         case e1000_82583:
3111                 pba = E1000_PBA_20K; /* 20K for Rx, 20K for Tx */
3112                 break;
3113         case e1000_ich8lan:
3114                 pba = E1000_PBA_8K;
3115                 break;
3116         case e1000_ich9lan:
3117         case e1000_ich10lan:
3118                 /* Boost Receive side for jumbo frames */
3119                 if (adapter->hw.mac.max_frame_size > 4096)
3120                         pba = E1000_PBA_14K;
3121                 else
3122                         pba = E1000_PBA_10K;
3123                 break;
3124         case e1000_pchlan:
3125         case e1000_pch2lan:
3126         case e1000_pch_lpt:
3127         case e1000_pch_spt:
3128                 pba = E1000_PBA_26K;
3129                 break;
3130         default:
3131                 if (adapter->hw.mac.max_frame_size > 8192)
3132                         pba = E1000_PBA_40K; /* 40K for Rx, 24K for Tx */
3133                 else
3134                         pba = E1000_PBA_48K; /* 48K for Rx, 16K for Tx */
3135         }
3136         E1000_WRITE_REG(&adapter->hw, E1000_PBA, pba);
3137
3138         /*
3139          * These parameters control the automatic generation (Tx) and
3140          * response (Rx) to Ethernet PAUSE frames.
3141          * - High water mark should allow for at least two frames to be
3142          *   received after sending an XOFF.
3143          * - Low water mark works best when it is very near the high water mark.
3144          *   This allows the receiver to restart by sending XON when it has
3145          *   drained a bit.  Here we use an arbitrary value of 1500, which will
3146          *   restart after one full frame is pulled from the buffer.  There
3147          *   could be several smaller frames in the buffer, and if so they will
3148          *   not trigger the XON until their total number reduces the buffer
3149          *   by 1500.
3150          * - The pause time is fairly large at 1000 x 512ns = 512 usec.
3151          */
3152         rx_buffer_size = ((E1000_READ_REG(hw, E1000_PBA) & 0xffff) << 10);
3153         hw->fc.high_water = rx_buffer_size -
3154             roundup2(adapter->hw.mac.max_frame_size, 1024);
3155         hw->fc.low_water = hw->fc.high_water - 1500;
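             /*
             ** Worked example (a sketch, assuming the default 48K RX PBA
             ** and a standard 1518-byte max frame):
             **   rx_buffer_size = 48 << 10 = 49152
             **   high_water = 49152 - roundup2(1518, 1024) = 47104
             **   low_water  = 47104 - 1500 = 45604
             */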
3156
3157         if (adapter->fc) /* locally set flow control value? */
3158                 hw->fc.requested_mode = adapter->fc;
3159         else
3160                 hw->fc.requested_mode = e1000_fc_full;
3161
3162         if (hw->mac.type == e1000_80003es2lan)
3163                 hw->fc.pause_time = 0xFFFF;
3164         else
3165                 hw->fc.pause_time = EM_FC_PAUSE_TIME;
3166
3167         hw->fc.send_xon = TRUE;
3168
3169         /* Device specific overrides/settings */
3170         switch (hw->mac.type) {
3171         case e1000_pchlan:
3172                 /* Workaround: no TX flow ctrl for PCH */
3173                 hw->fc.requested_mode = e1000_fc_rx_pause;
3174                 hw->fc.pause_time = 0xFFFF; /* override */
3175                 if (ifp->if_mtu > ETHERMTU) {
3176                         hw->fc.high_water = 0x3500;
3177                         hw->fc.low_water = 0x1500;
3178                 } else {
3179                         hw->fc.high_water = 0x5000;
3180                         hw->fc.low_water = 0x3000;
3181                 }
3182                 hw->fc.refresh_time = 0x1000;
3183                 break;
3184         case e1000_pch2lan:
3185         case e1000_pch_lpt:
3186         case e1000_pch_spt:
3187                 hw->fc.high_water = 0x5C20;
3188                 hw->fc.low_water = 0x5048;
3189                 hw->fc.pause_time = 0x0650;
3190                 hw->fc.refresh_time = 0x0400;
3191                 /* Jumbos need adjusted PBA */
3192                 if (ifp->if_mtu > ETHERMTU)
3193                         E1000_WRITE_REG(hw, E1000_PBA, 12);
3194                 else
3195                         E1000_WRITE_REG(hw, E1000_PBA, 26);
3196                 break;
3197         case e1000_ich9lan:
3198         case e1000_ich10lan:
3199                 if (ifp->if_mtu > ETHERMTU) {
3200                         hw->fc.high_water = 0x2800;
3201                         hw->fc.low_water = hw->fc.high_water - 8;
3202                         break;
3203                 } 
3204                 /* else fall through */
3205         default:
3206                 if (hw->mac.type == e1000_80003es2lan)
3207                         hw->fc.pause_time = 0xFFFF;
3208                 break;
3209         }
3210
3211         /* I219 needs some special flushing to avoid hangs */
3212         if (hw->mac.type == e1000_pch_spt)
3213                 em_flush_desc_rings(adapter);
3214
3215         /* Issue a global reset */
3216         e1000_reset_hw(hw);
3217         E1000_WRITE_REG(hw, E1000_WUC, 0);
3218         em_disable_aspm(adapter);
3219         /* and a re-init */
3220         if (e1000_init_hw(hw) < 0) {
3221                 device_printf(dev, "Hardware Initialization Failed\n");
3222                 return;
3223         }
3224
3225         E1000_WRITE_REG(hw, E1000_VET, ETHERTYPE_VLAN);
3226         e1000_get_phy_info(hw);
3227         e1000_check_for_link(hw);
3228         return;
3229 }
3230
3231 /*********************************************************************
3232  *
3233  *  Setup networking device structure and register an interface.
3234  *
3235  **********************************************************************/
3236 static int
3237 em_setup_interface(device_t dev, struct adapter *adapter)
3238 {
3239         struct ifnet   *ifp;
3240
3241         INIT_DEBUGOUT("em_setup_interface: begin");
3242
3243         ifp = adapter->ifp = if_alloc(IFT_ETHER);
3244         if (ifp == NULL) {
3245                 device_printf(dev, "can not allocate ifnet structure\n");
3246                 return (-1);
3247         }
3248         if_initname(ifp, device_get_name(dev), device_get_unit(dev));
3249         ifp->if_init =  em_init;
3250         ifp->if_softc = adapter;
3251         ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
3252         ifp->if_ioctl = em_ioctl;
3253
3254         /* TSO parameters */
3255         ifp->if_hw_tsomax = IP_MAXPACKET;
3256         /* Take the m_pullup(9) calls in em_xmit() with TSO into account. */
3257         ifp->if_hw_tsomaxsegcount = EM_MAX_SCATTER - 5;
3258         ifp->if_hw_tsomaxsegsize = EM_TSO_SEG_SIZE;
3259
3260 #ifdef EM_MULTIQUEUE
3261         /* Multiqueue stack interface */
3262         ifp->if_transmit = em_mq_start;
3263         ifp->if_qflush = em_qflush;
3264 #else
3265         ifp->if_start = em_start;
3266         IFQ_SET_MAXLEN(&ifp->if_snd, adapter->num_tx_desc - 1);
3267         ifp->if_snd.ifq_drv_maxlen = adapter->num_tx_desc - 1;
3268         IFQ_SET_READY(&ifp->if_snd);
3269 #endif  
3270
3271         ether_ifattach(ifp, adapter->hw.mac.addr);
3272
3273         ifp->if_capabilities = IFCAP_HWCSUM | IFCAP_VLAN_HWCSUM;
3274         ifp->if_capenable = ifp->if_capabilities;
3275
3276         /*
3277          * Tell the upper layer(s) we
3278          * support full VLAN capability
3279          */
3280         ifp->if_data.ifi_hdrlen = sizeof(struct ether_vlan_header);
3281         ifp->if_capabilities |= IFCAP_VLAN_HWTAGGING
3282                              |  IFCAP_VLAN_HWTSO
3283                              |  IFCAP_VLAN_MTU;
3284         ifp->if_capenable |= IFCAP_VLAN_HWTAGGING
3285                           |  IFCAP_VLAN_MTU;
3286
3287         /*
3288          * We don't enable IFCAP_{TSO4,VLAN_HWTSO} by default because:
3289          * - Although the silicon bug of TSO only working at gigabit speed is
3290          *   worked around in em_update_link_status() by selectively setting
3291          *   CSUM_IP_TSO, we cannot atomically flush already queued TSO-using
3292          *   descriptors.  Thus, such descriptors may still cause the MAC to
3293          *   hang and, consequently, TSO is only safe to be used in setups
3294          *   where the link isn't expected to switch from gigabit to lower
3295          *   speeds.
3296          * - Similarly, there's currently no way to trigger a reconfiguration
3297          *   of vlan(4) when the state of IFCAP_VLAN_HWTSO support changes at
3298          *   runtime.  Therefore, IFCAP_VLAN_HWTSO also only is safe to use
3299          *   when link speed changes are not to be expected.
3300          * - Despite all the workarounds for TSO-related silicon bugs, at
3301          *   least 82579 still may hang at gigabit speed with IFCAP_TSO4.
3302          */
3303         ifp->if_capabilities |= IFCAP_TSO4 | IFCAP_VLAN_HWTSO;
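             /*
             ** Both remain runtime-togglable, so an administrator who
             ** knows the link will stay at gigabit can turn them on or
             ** off from userland, e.g. "ifconfig em0 tso" and
             ** "ifconfig em0 -tso" (a usage sketch; em0 is just an
             ** example unit).
             */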
3304
3305         /*
3306         ** Don't turn this on by default: if vlans are
3307         ** created on another pseudo device (e.g. lagg),
3308         ** then vlan events are not passed through, breaking
3309         ** operation, but with HW FILTER off it works.  If
3310         ** using vlans directly on the em driver you can
3311         ** enable this and get full hardware tag filtering.
3312         */
3313         ifp->if_capabilities |= IFCAP_VLAN_HWFILTER;
3314
3315 #ifdef DEVICE_POLLING
3316         ifp->if_capabilities |= IFCAP_POLLING;
3317 #endif
3318
3319         /* Enable only WOL MAGIC by default */
3320         if (adapter->wol) {
3321                 ifp->if_capabilities |= IFCAP_WOL;
3322                 ifp->if_capenable |= IFCAP_WOL_MAGIC;
3323         }
3324                 
3325         /*
3326          * Specify the media types supported by this adapter and register
3327          * callbacks to update media and link information
3328          */
3329         ifmedia_init(&adapter->media, IFM_IMASK,
3330             em_media_change, em_media_status);
3331         if ((adapter->hw.phy.media_type == e1000_media_type_fiber) ||
3332             (adapter->hw.phy.media_type == e1000_media_type_internal_serdes)) {
3333                 u_char fiber_type = IFM_1000_SX;        /* default type */
3334
3335                 ifmedia_add(&adapter->media, IFM_ETHER | fiber_type | IFM_FDX, 
3336                             0, NULL);
3337                 ifmedia_add(&adapter->media, IFM_ETHER | fiber_type, 0, NULL);
3338         } else {
3339                 ifmedia_add(&adapter->media, IFM_ETHER | IFM_10_T, 0, NULL);
3340                 ifmedia_add(&adapter->media, IFM_ETHER | IFM_10_T | IFM_FDX,
3341                             0, NULL);
3342                 ifmedia_add(&adapter->media, IFM_ETHER | IFM_100_TX,
3343                             0, NULL);
3344                 ifmedia_add(&adapter->media, IFM_ETHER | IFM_100_TX | IFM_FDX,
3345                             0, NULL);
3346                 if (adapter->hw.phy.type != e1000_phy_ife) {
3347                         ifmedia_add(&adapter->media,
3348                                 IFM_ETHER | IFM_1000_T | IFM_FDX, 0, NULL);
3349                         ifmedia_add(&adapter->media,
3350                                 IFM_ETHER | IFM_1000_T, 0, NULL);
3351                 }
3352         }
3353         ifmedia_add(&adapter->media, IFM_ETHER | IFM_AUTO, 0, NULL);
3354         ifmedia_set(&adapter->media, IFM_ETHER | IFM_AUTO);
3355         return (0);
3356 }
3357
3358
3359 /*
3360  * Manage DMA'able memory.
3361  */
3362 static void
3363 em_dmamap_cb(void *arg, bus_dma_segment_t *segs, int nseg, int error)
3364 {
3365         if (error)
3366                 return;
3367         *(bus_addr_t *) arg = segs[0].ds_addr;
3368 }
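
/*
 * A note on the callback above: bus_dmamap_load() resolves the buffer
 * into bus_dma_segment_t entries and hands them to this callback.
 * Every tag created below uses nsegments = 1, so stashing
 * segs[0].ds_addr is sufficient to capture the bus address of the
 * allocation.
 */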
3369
3370 static int
3371 em_dma_malloc(struct adapter *adapter, bus_size_t size,
3372         struct em_dma_alloc *dma, int mapflags)
3373 {
3374         int error;
3375
3376         error = bus_dma_tag_create(bus_get_dma_tag(adapter->dev), /* parent */
3377                                 EM_DBA_ALIGN, 0,        /* alignment, bounds */
3378                                 BUS_SPACE_MAXADDR,      /* lowaddr */
3379                                 BUS_SPACE_MAXADDR,      /* highaddr */
3380                                 NULL, NULL,             /* filter, filterarg */
3381                                 size,                   /* maxsize */
3382                                 1,                      /* nsegments */
3383                                 size,                   /* maxsegsize */
3384                                 0,                      /* flags */
3385                                 NULL,                   /* lockfunc */
3386                                 NULL,                   /* lockarg */
3387                                 &dma->dma_tag);
3388         if (error) {
3389                 device_printf(adapter->dev,
3390                     "%s: bus_dma_tag_create failed: %d\n",
3391                     __func__, error);
3392                 goto fail_0;
3393         }
3394
3395         error = bus_dmamem_alloc(dma->dma_tag, (void**) &dma->dma_vaddr,
3396             BUS_DMA_NOWAIT | BUS_DMA_COHERENT, &dma->dma_map);
3397         if (error) {
3398                 device_printf(adapter->dev,
3399                     "%s: bus_dmamem_alloc(%ju) failed: %d\n",
3400                     __func__, (uintmax_t)size, error);
3401                 goto fail_1;
3402         }
3403
3404         dma->dma_paddr = 0;
3405         error = bus_dmamap_load(dma->dma_tag, dma->dma_map, dma->dma_vaddr,
3406             size, em_dmamap_cb, &dma->dma_paddr, mapflags | BUS_DMA_NOWAIT);
3407         if (error || dma->dma_paddr == 0) {
3408                 device_printf(adapter->dev,
3409                     "%s: bus_dmamap_load failed: %d\n",
3410                     __func__, error);
3411                 goto fail_3;
3412         }
3413
3414         return (0);
3415
3416 fail_3:
3417         bus_dmamap_unload(dma->dma_tag, dma->dma_map);
3418 fail_2:
3419         bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map);
fail_1:
3420         bus_dma_tag_destroy(dma->dma_tag);
3421 fail_0:
3422         dma->dma_tag = NULL;
3423
3424         return (error);
3425 }
3426
3427 static void
3428 em_dma_free(struct adapter *adapter, struct em_dma_alloc *dma)
3429 {
3430         if (dma->dma_tag == NULL)
3431                 return;
3432         if (dma->dma_paddr != 0) {
3433                 bus_dmamap_sync(dma->dma_tag, dma->dma_map,
3434                     BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
3435                 bus_dmamap_unload(dma->dma_tag, dma->dma_map);
3436                 dma->dma_paddr = 0;
3437         }
3438         if (dma->dma_vaddr != NULL) {
3439                 bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map);
3440                 dma->dma_vaddr = NULL;
3441         }
3442         bus_dma_tag_destroy(dma->dma_tag);
3443         dma->dma_tag = NULL;
3444 }
3445
3446
3447 /*********************************************************************
3448  *
3449  *  Allocate memory for the transmit and receive rings, and then
3450  *  the descriptors associated with each, called only once at attach.
3451  *
3452  **********************************************************************/
3453 static int
3454 em_allocate_queues(struct adapter *adapter)
3455 {
3456         device_t                dev = adapter->dev;
3457         struct tx_ring          *txr = NULL;
3458         struct rx_ring          *rxr = NULL;
3459         int rsize, tsize, error = E1000_SUCCESS;
3460         int txconf = 0, rxconf = 0;
3461
3462
3463         /* Allocate the TX ring struct memory */
3464         if (!(adapter->tx_rings =
3465             (struct tx_ring *) malloc(sizeof(struct tx_ring) *
3466             adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
3467                 device_printf(dev, "Unable to allocate TX ring memory\n");
3468                 error = ENOMEM;
3469                 goto fail;
3470         }
3471
3472         /* Now allocate the RX */
3473         if (!(adapter->rx_rings =
3474             (struct rx_ring *) malloc(sizeof(struct rx_ring) *
3475             adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
3476                 device_printf(dev, "Unable to allocate RX ring memory\n");
3477                 error = ENOMEM;
3478                 goto rx_fail;
3479         }
3480
3481         tsize = roundup2(adapter->num_tx_desc *
3482             sizeof(struct e1000_tx_desc), EM_DBA_ALIGN);
3483         /*
3484          * Now set up the TX queues, txconf is needed to handle the
3485          * possibility that things fail midcourse and we need to
3486          * undo memory gracefully
3487          */ 
3488         for (int i = 0; i < adapter->num_queues; i++, txconf++) {
3489                 /* Set up some basics */
3490                 txr = &adapter->tx_rings[i];
3491                 txr->adapter = adapter;
3492                 txr->me = i;
3493
3494                 /* Initialize the TX lock */
3495                 snprintf(txr->mtx_name, sizeof(txr->mtx_name), "%s:tx(%d)",
3496                     device_get_nameunit(dev), txr->me);
3497                 mtx_init(&txr->tx_mtx, txr->mtx_name, NULL, MTX_DEF);
3498
3499                 if (em_dma_malloc(adapter, tsize,
3500                         &txr->txdma, BUS_DMA_NOWAIT)) {
3501                         device_printf(dev,
3502                             "Unable to allocate TX Descriptor memory\n");
3503                         error = ENOMEM;
3504                         goto err_tx_desc;
3505                 }
3506                 txr->tx_base = (struct e1000_tx_desc *)txr->txdma.dma_vaddr;
3507                 bzero((void *)txr->tx_base, tsize);
3508
3509                 if (em_allocate_transmit_buffers(txr)) {
3510                         device_printf(dev,
3511                             "Critical Failure setting up transmit buffers\n");
3512                         error = ENOMEM;
3513                         goto err_tx_desc;
3514                 }
3515 #if __FreeBSD_version >= 800000
3516                 /* Allocate a buf ring */
3517                 txr->br = buf_ring_alloc(4096, M_DEVBUF,
3518                     M_WAITOK, &txr->tx_mtx);
3519 #endif
3520         }
3521
3522         /*
3523          * Next the RX queues...
3524          */ 
3525         rsize = roundup2(adapter->num_rx_desc *
3526             sizeof(union e1000_rx_desc_extended), EM_DBA_ALIGN);
3527         for (int i = 0; i < adapter->num_queues; i++, rxconf++) {
3528                 rxr = &adapter->rx_rings[i];
3529                 rxr->adapter = adapter;
3530                 rxr->me = i;
3531
3532                 /* Initialize the RX lock */
3533                 snprintf(rxr->mtx_name, sizeof(rxr->mtx_name), "%s:rx(%d)",
3534                     device_get_nameunit(dev), rxr->me);
3535                 mtx_init(&rxr->rx_mtx, rxr->mtx_name, NULL, MTX_DEF);
3536
3537                 if (em_dma_malloc(adapter, rsize,
3538                         &rxr->rxdma, BUS_DMA_NOWAIT)) {
3539                         device_printf(dev,
3540                             "Unable to allocate RX Descriptor memory\n");
3541                         error = ENOMEM;
3542                         goto err_rx_desc;
3543                 }
3544                 rxr->rx_base = (union e1000_rx_desc_extended *)rxr->rxdma.dma_vaddr;
3545                 bzero((void *)rxr->rx_base, rsize);
3546
3547                 /* Allocate receive buffers for the ring */
3548                 if (em_allocate_receive_buffers(rxr)) {
3549                         device_printf(dev,
3550                             "Critical Failure setting up receive buffers\n");
3551                         error = ENOMEM;
3552                         goto err_rx_desc;
3553                 }
3554         }
3555
3556         return (0);
3557
3558 err_rx_desc:
3559         for (rxr = adapter->rx_rings; rxconf > 0; rxr++, rxconf--)
3560                 em_dma_free(adapter, &rxr->rxdma);
3561 err_tx_desc:
3562         for (txr = adapter->tx_rings; txconf > 0; txr++, txconf--)
3563                 em_dma_free(adapter, &txr->txdma);
3564         free(adapter->rx_rings, M_DEVBUF);
3565 rx_fail:
3566 #if __FreeBSD_version >= 800000
3567         if (txr != NULL && txr->br != NULL)
                buf_ring_free(txr->br, M_DEVBUF);
3568 #endif
3569         free(adapter->tx_rings, M_DEVBUF);
3570 fail:
3571         return (error);
3572 }
3573
3574
3575 /*********************************************************************
3576  *
3577  *  Allocate memory for tx_buffer structures. The tx_buffer stores all
3578  *  the information needed to transmit a packet on the wire. This is
3579  *  called only once at attach; setup is done on every reset.
3580  *
3581  **********************************************************************/
3582 static int
3583 em_allocate_transmit_buffers(struct tx_ring *txr)
3584 {
3585         struct adapter *adapter = txr->adapter;
3586         device_t dev = adapter->dev;
3587         struct em_txbuffer *txbuf;
3588         int error, i;
3589
3590         /*
3591          * Setup DMA descriptor areas.
3592          */
3593         if ((error = bus_dma_tag_create(bus_get_dma_tag(dev),
3594                                1, 0,                    /* alignment, bounds */
3595                                BUS_SPACE_MAXADDR,       /* lowaddr */
3596                                BUS_SPACE_MAXADDR,       /* highaddr */
3597                                NULL, NULL,              /* filter, filterarg */
3598                                EM_TSO_SIZE,             /* maxsize */
3599                                EM_MAX_SCATTER,          /* nsegments */
3600                                PAGE_SIZE,               /* maxsegsize */
3601                                0,                       /* flags */
3602                                NULL,                    /* lockfunc */
3603                                NULL,                    /* lockfuncarg */
3604                                &txr->txtag))) {
3605                 device_printf(dev, "Unable to allocate TX DMA tag\n");
3606                 goto fail;
3607         }
3608
3609         if (!(txr->tx_buffers =
3610             (struct em_txbuffer *) malloc(sizeof(struct em_txbuffer) *
3611             adapter->num_tx_desc, M_DEVBUF, M_NOWAIT | M_ZERO))) {
3612                 device_printf(dev, "Unable to allocate tx_buffer memory\n");
3613                 error = ENOMEM;
3614                 goto fail;
3615         }
3616
3617         /* Create the descriptor buffer dma maps */
3618         txbuf = txr->tx_buffers;
3619         for (i = 0; i < adapter->num_tx_desc; i++, txbuf++) {
3620                 error = bus_dmamap_create(txr->txtag, 0, &txbuf->map);
3621                 if (error != 0) {
3622                         device_printf(dev, "Unable to create TX DMA map\n");
3623                         goto fail;
3624                 }
3625         }
3626
3627         return (0);
3628 fail:
3629         /* We free everything; this handles the case where we failed partway */
3630         em_free_transmit_structures(adapter);
3631         return (error);
3632 }
3633
3634 /*********************************************************************
3635  *
3636  *  Initialize a transmit ring.
3637  *
3638  **********************************************************************/
3639 static void
3640 em_setup_transmit_ring(struct tx_ring *txr)
3641 {
3642         struct adapter *adapter = txr->adapter;
3643         struct em_txbuffer *txbuf;
3644         int i;
3645 #ifdef DEV_NETMAP
3646         struct netmap_adapter *na = NA(adapter->ifp);
3647         struct netmap_slot *slot;
3648 #endif /* DEV_NETMAP */
3649
3650         /* Clear the old descriptor contents */
3651         EM_TX_LOCK(txr);
3652 #ifdef DEV_NETMAP
3653         slot = netmap_reset(na, NR_TX, txr->me, 0);
3654 #endif /* DEV_NETMAP */
3655
3656         bzero((void *)txr->tx_base,
3657               (sizeof(struct e1000_tx_desc)) * adapter->num_tx_desc);
3658         /* Reset indices */
3659         txr->next_avail_desc = 0;
3660         txr->next_to_clean = 0;
3661
3662         /* Free any existing tx buffers. */
3663         txbuf = txr->tx_buffers;
3664         for (i = 0; i < adapter->num_tx_desc; i++, txbuf++) {
3665                 if (txbuf->m_head != NULL) {
3666                         bus_dmamap_sync(txr->txtag, txbuf->map,
3667                             BUS_DMASYNC_POSTWRITE);
3668                         bus_dmamap_unload(txr->txtag, txbuf->map);
3669                         m_freem(txbuf->m_head);
3670                         txbuf->m_head = NULL;
3671                 }
3672 #ifdef DEV_NETMAP
3673                 if (slot) {
3674                         int si = netmap_idx_n2k(&na->tx_rings[txr->me], i);
3675                         uint64_t paddr;
3676                         void *addr;
3677
3678                         addr = PNMB(na, slot + si, &paddr);
3679                         txr->tx_base[i].buffer_addr = htole64(paddr);
3680                         /* reload the map for netmap mode */
3681                         netmap_load_map(na, txr->txtag, txbuf->map, addr);
3682                 }
3683 #endif /* DEV_NETMAP */
3684
3685                 /* clear the watch index */
3686                 txbuf->next_eop = -1;
3687         }
3688
3689         /* Set number of descriptors available */
3690         txr->tx_avail = adapter->num_tx_desc;
3691         txr->busy = EM_TX_IDLE;
3692
3693         /* Clear checksum offload context. */
3694         txr->last_hw_offload = 0;
3695         txr->last_hw_ipcss = 0;
3696         txr->last_hw_ipcso = 0;
3697         txr->last_hw_tucss = 0;
3698         txr->last_hw_tucso = 0;
3699
3700         bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
3701             BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
3702         EM_TX_UNLOCK(txr);
3703 }
3704
3705 /*********************************************************************
3706  *
3707  *  Initialize all transmit rings.
3708  *
3709  **********************************************************************/
3710 static void
3711 em_setup_transmit_structures(struct adapter *adapter)
3712 {
3713         struct tx_ring *txr = adapter->tx_rings;
3714
3715         for (int i = 0; i < adapter->num_queues; i++, txr++)
3716                 em_setup_transmit_ring(txr);
3717
3718         return;
3719 }
3720
3721 /*********************************************************************
3722  *
3723  *  Enable transmit unit.
3724  *
3725  **********************************************************************/
3726 static void
3727 em_initialize_transmit_unit(struct adapter *adapter)
3728 {
3729         struct tx_ring  *txr = adapter->tx_rings;
3730         struct e1000_hw *hw = &adapter->hw;
3731         u32     tctl, txdctl = 0, tarc, tipg = 0;
3732
3733         INIT_DEBUGOUT("em_initialize_transmit_unit: begin");
3734
3735         for (int i = 0; i < adapter->num_queues; i++, txr++) {
3736                 u64 bus_addr = txr->txdma.dma_paddr;
3737                 /* Base and Len of TX Ring */
3738                 E1000_WRITE_REG(hw, E1000_TDLEN(i),
3739                     adapter->num_tx_desc * sizeof(struct e1000_tx_desc));
3740                 E1000_WRITE_REG(hw, E1000_TDBAH(i),
3741                     (u32)(bus_addr >> 32));
3742                 E1000_WRITE_REG(hw, E1000_TDBAL(i),
3743                     (u32)bus_addr);
3744                 /* Init the HEAD/TAIL indices */
3745                 E1000_WRITE_REG(hw, E1000_TDT(i), 0);
3746                 E1000_WRITE_REG(hw, E1000_TDH(i), 0);
3747
3748                 HW_DEBUGOUT2("Base = %x, Length = %x\n",
3749                     E1000_READ_REG(&adapter->hw, E1000_TDBAL(i)),
3750                     E1000_READ_REG(&adapter->hw, E1000_TDLEN(i)));
3751
3752                 txr->busy = EM_TX_IDLE;
3753                 txdctl = 0; /* clear txdctl */
3754                 txdctl |= 0x1f; /* PTHRESH */
3755                 txdctl |= 1 << 8; /* HTHRESH */
3756                 txdctl |= 1 << 16; /* WTHRESH */
3757                 txdctl |= 1 << 22; /* Reserved bit 22 must always be 1 */
3758                 txdctl |= E1000_TXDCTL_GRAN;
3759                 txdctl |= 1 << 25; /* LWTHRESH */
3760
3761                 E1000_WRITE_REG(hw, E1000_TXDCTL(i), txdctl);
3762         }
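             /*
             ** Worked example of the TXDCTL value assembled above (a
             ** sketch, assuming E1000_TXDCTL_GRAN is bit 24, as in the
             ** shared e1000 headers):
             **   0x1f | (1 << 8) | (1 << 16) | (1 << 22) | (1 << 24) |
             **       (1 << 25) = 0x0341011f
             */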
3763
3764         /* Set the default values for the Tx Inter Packet Gap timer */
3765         switch (adapter->hw.mac.type) {
3766         case e1000_80003es2lan:
3767                 tipg = DEFAULT_82543_TIPG_IPGR1;
3768                 tipg |= DEFAULT_80003ES2LAN_TIPG_IPGR2 <<
3769                     E1000_TIPG_IPGR2_SHIFT;
3770                 break;
3771         default:
3772                 if ((adapter->hw.phy.media_type == e1000_media_type_fiber) ||
3773                     (adapter->hw.phy.media_type ==
3774                     e1000_media_type_internal_serdes))
3775                         tipg = DEFAULT_82543_TIPG_IPGT_FIBER;
3776                 else
3777                         tipg = DEFAULT_82543_TIPG_IPGT_COPPER;
3778                 tipg |= DEFAULT_82543_TIPG_IPGR1 << E1000_TIPG_IPGR1_SHIFT;
3779                 tipg |= DEFAULT_82543_TIPG_IPGR2 << E1000_TIPG_IPGR2_SHIFT;
3780         }
3781
3782         E1000_WRITE_REG(&adapter->hw, E1000_TIPG, tipg);
3783         E1000_WRITE_REG(&adapter->hw, E1000_TIDV, adapter->tx_int_delay.value);
3784
3785         if (adapter->hw.mac.type >= e1000_82540)
3786                 E1000_WRITE_REG(&adapter->hw, E1000_TADV,
3787                     adapter->tx_abs_int_delay.value);
3788
3789         if ((adapter->hw.mac.type == e1000_82571) ||
3790             (adapter->hw.mac.type == e1000_82572)) {
3791                 tarc = E1000_READ_REG(&adapter->hw, E1000_TARC(0));
3792                 tarc |= TARC_SPEED_MODE_BIT;
3793                 E1000_WRITE_REG(&adapter->hw, E1000_TARC(0), tarc);
3794         } else if (adapter->hw.mac.type == e1000_80003es2lan) {
3795                 /* errata: program both queues to unweighted RR */
3796                 tarc = E1000_READ_REG(&adapter->hw, E1000_TARC(0));
3797                 tarc |= 1;
3798                 E1000_WRITE_REG(&adapter->hw, E1000_TARC(0), tarc);
3799                 tarc = E1000_READ_REG(&adapter->hw, E1000_TARC(1));
3800                 tarc |= 1;
3801                 E1000_WRITE_REG(&adapter->hw, E1000_TARC(1), tarc);
3802         } else if (adapter->hw.mac.type == e1000_82574) {
3803                 tarc = E1000_READ_REG(&adapter->hw, E1000_TARC(0));
3804                 tarc |= TARC_ERRATA_BIT;
3805                 if (adapter->num_queues > 1) {
3806                         tarc |= (TARC_COMPENSATION_MODE | TARC_MQ_FIX);
3807                         E1000_WRITE_REG(&adapter->hw, E1000_TARC(0), tarc);
3808                         E1000_WRITE_REG(&adapter->hw, E1000_TARC(1), tarc);
3809                 } else
3810                         E1000_WRITE_REG(&adapter->hw, E1000_TARC(0), tarc);
3811         }
3812
3813         adapter->txd_cmd = E1000_TXD_CMD_IFCS;
3814         if (adapter->tx_int_delay.value > 0)
3815                 adapter->txd_cmd |= E1000_TXD_CMD_IDE;
3816
3817         /* Program the Transmit Control Register */
3818         tctl = E1000_READ_REG(&adapter->hw, E1000_TCTL);
3819         tctl &= ~E1000_TCTL_CT;
3820         tctl |= (E1000_TCTL_PSP | E1000_TCTL_RTLC | E1000_TCTL_EN |
3821                    (E1000_COLLISION_THRESHOLD << E1000_CT_SHIFT));
3822
3823         if (adapter->hw.mac.type >= e1000_82571)
3824                 tctl |= E1000_TCTL_MULR;
3825
3826         /* This write will effectively turn on the transmit unit. */
3827         E1000_WRITE_REG(&adapter->hw, E1000_TCTL, tctl);
3828
3829         if (hw->mac.type == e1000_pch_spt) {
3830                 u32 reg;
3831                 reg = E1000_READ_REG(hw, E1000_IOSFPC);
3832                 reg |= E1000_RCTL_RDMTS_HEX;
3833                 E1000_WRITE_REG(hw, E1000_IOSFPC, reg);
3834                 reg = E1000_READ_REG(hw, E1000_TARC(0));
3835                 reg |= E1000_TARC0_CB_MULTIQ_3_REQ;
3836                 E1000_WRITE_REG(hw, E1000_TARC(0), reg);
3837         }
3838 }
3839
3840
3841 /*********************************************************************
3842  *
3843  *  Free all transmit rings.
3844  *
3845  **********************************************************************/
3846 static void
3847 em_free_transmit_structures(struct adapter *adapter)
3848 {
3849         struct tx_ring *txr = adapter->tx_rings;
3850
3851         for (int i = 0; i < adapter->num_queues; i++, txr++) {
3852                 EM_TX_LOCK(txr);
3853                 em_free_transmit_buffers(txr);
3854                 em_dma_free(adapter, &txr->txdma);
3855                 EM_TX_UNLOCK(txr);
3856                 EM_TX_LOCK_DESTROY(txr);
3857         }
3858
3859         free(adapter->tx_rings, M_DEVBUF);
3860 }
3861
3862 /*********************************************************************
3863  *
3864  *  Free transmit ring related data structures.
3865  *
3866  **********************************************************************/
3867 static void
3868 em_free_transmit_buffers(struct tx_ring *txr)
3869 {
3870         struct adapter          *adapter = txr->adapter;
3871         struct em_txbuffer      *txbuf;
3872
3873         INIT_DEBUGOUT("free_transmit_ring: begin");
3874
3875         if (txr->tx_buffers == NULL)
3876                 return;
3877
3878         for (int i = 0; i < adapter->num_tx_desc; i++) {
3879                 txbuf = &txr->tx_buffers[i];
3880                 if (txbuf->m_head != NULL) {
3881                         bus_dmamap_sync(txr->txtag, txbuf->map,
3882                             BUS_DMASYNC_POSTWRITE);
3883                         bus_dmamap_unload(txr->txtag,
3884                             txbuf->map);
3885                         m_freem(txbuf->m_head);
3886                         txbuf->m_head = NULL;
3887                         if (txbuf->map != NULL) {
3888                                 bus_dmamap_destroy(txr->txtag,
3889                                     txbuf->map);
3890                                 txbuf->map = NULL;
3891                         }
3892                 } else if (txbuf->map != NULL) {
3893                         bus_dmamap_unload(txr->txtag,
3894                             txbuf->map);
3895                         bus_dmamap_destroy(txr->txtag,
3896                             txbuf->map);
3897                         txbuf->map = NULL;
3898                 }
3899         }
3900 #if __FreeBSD_version >= 800000
3901         if (txr->br != NULL)
3902                 buf_ring_free(txr->br, M_DEVBUF);
3903 #endif
3904         if (txr->tx_buffers != NULL) {
3905                 free(txr->tx_buffers, M_DEVBUF);
3906                 txr->tx_buffers = NULL;
3907         }
3908         if (txr->txtag != NULL) {
3909                 bus_dma_tag_destroy(txr->txtag);
3910                 txr->txtag = NULL;
3911         }
3912         return;
3913 }
3914
3915
3916 /*********************************************************************
3917  *  The offload context is protocol specific (TCP/UDP) and thus
3918  *  only needs to be set when the protocol changes.  A context
3919  *  change carries a performance penalty, and the offload might be
3920  *  better left disabled.  The reason arises in the way
3921  *  in which the controller supports pipelined requests from the
3922  *  Tx data DMA.  Up to four requests can be pipelined, and they may
3923  *  belong to the same packet or to multiple packets.  However, all
3924  *  requests for one packet are issued before a request is issued
3925  *  for a subsequent packet, and if a request for the next packet
3926  *  requires a context change, that request will be stalled
3927  *  until the previous request completes.  This means setting up
3928  *  a new context effectively disables pipelined Tx data DMA, which
3929  *  in turn greatly slows down performance when sending small
3930  *  frames.
3931  **********************************************************************/
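/*
 * Worked example (a sketch, assuming an untagged Ethernet + IPv4 + TCP
 * frame with a 20-byte IP header): ip_off = 14 and
 * hdr_len = 14 + 20 = 34, so the function below programs
 *   ipcss = 14, ipcse = 34, ipcso = 14 + offsetof(struct ip, ip_sum) = 24
 *   tucss = 34, tucso = 34 + offsetof(struct tcphdr, th_sum) = 50
 */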
3932 static void
3933 em_transmit_checksum_setup(struct tx_ring *txr, struct mbuf *mp, int ip_off,
3934     struct ip *ip, u32 *txd_upper, u32 *txd_lower)
3935 {
3936         struct adapter                  *adapter = txr->adapter;
3937         struct e1000_context_desc       *TXD = NULL;
3938         struct em_txbuffer              *tx_buffer;
3939         int                             cur, hdr_len;
3940         u32                             cmd = 0;
3941         u16                             offload = 0;
3942         u8                              ipcso, ipcss, tucso, tucss;
3943
3944         ipcss = ipcso = tucss = tucso = 0;
3945         hdr_len = ip_off + (ip->ip_hl << 2);
3946         cur = txr->next_avail_desc;
3947
3948         /* Setup of IP header checksum. */
3949         if (mp->m_pkthdr.csum_flags & CSUM_IP) {
3950                 *txd_upper |= E1000_TXD_POPTS_IXSM << 8;
3951                 offload |= CSUM_IP;
3952                 ipcss = ip_off;
3953                 ipcso = ip_off + offsetof(struct ip, ip_sum);
3954                 /*
3955                  * Start offset for header checksum calculation.
3956                  * End offset for header checksum calculation.
3957                  * Offset of place to put the checksum.
3958                  */
3959                 TXD = (struct e1000_context_desc *)&txr->tx_base[cur];
3960                 TXD->lower_setup.ip_fields.ipcss = ipcss;
3961                 TXD->lower_setup.ip_fields.ipcse = htole16(hdr_len);
3962                 TXD->lower_setup.ip_fields.ipcso = ipcso;
3963                 cmd |= E1000_TXD_CMD_IP;
3964         }
3965
3966         if (mp->m_pkthdr.csum_flags & CSUM_TCP) {
3967                 *txd_lower = E1000_TXD_CMD_DEXT | E1000_TXD_DTYP_D;
3968                 *txd_upper |= E1000_TXD_POPTS_TXSM << 8;
3969                 offload |= CSUM_TCP;
3970                 tucss = hdr_len;
3971                 tucso = hdr_len + offsetof(struct tcphdr, th_sum);
3972                 /*
3973                  * The 82574L can only remember the *last* context used
3974                  * regardless of the queue it was used for.  We cannot reuse
3975                  * contexts on this hardware platform and must generate a new
3976                  * context every time.  82574L hardware spec, section 7.2.6,
3977                  * second note.
3978                  */
3979                 if (adapter->num_queues < 2) {
3980                         /*
3981                         * Setting up a new checksum offload context for every
3982                         * frame takes a lot of processing time for the hardware.
3983                         * This also reduces performance a lot for small
3984                         * frames, so avoid it if the driver can use a previously
3985                         * configured checksum offload context.
3986                         */
3987                         if (txr->last_hw_offload == offload) {
3988                                 if (offload & CSUM_IP) {
3989                                         if (txr->last_hw_ipcss == ipcss &&
3990                                         txr->last_hw_ipcso == ipcso &&
3991                                         txr->last_hw_tucss == tucss &&
3992                                         txr->last_hw_tucso == tucso)
3993                                                 return;
3994                                 } else {
3995                                         if (txr->last_hw_tucss == tucss &&
3996                                         txr->last_hw_tucso == tucso)
3997                                                 return;
3998                                 }
3999                         }
4000                         txr->last_hw_offload = offload;
4001                         txr->last_hw_tucss = tucss;
4002                         txr->last_hw_tucso = tucso;
4003                 }
4004                 /*
4005                  * Start offset for payload checksum calculation.
4006                  * End offset for payload checksum calculation.
4007                  * Offset of place to put the checksum.
4008                  */
4009                 TXD = (struct e1000_context_desc *)&txr->tx_base[cur];
4010                 TXD->upper_setup.tcp_fields.tucss = tucss;
4011                 TXD->upper_setup.tcp_fields.tucse = htole16(0);
4012                 TXD->upper_setup.tcp_fields.tucso = tucso;
4013                 cmd |= E1000_TXD_CMD_TCP;
4014         } else if (mp->m_pkthdr.csum_flags & CSUM_UDP) {
4015                 *txd_lower = E1000_TXD_CMD_DEXT | E1000_TXD_DTYP_D;
4016                 *txd_upper |= E1000_TXD_POPTS_TXSM << 8;
4017                 tucss = hdr_len;
4018                 tucso = hdr_len + offsetof(struct udphdr, uh_sum);
4019                 /*
4020                  * The 82574L can only remember the *last* context used
4021                  * regardless of the queue it was used for.  We cannot reuse
4022                  * contexts on this hardware platform and must generate a new
4023                  * context every time.  82574L hardware spec, section 7.2.6,
4024                  * second note.
4025                  */
4026                 if (adapter->num_queues < 2) {
4027                         /*
4028                         * Setting up a new checksum offload context for every
4029                         * frame takes a lot of processing time in hardware.
4030                         * This also reduces performance a lot for small-sized
4031                         * frames, so avoid it if the driver can use a previously
4032                         * configured checksum offload context.
4033                         */
4034                         if (txr->last_hw_offload == offload) {
4035                                 if (offload & CSUM_IP) {
4036                                         if (txr->last_hw_ipcss == ipcss &&
4037                                         txr->last_hw_ipcso == ipcso &&
4038                                         txr->last_hw_tucss == tucss &&
4039                                         txr->last_hw_tucso == tucso)
4040                                                 return;
4041                                 } else {
4042                                         if (txr->last_hw_tucss == tucss &&
4043                                         txr->last_hw_tucso == tucso)
4044                                                 return;
4045                                 }
4046                         }
4047                         txr->last_hw_offload = offload;
4048                         txr->last_hw_tucss = tucss;
4049                         txr->last_hw_tucso = tucso;
4050                 }
4051                 /*
4052                  * Start offset for payload checksum calculation.
4053                  * End offset for payload checksum calculation.
4054                  * Offset of place to put the checksum.
4055                  */
4056                 TXD = (struct e1000_context_desc *)&txr->tx_base[cur];
4057                 TXD->upper_setup.tcp_fields.tucss = tucss;
4058                 TXD->upper_setup.tcp_fields.tucse = htole16(0);
4059                 TXD->upper_setup.tcp_fields.tucso = tucso;
4060         }
4061   
4062         if (offload & CSUM_IP) {
4063                 txr->last_hw_ipcss = ipcss;
4064                 txr->last_hw_ipcso = ipcso;
4065         }
4066
4067         TXD->tcp_seg_setup.data = htole32(0);
4068         TXD->cmd_and_length =
4069             htole32(adapter->txd_cmd | E1000_TXD_CMD_DEXT | cmd);
4070         tx_buffer = &txr->tx_buffers[cur];
4071         tx_buffer->m_head = NULL;
4072         tx_buffer->next_eop = -1;
4073
4074         if (++cur == adapter->num_tx_desc)
4075                 cur = 0;
4076
4077         txr->tx_avail--;
4078         txr->next_avail_desc = cur;
4079 }
4080
4081
4082 /**********************************************************************
4083  *
4084  *  Setup work for hardware segmentation offload (TSO)
4085  *
4086  **********************************************************************/
4087 static void
4088 em_tso_setup(struct tx_ring *txr, struct mbuf *mp, int ip_off,
4089     struct ip *ip, struct tcphdr *tp, u32 *txd_upper, u32 *txd_lower)
4090 {
4091         struct adapter                  *adapter = txr->adapter;
4092         struct e1000_context_desc       *TXD;
4093         struct em_txbuffer              *tx_buffer;
4094         int cur, hdr_len;
4095
4096         /*
4097          * In theory we can reuse the same TSO context if and only if
4098          * the frame is the same type (IP/TCP) and has the same MSS.
4099          * However, checking whether a frame has the same IP/TCP header
4100          * layout is hard, so just ignore that and always establish a
4101          * new TSO context.
4102          */
4103         hdr_len = ip_off + (ip->ip_hl << 2) + (tp->th_off << 2);
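             /*
              * Illustrative numbers: ip_off = 14 for untagged Ethernet,
              * ip_hl = 5 (20-byte IP header) and th_off = 5 (20-byte TCP
              * header) give hdr_len = 14 + 20 + 20 = 54.
              */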
4104         *txd_lower = (E1000_TXD_CMD_DEXT |      /* Extended descr type */
4105                       E1000_TXD_DTYP_D |        /* Data descr type */
4106                       E1000_TXD_CMD_TSE);       /* Do TSE on this packet */
4107
4108         /* IP and/or TCP header checksum calculation and insertion. */
4109         *txd_upper = (E1000_TXD_POPTS_IXSM | E1000_TXD_POPTS_TXSM) << 8;
4110
4111         cur = txr->next_avail_desc;
4112         tx_buffer = &txr->tx_buffers[cur];
4113         TXD = (struct e1000_context_desc *) &txr->tx_base[cur];
4114
4115         /*
4116          * Start offset for header checksum calculation.
4117          * End offset for header checksum calculation.
4118          * Offset of place to put the checksum.
4119          */
4120         TXD->lower_setup.ip_fields.ipcss = ip_off;
4121         TXD->lower_setup.ip_fields.ipcse =
4122             htole16(ip_off + (ip->ip_hl << 2) - 1);
4123         TXD->lower_setup.ip_fields.ipcso = ip_off + offsetof(struct ip, ip_sum);
4124         /*
4125          * Start offset for payload checksum calculation.
4126          * End offset for payload checksum calculation.
4127          * Offset of place to put the checksum.
4128          */
4129         TXD->upper_setup.tcp_fields.tucss = ip_off + (ip->ip_hl << 2);
4130         TXD->upper_setup.tcp_fields.tucse = 0;
4131         TXD->upper_setup.tcp_fields.tucso =
4132             ip_off + (ip->ip_hl << 2) + offsetof(struct tcphdr, th_sum);
4133         /*
4134          * Payload size per packet w/o any headers.
4135          * Length of all headers up to payload.
4136          */
4137         TXD->tcp_seg_setup.fields.mss = htole16(mp->m_pkthdr.tso_segsz);
4138         TXD->tcp_seg_setup.fields.hdr_len = hdr_len;
4139
4140         TXD->cmd_and_length = htole32(adapter->txd_cmd |
4141                                 E1000_TXD_CMD_DEXT |    /* Extended descr */
4142                                 E1000_TXD_CMD_TSE |     /* TSE context */
4143                                 E1000_TXD_CMD_IP |      /* Do IP csum */
4144                                 E1000_TXD_CMD_TCP |     /* Do TCP checksum */
4145                                 (mp->m_pkthdr.len - (hdr_len))); /* Total len */
4146
4147         tx_buffer->m_head = NULL;
4148         tx_buffer->next_eop = -1;
4149
4150         if (++cur == adapter->num_tx_desc)
4151                 cur = 0;
4152
4153         txr->tx_avail--;
4154         txr->next_avail_desc = cur;
4155         txr->tx_tso = TRUE;
4156 }
4157
4158
4159 /**********************************************************************
4160  *
4161  *  Examine each tx_buffer in the used queue. If the hardware is done
4162  *  processing the packet then free associated resources. The
4163  *  tx_buffer is put back on the free queue.
4164  *
4165  **********************************************************************/
4166 static void
4167 em_txeof(struct tx_ring *txr)
4168 {
4169         struct adapter  *adapter = txr->adapter;
4170         int first, last, done, processed;
4171         struct em_txbuffer *tx_buffer;
4172         struct e1000_tx_desc   *tx_desc, *eop_desc;
4173         struct ifnet   *ifp = adapter->ifp;
4174
4175         EM_TX_LOCK_ASSERT(txr);
4176 #ifdef DEV_NETMAP
4177         if (netmap_tx_irq(ifp, txr->me))
4178                 return;
4179 #endif /* DEV_NETMAP */
4180
4181         /* No work, make sure hang detection is disabled */
4182         if (txr->tx_avail == adapter->num_tx_desc) {
4183                 txr->busy = EM_TX_IDLE;
4184                 return;
4185         }
4186
4187         processed = 0;
4188         first = txr->next_to_clean;
4189         tx_desc = &txr->tx_base[first];
4190         tx_buffer = &txr->tx_buffers[first];
4191         last = tx_buffer->next_eop;
4192         eop_desc = &txr->tx_base[last];
4193
4194         /*
4195          * Get the index of the first descriptor
4196          * AFTER the EOP of the first packet, so
4197          * that we can do the simple comparison
4198          * in the inner while loop below.
4199          */
4200         if (++last == adapter->num_tx_desc)
4201                 last = 0;
4202         done = last;
4203
4204         bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
4205             BUS_DMASYNC_POSTREAD);
4206
4207         while (eop_desc->upper.fields.status & E1000_TXD_STAT_DD) {
4208                 /* We clean the range of the packet */
4209                 while (first != done) {
4210                         tx_desc->upper.data = 0;
4211                         tx_desc->lower.data = 0;
4212                         tx_desc->buffer_addr = 0;
4213                         ++txr->tx_avail;
4214                         ++processed;
4215
4216                         if (tx_buffer->m_head) {
4217                                 bus_dmamap_sync(txr->txtag,
4218                                     tx_buffer->map,
4219                                     BUS_DMASYNC_POSTWRITE);
4220                                 bus_dmamap_unload(txr->txtag,
4221                                     tx_buffer->map);
4222                                 m_freem(tx_buffer->m_head);
4223                                 tx_buffer->m_head = NULL;
4224                         }
4225                         tx_buffer->next_eop = -1;
4226
4227                         if (++first == adapter->num_tx_desc)
4228                                 first = 0;
4229
4230                         tx_buffer = &txr->tx_buffers[first];
4231                         tx_desc = &txr->tx_base[first];
4232                 }
4233                 ++ifp->if_opackets;
4234                 /* See if we can continue to the next packet */
4235                 last = tx_buffer->next_eop;
4236                 if (last != -1) {
4237                         eop_desc = &txr->tx_base[last];
4238                         /* Get new done point */
4239                         if (++last == adapter->num_tx_desc) last = 0;
4240                         done = last;
4241                 } else
4242                         break;
4243         }
4244         bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
4245             BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
4246
4247         txr->next_to_clean = first;
4248
4249         /*
4250         ** Hang detection: we know there's work outstanding
4251         ** or we would have returned at the entry check, so no
4252         ** descriptor processed here indicates a potential hang.
4253         ** The local timer will examine this and do a reset if needed.
4254         */
4255         if (processed == 0) {
4256                 if (txr->busy != EM_TX_HUNG)
4257                         ++txr->busy;
4258         } else /* At least one descriptor was cleaned */
4259                 txr->busy = EM_TX_BUSY; /* note this clears HUNG */
4260
4261         /*
4262          * If we have a minimum free, clear IFF_DRV_OACTIVE
4263          * to tell the stack that it is OK to send packets.
4264          * Notice that all writes of OACTIVE happen under the
4265          * TX lock which, with a single queue, guarantees 
4266          * sanity.
4267          */
4268         if (txr->tx_avail >= EM_MAX_SCATTER) {
4269                 ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;
4270         }
4271
4272         /* Disable hang detection if all clean */
4273         if (txr->tx_avail == adapter->num_tx_desc)
4274                 txr->busy = EM_TX_IDLE;
4275 }
4276
4277 /*********************************************************************
4278  *
4279  *  Refresh RX descriptor mbufs from system mbuf buffer pool.
4280  *
4281  **********************************************************************/
4282 static void
4283 em_refresh_mbufs(struct rx_ring *rxr, int limit)
4284 {
4285         struct adapter          *adapter = rxr->adapter;
4286         struct mbuf             *m;
4287         bus_dma_segment_t       segs;
4288         struct em_rxbuffer      *rxbuf;
4289         int                     i, j, error, nsegs;
4290         bool                    cleaned = FALSE;
4291
4292         i = j = rxr->next_to_refresh;
4293         /*
4294         ** Get one descriptor beyond
4295         ** our work mark to control
4296         ** the loop.
4297         */
4298         if (++j == adapter->num_rx_desc)
4299                 j = 0;
4300
4301         while (j != limit) {
4302                 rxbuf = &rxr->rx_buffers[i];
4303                 if (rxbuf->m_head == NULL) {
4304                         m = m_getjcl(M_NOWAIT, MT_DATA,
4305                             M_PKTHDR, adapter->rx_mbuf_sz);
4306                         /*
4307                         ** If we have a temporary resource shortage
4308                         ** that causes a failure, just abort the refresh
4309                         ** for now; we will return to this point when
4310                         ** reinvoked from em_rxeof.
4311                         */
4312                         if (m == NULL)
4313                                 goto update;
4314                 } else
4315                         m = rxbuf->m_head;
4316
4317                 m->m_len = m->m_pkthdr.len = adapter->rx_mbuf_sz;
4318                 m->m_flags |= M_PKTHDR;
4319                 m->m_data = m->m_ext.ext_buf;
4320
4321                 /* Use bus_dma machinery to setup the memory mapping  */
4322                 error = bus_dmamap_load_mbuf_sg(rxr->rxtag, rxbuf->map,
4323                     m, &segs, &nsegs, BUS_DMA_NOWAIT);
4324                 if (error != 0) {
4325                         printf("Refresh mbufs: hdr dmamap load"
4326                             " failure - %d\n", error);
4327                         m_free(m);
4328                         rxbuf->m_head = NULL;
4329                         goto update;
4330                 }
4331                 rxbuf->m_head = m;
4332                 rxbuf->paddr = segs.ds_addr;
4333                 bus_dmamap_sync(rxr->rxtag,
4334                     rxbuf->map, BUS_DMASYNC_PREREAD);
4335                 em_setup_rxdesc(&rxr->rx_base[i], rxbuf);
4336                 cleaned = TRUE;
4337
4338                 i = j; /* Next is precalculated for us */
4339                 rxr->next_to_refresh = i;
4340                 /* Calculate next controlling index */
4341                 if (++j == adapter->num_rx_desc)
4342                         j = 0;
4343         }
4344 update:
4345         /*
4346         ** Update the tail pointer only if, and
4347         ** only as far as, we have refreshed.
4348         */
4349         if (cleaned)
4350                 E1000_WRITE_REG(&adapter->hw,
4351                     E1000_RDT(rxr->me), rxr->next_to_refresh);
4352
4353         return;
4354 }
4355
4356
4357 /*********************************************************************
4358  *
4359  *  Allocate memory for rx_buffer structures. Since we use one
4360  *  rx_buffer per received packet, the maximum number of rx_buffer's
4361  *  that we'll need is equal to the number of receive descriptors
4362  *  that we've allocated.
4363  *
4364  **********************************************************************/
4365 static int
4366 em_allocate_receive_buffers(struct rx_ring *rxr)
4367 {
4368         struct adapter          *adapter = rxr->adapter;
4369         device_t                dev = adapter->dev;
4370         struct em_rxbuffer      *rxbuf;
4371         int                     error;
4372
4373         rxr->rx_buffers = malloc(sizeof(struct em_rxbuffer) *
4374             adapter->num_rx_desc, M_DEVBUF, M_NOWAIT | M_ZERO);
4375         if (rxr->rx_buffers == NULL) {
4376                 device_printf(dev, "Unable to allocate rx_buffer memory\n");
4377                 return (ENOMEM);
4378         }
4379
4380         error = bus_dma_tag_create(bus_get_dma_tag(dev), /* parent */
4381                                 1, 0,                   /* alignment, bounds */
4382                                 BUS_SPACE_MAXADDR,      /* lowaddr */
4383                                 BUS_SPACE_MAXADDR,      /* highaddr */
4384                                 NULL, NULL,             /* filter, filterarg */
4385                                 MJUM9BYTES,             /* maxsize */
4386                                 1,                      /* nsegments */
4387                                 MJUM9BYTES,             /* maxsegsize */
4388                                 0,                      /* flags */
4389                                 NULL,                   /* lockfunc */
4390                                 NULL,                   /* lockarg */
4391                                 &rxr->rxtag);
4392         if (error) {
4393                 device_printf(dev, "%s: bus_dma_tag_create failed %d\n",
4394                     __func__, error);
4395                 goto fail;
4396         }
4397
4398         /* Create a DMA map for each receive buffer */
4399         for (int i = 0; i < adapter->num_rx_desc; i++) {
4400                 rxbuf = &rxr->rx_buffers[i];
4401                 error = bus_dmamap_create(rxr->rxtag, 0, &rxbuf->map);
4402                 if (error) {
4403                         device_printf(dev, "%s: bus_dmamap_create failed: %d\n",
4404                             __func__, error);
4405                         goto fail;
4406                 }
4407         }
4408
4409         return (0);
4410
4411 fail:
4412         em_free_receive_structures(adapter);
4413         return (error);
4414 }
4415
4416
4417 /*********************************************************************
4418  *
4419  *  Initialize a receive ring and its buffers.
4420  *
4421  **********************************************************************/
4422 static int
4423 em_setup_receive_ring(struct rx_ring *rxr)
4424 {
4425         struct  adapter         *adapter = rxr->adapter;
4426         struct em_rxbuffer      *rxbuf;
4427         bus_dma_segment_t       seg[1];
4428         int                     rsize, nsegs, error = 0;
4429 #ifdef DEV_NETMAP
4430         struct netmap_adapter *na = NA(adapter->ifp);
4431         struct netmap_slot *slot;
4432 #endif
4433
4434
4435         /* Clear the ring contents */
4436         EM_RX_LOCK(rxr);
4437         rsize = roundup2(adapter->num_rx_desc *
4438             sizeof(union e1000_rx_desc_extended), EM_DBA_ALIGN);
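             /*
              * E.g., assuming the usual default of 1024 descriptors at 16
              * bytes per extended descriptor, rsize is 16KB, which is
              * already a multiple of EM_DBA_ALIGN.
              */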
4439         bzero((void *)rxr->rx_base, rsize);
4440 #ifdef DEV_NETMAP
4441         slot = netmap_reset(na, NR_RX, 0, 0);
4442 #endif
4443
4444         /*
4445         ** Free current RX buffer structs and their mbufs
4446         */
4447         for (int i = 0; i < adapter->num_rx_desc; i++) {
4448                 rxbuf = &rxr->rx_buffers[i];
4449                 if (rxbuf->m_head != NULL) {
4450                         bus_dmamap_sync(rxr->rxtag, rxbuf->map,
4451                             BUS_DMASYNC_POSTREAD);
4452                         bus_dmamap_unload(rxr->rxtag, rxbuf->map);
4453                         m_freem(rxbuf->m_head);
4454                         rxbuf->m_head = NULL; /* mark as freed */
4455                 }
4456         }
4457
4458         /* Now replenish the mbufs */
4459         for (int j = 0; j != adapter->num_rx_desc; ++j) {
4460                 rxbuf = &rxr->rx_buffers[j];
4461 #ifdef DEV_NETMAP
4462                 if (slot) {
4463                         int si = netmap_idx_n2k(&na->rx_rings[rxr->me], j);
4464                         uint64_t paddr;
4465                         void *addr;
4466
4467                         addr = PNMB(na, slot + si, &paddr);
4468                         netmap_load_map(na, rxr->rxtag, rxbuf->map, addr);
4469                         rxbuf->paddr = paddr;
4470                         em_setup_rxdesc(&rxr->rx_base[j], rxbuf);
4471                         continue;
4472                 }
4473 #endif /* DEV_NETMAP */
4474                 rxbuf->m_head = m_getjcl(M_NOWAIT, MT_DATA,
4475                     M_PKTHDR, adapter->rx_mbuf_sz);
4476                 if (rxbuf->m_head == NULL) {
4477                         error = ENOBUFS;
4478                         goto fail;
4479                 }
4480                 rxbuf->m_head->m_len = adapter->rx_mbuf_sz;
4481                 rxbuf->m_head->m_flags &= ~M_HASFCS; /* we strip it */
4482                 rxbuf->m_head->m_pkthdr.len = adapter->rx_mbuf_sz;
4483
4484                 /* Get the memory mapping */
4485                 error = bus_dmamap_load_mbuf_sg(rxr->rxtag,
4486                     rxbuf->map, rxbuf->m_head, seg,
4487                     &nsegs, BUS_DMA_NOWAIT);
4488                 if (error != 0) {
4489                         m_freem(rxbuf->m_head);
4490                         rxbuf->m_head = NULL;
4491                         goto fail;
4492                 }
4493                 bus_dmamap_sync(rxr->rxtag,
4494                     rxbuf->map, BUS_DMASYNC_PREREAD);
4495
4496                 rxbuf->paddr = seg[0].ds_addr;
4497                 em_setup_rxdesc(&rxr->rx_base[j], rxbuf);
4498         }
4499         rxr->next_to_check = 0;
4500         rxr->next_to_refresh = 0;
4501         bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
4502             BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
4503
4504 fail:
4505         EM_RX_UNLOCK(rxr);
4506         return (error);
4507 }
4508
4509 /*********************************************************************
4510  *
4511  *  Initialize all receive rings.
4512  *
4513  **********************************************************************/
4514 static int
4515 em_setup_receive_structures(struct adapter *adapter)
4516 {
4517         struct rx_ring *rxr = adapter->rx_rings;
4518         int q;
4519
4520         for (q = 0; q < adapter->num_queues; q++, rxr++)
4521                 if (em_setup_receive_ring(rxr))
4522                         goto fail;
4523
4524         return (0);
4525 fail:
4526         /*
4527          * Free the RX buffers allocated so far; we only handle
4528          * the rings that completed, since the failing case will
4529          * have cleaned up after itself. 'q' failed, so it's the terminus.
4530          */
4531         for (int i = 0; i < q; ++i) {
4532                 rxr = &adapter->rx_rings[i];
4533                 for (int n = 0; n < adapter->num_rx_desc; n++) {
4534                         struct em_rxbuffer *rxbuf;
4535                         rxbuf = &rxr->rx_buffers[n];
4536                         if (rxbuf->m_head != NULL) {
4537                                 bus_dmamap_sync(rxr->rxtag, rxbuf->map,
4538                                   BUS_DMASYNC_POSTREAD);
4539                                 bus_dmamap_unload(rxr->rxtag, rxbuf->map);
4540                                 m_freem(rxbuf->m_head);
4541                                 rxbuf->m_head = NULL;
4542                         }
4543                 }
4544                 rxr->next_to_check = 0;
4545                 rxr->next_to_refresh = 0;
4546         }
4547
4548         return (ENOBUFS);
4549 }
4550
4551 /*********************************************************************
4552  *
4553  *  Free all receive rings.
4554  *
4555  **********************************************************************/
4556 static void
4557 em_free_receive_structures(struct adapter *adapter)
4558 {
4559         struct rx_ring *rxr = adapter->rx_rings;
4560
4561         for (int i = 0; i < adapter->num_queues; i++, rxr++) {
4562                 em_free_receive_buffers(rxr);
4563                 /* Free the ring memory as well */
4564                 em_dma_free(adapter, &rxr->rxdma);
4565                 EM_RX_LOCK_DESTROY(rxr);
4566         }
4567
4568         free(adapter->rx_rings, M_DEVBUF);
4569 }
4570
4571
4572 /*********************************************************************
4573  *
4574  *  Free receive ring data structures
4575  *
4576  **********************************************************************/
4577 static void
4578 em_free_receive_buffers(struct rx_ring *rxr)
4579 {
4580         struct adapter          *adapter = rxr->adapter;
4581         struct em_rxbuffer      *rxbuf = NULL;
4582
4583         INIT_DEBUGOUT("free_receive_buffers: begin");
4584
4585         if (rxr->rx_buffers != NULL) {
4586                 for (int i = 0; i < adapter->num_rx_desc; i++) {
4587                         rxbuf = &rxr->rx_buffers[i];
4588                         if (rxbuf->map != NULL) {
4589                                 bus_dmamap_sync(rxr->rxtag, rxbuf->map,
4590                                     BUS_DMASYNC_POSTREAD);
4591                                 bus_dmamap_unload(rxr->rxtag, rxbuf->map);
4592                                 bus_dmamap_destroy(rxr->rxtag, rxbuf->map);
4593                         }
4594                         if (rxbuf->m_head != NULL) {
4595                                 m_freem(rxbuf->m_head);
4596                                 rxbuf->m_head = NULL;
4597                         }
4598                 }
4599                 free(rxr->rx_buffers, M_DEVBUF);
4600                 rxr->rx_buffers = NULL;
4601                 rxr->next_to_check = 0;
4602                 rxr->next_to_refresh = 0;
4603         }
4604
4605         if (rxr->rxtag != NULL) {
4606                 bus_dma_tag_destroy(rxr->rxtag);
4607                 rxr->rxtag = NULL;
4608         }
4609
4610         return;
4611 }
4612
4613
4614 /*********************************************************************
4615  *
4616  *  Enable receive unit.
4617  *
4618  **********************************************************************/
4619
4620 static void
4621 em_initialize_receive_unit(struct adapter *adapter)
4622 {
4623         struct rx_ring *rxr = adapter->rx_rings;
4624         struct ifnet    *ifp = adapter->ifp;
4625         struct e1000_hw *hw = &adapter->hw;
4626         u32     rctl, rxcsum, rfctl;
4627
4628         INIT_DEBUGOUT("em_initialize_receive_units: begin");
4629
4630         /*
4631          * Make sure receives are disabled while setting
4632          * up the descriptor ring
4633          */
4634         rctl = E1000_READ_REG(hw, E1000_RCTL);
4635         /* Do not disable if ever enabled on this hardware */
4636         if ((hw->mac.type != e1000_82574) && (hw->mac.type != e1000_82583))
4637                 E1000_WRITE_REG(hw, E1000_RCTL, rctl & ~E1000_RCTL_EN);
4638
4639         /* Setup the Receive Control Register */
4640         rctl &= ~(3 << E1000_RCTL_MO_SHIFT);
4641         rctl |= E1000_RCTL_EN | E1000_RCTL_BAM |
4642             E1000_RCTL_LBM_NO | E1000_RCTL_RDMTS_HALF |
4643             (hw->mac.mc_filter_type << E1000_RCTL_MO_SHIFT);
4644
4645         /* Do not store bad packets */
4646         rctl &= ~E1000_RCTL_SBP;
4647
4648         /* Enable Long Packet receive */
4649         if (ifp->if_mtu > ETHERMTU)
4650                 rctl |= E1000_RCTL_LPE;
4651         else
4652                 rctl &= ~E1000_RCTL_LPE;
4653
4654         /* Strip the CRC */
4655         if (!em_disable_crc_stripping)
4656                 rctl |= E1000_RCTL_SECRC;
4657
4658         E1000_WRITE_REG(&adapter->hw, E1000_RADV,
4659             adapter->rx_abs_int_delay.value);
4660
4661         E1000_WRITE_REG(&adapter->hw, E1000_RDTR,
4662             adapter->rx_int_delay.value);
4663         /*
4664          * Set the interrupt throttling rate. Value is calculated
4665          * as DEFAULT_ITR = 1/(MAX_INTS_PER_SEC * 256ns)
4666          */
4667         E1000_WRITE_REG(hw, E1000_ITR, DEFAULT_ITR);
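             /*
              * E.g., assuming the driver default of 8000 for
              * MAX_INTS_PER_SEC, DEFAULT_ITR = 10^9 / (8000 * 256) ~= 488
              * in 256ns units, capping the device at roughly 8000
              * interrupts per second.
              */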
4668
4669         /* Use extended rx descriptor formats */
4670         rfctl = E1000_READ_REG(hw, E1000_RFCTL);
4671         rfctl |= E1000_RFCTL_EXTEN;
4672         /*
4673         ** When using MSIX interrupts we need to throttle
4674         ** using the EITR register (82574 only)
4675         */
4676         if (hw->mac.type == e1000_82574) {
4677                 for (int i = 0; i < 4; i++)
4678                         E1000_WRITE_REG(hw, E1000_EITR_82574(i),
4679                             DEFAULT_ITR);
4680                 /* Disable accelerated acknowledge */
4681                 rfctl |= E1000_RFCTL_ACK_DIS;
4682         }
4683         E1000_WRITE_REG(hw, E1000_RFCTL, rfctl);
4684
4685         rxcsum = E1000_READ_REG(hw, E1000_RXCSUM);
4686         if (ifp->if_capenable & IFCAP_RXCSUM) {
4687 #ifdef EM_MULTIQUEUE
4688                 rxcsum |= E1000_RXCSUM_TUOFL |
4689                           E1000_RXCSUM_IPOFL |
4690                           E1000_RXCSUM_PCSD;
4691 #else
4692                 rxcsum |= E1000_RXCSUM_TUOFL;
4693 #endif
4694         } else
4695                 rxcsum &= ~E1000_RXCSUM_TUOFL;
4696
4697         E1000_WRITE_REG(hw, E1000_RXCSUM, rxcsum);
4698
4699 #ifdef EM_MULTIQUEUE
4700 #define RSSKEYLEN 10
4701         if (adapter->num_queues > 1) {
4702                 uint8_t  rss_key[4 * RSSKEYLEN];
4703                 uint32_t reta = 0;
4704                 int i;
4705
4706                 /*
4707                 * Configure RSS key
4708                 */
4709                 arc4rand(rss_key, sizeof(rss_key), 0);
4710                 for (i = 0; i < RSSKEYLEN; ++i) {
4711                         uint32_t rssrk = 0;
4712
4713                         rssrk = EM_RSSRK_VAL(rss_key, i);
4714                         E1000_WRITE_REG(hw, E1000_RSSRK(i), rssrk);
4715                 }
4716
4717                 /*
4718                 * Configure the RSS redirect table in the following fashion:
4719                 * (hash & ring_cnt_mask) == rdr_table[(hash & rdr_table_mask)]
4720                 */
4721                 for (i = 0; i < sizeof(reta); ++i) {
4722                         uint32_t q;
4723
4724                         q = (i % adapter->num_queues) << 7;
4725                         reta |= q << (8 * i);
4726                 }
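                     /*
                      * With num_queues == 2 this yields reta == 0x80008000:
                      * each of the four one-byte entries carries its queue
                      * index in bit 7, so consecutive table entries alternate
                      * between queue 0 and queue 1 once the value is
                      * replicated across all 32 RETA registers below.
                      */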
4727
4728                 for (i = 0; i < 32; ++i) {
4729                         E1000_WRITE_REG(hw, E1000_RETA(i), reta);
4730                 }
4731
4732                 E1000_WRITE_REG(hw, E1000_MRQC, E1000_MRQC_RSS_ENABLE_2Q | 
4733                                 E1000_MRQC_RSS_FIELD_IPV4_TCP |
4734                                 E1000_MRQC_RSS_FIELD_IPV4 |
4735                                 E1000_MRQC_RSS_FIELD_IPV6_TCP_EX |
4736                                 E1000_MRQC_RSS_FIELD_IPV6_EX |
4737                                 E1000_MRQC_RSS_FIELD_IPV6);
4738         }
4739 #endif
4740         /*
4741         ** XXX TEMPORARY WORKAROUND: on some systems with 82573
4742         ** long latencies are observed, like Lenovo X60. This
4743         ** change eliminates the problem, but since having positive
4744         ** values in RDTR is a known source of problems on other
4745         ** platforms another solution is being sought.
4746         */
4747         if (hw->mac.type == e1000_82573)
4748                 E1000_WRITE_REG(hw, E1000_RDTR, 0x20);
4749
4750         for (int i = 0; i < adapter->num_queues; i++, rxr++) {
4751                 /* Setup the Base and Length of the Rx Descriptor Ring */
4752                 u64 bus_addr = rxr->rxdma.dma_paddr;
4753                 u32 rdt = adapter->num_rx_desc - 1; /* default */
4754
4755                 E1000_WRITE_REG(hw, E1000_RDLEN(i),
4756                     adapter->num_rx_desc * sizeof(union e1000_rx_desc_extended));
4757                 E1000_WRITE_REG(hw, E1000_RDBAH(i), (u32)(bus_addr >> 32));
4758                 E1000_WRITE_REG(hw, E1000_RDBAL(i), (u32)bus_addr);
4759                 /* Setup the Head and Tail Descriptor Pointers */
4760                 E1000_WRITE_REG(hw, E1000_RDH(i), 0);
4761 #ifdef DEV_NETMAP
4762                 /*
4763                  * an init() while a netmap client is active must
4764                  * preserve the rx buffers passed to userspace.
4765                  */
4766                 if (ifp->if_capenable & IFCAP_NETMAP)
4767                         rdt -= nm_kr_rxspace(&NA(adapter->ifp)->rx_rings[i]);
4768 #endif /* DEV_NETMAP */
4769                 E1000_WRITE_REG(hw, E1000_RDT(i), rdt);
4770         }
4771
4772         /*
4773          * Set PTHRESH for improved jumbo performance
4774          * According to 10.2.5.11 of Intel 82574 Datasheet,
4775          * RXDCTL(1) is written whenever RXDCTL(0) is written.
4776          * Only write to RXDCTL(1) if there is a need for different
4777          * settings.
4778          */
4779         if (((adapter->hw.mac.type == e1000_ich9lan) ||
4780             (adapter->hw.mac.type == e1000_pch2lan) ||
4781             (adapter->hw.mac.type == e1000_ich10lan)) &&
4782             (ifp->if_mtu > ETHERMTU)) {
4783                 u32 rxdctl = E1000_READ_REG(hw, E1000_RXDCTL(0));
4784                 E1000_WRITE_REG(hw, E1000_RXDCTL(0), rxdctl | 3);
4785         } else if (adapter->hw.mac.type == e1000_82574) {
4786                 for (int i = 0; i < adapter->num_queues; i++) {
4787                         u32 rxdctl = E1000_READ_REG(hw, E1000_RXDCTL(i));
4788
4789                         rxdctl |= 0x20; /* PTHRESH */
4790                         rxdctl |= 4 << 8; /* HTHRESH */
4791                         rxdctl |= 4 << 16;/* WTHRESH */
4792                         rxdctl |= 1 << 24; /* Switch to granularity */
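                             /*
                              * Together these OR in 0x01040420: prefetch when
                              * 32 descriptors are free, host and write-back
                              * thresholds of 4, with bit 24 selecting
                              * descriptor rather than cache-line granularity
                              * for the thresholds.
                              */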
4793                         E1000_WRITE_REG(hw, E1000_RXDCTL(i), rxdctl);
4794                 }
4795         }
4796                 
4797         if (adapter->hw.mac.type >= e1000_pch2lan) {
4798                 if (ifp->if_mtu > ETHERMTU)
4799                         e1000_lv_jumbo_workaround_ich8lan(hw, TRUE);
4800                 else
4801                         e1000_lv_jumbo_workaround_ich8lan(hw, FALSE);
4802         }
4803
4804         /* Make sure VLAN Filters are off */
4805         rctl &= ~E1000_RCTL_VFE;
4806
4807         if (adapter->rx_mbuf_sz == MCLBYTES)
4808                 rctl |= E1000_RCTL_SZ_2048;
4809         else if (adapter->rx_mbuf_sz == MJUMPAGESIZE)
4810                 rctl |= E1000_RCTL_SZ_4096 | E1000_RCTL_BSEX;
4811         else if (adapter->rx_mbuf_sz > MJUMPAGESIZE)
4812                 rctl |= E1000_RCTL_SZ_8192 | E1000_RCTL_BSEX;
4813
4814         /* Ensure we use a descriptor type (DTYP) of 00 by clearing bits 11:10 */
4815         rctl &= ~0x00000C00;
4816         /* Write out the settings */
4817         E1000_WRITE_REG(hw, E1000_RCTL, rctl);
4818
4819         return;
4820 }
4821
4822
4823 /*********************************************************************
4824  *
4825  *  This routine executes in interrupt context. It replenishes
4826  *  the mbufs in the descriptor ring and sends data which has
4827  *  been DMA'ed into host memory to the upper layer.
4828  *
4829  *  We loop at most count times if count is > 0, or until done if
4830  *  count < 0.
4831  *  
4832  *  For polling, we also return the number of cleaned packets.
4833  *********************************************************************/
4834 static bool
4835 em_rxeof(struct rx_ring *rxr, int count, int *done)
4836 {
4837         struct adapter          *adapter = rxr->adapter;
4838         struct ifnet            *ifp = adapter->ifp;
4839         struct mbuf             *mp, *sendmp;
4840         u32                     status = 0;
4841         u16                     len;
4842         int                     i, processed, rxdone = 0;
4843         bool                    eop;
4844         union e1000_rx_desc_extended    *cur;
4845
4846         EM_RX_LOCK(rxr);
4847
4848         /* Sync the ring */
4849         bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
4850             BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
4851
4852
4853 #ifdef DEV_NETMAP
4854         if (netmap_rx_irq(ifp, rxr->me, &processed)) {
4855                 EM_RX_UNLOCK(rxr);
4856                 return (FALSE);
4857         }
4858 #endif /* DEV_NETMAP */
4859
4860         for (i = rxr->next_to_check, processed = 0; count != 0;) {
4861                 if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
4862                         break;
4863
4864                 cur = &rxr->rx_base[i];
4865                 status = le32toh(cur->wb.upper.status_error);
4866                 mp = sendmp = NULL;
4867
4868                 if ((status & E1000_RXD_STAT_DD) == 0)
4869                         break;
4870
4871                 len = le16toh(cur->wb.upper.length);
4872                 eop = (status & E1000_RXD_STAT_EOP) != 0;
4873
4874                 if ((status & E1000_RXDEXT_ERR_FRAME_ERR_MASK) ||
4875                     (rxr->discard == TRUE)) {
4876                         adapter->dropped_pkts++;
4877                         ++rxr->rx_discarded;
4878                         if (!eop) /* Catch subsequent segs */
4879                                 rxr->discard = TRUE;
4880                         else
4881                                 rxr->discard = FALSE;
4882                         em_rx_discard(rxr, i);
4883                         goto next_desc;
4884                 }
4885                 bus_dmamap_unload(rxr->rxtag, rxr->rx_buffers[i].map);
4886
4887                 /* Assign correct length to the current fragment */
4888                 mp = rxr->rx_buffers[i].m_head;
4889                 mp->m_len = len;
4890
4891                 /* Trigger for refresh */
4892                 rxr->rx_buffers[i].m_head = NULL;
4893
4894                 /* First segment? */
4895                 if (rxr->fmp == NULL) {
4896                         mp->m_pkthdr.len = len;
4897                         rxr->fmp = rxr->lmp = mp;
4898                 } else {
4899                         /* Chain mbuf's together */
4900                         mp->m_flags &= ~M_PKTHDR;
4901                         rxr->lmp->m_next = mp;
4902                         rxr->lmp = mp;
4903                         rxr->fmp->m_pkthdr.len += len;
4904                 }
4905
4906                 if (eop) {
4907                         --count;
4908                         sendmp = rxr->fmp;
4909                         sendmp->m_pkthdr.rcvif = ifp;
4910                         ifp->if_ipackets++;
4911                         em_receive_checksum(status, sendmp);
4912 #ifndef __NO_STRICT_ALIGNMENT
4913                         if (adapter->hw.mac.max_frame_size >
4914                             (MCLBYTES - ETHER_ALIGN) &&
4915                             em_fixup_rx(rxr) != 0)
4916                                 goto skip;
4917 #endif
4918                         if (status & E1000_RXD_STAT_VP) {
4919                                 sendmp->m_pkthdr.ether_vtag =
4920                                     le16toh(cur->wb.upper.vlan);
4921                                 sendmp->m_flags |= M_VLANTAG;
4922                         }
4923 #ifndef __NO_STRICT_ALIGNMENT
4924 skip:
4925 #endif
4926                         rxr->fmp = rxr->lmp = NULL;
4927                 }
4928 next_desc:
4929                 /* Sync the ring */
4930                 bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
4931                         BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
4932
4933                 /* Zero out the receive descriptors status. */
4934                 cur->wb.upper.status_error &= htole32(~0xFF);
4935                 ++rxdone;       /* cumulative for POLL */
4936                 ++processed;
4937
4938                 /* Advance our pointers to the next descriptor. */
4939                 if (++i == adapter->num_rx_desc)
4940                         i = 0;
4941
4942                 /* Send to the stack */
4943                 if (sendmp != NULL) {
4944                         rxr->next_to_check = i;
4945                         EM_RX_UNLOCK(rxr);
4946                         (*ifp->if_input)(ifp, sendmp);
4947                         EM_RX_LOCK(rxr);
4948                         i = rxr->next_to_check;
4949                 }
4950
4951                 /* Only refresh mbufs every 8 descriptors */
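                     /*
                      * Batching the refresh amortizes the RDT tail-register
                      * update in em_refresh_mbufs() over several buffers
                      * instead of paying one MMIO write per descriptor.
                      */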
4952                 if (processed == 8) {
4953                         em_refresh_mbufs(rxr, i);
4954                         processed = 0;
4955                 }
4956         }
4957
4958         /* Catch any remaining refresh work */
4959         if (e1000_rx_unrefreshed(rxr))
4960                 em_refresh_mbufs(rxr, i);
4961
4962         rxr->next_to_check = i;
4963         if (done != NULL)
4964                 *done = rxdone;
4965         EM_RX_UNLOCK(rxr);
4966
4967         return ((status & E1000_RXD_STAT_DD) ? TRUE : FALSE);
4968 }
4969
4970 static __inline void
4971 em_rx_discard(struct rx_ring *rxr, int i)
4972 {
4973         struct em_rxbuffer      *rbuf;
4974
4975         rbuf = &rxr->rx_buffers[i];
4976         bus_dmamap_unload(rxr->rxtag, rbuf->map);
4977
4978         /* Free any previous pieces */
4979         if (rxr->fmp != NULL) {
4980                 rxr->fmp->m_flags |= M_PKTHDR;
4981                 m_freem(rxr->fmp);
4982                 rxr->fmp = NULL;
4983                 rxr->lmp = NULL;
4984         }
4985         /*
4986         ** Free the buffer and allow em_refresh_mbufs()
4987         ** to clean up and recharge the buffer.
4988         */
4989         if (rbuf->m_head) {
4990                 m_free(rbuf->m_head);
4991                 rbuf->m_head = NULL;
4992         }
4993         return;
4994 }
4995
4996 #ifndef __NO_STRICT_ALIGNMENT
4997 /*
4998  * When jumbo frames are enabled we should realign the entire payload on
4999  * architectures with strict alignment. This is a serious design mistake of the
5000  * 8254x, as it nullifies the benefit of DMA. The 8254x only allows the RX
5001  * buffer size to be 2048/4096/8192/16384. What we really want is 2048 -
5002  * ETHER_ALIGN to align its payload. On architectures without strict alignment
5003  * restrictions the 8254x still performs unaligned memory accesses, which
5004  * reduces performance as well. To avoid copying an entire frame to realign it,
5005  * we allocate a new mbuf and copy the ethernet header into the new mbuf. The
5006  * new mbuf is then prepended onto the existing mbuf chain.
5007  *
5008  * Be aware that the best performance of the 8254x is achieved only when jumbo
5009  * frames are not used at all on architectures with strict alignment.
5010  */
5011 static int
5012 em_fixup_rx(struct rx_ring *rxr)
5013 {
5014         struct adapter *adapter = rxr->adapter;
5015         struct mbuf *m, *n;
5016         int error;
5017
5018         error = 0;
5019         m = rxr->fmp;
5020         if (m->m_len <= (MCLBYTES - ETHER_HDR_LEN)) {
5021                 bcopy(m->m_data, m->m_data + ETHER_HDR_LEN, m->m_len);
5022                 m->m_data += ETHER_HDR_LEN;
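                     /*
                      * Shifting the frame up by ETHER_HDR_LEN (14) moves the
                      * IP header from offset 14 to offset 28 within the
                      * (aligned) cluster, restoring 4-byte alignment of the
                      * payload.
                      */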
5023         } else {
5024                 MGETHDR(n, M_NOWAIT, MT_DATA);
5025                 if (n != NULL) {
5026                         bcopy(m->m_data, n->m_data, ETHER_HDR_LEN);
5027                         m->m_data += ETHER_HDR_LEN;
5028                         m->m_len -= ETHER_HDR_LEN;
5029                         n->m_len = ETHER_HDR_LEN;
5030                         M_MOVE_PKTHDR(n, m);
5031                         n->m_next = m;
5032                         rxr->fmp = n;
5033                 } else {
5034                         adapter->dropped_pkts++;
5035                         m_freem(rxr->fmp);
5036                         rxr->fmp = NULL;
5037                         error = ENOMEM;
5038                 }
5039         }
5040
5041         return (error);
5042 }
5043 #endif
5044
5045 static void
5046 em_setup_rxdesc(union e1000_rx_desc_extended *rxd, const struct em_rxbuffer *rxbuf)
5047 {
5048         rxd->read.buffer_addr = htole64(rxbuf->paddr);
5049         /* DD bits must be cleared */
5050         rxd->wb.upper.status_error = 0;
5051 }
5052
5053 /*********************************************************************
5054  *
5055  *  Verify that the hardware indicated that the checksum is valid.
5056  *  Inform the stack about the status of checksum so that stack
5057  *  Inform the stack about the status of the checksum so that the
5058  *  stack doesn't spend time verifying it.
5059  *********************************************************************/
5060 static void
5061 em_receive_checksum(uint32_t status, struct mbuf *mp)
5062 {
5063         mp->m_pkthdr.csum_flags = 0;
5064
5065         /* If the Ignore Checksum bit is set, report nothing to the stack */
5066         if (status & E1000_RXD_STAT_IXSM)
5067                 return;
5068
5069         /* If the IP checksum exists and there is no IP Checksum error */
5070         if ((status & (E1000_RXD_STAT_IPCS | E1000_RXDEXT_STATERR_IPE)) ==
5071                 E1000_RXD_STAT_IPCS) {
5072                 mp->m_pkthdr.csum_flags = (CSUM_IP_CHECKED | CSUM_IP_VALID);
5073         }
5074
5075         /* TCP or UDP checksum */
5076         if ((status & (E1000_RXD_STAT_TCPCS | E1000_RXDEXT_STATERR_TCPE)) ==
5077             E1000_RXD_STAT_TCPCS) {
5078                 mp->m_pkthdr.csum_flags |= (CSUM_DATA_VALID | CSUM_PSEUDO_HDR);
5079                 mp->m_pkthdr.csum_data = htons(0xffff);
5080         }
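             /*
              * A csum_data of 0xffff together with CSUM_PSEUDO_HDR tells
              * the stack that the full checksum, pseudo-header included,
              * has been verified, so no software checksum work remains.
              */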
5081         if (status & E1000_RXD_STAT_UDPCS) {
5082                 mp->m_pkthdr.csum_flags |= (CSUM_DATA_VALID | CSUM_PSEUDO_HDR);
5083                 mp->m_pkthdr.csum_data = htons(0xffff);
5084         }
5085 }
5086
5087 /*
5088  * This routine is run via a VLAN
5089  * config EVENT
5090  */
5091 static void
5092 em_register_vlan(void *arg, struct ifnet *ifp, u16 vtag)
5093 {
5094         struct adapter  *adapter = ifp->if_softc;
5095         u32             index, bit;
5096
5097         if (ifp->if_softc != arg)    /* Not our event */
5098                 return;
5099
5100         if ((vtag == 0) || (vtag > 4095))       /* Invalid ID */
5101                 return;
5102
5103         EM_CORE_LOCK(adapter);
5104         index = (vtag >> 5) & 0x7F;
5105         bit = vtag & 0x1F;
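             /*
              * The VFTA is a 4096-bit table stored as 128 32-bit words:
              * e.g. a vtag of 100 lands in word 3 (100 >> 5), bit 4
              * (100 & 0x1F).
              */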
5106         adapter->shadow_vfta[index] |= (1 << bit);
5107         ++adapter->num_vlans;
5108         /* Re-init to load the changes */
5109         if (ifp->if_capenable & IFCAP_VLAN_HWFILTER)
5110                 em_init_locked(adapter);
5111         EM_CORE_UNLOCK(adapter);
5112 }
5113
5114 /*
5115  * This routine is run via a VLAN
5116  * unconfig EVENT
5117  */
5118 static void
5119 em_unregister_vlan(void *arg, struct ifnet *ifp, u16 vtag)
5120 {
5121         struct adapter  *adapter = ifp->if_softc;
5122         u32             index, bit;
5123
5124         if (ifp->if_softc != arg)
5125                 return;
5126
5127         if ((vtag == 0) || (vtag > 4095))       /* Invalid */
5128                 return;
5129
5130         EM_CORE_LOCK(adapter);
5131         index = (vtag >> 5) & 0x7F;
5132         bit = vtag & 0x1F;
5133         adapter->shadow_vfta[index] &= ~(1 << bit);
5134         --adapter->num_vlans;
5135         /* Re-init to load the changes */
5136         if (ifp->if_capenable & IFCAP_VLAN_HWFILTER)
5137                 em_init_locked(adapter);
5138         EM_CORE_UNLOCK(adapter);
5139 }
5140
5141 static void
5142 em_setup_vlan_hw_support(struct adapter *adapter)
5143 {
5144         struct e1000_hw *hw = &adapter->hw;
5145         u32             reg;
5146
5147         /*
5148         ** We get here through init_locked, meaning
5149         ** a soft reset; this has already cleared
5150         ** the VFTA and other state, so if no
5151         ** VLANs have been registered, do nothing.
5152         */
5153         if (adapter->num_vlans == 0)
5154                 return;
5155
5156         /*
5157         ** A soft reset zeroes out the VFTA, so
5158         ** we need to repopulate it now.
5159         */
5160         for (int i = 0; i < EM_VFTA_SIZE; i++)
5161                 if (adapter->shadow_vfta[i] != 0)
5162                         E1000_WRITE_REG_ARRAY(hw, E1000_VFTA,
5163                             i, adapter->shadow_vfta[i]);
5164
5165         reg = E1000_READ_REG(hw, E1000_CTRL);
5166         reg |= E1000_CTRL_VME;
5167         E1000_WRITE_REG(hw, E1000_CTRL, reg);
5168
5169         /* Enable the Filter Table */
5170         reg = E1000_READ_REG(hw, E1000_RCTL);
5171         reg &= ~E1000_RCTL_CFIEN;
5172         reg |= E1000_RCTL_VFE;
5173         E1000_WRITE_REG(hw, E1000_RCTL, reg);
5174 }
5175
5176 static void
5177 em_enable_intr(struct adapter *adapter)
5178 {
5179         struct e1000_hw *hw = &adapter->hw;
5180         u32 ims_mask = IMS_ENABLE_MASK;
5181
5182         if (hw->mac.type == e1000_82574) {
5183                 E1000_WRITE_REG(hw, EM_EIAC, EM_MSIX_MASK);
5184                 ims_mask |= EM_MSIX_MASK;
5185         } 
5186         E1000_WRITE_REG(hw, E1000_IMS, ims_mask);
5187 }
5188
5189 static void
5190 em_disable_intr(struct adapter *adapter)
5191 {
5192         struct e1000_hw *hw = &adapter->hw;
5193
5194         if (hw->mac.type == e1000_82574)
5195                 E1000_WRITE_REG(hw, EM_EIAC, 0);
5196         E1000_WRITE_REG(&adapter->hw, E1000_IMC, 0xffffffff);
5197 }
5198
5199 /*
5200  * Bit of a misnomer, what this really means is
5201  * to enable OS management of the system... aka
5202  * to disable special hardware management features 
5203  */
5204 static void
5205 em_init_manageability(struct adapter *adapter)
5206 {
5207         /* A shared code workaround */
5208 #define E1000_82542_MANC2H E1000_MANC2H
5209         if (adapter->has_manage) {
5210                 int manc2h = E1000_READ_REG(&adapter->hw, E1000_MANC2H);
5211                 int manc = E1000_READ_REG(&adapter->hw, E1000_MANC);
5212
5213                 /* disable hardware interception of ARP */
5214                 manc &= ~(E1000_MANC_ARP_EN);
5215
5216                 /* enable receiving management packets to the host */
5217                 manc |= E1000_MANC_EN_MNG2HOST;
5218 #define E1000_MNG2HOST_PORT_623 (1 << 5)
5219 #define E1000_MNG2HOST_PORT_664 (1 << 6)
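                     /*
                      * UDP ports 623 (ASF/RMCP) and 664 (secure RMCP) carry
                      * BMC management traffic, so packets addressed to them
                      * are passed through to the host as well.
                      */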
5220                 manc2h |= E1000_MNG2HOST_PORT_623;
5221                 manc2h |= E1000_MNG2HOST_PORT_664;
5222                 E1000_WRITE_REG(&adapter->hw, E1000_MANC2H, manc2h);
5223                 E1000_WRITE_REG(&adapter->hw, E1000_MANC, manc);
5224         }
5225 }
5226
5227 /*
5228  * Give control back to hardware management
5229  * controller if there is one.
5230  */
5231 static void
5232 em_release_manageability(struct adapter *adapter)
5233 {
5234         if (adapter->has_manage) {
5235                 int manc = E1000_READ_REG(&adapter->hw, E1000_MANC);
5236
5237                 /* re-enable hardware interception of ARP */
5238                 manc |= E1000_MANC_ARP_EN;
5239                 manc &= ~E1000_MANC_EN_MNG2HOST;
5240
5241                 E1000_WRITE_REG(&adapter->hw, E1000_MANC, manc);
5242         }
5243 }
5244
5245 /*
5246  * em_get_hw_control sets the {CTRL_EXT|FWSM}:DRV_LOAD bit.
5247  * For ASF and Pass Through versions of f/w this means
5248  * that the driver is loaded. For AMT version type f/w
5249  * that the driver is loaded. For AMT versions of the f/w
5250  */
5251 static void
5252 em_get_hw_control(struct adapter *adapter)
5253 {
5254         u32 ctrl_ext, swsm;
5255
5256         if (adapter->hw.mac.type == e1000_82573) {
5257                 swsm = E1000_READ_REG(&adapter->hw, E1000_SWSM);
5258                 E1000_WRITE_REG(&adapter->hw, E1000_SWSM,
5259                     swsm | E1000_SWSM_DRV_LOAD);
5260                 return;
5261         }
5262         /* else */
5263         ctrl_ext = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
5264         E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT,
5265             ctrl_ext | E1000_CTRL_EXT_DRV_LOAD);
5266         return;
5267 }
5268
5269 /*
5270  * em_release_hw_control resets {CTRL_EXT|FWSM}:DRV_LOAD bit.
5271  * For ASF and Pass Through versions of f/w this means that
5272  * the driver is no longer loaded. For AMT versions of the
5273  * f/w this means that the network i/f is closed.
5274  */
5275 static void
5276 em_release_hw_control(struct adapter *adapter)
5277 {
5278         u32 ctrl_ext, swsm;
5279
5280         if (!adapter->has_manage)
5281                 return;
5282
5283         if (adapter->hw.mac.type == e1000_82573) {
5284                 swsm = E1000_READ_REG(&adapter->hw, E1000_SWSM);
5285                 E1000_WRITE_REG(&adapter->hw, E1000_SWSM,
5286                     swsm & ~E1000_SWSM_DRV_LOAD);
5287                 return;
5288         }
5289         /* else */
5290         ctrl_ext = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
5291         E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT,
5292             ctrl_ext & ~E1000_CTRL_EXT_DRV_LOAD);
5293         return;
5294 }
5295
5296 static int
5297 em_is_valid_ether_addr(u8 *addr)
5298 {
5299         char zero_addr[6] = { 0, 0, 0, 0, 0, 0 };
5300
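             /*
              * Reject multicast/broadcast addresses (I/G bit set in the
              * first octet) as well as the all-zeroes address.
              */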
5301         if ((addr[0] & 1) || (!bcmp(addr, zero_addr, ETHER_ADDR_LEN))) {
5302                 return (FALSE);
5303         }
5304
5305         return (TRUE);
5306 }
5307
5308 /*
5309 ** Parse the interface capabilities with regard
5310 ** to both system management and wake-on-lan for
5311 ** later use.
5312 */
5313 static void
5314 em_get_wakeup(device_t dev)
5315 {
5316         struct adapter  *adapter = device_get_softc(dev);
5317         u16             eeprom_data = 0, device_id, apme_mask;
5318
5319         adapter->has_manage = e1000_enable_mng_pass_thru(&adapter->hw);
5320         apme_mask = EM_EEPROM_APME;
5321
5322         switch (adapter->hw.mac.type) {
5323         case e1000_82573:
5324         case e1000_82583:
5325                 adapter->has_amt = TRUE;
5326                 /* Falls thru */
5327         case e1000_82571:
5328         case e1000_82572:
5329         case e1000_80003es2lan:
5330                 if (adapter->hw.bus.func == 1) {
5331                         e1000_read_nvm(&adapter->hw,
5332                             NVM_INIT_CONTROL3_PORT_B, 1, &eeprom_data);
5333                 } else {
5334                         e1000_read_nvm(&adapter->hw,
5335                             NVM_INIT_CONTROL3_PORT_A, 1, &eeprom_data);
5336                 }
5337                 break;
5338         case e1000_ich8lan:
5339         case e1000_ich9lan:
5340         case e1000_ich10lan:
5341         case e1000_pchlan:
5342         case e1000_pch2lan:
5343         case e1000_pch_lpt:
5344         case e1000_pch_spt:
5345                 apme_mask = E1000_WUC_APME;
5346                 adapter->has_amt = TRUE;
5347                 eeprom_data = E1000_READ_REG(&adapter->hw, E1000_WUC);
5348                 break;
5349         default:
5350                 e1000_read_nvm(&adapter->hw,
5351                     NVM_INIT_CONTROL3_PORT_A, 1, &eeprom_data);
5352                 break;
5353         }
5354         if (eeprom_data & apme_mask)
5355                 adapter->wol = (E1000_WUFC_MAG | E1000_WUFC_MC);
5356         /*
5357          * We have the eeprom settings, now apply the special cases
5358          * where the eeprom may be wrong or the board won't support
5359          * wake on lan on a particular port
5360          */
5361         device_id = pci_get_device(dev);
5362         switch (device_id) {
5363         case E1000_DEV_ID_82571EB_FIBER:
5364                 /* Wake events only supported on port A for dual fiber
5365                  * regardless of eeprom setting */
5366                 if (E1000_READ_REG(&adapter->hw, E1000_STATUS) &
5367                     E1000_STATUS_FUNC_1)
5368                         adapter->wol = 0;
5369                 break;
5370         case E1000_DEV_ID_82571EB_QUAD_COPPER:
5371         case E1000_DEV_ID_82571EB_QUAD_FIBER:
5372         case E1000_DEV_ID_82571EB_QUAD_COPPER_LP:
5373                 /* if quad port adapter, disable WoL on all but port A */
5374                 if (global_quad_port_a != 0)
5375                         adapter->wol = 0;
5376                 /* Reset for multiple quad port adapters */
5377                 if (++global_quad_port_a == 4)
5378                         global_quad_port_a = 0;
5379                 break;
5380         }
5381         return;
5382 }
5383
5384
5385 /*
5386  * Enable PCI Wake On Lan capability
5387  */
5388 static void
5389 em_enable_wakeup(device_t dev)
5390 {
5391         struct adapter  *adapter = device_get_softc(dev);
5392         struct ifnet    *ifp = adapter->ifp;
5393         int             error = 0;
5394         u32             pmc, ctrl, ctrl_ext, rctl;
5395         u16             status;
5396
5397         if (pci_find_cap(dev, PCIY_PMG, &pmc) != 0)
5398                 return;
5399
5400         /*
5401         ** Determine type of Wakeup: note that wol
5402         ** is set with all bits on by default.
5403         */
5404         if ((ifp->if_capenable & IFCAP_WOL_MAGIC) == 0)
5405                 adapter->wol &= ~E1000_WUFC_MAG;
5406
5407         if ((ifp->if_capenable & IFCAP_WOL_MCAST) == 0)
5408                 adapter->wol &= ~E1000_WUFC_MC;
5409         else {
5410                 rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
5411                 rctl |= E1000_RCTL_MPE;
5412                 E1000_WRITE_REG(&adapter->hw, E1000_RCTL, rctl);
5413         }
5414
5415         if (!(adapter->wol & (E1000_WUFC_EX | E1000_WUFC_MAG | E1000_WUFC_MC)))
5416                 goto pme;
5417
5418         /* Advertise the wakeup capability */
5419         ctrl = E1000_READ_REG(&adapter->hw, E1000_CTRL);
5420         ctrl |= (E1000_CTRL_SWDPIN2 | E1000_CTRL_SWDPIN3);
5421         E1000_WRITE_REG(&adapter->hw, E1000_CTRL, ctrl);
5422
5423         /* Keep the laser running on Fiber adapters */
5424         if (adapter->hw.phy.media_type == e1000_media_type_fiber ||
5425             adapter->hw.phy.media_type == e1000_media_type_internal_serdes) {
5426                 ctrl_ext = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
5427                 ctrl_ext |= E1000_CTRL_EXT_SDP3_DATA;
5428                 E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT, ctrl_ext);
5429         }
5430
5431         if ((adapter->hw.mac.type == e1000_ich8lan) ||
5432             (adapter->hw.mac.type == e1000_pchlan) ||
5433             (adapter->hw.mac.type == e1000_ich9lan) ||
5434             (adapter->hw.mac.type == e1000_ich10lan))
5435                 e1000_suspend_workarounds_ich8lan(&adapter->hw);
5436
5437         if ((adapter->hw.mac.type == e1000_pchlan)  ||
5438             (adapter->hw.mac.type == e1000_pch2lan) ||
5439             (adapter->hw.mac.type == e1000_pch_lpt) ||
5440             (adapter->hw.mac.type == e1000_pch_spt)) {
5441                 error = em_enable_phy_wakeup(adapter);
5442                 if (error)
5443                         goto pme;
5444         } else {
5445                 /* Enable wakeup by the MAC */
5446                 E1000_WRITE_REG(&adapter->hw, E1000_WUC, E1000_WUC_PME_EN);
5447                 E1000_WRITE_REG(&adapter->hw, E1000_WUFC, adapter->wol);
5448         }
5449
5450         if (adapter->hw.phy.type == e1000_phy_igp_3)
5451                 e1000_igp3_phy_powerdown_workaround_ich8lan(&adapter->hw);
5452
5453 pme:
5454         status = pci_read_config(dev, pmc + PCIR_POWER_STATUS, 2);
5455         status &= ~(PCIM_PSTAT_PME | PCIM_PSTAT_PMEENABLE);
5456         if (!error && (ifp->if_capenable & IFCAP_WOL))
5457                 status |= PCIM_PSTAT_PME | PCIM_PSTAT_PMEENABLE;
5458         pci_write_config(dev, pmc + PCIR_POWER_STATUS, status, 2);
5459
5460         return;
5461 }
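/*
 * Illustrative sketch: the PME handshake performed at the "pme" label
 * above, shown in isolation.  pci_find_cap()/pci_read_config() are the
 * standard FreeBSD PCI accessors already used in this file; the helper
 * name em_set_pme() is hypothetical.
 */
static inline void
em_set_pme(device_t dev, int enable)
{
	int pmc;
	u16 status;

	if (pci_find_cap(dev, PCIY_PMG, &pmc) != 0)
		return;
	status = pci_read_config(dev, pmc + PCIR_POWER_STATUS, 2);
	/* Clear any latched PME status, then arm PME if requested. */
	status &= ~(PCIM_PSTAT_PME | PCIM_PSTAT_PMEENABLE);
	if (enable)
		status |= PCIM_PSTAT_PME | PCIM_PSTAT_PMEENABLE;
	pci_write_config(dev, pmc + PCIR_POWER_STATUS, status, 2);
}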
5462
5463 /*
5464 ** WOL in the newer chipset interfaces (pchlan)
5465 ** requires settings to be copied into the PHY
5466 */
5467 static int
5468 em_enable_phy_wakeup(struct adapter *adapter)
5469 {
5470         struct e1000_hw *hw = &adapter->hw;
5471         u32 mreg, ret = 0;
5472         u16 preg;
5473
5474         /* copy MAC RARs to PHY RARs */
5475         e1000_copy_rx_addrs_to_phy_ich8lan(hw);
5476
5477         /* copy MAC MTA to PHY MTA */
5478         for (int i = 0; i < adapter->hw.mac.mta_reg_count; i++) {
5479                 mreg = E1000_READ_REG_ARRAY(hw, E1000_MTA, i);
5480                 e1000_write_phy_reg(hw, BM_MTA(i), (u16)(mreg & 0xFFFF));
5481                 e1000_write_phy_reg(hw, BM_MTA(i) + 1,
5482                     (u16)((mreg >> 16) & 0xFFFF));
5483         }
5484
5485         /* configure PHY Rx Control register */
5486         e1000_read_phy_reg(&adapter->hw, BM_RCTL, &preg);
5487         mreg = E1000_READ_REG(hw, E1000_RCTL);
5488         if (mreg & E1000_RCTL_UPE)
5489                 preg |= BM_RCTL_UPE;
5490         if (mreg & E1000_RCTL_MPE)
5491                 preg |= BM_RCTL_MPE;
5492         preg &= ~(BM_RCTL_MO_MASK);
5493         if (mreg & E1000_RCTL_MO_3)
5494                 preg |= (((mreg & E1000_RCTL_MO_3) >> E1000_RCTL_MO_SHIFT)
5495                                 << BM_RCTL_MO_SHIFT);
5496         if (mreg & E1000_RCTL_BAM)
5497                 preg |= BM_RCTL_BAM;
5498         if (mreg & E1000_RCTL_PMCF)
5499                 preg |= BM_RCTL_PMCF;
5500         mreg = E1000_READ_REG(hw, E1000_CTRL);
5501         if (mreg & E1000_CTRL_RFCE)
5502                 preg |= BM_RCTL_RFCE;
5503         e1000_write_phy_reg(&adapter->hw, BM_RCTL, preg);
5504
5505         /* enable PHY wakeup in MAC register */
5506         E1000_WRITE_REG(hw, E1000_WUC,
5507             E1000_WUC_PHY_WAKE | E1000_WUC_PME_EN);
5508         E1000_WRITE_REG(hw, E1000_WUFC, adapter->wol);
5509
5510         /* configure and enable PHY wakeup in PHY registers */
5511         e1000_write_phy_reg(&adapter->hw, BM_WUFC, adapter->wol);
5512         e1000_write_phy_reg(&adapter->hw, BM_WUC, E1000_WUC_PME_EN);
5513
5514         /* activate PHY wakeup */
5515         ret = hw->phy.ops.acquire(hw);
5516         if (ret) {
5517                 device_printf(adapter->dev, "Could not acquire PHY\n");
5518                 return ret;
5519         }
5520         e1000_write_phy_reg_mdic(hw, IGP01E1000_PHY_PAGE_SELECT,
5521                                  (BM_WUC_ENABLE_PAGE << IGP_PAGE_SHIFT));
5522         ret = e1000_read_phy_reg_mdic(hw, BM_WUC_ENABLE_REG, &preg);
5523         if (ret) {
5524                 device_printf(adapter->dev, "Could not read PHY page 769\n");
5525                 goto out;
5526         }
5527         preg |= BM_WUC_ENABLE_BIT | BM_WUC_HOST_WU_BIT;
5528         ret = e1000_write_phy_reg_mdic(hw, BM_WUC_ENABLE_REG, preg);
5529         if (ret)
5530                 device_printf(adapter->dev, "Could not set PHY Host Wakeup bit\n");
5531 out:
5532         hw->phy.ops.release(hw);
5533
5534         return ret;
5535 }
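/*
 * Illustrative sketch: one iteration of the MAC-to-PHY MTA copy loop
 * above.  Each 32-bit MAC multicast-table entry is split into two 16-bit
 * BM PHY register writes; the helper name em_copy_mta_entry() is
 * hypothetical.
 */
static inline void
em_copy_mta_entry(struct e1000_hw *hw, int i)
{
	u32 mreg;

	mreg = E1000_READ_REG_ARRAY(hw, E1000_MTA, i);
	e1000_write_phy_reg(hw, BM_MTA(i), (u16)(mreg & 0xFFFF));
	e1000_write_phy_reg(hw, BM_MTA(i) + 1, (u16)((mreg >> 16) & 0xFFFF));
}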
5536
5537 static void
5538 em_led_func(void *arg, int onoff)
5539 {
5540         struct adapter  *adapter = arg;
5541  
5542         EM_CORE_LOCK(adapter);
5543         if (onoff) {
5544                 e1000_setup_led(&adapter->hw);
5545                 e1000_led_on(&adapter->hw);
5546         } else {
5547                 e1000_led_off(&adapter->hw);
5548                 e1000_cleanup_led(&adapter->hw);
5549         }
5550         EM_CORE_UNLOCK(adapter);
5551 }
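/*
 * em_led_func() is a led(4) callback; elsewhere in the driver it is
 * registered at attach time roughly as sketched below (the wrapper name
 * em_led_register() is hypothetical), after which the LED can be driven
 * through the /dev/led/ interface.
 */
static inline struct cdev *
em_led_register(struct adapter *adapter)
{

	return (led_create(em_led_func, adapter,
	    device_get_nameunit(adapter->dev)));
}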
5552
5553 /*
5554 ** Disable the PCIe L0s and L1 link states
5555 */
5556 static void
5557 em_disable_aspm(struct adapter *adapter)
5558 {
5559         int             base, reg;
5560         u16             link_cap, link_ctrl;
5561         device_t        dev = adapter->dev;
5562
5563         switch (adapter->hw.mac.type) {
5564                 case e1000_82573:
5565                 case e1000_82574:
5566                 case e1000_82583:
5567                         break;
5568                 default:
5569                         return;
5570         }
5571         if (pci_find_cap(dev, PCIY_EXPRESS, &base) != 0)
5572                 return;
5573         reg = base + PCIER_LINK_CAP;
5574         link_cap = pci_read_config(dev, reg, 2);
5575         if ((link_cap & PCIEM_LINK_CAP_ASPM) == 0)
5576                 return;
5577         reg = base + PCIER_LINK_CTL;
5578         link_ctrl = pci_read_config(dev, reg, 2);
5579         link_ctrl &= ~PCIEM_LINK_CTL_ASPMC;
5580         pci_write_config(dev, reg, link_ctrl, 2);
5581         return;
5582 }
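/*
 * Illustrative sketch: reading back the Link Control ASPM field that
 * em_disable_aspm() just cleared.  A zero result means both L0s and L1
 * are disabled.  The helper name em_aspm_state() is hypothetical.
 */
static inline u16
em_aspm_state(device_t dev)
{
	int base;

	if (pci_find_cap(dev, PCIY_EXPRESS, &base) != 0)
		return (0);
	return (pci_read_config(dev, base + PCIER_LINK_CTL, 2) &
	    PCIEM_LINK_CTL_ASPMC);
}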
5583
5584 /**********************************************************************
5585  *
5586  *  Update the board statistics counters.
5587  *
5588  **********************************************************************/
5589 static void
5590 em_update_stats_counters(struct adapter *adapter)
5591 {
5592         struct ifnet   *ifp;
5593
5594         if (adapter->hw.phy.media_type == e1000_media_type_copper ||
5595            (E1000_READ_REG(&adapter->hw, E1000_STATUS) & E1000_STATUS_LU)) {
5596                 adapter->stats.symerrs += E1000_READ_REG(&adapter->hw, E1000_SYMERRS);
5597                 adapter->stats.sec += E1000_READ_REG(&adapter->hw, E1000_SEC);
5598         }
5599         adapter->stats.crcerrs += E1000_READ_REG(&adapter->hw, E1000_CRCERRS);
5600         adapter->stats.mpc += E1000_READ_REG(&adapter->hw, E1000_MPC);
5601         adapter->stats.scc += E1000_READ_REG(&adapter->hw, E1000_SCC);
5602         adapter->stats.ecol += E1000_READ_REG(&adapter->hw, E1000_ECOL);
5603
5604         adapter->stats.mcc += E1000_READ_REG(&adapter->hw, E1000_MCC);
5605         adapter->stats.latecol += E1000_READ_REG(&adapter->hw, E1000_LATECOL);
5606         adapter->stats.colc += E1000_READ_REG(&adapter->hw, E1000_COLC);
5607         adapter->stats.dc += E1000_READ_REG(&adapter->hw, E1000_DC);
5608         adapter->stats.rlec += E1000_READ_REG(&adapter->hw, E1000_RLEC);
5609         adapter->stats.xonrxc += E1000_READ_REG(&adapter->hw, E1000_XONRXC);
5610         adapter->stats.xontxc += E1000_READ_REG(&adapter->hw, E1000_XONTXC);
5611         adapter->stats.xoffrxc += E1000_READ_REG(&adapter->hw, E1000_XOFFRXC);
5612         adapter->stats.xofftxc += E1000_READ_REG(&adapter->hw, E1000_XOFFTXC);
5613         adapter->stats.fcruc += E1000_READ_REG(&adapter->hw, E1000_FCRUC);
5614         adapter->stats.prc64 += E1000_READ_REG(&adapter->hw, E1000_PRC64);
5615         adapter->stats.prc127 += E1000_READ_REG(&adapter->hw, E1000_PRC127);
5616         adapter->stats.prc255 += E1000_READ_REG(&adapter->hw, E1000_PRC255);
5617         adapter->stats.prc511 += E1000_READ_REG(&adapter->hw, E1000_PRC511);
5618         adapter->stats.prc1023 += E1000_READ_REG(&adapter->hw, E1000_PRC1023);
5619         adapter->stats.prc1522 += E1000_READ_REG(&adapter->hw, E1000_PRC1522);
5620         adapter->stats.gprc += E1000_READ_REG(&adapter->hw, E1000_GPRC);
5621         adapter->stats.bprc += E1000_READ_REG(&adapter->hw, E1000_BPRC);
5622         adapter->stats.mprc += E1000_READ_REG(&adapter->hw, E1000_MPRC);
5623         adapter->stats.gptc += E1000_READ_REG(&adapter->hw, E1000_GPTC);
5624
5625         /* For the 64-bit byte counters the low dword must be read first; */
5626         /* both registers clear on the read of the high dword. */
5627
5628         adapter->stats.gorc += E1000_READ_REG(&adapter->hw, E1000_GORCL) +
5629             ((u64)E1000_READ_REG(&adapter->hw, E1000_GORCH) << 32);
5630         adapter->stats.gotc += E1000_READ_REG(&adapter->hw, E1000_GOTCL) +
5631             ((u64)E1000_READ_REG(&adapter->hw, E1000_GOTCH) << 32);
5632
5633         adapter->stats.rnbc += E1000_READ_REG(&adapter->hw, E1000_RNBC);
5634         adapter->stats.ruc += E1000_READ_REG(&adapter->hw, E1000_RUC);
5635         adapter->stats.rfc += E1000_READ_REG(&adapter->hw, E1000_RFC);
5636         adapter->stats.roc += E1000_READ_REG(&adapter->hw, E1000_ROC);
5637         adapter->stats.rjc += E1000_READ_REG(&adapter->hw, E1000_RJC);
5638
5639         adapter->stats.tor += E1000_READ_REG(&adapter->hw, E1000_TORH);
5640         adapter->stats.tot += E1000_READ_REG(&adapter->hw, E1000_TOTH);
5641
5642         adapter->stats.tpr += E1000_READ_REG(&adapter->hw, E1000_TPR);
5643         adapter->stats.tpt += E1000_READ_REG(&adapter->hw, E1000_TPT);
5644         adapter->stats.ptc64 += E1000_READ_REG(&adapter->hw, E1000_PTC64);
5645         adapter->stats.ptc127 += E1000_READ_REG(&adapter->hw, E1000_PTC127);
5646         adapter->stats.ptc255 += E1000_READ_REG(&adapter->hw, E1000_PTC255);
5647         adapter->stats.ptc511 += E1000_READ_REG(&adapter->hw, E1000_PTC511);
5648         adapter->stats.ptc1023 += E1000_READ_REG(&adapter->hw, E1000_PTC1023);
5649         adapter->stats.ptc1522 += E1000_READ_REG(&adapter->hw, E1000_PTC1522);
5650         adapter->stats.mptc += E1000_READ_REG(&adapter->hw, E1000_MPTC);
5651         adapter->stats.bptc += E1000_READ_REG(&adapter->hw, E1000_BPTC);
5652
5653         /* Interrupt Counts */
5654
5655         adapter->stats.iac += E1000_READ_REG(&adapter->hw, E1000_IAC);
5656         adapter->stats.icrxptc += E1000_READ_REG(&adapter->hw, E1000_ICRXPTC);
5657         adapter->stats.icrxatc += E1000_READ_REG(&adapter->hw, E1000_ICRXATC);
5658         adapter->stats.ictxptc += E1000_READ_REG(&adapter->hw, E1000_ICTXPTC);
5659         adapter->stats.ictxatc += E1000_READ_REG(&adapter->hw, E1000_ICTXATC);
5660         adapter->stats.ictxqec += E1000_READ_REG(&adapter->hw, E1000_ICTXQEC);
5661         adapter->stats.ictxqmtc += E1000_READ_REG(&adapter->hw, E1000_ICTXQMTC);
5662         adapter->stats.icrxdmtc += E1000_READ_REG(&adapter->hw, E1000_ICRXDMTC);
5663         adapter->stats.icrxoc += E1000_READ_REG(&adapter->hw, E1000_ICRXOC);
5664
5665         if (adapter->hw.mac.type >= e1000_82543) {
5666                 adapter->stats.algnerrc +=
5667                     E1000_READ_REG(&adapter->hw, E1000_ALGNERRC);
5668                 adapter->stats.rxerrc +=
5669                     E1000_READ_REG(&adapter->hw, E1000_RXERRC);
5670                 adapter->stats.tncrs +=
5671                     E1000_READ_REG(&adapter->hw, E1000_TNCRS);
5672                 adapter->stats.cexterr +=
5673                     E1000_READ_REG(&adapter->hw, E1000_CEXTERR);
5674                 adapter->stats.tsctc +=
5675                     E1000_READ_REG(&adapter->hw, E1000_TSCTC);
5676                 adapter->stats.tsctfc +=
5677                     E1000_READ_REG(&adapter->hw, E1000_TSCTFC);
5678         }
5679         ifp = adapter->ifp;
5680
5681         ifp->if_collisions = adapter->stats.colc;
5682
5683         /* Rx Errors */
5684         ifp->if_ierrors = adapter->dropped_pkts + adapter->stats.rxerrc +
5685             adapter->stats.crcerrs + adapter->stats.algnerrc +
5686             adapter->stats.ruc + adapter->stats.roc +
5687             adapter->stats.mpc + adapter->stats.cexterr;
5688
5689         /* Tx Errors */
5690         ifp->if_oerrors = adapter->stats.ecol +
5691             adapter->stats.latecol + adapter->watchdog_events;
5692 }
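/*
 * Illustrative sketch of the 64-bit counter idiom used above for GORC and
 * GOTC: the low dword must be read first, and the pair clears on the
 * high-dword read, so both halves are latched in one pass.  The helper
 * name em_read_stat64() is hypothetical.
 */
static inline u64
em_read_stat64(struct e1000_hw *hw, u32 lo_reg, u32 hi_reg)
{
	u64 val;

	val = E1000_READ_REG(hw, lo_reg);		/* latch the counter */
	val += (u64)E1000_READ_REG(hw, hi_reg) << 32;	/* read clears both */
	return (val);
}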
5693
5694 /* Export a single 32-bit register via a read-only sysctl. */
5695 static int
5696 em_sysctl_reg_handler(SYSCTL_HANDLER_ARGS)
5697 {
5698         struct adapter *adapter;
5699         u_int val;
5700
5701         adapter = oidp->oid_arg1;
5702         val = E1000_READ_REG(&adapter->hw, oidp->oid_arg2);
5703         return (sysctl_handle_int(oidp, &val, 0, req));
5704 }
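/*
 * Illustrative sketch: exporting one more register through the shared
 * handler above.  The register offset travels in oid_arg2, so a single
 * handler serves every exported register; the OID name and the wrapper
 * em_export_ext_control() are hypothetical.
 */
static inline void
em_export_ext_control(struct adapter *adapter)
{
	struct sysctl_ctx_list *ctx = device_get_sysctl_ctx(adapter->dev);
	struct sysctl_oid_list *child =
	    SYSCTL_CHILDREN(device_get_sysctl_tree(adapter->dev));

	SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "ext_control",
	    CTLTYPE_UINT | CTLFLAG_RD, adapter, E1000_CTRL_EXT,
	    em_sysctl_reg_handler, "IU", "Extended Control Register");
}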
5705
5706 /*
5707  * Add sysctl variables, one per statistic, to the system.
5708  */
5709 static void
5710 em_add_hw_stats(struct adapter *adapter)
5711 {
5712         device_t dev = adapter->dev;
5713
5714         struct tx_ring *txr = adapter->tx_rings;
5715         struct rx_ring *rxr = adapter->rx_rings;
5716
5717         struct sysctl_ctx_list *ctx = device_get_sysctl_ctx(dev);
5718         struct sysctl_oid *tree = device_get_sysctl_tree(dev);
5719         struct sysctl_oid_list *child = SYSCTL_CHILDREN(tree);
5720         struct e1000_hw_stats *stats = &adapter->stats;
5721
5722         struct sysctl_oid *stat_node, *queue_node, *int_node;
5723         struct sysctl_oid_list *stat_list, *queue_list, *int_list;
5724
5725 #define QUEUE_NAME_LEN 32
5726         char namebuf[QUEUE_NAME_LEN];
5727         
5728         /* Driver Statistics */
5729         SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "dropped", 
5730                         CTLFLAG_RD, &adapter->dropped_pkts,
5731                         "Driver dropped packets");
5732         SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "link_irq",
5733                         CTLFLAG_RD, &adapter->link_irq,
5734                         "Link MSIX IRQ Handled");
5735         SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "mbuf_defrag_fail", 
5736                          CTLFLAG_RD, &adapter->mbuf_defrag_failed,
5737                          "Defragmenting mbuf chain failed");
5738         SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "tx_dma_fail", 
5739                         CTLFLAG_RD, &adapter->no_tx_dma_setup,
5740                         "Driver tx dma failure in xmit");
5741         SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "rx_overruns",
5742                         CTLFLAG_RD, &adapter->rx_overruns,
5743                         "RX overruns");
5744         SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "watchdog_timeouts",
5745                         CTLFLAG_RD, &adapter->watchdog_events,
5746                         "Watchdog timeouts");
5747         
5748         SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "device_control",
5749                         CTLTYPE_UINT | CTLFLAG_RD, adapter, E1000_CTRL,
5750                         em_sysctl_reg_handler, "IU",
5751                         "Device Control Register");
5752         SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "rx_control",
5753                         CTLTYPE_UINT | CTLFLAG_RD, adapter, E1000_RCTL,
5754                         em_sysctl_reg_handler, "IU",
5755                         "Receiver Control Register");
5756         SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "fc_high_water",
5757                         CTLFLAG_RD, &adapter->hw.fc.high_water, 0,
5758                         "Flow Control High Watermark");
5759         SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "fc_low_water", 
5760                         CTLFLAG_RD, &adapter->hw.fc.low_water, 0,
5761                         "Flow Control Low Watermark");
5762
5763         for (int i = 0; i < adapter->num_queues; i++, txr++, rxr++) {
5764                 snprintf(namebuf, QUEUE_NAME_LEN, "queue_tx_%d", i);
5765                 queue_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, namebuf,
5766                                             CTLFLAG_RD, NULL, "TX Queue Name");
5767                 queue_list = SYSCTL_CHILDREN(queue_node);
5768
5769                 SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "txd_head", 
5770                                 CTLTYPE_UINT | CTLFLAG_RD, adapter,
5771                                 E1000_TDH(txr->me),
5772                                 em_sysctl_reg_handler, "IU",
5773                                 "Transmit Descriptor Head");
5774                 SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "txd_tail", 
5775                                 CTLTYPE_UINT | CTLFLAG_RD, adapter,
5776                                 E1000_TDT(txr->me),
5777                                 em_sysctl_reg_handler, "IU",
5778                                 "Transmit Descriptor Tail");
5779                 SYSCTL_ADD_ULONG(ctx, queue_list, OID_AUTO, "tx_irq",
5780                                 CTLFLAG_RD, &txr->tx_irq,
5781                                 "Queue MSI-X Transmit Interrupts");
5782                 SYSCTL_ADD_ULONG(ctx, queue_list, OID_AUTO, "no_desc_avail", 
5783                                 CTLFLAG_RD, &txr->no_desc_avail,
5784                                 "Queue No Descriptor Available");
5785
5786                 snprintf(namebuf, QUEUE_NAME_LEN, "queue_rx_%d", i);
5787                 queue_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, namebuf,
5788                                             CTLFLAG_RD, NULL, "RX Queue Name");
5789                 queue_list = SYSCTL_CHILDREN(queue_node);
5790
5791                 SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "rxd_head", 
5792                                 CTLTYPE_UINT | CTLFLAG_RD, adapter,
5793                                 E1000_RDH(rxr->me),
5794                                 em_sysctl_reg_handler, "IU",
5795                                 "Receive Descriptor Head");
5796                 SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "rxd_tail", 
5797                                 CTLTYPE_UINT | CTLFLAG_RD, adapter,
5798                                 E1000_RDT(rxr->me),
5799                                 em_sysctl_reg_handler, "IU",
5800                                 "Receive Descriptor Tail");
5801                 SYSCTL_ADD_ULONG(ctx, queue_list, OID_AUTO, "rx_irq",
5802                                 CTLFLAG_RD, &rxr->rx_irq,
5803                                 "Queue MSI-X Receive Interrupts");
5804         }
5805
5806         /* MAC stats get their own sub node */
5807
5808         stat_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "mac_stats", 
5809                                     CTLFLAG_RD, NULL, "Statistics");
5810         stat_list = SYSCTL_CHILDREN(stat_node);
5811
5812         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "excess_coll",
5813                         CTLFLAG_RD, &stats->ecol,
5814                         "Excessive collisions");
5815         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "single_coll",
5816                         CTLFLAG_RD, &stats->scc,
5817                         "Single collisions");
5818         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "multiple_coll",
5819                         CTLFLAG_RD, &stats->mcc,
5820                         "Multiple collisions");
5821         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "late_coll",
5822                         CTLFLAG_RD, &stats->latecol,
5823                         "Late collisions");
5824         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "collision_count",
5825                         CTLFLAG_RD, &stats->colc,
5826                         "Collision Count");
5827         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "symbol_errors",
5828                         CTLFLAG_RD, &adapter->stats.symerrs,
5829                         "Symbol Errors");
5830         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "sequence_errors",
5831                         CTLFLAG_RD, &adapter->stats.sec,
5832                         "Sequence Errors");
5833         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "defer_count",
5834                         CTLFLAG_RD, &adapter->stats.dc,
5835                         "Defer Count");
5836         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "missed_packets",
5837                         CTLFLAG_RD, &adapter->stats.mpc,
5838                         "Missed Packets");
5839         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_no_buff",
5840                         CTLFLAG_RD, &adapter->stats.rnbc,
5841                         "Receive No Buffers");
5842         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_undersize",
5843                         CTLFLAG_RD, &adapter->stats.ruc,
5844                         "Receive Undersize");
5845         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_fragmented",
5846                         CTLFLAG_RD, &adapter->stats.rfc,
5847                         "Fragmented Packets Received");
5848         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_oversize",
5849                         CTLFLAG_RD, &adapter->stats.roc,
5850                         "Oversized Packets Received");
5851         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_jabber",
5852                         CTLFLAG_RD, &adapter->stats.rjc,
5853                         "Received Jabber");
5854         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_errs",
5855                         CTLFLAG_RD, &adapter->stats.rxerrc,
5856                         "Receive Errors");
5857         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "crc_errs",
5858                         CTLFLAG_RD, &adapter->stats.crcerrs,
5859                         "CRC errors");
5860         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "alignment_errs",
5861                         CTLFLAG_RD, &adapter->stats.algnerrc,
5862                         "Alignment Errors");
5863         /* On 82575 these are collision counts */
5864         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "coll_ext_errs",
5865                         CTLFLAG_RD, &adapter->stats.cexterr,
5866                         "Collision/Carrier extension errors");
5867         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "xon_recvd",
5868                         CTLFLAG_RD, &adapter->stats.xonrxc,
5869                         "XON Received");
5870         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "xon_txd",
5871                         CTLFLAG_RD, &adapter->stats.xontxc,
5872                         "XON Transmitted");
5873         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "xoff_recvd",
5874                         CTLFLAG_RD, &adapter->stats.xoffrxc,
5875                         "XOFF Received");
5876         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "xoff_txd",
5877                         CTLFLAG_RD, &adapter->stats.xofftxc,
5878                         "XOFF Transmitted");
5879
5880         /* Packet Reception Stats */
5881         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "total_pkts_recvd",
5882                         CTLFLAG_RD, &adapter->stats.tpr,
5883                         "Total Packets Received");
5884         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_pkts_recvd",
5885                         CTLFLAG_RD, &adapter->stats.gprc,
5886                         "Good Packets Received");
5887         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "bcast_pkts_recvd",
5888                         CTLFLAG_RD, &adapter->stats.bprc,
5889                         "Broadcast Packets Received");
5890         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "mcast_pkts_recvd",
5891                         CTLFLAG_RD, &adapter->stats.mprc,
5892                         "Multicast Packets Received");
5893         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_64",
5894                         CTLFLAG_RD, &adapter->stats.prc64,
5895                         "64 byte frames received");
5896         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_65_127",
5897                         CTLFLAG_RD, &adapter->stats.prc127,
5898                         "65-127 byte frames received");
5899         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_128_255",
5900                         CTLFLAG_RD, &adapter->stats.prc255,
5901                         "128-255 byte frames received");
5902         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_256_511",
5903                         CTLFLAG_RD, &adapter->stats.prc511,
5904                         "256-511 byte frames received");
5905         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_512_1023",
5906                         CTLFLAG_RD, &adapter->stats.prc1023,
5907                         "512-1023 byte frames received");
5908         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_1024_1522",
5909                         CTLFLAG_RD, &adapter->stats.prc1522,
5910                         "1024-1522 byte frames received");
5911         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_octets_recvd",
5912                         CTLFLAG_RD, &adapter->stats.gorc, 
5913                         "Good Octets Received"); 
5914
5915         /* Packet Transmission Stats */
5916         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_octets_txd",
5917                         CTLFLAG_RD, &adapter->stats.gotc, 
5918                         "Good Octets Transmitted"); 
5919         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "total_pkts_txd",
5920                         CTLFLAG_RD, &adapter->stats.tpt,
5921                         "Total Packets Transmitted");
5922         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_pkts_txd",
5923                         CTLFLAG_RD, &adapter->stats.gptc,
5924                         "Good Packets Transmitted");
5925         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "bcast_pkts_txd",
5926                         CTLFLAG_RD, &adapter->stats.bptc,
5927                         "Broadcast Packets Transmitted");
5928         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "mcast_pkts_txd",
5929                         CTLFLAG_RD, &adapter->stats.mptc,
5930                         "Multicast Packets Transmitted");
5931         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_64",
5932                         CTLFLAG_RD, &adapter->stats.ptc64,
5933                         "64 byte frames transmitted");
5934         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_65_127",
5935                         CTLFLAG_RD, &adapter->stats.ptc127,
5936                         "65-127 byte frames transmitted");
5937         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_128_255",
5938                         CTLFLAG_RD, &adapter->stats.ptc255,
5939                         "128-255 byte frames transmitted");
5940         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_256_511",
5941                         CTLFLAG_RD, &adapter->stats.ptc511,
5942                         "256-511 byte frames transmitted");
5943         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_512_1023",
5944                         CTLFLAG_RD, &adapter->stats.ptc1023,
5945                         "512-1023 byte frames transmitted");
5946         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_1024_1522",
5947                         CTLFLAG_RD, &adapter->stats.ptc1522,
5948                         "1024-1522 byte frames transmitted");
5949         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tso_txd",
5950                         CTLFLAG_RD, &adapter->stats.tsctc,
5951                         "TSO Contexts Transmitted");
5952         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tso_ctx_fail",
5953                         CTLFLAG_RD, &adapter->stats.tsctfc,
5954                         "TSO Contexts Failed");
5955
5956
5957         /* Interrupt Stats */
5958
5959         int_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "interrupts", 
5960                                     CTLFLAG_RD, NULL, "Interrupt Statistics");
5961         int_list = SYSCTL_CHILDREN(int_node);
5962
5963         SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "asserts",
5964                         CTLFLAG_RD, &adapter->stats.iac,
5965                         "Interrupt Assertion Count");
5966
5967         SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "rx_pkt_timer",
5968                         CTLFLAG_RD, &adapter->stats.icrxptc,
5969                         "Interrupt Cause Rx Pkt Timer Expire Count");
5970
5971         SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "rx_abs_timer",
5972                         CTLFLAG_RD, &adapter->stats.icrxatc,
5973                         "Interrupt Cause Rx Abs Timer Expire Count");
5974
5975         SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "tx_pkt_timer",
5976                         CTLFLAG_RD, &adapter->stats.ictxptc,
5977                         "Interrupt Cause Tx Pkt Timer Expire Count");
5978
5979         SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "tx_abs_timer",
5980                         CTLFLAG_RD, &adapter->stats.ictxatc,
5981                         "Interrupt Cause Tx Abs Timer Expire Count");
5982
5983         SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "tx_queue_empty",
5984                         CTLFLAG_RD, &adapter->stats.ictxqec,
5985                         "Interrupt Cause Tx Queue Empty Count");
5986
5987         SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "tx_queue_min_thresh",
5988                         CTLFLAG_RD, &adapter->stats.ictxqmtc,
5989                         "Interrupt Cause Tx Queue Min Thresh Count");
5990
5991         SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "rx_desc_min_thresh",
5992                         CTLFLAG_RD, &adapter->stats.icrxdmtc,
5993                         "Interrupt Cause Rx Desc Min Thresh Count");
5994
5995         SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "rx_overrun",
5996                         CTLFLAG_RD, &adapter->stats.icrxoc,
5997                         "Interrupt Cause Receiver Overrun Count");
5998 }
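/*
 * The resulting sysctl layout, shown for unit 0 (abridged):
 *
 *	dev.em.0.dropped, dev.em.0.watchdog_timeouts, ...
 *	dev.em.0.queue_tx_0.txd_head, dev.em.0.queue_rx_0.rxd_tail, ...
 *	dev.em.0.mac_stats.excess_coll, ...
 *	dev.em.0.interrupts.asserts, ...
 */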
5999
6000 /**********************************************************************
6001  *
6002  *  This routine provides a way to dump out the adapter eeprom,
6003  *  often a useful debug/service tool.  Only the first 32 words are
6004  *  dumped; the data that matters lies within that range.
6005  *
6006  **********************************************************************/
6007 static int
6008 em_sysctl_nvm_info(SYSCTL_HANDLER_ARGS)
6009 {
6010         struct adapter *adapter = (struct adapter *)arg1;
6011         int error;
6012         int result;
6013
6014         result = -1;
6015         error = sysctl_handle_int(oidp, &result, 0, req);
6016
6017         if (error || !req->newptr)
6018                 return (error);
6019
6020         /*
6021          * This value will cause a hex dump of the
6022          * first 32 16-bit words of the EEPROM to
6023          * the screen.
6024          */
6025         if (result == 1)
6026                 em_print_nvm_info(adapter);
6027
6028         return (error);
6029 }
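/*
 * Note that the handler above is write-triggered: storing 1 into the OID
 * (for example with sysctl(8)) dumps the EEPROM to the console, while a
 * plain read simply returns -1.
 */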
6030
6031 static void
6032 em_print_nvm_info(struct adapter *adapter)
6033 {
6034         u16     eeprom_data;
6035         int     i, j, row = 0;
6036
6037         /* It's a bit crude, but it gets the job done */
6038         printf("\nInterface EEPROM Dump:\n");
6039         printf("Offset\n0x0000  ");
6040         for (i = 0, j = 0; i < 32; i++, j++) {
6041                 if (j == 8) { /* Make the offset block */
6042                         j = 0; ++row;
6043                         printf("\n0x00%x0  ", row);
6044                 }
6045                 e1000_read_nvm(&adapter->hw, i, 1, &eeprom_data);
6046                 printf("%04x ", eeprom_data);
6047         }
6048         printf("\n");
6049 }
6050
6051 static int
6052 em_sysctl_int_delay(SYSCTL_HANDLER_ARGS)
6053 {
6054         struct em_int_delay_info *info;
6055         struct adapter *adapter;
6056         u32 regval;
6057         int error, usecs, ticks;
6058
6059         info = (struct em_int_delay_info *)arg1;
6060         usecs = info->value;
6061         error = sysctl_handle_int(oidp, &usecs, 0, req);
6062         if (error != 0 || req->newptr == NULL)
6063                 return (error);
6064         if (usecs < 0 || usecs > EM_TICKS_TO_USECS(65535))
6065                 return (EINVAL);
6066         info->value = usecs;
6067         ticks = EM_USECS_TO_TICKS(usecs);
6068         if (info->offset == E1000_ITR)  /* units are 256ns here */
6069                 ticks *= 4;
6070
6071         adapter = info->adapter;
6072         
6073         EM_CORE_LOCK(adapter);
6074         regval = E1000_READ_OFFSET(&adapter->hw, info->offset);
6075         regval = (regval & ~0xffff) | (ticks & 0xffff);
6076         /* Handle a few special cases. */
6077         switch (info->offset) {
6078         case E1000_RDTR:
6079                 break;
6080         case E1000_TIDV:
6081                 if (ticks == 0) {
6082                         adapter->txd_cmd &= ~E1000_TXD_CMD_IDE;
6083                         /* Don't write 0 into the TIDV register. */
6084                         regval++;
6085                 } else
6086                         adapter->txd_cmd |= E1000_TXD_CMD_IDE;
6087                 break;
6088         }
6089         E1000_WRITE_OFFSET(&adapter->hw, info->offset, regval);
6090         EM_CORE_UNLOCK(adapter);
6091         return (0);
6092 }
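/*
 * Worked example for the conversion above: the hardware delay timers tick
 * in 1.024 usec units, so a request of 100 usec becomes roughly 98 ticks
 * via EM_USECS_TO_TICKS(); for E1000_ITR, whose unit is 256 ns, that tick
 * count is then multiplied by 4.
 */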
6093
6094 static void
6095 em_add_int_delay_sysctl(struct adapter *adapter, const char *name,
6096         const char *description, struct em_int_delay_info *info,
6097         int offset, int value)
6098 {
6099         info->adapter = adapter;
6100         info->offset = offset;
6101         info->value = value;
6102         SYSCTL_ADD_PROC(device_get_sysctl_ctx(adapter->dev),
6103             SYSCTL_CHILDREN(device_get_sysctl_tree(adapter->dev)),
6104             OID_AUTO, name, CTLTYPE_INT|CTLFLAG_RW,
6105             info, 0, em_sysctl_int_delay, "I", description);
6106 }
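/*
 * Illustrative call, mirroring how the driver wires these OIDs up at
 * attach time.  The wrapper name is hypothetical and the 32-usec default
 * shown here is only an example.
 */
static inline void
em_register_rx_delay(struct adapter *adapter)
{

	em_add_int_delay_sysctl(adapter, "rx_int_delay",
	    "receive interrupt delay in usecs", &adapter->rx_int_delay,
	    E1000_REGISTER(&adapter->hw, E1000_RDTR), 32);
}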
6107
6108 static void
6109 em_set_sysctl_value(struct adapter *adapter, const char *name,
6110         const char *description, int *limit, int value)
6111 {
6112         *limit = value;
6113         SYSCTL_ADD_INT(device_get_sysctl_ctx(adapter->dev),
6114             SYSCTL_CHILDREN(device_get_sysctl_tree(adapter->dev)),
6115             OID_AUTO, name, CTLFLAG_RW, limit, value, description);
6116 }
6117
6118
6119 /*
6120 ** Set flow control using sysctl:
6121 ** Flow control values:
6122 **      0 - off
6123 **      1 - rx pause
6124 **      2 - tx pause
6125 **      3 - full
6126 */
6127 static int
6128 em_set_flowcntl(SYSCTL_HANDLER_ARGS)
6129 {
6130         int             error;
6131         static int      input = 3; /* default is full */
6132         struct adapter  *adapter = (struct adapter *) arg1;
6133
6134         error = sysctl_handle_int(oidp, &input, 0, req);
6135
6136         if (error || req->newptr == NULL)
6137                 return (error);
6138
6139         if (input == adapter->fc) /* no change? */
6140                 return (error);
6141
6142         switch (input) {
6143                 case e1000_fc_rx_pause:
6144                 case e1000_fc_tx_pause:
6145                 case e1000_fc_full:
6146                 case e1000_fc_none:
6147                         adapter->hw.fc.requested_mode = input;
6148                         adapter->fc = input;
6149                         break;
6150                 default:
6151                         /* Do nothing */
6152                         return (error);
6153         }
6154
6155         adapter->hw.fc.current_mode = adapter->hw.fc.requested_mode;
6156         e1000_force_mac_fc(&adapter->hw);
6157         return (error);
6158 }
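/*
 * Usage note: the accepted values map directly onto enum e1000_fc_mode
 * (e1000_fc_none = 0 through e1000_fc_full = 3), so writing 3 to the
 * flow-control OID forces full rx/tx pause via e1000_force_mac_fc().
 */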
6159
6160 /*
6161 ** Manage Energy Efficient Ethernet:
6162 ** Control values:
6163 **     0/1 - enabled/disabled
6164 */
6165 static int
6166 em_sysctl_eee(SYSCTL_HANDLER_ARGS)
6167 {
6168         struct adapter *adapter = (struct adapter *) arg1;
6169         int             error, value;
6170
6171         value = adapter->hw.dev_spec.ich8lan.eee_disable;
6172         error = sysctl_handle_int(oidp, &value, 0, req);
6173         if (error || req->newptr == NULL)
6174                 return (error);
6175         EM_CORE_LOCK(adapter);
6176         adapter->hw.dev_spec.ich8lan.eee_disable = (value != 0);
6177         em_init_locked(adapter);
6178         EM_CORE_UNLOCK(adapter);
6179         return (0);
6180 }
6181
6182 static int
6183 em_sysctl_debug_info(SYSCTL_HANDLER_ARGS)
6184 {
6185         struct adapter *adapter;
6186         int error;
6187         int result;
6188
6189         result = -1;
6190         error = sysctl_handle_int(oidp, &result, 0, req);
6191
6192         if (error || !req->newptr)
6193                 return (error);
6194
6195         if (result == 1) {
6196                 adapter = (struct adapter *)arg1;
6197                 em_print_debug_info(adapter);
6198         }
6199
6200         return (error);
6201 }
6202
6203 /*
6204 ** This routine is meant to be fluid, add whatever is
6205 ** needed for debugging a problem.  -jfv
6206 */
6207 static void
6208 em_print_debug_info(struct adapter *adapter)
6209 {
6210         device_t dev = adapter->dev;
6211         struct tx_ring *txr = adapter->tx_rings;
6212         struct rx_ring *rxr = adapter->rx_rings;
6213
6214         if (adapter->ifp->if_drv_flags & IFF_DRV_RUNNING)
6215                 printf("Interface is RUNNING ");
6216         else
6217                 printf("Interface is NOT RUNNING ");
6218
6219         if (adapter->ifp->if_drv_flags & IFF_DRV_OACTIVE)
6220                 printf("and INACTIVE\n");
6221         else
6222                 printf("and ACTIVE\n");
6223
6224         for (int i = 0; i < adapter->num_queues; i++, txr++, rxr++) {
6225                 device_printf(dev, "TX Queue %d ------\n", i);
6226                 device_printf(dev, "hw tdh = %d, hw tdt = %d\n",
6227                         E1000_READ_REG(&adapter->hw, E1000_TDH(i)),
6228                         E1000_READ_REG(&adapter->hw, E1000_TDT(i)));
6229                 device_printf(dev, "Tx Queue Status = %d\n", txr->busy);
6230                 device_printf(dev, "TX descriptors avail = %d\n",
6231                         txr->tx_avail);
6232                 device_printf(dev, "Tx Descriptors avail failure = %ld\n",
6233                         txr->no_desc_avail);
6234                 device_printf(dev, "RX Queue %d ------\n", i);
6235                 device_printf(dev, "hw rdh = %d, hw rdt = %d\n",
6236                         E1000_READ_REG(&adapter->hw, E1000_RDH(i)),
6237                         E1000_READ_REG(&adapter->hw, E1000_RDT(i)));
6238                 device_printf(dev, "RX discarded packets = %ld\n",
6239                         rxr->rx_discarded);
6240                 device_printf(dev, "RX Next to Check = %d\n", rxr->next_to_check);
6241                 device_printf(dev, "RX Next to Refresh = %d\n", rxr->next_to_refresh);
6242         }
6243 }
6244
6245 #ifdef EM_MULTIQUEUE
6246 /*
6247  * 82574 only:
6248  * Write a new value to the EEPROM increasing the number of MSIX
6249  * vectors from 3 to 5, for proper multiqueue support.
6250  */
6251 static void
6252 em_enable_vectors_82574(struct adapter *adapter)
6253 {
6254         struct e1000_hw *hw = &adapter->hw;
6255         device_t dev = adapter->dev;
6256         u16 edata;
6257
6258         e1000_read_nvm(hw, EM_NVM_PCIE_CTRL, 1, &edata);
6259         device_printf(dev, "Current cap: %#06x\n", edata);
6260         if (((edata & EM_NVM_MSIX_N_MASK) >> EM_NVM_MSIX_N_SHIFT) != 4) {
6261                 device_printf(dev, "Writing to eeprom: increasing "
6262                     "reported MSIX vectors from 3 to 5...\n");
6263                 edata &= ~(EM_NVM_MSIX_N_MASK);
6264                 edata |= 4 << EM_NVM_MSIX_N_SHIFT;
6265                 e1000_write_nvm(hw, EM_NVM_PCIE_CTRL, 1, &edata);
6266                 e1000_update_nvm_checksum(hw);
6267                 device_printf(dev, "Writing to eeprom: done\n");
6268         }
6269 }
6270 #endif
6271
6272 #ifdef DDB
6273 DB_COMMAND(em_reset_dev, em_ddb_reset_dev)
6274 {
6275         devclass_t      dc;
6276         int max_em;
6277
6278         dc = devclass_find("em");
6279         max_em = devclass_get_maxunit(dc);
6280
6281         for (int index = 0; index < max_em; index++) {
6282                 device_t dev;
6283                 dev = devclass_get_device(dc, index);
6284                 if (dev != NULL && device_get_driver(dev) == &em_driver) {
6285                         struct adapter *adapter = device_get_softc(dev);
6286                         EM_CORE_LOCK(adapter);
6287                         em_init_locked(adapter);
6288                         EM_CORE_UNLOCK(adapter);
6289                 }
6290         }
6291 }
6292 DB_COMMAND(em_dump_queue, em_ddb_dump_queue)
6293 {
6294         devclass_t      dc;
6295         int max_em;
6296
6297         dc = devclass_find("em");
6298         max_em = devclass_get_maxunit(dc);
6299
6300         for (int index = 0; index < max_em; index++) {
6301                 device_t dev;
6302                 dev = devclass_get_device(dc, index);
6303                 if (dev != NULL && device_get_driver(dev) == &em_driver)
6304                         em_print_debug_info(device_get_softc(dev));
6305         }
6306
6307 }
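/*
 * Usage from the DDB prompt:
 *
 *	db> em_reset_dev
 *	db> em_dump_queue
 */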
6308 #endif