/******************************************************************************

  Copyright (c) 2001-2015, Intel Corporation
  All rights reserved.

  Redistribution and use in source and binary forms, with or without
  modification, are permitted provided that the following conditions are met:

   1. Redistributions of source code must retain the above copyright notice,
      this list of conditions and the following disclaimer.

   2. Redistributions in binary form must reproduce the above copyright
      notice, this list of conditions and the following disclaimer in the
      documentation and/or other materials provided with the distribution.

   3. Neither the name of the Intel Corporation nor the names of its
      contributors may be used to endorse or promote products derived from
      this software without specific prior written permission.

  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
  AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
  LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
  INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
  CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
  ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
  POSSIBILITY OF SUCH DAMAGE.

******************************************************************************/
/*$FreeBSD$*/

#include "opt_em.h"
#include "opt_ddb.h"
#include "opt_inet.h"
#include "opt_inet6.h"

#ifdef HAVE_KERNEL_OPTION_HEADERS
#include "opt_device_polling.h"
#endif

#include <sys/param.h>
#include <sys/systm.h>
#ifdef DDB
#include <sys/types.h>
#include <ddb/ddb.h>
#endif
#if __FreeBSD_version >= 800000
#include <sys/buf_ring.h>
#endif
#include <sys/bus.h>
#include <sys/endian.h>
#include <sys/kernel.h>
#include <sys/kthread.h>
#include <sys/malloc.h>
#include <sys/mbuf.h>
#include <sys/module.h>
#include <sys/rman.h>
#include <sys/smp.h>
#include <sys/socket.h>
#include <sys/sockio.h>
#include <sys/sysctl.h>
#include <sys/taskqueue.h>
#include <sys/eventhandler.h>
#include <machine/bus.h>
#include <machine/resource.h>

#include <net/bpf.h>
#include <net/ethernet.h>
#include <net/if.h>
#include <net/if_arp.h>
#include <net/if_dl.h>
#include <net/if_media.h>

#include <net/if_types.h>
#include <net/if_vlan_var.h>

#include <netinet/in_systm.h>
#include <netinet/in.h>
#include <netinet/if_ether.h>
#include <netinet/ip.h>
#include <netinet/ip6.h>
#include <netinet/tcp.h>
#include <netinet/udp.h>

#include <machine/in_cksum.h>
#include <dev/led/led.h>
#include <dev/pci/pcivar.h>
#include <dev/pci/pcireg.h>

#include "e1000_api.h"
#include "e1000_82571.h"
#include "if_em.h"

/*********************************************************************
 *  Driver version:
 *********************************************************************/
char em_driver_version[] = "7.6.1-k";

/*********************************************************************
 *  PCI Device ID Table
 *
 *  Used by probe to select devices to load on
 *  Last field stores an index into e1000_strings
 *  Last entry must be all 0s
 *
 *  { Vendor ID, Device ID, SubVendor ID, SubDevice ID, String Index }
 *********************************************************************/

static em_vendor_info_t em_vendor_info_array[] =
{
        /* Intel(R) PRO/1000 Network Connection */
        { 0x8086, E1000_DEV_ID_82571EB_COPPER,  PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82571EB_FIBER,   PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82571EB_SERDES,  PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82571EB_SERDES_DUAL,
                                                PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82571EB_SERDES_QUAD,
                                                PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82571EB_QUAD_COPPER,
                                                PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82571EB_QUAD_COPPER_LP,
                                                PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82571EB_QUAD_FIBER,
                                                PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82571PT_QUAD_COPPER,
                                                PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82572EI_COPPER,  PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82572EI_FIBER,   PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82572EI_SERDES,  PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82572EI,         PCI_ANY_ID, PCI_ANY_ID, 0},

        { 0x8086, E1000_DEV_ID_82573E,          PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82573E_IAMT,     PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82573L,          PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82583V,          PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_80003ES2LAN_COPPER_SPT,
                                                PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_80003ES2LAN_SERDES_SPT,
                                                PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_80003ES2LAN_COPPER_DPT,
                                                PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_80003ES2LAN_SERDES_DPT,
                                                PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_ICH8_IGP_M_AMT,  PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_ICH8_IGP_AMT,    PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_ICH8_IGP_C,      PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_ICH8_IFE,        PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_ICH8_IFE_GT,     PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_ICH8_IFE_G,      PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_ICH8_IGP_M,      PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_ICH8_82567V_3,   PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_ICH9_IGP_M_AMT,  PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_ICH9_IGP_AMT,    PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_ICH9_IGP_C,      PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_ICH9_IGP_M,      PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_ICH9_IGP_M_V,    PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_ICH9_IFE,        PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_ICH9_IFE_GT,     PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_ICH9_IFE_G,      PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_ICH9_BM,         PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82574L,          PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82574LA,         PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_ICH10_R_BM_LM,   PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_ICH10_R_BM_LF,   PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_ICH10_R_BM_V,    PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_ICH10_D_BM_LM,   PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_ICH10_D_BM_LF,   PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_ICH10_D_BM_V,    PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_PCH_M_HV_LM,     PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_PCH_M_HV_LC,     PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_PCH_D_HV_DM,     PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_PCH_D_HV_DC,     PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_PCH2_LV_LM,      PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_PCH2_LV_V,       PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_PCH_LPT_I217_LM, PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_PCH_LPT_I217_V,  PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_PCH_LPTLP_I218_LM,
                                                PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_PCH_LPTLP_I218_V,
                                                PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_PCH_I218_LM2,    PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_PCH_I218_V2,     PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_PCH_I218_LM3,    PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_PCH_I218_V3,     PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_PCH_SPT_I219_LM, PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_PCH_SPT_I219_V,  PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_PCH_SPT_I219_LM2,
                                                PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_PCH_SPT_I219_V2, PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_PCH_LBG_I219_LM3,
                                                PCI_ANY_ID, PCI_ANY_ID, 0},
        /* required last entry */
        { 0, 0, 0, 0, 0}
};

/*********************************************************************
 *  Table of branding strings for all supported NICs.
 *********************************************************************/

static char *em_strings[] = {
        "Intel(R) PRO/1000 Network Connection"
};

/*********************************************************************
 *  Function prototypes
 *********************************************************************/
static int      em_probe(device_t);
static int      em_attach(device_t);
static int      em_detach(device_t);
static int      em_shutdown(device_t);
static int      em_suspend(device_t);
static int      em_resume(device_t);
#ifdef EM_MULTIQUEUE
static int      em_mq_start(struct ifnet *, struct mbuf *);
static int      em_mq_start_locked(struct ifnet *,
                    struct tx_ring *);
static void     em_qflush(struct ifnet *);
#else
static void     em_start(struct ifnet *);
static void     em_start_locked(struct ifnet *, struct tx_ring *);
#endif
static int      em_ioctl(struct ifnet *, u_long, caddr_t);
static void     em_init(void *);
static void     em_init_locked(struct adapter *);
static void     em_stop(void *);
static void     em_media_status(struct ifnet *, struct ifmediareq *);
static int      em_media_change(struct ifnet *);
static void     em_identify_hardware(struct adapter *);
static int      em_allocate_pci_resources(struct adapter *);
static int      em_allocate_legacy(struct adapter *);
static int      em_allocate_msix(struct adapter *);
static int      em_allocate_queues(struct adapter *);
static int      em_setup_msix(struct adapter *);
static void     em_free_pci_resources(struct adapter *);
static void     em_local_timer(void *);
static void     em_reset(struct adapter *);
static int      em_setup_interface(device_t, struct adapter *);
static void     em_flush_desc_rings(struct adapter *);

static void     em_setup_transmit_structures(struct adapter *);
static void     em_initialize_transmit_unit(struct adapter *);
static int      em_allocate_transmit_buffers(struct tx_ring *);
static void     em_free_transmit_structures(struct adapter *);
static void     em_free_transmit_buffers(struct tx_ring *);

static int      em_setup_receive_structures(struct adapter *);
static int      em_allocate_receive_buffers(struct rx_ring *);
static void     em_initialize_receive_unit(struct adapter *);
static void     em_free_receive_structures(struct adapter *);
static void     em_free_receive_buffers(struct rx_ring *);

static void     em_enable_intr(struct adapter *);
static void     em_disable_intr(struct adapter *);
static void     em_update_stats_counters(struct adapter *);
static void     em_add_hw_stats(struct adapter *adapter);
static void     em_txeof(struct tx_ring *);
static bool     em_rxeof(struct rx_ring *, int, int *);
#ifndef __NO_STRICT_ALIGNMENT
static int      em_fixup_rx(struct rx_ring *);
#endif
static void     em_setup_rxdesc(union e1000_rx_desc_extended *,
                    const struct em_rxbuffer *rxbuf);
static void     em_receive_checksum(uint32_t status, struct mbuf *);
static void     em_transmit_checksum_setup(struct tx_ring *, struct mbuf *, int,
                    struct ip *, u32 *, u32 *);
static void     em_tso_setup(struct tx_ring *, struct mbuf *, int, struct ip *,
                    struct tcphdr *, u32 *, u32 *);
static void     em_set_promisc(struct adapter *);
static void     em_disable_promisc(struct adapter *);
static void     em_set_multi(struct adapter *);
static void     em_update_link_status(struct adapter *);
static void     em_refresh_mbufs(struct rx_ring *, int);
static void     em_register_vlan(void *, struct ifnet *, u16);
static void     em_unregister_vlan(void *, struct ifnet *, u16);
static void     em_setup_vlan_hw_support(struct adapter *);
static int      em_xmit(struct tx_ring *, struct mbuf **);
static int      em_dma_malloc(struct adapter *, bus_size_t,
                    struct em_dma_alloc *, int);
static void     em_dma_free(struct adapter *, struct em_dma_alloc *);
static int      em_sysctl_nvm_info(SYSCTL_HANDLER_ARGS);
static void     em_print_nvm_info(struct adapter *);
static int      em_sysctl_debug_info(SYSCTL_HANDLER_ARGS);
static void     em_print_debug_info(struct adapter *);
static int      em_is_valid_ether_addr(u8 *);
static int      em_sysctl_int_delay(SYSCTL_HANDLER_ARGS);
static void     em_add_int_delay_sysctl(struct adapter *, const char *,
                    const char *, struct em_int_delay_info *, int, int);
/* Management and WOL Support */
static void     em_init_manageability(struct adapter *);
static void     em_release_manageability(struct adapter *);
static void     em_get_hw_control(struct adapter *);
static void     em_release_hw_control(struct adapter *);
static void     em_get_wakeup(device_t);
static void     em_enable_wakeup(device_t);
static int      em_enable_phy_wakeup(struct adapter *);
static void     em_led_func(void *, int);
static void     em_disable_aspm(struct adapter *);

static int      em_irq_fast(void *);

/* MSIX handlers */
static void     em_msix_tx(void *);
static void     em_msix_rx(void *);
static void     em_msix_link(void *);
static void     em_handle_tx(void *context, int pending);
static void     em_handle_rx(void *context, int pending);
static void     em_handle_link(void *context, int pending);

#ifdef EM_MULTIQUEUE
static void     em_enable_vectors_82574(struct adapter *);
#endif

static void     em_set_sysctl_value(struct adapter *, const char *,
                    const char *, int *, int);
static int      em_set_flowcntl(SYSCTL_HANDLER_ARGS);
static int      em_sysctl_eee(SYSCTL_HANDLER_ARGS);

static __inline void em_rx_discard(struct rx_ring *, int);

#ifdef DEVICE_POLLING
static poll_handler_t em_poll;
#endif /* DEVICE_POLLING */

/*********************************************************************
 *  FreeBSD Device Interface Entry Points
 *********************************************************************/

static device_method_t em_methods[] = {
        /* Device interface */
        DEVMETHOD(device_probe, em_probe),
        DEVMETHOD(device_attach, em_attach),
        DEVMETHOD(device_detach, em_detach),
        DEVMETHOD(device_shutdown, em_shutdown),
        DEVMETHOD(device_suspend, em_suspend),
        DEVMETHOD(device_resume, em_resume),
        DEVMETHOD_END
};

static driver_t em_driver = {
        "em", em_methods, sizeof(struct adapter),
};

devclass_t em_devclass;
DRIVER_MODULE(em, pci, em_driver, em_devclass, 0, 0);
MODULE_DEPEND(em, pci, 1, 1, 1);
MODULE_DEPEND(em, ether, 1, 1, 1);

/*********************************************************************
 *  Tunable default values.
 *********************************************************************/

#define EM_TICKS_TO_USECS(ticks)        ((1024 * (ticks) + 500) / 1000)
#define EM_USECS_TO_TICKS(usecs)        ((1000 * (usecs) + 512) / 1024)
#define M_TSO_LEN                       66

#define MAX_INTS_PER_SEC        8000
#define DEFAULT_ITR             (1000000000/(MAX_INTS_PER_SEC * 256))
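
/*
 * Worked example, assuming the stock if_em.h value EM_TIDV = 64 (one
 * hardware delay-timer tick is 1.024 usecs): EM_TICKS_TO_USECS(64) =
 * (1024 * 64 + 500) / 1000 = 66 usecs; the +500 and +512 terms round
 * to the nearest unit. Likewise DEFAULT_ITR = 1000000000 / (8000 * 256)
 * = 488, the ITR register value (in 256 ns units) that caps the
 * interrupt rate at roughly MAX_INTS_PER_SEC.
 */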

/* Allow common code without TSO */
#ifndef CSUM_TSO
#define CSUM_TSO        0
#endif

#define TSO_WORKAROUND  4

static SYSCTL_NODE(_hw, OID_AUTO, em, CTLFLAG_RD, 0, "EM driver parameters");

static int em_disable_crc_stripping = 0;
SYSCTL_INT(_hw_em, OID_AUTO, disable_crc_stripping, CTLFLAG_RDTUN,
    &em_disable_crc_stripping, 0, "Disable CRC Stripping");

static int em_tx_int_delay_dflt = EM_TICKS_TO_USECS(EM_TIDV);
static int em_rx_int_delay_dflt = EM_TICKS_TO_USECS(EM_RDTR);
TUNABLE_INT("hw.em.tx_int_delay", &em_tx_int_delay_dflt);
TUNABLE_INT("hw.em.rx_int_delay", &em_rx_int_delay_dflt);
SYSCTL_INT(_hw_em, OID_AUTO, tx_int_delay, CTLFLAG_RDTUN, &em_tx_int_delay_dflt,
    0, "Default transmit interrupt delay in usecs");
SYSCTL_INT(_hw_em, OID_AUTO, rx_int_delay, CTLFLAG_RDTUN, &em_rx_int_delay_dflt,
    0, "Default receive interrupt delay in usecs");

static int em_tx_abs_int_delay_dflt = EM_TICKS_TO_USECS(EM_TADV);
static int em_rx_abs_int_delay_dflt = EM_TICKS_TO_USECS(EM_RADV);
TUNABLE_INT("hw.em.tx_abs_int_delay", &em_tx_abs_int_delay_dflt);
TUNABLE_INT("hw.em.rx_abs_int_delay", &em_rx_abs_int_delay_dflt);
SYSCTL_INT(_hw_em, OID_AUTO, tx_abs_int_delay, CTLFLAG_RDTUN,
    &em_tx_abs_int_delay_dflt, 0,
    "Default transmit interrupt delay limit in usecs");
SYSCTL_INT(_hw_em, OID_AUTO, rx_abs_int_delay, CTLFLAG_RDTUN,
    &em_rx_abs_int_delay_dflt, 0,
    "Default receive interrupt delay limit in usecs");

static int em_rxd = EM_DEFAULT_RXD;
static int em_txd = EM_DEFAULT_TXD;
TUNABLE_INT("hw.em.rxd", &em_rxd);
TUNABLE_INT("hw.em.txd", &em_txd);
SYSCTL_INT(_hw_em, OID_AUTO, rxd, CTLFLAG_RDTUN, &em_rxd, 0,
    "Number of receive descriptors per queue");
SYSCTL_INT(_hw_em, OID_AUTO, txd, CTLFLAG_RDTUN, &em_txd, 0,
    "Number of transmit descriptors per queue");

static int em_smart_pwr_down = FALSE;
TUNABLE_INT("hw.em.smart_pwr_down", &em_smart_pwr_down);
SYSCTL_INT(_hw_em, OID_AUTO, smart_pwr_down, CTLFLAG_RDTUN, &em_smart_pwr_down,
    0, "Set to true to leave smart power down enabled on newer adapters");

/* Controls whether promiscuous also shows bad packets */
static int em_debug_sbp = FALSE;
TUNABLE_INT("hw.em.sbp", &em_debug_sbp);
SYSCTL_INT(_hw_em, OID_AUTO, sbp, CTLFLAG_RDTUN, &em_debug_sbp, 0,
    "Show bad packets in promiscuous mode");

static int em_enable_msix = TRUE;
TUNABLE_INT("hw.em.enable_msix", &em_enable_msix);
SYSCTL_INT(_hw_em, OID_AUTO, enable_msix, CTLFLAG_RDTUN, &em_enable_msix, 0,
    "Enable MSI-X interrupts");

#ifdef EM_MULTIQUEUE
static int em_num_queues = 1;
TUNABLE_INT("hw.em.num_queues", &em_num_queues);
SYSCTL_INT(_hw_em, OID_AUTO, num_queues, CTLFLAG_RDTUN, &em_num_queues, 0,
    "82574 only: Number of queues to configure, 0 indicates autoconfigure");
#endif

/*
** Global variable to store the last CPU used when binding queues
** to CPUs in em_allocate_msix.  Starts at CPU_FIRST and increments
** each time a queue is bound to a CPU.
*/
static int em_last_bind_cpu = -1;

/* How many packets rxeof tries to clean at a time */
static int em_rx_process_limit = 100;
TUNABLE_INT("hw.em.rx_process_limit", &em_rx_process_limit);
SYSCTL_INT(_hw_em, OID_AUTO, rx_process_limit, CTLFLAG_RDTUN,
    &em_rx_process_limit, 0,
    "Maximum number of received packets to process "
    "at a time, -1 means unlimited");

/* Energy Efficient Ethernet - default to OFF (1 leaves EEE disabled) */
static int eee_setting = 1;
TUNABLE_INT("hw.em.eee_setting", &eee_setting);
SYSCTL_INT(_hw_em, OID_AUTO, eee_setting, CTLFLAG_RDTUN, &eee_setting, 0,
    "Disable Energy Efficient Ethernet");
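
/*
 * Usage sketch (illustrative values, not tuning advice): the read-only
 * tunables above are set from /boot/loader.conf before the driver loads,
 * for example
 *
 *      hw.em.txd="1024"
 *      hw.em.rxd="1024"
 *      hw.em.rx_int_delay="32"
 *      hw.em.eee_setting="0"
 *
 * and the active values can be inspected at runtime with "sysctl hw.em".
 */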

/* Global used in WOL setup with multiport cards */
static int global_quad_port_a = 0;

#ifdef DEV_NETMAP       /* see ixgbe.c for details */
#include <dev/netmap/if_em_netmap.h>
#endif /* DEV_NETMAP */

/*********************************************************************
 *  Device identification routine
 *
 *  em_probe determines whether the driver should be loaded on an
 *  adapter based on the adapter's PCI vendor/device id.
 *
 *  return BUS_PROBE_DEFAULT on success, positive on failure
 *********************************************************************/

static int
em_probe(device_t dev)
{
        char            adapter_name[60];
        uint16_t        pci_vendor_id = 0;
        uint16_t        pci_device_id = 0;
        uint16_t        pci_subvendor_id = 0;
        uint16_t        pci_subdevice_id = 0;
        em_vendor_info_t *ent;

        INIT_DEBUGOUT("em_probe: begin");

        pci_vendor_id = pci_get_vendor(dev);
        if (pci_vendor_id != EM_VENDOR_ID)
                return (ENXIO);

        pci_device_id = pci_get_device(dev);
        pci_subvendor_id = pci_get_subvendor(dev);
        pci_subdevice_id = pci_get_subdevice(dev);

        ent = em_vendor_info_array;
        while (ent->vendor_id != 0) {
                if ((pci_vendor_id == ent->vendor_id) &&
                    (pci_device_id == ent->device_id) &&

                    ((pci_subvendor_id == ent->subvendor_id) ||
                    (ent->subvendor_id == PCI_ANY_ID)) &&

                    ((pci_subdevice_id == ent->subdevice_id) ||
                    (ent->subdevice_id == PCI_ANY_ID))) {
                        sprintf(adapter_name, "%s %s",
                                em_strings[ent->index],
                                em_driver_version);
                        device_set_desc_copy(dev, adapter_name);
                        return (BUS_PROBE_DEFAULT);
                }
                ent++;
        }

        return (ENXIO);
}


/*********************************************************************
 *  Device initialization routine
 *
 *  The attach entry point is called when the driver is being loaded.
 *  This routine identifies the type of hardware, allocates all resources
 *  and initializes the hardware.
 *
 *  return 0 on success, positive on failure
 *********************************************************************/

static int
em_attach(device_t dev)
{
        struct adapter  *adapter;
        struct e1000_hw *hw;
        int             error = 0;

        INIT_DEBUGOUT("em_attach: begin");

        if (resource_disabled("em", device_get_unit(dev))) {
                device_printf(dev, "Disabled by device hint\n");
                return (ENXIO);
        }

        adapter = device_get_softc(dev);
        adapter->dev = adapter->osdep.dev = dev;
        hw = &adapter->hw;
        EM_CORE_LOCK_INIT(adapter, device_get_nameunit(dev));

        /* SYSCTL stuff */
        SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
            SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
            OID_AUTO, "nvm", CTLTYPE_INT|CTLFLAG_RW, adapter, 0,
            em_sysctl_nvm_info, "I", "NVM Information");

        SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
            SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
            OID_AUTO, "debug", CTLTYPE_INT|CTLFLAG_RW, adapter, 0,
            em_sysctl_debug_info, "I", "Debug Information");

        SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
            SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
            OID_AUTO, "fc", CTLTYPE_INT|CTLFLAG_RW, adapter, 0,
            em_set_flowcntl, "I", "Flow Control");

        callout_init_mtx(&adapter->timer, &adapter->core_mtx, 0);

        /* Determine hardware and mac info */
        em_identify_hardware(adapter);

        /* Setup PCI resources */
        if (em_allocate_pci_resources(adapter)) {
                device_printf(dev, "Allocation of PCI resources failed\n");
                error = ENXIO;
                goto err_pci;
        }

        /*
        ** For ICH8 and family we need to
        ** map the flash memory, and this
        ** must happen after the MAC is
        ** identified
        */
        if ((hw->mac.type == e1000_ich8lan) ||
            (hw->mac.type == e1000_ich9lan) ||
            (hw->mac.type == e1000_ich10lan) ||
            (hw->mac.type == e1000_pchlan) ||
            (hw->mac.type == e1000_pch2lan) ||
            (hw->mac.type == e1000_pch_lpt)) {
                int rid = EM_BAR_TYPE_FLASH;
                adapter->flash = bus_alloc_resource_any(dev,
                    SYS_RES_MEMORY, &rid, RF_ACTIVE);
                if (adapter->flash == NULL) {
                        device_printf(dev, "Mapping of Flash failed\n");
                        error = ENXIO;
                        goto err_pci;
                }
                /* This is used in the shared code */
                hw->flash_address = (u8 *)adapter->flash;
                adapter->osdep.flash_bus_space_tag =
                    rman_get_bustag(adapter->flash);
                adapter->osdep.flash_bus_space_handle =
                    rman_get_bushandle(adapter->flash);
        }
        /*
        ** In the new SPT device, flash is not a
        ** separate BAR; rather, it is also in BAR0,
        ** so use the same tag and an offset handle for the
        ** FLASH read/write macros in the shared code.
        */
        else if (hw->mac.type == e1000_pch_spt) {
                adapter->osdep.flash_bus_space_tag =
                    adapter->osdep.mem_bus_space_tag;
                adapter->osdep.flash_bus_space_handle =
                    adapter->osdep.mem_bus_space_handle
                    + E1000_FLASH_BASE_ADDR;
        }

        /* Do Shared Code initialization */
        error = e1000_setup_init_funcs(hw, TRUE);
        if (error) {
                device_printf(dev, "Setup of Shared code failed, error %d\n",
                    error);
                error = ENXIO;
                goto err_pci;
        }

        /*
         * Setup MSI/X or MSI if PCI Express
         */
        adapter->msix = em_setup_msix(adapter);

        e1000_get_bus_info(hw);

        /* Set up some sysctls for the tunable interrupt delays */
        em_add_int_delay_sysctl(adapter, "rx_int_delay",
            "receive interrupt delay in usecs", &adapter->rx_int_delay,
            E1000_REGISTER(hw, E1000_RDTR), em_rx_int_delay_dflt);
        em_add_int_delay_sysctl(adapter, "tx_int_delay",
            "transmit interrupt delay in usecs", &adapter->tx_int_delay,
            E1000_REGISTER(hw, E1000_TIDV), em_tx_int_delay_dflt);
        em_add_int_delay_sysctl(adapter, "rx_abs_int_delay",
            "receive interrupt delay limit in usecs",
            &adapter->rx_abs_int_delay,
            E1000_REGISTER(hw, E1000_RADV),
            em_rx_abs_int_delay_dflt);
        em_add_int_delay_sysctl(adapter, "tx_abs_int_delay",
            "transmit interrupt delay limit in usecs",
            &adapter->tx_abs_int_delay,
            E1000_REGISTER(hw, E1000_TADV),
            em_tx_abs_int_delay_dflt);
        em_add_int_delay_sysctl(adapter, "itr",
            "interrupt delay limit in usecs/4",
            &adapter->tx_itr,
            E1000_REGISTER(hw, E1000_ITR),
            DEFAULT_ITR);

        /* Sysctl for limiting the amount of work done in the taskqueue */
        em_set_sysctl_value(adapter, "rx_processing_limit",
            "max number of rx packets to process", &adapter->rx_process_limit,
            em_rx_process_limit);

        /*
         * Validate the number of transmit and receive descriptors. The
         * count must not exceed the hardware maximum, and the resulting
         * ring size in bytes must be a multiple of EM_DBA_ALIGN.
         */
        if (((em_txd * sizeof(struct e1000_tx_desc)) % EM_DBA_ALIGN) != 0 ||
            (em_txd > EM_MAX_TXD) || (em_txd < EM_MIN_TXD)) {
                device_printf(dev, "Using %d TX descriptors instead of %d!\n",
                    EM_DEFAULT_TXD, em_txd);
                adapter->num_tx_desc = EM_DEFAULT_TXD;
        } else
                adapter->num_tx_desc = em_txd;

        if (((em_rxd * sizeof(union e1000_rx_desc_extended)) % EM_DBA_ALIGN) != 0 ||
            (em_rxd > EM_MAX_RXD) || (em_rxd < EM_MIN_RXD)) {
                device_printf(dev, "Using %d RX descriptors instead of %d!\n",
                    EM_DEFAULT_RXD, em_rxd);
                adapter->num_rx_desc = EM_DEFAULT_RXD;
        } else
                adapter->num_rx_desc = em_rxd;
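
        /*
         * Worked example, assuming EM_DBA_ALIGN is 128 and the 16-byte
         * descriptor layout: em_txd = 1024 gives a 16384-byte ring, and
         * 16384 % 128 == 0, so the request is honored; a count that is
         * not a multiple of 8 (e.g. 1030) fails the alignment test and
         * falls back to EM_DEFAULT_TXD.
         */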

        hw->mac.autoneg = DO_AUTO_NEG;
        hw->phy.autoneg_wait_to_complete = FALSE;
        hw->phy.autoneg_advertised = AUTONEG_ADV_DEFAULT;

        /* Copper options */
        if (hw->phy.media_type == e1000_media_type_copper) {
                hw->phy.mdix = AUTO_ALL_MODES;
                hw->phy.disable_polarity_correction = FALSE;
                hw->phy.ms_type = EM_MASTER_SLAVE;
        }

        /*
         * Set the frame limits assuming
         * standard ethernet sized frames.
         */
        adapter->hw.mac.max_frame_size =
            ETHERMTU + ETHER_HDR_LEN + ETHERNET_FCS_SIZE;
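
        /*
         * With the standard constants (1500-byte ETHERMTU, 14-byte
         * Ethernet header, 4-byte FCS) this works out to 1518 bytes;
         * the SIOCSIFMTU handler recomputes it when the MTU changes.
         */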

        /*
         * This controls when hardware reports transmit completion
         * status.
         */
        hw->mac.report_tx_early = 1;

        /*
        ** Get queue/ring memory
        */
        if (em_allocate_queues(adapter)) {
                error = ENOMEM;
                goto err_pci;
        }

        /* Allocate multicast array memory. */
        adapter->mta = malloc(sizeof(u8) * ETH_ADDR_LEN *
            MAX_NUM_MULTICAST_ADDRESSES, M_DEVBUF, M_NOWAIT);
        if (adapter->mta == NULL) {
                device_printf(dev, "Can not allocate multicast setup array\n");
                error = ENOMEM;
                goto err_late;
        }

        /* Check SOL/IDER usage */
        if (e1000_check_reset_block(hw))
                device_printf(dev, "PHY reset is blocked"
                    " due to SOL/IDER session.\n");

        /* Sysctl for setting Energy Efficient Ethernet */
        hw->dev_spec.ich8lan.eee_disable = eee_setting;
        SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
            SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
            OID_AUTO, "eee_control", CTLTYPE_INT|CTLFLAG_RW,
            adapter, 0, em_sysctl_eee, "I",
            "Disable Energy Efficient Ethernet");

        /*
        ** Start from a known state; this is
        ** important for reliably reading the
        ** NVM and MAC address afterwards.
        */
        e1000_reset_hw(hw);


        /* Make sure we have a good EEPROM before we read from it */
        if (e1000_validate_nvm_checksum(hw) < 0) {
                /*
                ** Some PCI-E parts fail the first check due to
                ** the link being in a sleep state; call it again,
                ** and if it fails a second time it's a real issue.
                */
                if (e1000_validate_nvm_checksum(hw) < 0) {
                        device_printf(dev,
                            "The EEPROM Checksum Is Not Valid\n");
                        error = EIO;
                        goto err_late;
                }
        }

        /* Copy the permanent MAC address out of the EEPROM */
        if (e1000_read_mac_addr(hw) < 0) {
                device_printf(dev, "EEPROM read error while reading MAC"
                    " address\n");
                error = EIO;
                goto err_late;
        }

        if (!em_is_valid_ether_addr(hw->mac.addr)) {
                device_printf(dev, "Invalid MAC address\n");
                error = EIO;
                goto err_late;
        }

        /* Disable ULP support */
        e1000_disable_ulp_lpt_lp(hw, TRUE);

        /*
        **  Do interrupt configuration
        */
        if (adapter->msix > 1) /* Do MSIX */
                error = em_allocate_msix(adapter);
        else  /* MSI or Legacy */
                error = em_allocate_legacy(adapter);
        if (error)
                goto err_late;

        /*
         * Get Wake-on-Lan and Management info for later use
         */
        em_get_wakeup(dev);

        /* Setup OS specific network interface */
        if (em_setup_interface(dev, adapter) != 0)
                goto err_late;

        em_reset(adapter);

        /* Initialize statistics */
        em_update_stats_counters(adapter);

        hw->mac.get_link_status = 1;
        em_update_link_status(adapter);

        /* Register for VLAN events */
        adapter->vlan_attach = EVENTHANDLER_REGISTER(vlan_config,
            em_register_vlan, adapter, EVENTHANDLER_PRI_FIRST);
        adapter->vlan_detach = EVENTHANDLER_REGISTER(vlan_unconfig,
            em_unregister_vlan, adapter, EVENTHANDLER_PRI_FIRST);

        em_add_hw_stats(adapter);

        /* Non-AMT based hardware can now take control from firmware */
        if (adapter->has_manage && !adapter->has_amt)
                em_get_hw_control(adapter);

        /* Tell the stack that the interface is not active */
        adapter->ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
        adapter->ifp->if_drv_flags |= IFF_DRV_OACTIVE;

        adapter->led_dev = led_create(em_led_func, adapter,
            device_get_nameunit(dev));
#ifdef DEV_NETMAP
        em_netmap_attach(adapter);
#endif /* DEV_NETMAP */

        INIT_DEBUGOUT("em_attach: end");

        return (0);

err_late:
        em_free_transmit_structures(adapter);
        em_free_receive_structures(adapter);
        em_release_hw_control(adapter);
        if (adapter->ifp != NULL)
                if_free(adapter->ifp);
err_pci:
        em_free_pci_resources(adapter);
        free(adapter->mta, M_DEVBUF);
        EM_CORE_LOCK_DESTROY(adapter);

        return (error);
}

/*********************************************************************
 *  Device removal routine
 *
 *  The detach entry point is called when the driver is being removed.
 *  This routine stops the adapter and deallocates all the resources
 *  that were allocated for driver operation.
 *
 *  return 0 on success, positive on failure
 *********************************************************************/

static int
em_detach(device_t dev)
{
        struct adapter  *adapter = device_get_softc(dev);
        struct ifnet    *ifp = adapter->ifp;

        INIT_DEBUGOUT("em_detach: begin");

        /* Make sure VLANS are not using driver */
        if (adapter->ifp->if_vlantrunk != NULL) {
                device_printf(dev,"Vlan in use, detach first\n");
                return (EBUSY);
        }

#ifdef DEVICE_POLLING
        if (ifp->if_capenable & IFCAP_POLLING)
                ether_poll_deregister(ifp);
#endif

        if (adapter->led_dev != NULL)
                led_destroy(adapter->led_dev);

        EM_CORE_LOCK(adapter);
        adapter->in_detach = 1;
        em_stop(adapter);
        EM_CORE_UNLOCK(adapter);
        EM_CORE_LOCK_DESTROY(adapter);

        e1000_phy_hw_reset(&adapter->hw);

        em_release_manageability(adapter);
        em_release_hw_control(adapter);

        /* Unregister VLAN events */
        if (adapter->vlan_attach != NULL)
                EVENTHANDLER_DEREGISTER(vlan_config, adapter->vlan_attach);
        if (adapter->vlan_detach != NULL)
                EVENTHANDLER_DEREGISTER(vlan_unconfig, adapter->vlan_detach);

        ether_ifdetach(adapter->ifp);
        callout_drain(&adapter->timer);

#ifdef DEV_NETMAP
        netmap_detach(ifp);
#endif /* DEV_NETMAP */

        em_free_pci_resources(adapter);
        bus_generic_detach(dev);
        if_free(ifp);

        em_free_transmit_structures(adapter);
        em_free_receive_structures(adapter);

        em_release_hw_control(adapter);
        free(adapter->mta, M_DEVBUF);

        return (0);
}

/*********************************************************************
 *
 *  Shutdown entry point
 *
 **********************************************************************/

static int
em_shutdown(device_t dev)
{
        return em_suspend(dev);
}

/*
 * Suspend/resume device methods.
 */
static int
em_suspend(device_t dev)
{
        struct adapter *adapter = device_get_softc(dev);

        EM_CORE_LOCK(adapter);

        em_release_manageability(adapter);
        em_release_hw_control(adapter);
        em_enable_wakeup(dev);

        EM_CORE_UNLOCK(adapter);

        return bus_generic_suspend(dev);
}

static int
em_resume(device_t dev)
{
        struct adapter *adapter = device_get_softc(dev);
        struct tx_ring  *txr = adapter->tx_rings;
        struct ifnet *ifp = adapter->ifp;

        EM_CORE_LOCK(adapter);
        if (adapter->hw.mac.type == e1000_pch2lan)
                e1000_resume_workarounds_pchlan(&adapter->hw);
        em_init_locked(adapter);
        em_init_manageability(adapter);

        if ((ifp->if_flags & IFF_UP) &&
            (ifp->if_drv_flags & IFF_DRV_RUNNING) && adapter->link_active) {
                for (int i = 0; i < adapter->num_queues; i++, txr++) {
                        EM_TX_LOCK(txr);
#ifdef EM_MULTIQUEUE
                        if (!drbr_empty(ifp, txr->br))
                                em_mq_start_locked(ifp, txr);
#else
                        if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
                                em_start_locked(ifp, txr);
#endif
                        EM_TX_UNLOCK(txr);
                }
        }
        EM_CORE_UNLOCK(adapter);

        return bus_generic_resume(dev);
}


#ifndef EM_MULTIQUEUE
static void
em_start_locked(struct ifnet *ifp, struct tx_ring *txr)
{
        struct adapter  *adapter = ifp->if_softc;
        struct mbuf     *m_head;

        EM_TX_LOCK_ASSERT(txr);

        if ((ifp->if_drv_flags & (IFF_DRV_RUNNING|IFF_DRV_OACTIVE)) !=
            IFF_DRV_RUNNING)
                return;

        if (!adapter->link_active)
                return;

        while (!IFQ_DRV_IS_EMPTY(&ifp->if_snd)) {
                /* Call cleanup if number of TX descriptors low */
                if (txr->tx_avail <= EM_TX_CLEANUP_THRESHOLD)
                        em_txeof(txr);
                if (txr->tx_avail < EM_MAX_SCATTER) {
                        ifp->if_drv_flags |= IFF_DRV_OACTIVE;
                        break;
                }
                IFQ_DRV_DEQUEUE(&ifp->if_snd, m_head);
                if (m_head == NULL)
                        break;
                /*
                 *  Encapsulation can modify our pointer, and/or make it
                 *  NULL on failure.  In that event, we can't requeue.
                 */
                if (em_xmit(txr, &m_head)) {
                        if (m_head == NULL)
                                break;
                        IFQ_DRV_PREPEND(&ifp->if_snd, m_head);
                        break;
                }

                /* Mark the queue as having work */
                if (txr->busy == EM_TX_IDLE)
                        txr->busy = EM_TX_BUSY;

                /* Send a copy of the frame to the BPF listener */
                ETHER_BPF_MTAP(ifp, m_head);

        }

        return;
}

static void
em_start(struct ifnet *ifp)
{
        struct adapter  *adapter = ifp->if_softc;
        struct tx_ring  *txr = adapter->tx_rings;

        if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
                EM_TX_LOCK(txr);
                em_start_locked(ifp, txr);
                EM_TX_UNLOCK(txr);
        }
        return;
}
#else /* EM_MULTIQUEUE */
/*********************************************************************
 *  Multiqueue Transmit routines
 *
 *  em_mq_start is called by the stack to initiate a transmit.
 *  However, if the ring is busy the driver can queue the request
 *  rather than doing an immediate send; this buffered hand-off,
 *  more than simply having multiple TX queues, is where the
 *  advantage of this driver lies.
 **********************************************************************/
/*
** Multiqueue capable stack interface
*/
static int
em_mq_start(struct ifnet *ifp, struct mbuf *m)
{
        struct adapter  *adapter = ifp->if_softc;
        struct tx_ring  *txr = adapter->tx_rings;
        unsigned int    i, error;

        if (M_HASHTYPE_GET(m) != M_HASHTYPE_NONE)
                i = m->m_pkthdr.flowid % adapter->num_queues;
        else
                i = curcpu % adapter->num_queues;

        txr = &adapter->tx_rings[i];

        error = drbr_enqueue(ifp, txr->br, m);
        if (error)
                return (error);

        if (EM_TX_TRYLOCK(txr)) {
                em_mq_start_locked(ifp, txr);
                EM_TX_UNLOCK(txr);
        } else
                taskqueue_enqueue(txr->tq, &txr->tx_task);

        return (0);
}
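
/*
 * Illustrative example of the selection above: on a two-queue 82574, a
 * flow hashed to m_pkthdr.flowid == 77 maps to ring 77 % 2 == 1, so all
 * packets of that flow stay on one ring (and thus in order), while
 * unhashed traffic is spread by the sending CPU's index instead.
 */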

static int
em_mq_start_locked(struct ifnet *ifp, struct tx_ring *txr)
{
        struct adapter  *adapter = txr->adapter;
        struct mbuf     *next;
        int             err = 0, enq = 0;

        if ((ifp->if_drv_flags & (IFF_DRV_RUNNING | IFF_DRV_OACTIVE)) !=
            IFF_DRV_RUNNING || adapter->link_active == 0) {
                return (ENETDOWN);
        }

        /* Process the queue */
        while ((next = drbr_peek(ifp, txr->br)) != NULL) {
                if ((err = em_xmit(txr, &next)) != 0) {
                        if (next == NULL) {
                                /* It was freed, move forward */
                                drbr_advance(ifp, txr->br);
                        } else {
                                /*
                                 * Still have one left, it may not be
                                 * the same since the transmit function
                                 * may have changed it.
                                 */
                                drbr_putback(ifp, txr->br, next);
                        }
                        break;
                }
                drbr_advance(ifp, txr->br);
                enq++;
                ifp->if_obytes += next->m_pkthdr.len;
                if (next->m_flags & M_MCAST)
                        ifp->if_omcasts++;
                ETHER_BPF_MTAP(ifp, next);
                if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
                        break;
        }

        /* Mark the queue as having work */
        if ((enq > 0) && (txr->busy == EM_TX_IDLE))
                txr->busy = EM_TX_BUSY;

        if (txr->tx_avail < EM_MAX_SCATTER)
                em_txeof(txr);
        if (txr->tx_avail < EM_MAX_SCATTER) {
                ifp->if_drv_flags |= IFF_DRV_OACTIVE;
        }
        return (err);
}

/*
** Flush all ring buffers
*/
static void
em_qflush(struct ifnet *ifp)
{
        struct adapter  *adapter = ifp->if_softc;
        struct tx_ring  *txr = adapter->tx_rings;
        struct mbuf     *m;

        for (int i = 0; i < adapter->num_queues; i++, txr++) {
                EM_TX_LOCK(txr);
                while ((m = buf_ring_dequeue_sc(txr->br)) != NULL)
                        m_freem(m);
                EM_TX_UNLOCK(txr);
        }
        if_qflush(ifp);
}
#endif /* EM_MULTIQUEUE */

/*********************************************************************
 *  Ioctl entry point
 *
 *  em_ioctl is called when the user wants to configure the
 *  interface.
 *
 *  return 0 on success, positive on failure
 **********************************************************************/

static int
em_ioctl(struct ifnet *ifp, u_long command, caddr_t data)
{
        struct adapter  *adapter = ifp->if_softc;
        struct ifreq    *ifr = (struct ifreq *)data;
#if defined(INET) || defined(INET6)
        struct ifaddr   *ifa = (struct ifaddr *)data;
#endif
        bool            avoid_reset = FALSE;
        int             error = 0;

        if (adapter->in_detach)
                return (error);

        switch (command) {
        case SIOCSIFADDR:
#ifdef INET
                if (ifa->ifa_addr->sa_family == AF_INET)
                        avoid_reset = TRUE;
#endif
#ifdef INET6
                if (ifa->ifa_addr->sa_family == AF_INET6)
                        avoid_reset = TRUE;
#endif
                /*
                ** Calling init results in link renegotiation,
                ** so we avoid doing it when possible.
                */
                if (avoid_reset) {
                        ifp->if_flags |= IFF_UP;
                        if (!(ifp->if_drv_flags & IFF_DRV_RUNNING))
                                em_init(adapter);
#ifdef INET
                        if (!(ifp->if_flags & IFF_NOARP))
                                arp_ifinit(ifp, ifa);
#endif
                } else
                        error = ether_ioctl(ifp, command, data);
                break;
        case SIOCSIFMTU:
            {
                int max_frame_size;

                IOCTL_DEBUGOUT("ioctl rcv'd: SIOCSIFMTU (Set Interface MTU)");

                EM_CORE_LOCK(adapter);
                switch (adapter->hw.mac.type) {
                case e1000_82571:
                case e1000_82572:
                case e1000_ich9lan:
                case e1000_ich10lan:
                case e1000_pch2lan:
                case e1000_pch_lpt:
                case e1000_pch_spt:
                case e1000_82574:
                case e1000_82583:
                case e1000_80003es2lan: /* 9K Jumbo Frame size */
                        max_frame_size = 9234;
                        break;
                case e1000_pchlan:
                        max_frame_size = 4096;
                        break;
                        /* Adapters that do not support jumbo frames */
                case e1000_ich8lan:
                        max_frame_size = ETHER_MAX_LEN;
                        break;
                default:
                        max_frame_size = MAX_JUMBO_FRAME_SIZE;
                }
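                /*
                 * Example: on a 9234-byte part the largest MTU accepted
                 * below is 9234 - ETHER_HDR_LEN (14) - ETHER_CRC_LEN (4)
                 * = 9216 bytes.
                 */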
                if (ifr->ifr_mtu > max_frame_size - ETHER_HDR_LEN -
                    ETHER_CRC_LEN) {
                        EM_CORE_UNLOCK(adapter);
                        error = EINVAL;
                        break;
                }

                ifp->if_mtu = ifr->ifr_mtu;
                adapter->hw.mac.max_frame_size =
                    ifp->if_mtu + ETHER_HDR_LEN + ETHER_CRC_LEN;
                em_init_locked(adapter);
                EM_CORE_UNLOCK(adapter);
                break;
            }
        case SIOCSIFFLAGS:
                IOCTL_DEBUGOUT("ioctl rcv'd:\
                    SIOCSIFFLAGS (Set Interface Flags)");
                EM_CORE_LOCK(adapter);
                if (ifp->if_flags & IFF_UP) {
                        if ((ifp->if_drv_flags & IFF_DRV_RUNNING)) {
                                if ((ifp->if_flags ^ adapter->if_flags) &
                                    (IFF_PROMISC | IFF_ALLMULTI)) {
                                        em_disable_promisc(adapter);
                                        em_set_promisc(adapter);
                                }
                        } else
                                em_init_locked(adapter);
                } else
                        if (ifp->if_drv_flags & IFF_DRV_RUNNING)
                                em_stop(adapter);
                adapter->if_flags = ifp->if_flags;
                EM_CORE_UNLOCK(adapter);
                break;
        case SIOCADDMULTI:
        case SIOCDELMULTI:
                IOCTL_DEBUGOUT("ioctl rcv'd: SIOC(ADD|DEL)MULTI");
                if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
                        EM_CORE_LOCK(adapter);
                        em_disable_intr(adapter);
                        em_set_multi(adapter);
#ifdef DEVICE_POLLING
                        if (!(ifp->if_capenable & IFCAP_POLLING))
#endif
                                em_enable_intr(adapter);
                        EM_CORE_UNLOCK(adapter);
                }
                break;
        case SIOCSIFMEDIA:
                /* Check SOL/IDER usage */
                EM_CORE_LOCK(adapter);
                if (e1000_check_reset_block(&adapter->hw)) {
                        EM_CORE_UNLOCK(adapter);
                        device_printf(adapter->dev, "Media change is"
                            " blocked due to SOL/IDER session.\n");
                        break;
                }
                EM_CORE_UNLOCK(adapter);
                /* falls thru */
        case SIOCGIFMEDIA:
                IOCTL_DEBUGOUT("ioctl rcv'd: \
                    SIOCxIFMEDIA (Get/Set Interface Media)");
                error = ifmedia_ioctl(ifp, ifr, &adapter->media, command);
                break;
        case SIOCSIFCAP:
            {
                int mask, reinit;

                IOCTL_DEBUGOUT("ioctl rcv'd: SIOCSIFCAP (Set Capabilities)");
                reinit = 0;
                mask = ifr->ifr_reqcap ^ ifp->if_capenable;
#ifdef DEVICE_POLLING
                if (mask & IFCAP_POLLING) {
                        if (ifr->ifr_reqcap & IFCAP_POLLING) {
                                error = ether_poll_register(em_poll, ifp);
                                if (error)
                                        return (error);
                                EM_CORE_LOCK(adapter);
                                em_disable_intr(adapter);
                                ifp->if_capenable |= IFCAP_POLLING;
                                EM_CORE_UNLOCK(adapter);
                        } else {
                                error = ether_poll_deregister(ifp);
                                /* Enable interrupt even in error case */
                                EM_CORE_LOCK(adapter);
                                em_enable_intr(adapter);
                                ifp->if_capenable &= ~IFCAP_POLLING;
                                EM_CORE_UNLOCK(adapter);
                        }
                }
#endif
                if (mask & IFCAP_HWCSUM) {
                        ifp->if_capenable ^= IFCAP_HWCSUM;
                        reinit = 1;
                }
                if (mask & IFCAP_TSO4) {
                        ifp->if_capenable ^= IFCAP_TSO4;
                        reinit = 1;
                }
                if (mask & IFCAP_VLAN_HWTAGGING) {
                        ifp->if_capenable ^= IFCAP_VLAN_HWTAGGING;
                        reinit = 1;
                }
                if (mask & IFCAP_VLAN_HWFILTER) {
                        ifp->if_capenable ^= IFCAP_VLAN_HWFILTER;
                        reinit = 1;
                }
                if (mask & IFCAP_VLAN_HWTSO) {
                        ifp->if_capenable ^= IFCAP_VLAN_HWTSO;
                        reinit = 1;
                }
                if ((mask & IFCAP_WOL) &&
                    (ifp->if_capabilities & IFCAP_WOL) != 0) {
                        if (mask & IFCAP_WOL_MCAST)
                                ifp->if_capenable ^= IFCAP_WOL_MCAST;
                        if (mask & IFCAP_WOL_MAGIC)
                                ifp->if_capenable ^= IFCAP_WOL_MAGIC;
                }
                if (reinit && (ifp->if_drv_flags & IFF_DRV_RUNNING))
                        em_init(adapter);
                VLAN_CAPABILITIES(ifp);
                break;
            }

        default:
                error = ether_ioctl(ifp, command, data);
                break;
        }

        return (error);
}
1343
1344
1345 /*********************************************************************
1346  *  Init entry point
1347  *
1348  *  This routine is used in two ways. It is used by the stack as
1349  *  the init entry point in the network interface structure. It is
1350  *  also used by the driver as a hw/sw initialization routine to
1351  *  get the hardware and software to a consistent state.
1352  *
1353  *  This routine has no return value.
1354  **********************************************************************/
1355
1356 static void
1357 em_init_locked(struct adapter *adapter)
1358 {
1359         struct ifnet    *ifp = adapter->ifp;
1360         device_t        dev = adapter->dev;
1361
1362         INIT_DEBUGOUT("em_init: begin");
1363
1364         EM_CORE_LOCK_ASSERT(adapter);
1365
1366         em_disable_intr(adapter);
1367         callout_stop(&adapter->timer);
1368
1369         /* Get the latest MAC address; the user may have set a LAA */
1370         bcopy(IF_LLADDR(adapter->ifp), adapter->hw.mac.addr,
1371               ETHER_ADDR_LEN);
1372
1373         /* Put the address into the Receive Address Array */
1374         e1000_rar_set(&adapter->hw, adapter->hw.mac.addr, 0);
1375
1376         /*
1377          * With the 82571 adapter, RAR[0] may be overwritten
1378          * when the other port is reset; we make a duplicate
1379          * in RAR[14] for that eventuality, which assures
1380          * the interface continues to function.
1381          */
1382         if (adapter->hw.mac.type == e1000_82571) {
1383                 e1000_set_laa_state_82571(&adapter->hw, TRUE);
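                     /* E1000_RAR_ENTRIES - 1 is the RAR[14] slot noted above */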
1384                 e1000_rar_set(&adapter->hw, adapter->hw.mac.addr,
1385                     E1000_RAR_ENTRIES - 1);
1386         }
1387
1388         /* Initialize the hardware */
1389         em_reset(adapter);
1390         em_update_link_status(adapter);
1391
1392         /* Setup VLAN support, basic and offload if available */
1393         E1000_WRITE_REG(&adapter->hw, E1000_VET, ETHERTYPE_VLAN);
1394
1395         /* Set hardware offload abilities */
1396         ifp->if_hwassist = 0;
1397         if (ifp->if_capenable & IFCAP_TXCSUM)
1398                 ifp->if_hwassist |= (CSUM_TCP | CSUM_UDP);
1399         /* 
1400         ** There have proven to be problems with TSO when not
1401         ** at full gigabit speed, so disable the assist automatically
1402         ** when at lower speeds.  -jfv
1403         */
1404         if (ifp->if_capenable & IFCAP_TSO4) {
1405                 if (adapter->link_speed == SPEED_1000)
1406                         ifp->if_hwassist |= CSUM_TSO;
1407         }
1408
1409         /* Configure for OS presence */
1410         em_init_manageability(adapter);
1411
1412         /* Prepare transmit descriptors and buffers */
1413         em_setup_transmit_structures(adapter);
1414         em_initialize_transmit_unit(adapter);
1415
1416         /* Setup Multicast table */
1417         em_set_multi(adapter);
1418
1419         /*
1420         ** Figure out the desired mbuf
1421         ** pool for doing jumbos
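             ** (2K, page-size, or 9K clusters)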
1422         */
1423         if (adapter->hw.mac.max_frame_size <= 2048)
1424                 adapter->rx_mbuf_sz = MCLBYTES;
1425         else if (adapter->hw.mac.max_frame_size <= 4096)
1426                 adapter->rx_mbuf_sz = MJUMPAGESIZE;
1427         else
1428                 adapter->rx_mbuf_sz = MJUM9BYTES;
1429
1430         /* Prepare receive descriptors and buffers */
1431         if (em_setup_receive_structures(adapter)) {
1432                 device_printf(dev, "Could not setup receive structures\n");
1433                 em_stop(adapter);
1434                 return;
1435         }
1436         em_initialize_receive_unit(adapter);
1437
1438         /* Use real VLAN Filter support? */
1439         if (ifp->if_capenable & IFCAP_VLAN_HWTAGGING) {
1440                 if (ifp->if_capenable & IFCAP_VLAN_HWFILTER)
1441                         /* Use real VLAN Filter support */
1442                         em_setup_vlan_hw_support(adapter);
1443                 else {
1444                         u32 ctrl;
1445                         ctrl = E1000_READ_REG(&adapter->hw, E1000_CTRL);
1446                         ctrl |= E1000_CTRL_VME;
1447                         E1000_WRITE_REG(&adapter->hw, E1000_CTRL, ctrl);
1448                 }
1449         }
1450
1451         /* Don't lose promiscuous settings */
1452         em_set_promisc(adapter);
1453
1454         /* Set the interface as ACTIVE */
1455         ifp->if_drv_flags |= IFF_DRV_RUNNING;
1456         ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;
1457
1458         callout_reset(&adapter->timer, hz, em_local_timer, adapter);
1459         e1000_clear_hw_cntrs_base_generic(&adapter->hw);
1460
1461         /* MSI/X configuration for 82574 */
1462         if (adapter->hw.mac.type == e1000_82574) {
1463                 int tmp;
1464                 tmp = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
1465                 tmp |= E1000_CTRL_EXT_PBA_CLR;
1466                 E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT, tmp);
1467                 /* Set the IVAR - interrupt vector routing. */
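                     /* adapter->ivars was assembled in em_allocate_msix():
                      * each 4-bit field carries an MSIX vector in its low
                      * 3 bits, with 0x8 marking the field valid. */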
1468                 E1000_WRITE_REG(&adapter->hw, E1000_IVAR, adapter->ivars);
1469         }
1470
1471 #ifdef DEVICE_POLLING
1472         /*
1473          * Only enable interrupts if we are not polling, make sure
1474          * they are off otherwise.
1475          */
1476         if (ifp->if_capenable & IFCAP_POLLING)
1477                 em_disable_intr(adapter);
1478         else
1479 #endif /* DEVICE_POLLING */
1480                 em_enable_intr(adapter);
1481
1482         /* AMT based hardware can now take control from firmware */
1483         if (adapter->has_manage && adapter->has_amt)
1484                 em_get_hw_control(adapter);
1485 }
1486
1487 static void
1488 em_init(void *arg)
1489 {
1490         struct adapter *adapter = arg;
1491
1492         EM_CORE_LOCK(adapter);
1493         em_init_locked(adapter);
1494         EM_CORE_UNLOCK(adapter);
1495 }
1496
1497
1498 #ifdef DEVICE_POLLING
1499 /*********************************************************************
1500  *
1501  *  Legacy polling routine: note this only works with single queue
1502  *
1503  *********************************************************************/
1504 static int
1505 em_poll(struct ifnet *ifp, enum poll_cmd cmd, int count)
1506 {
1507         struct adapter *adapter = ifp->if_softc;
1508         struct tx_ring  *txr = adapter->tx_rings;
1509         struct rx_ring  *rxr = adapter->rx_rings;
1510         u32             reg_icr;
1511         int             rx_done;
1512
1513         EM_CORE_LOCK(adapter);
1514         if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) {
1515                 EM_CORE_UNLOCK(adapter);
1516                 return (0);
1517         }
1518
1519         if (cmd == POLL_AND_CHECK_STATUS) {
1520                 reg_icr = E1000_READ_REG(&adapter->hw, E1000_ICR);
1521                 if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
1522                         callout_stop(&adapter->timer);
1523                         adapter->hw.mac.get_link_status = 1;
1524                         em_update_link_status(adapter);
1525                         callout_reset(&adapter->timer, hz,
1526                             em_local_timer, adapter);
1527                 }
1528         }
1529         EM_CORE_UNLOCK(adapter);
1530
1531         em_rxeof(rxr, count, &rx_done);
1532
1533         EM_TX_LOCK(txr);
1534         em_txeof(txr);
1535 #ifdef EM_MULTIQUEUE
1536         if (!drbr_empty(ifp, txr->br))
1537                 em_mq_start_locked(ifp, txr);
1538 #else
1539         if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
1540                 em_start_locked(ifp, txr);
1541 #endif
1542         EM_TX_UNLOCK(txr);
1543
1544         return (rx_done);
1545 }
1546 #endif /* DEVICE_POLLING */
1547
1548
1549 /*********************************************************************
1550  *
1551  *  Fast Legacy/MSI Combined Interrupt Service routine  
1552  *
1553  *********************************************************************/
1554 static int
1555 em_irq_fast(void *arg)
1556 {
1557         struct adapter  *adapter = arg;
1558         struct ifnet    *ifp;
1559         u32             reg_icr;
1560
1561         ifp = adapter->ifp;
1562
1563         reg_icr = E1000_READ_REG(&adapter->hw, E1000_ICR);
1564
1565         /* Hot eject?  */
1566         if (reg_icr == 0xffffffff)
1567                 return FILTER_STRAY;
1568
1569         /* Definitely not our interrupt.  */
1570         if (reg_icr == 0x0)
1571                 return FILTER_STRAY;
1572
1573         /*
1574          * Starting with the 82571 chip, bit 31 should be used to
1575          * determine whether the interrupt belongs to us.
1576          */
1577         if (adapter->hw.mac.type >= e1000_82571 &&
1578             (reg_icr & E1000_ICR_INT_ASSERTED) == 0)
1579                 return FILTER_STRAY;
1580
1581         em_disable_intr(adapter);
1582         taskqueue_enqueue(adapter->tq, &adapter->que_task);
1583
1584         /* Link status change */
1585         if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
1586                 adapter->hw.mac.get_link_status = 1;
1587                 taskqueue_enqueue(taskqueue_fast, &adapter->link_task);
1588         }
1589
1590         if (reg_icr & E1000_ICR_RXO)
1591                 adapter->rx_overruns++;
1592         return FILTER_HANDLED;
1593 }
1594
1595 /* Combined RX/TX handler, used by Legacy and MSI */
1596 static void
1597 em_handle_que(void *context, int pending)
1598 {
1599         struct adapter  *adapter = context;
1600         struct ifnet    *ifp = adapter->ifp;
1601         struct tx_ring  *txr = adapter->tx_rings;
1602         struct rx_ring  *rxr = adapter->rx_rings;
1603
1604         if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
1605                 bool more = em_rxeof(rxr, adapter->rx_process_limit, NULL);
1606
1607                 EM_TX_LOCK(txr);
1608                 em_txeof(txr);
1609 #ifdef EM_MULTIQUEUE
1610                 if (!drbr_empty(ifp, txr->br))
1611                         em_mq_start_locked(ifp, txr);
1612 #else
1613                 if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
1614                         em_start_locked(ifp, txr);
1615 #endif
1616                 EM_TX_UNLOCK(txr);
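                     /* Still more RX work: reschedule and leave interrupts masked */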
1617                 if (more) {
1618                         taskqueue_enqueue(adapter->tq, &adapter->que_task);
1619                         return;
1620                 }
1621         }
1622
1623         em_enable_intr(adapter);
1624         return;
1625 }
1626
1627
1628 /*********************************************************************
1629  *
1630  *  MSIX Interrupt Service Routines
1631  *
1632  **********************************************************************/
1633 static void
1634 em_msix_tx(void *arg)
1635 {
1636         struct tx_ring *txr = arg;
1637         struct adapter *adapter = txr->adapter;
1638         struct ifnet    *ifp = adapter->ifp;
1639
1640         ++txr->tx_irq;
1641         EM_TX_LOCK(txr);
1642         em_txeof(txr);
1643 #ifdef EM_MULTIQUEUE
1644         if (!drbr_empty(ifp, txr->br))
1645                 em_mq_start_locked(ifp, txr);
1646 #else
1647         if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
1648                 em_start_locked(ifp, txr);
1649 #endif
1650
1651         /* Reenable this interrupt */
1652         E1000_WRITE_REG(&adapter->hw, E1000_IMS, txr->ims);
1653         EM_TX_UNLOCK(txr);
1654         return;
1655 }
1656
1657 /*********************************************************************
1658  *
1659  *  MSIX RX Interrupt Service routine
1660  *
1661  **********************************************************************/
1662
1663 static void
1664 em_msix_rx(void *arg)
1665 {
1666         struct rx_ring  *rxr = arg;
1667         struct adapter  *adapter = rxr->adapter;
1668         bool            more;
1669
1670         ++rxr->rx_irq;
1671         if (!(adapter->ifp->if_drv_flags & IFF_DRV_RUNNING))
1672                 return;
1673         more = em_rxeof(rxr, adapter->rx_process_limit, NULL);
1674         if (more)
1675                 taskqueue_enqueue(rxr->tq, &rxr->rx_task);
1676         else {
1677                 /* Reenable this interrupt */
1678                 E1000_WRITE_REG(&adapter->hw, E1000_IMS, rxr->ims);
1679         }
1680         return;
1681 }
1682
1683 /*********************************************************************
1684  *
1685  *  MSIX Link Fast Interrupt Service routine
1686  *
1687  **********************************************************************/
1688 static void
1689 em_msix_link(void *arg)
1690 {
1691         struct adapter  *adapter = arg;
1692         u32             reg_icr;
1693
1694         ++adapter->link_irq;
1695         reg_icr = E1000_READ_REG(&adapter->hw, E1000_ICR);
1696
1697         if (reg_icr & E1000_ICR_RXO)
1698                 adapter->rx_overruns++;
1699
1700         if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
1701                 adapter->hw.mac.get_link_status = 1;
1702                 em_handle_link(adapter, 0);
1703         } else
1704                 E1000_WRITE_REG(&adapter->hw, E1000_IMS,
1705                     EM_MSIX_LINK | E1000_IMS_LSC);
1706         /*
1707         ** Because we must read the ICR for this interrupt,
1708         ** it may clear other causes using autoclear; for
1709         ** this reason we simply create a soft interrupt
1710         ** for all these vectors.
1711         */
1712         if (reg_icr) {
1713                 E1000_WRITE_REG(&adapter->hw,
1714                         E1000_ICS, adapter->ims);
1715         }
1716         return;
1717 }
1718
1719 static void
1720 em_handle_rx(void *context, int pending)
1721 {
1722         struct rx_ring  *rxr = context;
1723         struct adapter  *adapter = rxr->adapter;
1724         bool            more;
1725
1726         more = em_rxeof(rxr, adapter->rx_process_limit, NULL);
1727         if (more)
1728                 taskqueue_enqueue(rxr->tq, &rxr->rx_task);
1729         else {
1730                 /* Reenable this interrupt */
1731                 E1000_WRITE_REG(&adapter->hw, E1000_IMS, rxr->ims);
1732         }
1733 }
1734
1735 static void
1736 em_handle_tx(void *context, int pending)
1737 {
1738         struct tx_ring  *txr = context;
1739         struct adapter  *adapter = txr->adapter;
1740         struct ifnet    *ifp = adapter->ifp;
1741
1742         EM_TX_LOCK(txr);
1743         em_txeof(txr);
1744 #ifdef EM_MULTIQUEUE
1745         if (!drbr_empty(ifp, txr->br))
1746                 em_mq_start_locked(ifp, txr);
1747 #else
1748         if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
1749                 em_start_locked(ifp, txr);
1750 #endif
1751         E1000_WRITE_REG(&adapter->hw, E1000_IMS, txr->ims);
1752         EM_TX_UNLOCK(txr);
1753 }
1754
1755 static void
1756 em_handle_link(void *context, int pending)
1757 {
1758         struct adapter  *adapter = context;
1759         struct tx_ring  *txr = adapter->tx_rings;
1760         struct ifnet *ifp = adapter->ifp;
1761
1762         if (!(ifp->if_drv_flags & IFF_DRV_RUNNING))
1763                 return;
1764
1765         EM_CORE_LOCK(adapter);
1766         callout_stop(&adapter->timer);
1767         em_update_link_status(adapter);
1768         callout_reset(&adapter->timer, hz, em_local_timer, adapter);
1769         E1000_WRITE_REG(&adapter->hw, E1000_IMS,
1770             EM_MSIX_LINK | E1000_IMS_LSC);
1771         if (adapter->link_active) {
1772                 for (int i = 0; i < adapter->num_queues; i++, txr++) {
1773                         EM_TX_LOCK(txr);
1774 #ifdef EM_MULTIQUEUE
1775                         if (!drbr_empty(ifp, txr->br))
1776                                 em_mq_start_locked(ifp, txr);
1777 #else
1778                         if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
1779                                 em_start_locked(ifp, txr);
1780 #endif
1781                         EM_TX_UNLOCK(txr);
1782                 }
1783         }
1784         EM_CORE_UNLOCK(adapter);
1785 }
1786
1787
1788 /*********************************************************************
1789  *
1790  *  Media Ioctl callback
1791  *
1792  *  This routine is called whenever the user queries the status of
1793  *  the interface using ifconfig.
1794  *
1795  **********************************************************************/
1796 static void
1797 em_media_status(struct ifnet *ifp, struct ifmediareq *ifmr)
1798 {
1799         struct adapter *adapter = ifp->if_softc;
1800         u_char fiber_type = IFM_1000_SX;
1801
1802         INIT_DEBUGOUT("em_media_status: begin");
1803
1804         EM_CORE_LOCK(adapter);
1805         em_update_link_status(adapter);
1806
1807         ifmr->ifm_status = IFM_AVALID;
1808         ifmr->ifm_active = IFM_ETHER;
1809
1810         if (!adapter->link_active) {
1811                 EM_CORE_UNLOCK(adapter);
1812                 return;
1813         }
1814
1815         ifmr->ifm_status |= IFM_ACTIVE;
1816
1817         if ((adapter->hw.phy.media_type == e1000_media_type_fiber) ||
1818             (adapter->hw.phy.media_type == e1000_media_type_internal_serdes)) {
1819                 ifmr->ifm_active |= fiber_type | IFM_FDX;
1820         } else {
1821                 switch (adapter->link_speed) {
1822                 case 10:
1823                         ifmr->ifm_active |= IFM_10_T;
1824                         break;
1825                 case 100:
1826                         ifmr->ifm_active |= IFM_100_TX;
1827                         break;
1828                 case 1000:
1829                         ifmr->ifm_active |= IFM_1000_T;
1830                         break;
1831                 }
1832                 if (adapter->link_duplex == FULL_DUPLEX)
1833                         ifmr->ifm_active |= IFM_FDX;
1834                 else
1835                         ifmr->ifm_active |= IFM_HDX;
1836         }
1837         EM_CORE_UNLOCK(adapter);
1838 }
1839
1840 /*********************************************************************
1841  *
1842  *  Media Ioctl callback
1843  *
1844  *  This routine is called when the user changes speed/duplex using
1845  *  media/mediaopt options with ifconfig.
1846  *
1847  **********************************************************************/
1848 static int
1849 em_media_change(struct ifnet *ifp)
1850 {
1851         struct adapter *adapter = ifp->if_softc;
1852         struct ifmedia  *ifm = &adapter->media;
1853
1854         INIT_DEBUGOUT("em_media_change: begin");
1855
1856         if (IFM_TYPE(ifm->ifm_media) != IFM_ETHER)
1857                 return (EINVAL);
1858
1859         EM_CORE_LOCK(adapter);
1860         switch (IFM_SUBTYPE(ifm->ifm_media)) {
1861         case IFM_AUTO:
1862                 adapter->hw.mac.autoneg = DO_AUTO_NEG;
1863                 adapter->hw.phy.autoneg_advertised = AUTONEG_ADV_DEFAULT;
1864                 break;
1865         case IFM_1000_LX:
1866         case IFM_1000_SX:
1867         case IFM_1000_T:
1868                 adapter->hw.mac.autoneg = DO_AUTO_NEG;
1869                 adapter->hw.phy.autoneg_advertised = ADVERTISE_1000_FULL;
1870                 break;
1871         case IFM_100_TX:
1872                 adapter->hw.mac.autoneg = FALSE;
1873                 adapter->hw.phy.autoneg_advertised = 0;
1874                 if ((ifm->ifm_media & IFM_GMASK) == IFM_FDX)
1875                         adapter->hw.mac.forced_speed_duplex = ADVERTISE_100_FULL;
1876                 else
1877                         adapter->hw.mac.forced_speed_duplex = ADVERTISE_100_HALF;
1878                 break;
1879         case IFM_10_T:
1880                 adapter->hw.mac.autoneg = FALSE;
1881                 adapter->hw.phy.autoneg_advertised = 0;
1882                 if ((ifm->ifm_media & IFM_GMASK) == IFM_FDX)
1883                         adapter->hw.mac.forced_speed_duplex = ADVERTISE_10_FULL;
1884                 else
1885                         adapter->hw.mac.forced_speed_duplex = ADVERTISE_10_HALF;
1886                 break;
1887         default:
1888                 device_printf(adapter->dev, "Unsupported media type\n");
1889         }
1890
1891         em_init_locked(adapter);
1892         EM_CORE_UNLOCK(adapter);
1893
1894         return (0);
1895 }
1896
1897 /*********************************************************************
1898  *
1899  *  This routine maps the mbufs to tx descriptors.
1900  *
1901  *  return 0 on success, positive on failure
1902  **********************************************************************/
1903
1904 static int
1905 em_xmit(struct tx_ring *txr, struct mbuf **m_headp)
1906 {
1907         struct adapter          *adapter = txr->adapter;
1908         bus_dma_segment_t       segs[EM_MAX_SCATTER];
1909         bus_dmamap_t            map;
1910         struct em_txbuffer      *tx_buffer, *tx_buffer_mapped;
1911         struct e1000_tx_desc    *ctxd = NULL;
1912         struct mbuf             *m_head;
1913         struct ether_header     *eh;
1914         struct ip               *ip = NULL;
1915         struct tcphdr           *tp = NULL;
1916         u32                     txd_upper = 0, txd_lower = 0;
1917         int                     ip_off, poff;
1918         int                     nsegs, i, j, first, last = 0;
1919         int                     error;
1920         bool                    do_tso, tso_desc, remap = TRUE;
1921
1922         m_head = *m_headp;
1923         do_tso = (m_head->m_pkthdr.csum_flags & CSUM_TSO);
1924         tso_desc = FALSE;
1925         ip_off = poff = 0;
1926
1927         /*
1928          * Intel recommends entire IP/TCP header length reside in a single
1929          * buffer. If multiple descriptors are used to describe the IP and
1930          * TCP header, each descriptor should describe one or more
1931          * complete headers; descriptors referencing only parts of headers
1932          * are not supported. If all layer headers are not coalesced into
1933          * a single buffer, each buffer should not cross a 4KB boundary,
1934          * or be larger than the maximum read request size.
1935          * The controller also requires modifying the IP/TCP header to
1936          * make TSO work, so we first get a writable mbuf chain and then
1937          * coalesce the ethernet/IP/TCP headers into a single buffer to
1938          * meet the controller's requirement. This also simplifies
1939          * IP/TCP/UDP checksum offloading, which has similar restrictions.
1940          */
1941         if (do_tso || m_head->m_pkthdr.csum_flags & CSUM_OFFLOAD) {
1942                 if (do_tso || (m_head->m_next != NULL && 
1943                     m_head->m_pkthdr.csum_flags & CSUM_OFFLOAD)) {
1944                         if (M_WRITABLE(*m_headp) == 0) {
1945                                 m_head = m_dup(*m_headp, M_NOWAIT);
1946                                 m_freem(*m_headp);
1947                                 if (m_head == NULL) {
1948                                         *m_headp = NULL;
1949                                         return (ENOBUFS);
1950                                 }
1951                                 *m_headp = m_head;
1952                         }
1953                 }
1954                 /*
1955                  * XXX
1956                  * Assume IPv4, we don't have TSO/checksum offload support
1957                  * for IPv6 yet.
1958                  */
1959                 ip_off = sizeof(struct ether_header);
1960                 if (m_head->m_len < ip_off) {
1961                         m_head = m_pullup(m_head, ip_off);
1962                         if (m_head == NULL) {
1963                                 *m_headp = NULL;
1964                                 return (ENOBUFS);
1965                         }
1966                 }
1967                 eh = mtod(m_head, struct ether_header *);
1968                 if (eh->ether_type == htons(ETHERTYPE_VLAN)) {
1969                         ip_off = sizeof(struct ether_vlan_header);
1970                         if (m_head->m_len < ip_off) {
1971                                 m_head = m_pullup(m_head, ip_off);
1972                                 if (m_head == NULL) {
1973                                         *m_headp = NULL;
1974                                         return (ENOBUFS);
1975                                 }
1976                         }
1977                 }
1978                 if (m_head->m_len < ip_off + sizeof(struct ip)) {
1979                         m_head = m_pullup(m_head, ip_off + sizeof(struct ip));
1980                         if (m_head == NULL) {
1981                                 *m_headp = NULL;
1982                                 return (ENOBUFS);
1983                         }
1984                 }
1985                 ip = (struct ip *)(mtod(m_head, char *) + ip_off);
1986                 poff = ip_off + (ip->ip_hl << 2);
1987
1988                 if (do_tso || (m_head->m_pkthdr.csum_flags & CSUM_TCP)) {
1989                         if (m_head->m_len < poff + sizeof(struct tcphdr)) {
1990                                 m_head = m_pullup(m_head, poff +
1991                                     sizeof(struct tcphdr));
1992                                 if (m_head == NULL) {
1993                                         *m_headp = NULL;
1994                                         return (ENOBUFS);
1995                                 }
1996                         }
1997                         tp = (struct tcphdr *)(mtod(m_head, char *) + poff);
1998                         /*
1999                          * TSO workaround:
2000                          *   pull TSO_WORKAROUND (4) more bytes of data into it.
2001                          */
2002                         if (m_head->m_len < poff + (tp->th_off << 2)) {
2003                                 m_head = m_pullup(m_head, poff +
2004                                                  (tp->th_off << 2) +
2005                                                  TSO_WORKAROUND);
2006                                 if (m_head == NULL) {
2007                                         *m_headp = NULL;
2008                                         return (ENOBUFS);
2009                                 }
2010                         }
2011                         ip = (struct ip *)(mtod(m_head, char *) + ip_off);
2012                         tp = (struct tcphdr *)(mtod(m_head, char *) + poff);
2013                         if (do_tso) {
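                                     /* Rewrite ip_len to the length of a single
                                      * TSO segment (headers + tso_segsz) so the
                                      * header template matches each emitted frame. */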
2014                                 ip->ip_len = htons(m_head->m_pkthdr.tso_segsz +
2015                                                   (ip->ip_hl << 2) +
2016                                                   (tp->th_off << 2));
2017                                 ip->ip_sum = 0;
2018                                 /*
2019                                  * The TCP pseudo checksum for TSO must not
2020                                  * include the payload length, so the driver
2021                                  * recomputes it here as the hardware expects
2022                                  * to see it, in adherence to Microsoft's
2023                                  * Large Send specification.
2024                                 */
2025                                 tp->th_sum = in_pseudo(ip->ip_src.s_addr,
2026                                     ip->ip_dst.s_addr, htons(IPPROTO_TCP));
2027                         }
2028                 } else if (m_head->m_pkthdr.csum_flags & CSUM_UDP) {
2029                         if (m_head->m_len < poff + sizeof(struct udphdr)) {
2030                                 m_head = m_pullup(m_head, poff +
2031                                     sizeof(struct udphdr));
2032                                 if (m_head == NULL) {
2033                                         *m_headp = NULL;
2034                                         return (ENOBUFS);
2035                                 }
2036                         }
2037                         ip = (struct ip *)(mtod(m_head, char *) + ip_off);
2038                 }
2039                 *m_headp = m_head;
2040         }
2041
2042         /*
2043          * Map the packet for DMA
2044          *
2045          * Capture the first descriptor index,
2046          * this descriptor will have the index
2047          * of the EOP which is the only one that
2048          * now gets a DONE bit writeback.
2049          */
2050         first = txr->next_avail_desc;
2051         tx_buffer = &txr->tx_buffers[first];
2052         tx_buffer_mapped = tx_buffer;
2053         map = tx_buffer->map;
2054
2055 retry:
2056         error = bus_dmamap_load_mbuf_sg(txr->txtag, map,
2057             *m_headp, segs, &nsegs, BUS_DMA_NOWAIT);
2058
2059         /*
2060          * There are two types of errors we can (try) to handle:
2061          * - EFBIG means the mbuf chain was too long and bus_dma ran
2062          *   out of segments.  Defragment the mbuf chain and try again.
2063          * - ENOMEM means bus_dma could not obtain enough bounce buffers
2064          *   at this point in time.  Defer sending and try again later.
2065          * All other errors, in particular EINVAL, are fatal and prevent the
2066          * mbuf chain from ever going through.  Drop it and report error.
2067          */
2068         if (error == EFBIG && remap) {
2069                 struct mbuf *m;
2070
2071                 m = m_collapse(*m_headp, M_NOWAIT, EM_MAX_SCATTER);
2072                 if (m == NULL) {
2073                         adapter->mbuf_defrag_failed++;
2074                         m_freem(*m_headp);
2075                         *m_headp = NULL;
2076                         return (ENOBUFS);
2077                 }
2078                 *m_headp = m;
2079
2080                 /* Try it again, but only once */
2081                 remap = FALSE;
2082                 goto retry;
2083         } else if (error != 0) {
2084                 adapter->no_tx_dma_setup++;
2085                 m_freem(*m_headp);
2086                 *m_headp = NULL;
2087                 return (error);
2088         }
2089
2090         /*
2091          * TSO Hardware workaround, if this packet is not
2092          * TSO, and is only a single descriptor long, and
2093          * it follows a TSO burst, then we need to add a
2094          * sentinel descriptor to prevent premature writeback.
2095          */
2096         if ((!do_tso) && (txr->tx_tso == TRUE)) {
2097                 if (nsegs == 1)
2098                         tso_desc = TRUE;
2099                 txr->tx_tso = FALSE;
2100         }
2101
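             /* Refuse the frame unless EM_MAX_SCATTER descriptors would
              * remain free; this keeps headroom for the TSO sentinel
              * descriptor added below. */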
2102         if (nsegs > (txr->tx_avail - EM_MAX_SCATTER)) {
2103                 txr->no_desc_avail++;
2104                 bus_dmamap_unload(txr->txtag, map);
2105                 return (ENOBUFS);
2106         }
2107         m_head = *m_headp;
2108
2109         /* Do hardware assists */
2110         if (m_head->m_pkthdr.csum_flags & CSUM_TSO) {
2111                 em_tso_setup(txr, m_head, ip_off, ip, tp,
2112                     &txd_upper, &txd_lower);
2113                 /* we need to make a final sentinel transmit desc */
2114                 tso_desc = TRUE;
2115         } else if (m_head->m_pkthdr.csum_flags & CSUM_OFFLOAD)
2116                 em_transmit_checksum_setup(txr, m_head,
2117                     ip_off, ip, &txd_upper, &txd_lower);
2118
2119         if (m_head->m_flags & M_VLANTAG) {
2120                 /* Set the vlan id. */
2121                 txd_upper |=
2122                     (htole16(m_head->m_pkthdr.ether_vtag) << 16);
2123                 /* Tell hardware to add tag */
2124                 txd_lower |= htole32(E1000_TXD_CMD_VLE);
2125         }
2126
2127         i = txr->next_avail_desc;
2128
2129         /* Set up our transmit descriptors */
2130         for (j = 0; j < nsegs; j++) {
2131                 bus_size_t seg_len;
2132                 bus_addr_t seg_addr;
2133
2134                 tx_buffer = &txr->tx_buffers[i];
2135                 ctxd = &txr->tx_base[i];
2136                 seg_addr = segs[j].ds_addr;
2137                 seg_len  = segs[j].ds_len;
2138                 /*
2139                 ** TSO Workaround:
2140                 ** If this is the last descriptor, we want to
2141                 ** split it so we have a small final sentinel
2142                 */
2143                 if (tso_desc && (j == (nsegs - 1)) && (seg_len > 8)) {
2144                         seg_len -= TSO_WORKAROUND;
2145                         ctxd->buffer_addr = htole64(seg_addr);
2146                         ctxd->lower.data = htole32(
2147                                 adapter->txd_cmd | txd_lower | seg_len);
2148                         ctxd->upper.data = htole32(txd_upper);
2149                         if (++i == adapter->num_tx_desc)
2150                                 i = 0;
2151
2152                         /* Now make the sentinel */     
2153                         txr->tx_avail--;
2154                         ctxd = &txr->tx_base[i];
2155                         tx_buffer = &txr->tx_buffers[i];
2156                         ctxd->buffer_addr =
2157                             htole64(seg_addr + seg_len);
2158                         ctxd->lower.data = htole32(
2159                             adapter->txd_cmd | txd_lower | TSO_WORKAROUND);
2160                         ctxd->upper.data =
2161                             htole32(txd_upper);
2162                         last = i;
2163                         if (++i == adapter->num_tx_desc)
2164                                 i = 0;
2165                 } else {
2166                         ctxd->buffer_addr = htole64(seg_addr);
2167                         ctxd->lower.data = htole32(
2168                             adapter->txd_cmd | txd_lower | seg_len);
2169                         ctxd->upper.data = htole32(txd_upper);
2170                         last = i;
2171                         if (++i == adapter->num_tx_desc)
2172                                 i = 0;
2173                 }
2174                 tx_buffer->m_head = NULL;
2175                 tx_buffer->next_eop = -1;
2176         }
2177
2178         txr->next_avail_desc = i;
2179         txr->tx_avail -= nsegs;
2180
2181         tx_buffer->m_head = m_head;
2182         /*
2183         ** Here we swap the map so the last descriptor,
2184         ** which gets the completion interrupt has the
2185         ** real map, and the first descriptor gets the
2186         ** unused map from this descriptor.
2187         */
2188         tx_buffer_mapped->map = tx_buffer->map;
2189         tx_buffer->map = map;
2190         bus_dmamap_sync(txr->txtag, map, BUS_DMASYNC_PREWRITE);
2191
2192         /*
2193          * Last Descriptor of Packet
2194          * needs End Of Packet (EOP)
2195          * and Report Status (RS)
2196          */
2197         ctxd->lower.data |=
2198             htole32(E1000_TXD_CMD_EOP | E1000_TXD_CMD_RS);
2199         /*
2200          * Keep track in the first buffer which
2201          * descriptor will be written back
2202          */
2203         tx_buffer = &txr->tx_buffers[first];
2204         tx_buffer->next_eop = last;
2205
2206         /*
2207          * Advance the Transmit Descriptor Tail (TDT), this tells the E1000
2208          * that this frame is available to transmit.
2209          */
2210         bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
2211             BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
2212         E1000_WRITE_REG(&adapter->hw, E1000_TDT(txr->me), i);
2213
2214         return (0);
2215 }
2216
2217 static void
2218 em_set_promisc(struct adapter *adapter)
2219 {
2220         struct ifnet    *ifp = adapter->ifp;
2221         u32             reg_rctl;
2222
2223         reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
2224
2225         if (ifp->if_flags & IFF_PROMISC) {
2226                 reg_rctl |= (E1000_RCTL_UPE | E1000_RCTL_MPE);
2227                 /* Turn this on if you want to see bad packets */
2228                 if (em_debug_sbp)
2229                         reg_rctl |= E1000_RCTL_SBP;
2230                 E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2231         } else if (ifp->if_flags & IFF_ALLMULTI) {
2232                 reg_rctl |= E1000_RCTL_MPE;
2233                 reg_rctl &= ~E1000_RCTL_UPE;
2234                 E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2235         }
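             /* Clearing UPE/MPE when leaving promiscuous mode is
              * handled by em_disable_promisc(). */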
2236 }
2237
2238 static void
2239 em_disable_promisc(struct adapter *adapter)
2240 {
2241         struct ifnet    *ifp = adapter->ifp;
2242         u32             reg_rctl;
2243         int             mcnt = 0;
2244
2245         reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
2246         reg_rctl &=  (~E1000_RCTL_UPE);
2247         if (ifp->if_flags & IFF_ALLMULTI)
2248                 mcnt = MAX_NUM_MULTICAST_ADDRESSES;
2249         else {
2250                 struct  ifmultiaddr *ifma;
2251 #if __FreeBSD_version < 800000
2252                 IF_ADDR_LOCK(ifp);
2253 #else   
2254                 if_maddr_rlock(ifp);
2255 #endif
2256                 TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
2257                         if (ifma->ifma_addr->sa_family != AF_LINK)
2258                                 continue;
2259                         if (mcnt == MAX_NUM_MULTICAST_ADDRESSES)
2260                                 break;
2261                         mcnt++;
2262                 }
2263 #if __FreeBSD_version < 800000
2264                 IF_ADDR_UNLOCK(ifp);
2265 #else
2266                 if_maddr_runlock(ifp);
2267 #endif
2268         }
2269         /* Don't disable if in MAX groups */
2270         if (mcnt < MAX_NUM_MULTICAST_ADDRESSES)
2271                 reg_rctl &=  (~E1000_RCTL_MPE);
2272         reg_rctl &=  (~E1000_RCTL_SBP);
2273         E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2274 }
2275
2276
2277 /*********************************************************************
2278  *  Multicast Update
2279  *
2280  *  This routine is called whenever multicast address list is updated.
2281  *
2282  **********************************************************************/
2283
2284 static void
2285 em_set_multi(struct adapter *adapter)
2286 {
2287         struct ifnet    *ifp = adapter->ifp;
2288         struct ifmultiaddr *ifma;
2289         u32 reg_rctl = 0;
2290         u8  *mta; /* Multicast array memory */
2291         int mcnt = 0;
2292
2293         IOCTL_DEBUGOUT("em_set_multi: begin");
2294
2295         mta = adapter->mta;
2296         bzero(mta, sizeof(u8) * ETH_ADDR_LEN * MAX_NUM_MULTICAST_ADDRESSES);
2297
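             /* The 82542 rev 2.0 must hold the receiver in reset (with
              * MWI disabled) while the multicast table array is rewritten. */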
2298         if (adapter->hw.mac.type == e1000_82542 && 
2299             adapter->hw.revision_id == E1000_REVISION_2) {
2300                 reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
2301                 if (adapter->hw.bus.pci_cmd_word & CMD_MEM_WRT_INVALIDATE)
2302                         e1000_pci_clear_mwi(&adapter->hw);
2303                 reg_rctl |= E1000_RCTL_RST;
2304                 E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2305                 msec_delay(5);
2306         }
2307
2308 #if __FreeBSD_version < 800000
2309         IF_ADDR_LOCK(ifp);
2310 #else
2311         if_maddr_rlock(ifp);
2312 #endif
2313         TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
2314                 if (ifma->ifma_addr->sa_family != AF_LINK)
2315                         continue;
2316
2317                 if (mcnt == MAX_NUM_MULTICAST_ADDRESSES)
2318                         break;
2319
2320                 bcopy(LLADDR((struct sockaddr_dl *)ifma->ifma_addr),
2321                     &mta[mcnt * ETH_ADDR_LEN], ETH_ADDR_LEN);
2322                 mcnt++;
2323         }
2324 #if __FreeBSD_version < 800000
2325         IF_ADDR_UNLOCK(ifp);
2326 #else
2327         if_maddr_runlock(ifp);
2328 #endif
2329         if (mcnt >= MAX_NUM_MULTICAST_ADDRESSES) {
2330                 reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
2331                 reg_rctl |= E1000_RCTL_MPE;
2332                 E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2333         } else
2334                 e1000_update_mc_addr_list(&adapter->hw, mta, mcnt);
2335
2336         if (adapter->hw.mac.type == e1000_82542 && 
2337             adapter->hw.revision_id == E1000_REVISION_2) {
2338                 reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
2339                 reg_rctl &= ~E1000_RCTL_RST;
2340                 E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2341                 msec_delay(5);
2342                 if (adapter->hw.bus.pci_cmd_word & CMD_MEM_WRT_INVALIDATE)
2343                         e1000_pci_set_mwi(&adapter->hw);
2344         }
2345 }
2346
2347
2348 /*********************************************************************
2349  *  Timer routine
2350  *
2351  *  This routine checks for link status and updates statistics.
2352  *
2353  **********************************************************************/
2354
2355 static void
2356 em_local_timer(void *arg)
2357 {
2358         struct adapter  *adapter = arg;
2359         struct ifnet    *ifp = adapter->ifp;
2360         struct tx_ring  *txr = adapter->tx_rings;
2361         struct rx_ring  *rxr = adapter->rx_rings;
2362         u32             trigger = 0;
2363
2364         EM_CORE_LOCK_ASSERT(adapter);
2365
2366         em_update_link_status(adapter);
2367         em_update_stats_counters(adapter);
2368
2369         /* Reset LAA into RAR[0] on 82571 */
2370         if ((adapter->hw.mac.type == e1000_82571) &&
2371             e1000_get_laa_state_82571(&adapter->hw))
2372                 e1000_rar_set(&adapter->hw, adapter->hw.mac.addr, 0);
2373
2374         /* Mask to use in the irq trigger */
2375         if (adapter->msix_mem) {
2376                 for (int i = 0; i < adapter->num_queues; i++, rxr++)
2377                         trigger |= rxr->ims;
2378                 rxr = adapter->rx_rings;
2379         } else
2380                 trigger = E1000_ICS_RXDMT0;
2381
2382         /*
2383         ** Check on the state of the TX queue(s); this
2384         ** can be done without the lock because it's RO
2385         ** and the HUNG state will be static if set.
2386         */
2387         for (int i = 0; i < adapter->num_queues; i++, txr++) {
2388                 if (txr->busy == EM_TX_HUNG)
2389                         goto hung;
2390                 if (txr->busy >= EM_TX_MAXTRIES)
2391                         txr->busy = EM_TX_HUNG;
2392                 /* Schedule a TX tasklet if needed */
2393                 if (txr->tx_avail <= EM_MAX_SCATTER)
2394                         taskqueue_enqueue(txr->tq, &txr->tx_task);
2395         }
2396         
2397         callout_reset(&adapter->timer, hz, em_local_timer, adapter);
2398 #ifndef DEVICE_POLLING
2399         /* Trigger an RX interrupt to guarantee mbuf refresh */
2400         E1000_WRITE_REG(&adapter->hw, E1000_ICS, trigger);
2401 #endif
2402         return;
2403 hung:
2404         /* Looks like we're hung */
2405         device_printf(adapter->dev, "Watchdog timeout Queue[%d]-- resetting\n",
2406                         txr->me);
2407         em_print_debug_info(adapter);
2408         ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
2409         adapter->watchdog_events++;
2410         em_init_locked(adapter);
2411 }
2412
2413
2414 static void
2415 em_update_link_status(struct adapter *adapter)
2416 {
2417         struct e1000_hw *hw = &adapter->hw;
2418         struct ifnet *ifp = adapter->ifp;
2419         device_t dev = adapter->dev;
2420         struct tx_ring *txr = adapter->tx_rings;
2421         u32 link_check = 0;
2422
2423         /* Get the cached link value or read phy for real */
2424         switch (hw->phy.media_type) {
2425         case e1000_media_type_copper:
2426                 if (hw->mac.get_link_status) {
2427                         if (hw->mac.type == e1000_pch_spt)
2428                                 msec_delay(50);
2429                         /* Do the work to read phy */
2430                         e1000_check_for_link(hw);
2431                         link_check = !hw->mac.get_link_status;
2432                         if (link_check) /* ESB2 fix */
2433                                 e1000_cfg_on_link_up(hw);
2434                 } else
2435                         link_check = TRUE;
2436                 break;
2437         case e1000_media_type_fiber:
2438                 e1000_check_for_link(hw);
2439                 link_check = (E1000_READ_REG(hw, E1000_STATUS) &
2440                                  E1000_STATUS_LU);
2441                 break;
2442         case e1000_media_type_internal_serdes:
2443                 e1000_check_for_link(hw);
2444                 link_check = adapter->hw.mac.serdes_has_link;
2445                 break;
2446         default:
2447         case e1000_media_type_unknown:
2448                 break;
2449         }
2450
2451         /* Now check for a transition */
2452         if (link_check && (adapter->link_active == 0)) {
2453                 e1000_get_speed_and_duplex(hw, &adapter->link_speed,
2454                     &adapter->link_duplex);
2455                 /* Check if we must disable SPEED_MODE bit on PCI-E */
2456                 if ((adapter->link_speed != SPEED_1000) &&
2457                     ((hw->mac.type == e1000_82571) ||
2458                     (hw->mac.type == e1000_82572))) {
2459                         int tarc0;
2460                         tarc0 = E1000_READ_REG(hw, E1000_TARC(0));
2461                         tarc0 &= ~TARC_SPEED_MODE_BIT;
2462                         E1000_WRITE_REG(hw, E1000_TARC(0), tarc0);
2463                 }
2464                 if (bootverbose)
2465                         device_printf(dev, "Link is up %d Mbps %s\n",
2466                             adapter->link_speed,
2467                             ((adapter->link_duplex == FULL_DUPLEX) ?
2468                             "Full Duplex" : "Half Duplex"));
2469                 adapter->link_active = 1;
2470                 adapter->smartspeed = 0;
2471                 ifp->if_baudrate = adapter->link_speed * 1000000;
2472                 if_link_state_change(ifp, LINK_STATE_UP);
2473         } else if (!link_check && (adapter->link_active == 1)) {
2474                 ifp->if_baudrate = adapter->link_speed = 0;
2475                 adapter->link_duplex = 0;
2476                 if (bootverbose)
2477                         device_printf(dev, "Link is Down\n");
2478                 adapter->link_active = 0;
2479                 /* Link down, disable hang detection */
2480                 for (int i = 0; i < adapter->num_queues; i++, txr++)
2481                         txr->busy = EM_TX_IDLE;
2482                 if_link_state_change(ifp, LINK_STATE_DOWN);
2483         }
2484 }
2485
2486 /*********************************************************************
2487  *
2488  *  This routine disables all traffic on the adapter by issuing a
2489  *  global reset on the MAC and deallocates TX/RX buffers.
2490  *
2491  *  This routine should always be called with the CORE lock held;
2492  *  it takes each TX lock itself while disarming hang detection.
2493  **********************************************************************/
2494
2495 static void
2496 em_stop(void *arg)
2497 {
2498         struct adapter  *adapter = arg;
2499         struct ifnet    *ifp = adapter->ifp;
2500         struct tx_ring  *txr = adapter->tx_rings;
2501
2502         EM_CORE_LOCK_ASSERT(adapter);
2503
2504         INIT_DEBUGOUT("em_stop: begin");
2505
2506         em_disable_intr(adapter);
2507         callout_stop(&adapter->timer);
2508
2509         /* Tell the stack that the interface is no longer active */
2510         ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
2511         ifp->if_drv_flags |= IFF_DRV_OACTIVE;
2512
2513         /* Disarm Hang Detection. */
2514         for (int i = 0; i < adapter->num_queues; i++, txr++) {
2515                 EM_TX_LOCK(txr);
2516                 txr->busy = EM_TX_IDLE;
2517                 EM_TX_UNLOCK(txr);
2518         }
2519
2520         /* I219 needs some special flushing to avoid hangs */
2521         if (adapter->hw.mac.type == e1000_pch_spt)
2522                 em_flush_desc_rings(adapter);
2523
2524         e1000_reset_hw(&adapter->hw);
2525         E1000_WRITE_REG(&adapter->hw, E1000_WUC, 0);
2526
2527         e1000_led_off(&adapter->hw);
2528         e1000_cleanup_led(&adapter->hw);
2529 }
2530
2531
2532 /*********************************************************************
2533  *
2534  *  Determine hardware revision.
2535  *
2536  **********************************************************************/
2537 static void
2538 em_identify_hardware(struct adapter *adapter)
2539 {
2540         device_t dev = adapter->dev;
2541
2542         /* Make sure our PCI config space has the necessary stuff set */
2543         pci_enable_busmaster(dev);
2544         adapter->hw.bus.pci_cmd_word = pci_read_config(dev, PCIR_COMMAND, 2);
2545
2546         /* Save off the information about this board */
2547         adapter->hw.vendor_id = pci_get_vendor(dev);
2548         adapter->hw.device_id = pci_get_device(dev);
2549         adapter->hw.revision_id = pci_read_config(dev, PCIR_REVID, 1);
2550         adapter->hw.subsystem_vendor_id =
2551             pci_read_config(dev, PCIR_SUBVEND_0, 2);
2552         adapter->hw.subsystem_device_id =
2553             pci_read_config(dev, PCIR_SUBDEV_0, 2);
2554
2555         /* Do Shared Code Init and Setup */
2556         if (e1000_set_mac_type(&adapter->hw)) {
2557                 device_printf(dev, "Setup init failure\n");
2558                 return;
2559         }
2560 }
2561
2562 static int
2563 em_allocate_pci_resources(struct adapter *adapter)
2564 {
2565         device_t        dev = adapter->dev;
2566         int             rid;
2567
2568         rid = PCIR_BAR(0);
2569         adapter->memory = bus_alloc_resource_any(dev, SYS_RES_MEMORY,
2570             &rid, RF_ACTIVE);
2571         if (adapter->memory == NULL) {
2572                 device_printf(dev, "Unable to allocate bus resource: memory\n");
2573                 return (ENXIO);
2574         }
2575         adapter->osdep.mem_bus_space_tag =
2576             rman_get_bustag(adapter->memory);
2577         adapter->osdep.mem_bus_space_handle =
2578             rman_get_bushandle(adapter->memory);
2579         adapter->hw.hw_addr = (u8 *)&adapter->osdep.mem_bus_space_handle;
2580
2581         adapter->hw.back = &adapter->osdep;
2582
2583         return (0);
2584 }
2585
2586 /*********************************************************************
2587  *
2588  *  Setup the Legacy or MSI Interrupt handler
2589  *
2590  **********************************************************************/
2591 int
2592 em_allocate_legacy(struct adapter *adapter)
2593 {
2594         device_t dev = adapter->dev;
2595         struct tx_ring  *txr = adapter->tx_rings;
2596         int error, rid = 0;
2597
2598         /* Manually turn off all interrupts */
2599         E1000_WRITE_REG(&adapter->hw, E1000_IMC, 0xffffffff);
2600
2601         if (adapter->msix == 1) /* using MSI */
2602                 rid = 1;
2603         /* We allocate a single interrupt resource */
2604         adapter->res = bus_alloc_resource_any(dev,
2605             SYS_RES_IRQ, &rid, RF_SHAREABLE | RF_ACTIVE);
2606         if (adapter->res == NULL) {
2607                 device_printf(dev, "Unable to allocate bus resource: "
2608                     "interrupt\n");
2609                 return (ENXIO);
2610         }
2611
2612         /*
2613          * Allocate a fast interrupt and the associated
2614          * deferred processing contexts.
2615          */
2616         TASK_INIT(&adapter->que_task, 0, em_handle_que, adapter);
2617         adapter->tq = taskqueue_create_fast("em_taskq", M_NOWAIT,
2618             taskqueue_thread_enqueue, &adapter->tq);
2619         taskqueue_start_threads(&adapter->tq, 1, PI_NET, "%s que",
2620             device_get_nameunit(adapter->dev));
2621         /* Use a TX only tasklet for local timer */
2622         TASK_INIT(&txr->tx_task, 0, em_handle_tx, txr);
2623         txr->tq = taskqueue_create_fast("em_txq", M_NOWAIT,
2624             taskqueue_thread_enqueue, &txr->tq);
2625         taskqueue_start_threads(&txr->tq, 1, PI_NET, "%s txq",
2626             device_get_nameunit(adapter->dev));
2627         TASK_INIT(&adapter->link_task, 0, em_handle_link, adapter);
2628         if ((error = bus_setup_intr(dev, adapter->res, INTR_TYPE_NET,
2629             em_irq_fast, NULL, adapter, &adapter->tag)) != 0) {
2630                 device_printf(dev, "Failed to register fast interrupt "
2631                             "handler: %d\n", error);
2632                 taskqueue_free(adapter->tq);
2633                 adapter->tq = NULL;
2634                 return (error);
2635         }
2636         
2637         return (0);
2638 }
2639
2640 /*********************************************************************
2641  *
2642  *  Setup the MSIX Interrupt handlers
2643  *   This is not really Multiqueue; rather,
2644  *   it's just separate interrupt vectors
2645  *   for TX, RX, and Link.
2646  *
2647  **********************************************************************/
2648 int
2649 em_allocate_msix(struct adapter *adapter)
2650 {
2651         device_t        dev = adapter->dev;
2652         struct          tx_ring *txr = adapter->tx_rings;
2653         struct          rx_ring *rxr = adapter->rx_rings;
2654         int             error, rid, vector = 0;
2655         int             cpu_id = 0;
2656
2657
2658         /* Make sure all interrupts are disabled */
2659         E1000_WRITE_REG(&adapter->hw, E1000_IMC, 0xffffffff);
2660
2661         /* First set up ring resources */
2662         for (int i = 0; i < adapter->num_queues; i++, rxr++, vector++) {
2663
2664                 /* RX ring */
2665                 rid = vector + 1;
2666
2667                 rxr->res = bus_alloc_resource_any(dev,
2668                     SYS_RES_IRQ, &rid, RF_ACTIVE);
2669                 if (rxr->res == NULL) {
2670                         device_printf(dev,
2671                             "Unable to allocate bus resource: "
2672                             "RX MSIX Interrupt %d\n", i);
2673                         return (ENXIO);
2674                 }
2675                 if ((error = bus_setup_intr(dev, rxr->res,
2676                     INTR_TYPE_NET | INTR_MPSAFE, NULL, em_msix_rx,
2677                     rxr, &rxr->tag)) != 0) {
2678                         device_printf(dev, "Failed to register RX handler\n");
2679                         return (error);
2680                 }
2681 #if __FreeBSD_version >= 800504
2682                 bus_describe_intr(dev, rxr->res, rxr->tag, "rx%d", i);
2683 #endif
2684                 rxr->msix = vector;
2685
2686                 if (em_last_bind_cpu < 0)
2687                         em_last_bind_cpu = CPU_FIRST();
2688                 cpu_id = em_last_bind_cpu;
2689                 bus_bind_intr(dev, rxr->res, cpu_id);
2690
2691                 TASK_INIT(&rxr->rx_task, 0, em_handle_rx, rxr);
2692                 rxr->tq = taskqueue_create_fast("em_rxq", M_NOWAIT,
2693                     taskqueue_thread_enqueue, &rxr->tq);
2694                 taskqueue_start_threads(&rxr->tq, 1, PI_NET, "%s rxq (cpuid %d)",
2695                     device_get_nameunit(adapter->dev), cpu_id);
2696                 /*
2697                 ** Set the bit to enable interrupt
2698                 ** in E1000_IMS -- bits 20 and 21
2699                 ** are for RX0 and RX1, note this has
2700                 ** NOTHING to do with the MSIX vector
2701                 */
2702                 rxr->ims = 1 << (20 + i);
2703                 adapter->ims |= rxr->ims;
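                     /* IVAR field: low 3 bits select the MSIX vector, 0x8 marks it valid */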
2704                 adapter->ivars |= (8 | rxr->msix) << (i * 4);
2705
2706                 em_last_bind_cpu = CPU_NEXT(em_last_bind_cpu);
2707         }
2708
2709         for (int i = 0; i < adapter->num_queues; i++, txr++, vector++) {
2710                 /* TX ring */
2711                 rid = vector + 1;
2712                 txr->res = bus_alloc_resource_any(dev,
2713                     SYS_RES_IRQ, &rid, RF_ACTIVE);
2714                 if (txr->res == NULL) {
2715                         device_printf(dev,
2716                             "Unable to allocate bus resource: "
2717                             "TX MSIX Interrupt %d\n", i);
2718                         return (ENXIO);
2719                 }
2720                 if ((error = bus_setup_intr(dev, txr->res,
2721                     INTR_TYPE_NET | INTR_MPSAFE, NULL, em_msix_tx,
2722                     txr, &txr->tag)) != 0) {
2723                         device_printf(dev, "Failed to register TX handler\n");
2724                         return (error);
2725                 }
2726 #if __FreeBSD_version >= 800504
2727                 bus_describe_intr(dev, txr->res, txr->tag, "tx%d", i);
2728 #endif
2729                 txr->msix = vector;
2730
2731                 if (em_last_bind_cpu < 0)
2732                         em_last_bind_cpu = CPU_FIRST();
2733                 cpu_id = em_last_bind_cpu;
2734                 bus_bind_intr(dev, txr->res, cpu_id);
2735
2736                 TASK_INIT(&txr->tx_task, 0, em_handle_tx, txr);
2737                 txr->tq = taskqueue_create_fast("em_txq", M_NOWAIT,
2738                     taskqueue_thread_enqueue, &txr->tq);
2739                 taskqueue_start_threads(&txr->tq, 1, PI_NET, "%s txq (cpuid %d)",
2740                     device_get_nameunit(adapter->dev), cpu_id);
2741                 /*
2742                 ** Set the bit to enable interrupt
2743                 ** in E1000_IMS -- bits 22 and 23
2744                 ** are for TX0 and TX1, note this has
2745                 ** NOTHING to do with the MSIX vector
2746                 */
2747                 txr->ims = 1 << (22 + i);
2748                 adapter->ims |= txr->ims;
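                     /* TX fields occupy IVAR nibbles 2-3, hence the extra 8-bit shift */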
2749                 adapter->ivars |= (8 | txr->msix) << (8 + (i * 4));
2750
2751                 em_last_bind_cpu = CPU_NEXT(em_last_bind_cpu);
2752         }
2753
2754         /* Link interrupt */
2755         rid = vector + 1;
2756         adapter->res = bus_alloc_resource_any(dev,
2757             SYS_RES_IRQ, &rid, RF_SHAREABLE | RF_ACTIVE);
2758         if (!adapter->res) {
2759                 device_printf(dev,"Unable to allocate "
2760                     "bus resource: Link interrupt [%d]\n", rid);
2761                 return (ENXIO);
2762         }
2763         /* Set the link handler function */
2764         error = bus_setup_intr(dev, adapter->res,
2765             INTR_TYPE_NET | INTR_MPSAFE, NULL,
2766             em_msix_link, adapter, &adapter->tag);
2767         if (error) {
2768                 adapter->res = NULL;
2769                 device_printf(dev, "Failed to register LINK handler\n");
2770                 return (error);
2771         }
2772 #if __FreeBSD_version >= 800504
2773         bus_describe_intr(dev, adapter->res, adapter->tag, "link");
2774 #endif
2775         adapter->linkvec = vector;
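             /*
             ** Route the link/other cause through its own IVAR field (bits
             ** 16-19); bit 31 additionally appears to request an interrupt
             ** on every TX writeback (cf. the equivalent 82574 MSI-X setup
             ** in other e1000 drivers).
             */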
2776         adapter->ivars |=  (8 | vector) << 16;
2777         adapter->ivars |= 0x80000000;
2778
2779         return (0);
2780 }
2781
2782
2783 static void
2784 em_free_pci_resources(struct adapter *adapter)
2785 {
2786         device_t        dev = adapter->dev;
2787         struct tx_ring  *txr;
2788         struct rx_ring  *rxr;
2789         int             rid;
2790
2791
2792         /*
2793         ** Release all the queue interrupt resources:
2794         */
2795         for (int i = 0; i < adapter->num_queues; i++) {
2796                 txr = &adapter->tx_rings[i];
2797                 /* an early abort? */
2798                 if (txr == NULL)
2799                         break;
2800                 rid = txr->msix + 1;
2801                 if (txr->tag != NULL) {
2802                         bus_teardown_intr(dev, txr->res, txr->tag);
2803                         txr->tag = NULL;
2804                 }
2805                 if (txr->res != NULL)
2806                         bus_release_resource(dev, SYS_RES_IRQ,
2807                             rid, txr->res);
2808
2809                 rxr = &adapter->rx_rings[i];
2810                 /* an early abort? */
2811                 if (rxr == NULL)
2812                         break;
2813                 rid = rxr->msix + 1;
2814                 if (rxr->tag != NULL) {
2815                         bus_teardown_intr(dev, rxr->res, rxr->tag);
2816                         rxr->tag = NULL;
2817                 }
2818                 if (rxr->res != NULL)
2819                         bus_release_resource(dev, SYS_RES_IRQ,
2820                             rid, rxr->res);
2821         }
2822
2823         if (adapter->linkvec) /* we are doing MSIX */
2824                 rid = adapter->linkvec + 1;
2825         else
2826                 rid = (adapter->msix != 0) ? 1 : 0;
2827
2828         if (adapter->tag != NULL) {
2829                 bus_teardown_intr(dev, adapter->res, adapter->tag);
2830                 adapter->tag = NULL;
2831         }
2832
2833         if (adapter->res != NULL)
2834                 bus_release_resource(dev, SYS_RES_IRQ, rid, adapter->res);
2835
2836
2837         if (adapter->msix)
2838                 pci_release_msi(dev);
2839
2840         if (adapter->msix_mem != NULL)
2841                 bus_release_resource(dev, SYS_RES_MEMORY,
2842                     PCIR_BAR(EM_MSIX_BAR), adapter->msix_mem);
2843
2844         if (adapter->memory != NULL)
2845                 bus_release_resource(dev, SYS_RES_MEMORY,
2846                     PCIR_BAR(0), adapter->memory);
2847
2848         if (adapter->flash != NULL)
2849                 bus_release_resource(dev, SYS_RES_MEMORY,
2850                     EM_FLASH, adapter->flash);
2851 }
2852
2853 /*
2854  * Setup MSI or MSI-X; returns the vector count (0 = use a legacy IRQ)
2855  */
2856 static int
2857 em_setup_msix(struct adapter *adapter)
2858 {
2859         device_t dev = adapter->dev;
2860         int val;
2861
2862         /* Nearly always going to use one queue */
2863         adapter->num_queues = 1;
2864
2865         /*
2866         ** Try using MSI-X for Hartwell adapters
2867         */
2868         if ((adapter->hw.mac.type == e1000_82574) &&
2869             (em_enable_msix == TRUE)) {
2870 #ifdef EM_MULTIQUEUE
2871                 adapter->num_queues = (em_num_queues == 1) ? 1 : 2;
2872                 if (adapter->num_queues > 1)
2873                         em_enable_vectors_82574(adapter);
2874 #endif
2875                 /* Map the MSIX BAR */
2876                 int rid = PCIR_BAR(EM_MSIX_BAR);
2877                 adapter->msix_mem = bus_alloc_resource_any(dev,
2878                     SYS_RES_MEMORY, &rid, RF_ACTIVE);
2879                 if (adapter->msix_mem == NULL) {
2880                         /* May not be enabled */
2881                         device_printf(adapter->dev,
2882                             "Unable to map MSIX table\n");
2883                         goto msi;
2884                 }
2885                 val = pci_msix_count(dev); 
2886
2887 #ifdef EM_MULTIQUEUE
2888                 /* We need 5 vectors in the multiqueue case: 2 RX, 2 TX, 1 link */
2889                 if (adapter->num_queues > 1 ) {
2890                         if (val >= 5)
2891                                 val = 5;
2892                         else {
2893                                 adapter->num_queues = 1;
2894                                 device_printf(adapter->dev,
2895                                     "Insufficient MSIX vectors for >1 queue, "
2896                                     "using single queue...\n");
2897                                 goto msix_one;
2898                         }
2899                 } else {
2900 msix_one:
2901 #endif
2902                         if (val >= 3)
2903                                 val = 3;
2904                         else {
2905                                 device_printf(adapter->dev,
2906                                     "Insufficient MSIX vectors, using MSI\n");
2907                                 goto msi;
2908                         }
2909 #ifdef EM_MULTIQUEUE
2910                 }
2911 #endif
2912
2913                 if (pci_alloc_msix(dev, &val) == 0) {
2914                         device_printf(adapter->dev,
2915                             "Using MSIX interrupts "
2916                             "with %d vectors\n", val);
2917                         return (val);
2918                 }
2919
2920                 /*
2921                 ** If MSIX alloc failed or provided us with
2922                 ** less than needed, free and fall through to MSI
2923                 */
2924                 pci_release_msi(dev);
2925         }
2926 msi:
2927         if (adapter->msix_mem != NULL) {
2928                 bus_release_resource(dev, SYS_RES_MEMORY,
2929                     PCIR_BAR(EM_MSIX_BAR), adapter->msix_mem);
2930                 adapter->msix_mem = NULL;
2931         }
2932         val = 1;
2933         if (pci_alloc_msi(dev, &val) == 0) {
2934                 device_printf(adapter->dev, "Using an MSI interrupt\n");
2935                 return (val);
2936         } 
2937         /* Should only happen due to manual configuration */
2938         device_printf(adapter->dev, "No MSI/MSI-X, using a legacy IRQ\n");
2939         return (0);
2940 }
2941
2942
2943 /*
2944 ** The 3 following flush routines are used as a workaround in the
2945 ** I219 client parts and only for them.
2946 **
2947 ** em_flush_tx_ring - remove all descriptors from the tx_ring
2948 **
2949 ** We want to clear all pending descriptors from the TX ring.
2950 ** Zeroing happens when the HW reads the regs. We assign the ring itself
2951 ** as the data of the next descriptor. We don't care about the data since
2952 ** we are about to reset the HW.
2953 */
2954 static void
2955 em_flush_tx_ring(struct adapter *adapter)
2956 {
2957         struct e1000_hw         *hw = &adapter->hw;
2958         struct tx_ring          *txr = adapter->tx_rings;
2959         struct e1000_tx_desc    *txd;
2960         u32                     tctl, txd_lower = E1000_TXD_CMD_IFCS;
2961         u16                     size = 512;
2962
2963         tctl = E1000_READ_REG(hw, E1000_TCTL);
2964         E1000_WRITE_REG(hw, E1000_TCTL, tctl | E1000_TCTL_EN);
2965
2966         txd = &txr->tx_base[txr->next_avail_desc++];
2967         if (txr->next_avail_desc == adapter->num_tx_desc)
2968                 txr->next_avail_desc = 0;
2969
2970         /* Just use the ring as a dummy buffer addr */
2971         txd->buffer_addr = txr->txdma.dma_paddr;
2972         txd->lower.data = htole32(txd_lower | size);
2973         txd->upper.data = 0;
2974
2975         /* flush descriptors to memory before notifying the HW */
2976         wmb();
2977
2978         E1000_WRITE_REG(hw, E1000_TDT(0), txr->next_avail_desc);
2979         mb();
2980         usec_delay(250);
2981 }
2982
2983 /*
2984 ** em_flush_rx_ring - remove all descriptors from the rx_ring
2985 **
2986 ** Mark all descriptors in the RX ring as consumed and disable the rx ring
2987 */
2988 static void
2989 em_flush_rx_ring(struct adapter *adapter)
2990 {
2991         struct e1000_hw *hw = &adapter->hw;
2992         u32             rctl, rxdctl;
2993
2994         rctl = E1000_READ_REG(hw, E1000_RCTL);
2995         E1000_WRITE_REG(hw, E1000_RCTL, rctl & ~E1000_RCTL_EN);
2996         E1000_WRITE_FLUSH(hw);
2997         usec_delay(150);
2998
2999         rxdctl = E1000_READ_REG(hw, E1000_RXDCTL(0));
3000         /* zero the lower 14 bits (prefetch and host thresholds) */
3001         rxdctl &= 0xffffc000;
3002         /*
3003          * update thresholds: prefetch threshold to 31, host threshold to 1
3004          * and make sure the granularity is "descriptors" and not "cache lines"
3005          */
3006         rxdctl |= (0x1F | (1 << 8) | E1000_RXDCTL_THRESH_UNIT_DESC);
3007         E1000_WRITE_REG(hw, E1000_RXDCTL(0), rxdctl);
3008
3009         /* momentarily enable the RX ring for the changes to take effect */
3010         E1000_WRITE_REG(hw, E1000_RCTL, rctl | E1000_RCTL_EN);
3011         E1000_WRITE_FLUSH(hw);
3012         usec_delay(150);
3013         E1000_WRITE_REG(hw, E1000_RCTL, rctl & ~E1000_RCTL_EN);
3014 }
3015
3016 /*
3017 ** em_flush_desc_rings - remove all descriptors from the descriptor rings
3018 **
3019 ** In i219, the descriptor rings must be emptied before resetting the HW
3020 ** or before changing the device state to D3 during runtime (runtime PM).
3021 **
3022 ** Failure to do this will cause the HW to enter a unit-hang state which
3023 ** can only be released by a PCI reset of the device.
3024 **
3025 */
3026 static void
3027 em_flush_desc_rings(struct adapter *adapter)
3028 {
3029         struct e1000_hw *hw = &adapter->hw;
3030         device_t        dev = adapter->dev;
3031         u16             hang_state;
3032         u32             fext_nvm11, tdlen;
3033  
3034         /* First, disable MULR fix in FEXTNVM11 */
3035         fext_nvm11 = E1000_READ_REG(hw, E1000_FEXTNVM11);
3036         fext_nvm11 |= E1000_FEXTNVM11_DISABLE_MULR_FIX;
3037         E1000_WRITE_REG(hw, E1000_FEXTNVM11, fext_nvm11);
3038         
3039         /* do nothing if we're not in a faulty state, or if the queue is empty */
3040         tdlen = E1000_READ_REG(hw, E1000_TDLEN(0));
3041         hang_state = pci_read_config(dev, PCICFG_DESC_RING_STATUS, 2);
3042         if (!(hang_state & FLUSH_DESC_REQUIRED) || !tdlen)
3043                 return;
3044         em_flush_tx_ring(adapter);
3045
3046         /* recheck, maybe the fault is caused by the rx ring */
3047         hang_state = pci_read_config(dev, PCICFG_DESC_RING_STATUS, 2);
3048         if (hang_state & FLUSH_DESC_REQUIRED)
3049                 em_flush_rx_ring(adapter);
3050 }
3051
3052
3053 /*********************************************************************
3054  *
3055  *  Initialize the hardware to a configuration
3056  *  as specified by the adapter structure.
3057  *
3058  **********************************************************************/
3059 static void
3060 em_reset(struct adapter *adapter)
3061 {
3062         device_t        dev = adapter->dev;
3063         struct ifnet    *ifp = adapter->ifp;
3064         struct e1000_hw *hw = &adapter->hw;
3065         u16             rx_buffer_size;
3066         u32             pba;
3067
3068         INIT_DEBUGOUT("em_reset: begin");
3069
3070         /* Set up smart power down as default off on newer adapters. */
3071         if (!em_smart_pwr_down && (hw->mac.type == e1000_82571 ||
3072             hw->mac.type == e1000_82572)) {
3073                 u16 phy_tmp = 0;
3074
3075                 /* Speed up time to link by disabling smart power down. */
3076                 e1000_read_phy_reg(hw, IGP02E1000_PHY_POWER_MGMT, &phy_tmp);
3077                 phy_tmp &= ~IGP02E1000_PM_SPD;
3078                 e1000_write_phy_reg(hw, IGP02E1000_PHY_POWER_MGMT, phy_tmp);
3079         }
3080
3081         /*
3082          * Packet Buffer Allocation (PBA)
3083          * Writing PBA sets the receive portion of the buffer;
3084          * the remainder is used for the transmit buffer.
3085          */
3086         switch (hw->mac.type) {
3087         /* Total Packet Buffer on these is 48K */
3088         case e1000_82571:
3089         case e1000_82572:
3090         case e1000_80003es2lan:
3091                 pba = E1000_PBA_32K; /* 32K for Rx, 16K for Tx */
3092                 break;
3093         case e1000_82573: /* 82573: Total Packet Buffer is 32K */
3094                 pba = E1000_PBA_12K; /* 12K for Rx, 20K for Tx */
3095                 break;
3096         case e1000_82574:
3097         case e1000_82583:
3098                 pba = E1000_PBA_20K; /* 20K for Rx, 20K for Tx */
3099                 break;
3100         case e1000_ich8lan:
3101                 pba = E1000_PBA_8K;
3102                 break;
3103         case e1000_ich9lan:
3104         case e1000_ich10lan:
3105                 /* Boost Receive side for jumbo frames */
3106                 if (adapter->hw.mac.max_frame_size > 4096)
3107                         pba = E1000_PBA_14K;
3108                 else
3109                         pba = E1000_PBA_10K;
3110                 break;
3111         case e1000_pchlan:
3112         case e1000_pch2lan:
3113         case e1000_pch_lpt:
3114         case e1000_pch_spt:
3115                 pba = E1000_PBA_26K;
3116                 break;
3117         default:
3118                 if (adapter->hw.mac.max_frame_size > 8192)
3119                         pba = E1000_PBA_40K; /* 40K for Rx, 24K for Tx */
3120                 else
3121                         pba = E1000_PBA_48K; /* 48K for Rx, 16K for Tx */
3122         }
3123         E1000_WRITE_REG(&adapter->hw, E1000_PBA, pba);
3124
3125         /*
3126          * These parameters control the automatic generation (Tx) and
3127          * response (Rx) to Ethernet PAUSE frames.
3128          * - High water mark should allow for at least two frames to be
3129          *   received after sending an XOFF.
3130          * - Low water mark works best when it is very near the high water mark.
3131          *   This allows the receiver to restart by sending XON when it has
3132          *   drained a bit. Here we use an arbitrary value of 1500 which will
3133          *   restart after one full frame is pulled from the buffer. There
3134          *   could be several smaller frames in the buffer and if so they will
3135          *   not trigger the XON until their total number reduces the buffer
3136          *   by 1500.
3137          * - The pause time is fairly large at 1000 x 512ns = 512 usec.
3138          */
3139         rx_buffer_size = ((E1000_READ_REG(hw, E1000_PBA) & 0xffff) << 10);
3140         hw->fc.high_water = rx_buffer_size -
3141             roundup2(adapter->hw.mac.max_frame_size, 1024);
3142         hw->fc.low_water = hw->fc.high_water - 1500;
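             /*
             ** Worked example, assuming a 48K RX PBA and the standard
             ** 1518 byte max frame: rx_buffer_size = 48 * 1024 = 49152,
             ** high_water = 49152 - roundup2(1518, 1024) = 47104, and
             ** low_water = 47104 - 1500 = 45604. Actual values depend
             ** on the MAC type and MTU configured above.
             */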
3143
3144         if (adapter->fc) /* locally set flow control value? */
3145                 hw->fc.requested_mode = adapter->fc;
3146         else
3147                 hw->fc.requested_mode = e1000_fc_full;
3148
3149         if (hw->mac.type == e1000_80003es2lan)
3150                 hw->fc.pause_time = 0xFFFF;
3151         else
3152                 hw->fc.pause_time = EM_FC_PAUSE_TIME;
3153
3154         hw->fc.send_xon = TRUE;
3155
3156         /* Device specific overrides/settings */
3157         switch (hw->mac.type) {
3158         case e1000_pchlan:
3159                 /* Workaround: no TX flow ctrl for PCH */
3160                 hw->fc.requested_mode = e1000_fc_rx_pause;
3161                 hw->fc.pause_time = 0xFFFF; /* override */
3162                 if (ifp->if_mtu > ETHERMTU) {
3163                         hw->fc.high_water = 0x3500;
3164                         hw->fc.low_water = 0x1500;
3165                 } else {
3166                         hw->fc.high_water = 0x5000;
3167                         hw->fc.low_water = 0x3000;
3168                 }
3169                 hw->fc.refresh_time = 0x1000;
3170                 break;
3171         case e1000_pch2lan:
3172         case e1000_pch_lpt:
3173         case e1000_pch_spt:
3174                 hw->fc.high_water = 0x5C20;
3175                 hw->fc.low_water = 0x5048;
3176                 hw->fc.pause_time = 0x0650;
3177                 hw->fc.refresh_time = 0x0400;
3178                 /* Jumbos need adjusted PBA */
3179                 if (ifp->if_mtu > ETHERMTU)
3180                         E1000_WRITE_REG(hw, E1000_PBA, 12);
3181                 else
3182                         E1000_WRITE_REG(hw, E1000_PBA, 26);
3183                 break;
3184         case e1000_ich9lan:
3185         case e1000_ich10lan:
3186                 if (ifp->if_mtu > ETHERMTU) {
3187                         hw->fc.high_water = 0x2800;
3188                         hw->fc.low_water = hw->fc.high_water - 8;
3189                         break;
3190                 } 
3191                 /* else fall thru */
3192         default:
3193                 if (hw->mac.type == e1000_80003es2lan)
3194                         hw->fc.pause_time = 0xFFFF;
3195                 break;
3196         }
3197
3198         /* I219 needs some special flushing to avoid hangs */
3199         if (hw->mac.type == e1000_pch_spt)
3200                 em_flush_desc_rings(adapter);
3201
3202         /* Issue a global reset */
3203         e1000_reset_hw(hw);
3204         E1000_WRITE_REG(hw, E1000_WUC, 0);
3205         em_disable_aspm(adapter);
3206         /* and a re-init */
3207         if (e1000_init_hw(hw) < 0) {
3208                 device_printf(dev, "Hardware Initialization Failed\n");
3209                 return;
3210         }
3211
3212         E1000_WRITE_REG(hw, E1000_VET, ETHERTYPE_VLAN);
3213         e1000_get_phy_info(hw);
3214         e1000_check_for_link(hw);
3215         return;
3216 }
3217
3218 /*********************************************************************
3219  *
3220  *  Setup networking device structure and register an interface.
3221  *
3222  **********************************************************************/
3223 static int
3224 em_setup_interface(device_t dev, struct adapter *adapter)
3225 {
3226         struct ifnet   *ifp;
3227
3228         INIT_DEBUGOUT("em_setup_interface: begin");
3229
3230         ifp = adapter->ifp = if_alloc(IFT_ETHER);
3231         if (ifp == NULL) {
3232                 device_printf(dev, "can not allocate ifnet structure\n");
3233                 return (-1);
3234         }
3235         if_initname(ifp, device_get_name(dev), device_get_unit(dev));
3236         ifp->if_init =  em_init;
3237         ifp->if_softc = adapter;
3238         ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
3239         ifp->if_ioctl = em_ioctl;
3240
3241         /* TSO parameters */
3242         ifp->if_hw_tsomax = IP_MAXPACKET;
3243         /* Take m_pullup(9)'s in em_xmit() w/ TSO into account. */
3244         ifp->if_hw_tsomaxsegcount = EM_MAX_SCATTER - 5;
3245         ifp->if_hw_tsomaxsegsize = EM_TSO_SEG_SIZE;
3246
3247 #ifdef EM_MULTIQUEUE
3248         /* Multiqueue stack interface */
3249         ifp->if_transmit = em_mq_start;
3250         ifp->if_qflush = em_qflush;
3251 #else
3252         ifp->if_start = em_start;
3253         IFQ_SET_MAXLEN(&ifp->if_snd, adapter->num_tx_desc - 1);
3254         ifp->if_snd.ifq_drv_maxlen = adapter->num_tx_desc - 1;
3255         IFQ_SET_READY(&ifp->if_snd);
3256 #endif  
3257
3258         ether_ifattach(ifp, adapter->hw.mac.addr);
3259
3260         ifp->if_capabilities = ifp->if_capenable = 0;
3261
3262
3263         ifp->if_capabilities |= IFCAP_HWCSUM | IFCAP_VLAN_HWCSUM;
3264         ifp->if_capabilities |= IFCAP_TSO4;
3265         /*
3266          * Tell the upper layer(s) we
3267          * support full VLAN capability
3268          */
3269         ifp->if_data.ifi_hdrlen = sizeof(struct ether_vlan_header);
3270         ifp->if_capabilities |= IFCAP_VLAN_HWTAGGING
3271                              |  IFCAP_VLAN_HWTSO
3272                              |  IFCAP_VLAN_MTU;
3273         ifp->if_capenable = ifp->if_capabilities;
3274
3275         /*
3276         ** Don't turn this on by default: if vlans are
3277         ** created on another pseudo device (e.g. lagg)
3278         ** then vlan events are not passed thru, breaking
3279         ** operation, but with HW FILTER off it works. If
3280         ** you use vlans directly on the em driver you can
3281         ** enable this and get full hardware tag filtering.
3282         */
3283         ifp->if_capabilities |= IFCAP_VLAN_HWFILTER;
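             /*
             ** The capability can then be toggled at runtime, e.g. with
             ** "ifconfig em0 vlanhwfilter" or "-vlanhwfilter".
             */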
3284
3285 #ifdef DEVICE_POLLING
3286         ifp->if_capabilities |= IFCAP_POLLING;
3287 #endif
3288
3289         /* Enable only WOL MAGIC by default */
3290         if (adapter->wol) {
3291                 ifp->if_capabilities |= IFCAP_WOL;
3292                 ifp->if_capenable |= IFCAP_WOL_MAGIC;
3293         }
3294                 
3295         /*
3296          * Specify the media types supported by this adapter and register
3297          * callbacks to update media and link information
3298          */
3299         ifmedia_init(&adapter->media, IFM_IMASK,
3300             em_media_change, em_media_status);
3301         if ((adapter->hw.phy.media_type == e1000_media_type_fiber) ||
3302             (adapter->hw.phy.media_type == e1000_media_type_internal_serdes)) {
3303                 u_char fiber_type = IFM_1000_SX;        /* default type */
3304
3305                 ifmedia_add(&adapter->media, IFM_ETHER | fiber_type | IFM_FDX, 
3306                             0, NULL);
3307                 ifmedia_add(&adapter->media, IFM_ETHER | fiber_type, 0, NULL);
3308         } else {
3309                 ifmedia_add(&adapter->media, IFM_ETHER | IFM_10_T, 0, NULL);
3310                 ifmedia_add(&adapter->media, IFM_ETHER | IFM_10_T | IFM_FDX,
3311                             0, NULL);
3312                 ifmedia_add(&adapter->media, IFM_ETHER | IFM_100_TX,
3313                             0, NULL);
3314                 ifmedia_add(&adapter->media, IFM_ETHER | IFM_100_TX | IFM_FDX,
3315                             0, NULL);
3316                 if (adapter->hw.phy.type != e1000_phy_ife) {
3317                         ifmedia_add(&adapter->media,
3318                                 IFM_ETHER | IFM_1000_T | IFM_FDX, 0, NULL);
3319                         ifmedia_add(&adapter->media,
3320                                 IFM_ETHER | IFM_1000_T, 0, NULL);
3321                 }
3322         }
3323         ifmedia_add(&adapter->media, IFM_ETHER | IFM_AUTO, 0, NULL);
3324         ifmedia_set(&adapter->media, IFM_ETHER | IFM_AUTO);
3325         return (0);
3326 }
3327
3328
3329 /*
3330  * Manage DMA'able memory.
3331  */
3332 static void
3333 em_dmamap_cb(void *arg, bus_dma_segment_t *segs, int nseg, int error)
3334 {
3335         if (error)
3336                 return;
3337         *(bus_addr_t *) arg = segs[0].ds_addr;
3338 }
3339
3340 static int
3341 em_dma_malloc(struct adapter *adapter, bus_size_t size,
3342         struct em_dma_alloc *dma, int mapflags)
3343 {
3344         int error;
3345
3346         error = bus_dma_tag_create(bus_get_dma_tag(adapter->dev), /* parent */
3347                                 EM_DBA_ALIGN, 0,        /* alignment, bounds */
3348                                 BUS_SPACE_MAXADDR,      /* lowaddr */
3349                                 BUS_SPACE_MAXADDR,      /* highaddr */
3350                                 NULL, NULL,             /* filter, filterarg */
3351                                 size,                   /* maxsize */
3352                                 1,                      /* nsegments */
3353                                 size,                   /* maxsegsize */
3354                                 0,                      /* flags */
3355                                 NULL,                   /* lockfunc */
3356                                 NULL,                   /* lockarg */
3357                                 &dma->dma_tag);
3358         if (error) {
3359                 device_printf(adapter->dev,
3360                     "%s: bus_dma_tag_create failed: %d\n",
3361                     __func__, error);
3362                 goto fail_0;
3363         }
3364
3365         error = bus_dmamem_alloc(dma->dma_tag, (void**) &dma->dma_vaddr,
3366             BUS_DMA_NOWAIT | BUS_DMA_COHERENT, &dma->dma_map);
3367         if (error) {
3368                 device_printf(adapter->dev,
3369                     "%s: bus_dmamem_alloc(%ju) failed: %d\n",
3370                     __func__, (uintmax_t)size, error);
3371                 goto fail_2;
3372         }
3373
3374         dma->dma_paddr = 0;
3375         error = bus_dmamap_load(dma->dma_tag, dma->dma_map, dma->dma_vaddr,
3376             size, em_dmamap_cb, &dma->dma_paddr, mapflags | BUS_DMA_NOWAIT);
3377         if (error || dma->dma_paddr == 0) {
3378                 device_printf(adapter->dev,
3379                     "%s: bus_dmamap_load failed: %d\n",
3380                     __func__, error);
3381                 goto fail_3;
3382         }
3383
3384         return (0);
3385
3386 fail_3:
3387         bus_dmamap_unload(dma->dma_tag, dma->dma_map);
3388 fail_2:
3389         bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map);
3390         bus_dma_tag_destroy(dma->dma_tag);
3391 fail_0:
3392         dma->dma_tag = NULL;
3393
3394         return (error);
3395 }
3396
3397 static void
3398 em_dma_free(struct adapter *adapter, struct em_dma_alloc *dma)
3399 {
3400         if (dma->dma_tag == NULL)
3401                 return;
3402         if (dma->dma_paddr != 0) {
3403                 bus_dmamap_sync(dma->dma_tag, dma->dma_map,
3404                     BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
3405                 bus_dmamap_unload(dma->dma_tag, dma->dma_map);
3406                 dma->dma_paddr = 0;
3407         }
3408         if (dma->dma_vaddr != NULL) {
3409                 bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map);
3410                 dma->dma_vaddr = NULL;
3411         }
3412         bus_dma_tag_destroy(dma->dma_tag);
3413         dma->dma_tag = NULL;
3414 }
3415
3416
3417 /*********************************************************************
3418  *
3419  *  Allocate memory for the transmit and receive rings, and then
3420  *  the descriptors associated with each, called only once at attach.
3421  *
3422  **********************************************************************/
3423 static int
3424 em_allocate_queues(struct adapter *adapter)
3425 {
3426         device_t                dev = adapter->dev;
3427         struct tx_ring          *txr = NULL;
3428         struct rx_ring          *rxr = NULL;
3429         int rsize, tsize, error = E1000_SUCCESS;
3430         int txconf = 0, rxconf = 0;
3431
3432
3433         /* Allocate the TX ring struct memory */
3434         if (!(adapter->tx_rings =
3435             (struct tx_ring *) malloc(sizeof(struct tx_ring) *
3436             adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
3437                 device_printf(dev, "Unable to allocate TX ring memory\n");
3438                 error = ENOMEM;
3439                 goto fail;
3440         }
3441
3442         /* Now allocate the RX */
3443         if (!(adapter->rx_rings =
3444             (struct rx_ring *) malloc(sizeof(struct rx_ring) *
3445             adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
3446                 device_printf(dev, "Unable to allocate RX ring memory\n");
3447                 error = ENOMEM;
3448                 goto rx_fail;
3449         }
3450
3451         tsize = roundup2(adapter->num_tx_desc *
3452             sizeof(struct e1000_tx_desc), EM_DBA_ALIGN);
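             /*
             ** For example, with the default 1024-entry ring and the
             ** 16-byte legacy descriptor this is 1024 * 16 = 16384 bytes,
             ** already a multiple of EM_DBA_ALIGN; the roundup only
             ** matters for descriptor counts that are not aligned.
             */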
3453         /*
3454          * Now set up the TX queues, txconf is needed to handle the
3455          * possibility that things fail midcourse and we need to
3456          * undo memory gracefully
3457          */ 
3458         for (int i = 0; i < adapter->num_queues; i++, txconf++) {
3459                 /* Set up some basics */
3460                 txr = &adapter->tx_rings[i];
3461                 txr->adapter = adapter;
3462                 txr->me = i;
3463
3464                 /* Initialize the TX lock */
3465                 snprintf(txr->mtx_name, sizeof(txr->mtx_name), "%s:tx(%d)",
3466                     device_get_nameunit(dev), txr->me);
3467                 mtx_init(&txr->tx_mtx, txr->mtx_name, NULL, MTX_DEF);
3468
3469                 if (em_dma_malloc(adapter, tsize,
3470                         &txr->txdma, BUS_DMA_NOWAIT)) {
3471                         device_printf(dev,
3472                             "Unable to allocate TX Descriptor memory\n");
3473                         error = ENOMEM;
3474                         goto err_tx_desc;
3475                 }
3476                 txr->tx_base = (struct e1000_tx_desc *)txr->txdma.dma_vaddr;
3477                 bzero((void *)txr->tx_base, tsize);
3478
3479                 if (em_allocate_transmit_buffers(txr)) {
3480                         device_printf(dev,
3481                             "Critical Failure setting up transmit buffers\n");
3482                         error = ENOMEM;
3483                         goto err_tx_desc;
3484                 }
3485 #if __FreeBSD_version >= 800000
3486                 /* Allocate a buf ring */
3487                 txr->br = buf_ring_alloc(4096, M_DEVBUF,
3488                     M_WAITOK, &txr->tx_mtx);
3489 #endif
3490         }
3491
3492         /*
3493          * Next the RX queues...
3494          */ 
3495         rsize = roundup2(adapter->num_rx_desc *
3496             sizeof(union e1000_rx_desc_extended), EM_DBA_ALIGN);
3497         for (int i = 0; i < adapter->num_queues; i++, rxconf++) {
3498                 rxr = &adapter->rx_rings[i];
3499                 rxr->adapter = adapter;
3500                 rxr->me = i;
3501
3502                 /* Initialize the RX lock */
3503                 snprintf(rxr->mtx_name, sizeof(rxr->mtx_name), "%s:rx(%d)",
3504                     device_get_nameunit(dev), rxr->me);
3505                 mtx_init(&rxr->rx_mtx, rxr->mtx_name, NULL, MTX_DEF);
3506
3507                 if (em_dma_malloc(adapter, rsize,
3508                         &rxr->rxdma, BUS_DMA_NOWAIT)) {
3509                         device_printf(dev,
3510                             "Unable to allocate RX Descriptor memory\n");
3511                         error = ENOMEM;
3512                         goto err_rx_desc;
3513                 }
3514                 rxr->rx_base = (union e1000_rx_desc_extended *)rxr->rxdma.dma_vaddr;
3515                 bzero((void *)rxr->rx_base, rsize);
3516
3517                 /* Allocate receive buffers for the ring */
3518                 if (em_allocate_receive_buffers(rxr)) {
3519                         device_printf(dev,
3520                             "Critical Failure setting up receive buffers\n");
3521                         error = ENOMEM;
3522                         goto err_rx_desc;
3523                 }
3524         }
3525
3526         return (0);
3527
3528 err_rx_desc:
3529         for (rxr = adapter->rx_rings; rxconf > 0; rxr++, rxconf--)
3530                 em_dma_free(adapter, &rxr->rxdma);
3531 err_tx_desc:
3532         for (txr = adapter->tx_rings; txconf > 0; txr++, txconf--)
3533                 em_dma_free(adapter, &txr->txdma);
3534         free(adapter->rx_rings, M_DEVBUF);
3535 rx_fail:
3536 #if __FreeBSD_version >= 800000
3537         if (txr != NULL && txr->br != NULL) buf_ring_free(txr->br, M_DEVBUF);
3538 #endif
3539         free(adapter->tx_rings, M_DEVBUF);
3540 fail:
3541         return (error);
3542 }
3543
3544
3545 /*********************************************************************
3546  *
3547  *  Allocate memory for tx_buffer structures. The tx_buffer stores all
3548  *  the information needed to transmit a packet on the wire. This is
3549  *  called only once at attach, setup is done every reset.
3550  *
3551  **********************************************************************/
3552 static int
3553 em_allocate_transmit_buffers(struct tx_ring *txr)
3554 {
3555         struct adapter *adapter = txr->adapter;
3556         device_t dev = adapter->dev;
3557         struct em_txbuffer *txbuf;
3558         int error, i;
3559
3560         /*
3561          * Setup DMA descriptor areas.
3562          */
3563         if ((error = bus_dma_tag_create(bus_get_dma_tag(dev),
3564                                1, 0,                    /* alignment, bounds */
3565                                BUS_SPACE_MAXADDR,       /* lowaddr */
3566                                BUS_SPACE_MAXADDR,       /* highaddr */
3567                                NULL, NULL,              /* filter, filterarg */
3568                                EM_TSO_SIZE,             /* maxsize */
3569                                EM_MAX_SCATTER,          /* nsegments */
3570                                PAGE_SIZE,               /* maxsegsize */
3571                                0,                       /* flags */
3572                                NULL,                    /* lockfunc */
3573                                NULL,                    /* lockfuncarg */
3574                                &txr->txtag))) {
3575                 device_printf(dev,"Unable to allocate TX DMA tag\n");
3576                 goto fail;
3577         }
3578
3579         if (!(txr->tx_buffers =
3580             (struct em_txbuffer *) malloc(sizeof(struct em_txbuffer) *
3581             adapter->num_tx_desc, M_DEVBUF, M_NOWAIT | M_ZERO))) {
3582                 device_printf(dev, "Unable to allocate tx_buffer memory\n");
3583                 error = ENOMEM;
3584                 goto fail;
3585         }
3586
3587         /* Create the descriptor buffer dma maps */
3588         txbuf = txr->tx_buffers;
3589         for (i = 0; i < adapter->num_tx_desc; i++, txbuf++) {
3590                 error = bus_dmamap_create(txr->txtag, 0, &txbuf->map);
3591                 if (error != 0) {
3592                         device_printf(dev, "Unable to create TX DMA map\n");
3593                         goto fail;
3594                 }
3595         }
3596
3597         return (0);
3598 fail:
3599         /* Free everything; this handles the case where we failed partway through */
3600         em_free_transmit_structures(adapter);
3601         return (error);
3602 }
3603
3604 /*********************************************************************
3605  *
3606  *  Initialize a transmit ring.
3607  *
3608  **********************************************************************/
3609 static void
3610 em_setup_transmit_ring(struct tx_ring *txr)
3611 {
3612         struct adapter *adapter = txr->adapter;
3613         struct em_txbuffer *txbuf;
3614         int i;
3615 #ifdef DEV_NETMAP
3616         struct netmap_adapter *na = NA(adapter->ifp);
3617         struct netmap_slot *slot;
3618 #endif /* DEV_NETMAP */
3619
3620         /* Clear the old descriptor contents */
3621         EM_TX_LOCK(txr);
3622 #ifdef DEV_NETMAP
3623         slot = netmap_reset(na, NR_TX, txr->me, 0);
3624 #endif /* DEV_NETMAP */
3625
3626         bzero((void *)txr->tx_base,
3627               (sizeof(struct e1000_tx_desc)) * adapter->num_tx_desc);
3628         /* Reset indices */
3629         txr->next_avail_desc = 0;
3630         txr->next_to_clean = 0;
3631
3632         /* Free any existing tx buffers. */
3633         txbuf = txr->tx_buffers;
3634         for (i = 0; i < adapter->num_tx_desc; i++, txbuf++) {
3635                 if (txbuf->m_head != NULL) {
3636                         bus_dmamap_sync(txr->txtag, txbuf->map,
3637                             BUS_DMASYNC_POSTWRITE);
3638                         bus_dmamap_unload(txr->txtag, txbuf->map);
3639                         m_freem(txbuf->m_head);
3640                         txbuf->m_head = NULL;
3641                 }
3642 #ifdef DEV_NETMAP
3643                 if (slot) {
3644                         int si = netmap_idx_n2k(&na->tx_rings[txr->me], i);
3645                         uint64_t paddr;
3646                         void *addr;
3647
3648                         addr = PNMB(na, slot + si, &paddr);
3649                         txr->tx_base[i].buffer_addr = htole64(paddr);
3650                         /* reload the map for netmap mode */
3651                         netmap_load_map(na, txr->txtag, txbuf->map, addr);
3652                 }
3653 #endif /* DEV_NETMAP */
3654
3655                 /* clear the watch index */
3656                 txbuf->next_eop = -1;
3657         }
3658
3659         /* Set number of descriptors available */
3660         txr->tx_avail = adapter->num_tx_desc;
3661         txr->busy = EM_TX_IDLE;
3662
3663         /* Clear checksum offload context. */
3664         txr->last_hw_offload = 0;
3665         txr->last_hw_ipcss = 0;
3666         txr->last_hw_ipcso = 0;
3667         txr->last_hw_tucss = 0;
3668         txr->last_hw_tucso = 0;
3669
3670         bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
3671             BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
3672         EM_TX_UNLOCK(txr);
3673 }
3674
3675 /*********************************************************************
3676  *
3677  *  Initialize all transmit rings.
3678  *
3679  **********************************************************************/
3680 static void
3681 em_setup_transmit_structures(struct adapter *adapter)
3682 {
3683         struct tx_ring *txr = adapter->tx_rings;
3684
3685         for (int i = 0; i < adapter->num_queues; i++, txr++)
3686                 em_setup_transmit_ring(txr);
3687
3688         return;
3689 }
3690
3691 /*********************************************************************
3692  *
3693  *  Enable transmit unit.
3694  *
3695  **********************************************************************/
3696 static void
3697 em_initialize_transmit_unit(struct adapter *adapter)
3698 {
3699         struct tx_ring  *txr = adapter->tx_rings;
3700         struct e1000_hw *hw = &adapter->hw;
3701         u32     tctl, txdctl = 0, tarc, tipg = 0;
3702
3703         INIT_DEBUGOUT("em_initialize_transmit_unit: begin");
3704
3705         for (int i = 0; i < adapter->num_queues; i++, txr++) {
3706                 u64 bus_addr = txr->txdma.dma_paddr;
3707                 /* Base and Len of TX Ring */
3708                 E1000_WRITE_REG(hw, E1000_TDLEN(i),
3709                     adapter->num_tx_desc * sizeof(struct e1000_tx_desc));
3710                 E1000_WRITE_REG(hw, E1000_TDBAH(i),
3711                     (u32)(bus_addr >> 32));
3712                 E1000_WRITE_REG(hw, E1000_TDBAL(i),
3713                     (u32)bus_addr);
3714                 /* Init the HEAD/TAIL indices */
3715                 E1000_WRITE_REG(hw, E1000_TDT(i), 0);
3716                 E1000_WRITE_REG(hw, E1000_TDH(i), 0);
3717
3718                 HW_DEBUGOUT2("Base = %x, Length = %x\n",
3719                     E1000_READ_REG(&adapter->hw, E1000_TDBAL(i)),
3720                     E1000_READ_REG(&adapter->hw, E1000_TDLEN(i)));
3721
3722                 txr->busy = EM_TX_IDLE;
3723                 txdctl = 0; /* clear txdctl */
3724                 txdctl |= 0x1f; /* PTHRESH */
3725                 txdctl |= 1 << 8; /* HTHRESH */
3726                 txdctl |= 1 << 16; /* WTHRESH */
3727                 txdctl |= 1 << 22; /* Reserved bit 22 must always be 1 */
3728                 txdctl |= E1000_TXDCTL_GRAN;
3729                 txdctl |= 1 << 25; /* LWTHRESH */
3730
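                     /*
                     ** Net result: prefetch/host/writeback thresholds of
                     ** 31/1/1 in descriptor granularity with a low
                     ** watermark of 1; assuming E1000_TXDCTL_GRAN is bit
                     ** 24, txdctl works out to 0x0341011f.
                     */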
3731                 E1000_WRITE_REG(hw, E1000_TXDCTL(i), txdctl);
3732         }
3733
3734         /* Set the default values for the Tx Inter Packet Gap timer */
3735         switch (adapter->hw.mac.type) {
3736         case e1000_80003es2lan:
3737                 tipg = DEFAULT_82543_TIPG_IPGR1;
3738                 tipg |= DEFAULT_80003ES2LAN_TIPG_IPGR2 <<
3739                     E1000_TIPG_IPGR2_SHIFT;
3740                 break;
3741         default:
3742                 if ((adapter->hw.phy.media_type == e1000_media_type_fiber) ||
3743                     (adapter->hw.phy.media_type ==
3744                     e1000_media_type_internal_serdes))
3745                         tipg = DEFAULT_82543_TIPG_IPGT_FIBER;
3746                 else
3747                         tipg = DEFAULT_82543_TIPG_IPGT_COPPER;
3748                 tipg |= DEFAULT_82543_TIPG_IPGR1 << E1000_TIPG_IPGR1_SHIFT;
3749                 tipg |= DEFAULT_82543_TIPG_IPGR2 << E1000_TIPG_IPGR2_SHIFT;
3750         }
3751
3752         E1000_WRITE_REG(&adapter->hw, E1000_TIPG, tipg);
3753         E1000_WRITE_REG(&adapter->hw, E1000_TIDV, adapter->tx_int_delay.value);
3754
3755         if (adapter->hw.mac.type >= e1000_82540)
3756                 E1000_WRITE_REG(&adapter->hw, E1000_TADV,
3757                     adapter->tx_abs_int_delay.value);
3758
3759         if ((adapter->hw.mac.type == e1000_82571) ||
3760             (adapter->hw.mac.type == e1000_82572)) {
3761                 tarc = E1000_READ_REG(&adapter->hw, E1000_TARC(0));
3762                 tarc |= TARC_SPEED_MODE_BIT;
3763                 E1000_WRITE_REG(&adapter->hw, E1000_TARC(0), tarc);
3764         } else if (adapter->hw.mac.type == e1000_80003es2lan) {
3765                 /* errata: program both queues to unweighted RR */
3766                 tarc = E1000_READ_REG(&adapter->hw, E1000_TARC(0));
3767                 tarc |= 1;
3768                 E1000_WRITE_REG(&adapter->hw, E1000_TARC(0), tarc);
3769                 tarc = E1000_READ_REG(&adapter->hw, E1000_TARC(1));
3770                 tarc |= 1;
3771                 E1000_WRITE_REG(&adapter->hw, E1000_TARC(1), tarc);
3772         } else if (adapter->hw.mac.type == e1000_82574) {
3773                 tarc = E1000_READ_REG(&adapter->hw, E1000_TARC(0));
3774                 tarc |= TARC_ERRATA_BIT;
3775                 if (adapter->num_queues > 1) {
3776                         tarc |= (TARC_COMPENSATION_MODE | TARC_MQ_FIX);
3777                         E1000_WRITE_REG(&adapter->hw, E1000_TARC(0), tarc);
3778                         E1000_WRITE_REG(&adapter->hw, E1000_TARC(1), tarc);
3779                 } else
3780                         E1000_WRITE_REG(&adapter->hw, E1000_TARC(0), tarc);
3781         }
3782
3783         adapter->txd_cmd = E1000_TXD_CMD_IFCS;
3784         if (adapter->tx_int_delay.value > 0)
3785                 adapter->txd_cmd |= E1000_TXD_CMD_IDE;
3786
3787         /* Program the Transmit Control Register */
3788         tctl = E1000_READ_REG(&adapter->hw, E1000_TCTL);
3789         tctl &= ~E1000_TCTL_CT;
3790         tctl |= (E1000_TCTL_PSP | E1000_TCTL_RTLC | E1000_TCTL_EN |
3791                    (E1000_COLLISION_THRESHOLD << E1000_CT_SHIFT));
3792
3793         if (adapter->hw.mac.type >= e1000_82571)
3794                 tctl |= E1000_TCTL_MULR;
3795
3796         /* This write will effectively turn on the transmit unit. */
3797         E1000_WRITE_REG(&adapter->hw, E1000_TCTL, tctl);
3798
3799         if (hw->mac.type == e1000_pch_spt) {
3800                 u32 reg;
3801                 reg = E1000_READ_REG(hw, E1000_IOSFPC);
3802                 reg |= E1000_RCTL_RDMTS_HEX;
3803                 E1000_WRITE_REG(hw, E1000_IOSFPC, reg);
3804                 reg = E1000_READ_REG(hw, E1000_TARC(0));
3805                 reg |= E1000_TARC0_CB_MULTIQ_3_REQ;
3806                 E1000_WRITE_REG(hw, E1000_TARC(0), reg);
3807         }
3808 }
3809
3810
3811 /*********************************************************************
3812  *
3813  *  Free all transmit rings.
3814  *
3815  **********************************************************************/
3816 static void
3817 em_free_transmit_structures(struct adapter *adapter)
3818 {
3819         struct tx_ring *txr = adapter->tx_rings;
3820
3821         for (int i = 0; i < adapter->num_queues; i++, txr++) {
3822                 EM_TX_LOCK(txr);
3823                 em_free_transmit_buffers(txr);
3824                 em_dma_free(adapter, &txr->txdma);
3825                 EM_TX_UNLOCK(txr);
3826                 EM_TX_LOCK_DESTROY(txr);
3827         }
3828
3829         free(adapter->tx_rings, M_DEVBUF);
3830 }
3831
3832 /*********************************************************************
3833  *
3834  *  Free transmit ring related data structures.
3835  *
3836  **********************************************************************/
3837 static void
3838 em_free_transmit_buffers(struct tx_ring *txr)
3839 {
3840         struct adapter          *adapter = txr->adapter;
3841         struct em_txbuffer      *txbuf;
3842
3843         INIT_DEBUGOUT("free_transmit_ring: begin");
3844
3845         if (txr->tx_buffers == NULL)
3846                 return;
3847
3848         for (int i = 0; i < adapter->num_tx_desc; i++) {
3849                 txbuf = &txr->tx_buffers[i];
3850                 if (txbuf->m_head != NULL) {
3851                         bus_dmamap_sync(txr->txtag, txbuf->map,
3852                             BUS_DMASYNC_POSTWRITE);
3853                         bus_dmamap_unload(txr->txtag,
3854                             txbuf->map);
3855                         m_freem(txbuf->m_head);
3856                         txbuf->m_head = NULL;
3857                         if (txbuf->map != NULL) {
3858                                 bus_dmamap_destroy(txr->txtag,
3859                                     txbuf->map);
3860                                 txbuf->map = NULL;
3861                         }
3862                 } else if (txbuf->map != NULL) {
3863                         bus_dmamap_unload(txr->txtag,
3864                             txbuf->map);
3865                         bus_dmamap_destroy(txr->txtag,
3866                             txbuf->map);
3867                         txbuf->map = NULL;
3868                 }
3869         }
3870 #if __FreeBSD_version >= 800000
3871         if (txr->br != NULL)
3872                 buf_ring_free(txr->br, M_DEVBUF);
3873 #endif
3874         if (txr->tx_buffers != NULL) {
3875                 free(txr->tx_buffers, M_DEVBUF);
3876                 txr->tx_buffers = NULL;
3877         }
3878         if (txr->txtag != NULL) {
3879                 bus_dma_tag_destroy(txr->txtag);
3880                 txr->txtag = NULL;
3881         }
3882         return;
3883 }
3884
3885
3886 /*********************************************************************
3887  *  The offload context is protocol specific (TCP/UDP) and thus
3888  *  only needs to be set when the protocol changes. The occasion
3889  *  of a context change can be a performance detriment, and
3890  *  might be better just disabled. The reason arises in the way
3891  *  in which the controller supports pipelined requests from the
3892  *  Tx data DMA. Up to four requests can be pipelined, and they may
3893  *  belong to the same packet or to multiple packets. However all
3894  *  requests for one packet are issued before a request is issued
3895  *  for a subsequent packet and if a request for the next packet
3896  *  requires a context change, that request will be stalled
3897  *  until the previous request completes. This means setting up
3898  *  a new context effectively disables pipelined Tx data DMA which
3899  *  in turn greatly slows down performance when sending small
3900  *  frames.
3901  **********************************************************************/
3902 static void
3903 em_transmit_checksum_setup(struct tx_ring *txr, struct mbuf *mp, int ip_off,
3904     struct ip *ip, u32 *txd_upper, u32 *txd_lower)
3905 {
3906         struct adapter                  *adapter = txr->adapter;
3907         struct e1000_context_desc       *TXD = NULL;
3908         struct em_txbuffer              *tx_buffer;
3909         int                             cur, hdr_len;
3910         u32                             cmd = 0;
3911         u16                             offload = 0;
3912         u8                              ipcso, ipcss, tucso, tucss;
3913
3914         ipcss = ipcso = tucss = tucso = 0;
3915         hdr_len = ip_off + (ip->ip_hl << 2);
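             /* e.g. plain Ethernet + IPv4 without options: 14 + (5 << 2) = 34 */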
3916         cur = txr->next_avail_desc;
3917
3918         /* Setup of IP header checksum. */
3919         if (mp->m_pkthdr.csum_flags & CSUM_IP) {
3920                 *txd_upper |= E1000_TXD_POPTS_IXSM << 8;
3921                 offload |= CSUM_IP;
3922                 ipcss = ip_off;
3923                 ipcso = ip_off + offsetof(struct ip, ip_sum);
3924                 /*
3925                  * Start offset for header checksum calculation.
3926                  * End offset for header checksum calculation.
3927                  * Offset of place to put the checksum.
3928                  */
3929                 TXD = (struct e1000_context_desc *)&txr->tx_base[cur];
3930                 TXD->lower_setup.ip_fields.ipcss = ipcss;
3931                 TXD->lower_setup.ip_fields.ipcse = htole16(hdr_len);
3932                 TXD->lower_setup.ip_fields.ipcso = ipcso;
3933                 cmd |= E1000_TXD_CMD_IP;
3934         }
3935
3936         if (mp->m_pkthdr.csum_flags & CSUM_TCP) {
3937                 *txd_lower = E1000_TXD_CMD_DEXT | E1000_TXD_DTYP_D;
3938                 *txd_upper |= E1000_TXD_POPTS_TXSM << 8;
3939                 offload |= CSUM_TCP;
3940                 tucss = hdr_len;
3941                 tucso = hdr_len + offsetof(struct tcphdr, th_sum);
3942                 /*
3943                  * The 82574L can only remember the *last* context used
3944                  * regardless of the queue that it was used for.  We cannot reuse
3945                  * contexts on this hardware platform and must generate a new
3946                  * context every time.  82574L hardware spec, section 7.2.6,
3947                  * second note.
3948                  */
3949                 if (adapter->num_queues < 2) {
3950                         /*
3951                         * Setting up a new checksum offload context for every
3952                         * frame takes a lot of processing time for the hardware.
3953                         * This also reduces performance a lot for small sized
3954                         * frames, so avoid it if the driver can use a previously
3955                         * configured checksum offload context.
3956                         */
3957                         if (txr->last_hw_offload == offload) {
3958                                 if (offload & CSUM_IP) {
3959                                         if (txr->last_hw_ipcss == ipcss &&
3960                                         txr->last_hw_ipcso == ipcso &&
3961                                         txr->last_hw_tucss == tucss &&
3962                                         txr->last_hw_tucso == tucso)
3963                                                 return;
3964                                 } else {
3965                                         if (txr->last_hw_tucss == tucss &&
3966                                         txr->last_hw_tucso == tucso)
3967                                                 return;
3968                                 }
3969                         }
3970                         txr->last_hw_offload = offload;
3971                         txr->last_hw_tucss = tucss;
3972                         txr->last_hw_tucso = tucso;
3973                 }
3974                 /*
3975                  * Start offset for payload checksum calculation.
3976                  * End offset for payload checksum calculation.
3977                  * Offset of place to put the checksum.
3978                  */
3979                 TXD = (struct e1000_context_desc *)&txr->tx_base[cur];
3980                 TXD->upper_setup.tcp_fields.tucss = hdr_len;
3981                 TXD->upper_setup.tcp_fields.tucse = htole16(0);
3982                 TXD->upper_setup.tcp_fields.tucso = tucso;
3983                 cmd |= E1000_TXD_CMD_TCP;
3984         } else if (mp->m_pkthdr.csum_flags & CSUM_UDP) {
3985                 *txd_lower = E1000_TXD_CMD_DEXT | E1000_TXD_DTYP_D;
3986                 *txd_upper |= E1000_TXD_POPTS_TXSM << 8;
3987                 tucss = hdr_len;
3988                 tucso = hdr_len + offsetof(struct udphdr, uh_sum);
3989                 /*
3990                  * The 82574L can only remember the *last* context used
3991                  * regardless of the queue that it was used for.  We cannot reuse
3992                  * contexts on this hardware platform and must generate a new
3993                  * context every time.  82574L hardware spec, section 7.2.6,
3994                  * second note.
3995                  */
3996                 if (adapter->num_queues < 2) {
3997                         /*
3998                         * Setting up a new checksum offload context for every
3999                         * frame takes a lot of processing time for the hardware.
4000                         * This also reduces performance a lot for small sized
4001                         * frames, so avoid it if the driver can use a previously
4002                         * configured checksum offload context.
4003                         */
4004                         if (txr->last_hw_offload == offload) {
4005                                 if (offload & CSUM_IP) {
4006                                         if (txr->last_hw_ipcss == ipcss &&
4007                                         txr->last_hw_ipcso == ipcso &&
4008                                         txr->last_hw_tucss == tucss &&
4009                                         txr->last_hw_tucso == tucso)
4010                                                 return;
4011                                 } else {
4012                                         if (txr->last_hw_tucss == tucss &&
4013                                         txr->last_hw_tucso == tucso)
4014                                                 return;
4015                                 }
4016                         }
4017                         txr->last_hw_offload = offload;
4018                         txr->last_hw_tucss = tucss;
4019                         txr->last_hw_tucso = tucso;
4020                 }
4021                 /*
4022                  * Start offset for payload checksum calculation.
4023                  * End offset for payload checksum calculation.
4024                  * Offset of place to put the checksum.
4025                  */
4026                 TXD = (struct e1000_context_desc *)&txr->tx_base[cur];
4027                 TXD->upper_setup.tcp_fields.tucss = tucss;
4028                 TXD->upper_setup.tcp_fields.tucse = htole16(0);
4029                 TXD->upper_setup.tcp_fields.tucso = tucso;
4030         }
4031   
4032         if (offload & CSUM_IP) {
4033                 txr->last_hw_ipcss = ipcss;
4034                 txr->last_hw_ipcso = ipcso;
4035         }
4036
4037         TXD->tcp_seg_setup.data = htole32(0);
4038         TXD->cmd_and_length =
4039             htole32(adapter->txd_cmd | E1000_TXD_CMD_DEXT | cmd);
4040         tx_buffer = &txr->tx_buffers[cur];
4041         tx_buffer->m_head = NULL;
4042         tx_buffer->next_eop = -1;
4043
4044         if (++cur == adapter->num_tx_desc)
4045                 cur = 0;
4046
4047         txr->tx_avail--;
4048         txr->next_avail_desc = cur;
4049 }
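
/*
 * Worked example for the UDP branch above (illustrative only, assuming an
 * untagged Ethernet + IPv4 frame with a 20-byte IP header):
 *
 *      ip_off  = 14                    Ethernet header length
 *      hdr_len = 14 + (5 << 2) = 34    end of the IP header
 *      tucss   = 34                    payload checksum starts here
 *      tucso   = 34 + offsetof(struct udphdr, uh_sum) = 34 + 6 = 40
 *      tucse   = 0                     checksum runs to end of frame
 *
 * The hardware checksums everything from tucss to the end of the frame
 * and stores the result at byte offset tucso.
 */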
4050
4051
4052 /**********************************************************************
4053  *
4054  *  Setup work for hardware segmentation offload (TSO)
4055  *
4056  **********************************************************************/
4057 static void
4058 em_tso_setup(struct tx_ring *txr, struct mbuf *mp, int ip_off,
4059     struct ip *ip, struct tcphdr *tp, u32 *txd_upper, u32 *txd_lower)
4060 {
4061         struct adapter                  *adapter = txr->adapter;
4062         struct e1000_context_desc       *TXD;
4063         struct em_txbuffer              *tx_buffer;
4064         int cur, hdr_len;
4065
4066         /*
4067          * In theory we could reuse the same TSO context if and only if
4068          * the frame is the same type (IP/TCP) and has the same MSS.
4069          * However, checking whether a frame has the same IP/TCP
4070          * structure is hard, so just ignore that and always establish
4071          * a new TSO context.
4072          */
4073         hdr_len = ip_off + (ip->ip_hl << 2) + (tp->th_off << 2);
4074         *txd_lower = (E1000_TXD_CMD_DEXT |      /* Extended descr type */
4075                       E1000_TXD_DTYP_D |        /* Data descr type */
4076                       E1000_TXD_CMD_TSE);       /* Do TSE on this packet */
4077
4078         /* IP and/or TCP header checksum calculation and insertion. */
4079         *txd_upper = (E1000_TXD_POPTS_IXSM | E1000_TXD_POPTS_TXSM) << 8;
4080
4081         cur = txr->next_avail_desc;
4082         tx_buffer = &txr->tx_buffers[cur];
4083         TXD = (struct e1000_context_desc *) &txr->tx_base[cur];
4084
4085         /*
4086          * Start offset for header checksum calculation.
4087          * End offset for header checksum calculation.
4088          * Offset of place to put the checksum.
4089          */
4090         TXD->lower_setup.ip_fields.ipcss = ip_off;
4091         TXD->lower_setup.ip_fields.ipcse =
4092             htole16(ip_off + (ip->ip_hl << 2) - 1);
4093         TXD->lower_setup.ip_fields.ipcso = ip_off + offsetof(struct ip, ip_sum);
4094         /*
4095          * Start offset for payload checksum calculation.
4096          * End offset for payload checksum calculation.
4097          * Offset of place to put the checksum.
4098          */
4099         TXD->upper_setup.tcp_fields.tucss = ip_off + (ip->ip_hl << 2);
4100         TXD->upper_setup.tcp_fields.tucse = htole16(0);
4101         TXD->upper_setup.tcp_fields.tucso =
4102             ip_off + (ip->ip_hl << 2) + offsetof(struct tcphdr, th_sum);
4103         /*
4104          * Payload size per packet w/o any headers.
4105          * Length of all headers up to payload.
4106          */
4107         TXD->tcp_seg_setup.fields.mss = htole16(mp->m_pkthdr.tso_segsz);
4108         TXD->tcp_seg_setup.fields.hdr_len = hdr_len;
4109
4110         TXD->cmd_and_length = htole32(adapter->txd_cmd |
4111                                 E1000_TXD_CMD_DEXT |    /* Extended descr */
4112                                 E1000_TXD_CMD_TSE |     /* TSE context */
4113                                 E1000_TXD_CMD_IP |      /* Do IP csum */
4114                                 E1000_TXD_CMD_TCP |     /* Do TCP checksum */
4115                                 (mp->m_pkthdr.len - (hdr_len))); /* Total len */
4116
4117         tx_buffer->m_head = NULL;
4118         tx_buffer->next_eop = -1;
4119
4120         if (++cur == adapter->num_tx_desc)
4121                 cur = 0;
4122
4123         txr->tx_avail--;
4124         txr->next_avail_desc = cur;
4125         txr->tx_tso = TRUE;
4126 }
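
/*
 * Worked example for em_tso_setup() (illustrative only, assuming a TCP
 * stream over IPv4 with no IP or TCP options at the standard 1500-byte
 * MTU):
 *
 *      hdr_len = 14 + 20 + 20 = 54     Ethernet + IP + TCP headers
 *      mss     = 1460                  tso_segsz supplied by the stack
 *
 * The hardware cuts the remaining payload (m_pkthdr.len - 54) into
 * mss-sized segments, replicating and fixing up the 54-byte header
 * template described by this context descriptor for each segment.
 */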
4127
4128
4129 /**********************************************************************
4130  *
4131  *  Examine each tx_buffer in the used queue. If the hardware is done
4132  *  processing the packet then free associated resources. The
4133  *  tx_buffer is put back on the free queue.
4134  *
4135  **********************************************************************/
4136 static void
4137 em_txeof(struct tx_ring *txr)
4138 {
4139         struct adapter  *adapter = txr->adapter;
4140         int first, last, done, processed;
4141         struct em_txbuffer *tx_buffer;
4142         struct e1000_tx_desc   *tx_desc, *eop_desc;
4143         struct ifnet   *ifp = adapter->ifp;
4144
4145         EM_TX_LOCK_ASSERT(txr);
4146 #ifdef DEV_NETMAP
4147         if (netmap_tx_irq(ifp, txr->me))
4148                 return;
4149 #endif /* DEV_NETMAP */
4150
4151         /* No work, make sure hang detection is disabled */
4152         if (txr->tx_avail == adapter->num_tx_desc) {
4153                 txr->busy = EM_TX_IDLE;
4154                 return;
4155         }
4156
4157         processed = 0;
4158         first = txr->next_to_clean;
4159         tx_desc = &txr->tx_base[first];
4160         tx_buffer = &txr->tx_buffers[first];
4161         last = tx_buffer->next_eop;
4162         eop_desc = &txr->tx_base[last];
4163
4164         /*
4165          * What this does is get the index of the
4166          * first descriptor AFTER the EOP of the 
4167          * first packet, that way we can do the
4168          * simple comparison on the inner while loop.
4169          */
4170         if (++last == adapter->num_tx_desc)
4171                 last = 0;
4172         done = last;
4173
4174         bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
4175             BUS_DMASYNC_POSTREAD);
4176
4177         while (eop_desc->upper.fields.status & E1000_TXD_STAT_DD) {
4178                 /* We clean the range of the packet */
4179                 while (first != done) {
4180                         tx_desc->upper.data = 0;
4181                         tx_desc->lower.data = 0;
4182                         tx_desc->buffer_addr = 0;
4183                         ++txr->tx_avail;
4184                         ++processed;
4185
4186                         if (tx_buffer->m_head) {
4187                                 bus_dmamap_sync(txr->txtag,
4188                                     tx_buffer->map,
4189                                     BUS_DMASYNC_POSTWRITE);
4190                                 bus_dmamap_unload(txr->txtag,
4191                                     tx_buffer->map);
4192                                 m_freem(tx_buffer->m_head);
4193                                 tx_buffer->m_head = NULL;
4194                         }
4195                         tx_buffer->next_eop = -1;
4196
4197                         if (++first == adapter->num_tx_desc)
4198                                 first = 0;
4199
4200                         tx_buffer = &txr->tx_buffers[first];
4201                         tx_desc = &txr->tx_base[first];
4202                 }
4203                 ++ifp->if_opackets;
4204                 /* See if we can continue to the next packet */
4205                 last = tx_buffer->next_eop;
4206                 if (last != -1) {
4207                         eop_desc = &txr->tx_base[last];
4208                         /* Get new done point */
4209                         if (++last == adapter->num_tx_desc) last = 0;
4210                         done = last;
4211                 } else
4212                         break;
4213         }
4214         bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
4215             BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
4216
4217         txr->next_to_clean = first;
4218
4219         /*
4220         ** Hang detection: we know there's work outstanding
4221         ** or the early return above would have been taken, so
4222         ** no descriptor processed here indicates a potential hang.
4223         ** The local timer will examine this and do a reset if needed.
4224         */
4225         if (processed == 0) {
4226                 if (txr->busy != EM_TX_HUNG)
4227                         ++txr->busy;
4228         } else /* At least one descriptor was cleaned */
4229                 txr->busy = EM_TX_BUSY; /* note this clears HUNG */
4230
4231         /*
4232          * If we have a minimum free, clear IFF_DRV_OACTIVE
4233          * to tell the stack that it is OK to send packets.
4234          * Notice that all writes of OACTIVE happen under the
4235          * TX lock which, with a single queue, guarantees 
4236          * sanity.
4237          */
4238         if (txr->tx_avail >= EM_MAX_SCATTER) {
4239                 ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;
4240         }
4241
4242         /* Disable hang detection if all clean */
4243         if (txr->tx_avail == adapter->num_tx_desc)
4244                 txr->busy = EM_TX_IDLE;
4245 }
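
/*
 * Illustration of the cleaning walk in em_txeof() (hypothetical
 * 8-descriptor ring): if next_to_clean = 6 and the first packet's EOP
 * sits in slot 1, then done = 2 and the inner loop frees slots 6, 7,
 * 0 and 1 before testing DD on the next packet's EOP. All index
 * arithmetic wraps with the "if (++i == num_tx_desc) i = 0" pattern
 * used throughout this file.
 */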
4246
4247 /*********************************************************************
4248  *
4249  *  Refresh RX descriptor mbufs from system mbuf buffer pool.
4250  *
4251  **********************************************************************/
4252 static void
4253 em_refresh_mbufs(struct rx_ring *rxr, int limit)
4254 {
4255         struct adapter          *adapter = rxr->adapter;
4256         struct mbuf             *m;
4257         bus_dma_segment_t       segs;
4258         struct em_rxbuffer      *rxbuf;
4259         int                     i, j, error, nsegs;
4260         bool                    cleaned = FALSE;
4261
4262         i = j = rxr->next_to_refresh;
4263         /*
4264         ** Get one descriptor beyond
4265         ** our work mark to control
4266         ** the loop.
4267         */
4268         if (++j == adapter->num_rx_desc)
4269                 j = 0;
4270
4271         while (j != limit) {
4272                 rxbuf = &rxr->rx_buffers[i];
4273                 if (rxbuf->m_head == NULL) {
4274                         m = m_getjcl(M_NOWAIT, MT_DATA,
4275                             M_PKTHDR, adapter->rx_mbuf_sz);
4276                         /*
4277                         ** If we have a temporary resource shortage
4278                         ** that causes a failure, just abort the refresh
4279                         ** for now; we will return to this point when
4280                         ** reinvoked from em_rxeof.
4281                         */
4282                         if (m == NULL)
4283                                 goto update;
4284                 } else
4285                         m = rxbuf->m_head;
4286
4287                 m->m_len = m->m_pkthdr.len = adapter->rx_mbuf_sz;
4288                 m->m_flags |= M_PKTHDR;
4289                 m->m_data = m->m_ext.ext_buf;
4290
4291                 /* Use bus_dma machinery to setup the memory mapping  */
4292                 error = bus_dmamap_load_mbuf_sg(rxr->rxtag, rxbuf->map,
4293                     m, &segs, &nsegs, BUS_DMA_NOWAIT);
4294                 if (error != 0) {
4295                         printf("Refresh mbufs: hdr dmamap load"
4296                             " failure - %d\n", error);
4297                         m_free(m);
4298                         rxbuf->m_head = NULL;
4299                         goto update;
4300                 }
4301                 rxbuf->m_head = m;
4302                 rxbuf->paddr = segs.ds_addr;
4303                 bus_dmamap_sync(rxr->rxtag,
4304                     rxbuf->map, BUS_DMASYNC_PREREAD);
4305                 em_setup_rxdesc(&rxr->rx_base[i], rxbuf);
4306                 cleaned = TRUE;
4307
4308                 i = j; /* Next is precalculated for us */
4309                 rxr->next_to_refresh = i;
4310                 /* Calculate next controlling index */
4311                 if (++j == adapter->num_rx_desc)
4312                         j = 0;
4313         }
4314 update:
4315         /*
4316         ** Update the tail pointer only if, and only
4317         ** as far as, we have refreshed descriptors.
4318         */
4319         if (cleaned)
4320                 E1000_WRITE_REG(&adapter->hw,
4321                     E1000_RDT(rxr->me), rxr->next_to_refresh);
4322
4323         return;
4324 }
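
/*
 * Loop-control sketch for em_refresh_mbufs() (hypothetical
 * 4-descriptor ring): with next_to_refresh = 1 and limit = 0, j runs
 * one slot ahead of i, so the loop refreshes slots 1 and 2 and exits
 * when j wraps around to limit. One slot is deliberately left
 * unrefreshed so the tail pointer never fully catches up with the
 * hardware's head pointer.
 */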
4325
4326
4327 /*********************************************************************
4328  *
4329  *  Allocate memory for rx_buffer structures. Since we use one
4330  *  rx_buffer per received packet, the maximum number of rx_buffer's
4331  *  that we'll need is equal to the number of receive descriptors
4332  *  that we've allocated.
4333  *
4334  **********************************************************************/
4335 static int
4336 em_allocate_receive_buffers(struct rx_ring *rxr)
4337 {
4338         struct adapter          *adapter = rxr->adapter;
4339         device_t                dev = adapter->dev;
4340         struct em_rxbuffer      *rxbuf;
4341         int                     error;
4342
4343         rxr->rx_buffers = malloc(sizeof(struct em_rxbuffer) *
4344             adapter->num_rx_desc, M_DEVBUF, M_NOWAIT | M_ZERO);
4345         if (rxr->rx_buffers == NULL) {
4346                 device_printf(dev, "Unable to allocate rx_buffer memory\n");
4347                 return (ENOMEM);
4348         }
4349
4350         error = bus_dma_tag_create(bus_get_dma_tag(dev), /* parent */
4351                                 1, 0,                   /* alignment, bounds */
4352                                 BUS_SPACE_MAXADDR,      /* lowaddr */
4353                                 BUS_SPACE_MAXADDR,      /* highaddr */
4354                                 NULL, NULL,             /* filter, filterarg */
4355                                 MJUM9BYTES,             /* maxsize */
4356                                 1,                      /* nsegments */
4357                                 MJUM9BYTES,             /* maxsegsize */
4358                                 0,                      /* flags */
4359                                 NULL,                   /* lockfunc */
4360                                 NULL,                   /* lockarg */
4361                                 &rxr->rxtag);
4362         if (error) {
4363                 device_printf(dev, "%s: bus_dma_tag_create failed %d\n",
4364                     __func__, error);
4365                 goto fail;
4366         }
4367
4368         /* Create a dma map for each receive buffer */
4369         for (int i = 0; i < adapter->num_rx_desc; i++) {
4370                 rxbuf = &rxr->rx_buffers[i];
4371                 error = bus_dmamap_create(rxr->rxtag, 0, &rxbuf->map);
4372                 if (error) {
4373                         device_printf(dev, "%s: bus_dmamap_create failed: %d\n",
4374                             __func__, error);
4375                         goto fail;
4376                 }
4377         }
4378
4379         return (0);
4380
4381 fail:
4382         em_free_receive_structures(adapter);
4383         return (error);
4384 }
4385
4386
4387 /*********************************************************************
4388  *
4389  *  Initialize a receive ring and its buffers.
4390  *
4391  **********************************************************************/
4392 static int
4393 em_setup_receive_ring(struct rx_ring *rxr)
4394 {
4395         struct  adapter         *adapter = rxr->adapter;
4396         struct em_rxbuffer      *rxbuf;
4397         bus_dma_segment_t       seg[1];
4398         int                     rsize, nsegs, error = 0;
4399 #ifdef DEV_NETMAP
4400         struct netmap_adapter *na = NA(adapter->ifp);
4401         struct netmap_slot *slot;
4402 #endif
4403
4404
4405         /* Clear the ring contents */
4406         EM_RX_LOCK(rxr);
4407         rsize = roundup2(adapter->num_rx_desc *
4408             sizeof(union e1000_rx_desc_extended), EM_DBA_ALIGN);
4409         bzero((void *)rxr->rx_base, rsize);
4410 #ifdef DEV_NETMAP
4411         slot = netmap_reset(na, NR_RX, 0, 0);
4412 #endif
4413
4414         /*
4415         ** Free current RX buffer structs and their mbufs
4416         */
4417         for (int i = 0; i < adapter->num_rx_desc; i++) {
4418                 rxbuf = &rxr->rx_buffers[i];
4419                 if (rxbuf->m_head != NULL) {
4420                         bus_dmamap_sync(rxr->rxtag, rxbuf->map,
4421                             BUS_DMASYNC_POSTREAD);
4422                         bus_dmamap_unload(rxr->rxtag, rxbuf->map);
4423                         m_freem(rxbuf->m_head);
4424                         rxbuf->m_head = NULL; /* mark as freed */
4425                 }
4426         }
4427
4428         /* Now replenish the mbufs */
4429         for (int j = 0; j != adapter->num_rx_desc; ++j) {
4430                 rxbuf = &rxr->rx_buffers[j];
4431 #ifdef DEV_NETMAP
4432                 if (slot) {
4433                         int si = netmap_idx_n2k(&na->rx_rings[rxr->me], j);
4434                         uint64_t paddr;
4435                         void *addr;
4436
4437                         addr = PNMB(na, slot + si, &paddr);
4438                         netmap_load_map(na, rxr->rxtag, rxbuf->map, addr);
4439                         em_setup_rxdesc(&rxr->rx_base[j], rxbuf);
4440                         continue;
4441                 }
4442 #endif /* DEV_NETMAP */
4443                 rxbuf->m_head = m_getjcl(M_NOWAIT, MT_DATA,
4444                     M_PKTHDR, adapter->rx_mbuf_sz);
4445                 if (rxbuf->m_head == NULL) {
4446                         error = ENOBUFS;
4447                         goto fail;
4448                 }
4449                 rxbuf->m_head->m_len = adapter->rx_mbuf_sz;
4450                 rxbuf->m_head->m_flags &= ~M_HASFCS; /* we strip it */
4451                 rxbuf->m_head->m_pkthdr.len = adapter->rx_mbuf_sz;
4452
4453                 /* Get the memory mapping */
4454                 error = bus_dmamap_load_mbuf_sg(rxr->rxtag,
4455                     rxbuf->map, rxbuf->m_head, seg,
4456                     &nsegs, BUS_DMA_NOWAIT);
4457                 if (error != 0) {
4458                         m_freem(rxbuf->m_head);
4459                         rxbuf->m_head = NULL;
4460                         goto fail;
4461                 }
4462                 bus_dmamap_sync(rxr->rxtag,
4463                     rxbuf->map, BUS_DMASYNC_PREREAD);
4464
4465                 rxbuf->paddr = seg[0].ds_addr;
4466                 em_setup_rxdesc(&rxr->rx_base[j], rxbuf);
4467         }
4468         rxr->next_to_check = 0;
4469         rxr->next_to_refresh = 0;
4470         bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
4471             BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
4472
4473 fail:
4474         EM_RX_UNLOCK(rxr);
4475         return (error);
4476 }
4477
4478 /*********************************************************************
4479  *
4480  *  Initialize all receive rings.
4481  *
4482  **********************************************************************/
4483 static int
4484 em_setup_receive_structures(struct adapter *adapter)
4485 {
4486         struct rx_ring *rxr = adapter->rx_rings;
4487         int q;
4488
4489         for (q = 0; q < adapter->num_queues; q++, rxr++)
4490                 if (em_setup_receive_ring(rxr))
4491                         goto fail;
4492
4493         return (0);
4494 fail:
4495         /*
4496          * Free RX buffers allocated so far; we only handle
4497          * the rings that completed, since the failing case has
4498          * cleaned up after itself. 'q' failed, so it's the terminus.
4499          */
4500         for (int i = 0; i < q; ++i) {
4501                 rxr = &adapter->rx_rings[i];
4502                 for (int n = 0; n < adapter->num_rx_desc; n++) {
4503                         struct em_rxbuffer *rxbuf;
4504                         rxbuf = &rxr->rx_buffers[n];
4505                         if (rxbuf->m_head != NULL) {
4506                                 bus_dmamap_sync(rxr->rxtag, rxbuf->map,
4507                                   BUS_DMASYNC_POSTREAD);
4508                                 bus_dmamap_unload(rxr->rxtag, rxbuf->map);
4509                                 m_freem(rxbuf->m_head);
4510                                 rxbuf->m_head = NULL;
4511                         }
4512                 }
4513                 rxr->next_to_check = 0;
4514                 rxr->next_to_refresh = 0;
4515         }
4516
4517         return (ENOBUFS);
4518 }
4519
4520 /*********************************************************************
4521  *
4522  *  Free all receive rings.
4523  *
4524  **********************************************************************/
4525 static void
4526 em_free_receive_structures(struct adapter *adapter)
4527 {
4528         struct rx_ring *rxr = adapter->rx_rings;
4529
4530         for (int i = 0; i < adapter->num_queues; i++, rxr++) {
4531                 em_free_receive_buffers(rxr);
4532                 /* Free the ring memory as well */
4533                 em_dma_free(adapter, &rxr->rxdma);
4534                 EM_RX_LOCK_DESTROY(rxr);
4535         }
4536
4537         free(adapter->rx_rings, M_DEVBUF);
4538 }
4539
4540
4541 /*********************************************************************
4542  *
4543  *  Free receive ring data structures
4544  *
4545  **********************************************************************/
4546 static void
4547 em_free_receive_buffers(struct rx_ring *rxr)
4548 {
4549         struct adapter          *adapter = rxr->adapter;
4550         struct em_rxbuffer      *rxbuf = NULL;
4551
4552         INIT_DEBUGOUT("free_receive_buffers: begin");
4553
4554         if (rxr->rx_buffers != NULL) {
4555                 for (int i = 0; i < adapter->num_rx_desc; i++) {
4556                         rxbuf = &rxr->rx_buffers[i];
4557                         if (rxbuf->map != NULL) {
4558                                 bus_dmamap_sync(rxr->rxtag, rxbuf->map,
4559                                     BUS_DMASYNC_POSTREAD);
4560                                 bus_dmamap_unload(rxr->rxtag, rxbuf->map);
4561                                 bus_dmamap_destroy(rxr->rxtag, rxbuf->map);
4562                         }
4563                         if (rxbuf->m_head != NULL) {
4564                                 m_freem(rxbuf->m_head);
4565                                 rxbuf->m_head = NULL;
4566                         }
4567                 }
4568                 free(rxr->rx_buffers, M_DEVBUF);
4569                 rxr->rx_buffers = NULL;
4570                 rxr->next_to_check = 0;
4571                 rxr->next_to_refresh = 0;
4572         }
4573
4574         if (rxr->rxtag != NULL) {
4575                 bus_dma_tag_destroy(rxr->rxtag);
4576                 rxr->rxtag = NULL;
4577         }
4578
4579         return;
4580 }
4581
4582
4583 /*********************************************************************
4584  *
4585  *  Enable receive unit.
4586  *
4587  **********************************************************************/
4588
4589 static void
4590 em_initialize_receive_unit(struct adapter *adapter)
4591 {
4592         struct rx_ring *rxr = adapter->rx_rings;
4593         struct ifnet    *ifp = adapter->ifp;
4594         struct e1000_hw *hw = &adapter->hw;
4595         u32     rctl, rxcsum, rfctl;
4596
4597         INIT_DEBUGOUT("em_initialize_receive_units: begin");
4598
4599         /*
4600          * Make sure receives are disabled while setting
4601          * up the descriptor ring
4602          */
4603         rctl = E1000_READ_REG(hw, E1000_RCTL);
4604         /* Do not disable the receiver if it has ever been enabled on this hardware */
4605         if ((hw->mac.type != e1000_82574) && (hw->mac.type != e1000_82583))
4606                 E1000_WRITE_REG(hw, E1000_RCTL, rctl & ~E1000_RCTL_EN);
4607
4608         /* Setup the Receive Control Register */
4609         rctl &= ~(3 << E1000_RCTL_MO_SHIFT);
4610         rctl |= E1000_RCTL_EN | E1000_RCTL_BAM |
4611             E1000_RCTL_LBM_NO | E1000_RCTL_RDMTS_HALF |
4612             (hw->mac.mc_filter_type << E1000_RCTL_MO_SHIFT);
4613
4614         /* Do not store bad packets */
4615         rctl &= ~E1000_RCTL_SBP;
4616
4617         /* Enable Long Packet receive */
4618         if (ifp->if_mtu > ETHERMTU)
4619                 rctl |= E1000_RCTL_LPE;
4620         else
4621                 rctl &= ~E1000_RCTL_LPE;
4622
4623         /* Strip the CRC */
4624         if (!em_disable_crc_stripping)
4625                 rctl |= E1000_RCTL_SECRC;
4626
4627         E1000_WRITE_REG(&adapter->hw, E1000_RADV,
4628             adapter->rx_abs_int_delay.value);
4629
4630         E1000_WRITE_REG(&adapter->hw, E1000_RDTR,
4631             adapter->rx_int_delay.value);
4632         /*
4633          * Set the interrupt throttling rate. Value is calculated
4634          * as DEFAULT_ITR = 1/(MAX_INTS_PER_SEC * 256ns)
4635          */
4636         E1000_WRITE_REG(hw, E1000_ITR, DEFAULT_ITR);
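        /*
         * Worked example (assuming the driver default of
         * MAX_INTS_PER_SEC = 8000): the ITR register counts in units
         * of 256 ns, so DEFAULT_ITR = 1 / (8000 * 256e-9) ~= 488,
         * i.e. the hardware waits at least 488 * 256 ns ~= 125 us
         * between interrupts.
         */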
4637
4638         /* Use extended rx descriptor formats */
4639         rfctl = E1000_READ_REG(hw, E1000_RFCTL);
4640         rfctl |= E1000_RFCTL_EXTEN;
4641         /*
4642         ** When using MSI-X interrupts we need to throttle
4643         ** using the EITR registers (82574 only)
4644         */
4645         if (hw->mac.type == e1000_82574) {
4646                 for (int i = 0; i < 4; i++)
4647                         E1000_WRITE_REG(hw, E1000_EITR_82574(i),
4648                             DEFAULT_ITR);
4649                 /* Disable accelerated acknowledge */
4650                 rfctl |= E1000_RFCTL_ACK_DIS;
4651         }
4652         E1000_WRITE_REG(hw, E1000_RFCTL, rfctl);
4653
4654         rxcsum = E1000_READ_REG(hw, E1000_RXCSUM);
4655         if (ifp->if_capenable & IFCAP_RXCSUM) {
4656 #ifdef EM_MULTIQUEUE
4657                 rxcsum |= E1000_RXCSUM_TUOFL |
4658                           E1000_RXCSUM_IPOFL |
4659                           E1000_RXCSUM_PCSD;
4660 #else
4661                 rxcsum |= E1000_RXCSUM_TUOFL;
4662 #endif
4663         } else
4664                 rxcsum &= ~E1000_RXCSUM_TUOFL;
4665
4666         E1000_WRITE_REG(hw, E1000_RXCSUM, rxcsum);
4667
4668 #ifdef EM_MULTIQUEUE
4669 #define RSSKEYLEN 10
4670         if (adapter->num_queues > 1) {
4671                 uint8_t  rss_key[4 * RSSKEYLEN];
4672                 uint32_t reta = 0;
4673                 int i;
4674
4675                 /*
4676                 * Configure RSS key
4677                 */
4678                 arc4rand(rss_key, sizeof(rss_key), 0);
4679                 for (i = 0; i < RSSKEYLEN; ++i) {
4680                         uint32_t rssrk = 0;
4681
4682                         rssrk = EM_RSSRK_VAL(rss_key, i);
4683                         E1000_WRITE_REG(hw,E1000_RSSRK(i), rssrk);
4684                 }
4685
4686                 /*
4687                 * Configure RSS redirect table in following fashion:
4688                 * (hash & ring_cnt_mask) == rdr_table[(hash & rdr_table_mask)]
4689                 */
4690                 for (i = 0; i < sizeof(reta); ++i) {
4691                         uint32_t q;
4692
4693                         q = (i % adapter->num_queues) << 7;
4694                         reta |= q << (8 * i);
4695                 }
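                /*
                 * Worked example (assuming num_queues == 2): each RETA
                 * byte selects its queue in bit 7, so the loop builds
                 *
                 *      i = 0: q = 0x00
                 *      i = 1: q = 0x80 -> reta |= 0x00008000
                 *      i = 2: q = 0x00
                 *      i = 3: q = 0x80 -> reta |= 0x80000000
                 *
                 * giving reta = 0x80008000, which is then replicated
                 * into all 32 RETA registers to alternate hash buckets
                 * between the two queues.
                 */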
4696
4697                 for (i = 0; i < 32; ++i) {
4698                         E1000_WRITE_REG(hw, E1000_RETA(i), reta);
4699                 }
4700
4701                 E1000_WRITE_REG(hw, E1000_MRQC, E1000_MRQC_RSS_ENABLE_2Q | 
4702                                 E1000_MRQC_RSS_FIELD_IPV4_TCP |
4703                                 E1000_MRQC_RSS_FIELD_IPV4 |
4704                                 E1000_MRQC_RSS_FIELD_IPV6_TCP_EX |
4705                                 E1000_MRQC_RSS_FIELD_IPV6_EX |
4706                                 E1000_MRQC_RSS_FIELD_IPV6);
4707         }
4708 #endif
4709         /*
4710         ** XXX TEMPORARY WORKAROUND: on some systems with 82573
4711         ** long latencies are observed, like Lenovo X60. This
4712         ** change eliminates the problem, but since having positive
4713         ** values in RDTR is a known source of problems on other
4714         ** platforms another solution is being sought.
4715         */
4716         if (hw->mac.type == e1000_82573)
4717                 E1000_WRITE_REG(hw, E1000_RDTR, 0x20);
4718
4719         for (int i = 0; i < adapter->num_queues; i++, rxr++) {
4720                 /* Setup the Base and Length of the Rx Descriptor Ring */
4721                 u64 bus_addr = rxr->rxdma.dma_paddr;
4722                 u32 rdt = adapter->num_rx_desc - 1; /* default */
4723
4724                 E1000_WRITE_REG(hw, E1000_RDLEN(i),
4725                     adapter->num_rx_desc * sizeof(union e1000_rx_desc_extended));
4726                 E1000_WRITE_REG(hw, E1000_RDBAH(i), (u32)(bus_addr >> 32));
4727                 E1000_WRITE_REG(hw, E1000_RDBAL(i), (u32)bus_addr);
4728                 /* Setup the Head and Tail Descriptor Pointers */
4729                 E1000_WRITE_REG(hw, E1000_RDH(i), 0);
4730 #ifdef DEV_NETMAP
4731                 /*
4732                  * an init() while a netmap client is active must
4733                  * preserve the rx buffers passed to userspace.
4734                  */
4735                 if (ifp->if_capenable & IFCAP_NETMAP)
4736                         rdt -= nm_kr_rxspace(&NA(adapter->ifp)->rx_rings[i]);
4737 #endif /* DEV_NETMAP */
4738                 E1000_WRITE_REG(hw, E1000_RDT(i), rdt);
4739         }
4740
4741         /*
4742          * Set PTHRESH for improved jumbo performance
4743          * According to 10.2.5.11 of Intel 82574 Datasheet,
4744          * RXDCTL(1) is written whenever RXDCTL(0) is written.
4745          * Only write to RXDCTL(1) if there is a need for different
4746          * settings.
4747          */
4748         if (((adapter->hw.mac.type == e1000_ich9lan) ||
4749             (adapter->hw.mac.type == e1000_pch2lan) ||
4750             (adapter->hw.mac.type == e1000_ich10lan)) &&
4751             (ifp->if_mtu > ETHERMTU)) {
4752                 u32 rxdctl = E1000_READ_REG(hw, E1000_RXDCTL(0));
4753                 E1000_WRITE_REG(hw, E1000_RXDCTL(0), rxdctl | 3);
4754         } else if (adapter->hw.mac.type == e1000_82574) {
4755                 for (int i = 0; i < adapter->num_queues; i++) {
4756                         u32 rxdctl = E1000_READ_REG(hw, E1000_RXDCTL(i));
4757
4758                         rxdctl |= 0x20; /* PTHRESH */
4759                         rxdctl |= 4 << 8; /* HTHRESH */
4760                         rxdctl |= 4 << 16;/* WTHRESH */
4761                         rxdctl |= 1 << 24; /* Switch to granularity */
4762                         E1000_WRITE_REG(hw, E1000_RXDCTL(i), rxdctl);
4763                 }
4764         }
4765                 
4766         if (adapter->hw.mac.type >= e1000_pch2lan) {
4767                 if (ifp->if_mtu > ETHERMTU)
4768                         e1000_lv_jumbo_workaround_ich8lan(hw, TRUE);
4769                 else
4770                         e1000_lv_jumbo_workaround_ich8lan(hw, FALSE);
4771         }
4772
4773         /* Make sure VLAN Filters are off */
4774         rctl &= ~E1000_RCTL_VFE;
4775
4776         if (adapter->rx_mbuf_sz == MCLBYTES)
4777                 rctl |= E1000_RCTL_SZ_2048;
4778         else if (adapter->rx_mbuf_sz == MJUMPAGESIZE)
4779                 rctl |= E1000_RCTL_SZ_4096 | E1000_RCTL_BSEX;
4780         else if (adapter->rx_mbuf_sz > MJUMPAGESIZE)
4781                 rctl |= E1000_RCTL_SZ_8192 | E1000_RCTL_BSEX;
4782
4783         /* Clear the descriptor type bits so we use a DTYPE of 00 here */
4784         rctl &= ~0x00000C00;
4785         /* Write out the settings */
4786         E1000_WRITE_REG(hw, E1000_RCTL, rctl);
4787
4788         return;
4789 }
4790
4791
4792 /*********************************************************************
4793  *
4794  *  This routine executes in interrupt context. It replenishes
4795  *  the mbufs in the descriptor ring and sends data which has
4796  *  been dma'ed into host memory to the upper layer.
4797  *
4798  *  We loop at most count times if count is > 0, or until done if
4799  *  count < 0.
4800  *  
4801  *  For polling we also now return the number of cleaned packets
4802  *********************************************************************/
4803 static bool
4804 em_rxeof(struct rx_ring *rxr, int count, int *done)
4805 {
4806         struct adapter          *adapter = rxr->adapter;
4807         struct ifnet            *ifp = adapter->ifp;
4808         struct mbuf             *mp, *sendmp;
4809         u32                     status = 0;
4810         u16                     len;
4811         int                     i, processed, rxdone = 0;
4812         bool                    eop;
4813         union e1000_rx_desc_extended    *cur;
4814
4815         EM_RX_LOCK(rxr);
4816
4817         /* Sync the ring */
4818         bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
4819             BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
4820
4821
4822 #ifdef DEV_NETMAP
4823         if (netmap_rx_irq(ifp, rxr->me, &processed)) {
4824                 EM_RX_UNLOCK(rxr);
4825                 return (FALSE);
4826         }
4827 #endif /* DEV_NETMAP */
4828
4829         for (i = rxr->next_to_check, processed = 0; count != 0;) {
4830                 if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
4831                         break;
4832
4833                 cur = &rxr->rx_base[i];
4834                 status = le32toh(cur->wb.upper.status_error);
4835                 mp = sendmp = NULL;
4836
4837                 if ((status & E1000_RXD_STAT_DD) == 0)
4838                         break;
4839
4840                 len = le16toh(cur->wb.upper.length);
4841                 eop = (status & E1000_RXD_STAT_EOP) != 0;
4842
4843                 if ((status & E1000_RXDEXT_ERR_FRAME_ERR_MASK) ||
4844                     (rxr->discard == TRUE)) {
4845                         adapter->dropped_pkts++;
4846                         ++rxr->rx_discarded;
4847                         if (!eop) /* Catch subsequent segs */
4848                                 rxr->discard = TRUE;
4849                         else
4850                                 rxr->discard = FALSE;
4851                         em_rx_discard(rxr, i);
4852                         goto next_desc;
4853                 }
4854                 bus_dmamap_unload(rxr->rxtag, rxr->rx_buffers[i].map);
4855
4856                 /* Assign correct length to the current fragment */
4857                 mp = rxr->rx_buffers[i].m_head;
4858                 mp->m_len = len;
4859
4860                 /* Trigger for refresh */
4861                 rxr->rx_buffers[i].m_head = NULL;
4862
4863                 /* First segment? */
4864                 if (rxr->fmp == NULL) {
4865                         mp->m_pkthdr.len = len;
4866                         rxr->fmp = rxr->lmp = mp;
4867                 } else {
4868                         /* Chain mbuf's together */
4869                         mp->m_flags &= ~M_PKTHDR;
4870                         rxr->lmp->m_next = mp;
4871                         rxr->lmp = mp;
4872                         rxr->fmp->m_pkthdr.len += len;
4873                 }
4874
4875                 if (eop) {
4876                         --count;
4877                         sendmp = rxr->fmp;
4878                         sendmp->m_pkthdr.rcvif = ifp;
4879                         ifp->if_ipackets++;
4880                         em_receive_checksum(status, sendmp);
4881 #ifndef __NO_STRICT_ALIGNMENT
4882                         if (adapter->hw.mac.max_frame_size >
4883                             (MCLBYTES - ETHER_ALIGN) &&
4884                             em_fixup_rx(rxr) != 0)
4885                                 goto skip;
4886 #endif
4887                         if (status & E1000_RXD_STAT_VP) {
4888                                 sendmp->m_pkthdr.ether_vtag =
4889                                     le16toh(cur->wb.upper.vlan);
4890                                 sendmp->m_flags |= M_VLANTAG;
4891                         }
4892 #ifndef __NO_STRICT_ALIGNMENT
4893 skip:
4894 #endif
4895                         rxr->fmp = rxr->lmp = NULL;
4896                 }
4897 next_desc:
4898                 /* Sync the ring */
4899                 bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
4900                         BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
4901
4902                 /* Zero out the receive descriptors status. */
4903                 cur->wb.upper.status_error &= htole32(~0xFF);
4904                 ++rxdone;       /* cumulative for POLL */
4905                 ++processed;
4906
4907                 /* Advance our pointers to the next descriptor. */
4908                 if (++i == adapter->num_rx_desc)
4909                         i = 0;
4910
4911                 /* Send to the stack */
4912                 if (sendmp != NULL) {
4913                         rxr->next_to_check = i;
4914                         EM_RX_UNLOCK(rxr);
4915                         (*ifp->if_input)(ifp, sendmp);
4916                         EM_RX_LOCK(rxr);
4917                         i = rxr->next_to_check;
4918                 }
4919
4920                 /* Only refresh mbufs every 8 descriptors */
4921                 if (processed == 8) {
4922                         em_refresh_mbufs(rxr, i);
4923                         processed = 0;
4924                 }
4925         }
4926
4927         /* Catch any remaining refresh work */
4928         if (e1000_rx_unrefreshed(rxr))
4929                 em_refresh_mbufs(rxr, i);
4930
4931         rxr->next_to_check = i;
4932         if (done != NULL)
4933                 *done = rxdone;
4934         EM_RX_UNLOCK(rxr);
4935
4936         return ((status & E1000_RXD_STAT_DD) ? TRUE : FALSE);
4937 }
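
/*
 * Note on the unlock/relock around if_input() in em_rxeof(): the stack
 * may re-enter the driver while consuming the chain (to transmit an
 * ACK, for example), so the RX lock is dropped for the upcall and
 * next_to_check is re-read afterwards in case another context advanced
 * the ring in the meantime.
 */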
4938
4939 static __inline void
4940 em_rx_discard(struct rx_ring *rxr, int i)
4941 {
4942         struct em_rxbuffer      *rbuf;
4943
4944         rbuf = &rxr->rx_buffers[i];
4945         bus_dmamap_unload(rxr->rxtag, rbuf->map);
4946
4947         /* Free any previous pieces */
4948         if (rxr->fmp != NULL) {
4949                 rxr->fmp->m_flags |= M_PKTHDR;
4950                 m_freem(rxr->fmp);
4951                 rxr->fmp = NULL;
4952                 rxr->lmp = NULL;
4953         }
4954         /*
4955         ** Free buffer and allow em_refresh_mbufs()
4956         ** to clean up and recharge buffer.
4957         */
4958         if (rbuf->m_head) {
4959                 m_free(rbuf->m_head);
4960                 rbuf->m_head = NULL;
4961         }
4962         return;
4963 }
4964
4965 #ifndef __NO_STRICT_ALIGNMENT
4966 /*
4967  * When jumbo frames are enabled we should realign the entire payload on
4968  * architectures with strict alignment. This is a serious design mistake of
4969  * the 8254x, as it nullifies the benefit of DMA. The 8254x only allows the
4970  * RX buffer size to be 2048/4096/8192/16384. What we really want is
4971  * 2048 - ETHER_ALIGN, which would align the payload. On architectures
4972  * without strict alignment restrictions the 8254x still performs unaligned
4973  * memory accesses, which reduce performance as well. To avoid copying an
4974  * entire frame to realign it, we allocate a new mbuf, copy the ethernet
4975  * header into it, and prepend the new mbuf to the existing mbuf chain.
4976  *
4977  * Be aware, best performance of the 8254x is achieved only when jumbo
4978  * frames are not used at all on architectures with strict alignment.
4979  */
4980 static int
4981 em_fixup_rx(struct rx_ring *rxr)
4982 {
4983         struct adapter *adapter = rxr->adapter;
4984         struct mbuf *m, *n;
4985         int error;
4986
4987         error = 0;
4988         m = rxr->fmp;
4989         if (m->m_len <= (MCLBYTES - ETHER_HDR_LEN)) {
4990                 bcopy(m->m_data, m->m_data + ETHER_HDR_LEN, m->m_len);
4991                 m->m_data += ETHER_HDR_LEN;
4992         } else {
4993                 MGETHDR(n, M_NOWAIT, MT_DATA);
4994                 if (n != NULL) {
4995                         bcopy(m->m_data, n->m_data, ETHER_HDR_LEN);
4996                         m->m_data += ETHER_HDR_LEN;
4997                         m->m_len -= ETHER_HDR_LEN;
4998                         n->m_len = ETHER_HDR_LEN;
4999                         M_MOVE_PKTHDR(n, m);
5000                         n->m_next = m;
5001                         rxr->fmp = n;
5002                 } else {
5003                         adapter->dropped_pkts++;
5004                         m_freem(rxr->fmp);
5005                         rxr->fmp = NULL;
5006                         error = ENOMEM;
5007                 }
5008         }
5009
5010         return (error);
5011 }
5012 #endif
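
/*
 * Alignment arithmetic behind em_fixup_rx() (illustrative, assuming the
 * frame is DMA'ed to offset 0 of a 4-byte-aligned cluster): the 14-byte
 * ethernet header leaves the IP header at offset 14, two bytes short of
 * a 32-bit boundary. Shifting the frame forward by ETHER_HDR_LEN moves
 * the IP header to offset 28, which is aligned; the mbuf-prepend path
 * avoids copying the full frame when it is too large to shift in place.
 */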
5013
5014 static void
5015 em_setup_rxdesc(union e1000_rx_desc_extended *rxd, const struct em_rxbuffer *rxbuf)
5016 {
5017         rxd->read.buffer_addr = htole64(rxbuf->paddr);
5018         /* DD bits must be cleared */
5019         rxd->wb.upper.status_error = 0;
5020 }
5021
5022 /*********************************************************************
5023  *
5024  *  Verify that the hardware indicated that the checksum is valid.
5025  *  Inform the stack about the status of the checksum so that the
5026  *  stack doesn't spend time verifying it again.
5027  *
5028  *********************************************************************/
5029 static void
5030 em_receive_checksum(uint32_t status, struct mbuf *mp)
5031 {
5032         mp->m_pkthdr.csum_flags = 0;
5033
5034         /* Ignore Checksum bit is set: leave verification to the stack */
5035         if (status & E1000_RXD_STAT_IXSM)
5036                 return;
5037
5038         /* If the IP checksum exists and there is no IP Checksum error */
5039         if ((status & (E1000_RXD_STAT_IPCS | E1000_RXDEXT_STATERR_IPE)) ==
5040                 E1000_RXD_STAT_IPCS) {
5041                 mp->m_pkthdr.csum_flags = (CSUM_IP_CHECKED | CSUM_IP_VALID);
5042         }
5043
5044         /* TCP or UDP checksum */
5045         if ((status & (E1000_RXD_STAT_TCPCS | E1000_RXDEXT_STATERR_TCPE)) ==
5046             E1000_RXD_STAT_TCPCS) {
5047                 mp->m_pkthdr.csum_flags |= (CSUM_DATA_VALID | CSUM_PSEUDO_HDR);
5048                 mp->m_pkthdr.csum_data = htons(0xffff);
5049         }
5050         if (status & E1000_RXD_STAT_UDPCS) {
5051                 mp->m_pkthdr.csum_flags |= (CSUM_DATA_VALID | CSUM_PSEUDO_HDR);
5052                 mp->m_pkthdr.csum_data = htons(0xffff);
5053         }
5054 }
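
/*
 * Resulting csum_flags for common cases (illustrative):
 *
 *      status bits set         csum_flags reported
 *      ---------------         -------------------
 *      IPCS                    CSUM_IP_CHECKED | CSUM_IP_VALID
 *      IPCS | IPE              none (bad IP csum, stack re-verifies)
 *      TCPCS                   CSUM_DATA_VALID | CSUM_PSEUDO_HDR,
 *                              csum_data = 0xffff
 *      IXSM                    none (hardware skipped checksumming)
 */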
5055
5056 /*
5057  * This routine is run via a vlan
5058  * config EVENT
5059  */
5060 static void
5061 em_register_vlan(void *arg, struct ifnet *ifp, u16 vtag)
5062 {
5063         struct adapter  *adapter = ifp->if_softc;
5064         u32             index, bit;
5065
5066         if (ifp->if_softc !=  arg)   /* Not our event */
5067                 return;
5068
5069         if ((vtag == 0) || (vtag > 4095))       /* Invalid ID */
5070                 return;
5071
5072         EM_CORE_LOCK(adapter);
5073         index = (vtag >> 5) & 0x7F;
5074         bit = vtag & 0x1F;
5075         adapter->shadow_vfta[index] |= (1 << bit);
5076         ++adapter->num_vlans;
5077         /* Re-init to load the changes */
5078         if (ifp->if_capenable & IFCAP_VLAN_HWFILTER)
5079                 em_init_locked(adapter);
5080         EM_CORE_UNLOCK(adapter);
5081 }
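
/*
 * VFTA indexing example (illustrative): for vtag 1234,
 *
 *      index = (1234 >> 5) & 0x7F = 38
 *      bit   = 1234 & 0x1F        = 18
 *
 * so bit 18 of shadow_vfta[38] marks the VLAN as active; the 128
 * 32-bit entries together cover all 4096 possible VLAN IDs.
 */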
5082
5083 /*
5084  * This routine is run via a vlan
5085  * unconfig EVENT
5086  */
5087 static void
5088 em_unregister_vlan(void *arg, struct ifnet *ifp, u16 vtag)
5089 {
5090         struct adapter  *adapter = ifp->if_softc;
5091         u32             index, bit;
5092
5093         if (ifp->if_softc !=  arg)
5094                 return;
5095
5096         if ((vtag == 0) || (vtag > 4095))       /* Invalid */
5097                 return;
5098
5099         EM_CORE_LOCK(adapter);
5100         index = (vtag >> 5) & 0x7F;
5101         bit = vtag & 0x1F;
5102         adapter->shadow_vfta[index] &= ~(1 << bit);
5103         --adapter->num_vlans;
5104         /* Re-init to load the changes */
5105         if (ifp->if_capenable & IFCAP_VLAN_HWFILTER)
5106                 em_init_locked(adapter);
5107         EM_CORE_UNLOCK(adapter);
5108 }
5109
5110 static void
5111 em_setup_vlan_hw_support(struct adapter *adapter)
5112 {
5113         struct e1000_hw *hw = &adapter->hw;
5114         u32             reg;
5115
5116         /*
5117         ** We get here through init_locked, meaning
5118         ** a soft reset; this has already cleared
5119         ** the VFTA and other state, so if no
5120         ** vlans have been registered do nothing.
5121         */
5122         if (adapter->num_vlans == 0)
5123                 return;
5124
5125         /*
5126         ** A soft reset zeroes out the VFTA, so
5127         ** we need to repopulate it now.
5128         */
5129         for (int i = 0; i < EM_VFTA_SIZE; i++)
5130                 if (adapter->shadow_vfta[i] != 0)
5131                         E1000_WRITE_REG_ARRAY(hw, E1000_VFTA,
5132                             i, adapter->shadow_vfta[i]);
5133
5134         reg = E1000_READ_REG(hw, E1000_CTRL);
5135         reg |= E1000_CTRL_VME;
5136         E1000_WRITE_REG(hw, E1000_CTRL, reg);
5137
5138         /* Enable the Filter Table */
5139         reg = E1000_READ_REG(hw, E1000_RCTL);
5140         reg &= ~E1000_RCTL_CFIEN;
5141         reg |= E1000_RCTL_VFE;
5142         E1000_WRITE_REG(hw, E1000_RCTL, reg);
5143 }
5144
5145 static void
5146 em_enable_intr(struct adapter *adapter)
5147 {
5148         struct e1000_hw *hw = &adapter->hw;
5149         u32 ims_mask = IMS_ENABLE_MASK;
5150
5151         if (hw->mac.type == e1000_82574) {
5152                 E1000_WRITE_REG(hw, EM_EIAC, adapter->ims);
5153                 ims_mask |= adapter->ims;
5154         } 
5155         E1000_WRITE_REG(hw, E1000_IMS, ims_mask);
5156 }
5157
5158 static void
5159 em_disable_intr(struct adapter *adapter)
5160 {
5161         struct e1000_hw *hw = &adapter->hw;
5162
5163         if (hw->mac.type == e1000_82574)
5164                 E1000_WRITE_REG(hw, EM_EIAC, 0);
5165         E1000_WRITE_REG(&adapter->hw, E1000_IMC, 0xffffffff);
5166 }
5167
5168 /*
5169  * Bit of a misnomer: what this really means is
5170  * to enable OS management of the system, i.e.
5171  * to disable special hardware management features.
5172  */
5173 static void
5174 em_init_manageability(struct adapter *adapter)
5175 {
5176         /* A shared code workaround */
5177 #define E1000_82542_MANC2H E1000_MANC2H
5178         if (adapter->has_manage) {
5179                 int manc2h = E1000_READ_REG(&adapter->hw, E1000_MANC2H);
5180                 int manc = E1000_READ_REG(&adapter->hw, E1000_MANC);
5181
5182                 /* disable hardware interception of ARP */
5183                 manc &= ~(E1000_MANC_ARP_EN);
5184
5185                 /* enable receiving management packets to the host */
5186                 manc |= E1000_MANC_EN_MNG2HOST;
5187 #define E1000_MNG2HOST_PORT_623 (1 << 5)
5188 #define E1000_MNG2HOST_PORT_664 (1 << 6)
5189                 manc2h |= E1000_MNG2HOST_PORT_623;
5190                 manc2h |= E1000_MNG2HOST_PORT_664;
5191                 E1000_WRITE_REG(&adapter->hw, E1000_MANC2H, manc2h);
5192                 E1000_WRITE_REG(&adapter->hw, E1000_MANC, manc);
5193         }
5194 }
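
/*
 * The two MANC2H port bits set above forward traffic for UDP ports 623
 * and 664 (the ASF/RMCP remote-management ports) up to the host rather
 * than letting the management firmware consume it.
 */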
5195
5196 /*
5197  * Give control back to hardware management
5198  * controller if there is one.
5199  */
5200 static void
5201 em_release_manageability(struct adapter *adapter)
5202 {
5203         if (adapter->has_manage) {
5204                 int manc = E1000_READ_REG(&adapter->hw, E1000_MANC);
5205
5206                 /* re-enable hardware interception of ARP */
5207                 manc |= E1000_MANC_ARP_EN;
5208                 manc &= ~E1000_MANC_EN_MNG2HOST;
5209
5210                 E1000_WRITE_REG(&adapter->hw, E1000_MANC, manc);
5211         }
5212 }
5213
5214 /*
5215  * em_get_hw_control sets the {CTRL_EXT|FWSM}:DRV_LOAD bit.
5216  * For ASF and Pass Through versions of f/w this means
5217  * that the driver is loaded. For AMT version type f/w
5218  * this means that the network i/f is open.
5219  */
5220 static void
5221 em_get_hw_control(struct adapter *adapter)
5222 {
5223         u32 ctrl_ext, swsm;
5224
5225         if (adapter->hw.mac.type == e1000_82573) {
5226                 swsm = E1000_READ_REG(&adapter->hw, E1000_SWSM);
5227                 E1000_WRITE_REG(&adapter->hw, E1000_SWSM,
5228                     swsm | E1000_SWSM_DRV_LOAD);
5229                 return;
5230         }
5231         /* else */
5232         ctrl_ext = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
5233         E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT,
5234             ctrl_ext | E1000_CTRL_EXT_DRV_LOAD);
5235         return;
5236 }
5237
5238 /*
5239  * em_release_hw_control resets {CTRL_EXT|FWSM}:DRV_LOAD bit.
5240  * For ASF and Pass Through versions of f/w this means that
5241  * the driver is no longer loaded. For AMT versions of the
5242  * f/w this means that the network i/f is closed.
5243  */
5244 static void
5245 em_release_hw_control(struct adapter *adapter)
5246 {
5247         u32 ctrl_ext, swsm;
5248
5249         if (!adapter->has_manage)
5250                 return;
5251
5252         if (adapter->hw.mac.type == e1000_82573) {
5253                 swsm = E1000_READ_REG(&adapter->hw, E1000_SWSM);
5254                 E1000_WRITE_REG(&adapter->hw, E1000_SWSM,
5255                     swsm & ~E1000_SWSM_DRV_LOAD);
5256                 return;
5257         }
5258         /* else */
5259         ctrl_ext = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
5260         E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT,
5261             ctrl_ext & ~E1000_CTRL_EXT_DRV_LOAD);
5262         return;
5263 }
5264
5265 static int
5266 em_is_valid_ether_addr(u8 *addr)
5267 {
5268         char zero_addr[6] = { 0, 0, 0, 0, 0, 0 };
5269
5270         if ((addr[0] & 1) || (!bcmp(addr, zero_addr, ETHER_ADDR_LEN))) {
5271                 return (FALSE);
5272         }
5273
5274         return (TRUE);
5275 }
5276
5277 /*
5278 ** Parse the interface capabilities with regard
5279 ** to both system management and wake-on-lan for
5280 ** later use.
5281 */
5282 static void
5283 em_get_wakeup(device_t dev)
5284 {
5285         struct adapter  *adapter = device_get_softc(dev);
5286         u16             eeprom_data = 0, device_id, apme_mask;
5287
5288         adapter->has_manage = e1000_enable_mng_pass_thru(&adapter->hw);
5289         apme_mask = EM_EEPROM_APME;
5290
5291         switch (adapter->hw.mac.type) {
5292         case e1000_82573:
5293         case e1000_82583:
5294                 adapter->has_amt = TRUE;
5295                 /* Falls thru */
5296         case e1000_82571:
5297         case e1000_82572:
5298         case e1000_80003es2lan:
5299                 if (adapter->hw.bus.func == 1) {
5300                         e1000_read_nvm(&adapter->hw,
5301                             NVM_INIT_CONTROL3_PORT_B, 1, &eeprom_data);
5302                         break;
5303                 } else
5304                         e1000_read_nvm(&adapter->hw,
5305                             NVM_INIT_CONTROL3_PORT_A, 1, &eeprom_data);
5306                 break;
5307         case e1000_ich8lan:
5308         case e1000_ich9lan:
5309         case e1000_ich10lan:
5310         case e1000_pchlan:
5311         case e1000_pch2lan:
5312                 apme_mask = E1000_WUC_APME;
5313                 adapter->has_amt = TRUE;
5314                 eeprom_data = E1000_READ_REG(&adapter->hw, E1000_WUC);
5315                 break;
5316         default:
5317                 e1000_read_nvm(&adapter->hw,
5318                     NVM_INIT_CONTROL3_PORT_A, 1, &eeprom_data);
5319                 break;
5320         }
5321         if (eeprom_data & apme_mask)
5322                 adapter->wol = (E1000_WUFC_MAG | E1000_WUFC_MC);
5323         /*
5324          * We have the eeprom settings; now apply the special cases
5325          * where the eeprom may be wrong or the board won't support
5326          * wake on lan on a particular port.
5327          */
5328         device_id = pci_get_device(dev);
5329         switch (device_id) {
5330         case E1000_DEV_ID_82571EB_FIBER:
5331                 /* Wake events only supported on port A for dual fiber
5332                  * regardless of eeprom setting */
5333                 if (E1000_READ_REG(&adapter->hw, E1000_STATUS) &
5334                     E1000_STATUS_FUNC_1)
5335                         adapter->wol = 0;
5336                 break;
5337         case E1000_DEV_ID_82571EB_QUAD_COPPER:
5338         case E1000_DEV_ID_82571EB_QUAD_FIBER:
5339         case E1000_DEV_ID_82571EB_QUAD_COPPER_LP:
5340                 /* if quad port adapter, disable WoL on all but port A */
5341                 if (global_quad_port_a != 0)
5342                         adapter->wol = 0;
5343                 /* Reset for multiple quad port adapters */
5344                 if (++global_quad_port_a == 4)
5345                         global_quad_port_a = 0;
5346                 break;
5347         }
5348         return;
5349 }
5350
5351
5352 /*
5353  * Enable PCI Wake On Lan capability
5354  */
5355 static void
5356 em_enable_wakeup(device_t dev)
5357 {
5358         struct adapter  *adapter = device_get_softc(dev);
5359         struct ifnet    *ifp = adapter->ifp;
5360         u32             pmc, ctrl, ctrl_ext, rctl;
5361         u16             status;
5362
5363         if ((pci_find_cap(dev, PCIY_PMG, &pmc) != 0))
5364                 return;
5365
5366         /* Advertise the wakeup capability */
5367         ctrl = E1000_READ_REG(&adapter->hw, E1000_CTRL);
5368         ctrl |= (E1000_CTRL_SWDPIN2 | E1000_CTRL_SWDPIN3);
5369         E1000_WRITE_REG(&adapter->hw, E1000_CTRL, ctrl);
5370         E1000_WRITE_REG(&adapter->hw, E1000_WUC, E1000_WUC_PME_EN);
5371
5372         if ((adapter->hw.mac.type == e1000_ich8lan) ||
5373             (adapter->hw.mac.type == e1000_pchlan) ||
5374             (adapter->hw.mac.type == e1000_ich9lan) ||
5375             (adapter->hw.mac.type == e1000_ich10lan))
5376                 e1000_suspend_workarounds_ich8lan(&adapter->hw);
5377
5378         /* Keep the laser running on Fiber adapters */
5379         if (adapter->hw.phy.media_type == e1000_media_type_fiber ||
5380             adapter->hw.phy.media_type == e1000_media_type_internal_serdes) {
5381                 ctrl_ext = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
5382                 ctrl_ext |= E1000_CTRL_EXT_SDP3_DATA;
5383                 E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT, ctrl_ext);
5384         }
5385
5386         /*
5387         ** Determine type of Wakeup: note that wol
5388         ** is set with all bits on by default.
5389         */
5390         if ((ifp->if_capenable & IFCAP_WOL_MAGIC) == 0)
5391                 adapter->wol &= ~E1000_WUFC_MAG;
5392
5393         if ((ifp->if_capenable & IFCAP_WOL_MCAST) == 0)
5394                 adapter->wol &= ~E1000_WUFC_MC;
5395         else {
5396                 rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
5397                 rctl |= E1000_RCTL_MPE;
5398                 E1000_WRITE_REG(&adapter->hw, E1000_RCTL, rctl);
5399         }
5400
5401         if ((adapter->hw.mac.type == e1000_pchlan) ||
5402             (adapter->hw.mac.type == e1000_pch2lan)) {
5403                 if (em_enable_phy_wakeup(adapter))
5404                         return;
5405         } else {
5406                 E1000_WRITE_REG(&adapter->hw, E1000_WUC, E1000_WUC_PME_EN);
5407                 E1000_WRITE_REG(&adapter->hw, E1000_WUFC, adapter->wol);
5408         }
5409
5410         if (adapter->hw.phy.type == e1000_phy_igp_3)
5411                 e1000_igp3_phy_powerdown_workaround_ich8lan(&adapter->hw);
5412
5413         /* Request PME */
5414         status = pci_read_config(dev, pmc + PCIR_POWER_STATUS, 2);
5415         status &= ~(PCIM_PSTAT_PME | PCIM_PSTAT_PMEENABLE);
5416         if (ifp->if_capenable & IFCAP_WOL)
5417                 status |= PCIM_PSTAT_PME | PCIM_PSTAT_PMEENABLE;
5418         pci_write_config(dev, pmc + PCIR_POWER_STATUS, status, 2);
5419
5420         return;
5421 }
5422
5423 /*
5424 ** WOL in the newer chipset interfaces (pchlan)
5425 ** requires things to be copied into the phy
5426 */
5427 static int
5428 em_enable_phy_wakeup(struct adapter *adapter)
5429 {
5430         struct e1000_hw *hw = &adapter->hw;
5431         u32 mreg, ret = 0;
5432         u16 preg;
5433
5434         /* copy MAC RARs to PHY RARs */
5435         e1000_copy_rx_addrs_to_phy_ich8lan(hw);
5436
5437         /* copy MAC MTA to PHY MTA */
5438         for (int i = 0; i < adapter->hw.mac.mta_reg_count; i++) {
5439                 mreg = E1000_READ_REG_ARRAY(hw, E1000_MTA, i);
5440                 e1000_write_phy_reg(hw, BM_MTA(i), (u16)(mreg & 0xFFFF));
5441                 e1000_write_phy_reg(hw, BM_MTA(i) + 1,
5442                     (u16)((mreg >> 16) & 0xFFFF));
5443         }
5444
5445         /* configure PHY Rx Control register */
5446         e1000_read_phy_reg(&adapter->hw, BM_RCTL, &preg);
5447         mreg = E1000_READ_REG(hw, E1000_RCTL);
5448         if (mreg & E1000_RCTL_UPE)
5449                 preg |= BM_RCTL_UPE;
5450         if (mreg & E1000_RCTL_MPE)
5451                 preg |= BM_RCTL_MPE;
5452         preg &= ~(BM_RCTL_MO_MASK);
5453         if (mreg & E1000_RCTL_MO_3)
5454                 preg |= (((mreg & E1000_RCTL_MO_3) >> E1000_RCTL_MO_SHIFT)
5455                                 << BM_RCTL_MO_SHIFT);
5456         if (mreg & E1000_RCTL_BAM)
5457                 preg |= BM_RCTL_BAM;
5458         if (mreg & E1000_RCTL_PMCF)
5459                 preg |= BM_RCTL_PMCF;
5460         mreg = E1000_READ_REG(hw, E1000_CTRL);
5461         if (mreg & E1000_CTRL_RFCE)
5462                 preg |= BM_RCTL_RFCE;
5463         e1000_write_phy_reg(&adapter->hw, BM_RCTL, preg);
5464
5465         /* enable PHY wakeup in MAC register */
5466         E1000_WRITE_REG(hw, E1000_WUC,
5467             E1000_WUC_PHY_WAKE | E1000_WUC_PME_EN);
5468         E1000_WRITE_REG(hw, E1000_WUFC, adapter->wol);
5469
5470         /* configure and enable PHY wakeup in PHY registers */
5471         e1000_write_phy_reg(&adapter->hw, BM_WUFC, adapter->wol);
5472         e1000_write_phy_reg(&adapter->hw, BM_WUC, E1000_WUC_PME_EN);
5473
5474         /* activate PHY wakeup */
5475         ret = hw->phy.ops.acquire(hw);
5476         if (ret) {
5477                 printf("Could not acquire PHY\n");
5478                 return ret;
5479         }
5480         e1000_write_phy_reg_mdic(hw, IGP01E1000_PHY_PAGE_SELECT,
5481                                  (BM_WUC_ENABLE_PAGE << IGP_PAGE_SHIFT));
5482         ret = e1000_read_phy_reg_mdic(hw, BM_WUC_ENABLE_REG, &preg);
5483         if (ret) {
5484                 printf("Could not read PHY page 769\n");
5485                 goto out;
5486         }
5487         preg |= BM_WUC_ENABLE_BIT | BM_WUC_HOST_WU_BIT;
5488         ret = e1000_write_phy_reg_mdic(hw, BM_WUC_ENABLE_REG, preg);
5489         if (ret)
5490                 printf("Could not set PHY Host Wakeup bit\n");
5491 out:
5492         hw->phy.ops.release(hw);
5493
5494         return ret;
5495 }
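
/*
** For reference: each 32-bit MAC MTA entry copied above is split
** across two 16-bit PHY registers, the low word at BM_MTA(i) and the
** high word at BM_MTA(i) + 1.  A sketch of reassembling one entry
** (hypothetical locals, same accessors as above):
**
**      u16 lo, hi;
**      e1000_read_phy_reg(hw, BM_MTA(i), &lo);
**      e1000_read_phy_reg(hw, BM_MTA(i) + 1, &hi);
**      mreg = ((u32)hi << 16) | lo;
*/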
5496
5497 static void
5498 em_led_func(void *arg, int onoff)
5499 {
5500         struct adapter  *adapter = arg;
5501  
5502         EM_CORE_LOCK(adapter);
5503         if (onoff) {
5504                 e1000_setup_led(&adapter->hw);
5505                 e1000_led_on(&adapter->hw);
5506         } else {
5507                 e1000_led_off(&adapter->hw);
5508                 e1000_cleanup_led(&adapter->hw);
5509         }
5510         EM_CORE_UNLOCK(adapter);
5511 }
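
/*
** For reference: em_led_func() is a led(4) callback; the registration
** happens at attach time elsewhere in this file.  A sketch of that
** hookup (see led(4); the led_dev field name is as used by this
** driver's attach path):
**
**      adapter->led_dev = led_create(em_led_func, adapter,
**          device_get_nameunit(dev));
**
** after which the LED is drivable through /dev/led/<nameunit>.
*/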
5512
5513 /*
5514 ** Disable the L0s and L1 link states
5515 */
5516 static void
5517 em_disable_aspm(struct adapter *adapter)
5518 {
5519         int             base, reg;
5520         u16             link_cap, link_ctrl;
5521         device_t        dev = adapter->dev;
5522
5523         switch (adapter->hw.mac.type) {
5524                 case e1000_82573:
5525                 case e1000_82574:
5526                 case e1000_82583:
5527                         break;
5528                 default:
5529                         return;
5530         }
5531         if (pci_find_cap(dev, PCIY_EXPRESS, &base) != 0)
5532                 return;
5533         reg = base + PCIER_LINK_CAP;
5534         link_cap = pci_read_config(dev, reg, 2);
5535         if ((link_cap & PCIEM_LINK_CAP_ASPM) == 0)
5536                 return;
5537         reg = base + PCIER_LINK_CTL;
5538         link_ctrl = pci_read_config(dev, reg, 2);
5539         link_ctrl &= ~PCIEM_LINK_CTL_ASPMC;
5540         pci_write_config(dev, reg, link_ctrl, 2);
5541         return;
5542 }
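
/*
** For reference: PCIEM_LINK_CTL_ASPMC is the two-bit ASPM control
** field of the PCIe link control register (the L0s and L1 enables),
** so clearing it above forces both states off.  The resulting link
** control value can be inspected from userland, e.g. (the device
** selector is illustrative):
**
**      pciconf -lc pci0:1:0:0
*/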
5543
5544 /**********************************************************************
5545  *
5546  *  Update the board statistics counters.
5547  *
5548  **********************************************************************/
5549 static void
5550 em_update_stats_counters(struct adapter *adapter)
5551 {
5552         struct ifnet   *ifp;
5553
5554         if (adapter->hw.phy.media_type == e1000_media_type_copper ||
5555            (E1000_READ_REG(&adapter->hw, E1000_STATUS) & E1000_STATUS_LU)) {
5556                 adapter->stats.symerrs += E1000_READ_REG(&adapter->hw, E1000_SYMERRS);
5557                 adapter->stats.sec += E1000_READ_REG(&adapter->hw, E1000_SEC);
5558         }
5559         adapter->stats.crcerrs += E1000_READ_REG(&adapter->hw, E1000_CRCERRS);
5560         adapter->stats.mpc += E1000_READ_REG(&adapter->hw, E1000_MPC);
5561         adapter->stats.scc += E1000_READ_REG(&adapter->hw, E1000_SCC);
5562         adapter->stats.ecol += E1000_READ_REG(&adapter->hw, E1000_ECOL);
5563
5564         adapter->stats.mcc += E1000_READ_REG(&adapter->hw, E1000_MCC);
5565         adapter->stats.latecol += E1000_READ_REG(&adapter->hw, E1000_LATECOL);
5566         adapter->stats.colc += E1000_READ_REG(&adapter->hw, E1000_COLC);
5567         adapter->stats.dc += E1000_READ_REG(&adapter->hw, E1000_DC);
5568         adapter->stats.rlec += E1000_READ_REG(&adapter->hw, E1000_RLEC);
5569         adapter->stats.xonrxc += E1000_READ_REG(&adapter->hw, E1000_XONRXC);
5570         adapter->stats.xontxc += E1000_READ_REG(&adapter->hw, E1000_XONTXC);
5571         adapter->stats.xoffrxc += E1000_READ_REG(&adapter->hw, E1000_XOFFRXC);
5572         adapter->stats.xofftxc += E1000_READ_REG(&adapter->hw, E1000_XOFFTXC);
5573         adapter->stats.fcruc += E1000_READ_REG(&adapter->hw, E1000_FCRUC);
5574         adapter->stats.prc64 += E1000_READ_REG(&adapter->hw, E1000_PRC64);
5575         adapter->stats.prc127 += E1000_READ_REG(&adapter->hw, E1000_PRC127);
5576         adapter->stats.prc255 += E1000_READ_REG(&adapter->hw, E1000_PRC255);
5577         adapter->stats.prc511 += E1000_READ_REG(&adapter->hw, E1000_PRC511);
5578         adapter->stats.prc1023 += E1000_READ_REG(&adapter->hw, E1000_PRC1023);
5579         adapter->stats.prc1522 += E1000_READ_REG(&adapter->hw, E1000_PRC1522);
5580         adapter->stats.gprc += E1000_READ_REG(&adapter->hw, E1000_GPRC);
5581         adapter->stats.bprc += E1000_READ_REG(&adapter->hw, E1000_BPRC);
5582         adapter->stats.mprc += E1000_READ_REG(&adapter->hw, E1000_MPRC);
5583         adapter->stats.gptc += E1000_READ_REG(&adapter->hw, E1000_GPTC);
5584
5585         /* For the 64-bit byte counters the low dword must be read first. */
5586         /* Both registers clear on the read of the high dword */
5587
5588         adapter->stats.gorc += E1000_READ_REG(&adapter->hw, E1000_GORCL) +
5589             ((u64)E1000_READ_REG(&adapter->hw, E1000_GORCH) << 32);
5590         adapter->stats.gotc += E1000_READ_REG(&adapter->hw, E1000_GOTCL) +
5591             ((u64)E1000_READ_REG(&adapter->hw, E1000_GOTCH) << 32);
5592
5593         adapter->stats.rnbc += E1000_READ_REG(&adapter->hw, E1000_RNBC);
5594         adapter->stats.ruc += E1000_READ_REG(&adapter->hw, E1000_RUC);
5595         adapter->stats.rfc += E1000_READ_REG(&adapter->hw, E1000_RFC);
5596         adapter->stats.roc += E1000_READ_REG(&adapter->hw, E1000_ROC);
5597         adapter->stats.rjc += E1000_READ_REG(&adapter->hw, E1000_RJC);
5598
5599         adapter->stats.tor += E1000_READ_REG(&adapter->hw, E1000_TORH);
5600         adapter->stats.tot += E1000_READ_REG(&adapter->hw, E1000_TOTH);
5601
5602         adapter->stats.tpr += E1000_READ_REG(&adapter->hw, E1000_TPR);
5603         adapter->stats.tpt += E1000_READ_REG(&adapter->hw, E1000_TPT);
5604         adapter->stats.ptc64 += E1000_READ_REG(&adapter->hw, E1000_PTC64);
5605         adapter->stats.ptc127 += E1000_READ_REG(&adapter->hw, E1000_PTC127);
5606         adapter->stats.ptc255 += E1000_READ_REG(&adapter->hw, E1000_PTC255);
5607         adapter->stats.ptc511 += E1000_READ_REG(&adapter->hw, E1000_PTC511);
5608         adapter->stats.ptc1023 += E1000_READ_REG(&adapter->hw, E1000_PTC1023);
5609         adapter->stats.ptc1522 += E1000_READ_REG(&adapter->hw, E1000_PTC1522);
5610         adapter->stats.mptc += E1000_READ_REG(&adapter->hw, E1000_MPTC);
5611         adapter->stats.bptc += E1000_READ_REG(&adapter->hw, E1000_BPTC);
5612
5613         /* Interrupt Counts */
5614
5615         adapter->stats.iac += E1000_READ_REG(&adapter->hw, E1000_IAC);
5616         adapter->stats.icrxptc += E1000_READ_REG(&adapter->hw, E1000_ICRXPTC);
5617         adapter->stats.icrxatc += E1000_READ_REG(&adapter->hw, E1000_ICRXATC);
5618         adapter->stats.ictxptc += E1000_READ_REG(&adapter->hw, E1000_ICTXPTC);
5619         adapter->stats.ictxatc += E1000_READ_REG(&adapter->hw, E1000_ICTXATC);
5620         adapter->stats.ictxqec += E1000_READ_REG(&adapter->hw, E1000_ICTXQEC);
5621         adapter->stats.ictxqmtc += E1000_READ_REG(&adapter->hw, E1000_ICTXQMTC);
5622         adapter->stats.icrxdmtc += E1000_READ_REG(&adapter->hw, E1000_ICRXDMTC);
5623         adapter->stats.icrxoc += E1000_READ_REG(&adapter->hw, E1000_ICRXOC);
5624
5625         if (adapter->hw.mac.type >= e1000_82543) {
5626                 adapter->stats.algnerrc +=
5627                     E1000_READ_REG(&adapter->hw, E1000_ALGNERRC);
5628                 adapter->stats.rxerrc +=
5629                     E1000_READ_REG(&adapter->hw, E1000_RXERRC);
5630                 adapter->stats.tncrs +=
5631                     E1000_READ_REG(&adapter->hw, E1000_TNCRS);
5632                 adapter->stats.cexterr +=
5633                     E1000_READ_REG(&adapter->hw, E1000_CEXTERR);
5634                 adapter->stats.tsctc +=
5635                     E1000_READ_REG(&adapter->hw, E1000_TSCTC);
5636                 adapter->stats.tsctfc +=
5637                     E1000_READ_REG(&adapter->hw, E1000_TSCTFC);
5638         }
5639         ifp = adapter->ifp;
5640
5641         ifp->if_collisions = adapter->stats.colc;
5642
5643         /* Rx Errors */
5644         ifp->if_ierrors = adapter->dropped_pkts + adapter->stats.rxerrc +
5645             adapter->stats.crcerrs + adapter->stats.algnerrc +
5646             adapter->stats.ruc + adapter->stats.roc +
5647             adapter->stats.mpc + adapter->stats.cexterr;
5648
5649         /* Tx Errors */
5650         ifp->if_oerrors = adapter->stats.ecol +
5651             adapter->stats.latecol + adapter->watchdog_events;
5652 }
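
/*
** A caution on the GORC/GOTC arithmetic above: the hardware requires
** the low dword to be read before the high dword, but C leaves the
** evaluation order of '+' operands unspecified.  A sketch that pins
** the order with sequence points (hypothetical locals):
**
**      u64 lo, hi;
**      lo = E1000_READ_REG(&adapter->hw, E1000_GORCL);
**      hi = E1000_READ_REG(&adapter->hw, E1000_GORCH);
**      adapter->stats.gorc += (hi << 32) + lo;
*/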
5653
5654 /* Export a single 32-bit register via a read-only sysctl. */
5655 static int
5656 em_sysctl_reg_handler(SYSCTL_HANDLER_ARGS)
5657 {
5658         struct adapter *adapter;
5659         u_int val;
5660
5661         adapter = oidp->oid_arg1;
5662         val = E1000_READ_REG(&adapter->hw, oidp->oid_arg2);
5663         return (sysctl_handle_int(oidp, &val, 0, req));
5664 }
5665
5666 /*
5667  * Add sysctl variables, one per statistic, to the system.
5668  */
5669 static void
5670 em_add_hw_stats(struct adapter *adapter)
5671 {
5672         device_t dev = adapter->dev;
5673
5674         struct tx_ring *txr = adapter->tx_rings;
5675         struct rx_ring *rxr = adapter->rx_rings;
5676
5677         struct sysctl_ctx_list *ctx = device_get_sysctl_ctx(dev);
5678         struct sysctl_oid *tree = device_get_sysctl_tree(dev);
5679         struct sysctl_oid_list *child = SYSCTL_CHILDREN(tree);
5680         struct e1000_hw_stats *stats = &adapter->stats;
5681
5682         struct sysctl_oid *stat_node, *queue_node, *int_node;
5683         struct sysctl_oid_list *stat_list, *queue_list, *int_list;
5684
5685 #define QUEUE_NAME_LEN 32
5686         char namebuf[QUEUE_NAME_LEN];
5687         
5688         /* Driver Statistics */
5689         SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "dropped", 
5690                         CTLFLAG_RD, &adapter->dropped_pkts,
5691                         "Driver dropped packets");
5692         SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "link_irq",
5693                         CTLFLAG_RD, &adapter->link_irq,
5694                         "Link MSIX IRQ Handled");
5695         SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "mbuf_defrag_fail", 
5696                          CTLFLAG_RD, &adapter->mbuf_defrag_failed,
5697                          "Defragmenting mbuf chain failed");
5698         SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "tx_dma_fail", 
5699                         CTLFLAG_RD, &adapter->no_tx_dma_setup,
5700                         "Driver tx dma failure in xmit");
5701         SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "rx_overruns",
5702                         CTLFLAG_RD, &adapter->rx_overruns,
5703                         "RX overruns");
5704         SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "watchdog_timeouts",
5705                         CTLFLAG_RD, &adapter->watchdog_events,
5706                         "Watchdog timeouts");
5707         
5708         SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "device_control",
5709                         CTLTYPE_UINT | CTLFLAG_RD, adapter, E1000_CTRL,
5710                         em_sysctl_reg_handler, "IU",
5711                         "Device Control Register");
5712         SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "rx_control",
5713                         CTLTYPE_UINT | CTLFLAG_RD, adapter, E1000_RCTL,
5714                         em_sysctl_reg_handler, "IU",
5715                         "Receiver Control Register");
5716         SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "fc_high_water",
5717                         CTLFLAG_RD, &adapter->hw.fc.high_water, 0,
5718                         "Flow Control High Watermark");
5719         SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "fc_low_water", 
5720                         CTLFLAG_RD, &adapter->hw.fc.low_water, 0,
5721                         "Flow Control Low Watermark");
5722
5723         for (int i = 0; i < adapter->num_queues; i++, txr++, rxr++) {
5724                 snprintf(namebuf, QUEUE_NAME_LEN, "queue_tx_%d", i);
5725                 queue_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, namebuf,
5726                                             CTLFLAG_RD, NULL, "TX Queue Name");
5727                 queue_list = SYSCTL_CHILDREN(queue_node);
5728
5729                 SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "txd_head", 
5730                                 CTLTYPE_UINT | CTLFLAG_RD, adapter,
5731                                 E1000_TDH(txr->me),
5732                                 em_sysctl_reg_handler, "IU",
5733                                 "Transmit Descriptor Head");
5734                 SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "txd_tail", 
5735                                 CTLTYPE_UINT | CTLFLAG_RD, adapter,
5736                                 E1000_TDT(txr->me),
5737                                 em_sysctl_reg_handler, "IU",
5738                                 "Transmit Descriptor Tail");
5739                 SYSCTL_ADD_ULONG(ctx, queue_list, OID_AUTO, "tx_irq",
5740                                 CTLFLAG_RD, &txr->tx_irq,
5741                                 "Queue MSI-X Transmit Interrupts");
5742                 SYSCTL_ADD_ULONG(ctx, queue_list, OID_AUTO, "no_desc_avail", 
5743                                 CTLFLAG_RD, &txr->no_desc_avail,
5744                                 "Queue No Descriptor Available");
5745
5746                 snprintf(namebuf, QUEUE_NAME_LEN, "queue_rx_%d", i);
5747                 queue_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, namebuf,
5748                                             CTLFLAG_RD, NULL, "RX Queue Name");
5749                 queue_list = SYSCTL_CHILDREN(queue_node);
5750
5751                 SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "rxd_head", 
5752                                 CTLTYPE_UINT | CTLFLAG_RD, adapter,
5753                                 E1000_RDH(rxr->me),
5754                                 em_sysctl_reg_handler, "IU",
5755                                 "Receive Descriptor Head");
5756                 SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "rxd_tail", 
5757                                 CTLTYPE_UINT | CTLFLAG_RD, adapter,
5758                                 E1000_RDT(rxr->me),
5759                                 em_sysctl_reg_handler, "IU",
5760                                 "Receive Descriptor Tail");
5761                 SYSCTL_ADD_ULONG(ctx, queue_list, OID_AUTO, "rx_irq",
5762                                 CTLFLAG_RD, &rxr->rx_irq,
5763                                 "Queue MSI-X Receive Interrupts");
5764         }
5765
5766         /* MAC stats get their own sub node */
5767
5768         stat_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "mac_stats", 
5769                                     CTLFLAG_RD, NULL, "Statistics");
5770         stat_list = SYSCTL_CHILDREN(stat_node);
5771
5772         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "excess_coll",
5773                         CTLFLAG_RD, &stats->ecol,
5774                         "Excessive collisions");
5775         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "single_coll",
5776                         CTLFLAG_RD, &stats->scc,
5777                         "Single collisions");
5778         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "multiple_coll",
5779                         CTLFLAG_RD, &stats->mcc,
5780                         "Multiple collisions");
5781         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "late_coll",
5782                         CTLFLAG_RD, &stats->latecol,
5783                         "Late collisions");
5784         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "collision_count",
5785                         CTLFLAG_RD, &stats->colc,
5786                         "Collision Count");
5787         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "symbol_errors",
5788                         CTLFLAG_RD, &adapter->stats.symerrs,
5789                         "Symbol Errors");
5790         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "sequence_errors",
5791                         CTLFLAG_RD, &adapter->stats.sec,
5792                         "Sequence Errors");
5793         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "defer_count",
5794                         CTLFLAG_RD, &adapter->stats.dc,
5795                         "Defer Count");
5796         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "missed_packets",
5797                         CTLFLAG_RD, &adapter->stats.mpc,
5798                         "Missed Packets");
5799         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_no_buff",
5800                         CTLFLAG_RD, &adapter->stats.rnbc,
5801                         "Receive No Buffers");
5802         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_undersize",
5803                         CTLFLAG_RD, &adapter->stats.ruc,
5804                         "Receive Undersize");
5805         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_fragmented",
5806                         CTLFLAG_RD, &adapter->stats.rfc,
5807                         "Fragmented Packets Received");
5808         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_oversize",
5809                         CTLFLAG_RD, &adapter->stats.roc,
5810                         "Oversized Packets Received");
5811         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_jabber",
5812                         CTLFLAG_RD, &adapter->stats.rjc,
5813                         "Received Jabber");
5814         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_errs",
5815                         CTLFLAG_RD, &adapter->stats.rxerrc,
5816                         "Receive Errors");
5817         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "crc_errs",
5818                         CTLFLAG_RD, &adapter->stats.crcerrs,
5819                         "CRC errors");
5820         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "alignment_errs",
5821                         CTLFLAG_RD, &adapter->stats.algnerrc,
5822                         "Alignment Errors");
5823         /* On 82575 these are collision counts */
5824         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "coll_ext_errs",
5825                         CTLFLAG_RD, &adapter->stats.cexterr,
5826                         "Collision/Carrier extension errors");
5827         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "xon_recvd",
5828                         CTLFLAG_RD, &adapter->stats.xonrxc,
5829                         "XON Received");
5830         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "xon_txd",
5831                         CTLFLAG_RD, &adapter->stats.xontxc,
5832                         "XON Transmitted");
5833         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "xoff_recvd",
5834                         CTLFLAG_RD, &adapter->stats.xoffrxc,
5835                         "XOFF Received");
5836         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "xoff_txd",
5837                         CTLFLAG_RD, &adapter->stats.xofftxc,
5838                         "XOFF Transmitted");
5839
5840         /* Packet Reception Stats */
5841         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "total_pkts_recvd",
5842                         CTLFLAG_RD, &adapter->stats.tpr,
5843                         "Total Packets Received");
5844         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_pkts_recvd",
5845                         CTLFLAG_RD, &adapter->stats.gprc,
5846                         "Good Packets Received");
5847         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "bcast_pkts_recvd",
5848                         CTLFLAG_RD, &adapter->stats.bprc,
5849                         "Broadcast Packets Received");
5850         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "mcast_pkts_recvd",
5851                         CTLFLAG_RD, &adapter->stats.mprc,
5852                         "Multicast Packets Received");
5853         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_64",
5854                         CTLFLAG_RD, &adapter->stats.prc64,
5855                         "64 byte frames received");
5856         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_65_127",
5857                         CTLFLAG_RD, &adapter->stats.prc127,
5858                         "65-127 byte frames received");
5859         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_128_255",
5860                         CTLFLAG_RD, &adapter->stats.prc255,
5861                         "128-255 byte frames received");
5862         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_256_511",
5863                         CTLFLAG_RD, &adapter->stats.prc511,
5864                         "256-511 byte frames received");
5865         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_512_1023",
5866                         CTLFLAG_RD, &adapter->stats.prc1023,
5867                         "512-1023 byte frames received");
5868         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_1024_1522",
5869                         CTLFLAG_RD, &adapter->stats.prc1522,
5870                         "1024-1522 byte frames received");
5871         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_octets_recvd",
5872                         CTLFLAG_RD, &adapter->stats.gorc, 
5873                         "Good Octets Received"); 
5874
5875         /* Packet Transmission Stats */
5876         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_octets_txd",
5877                         CTLFLAG_RD, &adapter->stats.gotc, 
5878                         "Good Octets Transmitted"); 
5879         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "total_pkts_txd",
5880                         CTLFLAG_RD, &adapter->stats.tpt,
5881                         "Total Packets Transmitted");
5882         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_pkts_txd",
5883                         CTLFLAG_RD, &adapter->stats.gptc,
5884                         "Good Packets Transmitted");
5885         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "bcast_pkts_txd",
5886                         CTLFLAG_RD, &adapter->stats.bptc,
5887                         "Broadcast Packets Transmitted");
5888         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "mcast_pkts_txd",
5889                         CTLFLAG_RD, &adapter->stats.mptc,
5890                         "Multicast Packets Transmitted");
5891         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_64",
5892                         CTLFLAG_RD, &adapter->stats.ptc64,
5893                         "64 byte frames transmitted");
5894         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_65_127",
5895                         CTLFLAG_RD, &adapter->stats.ptc127,
5896                         "65-127 byte frames transmitted");
5897         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_128_255",
5898                         CTLFLAG_RD, &adapter->stats.ptc255,
5899                         "128-255 byte frames transmitted");
5900         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_256_511",
5901                         CTLFLAG_RD, &adapter->stats.ptc511,
5902                         "256-511 byte frames transmitted");
5903         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_512_1023",
5904                         CTLFLAG_RD, &adapter->stats.ptc1023,
5905                         "512-1023 byte frames transmitted");
5906         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_1024_1522",
5907                         CTLFLAG_RD, &adapter->stats.ptc1522,
5908                         "1024-1522 byte frames transmitted");
5909         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tso_txd",
5910                         CTLFLAG_RD, &adapter->stats.tsctc,
5911                         "TSO Contexts Transmitted");
5912         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tso_ctx_fail",
5913                         CTLFLAG_RD, &adapter->stats.tsctfc,
5914                         "TSO Contexts Failed");
5915
5916
5917         /* Interrupt Stats */
5918
5919         int_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "interrupts", 
5920                                     CTLFLAG_RD, NULL, "Interrupt Statistics");
5921         int_list = SYSCTL_CHILDREN(int_node);
5922
5923         SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "asserts",
5924                         CTLFLAG_RD, &adapter->stats.iac,
5925                         "Interrupt Assertion Count");
5926
5927         SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "rx_pkt_timer",
5928                         CTLFLAG_RD, &adapter->stats.icrxptc,
5929                         "Interrupt Cause Rx Pkt Timer Expire Count");
5930
5931         SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "rx_abs_timer",
5932                         CTLFLAG_RD, &adapter->stats.icrxatc,
5933                         "Interrupt Cause Rx Abs Timer Expire Count");
5934
5935         SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "tx_pkt_timer",
5936                         CTLFLAG_RD, &adapter->stats.ictxptc,
5937                         "Interrupt Cause Tx Pkt Timer Expire Count");
5938
5939         SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "tx_abs_timer",
5940                         CTLFLAG_RD, &adapter->stats.ictxatc,
5941                         "Interrupt Cause Tx Abs Timer Expire Count");
5942
5943         SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "tx_queue_empty",
5944                         CTLFLAG_RD, &adapter->stats.ictxqec,
5945                         "Interrupt Cause Tx Queue Empty Count");
5946
5947         SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "tx_queue_min_thresh",
5948                         CTLFLAG_RD, &adapter->stats.ictxqmtc,
5949                         "Interrupt Cause Tx Queue Min Thresh Count");
5950
5951         SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "rx_desc_min_thresh",
5952                         CTLFLAG_RD, &adapter->stats.icrxdmtc,
5953                         "Interrupt Cause Rx Desc Min Thresh Count");
5954
5955         SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "rx_overrun",
5956                         CTLFLAG_RD, &adapter->stats.icrxoc,
5957                         "Interrupt Cause Receiver Overrun Count");
5958 }
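
/*
** For reference: everything registered above hangs off the device's
** sysctl tree, so the counters are readable from userland, e.g. for
** a hypothetical unit 0:
**
**      sysctl dev.em.0.mac_stats.good_pkts_recvd
**      sysctl dev.em.0.queue_tx_0.txd_head
*/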
5959
5960 /**********************************************************************
5961  *
5962  *  This routine provides a way to dump out the adapter eeprom,
5963  *  often a useful debug/service tool. It dumps only the first
5964  *  32 words; the data that matters lives within that extent.
5965  *
5966  **********************************************************************/
5967 static int
5968 em_sysctl_nvm_info(SYSCTL_HANDLER_ARGS)
5969 {
5970         struct adapter *adapter = (struct adapter *)arg1;
5971         int error;
5972         int result;
5973
5974         result = -1;
5975         error = sysctl_handle_int(oidp, &result, 0, req);
5976
5977         if (error || !req->newptr)
5978                 return (error);
5979
5980         /*
5981          * This value will cause a hex dump of the
5982          * first 32 16-bit words of the EEPROM to
5983          * the screen.
5984          */
5985         if (result == 1)
5986                 em_print_nvm_info(adapter);
5987
5988         return (error);
5989 }
5990
5991 static void
5992 em_print_nvm_info(struct adapter *adapter)
5993 {
5994         u16     eeprom_data;
5995         int     i, j, row = 0;
5996
5997         /* It's a bit crude, but it gets the job done */
5998         printf("\nInterface EEPROM Dump:\n");
5999         printf("Offset\n0x0000  ");
6000         for (i = 0, j = 0; i < 32; i++, j++) {
6001                 if (j == 8) { /* Make the offset block */
6002                         j = 0; ++row;
6003                         printf("\n0x00%x0  ", row);
6004                 }
6005                 e1000_read_nvm(&adapter->hw, i, 1, &eeprom_data);
6006                 printf("%04x ", eeprom_data);
6007         }
6008         printf("\n");
6009 }
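
/*
** For reference: em_sysctl_nvm_info() runs this dump when 1 is
** written to its node; assuming the attach-time registration names
** it "nvm" (the registration lives elsewhere in this file), the
** trigger for a hypothetical unit 0 is:
**
**      sysctl dev.em.0.nvm=1
*/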
6010
6011 static int
6012 em_sysctl_int_delay(SYSCTL_HANDLER_ARGS)
6013 {
6014         struct em_int_delay_info *info;
6015         struct adapter *adapter;
6016         u32 regval;
6017         int error, usecs, ticks;
6018
6019         info = (struct em_int_delay_info *)arg1;
6020         usecs = info->value;
6021         error = sysctl_handle_int(oidp, &usecs, 0, req);
6022         if (error != 0 || req->newptr == NULL)
6023                 return (error);
6024         if (usecs < 0 || usecs > EM_TICKS_TO_USECS(65535))
6025                 return (EINVAL);
6026         info->value = usecs;
6027         ticks = EM_USECS_TO_TICKS(usecs);
6028         if (info->offset == E1000_ITR)  /* units are 256ns here */
6029                 ticks *= 4;
6030
6031         adapter = info->adapter;
6032         
6033         EM_CORE_LOCK(adapter);
6034         regval = E1000_READ_OFFSET(&adapter->hw, info->offset);
6035         regval = (regval & ~0xffff) | (ticks & 0xffff);
6036         /* Handle a few special cases. */
6037         switch (info->offset) {
6038         case E1000_RDTR:
6039                 break;
6040         case E1000_TIDV:
6041                 if (ticks == 0) {
6042                         adapter->txd_cmd &= ~E1000_TXD_CMD_IDE;
6043                         /* Don't write 0 into the TIDV register. */
6044                         regval++;
6045                 } else
6046                         adapter->txd_cmd |= E1000_TXD_CMD_IDE;
6047                 break;
6048         }
6049         E1000_WRITE_OFFSET(&adapter->hw, info->offset, regval);
6050         EM_CORE_UNLOCK(adapter);
6051         return (0);
6052 }
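
/*
** Worked example for the conversion above, assuming the stock
** EM_USECS_TO_TICKS() scale of 1.024 usecs per tick: a request of
** 128 usecs becomes 125 ticks in the register's low 16 bits; for
** E1000_ITR, whose interval unit is 256 ns, the extra 'ticks *= 4'
** rescales the same request to 500 units.
*/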
6053
6054 static void
6055 em_add_int_delay_sysctl(struct adapter *adapter, const char *name,
6056         const char *description, struct em_int_delay_info *info,
6057         int offset, int value)
6058 {
6059         info->adapter = adapter;
6060         info->offset = offset;
6061         info->value = value;
6062         SYSCTL_ADD_PROC(device_get_sysctl_ctx(adapter->dev),
6063             SYSCTL_CHILDREN(device_get_sysctl_tree(adapter->dev)),
6064             OID_AUTO, name, CTLTYPE_INT|CTLFLAG_RW,
6065             info, 0, em_sysctl_int_delay, "I", description);
6066 }
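
/*
** A representative attach-time use (the real registrations live in
** the attach path; the values here are illustrative):
**
**      em_add_int_delay_sysctl(adapter, "rx_int_delay",
**          "receive interrupt delay in usecs", &adapter->rx_int_delay,
**          E1000_REGISTER(&adapter->hw, E1000_RDTR),
**          em_rx_int_delay_dflt);
*/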
6067
6068 static void
6069 em_set_sysctl_value(struct adapter *adapter, const char *name,
6070         const char *description, int *limit, int value)
6071 {
6072         *limit = value;
6073         SYSCTL_ADD_INT(device_get_sysctl_ctx(adapter->dev),
6074             SYSCTL_CHILDREN(device_get_sysctl_tree(adapter->dev)),
6075             OID_AUTO, name, CTLFLAG_RW, limit, value, description);
6076 }
6077
6078
6079 /*
6080 ** Set flow control using sysctl:
6081 ** Flow control values:
6082 **      0 - off
6083 **      1 - rx pause
6084 **      2 - tx pause
6085 **      3 - full
6086 */
6087 static int
6088 em_set_flowcntl(SYSCTL_HANDLER_ARGS)
6089 {       
6090         int             error, input;
6091         struct adapter  *adapter = (struct adapter *) arg1;
6092
6093         input = adapter->fc; /* report the current mode by default */
6094         error = sysctl_handle_int(oidp, &input, 0, req);
6095
6096         if (error || req->newptr == NULL)
6097                 return (error);
6098
6099         if (input == adapter->fc) /* no change? */
6100                 return (error);
6101
6102         switch (input) {
6103                 case e1000_fc_rx_pause:
6104                 case e1000_fc_tx_pause:
6105                 case e1000_fc_full:
6106                 case e1000_fc_none:
6107                         adapter->hw.fc.requested_mode = input;
6108                         adapter->fc = input;
6109                         break;
6110                 default:
6111                         /* Do nothing */
6112                         return (error);
6113         }
6114
6115         adapter->hw.fc.current_mode = adapter->hw.fc.requested_mode;
6116         e1000_force_mac_fc(&adapter->hw);
6117         return (error);
6118 }
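
/*
** For reference: assuming the handler is registered as "fc" at
** attach time (elsewhere in this file), the mode can be changed at
** runtime using the 0-3 encoding documented above, e.g. full flow
** control on a hypothetical unit 0:
**
**      sysctl dev.em.0.fc=3
*/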
6119
6120 /*
6121 ** Manage Energy Efficient Ethernet:
6122 ** Control values:
6123 **     0/1 - enabled/disabled
6124 */
6125 static int
6126 em_sysctl_eee(SYSCTL_HANDLER_ARGS)
6127 {
6128        struct adapter *adapter = (struct adapter *) arg1;
6129        int             error, value;
6130
6131        value = adapter->hw.dev_spec.ich8lan.eee_disable;
6132        error = sysctl_handle_int(oidp, &value, 0, req);
6133        if (error || req->newptr == NULL)
6134                return (error);
6135        EM_CORE_LOCK(adapter);
6136        adapter->hw.dev_spec.ich8lan.eee_disable = (value != 0);
6137        em_init_locked(adapter);
6138        EM_CORE_UNLOCK(adapter);
6139        return (0);
6140 }
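
/*
** For reference: the stored value is eee_disable, so writing 1
** turns EEE off and 0 turns it back on.  Assuming the node is
** registered as "eee_control" (the unit number is an example):
**
**      sysctl dev.em.0.eee_control=1   # disable EEE
*/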
6141
6142 static int
6143 em_sysctl_debug_info(SYSCTL_HANDLER_ARGS)
6144 {
6145         struct adapter *adapter;
6146         int error;
6147         int result;
6148
6149         result = -1;
6150         error = sysctl_handle_int(oidp, &result, 0, req);
6151
6152         if (error || !req->newptr)
6153                 return (error);
6154
6155         if (result == 1) {
6156                 adapter = (struct adapter *)arg1;
6157                 em_print_debug_info(adapter);
6158         }
6159
6160         return (error);
6161 }
6162
6163 /*
6164 ** This routine is meant to be fluid; add whatever is
6165 ** needed for debugging a problem.  -jfv
6166 */
6167 static void
6168 em_print_debug_info(struct adapter *adapter)
6169 {
6170         device_t dev = adapter->dev;
6171         struct tx_ring *txr = adapter->tx_rings;
6172         struct rx_ring *rxr = adapter->rx_rings;
6173
6174         if (adapter->ifp->if_drv_flags & IFF_DRV_RUNNING)
6175                 printf("Interface is RUNNING ");
6176         else
6177                 printf("Interface is NOT RUNNING ");
6178
6179         if (adapter->ifp->if_drv_flags & IFF_DRV_OACTIVE)
6180                 printf("and INACTIVE\n");
6181         else
6182                 printf("and ACTIVE\n");
6183
6184         for (int i = 0; i < adapter->num_queues; i++, txr++, rxr++) {
6185                 device_printf(dev, "TX Queue %d ------\n", i);
6186                 device_printf(dev, "hw tdh = %d, hw tdt = %d\n",
6187                         E1000_READ_REG(&adapter->hw, E1000_TDH(i)),
6188                         E1000_READ_REG(&adapter->hw, E1000_TDT(i)));
6189                 device_printf(dev, "Tx Queue Status = %d\n", txr->busy);
6190                 device_printf(dev, "TX descriptors avail = %d\n",
6191                         txr->tx_avail);
6192                 device_printf(dev, "Tx Descriptors avail failure = %ld\n",
6193                         txr->no_desc_avail);
6194                 device_printf(dev, "RX Queue %d ------\n", i);
6195                 device_printf(dev, "hw rdh = %d, hw rdt = %d\n",
6196                         E1000_READ_REG(&adapter->hw, E1000_RDH(i)),
6197                         E1000_READ_REG(&adapter->hw, E1000_RDT(i)));
6198                 device_printf(dev, "RX discarded packets = %ld\n",
6199                         rxr->rx_discarded);
6200                 device_printf(dev, "RX Next to Check = %d\n", rxr->next_to_check);
6201                 device_printf(dev, "RX Next to Refresh = %d\n", rxr->next_to_refresh);
6202         }
6203 }
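
/*
** For reference: this dump is reached through the same write-1
** trigger pattern as the NVM dump (em_sysctl_debug_info() above);
** assuming a "debug" node name from the attach-time registration:
**
**      sysctl dev.em.0.debug=1
*/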
6204
6205 #ifdef EM_MULTIQUEUE
6206 /*
6207  * 82574 only:
6208  * Write a new value to the EEPROM increasing the number of MSIX
6209  * vectors from 3 to 5, for proper multiqueue support.
6210  */
6211 static void
6212 em_enable_vectors_82574(struct adapter *adapter)
6213 {
6214         struct e1000_hw *hw = &adapter->hw;
6215         device_t dev = adapter->dev;
6216         u16 edata;
6217
6218         e1000_read_nvm(hw, EM_NVM_PCIE_CTRL, 1, &edata);
6219         device_printf(dev, "Current cap: %#06x\n", edata);
6220         if (((edata & EM_NVM_MSIX_N_MASK) >> EM_NVM_MSIX_N_SHIFT) != 4) {
6221                 device_printf(dev, "Writing to eeprom: increasing "
6222                     "reported MSIX vectors from 3 to 5...\n");
6223                 edata &= ~(EM_NVM_MSIX_N_MASK);
6224                 edata |= 4 << EM_NVM_MSIX_N_SHIFT;
6225                 e1000_write_nvm(hw, EM_NVM_PCIE_CTRL, 1, &edata);
6226                 e1000_update_nvm_checksum(hw);
6227                 device_printf(dev, "Writing to eeprom: done\n");
6228         }
6229 }
6230 #endif
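
/*
** For reference: as the code above implies, the EM_NVM_MSIX_N_MASK
** field of the EM_NVM_PCIE_CTRL word encodes "supported MSIX vectors
** minus one", so both the compare against 4 and the write of 4
** correspond to the 5 vectors the message reports:
**
**      vectors = ((edata & EM_NVM_MSIX_N_MASK)
**          >> EM_NVM_MSIX_N_SHIFT) + 1;
*/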
6231
6232 #ifdef DDB
6233 DB_COMMAND(em_reset_dev, em_ddb_reset_dev)
6234 {
6235         devclass_t      dc;
6236         int max_em;
6237
6238         dc = devclass_find("em");
6239         max_em = devclass_get_maxunit(dc);
6240
6241         for (int index = 0; index < max_em; index++) {
6242                 device_t dev;
6243                 dev = devclass_get_device(dc, index);
6244                 if (dev != NULL && device_get_driver(dev) == &em_driver) {
6245                         struct adapter *adapter = device_get_softc(dev);
6246                         EM_CORE_LOCK(adapter);
6247                         em_init_locked(adapter);
6248                         EM_CORE_UNLOCK(adapter);
6249                 }
6250         }
6251 }
6252 DB_COMMAND(em_dump_queue, em_ddb_dump_queue)
6253 {
6254         devclass_t      dc;
6255         int max_em;
6256
6257         dc = devclass_find("em");
6258         max_em = devclass_get_maxunit(dc);
6259
6260         for (int index = 0; index < max_em; index++) {
6261                 device_t dev;
6262                 dev = devclass_get_device(dc, index);
6263                 if (dev != NULL && device_get_driver(dev) == &em_driver)
6264                         em_print_debug_info(device_get_softc(dev));
6265         }
6266
6267 }
6268 #endif