/******************************************************************************

  Copyright (c) 2001-2015, Intel Corporation
  All rights reserved.

  Redistribution and use in source and binary forms, with or without
  modification, are permitted provided that the following conditions are met:

   1. Redistributions of source code must retain the above copyright notice,
      this list of conditions and the following disclaimer.

   2. Redistributions in binary form must reproduce the above copyright
      notice, this list of conditions and the following disclaimer in the
      documentation and/or other materials provided with the distribution.

   3. Neither the name of the Intel Corporation nor the names of its
      contributors may be used to endorse or promote products derived from
      this software without specific prior written permission.

  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
  AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
  LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
  INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
  CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
  ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
  POSSIBILITY OF SUCH DAMAGE.

******************************************************************************/
/*$FreeBSD$*/

#include "opt_em.h"
#include "opt_ddb.h"
#include "opt_inet.h"
#include "opt_inet6.h"

#ifdef HAVE_KERNEL_OPTION_HEADERS
#include "opt_device_polling.h"
#endif

#include <sys/param.h>
#include <sys/systm.h>
#ifdef DDB
#include <sys/types.h>
#include <ddb/ddb.h>
#endif
#if __FreeBSD_version >= 800000
#include <sys/buf_ring.h>
#endif
#include <sys/bus.h>
#include <sys/endian.h>
#include <sys/kernel.h>
#include <sys/kthread.h>
#include <sys/malloc.h>
#include <sys/mbuf.h>
#include <sys/module.h>
#include <sys/rman.h>
#include <sys/smp.h>
#include <sys/socket.h>
#include <sys/sockio.h>
#include <sys/sysctl.h>
#include <sys/taskqueue.h>
#include <sys/eventhandler.h>
#include <machine/bus.h>
#include <machine/resource.h>

#include <net/bpf.h>
#include <net/ethernet.h>
#include <net/if.h>
#include <net/if_arp.h>
#include <net/if_dl.h>
#include <net/if_media.h>

#include <net/if_types.h>
#include <net/if_vlan_var.h>

#include <netinet/in_systm.h>
#include <netinet/in.h>
#include <netinet/if_ether.h>
#include <netinet/ip.h>
#include <netinet/ip6.h>
#include <netinet/tcp.h>
#include <netinet/udp.h>

#include <machine/in_cksum.h>
#include <dev/led/led.h>
#include <dev/pci/pcivar.h>
#include <dev/pci/pcireg.h>

#include "e1000_api.h"
#include "e1000_82571.h"
#include "if_em.h"

/*********************************************************************
 *  Set this to one to display debug statistics
 *********************************************************************/
int     em_display_debug_stats = 0;

/*********************************************************************
 *  Driver version:
 *********************************************************************/
char em_driver_version[] = "7.4.2";
/*********************************************************************
 *  PCI Device ID Table
 *
 *  Used by probe to select which devices to attach to.
 *  The last field stores an index into em_strings.
 *  The last entry must be all 0s.
 *
 *  { Vendor ID, Device ID, SubVendor ID, SubDevice ID, String Index }
 *********************************************************************/

static em_vendor_info_t em_vendor_info_array[] =
{
        /* Intel(R) PRO/1000 Network Connection */
        { 0x8086, E1000_DEV_ID_82571EB_COPPER,  PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82571EB_FIBER,   PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82571EB_SERDES,  PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82571EB_SERDES_DUAL,
                                                PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82571EB_SERDES_QUAD,
                                                PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82571EB_QUAD_COPPER,
                                                PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82571EB_QUAD_COPPER_LP,
                                                PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82571EB_QUAD_FIBER,
                                                PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82571PT_QUAD_COPPER,
                                                PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82572EI_COPPER,  PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82572EI_FIBER,   PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82572EI_SERDES,  PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82572EI,         PCI_ANY_ID, PCI_ANY_ID, 0},

        { 0x8086, E1000_DEV_ID_82573E,          PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82573E_IAMT,     PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82573L,          PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82583V,          PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_80003ES2LAN_COPPER_SPT,
                                                PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_80003ES2LAN_SERDES_SPT,
                                                PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_80003ES2LAN_COPPER_DPT,
                                                PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_80003ES2LAN_SERDES_DPT,
                                                PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_ICH8_IGP_M_AMT,  PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_ICH8_IGP_AMT,    PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_ICH8_IGP_C,      PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_ICH8_IFE,        PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_ICH8_IFE_GT,     PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_ICH8_IFE_G,      PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_ICH8_IGP_M,      PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_ICH8_82567V_3,   PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_ICH9_IGP_M_AMT,  PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_ICH9_IGP_AMT,    PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_ICH9_IGP_C,      PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_ICH9_IGP_M,      PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_ICH9_IGP_M_V,    PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_ICH9_IFE,        PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_ICH9_IFE_GT,     PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_ICH9_IFE_G,      PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_ICH9_BM,         PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82574L,          PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82574LA,         PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_ICH10_R_BM_LM,   PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_ICH10_R_BM_LF,   PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_ICH10_R_BM_V,    PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_ICH10_D_BM_LM,   PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_ICH10_D_BM_LF,   PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_ICH10_D_BM_V,    PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_PCH_M_HV_LM,     PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_PCH_M_HV_LC,     PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_PCH_D_HV_DM,     PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_PCH_D_HV_DC,     PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_PCH2_LV_LM,      PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_PCH2_LV_V,       PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_PCH_LPT_I217_LM, PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_PCH_LPT_I217_V,  PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_PCH_LPTLP_I218_LM,
                                                PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_PCH_LPTLP_I218_V,
                                                PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_PCH_I218_LM2,    PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_PCH_I218_V2,     PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_PCH_I218_LM3,    PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_PCH_I218_V3,     PCI_ANY_ID, PCI_ANY_ID, 0},
        /* required last entry */
        { 0, 0, 0, 0, 0}
};

/*********************************************************************
 *  Table of branding strings for all supported NICs.
 *********************************************************************/

static char *em_strings[] = {
        "Intel(R) PRO/1000 Network Connection"
};

/*********************************************************************
 *  Function prototypes
 *********************************************************************/
static int      em_probe(device_t);
static int      em_attach(device_t);
static int      em_detach(device_t);
static int      em_shutdown(device_t);
static int      em_suspend(device_t);
static int      em_resume(device_t);
#ifdef EM_MULTIQUEUE
static int      em_mq_start(struct ifnet *, struct mbuf *);
static int      em_mq_start_locked(struct ifnet *,
                    struct tx_ring *);
static void     em_qflush(struct ifnet *);
#else
static void     em_start(struct ifnet *);
static void     em_start_locked(struct ifnet *, struct tx_ring *);
#endif
static int      em_ioctl(struct ifnet *, u_long, caddr_t);
static void     em_init(void *);
static void     em_init_locked(struct adapter *);
static void     em_stop(void *);
static void     em_media_status(struct ifnet *, struct ifmediareq *);
static int      em_media_change(struct ifnet *);
static void     em_identify_hardware(struct adapter *);
static int      em_allocate_pci_resources(struct adapter *);
static int      em_allocate_legacy(struct adapter *);
static int      em_allocate_msix(struct adapter *);
static int      em_allocate_queues(struct adapter *);
static int      em_setup_msix(struct adapter *);
static void     em_free_pci_resources(struct adapter *);
static void     em_local_timer(void *);
static void     em_reset(struct adapter *);
static int      em_setup_interface(device_t, struct adapter *);

static void     em_setup_transmit_structures(struct adapter *);
static void     em_initialize_transmit_unit(struct adapter *);
static int      em_allocate_transmit_buffers(struct tx_ring *);
static void     em_free_transmit_structures(struct adapter *);
static void     em_free_transmit_buffers(struct tx_ring *);

static int      em_setup_receive_structures(struct adapter *);
static int      em_allocate_receive_buffers(struct rx_ring *);
static void     em_initialize_receive_unit(struct adapter *);
static void     em_free_receive_structures(struct adapter *);
static void     em_free_receive_buffers(struct rx_ring *);

static void     em_enable_intr(struct adapter *);
static void     em_disable_intr(struct adapter *);
static void     em_update_stats_counters(struct adapter *);
static void     em_add_hw_stats(struct adapter *adapter);
static void     em_txeof(struct tx_ring *);
static bool     em_rxeof(struct rx_ring *, int, int *);
#ifndef __NO_STRICT_ALIGNMENT
static int      em_fixup_rx(struct rx_ring *);
#endif
static void     em_receive_checksum(struct e1000_rx_desc *, struct mbuf *);
static void     em_transmit_checksum_setup(struct tx_ring *, struct mbuf *, int,
                    struct ip *, u32 *, u32 *);
static void     em_tso_setup(struct tx_ring *, struct mbuf *, int, struct ip *,
                    struct tcphdr *, u32 *, u32 *);
static void     em_set_promisc(struct adapter *);
static void     em_disable_promisc(struct adapter *);
static void     em_set_multi(struct adapter *);
static void     em_update_link_status(struct adapter *);
static void     em_refresh_mbufs(struct rx_ring *, int);
static void     em_register_vlan(void *, struct ifnet *, u16);
static void     em_unregister_vlan(void *, struct ifnet *, u16);
static void     em_setup_vlan_hw_support(struct adapter *);
static int      em_xmit(struct tx_ring *, struct mbuf **);
static int      em_dma_malloc(struct adapter *, bus_size_t,
                    struct em_dma_alloc *, int);
static void     em_dma_free(struct adapter *, struct em_dma_alloc *);
static int      em_sysctl_nvm_info(SYSCTL_HANDLER_ARGS);
static void     em_print_nvm_info(struct adapter *);
static int      em_sysctl_debug_info(SYSCTL_HANDLER_ARGS);
static void     em_print_debug_info(struct adapter *);
static int      em_is_valid_ether_addr(u8 *);
static int      em_sysctl_int_delay(SYSCTL_HANDLER_ARGS);
static void     em_add_int_delay_sysctl(struct adapter *, const char *,
                    const char *, struct em_int_delay_info *, int, int);
/* Management and WOL Support */
static void     em_init_manageability(struct adapter *);
static void     em_release_manageability(struct adapter *);
static void     em_get_hw_control(struct adapter *);
static void     em_release_hw_control(struct adapter *);
static void     em_get_wakeup(device_t);
static void     em_enable_wakeup(device_t);
static int      em_enable_phy_wakeup(struct adapter *);
static void     em_led_func(void *, int);
static void     em_disable_aspm(struct adapter *);

static int      em_irq_fast(void *);

/* MSIX handlers */
static void     em_msix_tx(void *);
static void     em_msix_rx(void *);
static void     em_msix_link(void *);
static void     em_handle_tx(void *context, int pending);
static void     em_handle_rx(void *context, int pending);
static void     em_handle_link(void *context, int pending);

#ifdef EM_MULTIQUEUE
static void     em_enable_vectors_82574(struct adapter *);
#endif

static void     em_set_sysctl_value(struct adapter *, const char *,
                    const char *, int *, int);
static int      em_set_flowcntl(SYSCTL_HANDLER_ARGS);
static int      em_sysctl_eee(SYSCTL_HANDLER_ARGS);

static __inline void em_rx_discard(struct rx_ring *, int);

#ifdef DEVICE_POLLING
static poll_handler_t em_poll;
#endif /* DEVICE_POLLING */

/*********************************************************************
 *  FreeBSD Device Interface Entry Points
 *********************************************************************/

static device_method_t em_methods[] = {
        /* Device interface */
        DEVMETHOD(device_probe, em_probe),
        DEVMETHOD(device_attach, em_attach),
        DEVMETHOD(device_detach, em_detach),
        DEVMETHOD(device_shutdown, em_shutdown),
        DEVMETHOD(device_suspend, em_suspend),
        DEVMETHOD(device_resume, em_resume),
        DEVMETHOD_END
};

static driver_t em_driver = {
        "em", em_methods, sizeof(struct adapter),
};

devclass_t em_devclass;
DRIVER_MODULE(em, pci, em_driver, em_devclass, 0, 0);
MODULE_DEPEND(em, pci, 1, 1, 1);
MODULE_DEPEND(em, ether, 1, 1, 1);
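
/*
 * DRIVER_MODULE() registers the driver on the pci bus; the MODULE_DEPEND()
 * entries tell the kernel linker to bring in the pci and ether code before
 * this module is initialized.
 */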

/*********************************************************************
 *  Tunable default values.
 *********************************************************************/

#define EM_TICKS_TO_USECS(ticks)        ((1024 * (ticks) + 500) / 1000)
#define EM_USECS_TO_TICKS(usecs)        ((1000 * (usecs) + 512) / 1024)
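/*
 * The interrupt delay timers tick in 1.024 usec units, so the two macros
 * above scale by 1024/1000 in each direction; the +500 and +512 terms
 * round to the nearest unit rather than truncating.
 */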
#define M_TSO_LEN                       66
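/*
 * M_TSO_LEN is presumably sized for a worst-case TSO header:
 * 14 bytes of Ethernet + 20 of IP + 32 of TCP (20 plus options) = 66.
 */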

#define MAX_INTS_PER_SEC        8000
#define DEFAULT_ITR             (1000000000/(MAX_INTS_PER_SEC * 256))
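/*
 * The ITR register counts in 256 ns increments, so DEFAULT_ITR evaluates
 * to 1000000000 / (8000 * 256) = 488, i.e. about 8000 interrupts/sec.
 */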

/* Allow common code without TSO */
#ifndef CSUM_TSO
#define CSUM_TSO        0
#endif
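/*
 * With CSUM_TSO defined to 0, tests such as (csum_flags & CSUM_TSO)
 * simply evaluate to zero on kernels built without TSO support.
 */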

static SYSCTL_NODE(_hw, OID_AUTO, em, CTLFLAG_RD, 0, "EM driver parameters");

static int em_tx_int_delay_dflt = EM_TICKS_TO_USECS(EM_TIDV);
static int em_rx_int_delay_dflt = EM_TICKS_TO_USECS(EM_RDTR);
TUNABLE_INT("hw.em.tx_int_delay", &em_tx_int_delay_dflt);
TUNABLE_INT("hw.em.rx_int_delay", &em_rx_int_delay_dflt);
SYSCTL_INT(_hw_em, OID_AUTO, tx_int_delay, CTLFLAG_RDTUN, &em_tx_int_delay_dflt,
    0, "Default transmit interrupt delay in usecs");
SYSCTL_INT(_hw_em, OID_AUTO, rx_int_delay, CTLFLAG_RDTUN, &em_rx_int_delay_dflt,
    0, "Default receive interrupt delay in usecs");

static int em_tx_abs_int_delay_dflt = EM_TICKS_TO_USECS(EM_TADV);
static int em_rx_abs_int_delay_dflt = EM_TICKS_TO_USECS(EM_RADV);
TUNABLE_INT("hw.em.tx_abs_int_delay", &em_tx_abs_int_delay_dflt);
TUNABLE_INT("hw.em.rx_abs_int_delay", &em_rx_abs_int_delay_dflt);
SYSCTL_INT(_hw_em, OID_AUTO, tx_abs_int_delay, CTLFLAG_RDTUN,
    &em_tx_abs_int_delay_dflt, 0,
    "Default transmit interrupt delay limit in usecs");
SYSCTL_INT(_hw_em, OID_AUTO, rx_abs_int_delay, CTLFLAG_RDTUN,
    &em_rx_abs_int_delay_dflt, 0,
    "Default receive interrupt delay limit in usecs");

static int em_rxd = EM_DEFAULT_RXD;
static int em_txd = EM_DEFAULT_TXD;
TUNABLE_INT("hw.em.rxd", &em_rxd);
TUNABLE_INT("hw.em.txd", &em_txd);
SYSCTL_INT(_hw_em, OID_AUTO, rxd, CTLFLAG_RDTUN, &em_rxd, 0,
    "Number of receive descriptors per queue");
SYSCTL_INT(_hw_em, OID_AUTO, txd, CTLFLAG_RDTUN, &em_txd, 0,
    "Number of transmit descriptors per queue");
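/*
 * All of the CTLFLAG_RDTUN knobs above are loader tunables; for example,
 * larger descriptor rings can be requested from /boot/loader.conf:
 *
 *      hw.em.rxd=2048
 *      hw.em.txd=2048
 */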

static int em_smart_pwr_down = FALSE;
TUNABLE_INT("hw.em.smart_pwr_down", &em_smart_pwr_down);
SYSCTL_INT(_hw_em, OID_AUTO, smart_pwr_down, CTLFLAG_RDTUN, &em_smart_pwr_down,
    0, "Set to true to leave smart power down enabled on newer adapters");

/* Controls whether promiscuous also shows bad packets */
static int em_debug_sbp = FALSE;
TUNABLE_INT("hw.em.sbp", &em_debug_sbp);
SYSCTL_INT(_hw_em, OID_AUTO, sbp, CTLFLAG_RDTUN, &em_debug_sbp, 0,
    "Show bad packets in promiscuous mode");

static int em_enable_msix = TRUE;
TUNABLE_INT("hw.em.enable_msix", &em_enable_msix);
SYSCTL_INT(_hw_em, OID_AUTO, enable_msix, CTLFLAG_RDTUN, &em_enable_msix, 0,
    "Enable MSI-X interrupts");

#ifdef EM_MULTIQUEUE
static int em_num_queues = 1;
TUNABLE_INT("hw.em.num_queues", &em_num_queues);
SYSCTL_INT(_hw_em, OID_AUTO, num_queues, CTLFLAG_RDTUN, &em_num_queues, 0,
    "82574 only: Number of queues to configure, 0 indicates autoconfigure");
#endif

/*
** Global variable storing the last CPU used when binding queues
** to CPUs in em_allocate_msix.  Starts at CPU_FIRST and increments
** each time a queue is bound to a CPU.
*/
static int em_last_bind_cpu = -1;

/* How many packets rxeof tries to clean at a time */
static int em_rx_process_limit = 100;
TUNABLE_INT("hw.em.rx_process_limit", &em_rx_process_limit);
SYSCTL_INT(_hw_em, OID_AUTO, rx_process_limit, CTLFLAG_RDTUN,
    &em_rx_process_limit, 0,
    "Maximum number of received packets to process "
    "at a time, -1 means unlimited");
/*
 * Energy Efficient Ethernet - default to OFF.  Note that the value is
 * copied into hw->dev_spec.ich8lan.eee_disable at attach time, so a
 * setting of 1 here disables EEE.
 */
static int eee_setting = 1;
TUNABLE_INT("hw.em.eee_setting", &eee_setting);
SYSCTL_INT(_hw_em, OID_AUTO, eee_setting, CTLFLAG_RDTUN, &eee_setting, 0,
    "Disable Energy Efficient Ethernet");

/* Global used in WOL setup with multiport cards */
static int global_quad_port_a = 0;

#ifdef DEV_NETMAP       /* see ixgbe.c for details */
#include <dev/netmap/if_em_netmap.h>
#endif /* DEV_NETMAP */

/*********************************************************************
 *  Device identification routine
 *
 *  em_probe determines whether the driver should be loaded on an
 *  adapter, based on the adapter's PCI vendor/device ID.
 *
 *  return BUS_PROBE_DEFAULT on success, positive on failure
 *********************************************************************/

static int
em_probe(device_t dev)
{
        char            adapter_name[60];
        uint16_t        pci_vendor_id = 0;
        uint16_t        pci_device_id = 0;
        uint16_t        pci_subvendor_id = 0;
        uint16_t        pci_subdevice_id = 0;
        em_vendor_info_t *ent;

        INIT_DEBUGOUT("em_probe: begin");

        pci_vendor_id = pci_get_vendor(dev);
        if (pci_vendor_id != EM_VENDOR_ID)
                return (ENXIO);

        pci_device_id = pci_get_device(dev);
        pci_subvendor_id = pci_get_subvendor(dev);
        pci_subdevice_id = pci_get_subdevice(dev);

        ent = em_vendor_info_array;
        while (ent->vendor_id != 0) {
                if ((pci_vendor_id == ent->vendor_id) &&
                    (pci_device_id == ent->device_id) &&

                    ((pci_subvendor_id == ent->subvendor_id) ||
                    (ent->subvendor_id == PCI_ANY_ID)) &&

                    ((pci_subdevice_id == ent->subdevice_id) ||
                    (ent->subdevice_id == PCI_ANY_ID))) {
                        sprintf(adapter_name, "%s %s",
                                em_strings[ent->index],
                                em_driver_version);
                        device_set_desc_copy(dev, adapter_name);
                        return (BUS_PROBE_DEFAULT);
                }
                ent++;
        }

        return (ENXIO);
}

/*********************************************************************
 *  Device initialization routine
 *
 *  The attach entry point is called when the driver is being loaded.
 *  This routine identifies the type of hardware, allocates all resources
 *  and initializes the hardware.
 *
 *  return 0 on success, positive on failure
 *********************************************************************/

static int
em_attach(device_t dev)
{
        struct adapter  *adapter;
        struct e1000_hw *hw;
        int             error = 0;

        INIT_DEBUGOUT("em_attach: begin");

        if (resource_disabled("em", device_get_unit(dev))) {
                device_printf(dev, "Disabled by device hint\n");
                return (ENXIO);
        }

        adapter = device_get_softc(dev);
        adapter->dev = adapter->osdep.dev = dev;
        hw = &adapter->hw;
        EM_CORE_LOCK_INIT(adapter, device_get_nameunit(dev));

        /* SYSCTL stuff */
        SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
            SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
            OID_AUTO, "nvm", CTLTYPE_INT|CTLFLAG_RW, adapter, 0,
            em_sysctl_nvm_info, "I", "NVM Information");

        SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
            SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
            OID_AUTO, "debug", CTLTYPE_INT|CTLFLAG_RW, adapter, 0,
            em_sysctl_debug_info, "I", "Debug Information");

        SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
            SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
            OID_AUTO, "fc", CTLTYPE_INT|CTLFLAG_RW, adapter, 0,
            em_set_flowcntl, "I", "Flow Control");

        callout_init_mtx(&adapter->timer, &adapter->core_mtx, 0);

        /* Determine hardware and mac info */
        em_identify_hardware(adapter);

        /* Setup PCI resources */
        if (em_allocate_pci_resources(adapter)) {
                device_printf(dev, "Allocation of PCI resources failed\n");
                error = ENXIO;
                goto err_pci;
        }

        /*
        ** For ICH8 and family we need to
        ** map the flash memory, and this
        ** must happen after the MAC is
        ** identified
        */
        if ((hw->mac.type == e1000_ich8lan) ||
            (hw->mac.type == e1000_ich9lan) ||
            (hw->mac.type == e1000_ich10lan) ||
            (hw->mac.type == e1000_pchlan) ||
            (hw->mac.type == e1000_pch2lan) ||
            (hw->mac.type == e1000_pch_lpt)) {
                int rid = EM_BAR_TYPE_FLASH;
                adapter->flash = bus_alloc_resource_any(dev,
                    SYS_RES_MEMORY, &rid, RF_ACTIVE);
                if (adapter->flash == NULL) {
                        device_printf(dev, "Mapping of Flash failed\n");
                        error = ENXIO;
                        goto err_pci;
                }
                /* This is used in the shared code */
                hw->flash_address = (u8 *)adapter->flash;
                adapter->osdep.flash_bus_space_tag =
                    rman_get_bustag(adapter->flash);
                adapter->osdep.flash_bus_space_handle =
                    rman_get_bushandle(adapter->flash);
        }

        /* Do Shared Code initialization */
        if (e1000_setup_init_funcs(hw, TRUE)) {
                device_printf(dev, "Setup of Shared code failed\n");
                error = ENXIO;
                goto err_pci;
        }

        /*
         * Setup MSI/X or MSI if PCI Express
         */
        adapter->msix = em_setup_msix(adapter);

        e1000_get_bus_info(hw);

        /* Set up some sysctls for the tunable interrupt delays */
        em_add_int_delay_sysctl(adapter, "rx_int_delay",
            "receive interrupt delay in usecs", &adapter->rx_int_delay,
            E1000_REGISTER(hw, E1000_RDTR), em_rx_int_delay_dflt);
        em_add_int_delay_sysctl(adapter, "tx_int_delay",
            "transmit interrupt delay in usecs", &adapter->tx_int_delay,
            E1000_REGISTER(hw, E1000_TIDV), em_tx_int_delay_dflt);
        em_add_int_delay_sysctl(adapter, "rx_abs_int_delay",
            "receive interrupt delay limit in usecs",
            &adapter->rx_abs_int_delay,
            E1000_REGISTER(hw, E1000_RADV),
            em_rx_abs_int_delay_dflt);
        em_add_int_delay_sysctl(adapter, "tx_abs_int_delay",
            "transmit interrupt delay limit in usecs",
            &adapter->tx_abs_int_delay,
            E1000_REGISTER(hw, E1000_TADV),
            em_tx_abs_int_delay_dflt);
        em_add_int_delay_sysctl(adapter, "itr",
            "interrupt delay limit in usecs/4",
            &adapter->tx_itr,
            E1000_REGISTER(hw, E1000_ITR),
            DEFAULT_ITR);

        /* Sysctl for limiting the amount of work done in the taskqueue */
        em_set_sysctl_value(adapter, "rx_processing_limit",
            "max number of rx packets to process", &adapter->rx_process_limit,
            em_rx_process_limit);

        /*
         * Validate the number of transmit and receive descriptors.  They
         * must not exceed the hardware maximum and the ring size in bytes
         * must be a multiple of EM_DBA_ALIGN.
         */
        if (((em_txd * sizeof(struct e1000_tx_desc)) % EM_DBA_ALIGN) != 0 ||
            (em_txd > EM_MAX_TXD) || (em_txd < EM_MIN_TXD)) {
                device_printf(dev, "Using %d TX descriptors instead of %d!\n",
                    EM_DEFAULT_TXD, em_txd);
                adapter->num_tx_desc = EM_DEFAULT_TXD;
        } else
                adapter->num_tx_desc = em_txd;

        if (((em_rxd * sizeof(struct e1000_rx_desc)) % EM_DBA_ALIGN) != 0 ||
            (em_rxd > EM_MAX_RXD) || (em_rxd < EM_MIN_RXD)) {
                device_printf(dev, "Using %d RX descriptors instead of %d!\n",
                    EM_DEFAULT_RXD, em_rxd);
                adapter->num_rx_desc = EM_DEFAULT_RXD;
        } else
                adapter->num_rx_desc = em_rxd;
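
        /*
         * As a worked example: with a typical default of 1024 descriptors
         * and the 16-byte struct e1000_tx_desc, the ring occupies 16384
         * bytes, a multiple of the 128-byte EM_DBA_ALIGN requirement.
         */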

        hw->mac.autoneg = DO_AUTO_NEG;
        hw->phy.autoneg_wait_to_complete = FALSE;
        hw->phy.autoneg_advertised = AUTONEG_ADV_DEFAULT;

        /* Copper options */
        if (hw->phy.media_type == e1000_media_type_copper) {
                hw->phy.mdix = AUTO_ALL_MODES;
                hw->phy.disable_polarity_correction = FALSE;
                hw->phy.ms_type = EM_MASTER_SLAVE;
        }

        /*
         * Set the frame limits assuming
         * standard ethernet sized frames.
         */
        adapter->hw.mac.max_frame_size =
            ETHERMTU + ETHER_HDR_LEN + ETHERNET_FCS_SIZE;

        /*
         * This controls when hardware reports transmit completion
         * status.
         */
        hw->mac.report_tx_early = 1;

        /*
        ** Get queue/ring memory
        */
        if (em_allocate_queues(adapter)) {
                error = ENOMEM;
                goto err_pci;
        }

        /* Allocate multicast array memory. */
        adapter->mta = malloc(sizeof(u8) * ETH_ADDR_LEN *
            MAX_NUM_MULTICAST_ADDRESSES, M_DEVBUF, M_NOWAIT);
        if (adapter->mta == NULL) {
                device_printf(dev, "Can not allocate multicast setup array\n");
                error = ENOMEM;
                goto err_late;
        }

        /* Check SOL/IDER usage */
        if (e1000_check_reset_block(hw))
                device_printf(dev, "PHY reset is blocked"
                    " due to SOL/IDER session.\n");

        /* Sysctl for setting Energy Efficient Ethernet */
        hw->dev_spec.ich8lan.eee_disable = eee_setting;
        SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
            SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
            OID_AUTO, "eee_control", CTLTYPE_INT|CTLFLAG_RW,
            adapter, 0, em_sysctl_eee, "I",
            "Disable Energy Efficient Ethernet");

        /*
        ** Start from a known state; this is important
        ** for reading the NVM and MAC address.
        */
        e1000_reset_hw(hw);

        /* Make sure we have a good EEPROM before we read from it */
        if (e1000_validate_nvm_checksum(hw) < 0) {
                /*
                ** Some PCI-E parts fail the first check due to the link
                ** being in a sleep state; call it again.  If it fails a
                ** second time, it's a real issue.
                */
                if (e1000_validate_nvm_checksum(hw) < 0) {
                        device_printf(dev,
                            "The EEPROM Checksum Is Not Valid\n");
                        error = EIO;
                        goto err_late;
                }
        }

        /* Copy the permanent MAC address out of the EEPROM */
        if (e1000_read_mac_addr(hw) < 0) {
                device_printf(dev, "EEPROM read error while reading MAC"
                    " address\n");
                error = EIO;
                goto err_late;
        }

        if (!em_is_valid_ether_addr(hw->mac.addr)) {
                device_printf(dev, "Invalid MAC address\n");
                error = EIO;
                goto err_late;
        }

        /* Disable ULP support */
        e1000_disable_ulp_lpt_lp(hw, TRUE);

        /*
        **  Do interrupt configuration
        */
        if (adapter->msix > 1) /* Do MSIX */
                error = em_allocate_msix(adapter);
        else  /* MSI or Legacy */
                error = em_allocate_legacy(adapter);
        if (error)
                goto err_late;

        /*
         * Get Wake-on-Lan and Management info for later use
         */
        em_get_wakeup(dev);

        /* Setup OS specific network interface */
        if (em_setup_interface(dev, adapter) != 0)
                goto err_late;

        em_reset(adapter);

        /* Initialize statistics */
        em_update_stats_counters(adapter);

        hw->mac.get_link_status = 1;
        em_update_link_status(adapter);

        /* Register for VLAN events */
        adapter->vlan_attach = EVENTHANDLER_REGISTER(vlan_config,
            em_register_vlan, adapter, EVENTHANDLER_PRI_FIRST);
        adapter->vlan_detach = EVENTHANDLER_REGISTER(vlan_unconfig,
            em_unregister_vlan, adapter, EVENTHANDLER_PRI_FIRST);

        em_add_hw_stats(adapter);

        /* Non-AMT based hardware can now take control from firmware */
        if (adapter->has_manage && !adapter->has_amt)
                em_get_hw_control(adapter);

        /* Tell the stack that the interface is not active */
        adapter->ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
        adapter->ifp->if_drv_flags |= IFF_DRV_OACTIVE;

        adapter->led_dev = led_create(em_led_func, adapter,
            device_get_nameunit(dev));
#ifdef DEV_NETMAP
        em_netmap_attach(adapter);
#endif /* DEV_NETMAP */

        INIT_DEBUGOUT("em_attach: end");

        return (0);

err_late:
        em_free_transmit_structures(adapter);
        em_free_receive_structures(adapter);
        em_release_hw_control(adapter);
        if (adapter->ifp != NULL)
                if_free(adapter->ifp);
err_pci:
        em_free_pci_resources(adapter);
        free(adapter->mta, M_DEVBUF);
        EM_CORE_LOCK_DESTROY(adapter);

        return (error);
}

/*********************************************************************
 *  Device removal routine
 *
 *  The detach entry point is called when the driver is being removed.
 *  This routine stops the adapter and deallocates all the resources
 *  that were allocated for driver operation.
 *
 *  return 0 on success, positive on failure
 *********************************************************************/

static int
em_detach(device_t dev)
{
        struct adapter  *adapter = device_get_softc(dev);
        struct ifnet    *ifp = adapter->ifp;

        INIT_DEBUGOUT("em_detach: begin");

        /* Make sure VLANs are not using the driver */
        if (adapter->ifp->if_vlantrunk != NULL) {
                device_printf(dev,"Vlan in use, detach first\n");
                return (EBUSY);
        }

#ifdef DEVICE_POLLING
        if (ifp->if_capenable & IFCAP_POLLING)
                ether_poll_deregister(ifp);
#endif

        if (adapter->led_dev != NULL)
                led_destroy(adapter->led_dev);

        EM_CORE_LOCK(adapter);
        adapter->in_detach = 1;
        em_stop(adapter);
        EM_CORE_UNLOCK(adapter);
        EM_CORE_LOCK_DESTROY(adapter);

        e1000_phy_hw_reset(&adapter->hw);

        em_release_manageability(adapter);
        em_release_hw_control(adapter);

        /* Unregister VLAN events */
        if (adapter->vlan_attach != NULL)
                EVENTHANDLER_DEREGISTER(vlan_config, adapter->vlan_attach);
        if (adapter->vlan_detach != NULL)
                EVENTHANDLER_DEREGISTER(vlan_unconfig, adapter->vlan_detach);

        ether_ifdetach(adapter->ifp);
        callout_drain(&adapter->timer);

#ifdef DEV_NETMAP
        netmap_detach(ifp);
#endif /* DEV_NETMAP */

        em_free_pci_resources(adapter);
        bus_generic_detach(dev);
        if_free(ifp);

        em_free_transmit_structures(adapter);
        em_free_receive_structures(adapter);

        em_release_hw_control(adapter);
        free(adapter->mta, M_DEVBUF);

        return (0);
}

/*********************************************************************
 *
 *  Shutdown entry point
 *
 **********************************************************************/

static int
em_shutdown(device_t dev)
{
        return em_suspend(dev);
}

/*
 * Suspend/resume device methods.
 */
static int
em_suspend(device_t dev)
{
        struct adapter *adapter = device_get_softc(dev);

        EM_CORE_LOCK(adapter);

        em_release_manageability(adapter);
        em_release_hw_control(adapter);
        em_enable_wakeup(dev);

        EM_CORE_UNLOCK(adapter);

        return bus_generic_suspend(dev);
}

static int
em_resume(device_t dev)
{
        struct adapter *adapter = device_get_softc(dev);
        struct tx_ring  *txr = adapter->tx_rings;
        struct ifnet *ifp = adapter->ifp;

        EM_CORE_LOCK(adapter);
        if (adapter->hw.mac.type == e1000_pch2lan)
                e1000_resume_workarounds_pchlan(&adapter->hw);
        em_init_locked(adapter);
        em_init_manageability(adapter);

        if ((ifp->if_flags & IFF_UP) &&
            (ifp->if_drv_flags & IFF_DRV_RUNNING) && adapter->link_active) {
                for (int i = 0; i < adapter->num_queues; i++, txr++) {
                        EM_TX_LOCK(txr);
#ifdef EM_MULTIQUEUE
                        if (!drbr_empty(ifp, txr->br))
                                em_mq_start_locked(ifp, txr);
#else
                        if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
                                em_start_locked(ifp, txr);
#endif
                        EM_TX_UNLOCK(txr);
                }
        }
        EM_CORE_UNLOCK(adapter);

        return bus_generic_resume(dev);
}


#ifndef EM_MULTIQUEUE
static void
em_start_locked(struct ifnet *ifp, struct tx_ring *txr)
{
        struct adapter  *adapter = ifp->if_softc;
        struct mbuf     *m_head;

        EM_TX_LOCK_ASSERT(txr);

        if ((ifp->if_drv_flags & (IFF_DRV_RUNNING|IFF_DRV_OACTIVE)) !=
            IFF_DRV_RUNNING)
                return;

        if (!adapter->link_active)
                return;

        while (!IFQ_DRV_IS_EMPTY(&ifp->if_snd)) {
                /* Call cleanup if number of TX descriptors low */
                if (txr->tx_avail <= EM_TX_CLEANUP_THRESHOLD)
                        em_txeof(txr);
                if (txr->tx_avail < EM_MAX_SCATTER) {
                        ifp->if_drv_flags |= IFF_DRV_OACTIVE;
                        break;
                }
                IFQ_DRV_DEQUEUE(&ifp->if_snd, m_head);
                if (m_head == NULL)
                        break;
                /*
                 *  Encapsulation can modify our pointer, and/or make it
                 *  NULL on failure.  In that event, we can't requeue.
                 */
                if (em_xmit(txr, &m_head)) {
                        if (m_head == NULL)
                                break;
                        IFQ_DRV_PREPEND(&ifp->if_snd, m_head);
                        break;
                }

                /* Mark the queue as having work */
                if (txr->busy == EM_TX_IDLE)
                        txr->busy = EM_TX_BUSY;

                /* Send a copy of the frame to the BPF listener */
                ETHER_BPF_MTAP(ifp, m_head);

        }

        return;
}

static void
em_start(struct ifnet *ifp)
{
        struct adapter  *adapter = ifp->if_softc;
        struct tx_ring  *txr = adapter->tx_rings;

        if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
                EM_TX_LOCK(txr);
                em_start_locked(ifp, txr);
                EM_TX_UNLOCK(txr);
        }
        return;
}
#else /* EM_MULTIQUEUE */
/*********************************************************************
 *  Multiqueue Transmit routines
 *
 *  em_mq_start is called by the stack to initiate a transmit.
 *  If the ring is busy the driver can queue the request rather
 *  than do an immediate send; that ability to defer, rather than
 *  having multiple hardware TX queues, is this path's advantage.
 **********************************************************************/
/*
** Multiqueue capable stack interface
*/
static int
em_mq_start(struct ifnet *ifp, struct mbuf *m)
{
        struct adapter  *adapter = ifp->if_softc;
        struct tx_ring  *txr = adapter->tx_rings;
        unsigned int    i, error;

        if (M_HASHTYPE_GET(m) != M_HASHTYPE_NONE)
                i = m->m_pkthdr.flowid % adapter->num_queues;
        else
                i = curcpu % adapter->num_queues;

        txr = &adapter->tx_rings[i];

        error = drbr_enqueue(ifp, txr->br, m);
        if (error)
                return (error);

        if (EM_TX_TRYLOCK(txr)) {
                em_mq_start_locked(ifp, txr);
                EM_TX_UNLOCK(txr);
        } else
                taskqueue_enqueue(txr->tq, &txr->tx_task);

        return (0);
}
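
/*
 * Queue selection note: when the stack has supplied a flow ID, every
 * packet of a flow maps to the same ring, preserving per-flow ordering;
 * otherwise the ring is chosen from the current CPU.
 */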

static int
em_mq_start_locked(struct ifnet *ifp, struct tx_ring *txr)
{
        struct adapter  *adapter = txr->adapter;
        struct mbuf     *next;
        int             err = 0, enq = 0;

        if ((ifp->if_drv_flags & (IFF_DRV_RUNNING | IFF_DRV_OACTIVE)) !=
            IFF_DRV_RUNNING || adapter->link_active == 0) {
                return (ENETDOWN);
        }

        /* Process the queue */
        while ((next = drbr_peek(ifp, txr->br)) != NULL) {
                if ((err = em_xmit(txr, &next)) != 0) {
                        if (next == NULL) {
                                /* It was freed, move forward */
                                drbr_advance(ifp, txr->br);
                        } else {
                                /*
                                 * Still have one left, it may not be
                                 * the same since the transmit function
                                 * may have changed it.
                                 */
                                drbr_putback(ifp, txr->br, next);
                        }
                        break;
                }
                drbr_advance(ifp, txr->br);
                enq++;
                ifp->if_obytes += next->m_pkthdr.len;
                if (next->m_flags & M_MCAST)
                        ifp->if_omcasts++;
                ETHER_BPF_MTAP(ifp, next);
                if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
                        break;
        }

        /* Mark the queue as having work */
        if ((enq > 0) && (txr->busy == EM_TX_IDLE))
                txr->busy = EM_TX_BUSY;

        if (txr->tx_avail < EM_MAX_SCATTER)
                em_txeof(txr);
        if (txr->tx_avail < EM_MAX_SCATTER) {
                ifp->if_drv_flags |= IFF_DRV_OACTIVE;
        }
        return (err);
}
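
/*
 * The drbr_peek/advance/putback protocol above keeps each mbuf on the
 * buf_ring until em_xmit() accepts it: drbr_advance() consumes the
 * entry once it is sent (or was freed), while drbr_putback() stores
 * the possibly modified mbuf back for a later retry.
 */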

/*
** Flush all ring buffers
*/
static void
em_qflush(struct ifnet *ifp)
{
        struct adapter  *adapter = ifp->if_softc;
        struct tx_ring  *txr = adapter->tx_rings;
        struct mbuf     *m;

        for (int i = 0; i < adapter->num_queues; i++, txr++) {
                EM_TX_LOCK(txr);
                while ((m = buf_ring_dequeue_sc(txr->br)) != NULL)
                        m_freem(m);
                EM_TX_UNLOCK(txr);
        }
        if_qflush(ifp);
}
#endif /* EM_MULTIQUEUE */
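
/*
 * EM_MULTIQUEUE thus selects between two transmit entry points: the
 * em_mq_start/em_qflush pair used with the if_transmit interface, or
 * the legacy em_start path that drains the if_snd queue.
 */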

/*********************************************************************
 *  Ioctl entry point
 *
 *  em_ioctl is called when the user wants to configure the
 *  interface.
 *
 *  return 0 on success, positive on failure
 **********************************************************************/

static int
em_ioctl(struct ifnet *ifp, u_long command, caddr_t data)
{
        struct adapter  *adapter = ifp->if_softc;
        struct ifreq    *ifr = (struct ifreq *)data;
#if defined(INET) || defined(INET6)
        struct ifaddr   *ifa = (struct ifaddr *)data;
#endif
        bool            avoid_reset = FALSE;
        int             error = 0;

        if (adapter->in_detach)
                return (error);

        switch (command) {
        case SIOCSIFADDR:
#ifdef INET
                if (ifa->ifa_addr->sa_family == AF_INET)
                        avoid_reset = TRUE;
#endif
#ifdef INET6
                if (ifa->ifa_addr->sa_family == AF_INET6)
                        avoid_reset = TRUE;
#endif
                /*
                ** Calling init results in link renegotiation,
                ** so we avoid doing it when possible.
                */
                if (avoid_reset) {
                        ifp->if_flags |= IFF_UP;
                        if (!(ifp->if_drv_flags & IFF_DRV_RUNNING))
                                em_init(adapter);
#ifdef INET
                        if (!(ifp->if_flags & IFF_NOARP))
                                arp_ifinit(ifp, ifa);
#endif
                } else
                        error = ether_ioctl(ifp, command, data);
                break;
        case SIOCSIFMTU:
            {
                int max_frame_size;

                IOCTL_DEBUGOUT("ioctl rcv'd: SIOCSIFMTU (Set Interface MTU)");

                EM_CORE_LOCK(adapter);
                switch (adapter->hw.mac.type) {
                case e1000_82571:
                case e1000_82572:
                case e1000_ich9lan:
                case e1000_ich10lan:
                case e1000_pch2lan:
                case e1000_pch_lpt:
                case e1000_82574:
                case e1000_82583:
                case e1000_80003es2lan: /* 9K Jumbo Frame size */
                        max_frame_size = 9234;
                        break;
                case e1000_pchlan:
                        max_frame_size = 4096;
                        break;
                        /* Adapters that do not support jumbo frames */
                case e1000_ich8lan:
                        max_frame_size = ETHER_MAX_LEN;
                        break;
                default:
                        max_frame_size = MAX_JUMBO_FRAME_SIZE;
                }
                if (ifr->ifr_mtu > max_frame_size - ETHER_HDR_LEN -
                    ETHER_CRC_LEN) {
                        EM_CORE_UNLOCK(adapter);
                        error = EINVAL;
                        break;
                }

                ifp->if_mtu = ifr->ifr_mtu;
                adapter->hw.mac.max_frame_size =
                    ifp->if_mtu + ETHER_HDR_LEN + ETHER_CRC_LEN;
                em_init_locked(adapter);
                EM_CORE_UNLOCK(adapter);
                break;
            }
        case SIOCSIFFLAGS:
                IOCTL_DEBUGOUT("ioctl rcv'd:\
                    SIOCSIFFLAGS (Set Interface Flags)");
                EM_CORE_LOCK(adapter);
                if (ifp->if_flags & IFF_UP) {
                        if ((ifp->if_drv_flags & IFF_DRV_RUNNING)) {
                                if ((ifp->if_flags ^ adapter->if_flags) &
                                    (IFF_PROMISC | IFF_ALLMULTI)) {
                                        em_disable_promisc(adapter);
                                        em_set_promisc(adapter);
                                }
                        } else
                                em_init_locked(adapter);
                } else
                        if (ifp->if_drv_flags & IFF_DRV_RUNNING)
                                em_stop(adapter);
                adapter->if_flags = ifp->if_flags;
                EM_CORE_UNLOCK(adapter);
                break;
        case SIOCADDMULTI:
        case SIOCDELMULTI:
                IOCTL_DEBUGOUT("ioctl rcv'd: SIOC(ADD|DEL)MULTI");
                if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
                        EM_CORE_LOCK(adapter);
                        em_disable_intr(adapter);
                        em_set_multi(adapter);
#ifdef DEVICE_POLLING
                        if (!(ifp->if_capenable & IFCAP_POLLING))
#endif
                                em_enable_intr(adapter);
                        EM_CORE_UNLOCK(adapter);
                }
                break;
        case SIOCSIFMEDIA:
                /* Check SOL/IDER usage */
                EM_CORE_LOCK(adapter);
                if (e1000_check_reset_block(&adapter->hw)) {
                        EM_CORE_UNLOCK(adapter);
                        device_printf(adapter->dev, "Media change is"
                            " blocked due to SOL/IDER session.\n");
                        break;
                }
                EM_CORE_UNLOCK(adapter);
                /* falls thru */
        case SIOCGIFMEDIA:
                IOCTL_DEBUGOUT("ioctl rcv'd: \
                    SIOCxIFMEDIA (Get/Set Interface Media)");
                error = ifmedia_ioctl(ifp, ifr, &adapter->media, command);
                break;
        case SIOCSIFCAP:
            {
                int mask, reinit;

                IOCTL_DEBUGOUT("ioctl rcv'd: SIOCSIFCAP (Set Capabilities)");
                reinit = 0;
                mask = ifr->ifr_reqcap ^ ifp->if_capenable;
#ifdef DEVICE_POLLING
                if (mask & IFCAP_POLLING) {
                        if (ifr->ifr_reqcap & IFCAP_POLLING) {
                                error = ether_poll_register(em_poll, ifp);
                                if (error)
                                        return (error);
                                EM_CORE_LOCK(adapter);
                                em_disable_intr(adapter);
                                ifp->if_capenable |= IFCAP_POLLING;
                                EM_CORE_UNLOCK(adapter);
                        } else {
                                error = ether_poll_deregister(ifp);
                                /* Enable interrupt even in error case */
                                EM_CORE_LOCK(adapter);
                                em_enable_intr(adapter);
                                ifp->if_capenable &= ~IFCAP_POLLING;
                                EM_CORE_UNLOCK(adapter);
                        }
                }
#endif
                if (mask & IFCAP_HWCSUM) {
                        ifp->if_capenable ^= IFCAP_HWCSUM;
                        reinit = 1;
                }
                if (mask & IFCAP_TSO4) {
                        ifp->if_capenable ^= IFCAP_TSO4;
                        reinit = 1;
                }
                if (mask & IFCAP_VLAN_HWTAGGING) {
                        ifp->if_capenable ^= IFCAP_VLAN_HWTAGGING;
                        reinit = 1;
                }
                if (mask & IFCAP_VLAN_HWFILTER) {
                        ifp->if_capenable ^= IFCAP_VLAN_HWFILTER;
                        reinit = 1;
                }
                if (mask & IFCAP_VLAN_HWTSO) {
                        ifp->if_capenable ^= IFCAP_VLAN_HWTSO;
                        reinit = 1;
                }
                if ((mask & IFCAP_WOL) &&
                    (ifp->if_capabilities & IFCAP_WOL) != 0) {
                        if (mask & IFCAP_WOL_MCAST)
                                ifp->if_capenable ^= IFCAP_WOL_MCAST;
                        if (mask & IFCAP_WOL_MAGIC)
                                ifp->if_capenable ^= IFCAP_WOL_MAGIC;
                }
                if (reinit && (ifp->if_drv_flags & IFF_DRV_RUNNING))
                        em_init(adapter);
                VLAN_CAPABILITIES(ifp);
                break;
            }

        default:
                error = ether_ioctl(ifp, command, data);
                break;
        }

        return (error);
}

/*********************************************************************
 *  Init entry point
 *
 *  This routine is used in two ways.  It is used by the stack as
 *  the init entry point in the network interface structure.  It is
 *  also used by the driver as a hw/sw initialization routine to get
 *  to a consistent state.
 *
 *********************************************************************/

static void
em_init_locked(struct adapter *adapter)
{
        struct ifnet    *ifp = adapter->ifp;
        device_t        dev = adapter->dev;

        INIT_DEBUGOUT("em_init: begin");

        EM_CORE_LOCK_ASSERT(adapter);

        em_disable_intr(adapter);
        callout_stop(&adapter->timer);

        /* Get the latest MAC address; the user may have set a LAA */
        bcopy(IF_LLADDR(adapter->ifp), adapter->hw.mac.addr,
              ETHER_ADDR_LEN);

        /* Put the address into the Receive Address Array */
        e1000_rar_set(&adapter->hw, adapter->hw.mac.addr, 0);

        /*
         * With the 82571 adapter, RAR[0] may be overwritten when the
         * other port is reset.  We make a duplicate in RAR[14] for that
         * eventuality, which assures that the interface continues to
         * function.
         */
1355         if (adapter->hw.mac.type == e1000_82571) {
1356                 e1000_set_laa_state_82571(&adapter->hw, TRUE);
1357                 e1000_rar_set(&adapter->hw, adapter->hw.mac.addr,
1358                     E1000_RAR_ENTRIES - 1);
1359         }
1360
1361         /* Initialize the hardware */
1362         em_reset(adapter);
1363         em_update_link_status(adapter);
1364
1365         /* Setup VLAN support, basic and offload if available */
1366         E1000_WRITE_REG(&adapter->hw, E1000_VET, ETHERTYPE_VLAN);
1367
1368         /* Set hardware offload abilities */
1369         ifp->if_hwassist = 0;
1370         if (ifp->if_capenable & IFCAP_TXCSUM)
1371                 ifp->if_hwassist |= (CSUM_TCP | CSUM_UDP);
1372         if (ifp->if_capenable & IFCAP_TSO4)
1373                 ifp->if_hwassist |= CSUM_TSO;
1374
1375         /* Configure for OS presence */
1376         em_init_manageability(adapter);
1377
1378         /* Prepare transmit descriptors and buffers */
1379         em_setup_transmit_structures(adapter);
1380         em_initialize_transmit_unit(adapter);
1381
1382         /* Setup Multicast table */
1383         em_set_multi(adapter);
1384
1385         /*
1386         ** Figure out the desired mbuf
1387         ** pool for doing jumbos
1388         */
1389         if (adapter->hw.mac.max_frame_size <= 2048)
1390                 adapter->rx_mbuf_sz = MCLBYTES;
1391         else if (adapter->hw.mac.max_frame_size <= 4096)
1392                 adapter->rx_mbuf_sz = MJUMPAGESIZE;
1393         else
1394                 adapter->rx_mbuf_sz = MJUM9BYTES;
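        /*
        ** Illustrative sizing: a standard 1518-byte frame fits in a
        ** 2K cluster (MCLBYTES), frames up to 4K use a page-sized
        ** cluster (MJUMPAGESIZE), and larger jumbos get 9K clusters
        ** (MJUM9BYTES).
        */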
1395
1396         /* Prepare receive descriptors and buffers */
1397         if (em_setup_receive_structures(adapter)) {
1398                 device_printf(dev, "Could not setup receive structures\n");
1399                 em_stop(adapter);
1400                 return;
1401         }
1402         em_initialize_receive_unit(adapter);
1403
1404         /* Use real VLAN Filter support? */
1405         if (ifp->if_capenable & IFCAP_VLAN_HWTAGGING) {
1406                 if (ifp->if_capenable & IFCAP_VLAN_HWFILTER)
1407                         /* Use real VLAN Filter support */
1408                         em_setup_vlan_hw_support(adapter);
1409                 else {
1410                         u32 ctrl;
1411                         ctrl = E1000_READ_REG(&adapter->hw, E1000_CTRL);
1412                         ctrl |= E1000_CTRL_VME;
1413                         E1000_WRITE_REG(&adapter->hw, E1000_CTRL, ctrl);
1414                 }
1415         }
1416
1417         /* Don't lose promiscuous settings */
1418         em_set_promisc(adapter);
1419
1420         /* Set the interface as ACTIVE */
1421         ifp->if_drv_flags |= IFF_DRV_RUNNING;
1422         ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;
1423
1424         callout_reset(&adapter->timer, hz, em_local_timer, adapter);
1425         e1000_clear_hw_cntrs_base_generic(&adapter->hw);
1426
1427         /* MSI/X configuration for 82574 */
1428         if (adapter->hw.mac.type == e1000_82574) {
1429                 int tmp;
1430                 tmp = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
1431                 tmp |= E1000_CTRL_EXT_PBA_CLR;
1432                 E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT, tmp);
1433                 /* Set the IVAR - interrupt vector routing. */
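                /*
                ** adapter->ivars was assembled in em_allocate_msix():
                ** each 4-bit field holds an MSIX vector number plus a
                ** valid bit (the 0x8).  The low two fields route the
                ** RX queues, the next two route the TX queues, and
                ** bits 16-19 route the link interrupt.
                */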
1434                 E1000_WRITE_REG(&adapter->hw, E1000_IVAR, adapter->ivars);
1435         }
1436
1437 #ifdef DEVICE_POLLING
1438         /*
1439          * Only enable interrupts if we are not polling; make sure
1440          * they are off otherwise.
1441          */
1442         if (ifp->if_capenable & IFCAP_POLLING)
1443                 em_disable_intr(adapter);
1444         else
1445 #endif /* DEVICE_POLLING */
1446                 em_enable_intr(adapter);
1447
1448         /* AMT based hardware can now take control from firmware */
1449         if (adapter->has_manage && adapter->has_amt)
1450                 em_get_hw_control(adapter);
1451 }
1452
1453 static void
1454 em_init(void *arg)
1455 {
1456         struct adapter *adapter = arg;
1457
1458         EM_CORE_LOCK(adapter);
1459         em_init_locked(adapter);
1460         EM_CORE_UNLOCK(adapter);
1461 }
1462
1463
1464 #ifdef DEVICE_POLLING
1465 /*********************************************************************
1466  *
1467  *  Legacy polling routine: note this only works with single queue
1468  *
1469  *********************************************************************/
1470 static int
1471 em_poll(struct ifnet *ifp, enum poll_cmd cmd, int count)
1472 {
1473         struct adapter *adapter = ifp->if_softc;
1474         struct tx_ring  *txr = adapter->tx_rings;
1475         struct rx_ring  *rxr = adapter->rx_rings;
1476         u32             reg_icr;
1477         int             rx_done;
1478
1479         EM_CORE_LOCK(adapter);
1480         if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) {
1481                 EM_CORE_UNLOCK(adapter);
1482                 return (0);
1483         }
1484
1485         if (cmd == POLL_AND_CHECK_STATUS) {
1486                 reg_icr = E1000_READ_REG(&adapter->hw, E1000_ICR);
1487                 if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
1488                         callout_stop(&adapter->timer);
1489                         adapter->hw.mac.get_link_status = 1;
1490                         em_update_link_status(adapter);
1491                         callout_reset(&adapter->timer, hz,
1492                             em_local_timer, adapter);
1493                 }
1494         }
1495         EM_CORE_UNLOCK(adapter);
1496
1497         em_rxeof(rxr, count, &rx_done);
1498
1499         EM_TX_LOCK(txr);
1500         em_txeof(txr);
1501 #ifdef EM_MULTIQUEUE
1502         if (!drbr_empty(ifp, txr->br))
1503                 em_mq_start_locked(ifp, txr);
1504 #else
1505         if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
1506                 em_start_locked(ifp, txr);
1507 #endif
1508         EM_TX_UNLOCK(txr);
1509
1510         return (rx_done);
1511 }
1512 #endif /* DEVICE_POLLING */
1513
1514
1515 /*********************************************************************
1516  *
1517  *  Fast Legacy/MSI Combined Interrupt Service routine  
1518  *
1519  *********************************************************************/
1520 static int
1521 em_irq_fast(void *arg)
1522 {
1523         struct adapter  *adapter = arg;
1524         struct ifnet    *ifp;
1525         u32             reg_icr;
1526
1527         ifp = adapter->ifp;
1528
1529         reg_icr = E1000_READ_REG(&adapter->hw, E1000_ICR);
1530
1531         /* Hot eject?  */
1532         if (reg_icr == 0xffffffff)
1533                 return FILTER_STRAY;
1534
1535         /* Definitely not our interrupt.  */
1536         if (reg_icr == 0x0)
1537                 return FILTER_STRAY;
1538
1539         /*
1540          * Starting with the 82571 chip, bit 31 should be used to
1541          * determine whether the interrupt belongs to us.
1542          */
1543         if (adapter->hw.mac.type >= e1000_82571 &&
1544             (reg_icr & E1000_ICR_INT_ASSERTED) == 0)
1545                 return FILTER_STRAY;
1546
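        /*
        ** Mask further interrupts and defer RX/TX processing to the
        ** que taskqueue; em_handle_que() re-enables interrupts once
        ** it has drained the rings.
        */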
1547         em_disable_intr(adapter);
1548         taskqueue_enqueue(adapter->tq, &adapter->que_task);
1549
1550         /* Link status change */
1551         if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
1552                 adapter->hw.mac.get_link_status = 1;
1553                 taskqueue_enqueue(taskqueue_fast, &adapter->link_task);
1554         }
1555
1556         if (reg_icr & E1000_ICR_RXO)
1557                 adapter->rx_overruns++;
1558         return FILTER_HANDLED;
1559 }
1560
1561 /* Combined RX/TX handler, used by Legacy and MSI */
1562 static void
1563 em_handle_que(void *context, int pending)
1564 {
1565         struct adapter  *adapter = context;
1566         struct ifnet    *ifp = adapter->ifp;
1567         struct tx_ring  *txr = adapter->tx_rings;
1568         struct rx_ring  *rxr = adapter->rx_rings;
1569
1570         if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
1571                 bool more = em_rxeof(rxr, adapter->rx_process_limit, NULL);
1572
1573                 EM_TX_LOCK(txr);
1574                 em_txeof(txr);
1575 #ifdef EM_MULTIQUEUE
1576                 if (!drbr_empty(ifp, txr->br))
1577                         em_mq_start_locked(ifp, txr);
1578 #else
1579                 if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
1580                         em_start_locked(ifp, txr);
1581 #endif
1582                 EM_TX_UNLOCK(txr);
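                /*
                ** If em_rxeof() reported more work pending, reschedule
                ** ourselves with interrupts still masked; otherwise
                ** fall through and re-enable them below.
                */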
1583                 if (more) {
1584                         taskqueue_enqueue(adapter->tq, &adapter->que_task);
1585                         return;
1586                 }
1587         }
1588
1589         em_enable_intr(adapter);
1590         return;
1591 }
1592
1593
1594 /*********************************************************************
1595  *
1596  *  MSIX Interrupt Service Routines
1597  *
1598  **********************************************************************/
1599 static void
1600 em_msix_tx(void *arg)
1601 {
1602         struct tx_ring *txr = arg;
1603         struct adapter *adapter = txr->adapter;
1604         struct ifnet    *ifp = adapter->ifp;
1605
1606         ++txr->tx_irq;
1607         EM_TX_LOCK(txr);
1608         em_txeof(txr);
1609 #ifdef EM_MULTIQUEUE
1610         if (!drbr_empty(ifp, txr->br))
1611                 em_mq_start_locked(ifp, txr);
1612 #else
1613         if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
1614                 em_start_locked(ifp, txr);
1615 #endif
1616
1617         /* Reenable this interrupt */
1618         E1000_WRITE_REG(&adapter->hw, E1000_IMS, txr->ims);
1619         EM_TX_UNLOCK(txr);
1620         return;
1621 }
1622
1623 /*********************************************************************
1624  *
1625  *  MSIX RX Interrupt Service routine
1626  *
1627  **********************************************************************/
1628
1629 static void
1630 em_msix_rx(void *arg)
1631 {
1632         struct rx_ring  *rxr = arg;
1633         struct adapter  *adapter = rxr->adapter;
1634         bool            more;
1635
1636         ++rxr->rx_irq;
1637         if (!(adapter->ifp->if_drv_flags & IFF_DRV_RUNNING))
1638                 return;
1639         more = em_rxeof(rxr, adapter->rx_process_limit, NULL);
1640         if (more)
1641                 taskqueue_enqueue(rxr->tq, &rxr->rx_task);
1642         else {
1643                 /* Reenable this interrupt */
1644                 E1000_WRITE_REG(&adapter->hw, E1000_IMS, rxr->ims);
1645         }
1646         return;
1647 }
1648
1649 /*********************************************************************
1650  *
1651  *  MSIX Link Fast Interrupt Service routine
1652  *
1653  **********************************************************************/
1654 static void
1655 em_msix_link(void *arg)
1656 {
1657         struct adapter  *adapter = arg;
1658         u32             reg_icr;
1659
1660         ++adapter->link_irq;
1661         reg_icr = E1000_READ_REG(&adapter->hw, E1000_ICR);
1662
1663         if (reg_icr & E1000_ICR_RXO)
1664                 adapter->rx_overruns++;
1665
1666         if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
1667                 adapter->hw.mac.get_link_status = 1;
1668                 em_handle_link(adapter, 0);
1669         } else
1670                 E1000_WRITE_REG(&adapter->hw, E1000_IMS,
1671                     EM_MSIX_LINK | E1000_IMS_LSC);
1672         /*
1673         ** Because we must read the ICR for this interrupt,
1674         ** it may clear other causes using autoclear; for
1675         ** this reason we simply create a soft interrupt
1676         ** for all these vectors.
1677         */
1678         if (reg_icr) {
1679                 E1000_WRITE_REG(&adapter->hw,
1680                         E1000_ICS, adapter->ims);
1681         }
1682         return;
1683 }
1684
1685 static void
1686 em_handle_rx(void *context, int pending)
1687 {
1688         struct rx_ring  *rxr = context;
1689         struct adapter  *adapter = rxr->adapter;
1690         bool            more;
1691
1692         more = em_rxeof(rxr, adapter->rx_process_limit, NULL);
1693         if (more)
1694                 taskqueue_enqueue(rxr->tq, &rxr->rx_task);
1695         else {
1696                 /* Reenable this interrupt */
1697                 E1000_WRITE_REG(&adapter->hw, E1000_IMS, rxr->ims);
1698         }
1699 }
1700
1701 static void
1702 em_handle_tx(void *context, int pending)
1703 {
1704         struct tx_ring  *txr = context;
1705         struct adapter  *adapter = txr->adapter;
1706         struct ifnet    *ifp = adapter->ifp;
1707
1708         EM_TX_LOCK(txr);
1709         em_txeof(txr);
1710 #ifdef EM_MULTIQUEUE
1711         if (!drbr_empty(ifp, txr->br))
1712                 em_mq_start_locked(ifp, txr);
1713 #else
1714         if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
1715                 em_start_locked(ifp, txr);
1716 #endif
1717         E1000_WRITE_REG(&adapter->hw, E1000_IMS, txr->ims);
1718         EM_TX_UNLOCK(txr);
1719 }
1720
1721 static void
1722 em_handle_link(void *context, int pending)
1723 {
1724         struct adapter  *adapter = context;
1725         struct tx_ring  *txr = adapter->tx_rings;
1726         struct ifnet *ifp = adapter->ifp;
1727
1728         if (!(ifp->if_drv_flags & IFF_DRV_RUNNING))
1729                 return;
1730
1731         EM_CORE_LOCK(adapter);
1732         callout_stop(&adapter->timer);
1733         em_update_link_status(adapter);
1734         callout_reset(&adapter->timer, hz, em_local_timer, adapter);
1735         E1000_WRITE_REG(&adapter->hw, E1000_IMS,
1736             EM_MSIX_LINK | E1000_IMS_LSC);
1737         if (adapter->link_active) {
1738                 for (int i = 0; i < adapter->num_queues; i++, txr++) {
1739                         EM_TX_LOCK(txr);
1740 #ifdef EM_MULTIQUEUE
1741                         if (!drbr_empty(ifp, txr->br))
1742                                 em_mq_start_locked(ifp, txr);
1743 #else
1744                         if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
1745                                 em_start_locked(ifp, txr);
1746 #endif
1747                         EM_TX_UNLOCK(txr);
1748                 }
1749         }
1750         EM_CORE_UNLOCK(adapter);
1751 }
1752
1753
1754 /*********************************************************************
1755  *
1756  *  Media Ioctl callback
1757  *
1758  *  This routine is called whenever the user queries the status of
1759  *  the interface using ifconfig.
1760  *
1761  **********************************************************************/
1762 static void
1763 em_media_status(struct ifnet *ifp, struct ifmediareq *ifmr)
1764 {
1765         struct adapter *adapter = ifp->if_softc;
1766         u_char fiber_type = IFM_1000_SX;
1767
1768         INIT_DEBUGOUT("em_media_status: begin");
1769
1770         EM_CORE_LOCK(adapter);
1771         em_update_link_status(adapter);
1772
1773         ifmr->ifm_status = IFM_AVALID;
1774         ifmr->ifm_active = IFM_ETHER;
1775
1776         if (!adapter->link_active) {
1777                 EM_CORE_UNLOCK(adapter);
1778                 return;
1779         }
1780
1781         ifmr->ifm_status |= IFM_ACTIVE;
1782
1783         if ((adapter->hw.phy.media_type == e1000_media_type_fiber) ||
1784             (adapter->hw.phy.media_type == e1000_media_type_internal_serdes)) {
1785                 ifmr->ifm_active |= fiber_type | IFM_FDX;
1786         } else {
1787                 switch (adapter->link_speed) {
1788                 case 10:
1789                         ifmr->ifm_active |= IFM_10_T;
1790                         break;
1791                 case 100:
1792                         ifmr->ifm_active |= IFM_100_TX;
1793                         break;
1794                 case 1000:
1795                         ifmr->ifm_active |= IFM_1000_T;
1796                         break;
1797                 }
1798                 if (adapter->link_duplex == FULL_DUPLEX)
1799                         ifmr->ifm_active |= IFM_FDX;
1800                 else
1801                         ifmr->ifm_active |= IFM_HDX;
1802         }
1803         EM_CORE_UNLOCK(adapter);
1804 }
1805
1806 /*********************************************************************
1807  *
1808  *  Media Ioctl callback
1809  *
1810  *  This routine is called when the user changes speed/duplex using
1811  *  the media/mediaopt options with ifconfig.
1812  *
1813  **********************************************************************/
1814 static int
1815 em_media_change(struct ifnet *ifp)
1816 {
1817         struct adapter *adapter = ifp->if_softc;
1818         struct ifmedia  *ifm = &adapter->media;
1819
1820         INIT_DEBUGOUT("em_media_change: begin");
1821
1822         if (IFM_TYPE(ifm->ifm_media) != IFM_ETHER)
1823                 return (EINVAL);
1824
1825         EM_CORE_LOCK(adapter);
1826         switch (IFM_SUBTYPE(ifm->ifm_media)) {
1827         case IFM_AUTO:
1828                 adapter->hw.mac.autoneg = DO_AUTO_NEG;
1829                 adapter->hw.phy.autoneg_advertised = AUTONEG_ADV_DEFAULT;
1830                 break;
1831         case IFM_1000_LX:
1832         case IFM_1000_SX:
1833         case IFM_1000_T:
1834                 adapter->hw.mac.autoneg = DO_AUTO_NEG;
1835                 adapter->hw.phy.autoneg_advertised = ADVERTISE_1000_FULL;
1836                 break;
1837         case IFM_100_TX:
1838                 adapter->hw.mac.autoneg = FALSE;
1839                 adapter->hw.phy.autoneg_advertised = 0;
1840                 if ((ifm->ifm_media & IFM_GMASK) == IFM_FDX)
1841                         adapter->hw.mac.forced_speed_duplex = ADVERTISE_100_FULL;
1842                 else
1843                         adapter->hw.mac.forced_speed_duplex = ADVERTISE_100_HALF;
1844                 break;
1845         case IFM_10_T:
1846                 adapter->hw.mac.autoneg = FALSE;
1847                 adapter->hw.phy.autoneg_advertised = 0;
1848                 if ((ifm->ifm_media & IFM_GMASK) == IFM_FDX)
1849                         adapter->hw.mac.forced_speed_duplex = ADVERTISE_10_FULL;
1850                 else
1851                         adapter->hw.mac.forced_speed_duplex = ADVERTISE_10_HALF;
1852                 break;
1853         default:
1854                 device_printf(adapter->dev, "Unsupported media type\n");
1855         }
1856
1857         em_init_locked(adapter);
1858         EM_CORE_UNLOCK(adapter);
1859
1860         return (0);
1861 }
1862
1863 /*********************************************************************
1864  *
1865  *  This routine maps the mbufs to tx descriptors.
1866  *
1867  *  return 0 on success, positive on failure
1868  **********************************************************************/
1869
1870 static int
1871 em_xmit(struct tx_ring *txr, struct mbuf **m_headp)
1872 {
1873         struct adapter          *adapter = txr->adapter;
1874         bus_dma_segment_t       segs[EM_MAX_SCATTER];
1875         bus_dmamap_t            map;
1876         struct em_buffer        *tx_buffer, *tx_buffer_mapped;
1877         struct e1000_tx_desc    *ctxd = NULL;
1878         struct mbuf             *m_head;
1879         struct ether_header     *eh;
1880         struct ip               *ip = NULL;
1881         struct tcphdr           *tp = NULL;
1882         u32                     txd_upper, txd_lower, txd_used, txd_saved;
1883         int                     ip_off, poff;
1884         int                     nsegs, i, j, first, last = 0;
1885         int                     error, do_tso, tso_desc = 0, remap = 1;
1886
1887         m_head = *m_headp;
1888         txd_upper = txd_lower = txd_used = txd_saved = 0;
1889         do_tso = ((m_head->m_pkthdr.csum_flags & CSUM_TSO) != 0);
1890         ip_off = poff = 0;
1891
1892         /*
1893          * Intel recommends that the entire IP/TCP header reside in a
1894          * single buffer. If multiple descriptors are used to describe
1895          * the IP and TCP header, each descriptor should describe one or
1896          * more complete headers; descriptors referencing only parts of
1897          * headers are not supported. If all layer headers are not
1898          * coalesced into a single buffer, each buffer should not cross
1899          * a 4KB boundary, or be larger than the maximum read request
1900          * size. The controller also requires modifying the IP/TCP header
1901          * to make TSO work, so we first get a writable mbuf chain and
1902          * then coalesce the ethernet/IP/TCP headers into a single buffer
1903          * to meet the controller's requirement. This also simplifies
1904          * IP/TCP/UDP checksum offloading, which has similar restrictions.
1905          */
1906         if (do_tso || m_head->m_pkthdr.csum_flags & CSUM_OFFLOAD) {
1907                 if (do_tso || (m_head->m_next != NULL && 
1908                     m_head->m_pkthdr.csum_flags & CSUM_OFFLOAD)) {
1909                         if (M_WRITABLE(*m_headp) == 0) {
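                                /*
                                ** The headers are modified in place
                                ** below, so take a writable copy of
                                ** the chain first.
                                */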
1910                                 m_head = m_dup(*m_headp, M_NOWAIT);
1911                                 m_freem(*m_headp);
1912                                 if (m_head == NULL) {
1913                                         *m_headp = NULL;
1914                                         return (ENOBUFS);
1915                                 }
1916                                 *m_headp = m_head;
1917                         }
1918                 }
1919                 /*
1920                  * XXX
1921                  * Assume IPv4, we don't have TSO/checksum offload support
1922                  * for IPv6 yet.
1923                  */
1924                 ip_off = sizeof(struct ether_header);
1925                 m_head = m_pullup(m_head, ip_off);
1926                 if (m_head == NULL) {
1927                         *m_headp = NULL;
1928                         return (ENOBUFS);
1929                 }
1930                 eh = mtod(m_head, struct ether_header *);
1931                 if (eh->ether_type == htons(ETHERTYPE_VLAN)) {
1932                         ip_off = sizeof(struct ether_vlan_header);
1933                         m_head = m_pullup(m_head, ip_off);
1934                         if (m_head == NULL) {
1935                                 *m_headp = NULL;
1936                                 return (ENOBUFS);
1937                         }
1938                 }
1939                 m_head = m_pullup(m_head, ip_off + sizeof(struct ip));
1940                 if (m_head == NULL) {
1941                         *m_headp = NULL;
1942                         return (ENOBUFS);
1943                 }
1944                 ip = (struct ip *)(mtod(m_head, char *) + ip_off);
1945                 poff = ip_off + (ip->ip_hl << 2);
1946                 if (do_tso) {
1947                         m_head = m_pullup(m_head, poff + sizeof(struct tcphdr));
1948                         if (m_head == NULL) {
1949                                 *m_headp = NULL;
1950                                 return (ENOBUFS);
1951                         }
1952                         tp = (struct tcphdr *)(mtod(m_head, char *) + poff);
1953                         /*
1954                          * TSO workaround:
1955                          *   pull 4 more bytes of data into the first mbuf.
1956                          */
1957                         m_head = m_pullup(m_head, poff + (tp->th_off << 2) + 4);
1958                         if (m_head == NULL) {
1959                                 *m_headp = NULL;
1960                                 return (ENOBUFS);
1961                         }
1962                         ip = (struct ip *)(mtod(m_head, char *) + ip_off);
1963                         ip->ip_len = 0;
1964                         ip->ip_sum = 0;
1965                         /*
1966                          * The TCP pseudo-header checksum must not include
1967                          * the TCP payload length, so the driver recomputes
1968                          * it here to match what the hardware expects, per
1969                          * Microsoft's Large Send specification.
1970                          */
1971                         tp = (struct tcphdr *)(mtod(m_head, char *) + poff);
1972                         tp->th_sum = in_pseudo(ip->ip_src.s_addr,
1973                             ip->ip_dst.s_addr, htons(IPPROTO_TCP));
1974                 } else if (m_head->m_pkthdr.csum_flags & CSUM_TCP) {
1975                         m_head = m_pullup(m_head, poff + sizeof(struct tcphdr));
1976                         if (m_head == NULL) {
1977                                 *m_headp = NULL;
1978                                 return (ENOBUFS);
1979                         }
1980                         tp = (struct tcphdr *)(mtod(m_head, char *) + poff);
1981                         m_head = m_pullup(m_head, poff + (tp->th_off << 2));
1982                         if (m_head == NULL) {
1983                                 *m_headp = NULL;
1984                                 return (ENOBUFS);
1985                         }
1986                         ip = (struct ip *)(mtod(m_head, char *) + ip_off);
1987                         tp = (struct tcphdr *)(mtod(m_head, char *) + poff);
1988                 } else if (m_head->m_pkthdr.csum_flags & CSUM_UDP) {
1989                         m_head = m_pullup(m_head, poff + sizeof(struct udphdr));
1990                         if (m_head == NULL) {
1991                                 *m_headp = NULL;
1992                                 return (ENOBUFS);
1993                         }
1994                         ip = (struct ip *)(mtod(m_head, char *) + ip_off);
1995                 }
1996                 *m_headp = m_head;
1997         }
1998
1999         /*
2000          * Map the packet for DMA
2001          *
2002          * Capture the first descriptor index,
2003          * this descriptor will have the index
2004          * of the EOP which is the only one that
2005          * now gets a DONE bit writeback.
2006          */
2007         first = txr->next_avail_desc;
2008         tx_buffer = &txr->tx_buffers[first];
2009         tx_buffer_mapped = tx_buffer;
2010         map = tx_buffer->map;
2011
2012 retry:
2013         error = bus_dmamap_load_mbuf_sg(txr->txtag, map,
2014             *m_headp, segs, &nsegs, BUS_DMA_NOWAIT);
2015
2016         /*
2017          * There are two types of errors we can (try) to handle:
2018          * - EFBIG means the mbuf chain was too long and bus_dma ran
2019          *   out of segments.  Defragment the mbuf chain and try again.
2020          * - ENOMEM means bus_dma could not obtain enough bounce buffers
2021          *   at this point in time.  Defer sending and try again later.
2022          * All other errors, in particular EINVAL, are fatal and prevent the
2023          * mbuf chain from ever going through.  Drop it and report error.
2024          */
2025         if (error == EFBIG && remap) {
2026                 struct mbuf *m;
2027
2028                 m = m_defrag(*m_headp, M_NOWAIT);
2029                 if (m == NULL) {
2030                         adapter->mbuf_alloc_failed++;
2031                         m_freem(*m_headp);
2032                         *m_headp = NULL;
2033                         return (ENOBUFS);
2034                 }
2035                 *m_headp = m;
2036
2037                 /* Try it again, but only once */
2038                 remap = 0;
2039                 goto retry;
2040         } else if (error == ENOMEM) {
2041                 adapter->no_tx_dma_setup++;
2042                 return (error);
2043         } else if (error != 0) {
2044                 adapter->no_tx_dma_setup++;
2045                 m_freem(*m_headp);
2046                 *m_headp = NULL;
2047                 return (error);
2048         }
2049
2050         /*
2051          * TSO hardware workaround: if this packet is not
2052          * TSO, is only a single descriptor long, and
2053          * follows a TSO burst, then we need to add a
2054          * sentinel descriptor to prevent premature writeback.
2055          */
2056         if ((do_tso == 0) && (txr->tx_tso == TRUE)) {
2057                 if (nsegs == 1)
2058                         tso_desc = TRUE;
2059                 txr->tx_tso = FALSE;
2060         }
2061
2062         if (nsegs > (txr->tx_avail - 2)) {
2063                 txr->no_desc_avail++;
2064                 bus_dmamap_unload(txr->txtag, map);
2065                 return (ENOBUFS);
2066         }
2067         m_head = *m_headp;
2068
2069         /* Do hardware assists */
2070         if (m_head->m_pkthdr.csum_flags & CSUM_TSO) {
2071                 em_tso_setup(txr, m_head, ip_off, ip, tp,
2072                     &txd_upper, &txd_lower);
2073                 /* we need to make a final sentinel transmit desc */
2074                 tso_desc = TRUE;
2075         } else if (m_head->m_pkthdr.csum_flags & CSUM_OFFLOAD)
2076                 em_transmit_checksum_setup(txr, m_head,
2077                     ip_off, ip, &txd_upper, &txd_lower);
2078
2079         if (m_head->m_flags & M_VLANTAG) {
2080                 /* Set the vlan id. */
2081                 txd_upper |=
2082                     (htole16(m_head->m_pkthdr.ether_vtag) << 16);
2083                 /* Tell hardware to add tag */
2084                 txd_lower |= htole32(E1000_TXD_CMD_VLE);
2085         }
2086
2087         i = txr->next_avail_desc;
2088
2089         /* Set up our transmit descriptors */
2090         for (j = 0; j < nsegs; j++) {
2091                 bus_size_t seg_len;
2092                 bus_addr_t seg_addr;
2093
2094                 tx_buffer = &txr->tx_buffers[i];
2095                 ctxd = &txr->tx_base[i];
2096                 seg_addr = segs[j].ds_addr;
2097                 seg_len  = segs[j].ds_len;
2098                 /*
2099                 ** TSO Workaround:
2100                 ** If this is the last descriptor, split it so the
2101                 ** final 4 bytes go into a small sentinel descriptor
2102                 */
2103                 if (tso_desc && (j == (nsegs -1)) && (seg_len > 8)) {
2104                         seg_len -= 4;
2105                         ctxd->buffer_addr = htole64(seg_addr);
2106                         ctxd->lower.data = htole32(
2107                             adapter->txd_cmd | txd_lower | seg_len);
2108                         ctxd->upper.data =
2109                             htole32(txd_upper);
2110                         if (++i == adapter->num_tx_desc)
2111                                 i = 0;
2112                         /* Now make the sentinel */     
2113                         ++txd_used; /* using an extra txd */
2114                         ctxd = &txr->tx_base[i];
2115                         tx_buffer = &txr->tx_buffers[i];
2116                         ctxd->buffer_addr =
2117                             htole64(seg_addr + seg_len);
2118                         ctxd->lower.data = htole32(
2119                             adapter->txd_cmd | txd_lower | 4);
2120                         ctxd->upper.data =
2121                             htole32(txd_upper);
2122                         last = i;
2123                         if (++i == adapter->num_tx_desc)
2124                                 i = 0;
2125                 } else {
2126                         ctxd->buffer_addr = htole64(seg_addr);
2127                         ctxd->lower.data = htole32(
2128                             adapter->txd_cmd | txd_lower | seg_len);
2129                         ctxd->upper.data =
2130                             htole32(txd_upper);
2131                         last = i;
2132                         if (++i == adapter->num_tx_desc)
2133                                 i = 0;
2134                 }
2135                 tx_buffer->m_head = NULL;
2136                 tx_buffer->next_eop = -1;
2137         }
2138
2139         txr->next_avail_desc = i;
2140         txr->tx_avail -= nsegs;
2141         if (tso_desc) /* TSO used an extra for sentinel */
2142                 txr->tx_avail -= txd_used;
2143
2144         tx_buffer->m_head = m_head;
2145         /*
2146         ** Here we swap the map so the last descriptor,
2147         ** which gets the completion interrupt, has the
2148         ** real map, and the first descriptor gets the
2149         ** unused map from this descriptor.
2150         */
2151         tx_buffer_mapped->map = tx_buffer->map;
2152         tx_buffer->map = map;
2153         bus_dmamap_sync(txr->txtag, map, BUS_DMASYNC_PREWRITE);
2154
2155         /*
2156          * Last Descriptor of Packet
2157          * needs End Of Packet (EOP)
2158          * and Report Status (RS)
2159          */
2160         ctxd->lower.data |=
2161             htole32(E1000_TXD_CMD_EOP | E1000_TXD_CMD_RS);
2162         /*
2163          * Keep track in the first buffer which
2164          * descriptor will be written back
2165          */
2166         tx_buffer = &txr->tx_buffers[first];
2167         tx_buffer->next_eop = last;
2168
2169         /*
2170          * Advance the Transmit Descriptor Tail (TDT); this tells the E1000
2171          * that this frame is available to transmit.
2172          */
2173         bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
2174             BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
2175         E1000_WRITE_REG(&adapter->hw, E1000_TDT(txr->me), i);
2176
2177         return (0);
2178 }
2179
2180 static void
2181 em_set_promisc(struct adapter *adapter)
2182 {
2183         struct ifnet    *ifp = adapter->ifp;
2184         u32             reg_rctl;
2185
2186         reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
2187
2188         if (ifp->if_flags & IFF_PROMISC) {
2189                 reg_rctl |= (E1000_RCTL_UPE | E1000_RCTL_MPE);
2190                 /* Turn this on if you want to see bad packets */
2191                 if (em_debug_sbp)
2192                         reg_rctl |= E1000_RCTL_SBP;
2193                 E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2194         } else if (ifp->if_flags & IFF_ALLMULTI) {
2195                 reg_rctl |= E1000_RCTL_MPE;
2196                 reg_rctl &= ~E1000_RCTL_UPE;
2197                 E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2198         }
2199 }
2200
2201 static void
2202 em_disable_promisc(struct adapter *adapter)
2203 {
2204         struct ifnet    *ifp = adapter->ifp;
2205         u32             reg_rctl;
2206         int             mcnt = 0;
2207
2208         reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
2209         reg_rctl &=  (~E1000_RCTL_UPE);
2210         if (ifp->if_flags & IFF_ALLMULTI)
2211                 mcnt = MAX_NUM_MULTICAST_ADDRESSES;
2212         else {
2213                 struct  ifmultiaddr *ifma;
2214 #if __FreeBSD_version < 800000
2215                 IF_ADDR_LOCK(ifp);
2216 #else   
2217                 if_maddr_rlock(ifp);
2218 #endif
2219                 TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
2220                         if (ifma->ifma_addr->sa_family != AF_LINK)
2221                                 continue;
2222                         if (mcnt == MAX_NUM_MULTICAST_ADDRESSES)
2223                                 break;
2224                         mcnt++;
2225                 }
2226 #if __FreeBSD_version < 800000
2227                 IF_ADDR_UNLOCK(ifp);
2228 #else
2229                 if_maddr_runlock(ifp);
2230 #endif
2231         }
2232         /* Don't disable if in MAX groups */
2233         if (mcnt < MAX_NUM_MULTICAST_ADDRESSES)
2234                 reg_rctl &=  (~E1000_RCTL_MPE);
2235         reg_rctl &=  (~E1000_RCTL_SBP);
2236         E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2237 }
2238
2239
2240 /*********************************************************************
2241  *  Multicast Update
2242  *
2243  *  This routine is called whenever multicast address list is updated.
2244  *
2245  **********************************************************************/
2246
2247 static void
2248 em_set_multi(struct adapter *adapter)
2249 {
2250         struct ifnet    *ifp = adapter->ifp;
2251         struct ifmultiaddr *ifma;
2252         u32 reg_rctl = 0;
2253         u8  *mta; /* Multicast array memory */
2254         int mcnt = 0;
2255
2256         IOCTL_DEBUGOUT("em_set_multi: begin");
2257
2258         mta = adapter->mta;
2259         bzero(mta, sizeof(u8) * ETH_ADDR_LEN * MAX_NUM_MULTICAST_ADDRESSES);
2260
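        /*
        ** On the 82542 rev 2.0 the receiver is held in reset (with
        ** MWI off) while the multicast table is updated; normal
        ** operation is restored again below.
        */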
2261         if (adapter->hw.mac.type == e1000_82542 && 
2262             adapter->hw.revision_id == E1000_REVISION_2) {
2263                 reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
2264                 if (adapter->hw.bus.pci_cmd_word & CMD_MEM_WRT_INVALIDATE)
2265                         e1000_pci_clear_mwi(&adapter->hw);
2266                 reg_rctl |= E1000_RCTL_RST;
2267                 E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2268                 msec_delay(5);
2269         }
2270
2271 #if __FreeBSD_version < 800000
2272         IF_ADDR_LOCK(ifp);
2273 #else
2274         if_maddr_rlock(ifp);
2275 #endif
2276         TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
2277                 if (ifma->ifma_addr->sa_family != AF_LINK)
2278                         continue;
2279
2280                 if (mcnt == MAX_NUM_MULTICAST_ADDRESSES)
2281                         break;
2282
2283                 bcopy(LLADDR((struct sockaddr_dl *)ifma->ifma_addr),
2284                     &mta[mcnt * ETH_ADDR_LEN], ETH_ADDR_LEN);
2285                 mcnt++;
2286         }
2287 #if __FreeBSD_version < 800000
2288         IF_ADDR_UNLOCK(ifp);
2289 #else
2290         if_maddr_runlock(ifp);
2291 #endif
2292         if (mcnt >= MAX_NUM_MULTICAST_ADDRESSES) {
2293                 reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
2294                 reg_rctl |= E1000_RCTL_MPE;
2295                 E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2296         } else
2297                 e1000_update_mc_addr_list(&adapter->hw, mta, mcnt);
2298
2299         if (adapter->hw.mac.type == e1000_82542 && 
2300             adapter->hw.revision_id == E1000_REVISION_2) {
2301                 reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
2302                 reg_rctl &= ~E1000_RCTL_RST;
2303                 E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2304                 msec_delay(5);
2305                 if (adapter->hw.bus.pci_cmd_word & CMD_MEM_WRT_INVALIDATE)
2306                         e1000_pci_set_mwi(&adapter->hw);
2307         }
2308 }
2309
2310
2311 /*********************************************************************
2312  *  Timer routine
2313  *
2314  *  This routine checks for link status and updates statistics.
2315  *
2316  **********************************************************************/
2317
2318 static void
2319 em_local_timer(void *arg)
2320 {
2321         struct adapter  *adapter = arg;
2322         struct ifnet    *ifp = adapter->ifp;
2323         struct tx_ring  *txr = adapter->tx_rings;
2324         struct rx_ring  *rxr = adapter->rx_rings;
2325         u32             trigger = 0;
2326
2327         EM_CORE_LOCK_ASSERT(adapter);
2328
2329         em_update_link_status(adapter);
2330         em_update_stats_counters(adapter);
2331
2332         /* Reset LAA into RAR[0] on 82571 */
2333         if ((adapter->hw.mac.type == e1000_82571) &&
2334             e1000_get_laa_state_82571(&adapter->hw))
2335                 e1000_rar_set(&adapter->hw, adapter->hw.mac.addr, 0);
2336
2337         /* Mask to use in the irq trigger */
2338         if (adapter->msix_mem) {
2339                 for (int i = 0; i < adapter->num_queues; i++, rxr++)
2340                         trigger |= rxr->ims;
2341                 rxr = adapter->rx_rings;
2342         } else
2343                 trigger = E1000_ICS_RXDMT0;
2344
2345         /*
2346         ** Check on the state of the TX queue(s); this
2347         ** can be done without the lock because it's RO
2348         ** and the HUNG state will be static if set.
2349         */
2350         for (int i = 0; i < adapter->num_queues; i++, txr++) {
2351                 if (txr->busy == EM_TX_HUNG)
2352                         goto hung;
2353                 if (txr->busy >= EM_TX_MAXTRIES)
2354                         txr->busy = EM_TX_HUNG;
2355                 /* Schedule a TX tasklet if needed */
2356                 if (txr->tx_avail <= EM_MAX_SCATTER)
2357                         taskqueue_enqueue(txr->tq, &txr->tx_task);
2358         }
2359         
2360         callout_reset(&adapter->timer, hz, em_local_timer, adapter);
2361 #ifndef DEVICE_POLLING
2362         /* Trigger an RX interrupt to guarantee mbuf refresh */
2363         E1000_WRITE_REG(&adapter->hw, E1000_ICS, trigger);
2364 #endif
2365         return;
2366 hung:
2367         /* Looks like we're hung */
2368         device_printf(adapter->dev, "Watchdog timeout Queue[%d]-- resetting\n",
2369                         txr->me);
2370         em_print_debug_info(adapter);
2371         ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
2372         adapter->watchdog_events++;
2373         em_init_locked(adapter);
2374 }
2375
2376
2377 static void
2378 em_update_link_status(struct adapter *adapter)
2379 {
2380         struct e1000_hw *hw = &adapter->hw;
2381         struct ifnet *ifp = adapter->ifp;
2382         device_t dev = adapter->dev;
2383         struct tx_ring *txr = adapter->tx_rings;
2384         u32 link_check = 0;
2385
2386         /* Get the cached link value or read phy for real */
2387         switch (hw->phy.media_type) {
2388         case e1000_media_type_copper:
2389                 if (hw->mac.get_link_status) {
2390                         /* Do the work to read phy */
2391                         e1000_check_for_link(hw);
2392                         link_check = !hw->mac.get_link_status;
2393                         if (link_check) /* ESB2 fix */
2394                                 e1000_cfg_on_link_up(hw);
2395                 } else
2396                         link_check = TRUE;
2397                 break;
2398         case e1000_media_type_fiber:
2399                 e1000_check_for_link(hw);
2400                 link_check = (E1000_READ_REG(hw, E1000_STATUS) &
2401                                  E1000_STATUS_LU);
2402                 break;
2403         case e1000_media_type_internal_serdes:
2404                 e1000_check_for_link(hw);
2405                 link_check = adapter->hw.mac.serdes_has_link;
2406                 break;
2407         default:
2408         case e1000_media_type_unknown:
2409                 break;
2410         }
2411
2412         /* Now check for a transition */
2413         if (link_check && (adapter->link_active == 0)) {
2414                 e1000_get_speed_and_duplex(hw, &adapter->link_speed,
2415                     &adapter->link_duplex);
2416                 /* Check if we must disable SPEED_MODE bit on PCI-E */
2417                 if ((adapter->link_speed != SPEED_1000) &&
2418                     ((hw->mac.type == e1000_82571) ||
2419                     (hw->mac.type == e1000_82572))) {
2420                         int tarc0;
2421                         tarc0 = E1000_READ_REG(hw, E1000_TARC(0));
2422                         tarc0 &= ~TARC_SPEED_MODE_BIT;
2423                         E1000_WRITE_REG(hw, E1000_TARC(0), tarc0);
2424                 }
2425                 if (bootverbose)
2426                         device_printf(dev, "Link is up %d Mbps %s\n",
2427                             adapter->link_speed,
2428                             ((adapter->link_duplex == FULL_DUPLEX) ?
2429                             "Full Duplex" : "Half Duplex"));
2430                 adapter->link_active = 1;
2431                 adapter->smartspeed = 0;
2432                 ifp->if_baudrate = adapter->link_speed * 1000000;
2433                 if_link_state_change(ifp, LINK_STATE_UP);
2434         } else if (!link_check && (adapter->link_active == 1)) {
2435                 ifp->if_baudrate = adapter->link_speed = 0;
2436                 adapter->link_duplex = 0;
2437                 if (bootverbose)
2438                         device_printf(dev, "Link is Down\n");
2439                 adapter->link_active = 0;
2440                 /* Link down, disable hang detection */
2441                 for (int i = 0; i < adapter->num_queues; i++, txr++)
2442                         txr->busy = EM_TX_IDLE;
2443                 if_link_state_change(ifp, LINK_STATE_DOWN);
2444         }
2445 }
2446
2447 /*********************************************************************
2448  *
2449  *  This routine disables all traffic on the adapter by issuing a
2450  *  global reset on the MAC and deallocates TX/RX buffers.
2451  *
2452  *  This routine should always be called with BOTH the CORE
2453  *  and TX locks.
2454  **********************************************************************/
2455
2456 static void
2457 em_stop(void *arg)
2458 {
2459         struct adapter  *adapter = arg;
2460         struct ifnet    *ifp = adapter->ifp;
2461         struct tx_ring  *txr = adapter->tx_rings;
2462
2463         EM_CORE_LOCK_ASSERT(adapter);
2464
2465         INIT_DEBUGOUT("em_stop: begin");
2466
2467         em_disable_intr(adapter);
2468         callout_stop(&adapter->timer);
2469
2470         /* Tell the stack that the interface is no longer active */
2471         ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
2472         ifp->if_drv_flags |= IFF_DRV_OACTIVE;
2473
2474         /* Disarm Hang Detection. */
2475         for (int i = 0; i < adapter->num_queues; i++, txr++) {
2476                 EM_TX_LOCK(txr);
2477                 txr->busy = EM_TX_IDLE;
2478                 EM_TX_UNLOCK(txr);
2479         }
2480
2481         e1000_reset_hw(&adapter->hw);
2482         E1000_WRITE_REG(&adapter->hw, E1000_WUC, 0);
2483
2484         e1000_led_off(&adapter->hw);
2485         e1000_cleanup_led(&adapter->hw);
2486 }
2487
2488
2489 /*********************************************************************
2490  *
2491  *  Determine hardware revision.
2492  *
2493  **********************************************************************/
2494 static void
2495 em_identify_hardware(struct adapter *adapter)
2496 {
2497         device_t dev = adapter->dev;
2498
2499         /* Make sure our PCI config space has the necessary stuff set */
2500         pci_enable_busmaster(dev);
2501         adapter->hw.bus.pci_cmd_word = pci_read_config(dev, PCIR_COMMAND, 2);
2502
2503         /* Save off the information about this board */
2504         adapter->hw.vendor_id = pci_get_vendor(dev);
2505         adapter->hw.device_id = pci_get_device(dev);
2506         adapter->hw.revision_id = pci_read_config(dev, PCIR_REVID, 1);
2507         adapter->hw.subsystem_vendor_id =
2508             pci_read_config(dev, PCIR_SUBVEND_0, 2);
2509         adapter->hw.subsystem_device_id =
2510             pci_read_config(dev, PCIR_SUBDEV_0, 2);
2511
2512         /* Do Shared Code Init and Setup */
2513         if (e1000_set_mac_type(&adapter->hw)) {
2514                 device_printf(dev, "Setup init failure\n");
2515                 return;
2516         }
2517 }
2518
2519 static int
2520 em_allocate_pci_resources(struct adapter *adapter)
2521 {
2522         device_t        dev = adapter->dev;
2523         int             rid;
2524
2525         rid = PCIR_BAR(0);
2526         adapter->memory = bus_alloc_resource_any(dev, SYS_RES_MEMORY,
2527             &rid, RF_ACTIVE);
2528         if (adapter->memory == NULL) {
2529                 device_printf(dev, "Unable to allocate bus resource: memory\n");
2530                 return (ENXIO);
2531         }
2532         adapter->osdep.mem_bus_space_tag =
2533             rman_get_bustag(adapter->memory);
2534         adapter->osdep.mem_bus_space_handle =
2535             rman_get_bushandle(adapter->memory);
2536         adapter->hw.hw_addr = (u8 *)&adapter->osdep.mem_bus_space_handle;
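        /*
        ** Note: hw_addr is not dereferenced directly; the register
        ** access macros go through bus_space using the tag and
        ** handle saved in osdep above.
        */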
2537
2538         adapter->hw.back = &adapter->osdep;
2539
2540         return (0);
2541 }
2542
2543 /*********************************************************************
2544  *
2545  *  Setup the Legacy or MSI Interrupt handler
2546  *
2547  **********************************************************************/
2548 int
2549 em_allocate_legacy(struct adapter *adapter)
2550 {
2551         device_t dev = adapter->dev;
2552         struct tx_ring  *txr = adapter->tx_rings;
2553         int error, rid = 0;
2554
2555         /* Manually turn off all interrupts */
2556         E1000_WRITE_REG(&adapter->hw, E1000_IMC, 0xffffffff);
2557
2558         if (adapter->msix == 1) /* using MSI */
2559                 rid = 1;
2560         /* We allocate a single interrupt resource */
2561         adapter->res = bus_alloc_resource_any(dev,
2562             SYS_RES_IRQ, &rid, RF_SHAREABLE | RF_ACTIVE);
2563         if (adapter->res == NULL) {
2564                 device_printf(dev, "Unable to allocate bus resource: "
2565                     "interrupt\n");
2566                 return (ENXIO);
2567         }
2568
2569         /*
2570          * Allocate a fast interrupt and the associated
2571          * deferred processing contexts.
2572          */
2573         TASK_INIT(&adapter->que_task, 0, em_handle_que, adapter);
2574         adapter->tq = taskqueue_create_fast("em_taskq", M_NOWAIT,
2575             taskqueue_thread_enqueue, &adapter->tq);
2576         taskqueue_start_threads(&adapter->tq, 1, PI_NET, "%s que",
2577             device_get_nameunit(adapter->dev));
2578         /* Use a TX only tasklet for local timer */
2579         TASK_INIT(&txr->tx_task, 0, em_handle_tx, txr);
2580         txr->tq = taskqueue_create_fast("em_txq", M_NOWAIT,
2581             taskqueue_thread_enqueue, &txr->tq);
2582         taskqueue_start_threads(&txr->tq, 1, PI_NET, "%s txq",
2583             device_get_nameunit(adapter->dev));
2584         TASK_INIT(&adapter->link_task, 0, em_handle_link, adapter);
2585         if ((error = bus_setup_intr(dev, adapter->res, INTR_TYPE_NET,
2586             em_irq_fast, NULL, adapter, &adapter->tag)) != 0) {
2587                 device_printf(dev, "Failed to register fast interrupt "
2588                             "handler: %d\n", error);
2589                 taskqueue_free(adapter->tq);
2590                 adapter->tq = NULL;
2591                 return (error);
2592         }
2593         
2594         return (0);
2595 }
2596
2597 /*********************************************************************
2598  *
2599  *  Setup the MSIX Interrupt handlers
2600  *   This is not really multiqueue; rather,
2601  *   it's just separate interrupt vectors
2602  *   for TX, RX, and Link.
2603  *
2604  **********************************************************************/
2605 int
2606 em_allocate_msix(struct adapter *adapter)
2607 {
2608         device_t        dev = adapter->dev;
2609         struct          tx_ring *txr = adapter->tx_rings;
2610         struct          rx_ring *rxr = adapter->rx_rings;
2611         int             error, rid, vector = 0;
2612         int             cpu_id = 0;
2613
2614
2615         /* Make sure all interrupts are disabled */
2616         E1000_WRITE_REG(&adapter->hw, E1000_IMC, 0xffffffff);
2617
2618         /* First set up ring resources */
2619         for (int i = 0; i < adapter->num_queues; i++, rxr++, vector++) {
2620
2621                 /* RX ring */
2622                 rid = vector + 1;
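                /* SYS_RES_IRQ rids for MSI-X vectors start at 1 */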
2623
2624                 rxr->res = bus_alloc_resource_any(dev,
2625                     SYS_RES_IRQ, &rid, RF_ACTIVE);
2626                 if (rxr->res == NULL) {
2627                         device_printf(dev,
2628                             "Unable to allocate bus resource: "
2629                             "RX MSIX Interrupt %d\n", i);
2630                         return (ENXIO);
2631                 }
2632                 if ((error = bus_setup_intr(dev, rxr->res,
2633                     INTR_TYPE_NET | INTR_MPSAFE, NULL, em_msix_rx,
2634                     rxr, &rxr->tag)) != 0) {
2635                         device_printf(dev, "Failed to register RX handler");
2636                         return (error);
2637                 }
2638 #if __FreeBSD_version >= 800504
2639                 bus_describe_intr(dev, rxr->res, rxr->tag, "rx%d", i);
2640 #endif
2641                 rxr->msix = vector;
2642
2643                 if (em_last_bind_cpu < 0)
2644                         em_last_bind_cpu = CPU_FIRST();
2645                 cpu_id = em_last_bind_cpu;
2646                 bus_bind_intr(dev, rxr->res, cpu_id);
2647
2648                 TASK_INIT(&rxr->rx_task, 0, em_handle_rx, rxr);
2649                 rxr->tq = taskqueue_create_fast("em_rxq", M_NOWAIT,
2650                     taskqueue_thread_enqueue, &rxr->tq);
2651                 taskqueue_start_threads(&rxr->tq, 1, PI_NET, "%s rxq (cpuid %d)",
2652                     device_get_nameunit(adapter->dev), cpu_id);
2653                 /*
2654                 ** Set the bit to enable interrupt
2655                 ** in E1000_IMS -- bits 20 and 21
2656                 ** are for RX0 and RX1; note this has
2657                 ** NOTHING to do with the MSIX vector.
2658                 */
2659                 rxr->ims = 1 << (20 + i);
2660                 adapter->ims |= rxr->ims;
2661                 adapter->ivars |= (8 | rxr->msix) << (i * 4);
2662
2663                 em_last_bind_cpu = CPU_NEXT(em_last_bind_cpu);
2664         }
2665
2666         for (int i = 0; i < adapter->num_queues; i++, txr++, vector++) {
2667                 /* TX ring */
2668                 rid = vector + 1;
2669                 txr->res = bus_alloc_resource_any(dev,
2670                     SYS_RES_IRQ, &rid, RF_ACTIVE);
2671                 if (txr->res == NULL) {
2672                         device_printf(dev,
2673                             "Unable to allocate bus resource: "
2674                             "TX MSIX Interrupt %d\n", i);
2675                         return (ENXIO);
2676                 }
2677                 if ((error = bus_setup_intr(dev, txr->res,
2678                     INTR_TYPE_NET | INTR_MPSAFE, NULL, em_msix_tx,
2679                     txr, &txr->tag)) != 0) {
2680                         device_printf(dev, "Failed to register TX handler");
2681                         return (error);
2682                 }
2683 #if __FreeBSD_version >= 800504
2684                 bus_describe_intr(dev, txr->res, txr->tag, "tx%d", i);
2685 #endif
2686                 txr->msix = vector;
2687
2688                 if (em_last_bind_cpu < 0)
2689                         em_last_bind_cpu = CPU_FIRST();
2690                 cpu_id = em_last_bind_cpu;
2691                 bus_bind_intr(dev, txr->res, cpu_id);
2692
2693                 TASK_INIT(&txr->tx_task, 0, em_handle_tx, txr);
2694                 txr->tq = taskqueue_create_fast("em_txq", M_NOWAIT,
2695                     taskqueue_thread_enqueue, &txr->tq);
2696                 taskqueue_start_threads(&txr->tq, 1, PI_NET, "%s txq (cpuid %d)",
2697                     device_get_nameunit(adapter->dev), cpu_id);
2698                 /*
2699                 ** Set the bit to enable interrupt
2700                 ** in E1000_IMS -- bits 22 and 23
2701                 ** are for TX0 and TX1; note this has
2702                 ** NOTHING to do with the MSIX vector.
2703                 */
2704                 txr->ims = 1 << (22 + i);
2705                 adapter->ims |= txr->ims;
2706                 adapter->ivars |= (8 | txr->msix) << (8 + (i * 4));
2707
2708                 em_last_bind_cpu = CPU_NEXT(em_last_bind_cpu);
2709         }
2710
2711         /* Link interrupt */
2712         rid = vector + 1;
2713         adapter->res = bus_alloc_resource_any(dev,
2714             SYS_RES_IRQ, &rid, RF_SHAREABLE | RF_ACTIVE);
2715         if (!adapter->res) {
2716                 device_printf(dev,"Unable to allocate "
2717                     "bus resource: Link interrupt [%d]\n", rid);
2718                 return (ENXIO);
2719         }
2720         /* Set the link handler function */
2721         error = bus_setup_intr(dev, adapter->res,
2722             INTR_TYPE_NET | INTR_MPSAFE, NULL,
2723             em_msix_link, adapter, &adapter->tag);
2724         if (error) {
2725                 adapter->res = NULL;
2726                 device_printf(dev, "Failed to register LINK handler");
2727                 return (error);
2728         }
2729 #if __FreeBSD_version >= 800504
2730         bus_describe_intr(dev, adapter->res, adapter->tag, "link");
2731 #endif
2732         adapter->linkvec = vector;
2733         adapter->ivars |=  (8 | vector) << 16;
2734         adapter->ivars |= 0x80000000;
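        /*
        ** Illustrative example: with two queues, the vectors assigned
        ** above are rx0=0, rx1=1, tx0=2, tx1=3 and link=4, giving
        **   ivars = (8|0) | (8|1)<<4 | (8|2)<<8 | (8|3)<<12 |
        **           (8|4)<<16 | 0x80000000 = 0x800cba98
        */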
2735
2736         return (0);
2737 }
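
/*
** Illustrative decode (a sketch, not driver code): for the two-queue
** MSIX layout built above, the 82574 IVAR word packs one 4-bit entry
** per interrupt cause, with bit 3 of each entry (the "8" or'd in
** above) marking the entry valid:
**
**      ivars = (8 | 0) << 0        RX0  -> MSIX vector 0
**            | (8 | 1) << 4        RX1  -> MSIX vector 1
**            | (8 | 2) << 8        TX0  -> MSIX vector 2
**            | (8 | 3) << 12       TX1  -> MSIX vector 3
**            | (8 | 4) << 16       link -> MSIX vector 4
**            | 0x80000000          high bit set, matching the code above
**
** The matching E1000_IMS enable bits are 20/21 for RX0/RX1 and
** 22/23 for TX0/TX1, as the per-ring comments note.
*/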
2738
2739
2740 static void
2741 em_free_pci_resources(struct adapter *adapter)
2742 {
2743         device_t        dev = adapter->dev;
2744         struct tx_ring  *txr;
2745         struct rx_ring  *rxr;
2746         int             rid;
2747
2748
2749         /*
2750         ** Release all the queue interrupt resources:
2751         */
2752         for (int i = 0; i < adapter->num_queues; i++) {
2753                 /* an early abort? (tx_rings never allocated) */
2754                 if (adapter->tx_rings == NULL)
2755                         break;
2756                 txr = &adapter->tx_rings[i];
2757                 rid = txr->msix + 1;
2758                 if (txr->tag != NULL) {
2759                         bus_teardown_intr(dev, txr->res, txr->tag);
2760                         txr->tag = NULL;
2761                 }
2762                 if (txr->res != NULL)
2763                         bus_release_resource(dev, SYS_RES_IRQ,
2764                             rid, txr->res);
2765
2766                 /* an early abort? (rx_rings never allocated) */
2767                 if (adapter->rx_rings == NULL)
2768                         break;
2769                 rxr = &adapter->rx_rings[i];
2770                 rid = rxr->msix + 1;
2771                 if (rxr->tag != NULL) {
2772                         bus_teardown_intr(dev, rxr->res, rxr->tag);
2773                         rxr->tag = NULL;
2774                 }
2775                 if (rxr->res != NULL)
2776                         bus_release_resource(dev, SYS_RES_IRQ,
2777                             rid, rxr->res);
2778         }
2779
2780         if (adapter->linkvec) /* we are doing MSIX */
2781                 rid = adapter->linkvec + 1;
2782         else
2783                 rid = (adapter->msix != 0) ? 1 : 0;
2784
2785         if (adapter->tag != NULL) {
2786                 bus_teardown_intr(dev, adapter->res, adapter->tag);
2787                 adapter->tag = NULL;
2788         }
2789
2790         if (adapter->res != NULL)
2791                 bus_release_resource(dev, SYS_RES_IRQ, rid, adapter->res);
2792
2793
2794         if (adapter->msix)
2795                 pci_release_msi(dev);
2796
2797         if (adapter->msix_mem != NULL)
2798                 bus_release_resource(dev, SYS_RES_MEMORY,
2799                     PCIR_BAR(EM_MSIX_BAR), adapter->msix_mem);
2800
2801         if (adapter->memory != NULL)
2802                 bus_release_resource(dev, SYS_RES_MEMORY,
2803                     PCIR_BAR(0), adapter->memory);
2804
2805         if (adapter->flash != NULL)
2806                 bus_release_resource(dev, SYS_RES_MEMORY,
2807                     EM_FLASH, adapter->flash);
2808 }
2809
2810 /*
2811  * Setup MSI or MSI/X
2812  */
2813 static int
2814 em_setup_msix(struct adapter *adapter)
2815 {
2816         device_t dev = adapter->dev;
2817         int val;
2818
2819         /* Nearly always going to use one queue */
2820         adapter->num_queues = 1;
2821
2822         /*
2823         ** Try using MSI-X for Hartwell adapters
2824         */
2825         if ((adapter->hw.mac.type == e1000_82574) &&
2826             (em_enable_msix == TRUE)) {
2827 #ifdef EM_MULTIQUEUE
2828                 adapter->num_queues = (em_num_queues == 1) ? 1 : 2;
2829                 if (adapter->num_queues > 1)
2830                         em_enable_vectors_82574(adapter);
2831 #endif
2832                 /* Map the MSIX BAR */
2833                 int rid = PCIR_BAR(EM_MSIX_BAR);
2834                 adapter->msix_mem = bus_alloc_resource_any(dev,
2835                     SYS_RES_MEMORY, &rid, RF_ACTIVE);
2836                 if (adapter->msix_mem == NULL) {
2837                         /* May not be enabled */
2838                         device_printf(adapter->dev,
2839                             "Unable to map MSIX table\n");
2840                         goto msi;
2841                 }
2842                 val = pci_msix_count(dev); 
2843
2844 #ifdef EM_MULTIQUEUE
2845                 /* We need 5 vectors in the multiqueue case */
2846                 if (adapter->num_queues > 1) {
2847                         if (val >= 5)
2848                                 val = 5;
2849                         else {
2850                                 adapter->num_queues = 1;
2851                                 device_printf(adapter->dev,
2852                                     "Insufficient MSIX vectors for >1 queue, "
2853                                     "using single queue...\n");
2854                                 goto msix_one;
2855                         }
2856                 } else {
2857 msix_one:
2858 #endif
2859                         if (val >= 3)
2860                                 val = 3;
2861                         else {
2862                                 device_printf(adapter->dev,
2863                                     "Insufficient MSIX vectors, using MSI\n");
2864                                 goto msi;
2865                         }
2866 #ifdef EM_MULTIQUEUE
2867                 }
2868 #endif
2869
2870                 if (pci_alloc_msix(dev, &val) == 0) {
2871                         device_printf(adapter->dev,
2872                             "Using MSIX interrupts "
2873                             "with %d vectors\n", val);
2874                         return (val);
2875                 }
2876
2877                 /*
2878                 ** If MSIX alloc failed or provided us with
2879                 ** less than needed, free and fall through to MSI
2880                 */
2881                 pci_release_msi(dev);
2882         }
2883 msi:
2884         if (adapter->msix_mem != NULL) {
2885                 bus_release_resource(dev, SYS_RES_MEMORY,
2886                     PCIR_BAR(EM_MSIX_BAR), adapter->msix_mem);
2887                 adapter->msix_mem = NULL;
2888         }
2889         val = 1;
2890         if (pci_alloc_msi(dev, &val) == 0) {
2891                 device_printf(adapter->dev, "Using an MSI interrupt\n");
2892                 return (val);
2893         } 
2894         /* Should only happen due to manual configuration */
2895         device_printf(adapter->dev, "No MSI/MSIX, using a Legacy IRQ\n");
2896         return (0);
2897 }
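
/*
** Vector budget implied by em_setup_msix() above (illustrative):
** one RX and one TX vector per queue plus one link vector, i.e.
**
**      vectors = num_queues * 2 + 1    (1 queue -> 3, 2 queues -> 5)
**
** which is what the "val >= 3" and "val >= 5" checks enforce.
*/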
2898
2899
2900 /*********************************************************************
2901  *
2902  *  Initialize the hardware to a configuration
2903  *  as specified by the adapter structure.
2904  *
2905  **********************************************************************/
2906 static void
2907 em_reset(struct adapter *adapter)
2908 {
2909         device_t        dev = adapter->dev;
2910         struct ifnet    *ifp = adapter->ifp;
2911         struct e1000_hw *hw = &adapter->hw;
2912         u16             rx_buffer_size;
2913         u32             pba;
2914
2915         INIT_DEBUGOUT("em_reset: begin");
2916
2917         /* Set up smart power down as default off on newer adapters. */
2918         if (!em_smart_pwr_down && (hw->mac.type == e1000_82571 ||
2919             hw->mac.type == e1000_82572)) {
2920                 u16 phy_tmp = 0;
2921
2922                 /* Speed up time to link by disabling smart power down. */
2923                 e1000_read_phy_reg(hw, IGP02E1000_PHY_POWER_MGMT, &phy_tmp);
2924                 phy_tmp &= ~IGP02E1000_PM_SPD;
2925                 e1000_write_phy_reg(hw, IGP02E1000_PHY_POWER_MGMT, phy_tmp);
2926         }
2927
2928         /*
2929          * Packet Buffer Allocation (PBA)
2930          * Writing PBA sets the receive portion of the buffer;
2931          * the remainder is used for the transmit buffer.
2932          */
2933         switch (hw->mac.type) {
2934         /* Total Packet Buffer on these is 48K */
2935         case e1000_82571:
2936         case e1000_82572:
2937         case e1000_80003es2lan:
2938                 pba = E1000_PBA_32K; /* 32K for Rx, 16K for Tx */
2939                 break;
2940         case e1000_82573: /* 82573: Total Packet Buffer is 32K */
2941                 pba = E1000_PBA_12K; /* 12K for Rx, 20K for Tx */
2942                 break;
2943         case e1000_82574:
2944         case e1000_82583:
2945                 pba = E1000_PBA_20K; /* 20K for Rx, 20K for Tx */
2946                 break;
2947         case e1000_ich8lan:
2948                 pba = E1000_PBA_8K;
2949                 break;
2950         case e1000_ich9lan:
2951         case e1000_ich10lan:
2952                 /* Boost Receive side for jumbo frames */
2953                 if (adapter->hw.mac.max_frame_size > 4096)
2954                         pba = E1000_PBA_14K;
2955                 else
2956                         pba = E1000_PBA_10K;
2957                 break;
2958         case e1000_pchlan:
2959         case e1000_pch2lan:
2960         case e1000_pch_lpt:
2961                 pba = E1000_PBA_26K;
2962                 break;
2963         default:
2964                 if (adapter->hw.mac.max_frame_size > 8192)
2965                         pba = E1000_PBA_40K; /* 40K for Rx, 24K for Tx */
2966                 else
2967                         pba = E1000_PBA_48K; /* 48K for Rx, 16K for Tx */
2968         }
2969         E1000_WRITE_REG(&adapter->hw, E1000_PBA, pba);
2970
2971         /*
2972          * These parameters control the automatic generation (Tx) and
2973          * response (Rx) to Ethernet PAUSE frames.
2974          * - High water mark should allow for at least two frames to be
2975          *   received after sending an XOFF.
2976          * - Low water mark works best when it is very near the high water mark.
2977          *   This allows the receiver to restart by sending XON when it has
2978          *   drained a bit. Here we use an arbitrary value of 1500 which will
2979          *   restart after one full frame is pulled from the buffer. There
2980          *   could be several smaller frames in the buffer and if so they will
2981          *   not trigger the XON until their total number reduces the buffer
2982          *   by 1500.
2983          * - The pause time is fairly large at 1000 x 512ns = 512 usec.
2984          */
2985         rx_buffer_size = ((E1000_READ_REG(hw, E1000_PBA) & 0xffff) << 10);
2986         hw->fc.high_water = rx_buffer_size -
2987             roundup2(adapter->hw.mac.max_frame_size, 1024);
2988         hw->fc.low_water = hw->fc.high_water - 1500;
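        /*
         * Worked example (illustrative): on an 82574, E1000_PBA_20K
         * gives a 20KB receive allocation, so assuming the standard
         * 1518-byte max frame the code above computes:
         *   rx_buffer_size = 20 << 10 = 20480
         *   high_water = 20480 - roundup2(1518, 1024) = 20480 - 2048 = 18432
         *   low_water  = 18432 - 1500 = 16932
         */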
2989
2990         if (adapter->fc) /* locally set flow control value? */
2991                 hw->fc.requested_mode = adapter->fc;
2992         else
2993                 hw->fc.requested_mode = e1000_fc_full;
2994
2995         if (hw->mac.type == e1000_80003es2lan)
2996                 hw->fc.pause_time = 0xFFFF;
2997         else
2998                 hw->fc.pause_time = EM_FC_PAUSE_TIME;
2999
3000         hw->fc.send_xon = TRUE;
3001
3002         /* Device specific overrides/settings */
3003         switch (hw->mac.type) {
3004         case e1000_pchlan:
3005                 /* Workaround: no TX flow ctrl for PCH */
3006                 hw->fc.requested_mode = e1000_fc_rx_pause;
3007                 hw->fc.pause_time = 0xFFFF; /* override */
3008                 if (ifp->if_mtu > ETHERMTU) {
3009                         hw->fc.high_water = 0x3500;
3010                         hw->fc.low_water = 0x1500;
3011                 } else {
3012                         hw->fc.high_water = 0x5000;
3013                         hw->fc.low_water = 0x3000;
3014                 }
3015                 hw->fc.refresh_time = 0x1000;
3016                 break;
3017         case e1000_pch2lan:
3018         case e1000_pch_lpt:
3019                 hw->fc.high_water = 0x5C20;
3020                 hw->fc.low_water = 0x5048;
3021                 hw->fc.pause_time = 0x0650;
3022                 hw->fc.refresh_time = 0x0400;
3023                 /* Jumbos need adjusted PBA */
3024                 if (ifp->if_mtu > ETHERMTU)
3025                         E1000_WRITE_REG(hw, E1000_PBA, 12);
3026                 else
3027                         E1000_WRITE_REG(hw, E1000_PBA, 26);
3028                 break;
3029         case e1000_ich9lan:
3030         case e1000_ich10lan:
3031                 if (ifp->if_mtu > ETHERMTU) {
3032                         hw->fc.high_water = 0x2800;
3033                         hw->fc.low_water = hw->fc.high_water - 8;
3034                         break;
3035                 } 
3036                 /* else fall thru */
3037         default:
3038                 if (hw->mac.type == e1000_80003es2lan)
3039                         hw->fc.pause_time = 0xFFFF;
3040                 break;
3041         }
3042
3043         /* Issue a global reset */
3044         e1000_reset_hw(hw);
3045         E1000_WRITE_REG(hw, E1000_WUC, 0);
3046         em_disable_aspm(adapter);
3047         /* and a re-init */
3048         if (e1000_init_hw(hw) < 0) {
3049                 device_printf(dev, "Hardware Initialization Failed\n");
3050                 return;
3051         }
3052
3053         E1000_WRITE_REG(hw, E1000_VET, ETHERTYPE_VLAN);
3054         e1000_get_phy_info(hw);
3055         e1000_check_for_link(hw);
3056         return;
3057 }
3058
3059 /*********************************************************************
3060  *
3061  *  Setup networking device structure and register an interface.
3062  *
3063  **********************************************************************/
3064 static int
3065 em_setup_interface(device_t dev, struct adapter *adapter)
3066 {
3067         struct ifnet   *ifp;
3068
3069         INIT_DEBUGOUT("em_setup_interface: begin");
3070
3071         ifp = adapter->ifp = if_alloc(IFT_ETHER);
3072         if (ifp == NULL) {
3073                 device_printf(dev, "can not allocate ifnet structure\n");
3074                 return (-1);
3075         }
3076         if_initname(ifp, device_get_name(dev), device_get_unit(dev));
3077         ifp->if_init = em_init;
3078         ifp->if_softc = adapter;
3079         ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
3080         ifp->if_ioctl = em_ioctl;
3081 #ifdef EM_MULTIQUEUE
3082         /* Multiqueue stack interface */
3083         ifp->if_transmit = em_mq_start;
3084         ifp->if_qflush = em_qflush;
3085 #else
3086         ifp->if_start = em_start;
3087         IFQ_SET_MAXLEN(&ifp->if_snd, adapter->num_tx_desc - 1);
3088         ifp->if_snd.ifq_drv_maxlen = adapter->num_tx_desc - 1;
3089         IFQ_SET_READY(&ifp->if_snd);
3090 #endif  
3091
3092         ether_ifattach(ifp, adapter->hw.mac.addr);
3093
3094         ifp->if_capabilities = ifp->if_capenable = 0;
3095
3096
3097         ifp->if_capabilities |= IFCAP_HWCSUM | IFCAP_VLAN_HWCSUM;
3098         ifp->if_capabilities |= IFCAP_TSO4;
3099         /*
3100          * Tell the upper layer(s) we
3101          * support full VLAN capability
3102          */
3103         ifp->if_data.ifi_hdrlen = sizeof(struct ether_vlan_header);
3104         ifp->if_capabilities |= IFCAP_VLAN_HWTAGGING
3105                              |  IFCAP_VLAN_HWTSO
3106                              |  IFCAP_VLAN_MTU;
3107         ifp->if_capenable = ifp->if_capabilities;
3108
3109         /*
3110         ** Don't turn this on by default: if vlans are
3111         ** created on another pseudo device (e.g. lagg)
3112         ** then vlan events are not passed through, breaking
3113         ** operation, but with HW FILTER off it works. If
3114         ** you use vlans directly on the em driver you can
3115         ** enable this and get full hardware tag filtering.
3116         */
3117         ifp->if_capabilities |= IFCAP_VLAN_HWFILTER;
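        /*
        ** Usage note (hedged): because the capability is advertised but
        ** not copied into if_capenable above, an administrator can opt
        ** in at runtime with something like
        **      ifconfig em0 vlanhwfilter
        ** and back out again with -vlanhwfilter.
        */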
3118
3119 #ifdef DEVICE_POLLING
3120         ifp->if_capabilities |= IFCAP_POLLING;
3121 #endif
3122
3123         /* Enable only WOL MAGIC by default */
3124         if (adapter->wol) {
3125                 ifp->if_capabilities |= IFCAP_WOL;
3126                 ifp->if_capenable |= IFCAP_WOL_MAGIC;
3127         }
3128                 
3129         /*
3130          * Specify the media types supported by this adapter and register
3131          * callbacks to update media and link information
3132          */
3133         ifmedia_init(&adapter->media, IFM_IMASK,
3134             em_media_change, em_media_status);
3135         if ((adapter->hw.phy.media_type == e1000_media_type_fiber) ||
3136             (adapter->hw.phy.media_type == e1000_media_type_internal_serdes)) {
3137                 u_char fiber_type = IFM_1000_SX;        /* default type */
3138
3139                 ifmedia_add(&adapter->media, IFM_ETHER | fiber_type | IFM_FDX, 
3140                             0, NULL);
3141                 ifmedia_add(&adapter->media, IFM_ETHER | fiber_type, 0, NULL);
3142         } else {
3143                 ifmedia_add(&adapter->media, IFM_ETHER | IFM_10_T, 0, NULL);
3144                 ifmedia_add(&adapter->media, IFM_ETHER | IFM_10_T | IFM_FDX,
3145                             0, NULL);
3146                 ifmedia_add(&adapter->media, IFM_ETHER | IFM_100_TX,
3147                             0, NULL);
3148                 ifmedia_add(&adapter->media, IFM_ETHER | IFM_100_TX | IFM_FDX,
3149                             0, NULL);
3150                 if (adapter->hw.phy.type != e1000_phy_ife) {
3151                         ifmedia_add(&adapter->media,
3152                                 IFM_ETHER | IFM_1000_T | IFM_FDX, 0, NULL);
3153                         ifmedia_add(&adapter->media,
3154                                 IFM_ETHER | IFM_1000_T, 0, NULL);
3155                 }
3156         }
3157         ifmedia_add(&adapter->media, IFM_ETHER | IFM_AUTO, 0, NULL);
3158         ifmedia_set(&adapter->media, IFM_ETHER | IFM_AUTO);
3159         return (0);
3160 }
3161
3162
3163 /*
3164  * Manage DMA'able memory.
3165  */
3166 static void
3167 em_dmamap_cb(void *arg, bus_dma_segment_t *segs, int nseg, int error)
3168 {
3169         if (error)
3170                 return;
3171         *(bus_addr_t *) arg = segs[0].ds_addr;
3172 }
3173
3174 static int
3175 em_dma_malloc(struct adapter *adapter, bus_size_t size,
3176         struct em_dma_alloc *dma, int mapflags)
3177 {
3178         int error;
3179
3180         error = bus_dma_tag_create(bus_get_dma_tag(adapter->dev), /* parent */
3181                                 EM_DBA_ALIGN, 0,        /* alignment, bounds */
3182                                 BUS_SPACE_MAXADDR,      /* lowaddr */
3183                                 BUS_SPACE_MAXADDR,      /* highaddr */
3184                                 NULL, NULL,             /* filter, filterarg */
3185                                 size,                   /* maxsize */
3186                                 1,                      /* nsegments */
3187                                 size,                   /* maxsegsize */
3188                                 0,                      /* flags */
3189                                 NULL,                   /* lockfunc */
3190                                 NULL,                   /* lockarg */
3191                                 &dma->dma_tag);
3192         if (error) {
3193                 device_printf(adapter->dev,
3194                     "%s: bus_dma_tag_create failed: %d\n",
3195                     __func__, error);
3196                 goto fail_0;
3197         }
3198
3199         error = bus_dmamem_alloc(dma->dma_tag, (void**) &dma->dma_vaddr,
3200             BUS_DMA_NOWAIT | BUS_DMA_COHERENT, &dma->dma_map);
3201         if (error) {
3202                 device_printf(adapter->dev,
3203                     "%s: bus_dmamem_alloc(%ju) failed: %d\n",
3204                     __func__, (uintmax_t)size, error);
3205                 goto fail_2;
3206         }
3207
3208         dma->dma_paddr = 0;
3209         error = bus_dmamap_load(dma->dma_tag, dma->dma_map, dma->dma_vaddr,
3210             size, em_dmamap_cb, &dma->dma_paddr, mapflags | BUS_DMA_NOWAIT);
3211         if (error || dma->dma_paddr == 0) {
3212                 device_printf(adapter->dev,
3213                     "%s: bus_dmamap_load failed: %d\n",
3214                     __func__, error);
3215                 goto fail_3;
3216         }
3217
3218         return (0);
3219
3220 fail_3:
3221         bus_dmamap_unload(dma->dma_tag, dma->dma_map);
3222 fail_2:
3223         bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map);
3224         bus_dma_tag_destroy(dma->dma_tag);
3225 fail_0:
3226         dma->dma_map = NULL;
3227         dma->dma_tag = NULL;
3228
3229         return (error);
3230 }
3231
3232 static void
3233 em_dma_free(struct adapter *adapter, struct em_dma_alloc *dma)
3234 {
3235         if (dma->dma_tag == NULL)
3236                 return;
3237         if (dma->dma_map != NULL) {
3238                 bus_dmamap_sync(dma->dma_tag, dma->dma_map,
3239                     BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
3240                 bus_dmamap_unload(dma->dma_tag, dma->dma_map);
3241                 bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map);
3242                 dma->dma_map = NULL;
3243         }
3244         bus_dma_tag_destroy(dma->dma_tag);
3245         dma->dma_tag = NULL;
3246 }
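
/*
** Illustrative lifecycle (a sketch, not driver code) of the two
** helpers above; em_dma_malloc() unwinds its own partial state on
** failure, so the caller only needs to free on success:
**
**      struct em_dma_alloc dma;
**
**      if (em_dma_malloc(adapter, 4096, &dma, BUS_DMA_NOWAIT) == 0) {
**              ... use dma.dma_vaddr (KVA) and dma.dma_paddr (bus) ...
**              em_dma_free(adapter, &dma);
**      }
*/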
3247
3248
3249 /*********************************************************************
3250  *
3251  *  Allocate memory for the transmit and receive rings, and then
3252  *  the descriptors associated with each, called only once at attach.
3253  *
3254  **********************************************************************/
3255 static int
3256 em_allocate_queues(struct adapter *adapter)
3257 {
3258         device_t                dev = adapter->dev;
3259         struct tx_ring          *txr = NULL;
3260         struct rx_ring          *rxr = NULL;
3261         int rsize, tsize, error = E1000_SUCCESS;
3262         int txconf = 0, rxconf = 0;
3263
3264
3265         /* Allocate the TX ring struct memory */
3266         if (!(adapter->tx_rings =
3267             (struct tx_ring *) malloc(sizeof(struct tx_ring) *
3268             adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
3269                 device_printf(dev, "Unable to allocate TX ring memory\n");
3270                 error = ENOMEM;
3271                 goto fail;
3272         }
3273
3274         /* Now allocate the RX */
3275         if (!(adapter->rx_rings =
3276             (struct rx_ring *) malloc(sizeof(struct rx_ring) *
3277             adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
3278                 device_printf(dev, "Unable to allocate RX ring memory\n");
3279                 error = ENOMEM;
3280                 goto rx_fail;
3281         }
3282
3283         tsize = roundup2(adapter->num_tx_desc *
3284             sizeof(struct e1000_tx_desc), EM_DBA_ALIGN);
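        /*
         * e.g. (assuming the driver default of 1024 TX descriptors and
         * the 16-byte legacy descriptor): tsize = 1024 * 16 = 16384,
         * which is already a multiple of EM_DBA_ALIGN.
         */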
3285         /*
3286          * Now set up the TX queues; txconf is needed to handle the
3287          * possibility that things fail midcourse and we need to
3288          * undo the allocations gracefully
3289          */ 
3290         for (int i = 0; i < adapter->num_queues; i++, txconf++) {
3291                 /* Set up some basics */
3292                 txr = &adapter->tx_rings[i];
3293                 txr->adapter = adapter;
3294                 txr->me = i;
3295
3296                 /* Initialize the TX lock */
3297                 snprintf(txr->mtx_name, sizeof(txr->mtx_name), "%s:tx(%d)",
3298                     device_get_nameunit(dev), txr->me);
3299                 mtx_init(&txr->tx_mtx, txr->mtx_name, NULL, MTX_DEF);
3300
3301                 if (em_dma_malloc(adapter, tsize,
3302                         &txr->txdma, BUS_DMA_NOWAIT)) {
3303                         device_printf(dev,
3304                             "Unable to allocate TX Descriptor memory\n");
3305                         error = ENOMEM;
3306                         goto err_tx_desc;
3307                 }
3308                 txr->tx_base = (struct e1000_tx_desc *)txr->txdma.dma_vaddr;
3309                 bzero((void *)txr->tx_base, tsize);
3310
3311                 if (em_allocate_transmit_buffers(txr)) {
3312                         device_printf(dev,
3313                             "Critical Failure setting up transmit buffers\n");
3314                         error = ENOMEM;
3315                         goto err_tx_desc;
3316                 }
3317 #if __FreeBSD_version >= 800000
3318                 /* Allocate a buf ring */
3319                 txr->br = buf_ring_alloc(4096, M_DEVBUF,
3320                     M_WAITOK, &txr->tx_mtx);
3321 #endif
3322         }
3323
3324         /*
3325          * Next the RX queues...
3326          */ 
3327         rsize = roundup2(adapter->num_rx_desc *
3328             sizeof(struct e1000_rx_desc), EM_DBA_ALIGN);
3329         for (int i = 0; i < adapter->num_queues; i++, rxconf++) {
3330                 rxr = &adapter->rx_rings[i];
3331                 rxr->adapter = adapter;
3332                 rxr->me = i;
3333
3334                 /* Initialize the RX lock */
3335                 snprintf(rxr->mtx_name, sizeof(rxr->mtx_name), "%s:rx(%d)",
3336                     device_get_nameunit(dev), rxr->me);
3337                 mtx_init(&rxr->rx_mtx, rxr->mtx_name, NULL, MTX_DEF);
3338
3339                 if (em_dma_malloc(adapter, rsize,
3340                         &rxr->rxdma, BUS_DMA_NOWAIT)) {
3341                         device_printf(dev,
3342                             "Unable to allocate RX Descriptor memory\n");
3343                         error = ENOMEM;
3344                         goto err_rx_desc;
3345                 }
3346                 rxr->rx_base = (struct e1000_rx_desc *)rxr->rxdma.dma_vaddr;
3347                 bzero((void *)rxr->rx_base, rsize);
3348
3349                 /* Allocate receive buffers for the ring */
3350                 if (em_allocate_receive_buffers(rxr)) {
3351                         device_printf(dev,
3352                             "Critical Failure setting up receive buffers\n");
3353                         error = ENOMEM;
3354                         goto err_rx_desc;
3355                 }
3356         }
3357
3358         return (0);
3359
3360 err_rx_desc:
3361         for (rxr = adapter->rx_rings; rxconf > 0; rxr++, rxconf--)
3362                 em_dma_free(adapter, &rxr->rxdma);
3363 err_tx_desc:
3364         for (txr = adapter->tx_rings; txconf > 0; txr++, txconf--)
3365                 em_dma_free(adapter, &txr->txdma);
3366         free(adapter->rx_rings, M_DEVBUF);
3367 rx_fail:
3368 #if __FreeBSD_version >= 800000
3369         if (txr != NULL) buf_ring_free(txr->br, M_DEVBUF);
3370 #endif
3371         free(adapter->tx_rings, M_DEVBUF);
3372 fail:
3373         return (error);
3374 }
3375
3376
3377 /*********************************************************************
3378  *
3379  *  Allocate memory for tx_buffer structures. The tx_buffer stores all
3380  *  the information needed to transmit a packet on the wire. This is
3381  *  called only once at attach, setup is done every reset.
3382  *
3383  **********************************************************************/
3384 static int
3385 em_allocate_transmit_buffers(struct tx_ring *txr)
3386 {
3387         struct adapter *adapter = txr->adapter;
3388         device_t dev = adapter->dev;
3389         struct em_buffer *txbuf;
3390         int error, i;
3391
3392         /*
3393          * Setup DMA descriptor areas.
3394          */
3395         if ((error = bus_dma_tag_create(bus_get_dma_tag(dev),
3396                                1, 0,                    /* alignment, bounds */
3397                                BUS_SPACE_MAXADDR,       /* lowaddr */
3398                                BUS_SPACE_MAXADDR,       /* highaddr */
3399                                NULL, NULL,              /* filter, filterarg */
3400                                EM_TSO_SIZE,             /* maxsize */
3401                                EM_MAX_SCATTER,          /* nsegments */
3402                                PAGE_SIZE,               /* maxsegsize */
3403                                0,                       /* flags */
3404                                NULL,                    /* lockfunc */
3405                                NULL,                    /* lockfuncarg */
3406                                &txr->txtag))) {
3407                 device_printf(dev,"Unable to allocate TX DMA tag\n");
3408                 goto fail;
3409         }
3410
3411         if (!(txr->tx_buffers =
3412             (struct em_buffer *) malloc(sizeof(struct em_buffer) *
3413             adapter->num_tx_desc, M_DEVBUF, M_NOWAIT | M_ZERO))) {
3414                 device_printf(dev, "Unable to allocate tx_buffer memory\n");
3415                 error = ENOMEM;
3416                 goto fail;
3417         }
3418
3419         /* Create the descriptor buffer dma maps */
3420         txbuf = txr->tx_buffers;
3421         for (i = 0; i < adapter->num_tx_desc; i++, txbuf++) {
3422                 error = bus_dmamap_create(txr->txtag, 0, &txbuf->map);
3423                 if (error != 0) {
3424                         device_printf(dev, "Unable to create TX DMA map\n");
3425                         goto fail;
3426                 }
3427         }
3428
3429         return 0;
3430 fail:
3431         /* We free all, it handles case where we are in the middle */
3432         em_free_transmit_structures(adapter);
3433         return (error);
3434 }
3435
3436 /*********************************************************************
3437  *
3438  *  Initialize a transmit ring.
3439  *
3440  **********************************************************************/
3441 static void
3442 em_setup_transmit_ring(struct tx_ring *txr)
3443 {
3444         struct adapter *adapter = txr->adapter;
3445         struct em_buffer *txbuf;
3446         int i;
3447 #ifdef DEV_NETMAP
3448         struct netmap_adapter *na = NA(adapter->ifp);
3449         struct netmap_slot *slot;
3450 #endif /* DEV_NETMAP */
3451
3452         /* Clear the old descriptor contents */
3453         EM_TX_LOCK(txr);
3454 #ifdef DEV_NETMAP
3455         slot = netmap_reset(na, NR_TX, txr->me, 0);
3456 #endif /* DEV_NETMAP */
3457
3458         bzero((void *)txr->tx_base,
3459               (sizeof(struct e1000_tx_desc)) * adapter->num_tx_desc);
3460         /* Reset indices */
3461         txr->next_avail_desc = 0;
3462         txr->next_to_clean = 0;
3463
3464         /* Free any existing tx buffers. */
3465         txbuf = txr->tx_buffers;
3466         for (i = 0; i < adapter->num_tx_desc; i++, txbuf++) {
3467                 if (txbuf->m_head != NULL) {
3468                         bus_dmamap_sync(txr->txtag, txbuf->map,
3469                             BUS_DMASYNC_POSTWRITE);
3470                         bus_dmamap_unload(txr->txtag, txbuf->map);
3471                         m_freem(txbuf->m_head);
3472                         txbuf->m_head = NULL;
3473                 }
3474 #ifdef DEV_NETMAP
3475                 if (slot) {
3476                         int si = netmap_idx_n2k(&na->tx_rings[txr->me], i);
3477                         uint64_t paddr;
3478                         void *addr;
3479
3480                         addr = PNMB(na, slot + si, &paddr);
3481                         txr->tx_base[i].buffer_addr = htole64(paddr);
3482                         /* reload the map for netmap mode */
3483                         netmap_load_map(na, txr->txtag, txbuf->map, addr);
3484                 }
3485 #endif /* DEV_NETMAP */
3486
3487                 /* clear the watch index */
3488                 txbuf->next_eop = -1;
3489         }
3490
3491         /* Set number of descriptors available */
3492         txr->tx_avail = adapter->num_tx_desc;
3493         txr->busy = EM_TX_IDLE;
3494
3495         /* Clear checksum offload context. */
3496         txr->last_hw_offload = 0;
3497         txr->last_hw_ipcss = 0;
3498         txr->last_hw_ipcso = 0;
3499         txr->last_hw_tucss = 0;
3500         txr->last_hw_tucso = 0;
3501
3502         bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
3503             BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
3504         EM_TX_UNLOCK(txr);
3505 }
3506
3507 /*********************************************************************
3508  *
3509  *  Initialize all transmit rings.
3510  *
3511  **********************************************************************/
3512 static void
3513 em_setup_transmit_structures(struct adapter *adapter)
3514 {
3515         struct tx_ring *txr = adapter->tx_rings;
3516
3517         for (int i = 0; i < adapter->num_queues; i++, txr++)
3518                 em_setup_transmit_ring(txr);
3519
3520         return;
3521 }
3522
3523 /*********************************************************************
3524  *
3525  *  Enable transmit unit.
3526  *
3527  **********************************************************************/
3528 static void
3529 em_initialize_transmit_unit(struct adapter *adapter)
3530 {
3531         struct tx_ring  *txr = adapter->tx_rings;
3532         struct e1000_hw *hw = &adapter->hw;
3533         u32     tctl, txdctl = 0, tarc, tipg = 0;
3534
3535         INIT_DEBUGOUT("em_initialize_transmit_unit: begin");
3536
3537         for (int i = 0; i < adapter->num_queues; i++, txr++) {
3538                 u64 bus_addr = txr->txdma.dma_paddr;
3539                 /* Base and Len of TX Ring */
3540                 E1000_WRITE_REG(hw, E1000_TDLEN(i),
3541                     adapter->num_tx_desc * sizeof(struct e1000_tx_desc));
3542                 E1000_WRITE_REG(hw, E1000_TDBAH(i),
3543                     (u32)(bus_addr >> 32));
3544                 E1000_WRITE_REG(hw, E1000_TDBAL(i),
3545                     (u32)bus_addr);
3546                 /* Init the HEAD/TAIL indices */
3547                 E1000_WRITE_REG(hw, E1000_TDT(i), 0);
3548                 E1000_WRITE_REG(hw, E1000_TDH(i), 0);
3549
3550                 HW_DEBUGOUT2("Base = %x, Length = %x\n",
3551                     E1000_READ_REG(&adapter->hw, E1000_TDBAL(i)),
3552                     E1000_READ_REG(&adapter->hw, E1000_TDLEN(i)));
3553
3554                 txr->busy = EM_TX_IDLE;
3555                 txdctl = 0; /* clear txdctl */
3556                 txdctl |= 0x1f; /* PTHRESH */
3557                 txdctl |= 1 << 8; /* HTHRESH */
3558                 txdctl |= 1 << 16; /* WTHRESH */
3559                 txdctl |= 1 << 22; /* Reserved bit 22 must always be 1 */
3560                 txdctl |= E1000_TXDCTL_GRAN;
3561                 txdctl |= 1 << 25; /* LWTHRESH */
3562
3563                 E1000_WRITE_REG(hw, E1000_TXDCTL(i), txdctl);
3564         }
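        /*
        ** e.g. (assuming E1000_TXDCTL_GRAN is bit 24): the value
        ** assembled above works out to
        **      0x1f | 1<<8 | 1<<16 | 1<<22 | 1<<24 | 1<<25 = 0x0341011f
        */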
3565
3566         /* Set the default values for the Tx Inter Packet Gap timer */
3567         switch (adapter->hw.mac.type) {
3568         case e1000_80003es2lan:
3569                 tipg = DEFAULT_82543_TIPG_IPGR1;
3570                 tipg |= DEFAULT_80003ES2LAN_TIPG_IPGR2 <<
3571                     E1000_TIPG_IPGR2_SHIFT;
3572                 break;
3573         default:
3574                 if ((adapter->hw.phy.media_type == e1000_media_type_fiber) ||
3575                     (adapter->hw.phy.media_type ==
3576                     e1000_media_type_internal_serdes))
3577                         tipg = DEFAULT_82543_TIPG_IPGT_FIBER;
3578                 else
3579                         tipg = DEFAULT_82543_TIPG_IPGT_COPPER;
3580                 tipg |= DEFAULT_82543_TIPG_IPGR1 << E1000_TIPG_IPGR1_SHIFT;
3581                 tipg |= DEFAULT_82543_TIPG_IPGR2 << E1000_TIPG_IPGR2_SHIFT;
3582         }
3583
3584         E1000_WRITE_REG(&adapter->hw, E1000_TIPG, tipg);
3585         E1000_WRITE_REG(&adapter->hw, E1000_TIDV, adapter->tx_int_delay.value);
3586
3587         if (adapter->hw.mac.type >= e1000_82540)
3588                 E1000_WRITE_REG(&adapter->hw, E1000_TADV,
3589                     adapter->tx_abs_int_delay.value);
3590
3591         if ((adapter->hw.mac.type == e1000_82571) ||
3592             (adapter->hw.mac.type == e1000_82572)) {
3593                 tarc = E1000_READ_REG(&adapter->hw, E1000_TARC(0));
3594                 tarc |= TARC_SPEED_MODE_BIT;
3595                 E1000_WRITE_REG(&adapter->hw, E1000_TARC(0), tarc);
3596         } else if (adapter->hw.mac.type == e1000_80003es2lan) {
3597                 /* errata: program both queues to unweighted RR */
3598                 tarc = E1000_READ_REG(&adapter->hw, E1000_TARC(0));
3599                 tarc |= 1;
3600                 E1000_WRITE_REG(&adapter->hw, E1000_TARC(0), tarc);
3601                 tarc = E1000_READ_REG(&adapter->hw, E1000_TARC(1));
3602                 tarc |= 1;
3603                 E1000_WRITE_REG(&adapter->hw, E1000_TARC(1), tarc);
3604         } else if (adapter->hw.mac.type == e1000_82574) {
3605                 tarc = E1000_READ_REG(&adapter->hw, E1000_TARC(0));
3606                 tarc |= TARC_ERRATA_BIT;
3607                 if (adapter->num_queues > 1) {
3608                         tarc |= (TARC_COMPENSATION_MODE | TARC_MQ_FIX);
3609                         E1000_WRITE_REG(&adapter->hw, E1000_TARC(0), tarc);
3610                         E1000_WRITE_REG(&adapter->hw, E1000_TARC(1), tarc);
3611                 } else
3612                         E1000_WRITE_REG(&adapter->hw, E1000_TARC(0), tarc);
3613         }
3614
3615         adapter->txd_cmd = E1000_TXD_CMD_IFCS;
3616         if (adapter->tx_int_delay.value > 0)
3617                 adapter->txd_cmd |= E1000_TXD_CMD_IDE;
3618
3619         /* Program the Transmit Control Register */
3620         tctl = E1000_READ_REG(&adapter->hw, E1000_TCTL);
3621         tctl &= ~E1000_TCTL_CT;
3622         tctl |= (E1000_TCTL_PSP | E1000_TCTL_RTLC | E1000_TCTL_EN |
3623                    (E1000_COLLISION_THRESHOLD << E1000_CT_SHIFT));
3624
3625         if (adapter->hw.mac.type >= e1000_82571)
3626                 tctl |= E1000_TCTL_MULR;
3627
3628         /* This write will effectively turn on the transmit unit. */
3629         E1000_WRITE_REG(&adapter->hw, E1000_TCTL, tctl);
3630
3631 }
3632
3633
3634 /*********************************************************************
3635  *
3636  *  Free all transmit rings.
3637  *
3638  **********************************************************************/
3639 static void
3640 em_free_transmit_structures(struct adapter *adapter)
3641 {
3642         struct tx_ring *txr = adapter->tx_rings;
3643
3644         for (int i = 0; i < adapter->num_queues; i++, txr++) {
3645                 EM_TX_LOCK(txr);
3646                 em_free_transmit_buffers(txr);
3647                 em_dma_free(adapter, &txr->txdma);
3648                 EM_TX_UNLOCK(txr);
3649                 EM_TX_LOCK_DESTROY(txr);
3650         }
3651
3652         free(adapter->tx_rings, M_DEVBUF);
3653 }
3654
3655 /*********************************************************************
3656  *
3657  *  Free transmit ring related data structures.
3658  *
3659  **********************************************************************/
3660 static void
3661 em_free_transmit_buffers(struct tx_ring *txr)
3662 {
3663         struct adapter          *adapter = txr->adapter;
3664         struct em_buffer        *txbuf;
3665
3666         INIT_DEBUGOUT("free_transmit_ring: begin");
3667
3668         if (txr->tx_buffers == NULL)
3669                 return;
3670
3671         for (int i = 0; i < adapter->num_tx_desc; i++) {
3672                 txbuf = &txr->tx_buffers[i];
3673                 if (txbuf->m_head != NULL) {
3674                         bus_dmamap_sync(txr->txtag, txbuf->map,
3675                             BUS_DMASYNC_POSTWRITE);
3676                         bus_dmamap_unload(txr->txtag,
3677                             txbuf->map);
3678                         m_freem(txbuf->m_head);
3679                         txbuf->m_head = NULL;
3680                         if (txbuf->map != NULL) {
3681                                 bus_dmamap_destroy(txr->txtag,
3682                                     txbuf->map);
3683                                 txbuf->map = NULL;
3684                         }
3685                 } else if (txbuf->map != NULL) {
3686                         bus_dmamap_unload(txr->txtag,
3687                             txbuf->map);
3688                         bus_dmamap_destroy(txr->txtag,
3689                             txbuf->map);
3690                         txbuf->map = NULL;
3691                 }
3692         }
3693 #if __FreeBSD_version >= 800000
3694         if (txr->br != NULL)
3695                 buf_ring_free(txr->br, M_DEVBUF);
3696 #endif
3697         if (txr->tx_buffers != NULL) {
3698                 free(txr->tx_buffers, M_DEVBUF);
3699                 txr->tx_buffers = NULL;
3700         }
3701         if (txr->txtag != NULL) {
3702                 bus_dma_tag_destroy(txr->txtag);
3703                 txr->txtag = NULL;
3704         }
3705         return;
3706 }
3707
3708
3709 /*********************************************************************
3710  *  The offload context is protocol specific (TCP/UDP) and thus
3711  *  only needs to be set when the protocol changes. A context
3712  *  change can be a performance detriment, and offload might be
3713  *  better just disabled. The reason arises in the way
3714  *  in which the controller supports pipelined requests from the
3715  *  Tx data DMA. Up to four requests can be pipelined, and they may
3716  *  belong to the same packet or to multiple packets. However all
3717  *  requests for one packet are issued before a request is issued
3718  *  for a subsequent packet and if a request for the next packet
3719  *  requires a context change, that request will be stalled
3720  *  until the previous request completes. This means setting up
3721  *  a new context effectively disables pipelined Tx data DMA, which
3722  *  in turn greatly slows down performance when sending small
3723  *  frames.
3724  **********************************************************************/
3725 static void
3726 em_transmit_checksum_setup(struct tx_ring *txr, struct mbuf *mp, int ip_off,
3727     struct ip *ip, u32 *txd_upper, u32 *txd_lower)
3728 {
3729         struct adapter                  *adapter = txr->adapter;
3730         struct e1000_context_desc       *TXD = NULL;
3731         struct em_buffer                *tx_buffer;
3732         int                             cur, hdr_len;
3733         u32                             cmd = 0;
3734         u16                             offload = 0;
3735         u8                              ipcso, ipcss, tucso, tucss;
3736
3737         ipcss = ipcso = tucss = tucso = 0;
3738         hdr_len = ip_off + (ip->ip_hl << 2);
3739         cur = txr->next_avail_desc;
3740
3741         /* Setup of IP header checksum. */
3742         if (mp->m_pkthdr.csum_flags & CSUM_IP) {
3743                 *txd_upper |= E1000_TXD_POPTS_IXSM << 8;
3744                 offload |= CSUM_IP;
3745                 ipcss = ip_off;
3746                 ipcso = ip_off + offsetof(struct ip, ip_sum);
3747                 /*
3748                  * Start offset for header checksum calculation.
3749                  * End offset for header checksum calculation.
3750                  * Offset of place to put the checksum.
3751                  */
3752                 TXD = (struct e1000_context_desc *)&txr->tx_base[cur];
3753                 TXD->lower_setup.ip_fields.ipcss = ipcss;
3754                 TXD->lower_setup.ip_fields.ipcse = htole16(hdr_len);
3755                 TXD->lower_setup.ip_fields.ipcso = ipcso;
3756                 cmd |= E1000_TXD_CMD_IP;
3757         }
3758
3759         if (mp->m_pkthdr.csum_flags & CSUM_TCP) {
3760                 *txd_lower = E1000_TXD_CMD_DEXT | E1000_TXD_DTYP_D;
3761                 *txd_upper |= E1000_TXD_POPTS_TXSM << 8;
3762                 offload |= CSUM_TCP;
3763                 tucss = hdr_len;
3764                 tucso = hdr_len + offsetof(struct tcphdr, th_sum);
3765                 /*
3766                  * Setting up a new checksum offload context for every frame
3767                  * takes a lot of processing time for the hardware. This also
3768                  * reduces performance a lot for small sized frames, so avoid
3769                  * it if the driver can use a previously configured checksum
3770                  * offload context.
3771                  */
3772                 if (txr->last_hw_offload == offload) {
3773                         if (offload & CSUM_IP) {
3774                                 if (txr->last_hw_ipcss == ipcss &&
3775                                     txr->last_hw_ipcso == ipcso &&
3776                                     txr->last_hw_tucss == tucss &&
3777                                     txr->last_hw_tucso == tucso)
3778                                         return;
3779                         } else {
3780                                 if (txr->last_hw_tucss == tucss &&
3781                                     txr->last_hw_tucso == tucso)
3782                                         return;
3783                         }
3784                 }
3785                 txr->last_hw_offload = offload;
3786                 txr->last_hw_tucss = tucss;
3787                 txr->last_hw_tucso = tucso;
3788                 /*
3789                  * Start offset for payload checksum calculation.
3790                  * End offset for payload checksum calculation.
3791                  * Offset of place to put the checksum.
3792                  */
3793                 TXD = (struct e1000_context_desc *)&txr->tx_base[cur];
3794                 TXD->upper_setup.tcp_fields.tucss = hdr_len;
3795                 TXD->upper_setup.tcp_fields.tucse = htole16(0);
3796                 TXD->upper_setup.tcp_fields.tucso = tucso;
3797                 cmd |= E1000_TXD_CMD_TCP;
3798         } else if (mp->m_pkthdr.csum_flags & CSUM_UDP) {
3799                 *txd_lower = E1000_TXD_CMD_DEXT | E1000_TXD_DTYP_D;
3800                 *txd_upper |= E1000_TXD_POPTS_TXSM << 8;
3801                 tucss = hdr_len;
3802                 tucso = hdr_len + offsetof(struct udphdr, uh_sum);
3803                 /*
3804                  * Setting up a new checksum offload context for every frame
3805                  * takes a lot of processing time for the hardware. This also
3806                  * reduces performance a lot for small sized frames, so avoid
3807                  * it if the driver can use a previously configured checksum
3808                  * offload context.
3809                  */
3810                 if (txr->last_hw_offload == offload) {
3811                         if (offload & CSUM_IP) {
3812                                 if (txr->last_hw_ipcss == ipcss &&
3813                                     txr->last_hw_ipcso == ipcso &&
3814                                     txr->last_hw_tucss == tucss &&
3815                                     txr->last_hw_tucso == tucso)
3816                                         return;
3817                         } else {
3818                                 if (txr->last_hw_tucss == tucss &&
3819                                     txr->last_hw_tucso == tucso)
3820                                         return;
3821                         }
3822                 }
3823                 txr->last_hw_offload = offload;
3824                 txr->last_hw_tucss = tucss;
3825                 txr->last_hw_tucso = tucso;
3826                 /*
3827                  * Start offset for header checksum calculation.
3828                  * End offset for header checksum calculation.
3829                  * Offset of place to put the checksum.
3830                  */
3831                 TXD = (struct e1000_context_desc *)&txr->tx_base[cur];
3832                 TXD->upper_setup.tcp_fields.tucss = tucss;
3833                 TXD->upper_setup.tcp_fields.tucse = htole16(0);
3834                 TXD->upper_setup.tcp_fields.tucso = tucso;
3835         }
3836   
3837         if (offload & CSUM_IP) {
3838                 txr->last_hw_ipcss = ipcss;
3839                 txr->last_hw_ipcso = ipcso;
3840         }
3841
3842         TXD->tcp_seg_setup.data = htole32(0);
3843         TXD->cmd_and_length =
3844             htole32(adapter->txd_cmd | E1000_TXD_CMD_DEXT | cmd);
3845         tx_buffer = &txr->tx_buffers[cur];
3846         tx_buffer->m_head = NULL;
3847         tx_buffer->next_eop = -1;
3848
3849         if (++cur == adapter->num_tx_desc)
3850                 cur = 0;
3851
3852         txr->tx_avail--;
3853         txr->next_avail_desc = cur;
3854 }
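
/*
** Worked example (illustrative): for a TCP/IPv4 packet on plain
** Ethernet (ip_off 14, 20-byte IP header) the context set up above
** comes out to:
**      ipcss 14, ipcse 34, ipcso 14 + 10 = 24  (offsetof(struct ip, ip_sum))
**      tucss 34, tucse 0 (end of frame), tucso 34 + 16 = 50  (th_sum)
*/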
3855
3856
3857 /**********************************************************************
3858  *
3859  *  Setup work for hardware segmentation offload (TSO)
3860  *
3861  **********************************************************************/
3862 static void
3863 em_tso_setup(struct tx_ring *txr, struct mbuf *mp, int ip_off,
3864     struct ip *ip, struct tcphdr *tp, u32 *txd_upper, u32 *txd_lower)
3865 {
3866         struct adapter                  *adapter = txr->adapter;
3867         struct e1000_context_desc       *TXD;
3868         struct em_buffer                *tx_buffer;
3869         int cur, hdr_len;
3870
3871         /*
3872          * In theory we can use the same TSO context if and only if
3873          * the frame is the same type (IP/TCP) and has the same MSS.
3874          * However, checking whether a frame has the same IP/TCP structure
3875          * is a hard thing, so just ignore that and always reestablish a
3876          * new TSO context.
3877          */
3878         hdr_len = ip_off + (ip->ip_hl << 2) + (tp->th_off << 2);
3879         *txd_lower = (E1000_TXD_CMD_DEXT |      /* Extended descr type */
3880                       E1000_TXD_DTYP_D |        /* Data descr type */
3881                       E1000_TXD_CMD_TSE);       /* Do TSE on this packet */
3882
3883         /* IP and/or TCP header checksum calculation and insertion. */
3884         *txd_upper = (E1000_TXD_POPTS_IXSM | E1000_TXD_POPTS_TXSM) << 8;
3885
3886         cur = txr->next_avail_desc;
3887         tx_buffer = &txr->tx_buffers[cur];
3888         TXD = (struct e1000_context_desc *) &txr->tx_base[cur];
3889
3890         /*
3891          * Start offset for header checksum calculation.
3892          * End offset for header checksum calculation.
3893          * Offset of place to put the checksum.
3894          */
3895         TXD->lower_setup.ip_fields.ipcss = ip_off;
3896         TXD->lower_setup.ip_fields.ipcse =
3897             htole16(ip_off + (ip->ip_hl << 2) - 1);
3898         TXD->lower_setup.ip_fields.ipcso = ip_off + offsetof(struct ip, ip_sum);
3899         /*
3900          * Start offset for payload checksum calculation.
3901          * End offset for payload checksum calculation.
3902          * Offset of place to put the checksum.
3903          */
3904         TXD->upper_setup.tcp_fields.tucss = ip_off + (ip->ip_hl << 2);
3905         TXD->upper_setup.tcp_fields.tucse = 0;
3906         TXD->upper_setup.tcp_fields.tucso =
3907             ip_off + (ip->ip_hl << 2) + offsetof(struct tcphdr, th_sum);
3908         /*
3909          * Payload size per packet w/o any headers.
3910          * Length of all headers up to payload.
3911          */
3912         TXD->tcp_seg_setup.fields.mss = htole16(mp->m_pkthdr.tso_segsz);
3913         TXD->tcp_seg_setup.fields.hdr_len = hdr_len;
3914
3915         TXD->cmd_and_length = htole32(adapter->txd_cmd |
3916                                 E1000_TXD_CMD_DEXT |    /* Extended descr */
3917                                 E1000_TXD_CMD_TSE |     /* TSE context */
3918                                 E1000_TXD_CMD_IP |      /* Do IP csum */
3919                                 E1000_TXD_CMD_TCP |     /* Do TCP checksum */
3920                                 (mp->m_pkthdr.len - (hdr_len))); /* Total len */
3921
3922         tx_buffer->m_head = NULL;
3923         tx_buffer->next_eop = -1;
3924
3925         if (++cur == adapter->num_tx_desc)
3926                 cur = 0;
3927
3928         txr->tx_avail--;
3929         txr->next_avail_desc = cur;
3930         txr->tx_tso = TRUE;
3931 }
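
/*
** Worked example (illustrative): a TSO frame on plain Ethernet with
** 20-byte IP and TCP headers (ip_off 14, ip_hl 5, th_off 5) gives
**      hdr_len = 14 + 20 + 20 = 54
**      ipcss 14, ipcse 33, ipcso 24; tucss 34, tucso 50
** and mss/hdr_len tell the hardware to carve the remaining
** m_pkthdr.len - 54 bytes into MSS-sized segments.
*/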
3932
3933
3934 /**********************************************************************
3935  *
3936  *  Examine each tx_buffer in the used queue. If the hardware is done
3937  *  processing the packet then free associated resources. The
3938  *  tx_buffer is put back on the free queue.
3939  *
3940  **********************************************************************/
3941 static void
3942 em_txeof(struct tx_ring *txr)
3943 {
3944         struct adapter  *adapter = txr->adapter;
3945         int first, last, done, processed;
3946         struct em_buffer *tx_buffer;
3947         struct e1000_tx_desc   *tx_desc, *eop_desc;
3948         struct ifnet   *ifp = adapter->ifp;
3949
3950         EM_TX_LOCK_ASSERT(txr);
3951 #ifdef DEV_NETMAP
3952         if (netmap_tx_irq(ifp, txr->me))
3953                 return;
3954 #endif /* DEV_NETMAP */
3955
3956         /* No work, make sure hang detection is disabled */
3957         if (txr->tx_avail == adapter->num_tx_desc) {
3958                 txr->busy = EM_TX_IDLE;
3959                 return;
3960         }
3961
3962         processed = 0;
3963         first = txr->next_to_clean;
3964         tx_desc = &txr->tx_base[first];
3965         tx_buffer = &txr->tx_buffers[first];
3966         last = tx_buffer->next_eop;
3967         eop_desc = &txr->tx_base[last];
3968
3969         /*
3970          * What this does is get the index of the
3971          * first descriptor AFTER the EOP of the 
3972          * first packet, that way we can do the
3973          * simple comparison on the inner while loop.
3974          */
3975         if (++last == adapter->num_tx_desc)
3976                 last = 0;
3977         done = last;
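        /*
        ** e.g.: if first = 10 and that packet's EOP sits at 12, then
        ** last/done become 13 and the loop below cleans descriptors
        ** 10, 11 and 12, stopping when first reaches done.
        */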
3978
3979         bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
3980             BUS_DMASYNC_POSTREAD);
3981
3982         while (eop_desc->upper.fields.status & E1000_TXD_STAT_DD) {
3983                 /* We clean the range of the packet */
3984                 while (first != done) {
3985                         tx_desc->upper.data = 0;
3986                         tx_desc->lower.data = 0;
3987                         tx_desc->buffer_addr = 0;
3988                         ++txr->tx_avail;
3989                         ++processed;
3990
3991                         if (tx_buffer->m_head) {
3992                                 bus_dmamap_sync(txr->txtag,
3993                                     tx_buffer->map,
3994                                     BUS_DMASYNC_POSTWRITE);
3995                                 bus_dmamap_unload(txr->txtag,
3996                                     tx_buffer->map);
3997                                 m_freem(tx_buffer->m_head);
3998                                 tx_buffer->m_head = NULL;
3999                         }
4000                         tx_buffer->next_eop = -1;
4001
4002                         if (++first == adapter->num_tx_desc)
4003                                 first = 0;
4004
4005                         tx_buffer = &txr->tx_buffers[first];
4006                         tx_desc = &txr->tx_base[first];
4007                 }
4008                 ++ifp->if_opackets;
4009                 /* See if we can continue to the next packet */
4010                 last = tx_buffer->next_eop;
4011                 if (last != -1) {
4012                         eop_desc = &txr->tx_base[last];
4013                         /* Get new done point */
4014                         if (++last == adapter->num_tx_desc) last = 0;
4015                         done = last;
4016                 } else
4017                         break;
4018         }
4019         bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
4020             BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
4021
4022         txr->next_to_clean = first;
4023
4024         /*
4025         ** Hang detection: we know there's work outstanding
4026         ** or the early return above would have been taken, so no
4027         ** descriptor processed here indicates a potential hang.
4028         ** The local timer will examine this and do a reset if needed.
4029         */
4030         if (processed == 0) {
4031                 if (txr->busy != EM_TX_HUNG)
4032                         ++txr->busy;
4033         } else /* At least one descriptor was cleaned */
4034                 txr->busy = EM_TX_BUSY; /* note this clears HUNG */
4035
4036         /*
4037          * If we have a minimum free, clear IFF_DRV_OACTIVE
4038          * to tell the stack that it is OK to send packets.
4039          * Notice that all writes of OACTIVE happen under the
4040          * TX lock which, with a single queue, guarantees 
4041          * sanity.
4042          */
4043         if (txr->tx_avail >= EM_MAX_SCATTER) {
4044                 ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;
4045         }
4046
4047         /* Disable hang detection if all clean */
4048         if (txr->tx_avail == adapter->num_tx_desc)
4049                 txr->busy = EM_TX_IDLE;
4050 }
4051
4052
4053 /*********************************************************************
4054  *
4055  *  Refresh RX descriptor mbufs from system mbuf buffer pool.
4056  *
4057  **********************************************************************/
4058 static void
4059 em_refresh_mbufs(struct rx_ring *rxr, int limit)
4060 {
4061         struct adapter          *adapter = rxr->adapter;
4062         struct mbuf             *m;
4063         bus_dma_segment_t       segs[1];
4064         struct em_buffer        *rxbuf;
4065         int                     i, j, error, nsegs;
4066         bool                    cleaned = FALSE;
4067
4068         i = j = rxr->next_to_refresh;
4069         /*
4070         ** Get one descriptor beyond
4071         ** our work mark to control
4072         ** the loop.
4073         */
4074         if (++j == adapter->num_rx_desc)
4075                 j = 0;
4076
4077         while (j != limit) {
4078                 rxbuf = &rxr->rx_buffers[i];
4079                 if (rxbuf->m_head == NULL) {
4080                         m = m_getjcl(M_NOWAIT, MT_DATA,
4081                             M_PKTHDR, adapter->rx_mbuf_sz);
4082                         /*
4083                         ** If we have a temporary resource shortage
4084                         ** that causes a failure, just abort the
4085                         ** refresh for now; we will return to this
4086                         ** point when reinvoked from em_rxeof.
4087                         */
4088                         if (m == NULL)
4089                                 goto update;
4090                 } else
4091                         m = rxbuf->m_head;
4092
4093                 m->m_len = m->m_pkthdr.len = adapter->rx_mbuf_sz;
4094                 m->m_flags |= M_PKTHDR;
4095                 m->m_data = m->m_ext.ext_buf;
4096
4097                 /* Use bus_dma machinery to setup the memory mapping  */
4098                 error = bus_dmamap_load_mbuf_sg(rxr->rxtag, rxbuf->map,
4099                     m, segs, &nsegs, BUS_DMA_NOWAIT);
4100                 if (error != 0) {
4101                         printf("Refresh mbufs: hdr dmamap load"
4102                             " failure - %d\n", error);
4103                         m_free(m);
4104                         rxbuf->m_head = NULL;
4105                         goto update;
4106                 }
4107                 rxbuf->m_head = m;
4108                 bus_dmamap_sync(rxr->rxtag,
4109                     rxbuf->map, BUS_DMASYNC_PREREAD);
4110                 rxr->rx_base[i].buffer_addr = htole64(segs[0].ds_addr);
4111                 cleaned = TRUE;
4112
4113                 i = j; /* Next is precalculated for us */
4114                 rxr->next_to_refresh = i;
4115                 /* Calculate next controlling index */
4116                 if (++j == adapter->num_rx_desc)
4117                         j = 0;
4118         }
4119 update:
4120         /*
4121         ** Update the tail pointer only if, and
4122         ** only as far as, we have refreshed.
4123         */
4124         if (cleaned)
4125                 E1000_WRITE_REG(&adapter->hw,
4126                     E1000_RDT(rxr->me), rxr->next_to_refresh);
4127
4128         return;
4129 }
4130
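/*
** Worked example of the refresh window above: with num_rx_desc = 256,
** next_to_refresh = 250 and limit = 4, the loop refreshes indices
** 250..255 and 0..2 and exits when the look-ahead index j reaches 4.
** Descriptor 3 (limit - 1) is deliberately left untouched, preserving
** the one-descriptor gap that keeps the tail (RDT) from overtaking
** the descriptor the hardware is still working on.
*/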
4131
4132 /*********************************************************************
4133  *
4134  *  Allocate memory for rx_buffer structures. Since we use one
4135  *  rx_buffer per received packet, the maximum number of rx_buffers
4136  *  that we'll need is equal to the number of receive descriptors
4137  *  that we've allocated.
4138  *
4139  **********************************************************************/
4140 static int
4141 em_allocate_receive_buffers(struct rx_ring *rxr)
4142 {
4143         struct adapter          *adapter = rxr->adapter;
4144         device_t                dev = adapter->dev;
4145         struct em_buffer        *rxbuf;
4146         int                     error;
4147
4148         rxr->rx_buffers = malloc(sizeof(struct em_buffer) *
4149             adapter->num_rx_desc, M_DEVBUF, M_NOWAIT | M_ZERO);
4150         if (rxr->rx_buffers == NULL) {
4151                 device_printf(dev, "Unable to allocate rx_buffer memory\n");
4152                 return (ENOMEM);
4153         }
4154
4155         error = bus_dma_tag_create(bus_get_dma_tag(dev), /* parent */
4156                                 1, 0,                   /* alignment, bounds */
4157                                 BUS_SPACE_MAXADDR,      /* lowaddr */
4158                                 BUS_SPACE_MAXADDR,      /* highaddr */
4159                                 NULL, NULL,             /* filter, filterarg */
4160                                 MJUM9BYTES,             /* maxsize */
4161                                 1,                      /* nsegments */
4162                                 MJUM9BYTES,             /* maxsegsize */
4163                                 0,                      /* flags */
4164                                 NULL,                   /* lockfunc */
4165                                 NULL,                   /* lockarg */
4166                                 &rxr->rxtag);
4167         if (error) {
4168                 device_printf(dev, "%s: bus_dma_tag_create failed %d\n",
4169                     __func__, error);
4170                 goto fail;
4171         }
4172
4174         for (int i = 0; i < adapter->num_rx_desc; i++) {
4175                 rxbuf = &rxr->rx_buffers[i];
4176                 error = bus_dmamap_create(rxr->rxtag, 0, &rxbuf->map);
4177                 if (error) {
4178                         device_printf(dev, "%s: bus_dmamap_create failed: %d\n",
4179                             __func__, error);
4180                         goto fail;
4181                 }
4182         }
4183
4184         return (0);
4185
4186 fail:
4187         em_free_receive_structures(adapter);
4188         return (error);
4189 }
4190
4191
4192 /*********************************************************************
4193  *
4194  *  Initialize a receive ring and its buffers.
4195  *
4196  **********************************************************************/
4197 static int
4198 em_setup_receive_ring(struct rx_ring *rxr)
4199 {
4200         struct  adapter         *adapter = rxr->adapter;
4201         struct em_buffer        *rxbuf;
4202         bus_dma_segment_t       seg[1];
4203         int                     rsize, nsegs, error = 0;
4204 #ifdef DEV_NETMAP
4205         struct netmap_adapter *na = NA(adapter->ifp);
4206         struct netmap_slot *slot;
4207 #endif
4208
4209
4210         /* Clear the ring contents */
4211         EM_RX_LOCK(rxr);
4212         rsize = roundup2(adapter->num_rx_desc *
4213             sizeof(struct e1000_rx_desc), EM_DBA_ALIGN);
4214         bzero((void *)rxr->rx_base, rsize);
4215 #ifdef DEV_NETMAP
4216         slot = netmap_reset(na, NR_RX, 0, 0);
4217 #endif
4218
4219         /*
4220         ** Free current RX buffer structs and their mbufs
4221         */
4222         for (int i = 0; i < adapter->num_rx_desc; i++) {
4223                 rxbuf = &rxr->rx_buffers[i];
4224                 if (rxbuf->m_head != NULL) {
4225                         bus_dmamap_sync(rxr->rxtag, rxbuf->map,
4226                             BUS_DMASYNC_POSTREAD);
4227                         bus_dmamap_unload(rxr->rxtag, rxbuf->map);
4228                         m_freem(rxbuf->m_head);
4229                         rxbuf->m_head = NULL; /* mark as freed */
4230                 }
4231         }
4232
4233         /* Now replenish the mbufs */
4234         for (int j = 0; j != adapter->num_rx_desc; ++j) {
4235                 rxbuf = &rxr->rx_buffers[j];
4236 #ifdef DEV_NETMAP
4237                 if (slot) {
4238                         int si = netmap_idx_n2k(&na->rx_rings[rxr->me], j);
4239                         uint64_t paddr;
4240                         void *addr;
4241
4242                         addr = PNMB(na, slot + si, &paddr);
4243                         netmap_load_map(na, rxr->rxtag, rxbuf->map, addr);
4244                         /* Update descriptor */
4245                         rxr->rx_base[j].buffer_addr = htole64(paddr);
4246                         continue;
4247                 }
4248 #endif /* DEV_NETMAP */
4249                 rxbuf->m_head = m_getjcl(M_NOWAIT, MT_DATA,
4250                     M_PKTHDR, adapter->rx_mbuf_sz);
4251                 if (rxbuf->m_head == NULL) {
4252                         error = ENOBUFS;
4253                         goto fail;
4254                 }
4255                 rxbuf->m_head->m_len = adapter->rx_mbuf_sz;
4256                 rxbuf->m_head->m_flags &= ~M_HASFCS; /* we strip it */
4257                 rxbuf->m_head->m_pkthdr.len = adapter->rx_mbuf_sz;
4258
4259                 /* Get the memory mapping */
4260                 error = bus_dmamap_load_mbuf_sg(rxr->rxtag,
4261                     rxbuf->map, rxbuf->m_head, seg,
4262                     &nsegs, BUS_DMA_NOWAIT);
4263                 if (error != 0) {
4264                         m_freem(rxbuf->m_head);
4265                         rxbuf->m_head = NULL;
4266                         goto fail;
4267                 }
4268                 bus_dmamap_sync(rxr->rxtag,
4269                     rxbuf->map, BUS_DMASYNC_PREREAD);
4270
4271                 /* Update descriptor */
4272                 rxr->rx_base[j].buffer_addr = htole64(seg[0].ds_addr);
4273         }
4274         rxr->next_to_check = 0;
4275         rxr->next_to_refresh = 0;
4276         bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
4277             BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
4278
4279 fail:
4280         EM_RX_UNLOCK(rxr);
4281         return (error);
4282 }
4283
4284 /*********************************************************************
4285  *
4286  *  Initialize all receive rings.
4287  *
4288  **********************************************************************/
4289 static int
4290 em_setup_receive_structures(struct adapter *adapter)
4291 {
4292         struct rx_ring *rxr = adapter->rx_rings;
4293         int q;
4294
4295         for (q = 0; q < adapter->num_queues; q++, rxr++)
4296                 if (em_setup_receive_ring(rxr))
4297                         goto fail;
4298
4299         return (0);
4300 fail:
4301         /*
4302          * Free the RX buffers allocated so far; we only handle the
4303          * rings that completed, since the failing ring will have
4304          * cleaned up after itself. 'q' failed, so it's the terminus.
4305          */
4306         for (int i = 0; i < q; ++i) {
4307                 rxr = &adapter->rx_rings[i];
4308                 for (int n = 0; n < adapter->num_rx_desc; n++) {
4309                         struct em_buffer *rxbuf;
4310                         rxbuf = &rxr->rx_buffers[n];
4311                         if (rxbuf->m_head != NULL) {
4312                                 bus_dmamap_sync(rxr->rxtag, rxbuf->map,
4313                                   BUS_DMASYNC_POSTREAD);
4314                                 bus_dmamap_unload(rxr->rxtag, rxbuf->map);
4315                                 m_freem(rxbuf->m_head);
4316                                 rxbuf->m_head = NULL;
4317                         }
4318                 }
4319                 rxr->next_to_check = 0;
4320                 rxr->next_to_refresh = 0;
4321         }
4322
4323         return (ENOBUFS);
4324 }
4325
4326 /*********************************************************************
4327  *
4328  *  Free all receive rings.
4329  *
4330  **********************************************************************/
4331 static void
4332 em_free_receive_structures(struct adapter *adapter)
4333 {
4334         struct rx_ring *rxr = adapter->rx_rings;
4335
4336         for (int i = 0; i < adapter->num_queues; i++, rxr++) {
4337                 em_free_receive_buffers(rxr);
4338                 /* Free the ring memory as well */
4339                 em_dma_free(adapter, &rxr->rxdma);
4340                 EM_RX_LOCK_DESTROY(rxr);
4341         }
4342
4343         free(adapter->rx_rings, M_DEVBUF);
4344 }
4345
4346
4347 /*********************************************************************
4348  *
4349  *  Free receive ring data structures
4350  *
4351  **********************************************************************/
4352 static void
4353 em_free_receive_buffers(struct rx_ring *rxr)
4354 {
4355         struct adapter          *adapter = rxr->adapter;
4356         struct em_buffer        *rxbuf = NULL;
4357
4358         INIT_DEBUGOUT("free_receive_buffers: begin");
4359
4360         if (rxr->rx_buffers != NULL) {
4361                 for (int i = 0; i < adapter->num_rx_desc; i++) {
4362                         rxbuf = &rxr->rx_buffers[i];
4363                         if (rxbuf->map != NULL) {
4364                                 bus_dmamap_sync(rxr->rxtag, rxbuf->map,
4365                                     BUS_DMASYNC_POSTREAD);
4366                                 bus_dmamap_unload(rxr->rxtag, rxbuf->map);
4367                                 bus_dmamap_destroy(rxr->rxtag, rxbuf->map);
4368                         }
4369                         if (rxbuf->m_head != NULL) {
4370                                 m_freem(rxbuf->m_head);
4371                                 rxbuf->m_head = NULL;
4372                         }
4373                 }
4374                 free(rxr->rx_buffers, M_DEVBUF);
4375                 rxr->rx_buffers = NULL;
4376                 rxr->next_to_check = 0;
4377                 rxr->next_to_refresh = 0;
4378         }
4379
4380         if (rxr->rxtag != NULL) {
4381                 bus_dma_tag_destroy(rxr->rxtag);
4382                 rxr->rxtag = NULL;
4383         }
4384
4385         return;
4386 }
4387
4388
4389 /*********************************************************************
4390  *
4391  *  Enable receive unit.
4392  *
4393  **********************************************************************/
4394
4395 static void
4396 em_initialize_receive_unit(struct adapter *adapter)
4397 {
4398         struct rx_ring  *rxr = adapter->rx_rings;
4399         struct ifnet    *ifp = adapter->ifp;
4400         struct e1000_hw *hw = &adapter->hw;
4401         u64     bus_addr;
4402         u32     rctl, rxcsum;
4403
4404         INIT_DEBUGOUT("em_initialize_receive_unit: begin");
4405
4406         /*
4407          * Make sure receives are disabled while setting
4408          * up the descriptor ring
4409          */
4410         rctl = E1000_READ_REG(hw, E1000_RCTL);
4411         /* Do not disable if ever enabled on this hardware */
4412         if ((hw->mac.type != e1000_82574) && (hw->mac.type != e1000_82583))
4413                 E1000_WRITE_REG(hw, E1000_RCTL, rctl & ~E1000_RCTL_EN);
4414
4415         E1000_WRITE_REG(&adapter->hw, E1000_RADV,
4416             adapter->rx_abs_int_delay.value);
4417
4418         E1000_WRITE_REG(&adapter->hw, E1000_RDTR,
4419             adapter->rx_int_delay.value);
4420         /*
4421          * Set the interrupt throttling rate. Value is calculated
4422          * as DEFAULT_ITR = 1/(MAX_INTS_PER_SEC * 256ns)
4423          */
4424         E1000_WRITE_REG(hw, E1000_ITR, DEFAULT_ITR);
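        /*
        ** Worked example, assuming the MAX_INTS_PER_SEC of 8000 from
        ** if_em.h: the target interval is 1/8000 s = 125000 ns, which
        ** in the ITR's 256 ns units gives DEFAULT_ITR =
        ** 125000 / 256 ~= 488 (0x1E8).
        */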
4425
4426         /*
4427         ** When using MSIX interrupts we need to throttle
4428         ** using the EITR register (82574 only)
4429         */
4430         if (hw->mac.type == e1000_82574) {
4431                 u32 rfctl;
4432                 for (int i = 0; i < 4; i++)
4433                         E1000_WRITE_REG(hw, E1000_EITR_82574(i),
4434                             DEFAULT_ITR);
4435                 /* Disable accelerated acknowledge */
4436                 rfctl = E1000_READ_REG(hw, E1000_RFCTL);
4437                 rfctl |= E1000_RFCTL_ACK_DIS;
4438                 E1000_WRITE_REG(hw, E1000_RFCTL, rfctl);
4439         }
4440
4441         rxcsum = E1000_READ_REG(hw, E1000_RXCSUM);
4442         if (ifp->if_capenable & IFCAP_RXCSUM) {
4443 #ifdef EM_MULTIQUEUE
4444                 rxcsum |= E1000_RXCSUM_TUOFL |
4445                           E1000_RXCSUM_IPOFL |
4446                           E1000_RXCSUM_PCSD;
4447 #else
4448                 rxcsum |= E1000_RXCSUM_TUOFL;
4449 #endif
4450         } else
4451                 rxcsum &= ~E1000_RXCSUM_TUOFL;
4452
4453         E1000_WRITE_REG(hw, E1000_RXCSUM, rxcsum);
4454
4455 #ifdef EM_MULTIQUEUE
4456         if (adapter->num_queues > 1) {
4457                 uint32_t rss_key[10];
4458                 uint32_t reta;
4459                 int i;
4460
4461                 /*
4462                 * Configure RSS key
4463                 */
4464                 arc4rand(rss_key, sizeof(rss_key), 0);
4465                 for (i = 0; i < 10; ++i)
4466                         E1000_WRITE_REG_ARRAY(hw,E1000_RSSRK(0), i, rss_key[i]);
4467
4468                 /*
4469                 * Configure the RSS redirect table in the following fashion:
4470                 * (hash & ring_cnt_mask) == rdr_table[(hash & rdr_table_mask)]
4471                 */
4472                 reta = 0;
4473                 for (i = 0; i < 4; ++i) {
4474                         uint32_t q;
4475                         q = (i % adapter->num_queues) << 7;
4476                         reta |= q << (8 * i);
4477                 }
4478                 for (i = 0; i < 32; ++i)
4479                         E1000_WRITE_REG(hw, E1000_RETA(i), reta);
4480
4481                 E1000_WRITE_REG(hw, E1000_MRQC, E1000_MRQC_RSS_ENABLE_2Q | 
4482                                 E1000_MRQC_RSS_FIELD_IPV4_TCP |
4483                                 E1000_MRQC_RSS_FIELD_IPV4 |
4484                                 E1000_MRQC_RSS_FIELD_IPV6_TCP_EX |
4485                                 E1000_MRQC_RSS_FIELD_IPV6_EX |
4486                                 E1000_MRQC_RSS_FIELD_IPV6 |
4487                                 E1000_MRQC_RSS_FIELD_IPV6_TCP);
4488         }
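        /*
        ** Worked example of the table built above: with num_queues = 2
        ** the four packed bytes are 0x00, 0x80, 0x00, 0x80, i.e.
        ** reta = 0x80008000.  Each byte of a RETA register is one
        ** redirection entry with the queue select in bit 7, so the 32
        ** registers written form a 128-entry table alternating between
        ** queue 0 and queue 1.
        */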
4489 #endif
4490         /*
4491         ** XXX TEMPORARY WORKAROUND: on some systems with 82573
4492         ** long latencies are observed, like Lenovo X60. This
4493         ** change eliminates the problem, but since having positive
4494         ** values in RDTR is a known source of problems on other
4495         ** platforms another solution is being sought.
4496         */
4497         if (hw->mac.type == e1000_82573)
4498                 E1000_WRITE_REG(hw, E1000_RDTR, 0x20);
4499
4500         for (int i = 0; i < adapter->num_queues; i++, rxr++) {
4501                 /* Setup the Base and Length of the Rx Descriptor Ring */
4502                 u32 rdt = adapter->num_rx_desc - 1; /* default */
4503
4504                 bus_addr = rxr->rxdma.dma_paddr;
4505                 E1000_WRITE_REG(hw, E1000_RDLEN(i),
4506                     adapter->num_rx_desc * sizeof(struct e1000_rx_desc));
4507                 E1000_WRITE_REG(hw, E1000_RDBAH(i), (u32)(bus_addr >> 32));
4508                 E1000_WRITE_REG(hw, E1000_RDBAL(i), (u32)bus_addr);
4509                 /* Setup the Head and Tail Descriptor Pointers */
4510                 E1000_WRITE_REG(hw, E1000_RDH(i), 0);
4511 #ifdef DEV_NETMAP
4512                 /*
4513                  * an init() while a netmap client is active must
4514                  * preserve the rx buffers passed to userspace.
4515                  */
4516                 if (ifp->if_capenable & IFCAP_NETMAP)
4517                         rdt -= nm_kr_rxspace(&NA(adapter->ifp)->rx_rings[i]);
4518 #endif /* DEV_NETMAP */
4519                 E1000_WRITE_REG(hw, E1000_RDT(i), rdt);
4520         }
4521
4522         /*
4523          * Set PTHRESH for improved jumbo performance
4524          * According to 10.2.5.11 of Intel 82574 Datasheet,
4525          * RXDCTL(1) is written whenever RXDCTL(0) is written.
4526          * Only write to RXDCTL(1) if there is a need for different
4527          * settings.
4528          */
4529         if (((adapter->hw.mac.type == e1000_ich9lan) ||
4530             (adapter->hw.mac.type == e1000_pch2lan) ||
4531             (adapter->hw.mac.type == e1000_ich10lan)) &&
4532             (ifp->if_mtu > ETHERMTU)) {
4533                 u32 rxdctl = E1000_READ_REG(hw, E1000_RXDCTL(0));
4534                 E1000_WRITE_REG(hw, E1000_RXDCTL(0), rxdctl | 3);
4535         } else if ((adapter->hw.mac.type == e1000_82574) &&
4536                   (ifp->if_mtu > ETHERMTU)) {
4537                 for (int i = 0; i < adapter->num_queues; i++) {
4538                         u32 rxdctl = E1000_READ_REG(hw, E1000_RXDCTL(i));
4539
4540                         rxdctl |= 0x20; /* PTHRESH */
4541                         rxdctl |= 4 << 8; /* HTHRESH */
4542                         rxdctl |= 4 << 16;/* WTHRESH */
4543                         rxdctl |= 1 << 24; /* Switch to granularity */
4544                         E1000_WRITE_REG(hw, E1000_RXDCTL(i), rxdctl);
4545                 }
4546         }
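        /*
        ** Sketch of the RXDCTL packing used above, assuming the 82574
        ** datasheet field layout: PTHRESH in bits 5:0, HTHRESH in bits
        ** 13:8, WTHRESH in bits 21:16, and bit 24 selecting descriptor
        ** (rather than cache-line) granularity.  The ORs above thus
        ** request prefetch/host/write-back thresholds of 32/4/4
        ** descriptors.
        */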
4547                 
4548         if (adapter->hw.mac.type >= e1000_pch2lan) {
4549                 if (ifp->if_mtu > ETHERMTU)
4550                         e1000_lv_jumbo_workaround_ich8lan(hw, TRUE);
4551                 else
4552                         e1000_lv_jumbo_workaround_ich8lan(hw, FALSE);
4553         }
4554
4555         /* Setup the Receive Control Register */
4556         rctl &= ~(3 << E1000_RCTL_MO_SHIFT);
4557         rctl |= E1000_RCTL_EN | E1000_RCTL_BAM |
4558             E1000_RCTL_LBM_NO | E1000_RCTL_RDMTS_HALF |
4559             (hw->mac.mc_filter_type << E1000_RCTL_MO_SHIFT);
4560
4561         /* Strip the CRC */
4562         rctl |= E1000_RCTL_SECRC;
4563
4564         /* Make sure VLAN Filters are off */
4565         rctl &= ~E1000_RCTL_VFE;
4566         rctl &= ~E1000_RCTL_SBP;
4567
4568         if (adapter->rx_mbuf_sz == MCLBYTES)
4569                 rctl |= E1000_RCTL_SZ_2048;
4570         else if (adapter->rx_mbuf_sz == MJUMPAGESIZE)
4571                 rctl |= E1000_RCTL_SZ_4096 | E1000_RCTL_BSEX;
4572         else if (adapter->rx_mbuf_sz > MJUMPAGESIZE)
4573                 rctl |= E1000_RCTL_SZ_8192 | E1000_RCTL_BSEX;
4574
4575         if (ifp->if_mtu > ETHERMTU)
4576                 rctl |= E1000_RCTL_LPE;
4577         else
4578                 rctl &= ~E1000_RCTL_LPE;
4579
4580         /* Write out the settings */
4581         E1000_WRITE_REG(hw, E1000_RCTL, rctl);
4582
4583         return;
4584 }
4585
4586
4587 /*********************************************************************
4588  *
4589  *  This routine executes in interrupt context. It replenishes
4590  *  the mbufs in the descriptor ring and sends data which has
4591  *  been dma'ed into host memory to the upper layer.
4592  *
4593  *  We loop at most count times if count is > 0, or until done if
4594  *  count < 0.
4595  *  
4596  *  For polling we also now return the number of cleaned packets
4597  *********************************************************************/
4598 static bool
4599 em_rxeof(struct rx_ring *rxr, int count, int *done)
4600 {
4601         struct adapter          *adapter = rxr->adapter;
4602         struct ifnet            *ifp = adapter->ifp;
4603         struct mbuf             *mp, *sendmp;
4604         u8                      status = 0;
4605         u16                     len;
4606         int                     i, processed, rxdone = 0;
4607         bool                    eop;
4608         struct e1000_rx_desc    *cur;
4609
4610         EM_RX_LOCK(rxr);
4611
4612         /* Sync the ring */
4613         bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
4614             BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
4615
4616
4617 #ifdef DEV_NETMAP
4618         if (netmap_rx_irq(ifp, rxr->me, &processed)) {
4619                 EM_RX_UNLOCK(rxr);
4620                 return (FALSE);
4621         }
4622 #endif /* DEV_NETMAP */
4623
4624         for (i = rxr->next_to_check, processed = 0; count != 0;) {
4625
4626                 if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
4627                         break;
4628
4629                 cur = &rxr->rx_base[i];
4630                 status = cur->status;
4631                 mp = sendmp = NULL;
4632
4633                 if ((status & E1000_RXD_STAT_DD) == 0)
4634                         break;
4635
4636                 len = le16toh(cur->length);
4637                 eop = (status & E1000_RXD_STAT_EOP) != 0;
4638
4639                 if ((cur->errors & E1000_RXD_ERR_FRAME_ERR_MASK) ||
4640                     (rxr->discard == TRUE)) {
4641                         adapter->dropped_pkts++;
4642                         ++rxr->rx_discarded;
4643                         if (!eop) /* Catch subsequent segs */
4644                                 rxr->discard = TRUE;
4645                         else
4646                                 rxr->discard = FALSE;
4647                         em_rx_discard(rxr, i);
4648                         goto next_desc;
4649                 }
4650                 bus_dmamap_unload(rxr->rxtag, rxr->rx_buffers[i].map);
4651
4652                 /* Assign correct length to the current fragment */
4653                 mp = rxr->rx_buffers[i].m_head;
4654                 mp->m_len = len;
4655
4656                 /* Trigger for refresh */
4657                 rxr->rx_buffers[i].m_head = NULL;
4658
4659                 /* First segment? */
4660                 if (rxr->fmp == NULL) {
4661                         mp->m_pkthdr.len = len;
4662                         rxr->fmp = rxr->lmp = mp;
4663                 } else {
4664                         /* Chain mbufs together */
4665                         mp->m_flags &= ~M_PKTHDR;
4666                         rxr->lmp->m_next = mp;
4667                         rxr->lmp = mp;
4668                         rxr->fmp->m_pkthdr.len += len;
4669                 }
4670
4671                 if (eop) {
4672                         --count;
4673                         sendmp = rxr->fmp;
4674                         sendmp->m_pkthdr.rcvif = ifp;
4675                         ifp->if_ipackets++;
4676                         em_receive_checksum(cur, sendmp);
4677 #ifndef __NO_STRICT_ALIGNMENT
4678                         if (adapter->hw.mac.max_frame_size >
4679                             (MCLBYTES - ETHER_ALIGN) &&
4680                             em_fixup_rx(rxr) != 0)
4681                                 goto skip;
4682 #endif
4683                         if (status & E1000_RXD_STAT_VP) {
4684                                 sendmp->m_pkthdr.ether_vtag =
4685                                     le16toh(cur->special);
4686                                 sendmp->m_flags |= M_VLANTAG;
4687                         }
4688 #ifndef __NO_STRICT_ALIGNMENT
4689 skip:
4690 #endif
4691                         rxr->fmp = rxr->lmp = NULL;
4692                 }
4693 next_desc:
4694                 /* Sync the ring */
4695                 bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
4696                         BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
4697
4698                 /* Zero out the receive descriptors status. */
4699                 cur->status = 0;
4700                 ++rxdone;       /* cumulative for POLL */
4701                 ++processed;
4702
4703                 /* Advance our pointers to the next descriptor. */
4704                 if (++i == adapter->num_rx_desc)
4705                         i = 0;
4706
4707                 /* Send to the stack */
4708                 if (sendmp != NULL) {
4709                         rxr->next_to_check = i;
4710                         EM_RX_UNLOCK(rxr);
4711                         (*ifp->if_input)(ifp, sendmp);
4712                         EM_RX_LOCK(rxr);
4713                         i = rxr->next_to_check;
4714                 }
4715
4716                 /* Only refresh mbufs every 8 descriptors */
4717                 if (processed == 8) {
4718                         em_refresh_mbufs(rxr, i);
4719                         processed = 0;
4720                 }
4721         }
4722
4723         /* Catch any remaining refresh work */
4724         if (e1000_rx_unrefreshed(rxr))
4725                 em_refresh_mbufs(rxr, i);
4726
4727         rxr->next_to_check = i;
4728         if (done != NULL)
4729                 *done = rxdone;
4730         EM_RX_UNLOCK(rxr);
4731
4732         return ((status & E1000_RXD_STAT_DD) ? TRUE : FALSE);
4733 }
4734
4735 static __inline void
4736 em_rx_discard(struct rx_ring *rxr, int i)
4737 {
4738         struct em_buffer        *rbuf;
4739
4740         rbuf = &rxr->rx_buffers[i];
4741         bus_dmamap_unload(rxr->rxtag, rbuf->map);
4742
4743         /* Free any previous pieces */
4744         if (rxr->fmp != NULL) {
4745                 rxr->fmp->m_flags |= M_PKTHDR;
4746                 m_freem(rxr->fmp);
4747                 rxr->fmp = NULL;
4748                 rxr->lmp = NULL;
4749         }
4750         /*
4751         ** Free the buffer and allow em_refresh_mbufs()
4752         ** to clean up and recharge the buffer.
4753         */
4754         if (rbuf->m_head) {
4755                 m_free(rbuf->m_head);
4756                 rbuf->m_head = NULL;
4757         }
4758         return;
4759 }
4760
4761 #ifndef __NO_STRICT_ALIGNMENT
4762 /*
4763  * When jumbo frames are enabled we should realign the entire payload on
4764  * architectures with strict alignment. This is a serious design mistake of
4765  * the 8254x, as it nullifies DMA operations: the 8254x only allows the RX
4766  * buffer size to be 2048/4096/8192/16384, while what we really want is
4767  * 2048 - ETHER_ALIGN, to align the payload. On architectures without strict
4768  * alignment restrictions the 8254x still performs unaligned memory accesses,
4769  * which reduce performance as well. To avoid copying an entire frame just to
4770  * realign it, we allocate a new mbuf and copy the ethernet header into it.
4771  * The new mbuf is then prepended to the existing mbuf chain.
4772  *
4773  * Be aware that the best performance of the 8254x is achieved only when jumbo
4774  * frames are not used at all on architectures with strict alignment.
4775  */
4776 static int
4777 em_fixup_rx(struct rx_ring *rxr)
4778 {
4779         struct adapter *adapter = rxr->adapter;
4780         struct mbuf *m, *n;
4781         int error;
4782
4783         error = 0;
4784         m = rxr->fmp;
4785         if (m->m_len <= (MCLBYTES - ETHER_HDR_LEN)) {
4786                 bcopy(m->m_data, m->m_data + ETHER_HDR_LEN, m->m_len);
4787                 m->m_data += ETHER_HDR_LEN;
4788         } else {
4789                 MGETHDR(n, M_NOWAIT, MT_DATA);
4790                 if (n != NULL) {
4791                         bcopy(m->m_data, n->m_data, ETHER_HDR_LEN);
4792                         m->m_data += ETHER_HDR_LEN;
4793                         m->m_len -= ETHER_HDR_LEN;
4794                         n->m_len = ETHER_HDR_LEN;
4795                         M_MOVE_PKTHDR(n, m);
4796                         n->m_next = m;
4797                         rxr->fmp = n;
4798                 } else {
4799                         adapter->dropped_pkts++;
4800                         m_freem(rxr->fmp);
4801                         rxr->fmp = NULL;
4802                         error = ENOMEM;
4803                 }
4804         }
4805
4806         return (error);
4807 }
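/*
** Alignment arithmetic behind the fixup above: assuming the cluster is
** at least 4-byte aligned (mbuf clusters are), the 14-byte ethernet
** header leaves the IP header at offset 14, which is only 2-byte
** aligned.  Shifting the frame (or splitting off the header) by
** ETHER_HDR_LEN moves the IP header to offset 28, restoring 4-byte
** alignment for the entire payload.
*/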
4808 #endif
4809
4810 /*********************************************************************
4811  *
4812  *  Verify that the hardware indicated that the checksum is valid.
4813  *  Inform the stack about the status of checksum so that stack
4814  *  doesn't spend time verifying the checksum.
4815  *
4816  *********************************************************************/
4817 static void
4818 em_receive_checksum(struct e1000_rx_desc *rx_desc, struct mbuf *mp)
4819 {
4820         mp->m_pkthdr.csum_flags = 0;
4821
4822         /* Ignore Checksum bit is set */
4823         if (rx_desc->status & E1000_RXD_STAT_IXSM)
4824                 return;
4825
4826         if (rx_desc->errors & (E1000_RXD_ERR_TCPE | E1000_RXD_ERR_IPE))
4827                 return;
4828
4829         /* IP Checksum Good? */
4830         if (rx_desc->status & E1000_RXD_STAT_IPCS)
4831                 mp->m_pkthdr.csum_flags = (CSUM_IP_CHECKED | CSUM_IP_VALID);
4832
4833         /* TCP or UDP checksum */
4834         if (rx_desc->status & (E1000_RXD_STAT_TCPCS | E1000_RXD_STAT_UDPCS)) {
4835                 mp->m_pkthdr.csum_flags |= (CSUM_DATA_VALID | CSUM_PSEUDO_HDR);
4836                 mp->m_pkthdr.csum_data = htons(0xffff);
4837         }
4838 }
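/*
** A rough sketch of how the stack consumes these flags: with
** CSUM_DATA_VALID | CSUM_PSEUDO_HDR set, tcp_input()/udp_input() treat
** m_pkthdr.csum_data as a completed checksum that already covers the
** pseudo-header, so the 0xffff stored above verifies as correct and no
** software checksum pass over the payload is needed.
*/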
4839
4840 /*
4841  * This routine is run via a vlan
4842  * config EVENT
4843  */
4844 static void
4845 em_register_vlan(void *arg, struct ifnet *ifp, u16 vtag)
4846 {
4847         struct adapter  *adapter = ifp->if_softc;
4848         u32             index, bit;
4849
4850         if (ifp->if_softc != arg)   /* Not our event */
4851                 return;
4852
4853         if ((vtag == 0) || (vtag > 4095))       /* Invalid ID */
4854                 return;
4855
4856         EM_CORE_LOCK(adapter);
4857         index = (vtag >> 5) & 0x7F;
4858         bit = vtag & 0x1F;
4859         adapter->shadow_vfta[index] |= (1 << bit);
4860         ++adapter->num_vlans;
4861         /* Re-init to load the changes */
4862         if (ifp->if_capenable & IFCAP_VLAN_HWFILTER)
4863                 em_init_locked(adapter);
4864         EM_CORE_UNLOCK(adapter);
4865 }
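/*
** VFTA index math, worked through (illustrative values):
**
**      vtag  = 1000;
**      index = (1000 >> 5) & 0x7F;     -> word 31
**      bit   = 1000 & 0x1F;            -> bit 8
**
** so bit 8 of shadow_vfta[31] is set, mirroring the 128 x 32-bit
** filter table words the hardware consults.
*/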
4866
4867 /*
4868  * This routine is run via a vlan
4869  * unconfig EVENT
4870  */
4871 static void
4872 em_unregister_vlan(void *arg, struct ifnet *ifp, u16 vtag)
4873 {
4874         struct adapter  *adapter = ifp->if_softc;
4875         u32             index, bit;
4876
4877         if (ifp->if_softc != arg)
4878                 return;
4879
4880         if ((vtag == 0) || (vtag > 4095))       /* Invalid */
4881                 return;
4882
4883         EM_CORE_LOCK(adapter);
4884         index = (vtag >> 5) & 0x7F;
4885         bit = vtag & 0x1F;
4886         adapter->shadow_vfta[index] &= ~(1 << bit);
4887         --adapter->num_vlans;
4888         /* Re-init to load the changes */
4889         if (ifp->if_capenable & IFCAP_VLAN_HWFILTER)
4890                 em_init_locked(adapter);
4891         EM_CORE_UNLOCK(adapter);
4892 }
4893
4894 static void
4895 em_setup_vlan_hw_support(struct adapter *adapter)
4896 {
4897         struct e1000_hw *hw = &adapter->hw;
4898         u32             reg;
4899
4900         /*
4901         ** We get here thru init_locked, meaning
4902         ** a soft reset; this has already cleared
4903         ** the VFTA and other state, so if no
4904         ** vlans have been registered, do nothing.
4905         */
4906         if (adapter->num_vlans == 0)
4907                 return;
4908
4909         /*
4910         ** A soft reset zeroes out the VFTA, so
4911         ** we need to repopulate it now.
4912         */
4913         for (int i = 0; i < EM_VFTA_SIZE; i++)
4914                 if (adapter->shadow_vfta[i] != 0)
4915                         E1000_WRITE_REG_ARRAY(hw, E1000_VFTA,
4916                             i, adapter->shadow_vfta[i]);
4917
4918         reg = E1000_READ_REG(hw, E1000_CTRL);
4919         reg |= E1000_CTRL_VME;
4920         E1000_WRITE_REG(hw, E1000_CTRL, reg);
4921
4922         /* Enable the Filter Table */
4923         reg = E1000_READ_REG(hw, E1000_RCTL);
4924         reg &= ~E1000_RCTL_CFIEN;
4925         reg |= E1000_RCTL_VFE;
4926         E1000_WRITE_REG(hw, E1000_RCTL, reg);
4927 }
4928
4929 static void
4930 em_enable_intr(struct adapter *adapter)
4931 {
4932         struct e1000_hw *hw = &adapter->hw;
4933         u32 ims_mask = IMS_ENABLE_MASK;
4934
4935         if (hw->mac.type == e1000_82574) {
4936                 E1000_WRITE_REG(hw, EM_EIAC, EM_MSIX_MASK);
4937                 ims_mask |= EM_MSIX_MASK;
4938         } 
4939         E1000_WRITE_REG(hw, E1000_IMS, ims_mask);
4940 }
4941
4942 static void
4943 em_disable_intr(struct adapter *adapter)
4944 {
4945         struct e1000_hw *hw = &adapter->hw;
4946
4947         if (hw->mac.type == e1000_82574)
4948                 E1000_WRITE_REG(hw, EM_EIAC, 0);
4949         E1000_WRITE_REG(&adapter->hw, E1000_IMC, 0xffffffff);
4950 }
4951
4952 /*
4953  * Bit of a misnomer: what this really means is
4954  * to enable OS management of the system, i.e.
4955  * to disable the special hardware management features.
4956  */
4957 static void
4958 em_init_manageability(struct adapter *adapter)
4959 {
4960         /* A shared code workaround */
4961 #define E1000_82542_MANC2H E1000_MANC2H
4962         if (adapter->has_manage) {
4963                 int manc2h = E1000_READ_REG(&adapter->hw, E1000_MANC2H);
4964                 int manc = E1000_READ_REG(&adapter->hw, E1000_MANC);
4965
4966                 /* disable hardware interception of ARP */
4967                 manc &= ~(E1000_MANC_ARP_EN);
4968
4969                 /* enable receiving management packets to the host */
4970                 manc |= E1000_MANC_EN_MNG2HOST;
4971 #define E1000_MNG2HOST_PORT_623 (1 << 5)
4972 #define E1000_MNG2HOST_PORT_664 (1 << 6)
4973                 manc2h |= E1000_MNG2HOST_PORT_623;
4974                 manc2h |= E1000_MNG2HOST_PORT_664;
4975                 E1000_WRITE_REG(&adapter->hw, E1000_MANC2H, manc2h);
4976                 E1000_WRITE_REG(&adapter->hw, E1000_MANC, manc);
4977         }
4978 }
4979
4980 /*
4981  * Give control back to hardware management
4982  * controller if there is one.
4983  */
4984 static void
4985 em_release_manageability(struct adapter *adapter)
4986 {
4987         if (adapter->has_manage) {
4988                 int manc = E1000_READ_REG(&adapter->hw, E1000_MANC);
4989
4990                 /* re-enable hardware interception of ARP */
4991                 manc |= E1000_MANC_ARP_EN;
4992                 manc &= ~E1000_MANC_EN_MNG2HOST;
4993
4994                 E1000_WRITE_REG(&adapter->hw, E1000_MANC, manc);
4995         }
4996 }
4997
4998 /*
4999  * em_get_hw_control sets the {CTRL_EXT|FWSM}:DRV_LOAD bit.
5000  * For ASF and Pass Through versions of f/w this means
5001  * that the driver is loaded. For AMT version type f/w
5002  * this means that the network i/f is open.
5003  */
5004 static void
5005 em_get_hw_control(struct adapter *adapter)
5006 {
5007         u32 ctrl_ext, swsm;
5008
5009         if (adapter->hw.mac.type == e1000_82573) {
5010                 swsm = E1000_READ_REG(&adapter->hw, E1000_SWSM);
5011                 E1000_WRITE_REG(&adapter->hw, E1000_SWSM,
5012                     swsm | E1000_SWSM_DRV_LOAD);
5013                 return;
5014         }
5015         /* else */
5016         ctrl_ext = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
5017         E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT,
5018             ctrl_ext | E1000_CTRL_EXT_DRV_LOAD);
5019         return;
5020 }
5021
5022 /*
5023  * em_release_hw_control resets {CTRL_EXT|FWSM}:DRV_LOAD bit.
5024  * For ASF and Pass Through versions of f/w this means that
5025  * the driver is no longer loaded. For AMT versions of the
5026  * f/w this means that the network i/f is closed.
5027  */
5028 static void
5029 em_release_hw_control(struct adapter *adapter)
5030 {
5031         u32 ctrl_ext, swsm;
5032
5033         if (!adapter->has_manage)
5034                 return;
5035
5036         if (adapter->hw.mac.type == e1000_82573) {
5037                 swsm = E1000_READ_REG(&adapter->hw, E1000_SWSM);
5038                 E1000_WRITE_REG(&adapter->hw, E1000_SWSM,
5039                     swsm & ~E1000_SWSM_DRV_LOAD);
5040                 return;
5041         }
5042         /* else */
5043         ctrl_ext = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
5044         E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT,
5045             ctrl_ext & ~E1000_CTRL_EXT_DRV_LOAD);
5046         return;
5047 }
5048
5049 static int
5050 em_is_valid_ether_addr(u8 *addr)
5051 {
5052         char zero_addr[6] = { 0, 0, 0, 0, 0, 0 };
5053
5054         if ((addr[0] & 1) || (!bcmp(addr, zero_addr, ETHER_ADDR_LEN))) {
5055                 return (FALSE);
5056         }
5057
5058         return (TRUE);
5059 }
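/*
** Example: the test above rejects any address with the low bit of the
** first octet set -- the IEEE I/G (group) bit -- as well as the
** all-zero address.  So 01:00:5e:00:00:01 (multicast) and
** ff:ff:ff:ff:ff:ff (broadcast) both fail, since neither is a valid
** unicast station address.
*/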
5060
5061 /*
5062 ** Parse the interface capabilities with regard
5063 ** to both system management and wake-on-lan for
5064 ** later use.
5065 */
5066 static void
5067 em_get_wakeup(device_t dev)
5068 {
5069         struct adapter  *adapter = device_get_softc(dev);
5070         u16             eeprom_data = 0, device_id, apme_mask;
5071
5072         adapter->has_manage = e1000_enable_mng_pass_thru(&adapter->hw);
5073         apme_mask = EM_EEPROM_APME;
5074
5075         switch (adapter->hw.mac.type) {
5076         case e1000_82573:
5077         case e1000_82583:
5078                 adapter->has_amt = TRUE;
5079                 /* Falls thru */
5080         case e1000_82571:
5081         case e1000_82572:
5082         case e1000_80003es2lan:
5083                 if (adapter->hw.bus.func == 1) {
5084                         e1000_read_nvm(&adapter->hw,
5085                             NVM_INIT_CONTROL3_PORT_B, 1, &eeprom_data);
5086                         break;
5087                 } else
5088                         e1000_read_nvm(&adapter->hw,
5089                             NVM_INIT_CONTROL3_PORT_A, 1, &eeprom_data);
5090                 break;
5091         case e1000_ich8lan:
5092         case e1000_ich9lan:
5093         case e1000_ich10lan:
5094         case e1000_pchlan:
5095         case e1000_pch2lan:
5096                 apme_mask = E1000_WUC_APME;
5097                 adapter->has_amt = TRUE;
5098                 eeprom_data = E1000_READ_REG(&adapter->hw, E1000_WUC);
5099                 break;
5100         default:
5101                 e1000_read_nvm(&adapter->hw,
5102                     NVM_INIT_CONTROL3_PORT_A, 1, &eeprom_data);
5103                 break;
5104         }
5105         if (eeprom_data & apme_mask)
5106                 adapter->wol = (E1000_WUFC_MAG | E1000_WUFC_MC);
5107         /*
5108          * We have the eeprom settings; now apply the special cases
5109          * where the eeprom may be wrong or the board won't support
5110          * wake on lan on a particular port.
5111          */
5112         device_id = pci_get_device(dev);
5113         switch (device_id) {
5114         case E1000_DEV_ID_82571EB_FIBER:
5115                 /* Wake events only supported on port A for dual fiber
5116                  * regardless of eeprom setting */
5117                 if (E1000_READ_REG(&adapter->hw, E1000_STATUS) &
5118                     E1000_STATUS_FUNC_1)
5119                         adapter->wol = 0;
5120                 break;
5121         case E1000_DEV_ID_82571EB_QUAD_COPPER:
5122         case E1000_DEV_ID_82571EB_QUAD_FIBER:
5123         case E1000_DEV_ID_82571EB_QUAD_COPPER_LP:
5124                 /* if quad port adapter, disable WoL on all but port A */
5125                 if (global_quad_port_a != 0)
5126                         adapter->wol = 0;
5127                 /* Reset for multiple quad port adapters */
5128                 if (++global_quad_port_a == 4)
5129                         global_quad_port_a = 0;
5130                 break;
5131         }
5132         return;
5133 }
5134
5135
5136 /*
5137  * Enable PCI Wake On Lan capability
5138  */
5139 static void
5140 em_enable_wakeup(device_t dev)
5141 {
5142         struct adapter  *adapter = device_get_softc(dev);
5143         struct ifnet    *ifp = adapter->ifp;
5144         u32             pmc, ctrl, ctrl_ext, rctl;
5145         u16             status;
5146
5147         if ((pci_find_cap(dev, PCIY_PMG, &pmc) != 0))
5148                 return;
5149
5150         /* Advertise the wakeup capability */
5151         ctrl = E1000_READ_REG(&adapter->hw, E1000_CTRL);
5152         ctrl |= (E1000_CTRL_SWDPIN2 | E1000_CTRL_SWDPIN3);
5153         E1000_WRITE_REG(&adapter->hw, E1000_CTRL, ctrl);
5154         E1000_WRITE_REG(&adapter->hw, E1000_WUC, E1000_WUC_PME_EN);
5155
5156         if ((adapter->hw.mac.type == e1000_ich8lan) ||
5157             (adapter->hw.mac.type == e1000_pchlan) ||
5158             (adapter->hw.mac.type == e1000_ich9lan) ||
5159             (adapter->hw.mac.type == e1000_ich10lan))
5160                 e1000_suspend_workarounds_ich8lan(&adapter->hw);
5161
5162         /* Keep the laser running on Fiber adapters */
5163         if (adapter->hw.phy.media_type == e1000_media_type_fiber ||
5164             adapter->hw.phy.media_type == e1000_media_type_internal_serdes) {
5165                 ctrl_ext = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
5166                 ctrl_ext |= E1000_CTRL_EXT_SDP3_DATA;
5167                 E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT, ctrl_ext);
5168         }
5169
5170         /*
5171         ** Determine type of Wakeup: note that wol
5172         ** is set with all bits on by default.
5173         */
5174         if ((ifp->if_capenable & IFCAP_WOL_MAGIC) == 0)
5175                 adapter->wol &= ~E1000_WUFC_MAG;
5176
5177         if ((ifp->if_capenable & IFCAP_WOL_MCAST) == 0)
5178                 adapter->wol &= ~E1000_WUFC_MC;
5179         else {
5180                 rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
5181                 rctl |= E1000_RCTL_MPE;
5182                 E1000_WRITE_REG(&adapter->hw, E1000_RCTL, rctl);
5183         }
5184
5185         if ((adapter->hw.mac.type == e1000_pchlan) ||
5186             (adapter->hw.mac.type == e1000_pch2lan)) {
5187                 if (em_enable_phy_wakeup(adapter))
5188                         return;
5189         } else {
5190                 E1000_WRITE_REG(&adapter->hw, E1000_WUC, E1000_WUC_PME_EN);
5191                 E1000_WRITE_REG(&adapter->hw, E1000_WUFC, adapter->wol);
5192         }
5193
5194         if (adapter->hw.phy.type == e1000_phy_igp_3)
5195                 e1000_igp3_phy_powerdown_workaround_ich8lan(&adapter->hw);
5196
5197         /* Request PME */
5198         status = pci_read_config(dev, pmc + PCIR_POWER_STATUS, 2);
5199         status &= ~(PCIM_PSTAT_PME | PCIM_PSTAT_PMEENABLE);
5200         if (ifp->if_capenable & IFCAP_WOL)
5201                 status |= PCIM_PSTAT_PME | PCIM_PSTAT_PMEENABLE;
5202         pci_write_config(dev, pmc + PCIR_POWER_STATUS, status, 2);
5203
5204         return;
5205 }
5206
5207 /*
5208 ** WOL in the newer chipset interfaces (pchlan)
5209 ** requires things to be copied into the phy
5210 */
5211 static int
5212 em_enable_phy_wakeup(struct adapter *adapter)
5213 {
5214         struct e1000_hw *hw = &adapter->hw;
5215         u32 mreg, ret = 0;
5216         u16 preg;
5217
5218         /* copy MAC RARs to PHY RARs */
5219         e1000_copy_rx_addrs_to_phy_ich8lan(hw);
5220
5221         /* copy MAC MTA to PHY MTA */
5222         for (int i = 0; i < adapter->hw.mac.mta_reg_count; i++) {
5223                 mreg = E1000_READ_REG_ARRAY(hw, E1000_MTA, i);
5224                 e1000_write_phy_reg(hw, BM_MTA(i), (u16)(mreg & 0xFFFF));
5225                 e1000_write_phy_reg(hw, BM_MTA(i) + 1,
5226                     (u16)((mreg >> 16) & 0xFFFF));
5227         }
5228
5229         /* configure PHY Rx Control register */
5230         e1000_read_phy_reg(&adapter->hw, BM_RCTL, &preg);
5231         mreg = E1000_READ_REG(hw, E1000_RCTL);
5232         if (mreg & E1000_RCTL_UPE)
5233                 preg |= BM_RCTL_UPE;
5234         if (mreg & E1000_RCTL_MPE)
5235                 preg |= BM_RCTL_MPE;
5236         preg &= ~(BM_RCTL_MO_MASK);
5237         if (mreg & E1000_RCTL_MO_3)
5238                 preg |= (((mreg & E1000_RCTL_MO_3) >> E1000_RCTL_MO_SHIFT)
5239                                 << BM_RCTL_MO_SHIFT);
5240         if (mreg & E1000_RCTL_BAM)
5241                 preg |= BM_RCTL_BAM;
5242         if (mreg & E1000_RCTL_PMCF)
5243                 preg |= BM_RCTL_PMCF;
5244         mreg = E1000_READ_REG(hw, E1000_CTRL);
5245         if (mreg & E1000_CTRL_RFCE)
5246                 preg |= BM_RCTL_RFCE;
5247         e1000_write_phy_reg(&adapter->hw, BM_RCTL, preg);
5248
5249         /* enable PHY wakeup in MAC register */
5250         E1000_WRITE_REG(hw, E1000_WUC,
5251             E1000_WUC_PHY_WAKE | E1000_WUC_PME_EN);
5252         E1000_WRITE_REG(hw, E1000_WUFC, adapter->wol);
5253
5254         /* configure and enable PHY wakeup in PHY registers */
5255         e1000_write_phy_reg(&adapter->hw, BM_WUFC, adapter->wol);
5256         e1000_write_phy_reg(&adapter->hw, BM_WUC, E1000_WUC_PME_EN);
5257
5258         /* activate PHY wakeup */
5259         ret = hw->phy.ops.acquire(hw);
5260         if (ret) {
5261                 printf("Could not acquire PHY\n");
5262                 return ret;
5263         }
5264         e1000_write_phy_reg_mdic(hw, IGP01E1000_PHY_PAGE_SELECT,
5265                                  (BM_WUC_ENABLE_PAGE << IGP_PAGE_SHIFT));
5266         ret = e1000_read_phy_reg_mdic(hw, BM_WUC_ENABLE_REG, &preg);
5267         if (ret) {
5268                 printf("Could not read PHY page 769\n");
5269                 goto out;
5270         }
5271         preg |= BM_WUC_ENABLE_BIT | BM_WUC_HOST_WU_BIT;
5272         ret = e1000_write_phy_reg_mdic(hw, BM_WUC_ENABLE_REG, preg);
5273         if (ret)
5274                 printf("Could not set PHY Host Wakeup bit\n");
5275 out:
5276         hw->phy.ops.release(hw);
5277
5278         return ret;
5279 }
5280
5281 static void
5282 em_led_func(void *arg, int onoff)
5283 {
5284         struct adapter  *adapter = arg;
5285  
5286         EM_CORE_LOCK(adapter);
5287         if (onoff) {
5288                 e1000_setup_led(&adapter->hw);
5289                 e1000_led_on(&adapter->hw);
5290         } else {
5291                 e1000_led_off(&adapter->hw);
5292                 e1000_cleanup_led(&adapter->hw);
5293         }
5294         EM_CORE_UNLOCK(adapter);
5295 }
5296
5297 /*
5298 ** Disable the L0s and L1 link states
5299 */
5300 static void
5301 em_disable_aspm(struct adapter *adapter)
5302 {
5303         int             base, reg;
5304         u16             link_cap, link_ctrl;
5305         device_t        dev = adapter->dev;
5306
5307         switch (adapter->hw.mac.type) {
5308                 case e1000_82573:
5309                 case e1000_82574:
5310                 case e1000_82583:
5311                         break;
5312                 default:
5313                         return;
5314         }
5315         if (pci_find_cap(dev, PCIY_EXPRESS, &base) != 0)
5316                 return;
5317         reg = base + PCIER_LINK_CAP;
5318         link_cap = pci_read_config(dev, reg, 2);
5319         if ((link_cap & PCIEM_LINK_CAP_ASPM) == 0)
5320                 return;
5321         reg = base + PCIER_LINK_CTL;
5322         link_ctrl = pci_read_config(dev, reg, 2);
5323         link_ctrl &= ~PCIEM_LINK_CTL_ASPMC;
5324         pci_write_config(dev, reg, link_ctrl, 2);
5325         return;
5326 }
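/*
** Note on the mask above: PCIEM_LINK_CTL_ASPMC covers the two low bits
** of the PCIe Link Control register (bit 0 = L0s enable, bit 1 = L1
** enable), so clearing it disables both ASPM link states at once.
*/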
5327
5328 /**********************************************************************
5329  *
5330  *  Update the board statistics counters.
5331  *
5332  **********************************************************************/
5333 static void
5334 em_update_stats_counters(struct adapter *adapter)
5335 {
5336         struct ifnet   *ifp;
5337
5338         if (adapter->hw.phy.media_type == e1000_media_type_copper ||
5339            (E1000_READ_REG(&adapter->hw, E1000_STATUS) & E1000_STATUS_LU)) {
5340                 adapter->stats.symerrs += E1000_READ_REG(&adapter->hw, E1000_SYMERRS);
5341                 adapter->stats.sec += E1000_READ_REG(&adapter->hw, E1000_SEC);
5342         }
5343         adapter->stats.crcerrs += E1000_READ_REG(&adapter->hw, E1000_CRCERRS);
5344         adapter->stats.mpc += E1000_READ_REG(&adapter->hw, E1000_MPC);
5345         adapter->stats.scc += E1000_READ_REG(&adapter->hw, E1000_SCC);
5346         adapter->stats.ecol += E1000_READ_REG(&adapter->hw, E1000_ECOL);
5347
5348         adapter->stats.mcc += E1000_READ_REG(&adapter->hw, E1000_MCC);
5349         adapter->stats.latecol += E1000_READ_REG(&adapter->hw, E1000_LATECOL);
5350         adapter->stats.colc += E1000_READ_REG(&adapter->hw, E1000_COLC);
5351         adapter->stats.dc += E1000_READ_REG(&adapter->hw, E1000_DC);
5352         adapter->stats.rlec += E1000_READ_REG(&adapter->hw, E1000_RLEC);
5353         adapter->stats.xonrxc += E1000_READ_REG(&adapter->hw, E1000_XONRXC);
5354         adapter->stats.xontxc += E1000_READ_REG(&adapter->hw, E1000_XONTXC);
5355         adapter->stats.xoffrxc += E1000_READ_REG(&adapter->hw, E1000_XOFFRXC);
5356         adapter->stats.xofftxc += E1000_READ_REG(&adapter->hw, E1000_XOFFTXC);
5357         adapter->stats.fcruc += E1000_READ_REG(&adapter->hw, E1000_FCRUC);
5358         adapter->stats.prc64 += E1000_READ_REG(&adapter->hw, E1000_PRC64);
5359         adapter->stats.prc127 += E1000_READ_REG(&adapter->hw, E1000_PRC127);
5360         adapter->stats.prc255 += E1000_READ_REG(&adapter->hw, E1000_PRC255);
5361         adapter->stats.prc511 += E1000_READ_REG(&adapter->hw, E1000_PRC511);
5362         adapter->stats.prc1023 += E1000_READ_REG(&adapter->hw, E1000_PRC1023);
5363         adapter->stats.prc1522 += E1000_READ_REG(&adapter->hw, E1000_PRC1522);
5364         adapter->stats.gprc += E1000_READ_REG(&adapter->hw, E1000_GPRC);
5365         adapter->stats.bprc += E1000_READ_REG(&adapter->hw, E1000_BPRC);
5366         adapter->stats.mprc += E1000_READ_REG(&adapter->hw, E1000_MPRC);
5367         adapter->stats.gptc += E1000_READ_REG(&adapter->hw, E1000_GPTC);
5368
5369         /* For the 64-bit byte counters the low dword must be read first. */
5370         /* Both registers clear on the read of the high dword */
5371
5372         adapter->stats.gorc += E1000_READ_REG(&adapter->hw, E1000_GORCL) +
5373             ((u64)E1000_READ_REG(&adapter->hw, E1000_GORCH) << 32);
5374         adapter->stats.gotc += E1000_READ_REG(&adapter->hw, E1000_GOTCL) +
5375             ((u64)E1000_READ_REG(&adapter->hw, E1000_GOTCH) << 32);
5376
5377         adapter->stats.rnbc += E1000_READ_REG(&adapter->hw, E1000_RNBC);
5378         adapter->stats.ruc += E1000_READ_REG(&adapter->hw, E1000_RUC);
5379         adapter->stats.rfc += E1000_READ_REG(&adapter->hw, E1000_RFC);
5380         adapter->stats.roc += E1000_READ_REG(&adapter->hw, E1000_ROC);
5381         adapter->stats.rjc += E1000_READ_REG(&adapter->hw, E1000_RJC);
5382
5383         adapter->stats.tor += E1000_READ_REG(&adapter->hw, E1000_TORH);
5384         adapter->stats.tot += E1000_READ_REG(&adapter->hw, E1000_TOTH);
5385
5386         adapter->stats.tpr += E1000_READ_REG(&adapter->hw, E1000_TPR);
5387         adapter->stats.tpt += E1000_READ_REG(&adapter->hw, E1000_TPT);
5388         adapter->stats.ptc64 += E1000_READ_REG(&adapter->hw, E1000_PTC64);
5389         adapter->stats.ptc127 += E1000_READ_REG(&adapter->hw, E1000_PTC127);
5390         adapter->stats.ptc255 += E1000_READ_REG(&adapter->hw, E1000_PTC255);
5391         adapter->stats.ptc511 += E1000_READ_REG(&adapter->hw, E1000_PTC511);
5392         adapter->stats.ptc1023 += E1000_READ_REG(&adapter->hw, E1000_PTC1023);
5393         adapter->stats.ptc1522 += E1000_READ_REG(&adapter->hw, E1000_PTC1522);
5394         adapter->stats.mptc += E1000_READ_REG(&adapter->hw, E1000_MPTC);
5395         adapter->stats.bptc += E1000_READ_REG(&adapter->hw, E1000_BPTC);
5396
5397         /* Interrupt Counts */
5398
5399         adapter->stats.iac += E1000_READ_REG(&adapter->hw, E1000_IAC);
5400         adapter->stats.icrxptc += E1000_READ_REG(&adapter->hw, E1000_ICRXPTC);
5401         adapter->stats.icrxatc += E1000_READ_REG(&adapter->hw, E1000_ICRXATC);
5402         adapter->stats.ictxptc += E1000_READ_REG(&adapter->hw, E1000_ICTXPTC);
5403         adapter->stats.ictxatc += E1000_READ_REG(&adapter->hw, E1000_ICTXATC);
5404         adapter->stats.ictxqec += E1000_READ_REG(&adapter->hw, E1000_ICTXQEC);
5405         adapter->stats.ictxqmtc += E1000_READ_REG(&adapter->hw, E1000_ICTXQMTC);
5406         adapter->stats.icrxdmtc += E1000_READ_REG(&adapter->hw, E1000_ICRXDMTC);
5407         adapter->stats.icrxoc += E1000_READ_REG(&adapter->hw, E1000_ICRXOC);
5408
5409         if (adapter->hw.mac.type >= e1000_82543) {
5410                 adapter->stats.algnerrc += 
5411                 E1000_READ_REG(&adapter->hw, E1000_ALGNERRC);
5412                 adapter->stats.rxerrc += 
5413                 E1000_READ_REG(&adapter->hw, E1000_RXERRC);
5414                 adapter->stats.tncrs += 
5415                 E1000_READ_REG(&adapter->hw, E1000_TNCRS);
5416                 adapter->stats.cexterr += 
5417                 E1000_READ_REG(&adapter->hw, E1000_CEXTERR);
5418                 adapter->stats.tsctc += 
5419                 E1000_READ_REG(&adapter->hw, E1000_TSCTC);
5420                 adapter->stats.tsctfc += 
5421                 E1000_READ_REG(&adapter->hw, E1000_TSCTFC);
5422         }
5423         ifp = adapter->ifp;
5424
5425         ifp->if_collisions = adapter->stats.colc;
5426
5427         /* Rx Errors */
5428         ifp->if_ierrors = adapter->dropped_pkts + adapter->stats.rxerrc +
5429             adapter->stats.crcerrs + adapter->stats.algnerrc +
5430             adapter->stats.ruc + adapter->stats.roc +
5431             adapter->stats.mpc + adapter->stats.cexterr;
5432
5433         /* Tx Errors */
5434         ifp->if_oerrors = adapter->stats.ecol +
5435             adapter->stats.latecol + adapter->watchdog_events;
5436 }
5437
5438 /* Export a single 32-bit register via a read-only sysctl. */
5439 static int
5440 em_sysctl_reg_handler(SYSCTL_HANDLER_ARGS)
5441 {
5442         struct adapter *adapter;
5443         u_int val;
5444
5445         adapter = oidp->oid_arg1;
5446         val = E1000_READ_REG(&adapter->hw, oidp->oid_arg2);
5447         return (sysctl_handle_int(oidp, &val, 0, req));
5448 }
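/*
 * Wiring sketch: the handler expects the softc in oid_arg1 and the
 * register offset in oid_arg2, so callers register it along the lines
 * of what em_add_hw_stats() does below, e.g.:
 *
 *      SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "device_control",
 *          CTLTYPE_UINT | CTLFLAG_RD, adapter, E1000_CTRL,
 *          em_sysctl_reg_handler, "IU", "Device Control Register");
 */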
5449
5450 /*
5451  * Add sysctl variables, one per statistic, to the system.
5452  */
5453 static void
5454 em_add_hw_stats(struct adapter *adapter)
5455 {
5456         device_t dev = adapter->dev;
5457
5458         struct tx_ring *txr = adapter->tx_rings;
5459         struct rx_ring *rxr = adapter->rx_rings;
5460
5461         struct sysctl_ctx_list *ctx = device_get_sysctl_ctx(dev);
5462         struct sysctl_oid *tree = device_get_sysctl_tree(dev);
5463         struct sysctl_oid_list *child = SYSCTL_CHILDREN(tree);
5464         struct e1000_hw_stats *stats = &adapter->stats;
5465
5466         struct sysctl_oid *stat_node, *queue_node, *int_node;
5467         struct sysctl_oid_list *stat_list, *queue_list, *int_list;
5468
5469 #define QUEUE_NAME_LEN 32
5470         char namebuf[QUEUE_NAME_LEN];
5471         
5472         /* Driver Statistics */
5473         SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "link_irq",
5474                         CTLFLAG_RD, &adapter->link_irq,
5475                         "Link MSI-X IRQ Handled");
5476         SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "mbuf_alloc_fail", 
5477                          CTLFLAG_RD, &adapter->mbuf_alloc_failed,
5478                          "Standard mbuf allocation failed");
5479         SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "cluster_alloc_fail", 
5480                          CTLFLAG_RD, &adapter->mbuf_cluster_failed,
5481                          "Standard mbuf cluster allocation failed");
5482         SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "dropped", 
5483                         CTLFLAG_RD, &adapter->dropped_pkts,
5484                         "Driver dropped packets");
5485         SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "tx_dma_fail", 
5486                         CTLFLAG_RD, &adapter->no_tx_dma_setup,
5487                         "Driver tx dma failure in xmit");
5488         SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "rx_overruns",
5489                         CTLFLAG_RD, &adapter->rx_overruns,
5490                         "RX overruns");
5491         SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "watchdog_timeouts",
5492                         CTLFLAG_RD, &adapter->watchdog_events,
5493                         "Watchdog timeouts");
5494         
5495         SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "device_control",
5496                         CTLTYPE_UINT | CTLFLAG_RD, adapter, E1000_CTRL,
5497                         em_sysctl_reg_handler, "IU",
5498                         "Device Control Register");
5499         SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "rx_control",
5500                         CTLTYPE_UINT | CTLFLAG_RD, adapter, E1000_RCTL,
5501                         em_sysctl_reg_handler, "IU",
5502                         "Receiver Control Register");
5503         SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "fc_high_water",
5504                         CTLFLAG_RD, &adapter->hw.fc.high_water, 0,
5505                         "Flow Control High Watermark");
5506         SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "fc_low_water", 
5507                         CTLFLAG_RD, &adapter->hw.fc.low_water, 0,
5508                         "Flow Control Low Watermark");
5509
5510         for (int i = 0; i < adapter->num_queues; i++, txr++, rxr++) {
5511                 snprintf(namebuf, QUEUE_NAME_LEN, "queue_tx_%d", i);
5512                 queue_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, namebuf,
5513                                             CTLFLAG_RD, NULL, "TX Queue");
5514                 queue_list = SYSCTL_CHILDREN(queue_node);
5515
5516                 SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "txd_head", 
5517                                 CTLTYPE_UINT | CTLFLAG_RD, adapter,
5518                                 E1000_TDH(txr->me),
5519                                 em_sysctl_reg_handler, "IU",
5520                                 "Transmit Descriptor Head");
5521                 SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "txd_tail", 
5522                                 CTLTYPE_UINT | CTLFLAG_RD, adapter,
5523                                 E1000_TDT(txr->me),
5524                                 em_sysctl_reg_handler, "IU",
5525                                 "Transmit Descriptor Tail");
5526                 SYSCTL_ADD_ULONG(ctx, queue_list, OID_AUTO, "tx_irq",
5527                                 CTLFLAG_RD, &txr->tx_irq,
5528                                 "Queue MSI-X Transmit Interrupts");
5529                 SYSCTL_ADD_ULONG(ctx, queue_list, OID_AUTO, "no_desc_avail", 
5530                                 CTLFLAG_RD, &txr->no_desc_avail,
5531                                 "Queue No Descriptor Available");
5532
5533                 snprintf(namebuf, QUEUE_NAME_LEN, "queue_rx_%d", i);
5534                 queue_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, namebuf,
5535                                             CTLFLAG_RD, NULL, "RX Queue");
5536                 queue_list = SYSCTL_CHILDREN(queue_node);
5537
5538                 SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "rxd_head", 
5539                                 CTLTYPE_UINT | CTLFLAG_RD, adapter,
5540                                 E1000_RDH(rxr->me),
5541                                 em_sysctl_reg_handler, "IU",
5542                                 "Receive Descriptor Head");
5543                 SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "rxd_tail", 
5544                                 CTLTYPE_UINT | CTLFLAG_RD, adapter,
5545                                 E1000_RDT(rxr->me),
5546                                 em_sysctl_reg_handler, "IU",
5547                                 "Receive Descriptor Tail");
5548                 SYSCTL_ADD_ULONG(ctx, queue_list, OID_AUTO, "rx_irq",
5549                                 CTLFLAG_RD, &rxr->rx_irq,
5550                                 "Queue MSI-X Receive Interrupts");
5551         }
5552
5553         /* MAC stats get their own sub node */
5554
5555         stat_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "mac_stats", 
5556                                     CTLFLAG_RD, NULL, "Statistics");
5557         stat_list = SYSCTL_CHILDREN(stat_node);
5558
5559         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "excess_coll",
5560                         CTLFLAG_RD, &stats->ecol,
5561                         "Excessive collisions");
5562         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "single_coll",
5563                         CTLFLAG_RD, &stats->scc,
5564                         "Single collisions");
5565         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "multiple_coll",
5566                         CTLFLAG_RD, &stats->mcc,
5567                         "Multiple collisions");
5568         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "late_coll",
5569                         CTLFLAG_RD, &stats->latecol,
5570                         "Late collisions");
5571         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "collision_count",
5572                         CTLFLAG_RD, &stats->colc,
5573                         "Collision Count");
5574         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "symbol_errors",
5575                         CTLFLAG_RD, &adapter->stats.symerrs,
5576                         "Symbol Errors");
5577         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "sequence_errors",
5578                         CTLFLAG_RD, &adapter->stats.sec,
5579                         "Sequence Errors");
5580         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "defer_count",
5581                         CTLFLAG_RD, &adapter->stats.dc,
5582                         "Defer Count");
5583         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "missed_packets",
5584                         CTLFLAG_RD, &adapter->stats.mpc,
5585                         "Missed Packets");
5586         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_no_buff",
5587                         CTLFLAG_RD, &adapter->stats.rnbc,
5588                         "Receive No Buffers");
5589         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_undersize",
5590                         CTLFLAG_RD, &adapter->stats.ruc,
5591                         "Receive Undersize");
5592         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_fragmented",
5593                         CTLFLAG_RD, &adapter->stats.rfc,
5594                         "Fragmented Packets Received");
5595         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_oversize",
5596                         CTLFLAG_RD, &adapter->stats.roc,
5597                         "Oversized Packets Received");
5598         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_jabber",
5599                         CTLFLAG_RD, &adapter->stats.rjc,
5600                         "Received Jabber");
5601         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_errs",
5602                         CTLFLAG_RD, &adapter->stats.rxerrc,
5603                         "Receive Errors");
5604         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "crc_errs",
5605                         CTLFLAG_RD, &adapter->stats.crcerrs,
5606                         "CRC errors");
5607         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "alignment_errs",
5608                         CTLFLAG_RD, &adapter->stats.algnerrc,
5609                         "Alignment Errors");
5610         /* On 82575 these are collision counts */
5611         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "coll_ext_errs",
5612                         CTLFLAG_RD, &adapter->stats.cexterr,
5613                         "Collision/Carrier extension errors");
5614         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "xon_recvd",
5615                         CTLFLAG_RD, &adapter->stats.xonrxc,
5616                         "XON Received");
5617         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "xon_txd",
5618                         CTLFLAG_RD, &adapter->stats.xontxc,
5619                         "XON Transmitted");
5620         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "xoff_recvd",
5621                         CTLFLAG_RD, &adapter->stats.xoffrxc,
5622                         "XOFF Received");
5623         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "xoff_txd",
5624                         CTLFLAG_RD, &adapter->stats.xofftxc,
5625                         "XOFF Transmitted");
5626
5627         /* Packet Reception Stats */
5628         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "total_pkts_recvd",
5629                         CTLFLAG_RD, &adapter->stats.tpr,
5630                         "Total Packets Received");
5631         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_pkts_recvd",
5632                         CTLFLAG_RD, &adapter->stats.gprc,
5633                         "Good Packets Received");
5634         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "bcast_pkts_recvd",
5635                         CTLFLAG_RD, &adapter->stats.bprc,
5636                         "Broadcast Packets Received");
5637         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "mcast_pkts_recvd",
5638                         CTLFLAG_RD, &adapter->stats.mprc,
5639                         "Multicast Packets Received");
5640         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_64",
5641                         CTLFLAG_RD, &adapter->stats.prc64,
5642                         "64 byte frames received");
5643         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_65_127",
5644                         CTLFLAG_RD, &adapter->stats.prc127,
5645                         "65-127 byte frames received");
5646         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_128_255",
5647                         CTLFLAG_RD, &adapter->stats.prc255,
5648                         "128-255 byte frames received");
5649         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_256_511",
5650                         CTLFLAG_RD, &adapter->stats.prc511,
5651                         "256-511 byte frames received");
5652         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_512_1023",
5653                         CTLFLAG_RD, &adapter->stats.prc1023,
5654                         "512-1023 byte frames received");
5655         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_1024_1522",
5656                         CTLFLAG_RD, &adapter->stats.prc1522,
5657                         "1024-1522 byte frames received");
5658         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_octets_recvd",
5659                         CTLFLAG_RD, &adapter->stats.gorc, 
5660                         "Good Octets Received"); 
5661
5662         /* Packet Transmission Stats */
5663         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_octets_txd",
5664                         CTLFLAG_RD, &adapter->stats.gotc, 
5665                         "Good Octets Transmitted"); 
5666         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "total_pkts_txd",
5667                         CTLFLAG_RD, &adapter->stats.tpt,
5668                         "Total Packets Transmitted");
5669         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_pkts_txd",
5670                         CTLFLAG_RD, &adapter->stats.gptc,
5671                         "Good Packets Transmitted");
5672         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "bcast_pkts_txd",
5673                         CTLFLAG_RD, &adapter->stats.bptc,
5674                         "Broadcast Packets Transmitted");
5675         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "mcast_pkts_txd",
5676                         CTLFLAG_RD, &adapter->stats.mptc,
5677                         "Multicast Packets Transmitted");
5678         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_64",
5679                         CTLFLAG_RD, &adapter->stats.ptc64,
5680                         "64 byte frames transmitted");
5681         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_65_127",
5682                         CTLFLAG_RD, &adapter->stats.ptc127,
5683                         "65-127 byte frames transmitted");
5684         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_128_255",
5685                         CTLFLAG_RD, &adapter->stats.ptc255,
5686                         "128-255 byte frames transmitted");
5687         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_256_511",
5688                         CTLFLAG_RD, &adapter->stats.ptc511,
5689                         "256-511 byte frames transmitted");
5690         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_512_1023",
5691                         CTLFLAG_RD, &adapter->stats.ptc1023,
5692                         "512-1023 byte frames transmitted");
5693         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_1024_1522",
5694                         CTLFLAG_RD, &adapter->stats.ptc1522,
5695                         "1024-1522 byte frames transmitted");
5696         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tso_txd",
5697                         CTLFLAG_RD, &adapter->stats.tsctc,
5698                         "TSO Contexts Transmitted");
5699         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tso_ctx_fail",
5700                         CTLFLAG_RD, &adapter->stats.tsctfc,
5701                         "TSO Contexts Failed");
5702
5703
5704         /* Interrupt Stats */
5705
5706         int_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "interrupts", 
5707                                     CTLFLAG_RD, NULL, "Interrupt Statistics");
5708         int_list = SYSCTL_CHILDREN(int_node);
5709
5710         SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "asserts",
5711                         CTLFLAG_RD, &adapter->stats.iac,
5712                         "Interrupt Assertion Count");
5713
5714         SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "rx_pkt_timer",
5715                         CTLFLAG_RD, &adapter->stats.icrxptc,
5716                         "Interrupt Cause Rx Pkt Timer Expire Count");
5717
5718         SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "rx_abs_timer",
5719                         CTLFLAG_RD, &adapter->stats.icrxatc,
5720                         "Interrupt Cause Rx Abs Timer Expire Count");
5721
5722         SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "tx_pkt_timer",
5723                         CTLFLAG_RD, &adapter->stats.ictxptc,
5724                         "Interrupt Cause Tx Pkt Timer Expire Count");
5725
5726         SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "tx_abs_timer",
5727                         CTLFLAG_RD, &adapter->stats.ictxatc,
5728                         "Interrupt Cause Tx Abs Timer Expire Count");
5729
5730         SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "tx_queue_empty",
5731                         CTLFLAG_RD, &adapter->stats.ictxqec,
5732                         "Interrupt Cause Tx Queue Empty Count");
5733
5734         SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "tx_queue_min_thresh",
5735                         CTLFLAG_RD, &adapter->stats.ictxqmtc,
5736                         "Interrupt Cause Tx Queue Min Thresh Count");
5737
5738         SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "rx_desc_min_thresh",
5739                         CTLFLAG_RD, &adapter->stats.icrxdmtc,
5740                         "Interrupt Cause Rx Desc Min Thresh Count");
5741
5742         SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "rx_overrun",
5743                         CTLFLAG_RD, &adapter->stats.icrxoc,
5744                         "Interrupt Cause Receiver Overrun Count");
5745 }
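/*
 * The nodes added above hang off the device's sysctl tree, so (for
 * unit 0) they can be inspected from userland with, e.g.:
 *
 *      # sysctl dev.em.0.mac_stats
 *      # sysctl dev.em.0.interrupts
 *      # sysctl dev.em.0.queue_tx_0
 */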
5746
5747 /**********************************************************************
5748  *
5749  *  This routine dumps out the adapter EEPROM, often a useful
5750  *  debug/service tool. Only the first 32 words are dumped; the
5751  *  fields that matter live in that range.
5752  *
5753  **********************************************************************/
5754 static int
5755 em_sysctl_nvm_info(SYSCTL_HANDLER_ARGS)
5756 {
5757         struct adapter *adapter = (struct adapter *)arg1;
5758         int error;
5759         int result;
5760
5761         result = -1;
5762         error = sysctl_handle_int(oidp, &result, 0, req);
5763
5764         if (error || !req->newptr)
5765                 return (error);
5766
5767         /*
5768          * This value will cause a hex dump of the
5769          * first 32 16-bit words of the EEPROM to
5770          * the screen.
5771          */
5772         if (result == 1)
5773                 em_print_nvm_info(adapter);
5774
5775         return (error);
5776 }
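/*
 * Example, assuming this handler is registered as the "nvm" node on
 * the device's sysctl tree during attach:
 *
 *      # sysctl dev.em.0.nvm=1
 *
 * prints the hex dump produced by em_print_nvm_info() below.
 */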
5777
5778 static void
5779 em_print_nvm_info(struct adapter *adapter)
5780 {
5781         u16     eeprom_data;
5782         int     i, j, row = 0;
5783
5784         /* It's a bit crude, but it gets the job done */
5785         printf("\nInterface EEPROM Dump:\n");
5786         printf("Offset\n0x0000  ");
5787         for (i = 0, j = 0; i < 32; i++, j++) {
5788                 if (j == 8) { /* Make the offset block */
5789                         j = 0; ++row;
5790                         printf("\n0x00%x0  ", row);
5791                 }
5792                 e1000_read_nvm(&adapter->hw, i, 1, &eeprom_data);
5793                 printf("%04x ", eeprom_data);
5794         }
5795         printf("\n");
5796 }
5797
5798 static int
5799 em_sysctl_int_delay(SYSCTL_HANDLER_ARGS)
5800 {
5801         struct em_int_delay_info *info;
5802         struct adapter *adapter;
5803         u32 regval;
5804         int error, usecs, ticks;
5805
5806         info = (struct em_int_delay_info *)arg1;
5807         usecs = info->value;
5808         error = sysctl_handle_int(oidp, &usecs, 0, req);
5809         if (error != 0 || req->newptr == NULL)
5810                 return (error);
5811         if (usecs < 0 || usecs > EM_TICKS_TO_USECS(65535))
5812                 return (EINVAL);
5813         info->value = usecs;
5814         ticks = EM_USECS_TO_TICKS(usecs);
5815         if (info->offset == E1000_ITR)  /* units are 256ns here */
5816                 ticks *= 4;
5817
5818         adapter = info->adapter;
5819         
5820         EM_CORE_LOCK(adapter);
5821         regval = E1000_READ_OFFSET(&adapter->hw, info->offset);
5822         regval = (regval & ~0xffff) | (ticks & 0xffff);
5823         /* Handle a few special cases. */
5824         switch (info->offset) {
5825         case E1000_RDTR:
5826                 break;
5827         case E1000_TIDV:
5828                 if (ticks == 0) {
5829                         adapter->txd_cmd &= ~E1000_TXD_CMD_IDE;
5830                         /* Don't write 0 into the TIDV register. */
5831                         regval++;
5832                 } else
5833                         adapter->txd_cmd |= E1000_TXD_CMD_IDE;
5834                 break;
5835         }
5836         E1000_WRITE_OFFSET(&adapter->hw, info->offset, regval);
5837         EM_CORE_UNLOCK(adapter);
5838         return (0);
5839 }
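/*
 * Usage sketch, assuming an instance registered via
 * em_add_int_delay_sysctl() below under a name such as "rx_int_delay":
 *
 *      # sysctl dev.em.0.rx_int_delay=32
 *
 * The value is taken in microseconds, converted to hardware ticks,
 * and merged into the low 16 bits of the register in info->offset.
 */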
5840
5841 static void
5842 em_add_int_delay_sysctl(struct adapter *adapter, const char *name,
5843         const char *description, struct em_int_delay_info *info,
5844         int offset, int value)
5845 {
5846         info->adapter = adapter;
5847         info->offset = offset;
5848         info->value = value;
5849         SYSCTL_ADD_PROC(device_get_sysctl_ctx(adapter->dev),
5850             SYSCTL_CHILDREN(device_get_sysctl_tree(adapter->dev)),
5851             OID_AUTO, name, CTLTYPE_INT|CTLFLAG_RW,
5852             info, 0, em_sysctl_int_delay, "I", description);
5853 }
5854
5855 static void
5856 em_set_sysctl_value(struct adapter *adapter, const char *name,
5857         const char *description, int *limit, int value)
5858 {
5859         *limit = value;
5860         SYSCTL_ADD_INT(device_get_sysctl_ctx(adapter->dev),
5861             SYSCTL_CHILDREN(device_get_sysctl_tree(adapter->dev)),
5862             OID_AUTO, name, CTLFLAG_RW, limit, value, description);
5863 }
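/*
 * A plain read/write integer knob. A typical caller elsewhere in the
 * driver would expose a tunable limit along the lines of
 * (hypothetical names):
 *
 *      em_set_sysctl_value(adapter, "rx_processing_limit",
 *          "Receive processing limit", &adapter->rx_process_limit, 100);
 */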
5864
5865
5866 /*
5867 ** Set flow control using sysctl:
5868 ** Flow control values:
5869 **      0 - off
5870 **      1 - rx pause
5871 **      2 - tx pause
5872 **      3 - full
5873 */
5874 static int
5875 em_set_flowcntl(SYSCTL_HANDLER_ARGS)
5876 {
5877         struct adapter  *adapter = (struct adapter *) arg1;
5878         int             error, input;
5879
5880         input = adapter->fc;    /* report the current setting */
5881         error = sysctl_handle_int(oidp, &input, 0, req);
5882     
5883         if ((error) || (req->newptr == NULL))
5884                 return (error);
5885                 
5886         if (input == adapter->fc) /* no change? */
5887                 return (error);
5888
5889         switch (input) {
5890                 case e1000_fc_rx_pause:
5891                 case e1000_fc_tx_pause:
5892                 case e1000_fc_full:
5893                 case e1000_fc_none:
5894                         adapter->hw.fc.requested_mode = input;
5895                         adapter->fc = input;
5896                         break;
5897                 default:
5898                         /* Do nothing */
5899                         return (error);
5900         }
5901
5902         adapter->hw.fc.current_mode = adapter->hw.fc.requested_mode;
5903         e1000_force_mac_fc(&adapter->hw);
5904         return (error);
5905 }
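/*
 * Example, assuming the handler is registered as the "fc" node:
 *
 *      # sysctl dev.em.0.fc=3          (request full flow control)
 *      # sysctl dev.em.0.fc=0          (turn flow control off)
 *
 * The accepted values follow the 0-3 mapping in the comment above,
 * which matches enum e1000_fc_mode (none=0, rx_pause=1, tx_pause=2,
 * full=3).
 */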
5906
5907 /*
5908 ** Manage Energy Efficient Ethernet:
5909 ** Control values:
5910 **      0 - EEE enabled, 1 - EEE disabled
5911 */
5912 static int
5913 em_sysctl_eee(SYSCTL_HANDLER_ARGS)
5914 {
5915         struct adapter *adapter = (struct adapter *) arg1;
5916         int             error, value;
5917
5918         value = adapter->hw.dev_spec.ich8lan.eee_disable;
5919         error = sysctl_handle_int(oidp, &value, 0, req);
5920         if (error || req->newptr == NULL)
5921                 return (error);
5922         EM_CORE_LOCK(adapter);
5923         adapter->hw.dev_spec.ich8lan.eee_disable = (value != 0);
5924         em_init_locked(adapter);
5925         EM_CORE_UNLOCK(adapter);
5926         return (0);
5927 }
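/*
 * Example, assuming the handler is registered as the "eee_control"
 * node. The value mirrors hw.dev_spec.ich8lan.eee_disable, so 1
 * disables EEE and 0 enables it; the interface is re-initialized to
 * apply the change:
 *
 *      # sysctl dev.em.0.eee_control=1
 */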
5928
5929 static int
5930 em_sysctl_debug_info(SYSCTL_HANDLER_ARGS)
5931 {
5932         struct adapter *adapter;
5933         int error;
5934         int result;
5935
5936         result = -1;
5937         error = sysctl_handle_int(oidp, &result, 0, req);
5938
5939         if (error || !req->newptr)
5940                 return (error);
5941
5942         if (result == 1) {
5943                 adapter = (struct adapter *)arg1;
5944                 em_print_debug_info(adapter);
5945         }
5946
5947         return (error);
5948 }
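/*
 * Example, assuming the handler is registered as the "debug" node:
 *
 *      # sysctl dev.em.0.debug=1
 *
 * triggers the console dump in em_print_debug_info() below.
 */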
5949
5950 /*
5951 ** This routine is meant to be fluid, add whatever is
5952 ** needed for debugging a problem.  -jfv
5953 */
5954 static void
5955 em_print_debug_info(struct adapter *adapter)
5956 {
5957         device_t dev = adapter->dev;
5958         struct tx_ring *txr = adapter->tx_rings;
5959         struct rx_ring *rxr = adapter->rx_rings;
5960
5961         if (adapter->ifp->if_drv_flags & IFF_DRV_RUNNING)
5962                 printf("Interface is RUNNING ");
5963         else
5964                 printf("Interface is NOT RUNNING ");
5965
5966         if (adapter->ifp->if_drv_flags & IFF_DRV_OACTIVE)
5967                 printf("and INACTIVE\n");
5968         else
5969                 printf("and ACTIVE\n");
5970
5971         for (int i = 0; i < adapter->num_queues; i++, txr++, rxr++) {
5972                 device_printf(dev, "TX Queue %d ------\n", i);
5973                 device_printf(dev, "hw tdh = %d, hw tdt = %d\n",
5974                         E1000_READ_REG(&adapter->hw, E1000_TDH(i)),
5975                         E1000_READ_REG(&adapter->hw, E1000_TDT(i)));
5976                 device_printf(dev, "Tx Queue Status = %d\n", txr->busy);
5977                 device_printf(dev, "TX descriptors avail = %d\n",
5978                         txr->tx_avail);
5979                 device_printf(dev, "Tx Descriptors avail failure = %ld\n",
5980                         txr->no_desc_avail);
5981                 device_printf(dev, "RX Queue %d ------\n", i);
5982                 device_printf(dev, "hw rdh = %d, hw rdt = %d\n",
5983                         E1000_READ_REG(&adapter->hw, E1000_RDH(i)),
5984                         E1000_READ_REG(&adapter->hw, E1000_RDT(i)));
5985                 device_printf(dev, "RX discarded packets = %ld\n",
5986                         rxr->rx_discarded);
5987                 device_printf(dev, "RX Next to Check = %d\n", rxr->next_to_check);
5988                 device_printf(dev, "RX Next to Refresh = %d\n", rxr->next_to_refresh);
5989         }
5990 }
5991
5992 #ifdef EM_MULTIQUEUE
5993 /*
5994  * 82574 only:
5995  * Write a new value to the EEPROM increasing the number of MSIX
5996  * vectors from 3 to 5, for proper multiqueue support.
5997  */
5998 static void
5999 em_enable_vectors_82574(struct adapter *adapter)
6000 {
6001         struct e1000_hw *hw = &adapter->hw;
6002         device_t dev = adapter->dev;
6003         u16 edata;
6004
6005         e1000_read_nvm(hw, EM_NVM_PCIE_CTRL, 1, &edata);
6006         device_printf(dev, "Current cap: %#06x\n", edata);
6007         if (((edata & EM_NVM_MSIX_N_MASK) >> EM_NVM_MSIX_N_SHIFT) != 4) {
6008                 device_printf(dev, "Writing to eeprom: increasing "
6009                     "reported MSIX vectors from 3 to 5...\n");
6010                 edata &= ~(EM_NVM_MSIX_N_MASK);
6011                 edata |= 4 << EM_NVM_MSIX_N_SHIFT;
6012                 e1000_write_nvm(hw, EM_NVM_PCIE_CTRL, 1, &edata);
6013                 e1000_update_nvm_checksum(hw);
6014                 device_printf(dev, "Writing to eeprom: done\n");
6015         }
6016 }
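/*
 * Note: this only rewrites the NVM word and checksum; the hardware
 * samples the value at reset, so the extra vectors will likely not be
 * visible until the adapter has been reset or the machine rebooted.
 */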
6017 #endif
6018
6019 #ifdef DDB
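/*
 * Kernel-debugger helpers; from the ddb prompt:
 *
 *      db> em_reset_dev        re-initialize every attached em(4) device
 *      db> em_dump_queue       print per-queue debug state for each device
 */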
6020 DB_COMMAND(em_reset_dev, em_ddb_reset_dev)
6021 {
6022         devclass_t      dc;
6023         int max_em;
6024
6025         dc = devclass_find("em");
6026         max_em = devclass_get_maxunit(dc);
6027
6028         for (int index = 0; index < max_em; index++) {
6029                 device_t dev;
6030                 dev = devclass_get_device(dc, index);
6031                 if (dev != NULL && device_get_driver(dev) == &em_driver) {
6032                         struct adapter *adapter = device_get_softc(dev);
6033                         em_init_locked(adapter);
6034                 }
6035         }
6036 }
6037 DB_COMMAND(em_dump_queue, em_ddb_dump_queue)
6038 {
6039         devclass_t      dc;
6040         int max_em;
6041
6042         dc = devclass_find("em");
6043         max_em = devclass_get_maxunit(dc);
6044
6045         for (int index = 0; index < max_em; index++) {
6046                 device_t dev;
6047                 dev = devclass_get_device(dc, index);
6048                 if (dev != NULL && device_get_driver(dev) == &em_driver)
6049                         em_print_debug_info(device_get_softc(dev));
6050         }
6051
6052 }
6053 #endif