/******************************************************************************

  Copyright (c) 2001-2011, Intel Corporation
  All rights reserved.

  Redistribution and use in source and binary forms, with or without
  modification, are permitted provided that the following conditions are met:

   1. Redistributions of source code must retain the above copyright notice,
      this list of conditions and the following disclaimer.

   2. Redistributions in binary form must reproduce the above copyright
      notice, this list of conditions and the following disclaimer in the
      documentation and/or other materials provided with the distribution.

   3. Neither the name of the Intel Corporation nor the names of its
      contributors may be used to endorse or promote products derived from
      this software without specific prior written permission.

  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
  AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
  LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
  INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
  CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
  ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
  POSSIBILITY OF SUCH DAMAGE.

******************************************************************************/
/*$FreeBSD$*/

#ifdef HAVE_KERNEL_OPTION_HEADERS
#include "opt_device_polling.h"
#include "opt_inet.h"
#include "opt_inet6.h"
#endif

#include <sys/param.h>
#include <sys/systm.h>
#if __FreeBSD_version >= 800000
#include <sys/buf_ring.h>
#endif
#include <sys/bus.h>
#include <sys/endian.h>
#include <sys/kernel.h>
#include <sys/kthread.h>
#include <sys/malloc.h>
#include <sys/mbuf.h>
#include <sys/module.h>
#include <sys/rman.h>
#include <sys/socket.h>
#include <sys/sockio.h>
#include <sys/sysctl.h>
#include <sys/taskqueue.h>
#include <sys/eventhandler.h>
#include <machine/bus.h>
#include <machine/resource.h>

#include <net/bpf.h>
#include <net/ethernet.h>
#include <net/if.h>
#include <net/if_arp.h>
#include <net/if_dl.h>
#include <net/if_media.h>

#include <net/if_types.h>
#include <net/if_vlan_var.h>

#include <netinet/in_systm.h>
#include <netinet/in.h>
#include <netinet/if_ether.h>
#include <netinet/ip.h>
#include <netinet/ip6.h>
#include <netinet/tcp.h>
#include <netinet/udp.h>

#include <machine/in_cksum.h>
#include <dev/led/led.h>
#include <dev/pci/pcivar.h>
#include <dev/pci/pcireg.h>

#include "e1000_api.h"
#include "e1000_82571.h"
#include "if_em.h"

/*********************************************************************
 *  Set this to one to display debug statistics
 *********************************************************************/
int     em_display_debug_stats = 0;

/*********************************************************************
 *  Driver version:
 *********************************************************************/
char em_driver_version[] = "7.3.2";

/*********************************************************************
 *  PCI Device ID Table
 *
 *  Used by probe to select the devices to load on.
 *  The last field stores an index into em_strings.
 *  The last entry must be all 0s.
 *
 *  { Vendor ID, Device ID, SubVendor ID, SubDevice ID, String Index }
 *********************************************************************/

static em_vendor_info_t em_vendor_info_array[] =
{
        /* Intel(R) PRO/1000 Network Connection */
        { 0x8086, E1000_DEV_ID_82571EB_COPPER,  PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82571EB_FIBER,   PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82571EB_SERDES,  PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82571EB_SERDES_DUAL,
                                                PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82571EB_SERDES_QUAD,
                                                PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82571EB_QUAD_COPPER,
                                                PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82571EB_QUAD_COPPER_LP,
                                                PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82571EB_QUAD_FIBER,
                                                PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82571PT_QUAD_COPPER,
                                                PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82572EI_COPPER,  PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82572EI_FIBER,   PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82572EI_SERDES,  PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82572EI,         PCI_ANY_ID, PCI_ANY_ID, 0},

        { 0x8086, E1000_DEV_ID_82573E,          PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82573E_IAMT,     PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82573L,          PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82583V,          PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_80003ES2LAN_COPPER_SPT,
                                                PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_80003ES2LAN_SERDES_SPT,
                                                PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_80003ES2LAN_COPPER_DPT,
                                                PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_80003ES2LAN_SERDES_DPT,
                                                PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_ICH8_IGP_M_AMT,  PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_ICH8_IGP_AMT,    PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_ICH8_IGP_C,      PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_ICH8_IFE,        PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_ICH8_IFE_GT,     PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_ICH8_IFE_G,      PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_ICH8_IGP_M,      PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_ICH8_82567V_3,   PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_ICH9_IGP_M_AMT,  PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_ICH9_IGP_AMT,    PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_ICH9_IGP_C,      PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_ICH9_IGP_M,      PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_ICH9_IGP_M_V,    PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_ICH9_IFE,        PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_ICH9_IFE_GT,     PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_ICH9_IFE_G,      PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_ICH9_BM,         PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82574L,          PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82574LA,         PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_ICH10_R_BM_LM,   PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_ICH10_R_BM_LF,   PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_ICH10_R_BM_V,    PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_ICH10_D_BM_LM,   PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_ICH10_D_BM_LF,   PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_ICH10_D_BM_V,    PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_PCH_M_HV_LM,     PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_PCH_M_HV_LC,     PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_PCH_D_HV_DM,     PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_PCH_D_HV_DC,     PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_PCH2_LV_LM,      PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_PCH2_LV_V,       PCI_ANY_ID, PCI_ANY_ID, 0},
        /* required last entry */
        { 0, 0, 0, 0, 0}
};

/*********************************************************************
 *  Table of branding strings for all supported NICs.
 *********************************************************************/

static char *em_strings[] = {
        "Intel(R) PRO/1000 Network Connection"
};

/*********************************************************************
 *  Function prototypes
 *********************************************************************/
static int      em_probe(device_t);
static int      em_attach(device_t);
static int      em_detach(device_t);
static int      em_shutdown(device_t);
static int      em_suspend(device_t);
static int      em_resume(device_t);
#ifdef EM_MULTIQUEUE
static int      em_mq_start(struct ifnet *, struct mbuf *);
static int      em_mq_start_locked(struct ifnet *,
                    struct tx_ring *, struct mbuf *);
static void     em_qflush(struct ifnet *);
#else
static void     em_start(struct ifnet *);
static void     em_start_locked(struct ifnet *, struct tx_ring *);
#endif
static int      em_ioctl(struct ifnet *, u_long, caddr_t);
static void     em_init(void *);
static void     em_init_locked(struct adapter *);
static void     em_stop(void *);
static void     em_media_status(struct ifnet *, struct ifmediareq *);
static int      em_media_change(struct ifnet *);
static void     em_identify_hardware(struct adapter *);
static int      em_allocate_pci_resources(struct adapter *);
static int      em_allocate_legacy(struct adapter *);
static int      em_allocate_msix(struct adapter *);
static int      em_allocate_queues(struct adapter *);
static int      em_setup_msix(struct adapter *);
static void     em_free_pci_resources(struct adapter *);
static void     em_local_timer(void *);
static void     em_reset(struct adapter *);
static int      em_setup_interface(device_t, struct adapter *);

static void     em_setup_transmit_structures(struct adapter *);
static void     em_initialize_transmit_unit(struct adapter *);
static int      em_allocate_transmit_buffers(struct tx_ring *);
static void     em_free_transmit_structures(struct adapter *);
static void     em_free_transmit_buffers(struct tx_ring *);

static int      em_setup_receive_structures(struct adapter *);
static int      em_allocate_receive_buffers(struct rx_ring *);
static void     em_initialize_receive_unit(struct adapter *);
static void     em_free_receive_structures(struct adapter *);
static void     em_free_receive_buffers(struct rx_ring *);

static void     em_enable_intr(struct adapter *);
static void     em_disable_intr(struct adapter *);
static void     em_update_stats_counters(struct adapter *);
static void     em_add_hw_stats(struct adapter *adapter);
static void     em_txeof(struct tx_ring *);
static bool     em_rxeof(struct rx_ring *, int, int *);
#ifndef __NO_STRICT_ALIGNMENT
static int      em_fixup_rx(struct rx_ring *);
#endif
static void     em_receive_checksum(struct e1000_rx_desc *, struct mbuf *);
static void     em_transmit_checksum_setup(struct tx_ring *, struct mbuf *, int,
                    struct ip *, u32 *, u32 *);
static void     em_tso_setup(struct tx_ring *, struct mbuf *, int, struct ip *,
                    struct tcphdr *, u32 *, u32 *);
static void     em_set_promisc(struct adapter *);
static void     em_disable_promisc(struct adapter *);
static void     em_set_multi(struct adapter *);
static void     em_update_link_status(struct adapter *);
static void     em_refresh_mbufs(struct rx_ring *, int);
static void     em_register_vlan(void *, struct ifnet *, u16);
static void     em_unregister_vlan(void *, struct ifnet *, u16);
static void     em_setup_vlan_hw_support(struct adapter *);
static int      em_xmit(struct tx_ring *, struct mbuf **);
static int      em_dma_malloc(struct adapter *, bus_size_t,
                    struct em_dma_alloc *, int);
static void     em_dma_free(struct adapter *, struct em_dma_alloc *);
static int      em_sysctl_nvm_info(SYSCTL_HANDLER_ARGS);
static void     em_print_nvm_info(struct adapter *);
static int      em_sysctl_debug_info(SYSCTL_HANDLER_ARGS);
static void     em_print_debug_info(struct adapter *);
static int      em_is_valid_ether_addr(u8 *);
static int      em_sysctl_int_delay(SYSCTL_HANDLER_ARGS);
static void     em_add_int_delay_sysctl(struct adapter *, const char *,
                    const char *, struct em_int_delay_info *, int, int);
/* Management and WOL Support */
static void     em_init_manageability(struct adapter *);
static void     em_release_manageability(struct adapter *);
static void     em_get_hw_control(struct adapter *);
static void     em_release_hw_control(struct adapter *);
static void     em_get_wakeup(device_t);
static void     em_enable_wakeup(device_t);
static int      em_enable_phy_wakeup(struct adapter *);
static void     em_led_func(void *, int);
static void     em_disable_aspm(struct adapter *);

static int      em_irq_fast(void *);

/* MSIX handlers */
static void     em_msix_tx(void *);
static void     em_msix_rx(void *);
static void     em_msix_link(void *);
static void     em_handle_tx(void *context, int pending);
static void     em_handle_rx(void *context, int pending);
static void     em_handle_link(void *context, int pending);

static void     em_set_sysctl_value(struct adapter *, const char *,
                    const char *, int *, int);
static int      em_set_flowcntl(SYSCTL_HANDLER_ARGS);
static int      em_sysctl_eee(SYSCTL_HANDLER_ARGS);

static __inline void em_rx_discard(struct rx_ring *, int);

#ifdef DEVICE_POLLING
static poll_handler_t em_poll;
#endif /* DEVICE_POLLING */

/*********************************************************************
 *  FreeBSD Device Interface Entry Points
 *********************************************************************/

static device_method_t em_methods[] = {
        /* Device interface */
        DEVMETHOD(device_probe, em_probe),
        DEVMETHOD(device_attach, em_attach),
        DEVMETHOD(device_detach, em_detach),
        DEVMETHOD(device_shutdown, em_shutdown),
        DEVMETHOD(device_suspend, em_suspend),
        DEVMETHOD(device_resume, em_resume),
        DEVMETHOD_END
};

static driver_t em_driver = {
        "em", em_methods, sizeof(struct adapter),
};

devclass_t em_devclass;
DRIVER_MODULE(em, pci, em_driver, em_devclass, 0, 0);
MODULE_DEPEND(em, pci, 1, 1, 1);
MODULE_DEPEND(em, ether, 1, 1, 1);
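
/*
 * When built as a module this driver is if_em.ko; following the usual
 * FreeBSD conventions it can be loaded at runtime with "kldload if_em"
 * or at boot by adding if_em_load="YES" to /boot/loader.conf.
 */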

/*********************************************************************
 *  Tunable default values.
 *********************************************************************/

#define EM_TICKS_TO_USECS(ticks)        ((1024 * (ticks) + 500) / 1000)
#define EM_USECS_TO_TICKS(usecs)        ((1000 * (usecs) + 512) / 1024)
#define M_TSO_LEN                       66
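
/*
 * The interrupt-delay registers count in units of 1.024 usecs (hence
 * the 1024/1000 scaling in the macros above), with the "+ 500" and
 * "+ 512" terms rounding to the nearest whole unit.  For example:
 *
 *      EM_TICKS_TO_USECS(8)   = (1024 * 8 + 500) / 1000   = 8
 *      EM_USECS_TO_TICKS(100) = (1000 * 100 + 512) / 1024 = 98
 */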

/* Allow common code without TSO */
#ifndef CSUM_TSO
#define CSUM_TSO        0
#endif

static SYSCTL_NODE(_hw, OID_AUTO, em, CTLFLAG_RD, 0, "EM driver parameters");

static int em_tx_int_delay_dflt = EM_TICKS_TO_USECS(EM_TIDV);
static int em_rx_int_delay_dflt = EM_TICKS_TO_USECS(EM_RDTR);
TUNABLE_INT("hw.em.tx_int_delay", &em_tx_int_delay_dflt);
TUNABLE_INT("hw.em.rx_int_delay", &em_rx_int_delay_dflt);
SYSCTL_INT(_hw_em, OID_AUTO, tx_int_delay, CTLFLAG_RDTUN, &em_tx_int_delay_dflt,
    0, "Default transmit interrupt delay in usecs");
SYSCTL_INT(_hw_em, OID_AUTO, rx_int_delay, CTLFLAG_RDTUN, &em_rx_int_delay_dflt,
    0, "Default receive interrupt delay in usecs");

static int em_tx_abs_int_delay_dflt = EM_TICKS_TO_USECS(EM_TADV);
static int em_rx_abs_int_delay_dflt = EM_TICKS_TO_USECS(EM_RADV);
TUNABLE_INT("hw.em.tx_abs_int_delay", &em_tx_abs_int_delay_dflt);
TUNABLE_INT("hw.em.rx_abs_int_delay", &em_rx_abs_int_delay_dflt);
SYSCTL_INT(_hw_em, OID_AUTO, tx_abs_int_delay, CTLFLAG_RDTUN,
    &em_tx_abs_int_delay_dflt, 0,
    "Default transmit interrupt delay limit in usecs");
SYSCTL_INT(_hw_em, OID_AUTO, rx_abs_int_delay, CTLFLAG_RDTUN,
    &em_rx_abs_int_delay_dflt, 0,
    "Default receive interrupt delay limit in usecs");

static int em_rxd = EM_DEFAULT_RXD;
static int em_txd = EM_DEFAULT_TXD;
TUNABLE_INT("hw.em.rxd", &em_rxd);
TUNABLE_INT("hw.em.txd", &em_txd);
SYSCTL_INT(_hw_em, OID_AUTO, rxd, CTLFLAG_RDTUN, &em_rxd, 0,
    "Number of receive descriptors per queue");
SYSCTL_INT(_hw_em, OID_AUTO, txd, CTLFLAG_RDTUN, &em_txd, 0,
    "Number of transmit descriptors per queue");

static int em_smart_pwr_down = FALSE;
TUNABLE_INT("hw.em.smart_pwr_down", &em_smart_pwr_down);
SYSCTL_INT(_hw_em, OID_AUTO, smart_pwr_down, CTLFLAG_RDTUN, &em_smart_pwr_down,
    0, "Set to true to leave smart power down enabled on newer adapters");

/* Controls whether promiscuous also shows bad packets */
static int em_debug_sbp = FALSE;
TUNABLE_INT("hw.em.sbp", &em_debug_sbp);
SYSCTL_INT(_hw_em, OID_AUTO, sbp, CTLFLAG_RDTUN, &em_debug_sbp, 0,
    "Show bad packets in promiscuous mode");

static int em_enable_msix = TRUE;
TUNABLE_INT("hw.em.enable_msix", &em_enable_msix);
SYSCTL_INT(_hw_em, OID_AUTO, enable_msix, CTLFLAG_RDTUN, &em_enable_msix, 0,
    "Enable MSI-X interrupts");

/* How many packets rxeof tries to clean at a time */
static int em_rx_process_limit = 100;
TUNABLE_INT("hw.em.rx_process_limit", &em_rx_process_limit);
SYSCTL_INT(_hw_em, OID_AUTO, rx_process_limit, CTLFLAG_RDTUN,
    &em_rx_process_limit, 0,
    "Maximum number of received packets to process "
    "at a time, -1 means unlimited");

/* Energy efficient ethernet - default to OFF */
static int eee_setting = 1;
TUNABLE_INT("hw.em.eee_setting", &eee_setting);
SYSCTL_INT(_hw_em, OID_AUTO, eee_setting, CTLFLAG_RDTUN, &eee_setting, 0,
    "Enable Energy Efficient Ethernet");
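
/*
 * All of the hw.em.* knobs above are boot-time tunables.  A minimal
 * /boot/loader.conf sketch (the values shown are illustrative only,
 * not recommendations):
 *
 *      hw.em.txd="1024"
 *      hw.em.rxd="1024"
 *      hw.em.rx_process_limit="200"
 *      hw.em.enable_msix="1"
 *
 * Current values can be inspected at runtime with "sysctl hw.em".
 */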

/* Global used in WOL setup with multiport cards */
static int global_quad_port_a = 0;

#ifdef DEV_NETMAP       /* see ixgbe.c for details */
#include <dev/netmap/if_em_netmap.h>
#endif /* DEV_NETMAP */

/*********************************************************************
 *  Device identification routine
 *
 *  em_probe determines whether the driver should be loaded on an
 *  adapter based on the PCI vendor/device ID of the adapter.
 *
 *  return BUS_PROBE_DEFAULT on success, positive on failure
 *********************************************************************/

static int
em_probe(device_t dev)
{
        char            adapter_name[60];
        u16             pci_vendor_id = 0;
        u16             pci_device_id = 0;
        u16             pci_subvendor_id = 0;
        u16             pci_subdevice_id = 0;
        em_vendor_info_t *ent;

        INIT_DEBUGOUT("em_probe: begin");

        pci_vendor_id = pci_get_vendor(dev);
        if (pci_vendor_id != EM_VENDOR_ID)
                return (ENXIO);

        pci_device_id = pci_get_device(dev);
        pci_subvendor_id = pci_get_subvendor(dev);
        pci_subdevice_id = pci_get_subdevice(dev);

        ent = em_vendor_info_array;
        while (ent->vendor_id != 0) {
                if ((pci_vendor_id == ent->vendor_id) &&
                    (pci_device_id == ent->device_id) &&

                    ((pci_subvendor_id == ent->subvendor_id) ||
                    (ent->subvendor_id == PCI_ANY_ID)) &&

                    ((pci_subdevice_id == ent->subdevice_id) ||
                    (ent->subdevice_id == PCI_ANY_ID))) {
                        sprintf(adapter_name, "%s %s",
                                em_strings[ent->index],
                                em_driver_version);
                        device_set_desc_copy(dev, adapter_name);
                        return (BUS_PROBE_DEFAULT);
                }
                ent++;
        }

        return (ENXIO);
}

/*********************************************************************
 *  Device initialization routine
 *
 *  The attach entry point is called when the driver is being loaded.
 *  This routine identifies the type of hardware, allocates all resources
 *  and initializes the hardware.
 *
 *  return 0 on success, positive on failure
 *********************************************************************/

static int
em_attach(device_t dev)
{
        struct adapter  *adapter;
        struct e1000_hw *hw;
        int             error = 0;

        INIT_DEBUGOUT("em_attach: begin");

        if (resource_disabled("em", device_get_unit(dev))) {
                device_printf(dev, "Disabled by device hint\n");
                return (ENXIO);
        }

        adapter = device_get_softc(dev);
        adapter->dev = adapter->osdep.dev = dev;
        hw = &adapter->hw;
        EM_CORE_LOCK_INIT(adapter, device_get_nameunit(dev));

        /* SYSCTL stuff */
        SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
            SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
            OID_AUTO, "nvm", CTLTYPE_INT|CTLFLAG_RW, adapter, 0,
            em_sysctl_nvm_info, "I", "NVM Information");

        SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
            SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
            OID_AUTO, "debug", CTLTYPE_INT|CTLFLAG_RW, adapter, 0,
            em_sysctl_debug_info, "I", "Debug Information");

        SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
            SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
            OID_AUTO, "fc", CTLTYPE_INT|CTLFLAG_RW, adapter, 0,
            em_set_flowcntl, "I", "Flow Control");

        callout_init_mtx(&adapter->timer, &adapter->core_mtx, 0);

        /* Determine hardware and mac info */
        em_identify_hardware(adapter);

        /* Setup PCI resources */
        if (em_allocate_pci_resources(adapter)) {
                device_printf(dev, "Allocation of PCI resources failed\n");
                error = ENXIO;
                goto err_pci;
        }

        /*
        ** For ICH8 and family we need to
        ** map the flash memory, and this
        ** must happen after the MAC is
        ** identified
        */
        if ((hw->mac.type == e1000_ich8lan) ||
            (hw->mac.type == e1000_ich9lan) ||
            (hw->mac.type == e1000_ich10lan) ||
            (hw->mac.type == e1000_pchlan) ||
            (hw->mac.type == e1000_pch2lan)) {
                int rid = EM_BAR_TYPE_FLASH;
                adapter->flash = bus_alloc_resource_any(dev,
                    SYS_RES_MEMORY, &rid, RF_ACTIVE);
                if (adapter->flash == NULL) {
                        device_printf(dev, "Mapping of Flash failed\n");
                        error = ENXIO;
                        goto err_pci;
                }
                /* This is used in the shared code */
                hw->flash_address = (u8 *)adapter->flash;
                adapter->osdep.flash_bus_space_tag =
                    rman_get_bustag(adapter->flash);
                adapter->osdep.flash_bus_space_handle =
                    rman_get_bushandle(adapter->flash);
        }

        /* Do Shared Code initialization */
        if (e1000_setup_init_funcs(hw, TRUE)) {
                device_printf(dev, "Setup of Shared code failed\n");
                error = ENXIO;
                goto err_pci;
        }

        e1000_get_bus_info(hw);

        /* Set up some sysctls for the tunable interrupt delays */
        em_add_int_delay_sysctl(adapter, "rx_int_delay",
            "receive interrupt delay in usecs", &adapter->rx_int_delay,
            E1000_REGISTER(hw, E1000_RDTR), em_rx_int_delay_dflt);
        em_add_int_delay_sysctl(adapter, "tx_int_delay",
            "transmit interrupt delay in usecs", &adapter->tx_int_delay,
            E1000_REGISTER(hw, E1000_TIDV), em_tx_int_delay_dflt);
        em_add_int_delay_sysctl(adapter, "rx_abs_int_delay",
            "receive interrupt delay limit in usecs",
            &adapter->rx_abs_int_delay,
            E1000_REGISTER(hw, E1000_RADV),
            em_rx_abs_int_delay_dflt);
        em_add_int_delay_sysctl(adapter, "tx_abs_int_delay",
            "transmit interrupt delay limit in usecs",
            &adapter->tx_abs_int_delay,
            E1000_REGISTER(hw, E1000_TADV),
            em_tx_abs_int_delay_dflt);

        /* Sysctl for limiting the amount of work done in the taskqueue */
        em_set_sysctl_value(adapter, "rx_processing_limit",
            "max number of rx packets to process", &adapter->rx_process_limit,
            em_rx_process_limit);
        /*
         * Validate the number of transmit and receive descriptors. It
         * must not exceed the hardware maximum, and the ring size in
         * bytes must be a multiple of EM_DBA_ALIGN.
         */
        if (((em_txd * sizeof(struct e1000_tx_desc)) % EM_DBA_ALIGN) != 0 ||
            (em_txd > EM_MAX_TXD) || (em_txd < EM_MIN_TXD)) {
                device_printf(dev, "Using %d TX descriptors instead of %d!\n",
                    EM_DEFAULT_TXD, em_txd);
                adapter->num_tx_desc = EM_DEFAULT_TXD;
        } else
                adapter->num_tx_desc = em_txd;

        if (((em_rxd * sizeof(struct e1000_rx_desc)) % EM_DBA_ALIGN) != 0 ||
            (em_rxd > EM_MAX_RXD) || (em_rxd < EM_MIN_RXD)) {
                device_printf(dev, "Using %d RX descriptors instead of %d!\n",
                    EM_DEFAULT_RXD, em_rxd);
                adapter->num_rx_desc = EM_DEFAULT_RXD;
        } else
                adapter->num_rx_desc = em_rxd;
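
        /*
         * For example, assuming EM_DBA_ALIGN is 128 and the legacy
         * descriptor is 16 bytes, the checks above require the ring
         * size to be a multiple of 8 descriptors: em_txd=1020 would
         * fall back to EM_DEFAULT_TXD, while em_txd=1024 is accepted.
         */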

        hw->mac.autoneg = DO_AUTO_NEG;
        hw->phy.autoneg_wait_to_complete = FALSE;
        hw->phy.autoneg_advertised = AUTONEG_ADV_DEFAULT;

        /* Copper options */
        if (hw->phy.media_type == e1000_media_type_copper) {
                hw->phy.mdix = AUTO_ALL_MODES;
                hw->phy.disable_polarity_correction = FALSE;
                hw->phy.ms_type = EM_MASTER_SLAVE;
        }

        /*
         * Set the frame limits assuming
         * standard ethernet sized frames.
         */
        adapter->max_frame_size = ETHERMTU + ETHER_HDR_LEN + ETHERNET_FCS_SIZE;
        adapter->min_frame_size = ETH_ZLEN + ETHERNET_FCS_SIZE;

        /*
         * This controls when hardware reports transmit completion
         * status.
         */
        hw->mac.report_tx_early = 1;

        /*
        ** Get queue/ring memory
        */
        if (em_allocate_queues(adapter)) {
                error = ENOMEM;
                goto err_pci;
        }

        /* Allocate multicast array memory. */
        adapter->mta = malloc(sizeof(u8) * ETH_ADDR_LEN *
            MAX_NUM_MULTICAST_ADDRESSES, M_DEVBUF, M_NOWAIT);
        if (adapter->mta == NULL) {
                device_printf(dev, "Can not allocate multicast setup array\n");
                error = ENOMEM;
                goto err_late;
        }

        /* Check SOL/IDER usage */
        if (e1000_check_reset_block(hw))
                device_printf(dev, "PHY reset is blocked"
                    " due to SOL/IDER session.\n");

        /* Sysctl for setting Energy Efficient Ethernet */
        hw->dev_spec.ich8lan.eee_disable = eee_setting;
        SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
            SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
            OID_AUTO, "eee_control", CTLTYPE_INT|CTLFLAG_RW,
            adapter, 0, em_sysctl_eee, "I",
            "Disable Energy Efficient Ethernet");

        /*
        ** Start from a known state; this is
        ** important for reading the NVM and
        ** MAC address that follow.
        */
        e1000_reset_hw(hw);


        /* Make sure we have a good EEPROM before we read from it */
        if (e1000_validate_nvm_checksum(hw) < 0) {
                /*
                ** Some PCI-E parts fail the first check due to
                ** the link being in a sleep state; call it again.
                ** If it fails a second time, it's a real issue.
                */
                if (e1000_validate_nvm_checksum(hw) < 0) {
                        device_printf(dev,
                            "The EEPROM Checksum Is Not Valid\n");
                        error = EIO;
                        goto err_late;
                }
        }

        /* Copy the permanent MAC address out of the EEPROM */
        if (e1000_read_mac_addr(hw) < 0) {
                device_printf(dev, "EEPROM read error while reading MAC"
                    " address\n");
                error = EIO;
                goto err_late;
        }

        if (!em_is_valid_ether_addr(hw->mac.addr)) {
                device_printf(dev, "Invalid MAC address\n");
                error = EIO;
                goto err_late;
        }

        /*
        **  Do interrupt configuration
        */
        if (adapter->msix > 1) /* Do MSIX */
                error = em_allocate_msix(adapter);
        else  /* MSI or Legacy */
                error = em_allocate_legacy(adapter);
        if (error)
                goto err_late;

        /*
         * Get Wake-on-Lan and Management info for later use
         */
        em_get_wakeup(dev);

        /* Setup OS specific network interface */
        if (em_setup_interface(dev, adapter) != 0)
                goto err_late;

        em_reset(adapter);

        /* Initialize statistics */
        em_update_stats_counters(adapter);

        hw->mac.get_link_status = 1;
        em_update_link_status(adapter);

        /* Register for VLAN events */
        adapter->vlan_attach = EVENTHANDLER_REGISTER(vlan_config,
            em_register_vlan, adapter, EVENTHANDLER_PRI_FIRST);
        adapter->vlan_detach = EVENTHANDLER_REGISTER(vlan_unconfig,
            em_unregister_vlan, adapter, EVENTHANDLER_PRI_FIRST);

        em_add_hw_stats(adapter);

        /* Non-AMT based hardware can now take control from firmware */
        if (adapter->has_manage && !adapter->has_amt)
                em_get_hw_control(adapter);

        /* Tell the stack that the interface is not active */
        adapter->ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
        adapter->ifp->if_drv_flags |= IFF_DRV_OACTIVE;

        adapter->led_dev = led_create(em_led_func, adapter,
            device_get_nameunit(dev));
#ifdef DEV_NETMAP
        em_netmap_attach(adapter);
#endif /* DEV_NETMAP */

        INIT_DEBUGOUT("em_attach: end");

        return (0);

err_late:
        em_free_transmit_structures(adapter);
        em_free_receive_structures(adapter);
        em_release_hw_control(adapter);
        if (adapter->ifp != NULL)
                if_free(adapter->ifp);
err_pci:
        em_free_pci_resources(adapter);
        free(adapter->mta, M_DEVBUF);
        EM_CORE_LOCK_DESTROY(adapter);

        return (error);
}

/*********************************************************************
 *  Device removal routine
 *
 *  The detach entry point is called when the driver is being removed.
 *  This routine stops the adapter and deallocates all the resources
 *  that were allocated for driver operation.
 *
 *  return 0 on success, positive on failure
 *********************************************************************/

static int
em_detach(device_t dev)
{
        struct adapter  *adapter = device_get_softc(dev);
        struct ifnet    *ifp = adapter->ifp;

        INIT_DEBUGOUT("em_detach: begin");

        /* Make sure VLANS are not using driver */
        if (adapter->ifp->if_vlantrunk != NULL) {
                device_printf(dev,"Vlan in use, detach first\n");
                return (EBUSY);
        }

#ifdef DEVICE_POLLING
        if (ifp->if_capenable & IFCAP_POLLING)
                ether_poll_deregister(ifp);
#endif

        if (adapter->led_dev != NULL)
                led_destroy(adapter->led_dev);

        EM_CORE_LOCK(adapter);
        adapter->in_detach = 1;
        em_stop(adapter);
        EM_CORE_UNLOCK(adapter);
        EM_CORE_LOCK_DESTROY(adapter);

        e1000_phy_hw_reset(&adapter->hw);

        em_release_manageability(adapter);
        em_release_hw_control(adapter);

        /* Unregister VLAN events */
        if (adapter->vlan_attach != NULL)
                EVENTHANDLER_DEREGISTER(vlan_config, adapter->vlan_attach);
        if (adapter->vlan_detach != NULL)
                EVENTHANDLER_DEREGISTER(vlan_unconfig, adapter->vlan_detach);

        ether_ifdetach(adapter->ifp);
        callout_drain(&adapter->timer);

#ifdef DEV_NETMAP
        netmap_detach(ifp);
#endif /* DEV_NETMAP */

        em_free_pci_resources(adapter);
        bus_generic_detach(dev);
        if_free(ifp);

        em_free_transmit_structures(adapter);
        em_free_receive_structures(adapter);

        em_release_hw_control(adapter);
        free(adapter->mta, M_DEVBUF);

        return (0);
}

/*********************************************************************
 *
 *  Shutdown entry point
 *
 **********************************************************************/

static int
em_shutdown(device_t dev)
{
        return em_suspend(dev);
}

/*
 * Suspend/resume device methods.
 */
static int
em_suspend(device_t dev)
{
        struct adapter *adapter = device_get_softc(dev);

        EM_CORE_LOCK(adapter);

        em_release_manageability(adapter);
        em_release_hw_control(adapter);
        em_enable_wakeup(dev);

        EM_CORE_UNLOCK(adapter);

        return bus_generic_suspend(dev);
}

static int
em_resume(device_t dev)
{
        struct adapter *adapter = device_get_softc(dev);
        struct tx_ring  *txr = adapter->tx_rings;
        struct ifnet *ifp = adapter->ifp;

        EM_CORE_LOCK(adapter);
        if (adapter->hw.mac.type == e1000_pch2lan)
                e1000_resume_workarounds_pchlan(&adapter->hw);
        em_init_locked(adapter);
        em_init_manageability(adapter);

        if ((ifp->if_flags & IFF_UP) &&
            (ifp->if_drv_flags & IFF_DRV_RUNNING) && adapter->link_active) {
                for (int i = 0; i < adapter->num_queues; i++, txr++) {
                        EM_TX_LOCK(txr);
#ifdef EM_MULTIQUEUE
                        if (!drbr_empty(ifp, txr->br))
                                em_mq_start_locked(ifp, txr, NULL);
#else
                        if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
                                em_start_locked(ifp, txr);
#endif
                        EM_TX_UNLOCK(txr);
                }
        }
        EM_CORE_UNLOCK(adapter);

        return bus_generic_resume(dev);
}


#ifdef EM_MULTIQUEUE
/*********************************************************************
 *  Multiqueue Transmit routines
 *
 *  em_mq_start is called by the stack to initiate a transmit.
 *  However, if the hardware is busy the driver can queue the
 *  request rather than do an immediate send; it is this queueing,
 *  rather than having multiple tx queues, that is the advantage
 *  of this driver.
 **********************************************************************/
static int
em_mq_start_locked(struct ifnet *ifp, struct tx_ring *txr, struct mbuf *m)
{
        struct adapter  *adapter = txr->adapter;
        struct mbuf     *next;
        int             err = 0, enq = 0;

        if ((ifp->if_drv_flags & (IFF_DRV_RUNNING | IFF_DRV_OACTIVE)) !=
            IFF_DRV_RUNNING || adapter->link_active == 0) {
                if (m != NULL)
                        err = drbr_enqueue(ifp, txr->br, m);
                return (err);
        }

        enq = 0;
        if (m != NULL) {
                err = drbr_enqueue(ifp, txr->br, m);
                if (err) {
                        return (err);
                }
        }

        /* Process the queue */
        while ((next = drbr_peek(ifp, txr->br)) != NULL) {
                if ((err = em_xmit(txr, &next)) != 0) {
                        if (next == NULL) {
                                drbr_advance(ifp, txr->br);
                        } else {
                                drbr_putback(ifp, txr->br, next);
                        }
                        break;
                }
                drbr_advance(ifp, txr->br);
                enq++;
                ifp->if_obytes += next->m_pkthdr.len;
                if (next->m_flags & M_MCAST)
                        ifp->if_omcasts++;
                ETHER_BPF_MTAP(ifp, next);
                if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
                        break;
        }

        if (enq > 0) {
                /* Set the watchdog */
                txr->queue_status = EM_QUEUE_WORKING;
                txr->watchdog_time = ticks;
        }

        if (txr->tx_avail < EM_MAX_SCATTER)
                em_txeof(txr);
        if (txr->tx_avail < EM_MAX_SCATTER)
                ifp->if_drv_flags |= IFF_DRV_OACTIVE;
        return (err);
}

/*
** Multiqueue capable stack interface
*/
static int
em_mq_start(struct ifnet *ifp, struct mbuf *m)
{
        struct adapter  *adapter = ifp->if_softc;
        struct tx_ring  *txr = adapter->tx_rings;
        int             error;

        if (EM_TX_TRYLOCK(txr)) {
                error = em_mq_start_locked(ifp, txr, m);
                EM_TX_UNLOCK(txr);
        } else
                error = drbr_enqueue(ifp, txr->br, m);

        return (error);
}

/*
** Flush all ring buffers
*/
static void
em_qflush(struct ifnet *ifp)
{
        struct adapter  *adapter = ifp->if_softc;
        struct tx_ring  *txr = adapter->tx_rings;
        struct mbuf     *m;

        for (int i = 0; i < adapter->num_queues; i++, txr++) {
                EM_TX_LOCK(txr);
                while ((m = buf_ring_dequeue_sc(txr->br)) != NULL)
                        m_freem(m);
                EM_TX_UNLOCK(txr);
        }
        if_qflush(ifp);
}
#else  /* !EM_MULTIQUEUE */

static void
em_start_locked(struct ifnet *ifp, struct tx_ring *txr)
{
        struct adapter  *adapter = ifp->if_softc;
        struct mbuf     *m_head;

        EM_TX_LOCK_ASSERT(txr);

        if ((ifp->if_drv_flags & (IFF_DRV_RUNNING|IFF_DRV_OACTIVE)) !=
            IFF_DRV_RUNNING)
                return;

        if (!adapter->link_active)
                return;

        while (!IFQ_DRV_IS_EMPTY(&ifp->if_snd)) {
                /* Call cleanup if number of TX descriptors low */
                if (txr->tx_avail <= EM_TX_CLEANUP_THRESHOLD)
                        em_txeof(txr);
                if (txr->tx_avail < EM_MAX_SCATTER) {
                        ifp->if_drv_flags |= IFF_DRV_OACTIVE;
                        break;
                }
                IFQ_DRV_DEQUEUE(&ifp->if_snd, m_head);
                if (m_head == NULL)
                        break;
                /*
                 *  Encapsulation can modify our pointer, and/or make it
                 *  NULL on failure.  In that event, we can't requeue.
                 */
                if (em_xmit(txr, &m_head)) {
                        if (m_head == NULL)
                                break;
                        IFQ_DRV_PREPEND(&ifp->if_snd, m_head);
                        break;
                }

                /* Send a copy of the frame to the BPF listener */
                ETHER_BPF_MTAP(ifp, m_head);

                /* Set timeout in case hardware has problems transmitting. */
                txr->watchdog_time = ticks;
                txr->queue_status = EM_QUEUE_WORKING;
        }

        return;
}

static void
em_start(struct ifnet *ifp)
{
        struct adapter  *adapter = ifp->if_softc;
        struct tx_ring  *txr = adapter->tx_rings;

        if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
                EM_TX_LOCK(txr);
                em_start_locked(ifp, txr);
                EM_TX_UNLOCK(txr);
        }
        return;
}
#endif /* EM_MULTIQUEUE */

/*********************************************************************
 *  Ioctl entry point
 *
 *  em_ioctl is called when the user wants to configure the
 *  interface.
 *
 *  return 0 on success, positive on failure
 **********************************************************************/

static int
em_ioctl(struct ifnet *ifp, u_long command, caddr_t data)
{
        struct adapter  *adapter = ifp->if_softc;
        struct ifreq    *ifr = (struct ifreq *)data;
#if defined(INET) || defined(INET6)
        struct ifaddr   *ifa = (struct ifaddr *)data;
#endif
        bool            avoid_reset = FALSE;
        int             error = 0;

        if (adapter->in_detach)
                return (error);

        switch (command) {
        case SIOCSIFADDR:
#ifdef INET
                if (ifa->ifa_addr->sa_family == AF_INET)
                        avoid_reset = TRUE;
#endif
#ifdef INET6
                if (ifa->ifa_addr->sa_family == AF_INET6)
                        avoid_reset = TRUE;
#endif
                /*
                ** Calling init results in link renegotiation,
                ** so we avoid doing it when possible.
                */
                if (avoid_reset) {
                        ifp->if_flags |= IFF_UP;
                        if (!(ifp->if_drv_flags & IFF_DRV_RUNNING))
                                em_init(adapter);
#ifdef INET
                        if (!(ifp->if_flags & IFF_NOARP))
                                arp_ifinit(ifp, ifa);
#endif
                } else
                        error = ether_ioctl(ifp, command, data);
                break;
        case SIOCSIFMTU:
            {
                int max_frame_size;

                IOCTL_DEBUGOUT("ioctl rcv'd: SIOCSIFMTU (Set Interface MTU)");

                EM_CORE_LOCK(adapter);
                switch (adapter->hw.mac.type) {
                case e1000_82571:
                case e1000_82572:
                case e1000_ich9lan:
                case e1000_ich10lan:
                case e1000_pch2lan:
                case e1000_82574:
                case e1000_82583:
                case e1000_80003es2lan: /* 9K Jumbo Frame size */
                        max_frame_size = 9234;
                        break;
                case e1000_pchlan:
                        max_frame_size = 4096;
                        break;
                        /* Adapters that do not support jumbo frames */
                case e1000_ich8lan:
                        max_frame_size = ETHER_MAX_LEN;
                        break;
                default:
                        max_frame_size = MAX_JUMBO_FRAME_SIZE;
                }
                if (ifr->ifr_mtu > max_frame_size - ETHER_HDR_LEN -
                    ETHER_CRC_LEN) {
                        EM_CORE_UNLOCK(adapter);
                        error = EINVAL;
                        break;
                }
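                /*
                 * For example, on an 82574 the 9234-byte limit above
                 * permits an MTU of up to 9234 - ETHER_HDR_LEN (14) -
                 * ETHER_CRC_LEN (4) = 9216 bytes.
                 */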

                ifp->if_mtu = ifr->ifr_mtu;
                adapter->max_frame_size =
                    ifp->if_mtu + ETHER_HDR_LEN + ETHER_CRC_LEN;
                em_init_locked(adapter);
                EM_CORE_UNLOCK(adapter);
                break;
            }
        case SIOCSIFFLAGS:
                IOCTL_DEBUGOUT("ioctl rcv'd:\
                    SIOCSIFFLAGS (Set Interface Flags)");
                EM_CORE_LOCK(adapter);
                if (ifp->if_flags & IFF_UP) {
                        if ((ifp->if_drv_flags & IFF_DRV_RUNNING)) {
                                if ((ifp->if_flags ^ adapter->if_flags) &
                                    (IFF_PROMISC | IFF_ALLMULTI)) {
                                        em_disable_promisc(adapter);
                                        em_set_promisc(adapter);
                                }
                        } else
                                em_init_locked(adapter);
                } else
                        if (ifp->if_drv_flags & IFF_DRV_RUNNING)
                                em_stop(adapter);
                adapter->if_flags = ifp->if_flags;
                EM_CORE_UNLOCK(adapter);
                break;
        case SIOCADDMULTI:
        case SIOCDELMULTI:
                IOCTL_DEBUGOUT("ioctl rcv'd: SIOC(ADD|DEL)MULTI");
                if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
                        EM_CORE_LOCK(adapter);
                        em_disable_intr(adapter);
                        em_set_multi(adapter);
#ifdef DEVICE_POLLING
                        if (!(ifp->if_capenable & IFCAP_POLLING))
#endif
                                em_enable_intr(adapter);
                        EM_CORE_UNLOCK(adapter);
                }
                break;
        case SIOCSIFMEDIA:
                /* Check SOL/IDER usage */
                EM_CORE_LOCK(adapter);
                if (e1000_check_reset_block(&adapter->hw)) {
                        EM_CORE_UNLOCK(adapter);
                        device_printf(adapter->dev, "Media change is"
                            " blocked due to SOL/IDER session.\n");
                        break;
                }
                EM_CORE_UNLOCK(adapter);
                /* falls thru */
        case SIOCGIFMEDIA:
                IOCTL_DEBUGOUT("ioctl rcv'd: \
                    SIOCxIFMEDIA (Get/Set Interface Media)");
                error = ifmedia_ioctl(ifp, ifr, &adapter->media, command);
                break;
        case SIOCSIFCAP:
            {
                int mask, reinit;

                IOCTL_DEBUGOUT("ioctl rcv'd: SIOCSIFCAP (Set Capabilities)");
                reinit = 0;
                mask = ifr->ifr_reqcap ^ ifp->if_capenable;
#ifdef DEVICE_POLLING
                if (mask & IFCAP_POLLING) {
                        if (ifr->ifr_reqcap & IFCAP_POLLING) {
                                error = ether_poll_register(em_poll, ifp);
                                if (error)
                                        return (error);
                                EM_CORE_LOCK(adapter);
                                em_disable_intr(adapter);
                                ifp->if_capenable |= IFCAP_POLLING;
                                EM_CORE_UNLOCK(adapter);
                        } else {
                                error = ether_poll_deregister(ifp);
                                /* Enable interrupt even in error case */
                                EM_CORE_LOCK(adapter);
                                em_enable_intr(adapter);
                                ifp->if_capenable &= ~IFCAP_POLLING;
                                EM_CORE_UNLOCK(adapter);
                        }
                }
#endif
                if (mask & IFCAP_HWCSUM) {
                        ifp->if_capenable ^= IFCAP_HWCSUM;
                        reinit = 1;
                }
                if (mask & IFCAP_TSO4) {
                        ifp->if_capenable ^= IFCAP_TSO4;
                        reinit = 1;
                }
                if (mask & IFCAP_VLAN_HWTAGGING) {
                        ifp->if_capenable ^= IFCAP_VLAN_HWTAGGING;
                        reinit = 1;
                }
                if (mask & IFCAP_VLAN_HWFILTER) {
                        ifp->if_capenable ^= IFCAP_VLAN_HWFILTER;
                        reinit = 1;
                }
                if (mask & IFCAP_VLAN_HWTSO) {
                        ifp->if_capenable ^= IFCAP_VLAN_HWTSO;
                        reinit = 1;
                }
                if ((mask & IFCAP_WOL) &&
                    (ifp->if_capabilities & IFCAP_WOL) != 0) {
                        if (mask & IFCAP_WOL_MCAST)
                                ifp->if_capenable ^= IFCAP_WOL_MCAST;
                        if (mask & IFCAP_WOL_MAGIC)
                                ifp->if_capenable ^= IFCAP_WOL_MAGIC;
                }
                if (reinit && (ifp->if_drv_flags & IFF_DRV_RUNNING))
                        em_init(adapter);
                VLAN_CAPABILITIES(ifp);
                break;
            }

        default:
                error = ether_ioctl(ifp, command, data);
                break;
        }

        return (error);
}


/*********************************************************************
 *  Init entry point
 *
 *  This routine is used in two ways. It is used by the stack as
 *  the init entry point in the network interface structure. It is
 *  also used by the driver as a hw/sw initialization routine to
 *  get to a consistent state.
 **********************************************************************/

static void
em_init_locked(struct adapter *adapter)
{
        struct ifnet    *ifp = adapter->ifp;
        device_t        dev = adapter->dev;

        INIT_DEBUGOUT("em_init: begin");

        EM_CORE_LOCK_ASSERT(adapter);

        em_disable_intr(adapter);
        callout_stop(&adapter->timer);

        /* Get the latest mac address; the user may have set a LAA */
        bcopy(IF_LLADDR(adapter->ifp), adapter->hw.mac.addr,
              ETHER_ADDR_LEN);

        /* Put the address into the Receive Address Array */
        e1000_rar_set(&adapter->hw, adapter->hw.mac.addr, 0);

        /*
         * With the 82571 adapter, RAR[0] may be overwritten
         * when the other port is reset, so we keep a duplicate
         * in RAR[14] for that eventuality; this assures that
         * the interface continues to function.
         */
        if (adapter->hw.mac.type == e1000_82571) {
                e1000_set_laa_state_82571(&adapter->hw, TRUE);
                e1000_rar_set(&adapter->hw, adapter->hw.mac.addr,
                    E1000_RAR_ENTRIES - 1);
        }

        /* Initialize the hardware */
        em_reset(adapter);
        em_update_link_status(adapter);

        /* Setup VLAN support, basic and offload if available */
        E1000_WRITE_REG(&adapter->hw, E1000_VET, ETHERTYPE_VLAN);

        /* Set hardware offload abilities */
        ifp->if_hwassist = 0;
        if (ifp->if_capenable & IFCAP_TXCSUM)
                ifp->if_hwassist |= (CSUM_TCP | CSUM_UDP);
        if (ifp->if_capenable & IFCAP_TSO4)
                ifp->if_hwassist |= CSUM_TSO;

        /* Configure for OS presence */
        em_init_manageability(adapter);

        /* Prepare transmit descriptors and buffers */
        em_setup_transmit_structures(adapter);
        em_initialize_transmit_unit(adapter);

        /* Setup Multicast table */
        em_set_multi(adapter);

        /*
        ** Figure out the desired mbuf
        ** pool for doing jumbos
        */
        if (adapter->max_frame_size <= 2048)
                adapter->rx_mbuf_sz = MCLBYTES;
        else if (adapter->max_frame_size <= 4096)
                adapter->rx_mbuf_sz = MJUMPAGESIZE;
        else
                adapter->rx_mbuf_sz = MJUM9BYTES;
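        /*
         * E.g. the default 1500-byte MTU gives a 1518-byte max frame
         * and 2k clusters (MCLBYTES); a 9000-byte jumbo MTU gives a
         * 9018-byte max frame and 9k clusters (MJUM9BYTES).
         */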

        /* Prepare receive descriptors and buffers */
        if (em_setup_receive_structures(adapter)) {
                device_printf(dev, "Could not setup receive structures\n");
                em_stop(adapter);
                return;
        }
        em_initialize_receive_unit(adapter);

        /* Use real VLAN Filter support? */
        if (ifp->if_capenable & IFCAP_VLAN_HWTAGGING) {
                if (ifp->if_capenable & IFCAP_VLAN_HWFILTER)
                        /* Use real VLAN Filter support */
                        em_setup_vlan_hw_support(adapter);
                else {
                        u32 ctrl;
                        ctrl = E1000_READ_REG(&adapter->hw, E1000_CTRL);
                        ctrl |= E1000_CTRL_VME;
                        E1000_WRITE_REG(&adapter->hw, E1000_CTRL, ctrl);
                }
        }

        /* Don't lose promiscuous settings */
        em_set_promisc(adapter);

        /* Set the interface as ACTIVE */
        ifp->if_drv_flags |= IFF_DRV_RUNNING;
        ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;

        callout_reset(&adapter->timer, hz, em_local_timer, adapter);
        e1000_clear_hw_cntrs_base_generic(&adapter->hw);

        /* MSI/X configuration for 82574 */
        if (adapter->hw.mac.type == e1000_82574) {
                int tmp;
                tmp = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
                tmp |= E1000_CTRL_EXT_PBA_CLR;
                E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT, tmp);
                /* Set the IVAR - interrupt vector routing. */
                E1000_WRITE_REG(&adapter->hw, E1000_IVAR, adapter->ivars);
        }

#ifdef DEVICE_POLLING
1378         /*
1379          * Only enable interrupts if we are not polling; make sure
1380          * they are off otherwise.
1381          */
1382         if (ifp->if_capenable & IFCAP_POLLING)
1383                 em_disable_intr(adapter);
1384         else
1385 #endif /* DEVICE_POLLING */
1386                 em_enable_intr(adapter);
1387
1388         /* AMT based hardware can now take control from firmware */
1389         if (adapter->has_manage && adapter->has_amt)
1390                 em_get_hw_control(adapter);
1391 }
1392
1393 static void
1394 em_init(void *arg)
1395 {
1396         struct adapter *adapter = arg;
1397
1398         EM_CORE_LOCK(adapter);
1399         em_init_locked(adapter);
1400         EM_CORE_UNLOCK(adapter);
1401 }
1402
1403
1404 #ifdef DEVICE_POLLING
1405 /*********************************************************************
1406  *
1407  *  Legacy polling routine: note this only works with a single queue
1408  *
1409  *********************************************************************/
1410 static int
1411 em_poll(struct ifnet *ifp, enum poll_cmd cmd, int count)
1412 {
1413         struct adapter *adapter = ifp->if_softc;
1414         struct tx_ring  *txr = adapter->tx_rings;
1415         struct rx_ring  *rxr = adapter->rx_rings;
1416         u32             reg_icr;
1417         int             rx_done;
1418
1419         EM_CORE_LOCK(adapter);
1420         if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) {
1421                 EM_CORE_UNLOCK(adapter);
1422                 return (0);
1423         }
1424
1425         if (cmd == POLL_AND_CHECK_STATUS) {
1426                 reg_icr = E1000_READ_REG(&adapter->hw, E1000_ICR);
1427                 if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
1428                         callout_stop(&adapter->timer);
1429                         adapter->hw.mac.get_link_status = 1;
1430                         em_update_link_status(adapter);
1431                         callout_reset(&adapter->timer, hz,
1432                             em_local_timer, adapter);
1433                 }
1434         }
1435         EM_CORE_UNLOCK(adapter);
1436
1437         em_rxeof(rxr, count, &rx_done);
1438
1439         EM_TX_LOCK(txr);
1440         em_txeof(txr);
1441 #ifdef EM_MULTIQUEUE
1442         if (!drbr_empty(ifp, txr->br))
1443                 em_mq_start_locked(ifp, txr, NULL);
1444 #else
1445         if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
1446                 em_start_locked(ifp, txr);
1447 #endif
1448         EM_TX_UNLOCK(txr);
1449
1450         return (rx_done);
1451 }
1452 #endif /* DEVICE_POLLING */
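/*
 * Editor's note: a usage sketch, assuming a kernel built with
 * "options DEVICE_POLLING".  Polling is toggled per interface from
 * userland, e.g.:
 *
 *      ifconfig em0 polling
 *      ifconfig em0 -polling
 *
 * which flips IFCAP_POLLING in if_capenable; em_init_locked() above
 * then leaves interrupts disabled while polling is active.
 */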
1453
1454
1455 /*********************************************************************
1456  *
1457  *  Fast Legacy/MSI Combined Interrupt Service routine  
1458  *
1459  *********************************************************************/
1460 static int
1461 em_irq_fast(void *arg)
1462 {
1463         struct adapter  *adapter = arg;
1464         struct ifnet    *ifp;
1465         u32             reg_icr;
1466
1467         ifp = adapter->ifp;
1468
1469         reg_icr = E1000_READ_REG(&adapter->hw, E1000_ICR);
1470
1471         /* Hot eject?  */
1472         if (reg_icr == 0xffffffff)
1473                 return FILTER_STRAY;
1474
1475         /* Definitely not our interrupt.  */
1476         if (reg_icr == 0x0)
1477                 return FILTER_STRAY;
1478
1479         /*
1480          * Starting with the 82571 chip, bit 31 should be used to
1481          * determine whether the interrupt belongs to us.
1482          */
1483         if (adapter->hw.mac.type >= e1000_82571 &&
1484             (reg_icr & E1000_ICR_INT_ASSERTED) == 0)
1485                 return FILTER_STRAY;
1486
1487         em_disable_intr(adapter);
1488         taskqueue_enqueue(adapter->tq, &adapter->que_task);
1489
1490         /* Link status change */
1491         if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
1492                 adapter->hw.mac.get_link_status = 1;
1493                 taskqueue_enqueue(taskqueue_fast, &adapter->link_task);
1494         }
1495
1496         if (reg_icr & E1000_ICR_RXO)
1497                 adapter->rx_overruns++;
1498         return FILTER_HANDLED;
1499 }
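/*
 * Editor's note: em_irq_fast() is registered as an interrupt filter,
 * so it must not sleep or take ordinary mutexes.  It only classifies
 * the interrupt (FILTER_STRAY = not ours, FILTER_HANDLED = claimed),
 * masks further interrupts, and defers the actual RX/TX processing
 * to em_handle_que() via the taskqueue.
 */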
1500
1501 /* Combined RX/TX handler, used by Legacy and MSI */
1502 static void
1503 em_handle_que(void *context, int pending)
1504 {
1505         struct adapter  *adapter = context;
1506         struct ifnet    *ifp = adapter->ifp;
1507         struct tx_ring  *txr = adapter->tx_rings;
1508         struct rx_ring  *rxr = adapter->rx_rings;
1509
1510
1511         if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
1512                 bool more = em_rxeof(rxr, adapter->rx_process_limit, NULL);
1513                 EM_TX_LOCK(txr);
1514                 em_txeof(txr);
1515 #ifdef EM_MULTIQUEUE
1516                 if (!drbr_empty(ifp, txr->br))
1517                         em_mq_start_locked(ifp, txr, NULL);
1518 #else
1519                 if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
1520                         em_start_locked(ifp, txr);
1521 #endif
1522                 EM_TX_UNLOCK(txr);
1523                 if (more) {
1524                         taskqueue_enqueue(adapter->tq, &adapter->que_task);
1525                         return;
1526                 }
1527         }
1528
1529         em_enable_intr(adapter);
1530         return;
1531 }
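/*
 * Editor's note: the re-enqueue above acts as a simple livelock guard;
 * at most rx_process_limit frames are processed per task invocation,
 * and if more remain the task is rescheduled instead of looping, so
 * interrupts are only re-enabled once the RX ring has been drained.
 */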
1532
1533
1534 /*********************************************************************
1535  *
1536  *  MSIX Interrupt Service Routines
1537  *
1538  **********************************************************************/
1539 static void
1540 em_msix_tx(void *arg)
1541 {
1542         struct tx_ring *txr = arg;
1543         struct adapter *adapter = txr->adapter;
1544         struct ifnet    *ifp = adapter->ifp;
1545
1546         ++txr->tx_irq;
1547         EM_TX_LOCK(txr);
1548         em_txeof(txr);
1549 #ifdef EM_MULTIQUEUE
1550         if (!drbr_empty(ifp, txr->br))
1551                 em_mq_start_locked(ifp, txr, NULL);
1552 #else
1553         if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
1554                 em_start_locked(ifp, txr);
1555 #endif
1556         /* Reenable this interrupt */
1557         E1000_WRITE_REG(&adapter->hw, E1000_IMS, txr->ims);
1558         EM_TX_UNLOCK(txr);
1559         return;
1560 }
1561
1562 /*********************************************************************
1563  *
1564  *  MSIX RX Interrupt Service routine
1565  *
1566  **********************************************************************/
1567
1568 static void
1569 em_msix_rx(void *arg)
1570 {
1571         struct rx_ring  *rxr = arg;
1572         struct adapter  *adapter = rxr->adapter;
1573         bool            more;
1574
1575         ++rxr->rx_irq;
1576         if (!(adapter->ifp->if_drv_flags & IFF_DRV_RUNNING))
1577                 return;
1578         more = em_rxeof(rxr, adapter->rx_process_limit, NULL);
1579         if (more)
1580                 taskqueue_enqueue(rxr->tq, &rxr->rx_task);
1581         else
1582                 /* Reenable this interrupt */
1583                 E1000_WRITE_REG(&adapter->hw, E1000_IMS, rxr->ims);
1584         return;
1585 }
1586
1587 /*********************************************************************
1588  *
1589  *  MSIX Link Fast Interrupt Service routine
1590  *
1591  **********************************************************************/
1592 static void
1593 em_msix_link(void *arg)
1594 {
1595         struct adapter  *adapter = arg;
1596         u32             reg_icr;
1597
1598         ++adapter->link_irq;
1599         reg_icr = E1000_READ_REG(&adapter->hw, E1000_ICR);
1600
1601         if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
1602                 adapter->hw.mac.get_link_status = 1;
1603                 em_handle_link(adapter, 0);
1604         } else
1605                 E1000_WRITE_REG(&adapter->hw, E1000_IMS,
1606                     EM_MSIX_LINK | E1000_IMS_LSC);
1607         return;
1608 }
1609
1610 static void
1611 em_handle_rx(void *context, int pending)
1612 {
1613         struct rx_ring  *rxr = context;
1614         struct adapter  *adapter = rxr->adapter;
1615         bool            more;
1616
1617         more = em_rxeof(rxr, adapter->rx_process_limit, NULL);
1618         if (more)
1619                 taskqueue_enqueue(rxr->tq, &rxr->rx_task);
1620         else
1621                 /* Reenable this interrupt */
1622                 E1000_WRITE_REG(&adapter->hw, E1000_IMS, rxr->ims);
1623 }
1624
1625 static void
1626 em_handle_tx(void *context, int pending)
1627 {
1628         struct tx_ring  *txr = context;
1629         struct adapter  *adapter = txr->adapter;
1630         struct ifnet    *ifp = adapter->ifp;
1631
1632         EM_TX_LOCK(txr);
1633         em_txeof(txr);
1634 #ifdef EM_MULTIQUEUE
1635         if (!drbr_empty(ifp, txr->br))
1636                 em_mq_start_locked(ifp, txr, NULL);
1637 #else
1638         if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
1639                 em_start_locked(ifp, txr);
1640 #endif
1641         E1000_WRITE_REG(&adapter->hw, E1000_IMS, txr->ims);
1642         EM_TX_UNLOCK(txr);
1643 }
1644
1645 static void
1646 em_handle_link(void *context, int pending)
1647 {
1648         struct adapter  *adapter = context;
1649         struct tx_ring  *txr = adapter->tx_rings;
1650         struct ifnet *ifp = adapter->ifp;
1651
1652         if (!(ifp->if_drv_flags & IFF_DRV_RUNNING))
1653                 return;
1654
1655         EM_CORE_LOCK(adapter);
1656         callout_stop(&adapter->timer);
1657         em_update_link_status(adapter);
1658         callout_reset(&adapter->timer, hz, em_local_timer, adapter);
1659         E1000_WRITE_REG(&adapter->hw, E1000_IMS,
1660             EM_MSIX_LINK | E1000_IMS_LSC);
1661         if (adapter->link_active) {
1662                 for (int i = 0; i < adapter->num_queues; i++, txr++) {
1663                         EM_TX_LOCK(txr);
1664 #ifdef EM_MULTIQUEUE
1665                         if (!drbr_empty(ifp, txr->br))
1666                                 em_mq_start_locked(ifp, txr, NULL);
1667 #else
1668                         if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
1669                                 em_start_locked(ifp, txr);
1670 #endif
1671                         EM_TX_UNLOCK(txr);
1672                 }
1673         }
1674         EM_CORE_UNLOCK(adapter);
1675 }
1676
1677
1678 /*********************************************************************
1679  *
1680  *  Media Ioctl callback
1681  *
1682  *  This routine is called whenever the user queries the status of
1683  *  the interface using ifconfig.
1684  *
1685  **********************************************************************/
1686 static void
1687 em_media_status(struct ifnet *ifp, struct ifmediareq *ifmr)
1688 {
1689         struct adapter *adapter = ifp->if_softc;
1690         u_char fiber_type = IFM_1000_SX;
1691
1692         INIT_DEBUGOUT("em_media_status: begin");
1693
1694         EM_CORE_LOCK(adapter);
1695         em_update_link_status(adapter);
1696
1697         ifmr->ifm_status = IFM_AVALID;
1698         ifmr->ifm_active = IFM_ETHER;
1699
1700         if (!adapter->link_active) {
1701                 EM_CORE_UNLOCK(adapter);
1702                 return;
1703         }
1704
1705         ifmr->ifm_status |= IFM_ACTIVE;
1706
1707         if ((adapter->hw.phy.media_type == e1000_media_type_fiber) ||
1708             (adapter->hw.phy.media_type == e1000_media_type_internal_serdes)) {
1709                 ifmr->ifm_active |= fiber_type | IFM_FDX;
1710         } else {
1711                 switch (adapter->link_speed) {
1712                 case 10:
1713                         ifmr->ifm_active |= IFM_10_T;
1714                         break;
1715                 case 100:
1716                         ifmr->ifm_active |= IFM_100_TX;
1717                         break;
1718                 case 1000:
1719                         ifmr->ifm_active |= IFM_1000_T;
1720                         break;
1721                 }
1722                 if (adapter->link_duplex == FULL_DUPLEX)
1723                         ifmr->ifm_active |= IFM_FDX;
1724                 else
1725                         ifmr->ifm_active |= IFM_HDX;
1726         }
1727         EM_CORE_UNLOCK(adapter);
1728 }
1729
1730 /*********************************************************************
1731  *
1732  *  Media Ioctl callback
1733  *
1734  *  This routine is called when the user changes speed/duplex using
1735  *  the media/mediaopt options with ifconfig.
1736  *
1737  **********************************************************************/
1738 static int
1739 em_media_change(struct ifnet *ifp)
1740 {
1741         struct adapter *adapter = ifp->if_softc;
1742         struct ifmedia  *ifm = &adapter->media;
1743
1744         INIT_DEBUGOUT("em_media_change: begin");
1745
1746         if (IFM_TYPE(ifm->ifm_media) != IFM_ETHER)
1747                 return (EINVAL);
1748
1749         EM_CORE_LOCK(adapter);
1750         switch (IFM_SUBTYPE(ifm->ifm_media)) {
1751         case IFM_AUTO:
1752                 adapter->hw.mac.autoneg = DO_AUTO_NEG;
1753                 adapter->hw.phy.autoneg_advertised = AUTONEG_ADV_DEFAULT;
1754                 break;
1755         case IFM_1000_LX:
1756         case IFM_1000_SX:
1757         case IFM_1000_T:
1758                 adapter->hw.mac.autoneg = DO_AUTO_NEG;
1759                 adapter->hw.phy.autoneg_advertised = ADVERTISE_1000_FULL;
1760                 break;
1761         case IFM_100_TX:
1762                 adapter->hw.mac.autoneg = FALSE;
1763                 adapter->hw.phy.autoneg_advertised = 0;
1764                 if ((ifm->ifm_media & IFM_GMASK) == IFM_FDX)
1765                         adapter->hw.mac.forced_speed_duplex = ADVERTISE_100_FULL;
1766                 else
1767                         adapter->hw.mac.forced_speed_duplex = ADVERTISE_100_HALF;
1768                 break;
1769         case IFM_10_T:
1770                 adapter->hw.mac.autoneg = FALSE;
1771                 adapter->hw.phy.autoneg_advertised = 0;
1772                 if ((ifm->ifm_media & IFM_GMASK) == IFM_FDX)
1773                         adapter->hw.mac.forced_speed_duplex = ADVERTISE_10_FULL;
1774                 else
1775                         adapter->hw.mac.forced_speed_duplex = ADVERTISE_10_HALF;
1776                 break;
1777         default:
1778                 device_printf(adapter->dev, "Unsupported media type\n");
1779         }
1780
1781         em_init_locked(adapter);
1782         EM_CORE_UNLOCK(adapter);
1783
1784         return (0);
1785 }
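/*
 * Editor's note: a usage sketch, not from the original source.  These
 * two routines back the ifconfig media commands, e.g.:
 *
 *      ifconfig em0 media 100baseTX mediaopt full-duplex
 *      ifconfig em0 media autoselect
 *
 * The first forces 100/full through the IFM_100_TX case above; the
 * second restores autonegotiation through the IFM_AUTO case.
 */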
1786
1787 /*********************************************************************
1788  *
1789  *  This routine maps the mbufs to tx descriptors.
1790  *
1791  *  return 0 on success, positive on failure
1792  **********************************************************************/
1793
1794 static int
1795 em_xmit(struct tx_ring *txr, struct mbuf **m_headp)
1796 {
1797         struct adapter          *adapter = txr->adapter;
1798         bus_dma_segment_t       segs[EM_MAX_SCATTER];
1799         bus_dmamap_t            map;
1800         struct em_buffer        *tx_buffer, *tx_buffer_mapped;
1801         struct e1000_tx_desc    *ctxd = NULL;
1802         struct mbuf             *m_head;
1803         struct ether_header     *eh;
1804         struct ip               *ip = NULL;
1805         struct tcphdr           *tp = NULL;
1806         u32                     txd_upper, txd_lower, txd_used, txd_saved;
1807         int                     ip_off, poff;
1808         int                     nsegs, i, j, first, last = 0;
1809         int                     error, do_tso, tso_desc = 0, remap = 1;
1810
1811 retry:
1812         m_head = *m_headp;
1813         txd_upper = txd_lower = txd_used = txd_saved = 0;
1814         do_tso = ((m_head->m_pkthdr.csum_flags & CSUM_TSO) != 0);
1815         ip_off = poff = 0;
1816
1817         /*
1818          * Intel recommends that the entire IP/TCP header reside in a
1819          * single buffer. If multiple descriptors are used to describe
1820          * the IP and TCP header, each descriptor should describe one
1821          * or more complete headers; descriptors referencing only parts
1822          * of headers are not supported. If all layer headers are not
1823          * coalesced into a single buffer, each buffer should not cross
1824          * a 4KB boundary, or be larger than the maximum read request
1825          * size. The controller also requires the IP/TCP header to be
1826          * modified for TSO to work, so we first get a writable mbuf
1827          * chain, then coalesce the ethernet/IP/TCP headers into a
1828          * single buffer to meet that requirement. This also simplifies
1829          * IP/TCP/UDP checksum offloading, which has similar restrictions.
1830          */
1831         if (do_tso || m_head->m_pkthdr.csum_flags & CSUM_OFFLOAD) {
1832                 if (do_tso || (m_head->m_next != NULL && 
1833                     m_head->m_pkthdr.csum_flags & CSUM_OFFLOAD)) {
1834                         if (M_WRITABLE(*m_headp) == 0) {
1835                                 m_head = m_dup(*m_headp, M_NOWAIT);
1836                                 m_freem(*m_headp);
1837                                 if (m_head == NULL) {
1838                                         *m_headp = NULL;
1839                                         return (ENOBUFS);
1840                                 }
1841                                 *m_headp = m_head;
1842                         }
1843                 }
1844                 /*
1845                  * XXX
1846                  * Assume IPv4, we don't have TSO/checksum offload support
1847                  * for IPv6 yet.
1848                  */
1849                 ip_off = sizeof(struct ether_header);
1850                 m_head = m_pullup(m_head, ip_off);
1851                 if (m_head == NULL) {
1852                         *m_headp = NULL;
1853                         return (ENOBUFS);
1854                 }
1855                 eh = mtod(m_head, struct ether_header *);
1856                 if (eh->ether_type == htons(ETHERTYPE_VLAN)) {
1857                         ip_off = sizeof(struct ether_vlan_header);
1858                         m_head = m_pullup(m_head, ip_off);
1859                         if (m_head == NULL) {
1860                                 *m_headp = NULL;
1861                                 return (ENOBUFS);
1862                         }
1863                 }
1864                 m_head = m_pullup(m_head, ip_off + sizeof(struct ip));
1865                 if (m_head == NULL) {
1866                         *m_headp = NULL;
1867                         return (ENOBUFS);
1868                 }
1869                 ip = (struct ip *)(mtod(m_head, char *) + ip_off);
1870                 poff = ip_off + (ip->ip_hl << 2);
1871                 if (do_tso) {
1872                         m_head = m_pullup(m_head, poff + sizeof(struct tcphdr));
1873                         if (m_head == NULL) {
1874                                 *m_headp = NULL;
1875                                 return (ENOBUFS);
1876                         }
1877                         tp = (struct tcphdr *)(mtod(m_head, char *) + poff);
1878                         /*
1879                          * TSO workaround:
1880                          *   pull 4 more bytes of data into the header mbuf.
1881                          */
1882                         m_head = m_pullup(m_head, poff + (tp->th_off << 2) + 4);
1883                         if (m_head == NULL) {
1884                                 *m_headp = NULL;
1885                                 return (ENOBUFS);
1886                         }
1887                         ip = (struct ip *)(mtod(m_head, char *) + ip_off);
1888                         ip->ip_len = 0;
1889                         ip->ip_sum = 0;
1890                         /*
1891                          * The TCP pseudo checksum does not include the TCP
1892                          * payload length, so the driver must recompute it
1893                          * here to match what the hardware expects to see.
1894                          * This is required by Microsoft's Large Send spec.
1895                          */
1896                         tp = (struct tcphdr *)(mtod(m_head, char *) + poff);
1897                         tp->th_sum = in_pseudo(ip->ip_src.s_addr,
1898                             ip->ip_dst.s_addr, htons(IPPROTO_TCP));
1899                 } else if (m_head->m_pkthdr.csum_flags & CSUM_TCP) {
1900                         m_head = m_pullup(m_head, poff + sizeof(struct tcphdr));
1901                         if (m_head == NULL) {
1902                                 *m_headp = NULL;
1903                                 return (ENOBUFS);
1904                         }
1905                         tp = (struct tcphdr *)(mtod(m_head, char *) + poff);
1906                         m_head = m_pullup(m_head, poff + (tp->th_off << 2));
1907                         if (m_head == NULL) {
1908                                 *m_headp = NULL;
1909                                 return (ENOBUFS);
1910                         }
1911                         ip = (struct ip *)(mtod(m_head, char *) + ip_off);
1912                         tp = (struct tcphdr *)(mtod(m_head, char *) + poff);
1913                 } else if (m_head->m_pkthdr.csum_flags & CSUM_UDP) {
1914                         m_head = m_pullup(m_head, poff + sizeof(struct udphdr));
1915                         if (m_head == NULL) {
1916                                 *m_headp = NULL;
1917                                 return (ENOBUFS);
1918                         }
1919                         ip = (struct ip *)(mtod(m_head, char *) + ip_off);
1920                 }
1921                 *m_headp = m_head;
1922         }
1923
1924         /*
1925          * Map the packet for DMA
1926          *
1927          * Capture the first descriptor index,
1928          * this descriptor will have the index
1929          * of the EOP which is the only one that
1930          * now gets a DONE bit writeback.
1931          */
1932         first = txr->next_avail_desc;
1933         tx_buffer = &txr->tx_buffers[first];
1934         tx_buffer_mapped = tx_buffer;
1935         map = tx_buffer->map;
1936
1937         error = bus_dmamap_load_mbuf_sg(txr->txtag, map,
1938             *m_headp, segs, &nsegs, BUS_DMA_NOWAIT);
1939
1940         /*
1941          * There are two types of errors we can (try) to handle:
1942          * - EFBIG means the mbuf chain was too long and bus_dma ran
1943          *   out of segments.  Defragment the mbuf chain and try again.
1944          * - ENOMEM means bus_dma could not obtain enough bounce buffers
1945          *   at this point in time.  Defer sending and try again later.
1946          * All other errors, in particular EINVAL, are fatal and prevent the
1947          * mbuf chain from ever going through.  Drop it and report error.
1948          */
1949         if (error == EFBIG && remap) {
1950                 struct mbuf *m;
1951
1952                 m = m_defrag(*m_headp, M_NOWAIT);
1953                 if (m == NULL) {
1954                         adapter->mbuf_alloc_failed++;
1955                         m_freem(*m_headp);
1956                         *m_headp = NULL;
1957                         return (ENOBUFS);
1958                 }
1959                 *m_headp = m;
1960
1961                 /* Try it again, but only once */
1962                 remap = 0;
1963                 goto retry;
1964         } else if (error == ENOMEM) {
1965                 adapter->no_tx_dma_setup++;
1966                 return (error);
1967         } else if (error != 0) {
1968                 adapter->no_tx_dma_setup++;
1969                 m_freem(*m_headp);
1970                 *m_headp = NULL;
1971                 return (error);
1972         }
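        /*
         * Editor's note: the retry above is single-shot -- remap guards
         * the goto, so a chain is defragmented at most once.  A second
         * EFBIG then lands in the fatal (error != 0) branch and the
         * packet is dropped rather than looping forever.
         */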
1973
1974         /*
1975          * TSO Hardware workaround, if this packet is not
1976          * TSO, and is only a single descriptor long, and
1977          * it follows a TSO burst, then we need to add a
1978          * sentinel descriptor to prevent premature writeback.
1979          */
1980         if ((do_tso == 0) && (txr->tx_tso == TRUE)) {
1981                 if (nsegs == 1)
1982                         tso_desc = TRUE;
1983                 txr->tx_tso = FALSE;
1984         }
1985
1986         if (nsegs > (txr->tx_avail - 2)) {
1987                 txr->no_desc_avail++;
1988                 bus_dmamap_unload(txr->txtag, map);
1989                 return (ENOBUFS);
1990         }
1991         m_head = *m_headp;
1992
1993         /* Do hardware assists */
1994         if (m_head->m_pkthdr.csum_flags & CSUM_TSO) {
1995                 em_tso_setup(txr, m_head, ip_off, ip, tp,
1996                     &txd_upper, &txd_lower);
1997                 /* we need to make a final sentinel transmit desc */
1998                 tso_desc = TRUE;
1999         } else if (m_head->m_pkthdr.csum_flags & CSUM_OFFLOAD)
2000                 em_transmit_checksum_setup(txr, m_head,
2001                     ip_off, ip, &txd_upper, &txd_lower);
2002
2003         if (m_head->m_flags & M_VLANTAG) {
2004                 /* Set the vlan id. */
2005                 txd_upper |=
2006                     (htole16(m_head->m_pkthdr.ether_vtag) << 16);
2007                 /* Tell hardware to add tag */
2008                 txd_lower |= htole32(E1000_TXD_CMD_VLE);
2009         }
2010
2011         i = txr->next_avail_desc;
2012
2013         /* Set up our transmit descriptors */
2014         for (j = 0; j < nsegs; j++) {
2015                 bus_size_t seg_len;
2016                 bus_addr_t seg_addr;
2017
2018                 tx_buffer = &txr->tx_buffers[i];
2019                 ctxd = &txr->tx_base[i];
2020                 seg_addr = segs[j].ds_addr;
2021                 seg_len  = segs[j].ds_len;
2022                 /*
2023                 ** TSO Workaround:
2024                 ** If this is the last descriptor, we want to
2025                 ** split it so we have a small final sentinel
2026                 */
2027                 if (tso_desc && (j == (nsegs - 1)) && (seg_len > 8)) {
2028                         seg_len -= 4;
2029                         ctxd->buffer_addr = htole64(seg_addr);
2030                         ctxd->lower.data = htole32(
2031                             adapter->txd_cmd | txd_lower | seg_len);
2032                         ctxd->upper.data =
2033                             htole32(txd_upper);
2034                         if (++i == adapter->num_tx_desc)
2035                                 i = 0;
2036                         /* Now make the sentinel */
2037                         ++txd_used; /* using an extra txd */
2038                         ctxd = &txr->tx_base[i];
2039                         tx_buffer = &txr->tx_buffers[i];
2040                         ctxd->buffer_addr =
2041                             htole64(seg_addr + seg_len);
2042                         ctxd->lower.data = htole32(
2043                             adapter->txd_cmd | txd_lower | 4);
2044                         ctxd->upper.data =
2045                             htole32(txd_upper);
2046                         last = i;
2047                         if (++i == adapter->num_tx_desc)
2048                                 i = 0;
2049                 } else {
2050                         ctxd->buffer_addr = htole64(seg_addr);
2051                         ctxd->lower.data = htole32(
2052                             adapter->txd_cmd | txd_lower | seg_len);
2053                         ctxd->upper.data =
2054                             htole32(txd_upper);
2055                         last = i;
2056                         if (++i == adapter->num_tx_desc)
2057                                 i = 0;
2058                 }
2059                 tx_buffer->m_head = NULL;
2060                 tx_buffer->next_eop = -1;
2061         }
2062
2063         txr->next_avail_desc = i;
2064         txr->tx_avail -= nsegs;
2065         if (tso_desc) /* TSO used an extra for sentinel */
2066                 txr->tx_avail -= txd_used;
2067
2068         tx_buffer->m_head = m_head;
2069         /*
2070         ** Here we swap the maps so the last descriptor,
2071         ** which gets the completion interrupt, has the
2072         ** real map, and the first descriptor gets the
2073         ** unused map from this last descriptor.
2074         */
2075         tx_buffer_mapped->map = tx_buffer->map;
2076         tx_buffer->map = map;
2077         bus_dmamap_sync(txr->txtag, map, BUS_DMASYNC_PREWRITE);
2078
2079         /*
2080          * Last Descriptor of Packet
2081          * needs End Of Packet (EOP)
2082          * and Report Status (RS)
2083          */
2084         ctxd->lower.data |=
2085             htole32(E1000_TXD_CMD_EOP | E1000_TXD_CMD_RS);
2086         /*
2087          * Keep track in the first buffer which
2088          * descriptor will be written back
2089          */
2090         tx_buffer = &txr->tx_buffers[first];
2091         tx_buffer->next_eop = last;
2092         /* Update the watchdog time early and often */
2093         txr->watchdog_time = ticks;
2094
2095         /*
2096          * Advance the Transmit Descriptor Tail (TDT); this tells the E1000
2097          * that this frame is available to transmit.
2098          */
2099         bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
2100             BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
2101         E1000_WRITE_REG(&adapter->hw, E1000_TDT(txr->me), i);
2102
2103         return (0);
2104 }
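/*
 * Editor's note: the 4-byte sentinel split in the descriptor loop
 * above pairs with the extra m_pullup(..., + 4) done in the TSO path;
 * per the driver's own comments it exists to keep the hardware from
 * prematurely writing back DONE status after a TSO burst.
 */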
2105
2106 static void
2107 em_set_promisc(struct adapter *adapter)
2108 {
2109         struct ifnet    *ifp = adapter->ifp;
2110         u32             reg_rctl;
2111
2112         reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
2113
2114         if (ifp->if_flags & IFF_PROMISC) {
2115                 reg_rctl |= (E1000_RCTL_UPE | E1000_RCTL_MPE);
2116                 /* Turn this on if you want to see bad packets */
2117                 if (em_debug_sbp)
2118                         reg_rctl |= E1000_RCTL_SBP;
2119                 E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2120         } else if (ifp->if_flags & IFF_ALLMULTI) {
2121                 reg_rctl |= E1000_RCTL_MPE;
2122                 reg_rctl &= ~E1000_RCTL_UPE;
2123                 E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2124         }
2125 }
2126
2127 static void
2128 em_disable_promisc(struct adapter *adapter)
2129 {
2130         u32     reg_rctl;
2131
2132         reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
2133
2134         reg_rctl &= ~E1000_RCTL_UPE;
2135         reg_rctl &= ~E1000_RCTL_MPE;
2136         reg_rctl &= ~E1000_RCTL_SBP;
2137         E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2138 }
2139
2140
2141 /*********************************************************************
2142  *  Multicast Update
2143  *
2144  *  This routine is called whenever the multicast address list is updated.
2145  *
2146  **********************************************************************/
2147
2148 static void
2149 em_set_multi(struct adapter *adapter)
2150 {
2151         struct ifnet    *ifp = adapter->ifp;
2152         struct ifmultiaddr *ifma;
2153         u32 reg_rctl = 0;
2154         u8  *mta; /* Multicast array memory */
2155         int mcnt = 0;
2156
2157         IOCTL_DEBUGOUT("em_set_multi: begin");
2158
2159         mta = adapter->mta;
2160         bzero(mta, sizeof(u8) * ETH_ADDR_LEN * MAX_NUM_MULTICAST_ADDRESSES);
2161
2162         if (adapter->hw.mac.type == e1000_82542 && 
2163             adapter->hw.revision_id == E1000_REVISION_2) {
2164                 reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
2165                 if (adapter->hw.bus.pci_cmd_word & CMD_MEM_WRT_INVALIDATE)
2166                         e1000_pci_clear_mwi(&adapter->hw);
2167                 reg_rctl |= E1000_RCTL_RST;
2168                 E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2169                 msec_delay(5);
2170         }
2171
2172 #if __FreeBSD_version < 800000
2173         IF_ADDR_LOCK(ifp);
2174 #else
2175         if_maddr_rlock(ifp);
2176 #endif
2177         TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
2178                 if (ifma->ifma_addr->sa_family != AF_LINK)
2179                         continue;
2180
2181                 if (mcnt == MAX_NUM_MULTICAST_ADDRESSES)
2182                         break;
2183
2184                 bcopy(LLADDR((struct sockaddr_dl *)ifma->ifma_addr),
2185                     &mta[mcnt * ETH_ADDR_LEN], ETH_ADDR_LEN);
2186                 mcnt++;
2187         }
2188 #if __FreeBSD_version < 800000
2189         IF_ADDR_UNLOCK(ifp);
2190 #else
2191         if_maddr_runlock(ifp);
2192 #endif
2193         if (mcnt >= MAX_NUM_MULTICAST_ADDRESSES) {
2194                 reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
2195                 reg_rctl |= E1000_RCTL_MPE;
2196                 E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2197         } else
2198                 e1000_update_mc_addr_list(&adapter->hw, mta, mcnt);
2199
2200         if (adapter->hw.mac.type == e1000_82542 && 
2201             adapter->hw.revision_id == E1000_REVISION_2) {
2202                 reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
2203                 reg_rctl &= ~E1000_RCTL_RST;
2204                 E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2205                 msec_delay(5);
2206                 if (adapter->hw.bus.pci_cmd_word & CMD_MEM_WRT_INVALIDATE)
2207                         e1000_pci_set_mwi(&adapter->hw);
2208         }
2209 }
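/*
 * Editor's note: the RCTL_RST/MWI dance above is, as I understand it,
 * an 82542 rev 2 erratum workaround -- the receiver is held in reset
 * (with memory write-invalidate disabled) while the multicast table
 * is rewritten, hence the 5ms settling delays on either side.
 */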
2210
2211
2212 /*********************************************************************
2213  *  Timer routine
2214  *
2215  *  This routine checks for link status and updates statistics.
2216  *
2217  **********************************************************************/
2218
2219 static void
2220 em_local_timer(void *arg)
2221 {
2222         struct adapter  *adapter = arg;
2223         struct ifnet    *ifp = adapter->ifp;
2224         struct tx_ring  *txr = adapter->tx_rings;
2225         struct rx_ring  *rxr = adapter->rx_rings;
2226         u32             trigger;
2227
2228         EM_CORE_LOCK_ASSERT(adapter);
2229
2230         em_update_link_status(adapter);
2231         em_update_stats_counters(adapter);
2232
2233         /* Reset LAA into RAR[0] on 82571 */
2234         if ((adapter->hw.mac.type == e1000_82571) &&
2235             e1000_get_laa_state_82571(&adapter->hw))
2236                 e1000_rar_set(&adapter->hw, adapter->hw.mac.addr, 0);
2237
2238         /* Mask to use in the irq trigger */
2239         if (adapter->msix_mem)
2240                 trigger = rxr->ims; /* RX for 82574 */
2241         else
2242                 trigger = E1000_ICS_RXDMT0;
2243
2244         /*
2245         ** Check on the state of the TX queue(s); this
2246         ** can be done without the lock because it's read-only
2247         ** and the HUNG state will be static if set.
2248         */
2249         for (int i = 0; i < adapter->num_queues; i++, txr++) {
2250                 if ((txr->queue_status == EM_QUEUE_HUNG) &&
2251                     (adapter->pause_frames == 0))
2252                         goto hung;
2253                 /* Schedule the deferred TX task if needed */
2254                 if (txr->tx_avail <= EM_MAX_SCATTER)
2255                         taskqueue_enqueue(txr->tq, &txr->tx_task);
2256         }
2257         
2258         adapter->pause_frames = 0;
2259         callout_reset(&adapter->timer, hz, em_local_timer, adapter);
2260 #ifndef DEVICE_POLLING
2261         /* Trigger an RX interrupt to guarantee mbuf refresh */
2262         E1000_WRITE_REG(&adapter->hw, E1000_ICS, trigger);
2263 #endif
2264         return;
2265 hung:
2266         /* Looks like we're hung */
2267         device_printf(adapter->dev, "Watchdog timeout -- resetting\n");
2268         device_printf(adapter->dev,
2269             "Queue(%d) tdh = %d, hw tdt = %d\n", txr->me,
2270             E1000_READ_REG(&adapter->hw, E1000_TDH(txr->me)),
2271             E1000_READ_REG(&adapter->hw, E1000_TDT(txr->me)));
2272         device_printf(adapter->dev, "TX(%d) desc avail = %d, "
2273             "Next TX to Clean = %d\n",
2274             txr->me, txr->tx_avail, txr->next_to_clean);
2275         ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
2276         adapter->watchdog_events++;
2277         adapter->pause_frames = 0;
2278         em_init_locked(adapter);
2279 }
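/*
 * Editor's note: the pause_frames test above keeps a link partner that
 * is flow-controlling us from being misdiagnosed as a hang; a queue is
 * only declared hung when no pause frames arrived during the interval.
 */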
2280
2281
2282 static void
2283 em_update_link_status(struct adapter *adapter)
2284 {
2285         struct e1000_hw *hw = &adapter->hw;
2286         struct ifnet *ifp = adapter->ifp;
2287         device_t dev = adapter->dev;
2288         struct tx_ring *txr = adapter->tx_rings;
2289         u32 link_check = 0;
2290
2291         /* Get the cached link value or read phy for real */
2292         switch (hw->phy.media_type) {
2293         case e1000_media_type_copper:
2294                 if (hw->mac.get_link_status) {
2295                         /* Do the work to read phy */
2296                         e1000_check_for_link(hw);
2297                         link_check = !hw->mac.get_link_status;
2298                         if (link_check) /* ESB2 fix */
2299                                 e1000_cfg_on_link_up(hw);
2300                 } else
2301                         link_check = TRUE;
2302                 break;
2303         case e1000_media_type_fiber:
2304                 e1000_check_for_link(hw);
2305                 link_check = (E1000_READ_REG(hw, E1000_STATUS) &
2306                                  E1000_STATUS_LU);
2307                 break;
2308         case e1000_media_type_internal_serdes:
2309                 e1000_check_for_link(hw);
2310                 link_check = adapter->hw.mac.serdes_has_link;
2311                 break;
2312         default:
2313         case e1000_media_type_unknown:
2314                 break;
2315         }
2316
2317         /* Now check for a transition */
2318         if (link_check && (adapter->link_active == 0)) {
2319                 e1000_get_speed_and_duplex(hw, &adapter->link_speed,
2320                     &adapter->link_duplex);
2321                 /* Check if we must disable SPEED_MODE bit on PCI-E */
2322                 if ((adapter->link_speed != SPEED_1000) &&
2323                     ((hw->mac.type == e1000_82571) ||
2324                     (hw->mac.type == e1000_82572))) {
2325                         int tarc0;
2326                         tarc0 = E1000_READ_REG(hw, E1000_TARC(0));
2327                         tarc0 &= ~SPEED_MODE_BIT;
2328                         E1000_WRITE_REG(hw, E1000_TARC(0), tarc0);
2329                 }
2330                 if (bootverbose)
2331                         device_printf(dev, "Link is up %d Mbps %s\n",
2332                             adapter->link_speed,
2333                             ((adapter->link_duplex == FULL_DUPLEX) ?
2334                             "Full Duplex" : "Half Duplex"));
2335                 adapter->link_active = 1;
2336                 adapter->smartspeed = 0;
2337                 ifp->if_baudrate = adapter->link_speed * 1000000;
2338                 if_link_state_change(ifp, LINK_STATE_UP);
2339         } else if (!link_check && (adapter->link_active == 1)) {
2340                 ifp->if_baudrate = adapter->link_speed = 0;
2341                 adapter->link_duplex = 0;
2342                 if (bootverbose)
2343                         device_printf(dev, "Link is Down\n");
2344                 adapter->link_active = 0;
2345                 /* Link down, disable watchdog */
2346                 for (int i = 0; i < adapter->num_queues; i++, txr++)
2347                         txr->queue_status = EM_QUEUE_IDLE;
2348                 if_link_state_change(ifp, LINK_STATE_DOWN);
2349         }
2350 }
2351
2352 /*********************************************************************
2353  *
2354  *  This routine disables all traffic on the adapter by issuing a
2355  *  global reset on the MAC.
2356  *
2357  *  This routine should be called with the CORE lock held; the
2358  *  TX locks are taken (and released) within.
2359  **********************************************************************/
2360
2361 static void
2362 em_stop(void *arg)
2363 {
2364         struct adapter  *adapter = arg;
2365         struct ifnet    *ifp = adapter->ifp;
2366         struct tx_ring  *txr = adapter->tx_rings;
2367
2368         EM_CORE_LOCK_ASSERT(adapter);
2369
2370         INIT_DEBUGOUT("em_stop: begin");
2371
2372         em_disable_intr(adapter);
2373         callout_stop(&adapter->timer);
2374
2375         /* Tell the stack that the interface is no longer active */
2376         ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
2377         ifp->if_drv_flags |= IFF_DRV_OACTIVE;
2378
2379         /* Unarm watchdog timer. */
2380         for (int i = 0; i < adapter->num_queues; i++, txr++) {
2381                 EM_TX_LOCK(txr);
2382                 txr->queue_status = EM_QUEUE_IDLE;
2383                 EM_TX_UNLOCK(txr);
2384         }
2385
2386         e1000_reset_hw(&adapter->hw);
2387         E1000_WRITE_REG(&adapter->hw, E1000_WUC, 0);
2388
2389         e1000_led_off(&adapter->hw);
2390         e1000_cleanup_led(&adapter->hw);
2391 }
2392
2393
2394 /*********************************************************************
2395  *
2396  *  Determine hardware revision.
2397  *
2398  **********************************************************************/
2399 static void
2400 em_identify_hardware(struct adapter *adapter)
2401 {
2402         device_t dev = adapter->dev;
2403
2404         /* Make sure bus mastering and memory access are enabled */
2405         adapter->hw.bus.pci_cmd_word = pci_read_config(dev, PCIR_COMMAND, 2);
2406         if (!((adapter->hw.bus.pci_cmd_word & PCIM_CMD_BUSMASTEREN) &&
2407             (adapter->hw.bus.pci_cmd_word & PCIM_CMD_MEMEN))) {
2408                 device_printf(dev, "Memory Access and/or Bus Master bits "
2409                     "were not set!\n");
2410                 adapter->hw.bus.pci_cmd_word |=
2411                     (PCIM_CMD_BUSMASTEREN | PCIM_CMD_MEMEN);
2412                 pci_write_config(dev, PCIR_COMMAND,
2413                     adapter->hw.bus.pci_cmd_word, 2);
2414         }
2415
2416         /* Save off the information about this board */
2417         adapter->hw.vendor_id = pci_get_vendor(dev);
2418         adapter->hw.device_id = pci_get_device(dev);
2419         adapter->hw.revision_id = pci_read_config(dev, PCIR_REVID, 1);
2420         adapter->hw.subsystem_vendor_id =
2421             pci_read_config(dev, PCIR_SUBVEND_0, 2);
2422         adapter->hw.subsystem_device_id =
2423             pci_read_config(dev, PCIR_SUBDEV_0, 2);
2424
2425         /* Do Shared Code Init and Setup */
2426         if (e1000_set_mac_type(&adapter->hw)) {
2427                 device_printf(dev, "Setup init failure\n");
2428                 return;
2429         }
2430 }
2431
2432 static int
2433 em_allocate_pci_resources(struct adapter *adapter)
2434 {
2435         device_t        dev = adapter->dev;
2436         int             rid;
2437
2438         rid = PCIR_BAR(0);
2439         adapter->memory = bus_alloc_resource_any(dev, SYS_RES_MEMORY,
2440             &rid, RF_ACTIVE);
2441         if (adapter->memory == NULL) {
2442                 device_printf(dev, "Unable to allocate bus resource: memory\n");
2443                 return (ENXIO);
2444         }
2445         adapter->osdep.mem_bus_space_tag =
2446             rman_get_bustag(adapter->memory);
2447         adapter->osdep.mem_bus_space_handle =
2448             rman_get_bushandle(adapter->memory);
2449         adapter->hw.hw_addr = (u8 *)&adapter->osdep.mem_bus_space_handle;
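        /*
         * Editor's note: hw_addr is deliberately pointed at the bus
         * handle itself rather than a mapped virtual address; as I
         * read the osdep layer, the E1000_READ/WRITE_REG macros go
         * through the tag/handle pair saved above, so this pointer
         * serves only as a cookie for the shared code.
         */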
2450
2451         /* Default to a single queue */
2452         adapter->num_queues = 1;
2453
2454         /*
2455          * Setup MSI/X or MSI if PCI Express
2456          */
2457         adapter->msix = em_setup_msix(adapter);
2458
2459         adapter->hw.back = &adapter->osdep;
2460
2461         return (0);
2462 }
2463
2464 /*********************************************************************
2465  *
2466  *  Setup the Legacy or MSI Interrupt handler
2467  *
2468  **********************************************************************/
2469 int
2470 em_allocate_legacy(struct adapter *adapter)
2471 {
2472         device_t dev = adapter->dev;
2473         struct tx_ring  *txr = adapter->tx_rings;
2474         int error, rid = 0;
2475
2476         /* Manually turn off all interrupts */
2477         E1000_WRITE_REG(&adapter->hw, E1000_IMC, 0xffffffff);
2478
2479         if (adapter->msix == 1) /* using MSI */
2480                 rid = 1;
2481         /* We allocate a single interrupt resource */
2482         adapter->res = bus_alloc_resource_any(dev,
2483             SYS_RES_IRQ, &rid, RF_SHAREABLE | RF_ACTIVE);
2484         if (adapter->res == NULL) {
2485                 device_printf(dev, "Unable to allocate bus resource: "
2486                     "interrupt\n");
2487                 return (ENXIO);
2488         }
2489
2490         /*
2491          * Allocate a fast interrupt and the associated
2492          * deferred processing contexts.
2493          */
2494         TASK_INIT(&adapter->que_task, 0, em_handle_que, adapter);
2495         adapter->tq = taskqueue_create_fast("em_taskq", M_NOWAIT,
2496             taskqueue_thread_enqueue, &adapter->tq);
2497         taskqueue_start_threads(&adapter->tq, 1, PI_NET, "%s que",
2498             device_get_nameunit(adapter->dev));
2499         /* Use a TX only task, enqueued from the local timer */
2500         TASK_INIT(&txr->tx_task, 0, em_handle_tx, txr);
2501         txr->tq = taskqueue_create_fast("em_txq", M_NOWAIT,
2502             taskqueue_thread_enqueue, &txr->tq);
2503         taskqueue_start_threads(&txr->tq, 1, PI_NET, "%s txq",
2504             device_get_nameunit(adapter->dev));
2505         TASK_INIT(&adapter->link_task, 0, em_handle_link, adapter);
2506         if ((error = bus_setup_intr(dev, adapter->res, INTR_TYPE_NET,
2507             em_irq_fast, NULL, adapter, &adapter->tag)) != 0) {
2508                 device_printf(dev, "Failed to register fast interrupt "
2509                             "handler: %d\n", error);
2510                 taskqueue_free(adapter->tq);
2511                 adapter->tq = NULL;
2512                 return (error);
2513         }
2514         
2515         return (0);
2516 }
2517
2518 /*********************************************************************
2519  *
2520  *  Setup the MSIX Interrupt handlers
2521  *   This is not really multiqueue; rather,
2522  *   it just uses separate interrupt vectors
2523  *   for TX, RX, and Link.
2524  *
2525  **********************************************************************/
2526 int
2527 em_allocate_msix(struct adapter *adapter)
2528 {
2529         device_t        dev = adapter->dev;
2530         struct          tx_ring *txr = adapter->tx_rings;
2531         struct          rx_ring *rxr = adapter->rx_rings;
2532         int             error, rid, vector = 0;
2533
2534
2535         /* Make sure all interrupts are disabled */
2536         E1000_WRITE_REG(&adapter->hw, E1000_IMC, 0xffffffff);
2537
2538         /* First set up ring resources */
2539         for (int i = 0; i < adapter->num_queues; i++, txr++, rxr++) {
2540
2541                 /* RX ring */
2542                 rid = vector + 1;
2543
2544                 rxr->res = bus_alloc_resource_any(dev,
2545                     SYS_RES_IRQ, &rid, RF_ACTIVE);
2546                 if (rxr->res == NULL) {
2547                         device_printf(dev,
2548                             "Unable to allocate bus resource: "
2549                             "RX MSIX Interrupt %d\n", i);
2550                         return (ENXIO);
2551                 }
2552                 if ((error = bus_setup_intr(dev, rxr->res,
2553                     INTR_TYPE_NET | INTR_MPSAFE, NULL, em_msix_rx,
2554                     rxr, &rxr->tag)) != 0) {
2555                         device_printf(dev, "Failed to register RX handler");
2556                         return (error);
2557                 }
2558 #if __FreeBSD_version >= 800504
2559                 bus_describe_intr(dev, rxr->res, rxr->tag, "rx %d", i);
2560 #endif
2561                 rxr->msix = vector++; /* NOTE increment vector for TX */
2562                 TASK_INIT(&rxr->rx_task, 0, em_handle_rx, rxr);
2563                 rxr->tq = taskqueue_create_fast("em_rxq", M_NOWAIT,
2564                     taskqueue_thread_enqueue, &rxr->tq);
2565                 taskqueue_start_threads(&rxr->tq, 1, PI_NET, "%s rxq",
2566                     device_get_nameunit(adapter->dev));
2567                 /*
2568                 ** Set the bit to enable interrupt
2569                 ** in E1000_IMS -- bits 20 and 21
2570                 ** are for RX0 and RX1, note this has
2571                 ** NOTHING to do with the MSIX vector
2572                 */
2573                 rxr->ims = 1 << (20 + i);
2574                 adapter->ivars |= (8 | rxr->msix) << (i * 4);
2575
2576                 /* TX ring */
2577                 rid = vector + 1;
2578                 txr->res = bus_alloc_resource_any(dev,
2579                     SYS_RES_IRQ, &rid, RF_ACTIVE);
2580                 if (txr->res == NULL) {
2581                         device_printf(dev,
2582                             "Unable to allocate bus resource: "
2583                             "TX MSIX Interrupt %d\n", i);
2584                         return (ENXIO);
2585                 }
2586                 if ((error = bus_setup_intr(dev, txr->res,
2587                     INTR_TYPE_NET | INTR_MPSAFE, NULL, em_msix_tx,
2588                     txr, &txr->tag)) != 0) {
2589                         device_printf(dev, "Failed to register TX handler");
2590                         return (error);
2591                 }
2592 #if __FreeBSD_version >= 800504
2593                 bus_describe_intr(dev, txr->res, txr->tag, "tx %d", i);
2594 #endif
2595                 txr->msix = vector++; /* Increment vector for next pass */
2596                 TASK_INIT(&txr->tx_task, 0, em_handle_tx, txr);
2597                 txr->tq = taskqueue_create_fast("em_txq", M_NOWAIT,
2598                     taskqueue_thread_enqueue, &txr->tq);
2599                 taskqueue_start_threads(&txr->tq, 1, PI_NET, "%s txq",
2600                     device_get_nameunit(adapter->dev));
2601                 /*
2602                 ** Set the bit to enable interrupt
2603                 ** in E1000_IMS -- bits 22 and 23
2604                 ** are for TX0 and TX1, note this has
2605                 ** NOTHING to do with the MSIX vector
2606                 */
2607                 txr->ims = 1 << (22 + i);
2608                 adapter->ivars |= (8 | txr->msix) << (8 + (i * 4));
2609         }
2610
2611         /* Link interrupt */
2612         ++rid;
2613         adapter->res = bus_alloc_resource_any(dev,
2614             SYS_RES_IRQ, &rid, RF_ACTIVE);
2615         if (!adapter->res) {
2616                 device_printf(dev,"Unable to allocate "
2617                     "bus resource: Link interrupt [%d]\n", rid);
2618                 return (ENXIO);
2619         }
2620         /* Set the link handler function */
2621         error = bus_setup_intr(dev, adapter->res,
2622             INTR_TYPE_NET | INTR_MPSAFE, NULL,
2623             em_msix_link, adapter, &adapter->tag);
2624         if (error) {
2625                 adapter->res = NULL;
2626                 device_printf(dev, "Failed to register LINK handler");
2627                 return (error);
2628         }
2629 #if __FreeBSD_version >= 800504
2630         bus_describe_intr(dev, adapter->res, adapter->tag, "link");
2631 #endif
2632         adapter->linkvec = vector;
2633         adapter->ivars |=  (8 | vector) << 16;
2634         adapter->ivars |= 0x80000000;
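        /*
         * Editor's note on the ivars encoding, as I read the 82574
         * datasheet: each cause gets a 4-bit IVAR field whose low 3
         * bits name the MSIX vector and whose bit 3 marks the entry
         * valid -- hence the (8 | vector) pattern here and in the
         * ring loop above.  With one queue this works out to RX0 =
         * 0x8, TX0 = 0x9 << 8, link = 0xa << 16, i.e. 0x800a0908
         * once bit 31 is OR'd in.
         */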
2635
2636         return (0);
2637 }
2638
2639
2640 static void
2641 em_free_pci_resources(struct adapter *adapter)
2642 {
2643         device_t        dev = adapter->dev;
2644         struct tx_ring  *txr;
2645         struct rx_ring  *rxr;
2646         int             rid;
2647
2648
2649         /*
2650         ** Release all the queue interrupt resources:
2651         */
2652         for (int i = 0; i < adapter->num_queues; i++) {
2653                 txr = &adapter->tx_rings[i];
2654                 rxr = &adapter->rx_rings[i];
2655                 /* an early abort? */
2656                 if ((txr == NULL) || (rxr == NULL))
2657                         break;
2658                 rid = txr->msix + 1;
2659                 if (txr->tag != NULL) {
2660                         bus_teardown_intr(dev, txr->res, txr->tag);
2661                         txr->tag = NULL;
2662                 }
2663                 if (txr->res != NULL)
2664                         bus_release_resource(dev, SYS_RES_IRQ,
2665                             rid, txr->res);
2666                 rid = rxr->msix + 1;
2667                 if (rxr->tag != NULL) {
2668                         bus_teardown_intr(dev, rxr->res, rxr->tag);
2669                         rxr->tag = NULL;
2670                 }
2671                 if (rxr->res != NULL)
2672                         bus_release_resource(dev, SYS_RES_IRQ,
2673                             rid, rxr->res);
2674         }
2675
2676         if (adapter->linkvec) /* we are doing MSIX */
2677                 rid = adapter->linkvec + 1;
2678         else
2679                 rid = (adapter->msix != 0) ? 1 : 0;
2680
2681         if (adapter->tag != NULL) {
2682                 bus_teardown_intr(dev, adapter->res, adapter->tag);
2683                 adapter->tag = NULL;
2684         }
2685
2686         if (adapter->res != NULL)
2687                 bus_release_resource(dev, SYS_RES_IRQ, rid, adapter->res);
2688
2689
2690         if (adapter->msix)
2691                 pci_release_msi(dev);
2692
2693         if (adapter->msix_mem != NULL)
2694                 bus_release_resource(dev, SYS_RES_MEMORY,
2695                     PCIR_BAR(EM_MSIX_BAR), adapter->msix_mem);
2696
2697         if (adapter->memory != NULL)
2698                 bus_release_resource(dev, SYS_RES_MEMORY,
2699                     PCIR_BAR(0), adapter->memory);
2700
2701         if (adapter->flash != NULL)
2702                 bus_release_resource(dev, SYS_RES_MEMORY,
2703                     EM_FLASH, adapter->flash);
2704 }
2705
2706 /*
2707  * Setup MSI or MSI/X
2708  */
2709 static int
2710 em_setup_msix(struct adapter *adapter)
2711 {
2712         device_t dev = adapter->dev;
2713         int val = 0;
2714
2715         /*
2716         ** Setup MSI/X for Hartwell: tests have shown
2717         ** use of two queues to be unstable, and to
2718         ** provide no great gain anyway, so we simply
2719         ** separate the interrupts and use a single queue.
2720         */
2721         if ((adapter->hw.mac.type == e1000_82574) &&
2722             (em_enable_msix == TRUE)) {
2723                 /* Map the MSIX BAR */
2724                 int rid = PCIR_BAR(EM_MSIX_BAR);
2725                 adapter->msix_mem = bus_alloc_resource_any(dev,
2726                     SYS_RES_MEMORY, &rid, RF_ACTIVE);
2727                 if (!adapter->msix_mem) {
2728                         /* May not be enabled */
2729                         device_printf(adapter->dev,
2730                             "Unable to map MSIX table \n");
2731                         goto msi;
2732                 }
2733                 val = pci_msix_count(dev);
2734                 /* We only need 3 vectors */
2735                 if (val > 3)
2736                         val = 3;
2737                 if (val != 3) {
2738                         bus_release_resource(dev, SYS_RES_MEMORY,
2739                             PCIR_BAR(EM_MSIX_BAR), adapter->msix_mem);
2740                         adapter->msix_mem = NULL;
2741                         device_printf(adapter->dev,
2742                             "MSIX: incorrect vectors, using MSI\n");
2743                         goto msi;
2744                 }
2745
2746                 if (pci_alloc_msix(dev, &val) == 0) {
2747                         device_printf(adapter->dev,
2748                             "Using MSIX interrupts "
2749                             "with %d vectors\n", val);
2750                         return (val);
2751                 }
2752                 /* Allocation failed, fall back to MSI */
2753         }
2754 msi:
2755         val = pci_msi_count(dev);
2756         if (val == 1 && pci_alloc_msi(dev, &val) == 0) {
2757                 adapter->msix = 1;
2758                 device_printf(adapter->dev, "Using an MSI interrupt\n");
2759                 return (val);
2760         } 
2761         /* Should only happen due to manual configuration */
2762         device_printf(adapter->dev, "No MSI/MSI-X, using a legacy IRQ\n");
2763         return (0);
2764 }
2765
2766
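/*
 * Illustrative sketch (not part of the driver): the general shape of
 * MSI-X vector negotiation.  pci_msix_count() reports how many message
 * vectors the function advertises, and pci_alloc_msix() takes the
 * desired count by reference and may hand back fewer.  em_setup_msix()
 * above wants exactly three vectors (TX, RX and link) on the 82574 and
 * falls back to MSI, then to a legacy IRQ, when that cannot be met.
 */
static int
em_example_negotiate_msix(device_t dev, int want)
{
        int count;

        count = pci_msix_count(dev);
        if (count < want)
                return (0);             /* not enough, caller falls back */
        count = want;
        if (pci_alloc_msix(dev, &count) != 0)
                return (0);             /* allocation failed outright */
        if (count < want) {
                pci_release_msi(dev);   /* trimmed allocation, give it back */
                return (0);
        }
        return (count);
}
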
2767 /*********************************************************************
2768  *
2769  *  Initialize the hardware to a configuration
2770  *  as specified by the adapter structure.
2771  *
2772  **********************************************************************/
2773 static void
2774 em_reset(struct adapter *adapter)
2775 {
2776         device_t        dev = adapter->dev;
2777         struct ifnet    *ifp = adapter->ifp;
2778         struct e1000_hw *hw = &adapter->hw;
2779         u16             rx_buffer_size;
2780         u32             pba;
2781
2782         INIT_DEBUGOUT("em_reset: begin");
2783
2784         /* Set up smart power down as default off on newer adapters. */
2785         if (!em_smart_pwr_down && (hw->mac.type == e1000_82571 ||
2786             hw->mac.type == e1000_82572)) {
2787                 u16 phy_tmp = 0;
2788
2789                 /* Speed up time to link by disabling smart power down. */
2790                 e1000_read_phy_reg(hw, IGP02E1000_PHY_POWER_MGMT, &phy_tmp);
2791                 phy_tmp &= ~IGP02E1000_PM_SPD;
2792                 e1000_write_phy_reg(hw, IGP02E1000_PHY_POWER_MGMT, phy_tmp);
2793         }
2794
2795         /*
2796          * Packet Buffer Allocation (PBA)
2797          * Writing PBA sets the receive portion of the buffer;
2798          * the remainder is used for the transmit buffer.
2799          */
2800         switch (hw->mac.type) {
2801         /* Total Packet Buffer on these is 48K */
2802         case e1000_82571:
2803         case e1000_82572:
2804         case e1000_80003es2lan:
2805                 pba = E1000_PBA_32K; /* 32K for Rx, 16K for Tx */
2806                 break;
2807         case e1000_82573: /* 82573: Total Packet Buffer is 32K */
2808                 pba = E1000_PBA_12K; /* 12K for Rx, 20K for Tx */
2809                 break;
2810         case e1000_82574:
2811         case e1000_82583:
2812                 pba = E1000_PBA_20K; /* 20K for Rx, 20K for Tx */
2813                 break;
2814         case e1000_ich8lan:
2815                 pba = E1000_PBA_8K;
2816                 break;
2817         case e1000_ich9lan:
2818         case e1000_ich10lan:
2819                 /* Boost Receive side for jumbo frames */
2820                 if (adapter->max_frame_size > 4096)
2821                         pba = E1000_PBA_14K;
2822                 else
2823                         pba = E1000_PBA_10K;
2824                 break;
2825         case e1000_pchlan:
2826         case e1000_pch2lan:
2827                 pba = E1000_PBA_26K;
2828                 break;
2829         default:
2830                 if (adapter->max_frame_size > 8192)
2831                         pba = E1000_PBA_40K; /* 40K for Rx, 24K for Tx */
2832                 else
2833                         pba = E1000_PBA_48K; /* 48K for Rx, 16K for Tx */
2834         }
2835         E1000_WRITE_REG(&adapter->hw, E1000_PBA, pba);
2836
2837         /*
2838          * These parameters control the automatic generation (Tx) and
2839          * response (Rx) to Ethernet PAUSE frames.
2840          * - High water mark should allow for at least two frames to be
2841          *   received after sending an XOFF.
2842          * - Low water mark works best when it is very near the high water mark.
2843          *   This allows the receiver to restart by sending XON when it has
2844          *   drained a bit. Here we use an arbitrary value of 1500 which will
2845          *   restart after one full frame is pulled from the buffer. There
2846          *   could be several smaller frames in the buffer and if so they will
2847          *   not trigger the XON until their total number reduces the buffer
2848          *   by 1500.
2849          * - The pause time is fairly large at 1000 x 512ns = 512 usec.
2850          */
2851         rx_buffer_size = ((E1000_READ_REG(hw, E1000_PBA) & 0xffff) << 10);
2852         hw->fc.high_water = rx_buffer_size -
2853             roundup2(adapter->max_frame_size, 1024);
2854         hw->fc.low_water = hw->fc.high_water - 1500;
2855
2856         if (adapter->fc) /* locally set flow control value? */
2857                 hw->fc.requested_mode = adapter->fc;
2858         else
2859                 hw->fc.requested_mode = e1000_fc_full;
2860
2861         if (hw->mac.type == e1000_80003es2lan)
2862                 hw->fc.pause_time = 0xFFFF;
2863         else
2864                 hw->fc.pause_time = EM_FC_PAUSE_TIME;
2865
2866         hw->fc.send_xon = TRUE;
2867
2868         /* Device specific overrides/settings */
2869         switch (hw->mac.type) {
2870         case e1000_pchlan:
2871                 /* Workaround: no TX flow ctrl for PCH */
2872                 hw->fc.requested_mode = e1000_fc_rx_pause;
2873                 hw->fc.pause_time = 0xFFFF; /* override */
2874                 if (ifp->if_mtu > ETHERMTU) {
2875                         hw->fc.high_water = 0x3500;
2876                         hw->fc.low_water = 0x1500;
2877                 } else {
2878                         hw->fc.high_water = 0x5000;
2879                         hw->fc.low_water = 0x3000;
2880                 }
2881                 hw->fc.refresh_time = 0x1000;
2882                 break;
2883         case e1000_pch2lan:
2884                 hw->fc.high_water = 0x5C20;
2885                 hw->fc.low_water = 0x5048;
2886                 hw->fc.pause_time = 0x0650;
2887                 hw->fc.refresh_time = 0x0400;
2888                 /* Jumbos need adjusted PBA */
2889                 if (ifp->if_mtu > ETHERMTU)
2890                         E1000_WRITE_REG(hw, E1000_PBA, 12);
2891                 else
2892                         E1000_WRITE_REG(hw, E1000_PBA, 26);
2893                 break;
2894         case e1000_ich9lan:
2895         case e1000_ich10lan:
2896                 if (ifp->if_mtu > ETHERMTU) {
2897                         hw->fc.high_water = 0x2800;
2898                         hw->fc.low_water = hw->fc.high_water - 8;
2899                         break;
2900                 } 
2901                 /* else fall through */
2902         default:
2903                 if (hw->mac.type == e1000_80003es2lan)
2904                         hw->fc.pause_time = 0xFFFF;
2905                 break;
2906         }
2907
2908         /* Issue a global reset */
2909         e1000_reset_hw(hw);
2910         E1000_WRITE_REG(hw, E1000_WUC, 0);
2911         em_disable_aspm(adapter);
2912         /* and a re-init */
2913         if (e1000_init_hw(hw) < 0) {
2914                 device_printf(dev, "Hardware Initialization Failed\n");
2915                 return;
2916         }
2917
2918         E1000_WRITE_REG(hw, E1000_VET, ETHERTYPE_VLAN);
2919         e1000_get_phy_info(hw);
2920         e1000_check_for_link(hw);
2921         return;
2922 }
2923
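/*
 * Illustrative sketch (not part of the driver): the watermark math in
 * em_reset() with concrete numbers.  The low 16 bits of PBA give the
 * receive packet buffer size in KB, so with the default 48K RX split
 * and a standard 1522-byte maximum frame:
 *
 *      rx_buffer_size = 48 << 10                    = 49152 bytes
 *      high_water     = 49152 - roundup2(1522, 1024)
 *                     = 49152 - 2048                = 47104 bytes
 *      low_water      = 47104 - 1500                = 45604 bytes
 *
 * leaving room for about two full frames above the XOFF point, with
 * XON sent once roughly one frame's worth has drained.
 */
static inline u32
em_example_high_water(u32 pba_reg, u32 max_frame)
{
        u32 rx_bytes = (pba_reg & 0xffff) << 10;        /* KB to bytes */

        return (rx_bytes - roundup2(max_frame, 1024));
}
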
2924 /*********************************************************************
2925  *
2926  *  Setup networking device structure and register an interface.
2927  *
2928  **********************************************************************/
2929 static int
2930 em_setup_interface(device_t dev, struct adapter *adapter)
2931 {
2932         struct ifnet   *ifp;
2933
2934         INIT_DEBUGOUT("em_setup_interface: begin");
2935
2936         ifp = adapter->ifp = if_alloc(IFT_ETHER);
2937         if (ifp == NULL) {
2938                 device_printf(dev, "cannot allocate ifnet structure\n");
2939                 return (-1);
2940         }
2941         if_initname(ifp, device_get_name(dev), device_get_unit(dev));
2942         ifp->if_init =  em_init;
2943         ifp->if_softc = adapter;
2944         ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
2945         ifp->if_ioctl = em_ioctl;
2946 #ifdef EM_MULTIQUEUE
2947         /* Multiqueue stack interface */
2948         ifp->if_transmit = em_mq_start;
2949         ifp->if_qflush = em_qflush;
2950 #else
2951         ifp->if_start = em_start;
2952         IFQ_SET_MAXLEN(&ifp->if_snd, adapter->num_tx_desc - 1);
2953         ifp->if_snd.ifq_drv_maxlen = adapter->num_tx_desc - 1;
2954         IFQ_SET_READY(&ifp->if_snd);
2955 #endif  
2956
2957         ether_ifattach(ifp, adapter->hw.mac.addr);
2958
2959         ifp->if_capabilities = ifp->if_capenable = 0;
2960
2961
2962         ifp->if_capabilities |= IFCAP_HWCSUM | IFCAP_VLAN_HWCSUM;
2963         ifp->if_capabilities |= IFCAP_TSO4;
2964         /*
2965          * Tell the upper layer(s) we
2966          * support full VLAN capability
2967          */
2968         ifp->if_data.ifi_hdrlen = sizeof(struct ether_vlan_header);
2969         ifp->if_capabilities |= IFCAP_VLAN_HWTAGGING
2970                              |  IFCAP_VLAN_HWTSO
2971                              |  IFCAP_VLAN_MTU;
2972         ifp->if_capenable = ifp->if_capabilities;
2973
2974         /*
2975         ** Don't turn this on by default: if vlans are
2976         ** created on another pseudo device (e.g. lagg),
2977         ** vlan events are not passed through, breaking
2978         ** operation, but with HW FILTER off it works. If
2979         ** you use vlans directly on the em driver you can
2980         ** enable this and get full hardware tag filtering.
2981         */
2982         ifp->if_capabilities |= IFCAP_VLAN_HWFILTER;
2983
2984 #ifdef DEVICE_POLLING
2985         ifp->if_capabilities |= IFCAP_POLLING;
2986 #endif
2987
2988         /* Enable only WOL MAGIC by default */
2989         if (adapter->wol) {
2990                 ifp->if_capabilities |= IFCAP_WOL;
2991                 ifp->if_capenable |= IFCAP_WOL_MAGIC;
2992         }
2993                 
2994         /*
2995          * Specify the media types supported by this adapter and register
2996          * callbacks to update media and link information
2997          */
2998         ifmedia_init(&adapter->media, IFM_IMASK,
2999             em_media_change, em_media_status);
3000         if ((adapter->hw.phy.media_type == e1000_media_type_fiber) ||
3001             (adapter->hw.phy.media_type == e1000_media_type_internal_serdes)) {
3002                 u_char fiber_type = IFM_1000_SX;        /* default type */
3003
3004                 ifmedia_add(&adapter->media, IFM_ETHER | fiber_type | IFM_FDX, 
3005                             0, NULL);
3006                 ifmedia_add(&adapter->media, IFM_ETHER | fiber_type, 0, NULL);
3007         } else {
3008                 ifmedia_add(&adapter->media, IFM_ETHER | IFM_10_T, 0, NULL);
3009                 ifmedia_add(&adapter->media, IFM_ETHER | IFM_10_T | IFM_FDX,
3010                             0, NULL);
3011                 ifmedia_add(&adapter->media, IFM_ETHER | IFM_100_TX,
3012                             0, NULL);
3013                 ifmedia_add(&adapter->media, IFM_ETHER | IFM_100_TX | IFM_FDX,
3014                             0, NULL);
3015                 if (adapter->hw.phy.type != e1000_phy_ife) {
3016                         ifmedia_add(&adapter->media,
3017                                 IFM_ETHER | IFM_1000_T | IFM_FDX, 0, NULL);
3018                         ifmedia_add(&adapter->media,
3019                                 IFM_ETHER | IFM_1000_T, 0, NULL);
3020                 }
3021         }
3022         ifmedia_add(&adapter->media, IFM_ETHER | IFM_AUTO, 0, NULL);
3023         ifmedia_set(&adapter->media, IFM_ETHER | IFM_AUTO);
3024         return (0);
3025 }
3026
3027
3028 /*
3029  * Manage DMA'able memory.
3030  */
3031 static void
3032 em_dmamap_cb(void *arg, bus_dma_segment_t *segs, int nseg, int error)
3033 {
3034         if (error)
3035                 return;
3036         *(bus_addr_t *) arg = segs[0].ds_addr;
3037 }
3038
3039 static int
3040 em_dma_malloc(struct adapter *adapter, bus_size_t size,
3041         struct em_dma_alloc *dma, int mapflags)
3042 {
3043         int error;
3044
3045         error = bus_dma_tag_create(bus_get_dma_tag(adapter->dev), /* parent */
3046                                 EM_DBA_ALIGN, 0,        /* alignment, bounds */
3047                                 BUS_SPACE_MAXADDR,      /* lowaddr */
3048                                 BUS_SPACE_MAXADDR,      /* highaddr */
3049                                 NULL, NULL,             /* filter, filterarg */
3050                                 size,                   /* maxsize */
3051                                 1,                      /* nsegments */
3052                                 size,                   /* maxsegsize */
3053                                 0,                      /* flags */
3054                                 NULL,                   /* lockfunc */
3055                                 NULL,                   /* lockarg */
3056                                 &dma->dma_tag);
3057         if (error) {
3058                 device_printf(adapter->dev,
3059                     "%s: bus_dma_tag_create failed: %d\n",
3060                     __func__, error);
3061                 goto fail_0;
3062         }
3063
3064         error = bus_dmamem_alloc(dma->dma_tag, (void**) &dma->dma_vaddr,
3065             BUS_DMA_NOWAIT | BUS_DMA_COHERENT, &dma->dma_map);
3066         if (error) {
3067                 device_printf(adapter->dev,
3068                     "%s: bus_dmamem_alloc(%ju) failed: %d\n",
3069                     __func__, (uintmax_t)size, error);
3070                 goto fail_1;
3071         }
3072
3073         dma->dma_paddr = 0;
3074         error = bus_dmamap_load(dma->dma_tag, dma->dma_map, dma->dma_vaddr,
3075             size, em_dmamap_cb, &dma->dma_paddr, mapflags | BUS_DMA_NOWAIT);
3076         if (error || dma->dma_paddr == 0) {
3077                 device_printf(adapter->dev,
3078                     "%s: bus_dmamap_load failed: %d\n",
3079                     __func__, error);
3080                 goto fail_3;
3081         }
3082
3083         return (0);
3084
3085 fail_3:
3086         bus_dmamap_unload(dma->dma_tag, dma->dma_map);
3087 fail_2:
3088         bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map);
fail_1:
3089         bus_dma_tag_destroy(dma->dma_tag);
3090 fail_0:
3091         dma->dma_map = NULL;
3092         dma->dma_tag = NULL;
3093
3094         return (error);
3095 }
3096
3097 static void
3098 em_dma_free(struct adapter *adapter, struct em_dma_alloc *dma)
3099 {
3100         if (dma->dma_tag == NULL)
3101                 return;
3102         if (dma->dma_map != NULL) {
3103                 bus_dmamap_sync(dma->dma_tag, dma->dma_map,
3104                     BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
3105                 bus_dmamap_unload(dma->dma_tag, dma->dma_map);
3106                 bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map);
3107                 dma->dma_map = NULL;
3108         }
3109         bus_dma_tag_destroy(dma->dma_tag);
3110         dma->dma_tag = NULL;
3111 }
3112
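/*
 * Illustrative sketch (not part of the driver): typical use of the
 * em_dma_malloc()/em_dma_free() pair above.  Because the load is done
 * with BUS_DMA_NOWAIT, em_dmamap_cb() runs synchronously, so dma_paddr
 * holds the ring's physical address (or is still 0 on failure) by the
 * time em_dma_malloc() returns.
 */
static int
em_example_alloc_ring(struct adapter *adapter, int ndesc,
    struct em_dma_alloc *dma)
{
        bus_size_t size;
        int error;

        size = roundup2(ndesc * sizeof(struct e1000_tx_desc), EM_DBA_ALIGN);
        error = em_dma_malloc(adapter, size, dma, BUS_DMA_NOWAIT);
        if (error != 0)
                return (error);
        /* dma_vaddr/dma_paddr now describe the ring; start it clean. */
        bzero(dma->dma_vaddr, size);
        return (0);
}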
3113
3114 /*********************************************************************
3115  *
3116  *  Allocate memory for the transmit and receive rings, and then
3117  *  the descriptors associated with each, called only once at attach.
3118  *
3119  **********************************************************************/
3120 static int
3121 em_allocate_queues(struct adapter *adapter)
3122 {
3123         device_t                dev = adapter->dev;
3124         struct tx_ring          *txr = NULL;
3125         struct rx_ring          *rxr = NULL;
3126         int rsize, tsize, error = E1000_SUCCESS;
3127         int txconf = 0, rxconf = 0;
3128
3129
3130         /* Allocate the TX ring struct memory */
3131         if (!(adapter->tx_rings =
3132             (struct tx_ring *) malloc(sizeof(struct tx_ring) *
3133             adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
3134                 device_printf(dev, "Unable to allocate TX ring memory\n");
3135                 error = ENOMEM;
3136                 goto fail;
3137         }
3138
3139         /* Now allocate the RX */
3140         if (!(adapter->rx_rings =
3141             (struct rx_ring *) malloc(sizeof(struct rx_ring) *
3142             adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
3143                 device_printf(dev, "Unable to allocate RX ring memory\n");
3144                 error = ENOMEM;
3145                 goto rx_fail;
3146         }
3147
3148         tsize = roundup2(adapter->num_tx_desc *
3149             sizeof(struct e1000_tx_desc), EM_DBA_ALIGN);
3150         /*
3151          * Now set up the TX queues; txconf is needed to handle the
3152          * possibility that things fail midcourse and we need to
3153          * undo the allocations gracefully.
3154          */ 
3155         for (int i = 0; i < adapter->num_queues; i++, txconf++) {
3156                 /* Set up some basics */
3157                 txr = &adapter->tx_rings[i];
3158                 txr->adapter = adapter;
3159                 txr->me = i;
3160
3161                 /* Initialize the TX lock */
3162                 snprintf(txr->mtx_name, sizeof(txr->mtx_name), "%s:tx(%d)",
3163                     device_get_nameunit(dev), txr->me);
3164                 mtx_init(&txr->tx_mtx, txr->mtx_name, NULL, MTX_DEF);
3165
3166                 if (em_dma_malloc(adapter, tsize,
3167                         &txr->txdma, BUS_DMA_NOWAIT)) {
3168                         device_printf(dev,
3169                             "Unable to allocate TX Descriptor memory\n");
3170                         error = ENOMEM;
3171                         goto err_tx_desc;
3172                 }
3173                 txr->tx_base = (struct e1000_tx_desc *)txr->txdma.dma_vaddr;
3174                 bzero((void *)txr->tx_base, tsize);
3175
3176                 if (em_allocate_transmit_buffers(txr)) {
3177                         device_printf(dev,
3178                             "Critical Failure setting up transmit buffers\n");
3179                         error = ENOMEM;
3180                         goto err_tx_desc;
3181                 }
3182 #if __FreeBSD_version >= 800000
3183                 /* Allocate a buf ring */
3184                 txr->br = buf_ring_alloc(4096, M_DEVBUF,
3185                     M_WAITOK, &txr->tx_mtx);
3186 #endif
3187         }
3188
3189         /*
3190          * Next the RX queues...
3191          */ 
3192         rsize = roundup2(adapter->num_rx_desc *
3193             sizeof(struct e1000_rx_desc), EM_DBA_ALIGN);
3194         for (int i = 0; i < adapter->num_queues; i++, rxconf++) {
3195                 rxr = &adapter->rx_rings[i];
3196                 rxr->adapter = adapter;
3197                 rxr->me = i;
3198
3199                 /* Initialize the RX lock */
3200                 snprintf(rxr->mtx_name, sizeof(rxr->mtx_name), "%s:rx(%d)",
3201                     device_get_nameunit(dev), rxr->me);
3202                 mtx_init(&rxr->rx_mtx, rxr->mtx_name, NULL, MTX_DEF);
3203
3204                 if (em_dma_malloc(adapter, rsize,
3205                         &rxr->rxdma, BUS_DMA_NOWAIT)) {
3206                         device_printf(dev,
3207                             "Unable to allocate RX Descriptor memory\n");
3208                         error = ENOMEM;
3209                         goto err_rx_desc;
3210                 }
3211                 rxr->rx_base = (struct e1000_rx_desc *)rxr->rxdma.dma_vaddr;
3212                 bzero((void *)rxr->rx_base, rsize);
3213
3214                 /* Allocate receive buffers for the ring */
3215                 if (em_allocate_receive_buffers(rxr)) {
3216                         device_printf(dev,
3217                             "Critical Failure setting up receive buffers\n");
3218                         error = ENOMEM;
3219                         goto err_rx_desc;
3220                 }
3221         }
3222
3223         return (0);
3224
3225 err_rx_desc:
3226         for (rxr = adapter->rx_rings; rxconf > 0; rxr++, rxconf--)
3227                 em_dma_free(adapter, &rxr->rxdma);
3228 err_tx_desc:
3229         for (txr = adapter->tx_rings; txconf > 0; txr++, txconf--)
3230                 em_dma_free(adapter, &txr->txdma);
3231         free(adapter->rx_rings, M_DEVBUF);
3232 rx_fail:
3233 #if __FreeBSD_version >= 800000
3234         if (txr != NULL && txr->br != NULL)
                buf_ring_free(txr->br, M_DEVBUF);
3235 #endif
3236         free(adapter->tx_rings, M_DEVBUF);
3237 fail:
3238         return (error);
3239 }
3240
3241
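/*
 * Illustrative sketch (not part of the driver): the "conf counter"
 * unwind idiom em_allocate_queues() uses.  The counter advances only
 * after an iteration fully succeeds, so the error path can walk it
 * back and free exactly the objects that were created.
 */
static int
em_example_setup_all(struct adapter *adapter, int n)
{
        int conf;

        for (conf = 0; conf < n; conf++)
                if (em_dma_malloc(adapter, PAGE_SIZE,
                    &adapter->tx_rings[conf].txdma, BUS_DMA_NOWAIT) != 0)
                        goto fail;
        return (0);
fail:
        while (conf-- > 0)      /* undo only what actually succeeded */
                em_dma_free(adapter, &adapter->tx_rings[conf].txdma);
        return (ENOMEM);
}
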
3242 /*********************************************************************
3243  *
3244  *  Allocate memory for tx_buffer structures. The tx_buffer stores all
3245  *  the information needed to transmit a packet on the wire. This is
3246  *  called only once at attach, setup is done every reset.
3247  *
3248  **********************************************************************/
3249 static int
3250 em_allocate_transmit_buffers(struct tx_ring *txr)
3251 {
3252         struct adapter *adapter = txr->adapter;
3253         device_t dev = adapter->dev;
3254         struct em_buffer *txbuf;
3255         int error, i;
3256
3257         /*
3258          * Setup DMA descriptor areas.
3259          */
3260         if ((error = bus_dma_tag_create(bus_get_dma_tag(dev),
3261                                1, 0,                    /* alignment, bounds */
3262                                BUS_SPACE_MAXADDR,       /* lowaddr */
3263                                BUS_SPACE_MAXADDR,       /* highaddr */
3264                                NULL, NULL,              /* filter, filterarg */
3265                                EM_TSO_SIZE,             /* maxsize */
3266                                EM_MAX_SCATTER,          /* nsegments */
3267                                PAGE_SIZE,               /* maxsegsize */
3268                                0,                       /* flags */
3269                                NULL,                    /* lockfunc */
3270                                NULL,                    /* lockfuncarg */
3271                                &txr->txtag))) {
3272                 device_printf(dev, "Unable to allocate TX DMA tag\n");
3273                 goto fail;
3274         }
3275
3276         if (!(txr->tx_buffers =
3277             (struct em_buffer *) malloc(sizeof(struct em_buffer) *
3278             adapter->num_tx_desc, M_DEVBUF, M_NOWAIT | M_ZERO))) {
3279                 device_printf(dev, "Unable to allocate tx_buffer memory\n");
3280                 error = ENOMEM;
3281                 goto fail;
3282         }
3283
3284         /* Create the descriptor buffer dma maps */
3285         txbuf = txr->tx_buffers;
3286         for (i = 0; i < adapter->num_tx_desc; i++, txbuf++) {
3287                 error = bus_dmamap_create(txr->txtag, 0, &txbuf->map);
3288                 if (error != 0) {
3289                         device_printf(dev, "Unable to create TX DMA map\n");
3290                         goto fail;
3291                 }
3292         }
3293
3294         return 0;
3295 fail:
3296         /* We free everything; this handles the case where we fail part way */
3297         em_free_transmit_structures(adapter);
3298         return (error);
3299 }
3300
3301 /*********************************************************************
3302  *
3303  *  Initialize a transmit ring.
3304  *
3305  **********************************************************************/
3306 static void
3307 em_setup_transmit_ring(struct tx_ring *txr)
3308 {
3309         struct adapter *adapter = txr->adapter;
3310         struct em_buffer *txbuf;
3311         int i;
3312 #ifdef DEV_NETMAP
3313         struct netmap_adapter *na = NA(adapter->ifp);
3314         struct netmap_slot *slot;
3315 #endif /* DEV_NETMAP */
3316
3317         /* Clear the old descriptor contents */
3318         EM_TX_LOCK(txr);
3319 #ifdef DEV_NETMAP
3320         slot = netmap_reset(na, NR_TX, txr->me, 0);
3321 #endif /* DEV_NETMAP */
3322
3323         bzero((void *)txr->tx_base,
3324               (sizeof(struct e1000_tx_desc)) * adapter->num_tx_desc);
3325         /* Reset indices */
3326         txr->next_avail_desc = 0;
3327         txr->next_to_clean = 0;
3328
3329         /* Free any existing tx buffers. */
3330         txbuf = txr->tx_buffers;
3331         for (i = 0; i < adapter->num_tx_desc; i++, txbuf++) {
3332                 if (txbuf->m_head != NULL) {
3333                         bus_dmamap_sync(txr->txtag, txbuf->map,
3334                             BUS_DMASYNC_POSTWRITE);
3335                         bus_dmamap_unload(txr->txtag, txbuf->map);
3336                         m_freem(txbuf->m_head);
3337                         txbuf->m_head = NULL;
3338                 }
3339 #ifdef DEV_NETMAP
3340                 if (slot) {
3341                         int si = netmap_idx_n2k(&na->tx_rings[txr->me], i);
3342                         uint64_t paddr;
3343                         void *addr;
3344
3345                         addr = PNMB(slot + si, &paddr);
3346                         txr->tx_base[i].buffer_addr = htole64(paddr);
3347                         /* reload the map for netmap mode */
3348                         netmap_load_map(txr->txtag, txbuf->map, addr);
3349                 }
3350 #endif /* DEV_NETMAP */
3351
3352                 /* clear the watch index */
3353                 txbuf->next_eop = -1;
3354         }
3355
3356         /* Set number of descriptors available */
3357         txr->tx_avail = adapter->num_tx_desc;
3358         txr->queue_status = EM_QUEUE_IDLE;
3359
3360         /* Clear checksum offload context. */
3361         txr->last_hw_offload = 0;
3362         txr->last_hw_ipcss = 0;
3363         txr->last_hw_ipcso = 0;
3364         txr->last_hw_tucss = 0;
3365         txr->last_hw_tucso = 0;
3366
3367         bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
3368             BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
3369         EM_TX_UNLOCK(txr);
3370 }
3371
3372 /*********************************************************************
3373  *
3374  *  Initialize all transmit rings.
3375  *
3376  **********************************************************************/
3377 static void
3378 em_setup_transmit_structures(struct adapter *adapter)
3379 {
3380         struct tx_ring *txr = adapter->tx_rings;
3381
3382         for (int i = 0; i < adapter->num_queues; i++, txr++)
3383                 em_setup_transmit_ring(txr);
3384
3385         return;
3386 }
3387
3388 /*********************************************************************
3389  *
3390  *  Enable transmit unit.
3391  *
3392  **********************************************************************/
3393 static void
3394 em_initialize_transmit_unit(struct adapter *adapter)
3395 {
3396         struct tx_ring  *txr = adapter->tx_rings;
3397         struct e1000_hw *hw = &adapter->hw;
3398         u32     tctl, tarc, tipg = 0;
3399
3400         INIT_DEBUGOUT("em_initialize_transmit_unit: begin");
3401
3402         for (int i = 0; i < adapter->num_queues; i++, txr++) {
3403                 u64 bus_addr = txr->txdma.dma_paddr;
3404                 /* Base and Len of TX Ring */
3405                 E1000_WRITE_REG(hw, E1000_TDLEN(i),
3406                     adapter->num_tx_desc * sizeof(struct e1000_tx_desc));
3407                 E1000_WRITE_REG(hw, E1000_TDBAH(i),
3408                     (u32)(bus_addr >> 32));
3409                 E1000_WRITE_REG(hw, E1000_TDBAL(i),
3410                     (u32)bus_addr);
3411                 /* Init the HEAD/TAIL indices */
3412                 E1000_WRITE_REG(hw, E1000_TDT(i), 0);
3413                 E1000_WRITE_REG(hw, E1000_TDH(i), 0);
3414
3415                 HW_DEBUGOUT2("Base = %x, Length = %x\n",
3416                     E1000_READ_REG(&adapter->hw, E1000_TDBAL(i)),
3417                     E1000_READ_REG(&adapter->hw, E1000_TDLEN(i)));
3418
3419                 txr->queue_status = EM_QUEUE_IDLE;
3420         }
3421
3422         /* Set the default values for the Tx Inter Packet Gap timer */
3423         switch (adapter->hw.mac.type) {
3424         case e1000_80003es2lan:
3425                 tipg = DEFAULT_82543_TIPG_IPGR1;
3426                 tipg |= DEFAULT_80003ES2LAN_TIPG_IPGR2 <<
3427                     E1000_TIPG_IPGR2_SHIFT;
3428                 break;
3429         default:
3430                 if ((adapter->hw.phy.media_type == e1000_media_type_fiber) ||
3431                     (adapter->hw.phy.media_type ==
3432                     e1000_media_type_internal_serdes))
3433                         tipg = DEFAULT_82543_TIPG_IPGT_FIBER;
3434                 else
3435                         tipg = DEFAULT_82543_TIPG_IPGT_COPPER;
3436                 tipg |= DEFAULT_82543_TIPG_IPGR1 << E1000_TIPG_IPGR1_SHIFT;
3437                 tipg |= DEFAULT_82543_TIPG_IPGR2 << E1000_TIPG_IPGR2_SHIFT;
3438         }
3439
3440         E1000_WRITE_REG(&adapter->hw, E1000_TIPG, tipg);
3441         E1000_WRITE_REG(&adapter->hw, E1000_TIDV, adapter->tx_int_delay.value);
3442
3443         if (adapter->hw.mac.type >= e1000_82540)
3444                 E1000_WRITE_REG(&adapter->hw, E1000_TADV,
3445                     adapter->tx_abs_int_delay.value);
3446
3447         if ((adapter->hw.mac.type == e1000_82571) ||
3448             (adapter->hw.mac.type == e1000_82572)) {
3449                 tarc = E1000_READ_REG(&adapter->hw, E1000_TARC(0));
3450                 tarc |= SPEED_MODE_BIT;
3451                 E1000_WRITE_REG(&adapter->hw, E1000_TARC(0), tarc);
3452         } else if (adapter->hw.mac.type == e1000_80003es2lan) {
3453                 tarc = E1000_READ_REG(&adapter->hw, E1000_TARC(0));
3454                 tarc |= 1;
3455                 E1000_WRITE_REG(&adapter->hw, E1000_TARC(0), tarc);
3456                 tarc = E1000_READ_REG(&adapter->hw, E1000_TARC(1));
3457                 tarc |= 1;
3458                 E1000_WRITE_REG(&adapter->hw, E1000_TARC(1), tarc);
3459         }
3460
3461         adapter->txd_cmd = E1000_TXD_CMD_IFCS;
3462         if (adapter->tx_int_delay.value > 0)
3463                 adapter->txd_cmd |= E1000_TXD_CMD_IDE;
3464
3465         /* Program the Transmit Control Register */
3466         tctl = E1000_READ_REG(&adapter->hw, E1000_TCTL);
3467         tctl &= ~E1000_TCTL_CT;
3468         tctl |= (E1000_TCTL_PSP | E1000_TCTL_RTLC | E1000_TCTL_EN |
3469                    (E1000_COLLISION_THRESHOLD << E1000_CT_SHIFT));
3470
3471         if (adapter->hw.mac.type >= e1000_82571)
3472                 tctl |= E1000_TCTL_MULR;
3473
3474         /* This write will effectively turn on the transmit unit. */
3475         E1000_WRITE_REG(&adapter->hw, E1000_TCTL, tctl);
3476
3477 }
3478
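/*
 * Illustrative sketch (not part of the driver): how the three
 * inter-packet-gap fields end up in TIPG above.  IPGT sits in the low
 * bits and IPGR1/IPGR2 are moved into their own fields by the shift
 * constants, so the register value is just the OR of the three.
 */
static inline u32
em_example_pack_tipg(u32 ipgt, u32 ipgr1, u32 ipgr2)
{
        return (ipgt |
            (ipgr1 << E1000_TIPG_IPGR1_SHIFT) |
            (ipgr2 << E1000_TIPG_IPGR2_SHIFT));
}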
3479
3480 /*********************************************************************
3481  *
3482  *  Free all transmit rings.
3483  *
3484  **********************************************************************/
3485 static void
3486 em_free_transmit_structures(struct adapter *adapter)
3487 {
3488         struct tx_ring *txr = adapter->tx_rings;
3489
3490         for (int i = 0; i < adapter->num_queues; i++, txr++) {
3491                 EM_TX_LOCK(txr);
3492                 em_free_transmit_buffers(txr);
3493                 em_dma_free(adapter, &txr->txdma);
3494                 EM_TX_UNLOCK(txr);
3495                 EM_TX_LOCK_DESTROY(txr);
3496         }
3497
3498         free(adapter->tx_rings, M_DEVBUF);
3499 }
3500
3501 /*********************************************************************
3502  *
3503  *  Free transmit ring related data structures.
3504  *
3505  **********************************************************************/
3506 static void
3507 em_free_transmit_buffers(struct tx_ring *txr)
3508 {
3509         struct adapter          *adapter = txr->adapter;
3510         struct em_buffer        *txbuf;
3511
3512         INIT_DEBUGOUT("free_transmit_ring: begin");
3513
3514         if (txr->tx_buffers == NULL)
3515                 return;
3516
3517         for (int i = 0; i < adapter->num_tx_desc; i++) {
3518                 txbuf = &txr->tx_buffers[i];
3519                 if (txbuf->m_head != NULL) {
3520                         bus_dmamap_sync(txr->txtag, txbuf->map,
3521                             BUS_DMASYNC_POSTWRITE);
3522                         bus_dmamap_unload(txr->txtag,
3523                             txbuf->map);
3524                         m_freem(txbuf->m_head);
3525                         txbuf->m_head = NULL;
3526                         if (txbuf->map != NULL) {
3527                                 bus_dmamap_destroy(txr->txtag,
3528                                     txbuf->map);
3529                                 txbuf->map = NULL;
3530                         }
3531                 } else if (txbuf->map != NULL) {
3532                         bus_dmamap_unload(txr->txtag,
3533                             txbuf->map);
3534                         bus_dmamap_destroy(txr->txtag,
3535                             txbuf->map);
3536                         txbuf->map = NULL;
3537                 }
3538         }
3539 #if __FreeBSD_version >= 800000
3540         if (txr->br != NULL)
3541                 buf_ring_free(txr->br, M_DEVBUF);
3542 #endif
3543         if (txr->tx_buffers != NULL) {
3544                 free(txr->tx_buffers, M_DEVBUF);
3545                 txr->tx_buffers = NULL;
3546         }
3547         if (txr->txtag != NULL) {
3548                 bus_dma_tag_destroy(txr->txtag);
3549                 txr->txtag = NULL;
3550         }
3551         return;
3552 }
3553
3554
3555 /*********************************************************************
3556  *  The offload context is protocol specific (TCP/UDP) and thus
3557  *  only needs to be set when the protocol changes. A context
3558  *  change can be a performance detriment, and the feature
3559  *  might be better just disabled. The reason arises in the way
3560  *  in which the controller supports pipelined requests from the
3561  *  Tx data DMA. Up to four requests can be pipelined, and they may
3562  *  belong to the same packet or to multiple packets. However all
3563  *  requests for one packet are issued before a request is issued
3564  *  for a subsequent packet and if a request for the next packet
3565  *  requires a context change, that request will be stalled
3566  *  until the previous request completes. This means setting up
3567  *  a new context effectively disables pipelined Tx data DMA, which
3568  *  in turn greatly slows down performance when sending small
3569  *  frames.
3570  **********************************************************************/
3571 static void
3572 em_transmit_checksum_setup(struct tx_ring *txr, struct mbuf *mp, int ip_off,
3573     struct ip *ip, u32 *txd_upper, u32 *txd_lower)
3574 {
3575         struct adapter                  *adapter = txr->adapter;
3576         struct e1000_context_desc       *TXD = NULL;
3577         struct em_buffer                *tx_buffer;
3578         int                             cur, hdr_len;
3579         u32                             cmd = 0;
3580         u16                             offload = 0;
3581         u8                              ipcso, ipcss, tucso, tucss;
3582
3583         ipcss = ipcso = tucss = tucso = 0;
3584         hdr_len = ip_off + (ip->ip_hl << 2);
3585         cur = txr->next_avail_desc;
3586
3587         /* Setup of IP header checksum. */
3588         if (mp->m_pkthdr.csum_flags & CSUM_IP) {
3589                 *txd_upper |= E1000_TXD_POPTS_IXSM << 8;
3590                 offload |= CSUM_IP;
3591                 ipcss = ip_off;
3592                 ipcso = ip_off + offsetof(struct ip, ip_sum);
3593                 /*
3594                  * Start offset for header checksum calculation.
3595                  * End offset for header checksum calculation.
3596                  * Offset of place to put the checksum.
3597                  */
3598                 TXD = (struct e1000_context_desc *)&txr->tx_base[cur];
3599                 TXD->lower_setup.ip_fields.ipcss = ipcss;
3600                 TXD->lower_setup.ip_fields.ipcse = htole16(hdr_len);
3601                 TXD->lower_setup.ip_fields.ipcso = ipcso;
3602                 cmd |= E1000_TXD_CMD_IP;
3603         }
3604
3605         if (mp->m_pkthdr.csum_flags & CSUM_TCP) {
3606                 *txd_lower = E1000_TXD_CMD_DEXT | E1000_TXD_DTYP_D;
3607                 *txd_upper |= E1000_TXD_POPTS_TXSM << 8;
3608                 offload |= CSUM_TCP;
3609                 tucss = hdr_len;
3610                 tucso = hdr_len + offsetof(struct tcphdr, th_sum);
3611                 /*
3612                  * Setting up a new checksum offload context for every
3613                  * frame takes a lot of processing time for the hardware.
3614                  * This also reduces performance a lot for small frames,
3615                  * so avoid it if the driver can reuse the previously
3616                  * configured checksum offload context.
3617                  */
3618                 if (txr->last_hw_offload == offload) {
3619                         if (offload & CSUM_IP) {
3620                                 if (txr->last_hw_ipcss == ipcss &&
3621                                     txr->last_hw_ipcso == ipcso &&
3622                                     txr->last_hw_tucss == tucss &&
3623                                     txr->last_hw_tucso == tucso)
3624                                         return;
3625                         } else {
3626                                 if (txr->last_hw_tucss == tucss &&
3627                                     txr->last_hw_tucso == tucso)
3628                                         return;
3629                         }
3630                 }
3631                 txr->last_hw_offload = offload;
3632                 txr->last_hw_tucss = tucss;
3633                 txr->last_hw_tucso = tucso;
3634                 /*
3635                  * Start offset for payload checksum calculation.
3636                  * End offset for payload checksum calculation.
3637                  * Offset of place to put the checksum.
3638                  */
3639                 TXD = (struct e1000_context_desc *)&txr->tx_base[cur];
3640                 TXD->upper_setup.tcp_fields.tucss = tucss;
3641                 TXD->upper_setup.tcp_fields.tucse = htole16(0);
3642                 TXD->upper_setup.tcp_fields.tucso = tucso;
3643                 cmd |= E1000_TXD_CMD_TCP;
3644         } else if (mp->m_pkthdr.csum_flags & CSUM_UDP) {
3645                 *txd_lower = E1000_TXD_CMD_DEXT | E1000_TXD_DTYP_D;
3646                 *txd_upper |= E1000_TXD_POPTS_TXSM << 8;
3647                 tucss = hdr_len;
3648                 tucso = hdr_len + offsetof(struct udphdr, uh_sum);
3649                 /*
3650                  * Setting up a new checksum offload context for every
3651                  * frame takes a lot of processing time for the hardware.
3652                  * This also reduces performance a lot for small frames,
3653                  * so avoid it if the driver can reuse the previously
3654                  * configured checksum offload context.
3655                  */
3656                 if (txr->last_hw_offload == offload) {
3657                         if (offload & CSUM_IP) {
3658                                 if (txr->last_hw_ipcss == ipcss &&
3659                                     txr->last_hw_ipcso == ipcso &&
3660                                     txr->last_hw_tucss == tucss &&
3661                                     txr->last_hw_tucso == tucso)
3662                                         return;
3663                         } else {
3664                                 if (txr->last_hw_tucss == tucss &&
3665                                     txr->last_hw_tucso == tucso)
3666                                         return;
3667                         }
3668                 }
3669                 txr->last_hw_offload = offload;
3670                 txr->last_hw_tucss = tucss;
3671                 txr->last_hw_tucso = tucso;
3672                 /*
3673                  * Start offset for payload checksum calculation.
3674                  * End offset for payload checksum calculation.
3675                  * Offset of the place to put the checksum.
3676                  */
3677                 TXD = (struct e1000_context_desc *)&txr->tx_base[cur];
3678                 TXD->upper_setup.tcp_fields.tucss = tucss;
3679                 TXD->upper_setup.tcp_fields.tucse = htole16(0);
3680                 TXD->upper_setup.tcp_fields.tucso = tucso;
3681         }
3682   
3683         if (offload & CSUM_IP) {
3684                 txr->last_hw_ipcss = ipcss;
3685                 txr->last_hw_ipcso = ipcso;
3686         }
3687
3688         TXD->tcp_seg_setup.data = htole32(0);
3689         TXD->cmd_and_length =
3690             htole32(adapter->txd_cmd | E1000_TXD_CMD_DEXT | cmd);
3691         tx_buffer = &txr->tx_buffers[cur];
3692         tx_buffer->m_head = NULL;
3693         tx_buffer->next_eop = -1;
3694
3695         if (++cur == adapter->num_tx_desc)
3696                 cur = 0;
3697
3698         txr->tx_avail--;
3699         txr->next_avail_desc = cur;
3700 }
3701
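/*
 * Illustrative worked example (not part of the driver): the offsets
 * em_transmit_checksum_setup() computes for an ordinary untagged
 * Ethernet + IPv4 + TCP frame, i.e. ip_off = 14 and ip_hl = 5:
 *
 *      hdr_len = 14 + (5 << 2)                    = 34
 *      ipcss   = 14                                 (IP header start)
 *      ipcso   = 14 + offsetof(struct ip, ip_sum) = 24
 *      tucss   = 34                                 (TCP header start)
 *      tucso   = 34 + offsetof(struct tcphdr, th_sum) = 50
 *
 * A VLAN tag moves ip_off to 18 and shifts everything below it by 4.
 */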
3702
3703 /**********************************************************************
3704  *
3705  *  Setup work for hardware segmentation offload (TSO)
3706  *
3707  **********************************************************************/
3708 static void
3709 em_tso_setup(struct tx_ring *txr, struct mbuf *mp, int ip_off,
3710     struct ip *ip, struct tcphdr *tp, u32 *txd_upper, u32 *txd_lower)
3711 {
3712         struct adapter                  *adapter = txr->adapter;
3713         struct e1000_context_desc       *TXD;
3714         struct em_buffer                *tx_buffer;
3715         int cur, hdr_len;
3716
3717         /*
3718          * In theory we can use the same TSO context if and only if
3719          * the frame is the same type (IP/TCP) and has the same MSS.
3720          * However, checking whether a frame has the same IP/TCP
3721          * structure is a hard thing, so just ignore that and always
3722          * reestablish a new TSO context.
3723          */
3724         hdr_len = ip_off + (ip->ip_hl << 2) + (tp->th_off << 2);
3725         *txd_lower = (E1000_TXD_CMD_DEXT |      /* Extended descr type */
3726                       E1000_TXD_DTYP_D |        /* Data descr type */
3727                       E1000_TXD_CMD_TSE);       /* Do TSE on this packet */
3728
3729         /* IP and/or TCP header checksum calculation and insertion. */
3730         *txd_upper = (E1000_TXD_POPTS_IXSM | E1000_TXD_POPTS_TXSM) << 8;
3731
3732         cur = txr->next_avail_desc;
3733         tx_buffer = &txr->tx_buffers[cur];
3734         TXD = (struct e1000_context_desc *) &txr->tx_base[cur];
3735
3736         /*
3737          * Start offset for header checksum calculation.
3738          * End offset for header checksum calculation.
3739          * Offset of the place to put the checksum.
3740          */
3741         TXD->lower_setup.ip_fields.ipcss = ip_off;
3742         TXD->lower_setup.ip_fields.ipcse =
3743             htole16(ip_off + (ip->ip_hl << 2) - 1);
3744         TXD->lower_setup.ip_fields.ipcso = ip_off + offsetof(struct ip, ip_sum);
3745         /*
3746          * Start offset for payload checksum calculation.
3747          * End offset for payload checksum calculation.
3748          * Offset of place to put the checksum.
3749          */
3750         TXD->upper_setup.tcp_fields.tucss = ip_off + (ip->ip_hl << 2);
3751         TXD->upper_setup.tcp_fields.tucse = 0;
3752         TXD->upper_setup.tcp_fields.tucso =
3753             ip_off + (ip->ip_hl << 2) + offsetof(struct tcphdr, th_sum);
3754         /*
3755          * Payload size per packet w/o any headers.
3756          * Length of all headers up to payload.
3757          */
3758         TXD->tcp_seg_setup.fields.mss = htole16(mp->m_pkthdr.tso_segsz);
3759         TXD->tcp_seg_setup.fields.hdr_len = hdr_len;
3760
3761         TXD->cmd_and_length = htole32(adapter->txd_cmd |
3762                                 E1000_TXD_CMD_DEXT |    /* Extended descr */
3763                                 E1000_TXD_CMD_TSE |     /* TSE context */
3764                                 E1000_TXD_CMD_IP |      /* Do IP csum */
3765                                 E1000_TXD_CMD_TCP |     /* Do TCP checksum */
3766                                 (mp->m_pkthdr.len - (hdr_len))); /* Total len */
3767
3768         tx_buffer->m_head = NULL;
3769         tx_buffer->next_eop = -1;
3770
3771         if (++cur == adapter->num_tx_desc)
3772                 cur = 0;
3773
3774         txr->tx_avail--;
3775         txr->next_avail_desc = cur;
3776         txr->tx_tso = TRUE;
3777 }
3778
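/*
 * Illustrative worked example (not part of the driver): the TSO
 * context built above for a hypothetical large TCP send over untagged
 * Ethernet with 20-byte IP and TCP headers and a 1448-byte MSS:
 *
 *      hdr_len = 14 + 20 + 20 = 54
 *      mss     = 1448
 *      paylen  = m_pkthdr.len - 54   (folded into cmd_and_length)
 *
 * The hardware replays the 54 header bytes in front of each segment,
 * fixes up the IP id/length and TCP sequence/flag fields, computes
 * both checksums, and emits roughly paylen / 1448 frames on the wire.
 */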
3779
3780 /**********************************************************************
3781  *
3782  *  Examine each tx_buffer in the used queue. If the hardware is done
3783  *  processing the packet then free associated resources. The
3784  *  tx_buffer is put back on the free queue.
3785  *
3786  **********************************************************************/
3787 static void
3788 em_txeof(struct tx_ring *txr)
3789 {
3790         struct adapter  *adapter = txr->adapter;
3791         int first, last, done, processed;
3792         struct em_buffer *tx_buffer;
3793         struct e1000_tx_desc   *tx_desc, *eop_desc;
3794         struct ifnet   *ifp = adapter->ifp;
3795
3796         EM_TX_LOCK_ASSERT(txr);
3797 #ifdef DEV_NETMAP
3798         if (ifp->if_capenable & IFCAP_NETMAP) {
3799                 struct netmap_adapter *na = NA(ifp);
3800
3801                 selwakeuppri(&na->tx_rings[txr->me].si, PI_NET);
3802                 EM_TX_UNLOCK(txr);
3803                 EM_CORE_LOCK(adapter);
3804                 selwakeuppri(&na->tx_si, PI_NET);
3805                 EM_CORE_UNLOCK(adapter);
3806                 EM_TX_LOCK(txr);
3807                 return;
3808         }
3809 #endif /* DEV_NETMAP */
3810
3811         /* No work, make sure watchdog is off */
3812         if (txr->tx_avail == adapter->num_tx_desc) {
3813                 txr->queue_status = EM_QUEUE_IDLE;
3814                 return;
3815         }
3816
3817         processed = 0;
3818         first = txr->next_to_clean;
3819         tx_desc = &txr->tx_base[first];
3820         tx_buffer = &txr->tx_buffers[first];
3821         last = tx_buffer->next_eop;
3822         eop_desc = &txr->tx_base[last];
3823
3824         /*
3825          * What this does is get the index of the
3826          * first descriptor AFTER the EOP of the
3827          * first packet; that way we can do the
3828          * simple comparison in the inner while loop.
3829          */
3830         if (++last == adapter->num_tx_desc)
3831                 last = 0;
3832         done = last;
3833
3834         bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
3835             BUS_DMASYNC_POSTREAD);
3836
3837         while (eop_desc->upper.fields.status & E1000_TXD_STAT_DD) {
3838                 /* We clean the range of the packet */
3839                 while (first != done) {
3840                         tx_desc->upper.data = 0;
3841                         tx_desc->lower.data = 0;
3842                         tx_desc->buffer_addr = 0;
3843                         ++txr->tx_avail;
3844                         ++processed;
3845
3846                         if (tx_buffer->m_head) {
3847                                 bus_dmamap_sync(txr->txtag,
3848                                     tx_buffer->map,
3849                                     BUS_DMASYNC_POSTWRITE);
3850                                 bus_dmamap_unload(txr->txtag,
3851                                     tx_buffer->map);
3852                                 m_freem(tx_buffer->m_head);
3853                                 tx_buffer->m_head = NULL;
3854                         }
3855                         tx_buffer->next_eop = -1;
3856                         txr->watchdog_time = ticks;
3857
3858                         if (++first == adapter->num_tx_desc)
3859                                 first = 0;
3860
3861                         tx_buffer = &txr->tx_buffers[first];
3862                         tx_desc = &txr->tx_base[first];
3863                 }
3864                 ++ifp->if_opackets;
3865                 /* See if we can continue to the next packet */
3866                 last = tx_buffer->next_eop;
3867                 if (last != -1) {
3868                         eop_desc = &txr->tx_base[last];
3869                         /* Get new done point */
3870                         if (++last == adapter->num_tx_desc) last = 0;
3871                         done = last;
3872                 } else
3873                         break;
3874         }
3875         bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
3876             BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
3877
3878         txr->next_to_clean = first;
3879
3880         /*
3881         ** Watchdog calculation: we know there's
3882         ** work outstanding or the first return
3883         ** would have been taken, so nothing processed
3884         ** for too long indicates a hang. The local timer
3885         ** will examine this and do a reset if needed.
3886         */
3887         if ((!processed) && ((ticks - txr->watchdog_time) > EM_WATCHDOG))
3888                 txr->queue_status = EM_QUEUE_HUNG;
3889
3890         /*
3891          * If we have a minimum free, clear IFF_DRV_OACTIVE
3892          * to tell the stack that it is OK to send packets.
3893          * Notice that all writes of OACTIVE happen under the
3894          * TX lock which, with a single queue, guarantees 
3895          * sanity.
3896          */
3897         if (txr->tx_avail >= EM_MAX_SCATTER)
3898                 ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;
3899
3900         /* Disable watchdog if all clean */
3901         if (txr->tx_avail == adapter->num_tx_desc) {
3902                 txr->queue_status = EM_QUEUE_IDLE;
3903         } 
3904 }
3905
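/*
 * Illustrative sketch (not part of the driver): the wrap-safe slot
 * arithmetic em_txeof() depends on.  "done" is the slot just past a
 * packet's EOP descriptor, so the inner loop can use a simple
 * "first != done" test and still survive the ring wrapping.
 */
static inline int
em_example_next_slot(int i, int ring_size)
{
        /* advance one slot, wrapping back to 0 at the end of the ring */
        return ((i + 1 == ring_size) ? 0 : (i + 1));
}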
3906
3907 /*********************************************************************
3908  *
3909  *  Refresh RX descriptor mbufs from system mbuf buffer pool.
3910  *
3911  **********************************************************************/
3912 static void
3913 em_refresh_mbufs(struct rx_ring *rxr, int limit)
3914 {
3915         struct adapter          *adapter = rxr->adapter;
3916         struct mbuf             *m;
3917         bus_dma_segment_t       segs[1];
3918         struct em_buffer        *rxbuf;
3919         int                     i, j, error, nsegs;
3920         bool                    cleaned = FALSE;
3921
3922         i = j = rxr->next_to_refresh;
3923         /*
3924         ** Get one descriptor beyond
3925         ** our work mark to control
3926         ** the loop.
3927         */
3928         if (++j == adapter->num_rx_desc)
3929                 j = 0;
3930
3931         while (j != limit) {
3932                 rxbuf = &rxr->rx_buffers[i];
3933                 if (rxbuf->m_head == NULL) {
3934                         m = m_getjcl(M_NOWAIT, MT_DATA,
3935                             M_PKTHDR, adapter->rx_mbuf_sz);
3936                         /*
3937                         ** If we have a temporary resource shortage
3938                         ** that causes a failure, just abort the refresh
3939                         ** for now; we will return to this point when
3940                         ** reinvoked from em_rxeof.
3941                         */
3942                         if (m == NULL)
3943                                 goto update;
3944                 } else
3945                         m = rxbuf->m_head;
3946
3947                 m->m_len = m->m_pkthdr.len = adapter->rx_mbuf_sz;
3948                 m->m_flags |= M_PKTHDR;
3949                 m->m_data = m->m_ext.ext_buf;
3950
3951                 /* Use bus_dma machinery to setup the memory mapping  */
3952                 error = bus_dmamap_load_mbuf_sg(rxr->rxtag, rxbuf->map,
3953                     m, segs, &nsegs, BUS_DMA_NOWAIT);
3954                 if (error != 0) {
3955                         device_printf(adapter->dev, "refresh mbufs:"
3956                             " dmamap load failure - %d\n", error);
3957                         m_free(m);
3958                         rxbuf->m_head = NULL;
3959                         goto update;
3960                 }
3961                 rxbuf->m_head = m;
3962                 bus_dmamap_sync(rxr->rxtag,
3963                     rxbuf->map, BUS_DMASYNC_PREREAD);
3964                 rxr->rx_base[i].buffer_addr = htole64(segs[0].ds_addr);
3965                 cleaned = TRUE;
3966
3967                 i = j; /* Next is precalculated for us */
3968                 rxr->next_to_refresh = i;
3969                 /* Calculate next controlling index */
3970                 if (++j == adapter->num_rx_desc)
3971                         j = 0;
3972         }
3973 update:
3974         /*
3975         ** Update the tail pointer only if we
3976         ** refreshed, and only as far as we got.
3977         */
3978         if (cleaned)
3979                 E1000_WRITE_REG(&adapter->hw,
3980                     E1000_RDT(rxr->me), rxr->next_to_refresh);
3981
3982         return;
3983 }
3984
3985
3986 /*********************************************************************
3987  *
3988  *  Allocate memory for rx_buffer structures. Since we use one
3989  *  rx_buffer per received packet, the maximum number of rx_buffer's
3990  *  that we'll need is equal to the number of receive descriptors
3991  *  that we've allocated.
3992  *
3993  **********************************************************************/
3994 static int
3995 em_allocate_receive_buffers(struct rx_ring *rxr)
3996 {
3997         struct adapter          *adapter = rxr->adapter;
3998         device_t                dev = adapter->dev;
3999         struct em_buffer        *rxbuf;
4000         int                     error;
4001
4002         rxr->rx_buffers = malloc(sizeof(struct em_buffer) *
4003             adapter->num_rx_desc, M_DEVBUF, M_NOWAIT | M_ZERO);
4004         if (rxr->rx_buffers == NULL) {
4005                 device_printf(dev, "Unable to allocate rx_buffer memory\n");
4006                 return (ENOMEM);
4007         }
4008
4009         error = bus_dma_tag_create(bus_get_dma_tag(dev), /* parent */
4010                                 1, 0,                   /* alignment, bounds */
4011                                 BUS_SPACE_MAXADDR,      /* lowaddr */
4012                                 BUS_SPACE_MAXADDR,      /* highaddr */
4013                                 NULL, NULL,             /* filter, filterarg */
4014                                 MJUM9BYTES,             /* maxsize */
4015                                 1,                      /* nsegments */
4016                                 MJUM9BYTES,             /* maxsegsize */
4017                                 0,                      /* flags */
4018                                 NULL,                   /* lockfunc */
4019                                 NULL,                   /* lockarg */
4020                                 &rxr->rxtag);
4021         if (error) {
4022                 device_printf(dev, "%s: bus_dma_tag_create failed %d\n",
4023                     __func__, error);
4024                 goto fail;
4025         }
4026
4027         rxbuf = rxr->rx_buffers;
4028         for (int i = 0; i < adapter->num_rx_desc; i++, rxbuf++) {
4030                 error = bus_dmamap_create(rxr->rxtag, BUS_DMA_NOWAIT,
4031                     &rxbuf->map);
4032                 if (error) {
4033                         device_printf(dev, "%s: bus_dmamap_create failed: %d\n",
4034                             __func__, error);
4035                         goto fail;
4036                 }
4037         }
4038
4039         return (0);
4040
4041 fail:
4042         em_free_receive_structures(adapter);
4043         return (error);
4044 }
4045
4046
4047 /*********************************************************************
4048  *
4049  *  Initialize a receive ring and its buffers.
4050  *
4051  **********************************************************************/
4052 static int
4053 em_setup_receive_ring(struct rx_ring *rxr)
4054 {
4055         struct  adapter         *adapter = rxr->adapter;
4056         struct em_buffer        *rxbuf;
4057         bus_dma_segment_t       seg[1];
4058         int                     rsize, nsegs, error = 0;
4059 #ifdef DEV_NETMAP
4060         struct netmap_adapter *na = NA(adapter->ifp);
4061         struct netmap_slot *slot;
4062 #endif
4063
4064
4065         /* Clear the ring contents */
4066         EM_RX_LOCK(rxr);
4067         rsize = roundup2(adapter->num_rx_desc *
4068             sizeof(struct e1000_rx_desc), EM_DBA_ALIGN);
4069         bzero((void *)rxr->rx_base, rsize);
4070 #ifdef DEV_NETMAP
4071         slot = netmap_reset(na, NR_RX, 0, 0);
4072 #endif
4073
4074         /*
4075         ** Free current RX buffer structs and their mbufs
4076         */
4077         for (int i = 0; i < adapter->num_rx_desc; i++) {
4078                 rxbuf = &rxr->rx_buffers[i];
4079                 if (rxbuf->m_head != NULL) {
4080                         bus_dmamap_sync(rxr->rxtag, rxbuf->map,
4081                             BUS_DMASYNC_POSTREAD);
4082                         bus_dmamap_unload(rxr->rxtag, rxbuf->map);
4083                         m_freem(rxbuf->m_head);
4084                         rxbuf->m_head = NULL; /* mark as freed */
4085                 }
4086         }
4087
4088         /* Now replenish the mbufs */
4089         for (int j = 0; j != adapter->num_rx_desc; ++j) {
4090                 rxbuf = &rxr->rx_buffers[j];
4091 #ifdef DEV_NETMAP
4092                 if (slot) {
4093                         int si = netmap_idx_n2k(&na->rx_rings[rxr->me], j);
4094                         uint64_t paddr;
4095                         void *addr;
4096
4097                         addr = PNMB(slot + si, &paddr);
4098                         netmap_load_map(rxr->rxtag, rxbuf->map, addr);
4099                         /* Update descriptor */
4100                         rxr->rx_base[j].buffer_addr = htole64(paddr);
4101                         continue;
4102                 }
4103 #endif /* DEV_NETMAP */
4104                 rxbuf->m_head = m_getjcl(M_NOWAIT, MT_DATA,
4105                     M_PKTHDR, adapter->rx_mbuf_sz);
4106                 if (rxbuf->m_head == NULL) {
4107                         error = ENOBUFS;
4108                         goto fail;
4109                 }
4110                 rxbuf->m_head->m_len = adapter->rx_mbuf_sz;
4111                 rxbuf->m_head->m_flags &= ~M_HASFCS; /* we strip it */
4112                 rxbuf->m_head->m_pkthdr.len = adapter->rx_mbuf_sz;
4113
4114                 /* Get the memory mapping */
4115                 error = bus_dmamap_load_mbuf_sg(rxr->rxtag,
4116                     rxbuf->map, rxbuf->m_head, seg,
4117                     &nsegs, BUS_DMA_NOWAIT);
4118                 if (error != 0) {
4119                         m_freem(rxbuf->m_head);
4120                         rxbuf->m_head = NULL;
4121                         goto fail;
4122                 }
4123                 bus_dmamap_sync(rxr->rxtag,
4124                     rxbuf->map, BUS_DMASYNC_PREREAD);
4125
4126                 /* Update descriptor */
4127                 rxr->rx_base[j].buffer_addr = htole64(seg[0].ds_addr);
4128         }
4129         rxr->next_to_check = 0;
4130         rxr->next_to_refresh = 0;
4131         bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
4132             BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
4133
4134 fail:
4135         EM_RX_UNLOCK(rxr);
4136         return (error);
4137 }
4138
4139 /*********************************************************************
4140  *
4141  *  Initialize all receive rings.
4142  *
4143  **********************************************************************/
4144 static int
4145 em_setup_receive_structures(struct adapter *adapter)
4146 {
4147         struct rx_ring *rxr = adapter->rx_rings;
4148         int q;
4149
4150         for (q = 0; q < adapter->num_queues; q++, rxr++)
4151                 if (em_setup_receive_ring(rxr))
4152                         goto fail;
4153
4154         return (0);
4155 fail:
4156         /*
4157          * Free the RX buffers allocated so far; we only handle
4158          * the rings that completed, since the failing ring will
4159          * have cleaned up after itself. 'q' failed, so it's the terminus.
4160          */
4161         for (int i = 0; i < q; ++i) {
4162                 rxr = &adapter->rx_rings[i];
4163                 for (int n = 0; n < adapter->num_rx_desc; n++) {
4164                         struct em_buffer *rxbuf;
4165                         rxbuf = &rxr->rx_buffers[n];
4166                         if (rxbuf->m_head != NULL) {
4167                                 bus_dmamap_sync(rxr->rxtag, rxbuf->map,
4168                                   BUS_DMASYNC_POSTREAD);
4169                                 bus_dmamap_unload(rxr->rxtag, rxbuf->map);
4170                                 m_freem(rxbuf->m_head);
4171                                 rxbuf->m_head = NULL;
4172                         }
4173                 }
4174                 rxr->next_to_check = 0;
4175                 rxr->next_to_refresh = 0;
4176         }
4177
4178         return (ENOBUFS);
4179 }
4180
4181 /*********************************************************************
4182  *
4183  *  Free all receive rings.
4184  *
4185  **********************************************************************/
4186 static void
4187 em_free_receive_structures(struct adapter *adapter)
4188 {
4189         struct rx_ring *rxr = adapter->rx_rings;
4190
4191         for (int i = 0; i < adapter->num_queues; i++, rxr++) {
4192                 em_free_receive_buffers(rxr);
4193                 /* Free the ring memory as well */
4194                 em_dma_free(adapter, &rxr->rxdma);
4195                 EM_RX_LOCK_DESTROY(rxr);
4196         }
4197
4198         free(adapter->rx_rings, M_DEVBUF);
4199 }
4200
4201
4202 /*********************************************************************
4203  *
4204  *  Free receive ring data structures
4205  *
4206  **********************************************************************/
4207 static void
4208 em_free_receive_buffers(struct rx_ring *rxr)
4209 {
4210         struct adapter          *adapter = rxr->adapter;
4211         struct em_buffer        *rxbuf = NULL;
4212
4213         INIT_DEBUGOUT("free_receive_buffers: begin");
4214
4215         if (rxr->rx_buffers != NULL) {
4216                 for (int i = 0; i < adapter->num_rx_desc; i++) {
4217                         rxbuf = &rxr->rx_buffers[i];
4218                         if (rxbuf->map != NULL) {
4219                                 bus_dmamap_sync(rxr->rxtag, rxbuf->map,
4220                                     BUS_DMASYNC_POSTREAD);
4221                                 bus_dmamap_unload(rxr->rxtag, rxbuf->map);
4222                                 bus_dmamap_destroy(rxr->rxtag, rxbuf->map);
4223                         }
4224                         if (rxbuf->m_head != NULL) {
4225                                 m_freem(rxbuf->m_head);
4226                                 rxbuf->m_head = NULL;
4227                         }
4228                 }
4229                 free(rxr->rx_buffers, M_DEVBUF);
4230                 rxr->rx_buffers = NULL;
4231                 rxr->next_to_check = 0;
4232                 rxr->next_to_refresh = 0;
4233         }
4234
4235         if (rxr->rxtag != NULL) {
4236                 bus_dma_tag_destroy(rxr->rxtag);
4237                 rxr->rxtag = NULL;
4238         }
4239
4240         return;
4241 }
4242
4243
4244 /*********************************************************************
4245  *
4246  *  Enable receive unit.
4247  *
4248  **********************************************************************/
4249 #define MAX_INTS_PER_SEC        8000
4250 #define DEFAULT_ITR          (1000000000/(MAX_INTS_PER_SEC * 256))
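/*
 * Sanity check on the arithmetic: ITR counts in 256ns units, so
 * 1000000000 / (8000 * 256) = 488, and 488 * 256ns is roughly 125us
 * between interrupts, i.e. ~8000 interrupts per second.
 */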
4251
4252 static void
4253 em_initialize_receive_unit(struct adapter *adapter)
4254 {
4255         struct rx_ring  *rxr = adapter->rx_rings;
4256         struct ifnet    *ifp = adapter->ifp;
4257         struct e1000_hw *hw = &adapter->hw;
4258         u64     bus_addr;
4259         u32     rctl, rxcsum;
4260
4261         INIT_DEBUGOUT("em_initialize_receive_units: begin");
4262
4263         /*
4264          * Make sure receives are disabled while setting
4265          * up the descriptor ring
4266          */
4267         rctl = E1000_READ_REG(hw, E1000_RCTL);
4268         /* Never disable receives once enabled on this hardware */
4269         if ((hw->mac.type != e1000_82574) && (hw->mac.type != e1000_82583))
4270                 E1000_WRITE_REG(hw, E1000_RCTL, rctl & ~E1000_RCTL_EN);
4271
4272         E1000_WRITE_REG(&adapter->hw, E1000_RADV,
4273             adapter->rx_abs_int_delay.value);
4274         /*
4275          * Set the interrupt throttling rate. Value is calculated
4276          * as DEFAULT_ITR = 1/(MAX_INTS_PER_SEC * 256ns)
4277          */
4278         E1000_WRITE_REG(hw, E1000_ITR, DEFAULT_ITR);
4279
4280         /*
4281         ** When using MSIX interrupts we need to throttle
4282         ** using the EITR register (82574 only)
4283         */
4284         if (hw->mac.type == e1000_82574) {
4285                 for (int i = 0; i < 4; i++)
4286                         E1000_WRITE_REG(hw, E1000_EITR_82574(i),
4287                             DEFAULT_ITR);
4288                 /* Disable accelerated acknowledge */
4289                 E1000_WRITE_REG(hw, E1000_RFCTL, E1000_RFCTL_ACK_DIS);
4290         }
4291
4292         if (ifp->if_capenable & IFCAP_RXCSUM) {
4293                 rxcsum = E1000_READ_REG(hw, E1000_RXCSUM);
4294                 rxcsum |= (E1000_RXCSUM_IPOFL | E1000_RXCSUM_TUOFL);
4295                 E1000_WRITE_REG(hw, E1000_RXCSUM, rxcsum);
4296         }
4297
4298         /*
4299         ** XXX TEMPORARY WORKAROUND: on some systems with 82573
4300         ** long latencies are observed, like Lenovo X60. This
4301         ** change eliminates the problem, but since having positive
4302         ** values in RDTR is a known source of problems on other
4303         ** platforms another solution is being sought.
4304         */
4305         if (hw->mac.type == e1000_82573)
4306                 E1000_WRITE_REG(hw, E1000_RDTR, 0x20);
4307
4308         for (int i = 0; i < adapter->num_queues; i++, rxr++) {
4309                 /* Setup the Base and Length of the Rx Descriptor Ring */
4310                 bus_addr = rxr->rxdma.dma_paddr;
4311                 E1000_WRITE_REG(hw, E1000_RDLEN(i),
4312                     adapter->num_rx_desc * sizeof(struct e1000_rx_desc));
4313                 E1000_WRITE_REG(hw, E1000_RDBAH(i), (u32)(bus_addr >> 32));
4314                 E1000_WRITE_REG(hw, E1000_RDBAL(i), (u32)bus_addr);
4315                 /* Setup the Head and Tail Descriptor Pointers */
4316                 E1000_WRITE_REG(hw, E1000_RDH(i), 0);
4317 #ifdef DEV_NETMAP
4318                 /*
4319                  * an init() while a netmap client is active must
4320                  * preserve the rx buffers passed to userspace.
4321                  * In this driver it means we adjust RDT to
4322                  * something different from na->num_rx_desc - 1.
4323                  */
4324                 if (ifp->if_capenable & IFCAP_NETMAP) {
4325                         struct netmap_adapter *na = NA(adapter->ifp);
4326                         struct netmap_kring *kring = &na->rx_rings[i];
4327                         int t = na->num_rx_desc - 1 - kring->nr_hwavail;
4328
4329                         E1000_WRITE_REG(hw, E1000_RDT(i), t);
4330                 } else
4331 #endif /* DEV_NETMAP */
4332                 E1000_WRITE_REG(hw, E1000_RDT(i), adapter->num_rx_desc - 1);
4333         }
4334
4335         /* Set PTHRESH for improved jumbo performance */
4336         if (((adapter->hw.mac.type == e1000_ich9lan) ||
4337             (adapter->hw.mac.type == e1000_pch2lan) ||
4338             (adapter->hw.mac.type == e1000_ich10lan)) &&
4339             (ifp->if_mtu > ETHERMTU)) {
4340                 u32 rxdctl = E1000_READ_REG(hw, E1000_RXDCTL(0));
4341                 E1000_WRITE_REG(hw, E1000_RXDCTL(0), rxdctl | 3);
4342         }
4343                 
4344         if (adapter->hw.mac.type == e1000_pch2lan) {
4345                 if (ifp->if_mtu > ETHERMTU)
4346                         e1000_lv_jumbo_workaround_ich8lan(hw, TRUE);
4347                 else
4348                         e1000_lv_jumbo_workaround_ich8lan(hw, FALSE);
4349         }
4350
4351         /* Setup the Receive Control Register */
4352         rctl &= ~(3 << E1000_RCTL_MO_SHIFT);
4353         rctl |= E1000_RCTL_EN | E1000_RCTL_BAM |
4354             E1000_RCTL_LBM_NO | E1000_RCTL_RDMTS_HALF |
4355             (hw->mac.mc_filter_type << E1000_RCTL_MO_SHIFT);
4356
4357         /* Strip the CRC */
4358         rctl |= E1000_RCTL_SECRC;
4359
4360         /* Make sure VLAN Filters are off */
4361         rctl &= ~E1000_RCTL_VFE;
4362         rctl &= ~E1000_RCTL_SBP;
4363
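        /*
         * Pick the hardware buffer size that matches the mbuf cluster
         * in use; with BSEX set, the base size encodings are scaled by
         * 16 (giving the 4096 and 8192 byte sizes below).
         */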
4364         if (adapter->rx_mbuf_sz == MCLBYTES)
4365                 rctl |= E1000_RCTL_SZ_2048;
4366         else if (adapter->rx_mbuf_sz == MJUMPAGESIZE)
4367                 rctl |= E1000_RCTL_SZ_4096 | E1000_RCTL_BSEX;
4368         else if (adapter->rx_mbuf_sz > MJUMPAGESIZE)
4369                 rctl |= E1000_RCTL_SZ_8192 | E1000_RCTL_BSEX;
4370
4371         if (ifp->if_mtu > ETHERMTU)
4372                 rctl |= E1000_RCTL_LPE;
4373         else
4374                 rctl &= ~E1000_RCTL_LPE;
4375
4376         /* Write out the settings */
4377         E1000_WRITE_REG(hw, E1000_RCTL, rctl);
4378
4379         return;
4380 }
4381
4382
4383 /*********************************************************************
4384  *
4385  *  This routine executes in interrupt context. It replenishes
4386  *  the mbufs in the descriptor and sends data which has been
4387  *  dma'ed into host memory to upper layer.
4388  *
4389  *  We loop at most count times if count is > 0, or until done if
4390  *  count < 0.
4391  *  
4392  *  For polling we also return the number of cleaned packets.
4393  *********************************************************************/
4394 static bool
4395 em_rxeof(struct rx_ring *rxr, int count, int *done)
4396 {
4397         struct adapter          *adapter = rxr->adapter;
4398         struct ifnet            *ifp = adapter->ifp;
4399         struct mbuf             *mp, *sendmp;
4400         u8                      status = 0;
4401         u16                     len;
4402         int                     i, processed, rxdone = 0;
4403         bool                    eop;
4404         struct e1000_rx_desc    *cur;
4405
4406         EM_RX_LOCK(rxr);
4407
4408 #ifdef DEV_NETMAP
4409         if (ifp->if_capenable & IFCAP_NETMAP) {
4410                 struct netmap_adapter *na = NA(ifp);
4411
4412                 na->rx_rings[rxr->me].nr_kflags |= NKR_PENDINTR;
4413                 selwakeuppri(&na->rx_rings[rxr->me].si, PI_NET);
4414                 EM_RX_UNLOCK(rxr);
4415                 EM_CORE_LOCK(adapter);
4416                 selwakeuppri(&na->rx_si, PI_NET);
4417                 EM_CORE_UNLOCK(adapter);
4418                 return (FALSE);
4419         }
4420 #endif /* DEV_NETMAP */
4421
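        /*
         * Walk the ring from next_to_check: the hardware sets the DD
         * (descriptor done) bit when a buffer has been written, and EOP
         * marks the last fragment of a frame. Each consumed descriptor
         * has its status cleared below so a stale DD bit is never seen
         * on a later pass.
         */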
4422         for (i = rxr->next_to_check, processed = 0; count != 0;) {
4423
4424                 if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
4425                         break;
4426
4427                 bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
4428                     BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
4429
4430                 cur = &rxr->rx_base[i];
4431                 status = cur->status;
4432                 mp = sendmp = NULL;
4433
4434                 if ((status & E1000_RXD_STAT_DD) == 0)
4435                         break;
4436
4437                 len = le16toh(cur->length);
4438                 eop = (status & E1000_RXD_STAT_EOP) != 0;
4439
4440                 if ((cur->errors & E1000_RXD_ERR_FRAME_ERR_MASK) ||
4441                     (rxr->discard == TRUE)) {
4442                         adapter->dropped_pkts++;
4443                         ++rxr->rx_discarded;
4444                         if (!eop) /* Catch subsequent segs */
4445                                 rxr->discard = TRUE;
4446                         else
4447                                 rxr->discard = FALSE;
4448                         em_rx_discard(rxr, i);
4449                         goto next_desc;
4450                 }
4451
4452                 /* Assign correct length to the current fragment */
4453                 mp = rxr->rx_buffers[i].m_head;
4454                 mp->m_len = len;
4455
4456                 /* Trigger for refresh */
4457                 rxr->rx_buffers[i].m_head = NULL;
4458
4459                 /* First segment? */
4460                 if (rxr->fmp == NULL) {
4461                         mp->m_pkthdr.len = len;
4462                         rxr->fmp = rxr->lmp = mp;
4463                 } else {
4464                         /* Chain mbuf's together */
4465                         mp->m_flags &= ~M_PKTHDR;
4466                         rxr->lmp->m_next = mp;
4467                         rxr->lmp = mp;
4468                         rxr->fmp->m_pkthdr.len += len;
4469                 }
4470
4471                 if (eop) {
4472                         --count;
4473                         sendmp = rxr->fmp;
4474                         sendmp->m_pkthdr.rcvif = ifp;
4475                         ifp->if_ipackets++;
4476                         em_receive_checksum(cur, sendmp);
4477 #ifndef __NO_STRICT_ALIGNMENT
4478                         if (adapter->max_frame_size >
4479                             (MCLBYTES - ETHER_ALIGN) &&
4480                             em_fixup_rx(rxr) != 0)
4481                                 goto skip;
4482 #endif
4483                         if (status & E1000_RXD_STAT_VP) {
4484                                 sendmp->m_pkthdr.ether_vtag =
4485                                     le16toh(cur->special);
4486                                 sendmp->m_flags |= M_VLANTAG;
4487                         }
4488 #ifndef __NO_STRICT_ALIGNMENT
4489 skip:
4490 #endif
4491                         rxr->fmp = rxr->lmp = NULL;
4492                 }
4493 next_desc:
4494                 /* Zero out the receive descriptors status. */
4495                 cur->status = 0;
4496                 ++rxdone;       /* cumulative for POLL */
4497                 ++processed;
4498
4499                 /* Advance our pointers to the next descriptor. */
4500                 if (++i == adapter->num_rx_desc)
4501                         i = 0;
4502
4503                 /* Send to the stack */
4504                 if (sendmp != NULL) {
4505                         rxr->next_to_check = i;
4506                         EM_RX_UNLOCK(rxr);
4507                         (*ifp->if_input)(ifp, sendmp);
4508                         EM_RX_LOCK(rxr);
4509                         i = rxr->next_to_check;
4510                 }
4511
4512                 /* Only refresh mbufs every 8 descriptors */
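                /*
                 * Each refresh ends with an RDT doorbell write, so
                 * batching every 8 descriptors amortizes that MMIO cost.
                 */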
4513                 if (processed == 8) {
4514                         em_refresh_mbufs(rxr, i);
4515                         processed = 0;
4516                 }
4517         }
4518
4519         /* Catch any remaining refresh work */
4520         if (e1000_rx_unrefreshed(rxr))
4521                 em_refresh_mbufs(rxr, i);
4522
4523         rxr->next_to_check = i;
4524         if (done != NULL)
4525                 *done = rxdone;
4526         EM_RX_UNLOCK(rxr);
4527
4528         return ((status & E1000_RXD_STAT_DD) ? TRUE : FALSE);
4529 }
4530
4531 static __inline void
4532 em_rx_discard(struct rx_ring *rxr, int i)
4533 {
4534         struct em_buffer        *rbuf;
4535
4536         rbuf = &rxr->rx_buffers[i];
4537         /* Free any previous pieces */
4538         if (rxr->fmp != NULL) {
4539                 rxr->fmp->m_flags |= M_PKTHDR;
4540                 m_freem(rxr->fmp);
4541                 rxr->fmp = NULL;
4542                 rxr->lmp = NULL;
4543         }
4544         /*
4545         ** Free buffer and allow em_refresh_mbufs()
4546         ** to clean up and recharge buffer.
4547         */
4548         if (rbuf->m_head) {
4549                 m_free(rbuf->m_head);
4550                 rbuf->m_head = NULL;
4551         }
4552         return;
4553 }
4554
4555 #ifndef __NO_STRICT_ALIGNMENT
4556 /*
4557  * When jumbo frames are enabled we should realign the entire payload on
4558  * architectures with strict alignment. This is a serious design mistake of
4559  * the 8254x, as it nullifies the benefit of DMA. The 8254x only allows RX
4560  * buffer sizes of 2048/4096/8192/16384; what we really want is 2048 -
4561  * ETHER_ALIGN so the payload ends up aligned. On architectures without
4562  * strict alignment restrictions the 8254x still performs unaligned memory
4563  * accesses, which reduces performance as well. To avoid copying an entire
4564  * frame just to align it, we allocate a new mbuf, copy the ethernet header
4565  * into it, and prepend the new mbuf to the existing mbuf chain.
4566  *
4567  * Be aware that the best performance of the 8254x is achieved only when
4568  * jumbo frames are not used at all on architectures with strict alignment.
4569  */
4570 static int
4571 em_fixup_rx(struct rx_ring *rxr)
4572 {
4573         struct adapter *adapter = rxr->adapter;
4574         struct mbuf *m, *n;
4575         int error;
4576
4577         error = 0;
4578         m = rxr->fmp;
4579         if (m->m_len <= (MCLBYTES - ETHER_HDR_LEN)) {
4580                 bcopy(m->m_data, m->m_data + ETHER_HDR_LEN, m->m_len);
4581                 m->m_data += ETHER_HDR_LEN;
4582         } else {
4583                 MGETHDR(n, M_NOWAIT, MT_DATA);
4584                 if (n != NULL) {
4585                         bcopy(m->m_data, n->m_data, ETHER_HDR_LEN);
4586                         m->m_data += ETHER_HDR_LEN;
4587                         m->m_len -= ETHER_HDR_LEN;
4588                         n->m_len = ETHER_HDR_LEN;
4589                         M_MOVE_PKTHDR(n, m);
4590                         n->m_next = m;
4591                         rxr->fmp = n;
4592                 } else {
4593                         adapter->dropped_pkts++;
4594                         m_freem(rxr->fmp);
4595                         rxr->fmp = NULL;
4596                         error = ENOMEM;
4597                 }
4598         }
4599
4600         return (error);
4601 }
4602 #endif
4603
4604 /*********************************************************************
4605  *
4606  *  Verify that the hardware indicated that the checksum is valid.
4607  *  Inform the stack about the status of checksum so that stack
4608  *  doesn't spend time verifying the checksum.
4609  *
4610  *********************************************************************/
4611 static void
4612 em_receive_checksum(struct e1000_rx_desc *rx_desc, struct mbuf *mp)
4613 {
4614         /* Ignore Checksum bit is set */
4615         if (rx_desc->status & E1000_RXD_STAT_IXSM) {
4616                 mp->m_pkthdr.csum_flags = 0;
4617                 return;
4618         }
4619
4620         if (rx_desc->status & E1000_RXD_STAT_IPCS) {
4621                 /* Did it pass? */
4622                 if (!(rx_desc->errors & E1000_RXD_ERR_IPE)) {
4623                         /* IP Checksum Good */
4624                         mp->m_pkthdr.csum_flags = CSUM_IP_CHECKED;
4625                         mp->m_pkthdr.csum_flags |= CSUM_IP_VALID;
4626
4627                 } else {
4628                         mp->m_pkthdr.csum_flags = 0;
4629                 }
4630         }
4631
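        /*
         * For TCP/UDP the stack convention is to report a fully verified
         * pseudo-header checksum: CSUM_DATA_VALID | CSUM_PSEUDO_HDR with
         * csum_data = 0xffff means the stack need not verify it again.
         */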
4632         if (rx_desc->status & E1000_RXD_STAT_TCPCS) {
4633                 /* Did it pass? */
4634                 if (!(rx_desc->errors & E1000_RXD_ERR_TCPE)) {
4635                         mp->m_pkthdr.csum_flags |=
4636                         (CSUM_DATA_VALID | CSUM_PSEUDO_HDR);
4637                         mp->m_pkthdr.csum_data = htons(0xffff);
4638                 }
4639         }
4640 }
4641
4642 /*
4643  * This routine is run via a vlan
4644  * config EVENT
4645  */
4646 static void
4647 em_register_vlan(void *arg, struct ifnet *ifp, u16 vtag)
4648 {
4649         struct adapter  *adapter = ifp->if_softc;
4650         u32             index, bit;
4651
4652         if (ifp->if_softc !=  arg)   /* Not our event */
4653                 return;
4654
4655         if ((vtag == 0) || (vtag > 4095))       /* Invalid ID */
4656                 return;
4657
4658         EM_CORE_LOCK(adapter);
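        /*
         * The VFTA is 128 32-bit words covering all 4096 VLAN IDs:
         * bits [11:5] of the tag select the word, bits [4:0] the bit.
         */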
4659         index = (vtag >> 5) & 0x7F;
4660         bit = vtag & 0x1F;
4661         adapter->shadow_vfta[index] |= (1 << bit);
4662         ++adapter->num_vlans;
4663         /* Re-init to load the changes */
4664         if (ifp->if_capenable & IFCAP_VLAN_HWFILTER)
4665                 em_init_locked(adapter);
4666         EM_CORE_UNLOCK(adapter);
4667 }
4668
4669 /*
4670  * This routine is run via a vlan
4671  * unconfig EVENT
4672  */
4673 static void
4674 em_unregister_vlan(void *arg, struct ifnet *ifp, u16 vtag)
4675 {
4676         struct adapter  *adapter = ifp->if_softc;
4677         u32             index, bit;
4678
4679         if (ifp->if_softc !=  arg)
4680                 return;
4681
4682         if ((vtag == 0) || (vtag > 4095))       /* Invalid */
4683                 return;
4684
4685         EM_CORE_LOCK(adapter);
4686         index = (vtag >> 5) & 0x7F;
4687         bit = vtag & 0x1F;
4688         adapter->shadow_vfta[index] &= ~(1 << bit);
4689         --adapter->num_vlans;
4690         /* Re-init to load the changes */
4691         if (ifp->if_capenable & IFCAP_VLAN_HWFILTER)
4692                 em_init_locked(adapter);
4693         EM_CORE_UNLOCK(adapter);
4694 }
4695
4696 static void
4697 em_setup_vlan_hw_support(struct adapter *adapter)
4698 {
4699         struct e1000_hw *hw = &adapter->hw;
4700         u32             reg;
4701
4702         /*
4703         ** We get here thru init_locked, meaning
4704         ** a soft reset; this has already cleared
4705         ** the VFTA and other state, so if no
4706         ** vlans have been registered, do nothing.
4707         */
4708         if (adapter->num_vlans == 0)
4709                 return;
4710
4711         /*
4712         ** A soft reset zeroes out the VFTA, so
4713         ** we need to repopulate it now.
4714         */
4715         for (int i = 0; i < EM_VFTA_SIZE; i++)
4716                 if (adapter->shadow_vfta[i] != 0)
4717                         E1000_WRITE_REG_ARRAY(hw, E1000_VFTA,
4718                             i, adapter->shadow_vfta[i]);
4719
4720         reg = E1000_READ_REG(hw, E1000_CTRL);
4721         reg |= E1000_CTRL_VME;
4722         E1000_WRITE_REG(hw, E1000_CTRL, reg);
4723
4724         /* Enable the Filter Table */
4725         reg = E1000_READ_REG(hw, E1000_RCTL);
4726         reg &= ~E1000_RCTL_CFIEN;
4727         reg |= E1000_RCTL_VFE;
4728         E1000_WRITE_REG(hw, E1000_RCTL, reg);
4729 }
4730
4731 static void
4732 em_enable_intr(struct adapter *adapter)
4733 {
4734         struct e1000_hw *hw = &adapter->hw;
4735         u32 ims_mask = IMS_ENABLE_MASK;
4736
4737         if (hw->mac.type == e1000_82574) {
4738                 E1000_WRITE_REG(hw, EM_EIAC, EM_MSIX_MASK);
4739                 ims_mask |= EM_MSIX_MASK;
4740         } 
4741         E1000_WRITE_REG(hw, E1000_IMS, ims_mask);
4742 }
4743
4744 static void
4745 em_disable_intr(struct adapter *adapter)
4746 {
4747         struct e1000_hw *hw = &adapter->hw;
4748
4749         if (hw->mac.type == e1000_82574)
4750                 E1000_WRITE_REG(hw, EM_EIAC, 0);
4751         E1000_WRITE_REG(&adapter->hw, E1000_IMC, 0xffffffff);
4752 }
4753
4754 /*
4755  * A bit of a misnomer: what this really means is
4756  * to enable OS management of the system, i.e. to
4757  * disable the special hardware management features.
4758  */
4759 static void
4760 em_init_manageability(struct adapter *adapter)
4761 {
4762         /* A shared code workaround */
4763 #define E1000_82542_MANC2H E1000_MANC2H
4764         if (adapter->has_manage) {
4765                 int manc2h = E1000_READ_REG(&adapter->hw, E1000_MANC2H);
4766                 int manc = E1000_READ_REG(&adapter->hw, E1000_MANC);
4767
4768                 /* disable hardware interception of ARP */
4769                 manc &= ~(E1000_MANC_ARP_EN);
4770
4771                 /* enable receiving management packets to the host */
4772                 manc |= E1000_MANC_EN_MNG2HOST;
4773 #define E1000_MNG2HOST_PORT_623 (1 << 5)
4774 #define E1000_MNG2HOST_PORT_664 (1 << 6)
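                /*
                 * Ports 623 and 664 are the RMCP and secure-RMCP ports
                 * used by ASF-style management traffic; these bits make
                 * the hardware pass such packets up to the host as well.
                 */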
4775                 manc2h |= E1000_MNG2HOST_PORT_623;
4776                 manc2h |= E1000_MNG2HOST_PORT_664;
4777                 E1000_WRITE_REG(&adapter->hw, E1000_MANC2H, manc2h);
4778                 E1000_WRITE_REG(&adapter->hw, E1000_MANC, manc);
4779         }
4780 }
4781
4782 /*
4783  * Give control back to hardware management
4784  * controller if there is one.
4785  */
4786 static void
4787 em_release_manageability(struct adapter *adapter)
4788 {
4789         if (adapter->has_manage) {
4790                 int manc = E1000_READ_REG(&adapter->hw, E1000_MANC);
4791
4792                 /* re-enable hardware interception of ARP */
4793                 manc |= E1000_MANC_ARP_EN;
4794                 manc &= ~E1000_MANC_EN_MNG2HOST;
4795
4796                 E1000_WRITE_REG(&adapter->hw, E1000_MANC, manc);
4797         }
4798 }
4799
4800 /*
4801  * em_get_hw_control sets the {CTRL_EXT|FWSM}:DRV_LOAD bit.
4802  * For ASF and Pass Through versions of f/w this means
4803  * that the driver is loaded. For AMT version type f/w
4804  * this means that the network i/f is open.
4805  */
4806 static void
4807 em_get_hw_control(struct adapter *adapter)
4808 {
4809         u32 ctrl_ext, swsm;
4810
4811         if (adapter->hw.mac.type == e1000_82573) {
4812                 swsm = E1000_READ_REG(&adapter->hw, E1000_SWSM);
4813                 E1000_WRITE_REG(&adapter->hw, E1000_SWSM,
4814                     swsm | E1000_SWSM_DRV_LOAD);
4815                 return;
4816         }
4817         /* else */
4818         ctrl_ext = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
4819         E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT,
4820             ctrl_ext | E1000_CTRL_EXT_DRV_LOAD);
4821         return;
4822 }
4823
4824 /*
4825  * em_release_hw_control resets {CTRL_EXT|FWSM}:DRV_LOAD bit.
4826  * For ASF and Pass Through versions of f/w this means that
4827  * the driver is no longer loaded. For AMT versions of the
4828  * f/w this means that the network i/f is closed.
4829  */
4830 static void
4831 em_release_hw_control(struct adapter *adapter)
4832 {
4833         u32 ctrl_ext, swsm;
4834
4835         if (!adapter->has_manage)
4836                 return;
4837
4838         if (adapter->hw.mac.type == e1000_82573) {
4839                 swsm = E1000_READ_REG(&adapter->hw, E1000_SWSM);
4840                 E1000_WRITE_REG(&adapter->hw, E1000_SWSM,
4841                     swsm & ~E1000_SWSM_DRV_LOAD);
4842                 return;
4843         }
4844         /* else */
4845         ctrl_ext = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
4846         E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT,
4847             ctrl_ext & ~E1000_CTRL_EXT_DRV_LOAD);
4848         return;
4849 }
4850
4851 static int
4852 em_is_valid_ether_addr(u8 *addr)
4853 {
4854         char zero_addr[6] = { 0, 0, 0, 0, 0, 0 };
4855
4856         if ((addr[0] & 1) || (!bcmp(addr, zero_addr, ETHER_ADDR_LEN))) {
4857                 return (FALSE);
4858         }
4859
4860         return (TRUE);
4861 }
4862
4863 /*
4864 ** Parse the interface capabilities with regard
4865 ** to both system management and wake-on-lan for
4866 ** later use.
4867 */
4868 static void
4869 em_get_wakeup(device_t dev)
4870 {
4871         struct adapter  *adapter = device_get_softc(dev);
4872         u16             eeprom_data = 0, device_id, apme_mask;
4873
4874         adapter->has_manage = e1000_enable_mng_pass_thru(&adapter->hw);
4875         apme_mask = EM_EEPROM_APME;
4876
4877         switch (adapter->hw.mac.type) {
4878         case e1000_82573:
4879         case e1000_82583:
4880                 adapter->has_amt = TRUE;
4881                 /* Falls thru */
4882         case e1000_82571:
4883         case e1000_82572:
4884         case e1000_80003es2lan:
4885                 if (adapter->hw.bus.func == 1) {
4886                         e1000_read_nvm(&adapter->hw,
4887                             NVM_INIT_CONTROL3_PORT_B, 1, &eeprom_data);
4888                         break;
4889                 } else
4890                         e1000_read_nvm(&adapter->hw,
4891                             NVM_INIT_CONTROL3_PORT_A, 1, &eeprom_data);
4892                 break;
4893         case e1000_ich8lan:
4894         case e1000_ich9lan:
4895         case e1000_ich10lan:
4896         case e1000_pchlan:
4897         case e1000_pch2lan:
4898                 apme_mask = E1000_WUC_APME;
4899                 adapter->has_amt = TRUE;
4900                 eeprom_data = E1000_READ_REG(&adapter->hw, E1000_WUC);
4901                 break;
4902         default:
4903                 e1000_read_nvm(&adapter->hw,
4904                     NVM_INIT_CONTROL3_PORT_A, 1, &eeprom_data);
4905                 break;
4906         }
4907         if (eeprom_data & apme_mask)
4908                 adapter->wol = (E1000_WUFC_MAG | E1000_WUFC_MC);
4909         /*
4910          * We have the eeprom settings; now apply the special cases
4911          * where the eeprom may be wrong or the board simply does not
4912          * support wake on lan on a particular port.
4913          */
4914         device_id = pci_get_device(dev);
4915         switch (device_id) {
4916         case E1000_DEV_ID_82571EB_FIBER:
4917                 /* Wake events only supported on port A for dual fiber
4918                  * regardless of eeprom setting */
4919                 if (E1000_READ_REG(&adapter->hw, E1000_STATUS) &
4920                     E1000_STATUS_FUNC_1)
4921                         adapter->wol = 0;
4922                 break;
4923         case E1000_DEV_ID_82571EB_QUAD_COPPER:
4924         case E1000_DEV_ID_82571EB_QUAD_FIBER:
4925         case E1000_DEV_ID_82571EB_QUAD_COPPER_LP:
4926                 /* if quad port adapter, disable WoL on all but port A */
4927                 if (global_quad_port_a != 0)
4928                         adapter->wol = 0;
4929                 /* Reset for multiple quad port adapters */
4930                 if (++global_quad_port_a == 4)
4931                         global_quad_port_a = 0;
4932                 break;
4933         }
4934         return;
4935 }
4936
4937
4938 /*
4939  * Enable PCI Wake On Lan capability
4940  */
4941 static void
4942 em_enable_wakeup(device_t dev)
4943 {
4944         struct adapter  *adapter = device_get_softc(dev);
4945         struct ifnet    *ifp = adapter->ifp;
4946         u32             pmc, ctrl, ctrl_ext, rctl;
4947         u16             status;
4948
4949         if ((pci_find_cap(dev, PCIY_PMG, &pmc) != 0))
4950                 return;
4951
4952         /* Advertise the wakeup capability */
4953         ctrl = E1000_READ_REG(&adapter->hw, E1000_CTRL);
4954         ctrl |= (E1000_CTRL_SWDPIN2 | E1000_CTRL_SWDPIN3);
4955         E1000_WRITE_REG(&adapter->hw, E1000_CTRL, ctrl);
4956         E1000_WRITE_REG(&adapter->hw, E1000_WUC, E1000_WUC_PME_EN);
4957
4958         if ((adapter->hw.mac.type == e1000_ich8lan) ||
4959             (adapter->hw.mac.type == e1000_pchlan) ||
4960             (adapter->hw.mac.type == e1000_ich9lan) ||
4961             (adapter->hw.mac.type == e1000_ich10lan))
4962                 e1000_suspend_workarounds_ich8lan(&adapter->hw);
4963
4964         /* Keep the laser running on Fiber adapters */
4965         if (adapter->hw.phy.media_type == e1000_media_type_fiber ||
4966             adapter->hw.phy.media_type == e1000_media_type_internal_serdes) {
4967                 ctrl_ext = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
4968                 ctrl_ext |= E1000_CTRL_EXT_SDP3_DATA;
4969                 E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT, ctrl_ext);
4970         }
4971
4972         /*
4973         ** Determine type of Wakeup: note that wol
4974         ** is set with all bits on by default.
4975         */
4976         if ((ifp->if_capenable & IFCAP_WOL_MAGIC) == 0)
4977                 adapter->wol &= ~E1000_WUFC_MAG;
4978
4979         if ((ifp->if_capenable & IFCAP_WOL_MCAST) == 0)
4980                 adapter->wol &= ~E1000_WUFC_MC;
4981         else {
4982                 rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
4983                 rctl |= E1000_RCTL_MPE;
4984                 E1000_WRITE_REG(&adapter->hw, E1000_RCTL, rctl);
4985         }
4986
4987         if ((adapter->hw.mac.type == e1000_pchlan) ||
4988             (adapter->hw.mac.type == e1000_pch2lan)) {
4989                 if (em_enable_phy_wakeup(adapter))
4990                         return;
4991         } else {
4992                 E1000_WRITE_REG(&adapter->hw, E1000_WUC, E1000_WUC_PME_EN);
4993                 E1000_WRITE_REG(&adapter->hw, E1000_WUFC, adapter->wol);
4994         }
4995
4996         if (adapter->hw.phy.type == e1000_phy_igp_3)
4997                 e1000_igp3_phy_powerdown_workaround_ich8lan(&adapter->hw);
4998
4999         /* Request PME */
5000         status = pci_read_config(dev, pmc + PCIR_POWER_STATUS, 2);
5001         status &= ~(PCIM_PSTAT_PME | PCIM_PSTAT_PMEENABLE);
5002         if (ifp->if_capenable & IFCAP_WOL)
5003                 status |= PCIM_PSTAT_PME | PCIM_PSTAT_PMEENABLE;
5004         pci_write_config(dev, pmc + PCIR_POWER_STATUS, status, 2);
5005
5006         return;
5007 }
5008
5009 /*
5010 ** WOL on the newer chipsets (pchlan) requires
5011 ** state to be copied into the PHY
5012 */
5013 static int
5014 em_enable_phy_wakeup(struct adapter *adapter)
5015 {
5016         struct e1000_hw *hw = &adapter->hw;
5017         u32 mreg, ret = 0;
5018         u16 preg;
5019
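        /*
         * On pchlan parts the wake logic lives in the PHY, which stays
         * powered when the MAC may not be, so the MAC's receive-address,
         * multicast and receive-control state must be mirrored into PHY
         * registers for wake packets to be recognized.
         */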
5020         /* copy MAC RARs to PHY RARs */
5021         e1000_copy_rx_addrs_to_phy_ich8lan(hw);
5022
5023         /* copy MAC MTA to PHY MTA */
5024         for (int i = 0; i < adapter->hw.mac.mta_reg_count; i++) {
5025                 mreg = E1000_READ_REG_ARRAY(hw, E1000_MTA, i);
5026                 e1000_write_phy_reg(hw, BM_MTA(i), (u16)(mreg & 0xFFFF));
5027                 e1000_write_phy_reg(hw, BM_MTA(i) + 1,
5028                     (u16)((mreg >> 16) & 0xFFFF));
5029         }
5030
5031         /* configure PHY Rx Control register */
5032         e1000_read_phy_reg(&adapter->hw, BM_RCTL, &preg);
5033         mreg = E1000_READ_REG(hw, E1000_RCTL);
5034         if (mreg & E1000_RCTL_UPE)
5035                 preg |= BM_RCTL_UPE;
5036         if (mreg & E1000_RCTL_MPE)
5037                 preg |= BM_RCTL_MPE;
5038         preg &= ~(BM_RCTL_MO_MASK);
5039         if (mreg & E1000_RCTL_MO_3)
5040                 preg |= (((mreg & E1000_RCTL_MO_3) >> E1000_RCTL_MO_SHIFT)
5041                                 << BM_RCTL_MO_SHIFT);
5042         if (mreg & E1000_RCTL_BAM)
5043                 preg |= BM_RCTL_BAM;
5044         if (mreg & E1000_RCTL_PMCF)
5045                 preg |= BM_RCTL_PMCF;
5046         mreg = E1000_READ_REG(hw, E1000_CTRL);
5047         if (mreg & E1000_CTRL_RFCE)
5048                 preg |= BM_RCTL_RFCE;
5049         e1000_write_phy_reg(&adapter->hw, BM_RCTL, preg);
5050
5051         /* enable PHY wakeup in MAC register */
5052         E1000_WRITE_REG(hw, E1000_WUC,
5053             E1000_WUC_PHY_WAKE | E1000_WUC_PME_EN);
5054         E1000_WRITE_REG(hw, E1000_WUFC, adapter->wol);
5055
5056         /* configure and enable PHY wakeup in PHY registers */
5057         e1000_write_phy_reg(&adapter->hw, BM_WUFC, adapter->wol);
5058         e1000_write_phy_reg(&adapter->hw, BM_WUC, E1000_WUC_PME_EN);
5059
5060         /* activate PHY wakeup */
5061         ret = hw->phy.ops.acquire(hw);
5062         if (ret) {
5063                 printf("Could not acquire PHY\n");
5064                 return (ret);
5065         }
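        /*
         * BM_WUC_ENABLE_REG lives on PHY page 769 (BM_WUC_ENABLE_PAGE),
         * so select that page via the page-select register before the
         * raw MDIC accesses below.
         */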
5066         e1000_write_phy_reg_mdic(hw, IGP01E1000_PHY_PAGE_SELECT,
5067                                  (BM_WUC_ENABLE_PAGE << IGP_PAGE_SHIFT));
5068         ret = e1000_read_phy_reg_mdic(hw, BM_WUC_ENABLE_REG, &preg);
5069         if (ret) {
5070                 printf("Could not read PHY page 769\n");
5071                 goto out;
5072         }
5073         preg |= BM_WUC_ENABLE_BIT | BM_WUC_HOST_WU_BIT;
5074         ret = e1000_write_phy_reg_mdic(hw, BM_WUC_ENABLE_REG, preg);
5075         if (ret)
5076                 printf("Could not set PHY Host Wakeup bit\n");
5077 out:
5078         hw->phy.ops.release(hw);
5079
5080         return (ret);
5081 }
5082
5083 static void
5084 em_led_func(void *arg, int onoff)
5085 {
5086         struct adapter  *adapter = arg;
5087  
5088         EM_CORE_LOCK(adapter);
5089         if (onoff) {
5090                 e1000_setup_led(&adapter->hw);
5091                 e1000_led_on(&adapter->hw);
5092         } else {
5093                 e1000_led_off(&adapter->hw);
5094                 e1000_cleanup_led(&adapter->hw);
5095         }
5096         EM_CORE_UNLOCK(adapter);
5097 }
5098
5099 /*
5100 ** Disable the L0S and L1 LINK states
5101 */
5102 static void
5103 em_disable_aspm(struct adapter *adapter)
5104 {
5105         int             base, reg;
5106         u16             link_cap, link_ctrl;
5107         device_t        dev = adapter->dev;
5108
5109         switch (adapter->hw.mac.type) {
5110                 case e1000_82573:
5111                 case e1000_82574:
5112                 case e1000_82583:
5113                         break;
5114                 default:
5115                         return;
5116         }
5117         if (pci_find_cap(dev, PCIY_EXPRESS, &base) != 0)
5118                 return;
5119         reg = base + PCIER_LINK_CAP;
5120         link_cap = pci_read_config(dev, reg, 2);
5121         if ((link_cap & PCIEM_LINK_CAP_ASPM) == 0)
5122                 return;
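        /*
         * PCIEM_LINK_CTL_ASPMC is the two-bit ASPM Control field of the
         * Link Control register; clearing it disables both L0s and L1
         * entry on this link.
         */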
5123         reg = base + PCIER_LINK_CTL;
5124         link_ctrl = pci_read_config(dev, reg, 2);
5125         link_ctrl &= ~PCIEM_LINK_CTL_ASPMC;
5126         pci_write_config(dev, reg, link_ctrl, 2);
5127         return;
5128 }
5129
5130 /**********************************************************************
5131  *
5132  *  Update the board statistics counters.
5133  *
5134  **********************************************************************/
5135 static void
5136 em_update_stats_counters(struct adapter *adapter)
5137 {
5138         struct ifnet   *ifp;
5139
5140         if (adapter->hw.phy.media_type == e1000_media_type_copper ||
5141            (E1000_READ_REG(&adapter->hw, E1000_STATUS) & E1000_STATUS_LU)) {
5142                 adapter->stats.symerrs += E1000_READ_REG(&adapter->hw, E1000_SYMERRS);
5143                 adapter->stats.sec += E1000_READ_REG(&adapter->hw, E1000_SEC);
5144         }
5145         adapter->stats.crcerrs += E1000_READ_REG(&adapter->hw, E1000_CRCERRS);
5146         adapter->stats.mpc += E1000_READ_REG(&adapter->hw, E1000_MPC);
5147         adapter->stats.scc += E1000_READ_REG(&adapter->hw, E1000_SCC);
5148         adapter->stats.ecol += E1000_READ_REG(&adapter->hw, E1000_ECOL);
5149
5150         adapter->stats.mcc += E1000_READ_REG(&adapter->hw, E1000_MCC);
5151         adapter->stats.latecol += E1000_READ_REG(&adapter->hw, E1000_LATECOL);
5152         adapter->stats.colc += E1000_READ_REG(&adapter->hw, E1000_COLC);
5153         adapter->stats.dc += E1000_READ_REG(&adapter->hw, E1000_DC);
5154         adapter->stats.rlec += E1000_READ_REG(&adapter->hw, E1000_RLEC);
5155         adapter->stats.xonrxc += E1000_READ_REG(&adapter->hw, E1000_XONRXC);
5156         adapter->stats.xontxc += E1000_READ_REG(&adapter->hw, E1000_XONTXC);
5157         /*
5158         ** For watchdog management we need to know if we have been
5159         ** paused during the last interval, so capture that here.
5160         */
5161         adapter->pause_frames = E1000_READ_REG(&adapter->hw, E1000_XOFFRXC);
5162         adapter->stats.xoffrxc += adapter->pause_frames;
5163         adapter->stats.xofftxc += E1000_READ_REG(&adapter->hw, E1000_XOFFTXC);
5164         adapter->stats.fcruc += E1000_READ_REG(&adapter->hw, E1000_FCRUC);
5165         adapter->stats.prc64 += E1000_READ_REG(&adapter->hw, E1000_PRC64);
5166         adapter->stats.prc127 += E1000_READ_REG(&adapter->hw, E1000_PRC127);
5167         adapter->stats.prc255 += E1000_READ_REG(&adapter->hw, E1000_PRC255);
5168         adapter->stats.prc511 += E1000_READ_REG(&adapter->hw, E1000_PRC511);
5169         adapter->stats.prc1023 += E1000_READ_REG(&adapter->hw, E1000_PRC1023);
5170         adapter->stats.prc1522 += E1000_READ_REG(&adapter->hw, E1000_PRC1522);
5171         adapter->stats.gprc += E1000_READ_REG(&adapter->hw, E1000_GPRC);
5172         adapter->stats.bprc += E1000_READ_REG(&adapter->hw, E1000_BPRC);
5173         adapter->stats.mprc += E1000_READ_REG(&adapter->hw, E1000_MPRC);
5174         adapter->stats.gptc += E1000_READ_REG(&adapter->hw, E1000_GPTC);
5175
5176         /* For the 64-bit byte counters the low dword must be read first. */
5177         /* Both registers clear on the read of the high dword */
5178
5179         adapter->stats.gorc += E1000_READ_REG(&adapter->hw, E1000_GORCL) +
5180             ((u64)E1000_READ_REG(&adapter->hw, E1000_GORCH) << 32);
5181         adapter->stats.gotc += E1000_READ_REG(&adapter->hw, E1000_GOTCL) +
5182             ((u64)E1000_READ_REG(&adapter->hw, E1000_GOTCH) << 32);
5183
5184         adapter->stats.rnbc += E1000_READ_REG(&adapter->hw, E1000_RNBC);
5185         adapter->stats.ruc += E1000_READ_REG(&adapter->hw, E1000_RUC);
5186         adapter->stats.rfc += E1000_READ_REG(&adapter->hw, E1000_RFC);
5187         adapter->stats.roc += E1000_READ_REG(&adapter->hw, E1000_ROC);
5188         adapter->stats.rjc += E1000_READ_REG(&adapter->hw, E1000_RJC);
5189
5190         adapter->stats.tor += E1000_READ_REG(&adapter->hw, E1000_TORH);
5191         adapter->stats.tot += E1000_READ_REG(&adapter->hw, E1000_TOTH);
5192
5193         adapter->stats.tpr += E1000_READ_REG(&adapter->hw, E1000_TPR);
5194         adapter->stats.tpt += E1000_READ_REG(&adapter->hw, E1000_TPT);
5195         adapter->stats.ptc64 += E1000_READ_REG(&adapter->hw, E1000_PTC64);
5196         adapter->stats.ptc127 += E1000_READ_REG(&adapter->hw, E1000_PTC127);
5197         adapter->stats.ptc255 += E1000_READ_REG(&adapter->hw, E1000_PTC255);
5198         adapter->stats.ptc511 += E1000_READ_REG(&adapter->hw, E1000_PTC511);
5199         adapter->stats.ptc1023 += E1000_READ_REG(&adapter->hw, E1000_PTC1023);
5200         adapter->stats.ptc1522 += E1000_READ_REG(&adapter->hw, E1000_PTC1522);
5201         adapter->stats.mptc += E1000_READ_REG(&adapter->hw, E1000_MPTC);
5202         adapter->stats.bptc += E1000_READ_REG(&adapter->hw, E1000_BPTC);
5203
5204         /* Interrupt Counts */
5205
5206         adapter->stats.iac += E1000_READ_REG(&adapter->hw, E1000_IAC);
5207         adapter->stats.icrxptc += E1000_READ_REG(&adapter->hw, E1000_ICRXPTC);
5208         adapter->stats.icrxatc += E1000_READ_REG(&adapter->hw, E1000_ICRXATC);
5209         adapter->stats.ictxptc += E1000_READ_REG(&adapter->hw, E1000_ICTXPTC);
5210         adapter->stats.ictxatc += E1000_READ_REG(&adapter->hw, E1000_ICTXATC);
5211         adapter->stats.ictxqec += E1000_READ_REG(&adapter->hw, E1000_ICTXQEC);
5212         adapter->stats.ictxqmtc += E1000_READ_REG(&adapter->hw, E1000_ICTXQMTC);
5213         adapter->stats.icrxdmtc += E1000_READ_REG(&adapter->hw, E1000_ICRXDMTC);
5214         adapter->stats.icrxoc += E1000_READ_REG(&adapter->hw, E1000_ICRXOC);
5215
5216         if (adapter->hw.mac.type >= e1000_82543) {
5217                 adapter->stats.algnerrc +=
5218                     E1000_READ_REG(&adapter->hw, E1000_ALGNERRC);
5219                 adapter->stats.rxerrc +=
5220                     E1000_READ_REG(&adapter->hw, E1000_RXERRC);
5221                 adapter->stats.tncrs +=
5222                     E1000_READ_REG(&adapter->hw, E1000_TNCRS);
5223                 adapter->stats.cexterr +=
5224                     E1000_READ_REG(&adapter->hw, E1000_CEXTERR);
5225                 adapter->stats.tsctc +=
5226                     E1000_READ_REG(&adapter->hw, E1000_TSCTC);
5227                 adapter->stats.tsctfc +=
5228                     E1000_READ_REG(&adapter->hw, E1000_TSCTFC);
5229         }
5230         ifp = adapter->ifp;
5231
5232         ifp->if_collisions = adapter->stats.colc;
5233
5234         /* Rx Errors */
5235         ifp->if_ierrors = adapter->dropped_pkts + adapter->stats.rxerrc +
5236             adapter->stats.crcerrs + adapter->stats.algnerrc +
5237             adapter->stats.ruc + adapter->stats.roc +
5238             adapter->stats.mpc + adapter->stats.cexterr;
5239
5240         /* Tx Errors */
5241         ifp->if_oerrors = adapter->stats.ecol +
5242             adapter->stats.latecol + adapter->watchdog_events;
5243 }
5244
5245 /* Export a single 32-bit register via a read-only sysctl. */
5246 static int
5247 em_sysctl_reg_handler(SYSCTL_HANDLER_ARGS)
5248 {
5249         struct adapter *adapter;
5250         u_int val;
5251
5252         adapter = oidp->oid_arg1;
5253         val = E1000_READ_REG(&adapter->hw, oidp->oid_arg2);
5254         return (sysctl_handle_int(oidp, &val, 0, req));
5255 }
5256
5257 /*
5258  * Add sysctl variables, one per statistic, to the system.
5259  */
5260 static void
5261 em_add_hw_stats(struct adapter *adapter)
5262 {
5263         device_t dev = adapter->dev;
5264
5265         struct tx_ring *txr = adapter->tx_rings;
5266         struct rx_ring *rxr = adapter->rx_rings;
5267
5268         struct sysctl_ctx_list *ctx = device_get_sysctl_ctx(dev);
5269         struct sysctl_oid *tree = device_get_sysctl_tree(dev);
5270         struct sysctl_oid_list *child = SYSCTL_CHILDREN(tree);
5271         struct e1000_hw_stats *stats = &adapter->stats;
5272
5273         struct sysctl_oid *stat_node, *queue_node, *int_node;
5274         struct sysctl_oid_list *stat_list, *queue_list, *int_list;
5275
5276 #define QUEUE_NAME_LEN 32
5277         char namebuf[QUEUE_NAME_LEN];
5278         
5279         /* Driver Statistics */
5280         SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "link_irq",
5281                         CTLFLAG_RD, &adapter->link_irq,
5282                         "Link MSIX IRQ Handled");
5283         SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "mbuf_alloc_fail", 
5284                          CTLFLAG_RD, &adapter->mbuf_alloc_failed,
5285                          "Std mbuf failed");
5286         SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "cluster_alloc_fail", 
5287                          CTLFLAG_RD, &adapter->mbuf_cluster_failed,
5288                          "Std mbuf cluster failed");
5289         SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "dropped", 
5290                         CTLFLAG_RD, &adapter->dropped_pkts,
5291                         "Driver dropped packets");
5292         SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "tx_dma_fail", 
5293                         CTLFLAG_RD, &adapter->no_tx_dma_setup,
5294                         "Driver tx dma failure in xmit");
5295         SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "rx_overruns",
5296                         CTLFLAG_RD, &adapter->rx_overruns,
5297                         "RX overruns");
5298         SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "watchdog_timeouts",
5299                         CTLFLAG_RD, &adapter->watchdog_events,
5300                         "Watchdog timeouts");
5301         
5302         SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "device_control",
5303                         CTLTYPE_UINT | CTLFLAG_RD, adapter, E1000_CTRL,
5304                         em_sysctl_reg_handler, "IU",
5305                         "Device Control Register");
5306         SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "rx_control",
5307                         CTLTYPE_UINT | CTLFLAG_RD, adapter, E1000_RCTL,
5308                         em_sysctl_reg_handler, "IU",
5309                         "Receiver Control Register");
5310         SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "fc_high_water",
5311                         CTLFLAG_RD, &adapter->hw.fc.high_water, 0,
5312                         "Flow Control High Watermark");
5313         SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "fc_low_water", 
5314                         CTLFLAG_RD, &adapter->hw.fc.low_water, 0,
5315                         "Flow Control Low Watermark");
5316
5317         for (int i = 0; i < adapter->num_queues; i++, rxr++, txr++) {
5318                 snprintf(namebuf, QUEUE_NAME_LEN, "queue%d", i);
5319                 queue_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, namebuf,
5320                                             CTLFLAG_RD, NULL, "Queue Name");
5321                 queue_list = SYSCTL_CHILDREN(queue_node);
5322
5323                 SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "txd_head", 
5324                                 CTLTYPE_UINT | CTLFLAG_RD, adapter,
5325                                 E1000_TDH(txr->me),
5326                                 em_sysctl_reg_handler, "IU",
5327                                 "Transmit Descriptor Head");
5328                 SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "txd_tail", 
5329                                 CTLTYPE_UINT | CTLFLAG_RD, adapter,
5330                                 E1000_TDT(txr->me),
5331                                 em_sysctl_reg_handler, "IU",
5332                                 "Transmit Descriptor Tail");
5333                 SYSCTL_ADD_ULONG(ctx, queue_list, OID_AUTO, "tx_irq",
5334                                 CTLFLAG_RD, &txr->tx_irq,
5335                                 "Queue MSI-X Transmit Interrupts");
5336                 SYSCTL_ADD_ULONG(ctx, queue_list, OID_AUTO, "no_desc_avail", 
5337                                 CTLFLAG_RD, &txr->no_desc_avail,
5338                                 "Queue No Descriptor Available");
5339                 
5340                 SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "rxd_head", 
5341                                 CTLTYPE_UINT | CTLFLAG_RD, adapter,
5342                                 E1000_RDH(rxr->me),
5343                                 em_sysctl_reg_handler, "IU",
5344                                 "Receive Descriptor Head");
5345                 SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "rxd_tail", 
5346                                 CTLTYPE_UINT | CTLFLAG_RD, adapter,
5347                                 E1000_RDT(rxr->me),
5348                                 em_sysctl_reg_handler, "IU",
5349                                 "Receive Descriptor Tail");
5350                 SYSCTL_ADD_ULONG(ctx, queue_list, OID_AUTO, "rx_irq",
5351                                 CTLFLAG_RD, &rxr->rx_irq,
5352                                 "Queue MSI-X Receive Interrupts");
5353         }

        /* MAC stats get their own sub node */

        stat_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "mac_stats",
                                    CTLFLAG_RD, NULL, "Statistics");
        stat_list = SYSCTL_CHILDREN(stat_node);

        SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "excess_coll",
                        CTLFLAG_RD, &stats->ecol,
                        "Excessive collisions");
        SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "single_coll",
                        CTLFLAG_RD, &stats->scc,
                        "Single collisions");
        SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "multiple_coll",
                        CTLFLAG_RD, &stats->mcc,
                        "Multiple collisions");
        SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "late_coll",
                        CTLFLAG_RD, &stats->latecol,
                        "Late collisions");
        SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "collision_count",
                        CTLFLAG_RD, &stats->colc,
                        "Collision Count");
        SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "symbol_errors",
                        CTLFLAG_RD, &adapter->stats.symerrs,
                        "Symbol Errors");
        SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "sequence_errors",
                        CTLFLAG_RD, &adapter->stats.sec,
                        "Sequence Errors");
        SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "defer_count",
                        CTLFLAG_RD, &adapter->stats.dc,
                        "Defer Count");
        SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "missed_packets",
                        CTLFLAG_RD, &adapter->stats.mpc,
                        "Missed Packets");
        SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_no_buff",
                        CTLFLAG_RD, &adapter->stats.rnbc,
                        "Receive No Buffers");
        SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_undersize",
                        CTLFLAG_RD, &adapter->stats.ruc,
                        "Receive Undersize");
        SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_fragmented",
                        CTLFLAG_RD, &adapter->stats.rfc,
                        "Fragmented Packets Received");
        SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_oversize",
                        CTLFLAG_RD, &adapter->stats.roc,
                        "Oversized Packets Received");
        SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_jabber",
                        CTLFLAG_RD, &adapter->stats.rjc,
                        "Received Jabber");
        SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_errs",
                        CTLFLAG_RD, &adapter->stats.rxerrc,
                        "Receive Errors");
        SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "crc_errs",
                        CTLFLAG_RD, &adapter->stats.crcerrs,
                        "CRC errors");
        SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "alignment_errs",
                        CTLFLAG_RD, &adapter->stats.algnerrc,
                        "Alignment Errors");
        /* On 82575 these are collision counts */
        SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "coll_ext_errs",
                        CTLFLAG_RD, &adapter->stats.cexterr,
                        "Collision/Carrier extension errors");
        SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "xon_recvd",
                        CTLFLAG_RD, &adapter->stats.xonrxc,
                        "XON Received");
        SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "xon_txd",
                        CTLFLAG_RD, &adapter->stats.xontxc,
                        "XON Transmitted");
        SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "xoff_recvd",
                        CTLFLAG_RD, &adapter->stats.xoffrxc,
                        "XOFF Received");
        SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "xoff_txd",
                        CTLFLAG_RD, &adapter->stats.xofftxc,
                        "XOFF Transmitted");

        /* Packet Reception Stats */
        SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "total_pkts_recvd",
                        CTLFLAG_RD, &adapter->stats.tpr,
                        "Total Packets Received");
        SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_pkts_recvd",
                        CTLFLAG_RD, &adapter->stats.gprc,
                        "Good Packets Received");
        SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "bcast_pkts_recvd",
                        CTLFLAG_RD, &adapter->stats.bprc,
                        "Broadcast Packets Received");
        SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "mcast_pkts_recvd",
                        CTLFLAG_RD, &adapter->stats.mprc,
                        "Multicast Packets Received");
        SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_64",
                        CTLFLAG_RD, &adapter->stats.prc64,
                        "64 byte frames received");
        SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_65_127",
                        CTLFLAG_RD, &adapter->stats.prc127,
                        "65-127 byte frames received");
        SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_128_255",
                        CTLFLAG_RD, &adapter->stats.prc255,
                        "128-255 byte frames received");
        SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_256_511",
                        CTLFLAG_RD, &adapter->stats.prc511,
                        "256-511 byte frames received");
        SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_512_1023",
                        CTLFLAG_RD, &adapter->stats.prc1023,
                        "512-1023 byte frames received");
        SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_1024_1522",
                        CTLFLAG_RD, &adapter->stats.prc1522,
                        "1024-1522 byte frames received");
        SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_octets_recvd",
                        CTLFLAG_RD, &adapter->stats.gorc,
                        "Good Octets Received");

        /* Packet Transmission Stats */
        SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_octets_txd",
                        CTLFLAG_RD, &adapter->stats.gotc,
                        "Good Octets Transmitted");
        SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "total_pkts_txd",
                        CTLFLAG_RD, &adapter->stats.tpt,
                        "Total Packets Transmitted");
        SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_pkts_txd",
                        CTLFLAG_RD, &adapter->stats.gptc,
                        "Good Packets Transmitted");
        SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "bcast_pkts_txd",
                        CTLFLAG_RD, &adapter->stats.bptc,
                        "Broadcast Packets Transmitted");
        SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "mcast_pkts_txd",
                        CTLFLAG_RD, &adapter->stats.mptc,
                        "Multicast Packets Transmitted");
        SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_64",
                        CTLFLAG_RD, &adapter->stats.ptc64,
                        "64 byte frames transmitted");
        SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_65_127",
                        CTLFLAG_RD, &adapter->stats.ptc127,
                        "65-127 byte frames transmitted");
        SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_128_255",
                        CTLFLAG_RD, &adapter->stats.ptc255,
                        "128-255 byte frames transmitted");
        SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_256_511",
                        CTLFLAG_RD, &adapter->stats.ptc511,
                        "256-511 byte frames transmitted");
        SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_512_1023",
                        CTLFLAG_RD, &adapter->stats.ptc1023,
                        "512-1023 byte frames transmitted");
        SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_1024_1522",
                        CTLFLAG_RD, &adapter->stats.ptc1522,
                        "1024-1522 byte frames transmitted");
        SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tso_txd",
                        CTLFLAG_RD, &adapter->stats.tsctc,
                        "TSO Contexts Transmitted");
        SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tso_ctx_fail",
                        CTLFLAG_RD, &adapter->stats.tsctfc,
                        "TSO Contexts Failed");

        /* Interrupt Stats */

        int_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "interrupts",
                                    CTLFLAG_RD, NULL, "Interrupt Statistics");
        int_list = SYSCTL_CHILDREN(int_node);

        SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "asserts",
                        CTLFLAG_RD, &adapter->stats.iac,
                        "Interrupt Assertion Count");

        SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "rx_pkt_timer",
                        CTLFLAG_RD, &adapter->stats.icrxptc,
                        "Interrupt Cause Rx Pkt Timer Expire Count");

        SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "rx_abs_timer",
                        CTLFLAG_RD, &adapter->stats.icrxatc,
                        "Interrupt Cause Rx Abs Timer Expire Count");

        SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "tx_pkt_timer",
                        CTLFLAG_RD, &adapter->stats.ictxptc,
                        "Interrupt Cause Tx Pkt Timer Expire Count");

        SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "tx_abs_timer",
                        CTLFLAG_RD, &adapter->stats.ictxatc,
                        "Interrupt Cause Tx Abs Timer Expire Count");

        SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "tx_queue_empty",
                        CTLFLAG_RD, &adapter->stats.ictxqec,
                        "Interrupt Cause Tx Queue Empty Count");

        SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "tx_queue_min_thresh",
                        CTLFLAG_RD, &adapter->stats.ictxqmtc,
                        "Interrupt Cause Tx Queue Min Thresh Count");

        SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "rx_desc_min_thresh",
                        CTLFLAG_RD, &adapter->stats.icrxdmtc,
                        "Interrupt Cause Rx Desc Min Thresh Count");

        SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "rx_overrun",
                        CTLFLAG_RD, &adapter->stats.icrxoc,
                        "Interrupt Cause Receiver Overrun Count");
}
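
/*
 * For illustration only: a minimal userland sketch showing how one of the
 * counters registered above can be read with sysctl(3).  The node name
 * assumes device unit 0 and the "mac_stats" subtree created in this
 * function; it is not part of the driver, hence the #if 0 guard.
 */
#if 0
#include <sys/types.h>
#include <sys/sysctl.h>
#include <stdint.h>
#include <stdio.h>

int
main(void)
{
        uint64_t missed;                /* counters above are 64-bit (UQUAD) */
        size_t len = sizeof(missed);

        if (sysctlbyname("dev.em.0.mac_stats.missed_packets",
            &missed, &len, NULL, 0) == -1) {
                perror("sysctlbyname");
                return (1);
        }
        printf("missed packets: %ju\n", (uintmax_t)missed);
        return (0);
}
#endif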

/**********************************************************************
 *
 *  This routine provides a way to dump out the adapter eeprom,
 *  often a useful debug/service tool. This dumps only the first
 *  32 words; the data that matters lives within that extent.
 *
 **********************************************************************/
static int
em_sysctl_nvm_info(SYSCTL_HANDLER_ARGS)
{
        struct adapter *adapter = (struct adapter *)arg1;
        int error;
        int result;

        result = -1;
        error = sysctl_handle_int(oidp, &result, 0, req);

        if (error || !req->newptr)
                return (error);

        /*
         * This value will cause a hex dump of the
         * first 32 16-bit words of the EEPROM to
         * the screen.
         */
        if (result == 1)
                em_print_nvm_info(adapter);

        return (error);
}
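
/*
 * Usage sketch: the handler above is attached elsewhere in this driver
 * as a read/write node (conventionally named "nvm"); assuming that name
 * and unit 0, the dump can be triggered from userland with:
 *
 *      sysctl dev.em.0.nvm=1
 */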

static void
em_print_nvm_info(struct adapter *adapter)
{
        u16     eeprom_data;
        int     i, j, row = 0;

        /* It's a bit crude, but it gets the job done */
        printf("\nInterface EEPROM Dump:\n");
        printf("Offset\n0x0000  ");
        for (i = 0, j = 0; i < 32; i++, j++) {
                if (j == 8) { /* Make the offset block */
                        j = 0; ++row;
                        printf("\n0x00%x0  ", row);
                }
                e1000_read_nvm(&adapter->hw, i, 1, &eeprom_data);
                printf("%04x ", eeprom_data);
        }
        printf("\n");
}

static int
em_sysctl_int_delay(SYSCTL_HANDLER_ARGS)
{
        struct em_int_delay_info *info;
        struct adapter *adapter;
        u32 regval;
        int error, usecs, ticks;

        info = (struct em_int_delay_info *)arg1;
        usecs = info->value;
        error = sysctl_handle_int(oidp, &usecs, 0, req);
        if (error != 0 || req->newptr == NULL)
                return (error);
        if (usecs < 0 || usecs > EM_TICKS_TO_USECS(65535))
                return (EINVAL);
        info->value = usecs;
        ticks = EM_USECS_TO_TICKS(usecs);

        adapter = info->adapter;

        EM_CORE_LOCK(adapter);
        regval = E1000_READ_OFFSET(&adapter->hw, info->offset);
        regval = (regval & ~0xffff) | (ticks & 0xffff);
        /* Handle a few special cases. */
        switch (info->offset) {
        case E1000_RDTR:
                break;
        case E1000_TIDV:
                if (ticks == 0) {
                        adapter->txd_cmd &= ~E1000_TXD_CMD_IDE;
                        /* Don't write 0 into the TIDV register. */
                        regval++;
                } else
                        adapter->txd_cmd |= E1000_TXD_CMD_IDE;
                break;
        }
        E1000_WRITE_OFFSET(&adapter->hw, info->offset, regval);
        EM_CORE_UNLOCK(adapter);
        return (0);
}
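
/*
 * A worked example of the conversion above.  The EM_USECS_TO_TICKS()/
 * EM_TICKS_TO_USECS() macros live in if_em.h, and the e1000 delay timers
 * count in units of 1.024 usec, so (node name assumed for illustration):
 *
 *      sysctl dev.em.0.rx_int_delay=100
 *
 * writes roughly 98 ticks into the low 16 bits of the register, and the
 * range check above rejects anything over EM_TICKS_TO_USECS(65535),
 * i.e. about 67 ms.
 */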

static void
em_add_int_delay_sysctl(struct adapter *adapter, const char *name,
        const char *description, struct em_int_delay_info *info,
        int offset, int value)
{
        info->adapter = adapter;
        info->offset = offset;
        info->value = value;
        SYSCTL_ADD_PROC(device_get_sysctl_ctx(adapter->dev),
            SYSCTL_CHILDREN(device_get_sysctl_tree(adapter->dev)),
            OID_AUTO, name, CTLTYPE_INT|CTLFLAG_RW,
            info, 0, em_sysctl_int_delay, "I", description);
}
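
/*
 * Typical registration (a sketch only; the real calls live elsewhere in
 * this file, and the node name, field, and default shown here are
 * assumptions for illustration):
 *
 *      em_add_int_delay_sysctl(adapter, "rx_int_delay",
 *          "receive interrupt delay in usecs", &adapter->rx_int_delay,
 *          E1000_REGISTER(&adapter->hw, E1000_RDTR), em_rx_int_delay_dflt);
 */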

static void
em_set_sysctl_value(struct adapter *adapter, const char *name,
        const char *description, int *limit, int value)
{
        *limit = value;
        /* SYSCTL_ADD_INT supplies CTLTYPE_INT itself; only pass flags */
        SYSCTL_ADD_INT(device_get_sysctl_ctx(adapter->dev),
            SYSCTL_CHILDREN(device_get_sysctl_tree(adapter->dev)),
            OID_AUTO, name, CTLFLAG_RW, limit, value, description);
}

/*
** Set flow control using sysctl:
** Flow control values:
**      0 - off
**      1 - rx pause
**      2 - tx pause
**      3 - full
*/
static int
em_set_flowcntl(SYSCTL_HANDLER_ARGS)
{
        struct adapter  *adapter = (struct adapter *) arg1;
        int             error;
        int             input;

        /* Report the adapter's current setting, not a stale static value */
        input = adapter->fc;
        error = sysctl_handle_int(oidp, &input, 0, req);

        if ((error) || (req->newptr == NULL))
                return (error);

        if (input == adapter->fc) /* no change? */
                return (error);

        switch (input) {
        case e1000_fc_rx_pause:
        case e1000_fc_tx_pause:
        case e1000_fc_full:
        case e1000_fc_none:
                adapter->hw.fc.requested_mode = input;
                adapter->fc = input;
                break;
        default:
                /* Do nothing */
                return (error);
        }

        adapter->hw.fc.current_mode = adapter->hw.fc.requested_mode;
        e1000_force_mac_fc(&adapter->hw);
        return (error);
}
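
/*
 * Example (assuming this handler is attached under the conventional
 * node name "fc", unit 0):
 *
 *      sysctl dev.em.0.fc=3    # full flow control
 *      sysctl dev.em.0.fc=0    # flow control off
 */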

/*
** Manage Energy Efficient Ethernet:
** Control values:
**      0 - EEE enabled
**      1 - EEE disabled
*/
static int
em_sysctl_eee(SYSCTL_HANDLER_ARGS)
{
        struct adapter *adapter = (struct adapter *) arg1;
        int             error, value;

        value = adapter->hw.dev_spec.ich8lan.eee_disable;
        error = sysctl_handle_int(oidp, &value, 0, req);
        if (error || req->newptr == NULL)
                return (error);
        EM_CORE_LOCK(adapter);
        adapter->hw.dev_spec.ich8lan.eee_disable = (value != 0);
        em_init_locked(adapter);
        EM_CORE_UNLOCK(adapter);
        return (0);
}
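
/*
 * Example (assuming the handler above is attached under the conventional
 * node name "eee_control", unit 0):
 *
 *      sysctl dev.em.0.eee_control=1   # disable EEE
 *      sysctl dev.em.0.eee_control=0   # enable EEE
 */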

static int
em_sysctl_debug_info(SYSCTL_HANDLER_ARGS)
{
        struct adapter *adapter;
        int error;
        int result;

        result = -1;
        error = sysctl_handle_int(oidp, &result, 0, req);

        if (error || !req->newptr)
                return (error);

        if (result == 1) {
                adapter = (struct adapter *)arg1;
                em_print_debug_info(adapter);
        }

        return (error);
}
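
/*
 * Example trigger (assuming the conventional node name "debug", unit 0):
 *
 *      sysctl dev.em.0.debug=1
 */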

/*
** This routine is meant to be fluid; add whatever is
** needed for debugging a problem.  -jfv
*/
static void
em_print_debug_info(struct adapter *adapter)
{
        device_t dev = adapter->dev;
        struct tx_ring *txr = adapter->tx_rings;
        struct rx_ring *rxr = adapter->rx_rings;

        if (adapter->ifp->if_drv_flags & IFF_DRV_RUNNING)
                printf("Interface is RUNNING ");
        else
                printf("Interface is NOT RUNNING ");

        if (adapter->ifp->if_drv_flags & IFF_DRV_OACTIVE)
                printf("and INACTIVE\n");
        else
                printf("and ACTIVE\n");

        device_printf(dev, "hw tdh = %d, hw tdt = %d\n",
            E1000_READ_REG(&adapter->hw, E1000_TDH(0)),
            E1000_READ_REG(&adapter->hw, E1000_TDT(0)));
        device_printf(dev, "hw rdh = %d, hw rdt = %d\n",
            E1000_READ_REG(&adapter->hw, E1000_RDH(0)),
            E1000_READ_REG(&adapter->hw, E1000_RDT(0)));
        device_printf(dev, "Tx Queue Status = %d\n", txr->queue_status);
        device_printf(dev, "TX descriptors avail = %d\n",
            txr->tx_avail);
        device_printf(dev, "Tx Descriptors avail failure = %lu\n",
            txr->no_desc_avail);
        device_printf(dev, "RX discarded packets = %lu\n",
            rxr->rx_discarded);
        device_printf(dev, "RX Next to Check = %d\n", rxr->next_to_check);
        device_printf(dev, "RX Next to Refresh = %d\n", rxr->next_to_refresh);
}