/* FreeBSD stable/9: sys/dev/e1000/if_em.c (MFC r240879) */
1 /******************************************************************************
2
3   Copyright (c) 2001-2011, Intel Corporation 
4   All rights reserved.
5   
6   Redistribution and use in source and binary forms, with or without 
7   modification, are permitted provided that the following conditions are met:
8   
9    1. Redistributions of source code must retain the above copyright notice, 
10       this list of conditions and the following disclaimer.
11   
12    2. Redistributions in binary form must reproduce the above copyright 
13       notice, this list of conditions and the following disclaimer in the 
14       documentation and/or other materials provided with the distribution.
15   
16    3. Neither the name of the Intel Corporation nor the names of its 
17       contributors may be used to endorse or promote products derived from 
18       this software without specific prior written permission.
19   
20   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
21   AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 
22   IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 
23   ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE 
24   LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 
25   CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 
26   SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 
27   INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 
28   CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 
29   ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
30   POSSIBILITY OF SUCH DAMAGE.
31
32 ******************************************************************************/
33 /*$FreeBSD$*/
34
35 #ifdef HAVE_KERNEL_OPTION_HEADERS
36 #include "opt_device_polling.h"
37 #include "opt_inet.h"
38 #include "opt_inet6.h"
39 #endif
40
41 #include <sys/param.h>
42 #include <sys/systm.h>
43 #if __FreeBSD_version >= 800000
44 #include <sys/buf_ring.h>
45 #endif
46 #include <sys/bus.h>
47 #include <sys/endian.h>
48 #include <sys/kernel.h>
49 #include <sys/kthread.h>
50 #include <sys/malloc.h>
51 #include <sys/mbuf.h>
52 #include <sys/module.h>
53 #include <sys/rman.h>
54 #include <sys/socket.h>
55 #include <sys/sockio.h>
56 #include <sys/sysctl.h>
57 #include <sys/taskqueue.h>
58 #include <sys/eventhandler.h>
59 #include <machine/bus.h>
60 #include <machine/resource.h>
61
62 #include <net/bpf.h>
63 #include <net/ethernet.h>
64 #include <net/if.h>
65 #include <net/if_arp.h>
66 #include <net/if_dl.h>
67 #include <net/if_media.h>
68
69 #include <net/if_types.h>
70 #include <net/if_vlan_var.h>
71
72 #include <netinet/in_systm.h>
73 #include <netinet/in.h>
74 #include <netinet/if_ether.h>
75 #include <netinet/ip.h>
76 #include <netinet/ip6.h>
77 #include <netinet/tcp.h>
78 #include <netinet/udp.h>
79
80 #include <machine/in_cksum.h>
81 #include <dev/led/led.h>
82 #include <dev/pci/pcivar.h>
83 #include <dev/pci/pcireg.h>
84
85 #include "e1000_api.h"
86 #include "e1000_82571.h"
87 #include "if_em.h"
88
/*********************************************************************
 *  Set this to one to display debug statistics
 *********************************************************************/
/* Global debug toggle; read by the debug sysctl/print paths. */
int	em_display_debug_stats = 0;

/*********************************************************************
 *  Driver version:
 *********************************************************************/
/* Reported in the probe string alongside the branding string. */
char em_driver_version[] = "7.3.2";
98
/*********************************************************************
 *  PCI Device ID Table
 *
 *  Used by probe to select devices to load on
 *  Last field stores an index into e1000_strings
 *  Last entry must be all 0s
 *
 *  { Vendor ID, Device ID, SubVendor ID, SubDevice ID, String Index }
 *********************************************************************/

/*
 * Matched linearly by em_probe(); PCI_ANY_ID wildcards the subvendor/
 * subdevice fields.  The final string-index field selects the branding
 * string out of em_strings[] (currently a single entry, so all 0).
 */
static em_vendor_info_t em_vendor_info_array[] =
{
	/* Intel(R) PRO/1000 Network Connection */
	{ 0x8086, E1000_DEV_ID_82571EB_COPPER,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82571EB_FIBER,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82571EB_SERDES,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82571EB_SERDES_DUAL,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82571EB_SERDES_QUAD,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82571EB_QUAD_COPPER,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82571EB_QUAD_COPPER_LP,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82571EB_QUAD_FIBER,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82571PT_QUAD_COPPER,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82572EI_COPPER,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82572EI_FIBER,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82572EI_SERDES,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82572EI,		PCI_ANY_ID, PCI_ANY_ID, 0},

	{ 0x8086, E1000_DEV_ID_82573E,		PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82573E_IAMT,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82573L,		PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82583V,		PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_80003ES2LAN_COPPER_SPT,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_80003ES2LAN_SERDES_SPT,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_80003ES2LAN_COPPER_DPT,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_80003ES2LAN_SERDES_DPT,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH8_IGP_M_AMT,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH8_IGP_AMT,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH8_IGP_C,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH8_IFE,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH8_IFE_GT,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH8_IFE_G,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH8_IGP_M,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH8_82567V_3,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH9_IGP_M_AMT,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH9_IGP_AMT,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH9_IGP_C,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH9_IGP_M,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH9_IGP_M_V,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH9_IFE,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH9_IFE_GT,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH9_IFE_G,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH9_BM,		PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82574L,		PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82574LA,		PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH10_R_BM_LM,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH10_R_BM_LF,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH10_R_BM_V,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH10_D_BM_LM,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH10_D_BM_LF,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH10_D_BM_V,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_PCH_M_HV_LM,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_PCH_M_HV_LC,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_PCH_D_HV_DM,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_PCH_D_HV_DC,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_PCH2_LV_LM,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_PCH2_LV_V,	PCI_ANY_ID, PCI_ANY_ID, 0},
	/* required last entry */
	{ 0, 0, 0, 0, 0}
};
178
/*********************************************************************
 *  Table of branding strings for all supported NICs.
 *********************************************************************/

/* Indexed by the last field of em_vendor_info_array entries. */
static char *em_strings[] = {
	"Intel(R) PRO/1000 Network Connection"
};

/*********************************************************************
 *  Function prototypes
 *********************************************************************/
/* New-bus device interface entry points */
static int	em_probe(device_t);
static int	em_attach(device_t);
static int	em_detach(device_t);
static int	em_shutdown(device_t);
static int	em_suspend(device_t);
static int	em_resume(device_t);
/* Transmit entry points: multiqueue (buf_ring) or legacy if_start */
#ifdef EM_MULTIQUEUE
static int	em_mq_start(struct ifnet *, struct mbuf *);
static int	em_mq_start_locked(struct ifnet *,
		    struct tx_ring *, struct mbuf *);
static void	em_qflush(struct ifnet *);
#else
static void	em_start(struct ifnet *);
static void	em_start_locked(struct ifnet *, struct tx_ring *);
#endif
static int	em_ioctl(struct ifnet *, u_long, caddr_t);
static void	em_init(void *);
static void	em_init_locked(struct adapter *);
static void	em_stop(void *);
static void	em_media_status(struct ifnet *, struct ifmediareq *);
static int	em_media_change(struct ifnet *);
static void	em_identify_hardware(struct adapter *);
static int	em_allocate_pci_resources(struct adapter *);
static int	em_allocate_legacy(struct adapter *);
static int	em_allocate_msix(struct adapter *);
static int	em_allocate_queues(struct adapter *);
static int	em_setup_msix(struct adapter *);
static void	em_free_pci_resources(struct adapter *);
static void	em_local_timer(void *);
static void	em_reset(struct adapter *);
static int	em_setup_interface(device_t, struct adapter *);

/* Transmit ring setup/teardown */
static void	em_setup_transmit_structures(struct adapter *);
static void	em_initialize_transmit_unit(struct adapter *);
static int	em_allocate_transmit_buffers(struct tx_ring *);
static void	em_free_transmit_structures(struct adapter *);
static void	em_free_transmit_buffers(struct tx_ring *);

/* Receive ring setup/teardown */
static int	em_setup_receive_structures(struct adapter *);
static int	em_allocate_receive_buffers(struct rx_ring *);
static void	em_initialize_receive_unit(struct adapter *);
static void	em_free_receive_structures(struct adapter *);
static void	em_free_receive_buffers(struct rx_ring *);

static void	em_enable_intr(struct adapter *);
static void	em_disable_intr(struct adapter *);
static void	em_update_stats_counters(struct adapter *);
static void	em_add_hw_stats(struct adapter *adapter);
static void	em_txeof(struct tx_ring *);
static bool	em_rxeof(struct rx_ring *, int, int *);
#ifndef __NO_STRICT_ALIGNMENT
static int	em_fixup_rx(struct rx_ring *);
#endif
static void	em_receive_checksum(struct e1000_rx_desc *, struct mbuf *);
static void	em_transmit_checksum_setup(struct tx_ring *, struct mbuf *, int,
		    struct ip *, u32 *, u32 *);
static void	em_tso_setup(struct tx_ring *, struct mbuf *, int, struct ip *,
		    struct tcphdr *, u32 *, u32 *);
static void	em_set_promisc(struct adapter *);
static void	em_disable_promisc(struct adapter *);
static void	em_set_multi(struct adapter *);
static void	em_update_link_status(struct adapter *);
static void	em_refresh_mbufs(struct rx_ring *, int);
static void	em_register_vlan(void *, struct ifnet *, u16);
static void	em_unregister_vlan(void *, struct ifnet *, u16);
static void	em_setup_vlan_hw_support(struct adapter *);
static int	em_xmit(struct tx_ring *, struct mbuf **);
static int	em_dma_malloc(struct adapter *, bus_size_t,
		    struct em_dma_alloc *, int);
static void	em_dma_free(struct adapter *, struct em_dma_alloc *);
static int	em_sysctl_nvm_info(SYSCTL_HANDLER_ARGS);
static void	em_print_nvm_info(struct adapter *);
static int	em_sysctl_debug_info(SYSCTL_HANDLER_ARGS);
static void	em_print_debug_info(struct adapter *);
static int	em_is_valid_ether_addr(u8 *);
static int	em_sysctl_int_delay(SYSCTL_HANDLER_ARGS);
static void	em_add_int_delay_sysctl(struct adapter *, const char *,
		    const char *, struct em_int_delay_info *, int, int);
/* Management and WOL Support */
static void	em_init_manageability(struct adapter *);
static void	em_release_manageability(struct adapter *);
static void	em_get_hw_control(struct adapter *);
static void	em_release_hw_control(struct adapter *);
static void	em_get_wakeup(device_t);
static void	em_enable_wakeup(device_t);
static int	em_enable_phy_wakeup(struct adapter *);
static void	em_led_func(void *, int);
static void	em_disable_aspm(struct adapter *);

static int	em_irq_fast(void *);

/* MSIX handlers */
static void	em_msix_tx(void *);
static void	em_msix_rx(void *);
static void	em_msix_link(void *);
static void	em_handle_tx(void *context, int pending);
static void	em_handle_rx(void *context, int pending);
static void	em_handle_link(void *context, int pending);

static void	em_set_sysctl_value(struct adapter *, const char *,
		    const char *, int *, int);
static int	em_set_flowcntl(SYSCTL_HANDLER_ARGS);
static int	em_sysctl_eee(SYSCTL_HANDLER_ARGS);

static __inline void em_rx_discard(struct rx_ring *, int);

#ifdef DEVICE_POLLING
static poll_handler_t em_poll;
#endif /* DEVICE_POLLING */
299
/*********************************************************************
 *  FreeBSD Device Interface Entry Points
 *********************************************************************/

/* New-bus method table mapping the device interface to em_* handlers. */
static device_method_t em_methods[] = {
	/* Device interface */
	DEVMETHOD(device_probe, em_probe),
	DEVMETHOD(device_attach, em_attach),
	DEVMETHOD(device_detach, em_detach),
	DEVMETHOD(device_shutdown, em_shutdown),
	DEVMETHOD(device_suspend, em_suspend),
	DEVMETHOD(device_resume, em_resume),
	{0, 0}	/* terminator, required by new-bus */
};

static driver_t em_driver = {
	/* softc is the full adapter state, allocated per device by new-bus */
	"em", em_methods, sizeof(struct adapter),
};

devclass_t em_devclass;
/* Register on the PCI bus; depends on pci and ether being loaded first. */
DRIVER_MODULE(em, pci, em_driver, em_devclass, 0, 0);
MODULE_DEPEND(em, pci, 1, 1, 1);
MODULE_DEPEND(em, ether, 1, 1, 1);
323
/*********************************************************************
 *  Tunable default values.
 *********************************************************************/

/* Convert between hardware ITR "ticks" (~1024 ns each) and microseconds. */
#define EM_TICKS_TO_USECS(ticks)	((1024 * (ticks) + 500) / 1000)
#define EM_USECS_TO_TICKS(usecs)	((1000 * (usecs) + 512) / 1024)
#define M_TSO_LEN			66

/* Allow common code without TSO */
#ifndef CSUM_TSO
#define CSUM_TSO	0
#endif

static SYSCTL_NODE(_hw, OID_AUTO, em, CTLFLAG_RD, 0, "EM driver parameters");

static int em_tx_int_delay_dflt = EM_TICKS_TO_USECS(EM_TIDV);
static int em_rx_int_delay_dflt = EM_TICKS_TO_USECS(EM_RDTR);
TUNABLE_INT("hw.em.tx_int_delay", &em_tx_int_delay_dflt);
TUNABLE_INT("hw.em.rx_int_delay", &em_rx_int_delay_dflt);
SYSCTL_INT(_hw_em, OID_AUTO, tx_int_delay, CTLFLAG_RDTUN, &em_tx_int_delay_dflt,
    0, "Default transmit interrupt delay in usecs");
SYSCTL_INT(_hw_em, OID_AUTO, rx_int_delay, CTLFLAG_RDTUN, &em_rx_int_delay_dflt,
    0, "Default receive interrupt delay in usecs");

static int em_tx_abs_int_delay_dflt = EM_TICKS_TO_USECS(EM_TADV);
static int em_rx_abs_int_delay_dflt = EM_TICKS_TO_USECS(EM_RADV);
TUNABLE_INT("hw.em.tx_abs_int_delay", &em_tx_abs_int_delay_dflt);
TUNABLE_INT("hw.em.rx_abs_int_delay", &em_rx_abs_int_delay_dflt);
SYSCTL_INT(_hw_em, OID_AUTO, tx_abs_int_delay, CTLFLAG_RDTUN,
    &em_tx_abs_int_delay_dflt, 0,
    "Default transmit interrupt delay limit in usecs");
SYSCTL_INT(_hw_em, OID_AUTO, rx_abs_int_delay, CTLFLAG_RDTUN,
    &em_rx_abs_int_delay_dflt, 0,
    "Default receive interrupt delay limit in usecs");

/* Ring sizes; validated against EM_MIN/MAX_*XD and alignment in em_attach(). */
static int em_rxd = EM_DEFAULT_RXD;
static int em_txd = EM_DEFAULT_TXD;
TUNABLE_INT("hw.em.rxd", &em_rxd);
TUNABLE_INT("hw.em.txd", &em_txd);
SYSCTL_INT(_hw_em, OID_AUTO, rxd, CTLFLAG_RDTUN, &em_rxd, 0,
    "Number of receive descriptors per queue");
SYSCTL_INT(_hw_em, OID_AUTO, txd, CTLFLAG_RDTUN, &em_txd, 0,
    "Number of transmit descriptors per queue");

static int em_smart_pwr_down = FALSE;
TUNABLE_INT("hw.em.smart_pwr_down", &em_smart_pwr_down);
SYSCTL_INT(_hw_em, OID_AUTO, smart_pwr_down, CTLFLAG_RDTUN, &em_smart_pwr_down,
    0, "Set to true to leave smart power down enabled on newer adapters");

/* Controls whether promiscuous also shows bad packets */
static int em_debug_sbp = FALSE;
TUNABLE_INT("hw.em.sbp", &em_debug_sbp);
SYSCTL_INT(_hw_em, OID_AUTO, sbp, CTLFLAG_RDTUN, &em_debug_sbp, 0,
    "Show bad packets in promiscuous mode");

static int em_enable_msix = TRUE;
TUNABLE_INT("hw.em.enable_msix", &em_enable_msix);
SYSCTL_INT(_hw_em, OID_AUTO, enable_msix, CTLFLAG_RDTUN, &em_enable_msix, 0,
    "Enable MSI-X interrupts");

/* How many packets rxeof tries to clean at a time */
static int em_rx_process_limit = 100;
TUNABLE_INT("hw.em.rx_process_limit", &em_rx_process_limit);
SYSCTL_INT(_hw_em, OID_AUTO, rx_process_limit, CTLFLAG_RDTUN,
    &em_rx_process_limit, 0,
    "Maximum number of received packets to process "
    "at a time, -1 means unlimited");

/* Energy efficient ethernet - default to OFF */
/*
 * NOTE: the sense is inverted -- this value is assigned to
 * hw->dev_spec.ich8lan.eee_disable in em_attach(), so 1 means EEE is
 * DISABLED (the default) and 0 enables it.
 */
static int eee_setting = 1;
TUNABLE_INT("hw.em.eee_setting", &eee_setting);
SYSCTL_INT(_hw_em, OID_AUTO, eee_setting, CTLFLAG_RDTUN, &eee_setting, 0,
    "Enable Energy Efficient Ethernet");

/* Global used in WOL setup with multiport cards */
static int global_quad_port_a = 0;

#ifdef DEV_NETMAP	/* see ixgbe.c for details */
#include <dev/netmap/if_em_netmap.h>
#endif /* DEV_NETMAP */
404
405 /*********************************************************************
406  *  Device identification routine
407  *
408  *  em_probe determines if the driver should be loaded on
409  *  adapter based on PCI vendor/device id of the adapter.
410  *
411  *  return BUS_PROBE_DEFAULT on success, positive on failure
412  *********************************************************************/
413
414 static int
415 em_probe(device_t dev)
416 {
417         char            adapter_name[60];
418         u16             pci_vendor_id = 0;
419         u16             pci_device_id = 0;
420         u16             pci_subvendor_id = 0;
421         u16             pci_subdevice_id = 0;
422         em_vendor_info_t *ent;
423
424         INIT_DEBUGOUT("em_probe: begin");
425
426         pci_vendor_id = pci_get_vendor(dev);
427         if (pci_vendor_id != EM_VENDOR_ID)
428                 return (ENXIO);
429
430         pci_device_id = pci_get_device(dev);
431         pci_subvendor_id = pci_get_subvendor(dev);
432         pci_subdevice_id = pci_get_subdevice(dev);
433
434         ent = em_vendor_info_array;
435         while (ent->vendor_id != 0) {
436                 if ((pci_vendor_id == ent->vendor_id) &&
437                     (pci_device_id == ent->device_id) &&
438
439                     ((pci_subvendor_id == ent->subvendor_id) ||
440                     (ent->subvendor_id == PCI_ANY_ID)) &&
441
442                     ((pci_subdevice_id == ent->subdevice_id) ||
443                     (ent->subdevice_id == PCI_ANY_ID))) {
444                         sprintf(adapter_name, "%s %s",
445                                 em_strings[ent->index],
446                                 em_driver_version);
447                         device_set_desc_copy(dev, adapter_name);
448                         return (BUS_PROBE_DEFAULT);
449                 }
450                 ent++;
451         }
452
453         return (ENXIO);
454 }
455
456 /*********************************************************************
457  *  Device initialization routine
458  *
459  *  The attach entry point is called when the driver is being loaded.
460  *  This routine identifies the type of hardware, allocates all resources
461  *  and initializes the hardware.
462  *
463  *  return 0 on success, positive on failure
464  *********************************************************************/
465
466 static int
467 em_attach(device_t dev)
468 {
469         struct adapter  *adapter;
470         struct e1000_hw *hw;
471         int             error = 0;
472
473         INIT_DEBUGOUT("em_attach: begin");
474
475         if (resource_disabled("em", device_get_unit(dev))) {
476                 device_printf(dev, "Disabled by device hint\n");
477                 return (ENXIO);
478         }
479
480         adapter = device_get_softc(dev);
481         adapter->dev = adapter->osdep.dev = dev;
482         hw = &adapter->hw;
483         EM_CORE_LOCK_INIT(adapter, device_get_nameunit(dev));
484
485         /* SYSCTL stuff */
486         SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
487             SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
488             OID_AUTO, "nvm", CTLTYPE_INT|CTLFLAG_RW, adapter, 0,
489             em_sysctl_nvm_info, "I", "NVM Information");
490
491         SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
492             SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
493             OID_AUTO, "debug", CTLTYPE_INT|CTLFLAG_RW, adapter, 0,
494             em_sysctl_debug_info, "I", "Debug Information");
495
496         SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
497             SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
498             OID_AUTO, "fc", CTLTYPE_INT|CTLFLAG_RW, adapter, 0,
499             em_set_flowcntl, "I", "Flow Control");
500
501         callout_init_mtx(&adapter->timer, &adapter->core_mtx, 0);
502
503         /* Determine hardware and mac info */
504         em_identify_hardware(adapter);
505
506         /* Setup PCI resources */
507         if (em_allocate_pci_resources(adapter)) {
508                 device_printf(dev, "Allocation of PCI resources failed\n");
509                 error = ENXIO;
510                 goto err_pci;
511         }
512
513         /*
514         ** For ICH8 and family we need to
515         ** map the flash memory, and this
516         ** must happen after the MAC is 
517         ** identified
518         */
519         if ((hw->mac.type == e1000_ich8lan) ||
520             (hw->mac.type == e1000_ich9lan) ||
521             (hw->mac.type == e1000_ich10lan) ||
522             (hw->mac.type == e1000_pchlan) ||
523             (hw->mac.type == e1000_pch2lan)) {
524                 int rid = EM_BAR_TYPE_FLASH;
525                 adapter->flash = bus_alloc_resource_any(dev,
526                     SYS_RES_MEMORY, &rid, RF_ACTIVE);
527                 if (adapter->flash == NULL) {
528                         device_printf(dev, "Mapping of Flash failed\n");
529                         error = ENXIO;
530                         goto err_pci;
531                 }
532                 /* This is used in the shared code */
533                 hw->flash_address = (u8 *)adapter->flash;
534                 adapter->osdep.flash_bus_space_tag =
535                     rman_get_bustag(adapter->flash);
536                 adapter->osdep.flash_bus_space_handle =
537                     rman_get_bushandle(adapter->flash);
538         }
539
540         /* Do Shared Code initialization */
541         if (e1000_setup_init_funcs(hw, TRUE)) {
542                 device_printf(dev, "Setup of Shared code failed\n");
543                 error = ENXIO;
544                 goto err_pci;
545         }
546
547         e1000_get_bus_info(hw);
548
549         /* Set up some sysctls for the tunable interrupt delays */
550         em_add_int_delay_sysctl(adapter, "rx_int_delay",
551             "receive interrupt delay in usecs", &adapter->rx_int_delay,
552             E1000_REGISTER(hw, E1000_RDTR), em_rx_int_delay_dflt);
553         em_add_int_delay_sysctl(adapter, "tx_int_delay",
554             "transmit interrupt delay in usecs", &adapter->tx_int_delay,
555             E1000_REGISTER(hw, E1000_TIDV), em_tx_int_delay_dflt);
556         em_add_int_delay_sysctl(adapter, "rx_abs_int_delay",
557             "receive interrupt delay limit in usecs",
558             &adapter->rx_abs_int_delay,
559             E1000_REGISTER(hw, E1000_RADV),
560             em_rx_abs_int_delay_dflt);
561         em_add_int_delay_sysctl(adapter, "tx_abs_int_delay",
562             "transmit interrupt delay limit in usecs",
563             &adapter->tx_abs_int_delay,
564             E1000_REGISTER(hw, E1000_TADV),
565             em_tx_abs_int_delay_dflt);
566
567         /* Sysctl for limiting the amount of work done in the taskqueue */
568         em_set_sysctl_value(adapter, "rx_processing_limit",
569             "max number of rx packets to process", &adapter->rx_process_limit,
570             em_rx_process_limit);
571
572         /*
573          * Validate number of transmit and receive descriptors. It
574          * must not exceed hardware maximum, and must be multiple
575          * of E1000_DBA_ALIGN.
576          */
577         if (((em_txd * sizeof(struct e1000_tx_desc)) % EM_DBA_ALIGN) != 0 ||
578             (em_txd > EM_MAX_TXD) || (em_txd < EM_MIN_TXD)) {
579                 device_printf(dev, "Using %d TX descriptors instead of %d!\n",
580                     EM_DEFAULT_TXD, em_txd);
581                 adapter->num_tx_desc = EM_DEFAULT_TXD;
582         } else
583                 adapter->num_tx_desc = em_txd;
584
585         if (((em_rxd * sizeof(struct e1000_rx_desc)) % EM_DBA_ALIGN) != 0 ||
586             (em_rxd > EM_MAX_RXD) || (em_rxd < EM_MIN_RXD)) {
587                 device_printf(dev, "Using %d RX descriptors instead of %d!\n",
588                     EM_DEFAULT_RXD, em_rxd);
589                 adapter->num_rx_desc = EM_DEFAULT_RXD;
590         } else
591                 adapter->num_rx_desc = em_rxd;
592
593         hw->mac.autoneg = DO_AUTO_NEG;
594         hw->phy.autoneg_wait_to_complete = FALSE;
595         hw->phy.autoneg_advertised = AUTONEG_ADV_DEFAULT;
596
597         /* Copper options */
598         if (hw->phy.media_type == e1000_media_type_copper) {
599                 hw->phy.mdix = AUTO_ALL_MODES;
600                 hw->phy.disable_polarity_correction = FALSE;
601                 hw->phy.ms_type = EM_MASTER_SLAVE;
602         }
603
604         /*
605          * Set the frame limits assuming
606          * standard ethernet sized frames.
607          */
608         adapter->max_frame_size = ETHERMTU + ETHER_HDR_LEN + ETHERNET_FCS_SIZE;
609         adapter->min_frame_size = ETH_ZLEN + ETHERNET_FCS_SIZE;
610
611         /*
612          * This controls when hardware reports transmit completion
613          * status.
614          */
615         hw->mac.report_tx_early = 1;
616
617         /* 
618         ** Get queue/ring memory
619         */
620         if (em_allocate_queues(adapter)) {
621                 error = ENOMEM;
622                 goto err_pci;
623         }
624
625         /* Allocate multicast array memory. */
626         adapter->mta = malloc(sizeof(u8) * ETH_ADDR_LEN *
627             MAX_NUM_MULTICAST_ADDRESSES, M_DEVBUF, M_NOWAIT);
628         if (adapter->mta == NULL) {
629                 device_printf(dev, "Can not allocate multicast setup array\n");
630                 error = ENOMEM;
631                 goto err_late;
632         }
633
634         /* Check SOL/IDER usage */
635         if (e1000_check_reset_block(hw))
636                 device_printf(dev, "PHY reset is blocked"
637                     " due to SOL/IDER session.\n");
638
639         /* Sysctl for setting Energy Efficient Ethernet */
640         hw->dev_spec.ich8lan.eee_disable = eee_setting;
641         SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
642             SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
643             OID_AUTO, "eee_control", CTLTYPE_INT|CTLFLAG_RW,
644             adapter, 0, em_sysctl_eee, "I",
645             "Disable Energy Efficient Ethernet");
646
647         /*
648         ** Start from a known state, this is
649         ** important in reading the nvm and
650         ** mac from that.
651         */
652         e1000_reset_hw(hw);
653
654
655         /* Make sure we have a good EEPROM before we read from it */
656         if (e1000_validate_nvm_checksum(hw) < 0) {
657                 /*
658                 ** Some PCI-E parts fail the first check due to
659                 ** the link being in sleep state, call it again,
660                 ** if it fails a second time its a real issue.
661                 */
662                 if (e1000_validate_nvm_checksum(hw) < 0) {
663                         device_printf(dev,
664                             "The EEPROM Checksum Is Not Valid\n");
665                         error = EIO;
666                         goto err_late;
667                 }
668         }
669
670         /* Copy the permanent MAC address out of the EEPROM */
671         if (e1000_read_mac_addr(hw) < 0) {
672                 device_printf(dev, "EEPROM read error while reading MAC"
673                     " address\n");
674                 error = EIO;
675                 goto err_late;
676         }
677
678         if (!em_is_valid_ether_addr(hw->mac.addr)) {
679                 device_printf(dev, "Invalid MAC address\n");
680                 error = EIO;
681                 goto err_late;
682         }
683
684         /*
685         **  Do interrupt configuration
686         */
687         if (adapter->msix > 1) /* Do MSIX */
688                 error = em_allocate_msix(adapter);
689         else  /* MSI or Legacy */
690                 error = em_allocate_legacy(adapter);
691         if (error)
692                 goto err_late;
693
694         /*
695          * Get Wake-on-Lan and Management info for later use
696          */
697         em_get_wakeup(dev);
698
699         /* Setup OS specific network interface */
700         if (em_setup_interface(dev, adapter) != 0)
701                 goto err_late;
702
703         em_reset(adapter);
704
705         /* Initialize statistics */
706         em_update_stats_counters(adapter);
707
708         hw->mac.get_link_status = 1;
709         em_update_link_status(adapter);
710
711         /* Register for VLAN events */
712         adapter->vlan_attach = EVENTHANDLER_REGISTER(vlan_config,
713             em_register_vlan, adapter, EVENTHANDLER_PRI_FIRST);
714         adapter->vlan_detach = EVENTHANDLER_REGISTER(vlan_unconfig,
715             em_unregister_vlan, adapter, EVENTHANDLER_PRI_FIRST); 
716
717         em_add_hw_stats(adapter);
718
719         /* Non-AMT based hardware can now take control from firmware */
720         if (adapter->has_manage && !adapter->has_amt)
721                 em_get_hw_control(adapter);
722
723         /* Tell the stack that the interface is not active */
724         adapter->ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
725         adapter->ifp->if_drv_flags |= IFF_DRV_OACTIVE;
726
727         adapter->led_dev = led_create(em_led_func, adapter,
728             device_get_nameunit(dev));
729 #ifdef DEV_NETMAP
730         em_netmap_attach(adapter);
731 #endif /* DEV_NETMAP */
732
733         INIT_DEBUGOUT("em_attach: end");
734
735         return (0);
736
737 err_late:
738         em_free_transmit_structures(adapter);
739         em_free_receive_structures(adapter);
740         em_release_hw_control(adapter);
741         if (adapter->ifp != NULL)
742                 if_free(adapter->ifp);
743 err_pci:
744         em_free_pci_resources(adapter);
745         free(adapter->mta, M_DEVBUF);
746         EM_CORE_LOCK_DESTROY(adapter);
747
748         return (error);
749 }
750
751 /*********************************************************************
752  *  Device removal routine
753  *
754  *  The detach entry point is called when the driver is being removed.
755  *  This routine stops the adapter and deallocates all the resources
756  *  that were allocated for driver operation.
757  *
758  *  return 0 on success, positive on failure
759  *********************************************************************/
760
761 static int
762 em_detach(device_t dev)
763 {
764         struct adapter  *adapter = device_get_softc(dev);
765         struct ifnet    *ifp = adapter->ifp;
766
767         INIT_DEBUGOUT("em_detach: begin");
768
769         /* Make sure VLANS are not using driver */
770         if (adapter->ifp->if_vlantrunk != NULL) {
771                 device_printf(dev,"Vlan in use, detach first\n");
772                 return (EBUSY);
773         }
774
775 #ifdef DEVICE_POLLING
776         if (ifp->if_capenable & IFCAP_POLLING)
777                 ether_poll_deregister(ifp);
778 #endif
779
780         if (adapter->led_dev != NULL)
781                 led_destroy(adapter->led_dev);
782
783         EM_CORE_LOCK(adapter);
784         adapter->in_detach = 1;
785         em_stop(adapter);
786         EM_CORE_UNLOCK(adapter);
787         EM_CORE_LOCK_DESTROY(adapter);
788
789         e1000_phy_hw_reset(&adapter->hw);
790
791         em_release_manageability(adapter);
792         em_release_hw_control(adapter);
793
794         /* Unregister VLAN events */
795         if (adapter->vlan_attach != NULL)
796                 EVENTHANDLER_DEREGISTER(vlan_config, adapter->vlan_attach);
797         if (adapter->vlan_detach != NULL)
798                 EVENTHANDLER_DEREGISTER(vlan_unconfig, adapter->vlan_detach); 
799
800         ether_ifdetach(adapter->ifp);
801         callout_drain(&adapter->timer);
802
803 #ifdef DEV_NETMAP
804         netmap_detach(ifp);
805 #endif /* DEV_NETMAP */
806
807         em_free_pci_resources(adapter);
808         bus_generic_detach(dev);
809         if_free(ifp);
810
811         em_free_transmit_structures(adapter);
812         em_free_receive_structures(adapter);
813
814         em_release_hw_control(adapter);
815         free(adapter->mta, M_DEVBUF);
816
817         return (0);
818 }
819
820 /*********************************************************************
821  *
822  *  Shutdown entry point
823  *
824  **********************************************************************/
825
826 static int
827 em_shutdown(device_t dev)
828 {
829         return em_suspend(dev);
830 }
831
832 /*
833  * Suspend/resume device methods.
834  */
/*
 * em_suspend - device suspend entry point (also used for shutdown).
 *
 * Under the core lock, hands manageability and hardware control back
 * to the firmware and programs the wake-up registers, then lets the
 * bus framework suspend any children.
 */
static int
em_suspend(device_t dev)
{
        struct adapter *adapter = device_get_softc(dev);

        EM_CORE_LOCK(adapter);

        em_release_manageability(adapter);
        em_release_hw_control(adapter);
        em_enable_wakeup(dev);

        EM_CORE_UNLOCK(adapter);

        return bus_generic_suspend(dev);
}
850
/*
 * em_resume - device resume entry point.
 *
 * Re-initializes the adapter under the core lock and, if the
 * interface is up with link, restarts transmission on every TX
 * ring that has queued work.
 */
static int
em_resume(device_t dev)
{
        struct adapter *adapter = device_get_softc(dev);
        struct tx_ring  *txr = adapter->tx_rings;
        struct ifnet *ifp = adapter->ifp;

        EM_CORE_LOCK(adapter);
        /* PCH2 parts need extra register work when leaving Sx states */
        if (adapter->hw.mac.type == e1000_pch2lan)
                e1000_resume_workarounds_pchlan(&adapter->hw);
        em_init_locked(adapter);
        em_init_manageability(adapter);

        if ((ifp->if_flags & IFF_UP) &&
            (ifp->if_drv_flags & IFF_DRV_RUNNING) && adapter->link_active) {
                /* Kick each ring so pending frames go out */
                for (int i = 0; i < adapter->num_queues; i++, txr++) {
                        EM_TX_LOCK(txr);
#ifdef EM_MULTIQUEUE
                        if (!drbr_empty(ifp, txr->br))
                                em_mq_start_locked(ifp, txr, NULL);
#else
                        if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
                                em_start_locked(ifp, txr);
#endif
                        EM_TX_UNLOCK(txr);
                }
        }
        EM_CORE_UNLOCK(adapter);

        return bus_generic_resume(dev);
}
882
883
884 #ifdef EM_MULTIQUEUE
885 /*********************************************************************
886  *  Multiqueue Transmit routines 
887  *
888  *  em_mq_start is called by the stack to initiate a transmit.
889  *  however, if busy the driver can queue the request rather
890  *  than do an immediate send. It is this that is an advantage
891  *  in this driver, rather than also having multiple tx queues.
892  **********************************************************************/
893 static int
894 em_mq_start_locked(struct ifnet *ifp, struct tx_ring *txr, struct mbuf *m)
895 {
896         struct adapter  *adapter = txr->adapter;
897         struct mbuf     *next;
898         int             err = 0, enq = 0;
899
900         if ((ifp->if_drv_flags & (IFF_DRV_RUNNING | IFF_DRV_OACTIVE)) !=
901             IFF_DRV_RUNNING || adapter->link_active == 0) {
902                 if (m != NULL)
903                         err = drbr_enqueue(ifp, txr->br, m);
904                 return (err);
905         }
906
907         enq = 0;
908         if (m == NULL) {
909                 next = drbr_dequeue(ifp, txr->br);
910         } else if (drbr_needs_enqueue(ifp, txr->br)) {
911                 if ((err = drbr_enqueue(ifp, txr->br, m)) != 0)
912                         return (err);
913                 next = drbr_dequeue(ifp, txr->br);
914         } else
915                 next = m;
916
917         /* Process the queue */
918         while (next != NULL) {
919                 if ((err = em_xmit(txr, &next)) != 0) {
920                         if (next != NULL)
921                                 err = drbr_enqueue(ifp, txr->br, next);
922                         break;
923                 }
924                 enq++;
925                 drbr_stats_update(ifp, next->m_pkthdr.len, next->m_flags);
926                 ETHER_BPF_MTAP(ifp, next);
927                 if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
928                         break;
929                 next = drbr_dequeue(ifp, txr->br);
930         }
931
932         if (enq > 0) {
933                 /* Set the watchdog */
934                 txr->queue_status = EM_QUEUE_WORKING;
935                 txr->watchdog_time = ticks;
936         }
937
938         if (txr->tx_avail < EM_MAX_SCATTER)
939                 em_txeof(txr);
940         if (txr->tx_avail < EM_MAX_SCATTER)
941                 ifp->if_drv_flags |= IFF_DRV_OACTIVE;
942         return (err);
943 }
944
945 /*
946 ** Multiqueue capable stack interface
947 */
948 static int
949 em_mq_start(struct ifnet *ifp, struct mbuf *m)
950 {
951         struct adapter  *adapter = ifp->if_softc;
952         struct tx_ring  *txr = adapter->tx_rings;
953         int             error;
954
955         if (EM_TX_TRYLOCK(txr)) {
956                 error = em_mq_start_locked(ifp, txr, m);
957                 EM_TX_UNLOCK(txr);
958         } else 
959                 error = drbr_enqueue(ifp, txr->br, m);
960
961         return (error);
962 }
963
964 /*
965 ** Flush all ring buffers
966 */
967 static void
968 em_qflush(struct ifnet *ifp)
969 {
970         struct adapter  *adapter = ifp->if_softc;
971         struct tx_ring  *txr = adapter->tx_rings;
972         struct mbuf     *m;
973
974         for (int i = 0; i < adapter->num_queues; i++, txr++) {
975                 EM_TX_LOCK(txr);
976                 while ((m = buf_ring_dequeue_sc(txr->br)) != NULL)
977                         m_freem(m);
978                 EM_TX_UNLOCK(txr);
979         }
980         if_qflush(ifp);
981 }
982 #else  /* !EM_MULTIQUEUE */
983
984 static void
985 em_start_locked(struct ifnet *ifp, struct tx_ring *txr)
986 {
987         struct adapter  *adapter = ifp->if_softc;
988         struct mbuf     *m_head;
989
990         EM_TX_LOCK_ASSERT(txr);
991
992         if ((ifp->if_drv_flags & (IFF_DRV_RUNNING|IFF_DRV_OACTIVE)) !=
993             IFF_DRV_RUNNING)
994                 return;
995
996         if (!adapter->link_active)
997                 return;
998
999         while (!IFQ_DRV_IS_EMPTY(&ifp->if_snd)) {
1000                 /* Call cleanup if number of TX descriptors low */
1001                 if (txr->tx_avail <= EM_TX_CLEANUP_THRESHOLD)
1002                         em_txeof(txr);
1003                 if (txr->tx_avail < EM_MAX_SCATTER) {
1004                         ifp->if_drv_flags |= IFF_DRV_OACTIVE;
1005                         break;
1006                 }
1007                 IFQ_DRV_DEQUEUE(&ifp->if_snd, m_head);
1008                 if (m_head == NULL)
1009                         break;
1010                 /*
1011                  *  Encapsulation can modify our pointer, and or make it
1012                  *  NULL on failure.  In that event, we can't requeue.
1013                  */
1014                 if (em_xmit(txr, &m_head)) {
1015                         if (m_head == NULL)
1016                                 break;
1017                         IFQ_DRV_PREPEND(&ifp->if_snd, m_head);
1018                         break;
1019                 }
1020
1021                 /* Send a copy of the frame to the BPF listener */
1022                 ETHER_BPF_MTAP(ifp, m_head);
1023
1024                 /* Set timeout in case hardware has problems transmitting. */
1025                 txr->watchdog_time = ticks;
1026                 txr->queue_status = EM_QUEUE_WORKING;
1027         }
1028
1029         return;
1030 }
1031
1032 static void
1033 em_start(struct ifnet *ifp)
1034 {
1035         struct adapter  *adapter = ifp->if_softc;
1036         struct tx_ring  *txr = adapter->tx_rings;
1037
1038         if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
1039                 EM_TX_LOCK(txr);
1040                 em_start_locked(ifp, txr);
1041                 EM_TX_UNLOCK(txr);
1042         }
1043         return;
1044 }
1045 #endif /* EM_MULTIQUEUE */
1046
1047 /*********************************************************************
1048  *  Ioctl entry point
1049  *
1050  *  em_ioctl is called when the user wants to configure the
1051  *  interface.
1052  *
1053  *  return 0 on success, positive on failure
1054  **********************************************************************/
1055
static int
em_ioctl(struct ifnet *ifp, u_long command, caddr_t data)
{
        struct adapter  *adapter = ifp->if_softc;
        struct ifreq    *ifr = (struct ifreq *)data;
#if defined(INET) || defined(INET6)
        struct ifaddr   *ifa = (struct ifaddr *)data;
#endif
        bool            avoid_reset = FALSE;
        int             error = 0;

        /* Ignore requests once detach has begun */
        if (adapter->in_detach)
                return (error);

        switch (command) {
        case SIOCSIFADDR:
#ifdef INET
                if (ifa->ifa_addr->sa_family == AF_INET)
                        avoid_reset = TRUE;
#endif
#ifdef INET6
                if (ifa->ifa_addr->sa_family == AF_INET6)
                        avoid_reset = TRUE;
#endif
                /*
                ** Calling init results in link renegotiation,
                ** so we avoid doing it when possible.
                */
                if (avoid_reset) {
                        ifp->if_flags |= IFF_UP;
                        if (!(ifp->if_drv_flags & IFF_DRV_RUNNING))
                                em_init(adapter);
#ifdef INET
                        if (!(ifp->if_flags & IFF_NOARP))
                                arp_ifinit(ifp, ifa);
#endif
                } else
                        error = ether_ioctl(ifp, command, data);
                break;
        case SIOCSIFMTU:
            {
                int max_frame_size;

                IOCTL_DEBUGOUT("ioctl rcv'd: SIOCSIFMTU (Set Interface MTU)");

                /* Upper MTU bound depends on the MAC generation */
                EM_CORE_LOCK(adapter);
                switch (adapter->hw.mac.type) {
                case e1000_82571:
                case e1000_82572:
                case e1000_ich9lan:
                case e1000_ich10lan:
                case e1000_pch2lan:
                case e1000_82574:
                case e1000_82583:
                case e1000_80003es2lan: /* 9K Jumbo Frame size */
                        max_frame_size = 9234;
                        break;
                case e1000_pchlan:
                        max_frame_size = 4096;
                        break;
                        /* Adapters that do not support jumbo frames */
                case e1000_ich8lan:
                        max_frame_size = ETHER_MAX_LEN;
                        break;
                default:
                        max_frame_size = MAX_JUMBO_FRAME_SIZE;
                }
                if (ifr->ifr_mtu > max_frame_size - ETHER_HDR_LEN -
                    ETHER_CRC_LEN) {
                        EM_CORE_UNLOCK(adapter);
                        error = EINVAL;
                        break;
                }

                /* Re-init so RX buffer sizing matches the new MTU */
                ifp->if_mtu = ifr->ifr_mtu;
                adapter->max_frame_size =
                    ifp->if_mtu + ETHER_HDR_LEN + ETHER_CRC_LEN;
                em_init_locked(adapter);
                EM_CORE_UNLOCK(adapter);
                break;
            }
        case SIOCSIFFLAGS:
                IOCTL_DEBUGOUT("ioctl rcv'd:\
                    SIOCSIFFLAGS (Set Interface Flags)");
                EM_CORE_LOCK(adapter);
                if (ifp->if_flags & IFF_UP) {
                        if ((ifp->if_drv_flags & IFF_DRV_RUNNING)) {
                                /*
                                 * Only reprogram promisc/allmulti when
                                 * those bits actually changed; avoids a
                                 * needless filter rewrite.
                                 */
                                if ((ifp->if_flags ^ adapter->if_flags) &
                                    (IFF_PROMISC | IFF_ALLMULTI)) {
                                        em_disable_promisc(adapter);
                                        em_set_promisc(adapter);
                                }
                        } else
                                em_init_locked(adapter);
                } else
                        if (ifp->if_drv_flags & IFF_DRV_RUNNING)
                                em_stop(adapter);
                adapter->if_flags = ifp->if_flags;
                EM_CORE_UNLOCK(adapter);
                break;
        case SIOCADDMULTI:
        case SIOCDELMULTI:
                IOCTL_DEBUGOUT("ioctl rcv'd: SIOC(ADD|DEL)MULTI");
                if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
                        EM_CORE_LOCK(adapter);
                        em_disable_intr(adapter);
                        em_set_multi(adapter);
#ifdef DEVICE_POLLING
                        /* Leave interrupts masked while polling */
                        if (!(ifp->if_capenable & IFCAP_POLLING))
#endif
                                em_enable_intr(adapter);
                        EM_CORE_UNLOCK(adapter);
                }
                break;
        case SIOCSIFMEDIA:
                /* Check SOL/IDER usage */
                EM_CORE_LOCK(adapter);
                if (e1000_check_reset_block(&adapter->hw)) {
                        EM_CORE_UNLOCK(adapter);
                        device_printf(adapter->dev, "Media change is"
                            " blocked due to SOL/IDER session.\n");
                        break;
                }
                EM_CORE_UNLOCK(adapter);
                /* falls thru */
        case SIOCGIFMEDIA:
                IOCTL_DEBUGOUT("ioctl rcv'd: \
                    SIOCxIFMEDIA (Get/Set Interface Media)");
                error = ifmedia_ioctl(ifp, ifr, &adapter->media, command);
                break;
        case SIOCSIFCAP:
            {
                int mask, reinit;

                IOCTL_DEBUGOUT("ioctl rcv'd: SIOCSIFCAP (Set Capabilities)");
                reinit = 0;
                /* mask holds only the capability bits being toggled */
                mask = ifr->ifr_reqcap ^ ifp->if_capenable;
#ifdef DEVICE_POLLING
                if (mask & IFCAP_POLLING) {
                        if (ifr->ifr_reqcap & IFCAP_POLLING) {
                                error = ether_poll_register(em_poll, ifp);
                                if (error)
                                        return (error);
                                EM_CORE_LOCK(adapter);
                                em_disable_intr(adapter);
                                ifp->if_capenable |= IFCAP_POLLING;
                                EM_CORE_UNLOCK(adapter);
                        } else {
                                error = ether_poll_deregister(ifp);
                                /* Enable interrupt even in error case */
                                EM_CORE_LOCK(adapter);
                                em_enable_intr(adapter);
                                ifp->if_capenable &= ~IFCAP_POLLING;
                                EM_CORE_UNLOCK(adapter);
                        }
                }
#endif
                if (mask & IFCAP_HWCSUM) {
                        ifp->if_capenable ^= IFCAP_HWCSUM;
                        reinit = 1;
                }
                if (mask & IFCAP_TSO4) {
                        ifp->if_capenable ^= IFCAP_TSO4;
                        reinit = 1;
                }
                if (mask & IFCAP_VLAN_HWTAGGING) {
                        ifp->if_capenable ^= IFCAP_VLAN_HWTAGGING;
                        reinit = 1;
                }
                if (mask & IFCAP_VLAN_HWFILTER) {
                        ifp->if_capenable ^= IFCAP_VLAN_HWFILTER;
                        reinit = 1;
                }
                if (mask & IFCAP_VLAN_HWTSO) {
                        ifp->if_capenable ^= IFCAP_VLAN_HWTSO;
                        reinit = 1;
                }
                if ((mask & IFCAP_WOL) &&
                    (ifp->if_capabilities & IFCAP_WOL) != 0) {
                        if (mask & IFCAP_WOL_MCAST)
                                ifp->if_capenable ^= IFCAP_WOL_MCAST;
                        if (mask & IFCAP_WOL_MAGIC)
                                ifp->if_capenable ^= IFCAP_WOL_MAGIC;
                }
                if (reinit && (ifp->if_drv_flags & IFF_DRV_RUNNING))
                        em_init(adapter);
                VLAN_CAPABILITIES(ifp);
                break;
            }

        default:
                error = ether_ioctl(ifp, command, data);
                break;
        }

        return (error);
}
1253
1254
1255 /*********************************************************************
1256  *  Init entry point
1257  *
1258  *  This routine is used in two ways. It is used by the stack as
1259  *  init entry point in network interface structure. It is also used
1260  *  by the driver as a hw/sw initialization routine to get to a
1261  *  consistent state.
1262  *
1263  *  return 0 on success, positive on failure
1264  **********************************************************************/
1265
/*
 * em_init_locked - bring the interface to a fully running state.
 *
 * Caller must hold the core lock.  Reprograms the MAC address,
 * resets the hardware, rebuilds the TX/RX structures, restores
 * VLAN/promisc settings and re-enables interrupts (unless polling
 * is active).  The statement ordering follows the hardware
 * bring-up sequence and should not be rearranged.
 */
static void
em_init_locked(struct adapter *adapter)
{
        struct ifnet    *ifp = adapter->ifp;
        device_t        dev = adapter->dev;

        INIT_DEBUGOUT("em_init: begin");

        EM_CORE_LOCK_ASSERT(adapter);

        /* Quiesce the device before reprogramming it */
        em_disable_intr(adapter);
        callout_stop(&adapter->timer);

        /* Get the latest mac address, User can use a LAA */
        bcopy(IF_LLADDR(adapter->ifp), adapter->hw.mac.addr,
              ETHER_ADDR_LEN);

        /* Put the address into the Receive Address Array */
        e1000_rar_set(&adapter->hw, adapter->hw.mac.addr, 0);

        /*
         * With the 82571 adapter, RAR[0] may be overwritten
         * when the other port is reset, we make a duplicate
         * in RAR[14] for that eventuality, this assures
         * the interface continues to function.
         */
        if (adapter->hw.mac.type == e1000_82571) {
                e1000_set_laa_state_82571(&adapter->hw, TRUE);
                e1000_rar_set(&adapter->hw, adapter->hw.mac.addr,
                    E1000_RAR_ENTRIES - 1);
        }

        /* Initialize the hardware */
        em_reset(adapter);
        em_update_link_status(adapter);

        /* Setup VLAN support, basic and offload if available */
        E1000_WRITE_REG(&adapter->hw, E1000_VET, ETHERTYPE_VLAN);

        /* Set hardware offload abilities */
        ifp->if_hwassist = 0;
        if (ifp->if_capenable & IFCAP_TXCSUM)
                ifp->if_hwassist |= (CSUM_TCP | CSUM_UDP);
        if (ifp->if_capenable & IFCAP_TSO4)
                ifp->if_hwassist |= CSUM_TSO;

        /* Configure for OS presence */
        em_init_manageability(adapter);

        /* Prepare transmit descriptors and buffers */
        em_setup_transmit_structures(adapter);
        em_initialize_transmit_unit(adapter);

        /* Setup Multicast table */
        em_set_multi(adapter);

        /*
        ** Figure out the desired mbuf
        ** pool for doing jumbos
        */
        if (adapter->max_frame_size <= 2048)
                adapter->rx_mbuf_sz = MCLBYTES;
        else if (adapter->max_frame_size <= 4096)
                adapter->rx_mbuf_sz = MJUMPAGESIZE;
        else
                adapter->rx_mbuf_sz = MJUM9BYTES;

        /* Prepare receive descriptors and buffers */
        if (em_setup_receive_structures(adapter)) {
                device_printf(dev, "Could not setup receive structures\n");
                em_stop(adapter);
                return;
        }
        em_initialize_receive_unit(adapter);

        /* Use real VLAN Filter support? */
        if (ifp->if_capenable & IFCAP_VLAN_HWTAGGING) {
                if (ifp->if_capenable & IFCAP_VLAN_HWFILTER)
                        /* Use real VLAN Filter support */
                        em_setup_vlan_hw_support(adapter);
                else {
                        u32 ctrl;
                        ctrl = E1000_READ_REG(&adapter->hw, E1000_CTRL);
                        ctrl |= E1000_CTRL_VME;
                        E1000_WRITE_REG(&adapter->hw, E1000_CTRL, ctrl);
                }
        }

        /* Don't lose promiscuous settings */
        em_set_promisc(adapter);

        /* Set the interface as ACTIVE */
        ifp->if_drv_flags |= IFF_DRV_RUNNING;
        ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;

        callout_reset(&adapter->timer, hz, em_local_timer, adapter);
        e1000_clear_hw_cntrs_base_generic(&adapter->hw);

        /* MSI/X configuration for 82574 */
        if (adapter->hw.mac.type == e1000_82574) {
                int tmp;
                tmp = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
                tmp |= E1000_CTRL_EXT_PBA_CLR;
                E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT, tmp);
                /* Set the IVAR - interrupt vector routing. */
                E1000_WRITE_REG(&adapter->hw, E1000_IVAR, adapter->ivars);
        }

#ifdef DEVICE_POLLING
        /*
         * Only enable interrupts if we are not polling, make sure
         * they are off otherwise.
         */
        if (ifp->if_capenable & IFCAP_POLLING)
                em_disable_intr(adapter);
        else
#endif /* DEVICE_POLLING */
                em_enable_intr(adapter);

        /* AMT based hardware can now take control from firmware */
        if (adapter->has_manage && adapter->has_amt)
                em_get_hw_control(adapter);
}
1389
/* Locked wrapper: the stack's if_init entry point. */
static void
em_init(void *arg)
{
        struct adapter *adapter = arg;

        EM_CORE_LOCK(adapter);
        em_init_locked(adapter);
        EM_CORE_UNLOCK(adapter);
}
1399
1400
1401 #ifdef DEVICE_POLLING
1402 /*********************************************************************
1403  *
1404  *  Legacy polling routine: note this only works with single queue
1405  *
1406  *********************************************************************/
/*
 * em_poll - DEVICE_POLLING entry point (single queue only).
 *
 * Called by the polling framework in place of interrupts; processes
 * up to 'count' received frames and cleans the TX ring.  Returns the
 * number of RX packets handled.
 */
static int
em_poll(struct ifnet *ifp, enum poll_cmd cmd, int count)
{
        struct adapter *adapter = ifp->if_softc;
        struct tx_ring  *txr = adapter->tx_rings;
        struct rx_ring  *rxr = adapter->rx_rings;
        u32             reg_icr;
        int             rx_done;

        EM_CORE_LOCK(adapter);
        if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) {
                EM_CORE_UNLOCK(adapter);
                return (0);
        }

        /* On a status poll, also pick up link state changes */
        if (cmd == POLL_AND_CHECK_STATUS) {
                reg_icr = E1000_READ_REG(&adapter->hw, E1000_ICR);
                if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
                        callout_stop(&adapter->timer);
                        adapter->hw.mac.get_link_status = 1;
                        em_update_link_status(adapter);
                        callout_reset(&adapter->timer, hz,
                            em_local_timer, adapter);
                }
        }
        EM_CORE_UNLOCK(adapter);

        em_rxeof(rxr, count, &rx_done);

        /* Clean the TX ring and restart any stalled transmit */
        EM_TX_LOCK(txr);
        em_txeof(txr);
#ifdef EM_MULTIQUEUE
        if (!drbr_empty(ifp, txr->br))
                em_mq_start_locked(ifp, txr, NULL);
#else
        if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
                em_start_locked(ifp, txr);
#endif
        EM_TX_UNLOCK(txr);

        return (rx_done);
}
1449 #endif /* DEVICE_POLLING */
1450
1451
1452 /*********************************************************************
1453  *
1454  *  Fast Legacy/MSI Combined Interrupt Service routine  
1455  *
1456  *********************************************************************/
1457 static int
1458 em_irq_fast(void *arg)
1459 {
1460         struct adapter  *adapter = arg;
1461         struct ifnet    *ifp;
1462         u32             reg_icr;
1463
1464         ifp = adapter->ifp;
1465
1466         reg_icr = E1000_READ_REG(&adapter->hw, E1000_ICR);
1467
1468         /* Hot eject?  */
1469         if (reg_icr == 0xffffffff)
1470                 return FILTER_STRAY;
1471
1472         /* Definitely not our interrupt.  */
1473         if (reg_icr == 0x0)
1474                 return FILTER_STRAY;
1475
1476         /*
1477          * Starting with the 82571 chip, bit 31 should be used to
1478          * determine whether the interrupt belongs to us.
1479          */
1480         if (adapter->hw.mac.type >= e1000_82571 &&
1481             (reg_icr & E1000_ICR_INT_ASSERTED) == 0)
1482                 return FILTER_STRAY;
1483
1484         em_disable_intr(adapter);
1485         taskqueue_enqueue(adapter->tq, &adapter->que_task);
1486
1487         /* Link status change */
1488         if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
1489                 adapter->hw.mac.get_link_status = 1;
1490                 taskqueue_enqueue(taskqueue_fast, &adapter->link_task);
1491         }
1492
1493         if (reg_icr & E1000_ICR_RXO)
1494                 adapter->rx_overruns++;
1495         return FILTER_HANDLED;
1496 }
1497
1498 /* Combined RX/TX handler, used by Legacy and MSI */
1499 static void
1500 em_handle_que(void *context, int pending)
1501 {
1502         struct adapter  *adapter = context;
1503         struct ifnet    *ifp = adapter->ifp;
1504         struct tx_ring  *txr = adapter->tx_rings;
1505         struct rx_ring  *rxr = adapter->rx_rings;
1506
1507
1508         if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
1509                 bool more = em_rxeof(rxr, adapter->rx_process_limit, NULL);
1510                 EM_TX_LOCK(txr);
1511                 em_txeof(txr);
1512 #ifdef EM_MULTIQUEUE
1513                 if (!drbr_empty(ifp, txr->br))
1514                         em_mq_start_locked(ifp, txr, NULL);
1515 #else
1516                 if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
1517                         em_start_locked(ifp, txr);
1518 #endif
1519                 EM_TX_UNLOCK(txr);
1520                 if (more) {
1521                         taskqueue_enqueue(adapter->tq, &adapter->que_task);
1522                         return;
1523                 }
1524         }
1525
1526         em_enable_intr(adapter);
1527         return;
1528 }
1529
1530
1531 /*********************************************************************
1532  *
1533  *  MSIX Interrupt Service Routines
1534  *
1535  **********************************************************************/
1536 static void
1537 em_msix_tx(void *arg)
1538 {
1539         struct tx_ring *txr = arg;
1540         struct adapter *adapter = txr->adapter;
1541         struct ifnet    *ifp = adapter->ifp;
1542
1543         ++txr->tx_irq;
1544         EM_TX_LOCK(txr);
1545         em_txeof(txr);
1546 #ifdef EM_MULTIQUEUE
1547         if (!drbr_empty(ifp, txr->br))
1548                 em_mq_start_locked(ifp, txr, NULL);
1549 #else
1550         if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
1551                 em_start_locked(ifp, txr);
1552 #endif
1553         /* Reenable this interrupt */
1554         E1000_WRITE_REG(&adapter->hw, E1000_IMS, txr->ims);
1555         EM_TX_UNLOCK(txr);
1556         return;
1557 }
1558
1559 /*********************************************************************
1560  *
1561  *  MSIX RX Interrupt Service routine
1562  *
1563  **********************************************************************/
1564
1565 static void
1566 em_msix_rx(void *arg)
1567 {
1568         struct rx_ring  *rxr = arg;
1569         struct adapter  *adapter = rxr->adapter;
1570         bool            more;
1571
1572         ++rxr->rx_irq;
1573         more = em_rxeof(rxr, adapter->rx_process_limit, NULL);
1574         if (more)
1575                 taskqueue_enqueue(rxr->tq, &rxr->rx_task);
1576         else
1577                 /* Reenable this interrupt */
1578                 E1000_WRITE_REG(&adapter->hw, E1000_IMS, rxr->ims);
1579         return;
1580 }
1581
1582 /*********************************************************************
1583  *
1584  *  MSIX Link Fast Interrupt Service routine
1585  *
1586  **********************************************************************/
1587 static void
1588 em_msix_link(void *arg)
1589 {
1590         struct adapter  *adapter = arg;
1591         u32             reg_icr;
1592
1593         ++adapter->link_irq;
1594         reg_icr = E1000_READ_REG(&adapter->hw, E1000_ICR);
1595
1596         if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
1597                 adapter->hw.mac.get_link_status = 1;
1598                 em_handle_link(adapter, 0);
1599         } else
1600                 E1000_WRITE_REG(&adapter->hw, E1000_IMS,
1601                     EM_MSIX_LINK | E1000_IMS_LSC);
1602         return;
1603 }
1604
1605 static void
1606 em_handle_rx(void *context, int pending)
1607 {
1608         struct rx_ring  *rxr = context;
1609         struct adapter  *adapter = rxr->adapter;
1610         bool            more;
1611
1612         more = em_rxeof(rxr, adapter->rx_process_limit, NULL);
1613         if (more)
1614                 taskqueue_enqueue(rxr->tq, &rxr->rx_task);
1615         else
1616                 /* Reenable this interrupt */
1617                 E1000_WRITE_REG(&adapter->hw, E1000_IMS, rxr->ims);
1618 }
1619
/*
 * Deferred TX task: reclaims completed transmit descriptors, restarts
 * transmission if frames are pending, and re-arms the queue's
 * interrupt.  The IMS write happens under the TX lock, matching
 * em_msix_tx().
 */
static void
em_handle_tx(void *context, int pending)
{
	struct tx_ring	*txr = context;
	struct adapter	*adapter = txr->adapter;
	struct ifnet	*ifp = adapter->ifp;

	EM_TX_LOCK(txr);
	em_txeof(txr);	/* free descriptors the hardware completed */
#ifdef EM_MULTIQUEUE
	/* Drain this queue's buffered ring */
	if (!drbr_empty(ifp, txr->br))
		em_mq_start_locked(ifp, txr, NULL);
#else
	/* Drain the shared interface send queue */
	if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
		em_start_locked(ifp, txr);
#endif
	/* Re-arm this queue's interrupt */
	E1000_WRITE_REG(&adapter->hw, E1000_IMS, txr->ims);
	EM_TX_UNLOCK(txr);
}
1639
/*
 * Deferred link task: refreshes link state, restarts the local timer,
 * re-arms the link interrupt, and — if the link came up — kicks every
 * TX queue so frames queued while the link was down get transmitted.
 * Runs with no locks held on entry; takes CORE then per-queue TX locks.
 */
static void
em_handle_link(void *context, int pending)
{
	struct adapter	*adapter = context;
	struct tx_ring	*txr = adapter->tx_rings;
	struct ifnet *ifp = adapter->ifp;

	/* Nothing to do if the interface is down */
	if (!(ifp->if_drv_flags & IFF_DRV_RUNNING))
		return;

	EM_CORE_LOCK(adapter);
	/* Stop the timer while we poke link state, then restart it */
	callout_stop(&adapter->timer);
	em_update_link_status(adapter);
	callout_reset(&adapter->timer, hz, em_local_timer, adapter);
	/* Re-arm the link interrupt */
	E1000_WRITE_REG(&adapter->hw, E1000_IMS,
	    EM_MSIX_LINK | E1000_IMS_LSC);
	if (adapter->link_active) {
		/* Link is up: restart transmission on every queue */
		for (int i = 0; i < adapter->num_queues; i++, txr++) {
			EM_TX_LOCK(txr);
#ifdef EM_MULTIQUEUE
			if (!drbr_empty(ifp, txr->br))
				em_mq_start_locked(ifp, txr, NULL);
#else
			if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
				em_start_locked(ifp, txr);
#endif
			EM_TX_UNLOCK(txr);
		}
	}
	EM_CORE_UNLOCK(adapter);
}
1671
1672
1673 /*********************************************************************
1674  *
1675  *  Media Ioctl callback
1676  *
1677  *  This routine is called whenever the user queries the status of
1678  *  the interface using ifconfig.
1679  *
1680  **********************************************************************/
1681 static void
1682 em_media_status(struct ifnet *ifp, struct ifmediareq *ifmr)
1683 {
1684         struct adapter *adapter = ifp->if_softc;
1685         u_char fiber_type = IFM_1000_SX;
1686
1687         INIT_DEBUGOUT("em_media_status: begin");
1688
1689         EM_CORE_LOCK(adapter);
1690         em_update_link_status(adapter);
1691
1692         ifmr->ifm_status = IFM_AVALID;
1693         ifmr->ifm_active = IFM_ETHER;
1694
1695         if (!adapter->link_active) {
1696                 EM_CORE_UNLOCK(adapter);
1697                 return;
1698         }
1699
1700         ifmr->ifm_status |= IFM_ACTIVE;
1701
1702         if ((adapter->hw.phy.media_type == e1000_media_type_fiber) ||
1703             (adapter->hw.phy.media_type == e1000_media_type_internal_serdes)) {
1704                 ifmr->ifm_active |= fiber_type | IFM_FDX;
1705         } else {
1706                 switch (adapter->link_speed) {
1707                 case 10:
1708                         ifmr->ifm_active |= IFM_10_T;
1709                         break;
1710                 case 100:
1711                         ifmr->ifm_active |= IFM_100_TX;
1712                         break;
1713                 case 1000:
1714                         ifmr->ifm_active |= IFM_1000_T;
1715                         break;
1716                 }
1717                 if (adapter->link_duplex == FULL_DUPLEX)
1718                         ifmr->ifm_active |= IFM_FDX;
1719                 else
1720                         ifmr->ifm_active |= IFM_HDX;
1721         }
1722         EM_CORE_UNLOCK(adapter);
1723 }
1724
1725 /*********************************************************************
1726  *
1727  *  Media Ioctl callback
1728  *
1729  *  This routine is called when the user changes speed/duplex using
 *  media/mediaopt option with ifconfig.
1731  *
1732  **********************************************************************/
1733 static int
1734 em_media_change(struct ifnet *ifp)
1735 {
1736         struct adapter *adapter = ifp->if_softc;
1737         struct ifmedia  *ifm = &adapter->media;
1738
1739         INIT_DEBUGOUT("em_media_change: begin");
1740
1741         if (IFM_TYPE(ifm->ifm_media) != IFM_ETHER)
1742                 return (EINVAL);
1743
1744         EM_CORE_LOCK(adapter);
1745         switch (IFM_SUBTYPE(ifm->ifm_media)) {
1746         case IFM_AUTO:
1747                 adapter->hw.mac.autoneg = DO_AUTO_NEG;
1748                 adapter->hw.phy.autoneg_advertised = AUTONEG_ADV_DEFAULT;
1749                 break;
1750         case IFM_1000_LX:
1751         case IFM_1000_SX:
1752         case IFM_1000_T:
1753                 adapter->hw.mac.autoneg = DO_AUTO_NEG;
1754                 adapter->hw.phy.autoneg_advertised = ADVERTISE_1000_FULL;
1755                 break;
1756         case IFM_100_TX:
1757                 adapter->hw.mac.autoneg = FALSE;
1758                 adapter->hw.phy.autoneg_advertised = 0;
1759                 if ((ifm->ifm_media & IFM_GMASK) == IFM_FDX)
1760                         adapter->hw.mac.forced_speed_duplex = ADVERTISE_100_FULL;
1761                 else
1762                         adapter->hw.mac.forced_speed_duplex = ADVERTISE_100_HALF;
1763                 break;
1764         case IFM_10_T:
1765                 adapter->hw.mac.autoneg = FALSE;
1766                 adapter->hw.phy.autoneg_advertised = 0;
1767                 if ((ifm->ifm_media & IFM_GMASK) == IFM_FDX)
1768                         adapter->hw.mac.forced_speed_duplex = ADVERTISE_10_FULL;
1769                 else
1770                         adapter->hw.mac.forced_speed_duplex = ADVERTISE_10_HALF;
1771                 break;
1772         default:
1773                 device_printf(adapter->dev, "Unsupported media type\n");
1774         }
1775
1776         em_init_locked(adapter);
1777         EM_CORE_UNLOCK(adapter);
1778
1779         return (0);
1780 }
1781
1782 /*********************************************************************
1783  *
1784  *  This routine maps the mbufs to tx descriptors.
1785  *
1786  *  return 0 on success, positive on failure
1787  **********************************************************************/
1788
/*
 * Map an mbuf chain onto transmit descriptors for one TX ring.
 *
 * Called with the ring's TX lock held.  On success the frame is handed
 * to the hardware (TDT advanced) and 0 is returned.  On failure a
 * positive errno is returned; *m_headp is freed and set to NULL for
 * unrecoverable errors, but left intact for ENOMEM so the caller can
 * retry later.  The chain in *m_headp may be replaced (m_dup/m_pullup/
 * m_defrag) even on the success path.
 */
static int
em_xmit(struct tx_ring *txr, struct mbuf **m_headp)
{
	struct adapter		*adapter = txr->adapter;
	bus_dma_segment_t	segs[EM_MAX_SCATTER];
	bus_dmamap_t		map;
	struct em_buffer	*tx_buffer, *tx_buffer_mapped;
	struct e1000_tx_desc	*ctxd = NULL;
	struct mbuf		*m_head;
	struct ether_header	*eh;
	struct ip		*ip = NULL;
	struct tcphdr		*tp = NULL;
	u32			txd_upper, txd_lower, txd_used, txd_saved;
	int			ip_off, poff;
	int			nsegs, i, j, first, last = 0;
	int			error, do_tso, tso_desc = 0, remap = 1;

retry:
	m_head = *m_headp;
	txd_upper = txd_lower = txd_used = txd_saved = 0;
	do_tso = ((m_head->m_pkthdr.csum_flags & CSUM_TSO) != 0);
	ip_off = poff = 0;

	/*
	 * Intel recommends entire IP/TCP header length reside in a single
	 * buffer. If multiple descriptors are used to describe the IP and
	 * TCP header, each descriptor should describe one or more
	 * complete headers; descriptors referencing only parts of headers
	 * are not supported. If all layer headers are not coalesced into
	 * a single buffer, each buffer should not cross a 4KB boundary,
	 * or be larger than the maximum read request size.
	 * Controller also requires modifying IP/TCP header to make TSO work
	 * so we firstly get a writable mbuf chain then coalesce ethernet/
	 * IP/TCP header into a single buffer to meet the requirement of
	 * controller. This also simplifies IP/TCP/UDP checksum offloading
	 * which also has similar restrictions.
	 */
	if (do_tso || m_head->m_pkthdr.csum_flags & CSUM_OFFLOAD) {
		if (do_tso || (m_head->m_next != NULL && 
		    m_head->m_pkthdr.csum_flags & CSUM_OFFLOAD)) {
			/* Headers will be modified: get a private copy */
			if (M_WRITABLE(*m_headp) == 0) {
				m_head = m_dup(*m_headp, M_DONTWAIT);
				m_freem(*m_headp);
				if (m_head == NULL) {
					*m_headp = NULL;
					return (ENOBUFS);
				}
				*m_headp = m_head;
			}
		}
		/*
		 * XXX
		 * Assume IPv4, we don't have TSO/checksum offload support
		 * for IPv6 yet.
		 */
		ip_off = sizeof(struct ether_header);
		m_head = m_pullup(m_head, ip_off);
		if (m_head == NULL) {
			*m_headp = NULL;
			return (ENOBUFS);
		}
		eh = mtod(m_head, struct ether_header *);
		/* Account for a VLAN tag preceding the IP header */
		if (eh->ether_type == htons(ETHERTYPE_VLAN)) {
			ip_off = sizeof(struct ether_vlan_header);
			m_head = m_pullup(m_head, ip_off);
			if (m_head == NULL) {
				*m_headp = NULL;
				return (ENOBUFS);
			}
		}
		m_head = m_pullup(m_head, ip_off + sizeof(struct ip));
		if (m_head == NULL) {
			*m_headp = NULL;
			return (ENOBUFS);
		}
		ip = (struct ip *)(mtod(m_head, char *) + ip_off);
		poff = ip_off + (ip->ip_hl << 2);	/* payload offset */
		if (do_tso) {
			m_head = m_pullup(m_head, poff + sizeof(struct tcphdr));
			if (m_head == NULL) {
				*m_headp = NULL;
				return (ENOBUFS);
			}
			tp = (struct tcphdr *)(mtod(m_head, char *) + poff);
			/*
			 * TSO workaround:
			 *   pull 4 more bytes of data into it.
			 */
			m_head = m_pullup(m_head, poff + (tp->th_off << 2) + 4);
			if (m_head == NULL) {
				*m_headp = NULL;
				return (ENOBUFS);
			}
			/* m_pullup may have moved the data: refetch */
			ip = (struct ip *)(mtod(m_head, char *) + ip_off);
			ip->ip_len = 0;
			ip->ip_sum = 0;
			/*
			 * The pseudo TCP checksum does not include TCP payload
			 * length so driver should recompute the checksum here
			 * what hardware expect to see. This is adherence of
			 * Microsoft's Large Send specification.
			 */
			tp = (struct tcphdr *)(mtod(m_head, char *) + poff);
			tp->th_sum = in_pseudo(ip->ip_src.s_addr,
			    ip->ip_dst.s_addr, htons(IPPROTO_TCP));
		} else if (m_head->m_pkthdr.csum_flags & CSUM_TCP) {
			m_head = m_pullup(m_head, poff + sizeof(struct tcphdr));
			if (m_head == NULL) {
				*m_headp = NULL;
				return (ENOBUFS);
			}
			tp = (struct tcphdr *)(mtod(m_head, char *) + poff);
			/* Pull the complete TCP header (options included) */
			m_head = m_pullup(m_head, poff + (tp->th_off << 2));
			if (m_head == NULL) {
				*m_headp = NULL;
				return (ENOBUFS);
			}
			ip = (struct ip *)(mtod(m_head, char *) + ip_off);
			tp = (struct tcphdr *)(mtod(m_head, char *) + poff);
		} else if (m_head->m_pkthdr.csum_flags & CSUM_UDP) {
			m_head = m_pullup(m_head, poff + sizeof(struct udphdr));
			if (m_head == NULL) {
				*m_headp = NULL;
				return (ENOBUFS);
			}
			ip = (struct ip *)(mtod(m_head, char *) + ip_off);
		}
		*m_headp = m_head;
	}

	/*
	 * Map the packet for DMA
	 *
	 * Capture the first descriptor index,
	 * this descriptor will have the index
	 * of the EOP which is the only one that
	 * now gets a DONE bit writeback.
	 */
	first = txr->next_avail_desc;
	tx_buffer = &txr->tx_buffers[first];
	tx_buffer_mapped = tx_buffer;
	map = tx_buffer->map;

	error = bus_dmamap_load_mbuf_sg(txr->txtag, map,
	    *m_headp, segs, &nsegs, BUS_DMA_NOWAIT);

	/*
	 * There are two types of errors we can (try) to handle:
	 * - EFBIG means the mbuf chain was too long and bus_dma ran
	 *   out of segments.  Defragment the mbuf chain and try again.
	 * - ENOMEM means bus_dma could not obtain enough bounce buffers
	 *   at this point in time.  Defer sending and try again later.
	 * All other errors, in particular EINVAL, are fatal and prevent the
	 * mbuf chain from ever going through.  Drop it and report error.
	 */
	if (error == EFBIG && remap) {
		struct mbuf *m;

		m = m_defrag(*m_headp, M_DONTWAIT);
		if (m == NULL) {
			adapter->mbuf_alloc_failed++;
			m_freem(*m_headp);
			*m_headp = NULL;
			return (ENOBUFS);
		}
		*m_headp = m;

		/* Try it again, but only once */
		remap = 0;
		goto retry;
	} else if (error == ENOMEM) {
		/* Transient: caller may retry; chain is kept intact */
		adapter->no_tx_dma_setup++;
		return (error);
	} else if (error != 0) {
		adapter->no_tx_dma_setup++;
		m_freem(*m_headp);
		*m_headp = NULL;
		return (error);
	}

	/*
	 * TSO Hardware workaround, if this packet is not
	 * TSO, and is only a single descriptor long, and
	 * it follows a TSO burst, then we need to add a
	 * sentinel descriptor to prevent premature writeback.
	 */
	if ((do_tso == 0) && (txr->tx_tso == TRUE)) {
		if (nsegs == 1)
			tso_desc = TRUE;
		txr->tx_tso = FALSE;
	}

	/* Need nsegs descriptors plus slack (sentinel + safety margin) */
	if (nsegs > (txr->tx_avail - 2)) {
		txr->no_desc_avail++;
		bus_dmamap_unload(txr->txtag, map);
		return (ENOBUFS);
	}
	m_head = *m_headp;

	/* Do hardware assists */
	if (m_head->m_pkthdr.csum_flags & CSUM_TSO) {
		em_tso_setup(txr, m_head, ip_off, ip, tp,
		    &txd_upper, &txd_lower);
		/* we need to make a final sentinel transmit desc */
		tso_desc = TRUE;
	} else if (m_head->m_pkthdr.csum_flags & CSUM_OFFLOAD)
		em_transmit_checksum_setup(txr, m_head,
		    ip_off, ip, &txd_upper, &txd_lower);

	if (m_head->m_flags & M_VLANTAG) {
		/* Set the vlan id. */
		txd_upper |=
		    (htole16(m_head->m_pkthdr.ether_vtag) << 16);
		/* Tell hardware to add tag */
		txd_lower |= htole32(E1000_TXD_CMD_VLE);
	}

	i = txr->next_avail_desc;

	/* Set up our transmit descriptors */
	for (j = 0; j < nsegs; j++) {
		bus_size_t seg_len;
		bus_addr_t seg_addr;

		tx_buffer = &txr->tx_buffers[i];
		ctxd = &txr->tx_base[i];
		seg_addr = segs[j].ds_addr;
		seg_len  = segs[j].ds_len;
		/*
		** TSO Workaround:
		** If this is the last descriptor, we want to
		** split it so we have a small final sentinel
		*/
		if (tso_desc && (j == (nsegs -1)) && (seg_len > 8)) {
			/* Last segment minus 4 bytes... */
			seg_len -= 4;
			ctxd->buffer_addr = htole64(seg_addr);
			ctxd->lower.data = htole32(
			adapter->txd_cmd | txd_lower | seg_len);
			ctxd->upper.data =
			    htole32(txd_upper);
			if (++i == adapter->num_tx_desc)
				i = 0;
			/* Now make the sentinel */	
			++txd_used; /* using an extra txd */
			ctxd = &txr->tx_base[i];
			tx_buffer = &txr->tx_buffers[i];
			/* ...becomes a 4-byte sentinel descriptor */
			ctxd->buffer_addr =
			    htole64(seg_addr + seg_len);
			ctxd->lower.data = htole32(
			adapter->txd_cmd | txd_lower | 4);
			ctxd->upper.data =
			    htole32(txd_upper);
			last = i;
			if (++i == adapter->num_tx_desc)
				i = 0;
		} else {
			ctxd->buffer_addr = htole64(seg_addr);
			ctxd->lower.data = htole32(
			adapter->txd_cmd | txd_lower | seg_len);
			ctxd->upper.data =
			    htole32(txd_upper);
			last = i;
			if (++i == adapter->num_tx_desc)
				i = 0;
		}
		tx_buffer->m_head = NULL;
		tx_buffer->next_eop = -1;
	}

	txr->next_avail_desc = i;
	txr->tx_avail -= nsegs;
	if (tso_desc) /* TSO used an extra for sentinel */
		txr->tx_avail -= txd_used;

	/* The mbuf chain is owned by the last buffer in the frame */
	tx_buffer->m_head = m_head;
	/*
	** Here we swap the map so the last descriptor,
	** which gets the completion interrupt has the
	** real map, and the first descriptor gets the
	** unused map from this descriptor.
	*/
	tx_buffer_mapped->map = tx_buffer->map;
	tx_buffer->map = map;
	bus_dmamap_sync(txr->txtag, map, BUS_DMASYNC_PREWRITE);

	/*
	 * Last Descriptor of Packet
	 * needs End Of Packet (EOP)
	 * and Report Status (RS)
	 */
	ctxd->lower.data |=
	    htole32(E1000_TXD_CMD_EOP | E1000_TXD_CMD_RS);
	/*
	 * Keep track in the first buffer which
	 * descriptor will be written back
	 */
	tx_buffer = &txr->tx_buffers[first];
	tx_buffer->next_eop = last;
	/* Update the watchdog time early and often */
	txr->watchdog_time = ticks;

	/*
	 * Advance the Transmit Descriptor Tail (TDT), this tells the E1000
	 * that this frame is available to transmit.
	 */
	bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
	E1000_WRITE_REG(&adapter->hw, E1000_TDT(txr->me), i);

	return (0);
}
2100
2101 static void
2102 em_set_promisc(struct adapter *adapter)
2103 {
2104         struct ifnet    *ifp = adapter->ifp;
2105         u32             reg_rctl;
2106
2107         reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
2108
2109         if (ifp->if_flags & IFF_PROMISC) {
2110                 reg_rctl |= (E1000_RCTL_UPE | E1000_RCTL_MPE);
2111                 /* Turn this on if you want to see bad packets */
2112                 if (em_debug_sbp)
2113                         reg_rctl |= E1000_RCTL_SBP;
2114                 E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2115         } else if (ifp->if_flags & IFF_ALLMULTI) {
2116                 reg_rctl |= E1000_RCTL_MPE;
2117                 reg_rctl &= ~E1000_RCTL_UPE;
2118                 E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2119         }
2120 }
2121
2122 static void
2123 em_disable_promisc(struct adapter *adapter)
2124 {
2125         u32     reg_rctl;
2126
2127         reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
2128
2129         reg_rctl &=  (~E1000_RCTL_UPE);
2130         reg_rctl &=  (~E1000_RCTL_MPE);
2131         reg_rctl &=  (~E1000_RCTL_SBP);
2132         E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2133 }
2134
2135
2136 /*********************************************************************
2137  *  Multicast Update
2138  *
2139  *  This routine is called whenever multicast address list is updated.
2140  *
2141  **********************************************************************/
2142
/*
 * Push the interface's multicast address list to the hardware filter.
 *
 * If the list exceeds MAX_NUM_MULTICAST_ADDRESSES, multicast
 * promiscuous mode (MPE) is enabled instead.  The 82542 rev 2 parts
 * need the receiver held in reset (and MWI disabled) while the filter
 * table is rewritten — hence the bracketing RCTL_RST dance.
 */
static void
em_set_multi(struct adapter *adapter)
{
	struct ifnet	*ifp = adapter->ifp;
	struct ifmultiaddr *ifma;
	u32 reg_rctl = 0;
	u8  *mta; /* Multicast array memory */
	int mcnt = 0;

	IOCTL_DEBUGOUT("em_set_multi: begin");

	mta = adapter->mta;
	bzero(mta, sizeof(u8) * ETH_ADDR_LEN * MAX_NUM_MULTICAST_ADDRESSES);

	/* 82542 rev 2 workaround: reset the receiver around the update */
	if (adapter->hw.mac.type == e1000_82542 && 
	    adapter->hw.revision_id == E1000_REVISION_2) {
		reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
		if (adapter->hw.bus.pci_cmd_word & CMD_MEM_WRT_INVALIDATE)
			e1000_pci_clear_mwi(&adapter->hw);
		reg_rctl |= E1000_RCTL_RST;
		E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
		msec_delay(5);
	}

	/* Gather up to MAX_NUM_MULTICAST_ADDRESSES link-level addresses */
#if __FreeBSD_version < 800000
	IF_ADDR_LOCK(ifp);
#else
	if_maddr_rlock(ifp);
#endif
	TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
		if (ifma->ifma_addr->sa_family != AF_LINK)
			continue;

		if (mcnt == MAX_NUM_MULTICAST_ADDRESSES)
			break;

		bcopy(LLADDR((struct sockaddr_dl *)ifma->ifma_addr),
		    &mta[mcnt * ETH_ADDR_LEN], ETH_ADDR_LEN);
		mcnt++;
	}
#if __FreeBSD_version < 800000
	IF_ADDR_UNLOCK(ifp);
#else
	if_maddr_runlock(ifp);
#endif
	if (mcnt >= MAX_NUM_MULTICAST_ADDRESSES) {
		/* Too many addresses: fall back to multicast promiscuous */
		reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
		reg_rctl |= E1000_RCTL_MPE;
		E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
	} else
		e1000_update_mc_addr_list(&adapter->hw, mta, mcnt);

	/* Take the 82542 rev 2 receiver back out of reset */
	if (adapter->hw.mac.type == e1000_82542 && 
	    adapter->hw.revision_id == E1000_REVISION_2) {
		reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
		reg_rctl &= ~E1000_RCTL_RST;
		E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
		msec_delay(5);
		if (adapter->hw.bus.pci_cmd_word & CMD_MEM_WRT_INVALIDATE)
			e1000_pci_set_mwi(&adapter->hw);
	}
}
2205
2206
2207 /*********************************************************************
2208  *  Timer routine
2209  *
2210  *  This routine checks for link status and updates statistics.
2211  *
2212  **********************************************************************/
2213
/*
 * Periodic (1 Hz) housekeeping callout.  Runs with the CORE lock held.
 *
 * Refreshes link state and statistics, re-applies the LAA on 82571,
 * watches each TX queue for a hung condition (resetting the adapter if
 * one is found), schedules TX cleanup when descriptors run low, and
 * fires a software RX interrupt to guarantee mbuf replenishment.
 */
static void
em_local_timer(void *arg)
{
	struct adapter	*adapter = arg;
	struct ifnet	*ifp = adapter->ifp;
	struct tx_ring	*txr = adapter->tx_rings;
	struct rx_ring	*rxr = adapter->rx_rings;
	u32		trigger;

	EM_CORE_LOCK_ASSERT(adapter);

	em_update_link_status(adapter);
	em_update_stats_counters(adapter);

	/* Reset LAA into RAR[0] on 82571 */
	if ((adapter->hw.mac.type == e1000_82571) &&
	    e1000_get_laa_state_82571(&adapter->hw))
		e1000_rar_set(&adapter->hw, adapter->hw.mac.addr, 0);

	/* Mask to use in the irq trigger */
	if (adapter->msix_mem)
		trigger = rxr->ims; /* RX for 82574 */
	else
		trigger = E1000_ICS_RXDMT0;

	/*
	** Check on the state of the TX queue(s): this can be done
	** without taking the TX lock because the fields are only read
	** here and the HUNG state, once set, is stable until the reset
	** below clears it.
	*/
	for (int i = 0; i < adapter->num_queues; i++, txr++) {
		/* Pause frames can stall TX legitimately; don't reset then */
		if ((txr->queue_status == EM_QUEUE_HUNG) &&
		    (adapter->pause_frames == 0))
			goto hung;
		/* Schedule a TX tasklet if needed */
		if (txr->tx_avail <= EM_MAX_SCATTER)
			taskqueue_enqueue(txr->tq, &txr->tx_task);
	}
	
	adapter->pause_frames = 0;
	callout_reset(&adapter->timer, hz, em_local_timer, adapter);
#ifndef DEVICE_POLLING
	/* Trigger an RX interrupt to guarantee mbuf refresh */
	E1000_WRITE_REG(&adapter->hw, E1000_ICS, trigger);
#endif
	return;
hung:
	/* Looks like we're hung: log diagnostics and reinitialize */
	device_printf(adapter->dev, "Watchdog timeout -- resetting\n");
	device_printf(adapter->dev,
	    "Queue(%d) tdh = %d, hw tdt = %d\n", txr->me,
	    E1000_READ_REG(&adapter->hw, E1000_TDH(txr->me)),
	    E1000_READ_REG(&adapter->hw, E1000_TDT(txr->me)));
	device_printf(adapter->dev,"TX(%d) desc avail = %d,"
	    "Next TX to Clean = %d\n",
	    txr->me, txr->tx_avail, txr->next_to_clean);
	ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
	adapter->watchdog_events++;
	adapter->pause_frames = 0;
	em_init_locked(adapter);
}
2275
2276
/*
 * Poll or read the hardware link state and, on a transition, update
 * the driver's cached state (link_active, speed, duplex, baudrate)
 * and notify the network stack.  Called with the CORE lock held.
 */
static void
em_update_link_status(struct adapter *adapter)
{
	struct e1000_hw *hw = &adapter->hw;
	struct ifnet *ifp = adapter->ifp;
	device_t dev = adapter->dev;
	struct tx_ring *txr = adapter->tx_rings;
	u32 link_check = 0;

	/* Get the cached link value or read phy for real */
	switch (hw->phy.media_type) {
	case e1000_media_type_copper:
		if (hw->mac.get_link_status) {
			/* Do the work to read phy */
			e1000_check_for_link(hw);
			link_check = !hw->mac.get_link_status;
			if (link_check) /* ESB2 fix */
				e1000_cfg_on_link_up(hw);
		} else
			link_check = TRUE;
		break;
	case e1000_media_type_fiber:
		/* Fiber reports link via the STATUS register LU bit */
		e1000_check_for_link(hw);
		link_check = (E1000_READ_REG(hw, E1000_STATUS) &
				 E1000_STATUS_LU);
		break;
	case e1000_media_type_internal_serdes:
		e1000_check_for_link(hw);
		link_check = adapter->hw.mac.serdes_has_link;
		break;
	default:
	case e1000_media_type_unknown:
		break;
	}

	/* Now check for a transition */
	if (link_check && (adapter->link_active == 0)) {
		/* Link came up: latch speed/duplex and tell the stack */
		e1000_get_speed_and_duplex(hw, &adapter->link_speed,
		    &adapter->link_duplex);
		/* Check if we must disable SPEED_MODE bit on PCI-E */
		if ((adapter->link_speed != SPEED_1000) &&
		    ((hw->mac.type == e1000_82571) ||
		    (hw->mac.type == e1000_82572))) {
			int tarc0;
			tarc0 = E1000_READ_REG(hw, E1000_TARC(0));
			tarc0 &= ~SPEED_MODE_BIT;
			E1000_WRITE_REG(hw, E1000_TARC(0), tarc0);
		}
		if (bootverbose)
			device_printf(dev, "Link is up %d Mbps %s\n",
			    adapter->link_speed,
			    ((adapter->link_duplex == FULL_DUPLEX) ?
			    "Full Duplex" : "Half Duplex"));
		adapter->link_active = 1;
		adapter->smartspeed = 0;
		ifp->if_baudrate = adapter->link_speed * 1000000;
		if_link_state_change(ifp, LINK_STATE_UP);
	} else if (!link_check && (adapter->link_active == 1)) {
		/* Link went down: clear cached state and idle the queues */
		ifp->if_baudrate = adapter->link_speed = 0;
		adapter->link_duplex = 0;
		if (bootverbose)
			device_printf(dev, "Link is Down\n");
		adapter->link_active = 0;
		/* Link down, disable watchdog */
		for (int i = 0; i < adapter->num_queues; i++, txr++)
			txr->queue_status = EM_QUEUE_IDLE;
		if_link_state_change(ifp, LINK_STATE_DOWN);
	}
}
2346
/*********************************************************************
 *
 *  This routine disables all traffic on the adapter by issuing a
 *  global reset on the MAC.
 *  NOTE(review): despite the historical comment, this function does
 *  not itself deallocate TX/RX buffers -- that happens in the
 *  detach/teardown path; confirm before relying on it.
 *
 *  This routine should always be called with BOTH the CORE
 *  and TX locks.
 **********************************************************************/

static void
em_stop(void *arg)
{
        struct adapter  *adapter = arg;
        struct ifnet    *ifp = adapter->ifp;
        struct tx_ring  *txr = adapter->tx_rings;

        EM_CORE_LOCK_ASSERT(adapter);

        INIT_DEBUGOUT("em_stop: begin");

        /* Mask all interrupts and stop the periodic timer first so no
         * new work arrives while we tear the hardware down */
        em_disable_intr(adapter);
        callout_stop(&adapter->timer);

        /* Tell the stack that the interface is no longer active */
        ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
        ifp->if_drv_flags |= IFF_DRV_OACTIVE;

        /* Unarm watchdog timer. */
        for (int i = 0; i < adapter->num_queues; i++, txr++) {
                EM_TX_LOCK(txr);
                txr->queue_status = EM_QUEUE_IDLE;
                EM_TX_UNLOCK(txr);
        }

        /* Global MAC reset; clearing WUC disables wake-up events */
        e1000_reset_hw(&adapter->hw);
        E1000_WRITE_REG(&adapter->hw, E1000_WUC, 0);

        e1000_led_off(&adapter->hw);
        e1000_cleanup_led(&adapter->hw);
}
2387
2388
2389 /*********************************************************************
2390  *
2391  *  Determine hardware revision.
2392  *
2393  **********************************************************************/
2394 static void
2395 em_identify_hardware(struct adapter *adapter)
2396 {
2397         device_t dev = adapter->dev;
2398
2399         /* Make sure our PCI config space has the necessary stuff set */
2400         adapter->hw.bus.pci_cmd_word = pci_read_config(dev, PCIR_COMMAND, 2);
2401         if (!((adapter->hw.bus.pci_cmd_word & PCIM_CMD_BUSMASTEREN) &&
2402             (adapter->hw.bus.pci_cmd_word & PCIM_CMD_MEMEN))) {
2403                 device_printf(dev, "Memory Access and/or Bus Master bits "
2404                     "were not set!\n");
2405                 adapter->hw.bus.pci_cmd_word |=
2406                 (PCIM_CMD_BUSMASTEREN | PCIM_CMD_MEMEN);
2407                 pci_write_config(dev, PCIR_COMMAND,
2408                     adapter->hw.bus.pci_cmd_word, 2);
2409         }
2410
2411         /* Save off the information about this board */
2412         adapter->hw.vendor_id = pci_get_vendor(dev);
2413         adapter->hw.device_id = pci_get_device(dev);
2414         adapter->hw.revision_id = pci_read_config(dev, PCIR_REVID, 1);
2415         adapter->hw.subsystem_vendor_id =
2416             pci_read_config(dev, PCIR_SUBVEND_0, 2);
2417         adapter->hw.subsystem_device_id =
2418             pci_read_config(dev, PCIR_SUBDEV_0, 2);
2419
2420         /* Do Shared Code Init and Setup */
2421         if (e1000_set_mac_type(&adapter->hw)) {
2422                 device_printf(dev, "Setup init failure\n");
2423                 return;
2424         }
2425 }
2426
/*
 * Map BAR 0 (register space) and set up the bus-space handles the
 * shared code uses for register access; then probe for MSI/MSI-X.
 *
 * Returns 0 on success or ENXIO if the memory BAR cannot be mapped.
 */
static int
em_allocate_pci_resources(struct adapter *adapter)
{
        device_t        dev = adapter->dev;
        int             rid;

        /* BAR 0 holds the device register space */
        rid = PCIR_BAR(0);
        adapter->memory = bus_alloc_resource_any(dev, SYS_RES_MEMORY,
            &rid, RF_ACTIVE);
        if (adapter->memory == NULL) {
                device_printf(dev, "Unable to allocate bus resource: memory\n");
                return (ENXIO);
        }
        adapter->osdep.mem_bus_space_tag =
            rman_get_bustag(adapter->memory);
        adapter->osdep.mem_bus_space_handle =
            rman_get_bushandle(adapter->memory);
        /* The shared code accesses registers through hw_addr, which here
         * is the address of the bus-space handle, not a direct mapping */
        adapter->hw.hw_addr = (u8 *)&adapter->osdep.mem_bus_space_handle;

        /* Default to a single queue */
        adapter->num_queues = 1;

        /*
         * Setup MSI/X or MSI if PCI Express
         */
        adapter->msix = em_setup_msix(adapter);

        adapter->hw.back = &adapter->osdep;

        return (0);
}
2458
2459 /*********************************************************************
2460  *
2461  *  Setup the Legacy or MSI Interrupt handler
2462  *
2463  **********************************************************************/
2464 int
2465 em_allocate_legacy(struct adapter *adapter)
2466 {
2467         device_t dev = adapter->dev;
2468         struct tx_ring  *txr = adapter->tx_rings;
2469         int error, rid = 0;
2470
2471         /* Manually turn off all interrupts */
2472         E1000_WRITE_REG(&adapter->hw, E1000_IMC, 0xffffffff);
2473
2474         if (adapter->msix == 1) /* using MSI */
2475                 rid = 1;
2476         /* We allocate a single interrupt resource */
2477         adapter->res = bus_alloc_resource_any(dev,
2478             SYS_RES_IRQ, &rid, RF_SHAREABLE | RF_ACTIVE);
2479         if (adapter->res == NULL) {
2480                 device_printf(dev, "Unable to allocate bus resource: "
2481                     "interrupt\n");
2482                 return (ENXIO);
2483         }
2484
2485         /*
2486          * Allocate a fast interrupt and the associated
2487          * deferred processing contexts.
2488          */
2489         TASK_INIT(&adapter->que_task, 0, em_handle_que, adapter);
2490         adapter->tq = taskqueue_create_fast("em_taskq", M_NOWAIT,
2491             taskqueue_thread_enqueue, &adapter->tq);
2492         taskqueue_start_threads(&adapter->tq, 1, PI_NET, "%s que",
2493             device_get_nameunit(adapter->dev));
2494         /* Use a TX only tasklet for local timer */
2495         TASK_INIT(&txr->tx_task, 0, em_handle_tx, txr);
2496         txr->tq = taskqueue_create_fast("em_txq", M_NOWAIT,
2497             taskqueue_thread_enqueue, &txr->tq);
2498         taskqueue_start_threads(&txr->tq, 1, PI_NET, "%s txq",
2499             device_get_nameunit(adapter->dev));
2500         TASK_INIT(&adapter->link_task, 0, em_handle_link, adapter);
2501         if ((error = bus_setup_intr(dev, adapter->res, INTR_TYPE_NET,
2502             em_irq_fast, NULL, adapter, &adapter->tag)) != 0) {
2503                 device_printf(dev, "Failed to register fast interrupt "
2504                             "handler: %d\n", error);
2505                 taskqueue_free(adapter->tq);
2506                 adapter->tq = NULL;
2507                 return (error);
2508         }
2509         
2510         return (0);
2511 }
2512
/*********************************************************************
 *
 *  Setup the MSIX Interrupt handlers
 *   This is not really Multiqueue, rather
 *   its just separate interrupt vectors
 *   for TX, RX, and Link.
 *
 *  Vectors are assigned in order: RX(s), TX(s), then Link. The IVAR
 *  word accumulated in adapter->ivars maps each cause to its vector.
 *
 *  NOTE(review): on a mid-loop failure, resources allocated so far are
 *  left for em_free_pci_resources() to release -- confirm the attach
 *  error path always calls it.
 *
 **********************************************************************/
int
em_allocate_msix(struct adapter *adapter)
{
        device_t        dev = adapter->dev;
        struct          tx_ring *txr = adapter->tx_rings;
        struct          rx_ring *rxr = adapter->rx_rings;
        int             error, rid, vector = 0;


        /* Make sure all interrupts are disabled */
        E1000_WRITE_REG(&adapter->hw, E1000_IMC, 0xffffffff);

        /* First set up ring resources */
        for (int i = 0; i < adapter->num_queues; i++, txr++, rxr++) {

                /* RX ring: IRQ rids are 1-based, vectors 0-based */
                rid = vector + 1;

                rxr->res = bus_alloc_resource_any(dev,
                    SYS_RES_IRQ, &rid, RF_ACTIVE);
                if (rxr->res == NULL) {
                        device_printf(dev,
                            "Unable to allocate bus resource: "
                            "RX MSIX Interrupt %d\n", i);
                        return (ENXIO);
                }
                if ((error = bus_setup_intr(dev, rxr->res,
                    INTR_TYPE_NET | INTR_MPSAFE, NULL, em_msix_rx,
                    rxr, &rxr->tag)) != 0) {
                        device_printf(dev, "Failed to register RX handler");
                        return (error);
                }
#if __FreeBSD_version >= 800504
                bus_describe_intr(dev, rxr->res, rxr->tag, "rx %d", i);
#endif
                rxr->msix = vector++; /* NOTE increment vector for TX */
                TASK_INIT(&rxr->rx_task, 0, em_handle_rx, rxr);
                rxr->tq = taskqueue_create_fast("em_rxq", M_NOWAIT,
                    taskqueue_thread_enqueue, &rxr->tq);
                taskqueue_start_threads(&rxr->tq, 1, PI_NET, "%s rxq",
                    device_get_nameunit(adapter->dev));
                /*
                ** Set the bit to enable interrupt
                ** in E1000_IMS -- bits 20 and 21
                ** are for RX0 and RX1, note this has
                ** NOTHING to do with the MSIX vector
                */
                rxr->ims = 1 << (20 + i);
                /* IVAR: 4 bits per cause; bit 3 (the 8) marks it valid */
                adapter->ivars |= (8 | rxr->msix) << (i * 4);

                /* TX ring */
                rid = vector + 1;
                txr->res = bus_alloc_resource_any(dev,
                    SYS_RES_IRQ, &rid, RF_ACTIVE);
                if (txr->res == NULL) {
                        device_printf(dev,
                            "Unable to allocate bus resource: "
                            "TX MSIX Interrupt %d\n", i);
                        return (ENXIO);
                }
                if ((error = bus_setup_intr(dev, txr->res,
                    INTR_TYPE_NET | INTR_MPSAFE, NULL, em_msix_tx,
                    txr, &txr->tag)) != 0) {
                        device_printf(dev, "Failed to register TX handler");
                        return (error);
                }
#if __FreeBSD_version >= 800504
                bus_describe_intr(dev, txr->res, txr->tag, "tx %d", i);
#endif
                txr->msix = vector++; /* Increment vector for next pass */
                TASK_INIT(&txr->tx_task, 0, em_handle_tx, txr);
                txr->tq = taskqueue_create_fast("em_txq", M_NOWAIT,
                    taskqueue_thread_enqueue, &txr->tq);
                taskqueue_start_threads(&txr->tq, 1, PI_NET, "%s txq",
                    device_get_nameunit(adapter->dev));
                /*
                ** Set the bit to enable interrupt
                ** in E1000_IMS -- bits 22 and 23
                ** are for TX0 and TX1, note this has
                ** NOTHING to do with the MSIX vector
                */
                txr->ims = 1 << (22 + i);
                /* TX causes occupy the second nibble group of IVAR */
                adapter->ivars |= (8 | txr->msix) << (8 + (i * 4));
        }

        /* Link interrupt: last vector after all ring vectors */
        ++rid;
        adapter->res = bus_alloc_resource_any(dev,
            SYS_RES_IRQ, &rid, RF_ACTIVE);
        if (!adapter->res) {
                device_printf(dev,"Unable to allocate "
                    "bus resource: Link interrupt [%d]\n", rid);
                return (ENXIO);
        }
        /* Set the link handler function */
        error = bus_setup_intr(dev, adapter->res,
            INTR_TYPE_NET | INTR_MPSAFE, NULL,
            em_msix_link, adapter, &adapter->tag);
        if (error) {
                adapter->res = NULL;
                device_printf(dev, "Failed to register LINK handler");
                return (error);
        }
#if __FreeBSD_version >= 800504
                bus_describe_intr(dev, adapter->res, adapter->tag, "link");
#endif
        adapter->linkvec = vector;
        /* Link cause lives at bits 16-19; 0x80000000 enables IVAR use */
        adapter->ivars |=  (8 | vector) << 16;
        adapter->ivars |= 0x80000000;

        return (0);
}
2633
2634
/*
 * Release all interrupt and memory resources acquired during attach.
 * Safe to call on a partially-initialized adapter: every teardown is
 * guarded by a NULL check, so this doubles as the attach error path.
 */
static void
em_free_pci_resources(struct adapter *adapter)
{
        device_t        dev = adapter->dev;
        struct tx_ring  *txr;
        struct rx_ring  *rxr;
        int             rid;


        /*
        ** Release all the queue interrupt resources:
        */
        for (int i = 0; i < adapter->num_queues; i++) {
                txr = &adapter->tx_rings[i];
                rxr = &adapter->rx_rings[i];
                /* an early abort? */
                /* NOTE(review): these pointers are only NULL when the
                 * ring arrays themselves were never allocated */
                if ((txr == NULL) || (rxr == NULL))
                        break;
                /* IRQ rids are vector + 1 (see em_allocate_msix) */
                rid = txr->msix +1;
                if (txr->tag != NULL) {
                        bus_teardown_intr(dev, txr->res, txr->tag);
                        txr->tag = NULL;
                }
                if (txr->res != NULL)
                        bus_release_resource(dev, SYS_RES_IRQ,
                            rid, txr->res);
                rid = rxr->msix +1;
                if (rxr->tag != NULL) {
                        bus_teardown_intr(dev, rxr->res, rxr->tag);
                        rxr->tag = NULL;
                }
                if (rxr->res != NULL)
                        bus_release_resource(dev, SYS_RES_IRQ,
                            rid, rxr->res);
        }

        /* The legacy/MSI/link interrupt rid depends on the mode used */
        if (adapter->linkvec) /* we are doing MSIX */
                rid = adapter->linkvec + 1;
        else
                (adapter->msix != 0) ? (rid = 1):(rid = 0);

        if (adapter->tag != NULL) {
                bus_teardown_intr(dev, adapter->res, adapter->tag);
                adapter->tag = NULL;
        }

        if (adapter->res != NULL)
                bus_release_resource(dev, SYS_RES_IRQ, rid, adapter->res);


        /* Give back MSI/MSIX messages before unmapping the MSIX BAR */
        if (adapter->msix)
                pci_release_msi(dev);

        if (adapter->msix_mem != NULL)
                bus_release_resource(dev, SYS_RES_MEMORY,
                    PCIR_BAR(EM_MSIX_BAR), adapter->msix_mem);

        if (adapter->memory != NULL)
                bus_release_resource(dev, SYS_RES_MEMORY,
                    PCIR_BAR(0), adapter->memory);

        if (adapter->flash != NULL)
                bus_release_resource(dev, SYS_RES_MEMORY,
                    EM_FLASH, adapter->flash);
}
2700
2701 /*
2702  * Setup MSI or MSI/X
2703  */
2704 static int
2705 em_setup_msix(struct adapter *adapter)
2706 {
2707         device_t dev = adapter->dev;
2708         int val = 0;
2709
2710         /*
2711         ** Setup MSI/X for Hartwell: tests have shown
2712         ** use of two queues to be unstable, and to
2713         ** provide no great gain anyway, so we simply
2714         ** seperate the interrupts and use a single queue.
2715         */
2716         if ((adapter->hw.mac.type == e1000_82574) &&
2717             (em_enable_msix == TRUE)) {
2718                 /* Map the MSIX BAR */
2719                 int rid = PCIR_BAR(EM_MSIX_BAR);
2720                 adapter->msix_mem = bus_alloc_resource_any(dev,
2721                     SYS_RES_MEMORY, &rid, RF_ACTIVE);
2722                 if (!adapter->msix_mem) {
2723                         /* May not be enabled */
2724                         device_printf(adapter->dev,
2725                             "Unable to map MSIX table \n");
2726                         goto msi;
2727                 }
2728                 val = pci_msix_count(dev); 
2729                 /* We only need 3 vectors */
2730                 if (val > 3)
2731                         val = 3;
2732                 if ((val != 3) && (val != 5)) {
2733                         bus_release_resource(dev, SYS_RES_MEMORY,
2734                             PCIR_BAR(EM_MSIX_BAR), adapter->msix_mem);
2735                         adapter->msix_mem = NULL;
2736                         device_printf(adapter->dev,
2737                             "MSIX: incorrect vectors, using MSI\n");
2738                         goto msi;
2739                 }
2740
2741                 if (pci_alloc_msix(dev, &val) == 0) {
2742                         device_printf(adapter->dev,
2743                             "Using MSIX interrupts "
2744                             "with %d vectors\n", val);
2745                 }
2746
2747                 return (val);
2748         }
2749 msi:
2750         val = pci_msi_count(dev);
2751         if (val == 1 && pci_alloc_msi(dev, &val) == 0) {
2752                 adapter->msix = 1;
2753                 device_printf(adapter->dev,"Using an MSI interrupt\n");
2754                 return (val);
2755         } 
2756         /* Should only happen due to manual configuration */
2757         device_printf(adapter->dev,"No MSI/MSIX using a Legacy IRQ\n");
2758         return (0);
2759 }
2760
2761
/*********************************************************************
 *
 *  Initialize the hardware to a configuration
 *  as specified by the adapter structure.
 *
 *  Programs the packet buffer split and flow-control watermarks per
 *  MAC type, then issues a global reset and re-initializes via the
 *  shared code.
 *
 **********************************************************************/
static void
em_reset(struct adapter *adapter)
{
        device_t        dev = adapter->dev;
        struct ifnet    *ifp = adapter->ifp;
        struct e1000_hw *hw = &adapter->hw;
        u16             rx_buffer_size;
        u32             pba;

        INIT_DEBUGOUT("em_reset: begin");

        /* Set up smart power down as default off on newer adapters. */
        if (!em_smart_pwr_down && (hw->mac.type == e1000_82571 ||
            hw->mac.type == e1000_82572)) {
                u16 phy_tmp = 0;

                /* Speed up time to link by disabling smart power down. */
                e1000_read_phy_reg(hw, IGP02E1000_PHY_POWER_MGMT, &phy_tmp);
                phy_tmp &= ~IGP02E1000_PM_SPD;
                e1000_write_phy_reg(hw, IGP02E1000_PHY_POWER_MGMT, phy_tmp);
        }

        /*
         * Packet Buffer Allocation (PBA)
         * Writing PBA sets the receive portion of the buffer
         * the remainder is used for the transmit buffer.
         */
        switch (hw->mac.type) {
        /* Total Packet Buffer on these is 48K */
        case e1000_82571:
        case e1000_82572:
        case e1000_80003es2lan:
                        pba = E1000_PBA_32K; /* 32K for Rx, 16K for Tx */
                break;
        case e1000_82573: /* 82573: Total Packet Buffer is 32K */
                        pba = E1000_PBA_12K; /* 12K for Rx, 20K for Tx */
                break;
        case e1000_82574:
        case e1000_82583:
                        pba = E1000_PBA_20K; /* 20K for Rx, 20K for Tx */
                break;
        case e1000_ich8lan:
                pba = E1000_PBA_8K;
                break;
        case e1000_ich9lan:
        case e1000_ich10lan:
                /* Boost Receive side for jumbo frames */
                if (adapter->max_frame_size > 4096)
                        pba = E1000_PBA_14K;
                else
                        pba = E1000_PBA_10K;
                break;
        case e1000_pchlan:
        case e1000_pch2lan:
                pba = E1000_PBA_26K;
                break;
        default:
                if (adapter->max_frame_size > 8192)
                        pba = E1000_PBA_40K; /* 40K for Rx, 24K for Tx */
                else
                        pba = E1000_PBA_48K; /* 48K for Rx, 16K for Tx */
        }
        E1000_WRITE_REG(&adapter->hw, E1000_PBA, pba);

        /*
         * These parameters control the automatic generation (Tx) and
         * response (Rx) to Ethernet PAUSE frames.
         * - High water mark should allow for at least two frames to be
         *   received after sending an XOFF.
         * - Low water mark works best when it is very near the high water mark.
         *   This allows the receiver to restart by sending XON when it has
         *   drained a bit. Here we use an arbitrary value of 1500 which will
         *   restart after one full frame is pulled from the buffer. There
         *   could be several smaller frames in the buffer and if so they will
         *   not trigger the XON until their total number reduces the buffer
         *   by 1500.
         * - The pause time is fairly large at 1000 x 512ns = 512 usec.
         */
        /* PBA low 16 bits are the RX portion in KB; shift converts to bytes */
        rx_buffer_size = ((E1000_READ_REG(hw, E1000_PBA) & 0xffff) << 10 );
        hw->fc.high_water = rx_buffer_size -
            roundup2(adapter->max_frame_size, 1024);
        hw->fc.low_water = hw->fc.high_water - 1500;

        if (adapter->fc) /* locally set flow control value? */
                hw->fc.requested_mode = adapter->fc;
        else
                hw->fc.requested_mode = e1000_fc_full;

        if (hw->mac.type == e1000_80003es2lan)
                hw->fc.pause_time = 0xFFFF;
        else
                hw->fc.pause_time = EM_FC_PAUSE_TIME;

        hw->fc.send_xon = TRUE;

        /* Device specific overrides/settings */
        switch (hw->mac.type) {
        case e1000_pchlan:
                /* Workaround: no TX flow ctrl for PCH */
                hw->fc.requested_mode = e1000_fc_rx_pause;
                hw->fc.pause_time = 0xFFFF; /* override */
                if (ifp->if_mtu > ETHERMTU) {
                        hw->fc.high_water = 0x3500;
                        hw->fc.low_water = 0x1500;
                } else {
                        hw->fc.high_water = 0x5000;
                        hw->fc.low_water = 0x3000;
                }
                hw->fc.refresh_time = 0x1000;
                break;
        case e1000_pch2lan:
                hw->fc.high_water = 0x5C20;
                hw->fc.low_water = 0x5048;
                hw->fc.pause_time = 0x0650;
                hw->fc.refresh_time = 0x0400;
                /* Jumbos need adjusted PBA */
                if (ifp->if_mtu > ETHERMTU)
                        E1000_WRITE_REG(hw, E1000_PBA, 12);
                else
                        E1000_WRITE_REG(hw, E1000_PBA, 26);
                break;
        case e1000_ich9lan:
        case e1000_ich10lan:
                if (ifp->if_mtu > ETHERMTU) {
                        hw->fc.high_water = 0x2800;
                        hw->fc.low_water = hw->fc.high_water - 8;
                        break;
                } 
                /* else fall thru */
        default:
                if (hw->mac.type == e1000_80003es2lan)
                        hw->fc.pause_time = 0xFFFF;
                break;
        }

        /* Issue a global reset */
        e1000_reset_hw(hw);
        E1000_WRITE_REG(hw, E1000_WUC, 0);
        em_disable_aspm(adapter);
        /* and a re-init */
        if (e1000_init_hw(hw) < 0) {
                device_printf(dev, "Hardware Initialization Failed\n");
                return;
        }

        E1000_WRITE_REG(hw, E1000_VET, ETHERTYPE_VLAN);
        e1000_get_phy_info(hw);
        e1000_check_for_link(hw);
        return;
}
2918
/*********************************************************************
 *
 *  Setup networking device structure and register an interface.
 *
 *  Allocates the ifnet, fills in the driver entry points and
 *  capability flags, attaches the ethernet layer and registers the
 *  supported media types.  Returns 0 on success, -1 if the ifnet
 *  cannot be allocated.
 *
 **********************************************************************/
static int
em_setup_interface(device_t dev, struct adapter *adapter)
{
        struct ifnet   *ifp;

        INIT_DEBUGOUT("em_setup_interface: begin");

        ifp = adapter->ifp = if_alloc(IFT_ETHER);
        if (ifp == NULL) {
                device_printf(dev, "can not allocate ifnet structure\n");
                return (-1);
        }
        if_initname(ifp, device_get_name(dev), device_get_unit(dev));
        ifp->if_init =  em_init;
        ifp->if_softc = adapter;
        ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
        ifp->if_ioctl = em_ioctl;
#ifdef EM_MULTIQUEUE
        /* Multiqueue stack interface */
        ifp->if_transmit = em_mq_start;
        ifp->if_qflush = em_qflush;
#else
        ifp->if_start = em_start;
        /* Size the send queue to the TX ring, minus one slot */
        IFQ_SET_MAXLEN(&ifp->if_snd, adapter->num_tx_desc - 1);
        ifp->if_snd.ifq_drv_maxlen = adapter->num_tx_desc - 1;
        IFQ_SET_READY(&ifp->if_snd);
#endif  

        ether_ifattach(ifp, adapter->hw.mac.addr);

        ifp->if_capabilities = ifp->if_capenable = 0;


        /* Checksum offload and TSO are supported on all of these parts */
        ifp->if_capabilities |= IFCAP_HWCSUM | IFCAP_VLAN_HWCSUM;
        ifp->if_capabilities |= IFCAP_TSO4;
        /*
         * Tell the upper layer(s) we
         * support full VLAN capability
         */
        ifp->if_data.ifi_hdrlen = sizeof(struct ether_vlan_header);
        ifp->if_capabilities |= IFCAP_VLAN_HWTAGGING
                             |  IFCAP_VLAN_HWTSO
                             |  IFCAP_VLAN_MTU;
        ifp->if_capenable = ifp->if_capabilities;

        /*
        ** Don't turn this on by default, if vlans are
        ** created on another pseudo device (eg. lagg)
        ** then vlan events are not passed thru, breaking
        ** operation, but with HW FILTER off it works. If
        ** using vlans directly on the em driver you can
        ** enable this and get full hardware tag filtering.
        */
        ifp->if_capabilities |= IFCAP_VLAN_HWFILTER;

#ifdef DEVICE_POLLING
        ifp->if_capabilities |= IFCAP_POLLING;
#endif

        /* Enable only WOL MAGIC by default */
        if (adapter->wol) {
                ifp->if_capabilities |= IFCAP_WOL;
                ifp->if_capenable |= IFCAP_WOL_MAGIC;
        }
                
        /*
         * Specify the media types supported by this adapter and register
         * callbacks to update media and link information
         */
        ifmedia_init(&adapter->media, IFM_IMASK,
            em_media_change, em_media_status);
        if ((adapter->hw.phy.media_type == e1000_media_type_fiber) ||
            (adapter->hw.phy.media_type == e1000_media_type_internal_serdes)) {
                u_char fiber_type = IFM_1000_SX;        /* default type */

                ifmedia_add(&adapter->media, IFM_ETHER | fiber_type | IFM_FDX, 
                            0, NULL);
                ifmedia_add(&adapter->media, IFM_ETHER | fiber_type, 0, NULL);
        } else {
                /* Copper: advertise the full 10/100/1000 matrix */
                ifmedia_add(&adapter->media, IFM_ETHER | IFM_10_T, 0, NULL);
                ifmedia_add(&adapter->media, IFM_ETHER | IFM_10_T | IFM_FDX,
                            0, NULL);
                ifmedia_add(&adapter->media, IFM_ETHER | IFM_100_TX,
                            0, NULL);
                ifmedia_add(&adapter->media, IFM_ETHER | IFM_100_TX | IFM_FDX,
                            0, NULL);
                /* The IFE (10/100-only) PHY cannot do gigabit */
                if (adapter->hw.phy.type != e1000_phy_ife) {
                        ifmedia_add(&adapter->media,
                                IFM_ETHER | IFM_1000_T | IFM_FDX, 0, NULL);
                        ifmedia_add(&adapter->media,
                                IFM_ETHER | IFM_1000_T, 0, NULL);
                }
        }
        ifmedia_add(&adapter->media, IFM_ETHER | IFM_AUTO, 0, NULL);
        ifmedia_set(&adapter->media, IFM_ETHER | IFM_AUTO);
        return (0);
}
3021
3022
3023 /*
3024  * Manage DMA'able memory.
3025  */
3026 static void
3027 em_dmamap_cb(void *arg, bus_dma_segment_t *segs, int nseg, int error)
3028 {
3029         if (error)
3030                 return;
3031         *(bus_addr_t *) arg = segs[0].ds_addr;
3032 }
3033
3034 static int
3035 em_dma_malloc(struct adapter *adapter, bus_size_t size,
3036         struct em_dma_alloc *dma, int mapflags)
3037 {
3038         int error;
3039
3040         error = bus_dma_tag_create(bus_get_dma_tag(adapter->dev), /* parent */
3041                                 EM_DBA_ALIGN, 0,        /* alignment, bounds */
3042                                 BUS_SPACE_MAXADDR,      /* lowaddr */
3043                                 BUS_SPACE_MAXADDR,      /* highaddr */
3044                                 NULL, NULL,             /* filter, filterarg */
3045                                 size,                   /* maxsize */
3046                                 1,                      /* nsegments */
3047                                 size,                   /* maxsegsize */
3048                                 0,                      /* flags */
3049                                 NULL,                   /* lockfunc */
3050                                 NULL,                   /* lockarg */
3051                                 &dma->dma_tag);
3052         if (error) {
3053                 device_printf(adapter->dev,
3054                     "%s: bus_dma_tag_create failed: %d\n",
3055                     __func__, error);
3056                 goto fail_0;
3057         }
3058
3059         error = bus_dmamem_alloc(dma->dma_tag, (void**) &dma->dma_vaddr,
3060             BUS_DMA_NOWAIT | BUS_DMA_COHERENT, &dma->dma_map);
3061         if (error) {
3062                 device_printf(adapter->dev,
3063                     "%s: bus_dmamem_alloc(%ju) failed: %d\n",
3064                     __func__, (uintmax_t)size, error);
3065                 goto fail_2;
3066         }
3067
3068         dma->dma_paddr = 0;
3069         error = bus_dmamap_load(dma->dma_tag, dma->dma_map, dma->dma_vaddr,
3070             size, em_dmamap_cb, &dma->dma_paddr, mapflags | BUS_DMA_NOWAIT);
3071         if (error || dma->dma_paddr == 0) {
3072                 device_printf(adapter->dev,
3073                     "%s: bus_dmamap_load failed: %d\n",
3074                     __func__, error);
3075                 goto fail_3;
3076         }
3077
3078         return (0);
3079
3080 fail_3:
3081         bus_dmamap_unload(dma->dma_tag, dma->dma_map);
3082 fail_2:
3083         bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map);
3084         bus_dma_tag_destroy(dma->dma_tag);
3085 fail_0:
3086         dma->dma_map = NULL;
3087         dma->dma_tag = NULL;
3088
3089         return (error);
3090 }
3091
/*
 * Release everything held by an em_dma_alloc: sync and unload the DMA
 * map, free the DMA'able memory, and destroy the tag.  Safe to call on
 * a structure whose allocation failed or that was already freed — both
 * dma_tag and dma_map are NULLed by em_dma_malloc()'s failure path and
 * by this function itself.
 */
static void
em_dma_free(struct adapter *adapter, struct em_dma_alloc *dma)
{
        if (dma->dma_tag == NULL)
                return;
        if (dma->dma_map != NULL) {
                /* POSTREAD/POSTWRITE sync must precede the unload. */
                bus_dmamap_sync(dma->dma_tag, dma->dma_map,
                    BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
                bus_dmamap_unload(dma->dma_tag, dma->dma_map);
                bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map);
                dma->dma_map = NULL;
        }
        bus_dma_tag_destroy(dma->dma_tag);
        dma->dma_tag = NULL;
}
3107
3108
3109 /*********************************************************************
3110  *
3111  *  Allocate memory for the transmit and receive rings, and then
3112  *  the descriptors associated with each, called only once at attach.
3113  *
3114  **********************************************************************/
3115 static int
3116 em_allocate_queues(struct adapter *adapter)
3117 {
3118         device_t                dev = adapter->dev;
3119         struct tx_ring          *txr = NULL;
3120         struct rx_ring          *rxr = NULL;
3121         int rsize, tsize, error = E1000_SUCCESS;
3122         int txconf = 0, rxconf = 0;
3123
3124
3125         /* Allocate the TX ring struct memory */
3126         if (!(adapter->tx_rings =
3127             (struct tx_ring *) malloc(sizeof(struct tx_ring) *
3128             adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
3129                 device_printf(dev, "Unable to allocate TX ring memory\n");
3130                 error = ENOMEM;
3131                 goto fail;
3132         }
3133
3134         /* Now allocate the RX */
3135         if (!(adapter->rx_rings =
3136             (struct rx_ring *) malloc(sizeof(struct rx_ring) *
3137             adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
3138                 device_printf(dev, "Unable to allocate RX ring memory\n");
3139                 error = ENOMEM;
3140                 goto rx_fail;
3141         }
3142
3143         tsize = roundup2(adapter->num_tx_desc *
3144             sizeof(struct e1000_tx_desc), EM_DBA_ALIGN);
3145         /*
3146          * Now set up the TX queues, txconf is needed to handle the
3147          * possibility that things fail midcourse and we need to
3148          * undo memory gracefully
3149          */ 
3150         for (int i = 0; i < adapter->num_queues; i++, txconf++) {
3151                 /* Set up some basics */
3152                 txr = &adapter->tx_rings[i];
3153                 txr->adapter = adapter;
3154                 txr->me = i;
3155
3156                 /* Initialize the TX lock */
3157                 snprintf(txr->mtx_name, sizeof(txr->mtx_name), "%s:tx(%d)",
3158                     device_get_nameunit(dev), txr->me);
3159                 mtx_init(&txr->tx_mtx, txr->mtx_name, NULL, MTX_DEF);
3160
3161                 if (em_dma_malloc(adapter, tsize,
3162                         &txr->txdma, BUS_DMA_NOWAIT)) {
3163                         device_printf(dev,
3164                             "Unable to allocate TX Descriptor memory\n");
3165                         error = ENOMEM;
3166                         goto err_tx_desc;
3167                 }
3168                 txr->tx_base = (struct e1000_tx_desc *)txr->txdma.dma_vaddr;
3169                 bzero((void *)txr->tx_base, tsize);
3170
3171                 if (em_allocate_transmit_buffers(txr)) {
3172                         device_printf(dev,
3173                             "Critical Failure setting up transmit buffers\n");
3174                         error = ENOMEM;
3175                         goto err_tx_desc;
3176                 }
3177 #if __FreeBSD_version >= 800000
3178                 /* Allocate a buf ring */
3179                 txr->br = buf_ring_alloc(4096, M_DEVBUF,
3180                     M_WAITOK, &txr->tx_mtx);
3181 #endif
3182         }
3183
3184         /*
3185          * Next the RX queues...
3186          */ 
3187         rsize = roundup2(adapter->num_rx_desc *
3188             sizeof(struct e1000_rx_desc), EM_DBA_ALIGN);
3189         for (int i = 0; i < adapter->num_queues; i++, rxconf++) {
3190                 rxr = &adapter->rx_rings[i];
3191                 rxr->adapter = adapter;
3192                 rxr->me = i;
3193
3194                 /* Initialize the RX lock */
3195                 snprintf(rxr->mtx_name, sizeof(rxr->mtx_name), "%s:rx(%d)",
3196                     device_get_nameunit(dev), txr->me);
3197                 mtx_init(&rxr->rx_mtx, rxr->mtx_name, NULL, MTX_DEF);
3198
3199                 if (em_dma_malloc(adapter, rsize,
3200                         &rxr->rxdma, BUS_DMA_NOWAIT)) {
3201                         device_printf(dev,
3202                             "Unable to allocate RxDescriptor memory\n");
3203                         error = ENOMEM;
3204                         goto err_rx_desc;
3205                 }
3206                 rxr->rx_base = (struct e1000_rx_desc *)rxr->rxdma.dma_vaddr;
3207                 bzero((void *)rxr->rx_base, rsize);
3208
3209                 /* Allocate receive buffers for the ring*/
3210                 if (em_allocate_receive_buffers(rxr)) {
3211                         device_printf(dev,
3212                             "Critical Failure setting up receive buffers\n");
3213                         error = ENOMEM;
3214                         goto err_rx_desc;
3215                 }
3216         }
3217
3218         return (0);
3219
3220 err_rx_desc:
3221         for (rxr = adapter->rx_rings; rxconf > 0; rxr++, rxconf--)
3222                 em_dma_free(adapter, &rxr->rxdma);
3223 err_tx_desc:
3224         for (txr = adapter->tx_rings; txconf > 0; txr++, txconf--)
3225                 em_dma_free(adapter, &txr->txdma);
3226         free(adapter->rx_rings, M_DEVBUF);
3227 rx_fail:
3228 #if __FreeBSD_version >= 800000
3229         buf_ring_free(txr->br, M_DEVBUF);
3230 #endif
3231         free(adapter->tx_rings, M_DEVBUF);
3232 fail:
3233         return (error);
3234 }
3235
3236
3237 /*********************************************************************
3238  *
3239  *  Allocate memory for tx_buffer structures. The tx_buffer stores all
3240  *  the information needed to transmit a packet on the wire. This is
3241  *  called only once at attach, setup is done every reset.
3242  *
3243  **********************************************************************/
static int
em_allocate_transmit_buffers(struct tx_ring *txr)
{
        struct adapter *adapter = txr->adapter;
        device_t dev = adapter->dev;
        struct em_buffer *txbuf;
        int error, i;

        /*
         * Setup DMA descriptor areas.
         * One tag serves every packet on this ring: up to EM_TSO_SIZE
         * bytes in at most EM_MAX_SCATTER segments of PAGE_SIZE each.
         */
        if ((error = bus_dma_tag_create(bus_get_dma_tag(dev),
                               1, 0,                    /* alignment, bounds */
                               BUS_SPACE_MAXADDR,       /* lowaddr */
                               BUS_SPACE_MAXADDR,       /* highaddr */
                               NULL, NULL,              /* filter, filterarg */
                               EM_TSO_SIZE,             /* maxsize */
                               EM_MAX_SCATTER,          /* nsegments */
                               PAGE_SIZE,               /* maxsegsize */
                               0,                       /* flags */
                               NULL,                    /* lockfunc */
                               NULL,                    /* lockfuncarg */
                               &txr->txtag))) {
                device_printf(dev,"Unable to allocate TX DMA tag\n");
                goto fail;
        }

        /* One em_buffer per descriptor; M_ZERO leaves m_head/map NULL. */
        if (!(txr->tx_buffers =
            (struct em_buffer *) malloc(sizeof(struct em_buffer) *
            adapter->num_tx_desc, M_DEVBUF, M_NOWAIT | M_ZERO))) {
                device_printf(dev, "Unable to allocate tx_buffer memory\n");
                error = ENOMEM;
                goto fail;
        }

        /* Create the descriptor buffer dma maps */
        txbuf = txr->tx_buffers;
        for (i = 0; i < adapter->num_tx_desc; i++, txbuf++) {
                error = bus_dmamap_create(txr->txtag, 0, &txbuf->map);
                if (error != 0) {
                        device_printf(dev, "Unable to create TX DMA map\n");
                        goto fail;
                }
        }

        return 0;
fail:
        /*
         * We free all, it handles case where we are in the middle.
         * NOTE(review): this tears down *every* TX ring (and frees
         * adapter->tx_rings), not just this one — callers' unwind
         * paths must tolerate that; verify against em_allocate_queues.
         */
        em_free_transmit_structures(adapter);
        return (error);
}
3295
3296 /*********************************************************************
3297  *
3298  *  Initialize a transmit ring.
3299  *
3300  **********************************************************************/
/*
 * (Re)initialize one TX ring to a clean state: zero the descriptors,
 * reset the indices, free any mbufs left from a previous run, and
 * clear the cached checksum-offload context.  Called under no lock;
 * takes and releases the ring's TX lock itself.
 */
static void
em_setup_transmit_ring(struct tx_ring *txr)
{
        struct adapter *adapter = txr->adapter;
        struct em_buffer *txbuf;
        int i;
#ifdef DEV_NETMAP
        struct netmap_adapter *na = NA(adapter->ifp);
        struct netmap_slot *slot;
#endif /* DEV_NETMAP */

        /* Clear the old descriptor contents */
        EM_TX_LOCK(txr);
#ifdef DEV_NETMAP
        /* Non-NULL slot means the ring is in netmap mode. */
        slot = netmap_reset(na, NR_TX, txr->me, 0);
#endif /* DEV_NETMAP */

        bzero((void *)txr->tx_base,
              (sizeof(struct e1000_tx_desc)) * adapter->num_tx_desc);
        /* Reset indices */
        txr->next_avail_desc = 0;
        txr->next_to_clean = 0;

        /* Free any existing tx buffers. */
        txbuf = txr->tx_buffers;
        for (i = 0; i < adapter->num_tx_desc; i++, txbuf++) {
                if (txbuf->m_head != NULL) {
                        /* Sync before unload, then release the mbuf. */
                        bus_dmamap_sync(txr->txtag, txbuf->map,
                            BUS_DMASYNC_POSTWRITE);
                        bus_dmamap_unload(txr->txtag, txbuf->map);
                        m_freem(txbuf->m_head);
                        txbuf->m_head = NULL;
                }
#ifdef DEV_NETMAP
                if (slot) {
                        /* Map netmap ring index to our descriptor index. */
                        int si = netmap_idx_n2k(&na->tx_rings[txr->me], i);
                        uint64_t paddr;
                        void *addr;

                        addr = PNMB(slot + si, &paddr);
                        txr->tx_base[i].buffer_addr = htole64(paddr);
                        /* reload the map for netmap mode */
                        netmap_load_map(txr->txtag, txbuf->map, addr);
                }
#endif /* DEV_NETMAP */

                /* clear the watch index */
                txbuf->next_eop = -1;
        }

        /* Set number of descriptors available */
        txr->tx_avail = adapter->num_tx_desc;
        txr->queue_status = EM_QUEUE_IDLE;

        /*
         * Clear checksum offload context.
         * Forces em_transmit_checksum_setup() to program a fresh
         * context descriptor on the next offloaded packet.
         */
        txr->last_hw_offload = 0;
        txr->last_hw_ipcss = 0;
        txr->last_hw_ipcso = 0;
        txr->last_hw_tucss = 0;
        txr->last_hw_tucso = 0;

        /* Push the zeroed descriptor ring out to the device. */
        bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
            BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
        EM_TX_UNLOCK(txr);
}
3366
3367 /*********************************************************************
3368  *
3369  *  Initialize all transmit rings.
3370  *
3371  **********************************************************************/
3372 static void
3373 em_setup_transmit_structures(struct adapter *adapter)
3374 {
3375         struct tx_ring *txr = adapter->tx_rings;
3376
3377         for (int i = 0; i < adapter->num_queues; i++, txr++)
3378                 em_setup_transmit_ring(txr);
3379
3380         return;
3381 }
3382
3383 /*********************************************************************
3384  *
3385  *  Enable transmit unit.
3386  *
3387  **********************************************************************/
static void
em_initialize_transmit_unit(struct adapter *adapter)
{
        struct tx_ring  *txr = adapter->tx_rings;
        struct e1000_hw *hw = &adapter->hw;
        u32     tctl, tarc, tipg = 0;

         INIT_DEBUGOUT("em_initialize_transmit_unit: begin");

        /* Program each ring's descriptor base/length and reset HEAD/TAIL. */
        for (int i = 0; i < adapter->num_queues; i++, txr++) {
                u64 bus_addr = txr->txdma.dma_paddr;
                /* Base and Len of TX Ring */
                E1000_WRITE_REG(hw, E1000_TDLEN(i),
                    adapter->num_tx_desc * sizeof(struct e1000_tx_desc));
                E1000_WRITE_REG(hw, E1000_TDBAH(i),
                    (u32)(bus_addr >> 32));
                E1000_WRITE_REG(hw, E1000_TDBAL(i),
                    (u32)bus_addr);
                /* Init the HEAD/TAIL indices */
                E1000_WRITE_REG(hw, E1000_TDT(i), 0);
                E1000_WRITE_REG(hw, E1000_TDH(i), 0);

                HW_DEBUGOUT2("Base = %x, Length = %x\n",
                    E1000_READ_REG(&adapter->hw, E1000_TDBAL(i)),
                    E1000_READ_REG(&adapter->hw, E1000_TDLEN(i)));

                txr->queue_status = EM_QUEUE_IDLE;
        }

        /* Set the default values for the Tx Inter Packet Gap timer */
        switch (adapter->hw.mac.type) {
        case e1000_80003es2lan:
                tipg = DEFAULT_82543_TIPG_IPGR1;
                tipg |= DEFAULT_80003ES2LAN_TIPG_IPGR2 <<
                    E1000_TIPG_IPGR2_SHIFT;
                break;
        default:
                /* Fiber/serdes and copper use different IPGT values. */
                if ((adapter->hw.phy.media_type == e1000_media_type_fiber) ||
                    (adapter->hw.phy.media_type ==
                    e1000_media_type_internal_serdes))
                        tipg = DEFAULT_82543_TIPG_IPGT_FIBER;
                else
                        tipg = DEFAULT_82543_TIPG_IPGT_COPPER;
                tipg |= DEFAULT_82543_TIPG_IPGR1 << E1000_TIPG_IPGR1_SHIFT;
                tipg |= DEFAULT_82543_TIPG_IPGR2 << E1000_TIPG_IPGR2_SHIFT;
        }

        E1000_WRITE_REG(&adapter->hw, E1000_TIPG, tipg);
        E1000_WRITE_REG(&adapter->hw, E1000_TIDV, adapter->tx_int_delay.value);

        /* TADV (absolute TX interrupt delay) exists on 82540 and later. */
        if(adapter->hw.mac.type >= e1000_82540)
                E1000_WRITE_REG(&adapter->hw, E1000_TADV,
                    adapter->tx_abs_int_delay.value);

        /* Per-MAC TARC tweaks (speed mode bit / errata workarounds). */
        if ((adapter->hw.mac.type == e1000_82571) ||
            (adapter->hw.mac.type == e1000_82572)) {
                tarc = E1000_READ_REG(&adapter->hw, E1000_TARC(0));
                tarc |= SPEED_MODE_BIT;
                E1000_WRITE_REG(&adapter->hw, E1000_TARC(0), tarc);
        } else if (adapter->hw.mac.type == e1000_80003es2lan) {
                tarc = E1000_READ_REG(&adapter->hw, E1000_TARC(0));
                tarc |= 1;
                E1000_WRITE_REG(&adapter->hw, E1000_TARC(0), tarc);
                tarc = E1000_READ_REG(&adapter->hw, E1000_TARC(1));
                tarc |= 1;
                E1000_WRITE_REG(&adapter->hw, E1000_TARC(1), tarc);
        }

        /* Base command bits for every TX descriptor built later. */
        adapter->txd_cmd = E1000_TXD_CMD_IFCS;
        if (adapter->tx_int_delay.value > 0)
                adapter->txd_cmd |= E1000_TXD_CMD_IDE;

        /* Program the Transmit Control Register */
        tctl = E1000_READ_REG(&adapter->hw, E1000_TCTL);
        tctl &= ~E1000_TCTL_CT;
        tctl |= (E1000_TCTL_PSP | E1000_TCTL_RTLC | E1000_TCTL_EN |
                   (E1000_COLLISION_THRESHOLD << E1000_CT_SHIFT));

        if (adapter->hw.mac.type >= e1000_82571)
                tctl |= E1000_TCTL_MULR;

        /* This write will effectively turn on the transmit unit. */
        E1000_WRITE_REG(&adapter->hw, E1000_TCTL, tctl);

}
3473
3474
3475 /*********************************************************************
3476  *
3477  *  Free all transmit rings.
3478  *
3479  **********************************************************************/
3480 static void
3481 em_free_transmit_structures(struct adapter *adapter)
3482 {
3483         struct tx_ring *txr = adapter->tx_rings;
3484
3485         for (int i = 0; i < adapter->num_queues; i++, txr++) {
3486                 EM_TX_LOCK(txr);
3487                 em_free_transmit_buffers(txr);
3488                 em_dma_free(adapter, &txr->txdma);
3489                 EM_TX_UNLOCK(txr);
3490                 EM_TX_LOCK_DESTROY(txr);
3491         }
3492
3493         free(adapter->tx_rings, M_DEVBUF);
3494 }
3495
3496 /*********************************************************************
3497  *
3498  *  Free transmit ring related data structures.
3499  *
3500  **********************************************************************/
3501 static void
3502 em_free_transmit_buffers(struct tx_ring *txr)
3503 {
3504         struct adapter          *adapter = txr->adapter;
3505         struct em_buffer        *txbuf;
3506
3507         INIT_DEBUGOUT("free_transmit_ring: begin");
3508
3509         if (txr->tx_buffers == NULL)
3510                 return;
3511
3512         for (int i = 0; i < adapter->num_tx_desc; i++) {
3513                 txbuf = &txr->tx_buffers[i];
3514                 if (txbuf->m_head != NULL) {
3515                         bus_dmamap_sync(txr->txtag, txbuf->map,
3516                             BUS_DMASYNC_POSTWRITE);
3517                         bus_dmamap_unload(txr->txtag,
3518                             txbuf->map);
3519                         m_freem(txbuf->m_head);
3520                         txbuf->m_head = NULL;
3521                         if (txbuf->map != NULL) {
3522                                 bus_dmamap_destroy(txr->txtag,
3523                                     txbuf->map);
3524                                 txbuf->map = NULL;
3525                         }
3526                 } else if (txbuf->map != NULL) {
3527                         bus_dmamap_unload(txr->txtag,
3528                             txbuf->map);
3529                         bus_dmamap_destroy(txr->txtag,
3530                             txbuf->map);
3531                         txbuf->map = NULL;
3532                 }
3533         }
3534 #if __FreeBSD_version >= 800000
3535         if (txr->br != NULL)
3536                 buf_ring_free(txr->br, M_DEVBUF);
3537 #endif
3538         if (txr->tx_buffers != NULL) {
3539                 free(txr->tx_buffers, M_DEVBUF);
3540                 txr->tx_buffers = NULL;
3541         }
3542         if (txr->txtag != NULL) {
3543                 bus_dma_tag_destroy(txr->txtag);
3544                 txr->txtag = NULL;
3545         }
3546         return;
3547 }
3548
3549
3550 /*********************************************************************
3551  *  The offload context is protocol specific (TCP/UDP) and thus
3552  *  only needs to be set when the protocol changes. The occasion
3553  *  of a context change can be a performance detriment, and
3554  *  might be better just disabled. The reason arises in the way
3555  *  in which the controller supports pipelined requests from the
3556  *  Tx data DMA. Up to four requests can be pipelined, and they may
3557  *  belong to the same packet or to multiple packets. However all
3558  *  requests for one packet are issued before a request is issued
3559  *  for a subsequent packet and if a request for the next packet
3560  *  requires a context change, that request will be stalled
3561  *  until the previous request completes. This means setting up
3562  *  a new context effectively disables pipelined Tx data DMA which
3563  *  in turn greatly slow down performance to send small sized
3564  *  frames. 
3565  **********************************************************************/
3566 static void
3567 em_transmit_checksum_setup(struct tx_ring *txr, struct mbuf *mp, int ip_off,
3568     struct ip *ip, u32 *txd_upper, u32 *txd_lower)
3569 {
3570         struct adapter                  *adapter = txr->adapter;
3571         struct e1000_context_desc       *TXD = NULL;
3572         struct em_buffer                *tx_buffer;
3573         int                             cur, hdr_len;
3574         u32                             cmd = 0;
3575         u16                             offload = 0;
3576         u8                              ipcso, ipcss, tucso, tucss;
3577
3578         ipcss = ipcso = tucss = tucso = 0;
3579         hdr_len = ip_off + (ip->ip_hl << 2);
3580         cur = txr->next_avail_desc;
3581
3582         /* Setup of IP header checksum. */
3583         if (mp->m_pkthdr.csum_flags & CSUM_IP) {
3584                 *txd_upper |= E1000_TXD_POPTS_IXSM << 8;
3585                 offload |= CSUM_IP;
3586                 ipcss = ip_off;
3587                 ipcso = ip_off + offsetof(struct ip, ip_sum);
3588                 /*
3589                  * Start offset for header checksum calculation.
3590                  * End offset for header checksum calculation.
3591                  * Offset of place to put the checksum.
3592                  */
3593                 TXD = (struct e1000_context_desc *)&txr->tx_base[cur];
3594                 TXD->lower_setup.ip_fields.ipcss = ipcss;
3595                 TXD->lower_setup.ip_fields.ipcse = htole16(hdr_len);
3596                 TXD->lower_setup.ip_fields.ipcso = ipcso;
3597                 cmd |= E1000_TXD_CMD_IP;
3598         }
3599
3600         if (mp->m_pkthdr.csum_flags & CSUM_TCP) {
3601                 *txd_lower = E1000_TXD_CMD_DEXT | E1000_TXD_DTYP_D;
3602                 *txd_upper |= E1000_TXD_POPTS_TXSM << 8;
3603                 offload |= CSUM_TCP;
3604                 tucss = hdr_len;
3605                 tucso = hdr_len + offsetof(struct tcphdr, th_sum);
3606                 /*
3607                  * Setting up new checksum offload context for every frames
3608                  * takes a lot of processing time for hardware. This also
3609                  * reduces performance a lot for small sized frames so avoid
3610                  * it if driver can use previously configured checksum
3611                  * offload context.
3612                  */
3613                 if (txr->last_hw_offload == offload) {
3614                         if (offload & CSUM_IP) {
3615                                 if (txr->last_hw_ipcss == ipcss &&
3616                                     txr->last_hw_ipcso == ipcso &&
3617                                     txr->last_hw_tucss == tucss &&
3618                                     txr->last_hw_tucso == tucso)
3619                                         return;
3620                         } else {
3621                                 if (txr->last_hw_tucss == tucss &&
3622                                     txr->last_hw_tucso == tucso)
3623                                         return;
3624                         }
3625                 }
3626                 txr->last_hw_offload = offload;
3627                 txr->last_hw_tucss = tucss;
3628                 txr->last_hw_tucso = tucso;
3629                 /*
3630                  * Start offset for payload checksum calculation.
3631                  * End offset for payload checksum calculation.
3632                  * Offset of place to put the checksum.
3633                  */
3634                 TXD = (struct e1000_context_desc *)&txr->tx_base[cur];
3635                 TXD->upper_setup.tcp_fields.tucss = hdr_len;
3636                 TXD->upper_setup.tcp_fields.tucse = htole16(0);
3637                 TXD->upper_setup.tcp_fields.tucso = tucso;
3638                 cmd |= E1000_TXD_CMD_TCP;
3639         } else if (mp->m_pkthdr.csum_flags & CSUM_UDP) {
3640                 *txd_lower = E1000_TXD_CMD_DEXT | E1000_TXD_DTYP_D;
3641                 *txd_upper |= E1000_TXD_POPTS_TXSM << 8;
3642                 tucss = hdr_len;
3643                 tucso = hdr_len + offsetof(struct udphdr, uh_sum);
3644                 /*
3645                  * Setting up new checksum offload context for every frames
3646                  * takes a lot of processing time for hardware. This also
3647                  * reduces performance a lot for small sized frames so avoid
3648                  * it if driver can use previously configured checksum
3649                  * offload context.
3650                  */
3651                 if (txr->last_hw_offload == offload) {
3652                         if (offload & CSUM_IP) {
3653                                 if (txr->last_hw_ipcss == ipcss &&
3654                                     txr->last_hw_ipcso == ipcso &&
3655                                     txr->last_hw_tucss == tucss &&
3656                                     txr->last_hw_tucso == tucso)
3657                                         return;
3658                         } else {
3659                                 if (txr->last_hw_tucss == tucss &&
3660                                     txr->last_hw_tucso == tucso)
3661                                         return;
3662                         }
3663                 }
3664                 txr->last_hw_offload = offload;
3665                 txr->last_hw_tucss = tucss;
3666                 txr->last_hw_tucso = tucso;
3667                 /*
3668                  * Start offset for header checksum calculation.
3669                  * End offset for header checksum calculation.
3670                  * Offset of place to put the checksum.
3671                  */
3672                 TXD = (struct e1000_context_desc *)&txr->tx_base[cur];
3673                 TXD->upper_setup.tcp_fields.tucss = tucss;
3674                 TXD->upper_setup.tcp_fields.tucse = htole16(0);
3675                 TXD->upper_setup.tcp_fields.tucso = tucso;
3676         }
3677   
3678         if (offload & CSUM_IP) {
3679                 txr->last_hw_ipcss = ipcss;
3680                 txr->last_hw_ipcso = ipcso;
3681         }
3682
3683         TXD->tcp_seg_setup.data = htole32(0);
3684         TXD->cmd_and_length =
3685             htole32(adapter->txd_cmd | E1000_TXD_CMD_DEXT | cmd);
3686         tx_buffer = &txr->tx_buffers[cur];
3687         tx_buffer->m_head = NULL;
3688         tx_buffer->next_eop = -1;
3689
3690         if (++cur == adapter->num_tx_desc)
3691                 cur = 0;
3692
3693         txr->tx_avail--;
3694         txr->next_avail_desc = cur;
3695 }
3696
3697
3698 /**********************************************************************
3699  *
3700  *  Setup work for hardware segmentation offload (TSO)
3701  *
3702  **********************************************************************/
static void
em_tso_setup(struct tx_ring *txr, struct mbuf *mp, int ip_off,
    struct ip *ip, struct tcphdr *tp, u32 *txd_upper, u32 *txd_lower)
{
        struct adapter                  *adapter = txr->adapter;
        struct e1000_context_desc       *TXD;
        struct em_buffer                *tx_buffer;
        int cur, hdr_len;

        /*
         * In theory we can use the same TSO context if and only if
         * frame is the same type(IP/TCP) and the same MSS. However
         * checking whether a frame has the same IP/TCP structure is
         * hard thing so just ignore that and always restablish a
         * new TSO context.
         */
        hdr_len = ip_off + (ip->ip_hl << 2) + (tp->th_off << 2);
        /* Note: '=' (not '|=') — any prior txd_lower bits are replaced. */
        *txd_lower = (E1000_TXD_CMD_DEXT |      /* Extended descr type */
                      E1000_TXD_DTYP_D |        /* Data descr type */
                      E1000_TXD_CMD_TSE);       /* Do TSE on this packet */

        /* IP and/or TCP header checksum calculation and insertion. */
        *txd_upper = (E1000_TXD_POPTS_IXSM | E1000_TXD_POPTS_TXSM) << 8;

        cur = txr->next_avail_desc;
        tx_buffer = &txr->tx_buffers[cur];
        TXD = (struct e1000_context_desc *) &txr->tx_base[cur];

        /*
         * Start offset for header checksum calculation.
         * End offset for header checksum calculation.
         * Offset of place put the checksum.
         */
        TXD->lower_setup.ip_fields.ipcss = ip_off;
        TXD->lower_setup.ip_fields.ipcse =
            htole16(ip_off + (ip->ip_hl << 2) - 1);
        TXD->lower_setup.ip_fields.ipcso = ip_off + offsetof(struct ip, ip_sum);
        /*
         * Start offset for payload checksum calculation.
         * End offset for payload checksum calculation.
         * Offset of place to put the checksum.
         */
        TXD->upper_setup.tcp_fields.tucss = ip_off + (ip->ip_hl << 2);
        TXD->upper_setup.tcp_fields.tucse = 0;
        TXD->upper_setup.tcp_fields.tucso =
            ip_off + (ip->ip_hl << 2) + offsetof(struct tcphdr, th_sum);
        /*
         * Payload size per packet w/o any headers.
         * Length of all headers up to payload.
         */
        TXD->tcp_seg_setup.fields.mss = htole16(mp->m_pkthdr.tso_segsz);
        TXD->tcp_seg_setup.fields.hdr_len = hdr_len;

        TXD->cmd_and_length = htole32(adapter->txd_cmd |
                                E1000_TXD_CMD_DEXT |    /* Extended descr */
                                E1000_TXD_CMD_TSE |     /* TSE context */
                                E1000_TXD_CMD_IP |      /* Do IP csum */
                                E1000_TXD_CMD_TCP |     /* Do TCP checksum */
                                (mp->m_pkthdr.len - (hdr_len))); /* Total len */

        /* The context descriptor consumes one ring slot of its own. */
        tx_buffer->m_head = NULL;
        tx_buffer->next_eop = -1;

        if (++cur == adapter->num_tx_desc)
                cur = 0;

        txr->tx_avail--;
        txr->next_avail_desc = cur;
        txr->tx_tso = TRUE;
}
3773
3774
3775 /**********************************************************************
3776  *
3777  *  Examine each tx_buffer in the used queue. If the hardware is done
3778  *  processing the packet then free associated resources. The
3779  *  tx_buffer is put back on the free queue.
3780  *
3781  **********************************************************************/
static void
em_txeof(struct tx_ring *txr)
{
        struct adapter  *adapter = txr->adapter;
        int first, last, done, processed;
        struct em_buffer *tx_buffer;
        struct e1000_tx_desc   *tx_desc, *eop_desc;
        struct ifnet   *ifp = adapter->ifp;

        EM_TX_LOCK_ASSERT(txr);
#ifdef DEV_NETMAP
        /*
         * In netmap mode userspace owns the ring: just wake the
         * per-ring waiter and the global one, then skip the normal
         * cleanup entirely.
         */
        if (ifp->if_capenable & IFCAP_NETMAP) {
                struct netmap_adapter *na = NA(ifp);

                selwakeuppri(&na->tx_rings[txr->me].si, PI_NET);
                /* Drop the TX lock before taking the core lock */
                EM_TX_UNLOCK(txr);
                EM_CORE_LOCK(adapter);
                selwakeuppri(&na->tx_si, PI_NET);
                EM_CORE_UNLOCK(adapter);
                EM_TX_LOCK(txr);
                return;
        }
#endif /* DEV_NETMAP */

        /* No work, make sure watchdog is off */
        if (txr->tx_avail == adapter->num_tx_desc) {
                txr->queue_status = EM_QUEUE_IDLE;
                return;
        }

        processed = 0;
        first = txr->next_to_clean;
        tx_desc = &txr->tx_base[first];
        tx_buffer = &txr->tx_buffers[first];
        last = tx_buffer->next_eop;     /* EOP index recorded at transmit time */
        eop_desc = &txr->tx_base[last];

        /*
         * What this does is get the index of the
         * first descriptor AFTER the EOP of the 
         * first packet, that way we can do the
         * simple comparison on the inner while loop.
         */
        if (++last == adapter->num_tx_desc)
                last = 0;
        done = last;

        /* Make the hardware's status writebacks visible to the CPU */
        bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
            BUS_DMASYNC_POSTREAD);

        /* DD set on the EOP descriptor means the whole packet is done */
        while (eop_desc->upper.fields.status & E1000_TXD_STAT_DD) {
                /* We clean the range of the packet */
                while (first != done) {
                        tx_desc->upper.data = 0;
                        tx_desc->lower.data = 0;
                        tx_desc->buffer_addr = 0;
                        ++txr->tx_avail;
                        ++processed;

                        /* Only the mapped segments carry an mbuf to release */
                        if (tx_buffer->m_head) {
                                bus_dmamap_sync(txr->txtag,
                                    tx_buffer->map,
                                    BUS_DMASYNC_POSTWRITE);
                                bus_dmamap_unload(txr->txtag,
                                    tx_buffer->map);
                                m_freem(tx_buffer->m_head);
                                tx_buffer->m_head = NULL;
                        }
                        tx_buffer->next_eop = -1;
                        /* Progress made: reset the watchdog reference time */
                        txr->watchdog_time = ticks;

                        if (++first == adapter->num_tx_desc)
                                first = 0;

                        tx_buffer = &txr->tx_buffers[first];
                        tx_desc = &txr->tx_base[first];
                }
                ++ifp->if_opackets;
                /* See if we can continue to the next packet */
                last = tx_buffer->next_eop;
                if (last != -1) {
                        eop_desc = &txr->tx_base[last];
                        /* Get new done point */
                        if (++last == adapter->num_tx_desc) last = 0;
                        done = last;
                } else
                        break;
        }
        /* Return the (now zeroed) descriptors to the device */
        bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
            BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);

        txr->next_to_clean = first;

        /*
        ** Watchdog calculation, we know there's
        ** work outstanding or the first return
        ** would have been taken, so none processed
        ** for too long indicates a hang. local timer
        ** will examine this and do a reset if needed.
        */
        if ((!processed) && ((ticks - txr->watchdog_time) > EM_WATCHDOG))
                txr->queue_status = EM_QUEUE_HUNG;

        /*
         * If we have a minimum free, clear IFF_DRV_OACTIVE
         * to tell the stack that it is OK to send packets.
         * Notice that all writes of OACTIVE happen under the
         * TX lock which, with a single queue, guarantees 
         * sanity.
         */
        if (txr->tx_avail >= EM_MAX_SCATTER)
                ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;

        /* Disable watchdog if all clean */
        if (txr->tx_avail == adapter->num_tx_desc) {
                txr->queue_status = EM_QUEUE_IDLE;
        } 
}
3900
3901
3902 /*********************************************************************
3903  *
3904  *  Refresh RX descriptor mbufs from system mbuf buffer pool.
3905  *
3906  **********************************************************************/
static void
em_refresh_mbufs(struct rx_ring *rxr, int limit)
{
        struct adapter          *adapter = rxr->adapter;
        struct mbuf             *m;
        bus_dma_segment_t       segs[1];
        struct em_buffer        *rxbuf;
        int                     i, j, error, nsegs;
        bool                    cleaned = FALSE;

        i = j = rxr->next_to_refresh;
        /*
        ** Get one descriptor beyond
        ** our work mark to control
        ** the loop.
        */
        if (++j == adapter->num_rx_desc)
                j = 0;

        while (j != limit) {
                rxbuf = &rxr->rx_buffers[i];
                /* NULL m_head: em_rxeof passed this mbuf up the stack */
                if (rxbuf->m_head == NULL) {
                        m = m_getjcl(M_DONTWAIT, MT_DATA,
                            M_PKTHDR, adapter->rx_mbuf_sz);
                        /*
                        ** If we have a temporary resource shortage
                        ** that causes a failure, just abort refresh
                        ** for now, we will return to this point when
                        ** reinvoked from em_rxeof.
                        */
                        if (m == NULL)
                                goto update;
                } else
                        m = rxbuf->m_head;

                /* Reset the (possibly recycled) mbuf to a full, fresh cluster */
                m->m_len = m->m_pkthdr.len = adapter->rx_mbuf_sz;
                m->m_flags |= M_PKTHDR;
                m->m_data = m->m_ext.ext_buf;

                /* Use bus_dma machinery to setup the memory mapping  */
                error = bus_dmamap_load_mbuf_sg(rxr->rxtag, rxbuf->map,
                    m, segs, &nsegs, BUS_DMA_NOWAIT);
                if (error != 0) {
                        printf("Refresh mbufs: hdr dmamap load"
                            " failure - %d\n", error);
                        m_free(m);
                        rxbuf->m_head = NULL;
                        goto update;
                }
                rxbuf->m_head = m;
                bus_dmamap_sync(rxr->rxtag,
                    rxbuf->map, BUS_DMASYNC_PREREAD);
                /* Hand the buffer's bus address to the hardware */
                rxr->rx_base[i].buffer_addr = htole64(segs[0].ds_addr);
                cleaned = TRUE;

                i = j; /* Next is precalulated for us */
                rxr->next_to_refresh = i;
                /* Calculate next controlling index */
                if (++j == adapter->num_rx_desc)
                        j = 0;
        }
update:
        /*
        ** Update the tail pointer only if,
        ** and as far as we have refreshed.
        */
        if (cleaned)
                E1000_WRITE_REG(&adapter->hw,
                    E1000_RDT(rxr->me), rxr->next_to_refresh);

        return;
}
3979
3980
3981 /*********************************************************************
3982  *
3983  *  Allocate memory for rx_buffer structures. Since we use one
3984  *  rx_buffer per received packet, the maximum number of rx_buffer's
3985  *  that we'll need is equal to the number of receive descriptors
3986  *  that we've allocated.
3987  *
3988  **********************************************************************/
3989 static int
3990 em_allocate_receive_buffers(struct rx_ring *rxr)
3991 {
3992         struct adapter          *adapter = rxr->adapter;
3993         device_t                dev = adapter->dev;
3994         struct em_buffer        *rxbuf;
3995         int                     error;
3996
3997         rxr->rx_buffers = malloc(sizeof(struct em_buffer) *
3998             adapter->num_rx_desc, M_DEVBUF, M_NOWAIT | M_ZERO);
3999         if (rxr->rx_buffers == NULL) {
4000                 device_printf(dev, "Unable to allocate rx_buffer memory\n");
4001                 return (ENOMEM);
4002         }
4003
4004         error = bus_dma_tag_create(bus_get_dma_tag(dev), /* parent */
4005                                 1, 0,                   /* alignment, bounds */
4006                                 BUS_SPACE_MAXADDR,      /* lowaddr */
4007                                 BUS_SPACE_MAXADDR,      /* highaddr */
4008                                 NULL, NULL,             /* filter, filterarg */
4009                                 MJUM9BYTES,             /* maxsize */
4010                                 1,                      /* nsegments */
4011                                 MJUM9BYTES,             /* maxsegsize */
4012                                 0,                      /* flags */
4013                                 NULL,                   /* lockfunc */
4014                                 NULL,                   /* lockarg */
4015                                 &rxr->rxtag);
4016         if (error) {
4017                 device_printf(dev, "%s: bus_dma_tag_create failed %d\n",
4018                     __func__, error);
4019                 goto fail;
4020         }
4021
4022         rxbuf = rxr->rx_buffers;
4023         for (int i = 0; i < adapter->num_rx_desc; i++, rxbuf++) {
4024                 rxbuf = &rxr->rx_buffers[i];
4025                 error = bus_dmamap_create(rxr->rxtag, BUS_DMA_NOWAIT,
4026                     &rxbuf->map);
4027                 if (error) {
4028                         device_printf(dev, "%s: bus_dmamap_create failed: %d\n",
4029                             __func__, error);
4030                         goto fail;
4031                 }
4032         }
4033
4034         return (0);
4035
4036 fail:
4037         em_free_receive_structures(adapter);
4038         return (error);
4039 }
4040
4041
4042 /*********************************************************************
4043  *
4044  *  Initialize a receive ring and its buffers.
4045  *
4046  **********************************************************************/
static int
em_setup_receive_ring(struct rx_ring *rxr)
{
        struct  adapter         *adapter = rxr->adapter;
        struct em_buffer        *rxbuf;
        bus_dma_segment_t       seg[1];
        int                     rsize, nsegs, error = 0;
#ifdef DEV_NETMAP
        struct netmap_adapter *na = NA(adapter->ifp);
        struct netmap_slot *slot;
#endif


        /* Clear the ring contents */
        EM_RX_LOCK(rxr);
        rsize = roundup2(adapter->num_rx_desc *
            sizeof(struct e1000_rx_desc), EM_DBA_ALIGN);
        bzero((void *)rxr->rx_base, rsize);
#ifdef DEV_NETMAP
        /* Non-NULL slot means a netmap client owns the ring buffers */
        slot = netmap_reset(na, NR_RX, 0, 0);
#endif

        /*
        ** Free current RX buffer structs and their mbufs
        */
        for (int i = 0; i < adapter->num_rx_desc; i++) {
                rxbuf = &rxr->rx_buffers[i];
                if (rxbuf->m_head != NULL) {
                        bus_dmamap_sync(rxr->rxtag, rxbuf->map,
                            BUS_DMASYNC_POSTREAD);
                        bus_dmamap_unload(rxr->rxtag, rxbuf->map);
                        m_freem(rxbuf->m_head);
                        rxbuf->m_head = NULL; /* mark as freed */
                }
        }

        /* Now replenish the mbufs */
        for (int j = 0; j != adapter->num_rx_desc; ++j) {
                rxbuf = &rxr->rx_buffers[j];
#ifdef DEV_NETMAP
                if (slot) {
                        /* Map the netmap-provided buffer instead of an mbuf */
                        int si = netmap_idx_n2k(&na->rx_rings[rxr->me], j);
                        uint64_t paddr;
                        void *addr;

                        addr = PNMB(slot + si, &paddr);
                        netmap_load_map(rxr->rxtag, rxbuf->map, addr);
                        /* Update descriptor */
                        rxr->rx_base[j].buffer_addr = htole64(paddr);
                        continue;
                }
#endif /* DEV_NETMAP */
                rxbuf->m_head = m_getjcl(M_DONTWAIT, MT_DATA,
                    M_PKTHDR, adapter->rx_mbuf_sz);
                if (rxbuf->m_head == NULL) {
                        /* Partial fill: fall through to unlock and report */
                        error = ENOBUFS;
                        goto fail;
                }
                rxbuf->m_head->m_len = adapter->rx_mbuf_sz;
                rxbuf->m_head->m_flags &= ~M_HASFCS; /* we strip it */
                rxbuf->m_head->m_pkthdr.len = adapter->rx_mbuf_sz;

                /* Get the memory mapping */
                error = bus_dmamap_load_mbuf_sg(rxr->rxtag,
                    rxbuf->map, rxbuf->m_head, seg,
                    &nsegs, BUS_DMA_NOWAIT);
                if (error != 0) {
                        m_freem(rxbuf->m_head);
                        rxbuf->m_head = NULL;
                        goto fail;
                }
                bus_dmamap_sync(rxr->rxtag,
                    rxbuf->map, BUS_DMASYNC_PREREAD);

                /* Update descriptor */
                rxr->rx_base[j].buffer_addr = htole64(seg[0].ds_addr);
        }
        rxr->next_to_check = 0;
        rxr->next_to_refresh = 0;
        bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
            BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);

fail:
        /* Success path also exits here with error still 0 */
        EM_RX_UNLOCK(rxr);
        return (error);
}
4133
4134 /*********************************************************************
4135  *
4136  *  Initialize all receive rings.
4137  *
4138  **********************************************************************/
static int
em_setup_receive_structures(struct adapter *adapter)
{
        struct rx_ring *rxr = adapter->rx_rings;
        int q;

        /* Initialize each ring; stop at the first failure */
        for (q = 0; q < adapter->num_queues; q++, rxr++)
                if (em_setup_receive_ring(rxr))
                        goto fail;

        return (0);
fail:
        /*
         * Free RX buffers allocated so far, we will only handle
         * the rings that completed, the failing case will have
         * cleaned up for itself. 'q' failed, so its the terminus.
         */
        for (int i = 0; i < q; ++i) {
                rxr = &adapter->rx_rings[i];
                for (int n = 0; n < adapter->num_rx_desc; n++) {
                        struct em_buffer *rxbuf;
                        rxbuf = &rxr->rx_buffers[n];
                        if (rxbuf->m_head != NULL) {
                                bus_dmamap_sync(rxr->rxtag, rxbuf->map,
                                  BUS_DMASYNC_POSTREAD);
                                bus_dmamap_unload(rxr->rxtag, rxbuf->map);
                                m_freem(rxbuf->m_head);
                                rxbuf->m_head = NULL;
                        }
                }
                rxr->next_to_check = 0;
                rxr->next_to_refresh = 0;
        }

        return (ENOBUFS);
}
4175
4176 /*********************************************************************
4177  *
4178  *  Free all receive rings.
4179  *
4180  **********************************************************************/
4181 static void
4182 em_free_receive_structures(struct adapter *adapter)
4183 {
4184         struct rx_ring *rxr = adapter->rx_rings;
4185
4186         for (int i = 0; i < adapter->num_queues; i++, rxr++) {
4187                 em_free_receive_buffers(rxr);
4188                 /* Free the ring memory as well */
4189                 em_dma_free(adapter, &rxr->rxdma);
4190                 EM_RX_LOCK_DESTROY(rxr);
4191         }
4192
4193         free(adapter->rx_rings, M_DEVBUF);
4194 }
4195
4196
4197 /*********************************************************************
4198  *
4199  *  Free receive ring data structures
4200  *
4201  **********************************************************************/
static void
em_free_receive_buffers(struct rx_ring *rxr)
{
        struct adapter          *adapter = rxr->adapter;
        struct em_buffer        *rxbuf = NULL;

        INIT_DEBUGOUT("free_receive_buffers: begin");

        if (rxr->rx_buffers != NULL) {
                for (int i = 0; i < adapter->num_rx_desc; i++) {
                        rxbuf = &rxr->rx_buffers[i];
                        /* Unload and destroy the DMA map first */
                        if (rxbuf->map != NULL) {
                                bus_dmamap_sync(rxr->rxtag, rxbuf->map,
                                    BUS_DMASYNC_POSTREAD);
                                bus_dmamap_unload(rxr->rxtag, rxbuf->map);
                                bus_dmamap_destroy(rxr->rxtag, rxbuf->map);
                        }
                        /* Then release the mbuf, if one is attached */
                        if (rxbuf->m_head != NULL) {
                                m_freem(rxbuf->m_head);
                                rxbuf->m_head = NULL;
                        }
                }
                free(rxr->rx_buffers, M_DEVBUF);
                rxr->rx_buffers = NULL;
                rxr->next_to_check = 0;
                rxr->next_to_refresh = 0;
        }

        /* Finally tear down the buffer DMA tag itself */
        if (rxr->rxtag != NULL) {
                bus_dma_tag_destroy(rxr->rxtag);
                rxr->rxtag = NULL;
        }

        return;
}
4237
4238
4239 /*********************************************************************
4240  *
4241  *  Enable receive unit.
4242  *
4243  **********************************************************************/
/* Interrupt throttling: target interrupt rate and the derived ITR value */
#define MAX_INTS_PER_SEC        8000
/* Parenthesized so the macro expands safely inside larger expressions */
#define DEFAULT_ITR             (1000000000/(MAX_INTS_PER_SEC * 256))
4246
static void
em_initialize_receive_unit(struct adapter *adapter)
{
        struct rx_ring  *rxr = adapter->rx_rings;
        struct ifnet    *ifp = adapter->ifp;
        struct e1000_hw *hw = &adapter->hw;
        u64     bus_addr;
        u32     rctl, rxcsum;

        INIT_DEBUGOUT("em_initialize_receive_units: begin");

        /*
         * Make sure receives are disabled while setting
         * up the descriptor ring
         */
        rctl = E1000_READ_REG(hw, E1000_RCTL);
        /* Do not disable if ever enabled on this hardware */
        if ((hw->mac.type != e1000_82574) && (hw->mac.type != e1000_82583))
                E1000_WRITE_REG(hw, E1000_RCTL, rctl & ~E1000_RCTL_EN);

        /* Absolute RX interrupt delay */
        E1000_WRITE_REG(&adapter->hw, E1000_RADV,
            adapter->rx_abs_int_delay.value);
        /*
         * Set the interrupt throttling rate. Value is calculated
         * as DEFAULT_ITR = 1/(MAX_INTS_PER_SEC * 256ns)
         */
        E1000_WRITE_REG(hw, E1000_ITR, DEFAULT_ITR);

        /*
        ** When using MSIX interrupts we need to throttle
        ** using the EITR register (82574 only)
        */
        if (hw->mac.type == e1000_82574) {
                for (int i = 0; i < 4; i++)
                        E1000_WRITE_REG(hw, E1000_EITR_82574(i),
                            DEFAULT_ITR);
                /* Disable accelerated acknowledge */
                E1000_WRITE_REG(hw, E1000_RFCTL, E1000_RFCTL_ACK_DIS);
        }

        /* Enable IP and TCP/UDP checksum offload if requested */
        if (ifp->if_capenable & IFCAP_RXCSUM) {
                rxcsum = E1000_READ_REG(hw, E1000_RXCSUM);
                rxcsum |= (E1000_RXCSUM_IPOFL | E1000_RXCSUM_TUOFL);
                E1000_WRITE_REG(hw, E1000_RXCSUM, rxcsum);
        }

        /*
        ** XXX TEMPORARY WORKAROUND: on some systems with 82573
        ** long latencies are observed, like Lenovo X60. This
        ** change eliminates the problem, but since having positive
        ** values in RDTR is a known source of problems on other
        ** platforms another solution is being sought.
        */
        if (hw->mac.type == e1000_82573)
                E1000_WRITE_REG(hw, E1000_RDTR, 0x20);

        for (int i = 0; i < adapter->num_queues; i++, rxr++) {
                /* Setup the Base and Length of the Rx Descriptor Ring */
                bus_addr = rxr->rxdma.dma_paddr;
                E1000_WRITE_REG(hw, E1000_RDLEN(i),
                    adapter->num_rx_desc * sizeof(struct e1000_rx_desc));
                E1000_WRITE_REG(hw, E1000_RDBAH(i), (u32)(bus_addr >> 32));
                E1000_WRITE_REG(hw, E1000_RDBAL(i), (u32)bus_addr);
                /* Setup the Head and Tail Descriptor Pointers */
                E1000_WRITE_REG(hw, E1000_RDH(i), 0);
#ifdef DEV_NETMAP
                /*
                 * an init() while a netmap client is active must
                 * preserve the rx buffers passed to userspace.
                 * In this driver it means we adjust RDT to
                 * something different from na->num_rx_desc - 1.
                 */
                if (ifp->if_capenable & IFCAP_NETMAP) {
                        struct netmap_adapter *na = NA(adapter->ifp);
                        struct netmap_kring *kring = &na->rx_rings[i];
                        int t = na->num_rx_desc - 1 - kring->nr_hwavail;

                        E1000_WRITE_REG(hw, E1000_RDT(i), t);
                } else
#endif /* DEV_NETMAP */
                E1000_WRITE_REG(hw, E1000_RDT(i), adapter->num_rx_desc - 1);
        }

        /* Set PTHRESH for improved jumbo performance */
        if (((adapter->hw.mac.type == e1000_ich9lan) ||
            (adapter->hw.mac.type == e1000_pch2lan) ||
            (adapter->hw.mac.type == e1000_ich10lan)) &&
            (ifp->if_mtu > ETHERMTU)) {
                u32 rxdctl = E1000_READ_REG(hw, E1000_RXDCTL(0));
                E1000_WRITE_REG(hw, E1000_RXDCTL(0), rxdctl | 3);
        }
                
        /* Toggle the pch2lan jumbo workaround to match the current MTU */
        if (adapter->hw.mac.type == e1000_pch2lan) {
                if (ifp->if_mtu > ETHERMTU)
                        e1000_lv_jumbo_workaround_ich8lan(hw, TRUE);
                else
                        e1000_lv_jumbo_workaround_ich8lan(hw, FALSE);
        }

        /* Setup the Receive Control Register */
        rctl &= ~(3 << E1000_RCTL_MO_SHIFT);
        rctl |= E1000_RCTL_EN | E1000_RCTL_BAM |
            E1000_RCTL_LBM_NO | E1000_RCTL_RDMTS_HALF |
            (hw->mac.mc_filter_type << E1000_RCTL_MO_SHIFT);

        /* Strip the CRC */
        rctl |= E1000_RCTL_SECRC;

        /* Make sure VLAN Filters are off */
        rctl &= ~E1000_RCTL_VFE;
        rctl &= ~E1000_RCTL_SBP;

        /* Select hardware buffer size matching the mbuf cluster size */
        if (adapter->rx_mbuf_sz == MCLBYTES)
                rctl |= E1000_RCTL_SZ_2048;
        else if (adapter->rx_mbuf_sz == MJUMPAGESIZE)
                rctl |= E1000_RCTL_SZ_4096 | E1000_RCTL_BSEX;
        else if (adapter->rx_mbuf_sz > MJUMPAGESIZE)
                rctl |= E1000_RCTL_SZ_8192 | E1000_RCTL_BSEX;

        /* Long Packet Enable only for jumbo MTUs */
        if (ifp->if_mtu > ETHERMTU)
                rctl |= E1000_RCTL_LPE;
        else
                rctl &= ~E1000_RCTL_LPE;

        /* Write out the settings */
        E1000_WRITE_REG(hw, E1000_RCTL, rctl);

        return;
}
4376
4377
4378 /*********************************************************************
4379  *
4380  *  This routine executes in interrupt context. It replenishes
4381  *  the mbufs in the descriptor and sends data which has been
4382  *  dma'ed into host memory to upper layer.
4383  *
4384  *  We loop at most count times if count is > 0, or until done if
4385  *  count < 0.
4386  *  
4387  *  For polling we also now return the number of cleaned packets
4388  *********************************************************************/
static bool
em_rxeof(struct rx_ring *rxr, int count, int *done)
{
        struct adapter          *adapter = rxr->adapter;
        struct ifnet            *ifp = adapter->ifp;
        struct mbuf             *mp, *sendmp;
        u8                      status = 0;
        u16                     len;
        int                     i, processed, rxdone = 0;
        bool                    eop;
        struct e1000_rx_desc    *cur;

        EM_RX_LOCK(rxr);

#ifdef DEV_NETMAP
        /*
         * Netmap mode: flag the interrupt and wake the waiters,
         * userspace does the actual receive processing.
         */
        if (ifp->if_capenable & IFCAP_NETMAP) {
                struct netmap_adapter *na = NA(ifp);

                na->rx_rings[rxr->me].nr_kflags |= NKR_PENDINTR;
                selwakeuppri(&na->rx_rings[rxr->me].si, PI_NET);
                EM_RX_UNLOCK(rxr);
                EM_CORE_LOCK(adapter);
                selwakeuppri(&na->rx_si, PI_NET);
                EM_CORE_UNLOCK(adapter);
                return (0);
        }
#endif /* DEV_NETMAP */

        for (i = rxr->next_to_check, processed = 0; count != 0;) {

                if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
                        break;

                /* Pick up the device's latest descriptor writebacks */
                bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
                    BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);

                cur = &rxr->rx_base[i];
                status = cur->status;
                mp = sendmp = NULL;

                /* DD clear means the hardware hasn't filled this one yet */
                if ((status & E1000_RXD_STAT_DD) == 0)
                        break;

                len = le16toh(cur->length);
                eop = (status & E1000_RXD_STAT_EOP) != 0;

                /* Drop errored frames, and the rest of a chain being dropped */
                if ((cur->errors & E1000_RXD_ERR_FRAME_ERR_MASK) ||
                    (rxr->discard == TRUE)) {
                        adapter->dropped_pkts++;
                        ++rxr->rx_discarded;
                        if (!eop) /* Catch subsequent segs */
                                rxr->discard = TRUE;
                        else
                                rxr->discard = FALSE;
                        em_rx_discard(rxr, i);
                        goto next_desc;
                }

                /* Assign correct length to the current fragment */
                mp = rxr->rx_buffers[i].m_head;
                mp->m_len = len;

                /* Trigger for refresh */
                rxr->rx_buffers[i].m_head = NULL;

                /* First segment? */
                if (rxr->fmp == NULL) {
                        mp->m_pkthdr.len = len;
                        rxr->fmp = rxr->lmp = mp;
                } else {
                        /* Chain mbuf's together */
                        mp->m_flags &= ~M_PKTHDR;
                        rxr->lmp->m_next = mp;
                        rxr->lmp = mp;
                        rxr->fmp->m_pkthdr.len += len;
                }

                /* End of packet: finalize the chain and hand it up */
                if (eop) {
                        --count;
                        sendmp = rxr->fmp;
                        sendmp->m_pkthdr.rcvif = ifp;
                        ifp->if_ipackets++;
                        em_receive_checksum(cur, sendmp);
#ifndef __NO_STRICT_ALIGNMENT
                        if (adapter->max_frame_size >
                            (MCLBYTES - ETHER_ALIGN) &&
                            em_fixup_rx(rxr) != 0)
                                goto skip;
#endif
                        if (status & E1000_RXD_STAT_VP) {
                                sendmp->m_pkthdr.ether_vtag =
                                    le16toh(cur->special);
                                sendmp->m_flags |= M_VLANTAG;
                        }
#ifndef __NO_STRICT_ALIGNMENT
skip:
#endif
                        rxr->fmp = rxr->lmp = NULL;
                }
next_desc:
                /* Zero out the receive descriptors status. */
                cur->status = 0;
                ++rxdone;       /* cumulative for POLL */
                ++processed;

                /* Advance our pointers to the next descriptor. */
                if (++i == adapter->num_rx_desc)
                        i = 0;

                /*
                 * Send to the stack with the RX lock dropped; reload
                 * the index afterwards in case it moved meanwhile.
                 */
                if (sendmp != NULL) {
                        rxr->next_to_check = i;
                        EM_RX_UNLOCK(rxr);
                        (*ifp->if_input)(ifp, sendmp);
                        EM_RX_LOCK(rxr);
                        i = rxr->next_to_check;
                }

                /* Only refresh mbufs every 8 descriptors */
                if (processed == 8) {
                        em_refresh_mbufs(rxr, i);
                        processed = 0;
                }
        }

        /* Catch any remaining refresh work */
        if (e1000_rx_unrefreshed(rxr))
                em_refresh_mbufs(rxr, i);

        rxr->next_to_check = i;
        if (done != NULL)
                *done = rxdone;
        EM_RX_UNLOCK(rxr);

        /* TRUE if the last descriptor examined was ready (more may follow) */
        return ((status & E1000_RXD_STAT_DD) ? TRUE : FALSE);
}
4525
4526 static __inline void
4527 em_rx_discard(struct rx_ring *rxr, int i)
4528 {
4529         struct em_buffer        *rbuf;
4530
4531         rbuf = &rxr->rx_buffers[i];
4532         /* Free any previous pieces */
4533         if (rxr->fmp != NULL) {
4534                 rxr->fmp->m_flags |= M_PKTHDR;
4535                 m_freem(rxr->fmp);
4536                 rxr->fmp = NULL;
4537                 rxr->lmp = NULL;
4538         }
4539         /*
4540         ** Free buffer and allow em_refresh_mbufs()
4541         ** to clean up and recharge buffer.
4542         */
4543         if (rbuf->m_head) {
4544                 m_free(rbuf->m_head);
4545                 rbuf->m_head = NULL;
4546         }
4547         return;
4548 }
4549
4550 #ifndef __NO_STRICT_ALIGNMENT
/*
 * When jumbo frames are enabled we should realign the entire payload on
 * architectures with strict alignment. This is a serious design mistake of
 * the 8254x as it nullifies DMA operations. The 8254x only allows the RX
 * buffer size to be 2048/4096/8192/16384. What we really want is
 * 2048 - ETHER_ALIGN to align its payload. On architectures without strict
 * alignment restrictions the 8254x still performs unaligned memory accesses,
 * which reduce performance as well. To avoid copying an entire frame to
 * realign it, we allocate a new mbuf and copy only the ethernet header into
 * it. The new mbuf is then prepended to the existing mbuf chain.
 *
 * Be aware, best performance of the 8254x is achieved only when jumbo
 * frames are not used at all on architectures with strict alignment.
 */
4565 static int
4566 em_fixup_rx(struct rx_ring *rxr)
4567 {
4568         struct adapter *adapter = rxr->adapter;
4569         struct mbuf *m, *n;
4570         int error;
4571
4572         error = 0;
4573         m = rxr->fmp;
4574         if (m->m_len <= (MCLBYTES - ETHER_HDR_LEN)) {
4575                 bcopy(m->m_data, m->m_data + ETHER_HDR_LEN, m->m_len);
4576                 m->m_data += ETHER_HDR_LEN;
4577         } else {
4578                 MGETHDR(n, M_DONTWAIT, MT_DATA);
4579                 if (n != NULL) {
4580                         bcopy(m->m_data, n->m_data, ETHER_HDR_LEN);
4581                         m->m_data += ETHER_HDR_LEN;
4582                         m->m_len -= ETHER_HDR_LEN;
4583                         n->m_len = ETHER_HDR_LEN;
4584                         M_MOVE_PKTHDR(n, m);
4585                         n->m_next = m;
4586                         rxr->fmp = n;
4587                 } else {
4588                         adapter->dropped_pkts++;
4589                         m_freem(rxr->fmp);
4590                         rxr->fmp = NULL;
4591                         error = ENOMEM;
4592                 }
4593         }
4594
4595         return (error);
4596 }
4597 #endif
4598
4599 /*********************************************************************
4600  *
4601  *  Verify that the hardware indicated that the checksum is valid.
4602  *  Inform the stack about the status of checksum so that stack
4603  *  doesn't spend time verifying the checksum.
4604  *
4605  *********************************************************************/
4606 static void
4607 em_receive_checksum(struct e1000_rx_desc *rx_desc, struct mbuf *mp)
4608 {
4609         /* Ignore Checksum bit is set */
4610         if (rx_desc->status & E1000_RXD_STAT_IXSM) {
4611                 mp->m_pkthdr.csum_flags = 0;
4612                 return;
4613         }
4614
4615         if (rx_desc->status & E1000_RXD_STAT_IPCS) {
4616                 /* Did it pass? */
4617                 if (!(rx_desc->errors & E1000_RXD_ERR_IPE)) {
4618                         /* IP Checksum Good */
4619                         mp->m_pkthdr.csum_flags = CSUM_IP_CHECKED;
4620                         mp->m_pkthdr.csum_flags |= CSUM_IP_VALID;
4621
4622                 } else {
4623                         mp->m_pkthdr.csum_flags = 0;
4624                 }
4625         }
4626
4627         if (rx_desc->status & E1000_RXD_STAT_TCPCS) {
4628                 /* Did it pass? */
4629                 if (!(rx_desc->errors & E1000_RXD_ERR_TCPE)) {
4630                         mp->m_pkthdr.csum_flags |=
4631                         (CSUM_DATA_VALID | CSUM_PSEUDO_HDR);
4632                         mp->m_pkthdr.csum_data = htons(0xffff);
4633                 }
4634         }
4635 }
4636
/*
 * This routine is run via a vlan
 * config EVENT
 */
4641 static void
4642 em_register_vlan(void *arg, struct ifnet *ifp, u16 vtag)
4643 {
4644         struct adapter  *adapter = ifp->if_softc;
4645         u32             index, bit;
4646
4647         if (ifp->if_softc !=  arg)   /* Not our event */
4648                 return;
4649
4650         if ((vtag == 0) || (vtag > 4095))       /* Invalid ID */
4651                 return;
4652
4653         EM_CORE_LOCK(adapter);
4654         index = (vtag >> 5) & 0x7F;
4655         bit = vtag & 0x1F;
4656         adapter->shadow_vfta[index] |= (1 << bit);
4657         ++adapter->num_vlans;
4658         /* Re-init to load the changes */
4659         if (ifp->if_capenable & IFCAP_VLAN_HWFILTER)
4660                 em_init_locked(adapter);
4661         EM_CORE_UNLOCK(adapter);
4662 }
4663
/*
 * This routine is run via a vlan
 * unconfig EVENT
 */
4668 static void
4669 em_unregister_vlan(void *arg, struct ifnet *ifp, u16 vtag)
4670 {
4671         struct adapter  *adapter = ifp->if_softc;
4672         u32             index, bit;
4673
4674         if (ifp->if_softc !=  arg)
4675                 return;
4676
4677         if ((vtag == 0) || (vtag > 4095))       /* Invalid */
4678                 return;
4679
4680         EM_CORE_LOCK(adapter);
4681         index = (vtag >> 5) & 0x7F;
4682         bit = vtag & 0x1F;
4683         adapter->shadow_vfta[index] &= ~(1 << bit);
4684         --adapter->num_vlans;
4685         /* Re-init to load the changes */
4686         if (ifp->if_capenable & IFCAP_VLAN_HWFILTER)
4687                 em_init_locked(adapter);
4688         EM_CORE_UNLOCK(adapter);
4689 }
4690
4691 static void
4692 em_setup_vlan_hw_support(struct adapter *adapter)
4693 {
4694         struct e1000_hw *hw = &adapter->hw;
4695         u32             reg;
4696
4697         /*
4698         ** We get here thru init_locked, meaning
4699         ** a soft reset, this has already cleared
4700         ** the VFTA and other state, so if there
4701         ** have been no vlan's registered do nothing.
4702         */
4703         if (adapter->num_vlans == 0)
4704                 return;
4705
4706         /*
4707         ** A soft reset zero's out the VFTA, so
4708         ** we need to repopulate it now.
4709         */
4710         for (int i = 0; i < EM_VFTA_SIZE; i++)
4711                 if (adapter->shadow_vfta[i] != 0)
4712                         E1000_WRITE_REG_ARRAY(hw, E1000_VFTA,
4713                             i, adapter->shadow_vfta[i]);
4714
4715         reg = E1000_READ_REG(hw, E1000_CTRL);
4716         reg |= E1000_CTRL_VME;
4717         E1000_WRITE_REG(hw, E1000_CTRL, reg);
4718
4719         /* Enable the Filter Table */
4720         reg = E1000_READ_REG(hw, E1000_RCTL);
4721         reg &= ~E1000_RCTL_CFIEN;
4722         reg |= E1000_RCTL_VFE;
4723         E1000_WRITE_REG(hw, E1000_RCTL, reg);
4724 }
4725
4726 static void
4727 em_enable_intr(struct adapter *adapter)
4728 {
4729         struct e1000_hw *hw = &adapter->hw;
4730         u32 ims_mask = IMS_ENABLE_MASK;
4731
4732         if (hw->mac.type == e1000_82574) {
4733                 E1000_WRITE_REG(hw, EM_EIAC, EM_MSIX_MASK);
4734                 ims_mask |= EM_MSIX_MASK;
4735         } 
4736         E1000_WRITE_REG(hw, E1000_IMS, ims_mask);
4737 }
4738
4739 static void
4740 em_disable_intr(struct adapter *adapter)
4741 {
4742         struct e1000_hw *hw = &adapter->hw;
4743
4744         if (hw->mac.type == e1000_82574)
4745                 E1000_WRITE_REG(hw, EM_EIAC, 0);
4746         E1000_WRITE_REG(&adapter->hw, E1000_IMC, 0xffffffff);
4747 }
4748
4749 /*
4750  * Bit of a misnomer, what this really means is
4751  * to enable OS management of the system... aka
4752  * to disable special hardware management features 
4753  */
4754 static void
4755 em_init_manageability(struct adapter *adapter)
4756 {
4757         /* A shared code workaround */
4758 #define E1000_82542_MANC2H E1000_MANC2H
4759         if (adapter->has_manage) {
4760                 int manc2h = E1000_READ_REG(&adapter->hw, E1000_MANC2H);
4761                 int manc = E1000_READ_REG(&adapter->hw, E1000_MANC);
4762
4763                 /* disable hardware interception of ARP */
4764                 manc &= ~(E1000_MANC_ARP_EN);
4765
4766                 /* enable receiving management packets to the host */
4767                 manc |= E1000_MANC_EN_MNG2HOST;
4768 #define E1000_MNG2HOST_PORT_623 (1 << 5)
4769 #define E1000_MNG2HOST_PORT_664 (1 << 6)
4770                 manc2h |= E1000_MNG2HOST_PORT_623;
4771                 manc2h |= E1000_MNG2HOST_PORT_664;
4772                 E1000_WRITE_REG(&adapter->hw, E1000_MANC2H, manc2h);
4773                 E1000_WRITE_REG(&adapter->hw, E1000_MANC, manc);
4774         }
4775 }
4776
4777 /*
4778  * Give control back to hardware management
4779  * controller if there is one.
4780  */
4781 static void
4782 em_release_manageability(struct adapter *adapter)
4783 {
4784         if (adapter->has_manage) {
4785                 int manc = E1000_READ_REG(&adapter->hw, E1000_MANC);
4786
4787                 /* re-enable hardware interception of ARP */
4788                 manc |= E1000_MANC_ARP_EN;
4789                 manc &= ~E1000_MANC_EN_MNG2HOST;
4790
4791                 E1000_WRITE_REG(&adapter->hw, E1000_MANC, manc);
4792         }
4793 }
4794
4795 /*
4796  * em_get_hw_control sets the {CTRL_EXT|FWSM}:DRV_LOAD bit.
4797  * For ASF and Pass Through versions of f/w this means
4798  * that the driver is loaded. For AMT version type f/w
4799  * this means that the network i/f is open.
4800  */
4801 static void
4802 em_get_hw_control(struct adapter *adapter)
4803 {
4804         u32 ctrl_ext, swsm;
4805
4806         if (adapter->hw.mac.type == e1000_82573) {
4807                 swsm = E1000_READ_REG(&adapter->hw, E1000_SWSM);
4808                 E1000_WRITE_REG(&adapter->hw, E1000_SWSM,
4809                     swsm | E1000_SWSM_DRV_LOAD);
4810                 return;
4811         }
4812         /* else */
4813         ctrl_ext = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
4814         E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT,
4815             ctrl_ext | E1000_CTRL_EXT_DRV_LOAD);
4816         return;
4817 }
4818
4819 /*
4820  * em_release_hw_control resets {CTRL_EXT|FWSM}:DRV_LOAD bit.
4821  * For ASF and Pass Through versions of f/w this means that
4822  * the driver is no longer loaded. For AMT versions of the
4823  * f/w this means that the network i/f is closed.
4824  */
4825 static void
4826 em_release_hw_control(struct adapter *adapter)
4827 {
4828         u32 ctrl_ext, swsm;
4829
4830         if (!adapter->has_manage)
4831                 return;
4832
4833         if (adapter->hw.mac.type == e1000_82573) {
4834                 swsm = E1000_READ_REG(&adapter->hw, E1000_SWSM);
4835                 E1000_WRITE_REG(&adapter->hw, E1000_SWSM,
4836                     swsm & ~E1000_SWSM_DRV_LOAD);
4837                 return;
4838         }
4839         /* else */
4840         ctrl_ext = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
4841         E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT,
4842             ctrl_ext & ~E1000_CTRL_EXT_DRV_LOAD);
4843         return;
4844 }
4845
4846 static int
4847 em_is_valid_ether_addr(u8 *addr)
4848 {
4849         char zero_addr[6] = { 0, 0, 0, 0, 0, 0 };
4850
4851         if ((addr[0] & 1) || (!bcmp(addr, zero_addr, ETHER_ADDR_LEN))) {
4852                 return (FALSE);
4853         }
4854
4855         return (TRUE);
4856 }
4857
4858 /*
4859 ** Parse the interface capabilities with regard
4860 ** to both system management and wake-on-lan for
4861 ** later use.
4862 */
4863 static void
4864 em_get_wakeup(device_t dev)
4865 {
4866         struct adapter  *adapter = device_get_softc(dev);
4867         u16             eeprom_data = 0, device_id, apme_mask;
4868
4869         adapter->has_manage = e1000_enable_mng_pass_thru(&adapter->hw);
4870         apme_mask = EM_EEPROM_APME;
4871
4872         switch (adapter->hw.mac.type) {
4873         case e1000_82573:
4874         case e1000_82583:
4875                 adapter->has_amt = TRUE;
4876                 /* Falls thru */
4877         case e1000_82571:
4878         case e1000_82572:
4879         case e1000_80003es2lan:
4880                 if (adapter->hw.bus.func == 1) {
4881                         e1000_read_nvm(&adapter->hw,
4882                             NVM_INIT_CONTROL3_PORT_B, 1, &eeprom_data);
4883                         break;
4884                 } else
4885                         e1000_read_nvm(&adapter->hw,
4886                             NVM_INIT_CONTROL3_PORT_A, 1, &eeprom_data);
4887                 break;
4888         case e1000_ich8lan:
4889         case e1000_ich9lan:
4890         case e1000_ich10lan:
4891         case e1000_pchlan:
4892         case e1000_pch2lan:
4893                 apme_mask = E1000_WUC_APME;
4894                 adapter->has_amt = TRUE;
4895                 eeprom_data = E1000_READ_REG(&adapter->hw, E1000_WUC);
4896                 break;
4897         default:
4898                 e1000_read_nvm(&adapter->hw,
4899                     NVM_INIT_CONTROL3_PORT_A, 1, &eeprom_data);
4900                 break;
4901         }
4902         if (eeprom_data & apme_mask)
4903                 adapter->wol = (E1000_WUFC_MAG | E1000_WUFC_MC);
4904         /*
4905          * We have the eeprom settings, now apply the special cases
4906          * where the eeprom may be wrong or the board won't support
4907          * wake on lan on a particular port
4908          */
4909         device_id = pci_get_device(dev);
4910         switch (device_id) {
4911         case E1000_DEV_ID_82571EB_FIBER:
4912                 /* Wake events only supported on port A for dual fiber
4913                  * regardless of eeprom setting */
4914                 if (E1000_READ_REG(&adapter->hw, E1000_STATUS) &
4915                     E1000_STATUS_FUNC_1)
4916                         adapter->wol = 0;
4917                 break;
4918         case E1000_DEV_ID_82571EB_QUAD_COPPER:
4919         case E1000_DEV_ID_82571EB_QUAD_FIBER:
4920         case E1000_DEV_ID_82571EB_QUAD_COPPER_LP:
4921                 /* if quad port adapter, disable WoL on all but port A */
4922                 if (global_quad_port_a != 0)
4923                         adapter->wol = 0;
4924                 /* Reset for multiple quad port adapters */
4925                 if (++global_quad_port_a == 4)
4926                         global_quad_port_a = 0;
4927                 break;
4928         }
4929         return;
4930 }
4931
4932
4933 /*
4934  * Enable PCI Wake On Lan capability
4935  */
static void
em_enable_wakeup(device_t dev)
{
	struct adapter	*adapter = device_get_softc(dev);
	struct ifnet	*ifp = adapter->ifp;
	u32		pmc, ctrl, ctrl_ext, rctl;
	u16		status;

	/* Without a PCI power-management capability there is nothing to do. */
	if ((pci_find_cap(dev, PCIY_PMG, &pmc) != 0))
		return;

	/* Advertise the wakeup capability */
	ctrl = E1000_READ_REG(&adapter->hw, E1000_CTRL);
	ctrl |= (E1000_CTRL_SWDPIN2 | E1000_CTRL_SWDPIN3);
	E1000_WRITE_REG(&adapter->hw, E1000_CTRL, ctrl);
	E1000_WRITE_REG(&adapter->hw, E1000_WUC, E1000_WUC_PME_EN);

	/* ICH/PCH families need shared-code suspend workarounds applied. */
	if ((adapter->hw.mac.type == e1000_ich8lan) ||
	    (adapter->hw.mac.type == e1000_pchlan) ||
	    (adapter->hw.mac.type == e1000_ich9lan) ||
	    (adapter->hw.mac.type == e1000_ich10lan))
		e1000_suspend_workarounds_ich8lan(&adapter->hw);

	/* Keep the laser running on Fiber adapters */
	if (adapter->hw.phy.media_type == e1000_media_type_fiber ||
	    adapter->hw.phy.media_type == e1000_media_type_internal_serdes) {
		ctrl_ext = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
		ctrl_ext |= E1000_CTRL_EXT_SDP3_DATA;
		E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT, ctrl_ext);
	}

	/*
	** Determine type of Wakeup: note that wol
	** is set with all bits on by default.
	*/
	if ((ifp->if_capenable & IFCAP_WOL_MAGIC) == 0)
		adapter->wol &= ~E1000_WUFC_MAG;

	if ((ifp->if_capenable & IFCAP_WOL_MCAST) == 0)
		adapter->wol &= ~E1000_WUFC_MC;
	else {
		/* Multicast wake requires multicast promiscuous reception. */
		rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
		rctl |= E1000_RCTL_MPE;
		E1000_WRITE_REG(&adapter->hw, E1000_RCTL, rctl);
	}

	/* PCH parts program wake state through the PHY instead of the MAC. */
	if ((adapter->hw.mac.type == e1000_pchlan) ||
	    (adapter->hw.mac.type == e1000_pch2lan)) {
		if (em_enable_phy_wakeup(adapter))
			return;
	} else {
		E1000_WRITE_REG(&adapter->hw, E1000_WUC, E1000_WUC_PME_EN);
		E1000_WRITE_REG(&adapter->hw, E1000_WUFC, adapter->wol);
	}

	/* IGP3 PHYs need an explicit power-down workaround before suspend. */
	if (adapter->hw.phy.type == e1000_phy_igp_3)
		e1000_igp3_phy_powerdown_workaround_ich8lan(&adapter->hw);

	/* Request PME */
	status = pci_read_config(dev, pmc + PCIR_POWER_STATUS, 2);
	status &= ~(PCIM_PSTAT_PME | PCIM_PSTAT_PMEENABLE);
	if (ifp->if_capenable & IFCAP_WOL)
		status |= PCIM_PSTAT_PME | PCIM_PSTAT_PMEENABLE;
	pci_write_config(dev, pmc + PCIR_POWER_STATUS, status, 2);

	return;
}
5003
/*
** WOL in the newer chipset interfaces (pchlan)
** requires things to be copied into the phy
*/
static int
em_enable_phy_wakeup(struct adapter *adapter)
{
	struct e1000_hw *hw = &adapter->hw;
	u32 mreg, ret = 0;
	u16 preg;

	/* copy MAC RARs to PHY RARs */
	e1000_copy_rx_addrs_to_phy_ich8lan(hw);

	/* copy MAC MTA to PHY MTA; each 32-bit MTA entry spans two
	 * 16-bit PHY registers (low word, then high word) */
	for (int i = 0; i < adapter->hw.mac.mta_reg_count; i++) {
		mreg = E1000_READ_REG_ARRAY(hw, E1000_MTA, i);
		e1000_write_phy_reg(hw, BM_MTA(i), (u16)(mreg & 0xFFFF));
		e1000_write_phy_reg(hw, BM_MTA(i) + 1,
		    (u16)((mreg >> 16) & 0xFFFF));
	}

	/* configure PHY Rx Control register by mirroring the relevant
	 * bits of the MAC's RCTL (and the flow-control bit of CTRL) */
	e1000_read_phy_reg(&adapter->hw, BM_RCTL, &preg);
	mreg = E1000_READ_REG(hw, E1000_RCTL);
	if (mreg & E1000_RCTL_UPE)
		preg |= BM_RCTL_UPE;
	if (mreg & E1000_RCTL_MPE)
		preg |= BM_RCTL_MPE;
	preg &= ~(BM_RCTL_MO_MASK);
	if (mreg & E1000_RCTL_MO_3)
		preg |= (((mreg & E1000_RCTL_MO_3) >> E1000_RCTL_MO_SHIFT)
				<< BM_RCTL_MO_SHIFT);
	if (mreg & E1000_RCTL_BAM)
		preg |= BM_RCTL_BAM;
	if (mreg & E1000_RCTL_PMCF)
		preg |= BM_RCTL_PMCF;
	mreg = E1000_READ_REG(hw, E1000_CTRL);
	if (mreg & E1000_CTRL_RFCE)
		preg |= BM_RCTL_RFCE;
	e1000_write_phy_reg(&adapter->hw, BM_RCTL, preg);

	/* enable PHY wakeup in MAC register */
	E1000_WRITE_REG(hw, E1000_WUC,
	    E1000_WUC_PHY_WAKE | E1000_WUC_PME_EN);
	E1000_WRITE_REG(hw, E1000_WUFC, adapter->wol);

	/* configure and enable PHY wakeup in PHY registers */
	e1000_write_phy_reg(&adapter->hw, BM_WUFC, adapter->wol);
	e1000_write_phy_reg(&adapter->hw, BM_WUC, E1000_WUC_PME_EN);

	/* activate PHY wakeup: the raw MDIC accesses below must be
	 * performed while holding the PHY semaphore */
	ret = hw->phy.ops.acquire(hw);
	if (ret) {
		printf("Could not acquire PHY\n");
		return ret;
	}
	/* select the wakeup-control page (769), then set the enable and
	 * host-wakeup bits in the WUC enable register */
	e1000_write_phy_reg_mdic(hw, IGP01E1000_PHY_PAGE_SELECT,
				 (BM_WUC_ENABLE_PAGE << IGP_PAGE_SHIFT));
	ret = e1000_read_phy_reg_mdic(hw, BM_WUC_ENABLE_REG, &preg);
	if (ret) {
		printf("Could not read PHY page 769\n");
		goto out;
	}
	preg |= BM_WUC_ENABLE_BIT | BM_WUC_HOST_WU_BIT;
	ret = e1000_write_phy_reg_mdic(hw, BM_WUC_ENABLE_REG, preg);
	if (ret)
		printf("Could not set PHY Host Wakeup bit\n");
out:
	hw->phy.ops.release(hw);

	return ret;
}
5077
5078 static void
5079 em_led_func(void *arg, int onoff)
5080 {
5081         struct adapter  *adapter = arg;
5082  
5083         EM_CORE_LOCK(adapter);
5084         if (onoff) {
5085                 e1000_setup_led(&adapter->hw);
5086                 e1000_led_on(&adapter->hw);
5087         } else {
5088                 e1000_led_off(&adapter->hw);
5089                 e1000_cleanup_led(&adapter->hw);
5090         }
5091         EM_CORE_UNLOCK(adapter);
5092 }
5093
5094 /*
5095 ** Disable the L0S and L1 LINK states
5096 */
5097 static void
5098 em_disable_aspm(struct adapter *adapter)
5099 {
5100         int             base, reg;
5101         u16             link_cap,link_ctrl;
5102         device_t        dev = adapter->dev;
5103
5104         switch (adapter->hw.mac.type) {
5105                 case e1000_82573:
5106                 case e1000_82574:
5107                 case e1000_82583:
5108                         break;
5109                 default:
5110                         return;
5111         }
5112         if (pci_find_cap(dev, PCIY_EXPRESS, &base) != 0)
5113                 return;
5114         reg = base + PCIR_EXPRESS_LINK_CAP;
5115         link_cap = pci_read_config(dev, reg, 2);
5116         if ((link_cap & PCIM_LINK_CAP_ASPM) == 0)
5117                 return;
5118         reg = base + PCIR_EXPRESS_LINK_CTL;
5119         link_ctrl = pci_read_config(dev, reg, 2);
5120         link_ctrl &= 0xFFFC; /* turn off bit 1 and 2 */
5121         pci_write_config(dev, reg, link_ctrl, 2);
5122         return;
5123 }
5124
5125 /**********************************************************************
5126  *
5127  *  Update the board statistics counters.
5128  *
5129  **********************************************************************/
static void
em_update_stats_counters(struct adapter *adapter)
{
	struct ifnet   *ifp;

	/*
	 * Symbol/sequence error counters are only sampled on copper
	 * media, or when the STATUS register reports link up.
	 */
	if(adapter->hw.phy.media_type == e1000_media_type_copper ||
	   (E1000_READ_REG(&adapter->hw, E1000_STATUS) & E1000_STATUS_LU)) {
		adapter->stats.symerrs += E1000_READ_REG(&adapter->hw, E1000_SYMERRS);
		adapter->stats.sec += E1000_READ_REG(&adapter->hw, E1000_SEC);
	}
	adapter->stats.crcerrs += E1000_READ_REG(&adapter->hw, E1000_CRCERRS);
	adapter->stats.mpc += E1000_READ_REG(&adapter->hw, E1000_MPC);
	adapter->stats.scc += E1000_READ_REG(&adapter->hw, E1000_SCC);
	adapter->stats.ecol += E1000_READ_REG(&adapter->hw, E1000_ECOL);

	adapter->stats.mcc += E1000_READ_REG(&adapter->hw, E1000_MCC);
	adapter->stats.latecol += E1000_READ_REG(&adapter->hw, E1000_LATECOL);
	adapter->stats.colc += E1000_READ_REG(&adapter->hw, E1000_COLC);
	adapter->stats.dc += E1000_READ_REG(&adapter->hw, E1000_DC);
	adapter->stats.rlec += E1000_READ_REG(&adapter->hw, E1000_RLEC);
	adapter->stats.xonrxc += E1000_READ_REG(&adapter->hw, E1000_XONRXC);
	adapter->stats.xontxc += E1000_READ_REG(&adapter->hw, E1000_XONTXC);
	/*
	** For watchdog management we need to know if we have been
	** paused during the last interval, so capture that here.
	*/
	adapter->pause_frames = E1000_READ_REG(&adapter->hw, E1000_XOFFRXC);
	adapter->stats.xoffrxc += adapter->pause_frames;
	adapter->stats.xofftxc += E1000_READ_REG(&adapter->hw, E1000_XOFFTXC);
	adapter->stats.fcruc += E1000_READ_REG(&adapter->hw, E1000_FCRUC);
	adapter->stats.prc64 += E1000_READ_REG(&adapter->hw, E1000_PRC64);
	adapter->stats.prc127 += E1000_READ_REG(&adapter->hw, E1000_PRC127);
	adapter->stats.prc255 += E1000_READ_REG(&adapter->hw, E1000_PRC255);
	adapter->stats.prc511 += E1000_READ_REG(&adapter->hw, E1000_PRC511);
	adapter->stats.prc1023 += E1000_READ_REG(&adapter->hw, E1000_PRC1023);
	adapter->stats.prc1522 += E1000_READ_REG(&adapter->hw, E1000_PRC1522);
	adapter->stats.gprc += E1000_READ_REG(&adapter->hw, E1000_GPRC);
	adapter->stats.bprc += E1000_READ_REG(&adapter->hw, E1000_BPRC);
	adapter->stats.mprc += E1000_READ_REG(&adapter->hw, E1000_MPRC);
	adapter->stats.gptc += E1000_READ_REG(&adapter->hw, E1000_GPTC);

	/* For the 64-bit byte counters the low dword must be read first. */
	/* Both registers clear on the read of the high dword */

	adapter->stats.gorc += E1000_READ_REG(&adapter->hw, E1000_GORCL) +
	    ((u64)E1000_READ_REG(&adapter->hw, E1000_GORCH) << 32);
	adapter->stats.gotc += E1000_READ_REG(&adapter->hw, E1000_GOTCL) +
	    ((u64)E1000_READ_REG(&adapter->hw, E1000_GOTCH) << 32);

	adapter->stats.rnbc += E1000_READ_REG(&adapter->hw, E1000_RNBC);
	adapter->stats.ruc += E1000_READ_REG(&adapter->hw, E1000_RUC);
	adapter->stats.rfc += E1000_READ_REG(&adapter->hw, E1000_RFC);
	adapter->stats.roc += E1000_READ_REG(&adapter->hw, E1000_ROC);
	adapter->stats.rjc += E1000_READ_REG(&adapter->hw, E1000_RJC);

	/*
	 * NOTE(review): only the high dwords (TORH/TOTH) are accumulated
	 * here, yet the comment above says the low dword must be read
	 * first on the 64-bit counters — confirm against the hardware
	 * datasheet whether TORL/TOTL should be read as well.
	 */
	adapter->stats.tor += E1000_READ_REG(&adapter->hw, E1000_TORH);
	adapter->stats.tot += E1000_READ_REG(&adapter->hw, E1000_TOTH);

	adapter->stats.tpr += E1000_READ_REG(&adapter->hw, E1000_TPR);
	adapter->stats.tpt += E1000_READ_REG(&adapter->hw, E1000_TPT);
	adapter->stats.ptc64 += E1000_READ_REG(&adapter->hw, E1000_PTC64);
	adapter->stats.ptc127 += E1000_READ_REG(&adapter->hw, E1000_PTC127);
	adapter->stats.ptc255 += E1000_READ_REG(&adapter->hw, E1000_PTC255);
	adapter->stats.ptc511 += E1000_READ_REG(&adapter->hw, E1000_PTC511);
	adapter->stats.ptc1023 += E1000_READ_REG(&adapter->hw, E1000_PTC1023);
	adapter->stats.ptc1522 += E1000_READ_REG(&adapter->hw, E1000_PTC1522);
	adapter->stats.mptc += E1000_READ_REG(&adapter->hw, E1000_MPTC);
	adapter->stats.bptc += E1000_READ_REG(&adapter->hw, E1000_BPTC);

	/* Interrupt Counts */

	adapter->stats.iac += E1000_READ_REG(&adapter->hw, E1000_IAC);
	adapter->stats.icrxptc += E1000_READ_REG(&adapter->hw, E1000_ICRXPTC);
	adapter->stats.icrxatc += E1000_READ_REG(&adapter->hw, E1000_ICRXATC);
	adapter->stats.ictxptc += E1000_READ_REG(&adapter->hw, E1000_ICTXPTC);
	adapter->stats.ictxatc += E1000_READ_REG(&adapter->hw, E1000_ICTXATC);
	adapter->stats.ictxqec += E1000_READ_REG(&adapter->hw, E1000_ICTXQEC);
	adapter->stats.ictxqmtc += E1000_READ_REG(&adapter->hw, E1000_ICTXQMTC);
	adapter->stats.icrxdmtc += E1000_READ_REG(&adapter->hw, E1000_ICRXDMTC);
	adapter->stats.icrxoc += E1000_READ_REG(&adapter->hw, E1000_ICRXOC);

	/* These counters only exist on 82543 and newer MACs. */
	if (adapter->hw.mac.type >= e1000_82543) {
		adapter->stats.algnerrc += 
		E1000_READ_REG(&adapter->hw, E1000_ALGNERRC);
		adapter->stats.rxerrc += 
		E1000_READ_REG(&adapter->hw, E1000_RXERRC);
		adapter->stats.tncrs += 
		E1000_READ_REG(&adapter->hw, E1000_TNCRS);
		adapter->stats.cexterr += 
		E1000_READ_REG(&adapter->hw, E1000_CEXTERR);
		adapter->stats.tsctc += 
		E1000_READ_REG(&adapter->hw, E1000_TSCTC);
		adapter->stats.tsctfc += 
		E1000_READ_REG(&adapter->hw, E1000_TSCTFC);
	}
	ifp = adapter->ifp;

	ifp->if_collisions = adapter->stats.colc;

	/* Rx Errors */
	ifp->if_ierrors = adapter->dropped_pkts + adapter->stats.rxerrc +
	    adapter->stats.crcerrs + adapter->stats.algnerrc +
	    adapter->stats.ruc + adapter->stats.roc +
	    adapter->stats.mpc + adapter->stats.cexterr;

	/* Tx Errors */
	ifp->if_oerrors = adapter->stats.ecol +
	    adapter->stats.latecol + adapter->watchdog_events;
}
5239
5240 /* Export a single 32-bit register via a read-only sysctl. */
5241 static int
5242 em_sysctl_reg_handler(SYSCTL_HANDLER_ARGS)
5243 {
5244         struct adapter *adapter;
5245         u_int val;
5246
5247         adapter = oidp->oid_arg1;
5248         val = E1000_READ_REG(&adapter->hw, oidp->oid_arg2);
5249         return (sysctl_handle_int(oidp, &val, 0, req));
5250 }
5251
5252 /*
5253  * Add sysctl variables, one per statistic, to the system.
5254  */
5255 static void
5256 em_add_hw_stats(struct adapter *adapter)
5257 {
5258         device_t dev = adapter->dev;
5259
5260         struct tx_ring *txr = adapter->tx_rings;
5261         struct rx_ring *rxr = adapter->rx_rings;
5262
5263         struct sysctl_ctx_list *ctx = device_get_sysctl_ctx(dev);
5264         struct sysctl_oid *tree = device_get_sysctl_tree(dev);
5265         struct sysctl_oid_list *child = SYSCTL_CHILDREN(tree);
5266         struct e1000_hw_stats *stats = &adapter->stats;
5267
5268         struct sysctl_oid *stat_node, *queue_node, *int_node;
5269         struct sysctl_oid_list *stat_list, *queue_list, *int_list;
5270
5271 #define QUEUE_NAME_LEN 32
5272         char namebuf[QUEUE_NAME_LEN];
5273         
5274         /* Driver Statistics */
5275         SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "link_irq",
5276                         CTLFLAG_RD, &adapter->link_irq,
5277                         "Link MSIX IRQ Handled");
5278         SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "mbuf_alloc_fail", 
5279                          CTLFLAG_RD, &adapter->mbuf_alloc_failed,
5280                          "Std mbuf failed");
5281         SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "cluster_alloc_fail", 
5282                          CTLFLAG_RD, &adapter->mbuf_cluster_failed,
5283                          "Std mbuf cluster failed");
5284         SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "dropped", 
5285                         CTLFLAG_RD, &adapter->dropped_pkts,
5286                         "Driver dropped packets");
5287         SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "tx_dma_fail", 
5288                         CTLFLAG_RD, &adapter->no_tx_dma_setup,
5289                         "Driver tx dma failure in xmit");
5290         SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "rx_overruns",
5291                         CTLFLAG_RD, &adapter->rx_overruns,
5292                         "RX overruns");
5293         SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "watchdog_timeouts",
5294                         CTLFLAG_RD, &adapter->watchdog_events,
5295                         "Watchdog timeouts");
5296         
5297         SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "device_control",
5298                         CTLTYPE_UINT | CTLFLAG_RD, adapter, E1000_CTRL,
5299                         em_sysctl_reg_handler, "IU",
5300                         "Device Control Register");
5301         SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "rx_control",
5302                         CTLTYPE_UINT | CTLFLAG_RD, adapter, E1000_RCTL,
5303                         em_sysctl_reg_handler, "IU",
5304                         "Receiver Control Register");
5305         SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "fc_high_water",
5306                         CTLFLAG_RD, &adapter->hw.fc.high_water, 0,
5307                         "Flow Control High Watermark");
5308         SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "fc_low_water", 
5309                         CTLFLAG_RD, &adapter->hw.fc.low_water, 0,
5310                         "Flow Control Low Watermark");
5311
5312         for (int i = 0; i < adapter->num_queues; i++, rxr++, txr++) {
5313                 snprintf(namebuf, QUEUE_NAME_LEN, "queue%d", i);
5314                 queue_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, namebuf,
5315                                             CTLFLAG_RD, NULL, "Queue Name");
5316                 queue_list = SYSCTL_CHILDREN(queue_node);
5317
5318                 SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "txd_head", 
5319                                 CTLTYPE_UINT | CTLFLAG_RD, adapter,
5320                                 E1000_TDH(txr->me),
5321                                 em_sysctl_reg_handler, "IU",
5322                                 "Transmit Descriptor Head");
5323                 SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "txd_tail", 
5324                                 CTLTYPE_UINT | CTLFLAG_RD, adapter,
5325                                 E1000_TDT(txr->me),
5326                                 em_sysctl_reg_handler, "IU",
5327                                 "Transmit Descriptor Tail");
5328                 SYSCTL_ADD_ULONG(ctx, queue_list, OID_AUTO, "tx_irq",
5329                                 CTLFLAG_RD, &txr->tx_irq,
5330                                 "Queue MSI-X Transmit Interrupts");
5331                 SYSCTL_ADD_ULONG(ctx, queue_list, OID_AUTO, "no_desc_avail", 
5332                                 CTLFLAG_RD, &txr->no_desc_avail,
5333                                 "Queue No Descriptor Available");
5334                 
5335                 SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "rxd_head", 
5336                                 CTLTYPE_UINT | CTLFLAG_RD, adapter,
5337                                 E1000_RDH(rxr->me),
5338                                 em_sysctl_reg_handler, "IU",
5339                                 "Receive Descriptor Head");
5340                 SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "rxd_tail", 
5341                                 CTLTYPE_UINT | CTLFLAG_RD, adapter,
5342                                 E1000_RDT(rxr->me),
5343                                 em_sysctl_reg_handler, "IU",
5344                                 "Receive Descriptor Tail");
5345                 SYSCTL_ADD_ULONG(ctx, queue_list, OID_AUTO, "rx_irq",
5346                                 CTLFLAG_RD, &rxr->rx_irq,
5347                                 "Queue MSI-X Receive Interrupts");
5348         }
5349
5350         /* MAC stats get their own sub node */
5351
5352         stat_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "mac_stats", 
5353                                     CTLFLAG_RD, NULL, "Statistics");
5354         stat_list = SYSCTL_CHILDREN(stat_node);
5355
5356         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "excess_coll",
5357                         CTLFLAG_RD, &stats->ecol,
5358                         "Excessive collisions");
5359         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "single_coll",
5360                         CTLFLAG_RD, &stats->scc,
5361                         "Single collisions");
5362         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "multiple_coll",
5363                         CTLFLAG_RD, &stats->mcc,
5364                         "Multiple collisions");
5365         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "late_coll",
5366                         CTLFLAG_RD, &stats->latecol,
5367                         "Late collisions");
5368         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "collision_count",
5369                         CTLFLAG_RD, &stats->colc,
5370                         "Collision Count");
5371         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "symbol_errors",
5372                         CTLFLAG_RD, &adapter->stats.symerrs,
5373                         "Symbol Errors");
5374         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "sequence_errors",
5375                         CTLFLAG_RD, &adapter->stats.sec,
5376                         "Sequence Errors");
5377         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "defer_count",
5378                         CTLFLAG_RD, &adapter->stats.dc,
5379                         "Defer Count");
5380         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "missed_packets",
5381                         CTLFLAG_RD, &adapter->stats.mpc,
5382                         "Missed Packets");
5383         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_no_buff",
5384                         CTLFLAG_RD, &adapter->stats.rnbc,
5385                         "Receive No Buffers");
5386         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_undersize",
5387                         CTLFLAG_RD, &adapter->stats.ruc,
5388                         "Receive Undersize");
5389         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_fragmented",
5390                         CTLFLAG_RD, &adapter->stats.rfc,
5391                         "Fragmented Packets Received ");
5392         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_oversize",
5393                         CTLFLAG_RD, &adapter->stats.roc,
5394                         "Oversized Packets Received");
5395         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_jabber",
5396                         CTLFLAG_RD, &adapter->stats.rjc,
5397                         "Recevied Jabber");
5398         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_errs",
5399                         CTLFLAG_RD, &adapter->stats.rxerrc,
5400                         "Receive Errors");
5401         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "crc_errs",
5402                         CTLFLAG_RD, &adapter->stats.crcerrs,
5403                         "CRC errors");
5404         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "alignment_errs",
5405                         CTLFLAG_RD, &adapter->stats.algnerrc,
5406                         "Alignment Errors");
5407         /* On 82575 these are collision counts */
5408         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "coll_ext_errs",
5409                         CTLFLAG_RD, &adapter->stats.cexterr,
5410                         "Collision/Carrier extension errors");
5411         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "xon_recvd",
5412                         CTLFLAG_RD, &adapter->stats.xonrxc,
5413                         "XON Received");
5414         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "xon_txd",
5415                         CTLFLAG_RD, &adapter->stats.xontxc,
5416                         "XON Transmitted");
5417         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "xoff_recvd",
5418                         CTLFLAG_RD, &adapter->stats.xoffrxc,
5419                         "XOFF Received");
5420         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "xoff_txd",
5421                         CTLFLAG_RD, &adapter->stats.xofftxc,
5422                         "XOFF Transmitted");
5423
5424         /* Packet Reception Stats */
5425         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "total_pkts_recvd",
5426                         CTLFLAG_RD, &adapter->stats.tpr,
5427                         "Total Packets Received ");
5428         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_pkts_recvd",
5429                         CTLFLAG_RD, &adapter->stats.gprc,
5430                         "Good Packets Received");
5431         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "bcast_pkts_recvd",
5432                         CTLFLAG_RD, &adapter->stats.bprc,
5433                         "Broadcast Packets Received");
5434         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "mcast_pkts_recvd",
5435                         CTLFLAG_RD, &adapter->stats.mprc,
5436                         "Multicast Packets Received");
5437         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_64",
5438                         CTLFLAG_RD, &adapter->stats.prc64,
5439                         "64 byte frames received ");
5440         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_65_127",
5441                         CTLFLAG_RD, &adapter->stats.prc127,
5442                         "65-127 byte frames received");
5443         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_128_255",
5444                         CTLFLAG_RD, &adapter->stats.prc255,
5445                         "128-255 byte frames received");
5446         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_256_511",
5447                         CTLFLAG_RD, &adapter->stats.prc511,
5448                         "256-511 byte frames received");
5449         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_512_1023",
5450                         CTLFLAG_RD, &adapter->stats.prc1023,
5451                         "512-1023 byte frames received");
5452         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_1024_1522",
5453                         CTLFLAG_RD, &adapter->stats.prc1522,
5454                         "1023-1522 byte frames received");
5455         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_octets_recvd",
5456                         CTLFLAG_RD, &adapter->stats.gorc, 
5457                         "Good Octets Received"); 
5458
5459         /* Packet Transmission Stats */
5460         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_octets_txd",
5461                         CTLFLAG_RD, &adapter->stats.gotc, 
5462                         "Good Octets Transmitted"); 
5463         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "total_pkts_txd",
5464                         CTLFLAG_RD, &adapter->stats.tpt,
5465                         "Total Packets Transmitted");
5466         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_pkts_txd",
5467                         CTLFLAG_RD, &adapter->stats.gptc,
5468                         "Good Packets Transmitted");
5469         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "bcast_pkts_txd",
5470                         CTLFLAG_RD, &adapter->stats.bptc,
5471                         "Broadcast Packets Transmitted");
5472         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "mcast_pkts_txd",
5473                         CTLFLAG_RD, &adapter->stats.mptc,
5474                         "Multicast Packets Transmitted");
5475         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_64",
5476                         CTLFLAG_RD, &adapter->stats.ptc64,
5477                         "64 byte frames transmitted ");
5478         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_65_127",
5479                         CTLFLAG_RD, &adapter->stats.ptc127,
5480                         "65-127 byte frames transmitted");
5481         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_128_255",
5482                         CTLFLAG_RD, &adapter->stats.ptc255,
5483                         "128-255 byte frames transmitted");
5484         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_256_511",
5485                         CTLFLAG_RD, &adapter->stats.ptc511,
5486                         "256-511 byte frames transmitted");
5487         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_512_1023",
5488                         CTLFLAG_RD, &adapter->stats.ptc1023,
5489                         "512-1023 byte frames transmitted");
5490         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_1024_1522",
5491                         CTLFLAG_RD, &adapter->stats.ptc1522,
5492                         "1024-1522 byte frames transmitted");
5493         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tso_txd",
5494                         CTLFLAG_RD, &adapter->stats.tsctc,
5495                         "TSO Contexts Transmitted");
5496         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tso_ctx_fail",
5497                         CTLFLAG_RD, &adapter->stats.tsctfc,
5498                         "TSO Contexts Failed");
5499
5500
5501         /* Interrupt Stats */
5502
5503         int_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "interrupts", 
5504                                     CTLFLAG_RD, NULL, "Interrupt Statistics");
5505         int_list = SYSCTL_CHILDREN(int_node);
5506
5507         SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "asserts",
5508                         CTLFLAG_RD, &adapter->stats.iac,
5509                         "Interrupt Assertion Count");
5510
5511         SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "rx_pkt_timer",
5512                         CTLFLAG_RD, &adapter->stats.icrxptc,
5513                         "Interrupt Cause Rx Pkt Timer Expire Count");
5514
5515         SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "rx_abs_timer",
5516                         CTLFLAG_RD, &adapter->stats.icrxatc,
5517                         "Interrupt Cause Rx Abs Timer Expire Count");
5518
5519         SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "tx_pkt_timer",
5520                         CTLFLAG_RD, &adapter->stats.ictxptc,
5521                         "Interrupt Cause Tx Pkt Timer Expire Count");
5522
5523         SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "tx_abs_timer",
5524                         CTLFLAG_RD, &adapter->stats.ictxatc,
5525                         "Interrupt Cause Tx Abs Timer Expire Count");
5526
5527         SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "tx_queue_empty",
5528                         CTLFLAG_RD, &adapter->stats.ictxqec,
5529                         "Interrupt Cause Tx Queue Empty Count");
5530
5531         SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "tx_queue_min_thresh",
5532                         CTLFLAG_RD, &adapter->stats.ictxqmtc,
5533                         "Interrupt Cause Tx Queue Min Thresh Count");
5534
5535         SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "rx_desc_min_thresh",
5536                         CTLFLAG_RD, &adapter->stats.icrxdmtc,
5537                         "Interrupt Cause Rx Desc Min Thresh Count");
5538
5539         SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "rx_overrun",
5540                         CTLFLAG_RD, &adapter->stats.icrxoc,
5541                         "Interrupt Cause Receiver Overrun Count");
5542 }
5543
5544 /**********************************************************************
5545  *
5546  *  This routine provides a way to dump out the adapter eeprom,
5547  *  often a useful debug/service tool. This only dumps the first
5548  *  32 words, stuff that matters is in that extent.
5549  *
5550  **********************************************************************/
5551 static int
5552 em_sysctl_nvm_info(SYSCTL_HANDLER_ARGS)
5553 {
5554         struct adapter *adapter = (struct adapter *)arg1;
5555         int error;
5556         int result;
5557
5558         result = -1;
5559         error = sysctl_handle_int(oidp, &result, 0, req);
5560
5561         if (error || !req->newptr)
5562                 return (error);
5563
5564         /*
5565          * This value will cause a hex dump of the
5566          * first 32 16-bit words of the EEPROM to
5567          * the screen.
5568          */
5569         if (result == 1)
5570                 em_print_nvm_info(adapter);
5571
5572         return (error);
5573 }
5574
5575 static void
5576 em_print_nvm_info(struct adapter *adapter)
5577 {
5578         u16     eeprom_data;
5579         int     i, j, row = 0;
5580
5581         /* Its a bit crude, but it gets the job done */
5582         printf("\nInterface EEPROM Dump:\n");
5583         printf("Offset\n0x0000  ");
5584         for (i = 0, j = 0; i < 32; i++, j++) {
5585                 if (j == 8) { /* Make the offset block */
5586                         j = 0; ++row;
5587                         printf("\n0x00%x0  ",row);
5588                 }
5589                 e1000_read_nvm(&adapter->hw, i, 1, &eeprom_data);
5590                 printf("%04x ", eeprom_data);
5591         }
5592         printf("\n");
5593 }
5594
5595 static int
5596 em_sysctl_int_delay(SYSCTL_HANDLER_ARGS)
5597 {
5598         struct em_int_delay_info *info;
5599         struct adapter *adapter;
5600         u32 regval;
5601         int error, usecs, ticks;
5602
5603         info = (struct em_int_delay_info *)arg1;
5604         usecs = info->value;
5605         error = sysctl_handle_int(oidp, &usecs, 0, req);
5606         if (error != 0 || req->newptr == NULL)
5607                 return (error);
5608         if (usecs < 0 || usecs > EM_TICKS_TO_USECS(65535))
5609                 return (EINVAL);
5610         info->value = usecs;
5611         ticks = EM_USECS_TO_TICKS(usecs);
5612
5613         adapter = info->adapter;
5614         
5615         EM_CORE_LOCK(adapter);
5616         regval = E1000_READ_OFFSET(&adapter->hw, info->offset);
5617         regval = (regval & ~0xffff) | (ticks & 0xffff);
5618         /* Handle a few special cases. */
5619         switch (info->offset) {
5620         case E1000_RDTR:
5621                 break;
5622         case E1000_TIDV:
5623                 if (ticks == 0) {
5624                         adapter->txd_cmd &= ~E1000_TXD_CMD_IDE;
5625                         /* Don't write 0 into the TIDV register. */
5626                         regval++;
5627                 } else
5628                         adapter->txd_cmd |= E1000_TXD_CMD_IDE;
5629                 break;
5630         }
5631         E1000_WRITE_OFFSET(&adapter->hw, info->offset, regval);
5632         EM_CORE_UNLOCK(adapter);
5633         return (0);
5634 }
5635
5636 static void
5637 em_add_int_delay_sysctl(struct adapter *adapter, const char *name,
5638         const char *description, struct em_int_delay_info *info,
5639         int offset, int value)
5640 {
5641         info->adapter = adapter;
5642         info->offset = offset;
5643         info->value = value;
5644         SYSCTL_ADD_PROC(device_get_sysctl_ctx(adapter->dev),
5645             SYSCTL_CHILDREN(device_get_sysctl_tree(adapter->dev)),
5646             OID_AUTO, name, CTLTYPE_INT|CTLFLAG_RW,
5647             info, 0, em_sysctl_int_delay, "I", description);
5648 }
5649
5650 static void
5651 em_set_sysctl_value(struct adapter *adapter, const char *name,
5652         const char *description, int *limit, int value)
5653 {
5654         *limit = value;
5655         SYSCTL_ADD_INT(device_get_sysctl_ctx(adapter->dev),
5656             SYSCTL_CHILDREN(device_get_sysctl_tree(adapter->dev)),
5657             OID_AUTO, name, CTLTYPE_INT|CTLFLAG_RW, limit, value, description);
5658 }
5659
5660
5661 /*
5662 ** Set flow control using sysctl:
5663 ** Flow control values:
5664 **      0 - off
5665 **      1 - rx pause
5666 **      2 - tx pause
5667 **      3 - full
5668 */
5669 static int
5670 em_set_flowcntl(SYSCTL_HANDLER_ARGS)
5671 {       
5672         int             error;
5673         static int      input = 3; /* default is full */
5674         struct adapter  *adapter = (struct adapter *) arg1;
5675                     
5676         error = sysctl_handle_int(oidp, &input, 0, req);
5677     
5678         if ((error) || (req->newptr == NULL))
5679                 return (error);
5680                 
5681         if (input == adapter->fc) /* no change? */
5682                 return (error);
5683
5684         switch (input) {
5685                 case e1000_fc_rx_pause:
5686                 case e1000_fc_tx_pause:
5687                 case e1000_fc_full:
5688                 case e1000_fc_none:
5689                         adapter->hw.fc.requested_mode = input;
5690                         adapter->fc = input;
5691                         break;
5692                 default:
5693                         /* Do nothing */
5694                         return (error);
5695         }
5696
5697         adapter->hw.fc.current_mode = adapter->hw.fc.requested_mode;
5698         e1000_force_mac_fc(&adapter->hw);
5699         return (error);
5700 }
5701
5702 /*
5703 ** Manage Energy Efficient Ethernet:
5704 ** Control values:
5705 **     0/1 - enabled/disabled
5706 */
5707 static int
5708 em_sysctl_eee(SYSCTL_HANDLER_ARGS)
5709 {
5710        struct adapter *adapter = (struct adapter *) arg1;
5711        int             error, value;
5712
5713        value = adapter->hw.dev_spec.ich8lan.eee_disable;
5714        error = sysctl_handle_int(oidp, &value, 0, req);
5715        if (error || req->newptr == NULL)
5716                return (error);
5717        EM_CORE_LOCK(adapter);
5718        adapter->hw.dev_spec.ich8lan.eee_disable = (value != 0);
5719        em_init_locked(adapter);
5720        EM_CORE_UNLOCK(adapter);
5721        return (0);
5722 }
5723
5724 static int
5725 em_sysctl_debug_info(SYSCTL_HANDLER_ARGS)
5726 {
5727         struct adapter *adapter;
5728         int error;
5729         int result;
5730
5731         result = -1;
5732         error = sysctl_handle_int(oidp, &result, 0, req);
5733
5734         if (error || !req->newptr)
5735                 return (error);
5736
5737         if (result == 1) {
5738                 adapter = (struct adapter *)arg1;
5739                 em_print_debug_info(adapter);
5740         }
5741
5742         return (error);
5743 }
5744
5745 /*
5746 ** This routine is meant to be fluid, add whatever is
5747 ** needed for debugging a problem.  -jfv
5748 */
5749 static void
5750 em_print_debug_info(struct adapter *adapter)
5751 {
5752         device_t dev = adapter->dev;
5753         struct tx_ring *txr = adapter->tx_rings;
5754         struct rx_ring *rxr = adapter->rx_rings;
5755
5756         if (adapter->ifp->if_drv_flags & IFF_DRV_RUNNING)
5757                 printf("Interface is RUNNING ");
5758         else
5759                 printf("Interface is NOT RUNNING\n");
5760
5761         if (adapter->ifp->if_drv_flags & IFF_DRV_OACTIVE)
5762                 printf("and INACTIVE\n");
5763         else
5764                 printf("and ACTIVE\n");
5765
5766         device_printf(dev, "hw tdh = %d, hw tdt = %d\n",
5767             E1000_READ_REG(&adapter->hw, E1000_TDH(0)),
5768             E1000_READ_REG(&adapter->hw, E1000_TDT(0)));
5769         device_printf(dev, "hw rdh = %d, hw rdt = %d\n",
5770             E1000_READ_REG(&adapter->hw, E1000_RDH(0)),
5771             E1000_READ_REG(&adapter->hw, E1000_RDT(0)));
5772         device_printf(dev, "Tx Queue Status = %d\n", txr->queue_status);
5773         device_printf(dev, "TX descriptors avail = %d\n",
5774             txr->tx_avail);
5775         device_printf(dev, "Tx Descriptors avail failure = %ld\n",
5776             txr->no_desc_avail);
5777         device_printf(dev, "RX discarded packets = %ld\n",
5778             rxr->rx_discarded);
5779         device_printf(dev, "RX Next to Check = %d\n", rxr->next_to_check);
5780         device_printf(dev, "RX Next to Refresh = %d\n", rxr->next_to_refresh);
5781 }