/******************************************************************************

  Copyright (c) 2001-2011, Intel Corporation
  All rights reserved.

  Redistribution and use in source and binary forms, with or without
  modification, are permitted provided that the following conditions are met:

   1. Redistributions of source code must retain the above copyright notice,
      this list of conditions and the following disclaimer.

   2. Redistributions in binary form must reproduce the above copyright
      notice, this list of conditions and the following disclaimer in the
      documentation and/or other materials provided with the distribution.

   3. Neither the name of the Intel Corporation nor the names of its
      contributors may be used to endorse or promote products derived from
      this software without specific prior written permission.

  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
  AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
  LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
  INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
  CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
  ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
  POSSIBILITY OF SUCH DAMAGE.

******************************************************************************/
/*$FreeBSD$*/

#ifdef HAVE_KERNEL_OPTION_HEADERS
#include "opt_device_polling.h"
#include "opt_inet.h"
#include "opt_inet6.h"
#endif

#include <sys/param.h>
#include <sys/systm.h>
#if __FreeBSD_version >= 800000
#include <sys/buf_ring.h>
#endif
#include <sys/bus.h>
#include <sys/endian.h>
#include <sys/kernel.h>
#include <sys/kthread.h>
#include <sys/malloc.h>
#include <sys/mbuf.h>
#include <sys/module.h>
#include <sys/rman.h>
#include <sys/socket.h>
#include <sys/sockio.h>
#include <sys/sysctl.h>
#include <sys/taskqueue.h>
#include <sys/eventhandler.h>
#include <machine/bus.h>
#include <machine/resource.h>

#include <net/bpf.h>
#include <net/ethernet.h>
#include <net/if.h>
#include <net/if_arp.h>
#include <net/if_dl.h>
#include <net/if_media.h>

#include <net/if_types.h>
#include <net/if_vlan_var.h>

#include <netinet/in_systm.h>
#include <netinet/in.h>
#include <netinet/if_ether.h>
#include <netinet/ip.h>
#include <netinet/ip6.h>
#include <netinet/tcp.h>
#include <netinet/udp.h>

#include <machine/in_cksum.h>
#include <dev/led/led.h>
#include <dev/pci/pcivar.h>
#include <dev/pci/pcireg.h>

#include "e1000_api.h"
#include "e1000_82571.h"
#include "if_em.h"

/*********************************************************************
 *  Set this to one to display debug statistics
 *********************************************************************/
int     em_display_debug_stats = 0;

/*********************************************************************
 *  Driver version:
 *********************************************************************/
char em_driver_version[] = "7.3.2";

/*********************************************************************
 *  PCI Device ID Table
 *
 *  Used by probe to select the devices this driver attaches to.
 *  The last field stores an index into em_strings.
 *  The last entry must be all 0s.
 *
 *  { Vendor ID, Device ID, SubVendor ID, SubDevice ID, String Index }
 *********************************************************************/

static em_vendor_info_t em_vendor_info_array[] =
{
        /* Intel(R) PRO/1000 Network Connection */
        { 0x8086, E1000_DEV_ID_82571EB_COPPER,  PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82571EB_FIBER,   PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82571EB_SERDES,  PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82571EB_SERDES_DUAL,
                                                PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82571EB_SERDES_QUAD,
                                                PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82571EB_QUAD_COPPER,
                                                PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82571EB_QUAD_COPPER_LP,
                                                PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82571EB_QUAD_FIBER,
                                                PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82571PT_QUAD_COPPER,
                                                PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82572EI_COPPER,  PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82572EI_FIBER,   PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82572EI_SERDES,  PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82572EI,         PCI_ANY_ID, PCI_ANY_ID, 0},

        { 0x8086, E1000_DEV_ID_82573E,          PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82573E_IAMT,     PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82573L,          PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82583V,          PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_80003ES2LAN_COPPER_SPT,
                                                PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_80003ES2LAN_SERDES_SPT,
                                                PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_80003ES2LAN_COPPER_DPT,
                                                PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_80003ES2LAN_SERDES_DPT,
                                                PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_ICH8_IGP_M_AMT,  PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_ICH8_IGP_AMT,    PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_ICH8_IGP_C,      PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_ICH8_IFE,        PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_ICH8_IFE_GT,     PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_ICH8_IFE_G,      PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_ICH8_IGP_M,      PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_ICH8_82567V_3,   PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_ICH9_IGP_M_AMT,  PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_ICH9_IGP_AMT,    PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_ICH9_IGP_C,      PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_ICH9_IGP_M,      PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_ICH9_IGP_M_V,    PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_ICH9_IFE,        PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_ICH9_IFE_GT,     PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_ICH9_IFE_G,      PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_ICH9_BM,         PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82574L,          PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82574LA,         PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_ICH10_R_BM_LM,   PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_ICH10_R_BM_LF,   PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_ICH10_R_BM_V,    PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_ICH10_D_BM_LM,   PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_ICH10_D_BM_LF,   PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_ICH10_D_BM_V,    PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_PCH_M_HV_LM,     PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_PCH_M_HV_LC,     PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_PCH_D_HV_DM,     PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_PCH_D_HV_DC,     PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_PCH2_LV_LM,      PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_PCH2_LV_V,       PCI_ANY_ID, PCI_ANY_ID, 0},
        /* required last entry */
        { 0, 0, 0, 0, 0}
};

/*********************************************************************
 *  Table of branding strings for all supported NICs.
 *********************************************************************/

static char *em_strings[] = {
        "Intel(R) PRO/1000 Network Connection"
};

/*********************************************************************
 *  Function prototypes
 *********************************************************************/
static int      em_probe(device_t);
static int      em_attach(device_t);
static int      em_detach(device_t);
static int      em_shutdown(device_t);
static int      em_suspend(device_t);
static int      em_resume(device_t);
static void     em_start(struct ifnet *);
static void     em_start_locked(struct ifnet *, struct tx_ring *);
#ifdef EM_MULTIQUEUE
static int      em_mq_start(struct ifnet *, struct mbuf *);
static int      em_mq_start_locked(struct ifnet *,
                    struct tx_ring *, struct mbuf *);
static void     em_qflush(struct ifnet *);
#endif
static int      em_ioctl(struct ifnet *, u_long, caddr_t);
static void     em_init(void *);
static void     em_init_locked(struct adapter *);
static void     em_stop(void *);
static void     em_media_status(struct ifnet *, struct ifmediareq *);
static int      em_media_change(struct ifnet *);
static void     em_identify_hardware(struct adapter *);
static int      em_allocate_pci_resources(struct adapter *);
static int      em_allocate_legacy(struct adapter *);
static int      em_allocate_msix(struct adapter *);
static int      em_allocate_queues(struct adapter *);
static int      em_setup_msix(struct adapter *);
static void     em_free_pci_resources(struct adapter *);
static void     em_local_timer(void *);
static void     em_reset(struct adapter *);
static int      em_setup_interface(device_t, struct adapter *);

static void     em_setup_transmit_structures(struct adapter *);
static void     em_initialize_transmit_unit(struct adapter *);
static int      em_allocate_transmit_buffers(struct tx_ring *);
static void     em_free_transmit_structures(struct adapter *);
static void     em_free_transmit_buffers(struct tx_ring *);

static int      em_setup_receive_structures(struct adapter *);
static int      em_allocate_receive_buffers(struct rx_ring *);
static void     em_initialize_receive_unit(struct adapter *);
static void     em_free_receive_structures(struct adapter *);
static void     em_free_receive_buffers(struct rx_ring *);

static void     em_enable_intr(struct adapter *);
static void     em_disable_intr(struct adapter *);
static void     em_update_stats_counters(struct adapter *);
static void     em_add_hw_stats(struct adapter *adapter);
static bool     em_txeof(struct tx_ring *);
static bool     em_rxeof(struct rx_ring *, int, int *);
#ifndef __NO_STRICT_ALIGNMENT
static int      em_fixup_rx(struct rx_ring *);
#endif
static void     em_receive_checksum(struct e1000_rx_desc *, struct mbuf *);
static void     em_transmit_checksum_setup(struct tx_ring *, struct mbuf *, int,
                    struct ip *, u32 *, u32 *);
static void     em_tso_setup(struct tx_ring *, struct mbuf *, int, struct ip *,
                    struct tcphdr *, u32 *, u32 *);
static void     em_set_promisc(struct adapter *);
static void     em_disable_promisc(struct adapter *);
static void     em_set_multi(struct adapter *);
static void     em_update_link_status(struct adapter *);
static void     em_refresh_mbufs(struct rx_ring *, int);
static void     em_register_vlan(void *, struct ifnet *, u16);
static void     em_unregister_vlan(void *, struct ifnet *, u16);
static void     em_setup_vlan_hw_support(struct adapter *);
static int      em_xmit(struct tx_ring *, struct mbuf **);
static int      em_dma_malloc(struct adapter *, bus_size_t,
                    struct em_dma_alloc *, int);
static void     em_dma_free(struct adapter *, struct em_dma_alloc *);
static int      em_sysctl_nvm_info(SYSCTL_HANDLER_ARGS);
static void     em_print_nvm_info(struct adapter *);
static int      em_sysctl_debug_info(SYSCTL_HANDLER_ARGS);
static void     em_print_debug_info(struct adapter *);
static int      em_is_valid_ether_addr(u8 *);
static int      em_sysctl_int_delay(SYSCTL_HANDLER_ARGS);
static void     em_add_int_delay_sysctl(struct adapter *, const char *,
                    const char *, struct em_int_delay_info *, int, int);
/* Management and WOL Support */
static void     em_init_manageability(struct adapter *);
static void     em_release_manageability(struct adapter *);
static void     em_get_hw_control(struct adapter *);
static void     em_release_hw_control(struct adapter *);
static void     em_get_wakeup(device_t);
static void     em_enable_wakeup(device_t);
static int      em_enable_phy_wakeup(struct adapter *);
static void     em_led_func(void *, int);
static void     em_disable_aspm(struct adapter *);

static int      em_irq_fast(void *);

/* MSIX handlers */
static void     em_msix_tx(void *);
static void     em_msix_rx(void *);
static void     em_msix_link(void *);
static void     em_handle_tx(void *context, int pending);
static void     em_handle_rx(void *context, int pending);
static void     em_handle_link(void *context, int pending);

static void     em_set_sysctl_value(struct adapter *, const char *,
                    const char *, int *, int);
static int      em_set_flowcntl(SYSCTL_HANDLER_ARGS);

static __inline void em_rx_discard(struct rx_ring *, int);

#ifdef DEVICE_POLLING
static poll_handler_t em_poll;
#endif /* DEVICE_POLLING */

/*********************************************************************
 *  FreeBSD Device Interface Entry Points
 *********************************************************************/

static device_method_t em_methods[] = {
        /* Device interface */
        DEVMETHOD(device_probe, em_probe),
        DEVMETHOD(device_attach, em_attach),
        DEVMETHOD(device_detach, em_detach),
        DEVMETHOD(device_shutdown, em_shutdown),
        DEVMETHOD(device_suspend, em_suspend),
        DEVMETHOD(device_resume, em_resume),
        {0, 0}
};

static driver_t em_driver = {
        "em", em_methods, sizeof(struct adapter),
};

devclass_t em_devclass;
DRIVER_MODULE(em, pci, em_driver, em_devclass, 0, 0);
MODULE_DEPEND(em, pci, 1, 1, 1);
MODULE_DEPEND(em, ether, 1, 1, 1);

/*********************************************************************
 *  Tunable default values.
 *********************************************************************/

#define EM_TICKS_TO_USECS(ticks)        ((1024 * (ticks) + 500) / 1000)
#define EM_USECS_TO_TICKS(usecs)        ((1000 * (usecs) + 512) / 1024)
#define M_TSO_LEN                       66
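
/*
 * The interrupt-delay registers these macros feed (TIDV/RDTR and the
 * absolute-delay variants below) count in units of 1.024 usecs, which is
 * why the conversions scale by 1024/1000 with rounding. A worked example
 * (the tick value is illustrative, not a driver default): for 64 ticks,
 *
 *   EM_TICKS_TO_USECS(64) = (1024 * 64 + 500) / 1000 = 66 usecs
 *   EM_USECS_TO_TICKS(66) = (1000 * 66 + 512) / 1024 = 64 ticks
 *
 * so the two macros round-trip cleanly for typical values.
 */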

/* Allow common code without TSO */
#ifndef CSUM_TSO
#define CSUM_TSO        0
#endif

static SYSCTL_NODE(_hw, OID_AUTO, em, CTLFLAG_RD, 0, "EM driver parameters");

static int em_tx_int_delay_dflt = EM_TICKS_TO_USECS(EM_TIDV);
static int em_rx_int_delay_dflt = EM_TICKS_TO_USECS(EM_RDTR);
TUNABLE_INT("hw.em.tx_int_delay", &em_tx_int_delay_dflt);
TUNABLE_INT("hw.em.rx_int_delay", &em_rx_int_delay_dflt);
SYSCTL_INT(_hw_em, OID_AUTO, tx_int_delay, CTLFLAG_RDTUN, &em_tx_int_delay_dflt,
    0, "Default transmit interrupt delay in usecs");
SYSCTL_INT(_hw_em, OID_AUTO, rx_int_delay, CTLFLAG_RDTUN, &em_rx_int_delay_dflt,
    0, "Default receive interrupt delay in usecs");

static int em_tx_abs_int_delay_dflt = EM_TICKS_TO_USECS(EM_TADV);
static int em_rx_abs_int_delay_dflt = EM_TICKS_TO_USECS(EM_RADV);
TUNABLE_INT("hw.em.tx_abs_int_delay", &em_tx_abs_int_delay_dflt);
TUNABLE_INT("hw.em.rx_abs_int_delay", &em_rx_abs_int_delay_dflt);
SYSCTL_INT(_hw_em, OID_AUTO, tx_abs_int_delay, CTLFLAG_RDTUN,
    &em_tx_abs_int_delay_dflt, 0,
    "Default transmit interrupt delay limit in usecs");
SYSCTL_INT(_hw_em, OID_AUTO, rx_abs_int_delay, CTLFLAG_RDTUN,
    &em_rx_abs_int_delay_dflt, 0,
    "Default receive interrupt delay limit in usecs");

static int em_rxd = EM_DEFAULT_RXD;
static int em_txd = EM_DEFAULT_TXD;
TUNABLE_INT("hw.em.rxd", &em_rxd);
TUNABLE_INT("hw.em.txd", &em_txd);
SYSCTL_INT(_hw_em, OID_AUTO, rxd, CTLFLAG_RDTUN, &em_rxd, 0,
    "Number of receive descriptors per queue");
SYSCTL_INT(_hw_em, OID_AUTO, txd, CTLFLAG_RDTUN, &em_txd, 0,
    "Number of transmit descriptors per queue");

static int em_smart_pwr_down = FALSE;
TUNABLE_INT("hw.em.smart_pwr_down", &em_smart_pwr_down);
SYSCTL_INT(_hw_em, OID_AUTO, smart_pwr_down, CTLFLAG_RDTUN, &em_smart_pwr_down,
    0, "Set to true to leave smart power down enabled on newer adapters");

/* Controls whether promiscuous also shows bad packets */
static int em_debug_sbp = FALSE;
TUNABLE_INT("hw.em.sbp", &em_debug_sbp);
SYSCTL_INT(_hw_em, OID_AUTO, sbp, CTLFLAG_RDTUN, &em_debug_sbp, 0,
    "Show bad packets in promiscuous mode");

static int em_enable_msix = TRUE;
TUNABLE_INT("hw.em.enable_msix", &em_enable_msix);
SYSCTL_INT(_hw_em, OID_AUTO, enable_msix, CTLFLAG_RDTUN, &em_enable_msix, 0,
    "Enable MSI-X interrupts");

/* How many packets rxeof tries to clean at a time */
static int em_rx_process_limit = 100;
TUNABLE_INT("hw.em.rx_process_limit", &em_rx_process_limit);
SYSCTL_INT(_hw_em, OID_AUTO, rx_process_limit, CTLFLAG_RDTUN,
    &em_rx_process_limit, 0,
    "Maximum number of received packets to process "
    "at a time, -1 means unlimited");

/* Energy efficient ethernet - default to OFF */
static int eee_setting = 0;
TUNABLE_INT("hw.em.eee_setting", &eee_setting);
SYSCTL_INT(_hw_em, OID_AUTO, eee_setting, CTLFLAG_RDTUN, &eee_setting, 0,
    "Enable Energy Efficient Ethernet");

/* Global used in WOL setup with multiport cards */
static int global_quad_port_a = 0;

#ifdef DEV_NETMAP       /* see ixgbe.c for details */
#include <dev/netmap/if_em_netmap.h>
#endif /* DEV_NETMAP */

/*********************************************************************
 *  Device identification routine
 *
 *  em_probe determines whether the driver should be loaded on an
 *  adapter, based on the adapter's PCI vendor/device ID.
 *
 *  return BUS_PROBE_DEFAULT on success, positive on failure
 *********************************************************************/

static int
em_probe(device_t dev)
{
        char            adapter_name[60];
        u16             pci_vendor_id = 0;
        u16             pci_device_id = 0;
        u16             pci_subvendor_id = 0;
        u16             pci_subdevice_id = 0;
        em_vendor_info_t *ent;

        INIT_DEBUGOUT("em_probe: begin");

        pci_vendor_id = pci_get_vendor(dev);
        if (pci_vendor_id != EM_VENDOR_ID)
                return (ENXIO);

        pci_device_id = pci_get_device(dev);
        pci_subvendor_id = pci_get_subvendor(dev);
        pci_subdevice_id = pci_get_subdevice(dev);

        ent = em_vendor_info_array;
        while (ent->vendor_id != 0) {
                if ((pci_vendor_id == ent->vendor_id) &&
                    (pci_device_id == ent->device_id) &&
                    ((pci_subvendor_id == ent->subvendor_id) ||
                    (ent->subvendor_id == PCI_ANY_ID)) &&
                    ((pci_subdevice_id == ent->subdevice_id) ||
                    (ent->subdevice_id == PCI_ANY_ID))) {
                        sprintf(adapter_name, "%s %s",
                                em_strings[ent->index],
                                em_driver_version);
                        device_set_desc_copy(dev, adapter_name);
                        return (BUS_PROBE_DEFAULT);
                }
                ent++;
        }

        return (ENXIO);
}

/*********************************************************************
 *  Device initialization routine
 *
 *  The attach entry point is called when the driver is being loaded.
 *  This routine identifies the type of hardware, allocates all resources
 *  and initializes the hardware.
 *
 *  return 0 on success, positive on failure
 *********************************************************************/

static int
em_attach(device_t dev)
{
        struct adapter  *adapter;
        struct e1000_hw *hw;
        int             error = 0;

        INIT_DEBUGOUT("em_attach: begin");

        if (resource_disabled("em", device_get_unit(dev))) {
                device_printf(dev, "Disabled by device hint\n");
                return (ENXIO);
        }

        adapter = device_get_softc(dev);
        adapter->dev = adapter->osdep.dev = dev;
        hw = &adapter->hw;
        EM_CORE_LOCK_INIT(adapter, device_get_nameunit(dev));

        /* SYSCTL stuff */
        SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
            SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
            OID_AUTO, "nvm", CTLTYPE_INT|CTLFLAG_RW, adapter, 0,
            em_sysctl_nvm_info, "I", "NVM Information");

        SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
            SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
            OID_AUTO, "debug", CTLTYPE_INT|CTLFLAG_RW, adapter, 0,
            em_sysctl_debug_info, "I", "Debug Information");

        SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
            SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
            OID_AUTO, "fc", CTLTYPE_INT|CTLFLAG_RW, adapter, 0,
            em_set_flowcntl, "I", "Flow Control");

        callout_init_mtx(&adapter->timer, &adapter->core_mtx, 0);

        /* Determine hardware and mac info */
        em_identify_hardware(adapter);

        /* Setup PCI resources */
        if (em_allocate_pci_resources(adapter)) {
                device_printf(dev, "Allocation of PCI resources failed\n");
                error = ENXIO;
                goto err_pci;
        }

        /*
        ** For ICH8 and family we need to
        ** map the flash memory, and this
        ** must happen after the MAC is
        ** identified
        */
        if ((hw->mac.type == e1000_ich8lan) ||
            (hw->mac.type == e1000_ich9lan) ||
            (hw->mac.type == e1000_ich10lan) ||
            (hw->mac.type == e1000_pchlan) ||
            (hw->mac.type == e1000_pch2lan)) {
                int rid = EM_BAR_TYPE_FLASH;
                adapter->flash = bus_alloc_resource_any(dev,
                    SYS_RES_MEMORY, &rid, RF_ACTIVE);
                if (adapter->flash == NULL) {
                        device_printf(dev, "Mapping of Flash failed\n");
                        error = ENXIO;
                        goto err_pci;
                }
                /* This is used in the shared code */
                hw->flash_address = (u8 *)adapter->flash;
                adapter->osdep.flash_bus_space_tag =
                    rman_get_bustag(adapter->flash);
                adapter->osdep.flash_bus_space_handle =
                    rman_get_bushandle(adapter->flash);
        }

        /* Do Shared Code initialization */
        if (e1000_setup_init_funcs(hw, TRUE)) {
                device_printf(dev, "Setup of Shared code failed\n");
                error = ENXIO;
                goto err_pci;
        }

        e1000_get_bus_info(hw);

        /* Set up some sysctls for the tunable interrupt delays */
        em_add_int_delay_sysctl(adapter, "rx_int_delay",
            "receive interrupt delay in usecs", &adapter->rx_int_delay,
            E1000_REGISTER(hw, E1000_RDTR), em_rx_int_delay_dflt);
        em_add_int_delay_sysctl(adapter, "tx_int_delay",
            "transmit interrupt delay in usecs", &adapter->tx_int_delay,
            E1000_REGISTER(hw, E1000_TIDV), em_tx_int_delay_dflt);
        em_add_int_delay_sysctl(adapter, "rx_abs_int_delay",
            "receive interrupt delay limit in usecs",
            &adapter->rx_abs_int_delay,
            E1000_REGISTER(hw, E1000_RADV),
            em_rx_abs_int_delay_dflt);
        em_add_int_delay_sysctl(adapter, "tx_abs_int_delay",
            "transmit interrupt delay limit in usecs",
            &adapter->tx_abs_int_delay,
            E1000_REGISTER(hw, E1000_TADV),
            em_tx_abs_int_delay_dflt);

        /* Sysctl for limiting the amount of work done in the taskqueue */
        em_set_sysctl_value(adapter, "rx_processing_limit",
            "max number of rx packets to process", &adapter->rx_process_limit,
            em_rx_process_limit);

        /*
         * Validate the number of transmit and receive descriptors. The
         * count must not exceed the hardware maximum, and the ring size
         * in bytes must be a multiple of EM_DBA_ALIGN.
         */
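        /*
         * A worked example (a sketch assuming EM_DBA_ALIGN is 128, as
         * defined in if_em.h, and the 16-byte legacy descriptor layout):
         * the ring is suitably aligned whenever the descriptor count is
         * a multiple of 8, so a tunable such as hw.em.txd=100
         * (100 * 16 = 1600 bytes, not a multiple of 128) would fail the
         * test below and fall back to EM_DEFAULT_TXD.
         */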
        if (((em_txd * sizeof(struct e1000_tx_desc)) % EM_DBA_ALIGN) != 0 ||
            (em_txd > EM_MAX_TXD) || (em_txd < EM_MIN_TXD)) {
                device_printf(dev, "Using %d TX descriptors instead of %d!\n",
                    EM_DEFAULT_TXD, em_txd);
                adapter->num_tx_desc = EM_DEFAULT_TXD;
        } else
                adapter->num_tx_desc = em_txd;

        if (((em_rxd * sizeof(struct e1000_rx_desc)) % EM_DBA_ALIGN) != 0 ||
            (em_rxd > EM_MAX_RXD) || (em_rxd < EM_MIN_RXD)) {
                device_printf(dev, "Using %d RX descriptors instead of %d!\n",
                    EM_DEFAULT_RXD, em_rxd);
                adapter->num_rx_desc = EM_DEFAULT_RXD;
        } else
                adapter->num_rx_desc = em_rxd;

        hw->mac.autoneg = DO_AUTO_NEG;
        hw->phy.autoneg_wait_to_complete = FALSE;
        hw->phy.autoneg_advertised = AUTONEG_ADV_DEFAULT;

        /* Copper options */
        if (hw->phy.media_type == e1000_media_type_copper) {
                hw->phy.mdix = AUTO_ALL_MODES;
                hw->phy.disable_polarity_correction = FALSE;
                hw->phy.ms_type = EM_MASTER_SLAVE;
        }

        /*
         * Set the frame limits assuming
         * standard Ethernet-sized frames.
         */
        adapter->max_frame_size = ETHERMTU + ETHER_HDR_LEN + ETHERNET_FCS_SIZE;
        adapter->min_frame_size = ETH_ZLEN + ETHERNET_FCS_SIZE;
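
        /*
         * With these constants as conventionally defined (ETHERMTU 1500,
         * ETHER_HDR_LEN 14, ETHERNET_FCS_SIZE 4) this works out to the
         * classic 1518-byte maximum Ethernet frame; ETH_ZLEN (60) plus
         * the FCS gives the 64-byte minimum.
         */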

        /*
         * This controls when hardware reports transmit completion
         * status.
         */
        hw->mac.report_tx_early = 1;

        /*
        ** Get queue/ring memory
        */
        if (em_allocate_queues(adapter)) {
                error = ENOMEM;
                goto err_pci;
        }

        /* Allocate multicast array memory. */
        adapter->mta = malloc(sizeof(u8) * ETH_ADDR_LEN *
            MAX_NUM_MULTICAST_ADDRESSES, M_DEVBUF, M_NOWAIT);
        if (adapter->mta == NULL) {
                device_printf(dev, "Can not allocate multicast setup array\n");
                error = ENOMEM;
                goto err_late;
        }

        /* Check SOL/IDER usage */
        if (e1000_check_reset_block(hw))
                device_printf(dev, "PHY reset is blocked"
                    " due to SOL/IDER session.\n");

        /* Sysctl for setting Energy Efficient Ethernet */
        em_set_sysctl_value(adapter, "eee_control",
            "enable Energy Efficient Ethernet",
            &hw->dev_spec.ich8lan.eee_disable, eee_setting);

        /*
        ** Start from a known state; this is
        ** important for reading the NVM and
        ** MAC address from it.
        */
        e1000_reset_hw(hw);

        /* Make sure we have a good EEPROM before we read from it */
        if (e1000_validate_nvm_checksum(hw) < 0) {
                /*
                ** Some PCI-E parts fail the first check due to
                ** the link being in a sleep state; call it again.
                ** If it fails a second time, it's a real issue.
                */
                if (e1000_validate_nvm_checksum(hw) < 0) {
                        device_printf(dev,
                            "The EEPROM Checksum Is Not Valid\n");
                        error = EIO;
                        goto err_late;
                }
        }

        /* Copy the permanent MAC address out of the EEPROM */
        if (e1000_read_mac_addr(hw) < 0) {
                device_printf(dev, "EEPROM read error while reading MAC"
                    " address\n");
                error = EIO;
                goto err_late;
        }

        if (!em_is_valid_ether_addr(hw->mac.addr)) {
                device_printf(dev, "Invalid MAC address\n");
                error = EIO;
                goto err_late;
        }

        /*
        ** Do interrupt configuration
        */
        if (adapter->msix > 1) /* Do MSIX */
                error = em_allocate_msix(adapter);
        else  /* MSI or Legacy */
                error = em_allocate_legacy(adapter);
        if (error)
                goto err_late;

        /*
         * Get Wake-on-Lan and Management info for later use
         */
        em_get_wakeup(dev);

        /* Setup OS specific network interface */
        if (em_setup_interface(dev, adapter) != 0)
                goto err_late;

        em_reset(adapter);

        /* Initialize statistics */
        em_update_stats_counters(adapter);

        hw->mac.get_link_status = 1;
        em_update_link_status(adapter);

        /* Register for VLAN events */
        adapter->vlan_attach = EVENTHANDLER_REGISTER(vlan_config,
            em_register_vlan, adapter, EVENTHANDLER_PRI_FIRST);
        adapter->vlan_detach = EVENTHANDLER_REGISTER(vlan_unconfig,
            em_unregister_vlan, adapter, EVENTHANDLER_PRI_FIRST);

        em_add_hw_stats(adapter);

        /* Non-AMT based hardware can now take control from firmware */
        if (adapter->has_manage && !adapter->has_amt)
                em_get_hw_control(adapter);

        /* Tell the stack that the interface is not active */
        adapter->ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
        adapter->ifp->if_drv_flags |= IFF_DRV_OACTIVE;

        adapter->led_dev = led_create(em_led_func, adapter,
            device_get_nameunit(dev));
#ifdef DEV_NETMAP
        em_netmap_attach(adapter);
#endif /* DEV_NETMAP */

        INIT_DEBUGOUT("em_attach: end");

        return (0);

err_late:
        em_free_transmit_structures(adapter);
        em_free_receive_structures(adapter);
        em_release_hw_control(adapter);
        if (adapter->ifp != NULL)
                if_free(adapter->ifp);
err_pci:
        em_free_pci_resources(adapter);
        free(adapter->mta, M_DEVBUF);
        EM_CORE_LOCK_DESTROY(adapter);

        return (error);
}

/*********************************************************************
 *  Device removal routine
 *
 *  The detach entry point is called when the driver is being removed.
 *  This routine stops the adapter and deallocates all the resources
 *  that were allocated for driver operation.
 *
 *  return 0 on success, positive on failure
 *********************************************************************/

static int
em_detach(device_t dev)
{
        struct adapter  *adapter = device_get_softc(dev);
        struct ifnet    *ifp = adapter->ifp;

        INIT_DEBUGOUT("em_detach: begin");

        /* Make sure VLANs are not using the driver */
        if (adapter->ifp->if_vlantrunk != NULL) {
                device_printf(dev, "Vlan in use, detach first\n");
                return (EBUSY);
        }

#ifdef DEVICE_POLLING
        if (ifp->if_capenable & IFCAP_POLLING)
                ether_poll_deregister(ifp);
#endif

        if (adapter->led_dev != NULL)
                led_destroy(adapter->led_dev);

        EM_CORE_LOCK(adapter);
        adapter->in_detach = 1;
        em_stop(adapter);
        EM_CORE_UNLOCK(adapter);
        EM_CORE_LOCK_DESTROY(adapter);

        e1000_phy_hw_reset(&adapter->hw);

        em_release_manageability(adapter);
        em_release_hw_control(adapter);

        /* Unregister VLAN events */
        if (adapter->vlan_attach != NULL)
                EVENTHANDLER_DEREGISTER(vlan_config, adapter->vlan_attach);
        if (adapter->vlan_detach != NULL)
                EVENTHANDLER_DEREGISTER(vlan_unconfig, adapter->vlan_detach);

        ether_ifdetach(adapter->ifp);
        callout_drain(&adapter->timer);

#ifdef DEV_NETMAP
        netmap_detach(ifp);
#endif /* DEV_NETMAP */

        em_free_pci_resources(adapter);
        bus_generic_detach(dev);
        if_free(ifp);

        em_free_transmit_structures(adapter);
        em_free_receive_structures(adapter);

        em_release_hw_control(adapter);
        free(adapter->mta, M_DEVBUF);

        return (0);
}

/*********************************************************************
 *
 *  Shutdown entry point
 *
 **********************************************************************/

static int
em_shutdown(device_t dev)
{
        return em_suspend(dev);
}

/*
 * Suspend/resume device methods.
 */
static int
em_suspend(device_t dev)
{
        struct adapter *adapter = device_get_softc(dev);

        EM_CORE_LOCK(adapter);

        em_release_manageability(adapter);
        em_release_hw_control(adapter);
        em_enable_wakeup(dev);

        EM_CORE_UNLOCK(adapter);

        return bus_generic_suspend(dev);
}

static int
em_resume(device_t dev)
{
        struct adapter *adapter = device_get_softc(dev);
        struct ifnet *ifp = adapter->ifp;

        EM_CORE_LOCK(adapter);
        if (adapter->hw.mac.type == e1000_pch2lan)
                e1000_resume_workarounds_pchlan(&adapter->hw);
        em_init_locked(adapter);
        em_init_manageability(adapter);
        EM_CORE_UNLOCK(adapter);
        em_start(ifp);

        return bus_generic_resume(dev);
}

#ifdef EM_MULTIQUEUE
/*********************************************************************
 *  Multiqueue Transmit routines
 *
 *  em_mq_start is called by the stack to initiate a transmit.
 *  However, if the hardware is busy the driver can queue the request
 *  rather than do an immediate send. That queuing ability, rather
 *  than multiple hardware TX queues, is where this driver gains its
 *  advantage.
 **********************************************************************/
static int
em_mq_start_locked(struct ifnet *ifp, struct tx_ring *txr, struct mbuf *m)
{
        struct adapter  *adapter = txr->adapter;
        struct mbuf     *next;
        int             err = 0, enq = 0;

        if ((ifp->if_drv_flags & (IFF_DRV_RUNNING | IFF_DRV_OACTIVE)) !=
            IFF_DRV_RUNNING || adapter->link_active == 0) {
                if (m != NULL)
                        err = drbr_enqueue(ifp, txr->br, m);
                return (err);
        }

        enq = 0;
        if (m == NULL) {
                next = drbr_dequeue(ifp, txr->br);
        } else if (drbr_needs_enqueue(ifp, txr->br)) {
                if ((err = drbr_enqueue(ifp, txr->br, m)) != 0)
                        return (err);
                next = drbr_dequeue(ifp, txr->br);
        } else
                next = m;

        /* Process the queue */
        while (next != NULL) {
                if ((err = em_xmit(txr, &next)) != 0) {
                        if (next != NULL)
                                err = drbr_enqueue(ifp, txr->br, next);
                        break;
                }
                enq++;
                drbr_stats_update(ifp, next->m_pkthdr.len, next->m_flags);
                ETHER_BPF_MTAP(ifp, next);
                if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
                        break;
                next = drbr_dequeue(ifp, txr->br);
        }

        if (enq > 0) {
                /* Set the watchdog */
                txr->queue_status = EM_QUEUE_WORKING;
                txr->watchdog_time = ticks;
        }

        if (txr->tx_avail < EM_MAX_SCATTER)
                em_txeof(txr);
        if (txr->tx_avail < EM_MAX_SCATTER)
                ifp->if_drv_flags |= IFF_DRV_OACTIVE;
        return (err);
}

/*
** Multiqueue capable stack interface
*/
static int
em_mq_start(struct ifnet *ifp, struct mbuf *m)
{
        struct adapter  *adapter = ifp->if_softc;
        struct tx_ring  *txr = adapter->tx_rings;
        int             error;

        if (EM_TX_TRYLOCK(txr)) {
                error = em_mq_start_locked(ifp, txr, m);
                EM_TX_UNLOCK(txr);
        } else
                error = drbr_enqueue(ifp, txr->br, m);

        return (error);
}

/*
** Flush all ring buffers
*/
static void
em_qflush(struct ifnet *ifp)
{
        struct adapter  *adapter = ifp->if_softc;
        struct tx_ring  *txr = adapter->tx_rings;
        struct mbuf     *m;

        for (int i = 0; i < adapter->num_queues; i++, txr++) {
                EM_TX_LOCK(txr);
                while ((m = buf_ring_dequeue_sc(txr->br)) != NULL)
                        m_freem(m);
                EM_TX_UNLOCK(txr);
        }
        if_qflush(ifp);
}
#endif /* EM_MULTIQUEUE */
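
/*
 * Note: the multiqueue entry points above take effect only once they
 * are hooked into the ifnet, which em_setup_interface() (not shown in
 * this excerpt) does in the usual FreeBSD pattern. A sketch, not the
 * literal code from that routine:
 *
 *   ifp->if_transmit = em_mq_start;
 *   ifp->if_qflush = em_qflush;
 *
 * Without EM_MULTIQUEUE the stack instead uses the if_start path,
 * i.e. em_start() below.
 */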

static void
em_start_locked(struct ifnet *ifp, struct tx_ring *txr)
{
        struct adapter  *adapter = ifp->if_softc;
        struct mbuf     *m_head;

        EM_TX_LOCK_ASSERT(txr);

        if ((ifp->if_drv_flags & (IFF_DRV_RUNNING|IFF_DRV_OACTIVE)) !=
            IFF_DRV_RUNNING)
                return;

        if (!adapter->link_active)
                return;

        while (!IFQ_DRV_IS_EMPTY(&ifp->if_snd)) {
                /* Call cleanup if number of TX descriptors low */
                if (txr->tx_avail <= EM_TX_CLEANUP_THRESHOLD)
                        em_txeof(txr);
                if (txr->tx_avail < EM_MAX_SCATTER) {
                        ifp->if_drv_flags |= IFF_DRV_OACTIVE;
                        break;
                }
                IFQ_DRV_DEQUEUE(&ifp->if_snd, m_head);
                if (m_head == NULL)
                        break;
                /*
                 *  Encapsulation can modify our pointer, and/or make it
                 *  NULL on failure.  In that event, we can't requeue.
                 */
                if (em_xmit(txr, &m_head)) {
                        if (m_head == NULL)
                                break;
                        IFQ_DRV_PREPEND(&ifp->if_snd, m_head);
                        break;
                }

                /* Send a copy of the frame to the BPF listener */
                ETHER_BPF_MTAP(ifp, m_head);

                /* Set timeout in case hardware has problems transmitting. */
                txr->watchdog_time = ticks;
                txr->queue_status = EM_QUEUE_WORKING;
        }

        return;
}

static void
em_start(struct ifnet *ifp)
{
        struct adapter  *adapter = ifp->if_softc;
        struct tx_ring  *txr = adapter->tx_rings;

        if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
                EM_TX_LOCK(txr);
                em_start_locked(ifp, txr);
                EM_TX_UNLOCK(txr);
        }
        /*
        ** If we went inactive, schedule
        ** a task to clean up.
        */
        if (ifp->if_drv_flags & IFF_DRV_OACTIVE)
                taskqueue_enqueue(txr->tq, &txr->tx_task);
        return;
}

/*********************************************************************
 *  Ioctl entry point
 *
 *  em_ioctl is called when the user wants to configure the
 *  interface.
 *
 *  return 0 on success, positive on failure
 **********************************************************************/

static int
em_ioctl(struct ifnet *ifp, u_long command, caddr_t data)
{
        struct adapter  *adapter = ifp->if_softc;
        struct ifreq    *ifr = (struct ifreq *)data;
#if defined(INET) || defined(INET6)
        struct ifaddr   *ifa = (struct ifaddr *)data;
#endif
        bool            avoid_reset = FALSE;
        int             error = 0;

        if (adapter->in_detach)
                return (error);

        switch (command) {
        case SIOCSIFADDR:
#ifdef INET
                if (ifa->ifa_addr->sa_family == AF_INET)
                        avoid_reset = TRUE;
#endif
#ifdef INET6
                if (ifa->ifa_addr->sa_family == AF_INET6)
                        avoid_reset = TRUE;
#endif
                /*
                ** Calling init results in link renegotiation,
                ** so we avoid doing it when possible.
                */
                if (avoid_reset) {
                        ifp->if_flags |= IFF_UP;
                        if (!(ifp->if_drv_flags & IFF_DRV_RUNNING))
                                em_init(adapter);
#ifdef INET
                        if (!(ifp->if_flags & IFF_NOARP))
                                arp_ifinit(ifp, ifa);
#endif
                } else
                        error = ether_ioctl(ifp, command, data);
                break;
        case SIOCSIFMTU:
            {
                int max_frame_size;

                IOCTL_DEBUGOUT("ioctl rcv'd: SIOCSIFMTU (Set Interface MTU)");

                EM_CORE_LOCK(adapter);
                switch (adapter->hw.mac.type) {
                case e1000_82571:
                case e1000_82572:
                case e1000_ich9lan:
                case e1000_ich10lan:
                case e1000_pch2lan:
                case e1000_82574:
                case e1000_82583:
                case e1000_80003es2lan: /* 9K Jumbo Frame size */
                        max_frame_size = 9234;
                        break;
                case e1000_pchlan:
                        max_frame_size = 4096;
                        break;
                        /* Adapters that do not support jumbo frames */
                case e1000_ich8lan:
                        max_frame_size = ETHER_MAX_LEN;
                        break;
                default:
                        max_frame_size = MAX_JUMBO_FRAME_SIZE;
                }
                if (ifr->ifr_mtu > max_frame_size - ETHER_HDR_LEN -
                    ETHER_CRC_LEN) {
                        EM_CORE_UNLOCK(adapter);
                        error = EINVAL;
                        break;
                }

                ifp->if_mtu = ifr->ifr_mtu;
                adapter->max_frame_size =
                    ifp->if_mtu + ETHER_HDR_LEN + ETHER_CRC_LEN;
                em_init_locked(adapter);
                EM_CORE_UNLOCK(adapter);
                break;
            }
        case SIOCSIFFLAGS:
                IOCTL_DEBUGOUT("ioctl rcv'd:\
                    SIOCSIFFLAGS (Set Interface Flags)");
                EM_CORE_LOCK(adapter);
                if (ifp->if_flags & IFF_UP) {
                        if ((ifp->if_drv_flags & IFF_DRV_RUNNING)) {
                                if ((ifp->if_flags ^ adapter->if_flags) &
                                    (IFF_PROMISC | IFF_ALLMULTI)) {
                                        em_disable_promisc(adapter);
                                        em_set_promisc(adapter);
                                }
                        } else
                                em_init_locked(adapter);
                } else
                        if (ifp->if_drv_flags & IFF_DRV_RUNNING)
                                em_stop(adapter);
                adapter->if_flags = ifp->if_flags;
                EM_CORE_UNLOCK(adapter);
                break;
        case SIOCADDMULTI:
        case SIOCDELMULTI:
                IOCTL_DEBUGOUT("ioctl rcv'd: SIOC(ADD|DEL)MULTI");
                if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
                        EM_CORE_LOCK(adapter);
                        em_disable_intr(adapter);
                        em_set_multi(adapter);
#ifdef DEVICE_POLLING
                        if (!(ifp->if_capenable & IFCAP_POLLING))
#endif
                                em_enable_intr(adapter);
                        EM_CORE_UNLOCK(adapter);
                }
                break;
        case SIOCSIFMEDIA:
                /* Check SOL/IDER usage */
                EM_CORE_LOCK(adapter);
                if (e1000_check_reset_block(&adapter->hw)) {
                        EM_CORE_UNLOCK(adapter);
                        device_printf(adapter->dev, "Media change is"
                            " blocked due to SOL/IDER session.\n");
                        break;
                }
                EM_CORE_UNLOCK(adapter);
                /* falls thru */
        case SIOCGIFMEDIA:
                IOCTL_DEBUGOUT("ioctl rcv'd: \
                    SIOCxIFMEDIA (Get/Set Interface Media)");
                error = ifmedia_ioctl(ifp, ifr, &adapter->media, command);
                break;
        case SIOCSIFCAP:
            {
                int mask, reinit;

                IOCTL_DEBUGOUT("ioctl rcv'd: SIOCSIFCAP (Set Capabilities)");
                reinit = 0;
                mask = ifr->ifr_reqcap ^ ifp->if_capenable;
#ifdef DEVICE_POLLING
                if (mask & IFCAP_POLLING) {
                        if (ifr->ifr_reqcap & IFCAP_POLLING) {
                                error = ether_poll_register(em_poll, ifp);
                                if (error)
                                        return (error);
                                EM_CORE_LOCK(adapter);
                                em_disable_intr(adapter);
                                ifp->if_capenable |= IFCAP_POLLING;
                                EM_CORE_UNLOCK(adapter);
                        } else {
                                error = ether_poll_deregister(ifp);
                                /* Enable interrupt even in error case */
                                EM_CORE_LOCK(adapter);
                                em_enable_intr(adapter);
                                ifp->if_capenable &= ~IFCAP_POLLING;
                                EM_CORE_UNLOCK(adapter);
                        }
                }
#endif
                if (mask & IFCAP_HWCSUM) {
                        ifp->if_capenable ^= IFCAP_HWCSUM;
                        reinit = 1;
                }
                if (mask & IFCAP_TSO4) {
                        ifp->if_capenable ^= IFCAP_TSO4;
                        reinit = 1;
                }
                if (mask & IFCAP_VLAN_HWTAGGING) {
                        ifp->if_capenable ^= IFCAP_VLAN_HWTAGGING;
                        reinit = 1;
                }
                if (mask & IFCAP_VLAN_HWFILTER) {
                        ifp->if_capenable ^= IFCAP_VLAN_HWFILTER;
                        reinit = 1;
                }
                if (mask & IFCAP_VLAN_HWTSO) {
                        ifp->if_capenable ^= IFCAP_VLAN_HWTSO;
                        reinit = 1;
                }
                if ((mask & IFCAP_WOL) &&
                    (ifp->if_capabilities & IFCAP_WOL) != 0) {
                        if (mask & IFCAP_WOL_MCAST)
                                ifp->if_capenable ^= IFCAP_WOL_MCAST;
                        if (mask & IFCAP_WOL_MAGIC)
                                ifp->if_capenable ^= IFCAP_WOL_MAGIC;
                }
                if (reinit && (ifp->if_drv_flags & IFF_DRV_RUNNING))
                        em_init(adapter);
                VLAN_CAPABILITIES(ifp);
                break;
            }

        default:
                error = ether_ioctl(ifp, command, data);
                break;
        }

        return (error);
}
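
/*
 * For example, the SIOCSIFMTU path above is what runs when the
 * administrator issues something like:
 *
 *   ifconfig em0 mtu 9000
 *
 * which succeeds only if the requested MTU fits within the per-MAC
 * max_frame_size chosen in the switch statement above.
 */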

/*********************************************************************
 *  Init entry point
 *
 *  This routine is used in two ways. It is used by the stack as
 *  the init entry point in the network interface structure. It is
 *  also used by the driver as a hw/sw initialization routine to get
 *  to a consistent state.
 **********************************************************************/

static void
em_init_locked(struct adapter *adapter)
{
        struct ifnet    *ifp = adapter->ifp;
        device_t        dev = adapter->dev;

        INIT_DEBUGOUT("em_init: begin");

        EM_CORE_LOCK_ASSERT(adapter);

        em_disable_intr(adapter);
        callout_stop(&adapter->timer);

        /* Get the latest MAC address; the user may have set a LAA */
        bcopy(IF_LLADDR(adapter->ifp), adapter->hw.mac.addr,
              ETHER_ADDR_LEN);

        /* Put the address into the Receive Address Array */
        e1000_rar_set(&adapter->hw, adapter->hw.mac.addr, 0);

        /*
         * With the 82571 adapter, RAR[0] may be overwritten
         * when the other port is reset, so we make a duplicate
         * in RAR[14] for that eventuality; this assures
         * the interface continues to function.
         */
        if (adapter->hw.mac.type == e1000_82571) {
                e1000_set_laa_state_82571(&adapter->hw, TRUE);
                e1000_rar_set(&adapter->hw, adapter->hw.mac.addr,
                    E1000_RAR_ENTRIES - 1);
        }

        /* Initialize the hardware */
        em_reset(adapter);
        em_update_link_status(adapter);

        /* Setup VLAN support, basic and offload if available */
        E1000_WRITE_REG(&adapter->hw, E1000_VET, ETHERTYPE_VLAN);

        /* Set hardware offload abilities */
        ifp->if_hwassist = 0;
        if (ifp->if_capenable & IFCAP_TXCSUM)
                ifp->if_hwassist |= (CSUM_TCP | CSUM_UDP);
        if (ifp->if_capenable & IFCAP_TSO4)
                ifp->if_hwassist |= CSUM_TSO;

        /* Configure for OS presence */
        em_init_manageability(adapter);

        /* Prepare transmit descriptors and buffers */
        em_setup_transmit_structures(adapter);
        em_initialize_transmit_unit(adapter);

        /* Setup Multicast table */
        em_set_multi(adapter);

        /*
        ** Figure out the desired mbuf
        ** pool for doing jumbos
        */
        if (adapter->max_frame_size <= 2048)
                adapter->rx_mbuf_sz = MCLBYTES;
        else if (adapter->max_frame_size <= 4096)
                adapter->rx_mbuf_sz = MJUMPAGESIZE;
        else
                adapter->rx_mbuf_sz = MJUM9BYTES;

        /* Prepare receive descriptors and buffers */
        if (em_setup_receive_structures(adapter)) {
                device_printf(dev, "Could not setup receive structures\n");
                em_stop(adapter);
                return;
        }
        em_initialize_receive_unit(adapter);

        /* Use real VLAN Filter support? */
        if (ifp->if_capenable & IFCAP_VLAN_HWTAGGING) {
                if (ifp->if_capenable & IFCAP_VLAN_HWFILTER)
                        /* Use real VLAN Filter support */
                        em_setup_vlan_hw_support(adapter);
                else {
                        u32 ctrl;
                        ctrl = E1000_READ_REG(&adapter->hw, E1000_CTRL);
                        ctrl |= E1000_CTRL_VME;
                        E1000_WRITE_REG(&adapter->hw, E1000_CTRL, ctrl);
                }
        }

        /* Don't lose promiscuous settings */
        em_set_promisc(adapter);

        /* Set the interface as ACTIVE */
        ifp->if_drv_flags |= IFF_DRV_RUNNING;
        ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;

        callout_reset(&adapter->timer, hz, em_local_timer, adapter);
        e1000_clear_hw_cntrs_base_generic(&adapter->hw);

        /* MSI/X configuration for 82574 */
        if (adapter->hw.mac.type == e1000_82574) {
                int tmp;
                tmp = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
                tmp |= E1000_CTRL_EXT_PBA_CLR;
                E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT, tmp);
                /* Set the IVAR - interrupt vector routing. */
                E1000_WRITE_REG(&adapter->hw, E1000_IVAR, adapter->ivars);
        }

1359 #ifdef DEVICE_POLLING
1360         /*
1361          * Only enable interrupts if we are not polling; make sure
1362          * they are off otherwise.
1363          */
1364         if (ifp->if_capenable & IFCAP_POLLING)
1365                 em_disable_intr(adapter);
1366         else
1367 #endif /* DEVICE_POLLING */
1368                 em_enable_intr(adapter);
1369
1370         /* AMT based hardware can now take control from firmware */
1371         if (adapter->has_manage && adapter->has_amt)
1372                 em_get_hw_control(adapter);
1373 }
1374
1375 static void
1376 em_init(void *arg)
1377 {
1378         struct adapter *adapter = arg;
1379
1380         EM_CORE_LOCK(adapter);
1381         em_init_locked(adapter);
1382         EM_CORE_UNLOCK(adapter);
1383 }
1384
1385
1386 #ifdef DEVICE_POLLING
1387 /*********************************************************************
1388  *
1389  *  Legacy polling routine: note this only works with single queue
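 *  (requires "options DEVICE_POLLING" in the kernel config; an
 *  interface opts in at runtime via "ifconfig em0 polling")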
1390  *
1391  *********************************************************************/
1392 static int
1393 em_poll(struct ifnet *ifp, enum poll_cmd cmd, int count)
1394 {
1395         struct adapter *adapter = ifp->if_softc;
1396         struct tx_ring  *txr = adapter->tx_rings;
1397         struct rx_ring  *rxr = adapter->rx_rings;
1398         u32             reg_icr;
1399         int             rx_done;
1400
1401         EM_CORE_LOCK(adapter);
1402         if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) {
1403                 EM_CORE_UNLOCK(adapter);
1404                 return (0);
1405         }
1406
1407         if (cmd == POLL_AND_CHECK_STATUS) {
1408                 reg_icr = E1000_READ_REG(&adapter->hw, E1000_ICR);
1409                 if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
1410                         callout_stop(&adapter->timer);
1411                         adapter->hw.mac.get_link_status = 1;
1412                         em_update_link_status(adapter);
1413                         callout_reset(&adapter->timer, hz,
1414                             em_local_timer, adapter);
1415                 }
1416         }
1417         EM_CORE_UNLOCK(adapter);
1418
1419         em_rxeof(rxr, count, &rx_done);
1420
1421         EM_TX_LOCK(txr);
1422         em_txeof(txr);
1423 #ifdef EM_MULTIQUEUE
1424         if (!drbr_empty(ifp, txr->br))
1425                 em_mq_start_locked(ifp, txr, NULL);
1426 #else
1427         em_start_locked(ifp, txr);
1428 #endif
1429         EM_TX_UNLOCK(txr);
1430
1431         return (rx_done);
1432 }
1433 #endif /* DEVICE_POLLING */
1434
1435
1436 /*********************************************************************
1437  *
1438  *  Fast Legacy/MSI Combined Interrupt Service routine  
1439  *
1440  *********************************************************************/
1441 static int
1442 em_irq_fast(void *arg)
1443 {
1444         struct adapter  *adapter = arg;
1445         struct ifnet    *ifp;
1446         u32             reg_icr;
1447
1448         ifp = adapter->ifp;
1449
1450         reg_icr = E1000_READ_REG(&adapter->hw, E1000_ICR);
1451
1452         /* Hot eject?  */
1453         if (reg_icr == 0xffffffff)
1454                 return FILTER_STRAY;
1455
1456         /* Definitely not our interrupt.  */
1457         if (reg_icr == 0x0)
1458                 return FILTER_STRAY;
1459
1460         /*
1461          * Starting with the 82571 chip, bit 31 should be used to
1462          * determine whether the interrupt belongs to us.
1463          */
1464         if (adapter->hw.mac.type >= e1000_82571 &&
1465             (reg_icr & E1000_ICR_INT_ASSERTED) == 0)
1466                 return FILTER_STRAY;
1467
1468         em_disable_intr(adapter);
1469         taskqueue_enqueue(adapter->tq, &adapter->que_task);
1470
1471         /* Link status change */
1472         if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
1473                 adapter->hw.mac.get_link_status = 1;
1474                 taskqueue_enqueue(taskqueue_fast, &adapter->link_task);
1475         }
1476
1477         if (reg_icr & E1000_ICR_RXO)
1478                 adapter->rx_overruns++;
1479         return FILTER_HANDLED;
1480 }
1481
1482 /* Combined RX/TX handler, used by Legacy and MSI */
1483 static void
1484 em_handle_que(void *context, int pending)
1485 {
1486         struct adapter  *adapter = context;
1487         struct ifnet    *ifp = adapter->ifp;
1488         struct tx_ring  *txr = adapter->tx_rings;
1489         struct rx_ring  *rxr = adapter->rx_rings;
1490
1491
1492         if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
1493                 bool more = em_rxeof(rxr, adapter->rx_process_limit, NULL);
1494                 EM_TX_LOCK(txr);
1495                 em_txeof(txr);
1496 #ifdef EM_MULTIQUEUE
1497                 if (!drbr_empty(ifp, txr->br))
1498                         em_mq_start_locked(ifp, txr, NULL);
1499 #else
1500                 em_start_locked(ifp, txr);
1501 #endif
1502                 EM_TX_UNLOCK(txr);
1503                 if (more || (ifp->if_drv_flags & IFF_DRV_OACTIVE)) {
1504                         taskqueue_enqueue(adapter->tq, &adapter->que_task);
1505                         return;
1506                 }
1507         }
1508
1509         em_enable_intr(adapter);
1510         return;
1511 }
1512
1513
1514 /*********************************************************************
1515  *
1516  *  MSIX Interrupt Service Routines
1517  *
1518  **********************************************************************/
1519 static void
1520 em_msix_tx(void *arg)
1521 {
1522         struct tx_ring *txr = arg;
1523         struct adapter *adapter = txr->adapter;
1524         bool            more;
1525
1526         ++txr->tx_irq;
1527         EM_TX_LOCK(txr);
1528         more = em_txeof(txr);
1529         EM_TX_UNLOCK(txr);
1530         if (more)
1531                 taskqueue_enqueue(txr->tq, &txr->tx_task);
1532         else
1533                 /* Reenable this interrupt */
1534                 E1000_WRITE_REG(&adapter->hw, E1000_IMS, txr->ims);
1535         return;
1536 }
1537
1538 /*********************************************************************
1539  *
1540  *  MSIX RX Interrupt Service routine
1541  *
1542  **********************************************************************/
1543
1544 static void
1545 em_msix_rx(void *arg)
1546 {
1547         struct rx_ring  *rxr = arg;
1548         struct adapter  *adapter = rxr->adapter;
1549         bool            more;
1550
1551         ++rxr->rx_irq;
1552         more = em_rxeof(rxr, adapter->rx_process_limit, NULL);
1553         if (more)
1554                 taskqueue_enqueue(rxr->tq, &rxr->rx_task);
1555         else
1556                 /* Reenable this interrupt */
1557                 E1000_WRITE_REG(&adapter->hw, E1000_IMS, rxr->ims);
1558         return;
1559 }
1560
1561 /*********************************************************************
1562  *
1563  *  MSIX Link Fast Interrupt Service routine
1564  *
1565  **********************************************************************/
1566 static void
1567 em_msix_link(void *arg)
1568 {
1569         struct adapter  *adapter = arg;
1570         u32             reg_icr;
1571
1572         ++adapter->link_irq;
1573         reg_icr = E1000_READ_REG(&adapter->hw, E1000_ICR);
1574
1575         if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
1576                 adapter->hw.mac.get_link_status = 1;
1577                 em_handle_link(adapter, 0);
1578         } else
1579                 E1000_WRITE_REG(&adapter->hw, E1000_IMS,
1580                     EM_MSIX_LINK | E1000_IMS_LSC);
1581         return;
1582 }
1583
1584 static void
1585 em_handle_rx(void *context, int pending)
1586 {
1587         struct rx_ring  *rxr = context;
1588         struct adapter  *adapter = rxr->adapter;
1589         bool            more;
1590
1591         more = em_rxeof(rxr, adapter->rx_process_limit, NULL);
1592         if (more)
1593                 taskqueue_enqueue(rxr->tq, &rxr->rx_task);
1594         else
1595                 /* Reenable this interrupt */
1596                 E1000_WRITE_REG(&adapter->hw, E1000_IMS, rxr->ims);
1597 }
1598
1599 static void
1600 em_handle_tx(void *context, int pending)
1601 {
1602         struct tx_ring  *txr = context;
1603         struct adapter  *adapter = txr->adapter;
1604         struct ifnet    *ifp = adapter->ifp;
1605
1606         EM_TX_LOCK(txr);
1607         em_txeof(txr);
1608 #ifdef EM_MULTIQUEUE
1609         if (!drbr_empty(ifp, txr->br))
1610                 em_mq_start_locked(ifp, txr, NULL);
1611 #else
1612         em_start_locked(ifp, txr);
1613 #endif
1614         E1000_WRITE_REG(&adapter->hw, E1000_IMS, txr->ims);
1615         EM_TX_UNLOCK(txr);
1616 }
1617
1618 static void
1619 em_handle_link(void *context, int pending)
1620 {
1621         struct adapter  *adapter = context;
1622         struct ifnet *ifp = adapter->ifp;
1623
1624         if (!(ifp->if_drv_flags & IFF_DRV_RUNNING))
1625                 return;
1626
1627         EM_CORE_LOCK(adapter);
1628         callout_stop(&adapter->timer);
1629         em_update_link_status(adapter);
1630         callout_reset(&adapter->timer, hz, em_local_timer, adapter);
1631         E1000_WRITE_REG(&adapter->hw, E1000_IMS,
1632             EM_MSIX_LINK | E1000_IMS_LSC);
1633         EM_CORE_UNLOCK(adapter);
1634 }
1635
1636
1637 /*********************************************************************
1638  *
1639  *  Media Ioctl callback
1640  *
1641  *  This routine is called whenever the user queries the status of
1642  *  the interface using ifconfig.
1643  *
1644  **********************************************************************/
1645 static void
1646 em_media_status(struct ifnet *ifp, struct ifmediareq *ifmr)
1647 {
1648         struct adapter *adapter = ifp->if_softc;
1649         u_char fiber_type = IFM_1000_SX;
1650
1651         INIT_DEBUGOUT("em_media_status: begin");
1652
1653         EM_CORE_LOCK(adapter);
1654         em_update_link_status(adapter);
1655
1656         ifmr->ifm_status = IFM_AVALID;
1657         ifmr->ifm_active = IFM_ETHER;
1658
1659         if (!adapter->link_active) {
1660                 EM_CORE_UNLOCK(adapter);
1661                 return;
1662         }
1663
1664         ifmr->ifm_status |= IFM_ACTIVE;
1665
1666         if ((adapter->hw.phy.media_type == e1000_media_type_fiber) ||
1667             (adapter->hw.phy.media_type == e1000_media_type_internal_serdes)) {
1668                 ifmr->ifm_active |= fiber_type | IFM_FDX;
1669         } else {
1670                 switch (adapter->link_speed) {
1671                 case 10:
1672                         ifmr->ifm_active |= IFM_10_T;
1673                         break;
1674                 case 100:
1675                         ifmr->ifm_active |= IFM_100_TX;
1676                         break;
1677                 case 1000:
1678                         ifmr->ifm_active |= IFM_1000_T;
1679                         break;
1680                 }
1681                 if (adapter->link_duplex == FULL_DUPLEX)
1682                         ifmr->ifm_active |= IFM_FDX;
1683                 else
1684                         ifmr->ifm_active |= IFM_HDX;
1685         }
1686         EM_CORE_UNLOCK(adapter);
1687 }
1688
1689 /*********************************************************************
1690  *
1691  *  Media Ioctl callback
1692  *
1693  *  This routine is called when the user changes speed/duplex using
1694  *  media/mediaopt options with ifconfig.
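 *  (e.g. "ifconfig em0 media 100baseTX mediaopt full-duplex")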
1695  *
1696  **********************************************************************/
1697 static int
1698 em_media_change(struct ifnet *ifp)
1699 {
1700         struct adapter *adapter = ifp->if_softc;
1701         struct ifmedia  *ifm = &adapter->media;
1702
1703         INIT_DEBUGOUT("em_media_change: begin");
1704
1705         if (IFM_TYPE(ifm->ifm_media) != IFM_ETHER)
1706                 return (EINVAL);
1707
1708         EM_CORE_LOCK(adapter);
1709         switch (IFM_SUBTYPE(ifm->ifm_media)) {
1710         case IFM_AUTO:
1711                 adapter->hw.mac.autoneg = DO_AUTO_NEG;
1712                 adapter->hw.phy.autoneg_advertised = AUTONEG_ADV_DEFAULT;
1713                 break;
1714         case IFM_1000_LX:
1715         case IFM_1000_SX:
1716         case IFM_1000_T:
1717                 adapter->hw.mac.autoneg = DO_AUTO_NEG;
1718                 adapter->hw.phy.autoneg_advertised = ADVERTISE_1000_FULL;
1719                 break;
1720         case IFM_100_TX:
1721                 adapter->hw.mac.autoneg = FALSE;
1722                 adapter->hw.phy.autoneg_advertised = 0;
1723                 if ((ifm->ifm_media & IFM_GMASK) == IFM_FDX)
1724                         adapter->hw.mac.forced_speed_duplex = ADVERTISE_100_FULL;
1725                 else
1726                         adapter->hw.mac.forced_speed_duplex = ADVERTISE_100_HALF;
1727                 break;
1728         case IFM_10_T:
1729                 adapter->hw.mac.autoneg = FALSE;
1730                 adapter->hw.phy.autoneg_advertised = 0;
1731                 if ((ifm->ifm_media & IFM_GMASK) == IFM_FDX)
1732                         adapter->hw.mac.forced_speed_duplex = ADVERTISE_10_FULL;
1733                 else
1734                         adapter->hw.mac.forced_speed_duplex = ADVERTISE_10_HALF;
1735                 break;
1736         default:
1737                 device_printf(adapter->dev, "Unsupported media type\n");
1738         }
1739
1740         em_init_locked(adapter);
1741         EM_CORE_UNLOCK(adapter);
1742
1743         return (0);
1744 }
1745
1746 /*********************************************************************
1747  *
1748  *  This routine maps the mbufs to tx descriptors.
1749  *
1750  *  return 0 on success, positive on failure
1751  **********************************************************************/
1752
1753 static int
1754 em_xmit(struct tx_ring *txr, struct mbuf **m_headp)
1755 {
1756         struct adapter          *adapter = txr->adapter;
1757         bus_dma_segment_t       segs[EM_MAX_SCATTER];
1758         bus_dmamap_t            map;
1759         struct em_buffer        *tx_buffer, *tx_buffer_mapped;
1760         struct e1000_tx_desc    *ctxd = NULL;
1761         struct mbuf             *m_head;
1762         struct ether_header     *eh;
1763         struct ip               *ip = NULL;
1764         struct tcphdr           *tp = NULL;
1765         u32                     txd_upper, txd_lower, txd_used, txd_saved;
1766         int                     ip_off, poff;
1767         int                     nsegs, i, j, first, last = 0;
1768         int                     error, do_tso, tso_desc = 0, remap = 1;
1769
1770 retry:
1771         m_head = *m_headp;
1772         txd_upper = txd_lower = txd_used = txd_saved = 0;
1773         do_tso = ((m_head->m_pkthdr.csum_flags & CSUM_TSO) != 0);
1774         ip_off = poff = 0;
1775
1776         /*
1777          * Intel recommends that the entire IP/TCP header reside in a single
1778          * buffer. If multiple descriptors are used to describe the IP and
1779          * TCP header, each descriptor should describe one or more
1780          * complete headers; descriptors referencing only parts of headers
1781          * are not supported. If all layer headers are not coalesced into
1782          * a single buffer, each buffer should not cross a 4KB boundary,
1783          * or be larger than the maximum read request size.
1784          * The controller also requires the IP/TCP header to be modified
1785          * to make TSO work, so we first get a writable mbuf chain, then
1786          * coalesce the ethernet/IP/TCP headers into a single buffer to
1787          * meet the controller's requirement. This also simplifies
1788          * IP/TCP/UDP checksum offloading, which has similar restrictions.
1789          */
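        /*
         * A rough sketch (just summarizing the pullups below, assuming
         * IPv4 over plain Ethernet) of the resulting layout for a TSO
         * packet:
         *
         *   [ether 14][ip (ip_hl << 2)][tcp (th_off << 2)][4 bytes]
         *   ^0        ^ip_off          ^poff
         *
         * i.e. all headers plus four payload bytes contiguous in the
         * first mbuf; ip_off becomes 18 when a VLAN header is present.
         */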
1790         if (do_tso || m_head->m_pkthdr.csum_flags & CSUM_OFFLOAD) {
1791                 if (do_tso || (m_head->m_next != NULL && 
1792                     m_head->m_pkthdr.csum_flags & CSUM_OFFLOAD)) {
1793                         if (M_WRITABLE(*m_headp) == 0) {
1794                                 m_head = m_dup(*m_headp, M_DONTWAIT);
1795                                 m_freem(*m_headp);
1796                                 if (m_head == NULL) {
1797                                         *m_headp = NULL;
1798                                         return (ENOBUFS);
1799                                 }
1800                                 *m_headp = m_head;
1801                         }
1802                 }
1803                 /*
1804                  * XXX
1805                  * Assume IPv4, we don't have TSO/checksum offload support
1806                  * for IPv6 yet.
1807                  */
1808                 ip_off = sizeof(struct ether_header);
1809                 m_head = m_pullup(m_head, ip_off);
1810                 if (m_head == NULL) {
1811                         *m_headp = NULL;
1812                         return (ENOBUFS);
1813                 }
1814                 eh = mtod(m_head, struct ether_header *);
1815                 if (eh->ether_type == htons(ETHERTYPE_VLAN)) {
1816                         ip_off = sizeof(struct ether_vlan_header);
1817                         m_head = m_pullup(m_head, ip_off);
1818                         if (m_head == NULL) {
1819                                 *m_headp = NULL;
1820                                 return (ENOBUFS);
1821                         }
1822                 }
1823                 m_head = m_pullup(m_head, ip_off + sizeof(struct ip));
1824                 if (m_head == NULL) {
1825                         *m_headp = NULL;
1826                         return (ENOBUFS);
1827                 }
1828                 ip = (struct ip *)(mtod(m_head, char *) + ip_off);
1829                 poff = ip_off + (ip->ip_hl << 2);
1830                 if (do_tso) {
1831                         m_head = m_pullup(m_head, poff + sizeof(struct tcphdr));
1832                         if (m_head == NULL) {
1833                                 *m_headp = NULL;
1834                                 return (ENOBUFS);
1835                         }
1836                         tp = (struct tcphdr *)(mtod(m_head, char *) + poff);
1837                         /*
1838                          * TSO workaround:
1839                          *   pull 4 more bytes of data into the mbuf.
1840                          */
1841                         m_head = m_pullup(m_head, poff + (tp->th_off << 2) + 4);
1842                         if (m_head == NULL) {
1843                                 *m_headp = NULL;
1844                                 return (ENOBUFS);
1845                         }
1846                         ip = (struct ip *)(mtod(m_head, char *) + ip_off);
1847                         ip->ip_len = 0;
1848                         ip->ip_sum = 0;
1849                         /*
1850                          * The pseudo TCP checksum does not include the TCP
1851                          * payload length, so the driver must recompute it
1852                          * here to match what the hardware expects to see.
1853                          * This adheres to Microsoft's Large Send spec.
1854                          */
1855                         tp = (struct tcphdr *)(mtod(m_head, char *) + poff);
1856                         tp->th_sum = in_pseudo(ip->ip_src.s_addr,
1857                             ip->ip_dst.s_addr, htons(IPPROTO_TCP));
1858                 } else if (m_head->m_pkthdr.csum_flags & CSUM_TCP) {
1859                         m_head = m_pullup(m_head, poff + sizeof(struct tcphdr));
1860                         if (m_head == NULL) {
1861                                 *m_headp = NULL;
1862                                 return (ENOBUFS);
1863                         }
1864                         tp = (struct tcphdr *)(mtod(m_head, char *) + poff);
1865                         m_head = m_pullup(m_head, poff + (tp->th_off << 2));
1866                         if (m_head == NULL) {
1867                                 *m_headp = NULL;
1868                                 return (ENOBUFS);
1869                         }
1870                         ip = (struct ip *)(mtod(m_head, char *) + ip_off);
1871                         tp = (struct tcphdr *)(mtod(m_head, char *) + poff);
1872                 } else if (m_head->m_pkthdr.csum_flags & CSUM_UDP) {
1873                         m_head = m_pullup(m_head, poff + sizeof(struct udphdr));
1874                         if (m_head == NULL) {
1875                                 *m_headp = NULL;
1876                                 return (ENOBUFS);
1877                         }
1878                         ip = (struct ip *)(mtod(m_head, char *) + ip_off);
1879                 }
1880                 *m_headp = m_head;
1881         }
1882
1883         /*
1884          * Map the packet for DMA
1885          *
1886          * Capture the first descriptor index,
1887          * this descriptor will have the index
1888          * of the EOP which is the only one that
1889          * now gets a DONE bit writeback.
1890          */
1891         first = txr->next_avail_desc;
1892         tx_buffer = &txr->tx_buffers[first];
1893         tx_buffer_mapped = tx_buffer;
1894         map = tx_buffer->map;
1895
1896         error = bus_dmamap_load_mbuf_sg(txr->txtag, map,
1897             *m_headp, segs, &nsegs, BUS_DMA_NOWAIT);
1898
1899         /*
1900          * There are two types of errors we can (try) to handle:
1901          * - EFBIG means the mbuf chain was too long and bus_dma ran
1902          *   out of segments.  Defragment the mbuf chain and try again.
1903          * - ENOMEM means bus_dma could not obtain enough bounce buffers
1904          *   at this point in time.  Defer sending and try again later.
1905          * All other errors, in particular EINVAL, are fatal and prevent the
1906          * mbuf chain from ever going through.  Drop it and report error.
1907          */
1908         if (error == EFBIG && remap) {
1909                 struct mbuf *m;
1910
1911                 m = m_defrag(*m_headp, M_DONTWAIT);
1912                 if (m == NULL) {
1913                         adapter->mbuf_alloc_failed++;
1914                         m_freem(*m_headp);
1915                         *m_headp = NULL;
1916                         return (ENOBUFS);
1917                 }
1918                 *m_headp = m;
1919
1920                 /* Try it again, but only once */
1921                 remap = 0;
1922                 goto retry;
1923         } else if (error == ENOMEM) {
1924                 adapter->no_tx_dma_setup++;
1925                 return (error);
1926         } else if (error != 0) {
1927                 adapter->no_tx_dma_setup++;
1928                 m_freem(*m_headp);
1929                 *m_headp = NULL;
1930                 return (error);
1931         }
1932
1933         /*
1934          * TSO Hardware workaround, if this packet is not
1935          * TSO, and is only a single descriptor long, and
1936          * it follows a TSO burst, then we need to add a
1937          * sentinel descriptor to prevent premature writeback.
1938          */
1939         if ((do_tso == 0) && (txr->tx_tso == TRUE)) {
1940                 if (nsegs == 1)
1941                         tso_desc = TRUE;
1942                 txr->tx_tso = FALSE;
1943         }
1944
1945         if (nsegs > (txr->tx_avail - 2)) {
1946                 txr->no_desc_avail++;
1947                 bus_dmamap_unload(txr->txtag, map);
1948                 return (ENOBUFS);
1949         }
1950         m_head = *m_headp;
1951
1952         /* Do hardware assists */
1953         if (m_head->m_pkthdr.csum_flags & CSUM_TSO) {
1954                 em_tso_setup(txr, m_head, ip_off, ip, tp,
1955                     &txd_upper, &txd_lower);
1956                 /* we need to make a final sentinel transmit desc */
1957                 tso_desc = TRUE;
1958         } else if (m_head->m_pkthdr.csum_flags & CSUM_OFFLOAD)
1959                 em_transmit_checksum_setup(txr, m_head,
1960                     ip_off, ip, &txd_upper, &txd_lower);
1961
1962         if (m_head->m_flags & M_VLANTAG) {
1963                 /* Set the vlan id. */
1964                 txd_upper |=
1965                     (htole16(m_head->m_pkthdr.ether_vtag) << 16);
1966                 /* Tell hardware to add tag */
1967                 txd_lower |= htole32(E1000_TXD_CMD_VLE);
1968         }
1969
1970         i = txr->next_avail_desc;
1971
1972         /* Set up our transmit descriptors */
1973         for (j = 0; j < nsegs; j++) {
1974                 bus_size_t seg_len;
1975                 bus_addr_t seg_addr;
1976
1977                 tx_buffer = &txr->tx_buffers[i];
1978                 ctxd = &txr->tx_base[i];
1979                 seg_addr = segs[j].ds_addr;
1980                 seg_len  = segs[j].ds_len;
1981                 /*
1982                 ** TSO Workaround:
1983                 ** If this is the last descriptor, we want to
1984                 ** split it so we have a small final sentinel
1985                 */
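                /*
                ** e.g. a final 60-byte segment is emitted as a 56-byte
                ** descriptor plus a 4-byte sentinel; the sentinel is the
                ** descriptor that later gets EOP/RS, so writeback cannot
                ** happen before the whole frame has been fetched.
                */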
1986                 if (tso_desc && (j == (nsegs -1)) && (seg_len > 8)) {
1987                         seg_len -= 4;
1988                         ctxd->buffer_addr = htole64(seg_addr);
1989                         ctxd->lower.data = htole32(
1990                             adapter->txd_cmd | txd_lower | seg_len);
1991                         ctxd->upper.data =
1992                             htole32(txd_upper);
1993                         if (++i == adapter->num_tx_desc)
1994                                 i = 0;
1995                         /* Now make the sentinel */     
1996                         ++txd_used; /* using an extra txd */
1997                         ctxd = &txr->tx_base[i];
1998                         tx_buffer = &txr->tx_buffers[i];
1999                         ctxd->buffer_addr =
2000                             htole64(seg_addr + seg_len);
2001                         ctxd->lower.data = htole32(
2002                             adapter->txd_cmd | txd_lower | 4);
2003                         ctxd->upper.data =
2004                             htole32(txd_upper);
2005                         last = i;
2006                         if (++i == adapter->num_tx_desc)
2007                                 i = 0;
2008                 } else {
2009                         ctxd->buffer_addr = htole64(seg_addr);
2010                         ctxd->lower.data = htole32(
2011                             adapter->txd_cmd | txd_lower | seg_len);
2012                         ctxd->upper.data =
2013                             htole32(txd_upper);
2014                         last = i;
2015                         if (++i == adapter->num_tx_desc)
2016                                 i = 0;
2017                 }
2018                 tx_buffer->m_head = NULL;
2019                 tx_buffer->next_eop = -1;
2020         }
2021
2022         txr->next_avail_desc = i;
2023         txr->tx_avail -= nsegs;
2024         if (tso_desc) /* TSO used an extra for sentinel */
2025                 txr->tx_avail -= txd_used;
2026
2027         tx_buffer->m_head = m_head;
2028         /*
2029         ** Here we swap the maps so the last descriptor,
2030         ** which gets the completion interrupt, has the
2031         ** real map, and the first descriptor gets the
2032         ** unused map from this descriptor.
2033         */
2034         tx_buffer_mapped->map = tx_buffer->map;
2035         tx_buffer->map = map;
2036         bus_dmamap_sync(txr->txtag, map, BUS_DMASYNC_PREWRITE);
2037
2038         /*
2039          * Last Descriptor of Packet
2040          * needs End Of Packet (EOP)
2041          * and Report Status (RS)
2042          */
2043         ctxd->lower.data |=
2044             htole32(E1000_TXD_CMD_EOP | E1000_TXD_CMD_RS);
2045         /*
2046          * Keep track in the first buffer which
2047          * descriptor will be written back
2048          */
2049         tx_buffer = &txr->tx_buffers[first];
2050         tx_buffer->next_eop = last;
2051         /* Update the watchdog time early and often */
2052         txr->watchdog_time = ticks;
2053
2054         /*
2055          * Advance the Transmit Descriptor Tail (TDT), this tells the E1000
2056          * that this frame is available to transmit.
2057          */
2058         bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
2059             BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
2060         E1000_WRITE_REG(&adapter->hw, E1000_TDT(txr->me), i);
2061
2062         return (0);
2063 }
2064
2065 static void
2066 em_set_promisc(struct adapter *adapter)
2067 {
2068         struct ifnet    *ifp = adapter->ifp;
2069         u32             reg_rctl;
2070
2071         reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
2072
2073         if (ifp->if_flags & IFF_PROMISC) {
2074                 reg_rctl |= (E1000_RCTL_UPE | E1000_RCTL_MPE);
2075                 /* Turn this on if you want to see bad packets */
2076                 if (em_debug_sbp)
2077                         reg_rctl |= E1000_RCTL_SBP;
2078                 E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2079         } else if (ifp->if_flags & IFF_ALLMULTI) {
2080                 reg_rctl |= E1000_RCTL_MPE;
2081                 reg_rctl &= ~E1000_RCTL_UPE;
2082                 E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2083         }
2084 }
2085
2086 static void
2087 em_disable_promisc(struct adapter *adapter)
2088 {
2089         u32     reg_rctl;
2090
2091         reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
2092
2093         reg_rctl &= ~E1000_RCTL_UPE;
2094         reg_rctl &= ~E1000_RCTL_MPE;
2095         reg_rctl &= ~E1000_RCTL_SBP;
2096         E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2097 }
2098
2099
2100 /*********************************************************************
2101  *  Multicast Update
2102  *
2103  *  This routine is called whenever multicast address list is updated.
2104  *
2105  **********************************************************************/
2106
2107 static void
2108 em_set_multi(struct adapter *adapter)
2109 {
2110         struct ifnet    *ifp = adapter->ifp;
2111         struct ifmultiaddr *ifma;
2112         u32 reg_rctl = 0;
2113         u8  *mta; /* Multicast array memory */
2114         int mcnt = 0;
2115
2116         IOCTL_DEBUGOUT("em_set_multi: begin");
2117
2118         mta = adapter->mta;
2119         bzero(mta, sizeof(u8) * ETH_ADDR_LEN * MAX_NUM_MULTICAST_ADDRESSES);
2120
2121         if (adapter->hw.mac.type == e1000_82542 && 
2122             adapter->hw.revision_id == E1000_REVISION_2) {
2123                 reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
2124                 if (adapter->hw.bus.pci_cmd_word & CMD_MEM_WRT_INVALIDATE)
2125                         e1000_pci_clear_mwi(&adapter->hw);
2126                 reg_rctl |= E1000_RCTL_RST;
2127                 E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2128                 msec_delay(5);
2129         }
2130
2131 #if __FreeBSD_version < 800000
2132         IF_ADDR_LOCK(ifp);
2133 #else
2134         if_maddr_rlock(ifp);
2135 #endif
2136         TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
2137                 if (ifma->ifma_addr->sa_family != AF_LINK)
2138                         continue;
2139
2140                 if (mcnt == MAX_NUM_MULTICAST_ADDRESSES)
2141                         break;
2142
2143                 bcopy(LLADDR((struct sockaddr_dl *)ifma->ifma_addr),
2144                     &mta[mcnt * ETH_ADDR_LEN], ETH_ADDR_LEN);
2145                 mcnt++;
2146         }
2147 #if __FreeBSD_version < 800000
2148         IF_ADDR_UNLOCK(ifp);
2149 #else
2150         if_maddr_runlock(ifp);
2151 #endif
2152         if (mcnt >= MAX_NUM_MULTICAST_ADDRESSES) {
2153                 reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
2154                 reg_rctl |= E1000_RCTL_MPE;
2155                 E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2156         } else
2157                 e1000_update_mc_addr_list(&adapter->hw, mta, mcnt);
2158
2159         if (adapter->hw.mac.type == e1000_82542 && 
2160             adapter->hw.revision_id == E1000_REVISION_2) {
2161                 reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
2162                 reg_rctl &= ~E1000_RCTL_RST;
2163                 E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2164                 msec_delay(5);
2165                 if (adapter->hw.bus.pci_cmd_word & CMD_MEM_WRT_INVALIDATE)
2166                         e1000_pci_set_mwi(&adapter->hw);
2167         }
2168 }
2169
2170
2171 /*********************************************************************
2172  *  Timer routine
2173  *
2174  *  This routine checks for link status and updates statistics.
2175  *
2176  **********************************************************************/
2177
2178 static void
2179 em_local_timer(void *arg)
2180 {
2181         struct adapter  *adapter = arg;
2182         struct ifnet    *ifp = adapter->ifp;
2183         struct tx_ring  *txr = adapter->tx_rings;
2184         struct rx_ring  *rxr = adapter->rx_rings;
2185         u32             trigger;
2186
2187         EM_CORE_LOCK_ASSERT(adapter);
2188
2189         em_update_link_status(adapter);
2190         em_update_stats_counters(adapter);
2191
2192         /* Reset LAA into RAR[0] on 82571 */
2193         if ((adapter->hw.mac.type == e1000_82571) &&
2194             e1000_get_laa_state_82571(&adapter->hw))
2195                 e1000_rar_set(&adapter->hw, adapter->hw.mac.addr, 0);
2196
2197         /* Mask for the irq trigger: RX MSIX vector (82574) or RXDMT0 */
2198         if (adapter->msix_mem)
2199                 trigger = rxr->ims; /* RX for 82574 */
2200         else
2201                 trigger = E1000_ICS_RXDMT0;
2202
2203         /*
2204         ** Check on the state of the TX queue(s); this
2205         ** can be done without the lock because it's RO
2206         ** and the HUNG state will be static if set.
2207         */
2208         for (int i = 0; i < adapter->num_queues; i++, txr++) {
2209                 if ((txr->queue_status == EM_QUEUE_HUNG) &&
2210                     (adapter->pause_frames == 0))
2211                         goto hung;
2212                 /* Schedule a TX tasklet if needed */
2213                 if (txr->tx_avail <= EM_MAX_SCATTER)
2214                         taskqueue_enqueue(txr->tq, &txr->tx_task);
2215         }
2216         
2217         adapter->pause_frames = 0;
2218         callout_reset(&adapter->timer, hz, em_local_timer, adapter);
2219 #ifndef DEVICE_POLLING
2220         /* Trigger an RX interrupt to guarantee mbuf refresh */
2221         E1000_WRITE_REG(&adapter->hw, E1000_ICS, trigger);
2222 #endif
2223         return;
2224 hung:
2225         /* Looks like we're hung */
2226         device_printf(adapter->dev, "Watchdog timeout -- resetting\n");
2227         device_printf(adapter->dev,
2228             "Queue(%d) tdh = %d, hw tdt = %d\n", txr->me,
2229             E1000_READ_REG(&adapter->hw, E1000_TDH(txr->me)),
2230             E1000_READ_REG(&adapter->hw, E1000_TDT(txr->me)));
2231         device_printf(adapter->dev,"TX(%d) desc avail = %d,"
2232             "Next TX to Clean = %d\n",
2233             txr->me, txr->tx_avail, txr->next_to_clean);
2234         ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
2235         adapter->watchdog_events++;
2236         adapter->pause_frames = 0;
2237         em_init_locked(adapter);
2238 }
2239
2240
2241 static void
2242 em_update_link_status(struct adapter *adapter)
2243 {
2244         struct e1000_hw *hw = &adapter->hw;
2245         struct ifnet *ifp = adapter->ifp;
2246         device_t dev = adapter->dev;
2247         struct tx_ring *txr = adapter->tx_rings;
2248         u32 link_check = 0;
2249
2250         /* Get the cached link value or read phy for real */
2251         switch (hw->phy.media_type) {
2252         case e1000_media_type_copper:
2253                 if (hw->mac.get_link_status) {
2254                         /* Do the work to read phy */
2255                         e1000_check_for_link(hw);
2256                         link_check = !hw->mac.get_link_status;
2257                         if (link_check) /* ESB2 fix */
2258                                 e1000_cfg_on_link_up(hw);
2259                 } else
2260                         link_check = TRUE;
2261                 break;
2262         case e1000_media_type_fiber:
2263                 e1000_check_for_link(hw);
2264                 link_check = (E1000_READ_REG(hw, E1000_STATUS) &
2265                                  E1000_STATUS_LU);
2266                 break;
2267         case e1000_media_type_internal_serdes:
2268                 e1000_check_for_link(hw);
2269                 link_check = adapter->hw.mac.serdes_has_link;
2270                 break;
2271         default:
2272         case e1000_media_type_unknown:
2273                 break;
2274         }
2275
2276         /* Now check for a transition */
2277         if (link_check && (adapter->link_active == 0)) {
2278                 e1000_get_speed_and_duplex(hw, &adapter->link_speed,
2279                     &adapter->link_duplex);
2280                 /* Check if we must disable SPEED_MODE bit on PCI-E */
2281                 if ((adapter->link_speed != SPEED_1000) &&
2282                     ((hw->mac.type == e1000_82571) ||
2283                     (hw->mac.type == e1000_82572))) {
2284                         int tarc0;
2285                         tarc0 = E1000_READ_REG(hw, E1000_TARC(0));
2286                         tarc0 &= ~SPEED_MODE_BIT;
2287                         E1000_WRITE_REG(hw, E1000_TARC(0), tarc0);
2288                 }
2289                 if (bootverbose)
2290                         device_printf(dev, "Link is up %d Mbps %s\n",
2291                             adapter->link_speed,
2292                             ((adapter->link_duplex == FULL_DUPLEX) ?
2293                             "Full Duplex" : "Half Duplex"));
2294                 adapter->link_active = 1;
2295                 adapter->smartspeed = 0;
2296                 ifp->if_baudrate = adapter->link_speed * 1000000;
2297                 if_link_state_change(ifp, LINK_STATE_UP);
2298         } else if (!link_check && (adapter->link_active == 1)) {
2299                 ifp->if_baudrate = adapter->link_speed = 0;
2300                 adapter->link_duplex = 0;
2301                 if (bootverbose)
2302                         device_printf(dev, "Link is Down\n");
2303                 adapter->link_active = 0;
2304                 /* Link down, disable watchdog */
2305                 for (int i = 0; i < adapter->num_queues; i++, txr++)
2306                         txr->queue_status = EM_QUEUE_IDLE;
2307                 if_link_state_change(ifp, LINK_STATE_DOWN);
2308         }
2309 }
2310
2311 /*********************************************************************
2312  *
2313  *  This routine disables all traffic on the adapter by issuing a
2314  *  global reset on the MAC and deallocates TX/RX buffers.
2315  *
2316  *  This routine should always be called with BOTH the CORE
2317  *  and TX locks.
2318  **********************************************************************/
2319
2320 static void
2321 em_stop(void *arg)
2322 {
2323         struct adapter  *adapter = arg;
2324         struct ifnet    *ifp = adapter->ifp;
2325         struct tx_ring  *txr = adapter->tx_rings;
2326
2327         EM_CORE_LOCK_ASSERT(adapter);
2328
2329         INIT_DEBUGOUT("em_stop: begin");
2330
2331         em_disable_intr(adapter);
2332         callout_stop(&adapter->timer);
2333
2334         /* Tell the stack that the interface is no longer active */
2335         ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
2336         ifp->if_drv_flags |= IFF_DRV_OACTIVE;
2337
2338         /* Unarm watchdog timer. */
2339         for (int i = 0; i < adapter->num_queues; i++, txr++) {
2340                 EM_TX_LOCK(txr);
2341                 txr->queue_status = EM_QUEUE_IDLE;
2342                 EM_TX_UNLOCK(txr);
2343         }
2344
2345         e1000_reset_hw(&adapter->hw);
2346         E1000_WRITE_REG(&adapter->hw, E1000_WUC, 0);
2347
2348         e1000_led_off(&adapter->hw);
2349         e1000_cleanup_led(&adapter->hw);
2350 }
2351
2352
2353 /*********************************************************************
2354  *
2355  *  Determine hardware revision.
2356  *
2357  **********************************************************************/
2358 static void
2359 em_identify_hardware(struct adapter *adapter)
2360 {
2361         device_t dev = adapter->dev;
2362
2363         /* Make sure our PCI config space has the necessary stuff set */
2364         adapter->hw.bus.pci_cmd_word = pci_read_config(dev, PCIR_COMMAND, 2);
2365         if (!((adapter->hw.bus.pci_cmd_word & PCIM_CMD_BUSMASTEREN) &&
2366             (adapter->hw.bus.pci_cmd_word & PCIM_CMD_MEMEN))) {
2367                 device_printf(dev, "Memory Access and/or Bus Master bits "
2368                     "were not set!\n");
2369                 adapter->hw.bus.pci_cmd_word |=
2370                     (PCIM_CMD_BUSMASTEREN | PCIM_CMD_MEMEN);
2371                 pci_write_config(dev, PCIR_COMMAND,
2372                     adapter->hw.bus.pci_cmd_word, 2);
2373         }
2374
2375         /* Save off the information about this board */
2376         adapter->hw.vendor_id = pci_get_vendor(dev);
2377         adapter->hw.device_id = pci_get_device(dev);
2378         adapter->hw.revision_id = pci_read_config(dev, PCIR_REVID, 1);
2379         adapter->hw.subsystem_vendor_id =
2380             pci_read_config(dev, PCIR_SUBVEND_0, 2);
2381         adapter->hw.subsystem_device_id =
2382             pci_read_config(dev, PCIR_SUBDEV_0, 2);
2383
2384         /* Do Shared Code Init and Setup */
2385         if (e1000_set_mac_type(&adapter->hw)) {
2386                 device_printf(dev, "Setup init failure\n");
2387                 return;
2388         }
2389 }
2390
2391 static int
2392 em_allocate_pci_resources(struct adapter *adapter)
2393 {
2394         device_t        dev = adapter->dev;
2395         int             rid;
2396
2397         rid = PCIR_BAR(0);
2398         adapter->memory = bus_alloc_resource_any(dev, SYS_RES_MEMORY,
2399             &rid, RF_ACTIVE);
2400         if (adapter->memory == NULL) {
2401                 device_printf(dev, "Unable to allocate bus resource: memory\n");
2402                 return (ENXIO);
2403         }
2404         adapter->osdep.mem_bus_space_tag =
2405             rman_get_bustag(adapter->memory);
2406         adapter->osdep.mem_bus_space_handle =
2407             rman_get_bushandle(adapter->memory);
2408         adapter->hw.hw_addr = (u8 *)&adapter->osdep.mem_bus_space_handle;
2409
2410         /* Default to a single queue */
2411         adapter->num_queues = 1;
2412
2413         /*
2414          * Setup MSI/X or MSI if PCI Express
2415          */
2416         adapter->msix = em_setup_msix(adapter);
2417
2418         adapter->hw.back = &adapter->osdep;
2419
2420         return (0);
2421 }
2422
2423 /*********************************************************************
2424  *
2425  *  Setup the Legacy or MSI Interrupt handler
2426  *
2427  **********************************************************************/
2428 int
2429 em_allocate_legacy(struct adapter *adapter)
2430 {
2431         device_t dev = adapter->dev;
2432         struct tx_ring  *txr = adapter->tx_rings;
2433         int error, rid = 0;
2434
2435         /* Manually turn off all interrupts */
2436         E1000_WRITE_REG(&adapter->hw, E1000_IMC, 0xffffffff);
2437
2438         if (adapter->msix == 1) /* using MSI */
2439                 rid = 1;
2440         /* We allocate a single interrupt resource */
2441         adapter->res = bus_alloc_resource_any(dev,
2442             SYS_RES_IRQ, &rid, RF_SHAREABLE | RF_ACTIVE);
2443         if (adapter->res == NULL) {
2444                 device_printf(dev, "Unable to allocate bus resource: "
2445                     "interrupt\n");
2446                 return (ENXIO);
2447         }
2448
2449         /*
2450          * Allocate a fast interrupt and the associated
2451          * deferred processing contexts.
2452          */
2453         TASK_INIT(&adapter->que_task, 0, em_handle_que, adapter);
2454         adapter->tq = taskqueue_create_fast("em_taskq", M_NOWAIT,
2455             taskqueue_thread_enqueue, &adapter->tq);
2456         taskqueue_start_threads(&adapter->tq, 1, PI_NET, "%s que",
2457             device_get_nameunit(adapter->dev));
2458         /* Use a TX only tasklet for local timer */
2459         TASK_INIT(&txr->tx_task, 0, em_handle_tx, txr);
2460         txr->tq = taskqueue_create_fast("em_txq", M_NOWAIT,
2461             taskqueue_thread_enqueue, &txr->tq);
2462         taskqueue_start_threads(&txr->tq, 1, PI_NET, "%s txq",
2463             device_get_nameunit(adapter->dev));
2464         TASK_INIT(&adapter->link_task, 0, em_handle_link, adapter);
2465         if ((error = bus_setup_intr(dev, adapter->res, INTR_TYPE_NET,
2466             em_irq_fast, NULL, adapter, &adapter->tag)) != 0) {
2467                 device_printf(dev, "Failed to register fast interrupt "
2468                             "handler: %d\n", error);
2469                 taskqueue_free(adapter->tq);
2470                 adapter->tq = NULL;
2471                 return (error);
2472         }
2473         
2474         return (0);
2475 }
2476
2477 /*********************************************************************
2478  *
2479  *  Setup the MSIX Interrupt handlers
2480  *   This is not really Multiqueue, rather
2481  *   it's just separate interrupt vectors
2482  *   for TX, RX, and Link.
2483  *
2484  **********************************************************************/
2485 int
2486 em_allocate_msix(struct adapter *adapter)
2487 {
2488         device_t        dev = adapter->dev;
2489         struct          tx_ring *txr = adapter->tx_rings;
2490         struct          rx_ring *rxr = adapter->rx_rings;
2491         int             error, rid, vector = 0;
2492
2493
2494         /* Make sure all interrupts are disabled */
2495         E1000_WRITE_REG(&adapter->hw, E1000_IMC, 0xffffffff);
2496
2497         /* First set up ring resources */
2498         for (int i = 0; i < adapter->num_queues; i++, txr++, rxr++) {
2499
2500                 /* RX ring */
2501                 rid = vector + 1;
2502
2503                 rxr->res = bus_alloc_resource_any(dev,
2504                     SYS_RES_IRQ, &rid, RF_ACTIVE);
2505                 if (rxr->res == NULL) {
2506                         device_printf(dev,
2507                             "Unable to allocate bus resource: "
2508                             "RX MSIX Interrupt %d\n", i);
2509                         return (ENXIO);
2510                 }
2511                 if ((error = bus_setup_intr(dev, rxr->res,
2512                     INTR_TYPE_NET | INTR_MPSAFE, NULL, em_msix_rx,
2513                     rxr, &rxr->tag)) != 0) {
2514                         device_printf(dev, "Failed to register RX handler");
2515                         return (error);
2516                 }
2517 #if __FreeBSD_version >= 800504
2518                 bus_describe_intr(dev, rxr->res, rxr->tag, "rx %d", i);
2519 #endif
2520                 rxr->msix = vector++; /* NOTE increment vector for TX */
2521                 TASK_INIT(&rxr->rx_task, 0, em_handle_rx, rxr);
2522                 rxr->tq = taskqueue_create_fast("em_rxq", M_NOWAIT,
2523                     taskqueue_thread_enqueue, &rxr->tq);
2524                 taskqueue_start_threads(&rxr->tq, 1, PI_NET, "%s rxq",
2525                     device_get_nameunit(adapter->dev));
2526                 /*
2527                 ** Set the bit to enable interrupt
2528                 ** in E1000_IMS -- bits 20 and 21
2529                 ** are for RX0 and RX1; note this has
2530                 ** NOTHING to do with the MSIX vector
2531                 */
2532                 rxr->ims = 1 << (20 + i);
2533                 adapter->ivars |= (8 | rxr->msix) << (i * 4);
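                /*
                ** ivars sketch: IVAR is carved into 4-bit fields (RX
                ** queues in the low nibbles, TX queues starting at bit
                ** 8, link at bit 16); in each field the low 3 bits
                ** select the MSIX vector and bit 3 marks the entry
                ** valid, hence the (8 | vector) encoding used here.
                */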
2534
2535                 /* TX ring */
2536                 rid = vector + 1;
2537                 txr->res = bus_alloc_resource_any(dev,
2538                     SYS_RES_IRQ, &rid, RF_ACTIVE);
2539                 if (txr->res == NULL) {
2540                         device_printf(dev,
2541                             "Unable to allocate bus resource: "
2542                             "TX MSIX Interrupt %d\n", i);
2543                         return (ENXIO);
2544                 }
2545                 if ((error = bus_setup_intr(dev, txr->res,
2546                     INTR_TYPE_NET | INTR_MPSAFE, NULL, em_msix_tx,
2547                     txr, &txr->tag)) != 0) {
2548                         device_printf(dev, "Failed to register TX handler");
2549                         return (error);
2550                 }
2551 #if __FreeBSD_version >= 800504
2552                 bus_describe_intr(dev, txr->res, txr->tag, "tx %d", i);
2553 #endif
2554                 txr->msix = vector++; /* Increment vector for next pass */
2555                 TASK_INIT(&txr->tx_task, 0, em_handle_tx, txr);
2556                 txr->tq = taskqueue_create_fast("em_txq", M_NOWAIT,
2557                     taskqueue_thread_enqueue, &txr->tq);
2558                 taskqueue_start_threads(&txr->tq, 1, PI_NET, "%s txq",
2559                     device_get_nameunit(adapter->dev));
2560                 /*
2561                 ** Set the bit to enable interrupt
2562                 ** in E1000_IMS -- bits 22 and 23
2563                 ** are for TX0 and TX1; note this has
2564                 ** NOTHING to do with the MSIX vector
2565                 */
2566                 txr->ims = 1 << (22 + i);
2567                 adapter->ivars |= (8 | txr->msix) << (8 + (i * 4));
2568         }
2569
2570         /* Link interrupt */
2571         ++rid;
2572         adapter->res = bus_alloc_resource_any(dev,
2573             SYS_RES_IRQ, &rid, RF_ACTIVE);
2574         if (!adapter->res) {
2575                 device_printf(dev,"Unable to allocate "
2576                     "bus resource: Link interrupt [%d]\n", rid);
2577                 return (ENXIO);
2578         }
2579         /* Set the link handler function */
2580         error = bus_setup_intr(dev, adapter->res,
2581             INTR_TYPE_NET | INTR_MPSAFE, NULL,
2582             em_msix_link, adapter, &adapter->tag);
2583         if (error) {
2584                 adapter->res = NULL;
2585                 device_printf(dev, "Failed to register LINK handler");
2586                 return (error);
2587         }
2588 #if __FreeBSD_version >= 800504
2589         bus_describe_intr(dev, adapter->res, adapter->tag, "link");
2590 #endif
2591         adapter->linkvec = vector;
2592         adapter->ivars |=  (8 | vector) << 16;
2593         adapter->ivars |= 0x80000000;
2594
2595         return (0);
2596 }
2597
2598
2599 static void
2600 em_free_pci_resources(struct adapter *adapter)
2601 {
2602         device_t        dev = adapter->dev;
2603         struct tx_ring  *txr;
2604         struct rx_ring  *rxr;
2605         int             rid;
2606
2607
2608         /*
2609         ** Release all the queue interrupt resources:
2610         */
2611         for (int i = 0; i < adapter->num_queues; i++) {
2612                 txr = &adapter->tx_rings[i];
2613                 rxr = &adapter->rx_rings[i];
2614                 /* an early abort? */
2615                 if ((txr == NULL) || (rxr == NULL))
2616                         break;
2617                 rid = txr->msix + 1;
2618                 if (txr->tag != NULL) {
2619                         bus_teardown_intr(dev, txr->res, txr->tag);
2620                         txr->tag = NULL;
2621                 }
2622                 if (txr->res != NULL)
2623                         bus_release_resource(dev, SYS_RES_IRQ,
2624                             rid, txr->res);
2625                 rid = rxr->msix + 1;
2626                 if (rxr->tag != NULL) {
2627                         bus_teardown_intr(dev, rxr->res, rxr->tag);
2628                         rxr->tag = NULL;
2629                 }
2630                 if (rxr->res != NULL)
2631                         bus_release_resource(dev, SYS_RES_IRQ,
2632                             rid, rxr->res);
2633         }
2634
2635         if (adapter->linkvec) /* we are doing MSIX */
2636                 rid = adapter->linkvec + 1;
2637         else
2638                 rid = (adapter->msix != 0) ? 1 : 0;
2639
2640         if (adapter->tag != NULL) {
2641                 bus_teardown_intr(dev, adapter->res, adapter->tag);
2642                 adapter->tag = NULL;
2643         }
2644
2645         if (adapter->res != NULL)
2646                 bus_release_resource(dev, SYS_RES_IRQ, rid, adapter->res);
2647
2648
2649         if (adapter->msix)
2650                 pci_release_msi(dev);
2651
2652         if (adapter->msix_mem != NULL)
2653                 bus_release_resource(dev, SYS_RES_MEMORY,
2654                     PCIR_BAR(EM_MSIX_BAR), adapter->msix_mem);
2655
2656         if (adapter->memory != NULL)
2657                 bus_release_resource(dev, SYS_RES_MEMORY,
2658                     PCIR_BAR(0), adapter->memory);
2659
2660         if (adapter->flash != NULL)
2661                 bus_release_resource(dev, SYS_RES_MEMORY,
2662                     EM_FLASH, adapter->flash);
2663 }
2664
2665 /*
2666  * Setup MSI or MSI/X
2667  */
2668 static int
2669 em_setup_msix(struct adapter *adapter)
2670 {
2671         device_t dev = adapter->dev;
2672         int val = 0;
2673
2674         /*
2675         ** Setup MSI/X for Hartwell: tests have shown
2676         ** use of two queues to be unstable, and to
2677         ** provide no great gain anyway, so we simply
2678         ** separate the interrupts and use a single queue.
2679         */
2680         if ((adapter->hw.mac.type == e1000_82574) &&
2681             (em_enable_msix == TRUE)) {
2682                 /* Map the MSIX BAR */
2683                 int rid = PCIR_BAR(EM_MSIX_BAR);
2684                 adapter->msix_mem = bus_alloc_resource_any(dev,
2685                     SYS_RES_MEMORY, &rid, RF_ACTIVE);
2686                 if (!adapter->msix_mem) {
2687                         /* May not be enabled */
2688                         device_printf(adapter->dev,
2689                             "Unable to map MSIX table \n");
2690                         goto msi;
2691                 }
2692                 val = pci_msix_count(dev); 
2693                 /* We only need 3 vectors */
2694                 if (val > 3)
2695                         val = 3;
2696                 if (val != 3) { /* 5 is unreachable; val clamped above */
2697                         bus_release_resource(dev, SYS_RES_MEMORY,
2698                             PCIR_BAR(EM_MSIX_BAR), adapter->msix_mem);
2699                         adapter->msix_mem = NULL;
2700                         device_printf(adapter->dev,
2701                             "MSIX: incorrect vectors, using MSI\n");
2702                         goto msi;
2703                 }
2704
2705                 if (pci_alloc_msix(dev, &val) == 0) {
2706                         device_printf(adapter->dev,
2707                             "Using MSIX interrupts "
2708                             "with %d vectors\n", val);
2709                 }
2710
2711                 return (val);
2712         }
2713 msi:
2714         val = pci_msi_count(dev);
2715         if (val == 1 && pci_alloc_msi(dev, &val) == 0) {
2716                 adapter->msix = 1;
2717                 device_printf(adapter->dev,"Using an MSI interrupt\n");
2718                 return (val);
2719         } 
2720         /* Should only happen due to manual configuration */
2721         device_printf(adapter->dev,"No MSI/MSIX using a Legacy IRQ\n");
2722         return (0);
2723 }
2724
2725
2726 /*********************************************************************
2727  *
2728  *  Initialize the hardware to a configuration
2729  *  as specified by the adapter structure.
2730  *
2731  **********************************************************************/
2732 static void
2733 em_reset(struct adapter *adapter)
2734 {
2735         device_t        dev = adapter->dev;
2736         struct ifnet    *ifp = adapter->ifp;
2737         struct e1000_hw *hw = &adapter->hw;
2738         u16             rx_buffer_size;
2739         u32             pba;
2740
2741         INIT_DEBUGOUT("em_reset: begin");
2742
2743         /* Set up smart power down as default off on newer adapters. */
2744         if (!em_smart_pwr_down && (hw->mac.type == e1000_82571 ||
2745             hw->mac.type == e1000_82572)) {
2746                 u16 phy_tmp = 0;
2747
2748                 /* Speed up time to link by disabling smart power down. */
2749                 e1000_read_phy_reg(hw, IGP02E1000_PHY_POWER_MGMT, &phy_tmp);
2750                 phy_tmp &= ~IGP02E1000_PM_SPD;
2751                 e1000_write_phy_reg(hw, IGP02E1000_PHY_POWER_MGMT, phy_tmp);
2752         }
2753
2754         /*
2755          * Packet Buffer Allocation (PBA)
2756          * Writing PBA sets the receive portion of the buffer;
2757          * the remainder is used for the transmit buffer.
2758          */
2759         switch (hw->mac.type) {
2760         /* Total Packet Buffer on these is 48K */
2761         case e1000_82571:
2762         case e1000_82572:
2763         case e1000_80003es2lan:
2764                 pba = E1000_PBA_32K; /* 32K for Rx, 16K for Tx */
2765                 break;
2766         case e1000_82573: /* 82573: Total Packet Buffer is 32K */
2767                 pba = E1000_PBA_12K; /* 12K for Rx, 20K for Tx */
2768                 break;
2769         case e1000_82574:
2770         case e1000_82583:
2771                 pba = E1000_PBA_20K; /* 20K for Rx, 20K for Tx */
2772                 break;
2773         case e1000_ich8lan:
2774                 pba = E1000_PBA_8K;
2775                 break;
2776         case e1000_ich9lan:
2777         case e1000_ich10lan:
2778                 /* Boost Receive side for jumbo frames */
2779                 if (adapter->max_frame_size > 4096)
2780                         pba = E1000_PBA_14K;
2781                 else
2782                         pba = E1000_PBA_10K;
2783                 break;
2784         case e1000_pchlan:
2785         case e1000_pch2lan:
2786                 pba = E1000_PBA_26K;
2787                 break;
2788         default:
2789                 if (adapter->max_frame_size > 8192)
2790                         pba = E1000_PBA_40K; /* 40K for Rx, 24K for Tx */
2791                 else
2792                         pba = E1000_PBA_48K; /* 48K for Rx, 16K for Tx */
2793         }
2794         E1000_WRITE_REG(&adapter->hw, E1000_PBA, pba);
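        /*
         * Illustrative note: the low word of E1000_PBA is in units of
         * 1 KB; the rx_buffer_size computation below converts it to
         * bytes with "<< 10" (e.g. E1000_PBA_32K yields a 32768-byte
         * Rx allocation).
         */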
2795
2796         /*
2797          * These parameters control the automatic generation (Tx) and
2798          * response (Rx) to Ethernet PAUSE frames.
2799          * - High water mark should allow for at least two frames to be
2800          *   received after sending an XOFF.
2801          * - Low water mark works best when it is very near the high water mark.
2802          *   This allows the receiver to restart by sending XON when it has
2803          *   drained a bit. Here we use an arbitrary value of 1500, which will
2804          *   restart after one full frame is pulled from the buffer. There
2805          *   could be several smaller frames in the buffer, and if so they will
2806          *   not trigger XON until together they have drained 1500 bytes
2807          *   from the buffer.
2808          * - The pause time is fairly large at 1000 x 512ns = 512 usec.
2809          */
2810         rx_buffer_size = ((E1000_READ_REG(hw, E1000_PBA) & 0xffff) << 10);
2811         hw->fc.high_water = rx_buffer_size -
2812             roundup2(adapter->max_frame_size, 1024);
2813         hw->fc.low_water = hw->fc.high_water - 1500;
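        /*
         * Illustrative worked example: with a 32 KB Rx PBA
         * (rx_buffer_size = 32 << 10 = 32768) and a 1522-byte max frame,
         * roundup2(1522, 1024) = 2048, so high_water = 30720 and
         * low_water = 29220 bytes.
         */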
2814
2815         if (adapter->fc) /* locally set flow control value? */
2816                 hw->fc.requested_mode = adapter->fc;
2817         else
2818                 hw->fc.requested_mode = e1000_fc_full;
2819
2820         if (hw->mac.type == e1000_80003es2lan)
2821                 hw->fc.pause_time = 0xFFFF;
2822         else
2823                 hw->fc.pause_time = EM_FC_PAUSE_TIME;
2824
2825         hw->fc.send_xon = TRUE;
2826
2827         /* Device specific overrides/settings */
2828         switch (hw->mac.type) {
2829         case e1000_pchlan:
2830                 /* Workaround: no TX flow ctrl for PCH */
2831                 hw->fc.requested_mode = e1000_fc_rx_pause;
2832                 hw->fc.pause_time = 0xFFFF; /* override */
2833                 if (ifp->if_mtu > ETHERMTU) {
2834                         hw->fc.high_water = 0x3500;
2835                         hw->fc.low_water = 0x1500;
2836                 } else {
2837                         hw->fc.high_water = 0x5000;
2838                         hw->fc.low_water = 0x3000;
2839                 }
2840                 hw->fc.refresh_time = 0x1000;
2841                 break;
2842         case e1000_pch2lan:
2843                 hw->fc.high_water = 0x5C20;
2844                 hw->fc.low_water = 0x5048;
2845                 hw->fc.pause_time = 0x0650;
2846                 hw->fc.refresh_time = 0x0400;
2847                 /* Jumbos need adjusted PBA */
2848                 if (ifp->if_mtu > ETHERMTU)
2849                         E1000_WRITE_REG(hw, E1000_PBA, 12);
2850                 else
2851                         E1000_WRITE_REG(hw, E1000_PBA, 26);
2852                 break;
2853         case e1000_ich9lan:
2854         case e1000_ich10lan:
2855                 if (ifp->if_mtu > ETHERMTU) {
2856                         hw->fc.high_water = 0x2800;
2857                         hw->fc.low_water = hw->fc.high_water - 8;
2858                         break;
2859                 } 
2860                 /* else fall thru */
2861         default:
2862                 if (hw->mac.type == e1000_80003es2lan)
2863                         hw->fc.pause_time = 0xFFFF;
2864                 break;
2865         }
2866
2867         /* Issue a global reset */
2868         e1000_reset_hw(hw);
2869         E1000_WRITE_REG(hw, E1000_WUC, 0);
2870         em_disable_aspm(adapter);
2871         /* and a re-init */
2872         if (e1000_init_hw(hw) < 0) {
2873                 device_printf(dev, "Hardware Initialization Failed\n");
2874                 return;
2875         }
2876
2877         E1000_WRITE_REG(hw, E1000_VET, ETHERTYPE_VLAN);
2878         e1000_get_phy_info(hw);
2879         e1000_check_for_link(hw);
2880         return;
2881 }
2882
2883 /*********************************************************************
2884  *
2885  *  Setup networking device structure and register an interface.
2886  *
2887  **********************************************************************/
2888 static int
2889 em_setup_interface(device_t dev, struct adapter *adapter)
2890 {
2891         struct ifnet   *ifp;
2892
2893         INIT_DEBUGOUT("em_setup_interface: begin");
2894
2895         ifp = adapter->ifp = if_alloc(IFT_ETHER);
2896         if (ifp == NULL) {
2897                 device_printf(dev, "cannot allocate ifnet structure\n");
2898                 return (-1);
2899         }
2900         if_initname(ifp, device_get_name(dev), device_get_unit(dev));
2901         ifp->if_init = em_init;
2902         ifp->if_softc = adapter;
2903         ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
2904         ifp->if_ioctl = em_ioctl;
2905         ifp->if_start = em_start;
2906         IFQ_SET_MAXLEN(&ifp->if_snd, adapter->num_tx_desc - 1);
2907         ifp->if_snd.ifq_drv_maxlen = adapter->num_tx_desc - 1;
2908         IFQ_SET_READY(&ifp->if_snd);
2909
2910         ether_ifattach(ifp, adapter->hw.mac.addr);
2911
2912         ifp->if_capabilities = ifp->if_capenable = 0;
2913
2914 #ifdef EM_MULTIQUEUE
2915         /* Multiqueue stack interface */
2916         ifp->if_transmit = em_mq_start;
2917         ifp->if_qflush = em_qflush;
2918 #endif  
2919
2920         ifp->if_capabilities |= IFCAP_HWCSUM | IFCAP_VLAN_HWCSUM;
2921         ifp->if_capabilities |= IFCAP_TSO4;
2922         /*
2923          * Tell the upper layer(s) we
2924          * support full VLAN capability
2925          */
2926         ifp->if_data.ifi_hdrlen = sizeof(struct ether_vlan_header);
2927         ifp->if_capabilities |= IFCAP_VLAN_HWTAGGING
2928                              |  IFCAP_VLAN_HWTSO
2929                              |  IFCAP_VLAN_MTU;
2930         ifp->if_capenable = ifp->if_capabilities;
2931
2932         /*
2933         ** Don't turn this on by default: if vlans are
2934         ** created on another pseudo device (e.g. lagg),
2935         ** then vlan events are not passed thru, breaking
2936         ** operation, but with HW FILTER off it works. If
2937         ** you use vlans directly on the em driver you can
2938         ** enable this and get full hardware tag filtering.
2939         */
2940         ifp->if_capabilities |= IFCAP_VLAN_HWFILTER;
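        /*
         * Illustrative note: the filter capability is advertised above
         * but deliberately not added to if_capenable; on a running
         * system it can be toggled from userland, e.g.
         * "ifconfig em0 vlanhwfilter" or "ifconfig em0 -vlanhwfilter".
         */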
2941
2942 #ifdef DEVICE_POLLING
2943         ifp->if_capabilities |= IFCAP_POLLING;
2944 #endif
2945
2946         /* Enable only WOL MAGIC by default */
2947         if (adapter->wol) {
2948                 ifp->if_capabilities |= IFCAP_WOL;
2949                 ifp->if_capenable |= IFCAP_WOL_MAGIC;
2950         }
2951                 
2952         /*
2953          * Specify the media types supported by this adapter and register
2954          * callbacks to update media and link information
2955          */
2956         ifmedia_init(&adapter->media, IFM_IMASK,
2957             em_media_change, em_media_status);
2958         if ((adapter->hw.phy.media_type == e1000_media_type_fiber) ||
2959             (adapter->hw.phy.media_type == e1000_media_type_internal_serdes)) {
2960                 u_char fiber_type = IFM_1000_SX;        /* default type */
2961
2962                 ifmedia_add(&adapter->media, IFM_ETHER | fiber_type | IFM_FDX, 
2963                             0, NULL);
2964                 ifmedia_add(&adapter->media, IFM_ETHER | fiber_type, 0, NULL);
2965         } else {
2966                 ifmedia_add(&adapter->media, IFM_ETHER | IFM_10_T, 0, NULL);
2967                 ifmedia_add(&adapter->media, IFM_ETHER | IFM_10_T | IFM_FDX,
2968                             0, NULL);
2969                 ifmedia_add(&adapter->media, IFM_ETHER | IFM_100_TX,
2970                             0, NULL);
2971                 ifmedia_add(&adapter->media, IFM_ETHER | IFM_100_TX | IFM_FDX,
2972                             0, NULL);
2973                 if (adapter->hw.phy.type != e1000_phy_ife) {
2974                         ifmedia_add(&adapter->media,
2975                                 IFM_ETHER | IFM_1000_T | IFM_FDX, 0, NULL);
2976                         ifmedia_add(&adapter->media,
2977                                 IFM_ETHER | IFM_1000_T, 0, NULL);
2978                 }
2979         }
2980         ifmedia_add(&adapter->media, IFM_ETHER | IFM_AUTO, 0, NULL);
2981         ifmedia_set(&adapter->media, IFM_ETHER | IFM_AUTO);
2982         return (0);
2983 }
2984
2985
2986 /*
2987  * Manage DMA'able memory.
2988  */
2989 static void
2990 em_dmamap_cb(void *arg, bus_dma_segment_t *segs, int nseg, int error)
2991 {
2992         if (error)
2993                 return;
2994         *(bus_addr_t *) arg = segs[0].ds_addr;
2995 }
2996
2997 static int
2998 em_dma_malloc(struct adapter *adapter, bus_size_t size,
2999         struct em_dma_alloc *dma, int mapflags)
3000 {
3001         int error;
3002
3003         error = bus_dma_tag_create(bus_get_dma_tag(adapter->dev), /* parent */
3004                                 EM_DBA_ALIGN, 0,        /* alignment, bounds */
3005                                 BUS_SPACE_MAXADDR,      /* lowaddr */
3006                                 BUS_SPACE_MAXADDR,      /* highaddr */
3007                                 NULL, NULL,             /* filter, filterarg */
3008                                 size,                   /* maxsize */
3009                                 1,                      /* nsegments */
3010                                 size,                   /* maxsegsize */
3011                                 0,                      /* flags */
3012                                 NULL,                   /* lockfunc */
3013                                 NULL,                   /* lockarg */
3014                                 &dma->dma_tag);
3015         if (error) {
3016                 device_printf(adapter->dev,
3017                     "%s: bus_dma_tag_create failed: %d\n",
3018                     __func__, error);
3019                 goto fail_0;
3020         }
3021
3022         error = bus_dmamem_alloc(dma->dma_tag, (void**) &dma->dma_vaddr,
3023             BUS_DMA_NOWAIT | BUS_DMA_COHERENT, &dma->dma_map);
3024         if (error) {
3025                 device_printf(adapter->dev,
3026                     "%s: bus_dmamem_alloc(%ju) failed: %d\n",
3027                     __func__, (uintmax_t)size, error);
3028                 goto fail_1;
3029         }
3030
3031         dma->dma_paddr = 0;
3032         error = bus_dmamap_load(dma->dma_tag, dma->dma_map, dma->dma_vaddr,
3033             size, em_dmamap_cb, &dma->dma_paddr, mapflags | BUS_DMA_NOWAIT);
3034         if (error || dma->dma_paddr == 0) {
3035                 device_printf(adapter->dev,
3036                     "%s: bus_dmamap_load failed: %d\n",
3037                     __func__, error);
3038                 goto fail_3;
3039         }
3040
3041         return (0);
3042
3043 fail_3:
3044         bus_dmamap_unload(dma->dma_tag, dma->dma_map);
3045 fail_2:
3046         bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map);
fail_1:
3047         bus_dma_tag_destroy(dma->dma_tag);
3048 fail_0:
3049         dma->dma_map = NULL;
3050         dma->dma_tag = NULL;
3051
3052         return (error);
3053 }
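
/*
 * Illustrative sketch (not part of the driver): em_dma_malloc() and
 * em_dma_free() are used in matched pairs, as em_allocate_queues()
 * below does for each descriptor ring; the caller gets back both the
 * kernel virtual address (dma_vaddr) and the bus address (dma_paddr)
 * that the hardware is programmed with.  Hypothetical usage:
 */
#if 0
        struct em_dma_alloc txdma;
        int tsize = roundup2(1024 * sizeof(struct e1000_tx_desc),
            EM_DBA_ALIGN);

        if (em_dma_malloc(adapter, tsize, &txdma, BUS_DMA_NOWAIT) == 0) {
                /* CPU accesses descriptors through txdma.dma_vaddr ... */
                /* ... while the NIC is given txdma.dma_paddr */
                em_dma_free(adapter, &txdma);
        }
#endif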
3054
3055 static void
3056 em_dma_free(struct adapter *adapter, struct em_dma_alloc *dma)
3057 {
3058         if (dma->dma_tag == NULL)
3059                 return;
3060         if (dma->dma_map != NULL) {
3061                 bus_dmamap_sync(dma->dma_tag, dma->dma_map,
3062                     BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
3063                 bus_dmamap_unload(dma->dma_tag, dma->dma_map);
3064                 bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map);
3065                 dma->dma_map = NULL;
3066         }
3067         bus_dma_tag_destroy(dma->dma_tag);
3068         dma->dma_tag = NULL;
3069 }
3070
3071
3072 /*********************************************************************
3073  *
3074  *  Allocate memory for the transmit and receive rings, and then
3075  *  the descriptors associated with each, called only once at attach.
3076  *
3077  **********************************************************************/
3078 static int
3079 em_allocate_queues(struct adapter *adapter)
3080 {
3081         device_t                dev = adapter->dev;
3082         struct tx_ring          *txr = NULL;
3083         struct rx_ring          *rxr = NULL;
3084         int rsize, tsize, error = E1000_SUCCESS;
3085         int txconf = 0, rxconf = 0;
3086
3087
3088         /* Allocate the TX ring struct memory */
3089         if (!(adapter->tx_rings =
3090             (struct tx_ring *) malloc(sizeof(struct tx_ring) *
3091             adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
3092                 device_printf(dev, "Unable to allocate TX ring memory\n");
3093                 error = ENOMEM;
3094                 goto fail;
3095         }
3096
3097         /* Now allocate the RX */
3098         if (!(adapter->rx_rings =
3099             (struct rx_ring *) malloc(sizeof(struct rx_ring) *
3100             adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
3101                 device_printf(dev, "Unable to allocate RX ring memory\n");
3102                 error = ENOMEM;
3103                 goto rx_fail;
3104         }
3105
3106         tsize = roundup2(adapter->num_tx_desc *
3107             sizeof(struct e1000_tx_desc), EM_DBA_ALIGN);
3108         /*
3109          * Now set up the TX queues, txconf is needed to handle the
3110          * possibility that things fail midcourse and we need to
3111          * undo memory gracefully
3112          */ 
3113         for (int i = 0; i < adapter->num_queues; i++, txconf++) {
3114                 /* Set up some basics */
3115                 txr = &adapter->tx_rings[i];
3116                 txr->adapter = adapter;
3117                 txr->me = i;
3118
3119                 /* Initialize the TX lock */
3120                 snprintf(txr->mtx_name, sizeof(txr->mtx_name), "%s:tx(%d)",
3121                     device_get_nameunit(dev), txr->me);
3122                 mtx_init(&txr->tx_mtx, txr->mtx_name, NULL, MTX_DEF);
3123
3124                 if (em_dma_malloc(adapter, tsize,
3125                         &txr->txdma, BUS_DMA_NOWAIT)) {
3126                         device_printf(dev,
3127                             "Unable to allocate TX Descriptor memory\n");
3128                         error = ENOMEM;
3129                         goto err_tx_desc;
3130                 }
3131                 txr->tx_base = (struct e1000_tx_desc *)txr->txdma.dma_vaddr;
3132                 bzero((void *)txr->tx_base, tsize);
3133
3134                 if (em_allocate_transmit_buffers(txr)) {
3135                         device_printf(dev,
3136                             "Critical Failure setting up transmit buffers\n");
3137                         error = ENOMEM;
3138                         goto err_tx_desc;
3139                 }
3140 #if __FreeBSD_version >= 800000
3141                 /* Allocate a buf ring */
3142                 txr->br = buf_ring_alloc(4096, M_DEVBUF,
3143                     M_WAITOK, &txr->tx_mtx);
3144 #endif
3145         }
3146
3147         /*
3148          * Next the RX queues...
3149          */ 
3150         rsize = roundup2(adapter->num_rx_desc *
3151             sizeof(struct e1000_rx_desc), EM_DBA_ALIGN);
3152         for (int i = 0; i < adapter->num_queues; i++, rxconf++) {
3153                 rxr = &adapter->rx_rings[i];
3154                 rxr->adapter = adapter;
3155                 rxr->me = i;
3156
3157                 /* Initialize the RX lock */
3158                 snprintf(rxr->mtx_name, sizeof(rxr->mtx_name), "%s:rx(%d)",
3159                     device_get_nameunit(dev), rxr->me);
3160                 mtx_init(&rxr->rx_mtx, rxr->mtx_name, NULL, MTX_DEF);
3161
3162                 if (em_dma_malloc(adapter, rsize,
3163                         &rxr->rxdma, BUS_DMA_NOWAIT)) {
3164                         device_printf(dev,
3165                             "Unable to allocate RX Descriptor memory\n");
3166                         error = ENOMEM;
3167                         goto err_rx_desc;
3168                 }
3169                 rxr->rx_base = (struct e1000_rx_desc *)rxr->rxdma.dma_vaddr;
3170                 bzero((void *)rxr->rx_base, rsize);
3171
3172                 /* Allocate receive buffers for the ring*/
3173                 if (em_allocate_receive_buffers(rxr)) {
3174                         device_printf(dev,
3175                             "Critical Failure setting up receive buffers\n");
3176                         error = ENOMEM;
3177                         goto err_rx_desc;
3178                 }
3179         }
3180
3181         return (0);
3182
3183 err_rx_desc:
3184         for (rxr = adapter->rx_rings; rxconf > 0; rxr++, rxconf--)
3185                 em_dma_free(adapter, &rxr->rxdma);
3186 err_tx_desc:
3187         for (txr = adapter->tx_rings; txconf > 0; txr++, txconf--) {
3188                 em_dma_free(adapter, &txr->txdma);
#if __FreeBSD_version >= 800000
                if (txr->br != NULL)
                        buf_ring_free(txr->br, M_DEVBUF);
#endif
        }
3189         free(adapter->rx_rings, M_DEVBUF);
3190 rx_fail:
3194         free(adapter->tx_rings, M_DEVBUF);
3195 fail:
3196         return (error);
3197 }
3198
3199
3200 /*********************************************************************
3201  *
3202  *  Allocate memory for tx_buffer structures. The tx_buffer stores all
3203  *  the information needed to transmit a packet on the wire. This is
3204  *  called only once at attach, setup is done every reset.
3205  *
3206  **********************************************************************/
3207 static int
3208 em_allocate_transmit_buffers(struct tx_ring *txr)
3209 {
3210         struct adapter *adapter = txr->adapter;
3211         device_t dev = adapter->dev;
3212         struct em_buffer *txbuf;
3213         int error, i;
3214
3215         /*
3216          * Setup DMA descriptor areas.
3217          */
3218         if ((error = bus_dma_tag_create(bus_get_dma_tag(dev),
3219                                1, 0,                    /* alignment, bounds */
3220                                BUS_SPACE_MAXADDR,       /* lowaddr */
3221                                BUS_SPACE_MAXADDR,       /* highaddr */
3222                                NULL, NULL,              /* filter, filterarg */
3223                                EM_TSO_SIZE,             /* maxsize */
3224                                EM_MAX_SCATTER,          /* nsegments */
3225                                PAGE_SIZE,               /* maxsegsize */
3226                                0,                       /* flags */
3227                                NULL,                    /* lockfunc */
3228                                NULL,                    /* lockfuncarg */
3229                                &txr->txtag))) {
3230                 device_printf(dev,"Unable to allocate TX DMA tag\n");
3231                 goto fail;
3232         }
3233
3234         if (!(txr->tx_buffers =
3235             (struct em_buffer *) malloc(sizeof(struct em_buffer) *
3236             adapter->num_tx_desc, M_DEVBUF, M_NOWAIT | M_ZERO))) {
3237                 device_printf(dev, "Unable to allocate tx_buffer memory\n");
3238                 error = ENOMEM;
3239                 goto fail;
3240         }
3241
3242         /* Create the descriptor buffer dma maps */
3243         txbuf = txr->tx_buffers;
3244         for (i = 0; i < adapter->num_tx_desc; i++, txbuf++) {
3245                 error = bus_dmamap_create(txr->txtag, 0, &txbuf->map);
3246                 if (error != 0) {
3247                         device_printf(dev, "Unable to create TX DMA map\n");
3248                         goto fail;
3249                 }
3250         }
3251
3252         return (0);
3253 fail:
3254         /* We free all; this handles the case where we fail midway */
3255         em_free_transmit_structures(adapter);
3256         return (error);
3257 }
3258
3259 /*********************************************************************
3260  *
3261  *  Initialize a transmit ring.
3262  *
3263  **********************************************************************/
3264 static void
3265 em_setup_transmit_ring(struct tx_ring *txr)
3266 {
3267         struct adapter *adapter = txr->adapter;
3268         struct em_buffer *txbuf;
3269         int i;
3270 #ifdef DEV_NETMAP
3271         struct netmap_adapter *na = NA(adapter->ifp);
3272         struct netmap_slot *slot;
3273 #endif /* DEV_NETMAP */
3274
3275         /* Clear the old descriptor contents */
3276         EM_TX_LOCK(txr);
3277 #ifdef DEV_NETMAP
3278         slot = netmap_reset(na, NR_TX, txr->me, 0);
3279 #endif /* DEV_NETMAP */
3280
3281         bzero((void *)txr->tx_base,
3282               (sizeof(struct e1000_tx_desc)) * adapter->num_tx_desc);
3283         /* Reset indices */
3284         txr->next_avail_desc = 0;
3285         txr->next_to_clean = 0;
3286
3287         /* Free any existing tx buffers. */
3288         txbuf = txr->tx_buffers;
3289         for (i = 0; i < adapter->num_tx_desc; i++, txbuf++) {
3290                 if (txbuf->m_head != NULL) {
3291                         bus_dmamap_sync(txr->txtag, txbuf->map,
3292                             BUS_DMASYNC_POSTWRITE);
3293                         bus_dmamap_unload(txr->txtag, txbuf->map);
3294                         m_freem(txbuf->m_head);
3295                         txbuf->m_head = NULL;
3296                 }
3297 #ifdef DEV_NETMAP
3298                 if (slot) {
3299                         int si = netmap_tidx_n2k(na, txr->me, i);
3300                         uint64_t paddr;
3301                         void *addr;
3302
3303                         addr = PNMB(slot + si, &paddr);
3304                         txr->tx_base[i].buffer_addr = htole64(paddr);
3305                         /* reload the map for netmap mode */
3306                         netmap_load_map(txr->txtag, txbuf->map, addr);
3307                 }
3308 #endif /* DEV_NETMAP */
3309
3310                 /* clear the watch index */
3311                 txbuf->next_eop = -1;
3312         }
3313
3314         /* Set number of descriptors available */
3315         txr->tx_avail = adapter->num_tx_desc;
3316         txr->queue_status = EM_QUEUE_IDLE;
3317
3318         /* Clear checksum offload context. */
3319         txr->last_hw_offload = 0;
3320         txr->last_hw_ipcss = 0;
3321         txr->last_hw_ipcso = 0;
3322         txr->last_hw_tucss = 0;
3323         txr->last_hw_tucso = 0;
3324
3325         bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
3326             BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
3327         EM_TX_UNLOCK(txr);
3328 }
3329
3330 /*********************************************************************
3331  *
3332  *  Initialize all transmit rings.
3333  *
3334  **********************************************************************/
3335 static void
3336 em_setup_transmit_structures(struct adapter *adapter)
3337 {
3338         struct tx_ring *txr = adapter->tx_rings;
3339
3340         for (int i = 0; i < adapter->num_queues; i++, txr++)
3341                 em_setup_transmit_ring(txr);
3342
3343         return;
3344 }
3345
3346 /*********************************************************************
3347  *
3348  *  Enable transmit unit.
3349  *
3350  **********************************************************************/
3351 static void
3352 em_initialize_transmit_unit(struct adapter *adapter)
3353 {
3354         struct tx_ring  *txr = adapter->tx_rings;
3355         struct e1000_hw *hw = &adapter->hw;
3356         u32     tctl, tarc, tipg = 0;
3357
3358         INIT_DEBUGOUT("em_initialize_transmit_unit: begin");
3359
3360         for (int i = 0; i < adapter->num_queues; i++, txr++) {
3361                 u64 bus_addr = txr->txdma.dma_paddr;
3362                 /* Base and Len of TX Ring */
3363                 E1000_WRITE_REG(hw, E1000_TDLEN(i),
3364                     adapter->num_tx_desc * sizeof(struct e1000_tx_desc));
3365                 E1000_WRITE_REG(hw, E1000_TDBAH(i),
3366                     (u32)(bus_addr >> 32));
3367                 E1000_WRITE_REG(hw, E1000_TDBAL(i),
3368                     (u32)bus_addr);
3369                 /* Init the HEAD/TAIL indices */
3370                 E1000_WRITE_REG(hw, E1000_TDT(i), 0);
3371                 E1000_WRITE_REG(hw, E1000_TDH(i), 0);
3372
3373                 HW_DEBUGOUT2("Base = %x, Length = %x\n",
3374                     E1000_READ_REG(&adapter->hw, E1000_TDBAL(i)),
3375                     E1000_READ_REG(&adapter->hw, E1000_TDLEN(i)));
3376
3377                 txr->queue_status = EM_QUEUE_IDLE;
3378         }
3379
3380         /* Set the default values for the Tx Inter Packet Gap timer */
3381         switch (adapter->hw.mac.type) {
3382         case e1000_80003es2lan:
3383                 tipg = DEFAULT_82543_TIPG_IPGR1;
3384                 tipg |= DEFAULT_80003ES2LAN_TIPG_IPGR2 <<
3385                     E1000_TIPG_IPGR2_SHIFT;
3386                 break;
3387         default:
3388                 if ((adapter->hw.phy.media_type == e1000_media_type_fiber) ||
3389                     (adapter->hw.phy.media_type ==
3390                     e1000_media_type_internal_serdes))
3391                         tipg = DEFAULT_82543_TIPG_IPGT_FIBER;
3392                 else
3393                         tipg = DEFAULT_82543_TIPG_IPGT_COPPER;
3394                 tipg |= DEFAULT_82543_TIPG_IPGR1 << E1000_TIPG_IPGR1_SHIFT;
3395                 tipg |= DEFAULT_82543_TIPG_IPGR2 << E1000_TIPG_IPGR2_SHIFT;
3396         }
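        /*
         * Illustrative note: TIPG packs three inter-packet-gap fields
         * into a single register (IPGT in the low bits, with IPGR1 and
         * IPGR2 above it), which is why the values are OR'd together
         * using E1000_TIPG_IPGR1_SHIFT and E1000_TIPG_IPGR2_SHIFT.
         */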
3397
3398         E1000_WRITE_REG(&adapter->hw, E1000_TIPG, tipg);
3399         E1000_WRITE_REG(&adapter->hw, E1000_TIDV, adapter->tx_int_delay.value);
3400
3401         if (adapter->hw.mac.type >= e1000_82540)
3402                 E1000_WRITE_REG(&adapter->hw, E1000_TADV,
3403                     adapter->tx_abs_int_delay.value);
3404
3405         if ((adapter->hw.mac.type == e1000_82571) ||
3406             (adapter->hw.mac.type == e1000_82572)) {
3407                 tarc = E1000_READ_REG(&adapter->hw, E1000_TARC(0));
3408                 tarc |= SPEED_MODE_BIT;
3409                 E1000_WRITE_REG(&adapter->hw, E1000_TARC(0), tarc);
3410         } else if (adapter->hw.mac.type == e1000_80003es2lan) {
3411                 tarc = E1000_READ_REG(&adapter->hw, E1000_TARC(0));
3412                 tarc |= 1;
3413                 E1000_WRITE_REG(&adapter->hw, E1000_TARC(0), tarc);
3414                 tarc = E1000_READ_REG(&adapter->hw, E1000_TARC(1));
3415                 tarc |= 1;
3416                 E1000_WRITE_REG(&adapter->hw, E1000_TARC(1), tarc);
3417         }
3418
3419         adapter->txd_cmd = E1000_TXD_CMD_IFCS;
3420         if (adapter->tx_int_delay.value > 0)
3421                 adapter->txd_cmd |= E1000_TXD_CMD_IDE;
3422
3423         /* Program the Transmit Control Register */
3424         tctl = E1000_READ_REG(&adapter->hw, E1000_TCTL);
3425         tctl &= ~E1000_TCTL_CT;
3426         tctl |= (E1000_TCTL_PSP | E1000_TCTL_RTLC | E1000_TCTL_EN |
3427                    (E1000_COLLISION_THRESHOLD << E1000_CT_SHIFT));
3428
3429         if (adapter->hw.mac.type >= e1000_82571)
3430                 tctl |= E1000_TCTL_MULR;
3431
3432         /* This write will effectively turn on the transmit unit. */
3433         E1000_WRITE_REG(&adapter->hw, E1000_TCTL, tctl);
3434
3435 }
3436
3437
3438 /*********************************************************************
3439  *
3440  *  Free all transmit rings.
3441  *
3442  **********************************************************************/
3443 static void
3444 em_free_transmit_structures(struct adapter *adapter)
3445 {
3446         struct tx_ring *txr = adapter->tx_rings;
3447
3448         for (int i = 0; i < adapter->num_queues; i++, txr++) {
3449                 EM_TX_LOCK(txr);
3450                 em_free_transmit_buffers(txr);
3451                 em_dma_free(adapter, &txr->txdma);
3452                 EM_TX_UNLOCK(txr);
3453                 EM_TX_LOCK_DESTROY(txr);
3454         }
3455
3456         free(adapter->tx_rings, M_DEVBUF);
3457 }
3458
3459 /*********************************************************************
3460  *
3461  *  Free transmit ring related data structures.
3462  *
3463  **********************************************************************/
3464 static void
3465 em_free_transmit_buffers(struct tx_ring *txr)
3466 {
3467         struct adapter          *adapter = txr->adapter;
3468         struct em_buffer        *txbuf;
3469
3470         INIT_DEBUGOUT("free_transmit_ring: begin");
3471
3472         if (txr->tx_buffers == NULL)
3473                 return;
3474
3475         for (int i = 0; i < adapter->num_tx_desc; i++) {
3476                 txbuf = &txr->tx_buffers[i];
3477                 if (txbuf->m_head != NULL) {
3478                         bus_dmamap_sync(txr->txtag, txbuf->map,
3479                             BUS_DMASYNC_POSTWRITE);
3480                         bus_dmamap_unload(txr->txtag,
3481                             txbuf->map);
3482                         m_freem(txbuf->m_head);
3483                         txbuf->m_head = NULL;
3484                         if (txbuf->map != NULL) {
3485                                 bus_dmamap_destroy(txr->txtag,
3486                                     txbuf->map);
3487                                 txbuf->map = NULL;
3488                         }
3489                 } else if (txbuf->map != NULL) {
3490                         bus_dmamap_unload(txr->txtag,
3491                             txbuf->map);
3492                         bus_dmamap_destroy(txr->txtag,
3493                             txbuf->map);
3494                         txbuf->map = NULL;
3495                 }
3496         }
3497 #if __FreeBSD_version >= 800000
3498         if (txr->br != NULL)
3499                 buf_ring_free(txr->br, M_DEVBUF);
3500 #endif
3501         if (txr->tx_buffers != NULL) {
3502                 free(txr->tx_buffers, M_DEVBUF);
3503                 txr->tx_buffers = NULL;
3504         }
3505         if (txr->txtag != NULL) {
3506                 bus_dma_tag_destroy(txr->txtag);
3507                 txr->txtag = NULL;
3508         }
3509         return;
3510 }
3511
3512
3513 /*********************************************************************
3514  *  The offload context is protocol specific (TCP/UDP) and thus
3515  *  only needs to be set when the protocol changes. The occasion
3516  *  of a context change can be a performance detriment, and
3517  *  might be better just disabled. The reason arises in the way
3518  *  in which the controller supports pipelined requests from the
3519  *  Tx data DMA. Up to four requests can be pipelined, and they may
3520  *  belong to the same packet or to multiple packets. However all
3521  *  requests for one packet are issued before a request is issued
3522  *  for a subsequent packet and if a request for the next packet
3523  *  requires a context change, that request will be stalled
3524  *  until the previous request completes. This means setting up
3525  *  a new context effectively disables pipelined Tx data DMA which
3526  *  in turn greatly slows down performance when sending small-sized
3527  *  frames.
3528  **********************************************************************/
3529 static void
3530 em_transmit_checksum_setup(struct tx_ring *txr, struct mbuf *mp, int ip_off,
3531     struct ip *ip, u32 *txd_upper, u32 *txd_lower)
3532 {
3533         struct adapter                  *adapter = txr->adapter;
3534         struct e1000_context_desc       *TXD = NULL;
3535         struct em_buffer                *tx_buffer;
3536         int                             cur, hdr_len;
3537         u32                             cmd = 0;
3538         u16                             offload = 0;
3539         u8                              ipcso, ipcss, tucso, tucss;
3540
3541         ipcss = ipcso = tucss = tucso = 0;
3542         hdr_len = ip_off + (ip->ip_hl << 2);
3543         cur = txr->next_avail_desc;
3544
3545         /* Setup of IP header checksum. */
3546         if (mp->m_pkthdr.csum_flags & CSUM_IP) {
3547                 *txd_upper |= E1000_TXD_POPTS_IXSM << 8;
3548                 offload |= CSUM_IP;
3549                 ipcss = ip_off;
3550                 ipcso = ip_off + offsetof(struct ip, ip_sum);
3551                 /*
3552                  * Start offset for header checksum calculation.
3553                  * End offset for header checksum calculation.
3554                  * Offset of place to put the checksum.
3555                  */
3556                 TXD = (struct e1000_context_desc *)&txr->tx_base[cur];
3557                 TXD->lower_setup.ip_fields.ipcss = ipcss;
3558                 TXD->lower_setup.ip_fields.ipcse = htole16(hdr_len);
3559                 TXD->lower_setup.ip_fields.ipcso = ipcso;
3560                 cmd |= E1000_TXD_CMD_IP;
3561         }
3562
3563         if (mp->m_pkthdr.csum_flags & CSUM_TCP) {
3564                 *txd_lower = E1000_TXD_CMD_DEXT | E1000_TXD_DTYP_D;
3565                 *txd_upper |= E1000_TXD_POPTS_TXSM << 8;
3566                 offload |= CSUM_TCP;
3567                 tucss = hdr_len;
3568                 tucso = hdr_len + offsetof(struct tcphdr, th_sum);
3569                 /*
3570                  * Setting up a new checksum offload context for every frame
3571                  * takes a lot of processing time for the hardware. This also
3572                  * reduces performance a lot for small-sized frames, so avoid
3573                  * it if the driver can use a previously configured checksum
3574                  * offload context.
3575                  */
3576                 if (txr->last_hw_offload == offload) {
3577                         if (offload & CSUM_IP) {
3578                                 if (txr->last_hw_ipcss == ipcss &&
3579                                     txr->last_hw_ipcso == ipcso &&
3580                                     txr->last_hw_tucss == tucss &&
3581                                     txr->last_hw_tucso == tucso)
3582                                         return;
3583                         } else {
3584                                 if (txr->last_hw_tucss == tucss &&
3585                                     txr->last_hw_tucso == tucso)
3586                                         return;
3587                         }
3588                 }
3589                 txr->last_hw_offload = offload;
3590                 txr->last_hw_tucss = tucss;
3591                 txr->last_hw_tucso = tucso;
3592                 /*
3593                  * Start offset for payload checksum calculation.
3594                  * End offset for payload checksum calculation.
3595                  * Offset of place to put the checksum.
3596                  */
3597                 TXD = (struct e1000_context_desc *)&txr->tx_base[cur];
3598                 TXD->upper_setup.tcp_fields.tucss = hdr_len;
3599                 TXD->upper_setup.tcp_fields.tucse = htole16(0);
3600                 TXD->upper_setup.tcp_fields.tucso = tucso;
3601                 cmd |= E1000_TXD_CMD_TCP;
3602         } else if (mp->m_pkthdr.csum_flags & CSUM_UDP) {
3603                 *txd_lower = E1000_TXD_CMD_DEXT | E1000_TXD_DTYP_D;
3604                 *txd_upper |= E1000_TXD_POPTS_TXSM << 8;
3605                 tucss = hdr_len;
3606                 tucso = hdr_len + offsetof(struct udphdr, uh_sum);
3607                  * Setting up a new checksum offload context for every frame
3608                  * takes a lot of processing time for the hardware. This also
3609                  * reduces performance a lot for small-sized frames, so avoid
3610                  * it if the driver can use a previously configured checksum
3611                  * offload context.
3612                  * offload context.
3613                  */
3614                 if (txr->last_hw_offload == offload) {
3615                         if (offload & CSUM_IP) {
3616                                 if (txr->last_hw_ipcss == ipcss &&
3617                                     txr->last_hw_ipcso == ipcso &&
3618                                     txr->last_hw_tucss == tucss &&
3619                                     txr->last_hw_tucso == tucso)
3620                                         return;
3621                         } else {
3622                                 if (txr->last_hw_tucss == tucss &&
3623                                     txr->last_hw_tucso == tucso)
3624                                         return;
3625                         }
3626                 }
3627                 txr->last_hw_offload = offload;
3628                 txr->last_hw_tucss = tucss;
3629                 txr->last_hw_tucso = tucso;
3630                 /*
3631                  * Start offset for payload checksum calculation.
3632                  * End offset for payload checksum calculation.
3633                  * Offset of place to put the checksum.
3634                  */
3635                 TXD = (struct e1000_context_desc *)&txr->tx_base[cur];
3636                 TXD->upper_setup.tcp_fields.tucss = tucss;
3637                 TXD->upper_setup.tcp_fields.tucse = htole16(0);
3638                 TXD->upper_setup.tcp_fields.tucso = tucso;
3639         }
3640   
3641         if (offload & CSUM_IP) {
3642                 txr->last_hw_ipcss = ipcss;
3643                 txr->last_hw_ipcso = ipcso;
3644         }
3645
3646         TXD->tcp_seg_setup.data = htole32(0);
3647         TXD->cmd_and_length =
3648             htole32(adapter->txd_cmd | E1000_TXD_CMD_DEXT | cmd);
3649         tx_buffer = &txr->tx_buffers[cur];
3650         tx_buffer->m_head = NULL;
3651         tx_buffer->next_eop = -1;
3652
3653         if (++cur == adapter->num_tx_desc)
3654                 cur = 0;
3655
3656         txr->tx_avail--;
3657         txr->next_avail_desc = cur;
3658 }
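
/*
 * Illustrative worked example: for an untagged TCP/IPv4 frame with no
 * IP options, the transmit path calls the routine above with
 * ip_off = 14 (the Ethernet header), so hdr_len = 14 + 20 = 34 and the
 * context descriptor comes out as:
 *
 *      ipcss = 14, ipcse = 34, ipcso = 14 + 10 = 24    (ip_sum)
 *      tucss = 34, tucse = 0,  tucso = 34 + 16 = 50    (th_sum)
 *
 * where 10 and 16 are offsetof(struct ip, ip_sum) and
 * offsetof(struct tcphdr, th_sum) respectively.
 */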
3659
3660
3661 /**********************************************************************
3662  *
3663  *  Setup work for hardware segmentation offload (TSO)
3664  *
3665  **********************************************************************/
3666 static void
3667 em_tso_setup(struct tx_ring *txr, struct mbuf *mp, int ip_off,
3668     struct ip *ip, struct tcphdr *tp, u32 *txd_upper, u32 *txd_lower)
3669 {
3670         struct adapter                  *adapter = txr->adapter;
3671         struct e1000_context_desc       *TXD;
3672         struct em_buffer                *tx_buffer;
3673         int cur, hdr_len;
3674
3675         /*
3676          * In theory we can use the same TSO context if and only if
3677          * the frame is the same type (IP/TCP) and has the same MSS.
3678          * However, checking whether a frame has the same IP/TCP
3679          * structure is a hard thing, so just ignore that and always
3680          * re-establish a new TSO context.
3681          */
3682         hdr_len = ip_off + (ip->ip_hl << 2) + (tp->th_off << 2);
3683         *txd_lower = (E1000_TXD_CMD_DEXT |      /* Extended descr type */
3684                       E1000_TXD_DTYP_D |        /* Data descr type */
3685                       E1000_TXD_CMD_TSE);       /* Do TSE on this packet */
3686
3687         /* IP and/or TCP header checksum calculation and insertion. */
3688         *txd_upper = (E1000_TXD_POPTS_IXSM | E1000_TXD_POPTS_TXSM) << 8;
3689
3690         cur = txr->next_avail_desc;
3691         tx_buffer = &txr->tx_buffers[cur];
3692         TXD = (struct e1000_context_desc *) &txr->tx_base[cur];
3693
3694         /*
3695          * Start offset for header checksum calculation.
3696          * End offset for header checksum calculation.
3697          * Offset of place to put the checksum.
3698          */
3699         TXD->lower_setup.ip_fields.ipcss = ip_off;
3700         TXD->lower_setup.ip_fields.ipcse =
3701             htole16(ip_off + (ip->ip_hl << 2) - 1);
3702         TXD->lower_setup.ip_fields.ipcso = ip_off + offsetof(struct ip, ip_sum);
3703         /*
3704          * Start offset for payload checksum calculation.
3705          * End offset for payload checksum calculation.
3706          * Offset of place to put the checksum.
3707          */
3708         TXD->upper_setup.tcp_fields.tucss = ip_off + (ip->ip_hl << 2);
3709         TXD->upper_setup.tcp_fields.tucse = 0;
3710         TXD->upper_setup.tcp_fields.tucso =
3711             ip_off + (ip->ip_hl << 2) + offsetof(struct tcphdr, th_sum);
3712         /*
3713          * Payload size per packet w/o any headers.
3714          * Length of all headers up to payload.
3715          */
3716         TXD->tcp_seg_setup.fields.mss = htole16(mp->m_pkthdr.tso_segsz);
3717         TXD->tcp_seg_setup.fields.hdr_len = hdr_len;
3718
3719         TXD->cmd_and_length = htole32(adapter->txd_cmd |
3720                                 E1000_TXD_CMD_DEXT |    /* Extended descr */
3721                                 E1000_TXD_CMD_TSE |     /* TSE context */
3722                                 E1000_TXD_CMD_IP |      /* Do IP csum */
3723                                 E1000_TXD_CMD_TCP |     /* Do TCP checksum */
3724                                 (mp->m_pkthdr.len - (hdr_len))); /* Total len */
3725
3726         tx_buffer->m_head = NULL;
3727         tx_buffer->next_eop = -1;
3728
3729         if (++cur == adapter->num_tx_desc)
3730                 cur = 0;
3731
3732         txr->tx_avail--;
3733         txr->next_avail_desc = cur;
3734         txr->tx_tso = TRUE;
3735 }
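
/*
 * Illustrative worked example: for a TSO send of a TCP/IPv4 frame with
 * no IP or TCP options, ip_off = 14 gives hdr_len = 14 + 20 + 20 = 54.
 * With a hypothetical m_pkthdr.tso_segsz of 1448, the hardware cuts
 * the m_pkthdr.len - 54 bytes of payload into 1448-byte segments,
 * rebuilding the IP/TCP headers and both checksums for each segment.
 */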
3736
3737
3738 /**********************************************************************
3739  *
3740  *  Examine each tx_buffer in the used queue. If the hardware is done
3741  *  processing the packet then free associated resources. The
3742  *  tx_buffer is put back on the free queue.
3743  *
3744  **********************************************************************/
3745 static bool
3746 em_txeof(struct tx_ring *txr)
3747 {
3748         struct adapter  *adapter = txr->adapter;
3749         int first, last, done, processed;
3750         struct em_buffer *tx_buffer;
3751         struct e1000_tx_desc   *tx_desc, *eop_desc;
3752         struct ifnet   *ifp = adapter->ifp;
3753
3754         EM_TX_LOCK_ASSERT(txr);
3755 #ifdef DEV_NETMAP
3756         if (ifp->if_capenable & IFCAP_NETMAP) {
3757                 struct netmap_adapter *na = NA(ifp);
3758
3759                 selwakeuppri(&na->tx_rings[txr->me].si, PI_NET);
3760                 EM_TX_UNLOCK(txr);
3761                 EM_CORE_LOCK(adapter);
3762                 selwakeuppri(&na->tx_rings[na->num_queues + 1].si, PI_NET);
3763                 EM_CORE_UNLOCK(adapter);
3764                 EM_TX_LOCK(txr);
3765                 return (FALSE);
3766         }
3767 #endif /* DEV_NETMAP */
3768
3769         /* No work, make sure watchdog is off */
3770         if (txr->tx_avail == adapter->num_tx_desc) {
3771                 txr->queue_status = EM_QUEUE_IDLE;
3772                 return (FALSE);
3773         }
3774
3775         processed = 0;
3776         first = txr->next_to_clean;
3777         tx_desc = &txr->tx_base[first];
3778         tx_buffer = &txr->tx_buffers[first];
3779         last = tx_buffer->next_eop;
3780         eop_desc = &txr->tx_base[last];
3781
3782         /*
3783          * What this does is get the index of the
3784          * first descriptor AFTER the EOP of the
3785          * first packet; that way we can do the
3786          * simple comparison on the inner while loop.
3787          */
3788         if (++last == adapter->num_tx_desc)
3789                 last = 0;
3790         done = last;
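        /*
         * Illustrative worked example: with 1024 TX descriptors, a
         * packet occupying slots 1020-1023 has its EOP at last = 1023,
         * so last (and done) wrap to 0; the inner loop below then cleans
         * slots 1020 through 1023 and stops when first reaches done.
         */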
3791
3792         bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
3793             BUS_DMASYNC_POSTREAD);
3794
3795         while (eop_desc->upper.fields.status & E1000_TXD_STAT_DD) {
3796                 /* We clean the range of the packet */
3797                 while (first != done) {
3798                         tx_desc->upper.data = 0;
3799                         tx_desc->lower.data = 0;
3800                         tx_desc->buffer_addr = 0;
3801                         ++txr->tx_avail;
3802                         ++processed;
3803
3804                         if (tx_buffer->m_head) {
3805                                 bus_dmamap_sync(txr->txtag,
3806                                     tx_buffer->map,
3807                                     BUS_DMASYNC_POSTWRITE);
3808                                 bus_dmamap_unload(txr->txtag,
3809                                     tx_buffer->map);
3810                                 m_freem(tx_buffer->m_head);
3811                                 tx_buffer->m_head = NULL;
3812                         }
3813                         tx_buffer->next_eop = -1;
3814                         txr->watchdog_time = ticks;
3815
3816                         if (++first == adapter->num_tx_desc)
3817                                 first = 0;
3818
3819                         tx_buffer = &txr->tx_buffers[first];
3820                         tx_desc = &txr->tx_base[first];
3821                 }
3822                 ++ifp->if_opackets;
3823                 /* See if we can continue to the next packet */
3824                 last = tx_buffer->next_eop;
3825                 if (last != -1) {
3826                         eop_desc = &txr->tx_base[last];
3827                         /* Get new done point */
3828                         if (++last == adapter->num_tx_desc)
                                last = 0;
3829                         done = last;
3830                 } else
3831                         break;
3832         }
3833         bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
3834             BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
3835
3836         txr->next_to_clean = first;
3837
3838         /*
3839         ** Watchdog calculation: we know there's
3840         ** work outstanding or the first return
3841         ** would have been taken, so nothing processed
3842         ** for too long indicates a hang. The local timer
3843         ** will examine this and do a reset if needed.
3844         */
3845         if ((!processed) && ((ticks - txr->watchdog_time) > EM_WATCHDOG))
3846                 txr->queue_status = EM_QUEUE_HUNG;
3847
3848         /*
3849          * If we have a minimum free, clear IFF_DRV_OACTIVE
3850          * to tell the stack that it is OK to send packets.
3851          * Notice that all writes of OACTIVE happen under the
3852          * TX lock which, with a single queue, guarantees 
3853          * sanity.
3854          */
3855         if (txr->tx_avail >= EM_MAX_SCATTER)
3856                 ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;
3857
3858         /* Disable watchdog if all clean */
3859         if (txr->tx_avail == adapter->num_tx_desc) {
3860                 txr->queue_status = EM_QUEUE_IDLE;
3861                 return (FALSE);
3862         } 
3863
3864         return (TRUE);
3865 }
3866
3867
3868 /*********************************************************************
3869  *
3870  *  Refresh RX descriptor mbufs from system mbuf buffer pool.
3871  *
3872  **********************************************************************/
3873 static void
3874 em_refresh_mbufs(struct rx_ring *rxr, int limit)
3875 {
3876         struct adapter          *adapter = rxr->adapter;
3877         struct mbuf             *m;
3878         bus_dma_segment_t       segs[1];
3879         struct em_buffer        *rxbuf;
3880         int                     i, j, error, nsegs;
3881         bool                    cleaned = FALSE;
3882
3883         i = j = rxr->next_to_refresh;
3884         /*
3885         ** Get one descriptor beyond
3886         ** our work mark to control
3887         ** the loop.
3888         */
3889         if (++j == adapter->num_rx_desc)
3890                 j = 0;
3891
3892         while (j != limit) {
3893                 rxbuf = &rxr->rx_buffers[i];
3894                 if (rxbuf->m_head == NULL) {
3895                         m = m_getjcl(M_DONTWAIT, MT_DATA,
3896                             M_PKTHDR, adapter->rx_mbuf_sz);
3897                         /*
3898                         ** If we have a temporary resource shortage
3899                         ** that causes a failure, just abort refresh
3900                         ** for now, we will return to this point when
3901                         ** reinvoked from em_rxeof.
3902                         */
3903                         if (m == NULL)
3904                                 goto update;
3905                 } else
3906                         m = rxbuf->m_head;
3907
3908                 m->m_len = m->m_pkthdr.len = adapter->rx_mbuf_sz;
3909                 m->m_flags |= M_PKTHDR;
3910                 m->m_data = m->m_ext.ext_buf;
3911
3912                 /* Use bus_dma machinery to setup the memory mapping  */
3913                 error = bus_dmamap_load_mbuf_sg(rxr->rxtag, rxbuf->map,
3914                     m, segs, &nsegs, BUS_DMA_NOWAIT);
3915                 if (error != 0) {
3916                         printf("Refresh mbufs: mbuf dmamap load"
3917                             " failure - %d\n", error);
3918                         m_free(m);
3919                         rxbuf->m_head = NULL;
3920                         goto update;
3921                 }
3922                 rxbuf->m_head = m;
3923                 bus_dmamap_sync(rxr->rxtag,
3924                     rxbuf->map, BUS_DMASYNC_PREREAD);
3925                 rxr->rx_base[i].buffer_addr = htole64(segs[0].ds_addr);
3926                 cleaned = TRUE;
3927
3928                 i = j; /* Next is precalculated for us */
3929                 rxr->next_to_refresh = i;
3930                 /* Calculate next controlling index */
3931                 if (++j == adapter->num_rx_desc)
3932                         j = 0;
3933         }
3934 update:
3935         /*
3936         ** Update the tail pointer only if we
3937         ** refreshed, and only as far as we got.
3938         */
3939         if (cleaned)
3940                 E1000_WRITE_REG(&adapter->hw,
3941                     E1000_RDT(rxr->me), rxr->next_to_refresh);
3942
3943         return;
3944 }
3945
3946
3947 /*********************************************************************
3948  *
3949  *  Allocate memory for rx_buffer structures. Since we use one
3950  *  rx_buffer per received packet, the maximum number of rx_buffer's
3951  *  that we'll need is equal to the number of receive descriptors
3952  *  that we've allocated.
3953  *
3954  **********************************************************************/
3955 static int
3956 em_allocate_receive_buffers(struct rx_ring *rxr)
3957 {
3958         struct adapter          *adapter = rxr->adapter;
3959         device_t                dev = adapter->dev;
3960         struct em_buffer        *rxbuf;
3961         int                     error;
3962
3963         rxr->rx_buffers = malloc(sizeof(struct em_buffer) *
3964             adapter->num_rx_desc, M_DEVBUF, M_NOWAIT | M_ZERO);
3965         if (rxr->rx_buffers == NULL) {
3966                 device_printf(dev, "Unable to allocate rx_buffer memory\n");
3967                 return (ENOMEM);
3968         }
3969
3970         error = bus_dma_tag_create(bus_get_dma_tag(dev), /* parent */
3971                                 1, 0,                   /* alignment, bounds */
3972                                 BUS_SPACE_MAXADDR,      /* lowaddr */
3973                                 BUS_SPACE_MAXADDR,      /* highaddr */
3974                                 NULL, NULL,             /* filter, filterarg */
3975                                 MJUM9BYTES,             /* maxsize */
3976                                 1,                      /* nsegments */
3977                                 MJUM9BYTES,             /* maxsegsize */
3978                                 0,                      /* flags */
3979                                 NULL,                   /* lockfunc */
3980                                 NULL,                   /* lockarg */
3981                                 &rxr->rxtag);
3982         if (error) {
3983                 device_printf(dev, "%s: bus_dma_tag_create failed %d\n",
3984                     __func__, error);
3985                 goto fail;
3986         }
3987
3989         for (int i = 0; i < adapter->num_rx_desc; i++) {
3990                 rxbuf = &rxr->rx_buffers[i];
3991                 error = bus_dmamap_create(rxr->rxtag, BUS_DMA_NOWAIT,
3992                     &rxbuf->map);
3993                 if (error) {
3994                         device_printf(dev, "%s: bus_dmamap_create failed: %d\n",
3995                             __func__, error);
3996                         goto fail;
3997                 }
3998         }
3999
4000         return (0);
4001
4002 fail:
4003         em_free_receive_structures(adapter);
4004         return (error);
4005 }
4006
4007
4008 /*********************************************************************
4009  *
4010  *  Initialize a receive ring and its buffers.
4011  *
4012  **********************************************************************/
4013 static int
4014 em_setup_receive_ring(struct rx_ring *rxr)
4015 {
4016         struct  adapter         *adapter = rxr->adapter;
4017         struct em_buffer        *rxbuf;
4018         bus_dma_segment_t       seg[1];
4019         int                     rsize, nsegs, error = 0;
4020 #ifdef DEV_NETMAP
4021         struct netmap_adapter *na = NA(adapter->ifp);
4022         struct netmap_slot *slot;
4023 #endif
4024
4025
4026         /* Clear the ring contents */
4027         EM_RX_LOCK(rxr);
4028         rsize = roundup2(adapter->num_rx_desc *
4029             sizeof(struct e1000_rx_desc), EM_DBA_ALIGN);
4030         bzero((void *)rxr->rx_base, rsize);
4031 #ifdef DEV_NETMAP
4032         slot = netmap_reset(na, NR_RX, 0, 0);
4033 #endif
4034
4035         /*
4036         ** Free current RX buffer structs and their mbufs
4037         */
4038         for (int i = 0; i < adapter->num_rx_desc; i++) {
4039                 rxbuf = &rxr->rx_buffers[i];
4040                 if (rxbuf->m_head != NULL) {
4041                         bus_dmamap_sync(rxr->rxtag, rxbuf->map,
4042                             BUS_DMASYNC_POSTREAD);
4043                         bus_dmamap_unload(rxr->rxtag, rxbuf->map);
4044                         m_freem(rxbuf->m_head);
4045                         rxbuf->m_head = NULL; /* mark as freed */
4046                 }
4047         }
4048
4049         /* Now replenish the mbufs */
4050         for (int j = 0; j != adapter->num_rx_desc; ++j) {
4051                 rxbuf = &rxr->rx_buffers[j];
4052 #ifdef DEV_NETMAP
4053                 if (slot) {
4054                         int si = netmap_ridx_n2k(na, rxr->me, j);
4055                         uint64_t paddr;
4056                         void *addr;
4057
4058                         addr = PNMB(slot + si, &paddr);
4059                         netmap_load_map(rxr->rxtag, rxbuf->map, addr);
4060                         /* Update descriptor */
4061                         rxr->rx_base[j].buffer_addr = htole64(paddr);
4062                         continue;
4063                 }
4064 #endif /* DEV_NETMAP */
4065                 rxbuf->m_head = m_getjcl(M_DONTWAIT, MT_DATA,
4066                     M_PKTHDR, adapter->rx_mbuf_sz);
4067                 if (rxbuf->m_head == NULL) {
4068                         error = ENOBUFS;
4069                         goto fail;
4070                 }
4071                 rxbuf->m_head->m_len = adapter->rx_mbuf_sz;
4072                 rxbuf->m_head->m_flags &= ~M_HASFCS; /* we strip it */
4073                 rxbuf->m_head->m_pkthdr.len = adapter->rx_mbuf_sz;
4074
4075                 /* Get the memory mapping */
4076                 error = bus_dmamap_load_mbuf_sg(rxr->rxtag,
4077                     rxbuf->map, rxbuf->m_head, seg,
4078                     &nsegs, BUS_DMA_NOWAIT);
4079                 if (error != 0) {
4080                         m_freem(rxbuf->m_head);
4081                         rxbuf->m_head = NULL;
4082                         goto fail;
4083                 }
4084                 bus_dmamap_sync(rxr->rxtag,
4085                     rxbuf->map, BUS_DMASYNC_PREREAD);
4086
4087                 /* Update descriptor */
4088                 rxr->rx_base[j].buffer_addr = htole64(seg[0].ds_addr);
4089         }
4090         rxr->next_to_check = 0;
4091         rxr->next_to_refresh = 0;
4092         bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
4093             BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
4094
4095 fail:   /* also reached on success, with error == 0 */
4096         EM_RX_UNLOCK(rxr);
4097         return (error);
4098 }
4099
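/*
 * Worked example for the sizing in em_setup_receive_ring() above
 * (assuming the 16-byte legacy descriptor and EM_DBA_ALIGN = 128):
 * with 1024 RX descriptors,
 *
 *	rsize = roundup2(1024 * 16, 128) = roundup2(16384, 128) = 16384
 *
 * so the ring is already a multiple of the alignment, and roundup2()
 * only matters for descriptor counts that are not multiples of 8.
 */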
4100 /*********************************************************************
4101  *
4102  *  Initialize all receive rings.
4103  *
4104  **********************************************************************/
4105 static int
4106 em_setup_receive_structures(struct adapter *adapter)
4107 {
4108         struct rx_ring *rxr = adapter->rx_rings;
4109         int q;
4110
4111         for (q = 0; q < adapter->num_queues; q++, rxr++)
4112                 if (em_setup_receive_ring(rxr))
4113                         goto fail;
4114
4115         return (0);
4116 fail:
4117         /*
4118          * Free the RX buffers allocated so far; we handle only
4119          * the rings that completed.  The failing ring has already
4120          * cleaned up for itself, so ring 'q' is the terminus.
4121          */
4122         for (int i = 0; i < q; ++i) {
4123                 rxr = &adapter->rx_rings[i];
4124                 for (int n = 0; n < adapter->num_rx_desc; n++) {
4125                         struct em_buffer *rxbuf;
4126                         rxbuf = &rxr->rx_buffers[n];
4127                         if (rxbuf->m_head != NULL) {
4128                                 bus_dmamap_sync(rxr->rxtag, rxbuf->map,
4129                                   BUS_DMASYNC_POSTREAD);
4130                                 bus_dmamap_unload(rxr->rxtag, rxbuf->map);
4131                                 m_freem(rxbuf->m_head);
4132                                 rxbuf->m_head = NULL;
4133                         }
4134                 }
4135                 rxr->next_to_check = 0;
4136                 rxr->next_to_refresh = 0;
4137         }
4138
4139         return (ENOBUFS);
4140 }
4141
4142 /*********************************************************************
4143  *
4144  *  Free all receive rings.
4145  *
4146  **********************************************************************/
4147 static void
4148 em_free_receive_structures(struct adapter *adapter)
4149 {
4150         struct rx_ring *rxr = adapter->rx_rings;
4151
4152         for (int i = 0; i < adapter->num_queues; i++, rxr++) {
4153                 em_free_receive_buffers(rxr);
4154                 /* Free the ring memory as well */
4155                 em_dma_free(adapter, &rxr->rxdma);
4156                 EM_RX_LOCK_DESTROY(rxr);
4157         }
4158
4159         free(adapter->rx_rings, M_DEVBUF);
4160 }
4161
4162
4163 /*********************************************************************
4164  *
4165  *  Free receive ring data structures
4166  *
4167  **********************************************************************/
4168 static void
4169 em_free_receive_buffers(struct rx_ring *rxr)
4170 {
4171         struct adapter          *adapter = rxr->adapter;
4172         struct em_buffer        *rxbuf = NULL;
4173
4174         INIT_DEBUGOUT("free_receive_buffers: begin");
4175
4176         if (rxr->rx_buffers != NULL) {
4177                 for (int i = 0; i < adapter->num_rx_desc; i++) {
4178                         rxbuf = &rxr->rx_buffers[i];
4179                         if (rxbuf->map != NULL) {
4180                                 bus_dmamap_sync(rxr->rxtag, rxbuf->map,
4181                                     BUS_DMASYNC_POSTREAD);
4182                                 bus_dmamap_unload(rxr->rxtag, rxbuf->map);
4183                                 bus_dmamap_destroy(rxr->rxtag, rxbuf->map);
4184                         }
4185                         if (rxbuf->m_head != NULL) {
4186                                 m_freem(rxbuf->m_head);
4187                                 rxbuf->m_head = NULL;
4188                         }
4189                 }
4190                 free(rxr->rx_buffers, M_DEVBUF);
4191                 rxr->rx_buffers = NULL;
4192                 rxr->next_to_check = 0;
4193                 rxr->next_to_refresh = 0;
4194         }
4195
4196         if (rxr->rxtag != NULL) {
4197                 bus_dma_tag_destroy(rxr->rxtag);
4198                 rxr->rxtag = NULL;
4199         }
4200
4201         return;
4202 }
4203
4204
4205 /*********************************************************************
4206  *
4207  *  Enable receive unit.
4208  *
4209  **********************************************************************/
4210 #define MAX_INTS_PER_SEC        8000
4211 #define DEFAULT_ITR          (1000000000/(MAX_INTS_PER_SEC * 256))
4212
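/*
 * Worked example of the DEFAULT_ITR arithmetic: the ITR register counts
 * in 256 ns units, so the value programmed below is
 *
 *	1000000000 / (8000 * 256) = 488    (integer division)
 *
 * which yields 1 / (488 * 256 ns), roughly 8005 interrupts per second,
 * i.e. MAX_INTS_PER_SEC to within rounding.
 */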
4213 static void
4214 em_initialize_receive_unit(struct adapter *adapter)
4215 {
4216         struct rx_ring  *rxr = adapter->rx_rings;
4217         struct ifnet    *ifp = adapter->ifp;
4218         struct e1000_hw *hw = &adapter->hw;
4219         u64     bus_addr;
4220         u32     rctl, rxcsum;
4221
4222         INIT_DEBUGOUT("em_initialize_receive_unit: begin");
4223
4224         /*
4225          * Make sure receives are disabled while setting
4226          * up the descriptor ring
4227          */
4228         rctl = E1000_READ_REG(hw, E1000_RCTL);
4229         /* On 82574/82583, never disable receives once they are enabled */
4230         if ((hw->mac.type != e1000_82574) && (hw->mac.type != e1000_82583))
4231                 E1000_WRITE_REG(hw, E1000_RCTL, rctl & ~E1000_RCTL_EN);
4232
4233         E1000_WRITE_REG(&adapter->hw, E1000_RADV,
4234             adapter->rx_abs_int_delay.value);
4235         /*
4236          * Set the interrupt throttling rate. Value is calculated
4237          * as DEFAULT_ITR = 1/(MAX_INTS_PER_SEC * 256ns)
4238          */
4239         E1000_WRITE_REG(hw, E1000_ITR, DEFAULT_ITR);
4240
4241         /*
4242         ** When using MSIX interrupts we need to throttle
4243         ** using the EITR register (82574 only)
4244         */
4245         if (hw->mac.type == e1000_82574) {
4246                 for (int i = 0; i < 4; i++)
4247                         E1000_WRITE_REG(hw, E1000_EITR_82574(i),
4248                             DEFAULT_ITR);
4249                 /* Disable accelerated acknowledge */
4250                 E1000_WRITE_REG(hw, E1000_RFCTL, E1000_RFCTL_ACK_DIS);
4251         }
4252
4253         if (ifp->if_capenable & IFCAP_RXCSUM) {
4254                 rxcsum = E1000_READ_REG(hw, E1000_RXCSUM);
4255                 rxcsum |= (E1000_RXCSUM_IPOFL | E1000_RXCSUM_TUOFL);
4256                 E1000_WRITE_REG(hw, E1000_RXCSUM, rxcsum);
4257         }
4258
4259         /*
4260         ** XXX TEMPORARY WORKAROUND: on some systems with 82573
4261         ** long latencies are observed, like Lenovo X60. This
4262         ** change eliminates the problem, but since having positive
4263         ** values in RDTR is a known source of problems on other
4264         ** platforms another solution is being sought.
4265         */
4266         if (hw->mac.type == e1000_82573)
4267                 E1000_WRITE_REG(hw, E1000_RDTR, 0x20);
4268
4269         for (int i = 0; i < adapter->num_queues; i++, rxr++) {
4270                 /* Setup the Base and Length of the Rx Descriptor Ring */
4271                 bus_addr = rxr->rxdma.dma_paddr;
4272                 E1000_WRITE_REG(hw, E1000_RDLEN(i),
4273                     adapter->num_rx_desc * sizeof(struct e1000_rx_desc));
4274                 E1000_WRITE_REG(hw, E1000_RDBAH(i), (u32)(bus_addr >> 32));
4275                 E1000_WRITE_REG(hw, E1000_RDBAL(i), (u32)bus_addr);
4276                 /* Setup the Head and Tail Descriptor Pointers */
4277                 E1000_WRITE_REG(hw, E1000_RDH(i), 0);
4278 #ifdef DEV_NETMAP
4279                 /*
4280                  * An init() while a netmap client is active must
4281                  * preserve the RX buffers passed to userspace.
4282                  * In this driver it means we adjust RDT to
4283                  * something different from na->num_rx_desc - 1.
4284                  */
4285                 if (ifp->if_capenable & IFCAP_NETMAP) {
4286                         struct netmap_adapter *na = NA(adapter->ifp);
4287                         struct netmap_kring *kring = &na->rx_rings[i];
4288                         int t = na->num_rx_desc - 1 - kring->nr_hwavail;
4289
4290                         E1000_WRITE_REG(hw, E1000_RDT(i), t);
4291                 } else
4292 #endif /* DEV_NETMAP */
4293                 E1000_WRITE_REG(hw, E1000_RDT(i), adapter->num_rx_desc - 1);
4294         }
4295
4296         /* Set PTHRESH for improved jumbo performance */
4297         if (((adapter->hw.mac.type == e1000_ich9lan) ||
4298             (adapter->hw.mac.type == e1000_pch2lan) ||
4299             (adapter->hw.mac.type == e1000_ich10lan)) &&
4300             (ifp->if_mtu > ETHERMTU)) {
4301                 u32 rxdctl = E1000_READ_REG(hw, E1000_RXDCTL(0));
4302                 E1000_WRITE_REG(hw, E1000_RXDCTL(0), rxdctl | 3);
4303         }
4304                 
4305         if (adapter->hw.mac.type == e1000_pch2lan) {
4306                 if (ifp->if_mtu > ETHERMTU)
4307                         e1000_lv_jumbo_workaround_ich8lan(hw, TRUE);
4308                 else
4309                         e1000_lv_jumbo_workaround_ich8lan(hw, FALSE);
4310         }
4311
4312         /* Setup the Receive Control Register */
4313         rctl &= ~(3 << E1000_RCTL_MO_SHIFT);
4314         rctl |= E1000_RCTL_EN | E1000_RCTL_BAM |
4315             E1000_RCTL_LBM_NO | E1000_RCTL_RDMTS_HALF |
4316             (hw->mac.mc_filter_type << E1000_RCTL_MO_SHIFT);
4317
4318         /* Strip the CRC */
4319         rctl |= E1000_RCTL_SECRC;
4320
4321         /* Make sure VLAN Filters are off */
4322         rctl &= ~E1000_RCTL_VFE;
4323         rctl &= ~E1000_RCTL_SBP;
4324
4325         if (adapter->rx_mbuf_sz == MCLBYTES)
4326                 rctl |= E1000_RCTL_SZ_2048;
4327         else if (adapter->rx_mbuf_sz == MJUMPAGESIZE)
4328                 rctl |= E1000_RCTL_SZ_4096 | E1000_RCTL_BSEX;
4329         else if (adapter->rx_mbuf_sz > MJUMPAGESIZE)
4330                 rctl |= E1000_RCTL_SZ_8192 | E1000_RCTL_BSEX;
4331
4332         if (ifp->if_mtu > ETHERMTU)
4333                 rctl |= E1000_RCTL_LPE;
4334         else
4335                 rctl &= ~E1000_RCTL_LPE;
4336
4337         /* Write out the settings */
4338         E1000_WRITE_REG(hw, E1000_RCTL, rctl);
4339
4340         return;
4341 }
4342
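/*
 * Illustrative summary (not part of the driver) of how rx_mbuf_sz maps
 * to the RCTL buffer-size bits programmed above, where BSEX selects the
 * extended-size encoding:
 *
 *	MCLBYTES (2048)                    -> E1000_RCTL_SZ_2048
 *	MJUMPAGESIZE (4096 on most ports)  -> E1000_RCTL_SZ_4096 | BSEX
 *	anything larger                    -> E1000_RCTL_SZ_8192 | BSEX
 *
 * E1000_RCTL_LPE (long packet enable) is toggled separately, based on
 * the interface MTU.
 */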
4343
4344 /*********************************************************************
4345  *
4346  *  This routine executes in interrupt context. It replenishes
4347  *  the mbufs in the descriptor ring and passes data that has
4348  *  been DMA'ed into host memory up to the upper layer.
4349  *
4350  *  We loop at most count times if count is > 0, or until done if
4351  *  count < 0.
4352  *
4353  *  For polling we also return the number of cleaned packets via *done.
4354  *********************************************************************/
4355 static bool
4356 em_rxeof(struct rx_ring *rxr, int count, int *done)
4357 {
4358         struct adapter          *adapter = rxr->adapter;
4359         struct ifnet            *ifp = adapter->ifp;
4360         struct mbuf             *mp, *sendmp;
4361         u8                      status = 0;
4362         u16                     len;
4363         int                     i, processed, rxdone = 0;
4364         bool                    eop;
4365         struct e1000_rx_desc    *cur;
4366
4367         EM_RX_LOCK(rxr);
4368
4369 #ifdef DEV_NETMAP
4370         if (ifp->if_capenable & IFCAP_NETMAP) {
4371                 struct netmap_adapter *na = NA(ifp);
4372
4373                 selwakeuppri(&na->rx_rings[rxr->me].si, PI_NET);
4374                 EM_RX_UNLOCK(rxr);
4375                 EM_CORE_LOCK(adapter);
4376                 selwakeuppri(&na->rx_rings[na->num_queues + 1].si, PI_NET);
4377                 EM_CORE_UNLOCK(adapter);
4378                 return (FALSE);
4379         }
4380 #endif /* DEV_NETMAP */
4381
4382         for (i = rxr->next_to_check, processed = 0; count != 0;) {
4383
4384                 if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
4385                         break;
4386
4387                 bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
4388                     BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
4389
4390                 cur = &rxr->rx_base[i];
4391                 status = cur->status;
4392                 mp = sendmp = NULL;
4393
4394                 if ((status & E1000_RXD_STAT_DD) == 0)
4395                         break;
4396
4397                 len = le16toh(cur->length);
4398                 eop = (status & E1000_RXD_STAT_EOP) != 0;
4399
4400                 if ((cur->errors & E1000_RXD_ERR_FRAME_ERR_MASK) ||
4401                     (rxr->discard == TRUE)) {
4402                         ifp->if_ierrors++;
4403                         ++rxr->rx_discarded;
4404                         if (!eop) /* Catch subsequent segs */
4405                                 rxr->discard = TRUE;
4406                         else
4407                                 rxr->discard = FALSE;
4408                         em_rx_discard(rxr, i);
4409                         goto next_desc;
4410                 }
4411
4412                 /* Assign correct length to the current fragment */
4413                 mp = rxr->rx_buffers[i].m_head;
4414                 mp->m_len = len;
4415
4416                 /* Trigger for refresh */
4417                 rxr->rx_buffers[i].m_head = NULL;
4418
4419                 /* First segment? */
4420                 if (rxr->fmp == NULL) {
4421                         mp->m_pkthdr.len = len;
4422                         rxr->fmp = rxr->lmp = mp;
4423                 } else {
4424                         /* Chain mbuf's together */
4425                         mp->m_flags &= ~M_PKTHDR;
4426                         rxr->lmp->m_next = mp;
4427                         rxr->lmp = mp;
4428                         rxr->fmp->m_pkthdr.len += len;
4429                 }
4430
4431                 if (eop) {
4432                         --count;
4433                         sendmp = rxr->fmp;
4434                         sendmp->m_pkthdr.rcvif = ifp;
4435                         ifp->if_ipackets++;
4436                         em_receive_checksum(cur, sendmp);
4437 #ifndef __NO_STRICT_ALIGNMENT
4438                         if (adapter->max_frame_size >
4439                             (MCLBYTES - ETHER_ALIGN) &&
4440                             em_fixup_rx(rxr) != 0)
4441                                 goto skip;
4442 #endif
4443                         if (status & E1000_RXD_STAT_VP) {
4444                                 sendmp->m_pkthdr.ether_vtag =
4445                                     le16toh(cur->special);
4446                                 sendmp->m_flags |= M_VLANTAG;
4447                         }
4448 #ifndef __NO_STRICT_ALIGNMENT
4449 skip:
4450 #endif
4451                         rxr->fmp = rxr->lmp = NULL;
4452                 }
4453 next_desc:
4454                 /* Zero out the receive descriptors status. */
4455                 cur->status = 0;
4456                 ++rxdone;       /* cumulative for POLL */
4457                 ++processed;
4458
4459                 /* Advance our pointers to the next descriptor. */
4460                 if (++i == adapter->num_rx_desc)
4461                         i = 0;
4462
4463                 /* Send to the stack */
4464                 if (sendmp != NULL) {
4465                         rxr->next_to_check = i;
4466                         EM_RX_UNLOCK(rxr);
4467                         (*ifp->if_input)(ifp, sendmp);
4468                         EM_RX_LOCK(rxr);
4469                         i = rxr->next_to_check;
4470                 }
4471
4472                 /* Only refresh mbufs every 8 descriptors */
4473                 if (processed == 8) {
4474                         em_refresh_mbufs(rxr, i);
4475                         processed = 0;
4476                 }
4477         }
4478
4479         /* Catch any remaining refresh work */
4480         if (e1000_rx_unrefreshed(rxr))
4481                 em_refresh_mbufs(rxr, i);
4482
4483         rxr->next_to_check = i;
4484         if (done != NULL)
4485                 *done = rxdone;
4486         EM_RX_UNLOCK(rxr);
4487
4488         return ((status & E1000_RXD_STAT_DD) ? TRUE : FALSE);
4489 }
4490
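/*
 * Illustrative walk-through (not driver code) of the fmp/lmp chaining in
 * em_rxeof() for a frame spanning three descriptors:
 *
 *	desc 0: fmp == NULL -> fmp = lmp = mp0, pkthdr.len = len0
 *	desc 1: chain       -> lmp->m_next = mp1, lmp = mp1,
 *	                       fmp->m_pkthdr.len += len1
 *	desc 2: EOP set     -> chain as above, then hand fmp to
 *	                       if_input() and reset fmp = lmp = NULL
 *
 * Only the first mbuf keeps M_PKTHDR; later fragments have it cleared.
 */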
4491 static __inline void
4492 em_rx_discard(struct rx_ring *rxr, int i)
4493 {
4494         struct em_buffer        *rbuf;
4495
4496         rbuf = &rxr->rx_buffers[i];
4497         /* Free any previous pieces */
4498         if (rxr->fmp != NULL) {
4499                 rxr->fmp->m_flags |= M_PKTHDR;
4500                 m_freem(rxr->fmp);
4501                 rxr->fmp = NULL;
4502                 rxr->lmp = NULL;
4503         }
4504         /*
4505         ** Free buffer and allow em_refresh_mbufs()
4506         ** to clean up and recharge buffer.
4507         */
4508         if (rbuf->m_head) {
4509                 m_free(rbuf->m_head);
4510                 rbuf->m_head = NULL;
4511         }
4512         return;
4513 }
4514
4515 #ifndef __NO_STRICT_ALIGNMENT
4516 /*
4517  * When jumbo frames are enabled we should realign the entire payload on
4518  * architectures with strict alignment. This is a serious design mistake
4519  * of the 8254x, as it nullifies the benefit of DMA: it only allows the RX
4520  * buffer size to be 2048/4096/8192/16384, while what we really want is
4521  * 2048 - ETHER_ALIGN to align the payload. On architectures without strict
4522  * alignment restrictions the 8254x still performs unaligned memory
4523  * accesses, which reduces performance as well. To avoid copying an entire
4524  * frame to align it, we allocate a new mbuf, copy the ethernet header into
4525  * it, and prepend the new mbuf to the existing mbuf chain.
4526  *
4527  * Be aware that the best performance of the 8254x is achieved only when
4528  * jumbo frames are not used at all on architectures with strict alignment.
4529  */
4530 static int
4531 em_fixup_rx(struct rx_ring *rxr)
4532 {
4533         struct adapter *adapter = rxr->adapter;
4534         struct mbuf *m, *n;
4535         int error;
4536
4537         error = 0;
4538         m = rxr->fmp;
4539         if (m->m_len <= (MCLBYTES - ETHER_HDR_LEN)) {
4540                 bcopy(m->m_data, m->m_data + ETHER_HDR_LEN, m->m_len);
4541                 m->m_data += ETHER_HDR_LEN;
4542         } else {
4543                 MGETHDR(n, M_DONTWAIT, MT_DATA);
4544                 if (n != NULL) {
4545                         bcopy(m->m_data, n->m_data, ETHER_HDR_LEN);
4546                         m->m_data += ETHER_HDR_LEN;
4547                         m->m_len -= ETHER_HDR_LEN;
4548                         n->m_len = ETHER_HDR_LEN;
4549                         M_MOVE_PKTHDR(n, m);
4550                         n->m_next = m;
4551                         rxr->fmp = n;
4552                 } else {
4553                         adapter->dropped_pkts++;
4554                         m_freem(rxr->fmp);
4555                         rxr->fmp = NULL;
4556                         error = ENOMEM;
4557                 }
4558         }
4559
4560         return (error);
4561 }
4562 #endif
4563
4564 /*********************************************************************
4565  *
4566  *  Verify that the hardware indicated that the checksum is valid.
4567  *  Inform the stack about the status of checksum so that stack
4568  *  doesn't spend time verifying the checksum.
4569  *
4570  *********************************************************************/
4571 static void
4572 em_receive_checksum(struct e1000_rx_desc *rx_desc, struct mbuf *mp)
4573 {
4574         /* Ignore Checksum bit is set */
4575         if (rx_desc->status & E1000_RXD_STAT_IXSM) {
4576                 mp->m_pkthdr.csum_flags = 0;
4577                 return;
4578         }
4579
4580         if (rx_desc->status & E1000_RXD_STAT_IPCS) {
4581                 /* Did it pass? */
4582                 if (!(rx_desc->errors & E1000_RXD_ERR_IPE)) {
4583                         /* IP Checksum Good */
4584                         mp->m_pkthdr.csum_flags = CSUM_IP_CHECKED;
4585                         mp->m_pkthdr.csum_flags |= CSUM_IP_VALID;
4586
4587                 } else {
4588                         mp->m_pkthdr.csum_flags = 0;
4589                 }
4590         }
4591
4592         if (rx_desc->status & E1000_RXD_STAT_TCPCS) {
4593                 /* Did it pass? */
4594                 if (!(rx_desc->errors & E1000_RXD_ERR_TCPE)) {
4595                         mp->m_pkthdr.csum_flags |=
4596                         (CSUM_DATA_VALID | CSUM_PSEUDO_HDR);
4597                         mp->m_pkthdr.csum_data = htons(0xffff);
4598                 }
4599         }
4600 }
4601
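/*
 * A minimal sketch (illustrative only, not driver code) of how a
 * consumer of these flags reacts in the input path:
 *
 *	if ((m->m_pkthdr.csum_flags & CSUM_IP_CHECKED) &&
 *	    !(m->m_pkthdr.csum_flags & CSUM_IP_VALID))
 *		drop the packet - hardware saw a bad IP checksum;
 *	if (m->m_pkthdr.csum_flags & CSUM_DATA_VALID)
 *		skip software TCP/UDP verification - csum_data already
 *		holds 0xffff, so pseudo-header folding still succeeds.
 */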
4602 /*
4603  * This routine is run via a vlan
4604  * config EVENT
4605  */
4606 static void
4607 em_register_vlan(void *arg, struct ifnet *ifp, u16 vtag)
4608 {
4609         struct adapter  *adapter = ifp->if_softc;
4610         u32             index, bit;
4611
4612         if (ifp->if_softc !=  arg)   /* Not our event */
4613                 return;
4614
4615         if ((vtag == 0) || (vtag > 4095))       /* Invalid ID */
4616                 return;
4617
4618         EM_CORE_LOCK(adapter);
4619         index = (vtag >> 5) & 0x7F;
4620         bit = vtag & 0x1F;
4621         adapter->shadow_vfta[index] |= (1 << bit);
4622         ++adapter->num_vlans;
4623         /* Re-init to load the changes */
4624         if (ifp->if_capenable & IFCAP_VLAN_HWFILTER)
4625                 em_init_locked(adapter);
4626         EM_CORE_UNLOCK(adapter);
4627 }
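/*
 * Worked example of the VFTA indexing above: the 4096 VLAN IDs are
 * spread over 128 32-bit words, so for vtag = 1000,
 *
 *	index = (1000 >> 5) & 0x7F = 31
 *	bit   =  1000 & 0x1F       = 8
 *
 * and shadow_vfta[31] |= (1 << 8) marks VLAN 1000 as filtered.
 */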
4628
4629 /*
4630  * This routine is run via a vlan
4631  * unconfig EVENT
4632  */
4633 static void
4634 em_unregister_vlan(void *arg, struct ifnet *ifp, u16 vtag)
4635 {
4636         struct adapter  *adapter = ifp->if_softc;
4637         u32             index, bit;
4638
4639         if (ifp->if_softc !=  arg)
4640                 return;
4641
4642         if ((vtag == 0) || (vtag > 4095))       /* Invalid */
4643                 return;
4644
4645         EM_CORE_LOCK(adapter);
4646         index = (vtag >> 5) & 0x7F;
4647         bit = vtag & 0x1F;
4648         adapter->shadow_vfta[index] &= ~(1 << bit);
4649         --adapter->num_vlans;
4650         /* Re-init to load the changes */
4651         if (ifp->if_capenable & IFCAP_VLAN_HWFILTER)
4652                 em_init_locked(adapter);
4653         EM_CORE_UNLOCK(adapter);
4654 }
4655
4656 static void
4657 em_setup_vlan_hw_support(struct adapter *adapter)
4658 {
4659         struct e1000_hw *hw = &adapter->hw;
4660         u32             reg;
4661
4662         /*
4663         ** We get here through init_locked, meaning
4664         ** a soft reset; it has already cleared
4665         ** the VFTA and other state, so if no vlans
4666         ** have been registered there is nothing to do.
4667         */
4668         if (adapter->num_vlans == 0)
4669                 return;
4670
4671         /*
4672         ** A soft reset zeroes out the VFTA, so
4673         ** we need to repopulate it now.
4674         */
4675         for (int i = 0; i < EM_VFTA_SIZE; i++)
4676                 if (adapter->shadow_vfta[i] != 0)
4677                         E1000_WRITE_REG_ARRAY(hw, E1000_VFTA,
4678                             i, adapter->shadow_vfta[i]);
4679
4680         reg = E1000_READ_REG(hw, E1000_CTRL);
4681         reg |= E1000_CTRL_VME;
4682         E1000_WRITE_REG(hw, E1000_CTRL, reg);
4683
4684         /* Enable the Filter Table */
4685         reg = E1000_READ_REG(hw, E1000_RCTL);
4686         reg &= ~E1000_RCTL_CFIEN;
4687         reg |= E1000_RCTL_VFE;
4688         E1000_WRITE_REG(hw, E1000_RCTL, reg);
4689 }
4690
4691 static void
4692 em_enable_intr(struct adapter *adapter)
4693 {
4694         struct e1000_hw *hw = &adapter->hw;
4695         u32 ims_mask = IMS_ENABLE_MASK;
4696
4697         if (hw->mac.type == e1000_82574) {
4698                 E1000_WRITE_REG(hw, EM_EIAC, EM_MSIX_MASK);
4699                 ims_mask |= EM_MSIX_MASK;
4700         } 
4701         E1000_WRITE_REG(hw, E1000_IMS, ims_mask);
4702 }
4703
4704 static void
4705 em_disable_intr(struct adapter *adapter)
4706 {
4707         struct e1000_hw *hw = &adapter->hw;
4708
4709         if (hw->mac.type == e1000_82574)
4710                 E1000_WRITE_REG(hw, EM_EIAC, 0);
4711         E1000_WRITE_REG(&adapter->hw, E1000_IMC, 0xffffffff);
4712 }
4713
4714 /*
4715  * Bit of a misnomer: what this really means is
4716  * to enable OS management of the system, i.e.
4717  * to disable the special hardware management features.
4718  */
4719 static void
4720 em_init_manageability(struct adapter *adapter)
4721 {
4722         /* A shared code workaround */
4723 #define E1000_82542_MANC2H E1000_MANC2H
4724         if (adapter->has_manage) {
4725                 int manc2h = E1000_READ_REG(&adapter->hw, E1000_MANC2H);
4726                 int manc = E1000_READ_REG(&adapter->hw, E1000_MANC);
4727
4728                 /* disable hardware interception of ARP */
4729                 manc &= ~(E1000_MANC_ARP_EN);
4730
4731                 /* enable receiving management packets to the host */
4732                 manc |= E1000_MANC_EN_MNG2HOST;
4733 #define E1000_MNG2HOST_PORT_623 (1 << 5)
4734 #define E1000_MNG2HOST_PORT_664 (1 << 6)
4735                 manc2h |= E1000_MNG2HOST_PORT_623;
4736                 manc2h |= E1000_MNG2HOST_PORT_664;
4737                 E1000_WRITE_REG(&adapter->hw, E1000_MANC2H, manc2h);
4738                 E1000_WRITE_REG(&adapter->hw, E1000_MANC, manc);
4739         }
4740 }
4741
4742 /*
4743  * Give control back to hardware management
4744  * controller if there is one.
4745  */
4746 static void
4747 em_release_manageability(struct adapter *adapter)
4748 {
4749         if (adapter->has_manage) {
4750                 int manc = E1000_READ_REG(&adapter->hw, E1000_MANC);
4751
4752                 /* re-enable hardware interception of ARP */
4753                 manc |= E1000_MANC_ARP_EN;
4754                 manc &= ~E1000_MANC_EN_MNG2HOST;
4755
4756                 E1000_WRITE_REG(&adapter->hw, E1000_MANC, manc);
4757         }
4758 }
4759
4760 /*
4761  * em_get_hw_control sets the {CTRL_EXT|FWSM}:DRV_LOAD bit.
4762  * For ASF and Pass Through versions of f/w this means
4763  * that the driver is loaded. For AMT version type f/w
4764  * this means that the network i/f is open.
4765  */
4766 static void
4767 em_get_hw_control(struct adapter *adapter)
4768 {
4769         u32 ctrl_ext, swsm;
4770
4771         if (adapter->hw.mac.type == e1000_82573) {
4772                 swsm = E1000_READ_REG(&adapter->hw, E1000_SWSM);
4773                 E1000_WRITE_REG(&adapter->hw, E1000_SWSM,
4774                     swsm | E1000_SWSM_DRV_LOAD);
4775                 return;
4776         }
4777         /* else */
4778         ctrl_ext = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
4779         E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT,
4780             ctrl_ext | E1000_CTRL_EXT_DRV_LOAD);
4781         return;
4782 }
4783
4784 /*
4785  * em_release_hw_control resets {CTRL_EXT|FWSM}:DRV_LOAD bit.
4786  * For ASF and Pass Through versions of f/w this means that
4787  * the driver is no longer loaded. For AMT versions of the
4788  * f/w this means that the network i/f is closed.
4789  */
4790 static void
4791 em_release_hw_control(struct adapter *adapter)
4792 {
4793         u32 ctrl_ext, swsm;
4794
4795         if (!adapter->has_manage)
4796                 return;
4797
4798         if (adapter->hw.mac.type == e1000_82573) {
4799                 swsm = E1000_READ_REG(&adapter->hw, E1000_SWSM);
4800                 E1000_WRITE_REG(&adapter->hw, E1000_SWSM,
4801                     swsm & ~E1000_SWSM_DRV_LOAD);
4802                 return;
4803         }
4804         /* else */
4805         ctrl_ext = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
4806         E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT,
4807             ctrl_ext & ~E1000_CTRL_EXT_DRV_LOAD);
4808         return;
4809 }
4810
4811 static int
4812 em_is_valid_ether_addr(u8 *addr)
4813 {
4814         char zero_addr[6] = { 0, 0, 0, 0, 0, 0 };
4815
4816         if ((addr[0] & 1) || (!bcmp(addr, zero_addr, ETHER_ADDR_LEN))) {
4817                 return (FALSE);
4818         }
4819
4820         return (TRUE);
4821 }
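/*
 * Worked example for the test above: addr[0] & 1 is the IEEE 802
 * individual/group (multicast) bit, so 01:00:5e:00:00:01 (multicast)
 * and 00:00:00:00:00:00 (all zero) are both rejected, while a typical
 * unicast address such as 00:1b:21:xx:xx:xx passes.
 */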
4822
4823 /*
4824 ** Parse the interface capabilities with regard
4825 ** to both system management and wake-on-lan for
4826 ** later use.
4827 */
4828 static void
4829 em_get_wakeup(device_t dev)
4830 {
4831         struct adapter  *adapter = device_get_softc(dev);
4832         u16             eeprom_data = 0, device_id, apme_mask;
4833
4834         adapter->has_manage = e1000_enable_mng_pass_thru(&adapter->hw);
4835         apme_mask = EM_EEPROM_APME;
4836
4837         switch (adapter->hw.mac.type) {
4838         case e1000_82573:
4839         case e1000_82583:
4840                 adapter->has_amt = TRUE;
4841                 /* Falls thru */
4842         case e1000_82571:
4843         case e1000_82572:
4844         case e1000_80003es2lan:
4845                 if (adapter->hw.bus.func == 1) {
4846                         e1000_read_nvm(&adapter->hw,
4847                             NVM_INIT_CONTROL3_PORT_B, 1, &eeprom_data);
4848                         break;
4849                 } else
4850                         e1000_read_nvm(&adapter->hw,
4851                             NVM_INIT_CONTROL3_PORT_A, 1, &eeprom_data);
4852                 break;
4853         case e1000_ich8lan:
4854         case e1000_ich9lan:
4855         case e1000_ich10lan:
4856         case e1000_pchlan:
4857         case e1000_pch2lan:
4858                 apme_mask = E1000_WUC_APME;
4859                 adapter->has_amt = TRUE;
4860                 eeprom_data = E1000_READ_REG(&adapter->hw, E1000_WUC);
4861                 break;
4862         default:
4863                 e1000_read_nvm(&adapter->hw,
4864                     NVM_INIT_CONTROL3_PORT_A, 1, &eeprom_data);
4865                 break;
4866         }
4867         if (eeprom_data & apme_mask)
4868                 adapter->wol = (E1000_WUFC_MAG | E1000_WUFC_MC);
4869         /*
4870          * We have the eeprom settings; now apply the special cases
4871          * where the eeprom may be wrong or the board simply won't
4872          * support wake on lan on a particular port.
4873          */
4874         device_id = pci_get_device(dev);
4875         switch (device_id) {
4876         case E1000_DEV_ID_82571EB_FIBER:
4877                 /* Wake events only supported on port A for dual fiber
4878                  * regardless of eeprom setting */
4879                 if (E1000_READ_REG(&adapter->hw, E1000_STATUS) &
4880                     E1000_STATUS_FUNC_1)
4881                         adapter->wol = 0;
4882                 break;
4883         case E1000_DEV_ID_82571EB_QUAD_COPPER:
4884         case E1000_DEV_ID_82571EB_QUAD_FIBER:
4885         case E1000_DEV_ID_82571EB_QUAD_COPPER_LP:
4886                 /* if quad port adapter, disable WoL on all but port A */
4887                 if (global_quad_port_a != 0)
4888                         adapter->wol = 0;
4889                 /* Reset for multiple quad port adapters */
4890                 if (++global_quad_port_a == 4)
4891                         global_quad_port_a = 0;
4892                 break;
4893         }
4894         return;
4895 }
4896
4897
4898 /*
4899  * Enable PCI Wake On Lan capability
4900  */
4901 static void
4902 em_enable_wakeup(device_t dev)
4903 {
4904         struct adapter  *adapter = device_get_softc(dev);
4905         struct ifnet    *ifp = adapter->ifp;
4906         u32             pmc, ctrl, ctrl_ext, rctl;
4907         u16             status;
4908
4909         if ((pci_find_cap(dev, PCIY_PMG, &pmc) != 0))
4910                 return;
4911
4912         /* Advertise the wakeup capability */
4913         ctrl = E1000_READ_REG(&adapter->hw, E1000_CTRL);
4914         ctrl |= (E1000_CTRL_SWDPIN2 | E1000_CTRL_SWDPIN3);
4915         E1000_WRITE_REG(&adapter->hw, E1000_CTRL, ctrl);
4916         E1000_WRITE_REG(&adapter->hw, E1000_WUC, E1000_WUC_PME_EN);
4917
4918         if ((adapter->hw.mac.type == e1000_ich8lan) ||
4919             (adapter->hw.mac.type == e1000_pchlan) ||
4920             (adapter->hw.mac.type == e1000_ich9lan) ||
4921             (adapter->hw.mac.type == e1000_ich10lan))
4922                 e1000_suspend_workarounds_ich8lan(&adapter->hw);
4923
4924         /* Keep the laser running on Fiber adapters */
4925         if (adapter->hw.phy.media_type == e1000_media_type_fiber ||
4926             adapter->hw.phy.media_type == e1000_media_type_internal_serdes) {
4927                 ctrl_ext = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
4928                 ctrl_ext |= E1000_CTRL_EXT_SDP3_DATA;
4929                 E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT, ctrl_ext);
4930         }
4931
4932         /*
4933         ** Determine type of Wakeup: note that wol
4934         ** is set with all bits on by default.
4935         */
4936         if ((ifp->if_capenable & IFCAP_WOL_MAGIC) == 0)
4937                 adapter->wol &= ~E1000_WUFC_MAG;
4938
4939         if ((ifp->if_capenable & IFCAP_WOL_MCAST) == 0)
4940                 adapter->wol &= ~E1000_WUFC_MC;
4941         else {
4942                 rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
4943                 rctl |= E1000_RCTL_MPE;
4944                 E1000_WRITE_REG(&adapter->hw, E1000_RCTL, rctl);
4945         }
4946
4947         if ((adapter->hw.mac.type == e1000_pchlan) ||
4948             (adapter->hw.mac.type == e1000_pch2lan)) {
4949                 if (em_enable_phy_wakeup(adapter))
4950                         return;
4951         } else {
4952                 E1000_WRITE_REG(&adapter->hw, E1000_WUC, E1000_WUC_PME_EN);
4953                 E1000_WRITE_REG(&adapter->hw, E1000_WUFC, adapter->wol);
4954         }
4955
4956         if (adapter->hw.phy.type == e1000_phy_igp_3)
4957                 e1000_igp3_phy_powerdown_workaround_ich8lan(&adapter->hw);
4958
4959         /* Request PME */
4960         status = pci_read_config(dev, pmc + PCIR_POWER_STATUS, 2);
4961         status &= ~(PCIM_PSTAT_PME | PCIM_PSTAT_PMEENABLE);
4962         if (ifp->if_capenable & IFCAP_WOL)
4963                 status |= PCIM_PSTAT_PME | PCIM_PSTAT_PMEENABLE;
4964         pci_write_config(dev, pmc + PCIR_POWER_STATUS, status, 2);
4965
4966         return;
4967 }
4968
4969 /*
4970 ** WOL on the newer chipset interfaces (pchlan)
4971 ** requires the MAC settings to be copied into the PHY
4972 */
4973 static int
4974 em_enable_phy_wakeup(struct adapter *adapter)
4975 {
4976         struct e1000_hw *hw = &adapter->hw;
4977         u32 mreg, ret = 0;
4978         u16 preg;
4979
4980         /* copy MAC RARs to PHY RARs */
4981         e1000_copy_rx_addrs_to_phy_ich8lan(hw);
4982
4983         /* copy MAC MTA to PHY MTA */
4984         for (int i = 0; i < adapter->hw.mac.mta_reg_count; i++) {
4985                 mreg = E1000_READ_REG_ARRAY(hw, E1000_MTA, i);
4986                 e1000_write_phy_reg(hw, BM_MTA(i), (u16)(mreg & 0xFFFF));
4987                 e1000_write_phy_reg(hw, BM_MTA(i) + 1,
4988                     (u16)((mreg >> 16) & 0xFFFF));
4989         }
4990
4991         /* configure PHY Rx Control register */
4992         e1000_read_phy_reg(&adapter->hw, BM_RCTL, &preg);
4993         mreg = E1000_READ_REG(hw, E1000_RCTL);
4994         if (mreg & E1000_RCTL_UPE)
4995                 preg |= BM_RCTL_UPE;
4996         if (mreg & E1000_RCTL_MPE)
4997                 preg |= BM_RCTL_MPE;
4998         preg &= ~(BM_RCTL_MO_MASK);
4999         if (mreg & E1000_RCTL_MO_3)
5000                 preg |= (((mreg & E1000_RCTL_MO_3) >> E1000_RCTL_MO_SHIFT)
5001                                 << BM_RCTL_MO_SHIFT);
5002         if (mreg & E1000_RCTL_BAM)
5003                 preg |= BM_RCTL_BAM;
5004         if (mreg & E1000_RCTL_PMCF)
5005                 preg |= BM_RCTL_PMCF;
5006         mreg = E1000_READ_REG(hw, E1000_CTRL);
5007         if (mreg & E1000_CTRL_RFCE)
5008                 preg |= BM_RCTL_RFCE;
5009         e1000_write_phy_reg(&adapter->hw, BM_RCTL, preg);
5010
5011         /* enable PHY wakeup in MAC register */
5012         E1000_WRITE_REG(hw, E1000_WUC,
5013             E1000_WUC_PHY_WAKE | E1000_WUC_PME_EN);
5014         E1000_WRITE_REG(hw, E1000_WUFC, adapter->wol);
5015
5016         /* configure and enable PHY wakeup in PHY registers */
5017         e1000_write_phy_reg(&adapter->hw, BM_WUFC, adapter->wol);
5018         e1000_write_phy_reg(&adapter->hw, BM_WUC, E1000_WUC_PME_EN);
5019
5020         /* activate PHY wakeup */
5021         ret = hw->phy.ops.acquire(hw);
5022         if (ret) {
5023                 printf("Could not acquire PHY\n");
5024                 return ret;
5025         }
5026         e1000_write_phy_reg_mdic(hw, IGP01E1000_PHY_PAGE_SELECT,
5027                                  (BM_WUC_ENABLE_PAGE << IGP_PAGE_SHIFT));
5028         ret = e1000_read_phy_reg_mdic(hw, BM_WUC_ENABLE_REG, &preg);
5029         if (ret) {
5030                 printf("Could not read PHY page 769\n");
5031                 goto out;
5032         }
5033         preg |= BM_WUC_ENABLE_BIT | BM_WUC_HOST_WU_BIT;
5034         ret = e1000_write_phy_reg_mdic(hw, BM_WUC_ENABLE_REG, preg);
5035         if (ret)
5036                 printf("Could not set PHY Host Wakeup bit\n");
5037 out:
5038         hw->phy.ops.release(hw);
5039
5040         return ret;
5041 }
5042
5043 static void
5044 em_led_func(void *arg, int onoff)
5045 {
5046         struct adapter  *adapter = arg;
5047  
5048         EM_CORE_LOCK(adapter);
5049         if (onoff) {
5050                 e1000_setup_led(&adapter->hw);
5051                 e1000_led_on(&adapter->hw);
5052         } else {
5053                 e1000_led_off(&adapter->hw);
5054                 e1000_cleanup_led(&adapter->hw);
5055         }
5056         EM_CORE_UNLOCK(adapter);
5057 }
5058
5059 /*
5060 ** Disable the ASPM L0s and L1 link states
5061 */
5062 static void
5063 em_disable_aspm(struct adapter *adapter)
5064 {
5065         int             base, reg;
5066         u16             link_cap, link_ctrl;
5067         device_t        dev = adapter->dev;
5068
5069         switch (adapter->hw.mac.type) {
5070                 case e1000_82573:
5071                 case e1000_82574:
5072                 case e1000_82583:
5073                         break;
5074                 default:
5075                         return;
5076         }
5077         if (pci_find_cap(dev, PCIY_EXPRESS, &base) != 0)
5078                 return;
5079         reg = base + PCIR_EXPRESS_LINK_CAP;
5080         link_cap = pci_read_config(dev, reg, 2);
5081         if ((link_cap & PCIM_LINK_CAP_ASPM) == 0)
5082                 return;
5083         reg = base + PCIR_EXPRESS_LINK_CTL;
5084         link_ctrl = pci_read_config(dev, reg, 2);
5085         link_ctrl &= 0xFFFC; /* clear bits 0 and 1 (L0s and L1) */
5086         pci_write_config(dev, reg, link_ctrl, 2);
5087         return;
5088 }
5089
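/*
 * Background for the mask above (from the PCIe spec, not driver-specific):
 * bits 1:0 of the Link Control register select ASPM --
 *
 *	00 = disabled,  01 = L0s,  10 = L1,  11 = L0s and L1
 *
 * so link_ctrl &= 0xFFFC clears both and disables ASPM entirely.
 */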
5090 /**********************************************************************
5091  *
5092  *  Update the board statistics counters.
5093  *
5094  **********************************************************************/
5095 static void
5096 em_update_stats_counters(struct adapter *adapter)
5097 {
5098         struct ifnet   *ifp;
5099
5100         if (adapter->hw.phy.media_type == e1000_media_type_copper ||
5101            (E1000_READ_REG(&adapter->hw, E1000_STATUS) & E1000_STATUS_LU)) {
5102                 adapter->stats.symerrs += E1000_READ_REG(&adapter->hw, E1000_SYMERRS);
5103                 adapter->stats.sec += E1000_READ_REG(&adapter->hw, E1000_SEC);
5104         }
5105         adapter->stats.crcerrs += E1000_READ_REG(&adapter->hw, E1000_CRCERRS);
5106         adapter->stats.mpc += E1000_READ_REG(&adapter->hw, E1000_MPC);
5107         adapter->stats.scc += E1000_READ_REG(&adapter->hw, E1000_SCC);
5108         adapter->stats.ecol += E1000_READ_REG(&adapter->hw, E1000_ECOL);
5109
5110         adapter->stats.mcc += E1000_READ_REG(&adapter->hw, E1000_MCC);
5111         adapter->stats.latecol += E1000_READ_REG(&adapter->hw, E1000_LATECOL);
5112         adapter->stats.colc += E1000_READ_REG(&adapter->hw, E1000_COLC);
5113         adapter->stats.dc += E1000_READ_REG(&adapter->hw, E1000_DC);
5114         adapter->stats.rlec += E1000_READ_REG(&adapter->hw, E1000_RLEC);
5115         adapter->stats.xonrxc += E1000_READ_REG(&adapter->hw, E1000_XONRXC);
5116         adapter->stats.xontxc += E1000_READ_REG(&adapter->hw, E1000_XONTXC);
5117         /*
5118         ** For watchdog management we need to know if we have been
5119         ** paused during the last interval, so capture that here.
5120         */
5121         adapter->pause_frames = E1000_READ_REG(&adapter->hw, E1000_XOFFRXC);
5122         adapter->stats.xoffrxc += adapter->pause_frames;
5123         adapter->stats.xofftxc += E1000_READ_REG(&adapter->hw, E1000_XOFFTXC);
5124         adapter->stats.fcruc += E1000_READ_REG(&adapter->hw, E1000_FCRUC);
5125         adapter->stats.prc64 += E1000_READ_REG(&adapter->hw, E1000_PRC64);
5126         adapter->stats.prc127 += E1000_READ_REG(&adapter->hw, E1000_PRC127);
5127         adapter->stats.prc255 += E1000_READ_REG(&adapter->hw, E1000_PRC255);
5128         adapter->stats.prc511 += E1000_READ_REG(&adapter->hw, E1000_PRC511);
5129         adapter->stats.prc1023 += E1000_READ_REG(&adapter->hw, E1000_PRC1023);
5130         adapter->stats.prc1522 += E1000_READ_REG(&adapter->hw, E1000_PRC1522);
5131         adapter->stats.gprc += E1000_READ_REG(&adapter->hw, E1000_GPRC);
5132         adapter->stats.bprc += E1000_READ_REG(&adapter->hw, E1000_BPRC);
5133         adapter->stats.mprc += E1000_READ_REG(&adapter->hw, E1000_MPRC);
5134         adapter->stats.gptc += E1000_READ_REG(&adapter->hw, E1000_GPTC);
5135
5136         /* For the 64-bit byte counters the low dword must be read first. */
5137         /* Both registers clear on the read of the high dword */
5138
5139         adapter->stats.gorc += E1000_READ_REG(&adapter->hw, E1000_GORCL) +
5140             ((u64)E1000_READ_REG(&adapter->hw, E1000_GORCH) << 32);
5141         adapter->stats.gotc += E1000_READ_REG(&adapter->hw, E1000_GOTCL) +
5142             ((u64)E1000_READ_REG(&adapter->hw, E1000_GOTCH) << 32);
5143
5144         adapter->stats.rnbc += E1000_READ_REG(&adapter->hw, E1000_RNBC);
5145         adapter->stats.ruc += E1000_READ_REG(&adapter->hw, E1000_RUC);
5146         adapter->stats.rfc += E1000_READ_REG(&adapter->hw, E1000_RFC);
5147         adapter->stats.roc += E1000_READ_REG(&adapter->hw, E1000_ROC);
5148         adapter->stats.rjc += E1000_READ_REG(&adapter->hw, E1000_RJC);
5149
5150         adapter->stats.tor += E1000_READ_REG(&adapter->hw, E1000_TORH);
5151         adapter->stats.tot += E1000_READ_REG(&adapter->hw, E1000_TOTH);
5152
5153         adapter->stats.tpr += E1000_READ_REG(&adapter->hw, E1000_TPR);
5154         adapter->stats.tpt += E1000_READ_REG(&adapter->hw, E1000_TPT);
5155         adapter->stats.ptc64 += E1000_READ_REG(&adapter->hw, E1000_PTC64);
5156         adapter->stats.ptc127 += E1000_READ_REG(&adapter->hw, E1000_PTC127);
5157         adapter->stats.ptc255 += E1000_READ_REG(&adapter->hw, E1000_PTC255);
5158         adapter->stats.ptc511 += E1000_READ_REG(&adapter->hw, E1000_PTC511);
5159         adapter->stats.ptc1023 += E1000_READ_REG(&adapter->hw, E1000_PTC1023);
5160         adapter->stats.ptc1522 += E1000_READ_REG(&adapter->hw, E1000_PTC1522);
5161         adapter->stats.mptc += E1000_READ_REG(&adapter->hw, E1000_MPTC);
5162         adapter->stats.bptc += E1000_READ_REG(&adapter->hw, E1000_BPTC);
5163
5164         /* Interrupt Counts */
5165
5166         adapter->stats.iac += E1000_READ_REG(&adapter->hw, E1000_IAC);
5167         adapter->stats.icrxptc += E1000_READ_REG(&adapter->hw, E1000_ICRXPTC);
5168         adapter->stats.icrxatc += E1000_READ_REG(&adapter->hw, E1000_ICRXATC);
5169         adapter->stats.ictxptc += E1000_READ_REG(&adapter->hw, E1000_ICTXPTC);
5170         adapter->stats.ictxatc += E1000_READ_REG(&adapter->hw, E1000_ICTXATC);
5171         adapter->stats.ictxqec += E1000_READ_REG(&adapter->hw, E1000_ICTXQEC);
5172         adapter->stats.ictxqmtc += E1000_READ_REG(&adapter->hw, E1000_ICTXQMTC);
5173         adapter->stats.icrxdmtc += E1000_READ_REG(&adapter->hw, E1000_ICRXDMTC);
5174         adapter->stats.icrxoc += E1000_READ_REG(&adapter->hw, E1000_ICRXOC);
5175
5176         if (adapter->hw.mac.type >= e1000_82543) {
5177                 adapter->stats.algnerrc +=
5178                     E1000_READ_REG(&adapter->hw, E1000_ALGNERRC);
5179                 adapter->stats.rxerrc +=
5180                     E1000_READ_REG(&adapter->hw, E1000_RXERRC);
5181                 adapter->stats.tncrs +=
5182                     E1000_READ_REG(&adapter->hw, E1000_TNCRS);
5183                 adapter->stats.cexterr +=
5184                     E1000_READ_REG(&adapter->hw, E1000_CEXTERR);
5185                 adapter->stats.tsctc +=
5186                     E1000_READ_REG(&adapter->hw, E1000_TSCTC);
5187                 adapter->stats.tsctfc +=
5188                     E1000_READ_REG(&adapter->hw, E1000_TSCTFC);
5189         }
5190         ifp = adapter->ifp;
5191
5192         ifp->if_collisions = adapter->stats.colc;
5193
5194         /* Rx Errors */
5195         ifp->if_ierrors = adapter->dropped_pkts + adapter->stats.rxerrc +
5196             adapter->stats.crcerrs + adapter->stats.algnerrc +
5197             adapter->stats.ruc + adapter->stats.roc +
5198             adapter->stats.mpc + adapter->stats.cexterr;
5199
5200         /* Tx Errors */
5201         ifp->if_oerrors = adapter->stats.ecol +
5202             adapter->stats.latecol + adapter->watchdog_events;
5203 }
5204
5205 /* Export a single 32-bit register via a read-only sysctl. */
5206 static int
5207 em_sysctl_reg_handler(SYSCTL_HANDLER_ARGS)
5208 {
5209         struct adapter *adapter;
5210         u_int val;
5211
5212         adapter = oidp->oid_arg1;
5213         val = E1000_READ_REG(&adapter->hw, oidp->oid_arg2);
5214         return (sysctl_handle_int(oidp, &val, 0, req));
5215 }
5216
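/*
 * Usage sketch: each register exported through this handler appears as a
 * read-only sysctl under the device tree (the nodes are registered below
 * in em_add_hw_stats()), so, assuming unit 0, something like
 *
 *	sysctl dev.em.0.rx_control
 *
 * reads the live RCTL value from userland.  The register offset travels
 * in oid_arg2 and the adapter pointer in oid_arg1.
 */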
5217 /*
5218  * Add sysctl variables, one per statistic, to the system.
5219  */
5220 static void
5221 em_add_hw_stats(struct adapter *adapter)
5222 {
5223         device_t dev = adapter->dev;
5224
5225         struct tx_ring *txr = adapter->tx_rings;
5226         struct rx_ring *rxr = adapter->rx_rings;
5227
5228         struct sysctl_ctx_list *ctx = device_get_sysctl_ctx(dev);
5229         struct sysctl_oid *tree = device_get_sysctl_tree(dev);
5230         struct sysctl_oid_list *child = SYSCTL_CHILDREN(tree);
5231         struct e1000_hw_stats *stats = &adapter->stats;
5232
5233         struct sysctl_oid *stat_node, *queue_node, *int_node;
5234         struct sysctl_oid_list *stat_list, *queue_list, *int_list;
5235
5236 #define QUEUE_NAME_LEN 32
5237         char namebuf[QUEUE_NAME_LEN];
5238         
5239         /* Driver Statistics */
5240         SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "link_irq",
5241                         CTLFLAG_RD, &adapter->link_irq,
5242                         "Link MSIX IRQ Handled");
5243         SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "mbuf_alloc_fail", 
5244                          CTLFLAG_RD, &adapter->mbuf_alloc_failed,
5245                          "Std mbuf failed");
5246         SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "cluster_alloc_fail", 
5247                          CTLFLAG_RD, &adapter->mbuf_cluster_failed,
5248                          "Std mbuf cluster failed");
5249         SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "dropped", 
5250                         CTLFLAG_RD, &adapter->dropped_pkts,
5251                         "Driver dropped packets");
5252         SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "tx_dma_fail", 
5253                         CTLFLAG_RD, &adapter->no_tx_dma_setup,
5254                         "Driver tx dma failure in xmit");
5255         SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "rx_overruns",
5256                         CTLFLAG_RD, &adapter->rx_overruns,
5257                         "RX overruns");
5258         SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "watchdog_timeouts",
5259                         CTLFLAG_RD, &adapter->watchdog_events,
5260                         "Watchdog timeouts");
5261         
5262         SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "device_control",
5263                         CTLTYPE_UINT | CTLFLAG_RD, adapter, E1000_CTRL,
5264                         em_sysctl_reg_handler, "IU",
5265                         "Device Control Register");
5266         SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "rx_control",
5267                         CTLTYPE_UINT | CTLFLAG_RD, adapter, E1000_RCTL,
5268                         em_sysctl_reg_handler, "IU",
5269                         "Receiver Control Register");
5270         SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "fc_high_water",
5271                         CTLFLAG_RD, &adapter->hw.fc.high_water, 0,
5272                         "Flow Control High Watermark");
5273         SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "fc_low_water", 
5274                         CTLFLAG_RD, &adapter->hw.fc.low_water, 0,
5275                         "Flow Control Low Watermark");
5276
5277         for (int i = 0; i < adapter->num_queues; i++, rxr++, txr++) {
5278                 snprintf(namebuf, QUEUE_NAME_LEN, "queue%d", i);
5279                 queue_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, namebuf,
5280                                             CTLFLAG_RD, NULL, "Queue Name");
5281                 queue_list = SYSCTL_CHILDREN(queue_node);
5282
5283                 SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "txd_head", 
5284                                 CTLTYPE_UINT | CTLFLAG_RD, adapter,
5285                                 E1000_TDH(txr->me),
5286                                 em_sysctl_reg_handler, "IU",
5287                                 "Transmit Descriptor Head");
5288                 SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "txd_tail", 
5289                                 CTLTYPE_UINT | CTLFLAG_RD, adapter,
5290                                 E1000_TDT(txr->me),
5291                                 em_sysctl_reg_handler, "IU",
5292                                 "Transmit Descriptor Tail");
5293                 SYSCTL_ADD_ULONG(ctx, queue_list, OID_AUTO, "tx_irq",
5294                                 CTLFLAG_RD, &txr->tx_irq,
5295                                 "Queue MSI-X Transmit Interrupts");
5296                 SYSCTL_ADD_ULONG(ctx, queue_list, OID_AUTO, "no_desc_avail", 
5297                                 CTLFLAG_RD, &txr->no_desc_avail,
5298                                 "Queue No Descriptor Available");
5299                 
5300                 SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "rxd_head", 
5301                                 CTLTYPE_UINT | CTLFLAG_RD, adapter,
5302                                 E1000_RDH(rxr->me),
5303                                 em_sysctl_reg_handler, "IU",
5304                                 "Receive Descriptor Head");
5305                 SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "rxd_tail", 
5306                                 CTLTYPE_UINT | CTLFLAG_RD, adapter,
5307                                 E1000_RDT(rxr->me),
5308                                 em_sysctl_reg_handler, "IU",
5309                                 "Receive Descriptor Tail");
5310                 SYSCTL_ADD_ULONG(ctx, queue_list, OID_AUTO, "rx_irq",
5311                                 CTLFLAG_RD, &rxr->rx_irq,
5312                                 "Queue MSI-X Receive Interrupts");
5313         }
5314
5315         /* MAC stats get their own sub node */
5316
5317         stat_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "mac_stats", 
5318                                     CTLFLAG_RD, NULL, "Statistics");
5319         stat_list = SYSCTL_CHILDREN(stat_node);
5320
5321         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "excess_coll",
5322                         CTLFLAG_RD, &stats->ecol,
5323                         "Excessive collisions");
5324         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "single_coll",
5325                         CTLFLAG_RD, &stats->scc,
5326                         "Single collisions");
5327         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "multiple_coll",
5328                         CTLFLAG_RD, &stats->mcc,
5329                         "Multiple collisions");
5330         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "late_coll",
5331                         CTLFLAG_RD, &stats->latecol,
5332                         "Late collisions");
5333         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "collision_count",
5334                         CTLFLAG_RD, &stats->colc,
5335                         "Collision Count");
5336         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "symbol_errors",
5337                         CTLFLAG_RD, &adapter->stats.symerrs,
5338                         "Symbol Errors");
5339         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "sequence_errors",
5340                         CTLFLAG_RD, &adapter->stats.sec,
5341                         "Sequence Errors");
5342         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "defer_count",
5343                         CTLFLAG_RD, &adapter->stats.dc,
5344                         "Defer Count");
5345         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "missed_packets",
5346                         CTLFLAG_RD, &adapter->stats.mpc,
5347                         "Missed Packets");
5348         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_no_buff",
5349                         CTLFLAG_RD, &adapter->stats.rnbc,
5350                         "Receive No Buffers");
5351         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_undersize",
5352                         CTLFLAG_RD, &adapter->stats.ruc,
5353                         "Receive Undersize");
5354         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_fragmented",
5355                         CTLFLAG_RD, &adapter->stats.rfc,
5356                         "Fragmented Packets Received");
5357         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_oversize",
5358                         CTLFLAG_RD, &adapter->stats.roc,
5359                         "Oversized Packets Received");
5360         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_jabber",
5361                         CTLFLAG_RD, &adapter->stats.rjc,
5362                         "Received Jabber");
5363         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_errs",
5364                         CTLFLAG_RD, &adapter->stats.rxerrc,
5365                         "Receive Errors");
5366         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "crc_errs",
5367                         CTLFLAG_RD, &adapter->stats.crcerrs,
5368                         "CRC errors");
5369         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "alignment_errs",
5370                         CTLFLAG_RD, &adapter->stats.algnerrc,
5371                         "Alignment Errors");
5372         /* On 82575 these are collision counts */
5373         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "coll_ext_errs",
5374                         CTLFLAG_RD, &adapter->stats.cexterr,
5375                         "Collision/Carrier extension errors");
5376         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "xon_recvd",
5377                         CTLFLAG_RD, &adapter->stats.xonrxc,
5378                         "XON Received");
5379         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "xon_txd",
5380                         CTLFLAG_RD, &adapter->stats.xontxc,
5381                         "XON Transmitted");
5382         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "xoff_recvd",
5383                         CTLFLAG_RD, &adapter->stats.xoffrxc,
5384                         "XOFF Received");
5385         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "xoff_txd",
5386                         CTLFLAG_RD, &adapter->stats.xofftxc,
5387                         "XOFF Transmitted");
5388
5389         /* Packet Reception Stats */
5390         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "total_pkts_recvd",
5391                         CTLFLAG_RD, &adapter->stats.tpr,
5392                         "Total Packets Received");
5393         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_pkts_recvd",
5394                         CTLFLAG_RD, &adapter->stats.gprc,
5395                         "Good Packets Received");
5396         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "bcast_pkts_recvd",
5397                         CTLFLAG_RD, &adapter->stats.bprc,
5398                         "Broadcast Packets Received");
5399         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "mcast_pkts_recvd",
5400                         CTLFLAG_RD, &adapter->stats.mprc,
5401                         "Multicast Packets Received");
5402         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_64",
5403                         CTLFLAG_RD, &adapter->stats.prc64,
5404                         "64 byte frames received");
5405         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_65_127",
5406                         CTLFLAG_RD, &adapter->stats.prc127,
5407                         "65-127 byte frames received");
5408         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_128_255",
5409                         CTLFLAG_RD, &adapter->stats.prc255,
5410                         "128-255 byte frames received");
5411         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_256_511",
5412                         CTLFLAG_RD, &adapter->stats.prc511,
5413                         "256-511 byte frames received");
5414         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_512_1023",
5415                         CTLFLAG_RD, &adapter->stats.prc1023,
5416                         "512-1023 byte frames received");
5417         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_1024_1522",
5418                         CTLFLAG_RD, &adapter->stats.prc1522,
5419                         "1024-1522 byte frames received");
5420         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_octets_recvd",
5421                         CTLFLAG_RD, &adapter->stats.gorc, 
5422                         "Good Octets Received"); 
5423
5424         /* Packet Transmission Stats */
5425         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_octets_txd",
5426                         CTLFLAG_RD, &adapter->stats.gotc, 
5427                         "Good Octets Transmitted"); 
5428         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "total_pkts_txd",
5429                         CTLFLAG_RD, &adapter->stats.tpt,
5430                         "Total Packets Transmitted");
5431         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_pkts_txd",
5432                         CTLFLAG_RD, &adapter->stats.gptc,
5433                         "Good Packets Transmitted");
5434         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "bcast_pkts_txd",
5435                         CTLFLAG_RD, &adapter->stats.bptc,
5436                         "Broadcast Packets Transmitted");
5437         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "mcast_pkts_txd",
5438                         CTLFLAG_RD, &adapter->stats.mptc,
5439                         "Multicast Packets Transmitted");
5440         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_64",
5441                         CTLFLAG_RD, &adapter->stats.ptc64,
5442                         "64 byte frames transmitted");
5443         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_65_127",
5444                         CTLFLAG_RD, &adapter->stats.ptc127,
5445                         "65-127 byte frames transmitted");
5446         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_128_255",
5447                         CTLFLAG_RD, &adapter->stats.ptc255,
5448                         "128-255 byte frames transmitted");
5449         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_256_511",
5450                         CTLFLAG_RD, &adapter->stats.ptc511,
5451                         "256-511 byte frames transmitted");
5452         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_512_1023",
5453                         CTLFLAG_RD, &adapter->stats.ptc1023,
5454                         "512-1023 byte frames transmitted");
5455         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_1024_1522",
5456                         CTLFLAG_RD, &adapter->stats.ptc1522,
5457                         "1024-1522 byte frames transmitted");
5458         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tso_txd",
5459                         CTLFLAG_RD, &adapter->stats.tsctc,
5460                         "TSO Contexts Transmitted");
5461         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tso_ctx_fail",
5462                         CTLFLAG_RD, &adapter->stats.tsctfc,
5463                         "TSO Contexts Failed");
5464
5465
5466         /* Interrupt Stats */
5467
5468         int_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "interrupts", 
5469                                     CTLFLAG_RD, NULL, "Interrupt Statistics");
5470         int_list = SYSCTL_CHILDREN(int_node);
5471
5472         SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "asserts",
5473                         CTLFLAG_RD, &adapter->stats.iac,
5474                         "Interrupt Assertion Count");
5475
5476         SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "rx_pkt_timer",
5477                         CTLFLAG_RD, &adapter->stats.icrxptc,
5478                         "Interrupt Cause Rx Pkt Timer Expire Count");
5479
5480         SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "rx_abs_timer",
5481                         CTLFLAG_RD, &adapter->stats.icrxatc,
5482                         "Interrupt Cause Rx Abs Timer Expire Count");
5483
5484         SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "tx_pkt_timer",
5485                         CTLFLAG_RD, &adapter->stats.ictxptc,
5486                         "Interrupt Cause Tx Pkt Timer Expire Count");
5487
5488         SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "tx_abs_timer",
5489                         CTLFLAG_RD, &adapter->stats.ictxatc,
5490                         "Interrupt Cause Tx Abs Timer Expire Count");
5491
5492         SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "tx_queue_empty",
5493                         CTLFLAG_RD, &adapter->stats.ictxqec,
5494                         "Interrupt Cause Tx Queue Empty Count");
5495
5496         SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "tx_queue_min_thresh",
5497                         CTLFLAG_RD, &adapter->stats.ictxqmtc,
5498                         "Interrupt Cause Tx Queue Min Thresh Count");
5499
5500         SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "rx_desc_min_thresh",
5501                         CTLFLAG_RD, &adapter->stats.icrxdmtc,
5502                         "Interrupt Cause Rx Desc Min Thresh Count");
5503
5504         SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "rx_overrun",
5505                         CTLFLAG_RD, &adapter->stats.icrxoc,
5506                         "Interrupt Cause Receiver Overrun Count");
5507 }
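
/*
 * Example: after attach, everything registered above is visible via
 * sysctl(8) under the device node ("mac_stats" and "interrupts" per
 * the code above; unit 0 assumed for illustration):
 *
 *      # sysctl dev.em.0.mac_stats.good_pkts_recvd
 *      # sysctl dev.em.0.interrupts.asserts
 */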
5508
5509 /**********************************************************************
5510  *
5511  *  This routine provides a way to dump out the adapter EEPROM;
5512  *  it is often a useful debug/service tool. Only the first 32
5513  *  words are dumped, since the data that matters lies in that range.
5514  *
5515  **********************************************************************/
5516 static int
5517 em_sysctl_nvm_info(SYSCTL_HANDLER_ARGS)
5518 {
5519         struct adapter *adapter = (struct adapter *)arg1;
5520         int error;
5521         int result;
5522
5523         result = -1;
5524         error = sysctl_handle_int(oidp, &result, 0, req);
5525
5526         if (error || !req->newptr)
5527                 return (error);
5528
5529         /*
5530          * This value will cause a hex dump of the
5531          * first 32 16-bit words of the EEPROM to
5532          * the screen.
5533          */
5534         if (result == 1)
5535                 em_print_nvm_info(adapter);
5536
5537         return (error);
5538 }
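
/*
 * Usage sketch, assuming this handler is attached to an integer OID
 * named "nvm" elsewhere in the driver: writing 1 triggers the dump,
 * while a plain read just returns the -1 placeholder.
 *
 *      # sysctl dev.em.0.nvm=1
 */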
5539
5540 static void
5541 em_print_nvm_info(struct adapter *adapter)
5542 {
5543         u16     eeprom_data;
5544         int     i, j, row = 0;
5545
5546         /* It's a bit crude, but it gets the job done */
5547         printf("\nInterface EEPROM Dump:\n");
5548         printf("Offset\n0x0000  ");
5549         for (i = 0, j = 0; i < 32; i++, j++) {
5550                 if (j == 8) { /* Make the offset block */
5551                         j = 0; ++row;
5552                         printf("\n0x00%x0  ", row);
5553                 }
5554                 e1000_read_nvm(&adapter->hw, i, 1, &eeprom_data);
5555                 printf("%04x ", eeprom_data);
5556         }
5557         printf("\n");
5558 }
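
/*
 * The loop above emits eight words per row, so the console output
 * takes the following shape (word values purely illustrative):
 *
 *      Interface EEPROM Dump:
 *      Offset
 *      0x0000  1fc9 3a2b 5d17 3040 ffff ffff 4081 9335
 *      0x0010  f746 0100 ffff ffff ffff ffff ffff ffff
 *      ...
 */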
5559
5560 static int
5561 em_sysctl_int_delay(SYSCTL_HANDLER_ARGS)
5562 {
5563         struct em_int_delay_info *info;
5564         struct adapter *adapter;
5565         u32 regval;
5566         int error, usecs, ticks;
5567
5568         info = (struct em_int_delay_info *)arg1;
5569         usecs = info->value;
5570         error = sysctl_handle_int(oidp, &usecs, 0, req);
5571         if (error != 0 || req->newptr == NULL)
5572                 return (error);
5573         if (usecs < 0 || usecs > EM_TICKS_TO_USECS(65535))
5574                 return (EINVAL);
5575         info->value = usecs;
5576         ticks = EM_USECS_TO_TICKS(usecs);
5577
5578         adapter = info->adapter;
5579         
5580         EM_CORE_LOCK(adapter);
5581         regval = E1000_READ_OFFSET(&adapter->hw, info->offset);
5582         regval = (regval & ~0xffff) | (ticks & 0xffff);
5583         /* Handle a few special cases. */
5584         switch (info->offset) {
5585         case E1000_RDTR:
5586                 break;
5587         case E1000_TIDV:
5588                 if (ticks == 0) {
5589                         adapter->txd_cmd &= ~E1000_TXD_CMD_IDE;
5590                         /* Don't write 0 into the TIDV register. */
5591                         regval++;
5592                 } else
5593                         adapter->txd_cmd |= E1000_TXD_CMD_IDE;
5594                 break;
5595         }
5596         E1000_WRITE_OFFSET(&adapter->hw, info->offset, regval);
5597         EM_CORE_UNLOCK(adapter);
5598         return (0);
5599 }
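
/*
 * The interrupt delay registers tick in ~1.024 usec units, hence the
 * EM_USECS_TO_TICKS()/EM_TICKS_TO_USECS() conversions above.  A worked
 * example, assuming the usual if_em.h definition of
 * EM_USECS_TO_TICKS(x) as ((1000 * (x) + 512) / 1024): writing 32
 * usecs stores (1000 * 32 + 512) / 1024 = 31 ticks into the low 16
 * bits of the register, while the EM_TICKS_TO_USECS(65535) input
 * bound works out to roughly 67108 usecs (~67 ms).
 */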
5600
5601 static void
5602 em_add_int_delay_sysctl(struct adapter *adapter, const char *name,
5603         const char *description, struct em_int_delay_info *info,
5604         int offset, int value)
5605 {
5606         info->adapter = adapter;
5607         info->offset = offset;
5608         info->value = value;
5609         SYSCTL_ADD_PROC(device_get_sysctl_ctx(adapter->dev),
5610             SYSCTL_CHILDREN(device_get_sysctl_tree(adapter->dev)),
5611             OID_AUTO, name, CTLTYPE_INT|CTLFLAG_RW,
5612             info, 0, em_sysctl_int_delay, "I", description);
5613 }
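
/*
 * Sketch of a typical caller from the attach path (names mirror the
 * driver's existing interrupt-delay tunables; shown for illustration):
 *
 *      em_add_int_delay_sysctl(adapter, "rx_int_delay",
 *          "receive interrupt delay in usecs",
 *          &adapter->rx_int_delay,
 *          E1000_REGISTER(&adapter->hw, E1000_RDTR),
 *          em_rx_int_delay_dflt);
 */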
5614
5615 static void
5616 em_set_sysctl_value(struct adapter *adapter, const char *name,
5617         const char *description, int *limit, int value)
5618 {
5619         *limit = value;
5620         SYSCTL_ADD_INT(device_get_sysctl_ctx(adapter->dev),
5621             SYSCTL_CHILDREN(device_get_sysctl_tree(adapter->dev)),
5622             OID_AUTO, name, CTLFLAG_RW, limit, value, description);
5623 }
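
/*
 * Sketch of a caller (hypothetical limit name and default value):
 *
 *      em_set_sysctl_value(adapter, "rx_processing_limit",
 *          "max number of rx packets to process",
 *          &adapter->rx_process_limit, 100);
 */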
5624
5625
5626 /*
5627 ** Set flow control using sysctl:
5628 ** Flow control values:
5629 **      0 - off
5630 **      1 - rx pause
5631 **      2 - tx pause
5632 **      3 - full
5633 */
5634 static int
5635 em_set_flowcntl(SYSCTL_HANDLER_ARGS)
5636 {       
5637         int             error;
5638         static int      input = 3; /* default is full */
5639         struct adapter  *adapter = (struct adapter *) arg1;
5640                     
5641         error = sysctl_handle_int(oidp, &input, 0, req);
5642     
5643         if ((error) || (req->newptr == NULL))
5644                 return (error);
5645                 
5646         if (input == adapter->fc) /* no change? */
5647                 return (error);
5648
5649         switch (input) {
5650                 case e1000_fc_rx_pause:
5651                 case e1000_fc_tx_pause:
5652                 case e1000_fc_full:
5653                 case e1000_fc_none:
5654                         adapter->hw.fc.requested_mode = input;
5655                         adapter->fc = input;
5656                         break;
5657                 default:
5658                         /* Do nothing */
5659                         return (error);
5660         }
5661
5662         adapter->hw.fc.current_mode = adapter->hw.fc.requested_mode;
5663         e1000_force_mac_fc(&adapter->hw);
5664         return (error);
5665 }
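
/*
 * Usage sketch, assuming this handler is attached as a "fc" OID: the
 * accepted values map directly onto the e1000_fc_* enum documented
 * above, so forcing full flow control on unit 0 would be:
 *
 *      # sysctl dev.em.0.fc=3
 */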
5666
5667
5668 static int
5669 em_sysctl_debug_info(SYSCTL_HANDLER_ARGS)
5670 {
5671         struct adapter *adapter;
5672         int error;
5673         int result;
5674
5675         result = -1;
5676         error = sysctl_handle_int(oidp, &result, 0, req);
5677
5678         if (error || !req->newptr)
5679                 return (error);
5680
5681         if (result == 1) {
5682                 adapter = (struct adapter *)arg1;
5683                 em_print_debug_info(adapter);
5684         }
5685
5686         return (error);
5687 }
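
/*
 * As with the NVM handler above, this is driven by writing 1 to the
 * associated OID (assumed to be registered as "debug"):
 *
 *      # sysctl dev.em.0.debug=1
 */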
5688
5689 /*
5690 ** This routine is meant to be fluid; add whatever is
5691 ** needed to debug a problem.  -jfv
5692 */
5693 static void
5694 em_print_debug_info(struct adapter *adapter)
5695 {
5696         device_t dev = adapter->dev;
5697         struct tx_ring *txr = adapter->tx_rings;
5698         struct rx_ring *rxr = adapter->rx_rings;
5699
5700         if (adapter->ifp->if_drv_flags & IFF_DRV_RUNNING)
5701                 printf("Interface is RUNNING ");
5702         else
5703                 printf("Interface is NOT RUNNING ");
5704
5705         if (adapter->ifp->if_drv_flags & IFF_DRV_OACTIVE)
5706                 printf("and INACTIVE\n");
5707         else
5708                 printf("and ACTIVE\n");
5709
5710         device_printf(dev, "hw tdh = %d, hw tdt = %d\n",
5711             E1000_READ_REG(&adapter->hw, E1000_TDH(0)),
5712             E1000_READ_REG(&adapter->hw, E1000_TDT(0)));
5713         device_printf(dev, "hw rdh = %d, hw rdt = %d\n",
5714             E1000_READ_REG(&adapter->hw, E1000_RDH(0)),
5715             E1000_READ_REG(&adapter->hw, E1000_RDT(0)));
5716         device_printf(dev, "Tx Queue Status = %d\n", txr->queue_status);
5717         device_printf(dev, "TX descriptors avail = %d\n",
5718             txr->tx_avail);
5719         device_printf(dev, "Tx Descriptors avail failure = %lu\n",
5720             txr->no_desc_avail);
5721         device_printf(dev, "RX discarded packets = %lu\n",
5722             rxr->rx_discarded);
5723         device_printf(dev, "RX Next to Check = %d\n", rxr->next_to_check);
5724         device_printf(dev, "RX Next to Refresh = %d\n", rxr->next_to_refresh);
5725 }