]> CyberLeo.Net >> Repos - FreeBSD/FreeBSD.git/blob - sys/dev/e1000/if_em.c
The description is a bit misleading. Trying to make it more obvious.
[FreeBSD/FreeBSD.git] / sys / dev / e1000 / if_em.c
1 /******************************************************************************
2
3   Copyright (c) 2001-2014, Intel Corporation 
4   All rights reserved.
5   
6   Redistribution and use in source and binary forms, with or without 
7   modification, are permitted provided that the following conditions are met:
8   
9    1. Redistributions of source code must retain the above copyright notice, 
10       this list of conditions and the following disclaimer.
11   
12    2. Redistributions in binary form must reproduce the above copyright 
13       notice, this list of conditions and the following disclaimer in the 
14       documentation and/or other materials provided with the distribution.
15   
16    3. Neither the name of the Intel Corporation nor the names of its 
17       contributors may be used to endorse or promote products derived from 
18       this software without specific prior written permission.
19   
20   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
21   AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 
22   IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 
23   ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE 
24   LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 
25   CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 
26   SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 
27   INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 
28   CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 
29   ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
30   POSSIBILITY OF SUCH DAMAGE.
31
32 ******************************************************************************/
33 /*$FreeBSD$*/
34
35 #include "opt_inet.h"
36 #include "opt_inet6.h"
37
38 #ifdef HAVE_KERNEL_OPTION_HEADERS
39 #include "opt_device_polling.h"
40 #endif
41
42 #include <sys/param.h>
43 #include <sys/systm.h>
44 #if __FreeBSD_version >= 800000
45 #include <sys/buf_ring.h>
46 #endif
47 #include <sys/bus.h>
48 #include <sys/endian.h>
49 #include <sys/kernel.h>
50 #include <sys/kthread.h>
51 #include <sys/malloc.h>
52 #include <sys/mbuf.h>
53 #include <sys/module.h>
54 #include <sys/rman.h>
55 #include <sys/socket.h>
56 #include <sys/sockio.h>
57 #include <sys/sysctl.h>
58 #include <sys/taskqueue.h>
59 #include <sys/eventhandler.h>
60 #include <machine/bus.h>
61 #include <machine/resource.h>
62
63 #include <net/bpf.h>
64 #include <net/ethernet.h>
65 #include <net/if.h>
66 #include <net/if_var.h>
67 #include <net/if_arp.h>
68 #include <net/if_dl.h>
69 #include <net/if_media.h>
70
71 #include <net/if_types.h>
72 #include <net/if_vlan_var.h>
73
74 #include <netinet/in_systm.h>
75 #include <netinet/in.h>
76 #include <netinet/if_ether.h>
77 #include <netinet/ip.h>
78 #include <netinet/ip6.h>
79 #include <netinet/tcp.h>
80 #include <netinet/udp.h>
81
82 #include <machine/in_cksum.h>
83 #include <dev/led/led.h>
84 #include <dev/pci/pcivar.h>
85 #include <dev/pci/pcireg.h>
86
87 #include "e1000_api.h"
88 #include "e1000_82571.h"
89 #include "if_em.h"
90
91 /*********************************************************************
92  *  Set this to one to display debug statistics
93  *********************************************************************/
94 int     em_display_debug_stats = 0;
95
96 /*********************************************************************
97  *  Driver version:
98  *********************************************************************/
99 char em_driver_version[] = "7.4.2";
100
101 /*********************************************************************
102  *  PCI Device ID Table
103  *
104  *  Used by probe to select devices to load on
105  *  Last field stores an index into e1000_strings
106  *  Last entry must be all 0s
107  *
108  *  { Vendor ID, Device ID, SubVendor ID, SubDevice ID, String Index }
109  *********************************************************************/
110
111 static em_vendor_info_t em_vendor_info_array[] =
112 {
113         /* Intel(R) PRO/1000 Network Connection */
114         { 0x8086, E1000_DEV_ID_82571EB_COPPER,  PCI_ANY_ID, PCI_ANY_ID, 0},
115         { 0x8086, E1000_DEV_ID_82571EB_FIBER,   PCI_ANY_ID, PCI_ANY_ID, 0},
116         { 0x8086, E1000_DEV_ID_82571EB_SERDES,  PCI_ANY_ID, PCI_ANY_ID, 0},
117         { 0x8086, E1000_DEV_ID_82571EB_SERDES_DUAL,
118                                                 PCI_ANY_ID, PCI_ANY_ID, 0},
119         { 0x8086, E1000_DEV_ID_82571EB_SERDES_QUAD,
120                                                 PCI_ANY_ID, PCI_ANY_ID, 0},
121         { 0x8086, E1000_DEV_ID_82571EB_QUAD_COPPER,
122                                                 PCI_ANY_ID, PCI_ANY_ID, 0},
123         { 0x8086, E1000_DEV_ID_82571EB_QUAD_COPPER_LP,
124                                                 PCI_ANY_ID, PCI_ANY_ID, 0},
125         { 0x8086, E1000_DEV_ID_82571EB_QUAD_FIBER,
126                                                 PCI_ANY_ID, PCI_ANY_ID, 0},
127         { 0x8086, E1000_DEV_ID_82571PT_QUAD_COPPER,
128                                                 PCI_ANY_ID, PCI_ANY_ID, 0},
129         { 0x8086, E1000_DEV_ID_82572EI_COPPER,  PCI_ANY_ID, PCI_ANY_ID, 0},
130         { 0x8086, E1000_DEV_ID_82572EI_FIBER,   PCI_ANY_ID, PCI_ANY_ID, 0},
131         { 0x8086, E1000_DEV_ID_82572EI_SERDES,  PCI_ANY_ID, PCI_ANY_ID, 0},
132         { 0x8086, E1000_DEV_ID_82572EI,         PCI_ANY_ID, PCI_ANY_ID, 0},
133
134         { 0x8086, E1000_DEV_ID_82573E,          PCI_ANY_ID, PCI_ANY_ID, 0},
135         { 0x8086, E1000_DEV_ID_82573E_IAMT,     PCI_ANY_ID, PCI_ANY_ID, 0},
136         { 0x8086, E1000_DEV_ID_82573L,          PCI_ANY_ID, PCI_ANY_ID, 0},
137         { 0x8086, E1000_DEV_ID_82583V,          PCI_ANY_ID, PCI_ANY_ID, 0},
138         { 0x8086, E1000_DEV_ID_80003ES2LAN_COPPER_SPT,
139                                                 PCI_ANY_ID, PCI_ANY_ID, 0},
140         { 0x8086, E1000_DEV_ID_80003ES2LAN_SERDES_SPT,
141                                                 PCI_ANY_ID, PCI_ANY_ID, 0},
142         { 0x8086, E1000_DEV_ID_80003ES2LAN_COPPER_DPT,
143                                                 PCI_ANY_ID, PCI_ANY_ID, 0},
144         { 0x8086, E1000_DEV_ID_80003ES2LAN_SERDES_DPT,
145                                                 PCI_ANY_ID, PCI_ANY_ID, 0},
146         { 0x8086, E1000_DEV_ID_ICH8_IGP_M_AMT,  PCI_ANY_ID, PCI_ANY_ID, 0},
147         { 0x8086, E1000_DEV_ID_ICH8_IGP_AMT,    PCI_ANY_ID, PCI_ANY_ID, 0},
148         { 0x8086, E1000_DEV_ID_ICH8_IGP_C,      PCI_ANY_ID, PCI_ANY_ID, 0},
149         { 0x8086, E1000_DEV_ID_ICH8_IFE,        PCI_ANY_ID, PCI_ANY_ID, 0},
150         { 0x8086, E1000_DEV_ID_ICH8_IFE_GT,     PCI_ANY_ID, PCI_ANY_ID, 0},
151         { 0x8086, E1000_DEV_ID_ICH8_IFE_G,      PCI_ANY_ID, PCI_ANY_ID, 0},
152         { 0x8086, E1000_DEV_ID_ICH8_IGP_M,      PCI_ANY_ID, PCI_ANY_ID, 0},
153         { 0x8086, E1000_DEV_ID_ICH8_82567V_3,   PCI_ANY_ID, PCI_ANY_ID, 0},
154         { 0x8086, E1000_DEV_ID_ICH9_IGP_M_AMT,  PCI_ANY_ID, PCI_ANY_ID, 0},
155         { 0x8086, E1000_DEV_ID_ICH9_IGP_AMT,    PCI_ANY_ID, PCI_ANY_ID, 0},
156         { 0x8086, E1000_DEV_ID_ICH9_IGP_C,      PCI_ANY_ID, PCI_ANY_ID, 0},
157         { 0x8086, E1000_DEV_ID_ICH9_IGP_M,      PCI_ANY_ID, PCI_ANY_ID, 0},
158         { 0x8086, E1000_DEV_ID_ICH9_IGP_M_V,    PCI_ANY_ID, PCI_ANY_ID, 0},
159         { 0x8086, E1000_DEV_ID_ICH9_IFE,        PCI_ANY_ID, PCI_ANY_ID, 0},
160         { 0x8086, E1000_DEV_ID_ICH9_IFE_GT,     PCI_ANY_ID, PCI_ANY_ID, 0},
161         { 0x8086, E1000_DEV_ID_ICH9_IFE_G,      PCI_ANY_ID, PCI_ANY_ID, 0},
162         { 0x8086, E1000_DEV_ID_ICH9_BM,         PCI_ANY_ID, PCI_ANY_ID, 0},
163         { 0x8086, E1000_DEV_ID_82574L,          PCI_ANY_ID, PCI_ANY_ID, 0},
164         { 0x8086, E1000_DEV_ID_82574LA,         PCI_ANY_ID, PCI_ANY_ID, 0},
165         { 0x8086, E1000_DEV_ID_ICH10_R_BM_LM,   PCI_ANY_ID, PCI_ANY_ID, 0},
166         { 0x8086, E1000_DEV_ID_ICH10_R_BM_LF,   PCI_ANY_ID, PCI_ANY_ID, 0},
167         { 0x8086, E1000_DEV_ID_ICH10_R_BM_V,    PCI_ANY_ID, PCI_ANY_ID, 0},
168         { 0x8086, E1000_DEV_ID_ICH10_D_BM_LM,   PCI_ANY_ID, PCI_ANY_ID, 0},
169         { 0x8086, E1000_DEV_ID_ICH10_D_BM_LF,   PCI_ANY_ID, PCI_ANY_ID, 0},
170         { 0x8086, E1000_DEV_ID_ICH10_D_BM_V,    PCI_ANY_ID, PCI_ANY_ID, 0},
171         { 0x8086, E1000_DEV_ID_PCH_M_HV_LM,     PCI_ANY_ID, PCI_ANY_ID, 0},
172         { 0x8086, E1000_DEV_ID_PCH_M_HV_LC,     PCI_ANY_ID, PCI_ANY_ID, 0},
173         { 0x8086, E1000_DEV_ID_PCH_D_HV_DM,     PCI_ANY_ID, PCI_ANY_ID, 0},
174         { 0x8086, E1000_DEV_ID_PCH_D_HV_DC,     PCI_ANY_ID, PCI_ANY_ID, 0},
175         { 0x8086, E1000_DEV_ID_PCH2_LV_LM,      PCI_ANY_ID, PCI_ANY_ID, 0},
176         { 0x8086, E1000_DEV_ID_PCH2_LV_V,       PCI_ANY_ID, PCI_ANY_ID, 0},
177         { 0x8086, E1000_DEV_ID_PCH_LPT_I217_LM, PCI_ANY_ID, PCI_ANY_ID, 0},
178         { 0x8086, E1000_DEV_ID_PCH_LPT_I217_V,  PCI_ANY_ID, PCI_ANY_ID, 0},
179         { 0x8086, E1000_DEV_ID_PCH_LPTLP_I218_LM,
180                                                 PCI_ANY_ID, PCI_ANY_ID, 0},
181         { 0x8086, E1000_DEV_ID_PCH_LPTLP_I218_V,
182                                                 PCI_ANY_ID, PCI_ANY_ID, 0},
183         { 0x8086, E1000_DEV_ID_PCH_I218_LM2,    PCI_ANY_ID, PCI_ANY_ID, 0},
184         { 0x8086, E1000_DEV_ID_PCH_I218_V2,     PCI_ANY_ID, PCI_ANY_ID, 0},
185         { 0x8086, E1000_DEV_ID_PCH_I218_LM3,    PCI_ANY_ID, PCI_ANY_ID, 0},
186         { 0x8086, E1000_DEV_ID_PCH_I218_V3,     PCI_ANY_ID, PCI_ANY_ID, 0},
187         /* required last entry */
188         { 0, 0, 0, 0, 0}
189 };
190
191 /*********************************************************************
192  *  Table of branding strings for all supported NICs.
193  *********************************************************************/
194
195 static char *em_strings[] = {
196         "Intel(R) PRO/1000 Network Connection"
197 };
198
199 /*********************************************************************
200  *  Function prototypes
201  *********************************************************************/
202 static int      em_probe(device_t);
203 static int      em_attach(device_t);
204 static int      em_detach(device_t);
205 static int      em_shutdown(device_t);
206 static int      em_suspend(device_t);
207 static int      em_resume(device_t);
208 #ifdef EM_MULTIQUEUE
209 static int      em_mq_start(if_t, struct mbuf *);
210 static int      em_mq_start_locked(if_t,
211                     struct tx_ring *, struct mbuf *);
212 static void     em_qflush(if_t);
213 #else
214 static void     em_start(if_t);
215 static void     em_start_locked(if_t, struct tx_ring *);
216 #endif
217 static int      em_ioctl(if_t, u_long, caddr_t);
218 static void     em_init(void *);
219 static void     em_init_locked(struct adapter *);
220 static void     em_stop(void *);
221 static void     em_media_status(if_t, struct ifmediareq *);
222 static int      em_media_change(if_t);
223 static void     em_identify_hardware(struct adapter *);
224 static int      em_allocate_pci_resources(struct adapter *);
225 static int      em_allocate_legacy(struct adapter *);
226 static int      em_allocate_msix(struct adapter *);
227 static int      em_allocate_queues(struct adapter *);
228 static int      em_setup_msix(struct adapter *);
229 static void     em_free_pci_resources(struct adapter *);
230 static void     em_local_timer(void *);
231 static void     em_reset(struct adapter *);
232 static int      em_setup_interface(device_t, struct adapter *);
233
234 static void     em_setup_transmit_structures(struct adapter *);
235 static void     em_initialize_transmit_unit(struct adapter *);
236 static int      em_allocate_transmit_buffers(struct tx_ring *);
237 static void     em_free_transmit_structures(struct adapter *);
238 static void     em_free_transmit_buffers(struct tx_ring *);
239
240 static int      em_setup_receive_structures(struct adapter *);
241 static int      em_allocate_receive_buffers(struct rx_ring *);
242 static void     em_initialize_receive_unit(struct adapter *);
243 static void     em_free_receive_structures(struct adapter *);
244 static void     em_free_receive_buffers(struct rx_ring *);
245
246 static void     em_enable_intr(struct adapter *);
247 static void     em_disable_intr(struct adapter *);
248 static void     em_update_stats_counters(struct adapter *);
249 static void     em_add_hw_stats(struct adapter *adapter);
250 static void     em_txeof(struct tx_ring *);
251 static bool     em_rxeof(struct rx_ring *, int, int *);
252 #ifndef __NO_STRICT_ALIGNMENT
253 static int      em_fixup_rx(struct rx_ring *);
254 #endif
255 static void     em_receive_checksum(struct e1000_rx_desc *, struct mbuf *);
256 static void     em_transmit_checksum_setup(struct tx_ring *, struct mbuf *, int,
257                     struct ip *, u32 *, u32 *);
258 static void     em_tso_setup(struct tx_ring *, struct mbuf *, int, struct ip *,
259                     struct tcphdr *, u32 *, u32 *);
260 static void     em_set_promisc(struct adapter *);
261 static void     em_disable_promisc(struct adapter *);
262 static void     em_set_multi(struct adapter *);
263 static void     em_update_link_status(struct adapter *);
264 static void     em_refresh_mbufs(struct rx_ring *, int);
265 static void     em_register_vlan(void *, if_t, u16);
266 static void     em_unregister_vlan(void *, if_t, u16);
267 static void     em_setup_vlan_hw_support(struct adapter *);
268 static int      em_xmit(struct tx_ring *, struct mbuf **);
269 static int      em_dma_malloc(struct adapter *, bus_size_t,
270                     struct em_dma_alloc *, int);
271 static void     em_dma_free(struct adapter *, struct em_dma_alloc *);
272 static int      em_sysctl_nvm_info(SYSCTL_HANDLER_ARGS);
273 static void     em_print_nvm_info(struct adapter *);
274 static int      em_sysctl_debug_info(SYSCTL_HANDLER_ARGS);
275 static void     em_print_debug_info(struct adapter *);
276 static int      em_is_valid_ether_addr(u8 *);
277 static int      em_sysctl_int_delay(SYSCTL_HANDLER_ARGS);
278 static void     em_add_int_delay_sysctl(struct adapter *, const char *,
279                     const char *, struct em_int_delay_info *, int, int);
280 /* Management and WOL Support */
281 static void     em_init_manageability(struct adapter *);
282 static void     em_release_manageability(struct adapter *);
283 static void     em_get_hw_control(struct adapter *);
284 static void     em_release_hw_control(struct adapter *);
285 static void     em_get_wakeup(device_t);
286 static void     em_enable_wakeup(device_t);
287 static int      em_enable_phy_wakeup(struct adapter *);
288 static void     em_led_func(void *, int);
289 static void     em_disable_aspm(struct adapter *);
290
291 static int      em_irq_fast(void *);
292
293 /* MSIX handlers */
294 static void     em_msix_tx(void *);
295 static void     em_msix_rx(void *);
296 static void     em_msix_link(void *);
297 static void     em_handle_tx(void *context, int pending);
298 static void     em_handle_rx(void *context, int pending);
299 static void     em_handle_link(void *context, int pending);
300
301 static void     em_set_sysctl_value(struct adapter *, const char *,
302                     const char *, int *, int);
303 static int      em_set_flowcntl(SYSCTL_HANDLER_ARGS);
304 static int      em_sysctl_eee(SYSCTL_HANDLER_ARGS);
305
306 static __inline void em_rx_discard(struct rx_ring *, int);
307
308 #ifdef DEVICE_POLLING
309 static poll_handler_drv_t em_poll;
310 #endif /* POLLING */
311
312 /*********************************************************************
313  *  FreeBSD Device Interface Entry Points
314  *********************************************************************/
315
316 static device_method_t em_methods[] = {
317         /* Device interface */
318         DEVMETHOD(device_probe, em_probe),
319         DEVMETHOD(device_attach, em_attach),
320         DEVMETHOD(device_detach, em_detach),
321         DEVMETHOD(device_shutdown, em_shutdown),
322         DEVMETHOD(device_suspend, em_suspend),
323         DEVMETHOD(device_resume, em_resume),
324         DEVMETHOD_END
325 };
326
327 static driver_t em_driver = {
328         "em", em_methods, sizeof(struct adapter),
329 };
330
331 devclass_t em_devclass;
332 DRIVER_MODULE(em, pci, em_driver, em_devclass, 0, 0);
333 MODULE_DEPEND(em, pci, 1, 1, 1);
334 MODULE_DEPEND(em, ether, 1, 1, 1);
335
336 /*********************************************************************
337  *  Tunable default values.
338  *********************************************************************/
339
340 #define EM_TICKS_TO_USECS(ticks)        ((1024 * (ticks) + 500) / 1000)
341 #define EM_USECS_TO_TICKS(usecs)        ((1000 * (usecs) + 512) / 1024)
342 #define M_TSO_LEN                       66
343
344 #define MAX_INTS_PER_SEC        8000
345 #define DEFAULT_ITR             (1000000000/(MAX_INTS_PER_SEC * 256))
346
347 /* Allow common code without TSO */
348 #ifndef CSUM_TSO
349 #define CSUM_TSO        0
350 #endif
351
352 static SYSCTL_NODE(_hw, OID_AUTO, em, CTLFLAG_RD, 0, "EM driver parameters");
353
354 static int em_tx_int_delay_dflt = EM_TICKS_TO_USECS(EM_TIDV);
355 static int em_rx_int_delay_dflt = EM_TICKS_TO_USECS(EM_RDTR);
356 SYSCTL_INT(_hw_em, OID_AUTO, tx_int_delay, CTLFLAG_RDTUN, &em_tx_int_delay_dflt,
357     0, "Default transmit interrupt delay in usecs");
358 SYSCTL_INT(_hw_em, OID_AUTO, rx_int_delay, CTLFLAG_RDTUN, &em_rx_int_delay_dflt,
359     0, "Default receive interrupt delay in usecs");
360
361 static int em_tx_abs_int_delay_dflt = EM_TICKS_TO_USECS(EM_TADV);
362 static int em_rx_abs_int_delay_dflt = EM_TICKS_TO_USECS(EM_RADV);
363 SYSCTL_INT(_hw_em, OID_AUTO, tx_abs_int_delay, CTLFLAG_RDTUN,
364     &em_tx_abs_int_delay_dflt, 0,
365     "Default transmit interrupt delay limit in usecs");
366 SYSCTL_INT(_hw_em, OID_AUTO, rx_abs_int_delay, CTLFLAG_RDTUN,
367     &em_rx_abs_int_delay_dflt, 0,
368     "Default receive interrupt delay limit in usecs");
369
370 static int em_rxd = EM_DEFAULT_RXD;
371 static int em_txd = EM_DEFAULT_TXD;
372 SYSCTL_INT(_hw_em, OID_AUTO, rxd, CTLFLAG_RDTUN, &em_rxd, 0,
373     "Number of receive descriptors per queue");
374 SYSCTL_INT(_hw_em, OID_AUTO, txd, CTLFLAG_RDTUN, &em_txd, 0,
375     "Number of transmit descriptors per queue");
376
377 static int em_smart_pwr_down = FALSE;
378 SYSCTL_INT(_hw_em, OID_AUTO, smart_pwr_down, CTLFLAG_RDTUN, &em_smart_pwr_down,
379     0, "Set to true to leave smart power down enabled on newer adapters");
380
381 /* Controls whether promiscuous also shows bad packets */
382 static int em_debug_sbp = FALSE;
383 SYSCTL_INT(_hw_em, OID_AUTO, sbp, CTLFLAG_RDTUN, &em_debug_sbp, 0,
384     "Show bad packets in promiscuous mode");
385
386 static int em_enable_msix = TRUE;
387 SYSCTL_INT(_hw_em, OID_AUTO, enable_msix, CTLFLAG_RDTUN, &em_enable_msix, 0,
388     "Enable MSI-X interrupts");
389
390 /* How many packets rxeof tries to clean at a time */
391 static int em_rx_process_limit = 100;
392 SYSCTL_INT(_hw_em, OID_AUTO, rx_process_limit, CTLFLAG_RDTUN,
393     &em_rx_process_limit, 0,
394     "Maximum number of received packets to process "
395     "at a time, -1 means unlimited");
396
397 /* Energy efficient ethernet - default to OFF */
398 static int eee_setting = 1;
399 SYSCTL_INT(_hw_em, OID_AUTO, eee_setting, CTLFLAG_RDTUN, &eee_setting, 0,
400     "Enable Energy Efficient Ethernet");
401
402 /* Global used in WOL setup with multiport cards */
403 static int global_quad_port_a = 0;
404
405 #ifdef DEV_NETMAP       /* see ixgbe.c for details */
406 #include <dev/netmap/if_em_netmap.h>
407 #endif /* DEV_NETMAP */
408
409 /*********************************************************************
410  *  Device identification routine
411  *
412  *  em_probe determines if the driver should be loaded on
413  *  adapter based on PCI vendor/device id of the adapter.
414  *
415  *  return BUS_PROBE_DEFAULT on success, positive on failure
416  *********************************************************************/
417
418 static int
419 em_probe(device_t dev)
420 {
421         char            adapter_name[60];
422         u16             pci_vendor_id = 0;
423         u16             pci_device_id = 0;
424         u16             pci_subvendor_id = 0;
425         u16             pci_subdevice_id = 0;
426         em_vendor_info_t *ent;
427
428         INIT_DEBUGOUT("em_probe: begin");
429
430         pci_vendor_id = pci_get_vendor(dev);
431         if (pci_vendor_id != EM_VENDOR_ID)
432                 return (ENXIO);
433
434         pci_device_id = pci_get_device(dev);
435         pci_subvendor_id = pci_get_subvendor(dev);
436         pci_subdevice_id = pci_get_subdevice(dev);
437
438         ent = em_vendor_info_array;
439         while (ent->vendor_id != 0) {
440                 if ((pci_vendor_id == ent->vendor_id) &&
441                     (pci_device_id == ent->device_id) &&
442
443                     ((pci_subvendor_id == ent->subvendor_id) ||
444                     (ent->subvendor_id == PCI_ANY_ID)) &&
445
446                     ((pci_subdevice_id == ent->subdevice_id) ||
447                     (ent->subdevice_id == PCI_ANY_ID))) {
448                         sprintf(adapter_name, "%s %s",
449                                 em_strings[ent->index],
450                                 em_driver_version);
451                         device_set_desc_copy(dev, adapter_name);
452                         return (BUS_PROBE_DEFAULT);
453                 }
454                 ent++;
455         }
456
457         return (ENXIO);
458 }
459
460 /*********************************************************************
461  *  Device initialization routine
462  *
463  *  The attach entry point is called when the driver is being loaded.
464  *  This routine identifies the type of hardware, allocates all resources
465  *  and initializes the hardware.
466  *
467  *  return 0 on success, positive on failure
468  *********************************************************************/
469
470 static int
471 em_attach(device_t dev)
472 {
473         struct adapter  *adapter;
474         struct e1000_hw *hw;
475         int             error = 0;
476
477         INIT_DEBUGOUT("em_attach: begin");
478
479         if (resource_disabled("em", device_get_unit(dev))) {
480                 device_printf(dev, "Disabled by device hint\n");
481                 return (ENXIO);
482         }
483
484         adapter = device_get_softc(dev);
485         adapter->dev = adapter->osdep.dev = dev;
486         hw = &adapter->hw;
487         EM_CORE_LOCK_INIT(adapter, device_get_nameunit(dev));
488
489         /* SYSCTL stuff */
490         SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
491             SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
492             OID_AUTO, "nvm", CTLTYPE_INT|CTLFLAG_RW, adapter, 0,
493             em_sysctl_nvm_info, "I", "NVM Information");
494
495         SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
496             SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
497             OID_AUTO, "debug", CTLTYPE_INT|CTLFLAG_RW, adapter, 0,
498             em_sysctl_debug_info, "I", "Debug Information");
499
500         SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
501             SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
502             OID_AUTO, "fc", CTLTYPE_INT|CTLFLAG_RW, adapter, 0,
503             em_set_flowcntl, "I", "Flow Control");
504
505         callout_init_mtx(&adapter->timer, &adapter->core_mtx, 0);
506
507         /* Determine hardware and mac info */
508         em_identify_hardware(adapter);
509
510         /* Setup PCI resources */
511         if (em_allocate_pci_resources(adapter)) {
512                 device_printf(dev, "Allocation of PCI resources failed\n");
513                 error = ENXIO;
514                 goto err_pci;
515         }
516
517         /*
518         ** For ICH8 and family we need to
519         ** map the flash memory, and this
520         ** must happen after the MAC is 
521         ** identified
522         */
523         if ((hw->mac.type == e1000_ich8lan) ||
524             (hw->mac.type == e1000_ich9lan) ||
525             (hw->mac.type == e1000_ich10lan) ||
526             (hw->mac.type == e1000_pchlan) ||
527             (hw->mac.type == e1000_pch2lan) ||
528             (hw->mac.type == e1000_pch_lpt)) {
529                 int rid = EM_BAR_TYPE_FLASH;
530                 adapter->flash = bus_alloc_resource_any(dev,
531                     SYS_RES_MEMORY, &rid, RF_ACTIVE);
532                 if (adapter->flash == NULL) {
533                         device_printf(dev, "Mapping of Flash failed\n");
534                         error = ENXIO;
535                         goto err_pci;
536                 }
537                 /* This is used in the shared code */
538                 hw->flash_address = (u8 *)adapter->flash;
539                 adapter->osdep.flash_bus_space_tag =
540                     rman_get_bustag(adapter->flash);
541                 adapter->osdep.flash_bus_space_handle =
542                     rman_get_bushandle(adapter->flash);
543         }
544
545         /* Do Shared Code initialization */
546         if (e1000_setup_init_funcs(hw, TRUE)) {
547                 device_printf(dev, "Setup of Shared code failed\n");
548                 error = ENXIO;
549                 goto err_pci;
550         }
551
552         e1000_get_bus_info(hw);
553
554         /* Set up some sysctls for the tunable interrupt delays */
555         em_add_int_delay_sysctl(adapter, "rx_int_delay",
556             "receive interrupt delay in usecs", &adapter->rx_int_delay,
557             E1000_REGISTER(hw, E1000_RDTR), em_rx_int_delay_dflt);
558         em_add_int_delay_sysctl(adapter, "tx_int_delay",
559             "transmit interrupt delay in usecs", &adapter->tx_int_delay,
560             E1000_REGISTER(hw, E1000_TIDV), em_tx_int_delay_dflt);
561         em_add_int_delay_sysctl(adapter, "rx_abs_int_delay",
562             "receive interrupt delay limit in usecs",
563             &adapter->rx_abs_int_delay,
564             E1000_REGISTER(hw, E1000_RADV),
565             em_rx_abs_int_delay_dflt);
566         em_add_int_delay_sysctl(adapter, "tx_abs_int_delay",
567             "transmit interrupt delay limit in usecs",
568             &adapter->tx_abs_int_delay,
569             E1000_REGISTER(hw, E1000_TADV),
570             em_tx_abs_int_delay_dflt);
571         em_add_int_delay_sysctl(adapter, "itr",
572             "interrupt delay limit in usecs/4",
573             &adapter->tx_itr,
574             E1000_REGISTER(hw, E1000_ITR),
575             DEFAULT_ITR);
576
577         /* Sysctl for limiting the amount of work done in the taskqueue */
578         em_set_sysctl_value(adapter, "rx_processing_limit",
579             "max number of rx packets to process", &adapter->rx_process_limit,
580             em_rx_process_limit);
581
582         /*
583          * Validate number of transmit and receive descriptors. It
584          * must not exceed hardware maximum, and must be multiple
585          * of E1000_DBA_ALIGN.
586          */
587         if (((em_txd * sizeof(struct e1000_tx_desc)) % EM_DBA_ALIGN) != 0 ||
588             (em_txd > EM_MAX_TXD) || (em_txd < EM_MIN_TXD)) {
589                 device_printf(dev, "Using %d TX descriptors instead of %d!\n",
590                     EM_DEFAULT_TXD, em_txd);
591                 adapter->num_tx_desc = EM_DEFAULT_TXD;
592         } else
593                 adapter->num_tx_desc = em_txd;
594
595         if (((em_rxd * sizeof(struct e1000_rx_desc)) % EM_DBA_ALIGN) != 0 ||
596             (em_rxd > EM_MAX_RXD) || (em_rxd < EM_MIN_RXD)) {
597                 device_printf(dev, "Using %d RX descriptors instead of %d!\n",
598                     EM_DEFAULT_RXD, em_rxd);
599                 adapter->num_rx_desc = EM_DEFAULT_RXD;
600         } else
601                 adapter->num_rx_desc = em_rxd;
602
603         hw->mac.autoneg = DO_AUTO_NEG;
604         hw->phy.autoneg_wait_to_complete = FALSE;
605         hw->phy.autoneg_advertised = AUTONEG_ADV_DEFAULT;
606
607         /* Copper options */
608         if (hw->phy.media_type == e1000_media_type_copper) {
609                 hw->phy.mdix = AUTO_ALL_MODES;
610                 hw->phy.disable_polarity_correction = FALSE;
611                 hw->phy.ms_type = EM_MASTER_SLAVE;
612         }
613
614         /*
615          * Set the frame limits assuming
616          * standard ethernet sized frames.
617          */
618         adapter->hw.mac.max_frame_size =
619             ETHERMTU + ETHER_HDR_LEN + ETHERNET_FCS_SIZE;
620
621         /*
622          * This controls when hardware reports transmit completion
623          * status.
624          */
625         hw->mac.report_tx_early = 1;
626
627         /* 
628         ** Get queue/ring memory
629         */
630         if (em_allocate_queues(adapter)) {
631                 error = ENOMEM;
632                 goto err_pci;
633         }
634
635         /* Allocate multicast array memory. */
636         adapter->mta = malloc(sizeof(u8) * ETH_ADDR_LEN *
637             MAX_NUM_MULTICAST_ADDRESSES, M_DEVBUF, M_NOWAIT);
638         if (adapter->mta == NULL) {
639                 device_printf(dev, "Can not allocate multicast setup array\n");
640                 error = ENOMEM;
641                 goto err_late;
642         }
643
644         /* Check SOL/IDER usage */
645         if (e1000_check_reset_block(hw))
646                 device_printf(dev, "PHY reset is blocked"
647                     " due to SOL/IDER session.\n");
648
649         /* Sysctl for setting Energy Efficient Ethernet */
650         hw->dev_spec.ich8lan.eee_disable = eee_setting;
651         SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
652             SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
653             OID_AUTO, "eee_control", CTLTYPE_INT|CTLFLAG_RW,
654             adapter, 0, em_sysctl_eee, "I",
655             "Disable Energy Efficient Ethernet");
656
657         /*
658         ** Start from a known state, this is
659         ** important in reading the nvm and
660         ** mac from that.
661         */
662         e1000_reset_hw(hw);
663
664
665         /* Make sure we have a good EEPROM before we read from it */
666         if (e1000_validate_nvm_checksum(hw) < 0) {
667                 /*
668                 ** Some PCI-E parts fail the first check due to
669                 ** the link being in sleep state, call it again,
670                 ** if it fails a second time its a real issue.
671                 */
672                 if (e1000_validate_nvm_checksum(hw) < 0) {
673                         device_printf(dev,
674                             "The EEPROM Checksum Is Not Valid\n");
675                         error = EIO;
676                         goto err_late;
677                 }
678         }
679
680         /* Copy the permanent MAC address out of the EEPROM */
681         if (e1000_read_mac_addr(hw) < 0) {
682                 device_printf(dev, "EEPROM read error while reading MAC"
683                     " address\n");
684                 error = EIO;
685                 goto err_late;
686         }
687
688         if (!em_is_valid_ether_addr(hw->mac.addr)) {
689                 device_printf(dev, "Invalid MAC address\n");
690                 error = EIO;
691                 goto err_late;
692         }
693
694         /* Disable ULP support */
695         e1000_disable_ulp_lpt_lp(hw, TRUE);
696
697         /*
698         **  Do interrupt configuration
699         */
700         if (adapter->msix > 1) /* Do MSIX */
701                 error = em_allocate_msix(adapter);
702         else  /* MSI or Legacy */
703                 error = em_allocate_legacy(adapter);
704         if (error)
705                 goto err_late;
706
707         /*
708          * Get Wake-on-Lan and Management info for later use
709          */
710         em_get_wakeup(dev);
711
712         /* Setup OS specific network interface */
713         if (em_setup_interface(dev, adapter) != 0)
714                 goto err_late;
715
716         em_reset(adapter);
717
718         /* Initialize statistics */
719         em_update_stats_counters(adapter);
720
721         hw->mac.get_link_status = 1;
722         em_update_link_status(adapter);
723
724         /* Register for VLAN events */
725         adapter->vlan_attach = EVENTHANDLER_REGISTER(vlan_config,
726             em_register_vlan, adapter, EVENTHANDLER_PRI_FIRST);
727         adapter->vlan_detach = EVENTHANDLER_REGISTER(vlan_unconfig,
728             em_unregister_vlan, adapter, EVENTHANDLER_PRI_FIRST); 
729
730         em_add_hw_stats(adapter);
731
732         /* Non-AMT based hardware can now take control from firmware */
733         if (adapter->has_manage && !adapter->has_amt)
734                 em_get_hw_control(adapter);
735
736         /* Tell the stack that the interface is not active */
737         if_setdrvflagbits(adapter->ifp, IFF_DRV_OACTIVE, IFF_DRV_RUNNING);
738
739         adapter->led_dev = led_create(em_led_func, adapter,
740             device_get_nameunit(dev));
741 #ifdef DEV_NETMAP
742         em_netmap_attach(adapter);
743 #endif /* DEV_NETMAP */
744
745         INIT_DEBUGOUT("em_attach: end");
746
747         return (0);
748
749 err_late:
750         em_free_transmit_structures(adapter);
751         em_free_receive_structures(adapter);
752         em_release_hw_control(adapter);
753         if (adapter->ifp != (void *)NULL)
754                 if_free_drv(adapter->ifp);
755 err_pci:
756         em_free_pci_resources(adapter);
757         free(adapter->mta, M_DEVBUF);
758         EM_CORE_LOCK_DESTROY(adapter);
759
760         return (error);
761 }
762
763 /*********************************************************************
764  *  Device removal routine
765  *
766  *  The detach entry point is called when the driver is being removed.
767  *  This routine stops the adapter and deallocates all the resources
768  *  that were allocated for driver operation.
769  *
770  *  return 0 on success, positive on failure
771  *********************************************************************/
772
773 static int
774 em_detach(device_t dev)
775 {
776         struct adapter  *adapter = device_get_softc(dev);
777         if_t ifp = adapter->ifp;
778
779         INIT_DEBUGOUT("em_detach: begin");
780
781         /* Make sure VLANS are not using driver */
782         if (if_vlantrunkinuse(ifp)) {
783                 device_printf(dev,"Vlan in use, detach first\n");
784                 return (EBUSY);
785         }
786
787 #ifdef DEVICE_POLLING
788         if (if_getcapenable(ifp) & IFCAP_POLLING)
789                 ether_poll_deregister_drv(ifp);
790 #endif
791
792         if (adapter->led_dev != NULL)
793                 led_destroy(adapter->led_dev);
794
795         EM_CORE_LOCK(adapter);
796         adapter->in_detach = 1;
797         em_stop(adapter);
798         EM_CORE_UNLOCK(adapter);
799         EM_CORE_LOCK_DESTROY(adapter);
800
801         e1000_phy_hw_reset(&adapter->hw);
802
803         em_release_manageability(adapter);
804         em_release_hw_control(adapter);
805
806         /* Unregister VLAN events */
807         if (adapter->vlan_attach != NULL)
808                 EVENTHANDLER_DEREGISTER(vlan_config, adapter->vlan_attach);
809         if (adapter->vlan_detach != NULL)
810                 EVENTHANDLER_DEREGISTER(vlan_unconfig, adapter->vlan_detach); 
811
812         ether_ifdetach_drv(adapter->ifp);
813         callout_drain(&adapter->timer);
814
815 #ifdef DEV_NETMAP
816         netmap_detach(ifp);
817 #endif /* DEV_NETMAP */
818
819         em_free_pci_resources(adapter);
820         bus_generic_detach(dev);
821         if_free_drv(ifp);
822
823         em_free_transmit_structures(adapter);
824         em_free_receive_structures(adapter);
825
826         em_release_hw_control(adapter);
827         free(adapter->mta, M_DEVBUF);
828
829         return (0);
830 }
831
832 /*********************************************************************
833  *
834  *  Shutdown entry point
835  *
836  **********************************************************************/
837
838 static int
839 em_shutdown(device_t dev)
840 {
841         return em_suspend(dev);
842 }
843
844 /*
845  * Suspend/resume device methods.
846  */
847 static int
848 em_suspend(device_t dev)
849 {
850         struct adapter *adapter = device_get_softc(dev);
851
852         EM_CORE_LOCK(adapter);
853
854         em_release_manageability(adapter);
855         em_release_hw_control(adapter);
856         em_enable_wakeup(dev);
857
858         EM_CORE_UNLOCK(adapter);
859
860         return bus_generic_suspend(dev);
861 }
862
863 static int
864 em_resume(device_t dev)
865 {
866         struct adapter *adapter = device_get_softc(dev);
867         struct tx_ring  *txr = adapter->tx_rings;
868         if_t ifp = adapter->ifp;
869
870         EM_CORE_LOCK(adapter);
871         if (adapter->hw.mac.type == e1000_pch2lan)
872                 e1000_resume_workarounds_pchlan(&adapter->hw);
873         em_init_locked(adapter);
874         em_init_manageability(adapter);
875
876         if ((if_getflags(ifp) & IFF_UP) &&
877             (if_getdrvflags(ifp) & IFF_DRV_RUNNING) && adapter->link_active) {
878                 for (int i = 0; i < adapter->num_queues; i++, txr++) {
879                         EM_TX_LOCK(txr);
880 #ifdef EM_MULTIQUEUE
881                         if (!drbr_empty(ifp, txr->br))
882                                 em_mq_start_locked(ifp, txr, NULL);
883 #else
884                         if (!if_sendq_empty(ifp))
885                                 em_start_locked(ifp, txr);
886 #endif
887                         EM_TX_UNLOCK(txr);
888                 }
889         }
890         EM_CORE_UNLOCK(adapter);
891
892         return bus_generic_resume(dev);
893 }
894
895
896 #ifdef EM_MULTIQUEUE
897 /*********************************************************************
898  *  Multiqueue Transmit routines 
899  *
900  *  em_mq_start is called by the stack to initiate a transmit.
901  *  however, if busy the driver can queue the request rather
902  *  than do an immediate send. It is this that is an advantage
903  *  in this driver, rather than also having multiple tx queues.
904  **********************************************************************/
905 static int
906 em_mq_start_locked(if_t ifp, struct tx_ring *txr, struct mbuf *m)
907 {
908         struct adapter  *adapter = txr->adapter;
909         struct mbuf     *next;
910         int             err = 0, enq = 0;
911
912         if ((if_getdrvflags(ifp) & (IFF_DRV_RUNNING | IFF_DRV_OACTIVE)) !=
913             IFF_DRV_RUNNING || adapter->link_active == 0) {
914                 if (m != NULL)
915                         err = drbr_enqueue(ifp, txr->br, m);
916                 return (err);
917         }
918
919         enq = 0;
920         if (m != NULL) {
921                 err = drbr_enqueue(ifp, txr->br, m);
922                 if (err)
923                         return (err);
924         } 
925
926         /* Process the queue */
927         while ((next = drbr_peek(ifp, txr->br)) != NULL) {
928                 if ((err = em_xmit(txr, &next)) != 0) {
929                         if (next == NULL)
930                                 drbr_advance(ifp, txr->br);
931                         else 
932                                 drbr_putback(ifp, txr->br, next);
933                         break;
934                 }
935                 drbr_advance(ifp, txr->br);
936                 enq++;
937                 if_incobytes(ifp,  next->m_pkthdr.len);
938                 if (next->m_flags & M_MCAST)
939                         if_incomcasts(ifp, 1);
940                 if_etherbpfmtap(ifp, next);
941                 if ((if_getdrvflags(ifp) & IFF_DRV_RUNNING) == 0)
942                         break;
943         }
944
945         if (enq > 0) {
946                 /* Set the watchdog */
947                 txr->queue_status = EM_QUEUE_WORKING;
948                 txr->watchdog_time = ticks;
949         }
950
951         if (txr->tx_avail < EM_MAX_SCATTER)
952                 em_txeof(txr);
953         if (txr->tx_avail < EM_MAX_SCATTER)
954                 if_setdrvflagbits(ifp, IFF_DRV_OACTIVE,0);
955         return (err);
956 }
957
958 /*
959 ** Multiqueue capable stack interface
960 */
961 static int
962 em_mq_start(if_t ifp, struct mbuf *m)
963 {
964         struct adapter  *adapter = if_getsoftc(ifp);
965         struct tx_ring  *txr = adapter->tx_rings;
966         int             error;
967
968         if (EM_TX_TRYLOCK(txr)) {
969                 error = em_mq_start_locked(ifp, txr, m);
970                 EM_TX_UNLOCK(txr);
971         } else 
972                 error = drbr_enqueue(ifp, txr->br, m);
973
974         return (error);
975 }
976
977 /*
978 ** Flush all ring buffers
979 */
980 static void
981 em_qflush(if_t ifp)
982 {
983         struct adapter  *adapter = if_getsoftc(ifp);
984         struct tx_ring  *txr = adapter->tx_rings;
985         struct mbuf     *m;
986
987         for (int i = 0; i < adapter->num_queues; i++, txr++) {
988                 EM_TX_LOCK(txr);
989                 while ((m = buf_ring_dequeue_sc(txr->br)) != NULL)
990                         m_freem(m);
991                 EM_TX_UNLOCK(txr);
992         }
993         if_qflush(ifp);
994 }
995 #else  /* !EM_MULTIQUEUE */
996
997 static void
998 em_start_locked(if_t ifp, struct tx_ring *txr)
999 {
1000         struct adapter  *adapter = if_getsoftc(ifp);
1001         struct mbuf     *m_head;
1002
1003         EM_TX_LOCK_ASSERT(txr);
1004
1005         if ((if_getdrvflags(ifp) & (IFF_DRV_RUNNING|IFF_DRV_OACTIVE)) !=
1006             IFF_DRV_RUNNING)
1007                 return;
1008
1009         if (!adapter->link_active)
1010                 return;
1011
1012         while (!if_sendq_empty(ifp)) {
1013                 /* Call cleanup if number of TX descriptors low */
1014                 if (txr->tx_avail <= EM_TX_CLEANUP_THRESHOLD)
1015                         em_txeof(txr);
1016                 if (txr->tx_avail < EM_MAX_SCATTER) {
1017                         if_setdrvflagbits(ifp,IFF_DRV_OACTIVE, 0);
1018                         break;
1019                 }
1020                 m_head = if_dequeue(ifp);
1021                 if (m_head == NULL)
1022                         break;
1023                 /*
1024                  *  Encapsulation can modify our pointer, and or make it
1025                  *  NULL on failure.  In that event, we can't requeue.
1026                  */
1027                 if (em_xmit(txr, &m_head)) {
1028                         if (m_head == NULL)
1029                                 break;
1030                         if_sendq_prepend(ifp, m_head);
1031                         break;
1032                 }
1033
1034                 /* Send a copy of the frame to the BPF listener */
1035                 if_etherbpfmtap(ifp, m_head);
1036
1037                 /* Set timeout in case hardware has problems transmitting. */
1038                 txr->watchdog_time = ticks;
1039                 txr->queue_status = EM_QUEUE_WORKING;
1040         }
1041
1042         return;
1043 }
1044
1045 static void
1046 em_start(if_t ifp)
1047 {
1048         struct adapter  *adapter = if_getsoftc(ifp);
1049         struct tx_ring  *txr = adapter->tx_rings;
1050
1051         if (if_getdrvflags(ifp) & IFF_DRV_RUNNING) {
1052                 EM_TX_LOCK(txr);
1053                 em_start_locked(ifp, txr);
1054                 EM_TX_UNLOCK(txr);
1055         }
1056         return;
1057 }
1058 #endif /* EM_MULTIQUEUE */
1059
1060 /*********************************************************************
1061  *  Ioctl entry point
1062  *
1063  *  em_ioctl is called when the user wants to configure the
1064  *  interface.
1065  *
1066  *  return 0 on success, positive on failure
1067  **********************************************************************/
1068
1069 static int
1070 em_ioctl(if_t ifp, u_long command, caddr_t data)
1071 {
1072         struct adapter  *adapter = if_getsoftc(ifp);
1073         struct ifreq    *ifr = (struct ifreq *)data;
1074 #if defined(INET) || defined(INET6)
1075         struct ifaddr   *ifa = (struct ifaddr *)data;
1076 #endif
1077         bool            avoid_reset = FALSE;
1078         int             error = 0;
1079
1080         if (adapter->in_detach)
1081                 return (error);
1082
1083         switch (command) {
1084         case SIOCSIFADDR:
1085 #ifdef INET
1086                 if (ifa->ifa_addr->sa_family == AF_INET)
1087                         avoid_reset = TRUE;
1088 #endif
1089 #ifdef INET6
1090                 if (ifa->ifa_addr->sa_family == AF_INET6)
1091                         avoid_reset = TRUE;
1092 #endif
1093                 /*
1094                 ** Calling init results in link renegotiation,
1095                 ** so we avoid doing it when possible.
1096                 */
1097                 if (avoid_reset) {
1098                         if_setflagbits(ifp,IFF_UP,0);
1099                         if (!(if_getdrvflags(ifp)& IFF_DRV_RUNNING))
1100                                 em_init(adapter);
1101 #ifdef INET
1102                         if (!(if_getflags(ifp) & IFF_NOARP))
1103                                 arp_ifinit_drv(ifp, ifa);
1104 #endif
1105                 } else
1106                         error = ether_ioctl_drv(ifp, command, data);
1107                 break;
1108         case SIOCSIFMTU:
1109             {
1110                 int max_frame_size;
1111
1112                 IOCTL_DEBUGOUT("ioctl rcv'd: SIOCSIFMTU (Set Interface MTU)");
1113
1114                 EM_CORE_LOCK(adapter);
1115                 switch (adapter->hw.mac.type) {
1116                 case e1000_82571:
1117                 case e1000_82572:
1118                 case e1000_ich9lan:
1119                 case e1000_ich10lan:
1120                 case e1000_pch2lan:
1121                 case e1000_pch_lpt:
1122                 case e1000_82574:
1123                 case e1000_82583:
1124                 case e1000_80003es2lan: /* 9K Jumbo Frame size */
1125                         max_frame_size = 9234;
1126                         break;
1127                 case e1000_pchlan:
1128                         max_frame_size = 4096;
1129                         break;
1130                         /* Adapters that do not support jumbo frames */
1131                 case e1000_ich8lan:
1132                         max_frame_size = ETHER_MAX_LEN;
1133                         break;
1134                 default:
1135                         max_frame_size = MAX_JUMBO_FRAME_SIZE;
1136                 }
1137                 if (ifr->ifr_mtu > max_frame_size - ETHER_HDR_LEN -
1138                     ETHER_CRC_LEN) {
1139                         EM_CORE_UNLOCK(adapter);
1140                         error = EINVAL;
1141                         break;
1142                 }
1143
1144                 if_setmtu(ifp, ifr->ifr_mtu);
1145                 adapter->hw.mac.max_frame_size =
1146                     if_getmtu(ifp) + ETHER_HDR_LEN + ETHER_CRC_LEN;
1147                 em_init_locked(adapter);
1148                 EM_CORE_UNLOCK(adapter);
1149                 break;
1150             }
1151         case SIOCSIFFLAGS:
1152                 IOCTL_DEBUGOUT("ioctl rcv'd:\
1153                     SIOCSIFFLAGS (Set Interface Flags)");
1154                 EM_CORE_LOCK(adapter);
1155                 if (if_getflags(ifp) & IFF_UP) {
1156                         if (if_getdrvflags(ifp) & IFF_DRV_RUNNING) {
1157                                 if ((if_getflags(ifp) ^ adapter->if_flags) &
1158                                     (IFF_PROMISC | IFF_ALLMULTI)) {
1159                                         em_disable_promisc(adapter);
1160                                         em_set_promisc(adapter);
1161                                 }
1162                         } else
1163                                 em_init_locked(adapter);
1164                 } else
1165                         if (if_getdrvflags(ifp) & IFF_DRV_RUNNING)
1166                                 em_stop(adapter);
1167                 adapter->if_flags = if_getflags(ifp);
1168                 EM_CORE_UNLOCK(adapter);
1169                 break;
1170         case SIOCADDMULTI:
1171         case SIOCDELMULTI:
1172                 IOCTL_DEBUGOUT("ioctl rcv'd: SIOC(ADD|DEL)MULTI");
1173                 if (if_getdrvflags(ifp) & IFF_DRV_RUNNING) {
1174                         EM_CORE_LOCK(adapter);
1175                         em_disable_intr(adapter);
1176                         em_set_multi(adapter);
1177 #ifdef DEVICE_POLLING
1178                         if (!(if_getcapenable(ifp) & IFCAP_POLLING))
1179 #endif
1180                                 em_enable_intr(adapter);
1181                         EM_CORE_UNLOCK(adapter);
1182                 }
1183                 break;
1184         case SIOCSIFMEDIA:
1185                 /* Check SOL/IDER usage */
1186                 EM_CORE_LOCK(adapter);
1187                 if (e1000_check_reset_block(&adapter->hw)) {
1188                         EM_CORE_UNLOCK(adapter);
1189                         device_printf(adapter->dev, "Media change is"
1190                             " blocked due to SOL/IDER session.\n");
1191                         break;
1192                 }
1193                 EM_CORE_UNLOCK(adapter);
1194                 /* falls thru */
1195         case SIOCGIFMEDIA:
1196                 IOCTL_DEBUGOUT("ioctl rcv'd: \
1197                     SIOCxIFMEDIA (Get/Set Interface Media)");
1198                 error = ifmedia_ioctl_drv(ifp, ifr, &adapter->media, command);
1199                 break;
1200         case SIOCSIFCAP:
1201             {
1202                 int mask, reinit;
1203
1204                 IOCTL_DEBUGOUT("ioctl rcv'd: SIOCSIFCAP (Set Capabilities)");
1205                 reinit = 0;
1206                 mask = ifr->ifr_reqcap ^ if_getcapenable(ifp);
1207 #ifdef DEVICE_POLLING
1208                 if (mask & IFCAP_POLLING) {
1209                         if (ifr->ifr_reqcap & IFCAP_POLLING) {
1210                                 error = ether_poll_register_drv(em_poll, ifp);
1211                                 if (error)
1212                                         return (error);
1213                                 EM_CORE_LOCK(adapter);
1214                                 em_disable_intr(adapter);
1215                                 if_setcapenablebit(ifp, IFCAP_POLLING, 0);
1216                                 EM_CORE_UNLOCK(adapter);
1217                         } else {
1218                                 error = ether_poll_deregister_drv(ifp);
1219                                 /* Enable interrupt even in error case */
1220                                 EM_CORE_LOCK(adapter);
1221                                 em_enable_intr(adapter);
1222                                 if_setcapenablebit(ifp, 0, IFCAP_POLLING);
1223                                 EM_CORE_UNLOCK(adapter);
1224                         }
1225                 }
1226 #endif
1227                 if (mask & IFCAP_HWCSUM) {
1228                         if_togglecapenable(ifp,IFCAP_HWCSUM);
1229                         reinit = 1;
1230                 }
1231                 if (mask & IFCAP_TSO4) {
1232                         if_togglecapenable(ifp,IFCAP_TSO4);
1233                         reinit = 1;
1234                 }
1235                 if (mask & IFCAP_VLAN_HWTAGGING) {
1236                         if_togglecapenable(ifp,IFCAP_VLAN_HWTAGGING);
1237                         reinit = 1;
1238                 }
1239                 if (mask & IFCAP_VLAN_HWFILTER) {
1240                         if_togglecapenable(ifp, IFCAP_VLAN_HWFILTER);
1241                         reinit = 1;
1242                 }
1243                 if (mask & IFCAP_VLAN_HWTSO) {
1244                         if_togglecapenable(ifp, IFCAP_VLAN_HWTSO);
1245                         reinit = 1;
1246                 }
1247                 if ((mask & IFCAP_WOL) &&
1248                     (if_getcapabilities(ifp) & IFCAP_WOL) != 0) {
1249                         if (mask & IFCAP_WOL_MCAST)
1250                                 if_togglecapenable(ifp, IFCAP_WOL_MCAST);
1251                         if (mask & IFCAP_WOL_MAGIC)
1252                                 if_togglecapenable(ifp, IFCAP_WOL_MAGIC);
1253                 }
1254                 if (reinit && (if_getdrvflags(ifp) & IFF_DRV_RUNNING))
1255                         em_init(adapter);
1256                 if_vlancap(ifp);
1257                 break;
1258             }
1259
1260         default:
1261                 error = ether_ioctl_drv(ifp, command, data);
1262                 break;
1263         }
1264
1265         return (error);
1266 }
1267
1268
1269 /*********************************************************************
1270  *  Init entry point
1271  *
1272  *  This routine is used in two ways. It is used by the stack as
1273  *  init entry point in network interface structure. It is also used
1274  *  by the driver as a hw/sw initialization routine to get to a
1275  *  consistent state.
1276  *
1277  *  return 0 on success, positive on failure
1278  **********************************************************************/
1279
1280 static void
1281 em_init_locked(struct adapter *adapter)
1282 {
1283         if_t ifp = adapter->ifp;
1284         device_t        dev = adapter->dev;
1285
1286         INIT_DEBUGOUT("em_init: begin");
1287
1288         EM_CORE_LOCK_ASSERT(adapter);
1289
1290         em_disable_intr(adapter);
1291         callout_stop(&adapter->timer);
1292
1293         /* Get the latest mac address, User can use a LAA */
1294         bcopy(if_getlladdr(adapter->ifp), adapter->hw.mac.addr,
1295               ETHER_ADDR_LEN);
1296
1297         /* Put the address into the Receive Address Array */
1298         e1000_rar_set(&adapter->hw, adapter->hw.mac.addr, 0);
1299
1300         /*
1301          * With the 82571 adapter, RAR[0] may be overwritten
1302          * when the other port is reset, we make a duplicate
1303          * in RAR[14] for that eventuality, this assures
1304          * the interface continues to function.
1305          */
1306         if (adapter->hw.mac.type == e1000_82571) {
1307                 e1000_set_laa_state_82571(&adapter->hw, TRUE);
1308                 e1000_rar_set(&adapter->hw, adapter->hw.mac.addr,
1309                     E1000_RAR_ENTRIES - 1);
1310         }
1311
1312         /* Initialize the hardware */
1313         em_reset(adapter);
1314         em_update_link_status(adapter);
1315
1316         /* Setup VLAN support, basic and offload if available */
1317         E1000_WRITE_REG(&adapter->hw, E1000_VET, ETHERTYPE_VLAN);
1318
1319         /* Set hardware offload abilities */
1320         if_clearhwassist(ifp);
1321         if (if_getcapenable(ifp) & IFCAP_TXCSUM)
1322                 if_sethwassistbits(ifp, CSUM_TCP | CSUM_UDP, 0);
1323         if (if_getcapenable(ifp) & IFCAP_TSO4)
1324                 if_sethwassistbits(ifp, CSUM_TSO, 0);
1325
1326         /* Configure for OS presence */
1327         em_init_manageability(adapter);
1328
1329         /* Prepare transmit descriptors and buffers */
1330         em_setup_transmit_structures(adapter);
1331         em_initialize_transmit_unit(adapter);
1332
1333         /* Setup Multicast table */
1334         em_set_multi(adapter);
1335
1336         /*
1337         ** Figure out the desired mbuf
1338         ** pool for doing jumbos
1339         */
1340         if (adapter->hw.mac.max_frame_size <= 2048)
1341                 adapter->rx_mbuf_sz = MCLBYTES;
1342         else if (adapter->hw.mac.max_frame_size <= 4096)
1343                 adapter->rx_mbuf_sz = MJUMPAGESIZE;
1344         else
1345                 adapter->rx_mbuf_sz = MJUM9BYTES;
1346
1347         /* Prepare receive descriptors and buffers */
1348         if (em_setup_receive_structures(adapter)) {
1349                 device_printf(dev, "Could not setup receive structures\n");
1350                 em_stop(adapter);
1351                 return;
1352         }
1353         em_initialize_receive_unit(adapter);
1354
1355         /* Use real VLAN Filter support? */
1356         if (if_getcapenable(ifp) & IFCAP_VLAN_HWTAGGING) {
1357                 if (if_getcapenable(ifp) & IFCAP_VLAN_HWFILTER)
1358                         /* Use real VLAN Filter support */
1359                         em_setup_vlan_hw_support(adapter);
1360                 else {
1361                         u32 ctrl;
1362                         ctrl = E1000_READ_REG(&adapter->hw, E1000_CTRL);
1363                         ctrl |= E1000_CTRL_VME;
1364                         E1000_WRITE_REG(&adapter->hw, E1000_CTRL, ctrl);
1365                 }
1366         }
1367
1368         /* Don't lose promiscuous settings */
1369         em_set_promisc(adapter);
1370
1371         /* Set the interface as ACTIVE */
1372         if_setdrvflagbits(ifp, IFF_DRV_RUNNING, IFF_DRV_OACTIVE);
1373
1374         callout_reset(&adapter->timer, hz, em_local_timer, adapter);
1375         e1000_clear_hw_cntrs_base_generic(&adapter->hw);
1376
1377         /* MSI/X configuration for 82574 */
1378         if (adapter->hw.mac.type == e1000_82574) {
1379                 int tmp;
1380                 tmp = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
1381                 tmp |= E1000_CTRL_EXT_PBA_CLR;
1382                 E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT, tmp);
1383                 /* Set the IVAR - interrupt vector routing. */
1384                 E1000_WRITE_REG(&adapter->hw, E1000_IVAR, adapter->ivars);
1385         }
1386
1387 #ifdef DEVICE_POLLING
1388         /*
1389          * Only enable interrupts if we are not polling, make sure
1390          * they are off otherwise.
1391          */
1392         if (if_getcapenable(ifp) & IFCAP_POLLING)
1393                 em_disable_intr(adapter);
1394         else
1395 #endif /* DEVICE_POLLING */
1396                 em_enable_intr(adapter);
1397
1398         /* AMT based hardware can now take control from firmware */
1399         if (adapter->has_manage && adapter->has_amt)
1400                 em_get_hw_control(adapter);
1401 }
1402
1403 static void
1404 em_init(void *arg)
1405 {
1406         struct adapter *adapter = arg;
1407
1408         EM_CORE_LOCK(adapter);
1409         em_init_locked(adapter);
1410         EM_CORE_UNLOCK(adapter);
1411 }
1412
1413
1414 #ifdef DEVICE_POLLING
1415 /*********************************************************************
1416  *
1417  *  Legacy polling routine: note this only works with single queue
1418  *
1419  *********************************************************************/
1420 static int
1421 em_poll(if_t ifp, enum poll_cmd cmd, int count)
1422 {
1423         struct adapter *adapter = if_getsoftc(ifp);
1424         struct tx_ring  *txr = adapter->tx_rings;
1425         struct rx_ring  *rxr = adapter->rx_rings;
1426         u32             reg_icr;
1427         int             rx_done;
1428
1429         EM_CORE_LOCK(adapter);
1430         if ((if_getdrvflags(ifp) & IFF_DRV_RUNNING) == 0) {
1431                 EM_CORE_UNLOCK(adapter);
1432                 return (0);
1433         }
1434
1435         if (cmd == POLL_AND_CHECK_STATUS) {
1436                 reg_icr = E1000_READ_REG(&adapter->hw, E1000_ICR);
1437                 if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
1438                         callout_stop(&adapter->timer);
1439                         adapter->hw.mac.get_link_status = 1;
1440                         em_update_link_status(adapter);
1441                         callout_reset(&adapter->timer, hz,
1442                             em_local_timer, adapter);
1443                 }
1444         }
1445         EM_CORE_UNLOCK(adapter);
1446
1447         em_rxeof(rxr, count, &rx_done);
1448
1449         EM_TX_LOCK(txr);
1450         em_txeof(txr);
1451 #ifdef EM_MULTIQUEUE
1452         if (!drbr_empty(ifp, txr->br))
1453                 em_mq_start_locked(ifp, txr, NULL);
1454 #else
1455         if (!if_sendq_empty(ifp))
1456                 em_start_locked(ifp, txr);
1457 #endif
1458         EM_TX_UNLOCK(txr);
1459
1460         return (rx_done);
1461 }
1462 #endif /* DEVICE_POLLING */
1463
1464
1465 /*********************************************************************
1466  *
1467  *  Fast Legacy/MSI Combined Interrupt Service routine  
1468  *
1469  *********************************************************************/
1470 static int
1471 em_irq_fast(void *arg)
1472 {
1473         struct adapter  *adapter = arg;
1474         if_t ifp;
1475         u32             reg_icr;
1476
1477         ifp = adapter->ifp;
1478
1479         reg_icr = E1000_READ_REG(&adapter->hw, E1000_ICR);
1480
1481         /* Hot eject?  */
1482         if (reg_icr == 0xffffffff)
1483                 return FILTER_STRAY;
1484
1485         /* Definitely not our interrupt.  */
1486         if (reg_icr == 0x0)
1487                 return FILTER_STRAY;
1488
1489         /*
1490          * Starting with the 82571 chip, bit 31 should be used to
1491          * determine whether the interrupt belongs to us.
1492          */
1493         if (adapter->hw.mac.type >= e1000_82571 &&
1494             (reg_icr & E1000_ICR_INT_ASSERTED) == 0)
1495                 return FILTER_STRAY;
1496
1497         em_disable_intr(adapter);
1498         taskqueue_enqueue(adapter->tq, &adapter->que_task);
1499
1500         /* Link status change */
1501         if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
1502                 adapter->hw.mac.get_link_status = 1;
1503                 taskqueue_enqueue(taskqueue_fast, &adapter->link_task);
1504         }
1505
1506         if (reg_icr & E1000_ICR_RXO)
1507                 adapter->rx_overruns++;
1508         return FILTER_HANDLED;
1509 }
1510
1511 /* Combined RX/TX handler, used by Legacy and MSI */
1512 static void
1513 em_handle_que(void *context, int pending)
1514 {
1515         struct adapter  *adapter = context;
1516         if_t ifp = adapter->ifp;
1517         struct tx_ring  *txr = adapter->tx_rings;
1518         struct rx_ring  *rxr = adapter->rx_rings;
1519
1520
1521         if (if_getdrvflags(ifp) & IFF_DRV_RUNNING) {
1522                 bool more = em_rxeof(rxr, adapter->rx_process_limit, NULL);
1523                 EM_TX_LOCK(txr);
1524                 em_txeof(txr);
1525 #ifdef EM_MULTIQUEUE
1526                 if (!drbr_empty(ifp, txr->br))
1527                         em_mq_start_locked(ifp, txr, NULL);
1528 #else
1529                 if (!if_sendq_empty(ifp))
1530                         em_start_locked(ifp, txr);
1531 #endif
1532                 EM_TX_UNLOCK(txr);
1533                 if (more) {
1534                         taskqueue_enqueue(adapter->tq, &adapter->que_task);
1535                         return;
1536                 }
1537         }
1538
1539         em_enable_intr(adapter);
1540         return;
1541 }
1542
1543
1544 /*********************************************************************
1545  *
1546  *  MSIX Interrupt Service Routines
1547  *
1548  **********************************************************************/
1549 static void
1550 em_msix_tx(void *arg)
1551 {
1552         struct tx_ring *txr = arg;
1553         struct adapter *adapter = txr->adapter;
1554         if_t ifp = adapter->ifp;
1555
1556         ++txr->tx_irq;
1557         EM_TX_LOCK(txr);
1558         em_txeof(txr);
1559 #ifdef EM_MULTIQUEUE
1560         if (!drbr_empty(ifp, txr->br))
1561                 em_mq_start_locked(ifp, txr, NULL);
1562 #else
1563         if (!if_sendq_empty(ifp))
1564                 em_start_locked(ifp, txr);
1565 #endif
1566         /* Reenable this interrupt */
1567         E1000_WRITE_REG(&adapter->hw, E1000_IMS, txr->ims);
1568         EM_TX_UNLOCK(txr);
1569         return;
1570 }
1571
1572 /*********************************************************************
1573  *
1574  *  MSIX RX Interrupt Service routine
1575  *
1576  **********************************************************************/
1577
1578 static void
1579 em_msix_rx(void *arg)
1580 {
1581         struct rx_ring  *rxr = arg;
1582         struct adapter  *adapter = rxr->adapter;
1583         bool            more;
1584
1585         ++rxr->rx_irq;
1586         if (!(if_getdrvflags(adapter->ifp) & IFF_DRV_RUNNING))
1587                 return;
1588         more = em_rxeof(rxr, adapter->rx_process_limit, NULL);
1589         if (more)
1590                 taskqueue_enqueue(rxr->tq, &rxr->rx_task);
1591         else
1592                 /* Reenable this interrupt */
1593                 E1000_WRITE_REG(&adapter->hw, E1000_IMS, rxr->ims);
1594         return;
1595 }
1596
1597 /*********************************************************************
1598  *
1599  *  MSIX Link Fast Interrupt Service routine
1600  *
1601  **********************************************************************/
1602 static void
1603 em_msix_link(void *arg)
1604 {
1605         struct adapter  *adapter = arg;
1606         u32             reg_icr;
1607
1608         ++adapter->link_irq;
1609         reg_icr = E1000_READ_REG(&adapter->hw, E1000_ICR);
1610
1611         if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
1612                 adapter->hw.mac.get_link_status = 1;
1613                 em_handle_link(adapter, 0);
1614         } else
1615                 E1000_WRITE_REG(&adapter->hw, E1000_IMS,
1616                     EM_MSIX_LINK | E1000_IMS_LSC);
1617         return;
1618 }
1619
1620 static void
1621 em_handle_rx(void *context, int pending)
1622 {
1623         struct rx_ring  *rxr = context;
1624         struct adapter  *adapter = rxr->adapter;
1625         bool            more;
1626
1627         more = em_rxeof(rxr, adapter->rx_process_limit, NULL);
1628         if (more)
1629                 taskqueue_enqueue(rxr->tq, &rxr->rx_task);
1630         else
1631                 /* Reenable this interrupt */
1632                 E1000_WRITE_REG(&adapter->hw, E1000_IMS, rxr->ims);
1633 }
1634
1635 static void
1636 em_handle_tx(void *context, int pending)
1637 {
1638         struct tx_ring  *txr = context;
1639         struct adapter  *adapter = txr->adapter;
1640         if_t ifp = adapter->ifp;
1641
1642         EM_TX_LOCK(txr);
1643         em_txeof(txr);
1644 #ifdef EM_MULTIQUEUE
1645         if (!drbr_empty(ifp, txr->br))
1646                 em_mq_start_locked(ifp, txr, NULL);
1647 #else
1648         if (!if_sendq_empty(ifp))
1649                 em_start_locked(ifp, txr);
1650 #endif
1651         E1000_WRITE_REG(&adapter->hw, E1000_IMS, txr->ims);
1652         EM_TX_UNLOCK(txr);
1653 }
1654
1655 static void
1656 em_handle_link(void *context, int pending)
1657 {
1658         struct adapter  *adapter = context;
1659         struct tx_ring  *txr = adapter->tx_rings;
1660         if_t ifp = adapter->ifp;
1661
1662         if (!(if_getdrvflags(ifp) & IFF_DRV_RUNNING))
1663                 return;
1664
1665         EM_CORE_LOCK(adapter);
1666         callout_stop(&adapter->timer);
1667         em_update_link_status(adapter);
1668         callout_reset(&adapter->timer, hz, em_local_timer, adapter);
1669         E1000_WRITE_REG(&adapter->hw, E1000_IMS,
1670             EM_MSIX_LINK | E1000_IMS_LSC);
1671         if (adapter->link_active) {
1672                 for (int i = 0; i < adapter->num_queues; i++, txr++) {
1673                         EM_TX_LOCK(txr);
1674 #ifdef EM_MULTIQUEUE
1675                         if (!drbr_empty(ifp, txr->br))
1676                                 em_mq_start_locked(ifp, txr, NULL);
1677 #else
1678                         if (if_sendq_empty(ifp))
1679                                 em_start_locked(ifp, txr);
1680 #endif
1681                         EM_TX_UNLOCK(txr);
1682                 }
1683         }
1684         EM_CORE_UNLOCK(adapter);
1685 }
1686
1687
1688 /*********************************************************************
1689  *
1690  *  Media Ioctl callback
1691  *
1692  *  This routine is called whenever the user queries the status of
1693  *  the interface using ifconfig.
1694  *
1695  **********************************************************************/
1696 static void
1697 em_media_status(if_t ifp, struct ifmediareq *ifmr)
1698 {
1699         struct adapter *adapter = if_getsoftc(ifp);
1700         u_char fiber_type = IFM_1000_SX;
1701
1702         INIT_DEBUGOUT("em_media_status: begin");
1703
1704         EM_CORE_LOCK(adapter);
1705         em_update_link_status(adapter);
1706
1707         ifmr->ifm_status = IFM_AVALID;
1708         ifmr->ifm_active = IFM_ETHER;
1709
1710         if (!adapter->link_active) {
1711                 EM_CORE_UNLOCK(adapter);
1712                 return;
1713         }
1714
1715         ifmr->ifm_status |= IFM_ACTIVE;
1716
1717         if ((adapter->hw.phy.media_type == e1000_media_type_fiber) ||
1718             (adapter->hw.phy.media_type == e1000_media_type_internal_serdes)) {
1719                 ifmr->ifm_active |= fiber_type | IFM_FDX;
1720         } else {
1721                 switch (adapter->link_speed) {
1722                 case 10:
1723                         ifmr->ifm_active |= IFM_10_T;
1724                         break;
1725                 case 100:
1726                         ifmr->ifm_active |= IFM_100_TX;
1727                         break;
1728                 case 1000:
1729                         ifmr->ifm_active |= IFM_1000_T;
1730                         break;
1731                 }
1732                 if (adapter->link_duplex == FULL_DUPLEX)
1733                         ifmr->ifm_active |= IFM_FDX;
1734                 else
1735                         ifmr->ifm_active |= IFM_HDX;
1736         }
1737         EM_CORE_UNLOCK(adapter);
1738 }
1739
1740 /*********************************************************************
1741  *
1742  *  Media Ioctl callback
1743  *
1744  *  This routine is called when the user changes speed/duplex using
1745  *  media/mediopt option with ifconfig.
1746  *
1747  **********************************************************************/
1748 static int
1749 em_media_change(if_t ifp)
1750 {
1751         struct adapter *adapter = if_getsoftc(ifp);
1752         struct ifmedia  *ifm = &adapter->media;
1753
1754         INIT_DEBUGOUT("em_media_change: begin");
1755
1756         if (IFM_TYPE(ifm->ifm_media) != IFM_ETHER)
1757                 return (EINVAL);
1758
1759         EM_CORE_LOCK(adapter);
1760         switch (IFM_SUBTYPE(ifm->ifm_media)) {
1761         case IFM_AUTO:
1762                 adapter->hw.mac.autoneg = DO_AUTO_NEG;
1763                 adapter->hw.phy.autoneg_advertised = AUTONEG_ADV_DEFAULT;
1764                 break;
1765         case IFM_1000_LX:
1766         case IFM_1000_SX:
1767         case IFM_1000_T:
1768                 adapter->hw.mac.autoneg = DO_AUTO_NEG;
1769                 adapter->hw.phy.autoneg_advertised = ADVERTISE_1000_FULL;
1770                 break;
1771         case IFM_100_TX:
1772                 adapter->hw.mac.autoneg = FALSE;
1773                 adapter->hw.phy.autoneg_advertised = 0;
1774                 if ((ifm->ifm_media & IFM_GMASK) == IFM_FDX)
1775                         adapter->hw.mac.forced_speed_duplex = ADVERTISE_100_FULL;
1776                 else
1777                         adapter->hw.mac.forced_speed_duplex = ADVERTISE_100_HALF;
1778                 break;
1779         case IFM_10_T:
1780                 adapter->hw.mac.autoneg = FALSE;
1781                 adapter->hw.phy.autoneg_advertised = 0;
1782                 if ((ifm->ifm_media & IFM_GMASK) == IFM_FDX)
1783                         adapter->hw.mac.forced_speed_duplex = ADVERTISE_10_FULL;
1784                 else
1785                         adapter->hw.mac.forced_speed_duplex = ADVERTISE_10_HALF;
1786                 break;
1787         default:
1788                 device_printf(adapter->dev, "Unsupported media type\n");
1789         }
1790
1791         em_init_locked(adapter);
1792         EM_CORE_UNLOCK(adapter);
1793
1794         return (0);
1795 }
1796
1797 /*********************************************************************
1798  *
1799  *  This routine maps the mbufs to tx descriptors.
1800  *
1801  *  return 0 on success, positive on failure
1802  **********************************************************************/
1803
1804 static int
1805 em_xmit(struct tx_ring *txr, struct mbuf **m_headp)
1806 {
1807         struct adapter          *adapter = txr->adapter;
1808         bus_dma_segment_t       segs[EM_MAX_SCATTER];
1809         bus_dmamap_t            map;
1810         struct em_buffer        *tx_buffer, *tx_buffer_mapped;
1811         struct e1000_tx_desc    *ctxd = NULL;
1812         struct mbuf             *m_head;
1813         struct ether_header     *eh;
1814         struct ip               *ip = NULL;
1815         struct tcphdr           *tp = NULL;
1816         u32                     txd_upper, txd_lower, txd_used, txd_saved;
1817         int                     ip_off, poff;
1818         int                     nsegs, i, j, first, last = 0;
1819         int                     error, do_tso, tso_desc = 0, remap = 1;
1820
1821         m_head = *m_headp;
1822         txd_upper = txd_lower = txd_used = txd_saved = 0;
1823         do_tso = ((m_head->m_pkthdr.csum_flags & CSUM_TSO) != 0);
1824         ip_off = poff = 0;
1825
1826         /*
1827          * Intel recommends entire IP/TCP header length reside in a single
1828          * buffer. If multiple descriptors are used to describe the IP and
1829          * TCP header, each descriptor should describe one or more
1830          * complete headers; descriptors referencing only parts of headers
1831          * are not supported. If all layer headers are not coalesced into
1832          * a single buffer, each buffer should not cross a 4KB boundary,
1833          * or be larger than the maximum read request size.
1834          * Controller also requires modifing IP/TCP header to make TSO work
1835          * so we firstly get a writable mbuf chain then coalesce ethernet/
1836          * IP/TCP header into a single buffer to meet the requirement of
1837          * controller. This also simplifies IP/TCP/UDP checksum offloading
1838          * which also has similiar restrictions.
1839          */
1840         if (do_tso || m_head->m_pkthdr.csum_flags & CSUM_OFFLOAD) {
1841                 if (do_tso || (m_head->m_next != NULL && 
1842                     m_head->m_pkthdr.csum_flags & CSUM_OFFLOAD)) {
1843                         if (M_WRITABLE(*m_headp) == 0) {
1844                                 m_head = m_dup(*m_headp, M_NOWAIT);
1845                                 m_freem(*m_headp);
1846                                 if (m_head == NULL) {
1847                                         *m_headp = NULL;
1848                                         return (ENOBUFS);
1849                                 }
1850                                 *m_headp = m_head;
1851                         }
1852                 }
1853                 /*
1854                  * XXX
1855                  * Assume IPv4, we don't have TSO/checksum offload support
1856                  * for IPv6 yet.
1857                  */
1858                 ip_off = sizeof(struct ether_header);
1859                 m_head = m_pullup(m_head, ip_off);
1860                 if (m_head == NULL) {
1861                         *m_headp = NULL;
1862                         return (ENOBUFS);
1863                 }
1864                 eh = mtod(m_head, struct ether_header *);
1865                 if (eh->ether_type == htons(ETHERTYPE_VLAN)) {
1866                         ip_off = sizeof(struct ether_vlan_header);
1867                         m_head = m_pullup(m_head, ip_off);
1868                         if (m_head == NULL) {
1869                                 *m_headp = NULL;
1870                                 return (ENOBUFS);
1871                         }
1872                 }
1873                 m_head = m_pullup(m_head, ip_off + sizeof(struct ip));
1874                 if (m_head == NULL) {
1875                         *m_headp = NULL;
1876                         return (ENOBUFS);
1877                 }
1878                 ip = (struct ip *)(mtod(m_head, char *) + ip_off);
1879                 poff = ip_off + (ip->ip_hl << 2);
1880                 if (do_tso) {
1881                         m_head = m_pullup(m_head, poff + sizeof(struct tcphdr));
1882                         if (m_head == NULL) {
1883                                 *m_headp = NULL;
1884                                 return (ENOBUFS);
1885                         }
1886                         tp = (struct tcphdr *)(mtod(m_head, char *) + poff);
1887                         /*
1888                          * TSO workaround:
1889                          *   pull 4 more bytes of data into it.
1890                          */
1891                         m_head = m_pullup(m_head, poff + (tp->th_off << 2) + 4);
1892                         if (m_head == NULL) {
1893                                 *m_headp = NULL;
1894                                 return (ENOBUFS);
1895                         }
1896                         ip = (struct ip *)(mtod(m_head, char *) + ip_off);
1897                         ip->ip_len = 0;
1898                         ip->ip_sum = 0;
1899                         /*
1900                          * The pseudo TCP checksum does not include TCP payload
1901                          * length so driver should recompute the checksum here
1902                          * what hardware expect to see. This is adherence of
1903                          * Microsoft's Large Send specification.
1904                          */
1905                         tp = (struct tcphdr *)(mtod(m_head, char *) + poff);
1906                         tp->th_sum = in_pseudo(ip->ip_src.s_addr,
1907                             ip->ip_dst.s_addr, htons(IPPROTO_TCP));
1908                 } else if (m_head->m_pkthdr.csum_flags & CSUM_TCP) {
1909                         m_head = m_pullup(m_head, poff + sizeof(struct tcphdr));
1910                         if (m_head == NULL) {
1911                                 *m_headp = NULL;
1912                                 return (ENOBUFS);
1913                         }
1914                         tp = (struct tcphdr *)(mtod(m_head, char *) + poff);
1915                         m_head = m_pullup(m_head, poff + (tp->th_off << 2));
1916                         if (m_head == NULL) {
1917                                 *m_headp = NULL;
1918                                 return (ENOBUFS);
1919                         }
1920                         ip = (struct ip *)(mtod(m_head, char *) + ip_off);
1921                         tp = (struct tcphdr *)(mtod(m_head, char *) + poff);
1922                 } else if (m_head->m_pkthdr.csum_flags & CSUM_UDP) {
1923                         m_head = m_pullup(m_head, poff + sizeof(struct udphdr));
1924                         if (m_head == NULL) {
1925                                 *m_headp = NULL;
1926                                 return (ENOBUFS);
1927                         }
1928                         ip = (struct ip *)(mtod(m_head, char *) + ip_off);
1929                 }
1930                 *m_headp = m_head;
1931         }
1932
1933         /*
1934          * Map the packet for DMA
1935          *
1936          * Capture the first descriptor index,
1937          * this descriptor will have the index
1938          * of the EOP which is the only one that
1939          * now gets a DONE bit writeback.
1940          */
1941         first = txr->next_avail_desc;
1942         tx_buffer = &txr->tx_buffers[first];
1943         tx_buffer_mapped = tx_buffer;
1944         map = tx_buffer->map;
1945
1946 retry:
1947         error = bus_dmamap_load_mbuf_sg(txr->txtag, map,
1948             *m_headp, segs, &nsegs, BUS_DMA_NOWAIT);
1949
1950         /*
1951          * There are two types of errors we can (try) to handle:
1952          * - EFBIG means the mbuf chain was too long and bus_dma ran
1953          *   out of segments.  Defragment the mbuf chain and try again.
1954          * - ENOMEM means bus_dma could not obtain enough bounce buffers
1955          *   at this point in time.  Defer sending and try again later.
1956          * All other errors, in particular EINVAL, are fatal and prevent the
1957          * mbuf chain from ever going through.  Drop it and report error.
1958          */
1959         if (error == EFBIG && remap) {
1960                 struct mbuf *m;
1961
1962                 m = m_defrag(*m_headp, M_NOWAIT);
1963                 if (m == NULL) {
1964                         adapter->mbuf_alloc_failed++;
1965                         m_freem(*m_headp);
1966                         *m_headp = NULL;
1967                         return (ENOBUFS);
1968                 }
1969                 *m_headp = m;
1970
1971                 /* Try it again, but only once */
1972                 remap = 0;
1973                 goto retry;
1974         } else if (error == ENOMEM) {
1975                 adapter->no_tx_dma_setup++;
1976                 return (error);
1977         } else if (error != 0) {
1978                 adapter->no_tx_dma_setup++;
1979                 m_freem(*m_headp);
1980                 *m_headp = NULL;
1981                 return (error);
1982         }
1983
1984         /*
1985          * TSO Hardware workaround, if this packet is not
1986          * TSO, and is only a single descriptor long, and
1987          * it follows a TSO burst, then we need to add a
1988          * sentinel descriptor to prevent premature writeback.
1989          */
1990         if ((do_tso == 0) && (txr->tx_tso == TRUE)) {
1991                 if (nsegs == 1)
1992                         tso_desc = TRUE;
1993                 txr->tx_tso = FALSE;
1994         }
1995
1996         if (nsegs > (txr->tx_avail - 2)) {
1997                 txr->no_desc_avail++;
1998                 bus_dmamap_unload(txr->txtag, map);
1999                 return (ENOBUFS);
2000         }
2001         m_head = *m_headp;
2002
2003         /* Do hardware assists */
2004         if (m_head->m_pkthdr.csum_flags & CSUM_TSO) {
2005                 em_tso_setup(txr, m_head, ip_off, ip, tp,
2006                     &txd_upper, &txd_lower);
2007                 /* we need to make a final sentinel transmit desc */
2008                 tso_desc = TRUE;
2009         } else if (m_head->m_pkthdr.csum_flags & CSUM_OFFLOAD)
2010                 em_transmit_checksum_setup(txr, m_head,
2011                     ip_off, ip, &txd_upper, &txd_lower);
2012
2013         if (m_head->m_flags & M_VLANTAG) {
2014                 /* Set the vlan id. */
2015                 txd_upper |= htole16(if_getvtag(m_head)) << 16;
2016                 /* Tell hardware to add tag */
2017                 txd_lower |= htole32(E1000_TXD_CMD_VLE);
2018         }
2019
2020         i = txr->next_avail_desc;
2021
2022         /* Set up our transmit descriptors */
2023         for (j = 0; j < nsegs; j++) {
2024                 bus_size_t seg_len;
2025                 bus_addr_t seg_addr;
2026
2027                 tx_buffer = &txr->tx_buffers[i];
2028                 ctxd = &txr->tx_base[i];
2029                 seg_addr = segs[j].ds_addr;
2030                 seg_len  = segs[j].ds_len;
2031                 /*
2032                 ** TSO Workaround:
2033                 ** If this is the last descriptor, we want to
2034                 ** split it so we have a small final sentinel
2035                 */
2036                 if (tso_desc && (j == (nsegs -1)) && (seg_len > 8)) {
2037                         seg_len -= 4;
2038                         ctxd->buffer_addr = htole64(seg_addr);
2039                         ctxd->lower.data = htole32(
2040                         adapter->txd_cmd | txd_lower | seg_len);
2041                         ctxd->upper.data =
2042                             htole32(txd_upper);
2043                         if (++i == adapter->num_tx_desc)
2044                                 i = 0;
2045                         /* Now make the sentinel */     
2046                         ++txd_used; /* using an extra txd */
2047                         ctxd = &txr->tx_base[i];
2048                         tx_buffer = &txr->tx_buffers[i];
2049                         ctxd->buffer_addr =
2050                             htole64(seg_addr + seg_len);
2051                         ctxd->lower.data = htole32(
2052                         adapter->txd_cmd | txd_lower | 4);
2053                         ctxd->upper.data =
2054                             htole32(txd_upper);
2055                         last = i;
2056                         if (++i == adapter->num_tx_desc)
2057                                 i = 0;
2058                 } else {
2059                         ctxd->buffer_addr = htole64(seg_addr);
2060                         ctxd->lower.data = htole32(
2061                         adapter->txd_cmd | txd_lower | seg_len);
2062                         ctxd->upper.data =
2063                             htole32(txd_upper);
2064                         last = i;
2065                         if (++i == adapter->num_tx_desc)
2066                                 i = 0;
2067                 }
2068                 tx_buffer->m_head = NULL;
2069                 tx_buffer->next_eop = -1;
2070         }
2071
2072         txr->next_avail_desc = i;
2073         txr->tx_avail -= nsegs;
2074         if (tso_desc) /* TSO used an extra for sentinel */
2075                 txr->tx_avail -= txd_used;
2076
2077         tx_buffer->m_head = m_head;
2078         /*
2079         ** Here we swap the map so the last descriptor,
2080         ** which gets the completion interrupt has the
2081         ** real map, and the first descriptor gets the
2082         ** unused map from this descriptor.
2083         */
2084         tx_buffer_mapped->map = tx_buffer->map;
2085         tx_buffer->map = map;
2086         bus_dmamap_sync(txr->txtag, map, BUS_DMASYNC_PREWRITE);
2087
2088         /*
2089          * Last Descriptor of Packet
2090          * needs End Of Packet (EOP)
2091          * and Report Status (RS)
2092          */
2093         ctxd->lower.data |=
2094             htole32(E1000_TXD_CMD_EOP | E1000_TXD_CMD_RS);
2095         /*
2096          * Keep track in the first buffer which
2097          * descriptor will be written back
2098          */
2099         tx_buffer = &txr->tx_buffers[first];
2100         tx_buffer->next_eop = last;
2101         /* Update the watchdog time early and often */
2102         txr->watchdog_time = ticks;
2103
2104         /*
2105          * Advance the Transmit Descriptor Tail (TDT), this tells the E1000
2106          * that this frame is available to transmit.
2107          */
2108         bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
2109             BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
2110         E1000_WRITE_REG(&adapter->hw, E1000_TDT(txr->me), i);
2111
2112         return (0);
2113 }
2114
2115 static void
2116 em_set_promisc(struct adapter *adapter)
2117 {
2118         if_t ifp = adapter->ifp;
2119         u32             reg_rctl;
2120
2121         reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
2122
2123         if (if_getflags(ifp) & IFF_PROMISC) {
2124                 reg_rctl |= (E1000_RCTL_UPE | E1000_RCTL_MPE);
2125                 /* Turn this on if you want to see bad packets */
2126                 if (em_debug_sbp)
2127                         reg_rctl |= E1000_RCTL_SBP;
2128                 E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2129         } else if (if_getflags(ifp) & IFF_ALLMULTI) {
2130                 reg_rctl |= E1000_RCTL_MPE;
2131                 reg_rctl &= ~E1000_RCTL_UPE;
2132                 E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2133         }
2134 }
2135
2136 static void
2137 em_disable_promisc(struct adapter *adapter)
2138 {
2139         if_t            ifp = adapter->ifp;
2140         u32             reg_rctl;
2141         int             mcnt = 0;
2142
2143         reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
2144         reg_rctl &=  (~E1000_RCTL_UPE);
2145         if (if_getflags(ifp) & IFF_ALLMULTI)
2146                 mcnt = MAX_NUM_MULTICAST_ADDRESSES;
2147         else
2148                 mcnt = if_multiaddr_count(ifp, MAX_NUM_MULTICAST_ADDRESSES);
2149         /* Don't disable if in MAX groups */
2150         if (mcnt < MAX_NUM_MULTICAST_ADDRESSES)
2151                 reg_rctl &=  (~E1000_RCTL_MPE);
2152         reg_rctl &=  (~E1000_RCTL_SBP);
2153         E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2154 }
2155
2156
2157 /*********************************************************************
2158  *  Multicast Update
2159  *
2160  *  This routine is called whenever multicast address list is updated.
2161  *
2162  **********************************************************************/
2163
2164 static void
2165 em_set_multi(struct adapter *adapter)
2166 {
2167         if_t ifp = adapter->ifp;
2168         u32 reg_rctl = 0;
2169         u8  *mta; /* Multicast array memory */
2170         int mcnt = 0;
2171
2172         IOCTL_DEBUGOUT("em_set_multi: begin");
2173
2174         mta = adapter->mta;
2175         bzero(mta, sizeof(u8) * ETH_ADDR_LEN * MAX_NUM_MULTICAST_ADDRESSES);
2176
2177         if (adapter->hw.mac.type == e1000_82542 && 
2178             adapter->hw.revision_id == E1000_REVISION_2) {
2179                 reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
2180                 if (adapter->hw.bus.pci_cmd_word & CMD_MEM_WRT_INVALIDATE)
2181                         e1000_pci_clear_mwi(&adapter->hw);
2182                 reg_rctl |= E1000_RCTL_RST;
2183                 E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2184                 msec_delay(5);
2185         }
2186
2187         if_multiaddr_array(ifp, mta, &mcnt, MAX_NUM_MULTICAST_ADDRESSES);
2188
2189         if (mcnt >= MAX_NUM_MULTICAST_ADDRESSES) {
2190                 reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
2191                 reg_rctl |= E1000_RCTL_MPE;
2192                 E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2193         } else
2194                 e1000_update_mc_addr_list(&adapter->hw, mta, mcnt);
2195
2196         if (adapter->hw.mac.type == e1000_82542 && 
2197             adapter->hw.revision_id == E1000_REVISION_2) {
2198                 reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
2199                 reg_rctl &= ~E1000_RCTL_RST;
2200                 E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2201                 msec_delay(5);
2202                 if (adapter->hw.bus.pci_cmd_word & CMD_MEM_WRT_INVALIDATE)
2203                         e1000_pci_set_mwi(&adapter->hw);
2204         }
2205 }
2206
2207
2208 /*********************************************************************
2209  *  Timer routine
2210  *
2211  *  This routine checks for link status and updates statistics.
2212  *
2213  **********************************************************************/
2214
2215 static void
2216 em_local_timer(void *arg)
2217 {
2218         struct adapter  *adapter = arg;
2219         if_t ifp = adapter->ifp;
2220         struct tx_ring  *txr = adapter->tx_rings;
2221         struct rx_ring  *rxr = adapter->rx_rings;
2222         u32             trigger;
2223
2224         EM_CORE_LOCK_ASSERT(adapter);
2225
2226         em_update_link_status(adapter);
2227         em_update_stats_counters(adapter);
2228
2229         /* Reset LAA into RAR[0] on 82571 */
2230         if ((adapter->hw.mac.type == e1000_82571) &&
2231             e1000_get_laa_state_82571(&adapter->hw))
2232                 e1000_rar_set(&adapter->hw, adapter->hw.mac.addr, 0);
2233
2234         /* Mask to use in the irq trigger */
2235         if (adapter->msix_mem)
2236                 trigger = rxr->ims;
2237         else
2238                 trigger = E1000_ICS_RXDMT0;
2239
2240         /*
2241         ** Check on the state of the TX queue(s), this 
2242         ** can be done without the lock because its RO
2243         ** and the HUNG state will be static if set.
2244         */
2245         for (int i = 0; i < adapter->num_queues; i++, txr++) {
2246                 if ((txr->queue_status == EM_QUEUE_HUNG) &&
2247                     (adapter->pause_frames == 0))
2248                         goto hung;
2249                 /* Schedule a TX tasklet if needed */
2250                 if (txr->tx_avail <= EM_MAX_SCATTER)
2251                         taskqueue_enqueue(txr->tq, &txr->tx_task);
2252         }
2253         
2254         adapter->pause_frames = 0;
2255         callout_reset(&adapter->timer, hz, em_local_timer, adapter);
2256 #ifndef DEVICE_POLLING
2257         /* Trigger an RX interrupt to guarantee mbuf refresh */
2258         E1000_WRITE_REG(&adapter->hw, E1000_ICS, trigger);
2259 #endif
2260         return;
2261 hung:
2262         /* Looks like we're hung */
2263         device_printf(adapter->dev, "Watchdog timeout -- resetting\n");
2264         device_printf(adapter->dev,
2265             "Queue(%d) tdh = %d, hw tdt = %d\n", txr->me,
2266             E1000_READ_REG(&adapter->hw, E1000_TDH(txr->me)),
2267             E1000_READ_REG(&adapter->hw, E1000_TDT(txr->me)));
2268         device_printf(adapter->dev,"TX(%d) desc avail = %d,"
2269             "Next TX to Clean = %d\n",
2270             txr->me, txr->tx_avail, txr->next_to_clean);
2271         if_setdrvflagbits(ifp, 0, IFF_DRV_RUNNING);
2272         adapter->watchdog_events++;
2273         adapter->pause_frames = 0;
2274         em_init_locked(adapter);
2275 }
2276
2277
2278 static void
2279 em_update_link_status(struct adapter *adapter)
2280 {
2281         struct e1000_hw *hw = &adapter->hw;
2282         if_t ifp = adapter->ifp;
2283         device_t dev = adapter->dev;
2284         struct tx_ring *txr = adapter->tx_rings;
2285         u32 link_check = 0;
2286
2287         /* Get the cached link value or read phy for real */
2288         switch (hw->phy.media_type) {
2289         case e1000_media_type_copper:
2290                 if (hw->mac.get_link_status) {
2291                         /* Do the work to read phy */
2292                         e1000_check_for_link(hw);
2293                         link_check = !hw->mac.get_link_status;
2294                         if (link_check) /* ESB2 fix */
2295                                 e1000_cfg_on_link_up(hw);
2296                 } else
2297                         link_check = TRUE;
2298                 break;
2299         case e1000_media_type_fiber:
2300                 e1000_check_for_link(hw);
2301                 link_check = (E1000_READ_REG(hw, E1000_STATUS) &
2302                                  E1000_STATUS_LU);
2303                 break;
2304         case e1000_media_type_internal_serdes:
2305                 e1000_check_for_link(hw);
2306                 link_check = adapter->hw.mac.serdes_has_link;
2307                 break;
2308         default:
2309         case e1000_media_type_unknown:
2310                 break;
2311         }
2312
2313         /* Now check for a transition */
2314         if (link_check && (adapter->link_active == 0)) {
2315                 e1000_get_speed_and_duplex(hw, &adapter->link_speed,
2316                     &adapter->link_duplex);
2317                 /* Check if we must disable SPEED_MODE bit on PCI-E */
2318                 if ((adapter->link_speed != SPEED_1000) &&
2319                     ((hw->mac.type == e1000_82571) ||
2320                     (hw->mac.type == e1000_82572))) {
2321                         int tarc0;
2322                         tarc0 = E1000_READ_REG(hw, E1000_TARC(0));
2323                         tarc0 &= ~SPEED_MODE_BIT;
2324                         E1000_WRITE_REG(hw, E1000_TARC(0), tarc0);
2325                 }
2326                 if (bootverbose)
2327                         device_printf(dev, "Link is up %d Mbps %s\n",
2328                             adapter->link_speed,
2329                             ((adapter->link_duplex == FULL_DUPLEX) ?
2330                             "Full Duplex" : "Half Duplex"));
2331                 adapter->link_active = 1;
2332                 adapter->smartspeed = 0;
2333                 if_setbaudrate(ifp, adapter->link_speed * 1000000);
2334                 if_linkstate_change_drv(ifp, LINK_STATE_UP);
2335         } else if (!link_check && (adapter->link_active == 1)) {
2336                 if_setbaudrate(ifp, 0);
2337                 adapter->link_speed = 0;
2338                 adapter->link_duplex = 0;
2339                 if (bootverbose)
2340                         device_printf(dev, "Link is Down\n");
2341                 adapter->link_active = 0;
2342                 /* Link down, disable watchdog */
2343                 for (int i = 0; i < adapter->num_queues; i++, txr++)
2344                         txr->queue_status = EM_QUEUE_IDLE;
2345                 if_linkstate_change_drv(ifp, LINK_STATE_DOWN);
2346         }
2347 }
2348
2349 /*********************************************************************
2350  *
2351  *  This routine disables all traffic on the adapter by issuing a
2352  *  global reset on the MAC and deallocates TX/RX buffers.
2353  *
2354  *  This routine should always be called with BOTH the CORE
2355  *  and TX locks.
2356  **********************************************************************/
2357
2358 static void
2359 em_stop(void *arg)
2360 {
2361         struct adapter  *adapter = arg;
2362         if_t ifp = adapter->ifp;
2363         struct tx_ring  *txr = adapter->tx_rings;
2364
2365         EM_CORE_LOCK_ASSERT(adapter);
2366
2367         INIT_DEBUGOUT("em_stop: begin");
2368
2369         em_disable_intr(adapter);
2370         callout_stop(&adapter->timer);
2371
2372         /* Tell the stack that the interface is no longer active */
2373         if_setdrvflagbits(ifp, IFF_DRV_OACTIVE, IFF_DRV_RUNNING);
2374
2375         /* Unarm watchdog timer. */
2376         for (int i = 0; i < adapter->num_queues; i++, txr++) {
2377                 EM_TX_LOCK(txr);
2378                 txr->queue_status = EM_QUEUE_IDLE;
2379                 EM_TX_UNLOCK(txr);
2380         }
2381
2382         e1000_reset_hw(&adapter->hw);
2383         E1000_WRITE_REG(&adapter->hw, E1000_WUC, 0);
2384
2385         e1000_led_off(&adapter->hw);
2386         e1000_cleanup_led(&adapter->hw);
2387 }
2388
2389
2390 /*********************************************************************
2391  *
2392  *  Determine hardware revision.
2393  *
2394  **********************************************************************/
2395 static void
2396 em_identify_hardware(struct adapter *adapter)
2397 {
2398         device_t dev = adapter->dev;
2399
2400         /* Make sure our PCI config space has the necessary stuff set */
2401         pci_enable_busmaster(dev);
2402         adapter->hw.bus.pci_cmd_word = pci_read_config(dev, PCIR_COMMAND, 2);
2403
2404         /* Save off the information about this board */
2405         adapter->hw.vendor_id = pci_get_vendor(dev);
2406         adapter->hw.device_id = pci_get_device(dev);
2407         adapter->hw.revision_id = pci_read_config(dev, PCIR_REVID, 1);
2408         adapter->hw.subsystem_vendor_id =
2409             pci_read_config(dev, PCIR_SUBVEND_0, 2);
2410         adapter->hw.subsystem_device_id =
2411             pci_read_config(dev, PCIR_SUBDEV_0, 2);
2412
2413         /* Do Shared Code Init and Setup */
2414         if (e1000_set_mac_type(&adapter->hw)) {
2415                 device_printf(dev, "Setup init failure\n");
2416                 return;
2417         }
2418 }
2419
2420 static int
2421 em_allocate_pci_resources(struct adapter *adapter)
2422 {
2423         device_t        dev = adapter->dev;
2424         int             rid;
2425
2426         rid = PCIR_BAR(0);
2427         adapter->memory = bus_alloc_resource_any(dev, SYS_RES_MEMORY,
2428             &rid, RF_ACTIVE);
2429         if (adapter->memory == NULL) {
2430                 device_printf(dev, "Unable to allocate bus resource: memory\n");
2431                 return (ENXIO);
2432         }
2433         adapter->osdep.mem_bus_space_tag =
2434             rman_get_bustag(adapter->memory);
2435         adapter->osdep.mem_bus_space_handle =
2436             rman_get_bushandle(adapter->memory);
2437         adapter->hw.hw_addr = (u8 *)&adapter->osdep.mem_bus_space_handle;
2438
2439         /* Default to a single queue */
2440         adapter->num_queues = 1;
2441
2442         /*
2443          * Setup MSI/X or MSI if PCI Express
2444          */
2445         adapter->msix = em_setup_msix(adapter);
2446
2447         adapter->hw.back = &adapter->osdep;
2448
2449         return (0);
2450 }
2451
2452 /*********************************************************************
2453  *
2454  *  Setup the Legacy or MSI Interrupt handler
2455  *
2456  **********************************************************************/
2457 int
2458 em_allocate_legacy(struct adapter *adapter)
2459 {
2460         device_t dev = adapter->dev;
2461         struct tx_ring  *txr = adapter->tx_rings;
2462         int error, rid = 0;
2463
2464         /* Manually turn off all interrupts */
2465         E1000_WRITE_REG(&adapter->hw, E1000_IMC, 0xffffffff);
2466
2467         if (adapter->msix == 1) /* using MSI */
2468                 rid = 1;
2469         /* We allocate a single interrupt resource */
2470         adapter->res = bus_alloc_resource_any(dev,
2471             SYS_RES_IRQ, &rid, RF_SHAREABLE | RF_ACTIVE);
2472         if (adapter->res == NULL) {
2473                 device_printf(dev, "Unable to allocate bus resource: "
2474                     "interrupt\n");
2475                 return (ENXIO);
2476         }
2477
2478         /*
2479          * Allocate a fast interrupt and the associated
2480          * deferred processing contexts.
2481          */
2482         TASK_INIT(&adapter->que_task, 0, em_handle_que, adapter);
2483         adapter->tq = taskqueue_create_fast("em_taskq", M_NOWAIT,
2484             taskqueue_thread_enqueue, &adapter->tq);
2485         taskqueue_start_threads(&adapter->tq, 1, PI_NET, "%s que",
2486             device_get_nameunit(adapter->dev));
2487         /* Use a TX only tasklet for local timer */
2488         TASK_INIT(&txr->tx_task, 0, em_handle_tx, txr);
2489         txr->tq = taskqueue_create_fast("em_txq", M_NOWAIT,
2490             taskqueue_thread_enqueue, &txr->tq);
2491         taskqueue_start_threads(&txr->tq, 1, PI_NET, "%s txq",
2492             device_get_nameunit(adapter->dev));
2493         TASK_INIT(&adapter->link_task, 0, em_handle_link, adapter);
2494         if ((error = bus_setup_intr(dev, adapter->res, INTR_TYPE_NET,
2495             em_irq_fast, NULL, adapter, &adapter->tag)) != 0) {
2496                 device_printf(dev, "Failed to register fast interrupt "
2497                             "handler: %d\n", error);
2498                 taskqueue_free(adapter->tq);
2499                 adapter->tq = NULL;
2500                 return (error);
2501         }
2502         
2503         return (0);
2504 }
2505
2506 /*********************************************************************
2507  *
2508  *  Setup the MSIX Interrupt handlers
2509  *   This is not really Multiqueue, rather
2510  *   its just seperate interrupt vectors
2511  *   for TX, RX, and Link.
2512  *
2513  **********************************************************************/
2514 int
2515 em_allocate_msix(struct adapter *adapter)
2516 {
2517         device_t        dev = adapter->dev;
2518         struct          tx_ring *txr = adapter->tx_rings;
2519         struct          rx_ring *rxr = adapter->rx_rings;
2520         int             error, rid, vector = 0;
2521
2522
2523         /* Make sure all interrupts are disabled */
2524         E1000_WRITE_REG(&adapter->hw, E1000_IMC, 0xffffffff);
2525
2526         /* First set up ring resources */
2527         for (int i = 0; i < adapter->num_queues; i++, txr++, rxr++) {
2528
2529                 /* RX ring */
2530                 rid = vector + 1;
2531
2532                 rxr->res = bus_alloc_resource_any(dev,
2533                     SYS_RES_IRQ, &rid, RF_ACTIVE);
2534                 if (rxr->res == NULL) {
2535                         device_printf(dev,
2536                             "Unable to allocate bus resource: "
2537                             "RX MSIX Interrupt %d\n", i);
2538                         return (ENXIO);
2539                 }
2540                 if ((error = bus_setup_intr(dev, rxr->res,
2541                     INTR_TYPE_NET | INTR_MPSAFE, NULL, em_msix_rx,
2542                     rxr, &rxr->tag)) != 0) {
2543                         device_printf(dev, "Failed to register RX handler");
2544                         return (error);
2545                 }
2546 #if __FreeBSD_version >= 800504
2547                 bus_describe_intr(dev, rxr->res, rxr->tag, "rx %d", i);
2548 #endif
2549                 rxr->msix = vector++; /* NOTE increment vector for TX */
2550                 TASK_INIT(&rxr->rx_task, 0, em_handle_rx, rxr);
2551                 rxr->tq = taskqueue_create_fast("em_rxq", M_NOWAIT,
2552                     taskqueue_thread_enqueue, &rxr->tq);
2553                 taskqueue_start_threads(&rxr->tq, 1, PI_NET, "%s rxq",
2554                     device_get_nameunit(adapter->dev));
2555                 /*
2556                 ** Set the bit to enable interrupt
2557                 ** in E1000_IMS -- bits 20 and 21
2558                 ** are for RX0 and RX1, note this has
2559                 ** NOTHING to do with the MSIX vector
2560                 */
2561                 rxr->ims = 1 << (20 + i);
2562                 adapter->ivars |= (8 | rxr->msix) << (i * 4);
2563
2564                 /* TX ring */
2565                 rid = vector + 1;
2566                 txr->res = bus_alloc_resource_any(dev,
2567                     SYS_RES_IRQ, &rid, RF_ACTIVE);
2568                 if (txr->res == NULL) {
2569                         device_printf(dev,
2570                             "Unable to allocate bus resource: "
2571                             "TX MSIX Interrupt %d\n", i);
2572                         return (ENXIO);
2573                 }
2574                 if ((error = bus_setup_intr(dev, txr->res,
2575                     INTR_TYPE_NET | INTR_MPSAFE, NULL, em_msix_tx,
2576                     txr, &txr->tag)) != 0) {
2577                         device_printf(dev, "Failed to register TX handler");
2578                         return (error);
2579                 }
2580 #if __FreeBSD_version >= 800504
2581                 bus_describe_intr(dev, txr->res, txr->tag, "tx %d", i);
2582 #endif
2583                 txr->msix = vector++; /* Increment vector for next pass */
2584                 TASK_INIT(&txr->tx_task, 0, em_handle_tx, txr);
2585                 txr->tq = taskqueue_create_fast("em_txq", M_NOWAIT,
2586                     taskqueue_thread_enqueue, &txr->tq);
2587                 taskqueue_start_threads(&txr->tq, 1, PI_NET, "%s txq",
2588                     device_get_nameunit(adapter->dev));
2589                 /*
2590                 ** Set the bit to enable interrupt
2591                 ** in E1000_IMS -- bits 22 and 23
2592                 ** are for TX0 and TX1, note this has
2593                 ** NOTHING to do with the MSIX vector
2594                 */
2595                 txr->ims = 1 << (22 + i);
2596                 adapter->ivars |= (8 | txr->msix) << (8 + (i * 4));
2597         }
2598
2599         /* Link interrupt */
2600         ++rid;
2601         adapter->res = bus_alloc_resource_any(dev,
2602             SYS_RES_IRQ, &rid, RF_ACTIVE);
2603         if (!adapter->res) {
2604                 device_printf(dev,"Unable to allocate "
2605                     "bus resource: Link interrupt [%d]\n", rid);
2606                 return (ENXIO);
2607         }
2608         /* Set the link handler function */
2609         error = bus_setup_intr(dev, adapter->res,
2610             INTR_TYPE_NET | INTR_MPSAFE, NULL,
2611             em_msix_link, adapter, &adapter->tag);
2612         if (error) {
2613                 adapter->res = NULL;
2614                 device_printf(dev, "Failed to register LINK handler");
2615                 return (error);
2616         }
2617 #if __FreeBSD_version >= 800504
2618                 bus_describe_intr(dev, adapter->res, adapter->tag, "link");
2619 #endif
2620         adapter->linkvec = vector;
2621         adapter->ivars |=  (8 | vector) << 16;
2622         adapter->ivars |= 0x80000000;
2623
2624         return (0);
2625 }
2626
2627
2628 static void
2629 em_free_pci_resources(struct adapter *adapter)
2630 {
2631         device_t        dev = adapter->dev;
2632         struct tx_ring  *txr;
2633         struct rx_ring  *rxr;
2634         int             rid;
2635
2636
2637         /*
2638         ** Release all the queue interrupt resources:
2639         */
2640         for (int i = 0; i < adapter->num_queues; i++) {
2641                 txr = &adapter->tx_rings[i];
2642                 rxr = &adapter->rx_rings[i];
2643                 /* an early abort? */
2644                 if ((txr == NULL) || (rxr == NULL))
2645                         break;
2646                 rid = txr->msix +1;
2647                 if (txr->tag != NULL) {
2648                         bus_teardown_intr(dev, txr->res, txr->tag);
2649                         txr->tag = NULL;
2650                 }
2651                 if (txr->res != NULL)
2652                         bus_release_resource(dev, SYS_RES_IRQ,
2653                             rid, txr->res);
2654                 rid = rxr->msix +1;
2655                 if (rxr->tag != NULL) {
2656                         bus_teardown_intr(dev, rxr->res, rxr->tag);
2657                         rxr->tag = NULL;
2658                 }
2659                 if (rxr->res != NULL)
2660                         bus_release_resource(dev, SYS_RES_IRQ,
2661                             rid, rxr->res);
2662         }
2663
2664         if (adapter->linkvec) /* we are doing MSIX */
2665                 rid = adapter->linkvec + 1;
2666         else
2667                 (adapter->msix != 0) ? (rid = 1):(rid = 0);
2668
2669         if (adapter->tag != NULL) {
2670                 bus_teardown_intr(dev, adapter->res, adapter->tag);
2671                 adapter->tag = NULL;
2672         }
2673
2674         if (adapter->res != NULL)
2675                 bus_release_resource(dev, SYS_RES_IRQ, rid, adapter->res);
2676
2677
2678         if (adapter->msix)
2679                 pci_release_msi(dev);
2680
2681         if (adapter->msix_mem != NULL)
2682                 bus_release_resource(dev, SYS_RES_MEMORY,
2683                     PCIR_BAR(EM_MSIX_BAR), adapter->msix_mem);
2684
2685         if (adapter->memory != NULL)
2686                 bus_release_resource(dev, SYS_RES_MEMORY,
2687                     PCIR_BAR(0), adapter->memory);
2688
2689         if (adapter->flash != NULL)
2690                 bus_release_resource(dev, SYS_RES_MEMORY,
2691                     EM_FLASH, adapter->flash);
2692 }
2693
2694 /*
2695  * Setup MSI or MSI/X
2696  */
2697 static int
2698 em_setup_msix(struct adapter *adapter)
2699 {
2700         device_t dev = adapter->dev;
2701         int val;
2702
2703         /*
2704         ** Setup MSI/X for Hartwell: tests have shown
2705         ** use of two queues to be unstable, and to
2706         ** provide no great gain anyway, so we simply
2707         ** seperate the interrupts and use a single queue.
2708         */
2709         if ((adapter->hw.mac.type == e1000_82574) &&
2710             (em_enable_msix == TRUE)) {
2711                 /* Map the MSIX BAR */
2712                 int rid = PCIR_BAR(EM_MSIX_BAR);
2713                 adapter->msix_mem = bus_alloc_resource_any(dev,
2714                     SYS_RES_MEMORY, &rid, RF_ACTIVE);
2715                 if (adapter->msix_mem == NULL) {
2716                         /* May not be enabled */
2717                         device_printf(adapter->dev,
2718                             "Unable to map MSIX table \n");
2719                         goto msi;
2720                 }
2721                 val = pci_msix_count(dev); 
2722                 /* We only need/want 3 vectors */
2723                 if (val >= 3)
2724                         val = 3;
2725                 else {
2726                         device_printf(adapter->dev,
2727                             "MSIX: insufficient vectors, using MSI\n");
2728                         goto msi;
2729                 }
2730
2731                 if ((pci_alloc_msix(dev, &val) == 0) && (val == 3)) {
2732                         device_printf(adapter->dev,
2733                             "Using MSIX interrupts "
2734                             "with %d vectors\n", val);
2735                         return (val);
2736                 }
2737
2738                 /*
2739                 ** If MSIX alloc failed or provided us with
2740                 ** less than needed, free and fall through to MSI
2741                 */
2742                 pci_release_msi(dev);
2743         }
2744 msi:
2745         if (adapter->msix_mem != NULL) {
2746                 bus_release_resource(dev, SYS_RES_MEMORY,
2747                     PCIR_BAR(EM_MSIX_BAR), adapter->msix_mem);
2748                 adapter->msix_mem = NULL;
2749         }
2750         val = 1;
2751         if (pci_alloc_msi(dev, &val) == 0) {
2752                 device_printf(adapter->dev,"Using an MSI interrupt\n");
2753                 return (val);
2754         } 
2755         /* Should only happen due to manual configuration */
2756         device_printf(adapter->dev,"No MSI/MSIX using a Legacy IRQ\n");
2757         return (0);
2758 }
2759
2760
2761 /*********************************************************************
2762  *
2763  *  Initialize the hardware to a configuration
2764  *  as specified by the adapter structure.
2765  *
2766  **********************************************************************/
2767 static void
2768 em_reset(struct adapter *adapter)
2769 {
2770         device_t        dev = adapter->dev;
2771         if_t ifp = adapter->ifp;
2772         struct e1000_hw *hw = &adapter->hw;
2773         u16             rx_buffer_size;
2774         u32             pba;
2775
2776         INIT_DEBUGOUT("em_reset: begin");
2777
2778         /* Set up smart power down as default off on newer adapters. */
2779         if (!em_smart_pwr_down && (hw->mac.type == e1000_82571 ||
2780             hw->mac.type == e1000_82572)) {
2781                 u16 phy_tmp = 0;
2782
2783                 /* Speed up time to link by disabling smart power down. */
2784                 e1000_read_phy_reg(hw, IGP02E1000_PHY_POWER_MGMT, &phy_tmp);
2785                 phy_tmp &= ~IGP02E1000_PM_SPD;
2786                 e1000_write_phy_reg(hw, IGP02E1000_PHY_POWER_MGMT, phy_tmp);
2787         }
2788
2789         /*
2790          * Packet Buffer Allocation (PBA)
2791          * Writing PBA sets the receive portion of the buffer
2792          * the remainder is used for the transmit buffer.
2793          */
2794         switch (hw->mac.type) {
2795         /* Total Packet Buffer on these is 48K */
2796         case e1000_82571:
2797         case e1000_82572:
2798         case e1000_80003es2lan:
2799                         pba = E1000_PBA_32K; /* 32K for Rx, 16K for Tx */
2800                 break;
2801         case e1000_82573: /* 82573: Total Packet Buffer is 32K */
2802                         pba = E1000_PBA_12K; /* 12K for Rx, 20K for Tx */
2803                 break;
2804         case e1000_82574:
2805         case e1000_82583:
2806                         pba = E1000_PBA_20K; /* 20K for Rx, 20K for Tx */
2807                 break;
2808         case e1000_ich8lan:
2809                 pba = E1000_PBA_8K;
2810                 break;
2811         case e1000_ich9lan:
2812         case e1000_ich10lan:
2813                 /* Boost Receive side for jumbo frames */
2814                 if (adapter->hw.mac.max_frame_size > 4096)
2815                         pba = E1000_PBA_14K;
2816                 else
2817                         pba = E1000_PBA_10K;
2818                 break;
2819         case e1000_pchlan:
2820         case e1000_pch2lan:
2821         case e1000_pch_lpt:
2822                 pba = E1000_PBA_26K;
2823                 break;
2824         default:
2825                 if (adapter->hw.mac.max_frame_size > 8192)
2826                         pba = E1000_PBA_40K; /* 40K for Rx, 24K for Tx */
2827                 else
2828                         pba = E1000_PBA_48K; /* 48K for Rx, 16K for Tx */
2829         }
2830         E1000_WRITE_REG(&adapter->hw, E1000_PBA, pba);
2831
2832         /*
2833          * These parameters control the automatic generation (Tx) and
2834          * response (Rx) to Ethernet PAUSE frames.
2835          * - High water mark should allow for at least two frames to be
2836          *   received after sending an XOFF.
2837          * - Low water mark works best when it is very near the high water mark.
2838          *   This allows the receiver to restart by sending XON when it has
2839          *   drained a bit. Here we use an arbitary value of 1500 which will
2840          *   restart after one full frame is pulled from the buffer. There
2841          *   could be several smaller frames in the buffer and if so they will
2842          *   not trigger the XON until their total number reduces the buffer
2843          *   by 1500.
2844          * - The pause time is fairly large at 1000 x 512ns = 512 usec.
2845          */
2846         rx_buffer_size = ((E1000_READ_REG(hw, E1000_PBA) & 0xffff) << 10 );
2847         hw->fc.high_water = rx_buffer_size -
2848             roundup2(adapter->hw.mac.max_frame_size, 1024);
2849         hw->fc.low_water = hw->fc.high_water - 1500;
2850
2851         if (adapter->fc) /* locally set flow control value? */
2852                 hw->fc.requested_mode = adapter->fc;
2853         else
2854                 hw->fc.requested_mode = e1000_fc_full;
2855
2856         if (hw->mac.type == e1000_80003es2lan)
2857                 hw->fc.pause_time = 0xFFFF;
2858         else
2859                 hw->fc.pause_time = EM_FC_PAUSE_TIME;
2860
2861         hw->fc.send_xon = TRUE;
2862
2863         /* Device specific overrides/settings */
2864         switch (hw->mac.type) {
2865         case e1000_pchlan:
2866                 /* Workaround: no TX flow ctrl for PCH */
2867                 hw->fc.requested_mode = e1000_fc_rx_pause;
2868                 hw->fc.pause_time = 0xFFFF; /* override */
2869                 if (if_getmtu(ifp) > ETHERMTU) {
2870                         hw->fc.high_water = 0x3500;
2871                         hw->fc.low_water = 0x1500;
2872                 } else {
2873                         hw->fc.high_water = 0x5000;
2874                         hw->fc.low_water = 0x3000;
2875                 }
2876                 hw->fc.refresh_time = 0x1000;
2877                 break;
2878         case e1000_pch2lan:
2879         case e1000_pch_lpt:
2880                 hw->fc.high_water = 0x5C20;
2881                 hw->fc.low_water = 0x5048;
2882                 hw->fc.pause_time = 0x0650;
2883                 hw->fc.refresh_time = 0x0400;
2884                 /* Jumbos need adjusted PBA */
2885                 if (if_getmtu(ifp) > ETHERMTU)
2886                         E1000_WRITE_REG(hw, E1000_PBA, 12);
2887                 else
2888                         E1000_WRITE_REG(hw, E1000_PBA, 26);
2889                 break;
2890         case e1000_ich9lan:
2891         case e1000_ich10lan:
2892                 if (if_getmtu(ifp) > ETHERMTU) {
2893                         hw->fc.high_water = 0x2800;
2894                         hw->fc.low_water = hw->fc.high_water - 8;
2895                         break;
2896                 } 
2897                 /* else fall thru */
2898         default:
2899                 if (hw->mac.type == e1000_80003es2lan)
2900                         hw->fc.pause_time = 0xFFFF;
2901                 break;
2902         }
2903
2904         /* Issue a global reset */
2905         e1000_reset_hw(hw);
2906         E1000_WRITE_REG(hw, E1000_WUC, 0);
2907         em_disable_aspm(adapter);
2908         /* and a re-init */
2909         if (e1000_init_hw(hw) < 0) {
2910                 device_printf(dev, "Hardware Initialization Failed\n");
2911                 return;
2912         }
2913
2914         E1000_WRITE_REG(hw, E1000_VET, ETHERTYPE_VLAN);
2915         e1000_get_phy_info(hw);
2916         e1000_check_for_link(hw);
2917         return;
2918 }
2919
2920 /*********************************************************************
2921  *
2922  *  Setup networking device structure and register an interface.
2923  *
2924  **********************************************************************/
2925 static int
2926 em_setup_interface(device_t dev, struct adapter *adapter)
2927 {
2928         if_t ifp;
2929
2930         INIT_DEBUGOUT("em_setup_interface: begin");
2931
2932         ifp = adapter->ifp = if_gethandle(IFT_ETHER);
2933         if (ifp == 0) {
2934                 device_printf(dev, "can not allocate ifnet structure\n");
2935                 return (-1);
2936         }
2937         if_initname_drv(ifp, device_get_name(dev), device_get_unit(dev));
2938         if_setdev(ifp, dev);
2939         if_setinitfn(ifp, em_init);
2940         if_setsoftc(ifp, adapter);
2941         if_setflags(ifp, IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST);
2942         if_setioctlfn(ifp, em_ioctl);
2943 #ifdef EM_MULTIQUEUE
2944         /* Multiqueue stack interface */
2945         if_settransmitfn(ifp, em_mq_start);
2946         if_setqflushfn(ifp, em_qflush);
2947 #else
2948         if_setstartfn(ifp, em_start);
2949         if_setsendqlen(ifp, adapter->num_tx_desc - 1);
2950         if_setsendqready(ifp);
2951 #endif  
2952
2953         ether_ifattach_drv(ifp, adapter->hw.mac.addr);
2954
2955         if_setcapabilities(ifp, 0);
2956         if_setcapenable(ifp, 0);
2957
2958
2959         if_setcapabilitiesbit(ifp, IFCAP_HWCSUM | IFCAP_VLAN_HWCSUM |
2960             IFCAP_TSO4, 0);
2961         /*
2962          * Tell the upper layer(s) we
2963          * support full VLAN capability
2964          */
2965         if_setifheaderlen(ifp, sizeof(struct ether_vlan_header));
2966         if_setcapabilitiesbit(ifp, IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_HWTSO |
2967             IFCAP_VLAN_MTU, 0);
2968         if_setcapenable(ifp, if_getcapabilities(ifp));
2969
2970         /*
2971         ** Don't turn this on by default, if vlans are
2972         ** created on another pseudo device (eg. lagg)
2973         ** then vlan events are not passed thru, breaking
2974         ** operation, but with HW FILTER off it works. If
2975         ** using vlans directly on the em driver you can
2976         ** enable this and get full hardware tag filtering.
2977         */
2978         if_setcapabilitiesbit(ifp, IFCAP_VLAN_HWFILTER,0);
2979
2980 #ifdef DEVICE_POLLING
2981         if_setcapabilitiesbit(ifp, IFCAP_POLLING,0);
2982 #endif
2983
2984         /* Enable only WOL MAGIC by default */
2985         if (adapter->wol) {
2986                 if_setcapabilitiesbit(ifp, IFCAP_WOL, 0);
2987                 if_setcapenablebit(ifp, IFCAP_WOL_MAGIC, 0);
2988         }
2989                 
2990         /*
2991          * Specify the media types supported by this adapter and register
2992          * callbacks to update media and link information
2993          */
2994         ifmedia_init_drv(&adapter->media, IFM_IMASK,
2995             em_media_change, em_media_status);
2996         if ((adapter->hw.phy.media_type == e1000_media_type_fiber) ||
2997             (adapter->hw.phy.media_type == e1000_media_type_internal_serdes)) {
2998                 u_char fiber_type = IFM_1000_SX;        /* default type */
2999
3000                 ifmedia_add(&adapter->media, IFM_ETHER | fiber_type | IFM_FDX, 
3001                             0, NULL);
3002                 ifmedia_add(&adapter->media, IFM_ETHER | fiber_type, 0, NULL);
3003         } else {
3004                 ifmedia_add(&adapter->media, IFM_ETHER | IFM_10_T, 0, NULL);
3005                 ifmedia_add(&adapter->media, IFM_ETHER | IFM_10_T | IFM_FDX,
3006                             0, NULL);
3007                 ifmedia_add(&adapter->media, IFM_ETHER | IFM_100_TX,
3008                             0, NULL);
3009                 ifmedia_add(&adapter->media, IFM_ETHER | IFM_100_TX | IFM_FDX,
3010                             0, NULL);
3011                 if (adapter->hw.phy.type != e1000_phy_ife) {
3012                         ifmedia_add(&adapter->media,
3013                                 IFM_ETHER | IFM_1000_T | IFM_FDX, 0, NULL);
3014                         ifmedia_add(&adapter->media,
3015                                 IFM_ETHER | IFM_1000_T, 0, NULL);
3016                 }
3017         }
3018         ifmedia_add(&adapter->media, IFM_ETHER | IFM_AUTO, 0, NULL);
3019         ifmedia_set(&adapter->media, IFM_ETHER | IFM_AUTO);
3020         return (0);
3021 }
3022
3023
3024 /*
3025  * Manage DMA'able memory.
3026  */
3027 static void
3028 em_dmamap_cb(void *arg, bus_dma_segment_t *segs, int nseg, int error)
3029 {
3030         if (error)
3031                 return;
3032         *(bus_addr_t *) arg = segs[0].ds_addr;
3033 }
3034
3035 static int
3036 em_dma_malloc(struct adapter *adapter, bus_size_t size,
3037         struct em_dma_alloc *dma, int mapflags)
3038 {
3039         int error;
3040
3041         error = bus_dma_tag_create(bus_get_dma_tag(adapter->dev), /* parent */
3042                                 EM_DBA_ALIGN, 0,        /* alignment, bounds */
3043                                 BUS_SPACE_MAXADDR,      /* lowaddr */
3044                                 BUS_SPACE_MAXADDR,      /* highaddr */
3045                                 NULL, NULL,             /* filter, filterarg */
3046                                 size,                   /* maxsize */
3047                                 1,                      /* nsegments */
3048                                 size,                   /* maxsegsize */
3049                                 0,                      /* flags */
3050                                 NULL,                   /* lockfunc */
3051                                 NULL,                   /* lockarg */
3052                                 &dma->dma_tag);
3053         if (error) {
3054                 device_printf(adapter->dev,
3055                     "%s: bus_dma_tag_create failed: %d\n",
3056                     __func__, error);
3057                 goto fail_0;
3058         }
3059
3060         error = bus_dmamem_alloc(dma->dma_tag, (void**) &dma->dma_vaddr,
3061             BUS_DMA_NOWAIT | BUS_DMA_COHERENT, &dma->dma_map);
3062         if (error) {
3063                 device_printf(adapter->dev,
3064                     "%s: bus_dmamem_alloc(%ju) failed: %d\n",
3065                     __func__, (uintmax_t)size, error);
3066                 goto fail_2;
3067         }
3068
3069         dma->dma_paddr = 0;
3070         error = bus_dmamap_load(dma->dma_tag, dma->dma_map, dma->dma_vaddr,
3071             size, em_dmamap_cb, &dma->dma_paddr, mapflags | BUS_DMA_NOWAIT);
3072         if (error || dma->dma_paddr == 0) {
3073                 device_printf(adapter->dev,
3074                     "%s: bus_dmamap_load failed: %d\n",
3075                     __func__, error);
3076                 goto fail_3;
3077         }
3078
3079         return (0);
3080
3081 fail_3:
3082         bus_dmamap_unload(dma->dma_tag, dma->dma_map);
3083 fail_2:
3084         bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map);
3085         bus_dma_tag_destroy(dma->dma_tag);
3086 fail_0:
3087         dma->dma_tag = NULL;
3088
3089         return (error);
3090 }
3091
3092 static void
3093 em_dma_free(struct adapter *adapter, struct em_dma_alloc *dma)
3094 {
3095         if (dma->dma_tag == NULL)
3096                 return;
3097         if (dma->dma_paddr != 0) {
3098                 bus_dmamap_sync(dma->dma_tag, dma->dma_map,
3099                     BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
3100                 bus_dmamap_unload(dma->dma_tag, dma->dma_map);
3101                 dma->dma_paddr = 0;
3102         }
3103         if (dma->dma_vaddr != NULL) {
3104                 bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map);
3105                 dma->dma_vaddr = NULL;
3106         }
3107         bus_dma_tag_destroy(dma->dma_tag);
3108         dma->dma_tag = NULL;
3109 }
3110
3111
3112 /*********************************************************************
3113  *
3114  *  Allocate memory for the transmit and receive rings, and then
3115  *  the descriptors associated with each, called only once at attach.
3116  *
3117  **********************************************************************/
3118 static int
3119 em_allocate_queues(struct adapter *adapter)
3120 {
3121         device_t                dev = adapter->dev;
3122         struct tx_ring          *txr = NULL;
3123         struct rx_ring          *rxr = NULL;
3124         int rsize, tsize, error = E1000_SUCCESS;
3125         int txconf = 0, rxconf = 0;
3126
3127
3128         /* Allocate the TX ring struct memory */
3129         if (!(adapter->tx_rings =
3130             (struct tx_ring *) malloc(sizeof(struct tx_ring) *
3131             adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
3132                 device_printf(dev, "Unable to allocate TX ring memory\n");
3133                 error = ENOMEM;
3134                 goto fail;
3135         }
3136
3137         /* Now allocate the RX */
3138         if (!(adapter->rx_rings =
3139             (struct rx_ring *) malloc(sizeof(struct rx_ring) *
3140             adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
3141                 device_printf(dev, "Unable to allocate RX ring memory\n");
3142                 error = ENOMEM;
3143                 goto rx_fail;
3144         }
3145
3146         tsize = roundup2(adapter->num_tx_desc *
3147             sizeof(struct e1000_tx_desc), EM_DBA_ALIGN);
3148         /*
3149          * Now set up the TX queues, txconf is needed to handle the
3150          * possibility that things fail midcourse and we need to
3151          * undo memory gracefully
3152          */ 
3153         for (int i = 0; i < adapter->num_queues; i++, txconf++) {
3154                 /* Set up some basics */
3155                 txr = &adapter->tx_rings[i];
3156                 txr->adapter = adapter;
3157                 txr->me = i;
3158
3159                 /* Initialize the TX lock */
3160                 snprintf(txr->mtx_name, sizeof(txr->mtx_name), "%s:tx(%d)",
3161                     device_get_nameunit(dev), txr->me);
3162                 mtx_init(&txr->tx_mtx, txr->mtx_name, NULL, MTX_DEF);
3163
3164                 if (em_dma_malloc(adapter, tsize,
3165                         &txr->txdma, BUS_DMA_NOWAIT)) {
3166                         device_printf(dev,
3167                             "Unable to allocate TX Descriptor memory\n");
3168                         error = ENOMEM;
3169                         goto err_tx_desc;
3170                 }
3171                 txr->tx_base = (struct e1000_tx_desc *)txr->txdma.dma_vaddr;
3172                 bzero((void *)txr->tx_base, tsize);
3173
3174                 if (em_allocate_transmit_buffers(txr)) {
3175                         device_printf(dev,
3176                             "Critical Failure setting up transmit buffers\n");
3177                         error = ENOMEM;
3178                         goto err_tx_desc;
3179                 }
3180 #if __FreeBSD_version >= 800000
3181                 /* Allocate a buf ring */
3182                 txr->br = buf_ring_alloc(4096, M_DEVBUF,
3183                     M_WAITOK, &txr->tx_mtx);
3184 #endif
3185         }
3186
3187         /*
3188          * Next the RX queues...
3189          */ 
3190         rsize = roundup2(adapter->num_rx_desc *
3191             sizeof(struct e1000_rx_desc), EM_DBA_ALIGN);
3192         for (int i = 0; i < adapter->num_queues; i++, rxconf++) {
3193                 rxr = &adapter->rx_rings[i];
3194                 rxr->adapter = adapter;
3195                 rxr->me = i;
3196
3197                 /* Initialize the RX lock */
3198                 snprintf(rxr->mtx_name, sizeof(rxr->mtx_name), "%s:rx(%d)",
3199                     device_get_nameunit(dev), txr->me);
3200                 mtx_init(&rxr->rx_mtx, rxr->mtx_name, NULL, MTX_DEF);
3201
3202                 if (em_dma_malloc(adapter, rsize,
3203                         &rxr->rxdma, BUS_DMA_NOWAIT)) {
3204                         device_printf(dev,
3205                             "Unable to allocate RxDescriptor memory\n");
3206                         error = ENOMEM;
3207                         goto err_rx_desc;
3208                 }
3209                 rxr->rx_base = (struct e1000_rx_desc *)rxr->rxdma.dma_vaddr;
3210                 bzero((void *)rxr->rx_base, rsize);
3211
3212                 /* Allocate receive buffers for the ring*/
3213                 if (em_allocate_receive_buffers(rxr)) {
3214                         device_printf(dev,
3215                             "Critical Failure setting up receive buffers\n");
3216                         error = ENOMEM;
3217                         goto err_rx_desc;
3218                 }
3219         }
3220
3221         return (0);
3222
3223 err_rx_desc:
3224         for (rxr = adapter->rx_rings; rxconf > 0; rxr++, rxconf--)
3225                 em_dma_free(adapter, &rxr->rxdma);
3226 err_tx_desc:
3227         for (txr = adapter->tx_rings; txconf > 0; txr++, txconf--)
3228                 em_dma_free(adapter, &txr->txdma);
3229         free(adapter->rx_rings, M_DEVBUF);
3230 rx_fail:
3231 #if __FreeBSD_version >= 800000
3232         buf_ring_free(txr->br, M_DEVBUF);
3233 #endif
3234         free(adapter->tx_rings, M_DEVBUF);
3235 fail:
3236         return (error);
3237 }
3238
3239
3240 /*********************************************************************
3241  *
3242  *  Allocate memory for tx_buffer structures. The tx_buffer stores all
3243  *  the information needed to transmit a packet on the wire. This is
3244  *  called only once at attach, setup is done every reset.
3245  *
3246  **********************************************************************/
3247 static int
3248 em_allocate_transmit_buffers(struct tx_ring *txr)
3249 {
3250         struct adapter *adapter = txr->adapter;
3251         device_t dev = adapter->dev;
3252         struct em_buffer *txbuf;
3253         int error, i;
3254
3255         /*
3256          * Setup DMA descriptor areas.
3257          */
3258         if ((error = bus_dma_tag_create(bus_get_dma_tag(dev),
3259                                1, 0,                    /* alignment, bounds */
3260                                BUS_SPACE_MAXADDR,       /* lowaddr */
3261                                BUS_SPACE_MAXADDR,       /* highaddr */
3262                                NULL, NULL,              /* filter, filterarg */
3263                                EM_TSO_SIZE,             /* maxsize */
3264                                EM_MAX_SCATTER,          /* nsegments */
3265                                PAGE_SIZE,               /* maxsegsize */
3266                                0,                       /* flags */
3267                                NULL,                    /* lockfunc */
3268                                NULL,                    /* lockfuncarg */
3269                                &txr->txtag))) {
3270                 device_printf(dev,"Unable to allocate TX DMA tag\n");
3271                 goto fail;
3272         }
3273
3274         if (!(txr->tx_buffers =
3275             (struct em_buffer *) malloc(sizeof(struct em_buffer) *
3276             adapter->num_tx_desc, M_DEVBUF, M_NOWAIT | M_ZERO))) {
3277                 device_printf(dev, "Unable to allocate tx_buffer memory\n");
3278                 error = ENOMEM;
3279                 goto fail;
3280         }
3281
3282         /* Create the descriptor buffer dma maps */
3283         txbuf = txr->tx_buffers;
3284         for (i = 0; i < adapter->num_tx_desc; i++, txbuf++) {
3285                 error = bus_dmamap_create(txr->txtag, 0, &txbuf->map);
3286                 if (error != 0) {
3287                         device_printf(dev, "Unable to create TX DMA map\n");
3288                         goto fail;
3289                 }
3290         }
3291
3292         return 0;
3293 fail:
3294         /* We free all, it handles case where we are in the middle */
3295         em_free_transmit_structures(adapter);
3296         return (error);
3297 }
3298
3299 /*********************************************************************
3300  *
3301  *  Initialize a transmit ring.
3302  *
3303  **********************************************************************/
3304 static void
3305 em_setup_transmit_ring(struct tx_ring *txr)
3306 {
3307         struct adapter *adapter = txr->adapter;
3308         struct em_buffer *txbuf;
3309         int i;
3310 #ifdef DEV_NETMAP
3311         struct netmap_slot *slot;
3312         struct netmap_adapter *na = netmap_getna(adapter->ifp);
3313 #endif /* DEV_NETMAP */
3314
3315         /* Clear the old descriptor contents */
3316         EM_TX_LOCK(txr);
3317 #ifdef DEV_NETMAP
3318         slot = netmap_reset(na, NR_TX, txr->me, 0);
3319 #endif /* DEV_NETMAP */
3320
3321         bzero((void *)txr->tx_base,
3322               (sizeof(struct e1000_tx_desc)) * adapter->num_tx_desc);
3323         /* Reset indices */
3324         txr->next_avail_desc = 0;
3325         txr->next_to_clean = 0;
3326
3327         /* Free any existing tx buffers. */
3328         txbuf = txr->tx_buffers;
3329         for (i = 0; i < adapter->num_tx_desc; i++, txbuf++) {
3330                 if (txbuf->m_head != NULL) {
3331                         bus_dmamap_sync(txr->txtag, txbuf->map,
3332                             BUS_DMASYNC_POSTWRITE);
3333                         bus_dmamap_unload(txr->txtag, txbuf->map);
3334                         m_freem(txbuf->m_head);
3335                         txbuf->m_head = NULL;
3336                 }
3337 #ifdef DEV_NETMAP
3338                 if (slot) {
3339                         int si = netmap_idx_n2k(&na->tx_rings[txr->me], i);
3340                         uint64_t paddr;
3341                         void *addr;
3342
3343                         addr = PNMB(slot + si, &paddr);
3344                         txr->tx_base[i].buffer_addr = htole64(paddr);
3345                         /* reload the map for netmap mode */
3346                         netmap_load_map(txr->txtag, txbuf->map, addr);
3347                 }
3348 #endif /* DEV_NETMAP */
3349
3350                 /* clear the watch index */
3351                 txbuf->next_eop = -1;
3352         }
3353
3354         /* Set number of descriptors available */
3355         txr->tx_avail = adapter->num_tx_desc;
3356         txr->queue_status = EM_QUEUE_IDLE;
3357
3358         /* Clear checksum offload context. */
3359         txr->last_hw_offload = 0;
3360         txr->last_hw_ipcss = 0;
3361         txr->last_hw_ipcso = 0;
3362         txr->last_hw_tucss = 0;
3363         txr->last_hw_tucso = 0;
3364
3365         bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
3366             BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
3367         EM_TX_UNLOCK(txr);
3368 }
3369
3370 /*********************************************************************
3371  *
3372  *  Initialize all transmit rings.
3373  *
3374  **********************************************************************/
3375 static void
3376 em_setup_transmit_structures(struct adapter *adapter)
3377 {
3378         struct tx_ring *txr = adapter->tx_rings;
3379
3380         for (int i = 0; i < adapter->num_queues; i++, txr++)
3381                 em_setup_transmit_ring(txr);
3382
3383         return;
3384 }
3385
3386 /*********************************************************************
3387  *
3388  *  Enable transmit unit.
3389  *
3390  **********************************************************************/
3391 static void
3392 em_initialize_transmit_unit(struct adapter *adapter)
3393 {
3394         struct tx_ring  *txr = adapter->tx_rings;
3395         struct e1000_hw *hw = &adapter->hw;
3396         u32     tctl, tarc, tipg = 0;
3397
3398          INIT_DEBUGOUT("em_initialize_transmit_unit: begin");
3399
3400         for (int i = 0; i < adapter->num_queues; i++, txr++) {
3401                 u64 bus_addr = txr->txdma.dma_paddr;
3402                 /* Base and Len of TX Ring */
3403                 E1000_WRITE_REG(hw, E1000_TDLEN(i),
3404                     adapter->num_tx_desc * sizeof(struct e1000_tx_desc));
3405                 E1000_WRITE_REG(hw, E1000_TDBAH(i),
3406                     (u32)(bus_addr >> 32));
3407                 E1000_WRITE_REG(hw, E1000_TDBAL(i),
3408                     (u32)bus_addr);
3409                 /* Init the HEAD/TAIL indices */
3410                 E1000_WRITE_REG(hw, E1000_TDT(i), 0);
3411                 E1000_WRITE_REG(hw, E1000_TDH(i), 0);
3412
3413                 HW_DEBUGOUT2("Base = %x, Length = %x\n",
3414                     E1000_READ_REG(&adapter->hw, E1000_TDBAL(i)),
3415                     E1000_READ_REG(&adapter->hw, E1000_TDLEN(i)));
3416
3417                 txr->queue_status = EM_QUEUE_IDLE;
3418         }
3419
3420         /* Set the default values for the Tx Inter Packet Gap timer */
3421         switch (adapter->hw.mac.type) {
3422         case e1000_80003es2lan:
3423                 tipg = DEFAULT_82543_TIPG_IPGR1;
3424                 tipg |= DEFAULT_80003ES2LAN_TIPG_IPGR2 <<
3425                     E1000_TIPG_IPGR2_SHIFT;
3426                 break;
3427         default:
3428                 if ((adapter->hw.phy.media_type == e1000_media_type_fiber) ||
3429                     (adapter->hw.phy.media_type ==
3430                     e1000_media_type_internal_serdes))
3431                         tipg = DEFAULT_82543_TIPG_IPGT_FIBER;
3432                 else
3433                         tipg = DEFAULT_82543_TIPG_IPGT_COPPER;
3434                 tipg |= DEFAULT_82543_TIPG_IPGR1 << E1000_TIPG_IPGR1_SHIFT;
3435                 tipg |= DEFAULT_82543_TIPG_IPGR2 << E1000_TIPG_IPGR2_SHIFT;
3436         }
3437
3438         E1000_WRITE_REG(&adapter->hw, E1000_TIPG, tipg);
3439         E1000_WRITE_REG(&adapter->hw, E1000_TIDV, adapter->tx_int_delay.value);
3440
3441         if(adapter->hw.mac.type >= e1000_82540)
3442                 E1000_WRITE_REG(&adapter->hw, E1000_TADV,
3443                     adapter->tx_abs_int_delay.value);
3444
3445         if ((adapter->hw.mac.type == e1000_82571) ||
3446             (adapter->hw.mac.type == e1000_82572)) {
3447                 tarc = E1000_READ_REG(&adapter->hw, E1000_TARC(0));
3448                 tarc |= SPEED_MODE_BIT;
3449                 E1000_WRITE_REG(&adapter->hw, E1000_TARC(0), tarc);
3450         } else if (adapter->hw.mac.type == e1000_80003es2lan) {
3451                 tarc = E1000_READ_REG(&adapter->hw, E1000_TARC(0));
3452                 tarc |= 1;
3453                 E1000_WRITE_REG(&adapter->hw, E1000_TARC(0), tarc);
3454                 tarc = E1000_READ_REG(&adapter->hw, E1000_TARC(1));
3455                 tarc |= 1;
3456                 E1000_WRITE_REG(&adapter->hw, E1000_TARC(1), tarc);
3457         }
3458
3459         adapter->txd_cmd = E1000_TXD_CMD_IFCS;
3460         if (adapter->tx_int_delay.value > 0)
3461                 adapter->txd_cmd |= E1000_TXD_CMD_IDE;
3462
3463         /* Program the Transmit Control Register */
3464         tctl = E1000_READ_REG(&adapter->hw, E1000_TCTL);
3465         tctl &= ~E1000_TCTL_CT;
3466         tctl |= (E1000_TCTL_PSP | E1000_TCTL_RTLC | E1000_TCTL_EN |
3467                    (E1000_COLLISION_THRESHOLD << E1000_CT_SHIFT));
3468
3469         if (adapter->hw.mac.type >= e1000_82571)
3470                 tctl |= E1000_TCTL_MULR;
3471
3472         /* This write will effectively turn on the transmit unit. */
3473         E1000_WRITE_REG(&adapter->hw, E1000_TCTL, tctl);
3474
3475 }
3476
3477
3478 /*********************************************************************
3479  *
3480  *  Free all transmit rings.
3481  *
3482  **********************************************************************/
3483 static void
3484 em_free_transmit_structures(struct adapter *adapter)
3485 {
3486         struct tx_ring *txr = adapter->tx_rings;
3487
3488         for (int i = 0; i < adapter->num_queues; i++, txr++) {
3489                 EM_TX_LOCK(txr);
3490                 em_free_transmit_buffers(txr);
3491                 em_dma_free(adapter, &txr->txdma);
3492                 EM_TX_UNLOCK(txr);
3493                 EM_TX_LOCK_DESTROY(txr);
3494         }
3495
3496         free(adapter->tx_rings, M_DEVBUF);
3497 }
3498
3499 /*********************************************************************
3500  *
3501  *  Free transmit ring related data structures.
3502  *
3503  **********************************************************************/
3504 static void
3505 em_free_transmit_buffers(struct tx_ring *txr)
3506 {
3507         struct adapter          *adapter = txr->adapter;
3508         struct em_buffer        *txbuf;
3509
3510         INIT_DEBUGOUT("free_transmit_ring: begin");
3511
3512         if (txr->tx_buffers == NULL)
3513                 return;
3514
3515         for (int i = 0; i < adapter->num_tx_desc; i++) {
3516                 txbuf = &txr->tx_buffers[i];
3517                 if (txbuf->m_head != NULL) {
3518                         bus_dmamap_sync(txr->txtag, txbuf->map,
3519                             BUS_DMASYNC_POSTWRITE);
3520                         bus_dmamap_unload(txr->txtag,
3521                             txbuf->map);
3522                         m_freem(txbuf->m_head);
3523                         txbuf->m_head = NULL;
3524                         if (txbuf->map != NULL) {
3525                                 bus_dmamap_destroy(txr->txtag,
3526                                     txbuf->map);
3527                                 txbuf->map = NULL;
3528                         }
3529                 } else if (txbuf->map != NULL) {
3530                         bus_dmamap_unload(txr->txtag,
3531                             txbuf->map);
3532                         bus_dmamap_destroy(txr->txtag,
3533                             txbuf->map);
3534                         txbuf->map = NULL;
3535                 }
3536         }
3537 #if __FreeBSD_version >= 800000
3538         if (txr->br != NULL)
3539                 buf_ring_free(txr->br, M_DEVBUF);
3540 #endif
3541         if (txr->tx_buffers != NULL) {
3542                 free(txr->tx_buffers, M_DEVBUF);
3543                 txr->tx_buffers = NULL;
3544         }
3545         if (txr->txtag != NULL) {
3546                 bus_dma_tag_destroy(txr->txtag);
3547                 txr->txtag = NULL;
3548         }
3549         return;
3550 }
3551
3552
3553 /*********************************************************************
3554  *  The offload context is protocol specific (TCP/UDP) and thus
3555  *  only needs to be set when the protocol changes. The occasion
3556  *  of a context change can be a performance detriment, and
3557  *  might be better just disabled. The reason arises in the way
3558  *  in which the controller supports pipelined requests from the
3559  *  Tx data DMA. Up to four requests can be pipelined, and they may
3560  *  belong to the same packet or to multiple packets. However all
3561  *  requests for one packet are issued before a request is issued
3562  *  for a subsequent packet and if a request for the next packet
3563  *  requires a context change, that request will be stalled
3564  *  until the previous request completes. This means setting up
3565  *  a new context effectively disables pipelined Tx data DMA which
3566  *  in turn greatly slow down performance to send small sized
3567  *  frames. 
3568  **********************************************************************/
3569 static void
3570 em_transmit_checksum_setup(struct tx_ring *txr, struct mbuf *mp, int ip_off,
3571     struct ip *ip, u32 *txd_upper, u32 *txd_lower)
3572 {
3573         struct adapter                  *adapter = txr->adapter;
3574         struct e1000_context_desc       *TXD = NULL;
3575         struct em_buffer                *tx_buffer;
3576         int                             cur, hdr_len;
3577         u32                             cmd = 0;
3578         u16                             offload = 0;
3579         u8                              ipcso, ipcss, tucso, tucss;
3580
3581         ipcss = ipcso = tucss = tucso = 0;
3582         hdr_len = ip_off + (ip->ip_hl << 2);
3583         cur = txr->next_avail_desc;
3584
3585         /* Setup of IP header checksum. */
3586         if (mp->m_pkthdr.csum_flags & CSUM_IP) {
3587                 *txd_upper |= E1000_TXD_POPTS_IXSM << 8;
3588                 offload |= CSUM_IP;
3589                 ipcss = ip_off;
3590                 ipcso = ip_off + offsetof(struct ip, ip_sum);
3591                 /*
3592                  * Start offset for header checksum calculation.
3593                  * End offset for header checksum calculation.
3594                  * Offset of place to put the checksum.
3595                  */
3596                 TXD = (struct e1000_context_desc *)&txr->tx_base[cur];
3597                 TXD->lower_setup.ip_fields.ipcss = ipcss;
3598                 TXD->lower_setup.ip_fields.ipcse = htole16(hdr_len);
3599                 TXD->lower_setup.ip_fields.ipcso = ipcso;
3600                 cmd |= E1000_TXD_CMD_IP;
3601         }
3602
3603         if (mp->m_pkthdr.csum_flags & CSUM_TCP) {
3604                 *txd_lower = E1000_TXD_CMD_DEXT | E1000_TXD_DTYP_D;
3605                 *txd_upper |= E1000_TXD_POPTS_TXSM << 8;
3606                 offload |= CSUM_TCP;
3607                 tucss = hdr_len;
3608                 tucso = hdr_len + offsetof(struct tcphdr, th_sum);
3609                 /*
3610                  * Setting up new checksum offload context for every frames
3611                  * takes a lot of processing time for hardware. This also
3612                  * reduces performance a lot for small sized frames so avoid
3613                  * it if driver can use previously configured checksum
3614                  * offload context.
3615                  */
3616                 if (txr->last_hw_offload == offload) {
3617                         if (offload & CSUM_IP) {
3618                                 if (txr->last_hw_ipcss == ipcss &&
3619                                     txr->last_hw_ipcso == ipcso &&
3620                                     txr->last_hw_tucss == tucss &&
3621                                     txr->last_hw_tucso == tucso)
3622                                         return;
3623                         } else {
3624                                 if (txr->last_hw_tucss == tucss &&
3625                                     txr->last_hw_tucso == tucso)
3626                                         return;
3627                         }
3628                 }
3629                 txr->last_hw_offload = offload;
3630                 txr->last_hw_tucss = tucss;
3631                 txr->last_hw_tucso = tucso;
3632                 /*
3633                  * Start offset for payload checksum calculation.
3634                  * End offset for payload checksum calculation.
3635                  * Offset of place to put the checksum.
3636                  */
3637                 TXD = (struct e1000_context_desc *)&txr->tx_base[cur];
3638                 TXD->upper_setup.tcp_fields.tucss = hdr_len;
3639                 TXD->upper_setup.tcp_fields.tucse = htole16(0);
3640                 TXD->upper_setup.tcp_fields.tucso = tucso;
3641                 cmd |= E1000_TXD_CMD_TCP;
3642         } else if (mp->m_pkthdr.csum_flags & CSUM_UDP) {
3643                 *txd_lower = E1000_TXD_CMD_DEXT | E1000_TXD_DTYP_D;
3644                 *txd_upper |= E1000_TXD_POPTS_TXSM << 8;
3645                 tucss = hdr_len;
3646                 tucso = hdr_len + offsetof(struct udphdr, uh_sum);
3647                 /*
3648                  * Setting up new checksum offload context for every frames
3649                  * takes a lot of processing time for hardware. This also
3650                  * reduces performance a lot for small sized frames so avoid
3651                  * it if driver can use previously configured checksum
3652                  * offload context.
3653                  */
3654                 if (txr->last_hw_offload == offload) {
3655                         if (offload & CSUM_IP) {
3656                                 if (txr->last_hw_ipcss == ipcss &&
3657                                     txr->last_hw_ipcso == ipcso &&
3658                                     txr->last_hw_tucss == tucss &&
3659                                     txr->last_hw_tucso == tucso)
3660                                         return;
3661                         } else {
3662                                 if (txr->last_hw_tucss == tucss &&
3663                                     txr->last_hw_tucso == tucso)
3664                                         return;
3665                         }
3666                 }
3667                 txr->last_hw_offload = offload;
3668                 txr->last_hw_tucss = tucss;
3669                 txr->last_hw_tucso = tucso;
3670                 /*
3671                  * Start offset for header checksum calculation.
3672                  * End offset for header checksum calculation.
3673                  * Offset of place to put the checksum.
3674                  */
3675                 TXD = (struct e1000_context_desc *)&txr->tx_base[cur];
3676                 TXD->upper_setup.tcp_fields.tucss = tucss;
3677                 TXD->upper_setup.tcp_fields.tucse = htole16(0);
3678                 TXD->upper_setup.tcp_fields.tucso = tucso;
3679         }
3680   
3681         if (offload & CSUM_IP) {
3682                 txr->last_hw_ipcss = ipcss;
3683                 txr->last_hw_ipcso = ipcso;
3684         }
3685
3686         TXD->tcp_seg_setup.data = htole32(0);
3687         TXD->cmd_and_length =
3688             htole32(adapter->txd_cmd | E1000_TXD_CMD_DEXT | cmd);
3689         tx_buffer = &txr->tx_buffers[cur];
3690         tx_buffer->m_head = NULL;
3691         tx_buffer->next_eop = -1;
3692
3693         if (++cur == adapter->num_tx_desc)
3694                 cur = 0;
3695
3696         txr->tx_avail--;
3697         txr->next_avail_desc = cur;
3698 }
3699
3700
3701 /**********************************************************************
3702  *
3703  *  Setup work for hardware segmentation offload (TSO)
3704  *
3705  **********************************************************************/
3706 static void
3707 em_tso_setup(struct tx_ring *txr, struct mbuf *mp, int ip_off,
3708     struct ip *ip, struct tcphdr *tp, u32 *txd_upper, u32 *txd_lower)
3709 {
3710         struct adapter                  *adapter = txr->adapter;
3711         struct e1000_context_desc       *TXD;
3712         struct em_buffer                *tx_buffer;
3713         int cur, hdr_len;
3714
3715         /*
3716          * In theory we can use the same TSO context if and only if
3717          * frame is the same type(IP/TCP) and the same MSS. However
3718          * checking whether a frame has the same IP/TCP structure is
3719          * hard thing so just ignore that and always restablish a
3720          * new TSO context.
3721          */
3722         hdr_len = ip_off + (ip->ip_hl << 2) + (tp->th_off << 2);
3723         *txd_lower = (E1000_TXD_CMD_DEXT |      /* Extended descr type */
3724                       E1000_TXD_DTYP_D |        /* Data descr type */
3725                       E1000_TXD_CMD_TSE);       /* Do TSE on this packet */
3726
3727         /* IP and/or TCP header checksum calculation and insertion. */
3728         *txd_upper = (E1000_TXD_POPTS_IXSM | E1000_TXD_POPTS_TXSM) << 8;
3729
3730         cur = txr->next_avail_desc;
3731         tx_buffer = &txr->tx_buffers[cur];
3732         TXD = (struct e1000_context_desc *) &txr->tx_base[cur];
3733
3734         /*
3735          * Start offset for header checksum calculation.
3736          * End offset for header checksum calculation.
3737          * Offset of place put the checksum.
3738          */
3739         TXD->lower_setup.ip_fields.ipcss = ip_off;
3740         TXD->lower_setup.ip_fields.ipcse =
3741             htole16(ip_off + (ip->ip_hl << 2) - 1);
3742         TXD->lower_setup.ip_fields.ipcso = ip_off + offsetof(struct ip, ip_sum);
3743         /*
3744          * Start offset for payload checksum calculation.
3745          * End offset for payload checksum calculation.
3746          * Offset of place to put the checksum.
3747          */
3748         TXD->upper_setup.tcp_fields.tucss = ip_off + (ip->ip_hl << 2);
3749         TXD->upper_setup.tcp_fields.tucse = 0;
3750         TXD->upper_setup.tcp_fields.tucso =
3751             ip_off + (ip->ip_hl << 2) + offsetof(struct tcphdr, th_sum);
3752         /*
3753          * Payload size per packet w/o any headers.
3754          * Length of all headers up to payload.
3755          */
3756         TXD->tcp_seg_setup.fields.mss = htole16(mp->m_pkthdr.tso_segsz);
3757         TXD->tcp_seg_setup.fields.hdr_len = hdr_len;
3758
3759         TXD->cmd_and_length = htole32(adapter->txd_cmd |
3760                                 E1000_TXD_CMD_DEXT |    /* Extended descr */
3761                                 E1000_TXD_CMD_TSE |     /* TSE context */
3762                                 E1000_TXD_CMD_IP |      /* Do IP csum */
3763                                 E1000_TXD_CMD_TCP |     /* Do TCP checksum */
3764                                 (mp->m_pkthdr.len - (hdr_len))); /* Total len */
3765
3766         tx_buffer->m_head = NULL;
3767         tx_buffer->next_eop = -1;
3768
3769         if (++cur == adapter->num_tx_desc)
3770                 cur = 0;
3771
3772         txr->tx_avail--;
3773         txr->next_avail_desc = cur;
3774         txr->tx_tso = TRUE;
3775 }
3776
3777
3778 /**********************************************************************
3779  *
3780  *  Examine each tx_buffer in the used queue. If the hardware is done
3781  *  processing the packet then free associated resources. The
3782  *  tx_buffer is put back on the free queue.
3783  *
3784  **********************************************************************/
3785 static void
3786 em_txeof(struct tx_ring *txr)
3787 {
3788         struct adapter  *adapter = txr->adapter;
3789         int first, last, done, processed;
3790         struct em_buffer *tx_buffer;
3791         struct e1000_tx_desc   *tx_desc, *eop_desc;
3792         if_t ifp = adapter->ifp;
3793
3794         EM_TX_LOCK_ASSERT(txr);
3795 #ifdef DEV_NETMAP
3796         if (netmap_tx_irq(ifp, txr->me))
3797                 return;
3798 #endif /* DEV_NETMAP */
3799
3800         /* No work, make sure watchdog is off */
3801         if (txr->tx_avail == adapter->num_tx_desc) {
3802                 txr->queue_status = EM_QUEUE_IDLE;
3803                 return;
3804         }
3805
3806         processed = 0;
3807         first = txr->next_to_clean;
3808         tx_desc = &txr->tx_base[first];
3809         tx_buffer = &txr->tx_buffers[first];
3810         last = tx_buffer->next_eop;
3811         eop_desc = &txr->tx_base[last];
3812
3813         /*
3814          * What this does is get the index of the
3815          * first descriptor AFTER the EOP of the 
3816          * first packet, that way we can do the
3817          * simple comparison on the inner while loop.
3818          */
3819         if (++last == adapter->num_tx_desc)
3820                 last = 0;
3821         done = last;
3822
3823         bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
3824             BUS_DMASYNC_POSTREAD);
3825
3826         while (eop_desc->upper.fields.status & E1000_TXD_STAT_DD) {
3827                 /* We clean the range of the packet */
3828                 while (first != done) {
3829                         tx_desc->upper.data = 0;
3830                         tx_desc->lower.data = 0;
3831                         tx_desc->buffer_addr = 0;
3832                         ++txr->tx_avail;
3833                         ++processed;
3834
3835                         if (tx_buffer->m_head) {
3836                                 bus_dmamap_sync(txr->txtag,
3837                                     tx_buffer->map,
3838                                     BUS_DMASYNC_POSTWRITE);
3839                                 bus_dmamap_unload(txr->txtag,
3840                                     tx_buffer->map);
3841                                 m_freem(tx_buffer->m_head);
3842                                 tx_buffer->m_head = NULL;
3843                         }
3844                         tx_buffer->next_eop = -1;
3845                         txr->watchdog_time = ticks;
3846
3847                         if (++first == adapter->num_tx_desc)
3848                                 first = 0;
3849
3850                         tx_buffer = &txr->tx_buffers[first];
3851                         tx_desc = &txr->tx_base[first];
3852                 }
3853                 if_incopackets(ifp, 1);
3854                 /* See if we can continue to the next packet */
3855                 last = tx_buffer->next_eop;
3856                 if (last != -1) {
3857                         eop_desc = &txr->tx_base[last];
3858                         /* Get new done point */
3859                         if (++last == adapter->num_tx_desc) last = 0;
3860                         done = last;
3861                 } else
3862                         break;
3863         }
3864         bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
3865             BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
3866
3867         txr->next_to_clean = first;
3868
3869         /*
3870         ** Watchdog calculation, we know there's
3871         ** work outstanding or the first return
3872         ** would have been taken, so none processed
3873         ** for too long indicates a hang. local timer
3874         ** will examine this and do a reset if needed.
3875         */
3876         if ((!processed) && ((ticks - txr->watchdog_time) > EM_WATCHDOG))
3877                 txr->queue_status = EM_QUEUE_HUNG;
3878
3879         /*
3880          * If we have a minimum free, clear IFF_DRV_OACTIVE
3881          * to tell the stack that it is OK to send packets.
3882          * Notice that all writes of OACTIVE happen under the
3883          * TX lock which, with a single queue, guarantees 
3884          * sanity.
3885          */
3886         if (txr->tx_avail >= EM_MAX_SCATTER)
3887                 if_setdrvflagbits(ifp, 0, IFF_DRV_OACTIVE);
3888
3889         /* Disable watchdog if all clean */
3890         if (txr->tx_avail == adapter->num_tx_desc) {
3891                 txr->queue_status = EM_QUEUE_IDLE;
3892         } 
3893 }
3894
3895
3896 /*********************************************************************
3897  *
3898  *  Refresh RX descriptor mbufs from system mbuf buffer pool.
3899  *
3900  **********************************************************************/
3901 static void
3902 em_refresh_mbufs(struct rx_ring *rxr, int limit)
3903 {
3904         struct adapter          *adapter = rxr->adapter;
3905         struct mbuf             *m;
3906         bus_dma_segment_t       segs[1];
3907         struct em_buffer        *rxbuf;
3908         int                     i, j, error, nsegs;
3909         bool                    cleaned = FALSE;
3910
3911         i = j = rxr->next_to_refresh;
3912         /*
3913         ** Get one descriptor beyond
3914         ** our work mark to control
3915         ** the loop.
3916         */
3917         if (++j == adapter->num_rx_desc)
3918                 j = 0;
3919
3920         while (j != limit) {
3921                 rxbuf = &rxr->rx_buffers[i];
3922                 if (rxbuf->m_head == NULL) {
3923                         m = m_getjcl(M_NOWAIT, MT_DATA,
3924                             M_PKTHDR, adapter->rx_mbuf_sz);
3925                         /*
3926                         ** If we have a temporary resource shortage
3927                         ** that causes a failure, just abort refresh
3928                         ** for now, we will return to this point when
3929                         ** reinvoked from em_rxeof.
3930                         */
3931                         if (m == NULL)
3932                                 goto update;
3933                 } else
3934                         m = rxbuf->m_head;
3935
3936                 m->m_len = m->m_pkthdr.len = adapter->rx_mbuf_sz;
3937                 m->m_flags |= M_PKTHDR;
3938                 m->m_data = m->m_ext.ext_buf;
3939
3940                 /* Use bus_dma machinery to setup the memory mapping  */
3941                 error = bus_dmamap_load_mbuf_sg(rxr->rxtag, rxbuf->map,
3942                     m, segs, &nsegs, BUS_DMA_NOWAIT);
3943                 if (error != 0) {
3944                         printf("Refresh mbufs: hdr dmamap load"
3945                             " failure - %d\n", error);
3946                         m_free(m);
3947                         rxbuf->m_head = NULL;
3948                         goto update;
3949                 }
3950                 rxbuf->m_head = m;
3951                 bus_dmamap_sync(rxr->rxtag,
3952                     rxbuf->map, BUS_DMASYNC_PREREAD);
3953                 rxr->rx_base[i].buffer_addr = htole64(segs[0].ds_addr);
3954                 cleaned = TRUE;
3955
3956                 i = j; /* Next is precalulated for us */
3957                 rxr->next_to_refresh = i;
3958                 /* Calculate next controlling index */
3959                 if (++j == adapter->num_rx_desc)
3960                         j = 0;
3961         }
3962 update:
3963         /*
3964         ** Update the tail pointer only if,
3965         ** and as far as we have refreshed.
3966         */
3967         if (cleaned)
3968                 E1000_WRITE_REG(&adapter->hw,
3969                     E1000_RDT(rxr->me), rxr->next_to_refresh);
3970
3971         return;
3972 }
3973
3974
3975 /*********************************************************************
3976  *
3977  *  Allocate memory for rx_buffer structures. Since we use one
3978  *  rx_buffer per received packet, the maximum number of rx_buffer's
3979  *  that we'll need is equal to the number of receive descriptors
3980  *  that we've allocated.
3981  *
3982  **********************************************************************/
3983 static int
3984 em_allocate_receive_buffers(struct rx_ring *rxr)
3985 {
3986         struct adapter          *adapter = rxr->adapter;
3987         device_t                dev = adapter->dev;
3988         struct em_buffer        *rxbuf;
3989         int                     error;
3990
3991         rxr->rx_buffers = malloc(sizeof(struct em_buffer) *
3992             adapter->num_rx_desc, M_DEVBUF, M_NOWAIT | M_ZERO);
3993         if (rxr->rx_buffers == NULL) {
3994                 device_printf(dev, "Unable to allocate rx_buffer memory\n");
3995                 return (ENOMEM);
3996         }
3997
3998         error = bus_dma_tag_create(bus_get_dma_tag(dev), /* parent */
3999                                 1, 0,                   /* alignment, bounds */
4000                                 BUS_SPACE_MAXADDR,      /* lowaddr */
4001                                 BUS_SPACE_MAXADDR,      /* highaddr */
4002                                 NULL, NULL,             /* filter, filterarg */
4003                                 MJUM9BYTES,             /* maxsize */
4004                                 1,                      /* nsegments */
4005                                 MJUM9BYTES,             /* maxsegsize */
4006                                 0,                      /* flags */
4007                                 NULL,                   /* lockfunc */
4008                                 NULL,                   /* lockarg */
4009                                 &rxr->rxtag);
4010         if (error) {
4011                 device_printf(dev, "%s: bus_dma_tag_create failed %d\n",
4012                     __func__, error);
4013                 goto fail;
4014         }
4015
4016         rxbuf = rxr->rx_buffers;
4017         for (int i = 0; i < adapter->num_rx_desc; i++, rxbuf++) {
4018                 rxbuf = &rxr->rx_buffers[i];
4019                 error = bus_dmamap_create(rxr->rxtag, 0, &rxbuf->map);
4020                 if (error) {
4021                         device_printf(dev, "%s: bus_dmamap_create failed: %d\n",
4022                             __func__, error);
4023                         goto fail;
4024                 }
4025         }
4026
4027         return (0);
4028
4029 fail:
4030         em_free_receive_structures(adapter);
4031         return (error);
4032 }
4033
4034
4035 /*********************************************************************
4036  *
4037  *  Initialize a receive ring and its buffers.
4038  *
4039  **********************************************************************/
4040 static int
4041 em_setup_receive_ring(struct rx_ring *rxr)
4042 {
4043         struct  adapter         *adapter = rxr->adapter;
4044         struct em_buffer        *rxbuf;
4045         bus_dma_segment_t       seg[1];
4046         int                     rsize, nsegs, error = 0;
4047 #ifdef DEV_NETMAP
4048         struct netmap_slot *slot;
4049         struct netmap_adapter *na = netmap_getna(adapter->ifp);
4050 #endif
4051
4052
4053         /* Clear the ring contents */
4054         EM_RX_LOCK(rxr);
4055         rsize = roundup2(adapter->num_rx_desc *
4056             sizeof(struct e1000_rx_desc), EM_DBA_ALIGN);
4057         bzero((void *)rxr->rx_base, rsize);
4058 #ifdef DEV_NETMAP
4059         slot = netmap_reset(na, NR_RX, rxr->me, 0);
4060 #endif
4061
4062         /*
4063         ** Free current RX buffer structs and their mbufs
4064         */
4065         for (int i = 0; i < adapter->num_rx_desc; i++) {
4066                 rxbuf = &rxr->rx_buffers[i];
4067                 if (rxbuf->m_head != NULL) {
4068                         bus_dmamap_sync(rxr->rxtag, rxbuf->map,
4069                             BUS_DMASYNC_POSTREAD);
4070                         bus_dmamap_unload(rxr->rxtag, rxbuf->map);
4071                         m_freem(rxbuf->m_head);
4072                         rxbuf->m_head = NULL; /* mark as freed */
4073                 }
4074         }
4075
4076         /* Now replenish the mbufs */
4077         for (int j = 0; j != adapter->num_rx_desc; ++j) {
4078                 rxbuf = &rxr->rx_buffers[j];
4079 #ifdef DEV_NETMAP
4080                 if (slot) {
4081                         int si = netmap_idx_n2k(&na->rx_rings[rxr->me], j);
4082                         uint64_t paddr;
4083                         void *addr;
4084
4085                         addr = PNMB(slot + si, &paddr);
4086                         netmap_load_map(rxr->rxtag, rxbuf->map, addr);
4087                         /* Update descriptor */
4088                         rxr->rx_base[j].buffer_addr = htole64(paddr);
4089                         continue;
4090                 }
4091 #endif /* DEV_NETMAP */
4092                 rxbuf->m_head = m_getjcl(M_NOWAIT, MT_DATA,
4093                     M_PKTHDR, adapter->rx_mbuf_sz);
4094                 if (rxbuf->m_head == NULL) {
4095                         error = ENOBUFS;
4096                         goto fail;
4097                 }
4098                 rxbuf->m_head->m_len = adapter->rx_mbuf_sz;
4099                 rxbuf->m_head->m_flags &= ~M_HASFCS; /* we strip it */
4100                 rxbuf->m_head->m_pkthdr.len = adapter->rx_mbuf_sz;
4101
4102                 /* Get the memory mapping */
4103                 error = bus_dmamap_load_mbuf_sg(rxr->rxtag,
4104                     rxbuf->map, rxbuf->m_head, seg,
4105                     &nsegs, BUS_DMA_NOWAIT);
4106                 if (error != 0) {
4107                         m_freem(rxbuf->m_head);
4108                         rxbuf->m_head = NULL;
4109                         goto fail;
4110                 }
4111                 bus_dmamap_sync(rxr->rxtag,
4112                     rxbuf->map, BUS_DMASYNC_PREREAD);
4113
4114                 /* Update descriptor */
4115                 rxr->rx_base[j].buffer_addr = htole64(seg[0].ds_addr);
4116         }
4117         rxr->next_to_check = 0;
4118         rxr->next_to_refresh = 0;
4119         bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
4120             BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
4121
4122 fail:
4123         EM_RX_UNLOCK(rxr);
4124         return (error);
4125 }
4126
4127 /*********************************************************************
4128  *
4129  *  Initialize all receive rings.
4130  *
4131  **********************************************************************/
4132 static int
4133 em_setup_receive_structures(struct adapter *adapter)
4134 {
4135         struct rx_ring *rxr = adapter->rx_rings;
4136         int q;
4137
4138         for (q = 0; q < adapter->num_queues; q++, rxr++)
4139                 if (em_setup_receive_ring(rxr))
4140                         goto fail;
4141
4142         return (0);
4143 fail:
4144         /*
4145          * Free RX buffers allocated so far, we will only handle
4146          * the rings that completed, the failing case will have
4147          * cleaned up for itself. 'q' failed, so its the terminus.
4148          */
4149         for (int i = 0; i < q; ++i) {
4150                 rxr = &adapter->rx_rings[i];
4151                 for (int n = 0; n < adapter->num_rx_desc; n++) {
4152                         struct em_buffer *rxbuf;
4153                         rxbuf = &rxr->rx_buffers[n];
4154                         if (rxbuf->m_head != NULL) {
4155                                 bus_dmamap_sync(rxr->rxtag, rxbuf->map,
4156                                   BUS_DMASYNC_POSTREAD);
4157                                 bus_dmamap_unload(rxr->rxtag, rxbuf->map);
4158                                 m_freem(rxbuf->m_head);
4159                                 rxbuf->m_head = NULL;
4160                         }
4161                 }
4162                 rxr->next_to_check = 0;
4163                 rxr->next_to_refresh = 0;
4164         }
4165
4166         return (ENOBUFS);
4167 }
4168
4169 /*********************************************************************
4170  *
4171  *  Free all receive rings.
4172  *
4173  **********************************************************************/
4174 static void
4175 em_free_receive_structures(struct adapter *adapter)
4176 {
4177         struct rx_ring *rxr = adapter->rx_rings;
4178
4179         for (int i = 0; i < adapter->num_queues; i++, rxr++) {
4180                 em_free_receive_buffers(rxr);
4181                 /* Free the ring memory as well */
4182                 em_dma_free(adapter, &rxr->rxdma);
4183                 EM_RX_LOCK_DESTROY(rxr);
4184         }
4185
4186         free(adapter->rx_rings, M_DEVBUF);
4187 }
4188
4189
4190 /*********************************************************************
4191  *
4192  *  Free receive ring data structures
4193  *
4194  **********************************************************************/
4195 static void
4196 em_free_receive_buffers(struct rx_ring *rxr)
4197 {
4198         struct adapter          *adapter = rxr->adapter;
4199         struct em_buffer        *rxbuf = NULL;
4200
4201         INIT_DEBUGOUT("free_receive_buffers: begin");
4202
4203         if (rxr->rx_buffers != NULL) {
4204                 for (int i = 0; i < adapter->num_rx_desc; i++) {
4205                         rxbuf = &rxr->rx_buffers[i];
4206                         if (rxbuf->map != NULL) {
4207                                 bus_dmamap_sync(rxr->rxtag, rxbuf->map,
4208                                     BUS_DMASYNC_POSTREAD);
4209                                 bus_dmamap_unload(rxr->rxtag, rxbuf->map);
4210                                 bus_dmamap_destroy(rxr->rxtag, rxbuf->map);
4211                         }
4212                         if (rxbuf->m_head != NULL) {
4213                                 m_freem(rxbuf->m_head);
4214                                 rxbuf->m_head = NULL;
4215                         }
4216                 }
4217                 free(rxr->rx_buffers, M_DEVBUF);
4218                 rxr->rx_buffers = NULL;
4219                 rxr->next_to_check = 0;
4220                 rxr->next_to_refresh = 0;
4221         }
4222
4223         if (rxr->rxtag != NULL) {
4224                 bus_dma_tag_destroy(rxr->rxtag);
4225                 rxr->rxtag = NULL;
4226         }
4227
4228         return;
4229 }
4230
4231
4232 /*********************************************************************
4233  *
4234  *  Enable receive unit.
4235  *
4236  **********************************************************************/
4237
4238 static void
4239 em_initialize_receive_unit(struct adapter *adapter)
4240 {
4241         struct rx_ring  *rxr = adapter->rx_rings;
4242         if_t ifp = adapter->ifp;
4243         struct e1000_hw *hw = &adapter->hw;
4244         u64     bus_addr;
4245         u32     rctl, rxcsum;
4246
4247         INIT_DEBUGOUT("em_initialize_receive_units: begin");
4248
4249         /*
4250          * Make sure receives are disabled while setting
4251          * up the descriptor ring
4252          */
4253         rctl = E1000_READ_REG(hw, E1000_RCTL);
4254         /* Do not disable if ever enabled on this hardware */
4255         if ((hw->mac.type != e1000_82574) && (hw->mac.type != e1000_82583))
4256                 E1000_WRITE_REG(hw, E1000_RCTL, rctl & ~E1000_RCTL_EN);
4257
4258         E1000_WRITE_REG(&adapter->hw, E1000_RADV,
4259             adapter->rx_abs_int_delay.value);
4260         /*
4261          * Set the interrupt throttling rate. Value is calculated
4262          * as DEFAULT_ITR = 1/(MAX_INTS_PER_SEC * 256ns)
4263          */
4264         E1000_WRITE_REG(hw, E1000_ITR, DEFAULT_ITR);
4265
4266         /*
4267         ** When using MSIX interrupts we need to throttle
4268         ** using the EITR register (82574 only)
4269         */
4270         if (hw->mac.type == e1000_82574) {
4271                 for (int i = 0; i < 4; i++)
4272                         E1000_WRITE_REG(hw, E1000_EITR_82574(i),
4273                             DEFAULT_ITR);
4274                 /* Disable accelerated acknowledge */
4275                 E1000_WRITE_REG(hw, E1000_RFCTL, E1000_RFCTL_ACK_DIS);
4276         }
4277
4278         rxcsum = E1000_READ_REG(hw, E1000_RXCSUM);
4279         if (if_getcapenable(ifp) & IFCAP_RXCSUM)
4280                 rxcsum |= E1000_RXCSUM_TUOFL;
4281         else
4282                 rxcsum &= ~E1000_RXCSUM_TUOFL;
4283         E1000_WRITE_REG(hw, E1000_RXCSUM, rxcsum);
4284
4285         /*
4286         ** XXX TEMPORARY WORKAROUND: on some systems with 82573
4287         ** long latencies are observed, like Lenovo X60. This
4288         ** change eliminates the problem, but since having positive
4289         ** values in RDTR is a known source of problems on other
4290         ** platforms another solution is being sought.
4291         */
4292         if (hw->mac.type == e1000_82573)
4293                 E1000_WRITE_REG(hw, E1000_RDTR, 0x20);
4294
4295         for (int i = 0; i < adapter->num_queues; i++, rxr++) {
4296                 /* Setup the Base and Length of the Rx Descriptor Ring */
4297                 u32 rdt = adapter->num_rx_desc - 1; /* default */
4298
4299                 bus_addr = rxr->rxdma.dma_paddr;
4300                 E1000_WRITE_REG(hw, E1000_RDLEN(i),
4301                     adapter->num_rx_desc * sizeof(struct e1000_rx_desc));
4302                 E1000_WRITE_REG(hw, E1000_RDBAH(i), (u32)(bus_addr >> 32));
4303                 E1000_WRITE_REG(hw, E1000_RDBAL(i), (u32)bus_addr);
4304                 /* Setup the Head and Tail Descriptor Pointers */
4305                 E1000_WRITE_REG(hw, E1000_RDH(i), 0);
4306 #ifdef DEV_NETMAP
4307                 /*
4308                  * an init() while a netmap client is active must
4309                  * preserve the rx buffers passed to userspace.
4310                  */
4311                 if (if_getcapenable(ifp) & IFCAP_NETMAP) {
4312                         struct netmap_adapter *na = netmap_getna(adapter->ifp);
4313                         rdt -= nm_kr_rxspace(&na->rx_rings[i]);
4314                 }
4315 #endif /* DEV_NETMAP */
4316                 E1000_WRITE_REG(hw, E1000_RDT(i), rdt);
4317         }
4318
4319         /* Set PTHRESH for improved jumbo performance */
4320         if (((adapter->hw.mac.type == e1000_ich9lan) ||
4321             (adapter->hw.mac.type == e1000_pch2lan) ||
4322             (adapter->hw.mac.type == e1000_ich10lan)) &&
4323             (if_getmtu(ifp) > ETHERMTU)) {
4324                 u32 rxdctl = E1000_READ_REG(hw, E1000_RXDCTL(0));
4325                 E1000_WRITE_REG(hw, E1000_RXDCTL(0), rxdctl | 3);
4326         }
4327                 
4328         if (adapter->hw.mac.type >= e1000_pch2lan) {
4329                 if (if_getmtu(ifp) > ETHERMTU)
4330                         e1000_lv_jumbo_workaround_ich8lan(hw, TRUE);
4331                 else
4332                         e1000_lv_jumbo_workaround_ich8lan(hw, FALSE);
4333         }
4334
4335         /* Setup the Receive Control Register */
4336         rctl &= ~(3 << E1000_RCTL_MO_SHIFT);
4337         rctl |= E1000_RCTL_EN | E1000_RCTL_BAM |
4338             E1000_RCTL_LBM_NO | E1000_RCTL_RDMTS_HALF |
4339             (hw->mac.mc_filter_type << E1000_RCTL_MO_SHIFT);
4340
4341         /* Strip the CRC */
4342         rctl |= E1000_RCTL_SECRC;
4343
4344         /* Make sure VLAN Filters are off */
4345         rctl &= ~E1000_RCTL_VFE;
4346         rctl &= ~E1000_RCTL_SBP;
4347
4348         if (adapter->rx_mbuf_sz == MCLBYTES)
4349                 rctl |= E1000_RCTL_SZ_2048;
4350         else if (adapter->rx_mbuf_sz == MJUMPAGESIZE)
4351                 rctl |= E1000_RCTL_SZ_4096 | E1000_RCTL_BSEX;
4352         else if (adapter->rx_mbuf_sz > MJUMPAGESIZE)
4353                 rctl |= E1000_RCTL_SZ_8192 | E1000_RCTL_BSEX;
4354
4355         if (if_getmtu(ifp) > ETHERMTU)
4356                 rctl |= E1000_RCTL_LPE;
4357         else
4358                 rctl &= ~E1000_RCTL_LPE;
4359
4360         /* Write out the settings */
4361         E1000_WRITE_REG(hw, E1000_RCTL, rctl);
4362
4363         return;
4364 }
4365
4366
4367 /*********************************************************************
4368  *
4369  *  This routine executes in interrupt context. It replenishes
4370  *  the mbufs in the descriptor and sends data which has been
4371  *  dma'ed into host memory to upper layer.
4372  *
4373  *  We loop at most count times if count is > 0, or until done if
4374  *  count < 0.
4375  *  
4376  *  For polling we also now return the number of cleaned packets
4377  *********************************************************************/
4378 static bool
4379 em_rxeof(struct rx_ring *rxr, int count, int *done)
4380 {
4381         struct adapter          *adapter = rxr->adapter;
4382         if_t ifp = adapter->ifp;
4383         struct mbuf             *mp, *sendmp;
4384         u8                      status = 0;
4385         u16                     len;
4386         int                     i, processed, rxdone = 0;
4387         bool                    eop;
4388         struct e1000_rx_desc    *cur;
4389
4390         EM_RX_LOCK(rxr);
4391
4392 #ifdef DEV_NETMAP
4393         if (netmap_rx_irq(ifp, rxr->me, &processed)) {
4394                 EM_RX_UNLOCK(rxr);
4395                 return (FALSE);
4396         }
4397 #endif /* DEV_NETMAP */
4398
4399         for (i = rxr->next_to_check, processed = 0; count != 0;) {
4400
4401                 if ((if_getdrvflags(ifp) & IFF_DRV_RUNNING) == 0)
4402                         break;
4403
4404                 bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
4405                     BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
4406
4407                 cur = &rxr->rx_base[i];
4408                 status = cur->status;
4409                 mp = sendmp = NULL;
4410
4411                 if ((status & E1000_RXD_STAT_DD) == 0)
4412                         break;
4413
4414                 len = le16toh(cur->length);
4415                 eop = (status & E1000_RXD_STAT_EOP) != 0;
4416
4417                 if ((cur->errors & E1000_RXD_ERR_FRAME_ERR_MASK) ||
4418                     (rxr->discard == TRUE)) {
4419                         adapter->dropped_pkts++;
4420                         ++rxr->rx_discarded;
4421                         if (!eop) /* Catch subsequent segs */
4422                                 rxr->discard = TRUE;
4423                         else
4424                                 rxr->discard = FALSE;
4425                         em_rx_discard(rxr, i);
4426                         goto next_desc;
4427                 }
4428                 bus_dmamap_unload(rxr->rxtag, rxr->rx_buffers[i].map);
4429
4430                 /* Assign correct length to the current fragment */
4431                 mp = rxr->rx_buffers[i].m_head;
4432                 mp->m_len = len;
4433
4434                 /* Trigger for refresh */
4435                 rxr->rx_buffers[i].m_head = NULL;
4436
4437                 /* First segment? */
4438                 if (rxr->fmp == NULL) {
4439                         mp->m_pkthdr.len = len;
4440                         rxr->fmp = rxr->lmp = mp;
4441                 } else {
4442                         /* Chain mbuf's together */
4443                         mp->m_flags &= ~M_PKTHDR;
4444                         rxr->lmp->m_next = mp;
4445                         rxr->lmp = mp;
4446                         rxr->fmp->m_pkthdr.len += len;
4447                 }
4448
4449                 if (eop) {
4450                         --count;
4451                         sendmp = rxr->fmp;
4452                         if_setrcvif(sendmp, ifp);
4453                         if_incipackets(ifp, 1);
4454                         em_receive_checksum(cur, sendmp);
4455 #ifndef __NO_STRICT_ALIGNMENT
4456                         if (adapter->hw.mac.max_frame_size >
4457                             (MCLBYTES - ETHER_ALIGN) &&
4458                             em_fixup_rx(rxr) != 0)
4459                                 goto skip;
4460 #endif
4461                         if (status & E1000_RXD_STAT_VP) {
4462                                 if_setvtag(sendmp, 
4463                                     le16toh(cur->special));
4464                                 sendmp->m_flags |= M_VLANTAG;
4465                         }
4466 #ifndef __NO_STRICT_ALIGNMENT
4467 skip:
4468 #endif
4469                         rxr->fmp = rxr->lmp = NULL;
4470                 }
4471 next_desc:
4472                 /* Zero out the receive descriptors status. */
4473                 cur->status = 0;
4474                 ++rxdone;       /* cumulative for POLL */
4475                 ++processed;
4476
4477                 /* Advance our pointers to the next descriptor. */
4478                 if (++i == adapter->num_rx_desc)
4479                         i = 0;
4480
4481                 /* Send to the stack */
4482                 if (sendmp != NULL) {
4483                         rxr->next_to_check = i;
4484                         EM_RX_UNLOCK(rxr);
4485                         if_input(ifp, sendmp);
4486                         EM_RX_LOCK(rxr);
4487                         i = rxr->next_to_check;
4488                 }
4489
4490                 /* Only refresh mbufs every 8 descriptors */
4491                 if (processed == 8) {
4492                         em_refresh_mbufs(rxr, i);
4493                         processed = 0;
4494                 }
4495         }
4496
4497         /* Catch any remaining refresh work */
4498         if (e1000_rx_unrefreshed(rxr))
4499                 em_refresh_mbufs(rxr, i);
4500
4501         rxr->next_to_check = i;
4502         if (done != NULL)
4503                 *done = rxdone;
4504         EM_RX_UNLOCK(rxr);
4505
4506         return ((status & E1000_RXD_STAT_DD) ? TRUE : FALSE);
4507 }
4508
4509 static __inline void
4510 em_rx_discard(struct rx_ring *rxr, int i)
4511 {
4512         struct em_buffer        *rbuf;
4513
4514         rbuf = &rxr->rx_buffers[i];
4515         bus_dmamap_unload(rxr->rxtag, rbuf->map);
4516
4517         /* Free any previous pieces */
4518         if (rxr->fmp != NULL) {
4519                 rxr->fmp->m_flags |= M_PKTHDR;
4520                 m_freem(rxr->fmp);
4521                 rxr->fmp = NULL;
4522                 rxr->lmp = NULL;
4523         }
4524         /*
4525         ** Free buffer and allow em_refresh_mbufs()
4526         ** to clean up and recharge buffer.
4527         */
4528         if (rbuf->m_head) {
4529                 m_free(rbuf->m_head);
4530                 rbuf->m_head = NULL;
4531         }
4532         return;
4533 }
4534
4535 #ifndef __NO_STRICT_ALIGNMENT
4536 /*
4537  * When jumbo frames are enabled we should realign entire payload on
4538  * architecures with strict alignment. This is serious design mistake of 8254x
4539  * as it nullifies DMA operations. 8254x just allows RX buffer size to be
4540  * 2048/4096/8192/16384. What we really want is 2048 - ETHER_ALIGN to align its
4541  * payload. On architecures without strict alignment restrictions 8254x still
4542  * performs unaligned memory access which would reduce the performance too.
4543  * To avoid copying over an entire frame to align, we allocate a new mbuf and
4544  * copy ethernet header to the new mbuf. The new mbuf is prepended into the
4545  * existing mbuf chain.
4546  *
4547  * Be aware, best performance of the 8254x is achived only when jumbo frame is
4548  * not used at all on architectures with strict alignment.
4549  */
4550 static int
4551 em_fixup_rx(struct rx_ring *rxr)
4552 {
4553         struct adapter *adapter = rxr->adapter;
4554         struct mbuf *m, *n;
4555         int error;
4556
4557         error = 0;
4558         m = rxr->fmp;
4559         if (m->m_len <= (MCLBYTES - ETHER_HDR_LEN)) {
4560                 bcopy(m->m_data, m->m_data + ETHER_HDR_LEN, m->m_len);
4561                 m->m_data += ETHER_HDR_LEN;
4562         } else {
4563                 MGETHDR(n, M_NOWAIT, MT_DATA);
4564                 if (n != NULL) {
4565                         bcopy(m->m_data, n->m_data, ETHER_HDR_LEN);
4566                         m->m_data += ETHER_HDR_LEN;
4567                         m->m_len -= ETHER_HDR_LEN;
4568                         n->m_len = ETHER_HDR_LEN;
4569                         M_MOVE_PKTHDR(n, m);
4570                         n->m_next = m;
4571                         rxr->fmp = n;
4572                 } else {
4573                         adapter->dropped_pkts++;
4574                         m_freem(rxr->fmp);
4575                         rxr->fmp = NULL;
4576                         error = ENOMEM;
4577                 }
4578         }
4579
4580         return (error);
4581 }
4582 #endif
4583
4584 /*********************************************************************
4585  *
4586  *  Verify that the hardware indicated that the checksum is valid.
4587  *  Inform the stack about the status of checksum so that stack
4588  *  doesn't spend time verifying the checksum.
4589  *
4590  *********************************************************************/
4591 static void
4592 em_receive_checksum(struct e1000_rx_desc *rx_desc, struct mbuf *mp)
4593 {
4594         mp->m_pkthdr.csum_flags = 0;
4595
4596         /* Ignore Checksum bit is set */
4597         if (rx_desc->status & E1000_RXD_STAT_IXSM)
4598                 return;
4599
4600         if (rx_desc->errors & (E1000_RXD_ERR_TCPE | E1000_RXD_ERR_IPE))
4601                 return;
4602
4603         /* IP Checksum Good? */
4604         if (rx_desc->status & E1000_RXD_STAT_IPCS)
4605                 mp->m_pkthdr.csum_flags = (CSUM_IP_CHECKED | CSUM_IP_VALID);
4606
4607         /* TCP or UDP checksum */
4608         if (rx_desc->status & (E1000_RXD_STAT_TCPCS | E1000_RXD_STAT_UDPCS)) {
4609                 mp->m_pkthdr.csum_flags |= (CSUM_DATA_VALID | CSUM_PSEUDO_HDR);
4610                 mp->m_pkthdr.csum_data = htons(0xffff);
4611         }
4612 }
4613
4614 /*
4615  * This routine is run via an vlan
4616  * config EVENT
4617  */
4618 static void
4619 em_register_vlan(void *arg, if_t ifp, u16 vtag)
4620 {
4621         struct adapter  *adapter = if_getsoftc(ifp);
4622         u32             index, bit;
4623
4624         if ((void*)adapter !=  arg)   /* Not our event */
4625                 return;
4626
4627         if ((vtag == 0) || (vtag > 4095))       /* Invalid ID */
4628                 return;
4629
4630         EM_CORE_LOCK(adapter);
4631         index = (vtag >> 5) & 0x7F;
4632         bit = vtag & 0x1F;
4633         adapter->shadow_vfta[index] |= (1 << bit);
4634         ++adapter->num_vlans;
4635         /* Re-init to load the changes */
4636         if (if_getcapenable(ifp) & IFCAP_VLAN_HWFILTER)
4637                 em_init_locked(adapter);
4638         EM_CORE_UNLOCK(adapter);
4639 }
4640
4641 /*
4642  * This routine is run via an vlan
4643  * unconfig EVENT
4644  */
4645 static void
4646 em_unregister_vlan(void *arg, if_t ifp, u16 vtag)
4647 {
4648         struct adapter  *adapter = if_getsoftc(ifp);
4649         u32             index, bit;
4650
4651         if (adapter != arg)
4652                 return;
4653
4654         if ((vtag == 0) || (vtag > 4095))       /* Invalid */
4655                 return;
4656
4657         EM_CORE_LOCK(adapter);
4658         index = (vtag >> 5) & 0x7F;
4659         bit = vtag & 0x1F;
4660         adapter->shadow_vfta[index] &= ~(1 << bit);
4661         --adapter->num_vlans;
4662         /* Re-init to load the changes */
4663         if (if_getcapenable(ifp) & IFCAP_VLAN_HWFILTER)
4664                 em_init_locked(adapter);
4665         EM_CORE_UNLOCK(adapter);
4666 }
4667
4668 static void
4669 em_setup_vlan_hw_support(struct adapter *adapter)
4670 {
4671         struct e1000_hw *hw = &adapter->hw;
4672         u32             reg;
4673
4674         /*
4675         ** We get here thru init_locked, meaning
4676         ** a soft reset, this has already cleared
4677         ** the VFTA and other state, so if there
4678         ** have been no vlan's registered do nothing.
4679         */
4680         if (adapter->num_vlans == 0)
4681                 return;
4682
4683         /*
4684         ** A soft reset zero's out the VFTA, so
4685         ** we need to repopulate it now.
4686         */
4687         for (int i = 0; i < EM_VFTA_SIZE; i++)
4688                 if (adapter->shadow_vfta[i] != 0)
4689                         E1000_WRITE_REG_ARRAY(hw, E1000_VFTA,
4690                             i, adapter->shadow_vfta[i]);
4691
4692         reg = E1000_READ_REG(hw, E1000_CTRL);
4693         reg |= E1000_CTRL_VME;
4694         E1000_WRITE_REG(hw, E1000_CTRL, reg);
4695
4696         /* Enable the Filter Table */
4697         reg = E1000_READ_REG(hw, E1000_RCTL);
4698         reg &= ~E1000_RCTL_CFIEN;
4699         reg |= E1000_RCTL_VFE;
4700         E1000_WRITE_REG(hw, E1000_RCTL, reg);
4701 }
4702
4703 static void
4704 em_enable_intr(struct adapter *adapter)
4705 {
4706         struct e1000_hw *hw = &adapter->hw;
4707         u32 ims_mask = IMS_ENABLE_MASK;
4708
4709         if (hw->mac.type == e1000_82574) {
4710                 E1000_WRITE_REG(hw, EM_EIAC, EM_MSIX_MASK);
4711                 ims_mask |= EM_MSIX_MASK;
4712         } 
4713         E1000_WRITE_REG(hw, E1000_IMS, ims_mask);
4714 }
4715
4716 static void
4717 em_disable_intr(struct adapter *adapter)
4718 {
4719         struct e1000_hw *hw = &adapter->hw;
4720
4721         if (hw->mac.type == e1000_82574)
4722                 E1000_WRITE_REG(hw, EM_EIAC, 0);
4723         E1000_WRITE_REG(&adapter->hw, E1000_IMC, 0xffffffff);
4724 }
4725
4726 /*
4727  * Bit of a misnomer, what this really means is
4728  * to enable OS management of the system... aka
4729  * to disable special hardware management features 
4730  */
4731 static void
4732 em_init_manageability(struct adapter *adapter)
4733 {
4734         /* A shared code workaround */
4735 #define E1000_82542_MANC2H E1000_MANC2H
4736         if (adapter->has_manage) {
4737                 int manc2h = E1000_READ_REG(&adapter->hw, E1000_MANC2H);
4738                 int manc = E1000_READ_REG(&adapter->hw, E1000_MANC);
4739
4740                 /* disable hardware interception of ARP */
4741                 manc &= ~(E1000_MANC_ARP_EN);
4742
4743                 /* enable receiving management packets to the host */
4744                 manc |= E1000_MANC_EN_MNG2HOST;
4745 #define E1000_MNG2HOST_PORT_623 (1 << 5)
4746 #define E1000_MNG2HOST_PORT_664 (1 << 6)
4747                 manc2h |= E1000_MNG2HOST_PORT_623;
4748                 manc2h |= E1000_MNG2HOST_PORT_664;
4749                 E1000_WRITE_REG(&adapter->hw, E1000_MANC2H, manc2h);
4750                 E1000_WRITE_REG(&adapter->hw, E1000_MANC, manc);
4751         }
4752 }
4753
4754 /*
4755  * Give control back to hardware management
4756  * controller if there is one.
4757  */
4758 static void
4759 em_release_manageability(struct adapter *adapter)
4760 {
4761         if (adapter->has_manage) {
4762                 int manc = E1000_READ_REG(&adapter->hw, E1000_MANC);
4763
4764                 /* re-enable hardware interception of ARP */
4765                 manc |= E1000_MANC_ARP_EN;
4766                 manc &= ~E1000_MANC_EN_MNG2HOST;
4767
4768                 E1000_WRITE_REG(&adapter->hw, E1000_MANC, manc);
4769         }
4770 }
4771
4772 /*
4773  * em_get_hw_control sets the {CTRL_EXT|FWSM}:DRV_LOAD bit.
4774  * For ASF and Pass Through versions of f/w this means
4775  * that the driver is loaded. For AMT version type f/w
4776  * this means that the network i/f is open.
4777  */
4778 static void
4779 em_get_hw_control(struct adapter *adapter)
4780 {
4781         u32 ctrl_ext, swsm;
4782
4783         if (adapter->hw.mac.type == e1000_82573) {
4784                 swsm = E1000_READ_REG(&adapter->hw, E1000_SWSM);
4785                 E1000_WRITE_REG(&adapter->hw, E1000_SWSM,
4786                     swsm | E1000_SWSM_DRV_LOAD);
4787                 return;
4788         }
4789         /* else */
4790         ctrl_ext = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
4791         E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT,
4792             ctrl_ext | E1000_CTRL_EXT_DRV_LOAD);
4793         return;
4794 }
4795
4796 /*
4797  * em_release_hw_control resets {CTRL_EXT|FWSM}:DRV_LOAD bit.
4798  * For ASF and Pass Through versions of f/w this means that
4799  * the driver is no longer loaded. For AMT versions of the
4800  * f/w this means that the network i/f is closed.
4801  */
4802 static void
4803 em_release_hw_control(struct adapter *adapter)
4804 {
4805         u32 ctrl_ext, swsm;
4806
4807         if (!adapter->has_manage)
4808                 return;
4809
4810         if (adapter->hw.mac.type == e1000_82573) {
4811                 swsm = E1000_READ_REG(&adapter->hw, E1000_SWSM);
4812                 E1000_WRITE_REG(&adapter->hw, E1000_SWSM,
4813                     swsm & ~E1000_SWSM_DRV_LOAD);
4814                 return;
4815         }
4816         /* else */
4817         ctrl_ext = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
4818         E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT,
4819             ctrl_ext & ~E1000_CTRL_EXT_DRV_LOAD);
4820         return;
4821 }
4822
4823 static int
4824 em_is_valid_ether_addr(u8 *addr)
4825 {
4826         char zero_addr[6] = { 0, 0, 0, 0, 0, 0 };
4827
4828         if ((addr[0] & 1) || (!bcmp(addr, zero_addr, ETHER_ADDR_LEN))) {
4829                 return (FALSE);
4830         }
4831
4832         return (TRUE);
4833 }
4834
4835 /*
4836 ** Parse the interface capabilities with regard
4837 ** to both system management and wake-on-lan for
4838 ** later use.
4839 */
4840 static void
4841 em_get_wakeup(device_t dev)
4842 {
4843         struct adapter  *adapter = device_get_softc(dev);
4844         u16             eeprom_data = 0, device_id, apme_mask;
4845
4846         adapter->has_manage = e1000_enable_mng_pass_thru(&adapter->hw);
4847         apme_mask = EM_EEPROM_APME;
4848
4849         switch (adapter->hw.mac.type) {
4850         case e1000_82573:
4851         case e1000_82583:
4852                 adapter->has_amt = TRUE;
4853                 /* Falls thru */
4854         case e1000_82571:
4855         case e1000_82572:
4856         case e1000_80003es2lan:
4857                 if (adapter->hw.bus.func == 1) {
4858                         e1000_read_nvm(&adapter->hw,
4859                             NVM_INIT_CONTROL3_PORT_B, 1, &eeprom_data);
4860                         break;
4861                 } else
4862                         e1000_read_nvm(&adapter->hw,
4863                             NVM_INIT_CONTROL3_PORT_A, 1, &eeprom_data);
4864                 break;
4865         case e1000_ich8lan:
4866         case e1000_ich9lan:
4867         case e1000_ich10lan:
4868         case e1000_pchlan:
4869         case e1000_pch2lan:
4870                 apme_mask = E1000_WUC_APME;
4871                 adapter->has_amt = TRUE;
4872                 eeprom_data = E1000_READ_REG(&adapter->hw, E1000_WUC);
4873                 break;
4874         default:
4875                 e1000_read_nvm(&adapter->hw,
4876                     NVM_INIT_CONTROL3_PORT_A, 1, &eeprom_data);
4877                 break;
4878         }
4879         if (eeprom_data & apme_mask)
4880                 adapter->wol = (E1000_WUFC_MAG | E1000_WUFC_MC);
4881         /*
4882          * We have the eeprom settings, now apply the special cases
4883          * where the eeprom may be wrong or the board won't support
4884          * wake on lan on a particular port
4885          */
4886         device_id = pci_get_device(dev);
4887         switch (device_id) {
4888         case E1000_DEV_ID_82571EB_FIBER:
4889                 /* Wake events only supported on port A for dual fiber
4890                  * regardless of eeprom setting */
4891                 if (E1000_READ_REG(&adapter->hw, E1000_STATUS) &
4892                     E1000_STATUS_FUNC_1)
4893                         adapter->wol = 0;
4894                 break;
4895         case E1000_DEV_ID_82571EB_QUAD_COPPER:
4896         case E1000_DEV_ID_82571EB_QUAD_FIBER:
4897         case E1000_DEV_ID_82571EB_QUAD_COPPER_LP:
4898                 /* if quad port adapter, disable WoL on all but port A */
4899                 if (global_quad_port_a != 0)
4900                         adapter->wol = 0;
4901                 /* Reset for multiple quad port adapters */
4902                 if (++global_quad_port_a == 4)
4903                         global_quad_port_a = 0;
4904                 break;
4905         }
4906         return;
4907 }
4908
4909
4910 /*
4911  * Enable PCI Wake On Lan capability
4912  */
4913 static void
4914 em_enable_wakeup(device_t dev)
4915 {
4916         struct adapter  *adapter = device_get_softc(dev);
4917         if_t ifp = adapter->ifp;
4918         u32             pmc, ctrl, ctrl_ext, rctl;
4919         u16             status;
4920
4921         if ((pci_find_cap(dev, PCIY_PMG, &pmc) != 0))
4922                 return;
4923
4924         /* Advertise the wakeup capability */
4925         ctrl = E1000_READ_REG(&adapter->hw, E1000_CTRL);
4926         ctrl |= (E1000_CTRL_SWDPIN2 | E1000_CTRL_SWDPIN3);
4927         E1000_WRITE_REG(&adapter->hw, E1000_CTRL, ctrl);
4928         E1000_WRITE_REG(&adapter->hw, E1000_WUC, E1000_WUC_PME_EN);
4929
4930         if ((adapter->hw.mac.type == e1000_ich8lan) ||
4931             (adapter->hw.mac.type == e1000_pchlan) ||
4932             (adapter->hw.mac.type == e1000_ich9lan) ||
4933             (adapter->hw.mac.type == e1000_ich10lan))
4934                 e1000_suspend_workarounds_ich8lan(&adapter->hw);
4935
4936         /* Keep the laser running on Fiber adapters */
4937         if (adapter->hw.phy.media_type == e1000_media_type_fiber ||
4938             adapter->hw.phy.media_type == e1000_media_type_internal_serdes) {
4939                 ctrl_ext = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
4940                 ctrl_ext |= E1000_CTRL_EXT_SDP3_DATA;
4941                 E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT, ctrl_ext);
4942         }
4943
4944         /*
4945         ** Determine type of Wakeup: note that wol
4946         ** is set with all bits on by default.
4947         */
4948         if ((if_getcapenable(ifp) & IFCAP_WOL_MAGIC) == 0)
4949                 adapter->wol &= ~E1000_WUFC_MAG;
4950
4951         if ((if_getcapenable(ifp) & IFCAP_WOL_MCAST) == 0)
4952                 adapter->wol &= ~E1000_WUFC_MC;
4953         else {
4954                 rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
4955                 rctl |= E1000_RCTL_MPE;
4956                 E1000_WRITE_REG(&adapter->hw, E1000_RCTL, rctl);
4957         }
4958
4959         if ((adapter->hw.mac.type == e1000_pchlan) ||
4960             (adapter->hw.mac.type == e1000_pch2lan)) {
4961                 if (em_enable_phy_wakeup(adapter))
4962                         return;
4963         } else {
4964                 E1000_WRITE_REG(&adapter->hw, E1000_WUC, E1000_WUC_PME_EN);
4965                 E1000_WRITE_REG(&adapter->hw, E1000_WUFC, adapter->wol);
4966         }
4967
4968         if (adapter->hw.phy.type == e1000_phy_igp_3)
4969                 e1000_igp3_phy_powerdown_workaround_ich8lan(&adapter->hw);
4970
4971         /* Request PME */
4972         status = pci_read_config(dev, pmc + PCIR_POWER_STATUS, 2);
4973         status &= ~(PCIM_PSTAT_PME | PCIM_PSTAT_PMEENABLE);
4974         if (if_getcapenable(ifp) & IFCAP_WOL)
4975                 status |= PCIM_PSTAT_PME | PCIM_PSTAT_PMEENABLE;
4976         pci_write_config(dev, pmc + PCIR_POWER_STATUS, status, 2);
4977
4978         return;
4979 }
4980
4981 /*
4982 ** WOL in the newer chipset interfaces (pchlan)
4983 ** require thing to be copied into the phy
4984 */
4985 static int
4986 em_enable_phy_wakeup(struct adapter *adapter)
4987 {
4988         struct e1000_hw *hw = &adapter->hw;
4989         u32 mreg, ret = 0;
4990         u16 preg;
4991
4992         /* copy MAC RARs to PHY RARs */
4993         e1000_copy_rx_addrs_to_phy_ich8lan(hw);
4994
4995         /* copy MAC MTA to PHY MTA */
4996         for (int i = 0; i < adapter->hw.mac.mta_reg_count; i++) {
4997                 mreg = E1000_READ_REG_ARRAY(hw, E1000_MTA, i);
4998                 e1000_write_phy_reg(hw, BM_MTA(i), (u16)(mreg & 0xFFFF));
4999                 e1000_write_phy_reg(hw, BM_MTA(i) + 1,
5000                     (u16)((mreg >> 16) & 0xFFFF));
5001         }
5002
5003         /* configure PHY Rx Control register */
5004         e1000_read_phy_reg(&adapter->hw, BM_RCTL, &preg);
5005         mreg = E1000_READ_REG(hw, E1000_RCTL);
5006         if (mreg & E1000_RCTL_UPE)
5007                 preg |= BM_RCTL_UPE;
5008         if (mreg & E1000_RCTL_MPE)
5009                 preg |= BM_RCTL_MPE;
5010         preg &= ~(BM_RCTL_MO_MASK);
5011         if (mreg & E1000_RCTL_MO_3)
5012                 preg |= (((mreg & E1000_RCTL_MO_3) >> E1000_RCTL_MO_SHIFT)
5013                                 << BM_RCTL_MO_SHIFT);
5014         if (mreg & E1000_RCTL_BAM)
5015                 preg |= BM_RCTL_BAM;
5016         if (mreg & E1000_RCTL_PMCF)
5017                 preg |= BM_RCTL_PMCF;
5018         mreg = E1000_READ_REG(hw, E1000_CTRL);
5019         if (mreg & E1000_CTRL_RFCE)
5020                 preg |= BM_RCTL_RFCE;
5021         e1000_write_phy_reg(&adapter->hw, BM_RCTL, preg);
5022
5023         /* enable PHY wakeup in MAC register */
5024         E1000_WRITE_REG(hw, E1000_WUC,
5025             E1000_WUC_PHY_WAKE | E1000_WUC_PME_EN);
5026         E1000_WRITE_REG(hw, E1000_WUFC, adapter->wol);
5027
5028         /* configure and enable PHY wakeup in PHY registers */
5029         e1000_write_phy_reg(&adapter->hw, BM_WUFC, adapter->wol);
5030         e1000_write_phy_reg(&adapter->hw, BM_WUC, E1000_WUC_PME_EN);
5031
5032         /* activate PHY wakeup */
5033         ret = hw->phy.ops.acquire(hw);
5034         if (ret) {
5035                 printf("Could not acquire PHY\n");
5036                 return ret;
5037         }
5038         e1000_write_phy_reg_mdic(hw, IGP01E1000_PHY_PAGE_SELECT,
5039                                  (BM_WUC_ENABLE_PAGE << IGP_PAGE_SHIFT));
5040         ret = e1000_read_phy_reg_mdic(hw, BM_WUC_ENABLE_REG, &preg);
5041         if (ret) {
5042                 printf("Could not read PHY page 769\n");
5043                 goto out;
5044         }
5045         preg |= BM_WUC_ENABLE_BIT | BM_WUC_HOST_WU_BIT;
5046         ret = e1000_write_phy_reg_mdic(hw, BM_WUC_ENABLE_REG, preg);
5047         if (ret)
5048                 printf("Could not set PHY Host Wakeup bit\n");
5049 out:
5050         hw->phy.ops.release(hw);
5051
5052         return ret;
5053 }
5054
5055 static void
5056 em_led_func(void *arg, int onoff)
5057 {
5058         struct adapter  *adapter = arg;
5059  
5060         EM_CORE_LOCK(adapter);
5061         if (onoff) {
5062                 e1000_setup_led(&adapter->hw);
5063                 e1000_led_on(&adapter->hw);
5064         } else {
5065                 e1000_led_off(&adapter->hw);
5066                 e1000_cleanup_led(&adapter->hw);
5067         }
5068         EM_CORE_UNLOCK(adapter);
5069 }
5070
5071 /*
5072 ** Disable the L0S and L1 LINK states
5073 */
5074 static void
5075 em_disable_aspm(struct adapter *adapter)
5076 {
5077         int             base, reg;
5078         u16             link_cap,link_ctrl;
5079         device_t        dev = adapter->dev;
5080
5081         switch (adapter->hw.mac.type) {
5082                 case e1000_82573:
5083                 case e1000_82574:
5084                 case e1000_82583:
5085                         break;
5086                 default:
5087                         return;
5088         }
5089         if (pci_find_cap(dev, PCIY_EXPRESS, &base) != 0)
5090                 return;
5091         reg = base + PCIER_LINK_CAP;
5092         link_cap = pci_read_config(dev, reg, 2);
5093         if ((link_cap & PCIEM_LINK_CAP_ASPM) == 0)
5094                 return;
5095         reg = base + PCIER_LINK_CTL;
5096         link_ctrl = pci_read_config(dev, reg, 2);
5097         link_ctrl &= ~PCIEM_LINK_CTL_ASPMC;
5098         pci_write_config(dev, reg, link_ctrl, 2);
5099         return;
5100 }
5101
5102 /**********************************************************************
5103  *
5104  *  Update the board statistics counters.
5105  *
5106  **********************************************************************/
5107 static void
5108 em_update_stats_counters(struct adapter *adapter)
5109 {
5110         if_t ifp;
5111
5112         if(adapter->hw.phy.media_type == e1000_media_type_copper ||
5113            (E1000_READ_REG(&adapter->hw, E1000_STATUS) & E1000_STATUS_LU)) {
5114                 adapter->stats.symerrs += E1000_READ_REG(&adapter->hw, E1000_SYMERRS);
5115                 adapter->stats.sec += E1000_READ_REG(&adapter->hw, E1000_SEC);
5116         }
5117         adapter->stats.crcerrs += E1000_READ_REG(&adapter->hw, E1000_CRCERRS);
5118         adapter->stats.mpc += E1000_READ_REG(&adapter->hw, E1000_MPC);
5119         adapter->stats.scc += E1000_READ_REG(&adapter->hw, E1000_SCC);
5120         adapter->stats.ecol += E1000_READ_REG(&adapter->hw, E1000_ECOL);
5121
5122         adapter->stats.mcc += E1000_READ_REG(&adapter->hw, E1000_MCC);
5123         adapter->stats.latecol += E1000_READ_REG(&adapter->hw, E1000_LATECOL);
5124         adapter->stats.colc += E1000_READ_REG(&adapter->hw, E1000_COLC);
5125         adapter->stats.dc += E1000_READ_REG(&adapter->hw, E1000_DC);
5126         adapter->stats.rlec += E1000_READ_REG(&adapter->hw, E1000_RLEC);
5127         adapter->stats.xonrxc += E1000_READ_REG(&adapter->hw, E1000_XONRXC);
5128         adapter->stats.xontxc += E1000_READ_REG(&adapter->hw, E1000_XONTXC);
5129         /*
5130         ** For watchdog management we need to know if we have been
5131         ** paused during the last interval, so capture that here.
5132         */
5133         adapter->pause_frames = E1000_READ_REG(&adapter->hw, E1000_XOFFRXC);
5134         adapter->stats.xoffrxc += adapter->pause_frames;
5135         adapter->stats.xofftxc += E1000_READ_REG(&adapter->hw, E1000_XOFFTXC);
5136         adapter->stats.fcruc += E1000_READ_REG(&adapter->hw, E1000_FCRUC);
5137         adapter->stats.prc64 += E1000_READ_REG(&adapter->hw, E1000_PRC64);
5138         adapter->stats.prc127 += E1000_READ_REG(&adapter->hw, E1000_PRC127);
5139         adapter->stats.prc255 += E1000_READ_REG(&adapter->hw, E1000_PRC255);
5140         adapter->stats.prc511 += E1000_READ_REG(&adapter->hw, E1000_PRC511);
5141         adapter->stats.prc1023 += E1000_READ_REG(&adapter->hw, E1000_PRC1023);
5142         adapter->stats.prc1522 += E1000_READ_REG(&adapter->hw, E1000_PRC1522);
5143         adapter->stats.gprc += E1000_READ_REG(&adapter->hw, E1000_GPRC);
5144         adapter->stats.bprc += E1000_READ_REG(&adapter->hw, E1000_BPRC);
5145         adapter->stats.mprc += E1000_READ_REG(&adapter->hw, E1000_MPRC);
5146         adapter->stats.gptc += E1000_READ_REG(&adapter->hw, E1000_GPTC);
5147
5148         /* For the 64-bit byte counters the low dword must be read first. */
5149         /* Both registers clear on the read of the high dword */
5150
5151         adapter->stats.gorc += E1000_READ_REG(&adapter->hw, E1000_GORCL) +
5152             ((u64)E1000_READ_REG(&adapter->hw, E1000_GORCH) << 32);
5153         adapter->stats.gotc += E1000_READ_REG(&adapter->hw, E1000_GOTCL) +
5154             ((u64)E1000_READ_REG(&adapter->hw, E1000_GOTCH) << 32);
5155
5156         adapter->stats.rnbc += E1000_READ_REG(&adapter->hw, E1000_RNBC);
5157         adapter->stats.ruc += E1000_READ_REG(&adapter->hw, E1000_RUC);
5158         adapter->stats.rfc += E1000_READ_REG(&adapter->hw, E1000_RFC);
5159         adapter->stats.roc += E1000_READ_REG(&adapter->hw, E1000_ROC);
5160         adapter->stats.rjc += E1000_READ_REG(&adapter->hw, E1000_RJC);
5161
5162         adapter->stats.tor += E1000_READ_REG(&adapter->hw, E1000_TORH);
5163         adapter->stats.tot += E1000_READ_REG(&adapter->hw, E1000_TOTH);
5164
5165         adapter->stats.tpr += E1000_READ_REG(&adapter->hw, E1000_TPR);
5166         adapter->stats.tpt += E1000_READ_REG(&adapter->hw, E1000_TPT);
5167         adapter->stats.ptc64 += E1000_READ_REG(&adapter->hw, E1000_PTC64);
5168         adapter->stats.ptc127 += E1000_READ_REG(&adapter->hw, E1000_PTC127);
5169         adapter->stats.ptc255 += E1000_READ_REG(&adapter->hw, E1000_PTC255);
5170         adapter->stats.ptc511 += E1000_READ_REG(&adapter->hw, E1000_PTC511);
5171         adapter->stats.ptc1023 += E1000_READ_REG(&adapter->hw, E1000_PTC1023);
5172         adapter->stats.ptc1522 += E1000_READ_REG(&adapter->hw, E1000_PTC1522);
5173         adapter->stats.mptc += E1000_READ_REG(&adapter->hw, E1000_MPTC);
5174         adapter->stats.bptc += E1000_READ_REG(&adapter->hw, E1000_BPTC);
5175
5176         /* Interrupt Counts */
5177
5178         adapter->stats.iac += E1000_READ_REG(&adapter->hw, E1000_IAC);
5179         adapter->stats.icrxptc += E1000_READ_REG(&adapter->hw, E1000_ICRXPTC);
5180         adapter->stats.icrxatc += E1000_READ_REG(&adapter->hw, E1000_ICRXATC);
5181         adapter->stats.ictxptc += E1000_READ_REG(&adapter->hw, E1000_ICTXPTC);
5182         adapter->stats.ictxatc += E1000_READ_REG(&adapter->hw, E1000_ICTXATC);
5183         adapter->stats.ictxqec += E1000_READ_REG(&adapter->hw, E1000_ICTXQEC);
5184         adapter->stats.ictxqmtc += E1000_READ_REG(&adapter->hw, E1000_ICTXQMTC);
5185         adapter->stats.icrxdmtc += E1000_READ_REG(&adapter->hw, E1000_ICRXDMTC);
5186         adapter->stats.icrxoc += E1000_READ_REG(&adapter->hw, E1000_ICRXOC);
5187
5188         if (adapter->hw.mac.type >= e1000_82543) {
5189                 adapter->stats.algnerrc += 
5190                 E1000_READ_REG(&adapter->hw, E1000_ALGNERRC);
5191                 adapter->stats.rxerrc += 
5192                 E1000_READ_REG(&adapter->hw, E1000_RXERRC);
5193                 adapter->stats.tncrs += 
5194                 E1000_READ_REG(&adapter->hw, E1000_TNCRS);
5195                 adapter->stats.cexterr += 
5196                 E1000_READ_REG(&adapter->hw, E1000_CEXTERR);
5197                 adapter->stats.tsctc += 
5198                 E1000_READ_REG(&adapter->hw, E1000_TSCTC);
5199                 adapter->stats.tsctfc += 
5200                 E1000_READ_REG(&adapter->hw, E1000_TSCTFC);
5201         }
5202         ifp = adapter->ifp;
5203
5204         if_setcollisions(ifp, adapter->stats.colc);
5205
5206         /* Rx Errors */
5207         if_setierrors(ifp, adapter->dropped_pkts + adapter->stats.rxerrc +
5208             adapter->stats.crcerrs + adapter->stats.algnerrc +
5209             adapter->stats.ruc + adapter->stats.roc +
5210             adapter->stats.mpc + adapter->stats.cexterr);
5211
5212         /* Tx Errors */
5213         if_setoerrors(ifp, adapter->stats.ecol + adapter->stats.latecol +
5214             adapter->watchdog_events);
5215 }
5216
5217 /* Export a single 32-bit register via a read-only sysctl. */
5218 static int
5219 em_sysctl_reg_handler(SYSCTL_HANDLER_ARGS)
5220 {
5221         struct adapter *adapter;
5222         u_int val;
5223
5224         adapter = oidp->oid_arg1;
5225         val = E1000_READ_REG(&adapter->hw, oidp->oid_arg2);
5226         return (sysctl_handle_int(oidp, &val, 0, req));
5227 }
5228
5229 /*
5230  * Add sysctl variables, one per statistic, to the system.
5231  */
5232 static void
5233 em_add_hw_stats(struct adapter *adapter)
5234 {
5235         device_t dev = adapter->dev;
5236
5237         struct tx_ring *txr = adapter->tx_rings;
5238         struct rx_ring *rxr = adapter->rx_rings;
5239
5240         struct sysctl_ctx_list *ctx = device_get_sysctl_ctx(dev);
5241         struct sysctl_oid *tree = device_get_sysctl_tree(dev);
5242         struct sysctl_oid_list *child = SYSCTL_CHILDREN(tree);
5243         struct e1000_hw_stats *stats = &adapter->stats;
5244
5245         struct sysctl_oid *stat_node, *queue_node, *int_node;
5246         struct sysctl_oid_list *stat_list, *queue_list, *int_list;
5247
5248 #define QUEUE_NAME_LEN 32
5249         char namebuf[QUEUE_NAME_LEN];
5250         
5251         /* Driver Statistics */
5252         SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "link_irq",
5253                         CTLFLAG_RD, &adapter->link_irq,
5254                         "Link MSIX IRQ Handled");
5255         SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "mbuf_alloc_fail", 
5256                          CTLFLAG_RD, &adapter->mbuf_alloc_failed,
5257                          "Std mbuf failed");
5258         SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "cluster_alloc_fail", 
5259                          CTLFLAG_RD, &adapter->mbuf_cluster_failed,
5260                          "Std mbuf cluster failed");
5261         SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "dropped", 
5262                         CTLFLAG_RD, &adapter->dropped_pkts,
5263                         "Driver dropped packets");
5264         SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "tx_dma_fail", 
5265                         CTLFLAG_RD, &adapter->no_tx_dma_setup,
5266                         "Driver tx dma failure in xmit");
5267         SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "rx_overruns",
5268                         CTLFLAG_RD, &adapter->rx_overruns,
5269                         "RX overruns");
5270         SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "watchdog_timeouts",
5271                         CTLFLAG_RD, &adapter->watchdog_events,
5272                         "Watchdog timeouts");
5273         
5274         SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "device_control",
5275                         CTLTYPE_UINT | CTLFLAG_RD, adapter, E1000_CTRL,
5276                         em_sysctl_reg_handler, "IU",
5277                         "Device Control Register");
5278         SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "rx_control",
5279                         CTLTYPE_UINT | CTLFLAG_RD, adapter, E1000_RCTL,
5280                         em_sysctl_reg_handler, "IU",
5281                         "Receiver Control Register");
5282         SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "fc_high_water",
5283                         CTLFLAG_RD, &adapter->hw.fc.high_water, 0,
5284                         "Flow Control High Watermark");
5285         SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "fc_low_water", 
5286                         CTLFLAG_RD, &adapter->hw.fc.low_water, 0,
5287                         "Flow Control Low Watermark");
5288
5289         for (int i = 0; i < adapter->num_queues; i++, rxr++, txr++) {
5290                 snprintf(namebuf, QUEUE_NAME_LEN, "queue%d", i);
5291                 queue_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, namebuf,
5292                                             CTLFLAG_RD, NULL, "Queue Name");
5293                 queue_list = SYSCTL_CHILDREN(queue_node);
5294
5295                 SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "txd_head", 
5296                                 CTLTYPE_UINT | CTLFLAG_RD, adapter,
5297                                 E1000_TDH(txr->me),
5298                                 em_sysctl_reg_handler, "IU",
5299                                 "Transmit Descriptor Head");
5300                 SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "txd_tail", 
5301                                 CTLTYPE_UINT | CTLFLAG_RD, adapter,
5302                                 E1000_TDT(txr->me),
5303                                 em_sysctl_reg_handler, "IU",
5304                                 "Transmit Descriptor Tail");
5305                 SYSCTL_ADD_ULONG(ctx, queue_list, OID_AUTO, "tx_irq",
5306                                 CTLFLAG_RD, &txr->tx_irq,
5307                                 "Queue MSI-X Transmit Interrupts");
5308                 SYSCTL_ADD_ULONG(ctx, queue_list, OID_AUTO, "no_desc_avail", 
5309                                 CTLFLAG_RD, &txr->no_desc_avail,
5310                                 "Queue No Descriptor Available");
5311                 
5312                 SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "rxd_head", 
5313                                 CTLTYPE_UINT | CTLFLAG_RD, adapter,
5314                                 E1000_RDH(rxr->me),
5315                                 em_sysctl_reg_handler, "IU",
5316                                 "Receive Descriptor Head");
5317                 SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "rxd_tail", 
5318                                 CTLTYPE_UINT | CTLFLAG_RD, adapter,
5319                                 E1000_RDT(rxr->me),
5320                                 em_sysctl_reg_handler, "IU",
5321                                 "Receive Descriptor Tail");
5322                 SYSCTL_ADD_ULONG(ctx, queue_list, OID_AUTO, "rx_irq",
5323                                 CTLFLAG_RD, &rxr->rx_irq,
5324                                 "Queue MSI-X Receive Interrupts");
5325         }
5326
5327         /* MAC stats get their own sub node */
5328
5329         stat_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "mac_stats", 
5330                                     CTLFLAG_RD, NULL, "Statistics");
5331         stat_list = SYSCTL_CHILDREN(stat_node);
5332
5333         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "excess_coll",
5334                         CTLFLAG_RD, &stats->ecol,
5335                         "Excessive collisions");
5336         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "single_coll",
5337                         CTLFLAG_RD, &stats->scc,
5338                         "Single collisions");
5339         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "multiple_coll",
5340                         CTLFLAG_RD, &stats->mcc,
5341                         "Multiple collisions");
5342         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "late_coll",
5343                         CTLFLAG_RD, &stats->latecol,
5344                         "Late collisions");
5345         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "collision_count",
5346                         CTLFLAG_RD, &stats->colc,
5347                         "Collision Count");
5348         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "symbol_errors",
5349                         CTLFLAG_RD, &adapter->stats.symerrs,
5350                         "Symbol Errors");
5351         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "sequence_errors",
5352                         CTLFLAG_RD, &adapter->stats.sec,
5353                         "Sequence Errors");
5354         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "defer_count",
5355                         CTLFLAG_RD, &adapter->stats.dc,
5356                         "Defer Count");
5357         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "missed_packets",
5358                         CTLFLAG_RD, &adapter->stats.mpc,
5359                         "Missed Packets");
5360         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_no_buff",
5361                         CTLFLAG_RD, &adapter->stats.rnbc,
5362                         "Receive No Buffers");
5363         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_undersize",
5364                         CTLFLAG_RD, &adapter->stats.ruc,
5365                         "Receive Undersize");
5366         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_fragmented",
5367                         CTLFLAG_RD, &adapter->stats.rfc,
5368                         "Fragmented Packets Received ");
5369         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_oversize",
5370                         CTLFLAG_RD, &adapter->stats.roc,
5371                         "Oversized Packets Received");
5372         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_jabber",
5373                         CTLFLAG_RD, &adapter->stats.rjc,
5374                         "Recevied Jabber");
5375         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_errs",
5376                         CTLFLAG_RD, &adapter->stats.rxerrc,
5377                         "Receive Errors");
5378         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "crc_errs",
5379                         CTLFLAG_RD, &adapter->stats.crcerrs,
5380                         "CRC errors");
5381         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "alignment_errs",
5382                         CTLFLAG_RD, &adapter->stats.algnerrc,
5383                         "Alignment Errors");
5384         /* On 82575 these are collision counts */
5385         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "coll_ext_errs",
5386                         CTLFLAG_RD, &adapter->stats.cexterr,
5387                         "Collision/Carrier extension errors");
5388         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "xon_recvd",
5389                         CTLFLAG_RD, &adapter->stats.xonrxc,
5390                         "XON Received");
5391         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "xon_txd",
5392                         CTLFLAG_RD, &adapter->stats.xontxc,
5393                         "XON Transmitted");
5394         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "xoff_recvd",
5395                         CTLFLAG_RD, &adapter->stats.xoffrxc,
5396                         "XOFF Received");
5397         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "xoff_txd",
5398                         CTLFLAG_RD, &adapter->stats.xofftxc,
5399                         "XOFF Transmitted");
5400
5401         /* Packet Reception Stats */
5402         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "total_pkts_recvd",
5403                         CTLFLAG_RD, &adapter->stats.tpr,
5404                         "Total Packets Received ");
5405         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_pkts_recvd",
5406                         CTLFLAG_RD, &adapter->stats.gprc,
5407                         "Good Packets Received");
5408         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "bcast_pkts_recvd",
5409                         CTLFLAG_RD, &adapter->stats.bprc,
5410                         "Broadcast Packets Received");
5411         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "mcast_pkts_recvd",
5412                         CTLFLAG_RD, &adapter->stats.mprc,
5413                         "Multicast Packets Received");
5414         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_64",
5415                         CTLFLAG_RD, &adapter->stats.prc64,
5416                         "64 byte frames received ");
5417         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_65_127",
5418                         CTLFLAG_RD, &adapter->stats.prc127,
5419                         "65-127 byte frames received");
5420         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_128_255",
5421                         CTLFLAG_RD, &adapter->stats.prc255,
5422                         "128-255 byte frames received");
5423         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_256_511",
5424                         CTLFLAG_RD, &adapter->stats.prc511,
5425                         "256-511 byte frames received");
5426         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_512_1023",
5427                         CTLFLAG_RD, &adapter->stats.prc1023,
5428                         "512-1023 byte frames received");
5429         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_1024_1522",
5430                         CTLFLAG_RD, &adapter->stats.prc1522,
5431                         "1023-1522 byte frames received");
5432         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_octets_recvd",
5433                         CTLFLAG_RD, &adapter->stats.gorc, 
5434                         "Good Octets Received"); 
5435
5436         /* Packet Transmission Stats */
5437         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_octets_txd",
5438                         CTLFLAG_RD, &adapter->stats.gotc, 
5439                         "Good Octets Transmitted"); 
5440         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "total_pkts_txd",
5441                         CTLFLAG_RD, &adapter->stats.tpt,
5442                         "Total Packets Transmitted");
5443         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_pkts_txd",
5444                         CTLFLAG_RD, &adapter->stats.gptc,
5445                         "Good Packets Transmitted");
5446         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "bcast_pkts_txd",
5447                         CTLFLAG_RD, &adapter->stats.bptc,
5448                         "Broadcast Packets Transmitted");
5449         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "mcast_pkts_txd",
5450                         CTLFLAG_RD, &adapter->stats.mptc,
5451                         "Multicast Packets Transmitted");
5452         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_64",
5453                         CTLFLAG_RD, &adapter->stats.ptc64,
5454                         "64 byte frames transmitted ");
5455         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_65_127",
5456                         CTLFLAG_RD, &adapter->stats.ptc127,
5457                         "65-127 byte frames transmitted");
5458         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_128_255",
5459                         CTLFLAG_RD, &adapter->stats.ptc255,
5460                         "128-255 byte frames transmitted");
5461         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_256_511",
5462                         CTLFLAG_RD, &adapter->stats.ptc511,
5463                         "256-511 byte frames transmitted");
5464         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_512_1023",
5465                         CTLFLAG_RD, &adapter->stats.ptc1023,
5466                         "512-1023 byte frames transmitted");
5467         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_1024_1522",
5468                         CTLFLAG_RD, &adapter->stats.ptc1522,
5469                         "1024-1522 byte frames transmitted");
5470         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tso_txd",
5471                         CTLFLAG_RD, &adapter->stats.tsctc,
5472                         "TSO Contexts Transmitted");
5473         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tso_ctx_fail",
5474                         CTLFLAG_RD, &adapter->stats.tsctfc,
5475                         "TSO Contexts Failed");
5476
5477
5478         /* Interrupt Stats */
5479
5480         int_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "interrupts", 
5481                                     CTLFLAG_RD, NULL, "Interrupt Statistics");
5482         int_list = SYSCTL_CHILDREN(int_node);
5483
5484         SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "asserts",
5485                         CTLFLAG_RD, &adapter->stats.iac,
5486                         "Interrupt Assertion Count");
5487
5488         SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "rx_pkt_timer",
5489                         CTLFLAG_RD, &adapter->stats.icrxptc,
5490                         "Interrupt Cause Rx Pkt Timer Expire Count");
5491
5492         SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "rx_abs_timer",
5493                         CTLFLAG_RD, &adapter->stats.icrxatc,
5494                         "Interrupt Cause Rx Abs Timer Expire Count");
5495
5496         SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "tx_pkt_timer",
5497                         CTLFLAG_RD, &adapter->stats.ictxptc,
5498                         "Interrupt Cause Tx Pkt Timer Expire Count");
5499
5500         SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "tx_abs_timer",
5501                         CTLFLAG_RD, &adapter->stats.ictxatc,
5502                         "Interrupt Cause Tx Abs Timer Expire Count");
5503
5504         SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "tx_queue_empty",
5505                         CTLFLAG_RD, &adapter->stats.ictxqec,
5506                         "Interrupt Cause Tx Queue Empty Count");
5507
5508         SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "tx_queue_min_thresh",
5509                         CTLFLAG_RD, &adapter->stats.ictxqmtc,
5510                         "Interrupt Cause Tx Queue Min Thresh Count");
5511
5512         SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "rx_desc_min_thresh",
5513                         CTLFLAG_RD, &adapter->stats.icrxdmtc,
5514                         "Interrupt Cause Rx Desc Min Thresh Count");
5515
5516         SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "rx_overrun",
5517                         CTLFLAG_RD, &adapter->stats.icrxoc,
5518                         "Interrupt Cause Receiver Overrun Count");
5519 }
5520
5521 /**********************************************************************
5522  *
5523  *  This routine provides a way to dump out the adapter eeprom,
5524  *  often a useful debug/service tool. This only dumps the first
5525  *  32 words, stuff that matters is in that extent.
5526  *
5527  **********************************************************************/
5528 static int
5529 em_sysctl_nvm_info(SYSCTL_HANDLER_ARGS)
5530 {
5531         struct adapter *adapter = (struct adapter *)arg1;
5532         int error;
5533         int result;
5534
5535         result = -1;
5536         error = sysctl_handle_int(oidp, &result, 0, req);
5537
5538         if (error || !req->newptr)
5539                 return (error);
5540
5541         /*
5542          * This value will cause a hex dump of the
5543          * first 32 16-bit words of the EEPROM to
5544          * the screen.
5545          */
5546         if (result == 1)
5547                 em_print_nvm_info(adapter);
5548
5549         return (error);
5550 }
5551
5552 static void
5553 em_print_nvm_info(struct adapter *adapter)
5554 {
5555         u16     eeprom_data;
5556         int     i, j, row = 0;
5557
5558         /* Its a bit crude, but it gets the job done */
5559         printf("\nInterface EEPROM Dump:\n");
5560         printf("Offset\n0x0000  ");
5561         for (i = 0, j = 0; i < 32; i++, j++) {
5562                 if (j == 8) { /* Make the offset block */
5563                         j = 0; ++row;
5564                         printf("\n0x00%x0  ",row);
5565                 }
5566                 e1000_read_nvm(&adapter->hw, i, 1, &eeprom_data);
5567                 printf("%04x ", eeprom_data);
5568         }
5569         printf("\n");
5570 }
5571
5572 static int
5573 em_sysctl_int_delay(SYSCTL_HANDLER_ARGS)
5574 {
5575         struct em_int_delay_info *info;
5576         struct adapter *adapter;
5577         u32 regval;
5578         int error, usecs, ticks;
5579
5580         info = (struct em_int_delay_info *)arg1;
5581         usecs = info->value;
5582         error = sysctl_handle_int(oidp, &usecs, 0, req);
5583         if (error != 0 || req->newptr == NULL)
5584                 return (error);
5585         if (usecs < 0 || usecs > EM_TICKS_TO_USECS(65535))
5586                 return (EINVAL);
5587         info->value = usecs;
5588         ticks = EM_USECS_TO_TICKS(usecs);
5589         if (info->offset == E1000_ITR)  /* units are 256ns here */
5590                 ticks *= 4;
5591
5592         adapter = info->adapter;
5593         
5594         EM_CORE_LOCK(adapter);
5595         regval = E1000_READ_OFFSET(&adapter->hw, info->offset);
5596         regval = (regval & ~0xffff) | (ticks & 0xffff);
5597         /* Handle a few special cases. */
5598         switch (info->offset) {
5599         case E1000_RDTR:
5600                 break;
5601         case E1000_TIDV:
5602                 if (ticks == 0) {
5603                         adapter->txd_cmd &= ~E1000_TXD_CMD_IDE;
5604                         /* Don't write 0 into the TIDV register. */
5605                         regval++;
5606                 } else
5607                         adapter->txd_cmd |= E1000_TXD_CMD_IDE;
5608                 break;
5609         }
5610         E1000_WRITE_OFFSET(&adapter->hw, info->offset, regval);
5611         EM_CORE_UNLOCK(adapter);
5612         return (0);
5613 }
5614
5615 static void
5616 em_add_int_delay_sysctl(struct adapter *adapter, const char *name,
5617         const char *description, struct em_int_delay_info *info,
5618         int offset, int value)
5619 {
5620         info->adapter = adapter;
5621         info->offset = offset;
5622         info->value = value;
5623         SYSCTL_ADD_PROC(device_get_sysctl_ctx(adapter->dev),
5624             SYSCTL_CHILDREN(device_get_sysctl_tree(adapter->dev)),
5625             OID_AUTO, name, CTLTYPE_INT|CTLFLAG_RW,
5626             info, 0, em_sysctl_int_delay, "I", description);
5627 }
5628
5629 static void
5630 em_set_sysctl_value(struct adapter *adapter, const char *name,
5631         const char *description, int *limit, int value)
5632 {
5633         *limit = value;
5634         SYSCTL_ADD_INT(device_get_sysctl_ctx(adapter->dev),
5635             SYSCTL_CHILDREN(device_get_sysctl_tree(adapter->dev)),
5636             OID_AUTO, name, CTLTYPE_INT|CTLFLAG_RW, limit, value, description);
5637 }
5638
5639
5640 /*
5641 ** Set flow control using sysctl:
5642 ** Flow control values:
5643 **      0 - off
5644 **      1 - rx pause
5645 **      2 - tx pause
5646 **      3 - full
5647 */
5648 static int
5649 em_set_flowcntl(SYSCTL_HANDLER_ARGS)
5650 {       
5651         int             error;
5652         static int      input = 3; /* default is full */
5653         struct adapter  *adapter = (struct adapter *) arg1;
5654                     
5655         error = sysctl_handle_int(oidp, &input, 0, req);
5656     
5657         if ((error) || (req->newptr == NULL))
5658                 return (error);
5659                 
5660         if (input == adapter->fc) /* no change? */
5661                 return (error);
5662
5663         switch (input) {
5664                 case e1000_fc_rx_pause:
5665                 case e1000_fc_tx_pause:
5666                 case e1000_fc_full:
5667                 case e1000_fc_none:
5668                         adapter->hw.fc.requested_mode = input;
5669                         adapter->fc = input;
5670                         break;
5671                 default:
5672                         /* Do nothing */
5673                         return (error);
5674         }
5675
5676         adapter->hw.fc.current_mode = adapter->hw.fc.requested_mode;
5677         e1000_force_mac_fc(&adapter->hw);
5678         return (error);
5679 }
5680
5681 /*
5682 ** Manage Energy Efficient Ethernet:
5683 ** Control values:
5684 **     0/1 - enabled/disabled
5685 */
5686 static int
5687 em_sysctl_eee(SYSCTL_HANDLER_ARGS)
5688 {
5689        struct adapter *adapter = (struct adapter *) arg1;
5690        int             error, value;
5691
5692        value = adapter->hw.dev_spec.ich8lan.eee_disable;
5693        error = sysctl_handle_int(oidp, &value, 0, req);
5694        if (error || req->newptr == NULL)
5695                return (error);
5696        EM_CORE_LOCK(adapter);
5697        adapter->hw.dev_spec.ich8lan.eee_disable = (value != 0);
5698        em_init_locked(adapter);
5699        EM_CORE_UNLOCK(adapter);
5700        return (0);
5701 }
5702
5703 static int
5704 em_sysctl_debug_info(SYSCTL_HANDLER_ARGS)
5705 {
5706         struct adapter *adapter;
5707         int error;
5708         int result;
5709
5710         result = -1;
5711         error = sysctl_handle_int(oidp, &result, 0, req);
5712
5713         if (error || !req->newptr)
5714                 return (error);
5715
5716         if (result == 1) {
5717                 adapter = (struct adapter *)arg1;
5718                 em_print_debug_info(adapter);
5719         }
5720
5721         return (error);
5722 }
5723
5724 /*
5725 ** This routine is meant to be fluid, add whatever is
5726 ** needed for debugging a problem.  -jfv
5727 */
5728 static void
5729 em_print_debug_info(struct adapter *adapter)
5730 {
5731         device_t dev = adapter->dev;
5732         struct tx_ring *txr = adapter->tx_rings;
5733         struct rx_ring *rxr = adapter->rx_rings;
5734
5735         if (if_getdrvflags(adapter->ifp) & IFF_DRV_RUNNING)
5736                 printf("Interface is RUNNING ");
5737         else
5738                 printf("Interface is NOT RUNNING\n");
5739
5740         if (if_getdrvflags(adapter->ifp) & IFF_DRV_OACTIVE)
5741                 printf("and INACTIVE\n");
5742         else
5743                 printf("and ACTIVE\n");
5744
5745         device_printf(dev, "hw tdh = %d, hw tdt = %d\n",
5746             E1000_READ_REG(&adapter->hw, E1000_TDH(0)),
5747             E1000_READ_REG(&adapter->hw, E1000_TDT(0)));
5748         device_printf(dev, "hw rdh = %d, hw rdt = %d\n",
5749             E1000_READ_REG(&adapter->hw, E1000_RDH(0)),
5750             E1000_READ_REG(&adapter->hw, E1000_RDT(0)));
5751         device_printf(dev, "Tx Queue Status = %d\n", txr->queue_status);
5752         device_printf(dev, "TX descriptors avail = %d\n",
5753             txr->tx_avail);
5754         device_printf(dev, "Tx Descriptors avail failure = %ld\n",
5755             txr->no_desc_avail);
5756         device_printf(dev, "RX discarded packets = %ld\n",
5757             rxr->rx_discarded);
5758         device_printf(dev, "RX Next to Check = %d\n", rxr->next_to_check);
5759         device_printf(dev, "RX Next to Refresh = %d\n", rxr->next_to_refresh);
5760 }