]> CyberLeo.Net >> Repos - FreeBSD/releng/9.2.git/blob - sys/dev/e1000/if_em.c
Merge r254306:
[FreeBSD/releng/9.2.git] / sys / dev / e1000 / if_em.c
1 /******************************************************************************
2
3   Copyright (c) 2001-2013, Intel Corporation 
4   All rights reserved.
5   
6   Redistribution and use in source and binary forms, with or without 
7   modification, are permitted provided that the following conditions are met:
8   
9    1. Redistributions of source code must retain the above copyright notice, 
10       this list of conditions and the following disclaimer.
11   
12    2. Redistributions in binary form must reproduce the above copyright 
13       notice, this list of conditions and the following disclaimer in the 
14       documentation and/or other materials provided with the distribution.
15   
16    3. Neither the name of the Intel Corporation nor the names of its 
17       contributors may be used to endorse or promote products derived from 
18       this software without specific prior written permission.
19   
20   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
21   AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 
22   IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 
23   ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE 
24   LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 
25   CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 
26   SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 
27   INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 
28   CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 
29   ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
30   POSSIBILITY OF SUCH DAMAGE.
31
32 ******************************************************************************/
33 /*$FreeBSD$*/
34
35 #include "opt_inet.h"
36 #include "opt_inet6.h"
37
38 #ifdef HAVE_KERNEL_OPTION_HEADERS
39 #include "opt_device_polling.h"
40 #endif
41
42 #include <sys/param.h>
43 #include <sys/systm.h>
44 #if __FreeBSD_version >= 800000
45 #include <sys/buf_ring.h>
46 #endif
47 #include <sys/bus.h>
48 #include <sys/endian.h>
49 #include <sys/kernel.h>
50 #include <sys/kthread.h>
51 #include <sys/malloc.h>
52 #include <sys/mbuf.h>
53 #include <sys/module.h>
54 #include <sys/rman.h>
55 #include <sys/socket.h>
56 #include <sys/sockio.h>
57 #include <sys/sysctl.h>
58 #include <sys/taskqueue.h>
59 #include <sys/eventhandler.h>
60 #include <machine/bus.h>
61 #include <machine/resource.h>
62
63 #include <net/bpf.h>
64 #include <net/ethernet.h>
65 #include <net/if.h>
66 #include <net/if_arp.h>
67 #include <net/if_dl.h>
68 #include <net/if_media.h>
69
70 #include <net/if_types.h>
71 #include <net/if_vlan_var.h>
72
73 #include <netinet/in_systm.h>
74 #include <netinet/in.h>
75 #include <netinet/if_ether.h>
76 #include <netinet/ip.h>
77 #include <netinet/ip6.h>
78 #include <netinet/tcp.h>
79 #include <netinet/udp.h>
80
81 #include <machine/in_cksum.h>
82 #include <dev/led/led.h>
83 #include <dev/pci/pcivar.h>
84 #include <dev/pci/pcireg.h>
85
86 #include "e1000_api.h"
87 #include "e1000_82571.h"
88 #include "if_em.h"
89
90 /*********************************************************************
91  *  Set this to one to display debug statistics
92  *********************************************************************/
93 int     em_display_debug_stats = 0;
94
95 /*********************************************************************
96  *  Driver version:
97  *********************************************************************/
98 char em_driver_version[] = "7.3.8";
99
100 /*********************************************************************
101  *  PCI Device ID Table
102  *
103  *  Used by probe to select devices to load on
104  *  Last field stores an index into e1000_strings
105  *  Last entry must be all 0s
106  *
107  *  { Vendor ID, Device ID, SubVendor ID, SubDevice ID, String Index }
108  *********************************************************************/
109
110 static em_vendor_info_t em_vendor_info_array[] =
111 {
112         /* Intel(R) PRO/1000 Network Connection */
113         { 0x8086, E1000_DEV_ID_82571EB_COPPER,  PCI_ANY_ID, PCI_ANY_ID, 0},
114         { 0x8086, E1000_DEV_ID_82571EB_FIBER,   PCI_ANY_ID, PCI_ANY_ID, 0},
115         { 0x8086, E1000_DEV_ID_82571EB_SERDES,  PCI_ANY_ID, PCI_ANY_ID, 0},
116         { 0x8086, E1000_DEV_ID_82571EB_SERDES_DUAL,
117                                                 PCI_ANY_ID, PCI_ANY_ID, 0},
118         { 0x8086, E1000_DEV_ID_82571EB_SERDES_QUAD,
119                                                 PCI_ANY_ID, PCI_ANY_ID, 0},
120         { 0x8086, E1000_DEV_ID_82571EB_QUAD_COPPER,
121                                                 PCI_ANY_ID, PCI_ANY_ID, 0},
122         { 0x8086, E1000_DEV_ID_82571EB_QUAD_COPPER_LP,
123                                                 PCI_ANY_ID, PCI_ANY_ID, 0},
124         { 0x8086, E1000_DEV_ID_82571EB_QUAD_FIBER,
125                                                 PCI_ANY_ID, PCI_ANY_ID, 0},
126         { 0x8086, E1000_DEV_ID_82571PT_QUAD_COPPER,
127                                                 PCI_ANY_ID, PCI_ANY_ID, 0},
128         { 0x8086, E1000_DEV_ID_82572EI_COPPER,  PCI_ANY_ID, PCI_ANY_ID, 0},
129         { 0x8086, E1000_DEV_ID_82572EI_FIBER,   PCI_ANY_ID, PCI_ANY_ID, 0},
130         { 0x8086, E1000_DEV_ID_82572EI_SERDES,  PCI_ANY_ID, PCI_ANY_ID, 0},
131         { 0x8086, E1000_DEV_ID_82572EI,         PCI_ANY_ID, PCI_ANY_ID, 0},
132
133         { 0x8086, E1000_DEV_ID_82573E,          PCI_ANY_ID, PCI_ANY_ID, 0},
134         { 0x8086, E1000_DEV_ID_82573E_IAMT,     PCI_ANY_ID, PCI_ANY_ID, 0},
135         { 0x8086, E1000_DEV_ID_82573L,          PCI_ANY_ID, PCI_ANY_ID, 0},
136         { 0x8086, E1000_DEV_ID_82583V,          PCI_ANY_ID, PCI_ANY_ID, 0},
137         { 0x8086, E1000_DEV_ID_80003ES2LAN_COPPER_SPT,
138                                                 PCI_ANY_ID, PCI_ANY_ID, 0},
139         { 0x8086, E1000_DEV_ID_80003ES2LAN_SERDES_SPT,
140                                                 PCI_ANY_ID, PCI_ANY_ID, 0},
141         { 0x8086, E1000_DEV_ID_80003ES2LAN_COPPER_DPT,
142                                                 PCI_ANY_ID, PCI_ANY_ID, 0},
143         { 0x8086, E1000_DEV_ID_80003ES2LAN_SERDES_DPT,
144                                                 PCI_ANY_ID, PCI_ANY_ID, 0},
145         { 0x8086, E1000_DEV_ID_ICH8_IGP_M_AMT,  PCI_ANY_ID, PCI_ANY_ID, 0},
146         { 0x8086, E1000_DEV_ID_ICH8_IGP_AMT,    PCI_ANY_ID, PCI_ANY_ID, 0},
147         { 0x8086, E1000_DEV_ID_ICH8_IGP_C,      PCI_ANY_ID, PCI_ANY_ID, 0},
148         { 0x8086, E1000_DEV_ID_ICH8_IFE,        PCI_ANY_ID, PCI_ANY_ID, 0},
149         { 0x8086, E1000_DEV_ID_ICH8_IFE_GT,     PCI_ANY_ID, PCI_ANY_ID, 0},
150         { 0x8086, E1000_DEV_ID_ICH8_IFE_G,      PCI_ANY_ID, PCI_ANY_ID, 0},
151         { 0x8086, E1000_DEV_ID_ICH8_IGP_M,      PCI_ANY_ID, PCI_ANY_ID, 0},
152         { 0x8086, E1000_DEV_ID_ICH8_82567V_3,   PCI_ANY_ID, PCI_ANY_ID, 0},
153         { 0x8086, E1000_DEV_ID_ICH9_IGP_M_AMT,  PCI_ANY_ID, PCI_ANY_ID, 0},
154         { 0x8086, E1000_DEV_ID_ICH9_IGP_AMT,    PCI_ANY_ID, PCI_ANY_ID, 0},
155         { 0x8086, E1000_DEV_ID_ICH9_IGP_C,      PCI_ANY_ID, PCI_ANY_ID, 0},
156         { 0x8086, E1000_DEV_ID_ICH9_IGP_M,      PCI_ANY_ID, PCI_ANY_ID, 0},
157         { 0x8086, E1000_DEV_ID_ICH9_IGP_M_V,    PCI_ANY_ID, PCI_ANY_ID, 0},
158         { 0x8086, E1000_DEV_ID_ICH9_IFE,        PCI_ANY_ID, PCI_ANY_ID, 0},
159         { 0x8086, E1000_DEV_ID_ICH9_IFE_GT,     PCI_ANY_ID, PCI_ANY_ID, 0},
160         { 0x8086, E1000_DEV_ID_ICH9_IFE_G,      PCI_ANY_ID, PCI_ANY_ID, 0},
161         { 0x8086, E1000_DEV_ID_ICH9_BM,         PCI_ANY_ID, PCI_ANY_ID, 0},
162         { 0x8086, E1000_DEV_ID_82574L,          PCI_ANY_ID, PCI_ANY_ID, 0},
163         { 0x8086, E1000_DEV_ID_82574LA,         PCI_ANY_ID, PCI_ANY_ID, 0},
164         { 0x8086, E1000_DEV_ID_ICH10_R_BM_LM,   PCI_ANY_ID, PCI_ANY_ID, 0},
165         { 0x8086, E1000_DEV_ID_ICH10_R_BM_LF,   PCI_ANY_ID, PCI_ANY_ID, 0},
166         { 0x8086, E1000_DEV_ID_ICH10_R_BM_V,    PCI_ANY_ID, PCI_ANY_ID, 0},
167         { 0x8086, E1000_DEV_ID_ICH10_D_BM_LM,   PCI_ANY_ID, PCI_ANY_ID, 0},
168         { 0x8086, E1000_DEV_ID_ICH10_D_BM_LF,   PCI_ANY_ID, PCI_ANY_ID, 0},
169         { 0x8086, E1000_DEV_ID_ICH10_D_BM_V,    PCI_ANY_ID, PCI_ANY_ID, 0},
170         { 0x8086, E1000_DEV_ID_PCH_M_HV_LM,     PCI_ANY_ID, PCI_ANY_ID, 0},
171         { 0x8086, E1000_DEV_ID_PCH_M_HV_LC,     PCI_ANY_ID, PCI_ANY_ID, 0},
172         { 0x8086, E1000_DEV_ID_PCH_D_HV_DM,     PCI_ANY_ID, PCI_ANY_ID, 0},
173         { 0x8086, E1000_DEV_ID_PCH_D_HV_DC,     PCI_ANY_ID, PCI_ANY_ID, 0},
174         { 0x8086, E1000_DEV_ID_PCH2_LV_LM,      PCI_ANY_ID, PCI_ANY_ID, 0},
175         { 0x8086, E1000_DEV_ID_PCH2_LV_V,       PCI_ANY_ID, PCI_ANY_ID, 0},
176         { 0x8086, E1000_DEV_ID_PCH_LPT_I217_LM, PCI_ANY_ID, PCI_ANY_ID, 0},
177         { 0x8086, E1000_DEV_ID_PCH_LPT_I217_V,  PCI_ANY_ID, PCI_ANY_ID, 0},
178         { 0x8086, E1000_DEV_ID_PCH_LPTLP_I218_LM,
179                                                 PCI_ANY_ID, PCI_ANY_ID, 0},
180         { 0x8086, E1000_DEV_ID_PCH_LPTLP_I218_V,
181                                                 PCI_ANY_ID, PCI_ANY_ID, 0},
182         /* required last entry */
183         { 0, 0, 0, 0, 0}
184 };
185
186 /*********************************************************************
187  *  Table of branding strings for all supported NICs.
188  *********************************************************************/
189
190 static char *em_strings[] = {
191         "Intel(R) PRO/1000 Network Connection"
192 };
193
194 /*********************************************************************
195  *  Function prototypes
196  *********************************************************************/
197 static int      em_probe(device_t);
198 static int      em_attach(device_t);
199 static int      em_detach(device_t);
200 static int      em_shutdown(device_t);
201 static int      em_suspend(device_t);
202 static int      em_resume(device_t);
203 #ifdef EM_MULTIQUEUE
204 static int      em_mq_start(struct ifnet *, struct mbuf *);
205 static int      em_mq_start_locked(struct ifnet *,
206                     struct tx_ring *, struct mbuf *);
207 static void     em_qflush(struct ifnet *);
208 #else
209 static void     em_start(struct ifnet *);
210 static void     em_start_locked(struct ifnet *, struct tx_ring *);
211 #endif
212 static int      em_ioctl(struct ifnet *, u_long, caddr_t);
213 static void     em_init(void *);
214 static void     em_init_locked(struct adapter *);
215 static void     em_stop(void *);
216 static void     em_media_status(struct ifnet *, struct ifmediareq *);
217 static int      em_media_change(struct ifnet *);
218 static void     em_identify_hardware(struct adapter *);
219 static int      em_allocate_pci_resources(struct adapter *);
220 static int      em_allocate_legacy(struct adapter *);
221 static int      em_allocate_msix(struct adapter *);
222 static int      em_allocate_queues(struct adapter *);
223 static int      em_setup_msix(struct adapter *);
224 static void     em_free_pci_resources(struct adapter *);
225 static void     em_local_timer(void *);
226 static void     em_reset(struct adapter *);
227 static int      em_setup_interface(device_t, struct adapter *);
228
229 static void     em_setup_transmit_structures(struct adapter *);
230 static void     em_initialize_transmit_unit(struct adapter *);
231 static int      em_allocate_transmit_buffers(struct tx_ring *);
232 static void     em_free_transmit_structures(struct adapter *);
233 static void     em_free_transmit_buffers(struct tx_ring *);
234
235 static int      em_setup_receive_structures(struct adapter *);
236 static int      em_allocate_receive_buffers(struct rx_ring *);
237 static void     em_initialize_receive_unit(struct adapter *);
238 static void     em_free_receive_structures(struct adapter *);
239 static void     em_free_receive_buffers(struct rx_ring *);
240
241 static void     em_enable_intr(struct adapter *);
242 static void     em_disable_intr(struct adapter *);
243 static void     em_update_stats_counters(struct adapter *);
244 static void     em_add_hw_stats(struct adapter *adapter);
245 static void     em_txeof(struct tx_ring *);
246 static bool     em_rxeof(struct rx_ring *, int, int *);
247 #ifndef __NO_STRICT_ALIGNMENT
248 static int      em_fixup_rx(struct rx_ring *);
249 #endif
250 static void     em_receive_checksum(struct e1000_rx_desc *, struct mbuf *);
251 static void     em_transmit_checksum_setup(struct tx_ring *, struct mbuf *, int,
252                     struct ip *, u32 *, u32 *);
253 static void     em_tso_setup(struct tx_ring *, struct mbuf *, int, struct ip *,
254                     struct tcphdr *, u32 *, u32 *);
255 static void     em_set_promisc(struct adapter *);
256 static void     em_disable_promisc(struct adapter *);
257 static void     em_set_multi(struct adapter *);
258 static void     em_update_link_status(struct adapter *);
259 static void     em_refresh_mbufs(struct rx_ring *, int);
260 static void     em_register_vlan(void *, struct ifnet *, u16);
261 static void     em_unregister_vlan(void *, struct ifnet *, u16);
262 static void     em_setup_vlan_hw_support(struct adapter *);
263 static int      em_xmit(struct tx_ring *, struct mbuf **);
264 static int      em_dma_malloc(struct adapter *, bus_size_t,
265                     struct em_dma_alloc *, int);
266 static void     em_dma_free(struct adapter *, struct em_dma_alloc *);
267 static int      em_sysctl_nvm_info(SYSCTL_HANDLER_ARGS);
268 static void     em_print_nvm_info(struct adapter *);
269 static int      em_sysctl_debug_info(SYSCTL_HANDLER_ARGS);
270 static void     em_print_debug_info(struct adapter *);
271 static int      em_is_valid_ether_addr(u8 *);
272 static int      em_sysctl_int_delay(SYSCTL_HANDLER_ARGS);
273 static void     em_add_int_delay_sysctl(struct adapter *, const char *,
274                     const char *, struct em_int_delay_info *, int, int);
275 /* Management and WOL Support */
276 static void     em_init_manageability(struct adapter *);
277 static void     em_release_manageability(struct adapter *);
278 static void     em_get_hw_control(struct adapter *);
279 static void     em_release_hw_control(struct adapter *);
280 static void     em_get_wakeup(device_t);
281 static void     em_enable_wakeup(device_t);
282 static int      em_enable_phy_wakeup(struct adapter *);
283 static void     em_led_func(void *, int);
284 static void     em_disable_aspm(struct adapter *);
285
286 static int      em_irq_fast(void *);
287
288 /* MSIX handlers */
289 static void     em_msix_tx(void *);
290 static void     em_msix_rx(void *);
291 static void     em_msix_link(void *);
292 static void     em_handle_tx(void *context, int pending);
293 static void     em_handle_rx(void *context, int pending);
294 static void     em_handle_link(void *context, int pending);
295
296 static void     em_set_sysctl_value(struct adapter *, const char *,
297                     const char *, int *, int);
298 static int      em_set_flowcntl(SYSCTL_HANDLER_ARGS);
299 static int      em_sysctl_eee(SYSCTL_HANDLER_ARGS);
300
301 static __inline void em_rx_discard(struct rx_ring *, int);
302
303 #ifdef DEVICE_POLLING
304 static poll_handler_t em_poll;
305 #endif /* POLLING */
306
307 /*********************************************************************
308  *  FreeBSD Device Interface Entry Points
309  *********************************************************************/
310
311 static device_method_t em_methods[] = {
312         /* Device interface */
313         DEVMETHOD(device_probe, em_probe),
314         DEVMETHOD(device_attach, em_attach),
315         DEVMETHOD(device_detach, em_detach),
316         DEVMETHOD(device_shutdown, em_shutdown),
317         DEVMETHOD(device_suspend, em_suspend),
318         DEVMETHOD(device_resume, em_resume),
319         DEVMETHOD_END
320 };
321
322 static driver_t em_driver = {
323         "em", em_methods, sizeof(struct adapter),
324 };
325
326 devclass_t em_devclass;
327 DRIVER_MODULE(em, pci, em_driver, em_devclass, 0, 0);
328 MODULE_DEPEND(em, pci, 1, 1, 1);
329 MODULE_DEPEND(em, ether, 1, 1, 1);
330
331 /*********************************************************************
332  *  Tunable default values.
333  *********************************************************************/
334
335 #define EM_TICKS_TO_USECS(ticks)        ((1024 * (ticks) + 500) / 1000)
336 #define EM_USECS_TO_TICKS(usecs)        ((1000 * (usecs) + 512) / 1024)
337 #define M_TSO_LEN                       66
338
339 #define MAX_INTS_PER_SEC        8000
340 #define DEFAULT_ITR             (1000000000/(MAX_INTS_PER_SEC * 256))
341
342 /* Allow common code without TSO */
343 #ifndef CSUM_TSO
344 #define CSUM_TSO        0
345 #endif
346
347 static SYSCTL_NODE(_hw, OID_AUTO, em, CTLFLAG_RD, 0, "EM driver parameters");
348
349 static int em_tx_int_delay_dflt = EM_TICKS_TO_USECS(EM_TIDV);
350 static int em_rx_int_delay_dflt = EM_TICKS_TO_USECS(EM_RDTR);
351 TUNABLE_INT("hw.em.tx_int_delay", &em_tx_int_delay_dflt);
352 TUNABLE_INT("hw.em.rx_int_delay", &em_rx_int_delay_dflt);
353 SYSCTL_INT(_hw_em, OID_AUTO, tx_int_delay, CTLFLAG_RDTUN, &em_tx_int_delay_dflt,
354     0, "Default transmit interrupt delay in usecs");
355 SYSCTL_INT(_hw_em, OID_AUTO, rx_int_delay, CTLFLAG_RDTUN, &em_rx_int_delay_dflt,
356     0, "Default receive interrupt delay in usecs");
357
358 static int em_tx_abs_int_delay_dflt = EM_TICKS_TO_USECS(EM_TADV);
359 static int em_rx_abs_int_delay_dflt = EM_TICKS_TO_USECS(EM_RADV);
360 TUNABLE_INT("hw.em.tx_abs_int_delay", &em_tx_abs_int_delay_dflt);
361 TUNABLE_INT("hw.em.rx_abs_int_delay", &em_rx_abs_int_delay_dflt);
362 SYSCTL_INT(_hw_em, OID_AUTO, tx_abs_int_delay, CTLFLAG_RDTUN,
363     &em_tx_abs_int_delay_dflt, 0,
364     "Default transmit interrupt delay limit in usecs");
365 SYSCTL_INT(_hw_em, OID_AUTO, rx_abs_int_delay, CTLFLAG_RDTUN,
366     &em_rx_abs_int_delay_dflt, 0,
367     "Default receive interrupt delay limit in usecs");
368
369 static int em_rxd = EM_DEFAULT_RXD;
370 static int em_txd = EM_DEFAULT_TXD;
371 TUNABLE_INT("hw.em.rxd", &em_rxd);
372 TUNABLE_INT("hw.em.txd", &em_txd);
373 SYSCTL_INT(_hw_em, OID_AUTO, rxd, CTLFLAG_RDTUN, &em_rxd, 0,
374     "Number of receive descriptors per queue");
375 SYSCTL_INT(_hw_em, OID_AUTO, txd, CTLFLAG_RDTUN, &em_txd, 0,
376     "Number of transmit descriptors per queue");
377
378 static int em_smart_pwr_down = FALSE;
379 TUNABLE_INT("hw.em.smart_pwr_down", &em_smart_pwr_down);
380 SYSCTL_INT(_hw_em, OID_AUTO, smart_pwr_down, CTLFLAG_RDTUN, &em_smart_pwr_down,
381     0, "Set to true to leave smart power down enabled on newer adapters");
382
383 /* Controls whether promiscuous also shows bad packets */
384 static int em_debug_sbp = FALSE;
385 TUNABLE_INT("hw.em.sbp", &em_debug_sbp);
386 SYSCTL_INT(_hw_em, OID_AUTO, sbp, CTLFLAG_RDTUN, &em_debug_sbp, 0,
387     "Show bad packets in promiscuous mode");
388
389 static int em_enable_msix = TRUE;
390 TUNABLE_INT("hw.em.enable_msix", &em_enable_msix);
391 SYSCTL_INT(_hw_em, OID_AUTO, enable_msix, CTLFLAG_RDTUN, &em_enable_msix, 0,
392     "Enable MSI-X interrupts");
393
394 /* How many packets rxeof tries to clean at a time */
395 static int em_rx_process_limit = 100;
396 TUNABLE_INT("hw.em.rx_process_limit", &em_rx_process_limit);
397 SYSCTL_INT(_hw_em, OID_AUTO, rx_process_limit, CTLFLAG_RDTUN,
398     &em_rx_process_limit, 0,
399     "Maximum number of received packets to process "
400     "at a time, -1 means unlimited");
401
402 /* Energy efficient ethernet - default to OFF */
403 static int eee_setting = 1;
404 TUNABLE_INT("hw.em.eee_setting", &eee_setting);
405 SYSCTL_INT(_hw_em, OID_AUTO, eee_setting, CTLFLAG_RDTUN, &eee_setting, 0,
406     "Enable Energy Efficient Ethernet");
407
408 /* Global used in WOL setup with multiport cards */
409 static int global_quad_port_a = 0;
410
411 #ifdef DEV_NETMAP       /* see ixgbe.c for details */
412 #include <dev/netmap/if_em_netmap.h>
413 #endif /* DEV_NETMAP */
414
415 /*********************************************************************
416  *  Device identification routine
417  *
418  *  em_probe determines if the driver should be loaded on
419  *  adapter based on PCI vendor/device id of the adapter.
420  *
421  *  return BUS_PROBE_DEFAULT on success, positive on failure
422  *********************************************************************/
423
424 static int
425 em_probe(device_t dev)
426 {
427         char            adapter_name[60];
428         u16             pci_vendor_id = 0;
429         u16             pci_device_id = 0;
430         u16             pci_subvendor_id = 0;
431         u16             pci_subdevice_id = 0;
432         em_vendor_info_t *ent;
433
434         INIT_DEBUGOUT("em_probe: begin");
435
436         pci_vendor_id = pci_get_vendor(dev);
437         if (pci_vendor_id != EM_VENDOR_ID)
438                 return (ENXIO);
439
440         pci_device_id = pci_get_device(dev);
441         pci_subvendor_id = pci_get_subvendor(dev);
442         pci_subdevice_id = pci_get_subdevice(dev);
443
444         ent = em_vendor_info_array;
445         while (ent->vendor_id != 0) {
446                 if ((pci_vendor_id == ent->vendor_id) &&
447                     (pci_device_id == ent->device_id) &&
448
449                     ((pci_subvendor_id == ent->subvendor_id) ||
450                     (ent->subvendor_id == PCI_ANY_ID)) &&
451
452                     ((pci_subdevice_id == ent->subdevice_id) ||
453                     (ent->subdevice_id == PCI_ANY_ID))) {
454                         sprintf(adapter_name, "%s %s",
455                                 em_strings[ent->index],
456                                 em_driver_version);
457                         device_set_desc_copy(dev, adapter_name);
458                         return (BUS_PROBE_DEFAULT);
459                 }
460                 ent++;
461         }
462
463         return (ENXIO);
464 }
465
466 /*********************************************************************
467  *  Device initialization routine
468  *
469  *  The attach entry point is called when the driver is being loaded.
470  *  This routine identifies the type of hardware, allocates all resources
471  *  and initializes the hardware.
472  *
473  *  return 0 on success, positive on failure
474  *********************************************************************/
475
476 static int
477 em_attach(device_t dev)
478 {
479         struct adapter  *adapter;
480         struct e1000_hw *hw;
481         int             error = 0;
482
483         INIT_DEBUGOUT("em_attach: begin");
484
485         if (resource_disabled("em", device_get_unit(dev))) {
486                 device_printf(dev, "Disabled by device hint\n");
487                 return (ENXIO);
488         }
489
490         adapter = device_get_softc(dev);
491         adapter->dev = adapter->osdep.dev = dev;
492         hw = &adapter->hw;
493         EM_CORE_LOCK_INIT(adapter, device_get_nameunit(dev));
494
495         /* SYSCTL stuff */
496         SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
497             SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
498             OID_AUTO, "nvm", CTLTYPE_INT|CTLFLAG_RW, adapter, 0,
499             em_sysctl_nvm_info, "I", "NVM Information");
500
501         SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
502             SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
503             OID_AUTO, "debug", CTLTYPE_INT|CTLFLAG_RW, adapter, 0,
504             em_sysctl_debug_info, "I", "Debug Information");
505
506         SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
507             SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
508             OID_AUTO, "fc", CTLTYPE_INT|CTLFLAG_RW, adapter, 0,
509             em_set_flowcntl, "I", "Flow Control");
510
511         callout_init_mtx(&adapter->timer, &adapter->core_mtx, 0);
512
513         /* Determine hardware and mac info */
514         em_identify_hardware(adapter);
515
516         /* Setup PCI resources */
517         if (em_allocate_pci_resources(adapter)) {
518                 device_printf(dev, "Allocation of PCI resources failed\n");
519                 error = ENXIO;
520                 goto err_pci;
521         }
522
523         /*
524         ** For ICH8 and family we need to
525         ** map the flash memory, and this
526         ** must happen after the MAC is 
527         ** identified
528         */
529         if ((hw->mac.type == e1000_ich8lan) ||
530             (hw->mac.type == e1000_ich9lan) ||
531             (hw->mac.type == e1000_ich10lan) ||
532             (hw->mac.type == e1000_pchlan) ||
533             (hw->mac.type == e1000_pch2lan) ||
534             (hw->mac.type == e1000_pch_lpt)) {
535                 int rid = EM_BAR_TYPE_FLASH;
536                 adapter->flash = bus_alloc_resource_any(dev,
537                     SYS_RES_MEMORY, &rid, RF_ACTIVE);
538                 if (adapter->flash == NULL) {
539                         device_printf(dev, "Mapping of Flash failed\n");
540                         error = ENXIO;
541                         goto err_pci;
542                 }
543                 /* This is used in the shared code */
544                 hw->flash_address = (u8 *)adapter->flash;
545                 adapter->osdep.flash_bus_space_tag =
546                     rman_get_bustag(adapter->flash);
547                 adapter->osdep.flash_bus_space_handle =
548                     rman_get_bushandle(adapter->flash);
549         }
550
551         /* Do Shared Code initialization */
552         if (e1000_setup_init_funcs(hw, TRUE)) {
553                 device_printf(dev, "Setup of Shared code failed\n");
554                 error = ENXIO;
555                 goto err_pci;
556         }
557
558         e1000_get_bus_info(hw);
559
560         /* Set up some sysctls for the tunable interrupt delays */
561         em_add_int_delay_sysctl(adapter, "rx_int_delay",
562             "receive interrupt delay in usecs", &adapter->rx_int_delay,
563             E1000_REGISTER(hw, E1000_RDTR), em_rx_int_delay_dflt);
564         em_add_int_delay_sysctl(adapter, "tx_int_delay",
565             "transmit interrupt delay in usecs", &adapter->tx_int_delay,
566             E1000_REGISTER(hw, E1000_TIDV), em_tx_int_delay_dflt);
567         em_add_int_delay_sysctl(adapter, "rx_abs_int_delay",
568             "receive interrupt delay limit in usecs",
569             &adapter->rx_abs_int_delay,
570             E1000_REGISTER(hw, E1000_RADV),
571             em_rx_abs_int_delay_dflt);
572         em_add_int_delay_sysctl(adapter, "tx_abs_int_delay",
573             "transmit interrupt delay limit in usecs",
574             &adapter->tx_abs_int_delay,
575             E1000_REGISTER(hw, E1000_TADV),
576             em_tx_abs_int_delay_dflt);
577         em_add_int_delay_sysctl(adapter, "itr",
578             "interrupt delay limit in usecs/4",
579             &adapter->tx_itr,
580             E1000_REGISTER(hw, E1000_ITR),
581             DEFAULT_ITR);
582
583         /* Sysctl for limiting the amount of work done in the taskqueue */
584         em_set_sysctl_value(adapter, "rx_processing_limit",
585             "max number of rx packets to process", &adapter->rx_process_limit,
586             em_rx_process_limit);
587
588         /*
589          * Validate number of transmit and receive descriptors. It
590          * must not exceed hardware maximum, and must be multiple
591          * of E1000_DBA_ALIGN.
592          */
593         if (((em_txd * sizeof(struct e1000_tx_desc)) % EM_DBA_ALIGN) != 0 ||
594             (em_txd > EM_MAX_TXD) || (em_txd < EM_MIN_TXD)) {
595                 device_printf(dev, "Using %d TX descriptors instead of %d!\n",
596                     EM_DEFAULT_TXD, em_txd);
597                 adapter->num_tx_desc = EM_DEFAULT_TXD;
598         } else
599                 adapter->num_tx_desc = em_txd;
600
601         if (((em_rxd * sizeof(struct e1000_rx_desc)) % EM_DBA_ALIGN) != 0 ||
602             (em_rxd > EM_MAX_RXD) || (em_rxd < EM_MIN_RXD)) {
603                 device_printf(dev, "Using %d RX descriptors instead of %d!\n",
604                     EM_DEFAULT_RXD, em_rxd);
605                 adapter->num_rx_desc = EM_DEFAULT_RXD;
606         } else
607                 adapter->num_rx_desc = em_rxd;
608
609         hw->mac.autoneg = DO_AUTO_NEG;
610         hw->phy.autoneg_wait_to_complete = FALSE;
611         hw->phy.autoneg_advertised = AUTONEG_ADV_DEFAULT;
612
613         /* Copper options */
614         if (hw->phy.media_type == e1000_media_type_copper) {
615                 hw->phy.mdix = AUTO_ALL_MODES;
616                 hw->phy.disable_polarity_correction = FALSE;
617                 hw->phy.ms_type = EM_MASTER_SLAVE;
618         }
619
620         /*
621          * Set the frame limits assuming
622          * standard ethernet sized frames.
623          */
624         adapter->hw.mac.max_frame_size =
625             ETHERMTU + ETHER_HDR_LEN + ETHERNET_FCS_SIZE;
626
627         /*
628          * This controls when hardware reports transmit completion
629          * status.
630          */
631         hw->mac.report_tx_early = 1;
632
633         /* 
634         ** Get queue/ring memory
635         */
636         if (em_allocate_queues(adapter)) {
637                 error = ENOMEM;
638                 goto err_pci;
639         }
640
641         /* Allocate multicast array memory. */
642         adapter->mta = malloc(sizeof(u8) * ETH_ADDR_LEN *
643             MAX_NUM_MULTICAST_ADDRESSES, M_DEVBUF, M_NOWAIT);
644         if (adapter->mta == NULL) {
645                 device_printf(dev, "Can not allocate multicast setup array\n");
646                 error = ENOMEM;
647                 goto err_late;
648         }
649
650         /* Check SOL/IDER usage */
651         if (e1000_check_reset_block(hw))
652                 device_printf(dev, "PHY reset is blocked"
653                     " due to SOL/IDER session.\n");
654
655         /* Sysctl for setting Energy Efficient Ethernet */
656         hw->dev_spec.ich8lan.eee_disable = eee_setting;
657         SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
658             SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
659             OID_AUTO, "eee_control", CTLTYPE_INT|CTLFLAG_RW,
660             adapter, 0, em_sysctl_eee, "I",
661             "Disable Energy Efficient Ethernet");
662
663         /*
664         ** Start from a known state, this is
665         ** important in reading the nvm and
666         ** mac from that.
667         */
668         e1000_reset_hw(hw);
669
670
671         /* Make sure we have a good EEPROM before we read from it */
672         if (e1000_validate_nvm_checksum(hw) < 0) {
673                 /*
674                 ** Some PCI-E parts fail the first check due to
675                 ** the link being in sleep state, call it again,
676                 ** if it fails a second time its a real issue.
677                 */
678                 if (e1000_validate_nvm_checksum(hw) < 0) {
679                         device_printf(dev,
680                             "The EEPROM Checksum Is Not Valid\n");
681                         error = EIO;
682                         goto err_late;
683                 }
684         }
685
686         /* Copy the permanent MAC address out of the EEPROM */
687         if (e1000_read_mac_addr(hw) < 0) {
688                 device_printf(dev, "EEPROM read error while reading MAC"
689                     " address\n");
690                 error = EIO;
691                 goto err_late;
692         }
693
694         if (!em_is_valid_ether_addr(hw->mac.addr)) {
695                 device_printf(dev, "Invalid MAC address\n");
696                 error = EIO;
697                 goto err_late;
698         }
699
700         /*
701         **  Do interrupt configuration
702         */
703         if (adapter->msix > 1) /* Do MSIX */
704                 error = em_allocate_msix(adapter);
705         else  /* MSI or Legacy */
706                 error = em_allocate_legacy(adapter);
707         if (error)
708                 goto err_late;
709
710         /*
711          * Get Wake-on-Lan and Management info for later use
712          */
713         em_get_wakeup(dev);
714
715         /* Setup OS specific network interface */
716         if (em_setup_interface(dev, adapter) != 0)
717                 goto err_late;
718
719         em_reset(adapter);
720
721         /* Initialize statistics */
722         em_update_stats_counters(adapter);
723
724         hw->mac.get_link_status = 1;
725         em_update_link_status(adapter);
726
727         /* Register for VLAN events */
728         adapter->vlan_attach = EVENTHANDLER_REGISTER(vlan_config,
729             em_register_vlan, adapter, EVENTHANDLER_PRI_FIRST);
730         adapter->vlan_detach = EVENTHANDLER_REGISTER(vlan_unconfig,
731             em_unregister_vlan, adapter, EVENTHANDLER_PRI_FIRST); 
732
733         em_add_hw_stats(adapter);
734
735         /* Non-AMT based hardware can now take control from firmware */
736         if (adapter->has_manage && !adapter->has_amt)
737                 em_get_hw_control(adapter);
738
739         /* Tell the stack that the interface is not active */
740         adapter->ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
741         adapter->ifp->if_drv_flags |= IFF_DRV_OACTIVE;
742
743         adapter->led_dev = led_create(em_led_func, adapter,
744             device_get_nameunit(dev));
745 #ifdef DEV_NETMAP
746         em_netmap_attach(adapter);
747 #endif /* DEV_NETMAP */
748
749         INIT_DEBUGOUT("em_attach: end");
750
751         return (0);
752
753 err_late:
754         em_free_transmit_structures(adapter);
755         em_free_receive_structures(adapter);
756         em_release_hw_control(adapter);
757         if (adapter->ifp != NULL)
758                 if_free(adapter->ifp);
759 err_pci:
760         em_free_pci_resources(adapter);
761         free(adapter->mta, M_DEVBUF);
762         EM_CORE_LOCK_DESTROY(adapter);
763
764         return (error);
765 }
766
767 /*********************************************************************
768  *  Device removal routine
769  *
770  *  The detach entry point is called when the driver is being removed.
771  *  This routine stops the adapter and deallocates all the resources
772  *  that were allocated for driver operation.
773  *
774  *  return 0 on success, positive on failure
775  *********************************************************************/
776
777 static int
778 em_detach(device_t dev)
779 {
780         struct adapter  *adapter = device_get_softc(dev);
781         struct ifnet    *ifp = adapter->ifp;
782
783         INIT_DEBUGOUT("em_detach: begin");
784
785         /* Make sure VLANS are not using driver */
786         if (adapter->ifp->if_vlantrunk != NULL) {
787                 device_printf(dev,"Vlan in use, detach first\n");
788                 return (EBUSY);
789         }
790
791 #ifdef DEVICE_POLLING
792         if (ifp->if_capenable & IFCAP_POLLING)
793                 ether_poll_deregister(ifp);
794 #endif
795
796         if (adapter->led_dev != NULL)
797                 led_destroy(adapter->led_dev);
798
799         EM_CORE_LOCK(adapter);
800         adapter->in_detach = 1;
801         em_stop(adapter);
802         EM_CORE_UNLOCK(adapter);
803         EM_CORE_LOCK_DESTROY(adapter);
804
805         e1000_phy_hw_reset(&adapter->hw);
806
807         em_release_manageability(adapter);
808         em_release_hw_control(adapter);
809
810         /* Unregister VLAN events */
811         if (adapter->vlan_attach != NULL)
812                 EVENTHANDLER_DEREGISTER(vlan_config, adapter->vlan_attach);
813         if (adapter->vlan_detach != NULL)
814                 EVENTHANDLER_DEREGISTER(vlan_unconfig, adapter->vlan_detach); 
815
816         ether_ifdetach(adapter->ifp);
817         callout_drain(&adapter->timer);
818
819 #ifdef DEV_NETMAP
820         netmap_detach(ifp);
821 #endif /* DEV_NETMAP */
822
823         em_free_pci_resources(adapter);
824         bus_generic_detach(dev);
825         if_free(ifp);
826
827         em_free_transmit_structures(adapter);
828         em_free_receive_structures(adapter);
829
830         em_release_hw_control(adapter);
831         free(adapter->mta, M_DEVBUF);
832
833         return (0);
834 }
835
836 /*********************************************************************
837  *
838  *  Shutdown entry point
839  *
840  **********************************************************************/
841
842 static int
843 em_shutdown(device_t dev)
844 {
845         return em_suspend(dev);
846 }
847
848 /*
849  * Suspend/resume device methods.
850  */
851 static int
852 em_suspend(device_t dev)
853 {
854         struct adapter *adapter = device_get_softc(dev);
855
856         EM_CORE_LOCK(adapter);
857
858         em_release_manageability(adapter);
859         em_release_hw_control(adapter);
860         em_enable_wakeup(dev);
861
862         EM_CORE_UNLOCK(adapter);
863
864         return bus_generic_suspend(dev);
865 }
866
867 static int
868 em_resume(device_t dev)
869 {
870         struct adapter *adapter = device_get_softc(dev);
871         struct tx_ring  *txr = adapter->tx_rings;
872         struct ifnet *ifp = adapter->ifp;
873
874         EM_CORE_LOCK(adapter);
875         if (adapter->hw.mac.type == e1000_pch2lan)
876                 e1000_resume_workarounds_pchlan(&adapter->hw);
877         em_init_locked(adapter);
878         em_init_manageability(adapter);
879
880         if ((ifp->if_flags & IFF_UP) &&
881             (ifp->if_drv_flags & IFF_DRV_RUNNING) && adapter->link_active) {
882                 for (int i = 0; i < adapter->num_queues; i++, txr++) {
883                         EM_TX_LOCK(txr);
884 #ifdef EM_MULTIQUEUE
885                         if (!drbr_empty(ifp, txr->br))
886                                 em_mq_start_locked(ifp, txr, NULL);
887 #else
888                         if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
889                                 em_start_locked(ifp, txr);
890 #endif
891                         EM_TX_UNLOCK(txr);
892                 }
893         }
894         EM_CORE_UNLOCK(adapter);
895
896         return bus_generic_resume(dev);
897 }
898
899
900 #ifdef EM_MULTIQUEUE
901 /*********************************************************************
902  *  Multiqueue Transmit routines 
903  *
904  *  em_mq_start is called by the stack to initiate a transmit.
905  *  however, if busy the driver can queue the request rather
906  *  than do an immediate send. It is this that is an advantage
907  *  in this driver, rather than also having multiple tx queues.
908  **********************************************************************/
909 static int
910 em_mq_start_locked(struct ifnet *ifp, struct tx_ring *txr, struct mbuf *m)
911 {
912         struct adapter  *adapter = txr->adapter;
913         struct mbuf     *next;
914         int             err = 0, enq = 0;
915
916         if ((ifp->if_drv_flags & (IFF_DRV_RUNNING | IFF_DRV_OACTIVE)) !=
917             IFF_DRV_RUNNING || adapter->link_active == 0) {
918                 if (m != NULL)
919                         err = drbr_enqueue(ifp, txr->br, m);
920                 return (err);
921         }
922
923         enq = 0;
924         if (m != NULL) {
925                 err = drbr_enqueue(ifp, txr->br, m);
926                 if (err)
927                         return (err);
928         } 
929
930         /* Process the queue */
931         while ((next = drbr_peek(ifp, txr->br)) != NULL) {
932                 if ((err = em_xmit(txr, &next)) != 0) {
933                         if (next == NULL)
934                                 drbr_advance(ifp, txr->br);
935                         else 
936                                 drbr_putback(ifp, txr->br, next);
937                         break;
938                 }
939                 drbr_advance(ifp, txr->br);
940                 enq++;
941                 ifp->if_obytes += next->m_pkthdr.len;
942                 if (next->m_flags & M_MCAST)
943                         ifp->if_omcasts++;
944                 ETHER_BPF_MTAP(ifp, next);
945                 if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
946                         break;
947         }
948
949         if (enq > 0) {
950                 /* Set the watchdog */
951                 txr->queue_status = EM_QUEUE_WORKING;
952                 txr->watchdog_time = ticks;
953         }
954
955         if (txr->tx_avail < EM_MAX_SCATTER)
956                 em_txeof(txr);
957         if (txr->tx_avail < EM_MAX_SCATTER)
958                 ifp->if_drv_flags |= IFF_DRV_OACTIVE;
959         return (err);
960 }
961
962 /*
963 ** Multiqueue capable stack interface
964 */
965 static int
966 em_mq_start(struct ifnet *ifp, struct mbuf *m)
967 {
968         struct adapter  *adapter = ifp->if_softc;
969         struct tx_ring  *txr = adapter->tx_rings;
970         int             error;
971
972         if (EM_TX_TRYLOCK(txr)) {
973                 error = em_mq_start_locked(ifp, txr, m);
974                 EM_TX_UNLOCK(txr);
975         } else 
976                 error = drbr_enqueue(ifp, txr->br, m);
977
978         return (error);
979 }
980
981 /*
982 ** Flush all ring buffers
983 */
984 static void
985 em_qflush(struct ifnet *ifp)
986 {
987         struct adapter  *adapter = ifp->if_softc;
988         struct tx_ring  *txr = adapter->tx_rings;
989         struct mbuf     *m;
990
991         for (int i = 0; i < adapter->num_queues; i++, txr++) {
992                 EM_TX_LOCK(txr);
993                 while ((m = buf_ring_dequeue_sc(txr->br)) != NULL)
994                         m_freem(m);
995                 EM_TX_UNLOCK(txr);
996         }
997         if_qflush(ifp);
998 }
999 #else  /* !EM_MULTIQUEUE */
1000
1001 static void
1002 em_start_locked(struct ifnet *ifp, struct tx_ring *txr)
1003 {
1004         struct adapter  *adapter = ifp->if_softc;
1005         struct mbuf     *m_head;
1006
1007         EM_TX_LOCK_ASSERT(txr);
1008
1009         if ((ifp->if_drv_flags & (IFF_DRV_RUNNING|IFF_DRV_OACTIVE)) !=
1010             IFF_DRV_RUNNING)
1011                 return;
1012
1013         if (!adapter->link_active)
1014                 return;
1015
1016         while (!IFQ_DRV_IS_EMPTY(&ifp->if_snd)) {
1017                 /* Call cleanup if number of TX descriptors low */
1018                 if (txr->tx_avail <= EM_TX_CLEANUP_THRESHOLD)
1019                         em_txeof(txr);
1020                 if (txr->tx_avail < EM_MAX_SCATTER) {
1021                         ifp->if_drv_flags |= IFF_DRV_OACTIVE;
1022                         break;
1023                 }
1024                 IFQ_DRV_DEQUEUE(&ifp->if_snd, m_head);
1025                 if (m_head == NULL)
1026                         break;
1027                 /*
1028                  *  Encapsulation can modify our pointer, and or make it
1029                  *  NULL on failure.  In that event, we can't requeue.
1030                  */
1031                 if (em_xmit(txr, &m_head)) {
1032                         if (m_head == NULL)
1033                                 break;
1034                         IFQ_DRV_PREPEND(&ifp->if_snd, m_head);
1035                         break;
1036                 }
1037
1038                 /* Send a copy of the frame to the BPF listener */
1039                 ETHER_BPF_MTAP(ifp, m_head);
1040
1041                 /* Set timeout in case hardware has problems transmitting. */
1042                 txr->watchdog_time = ticks;
1043                 txr->queue_status = EM_QUEUE_WORKING;
1044         }
1045
1046         return;
1047 }
1048
1049 static void
1050 em_start(struct ifnet *ifp)
1051 {
1052         struct adapter  *adapter = ifp->if_softc;
1053         struct tx_ring  *txr = adapter->tx_rings;
1054
1055         if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
1056                 EM_TX_LOCK(txr);
1057                 em_start_locked(ifp, txr);
1058                 EM_TX_UNLOCK(txr);
1059         }
1060         return;
1061 }
1062 #endif /* EM_MULTIQUEUE */
1063
1064 /*********************************************************************
1065  *  Ioctl entry point
1066  *
1067  *  em_ioctl is called when the user wants to configure the
1068  *  interface.
1069  *
1070  *  return 0 on success, positive on failure
1071  **********************************************************************/
1072
1073 static int
1074 em_ioctl(struct ifnet *ifp, u_long command, caddr_t data)
1075 {
1076         struct adapter  *adapter = ifp->if_softc;
1077         struct ifreq    *ifr = (struct ifreq *)data;
1078 #if defined(INET) || defined(INET6)
1079         struct ifaddr   *ifa = (struct ifaddr *)data;
1080 #endif
1081         bool            avoid_reset = FALSE;
1082         int             error = 0;
1083
1084         if (adapter->in_detach)
1085                 return (error);
1086
1087         switch (command) {
1088         case SIOCSIFADDR:
1089 #ifdef INET
1090                 if (ifa->ifa_addr->sa_family == AF_INET)
1091                         avoid_reset = TRUE;
1092 #endif
1093 #ifdef INET6
1094                 if (ifa->ifa_addr->sa_family == AF_INET6)
1095                         avoid_reset = TRUE;
1096 #endif
1097                 /*
1098                 ** Calling init results in link renegotiation,
1099                 ** so we avoid doing it when possible.
1100                 */
1101                 if (avoid_reset) {
1102                         ifp->if_flags |= IFF_UP;
1103                         if (!(ifp->if_drv_flags & IFF_DRV_RUNNING))
1104                                 em_init(adapter);
1105 #ifdef INET
1106                         if (!(ifp->if_flags & IFF_NOARP))
1107                                 arp_ifinit(ifp, ifa);
1108 #endif
1109                 } else
1110                         error = ether_ioctl(ifp, command, data);
1111                 break;
1112         case SIOCSIFMTU:
1113             {
1114                 int max_frame_size;
1115
1116                 IOCTL_DEBUGOUT("ioctl rcv'd: SIOCSIFMTU (Set Interface MTU)");
1117
1118                 EM_CORE_LOCK(adapter);
1119                 switch (adapter->hw.mac.type) {
1120                 case e1000_82571:
1121                 case e1000_82572:
1122                 case e1000_ich9lan:
1123                 case e1000_ich10lan:
1124                 case e1000_pch2lan:
1125                 case e1000_pch_lpt:
1126                 case e1000_82574:
1127                 case e1000_82583:
1128                 case e1000_80003es2lan: /* 9K Jumbo Frame size */
1129                         max_frame_size = 9234;
1130                         break;
1131                 case e1000_pchlan:
1132                         max_frame_size = 4096;
1133                         break;
1134                         /* Adapters that do not support jumbo frames */
1135                 case e1000_ich8lan:
1136                         max_frame_size = ETHER_MAX_LEN;
1137                         break;
1138                 default:
1139                         max_frame_size = MAX_JUMBO_FRAME_SIZE;
1140                 }
1141                 if (ifr->ifr_mtu > max_frame_size - ETHER_HDR_LEN -
1142                     ETHER_CRC_LEN) {
1143                         EM_CORE_UNLOCK(adapter);
1144                         error = EINVAL;
1145                         break;
1146                 }
1147
1148                 ifp->if_mtu = ifr->ifr_mtu;
1149                 adapter->hw.mac.max_frame_size =
1150                     ifp->if_mtu + ETHER_HDR_LEN + ETHER_CRC_LEN;
1151                 em_init_locked(adapter);
1152                 EM_CORE_UNLOCK(adapter);
1153                 break;
1154             }
1155         case SIOCSIFFLAGS:
1156                 IOCTL_DEBUGOUT("ioctl rcv'd:\
1157                     SIOCSIFFLAGS (Set Interface Flags)");
1158                 EM_CORE_LOCK(adapter);
1159                 if (ifp->if_flags & IFF_UP) {
1160                         if ((ifp->if_drv_flags & IFF_DRV_RUNNING)) {
1161                                 if ((ifp->if_flags ^ adapter->if_flags) &
1162                                     (IFF_PROMISC | IFF_ALLMULTI)) {
1163                                         em_disable_promisc(adapter);
1164                                         em_set_promisc(adapter);
1165                                 }
1166                         } else
1167                                 em_init_locked(adapter);
1168                 } else
1169                         if (ifp->if_drv_flags & IFF_DRV_RUNNING)
1170                                 em_stop(adapter);
1171                 adapter->if_flags = ifp->if_flags;
1172                 EM_CORE_UNLOCK(adapter);
1173                 break;
1174         case SIOCADDMULTI:
1175         case SIOCDELMULTI:
1176                 IOCTL_DEBUGOUT("ioctl rcv'd: SIOC(ADD|DEL)MULTI");
1177                 if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
1178                         EM_CORE_LOCK(adapter);
1179                         em_disable_intr(adapter);
1180                         em_set_multi(adapter);
1181 #ifdef DEVICE_POLLING
1182                         if (!(ifp->if_capenable & IFCAP_POLLING))
1183 #endif
1184                                 em_enable_intr(adapter);
1185                         EM_CORE_UNLOCK(adapter);
1186                 }
1187                 break;
1188         case SIOCSIFMEDIA:
1189                 /* Check SOL/IDER usage */
1190                 EM_CORE_LOCK(adapter);
1191                 if (e1000_check_reset_block(&adapter->hw)) {
1192                         EM_CORE_UNLOCK(adapter);
1193                         device_printf(adapter->dev, "Media change is"
1194                             " blocked due to SOL/IDER session.\n");
1195                         break;
1196                 }
1197                 EM_CORE_UNLOCK(adapter);
1198                 /* falls thru */
1199         case SIOCGIFMEDIA:
1200                 IOCTL_DEBUGOUT("ioctl rcv'd: \
1201                     SIOCxIFMEDIA (Get/Set Interface Media)");
1202                 error = ifmedia_ioctl(ifp, ifr, &adapter->media, command);
1203                 break;
1204         case SIOCSIFCAP:
1205             {
1206                 int mask, reinit;
1207
1208                 IOCTL_DEBUGOUT("ioctl rcv'd: SIOCSIFCAP (Set Capabilities)");
1209                 reinit = 0;
1210                 mask = ifr->ifr_reqcap ^ ifp->if_capenable;
1211 #ifdef DEVICE_POLLING
1212                 if (mask & IFCAP_POLLING) {
1213                         if (ifr->ifr_reqcap & IFCAP_POLLING) {
1214                                 error = ether_poll_register(em_poll, ifp);
1215                                 if (error)
1216                                         return (error);
1217                                 EM_CORE_LOCK(adapter);
1218                                 em_disable_intr(adapter);
1219                                 ifp->if_capenable |= IFCAP_POLLING;
1220                                 EM_CORE_UNLOCK(adapter);
1221                         } else {
1222                                 error = ether_poll_deregister(ifp);
1223                                 /* Enable interrupt even in error case */
1224                                 EM_CORE_LOCK(adapter);
1225                                 em_enable_intr(adapter);
1226                                 ifp->if_capenable &= ~IFCAP_POLLING;
1227                                 EM_CORE_UNLOCK(adapter);
1228                         }
1229                 }
1230 #endif
1231                 if (mask & IFCAP_HWCSUM) {
1232                         ifp->if_capenable ^= IFCAP_HWCSUM;
1233                         reinit = 1;
1234                 }
1235                 if (mask & IFCAP_TSO4) {
1236                         ifp->if_capenable ^= IFCAP_TSO4;
1237                         reinit = 1;
1238                 }
1239                 if (mask & IFCAP_VLAN_HWTAGGING) {
1240                         ifp->if_capenable ^= IFCAP_VLAN_HWTAGGING;
1241                         reinit = 1;
1242                 }
1243                 if (mask & IFCAP_VLAN_HWFILTER) {
1244                         ifp->if_capenable ^= IFCAP_VLAN_HWFILTER;
1245                         reinit = 1;
1246                 }
1247                 if (mask & IFCAP_VLAN_HWTSO) {
1248                         ifp->if_capenable ^= IFCAP_VLAN_HWTSO;
1249                         reinit = 1;
1250                 }
1251                 if ((mask & IFCAP_WOL) &&
1252                     (ifp->if_capabilities & IFCAP_WOL) != 0) {
1253                         if (mask & IFCAP_WOL_MCAST)
1254                                 ifp->if_capenable ^= IFCAP_WOL_MCAST;
1255                         if (mask & IFCAP_WOL_MAGIC)
1256                                 ifp->if_capenable ^= IFCAP_WOL_MAGIC;
1257                 }
1258                 if (reinit && (ifp->if_drv_flags & IFF_DRV_RUNNING))
1259                         em_init(adapter);
1260                 VLAN_CAPABILITIES(ifp);
1261                 break;
1262             }
1263
1264         default:
1265                 error = ether_ioctl(ifp, command, data);
1266                 break;
1267         }
1268
1269         return (error);
1270 }
1271
1272
1273 /*********************************************************************
1274  *  Init entry point
1275  *
1276  *  This routine is used in two ways. It is used by the stack as
1277  *  init entry point in network interface structure. It is also used
1278  *  by the driver as a hw/sw initialization routine to get to a
1279  *  consistent state.
1280  *
1281  *  return 0 on success, positive on failure
1282  **********************************************************************/
1283
1284 static void
1285 em_init_locked(struct adapter *adapter)
1286 {
1287         struct ifnet    *ifp = adapter->ifp;
1288         device_t        dev = adapter->dev;
1289
1290         INIT_DEBUGOUT("em_init: begin");
1291
1292         EM_CORE_LOCK_ASSERT(adapter);
1293
1294         em_disable_intr(adapter);
1295         callout_stop(&adapter->timer);
1296
1297         /* Get the latest mac address, User can use a LAA */
1298         bcopy(IF_LLADDR(adapter->ifp), adapter->hw.mac.addr,
1299               ETHER_ADDR_LEN);
1300
1301         /* Put the address into the Receive Address Array */
1302         e1000_rar_set(&adapter->hw, adapter->hw.mac.addr, 0);
1303
1304         /*
1305          * With the 82571 adapter, RAR[0] may be overwritten
1306          * when the other port is reset, we make a duplicate
1307          * in RAR[14] for that eventuality, this assures
1308          * the interface continues to function.
1309          */
1310         if (adapter->hw.mac.type == e1000_82571) {
1311                 e1000_set_laa_state_82571(&adapter->hw, TRUE);
1312                 e1000_rar_set(&adapter->hw, adapter->hw.mac.addr,
1313                     E1000_RAR_ENTRIES - 1);
1314         }
1315
1316         /* Initialize the hardware */
1317         em_reset(adapter);
1318         em_update_link_status(adapter);
1319
1320         /* Setup VLAN support, basic and offload if available */
1321         E1000_WRITE_REG(&adapter->hw, E1000_VET, ETHERTYPE_VLAN);
1322
1323         /* Set hardware offload abilities */
1324         ifp->if_hwassist = 0;
1325         if (ifp->if_capenable & IFCAP_TXCSUM)
1326                 ifp->if_hwassist |= (CSUM_TCP | CSUM_UDP);
1327         if (ifp->if_capenable & IFCAP_TSO4)
1328                 ifp->if_hwassist |= CSUM_TSO;
1329
1330         /* Configure for OS presence */
1331         em_init_manageability(adapter);
1332
1333         /* Prepare transmit descriptors and buffers */
1334         em_setup_transmit_structures(adapter);
1335         em_initialize_transmit_unit(adapter);
1336
1337         /* Setup Multicast table */
1338         em_set_multi(adapter);
1339
1340         /*
1341         ** Figure out the desired mbuf
1342         ** pool for doing jumbos
1343         */
1344         if (adapter->hw.mac.max_frame_size <= 2048)
1345                 adapter->rx_mbuf_sz = MCLBYTES;
1346         else if (adapter->hw.mac.max_frame_size <= 4096)
1347                 adapter->rx_mbuf_sz = MJUMPAGESIZE;
1348         else
1349                 adapter->rx_mbuf_sz = MJUM9BYTES;
1350
1351         /* Prepare receive descriptors and buffers */
1352         if (em_setup_receive_structures(adapter)) {
1353                 device_printf(dev, "Could not setup receive structures\n");
1354                 em_stop(adapter);
1355                 return;
1356         }
1357         em_initialize_receive_unit(adapter);
1358
1359         /* Use real VLAN Filter support? */
1360         if (ifp->if_capenable & IFCAP_VLAN_HWTAGGING) {
1361                 if (ifp->if_capenable & IFCAP_VLAN_HWFILTER)
1362                         /* Use real VLAN Filter support */
1363                         em_setup_vlan_hw_support(adapter);
1364                 else {
1365                         u32 ctrl;
1366                         ctrl = E1000_READ_REG(&adapter->hw, E1000_CTRL);
1367                         ctrl |= E1000_CTRL_VME;
1368                         E1000_WRITE_REG(&adapter->hw, E1000_CTRL, ctrl);
1369                 }
1370         }
1371
1372         /* Don't lose promiscuous settings */
1373         em_set_promisc(adapter);
1374
1375         /* Set the interface as ACTIVE */
1376         ifp->if_drv_flags |= IFF_DRV_RUNNING;
1377         ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;
1378
1379         callout_reset(&adapter->timer, hz, em_local_timer, adapter);
1380         e1000_clear_hw_cntrs_base_generic(&adapter->hw);
1381
1382         /* MSI/X configuration for 82574 */
1383         if (adapter->hw.mac.type == e1000_82574) {
1384                 int tmp;
1385                 tmp = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
1386                 tmp |= E1000_CTRL_EXT_PBA_CLR;
1387                 E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT, tmp);
1388                 /* Set the IVAR - interrupt vector routing. */
1389                 E1000_WRITE_REG(&adapter->hw, E1000_IVAR, adapter->ivars);
1390         }
1391
1392 #ifdef DEVICE_POLLING
1393         /*
1394          * Only enable interrupts if we are not polling, make sure
1395          * they are off otherwise.
1396          */
1397         if (ifp->if_capenable & IFCAP_POLLING)
1398                 em_disable_intr(adapter);
1399         else
1400 #endif /* DEVICE_POLLING */
1401                 em_enable_intr(adapter);
1402
1403         /* AMT based hardware can now take control from firmware */
1404         if (adapter->has_manage && adapter->has_amt)
1405                 em_get_hw_control(adapter);
1406 }
1407
1408 static void
1409 em_init(void *arg)
1410 {
1411         struct adapter *adapter = arg;
1412
1413         EM_CORE_LOCK(adapter);
1414         em_init_locked(adapter);
1415         EM_CORE_UNLOCK(adapter);
1416 }
1417
1418
1419 #ifdef DEVICE_POLLING
1420 /*********************************************************************
1421  *
1422  *  Legacy polling routine: note this only works with single queue
1423  *
1424  *********************************************************************/
1425 static int
1426 em_poll(struct ifnet *ifp, enum poll_cmd cmd, int count)
1427 {
1428         struct adapter *adapter = ifp->if_softc;
1429         struct tx_ring  *txr = adapter->tx_rings;
1430         struct rx_ring  *rxr = adapter->rx_rings;
1431         u32             reg_icr;
1432         int             rx_done;
1433
1434         EM_CORE_LOCK(adapter);
1435         if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) {
1436                 EM_CORE_UNLOCK(adapter);
1437                 return (0);
1438         }
1439
1440         if (cmd == POLL_AND_CHECK_STATUS) {
1441                 reg_icr = E1000_READ_REG(&adapter->hw, E1000_ICR);
1442                 if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
1443                         callout_stop(&adapter->timer);
1444                         adapter->hw.mac.get_link_status = 1;
1445                         em_update_link_status(adapter);
1446                         callout_reset(&adapter->timer, hz,
1447                             em_local_timer, adapter);
1448                 }
1449         }
1450         EM_CORE_UNLOCK(adapter);
1451
1452         em_rxeof(rxr, count, &rx_done);
1453
1454         EM_TX_LOCK(txr);
1455         em_txeof(txr);
1456 #ifdef EM_MULTIQUEUE
1457         if (!drbr_empty(ifp, txr->br))
1458                 em_mq_start_locked(ifp, txr, NULL);
1459 #else
1460         if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
1461                 em_start_locked(ifp, txr);
1462 #endif
1463         EM_TX_UNLOCK(txr);
1464
1465         return (rx_done);
1466 }
1467 #endif /* DEVICE_POLLING */
1468
1469
1470 /*********************************************************************
1471  *
1472  *  Fast Legacy/MSI Combined Interrupt Service routine  
1473  *
1474  *********************************************************************/
1475 static int
1476 em_irq_fast(void *arg)
1477 {
1478         struct adapter  *adapter = arg;
1479         struct ifnet    *ifp;
1480         u32             reg_icr;
1481
1482         ifp = adapter->ifp;
1483
1484         reg_icr = E1000_READ_REG(&adapter->hw, E1000_ICR);
1485
1486         /* Hot eject?  */
1487         if (reg_icr == 0xffffffff)
1488                 return FILTER_STRAY;
1489
1490         /* Definitely not our interrupt.  */
1491         if (reg_icr == 0x0)
1492                 return FILTER_STRAY;
1493
1494         /*
1495          * Starting with the 82571 chip, bit 31 should be used to
1496          * determine whether the interrupt belongs to us.
1497          */
1498         if (adapter->hw.mac.type >= e1000_82571 &&
1499             (reg_icr & E1000_ICR_INT_ASSERTED) == 0)
1500                 return FILTER_STRAY;
1501
1502         em_disable_intr(adapter);
1503         taskqueue_enqueue(adapter->tq, &adapter->que_task);
1504
1505         /* Link status change */
1506         if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
1507                 adapter->hw.mac.get_link_status = 1;
1508                 taskqueue_enqueue(taskqueue_fast, &adapter->link_task);
1509         }
1510
1511         if (reg_icr & E1000_ICR_RXO)
1512                 adapter->rx_overruns++;
1513         return FILTER_HANDLED;
1514 }
1515
1516 /* Combined RX/TX handler, used by Legacy and MSI */
1517 static void
1518 em_handle_que(void *context, int pending)
1519 {
1520         struct adapter  *adapter = context;
1521         struct ifnet    *ifp = adapter->ifp;
1522         struct tx_ring  *txr = adapter->tx_rings;
1523         struct rx_ring  *rxr = adapter->rx_rings;
1524
1525
1526         if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
1527                 bool more = em_rxeof(rxr, adapter->rx_process_limit, NULL);
1528                 EM_TX_LOCK(txr);
1529                 em_txeof(txr);
1530 #ifdef EM_MULTIQUEUE
1531                 if (!drbr_empty(ifp, txr->br))
1532                         em_mq_start_locked(ifp, txr, NULL);
1533 #else
1534                 if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
1535                         em_start_locked(ifp, txr);
1536 #endif
1537                 EM_TX_UNLOCK(txr);
1538                 if (more) {
1539                         taskqueue_enqueue(adapter->tq, &adapter->que_task);
1540                         return;
1541                 }
1542         }
1543
1544         em_enable_intr(adapter);
1545         return;
1546 }
1547
1548
1549 /*********************************************************************
1550  *
1551  *  MSIX Interrupt Service Routines
1552  *
1553  **********************************************************************/
1554 static void
1555 em_msix_tx(void *arg)
1556 {
1557         struct tx_ring *txr = arg;
1558         struct adapter *adapter = txr->adapter;
1559         struct ifnet    *ifp = adapter->ifp;
1560
1561         ++txr->tx_irq;
1562         EM_TX_LOCK(txr);
1563         em_txeof(txr);
1564 #ifdef EM_MULTIQUEUE
1565         if (!drbr_empty(ifp, txr->br))
1566                 em_mq_start_locked(ifp, txr, NULL);
1567 #else
1568         if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
1569                 em_start_locked(ifp, txr);
1570 #endif
1571         /* Reenable this interrupt */
1572         E1000_WRITE_REG(&adapter->hw, E1000_IMS, txr->ims);
1573         EM_TX_UNLOCK(txr);
1574         return;
1575 }
1576
1577 /*********************************************************************
1578  *
1579  *  MSIX RX Interrupt Service routine
1580  *
1581  **********************************************************************/
1582
1583 static void
1584 em_msix_rx(void *arg)
1585 {
1586         struct rx_ring  *rxr = arg;
1587         struct adapter  *adapter = rxr->adapter;
1588         bool            more;
1589
1590         ++rxr->rx_irq;
1591         if (!(adapter->ifp->if_drv_flags & IFF_DRV_RUNNING))
1592                 return;
1593         more = em_rxeof(rxr, adapter->rx_process_limit, NULL);
1594         if (more)
1595                 taskqueue_enqueue(rxr->tq, &rxr->rx_task);
1596         else
1597                 /* Reenable this interrupt */
1598                 E1000_WRITE_REG(&adapter->hw, E1000_IMS, rxr->ims);
1599         return;
1600 }
1601
1602 /*********************************************************************
1603  *
1604  *  MSIX Link Fast Interrupt Service routine
1605  *
1606  **********************************************************************/
1607 static void
1608 em_msix_link(void *arg)
1609 {
1610         struct adapter  *adapter = arg;
1611         u32             reg_icr;
1612
1613         ++adapter->link_irq;
1614         reg_icr = E1000_READ_REG(&adapter->hw, E1000_ICR);
1615
1616         if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
1617                 adapter->hw.mac.get_link_status = 1;
1618                 em_handle_link(adapter, 0);
1619         } else
1620                 E1000_WRITE_REG(&adapter->hw, E1000_IMS,
1621                     EM_MSIX_LINK | E1000_IMS_LSC);
1622         return;
1623 }
1624
1625 static void
1626 em_handle_rx(void *context, int pending)
1627 {
1628         struct rx_ring  *rxr = context;
1629         struct adapter  *adapter = rxr->adapter;
1630         bool            more;
1631
1632         more = em_rxeof(rxr, adapter->rx_process_limit, NULL);
1633         if (more)
1634                 taskqueue_enqueue(rxr->tq, &rxr->rx_task);
1635         else
1636                 /* Reenable this interrupt */
1637                 E1000_WRITE_REG(&adapter->hw, E1000_IMS, rxr->ims);
1638 }
1639
1640 static void
1641 em_handle_tx(void *context, int pending)
1642 {
1643         struct tx_ring  *txr = context;
1644         struct adapter  *adapter = txr->adapter;
1645         struct ifnet    *ifp = adapter->ifp;
1646
1647         EM_TX_LOCK(txr);
1648         em_txeof(txr);
1649 #ifdef EM_MULTIQUEUE
1650         if (!drbr_empty(ifp, txr->br))
1651                 em_mq_start_locked(ifp, txr, NULL);
1652 #else
1653         if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
1654                 em_start_locked(ifp, txr);
1655 #endif
1656         E1000_WRITE_REG(&adapter->hw, E1000_IMS, txr->ims);
1657         EM_TX_UNLOCK(txr);
1658 }
1659
1660 static void
1661 em_handle_link(void *context, int pending)
1662 {
1663         struct adapter  *adapter = context;
1664         struct tx_ring  *txr = adapter->tx_rings;
1665         struct ifnet *ifp = adapter->ifp;
1666
1667         if (!(ifp->if_drv_flags & IFF_DRV_RUNNING))
1668                 return;
1669
1670         EM_CORE_LOCK(adapter);
1671         callout_stop(&adapter->timer);
1672         em_update_link_status(adapter);
1673         callout_reset(&adapter->timer, hz, em_local_timer, adapter);
1674         E1000_WRITE_REG(&adapter->hw, E1000_IMS,
1675             EM_MSIX_LINK | E1000_IMS_LSC);
1676         if (adapter->link_active) {
1677                 for (int i = 0; i < adapter->num_queues; i++, txr++) {
1678                         EM_TX_LOCK(txr);
1679 #ifdef EM_MULTIQUEUE
1680                         if (!drbr_empty(ifp, txr->br))
1681                                 em_mq_start_locked(ifp, txr, NULL);
1682 #else
1683                         if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
1684                                 em_start_locked(ifp, txr);
1685 #endif
1686                         EM_TX_UNLOCK(txr);
1687                 }
1688         }
1689         EM_CORE_UNLOCK(adapter);
1690 }
1691
1692
1693 /*********************************************************************
1694  *
1695  *  Media Ioctl callback
1696  *
1697  *  This routine is called whenever the user queries the status of
1698  *  the interface using ifconfig.
1699  *
1700  **********************************************************************/
1701 static void
1702 em_media_status(struct ifnet *ifp, struct ifmediareq *ifmr)
1703 {
1704         struct adapter *adapter = ifp->if_softc;
1705         u_char fiber_type = IFM_1000_SX;
1706
1707         INIT_DEBUGOUT("em_media_status: begin");
1708
1709         EM_CORE_LOCK(adapter);
1710         em_update_link_status(adapter);
1711
1712         ifmr->ifm_status = IFM_AVALID;
1713         ifmr->ifm_active = IFM_ETHER;
1714
1715         if (!adapter->link_active) {
1716                 EM_CORE_UNLOCK(adapter);
1717                 return;
1718         }
1719
1720         ifmr->ifm_status |= IFM_ACTIVE;
1721
1722         if ((adapter->hw.phy.media_type == e1000_media_type_fiber) ||
1723             (adapter->hw.phy.media_type == e1000_media_type_internal_serdes)) {
1724                 ifmr->ifm_active |= fiber_type | IFM_FDX;
1725         } else {
1726                 switch (adapter->link_speed) {
1727                 case 10:
1728                         ifmr->ifm_active |= IFM_10_T;
1729                         break;
1730                 case 100:
1731                         ifmr->ifm_active |= IFM_100_TX;
1732                         break;
1733                 case 1000:
1734                         ifmr->ifm_active |= IFM_1000_T;
1735                         break;
1736                 }
1737                 if (adapter->link_duplex == FULL_DUPLEX)
1738                         ifmr->ifm_active |= IFM_FDX;
1739                 else
1740                         ifmr->ifm_active |= IFM_HDX;
1741         }
1742         EM_CORE_UNLOCK(adapter);
1743 }
1744
1745 /*********************************************************************
1746  *
1747  *  Media Ioctl callback
1748  *
1749  *  This routine is called when the user changes speed/duplex using
1750  *  media/mediopt option with ifconfig.
1751  *
1752  **********************************************************************/
1753 static int
1754 em_media_change(struct ifnet *ifp)
1755 {
1756         struct adapter *adapter = ifp->if_softc;
1757         struct ifmedia  *ifm = &adapter->media;
1758
1759         INIT_DEBUGOUT("em_media_change: begin");
1760
1761         if (IFM_TYPE(ifm->ifm_media) != IFM_ETHER)
1762                 return (EINVAL);
1763
1764         EM_CORE_LOCK(adapter);
1765         switch (IFM_SUBTYPE(ifm->ifm_media)) {
1766         case IFM_AUTO:
1767                 adapter->hw.mac.autoneg = DO_AUTO_NEG;
1768                 adapter->hw.phy.autoneg_advertised = AUTONEG_ADV_DEFAULT;
1769                 break;
1770         case IFM_1000_LX:
1771         case IFM_1000_SX:
1772         case IFM_1000_T:
1773                 adapter->hw.mac.autoneg = DO_AUTO_NEG;
1774                 adapter->hw.phy.autoneg_advertised = ADVERTISE_1000_FULL;
1775                 break;
1776         case IFM_100_TX:
1777                 adapter->hw.mac.autoneg = FALSE;
1778                 adapter->hw.phy.autoneg_advertised = 0;
1779                 if ((ifm->ifm_media & IFM_GMASK) == IFM_FDX)
1780                         adapter->hw.mac.forced_speed_duplex = ADVERTISE_100_FULL;
1781                 else
1782                         adapter->hw.mac.forced_speed_duplex = ADVERTISE_100_HALF;
1783                 break;
1784         case IFM_10_T:
1785                 adapter->hw.mac.autoneg = FALSE;
1786                 adapter->hw.phy.autoneg_advertised = 0;
1787                 if ((ifm->ifm_media & IFM_GMASK) == IFM_FDX)
1788                         adapter->hw.mac.forced_speed_duplex = ADVERTISE_10_FULL;
1789                 else
1790                         adapter->hw.mac.forced_speed_duplex = ADVERTISE_10_HALF;
1791                 break;
1792         default:
1793                 device_printf(adapter->dev, "Unsupported media type\n");
1794         }
1795
1796         em_init_locked(adapter);
1797         EM_CORE_UNLOCK(adapter);
1798
1799         return (0);
1800 }
1801
1802 /*********************************************************************
1803  *
1804  *  This routine maps the mbufs to tx descriptors.
1805  *
1806  *  return 0 on success, positive on failure
1807  **********************************************************************/
1808
1809 static int
1810 em_xmit(struct tx_ring *txr, struct mbuf **m_headp)
1811 {
1812         struct adapter          *adapter = txr->adapter;
1813         bus_dma_segment_t       segs[EM_MAX_SCATTER];
1814         bus_dmamap_t            map;
1815         struct em_buffer        *tx_buffer, *tx_buffer_mapped;
1816         struct e1000_tx_desc    *ctxd = NULL;
1817         struct mbuf             *m_head;
1818         struct ether_header     *eh;
1819         struct ip               *ip = NULL;
1820         struct tcphdr           *tp = NULL;
1821         u32                     txd_upper, txd_lower, txd_used, txd_saved;
1822         int                     ip_off, poff;
1823         int                     nsegs, i, j, first, last = 0;
1824         int                     error, do_tso, tso_desc = 0, remap = 1;
1825
1826 retry:
1827         m_head = *m_headp;
1828         txd_upper = txd_lower = txd_used = txd_saved = 0;
1829         do_tso = ((m_head->m_pkthdr.csum_flags & CSUM_TSO) != 0);
1830         ip_off = poff = 0;
1831
1832         /*
1833          * Intel recommends entire IP/TCP header length reside in a single
1834          * buffer. If multiple descriptors are used to describe the IP and
1835          * TCP header, each descriptor should describe one or more
1836          * complete headers; descriptors referencing only parts of headers
1837          * are not supported. If all layer headers are not coalesced into
1838          * a single buffer, each buffer should not cross a 4KB boundary,
1839          * or be larger than the maximum read request size.
1840          * Controller also requires modifing IP/TCP header to make TSO work
1841          * so we firstly get a writable mbuf chain then coalesce ethernet/
1842          * IP/TCP header into a single buffer to meet the requirement of
1843          * controller. This also simplifies IP/TCP/UDP checksum offloading
1844          * which also has similiar restrictions.
1845          */
1846         if (do_tso || m_head->m_pkthdr.csum_flags & CSUM_OFFLOAD) {
1847                 if (do_tso || (m_head->m_next != NULL && 
1848                     m_head->m_pkthdr.csum_flags & CSUM_OFFLOAD)) {
1849                         if (M_WRITABLE(*m_headp) == 0) {
1850                                 m_head = m_dup(*m_headp, M_NOWAIT);
1851                                 m_freem(*m_headp);
1852                                 if (m_head == NULL) {
1853                                         *m_headp = NULL;
1854                                         return (ENOBUFS);
1855                                 }
1856                                 *m_headp = m_head;
1857                         }
1858                 }
1859                 /*
1860                  * XXX
1861                  * Assume IPv4, we don't have TSO/checksum offload support
1862                  * for IPv6 yet.
1863                  */
1864                 ip_off = sizeof(struct ether_header);
1865                 m_head = m_pullup(m_head, ip_off);
1866                 if (m_head == NULL) {
1867                         *m_headp = NULL;
1868                         return (ENOBUFS);
1869                 }
1870                 eh = mtod(m_head, struct ether_header *);
1871                 if (eh->ether_type == htons(ETHERTYPE_VLAN)) {
1872                         ip_off = sizeof(struct ether_vlan_header);
1873                         m_head = m_pullup(m_head, ip_off);
1874                         if (m_head == NULL) {
1875                                 *m_headp = NULL;
1876                                 return (ENOBUFS);
1877                         }
1878                 }
1879                 m_head = m_pullup(m_head, ip_off + sizeof(struct ip));
1880                 if (m_head == NULL) {
1881                         *m_headp = NULL;
1882                         return (ENOBUFS);
1883                 }
1884                 ip = (struct ip *)(mtod(m_head, char *) + ip_off);
1885                 poff = ip_off + (ip->ip_hl << 2);
1886                 if (do_tso) {
1887                         m_head = m_pullup(m_head, poff + sizeof(struct tcphdr));
1888                         if (m_head == NULL) {
1889                                 *m_headp = NULL;
1890                                 return (ENOBUFS);
1891                         }
1892                         tp = (struct tcphdr *)(mtod(m_head, char *) + poff);
1893                         /*
1894                          * TSO workaround:
1895                          *   pull 4 more bytes of data into it.
1896                          */
1897                         m_head = m_pullup(m_head, poff + (tp->th_off << 2) + 4);
1898                         if (m_head == NULL) {
1899                                 *m_headp = NULL;
1900                                 return (ENOBUFS);
1901                         }
1902                         ip = (struct ip *)(mtod(m_head, char *) + ip_off);
1903                         ip->ip_len = 0;
1904                         ip->ip_sum = 0;
1905                         /*
1906                          * The pseudo TCP checksum does not include TCP payload
1907                          * length so driver should recompute the checksum here
1908                          * what hardware expect to see. This is adherence of
1909                          * Microsoft's Large Send specification.
1910                          */
1911                         tp = (struct tcphdr *)(mtod(m_head, char *) + poff);
1912                         tp->th_sum = in_pseudo(ip->ip_src.s_addr,
1913                             ip->ip_dst.s_addr, htons(IPPROTO_TCP));
1914                 } else if (m_head->m_pkthdr.csum_flags & CSUM_TCP) {
1915                         m_head = m_pullup(m_head, poff + sizeof(struct tcphdr));
1916                         if (m_head == NULL) {
1917                                 *m_headp = NULL;
1918                                 return (ENOBUFS);
1919                         }
1920                         tp = (struct tcphdr *)(mtod(m_head, char *) + poff);
1921                         m_head = m_pullup(m_head, poff + (tp->th_off << 2));
1922                         if (m_head == NULL) {
1923                                 *m_headp = NULL;
1924                                 return (ENOBUFS);
1925                         }
1926                         ip = (struct ip *)(mtod(m_head, char *) + ip_off);
1927                         tp = (struct tcphdr *)(mtod(m_head, char *) + poff);
1928                 } else if (m_head->m_pkthdr.csum_flags & CSUM_UDP) {
1929                         m_head = m_pullup(m_head, poff + sizeof(struct udphdr));
1930                         if (m_head == NULL) {
1931                                 *m_headp = NULL;
1932                                 return (ENOBUFS);
1933                         }
1934                         ip = (struct ip *)(mtod(m_head, char *) + ip_off);
1935                 }
1936                 *m_headp = m_head;
1937         }
1938
1939         /*
1940          * Map the packet for DMA
1941          *
1942          * Capture the first descriptor index,
1943          * this descriptor will have the index
1944          * of the EOP which is the only one that
1945          * now gets a DONE bit writeback.
1946          */
1947         first = txr->next_avail_desc;
1948         tx_buffer = &txr->tx_buffers[first];
1949         tx_buffer_mapped = tx_buffer;
1950         map = tx_buffer->map;
1951
1952         error = bus_dmamap_load_mbuf_sg(txr->txtag, map,
1953             *m_headp, segs, &nsegs, BUS_DMA_NOWAIT);
1954
1955         /*
1956          * There are two types of errors we can (try) to handle:
1957          * - EFBIG means the mbuf chain was too long and bus_dma ran
1958          *   out of segments.  Defragment the mbuf chain and try again.
1959          * - ENOMEM means bus_dma could not obtain enough bounce buffers
1960          *   at this point in time.  Defer sending and try again later.
1961          * All other errors, in particular EINVAL, are fatal and prevent the
1962          * mbuf chain from ever going through.  Drop it and report error.
1963          */
1964         if (error == EFBIG && remap) {
1965                 struct mbuf *m;
1966
1967                 m = m_defrag(*m_headp, M_NOWAIT);
1968                 if (m == NULL) {
1969                         adapter->mbuf_alloc_failed++;
1970                         m_freem(*m_headp);
1971                         *m_headp = NULL;
1972                         return (ENOBUFS);
1973                 }
1974                 *m_headp = m;
1975
1976                 /* Try it again, but only once */
1977                 remap = 0;
1978                 goto retry;
1979         } else if (error == ENOMEM) {
1980                 adapter->no_tx_dma_setup++;
1981                 return (error);
1982         } else if (error != 0) {
1983                 adapter->no_tx_dma_setup++;
1984                 m_freem(*m_headp);
1985                 *m_headp = NULL;
1986                 return (error);
1987         }
1988
1989         /*
1990          * TSO Hardware workaround, if this packet is not
1991          * TSO, and is only a single descriptor long, and
1992          * it follows a TSO burst, then we need to add a
1993          * sentinel descriptor to prevent premature writeback.
1994          */
1995         if ((do_tso == 0) && (txr->tx_tso == TRUE)) {
1996                 if (nsegs == 1)
1997                         tso_desc = TRUE;
1998                 txr->tx_tso = FALSE;
1999         }
2000
2001         if (nsegs > (txr->tx_avail - 2)) {
2002                 txr->no_desc_avail++;
2003                 bus_dmamap_unload(txr->txtag, map);
2004                 return (ENOBUFS);
2005         }
2006         m_head = *m_headp;
2007
2008         /* Do hardware assists */
2009         if (m_head->m_pkthdr.csum_flags & CSUM_TSO) {
2010                 em_tso_setup(txr, m_head, ip_off, ip, tp,
2011                     &txd_upper, &txd_lower);
2012                 /* we need to make a final sentinel transmit desc */
2013                 tso_desc = TRUE;
2014         } else if (m_head->m_pkthdr.csum_flags & CSUM_OFFLOAD)
2015                 em_transmit_checksum_setup(txr, m_head,
2016                     ip_off, ip, &txd_upper, &txd_lower);
2017
2018         if (m_head->m_flags & M_VLANTAG) {
2019                 /* Set the vlan id. */
2020                 txd_upper |=
2021                     (htole16(m_head->m_pkthdr.ether_vtag) << 16);
2022                 /* Tell hardware to add tag */
2023                 txd_lower |= htole32(E1000_TXD_CMD_VLE);
2024         }
2025
2026         i = txr->next_avail_desc;
2027
2028         /* Set up our transmit descriptors */
2029         for (j = 0; j < nsegs; j++) {
2030                 bus_size_t seg_len;
2031                 bus_addr_t seg_addr;
2032
2033                 tx_buffer = &txr->tx_buffers[i];
2034                 ctxd = &txr->tx_base[i];
2035                 seg_addr = segs[j].ds_addr;
2036                 seg_len  = segs[j].ds_len;
2037                 /*
2038                 ** TSO Workaround:
2039                 ** If this is the last descriptor, we want to
2040                 ** split it so we have a small final sentinel
2041                 */
2042                 if (tso_desc && (j == (nsegs -1)) && (seg_len > 8)) {
2043                         seg_len -= 4;
2044                         ctxd->buffer_addr = htole64(seg_addr);
2045                         ctxd->lower.data = htole32(
2046                         adapter->txd_cmd | txd_lower | seg_len);
2047                         ctxd->upper.data =
2048                             htole32(txd_upper);
2049                         if (++i == adapter->num_tx_desc)
2050                                 i = 0;
2051                         /* Now make the sentinel */     
2052                         ++txd_used; /* using an extra txd */
2053                         ctxd = &txr->tx_base[i];
2054                         tx_buffer = &txr->tx_buffers[i];
2055                         ctxd->buffer_addr =
2056                             htole64(seg_addr + seg_len);
2057                         ctxd->lower.data = htole32(
2058                         adapter->txd_cmd | txd_lower | 4);
2059                         ctxd->upper.data =
2060                             htole32(txd_upper);
2061                         last = i;
2062                         if (++i == adapter->num_tx_desc)
2063                                 i = 0;
2064                 } else {
2065                         ctxd->buffer_addr = htole64(seg_addr);
2066                         ctxd->lower.data = htole32(
2067                         adapter->txd_cmd | txd_lower | seg_len);
2068                         ctxd->upper.data =
2069                             htole32(txd_upper);
2070                         last = i;
2071                         if (++i == adapter->num_tx_desc)
2072                                 i = 0;
2073                 }
2074                 tx_buffer->m_head = NULL;
2075                 tx_buffer->next_eop = -1;
2076         }
2077
2078         txr->next_avail_desc = i;
2079         txr->tx_avail -= nsegs;
2080         if (tso_desc) /* TSO used an extra for sentinel */
2081                 txr->tx_avail -= txd_used;
2082
2083         tx_buffer->m_head = m_head;
2084         /*
2085         ** Here we swap the map so the last descriptor,
2086         ** which gets the completion interrupt has the
2087         ** real map, and the first descriptor gets the
2088         ** unused map from this descriptor.
2089         */
2090         tx_buffer_mapped->map = tx_buffer->map;
2091         tx_buffer->map = map;
2092         bus_dmamap_sync(txr->txtag, map, BUS_DMASYNC_PREWRITE);
2093
2094         /*
2095          * Last Descriptor of Packet
2096          * needs End Of Packet (EOP)
2097          * and Report Status (RS)
2098          */
2099         ctxd->lower.data |=
2100             htole32(E1000_TXD_CMD_EOP | E1000_TXD_CMD_RS);
2101         /*
2102          * Keep track in the first buffer which
2103          * descriptor will be written back
2104          */
2105         tx_buffer = &txr->tx_buffers[first];
2106         tx_buffer->next_eop = last;
2107         /* Update the watchdog time early and often */
2108         txr->watchdog_time = ticks;
2109
2110         /*
2111          * Advance the Transmit Descriptor Tail (TDT), this tells the E1000
2112          * that this frame is available to transmit.
2113          */
2114         bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
2115             BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
2116         E1000_WRITE_REG(&adapter->hw, E1000_TDT(txr->me), i);
2117
2118         return (0);
2119 }
2120
2121 static void
2122 em_set_promisc(struct adapter *adapter)
2123 {
2124         struct ifnet    *ifp = adapter->ifp;
2125         u32             reg_rctl;
2126
2127         reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
2128
2129         if (ifp->if_flags & IFF_PROMISC) {
2130                 reg_rctl |= (E1000_RCTL_UPE | E1000_RCTL_MPE);
2131                 /* Turn this on if you want to see bad packets */
2132                 if (em_debug_sbp)
2133                         reg_rctl |= E1000_RCTL_SBP;
2134                 E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2135         } else if (ifp->if_flags & IFF_ALLMULTI) {
2136                 reg_rctl |= E1000_RCTL_MPE;
2137                 reg_rctl &= ~E1000_RCTL_UPE;
2138                 E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2139         }
2140 }
2141
2142 static void
2143 em_disable_promisc(struct adapter *adapter)
2144 {
2145         struct ifnet    *ifp = adapter->ifp;
2146         u32             reg_rctl;
2147         int             mcnt = 0;
2148
2149         reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
2150         reg_rctl &=  (~E1000_RCTL_UPE);
2151         if (ifp->if_flags & IFF_ALLMULTI)
2152                 mcnt = MAX_NUM_MULTICAST_ADDRESSES;
2153         else {
2154                 struct  ifmultiaddr *ifma;
2155 #if __FreeBSD_version < 800000
2156                 IF_ADDR_LOCK(ifp);
2157 #else   
2158                 if_maddr_rlock(ifp);
2159 #endif
2160                 TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
2161                         if (ifma->ifma_addr->sa_family != AF_LINK)
2162                                 continue;
2163                         if (mcnt == MAX_NUM_MULTICAST_ADDRESSES)
2164                                 break;
2165                         mcnt++;
2166                 }
2167 #if __FreeBSD_version < 800000
2168                 IF_ADDR_UNLOCK(ifp);
2169 #else
2170                 if_maddr_runlock(ifp);
2171 #endif
2172         }
2173         /* Don't disable if in MAX groups */
2174         if (mcnt < MAX_NUM_MULTICAST_ADDRESSES)
2175                 reg_rctl &=  (~E1000_RCTL_MPE);
2176         reg_rctl &=  (~E1000_RCTL_SBP);
2177         E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2178 }
2179
2180
2181 /*********************************************************************
2182  *  Multicast Update
2183  *
2184  *  This routine is called whenever multicast address list is updated.
2185  *
2186  **********************************************************************/
2187
2188 static void
2189 em_set_multi(struct adapter *adapter)
2190 {
2191         struct ifnet    *ifp = adapter->ifp;
2192         struct ifmultiaddr *ifma;
2193         u32 reg_rctl = 0;
2194         u8  *mta; /* Multicast array memory */
2195         int mcnt = 0;
2196
2197         IOCTL_DEBUGOUT("em_set_multi: begin");
2198
2199         mta = adapter->mta;
2200         bzero(mta, sizeof(u8) * ETH_ADDR_LEN * MAX_NUM_MULTICAST_ADDRESSES);
2201
2202         if (adapter->hw.mac.type == e1000_82542 && 
2203             adapter->hw.revision_id == E1000_REVISION_2) {
2204                 reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
2205                 if (adapter->hw.bus.pci_cmd_word & CMD_MEM_WRT_INVALIDATE)
2206                         e1000_pci_clear_mwi(&adapter->hw);
2207                 reg_rctl |= E1000_RCTL_RST;
2208                 E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2209                 msec_delay(5);
2210         }
2211
2212 #if __FreeBSD_version < 800000
2213         IF_ADDR_LOCK(ifp);
2214 #else
2215         if_maddr_rlock(ifp);
2216 #endif
2217         TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
2218                 if (ifma->ifma_addr->sa_family != AF_LINK)
2219                         continue;
2220
2221                 if (mcnt == MAX_NUM_MULTICAST_ADDRESSES)
2222                         break;
2223
2224                 bcopy(LLADDR((struct sockaddr_dl *)ifma->ifma_addr),
2225                     &mta[mcnt * ETH_ADDR_LEN], ETH_ADDR_LEN);
2226                 mcnt++;
2227         }
2228 #if __FreeBSD_version < 800000
2229         IF_ADDR_UNLOCK(ifp);
2230 #else
2231         if_maddr_runlock(ifp);
2232 #endif
2233         if (mcnt >= MAX_NUM_MULTICAST_ADDRESSES) {
2234                 reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
2235                 reg_rctl |= E1000_RCTL_MPE;
2236                 E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2237         } else
2238                 e1000_update_mc_addr_list(&adapter->hw, mta, mcnt);
2239
2240         if (adapter->hw.mac.type == e1000_82542 && 
2241             adapter->hw.revision_id == E1000_REVISION_2) {
2242                 reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
2243                 reg_rctl &= ~E1000_RCTL_RST;
2244                 E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2245                 msec_delay(5);
2246                 if (adapter->hw.bus.pci_cmd_word & CMD_MEM_WRT_INVALIDATE)
2247                         e1000_pci_set_mwi(&adapter->hw);
2248         }
2249 }
2250
2251
2252 /*********************************************************************
2253  *  Timer routine
2254  *
2255  *  This routine checks for link status and updates statistics.
2256  *
2257  **********************************************************************/
2258
2259 static void
2260 em_local_timer(void *arg)
2261 {
2262         struct adapter  *adapter = arg;
2263         struct ifnet    *ifp = adapter->ifp;
2264         struct tx_ring  *txr = adapter->tx_rings;
2265         struct rx_ring  *rxr = adapter->rx_rings;
2266         u32             trigger;
2267
2268         EM_CORE_LOCK_ASSERT(adapter);
2269
2270         em_update_link_status(adapter);
2271         em_update_stats_counters(adapter);
2272
2273         /* Reset LAA into RAR[0] on 82571 */
2274         if ((adapter->hw.mac.type == e1000_82571) &&
2275             e1000_get_laa_state_82571(&adapter->hw))
2276                 e1000_rar_set(&adapter->hw, adapter->hw.mac.addr, 0);
2277
2278         /* Mask to use in the irq trigger */
2279         if (adapter->msix_mem)
2280                 trigger = rxr->ims; /* RX for 82574 */
2281         else
2282                 trigger = E1000_ICS_RXDMT0;
2283
2284         /*
2285         ** Check on the state of the TX queue(s), this 
2286         ** can be done without the lock because its RO
2287         ** and the HUNG state will be static if set.
2288         */
2289         for (int i = 0; i < adapter->num_queues; i++, txr++) {
2290                 if ((txr->queue_status == EM_QUEUE_HUNG) &&
2291                     (adapter->pause_frames == 0))
2292                         goto hung;
2293                 /* Schedule a TX tasklet if needed */
2294                 if (txr->tx_avail <= EM_MAX_SCATTER)
2295                         taskqueue_enqueue(txr->tq, &txr->tx_task);
2296         }
2297         
2298         adapter->pause_frames = 0;
2299         callout_reset(&adapter->timer, hz, em_local_timer, adapter);
2300 #ifndef DEVICE_POLLING
2301         /* Trigger an RX interrupt to guarantee mbuf refresh */
2302         E1000_WRITE_REG(&adapter->hw, E1000_ICS, trigger);
2303 #endif
2304         return;
2305 hung:
2306         /* Looks like we're hung */
2307         device_printf(adapter->dev, "Watchdog timeout -- resetting\n");
2308         device_printf(adapter->dev,
2309             "Queue(%d) tdh = %d, hw tdt = %d\n", txr->me,
2310             E1000_READ_REG(&adapter->hw, E1000_TDH(txr->me)),
2311             E1000_READ_REG(&adapter->hw, E1000_TDT(txr->me)));
2312         device_printf(adapter->dev,"TX(%d) desc avail = %d,"
2313             "Next TX to Clean = %d\n",
2314             txr->me, txr->tx_avail, txr->next_to_clean);
2315         ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
2316         adapter->watchdog_events++;
2317         adapter->pause_frames = 0;
2318         em_init_locked(adapter);
2319 }
2320
2321
2322 static void
2323 em_update_link_status(struct adapter *adapter)
2324 {
2325         struct e1000_hw *hw = &adapter->hw;
2326         struct ifnet *ifp = adapter->ifp;
2327         device_t dev = adapter->dev;
2328         struct tx_ring *txr = adapter->tx_rings;
2329         u32 link_check = 0;
2330
2331         /* Get the cached link value or read phy for real */
2332         switch (hw->phy.media_type) {
2333         case e1000_media_type_copper:
2334                 if (hw->mac.get_link_status) {
2335                         /* Do the work to read phy */
2336                         e1000_check_for_link(hw);
2337                         link_check = !hw->mac.get_link_status;
2338                         if (link_check) /* ESB2 fix */
2339                                 e1000_cfg_on_link_up(hw);
2340                 } else
2341                         link_check = TRUE;
2342                 break;
2343         case e1000_media_type_fiber:
2344                 e1000_check_for_link(hw);
2345                 link_check = (E1000_READ_REG(hw, E1000_STATUS) &
2346                                  E1000_STATUS_LU);
2347                 break;
2348         case e1000_media_type_internal_serdes:
2349                 e1000_check_for_link(hw);
2350                 link_check = adapter->hw.mac.serdes_has_link;
2351                 break;
2352         default:
2353         case e1000_media_type_unknown:
2354                 break;
2355         }
2356
2357         /* Now check for a transition */
2358         if (link_check && (adapter->link_active == 0)) {
2359                 e1000_get_speed_and_duplex(hw, &adapter->link_speed,
2360                     &adapter->link_duplex);
2361                 /* Check if we must disable SPEED_MODE bit on PCI-E */
2362                 if ((adapter->link_speed != SPEED_1000) &&
2363                     ((hw->mac.type == e1000_82571) ||
2364                     (hw->mac.type == e1000_82572))) {
2365                         int tarc0;
2366                         tarc0 = E1000_READ_REG(hw, E1000_TARC(0));
2367                         tarc0 &= ~SPEED_MODE_BIT;
2368                         E1000_WRITE_REG(hw, E1000_TARC(0), tarc0);
2369                 }
2370                 if (bootverbose)
2371                         device_printf(dev, "Link is up %d Mbps %s\n",
2372                             adapter->link_speed,
2373                             ((adapter->link_duplex == FULL_DUPLEX) ?
2374                             "Full Duplex" : "Half Duplex"));
2375                 adapter->link_active = 1;
2376                 adapter->smartspeed = 0;
2377                 ifp->if_baudrate = adapter->link_speed * 1000000;
2378                 if_link_state_change(ifp, LINK_STATE_UP);
2379         } else if (!link_check && (adapter->link_active == 1)) {
2380                 ifp->if_baudrate = adapter->link_speed = 0;
2381                 adapter->link_duplex = 0;
2382                 if (bootverbose)
2383                         device_printf(dev, "Link is Down\n");
2384                 adapter->link_active = 0;
2385                 /* Link down, disable watchdog */
2386                 for (int i = 0; i < adapter->num_queues; i++, txr++)
2387                         txr->queue_status = EM_QUEUE_IDLE;
2388                 if_link_state_change(ifp, LINK_STATE_DOWN);
2389         }
2390 }
2391
2392 /*********************************************************************
2393  *
2394  *  This routine disables all traffic on the adapter by issuing a
2395  *  global reset on the MAC and deallocates TX/RX buffers.
2396  *
2397  *  This routine should always be called with BOTH the CORE
2398  *  and TX locks.
2399  **********************************************************************/
2400
2401 static void
2402 em_stop(void *arg)
2403 {
2404         struct adapter  *adapter = arg;
2405         struct ifnet    *ifp = adapter->ifp;
2406         struct tx_ring  *txr = adapter->tx_rings;
2407
2408         EM_CORE_LOCK_ASSERT(adapter);
2409
2410         INIT_DEBUGOUT("em_stop: begin");
2411
2412         em_disable_intr(adapter);
2413         callout_stop(&adapter->timer);
2414
2415         /* Tell the stack that the interface is no longer active */
2416         ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
2417         ifp->if_drv_flags |= IFF_DRV_OACTIVE;
2418
2419         /* Unarm watchdog timer. */
2420         for (int i = 0; i < adapter->num_queues; i++, txr++) {
2421                 EM_TX_LOCK(txr);
2422                 txr->queue_status = EM_QUEUE_IDLE;
2423                 EM_TX_UNLOCK(txr);
2424         }
2425
2426         e1000_reset_hw(&adapter->hw);
2427         E1000_WRITE_REG(&adapter->hw, E1000_WUC, 0);
2428
2429         e1000_led_off(&adapter->hw);
2430         e1000_cleanup_led(&adapter->hw);
2431 }
2432
2433
2434 /*********************************************************************
2435  *
2436  *  Determine hardware revision.
2437  *
2438  **********************************************************************/
2439 static void
2440 em_identify_hardware(struct adapter *adapter)
2441 {
2442         device_t dev = adapter->dev;
2443
2444         /* Make sure our PCI config space has the necessary stuff set */
2445         pci_enable_busmaster(dev);
2446         adapter->hw.bus.pci_cmd_word = pci_read_config(dev, PCIR_COMMAND, 2);
2447
2448         /* Save off the information about this board */
2449         adapter->hw.vendor_id = pci_get_vendor(dev);
2450         adapter->hw.device_id = pci_get_device(dev);
2451         adapter->hw.revision_id = pci_read_config(dev, PCIR_REVID, 1);
2452         adapter->hw.subsystem_vendor_id =
2453             pci_read_config(dev, PCIR_SUBVEND_0, 2);
2454         adapter->hw.subsystem_device_id =
2455             pci_read_config(dev, PCIR_SUBDEV_0, 2);
2456
2457         /* Do Shared Code Init and Setup */
2458         if (e1000_set_mac_type(&adapter->hw)) {
2459                 device_printf(dev, "Setup init failure\n");
2460                 return;
2461         }
2462 }
2463
2464 static int
2465 em_allocate_pci_resources(struct adapter *adapter)
2466 {
2467         device_t        dev = adapter->dev;
2468         int             rid;
2469
2470         rid = PCIR_BAR(0);
2471         adapter->memory = bus_alloc_resource_any(dev, SYS_RES_MEMORY,
2472             &rid, RF_ACTIVE);
2473         if (adapter->memory == NULL) {
2474                 device_printf(dev, "Unable to allocate bus resource: memory\n");
2475                 return (ENXIO);
2476         }
2477         adapter->osdep.mem_bus_space_tag =
2478             rman_get_bustag(adapter->memory);
2479         adapter->osdep.mem_bus_space_handle =
2480             rman_get_bushandle(adapter->memory);
2481         adapter->hw.hw_addr = (u8 *)&adapter->osdep.mem_bus_space_handle;
2482
2483         /* Default to a single queue */
2484         adapter->num_queues = 1;
2485
2486         /*
2487          * Setup MSI/X or MSI if PCI Express
2488          */
2489         adapter->msix = em_setup_msix(adapter);
2490
2491         adapter->hw.back = &adapter->osdep;
2492
2493         return (0);
2494 }
2495
2496 /*********************************************************************
2497  *
2498  *  Setup the Legacy or MSI Interrupt handler
2499  *
2500  **********************************************************************/
2501 int
2502 em_allocate_legacy(struct adapter *adapter)
2503 {
2504         device_t dev = adapter->dev;
2505         struct tx_ring  *txr = adapter->tx_rings;
2506         int error, rid = 0;
2507
2508         /* Manually turn off all interrupts */
2509         E1000_WRITE_REG(&adapter->hw, E1000_IMC, 0xffffffff);
2510
2511         if (adapter->msix == 1) /* using MSI */
2512                 rid = 1;
2513         /* We allocate a single interrupt resource */
2514         adapter->res = bus_alloc_resource_any(dev,
2515             SYS_RES_IRQ, &rid, RF_SHAREABLE | RF_ACTIVE);
2516         if (adapter->res == NULL) {
2517                 device_printf(dev, "Unable to allocate bus resource: "
2518                     "interrupt\n");
2519                 return (ENXIO);
2520         }
2521
2522         /*
2523          * Allocate a fast interrupt and the associated
2524          * deferred processing contexts.
2525          */
2526         TASK_INIT(&adapter->que_task, 0, em_handle_que, adapter);
2527         adapter->tq = taskqueue_create_fast("em_taskq", M_NOWAIT,
2528             taskqueue_thread_enqueue, &adapter->tq);
2529         taskqueue_start_threads(&adapter->tq, 1, PI_NET, "%s que",
2530             device_get_nameunit(adapter->dev));
2531         /* Use a TX only tasklet for local timer */
2532         TASK_INIT(&txr->tx_task, 0, em_handle_tx, txr);
2533         txr->tq = taskqueue_create_fast("em_txq", M_NOWAIT,
2534             taskqueue_thread_enqueue, &txr->tq);
2535         taskqueue_start_threads(&txr->tq, 1, PI_NET, "%s txq",
2536             device_get_nameunit(adapter->dev));
2537         TASK_INIT(&adapter->link_task, 0, em_handle_link, adapter);
2538         if ((error = bus_setup_intr(dev, adapter->res, INTR_TYPE_NET,
2539             em_irq_fast, NULL, adapter, &adapter->tag)) != 0) {
2540                 device_printf(dev, "Failed to register fast interrupt "
2541                             "handler: %d\n", error);
2542                 taskqueue_free(adapter->tq);
2543                 adapter->tq = NULL;
2544                 return (error);
2545         }
2546         
2547         return (0);
2548 }
2549
2550 /*********************************************************************
2551  *
2552  *  Setup the MSIX Interrupt handlers
2553  *   This is not really Multiqueue, rather
2554  *   its just seperate interrupt vectors
2555  *   for TX, RX, and Link.
2556  *
2557  **********************************************************************/
2558 int
2559 em_allocate_msix(struct adapter *adapter)
2560 {
2561         device_t        dev = adapter->dev;
2562         struct          tx_ring *txr = adapter->tx_rings;
2563         struct          rx_ring *rxr = adapter->rx_rings;
2564         int             error, rid, vector = 0;
2565
2566
2567         /* Make sure all interrupts are disabled */
2568         E1000_WRITE_REG(&adapter->hw, E1000_IMC, 0xffffffff);
2569
2570         /* First set up ring resources */
2571         for (int i = 0; i < adapter->num_queues; i++, txr++, rxr++) {
2572
2573                 /* RX ring */
2574                 rid = vector + 1;
2575
2576                 rxr->res = bus_alloc_resource_any(dev,
2577                     SYS_RES_IRQ, &rid, RF_ACTIVE);
2578                 if (rxr->res == NULL) {
2579                         device_printf(dev,
2580                             "Unable to allocate bus resource: "
2581                             "RX MSIX Interrupt %d\n", i);
2582                         return (ENXIO);
2583                 }
2584                 if ((error = bus_setup_intr(dev, rxr->res,
2585                     INTR_TYPE_NET | INTR_MPSAFE, NULL, em_msix_rx,
2586                     rxr, &rxr->tag)) != 0) {
2587                         device_printf(dev, "Failed to register RX handler");
2588                         return (error);
2589                 }
2590 #if __FreeBSD_version >= 800504
2591                 bus_describe_intr(dev, rxr->res, rxr->tag, "rx %d", i);
2592 #endif
2593                 rxr->msix = vector++; /* NOTE increment vector for TX */
2594                 TASK_INIT(&rxr->rx_task, 0, em_handle_rx, rxr);
2595                 rxr->tq = taskqueue_create_fast("em_rxq", M_NOWAIT,
2596                     taskqueue_thread_enqueue, &rxr->tq);
2597                 taskqueue_start_threads(&rxr->tq, 1, PI_NET, "%s rxq",
2598                     device_get_nameunit(adapter->dev));
2599                 /*
2600                 ** Set the bit to enable interrupt
2601                 ** in E1000_IMS -- bits 20 and 21
2602                 ** are for RX0 and RX1, note this has
2603                 ** NOTHING to do with the MSIX vector
2604                 */
2605                 rxr->ims = 1 << (20 + i);
2606                 adapter->ivars |= (8 | rxr->msix) << (i * 4);
2607
2608                 /* TX ring */
2609                 rid = vector + 1;
2610                 txr->res = bus_alloc_resource_any(dev,
2611                     SYS_RES_IRQ, &rid, RF_ACTIVE);
2612                 if (txr->res == NULL) {
2613                         device_printf(dev,
2614                             "Unable to allocate bus resource: "
2615                             "TX MSIX Interrupt %d\n", i);
2616                         return (ENXIO);
2617                 }
2618                 if ((error = bus_setup_intr(dev, txr->res,
2619                     INTR_TYPE_NET | INTR_MPSAFE, NULL, em_msix_tx,
2620                     txr, &txr->tag)) != 0) {
2621                         device_printf(dev, "Failed to register TX handler");
2622                         return (error);
2623                 }
2624 #if __FreeBSD_version >= 800504
2625                 bus_describe_intr(dev, txr->res, txr->tag, "tx %d", i);
2626 #endif
2627                 txr->msix = vector++; /* Increment vector for next pass */
2628                 TASK_INIT(&txr->tx_task, 0, em_handle_tx, txr);
2629                 txr->tq = taskqueue_create_fast("em_txq", M_NOWAIT,
2630                     taskqueue_thread_enqueue, &txr->tq);
2631                 taskqueue_start_threads(&txr->tq, 1, PI_NET, "%s txq",
2632                     device_get_nameunit(adapter->dev));
2633                 /*
2634                 ** Set the bit to enable interrupt
2635                 ** in E1000_IMS -- bits 22 and 23
2636                 ** are for TX0 and TX1, note this has
2637                 ** NOTHING to do with the MSIX vector
2638                 */
2639                 txr->ims = 1 << (22 + i);
2640                 adapter->ivars |= (8 | txr->msix) << (8 + (i * 4));
2641         }
2642
2643         /* Link interrupt */
2644         ++rid;
2645         adapter->res = bus_alloc_resource_any(dev,
2646             SYS_RES_IRQ, &rid, RF_ACTIVE);
2647         if (!adapter->res) {
2648                 device_printf(dev,"Unable to allocate "
2649                     "bus resource: Link interrupt [%d]\n", rid);
2650                 return (ENXIO);
2651         }
2652         /* Set the link handler function */
2653         error = bus_setup_intr(dev, adapter->res,
2654             INTR_TYPE_NET | INTR_MPSAFE, NULL,
2655             em_msix_link, adapter, &adapter->tag);
2656         if (error) {
2657                 adapter->res = NULL;
2658                 device_printf(dev, "Failed to register LINK handler");
2659                 return (error);
2660         }
2661 #if __FreeBSD_version >= 800504
2662                 bus_describe_intr(dev, adapter->res, adapter->tag, "link");
2663 #endif
2664         adapter->linkvec = vector;
2665         adapter->ivars |=  (8 | vector) << 16;
2666         adapter->ivars |= 0x80000000;
2667
2668         return (0);
2669 }
2670
2671
2672 static void
2673 em_free_pci_resources(struct adapter *adapter)
2674 {
2675         device_t        dev = adapter->dev;
2676         struct tx_ring  *txr;
2677         struct rx_ring  *rxr;
2678         int             rid;
2679
2680
2681         /*
2682         ** Release all the queue interrupt resources:
2683         */
2684         for (int i = 0; i < adapter->num_queues; i++) {
2685                 txr = &adapter->tx_rings[i];
2686                 rxr = &adapter->rx_rings[i];
2687                 /* an early abort? */
2688                 if ((txr == NULL) || (rxr == NULL))
2689                         break;
2690                 rid = txr->msix +1;
2691                 if (txr->tag != NULL) {
2692                         bus_teardown_intr(dev, txr->res, txr->tag);
2693                         txr->tag = NULL;
2694                 }
2695                 if (txr->res != NULL)
2696                         bus_release_resource(dev, SYS_RES_IRQ,
2697                             rid, txr->res);
2698                 rid = rxr->msix +1;
2699                 if (rxr->tag != NULL) {
2700                         bus_teardown_intr(dev, rxr->res, rxr->tag);
2701                         rxr->tag = NULL;
2702                 }
2703                 if (rxr->res != NULL)
2704                         bus_release_resource(dev, SYS_RES_IRQ,
2705                             rid, rxr->res);
2706         }
2707
2708         if (adapter->linkvec) /* we are doing MSIX */
2709                 rid = adapter->linkvec + 1;
2710         else
2711                 (adapter->msix != 0) ? (rid = 1):(rid = 0);
2712
2713         if (adapter->tag != NULL) {
2714                 bus_teardown_intr(dev, adapter->res, adapter->tag);
2715                 adapter->tag = NULL;
2716         }
2717
2718         if (adapter->res != NULL)
2719                 bus_release_resource(dev, SYS_RES_IRQ, rid, adapter->res);
2720
2721
2722         if (adapter->msix)
2723                 pci_release_msi(dev);
2724
2725         if (adapter->msix_mem != NULL)
2726                 bus_release_resource(dev, SYS_RES_MEMORY,
2727                     PCIR_BAR(EM_MSIX_BAR), adapter->msix_mem);
2728
2729         if (adapter->memory != NULL)
2730                 bus_release_resource(dev, SYS_RES_MEMORY,
2731                     PCIR_BAR(0), adapter->memory);
2732
2733         if (adapter->flash != NULL)
2734                 bus_release_resource(dev, SYS_RES_MEMORY,
2735                     EM_FLASH, adapter->flash);
2736 }
2737
2738 /*
2739  * Setup MSI or MSI/X
2740  */
2741 static int
2742 em_setup_msix(struct adapter *adapter)
2743 {
2744         device_t dev = adapter->dev;
2745         int val = 0;
2746
2747         /*
2748         ** Setup MSI/X for Hartwell: tests have shown
2749         ** use of two queues to be unstable, and to
2750         ** provide no great gain anyway, so we simply
2751         ** seperate the interrupts and use a single queue.
2752         */
2753         if ((adapter->hw.mac.type == e1000_82574) &&
2754             (em_enable_msix == TRUE)) {
2755                 /* Map the MSIX BAR */
2756                 int rid = PCIR_BAR(EM_MSIX_BAR);
2757                 adapter->msix_mem = bus_alloc_resource_any(dev,
2758                     SYS_RES_MEMORY, &rid, RF_ACTIVE);
2759                 if (!adapter->msix_mem) {
2760                         /* May not be enabled */
2761                         device_printf(adapter->dev,
2762                             "Unable to map MSIX table \n");
2763                         goto msi;
2764                 }
2765                 val = pci_msix_count(dev); 
2766                 /* We only need 3 vectors */
2767                 if (val > 3)
2768                         val = 3;
2769                 if ((val != 3) && (val != 5)) {
2770                         bus_release_resource(dev, SYS_RES_MEMORY,
2771                             PCIR_BAR(EM_MSIX_BAR), adapter->msix_mem);
2772                         adapter->msix_mem = NULL;
2773                         device_printf(adapter->dev,
2774                             "MSIX: incorrect vectors, using MSI\n");
2775                         goto msi;
2776                 }
2777
2778                 if (pci_alloc_msix(dev, &val) == 0) {
2779                         device_printf(adapter->dev,
2780                             "Using MSIX interrupts "
2781                             "with %d vectors\n", val);
2782                 }
2783
2784                 return (val);
2785         }
2786 msi:
2787         val = pci_msi_count(dev);
2788         if (val == 1 && pci_alloc_msi(dev, &val) == 0) {
2789                 adapter->msix = 1;
2790                 device_printf(adapter->dev,"Using an MSI interrupt\n");
2791                 return (val);
2792         } 
2793         /* Should only happen due to manual configuration */
2794         device_printf(adapter->dev,"No MSI/MSIX using a Legacy IRQ\n");
2795         return (0);
2796 }
2797
2798
2799 /*********************************************************************
2800  *
2801  *  Initialize the hardware to a configuration
2802  *  as specified by the adapter structure.
2803  *
2804  **********************************************************************/
2805 static void
2806 em_reset(struct adapter *adapter)
2807 {
2808         device_t        dev = adapter->dev;
2809         struct ifnet    *ifp = adapter->ifp;
2810         struct e1000_hw *hw = &adapter->hw;
2811         u16             rx_buffer_size;
2812         u32             pba;
2813
2814         INIT_DEBUGOUT("em_reset: begin");
2815
2816         /* Set up smart power down as default off on newer adapters. */
2817         if (!em_smart_pwr_down && (hw->mac.type == e1000_82571 ||
2818             hw->mac.type == e1000_82572)) {
2819                 u16 phy_tmp = 0;
2820
2821                 /* Speed up time to link by disabling smart power down. */
2822                 e1000_read_phy_reg(hw, IGP02E1000_PHY_POWER_MGMT, &phy_tmp);
2823                 phy_tmp &= ~IGP02E1000_PM_SPD;
2824                 e1000_write_phy_reg(hw, IGP02E1000_PHY_POWER_MGMT, phy_tmp);
2825         }
2826
2827         /*
2828          * Packet Buffer Allocation (PBA)
2829          * Writing PBA sets the receive portion of the buffer
2830          * the remainder is used for the transmit buffer.
2831          */
2832         switch (hw->mac.type) {
2833         /* Total Packet Buffer on these is 48K */
2834         case e1000_82571:
2835         case e1000_82572:
2836         case e1000_80003es2lan:
2837                         pba = E1000_PBA_32K; /* 32K for Rx, 16K for Tx */
2838                 break;
2839         case e1000_82573: /* 82573: Total Packet Buffer is 32K */
2840                         pba = E1000_PBA_12K; /* 12K for Rx, 20K for Tx */
2841                 break;
2842         case e1000_82574:
2843         case e1000_82583:
2844                         pba = E1000_PBA_20K; /* 20K for Rx, 20K for Tx */
2845                 break;
2846         case e1000_ich8lan:
2847                 pba = E1000_PBA_8K;
2848                 break;
2849         case e1000_ich9lan:
2850         case e1000_ich10lan:
2851                 /* Boost Receive side for jumbo frames */
2852                 if (adapter->hw.mac.max_frame_size > 4096)
2853                         pba = E1000_PBA_14K;
2854                 else
2855                         pba = E1000_PBA_10K;
2856                 break;
2857         case e1000_pchlan:
2858         case e1000_pch2lan:
2859         case e1000_pch_lpt:
2860                 pba = E1000_PBA_26K;
2861                 break;
2862         default:
2863                 if (adapter->hw.mac.max_frame_size > 8192)
2864                         pba = E1000_PBA_40K; /* 40K for Rx, 24K for Tx */
2865                 else
2866                         pba = E1000_PBA_48K; /* 48K for Rx, 16K for Tx */
2867         }
2868         E1000_WRITE_REG(&adapter->hw, E1000_PBA, pba);
2869
2870         /*
2871          * These parameters control the automatic generation (Tx) and
2872          * response (Rx) to Ethernet PAUSE frames.
2873          * - High water mark should allow for at least two frames to be
2874          *   received after sending an XOFF.
2875          * - Low water mark works best when it is very near the high water mark.
2876          *   This allows the receiver to restart by sending XON when it has
2877          *   drained a bit. Here we use an arbitary value of 1500 which will
2878          *   restart after one full frame is pulled from the buffer. There
2879          *   could be several smaller frames in the buffer and if so they will
2880          *   not trigger the XON until their total number reduces the buffer
2881          *   by 1500.
2882          * - The pause time is fairly large at 1000 x 512ns = 512 usec.
2883          */
2884         rx_buffer_size = ((E1000_READ_REG(hw, E1000_PBA) & 0xffff) << 10 );
2885         hw->fc.high_water = rx_buffer_size -
2886             roundup2(adapter->hw.mac.max_frame_size, 1024);
2887         hw->fc.low_water = hw->fc.high_water - 1500;
2888
2889         if (adapter->fc) /* locally set flow control value? */
2890                 hw->fc.requested_mode = adapter->fc;
2891         else
2892                 hw->fc.requested_mode = e1000_fc_full;
2893
2894         if (hw->mac.type == e1000_80003es2lan)
2895                 hw->fc.pause_time = 0xFFFF;
2896         else
2897                 hw->fc.pause_time = EM_FC_PAUSE_TIME;
2898
2899         hw->fc.send_xon = TRUE;
2900
2901         /* Device specific overrides/settings */
2902         switch (hw->mac.type) {
2903         case e1000_pchlan:
2904                 /* Workaround: no TX flow ctrl for PCH */
2905                 hw->fc.requested_mode = e1000_fc_rx_pause;
2906                 hw->fc.pause_time = 0xFFFF; /* override */
2907                 if (ifp->if_mtu > ETHERMTU) {
2908                         hw->fc.high_water = 0x3500;
2909                         hw->fc.low_water = 0x1500;
2910                 } else {
2911                         hw->fc.high_water = 0x5000;
2912                         hw->fc.low_water = 0x3000;
2913                 }
2914                 hw->fc.refresh_time = 0x1000;
2915                 break;
2916         case e1000_pch2lan:
2917         case e1000_pch_lpt:
2918                 hw->fc.high_water = 0x5C20;
2919                 hw->fc.low_water = 0x5048;
2920                 hw->fc.pause_time = 0x0650;
2921                 hw->fc.refresh_time = 0x0400;
2922                 /* Jumbos need adjusted PBA */
2923                 if (ifp->if_mtu > ETHERMTU)
2924                         E1000_WRITE_REG(hw, E1000_PBA, 12);
2925                 else
2926                         E1000_WRITE_REG(hw, E1000_PBA, 26);
2927                 break;
2928         case e1000_ich9lan:
2929         case e1000_ich10lan:
2930                 if (ifp->if_mtu > ETHERMTU) {
2931                         hw->fc.high_water = 0x2800;
2932                         hw->fc.low_water = hw->fc.high_water - 8;
2933                         break;
2934                 } 
2935                 /* else fall thru */
2936         default:
2937                 if (hw->mac.type == e1000_80003es2lan)
2938                         hw->fc.pause_time = 0xFFFF;
2939                 break;
2940         }
2941
2942         /* Issue a global reset */
2943         e1000_reset_hw(hw);
2944         E1000_WRITE_REG(hw, E1000_WUC, 0);
2945         em_disable_aspm(adapter);
2946         /* and a re-init */
2947         if (e1000_init_hw(hw) < 0) {
2948                 device_printf(dev, "Hardware Initialization Failed\n");
2949                 return;
2950         }
2951
2952         E1000_WRITE_REG(hw, E1000_VET, ETHERTYPE_VLAN);
2953         e1000_get_phy_info(hw);
2954         e1000_check_for_link(hw);
2955         return;
2956 }
2957
2958 /*********************************************************************
2959  *
2960  *  Setup networking device structure and register an interface.
2961  *
2962  **********************************************************************/
2963 static int
2964 em_setup_interface(device_t dev, struct adapter *adapter)
2965 {
2966         struct ifnet   *ifp;
2967
2968         INIT_DEBUGOUT("em_setup_interface: begin");
2969
2970         ifp = adapter->ifp = if_alloc(IFT_ETHER);
2971         if (ifp == NULL) {
2972                 device_printf(dev, "can not allocate ifnet structure\n");
2973                 return (-1);
2974         }
2975         if_initname(ifp, device_get_name(dev), device_get_unit(dev));
2976         ifp->if_init =  em_init;
2977         ifp->if_softc = adapter;
2978         ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
2979         ifp->if_ioctl = em_ioctl;
2980 #ifdef EM_MULTIQUEUE
2981         /* Multiqueue stack interface */
2982         ifp->if_transmit = em_mq_start;
2983         ifp->if_qflush = em_qflush;
2984 #else
2985         ifp->if_start = em_start;
2986         IFQ_SET_MAXLEN(&ifp->if_snd, adapter->num_tx_desc - 1);
2987         ifp->if_snd.ifq_drv_maxlen = adapter->num_tx_desc - 1;
2988         IFQ_SET_READY(&ifp->if_snd);
2989 #endif  
2990
2991         ether_ifattach(ifp, adapter->hw.mac.addr);
2992
2993         ifp->if_capabilities = ifp->if_capenable = 0;
2994
2995
2996         ifp->if_capabilities |= IFCAP_HWCSUM | IFCAP_VLAN_HWCSUM;
2997         ifp->if_capabilities |= IFCAP_TSO4;
2998         /*
2999          * Tell the upper layer(s) we
3000          * support full VLAN capability
3001          */
3002         ifp->if_data.ifi_hdrlen = sizeof(struct ether_vlan_header);
3003         ifp->if_capabilities |= IFCAP_VLAN_HWTAGGING
3004                              |  IFCAP_VLAN_HWTSO
3005                              |  IFCAP_VLAN_MTU;
3006         ifp->if_capenable = ifp->if_capabilities;
3007
3008         /*
3009         ** Don't turn this on by default, if vlans are
3010         ** created on another pseudo device (eg. lagg)
3011         ** then vlan events are not passed thru, breaking
3012         ** operation, but with HW FILTER off it works. If
3013         ** using vlans directly on the em driver you can
3014         ** enable this and get full hardware tag filtering.
3015         */
3016         ifp->if_capabilities |= IFCAP_VLAN_HWFILTER;
3017
3018 #ifdef DEVICE_POLLING
3019         ifp->if_capabilities |= IFCAP_POLLING;
3020 #endif
3021
3022         /* Enable only WOL MAGIC by default */
3023         if (adapter->wol) {
3024                 ifp->if_capabilities |= IFCAP_WOL;
3025                 ifp->if_capenable |= IFCAP_WOL_MAGIC;
3026         }
3027                 
3028         /*
3029          * Specify the media types supported by this adapter and register
3030          * callbacks to update media and link information
3031          */
3032         ifmedia_init(&adapter->media, IFM_IMASK,
3033             em_media_change, em_media_status);
3034         if ((adapter->hw.phy.media_type == e1000_media_type_fiber) ||
3035             (adapter->hw.phy.media_type == e1000_media_type_internal_serdes)) {
3036                 u_char fiber_type = IFM_1000_SX;        /* default type */
3037
3038                 ifmedia_add(&adapter->media, IFM_ETHER | fiber_type | IFM_FDX, 
3039                             0, NULL);
3040                 ifmedia_add(&adapter->media, IFM_ETHER | fiber_type, 0, NULL);
3041         } else {
3042                 ifmedia_add(&adapter->media, IFM_ETHER | IFM_10_T, 0, NULL);
3043                 ifmedia_add(&adapter->media, IFM_ETHER | IFM_10_T | IFM_FDX,
3044                             0, NULL);
3045                 ifmedia_add(&adapter->media, IFM_ETHER | IFM_100_TX,
3046                             0, NULL);
3047                 ifmedia_add(&adapter->media, IFM_ETHER | IFM_100_TX | IFM_FDX,
3048                             0, NULL);
3049                 if (adapter->hw.phy.type != e1000_phy_ife) {
3050                         ifmedia_add(&adapter->media,
3051                                 IFM_ETHER | IFM_1000_T | IFM_FDX, 0, NULL);
3052                         ifmedia_add(&adapter->media,
3053                                 IFM_ETHER | IFM_1000_T, 0, NULL);
3054                 }
3055         }
3056         ifmedia_add(&adapter->media, IFM_ETHER | IFM_AUTO, 0, NULL);
3057         ifmedia_set(&adapter->media, IFM_ETHER | IFM_AUTO);
3058         return (0);
3059 }
3060
3061
3062 /*
3063  * Manage DMA'able memory.
3064  */
3065 static void
3066 em_dmamap_cb(void *arg, bus_dma_segment_t *segs, int nseg, int error)
3067 {
3068         if (error)
3069                 return;
3070         *(bus_addr_t *) arg = segs[0].ds_addr;
3071 }
3072
3073 static int
3074 em_dma_malloc(struct adapter *adapter, bus_size_t size,
3075         struct em_dma_alloc *dma, int mapflags)
3076 {
3077         int error;
3078
3079         error = bus_dma_tag_create(bus_get_dma_tag(adapter->dev), /* parent */
3080                                 EM_DBA_ALIGN, 0,        /* alignment, bounds */
3081                                 BUS_SPACE_MAXADDR,      /* lowaddr */
3082                                 BUS_SPACE_MAXADDR,      /* highaddr */
3083                                 NULL, NULL,             /* filter, filterarg */
3084                                 size,                   /* maxsize */
3085                                 1,                      /* nsegments */
3086                                 size,                   /* maxsegsize */
3087                                 0,                      /* flags */
3088                                 NULL,                   /* lockfunc */
3089                                 NULL,                   /* lockarg */
3090                                 &dma->dma_tag);
3091         if (error) {
3092                 device_printf(adapter->dev,
3093                     "%s: bus_dma_tag_create failed: %d\n",
3094                     __func__, error);
3095                 goto fail_0;
3096         }
3097
3098         error = bus_dmamem_alloc(dma->dma_tag, (void**) &dma->dma_vaddr,
3099             BUS_DMA_NOWAIT | BUS_DMA_COHERENT, &dma->dma_map);
3100         if (error) {
3101                 device_printf(adapter->dev,
3102                     "%s: bus_dmamem_alloc(%ju) failed: %d\n",
3103                     __func__, (uintmax_t)size, error);
3104                 goto fail_2;
3105         }
3106
3107         dma->dma_paddr = 0;
3108         error = bus_dmamap_load(dma->dma_tag, dma->dma_map, dma->dma_vaddr,
3109             size, em_dmamap_cb, &dma->dma_paddr, mapflags | BUS_DMA_NOWAIT);
3110         if (error || dma->dma_paddr == 0) {
3111                 device_printf(adapter->dev,
3112                     "%s: bus_dmamap_load failed: %d\n",
3113                     __func__, error);
3114                 goto fail_3;
3115         }
3116
3117         return (0);
3118
3119 fail_3:
3120         bus_dmamap_unload(dma->dma_tag, dma->dma_map);
3121 fail_2:
3122         bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map);
3123         bus_dma_tag_destroy(dma->dma_tag);
3124 fail_0:
3125         dma->dma_map = NULL;
3126         dma->dma_tag = NULL;
3127
3128         return (error);
3129 }
3130
3131 static void
3132 em_dma_free(struct adapter *adapter, struct em_dma_alloc *dma)
3133 {
3134         if (dma->dma_tag == NULL)
3135                 return;
3136         if (dma->dma_map != NULL) {
3137                 bus_dmamap_sync(dma->dma_tag, dma->dma_map,
3138                     BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
3139                 bus_dmamap_unload(dma->dma_tag, dma->dma_map);
3140                 bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map);
3141                 dma->dma_map = NULL;
3142         }
3143         bus_dma_tag_destroy(dma->dma_tag);
3144         dma->dma_tag = NULL;
3145 }
3146
3147
3148 /*********************************************************************
3149  *
3150  *  Allocate memory for the transmit and receive rings, and then
3151  *  the descriptors associated with each, called only once at attach.
3152  *
3153  **********************************************************************/
3154 static int
3155 em_allocate_queues(struct adapter *adapter)
3156 {
3157         device_t                dev = adapter->dev;
3158         struct tx_ring          *txr = NULL;
3159         struct rx_ring          *rxr = NULL;
3160         int rsize, tsize, error = E1000_SUCCESS;
3161         int txconf = 0, rxconf = 0;
3162
3163
3164         /* Allocate the TX ring struct memory */
3165         if (!(adapter->tx_rings =
3166             (struct tx_ring *) malloc(sizeof(struct tx_ring) *
3167             adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
3168                 device_printf(dev, "Unable to allocate TX ring memory\n");
3169                 error = ENOMEM;
3170                 goto fail;
3171         }
3172
3173         /* Now allocate the RX */
3174         if (!(adapter->rx_rings =
3175             (struct rx_ring *) malloc(sizeof(struct rx_ring) *
3176             adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
3177                 device_printf(dev, "Unable to allocate RX ring memory\n");
3178                 error = ENOMEM;
3179                 goto rx_fail;
3180         }
3181
3182         tsize = roundup2(adapter->num_tx_desc *
3183             sizeof(struct e1000_tx_desc), EM_DBA_ALIGN);
3184         /*
3185          * Now set up the TX queues, txconf is needed to handle the
3186          * possibility that things fail midcourse and we need to
3187          * undo memory gracefully
3188          */ 
3189         for (int i = 0; i < adapter->num_queues; i++, txconf++) {
3190                 /* Set up some basics */
3191                 txr = &adapter->tx_rings[i];
3192                 txr->adapter = adapter;
3193                 txr->me = i;
3194
3195                 /* Initialize the TX lock */
3196                 snprintf(txr->mtx_name, sizeof(txr->mtx_name), "%s:tx(%d)",
3197                     device_get_nameunit(dev), txr->me);
3198                 mtx_init(&txr->tx_mtx, txr->mtx_name, NULL, MTX_DEF);
3199
3200                 if (em_dma_malloc(adapter, tsize,
3201                         &txr->txdma, BUS_DMA_NOWAIT)) {
3202                         device_printf(dev,
3203                             "Unable to allocate TX Descriptor memory\n");
3204                         error = ENOMEM;
3205                         goto err_tx_desc;
3206                 }
3207                 txr->tx_base = (struct e1000_tx_desc *)txr->txdma.dma_vaddr;
3208                 bzero((void *)txr->tx_base, tsize);
3209
3210                 if (em_allocate_transmit_buffers(txr)) {
3211                         device_printf(dev,
3212                             "Critical Failure setting up transmit buffers\n");
3213                         error = ENOMEM;
3214                         goto err_tx_desc;
3215                 }
3216 #if __FreeBSD_version >= 800000
3217                 /* Allocate a buf ring */
3218                 txr->br = buf_ring_alloc(4096, M_DEVBUF,
3219                     M_WAITOK, &txr->tx_mtx);
3220 #endif
3221         }
3222
3223         /*
3224          * Next the RX queues...
3225          */ 
3226         rsize = roundup2(adapter->num_rx_desc *
3227             sizeof(struct e1000_rx_desc), EM_DBA_ALIGN);
3228         for (int i = 0; i < adapter->num_queues; i++, rxconf++) {
3229                 rxr = &adapter->rx_rings[i];
3230                 rxr->adapter = adapter;
3231                 rxr->me = i;
3232
3233                 /* Initialize the RX lock */
3234                 snprintf(rxr->mtx_name, sizeof(rxr->mtx_name), "%s:rx(%d)",
3235                     device_get_nameunit(dev), txr->me);
3236                 mtx_init(&rxr->rx_mtx, rxr->mtx_name, NULL, MTX_DEF);
3237
3238                 if (em_dma_malloc(adapter, rsize,
3239                         &rxr->rxdma, BUS_DMA_NOWAIT)) {
3240                         device_printf(dev,
3241                             "Unable to allocate RxDescriptor memory\n");
3242                         error = ENOMEM;
3243                         goto err_rx_desc;
3244                 }
3245                 rxr->rx_base = (struct e1000_rx_desc *)rxr->rxdma.dma_vaddr;
3246                 bzero((void *)rxr->rx_base, rsize);
3247
3248                 /* Allocate receive buffers for the ring*/
3249                 if (em_allocate_receive_buffers(rxr)) {
3250                         device_printf(dev,
3251                             "Critical Failure setting up receive buffers\n");
3252                         error = ENOMEM;
3253                         goto err_rx_desc;
3254                 }
3255         }
3256
3257         return (0);
3258
3259 err_rx_desc:
3260         for (rxr = adapter->rx_rings; rxconf > 0; rxr++, rxconf--)
3261                 em_dma_free(adapter, &rxr->rxdma);
3262 err_tx_desc:
3263         for (txr = adapter->tx_rings; txconf > 0; txr++, txconf--)
3264                 em_dma_free(adapter, &txr->txdma);
3265         free(adapter->rx_rings, M_DEVBUF);
3266 rx_fail:
3267 #if __FreeBSD_version >= 800000
3268         buf_ring_free(txr->br, M_DEVBUF);
3269 #endif
3270         free(adapter->tx_rings, M_DEVBUF);
3271 fail:
3272         return (error);
3273 }
3274
3275
3276 /*********************************************************************
3277  *
3278  *  Allocate memory for tx_buffer structures. The tx_buffer stores all
3279  *  the information needed to transmit a packet on the wire. This is
3280  *  called only once at attach, setup is done every reset.
3281  *
3282  **********************************************************************/
3283 static int
3284 em_allocate_transmit_buffers(struct tx_ring *txr)
3285 {
3286         struct adapter *adapter = txr->adapter;
3287         device_t dev = adapter->dev;
3288         struct em_buffer *txbuf;
3289         int error, i;
3290
3291         /*
3292          * Setup DMA descriptor areas.
3293          */
3294         if ((error = bus_dma_tag_create(bus_get_dma_tag(dev),
3295                                1, 0,                    /* alignment, bounds */
3296                                BUS_SPACE_MAXADDR,       /* lowaddr */
3297                                BUS_SPACE_MAXADDR,       /* highaddr */
3298                                NULL, NULL,              /* filter, filterarg */
3299                                EM_TSO_SIZE,             /* maxsize */
3300                                EM_MAX_SCATTER,          /* nsegments */
3301                                PAGE_SIZE,               /* maxsegsize */
3302                                0,                       /* flags */
3303                                NULL,                    /* lockfunc */
3304                                NULL,                    /* lockfuncarg */
3305                                &txr->txtag))) {
3306                 device_printf(dev,"Unable to allocate TX DMA tag\n");
3307                 goto fail;
3308         }
3309
3310         if (!(txr->tx_buffers =
3311             (struct em_buffer *) malloc(sizeof(struct em_buffer) *
3312             adapter->num_tx_desc, M_DEVBUF, M_NOWAIT | M_ZERO))) {
3313                 device_printf(dev, "Unable to allocate tx_buffer memory\n");
3314                 error = ENOMEM;
3315                 goto fail;
3316         }
3317
3318         /* Create the descriptor buffer dma maps */
3319         txbuf = txr->tx_buffers;
3320         for (i = 0; i < adapter->num_tx_desc; i++, txbuf++) {
3321                 error = bus_dmamap_create(txr->txtag, 0, &txbuf->map);
3322                 if (error != 0) {
3323                         device_printf(dev, "Unable to create TX DMA map\n");
3324                         goto fail;
3325                 }
3326         }
3327
3328         return 0;
3329 fail:
3330         /* We free all, it handles case where we are in the middle */
3331         em_free_transmit_structures(adapter);
3332         return (error);
3333 }
3334
3335 /*********************************************************************
3336  *
3337  *  Initialize a transmit ring.
3338  *
3339  **********************************************************************/
3340 static void
3341 em_setup_transmit_ring(struct tx_ring *txr)
3342 {
3343         struct adapter *adapter = txr->adapter;
3344         struct em_buffer *txbuf;
3345         int i;
3346 #ifdef DEV_NETMAP
3347         struct netmap_adapter *na = NA(adapter->ifp);
3348         struct netmap_slot *slot;
3349 #endif /* DEV_NETMAP */
3350
3351         /* Clear the old descriptor contents */
3352         EM_TX_LOCK(txr);
3353 #ifdef DEV_NETMAP
3354         slot = netmap_reset(na, NR_TX, txr->me, 0);
3355 #endif /* DEV_NETMAP */
3356
3357         bzero((void *)txr->tx_base,
3358               (sizeof(struct e1000_tx_desc)) * adapter->num_tx_desc);
3359         /* Reset indices */
3360         txr->next_avail_desc = 0;
3361         txr->next_to_clean = 0;
3362
3363         /* Free any existing tx buffers. */
3364         txbuf = txr->tx_buffers;
3365         for (i = 0; i < adapter->num_tx_desc; i++, txbuf++) {
3366                 if (txbuf->m_head != NULL) {
3367                         bus_dmamap_sync(txr->txtag, txbuf->map,
3368                             BUS_DMASYNC_POSTWRITE);
3369                         bus_dmamap_unload(txr->txtag, txbuf->map);
3370                         m_freem(txbuf->m_head);
3371                         txbuf->m_head = NULL;
3372                 }
3373 #ifdef DEV_NETMAP
3374                 if (slot) {
3375                         int si = netmap_idx_n2k(&na->tx_rings[txr->me], i);
3376                         uint64_t paddr;
3377                         void *addr;
3378
3379                         addr = PNMB(slot + si, &paddr);
3380                         txr->tx_base[i].buffer_addr = htole64(paddr);
3381                         /* reload the map for netmap mode */
3382                         netmap_load_map(txr->txtag, txbuf->map, addr);
3383                 }
3384 #endif /* DEV_NETMAP */
3385
3386                 /* clear the watch index */
3387                 txbuf->next_eop = -1;
3388         }
3389
3390         /* Set number of descriptors available */
3391         txr->tx_avail = adapter->num_tx_desc;
3392         txr->queue_status = EM_QUEUE_IDLE;
3393
3394         /* Clear checksum offload context. */
3395         txr->last_hw_offload = 0;
3396         txr->last_hw_ipcss = 0;
3397         txr->last_hw_ipcso = 0;
3398         txr->last_hw_tucss = 0;
3399         txr->last_hw_tucso = 0;
3400
3401         bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
3402             BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
3403         EM_TX_UNLOCK(txr);
3404 }
3405
3406 /*********************************************************************
3407  *
3408  *  Initialize all transmit rings.
3409  *
3410  **********************************************************************/
3411 static void
3412 em_setup_transmit_structures(struct adapter *adapter)
3413 {
3414         struct tx_ring *txr = adapter->tx_rings;
3415
3416         for (int i = 0; i < adapter->num_queues; i++, txr++)
3417                 em_setup_transmit_ring(txr);
3418
3419         return;
3420 }
3421
3422 /*********************************************************************
3423  *
3424  *  Enable transmit unit.
3425  *
3426  **********************************************************************/
3427 static void
3428 em_initialize_transmit_unit(struct adapter *adapter)
3429 {
3430         struct tx_ring  *txr = adapter->tx_rings;
3431         struct e1000_hw *hw = &adapter->hw;
3432         u32     tctl, tarc, tipg = 0;
3433
3434          INIT_DEBUGOUT("em_initialize_transmit_unit: begin");
3435
3436         for (int i = 0; i < adapter->num_queues; i++, txr++) {
3437                 u64 bus_addr = txr->txdma.dma_paddr;
3438                 /* Base and Len of TX Ring */
3439                 E1000_WRITE_REG(hw, E1000_TDLEN(i),
3440                     adapter->num_tx_desc * sizeof(struct e1000_tx_desc));
3441                 E1000_WRITE_REG(hw, E1000_TDBAH(i),
3442                     (u32)(bus_addr >> 32));
3443                 E1000_WRITE_REG(hw, E1000_TDBAL(i),
3444                     (u32)bus_addr);
3445                 /* Init the HEAD/TAIL indices */
3446                 E1000_WRITE_REG(hw, E1000_TDT(i), 0);
3447                 E1000_WRITE_REG(hw, E1000_TDH(i), 0);
3448
3449                 HW_DEBUGOUT2("Base = %x, Length = %x\n",
3450                     E1000_READ_REG(&adapter->hw, E1000_TDBAL(i)),
3451                     E1000_READ_REG(&adapter->hw, E1000_TDLEN(i)));
3452
3453                 txr->queue_status = EM_QUEUE_IDLE;
3454         }
3455
3456         /* Set the default values for the Tx Inter Packet Gap timer */
3457         switch (adapter->hw.mac.type) {
3458         case e1000_80003es2lan:
3459                 tipg = DEFAULT_82543_TIPG_IPGR1;
3460                 tipg |= DEFAULT_80003ES2LAN_TIPG_IPGR2 <<
3461                     E1000_TIPG_IPGR2_SHIFT;
3462                 break;
3463         default:
3464                 if ((adapter->hw.phy.media_type == e1000_media_type_fiber) ||
3465                     (adapter->hw.phy.media_type ==
3466                     e1000_media_type_internal_serdes))
3467                         tipg = DEFAULT_82543_TIPG_IPGT_FIBER;
3468                 else
3469                         tipg = DEFAULT_82543_TIPG_IPGT_COPPER;
3470                 tipg |= DEFAULT_82543_TIPG_IPGR1 << E1000_TIPG_IPGR1_SHIFT;
3471                 tipg |= DEFAULT_82543_TIPG_IPGR2 << E1000_TIPG_IPGR2_SHIFT;
3472         }
3473
3474         E1000_WRITE_REG(&adapter->hw, E1000_TIPG, tipg);
3475         E1000_WRITE_REG(&adapter->hw, E1000_TIDV, adapter->tx_int_delay.value);
3476
3477         if(adapter->hw.mac.type >= e1000_82540)
3478                 E1000_WRITE_REG(&adapter->hw, E1000_TADV,
3479                     adapter->tx_abs_int_delay.value);
3480
3481         if ((adapter->hw.mac.type == e1000_82571) ||
3482             (adapter->hw.mac.type == e1000_82572)) {
3483                 tarc = E1000_READ_REG(&adapter->hw, E1000_TARC(0));
3484                 tarc |= SPEED_MODE_BIT;
3485                 E1000_WRITE_REG(&adapter->hw, E1000_TARC(0), tarc);
3486         } else if (adapter->hw.mac.type == e1000_80003es2lan) {
3487                 tarc = E1000_READ_REG(&adapter->hw, E1000_TARC(0));
3488                 tarc |= 1;
3489                 E1000_WRITE_REG(&adapter->hw, E1000_TARC(0), tarc);
3490                 tarc = E1000_READ_REG(&adapter->hw, E1000_TARC(1));
3491                 tarc |= 1;
3492                 E1000_WRITE_REG(&adapter->hw, E1000_TARC(1), tarc);
3493         }
3494
3495         adapter->txd_cmd = E1000_TXD_CMD_IFCS;
3496         if (adapter->tx_int_delay.value > 0)
3497                 adapter->txd_cmd |= E1000_TXD_CMD_IDE;
3498
3499         /* Program the Transmit Control Register */
3500         tctl = E1000_READ_REG(&adapter->hw, E1000_TCTL);
3501         tctl &= ~E1000_TCTL_CT;
3502         tctl |= (E1000_TCTL_PSP | E1000_TCTL_RTLC | E1000_TCTL_EN |
3503                    (E1000_COLLISION_THRESHOLD << E1000_CT_SHIFT));
3504
3505         if (adapter->hw.mac.type >= e1000_82571)
3506                 tctl |= E1000_TCTL_MULR;
3507
3508         /* This write will effectively turn on the transmit unit. */
3509         E1000_WRITE_REG(&adapter->hw, E1000_TCTL, tctl);
3510
3511 }
3512
3513
3514 /*********************************************************************
3515  *
3516  *  Free all transmit rings.
3517  *
3518  **********************************************************************/
3519 static void
3520 em_free_transmit_structures(struct adapter *adapter)
3521 {
3522         struct tx_ring *txr = adapter->tx_rings;
3523
3524         for (int i = 0; i < adapter->num_queues; i++, txr++) {
3525                 EM_TX_LOCK(txr);
3526                 em_free_transmit_buffers(txr);
3527                 em_dma_free(adapter, &txr->txdma);
3528                 EM_TX_UNLOCK(txr);
3529                 EM_TX_LOCK_DESTROY(txr);
3530         }
3531
3532         free(adapter->tx_rings, M_DEVBUF);
3533 }
3534
3535 /*********************************************************************
3536  *
3537  *  Free transmit ring related data structures.
3538  *
3539  **********************************************************************/
3540 static void
3541 em_free_transmit_buffers(struct tx_ring *txr)
3542 {
3543         struct adapter          *adapter = txr->adapter;
3544         struct em_buffer        *txbuf;
3545
3546         INIT_DEBUGOUT("free_transmit_ring: begin");
3547
3548         if (txr->tx_buffers == NULL)
3549                 return;
3550
3551         for (int i = 0; i < adapter->num_tx_desc; i++) {
3552                 txbuf = &txr->tx_buffers[i];
3553                 if (txbuf->m_head != NULL) {
3554                         bus_dmamap_sync(txr->txtag, txbuf->map,
3555                             BUS_DMASYNC_POSTWRITE);
3556                         bus_dmamap_unload(txr->txtag,
3557                             txbuf->map);
3558                         m_freem(txbuf->m_head);
3559                         txbuf->m_head = NULL;
3560                         if (txbuf->map != NULL) {
3561                                 bus_dmamap_destroy(txr->txtag,
3562                                     txbuf->map);
3563                                 txbuf->map = NULL;
3564                         }
3565                 } else if (txbuf->map != NULL) {
3566                         bus_dmamap_unload(txr->txtag,
3567                             txbuf->map);
3568                         bus_dmamap_destroy(txr->txtag,
3569                             txbuf->map);
3570                         txbuf->map = NULL;
3571                 }
3572         }
3573 #if __FreeBSD_version >= 800000
3574         if (txr->br != NULL)
3575                 buf_ring_free(txr->br, M_DEVBUF);
3576 #endif
3577         if (txr->tx_buffers != NULL) {
3578                 free(txr->tx_buffers, M_DEVBUF);
3579                 txr->tx_buffers = NULL;
3580         }
3581         if (txr->txtag != NULL) {
3582                 bus_dma_tag_destroy(txr->txtag);
3583                 txr->txtag = NULL;
3584         }
3585         return;
3586 }
3587
3588
3589 /*********************************************************************
3590  *  The offload context is protocol specific (TCP/UDP) and thus
3591  *  only needs to be set when the protocol changes. The occasion
3592  *  of a context change can be a performance detriment, and
3593  *  might be better just disabled. The reason arises in the way
3594  *  in which the controller supports pipelined requests from the
3595  *  Tx data DMA. Up to four requests can be pipelined, and they may
3596  *  belong to the same packet or to multiple packets. However all
3597  *  requests for one packet are issued before a request is issued
3598  *  for a subsequent packet and if a request for the next packet
3599  *  requires a context change, that request will be stalled
3600  *  until the previous request completes. This means setting up
3601  *  a new context effectively disables pipelined Tx data DMA which
3602  *  in turn greatly slow down performance to send small sized
3603  *  frames. 
3604  **********************************************************************/
3605 static void
3606 em_transmit_checksum_setup(struct tx_ring *txr, struct mbuf *mp, int ip_off,
3607     struct ip *ip, u32 *txd_upper, u32 *txd_lower)
3608 {
3609         struct adapter                  *adapter = txr->adapter;
3610         struct e1000_context_desc       *TXD = NULL;
3611         struct em_buffer                *tx_buffer;
3612         int                             cur, hdr_len;
3613         u32                             cmd = 0;
3614         u16                             offload = 0;
3615         u8                              ipcso, ipcss, tucso, tucss;
3616
3617         ipcss = ipcso = tucss = tucso = 0;
3618         hdr_len = ip_off + (ip->ip_hl << 2);
3619         cur = txr->next_avail_desc;
3620
3621         /* Setup of IP header checksum. */
3622         if (mp->m_pkthdr.csum_flags & CSUM_IP) {
3623                 *txd_upper |= E1000_TXD_POPTS_IXSM << 8;
3624                 offload |= CSUM_IP;
3625                 ipcss = ip_off;
3626                 ipcso = ip_off + offsetof(struct ip, ip_sum);
3627                 /*
3628                  * Start offset for header checksum calculation.
3629                  * End offset for header checksum calculation.
3630                  * Offset of place to put the checksum.
3631                  */
3632                 TXD = (struct e1000_context_desc *)&txr->tx_base[cur];
3633                 TXD->lower_setup.ip_fields.ipcss = ipcss;
3634                 TXD->lower_setup.ip_fields.ipcse = htole16(hdr_len);
3635                 TXD->lower_setup.ip_fields.ipcso = ipcso;
3636                 cmd |= E1000_TXD_CMD_IP;
3637         }
3638
3639         if (mp->m_pkthdr.csum_flags & CSUM_TCP) {
3640                 *txd_lower = E1000_TXD_CMD_DEXT | E1000_TXD_DTYP_D;
3641                 *txd_upper |= E1000_TXD_POPTS_TXSM << 8;
3642                 offload |= CSUM_TCP;
3643                 tucss = hdr_len;
3644                 tucso = hdr_len + offsetof(struct tcphdr, th_sum);
3645                 /*
3646                  * Setting up new checksum offload context for every frames
3647                  * takes a lot of processing time for hardware. This also
3648                  * reduces performance a lot for small sized frames so avoid
3649                  * it if driver can use previously configured checksum
3650                  * offload context.
3651                  */
3652                 if (txr->last_hw_offload == offload) {
3653                         if (offload & CSUM_IP) {
3654                                 if (txr->last_hw_ipcss == ipcss &&
3655                                     txr->last_hw_ipcso == ipcso &&
3656                                     txr->last_hw_tucss == tucss &&
3657                                     txr->last_hw_tucso == tucso)
3658                                         return;
3659                         } else {
3660                                 if (txr->last_hw_tucss == tucss &&
3661                                     txr->last_hw_tucso == tucso)
3662                                         return;
3663                         }
3664                 }
3665                 txr->last_hw_offload = offload;
3666                 txr->last_hw_tucss = tucss;
3667                 txr->last_hw_tucso = tucso;
3668                 /*
3669                  * Start offset for payload checksum calculation.
3670                  * End offset for payload checksum calculation.
3671                  * Offset of place to put the checksum.
3672                  */
3673                 TXD = (struct e1000_context_desc *)&txr->tx_base[cur];
3674                 TXD->upper_setup.tcp_fields.tucss = hdr_len;
3675                 TXD->upper_setup.tcp_fields.tucse = htole16(0);
3676                 TXD->upper_setup.tcp_fields.tucso = tucso;
3677                 cmd |= E1000_TXD_CMD_TCP;
3678         } else if (mp->m_pkthdr.csum_flags & CSUM_UDP) {
3679                 *txd_lower = E1000_TXD_CMD_DEXT | E1000_TXD_DTYP_D;
3680                 *txd_upper |= E1000_TXD_POPTS_TXSM << 8;
3681                 tucss = hdr_len;
3682                 tucso = hdr_len + offsetof(struct udphdr, uh_sum);
3683                 /*
3684                  * Setting up new checksum offload context for every frames
3685                  * takes a lot of processing time for hardware. This also
3686                  * reduces performance a lot for small sized frames so avoid
3687                  * it if driver can use previously configured checksum
3688                  * offload context.
3689                  */
3690                 if (txr->last_hw_offload == offload) {
3691                         if (offload & CSUM_IP) {
3692                                 if (txr->last_hw_ipcss == ipcss &&
3693                                     txr->last_hw_ipcso == ipcso &&
3694                                     txr->last_hw_tucss == tucss &&
3695                                     txr->last_hw_tucso == tucso)
3696                                         return;
3697                         } else {
3698                                 if (txr->last_hw_tucss == tucss &&
3699                                     txr->last_hw_tucso == tucso)
3700                                         return;
3701                         }
3702                 }
3703                 txr->last_hw_offload = offload;
3704                 txr->last_hw_tucss = tucss;
3705                 txr->last_hw_tucso = tucso;
3706                 /*
3707                  * Start offset for header checksum calculation.
3708                  * End offset for header checksum calculation.
3709                  * Offset of place to put the checksum.
3710                  */
3711                 TXD = (struct e1000_context_desc *)&txr->tx_base[cur];
3712                 TXD->upper_setup.tcp_fields.tucss = tucss;
3713                 TXD->upper_setup.tcp_fields.tucse = htole16(0);
3714                 TXD->upper_setup.tcp_fields.tucso = tucso;
3715         }
3716   
3717         if (offload & CSUM_IP) {
3718                 txr->last_hw_ipcss = ipcss;
3719                 txr->last_hw_ipcso = ipcso;
3720         }
3721
3722         TXD->tcp_seg_setup.data = htole32(0);
3723         TXD->cmd_and_length =
3724             htole32(adapter->txd_cmd | E1000_TXD_CMD_DEXT | cmd);
3725         tx_buffer = &txr->tx_buffers[cur];
3726         tx_buffer->m_head = NULL;
3727         tx_buffer->next_eop = -1;
3728
3729         if (++cur == adapter->num_tx_desc)
3730                 cur = 0;
3731
3732         txr->tx_avail--;
3733         txr->next_avail_desc = cur;
3734 }
3735
3736
3737 /**********************************************************************
3738  *
3739  *  Setup work for hardware segmentation offload (TSO)
3740  *
3741  **********************************************************************/
3742 static void
3743 em_tso_setup(struct tx_ring *txr, struct mbuf *mp, int ip_off,
3744     struct ip *ip, struct tcphdr *tp, u32 *txd_upper, u32 *txd_lower)
3745 {
3746         struct adapter                  *adapter = txr->adapter;
3747         struct e1000_context_desc       *TXD;
3748         struct em_buffer                *tx_buffer;
3749         int cur, hdr_len;
3750
3751         /*
3752          * In theory we can use the same TSO context if and only if
3753          * frame is the same type(IP/TCP) and the same MSS. However
3754          * checking whether a frame has the same IP/TCP structure is
3755          * hard thing so just ignore that and always restablish a
3756          * new TSO context.
3757          */
3758         hdr_len = ip_off + (ip->ip_hl << 2) + (tp->th_off << 2);
3759         *txd_lower = (E1000_TXD_CMD_DEXT |      /* Extended descr type */
3760                       E1000_TXD_DTYP_D |        /* Data descr type */
3761                       E1000_TXD_CMD_TSE);       /* Do TSE on this packet */
3762
3763         /* IP and/or TCP header checksum calculation and insertion. */
3764         *txd_upper = (E1000_TXD_POPTS_IXSM | E1000_TXD_POPTS_TXSM) << 8;
3765
3766         cur = txr->next_avail_desc;
3767         tx_buffer = &txr->tx_buffers[cur];
3768         TXD = (struct e1000_context_desc *) &txr->tx_base[cur];
3769
3770         /*
3771          * Start offset for header checksum calculation.
3772          * End offset for header checksum calculation.
3773          * Offset of place put the checksum.
3774          */
3775         TXD->lower_setup.ip_fields.ipcss = ip_off;
3776         TXD->lower_setup.ip_fields.ipcse =
3777             htole16(ip_off + (ip->ip_hl << 2) - 1);
3778         TXD->lower_setup.ip_fields.ipcso = ip_off + offsetof(struct ip, ip_sum);
3779         /*
3780          * Start offset for payload checksum calculation.
3781          * End offset for payload checksum calculation.
3782          * Offset of place to put the checksum.
3783          */
3784         TXD->upper_setup.tcp_fields.tucss = ip_off + (ip->ip_hl << 2);
3785         TXD->upper_setup.tcp_fields.tucse = 0;
3786         TXD->upper_setup.tcp_fields.tucso =
3787             ip_off + (ip->ip_hl << 2) + offsetof(struct tcphdr, th_sum);
3788         /*
3789          * Payload size per packet w/o any headers.
3790          * Length of all headers up to payload.
3791          */
3792         TXD->tcp_seg_setup.fields.mss = htole16(mp->m_pkthdr.tso_segsz);
3793         TXD->tcp_seg_setup.fields.hdr_len = hdr_len;
3794
3795         TXD->cmd_and_length = htole32(adapter->txd_cmd |
3796                                 E1000_TXD_CMD_DEXT |    /* Extended descr */
3797                                 E1000_TXD_CMD_TSE |     /* TSE context */
3798                                 E1000_TXD_CMD_IP |      /* Do IP csum */
3799                                 E1000_TXD_CMD_TCP |     /* Do TCP checksum */
3800                                 (mp->m_pkthdr.len - (hdr_len))); /* Total len */
3801
3802         tx_buffer->m_head = NULL;
3803         tx_buffer->next_eop = -1;
3804
3805         if (++cur == adapter->num_tx_desc)
3806                 cur = 0;
3807
3808         txr->tx_avail--;
3809         txr->next_avail_desc = cur;
3810         txr->tx_tso = TRUE;
3811 }
3812
3813
3814 /**********************************************************************
3815  *
3816  *  Examine each tx_buffer in the used queue. If the hardware is done
3817  *  processing the packet then free associated resources. The
3818  *  tx_buffer is put back on the free queue.
3819  *
3820  **********************************************************************/
3821 static void
3822 em_txeof(struct tx_ring *txr)
3823 {
3824         struct adapter  *adapter = txr->adapter;
3825         int first, last, done, processed;
3826         struct em_buffer *tx_buffer;
3827         struct e1000_tx_desc   *tx_desc, *eop_desc;
3828         struct ifnet   *ifp = adapter->ifp;
3829
3830         EM_TX_LOCK_ASSERT(txr);
3831 #ifdef DEV_NETMAP
3832         if (netmap_tx_irq(ifp, txr->me |
3833             (NETMAP_LOCKED_ENTER | NETMAP_LOCKED_EXIT)))
3834                 return;
3835 #endif /* DEV_NETMAP */
3836
3837         /* No work, make sure watchdog is off */
3838         if (txr->tx_avail == adapter->num_tx_desc) {
3839                 txr->queue_status = EM_QUEUE_IDLE;
3840                 return;
3841         }
3842
3843         processed = 0;
3844         first = txr->next_to_clean;
3845         tx_desc = &txr->tx_base[first];
3846         tx_buffer = &txr->tx_buffers[first];
3847         last = tx_buffer->next_eop;
3848         eop_desc = &txr->tx_base[last];
3849
3850         /*
3851          * What this does is get the index of the
3852          * first descriptor AFTER the EOP of the 
3853          * first packet, that way we can do the
3854          * simple comparison on the inner while loop.
3855          */
3856         if (++last == adapter->num_tx_desc)
3857                 last = 0;
3858         done = last;
3859
3860         bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
3861             BUS_DMASYNC_POSTREAD);
3862
3863         while (eop_desc->upper.fields.status & E1000_TXD_STAT_DD) {
3864                 /* We clean the range of the packet */
3865                 while (first != done) {
3866                         tx_desc->upper.data = 0;
3867                         tx_desc->lower.data = 0;
3868                         tx_desc->buffer_addr = 0;
3869                         ++txr->tx_avail;
3870                         ++processed;
3871
3872                         if (tx_buffer->m_head) {
3873                                 bus_dmamap_sync(txr->txtag,
3874                                     tx_buffer->map,
3875                                     BUS_DMASYNC_POSTWRITE);
3876                                 bus_dmamap_unload(txr->txtag,
3877                                     tx_buffer->map);
3878                                 m_freem(tx_buffer->m_head);
3879                                 tx_buffer->m_head = NULL;
3880                         }
3881                         tx_buffer->next_eop = -1;
3882                         txr->watchdog_time = ticks;
3883
3884                         if (++first == adapter->num_tx_desc)
3885                                 first = 0;
3886
3887                         tx_buffer = &txr->tx_buffers[first];
3888                         tx_desc = &txr->tx_base[first];
3889                 }
3890                 ++ifp->if_opackets;
3891                 /* See if we can continue to the next packet */
3892                 last = tx_buffer->next_eop;
3893                 if (last != -1) {
3894                         eop_desc = &txr->tx_base[last];
3895                         /* Get new done point */
3896                         if (++last == adapter->num_tx_desc) last = 0;
3897                         done = last;
3898                 } else
3899                         break;
3900         }
3901         bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
3902             BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
3903
3904         txr->next_to_clean = first;
3905
3906         /*
3907         ** Watchdog calculation, we know there's
3908         ** work outstanding or the first return
3909         ** would have been taken, so none processed
3910         ** for too long indicates a hang. local timer
3911         ** will examine this and do a reset if needed.
3912         */
3913         if ((!processed) && ((ticks - txr->watchdog_time) > EM_WATCHDOG))
3914                 txr->queue_status = EM_QUEUE_HUNG;
3915
3916         /*
3917          * If we have a minimum free, clear IFF_DRV_OACTIVE
3918          * to tell the stack that it is OK to send packets.
3919          * Notice that all writes of OACTIVE happen under the
3920          * TX lock which, with a single queue, guarantees 
3921          * sanity.
3922          */
3923         if (txr->tx_avail >= EM_MAX_SCATTER)
3924                 ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;
3925
3926         /* Disable watchdog if all clean */
3927         if (txr->tx_avail == adapter->num_tx_desc) {
3928                 txr->queue_status = EM_QUEUE_IDLE;
3929         } 
3930 }
3931
3932
3933 /*********************************************************************
3934  *
3935  *  Refresh RX descriptor mbufs from system mbuf buffer pool.
3936  *
3937  **********************************************************************/
3938 static void
3939 em_refresh_mbufs(struct rx_ring *rxr, int limit)
3940 {
3941         struct adapter          *adapter = rxr->adapter;
3942         struct mbuf             *m;
3943         bus_dma_segment_t       segs[1];
3944         struct em_buffer        *rxbuf;
3945         int                     i, j, error, nsegs;
3946         bool                    cleaned = FALSE;
3947
3948         i = j = rxr->next_to_refresh;
3949         /*
3950         ** Get one descriptor beyond
3951         ** our work mark to control
3952         ** the loop.
3953         */
3954         if (++j == adapter->num_rx_desc)
3955                 j = 0;
3956
3957         while (j != limit) {
3958                 rxbuf = &rxr->rx_buffers[i];
3959                 if (rxbuf->m_head == NULL) {
3960                         m = m_getjcl(M_NOWAIT, MT_DATA,
3961                             M_PKTHDR, adapter->rx_mbuf_sz);
3962                         /*
3963                         ** If we have a temporary resource shortage
3964                         ** that causes a failure, just abort refresh
3965                         ** for now, we will return to this point when
3966                         ** reinvoked from em_rxeof.
3967                         */
3968                         if (m == NULL)
3969                                 goto update;
3970                 } else
3971                         m = rxbuf->m_head;
3972
3973                 m->m_len = m->m_pkthdr.len = adapter->rx_mbuf_sz;
3974                 m->m_flags |= M_PKTHDR;
3975                 m->m_data = m->m_ext.ext_buf;
3976
3977                 /* Use bus_dma machinery to setup the memory mapping  */
3978                 error = bus_dmamap_load_mbuf_sg(rxr->rxtag, rxbuf->map,
3979                     m, segs, &nsegs, BUS_DMA_NOWAIT);
3980                 if (error != 0) {
3981                         printf("Refresh mbufs: hdr dmamap load"
3982                             " failure - %d\n", error);
3983                         m_free(m);
3984                         rxbuf->m_head = NULL;
3985                         goto update;
3986                 }
3987                 rxbuf->m_head = m;
3988                 bus_dmamap_sync(rxr->rxtag,
3989                     rxbuf->map, BUS_DMASYNC_PREREAD);
3990                 rxr->rx_base[i].buffer_addr = htole64(segs[0].ds_addr);
3991                 cleaned = TRUE;
3992
3993                 i = j; /* Next is precalulated for us */
3994                 rxr->next_to_refresh = i;
3995                 /* Calculate next controlling index */
3996                 if (++j == adapter->num_rx_desc)
3997                         j = 0;
3998         }
3999 update:
4000         /*
4001         ** Update the tail pointer only if,
4002         ** and as far as we have refreshed.
4003         */
4004         if (cleaned)
4005                 E1000_WRITE_REG(&adapter->hw,
4006                     E1000_RDT(rxr->me), rxr->next_to_refresh);
4007
4008         return;
4009 }
4010
4011
4012 /*********************************************************************
4013  *
4014  *  Allocate memory for rx_buffer structures. Since we use one
4015  *  rx_buffer per received packet, the maximum number of rx_buffer's
4016  *  that we'll need is equal to the number of receive descriptors
4017  *  that we've allocated.
4018  *
4019  **********************************************************************/
4020 static int
4021 em_allocate_receive_buffers(struct rx_ring *rxr)
4022 {
4023         struct adapter          *adapter = rxr->adapter;
4024         device_t                dev = adapter->dev;
4025         struct em_buffer        *rxbuf;
4026         int                     error;
4027
4028         rxr->rx_buffers = malloc(sizeof(struct em_buffer) *
4029             adapter->num_rx_desc, M_DEVBUF, M_NOWAIT | M_ZERO);
4030         if (rxr->rx_buffers == NULL) {
4031                 device_printf(dev, "Unable to allocate rx_buffer memory\n");
4032                 return (ENOMEM);
4033         }
4034
4035         error = bus_dma_tag_create(bus_get_dma_tag(dev), /* parent */
4036                                 1, 0,                   /* alignment, bounds */
4037                                 BUS_SPACE_MAXADDR,      /* lowaddr */
4038                                 BUS_SPACE_MAXADDR,      /* highaddr */
4039                                 NULL, NULL,             /* filter, filterarg */
4040                                 MJUM9BYTES,             /* maxsize */
4041                                 1,                      /* nsegments */
4042                                 MJUM9BYTES,             /* maxsegsize */
4043                                 0,                      /* flags */
4044                                 NULL,                   /* lockfunc */
4045                                 NULL,                   /* lockarg */
4046                                 &rxr->rxtag);
4047         if (error) {
4048                 device_printf(dev, "%s: bus_dma_tag_create failed %d\n",
4049                     __func__, error);
4050                 goto fail;
4051         }
4052
4053         rxbuf = rxr->rx_buffers;
4054         for (int i = 0; i < adapter->num_rx_desc; i++, rxbuf++) {
4055                 rxbuf = &rxr->rx_buffers[i];
4056                 error = bus_dmamap_create(rxr->rxtag, BUS_DMA_NOWAIT,
4057                     &rxbuf->map);
4058                 if (error) {
4059                         device_printf(dev, "%s: bus_dmamap_create failed: %d\n",
4060                             __func__, error);
4061                         goto fail;
4062                 }
4063         }
4064
4065         return (0);
4066
4067 fail:
4068         em_free_receive_structures(adapter);
4069         return (error);
4070 }
4071
4072
4073 /*********************************************************************
4074  *
4075  *  Initialize a receive ring and its buffers.
4076  *
4077  **********************************************************************/
4078 static int
4079 em_setup_receive_ring(struct rx_ring *rxr)
4080 {
4081         struct  adapter         *adapter = rxr->adapter;
4082         struct em_buffer        *rxbuf;
4083         bus_dma_segment_t       seg[1];
4084         int                     rsize, nsegs, error = 0;
4085 #ifdef DEV_NETMAP
4086         struct netmap_adapter *na = NA(adapter->ifp);
4087         struct netmap_slot *slot;
4088 #endif
4089
4090
4091         /* Clear the ring contents */
4092         EM_RX_LOCK(rxr);
4093         rsize = roundup2(adapter->num_rx_desc *
4094             sizeof(struct e1000_rx_desc), EM_DBA_ALIGN);
4095         bzero((void *)rxr->rx_base, rsize);
4096 #ifdef DEV_NETMAP
4097         slot = netmap_reset(na, NR_RX, 0, 0);
4098 #endif
4099
4100         /*
4101         ** Free current RX buffer structs and their mbufs
4102         */
4103         for (int i = 0; i < adapter->num_rx_desc; i++) {
4104                 rxbuf = &rxr->rx_buffers[i];
4105                 if (rxbuf->m_head != NULL) {
4106                         bus_dmamap_sync(rxr->rxtag, rxbuf->map,
4107                             BUS_DMASYNC_POSTREAD);
4108                         bus_dmamap_unload(rxr->rxtag, rxbuf->map);
4109                         m_freem(rxbuf->m_head);
4110                         rxbuf->m_head = NULL; /* mark as freed */
4111                 }
4112         }
4113
4114         /* Now replenish the mbufs */
4115         for (int j = 0; j != adapter->num_rx_desc; ++j) {
4116                 rxbuf = &rxr->rx_buffers[j];
4117 #ifdef DEV_NETMAP
4118                 if (slot) {
4119                         int si = netmap_idx_n2k(&na->rx_rings[rxr->me], j);
4120                         uint64_t paddr;
4121                         void *addr;
4122
4123                         addr = PNMB(slot + si, &paddr);
4124                         netmap_load_map(rxr->rxtag, rxbuf->map, addr);
4125                         /* Update descriptor */
4126                         rxr->rx_base[j].buffer_addr = htole64(paddr);
4127                         continue;
4128                 }
4129 #endif /* DEV_NETMAP */
4130                 rxbuf->m_head = m_getjcl(M_NOWAIT, MT_DATA,
4131                     M_PKTHDR, adapter->rx_mbuf_sz);
4132                 if (rxbuf->m_head == NULL) {
4133                         error = ENOBUFS;
4134                         goto fail;
4135                 }
4136                 rxbuf->m_head->m_len = adapter->rx_mbuf_sz;
4137                 rxbuf->m_head->m_flags &= ~M_HASFCS; /* we strip it */
4138                 rxbuf->m_head->m_pkthdr.len = adapter->rx_mbuf_sz;
4139
4140                 /* Get the memory mapping */
4141                 error = bus_dmamap_load_mbuf_sg(rxr->rxtag,
4142                     rxbuf->map, rxbuf->m_head, seg,
4143                     &nsegs, BUS_DMA_NOWAIT);
4144                 if (error != 0) {
4145                         m_freem(rxbuf->m_head);
4146                         rxbuf->m_head = NULL;
4147                         goto fail;
4148                 }
4149                 bus_dmamap_sync(rxr->rxtag,
4150                     rxbuf->map, BUS_DMASYNC_PREREAD);
4151
4152                 /* Update descriptor */
4153                 rxr->rx_base[j].buffer_addr = htole64(seg[0].ds_addr);
4154         }
4155         rxr->next_to_check = 0;
4156         rxr->next_to_refresh = 0;
4157         bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
4158             BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
4159
4160 fail:
4161         EM_RX_UNLOCK(rxr);
4162         return (error);
4163 }
4164
4165 /*********************************************************************
4166  *
4167  *  Initialize all receive rings.
4168  *
4169  **********************************************************************/
4170 static int
4171 em_setup_receive_structures(struct adapter *adapter)
4172 {
4173         struct rx_ring *rxr = adapter->rx_rings;
4174         int q;
4175
4176         for (q = 0; q < adapter->num_queues; q++, rxr++)
4177                 if (em_setup_receive_ring(rxr))
4178                         goto fail;
4179
4180         return (0);
4181 fail:
4182         /*
4183          * Free RX buffers allocated so far, we will only handle
4184          * the rings that completed, the failing case will have
4185          * cleaned up for itself. 'q' failed, so its the terminus.
4186          */
4187         for (int i = 0; i < q; ++i) {
4188                 rxr = &adapter->rx_rings[i];
4189                 for (int n = 0; n < adapter->num_rx_desc; n++) {
4190                         struct em_buffer *rxbuf;
4191                         rxbuf = &rxr->rx_buffers[n];
4192                         if (rxbuf->m_head != NULL) {
4193                                 bus_dmamap_sync(rxr->rxtag, rxbuf->map,
4194                                   BUS_DMASYNC_POSTREAD);
4195                                 bus_dmamap_unload(rxr->rxtag, rxbuf->map);
4196                                 m_freem(rxbuf->m_head);
4197                                 rxbuf->m_head = NULL;
4198                         }
4199                 }
4200                 rxr->next_to_check = 0;
4201                 rxr->next_to_refresh = 0;
4202         }
4203
4204         return (ENOBUFS);
4205 }
4206
4207 /*********************************************************************
4208  *
4209  *  Free all receive rings.
4210  *
4211  **********************************************************************/
4212 static void
4213 em_free_receive_structures(struct adapter *adapter)
4214 {
4215         struct rx_ring *rxr = adapter->rx_rings;
4216
4217         for (int i = 0; i < adapter->num_queues; i++, rxr++) {
4218                 em_free_receive_buffers(rxr);
4219                 /* Free the ring memory as well */
4220                 em_dma_free(adapter, &rxr->rxdma);
4221                 EM_RX_LOCK_DESTROY(rxr);
4222         }
4223
4224         free(adapter->rx_rings, M_DEVBUF);
4225 }
4226
4227
4228 /*********************************************************************
4229  *
4230  *  Free receive ring data structures
4231  *
4232  **********************************************************************/
4233 static void
4234 em_free_receive_buffers(struct rx_ring *rxr)
4235 {
4236         struct adapter          *adapter = rxr->adapter;
4237         struct em_buffer        *rxbuf = NULL;
4238
4239         INIT_DEBUGOUT("free_receive_buffers: begin");
4240
4241         if (rxr->rx_buffers != NULL) {
4242                 for (int i = 0; i < adapter->num_rx_desc; i++) {
4243                         rxbuf = &rxr->rx_buffers[i];
4244                         if (rxbuf->map != NULL) {
4245                                 bus_dmamap_sync(rxr->rxtag, rxbuf->map,
4246                                     BUS_DMASYNC_POSTREAD);
4247                                 bus_dmamap_unload(rxr->rxtag, rxbuf->map);
4248                                 bus_dmamap_destroy(rxr->rxtag, rxbuf->map);
4249                         }
4250                         if (rxbuf->m_head != NULL) {
4251                                 m_freem(rxbuf->m_head);
4252                                 rxbuf->m_head = NULL;
4253                         }
4254                 }
4255                 free(rxr->rx_buffers, M_DEVBUF);
4256                 rxr->rx_buffers = NULL;
4257                 rxr->next_to_check = 0;
4258                 rxr->next_to_refresh = 0;
4259         }
4260
4261         if (rxr->rxtag != NULL) {
4262                 bus_dma_tag_destroy(rxr->rxtag);
4263                 rxr->rxtag = NULL;
4264         }
4265
4266         return;
4267 }
4268
4269
4270 /*********************************************************************
4271  *
4272  *  Enable receive unit.
4273  *
4274  **********************************************************************/
4275
4276 static void
4277 em_initialize_receive_unit(struct adapter *adapter)
4278 {
4279         struct rx_ring  *rxr = adapter->rx_rings;
4280         struct ifnet    *ifp = adapter->ifp;
4281         struct e1000_hw *hw = &adapter->hw;
4282         u64     bus_addr;
4283         u32     rctl, rxcsum;
4284
4285         INIT_DEBUGOUT("em_initialize_receive_units: begin");
4286
4287         /*
4288          * Make sure receives are disabled while setting
4289          * up the descriptor ring
4290          */
4291         rctl = E1000_READ_REG(hw, E1000_RCTL);
4292         /* Do not disable if ever enabled on this hardware */
4293         if ((hw->mac.type != e1000_82574) && (hw->mac.type != e1000_82583))
4294                 E1000_WRITE_REG(hw, E1000_RCTL, rctl & ~E1000_RCTL_EN);
4295
4296         E1000_WRITE_REG(&adapter->hw, E1000_RADV,
4297             adapter->rx_abs_int_delay.value);
4298         /*
4299          * Set the interrupt throttling rate. Value is calculated
4300          * as DEFAULT_ITR = 1/(MAX_INTS_PER_SEC * 256ns)
4301          */
4302         E1000_WRITE_REG(hw, E1000_ITR, DEFAULT_ITR);
4303
4304         /*
4305         ** When using MSIX interrupts we need to throttle
4306         ** using the EITR register (82574 only)
4307         */
4308         if (hw->mac.type == e1000_82574) {
4309                 for (int i = 0; i < 4; i++)
4310                         E1000_WRITE_REG(hw, E1000_EITR_82574(i),
4311                             DEFAULT_ITR);
4312                 /* Disable accelerated acknowledge */
4313                 E1000_WRITE_REG(hw, E1000_RFCTL, E1000_RFCTL_ACK_DIS);
4314         }
4315
4316         rxcsum = E1000_READ_REG(hw, E1000_RXCSUM);
4317         if (ifp->if_capenable & IFCAP_RXCSUM)
4318                 rxcsum |= E1000_RXCSUM_TUOFL;
4319         else
4320                 rxcsum &= ~E1000_RXCSUM_TUOFL;
4321         E1000_WRITE_REG(hw, E1000_RXCSUM, rxcsum);
4322
4323         /*
4324         ** XXX TEMPORARY WORKAROUND: on some systems with 82573
4325         ** long latencies are observed, like Lenovo X60. This
4326         ** change eliminates the problem, but since having positive
4327         ** values in RDTR is a known source of problems on other
4328         ** platforms another solution is being sought.
4329         */
4330         if (hw->mac.type == e1000_82573)
4331                 E1000_WRITE_REG(hw, E1000_RDTR, 0x20);
4332
4333         for (int i = 0; i < adapter->num_queues; i++, rxr++) {
4334                 /* Setup the Base and Length of the Rx Descriptor Ring */
4335                 u32 rdt = adapter->num_rx_desc - 1; /* default */
4336
4337                 bus_addr = rxr->rxdma.dma_paddr;
4338                 E1000_WRITE_REG(hw, E1000_RDLEN(i),
4339                     adapter->num_rx_desc * sizeof(struct e1000_rx_desc));
4340                 E1000_WRITE_REG(hw, E1000_RDBAH(i), (u32)(bus_addr >> 32));
4341                 E1000_WRITE_REG(hw, E1000_RDBAL(i), (u32)bus_addr);
4342                 /* Setup the Head and Tail Descriptor Pointers */
4343                 E1000_WRITE_REG(hw, E1000_RDH(i), 0);
4344 #ifdef DEV_NETMAP
4345                 /*
4346                  * an init() while a netmap client is active must
4347                  * preserve the rx buffers passed to userspace.
4348                  */
4349                 if (ifp->if_capenable & IFCAP_NETMAP)
4350                         rdt -= NA(adapter->ifp)->rx_rings[i].nr_hwavail;
4351 #endif /* DEV_NETMAP */
4352                 E1000_WRITE_REG(hw, E1000_RDT(i), rdt);
4353         }
4354
4355         /* Set PTHRESH for improved jumbo performance */
4356         if (((adapter->hw.mac.type == e1000_ich9lan) ||
4357             (adapter->hw.mac.type == e1000_pch2lan) ||
4358             (adapter->hw.mac.type == e1000_ich10lan)) &&
4359             (ifp->if_mtu > ETHERMTU)) {
4360                 u32 rxdctl = E1000_READ_REG(hw, E1000_RXDCTL(0));
4361                 E1000_WRITE_REG(hw, E1000_RXDCTL(0), rxdctl | 3);
4362         }
4363                 
4364         if (adapter->hw.mac.type >= e1000_pch2lan) {
4365                 if (ifp->if_mtu > ETHERMTU)
4366                         e1000_lv_jumbo_workaround_ich8lan(hw, TRUE);
4367                 else
4368                         e1000_lv_jumbo_workaround_ich8lan(hw, FALSE);
4369         }
4370
4371         /* Setup the Receive Control Register */
4372         rctl &= ~(3 << E1000_RCTL_MO_SHIFT);
4373         rctl |= E1000_RCTL_EN | E1000_RCTL_BAM |
4374             E1000_RCTL_LBM_NO | E1000_RCTL_RDMTS_HALF |
4375             (hw->mac.mc_filter_type << E1000_RCTL_MO_SHIFT);
4376
4377         /* Strip the CRC */
4378         rctl |= E1000_RCTL_SECRC;
4379
4380         /* Make sure VLAN Filters are off */
4381         rctl &= ~E1000_RCTL_VFE;
4382         rctl &= ~E1000_RCTL_SBP;
4383
4384         if (adapter->rx_mbuf_sz == MCLBYTES)
4385                 rctl |= E1000_RCTL_SZ_2048;
4386         else if (adapter->rx_mbuf_sz == MJUMPAGESIZE)
4387                 rctl |= E1000_RCTL_SZ_4096 | E1000_RCTL_BSEX;
4388         else if (adapter->rx_mbuf_sz > MJUMPAGESIZE)
4389                 rctl |= E1000_RCTL_SZ_8192 | E1000_RCTL_BSEX;
4390
4391         if (ifp->if_mtu > ETHERMTU)
4392                 rctl |= E1000_RCTL_LPE;
4393         else
4394                 rctl &= ~E1000_RCTL_LPE;
4395
4396         /* Write out the settings */
4397         E1000_WRITE_REG(hw, E1000_RCTL, rctl);
4398
4399         return;
4400 }
4401
4402
4403 /*********************************************************************
4404  *
4405  *  This routine executes in interrupt context. It replenishes
4406  *  the mbufs in the descriptor and sends data which has been
4407  *  dma'ed into host memory to upper layer.
4408  *
4409  *  We loop at most count times if count is > 0, or until done if
4410  *  count < 0.
4411  *  
4412  *  For polling we also now return the number of cleaned packets
4413  *********************************************************************/
4414 static bool
4415 em_rxeof(struct rx_ring *rxr, int count, int *done)
4416 {
4417         struct adapter          *adapter = rxr->adapter;
4418         struct ifnet            *ifp = adapter->ifp;
4419         struct mbuf             *mp, *sendmp;
4420         u8                      status = 0;
4421         u16                     len;
4422         int                     i, processed, rxdone = 0;
4423         bool                    eop;
4424         struct e1000_rx_desc    *cur;
4425
4426         EM_RX_LOCK(rxr);
4427
4428 #ifdef DEV_NETMAP
4429         if (netmap_rx_irq(ifp, rxr->me | NETMAP_LOCKED_ENTER, &processed))
4430                 return (FALSE);
4431 #endif /* DEV_NETMAP */
4432
4433         for (i = rxr->next_to_check, processed = 0; count != 0;) {
4434
4435                 if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
4436                         break;
4437
4438                 bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
4439                     BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
4440
4441                 cur = &rxr->rx_base[i];
4442                 status = cur->status;
4443                 mp = sendmp = NULL;
4444
4445                 if ((status & E1000_RXD_STAT_DD) == 0)
4446                         break;
4447
4448                 len = le16toh(cur->length);
4449                 eop = (status & E1000_RXD_STAT_EOP) != 0;
4450
4451                 if ((cur->errors & E1000_RXD_ERR_FRAME_ERR_MASK) ||
4452                     (rxr->discard == TRUE)) {
4453                         adapter->dropped_pkts++;
4454                         ++rxr->rx_discarded;
4455                         if (!eop) /* Catch subsequent segs */
4456                                 rxr->discard = TRUE;
4457                         else
4458                                 rxr->discard = FALSE;
4459                         em_rx_discard(rxr, i);
4460                         goto next_desc;
4461                 }
4462
4463                 /* Assign correct length to the current fragment */
4464                 mp = rxr->rx_buffers[i].m_head;
4465                 mp->m_len = len;
4466
4467                 /* Trigger for refresh */
4468                 rxr->rx_buffers[i].m_head = NULL;
4469
4470                 /* First segment? */
4471                 if (rxr->fmp == NULL) {
4472                         mp->m_pkthdr.len = len;
4473                         rxr->fmp = rxr->lmp = mp;
4474                 } else {
4475                         /* Chain mbuf's together */
4476                         mp->m_flags &= ~M_PKTHDR;
4477                         rxr->lmp->m_next = mp;
4478                         rxr->lmp = mp;
4479                         rxr->fmp->m_pkthdr.len += len;
4480                 }
4481
4482                 if (eop) {
4483                         --count;
4484                         sendmp = rxr->fmp;
4485                         sendmp->m_pkthdr.rcvif = ifp;
4486                         ifp->if_ipackets++;
4487                         em_receive_checksum(cur, sendmp);
4488 #ifndef __NO_STRICT_ALIGNMENT
4489                         if (adapter->hw.mac.max_frame_size >
4490                             (MCLBYTES - ETHER_ALIGN) &&
4491                             em_fixup_rx(rxr) != 0)
4492                                 goto skip;
4493 #endif
4494                         if (status & E1000_RXD_STAT_VP) {
4495                                 sendmp->m_pkthdr.ether_vtag =
4496                                     le16toh(cur->special);
4497                                 sendmp->m_flags |= M_VLANTAG;
4498                         }
4499 #ifndef __NO_STRICT_ALIGNMENT
4500 skip:
4501 #endif
4502                         rxr->fmp = rxr->lmp = NULL;
4503                 }
4504 next_desc:
4505                 /* Zero out the receive descriptors status. */
4506                 cur->status = 0;
4507                 ++rxdone;       /* cumulative for POLL */
4508                 ++processed;
4509
4510                 /* Advance our pointers to the next descriptor. */
4511                 if (++i == adapter->num_rx_desc)
4512                         i = 0;
4513
4514                 /* Send to the stack */
4515                 if (sendmp != NULL) {
4516                         rxr->next_to_check = i;
4517                         EM_RX_UNLOCK(rxr);
4518                         (*ifp->if_input)(ifp, sendmp);
4519                         EM_RX_LOCK(rxr);
4520                         i = rxr->next_to_check;
4521                 }
4522
4523                 /* Only refresh mbufs every 8 descriptors */
4524                 if (processed == 8) {
4525                         em_refresh_mbufs(rxr, i);
4526                         processed = 0;
4527                 }
4528         }
4529
4530         /* Catch any remaining refresh work */
4531         if (e1000_rx_unrefreshed(rxr))
4532                 em_refresh_mbufs(rxr, i);
4533
4534         rxr->next_to_check = i;
4535         if (done != NULL)
4536                 *done = rxdone;
4537         EM_RX_UNLOCK(rxr);
4538
4539         return ((status & E1000_RXD_STAT_DD) ? TRUE : FALSE);
4540 }
4541
4542 static __inline void
4543 em_rx_discard(struct rx_ring *rxr, int i)
4544 {
4545         struct em_buffer        *rbuf;
4546
4547         rbuf = &rxr->rx_buffers[i];
4548         /* Free any previous pieces */
4549         if (rxr->fmp != NULL) {
4550                 rxr->fmp->m_flags |= M_PKTHDR;
4551                 m_freem(rxr->fmp);
4552                 rxr->fmp = NULL;
4553                 rxr->lmp = NULL;
4554         }
4555         /*
4556         ** Free buffer and allow em_refresh_mbufs()
4557         ** to clean up and recharge buffer.
4558         */
4559         if (rbuf->m_head) {
4560                 m_free(rbuf->m_head);
4561                 rbuf->m_head = NULL;
4562         }
4563         return;
4564 }
4565
4566 #ifndef __NO_STRICT_ALIGNMENT
4567 /*
4568  * When jumbo frames are enabled we should realign entire payload on
4569  * architecures with strict alignment. This is serious design mistake of 8254x
4570  * as it nullifies DMA operations. 8254x just allows RX buffer size to be
4571  * 2048/4096/8192/16384. What we really want is 2048 - ETHER_ALIGN to align its
4572  * payload. On architecures without strict alignment restrictions 8254x still
4573  * performs unaligned memory access which would reduce the performance too.
4574  * To avoid copying over an entire frame to align, we allocate a new mbuf and
4575  * copy ethernet header to the new mbuf. The new mbuf is prepended into the
4576  * existing mbuf chain.
4577  *
4578  * Be aware, best performance of the 8254x is achived only when jumbo frame is
4579  * not used at all on architectures with strict alignment.
4580  */
4581 static int
4582 em_fixup_rx(struct rx_ring *rxr)
4583 {
4584         struct adapter *adapter = rxr->adapter;
4585         struct mbuf *m, *n;
4586         int error;
4587
4588         error = 0;
4589         m = rxr->fmp;
4590         if (m->m_len <= (MCLBYTES - ETHER_HDR_LEN)) {
4591                 bcopy(m->m_data, m->m_data + ETHER_HDR_LEN, m->m_len);
4592                 m->m_data += ETHER_HDR_LEN;
4593         } else {
4594                 MGETHDR(n, M_NOWAIT, MT_DATA);
4595                 if (n != NULL) {
4596                         bcopy(m->m_data, n->m_data, ETHER_HDR_LEN);
4597                         m->m_data += ETHER_HDR_LEN;
4598                         m->m_len -= ETHER_HDR_LEN;
4599                         n->m_len = ETHER_HDR_LEN;
4600                         M_MOVE_PKTHDR(n, m);
4601                         n->m_next = m;
4602                         rxr->fmp = n;
4603                 } else {
4604                         adapter->dropped_pkts++;
4605                         m_freem(rxr->fmp);
4606                         rxr->fmp = NULL;
4607                         error = ENOMEM;
4608                 }
4609         }
4610
4611         return (error);
4612 }
4613 #endif
4614
4615 /*********************************************************************
4616  *
4617  *  Verify that the hardware indicated that the checksum is valid.
4618  *  Inform the stack about the status of checksum so that stack
4619  *  doesn't spend time verifying the checksum.
4620  *
4621  *********************************************************************/
4622 static void
4623 em_receive_checksum(struct e1000_rx_desc *rx_desc, struct mbuf *mp)
4624 {
4625         mp->m_pkthdr.csum_flags = 0;
4626
4627         /* Ignore Checksum bit is set */
4628         if (rx_desc->status & E1000_RXD_STAT_IXSM)
4629                 return;
4630
4631         if (rx_desc->errors & (E1000_RXD_ERR_TCPE | E1000_RXD_ERR_IPE))
4632                 return;
4633
4634         /* IP Checksum Good? */
4635         if (rx_desc->status & E1000_RXD_STAT_IPCS)
4636                 mp->m_pkthdr.csum_flags = (CSUM_IP_CHECKED | CSUM_IP_VALID);
4637
4638         /* TCP or UDP checksum */
4639         if (rx_desc->status & (E1000_RXD_STAT_TCPCS | E1000_RXD_STAT_UDPCS)) {
4640                 mp->m_pkthdr.csum_flags |= (CSUM_DATA_VALID | CSUM_PSEUDO_HDR);
4641                 mp->m_pkthdr.csum_data = htons(0xffff);
4642         }
4643 }
4644
4645 /*
4646  * This routine is run via an vlan
4647  * config EVENT
4648  */
4649 static void
4650 em_register_vlan(void *arg, struct ifnet *ifp, u16 vtag)
4651 {
4652         struct adapter  *adapter = ifp->if_softc;
4653         u32             index, bit;
4654
4655         if (ifp->if_softc !=  arg)   /* Not our event */
4656                 return;
4657
4658         if ((vtag == 0) || (vtag > 4095))       /* Invalid ID */
4659                 return;
4660
4661         EM_CORE_LOCK(adapter);
4662         index = (vtag >> 5) & 0x7F;
4663         bit = vtag & 0x1F;
4664         adapter->shadow_vfta[index] |= (1 << bit);
4665         ++adapter->num_vlans;
4666         /* Re-init to load the changes */
4667         if (ifp->if_capenable & IFCAP_VLAN_HWFILTER)
4668                 em_init_locked(adapter);
4669         EM_CORE_UNLOCK(adapter);
4670 }
4671
4672 /*
4673  * This routine is run via an vlan
4674  * unconfig EVENT
4675  */
4676 static void
4677 em_unregister_vlan(void *arg, struct ifnet *ifp, u16 vtag)
4678 {
4679         struct adapter  *adapter = ifp->if_softc;
4680         u32             index, bit;
4681
4682         if (ifp->if_softc !=  arg)
4683                 return;
4684
4685         if ((vtag == 0) || (vtag > 4095))       /* Invalid */
4686                 return;
4687
4688         EM_CORE_LOCK(adapter);
4689         index = (vtag >> 5) & 0x7F;
4690         bit = vtag & 0x1F;
4691         adapter->shadow_vfta[index] &= ~(1 << bit);
4692         --adapter->num_vlans;
4693         /* Re-init to load the changes */
4694         if (ifp->if_capenable & IFCAP_VLAN_HWFILTER)
4695                 em_init_locked(adapter);
4696         EM_CORE_UNLOCK(adapter);
4697 }
4698
4699 static void
4700 em_setup_vlan_hw_support(struct adapter *adapter)
4701 {
4702         struct e1000_hw *hw = &adapter->hw;
4703         u32             reg;
4704
4705         /*
4706         ** We get here thru init_locked, meaning
4707         ** a soft reset, this has already cleared
4708         ** the VFTA and other state, so if there
4709         ** have been no vlan's registered do nothing.
4710         */
4711         if (adapter->num_vlans == 0)
4712                 return;
4713
4714         /*
4715         ** A soft reset zero's out the VFTA, so
4716         ** we need to repopulate it now.
4717         */
4718         for (int i = 0; i < EM_VFTA_SIZE; i++)
4719                 if (adapter->shadow_vfta[i] != 0)
4720                         E1000_WRITE_REG_ARRAY(hw, E1000_VFTA,
4721                             i, adapter->shadow_vfta[i]);
4722
4723         reg = E1000_READ_REG(hw, E1000_CTRL);
4724         reg |= E1000_CTRL_VME;
4725         E1000_WRITE_REG(hw, E1000_CTRL, reg);
4726
4727         /* Enable the Filter Table */
4728         reg = E1000_READ_REG(hw, E1000_RCTL);
4729         reg &= ~E1000_RCTL_CFIEN;
4730         reg |= E1000_RCTL_VFE;
4731         E1000_WRITE_REG(hw, E1000_RCTL, reg);
4732 }
4733
4734 static void
4735 em_enable_intr(struct adapter *adapter)
4736 {
4737         struct e1000_hw *hw = &adapter->hw;
4738         u32 ims_mask = IMS_ENABLE_MASK;
4739
4740         if (hw->mac.type == e1000_82574) {
4741                 E1000_WRITE_REG(hw, EM_EIAC, EM_MSIX_MASK);
4742                 ims_mask |= EM_MSIX_MASK;
4743         } 
4744         E1000_WRITE_REG(hw, E1000_IMS, ims_mask);
4745 }
4746
4747 static void
4748 em_disable_intr(struct adapter *adapter)
4749 {
4750         struct e1000_hw *hw = &adapter->hw;
4751
4752         if (hw->mac.type == e1000_82574)
4753                 E1000_WRITE_REG(hw, EM_EIAC, 0);
4754         E1000_WRITE_REG(&adapter->hw, E1000_IMC, 0xffffffff);
4755 }
4756
4757 /*
4758  * Bit of a misnomer, what this really means is
4759  * to enable OS management of the system... aka
4760  * to disable special hardware management features 
4761  */
4762 static void
4763 em_init_manageability(struct adapter *adapter)
4764 {
4765         /* A shared code workaround */
4766 #define E1000_82542_MANC2H E1000_MANC2H
4767         if (adapter->has_manage) {
4768                 int manc2h = E1000_READ_REG(&adapter->hw, E1000_MANC2H);
4769                 int manc = E1000_READ_REG(&adapter->hw, E1000_MANC);
4770
4771                 /* disable hardware interception of ARP */
4772                 manc &= ~(E1000_MANC_ARP_EN);
4773
4774                 /* enable receiving management packets to the host */
4775                 manc |= E1000_MANC_EN_MNG2HOST;
4776 #define E1000_MNG2HOST_PORT_623 (1 << 5)
4777 #define E1000_MNG2HOST_PORT_664 (1 << 6)
4778                 manc2h |= E1000_MNG2HOST_PORT_623;
4779                 manc2h |= E1000_MNG2HOST_PORT_664;
4780                 E1000_WRITE_REG(&adapter->hw, E1000_MANC2H, manc2h);
4781                 E1000_WRITE_REG(&adapter->hw, E1000_MANC, manc);
4782         }
4783 }
4784
4785 /*
4786  * Give control back to hardware management
4787  * controller if there is one.
4788  */
4789 static void
4790 em_release_manageability(struct adapter *adapter)
4791 {
4792         if (adapter->has_manage) {
4793                 int manc = E1000_READ_REG(&adapter->hw, E1000_MANC);
4794
4795                 /* re-enable hardware interception of ARP */
4796                 manc |= E1000_MANC_ARP_EN;
4797                 manc &= ~E1000_MANC_EN_MNG2HOST;
4798
4799                 E1000_WRITE_REG(&adapter->hw, E1000_MANC, manc);
4800         }
4801 }
4802
4803 /*
4804  * em_get_hw_control sets the {CTRL_EXT|FWSM}:DRV_LOAD bit.
4805  * For ASF and Pass Through versions of f/w this means
4806  * that the driver is loaded. For AMT version type f/w
4807  * this means that the network i/f is open.
4808  */
4809 static void
4810 em_get_hw_control(struct adapter *adapter)
4811 {
4812         u32 ctrl_ext, swsm;
4813
4814         if (adapter->hw.mac.type == e1000_82573) {
4815                 swsm = E1000_READ_REG(&adapter->hw, E1000_SWSM);
4816                 E1000_WRITE_REG(&adapter->hw, E1000_SWSM,
4817                     swsm | E1000_SWSM_DRV_LOAD);
4818                 return;
4819         }
4820         /* else */
4821         ctrl_ext = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
4822         E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT,
4823             ctrl_ext | E1000_CTRL_EXT_DRV_LOAD);
4824         return;
4825 }
4826
4827 /*
4828  * em_release_hw_control resets {CTRL_EXT|FWSM}:DRV_LOAD bit.
4829  * For ASF and Pass Through versions of f/w this means that
4830  * the driver is no longer loaded. For AMT versions of the
4831  * f/w this means that the network i/f is closed.
4832  */
4833 static void
4834 em_release_hw_control(struct adapter *adapter)
4835 {
4836         u32 ctrl_ext, swsm;
4837
4838         if (!adapter->has_manage)
4839                 return;
4840
4841         if (adapter->hw.mac.type == e1000_82573) {
4842                 swsm = E1000_READ_REG(&adapter->hw, E1000_SWSM);
4843                 E1000_WRITE_REG(&adapter->hw, E1000_SWSM,
4844                     swsm & ~E1000_SWSM_DRV_LOAD);
4845                 return;
4846         }
4847         /* else */
4848         ctrl_ext = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
4849         E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT,
4850             ctrl_ext & ~E1000_CTRL_EXT_DRV_LOAD);
4851         return;
4852 }
4853
4854 static int
4855 em_is_valid_ether_addr(u8 *addr)
4856 {
4857         char zero_addr[6] = { 0, 0, 0, 0, 0, 0 };
4858
4859         if ((addr[0] & 1) || (!bcmp(addr, zero_addr, ETHER_ADDR_LEN))) {
4860                 return (FALSE);
4861         }
4862
4863         return (TRUE);
4864 }
4865
4866 /*
4867 ** Parse the interface capabilities with regard
4868 ** to both system management and wake-on-lan for
4869 ** later use.
4870 */
4871 static void
4872 em_get_wakeup(device_t dev)
4873 {
4874         struct adapter  *adapter = device_get_softc(dev);
4875         u16             eeprom_data = 0, device_id, apme_mask;
4876
4877         adapter->has_manage = e1000_enable_mng_pass_thru(&adapter->hw);
4878         apme_mask = EM_EEPROM_APME;
4879
4880         switch (adapter->hw.mac.type) {
4881         case e1000_82573:
4882         case e1000_82583:
4883                 adapter->has_amt = TRUE;
4884                 /* Falls thru */
4885         case e1000_82571:
4886         case e1000_82572:
4887         case e1000_80003es2lan:
4888                 if (adapter->hw.bus.func == 1) {
4889                         e1000_read_nvm(&adapter->hw,
4890                             NVM_INIT_CONTROL3_PORT_B, 1, &eeprom_data);
4891                         break;
4892                 } else
4893                         e1000_read_nvm(&adapter->hw,
4894                             NVM_INIT_CONTROL3_PORT_A, 1, &eeprom_data);
4895                 break;
4896         case e1000_ich8lan:
4897         case e1000_ich9lan:
4898         case e1000_ich10lan:
4899         case e1000_pchlan:
4900         case e1000_pch2lan:
4901                 apme_mask = E1000_WUC_APME;
4902                 adapter->has_amt = TRUE;
4903                 eeprom_data = E1000_READ_REG(&adapter->hw, E1000_WUC);
4904                 break;
4905         default:
4906                 e1000_read_nvm(&adapter->hw,
4907                     NVM_INIT_CONTROL3_PORT_A, 1, &eeprom_data);
4908                 break;
4909         }
4910         if (eeprom_data & apme_mask)
4911                 adapter->wol = (E1000_WUFC_MAG | E1000_WUFC_MC);
4912         /*
4913          * We have the eeprom settings, now apply the special cases
4914          * where the eeprom may be wrong or the board won't support
4915          * wake on lan on a particular port
4916          */
4917         device_id = pci_get_device(dev);
4918         switch (device_id) {
4919         case E1000_DEV_ID_82571EB_FIBER:
4920                 /* Wake events only supported on port A for dual fiber
4921                  * regardless of eeprom setting */
4922                 if (E1000_READ_REG(&adapter->hw, E1000_STATUS) &
4923                     E1000_STATUS_FUNC_1)
4924                         adapter->wol = 0;
4925                 break;
4926         case E1000_DEV_ID_82571EB_QUAD_COPPER:
4927         case E1000_DEV_ID_82571EB_QUAD_FIBER:
4928         case E1000_DEV_ID_82571EB_QUAD_COPPER_LP:
4929                 /* if quad port adapter, disable WoL on all but port A */
4930                 if (global_quad_port_a != 0)
4931                         adapter->wol = 0;
4932                 /* Reset for multiple quad port adapters */
4933                 if (++global_quad_port_a == 4)
4934                         global_quad_port_a = 0;
4935                 break;
4936         }
4937         return;
4938 }
4939
4940
4941 /*
4942  * Enable PCI Wake On Lan capability
4943  */
4944 static void
4945 em_enable_wakeup(device_t dev)
4946 {
4947         struct adapter  *adapter = device_get_softc(dev);
4948         struct ifnet    *ifp = adapter->ifp;
4949         u32             pmc, ctrl, ctrl_ext, rctl;
4950         u16             status;
4951
4952         if ((pci_find_cap(dev, PCIY_PMG, &pmc) != 0))
4953                 return;
4954
4955         /* Advertise the wakeup capability */
4956         ctrl = E1000_READ_REG(&adapter->hw, E1000_CTRL);
4957         ctrl |= (E1000_CTRL_SWDPIN2 | E1000_CTRL_SWDPIN3);
4958         E1000_WRITE_REG(&adapter->hw, E1000_CTRL, ctrl);
4959         E1000_WRITE_REG(&adapter->hw, E1000_WUC, E1000_WUC_PME_EN);
4960
4961         if ((adapter->hw.mac.type == e1000_ich8lan) ||
4962             (adapter->hw.mac.type == e1000_pchlan) ||
4963             (adapter->hw.mac.type == e1000_ich9lan) ||
4964             (adapter->hw.mac.type == e1000_ich10lan))
4965                 e1000_suspend_workarounds_ich8lan(&adapter->hw);
4966
4967         /* Keep the laser running on Fiber adapters */
4968         if (adapter->hw.phy.media_type == e1000_media_type_fiber ||
4969             adapter->hw.phy.media_type == e1000_media_type_internal_serdes) {
4970                 ctrl_ext = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
4971                 ctrl_ext |= E1000_CTRL_EXT_SDP3_DATA;
4972                 E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT, ctrl_ext);
4973         }
4974
4975         /*
4976         ** Determine type of Wakeup: note that wol
4977         ** is set with all bits on by default.
4978         */
4979         if ((ifp->if_capenable & IFCAP_WOL_MAGIC) == 0)
4980                 adapter->wol &= ~E1000_WUFC_MAG;
4981
4982         if ((ifp->if_capenable & IFCAP_WOL_MCAST) == 0)
4983                 adapter->wol &= ~E1000_WUFC_MC;
4984         else {
4985                 rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
4986                 rctl |= E1000_RCTL_MPE;
4987                 E1000_WRITE_REG(&adapter->hw, E1000_RCTL, rctl);
4988         }
4989
4990         if ((adapter->hw.mac.type == e1000_pchlan) ||
4991             (adapter->hw.mac.type == e1000_pch2lan)) {
4992                 if (em_enable_phy_wakeup(adapter))
4993                         return;
4994         } else {
4995                 E1000_WRITE_REG(&adapter->hw, E1000_WUC, E1000_WUC_PME_EN);
4996                 E1000_WRITE_REG(&adapter->hw, E1000_WUFC, adapter->wol);
4997         }
4998
4999         if (adapter->hw.phy.type == e1000_phy_igp_3)
5000                 e1000_igp3_phy_powerdown_workaround_ich8lan(&adapter->hw);
5001
5002         /* Request PME */
5003         status = pci_read_config(dev, pmc + PCIR_POWER_STATUS, 2);
5004         status &= ~(PCIM_PSTAT_PME | PCIM_PSTAT_PMEENABLE);
5005         if (ifp->if_capenable & IFCAP_WOL)
5006                 status |= PCIM_PSTAT_PME | PCIM_PSTAT_PMEENABLE;
5007         pci_write_config(dev, pmc + PCIR_POWER_STATUS, status, 2);
5008
5009         return;
5010 }
5011
5012 /*
5013 ** WOL in the newer chipset interfaces (pchlan)
5014 ** require thing to be copied into the phy
5015 */
5016 static int
5017 em_enable_phy_wakeup(struct adapter *adapter)
5018 {
5019         struct e1000_hw *hw = &adapter->hw;
5020         u32 mreg, ret = 0;
5021         u16 preg;
5022
5023         /* copy MAC RARs to PHY RARs */
5024         e1000_copy_rx_addrs_to_phy_ich8lan(hw);
5025
5026         /* copy MAC MTA to PHY MTA */
5027         for (int i = 0; i < adapter->hw.mac.mta_reg_count; i++) {
5028                 mreg = E1000_READ_REG_ARRAY(hw, E1000_MTA, i);
5029                 e1000_write_phy_reg(hw, BM_MTA(i), (u16)(mreg & 0xFFFF));
5030                 e1000_write_phy_reg(hw, BM_MTA(i) + 1,
5031                     (u16)((mreg >> 16) & 0xFFFF));
5032         }
5033
5034         /* configure PHY Rx Control register */
5035         e1000_read_phy_reg(&adapter->hw, BM_RCTL, &preg);
5036         mreg = E1000_READ_REG(hw, E1000_RCTL);
5037         if (mreg & E1000_RCTL_UPE)
5038                 preg |= BM_RCTL_UPE;
5039         if (mreg & E1000_RCTL_MPE)
5040                 preg |= BM_RCTL_MPE;
5041         preg &= ~(BM_RCTL_MO_MASK);
5042         if (mreg & E1000_RCTL_MO_3)
5043                 preg |= (((mreg & E1000_RCTL_MO_3) >> E1000_RCTL_MO_SHIFT)
5044                                 << BM_RCTL_MO_SHIFT);
5045         if (mreg & E1000_RCTL_BAM)
5046                 preg |= BM_RCTL_BAM;
5047         if (mreg & E1000_RCTL_PMCF)
5048                 preg |= BM_RCTL_PMCF;
5049         mreg = E1000_READ_REG(hw, E1000_CTRL);
5050         if (mreg & E1000_CTRL_RFCE)
5051                 preg |= BM_RCTL_RFCE;
5052         e1000_write_phy_reg(&adapter->hw, BM_RCTL, preg);
5053
5054         /* enable PHY wakeup in MAC register */
5055         E1000_WRITE_REG(hw, E1000_WUC,
5056             E1000_WUC_PHY_WAKE | E1000_WUC_PME_EN);
5057         E1000_WRITE_REG(hw, E1000_WUFC, adapter->wol);
5058
5059         /* configure and enable PHY wakeup in PHY registers */
5060         e1000_write_phy_reg(&adapter->hw, BM_WUFC, adapter->wol);
5061         e1000_write_phy_reg(&adapter->hw, BM_WUC, E1000_WUC_PME_EN);
5062
5063         /* activate PHY wakeup */
5064         ret = hw->phy.ops.acquire(hw);
5065         if (ret) {
5066                 printf("Could not acquire PHY\n");
5067                 return ret;
5068         }
5069         e1000_write_phy_reg_mdic(hw, IGP01E1000_PHY_PAGE_SELECT,
5070                                  (BM_WUC_ENABLE_PAGE << IGP_PAGE_SHIFT));
5071         ret = e1000_read_phy_reg_mdic(hw, BM_WUC_ENABLE_REG, &preg);
5072         if (ret) {
5073                 printf("Could not read PHY page 769\n");
5074                 goto out;
5075         }
5076         preg |= BM_WUC_ENABLE_BIT | BM_WUC_HOST_WU_BIT;
5077         ret = e1000_write_phy_reg_mdic(hw, BM_WUC_ENABLE_REG, preg);
5078         if (ret)
5079                 printf("Could not set PHY Host Wakeup bit\n");
5080 out:
5081         hw->phy.ops.release(hw);
5082
5083         return ret;
5084 }
5085
5086 static void
5087 em_led_func(void *arg, int onoff)
5088 {
5089         struct adapter  *adapter = arg;
5090  
5091         EM_CORE_LOCK(adapter);
5092         if (onoff) {
5093                 e1000_setup_led(&adapter->hw);
5094                 e1000_led_on(&adapter->hw);
5095         } else {
5096                 e1000_led_off(&adapter->hw);
5097                 e1000_cleanup_led(&adapter->hw);
5098         }
5099         EM_CORE_UNLOCK(adapter);
5100 }
5101
5102 /*
5103 ** Disable the L0S and L1 LINK states
5104 */
5105 static void
5106 em_disable_aspm(struct adapter *adapter)
5107 {
5108         int             base, reg;
5109         u16             link_cap,link_ctrl;
5110         device_t        dev = adapter->dev;
5111
5112         switch (adapter->hw.mac.type) {
5113                 case e1000_82573:
5114                 case e1000_82574:
5115                 case e1000_82583:
5116                         break;
5117                 default:
5118                         return;
5119         }
5120         if (pci_find_cap(dev, PCIY_EXPRESS, &base) != 0)
5121                 return;
5122         reg = base + PCIER_LINK_CAP;
5123         link_cap = pci_read_config(dev, reg, 2);
5124         if ((link_cap & PCIEM_LINK_CAP_ASPM) == 0)
5125                 return;
5126         reg = base + PCIER_LINK_CTL;
5127         link_ctrl = pci_read_config(dev, reg, 2);
5128         link_ctrl &= ~PCIEM_LINK_CTL_ASPMC;
5129         pci_write_config(dev, reg, link_ctrl, 2);
5130         return;
5131 }
5132
5133 /**********************************************************************
5134  *
5135  *  Update the board statistics counters.
5136  *
5137  **********************************************************************/
5138 static void
5139 em_update_stats_counters(struct adapter *adapter)
5140 {
5141         struct ifnet   *ifp;
5142
5143         if(adapter->hw.phy.media_type == e1000_media_type_copper ||
5144            (E1000_READ_REG(&adapter->hw, E1000_STATUS) & E1000_STATUS_LU)) {
5145                 adapter->stats.symerrs += E1000_READ_REG(&adapter->hw, E1000_SYMERRS);
5146                 adapter->stats.sec += E1000_READ_REG(&adapter->hw, E1000_SEC);
5147         }
5148         adapter->stats.crcerrs += E1000_READ_REG(&adapter->hw, E1000_CRCERRS);
5149         adapter->stats.mpc += E1000_READ_REG(&adapter->hw, E1000_MPC);
5150         adapter->stats.scc += E1000_READ_REG(&adapter->hw, E1000_SCC);
5151         adapter->stats.ecol += E1000_READ_REG(&adapter->hw, E1000_ECOL);
5152
5153         adapter->stats.mcc += E1000_READ_REG(&adapter->hw, E1000_MCC);
5154         adapter->stats.latecol += E1000_READ_REG(&adapter->hw, E1000_LATECOL);
5155         adapter->stats.colc += E1000_READ_REG(&adapter->hw, E1000_COLC);
5156         adapter->stats.dc += E1000_READ_REG(&adapter->hw, E1000_DC);
5157         adapter->stats.rlec += E1000_READ_REG(&adapter->hw, E1000_RLEC);
5158         adapter->stats.xonrxc += E1000_READ_REG(&adapter->hw, E1000_XONRXC);
5159         adapter->stats.xontxc += E1000_READ_REG(&adapter->hw, E1000_XONTXC);
5160         /*
5161         ** For watchdog management we need to know if we have been
5162         ** paused during the last interval, so capture that here.
5163         */
5164         adapter->pause_frames = E1000_READ_REG(&adapter->hw, E1000_XOFFRXC);
5165         adapter->stats.xoffrxc += adapter->pause_frames;
5166         adapter->stats.xofftxc += E1000_READ_REG(&adapter->hw, E1000_XOFFTXC);
5167         adapter->stats.fcruc += E1000_READ_REG(&adapter->hw, E1000_FCRUC);
5168         adapter->stats.prc64 += E1000_READ_REG(&adapter->hw, E1000_PRC64);
5169         adapter->stats.prc127 += E1000_READ_REG(&adapter->hw, E1000_PRC127);
5170         adapter->stats.prc255 += E1000_READ_REG(&adapter->hw, E1000_PRC255);
5171         adapter->stats.prc511 += E1000_READ_REG(&adapter->hw, E1000_PRC511);
5172         adapter->stats.prc1023 += E1000_READ_REG(&adapter->hw, E1000_PRC1023);
5173         adapter->stats.prc1522 += E1000_READ_REG(&adapter->hw, E1000_PRC1522);
5174         adapter->stats.gprc += E1000_READ_REG(&adapter->hw, E1000_GPRC);
5175         adapter->stats.bprc += E1000_READ_REG(&adapter->hw, E1000_BPRC);
5176         adapter->stats.mprc += E1000_READ_REG(&adapter->hw, E1000_MPRC);
5177         adapter->stats.gptc += E1000_READ_REG(&adapter->hw, E1000_GPTC);
5178
5179         /* For the 64-bit byte counters the low dword must be read first. */
5180         /* Both registers clear on the read of the high dword */
5181
5182         adapter->stats.gorc += E1000_READ_REG(&adapter->hw, E1000_GORCL) +
5183             ((u64)E1000_READ_REG(&adapter->hw, E1000_GORCH) << 32);
5184         adapter->stats.gotc += E1000_READ_REG(&adapter->hw, E1000_GOTCL) +
5185             ((u64)E1000_READ_REG(&adapter->hw, E1000_GOTCH) << 32);
5186
5187         adapter->stats.rnbc += E1000_READ_REG(&adapter->hw, E1000_RNBC);
5188         adapter->stats.ruc += E1000_READ_REG(&adapter->hw, E1000_RUC);
5189         adapter->stats.rfc += E1000_READ_REG(&adapter->hw, E1000_RFC);
5190         adapter->stats.roc += E1000_READ_REG(&adapter->hw, E1000_ROC);
5191         adapter->stats.rjc += E1000_READ_REG(&adapter->hw, E1000_RJC);
5192
5193         adapter->stats.tor += E1000_READ_REG(&adapter->hw, E1000_TORH);
5194         adapter->stats.tot += E1000_READ_REG(&adapter->hw, E1000_TOTH);
5195
5196         adapter->stats.tpr += E1000_READ_REG(&adapter->hw, E1000_TPR);
5197         adapter->stats.tpt += E1000_READ_REG(&adapter->hw, E1000_TPT);
5198         adapter->stats.ptc64 += E1000_READ_REG(&adapter->hw, E1000_PTC64);
5199         adapter->stats.ptc127 += E1000_READ_REG(&adapter->hw, E1000_PTC127);
5200         adapter->stats.ptc255 += E1000_READ_REG(&adapter->hw, E1000_PTC255);
5201         adapter->stats.ptc511 += E1000_READ_REG(&adapter->hw, E1000_PTC511);
5202         adapter->stats.ptc1023 += E1000_READ_REG(&adapter->hw, E1000_PTC1023);
5203         adapter->stats.ptc1522 += E1000_READ_REG(&adapter->hw, E1000_PTC1522);
5204         adapter->stats.mptc += E1000_READ_REG(&adapter->hw, E1000_MPTC);
5205         adapter->stats.bptc += E1000_READ_REG(&adapter->hw, E1000_BPTC);
5206
5207         /* Interrupt Counts */
5208
5209         adapter->stats.iac += E1000_READ_REG(&adapter->hw, E1000_IAC);
5210         adapter->stats.icrxptc += E1000_READ_REG(&adapter->hw, E1000_ICRXPTC);
5211         adapter->stats.icrxatc += E1000_READ_REG(&adapter->hw, E1000_ICRXATC);
5212         adapter->stats.ictxptc += E1000_READ_REG(&adapter->hw, E1000_ICTXPTC);
5213         adapter->stats.ictxatc += E1000_READ_REG(&adapter->hw, E1000_ICTXATC);
5214         adapter->stats.ictxqec += E1000_READ_REG(&adapter->hw, E1000_ICTXQEC);
5215         adapter->stats.ictxqmtc += E1000_READ_REG(&adapter->hw, E1000_ICTXQMTC);
5216         adapter->stats.icrxdmtc += E1000_READ_REG(&adapter->hw, E1000_ICRXDMTC);
5217         adapter->stats.icrxoc += E1000_READ_REG(&adapter->hw, E1000_ICRXOC);
5218
5219         if (adapter->hw.mac.type >= e1000_82543) {
5220                 adapter->stats.algnerrc += 
5221                 E1000_READ_REG(&adapter->hw, E1000_ALGNERRC);
5222                 adapter->stats.rxerrc += 
5223                 E1000_READ_REG(&adapter->hw, E1000_RXERRC);
5224                 adapter->stats.tncrs += 
5225                 E1000_READ_REG(&adapter->hw, E1000_TNCRS);
5226                 adapter->stats.cexterr += 
5227                 E1000_READ_REG(&adapter->hw, E1000_CEXTERR);
5228                 adapter->stats.tsctc += 
5229                 E1000_READ_REG(&adapter->hw, E1000_TSCTC);
5230                 adapter->stats.tsctfc += 
5231                 E1000_READ_REG(&adapter->hw, E1000_TSCTFC);
5232         }
5233         ifp = adapter->ifp;
5234
5235         ifp->if_collisions = adapter->stats.colc;
5236
5237         /* Rx Errors */
5238         ifp->if_ierrors = adapter->dropped_pkts + adapter->stats.rxerrc +
5239             adapter->stats.crcerrs + adapter->stats.algnerrc +
5240             adapter->stats.ruc + adapter->stats.roc +
5241             adapter->stats.mpc + adapter->stats.cexterr;
5242
5243         /* Tx Errors */
5244         ifp->if_oerrors = adapter->stats.ecol +
5245             adapter->stats.latecol + adapter->watchdog_events;
5246 }
5247
5248 /* Export a single 32-bit register via a read-only sysctl. */
5249 static int
5250 em_sysctl_reg_handler(SYSCTL_HANDLER_ARGS)
5251 {
5252         struct adapter *adapter;
5253         u_int val;
5254
5255         adapter = oidp->oid_arg1;
5256         val = E1000_READ_REG(&adapter->hw, oidp->oid_arg2);
5257         return (sysctl_handle_int(oidp, &val, 0, req));
5258 }
5259
5260 /*
5261  * Add sysctl variables, one per statistic, to the system.
5262  */
5263 static void
5264 em_add_hw_stats(struct adapter *adapter)
5265 {
5266         device_t dev = adapter->dev;
5267
5268         struct tx_ring *txr = adapter->tx_rings;
5269         struct rx_ring *rxr = adapter->rx_rings;
5270
5271         struct sysctl_ctx_list *ctx = device_get_sysctl_ctx(dev);
5272         struct sysctl_oid *tree = device_get_sysctl_tree(dev);
5273         struct sysctl_oid_list *child = SYSCTL_CHILDREN(tree);
5274         struct e1000_hw_stats *stats = &adapter->stats;
5275
5276         struct sysctl_oid *stat_node, *queue_node, *int_node;
5277         struct sysctl_oid_list *stat_list, *queue_list, *int_list;
5278
5279 #define QUEUE_NAME_LEN 32
5280         char namebuf[QUEUE_NAME_LEN];
5281         
5282         /* Driver Statistics */
5283         SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "link_irq",
5284                         CTLFLAG_RD, &adapter->link_irq,
5285                         "Link MSIX IRQ Handled");
5286         SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "mbuf_alloc_fail", 
5287                          CTLFLAG_RD, &adapter->mbuf_alloc_failed,
5288                          "Std mbuf failed");
5289         SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "cluster_alloc_fail", 
5290                          CTLFLAG_RD, &adapter->mbuf_cluster_failed,
5291                          "Std mbuf cluster failed");
5292         SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "dropped", 
5293                         CTLFLAG_RD, &adapter->dropped_pkts,
5294                         "Driver dropped packets");
5295         SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "tx_dma_fail", 
5296                         CTLFLAG_RD, &adapter->no_tx_dma_setup,
5297                         "Driver tx dma failure in xmit");
5298         SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "rx_overruns",
5299                         CTLFLAG_RD, &adapter->rx_overruns,
5300                         "RX overruns");
5301         SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "watchdog_timeouts",
5302                         CTLFLAG_RD, &adapter->watchdog_events,
5303                         "Watchdog timeouts");
5304         
5305         SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "device_control",
5306                         CTLTYPE_UINT | CTLFLAG_RD, adapter, E1000_CTRL,
5307                         em_sysctl_reg_handler, "IU",
5308                         "Device Control Register");
5309         SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "rx_control",
5310                         CTLTYPE_UINT | CTLFLAG_RD, adapter, E1000_RCTL,
5311                         em_sysctl_reg_handler, "IU",
5312                         "Receiver Control Register");
5313         SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "fc_high_water",
5314                         CTLFLAG_RD, &adapter->hw.fc.high_water, 0,
5315                         "Flow Control High Watermark");
5316         SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "fc_low_water", 
5317                         CTLFLAG_RD, &adapter->hw.fc.low_water, 0,
5318                         "Flow Control Low Watermark");
5319
5320         for (int i = 0; i < adapter->num_queues; i++, rxr++, txr++) {
5321                 snprintf(namebuf, QUEUE_NAME_LEN, "queue%d", i);
5322                 queue_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, namebuf,
5323                                             CTLFLAG_RD, NULL, "Queue Name");
5324                 queue_list = SYSCTL_CHILDREN(queue_node);
5325
5326                 SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "txd_head", 
5327                                 CTLTYPE_UINT | CTLFLAG_RD, adapter,
5328                                 E1000_TDH(txr->me),
5329                                 em_sysctl_reg_handler, "IU",
5330                                 "Transmit Descriptor Head");
5331                 SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "txd_tail", 
5332                                 CTLTYPE_UINT | CTLFLAG_RD, adapter,
5333                                 E1000_TDT(txr->me),
5334                                 em_sysctl_reg_handler, "IU",
5335                                 "Transmit Descriptor Tail");
5336                 SYSCTL_ADD_ULONG(ctx, queue_list, OID_AUTO, "tx_irq",
5337                                 CTLFLAG_RD, &txr->tx_irq,
5338                                 "Queue MSI-X Transmit Interrupts");
5339                 SYSCTL_ADD_ULONG(ctx, queue_list, OID_AUTO, "no_desc_avail", 
5340                                 CTLFLAG_RD, &txr->no_desc_avail,
5341                                 "Queue No Descriptor Available");
5342                 
5343                 SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "rxd_head", 
5344                                 CTLTYPE_UINT | CTLFLAG_RD, adapter,
5345                                 E1000_RDH(rxr->me),
5346                                 em_sysctl_reg_handler, "IU",
5347                                 "Receive Descriptor Head");
5348                 SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "rxd_tail", 
5349                                 CTLTYPE_UINT | CTLFLAG_RD, adapter,
5350                                 E1000_RDT(rxr->me),
5351                                 em_sysctl_reg_handler, "IU",
5352                                 "Receive Descriptor Tail");
5353                 SYSCTL_ADD_ULONG(ctx, queue_list, OID_AUTO, "rx_irq",
5354                                 CTLFLAG_RD, &rxr->rx_irq,
5355                                 "Queue MSI-X Receive Interrupts");
5356         }
5357
5358         /* MAC stats get their own sub node */
5359
5360         stat_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "mac_stats", 
5361                                     CTLFLAG_RD, NULL, "Statistics");
5362         stat_list = SYSCTL_CHILDREN(stat_node);
5363
5364         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "excess_coll",
5365                         CTLFLAG_RD, &stats->ecol,
5366                         "Excessive collisions");
5367         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "single_coll",
5368                         CTLFLAG_RD, &stats->scc,
5369                         "Single collisions");
5370         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "multiple_coll",
5371                         CTLFLAG_RD, &stats->mcc,
5372                         "Multiple collisions");
5373         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "late_coll",
5374                         CTLFLAG_RD, &stats->latecol,
5375                         "Late collisions");
5376         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "collision_count",
5377                         CTLFLAG_RD, &stats->colc,
5378                         "Collision Count");
5379         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "symbol_errors",
5380                         CTLFLAG_RD, &adapter->stats.symerrs,
5381                         "Symbol Errors");
5382         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "sequence_errors",
5383                         CTLFLAG_RD, &adapter->stats.sec,
5384                         "Sequence Errors");
5385         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "defer_count",
5386                         CTLFLAG_RD, &adapter->stats.dc,
5387                         "Defer Count");
5388         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "missed_packets",
5389                         CTLFLAG_RD, &adapter->stats.mpc,
5390                         "Missed Packets");
5391         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_no_buff",
5392                         CTLFLAG_RD, &adapter->stats.rnbc,
5393                         "Receive No Buffers");
5394         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_undersize",
5395                         CTLFLAG_RD, &adapter->stats.ruc,
5396                         "Receive Undersize");
5397         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_fragmented",
5398                         CTLFLAG_RD, &adapter->stats.rfc,
5399                         "Fragmented Packets Received ");
5400         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_oversize",
5401                         CTLFLAG_RD, &adapter->stats.roc,
5402                         "Oversized Packets Received");
5403         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_jabber",
5404                         CTLFLAG_RD, &adapter->stats.rjc,
5405                         "Recevied Jabber");
5406         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_errs",
5407                         CTLFLAG_RD, &adapter->stats.rxerrc,
5408                         "Receive Errors");
5409         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "crc_errs",
5410                         CTLFLAG_RD, &adapter->stats.crcerrs,
5411                         "CRC errors");
5412         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "alignment_errs",
5413                         CTLFLAG_RD, &adapter->stats.algnerrc,
5414                         "Alignment Errors");
5415         /* On 82575 these are collision counts */
5416         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "coll_ext_errs",
5417                         CTLFLAG_RD, &adapter->stats.cexterr,
5418                         "Collision/Carrier extension errors");
5419         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "xon_recvd",
5420                         CTLFLAG_RD, &adapter->stats.xonrxc,
5421                         "XON Received");
5422         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "xon_txd",
5423                         CTLFLAG_RD, &adapter->stats.xontxc,
5424                         "XON Transmitted");
5425         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "xoff_recvd",
5426                         CTLFLAG_RD, &adapter->stats.xoffrxc,
5427                         "XOFF Received");
5428         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "xoff_txd",
5429                         CTLFLAG_RD, &adapter->stats.xofftxc,
5430                         "XOFF Transmitted");
5431
5432         /* Packet Reception Stats */
5433         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "total_pkts_recvd",
5434                         CTLFLAG_RD, &adapter->stats.tpr,
5435                         "Total Packets Received ");
5436         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_pkts_recvd",
5437                         CTLFLAG_RD, &adapter->stats.gprc,
5438                         "Good Packets Received");
5439         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "bcast_pkts_recvd",
5440                         CTLFLAG_RD, &adapter->stats.bprc,
5441                         "Broadcast Packets Received");
5442         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "mcast_pkts_recvd",
5443                         CTLFLAG_RD, &adapter->stats.mprc,
5444                         "Multicast Packets Received");
5445         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_64",
5446                         CTLFLAG_RD, &adapter->stats.prc64,
5447                         "64 byte frames received ");
5448         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_65_127",
5449                         CTLFLAG_RD, &adapter->stats.prc127,
5450                         "65-127 byte frames received");
5451         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_128_255",
5452                         CTLFLAG_RD, &adapter->stats.prc255,
5453                         "128-255 byte frames received");
5454         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_256_511",
5455                         CTLFLAG_RD, &adapter->stats.prc511,
5456                         "256-511 byte frames received");
5457         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_512_1023",
5458                         CTLFLAG_RD, &adapter->stats.prc1023,
5459                         "512-1023 byte frames received");
5460         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_1024_1522",
5461                         CTLFLAG_RD, &adapter->stats.prc1522,
5462                         "1023-1522 byte frames received");
5463         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_octets_recvd",
5464                         CTLFLAG_RD, &adapter->stats.gorc, 
5465                         "Good Octets Received"); 
5466
5467         /* Packet Transmission Stats */
5468         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_octets_txd",
5469                         CTLFLAG_RD, &adapter->stats.gotc, 
5470                         "Good Octets Transmitted"); 
5471         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "total_pkts_txd",
5472                         CTLFLAG_RD, &adapter->stats.tpt,
5473                         "Total Packets Transmitted");
5474         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_pkts_txd",
5475                         CTLFLAG_RD, &adapter->stats.gptc,
5476                         "Good Packets Transmitted");
5477         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "bcast_pkts_txd",
5478                         CTLFLAG_RD, &adapter->stats.bptc,
5479                         "Broadcast Packets Transmitted");
5480         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "mcast_pkts_txd",
5481                         CTLFLAG_RD, &adapter->stats.mptc,
5482                         "Multicast Packets Transmitted");
5483         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_64",
5484                         CTLFLAG_RD, &adapter->stats.ptc64,
5485                         "64 byte frames transmitted ");
5486         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_65_127",
5487                         CTLFLAG_RD, &adapter->stats.ptc127,
5488                         "65-127 byte frames transmitted");
5489         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_128_255",
5490                         CTLFLAG_RD, &adapter->stats.ptc255,
5491                         "128-255 byte frames transmitted");
5492         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_256_511",
5493                         CTLFLAG_RD, &adapter->stats.ptc511,
5494                         "256-511 byte frames transmitted");
5495         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_512_1023",
5496                         CTLFLAG_RD, &adapter->stats.ptc1023,
5497                         "512-1023 byte frames transmitted");
5498         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_1024_1522",
5499                         CTLFLAG_RD, &adapter->stats.ptc1522,
5500                         "1024-1522 byte frames transmitted");
5501         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tso_txd",
5502                         CTLFLAG_RD, &adapter->stats.tsctc,
5503                         "TSO Contexts Transmitted");
5504         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tso_ctx_fail",
5505                         CTLFLAG_RD, &adapter->stats.tsctfc,
5506                         "TSO Contexts Failed");
5507
5508
5509         /* Interrupt Stats */
5510
5511         int_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "interrupts", 
5512                                     CTLFLAG_RD, NULL, "Interrupt Statistics");
5513         int_list = SYSCTL_CHILDREN(int_node);
5514
5515         SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "asserts",
5516                         CTLFLAG_RD, &adapter->stats.iac,
5517                         "Interrupt Assertion Count");
5518
5519         SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "rx_pkt_timer",
5520                         CTLFLAG_RD, &adapter->stats.icrxptc,
5521                         "Interrupt Cause Rx Pkt Timer Expire Count");
5522
5523         SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "rx_abs_timer",
5524                         CTLFLAG_RD, &adapter->stats.icrxatc,
5525                         "Interrupt Cause Rx Abs Timer Expire Count");
5526
5527         SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "tx_pkt_timer",
5528                         CTLFLAG_RD, &adapter->stats.ictxptc,
5529                         "Interrupt Cause Tx Pkt Timer Expire Count");
5530
5531         SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "tx_abs_timer",
5532                         CTLFLAG_RD, &adapter->stats.ictxatc,
5533                         "Interrupt Cause Tx Abs Timer Expire Count");
5534
5535         SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "tx_queue_empty",
5536                         CTLFLAG_RD, &adapter->stats.ictxqec,
5537                         "Interrupt Cause Tx Queue Empty Count");
5538
5539         SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "tx_queue_min_thresh",
5540                         CTLFLAG_RD, &adapter->stats.ictxqmtc,
5541                         "Interrupt Cause Tx Queue Min Thresh Count");
5542
5543         SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "rx_desc_min_thresh",
5544                         CTLFLAG_RD, &adapter->stats.icrxdmtc,
5545                         "Interrupt Cause Rx Desc Min Thresh Count");
5546
5547         SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "rx_overrun",
5548                         CTLFLAG_RD, &adapter->stats.icrxoc,
5549                         "Interrupt Cause Receiver Overrun Count");
5550 }
5551
5552 /**********************************************************************
5553  *
5554  *  This routine provides a way to dump out the adapter eeprom,
5555  *  often a useful debug/service tool. This only dumps the first
5556  *  32 words, stuff that matters is in that extent.
5557  *
5558  **********************************************************************/
5559 static int
5560 em_sysctl_nvm_info(SYSCTL_HANDLER_ARGS)
5561 {
5562         struct adapter *adapter = (struct adapter *)arg1;
5563         int error;
5564         int result;
5565
5566         result = -1;
5567         error = sysctl_handle_int(oidp, &result, 0, req);
5568
5569         if (error || !req->newptr)
5570                 return (error);
5571
5572         /*
5573          * This value will cause a hex dump of the
5574          * first 32 16-bit words of the EEPROM to
5575          * the screen.
5576          */
5577         if (result == 1)
5578                 em_print_nvm_info(adapter);
5579
5580         return (error);
5581 }
5582
5583 static void
5584 em_print_nvm_info(struct adapter *adapter)
5585 {
5586         u16     eeprom_data;
5587         int     i, j, row = 0;
5588
5589         /* Its a bit crude, but it gets the job done */
5590         printf("\nInterface EEPROM Dump:\n");
5591         printf("Offset\n0x0000  ");
5592         for (i = 0, j = 0; i < 32; i++, j++) {
5593                 if (j == 8) { /* Make the offset block */
5594                         j = 0; ++row;
5595                         printf("\n0x00%x0  ",row);
5596                 }
5597                 e1000_read_nvm(&adapter->hw, i, 1, &eeprom_data);
5598                 printf("%04x ", eeprom_data);
5599         }
5600         printf("\n");
5601 }
5602
5603 static int
5604 em_sysctl_int_delay(SYSCTL_HANDLER_ARGS)
5605 {
5606         struct em_int_delay_info *info;
5607         struct adapter *adapter;
5608         u32 regval;
5609         int error, usecs, ticks;
5610
5611         info = (struct em_int_delay_info *)arg1;
5612         usecs = info->value;
5613         error = sysctl_handle_int(oidp, &usecs, 0, req);
5614         if (error != 0 || req->newptr == NULL)
5615                 return (error);
5616         if (usecs < 0 || usecs > EM_TICKS_TO_USECS(65535))
5617                 return (EINVAL);
5618         info->value = usecs;
5619         ticks = EM_USECS_TO_TICKS(usecs);
5620         if (info->offset == E1000_ITR)  /* units are 256ns here */
5621                 ticks *= 4;
5622
5623         adapter = info->adapter;
5624         
5625         EM_CORE_LOCK(adapter);
5626         regval = E1000_READ_OFFSET(&adapter->hw, info->offset);
5627         regval = (regval & ~0xffff) | (ticks & 0xffff);
5628         /* Handle a few special cases. */
5629         switch (info->offset) {
5630         case E1000_RDTR:
5631                 break;
5632         case E1000_TIDV:
5633                 if (ticks == 0) {
5634                         adapter->txd_cmd &= ~E1000_TXD_CMD_IDE;
5635                         /* Don't write 0 into the TIDV register. */
5636                         regval++;
5637                 } else
5638                         adapter->txd_cmd |= E1000_TXD_CMD_IDE;
5639                 break;
5640         }
5641         E1000_WRITE_OFFSET(&adapter->hw, info->offset, regval);
5642         EM_CORE_UNLOCK(adapter);
5643         return (0);
5644 }
5645
5646 static void
5647 em_add_int_delay_sysctl(struct adapter *adapter, const char *name,
5648         const char *description, struct em_int_delay_info *info,
5649         int offset, int value)
5650 {
5651         info->adapter = adapter;
5652         info->offset = offset;
5653         info->value = value;
5654         SYSCTL_ADD_PROC(device_get_sysctl_ctx(adapter->dev),
5655             SYSCTL_CHILDREN(device_get_sysctl_tree(adapter->dev)),
5656             OID_AUTO, name, CTLTYPE_INT|CTLFLAG_RW,
5657             info, 0, em_sysctl_int_delay, "I", description);
5658 }
5659
5660 static void
5661 em_set_sysctl_value(struct adapter *adapter, const char *name,
5662         const char *description, int *limit, int value)
5663 {
5664         *limit = value;
5665         SYSCTL_ADD_INT(device_get_sysctl_ctx(adapter->dev),
5666             SYSCTL_CHILDREN(device_get_sysctl_tree(adapter->dev)),
5667             OID_AUTO, name, CTLTYPE_INT|CTLFLAG_RW, limit, value, description);
5668 }
5669
5670
5671 /*
5672 ** Set flow control using sysctl:
5673 ** Flow control values:
5674 **      0 - off
5675 **      1 - rx pause
5676 **      2 - tx pause
5677 **      3 - full
5678 */
5679 static int
5680 em_set_flowcntl(SYSCTL_HANDLER_ARGS)
5681 {       
5682         int             error;
5683         static int      input = 3; /* default is full */
5684         struct adapter  *adapter = (struct adapter *) arg1;
5685                     
5686         error = sysctl_handle_int(oidp, &input, 0, req);
5687     
5688         if ((error) || (req->newptr == NULL))
5689                 return (error);
5690                 
5691         if (input == adapter->fc) /* no change? */
5692                 return (error);
5693
5694         switch (input) {
5695                 case e1000_fc_rx_pause:
5696                 case e1000_fc_tx_pause:
5697                 case e1000_fc_full:
5698                 case e1000_fc_none:
5699                         adapter->hw.fc.requested_mode = input;
5700                         adapter->fc = input;
5701                         break;
5702                 default:
5703                         /* Do nothing */
5704                         return (error);
5705         }
5706
5707         adapter->hw.fc.current_mode = adapter->hw.fc.requested_mode;
5708         e1000_force_mac_fc(&adapter->hw);
5709         return (error);
5710 }
5711
5712 /*
5713 ** Manage Energy Efficient Ethernet:
5714 ** Control values:
5715 **     0/1 - enabled/disabled
5716 */
5717 static int
5718 em_sysctl_eee(SYSCTL_HANDLER_ARGS)
5719 {
5720        struct adapter *adapter = (struct adapter *) arg1;
5721        int             error, value;
5722
5723        value = adapter->hw.dev_spec.ich8lan.eee_disable;
5724        error = sysctl_handle_int(oidp, &value, 0, req);
5725        if (error || req->newptr == NULL)
5726                return (error);
5727        EM_CORE_LOCK(adapter);
5728        adapter->hw.dev_spec.ich8lan.eee_disable = (value != 0);
5729        em_init_locked(adapter);
5730        EM_CORE_UNLOCK(adapter);
5731        return (0);
5732 }
5733
5734 static int
5735 em_sysctl_debug_info(SYSCTL_HANDLER_ARGS)
5736 {
5737         struct adapter *adapter;
5738         int error;
5739         int result;
5740
5741         result = -1;
5742         error = sysctl_handle_int(oidp, &result, 0, req);
5743
5744         if (error || !req->newptr)
5745                 return (error);
5746
5747         if (result == 1) {
5748                 adapter = (struct adapter *)arg1;
5749                 em_print_debug_info(adapter);
5750         }
5751
5752         return (error);
5753 }
5754
5755 /*
5756 ** This routine is meant to be fluid, add whatever is
5757 ** needed for debugging a problem.  -jfv
5758 */
5759 static void
5760 em_print_debug_info(struct adapter *adapter)
5761 {
5762         device_t dev = adapter->dev;
5763         struct tx_ring *txr = adapter->tx_rings;
5764         struct rx_ring *rxr = adapter->rx_rings;
5765
5766         if (adapter->ifp->if_drv_flags & IFF_DRV_RUNNING)
5767                 printf("Interface is RUNNING ");
5768         else
5769                 printf("Interface is NOT RUNNING\n");
5770
5771         if (adapter->ifp->if_drv_flags & IFF_DRV_OACTIVE)
5772                 printf("and INACTIVE\n");
5773         else
5774                 printf("and ACTIVE\n");
5775
5776         device_printf(dev, "hw tdh = %d, hw tdt = %d\n",
5777             E1000_READ_REG(&adapter->hw, E1000_TDH(0)),
5778             E1000_READ_REG(&adapter->hw, E1000_TDT(0)));
5779         device_printf(dev, "hw rdh = %d, hw rdt = %d\n",
5780             E1000_READ_REG(&adapter->hw, E1000_RDH(0)),
5781             E1000_READ_REG(&adapter->hw, E1000_RDT(0)));
5782         device_printf(dev, "Tx Queue Status = %d\n", txr->queue_status);
5783         device_printf(dev, "TX descriptors avail = %d\n",
5784             txr->tx_avail);
5785         device_printf(dev, "Tx Descriptors avail failure = %ld\n",
5786             txr->no_desc_avail);
5787         device_printf(dev, "RX discarded packets = %ld\n",
5788             rxr->rx_discarded);
5789         device_printf(dev, "RX Next to Check = %d\n", rxr->next_to_check);
5790         device_printf(dev, "RX Next to Refresh = %d\n", rxr->next_to_refresh);
5791 }