]> CyberLeo.Net >> Repos - FreeBSD/stable/9.git/blob - sys/dev/e1000/if_em.c
MFC: sync the version of netmap with the one in HEAD, including device
[FreeBSD/stable/9.git] / sys / dev / e1000 / if_em.c
1 /******************************************************************************
2
3   Copyright (c) 2001-2013, Intel Corporation 
4   All rights reserved.
5   
6   Redistribution and use in source and binary forms, with or without 
7   modification, are permitted provided that the following conditions are met:
8   
9    1. Redistributions of source code must retain the above copyright notice, 
10       this list of conditions and the following disclaimer.
11   
12    2. Redistributions in binary form must reproduce the above copyright 
13       notice, this list of conditions and the following disclaimer in the 
14       documentation and/or other materials provided with the distribution.
15   
16    3. Neither the name of the Intel Corporation nor the names of its 
17       contributors may be used to endorse or promote products derived from 
18       this software without specific prior written permission.
19   
20   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
21   AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 
22   IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 
23   ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE 
24   LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 
25   CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 
26   SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 
27   INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 
28   CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 
29   ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
30   POSSIBILITY OF SUCH DAMAGE.
31
32 ******************************************************************************/
33 /*$FreeBSD$*/
34
35 #ifdef HAVE_KERNEL_OPTION_HEADERS
36 #include "opt_device_polling.h"
37 #include "opt_inet.h"
38 #include "opt_inet6.h"
39 #endif
40
41 #include <sys/param.h>
42 #include <sys/systm.h>
43 #if __FreeBSD_version >= 800000
44 #include <sys/buf_ring.h>
45 #endif
46 #include <sys/bus.h>
47 #include <sys/endian.h>
48 #include <sys/kernel.h>
49 #include <sys/kthread.h>
50 #include <sys/malloc.h>
51 #include <sys/mbuf.h>
52 #include <sys/module.h>
53 #include <sys/rman.h>
54 #include <sys/socket.h>
55 #include <sys/sockio.h>
56 #include <sys/sysctl.h>
57 #include <sys/taskqueue.h>
58 #include <sys/eventhandler.h>
59 #include <machine/bus.h>
60 #include <machine/resource.h>
61
62 #include <net/bpf.h>
63 #include <net/ethernet.h>
64 #include <net/if.h>
65 #include <net/if_arp.h>
66 #include <net/if_dl.h>
67 #include <net/if_media.h>
68
69 #include <net/if_types.h>
70 #include <net/if_vlan_var.h>
71
72 #include <netinet/in_systm.h>
73 #include <netinet/in.h>
74 #include <netinet/if_ether.h>
75 #include <netinet/ip.h>
76 #include <netinet/ip6.h>
77 #include <netinet/tcp.h>
78 #include <netinet/udp.h>
79
80 #include <machine/in_cksum.h>
81 #include <dev/led/led.h>
82 #include <dev/pci/pcivar.h>
83 #include <dev/pci/pcireg.h>
84
85 #include "e1000_api.h"
86 #include "e1000_82571.h"
87 #include "if_em.h"
88
/*********************************************************************
 *  Set this to one to display debug statistics
 *********************************************************************/
/* Toggled at runtime; read by the "debug" sysctl / debug print paths. */
int     em_display_debug_stats = 0;

/*********************************************************************
 *  Driver version:
 *********************************************************************/
/* Appended to the branding string reported by em_probe(). */
char em_driver_version[] = "7.3.7";
98
99 /*********************************************************************
100  *  PCI Device ID Table
101  *
102  *  Used by probe to select devices to load on
103  *  Last field stores an index into e1000_strings
104  *  Last entry must be all 0s
105  *
106  *  { Vendor ID, Device ID, SubVendor ID, SubDevice ID, String Index }
107  *********************************************************************/
108
static em_vendor_info_t em_vendor_info_array[] =
{
	/* Intel(R) PRO/1000 Network Connection */
	/* 82571/82572 family */
	{ 0x8086, E1000_DEV_ID_82571EB_COPPER,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82571EB_FIBER,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82571EB_SERDES,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82571EB_SERDES_DUAL,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82571EB_SERDES_QUAD,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82571EB_QUAD_COPPER,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82571EB_QUAD_COPPER_LP,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82571EB_QUAD_FIBER,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82571PT_QUAD_COPPER,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82572EI_COPPER,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82572EI_FIBER,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82572EI_SERDES,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82572EI,		PCI_ANY_ID, PCI_ANY_ID, 0},

	/* 82573/82583 and 80003ES2LAN family */
	{ 0x8086, E1000_DEV_ID_82573E,		PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82573E_IAMT,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82573L,		PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82583V,		PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_80003ES2LAN_COPPER_SPT,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_80003ES2LAN_SERDES_SPT,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_80003ES2LAN_COPPER_DPT,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_80003ES2LAN_SERDES_DPT,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	/* ICH8/ICH9/ICH10 LOM parts */
	{ 0x8086, E1000_DEV_ID_ICH8_IGP_M_AMT,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH8_IGP_AMT,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH8_IGP_C,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH8_IFE,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH8_IFE_GT,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH8_IFE_G,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH8_IGP_M,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH8_82567V_3,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH9_IGP_M_AMT,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH9_IGP_AMT,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH9_IGP_C,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH9_IGP_M,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH9_IGP_M_V,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH9_IFE,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH9_IFE_GT,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH9_IFE_G,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH9_BM,		PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82574L,		PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82574LA,		PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH10_R_BM_LM,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH10_R_BM_LF,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH10_R_BM_V,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH10_D_BM_LM,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH10_D_BM_LF,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH10_D_BM_V,	PCI_ANY_ID, PCI_ANY_ID, 0},
	/* PCH (Ibex Peak), PCH2 (Cougar Point) and Lynx Point parts */
	{ 0x8086, E1000_DEV_ID_PCH_M_HV_LM,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_PCH_M_HV_LC,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_PCH_D_HV_DM,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_PCH_D_HV_DC,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_PCH2_LV_LM,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_PCH2_LV_V,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_PCH_LPT_I217_LM,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_PCH_LPT_I217_V,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_PCH_LPTLP_I218_LM,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_PCH_LPTLP_I218_V,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	/* required last entry */
	{ 0, 0, 0, 0, 0}
};
184
/*********************************************************************
 *  Table of branding strings for all supported NICs.
 *********************************************************************/

/* Indexed by the last field of em_vendor_info_t; all entries above use 0. */
static char *em_strings[] = {
	"Intel(R) PRO/1000 Network Connection"
};
192
193 /*********************************************************************
194  *  Function prototypes
195  *********************************************************************/
196 static int      em_probe(device_t);
197 static int      em_attach(device_t);
198 static int      em_detach(device_t);
199 static int      em_shutdown(device_t);
200 static int      em_suspend(device_t);
201 static int      em_resume(device_t);
202 #ifdef EM_MULTIQUEUE
203 static int      em_mq_start(struct ifnet *, struct mbuf *);
204 static int      em_mq_start_locked(struct ifnet *,
205                     struct tx_ring *, struct mbuf *);
206 static void     em_qflush(struct ifnet *);
207 #else
208 static void     em_start(struct ifnet *);
209 static void     em_start_locked(struct ifnet *, struct tx_ring *);
210 #endif
211 static int      em_ioctl(struct ifnet *, u_long, caddr_t);
212 static void     em_init(void *);
213 static void     em_init_locked(struct adapter *);
214 static void     em_stop(void *);
215 static void     em_media_status(struct ifnet *, struct ifmediareq *);
216 static int      em_media_change(struct ifnet *);
217 static void     em_identify_hardware(struct adapter *);
218 static int      em_allocate_pci_resources(struct adapter *);
219 static int      em_allocate_legacy(struct adapter *);
220 static int      em_allocate_msix(struct adapter *);
221 static int      em_allocate_queues(struct adapter *);
222 static int      em_setup_msix(struct adapter *);
223 static void     em_free_pci_resources(struct adapter *);
224 static void     em_local_timer(void *);
225 static void     em_reset(struct adapter *);
226 static int      em_setup_interface(device_t, struct adapter *);
227
228 static void     em_setup_transmit_structures(struct adapter *);
229 static void     em_initialize_transmit_unit(struct adapter *);
230 static int      em_allocate_transmit_buffers(struct tx_ring *);
231 static void     em_free_transmit_structures(struct adapter *);
232 static void     em_free_transmit_buffers(struct tx_ring *);
233
234 static int      em_setup_receive_structures(struct adapter *);
235 static int      em_allocate_receive_buffers(struct rx_ring *);
236 static void     em_initialize_receive_unit(struct adapter *);
237 static void     em_free_receive_structures(struct adapter *);
238 static void     em_free_receive_buffers(struct rx_ring *);
239
240 static void     em_enable_intr(struct adapter *);
241 static void     em_disable_intr(struct adapter *);
242 static void     em_update_stats_counters(struct adapter *);
243 static void     em_add_hw_stats(struct adapter *adapter);
244 static void     em_txeof(struct tx_ring *);
245 static bool     em_rxeof(struct rx_ring *, int, int *);
246 #ifndef __NO_STRICT_ALIGNMENT
247 static int      em_fixup_rx(struct rx_ring *);
248 #endif
249 static void     em_receive_checksum(struct e1000_rx_desc *, struct mbuf *);
250 static void     em_transmit_checksum_setup(struct tx_ring *, struct mbuf *, int,
251                     struct ip *, u32 *, u32 *);
252 static void     em_tso_setup(struct tx_ring *, struct mbuf *, int, struct ip *,
253                     struct tcphdr *, u32 *, u32 *);
254 static void     em_set_promisc(struct adapter *);
255 static void     em_disable_promisc(struct adapter *);
256 static void     em_set_multi(struct adapter *);
257 static void     em_update_link_status(struct adapter *);
258 static void     em_refresh_mbufs(struct rx_ring *, int);
259 static void     em_register_vlan(void *, struct ifnet *, u16);
260 static void     em_unregister_vlan(void *, struct ifnet *, u16);
261 static void     em_setup_vlan_hw_support(struct adapter *);
262 static int      em_xmit(struct tx_ring *, struct mbuf **);
263 static int      em_dma_malloc(struct adapter *, bus_size_t,
264                     struct em_dma_alloc *, int);
265 static void     em_dma_free(struct adapter *, struct em_dma_alloc *);
266 static int      em_sysctl_nvm_info(SYSCTL_HANDLER_ARGS);
267 static void     em_print_nvm_info(struct adapter *);
268 static int      em_sysctl_debug_info(SYSCTL_HANDLER_ARGS);
269 static void     em_print_debug_info(struct adapter *);
270 static int      em_is_valid_ether_addr(u8 *);
271 static int      em_sysctl_int_delay(SYSCTL_HANDLER_ARGS);
272 static void     em_add_int_delay_sysctl(struct adapter *, const char *,
273                     const char *, struct em_int_delay_info *, int, int);
274 /* Management and WOL Support */
275 static void     em_init_manageability(struct adapter *);
276 static void     em_release_manageability(struct adapter *);
277 static void     em_get_hw_control(struct adapter *);
278 static void     em_release_hw_control(struct adapter *);
279 static void     em_get_wakeup(device_t);
280 static void     em_enable_wakeup(device_t);
281 static int      em_enable_phy_wakeup(struct adapter *);
282 static void     em_led_func(void *, int);
283 static void     em_disable_aspm(struct adapter *);
284
285 static int      em_irq_fast(void *);
286
287 /* MSIX handlers */
288 static void     em_msix_tx(void *);
289 static void     em_msix_rx(void *);
290 static void     em_msix_link(void *);
291 static void     em_handle_tx(void *context, int pending);
292 static void     em_handle_rx(void *context, int pending);
293 static void     em_handle_link(void *context, int pending);
294
295 static void     em_set_sysctl_value(struct adapter *, const char *,
296                     const char *, int *, int);
297 static int      em_set_flowcntl(SYSCTL_HANDLER_ARGS);
298 static int      em_sysctl_eee(SYSCTL_HANDLER_ARGS);
299
300 static __inline void em_rx_discard(struct rx_ring *, int);
301
302 #ifdef DEVICE_POLLING
303 static poll_handler_t em_poll;
304 #endif /* POLLING */
305
/*********************************************************************
 *  FreeBSD Device Interface Entry Points
 *********************************************************************/

static device_method_t em_methods[] = {
	/* Device interface */
	DEVMETHOD(device_probe, em_probe),
	DEVMETHOD(device_attach, em_attach),
	DEVMETHOD(device_detach, em_detach),
	DEVMETHOD(device_shutdown, em_shutdown),
	DEVMETHOD(device_suspend, em_suspend),
	DEVMETHOD(device_resume, em_resume),
	DEVMETHOD_END
};

/* softc size tells newbus how much per-device state to allocate */
static driver_t em_driver = {
	"em", em_methods, sizeof(struct adapter),
};

devclass_t em_devclass;
DRIVER_MODULE(em, pci, em_driver, em_devclass, 0, 0);
/* Load ordering: em requires the pci and ether modules. */
MODULE_DEPEND(em, pci, 1, 1, 1);
MODULE_DEPEND(em, ether, 1, 1, 1);
329
/*********************************************************************
 *  Tunable default values.
 *********************************************************************/

/* Convert between the hardware's 1.024 usec delay "ticks" and usecs (rounded). */
#define EM_TICKS_TO_USECS(ticks)	((1024 * (ticks) + 500) / 1000)
#define EM_USECS_TO_TICKS(usecs)	((1000 * (usecs) + 512) / 1024)
#define M_TSO_LEN			66	/* NOTE(review): presumably the TSO header pullup length — use site not in view */

/* ITR register default: cap interrupt rate; register unit is 256 ns. */
#define MAX_INTS_PER_SEC	8000
#define DEFAULT_ITR		(1000000000/(MAX_INTS_PER_SEC * 256))

/* Allow common code without TSO */
#ifndef CSUM_TSO
#define CSUM_TSO	0
#endif
345
static SYSCTL_NODE(_hw, OID_AUTO, em, CTLFLAG_RD, 0, "EM driver parameters");

/* Interrupt-delay defaults, converted from the EM_* tick values in if_em.h. */
static int em_tx_int_delay_dflt = EM_TICKS_TO_USECS(EM_TIDV);
static int em_rx_int_delay_dflt = EM_TICKS_TO_USECS(EM_RDTR);
TUNABLE_INT("hw.em.tx_int_delay", &em_tx_int_delay_dflt);
TUNABLE_INT("hw.em.rx_int_delay", &em_rx_int_delay_dflt);
SYSCTL_INT(_hw_em, OID_AUTO, tx_int_delay, CTLFLAG_RDTUN, &em_tx_int_delay_dflt,
    0, "Default transmit interrupt delay in usecs");
SYSCTL_INT(_hw_em, OID_AUTO, rx_int_delay, CTLFLAG_RDTUN, &em_rx_int_delay_dflt,
    0, "Default receive interrupt delay in usecs");

/* Absolute (upper-bound) interrupt-delay defaults. */
static int em_tx_abs_int_delay_dflt = EM_TICKS_TO_USECS(EM_TADV);
static int em_rx_abs_int_delay_dflt = EM_TICKS_TO_USECS(EM_RADV);
TUNABLE_INT("hw.em.tx_abs_int_delay", &em_tx_abs_int_delay_dflt);
TUNABLE_INT("hw.em.rx_abs_int_delay", &em_rx_abs_int_delay_dflt);
SYSCTL_INT(_hw_em, OID_AUTO, tx_abs_int_delay, CTLFLAG_RDTUN,
    &em_tx_abs_int_delay_dflt, 0,
    "Default transmit interrupt delay limit in usecs");
SYSCTL_INT(_hw_em, OID_AUTO, rx_abs_int_delay, CTLFLAG_RDTUN,
    &em_rx_abs_int_delay_dflt, 0,
    "Default receive interrupt delay limit in usecs");

/* Ring sizes; validated against EM_MIN/MAX_* and EM_DBA_ALIGN in em_attach(). */
static int em_rxd = EM_DEFAULT_RXD;
static int em_txd = EM_DEFAULT_TXD;
TUNABLE_INT("hw.em.rxd", &em_rxd);
TUNABLE_INT("hw.em.txd", &em_txd);
SYSCTL_INT(_hw_em, OID_AUTO, rxd, CTLFLAG_RDTUN, &em_rxd, 0,
    "Number of receive descriptors per queue");
SYSCTL_INT(_hw_em, OID_AUTO, txd, CTLFLAG_RDTUN, &em_txd, 0,
    "Number of transmit descriptors per queue");

static int em_smart_pwr_down = FALSE;
TUNABLE_INT("hw.em.smart_pwr_down", &em_smart_pwr_down);
SYSCTL_INT(_hw_em, OID_AUTO, smart_pwr_down, CTLFLAG_RDTUN, &em_smart_pwr_down,
    0, "Set to true to leave smart power down enabled on newer adapters");

/* Controls whether promiscuous also shows bad packets */
static int em_debug_sbp = FALSE;
TUNABLE_INT("hw.em.sbp", &em_debug_sbp);
SYSCTL_INT(_hw_em, OID_AUTO, sbp, CTLFLAG_RDTUN, &em_debug_sbp, 0,
    "Show bad packets in promiscuous mode");

static int em_enable_msix = TRUE;
TUNABLE_INT("hw.em.enable_msix", &em_enable_msix);
SYSCTL_INT(_hw_em, OID_AUTO, enable_msix, CTLFLAG_RDTUN, &em_enable_msix, 0,
    "Enable MSI-X interrupts");

/* How many packets rxeof tries to clean at a time */
static int em_rx_process_limit = 100;
TUNABLE_INT("hw.em.rx_process_limit", &em_rx_process_limit);
SYSCTL_INT(_hw_em, OID_AUTO, rx_process_limit, CTLFLAG_RDTUN,
    &em_rx_process_limit, 0,
    "Maximum number of received packets to process "
    "at a time, -1 means unlimited");

/*
 * Energy efficient ethernet - default to OFF.
 * NOTE: this value is copied into hw->dev_spec.ich8lan.eee_disable at
 * attach time, so a NON-zero setting disables EEE (hence default 1 = OFF);
 * the sysctl description below understates this inversion.
 */
static int eee_setting = 1;
TUNABLE_INT("hw.em.eee_setting", &eee_setting);
SYSCTL_INT(_hw_em, OID_AUTO, eee_setting, CTLFLAG_RDTUN, &eee_setting, 0,
    "Enable Energy Efficient Ethernet");

/* Global used in WOL setup with multiport cards */
static int global_quad_port_a = 0;
409
410 #ifdef DEV_NETMAP       /* see ixgbe.c for details */
411 #include <dev/netmap/if_em_netmap.h>
412 #endif /* DEV_NETMAP */
413
414 /*********************************************************************
415  *  Device identification routine
416  *
417  *  em_probe determines if the driver should be loaded on
418  *  adapter based on PCI vendor/device id of the adapter.
419  *
420  *  return BUS_PROBE_DEFAULT on success, positive on failure
421  *********************************************************************/
422
423 static int
424 em_probe(device_t dev)
425 {
426         char            adapter_name[60];
427         u16             pci_vendor_id = 0;
428         u16             pci_device_id = 0;
429         u16             pci_subvendor_id = 0;
430         u16             pci_subdevice_id = 0;
431         em_vendor_info_t *ent;
432
433         INIT_DEBUGOUT("em_probe: begin");
434
435         pci_vendor_id = pci_get_vendor(dev);
436         if (pci_vendor_id != EM_VENDOR_ID)
437                 return (ENXIO);
438
439         pci_device_id = pci_get_device(dev);
440         pci_subvendor_id = pci_get_subvendor(dev);
441         pci_subdevice_id = pci_get_subdevice(dev);
442
443         ent = em_vendor_info_array;
444         while (ent->vendor_id != 0) {
445                 if ((pci_vendor_id == ent->vendor_id) &&
446                     (pci_device_id == ent->device_id) &&
447
448                     ((pci_subvendor_id == ent->subvendor_id) ||
449                     (ent->subvendor_id == PCI_ANY_ID)) &&
450
451                     ((pci_subdevice_id == ent->subdevice_id) ||
452                     (ent->subdevice_id == PCI_ANY_ID))) {
453                         sprintf(adapter_name, "%s %s",
454                                 em_strings[ent->index],
455                                 em_driver_version);
456                         device_set_desc_copy(dev, adapter_name);
457                         return (BUS_PROBE_DEFAULT);
458                 }
459                 ent++;
460         }
461
462         return (ENXIO);
463 }
464
465 /*********************************************************************
466  *  Device initialization routine
467  *
468  *  The attach entry point is called when the driver is being loaded.
469  *  This routine identifies the type of hardware, allocates all resources
470  *  and initializes the hardware.
471  *
472  *  return 0 on success, positive on failure
473  *********************************************************************/
474
475 static int
476 em_attach(device_t dev)
477 {
478         struct adapter  *adapter;
479         struct e1000_hw *hw;
480         int             error = 0;
481
482         INIT_DEBUGOUT("em_attach: begin");
483
484         if (resource_disabled("em", device_get_unit(dev))) {
485                 device_printf(dev, "Disabled by device hint\n");
486                 return (ENXIO);
487         }
488
489         adapter = device_get_softc(dev);
490         adapter->dev = adapter->osdep.dev = dev;
491         hw = &adapter->hw;
492         EM_CORE_LOCK_INIT(adapter, device_get_nameunit(dev));
493
494         /* SYSCTL stuff */
495         SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
496             SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
497             OID_AUTO, "nvm", CTLTYPE_INT|CTLFLAG_RW, adapter, 0,
498             em_sysctl_nvm_info, "I", "NVM Information");
499
500         SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
501             SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
502             OID_AUTO, "debug", CTLTYPE_INT|CTLFLAG_RW, adapter, 0,
503             em_sysctl_debug_info, "I", "Debug Information");
504
505         SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
506             SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
507             OID_AUTO, "fc", CTLTYPE_INT|CTLFLAG_RW, adapter, 0,
508             em_set_flowcntl, "I", "Flow Control");
509
510         callout_init_mtx(&adapter->timer, &adapter->core_mtx, 0);
511
512         /* Determine hardware and mac info */
513         em_identify_hardware(adapter);
514
515         /* Setup PCI resources */
516         if (em_allocate_pci_resources(adapter)) {
517                 device_printf(dev, "Allocation of PCI resources failed\n");
518                 error = ENXIO;
519                 goto err_pci;
520         }
521
522         /*
523         ** For ICH8 and family we need to
524         ** map the flash memory, and this
525         ** must happen after the MAC is 
526         ** identified
527         */
528         if ((hw->mac.type == e1000_ich8lan) ||
529             (hw->mac.type == e1000_ich9lan) ||
530             (hw->mac.type == e1000_ich10lan) ||
531             (hw->mac.type == e1000_pchlan) ||
532             (hw->mac.type == e1000_pch2lan) ||
533             (hw->mac.type == e1000_pch_lpt)) {
534                 int rid = EM_BAR_TYPE_FLASH;
535                 adapter->flash = bus_alloc_resource_any(dev,
536                     SYS_RES_MEMORY, &rid, RF_ACTIVE);
537                 if (adapter->flash == NULL) {
538                         device_printf(dev, "Mapping of Flash failed\n");
539                         error = ENXIO;
540                         goto err_pci;
541                 }
542                 /* This is used in the shared code */
543                 hw->flash_address = (u8 *)adapter->flash;
544                 adapter->osdep.flash_bus_space_tag =
545                     rman_get_bustag(adapter->flash);
546                 adapter->osdep.flash_bus_space_handle =
547                     rman_get_bushandle(adapter->flash);
548         }
549
550         /* Do Shared Code initialization */
551         if (e1000_setup_init_funcs(hw, TRUE)) {
552                 device_printf(dev, "Setup of Shared code failed\n");
553                 error = ENXIO;
554                 goto err_pci;
555         }
556
557         e1000_get_bus_info(hw);
558
559         /* Set up some sysctls for the tunable interrupt delays */
560         em_add_int_delay_sysctl(adapter, "rx_int_delay",
561             "receive interrupt delay in usecs", &adapter->rx_int_delay,
562             E1000_REGISTER(hw, E1000_RDTR), em_rx_int_delay_dflt);
563         em_add_int_delay_sysctl(adapter, "tx_int_delay",
564             "transmit interrupt delay in usecs", &adapter->tx_int_delay,
565             E1000_REGISTER(hw, E1000_TIDV), em_tx_int_delay_dflt);
566         em_add_int_delay_sysctl(adapter, "rx_abs_int_delay",
567             "receive interrupt delay limit in usecs",
568             &adapter->rx_abs_int_delay,
569             E1000_REGISTER(hw, E1000_RADV),
570             em_rx_abs_int_delay_dflt);
571         em_add_int_delay_sysctl(adapter, "tx_abs_int_delay",
572             "transmit interrupt delay limit in usecs",
573             &adapter->tx_abs_int_delay,
574             E1000_REGISTER(hw, E1000_TADV),
575             em_tx_abs_int_delay_dflt);
576         em_add_int_delay_sysctl(adapter, "itr",
577             "interrupt delay limit in usecs/4",
578             &adapter->tx_itr,
579             E1000_REGISTER(hw, E1000_ITR),
580             DEFAULT_ITR);
581
582         /* Sysctl for limiting the amount of work done in the taskqueue */
583         em_set_sysctl_value(adapter, "rx_processing_limit",
584             "max number of rx packets to process", &adapter->rx_process_limit,
585             em_rx_process_limit);
586
587         /*
588          * Validate number of transmit and receive descriptors. It
589          * must not exceed hardware maximum, and must be multiple
590          * of E1000_DBA_ALIGN.
591          */
592         if (((em_txd * sizeof(struct e1000_tx_desc)) % EM_DBA_ALIGN) != 0 ||
593             (em_txd > EM_MAX_TXD) || (em_txd < EM_MIN_TXD)) {
594                 device_printf(dev, "Using %d TX descriptors instead of %d!\n",
595                     EM_DEFAULT_TXD, em_txd);
596                 adapter->num_tx_desc = EM_DEFAULT_TXD;
597         } else
598                 adapter->num_tx_desc = em_txd;
599
600         if (((em_rxd * sizeof(struct e1000_rx_desc)) % EM_DBA_ALIGN) != 0 ||
601             (em_rxd > EM_MAX_RXD) || (em_rxd < EM_MIN_RXD)) {
602                 device_printf(dev, "Using %d RX descriptors instead of %d!\n",
603                     EM_DEFAULT_RXD, em_rxd);
604                 adapter->num_rx_desc = EM_DEFAULT_RXD;
605         } else
606                 adapter->num_rx_desc = em_rxd;
607
608         hw->mac.autoneg = DO_AUTO_NEG;
609         hw->phy.autoneg_wait_to_complete = FALSE;
610         hw->phy.autoneg_advertised = AUTONEG_ADV_DEFAULT;
611
612         /* Copper options */
613         if (hw->phy.media_type == e1000_media_type_copper) {
614                 hw->phy.mdix = AUTO_ALL_MODES;
615                 hw->phy.disable_polarity_correction = FALSE;
616                 hw->phy.ms_type = EM_MASTER_SLAVE;
617         }
618
619         /*
620          * Set the frame limits assuming
621          * standard ethernet sized frames.
622          */
623         adapter->hw.mac.max_frame_size =
624             ETHERMTU + ETHER_HDR_LEN + ETHERNET_FCS_SIZE;
625
626         /*
627          * This controls when hardware reports transmit completion
628          * status.
629          */
630         hw->mac.report_tx_early = 1;
631
632         /* 
633         ** Get queue/ring memory
634         */
635         if (em_allocate_queues(adapter)) {
636                 error = ENOMEM;
637                 goto err_pci;
638         }
639
640         /* Allocate multicast array memory. */
641         adapter->mta = malloc(sizeof(u8) * ETH_ADDR_LEN *
642             MAX_NUM_MULTICAST_ADDRESSES, M_DEVBUF, M_NOWAIT);
643         if (adapter->mta == NULL) {
644                 device_printf(dev, "Can not allocate multicast setup array\n");
645                 error = ENOMEM;
646                 goto err_late;
647         }
648
649         /* Check SOL/IDER usage */
650         if (e1000_check_reset_block(hw))
651                 device_printf(dev, "PHY reset is blocked"
652                     " due to SOL/IDER session.\n");
653
654         /* Sysctl for setting Energy Efficient Ethernet */
655         hw->dev_spec.ich8lan.eee_disable = eee_setting;
656         SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
657             SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
658             OID_AUTO, "eee_control", CTLTYPE_INT|CTLFLAG_RW,
659             adapter, 0, em_sysctl_eee, "I",
660             "Disable Energy Efficient Ethernet");
661
662         /*
663         ** Start from a known state, this is
664         ** important in reading the nvm and
665         ** mac from that.
666         */
667         e1000_reset_hw(hw);
668
669
670         /* Make sure we have a good EEPROM before we read from it */
671         if (e1000_validate_nvm_checksum(hw) < 0) {
672                 /*
673                 ** Some PCI-E parts fail the first check due to
674                 ** the link being in sleep state, call it again,
675                 ** if it fails a second time its a real issue.
676                 */
677                 if (e1000_validate_nvm_checksum(hw) < 0) {
678                         device_printf(dev,
679                             "The EEPROM Checksum Is Not Valid\n");
680                         error = EIO;
681                         goto err_late;
682                 }
683         }
684
685         /* Copy the permanent MAC address out of the EEPROM */
686         if (e1000_read_mac_addr(hw) < 0) {
687                 device_printf(dev, "EEPROM read error while reading MAC"
688                     " address\n");
689                 error = EIO;
690                 goto err_late;
691         }
692
693         if (!em_is_valid_ether_addr(hw->mac.addr)) {
694                 device_printf(dev, "Invalid MAC address\n");
695                 error = EIO;
696                 goto err_late;
697         }
698
699         /*
700         **  Do interrupt configuration
701         */
702         if (adapter->msix > 1) /* Do MSIX */
703                 error = em_allocate_msix(adapter);
704         else  /* MSI or Legacy */
705                 error = em_allocate_legacy(adapter);
706         if (error)
707                 goto err_late;
708
709         /*
710          * Get Wake-on-Lan and Management info for later use
711          */
712         em_get_wakeup(dev);
713
714         /* Setup OS specific network interface */
715         if (em_setup_interface(dev, adapter) != 0)
716                 goto err_late;
717
718         em_reset(adapter);
719
720         /* Initialize statistics */
721         em_update_stats_counters(adapter);
722
723         hw->mac.get_link_status = 1;
724         em_update_link_status(adapter);
725
726         /* Register for VLAN events */
727         adapter->vlan_attach = EVENTHANDLER_REGISTER(vlan_config,
728             em_register_vlan, adapter, EVENTHANDLER_PRI_FIRST);
729         adapter->vlan_detach = EVENTHANDLER_REGISTER(vlan_unconfig,
730             em_unregister_vlan, adapter, EVENTHANDLER_PRI_FIRST); 
731
732         em_add_hw_stats(adapter);
733
734         /* Non-AMT based hardware can now take control from firmware */
735         if (adapter->has_manage && !adapter->has_amt)
736                 em_get_hw_control(adapter);
737
738         /* Tell the stack that the interface is not active */
739         adapter->ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
740         adapter->ifp->if_drv_flags |= IFF_DRV_OACTIVE;
741
742         adapter->led_dev = led_create(em_led_func, adapter,
743             device_get_nameunit(dev));
744 #ifdef DEV_NETMAP
745         em_netmap_attach(adapter);
746 #endif /* DEV_NETMAP */
747
748         INIT_DEBUGOUT("em_attach: end");
749
750         return (0);
751
752 err_late:
753         em_free_transmit_structures(adapter);
754         em_free_receive_structures(adapter);
755         em_release_hw_control(adapter);
756         if (adapter->ifp != NULL)
757                 if_free(adapter->ifp);
758 err_pci:
759         em_free_pci_resources(adapter);
760         free(adapter->mta, M_DEVBUF);
761         EM_CORE_LOCK_DESTROY(adapter);
762
763         return (error);
764 }
765
766 /*********************************************************************
767  *  Device removal routine
768  *
769  *  The detach entry point is called when the driver is being removed.
770  *  This routine stops the adapter and deallocates all the resources
771  *  that were allocated for driver operation.
772  *
773  *  return 0 on success, positive on failure
774  *********************************************************************/
775
static int
em_detach(device_t dev)
{
        struct adapter  *adapter = device_get_softc(dev);
        struct ifnet    *ifp = adapter->ifp;

        INIT_DEBUGOUT("em_detach: begin");

        /* Refuse to detach while a vlan trunk still references us */
        if (adapter->ifp->if_vlantrunk != NULL) {
                device_printf(dev,"Vlan in use, detach first\n");
                return (EBUSY);
        }

#ifdef DEVICE_POLLING
        /* Drop out of the polling rotation before tearing down */
        if (ifp->if_capenable & IFCAP_POLLING)
                ether_poll_deregister(ifp);
#endif

        if (adapter->led_dev != NULL)
                led_destroy(adapter->led_dev);

        /* Quiesce the adapter under the core lock, then retire the lock */
        EM_CORE_LOCK(adapter);
        adapter->in_detach = 1;
        em_stop(adapter);
        EM_CORE_UNLOCK(adapter);
        EM_CORE_LOCK_DESTROY(adapter);

        e1000_phy_hw_reset(&adapter->hw);

        /* Hand the hardware back to the firmware/manageability engine */
        em_release_manageability(adapter);
        em_release_hw_control(adapter);

        /* Unregister VLAN events */
        if (adapter->vlan_attach != NULL)
                EVENTHANDLER_DEREGISTER(vlan_config, adapter->vlan_attach);
        if (adapter->vlan_detach != NULL)
                EVENTHANDLER_DEREGISTER(vlan_unconfig, adapter->vlan_detach); 

        /* Detach from the network stack and stop the watchdog callout */
        ether_ifdetach(adapter->ifp);
        callout_drain(&adapter->timer);

#ifdef DEV_NETMAP
        netmap_detach(ifp);
#endif /* DEV_NETMAP */

        em_free_pci_resources(adapter);
        bus_generic_detach(dev);
        if_free(ifp);

        em_free_transmit_structures(adapter);
        em_free_receive_structures(adapter);

        /*
         * NOTE(review): em_release_hw_control() was already called above;
         * this second call is presumably an idempotent no-op — confirm
         * intent before removing.
         */
        em_release_hw_control(adapter);
        free(adapter->mta, M_DEVBUF);

        return (0);
}
834
835 /*********************************************************************
836  *
837  *  Shutdown entry point
838  *
839  **********************************************************************/
840
static int
em_shutdown(device_t dev)
{
        /* A system shutdown is handled identically to a suspend. */
        return em_suspend(dev);
}
846
847 /*
848  * Suspend/resume device methods.
849  */
850 static int
851 em_suspend(device_t dev)
852 {
853         struct adapter *adapter = device_get_softc(dev);
854
855         EM_CORE_LOCK(adapter);
856
857         em_release_manageability(adapter);
858         em_release_hw_control(adapter);
859         em_enable_wakeup(dev);
860
861         EM_CORE_UNLOCK(adapter);
862
863         return bus_generic_suspend(dev);
864 }
865
static int
em_resume(device_t dev)
{
        struct adapter *adapter = device_get_softc(dev);
        struct tx_ring  *txr = adapter->tx_rings;
        struct ifnet *ifp = adapter->ifp;

        EM_CORE_LOCK(adapter);
        /* PCH2 parts need silicon workarounds applied before re-init */
        if (adapter->hw.mac.type == e1000_pch2lan)
                e1000_resume_workarounds_pchlan(&adapter->hw);
        em_init_locked(adapter);
        em_init_manageability(adapter);

        /* Restart transmission on any queue with pending work */
        if ((ifp->if_flags & IFF_UP) &&
            (ifp->if_drv_flags & IFF_DRV_RUNNING) && adapter->link_active) {
                for (int i = 0; i < adapter->num_queues; i++, txr++) {
                        EM_TX_LOCK(txr);
#ifdef EM_MULTIQUEUE
                        if (!drbr_empty(ifp, txr->br))
                                em_mq_start_locked(ifp, txr, NULL);
#else
                        if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
                                em_start_locked(ifp, txr);
#endif
                        EM_TX_UNLOCK(txr);
                }
        }
        EM_CORE_UNLOCK(adapter);

        return bus_generic_resume(dev);
}
897
898
899 #ifdef EM_MULTIQUEUE
900 /*********************************************************************
901  *  Multiqueue Transmit routines 
902  *
903  *  em_mq_start is called by the stack to initiate a transmit.
904  *  however, if busy the driver can queue the request rather
905  *  than do an immediate send. It is this that is an advantage
906  *  in this driver, rather than also having multiple tx queues.
907  **********************************************************************/
static int
em_mq_start_locked(struct ifnet *ifp, struct tx_ring *txr, struct mbuf *m)
{
        struct adapter  *adapter = txr->adapter;
        struct mbuf     *next;
        int             err = 0, enq = 0;

        /*
         * If the interface is down, flow-blocked, or the link is not
         * up, just queue the packet (if any) and bail out.
         */
        if ((ifp->if_drv_flags & (IFF_DRV_RUNNING | IFF_DRV_OACTIVE)) !=
            IFF_DRV_RUNNING || adapter->link_active == 0) {
                if (m != NULL)
                        err = drbr_enqueue(ifp, txr->br, m);
                return (err);
        }

        enq = 0;
        if (m != NULL) {
                err = drbr_enqueue(ifp, txr->br, m);
                if (err)
                        return (err);
        } 

        /*
         * Drain the buf_ring with the peek/advance/putback protocol:
         * a packet is only advanced past once em_xmit() has consumed it.
         */
        while ((next = drbr_peek(ifp, txr->br)) != NULL) {
                if ((err = em_xmit(txr, &next)) != 0) {
                        /*
                         * em_xmit() may free the mbuf and NULL the
                         * pointer on an unrecoverable error; only put
                         * it back on the ring if it still exists.
                         */
                        if (next == NULL)
                                drbr_advance(ifp, txr->br);
                        else 
                                drbr_putback(ifp, txr->br, next);
                        break;
                }
                drbr_advance(ifp, txr->br);
                enq++;
                ifp->if_obytes += next->m_pkthdr.len;
                if (next->m_flags & M_MCAST)
                        ifp->if_omcasts++;
                /* Hand a copy of the frame to any BPF listeners */
                ETHER_BPF_MTAP(ifp, next);
                if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
                        break;
        }

        if (enq > 0) {
                /* Set the watchdog */
                txr->queue_status = EM_QUEUE_WORKING;
                txr->watchdog_time = ticks;
        }

        /*
         * Reclaim descriptors if running low; if still below the
         * scatter threshold, throttle the stack with OACTIVE.
         */
        if (txr->tx_avail < EM_MAX_SCATTER)
                em_txeof(txr);
        if (txr->tx_avail < EM_MAX_SCATTER)
                ifp->if_drv_flags |= IFF_DRV_OACTIVE;
        return (err);
}
960
961 /*
962 ** Multiqueue capable stack interface
963 */
964 static int
965 em_mq_start(struct ifnet *ifp, struct mbuf *m)
966 {
967         struct adapter  *adapter = ifp->if_softc;
968         struct tx_ring  *txr = adapter->tx_rings;
969         int             error;
970
971         if (EM_TX_TRYLOCK(txr)) {
972                 error = em_mq_start_locked(ifp, txr, m);
973                 EM_TX_UNLOCK(txr);
974         } else 
975                 error = drbr_enqueue(ifp, txr->br, m);
976
977         return (error);
978 }
979
980 /*
981 ** Flush all ring buffers
982 */
983 static void
984 em_qflush(struct ifnet *ifp)
985 {
986         struct adapter  *adapter = ifp->if_softc;
987         struct tx_ring  *txr = adapter->tx_rings;
988         struct mbuf     *m;
989
990         for (int i = 0; i < adapter->num_queues; i++, txr++) {
991                 EM_TX_LOCK(txr);
992                 while ((m = buf_ring_dequeue_sc(txr->br)) != NULL)
993                         m_freem(m);
994                 EM_TX_UNLOCK(txr);
995         }
996         if_qflush(ifp);
997 }
998 #else  /* !EM_MULTIQUEUE */
999
static void
em_start_locked(struct ifnet *ifp, struct tx_ring *txr)
{
        struct adapter  *adapter = ifp->if_softc;
        struct mbuf     *m_head;

        EM_TX_LOCK_ASSERT(txr);

        /* Nothing to do if the interface is down or flow-blocked */
        if ((ifp->if_drv_flags & (IFF_DRV_RUNNING|IFF_DRV_OACTIVE)) !=
            IFF_DRV_RUNNING)
                return;

        if (!adapter->link_active)
                return;

        while (!IFQ_DRV_IS_EMPTY(&ifp->if_snd)) {
                /* Call cleanup if number of TX descriptors low */
                if (txr->tx_avail <= EM_TX_CLEANUP_THRESHOLD)
                        em_txeof(txr);
                /*
                 * Throttle the stack when the ring can no longer take
                 * a maximally fragmented packet.
                 */
                if (txr->tx_avail < EM_MAX_SCATTER) {
                        ifp->if_drv_flags |= IFF_DRV_OACTIVE;
                        break;
                }
                IFQ_DRV_DEQUEUE(&ifp->if_snd, m_head);
                if (m_head == NULL)
                        break;
                /*
                 *  Encapsulation can modify our pointer, and or make it
                 *  NULL on failure.  In that event, we can't requeue.
                 */
                if (em_xmit(txr, &m_head)) {
                        if (m_head == NULL)
                                break;
                        IFQ_DRV_PREPEND(&ifp->if_snd, m_head);
                        break;
                }

                /* Send a copy of the frame to the BPF listener */
                ETHER_BPF_MTAP(ifp, m_head);

                /* Set timeout in case hardware has problems transmitting. */
                txr->watchdog_time = ticks;
                txr->queue_status = EM_QUEUE_WORKING;
        }

        return;
}
1047
1048 static void
1049 em_start(struct ifnet *ifp)
1050 {
1051         struct adapter  *adapter = ifp->if_softc;
1052         struct tx_ring  *txr = adapter->tx_rings;
1053
1054         if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
1055                 EM_TX_LOCK(txr);
1056                 em_start_locked(ifp, txr);
1057                 EM_TX_UNLOCK(txr);
1058         }
1059         return;
1060 }
1061 #endif /* EM_MULTIQUEUE */
1062
1063 /*********************************************************************
1064  *  Ioctl entry point
1065  *
1066  *  em_ioctl is called when the user wants to configure the
1067  *  interface.
1068  *
1069  *  return 0 on success, positive on failure
1070  **********************************************************************/
1071
static int
em_ioctl(struct ifnet *ifp, u_long command, caddr_t data)
{
        struct adapter  *adapter = ifp->if_softc;
        struct ifreq    *ifr = (struct ifreq *)data;
#if defined(INET) || defined(INET6)
        struct ifaddr   *ifa = (struct ifaddr *)data;
#endif
        bool            avoid_reset = FALSE;
        int             error = 0;

        /* Ignore requests that race with an in-progress detach */
        if (adapter->in_detach)
                return (error);

        switch (command) {
        case SIOCSIFADDR:
#ifdef INET
                if (ifa->ifa_addr->sa_family == AF_INET)
                        avoid_reset = TRUE;
#endif
#ifdef INET6
                if (ifa->ifa_addr->sa_family == AF_INET6)
                        avoid_reset = TRUE;
#endif
                /*
                ** Calling init results in link renegotiation,
                ** so we avoid doing it when possible.
                */
                if (avoid_reset) {
                        ifp->if_flags |= IFF_UP;
                        if (!(ifp->if_drv_flags & IFF_DRV_RUNNING))
                                em_init(adapter);
#ifdef INET
                        if (!(ifp->if_flags & IFF_NOARP))
                                arp_ifinit(ifp, ifa);
#endif
                } else
                        error = ether_ioctl(ifp, command, data);
                break;
        case SIOCSIFMTU:
            {
                int max_frame_size;

                IOCTL_DEBUGOUT("ioctl rcv'd: SIOCSIFMTU (Set Interface MTU)");

                EM_CORE_LOCK(adapter);
                /* The largest supported frame varies by MAC type */
                switch (adapter->hw.mac.type) {
                case e1000_82571:
                case e1000_82572:
                case e1000_ich9lan:
                case e1000_ich10lan:
                case e1000_pch2lan:
                case e1000_pch_lpt:
                case e1000_82574:
                case e1000_82583:
                case e1000_80003es2lan: /* 9K Jumbo Frame size */
                        max_frame_size = 9234;
                        break;
                case e1000_pchlan:
                        max_frame_size = 4096;
                        break;
                        /* Adapters that do not support jumbo frames */
                case e1000_ich8lan:
                        max_frame_size = ETHER_MAX_LEN;
                        break;
                default:
                        max_frame_size = MAX_JUMBO_FRAME_SIZE;
                }
                if (ifr->ifr_mtu > max_frame_size - ETHER_HDR_LEN -
                    ETHER_CRC_LEN) {
                        EM_CORE_UNLOCK(adapter);
                        error = EINVAL;
                        break;
                }

                /* A new MTU requires a full re-initialization */
                ifp->if_mtu = ifr->ifr_mtu;
                adapter->hw.mac.max_frame_size =
                    ifp->if_mtu + ETHER_HDR_LEN + ETHER_CRC_LEN;
                em_init_locked(adapter);
                EM_CORE_UNLOCK(adapter);
                break;
            }
        case SIOCSIFFLAGS:
                IOCTL_DEBUGOUT("ioctl rcv'd:\
                    SIOCSIFFLAGS (Set Interface Flags)");
                EM_CORE_LOCK(adapter);
                if (ifp->if_flags & IFF_UP) {
                        if ((ifp->if_drv_flags & IFF_DRV_RUNNING)) {
                                /*
                                 * Only reprogram the filters when the
                                 * promisc/allmulti bits actually changed.
                                 */
                                if ((ifp->if_flags ^ adapter->if_flags) &
                                    (IFF_PROMISC | IFF_ALLMULTI)) {
                                        em_disable_promisc(adapter);
                                        em_set_promisc(adapter);
                                }
                        } else
                                em_init_locked(adapter);
                } else
                        if (ifp->if_drv_flags & IFF_DRV_RUNNING)
                                em_stop(adapter);
                adapter->if_flags = ifp->if_flags;
                EM_CORE_UNLOCK(adapter);
                break;
        case SIOCADDMULTI:
        case SIOCDELMULTI:
                IOCTL_DEBUGOUT("ioctl rcv'd: SIOC(ADD|DEL)MULTI");
                if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
                        EM_CORE_LOCK(adapter);
                        em_disable_intr(adapter);
                        em_set_multi(adapter);
#ifdef DEVICE_POLLING
                        /* Leave interrupts masked while polling is active */
                        if (!(ifp->if_capenable & IFCAP_POLLING))
#endif
                                em_enable_intr(adapter);
                        EM_CORE_UNLOCK(adapter);
                }
                break;
        case SIOCSIFMEDIA:
                /* Check SOL/IDER usage */
                EM_CORE_LOCK(adapter);
                if (e1000_check_reset_block(&adapter->hw)) {
                        EM_CORE_UNLOCK(adapter);
                        device_printf(adapter->dev, "Media change is"
                            " blocked due to SOL/IDER session.\n");
                        break;
                }
                EM_CORE_UNLOCK(adapter);
                /* falls thru */
        case SIOCGIFMEDIA:
                IOCTL_DEBUGOUT("ioctl rcv'd: \
                    SIOCxIFMEDIA (Get/Set Interface Media)");
                error = ifmedia_ioctl(ifp, ifr, &adapter->media, command);
                break;
        case SIOCSIFCAP:
            {
                int mask, reinit;

                IOCTL_DEBUGOUT("ioctl rcv'd: SIOCSIFCAP (Set Capabilities)");
                reinit = 0;
                /* 'mask' holds the capability bits being toggled */
                mask = ifr->ifr_reqcap ^ ifp->if_capenable;
#ifdef DEVICE_POLLING
                if (mask & IFCAP_POLLING) {
                        if (ifr->ifr_reqcap & IFCAP_POLLING) {
                                error = ether_poll_register(em_poll, ifp);
                                if (error)
                                        return (error);
                                EM_CORE_LOCK(adapter);
                                em_disable_intr(adapter);
                                ifp->if_capenable |= IFCAP_POLLING;
                                EM_CORE_UNLOCK(adapter);
                        } else {
                                error = ether_poll_deregister(ifp);
                                /* Enable interrupt even in error case */
                                EM_CORE_LOCK(adapter);
                                em_enable_intr(adapter);
                                ifp->if_capenable &= ~IFCAP_POLLING;
                                EM_CORE_UNLOCK(adapter);
                        }
                }
#endif
                if (mask & IFCAP_HWCSUM) {
                        ifp->if_capenable ^= IFCAP_HWCSUM;
                        reinit = 1;
                }
                if (mask & IFCAP_TSO4) {
                        ifp->if_capenable ^= IFCAP_TSO4;
                        reinit = 1;
                }
                if (mask & IFCAP_VLAN_HWTAGGING) {
                        ifp->if_capenable ^= IFCAP_VLAN_HWTAGGING;
                        reinit = 1;
                }
                if (mask & IFCAP_VLAN_HWFILTER) {
                        ifp->if_capenable ^= IFCAP_VLAN_HWFILTER;
                        reinit = 1;
                }
                if (mask & IFCAP_VLAN_HWTSO) {
                        ifp->if_capenable ^= IFCAP_VLAN_HWTSO;
                        reinit = 1;
                }
                if ((mask & IFCAP_WOL) &&
                    (ifp->if_capabilities & IFCAP_WOL) != 0) {
                        if (mask & IFCAP_WOL_MCAST)
                                ifp->if_capenable ^= IFCAP_WOL_MCAST;
                        if (mask & IFCAP_WOL_MAGIC)
                                ifp->if_capenable ^= IFCAP_WOL_MAGIC;
                }
                /* Offload changes require a re-init to take effect */
                if (reinit && (ifp->if_drv_flags & IFF_DRV_RUNNING))
                        em_init(adapter);
                VLAN_CAPABILITIES(ifp);
                break;
            }

        default:
                error = ether_ioctl(ifp, command, data);
                break;
        }

        return (error);
}
1270
1271
1272 /*********************************************************************
1273  *  Init entry point
1274  *
1275  *  This routine is used in two ways. It is used by the stack as
1276  *  init entry point in network interface structure. It is also used
1277  *  by the driver as a hw/sw initialization routine to get to a
1278  *  consistent state.
1279  *
1280  *  return 0 on success, positive on failure
1281  **********************************************************************/
1282
static void
em_init_locked(struct adapter *adapter)
{
        struct ifnet    *ifp = adapter->ifp;
        device_t        dev = adapter->dev;

        INIT_DEBUGOUT("em_init: begin");

        EM_CORE_LOCK_ASSERT(adapter);

        /* Quiesce the device before reprogramming it */
        em_disable_intr(adapter);
        callout_stop(&adapter->timer);

        /* Get the latest mac address, User can use a LAA */
        bcopy(IF_LLADDR(adapter->ifp), adapter->hw.mac.addr,
              ETHER_ADDR_LEN);

        /* Put the address into the Receive Address Array */
        e1000_rar_set(&adapter->hw, adapter->hw.mac.addr, 0);

        /*
         * With the 82571 adapter, RAR[0] may be overwritten
         * when the other port is reset, we make a duplicate
         * in RAR[14] for that eventuality, this assures
         * the interface continues to function.
         */
        if (adapter->hw.mac.type == e1000_82571) {
                e1000_set_laa_state_82571(&adapter->hw, TRUE);
                e1000_rar_set(&adapter->hw, adapter->hw.mac.addr,
                    E1000_RAR_ENTRIES - 1);
        }

        /* Initialize the hardware */
        em_reset(adapter);
        em_update_link_status(adapter);

        /* Setup VLAN support, basic and offload if available */
        E1000_WRITE_REG(&adapter->hw, E1000_VET, ETHERTYPE_VLAN);

        /* Set hardware offload abilities */
        ifp->if_hwassist = 0;
        if (ifp->if_capenable & IFCAP_TXCSUM)
                ifp->if_hwassist |= (CSUM_TCP | CSUM_UDP);
        if (ifp->if_capenable & IFCAP_TSO4)
                ifp->if_hwassist |= CSUM_TSO;

        /* Configure for OS presence */
        em_init_manageability(adapter);

        /* Prepare transmit descriptors and buffers */
        em_setup_transmit_structures(adapter);
        em_initialize_transmit_unit(adapter);

        /* Setup Multicast table */
        em_set_multi(adapter);

        /*
        ** Figure out the desired mbuf
        ** pool for doing jumbos
        */
        if (adapter->hw.mac.max_frame_size <= 2048)
                adapter->rx_mbuf_sz = MCLBYTES;
        else if (adapter->hw.mac.max_frame_size <= 4096)
                adapter->rx_mbuf_sz = MJUMPAGESIZE;
        else
                adapter->rx_mbuf_sz = MJUM9BYTES;

        /* Prepare receive descriptors and buffers */
        if (em_setup_receive_structures(adapter)) {
                device_printf(dev, "Could not setup receive structures\n");
                em_stop(adapter);
                return;
        }
        em_initialize_receive_unit(adapter);

        /* Use real VLAN Filter support? */
        if (ifp->if_capenable & IFCAP_VLAN_HWTAGGING) {
                if (ifp->if_capenable & IFCAP_VLAN_HWFILTER)
                        /* Use real VLAN Filter support */
                        em_setup_vlan_hw_support(adapter);
                else {
                        /* Just enable hardware VLAN tag stripping */
                        u32 ctrl;
                        ctrl = E1000_READ_REG(&adapter->hw, E1000_CTRL);
                        ctrl |= E1000_CTRL_VME;
                        E1000_WRITE_REG(&adapter->hw, E1000_CTRL, ctrl);
                }
        }

        /* Don't lose promiscuous settings */
        em_set_promisc(adapter);

        /* Set the interface as ACTIVE */
        ifp->if_drv_flags |= IFF_DRV_RUNNING;
        ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;

        /* Restart the watchdog timer and clear the hardware counters */
        callout_reset(&adapter->timer, hz, em_local_timer, adapter);
        e1000_clear_hw_cntrs_base_generic(&adapter->hw);

        /* MSI/X configuration for 82574 */
        if (adapter->hw.mac.type == e1000_82574) {
                int tmp;
                tmp = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
                tmp |= E1000_CTRL_EXT_PBA_CLR;
                E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT, tmp);
                /* Set the IVAR - interrupt vector routing. */
                E1000_WRITE_REG(&adapter->hw, E1000_IVAR, adapter->ivars);
        }

#ifdef DEVICE_POLLING
        /*
         * Only enable interrupts if we are not polling, make sure
         * they are off otherwise.
         */
        if (ifp->if_capenable & IFCAP_POLLING)
                em_disable_intr(adapter);
        else
#endif /* DEVICE_POLLING */
                em_enable_intr(adapter);

        /* AMT based hardware can now take control from firmware */
        if (adapter->has_manage && adapter->has_amt)
                em_get_hw_control(adapter);
}
1406
static void
em_init(void *arg)
{
        struct adapter *adapter = arg;

        /* if_init entry point: run em_init_locked() under the core lock */
        EM_CORE_LOCK(adapter);
        em_init_locked(adapter);
        EM_CORE_UNLOCK(adapter);
}
1416
1417
1418 #ifdef DEVICE_POLLING
1419 /*********************************************************************
1420  *
1421  *  Legacy polling routine: note this only works with single queue
1422  *
1423  *********************************************************************/
static int
em_poll(struct ifnet *ifp, enum poll_cmd cmd, int count)
{
        struct adapter *adapter = ifp->if_softc;
        struct tx_ring  *txr = adapter->tx_rings;
        struct rx_ring  *rxr = adapter->rx_rings;
        u32             reg_icr;
        int             rx_done;

        EM_CORE_LOCK(adapter);
        if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) {
                EM_CORE_UNLOCK(adapter);
                return (0);
        }

        /* On a status check, refresh link state if the hw flagged a change */
        if (cmd == POLL_AND_CHECK_STATUS) {
                reg_icr = E1000_READ_REG(&adapter->hw, E1000_ICR);
                if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
                        callout_stop(&adapter->timer);
                        adapter->hw.mac.get_link_status = 1;
                        em_update_link_status(adapter);
                        callout_reset(&adapter->timer, hz,
                            em_local_timer, adapter);
                }
        }
        EM_CORE_UNLOCK(adapter);

        /* Process up to 'count' received packets on the first rx ring */
        em_rxeof(rxr, count, &rx_done);

        /* Reclaim completed transmits, then restart the tx path */
        EM_TX_LOCK(txr);
        em_txeof(txr);
#ifdef EM_MULTIQUEUE
        if (!drbr_empty(ifp, txr->br))
                em_mq_start_locked(ifp, txr, NULL);
#else
        if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
                em_start_locked(ifp, txr);
#endif
        EM_TX_UNLOCK(txr);

        return (rx_done);
}
1466 #endif /* DEVICE_POLLING */
1467
1468
1469 /*********************************************************************
1470  *
1471  *  Fast Legacy/MSI Combined Interrupt Service routine  
1472  *
1473  *********************************************************************/
1474 static int
1475 em_irq_fast(void *arg)
1476 {
1477         struct adapter  *adapter = arg;
1478         struct ifnet    *ifp;
1479         u32             reg_icr;
1480
1481         ifp = adapter->ifp;
1482
1483         reg_icr = E1000_READ_REG(&adapter->hw, E1000_ICR);
1484
1485         /* Hot eject?  */
1486         if (reg_icr == 0xffffffff)
1487                 return FILTER_STRAY;
1488
1489         /* Definitely not our interrupt.  */
1490         if (reg_icr == 0x0)
1491                 return FILTER_STRAY;
1492
1493         /*
1494          * Starting with the 82571 chip, bit 31 should be used to
1495          * determine whether the interrupt belongs to us.
1496          */
1497         if (adapter->hw.mac.type >= e1000_82571 &&
1498             (reg_icr & E1000_ICR_INT_ASSERTED) == 0)
1499                 return FILTER_STRAY;
1500
1501         em_disable_intr(adapter);
1502         taskqueue_enqueue(adapter->tq, &adapter->que_task);
1503
1504         /* Link status change */
1505         if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
1506                 adapter->hw.mac.get_link_status = 1;
1507                 taskqueue_enqueue(taskqueue_fast, &adapter->link_task);
1508         }
1509
1510         if (reg_icr & E1000_ICR_RXO)
1511                 adapter->rx_overruns++;
1512         return FILTER_HANDLED;
1513 }
1514
1515 /* Combined RX/TX handler, used by Legacy and MSI */
1516 static void
1517 em_handle_que(void *context, int pending)
1518 {
1519         struct adapter  *adapter = context;
1520         struct ifnet    *ifp = adapter->ifp;
1521         struct tx_ring  *txr = adapter->tx_rings;
1522         struct rx_ring  *rxr = adapter->rx_rings;
1523
1524
1525         if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
1526                 bool more = em_rxeof(rxr, adapter->rx_process_limit, NULL);
1527                 EM_TX_LOCK(txr);
1528                 em_txeof(txr);
1529 #ifdef EM_MULTIQUEUE
1530                 if (!drbr_empty(ifp, txr->br))
1531                         em_mq_start_locked(ifp, txr, NULL);
1532 #else
1533                 if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
1534                         em_start_locked(ifp, txr);
1535 #endif
1536                 EM_TX_UNLOCK(txr);
1537                 if (more) {
1538                         taskqueue_enqueue(adapter->tq, &adapter->que_task);
1539                         return;
1540                 }
1541         }
1542
1543         em_enable_intr(adapter);
1544         return;
1545 }
1546
1547
1548 /*********************************************************************
1549  *
1550  *  MSIX Interrupt Service Routines
1551  *
1552  **********************************************************************/
static void
em_msix_tx(void *arg)
{
        struct tx_ring *txr = arg;
        struct adapter *adapter = txr->adapter;
        struct ifnet    *ifp = adapter->ifp;

        /*
         * Per-ring MSI-X TX interrupt: reap completed descriptors,
         * restart transmission if frames are queued, then re-arm this
         * ring's interrupt cause before dropping the TX lock.
         */
        ++txr->tx_irq;          /* statistics */
        EM_TX_LOCK(txr);
        em_txeof(txr);
#ifdef EM_MULTIQUEUE
        if (!drbr_empty(ifp, txr->br))
                em_mq_start_locked(ifp, txr, NULL);
#else
        if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
                em_start_locked(ifp, txr);
#endif
        /* Reenable this interrupt */
        E1000_WRITE_REG(&adapter->hw, E1000_IMS, txr->ims);
        EM_TX_UNLOCK(txr);
        return;
}
1575
1576 /*********************************************************************
1577  *
1578  *  MSIX RX Interrupt Service routine
1579  *
1580  **********************************************************************/
1581
1582 static void
1583 em_msix_rx(void *arg)
1584 {
1585         struct rx_ring  *rxr = arg;
1586         struct adapter  *adapter = rxr->adapter;
1587         bool            more;
1588
1589         ++rxr->rx_irq;
1590         if (!(adapter->ifp->if_drv_flags & IFF_DRV_RUNNING))
1591                 return;
1592         more = em_rxeof(rxr, adapter->rx_process_limit, NULL);
1593         if (more)
1594                 taskqueue_enqueue(rxr->tq, &rxr->rx_task);
1595         else
1596                 /* Reenable this interrupt */
1597                 E1000_WRITE_REG(&adapter->hw, E1000_IMS, rxr->ims);
1598         return;
1599 }
1600
1601 /*********************************************************************
1602  *
1603  *  MSIX Link Fast Interrupt Service routine
1604  *
1605  **********************************************************************/
1606 static void
1607 em_msix_link(void *arg)
1608 {
1609         struct adapter  *adapter = arg;
1610         u32             reg_icr;
1611
1612         ++adapter->link_irq;
1613         reg_icr = E1000_READ_REG(&adapter->hw, E1000_ICR);
1614
1615         if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
1616                 adapter->hw.mac.get_link_status = 1;
1617                 em_handle_link(adapter, 0);
1618         } else
1619                 E1000_WRITE_REG(&adapter->hw, E1000_IMS,
1620                     EM_MSIX_LINK | E1000_IMS_LSC);
1621         return;
1622 }
1623
1624 static void
1625 em_handle_rx(void *context, int pending)
1626 {
1627         struct rx_ring  *rxr = context;
1628         struct adapter  *adapter = rxr->adapter;
1629         bool            more;
1630
1631         more = em_rxeof(rxr, adapter->rx_process_limit, NULL);
1632         if (more)
1633                 taskqueue_enqueue(rxr->tq, &rxr->rx_task);
1634         else
1635                 /* Reenable this interrupt */
1636                 E1000_WRITE_REG(&adapter->hw, E1000_IMS, rxr->ims);
1637 }
1638
static void
em_handle_tx(void *context, int pending)
{
        struct tx_ring  *txr = context;
        struct adapter  *adapter = txr->adapter;
        struct ifnet    *ifp = adapter->ifp;

        /*
         * Deferred TX cleaning task (scheduled from em_local_timer
         * when descriptors run low): reap completions, restart
         * transmission, and re-arm this ring's interrupt cause.
         */
        EM_TX_LOCK(txr);
        em_txeof(txr);
#ifdef EM_MULTIQUEUE
        if (!drbr_empty(ifp, txr->br))
                em_mq_start_locked(ifp, txr, NULL);
#else
        if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
                em_start_locked(ifp, txr);
#endif
        /* Re-enable this ring's interrupt cause. */
        E1000_WRITE_REG(&adapter->hw, E1000_IMS, txr->ims);
        EM_TX_UNLOCK(txr);
}
1658
static void
em_handle_link(void *context, int pending)
{
        struct adapter  *adapter = context;
        struct tx_ring  *txr = adapter->tx_rings;
        struct ifnet *ifp = adapter->ifp;

        /*
         * Link-state change handler: refresh the cached link state,
         * re-arm the link interrupt, and if the link came up kick
         * every TX queue so frames queued while the link was down
         * start moving again.
         */
        if (!(ifp->if_drv_flags & IFF_DRV_RUNNING))
                return;

        EM_CORE_LOCK(adapter);
        /* Pause the stats/watchdog timer while we update link state. */
        callout_stop(&adapter->timer);
        em_update_link_status(adapter);
        callout_reset(&adapter->timer, hz, em_local_timer, adapter);
        /* Re-arm the link interrupt cause. */
        E1000_WRITE_REG(&adapter->hw, E1000_IMS,
            EM_MSIX_LINK | E1000_IMS_LSC);
        if (adapter->link_active) {
                /* Link is up: restart transmission on all queues. */
                for (int i = 0; i < adapter->num_queues; i++, txr++) {
                        EM_TX_LOCK(txr);
#ifdef EM_MULTIQUEUE
                        if (!drbr_empty(ifp, txr->br))
                                em_mq_start_locked(ifp, txr, NULL);
#else
                        if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
                                em_start_locked(ifp, txr);
#endif
                        EM_TX_UNLOCK(txr);
                }
        }
        EM_CORE_UNLOCK(adapter);
}
1690
1691
1692 /*********************************************************************
1693  *
1694  *  Media Ioctl callback
1695  *
1696  *  This routine is called whenever the user queries the status of
1697  *  the interface using ifconfig.
1698  *
1699  **********************************************************************/
1700 static void
1701 em_media_status(struct ifnet *ifp, struct ifmediareq *ifmr)
1702 {
1703         struct adapter *adapter = ifp->if_softc;
1704         u_char fiber_type = IFM_1000_SX;
1705
1706         INIT_DEBUGOUT("em_media_status: begin");
1707
1708         EM_CORE_LOCK(adapter);
1709         em_update_link_status(adapter);
1710
1711         ifmr->ifm_status = IFM_AVALID;
1712         ifmr->ifm_active = IFM_ETHER;
1713
1714         if (!adapter->link_active) {
1715                 EM_CORE_UNLOCK(adapter);
1716                 return;
1717         }
1718
1719         ifmr->ifm_status |= IFM_ACTIVE;
1720
1721         if ((adapter->hw.phy.media_type == e1000_media_type_fiber) ||
1722             (adapter->hw.phy.media_type == e1000_media_type_internal_serdes)) {
1723                 ifmr->ifm_active |= fiber_type | IFM_FDX;
1724         } else {
1725                 switch (adapter->link_speed) {
1726                 case 10:
1727                         ifmr->ifm_active |= IFM_10_T;
1728                         break;
1729                 case 100:
1730                         ifmr->ifm_active |= IFM_100_TX;
1731                         break;
1732                 case 1000:
1733                         ifmr->ifm_active |= IFM_1000_T;
1734                         break;
1735                 }
1736                 if (adapter->link_duplex == FULL_DUPLEX)
1737                         ifmr->ifm_active |= IFM_FDX;
1738                 else
1739                         ifmr->ifm_active |= IFM_HDX;
1740         }
1741         EM_CORE_UNLOCK(adapter);
1742 }
1743
1744 /*********************************************************************
1745  *
1746  *  Media Ioctl callback
1747  *
1748  *  This routine is called when the user changes speed/duplex using
1749  *  media/mediopt option with ifconfig.
1750  *
1751  **********************************************************************/
1752 static int
1753 em_media_change(struct ifnet *ifp)
1754 {
1755         struct adapter *adapter = ifp->if_softc;
1756         struct ifmedia  *ifm = &adapter->media;
1757
1758         INIT_DEBUGOUT("em_media_change: begin");
1759
1760         if (IFM_TYPE(ifm->ifm_media) != IFM_ETHER)
1761                 return (EINVAL);
1762
1763         EM_CORE_LOCK(adapter);
1764         switch (IFM_SUBTYPE(ifm->ifm_media)) {
1765         case IFM_AUTO:
1766                 adapter->hw.mac.autoneg = DO_AUTO_NEG;
1767                 adapter->hw.phy.autoneg_advertised = AUTONEG_ADV_DEFAULT;
1768                 break;
1769         case IFM_1000_LX:
1770         case IFM_1000_SX:
1771         case IFM_1000_T:
1772                 adapter->hw.mac.autoneg = DO_AUTO_NEG;
1773                 adapter->hw.phy.autoneg_advertised = ADVERTISE_1000_FULL;
1774                 break;
1775         case IFM_100_TX:
1776                 adapter->hw.mac.autoneg = FALSE;
1777                 adapter->hw.phy.autoneg_advertised = 0;
1778                 if ((ifm->ifm_media & IFM_GMASK) == IFM_FDX)
1779                         adapter->hw.mac.forced_speed_duplex = ADVERTISE_100_FULL;
1780                 else
1781                         adapter->hw.mac.forced_speed_duplex = ADVERTISE_100_HALF;
1782                 break;
1783         case IFM_10_T:
1784                 adapter->hw.mac.autoneg = FALSE;
1785                 adapter->hw.phy.autoneg_advertised = 0;
1786                 if ((ifm->ifm_media & IFM_GMASK) == IFM_FDX)
1787                         adapter->hw.mac.forced_speed_duplex = ADVERTISE_10_FULL;
1788                 else
1789                         adapter->hw.mac.forced_speed_duplex = ADVERTISE_10_HALF;
1790                 break;
1791         default:
1792                 device_printf(adapter->dev, "Unsupported media type\n");
1793         }
1794
1795         em_init_locked(adapter);
1796         EM_CORE_UNLOCK(adapter);
1797
1798         return (0);
1799 }
1800
1801 /*********************************************************************
1802  *
1803  *  This routine maps the mbufs to tx descriptors.
1804  *
1805  *  return 0 on success, positive on failure
1806  **********************************************************************/
1807
/*
 * txr:     transmit ring to place the frame on (NOTE(review): caller
 *          appears responsible for holding the TX lock — confirm).
 * m_headp: in/out pointer to the mbuf chain.  The chain may be
 *          replaced (m_dup/m_defrag/m_pullup) on success paths, or
 *          freed and *m_headp set to NULL on fatal errors.
 */
static int
em_xmit(struct tx_ring *txr, struct mbuf **m_headp)
{
        struct adapter          *adapter = txr->adapter;
        bus_dma_segment_t       segs[EM_MAX_SCATTER];
        bus_dmamap_t            map;
        struct em_buffer        *tx_buffer, *tx_buffer_mapped;
        struct e1000_tx_desc    *ctxd = NULL;
        struct mbuf             *m_head;
        struct ether_header     *eh;
        struct ip               *ip = NULL;
        struct tcphdr           *tp = NULL;
        u32                     txd_upper, txd_lower, txd_used, txd_saved;
        int                     ip_off, poff;
        int                     nsegs, i, j, first, last = 0;
        int                     error, do_tso, tso_desc = 0, remap = 1;

retry:
        m_head = *m_headp;
        txd_upper = txd_lower = txd_used = txd_saved = 0;
        do_tso = ((m_head->m_pkthdr.csum_flags & CSUM_TSO) != 0);
        ip_off = poff = 0;

        /*
         * Intel recommends entire IP/TCP header length reside in a single
         * buffer. If multiple descriptors are used to describe the IP and
         * TCP header, each descriptor should describe one or more
         * complete headers; descriptors referencing only parts of headers
         * are not supported. If all layer headers are not coalesced into
         * a single buffer, each buffer should not cross a 4KB boundary,
         * or be larger than the maximum read request size.
         * Controller also requires modifing IP/TCP header to make TSO work
         * so we firstly get a writable mbuf chain then coalesce ethernet/
         * IP/TCP header into a single buffer to meet the requirement of
         * controller. This also simplifies IP/TCP/UDP checksum offloading
         * which also has similiar restrictions.
         */
        if (do_tso || m_head->m_pkthdr.csum_flags & CSUM_OFFLOAD) {
                if (do_tso || (m_head->m_next != NULL && 
                    m_head->m_pkthdr.csum_flags & CSUM_OFFLOAD)) {
                        /* Headers will be modified: ensure the chain is writable. */
                        if (M_WRITABLE(*m_headp) == 0) {
                                m_head = m_dup(*m_headp, M_NOWAIT);
                                m_freem(*m_headp);
                                if (m_head == NULL) {
                                        *m_headp = NULL;
                                        return (ENOBUFS);
                                }
                                *m_headp = m_head;
                        }
                }
                /*
                 * XXX
                 * Assume IPv4, we don't have TSO/checksum offload support
                 * for IPv6 yet.
                 */
                ip_off = sizeof(struct ether_header);
                m_head = m_pullup(m_head, ip_off);
                if (m_head == NULL) {
                        *m_headp = NULL;
                        return (ENOBUFS);
                }
                eh = mtod(m_head, struct ether_header *);
                /* Account for a VLAN tag in the header offset. */
                if (eh->ether_type == htons(ETHERTYPE_VLAN)) {
                        ip_off = sizeof(struct ether_vlan_header);
                        m_head = m_pullup(m_head, ip_off);
                        if (m_head == NULL) {
                                *m_headp = NULL;
                                return (ENOBUFS);
                        }
                }
                m_head = m_pullup(m_head, ip_off + sizeof(struct ip));
                if (m_head == NULL) {
                        *m_headp = NULL;
                        return (ENOBUFS);
                }
                ip = (struct ip *)(mtod(m_head, char *) + ip_off);
                /* poff: byte offset of the L4 header within the packet. */
                poff = ip_off + (ip->ip_hl << 2);
                if (do_tso) {
                        m_head = m_pullup(m_head, poff + sizeof(struct tcphdr));
                        if (m_head == NULL) {
                                *m_headp = NULL;
                                return (ENOBUFS);
                        }
                        tp = (struct tcphdr *)(mtod(m_head, char *) + poff);
                        /*
                         * TSO workaround:
                         *   pull 4 more bytes of data into it.
                         */
                        m_head = m_pullup(m_head, poff + (tp->th_off << 2) + 4);
                        if (m_head == NULL) {
                                *m_headp = NULL;
                                return (ENOBUFS);
                        }
                        /* m_pullup may have moved the data: re-derive pointers. */
                        ip = (struct ip *)(mtod(m_head, char *) + ip_off);
                        ip->ip_len = 0;
                        ip->ip_sum = 0;
                        /*
                         * The pseudo TCP checksum does not include TCP payload
                         * length so driver should recompute the checksum here
                         * what hardware expect to see. This is adherence of
                         * Microsoft's Large Send specification.
                         */
                        tp = (struct tcphdr *)(mtod(m_head, char *) + poff);
                        tp->th_sum = in_pseudo(ip->ip_src.s_addr,
                            ip->ip_dst.s_addr, htons(IPPROTO_TCP));
                } else if (m_head->m_pkthdr.csum_flags & CSUM_TCP) {
                        m_head = m_pullup(m_head, poff + sizeof(struct tcphdr));
                        if (m_head == NULL) {
                                *m_headp = NULL;
                                return (ENOBUFS);
                        }
                        tp = (struct tcphdr *)(mtod(m_head, char *) + poff);
                        /* Pull the full TCP header (including options). */
                        m_head = m_pullup(m_head, poff + (tp->th_off << 2));
                        if (m_head == NULL) {
                                *m_headp = NULL;
                                return (ENOBUFS);
                        }
                        ip = (struct ip *)(mtod(m_head, char *) + ip_off);
                        tp = (struct tcphdr *)(mtod(m_head, char *) + poff);
                } else if (m_head->m_pkthdr.csum_flags & CSUM_UDP) {
                        m_head = m_pullup(m_head, poff + sizeof(struct udphdr));
                        if (m_head == NULL) {
                                *m_headp = NULL;
                                return (ENOBUFS);
                        }
                        ip = (struct ip *)(mtod(m_head, char *) + ip_off);
                }
                *m_headp = m_head;
        }

        /*
         * Map the packet for DMA
         *
         * Capture the first descriptor index,
         * this descriptor will have the index
         * of the EOP which is the only one that
         * now gets a DONE bit writeback.
         */
        first = txr->next_avail_desc;
        tx_buffer = &txr->tx_buffers[first];
        tx_buffer_mapped = tx_buffer;
        map = tx_buffer->map;

        error = bus_dmamap_load_mbuf_sg(txr->txtag, map,
            *m_headp, segs, &nsegs, BUS_DMA_NOWAIT);

        /*
         * There are two types of errors we can (try) to handle:
         * - EFBIG means the mbuf chain was too long and bus_dma ran
         *   out of segments.  Defragment the mbuf chain and try again.
         * - ENOMEM means bus_dma could not obtain enough bounce buffers
         *   at this point in time.  Defer sending and try again later.
         * All other errors, in particular EINVAL, are fatal and prevent the
         * mbuf chain from ever going through.  Drop it and report error.
         */
        if (error == EFBIG && remap) {
                struct mbuf *m;

                m = m_defrag(*m_headp, M_NOWAIT);
                if (m == NULL) {
                        adapter->mbuf_alloc_failed++;
                        m_freem(*m_headp);
                        *m_headp = NULL;
                        return (ENOBUFS);
                }
                *m_headp = m;

                /* Try it again, but only once */
                remap = 0;
                goto retry;
        } else if (error == ENOMEM) {
                /* Transient: caller may retry later; chain is kept. */
                adapter->no_tx_dma_setup++;
                return (error);
        } else if (error != 0) {
                /* Fatal mapping error: drop the chain. */
                adapter->no_tx_dma_setup++;
                m_freem(*m_headp);
                *m_headp = NULL;
                return (error);
        }

        /*
         * TSO Hardware workaround, if this packet is not
         * TSO, and is only a single descriptor long, and
         * it follows a TSO burst, then we need to add a
         * sentinel descriptor to prevent premature writeback.
         */
        if ((do_tso == 0) && (txr->tx_tso == TRUE)) {
                if (nsegs == 1)
                        tso_desc = TRUE;
                txr->tx_tso = FALSE;
        }

        /* +2 leaves room for the possible sentinel descriptor. */
        if (nsegs > (txr->tx_avail - 2)) {
                txr->no_desc_avail++;
                bus_dmamap_unload(txr->txtag, map);
                return (ENOBUFS);
        }
        m_head = *m_headp;

        /* Do hardware assists */
        if (m_head->m_pkthdr.csum_flags & CSUM_TSO) {
                em_tso_setup(txr, m_head, ip_off, ip, tp,
                    &txd_upper, &txd_lower);
                /* we need to make a final sentinel transmit desc */
                tso_desc = TRUE;
        } else if (m_head->m_pkthdr.csum_flags & CSUM_OFFLOAD)
                em_transmit_checksum_setup(txr, m_head,
                    ip_off, ip, &txd_upper, &txd_lower);

        if (m_head->m_flags & M_VLANTAG) {
                /* Set the vlan id. */
                txd_upper |=
                    (htole16(m_head->m_pkthdr.ether_vtag) << 16);
                /* Tell hardware to add tag */
                txd_lower |= htole32(E1000_TXD_CMD_VLE);
        }

        i = txr->next_avail_desc;

        /* Set up our transmit descriptors */
        for (j = 0; j < nsegs; j++) {
                bus_size_t seg_len;
                bus_addr_t seg_addr;

                tx_buffer = &txr->tx_buffers[i];
                ctxd = &txr->tx_base[i];
                seg_addr = segs[j].ds_addr;
                seg_len  = segs[j].ds_len;
                /*
                ** TSO Workaround:
                ** If this is the last descriptor, we want to
                ** split it so we have a small final sentinel
                */
                if (tso_desc && (j == (nsegs -1)) && (seg_len > 8)) {
                        /* Shorten the last segment by 4 bytes... */
                        seg_len -= 4;
                        ctxd->buffer_addr = htole64(seg_addr);
                        ctxd->lower.data = htole32(
                        adapter->txd_cmd | txd_lower | seg_len);
                        ctxd->upper.data =
                            htole32(txd_upper);
                        if (++i == adapter->num_tx_desc)
                                i = 0;
                        /* ...and emit those 4 bytes as the sentinel. */
                        /* Now make the sentinel */
                        ++txd_used; /* using an extra txd */
                        ctxd = &txr->tx_base[i];
                        tx_buffer = &txr->tx_buffers[i];
                        ctxd->buffer_addr =
                            htole64(seg_addr + seg_len);
                        ctxd->lower.data = htole32(
                        adapter->txd_cmd | txd_lower | 4);
                        ctxd->upper.data =
                            htole32(txd_upper);
                        last = i;
                        if (++i == adapter->num_tx_desc)
                                i = 0;
                } else {
                        ctxd->buffer_addr = htole64(seg_addr);
                        ctxd->lower.data = htole32(
                        adapter->txd_cmd | txd_lower | seg_len);
                        ctxd->upper.data =
                            htole32(txd_upper);
                        last = i;
                        if (++i == adapter->num_tx_desc)
                                i = 0;
                }
                tx_buffer->m_head = NULL;
                tx_buffer->next_eop = -1;
        }

        txr->next_avail_desc = i;
        txr->tx_avail -= nsegs;
        if (tso_desc) /* TSO used an extra for sentinel */
                txr->tx_avail -= txd_used;

        /* The last buffer owns the mbuf chain; freed when it completes. */
        tx_buffer->m_head = m_head;
        /*
        ** Here we swap the map so the last descriptor,
        ** which gets the completion interrupt has the
        ** real map, and the first descriptor gets the
        ** unused map from this descriptor.
        */
        tx_buffer_mapped->map = tx_buffer->map;
        tx_buffer->map = map;
        bus_dmamap_sync(txr->txtag, map, BUS_DMASYNC_PREWRITE);

        /*
         * Last Descriptor of Packet
         * needs End Of Packet (EOP)
         * and Report Status (RS)
         */
        ctxd->lower.data |=
            htole32(E1000_TXD_CMD_EOP | E1000_TXD_CMD_RS);
        /*
         * Keep track in the first buffer which
         * descriptor will be written back
         */
        tx_buffer = &txr->tx_buffers[first];
        tx_buffer->next_eop = last;
        /* Update the watchdog time early and often */
        txr->watchdog_time = ticks;

        /*
         * Advance the Transmit Descriptor Tail (TDT), this tells the E1000
         * that this frame is available to transmit.
         */
        bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
            BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
        E1000_WRITE_REG(&adapter->hw, E1000_TDT(txr->me), i);

        return (0);
}
2119
2120 static void
2121 em_set_promisc(struct adapter *adapter)
2122 {
2123         struct ifnet    *ifp = adapter->ifp;
2124         u32             reg_rctl;
2125
2126         reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
2127
2128         if (ifp->if_flags & IFF_PROMISC) {
2129                 reg_rctl |= (E1000_RCTL_UPE | E1000_RCTL_MPE);
2130                 /* Turn this on if you want to see bad packets */
2131                 if (em_debug_sbp)
2132                         reg_rctl |= E1000_RCTL_SBP;
2133                 E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2134         } else if (ifp->if_flags & IFF_ALLMULTI) {
2135                 reg_rctl |= E1000_RCTL_MPE;
2136                 reg_rctl &= ~E1000_RCTL_UPE;
2137                 E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2138         }
2139 }
2140
2141 static void
2142 em_disable_promisc(struct adapter *adapter)
2143 {
2144         u32     reg_rctl;
2145
2146         reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
2147
2148         reg_rctl &=  (~E1000_RCTL_UPE);
2149         reg_rctl &=  (~E1000_RCTL_MPE);
2150         reg_rctl &=  (~E1000_RCTL_SBP);
2151         E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2152 }
2153
2154
2155 /*********************************************************************
2156  *  Multicast Update
2157  *
2158  *  This routine is called whenever multicast address list is updated.
2159  *
2160  **********************************************************************/
2161
static void
em_set_multi(struct adapter *adapter)
{
        struct ifnet    *ifp = adapter->ifp;
        struct ifmultiaddr *ifma;
        u32 reg_rctl = 0;
        u8  *mta; /* Multicast array memory */
        int mcnt = 0;

        /*
         * Rebuild the hardware multicast filter from the interface's
         * multicast address list.  If the list overflows the filter,
         * fall back to multicast-promiscuous mode.
         */
        IOCTL_DEBUGOUT("em_set_multi: begin");

        mta = adapter->mta;
        bzero(mta, sizeof(u8) * ETH_ADDR_LEN * MAX_NUM_MULTICAST_ADDRESSES);

        /*
         * 82542 rev 2 errata workaround: the receiver must be held in
         * reset (and MWI disabled) while the filter is reprogrammed.
         */
        if (adapter->hw.mac.type == e1000_82542 && 
            adapter->hw.revision_id == E1000_REVISION_2) {
                reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
                if (adapter->hw.bus.pci_cmd_word & CMD_MEM_WRT_INVALIDATE)
                        e1000_pci_clear_mwi(&adapter->hw);
                reg_rctl |= E1000_RCTL_RST;
                E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
                msec_delay(5);
        }

#if __FreeBSD_version < 800000
        IF_ADDR_LOCK(ifp);
#else
        if_maddr_rlock(ifp);
#endif
        /* Collect up to MAX_NUM_MULTICAST_ADDRESSES link-level addresses. */
        TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
                if (ifma->ifma_addr->sa_family != AF_LINK)
                        continue;

                if (mcnt == MAX_NUM_MULTICAST_ADDRESSES)
                        break;

                bcopy(LLADDR((struct sockaddr_dl *)ifma->ifma_addr),
                    &mta[mcnt * ETH_ADDR_LEN], ETH_ADDR_LEN);
                mcnt++;
        }
#if __FreeBSD_version < 800000
        IF_ADDR_UNLOCK(ifp);
#else
        if_maddr_runlock(ifp);
#endif
        if (mcnt >= MAX_NUM_MULTICAST_ADDRESSES) {
                /* Filter full: accept all multicast instead. */
                reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
                reg_rctl |= E1000_RCTL_MPE;
                E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
        } else
                e1000_update_mc_addr_list(&adapter->hw, mta, mcnt);

        /* Undo the 82542 rev 2 workaround: release reset, restore MWI. */
        if (adapter->hw.mac.type == e1000_82542 && 
            adapter->hw.revision_id == E1000_REVISION_2) {
                reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
                reg_rctl &= ~E1000_RCTL_RST;
                E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
                msec_delay(5);
                if (adapter->hw.bus.pci_cmd_word & CMD_MEM_WRT_INVALIDATE)
                        e1000_pci_set_mwi(&adapter->hw);
        }
}
2224
2225
2226 /*********************************************************************
2227  *  Timer routine
2228  *
2229  *  This routine checks for link status and updates statistics.
2230  *
2231  **********************************************************************/
2232
static void
em_local_timer(void *arg)
{
        struct adapter  *adapter = arg;
        struct ifnet    *ifp = adapter->ifp;
        struct tx_ring  *txr = adapter->tx_rings;
        struct rx_ring  *rxr = adapter->rx_rings;
        u32             trigger;

        /*
         * Once-per-second housekeeping: link/statistics refresh, TX
         * watchdog check, and an RX interrupt trigger to guarantee
         * mbuf refresh.  Runs from the callout with the core lock held.
         */
        EM_CORE_LOCK_ASSERT(adapter);

        em_update_link_status(adapter);
        em_update_stats_counters(adapter);

        /* Reset LAA into RAR[0] on 82571 */
        if ((adapter->hw.mac.type == e1000_82571) &&
            e1000_get_laa_state_82571(&adapter->hw))
                e1000_rar_set(&adapter->hw, adapter->hw.mac.addr, 0);

        /* Mask to use in the irq trigger */
        if (adapter->msix_mem)
                trigger = rxr->ims; /* RX for 82574 */
        else
                trigger = E1000_ICS_RXDMT0;

        /*
        ** Check on the state of the TX queue(s), this 
        ** can be done without the lock because its RO
        ** and the HUNG state will be static if set.
        */
        for (int i = 0; i < adapter->num_queues; i++, txr++) {
                /* Hung + pause frames seen means flow control, not a hang. */
                if ((txr->queue_status == EM_QUEUE_HUNG) &&
                    (adapter->pause_frames == 0))
                        goto hung;
                /* Schedule a TX tasklet if needed */
                if (txr->tx_avail <= EM_MAX_SCATTER)
                        taskqueue_enqueue(txr->tq, &txr->tx_task);
        }
        
        adapter->pause_frames = 0;
        /* Re-arm ourselves for one second from now. */
        callout_reset(&adapter->timer, hz, em_local_timer, adapter);
#ifndef DEVICE_POLLING
        /* Trigger an RX interrupt to guarantee mbuf refresh */
        E1000_WRITE_REG(&adapter->hw, E1000_ICS, trigger);
#endif
        return;
hung:
        /* Looks like we're hung */
        device_printf(adapter->dev, "Watchdog timeout -- resetting\n");
        device_printf(adapter->dev,
            "Queue(%d) tdh = %d, hw tdt = %d\n", txr->me,
            E1000_READ_REG(&adapter->hw, E1000_TDH(txr->me)),
            E1000_READ_REG(&adapter->hw, E1000_TDT(txr->me)));
        device_printf(adapter->dev,"TX(%d) desc avail = %d,"
            "Next TX to Clean = %d\n",
            txr->me, txr->tx_avail, txr->next_to_clean);
        /* Mark down and reinitialize; em_init_locked restarts the timer. */
        ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
        adapter->watchdog_events++;
        adapter->pause_frames = 0;
        em_init_locked(adapter);
}
2294
2295
/*
 * em_update_link_status - poll the current link state and push any
 * up/down transition to the network stack.
 *
 * Reads link state in a media-dependent way, then compares the result
 * against adapter->link_active to detect a transition.  On link-up it
 * caches speed/duplex and reports the new baudrate; on link-down it
 * idles all TX queue watchdogs before notifying the stack.
 */
static void
em_update_link_status(struct adapter *adapter)
{
        struct e1000_hw *hw = &adapter->hw;
        struct ifnet *ifp = adapter->ifp;
        device_t dev = adapter->dev;
        struct tx_ring *txr = adapter->tx_rings;
        u32 link_check = 0;

        /* Get the cached link value or read phy for real */
        switch (hw->phy.media_type) {
        case e1000_media_type_copper:
                if (hw->mac.get_link_status) {
                        /* Do the work to read phy */
                        e1000_check_for_link(hw);
                        /* get_link_status is cleared once link is known */
                        link_check = !hw->mac.get_link_status;
                        if (link_check) /* ESB2 fix */
                                e1000_cfg_on_link_up(hw);
                } else
                        link_check = TRUE;
                break;
        case e1000_media_type_fiber:
                e1000_check_for_link(hw);
                /* Fiber reports link via the STATUS register LU bit */
                link_check = (E1000_READ_REG(hw, E1000_STATUS) &
                                 E1000_STATUS_LU);
                break;
        case e1000_media_type_internal_serdes:
                e1000_check_for_link(hw);
                link_check = adapter->hw.mac.serdes_has_link;
                break;
        default:
        case e1000_media_type_unknown:
                break;
        }

        /* Now check for a transition */
        if (link_check && (adapter->link_active == 0)) {
                /* Link came up */
                e1000_get_speed_and_duplex(hw, &adapter->link_speed,
                    &adapter->link_duplex);
                /* Check if we must disable SPEED_MODE bit on PCI-E */
                if ((adapter->link_speed != SPEED_1000) &&
                    ((hw->mac.type == e1000_82571) ||
                    (hw->mac.type == e1000_82572))) {
                        int tarc0;
                        tarc0 = E1000_READ_REG(hw, E1000_TARC(0));
                        tarc0 &= ~SPEED_MODE_BIT;
                        E1000_WRITE_REG(hw, E1000_TARC(0), tarc0);
                }
                if (bootverbose)
                        device_printf(dev, "Link is up %d Mbps %s\n",
                            adapter->link_speed,
                            ((adapter->link_duplex == FULL_DUPLEX) ?
                            "Full Duplex" : "Half Duplex"));
                adapter->link_active = 1;
                adapter->smartspeed = 0;
                /* link_speed is in Mbps; if_baudrate wants bits/sec */
                ifp->if_baudrate = adapter->link_speed * 1000000;
                if_link_state_change(ifp, LINK_STATE_UP);
        } else if (!link_check && (adapter->link_active == 1)) {
                /* Link went down */
                ifp->if_baudrate = adapter->link_speed = 0;
                adapter->link_duplex = 0;
                if (bootverbose)
                        device_printf(dev, "Link is Down\n");
                adapter->link_active = 0;
                /* Link down, disable watchdog */
                for (int i = 0; i < adapter->num_queues; i++, txr++)
                        txr->queue_status = EM_QUEUE_IDLE;
                if_link_state_change(ifp, LINK_STATE_DOWN);
        }
}
2365
2366 /*********************************************************************
2367  *
2368  *  This routine disables all traffic on the adapter by issuing a
2369  *  global reset on the MAC and deallocates TX/RX buffers.
2370  *
2371  *  This routine should always be called with BOTH the CORE
2372  *  and TX locks.
2373  **********************************************************************/
2374
static void
em_stop(void *arg)
{
        struct adapter  *adapter = arg;
        struct ifnet    *ifp = adapter->ifp;
        struct tx_ring  *txr = adapter->tx_rings;

        /* Caller must hold the core lock (see header comment above) */
        EM_CORE_LOCK_ASSERT(adapter);

        INIT_DEBUGOUT("em_stop: begin");

        /* Mask all interrupts and stop the periodic timer */
        em_disable_intr(adapter);
        callout_stop(&adapter->timer);

        /* Tell the stack that the interface is no longer active */
        ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
        ifp->if_drv_flags |= IFF_DRV_OACTIVE;

        /* Unarm watchdog timer. */
        for (int i = 0; i < adapter->num_queues; i++, txr++) {
                EM_TX_LOCK(txr);
                txr->queue_status = EM_QUEUE_IDLE;
                EM_TX_UNLOCK(txr);
        }

        /* Global MAC reset, then clear the Wake Up Control register */
        e1000_reset_hw(&adapter->hw);
        E1000_WRITE_REG(&adapter->hw, E1000_WUC, 0);

        /* Return LED control to the hardware default */
        e1000_led_off(&adapter->hw);
        e1000_cleanup_led(&adapter->hw);
}
2406
2407
2408 /*********************************************************************
2409  *
2410  *  Determine hardware revision.
2411  *
2412  **********************************************************************/
2413 static void
2414 em_identify_hardware(struct adapter *adapter)
2415 {
2416         device_t dev = adapter->dev;
2417
2418         /* Make sure our PCI config space has the necessary stuff set */
2419         adapter->hw.bus.pci_cmd_word = pci_read_config(dev, PCIR_COMMAND, 2);
2420         if (!((adapter->hw.bus.pci_cmd_word & PCIM_CMD_BUSMASTEREN) &&
2421             (adapter->hw.bus.pci_cmd_word & PCIM_CMD_MEMEN))) {
2422                 device_printf(dev, "Memory Access and/or Bus Master bits "
2423                     "were not set!\n");
2424                 adapter->hw.bus.pci_cmd_word |=
2425                 (PCIM_CMD_BUSMASTEREN | PCIM_CMD_MEMEN);
2426                 pci_write_config(dev, PCIR_COMMAND,
2427                     adapter->hw.bus.pci_cmd_word, 2);
2428         }
2429
2430         /* Save off the information about this board */
2431         adapter->hw.vendor_id = pci_get_vendor(dev);
2432         adapter->hw.device_id = pci_get_device(dev);
2433         adapter->hw.revision_id = pci_read_config(dev, PCIR_REVID, 1);
2434         adapter->hw.subsystem_vendor_id =
2435             pci_read_config(dev, PCIR_SUBVEND_0, 2);
2436         adapter->hw.subsystem_device_id =
2437             pci_read_config(dev, PCIR_SUBDEV_0, 2);
2438
2439         /* Do Shared Code Init and Setup */
2440         if (e1000_set_mac_type(&adapter->hw)) {
2441                 device_printf(dev, "Setup init failure\n");
2442                 return;
2443         }
2444 }
2445
2446 static int
2447 em_allocate_pci_resources(struct adapter *adapter)
2448 {
2449         device_t        dev = adapter->dev;
2450         int             rid;
2451
2452         rid = PCIR_BAR(0);
2453         adapter->memory = bus_alloc_resource_any(dev, SYS_RES_MEMORY,
2454             &rid, RF_ACTIVE);
2455         if (adapter->memory == NULL) {
2456                 device_printf(dev, "Unable to allocate bus resource: memory\n");
2457                 return (ENXIO);
2458         }
2459         adapter->osdep.mem_bus_space_tag =
2460             rman_get_bustag(adapter->memory);
2461         adapter->osdep.mem_bus_space_handle =
2462             rman_get_bushandle(adapter->memory);
2463         adapter->hw.hw_addr = (u8 *)&adapter->osdep.mem_bus_space_handle;
2464
2465         /* Default to a single queue */
2466         adapter->num_queues = 1;
2467
2468         /*
2469          * Setup MSI/X or MSI if PCI Express
2470          */
2471         adapter->msix = em_setup_msix(adapter);
2472
2473         adapter->hw.back = &adapter->osdep;
2474
2475         return (0);
2476 }
2477
2478 /*********************************************************************
2479  *
2480  *  Setup the Legacy or MSI Interrupt handler
2481  *
2482  **********************************************************************/
2483 int
2484 em_allocate_legacy(struct adapter *adapter)
2485 {
2486         device_t dev = adapter->dev;
2487         struct tx_ring  *txr = adapter->tx_rings;
2488         int error, rid = 0;
2489
2490         /* Manually turn off all interrupts */
2491         E1000_WRITE_REG(&adapter->hw, E1000_IMC, 0xffffffff);
2492
2493         if (adapter->msix == 1) /* using MSI */
2494                 rid = 1;
2495         /* We allocate a single interrupt resource */
2496         adapter->res = bus_alloc_resource_any(dev,
2497             SYS_RES_IRQ, &rid, RF_SHAREABLE | RF_ACTIVE);
2498         if (adapter->res == NULL) {
2499                 device_printf(dev, "Unable to allocate bus resource: "
2500                     "interrupt\n");
2501                 return (ENXIO);
2502         }
2503
2504         /*
2505          * Allocate a fast interrupt and the associated
2506          * deferred processing contexts.
2507          */
2508         TASK_INIT(&adapter->que_task, 0, em_handle_que, adapter);
2509         adapter->tq = taskqueue_create_fast("em_taskq", M_NOWAIT,
2510             taskqueue_thread_enqueue, &adapter->tq);
2511         taskqueue_start_threads(&adapter->tq, 1, PI_NET, "%s que",
2512             device_get_nameunit(adapter->dev));
2513         /* Use a TX only tasklet for local timer */
2514         TASK_INIT(&txr->tx_task, 0, em_handle_tx, txr);
2515         txr->tq = taskqueue_create_fast("em_txq", M_NOWAIT,
2516             taskqueue_thread_enqueue, &txr->tq);
2517         taskqueue_start_threads(&txr->tq, 1, PI_NET, "%s txq",
2518             device_get_nameunit(adapter->dev));
2519         TASK_INIT(&adapter->link_task, 0, em_handle_link, adapter);
2520         if ((error = bus_setup_intr(dev, adapter->res, INTR_TYPE_NET,
2521             em_irq_fast, NULL, adapter, &adapter->tag)) != 0) {
2522                 device_printf(dev, "Failed to register fast interrupt "
2523                             "handler: %d\n", error);
2524                 taskqueue_free(adapter->tq);
2525                 adapter->tq = NULL;
2526                 return (error);
2527         }
2528         
2529         return (0);
2530 }
2531
2532 /*********************************************************************
2533  *
2534  *  Setup the MSIX Interrupt handlers
2535  *   This is not really Multiqueue, rather
2536  *   its just seperate interrupt vectors
2537  *   for TX, RX, and Link.
2538  *
2539  **********************************************************************/
int
em_allocate_msix(struct adapter *adapter)
{
        device_t        dev = adapter->dev;
        struct          tx_ring *txr = adapter->tx_rings;
        struct          rx_ring *rxr = adapter->rx_rings;
        int             error, rid, vector = 0;


        /* Make sure all interrupts are disabled */
        E1000_WRITE_REG(&adapter->hw, E1000_IMC, 0xffffffff);

        /* First set up ring resources */
        for (int i = 0; i < adapter->num_queues; i++, txr++, rxr++) {

                /* RX ring; SYS_RES_IRQ rids for MSIX are vector + 1 */
                rid = vector + 1;

                rxr->res = bus_alloc_resource_any(dev,
                    SYS_RES_IRQ, &rid, RF_ACTIVE);
                if (rxr->res == NULL) {
                        device_printf(dev,
                            "Unable to allocate bus resource: "
                            "RX MSIX Interrupt %d\n", i);
                        /*
                         * NOTE(review): resources set up on earlier
                         * iterations are not torn down here; cleanup
                         * is presumably left to em_free_pci_resources()
                         * in the caller's error path -- confirm.
                         */
                        return (ENXIO);
                }
                if ((error = bus_setup_intr(dev, rxr->res,
                    INTR_TYPE_NET | INTR_MPSAFE, NULL, em_msix_rx,
                    rxr, &rxr->tag)) != 0) {
                        device_printf(dev, "Failed to register RX handler");
                        return (error);
                }
#if __FreeBSD_version >= 800504
                bus_describe_intr(dev, rxr->res, rxr->tag, "rx %d", i);
#endif
                rxr->msix = vector++; /* NOTE increment vector for TX */
                TASK_INIT(&rxr->rx_task, 0, em_handle_rx, rxr);
                rxr->tq = taskqueue_create_fast("em_rxq", M_NOWAIT,
                    taskqueue_thread_enqueue, &rxr->tq);
                taskqueue_start_threads(&rxr->tq, 1, PI_NET, "%s rxq",
                    device_get_nameunit(adapter->dev));
                /*
                ** Set the bit to enable interrupt
                ** in E1000_IMS -- bits 20 and 21
                ** are for RX0 and RX1, note this has
                ** NOTHING to do with the MSIX vector
                */
                rxr->ims = 1 << (20 + i);
                /* Accumulate IVAR routing: 4 bits per queue, valid bit 8 */
                adapter->ivars |= (8 | rxr->msix) << (i * 4);

                /* TX ring */
                rid = vector + 1;
                txr->res = bus_alloc_resource_any(dev,
                    SYS_RES_IRQ, &rid, RF_ACTIVE);
                if (txr->res == NULL) {
                        device_printf(dev,
                            "Unable to allocate bus resource: "
                            "TX MSIX Interrupt %d\n", i);
                        return (ENXIO);
                }
                if ((error = bus_setup_intr(dev, txr->res,
                    INTR_TYPE_NET | INTR_MPSAFE, NULL, em_msix_tx,
                    txr, &txr->tag)) != 0) {
                        device_printf(dev, "Failed to register TX handler");
                        return (error);
                }
#if __FreeBSD_version >= 800504
                bus_describe_intr(dev, txr->res, txr->tag, "tx %d", i);
#endif
                txr->msix = vector++; /* Increment vector for next pass */
                TASK_INIT(&txr->tx_task, 0, em_handle_tx, txr);
                txr->tq = taskqueue_create_fast("em_txq", M_NOWAIT,
                    taskqueue_thread_enqueue, &txr->tq);
                taskqueue_start_threads(&txr->tq, 1, PI_NET, "%s txq",
                    device_get_nameunit(adapter->dev));
                /*
                ** Set the bit to enable interrupt
                ** in E1000_IMS -- bits 22 and 23
                ** are for TX0 and TX1, note this has
                ** NOTHING to do with the MSIX vector
                */
                txr->ims = 1 << (22 + i);
                adapter->ivars |= (8 | txr->msix) << (8 + (i * 4));
        }

        /* Link interrupt gets the last vector */
        ++rid;
        adapter->res = bus_alloc_resource_any(dev,
            SYS_RES_IRQ, &rid, RF_ACTIVE);
        if (!adapter->res) {
                device_printf(dev,"Unable to allocate "
                    "bus resource: Link interrupt [%d]\n", rid);
                return (ENXIO);
        }
        /* Set the link handler function */
        error = bus_setup_intr(dev, adapter->res,
            INTR_TYPE_NET | INTR_MPSAFE, NULL,
            em_msix_link, adapter, &adapter->tag);
        if (error) {
                adapter->res = NULL;
                device_printf(dev, "Failed to register LINK handler");
                return (error);
        }
#if __FreeBSD_version >= 800504
                bus_describe_intr(dev, adapter->res, adapter->tag, "link");
#endif
        /* Route the link (other-cause) interrupt and set IVAR valid bit */
        adapter->linkvec = vector;
        adapter->ivars |=  (8 | vector) << 16;
        adapter->ivars |= 0x80000000;

        return (0);
}
2652
2653
/*
 * Release all interrupt and memory resources acquired by
 * em_allocate_pci_resources()/em_allocate_legacy()/em_allocate_msix().
 * Safe to call on partially-initialized state: every release is
 * guarded by a NULL check.
 */
static void
em_free_pci_resources(struct adapter *adapter)
{
        device_t        dev = adapter->dev;
        struct tx_ring  *txr;
        struct rx_ring  *rxr;
        int             rid;


        /*
        ** Release all the queue interrupt resources:
        */
        for (int i = 0; i < adapter->num_queues; i++) {
                txr = &adapter->tx_rings[i];
                rxr = &adapter->rx_rings[i];
                /*
                 * an early abort?  (Note: &array[i] is only NULL when
                 * the ring array itself is NULL and i == 0, so this
                 * guard catches a completely unallocated ring set.)
                 */
                if ((txr == NULL) || (rxr == NULL))
                        break;
                /* MSIX rids were allocated as vector + 1 */
                rid = txr->msix +1;
                if (txr->tag != NULL) {
                        bus_teardown_intr(dev, txr->res, txr->tag);
                        txr->tag = NULL;
                }
                if (txr->res != NULL)
                        bus_release_resource(dev, SYS_RES_IRQ,
                            rid, txr->res);
                rid = rxr->msix +1;
                if (rxr->tag != NULL) {
                        bus_teardown_intr(dev, rxr->res, rxr->tag);
                        rxr->tag = NULL;
                }
                if (rxr->res != NULL)
                        bus_release_resource(dev, SYS_RES_IRQ,
                            rid, rxr->res);
        }

        /* Pick the rid used for the link/legacy/MSI interrupt */
        if (adapter->linkvec) /* we are doing MSIX */
                rid = adapter->linkvec + 1;
        else
                (adapter->msix != 0) ? (rid = 1):(rid = 0);

        if (adapter->tag != NULL) {
                bus_teardown_intr(dev, adapter->res, adapter->tag);
                adapter->tag = NULL;
        }

        if (adapter->res != NULL)
                bus_release_resource(dev, SYS_RES_IRQ, rid, adapter->res);


        /* Give back any MSI/MSIX vectors before the BAR mappings */
        if (adapter->msix)
                pci_release_msi(dev);

        if (adapter->msix_mem != NULL)
                bus_release_resource(dev, SYS_RES_MEMORY,
                    PCIR_BAR(EM_MSIX_BAR), adapter->msix_mem);

        if (adapter->memory != NULL)
                bus_release_resource(dev, SYS_RES_MEMORY,
                    PCIR_BAR(0), adapter->memory);

        if (adapter->flash != NULL)
                bus_release_resource(dev, SYS_RES_MEMORY,
                    EM_FLASH, adapter->flash);
}
2719
2720 /*
2721  * Setup MSI or MSI/X
2722  */
2723 static int
2724 em_setup_msix(struct adapter *adapter)
2725 {
2726         device_t dev = adapter->dev;
2727         int val = 0;
2728
2729         /*
2730         ** Setup MSI/X for Hartwell: tests have shown
2731         ** use of two queues to be unstable, and to
2732         ** provide no great gain anyway, so we simply
2733         ** seperate the interrupts and use a single queue.
2734         */
2735         if ((adapter->hw.mac.type == e1000_82574) &&
2736             (em_enable_msix == TRUE)) {
2737                 /* Map the MSIX BAR */
2738                 int rid = PCIR_BAR(EM_MSIX_BAR);
2739                 adapter->msix_mem = bus_alloc_resource_any(dev,
2740                     SYS_RES_MEMORY, &rid, RF_ACTIVE);
2741                 if (!adapter->msix_mem) {
2742                         /* May not be enabled */
2743                         device_printf(adapter->dev,
2744                             "Unable to map MSIX table \n");
2745                         goto msi;
2746                 }
2747                 val = pci_msix_count(dev); 
2748                 /* We only need 3 vectors */
2749                 if (val > 3)
2750                         val = 3;
2751                 if ((val != 3) && (val != 5)) {
2752                         bus_release_resource(dev, SYS_RES_MEMORY,
2753                             PCIR_BAR(EM_MSIX_BAR), adapter->msix_mem);
2754                         adapter->msix_mem = NULL;
2755                         device_printf(adapter->dev,
2756                             "MSIX: incorrect vectors, using MSI\n");
2757                         goto msi;
2758                 }
2759
2760                 if (pci_alloc_msix(dev, &val) == 0) {
2761                         device_printf(adapter->dev,
2762                             "Using MSIX interrupts "
2763                             "with %d vectors\n", val);
2764                 }
2765
2766                 return (val);
2767         }
2768 msi:
2769         val = pci_msi_count(dev);
2770         if (val == 1 && pci_alloc_msi(dev, &val) == 0) {
2771                 adapter->msix = 1;
2772                 device_printf(adapter->dev,"Using an MSI interrupt\n");
2773                 return (val);
2774         } 
2775         /* Should only happen due to manual configuration */
2776         device_printf(adapter->dev,"No MSI/MSIX using a Legacy IRQ\n");
2777         return (0);
2778 }
2779
2780
2781 /*********************************************************************
2782  *
2783  *  Initialize the hardware to a configuration
2784  *  as specified by the adapter structure.
2785  *
2786  **********************************************************************/
static void
em_reset(struct adapter *adapter)
{
        device_t        dev = adapter->dev;
        struct ifnet    *ifp = adapter->ifp;
        struct e1000_hw *hw = &adapter->hw;
        u16             rx_buffer_size;
        u32             pba;

        INIT_DEBUGOUT("em_reset: begin");

        /* Set up smart power down as default off on newer adapters. */
        if (!em_smart_pwr_down && (hw->mac.type == e1000_82571 ||
            hw->mac.type == e1000_82572)) {
                u16 phy_tmp = 0;

                /* Speed up time to link by disabling smart power down. */
                e1000_read_phy_reg(hw, IGP02E1000_PHY_POWER_MGMT, &phy_tmp);
                phy_tmp &= ~IGP02E1000_PM_SPD;
                e1000_write_phy_reg(hw, IGP02E1000_PHY_POWER_MGMT, phy_tmp);
        }

        /*
         * Packet Buffer Allocation (PBA)
         * Writing PBA sets the receive portion of the buffer
         * the remainder is used for the transmit buffer.
         */
        switch (hw->mac.type) {
        /* Total Packet Buffer on these is 48K */
        case e1000_82571:
        case e1000_82572:
        case e1000_80003es2lan:
                        pba = E1000_PBA_32K; /* 32K for Rx, 16K for Tx */
                break;
        case e1000_82573: /* 82573: Total Packet Buffer is 32K */
                        pba = E1000_PBA_12K; /* 12K for Rx, 20K for Tx */
                break;
        case e1000_82574:
        case e1000_82583:
                        pba = E1000_PBA_20K; /* 20K for Rx, 20K for Tx */
                break;
        case e1000_ich8lan:
                pba = E1000_PBA_8K;
                break;
        case e1000_ich9lan:
        case e1000_ich10lan:
                /* Boost Receive side for jumbo frames */
                if (adapter->hw.mac.max_frame_size > 4096)
                        pba = E1000_PBA_14K;
                else
                        pba = E1000_PBA_10K;
                break;
        case e1000_pchlan:
        case e1000_pch2lan:
        case e1000_pch_lpt:
                pba = E1000_PBA_26K;
                break;
        default:
                if (adapter->hw.mac.max_frame_size > 8192)
                        pba = E1000_PBA_40K; /* 40K for Rx, 24K for Tx */
                else
                        pba = E1000_PBA_48K; /* 48K for Rx, 16K for Tx */
        }
        E1000_WRITE_REG(&adapter->hw, E1000_PBA, pba);

        /*
         * These parameters control the automatic generation (Tx) and
         * response (Rx) to Ethernet PAUSE frames.
         * - High water mark should allow for at least two frames to be
         *   received after sending an XOFF.
         * - Low water mark works best when it is very near the high water mark.
         *   This allows the receiver to restart by sending XON when it has
         *   drained a bit. Here we use an arbitary value of 1500 which will
         *   restart after one full frame is pulled from the buffer. There
         *   could be several smaller frames in the buffer and if so they will
         *   not trigger the XON until their total number reduces the buffer
         *   by 1500.
         * - The pause time is fairly large at 1000 x 512ns = 512 usec.
         */
        /* PBA low word is the RX allocation in KB; convert to bytes */
        rx_buffer_size = ((E1000_READ_REG(hw, E1000_PBA) & 0xffff) << 10 );
        hw->fc.high_water = rx_buffer_size -
            roundup2(adapter->hw.mac.max_frame_size, 1024);
        hw->fc.low_water = hw->fc.high_water - 1500;

        if (adapter->fc) /* locally set flow control value? */
                hw->fc.requested_mode = adapter->fc;
        else
                hw->fc.requested_mode = e1000_fc_full;

        if (hw->mac.type == e1000_80003es2lan)
                hw->fc.pause_time = 0xFFFF;
        else
                hw->fc.pause_time = EM_FC_PAUSE_TIME;

        hw->fc.send_xon = TRUE;

        /* Device specific overrides/settings */
        switch (hw->mac.type) {
        case e1000_pchlan:
                /* Workaround: no TX flow ctrl for PCH */
                hw->fc.requested_mode = e1000_fc_rx_pause;
                hw->fc.pause_time = 0xFFFF; /* override */
                if (ifp->if_mtu > ETHERMTU) {
                        hw->fc.high_water = 0x3500;
                        hw->fc.low_water = 0x1500;
                } else {
                        hw->fc.high_water = 0x5000;
                        hw->fc.low_water = 0x3000;
                }
                hw->fc.refresh_time = 0x1000;
                break;
        case e1000_pch2lan:
        case e1000_pch_lpt:
                hw->fc.high_water = 0x5C20;
                hw->fc.low_water = 0x5048;
                hw->fc.pause_time = 0x0650;
                hw->fc.refresh_time = 0x0400;
                /* Jumbos need adjusted PBA */
                if (ifp->if_mtu > ETHERMTU)
                        E1000_WRITE_REG(hw, E1000_PBA, 12);
                else
                        E1000_WRITE_REG(hw, E1000_PBA, 26);
                break;
        case e1000_ich9lan:
        case e1000_ich10lan:
                if (ifp->if_mtu > ETHERMTU) {
                        hw->fc.high_water = 0x2800;
                        hw->fc.low_water = hw->fc.high_water - 8;
                        break;
                } 
                /* else fall thru */
        default:
                if (hw->mac.type == e1000_80003es2lan)
                        hw->fc.pause_time = 0xFFFF;
                break;
        }

        /* Issue a global reset */
        e1000_reset_hw(hw);
        /* Clear Wake Up Control, then deal with ASPM quirks */
        E1000_WRITE_REG(hw, E1000_WUC, 0);
        em_disable_aspm(adapter);
        /* and a re-init */
        if (e1000_init_hw(hw) < 0) {
                device_printf(dev, "Hardware Initialization Failed\n");
                return;
        }

        /* Set the VLAN Ethertype and refresh PHY/link state */
        E1000_WRITE_REG(hw, E1000_VET, ETHERTYPE_VLAN);
        e1000_get_phy_info(hw);
        e1000_check_for_link(hw);
        return;
}
2939
2940 /*********************************************************************
2941  *
2942  *  Setup networking device structure and register an interface.
2943  *
2944  **********************************************************************/
static int
em_setup_interface(device_t dev, struct adapter *adapter)
{
        struct ifnet   *ifp;

        INIT_DEBUGOUT("em_setup_interface: begin");

        /* Allocate and wire up the ifnet */
        ifp = adapter->ifp = if_alloc(IFT_ETHER);
        if (ifp == NULL) {
                device_printf(dev, "can not allocate ifnet structure\n");
                return (-1);
        }
        if_initname(ifp, device_get_name(dev), device_get_unit(dev));
        ifp->if_init =  em_init;
        ifp->if_softc = adapter;
        ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
        ifp->if_ioctl = em_ioctl;
#ifdef EM_MULTIQUEUE
        /* Multiqueue stack interface */
        ifp->if_transmit = em_mq_start;
        ifp->if_qflush = em_qflush;
#else
        /* Classic if_start interface with a bounded send queue */
        ifp->if_start = em_start;
        IFQ_SET_MAXLEN(&ifp->if_snd, adapter->num_tx_desc - 1);
        ifp->if_snd.ifq_drv_maxlen = adapter->num_tx_desc - 1;
        IFQ_SET_READY(&ifp->if_snd);
#endif  

        ether_ifattach(ifp, adapter->hw.mac.addr);

        ifp->if_capabilities = ifp->if_capenable = 0;


        /* Checksum offload and TSO */
        ifp->if_capabilities |= IFCAP_HWCSUM | IFCAP_VLAN_HWCSUM;
        ifp->if_capabilities |= IFCAP_TSO4;
        /*
         * Tell the upper layer(s) we
         * support full VLAN capability
         */
        ifp->if_data.ifi_hdrlen = sizeof(struct ether_vlan_header);
        ifp->if_capabilities |= IFCAP_VLAN_HWTAGGING
                             |  IFCAP_VLAN_HWTSO
                             |  IFCAP_VLAN_MTU;
        ifp->if_capenable = ifp->if_capabilities;

        /*
        ** Don't turn this on by default, if vlans are
        ** created on another pseudo device (eg. lagg)
        ** then vlan events are not passed thru, breaking
        ** operation, but with HW FILTER off it works. If
        ** using vlans directly on the em driver you can
        ** enable this and get full hardware tag filtering.
        */
        ifp->if_capabilities |= IFCAP_VLAN_HWFILTER;

#ifdef DEVICE_POLLING
        ifp->if_capabilities |= IFCAP_POLLING;
#endif

        /* Enable only WOL MAGIC by default */
        if (adapter->wol) {
                ifp->if_capabilities |= IFCAP_WOL;
                ifp->if_capenable |= IFCAP_WOL_MAGIC;
        }
                
        /*
         * Specify the media types supported by this adapter and register
         * callbacks to update media and link information
         */
        ifmedia_init(&adapter->media, IFM_IMASK,
            em_media_change, em_media_status);
        if ((adapter->hw.phy.media_type == e1000_media_type_fiber) ||
            (adapter->hw.phy.media_type == e1000_media_type_internal_serdes)) {
                u_char fiber_type = IFM_1000_SX;        /* default type */

                ifmedia_add(&adapter->media, IFM_ETHER | fiber_type | IFM_FDX, 
                            0, NULL);
                ifmedia_add(&adapter->media, IFM_ETHER | fiber_type, 0, NULL);
        } else {
                /* Copper: advertise the usual 10/100/1000 set */
                ifmedia_add(&adapter->media, IFM_ETHER | IFM_10_T, 0, NULL);
                ifmedia_add(&adapter->media, IFM_ETHER | IFM_10_T | IFM_FDX,
                            0, NULL);
                ifmedia_add(&adapter->media, IFM_ETHER | IFM_100_TX,
                            0, NULL);
                ifmedia_add(&adapter->media, IFM_ETHER | IFM_100_TX | IFM_FDX,
                            0, NULL);
                /* The IFE PHY does not do gigabit */
                if (adapter->hw.phy.type != e1000_phy_ife) {
                        ifmedia_add(&adapter->media,
                                IFM_ETHER | IFM_1000_T | IFM_FDX, 0, NULL);
                        ifmedia_add(&adapter->media,
                                IFM_ETHER | IFM_1000_T, 0, NULL);
                }
        }
        ifmedia_add(&adapter->media, IFM_ETHER | IFM_AUTO, 0, NULL);
        ifmedia_set(&adapter->media, IFM_ETHER | IFM_AUTO);
        return (0);
}
3042
3043
3044 /*
3045  * Manage DMA'able memory.
3046  */
3047 static void
3048 em_dmamap_cb(void *arg, bus_dma_segment_t *segs, int nseg, int error)
3049 {
3050         if (error)
3051                 return;
3052         *(bus_addr_t *) arg = segs[0].ds_addr;
3053 }
3054
3055 static int
3056 em_dma_malloc(struct adapter *adapter, bus_size_t size,
3057         struct em_dma_alloc *dma, int mapflags)
3058 {
3059         int error;
3060
3061         error = bus_dma_tag_create(bus_get_dma_tag(adapter->dev), /* parent */
3062                                 EM_DBA_ALIGN, 0,        /* alignment, bounds */
3063                                 BUS_SPACE_MAXADDR,      /* lowaddr */
3064                                 BUS_SPACE_MAXADDR,      /* highaddr */
3065                                 NULL, NULL,             /* filter, filterarg */
3066                                 size,                   /* maxsize */
3067                                 1,                      /* nsegments */
3068                                 size,                   /* maxsegsize */
3069                                 0,                      /* flags */
3070                                 NULL,                   /* lockfunc */
3071                                 NULL,                   /* lockarg */
3072                                 &dma->dma_tag);
3073         if (error) {
3074                 device_printf(adapter->dev,
3075                     "%s: bus_dma_tag_create failed: %d\n",
3076                     __func__, error);
3077                 goto fail_0;
3078         }
3079
3080         error = bus_dmamem_alloc(dma->dma_tag, (void**) &dma->dma_vaddr,
3081             BUS_DMA_NOWAIT | BUS_DMA_COHERENT, &dma->dma_map);
3082         if (error) {
3083                 device_printf(adapter->dev,
3084                     "%s: bus_dmamem_alloc(%ju) failed: %d\n",
3085                     __func__, (uintmax_t)size, error);
3086                 goto fail_2;
3087         }
3088
3089         dma->dma_paddr = 0;
3090         error = bus_dmamap_load(dma->dma_tag, dma->dma_map, dma->dma_vaddr,
3091             size, em_dmamap_cb, &dma->dma_paddr, mapflags | BUS_DMA_NOWAIT);
3092         if (error || dma->dma_paddr == 0) {
3093                 device_printf(adapter->dev,
3094                     "%s: bus_dmamap_load failed: %d\n",
3095                     __func__, error);
3096                 goto fail_3;
3097         }
3098
3099         return (0);
3100
3101 fail_3:
3102         bus_dmamap_unload(dma->dma_tag, dma->dma_map);
3103 fail_2:
3104         bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map);
3105         bus_dma_tag_destroy(dma->dma_tag);
3106 fail_0:
3107         dma->dma_map = NULL;
3108         dma->dma_tag = NULL;
3109
3110         return (error);
3111 }
3112
/*
 * Release a DMA area set up by em_dma_malloc(): sync, unload and free
 * the memory, then destroy the tag.  Safe to call on a descriptor that
 * failed allocation (dma_tag NULL) or was already freed.
 */
static void
em_dma_free(struct adapter *adapter, struct em_dma_alloc *dma)
{
        /* NULL tag: em_dma_malloc() failed or this was already freed. */
        if (dma->dma_tag == NULL)
                return;
        if (dma->dma_map != NULL) {
                /* Finish any in-flight DMA before tearing down; per
                 * bus_dma(9) the order is sync -> unload -> free. */
                bus_dmamap_sync(dma->dma_tag, dma->dma_map,
                    BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
                bus_dmamap_unload(dma->dma_tag, dma->dma_map);
                bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map);
                dma->dma_map = NULL;
        }
        bus_dma_tag_destroy(dma->dma_tag);
        dma->dma_tag = NULL;
}
3128
3129
3130 /*********************************************************************
3131  *
3132  *  Allocate memory for the transmit and receive rings, and then
3133  *  the descriptors associated with each, called only once at attach.
3134  *
3135  **********************************************************************/
3136 static int
3137 em_allocate_queues(struct adapter *adapter)
3138 {
3139         device_t                dev = adapter->dev;
3140         struct tx_ring          *txr = NULL;
3141         struct rx_ring          *rxr = NULL;
3142         int rsize, tsize, error = E1000_SUCCESS;
3143         int txconf = 0, rxconf = 0;
3144
3145
3146         /* Allocate the TX ring struct memory */
3147         if (!(adapter->tx_rings =
3148             (struct tx_ring *) malloc(sizeof(struct tx_ring) *
3149             adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
3150                 device_printf(dev, "Unable to allocate TX ring memory\n");
3151                 error = ENOMEM;
3152                 goto fail;
3153         }
3154
3155         /* Now allocate the RX */
3156         if (!(adapter->rx_rings =
3157             (struct rx_ring *) malloc(sizeof(struct rx_ring) *
3158             adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
3159                 device_printf(dev, "Unable to allocate RX ring memory\n");
3160                 error = ENOMEM;
3161                 goto rx_fail;
3162         }
3163
3164         tsize = roundup2(adapter->num_tx_desc *
3165             sizeof(struct e1000_tx_desc), EM_DBA_ALIGN);
3166         /*
3167          * Now set up the TX queues, txconf is needed to handle the
3168          * possibility that things fail midcourse and we need to
3169          * undo memory gracefully
3170          */ 
3171         for (int i = 0; i < adapter->num_queues; i++, txconf++) {
3172                 /* Set up some basics */
3173                 txr = &adapter->tx_rings[i];
3174                 txr->adapter = adapter;
3175                 txr->me = i;
3176
3177                 /* Initialize the TX lock */
3178                 snprintf(txr->mtx_name, sizeof(txr->mtx_name), "%s:tx(%d)",
3179                     device_get_nameunit(dev), txr->me);
3180                 mtx_init(&txr->tx_mtx, txr->mtx_name, NULL, MTX_DEF);
3181
3182                 if (em_dma_malloc(adapter, tsize,
3183                         &txr->txdma, BUS_DMA_NOWAIT)) {
3184                         device_printf(dev,
3185                             "Unable to allocate TX Descriptor memory\n");
3186                         error = ENOMEM;
3187                         goto err_tx_desc;
3188                 }
3189                 txr->tx_base = (struct e1000_tx_desc *)txr->txdma.dma_vaddr;
3190                 bzero((void *)txr->tx_base, tsize);
3191
3192                 if (em_allocate_transmit_buffers(txr)) {
3193                         device_printf(dev,
3194                             "Critical Failure setting up transmit buffers\n");
3195                         error = ENOMEM;
3196                         goto err_tx_desc;
3197                 }
3198 #if __FreeBSD_version >= 800000
3199                 /* Allocate a buf ring */
3200                 txr->br = buf_ring_alloc(4096, M_DEVBUF,
3201                     M_WAITOK, &txr->tx_mtx);
3202 #endif
3203         }
3204
3205         /*
3206          * Next the RX queues...
3207          */ 
3208         rsize = roundup2(adapter->num_rx_desc *
3209             sizeof(struct e1000_rx_desc), EM_DBA_ALIGN);
3210         for (int i = 0; i < adapter->num_queues; i++, rxconf++) {
3211                 rxr = &adapter->rx_rings[i];
3212                 rxr->adapter = adapter;
3213                 rxr->me = i;
3214
3215                 /* Initialize the RX lock */
3216                 snprintf(rxr->mtx_name, sizeof(rxr->mtx_name), "%s:rx(%d)",
3217                     device_get_nameunit(dev), txr->me);
3218                 mtx_init(&rxr->rx_mtx, rxr->mtx_name, NULL, MTX_DEF);
3219
3220                 if (em_dma_malloc(adapter, rsize,
3221                         &rxr->rxdma, BUS_DMA_NOWAIT)) {
3222                         device_printf(dev,
3223                             "Unable to allocate RxDescriptor memory\n");
3224                         error = ENOMEM;
3225                         goto err_rx_desc;
3226                 }
3227                 rxr->rx_base = (struct e1000_rx_desc *)rxr->rxdma.dma_vaddr;
3228                 bzero((void *)rxr->rx_base, rsize);
3229
3230                 /* Allocate receive buffers for the ring*/
3231                 if (em_allocate_receive_buffers(rxr)) {
3232                         device_printf(dev,
3233                             "Critical Failure setting up receive buffers\n");
3234                         error = ENOMEM;
3235                         goto err_rx_desc;
3236                 }
3237         }
3238
3239         return (0);
3240
3241 err_rx_desc:
3242         for (rxr = adapter->rx_rings; rxconf > 0; rxr++, rxconf--)
3243                 em_dma_free(adapter, &rxr->rxdma);
3244 err_tx_desc:
3245         for (txr = adapter->tx_rings; txconf > 0; txr++, txconf--)
3246                 em_dma_free(adapter, &txr->txdma);
3247         free(adapter->rx_rings, M_DEVBUF);
3248 rx_fail:
3249 #if __FreeBSD_version >= 800000
3250         buf_ring_free(txr->br, M_DEVBUF);
3251 #endif
3252         free(adapter->tx_rings, M_DEVBUF);
3253 fail:
3254         return (error);
3255 }
3256
3257
3258 /*********************************************************************
3259  *
3260  *  Allocate memory for tx_buffer structures. The tx_buffer stores all
3261  *  the information needed to transmit a packet on the wire. This is
3262  *  called only once at attach, setup is done every reset.
3263  *
3264  **********************************************************************/
3265 static int
3266 em_allocate_transmit_buffers(struct tx_ring *txr)
3267 {
3268         struct adapter *adapter = txr->adapter;
3269         device_t dev = adapter->dev;
3270         struct em_buffer *txbuf;
3271         int error, i;
3272
3273         /*
3274          * Setup DMA descriptor areas.
3275          */
3276         if ((error = bus_dma_tag_create(bus_get_dma_tag(dev),
3277                                1, 0,                    /* alignment, bounds */
3278                                BUS_SPACE_MAXADDR,       /* lowaddr */
3279                                BUS_SPACE_MAXADDR,       /* highaddr */
3280                                NULL, NULL,              /* filter, filterarg */
3281                                EM_TSO_SIZE,             /* maxsize */
3282                                EM_MAX_SCATTER,          /* nsegments */
3283                                PAGE_SIZE,               /* maxsegsize */
3284                                0,                       /* flags */
3285                                NULL,                    /* lockfunc */
3286                                NULL,                    /* lockfuncarg */
3287                                &txr->txtag))) {
3288                 device_printf(dev,"Unable to allocate TX DMA tag\n");
3289                 goto fail;
3290         }
3291
3292         if (!(txr->tx_buffers =
3293             (struct em_buffer *) malloc(sizeof(struct em_buffer) *
3294             adapter->num_tx_desc, M_DEVBUF, M_NOWAIT | M_ZERO))) {
3295                 device_printf(dev, "Unable to allocate tx_buffer memory\n");
3296                 error = ENOMEM;
3297                 goto fail;
3298         }
3299
3300         /* Create the descriptor buffer dma maps */
3301         txbuf = txr->tx_buffers;
3302         for (i = 0; i < adapter->num_tx_desc; i++, txbuf++) {
3303                 error = bus_dmamap_create(txr->txtag, 0, &txbuf->map);
3304                 if (error != 0) {
3305                         device_printf(dev, "Unable to create TX DMA map\n");
3306                         goto fail;
3307                 }
3308         }
3309
3310         return 0;
3311 fail:
3312         /* We free all, it handles case where we are in the middle */
3313         em_free_transmit_structures(adapter);
3314         return (error);
3315 }
3316
3317 /*********************************************************************
3318  *
3319  *  Initialize a transmit ring.
3320  *
3321  **********************************************************************/
3322 static void
3323 em_setup_transmit_ring(struct tx_ring *txr)
3324 {
3325         struct adapter *adapter = txr->adapter;
3326         struct em_buffer *txbuf;
3327         int i;
3328 #ifdef DEV_NETMAP
3329         struct netmap_adapter *na = NA(adapter->ifp);
3330         struct netmap_slot *slot;
3331 #endif /* DEV_NETMAP */
3332
3333         /* Clear the old descriptor contents */
3334         EM_TX_LOCK(txr);
3335 #ifdef DEV_NETMAP
3336         slot = netmap_reset(na, NR_TX, txr->me, 0);
3337 #endif /* DEV_NETMAP */
3338
3339         bzero((void *)txr->tx_base,
3340               (sizeof(struct e1000_tx_desc)) * adapter->num_tx_desc);
3341         /* Reset indices */
3342         txr->next_avail_desc = 0;
3343         txr->next_to_clean = 0;
3344
3345         /* Free any existing tx buffers. */
3346         txbuf = txr->tx_buffers;
3347         for (i = 0; i < adapter->num_tx_desc; i++, txbuf++) {
3348                 if (txbuf->m_head != NULL) {
3349                         bus_dmamap_sync(txr->txtag, txbuf->map,
3350                             BUS_DMASYNC_POSTWRITE);
3351                         bus_dmamap_unload(txr->txtag, txbuf->map);
3352                         m_freem(txbuf->m_head);
3353                         txbuf->m_head = NULL;
3354                 }
3355 #ifdef DEV_NETMAP
3356                 if (slot) {
3357                         int si = netmap_idx_n2k(&na->tx_rings[txr->me], i);
3358                         uint64_t paddr;
3359                         void *addr;
3360
3361                         addr = PNMB(slot + si, &paddr);
3362                         txr->tx_base[i].buffer_addr = htole64(paddr);
3363                         /* reload the map for netmap mode */
3364                         netmap_load_map(txr->txtag, txbuf->map, addr);
3365                 }
3366 #endif /* DEV_NETMAP */
3367
3368                 /* clear the watch index */
3369                 txbuf->next_eop = -1;
3370         }
3371
3372         /* Set number of descriptors available */
3373         txr->tx_avail = adapter->num_tx_desc;
3374         txr->queue_status = EM_QUEUE_IDLE;
3375
3376         /* Clear checksum offload context. */
3377         txr->last_hw_offload = 0;
3378         txr->last_hw_ipcss = 0;
3379         txr->last_hw_ipcso = 0;
3380         txr->last_hw_tucss = 0;
3381         txr->last_hw_tucso = 0;
3382
3383         bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
3384             BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
3385         EM_TX_UNLOCK(txr);
3386 }
3387
3388 /*********************************************************************
3389  *
3390  *  Initialize all transmit rings.
3391  *
3392  **********************************************************************/
3393 static void
3394 em_setup_transmit_structures(struct adapter *adapter)
3395 {
3396         struct tx_ring *txr = adapter->tx_rings;
3397
3398         for (int i = 0; i < adapter->num_queues; i++, txr++)
3399                 em_setup_transmit_ring(txr);
3400
3401         return;
3402 }
3403
3404 /*********************************************************************
3405  *
3406  *  Enable transmit unit.
3407  *
3408  **********************************************************************/
3409 static void
3410 em_initialize_transmit_unit(struct adapter *adapter)
3411 {
3412         struct tx_ring  *txr = adapter->tx_rings;
3413         struct e1000_hw *hw = &adapter->hw;
3414         u32     tctl, tarc, tipg = 0;
3415
3416          INIT_DEBUGOUT("em_initialize_transmit_unit: begin");
3417
3418         for (int i = 0; i < adapter->num_queues; i++, txr++) {
3419                 u64 bus_addr = txr->txdma.dma_paddr;
3420                 /* Base and Len of TX Ring */
3421                 E1000_WRITE_REG(hw, E1000_TDLEN(i),
3422                     adapter->num_tx_desc * sizeof(struct e1000_tx_desc));
3423                 E1000_WRITE_REG(hw, E1000_TDBAH(i),
3424                     (u32)(bus_addr >> 32));
3425                 E1000_WRITE_REG(hw, E1000_TDBAL(i),
3426                     (u32)bus_addr);
3427                 /* Init the HEAD/TAIL indices */
3428                 E1000_WRITE_REG(hw, E1000_TDT(i), 0);
3429                 E1000_WRITE_REG(hw, E1000_TDH(i), 0);
3430
3431                 HW_DEBUGOUT2("Base = %x, Length = %x\n",
3432                     E1000_READ_REG(&adapter->hw, E1000_TDBAL(i)),
3433                     E1000_READ_REG(&adapter->hw, E1000_TDLEN(i)));
3434
3435                 txr->queue_status = EM_QUEUE_IDLE;
3436         }
3437
3438         /* Set the default values for the Tx Inter Packet Gap timer */
3439         switch (adapter->hw.mac.type) {
3440         case e1000_80003es2lan:
3441                 tipg = DEFAULT_82543_TIPG_IPGR1;
3442                 tipg |= DEFAULT_80003ES2LAN_TIPG_IPGR2 <<
3443                     E1000_TIPG_IPGR2_SHIFT;
3444                 break;
3445         default:
3446                 if ((adapter->hw.phy.media_type == e1000_media_type_fiber) ||
3447                     (adapter->hw.phy.media_type ==
3448                     e1000_media_type_internal_serdes))
3449                         tipg = DEFAULT_82543_TIPG_IPGT_FIBER;
3450                 else
3451                         tipg = DEFAULT_82543_TIPG_IPGT_COPPER;
3452                 tipg |= DEFAULT_82543_TIPG_IPGR1 << E1000_TIPG_IPGR1_SHIFT;
3453                 tipg |= DEFAULT_82543_TIPG_IPGR2 << E1000_TIPG_IPGR2_SHIFT;
3454         }
3455
3456         E1000_WRITE_REG(&adapter->hw, E1000_TIPG, tipg);
3457         E1000_WRITE_REG(&adapter->hw, E1000_TIDV, adapter->tx_int_delay.value);
3458
3459         if(adapter->hw.mac.type >= e1000_82540)
3460                 E1000_WRITE_REG(&adapter->hw, E1000_TADV,
3461                     adapter->tx_abs_int_delay.value);
3462
3463         if ((adapter->hw.mac.type == e1000_82571) ||
3464             (adapter->hw.mac.type == e1000_82572)) {
3465                 tarc = E1000_READ_REG(&adapter->hw, E1000_TARC(0));
3466                 tarc |= SPEED_MODE_BIT;
3467                 E1000_WRITE_REG(&adapter->hw, E1000_TARC(0), tarc);
3468         } else if (adapter->hw.mac.type == e1000_80003es2lan) {
3469                 tarc = E1000_READ_REG(&adapter->hw, E1000_TARC(0));
3470                 tarc |= 1;
3471                 E1000_WRITE_REG(&adapter->hw, E1000_TARC(0), tarc);
3472                 tarc = E1000_READ_REG(&adapter->hw, E1000_TARC(1));
3473                 tarc |= 1;
3474                 E1000_WRITE_REG(&adapter->hw, E1000_TARC(1), tarc);
3475         }
3476
3477         adapter->txd_cmd = E1000_TXD_CMD_IFCS;
3478         if (adapter->tx_int_delay.value > 0)
3479                 adapter->txd_cmd |= E1000_TXD_CMD_IDE;
3480
3481         /* Program the Transmit Control Register */
3482         tctl = E1000_READ_REG(&adapter->hw, E1000_TCTL);
3483         tctl &= ~E1000_TCTL_CT;
3484         tctl |= (E1000_TCTL_PSP | E1000_TCTL_RTLC | E1000_TCTL_EN |
3485                    (E1000_COLLISION_THRESHOLD << E1000_CT_SHIFT));
3486
3487         if (adapter->hw.mac.type >= e1000_82571)
3488                 tctl |= E1000_TCTL_MULR;
3489
3490         /* This write will effectively turn on the transmit unit. */
3491         E1000_WRITE_REG(&adapter->hw, E1000_TCTL, tctl);
3492
3493 }
3494
3495
3496 /*********************************************************************
3497  *
3498  *  Free all transmit rings.
3499  *
3500  **********************************************************************/
3501 static void
3502 em_free_transmit_structures(struct adapter *adapter)
3503 {
3504         struct tx_ring *txr = adapter->tx_rings;
3505
3506         for (int i = 0; i < adapter->num_queues; i++, txr++) {
3507                 EM_TX_LOCK(txr);
3508                 em_free_transmit_buffers(txr);
3509                 em_dma_free(adapter, &txr->txdma);
3510                 EM_TX_UNLOCK(txr);
3511                 EM_TX_LOCK_DESTROY(txr);
3512         }
3513
3514         free(adapter->tx_rings, M_DEVBUF);
3515 }
3516
3517 /*********************************************************************
3518  *
3519  *  Free transmit ring related data structures.
3520  *
3521  **********************************************************************/
3522 static void
3523 em_free_transmit_buffers(struct tx_ring *txr)
3524 {
3525         struct adapter          *adapter = txr->adapter;
3526         struct em_buffer        *txbuf;
3527
3528         INIT_DEBUGOUT("free_transmit_ring: begin");
3529
3530         if (txr->tx_buffers == NULL)
3531                 return;
3532
3533         for (int i = 0; i < adapter->num_tx_desc; i++) {
3534                 txbuf = &txr->tx_buffers[i];
3535                 if (txbuf->m_head != NULL) {
3536                         bus_dmamap_sync(txr->txtag, txbuf->map,
3537                             BUS_DMASYNC_POSTWRITE);
3538                         bus_dmamap_unload(txr->txtag,
3539                             txbuf->map);
3540                         m_freem(txbuf->m_head);
3541                         txbuf->m_head = NULL;
3542                         if (txbuf->map != NULL) {
3543                                 bus_dmamap_destroy(txr->txtag,
3544                                     txbuf->map);
3545                                 txbuf->map = NULL;
3546                         }
3547                 } else if (txbuf->map != NULL) {
3548                         bus_dmamap_unload(txr->txtag,
3549                             txbuf->map);
3550                         bus_dmamap_destroy(txr->txtag,
3551                             txbuf->map);
3552                         txbuf->map = NULL;
3553                 }
3554         }
3555 #if __FreeBSD_version >= 800000
3556         if (txr->br != NULL)
3557                 buf_ring_free(txr->br, M_DEVBUF);
3558 #endif
3559         if (txr->tx_buffers != NULL) {
3560                 free(txr->tx_buffers, M_DEVBUF);
3561                 txr->tx_buffers = NULL;
3562         }
3563         if (txr->txtag != NULL) {
3564                 bus_dma_tag_destroy(txr->txtag);
3565                 txr->txtag = NULL;
3566         }
3567         return;
3568 }
3569
3570
3571 /*********************************************************************
3572  *  The offload context is protocol specific (TCP/UDP) and thus
3573  *  only needs to be set when the protocol changes. The occasion
3574  *  of a context change can be a performance detriment, and
3575  *  might be better just disabled. The reason arises in the way
3576  *  in which the controller supports pipelined requests from the
3577  *  Tx data DMA. Up to four requests can be pipelined, and they may
3578  *  belong to the same packet or to multiple packets. However all
3579  *  requests for one packet are issued before a request is issued
3580  *  for a subsequent packet and if a request for the next packet
3581  *  requires a context change, that request will be stalled
3582  *  until the previous request completes. This means setting up
3583  *  a new context effectively disables pipelined Tx data DMA which
3584  *  in turn greatly slow down performance to send small sized
3585  *  frames. 
3586  **********************************************************************/
3587 static void
3588 em_transmit_checksum_setup(struct tx_ring *txr, struct mbuf *mp, int ip_off,
3589     struct ip *ip, u32 *txd_upper, u32 *txd_lower)
3590 {
3591         struct adapter                  *adapter = txr->adapter;
3592         struct e1000_context_desc       *TXD = NULL;
3593         struct em_buffer                *tx_buffer;
3594         int                             cur, hdr_len;
3595         u32                             cmd = 0;
3596         u16                             offload = 0;
3597         u8                              ipcso, ipcss, tucso, tucss;
3598
3599         ipcss = ipcso = tucss = tucso = 0;
3600         hdr_len = ip_off + (ip->ip_hl << 2);
3601         cur = txr->next_avail_desc;
3602
3603         /* Setup of IP header checksum. */
3604         if (mp->m_pkthdr.csum_flags & CSUM_IP) {
3605                 *txd_upper |= E1000_TXD_POPTS_IXSM << 8;
3606                 offload |= CSUM_IP;
3607                 ipcss = ip_off;
3608                 ipcso = ip_off + offsetof(struct ip, ip_sum);
3609                 /*
3610                  * Start offset for header checksum calculation.
3611                  * End offset for header checksum calculation.
3612                  * Offset of place to put the checksum.
3613                  */
3614                 TXD = (struct e1000_context_desc *)&txr->tx_base[cur];
3615                 TXD->lower_setup.ip_fields.ipcss = ipcss;
3616                 TXD->lower_setup.ip_fields.ipcse = htole16(hdr_len);
3617                 TXD->lower_setup.ip_fields.ipcso = ipcso;
3618                 cmd |= E1000_TXD_CMD_IP;
3619         }
3620
3621         if (mp->m_pkthdr.csum_flags & CSUM_TCP) {
3622                 *txd_lower = E1000_TXD_CMD_DEXT | E1000_TXD_DTYP_D;
3623                 *txd_upper |= E1000_TXD_POPTS_TXSM << 8;
3624                 offload |= CSUM_TCP;
3625                 tucss = hdr_len;
3626                 tucso = hdr_len + offsetof(struct tcphdr, th_sum);
3627                 /*
3628                  * Setting up new checksum offload context for every frames
3629                  * takes a lot of processing time for hardware. This also
3630                  * reduces performance a lot for small sized frames so avoid
3631                  * it if driver can use previously configured checksum
3632                  * offload context.
3633                  */
3634                 if (txr->last_hw_offload == offload) {
3635                         if (offload & CSUM_IP) {
3636                                 if (txr->last_hw_ipcss == ipcss &&
3637                                     txr->last_hw_ipcso == ipcso &&
3638                                     txr->last_hw_tucss == tucss &&
3639                                     txr->last_hw_tucso == tucso)
3640                                         return;
3641                         } else {
3642                                 if (txr->last_hw_tucss == tucss &&
3643                                     txr->last_hw_tucso == tucso)
3644                                         return;
3645                         }
3646                 }
3647                 txr->last_hw_offload = offload;
3648                 txr->last_hw_tucss = tucss;
3649                 txr->last_hw_tucso = tucso;
3650                 /*
3651                  * Start offset for payload checksum calculation.
3652                  * End offset for payload checksum calculation.
3653                  * Offset of place to put the checksum.
3654                  */
3655                 TXD = (struct e1000_context_desc *)&txr->tx_base[cur];
3656                 TXD->upper_setup.tcp_fields.tucss = hdr_len;
3657                 TXD->upper_setup.tcp_fields.tucse = htole16(0);
3658                 TXD->upper_setup.tcp_fields.tucso = tucso;
3659                 cmd |= E1000_TXD_CMD_TCP;
3660         } else if (mp->m_pkthdr.csum_flags & CSUM_UDP) {
3661                 *txd_lower = E1000_TXD_CMD_DEXT | E1000_TXD_DTYP_D;
3662                 *txd_upper |= E1000_TXD_POPTS_TXSM << 8;
3663                 tucss = hdr_len;
3664                 tucso = hdr_len + offsetof(struct udphdr, uh_sum);
3665                 /*
3666                  * Setting up new checksum offload context for every frames
3667                  * takes a lot of processing time for hardware. This also
3668                  * reduces performance a lot for small sized frames so avoid
3669                  * it if driver can use previously configured checksum
3670                  * offload context.
3671                  */
3672                 if (txr->last_hw_offload == offload) {
3673                         if (offload & CSUM_IP) {
3674                                 if (txr->last_hw_ipcss == ipcss &&
3675                                     txr->last_hw_ipcso == ipcso &&
3676                                     txr->last_hw_tucss == tucss &&
3677                                     txr->last_hw_tucso == tucso)
3678                                         return;
3679                         } else {
3680                                 if (txr->last_hw_tucss == tucss &&
3681                                     txr->last_hw_tucso == tucso)
3682                                         return;
3683                         }
3684                 }
3685                 txr->last_hw_offload = offload;
3686                 txr->last_hw_tucss = tucss;
3687                 txr->last_hw_tucso = tucso;
3688                 /*
3689                  * Start offset for header checksum calculation.
3690                  * End offset for header checksum calculation.
3691                  * Offset of place to put the checksum.
3692                  */
3693                 TXD = (struct e1000_context_desc *)&txr->tx_base[cur];
3694                 TXD->upper_setup.tcp_fields.tucss = tucss;
3695                 TXD->upper_setup.tcp_fields.tucse = htole16(0);
3696                 TXD->upper_setup.tcp_fields.tucso = tucso;
3697         }
3698   
3699         if (offload & CSUM_IP) {
3700                 txr->last_hw_ipcss = ipcss;
3701                 txr->last_hw_ipcso = ipcso;
3702         }
3703
3704         TXD->tcp_seg_setup.data = htole32(0);
3705         TXD->cmd_and_length =
3706             htole32(adapter->txd_cmd | E1000_TXD_CMD_DEXT | cmd);
3707         tx_buffer = &txr->tx_buffers[cur];
3708         tx_buffer->m_head = NULL;
3709         tx_buffer->next_eop = -1;
3710
3711         if (++cur == adapter->num_tx_desc)
3712                 cur = 0;
3713
3714         txr->tx_avail--;
3715         txr->next_avail_desc = cur;
3716 }
3717
3718
3719 /**********************************************************************
3720  *
3721  *  Setup work for hardware segmentation offload (TSO)
3722  *
3723  **********************************************************************/
3724 static void
3725 em_tso_setup(struct tx_ring *txr, struct mbuf *mp, int ip_off,
3726     struct ip *ip, struct tcphdr *tp, u32 *txd_upper, u32 *txd_lower)
3727 {
3728         struct adapter                  *adapter = txr->adapter;
3729         struct e1000_context_desc       *TXD;
3730         struct em_buffer                *tx_buffer;
3731         int cur, hdr_len;
3732
3733         /*
3734          * In theory we can use the same TSO context if and only if
3735          * frame is the same type(IP/TCP) and the same MSS. However
3736          * checking whether a frame has the same IP/TCP structure is
3737          * hard thing so just ignore that and always restablish a
3738          * new TSO context.
3739          */
3740         hdr_len = ip_off + (ip->ip_hl << 2) + (tp->th_off << 2);
3741         *txd_lower = (E1000_TXD_CMD_DEXT |      /* Extended descr type */
3742                       E1000_TXD_DTYP_D |        /* Data descr type */
3743                       E1000_TXD_CMD_TSE);       /* Do TSE on this packet */
3744
3745         /* IP and/or TCP header checksum calculation and insertion. */
3746         *txd_upper = (E1000_TXD_POPTS_IXSM | E1000_TXD_POPTS_TXSM) << 8;
3747
3748         cur = txr->next_avail_desc;
3749         tx_buffer = &txr->tx_buffers[cur];
3750         TXD = (struct e1000_context_desc *) &txr->tx_base[cur];
3751
3752         /*
3753          * Start offset for header checksum calculation.
3754          * End offset for header checksum calculation.
3755          * Offset of place put the checksum.
3756          */
3757         TXD->lower_setup.ip_fields.ipcss = ip_off;
3758         TXD->lower_setup.ip_fields.ipcse =
3759             htole16(ip_off + (ip->ip_hl << 2) - 1);
3760         TXD->lower_setup.ip_fields.ipcso = ip_off + offsetof(struct ip, ip_sum);
3761         /*
3762          * Start offset for payload checksum calculation.
3763          * End offset for payload checksum calculation.
3764          * Offset of place to put the checksum.
3765          */
3766         TXD->upper_setup.tcp_fields.tucss = ip_off + (ip->ip_hl << 2);
3767         TXD->upper_setup.tcp_fields.tucse = 0;
3768         TXD->upper_setup.tcp_fields.tucso =
3769             ip_off + (ip->ip_hl << 2) + offsetof(struct tcphdr, th_sum);
3770         /*
3771          * Payload size per packet w/o any headers.
3772          * Length of all headers up to payload.
3773          */
3774         TXD->tcp_seg_setup.fields.mss = htole16(mp->m_pkthdr.tso_segsz);
3775         TXD->tcp_seg_setup.fields.hdr_len = hdr_len;
3776
3777         TXD->cmd_and_length = htole32(adapter->txd_cmd |
3778                                 E1000_TXD_CMD_DEXT |    /* Extended descr */
3779                                 E1000_TXD_CMD_TSE |     /* TSE context */
3780                                 E1000_TXD_CMD_IP |      /* Do IP csum */
3781                                 E1000_TXD_CMD_TCP |     /* Do TCP checksum */
3782                                 (mp->m_pkthdr.len - (hdr_len))); /* Total len */
3783
3784         tx_buffer->m_head = NULL;
3785         tx_buffer->next_eop = -1;
3786
3787         if (++cur == adapter->num_tx_desc)
3788                 cur = 0;
3789
3790         txr->tx_avail--;
3791         txr->next_avail_desc = cur;
3792         txr->tx_tso = TRUE;
3793 }
3794
3795
3796 /**********************************************************************
3797  *
3798  *  Examine each tx_buffer in the used queue. If the hardware is done
3799  *  processing the packet then free associated resources. The
3800  *  tx_buffer is put back on the free queue.
3801  *
3802  **********************************************************************/
static void
em_txeof(struct tx_ring *txr)
{
	struct adapter	*adapter = txr->adapter;
	int first, last, done, processed;
	struct em_buffer *tx_buffer;
	struct e1000_tx_desc   *tx_desc, *eop_desc;
	struct ifnet   *ifp = adapter->ifp;

	EM_TX_LOCK_ASSERT(txr);
#ifdef DEV_NETMAP
	/* In netmap mode the TX completions belong to netmap entirely. */
	if (netmap_tx_irq(ifp, txr->me |
	    (NETMAP_LOCKED_ENTER | NETMAP_LOCKED_EXIT)))
		return;
#endif /* DEV_NETMAP */

	/* No work, make sure watchdog is off */
	if (txr->tx_avail == adapter->num_tx_desc) {
		txr->queue_status = EM_QUEUE_IDLE;
		return;
	}

	processed = 0;
	first = txr->next_to_clean;
	tx_desc = &txr->tx_base[first];
	tx_buffer = &txr->tx_buffers[first];
	/* next_eop records where the first pending packet ends. */
	last = tx_buffer->next_eop;
	eop_desc = &txr->tx_base[last];

	/*
	 * What this does is get the index of the
	 * first descriptor AFTER the EOP of the
	 * first packet, that way we can do the
	 * simple comparison on the inner while loop.
	 */
	if (++last == adapter->num_tx_desc)
		last = 0;
	done = last;

	/* Pick up status the hardware wrote back into the ring. */
	bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
	    BUS_DMASYNC_POSTREAD);

	/* Outer loop: one iteration per packet whose EOP shows DD set. */
	while (eop_desc->upper.fields.status & E1000_TXD_STAT_DD) {
		/* We clean the range of the packet */
		while (first != done) {
			tx_desc->upper.data = 0;
			tx_desc->lower.data = 0;
			tx_desc->buffer_addr = 0;
			++txr->tx_avail;
			++processed;

			/* Only the buffer holding the mbuf has a mapping. */
			if (tx_buffer->m_head) {
				bus_dmamap_sync(txr->txtag,
				    tx_buffer->map,
				    BUS_DMASYNC_POSTWRITE);
				bus_dmamap_unload(txr->txtag,
				    tx_buffer->map);
				m_freem(tx_buffer->m_head);
				tx_buffer->m_head = NULL;
			}
			tx_buffer->next_eop = -1;
			/* Progress made: reset the hang-detection clock. */
			txr->watchdog_time = ticks;

			if (++first == adapter->num_tx_desc)
				first = 0;

			tx_buffer = &txr->tx_buffers[first];
			tx_desc = &txr->tx_base[first];
		}
		++ifp->if_opackets;
		/* See if we can continue to the next packet */
		last = tx_buffer->next_eop;
		if (last != -1) {
			eop_desc = &txr->tx_base[last];
			/* Get new done point */
			if (++last == adapter->num_tx_desc) last = 0;
			done = last;
		} else
			break;
	}
	/* Hand the (now cleared) descriptors back to the hardware. */
	bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);

	txr->next_to_clean = first;

	/*
	** Watchdog calculation, we know there's
	** work outstanding or the first return
	** would have been taken, so none processed
	** for too long indicates a hang. local timer
	** will examine this and do a reset if needed.
	*/
	if ((!processed) && ((ticks - txr->watchdog_time) > EM_WATCHDOG))
		txr->queue_status = EM_QUEUE_HUNG;

	/*
	 * If we have a minimum free, clear IFF_DRV_OACTIVE
	 * to tell the stack that it is OK to send packets.
	 * Notice that all writes of OACTIVE happen under the
	 * TX lock which, with a single queue, guarantees
	 * sanity.
	 */
	if (txr->tx_avail >= EM_MAX_SCATTER)
		ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;

	/* Disable watchdog if all clean */
	if (txr->tx_avail == adapter->num_tx_desc) {
		txr->queue_status = EM_QUEUE_IDLE;
	}
}
3913
3914
3915 /*********************************************************************
3916  *
3917  *  Refresh RX descriptor mbufs from system mbuf buffer pool.
3918  *
3919  **********************************************************************/
static void
em_refresh_mbufs(struct rx_ring *rxr, int limit)
{
	struct adapter		*adapter = rxr->adapter;
	struct mbuf		*m;
	bus_dma_segment_t	segs[1];
	struct em_buffer	*rxbuf;
	int			i, j, error, nsegs;
	bool			cleaned = FALSE;	/* any descriptor refilled? */

	/* i = slot being worked on; j = one-ahead loop terminator. */
	i = j = rxr->next_to_refresh;
	/*
	** Get one descriptor beyond
	** our work mark to control
	** the loop.
	*/
	if (++j == adapter->num_rx_desc)
		j = 0;

	while (j != limit) {
		rxbuf = &rxr->rx_buffers[i];
		/* m_head == NULL marks a slot whose mbuf was consumed. */
		if (rxbuf->m_head == NULL) {
			m = m_getjcl(M_NOWAIT, MT_DATA,
			    M_PKTHDR, adapter->rx_mbuf_sz);
			/*
			** If we have a temporary resource shortage
			** that causes a failure, just abort refresh
			** for now, we will return to this point when
			** reinvoked from em_rxeof.
			*/
			if (m == NULL)
				goto update;
		} else
			m = rxbuf->m_head;

		/* Reset the mbuf to a pristine, full-size state. */
		m->m_len = m->m_pkthdr.len = adapter->rx_mbuf_sz;
		m->m_flags |= M_PKTHDR;
		m->m_data = m->m_ext.ext_buf;

		/* Use bus_dma machinery to setup the memory mapping  */
		error = bus_dmamap_load_mbuf_sg(rxr->rxtag, rxbuf->map,
		    m, segs, &nsegs, BUS_DMA_NOWAIT);
		if (error != 0) {
			printf("Refresh mbufs: hdr dmamap load"
			    " failure - %d\n", error);
			m_free(m);
			rxbuf->m_head = NULL;
			goto update;
		}
		rxbuf->m_head = m;
		bus_dmamap_sync(rxr->rxtag,
		    rxbuf->map, BUS_DMASYNC_PREREAD);
		/* Point the descriptor at the freshly-mapped buffer. */
		rxr->rx_base[i].buffer_addr = htole64(segs[0].ds_addr);
		cleaned = TRUE;

		i = j; /* Next is precalulated for us */
		rxr->next_to_refresh = i;
		/* Calculate next controlling index */
		if (++j == adapter->num_rx_desc)
			j = 0;
	}
update:
	/*
	** Update the tail pointer only if,
	** and as far as we have refreshed.
	*/
	if (cleaned)
		E1000_WRITE_REG(&adapter->hw,
		    E1000_RDT(rxr->me), rxr->next_to_refresh);

	return;
}
3992
3993
3994 /*********************************************************************
3995  *
3996  *  Allocate memory for rx_buffer structures. Since we use one
3997  *  rx_buffer per received packet, the maximum number of rx_buffer's
3998  *  that we'll need is equal to the number of receive descriptors
3999  *  that we've allocated.
4000  *
4001  **********************************************************************/
4002 static int
4003 em_allocate_receive_buffers(struct rx_ring *rxr)
4004 {
4005         struct adapter          *adapter = rxr->adapter;
4006         device_t                dev = adapter->dev;
4007         struct em_buffer        *rxbuf;
4008         int                     error;
4009
4010         rxr->rx_buffers = malloc(sizeof(struct em_buffer) *
4011             adapter->num_rx_desc, M_DEVBUF, M_NOWAIT | M_ZERO);
4012         if (rxr->rx_buffers == NULL) {
4013                 device_printf(dev, "Unable to allocate rx_buffer memory\n");
4014                 return (ENOMEM);
4015         }
4016
4017         error = bus_dma_tag_create(bus_get_dma_tag(dev), /* parent */
4018                                 1, 0,                   /* alignment, bounds */
4019                                 BUS_SPACE_MAXADDR,      /* lowaddr */
4020                                 BUS_SPACE_MAXADDR,      /* highaddr */
4021                                 NULL, NULL,             /* filter, filterarg */
4022                                 MJUM9BYTES,             /* maxsize */
4023                                 1,                      /* nsegments */
4024                                 MJUM9BYTES,             /* maxsegsize */
4025                                 0,                      /* flags */
4026                                 NULL,                   /* lockfunc */
4027                                 NULL,                   /* lockarg */
4028                                 &rxr->rxtag);
4029         if (error) {
4030                 device_printf(dev, "%s: bus_dma_tag_create failed %d\n",
4031                     __func__, error);
4032                 goto fail;
4033         }
4034
4035         rxbuf = rxr->rx_buffers;
4036         for (int i = 0; i < adapter->num_rx_desc; i++, rxbuf++) {
4037                 rxbuf = &rxr->rx_buffers[i];
4038                 error = bus_dmamap_create(rxr->rxtag, BUS_DMA_NOWAIT,
4039                     &rxbuf->map);
4040                 if (error) {
4041                         device_printf(dev, "%s: bus_dmamap_create failed: %d\n",
4042                             __func__, error);
4043                         goto fail;
4044                 }
4045         }
4046
4047         return (0);
4048
4049 fail:
4050         em_free_receive_structures(adapter);
4051         return (error);
4052 }
4053
4054
4055 /*********************************************************************
4056  *
4057  *  Initialize a receive ring and its buffers.
4058  *
4059  **********************************************************************/
static int
em_setup_receive_ring(struct rx_ring *rxr)
{
	struct	adapter		*adapter = rxr->adapter;
	struct em_buffer	*rxbuf;
	bus_dma_segment_t	seg[1];
	int			rsize, nsegs, error = 0;
#ifdef DEV_NETMAP
	struct netmap_adapter *na = NA(adapter->ifp);
	struct netmap_slot *slot;
#endif


	/* Clear the ring contents */
	EM_RX_LOCK(rxr);
	rsize = roundup2(adapter->num_rx_desc *
	    sizeof(struct e1000_rx_desc), EM_DBA_ALIGN);
	bzero((void *)rxr->rx_base, rsize);
#ifdef DEV_NETMAP
	/* Non-NULL slot means a netmap client owns the ring buffers. */
	slot = netmap_reset(na, NR_RX, 0, 0);
#endif

	/*
	** Free current RX buffer structs and their mbufs
	*/
	for (int i = 0; i < adapter->num_rx_desc; i++) {
		rxbuf = &rxr->rx_buffers[i];
		if (rxbuf->m_head != NULL) {
			bus_dmamap_sync(rxr->rxtag, rxbuf->map,
			    BUS_DMASYNC_POSTREAD);
			bus_dmamap_unload(rxr->rxtag, rxbuf->map);
			m_freem(rxbuf->m_head);
			rxbuf->m_head = NULL; /* mark as freed */
		}
	}

	/* Now replenish the mbufs */
	for (int j = 0; j != adapter->num_rx_desc; ++j) {
		rxbuf = &rxr->rx_buffers[j];
#ifdef DEV_NETMAP
		/* Netmap path: map the client's buffer, no mbuf needed. */
		if (slot) {
			int si = netmap_idx_n2k(&na->rx_rings[rxr->me], j);
			uint64_t paddr;
			void *addr;

			addr = PNMB(slot + si, &paddr);
			netmap_load_map(rxr->rxtag, rxbuf->map, addr);
			/* Update descriptor */
			rxr->rx_base[j].buffer_addr = htole64(paddr);
			continue;
		}
#endif /* DEV_NETMAP */
		rxbuf->m_head = m_getjcl(M_NOWAIT, MT_DATA,
		    M_PKTHDR, adapter->rx_mbuf_sz);
		if (rxbuf->m_head == NULL) {
			error = ENOBUFS;
			goto fail;
		}
		rxbuf->m_head->m_len = adapter->rx_mbuf_sz;
		rxbuf->m_head->m_flags &= ~M_HASFCS; /* we strip it */
		rxbuf->m_head->m_pkthdr.len = adapter->rx_mbuf_sz;

		/* Get the memory mapping */
		error = bus_dmamap_load_mbuf_sg(rxr->rxtag,
		    rxbuf->map, rxbuf->m_head, seg,
		    &nsegs, BUS_DMA_NOWAIT);
		if (error != 0) {
			m_freem(rxbuf->m_head);
			rxbuf->m_head = NULL;
			goto fail;
		}
		bus_dmamap_sync(rxr->rxtag,
		    rxbuf->map, BUS_DMASYNC_PREREAD);

		/* Update descriptor */
		rxr->rx_base[j].buffer_addr = htole64(seg[0].ds_addr);
	}
	rxr->next_to_check = 0;
	rxr->next_to_refresh = 0;
	bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);

fail:
	/* Success falls through here too, with error still 0. */
	EM_RX_UNLOCK(rxr);
	return (error);
}
4146
4147 /*********************************************************************
4148  *
4149  *  Initialize all receive rings.
4150  *
4151  **********************************************************************/
4152 static int
4153 em_setup_receive_structures(struct adapter *adapter)
4154 {
4155         struct rx_ring *rxr = adapter->rx_rings;
4156         int q;
4157
4158         for (q = 0; q < adapter->num_queues; q++, rxr++)
4159                 if (em_setup_receive_ring(rxr))
4160                         goto fail;
4161
4162         return (0);
4163 fail:
4164         /*
4165          * Free RX buffers allocated so far, we will only handle
4166          * the rings that completed, the failing case will have
4167          * cleaned up for itself. 'q' failed, so its the terminus.
4168          */
4169         for (int i = 0; i < q; ++i) {
4170                 rxr = &adapter->rx_rings[i];
4171                 for (int n = 0; n < adapter->num_rx_desc; n++) {
4172                         struct em_buffer *rxbuf;
4173                         rxbuf = &rxr->rx_buffers[n];
4174                         if (rxbuf->m_head != NULL) {
4175                                 bus_dmamap_sync(rxr->rxtag, rxbuf->map,
4176                                   BUS_DMASYNC_POSTREAD);
4177                                 bus_dmamap_unload(rxr->rxtag, rxbuf->map);
4178                                 m_freem(rxbuf->m_head);
4179                                 rxbuf->m_head = NULL;
4180                         }
4181                 }
4182                 rxr->next_to_check = 0;
4183                 rxr->next_to_refresh = 0;
4184         }
4185
4186         return (ENOBUFS);
4187 }
4188
4189 /*********************************************************************
4190  *
4191  *  Free all receive rings.
4192  *
4193  **********************************************************************/
4194 static void
4195 em_free_receive_structures(struct adapter *adapter)
4196 {
4197         struct rx_ring *rxr = adapter->rx_rings;
4198
4199         for (int i = 0; i < adapter->num_queues; i++, rxr++) {
4200                 em_free_receive_buffers(rxr);
4201                 /* Free the ring memory as well */
4202                 em_dma_free(adapter, &rxr->rxdma);
4203                 EM_RX_LOCK_DESTROY(rxr);
4204         }
4205
4206         free(adapter->rx_rings, M_DEVBUF);
4207 }
4208
4209
4210 /*********************************************************************
4211  *
4212  *  Free receive ring data structures
4213  *
4214  **********************************************************************/
4215 static void
4216 em_free_receive_buffers(struct rx_ring *rxr)
4217 {
4218         struct adapter          *adapter = rxr->adapter;
4219         struct em_buffer        *rxbuf = NULL;
4220
4221         INIT_DEBUGOUT("free_receive_buffers: begin");
4222
4223         if (rxr->rx_buffers != NULL) {
4224                 for (int i = 0; i < adapter->num_rx_desc; i++) {
4225                         rxbuf = &rxr->rx_buffers[i];
4226                         if (rxbuf->map != NULL) {
4227                                 bus_dmamap_sync(rxr->rxtag, rxbuf->map,
4228                                     BUS_DMASYNC_POSTREAD);
4229                                 bus_dmamap_unload(rxr->rxtag, rxbuf->map);
4230                                 bus_dmamap_destroy(rxr->rxtag, rxbuf->map);
4231                         }
4232                         if (rxbuf->m_head != NULL) {
4233                                 m_freem(rxbuf->m_head);
4234                                 rxbuf->m_head = NULL;
4235                         }
4236                 }
4237                 free(rxr->rx_buffers, M_DEVBUF);
4238                 rxr->rx_buffers = NULL;
4239                 rxr->next_to_check = 0;
4240                 rxr->next_to_refresh = 0;
4241         }
4242
4243         if (rxr->rxtag != NULL) {
4244                 bus_dma_tag_destroy(rxr->rxtag);
4245                 rxr->rxtag = NULL;
4246         }
4247
4248         return;
4249 }
4250
4251
4252 /*********************************************************************
4253  *
4254  *  Enable receive unit.
4255  *
4256  **********************************************************************/
4257
static void
em_initialize_receive_unit(struct adapter *adapter)
{
	struct rx_ring	*rxr = adapter->rx_rings;
	struct ifnet	*ifp = adapter->ifp;
	struct e1000_hw	*hw = &adapter->hw;
	u64	bus_addr;
	u32	rctl, rxcsum;

	INIT_DEBUGOUT("em_initialize_receive_units: begin");

	/*
	 * Make sure receives are disabled while setting
	 * up the descriptor ring
	 */
	rctl = E1000_READ_REG(hw, E1000_RCTL);
	/* Do not disable if ever enabled on this hardware */
	if ((hw->mac.type != e1000_82574) && (hw->mac.type != e1000_82583))
		E1000_WRITE_REG(hw, E1000_RCTL, rctl & ~E1000_RCTL_EN);

	/* Program the absolute RX interrupt delay. */
	E1000_WRITE_REG(&adapter->hw, E1000_RADV,
	    adapter->rx_abs_int_delay.value);
	/*
	 * Set the interrupt throttling rate. Value is calculated
	 * as DEFAULT_ITR = 1/(MAX_INTS_PER_SEC * 256ns)
	 */
	E1000_WRITE_REG(hw, E1000_ITR, DEFAULT_ITR);

	/*
	** When using MSIX interrupts we need to throttle
	** using the EITR register (82574 only)
	*/
	if (hw->mac.type == e1000_82574) {
		for (int i = 0; i < 4; i++)
			E1000_WRITE_REG(hw, E1000_EITR_82574(i),
			    DEFAULT_ITR);
		/* Disable accelerated acknowledge */
		E1000_WRITE_REG(hw, E1000_RFCTL, E1000_RFCTL_ACK_DIS);
	}

	/* Enable IP + TCP/UDP checksum offload if configured. */
	if (ifp->if_capenable & IFCAP_RXCSUM) {
		rxcsum = E1000_READ_REG(hw, E1000_RXCSUM);
		rxcsum |= (E1000_RXCSUM_IPOFL | E1000_RXCSUM_TUOFL);
		E1000_WRITE_REG(hw, E1000_RXCSUM, rxcsum);
	}

	/*
	** XXX TEMPORARY WORKAROUND: on some systems with 82573
	** long latencies are observed, like Lenovo X60. This
	** change eliminates the problem, but since having positive
	** values in RDTR is a known source of problems on other
	** platforms another solution is being sought.
	*/
	if (hw->mac.type == e1000_82573)
		E1000_WRITE_REG(hw, E1000_RDTR, 0x20);

	/* Program base/length and head/tail pointers per ring. */
	for (int i = 0; i < adapter->num_queues; i++, rxr++) {
		/* Setup the Base and Length of the Rx Descriptor Ring */
		u32 rdt = adapter->num_rx_desc - 1; /* default */

		bus_addr = rxr->rxdma.dma_paddr;
		E1000_WRITE_REG(hw, E1000_RDLEN(i),
		    adapter->num_rx_desc * sizeof(struct e1000_rx_desc));
		E1000_WRITE_REG(hw, E1000_RDBAH(i), (u32)(bus_addr >> 32));
		E1000_WRITE_REG(hw, E1000_RDBAL(i), (u32)bus_addr);
		/* Setup the Head and Tail Descriptor Pointers */
		E1000_WRITE_REG(hw, E1000_RDH(i), 0);
#ifdef DEV_NETMAP
		/*
		 * an init() while a netmap client is active must
		 * preserve the rx buffers passed to userspace.
		 */
		if (ifp->if_capenable & IFCAP_NETMAP)
			rdt -= NA(adapter->ifp)->rx_rings[i].nr_hwavail;
#endif /* DEV_NETMAP */
		E1000_WRITE_REG(hw, E1000_RDT(i), rdt);
	}

	/* Set PTHRESH for improved jumbo performance */
	if (((adapter->hw.mac.type == e1000_ich9lan) ||
	    (adapter->hw.mac.type == e1000_pch2lan) ||
	    (adapter->hw.mac.type == e1000_ich10lan)) &&
	    (ifp->if_mtu > ETHERMTU)) {
		u32 rxdctl = E1000_READ_REG(hw, E1000_RXDCTL(0));
		E1000_WRITE_REG(hw, E1000_RXDCTL(0), rxdctl | 3);
	}

	/* PCH2+ parts need an explicit jumbo workaround toggle. */
	if (adapter->hw.mac.type >= e1000_pch2lan) {
		if (ifp->if_mtu > ETHERMTU)
			e1000_lv_jumbo_workaround_ich8lan(hw, TRUE);
		else
			e1000_lv_jumbo_workaround_ich8lan(hw, FALSE);
	}

	/* Setup the Receive Control Register */
	rctl &= ~(3 << E1000_RCTL_MO_SHIFT);
	rctl |= E1000_RCTL_EN | E1000_RCTL_BAM |
	    E1000_RCTL_LBM_NO | E1000_RCTL_RDMTS_HALF |
	    (hw->mac.mc_filter_type << E1000_RCTL_MO_SHIFT);

	/* Strip the CRC */
	rctl |= E1000_RCTL_SECRC;

	/* Make sure VLAN Filters are off */
	rctl &= ~E1000_RCTL_VFE;
	rctl &= ~E1000_RCTL_SBP;

	/* Buffer size selection must match rx_mbuf_sz chosen earlier. */
	if (adapter->rx_mbuf_sz == MCLBYTES)
		rctl |= E1000_RCTL_SZ_2048;
	else if (adapter->rx_mbuf_sz == MJUMPAGESIZE)
		rctl |= E1000_RCTL_SZ_4096 | E1000_RCTL_BSEX;
	else if (adapter->rx_mbuf_sz > MJUMPAGESIZE)
		rctl |= E1000_RCTL_SZ_8192 | E1000_RCTL_BSEX;

	/* Long-packet enable only when running jumbo frames. */
	if (ifp->if_mtu > ETHERMTU)
		rctl |= E1000_RCTL_LPE;
	else
		rctl &= ~E1000_RCTL_LPE;

	/* Write out the settings */
	E1000_WRITE_REG(hw, E1000_RCTL, rctl);

	return;
}
4382
4383
4384 /*********************************************************************
4385  *
4386  *  This routine executes in interrupt context. It replenishes
4387  *  the mbufs in the descriptor and sends data which has been
4388  *  dma'ed into host memory to upper layer.
4389  *
4390  *  We loop at most count times if count is > 0, or until done if
4391  *  count < 0.
4392  *  
4393  *  For polling we also now return the number of cleaned packets
4394  *********************************************************************/
static bool
em_rxeof(struct rx_ring *rxr, int count, int *done)
{
	struct adapter		*adapter = rxr->adapter;
	struct ifnet		*ifp = adapter->ifp;
	struct mbuf		*mp, *sendmp;
	u8			status = 0;
	u16			len;
	int			i, processed, rxdone = 0;
	bool			eop;
	struct e1000_rx_desc	*cur;

	EM_RX_LOCK(rxr);

#ifdef DEV_NETMAP
	/* In netmap mode RX processing is done by the netmap client. */
	if (netmap_rx_irq(ifp, rxr->me | NETMAP_LOCKED_ENTER, &processed))
		return (FALSE);
#endif /* DEV_NETMAP */

	for (i = rxr->next_to_check, processed = 0; count != 0;) {

		if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
			break;

		/* Pick up any descriptor writebacks from the hardware. */
		bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
		    BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);

		cur = &rxr->rx_base[i];
		status = cur->status;
		mp = sendmp = NULL;

		/* DD clear means the hardware hasn't filled this slot yet. */
		if ((status & E1000_RXD_STAT_DD) == 0)
			break;

		len = le16toh(cur->length);
		eop = (status & E1000_RXD_STAT_EOP) != 0;

		/*
		 * Drop errored frames, and keep dropping the remaining
		 * segments of a frame whose earlier segment was bad.
		 */
		if ((cur->errors & E1000_RXD_ERR_FRAME_ERR_MASK) ||
		    (rxr->discard == TRUE)) {
			adapter->dropped_pkts++;
			++rxr->rx_discarded;
			if (!eop) /* Catch subsequent segs */
				rxr->discard = TRUE;
			else
				rxr->discard = FALSE;
			em_rx_discard(rxr, i);
			goto next_desc;
		}

		/* Assign correct length to the current fragment */
		mp = rxr->rx_buffers[i].m_head;
		mp->m_len = len;

		/* Trigger for refresh */
		rxr->rx_buffers[i].m_head = NULL;

		/* First segment? */
		if (rxr->fmp == NULL) {
			mp->m_pkthdr.len = len;
			rxr->fmp = rxr->lmp = mp;
		} else {
			/* Chain mbuf's together */
			mp->m_flags &= ~M_PKTHDR;
			rxr->lmp->m_next = mp;
			rxr->lmp = mp;
			rxr->fmp->m_pkthdr.len += len;
		}

		/* End of packet: finish the chain and hand it up. */
		if (eop) {
			--count;
			sendmp = rxr->fmp;
			sendmp->m_pkthdr.rcvif = ifp;
			ifp->if_ipackets++;
			em_receive_checksum(cur, sendmp);
#ifndef __NO_STRICT_ALIGNMENT
			/* Realign jumbo payloads on strict-align machines. */
			if (adapter->hw.mac.max_frame_size >
			    (MCLBYTES - ETHER_ALIGN) &&
			    em_fixup_rx(rxr) != 0)
				goto skip;
#endif
			if (status & E1000_RXD_STAT_VP) {
				sendmp->m_pkthdr.ether_vtag =
				    le16toh(cur->special);
				sendmp->m_flags |= M_VLANTAG;
			}
#ifndef __NO_STRICT_ALIGNMENT
skip:
#endif
			rxr->fmp = rxr->lmp = NULL;
		}
next_desc:
		/* Zero out the receive descriptors status. */
		cur->status = 0;
		++rxdone;	/* cumulative for POLL */
		++processed;

		/* Advance our pointers to the next descriptor. */
		if (++i == adapter->num_rx_desc)
			i = 0;

		/*
		 * Send to the stack.  The RX lock is dropped around
		 * if_input, so re-read next_to_check afterwards in case
		 * it moved while we were unlocked.
		 */
		if (sendmp != NULL) {
			rxr->next_to_check = i;
			EM_RX_UNLOCK(rxr);
			(*ifp->if_input)(ifp, sendmp);
			EM_RX_LOCK(rxr);
			i = rxr->next_to_check;
		}

		/* Only refresh mbufs every 8 descriptors */
		if (processed == 8) {
			em_refresh_mbufs(rxr, i);
			processed = 0;
		}
	}

	/* Catch any remaining refresh work */
	if (e1000_rx_unrefreshed(rxr))
		em_refresh_mbufs(rxr, i);

	rxr->next_to_check = i;
	if (done != NULL)
		*done = rxdone;
	EM_RX_UNLOCK(rxr);

	/* TRUE if the last descriptor examined was still ready. */
	return ((status & E1000_RXD_STAT_DD) ? TRUE : FALSE);
}
4522
4523 static __inline void
4524 em_rx_discard(struct rx_ring *rxr, int i)
4525 {
4526         struct em_buffer        *rbuf;
4527
4528         rbuf = &rxr->rx_buffers[i];
4529         /* Free any previous pieces */
4530         if (rxr->fmp != NULL) {
4531                 rxr->fmp->m_flags |= M_PKTHDR;
4532                 m_freem(rxr->fmp);
4533                 rxr->fmp = NULL;
4534                 rxr->lmp = NULL;
4535         }
4536         /*
4537         ** Free buffer and allow em_refresh_mbufs()
4538         ** to clean up and recharge buffer.
4539         */
4540         if (rbuf->m_head) {
4541                 m_free(rbuf->m_head);
4542                 rbuf->m_head = NULL;
4543         }
4544         return;
4545 }
4546
#ifndef __NO_STRICT_ALIGNMENT
/*
 * When jumbo frames are enabled we should realign entire payload on
 * architectures with strict alignment. This is serious design mistake of 8254x
 * as it nullifies DMA operations. 8254x just allows RX buffer size to be
 * 2048/4096/8192/16384. What we really want is 2048 - ETHER_ALIGN to align its
 * payload. On architectures without strict alignment restrictions 8254x still
 * performs unaligned memory access which would reduce the performance too.
 * To avoid copying over an entire frame to align, we allocate a new mbuf and
 * copy ethernet header to the new mbuf. The new mbuf is prepended into the
 * existing mbuf chain.
 *
 * Be aware, best performance of the 8254x is achieved only when jumbo frame is
 * not used at all on architectures with strict alignment.
 *
 * Returns 0 on success (rxr->fmp then holds the realigned chain), or
 * ENOMEM if a header mbuf could not be allocated, in which case the
 * whole chain is freed, rxr->fmp is cleared and dropped_pkts is bumped.
 */
static int
em_fixup_rx(struct rx_ring *rxr)
{
        struct adapter *adapter = rxr->adapter;
        struct mbuf *m, *n;
        int error;

        error = 0;
        m = rxr->fmp;
        if (m->m_len <= (MCLBYTES - ETHER_HDR_LEN)) {
                /*
                 * Room left in the cluster: shift the frame forward by
                 * ETHER_HDR_LEN in place so the payload after the
                 * Ethernet header lands on an aligned boundary.
                 * (Regions overlap; bcopy handles overlapping copies.)
                 */
                bcopy(m->m_data, m->m_data + ETHER_HDR_LEN, m->m_len);
                m->m_data += ETHER_HDR_LEN;
        } else {
                /*
                 * No room: prepend a fresh mbuf carrying only the
                 * Ethernet header and advance the original mbuf past
                 * the header, leaving its payload aligned.
                 */
                MGETHDR(n, M_NOWAIT, MT_DATA);
                if (n != NULL) {
                        bcopy(m->m_data, n->m_data, ETHER_HDR_LEN);
                        m->m_data += ETHER_HDR_LEN;
                        m->m_len -= ETHER_HDR_LEN;
                        n->m_len = ETHER_HDR_LEN;
                        /* n becomes the new chain head and pkthdr owner */
                        M_MOVE_PKTHDR(n, m);
                        n->m_next = m;
                        rxr->fmp = n;
                } else {
                        /* Allocation failed: drop the whole frame */
                        adapter->dropped_pkts++;
                        m_freem(rxr->fmp);
                        rxr->fmp = NULL;
                        error = ENOMEM;
                }
        }

        return (error);
}
#endif
4595
4596 /*********************************************************************
4597  *
4598  *  Verify that the hardware indicated that the checksum is valid.
4599  *  Inform the stack about the status of checksum so that stack
4600  *  doesn't spend time verifying the checksum.
4601  *
4602  *********************************************************************/
4603 static void
4604 em_receive_checksum(struct e1000_rx_desc *rx_desc, struct mbuf *mp)
4605 {
4606         /* Ignore Checksum bit is set */
4607         if (rx_desc->status & E1000_RXD_STAT_IXSM) {
4608                 mp->m_pkthdr.csum_flags = 0;
4609                 return;
4610         }
4611
4612         if (rx_desc->status & E1000_RXD_STAT_IPCS) {
4613                 /* Did it pass? */
4614                 if (!(rx_desc->errors & E1000_RXD_ERR_IPE)) {
4615                         /* IP Checksum Good */
4616                         mp->m_pkthdr.csum_flags = CSUM_IP_CHECKED;
4617                         mp->m_pkthdr.csum_flags |= CSUM_IP_VALID;
4618
4619                 } else {
4620                         mp->m_pkthdr.csum_flags = 0;
4621                 }
4622         }
4623
4624         if (rx_desc->status & E1000_RXD_STAT_TCPCS) {
4625                 /* Did it pass? */
4626                 if (!(rx_desc->errors & E1000_RXD_ERR_TCPE)) {
4627                         mp->m_pkthdr.csum_flags |=
4628                         (CSUM_DATA_VALID | CSUM_PSEUDO_HDR);
4629                         mp->m_pkthdr.csum_data = htons(0xffff);
4630                 }
4631         }
4632 }
4633
4634 /*
4635  * This routine is run via an vlan
4636  * config EVENT
4637  */
4638 static void
4639 em_register_vlan(void *arg, struct ifnet *ifp, u16 vtag)
4640 {
4641         struct adapter  *adapter = ifp->if_softc;
4642         u32             index, bit;
4643
4644         if (ifp->if_softc !=  arg)   /* Not our event */
4645                 return;
4646
4647         if ((vtag == 0) || (vtag > 4095))       /* Invalid ID */
4648                 return;
4649
4650         EM_CORE_LOCK(adapter);
4651         index = (vtag >> 5) & 0x7F;
4652         bit = vtag & 0x1F;
4653         adapter->shadow_vfta[index] |= (1 << bit);
4654         ++adapter->num_vlans;
4655         /* Re-init to load the changes */
4656         if (ifp->if_capenable & IFCAP_VLAN_HWFILTER)
4657                 em_init_locked(adapter);
4658         EM_CORE_UNLOCK(adapter);
4659 }
4660
4661 /*
4662  * This routine is run via an vlan
4663  * unconfig EVENT
4664  */
4665 static void
4666 em_unregister_vlan(void *arg, struct ifnet *ifp, u16 vtag)
4667 {
4668         struct adapter  *adapter = ifp->if_softc;
4669         u32             index, bit;
4670
4671         if (ifp->if_softc !=  arg)
4672                 return;
4673
4674         if ((vtag == 0) || (vtag > 4095))       /* Invalid */
4675                 return;
4676
4677         EM_CORE_LOCK(adapter);
4678         index = (vtag >> 5) & 0x7F;
4679         bit = vtag & 0x1F;
4680         adapter->shadow_vfta[index] &= ~(1 << bit);
4681         --adapter->num_vlans;
4682         /* Re-init to load the changes */
4683         if (ifp->if_capenable & IFCAP_VLAN_HWFILTER)
4684                 em_init_locked(adapter);
4685         EM_CORE_UNLOCK(adapter);
4686 }
4687
4688 static void
4689 em_setup_vlan_hw_support(struct adapter *adapter)
4690 {
4691         struct e1000_hw *hw = &adapter->hw;
4692         u32             reg;
4693
4694         /*
4695         ** We get here thru init_locked, meaning
4696         ** a soft reset, this has already cleared
4697         ** the VFTA and other state, so if there
4698         ** have been no vlan's registered do nothing.
4699         */
4700         if (adapter->num_vlans == 0)
4701                 return;
4702
4703         /*
4704         ** A soft reset zero's out the VFTA, so
4705         ** we need to repopulate it now.
4706         */
4707         for (int i = 0; i < EM_VFTA_SIZE; i++)
4708                 if (adapter->shadow_vfta[i] != 0)
4709                         E1000_WRITE_REG_ARRAY(hw, E1000_VFTA,
4710                             i, adapter->shadow_vfta[i]);
4711
4712         reg = E1000_READ_REG(hw, E1000_CTRL);
4713         reg |= E1000_CTRL_VME;
4714         E1000_WRITE_REG(hw, E1000_CTRL, reg);
4715
4716         /* Enable the Filter Table */
4717         reg = E1000_READ_REG(hw, E1000_RCTL);
4718         reg &= ~E1000_RCTL_CFIEN;
4719         reg |= E1000_RCTL_VFE;
4720         E1000_WRITE_REG(hw, E1000_RCTL, reg);
4721 }
4722
4723 static void
4724 em_enable_intr(struct adapter *adapter)
4725 {
4726         struct e1000_hw *hw = &adapter->hw;
4727         u32 ims_mask = IMS_ENABLE_MASK;
4728
4729         if (hw->mac.type == e1000_82574) {
4730                 E1000_WRITE_REG(hw, EM_EIAC, EM_MSIX_MASK);
4731                 ims_mask |= EM_MSIX_MASK;
4732         } 
4733         E1000_WRITE_REG(hw, E1000_IMS, ims_mask);
4734 }
4735
4736 static void
4737 em_disable_intr(struct adapter *adapter)
4738 {
4739         struct e1000_hw *hw = &adapter->hw;
4740
4741         if (hw->mac.type == e1000_82574)
4742                 E1000_WRITE_REG(hw, EM_EIAC, 0);
4743         E1000_WRITE_REG(&adapter->hw, E1000_IMC, 0xffffffff);
4744 }
4745
4746 /*
4747  * Bit of a misnomer, what this really means is
4748  * to enable OS management of the system... aka
4749  * to disable special hardware management features 
4750  */
4751 static void
4752 em_init_manageability(struct adapter *adapter)
4753 {
4754         /* A shared code workaround */
4755 #define E1000_82542_MANC2H E1000_MANC2H
4756         if (adapter->has_manage) {
4757                 int manc2h = E1000_READ_REG(&adapter->hw, E1000_MANC2H);
4758                 int manc = E1000_READ_REG(&adapter->hw, E1000_MANC);
4759
4760                 /* disable hardware interception of ARP */
4761                 manc &= ~(E1000_MANC_ARP_EN);
4762
4763                 /* enable receiving management packets to the host */
4764                 manc |= E1000_MANC_EN_MNG2HOST;
4765 #define E1000_MNG2HOST_PORT_623 (1 << 5)
4766 #define E1000_MNG2HOST_PORT_664 (1 << 6)
4767                 manc2h |= E1000_MNG2HOST_PORT_623;
4768                 manc2h |= E1000_MNG2HOST_PORT_664;
4769                 E1000_WRITE_REG(&adapter->hw, E1000_MANC2H, manc2h);
4770                 E1000_WRITE_REG(&adapter->hw, E1000_MANC, manc);
4771         }
4772 }
4773
4774 /*
4775  * Give control back to hardware management
4776  * controller if there is one.
4777  */
4778 static void
4779 em_release_manageability(struct adapter *adapter)
4780 {
4781         if (adapter->has_manage) {
4782                 int manc = E1000_READ_REG(&adapter->hw, E1000_MANC);
4783
4784                 /* re-enable hardware interception of ARP */
4785                 manc |= E1000_MANC_ARP_EN;
4786                 manc &= ~E1000_MANC_EN_MNG2HOST;
4787
4788                 E1000_WRITE_REG(&adapter->hw, E1000_MANC, manc);
4789         }
4790 }
4791
4792 /*
4793  * em_get_hw_control sets the {CTRL_EXT|FWSM}:DRV_LOAD bit.
4794  * For ASF and Pass Through versions of f/w this means
4795  * that the driver is loaded. For AMT version type f/w
4796  * this means that the network i/f is open.
4797  */
4798 static void
4799 em_get_hw_control(struct adapter *adapter)
4800 {
4801         u32 ctrl_ext, swsm;
4802
4803         if (adapter->hw.mac.type == e1000_82573) {
4804                 swsm = E1000_READ_REG(&adapter->hw, E1000_SWSM);
4805                 E1000_WRITE_REG(&adapter->hw, E1000_SWSM,
4806                     swsm | E1000_SWSM_DRV_LOAD);
4807                 return;
4808         }
4809         /* else */
4810         ctrl_ext = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
4811         E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT,
4812             ctrl_ext | E1000_CTRL_EXT_DRV_LOAD);
4813         return;
4814 }
4815
4816 /*
4817  * em_release_hw_control resets {CTRL_EXT|FWSM}:DRV_LOAD bit.
4818  * For ASF and Pass Through versions of f/w this means that
4819  * the driver is no longer loaded. For AMT versions of the
4820  * f/w this means that the network i/f is closed.
4821  */
4822 static void
4823 em_release_hw_control(struct adapter *adapter)
4824 {
4825         u32 ctrl_ext, swsm;
4826
4827         if (!adapter->has_manage)
4828                 return;
4829
4830         if (adapter->hw.mac.type == e1000_82573) {
4831                 swsm = E1000_READ_REG(&adapter->hw, E1000_SWSM);
4832                 E1000_WRITE_REG(&adapter->hw, E1000_SWSM,
4833                     swsm & ~E1000_SWSM_DRV_LOAD);
4834                 return;
4835         }
4836         /* else */
4837         ctrl_ext = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
4838         E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT,
4839             ctrl_ext & ~E1000_CTRL_EXT_DRV_LOAD);
4840         return;
4841 }
4842
4843 static int
4844 em_is_valid_ether_addr(u8 *addr)
4845 {
4846         char zero_addr[6] = { 0, 0, 0, 0, 0, 0 };
4847
4848         if ((addr[0] & 1) || (!bcmp(addr, zero_addr, ETHER_ADDR_LEN))) {
4849                 return (FALSE);
4850         }
4851
4852         return (TRUE);
4853 }
4854
/*
** Parse the interface capabilities with regard
** to both system management and wake-on-lan for
** later use.
**
** Sets adapter->has_manage, adapter->has_amt and adapter->wol
** based on the NVM / WUC settings, then applies per-device
** quirks that override the EEPROM on certain boards/ports.
*/
static void
em_get_wakeup(device_t dev)
{
        struct adapter  *adapter = device_get_softc(dev);
        u16             eeprom_data = 0, device_id, apme_mask;

        adapter->has_manage = e1000_enable_mng_pass_thru(&adapter->hw);
        apme_mask = EM_EEPROM_APME;

        switch (adapter->hw.mac.type) {
        case e1000_82573:
        case e1000_82583:
                adapter->has_amt = TRUE;
                /* Falls thru */
        case e1000_82571:
        case e1000_82572:
        case e1000_80003es2lan:
                /* Port B keeps its APME bit in INIT_CONTROL3_PORT_B */
                if (adapter->hw.bus.func == 1) {
                        e1000_read_nvm(&adapter->hw,
                            NVM_INIT_CONTROL3_PORT_B, 1, &eeprom_data);
                        break;
                } else
                        e1000_read_nvm(&adapter->hw,
                            NVM_INIT_CONTROL3_PORT_A, 1, &eeprom_data);
                break;
        case e1000_ich8lan:
        case e1000_ich9lan:
        case e1000_ich10lan:
        case e1000_pchlan:
        case e1000_pch2lan:
                /* ICH/PCH parts report APME through the WUC register */
                apme_mask = E1000_WUC_APME;
                adapter->has_amt = TRUE;
                eeprom_data = E1000_READ_REG(&adapter->hw, E1000_WUC);
                break;
        default:
                e1000_read_nvm(&adapter->hw,
                    NVM_INIT_CONTROL3_PORT_A, 1, &eeprom_data);
                break;
        }
        /* APME set: default to waking on magic and multicast packets */
        if (eeprom_data & apme_mask)
                adapter->wol = (E1000_WUFC_MAG | E1000_WUFC_MC);
        /*
         * We have the eeprom settings, now apply the special cases
         * where the eeprom may be wrong or the board won't support
         * wake on lan on a particular port
         */
        device_id = pci_get_device(dev);
        switch (device_id) {
        case E1000_DEV_ID_82571EB_FIBER:
                /* Wake events only supported on port A for dual fiber
                 * regardless of eeprom setting */
                if (E1000_READ_REG(&adapter->hw, E1000_STATUS) &
                    E1000_STATUS_FUNC_1)
                        adapter->wol = 0;
                break;
        case E1000_DEV_ID_82571EB_QUAD_COPPER:
        case E1000_DEV_ID_82571EB_QUAD_FIBER:
        case E1000_DEV_ID_82571EB_QUAD_COPPER_LP:
                /* if quad port adapter, disable WoL on all but port A */
                if (global_quad_port_a != 0)
                        adapter->wol = 0;
                /* Reset for multiple quad port adapters */
                if (++global_quad_port_a == 4)
                        global_quad_port_a = 0;
                break;
        }
        return;
}
4928
4929
4930 /*
4931  * Enable PCI Wake On Lan capability
4932  */
4933 static void
4934 em_enable_wakeup(device_t dev)
4935 {
4936         struct adapter  *adapter = device_get_softc(dev);
4937         struct ifnet    *ifp = adapter->ifp;
4938         u32             pmc, ctrl, ctrl_ext, rctl;
4939         u16             status;
4940
4941         if ((pci_find_cap(dev, PCIY_PMG, &pmc) != 0))
4942                 return;
4943
4944         /* Advertise the wakeup capability */
4945         ctrl = E1000_READ_REG(&adapter->hw, E1000_CTRL);
4946         ctrl |= (E1000_CTRL_SWDPIN2 | E1000_CTRL_SWDPIN3);
4947         E1000_WRITE_REG(&adapter->hw, E1000_CTRL, ctrl);
4948         E1000_WRITE_REG(&adapter->hw, E1000_WUC, E1000_WUC_PME_EN);
4949
4950         if ((adapter->hw.mac.type == e1000_ich8lan) ||
4951             (adapter->hw.mac.type == e1000_pchlan) ||
4952             (adapter->hw.mac.type == e1000_ich9lan) ||
4953             (adapter->hw.mac.type == e1000_ich10lan))
4954                 e1000_suspend_workarounds_ich8lan(&adapter->hw);
4955
4956         /* Keep the laser running on Fiber adapters */
4957         if (adapter->hw.phy.media_type == e1000_media_type_fiber ||
4958             adapter->hw.phy.media_type == e1000_media_type_internal_serdes) {
4959                 ctrl_ext = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
4960                 ctrl_ext |= E1000_CTRL_EXT_SDP3_DATA;
4961                 E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT, ctrl_ext);
4962         }
4963
4964         /*
4965         ** Determine type of Wakeup: note that wol
4966         ** is set with all bits on by default.
4967         */
4968         if ((ifp->if_capenable & IFCAP_WOL_MAGIC) == 0)
4969                 adapter->wol &= ~E1000_WUFC_MAG;
4970
4971         if ((ifp->if_capenable & IFCAP_WOL_MCAST) == 0)
4972                 adapter->wol &= ~E1000_WUFC_MC;
4973         else {
4974                 rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
4975                 rctl |= E1000_RCTL_MPE;
4976                 E1000_WRITE_REG(&adapter->hw, E1000_RCTL, rctl);
4977         }
4978
4979         if ((adapter->hw.mac.type == e1000_pchlan) ||
4980             (adapter->hw.mac.type == e1000_pch2lan)) {
4981                 if (em_enable_phy_wakeup(adapter))
4982                         return;
4983         } else {
4984                 E1000_WRITE_REG(&adapter->hw, E1000_WUC, E1000_WUC_PME_EN);
4985                 E1000_WRITE_REG(&adapter->hw, E1000_WUFC, adapter->wol);
4986         }
4987
4988         if (adapter->hw.phy.type == e1000_phy_igp_3)
4989                 e1000_igp3_phy_powerdown_workaround_ich8lan(&adapter->hw);
4990
4991         /* Request PME */
4992         status = pci_read_config(dev, pmc + PCIR_POWER_STATUS, 2);
4993         status &= ~(PCIM_PSTAT_PME | PCIM_PSTAT_PMEENABLE);
4994         if (ifp->if_capenable & IFCAP_WOL)
4995                 status |= PCIM_PSTAT_PME | PCIM_PSTAT_PMEENABLE;
4996         pci_write_config(dev, pmc + PCIR_POWER_STATUS, status, 2);
4997
4998         return;
4999 }
5000
/*
** WOL in the newer chipset interfaces (pchlan)
** require thing to be copied into the phy
**
** Mirrors the MAC's receive-address registers, multicast table
** and RCTL settings into the PHY's BM_* registers, programs the
** wake-up filters, then flips the host-wakeup enable bit on PHY
** page 769. Returns 0 on success or a PHY access error code.
*/
static int
em_enable_phy_wakeup(struct adapter *adapter)
{
        struct e1000_hw *hw = &adapter->hw;
        u32 mreg, ret = 0;
        u16 preg;

        /* copy MAC RARs to PHY RARs */
        e1000_copy_rx_addrs_to_phy_ich8lan(hw);

        /* copy MAC MTA to PHY MTA: each 32-bit MTA entry is split
         * across two consecutive 16-bit PHY registers */
        for (int i = 0; i < adapter->hw.mac.mta_reg_count; i++) {
                mreg = E1000_READ_REG_ARRAY(hw, E1000_MTA, i);
                e1000_write_phy_reg(hw, BM_MTA(i), (u16)(mreg & 0xFFFF));
                e1000_write_phy_reg(hw, BM_MTA(i) + 1,
                    (u16)((mreg >> 16) & 0xFFFF));
        }

        /* configure PHY Rx Control register: translate each relevant
         * MAC RCTL bit into its BM_RCTL equivalent */
        e1000_read_phy_reg(&adapter->hw, BM_RCTL, &preg);
        mreg = E1000_READ_REG(hw, E1000_RCTL);
        if (mreg & E1000_RCTL_UPE)
                preg |= BM_RCTL_UPE;            /* unicast promiscuous */
        if (mreg & E1000_RCTL_MPE)
                preg |= BM_RCTL_MPE;            /* multicast promiscuous */
        preg &= ~(BM_RCTL_MO_MASK);
        if (mreg & E1000_RCTL_MO_3)
                preg |= (((mreg & E1000_RCTL_MO_3) >> E1000_RCTL_MO_SHIFT)
                                << BM_RCTL_MO_SHIFT);
        if (mreg & E1000_RCTL_BAM)
                preg |= BM_RCTL_BAM;            /* accept broadcast */
        if (mreg & E1000_RCTL_PMCF)
                preg |= BM_RCTL_PMCF;           /* pass MAC control frames */
        mreg = E1000_READ_REG(hw, E1000_CTRL);
        if (mreg & E1000_CTRL_RFCE)
                preg |= BM_RCTL_RFCE;           /* rx flow control */
        e1000_write_phy_reg(&adapter->hw, BM_RCTL, preg);

        /* enable PHY wakeup in MAC register */
        E1000_WRITE_REG(hw, E1000_WUC,
            E1000_WUC_PHY_WAKE | E1000_WUC_PME_EN);
        E1000_WRITE_REG(hw, E1000_WUFC, adapter->wol);

        /* configure and enable PHY wakeup in PHY registers */
        e1000_write_phy_reg(&adapter->hw, BM_WUFC, adapter->wol);
        e1000_write_phy_reg(&adapter->hw, BM_WUC, E1000_WUC_PME_EN);

        /* activate PHY wakeup: needs raw MDIC access under the
         * PHY semaphore, hence acquire/release around the sequence */
        ret = hw->phy.ops.acquire(hw);
        if (ret) {
                printf("Could not acquire PHY\n");
                return ret;
        }
        e1000_write_phy_reg_mdic(hw, IGP01E1000_PHY_PAGE_SELECT,
                                 (BM_WUC_ENABLE_PAGE << IGP_PAGE_SHIFT));
        ret = e1000_read_phy_reg_mdic(hw, BM_WUC_ENABLE_REG, &preg);
        if (ret) {
                printf("Could not read PHY page 769\n");
                goto out;
        }
        preg |= BM_WUC_ENABLE_BIT | BM_WUC_HOST_WU_BIT;
        ret = e1000_write_phy_reg_mdic(hw, BM_WUC_ENABLE_REG, preg);
        if (ret)
                printf("Could not set PHY Host Wakeup bit\n");
out:
        hw->phy.ops.release(hw);

        return ret;
}
5074
5075 static void
5076 em_led_func(void *arg, int onoff)
5077 {
5078         struct adapter  *adapter = arg;
5079  
5080         EM_CORE_LOCK(adapter);
5081         if (onoff) {
5082                 e1000_setup_led(&adapter->hw);
5083                 e1000_led_on(&adapter->hw);
5084         } else {
5085                 e1000_led_off(&adapter->hw);
5086                 e1000_cleanup_led(&adapter->hw);
5087         }
5088         EM_CORE_UNLOCK(adapter);
5089 }
5090
5091 /*
5092 ** Disable the L0S and L1 LINK states
5093 */
5094 static void
5095 em_disable_aspm(struct adapter *adapter)
5096 {
5097         int             base, reg;
5098         u16             link_cap,link_ctrl;
5099         device_t        dev = adapter->dev;
5100
5101         switch (adapter->hw.mac.type) {
5102                 case e1000_82573:
5103                 case e1000_82574:
5104                 case e1000_82583:
5105                         break;
5106                 default:
5107                         return;
5108         }
5109         if (pci_find_cap(dev, PCIY_EXPRESS, &base) != 0)
5110                 return;
5111         reg = base + PCIER_LINK_CAP;
5112         link_cap = pci_read_config(dev, reg, 2);
5113         if ((link_cap & PCIEM_LINK_CAP_ASPM) == 0)
5114                 return;
5115         reg = base + PCIER_LINK_CTL;
5116         link_ctrl = pci_read_config(dev, reg, 2);
5117         link_ctrl &= ~PCIEM_LINK_CTL_ASPMC;
5118         pci_write_config(dev, reg, link_ctrl, 2);
5119         return;
5120 }
5121
/**********************************************************************
 *
 *  Update the board statistics counters.
 *
 *  The hardware counters are clear-on-read, so each read is
 *  accumulated into the software copies in adapter->stats, and
 *  the ifnet error/collision totals are recomputed at the end.
 *
 **********************************************************************/
static void
em_update_stats_counters(struct adapter *adapter)
{
        struct ifnet   *ifp;

        /* Symbol/sequence errors are only meaningful on copper,
         * or on fiber/serdes when link is actually up */
        if(adapter->hw.phy.media_type == e1000_media_type_copper ||
           (E1000_READ_REG(&adapter->hw, E1000_STATUS) & E1000_STATUS_LU)) {
                adapter->stats.symerrs += E1000_READ_REG(&adapter->hw, E1000_SYMERRS);
                adapter->stats.sec += E1000_READ_REG(&adapter->hw, E1000_SEC);
        }
        adapter->stats.crcerrs += E1000_READ_REG(&adapter->hw, E1000_CRCERRS);
        adapter->stats.mpc += E1000_READ_REG(&adapter->hw, E1000_MPC);
        adapter->stats.scc += E1000_READ_REG(&adapter->hw, E1000_SCC);
        adapter->stats.ecol += E1000_READ_REG(&adapter->hw, E1000_ECOL);

        adapter->stats.mcc += E1000_READ_REG(&adapter->hw, E1000_MCC);
        adapter->stats.latecol += E1000_READ_REG(&adapter->hw, E1000_LATECOL);
        adapter->stats.colc += E1000_READ_REG(&adapter->hw, E1000_COLC);
        adapter->stats.dc += E1000_READ_REG(&adapter->hw, E1000_DC);
        adapter->stats.rlec += E1000_READ_REG(&adapter->hw, E1000_RLEC);
        adapter->stats.xonrxc += E1000_READ_REG(&adapter->hw, E1000_XONRXC);
        adapter->stats.xontxc += E1000_READ_REG(&adapter->hw, E1000_XONTXC);
        /*
        ** For watchdog management we need to know if we have been
        ** paused during the last interval, so capture that here.
        */
        adapter->pause_frames = E1000_READ_REG(&adapter->hw, E1000_XOFFRXC);
        adapter->stats.xoffrxc += adapter->pause_frames;
        adapter->stats.xofftxc += E1000_READ_REG(&adapter->hw, E1000_XOFFTXC);
        adapter->stats.fcruc += E1000_READ_REG(&adapter->hw, E1000_FCRUC);
        /* Received packet size histogram */
        adapter->stats.prc64 += E1000_READ_REG(&adapter->hw, E1000_PRC64);
        adapter->stats.prc127 += E1000_READ_REG(&adapter->hw, E1000_PRC127);
        adapter->stats.prc255 += E1000_READ_REG(&adapter->hw, E1000_PRC255);
        adapter->stats.prc511 += E1000_READ_REG(&adapter->hw, E1000_PRC511);
        adapter->stats.prc1023 += E1000_READ_REG(&adapter->hw, E1000_PRC1023);
        adapter->stats.prc1522 += E1000_READ_REG(&adapter->hw, E1000_PRC1522);
        adapter->stats.gprc += E1000_READ_REG(&adapter->hw, E1000_GPRC);
        adapter->stats.bprc += E1000_READ_REG(&adapter->hw, E1000_BPRC);
        adapter->stats.mprc += E1000_READ_REG(&adapter->hw, E1000_MPRC);
        adapter->stats.gptc += E1000_READ_REG(&adapter->hw, E1000_GPTC);

        /* For the 64-bit byte counters the low dword must be read first. */
        /* Both registers clear on the read of the high dword */

        adapter->stats.gorc += E1000_READ_REG(&adapter->hw, E1000_GORCL) +
            ((u64)E1000_READ_REG(&adapter->hw, E1000_GORCH) << 32);
        adapter->stats.gotc += E1000_READ_REG(&adapter->hw, E1000_GOTCL) +
            ((u64)E1000_READ_REG(&adapter->hw, E1000_GOTCH) << 32);

        adapter->stats.rnbc += E1000_READ_REG(&adapter->hw, E1000_RNBC);
        adapter->stats.ruc += E1000_READ_REG(&adapter->hw, E1000_RUC);
        adapter->stats.rfc += E1000_READ_REG(&adapter->hw, E1000_RFC);
        adapter->stats.roc += E1000_READ_REG(&adapter->hw, E1000_ROC);
        adapter->stats.rjc += E1000_READ_REG(&adapter->hw, E1000_RJC);

        /* NOTE(review): only the high dwords of TOR/TOT are read here,
         * unlike GORC/GOTC above which read low then high per the
         * "low dword first" rule — confirm against the data sheet. */
        adapter->stats.tor += E1000_READ_REG(&adapter->hw, E1000_TORH);
        adapter->stats.tot += E1000_READ_REG(&adapter->hw, E1000_TOTH);

        adapter->stats.tpr += E1000_READ_REG(&adapter->hw, E1000_TPR);
        adapter->stats.tpt += E1000_READ_REG(&adapter->hw, E1000_TPT);
        /* Transmitted packet size histogram */
        adapter->stats.ptc64 += E1000_READ_REG(&adapter->hw, E1000_PTC64);
        adapter->stats.ptc127 += E1000_READ_REG(&adapter->hw, E1000_PTC127);
        adapter->stats.ptc255 += E1000_READ_REG(&adapter->hw, E1000_PTC255);
        adapter->stats.ptc511 += E1000_READ_REG(&adapter->hw, E1000_PTC511);
        adapter->stats.ptc1023 += E1000_READ_REG(&adapter->hw, E1000_PTC1023);
        adapter->stats.ptc1522 += E1000_READ_REG(&adapter->hw, E1000_PTC1522);
        adapter->stats.mptc += E1000_READ_REG(&adapter->hw, E1000_MPTC);
        adapter->stats.bptc += E1000_READ_REG(&adapter->hw, E1000_BPTC);

        /* Interrupt Counts */

        adapter->stats.iac += E1000_READ_REG(&adapter->hw, E1000_IAC);
        adapter->stats.icrxptc += E1000_READ_REG(&adapter->hw, E1000_ICRXPTC);
        adapter->stats.icrxatc += E1000_READ_REG(&adapter->hw, E1000_ICRXATC);
        adapter->stats.ictxptc += E1000_READ_REG(&adapter->hw, E1000_ICTXPTC);
        adapter->stats.ictxatc += E1000_READ_REG(&adapter->hw, E1000_ICTXATC);
        adapter->stats.ictxqec += E1000_READ_REG(&adapter->hw, E1000_ICTXQEC);
        adapter->stats.ictxqmtc += E1000_READ_REG(&adapter->hw, E1000_ICTXQMTC);
        adapter->stats.icrxdmtc += E1000_READ_REG(&adapter->hw, E1000_ICRXDMTC);
        adapter->stats.icrxoc += E1000_READ_REG(&adapter->hw, E1000_ICRXOC);

        /* Counters that only exist on 82543 and newer MACs */
        if (adapter->hw.mac.type >= e1000_82543) {
                adapter->stats.algnerrc += 
                E1000_READ_REG(&adapter->hw, E1000_ALGNERRC);
                adapter->stats.rxerrc += 
                E1000_READ_REG(&adapter->hw, E1000_RXERRC);
                adapter->stats.tncrs += 
                E1000_READ_REG(&adapter->hw, E1000_TNCRS);
                adapter->stats.cexterr += 
                E1000_READ_REG(&adapter->hw, E1000_CEXTERR);
                adapter->stats.tsctc += 
                E1000_READ_REG(&adapter->hw, E1000_TSCTC);
                adapter->stats.tsctfc += 
                E1000_READ_REG(&adapter->hw, E1000_TSCTFC);
        }
        ifp = adapter->ifp;

        ifp->if_collisions = adapter->stats.colc;

        /* Rx Errors */
        ifp->if_ierrors = adapter->dropped_pkts + adapter->stats.rxerrc +
            adapter->stats.crcerrs + adapter->stats.algnerrc +
            adapter->stats.ruc + adapter->stats.roc +
            adapter->stats.mpc + adapter->stats.cexterr;

        /* Tx Errors */
        ifp->if_oerrors = adapter->stats.ecol +
            adapter->stats.latecol + adapter->watchdog_events;
}
5236
5237 /* Export a single 32-bit register via a read-only sysctl. */
5238 static int
5239 em_sysctl_reg_handler(SYSCTL_HANDLER_ARGS)
5240 {
5241         struct adapter *adapter;
5242         u_int val;
5243
5244         adapter = oidp->oid_arg1;
5245         val = E1000_READ_REG(&adapter->hw, oidp->oid_arg2);
5246         return (sysctl_handle_int(oidp, &val, 0, req));
5247 }
5248
5249 /*
5250  * Add sysctl variables, one per statistic, to the system.
5251  */
5252 static void
5253 em_add_hw_stats(struct adapter *adapter)
5254 {
5255         device_t dev = adapter->dev;
5256
5257         struct tx_ring *txr = adapter->tx_rings;
5258         struct rx_ring *rxr = adapter->rx_rings;
5259
5260         struct sysctl_ctx_list *ctx = device_get_sysctl_ctx(dev);
5261         struct sysctl_oid *tree = device_get_sysctl_tree(dev);
5262         struct sysctl_oid_list *child = SYSCTL_CHILDREN(tree);
5263         struct e1000_hw_stats *stats = &adapter->stats;
5264
5265         struct sysctl_oid *stat_node, *queue_node, *int_node;
5266         struct sysctl_oid_list *stat_list, *queue_list, *int_list;
5267
5268 #define QUEUE_NAME_LEN 32
5269         char namebuf[QUEUE_NAME_LEN];
5270         
5271         /* Driver Statistics */
5272         SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "link_irq",
5273                         CTLFLAG_RD, &adapter->link_irq,
5274                         "Link MSIX IRQ Handled");
5275         SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "mbuf_alloc_fail", 
5276                          CTLFLAG_RD, &adapter->mbuf_alloc_failed,
5277                          "Std mbuf failed");
5278         SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "cluster_alloc_fail", 
5279                          CTLFLAG_RD, &adapter->mbuf_cluster_failed,
5280                          "Std mbuf cluster failed");
5281         SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "dropped", 
5282                         CTLFLAG_RD, &adapter->dropped_pkts,
5283                         "Driver dropped packets");
5284         SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "tx_dma_fail", 
5285                         CTLFLAG_RD, &adapter->no_tx_dma_setup,
5286                         "Driver tx dma failure in xmit");
5287         SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "rx_overruns",
5288                         CTLFLAG_RD, &adapter->rx_overruns,
5289                         "RX overruns");
5290         SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "watchdog_timeouts",
5291                         CTLFLAG_RD, &adapter->watchdog_events,
5292                         "Watchdog timeouts");
5293         
5294         SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "device_control",
5295                         CTLTYPE_UINT | CTLFLAG_RD, adapter, E1000_CTRL,
5296                         em_sysctl_reg_handler, "IU",
5297                         "Device Control Register");
5298         SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "rx_control",
5299                         CTLTYPE_UINT | CTLFLAG_RD, adapter, E1000_RCTL,
5300                         em_sysctl_reg_handler, "IU",
5301                         "Receiver Control Register");
5302         SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "fc_high_water",
5303                         CTLFLAG_RD, &adapter->hw.fc.high_water, 0,
5304                         "Flow Control High Watermark");
5305         SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "fc_low_water", 
5306                         CTLFLAG_RD, &adapter->hw.fc.low_water, 0,
5307                         "Flow Control Low Watermark");
5308
5309         for (int i = 0; i < adapter->num_queues; i++, rxr++, txr++) {
5310                 snprintf(namebuf, QUEUE_NAME_LEN, "queue%d", i);
5311                 queue_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, namebuf,
5312                                             CTLFLAG_RD, NULL, "Queue Name");
5313                 queue_list = SYSCTL_CHILDREN(queue_node);
5314
5315                 SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "txd_head", 
5316                                 CTLTYPE_UINT | CTLFLAG_RD, adapter,
5317                                 E1000_TDH(txr->me),
5318                                 em_sysctl_reg_handler, "IU",
5319                                 "Transmit Descriptor Head");
5320                 SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "txd_tail", 
5321                                 CTLTYPE_UINT | CTLFLAG_RD, adapter,
5322                                 E1000_TDT(txr->me),
5323                                 em_sysctl_reg_handler, "IU",
5324                                 "Transmit Descriptor Tail");
5325                 SYSCTL_ADD_ULONG(ctx, queue_list, OID_AUTO, "tx_irq",
5326                                 CTLFLAG_RD, &txr->tx_irq,
5327                                 "Queue MSI-X Transmit Interrupts");
5328                 SYSCTL_ADD_ULONG(ctx, queue_list, OID_AUTO, "no_desc_avail", 
5329                                 CTLFLAG_RD, &txr->no_desc_avail,
5330                                 "Queue No Descriptor Available");
5331                 
5332                 SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "rxd_head", 
5333                                 CTLTYPE_UINT | CTLFLAG_RD, adapter,
5334                                 E1000_RDH(rxr->me),
5335                                 em_sysctl_reg_handler, "IU",
5336                                 "Receive Descriptor Head");
5337                 SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "rxd_tail", 
5338                                 CTLTYPE_UINT | CTLFLAG_RD, adapter,
5339                                 E1000_RDT(rxr->me),
5340                                 em_sysctl_reg_handler, "IU",
5341                                 "Receive Descriptor Tail");
5342                 SYSCTL_ADD_ULONG(ctx, queue_list, OID_AUTO, "rx_irq",
5343                                 CTLFLAG_RD, &rxr->rx_irq,
5344                                 "Queue MSI-X Receive Interrupts");
5345         }
5346
5347         /* MAC stats get their own sub node */
5348
5349         stat_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "mac_stats", 
5350                                     CTLFLAG_RD, NULL, "Statistics");
5351         stat_list = SYSCTL_CHILDREN(stat_node);
5352
5353         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "excess_coll",
5354                         CTLFLAG_RD, &stats->ecol,
5355                         "Excessive collisions");
5356         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "single_coll",
5357                         CTLFLAG_RD, &stats->scc,
5358                         "Single collisions");
5359         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "multiple_coll",
5360                         CTLFLAG_RD, &stats->mcc,
5361                         "Multiple collisions");
5362         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "late_coll",
5363                         CTLFLAG_RD, &stats->latecol,
5364                         "Late collisions");
5365         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "collision_count",
5366                         CTLFLAG_RD, &stats->colc,
5367                         "Collision Count");
5368         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "symbol_errors",
5369                         CTLFLAG_RD, &adapter->stats.symerrs,
5370                         "Symbol Errors");
5371         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "sequence_errors",
5372                         CTLFLAG_RD, &adapter->stats.sec,
5373                         "Sequence Errors");
5374         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "defer_count",
5375                         CTLFLAG_RD, &adapter->stats.dc,
5376                         "Defer Count");
5377         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "missed_packets",
5378                         CTLFLAG_RD, &adapter->stats.mpc,
5379                         "Missed Packets");
5380         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_no_buff",
5381                         CTLFLAG_RD, &adapter->stats.rnbc,
5382                         "Receive No Buffers");
5383         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_undersize",
5384                         CTLFLAG_RD, &adapter->stats.ruc,
5385                         "Receive Undersize");
5386         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_fragmented",
5387                         CTLFLAG_RD, &adapter->stats.rfc,
5388                         "Fragmented Packets Received ");
5389         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_oversize",
5390                         CTLFLAG_RD, &adapter->stats.roc,
5391                         "Oversized Packets Received");
5392         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_jabber",
5393                         CTLFLAG_RD, &adapter->stats.rjc,
5394                         "Recevied Jabber");
5395         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_errs",
5396                         CTLFLAG_RD, &adapter->stats.rxerrc,
5397                         "Receive Errors");
5398         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "crc_errs",
5399                         CTLFLAG_RD, &adapter->stats.crcerrs,
5400                         "CRC errors");
5401         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "alignment_errs",
5402                         CTLFLAG_RD, &adapter->stats.algnerrc,
5403                         "Alignment Errors");
5404         /* On 82575 these are collision counts */
5405         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "coll_ext_errs",
5406                         CTLFLAG_RD, &adapter->stats.cexterr,
5407                         "Collision/Carrier extension errors");
5408         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "xon_recvd",
5409                         CTLFLAG_RD, &adapter->stats.xonrxc,
5410                         "XON Received");
5411         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "xon_txd",
5412                         CTLFLAG_RD, &adapter->stats.xontxc,
5413                         "XON Transmitted");
5414         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "xoff_recvd",
5415                         CTLFLAG_RD, &adapter->stats.xoffrxc,
5416                         "XOFF Received");
5417         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "xoff_txd",
5418                         CTLFLAG_RD, &adapter->stats.xofftxc,
5419                         "XOFF Transmitted");
5420
5421         /* Packet Reception Stats */
5422         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "total_pkts_recvd",
5423                         CTLFLAG_RD, &adapter->stats.tpr,
5424                         "Total Packets Received ");
5425         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_pkts_recvd",
5426                         CTLFLAG_RD, &adapter->stats.gprc,
5427                         "Good Packets Received");
5428         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "bcast_pkts_recvd",
5429                         CTLFLAG_RD, &adapter->stats.bprc,
5430                         "Broadcast Packets Received");
5431         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "mcast_pkts_recvd",
5432                         CTLFLAG_RD, &adapter->stats.mprc,
5433                         "Multicast Packets Received");
5434         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_64",
5435                         CTLFLAG_RD, &adapter->stats.prc64,
5436                         "64 byte frames received ");
5437         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_65_127",
5438                         CTLFLAG_RD, &adapter->stats.prc127,
5439                         "65-127 byte frames received");
5440         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_128_255",
5441                         CTLFLAG_RD, &adapter->stats.prc255,
5442                         "128-255 byte frames received");
5443         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_256_511",
5444                         CTLFLAG_RD, &adapter->stats.prc511,
5445                         "256-511 byte frames received");
5446         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_512_1023",
5447                         CTLFLAG_RD, &adapter->stats.prc1023,
5448                         "512-1023 byte frames received");
5449         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_1024_1522",
5450                         CTLFLAG_RD, &adapter->stats.prc1522,
5451                         "1023-1522 byte frames received");
5452         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_octets_recvd",
5453                         CTLFLAG_RD, &adapter->stats.gorc, 
5454                         "Good Octets Received"); 
5455
5456         /* Packet Transmission Stats */
5457         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_octets_txd",
5458                         CTLFLAG_RD, &adapter->stats.gotc, 
5459                         "Good Octets Transmitted"); 
5460         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "total_pkts_txd",
5461                         CTLFLAG_RD, &adapter->stats.tpt,
5462                         "Total Packets Transmitted");
5463         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_pkts_txd",
5464                         CTLFLAG_RD, &adapter->stats.gptc,
5465                         "Good Packets Transmitted");
5466         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "bcast_pkts_txd",
5467                         CTLFLAG_RD, &adapter->stats.bptc,
5468                         "Broadcast Packets Transmitted");
5469         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "mcast_pkts_txd",
5470                         CTLFLAG_RD, &adapter->stats.mptc,
5471                         "Multicast Packets Transmitted");
5472         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_64",
5473                         CTLFLAG_RD, &adapter->stats.ptc64,
5474                         "64 byte frames transmitted ");
5475         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_65_127",
5476                         CTLFLAG_RD, &adapter->stats.ptc127,
5477                         "65-127 byte frames transmitted");
5478         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_128_255",
5479                         CTLFLAG_RD, &adapter->stats.ptc255,
5480                         "128-255 byte frames transmitted");
5481         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_256_511",
5482                         CTLFLAG_RD, &adapter->stats.ptc511,
5483                         "256-511 byte frames transmitted");
5484         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_512_1023",
5485                         CTLFLAG_RD, &adapter->stats.ptc1023,
5486                         "512-1023 byte frames transmitted");
5487         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_1024_1522",
5488                         CTLFLAG_RD, &adapter->stats.ptc1522,
5489                         "1024-1522 byte frames transmitted");
5490         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tso_txd",
5491                         CTLFLAG_RD, &adapter->stats.tsctc,
5492                         "TSO Contexts Transmitted");
5493         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tso_ctx_fail",
5494                         CTLFLAG_RD, &adapter->stats.tsctfc,
5495                         "TSO Contexts Failed");
5496
5497
5498         /* Interrupt Stats */
5499
5500         int_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "interrupts", 
5501                                     CTLFLAG_RD, NULL, "Interrupt Statistics");
5502         int_list = SYSCTL_CHILDREN(int_node);
5503
5504         SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "asserts",
5505                         CTLFLAG_RD, &adapter->stats.iac,
5506                         "Interrupt Assertion Count");
5507
5508         SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "rx_pkt_timer",
5509                         CTLFLAG_RD, &adapter->stats.icrxptc,
5510                         "Interrupt Cause Rx Pkt Timer Expire Count");
5511
5512         SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "rx_abs_timer",
5513                         CTLFLAG_RD, &adapter->stats.icrxatc,
5514                         "Interrupt Cause Rx Abs Timer Expire Count");
5515
5516         SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "tx_pkt_timer",
5517                         CTLFLAG_RD, &adapter->stats.ictxptc,
5518                         "Interrupt Cause Tx Pkt Timer Expire Count");
5519
5520         SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "tx_abs_timer",
5521                         CTLFLAG_RD, &adapter->stats.ictxatc,
5522                         "Interrupt Cause Tx Abs Timer Expire Count");
5523
5524         SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "tx_queue_empty",
5525                         CTLFLAG_RD, &adapter->stats.ictxqec,
5526                         "Interrupt Cause Tx Queue Empty Count");
5527
5528         SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "tx_queue_min_thresh",
5529                         CTLFLAG_RD, &adapter->stats.ictxqmtc,
5530                         "Interrupt Cause Tx Queue Min Thresh Count");
5531
5532         SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "rx_desc_min_thresh",
5533                         CTLFLAG_RD, &adapter->stats.icrxdmtc,
5534                         "Interrupt Cause Rx Desc Min Thresh Count");
5535
5536         SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "rx_overrun",
5537                         CTLFLAG_RD, &adapter->stats.icrxoc,
5538                         "Interrupt Cause Receiver Overrun Count");
5539 }
5540
5541 /**********************************************************************
5542  *
5543  *  This routine provides a way to dump out the adapter eeprom,
5544  *  often a useful debug/service tool. This only dumps the first
5545  *  32 words, stuff that matters is in that extent.
5546  *
5547  **********************************************************************/
5548 static int
5549 em_sysctl_nvm_info(SYSCTL_HANDLER_ARGS)
5550 {
5551         struct adapter *adapter = (struct adapter *)arg1;
5552         int error;
5553         int result;
5554
5555         result = -1;
5556         error = sysctl_handle_int(oidp, &result, 0, req);
5557
5558         if (error || !req->newptr)
5559                 return (error);
5560
5561         /*
5562          * This value will cause a hex dump of the
5563          * first 32 16-bit words of the EEPROM to
5564          * the screen.
5565          */
5566         if (result == 1)
5567                 em_print_nvm_info(adapter);
5568
5569         return (error);
5570 }
5571
5572 static void
5573 em_print_nvm_info(struct adapter *adapter)
5574 {
5575         u16     eeprom_data;
5576         int     i, j, row = 0;
5577
5578         /* Its a bit crude, but it gets the job done */
5579         printf("\nInterface EEPROM Dump:\n");
5580         printf("Offset\n0x0000  ");
5581         for (i = 0, j = 0; i < 32; i++, j++) {
5582                 if (j == 8) { /* Make the offset block */
5583                         j = 0; ++row;
5584                         printf("\n0x00%x0  ",row);
5585                 }
5586                 e1000_read_nvm(&adapter->hw, i, 1, &eeprom_data);
5587                 printf("%04x ", eeprom_data);
5588         }
5589         printf("\n");
5590 }
5591
/*
 * Sysctl handler for the interrupt-delay tunables backed by
 * struct em_int_delay_info (register offset + cached value).
 * Converts the user-supplied microsecond value to hardware ticks
 * and writes it into the low 16 bits of the associated register.
 */
static int
em_sysctl_int_delay(SYSCTL_HANDLER_ARGS)
{
        struct em_int_delay_info *info;
        struct adapter *adapter;
        u32 regval;
        int error, usecs, ticks;

        info = (struct em_int_delay_info *)arg1;
        usecs = info->value;
        error = sysctl_handle_int(oidp, &usecs, 0, req);
        if (error != 0 || req->newptr == NULL)
                return (error);
        /* Reject values that cannot fit the 16-bit register field */
        if (usecs < 0 || usecs > EM_TICKS_TO_USECS(65535))
                return (EINVAL);
        info->value = usecs;
        ticks = EM_USECS_TO_TICKS(usecs);
        if (info->offset == E1000_ITR)  /* units are 256ns here */
                ticks *= 4;

        adapter = info->adapter;
        
        EM_CORE_LOCK(adapter);
        /* Read-modify-write: only the low 16 bits hold the delay */
        regval = E1000_READ_OFFSET(&adapter->hw, info->offset);
        regval = (regval & ~0xffff) | (ticks & 0xffff);
        /* Handle a few special cases. */
        switch (info->offset) {
        case E1000_RDTR:
                break;
        case E1000_TIDV:
                if (ticks == 0) {
                        /* Zero delay: stop requesting IDE on TX descriptors */
                        adapter->txd_cmd &= ~E1000_TXD_CMD_IDE;
                        /* Don't write 0 into the TIDV register. */
                        regval++;
                } else
                        adapter->txd_cmd |= E1000_TXD_CMD_IDE;
                break;
        }
        E1000_WRITE_OFFSET(&adapter->hw, info->offset, regval);
        EM_CORE_UNLOCK(adapter);
        return (0);
}
5634
5635 static void
5636 em_add_int_delay_sysctl(struct adapter *adapter, const char *name,
5637         const char *description, struct em_int_delay_info *info,
5638         int offset, int value)
5639 {
5640         info->adapter = adapter;
5641         info->offset = offset;
5642         info->value = value;
5643         SYSCTL_ADD_PROC(device_get_sysctl_ctx(adapter->dev),
5644             SYSCTL_CHILDREN(device_get_sysctl_tree(adapter->dev)),
5645             OID_AUTO, name, CTLTYPE_INT|CTLFLAG_RW,
5646             info, 0, em_sysctl_int_delay, "I", description);
5647 }
5648
5649 static void
5650 em_set_sysctl_value(struct adapter *adapter, const char *name,
5651         const char *description, int *limit, int value)
5652 {
5653         *limit = value;
5654         SYSCTL_ADD_INT(device_get_sysctl_ctx(adapter->dev),
5655             SYSCTL_CHILDREN(device_get_sysctl_tree(adapter->dev)),
5656             OID_AUTO, name, CTLTYPE_INT|CTLFLAG_RW, limit, value, description);
5657 }
5658
5659
5660 /*
5661 ** Set flow control using sysctl:
5662 ** Flow control values:
5663 **      0 - off
5664 **      1 - rx pause
5665 **      2 - tx pause
5666 **      3 - full
5667 */
5668 static int
5669 em_set_flowcntl(SYSCTL_HANDLER_ARGS)
5670 {       
5671         int             error;
5672         static int      input = 3; /* default is full */
5673         struct adapter  *adapter = (struct adapter *) arg1;
5674                     
5675         error = sysctl_handle_int(oidp, &input, 0, req);
5676     
5677         if ((error) || (req->newptr == NULL))
5678                 return (error);
5679                 
5680         if (input == adapter->fc) /* no change? */
5681                 return (error);
5682
5683         switch (input) {
5684                 case e1000_fc_rx_pause:
5685                 case e1000_fc_tx_pause:
5686                 case e1000_fc_full:
5687                 case e1000_fc_none:
5688                         adapter->hw.fc.requested_mode = input;
5689                         adapter->fc = input;
5690                         break;
5691                 default:
5692                         /* Do nothing */
5693                         return (error);
5694         }
5695
5696         adapter->hw.fc.current_mode = adapter->hw.fc.requested_mode;
5697         e1000_force_mac_fc(&adapter->hw);
5698         return (error);
5699 }
5700
5701 /*
5702 ** Manage Energy Efficient Ethernet:
5703 ** Control values:
5704 **     0/1 - enabled/disabled
5705 */
5706 static int
5707 em_sysctl_eee(SYSCTL_HANDLER_ARGS)
5708 {
5709        struct adapter *adapter = (struct adapter *) arg1;
5710        int             error, value;
5711
5712        value = adapter->hw.dev_spec.ich8lan.eee_disable;
5713        error = sysctl_handle_int(oidp, &value, 0, req);
5714        if (error || req->newptr == NULL)
5715                return (error);
5716        EM_CORE_LOCK(adapter);
5717        adapter->hw.dev_spec.ich8lan.eee_disable = (value != 0);
5718        em_init_locked(adapter);
5719        EM_CORE_UNLOCK(adapter);
5720        return (0);
5721 }
5722
5723 static int
5724 em_sysctl_debug_info(SYSCTL_HANDLER_ARGS)
5725 {
5726         struct adapter *adapter;
5727         int error;
5728         int result;
5729
5730         result = -1;
5731         error = sysctl_handle_int(oidp, &result, 0, req);
5732
5733         if (error || !req->newptr)
5734                 return (error);
5735
5736         if (result == 1) {
5737                 adapter = (struct adapter *)arg1;
5738                 em_print_debug_info(adapter);
5739         }
5740
5741         return (error);
5742 }
5743
5744 /*
5745 ** This routine is meant to be fluid, add whatever is
5746 ** needed for debugging a problem.  -jfv
5747 */
5748 static void
5749 em_print_debug_info(struct adapter *adapter)
5750 {
5751         device_t dev = adapter->dev;
5752         struct tx_ring *txr = adapter->tx_rings;
5753         struct rx_ring *rxr = adapter->rx_rings;
5754
5755         if (adapter->ifp->if_drv_flags & IFF_DRV_RUNNING)
5756                 printf("Interface is RUNNING ");
5757         else
5758                 printf("Interface is NOT RUNNING\n");
5759
5760         if (adapter->ifp->if_drv_flags & IFF_DRV_OACTIVE)
5761                 printf("and INACTIVE\n");
5762         else
5763                 printf("and ACTIVE\n");
5764
5765         device_printf(dev, "hw tdh = %d, hw tdt = %d\n",
5766             E1000_READ_REG(&adapter->hw, E1000_TDH(0)),
5767             E1000_READ_REG(&adapter->hw, E1000_TDT(0)));
5768         device_printf(dev, "hw rdh = %d, hw rdt = %d\n",
5769             E1000_READ_REG(&adapter->hw, E1000_RDH(0)),
5770             E1000_READ_REG(&adapter->hw, E1000_RDT(0)));
5771         device_printf(dev, "Tx Queue Status = %d\n", txr->queue_status);
5772         device_printf(dev, "TX descriptors avail = %d\n",
5773             txr->tx_avail);
5774         device_printf(dev, "Tx Descriptors avail failure = %ld\n",
5775             txr->no_desc_avail);
5776         device_printf(dev, "RX discarded packets = %ld\n",
5777             rxr->rx_discarded);
5778         device_printf(dev, "RX Next to Check = %d\n", rxr->next_to_check);
5779         device_printf(dev, "RX Next to Refresh = %d\n", rxr->next_to_refresh);
5780 }