/******************************************************************************

  Copyright (c) 2001-2013, Intel Corporation
  All rights reserved.

  Redistribution and use in source and binary forms, with or without
  modification, are permitted provided that the following conditions are met:

   1. Redistributions of source code must retain the above copyright notice,
      this list of conditions and the following disclaimer.

   2. Redistributions in binary form must reproduce the above copyright
      notice, this list of conditions and the following disclaimer in the
      documentation and/or other materials provided with the distribution.

   3. Neither the name of the Intel Corporation nor the names of its
      contributors may be used to endorse or promote products derived from
      this software without specific prior written permission.

  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
  AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
  LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
  INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
  CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
  ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
  POSSIBILITY OF SUCH DAMAGE.

******************************************************************************/
/*$FreeBSD$*/

#include "opt_inet.h"
#include "opt_inet6.h"

#ifdef HAVE_KERNEL_OPTION_HEADERS
#include "opt_device_polling.h"
#endif

#include <sys/param.h>
#include <sys/systm.h>
#if __FreeBSD_version >= 800000
#include <sys/buf_ring.h>
#endif
#include <sys/bus.h>
#include <sys/endian.h>
#include <sys/kernel.h>
#include <sys/kthread.h>
#include <sys/malloc.h>
#include <sys/mbuf.h>
#include <sys/module.h>
#include <sys/rman.h>
#include <sys/socket.h>
#include <sys/sockio.h>
#include <sys/sysctl.h>
#include <sys/taskqueue.h>
#include <sys/eventhandler.h>
#include <machine/bus.h>
#include <machine/resource.h>

#include <net/bpf.h>
#include <net/ethernet.h>
#include <net/if.h>
#include <net/if_arp.h>
#include <net/if_dl.h>
#include <net/if_media.h>

#include <net/if_types.h>
#include <net/if_vlan_var.h>

#include <netinet/in_systm.h>
#include <netinet/in.h>
#include <netinet/if_ether.h>
#include <netinet/ip.h>
#include <netinet/ip6.h>
#include <netinet/tcp.h>
#include <netinet/udp.h>

#include <machine/in_cksum.h>
#include <dev/led/led.h>
#include <dev/pci/pcivar.h>
#include <dev/pci/pcireg.h>

#include "e1000_api.h"
#include "e1000_82571.h"
#include "if_em.h"

/*********************************************************************
 *  Set this to one to display debug statistics
 *********************************************************************/
int     em_display_debug_stats = 0;

/*********************************************************************
 *  Driver version:
 *********************************************************************/
char em_driver_version[] = "7.3.8";

/*********************************************************************
 *  PCI Device ID Table
 *
 *  Used by em_probe to select which devices to attach to.
 *  The last field stores an index into e1000_strings.
 *  The last entry must be all 0s.
 *
 *  { Vendor ID, Device ID, SubVendor ID, SubDevice ID, String Index }
 *********************************************************************/

static em_vendor_info_t em_vendor_info_array[] =
{
        /* Intel(R) PRO/1000 Network Connection */
        { 0x8086, E1000_DEV_ID_82571EB_COPPER,  PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82571EB_FIBER,   PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82571EB_SERDES,  PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82571EB_SERDES_DUAL,
                                                PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82571EB_SERDES_QUAD,
                                                PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82571EB_QUAD_COPPER,
                                                PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82571EB_QUAD_COPPER_LP,
                                                PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82571EB_QUAD_FIBER,
                                                PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82571PT_QUAD_COPPER,
                                                PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82572EI_COPPER,  PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82572EI_FIBER,   PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82572EI_SERDES,  PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82572EI,         PCI_ANY_ID, PCI_ANY_ID, 0},

        { 0x8086, E1000_DEV_ID_82573E,          PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82573E_IAMT,     PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82573L,          PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82583V,          PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_80003ES2LAN_COPPER_SPT,
                                                PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_80003ES2LAN_SERDES_SPT,
                                                PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_80003ES2LAN_COPPER_DPT,
                                                PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_80003ES2LAN_SERDES_DPT,
                                                PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_ICH8_IGP_M_AMT,  PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_ICH8_IGP_AMT,    PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_ICH8_IGP_C,      PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_ICH8_IFE,        PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_ICH8_IFE_GT,     PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_ICH8_IFE_G,      PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_ICH8_IGP_M,      PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_ICH8_82567V_3,   PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_ICH9_IGP_M_AMT,  PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_ICH9_IGP_AMT,    PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_ICH9_IGP_C,      PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_ICH9_IGP_M,      PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_ICH9_IGP_M_V,    PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_ICH9_IFE,        PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_ICH9_IFE_GT,     PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_ICH9_IFE_G,      PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_ICH9_BM,         PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82574L,          PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82574LA,         PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_ICH10_R_BM_LM,   PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_ICH10_R_BM_LF,   PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_ICH10_R_BM_V,    PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_ICH10_D_BM_LM,   PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_ICH10_D_BM_LF,   PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_ICH10_D_BM_V,    PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_PCH_M_HV_LM,     PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_PCH_M_HV_LC,     PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_PCH_D_HV_DM,     PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_PCH_D_HV_DC,     PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_PCH2_LV_LM,      PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_PCH2_LV_V,       PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_PCH_LPT_I217_LM, PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_PCH_LPT_I217_V,  PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_PCH_LPTLP_I218_LM,
                                                PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_PCH_LPTLP_I218_V,
                                                PCI_ANY_ID, PCI_ANY_ID, 0},
        /* required last entry */
        { 0, 0, 0, 0, 0}
};

/*********************************************************************
 *  Table of branding strings for all supported NICs.
 *********************************************************************/

static char *em_strings[] = {
        "Intel(R) PRO/1000 Network Connection"
};

/*********************************************************************
 *  Function prototypes
 *********************************************************************/
static int      em_probe(device_t);
static int      em_attach(device_t);
static int      em_detach(device_t);
static int      em_shutdown(device_t);
static int      em_suspend(device_t);
static int      em_resume(device_t);
#ifdef EM_MULTIQUEUE
static int      em_mq_start(struct ifnet *, struct mbuf *);
static int      em_mq_start_locked(struct ifnet *,
                    struct tx_ring *, struct mbuf *);
static void     em_qflush(struct ifnet *);
#else
static void     em_start(struct ifnet *);
static void     em_start_locked(struct ifnet *, struct tx_ring *);
#endif
static int      em_ioctl(struct ifnet *, u_long, caddr_t);
static void     em_init(void *);
static void     em_init_locked(struct adapter *);
static void     em_stop(void *);
static void     em_media_status(struct ifnet *, struct ifmediareq *);
static int      em_media_change(struct ifnet *);
static void     em_identify_hardware(struct adapter *);
static int      em_allocate_pci_resources(struct adapter *);
static int      em_allocate_legacy(struct adapter *);
static int      em_allocate_msix(struct adapter *);
static int      em_allocate_queues(struct adapter *);
static int      em_setup_msix(struct adapter *);
static void     em_free_pci_resources(struct adapter *);
static void     em_local_timer(void *);
static void     em_reset(struct adapter *);
static int      em_setup_interface(device_t, struct adapter *);

static void     em_setup_transmit_structures(struct adapter *);
static void     em_initialize_transmit_unit(struct adapter *);
static int      em_allocate_transmit_buffers(struct tx_ring *);
static void     em_free_transmit_structures(struct adapter *);
static void     em_free_transmit_buffers(struct tx_ring *);

static int      em_setup_receive_structures(struct adapter *);
static int      em_allocate_receive_buffers(struct rx_ring *);
static void     em_initialize_receive_unit(struct adapter *);
static void     em_free_receive_structures(struct adapter *);
static void     em_free_receive_buffers(struct rx_ring *);

static void     em_enable_intr(struct adapter *);
static void     em_disable_intr(struct adapter *);
static void     em_update_stats_counters(struct adapter *);
static void     em_add_hw_stats(struct adapter *adapter);
static void     em_txeof(struct tx_ring *);
static bool     em_rxeof(struct rx_ring *, int, int *);
#ifndef __NO_STRICT_ALIGNMENT
static int      em_fixup_rx(struct rx_ring *);
#endif
static void     em_receive_checksum(struct e1000_rx_desc *, struct mbuf *);
static void     em_transmit_checksum_setup(struct tx_ring *, struct mbuf *, int,
                    struct ip *, u32 *, u32 *);
static void     em_tso_setup(struct tx_ring *, struct mbuf *, int, struct ip *,
                    struct tcphdr *, u32 *, u32 *);
static void     em_set_promisc(struct adapter *);
static void     em_disable_promisc(struct adapter *);
static void     em_set_multi(struct adapter *);
static void     em_update_link_status(struct adapter *);
static void     em_refresh_mbufs(struct rx_ring *, int);
static void     em_register_vlan(void *, struct ifnet *, u16);
static void     em_unregister_vlan(void *, struct ifnet *, u16);
static void     em_setup_vlan_hw_support(struct adapter *);
static int      em_xmit(struct tx_ring *, struct mbuf **);
static int      em_dma_malloc(struct adapter *, bus_size_t,
                    struct em_dma_alloc *, int);
static void     em_dma_free(struct adapter *, struct em_dma_alloc *);
static int      em_sysctl_nvm_info(SYSCTL_HANDLER_ARGS);
static void     em_print_nvm_info(struct adapter *);
static int      em_sysctl_debug_info(SYSCTL_HANDLER_ARGS);
static void     em_print_debug_info(struct adapter *);
static int      em_is_valid_ether_addr(u8 *);
static int      em_sysctl_int_delay(SYSCTL_HANDLER_ARGS);
static void     em_add_int_delay_sysctl(struct adapter *, const char *,
                    const char *, struct em_int_delay_info *, int, int);
/* Management and WOL Support */
static void     em_init_manageability(struct adapter *);
static void     em_release_manageability(struct adapter *);
static void     em_get_hw_control(struct adapter *);
static void     em_release_hw_control(struct adapter *);
static void     em_get_wakeup(device_t);
static void     em_enable_wakeup(device_t);
static int      em_enable_phy_wakeup(struct adapter *);
static void     em_led_func(void *, int);
static void     em_disable_aspm(struct adapter *);

static int      em_irq_fast(void *);

/* MSIX handlers */
static void     em_msix_tx(void *);
static void     em_msix_rx(void *);
static void     em_msix_link(void *);
static void     em_handle_tx(void *context, int pending);
static void     em_handle_rx(void *context, int pending);
static void     em_handle_link(void *context, int pending);

static void     em_set_sysctl_value(struct adapter *, const char *,
                    const char *, int *, int);
static int      em_set_flowcntl(SYSCTL_HANDLER_ARGS);
static int      em_sysctl_eee(SYSCTL_HANDLER_ARGS);

static __inline void em_rx_discard(struct rx_ring *, int);

#ifdef DEVICE_POLLING
static poll_handler_t em_poll;
#endif /* DEVICE_POLLING */

/*********************************************************************
 *  FreeBSD Device Interface Entry Points
 *********************************************************************/

static device_method_t em_methods[] = {
        /* Device interface */
        DEVMETHOD(device_probe, em_probe),
        DEVMETHOD(device_attach, em_attach),
        DEVMETHOD(device_detach, em_detach),
        DEVMETHOD(device_shutdown, em_shutdown),
        DEVMETHOD(device_suspend, em_suspend),
        DEVMETHOD(device_resume, em_resume),
        DEVMETHOD_END
};

static driver_t em_driver = {
        "em", em_methods, sizeof(struct adapter),
};

devclass_t em_devclass;
DRIVER_MODULE(em, pci, em_driver, em_devclass, 0, 0);
MODULE_DEPEND(em, pci, 1, 1, 1);
MODULE_DEPEND(em, ether, 1, 1, 1);

/*********************************************************************
 *  Tunable default values.
 *********************************************************************/

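/*
 * The e1000 interrupt delay registers count in 1.024 usec ticks; the
 * macros below convert between ticks and microseconds with rounding.
 */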
#define EM_TICKS_TO_USECS(ticks)        ((1024 * (ticks) + 500) / 1000)
#define EM_USECS_TO_TICKS(usecs)        ((1000 * (usecs) + 512) / 1024)
#define M_TSO_LEN                       66

#define MAX_INTS_PER_SEC        8000
#define DEFAULT_ITR             (1000000000/(MAX_INTS_PER_SEC * 256))
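/*
 * The ITR register counts in 256ns units, so DEFAULT_ITR above works out
 * to 10^9 / (8000 * 256) ~= 488, i.e. at most ~8000 interrupts/second.
 */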

/* Allow common code without TSO */
#ifndef CSUM_TSO
#define CSUM_TSO        0
#endif

static SYSCTL_NODE(_hw, OID_AUTO, em, CTLFLAG_RD, 0, "EM driver parameters");

static int em_tx_int_delay_dflt = EM_TICKS_TO_USECS(EM_TIDV);
static int em_rx_int_delay_dflt = EM_TICKS_TO_USECS(EM_RDTR);
TUNABLE_INT("hw.em.tx_int_delay", &em_tx_int_delay_dflt);
TUNABLE_INT("hw.em.rx_int_delay", &em_rx_int_delay_dflt);
SYSCTL_INT(_hw_em, OID_AUTO, tx_int_delay, CTLFLAG_RDTUN, &em_tx_int_delay_dflt,
    0, "Default transmit interrupt delay in usecs");
SYSCTL_INT(_hw_em, OID_AUTO, rx_int_delay, CTLFLAG_RDTUN, &em_rx_int_delay_dflt,
    0, "Default receive interrupt delay in usecs");

static int em_tx_abs_int_delay_dflt = EM_TICKS_TO_USECS(EM_TADV);
static int em_rx_abs_int_delay_dflt = EM_TICKS_TO_USECS(EM_RADV);
TUNABLE_INT("hw.em.tx_abs_int_delay", &em_tx_abs_int_delay_dflt);
TUNABLE_INT("hw.em.rx_abs_int_delay", &em_rx_abs_int_delay_dflt);
SYSCTL_INT(_hw_em, OID_AUTO, tx_abs_int_delay, CTLFLAG_RDTUN,
    &em_tx_abs_int_delay_dflt, 0,
    "Default transmit interrupt delay limit in usecs");
SYSCTL_INT(_hw_em, OID_AUTO, rx_abs_int_delay, CTLFLAG_RDTUN,
    &em_rx_abs_int_delay_dflt, 0,
    "Default receive interrupt delay limit in usecs");

static int em_rxd = EM_DEFAULT_RXD;
static int em_txd = EM_DEFAULT_TXD;
TUNABLE_INT("hw.em.rxd", &em_rxd);
TUNABLE_INT("hw.em.txd", &em_txd);
SYSCTL_INT(_hw_em, OID_AUTO, rxd, CTLFLAG_RDTUN, &em_rxd, 0,
    "Number of receive descriptors per queue");
SYSCTL_INT(_hw_em, OID_AUTO, txd, CTLFLAG_RDTUN, &em_txd, 0,
    "Number of transmit descriptors per queue");

static int em_smart_pwr_down = FALSE;
TUNABLE_INT("hw.em.smart_pwr_down", &em_smart_pwr_down);
SYSCTL_INT(_hw_em, OID_AUTO, smart_pwr_down, CTLFLAG_RDTUN, &em_smart_pwr_down,
    0, "Set to true to leave smart power down enabled on newer adapters");

/* Controls whether promiscuous mode also shows bad packets */
static int em_debug_sbp = FALSE;
TUNABLE_INT("hw.em.sbp", &em_debug_sbp);
SYSCTL_INT(_hw_em, OID_AUTO, sbp, CTLFLAG_RDTUN, &em_debug_sbp, 0,
    "Show bad packets in promiscuous mode");

static int em_enable_msix = TRUE;
TUNABLE_INT("hw.em.enable_msix", &em_enable_msix);
SYSCTL_INT(_hw_em, OID_AUTO, enable_msix, CTLFLAG_RDTUN, &em_enable_msix, 0,
    "Enable MSI-X interrupts");

/* How many packets rxeof tries to clean at a time */
static int em_rx_process_limit = 100;
TUNABLE_INT("hw.em.rx_process_limit", &em_rx_process_limit);
SYSCTL_INT(_hw_em, OID_AUTO, rx_process_limit, CTLFLAG_RDTUN,
    &em_rx_process_limit, 0,
    "Maximum number of received packets to process "
    "at a time, -1 means unlimited");

/* Energy Efficient Ethernet - default to OFF (1 means EEE disabled) */
static int eee_setting = 1;
TUNABLE_INT("hw.em.eee_setting", &eee_setting);
SYSCTL_INT(_hw_em, OID_AUTO, eee_setting, CTLFLAG_RDTUN, &eee_setting, 0,
    "Disable Energy Efficient Ethernet");

/* Global used in WOL setup with multiport cards */
static int global_quad_port_a = 0;

#ifdef DEV_NETMAP       /* see ixgbe.c for details */
#include <dev/netmap/if_em_netmap.h>
#endif /* DEV_NETMAP */

/*********************************************************************
 *  Device identification routine
 *
 *  em_probe determines if the driver should be loaded on the
 *  adapter based on its PCI vendor/device id.
 *
 *  return BUS_PROBE_DEFAULT on success, positive on failure
 *********************************************************************/

static int
em_probe(device_t dev)
{
        char            adapter_name[60];
        u16             pci_vendor_id = 0;
        u16             pci_device_id = 0;
        u16             pci_subvendor_id = 0;
        u16             pci_subdevice_id = 0;
        em_vendor_info_t *ent;

        INIT_DEBUGOUT("em_probe: begin");

        pci_vendor_id = pci_get_vendor(dev);
        if (pci_vendor_id != EM_VENDOR_ID)
                return (ENXIO);

        pci_device_id = pci_get_device(dev);
        pci_subvendor_id = pci_get_subvendor(dev);
        pci_subdevice_id = pci_get_subdevice(dev);

        ent = em_vendor_info_array;
        while (ent->vendor_id != 0) {
                if ((pci_vendor_id == ent->vendor_id) &&
                    (pci_device_id == ent->device_id) &&

                    ((pci_subvendor_id == ent->subvendor_id) ||
                    (ent->subvendor_id == PCI_ANY_ID)) &&

                    ((pci_subdevice_id == ent->subdevice_id) ||
                    (ent->subdevice_id == PCI_ANY_ID))) {
                        sprintf(adapter_name, "%s %s",
                                em_strings[ent->index],
                                em_driver_version);
                        device_set_desc_copy(dev, adapter_name);
                        return (BUS_PROBE_DEFAULT);
                }
                ent++;
        }

        return (ENXIO);
}

/*********************************************************************
 *  Device initialization routine
 *
 *  The attach entry point is called when the driver is being loaded.
 *  This routine identifies the type of hardware, allocates all resources
 *  and initializes the hardware.
 *
 *  return 0 on success, positive on failure
 *********************************************************************/

static int
em_attach(device_t dev)
{
        struct adapter  *adapter;
        struct e1000_hw *hw;
        int             error = 0;

        INIT_DEBUGOUT("em_attach: begin");

        if (resource_disabled("em", device_get_unit(dev))) {
                device_printf(dev, "Disabled by device hint\n");
                return (ENXIO);
        }

        adapter = device_get_softc(dev);
        adapter->dev = adapter->osdep.dev = dev;
        hw = &adapter->hw;
        EM_CORE_LOCK_INIT(adapter, device_get_nameunit(dev));

        /* SYSCTL stuff */
        SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
            SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
            OID_AUTO, "nvm", CTLTYPE_INT|CTLFLAG_RW, adapter, 0,
            em_sysctl_nvm_info, "I", "NVM Information");

        SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
            SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
            OID_AUTO, "debug", CTLTYPE_INT|CTLFLAG_RW, adapter, 0,
            em_sysctl_debug_info, "I", "Debug Information");

        SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
            SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
            OID_AUTO, "fc", CTLTYPE_INT|CTLFLAG_RW, adapter, 0,
            em_set_flowcntl, "I", "Flow Control");

        callout_init_mtx(&adapter->timer, &adapter->core_mtx, 0);

        /* Determine hardware and mac info */
        em_identify_hardware(adapter);

        /* Setup PCI resources */
        if (em_allocate_pci_resources(adapter)) {
                device_printf(dev, "Allocation of PCI resources failed\n");
                error = ENXIO;
                goto err_pci;
        }

        /*
        ** For ICH8 and family we need to
        ** map the flash memory, and this
        ** must happen after the MAC is
        ** identified
        */
        if ((hw->mac.type == e1000_ich8lan) ||
            (hw->mac.type == e1000_ich9lan) ||
            (hw->mac.type == e1000_ich10lan) ||
            (hw->mac.type == e1000_pchlan) ||
            (hw->mac.type == e1000_pch2lan) ||
            (hw->mac.type == e1000_pch_lpt)) {
                int rid = EM_BAR_TYPE_FLASH;
                adapter->flash = bus_alloc_resource_any(dev,
                    SYS_RES_MEMORY, &rid, RF_ACTIVE);
                if (adapter->flash == NULL) {
                        device_printf(dev, "Mapping of Flash failed\n");
                        error = ENXIO;
                        goto err_pci;
                }
                /* This is used in the shared code */
                hw->flash_address = (u8 *)adapter->flash;
                adapter->osdep.flash_bus_space_tag =
                    rman_get_bustag(adapter->flash);
                adapter->osdep.flash_bus_space_handle =
                    rman_get_bushandle(adapter->flash);
        }

        /* Do Shared Code initialization */
        if (e1000_setup_init_funcs(hw, TRUE)) {
                device_printf(dev, "Setup of Shared code failed\n");
                error = ENXIO;
                goto err_pci;
        }

        e1000_get_bus_info(hw);

        /* Set up some sysctls for the tunable interrupt delays */
        em_add_int_delay_sysctl(adapter, "rx_int_delay",
            "receive interrupt delay in usecs", &adapter->rx_int_delay,
            E1000_REGISTER(hw, E1000_RDTR), em_rx_int_delay_dflt);
        em_add_int_delay_sysctl(adapter, "tx_int_delay",
            "transmit interrupt delay in usecs", &adapter->tx_int_delay,
            E1000_REGISTER(hw, E1000_TIDV), em_tx_int_delay_dflt);
        em_add_int_delay_sysctl(adapter, "rx_abs_int_delay",
            "receive interrupt delay limit in usecs",
            &adapter->rx_abs_int_delay,
            E1000_REGISTER(hw, E1000_RADV),
            em_rx_abs_int_delay_dflt);
        em_add_int_delay_sysctl(adapter, "tx_abs_int_delay",
            "transmit interrupt delay limit in usecs",
            &adapter->tx_abs_int_delay,
            E1000_REGISTER(hw, E1000_TADV),
            em_tx_abs_int_delay_dflt);
        em_add_int_delay_sysctl(adapter, "itr",
            "interrupt delay limit in usecs/4",
            &adapter->tx_itr,
            E1000_REGISTER(hw, E1000_ITR),
            DEFAULT_ITR);

        /* Sysctl for limiting the amount of work done in the taskqueue */
        em_set_sysctl_value(adapter, "rx_processing_limit",
            "max number of rx packets to process", &adapter->rx_process_limit,
            em_rx_process_limit);

        /*
         * Validate number of transmit and receive descriptors. It
         * must not exceed hardware maximum, and must be a multiple
         * of EM_DBA_ALIGN (e.g. with 16-byte descriptors and 128-byte
         * alignment, a multiple of 8 descriptors).
         */
        if (((em_txd * sizeof(struct e1000_tx_desc)) % EM_DBA_ALIGN) != 0 ||
            (em_txd > EM_MAX_TXD) || (em_txd < EM_MIN_TXD)) {
                device_printf(dev, "Using %d TX descriptors instead of %d!\n",
                    EM_DEFAULT_TXD, em_txd);
                adapter->num_tx_desc = EM_DEFAULT_TXD;
        } else
                adapter->num_tx_desc = em_txd;

        if (((em_rxd * sizeof(struct e1000_rx_desc)) % EM_DBA_ALIGN) != 0 ||
            (em_rxd > EM_MAX_RXD) || (em_rxd < EM_MIN_RXD)) {
                device_printf(dev, "Using %d RX descriptors instead of %d!\n",
                    EM_DEFAULT_RXD, em_rxd);
                adapter->num_rx_desc = EM_DEFAULT_RXD;
        } else
                adapter->num_rx_desc = em_rxd;

        hw->mac.autoneg = DO_AUTO_NEG;
        hw->phy.autoneg_wait_to_complete = FALSE;
        hw->phy.autoneg_advertised = AUTONEG_ADV_DEFAULT;

        /* Copper options */
        if (hw->phy.media_type == e1000_media_type_copper) {
                hw->phy.mdix = AUTO_ALL_MODES;
                hw->phy.disable_polarity_correction = FALSE;
                hw->phy.ms_type = EM_MASTER_SLAVE;
        }

        /*
         * Set the frame limits assuming
         * standard ethernet sized frames.
         */
        adapter->hw.mac.max_frame_size =
            ETHERMTU + ETHER_HDR_LEN + ETHERNET_FCS_SIZE;

        /*
         * This controls when hardware reports transmit completion
         * status.
         */
        hw->mac.report_tx_early = 1;

        /*
        ** Get queue/ring memory
        */
        if (em_allocate_queues(adapter)) {
                error = ENOMEM;
                goto err_pci;
        }

        /* Allocate multicast array memory. */
        adapter->mta = malloc(sizeof(u8) * ETH_ADDR_LEN *
            MAX_NUM_MULTICAST_ADDRESSES, M_DEVBUF, M_NOWAIT);
        if (adapter->mta == NULL) {
                device_printf(dev, "Can not allocate multicast setup array\n");
                error = ENOMEM;
                goto err_late;
        }

        /* Check SOL/IDER usage */
        if (e1000_check_reset_block(hw))
                device_printf(dev, "PHY reset is blocked"
                    " due to SOL/IDER session.\n");

        /* Sysctl for setting Energy Efficient Ethernet */
        hw->dev_spec.ich8lan.eee_disable = eee_setting;
        SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
            SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
            OID_AUTO, "eee_control", CTLTYPE_INT|CTLFLAG_RW,
            adapter, 0, em_sysctl_eee, "I",
            "Disable Energy Efficient Ethernet");

        /*
        ** Start from a known state: reset the hardware
        ** before reading the NVM and MAC address from it.
        */
        e1000_reset_hw(hw);

        /* Make sure we have a good EEPROM before we read from it */
        if (e1000_validate_nvm_checksum(hw) < 0) {
                /*
                ** Some PCI-E parts fail the first check due to
                ** the link being in a sleep state; call it again,
                ** and if it fails a second time it's a real issue.
                */
                if (e1000_validate_nvm_checksum(hw) < 0) {
                        device_printf(dev,
                            "The EEPROM Checksum Is Not Valid\n");
                        error = EIO;
                        goto err_late;
                }
        }

        /* Copy the permanent MAC address out of the EEPROM */
        if (e1000_read_mac_addr(hw) < 0) {
                device_printf(dev, "EEPROM read error while reading MAC"
                    " address\n");
                error = EIO;
                goto err_late;
        }

        if (!em_is_valid_ether_addr(hw->mac.addr)) {
                device_printf(dev, "Invalid MAC address\n");
                error = EIO;
                goto err_late;
        }

        /*
        **  Do interrupt configuration
        */
        if (adapter->msix > 1) /* Do MSIX */
                error = em_allocate_msix(adapter);
        else  /* MSI or Legacy */
                error = em_allocate_legacy(adapter);
        if (error)
                goto err_late;

        /*
         * Get Wake-on-Lan and Management info for later use
         */
        em_get_wakeup(dev);

        /* Setup OS specific network interface */
        if (em_setup_interface(dev, adapter) != 0)
                goto err_late;

        em_reset(adapter);

        /* Initialize statistics */
        em_update_stats_counters(adapter);

        hw->mac.get_link_status = 1;
        em_update_link_status(adapter);

        /* Register for VLAN events */
        adapter->vlan_attach = EVENTHANDLER_REGISTER(vlan_config,
            em_register_vlan, adapter, EVENTHANDLER_PRI_FIRST);
        adapter->vlan_detach = EVENTHANDLER_REGISTER(vlan_unconfig,
            em_unregister_vlan, adapter, EVENTHANDLER_PRI_FIRST);

        em_add_hw_stats(adapter);

        /* Non-AMT based hardware can now take control from firmware */
        if (adapter->has_manage && !adapter->has_amt)
                em_get_hw_control(adapter);

        /* Tell the stack that the interface is not active */
        adapter->ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
        adapter->ifp->if_drv_flags |= IFF_DRV_OACTIVE;

        adapter->led_dev = led_create(em_led_func, adapter,
            device_get_nameunit(dev));
#ifdef DEV_NETMAP
        em_netmap_attach(adapter);
#endif /* DEV_NETMAP */

        INIT_DEBUGOUT("em_attach: end");

        return (0);

err_late:
        em_free_transmit_structures(adapter);
        em_free_receive_structures(adapter);
        em_release_hw_control(adapter);
        if (adapter->ifp != NULL)
                if_free(adapter->ifp);
err_pci:
        em_free_pci_resources(adapter);
        free(adapter->mta, M_DEVBUF);
        EM_CORE_LOCK_DESTROY(adapter);

        return (error);
}

/*********************************************************************
 *  Device removal routine
 *
 *  The detach entry point is called when the driver is being removed.
 *  This routine stops the adapter and deallocates all the resources
 *  that were allocated for driver operation.
 *
 *  return 0 on success, positive on failure
 *********************************************************************/

static int
em_detach(device_t dev)
{
        struct adapter  *adapter = device_get_softc(dev);
        struct ifnet    *ifp = adapter->ifp;

        INIT_DEBUGOUT("em_detach: begin");

        /* Make sure VLANs are not using the driver */
        if (adapter->ifp->if_vlantrunk != NULL) {
                device_printf(dev, "VLAN in use, detach first\n");
                return (EBUSY);
        }

#ifdef DEVICE_POLLING
        if (ifp->if_capenable & IFCAP_POLLING)
                ether_poll_deregister(ifp);
#endif

        if (adapter->led_dev != NULL)
                led_destroy(adapter->led_dev);

        EM_CORE_LOCK(adapter);
        adapter->in_detach = 1;
        em_stop(adapter);
        EM_CORE_UNLOCK(adapter);
        EM_CORE_LOCK_DESTROY(adapter);

        e1000_phy_hw_reset(&adapter->hw);

        em_release_manageability(adapter);
        em_release_hw_control(adapter);

        /* Unregister VLAN events */
        if (adapter->vlan_attach != NULL)
                EVENTHANDLER_DEREGISTER(vlan_config, adapter->vlan_attach);
        if (adapter->vlan_detach != NULL)
                EVENTHANDLER_DEREGISTER(vlan_unconfig, adapter->vlan_detach);

        ether_ifdetach(adapter->ifp);
        callout_drain(&adapter->timer);

#ifdef DEV_NETMAP
        netmap_detach(ifp);
#endif /* DEV_NETMAP */

        em_free_pci_resources(adapter);
        bus_generic_detach(dev);
        if_free(ifp);

        em_free_transmit_structures(adapter);
        em_free_receive_structures(adapter);

        em_release_hw_control(adapter);
        free(adapter->mta, M_DEVBUF);

        return (0);
}

/*********************************************************************
 *
 *  Shutdown entry point
 *
 **********************************************************************/

static int
em_shutdown(device_t dev)
{
        return em_suspend(dev);
}

/*
 * Suspend/resume device methods.
 */
static int
em_suspend(device_t dev)
{
        struct adapter *adapter = device_get_softc(dev);

        EM_CORE_LOCK(adapter);

        em_release_manageability(adapter);
        em_release_hw_control(adapter);
        em_enable_wakeup(dev);

        EM_CORE_UNLOCK(adapter);

        return bus_generic_suspend(dev);
}

static int
em_resume(device_t dev)
{
        struct adapter *adapter = device_get_softc(dev);
        struct tx_ring  *txr = adapter->tx_rings;
        struct ifnet *ifp = adapter->ifp;

        EM_CORE_LOCK(adapter);
        if (adapter->hw.mac.type == e1000_pch2lan)
                e1000_resume_workarounds_pchlan(&adapter->hw);
        em_init_locked(adapter);
        em_init_manageability(adapter);

        if ((ifp->if_flags & IFF_UP) &&
            (ifp->if_drv_flags & IFF_DRV_RUNNING) && adapter->link_active) {
                for (int i = 0; i < adapter->num_queues; i++, txr++) {
                        EM_TX_LOCK(txr);
#ifdef EM_MULTIQUEUE
                        if (!drbr_empty(ifp, txr->br))
                                em_mq_start_locked(ifp, txr, NULL);
#else
                        if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
                                em_start_locked(ifp, txr);
#endif
                        EM_TX_UNLOCK(txr);
                }
        }
        EM_CORE_UNLOCK(adapter);

        return bus_generic_resume(dev);
}


#ifdef EM_MULTIQUEUE
/*********************************************************************
 *  Multiqueue Transmit routines
 *
 *  em_mq_start is called by the stack to initiate a transmit.
 *  However, if the driver is busy it can queue the request rather
 *  than do an immediate send; it is this queueing, rather than
 *  multiple hardware TX queues, that is the advantage in this driver.
 **********************************************************************/
static int
em_mq_start_locked(struct ifnet *ifp, struct tx_ring *txr, struct mbuf *m)
{
        struct adapter  *adapter = txr->adapter;
        struct mbuf     *next;
        int             err = 0, enq = 0;

        if ((ifp->if_drv_flags & (IFF_DRV_RUNNING | IFF_DRV_OACTIVE)) !=
            IFF_DRV_RUNNING || adapter->link_active == 0) {
                if (m != NULL)
                        err = drbr_enqueue(ifp, txr->br, m);
                return (err);
        }

        enq = 0;
        if (m != NULL) {
                err = drbr_enqueue(ifp, txr->br, m);
                if (err)
                        return (err);
        }

        /* Process the queue */
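        /*
         * drbr_peek() leaves the mbuf on the ring until em_xmit()
         * succeeds; on failure a NULL'd mbuf was already consumed,
         * so we advance past it, otherwise we put it back to retry.
         */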
        while ((next = drbr_peek(ifp, txr->br)) != NULL) {
                if ((err = em_xmit(txr, &next)) != 0) {
                        if (next == NULL)
                                drbr_advance(ifp, txr->br);
                        else
                                drbr_putback(ifp, txr->br, next);
                        break;
                }
                drbr_advance(ifp, txr->br);
                enq++;
                ifp->if_obytes += next->m_pkthdr.len;
                if (next->m_flags & M_MCAST)
                        ifp->if_omcasts++;
                ETHER_BPF_MTAP(ifp, next);
                if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
                        break;
        }

        if (enq > 0) {
                /* Set the watchdog */
                txr->queue_status = EM_QUEUE_WORKING;
                txr->watchdog_time = ticks;
        }

        if (txr->tx_avail < EM_MAX_SCATTER)
                em_txeof(txr);
        if (txr->tx_avail < EM_MAX_SCATTER)
                ifp->if_drv_flags |= IFF_DRV_OACTIVE;
        return (err);
}

/*
** Multiqueue capable stack interface
*/
static int
em_mq_start(struct ifnet *ifp, struct mbuf *m)
{
        struct adapter  *adapter = ifp->if_softc;
        struct tx_ring  *txr = adapter->tx_rings;
        int             error;

        if (EM_TX_TRYLOCK(txr)) {
                error = em_mq_start_locked(ifp, txr, m);
                EM_TX_UNLOCK(txr);
        } else
                error = drbr_enqueue(ifp, txr->br, m);

        return (error);
}

/*
** Flush all ring buffers
*/
static void
em_qflush(struct ifnet *ifp)
{
        struct adapter  *adapter = ifp->if_softc;
        struct tx_ring  *txr = adapter->tx_rings;
        struct mbuf     *m;

        for (int i = 0; i < adapter->num_queues; i++, txr++) {
                EM_TX_LOCK(txr);
                while ((m = buf_ring_dequeue_sc(txr->br)) != NULL)
                        m_freem(m);
                EM_TX_UNLOCK(txr);
        }
        if_qflush(ifp);
}
#else  /* !EM_MULTIQUEUE */

static void
em_start_locked(struct ifnet *ifp, struct tx_ring *txr)
{
        struct adapter  *adapter = ifp->if_softc;
        struct mbuf     *m_head;

        EM_TX_LOCK_ASSERT(txr);

        if ((ifp->if_drv_flags & (IFF_DRV_RUNNING|IFF_DRV_OACTIVE)) !=
            IFF_DRV_RUNNING)
                return;

        if (!adapter->link_active)
                return;

        while (!IFQ_DRV_IS_EMPTY(&ifp->if_snd)) {
                /* Call cleanup if number of TX descriptors low */
                if (txr->tx_avail <= EM_TX_CLEANUP_THRESHOLD)
                        em_txeof(txr);
                if (txr->tx_avail < EM_MAX_SCATTER) {
                        ifp->if_drv_flags |= IFF_DRV_OACTIVE;
                        break;
                }
                IFQ_DRV_DEQUEUE(&ifp->if_snd, m_head);
                if (m_head == NULL)
                        break;
                /*
                 *  Encapsulation can modify our pointer, and/or make it
                 *  NULL on failure.  In that event, we can't requeue.
                 */
                if (em_xmit(txr, &m_head)) {
                        if (m_head == NULL)
                                break;
                        IFQ_DRV_PREPEND(&ifp->if_snd, m_head);
                        break;
                }

                /* Send a copy of the frame to the BPF listener */
                ETHER_BPF_MTAP(ifp, m_head);

                /* Set timeout in case hardware has problems transmitting. */
                txr->watchdog_time = ticks;
                txr->queue_status = EM_QUEUE_WORKING;
        }

        return;
}

static void
em_start(struct ifnet *ifp)
{
        struct adapter  *adapter = ifp->if_softc;
        struct tx_ring  *txr = adapter->tx_rings;

        if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
                EM_TX_LOCK(txr);
                em_start_locked(ifp, txr);
                EM_TX_UNLOCK(txr);
        }
        return;
}
#endif /* EM_MULTIQUEUE */

/*********************************************************************
 *  Ioctl entry point
 *
 *  em_ioctl is called when the user wants to configure the
 *  interface.
 *
 *  return 0 on success, positive on failure
 **********************************************************************/

static int
em_ioctl(struct ifnet *ifp, u_long command, caddr_t data)
{
        struct adapter  *adapter = ifp->if_softc;
        struct ifreq    *ifr = (struct ifreq *)data;
#if defined(INET) || defined(INET6)
        struct ifaddr   *ifa = (struct ifaddr *)data;
#endif
        bool            avoid_reset = FALSE;
        int             error = 0;

        if (adapter->in_detach)
                return (error);

        switch (command) {
        case SIOCSIFADDR:
#ifdef INET
                if (ifa->ifa_addr->sa_family == AF_INET)
                        avoid_reset = TRUE;
#endif
#ifdef INET6
                if (ifa->ifa_addr->sa_family == AF_INET6)
                        avoid_reset = TRUE;
#endif
                /*
                ** Calling init results in link renegotiation,
                ** so we avoid doing it when possible.
                */
                if (avoid_reset) {
                        ifp->if_flags |= IFF_UP;
                        if (!(ifp->if_drv_flags & IFF_DRV_RUNNING))
                                em_init(adapter);
#ifdef INET
                        if (!(ifp->if_flags & IFF_NOARP))
                                arp_ifinit(ifp, ifa);
#endif
                } else
                        error = ether_ioctl(ifp, command, data);
                break;
        case SIOCSIFMTU:
            {
                int max_frame_size;

                IOCTL_DEBUGOUT("ioctl rcv'd: SIOCSIFMTU (Set Interface MTU)");

                EM_CORE_LOCK(adapter);
                switch (adapter->hw.mac.type) {
                case e1000_82571:
                case e1000_82572:
                case e1000_ich9lan:
                case e1000_ich10lan:
                case e1000_pch2lan:
                case e1000_pch_lpt:
                case e1000_82574:
                case e1000_82583:
                case e1000_80003es2lan: /* 9K Jumbo Frame size */
                        max_frame_size = 9234;
                        break;
                case e1000_pchlan:
                        max_frame_size = 4096;
                        break;
                        /* Adapters that do not support jumbo frames */
                case e1000_ich8lan:
                        max_frame_size = ETHER_MAX_LEN;
                        break;
                default:
                        max_frame_size = MAX_JUMBO_FRAME_SIZE;
                }
                if (ifr->ifr_mtu > max_frame_size - ETHER_HDR_LEN -
                    ETHER_CRC_LEN) {
                        EM_CORE_UNLOCK(adapter);
                        error = EINVAL;
                        break;
                }

                ifp->if_mtu = ifr->ifr_mtu;
                adapter->hw.mac.max_frame_size =
                    ifp->if_mtu + ETHER_HDR_LEN + ETHER_CRC_LEN;
                em_init_locked(adapter);
                EM_CORE_UNLOCK(adapter);
                break;
            }
        case SIOCSIFFLAGS:
                IOCTL_DEBUGOUT("ioctl rcv'd: SIOCSIFFLAGS (Set Interface Flags)");
                EM_CORE_LOCK(adapter);
                if (ifp->if_flags & IFF_UP) {
                        if ((ifp->if_drv_flags & IFF_DRV_RUNNING)) {
                                if ((ifp->if_flags ^ adapter->if_flags) &
                                    (IFF_PROMISC | IFF_ALLMULTI)) {
                                        em_disable_promisc(adapter);
                                        em_set_promisc(adapter);
                                }
                        } else
                                em_init_locked(adapter);
                } else
                        if (ifp->if_drv_flags & IFF_DRV_RUNNING)
                                em_stop(adapter);
                adapter->if_flags = ifp->if_flags;
                EM_CORE_UNLOCK(adapter);
                break;
        case SIOCADDMULTI:
        case SIOCDELMULTI:
                IOCTL_DEBUGOUT("ioctl rcv'd: SIOC(ADD|DEL)MULTI");
                if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
                        EM_CORE_LOCK(adapter);
                        em_disable_intr(adapter);
                        em_set_multi(adapter);
#ifdef DEVICE_POLLING
                        if (!(ifp->if_capenable & IFCAP_POLLING))
#endif
                                em_enable_intr(adapter);
                        EM_CORE_UNLOCK(adapter);
                }
                break;
        case SIOCSIFMEDIA:
                /* Check SOL/IDER usage */
                EM_CORE_LOCK(adapter);
                if (e1000_check_reset_block(&adapter->hw)) {
                        EM_CORE_UNLOCK(adapter);
                        device_printf(adapter->dev, "Media change is"
                            " blocked due to SOL/IDER session.\n");
                        break;
                }
                EM_CORE_UNLOCK(adapter);
                /* falls thru */
        case SIOCGIFMEDIA:
                IOCTL_DEBUGOUT("ioctl rcv'd: SIOCxIFMEDIA (Get/Set Interface Media)");
                error = ifmedia_ioctl(ifp, ifr, &adapter->media, command);
                break;
        case SIOCSIFCAP:
            {
                int mask, reinit;

                IOCTL_DEBUGOUT("ioctl rcv'd: SIOCSIFCAP (Set Capabilities)");
                reinit = 0;
                mask = ifr->ifr_reqcap ^ ifp->if_capenable;
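                /* mask now holds only the capability bits being toggled */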
#ifdef DEVICE_POLLING
                if (mask & IFCAP_POLLING) {
                        if (ifr->ifr_reqcap & IFCAP_POLLING) {
                                error = ether_poll_register(em_poll, ifp);
                                if (error)
                                        return (error);
                                EM_CORE_LOCK(adapter);
                                em_disable_intr(adapter);
                                ifp->if_capenable |= IFCAP_POLLING;
                                EM_CORE_UNLOCK(adapter);
                        } else {
                                error = ether_poll_deregister(ifp);
                                /* Enable interrupt even in error case */
                                EM_CORE_LOCK(adapter);
                                em_enable_intr(adapter);
                                ifp->if_capenable &= ~IFCAP_POLLING;
                                EM_CORE_UNLOCK(adapter);
                        }
                }
#endif
                if (mask & IFCAP_HWCSUM) {
                        ifp->if_capenable ^= IFCAP_HWCSUM;
                        reinit = 1;
                }
                if (mask & IFCAP_TSO4) {
                        ifp->if_capenable ^= IFCAP_TSO4;
                        reinit = 1;
                }
                if (mask & IFCAP_VLAN_HWTAGGING) {
                        ifp->if_capenable ^= IFCAP_VLAN_HWTAGGING;
                        reinit = 1;
                }
                if (mask & IFCAP_VLAN_HWFILTER) {
                        ifp->if_capenable ^= IFCAP_VLAN_HWFILTER;
                        reinit = 1;
                }
                if (mask & IFCAP_VLAN_HWTSO) {
                        ifp->if_capenable ^= IFCAP_VLAN_HWTSO;
                        reinit = 1;
                }
                if ((mask & IFCAP_WOL) &&
                    (ifp->if_capabilities & IFCAP_WOL) != 0) {
                        if (mask & IFCAP_WOL_MCAST)
                                ifp->if_capenable ^= IFCAP_WOL_MCAST;
                        if (mask & IFCAP_WOL_MAGIC)
                                ifp->if_capenable ^= IFCAP_WOL_MAGIC;
                }
                if (reinit && (ifp->if_drv_flags & IFF_DRV_RUNNING))
                        em_init(adapter);
                VLAN_CAPABILITIES(ifp);
                break;
            }

        default:
                error = ether_ioctl(ifp, command, data);
                break;
        }

        return (error);
}


/*********************************************************************
 *  Init entry point
 *
 *  This routine is used in two ways. It is used by the stack as
 *  an init entry point in the network interface structure. It is
 *  also used by the driver as a hw/sw initialization routine to
 *  get to a consistent state.
 **********************************************************************/

static void
em_init_locked(struct adapter *adapter)
{
        struct ifnet    *ifp = adapter->ifp;
        device_t        dev = adapter->dev;

        INIT_DEBUGOUT("em_init: begin");

        EM_CORE_LOCK_ASSERT(adapter);

        em_disable_intr(adapter);
        callout_stop(&adapter->timer);

        /* Get the latest mac address; the user can set a LAA */
        bcopy(IF_LLADDR(adapter->ifp), adapter->hw.mac.addr,
              ETHER_ADDR_LEN);

        /* Put the address into the Receive Address Array */
        e1000_rar_set(&adapter->hw, adapter->hw.mac.addr, 0);

        /*
         * With the 82571 adapter, RAR[0] may be overwritten
         * when the other port is reset; we make a duplicate
         * in RAR[14] (the last entry) for that eventuality,
         * which assures the interface continues to function.
         */
        if (adapter->hw.mac.type == e1000_82571) {
                e1000_set_laa_state_82571(&adapter->hw, TRUE);
                e1000_rar_set(&adapter->hw, adapter->hw.mac.addr,
                    E1000_RAR_ENTRIES - 1);
        }

        /* Initialize the hardware */
        em_reset(adapter);
        em_update_link_status(adapter);

        /* Setup VLAN support, basic and offload if available */
        E1000_WRITE_REG(&adapter->hw, E1000_VET, ETHERTYPE_VLAN);

        /* Set hardware offload abilities */
        ifp->if_hwassist = 0;
        if (ifp->if_capenable & IFCAP_TXCSUM)
                ifp->if_hwassist |= (CSUM_TCP | CSUM_UDP);
        if (ifp->if_capenable & IFCAP_TSO4)
                ifp->if_hwassist |= CSUM_TSO;

        /* Configure for OS presence */
        em_init_manageability(adapter);

        /* Prepare transmit descriptors and buffers */
        em_setup_transmit_structures(adapter);
        em_initialize_transmit_unit(adapter);

        /* Setup Multicast table */
        em_set_multi(adapter);

        /*
        ** Figure out the desired mbuf
        ** pool for doing jumbos
        */
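        /*
        ** (MCLBYTES is a 2k cluster, MJUMPAGESIZE a page-sized
        ** cluster, and MJUM9BYTES a 9k jumbo cluster.)
        */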
1344         if (adapter->hw.mac.max_frame_size <= 2048)
1345                 adapter->rx_mbuf_sz = MCLBYTES;
1346         else if (adapter->hw.mac.max_frame_size <= 4096)
1347                 adapter->rx_mbuf_sz = MJUMPAGESIZE;
1348         else
1349                 adapter->rx_mbuf_sz = MJUM9BYTES;
1350
1351         /* Prepare receive descriptors and buffers */
1352         if (em_setup_receive_structures(adapter)) {
1353                 device_printf(dev, "Could not setup receive structures\n");
1354                 em_stop(adapter);
1355                 return;
1356         }
1357         em_initialize_receive_unit(adapter);
1358
1359         /* VLAN support: use hw filtering if enabled, else just tag stripping */
1360         if (ifp->if_capenable & IFCAP_VLAN_HWTAGGING) {
1361                 if (ifp->if_capenable & IFCAP_VLAN_HWFILTER)
1362                         /* Use real VLAN Filter support */
1363                         em_setup_vlan_hw_support(adapter);
1364                 else {
1365                         u32 ctrl;
1366                         ctrl = E1000_READ_REG(&adapter->hw, E1000_CTRL);
1367                         ctrl |= E1000_CTRL_VME;
1368                         E1000_WRITE_REG(&adapter->hw, E1000_CTRL, ctrl);
1369                 }
1370         }
1371
1372         /* Don't lose promiscuous settings */
1373         em_set_promisc(adapter);
1374
1375         /* Set the interface as ACTIVE */
1376         ifp->if_drv_flags |= IFF_DRV_RUNNING;
1377         ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;
1378
1379         callout_reset(&adapter->timer, hz, em_local_timer, adapter);
1380         e1000_clear_hw_cntrs_base_generic(&adapter->hw);
1381
1382         /* MSI/X configuration for 82574 */
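        /*
         * For reference: the 82574 IVAR register is divided into 4-bit
         * fields (bits 2:0 select the MSIX vector, bit 3 marks the entry
         * valid) covering RX queues 0/1, TX queues 0/1, and the
         * "other"/link cause; adapter->ivars is assembled to match in
         * em_allocate_msix().
         */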
1383         if (adapter->hw.mac.type == e1000_82574) {
1384                 int tmp;
1385                 tmp = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
1386                 tmp |= E1000_CTRL_EXT_PBA_CLR;
1387                 E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT, tmp);
1388                 /* Set the IVAR - interrupt vector routing. */
1389                 E1000_WRITE_REG(&adapter->hw, E1000_IVAR, adapter->ivars);
1390         }
1391
1392 #ifdef DEVICE_POLLING
1393         /*
1394          * Only enable interrupts if we are not polling; make sure
1395          * they are off otherwise.
1396          */
1397         if (ifp->if_capenable & IFCAP_POLLING)
1398                 em_disable_intr(adapter);
1399         else
1400 #endif /* DEVICE_POLLING */
1401                 em_enable_intr(adapter);
1402
1403         /* AMT based hardware can now take control from firmware */
1404         if (adapter->has_manage && adapter->has_amt)
1405                 em_get_hw_control(adapter);
1406 }
1407
1408 static void
1409 em_init(void *arg)
1410 {
1411         struct adapter *adapter = arg;
1412
1413         EM_CORE_LOCK(adapter);
1414         em_init_locked(adapter);
1415         EM_CORE_UNLOCK(adapter);
1416 }
1417
1418
1419 #ifdef DEVICE_POLLING
1420 /*********************************************************************
1421  *
1422  *  Legacy polling routine: note this only works with a single queue
1423  *
1424  *********************************************************************/
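/*
 * (Polling requires the DEVICE_POLLING kernel option and is toggled per
 * interface, e.g. "ifconfig em0 polling", which sets IFCAP_POLLING and
 * steers RX/TX processing here instead of through the interrupt path.)
 */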
1425 static int
1426 em_poll(struct ifnet *ifp, enum poll_cmd cmd, int count)
1427 {
1428         struct adapter *adapter = ifp->if_softc;
1429         struct tx_ring  *txr = adapter->tx_rings;
1430         struct rx_ring  *rxr = adapter->rx_rings;
1431         u32             reg_icr;
1432         int             rx_done;
1433
1434         EM_CORE_LOCK(adapter);
1435         if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) {
1436                 EM_CORE_UNLOCK(adapter);
1437                 return (0);
1438         }
1439
1440         if (cmd == POLL_AND_CHECK_STATUS) {
1441                 reg_icr = E1000_READ_REG(&adapter->hw, E1000_ICR);
1442                 if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
1443                         callout_stop(&adapter->timer);
1444                         adapter->hw.mac.get_link_status = 1;
1445                         em_update_link_status(adapter);
1446                         callout_reset(&adapter->timer, hz,
1447                             em_local_timer, adapter);
1448                 }
1449         }
1450         EM_CORE_UNLOCK(adapter);
1451
1452         em_rxeof(rxr, count, &rx_done);
1453
1454         EM_TX_LOCK(txr);
1455         em_txeof(txr);
1456 #ifdef EM_MULTIQUEUE
1457         if (!drbr_empty(ifp, txr->br))
1458                 em_mq_start_locked(ifp, txr, NULL);
1459 #else
1460         if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
1461                 em_start_locked(ifp, txr);
1462 #endif
1463         EM_TX_UNLOCK(txr);
1464
1465         return (rx_done);
1466 }
1467 #endif /* DEVICE_POLLING */
1468
1469
1470 /*********************************************************************
1471  *
1472  *  Fast Legacy/MSI Combined Interrupt Service routine  
1473  *
1474  *********************************************************************/
1475 static int
1476 em_irq_fast(void *arg)
1477 {
1478         struct adapter  *adapter = arg;
1479         struct ifnet    *ifp;
1480         u32             reg_icr;
1481
1482         ifp = adapter->ifp;
1483
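        /*
         * In legacy/MSI mode ICR is clear-on-read, so this single read
         * both samples and acknowledges the pending interrupt causes.
         */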
1484         reg_icr = E1000_READ_REG(&adapter->hw, E1000_ICR);
1485
1486         /* Hot eject?  */
1487         if (reg_icr == 0xffffffff)
1488                 return (FILTER_STRAY);
1489
1490         /* Definitely not our interrupt.  */
1491         if (reg_icr == 0x0)
1492                 return (FILTER_STRAY);
1493
1494         /*
1495          * Starting with the 82571 chip, bit 31 should be used to
1496          * determine whether the interrupt belongs to us.
1497          */
1498         if (adapter->hw.mac.type >= e1000_82571 &&
1499             (reg_icr & E1000_ICR_INT_ASSERTED) == 0)
1500                 return (FILTER_STRAY);
1501
1502         em_disable_intr(adapter);
1503         taskqueue_enqueue(adapter->tq, &adapter->que_task);
1504
1505         /* Link status change */
1506         if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
1507                 adapter->hw.mac.get_link_status = 1;
1508                 taskqueue_enqueue(taskqueue_fast, &adapter->link_task);
1509         }
1510
1511         if (reg_icr & E1000_ICR_RXO)
1512                 adapter->rx_overruns++;
1513         return (FILTER_HANDLED);
1514 }
1515
1516 /* Combined RX/TX handler, used by Legacy and MSI */
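/*
 * em_irq_fast() masks interrupts before scheduling this task; they stay
 * masked while work remains (the task re-enqueues itself below) and are
 * re-enabled only once both rings have been drained.
 */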
1517 static void
1518 em_handle_que(void *context, int pending)
1519 {
1520         struct adapter  *adapter = context;
1521         struct ifnet    *ifp = adapter->ifp;
1522         struct tx_ring  *txr = adapter->tx_rings;
1523         struct rx_ring  *rxr = adapter->rx_rings;
1524
1525
1526         if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
1527                 bool more = em_rxeof(rxr, adapter->rx_process_limit, NULL);
1528                 EM_TX_LOCK(txr);
1529                 em_txeof(txr);
1530 #ifdef EM_MULTIQUEUE
1531                 if (!drbr_empty(ifp, txr->br))
1532                         em_mq_start_locked(ifp, txr, NULL);
1533 #else
1534                 if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
1535                         em_start_locked(ifp, txr);
1536 #endif
1537                 EM_TX_UNLOCK(txr);
1538                 if (more) {
1539                         taskqueue_enqueue(adapter->tq, &adapter->que_task);
1540                         return;
1541                 }
1542         }
1543
1544         em_enable_intr(adapter);
1545         return;
1546 }
1547
1548
1549 /*********************************************************************
1550  *
1551  *  MSIX Interrupt Service Routines
1552  *
1553  **********************************************************************/
1554 static void
1555 em_msix_tx(void *arg)
1556 {
1557         struct tx_ring *txr = arg;
1558         struct adapter *adapter = txr->adapter;
1559         struct ifnet    *ifp = adapter->ifp;
1560
1561         ++txr->tx_irq;
1562         EM_TX_LOCK(txr);
1563         em_txeof(txr);
1564 #ifdef EM_MULTIQUEUE
1565         if (!drbr_empty(ifp, txr->br))
1566                 em_mq_start_locked(ifp, txr, NULL);
1567 #else
1568         if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
1569                 em_start_locked(ifp, txr);
1570 #endif
1571         /* Reenable this interrupt */
1572         E1000_WRITE_REG(&adapter->hw, E1000_IMS, txr->ims);
1573         EM_TX_UNLOCK(txr);
1574         return;
1575 }
1576
1577 /*********************************************************************
1578  *
1579  *  MSIX RX Interrupt Service routine
1580  *
1581  **********************************************************************/
1582
1583 static void
1584 em_msix_rx(void *arg)
1585 {
1586         struct rx_ring  *rxr = arg;
1587         struct adapter  *adapter = rxr->adapter;
1588         bool            more;
1589
1590         ++rxr->rx_irq;
1591         if (!(adapter->ifp->if_drv_flags & IFF_DRV_RUNNING))
1592                 return;
1593         more = em_rxeof(rxr, adapter->rx_process_limit, NULL);
1594         if (more)
1595                 taskqueue_enqueue(rxr->tq, &rxr->rx_task);
1596         else
1597                 /* Reenable this interrupt */
1598                 E1000_WRITE_REG(&adapter->hw, E1000_IMS, rxr->ims);
1599         return;
1600 }
1601
1602 /*********************************************************************
1603  *
1604  *  MSIX Link Fast Interrupt Service routine
1605  *
1606  **********************************************************************/
1607 static void
1608 em_msix_link(void *arg)
1609 {
1610         struct adapter  *adapter = arg;
1611         u32             reg_icr;
1612
1613         ++adapter->link_irq;
1614         reg_icr = E1000_READ_REG(&adapter->hw, E1000_ICR);
1615
1616         if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
1617                 adapter->hw.mac.get_link_status = 1;
1618                 em_handle_link(adapter, 0);
1619         } else
1620                 E1000_WRITE_REG(&adapter->hw, E1000_IMS,
1621                     EM_MSIX_LINK | E1000_IMS_LSC);
1622         return;
1623 }
1624
1625 static void
1626 em_handle_rx(void *context, int pending)
1627 {
1628         struct rx_ring  *rxr = context;
1629         struct adapter  *adapter = rxr->adapter;
1630         bool            more;
1631
1632         more = em_rxeof(rxr, adapter->rx_process_limit, NULL);
1633         if (more)
1634                 taskqueue_enqueue(rxr->tq, &rxr->rx_task);
1635         else
1636                 /* Reenable this interrupt */
1637                 E1000_WRITE_REG(&adapter->hw, E1000_IMS, rxr->ims);
1638 }
1639
1640 static void
1641 em_handle_tx(void *context, int pending)
1642 {
1643         struct tx_ring  *txr = context;
1644         struct adapter  *adapter = txr->adapter;
1645         struct ifnet    *ifp = adapter->ifp;
1646
1647         EM_TX_LOCK(txr);
1648         em_txeof(txr);
1649 #ifdef EM_MULTIQUEUE
1650         if (!drbr_empty(ifp, txr->br))
1651                 em_mq_start_locked(ifp, txr, NULL);
1652 #else
1653         if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
1654                 em_start_locked(ifp, txr);
1655 #endif
1656         E1000_WRITE_REG(&adapter->hw, E1000_IMS, txr->ims);
1657         EM_TX_UNLOCK(txr);
1658 }
1659
1660 static void
1661 em_handle_link(void *context, int pending)
1662 {
1663         struct adapter  *adapter = context;
1664         struct tx_ring  *txr = adapter->tx_rings;
1665         struct ifnet *ifp = adapter->ifp;
1666
1667         if (!(ifp->if_drv_flags & IFF_DRV_RUNNING))
1668                 return;
1669
1670         EM_CORE_LOCK(adapter);
1671         callout_stop(&adapter->timer);
1672         em_update_link_status(adapter);
1673         callout_reset(&adapter->timer, hz, em_local_timer, adapter);
1674         E1000_WRITE_REG(&adapter->hw, E1000_IMS,
1675             EM_MSIX_LINK | E1000_IMS_LSC);
1676         if (adapter->link_active) {
1677                 for (int i = 0; i < adapter->num_queues; i++, txr++) {
1678                         EM_TX_LOCK(txr);
1679 #ifdef EM_MULTIQUEUE
1680                         if (!drbr_empty(ifp, txr->br))
1681                                 em_mq_start_locked(ifp, txr, NULL);
1682 #else
1683                         if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
1684                                 em_start_locked(ifp, txr);
1685 #endif
1686                         EM_TX_UNLOCK(txr);
1687                 }
1688         }
1689         EM_CORE_UNLOCK(adapter);
1690 }
1691
1692
1693 /*********************************************************************
1694  *
1695  *  Media Ioctl callback (status)
1696  *
1697  *  This routine is called whenever the user queries the status of
1698  *  the interface using ifconfig.
1699  *
1700  **********************************************************************/
1701 static void
1702 em_media_status(struct ifnet *ifp, struct ifmediareq *ifmr)
1703 {
1704         struct adapter *adapter = ifp->if_softc;
1705         u_char fiber_type = IFM_1000_SX;
1706
1707         INIT_DEBUGOUT("em_media_status: begin");
1708
1709         EM_CORE_LOCK(adapter);
1710         em_update_link_status(adapter);
1711
1712         ifmr->ifm_status = IFM_AVALID;
1713         ifmr->ifm_active = IFM_ETHER;
1714
1715         if (!adapter->link_active) {
1716                 EM_CORE_UNLOCK(adapter);
1717                 return;
1718         }
1719
1720         ifmr->ifm_status |= IFM_ACTIVE;
1721
1722         if ((adapter->hw.phy.media_type == e1000_media_type_fiber) ||
1723             (adapter->hw.phy.media_type == e1000_media_type_internal_serdes)) {
1724                 ifmr->ifm_active |= fiber_type | IFM_FDX;
1725         } else {
1726                 switch (adapter->link_speed) {
1727                 case 10:
1728                         ifmr->ifm_active |= IFM_10_T;
1729                         break;
1730                 case 100:
1731                         ifmr->ifm_active |= IFM_100_TX;
1732                         break;
1733                 case 1000:
1734                         ifmr->ifm_active |= IFM_1000_T;
1735                         break;
1736                 }
1737                 if (adapter->link_duplex == FULL_DUPLEX)
1738                         ifmr->ifm_active |= IFM_FDX;
1739                 else
1740                         ifmr->ifm_active |= IFM_HDX;
1741         }
1742         EM_CORE_UNLOCK(adapter);
1743 }
1744
1745 /*********************************************************************
1746  *
1747  *  Media Ioctl callback (change)
1748  *
1749  *  This routine is called when the user changes speed/duplex using
1750  *  the media/mediaopt options with ifconfig.
1751  *
1752  **********************************************************************/
1753 static int
1754 em_media_change(struct ifnet *ifp)
1755 {
1756         struct adapter *adapter = ifp->if_softc;
1757         struct ifmedia  *ifm = &adapter->media;
1758
1759         INIT_DEBUGOUT("em_media_change: begin");
1760
1761         if (IFM_TYPE(ifm->ifm_media) != IFM_ETHER)
1762                 return (EINVAL);
1763
1764         EM_CORE_LOCK(adapter);
1765         switch (IFM_SUBTYPE(ifm->ifm_media)) {
1766         case IFM_AUTO:
1767                 adapter->hw.mac.autoneg = DO_AUTO_NEG;
1768                 adapter->hw.phy.autoneg_advertised = AUTONEG_ADV_DEFAULT;
1769                 break;
1770         case IFM_1000_LX:
1771         case IFM_1000_SX:
1772         case IFM_1000_T:
1773                 adapter->hw.mac.autoneg = DO_AUTO_NEG;
1774                 adapter->hw.phy.autoneg_advertised = ADVERTISE_1000_FULL;
1775                 break;
1776         case IFM_100_TX:
1777                 adapter->hw.mac.autoneg = FALSE;
1778                 adapter->hw.phy.autoneg_advertised = 0;
1779                 if ((ifm->ifm_media & IFM_GMASK) == IFM_FDX)
1780                         adapter->hw.mac.forced_speed_duplex = ADVERTISE_100_FULL;
1781                 else
1782                         adapter->hw.mac.forced_speed_duplex = ADVERTISE_100_HALF;
1783                 break;
1784         case IFM_10_T:
1785                 adapter->hw.mac.autoneg = FALSE;
1786                 adapter->hw.phy.autoneg_advertised = 0;
1787                 if ((ifm->ifm_media & IFM_GMASK) == IFM_FDX)
1788                         adapter->hw.mac.forced_speed_duplex = ADVERTISE_10_FULL;
1789                 else
1790                         adapter->hw.mac.forced_speed_duplex = ADVERTISE_10_HALF;
1791                 break;
1792         default:
1793                 device_printf(adapter->dev, "Unsupported media type\n");
1794         }
1795
1796         em_init_locked(adapter);
1797         EM_CORE_UNLOCK(adapter);
1798
1799         return (0);
1800 }
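/*
 * Example usage (interface name hypothetical):
 *
 *      ifconfig em0 media 100baseTX mediaopt full-duplex
 *
 * lands in the IFM_100_TX case above with IFM_FDX set, while
 * "ifconfig em0 media autoselect" restores autonegotiation via the
 * IFM_AUTO case.
 */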
1801
1802 /*********************************************************************
1803  *
1804  *  This routine maps the mbufs to tx descriptors.
1805  *
1806  *  return 0 on success, positive on failure
1807  **********************************************************************/
1808
1809 static int
1810 em_xmit(struct tx_ring *txr, struct mbuf **m_headp)
1811 {
1812         struct adapter          *adapter = txr->adapter;
1813         bus_dma_segment_t       segs[EM_MAX_SCATTER];
1814         bus_dmamap_t            map;
1815         struct em_buffer        *tx_buffer, *tx_buffer_mapped;
1816         struct e1000_tx_desc    *ctxd = NULL;
1817         struct mbuf             *m_head;
1818         struct ether_header     *eh;
1819         struct ip               *ip = NULL;
1820         struct tcphdr           *tp = NULL;
1821         u32                     txd_upper, txd_lower, txd_used, txd_saved;
1822         int                     ip_off, poff;
1823         int                     nsegs, i, j, first, last = 0;
1824         int                     error, do_tso, tso_desc = 0, remap = 1;
1825
1826 retry:
1827         m_head = *m_headp;
1828         txd_upper = txd_lower = txd_used = txd_saved = 0;
1829         do_tso = ((m_head->m_pkthdr.csum_flags & CSUM_TSO) != 0);
1830         ip_off = poff = 0;
1831
1832         /*
1833          * Intel recommends entire IP/TCP header length reside in a single
1834          * buffer. If multiple descriptors are used to describe the IP and
1835          * TCP header, each descriptor should describe one or more
1836          * complete headers; descriptors referencing only parts of headers
1837          * are not supported. If all layer headers are not coalesced into
1838          * a single buffer, each buffer should not cross a 4KB boundary,
1839          * or be larger than the maximum read request size.
1840          * The controller also requires modifying the IP/TCP header to
1841          * make TSO work, so we first get a writable mbuf chain, then
1842          * coalesce the ethernet/IP/TCP headers into a single buffer to
1843          * meet the controller's requirements. This also simplifies
1844          * IP/TCP/UDP checksum offloading, which has similar restrictions.
1845          */
1846         if (do_tso || m_head->m_pkthdr.csum_flags & CSUM_OFFLOAD) {
1847                 if (do_tso || (m_head->m_next != NULL && 
1848                     m_head->m_pkthdr.csum_flags & CSUM_OFFLOAD)) {
1849                         if (M_WRITABLE(*m_headp) == 0) {
1850                                 m_head = m_dup(*m_headp, M_NOWAIT);
1851                                 m_freem(*m_headp);
1852                                 if (m_head == NULL) {
1853                                         *m_headp = NULL;
1854                                         return (ENOBUFS);
1855                                 }
1856                                 *m_headp = m_head;
1857                         }
1858                 }
1859                 /*
1860                  * XXX
1861                  * Assume IPv4, we don't have TSO/checksum offload support
1862                  * for IPv6 yet.
1863                  */
1864                 ip_off = sizeof(struct ether_header);
1865                 m_head = m_pullup(m_head, ip_off);
1866                 if (m_head == NULL) {
1867                         *m_headp = NULL;
1868                         return (ENOBUFS);
1869                 }
1870                 eh = mtod(m_head, struct ether_header *);
1871                 if (eh->ether_type == htons(ETHERTYPE_VLAN)) {
1872                         ip_off = sizeof(struct ether_vlan_header);
1873                         m_head = m_pullup(m_head, ip_off);
1874                         if (m_head == NULL) {
1875                                 *m_headp = NULL;
1876                                 return (ENOBUFS);
1877                         }
1878                 }
1879                 m_head = m_pullup(m_head, ip_off + sizeof(struct ip));
1880                 if (m_head == NULL) {
1881                         *m_headp = NULL;
1882                         return (ENOBUFS);
1883                 }
1884                 ip = (struct ip *)(mtod(m_head, char *) + ip_off);
1885                 poff = ip_off + (ip->ip_hl << 2);
1886                 if (do_tso) {
1887                         m_head = m_pullup(m_head, poff + sizeof(struct tcphdr));
1888                         if (m_head == NULL) {
1889                                 *m_headp = NULL;
1890                                 return (ENOBUFS);
1891                         }
1892                         tp = (struct tcphdr *)(mtod(m_head, char *) + poff);
1893                         /*
1894                          * TSO workaround: pull the complete TCP header
1895                          * plus 4 bytes of payload into the first buffer.
1896                          */
1897                         m_head = m_pullup(m_head, poff + (tp->th_off << 2) + 4);
1898                         if (m_head == NULL) {
1899                                 *m_headp = NULL;
1900                                 return (ENOBUFS);
1901                         }
1902                         ip = (struct ip *)(mtod(m_head, char *) + ip_off);
1903                         ip->ip_len = 0;
1904                         ip->ip_sum = 0;
1905                         /*
1906                          * For TSO the pseudo TCP checksum must not include
1907                          * the TCP payload length, so the driver recomputes
1908                          * it here as the hardware expects to see it, per
1909                          * Microsoft's Large Send specification.
1910                          */
1911                         tp = (struct tcphdr *)(mtod(m_head, char *) + poff);
1912                         tp->th_sum = in_pseudo(ip->ip_src.s_addr,
1913                             ip->ip_dst.s_addr, htons(IPPROTO_TCP));
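                        /*
                         * For comparison, plain TCP checksum offload seeds
                         * th_sum with the length included, e.g.
                         * in_pseudo(src, dst, htons(IPPROTO_TCP + len));
                         * TSO omits the length because the hardware fills
                         * it in for each segment it generates.
                         */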
1914                 } else if (m_head->m_pkthdr.csum_flags & CSUM_TCP) {
1915                         m_head = m_pullup(m_head, poff + sizeof(struct tcphdr));
1916                         if (m_head == NULL) {
1917                                 *m_headp = NULL;
1918                                 return (ENOBUFS);
1919                         }
1920                         tp = (struct tcphdr *)(mtod(m_head, char *) + poff);
1921                         m_head = m_pullup(m_head, poff + (tp->th_off << 2));
1922                         if (m_head == NULL) {
1923                                 *m_headp = NULL;
1924                                 return (ENOBUFS);
1925                         }
1926                         ip = (struct ip *)(mtod(m_head, char *) + ip_off);
1927                         tp = (struct tcphdr *)(mtod(m_head, char *) + poff);
1928                 } else if (m_head->m_pkthdr.csum_flags & CSUM_UDP) {
1929                         m_head = m_pullup(m_head, poff + sizeof(struct udphdr));
1930                         if (m_head == NULL) {
1931                                 *m_headp = NULL;
1932                                 return (ENOBUFS);
1933                         }
1934                         ip = (struct ip *)(mtod(m_head, char *) + ip_off);
1935                 }
1936                 *m_headp = m_head;
1937         }
1938
1939         /*
1940          * Map the packet for DMA
1941          *
1942          * Capture the first descriptor index,
1943          * this descriptor will have the index
1944          * of the EOP which is the only one that
1945          * now gets a DONE bit writeback.
1946          */
1947         first = txr->next_avail_desc;
1948         tx_buffer = &txr->tx_buffers[first];
1949         tx_buffer_mapped = tx_buffer;
1950         map = tx_buffer->map;
1951
1952         error = bus_dmamap_load_mbuf_sg(txr->txtag, map,
1953             *m_headp, segs, &nsegs, BUS_DMA_NOWAIT);
1954
1955         /*
1956          * There are two types of errors we can (try) to handle:
1957          * - EFBIG means the mbuf chain was too long and bus_dma ran
1958          *   out of segments.  Defragment the mbuf chain and try again.
1959          * - ENOMEM means bus_dma could not obtain enough bounce buffers
1960          *   at this point in time.  Defer sending and try again later.
1961          * All other errors, in particular EINVAL, are fatal and prevent the
1962          * mbuf chain from ever going through.  Drop it and report error.
1963          */
1964         if (error == EFBIG && remap) {
1965                 struct mbuf *m;
1966
1967                 m = m_defrag(*m_headp, M_NOWAIT);
1968                 if (m == NULL) {
1969                         adapter->mbuf_alloc_failed++;
1970                         m_freem(*m_headp);
1971                         *m_headp = NULL;
1972                         return (ENOBUFS);
1973                 }
1974                 *m_headp = m;
1975
1976                 /* Try it again, but only once */
1977                 remap = 0;
1978                 goto retry;
1979         } else if (error == ENOMEM) {
1980                 adapter->no_tx_dma_setup++;
1981                 return (error);
1982         } else if (error != 0) {
1983                 adapter->no_tx_dma_setup++;
1984                 m_freem(*m_headp);
1985                 *m_headp = NULL;
1986                 return (error);
1987         }
1988
1989         /*
1990          * TSO hardware workaround: if this packet is not
1991          * TSO, is only a single descriptor long, and
1992          * follows a TSO burst, then we need to add a
1993          * sentinel descriptor to prevent premature writeback.
1994          */
1995         if ((do_tso == 0) && (txr->tx_tso == TRUE)) {
1996                 if (nsegs == 1)
1997                         tso_desc = TRUE;
1998                 txr->tx_tso = FALSE;
1999         }
2000
2001         if (nsegs > (txr->tx_avail - 2)) {
2002                 txr->no_desc_avail++;
2003                 bus_dmamap_unload(txr->txtag, map);
2004                 return (ENOBUFS);
2005         }
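        /*
         * The two-descriptor slack above leaves room for the TSO
         * sentinel emitted below and keeps the ring from ever filling
         * completely (so a full ring stays distinguishable from an
         * empty one); the exact margin is conservative headroom.
         */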
2006         m_head = *m_headp;
2007
2008         /* Do hardware assists */
2009         if (m_head->m_pkthdr.csum_flags & CSUM_TSO) {
2010                 em_tso_setup(txr, m_head, ip_off, ip, tp,
2011                     &txd_upper, &txd_lower);
2012                 /* we need to make a final sentinel transmit desc */
2013                 tso_desc = TRUE;
2014         } else if (m_head->m_pkthdr.csum_flags & CSUM_OFFLOAD)
2015                 em_transmit_checksum_setup(txr, m_head,
2016                     ip_off, ip, &txd_upper, &txd_lower);
2017
2018         if (m_head->m_flags & M_VLANTAG) {
2019                 /* Set the vlan id. */
2020                 txd_upper |=
2021                     (htole16(m_head->m_pkthdr.ether_vtag) << 16);
2022                 /* Tell hardware to add tag */
2023                 txd_lower |= htole32(E1000_TXD_CMD_VLE);
2024         }
2025
2026         i = txr->next_avail_desc;
2027
2028         /* Set up our transmit descriptors */
2029         for (j = 0; j < nsegs; j++) {
2030                 bus_size_t seg_len;
2031                 bus_addr_t seg_addr;
2032
2033                 tx_buffer = &txr->tx_buffers[i];
2034                 ctxd = &txr->tx_base[i];
2035                 seg_addr = segs[j].ds_addr;
2036                 seg_len  = segs[j].ds_len;
2037                 /*
2038                 ** TSO Workaround:
2039                 ** If this is the last descriptor, we want to
2040                 ** split it so we have a small final sentinel
2041                 */
2042                 if (tso_desc && (j == (nsegs -1)) && (seg_len > 8)) {
2043                         seg_len -= 4;
2044                         ctxd->buffer_addr = htole64(seg_addr);
2045                         ctxd->lower.data = htole32(
2046                             adapter->txd_cmd | txd_lower | seg_len);
2047                         ctxd->upper.data =
2048                             htole32(txd_upper);
2049                         if (++i == adapter->num_tx_desc)
2050                                 i = 0;
2051                         /* Now make the sentinel */     
2052                         ++txd_used; /* using an extra txd */
2053                         ctxd = &txr->tx_base[i];
2054                         tx_buffer = &txr->tx_buffers[i];
2055                         ctxd->buffer_addr =
2056                             htole64(seg_addr + seg_len);
2057                         ctxd->lower.data = htole32(
2058                             adapter->txd_cmd | txd_lower | 4);
2059                         ctxd->upper.data =
2060                             htole32(txd_upper);
2061                         last = i;
2062                         if (++i == adapter->num_tx_desc)
2063                                 i = 0;
2064                 } else {
2065                         ctxd->buffer_addr = htole64(seg_addr);
2066                         ctxd->lower.data = htole32(
2067                             adapter->txd_cmd | txd_lower | seg_len);
2068                         ctxd->upper.data =
2069                             htole32(txd_upper);
2070                         last = i;
2071                         if (++i == adapter->num_tx_desc)
2072                                 i = 0;
2073                 }
2074                 tx_buffer->m_head = NULL;
2075                 tx_buffer->next_eop = -1;
2076         }
2077
2078         txr->next_avail_desc = i;
2079         txr->tx_avail -= nsegs;
2080         if (tso_desc) /* TSO used an extra for sentinel */
2081                 txr->tx_avail -= txd_used;
2082
2083         tx_buffer->m_head = m_head;
2084         /*
2085         ** Here we swap the map so the last descriptor,
2086         ** which gets the completion interrupt, has the
2087         ** real map, and the first descriptor gets the
2088         ** unused map from this descriptor.
2089         */
2090         tx_buffer_mapped->map = tx_buffer->map;
2091         tx_buffer->map = map;
2092         bus_dmamap_sync(txr->txtag, map, BUS_DMASYNC_PREWRITE);
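        /*
         * PREWRITE guarantees CPU-cached packet data (and any bounce
         * buffers) are flushed before the tail update below makes the
         * descriptors visible to the hardware.
         */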
2093
2094         /*
2095          * Last Descriptor of Packet
2096          * needs End Of Packet (EOP)
2097          * and Report Status (RS)
2098          */
2099         ctxd->lower.data |=
2100             htole32(E1000_TXD_CMD_EOP | E1000_TXD_CMD_RS);
2101         /*
2102          * Keep track in the first buffer which
2103          * descriptor will be written back
2104          */
2105         tx_buffer = &txr->tx_buffers[first];
2106         tx_buffer->next_eop = last;
2107         /* Update the watchdog time early and often */
2108         txr->watchdog_time = ticks;
2109
2110         /*
2111          * Advance the Transmit Descriptor Tail (TDT); this tells the E1000
2112          * that this frame is available to transmit.
2113          */
2114         bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
2115             BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
2116         E1000_WRITE_REG(&adapter->hw, E1000_TDT(txr->me), i);
2117
2118         return (0);
2119 }
2120
2121 static void
2122 em_set_promisc(struct adapter *adapter)
2123 {
2124         struct ifnet    *ifp = adapter->ifp;
2125         u32             reg_rctl;
2126
2127         reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
2128
2129         if (ifp->if_flags & IFF_PROMISC) {
2130                 reg_rctl |= (E1000_RCTL_UPE | E1000_RCTL_MPE);
2131                 /* Turn this on if you want to see bad packets */
2132                 if (em_debug_sbp)
2133                         reg_rctl |= E1000_RCTL_SBP;
2134                 E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2135         } else if (ifp->if_flags & IFF_ALLMULTI) {
2136                 reg_rctl |= E1000_RCTL_MPE;
2137                 reg_rctl &= ~E1000_RCTL_UPE;
2138                 E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2139         }
2140 }
2141
2142 static void
2143 em_disable_promisc(struct adapter *adapter)
2144 {
2145         struct ifnet    *ifp = adapter->ifp;
2146         u32             reg_rctl;
2147         int             mcnt = 0;
2148
2149         reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
2150         reg_rctl &=  (~E1000_RCTL_UPE);
2151         if (ifp->if_flags & IFF_ALLMULTI)
2152                 mcnt = MAX_NUM_MULTICAST_ADDRESSES;
2153         else {
2154                 struct  ifmultiaddr *ifma;
2155 #if __FreeBSD_version < 800000
2156                 IF_ADDR_LOCK(ifp);
2157 #else   
2158                 if_maddr_rlock(ifp);
2159 #endif
2160                 TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
2161                         if (ifma->ifma_addr->sa_family != AF_LINK)
2162                                 continue;
2163                         if (mcnt == MAX_NUM_MULTICAST_ADDRESSES)
2164                                 break;
2165                         mcnt++;
2166                 }
2167 #if __FreeBSD_version < 800000
2168                 IF_ADDR_UNLOCK(ifp);
2169 #else
2170                 if_maddr_runlock(ifp);
2171 #endif
2172         }
2173         /* Don't clear MPE while at the max multicast group count */
2174         if (mcnt < MAX_NUM_MULTICAST_ADDRESSES)
2175                 reg_rctl &=  (~E1000_RCTL_MPE);
2176         reg_rctl &=  (~E1000_RCTL_SBP);
2177         E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2178 }
2179
2180
2181 /*********************************************************************
2182  *  Multicast Update
2183  *
2184  *  This routine is called whenever multicast address list is updated.
2185  *
2186  **********************************************************************/
2187
2188 static void
2189 em_set_multi(struct adapter *adapter)
2190 {
2191         struct ifnet    *ifp = adapter->ifp;
2192         struct ifmultiaddr *ifma;
2193         u32 reg_rctl = 0;
2194         u8  *mta; /* Multicast array memory */
2195         int mcnt = 0;
2196
2197         IOCTL_DEBUGOUT("em_set_multi: begin");
2198
2199         mta = adapter->mta;
2200         bzero(mta, sizeof(u8) * ETH_ADDR_LEN * MAX_NUM_MULTICAST_ADDRESSES);
2201
2202         if (adapter->hw.mac.type == e1000_82542 && 
2203             adapter->hw.revision_id == E1000_REVISION_2) {
2204                 reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
2205                 if (adapter->hw.bus.pci_cmd_word & CMD_MEM_WRT_INVALIDATE)
2206                         e1000_pci_clear_mwi(&adapter->hw);
2207                 reg_rctl |= E1000_RCTL_RST;
2208                 E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2209                 msec_delay(5);
2210         }
2211
2212 #if __FreeBSD_version < 800000
2213         IF_ADDR_LOCK(ifp);
2214 #else
2215         if_maddr_rlock(ifp);
2216 #endif
2217         TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
2218                 if (ifma->ifma_addr->sa_family != AF_LINK)
2219                         continue;
2220
2221                 if (mcnt == MAX_NUM_MULTICAST_ADDRESSES)
2222                         break;
2223
2224                 bcopy(LLADDR((struct sockaddr_dl *)ifma->ifma_addr),
2225                     &mta[mcnt * ETH_ADDR_LEN], ETH_ADDR_LEN);
2226                 mcnt++;
2227         }
2228 #if __FreeBSD_version < 800000
2229         IF_ADDR_UNLOCK(ifp);
2230 #else
2231         if_maddr_runlock(ifp);
2232 #endif
2233         if (mcnt >= MAX_NUM_MULTICAST_ADDRESSES) {
2234                 reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
2235                 reg_rctl |= E1000_RCTL_MPE;
2236                 E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2237         } else
2238                 e1000_update_mc_addr_list(&adapter->hw, mta, mcnt);
2239
2240         if (adapter->hw.mac.type == e1000_82542 && 
2241             adapter->hw.revision_id == E1000_REVISION_2) {
2242                 reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
2243                 reg_rctl &= ~E1000_RCTL_RST;
2244                 E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2245                 msec_delay(5);
2246                 if (adapter->hw.bus.pci_cmd_word & CMD_MEM_WRT_INVALIDATE)
2247                         e1000_pci_set_mwi(&adapter->hw);
2248         }
2249 }
2250
2251
2252 /*********************************************************************
2253  *  Timer routine
2254  *
2255  *  This routine checks for link status and updates statistics.
2256  *
2257  **********************************************************************/
2258
2259 static void
2260 em_local_timer(void *arg)
2261 {
2262         struct adapter  *adapter = arg;
2263         struct ifnet    *ifp = adapter->ifp;
2264         struct tx_ring  *txr = adapter->tx_rings;
2265         struct rx_ring  *rxr = adapter->rx_rings;
2266         u32             trigger;
2267
2268         EM_CORE_LOCK_ASSERT(adapter);
2269
2270         em_update_link_status(adapter);
2271         em_update_stats_counters(adapter);
2272
2273         /* Reset LAA into RAR[0] on 82571 */
2274         if ((adapter->hw.mac.type == e1000_82571) &&
2275             e1000_get_laa_state_82571(&adapter->hw))
2276                 e1000_rar_set(&adapter->hw, adapter->hw.mac.addr, 0);
2277
2278         /* Mask to use in the irq trigger */
2279         if (adapter->msix_mem)
2280                 trigger = rxr->ims; /* RX for 82574 */
2281         else
2282                 trigger = E1000_ICS_RXDMT0;
2283
2284         /*
2285         ** Check on the state of the TX queue(s); this
2286         ** can be done without the lock because it's read-only
2287         ** and the HUNG state will be static if set.
2288         */
2289         for (int i = 0; i < adapter->num_queues; i++, txr++) {
2290                 if ((txr->queue_status == EM_QUEUE_HUNG) &&
2291                     (adapter->pause_frames == 0))
2292                         goto hung;
2293                 /* Schedule a TX tasklet if needed */
2294                 if (txr->tx_avail <= EM_MAX_SCATTER)
2295                         taskqueue_enqueue(txr->tq, &txr->tx_task);
2296         }
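        /*
         * EM_QUEUE_HUNG is set by the transmit cleanup path when a queue
         * makes no progress within the watchdog interval; the
         * pause_frames test above avoids declaring a hang while flow
         * control is merely stalling transmission.
         */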
2297         
2298         adapter->pause_frames = 0;
2299         callout_reset(&adapter->timer, hz, em_local_timer, adapter);
2300 #ifndef DEVICE_POLLING
2301         /* Trigger an RX interrupt to guarantee mbuf refresh */
2302         E1000_WRITE_REG(&adapter->hw, E1000_ICS, trigger);
2303 #endif
2304         return;
2305 hung:
2306         /* Looks like we're hung */
2307         device_printf(adapter->dev, "Watchdog timeout -- resetting\n");
2308         device_printf(adapter->dev,
2309             "Queue(%d) tdh = %d, hw tdt = %d\n", txr->me,
2310             E1000_READ_REG(&adapter->hw, E1000_TDH(txr->me)),
2311             E1000_READ_REG(&adapter->hw, E1000_TDT(txr->me)));
2312         device_printf(adapter->dev, "TX(%d) desc avail = %d, "
2313             "Next TX to Clean = %d\n",
2314             txr->me, txr->tx_avail, txr->next_to_clean);
2315         ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
2316         adapter->watchdog_events++;
2317         adapter->pause_frames = 0;
2318         em_init_locked(adapter);
2319 }
2320
2321
2322 static void
2323 em_update_link_status(struct adapter *adapter)
2324 {
2325         struct e1000_hw *hw = &adapter->hw;
2326         struct ifnet *ifp = adapter->ifp;
2327         device_t dev = adapter->dev;
2328         struct tx_ring *txr = adapter->tx_rings;
2329         u32 link_check = 0;
2330
2331         /* Get the cached link value or read phy for real */
2332         switch (hw->phy.media_type) {
2333         case e1000_media_type_copper:
2334                 if (hw->mac.get_link_status) {
2335                         /* Do the work to read phy */
2336                         e1000_check_for_link(hw);
2337                         link_check = !hw->mac.get_link_status;
2338                         if (link_check) /* ESB2 fix */
2339                                 e1000_cfg_on_link_up(hw);
2340                 } else
2341                         link_check = TRUE;
2342                 break;
2343         case e1000_media_type_fiber:
2344                 e1000_check_for_link(hw);
2345                 link_check = (E1000_READ_REG(hw, E1000_STATUS) &
2346                                  E1000_STATUS_LU);
2347                 break;
2348         case e1000_media_type_internal_serdes:
2349                 e1000_check_for_link(hw);
2350                 link_check = adapter->hw.mac.serdes_has_link;
2351                 break;
2352         default:
2353         case e1000_media_type_unknown:
2354                 break;
2355         }
2356
2357         /* Now check for a transition */
2358         if (link_check && (adapter->link_active == 0)) {
2359                 e1000_get_speed_and_duplex(hw, &adapter->link_speed,
2360                     &adapter->link_duplex);
2361                 /* Check if we must disable SPEED_MODE bit on PCI-E */
2362                 if ((adapter->link_speed != SPEED_1000) &&
2363                     ((hw->mac.type == e1000_82571) ||
2364                     (hw->mac.type == e1000_82572))) {
2365                         int tarc0;
2366                         tarc0 = E1000_READ_REG(hw, E1000_TARC(0));
2367                         tarc0 &= ~SPEED_MODE_BIT;
2368                         E1000_WRITE_REG(hw, E1000_TARC(0), tarc0);
2369                 }
2370                 if (bootverbose)
2371                         device_printf(dev, "Link is up %d Mbps %s\n",
2372                             adapter->link_speed,
2373                             ((adapter->link_duplex == FULL_DUPLEX) ?
2374                             "Full Duplex" : "Half Duplex"));
2375                 adapter->link_active = 1;
2376                 adapter->smartspeed = 0;
2377                 ifp->if_baudrate = adapter->link_speed * 1000000;
2378                 if_link_state_change(ifp, LINK_STATE_UP);
2379         } else if (!link_check && (adapter->link_active == 1)) {
2380                 ifp->if_baudrate = adapter->link_speed = 0;
2381                 adapter->link_duplex = 0;
2382                 if (bootverbose)
2383                         device_printf(dev, "Link is Down\n");
2384                 adapter->link_active = 0;
2385                 /* Link down, disable watchdog */
2386                 for (int i = 0; i < adapter->num_queues; i++, txr++)
2387                         txr->queue_status = EM_QUEUE_IDLE;
2388                 if_link_state_change(ifp, LINK_STATE_DOWN);
2389         }
2390 }
2391
2392 /*********************************************************************
2393  *
2394  *  This routine disables all traffic on the adapter by issuing a
2395  *  global reset on the MAC and deallocates TX/RX buffers.
2396  *
2397  *  This routine should always be called with BOTH the CORE
2398  *  and TX locks.
2399  **********************************************************************/
2400
2401 static void
2402 em_stop(void *arg)
2403 {
2404         struct adapter  *adapter = arg;
2405         struct ifnet    *ifp = adapter->ifp;
2406         struct tx_ring  *txr = adapter->tx_rings;
2407
2408         EM_CORE_LOCK_ASSERT(adapter);
2409
2410         INIT_DEBUGOUT("em_stop: begin");
2411
2412         em_disable_intr(adapter);
2413         callout_stop(&adapter->timer);
2414
2415         /* Tell the stack that the interface is no longer active */
2416         ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
2417         ifp->if_drv_flags |= IFF_DRV_OACTIVE;
2418
2419         /* Unarm watchdog timer. */
2420         for (int i = 0; i < adapter->num_queues; i++, txr++) {
2421                 EM_TX_LOCK(txr);
2422                 txr->queue_status = EM_QUEUE_IDLE;
2423                 EM_TX_UNLOCK(txr);
2424         }
2425
2426         e1000_reset_hw(&adapter->hw);
2427         E1000_WRITE_REG(&adapter->hw, E1000_WUC, 0);
2428
2429         e1000_led_off(&adapter->hw);
2430         e1000_cleanup_led(&adapter->hw);
2431 }
2432
2433
2434 /*********************************************************************
2435  *
2436  *  Determine hardware revision.
2437  *
2438  **********************************************************************/
2439 static void
2440 em_identify_hardware(struct adapter *adapter)
2441 {
2442         device_t dev = adapter->dev;
2443
2444         /* Make sure our PCI config space has the necessary stuff set */
2445         adapter->hw.bus.pci_cmd_word = pci_read_config(dev, PCIR_COMMAND, 2);
2446         if (!((adapter->hw.bus.pci_cmd_word & PCIM_CMD_BUSMASTEREN) &&
2447             (adapter->hw.bus.pci_cmd_word & PCIM_CMD_MEMEN))) {
2448                 device_printf(dev, "Memory Access and/or Bus Master bits "
2449                     "were not set!\n");
2450                 adapter->hw.bus.pci_cmd_word |=
2451                 (PCIM_CMD_BUSMASTEREN | PCIM_CMD_MEMEN);
2452                 pci_write_config(dev, PCIR_COMMAND,
2453                     adapter->hw.bus.pci_cmd_word, 2);
2454         }
2455
2456         /* Save off the information about this board */
2457         adapter->hw.vendor_id = pci_get_vendor(dev);
2458         adapter->hw.device_id = pci_get_device(dev);
2459         adapter->hw.revision_id = pci_read_config(dev, PCIR_REVID, 1);
2460         adapter->hw.subsystem_vendor_id =
2461             pci_read_config(dev, PCIR_SUBVEND_0, 2);
2462         adapter->hw.subsystem_device_id =
2463             pci_read_config(dev, PCIR_SUBDEV_0, 2);
2464
2465         /* Do Shared Code Init and Setup */
2466         if (e1000_set_mac_type(&adapter->hw)) {
2467                 device_printf(dev, "Setup init failure\n");
2468                 return;
2469         }
2470 }
2471
2472 static int
2473 em_allocate_pci_resources(struct adapter *adapter)
2474 {
2475         device_t        dev = adapter->dev;
2476         int             rid;
2477
2478         rid = PCIR_BAR(0);
2479         adapter->memory = bus_alloc_resource_any(dev, SYS_RES_MEMORY,
2480             &rid, RF_ACTIVE);
2481         if (adapter->memory == NULL) {
2482                 device_printf(dev, "Unable to allocate bus resource: memory\n");
2483                 return (ENXIO);
2484         }
2485         adapter->osdep.mem_bus_space_tag =
2486             rman_get_bustag(adapter->memory);
2487         adapter->osdep.mem_bus_space_handle =
2488             rman_get_bushandle(adapter->memory);
2489         adapter->hw.hw_addr = (u8 *)&adapter->osdep.mem_bus_space_handle;
2490
2491         /* Default to a single queue */
2492         adapter->num_queues = 1;
2493
2494         /*
2495          * Setup MSI/X or MSI if PCI Express
2496          */
2497         adapter->msix = em_setup_msix(adapter);
2498
2499         adapter->hw.back = &adapter->osdep;
2500
2501         return (0);
2502 }
2503
2504 /*********************************************************************
2505  *
2506  *  Setup the Legacy or MSI Interrupt handler
2507  *
2508  **********************************************************************/
2509 int
2510 em_allocate_legacy(struct adapter *adapter)
2511 {
2512         device_t dev = adapter->dev;
2513         struct tx_ring  *txr = adapter->tx_rings;
2514         int error, rid = 0;
2515
2516         /* Manually turn off all interrupts */
2517         E1000_WRITE_REG(&adapter->hw, E1000_IMC, 0xffffffff);
2518
2519         if (adapter->msix == 1) /* using MSI */
2520                 rid = 1;
2521         /* We allocate a single interrupt resource */
2522         adapter->res = bus_alloc_resource_any(dev,
2523             SYS_RES_IRQ, &rid, RF_SHAREABLE | RF_ACTIVE);
2524         if (adapter->res == NULL) {
2525                 device_printf(dev, "Unable to allocate bus resource: "
2526                     "interrupt\n");
2527                 return (ENXIO);
2528         }
2529
2530         /*
2531          * Allocate a fast interrupt and the associated
2532          * deferred processing contexts.
2533          */
2534         TASK_INIT(&adapter->que_task, 0, em_handle_que, adapter);
2535         adapter->tq = taskqueue_create_fast("em_taskq", M_NOWAIT,
2536             taskqueue_thread_enqueue, &adapter->tq);
2537         taskqueue_start_threads(&adapter->tq, 1, PI_NET, "%s que",
2538             device_get_nameunit(adapter->dev));
2539         /* Use a TX only tasklet for local timer */
2540         TASK_INIT(&txr->tx_task, 0, em_handle_tx, txr);
2541         txr->tq = taskqueue_create_fast("em_txq", M_NOWAIT,
2542             taskqueue_thread_enqueue, &txr->tq);
2543         taskqueue_start_threads(&txr->tq, 1, PI_NET, "%s txq",
2544             device_get_nameunit(adapter->dev));
2545         TASK_INIT(&adapter->link_task, 0, em_handle_link, adapter);
2546         if ((error = bus_setup_intr(dev, adapter->res, INTR_TYPE_NET,
2547             em_irq_fast, NULL, adapter, &adapter->tag)) != 0) {
2548                 device_printf(dev, "Failed to register fast interrupt "
2549                             "handler: %d\n", error);
2550                 taskqueue_free(adapter->tq);
2551                 adapter->tq = NULL;
2552                 return (error);
2553         }
2554         
2555         return (0);
2556 }
2557
2558 /*********************************************************************
2559  *
2560  *  Setup the MSIX Interrupt handlers
2561  *   This is not really multiqueue; rather,
2562  *   it's just separate interrupt vectors
2563  *   for TX, RX, and Link.
2564  *
2565  **********************************************************************/
2566 int
2567 em_allocate_msix(struct adapter *adapter)
2568 {
2569         device_t        dev = adapter->dev;
2570         struct          tx_ring *txr = adapter->tx_rings;
2571         struct          rx_ring *rxr = adapter->rx_rings;
2572         int             error, rid, vector = 0;
2573
2574
2575         /* Make sure all interrupts are disabled */
2576         E1000_WRITE_REG(&adapter->hw, E1000_IMC, 0xffffffff);
2577
2578         /* First set up ring resources */
2579         for (int i = 0; i < adapter->num_queues; i++, txr++, rxr++) {
2580
2581                 /* RX ring */
2582                 rid = vector + 1;
2583
2584                 rxr->res = bus_alloc_resource_any(dev,
2585                     SYS_RES_IRQ, &rid, RF_ACTIVE);
2586                 if (rxr->res == NULL) {
2587                         device_printf(dev,
2588                             "Unable to allocate bus resource: "
2589                             "RX MSIX Interrupt %d\n", i);
2590                         return (ENXIO);
2591                 }
2592                 if ((error = bus_setup_intr(dev, rxr->res,
2593                     INTR_TYPE_NET | INTR_MPSAFE, NULL, em_msix_rx,
2594                     rxr, &rxr->tag)) != 0) {
2595                         device_printf(dev, "Failed to register RX handler");
2596                         return (error);
2597                 }
2598 #if __FreeBSD_version >= 800504
2599                 bus_describe_intr(dev, rxr->res, rxr->tag, "rx %d", i);
2600 #endif
2601                 rxr->msix = vector++; /* NOTE increment vector for TX */
2602                 TASK_INIT(&rxr->rx_task, 0, em_handle_rx, rxr);
2603                 rxr->tq = taskqueue_create_fast("em_rxq", M_NOWAIT,
2604                     taskqueue_thread_enqueue, &rxr->tq);
2605                 taskqueue_start_threads(&rxr->tq, 1, PI_NET, "%s rxq",
2606                     device_get_nameunit(adapter->dev));
2607                 /*
2608                 ** Set the bit to enable interrupt
2609                 ** in E1000_IMS -- bits 20 and 21
2610                 ** are for RX0 and RX1, note this has
2611                 ** NOTHING to do with the MSIX vector
2612                 */
2613                 rxr->ims = 1 << (20 + i);
2614                 adapter->ivars |= (8 | rxr->msix) << (i * 4);
2615
2616                 /* TX ring */
2617                 rid = vector + 1;
2618                 txr->res = bus_alloc_resource_any(dev,
2619                     SYS_RES_IRQ, &rid, RF_ACTIVE);
2620                 if (txr->res == NULL) {
2621                         device_printf(dev,
2622                             "Unable to allocate bus resource: "
2623                             "TX MSIX Interrupt %d\n", i);
2624                         return (ENXIO);
2625                 }
2626                 if ((error = bus_setup_intr(dev, txr->res,
2627                     INTR_TYPE_NET | INTR_MPSAFE, NULL, em_msix_tx,
2628                     txr, &txr->tag)) != 0) {
2629                         device_printf(dev, "Failed to register TX handler");
2630                         return (error);
2631                 }
2632 #if __FreeBSD_version >= 800504
2633                 bus_describe_intr(dev, txr->res, txr->tag, "tx %d", i);
2634 #endif
2635                 txr->msix = vector++; /* Increment vector for next pass */
2636                 TASK_INIT(&txr->tx_task, 0, em_handle_tx, txr);
2637                 txr->tq = taskqueue_create_fast("em_txq", M_NOWAIT,
2638                     taskqueue_thread_enqueue, &txr->tq);
2639                 taskqueue_start_threads(&txr->tq, 1, PI_NET, "%s txq",
2640                     device_get_nameunit(adapter->dev));
2641                 /*
2642                 ** Set the bit to enable interrupt
2643                 ** in E1000_IMS -- bits 22 and 23
2644                 ** are for TX0 and TX1, note this has
2645                 ** NOTHING to do with the MSIX vector
2646                 */
2647                 txr->ims = 1 << (22 + i);
2648                 adapter->ivars |= (8 | txr->msix) << (8 + (i * 4));
2649         }
2650
2651         /* Link interrupt */
2652         ++rid;
2653         adapter->res = bus_alloc_resource_any(dev,
2654             SYS_RES_IRQ, &rid, RF_ACTIVE);
2655         if (!adapter->res) {
2656                 device_printf(dev, "Unable to allocate "
2657                     "bus resource: Link interrupt [%d]\n", rid);
2658                 return (ENXIO);
2659         }
2660         /* Set the link handler function */
2661         error = bus_setup_intr(dev, adapter->res,
2662             INTR_TYPE_NET | INTR_MPSAFE, NULL,
2663             em_msix_link, adapter, &adapter->tag);
2664         if (error) {
2665                 adapter->res = NULL;
2666                 device_printf(dev, "Failed to register LINK handler");
2667                 return (error);
2668         }
2669 #if __FreeBSD_version >= 800504
2670                 bus_describe_intr(dev, adapter->res, adapter->tag, "link");
2671 #endif
2672         adapter->linkvec = vector;
2673         adapter->ivars |=  (8 | vector) << 16;
2674         adapter->ivars |= 0x80000000;
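        /*
         * Illustrative result for this single-queue driver: with vectors
         * 0 (RX), 1 (TX) and 2 (link), ivars becomes
         * 0x8 | (0x9 << 8) | (0xA << 16) | 0x80000000 = 0x800A0908,
         * which em_init_locked() later writes to E1000_IVAR.
         */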
2675
2676         return (0);
2677 }
2678
2679
2680 static void
2681 em_free_pci_resources(struct adapter *adapter)
2682 {
2683         device_t        dev = adapter->dev;
2684         struct tx_ring  *txr;
2685         struct rx_ring  *rxr;
2686         int             rid;
2687
2688
2689         /*
2690         ** Release all the queue interrupt resources:
2691         */
2692         for (int i = 0; i < adapter->num_queues; i++) {
2693                 txr = &adapter->tx_rings[i];
2694                 rxr = &adapter->rx_rings[i];
2695                 /* an early abort? */
2696                 if ((txr == NULL) || (rxr == NULL))
2697                         break;
2698                 rid = txr->msix + 1;
2699                 if (txr->tag != NULL) {
2700                         bus_teardown_intr(dev, txr->res, txr->tag);
2701                         txr->tag = NULL;
2702                 }
2703                 if (txr->res != NULL)
2704                         bus_release_resource(dev, SYS_RES_IRQ,
2705                             rid, txr->res);
2706                 rid = rxr->msix + 1;
2707                 if (rxr->tag != NULL) {
2708                         bus_teardown_intr(dev, rxr->res, rxr->tag);
2709                         rxr->tag = NULL;
2710                 }
2711                 if (rxr->res != NULL)
2712                         bus_release_resource(dev, SYS_RES_IRQ,
2713                             rid, rxr->res);
2714         }
2715
2716         if (adapter->linkvec) /* we are doing MSIX */
2717                 rid = adapter->linkvec + 1;
2718         else
2719                 rid = (adapter->msix != 0) ? 1 : 0;
2720
2721         if (adapter->tag != NULL) {
2722                 bus_teardown_intr(dev, adapter->res, adapter->tag);
2723                 adapter->tag = NULL;
2724         }
2725
2726         if (adapter->res != NULL)
2727                 bus_release_resource(dev, SYS_RES_IRQ, rid, adapter->res);
2728
2729
2730         if (adapter->msix)
2731                 pci_release_msi(dev);
2732
2733         if (adapter->msix_mem != NULL)
2734                 bus_release_resource(dev, SYS_RES_MEMORY,
2735                     PCIR_BAR(EM_MSIX_BAR), adapter->msix_mem);
2736
2737         if (adapter->memory != NULL)
2738                 bus_release_resource(dev, SYS_RES_MEMORY,
2739                     PCIR_BAR(0), adapter->memory);
2740
2741         if (adapter->flash != NULL)
2742                 bus_release_resource(dev, SYS_RES_MEMORY,
2743                     EM_FLASH, adapter->flash);
2744 }
2745
2746 /*
2747  * Setup MSI or MSI/X
2748  */
2749 static int
2750 em_setup_msix(struct adapter *adapter)
2751 {
2752         device_t dev = adapter->dev;
2753         int val = 0;
2754
2755         /*
2756         ** Setup MSI/X for Hartwell: tests have shown
2757         ** use of two queues to be unstable, and to
2758         ** provide no great gain anyway, so we simply
2759         ** separate the interrupts and use a single queue.
2760         */
2761         if ((adapter->hw.mac.type == e1000_82574) &&
2762             (em_enable_msix == TRUE)) {
2763                 /* Map the MSIX BAR */
2764                 int rid = PCIR_BAR(EM_MSIX_BAR);
2765                 adapter->msix_mem = bus_alloc_resource_any(dev,
2766                     SYS_RES_MEMORY, &rid, RF_ACTIVE);
2767                 if (!adapter->msix_mem) {
2768                         /* May not be enabled */
2769                         device_printf(adapter->dev,
2770                             "Unable to map MSIX table\n");
2771                         goto msi;
2772                 }
2773                 val = pci_msix_count(dev);
2774                 /* We only need 3 vectors */
2775                 if (val > 3)
2776                         val = 3;
2777                 if (val != 3) {
2778                         bus_release_resource(dev, SYS_RES_MEMORY,
2779                             PCIR_BAR(EM_MSIX_BAR), adapter->msix_mem);
2780                         adapter->msix_mem = NULL;
2781                         device_printf(adapter->dev,
2782                             "MSIX: incorrect vectors, using MSI\n");
2783                         goto msi;
2784                 }
2785
2786                 if (pci_alloc_msix(dev, &val) == 0) {
2787                         device_printf(adapter->dev,
2788                             "Using MSIX interrupts "
2789                             "with %d vectors\n", val);
2790                 }
2791
2792                 return (val);
2793         }
2794 msi:
2795         val = pci_msi_count(dev);
2796         if (val == 1 && pci_alloc_msi(dev, &val) == 0) {
2797                 adapter->msix = 1;
2798                 device_printf(adapter->dev, "Using an MSI interrupt\n");
2799                 return (val);
2800         } 
2801         /* Should only happen due to manual configuration */
2802         device_printf(adapter->dev, "No MSI/MSI-X, using a Legacy IRQ\n");
2803         return (0);
2804 }
2805
2806
2807 /*********************************************************************
2808  *
2809  *  Initialize the hardware to a configuration
2810  *  as specified by the adapter structure.
2811  *
2812  **********************************************************************/
2813 static void
2814 em_reset(struct adapter *adapter)
2815 {
2816         device_t        dev = adapter->dev;
2817         struct ifnet    *ifp = adapter->ifp;
2818         struct e1000_hw *hw = &adapter->hw;
2819         u16             rx_buffer_size;
2820         u32             pba;
2821
2822         INIT_DEBUGOUT("em_reset: begin");
2823
2824         /* Set up smart power down as default off on newer adapters. */
2825         if (!em_smart_pwr_down && (hw->mac.type == e1000_82571 ||
2826             hw->mac.type == e1000_82572)) {
2827                 u16 phy_tmp = 0;
2828
2829                 /* Speed up time to link by disabling smart power down. */
2830                 e1000_read_phy_reg(hw, IGP02E1000_PHY_POWER_MGMT, &phy_tmp);
2831                 phy_tmp &= ~IGP02E1000_PM_SPD;
2832                 e1000_write_phy_reg(hw, IGP02E1000_PHY_POWER_MGMT, phy_tmp);
2833         }
2834
2835         /*
2836          * Packet Buffer Allocation (PBA)
2837          * Writing PBA sets the receive portion of the buffer;
2838          * the remainder is used for the transmit buffer.
2839          */
2840         switch (hw->mac.type) {
2841         /* Total Packet Buffer on these is 48K */
2842         case e1000_82571:
2843         case e1000_82572:
2844         case e1000_80003es2lan:
2845                 pba = E1000_PBA_32K; /* 32K for Rx, 16K for Tx */
2846                 break;
2847         case e1000_82573: /* 82573: Total Packet Buffer is 32K */
2848                 pba = E1000_PBA_12K; /* 12K for Rx, 20K for Tx */
2849                 break;
2850         case e1000_82574:
2851         case e1000_82583:
2852                 pba = E1000_PBA_20K; /* 20K for Rx, 20K for Tx */
2853                 break;
2854         case e1000_ich8lan:
2855                 pba = E1000_PBA_8K;
2856                 break;
2857         case e1000_ich9lan:
2858         case e1000_ich10lan:
2859                 /* Boost Receive side for jumbo frames */
2860                 if (adapter->hw.mac.max_frame_size > 4096)
2861                         pba = E1000_PBA_14K;
2862                 else
2863                         pba = E1000_PBA_10K;
2864                 break;
2865         case e1000_pchlan:
2866         case e1000_pch2lan:
2867         case e1000_pch_lpt:
2868                 pba = E1000_PBA_26K;
2869                 break;
2870         default:
2871                 if (adapter->hw.mac.max_frame_size > 8192)
2872                         pba = E1000_PBA_40K; /* 40K for Rx, 24K for Tx */
2873                 else
2874                         pba = E1000_PBA_48K; /* 48K for Rx, 16K for Tx */
2875         }
2876         E1000_WRITE_REG(&adapter->hw, E1000_PBA, pba);
2877
2878         /*
2879          * These parameters control the automatic generation (Tx) and
2880          * response (Rx) to Ethernet PAUSE frames.
2881          * - High water mark should allow for at least two frames to be
2882          *   received after sending an XOFF.
2883          * - Low water mark works best when it is very near the high water mark.
2884          *   This allows the receiver to restart by sending XON when it has
2885          *   drained a bit. Here we use an arbitrary value of 1500 which will
2886          *   restart after one full frame is pulled from the buffer. There
2887          *   could be several smaller frames in the buffer and if so they will
2888          *   not trigger the XON until their total number reduces the buffer
2889          *   by 1500.
2890          * - The pause time is fairly large at 1000 x 512ns = 512 usec.
2891          */
2892         rx_buffer_size = ((E1000_READ_REG(hw, E1000_PBA) & 0xffff) << 10);
2893         hw->fc.high_water = rx_buffer_size -
2894             roundup2(adapter->hw.mac.max_frame_size, 1024);
2895         hw->fc.low_water = hw->fc.high_water - 1500;
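             /*
             ** Worked example (illustrative): with the 48K default, the PBA
             ** register reads back 0x30, so rx_buffer_size = 0x30 << 10 = 49152.
             ** A 1518 byte max frame rounds up to 2048, giving
             ** high_water = 47104 and low_water = 45604.
             */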
2896
2897         if (adapter->fc) /* locally set flow control value? */
2898                 hw->fc.requested_mode = adapter->fc;
2899         else
2900                 hw->fc.requested_mode = e1000_fc_full;
2901
2902         if (hw->mac.type == e1000_80003es2lan)
2903                 hw->fc.pause_time = 0xFFFF;
2904         else
2905                 hw->fc.pause_time = EM_FC_PAUSE_TIME;
2906
2907         hw->fc.send_xon = TRUE;
2908
2909         /* Device specific overrides/settings */
2910         switch (hw->mac.type) {
2911         case e1000_pchlan:
2912                 /* Workaround: no TX flow ctrl for PCH */
2913                 hw->fc.requested_mode = e1000_fc_rx_pause;
2914                 hw->fc.pause_time = 0xFFFF; /* override */
2915                 if (ifp->if_mtu > ETHERMTU) {
2916                         hw->fc.high_water = 0x3500;
2917                         hw->fc.low_water = 0x1500;
2918                 } else {
2919                         hw->fc.high_water = 0x5000;
2920                         hw->fc.low_water = 0x3000;
2921                 }
2922                 hw->fc.refresh_time = 0x1000;
2923                 break;
2924         case e1000_pch2lan:
2925         case e1000_pch_lpt:
2926                 hw->fc.high_water = 0x5C20;
2927                 hw->fc.low_water = 0x5048;
2928                 hw->fc.pause_time = 0x0650;
2929                 hw->fc.refresh_time = 0x0400;
2930                 /* Jumbos need adjusted PBA */
2931                 if (ifp->if_mtu > ETHERMTU)
2932                         E1000_WRITE_REG(hw, E1000_PBA, 12);
2933                 else
2934                         E1000_WRITE_REG(hw, E1000_PBA, 26);
2935                 break;
2936         case e1000_ich9lan:
2937         case e1000_ich10lan:
2938                 if (ifp->if_mtu > ETHERMTU) {
2939                         hw->fc.high_water = 0x2800;
2940                         hw->fc.low_water = hw->fc.high_water - 8;
2941                         break;
2942                 } 
2943                 /* else fall thru */
2944         default:
2945                 if (hw->mac.type == e1000_80003es2lan)
2946                         hw->fc.pause_time = 0xFFFF;
2947                 break;
2948         }
2949
2950         /* Issue a global reset */
2951         e1000_reset_hw(hw);
2952         E1000_WRITE_REG(hw, E1000_WUC, 0);
2953         em_disable_aspm(adapter);
2954         /* and a re-init */
2955         if (e1000_init_hw(hw) < 0) {
2956                 device_printf(dev, "Hardware Initialization Failed\n");
2957                 return;
2958         }
2959
2960         E1000_WRITE_REG(hw, E1000_VET, ETHERTYPE_VLAN);
2961         e1000_get_phy_info(hw);
2962         e1000_check_for_link(hw);
2963         return;
2964 }
2965
2966 /*********************************************************************
2967  *
2968  *  Setup networking device structure and register an interface.
2969  *
2970  **********************************************************************/
2971 static int
2972 em_setup_interface(device_t dev, struct adapter *adapter)
2973 {
2974         struct ifnet   *ifp;
2975
2976         INIT_DEBUGOUT("em_setup_interface: begin");
2977
2978         ifp = adapter->ifp = if_alloc(IFT_ETHER);
2979         if (ifp == NULL) {
2980                 device_printf(dev, "can not allocate ifnet structure\n");
2981                 return (-1);
2982         }
2983         if_initname(ifp, device_get_name(dev), device_get_unit(dev));
2984         ifp->if_init =  em_init;
2985         ifp->if_softc = adapter;
2986         ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
2987         ifp->if_ioctl = em_ioctl;
2988 #ifdef EM_MULTIQUEUE
2989         /* Multiqueue stack interface */
2990         ifp->if_transmit = em_mq_start;
2991         ifp->if_qflush = em_qflush;
2992 #else
2993         ifp->if_start = em_start;
2994         IFQ_SET_MAXLEN(&ifp->if_snd, adapter->num_tx_desc - 1);
2995         ifp->if_snd.ifq_drv_maxlen = adapter->num_tx_desc - 1;
2996         IFQ_SET_READY(&ifp->if_snd);
2997 #endif  
2998
2999         ether_ifattach(ifp, adapter->hw.mac.addr);
3000
3001         ifp->if_capabilities = ifp->if_capenable = 0;
3002
3003
3004         ifp->if_capabilities |= IFCAP_HWCSUM | IFCAP_VLAN_HWCSUM;
3005         ifp->if_capabilities |= IFCAP_TSO4;
3006         /*
3007          * Tell the upper layer(s) we
3008          * support full VLAN capability
3009          */
3010         ifp->if_data.ifi_hdrlen = sizeof(struct ether_vlan_header);
3011         ifp->if_capabilities |= IFCAP_VLAN_HWTAGGING
3012                              |  IFCAP_VLAN_HWTSO
3013                              |  IFCAP_VLAN_MTU;
3014         ifp->if_capenable = ifp->if_capabilities;
3015
3016         /*
3017         ** Don't turn this on by default: if vlans are
3018         ** created on another pseudo device (e.g. lagg),
3019         ** then vlan events are not passed through, breaking
3020         ** operation, but with HW FILTER off it works. If
3021         ** using vlans directly on the em driver you can
3022         ** enable this and get full hardware tag filtering.
3023         */
3024         ifp->if_capabilities |= IFCAP_VLAN_HWFILTER;
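             /* It can be enabled at runtime, e.g. "ifconfig em0 vlanhwfilter". */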
3025
3026 #ifdef DEVICE_POLLING
3027         ifp->if_capabilities |= IFCAP_POLLING;
3028 #endif
3029
3030         /* Enable only WOL MAGIC by default */
3031         if (adapter->wol) {
3032                 ifp->if_capabilities |= IFCAP_WOL;
3033                 ifp->if_capenable |= IFCAP_WOL_MAGIC;
3034         }
3035                 
3036         /*
3037          * Specify the media types supported by this adapter and register
3038          * callbacks to update media and link information
3039          */
3040         ifmedia_init(&adapter->media, IFM_IMASK,
3041             em_media_change, em_media_status);
3042         if ((adapter->hw.phy.media_type == e1000_media_type_fiber) ||
3043             (adapter->hw.phy.media_type == e1000_media_type_internal_serdes)) {
3044                 u_char fiber_type = IFM_1000_SX;        /* default type */
3045
3046                 ifmedia_add(&adapter->media, IFM_ETHER | fiber_type | IFM_FDX, 
3047                             0, NULL);
3048                 ifmedia_add(&adapter->media, IFM_ETHER | fiber_type, 0, NULL);
3049         } else {
3050                 ifmedia_add(&adapter->media, IFM_ETHER | IFM_10_T, 0, NULL);
3051                 ifmedia_add(&adapter->media, IFM_ETHER | IFM_10_T | IFM_FDX,
3052                             0, NULL);
3053                 ifmedia_add(&adapter->media, IFM_ETHER | IFM_100_TX,
3054                             0, NULL);
3055                 ifmedia_add(&adapter->media, IFM_ETHER | IFM_100_TX | IFM_FDX,
3056                             0, NULL);
3057                 if (adapter->hw.phy.type != e1000_phy_ife) {
3058                         ifmedia_add(&adapter->media,
3059                                 IFM_ETHER | IFM_1000_T | IFM_FDX, 0, NULL);
3060                         ifmedia_add(&adapter->media,
3061                                 IFM_ETHER | IFM_1000_T, 0, NULL);
3062                 }
3063         }
3064         ifmedia_add(&adapter->media, IFM_ETHER | IFM_AUTO, 0, NULL);
3065         ifmedia_set(&adapter->media, IFM_ETHER | IFM_AUTO);
3066         return (0);
3067 }
3068
3069
3070 /*
3071  * Manage DMA'able memory.
3072  */
3073 static void
3074 em_dmamap_cb(void *arg, bus_dma_segment_t *segs, int nseg, int error)
3075 {
3076         if (error)
3077                 return;
3078         *(bus_addr_t *) arg = segs[0].ds_addr;
3079 }
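     /*
     ** Note: em_dmamap_cb assumes a single DMA segment; every tag it is
     ** used with here is created with nsegments == 1, so only segs[0]
     ** is meaningful.
     */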
3080
3081 static int
3082 em_dma_malloc(struct adapter *adapter, bus_size_t size,
3083         struct em_dma_alloc *dma, int mapflags)
3084 {
3085         int error;
3086
3087         error = bus_dma_tag_create(bus_get_dma_tag(adapter->dev), /* parent */
3088                                 EM_DBA_ALIGN, 0,        /* alignment, bounds */
3089                                 BUS_SPACE_MAXADDR,      /* lowaddr */
3090                                 BUS_SPACE_MAXADDR,      /* highaddr */
3091                                 NULL, NULL,             /* filter, filterarg */
3092                                 size,                   /* maxsize */
3093                                 1,                      /* nsegments */
3094                                 size,                   /* maxsegsize */
3095                                 0,                      /* flags */
3096                                 NULL,                   /* lockfunc */
3097                                 NULL,                   /* lockarg */
3098                                 &dma->dma_tag);
3099         if (error) {
3100                 device_printf(adapter->dev,
3101                     "%s: bus_dma_tag_create failed: %d\n",
3102                     __func__, error);
3103                 goto fail_0;
3104         }
3105
3106         error = bus_dmamem_alloc(dma->dma_tag, (void**) &dma->dma_vaddr,
3107             BUS_DMA_NOWAIT | BUS_DMA_COHERENT, &dma->dma_map);
3108         if (error) {
3109                 device_printf(adapter->dev,
3110                     "%s: bus_dmamem_alloc(%ju) failed: %d\n",
3111                     __func__, (uintmax_t)size, error);
3112                 goto fail_1;
3113         }
3114
3115         dma->dma_paddr = 0;
3116         error = bus_dmamap_load(dma->dma_tag, dma->dma_map, dma->dma_vaddr,
3117             size, em_dmamap_cb, &dma->dma_paddr, mapflags | BUS_DMA_NOWAIT);
3118         if (error || dma->dma_paddr == 0) {
3119                 device_printf(adapter->dev,
3120                     "%s: bus_dmamap_load failed: %d\n",
3121                     __func__, error);
3122                 goto fail_3;
3123         }
3124
3125         return (0);
3126
3127 fail_3:
3128         bus_dmamap_unload(dma->dma_tag, dma->dma_map);
3129 fail_2:
3130         bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map);
     fail_1:        /* tag was created, but no memory was allocated */
3131         bus_dma_tag_destroy(dma->dma_tag);
3132 fail_0:
3133         dma->dma_map = NULL;
3134         dma->dma_tag = NULL;
3135
3136         return (error);
3137 }
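     /*
     ** Typical usage (illustrative, mirroring em_allocate_queues() below):
     **
     **      if (em_dma_malloc(adapter, tsize, &txr->txdma, BUS_DMA_NOWAIT))
     **              return (ENOMEM);
     **      txr->tx_base = (struct e1000_tx_desc *)txr->txdma.dma_vaddr;
     */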
3138
3139 static void
3140 em_dma_free(struct adapter *adapter, struct em_dma_alloc *dma)
3141 {
3142         if (dma->dma_tag == NULL)
3143                 return;
3144         if (dma->dma_map != NULL) {
3145                 bus_dmamap_sync(dma->dma_tag, dma->dma_map,
3146                     BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
3147                 bus_dmamap_unload(dma->dma_tag, dma->dma_map);
3148                 bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map);
3149                 dma->dma_map = NULL;
3150         }
3151         bus_dma_tag_destroy(dma->dma_tag);
3152         dma->dma_tag = NULL;
3153 }
3154
3155
3156 /*********************************************************************
3157  *
3158  *  Allocate memory for the transmit and receive rings, and then
3159  *  the descriptors associated with each, called only once at attach.
3160  *
3161  **********************************************************************/
3162 static int
3163 em_allocate_queues(struct adapter *adapter)
3164 {
3165         device_t                dev = adapter->dev;
3166         struct tx_ring          *txr = NULL;
3167         struct rx_ring          *rxr = NULL;
3168         int rsize, tsize, error = E1000_SUCCESS;
3169         int txconf = 0, rxconf = 0;
3170
3171
3172         /* Allocate the TX ring struct memory */
3173         if (!(adapter->tx_rings =
3174             (struct tx_ring *) malloc(sizeof(struct tx_ring) *
3175             adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
3176                 device_printf(dev, "Unable to allocate TX ring memory\n");
3177                 error = ENOMEM;
3178                 goto fail;
3179         }
3180
3181         /* Now allocate the RX */
3182         if (!(adapter->rx_rings =
3183             (struct rx_ring *) malloc(sizeof(struct rx_ring) *
3184             adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
3185                 device_printf(dev, "Unable to allocate RX ring memory\n");
3186                 error = ENOMEM;
3187                 goto rx_fail;
3188         }
3189
3190         tsize = roundup2(adapter->num_tx_desc *
3191             sizeof(struct e1000_tx_desc), EM_DBA_ALIGN);
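             /*
             ** Example (illustrative): with the default of 1024 TX descriptors,
             ** tsize = 1024 * sizeof(struct e1000_tx_desc) = 16384 bytes,
             ** already a multiple of EM_DBA_ALIGN (128).
             */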
3192         /*
3193          * Now set up the TX queues, txconf is needed to handle the
3194          * possibility that things fail midcourse and we need to
3195          * undo memory gracefully
3196          */ 
3197         for (int i = 0; i < adapter->num_queues; i++, txconf++) {
3198                 /* Set up some basics */
3199                 txr = &adapter->tx_rings[i];
3200                 txr->adapter = adapter;
3201                 txr->me = i;
3202
3203                 /* Initialize the TX lock */
3204                 snprintf(txr->mtx_name, sizeof(txr->mtx_name), "%s:tx(%d)",
3205                     device_get_nameunit(dev), txr->me);
3206                 mtx_init(&txr->tx_mtx, txr->mtx_name, NULL, MTX_DEF);
3207
3208                 if (em_dma_malloc(adapter, tsize,
3209                         &txr->txdma, BUS_DMA_NOWAIT)) {
3210                         device_printf(dev,
3211                             "Unable to allocate TX Descriptor memory\n");
3212                         error = ENOMEM;
3213                         goto err_tx_desc;
3214                 }
3215                 txr->tx_base = (struct e1000_tx_desc *)txr->txdma.dma_vaddr;
3216                 bzero((void *)txr->tx_base, tsize);
3217
3218                 if (em_allocate_transmit_buffers(txr)) {
3219                         device_printf(dev,
3220                             "Critical Failure setting up transmit buffers\n");
3221                         error = ENOMEM;
3222                         goto err_tx_desc;
3223                 }
3224 #if __FreeBSD_version >= 800000
3225                 /* Allocate a buf ring */
3226                 txr->br = buf_ring_alloc(4096, M_DEVBUF,
3227                     M_WAITOK, &txr->tx_mtx);
3228 #endif
3229         }
3230
3231         /*
3232          * Next the RX queues...
3233          */ 
3234         rsize = roundup2(adapter->num_rx_desc *
3235             sizeof(struct e1000_rx_desc), EM_DBA_ALIGN);
3236         for (int i = 0; i < adapter->num_queues; i++, rxconf++) {
3237                 rxr = &adapter->rx_rings[i];
3238                 rxr->adapter = adapter;
3239                 rxr->me = i;
3240
3241                 /* Initialize the RX lock */
3242                 snprintf(rxr->mtx_name, sizeof(rxr->mtx_name), "%s:rx(%d)",
3243                     device_get_nameunit(dev), rxr->me);
3244                 mtx_init(&rxr->rx_mtx, rxr->mtx_name, NULL, MTX_DEF);
3245
3246                 if (em_dma_malloc(adapter, rsize,
3247                         &rxr->rxdma, BUS_DMA_NOWAIT)) {
3248                         device_printf(dev,
3249                             "Unable to allocate RX Descriptor memory\n");
3250                         error = ENOMEM;
3251                         goto err_rx_desc;
3252                 }
3253                 rxr->rx_base = (struct e1000_rx_desc *)rxr->rxdma.dma_vaddr;
3254                 bzero((void *)rxr->rx_base, rsize);
3255
3256                 /* Allocate receive buffers for the ring*/
3257                 if (em_allocate_receive_buffers(rxr)) {
3258                         device_printf(dev,
3259                             "Critical Failure setting up receive buffers\n");
3260                         error = ENOMEM;
3261                         goto err_rx_desc;
3262                 }
3263         }
3264
3265         return (0);
3266
3267 err_rx_desc:
3268         for (rxr = adapter->rx_rings; rxconf > 0; rxr++, rxconf--)
3269                 em_dma_free(adapter, &rxr->rxdma);
3270 err_tx_desc:
3271         for (txr = adapter->tx_rings; txconf > 0; txr++, txconf--) {
3272                 em_dma_free(adapter, &txr->txdma);
     #if __FreeBSD_version >= 800000
                     /* Also free the buf ring set up for this queue, if any */
                     if (txr->br != NULL)
                             buf_ring_free(txr->br, M_DEVBUF);
     #endif
             }
3273         free(adapter->rx_rings, M_DEVBUF);
3274 rx_fail:
             /* No queues were set up yet, so there are no buf rings to free */
3278         free(adapter->tx_rings, M_DEVBUF);
3279 fail:
3280         return (error);
3281 }
3282
3283
3284 /*********************************************************************
3285  *
3286  *  Allocate memory for tx_buffer structures. The tx_buffer stores all
3287  *  the information needed to transmit a packet on the wire. This is
3288  *  called only once at attach, setup is done every reset.
3289  *
3290  **********************************************************************/
3291 static int
3292 em_allocate_transmit_buffers(struct tx_ring *txr)
3293 {
3294         struct adapter *adapter = txr->adapter;
3295         device_t dev = adapter->dev;
3296         struct em_buffer *txbuf;
3297         int error, i;
3298
3299         /*
3300          * Setup DMA descriptor areas.
3301          */
3302         if ((error = bus_dma_tag_create(bus_get_dma_tag(dev),
3303                                1, 0,                    /* alignment, bounds */
3304                                BUS_SPACE_MAXADDR,       /* lowaddr */
3305                                BUS_SPACE_MAXADDR,       /* highaddr */
3306                                NULL, NULL,              /* filter, filterarg */
3307                                EM_TSO_SIZE,             /* maxsize */
3308                                EM_MAX_SCATTER,          /* nsegments */
3309                                PAGE_SIZE,               /* maxsegsize */
3310                                0,                       /* flags */
3311                                NULL,                    /* lockfunc */
3312                                NULL,                    /* lockfuncarg */
3313                                &txr->txtag))) {
3314                 device_printf(dev, "Unable to allocate TX DMA tag\n");
3315                 goto fail;
3316         }
3317
3318         if (!(txr->tx_buffers =
3319             (struct em_buffer *) malloc(sizeof(struct em_buffer) *
3320             adapter->num_tx_desc, M_DEVBUF, M_NOWAIT | M_ZERO))) {
3321                 device_printf(dev, "Unable to allocate tx_buffer memory\n");
3322                 error = ENOMEM;
3323                 goto fail;
3324         }
3325
3326         /* Create the descriptor buffer dma maps */
3327         txbuf = txr->tx_buffers;
3328         for (i = 0; i < adapter->num_tx_desc; i++, txbuf++) {
3329                 error = bus_dmamap_create(txr->txtag, 0, &txbuf->map);
3330                 if (error != 0) {
3331                         device_printf(dev, "Unable to create TX DMA map\n");
3332                         goto fail;
3333                 }
3334         }
3335
3336         return (0);
3337 fail:
3338         /* We free all, it handles case where we are in the middle */
3339         em_free_transmit_structures(adapter);
3340         return (error);
3341 }
3342
3343 /*********************************************************************
3344  *
3345  *  Initialize a transmit ring.
3346  *
3347  **********************************************************************/
3348 static void
3349 em_setup_transmit_ring(struct tx_ring *txr)
3350 {
3351         struct adapter *adapter = txr->adapter;
3352         struct em_buffer *txbuf;
3353         int i;
3354 #ifdef DEV_NETMAP
3355         struct netmap_adapter *na = NA(adapter->ifp);
3356         struct netmap_slot *slot;
3357 #endif /* DEV_NETMAP */
3358
3359         /* Clear the old descriptor contents */
3360         EM_TX_LOCK(txr);
3361 #ifdef DEV_NETMAP
3362         slot = netmap_reset(na, NR_TX, txr->me, 0);
3363 #endif /* DEV_NETMAP */
3364
3365         bzero((void *)txr->tx_base,
3366               (sizeof(struct e1000_tx_desc)) * adapter->num_tx_desc);
3367         /* Reset indices */
3368         txr->next_avail_desc = 0;
3369         txr->next_to_clean = 0;
3370
3371         /* Free any existing tx buffers. */
3372         txbuf = txr->tx_buffers;
3373         for (i = 0; i < adapter->num_tx_desc; i++, txbuf++) {
3374                 if (txbuf->m_head != NULL) {
3375                         bus_dmamap_sync(txr->txtag, txbuf->map,
3376                             BUS_DMASYNC_POSTWRITE);
3377                         bus_dmamap_unload(txr->txtag, txbuf->map);
3378                         m_freem(txbuf->m_head);
3379                         txbuf->m_head = NULL;
3380                 }
3381 #ifdef DEV_NETMAP
3382                 if (slot) {
3383                         int si = netmap_idx_n2k(&na->tx_rings[txr->me], i);
3384                         uint64_t paddr;
3385                         void *addr;
3386
3387                         addr = PNMB(slot + si, &paddr);
3388                         txr->tx_base[i].buffer_addr = htole64(paddr);
3389                         /* reload the map for netmap mode */
3390                         netmap_load_map(txr->txtag, txbuf->map, addr);
3391                 }
3392 #endif /* DEV_NETMAP */
3393
3394                 /* clear the watch index */
3395                 txbuf->next_eop = -1;
3396         }
3397
3398         /* Set number of descriptors available */
3399         txr->tx_avail = adapter->num_tx_desc;
3400         txr->queue_status = EM_QUEUE_IDLE;
3401
3402         /* Clear checksum offload context. */
3403         txr->last_hw_offload = 0;
3404         txr->last_hw_ipcss = 0;
3405         txr->last_hw_ipcso = 0;
3406         txr->last_hw_tucss = 0;
3407         txr->last_hw_tucso = 0;
3408
3409         bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
3410             BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
3411         EM_TX_UNLOCK(txr);
3412 }
3413
3414 /*********************************************************************
3415  *
3416  *  Initialize all transmit rings.
3417  *
3418  **********************************************************************/
3419 static void
3420 em_setup_transmit_structures(struct adapter *adapter)
3421 {
3422         struct tx_ring *txr = adapter->tx_rings;
3423
3424         for (int i = 0; i < adapter->num_queues; i++, txr++)
3425                 em_setup_transmit_ring(txr);
3426
3427         return;
3428 }
3429
3430 /*********************************************************************
3431  *
3432  *  Enable transmit unit.
3433  *
3434  **********************************************************************/
3435 static void
3436 em_initialize_transmit_unit(struct adapter *adapter)
3437 {
3438         struct tx_ring  *txr = adapter->tx_rings;
3439         struct e1000_hw *hw = &adapter->hw;
3440         u32     tctl, tarc, tipg = 0;
3441
3442         INIT_DEBUGOUT("em_initialize_transmit_unit: begin");
3443
3444         for (int i = 0; i < adapter->num_queues; i++, txr++) {
3445                 u64 bus_addr = txr->txdma.dma_paddr;
3446                 /* Base and Len of TX Ring */
3447                 E1000_WRITE_REG(hw, E1000_TDLEN(i),
3448                     adapter->num_tx_desc * sizeof(struct e1000_tx_desc));
3449                 E1000_WRITE_REG(hw, E1000_TDBAH(i),
3450                     (u32)(bus_addr >> 32));
3451                 E1000_WRITE_REG(hw, E1000_TDBAL(i),
3452                     (u32)bus_addr);
3453                 /* Init the HEAD/TAIL indices */
3454                 E1000_WRITE_REG(hw, E1000_TDT(i), 0);
3455                 E1000_WRITE_REG(hw, E1000_TDH(i), 0);
3456
3457                 HW_DEBUGOUT2("Base = %x, Length = %x\n",
3458                     E1000_READ_REG(&adapter->hw, E1000_TDBAL(i)),
3459                     E1000_READ_REG(&adapter->hw, E1000_TDLEN(i)));
3460
3461                 txr->queue_status = EM_QUEUE_IDLE;
3462         }
3463
3464         /* Set the default values for the Tx Inter Packet Gap timer */
3465         switch (adapter->hw.mac.type) {
3466         case e1000_80003es2lan:
3467                 tipg = DEFAULT_82543_TIPG_IPGR1;
3468                 tipg |= DEFAULT_80003ES2LAN_TIPG_IPGR2 <<
3469                     E1000_TIPG_IPGR2_SHIFT;
3470                 break;
3471         default:
3472                 if ((adapter->hw.phy.media_type == e1000_media_type_fiber) ||
3473                     (adapter->hw.phy.media_type ==
3474                     e1000_media_type_internal_serdes))
3475                         tipg = DEFAULT_82543_TIPG_IPGT_FIBER;
3476                 else
3477                         tipg = DEFAULT_82543_TIPG_IPGT_COPPER;
3478                 tipg |= DEFAULT_82543_TIPG_IPGR1 << E1000_TIPG_IPGR1_SHIFT;
3479                 tipg |= DEFAULT_82543_TIPG_IPGR2 << E1000_TIPG_IPGR2_SHIFT;
3480         }
3481
3482         E1000_WRITE_REG(&adapter->hw, E1000_TIPG, tipg);
3483         E1000_WRITE_REG(&adapter->hw, E1000_TIDV, adapter->tx_int_delay.value);
3484
3485         if (adapter->hw.mac.type >= e1000_82540)
3486                 E1000_WRITE_REG(&adapter->hw, E1000_TADV,
3487                     adapter->tx_abs_int_delay.value);
3488
3489         if ((adapter->hw.mac.type == e1000_82571) ||
3490             (adapter->hw.mac.type == e1000_82572)) {
3491                 tarc = E1000_READ_REG(&adapter->hw, E1000_TARC(0));
3492                 tarc |= SPEED_MODE_BIT;
3493                 E1000_WRITE_REG(&adapter->hw, E1000_TARC(0), tarc);
3494         } else if (adapter->hw.mac.type == e1000_80003es2lan) {
3495                 tarc = E1000_READ_REG(&adapter->hw, E1000_TARC(0));
3496                 tarc |= 1;
3497                 E1000_WRITE_REG(&adapter->hw, E1000_TARC(0), tarc);
3498                 tarc = E1000_READ_REG(&adapter->hw, E1000_TARC(1));
3499                 tarc |= 1;
3500                 E1000_WRITE_REG(&adapter->hw, E1000_TARC(1), tarc);
3501         }
3502
3503         adapter->txd_cmd = E1000_TXD_CMD_IFCS;
3504         if (adapter->tx_int_delay.value > 0)
3505                 adapter->txd_cmd |= E1000_TXD_CMD_IDE;
3506
3507         /* Program the Transmit Control Register */
3508         tctl = E1000_READ_REG(&adapter->hw, E1000_TCTL);
3509         tctl &= ~E1000_TCTL_CT;
3510         tctl |= (E1000_TCTL_PSP | E1000_TCTL_RTLC | E1000_TCTL_EN |
3511                    (E1000_COLLISION_THRESHOLD << E1000_CT_SHIFT));
3512
3513         if (adapter->hw.mac.type >= e1000_82571)
3514                 tctl |= E1000_TCTL_MULR;
3515
3516         /* This write will effectively turn on the transmit unit. */
3517         E1000_WRITE_REG(&adapter->hw, E1000_TCTL, tctl);
3518
3519 }
3520
3521
3522 /*********************************************************************
3523  *
3524  *  Free all transmit rings.
3525  *
3526  **********************************************************************/
3527 static void
3528 em_free_transmit_structures(struct adapter *adapter)
3529 {
3530         struct tx_ring *txr = adapter->tx_rings;
3531
3532         for (int i = 0; i < adapter->num_queues; i++, txr++) {
3533                 EM_TX_LOCK(txr);
3534                 em_free_transmit_buffers(txr);
3535                 em_dma_free(adapter, &txr->txdma);
3536                 EM_TX_UNLOCK(txr);
3537                 EM_TX_LOCK_DESTROY(txr);
3538         }
3539
3540         free(adapter->tx_rings, M_DEVBUF);
3541 }
3542
3543 /*********************************************************************
3544  *
3545  *  Free transmit ring related data structures.
3546  *
3547  **********************************************************************/
3548 static void
3549 em_free_transmit_buffers(struct tx_ring *txr)
3550 {
3551         struct adapter          *adapter = txr->adapter;
3552         struct em_buffer        *txbuf;
3553
3554         INIT_DEBUGOUT("free_transmit_ring: begin");
3555
3556         if (txr->tx_buffers == NULL)
3557                 return;
3558
3559         for (int i = 0; i < adapter->num_tx_desc; i++) {
3560                 txbuf = &txr->tx_buffers[i];
3561                 if (txbuf->m_head != NULL) {
3562                         bus_dmamap_sync(txr->txtag, txbuf->map,
3563                             BUS_DMASYNC_POSTWRITE);
3564                         bus_dmamap_unload(txr->txtag,
3565                             txbuf->map);
3566                         m_freem(txbuf->m_head);
3567                         txbuf->m_head = NULL;
3568                         if (txbuf->map != NULL) {
3569                                 bus_dmamap_destroy(txr->txtag,
3570                                     txbuf->map);
3571                                 txbuf->map = NULL;
3572                         }
3573                 } else if (txbuf->map != NULL) {
3574                         bus_dmamap_unload(txr->txtag,
3575                             txbuf->map);
3576                         bus_dmamap_destroy(txr->txtag,
3577                             txbuf->map);
3578                         txbuf->map = NULL;
3579                 }
3580         }
3581 #if __FreeBSD_version >= 800000
3582         if (txr->br != NULL)
3583                 buf_ring_free(txr->br, M_DEVBUF);
3584 #endif
3585         if (txr->tx_buffers != NULL) {
3586                 free(txr->tx_buffers, M_DEVBUF);
3587                 txr->tx_buffers = NULL;
3588         }
3589         if (txr->txtag != NULL) {
3590                 bus_dma_tag_destroy(txr->txtag);
3591                 txr->txtag = NULL;
3592         }
3593         return;
3594 }
3595
3596
3597 /*********************************************************************
3598  *  The offload context is protocol specific (TCP/UDP) and thus
3599  *  only needs to be set when the protocol changes. A context
3600  *  change can be a performance detriment, however, and the
3601  *  feature might be better just disabled. The reason arises in the way
3602  *  in which the controller supports pipelined requests from the
3603  *  Tx data DMA. Up to four requests can be pipelined, and they may
3604  *  belong to the same packet or to multiple packets. However all
3605  *  requests for one packet are issued before a request is issued
3606  *  for a subsequent packet and if a request for the next packet
3607  *  requires a context change, that request will be stalled
3608  *  until the previous request completes. This means setting up
3609  *  a new context effectively disables pipelined Tx data DMA, which
3610  *  in turn greatly slows down the sending of small sized
3611  *  frames.
3612  **********************************************************************/
3613 static void
3614 em_transmit_checksum_setup(struct tx_ring *txr, struct mbuf *mp, int ip_off,
3615     struct ip *ip, u32 *txd_upper, u32 *txd_lower)
3616 {
3617         struct adapter                  *adapter = txr->adapter;
3618         struct e1000_context_desc       *TXD = NULL;
3619         struct em_buffer                *tx_buffer;
3620         int                             cur, hdr_len;
3621         u32                             cmd = 0;
3622         u16                             offload = 0;
3623         u8                              ipcso, ipcss, tucso, tucss;
3624
3625         ipcss = ipcso = tucss = tucso = 0;
3626         hdr_len = ip_off + (ip->ip_hl << 2);
3627         cur = txr->next_avail_desc;
3628
3629         /* Setup of IP header checksum. */
3630         if (mp->m_pkthdr.csum_flags & CSUM_IP) {
3631                 *txd_upper |= E1000_TXD_POPTS_IXSM << 8;
3632                 offload |= CSUM_IP;
3633                 ipcss = ip_off;
3634                 ipcso = ip_off + offsetof(struct ip, ip_sum);
3635                 /*
3636                  * Start offset for header checksum calculation.
3637                  * End offset for header checksum calculation.
3638                  * Offset of place to put the checksum.
3639                  */
3640                 TXD = (struct e1000_context_desc *)&txr->tx_base[cur];
3641                 TXD->lower_setup.ip_fields.ipcss = ipcss;
3642                 TXD->lower_setup.ip_fields.ipcse = htole16(hdr_len);
3643                 TXD->lower_setup.ip_fields.ipcso = ipcso;
3644                 cmd |= E1000_TXD_CMD_IP;
3645         }
3646
3647         if (mp->m_pkthdr.csum_flags & CSUM_TCP) {
3648                 *txd_lower = E1000_TXD_CMD_DEXT | E1000_TXD_DTYP_D;
3649                 *txd_upper |= E1000_TXD_POPTS_TXSM << 8;
3650                 offload |= CSUM_TCP;
3651                 tucss = hdr_len;
3652                 tucso = hdr_len + offsetof(struct tcphdr, th_sum);
3653                 /*
3654                  * Setting up a new checksum offload context for every frame
3655                  * takes a lot of processing time for the hardware. This also
3656                  * reduces performance a lot for small sized frames, so avoid
3657                  * it if the driver can use a previously configured checksum
3658                  * offload context.
3659                  */
3660                 if (txr->last_hw_offload == offload) {
3661                         if (offload & CSUM_IP) {
3662                                 if (txr->last_hw_ipcss == ipcss &&
3663                                     txr->last_hw_ipcso == ipcso &&
3664                                     txr->last_hw_tucss == tucss &&
3665                                     txr->last_hw_tucso == tucso)
3666                                         return;
3667                         } else {
3668                                 if (txr->last_hw_tucss == tucss &&
3669                                     txr->last_hw_tucso == tucso)
3670                                         return;
3671                         }
3672                 }
3673                 txr->last_hw_offload = offload;
3674                 txr->last_hw_tucss = tucss;
3675                 txr->last_hw_tucso = tucso;
3676                 /*
3677                  * Start offset for payload checksum calculation.
3678                  * End offset for payload checksum calculation.
3679                  * Offset of place to put the checksum.
3680                  */
3681                 TXD = (struct e1000_context_desc *)&txr->tx_base[cur];
3682                 TXD->upper_setup.tcp_fields.tucss = hdr_len;
3683                 TXD->upper_setup.tcp_fields.tucse = htole16(0);
3684                 TXD->upper_setup.tcp_fields.tucso = tucso;
3685                 cmd |= E1000_TXD_CMD_TCP;
3686         } else if (mp->m_pkthdr.csum_flags & CSUM_UDP) {
3687                 *txd_lower = E1000_TXD_CMD_DEXT | E1000_TXD_DTYP_D;
3688                 *txd_upper |= E1000_TXD_POPTS_TXSM << 8;
                     offload |= CSUM_UDP;    /* record UDP for the reuse check below */
3689                 tucss = hdr_len;
3690                 tucso = hdr_len + offsetof(struct udphdr, uh_sum);
3691                 /*
3692                  * Setting up a new checksum offload context for every frame
3693                  * takes a lot of processing time for the hardware. This also
3694                  * reduces performance a lot for small sized frames, so avoid
3695                  * it if the driver can use a previously configured checksum
3696                  * offload context.
3697                  */
3698                 if (txr->last_hw_offload == offload) {
3699                         if (offload & CSUM_IP) {
3700                                 if (txr->last_hw_ipcss == ipcss &&
3701                                     txr->last_hw_ipcso == ipcso &&
3702                                     txr->last_hw_tucss == tucss &&
3703                                     txr->last_hw_tucso == tucso)
3704                                         return;
3705                         } else {
3706                                 if (txr->last_hw_tucss == tucss &&
3707                                     txr->last_hw_tucso == tucso)
3708                                         return;
3709                         }
3710                 }
3711                 txr->last_hw_offload = offload;
3712                 txr->last_hw_tucss = tucss;
3713                 txr->last_hw_tucso = tucso;
3714                 /*
3715                  * Start offset for payload checksum calculation.
3716                  * End offset for payload checksum calculation.
3717                  * Offset of place to put the checksum.
3718                  */
3719                 TXD = (struct e1000_context_desc *)&txr->tx_base[cur];
3720                 TXD->upper_setup.tcp_fields.tucss = tucss;
3721                 TXD->upper_setup.tcp_fields.tucse = htole16(0);
3722                 TXD->upper_setup.tcp_fields.tucso = tucso;
3723         }
3724   
3725         if (offload & CSUM_IP) {
3726                 txr->last_hw_ipcss = ipcss;
3727                 txr->last_hw_ipcso = ipcso;
3728         }
3729
3730         TXD->tcp_seg_setup.data = htole32(0);
3731         TXD->cmd_and_length =
3732             htole32(adapter->txd_cmd | E1000_TXD_CMD_DEXT | cmd);
3733         tx_buffer = &txr->tx_buffers[cur];
3734         tx_buffer->m_head = NULL;
3735         tx_buffer->next_eop = -1;
3736
3737         if (++cur == adapter->num_tx_desc)
3738                 cur = 0;
3739
3740         txr->tx_avail--;
3741         txr->next_avail_desc = cur;
3742 }
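     /*
     ** Worked example (illustrative): for an untagged IPv4/TCP frame,
     ** ip_off = 14 (Ethernet header) and ip_hl = 5 (20 byte IP header), so
     **      ipcss = 14, ipcso = 14 + offsetof(struct ip, ip_sum) = 24,
     **      hdr_len = 34, tucss = 34,
     **      tucso = 34 + offsetof(struct tcphdr, th_sum) = 50.
     */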
3743
3744
3745 /**********************************************************************
3746  *
3747  *  Setup work for hardware segmentation offload (TSO)
3748  *
3749  **********************************************************************/
3750 static void
3751 em_tso_setup(struct tx_ring *txr, struct mbuf *mp, int ip_off,
3752     struct ip *ip, struct tcphdr *tp, u32 *txd_upper, u32 *txd_lower)
3753 {
3754         struct adapter                  *adapter = txr->adapter;
3755         struct e1000_context_desc       *TXD;
3756         struct em_buffer                *tx_buffer;
3757         int cur, hdr_len;
3758
3759         /*
3760          * In theory we can use the same TSO context if and only if
3761          * frame is the same type (IP/TCP) and has the same MSS. However,
3762          * checking whether a frame has the same IP/TCP structure is a
3763          * hard thing, so just ignore that and always reestablish a
3764          * new TSO context.
3765          */
3766         hdr_len = ip_off + (ip->ip_hl << 2) + (tp->th_off << 2);
3767         *txd_lower = (E1000_TXD_CMD_DEXT |      /* Extended descr type */
3768                       E1000_TXD_DTYP_D |        /* Data descr type */
3769                       E1000_TXD_CMD_TSE);       /* Do TSE on this packet */
3770
3771         /* IP and/or TCP header checksum calculation and insertion. */
3772         *txd_upper = (E1000_TXD_POPTS_IXSM | E1000_TXD_POPTS_TXSM) << 8;
3773
3774         cur = txr->next_avail_desc;
3775         tx_buffer = &txr->tx_buffers[cur];
3776         TXD = (struct e1000_context_desc *) &txr->tx_base[cur];
3777
3778         /*
3779          * Start offset for header checksum calculation.
3780          * End offset for header checksum calculation.
3781          * Offset of place to put the checksum.
3782          */
3783         TXD->lower_setup.ip_fields.ipcss = ip_off;
3784         TXD->lower_setup.ip_fields.ipcse =
3785             htole16(ip_off + (ip->ip_hl << 2) - 1);
3786         TXD->lower_setup.ip_fields.ipcso = ip_off + offsetof(struct ip, ip_sum);
3787         /*
3788          * Start offset for payload checksum calculation.
3789          * End offset for payload checksum calculation.
3790          * Offset of place to put the checksum.
3791          */
3792         TXD->upper_setup.tcp_fields.tucss = ip_off + (ip->ip_hl << 2);
3793         TXD->upper_setup.tcp_fields.tucse = 0;
3794         TXD->upper_setup.tcp_fields.tucso =
3795             ip_off + (ip->ip_hl << 2) + offsetof(struct tcphdr, th_sum);
3796         /*
3797          * Payload size per packet w/o any headers.
3798          * Length of all headers up to payload.
3799          */
3800         TXD->tcp_seg_setup.fields.mss = htole16(mp->m_pkthdr.tso_segsz);
3801         TXD->tcp_seg_setup.fields.hdr_len = hdr_len;
3802
3803         TXD->cmd_and_length = htole32(adapter->txd_cmd |
3804                                 E1000_TXD_CMD_DEXT |    /* Extended descr */
3805                                 E1000_TXD_CMD_TSE |     /* TSE context */
3806                                 E1000_TXD_CMD_IP |      /* Do IP csum */
3807                                 E1000_TXD_CMD_TCP |     /* Do TCP checksum */
3808                                 (mp->m_pkthdr.len - (hdr_len))); /* Total len */
3809
3810         tx_buffer->m_head = NULL;
3811         tx_buffer->next_eop = -1;
3812
3813         if (++cur == adapter->num_tx_desc)
3814                 cur = 0;
3815
3816         txr->tx_avail--;
3817         txr->next_avail_desc = cur;
3818         txr->tx_tso = TRUE;
3819 }
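     /*
     ** Illustrative: a 65262 byte IPv4/TCP send handed down with
     ** tso_segsz == 1460 and the 54 bytes of headers above is cut by
     ** the hardware into ceil((65262 - 54) / 1460) == 45 wire frames,
     ** each with the headers replicated and both checksums inserted.
     */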
3820
3821
3822 /**********************************************************************
3823  *
3824  *  Examine each tx_buffer in the used queue. If the hardware is done
3825  *  processing the packet then free associated resources. The
3826  *  tx_buffer is put back on the free queue.
3827  *
3828  **********************************************************************/
3829 static void
3830 em_txeof(struct tx_ring *txr)
3831 {
3832         struct adapter  *adapter = txr->adapter;
3833         int first, last, done, processed;
3834         struct em_buffer *tx_buffer;
3835         struct e1000_tx_desc   *tx_desc, *eop_desc;
3836         struct ifnet   *ifp = adapter->ifp;
3837
3838         EM_TX_LOCK_ASSERT(txr);
3839 #ifdef DEV_NETMAP
3840         if (netmap_tx_irq(ifp, txr->me |
3841             (NETMAP_LOCKED_ENTER | NETMAP_LOCKED_EXIT)))
3842                 return;
3843 #endif /* DEV_NETMAP */
3844
3845         /* No work, make sure watchdog is off */
3846         if (txr->tx_avail == adapter->num_tx_desc) {
3847                 txr->queue_status = EM_QUEUE_IDLE;
3848                 return;
3849         }
3850
3851         processed = 0;
3852         first = txr->next_to_clean;
3853         tx_desc = &txr->tx_base[first];
3854         tx_buffer = &txr->tx_buffers[first];
3855         last = tx_buffer->next_eop;
3856         eop_desc = &txr->tx_base[last];
3857
3858         /*
3859          * What this does is get the index of the
3860          * first descriptor AFTER the EOP of the 
3861          * first packet, that way we can do the
3862          * simple comparison on the inner while loop.
3863          */
3864         if (++last == adapter->num_tx_desc)
3865                 last = 0;
3866         done = last;
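             /*
             ** Example (illustrative): on an 8 descriptor ring, if the first
             ** packet occupies slots 2..4 (first == 2, next_eop == 4), then
             ** done becomes 5 and the loop below cleans slots 2, 3 and 4.
             */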
3867
3868         bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
3869             BUS_DMASYNC_POSTREAD);
3870
3871         while (eop_desc->upper.fields.status & E1000_TXD_STAT_DD) {
3872                 /* We clean the range of the packet */
3873                 while (first != done) {
3874                         tx_desc->upper.data = 0;
3875                         tx_desc->lower.data = 0;
3876                         tx_desc->buffer_addr = 0;
3877                         ++txr->tx_avail;
3878                         ++processed;
3879
3880                         if (tx_buffer->m_head) {
3881                                 bus_dmamap_sync(txr->txtag,
3882                                     tx_buffer->map,
3883                                     BUS_DMASYNC_POSTWRITE);
3884                                 bus_dmamap_unload(txr->txtag,
3885                                     tx_buffer->map);
3886                                 m_freem(tx_buffer->m_head);
3887                                 tx_buffer->m_head = NULL;
3888                         }
3889                         tx_buffer->next_eop = -1;
3890                         txr->watchdog_time = ticks;
3891
3892                         if (++first == adapter->num_tx_desc)
3893                                 first = 0;
3894
3895                         tx_buffer = &txr->tx_buffers[first];
3896                         tx_desc = &txr->tx_base[first];
3897                 }
3898                 ++ifp->if_opackets;
3899                 /* See if we can continue to the next packet */
3900                 last = tx_buffer->next_eop;
3901                 if (last != -1) {
3902                         eop_desc = &txr->tx_base[last];
3903                         /* Get new done point */
3904                         if (++last == adapter->num_tx_desc) last = 0;
3905                         done = last;
3906                 } else
3907                         break;
3908         }
3909         bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
3910             BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
3911
3912         txr->next_to_clean = first;
3913
3914         /*
3915         ** Watchdog calculation: we know there's
3916         ** work outstanding or the first return
3917         ** would have been taken, so none processed
3918         ** for too long indicates a hang. The local timer
3919         ** will examine this and do a reset if needed.
3920         */
3921         if ((!processed) && ((ticks - txr->watchdog_time) > EM_WATCHDOG))
3922                 txr->queue_status = EM_QUEUE_HUNG;
3923
3924         /*
3925          * If we have a minimum free, clear IFF_DRV_OACTIVE
3926          * to tell the stack that it is OK to send packets.
3927          * Notice that all writes of OACTIVE happen under the
3928          * TX lock which, with a single queue, guarantees 
3929          * sanity.
3930          */
3931         if (txr->tx_avail >= EM_MAX_SCATTER)
3932                 ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;
3933
3934         /* Disable watchdog if all clean */
3935         if (txr->tx_avail == adapter->num_tx_desc) {
3936                 txr->queue_status = EM_QUEUE_IDLE;
3937         } 
3938 }
3939
3940
3941 /*********************************************************************
3942  *
3943  *  Refresh RX descriptor mbufs from system mbuf buffer pool.
3944  *
3945  **********************************************************************/
3946 static void
3947 em_refresh_mbufs(struct rx_ring *rxr, int limit)
3948 {
3949         struct adapter          *adapter = rxr->adapter;
3950         struct mbuf             *m;
3951         bus_dma_segment_t       segs[1];
3952         struct em_buffer        *rxbuf;
3953         int                     i, j, error, nsegs;
3954         bool                    cleaned = FALSE;
3955
3956         i = j = rxr->next_to_refresh;
3957         /*
3958         ** Get one descriptor beyond
3959         ** our work mark to control
3960         ** the loop.
3961         */
3962         if (++j == adapter->num_rx_desc)
3963                 j = 0;
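             /*
             ** Example (illustrative): with next_to_refresh == 254 on a 256
             ** descriptor ring and limit == 10, slots 254, 255, 0 ... 9 are
             ** refreshed and the loop stops once j wraps around to limit.
             */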
3964
3965         while (j != limit) {
3966                 rxbuf = &rxr->rx_buffers[i];
3967                 if (rxbuf->m_head == NULL) {
3968                         m = m_getjcl(M_NOWAIT, MT_DATA,
3969                             M_PKTHDR, adapter->rx_mbuf_sz);
3970                         /*
3971                         ** If we have a temporary resource shortage
3972                         ** that causes a failure, just abort refresh
3973                         ** for now; we will return to this point when
3974                         ** reinvoked from em_rxeof.
3975                         */
3976                         if (m == NULL)
3977                                 goto update;
3978                 } else
3979                         m = rxbuf->m_head;
3980
3981                 m->m_len = m->m_pkthdr.len = adapter->rx_mbuf_sz;
3982                 m->m_flags |= M_PKTHDR;
3983                 m->m_data = m->m_ext.ext_buf;
3984
3985                 /* Use bus_dma machinery to setup the memory mapping  */
3986                 error = bus_dmamap_load_mbuf_sg(rxr->rxtag, rxbuf->map,
3987                     m, segs, &nsegs, BUS_DMA_NOWAIT);
3988                 if (error != 0) {
3989                         printf("Refresh mbufs: hdr dmamap load"
3990                             " failure - %d\n", error);
3991                         m_free(m);
3992                         rxbuf->m_head = NULL;
3993                         goto update;
3994                 }
3995                 rxbuf->m_head = m;
3996                 bus_dmamap_sync(rxr->rxtag,
3997                     rxbuf->map, BUS_DMASYNC_PREREAD);
3998                 rxr->rx_base[i].buffer_addr = htole64(segs[0].ds_addr);
3999                 cleaned = TRUE;
4000
4001                 i = j; /* Next is precalculated for us */
4002                 rxr->next_to_refresh = i;
4003                 /* Calculate next controlling index */
4004                 if (++j == adapter->num_rx_desc)
4005                         j = 0;
4006         }
4007 update:
4008         /*
4009         ** Update the tail pointer only if,
4010         ** and as far as we have refreshed.
4011         */
4012         if (cleaned)
4013                 E1000_WRITE_REG(&adapter->hw,
4014                     E1000_RDT(rxr->me), rxr->next_to_refresh);
4015
4016         return;
4017 }
4018
4019
4020 /*********************************************************************
4021  *
4022  *  Allocate memory for rx_buffer structures. Since we use one
4023  *  rx_buffer per received packet, the maximum number of rx_buffer's
4024  *  that we'll need is equal to the number of receive descriptors
4025  *  that we've allocated.
4026  *
4027  **********************************************************************/
4028 static int
4029 em_allocate_receive_buffers(struct rx_ring *rxr)
4030 {
4031         struct adapter          *adapter = rxr->adapter;
4032         device_t                dev = adapter->dev;
4033         struct em_buffer        *rxbuf;
4034         int                     error;
4035
4036         rxr->rx_buffers = malloc(sizeof(struct em_buffer) *
4037             adapter->num_rx_desc, M_DEVBUF, M_NOWAIT | M_ZERO);
4038         if (rxr->rx_buffers == NULL) {
4039                 device_printf(dev, "Unable to allocate rx_buffer memory\n");
4040                 return (ENOMEM);
4041         }
4042
4043         error = bus_dma_tag_create(bus_get_dma_tag(dev), /* parent */
4044                                 1, 0,                   /* alignment, bounds */
4045                                 BUS_SPACE_MAXADDR,      /* lowaddr */
4046                                 BUS_SPACE_MAXADDR,      /* highaddr */
4047                                 NULL, NULL,             /* filter, filterarg */
4048                                 MJUM9BYTES,             /* maxsize */
4049                                 1,                      /* nsegments */
4050                                 MJUM9BYTES,             /* maxsegsize */
4051                                 0,                      /* flags */
4052                                 NULL,                   /* lockfunc */
4053                                 NULL,                   /* lockarg */
4054                                 &rxr->rxtag);
4055         if (error) {
4056                 device_printf(dev, "%s: bus_dma_tag_create failed %d\n",
4057                     __func__, error);
4058                 goto fail;
4059         }
4060
4061         rxbuf = rxr->rx_buffers;
4062         for (int i = 0; i < adapter->num_rx_desc; i++) {
4063                 rxbuf = &rxr->rx_buffers[i];
4064                 error = bus_dmamap_create(rxr->rxtag, BUS_DMA_NOWAIT,
4065                     &rxbuf->map);
4066                 if (error) {
4067                         device_printf(dev, "%s: bus_dmamap_create failed: %d\n",
4068                             __func__, error);
4069                         goto fail;
4070                 }
4071         }
4072
4073         return (0);
4074
4075 fail:
4076         em_free_receive_structures(adapter);
4077         return (error);
4078 }
4079
4080
4081 /*********************************************************************
4082  *
4083  *  Initialize a receive ring and its buffers.
4084  *
4085  **********************************************************************/
4086 static int
4087 em_setup_receive_ring(struct rx_ring *rxr)
4088 {
4089         struct  adapter         *adapter = rxr->adapter;
4090         struct em_buffer        *rxbuf;
4091         bus_dma_segment_t       seg[1];
4092         int                     rsize, nsegs, error = 0;
4093 #ifdef DEV_NETMAP
4094         struct netmap_adapter *na = NA(adapter->ifp);
4095         struct netmap_slot *slot;
4096 #endif
4097
4098
4099         /* Clear the ring contents */
4100         EM_RX_LOCK(rxr);
4101         rsize = roundup2(adapter->num_rx_desc *
4102             sizeof(struct e1000_rx_desc), EM_DBA_ALIGN);
4103         bzero((void *)rxr->rx_base, rsize);
4104 #ifdef DEV_NETMAP
4105         slot = netmap_reset(na, NR_RX, 0, 0);
4106 #endif
4107
4108         /*
4109         ** Free current RX buffer structs and their mbufs
4110         */
4111         for (int i = 0; i < adapter->num_rx_desc; i++) {
4112                 rxbuf = &rxr->rx_buffers[i];
4113                 if (rxbuf->m_head != NULL) {
4114                         bus_dmamap_sync(rxr->rxtag, rxbuf->map,
4115                             BUS_DMASYNC_POSTREAD);
4116                         bus_dmamap_unload(rxr->rxtag, rxbuf->map);
4117                         m_freem(rxbuf->m_head);
4118                         rxbuf->m_head = NULL; /* mark as freed */
4119                 }
4120         }
4121
4122         /* Now replenish the mbufs */
4123         for (int j = 0; j != adapter->num_rx_desc; ++j) {
4124                 rxbuf = &rxr->rx_buffers[j];
4125 #ifdef DEV_NETMAP
4126                 if (slot) {
4127                         int si = netmap_idx_n2k(&na->rx_rings[rxr->me], j);
4128                         uint64_t paddr;
4129                         void *addr;
4130
4131                         addr = PNMB(slot + si, &paddr);
4132                         netmap_load_map(rxr->rxtag, rxbuf->map, addr);
4133                         /* Update descriptor */
4134                         rxr->rx_base[j].buffer_addr = htole64(paddr);
4135                         continue;
4136                 }
4137 #endif /* DEV_NETMAP */
4138                 rxbuf->m_head = m_getjcl(M_NOWAIT, MT_DATA,
4139                     M_PKTHDR, adapter->rx_mbuf_sz);
4140                 if (rxbuf->m_head == NULL) {
4141                         error = ENOBUFS;
4142                         goto fail;
4143                 }
4144                 rxbuf->m_head->m_len = adapter->rx_mbuf_sz;
4145                 rxbuf->m_head->m_flags &= ~M_HASFCS; /* we strip it */
4146                 rxbuf->m_head->m_pkthdr.len = adapter->rx_mbuf_sz;
4147
4148                 /* Get the memory mapping */
4149                 error = bus_dmamap_load_mbuf_sg(rxr->rxtag,
4150                     rxbuf->map, rxbuf->m_head, seg,
4151                     &nsegs, BUS_DMA_NOWAIT);
4152                 if (error != 0) {
4153                         m_freem(rxbuf->m_head);
4154                         rxbuf->m_head = NULL;
4155                         goto fail;
4156                 }
4157                 bus_dmamap_sync(rxr->rxtag,
4158                     rxbuf->map, BUS_DMASYNC_PREREAD);
4159
4160                 /* Update descriptor */
4161                 rxr->rx_base[j].buffer_addr = htole64(seg[0].ds_addr);
4162         }
4163         rxr->next_to_check = 0;
4164         rxr->next_to_refresh = 0;
4165         bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
4166             BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
4167
4168 fail:
4169         EM_RX_UNLOCK(rxr);
4170         return (error);
4171 }
4172
4173 /*********************************************************************
4174  *
4175  *  Initialize all receive rings.
4176  *
4177  **********************************************************************/
4178 static int
4179 em_setup_receive_structures(struct adapter *adapter)
4180 {
4181         struct rx_ring *rxr = adapter->rx_rings;
4182         int q;
4183
4184         for (q = 0; q < adapter->num_queues; q++, rxr++)
4185                 if (em_setup_receive_ring(rxr))
4186                         goto fail;
4187
4188         return (0);
4189 fail:
4190         /*
4191          * Free the RX buffers allocated so far; we only handle
4192          * the rings that completed, as the failing ring will have
4193          * cleaned up for itself. 'q' failed, so it's the terminus.
4194          */
4195         for (int i = 0; i < q; ++i) {
4196                 rxr = &adapter->rx_rings[i];
4197                 for (int n = 0; n < adapter->num_rx_desc; n++) {
4198                         struct em_buffer *rxbuf;
4199                         rxbuf = &rxr->rx_buffers[n];
4200                         if (rxbuf->m_head != NULL) {
4201                                 bus_dmamap_sync(rxr->rxtag, rxbuf->map,
4202                                   BUS_DMASYNC_POSTREAD);
4203                                 bus_dmamap_unload(rxr->rxtag, rxbuf->map);
4204                                 m_freem(rxbuf->m_head);
4205                                 rxbuf->m_head = NULL;
4206                         }
4207                 }
4208                 rxr->next_to_check = 0;
4209                 rxr->next_to_refresh = 0;
4210         }
4211
4212         return (ENOBUFS);
4213 }
4214
4215 /*********************************************************************
4216  *
4217  *  Free all receive rings.
4218  *
4219  **********************************************************************/
4220 static void
4221 em_free_receive_structures(struct adapter *adapter)
4222 {
4223         struct rx_ring *rxr = adapter->rx_rings;
4224
4225         for (int i = 0; i < adapter->num_queues; i++, rxr++) {
4226                 em_free_receive_buffers(rxr);
4227                 /* Free the ring memory as well */
4228                 em_dma_free(adapter, &rxr->rxdma);
4229                 EM_RX_LOCK_DESTROY(rxr);
4230         }
4231
4232         free(adapter->rx_rings, M_DEVBUF);
4233 }
4234
4235
4236 /*********************************************************************
4237  *
4238  *  Free receive ring data structures
4239  *
4240  **********************************************************************/
4241 static void
4242 em_free_receive_buffers(struct rx_ring *rxr)
4243 {
4244         struct adapter          *adapter = rxr->adapter;
4245         struct em_buffer        *rxbuf = NULL;
4246
4247         INIT_DEBUGOUT("free_receive_buffers: begin");
4248
4249         if (rxr->rx_buffers != NULL) {
4250                 for (int i = 0; i < adapter->num_rx_desc; i++) {
4251                         rxbuf = &rxr->rx_buffers[i];
4252                         if (rxbuf->map != NULL) {
4253                                 bus_dmamap_sync(rxr->rxtag, rxbuf->map,
4254                                     BUS_DMASYNC_POSTREAD);
4255                                 bus_dmamap_unload(rxr->rxtag, rxbuf->map);
4256                                 bus_dmamap_destroy(rxr->rxtag, rxbuf->map);
4257                         }
4258                         if (rxbuf->m_head != NULL) {
4259                                 m_freem(rxbuf->m_head);
4260                                 rxbuf->m_head = NULL;
4261                         }
4262                 }
4263                 free(rxr->rx_buffers, M_DEVBUF);
4264                 rxr->rx_buffers = NULL;
4265                 rxr->next_to_check = 0;
4266                 rxr->next_to_refresh = 0;
4267         }
4268
4269         if (rxr->rxtag != NULL) {
4270                 bus_dma_tag_destroy(rxr->rxtag);
4271                 rxr->rxtag = NULL;
4272         }
4273
4274         return;
4275 }
4276
4277
4278 /*********************************************************************
4279  *
4280  *  Enable receive unit.
4281  *
4282  **********************************************************************/
4283
4284 static void
4285 em_initialize_receive_unit(struct adapter *adapter)
4286 {
4287         struct rx_ring  *rxr = adapter->rx_rings;
4288         struct ifnet    *ifp = adapter->ifp;
4289         struct e1000_hw *hw = &adapter->hw;
4290         u64     bus_addr;
4291         u32     rctl, rxcsum;
4292
4293         INIT_DEBUGOUT("em_initialize_receive_unit: begin");
4294
4295         /*
4296          * Make sure receives are disabled while setting
4297          * up the descriptor ring
4298          */
4299         rctl = E1000_READ_REG(hw, E1000_RCTL);
4300         /* Never disable receives on 82574/82583 once they have been enabled */
4301         if ((hw->mac.type != e1000_82574) && (hw->mac.type != e1000_82583))
4302                 E1000_WRITE_REG(hw, E1000_RCTL, rctl & ~E1000_RCTL_EN);
4303
4304         E1000_WRITE_REG(&adapter->hw, E1000_RADV,
4305             adapter->rx_abs_int_delay.value);
4306         /*
4307          * Set the interrupt throttling rate. Value is calculated
4308          * as DEFAULT_ITR = 1/(MAX_INTS_PER_SEC * 256ns)
4309          */
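        /*
        ** Worked example, assuming MAX_INTS_PER_SEC is 8000 as
        ** defined in if_em.h: DEFAULT_ITR = 1e9 / (8000 * 256)
        ** ~= 488 ticks of 256ns, i.e. at least ~125us between
        ** interrupts.
        */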
4310         E1000_WRITE_REG(hw, E1000_ITR, DEFAULT_ITR);
4311
4312         /*
4313         ** When using MSIX interrupts we need to throttle
4314         ** using the EITR register (82574 only)
4315         */
4316         if (hw->mac.type == e1000_82574) {
4317                 for (int i = 0; i < 4; i++)
4318                         E1000_WRITE_REG(hw, E1000_EITR_82574(i),
4319                             DEFAULT_ITR);
4320                 /* Disable accelerated acknowledge */
4321                 E1000_WRITE_REG(hw, E1000_RFCTL, E1000_RFCTL_ACK_DIS);
4322         }
4323
4324         rxcsum = E1000_READ_REG(hw, E1000_RXCSUM);
4325         if (ifp->if_capenable & IFCAP_RXCSUM)
4326                 rxcsum |= E1000_RXCSUM_TUOFL;
4327         else
4328                 rxcsum &= ~E1000_RXCSUM_TUOFL;
4329         E1000_WRITE_REG(hw, E1000_RXCSUM, rxcsum);
4330
4331         /*
4332         ** XXX TEMPORARY WORKAROUND: on some systems with the 82573,
4333         ** such as the Lenovo X60, long latencies are observed. This
4334         ** change eliminates the problem, but since having positive
4335         ** values in RDTR is a known source of problems on other
4336         ** platforms, another solution is being sought.
4337         */
4338         if (hw->mac.type == e1000_82573)
4339                 E1000_WRITE_REG(hw, E1000_RDTR, 0x20);
4340
4341         for (int i = 0; i < adapter->num_queues; i++, rxr++) {
4342                 /* Setup the Base and Length of the Rx Descriptor Ring */
4343                 u32 rdt = adapter->num_rx_desc - 1; /* default */
4344
4345                 bus_addr = rxr->rxdma.dma_paddr;
4346                 E1000_WRITE_REG(hw, E1000_RDLEN(i),
4347                     adapter->num_rx_desc * sizeof(struct e1000_rx_desc));
4348                 E1000_WRITE_REG(hw, E1000_RDBAH(i), (u32)(bus_addr >> 32));
4349                 E1000_WRITE_REG(hw, E1000_RDBAL(i), (u32)bus_addr);
4350                 /* Setup the Head and Tail Descriptor Pointers */
4351                 E1000_WRITE_REG(hw, E1000_RDH(i), 0);
4352 #ifdef DEV_NETMAP
4353                 /*
4354                  * An init() while a netmap client is active must
4355                  * preserve the RX buffers passed to userspace.
4356                  */
4357                 if (ifp->if_capenable & IFCAP_NETMAP)
4358                         rdt -= NA(adapter->ifp)->rx_rings[i].nr_hwavail;
4359 #endif /* DEV_NETMAP */
4360                 E1000_WRITE_REG(hw, E1000_RDT(i), rdt);
4361         }
4362
4363         /* Set PTHRESH for improved jumbo performance */
4364         if (((adapter->hw.mac.type == e1000_ich9lan) ||
4365             (adapter->hw.mac.type == e1000_pch2lan) ||
4366             (adapter->hw.mac.type == e1000_ich10lan)) &&
4367             (ifp->if_mtu > ETHERMTU)) {
4368                 u32 rxdctl = E1000_READ_REG(hw, E1000_RXDCTL(0));
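                /*
                ** PTHRESH lives in the low bits of RXDCTL, so OR-ing
                ** in 3 raises the prefetch threshold for jumbos.
                */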
4369                 E1000_WRITE_REG(hw, E1000_RXDCTL(0), rxdctl | 3);
4370         }
4371                 
4372         if (adapter->hw.mac.type >= e1000_pch2lan) {
4373                 if (ifp->if_mtu > ETHERMTU)
4374                         e1000_lv_jumbo_workaround_ich8lan(hw, TRUE);
4375                 else
4376                         e1000_lv_jumbo_workaround_ich8lan(hw, FALSE);
4377         }
4378
4379         /* Setup the Receive Control Register */
4380         rctl &= ~(3 << E1000_RCTL_MO_SHIFT);
4381         rctl |= E1000_RCTL_EN | E1000_RCTL_BAM |
4382             E1000_RCTL_LBM_NO | E1000_RCTL_RDMTS_HALF |
4383             (hw->mac.mc_filter_type << E1000_RCTL_MO_SHIFT);
4384
4385         /* Strip the CRC */
4386         rctl |= E1000_RCTL_SECRC;
4387
4388         /* Make sure VLAN Filters are off */
4389         rctl &= ~E1000_RCTL_VFE;
4390         rctl &= ~E1000_RCTL_SBP;
4391
4392         if (adapter->rx_mbuf_sz == MCLBYTES)
4393                 rctl |= E1000_RCTL_SZ_2048;
4394         else if (adapter->rx_mbuf_sz == MJUMPAGESIZE)
4395                 rctl |= E1000_RCTL_SZ_4096 | E1000_RCTL_BSEX;
4396         else if (adapter->rx_mbuf_sz > MJUMPAGESIZE)
4397                 rctl |= E1000_RCTL_SZ_8192 | E1000_RCTL_BSEX;
4398
4399         if (ifp->if_mtu > ETHERMTU)
4400                 rctl |= E1000_RCTL_LPE;
4401         else
4402                 rctl &= ~E1000_RCTL_LPE;
4403
4404         /* Write out the settings */
4405         E1000_WRITE_REG(hw, E1000_RCTL, rctl);
4406
4407         return;
4408 }
4409
4410
4411 /*********************************************************************
4412  *
4413  *  This routine executes in interrupt context. It replenishes
4414  *  the mbufs in the descriptor ring and sends data which has been
4415  *  DMA'ed into host memory to the upper layer.
4416  *
4417  *  We loop at most count times if count is > 0, or until done if
4418  *  count < 0.
4419  *  
4420  *  For polling we also now return the number of cleaned packets.
4421  *********************************************************************/
4422 static bool
4423 em_rxeof(struct rx_ring *rxr, int count, int *done)
4424 {
4425         struct adapter          *adapter = rxr->adapter;
4426         struct ifnet            *ifp = adapter->ifp;
4427         struct mbuf             *mp, *sendmp;
4428         u8                      status = 0;
4429         u16                     len;
4430         int                     i, processed, rxdone = 0;
4431         bool                    eop;
4432         struct e1000_rx_desc    *cur;
4433
4434         EM_RX_LOCK(rxr);
4435
4436 #ifdef DEV_NETMAP
4437         if (netmap_rx_irq(ifp, rxr->me | NETMAP_LOCKED_ENTER, &processed))
4438                 return (FALSE);
4439 #endif /* DEV_NETMAP */
4440
4441         for (i = rxr->next_to_check, processed = 0; count != 0;) {
4442
4443                 if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
4444                         break;
4445
4446                 bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
4447                     BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
4448
4449                 cur = &rxr->rx_base[i];
4450                 status = cur->status;
4451                 mp = sendmp = NULL;
4452
4453                 if ((status & E1000_RXD_STAT_DD) == 0)
4454                         break;
4455
4456                 len = le16toh(cur->length);
4457                 eop = (status & E1000_RXD_STAT_EOP) != 0;
4458
4459                 if ((cur->errors & E1000_RXD_ERR_FRAME_ERR_MASK) ||
4460                     (rxr->discard == TRUE)) {
4461                         adapter->dropped_pkts++;
4462                         ++rxr->rx_discarded;
4463                         if (!eop) /* Catch subsequent segs */
4464                                 rxr->discard = TRUE;
4465                         else
4466                                 rxr->discard = FALSE;
4467                         em_rx_discard(rxr, i);
4468                         goto next_desc;
4469                 }
4470
4471                 /* Assign correct length to the current fragment */
4472                 mp = rxr->rx_buffers[i].m_head;
4473                 mp->m_len = len;
4474
4475                 /* Trigger for refresh */
4476                 rxr->rx_buffers[i].m_head = NULL;
4477
4478                 /* First segment? */
4479                 if (rxr->fmp == NULL) {
4480                         mp->m_pkthdr.len = len;
4481                         rxr->fmp = rxr->lmp = mp;
4482                 } else {
4483                         /* Chain mbuf's together */
4484                         mp->m_flags &= ~M_PKTHDR;
4485                         rxr->lmp->m_next = mp;
4486                         rxr->lmp = mp;
4487                         rxr->fmp->m_pkthdr.len += len;
4488                 }
4489
4490                 if (eop) {
4491                         --count;
4492                         sendmp = rxr->fmp;
4493                         sendmp->m_pkthdr.rcvif = ifp;
4494                         ifp->if_ipackets++;
4495                         em_receive_checksum(cur, sendmp);
4496 #ifndef __NO_STRICT_ALIGNMENT
4497                         if (adapter->hw.mac.max_frame_size >
4498                             (MCLBYTES - ETHER_ALIGN) &&
4499                             em_fixup_rx(rxr) != 0)
4500                                 goto skip;
4501 #endif
4502                         if (status & E1000_RXD_STAT_VP) {
4503                                 sendmp->m_pkthdr.ether_vtag =
4504                                     le16toh(cur->special);
4505                                 sendmp->m_flags |= M_VLANTAG;
4506                         }
4507 #ifndef __NO_STRICT_ALIGNMENT
4508 skip:
4509 #endif
4510                         rxr->fmp = rxr->lmp = NULL;
4511                 }
4512 next_desc:
4513                 /* Zero out the receive descriptors status. */
4514                 cur->status = 0;
4515                 ++rxdone;       /* cumulative for POLL */
4516                 ++processed;
4517
4518                 /* Advance our pointers to the next descriptor. */
4519                 if (++i == adapter->num_rx_desc)
4520                         i = 0;
4521
4522                 /* Send to the stack */
4523                 if (sendmp != NULL) {
4524                         rxr->next_to_check = i;
4525                         EM_RX_UNLOCK(rxr);
4526                         (*ifp->if_input)(ifp, sendmp);
4527                         EM_RX_LOCK(rxr);
4528                         i = rxr->next_to_check;
4529                 }
4530
4531                 /* Only refresh mbufs every 8 descriptors */
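                /* (Batching amortizes the buffer allocation and the
                 *  RDT tail-register write done in em_refresh_mbufs.) */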
4532                 if (processed == 8) {
4533                         em_refresh_mbufs(rxr, i);
4534                         processed = 0;
4535                 }
4536         }
4537
4538         /* Catch any remaining refresh work */
4539         if (e1000_rx_unrefreshed(rxr))
4540                 em_refresh_mbufs(rxr, i);
4541
4542         rxr->next_to_check = i;
4543         if (done != NULL)
4544                 *done = rxdone;
4545         EM_RX_UNLOCK(rxr);
4546
4547         return ((status & E1000_RXD_STAT_DD) ? TRUE : FALSE);
4548 }
4549
4550 static __inline void
4551 em_rx_discard(struct rx_ring *rxr, int i)
4552 {
4553         struct em_buffer        *rbuf;
4554
4555         rbuf = &rxr->rx_buffers[i];
4556         /* Free any previous pieces */
4557         if (rxr->fmp != NULL) {
4558                 rxr->fmp->m_flags |= M_PKTHDR;
4559                 m_freem(rxr->fmp);
4560                 rxr->fmp = NULL;
4561                 rxr->lmp = NULL;
4562         }
4563         /*
4564         ** Free buffer and allow em_refresh_mbufs()
4565         ** to clean up and recharge buffer.
4566         */
4567         if (rbuf->m_head) {
4568                 m_free(rbuf->m_head);
4569                 rbuf->m_head = NULL;
4570         }
4571         return;
4572 }
4573
4574 #ifndef __NO_STRICT_ALIGNMENT
4575 /*
4576  * When jumbo frames are enabled we should realign the entire payload on
4577  * architectures with strict alignment. This is a serious design mistake
4578  * of the 8254x as it nullifies the benefit of DMA. The 8254x only allows
4579  * RX buffer sizes of 2048/4096/8192/16384; what we really want is
4580  * 2048 - ETHER_ALIGN, so that the payload comes out aligned. On
4581  * architectures without strict alignment the 8254x still performs
4582  * unaligned memory accesses, which reduces performance as well. To avoid
4583  * copying an entire frame to realign it, we allocate a new mbuf, copy the
4584  * ethernet header into it, and prepend the new mbuf to the existing chain.
4585  *
4586  * Be aware, the best performance of the 8254x is achieved only when jumbo
4587  * frames are not used at all on architectures with strict alignment.
4588  */
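/*
 * Worked example (illustrative): a frame DMA'ed to offset 0 of a
 * 2048-byte buffer puts the IP header at offset ETHER_HDR_LEN (14),
 * which is not 4-byte aligned. Sliding the whole frame forward by
 * ETHER_HDR_LEN, as the first branch below does, moves the IP
 * header to offset 28, which is 4-byte aligned.
 */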
4589 static int
4590 em_fixup_rx(struct rx_ring *rxr)
4591 {
4592         struct adapter *adapter = rxr->adapter;
4593         struct mbuf *m, *n;
4594         int error;
4595
4596         error = 0;
4597         m = rxr->fmp;
4598         if (m->m_len <= (MCLBYTES - ETHER_HDR_LEN)) {
4599                 bcopy(m->m_data, m->m_data + ETHER_HDR_LEN, m->m_len);
4600                 m->m_data += ETHER_HDR_LEN;
4601         } else {
4602                 MGETHDR(n, M_NOWAIT, MT_DATA);
4603                 if (n != NULL) {
4604                         bcopy(m->m_data, n->m_data, ETHER_HDR_LEN);
4605                         m->m_data += ETHER_HDR_LEN;
4606                         m->m_len -= ETHER_HDR_LEN;
4607                         n->m_len = ETHER_HDR_LEN;
4608                         M_MOVE_PKTHDR(n, m);
4609                         n->m_next = m;
4610                         rxr->fmp = n;
4611                 } else {
4612                         adapter->dropped_pkts++;
4613                         m_freem(rxr->fmp);
4614                         rxr->fmp = NULL;
4615                         error = ENOMEM;
4616                 }
4617         }
4618
4619         return (error);
4620 }
4621 #endif
4622
4623 /*********************************************************************
4624  *
4625  *  Verify that the hardware indicated that the checksum is valid.
4626  *  Inform the stack about the status of checksum so that stack
4627  *  doesn't spend time verifying the checksum.
4628  *
4629  *********************************************************************/
4630 static void
4631 em_receive_checksum(struct e1000_rx_desc *rx_desc, struct mbuf *mp)
4632 {
4633         mp->m_pkthdr.csum_flags = 0;
4634
4635         /* Ignore Checksum bit is set */
4636         if (rx_desc->status & E1000_RXD_STAT_IXSM)
4637                 return;
4638
4639         if (rx_desc->errors & (E1000_RXD_ERR_TCPE | E1000_RXD_ERR_IPE))
4640                 return;
4641
4642         /* IP Checksum Good? */
4643         if (rx_desc->status & E1000_RXD_STAT_IPCS)
4644                 mp->m_pkthdr.csum_flags = (CSUM_IP_CHECKED | CSUM_IP_VALID);
4645
4646         /* TCP or UDP checksum */
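        /*
        ** A csum_data of 0xffff combined with CSUM_DATA_VALID |
        ** CSUM_PSEUDO_HDR tells the stack that the full checksum,
        ** pseudo-header included, was verified by the hardware.
        */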
4647         if (rx_desc->status & (E1000_RXD_STAT_TCPCS | E1000_RXD_STAT_UDPCS)) {
4648                 mp->m_pkthdr.csum_flags |= (CSUM_DATA_VALID | CSUM_PSEUDO_HDR);
4649                 mp->m_pkthdr.csum_data = htons(0xffff);
4650         }
4651 }
4652
4653 /*
4654  * This routine is run via a vlan
4655  * config EVENT
4656  */
4657 static void
4658 em_register_vlan(void *arg, struct ifnet *ifp, u16 vtag)
4659 {
4660         struct adapter  *adapter = ifp->if_softc;
4661         u32             index, bit;
4662
4663         if (ifp->if_softc !=  arg)   /* Not our event */
4664                 return;
4665
4666         if ((vtag == 0) || (vtag > 4095))       /* Invalid ID */
4667                 return;
4668
4669         EM_CORE_LOCK(adapter);
4670         index = (vtag >> 5) & 0x7F;
4671         bit = vtag & 0x1F;
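        /*
        ** Example: vtag 100 yields index (100 >> 5) == 3 and
        ** bit (100 & 0x1F) == 4, i.e. bit 4 of the fourth 32-bit
        ** word of the 128-word shadow VFTA.
        */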
4672         adapter->shadow_vfta[index] |= (1 << bit);
4673         ++adapter->num_vlans;
4674         /* Re-init to load the changes */
4675         if (ifp->if_capenable & IFCAP_VLAN_HWFILTER)
4676                 em_init_locked(adapter);
4677         EM_CORE_UNLOCK(adapter);
4678 }
4679
4680 /*
4681  * This routine is run via a vlan
4682  * unconfig EVENT
4683  */
4684 static void
4685 em_unregister_vlan(void *arg, struct ifnet *ifp, u16 vtag)
4686 {
4687         struct adapter  *adapter = ifp->if_softc;
4688         u32             index, bit;
4689
4690         if (ifp->if_softc !=  arg)
4691                 return;
4692
4693         if ((vtag == 0) || (vtag > 4095))       /* Invalid */
4694                 return;
4695
4696         EM_CORE_LOCK(adapter);
4697         index = (vtag >> 5) & 0x7F;
4698         bit = vtag & 0x1F;
4699         adapter->shadow_vfta[index] &= ~(1 << bit);
4700         --adapter->num_vlans;
4701         /* Re-init to load the changes */
4702         if (ifp->if_capenable & IFCAP_VLAN_HWFILTER)
4703                 em_init_locked(adapter);
4704         EM_CORE_UNLOCK(adapter);
4705 }
4706
4707 static void
4708 em_setup_vlan_hw_support(struct adapter *adapter)
4709 {
4710         struct e1000_hw *hw = &adapter->hw;
4711         u32             reg;
4712
4713         /*
4714         ** We get here through init_locked, meaning
4715         ** a soft reset; this has already cleared
4716         ** the VFTA and other state, so if no vlans
4717         ** have been registered, do nothing.
4718         */
4719         if (adapter->num_vlans == 0)
4720                 return;
4721
4722         /*
4723         ** A soft reset zeroes out the VFTA, so
4724         ** we need to repopulate it now.
4725         */
4726         for (int i = 0; i < EM_VFTA_SIZE; i++)
4727                 if (adapter->shadow_vfta[i] != 0)
4728                         E1000_WRITE_REG_ARRAY(hw, E1000_VFTA,
4729                             i, adapter->shadow_vfta[i]);
4730
4731         reg = E1000_READ_REG(hw, E1000_CTRL);
4732         reg |= E1000_CTRL_VME;
4733         E1000_WRITE_REG(hw, E1000_CTRL, reg);
4734
4735         /* Enable the Filter Table */
4736         reg = E1000_READ_REG(hw, E1000_RCTL);
4737         reg &= ~E1000_RCTL_CFIEN;
4738         reg |= E1000_RCTL_VFE;
4739         E1000_WRITE_REG(hw, E1000_RCTL, reg);
4740 }
4741
4742 static void
4743 em_enable_intr(struct adapter *adapter)
4744 {
4745         struct e1000_hw *hw = &adapter->hw;
4746         u32 ims_mask = IMS_ENABLE_MASK;
4747
4748         if (hw->mac.type == e1000_82574) {
4749                 E1000_WRITE_REG(hw, EM_EIAC, EM_MSIX_MASK);
4750                 ims_mask |= EM_MSIX_MASK;
4751         } 
4752         E1000_WRITE_REG(hw, E1000_IMS, ims_mask);
4753 }
4754
4755 static void
4756 em_disable_intr(struct adapter *adapter)
4757 {
4758         struct e1000_hw *hw = &adapter->hw;
4759
4760         if (hw->mac.type == e1000_82574)
4761                 E1000_WRITE_REG(hw, EM_EIAC, 0);
4762         E1000_WRITE_REG(&adapter->hw, E1000_IMC, 0xffffffff);
4763 }
4764
4765 /*
4766  * Bit of a misnomer: what this really means is
4767  * to enable OS management of the system, i.e.
4768  * to disable special hardware management features.
4769  */
4770 static void
4771 em_init_manageability(struct adapter *adapter)
4772 {
4773         /* A shared code workaround */
4774 #define E1000_82542_MANC2H E1000_MANC2H
4775         if (adapter->has_manage) {
4776                 int manc2h = E1000_READ_REG(&adapter->hw, E1000_MANC2H);
4777                 int manc = E1000_READ_REG(&adapter->hw, E1000_MANC);
4778
4779                 /* disable hardware interception of ARP */
4780                 manc &= ~(E1000_MANC_ARP_EN);
4781
4782                 /* enable receiving management packets to the host */
4783                 manc |= E1000_MANC_EN_MNG2HOST;
4784 #define E1000_MNG2HOST_PORT_623 (1 << 5)
4785 #define E1000_MNG2HOST_PORT_664 (1 << 6)
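                /* Ports 623 and 664 are the RMCP and secure-RMCP
                 * ports used for ASF/BMC management traffic. */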
4786                 manc2h |= E1000_MNG2HOST_PORT_623;
4787                 manc2h |= E1000_MNG2HOST_PORT_664;
4788                 E1000_WRITE_REG(&adapter->hw, E1000_MANC2H, manc2h);
4789                 E1000_WRITE_REG(&adapter->hw, E1000_MANC, manc);
4790         }
4791 }
4792
4793 /*
4794  * Give control back to hardware management
4795  * controller if there is one.
4796  */
4797 static void
4798 em_release_manageability(struct adapter *adapter)
4799 {
4800         if (adapter->has_manage) {
4801                 int manc = E1000_READ_REG(&adapter->hw, E1000_MANC);
4802
4803                 /* re-enable hardware interception of ARP */
4804                 manc |= E1000_MANC_ARP_EN;
4805                 manc &= ~E1000_MANC_EN_MNG2HOST;
4806
4807                 E1000_WRITE_REG(&adapter->hw, E1000_MANC, manc);
4808         }
4809 }
4810
4811 /*
4812  * em_get_hw_control sets the {CTRL_EXT|FWSM}:DRV_LOAD bit.
4813  * For ASF and Pass Through versions of f/w this means
4814  * that the driver is loaded. For AMT version type f/w
4815  * this means that the network i/f is open.
4816  */
4817 static void
4818 em_get_hw_control(struct adapter *adapter)
4819 {
4820         u32 ctrl_ext, swsm;
4821
4822         if (adapter->hw.mac.type == e1000_82573) {
4823                 swsm = E1000_READ_REG(&adapter->hw, E1000_SWSM);
4824                 E1000_WRITE_REG(&adapter->hw, E1000_SWSM,
4825                     swsm | E1000_SWSM_DRV_LOAD);
4826                 return;
4827         }
4828         /* else */
4829         ctrl_ext = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
4830         E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT,
4831             ctrl_ext | E1000_CTRL_EXT_DRV_LOAD);
4832         return;
4833 }
4834
4835 /*
4836  * em_release_hw_control resets {CTRL_EXT|FWSM}:DRV_LOAD bit.
4837  * For ASF and Pass Through versions of f/w this means that
4838  * the driver is no longer loaded. For AMT versions of the
4839  * f/w this means that the network i/f is closed.
4840  */
4841 static void
4842 em_release_hw_control(struct adapter *adapter)
4843 {
4844         u32 ctrl_ext, swsm;
4845
4846         if (!adapter->has_manage)
4847                 return;
4848
4849         if (adapter->hw.mac.type == e1000_82573) {
4850                 swsm = E1000_READ_REG(&adapter->hw, E1000_SWSM);
4851                 E1000_WRITE_REG(&adapter->hw, E1000_SWSM,
4852                     swsm & ~E1000_SWSM_DRV_LOAD);
4853                 return;
4854         }
4855         /* else */
4856         ctrl_ext = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
4857         E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT,
4858             ctrl_ext & ~E1000_CTRL_EXT_DRV_LOAD);
4859         return;
4860 }
4861
4862 static int
4863 em_is_valid_ether_addr(u8 *addr)
4864 {
4865         char zero_addr[6] = { 0, 0, 0, 0, 0, 0 };
4866
4867         if ((addr[0] & 1) || (!bcmp(addr, zero_addr, ETHER_ADDR_LEN))) {
4868                 return (FALSE);
4869         }
4870
4871         return (TRUE);
4872 }
4873
4874 /*
4875 ** Parse the interface capabilities with regard
4876 ** to both system management and wake-on-lan for
4877 ** later use.
4878 */
4879 static void
4880 em_get_wakeup(device_t dev)
4881 {
4882         struct adapter  *adapter = device_get_softc(dev);
4883         u16             eeprom_data = 0, device_id, apme_mask;
4884
4885         adapter->has_manage = e1000_enable_mng_pass_thru(&adapter->hw);
4886         apme_mask = EM_EEPROM_APME;
4887
4888         switch (adapter->hw.mac.type) {
4889         case e1000_82573:
4890         case e1000_82583:
4891                 adapter->has_amt = TRUE;
4892                 /* Falls thru */
4893         case e1000_82571:
4894         case e1000_82572:
4895         case e1000_80003es2lan:
4896                 if (adapter->hw.bus.func == 1) {
4897                         e1000_read_nvm(&adapter->hw,
4898                             NVM_INIT_CONTROL3_PORT_B, 1, &eeprom_data);
4899                         break;
4900                 } else
4901                         e1000_read_nvm(&adapter->hw,
4902                             NVM_INIT_CONTROL3_PORT_A, 1, &eeprom_data);
4903                 break;
4904         case e1000_ich8lan:
4905         case e1000_ich9lan:
4906         case e1000_ich10lan:
4907         case e1000_pchlan:
4908         case e1000_pch2lan:
4909                 apme_mask = E1000_WUC_APME;
4910                 adapter->has_amt = TRUE;
4911                 eeprom_data = E1000_READ_REG(&adapter->hw, E1000_WUC);
4912                 break;
4913         default:
4914                 e1000_read_nvm(&adapter->hw,
4915                     NVM_INIT_CONTROL3_PORT_A, 1, &eeprom_data);
4916                 break;
4917         }
4918         if (eeprom_data & apme_mask)
4919                 adapter->wol = (E1000_WUFC_MAG | E1000_WUFC_MC);
4920         /*
4921          * We have the eeprom settings, now apply the special cases
4922          * where the eeprom may be wrong or the board won't support
4923          * wake on lan on a particular port
4924          */
4925         device_id = pci_get_device(dev);
4926         switch (device_id) {
4927         case E1000_DEV_ID_82571EB_FIBER:
4928                 /* Wake events only supported on port A for dual fiber
4929                  * regardless of eeprom setting */
4930                 if (E1000_READ_REG(&adapter->hw, E1000_STATUS) &
4931                     E1000_STATUS_FUNC_1)
4932                         adapter->wol = 0;
4933                 break;
4934         case E1000_DEV_ID_82571EB_QUAD_COPPER:
4935         case E1000_DEV_ID_82571EB_QUAD_FIBER:
4936         case E1000_DEV_ID_82571EB_QUAD_COPPER_LP:
4937                 /* if quad port adapter, disable WoL on all but port A */
4938                 if (global_quad_port_a != 0)
4939                         adapter->wol = 0;
4940                 /* Reset for multiple quad port adapters */
4941                 if (++global_quad_port_a == 4)
4942                         global_quad_port_a = 0;
4943                 break;
4944         }
4945         return;
4946 }
4947
4948
4949 /*
4950  * Enable PCI Wake On Lan capability
4951  */
4952 static void
4953 em_enable_wakeup(device_t dev)
4954 {
4955         struct adapter  *adapter = device_get_softc(dev);
4956         struct ifnet    *ifp = adapter->ifp;
4957         u32             pmc, ctrl, ctrl_ext, rctl;
4958         u16             status;
4959
4960         if (pci_find_cap(dev, PCIY_PMG, &pmc) != 0)
4961                 return;
4962
4963         /* Advertise the wakeup capability */
4964         ctrl = E1000_READ_REG(&adapter->hw, E1000_CTRL);
4965         ctrl |= (E1000_CTRL_SWDPIN2 | E1000_CTRL_SWDPIN3);
4966         E1000_WRITE_REG(&adapter->hw, E1000_CTRL, ctrl);
4967         E1000_WRITE_REG(&adapter->hw, E1000_WUC, E1000_WUC_PME_EN);
4968
4969         if ((adapter->hw.mac.type == e1000_ich8lan) ||
4970             (adapter->hw.mac.type == e1000_pchlan) ||
4971             (adapter->hw.mac.type == e1000_ich9lan) ||
4972             (adapter->hw.mac.type == e1000_ich10lan))
4973                 e1000_suspend_workarounds_ich8lan(&adapter->hw);
4974
4975         /* Keep the laser running on Fiber adapters */
4976         if (adapter->hw.phy.media_type == e1000_media_type_fiber ||
4977             adapter->hw.phy.media_type == e1000_media_type_internal_serdes) {
4978                 ctrl_ext = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
4979                 ctrl_ext |= E1000_CTRL_EXT_SDP3_DATA;
4980                 E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT, ctrl_ext);
4981         }
4982
4983         /*
4984         ** Determine type of Wakeup: note that wol
4985         ** is set with all bits on by default.
4986         */
4987         if ((ifp->if_capenable & IFCAP_WOL_MAGIC) == 0)
4988                 adapter->wol &= ~E1000_WUFC_MAG;
4989
4990         if ((ifp->if_capenable & IFCAP_WOL_MCAST) == 0)
4991                 adapter->wol &= ~E1000_WUFC_MC;
4992         else {
4993                 rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
4994                 rctl |= E1000_RCTL_MPE;
4995                 E1000_WRITE_REG(&adapter->hw, E1000_RCTL, rctl);
4996         }
4997
4998         if ((adapter->hw.mac.type == e1000_pchlan) ||
4999             (adapter->hw.mac.type == e1000_pch2lan)) {
5000                 if (em_enable_phy_wakeup(adapter))
5001                         return;
5002         } else {
5003                 E1000_WRITE_REG(&adapter->hw, E1000_WUC, E1000_WUC_PME_EN);
5004                 E1000_WRITE_REG(&adapter->hw, E1000_WUFC, adapter->wol);
5005         }
5006
5007         if (adapter->hw.phy.type == e1000_phy_igp_3)
5008                 e1000_igp3_phy_powerdown_workaround_ich8lan(&adapter->hw);
5009
5010         /* Request PME */
5011         status = pci_read_config(dev, pmc + PCIR_POWER_STATUS, 2);
5012         status &= ~(PCIM_PSTAT_PME | PCIM_PSTAT_PMEENABLE);
5013         if (ifp->if_capenable & IFCAP_WOL)
5014                 status |= PCIM_PSTAT_PME | PCIM_PSTAT_PMEENABLE;
5015         pci_write_config(dev, pmc + PCIR_POWER_STATUS, status, 2);
5016
5017         return;
5018 }
5019
5020 /*
5021 ** WOL in the newer chipset interfaces (pchlan)
5022 ** requires things to be copied into the PHY
5023 */
5024 static int
5025 em_enable_phy_wakeup(struct adapter *adapter)
5026 {
5027         struct e1000_hw *hw = &adapter->hw;
5028         u32 mreg, ret = 0;
5029         u16 preg;
5030
5031         /* copy MAC RARs to PHY RARs */
5032         e1000_copy_rx_addrs_to_phy_ich8lan(hw);
5033
5034         /* copy MAC MTA to PHY MTA */
5035         for (int i = 0; i < adapter->hw.mac.mta_reg_count; i++) {
5036                 mreg = E1000_READ_REG_ARRAY(hw, E1000_MTA, i);
5037                 e1000_write_phy_reg(hw, BM_MTA(i), (u16)(mreg & 0xFFFF));
5038                 e1000_write_phy_reg(hw, BM_MTA(i) + 1,
5039                     (u16)((mreg >> 16) & 0xFFFF));
5040         }
5041
5042         /* configure PHY Rx Control register */
5043         e1000_read_phy_reg(&adapter->hw, BM_RCTL, &preg);
5044         mreg = E1000_READ_REG(hw, E1000_RCTL);
5045         if (mreg & E1000_RCTL_UPE)
5046                 preg |= BM_RCTL_UPE;
5047         if (mreg & E1000_RCTL_MPE)
5048                 preg |= BM_RCTL_MPE;
5049         preg &= ~(BM_RCTL_MO_MASK);
5050         if (mreg & E1000_RCTL_MO_3)
5051                 preg |= (((mreg & E1000_RCTL_MO_3) >> E1000_RCTL_MO_SHIFT)
5052                                 << BM_RCTL_MO_SHIFT);
5053         if (mreg & E1000_RCTL_BAM)
5054                 preg |= BM_RCTL_BAM;
5055         if (mreg & E1000_RCTL_PMCF)
5056                 preg |= BM_RCTL_PMCF;
5057         mreg = E1000_READ_REG(hw, E1000_CTRL);
5058         if (mreg & E1000_CTRL_RFCE)
5059                 preg |= BM_RCTL_RFCE;
5060         e1000_write_phy_reg(&adapter->hw, BM_RCTL, preg);
5061
5062         /* enable PHY wakeup in MAC register */
5063         E1000_WRITE_REG(hw, E1000_WUC,
5064             E1000_WUC_PHY_WAKE | E1000_WUC_PME_EN);
5065         E1000_WRITE_REG(hw, E1000_WUFC, adapter->wol);
5066
5067         /* configure and enable PHY wakeup in PHY registers */
5068         e1000_write_phy_reg(&adapter->hw, BM_WUFC, adapter->wol);
5069         e1000_write_phy_reg(&adapter->hw, BM_WUC, E1000_WUC_PME_EN);
5070
5071         /* activate PHY wakeup */
5072         ret = hw->phy.ops.acquire(hw);
5073         if (ret) {
5074                 printf("Could not acquire PHY\n");
5075                 return ret;
5076         }
5077         e1000_write_phy_reg_mdic(hw, IGP01E1000_PHY_PAGE_SELECT,
5078                                  (BM_WUC_ENABLE_PAGE << IGP_PAGE_SHIFT));
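        /* The page-select write above switches the PHY to page 769
         * (BM_WUC_ENABLE_PAGE), where the wakeup enable register lives. */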
5079         ret = e1000_read_phy_reg_mdic(hw, BM_WUC_ENABLE_REG, &preg);
5080         if (ret) {
5081                 printf("Could not read PHY page 769\n");
5082                 goto out;
5083         }
5084         preg |= BM_WUC_ENABLE_BIT | BM_WUC_HOST_WU_BIT;
5085         ret = e1000_write_phy_reg_mdic(hw, BM_WUC_ENABLE_REG, preg);
5086         if (ret)
5087                 printf("Could not set PHY Host Wakeup bit\n");
5088 out:
5089         hw->phy.ops.release(hw);
5090
5091         return ret;
5092 }
5093
5094 static void
5095 em_led_func(void *arg, int onoff)
5096 {
5097         struct adapter  *adapter = arg;
5098  
5099         EM_CORE_LOCK(adapter);
5100         if (onoff) {
5101                 e1000_setup_led(&adapter->hw);
5102                 e1000_led_on(&adapter->hw);
5103         } else {
5104                 e1000_led_off(&adapter->hw);
5105                 e1000_cleanup_led(&adapter->hw);
5106         }
5107         EM_CORE_UNLOCK(adapter);
5108 }
5109
5110 /*
5111 ** Disable the L0s and L1 link states
5112 */
5113 static void
5114 em_disable_aspm(struct adapter *adapter)
5115 {
5116         int             base, reg;
5117         u16             link_cap, link_ctrl;
5118         device_t        dev = adapter->dev;
5119
5120         switch (adapter->hw.mac.type) {
5121                 case e1000_82573:
5122                 case e1000_82574:
5123                 case e1000_82583:
5124                         break;
5125                 default:
5126                         return;
5127         }
5128         if (pci_find_cap(dev, PCIY_EXPRESS, &base) != 0)
5129                 return;
5130         reg = base + PCIER_LINK_CAP;
5131         link_cap = pci_read_config(dev, reg, 2);
5132         if ((link_cap & PCIEM_LINK_CAP_ASPM) == 0)
5133                 return;
5134         reg = base + PCIER_LINK_CTL;
5135         link_ctrl = pci_read_config(dev, reg, 2);
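        /*
        ** ASPMC is the two-bit ASPM control field of the PCIe Link
        ** Control register; clearing both bits disables L0s and L1
        ** entry entirely.
        */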
5136         link_ctrl &= ~PCIEM_LINK_CTL_ASPMC;
5137         pci_write_config(dev, reg, link_ctrl, 2);
5138         return;
5139 }
5140
5141 /**********************************************************************
5142  *
5143  *  Update the board statistics counters.
5144  *
5145  **********************************************************************/
5146 static void
5147 em_update_stats_counters(struct adapter *adapter)
5148 {
5149         struct ifnet   *ifp;
5150
5151         if (adapter->hw.phy.media_type == e1000_media_type_copper ||
5152            (E1000_READ_REG(&adapter->hw, E1000_STATUS) & E1000_STATUS_LU)) {
5153                 adapter->stats.symerrs += E1000_READ_REG(&adapter->hw, E1000_SYMERRS);
5154                 adapter->stats.sec += E1000_READ_REG(&adapter->hw, E1000_SEC);
5155         }
5156         adapter->stats.crcerrs += E1000_READ_REG(&adapter->hw, E1000_CRCERRS);
5157         adapter->stats.mpc += E1000_READ_REG(&adapter->hw, E1000_MPC);
5158         adapter->stats.scc += E1000_READ_REG(&adapter->hw, E1000_SCC);
5159         adapter->stats.ecol += E1000_READ_REG(&adapter->hw, E1000_ECOL);
5160
5161         adapter->stats.mcc += E1000_READ_REG(&adapter->hw, E1000_MCC);
5162         adapter->stats.latecol += E1000_READ_REG(&adapter->hw, E1000_LATECOL);
5163         adapter->stats.colc += E1000_READ_REG(&adapter->hw, E1000_COLC);
5164         adapter->stats.dc += E1000_READ_REG(&adapter->hw, E1000_DC);
5165         adapter->stats.rlec += E1000_READ_REG(&adapter->hw, E1000_RLEC);
5166         adapter->stats.xonrxc += E1000_READ_REG(&adapter->hw, E1000_XONRXC);
5167         adapter->stats.xontxc += E1000_READ_REG(&adapter->hw, E1000_XONTXC);
5168         /*
5169         ** For watchdog management we need to know if we have been
5170         ** paused during the last interval, so capture that here.
5171         */
5172         adapter->pause_frames = E1000_READ_REG(&adapter->hw, E1000_XOFFRXC);
5173         adapter->stats.xoffrxc += adapter->pause_frames;
5174         adapter->stats.xofftxc += E1000_READ_REG(&adapter->hw, E1000_XOFFTXC);
5175         adapter->stats.fcruc += E1000_READ_REG(&adapter->hw, E1000_FCRUC);
5176         adapter->stats.prc64 += E1000_READ_REG(&adapter->hw, E1000_PRC64);
5177         adapter->stats.prc127 += E1000_READ_REG(&adapter->hw, E1000_PRC127);
5178         adapter->stats.prc255 += E1000_READ_REG(&adapter->hw, E1000_PRC255);
5179         adapter->stats.prc511 += E1000_READ_REG(&adapter->hw, E1000_PRC511);
5180         adapter->stats.prc1023 += E1000_READ_REG(&adapter->hw, E1000_PRC1023);
5181         adapter->stats.prc1522 += E1000_READ_REG(&adapter->hw, E1000_PRC1522);
5182         adapter->stats.gprc += E1000_READ_REG(&adapter->hw, E1000_GPRC);
5183         adapter->stats.bprc += E1000_READ_REG(&adapter->hw, E1000_BPRC);
5184         adapter->stats.mprc += E1000_READ_REG(&adapter->hw, E1000_MPRC);
5185         adapter->stats.gptc += E1000_READ_REG(&adapter->hw, E1000_GPTC);
5186
5187         /* For the 64-bit byte counters the low dword must be read first;
5188          * both registers clear on the read of the high dword. */
5189
5190         adapter->stats.gorc += E1000_READ_REG(&adapter->hw, E1000_GORCL) +
5191             ((u64)E1000_READ_REG(&adapter->hw, E1000_GORCH) << 32);
5192         adapter->stats.gotc += E1000_READ_REG(&adapter->hw, E1000_GOTCL) +
5193             ((u64)E1000_READ_REG(&adapter->hw, E1000_GOTCH) << 32);
5194
5195         adapter->stats.rnbc += E1000_READ_REG(&adapter->hw, E1000_RNBC);
5196         adapter->stats.ruc += E1000_READ_REG(&adapter->hw, E1000_RUC);
5197         adapter->stats.rfc += E1000_READ_REG(&adapter->hw, E1000_RFC);
5198         adapter->stats.roc += E1000_READ_REG(&adapter->hw, E1000_ROC);
5199         adapter->stats.rjc += E1000_READ_REG(&adapter->hw, E1000_RJC);
5200
5201         adapter->stats.tor += E1000_READ_REG(&adapter->hw, E1000_TORH);
5202         adapter->stats.tot += E1000_READ_REG(&adapter->hw, E1000_TOTH);
5203
5204         adapter->stats.tpr += E1000_READ_REG(&adapter->hw, E1000_TPR);
5205         adapter->stats.tpt += E1000_READ_REG(&adapter->hw, E1000_TPT);
5206         adapter->stats.ptc64 += E1000_READ_REG(&adapter->hw, E1000_PTC64);
5207         adapter->stats.ptc127 += E1000_READ_REG(&adapter->hw, E1000_PTC127);
5208         adapter->stats.ptc255 += E1000_READ_REG(&adapter->hw, E1000_PTC255);
5209         adapter->stats.ptc511 += E1000_READ_REG(&adapter->hw, E1000_PTC511);
5210         adapter->stats.ptc1023 += E1000_READ_REG(&adapter->hw, E1000_PTC1023);
5211         adapter->stats.ptc1522 += E1000_READ_REG(&adapter->hw, E1000_PTC1522);
5212         adapter->stats.mptc += E1000_READ_REG(&adapter->hw, E1000_MPTC);
5213         adapter->stats.bptc += E1000_READ_REG(&adapter->hw, E1000_BPTC);
5214
5215         /* Interrupt Counts */
5216
5217         adapter->stats.iac += E1000_READ_REG(&adapter->hw, E1000_IAC);
5218         adapter->stats.icrxptc += E1000_READ_REG(&adapter->hw, E1000_ICRXPTC);
5219         adapter->stats.icrxatc += E1000_READ_REG(&adapter->hw, E1000_ICRXATC);
5220         adapter->stats.ictxptc += E1000_READ_REG(&adapter->hw, E1000_ICTXPTC);
5221         adapter->stats.ictxatc += E1000_READ_REG(&adapter->hw, E1000_ICTXATC);
5222         adapter->stats.ictxqec += E1000_READ_REG(&adapter->hw, E1000_ICTXQEC);
5223         adapter->stats.ictxqmtc += E1000_READ_REG(&adapter->hw, E1000_ICTXQMTC);
5224         adapter->stats.icrxdmtc += E1000_READ_REG(&adapter->hw, E1000_ICRXDMTC);
5225         adapter->stats.icrxoc += E1000_READ_REG(&adapter->hw, E1000_ICRXOC);
5226
5227         if (adapter->hw.mac.type >= e1000_82543) {
5228                 adapter->stats.algnerrc +=
5229                     E1000_READ_REG(&adapter->hw, E1000_ALGNERRC);
5230                 adapter->stats.rxerrc +=
5231                     E1000_READ_REG(&adapter->hw, E1000_RXERRC);
5232                 adapter->stats.tncrs +=
5233                     E1000_READ_REG(&adapter->hw, E1000_TNCRS);
5234                 adapter->stats.cexterr +=
5235                     E1000_READ_REG(&adapter->hw, E1000_CEXTERR);
5236                 adapter->stats.tsctc +=
5237                     E1000_READ_REG(&adapter->hw, E1000_TSCTC);
5238                 adapter->stats.tsctfc +=
5239                     E1000_READ_REG(&adapter->hw, E1000_TSCTFC);
5240         }
5241         ifp = adapter->ifp;
5242
5243         ifp->if_collisions = adapter->stats.colc;
5244
5245         /* Rx Errors */
5246         ifp->if_ierrors = adapter->dropped_pkts + adapter->stats.rxerrc +
5247             adapter->stats.crcerrs + adapter->stats.algnerrc +
5248             adapter->stats.ruc + adapter->stats.roc +
5249             adapter->stats.mpc + adapter->stats.cexterr;
5250
5251         /* Tx Errors */
5252         ifp->if_oerrors = adapter->stats.ecol +
5253             adapter->stats.latecol + adapter->watchdog_events;
5254 }
5255
5256 /* Export a single 32-bit register via a read-only sysctl. */
5257 static int
5258 em_sysctl_reg_handler(SYSCTL_HANDLER_ARGS)
5259 {
5260         struct adapter *adapter;
5261         u_int val;
5262
5263         adapter = oidp->oid_arg1;
5264         val = E1000_READ_REG(&adapter->hw, oidp->oid_arg2);
5265         return (sysctl_handle_int(oidp, &val, 0, req));
5266 }
5267
5268 /*
5269  * Add sysctl variables, one per statistic, to the system.
5270  */
5271 static void
5272 em_add_hw_stats(struct adapter *adapter)
5273 {
5274         device_t dev = adapter->dev;
5275
5276         struct tx_ring *txr = adapter->tx_rings;
5277         struct rx_ring *rxr = adapter->rx_rings;
5278
5279         struct sysctl_ctx_list *ctx = device_get_sysctl_ctx(dev);
5280         struct sysctl_oid *tree = device_get_sysctl_tree(dev);
5281         struct sysctl_oid_list *child = SYSCTL_CHILDREN(tree);
5282         struct e1000_hw_stats *stats = &adapter->stats;
5283
5284         struct sysctl_oid *stat_node, *queue_node, *int_node;
5285         struct sysctl_oid_list *stat_list, *queue_list, *int_list;
5286
5287 #define QUEUE_NAME_LEN 32
5288         char namebuf[QUEUE_NAME_LEN];
5289         
5290         /* Driver Statistics */
5291         SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "link_irq",
5292                         CTLFLAG_RD, &adapter->link_irq,
5293                         "Link MSIX IRQ Handled");
5294         SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "mbuf_alloc_fail", 
5295                          CTLFLAG_RD, &adapter->mbuf_alloc_failed,
5296                          "Std mbuf failed");
5297         SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "cluster_alloc_fail", 
5298                          CTLFLAG_RD, &adapter->mbuf_cluster_failed,
5299                          "Std mbuf cluster failed");
5300         SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "dropped", 
5301                         CTLFLAG_RD, &adapter->dropped_pkts,
5302                         "Driver dropped packets");
5303         SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "tx_dma_fail", 
5304                         CTLFLAG_RD, &adapter->no_tx_dma_setup,
5305                         "Driver tx dma failure in xmit");
5306         SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "rx_overruns",
5307                         CTLFLAG_RD, &adapter->rx_overruns,
5308                         "RX overruns");
5309         SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "watchdog_timeouts",
5310                         CTLFLAG_RD, &adapter->watchdog_events,
5311                         "Watchdog timeouts");
5312         
5313         SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "device_control",
5314                         CTLTYPE_UINT | CTLFLAG_RD, adapter, E1000_CTRL,
5315                         em_sysctl_reg_handler, "IU",
5316                         "Device Control Register");
5317         SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "rx_control",
5318                         CTLTYPE_UINT | CTLFLAG_RD, adapter, E1000_RCTL,
5319                         em_sysctl_reg_handler, "IU",
5320                         "Receiver Control Register");
5321         SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "fc_high_water",
5322                         CTLFLAG_RD, &adapter->hw.fc.high_water, 0,
5323                         "Flow Control High Watermark");
5324         SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "fc_low_water", 
5325                         CTLFLAG_RD, &adapter->hw.fc.low_water, 0,
5326                         "Flow Control Low Watermark");
5327
5328         for (int i = 0; i < adapter->num_queues; i++, rxr++, txr++) {
5329                 snprintf(namebuf, QUEUE_NAME_LEN, "queue%d", i);
5330                 queue_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, namebuf,
5331                                             CTLFLAG_RD, NULL, "Queue Name");
5332                 queue_list = SYSCTL_CHILDREN(queue_node);
5333
5334                 SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "txd_head", 
5335                                 CTLTYPE_UINT | CTLFLAG_RD, adapter,
5336                                 E1000_TDH(txr->me),
5337                                 em_sysctl_reg_handler, "IU",
5338                                 "Transmit Descriptor Head");
5339                 SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "txd_tail", 
5340                                 CTLTYPE_UINT | CTLFLAG_RD, adapter,
5341                                 E1000_TDT(txr->me),
5342                                 em_sysctl_reg_handler, "IU",
5343                                 "Transmit Descriptor Tail");
5344                 SYSCTL_ADD_ULONG(ctx, queue_list, OID_AUTO, "tx_irq",
5345                                 CTLFLAG_RD, &txr->tx_irq,
5346                                 "Queue MSI-X Transmit Interrupts");
5347                 SYSCTL_ADD_ULONG(ctx, queue_list, OID_AUTO, "no_desc_avail", 
5348                                 CTLFLAG_RD, &txr->no_desc_avail,
5349                                 "Queue No Descriptor Available");
5350                 
5351                 SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "rxd_head", 
5352                                 CTLTYPE_UINT | CTLFLAG_RD, adapter,
5353                                 E1000_RDH(rxr->me),
5354                                 em_sysctl_reg_handler, "IU",
5355                                 "Receive Descriptor Head");
5356                 SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "rxd_tail", 
5357                                 CTLTYPE_UINT | CTLFLAG_RD, adapter,
5358                                 E1000_RDT(rxr->me),
5359                                 em_sysctl_reg_handler, "IU",
5360                                 "Receive Descriptor Tail");
5361                 SYSCTL_ADD_ULONG(ctx, queue_list, OID_AUTO, "rx_irq",
5362                                 CTLFLAG_RD, &rxr->rx_irq,
5363                                 "Queue MSI-X Receive Interrupts");
5364         }

        /* MAC stats get their own sub node */

        stat_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "mac_stats",
                                    CTLFLAG_RD, NULL, "Statistics");
        stat_list = SYSCTL_CHILDREN(stat_node);

        SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "excess_coll",
                        CTLFLAG_RD, &stats->ecol,
                        "Excessive collisions");
        SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "single_coll",
                        CTLFLAG_RD, &stats->scc,
                        "Single collisions");
        SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "multiple_coll",
                        CTLFLAG_RD, &stats->mcc,
                        "Multiple collisions");
        SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "late_coll",
                        CTLFLAG_RD, &stats->latecol,
                        "Late collisions");
        SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "collision_count",
                        CTLFLAG_RD, &stats->colc,
                        "Collision Count");
        SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "symbol_errors",
                        CTLFLAG_RD, &adapter->stats.symerrs,
                        "Symbol Errors");
        SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "sequence_errors",
                        CTLFLAG_RD, &adapter->stats.sec,
                        "Sequence Errors");
        SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "defer_count",
                        CTLFLAG_RD, &adapter->stats.dc,
                        "Defer Count");
        SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "missed_packets",
                        CTLFLAG_RD, &adapter->stats.mpc,
                        "Missed Packets");
        SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_no_buff",
                        CTLFLAG_RD, &adapter->stats.rnbc,
                        "Receive No Buffers");
        SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_undersize",
                        CTLFLAG_RD, &adapter->stats.ruc,
                        "Receive Undersize");
        SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_fragmented",
                        CTLFLAG_RD, &adapter->stats.rfc,
                        "Fragmented Packets Received");
        SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_oversize",
                        CTLFLAG_RD, &adapter->stats.roc,
                        "Oversized Packets Received");
        SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_jabber",
                        CTLFLAG_RD, &adapter->stats.rjc,
                        "Received Jabber");
        SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_errs",
                        CTLFLAG_RD, &adapter->stats.rxerrc,
                        "Receive Errors");
        SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "crc_errs",
                        CTLFLAG_RD, &adapter->stats.crcerrs,
                        "CRC errors");
        SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "alignment_errs",
                        CTLFLAG_RD, &adapter->stats.algnerrc,
                        "Alignment Errors");
        /* On 82575 these are collision counts */
        SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "coll_ext_errs",
                        CTLFLAG_RD, &adapter->stats.cexterr,
                        "Collision/Carrier extension errors");
        SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "xon_recvd",
                        CTLFLAG_RD, &adapter->stats.xonrxc,
                        "XON Received");
        SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "xon_txd",
                        CTLFLAG_RD, &adapter->stats.xontxc,
                        "XON Transmitted");
        SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "xoff_recvd",
                        CTLFLAG_RD, &adapter->stats.xoffrxc,
                        "XOFF Received");
        SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "xoff_txd",
                        CTLFLAG_RD, &adapter->stats.xofftxc,
                        "XOFF Transmitted");

        /* Packet Reception Stats */
        SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "total_pkts_recvd",
                        CTLFLAG_RD, &adapter->stats.tpr,
                        "Total Packets Received");
        SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_pkts_recvd",
                        CTLFLAG_RD, &adapter->stats.gprc,
                        "Good Packets Received");
        SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "bcast_pkts_recvd",
                        CTLFLAG_RD, &adapter->stats.bprc,
                        "Broadcast Packets Received");
        SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "mcast_pkts_recvd",
                        CTLFLAG_RD, &adapter->stats.mprc,
                        "Multicast Packets Received");
        SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_64",
                        CTLFLAG_RD, &adapter->stats.prc64,
                        "64 byte frames received");
        SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_65_127",
                        CTLFLAG_RD, &adapter->stats.prc127,
                        "65-127 byte frames received");
        SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_128_255",
                        CTLFLAG_RD, &adapter->stats.prc255,
                        "128-255 byte frames received");
        SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_256_511",
                        CTLFLAG_RD, &adapter->stats.prc511,
                        "256-511 byte frames received");
        SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_512_1023",
                        CTLFLAG_RD, &adapter->stats.prc1023,
                        "512-1023 byte frames received");
        SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_1024_1522",
                        CTLFLAG_RD, &adapter->stats.prc1522,
                        "1024-1522 byte frames received");
        SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_octets_recvd",
                        CTLFLAG_RD, &adapter->stats.gorc,
                        "Good Octets Received");

        /* Packet Transmission Stats */
        SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_octets_txd",
                        CTLFLAG_RD, &adapter->stats.gotc,
                        "Good Octets Transmitted");
        SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "total_pkts_txd",
                        CTLFLAG_RD, &adapter->stats.tpt,
                        "Total Packets Transmitted");
        SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_pkts_txd",
                        CTLFLAG_RD, &adapter->stats.gptc,
                        "Good Packets Transmitted");
        SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "bcast_pkts_txd",
                        CTLFLAG_RD, &adapter->stats.bptc,
                        "Broadcast Packets Transmitted");
        SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "mcast_pkts_txd",
                        CTLFLAG_RD, &adapter->stats.mptc,
                        "Multicast Packets Transmitted");
        SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_64",
                        CTLFLAG_RD, &adapter->stats.ptc64,
                        "64 byte frames transmitted");
        SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_65_127",
                        CTLFLAG_RD, &adapter->stats.ptc127,
                        "65-127 byte frames transmitted");
        SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_128_255",
                        CTLFLAG_RD, &adapter->stats.ptc255,
                        "128-255 byte frames transmitted");
        SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_256_511",
                        CTLFLAG_RD, &adapter->stats.ptc511,
                        "256-511 byte frames transmitted");
        SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_512_1023",
                        CTLFLAG_RD, &adapter->stats.ptc1023,
                        "512-1023 byte frames transmitted");
        SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_1024_1522",
                        CTLFLAG_RD, &adapter->stats.ptc1522,
                        "1024-1522 byte frames transmitted");
        SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tso_txd",
                        CTLFLAG_RD, &adapter->stats.tsctc,
                        "TSO Contexts Transmitted");
        SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tso_ctx_fail",
                        CTLFLAG_RD, &adapter->stats.tsctfc,
                        "TSO Contexts Failed");

        /* Interrupt Stats */

        int_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "interrupts",
                                    CTLFLAG_RD, NULL, "Interrupt Statistics");
        int_list = SYSCTL_CHILDREN(int_node);

        SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "asserts",
                        CTLFLAG_RD, &adapter->stats.iac,
                        "Interrupt Assertion Count");

        SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "rx_pkt_timer",
                        CTLFLAG_RD, &adapter->stats.icrxptc,
                        "Interrupt Cause Rx Pkt Timer Expire Count");

        SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "rx_abs_timer",
                        CTLFLAG_RD, &adapter->stats.icrxatc,
                        "Interrupt Cause Rx Abs Timer Expire Count");

        SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "tx_pkt_timer",
                        CTLFLAG_RD, &adapter->stats.ictxptc,
                        "Interrupt Cause Tx Pkt Timer Expire Count");

        SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "tx_abs_timer",
                        CTLFLAG_RD, &adapter->stats.ictxatc,
                        "Interrupt Cause Tx Abs Timer Expire Count");

        SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "tx_queue_empty",
                        CTLFLAG_RD, &adapter->stats.ictxqec,
                        "Interrupt Cause Tx Queue Empty Count");

        SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "tx_queue_min_thresh",
                        CTLFLAG_RD, &adapter->stats.ictxqmtc,
                        "Interrupt Cause Tx Queue Min Thresh Count");

        SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "rx_desc_min_thresh",
                        CTLFLAG_RD, &adapter->stats.icrxdmtc,
                        "Interrupt Cause Rx Desc Min Thresh Count");

        SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "rx_overrun",
                        CTLFLAG_RD, &adapter->stats.icrxoc,
                        "Interrupt Cause Receiver Overrun Count");
}
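
/*
 * Illustrative only (not part of the driver): the nodes registered above
 * hang off the device's sysctl tree, so from userland they can be read
 * with sysctl(8).  The unit number and any parent prefix depend on how
 * the device attached; assuming unit 0:
 *
 *   sysctl dev.em.0.queue0.txd_head      # TDH register, via em_sysctl_reg_handler
 *   sysctl dev.em.0.mac_stats.crc_errs   # adapter->stats.crcerrs
 *   sysctl dev.em.0.interrupts.asserts   # adapter->stats.iac
 */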

/**********************************************************************
 *
 *  This routine provides a way to dump out the adapter EEPROM,
 *  often a useful debug/service tool.  Only the first 32 words are
 *  dumped; the fields that matter live within that range.
 *
 **********************************************************************/
static int
em_sysctl_nvm_info(SYSCTL_HANDLER_ARGS)
{
        struct adapter *adapter = (struct adapter *)arg1;
        int error;
        int result;

        result = -1;
        error = sysctl_handle_int(oidp, &result, 0, req);

        if (error || !req->newptr)
                return (error);

        /*
         * This value will cause a hex dump of the
         * first 32 16-bit words of the EEPROM to
         * the screen.
         */
        if (result == 1)
                em_print_nvm_info(adapter);

        return (error);
}
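
/*
 * Illustrative only: assuming the attach code exposes this handler under
 * an OID such as "nvm" (the name is chosen at registration time), writing
 * the value 1 triggers the dump and any other value is ignored:
 *
 *   sysctl dev.em.0.nvm=1
 */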

static void
em_print_nvm_info(struct adapter *adapter)
{
        u16     eeprom_data;
        int     i, j, row = 0;

        /* It's a bit crude, but it gets the job done */
        printf("\nInterface EEPROM Dump:\n");
        printf("Offset\n0x0000  ");
        for (i = 0, j = 0; i < 32; i++, j++) {
                if (j == 8) { /* Make the offset block */
                        j = 0; ++row;
                        printf("\n0x00%x0  ", row);
                }
                e1000_read_nvm(&adapter->hw, i, 1, &eeprom_data);
                printf("%04x ", eeprom_data);
        }
        printf("\n");
}
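
/*
 * Illustrative only: 32 words at 8 per row gives four rows, and since a
 * row of eight 16-bit words is 16 bytes, the "0x00<row>0" label works out
 * to the byte offset of each row.  The dump comes out shaped like this
 * (values are made up):
 *
 *   Interface EEPROM Dump:
 *   Offset
 *   0x0000  001b 2134 56ab ...
 *   0x0010  ...
 *   0x0020  ...
 *   0x0030  ...
 */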

static int
em_sysctl_int_delay(SYSCTL_HANDLER_ARGS)
{
        struct em_int_delay_info *info;
        struct adapter *adapter;
        u32 regval;
        int error, usecs, ticks;

        info = (struct em_int_delay_info *)arg1;
        usecs = info->value;
        error = sysctl_handle_int(oidp, &usecs, 0, req);
        if (error != 0 || req->newptr == NULL)
                return (error);
        if (usecs < 0 || usecs > EM_TICKS_TO_USECS(65535))
                return (EINVAL);
        info->value = usecs;
        ticks = EM_USECS_TO_TICKS(usecs);
        if (info->offset == E1000_ITR) /* ITR units are 256ns, not 1024ns */
                ticks *= 4;

        adapter = info->adapter;

        EM_CORE_LOCK(adapter);
        regval = E1000_READ_OFFSET(&adapter->hw, info->offset);
        regval = (regval & ~0xffff) | (ticks & 0xffff);
        /* Handle a few special cases. */
        switch (info->offset) {
        case E1000_RDTR:
                break;
        case E1000_TIDV:
                if (ticks == 0) {
                        adapter->txd_cmd &= ~E1000_TXD_CMD_IDE;
                        /* Don't write 0 into the TIDV register. */
                        regval++;
                } else
                        adapter->txd_cmd |= E1000_TXD_CMD_IDE;
                break;
        }
        E1000_WRITE_OFFSET(&adapter->hw, info->offset, regval);
        EM_CORE_UNLOCK(adapter);
        return (0);
}
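
/*
 * Illustrative only: EM_USECS_TO_TICKS()/EM_TICKS_TO_USECS() convert
 * between microseconds and the 1.024us units the delay registers count,
 * so a request of 100us maps to about 98 ticks.  The ITR field instead
 * counts 256ns units, and 1024/256 = 4 is why the same interval needs
 * "ticks *= 4" above.  The EM_TICKS_TO_USECS(65535) bound rejects any
 * request that would not fit the registers' 16-bit field, which is also
 * why only the low 16 bits of regval are replaced.
 */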

static void
em_add_int_delay_sysctl(struct adapter *adapter, const char *name,
        const char *description, struct em_int_delay_info *info,
        int offset, int value)
{
        info->adapter = adapter;
        info->offset = offset;
        info->value = value;
        SYSCTL_ADD_PROC(device_get_sysctl_ctx(adapter->dev),
            SYSCTL_CHILDREN(device_get_sysctl_tree(adapter->dev)),
            OID_AUTO, name, CTLTYPE_INT|CTLFLAG_RW,
            info, 0, em_sysctl_int_delay, "I", description);
}
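
/*
 * Illustrative only: a sketch of how attach-time code wires one of these
 * up; the OID name, register, and default shown here are examples, not a
 * quote of the driver's actual attach code:
 *
 *   em_add_int_delay_sysctl(adapter, "rx_int_delay",
 *       "receive interrupt delay in usecs", &adapter->rx_int_delay,
 *       E1000_REGISTER(&adapter->hw, E1000_RDTR), em_rx_int_delay_dflt);
 */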

static void
em_set_sysctl_value(struct adapter *adapter, const char *name,
        const char *description, int *limit, int value)
{
        *limit = value;
        SYSCTL_ADD_INT(device_get_sysctl_ctx(adapter->dev),
            SYSCTL_CHILDREN(device_get_sysctl_tree(adapter->dev)),
            OID_AUTO, name, CTLTYPE_INT|CTLFLAG_RW, limit, value, description);
}

/*
** Set flow control using sysctl:
** Flow control values:
**      0 - off
**      1 - rx pause
**      2 - tx pause
**      3 - full
*/
static int
em_set_flowcntl(SYSCTL_HANDLER_ARGS)
{
        int             error, input;
        struct adapter  *adapter = (struct adapter *) arg1;

        /*
         * Report the current setting; a static default here would be
         * shared by every adapter in the system.
         */
        input = adapter->fc;
        error = sysctl_handle_int(oidp, &input, 0, req);

        if ((error) || (req->newptr == NULL))
                return (error);

        if (input == adapter->fc) /* no change? */
                return (error);

        switch (input) {
        case e1000_fc_rx_pause:
        case e1000_fc_tx_pause:
        case e1000_fc_full:
        case e1000_fc_none:
                adapter->hw.fc.requested_mode = input;
                adapter->fc = input;
                break;
        default:
                /* Do nothing */
                return (error);
        }

        adapter->hw.fc.current_mode = adapter->hw.fc.requested_mode;
        e1000_force_mac_fc(&adapter->hw);
        return (error);
}
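
/*
 * Illustrative only: assuming the handler is attached as "fc" under the
 * device node, flow control can be changed at runtime, e.g. disabled
 * entirely with:
 *
 *   sysctl dev.em.0.fc=0
 *
 * The accepted values follow the e1000_fc_mode enum above: 1 for rx
 * pause, 2 for tx pause, 3 for full.
 */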

/*
** Manage Energy Efficient Ethernet:
** Control values:
**      0 - EEE enabled
**      1 - EEE disabled (the value is the disable flag)
*/
static int
em_sysctl_eee(SYSCTL_HANDLER_ARGS)
{
        struct adapter *adapter = (struct adapter *) arg1;
        int             error, value;

        value = adapter->hw.dev_spec.ich8lan.eee_disable;
        error = sysctl_handle_int(oidp, &value, 0, req);
        if (error || req->newptr == NULL)
                return (error);
        EM_CORE_LOCK(adapter);
        adapter->hw.dev_spec.ich8lan.eee_disable = (value != 0);
        em_init_locked(adapter);
        EM_CORE_UNLOCK(adapter);
        return (0);
}
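
/*
 * Illustrative only: assuming the attach code exposes this handler as
 * "eee_control", EEE can be re-enabled by clearing the disable flag:
 *
 *   sysctl dev.em.0.eee_control=0
 *
 * Note that the write reinitializes the interface via em_init_locked(),
 * so expect the link to bounce.
 */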

static int
em_sysctl_debug_info(SYSCTL_HANDLER_ARGS)
{
        struct adapter *adapter;
        int error;
        int result;

        result = -1;
        error = sysctl_handle_int(oidp, &result, 0, req);

        if (error || !req->newptr)
                return (error);

        if (result == 1) {
                adapter = (struct adapter *)arg1;
                em_print_debug_info(adapter);
        }

        return (error);
}

/*
** This routine is meant to be fluid, add whatever is
** needed for debugging a problem.  -jfv
*/
static void
em_print_debug_info(struct adapter *adapter)
{
        device_t dev = adapter->dev;
        struct tx_ring *txr = adapter->tx_rings;
        struct rx_ring *rxr = adapter->rx_rings;

        if (adapter->ifp->if_drv_flags & IFF_DRV_RUNNING)
                printf("Interface is RUNNING ");
        else
                printf("Interface is NOT RUNNING ");

        /* OACTIVE set means the transmit path is blocked. */
        if (adapter->ifp->if_drv_flags & IFF_DRV_OACTIVE)
                printf("and INACTIVE\n");
        else
                printf("and ACTIVE\n");

        device_printf(dev, "hw tdh = %d, hw tdt = %d\n",
            E1000_READ_REG(&adapter->hw, E1000_TDH(0)),
            E1000_READ_REG(&adapter->hw, E1000_TDT(0)));
        device_printf(dev, "hw rdh = %d, hw rdt = %d\n",
            E1000_READ_REG(&adapter->hw, E1000_RDH(0)),
            E1000_READ_REG(&adapter->hw, E1000_RDT(0)));
        device_printf(dev, "Tx Queue Status = %d\n", txr->queue_status);
        device_printf(dev, "TX descriptors avail = %d\n",
            txr->tx_avail);
        device_printf(dev, "Tx Descriptors avail failure = %lu\n",
            txr->no_desc_avail);
        device_printf(dev, "RX discarded packets = %lu\n",
            rxr->rx_discarded);
        device_printf(dev, "RX Next to Check = %d\n", rxr->next_to_check);
        device_printf(dev, "RX Next to Refresh = %d\n", rxr->next_to_refresh);
}
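
/*
 * Illustrative only: assuming em_sysctl_debug_info() above is attached
 * as "debug", the dump is triggered from userland with:
 *
 *   sysctl dev.em.0.debug=1
 *
 * The output goes to the console and kernel message buffer, so it can
 * be reviewed with dmesg(8).
 */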