/******************************************************************************

  Copyright (c) 2001-2013, Intel Corporation
  All rights reserved.

  Redistribution and use in source and binary forms, with or without
  modification, are permitted provided that the following conditions are met:

   1. Redistributions of source code must retain the above copyright notice,
      this list of conditions and the following disclaimer.

   2. Redistributions in binary form must reproduce the above copyright
      notice, this list of conditions and the following disclaimer in the
      documentation and/or other materials provided with the distribution.

   3. Neither the name of the Intel Corporation nor the names of its
      contributors may be used to endorse or promote products derived from
      this software without specific prior written permission.

  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
  AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
  LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
  INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
  CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
  ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
  POSSIBILITY OF SUCH DAMAGE.

******************************************************************************/
/*$FreeBSD$*/

#include "opt_inet.h"
#include "opt_inet6.h"

#ifdef HAVE_KERNEL_OPTION_HEADERS
#include "opt_device_polling.h"
#endif

#include <sys/param.h>
#include <sys/systm.h>
#if __FreeBSD_version >= 800000
#include <sys/buf_ring.h>
#endif
#include <sys/bus.h>
#include <sys/endian.h>
#include <sys/kernel.h>
#include <sys/kthread.h>
#include <sys/malloc.h>
#include <sys/mbuf.h>
#include <sys/module.h>
#include <sys/rman.h>
#include <sys/socket.h>
#include <sys/sockio.h>
#include <sys/sysctl.h>
#include <sys/taskqueue.h>
#include <sys/eventhandler.h>
#include <machine/bus.h>
#include <machine/resource.h>

#include <net/bpf.h>
#include <net/ethernet.h>
#include <net/if.h>
#include <net/if_arp.h>
#include <net/if_dl.h>
#include <net/if_media.h>

#include <net/if_types.h>
#include <net/if_vlan_var.h>

#include <netinet/in_systm.h>
#include <netinet/in.h>
#include <netinet/if_ether.h>
#include <netinet/ip.h>
#include <netinet/ip6.h>
#include <netinet/tcp.h>
#include <netinet/udp.h>

#include <machine/in_cksum.h>
#include <dev/led/led.h>
#include <dev/pci/pcivar.h>
#include <dev/pci/pcireg.h>

#include "e1000_api.h"
#include "e1000_82571.h"
#include "if_em.h"

/*********************************************************************
 *  Set this to one to display debug statistics
 *********************************************************************/
int     em_display_debug_stats = 0;

/*********************************************************************
 *  Driver version:
 *********************************************************************/
char em_driver_version[] = "7.3.8";

/*********************************************************************
 *  PCI Device ID Table
 *
 *  Used by probe to select which devices to load on
 *  Last field stores an index into em_strings
 *  Last entry must be all 0s
 *
 *  { Vendor ID, Device ID, SubVendor ID, SubDevice ID, String Index }
 *********************************************************************/

static em_vendor_info_t em_vendor_info_array[] =
{
        /* Intel(R) PRO/1000 Network Connection */
        { 0x8086, E1000_DEV_ID_82571EB_COPPER,  PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82571EB_FIBER,   PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82571EB_SERDES,  PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82571EB_SERDES_DUAL,
                                                PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82571EB_SERDES_QUAD,
                                                PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82571EB_QUAD_COPPER,
                                                PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82571EB_QUAD_COPPER_LP,
                                                PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82571EB_QUAD_FIBER,
                                                PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82571PT_QUAD_COPPER,
                                                PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82572EI_COPPER,  PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82572EI_FIBER,   PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82572EI_SERDES,  PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82572EI,         PCI_ANY_ID, PCI_ANY_ID, 0},

        { 0x8086, E1000_DEV_ID_82573E,          PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82573E_IAMT,     PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82573L,          PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82583V,          PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_80003ES2LAN_COPPER_SPT,
                                                PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_80003ES2LAN_SERDES_SPT,
                                                PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_80003ES2LAN_COPPER_DPT,
                                                PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_80003ES2LAN_SERDES_DPT,
                                                PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_ICH8_IGP_M_AMT,  PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_ICH8_IGP_AMT,    PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_ICH8_IGP_C,      PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_ICH8_IFE,        PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_ICH8_IFE_GT,     PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_ICH8_IFE_G,      PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_ICH8_IGP_M,      PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_ICH8_82567V_3,   PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_ICH9_IGP_M_AMT,  PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_ICH9_IGP_AMT,    PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_ICH9_IGP_C,      PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_ICH9_IGP_M,      PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_ICH9_IGP_M_V,    PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_ICH9_IFE,        PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_ICH9_IFE_GT,     PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_ICH9_IFE_G,      PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_ICH9_BM,         PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82574L,          PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82574LA,         PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_ICH10_R_BM_LM,   PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_ICH10_R_BM_LF,   PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_ICH10_R_BM_V,    PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_ICH10_D_BM_LM,   PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_ICH10_D_BM_LF,   PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_ICH10_D_BM_V,    PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_PCH_M_HV_LM,     PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_PCH_M_HV_LC,     PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_PCH_D_HV_DM,     PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_PCH_D_HV_DC,     PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_PCH2_LV_LM,      PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_PCH2_LV_V,       PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_PCH_LPT_I217_LM, PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_PCH_LPT_I217_V,  PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_PCH_LPTLP_I218_LM,
                                                PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_PCH_LPTLP_I218_V,
                                                PCI_ANY_ID, PCI_ANY_ID, 0},
        /* required last entry */
        { 0, 0, 0, 0, 0}
};

/*********************************************************************
 *  Table of branding strings for all supported NICs.
 *********************************************************************/

static char *em_strings[] = {
        "Intel(R) PRO/1000 Network Connection"
};

/*********************************************************************
 *  Function prototypes
 *********************************************************************/
static int      em_probe(device_t);
static int      em_attach(device_t);
static int      em_detach(device_t);
static int      em_shutdown(device_t);
static int      em_suspend(device_t);
static int      em_resume(device_t);
#ifdef EM_MULTIQUEUE
static int      em_mq_start(struct ifnet *, struct mbuf *);
static int      em_mq_start_locked(struct ifnet *,
                    struct tx_ring *, struct mbuf *);
static void     em_qflush(struct ifnet *);
#else
static void     em_start(struct ifnet *);
static void     em_start_locked(struct ifnet *, struct tx_ring *);
#endif
static int      em_ioctl(struct ifnet *, u_long, caddr_t);
static void     em_init(void *);
static void     em_init_locked(struct adapter *);
static void     em_stop(void *);
static void     em_media_status(struct ifnet *, struct ifmediareq *);
static int      em_media_change(struct ifnet *);
static void     em_identify_hardware(struct adapter *);
static int      em_allocate_pci_resources(struct adapter *);
static int      em_allocate_legacy(struct adapter *);
static int      em_allocate_msix(struct adapter *);
static int      em_allocate_queues(struct adapter *);
static int      em_setup_msix(struct adapter *);
static void     em_free_pci_resources(struct adapter *);
static void     em_local_timer(void *);
static void     em_reset(struct adapter *);
static int      em_setup_interface(device_t, struct adapter *);

static void     em_setup_transmit_structures(struct adapter *);
static void     em_initialize_transmit_unit(struct adapter *);
static int      em_allocate_transmit_buffers(struct tx_ring *);
static void     em_free_transmit_structures(struct adapter *);
static void     em_free_transmit_buffers(struct tx_ring *);

static int      em_setup_receive_structures(struct adapter *);
static int      em_allocate_receive_buffers(struct rx_ring *);
static void     em_initialize_receive_unit(struct adapter *);
static void     em_free_receive_structures(struct adapter *);
static void     em_free_receive_buffers(struct rx_ring *);

static void     em_enable_intr(struct adapter *);
static void     em_disable_intr(struct adapter *);
static void     em_update_stats_counters(struct adapter *);
static void     em_add_hw_stats(struct adapter *adapter);
static void     em_txeof(struct tx_ring *);
static bool     em_rxeof(struct rx_ring *, int, int *);
#ifndef __NO_STRICT_ALIGNMENT
static int      em_fixup_rx(struct rx_ring *);
#endif
static void     em_receive_checksum(struct e1000_rx_desc *, struct mbuf *);
static void     em_transmit_checksum_setup(struct tx_ring *, struct mbuf *, int,
                    struct ip *, u32 *, u32 *);
static void     em_tso_setup(struct tx_ring *, struct mbuf *, int, struct ip *,
                    struct tcphdr *, u32 *, u32 *);
static void     em_set_promisc(struct adapter *);
static void     em_disable_promisc(struct adapter *);
static void     em_set_multi(struct adapter *);
static void     em_update_link_status(struct adapter *);
static void     em_refresh_mbufs(struct rx_ring *, int);
static void     em_register_vlan(void *, struct ifnet *, u16);
static void     em_unregister_vlan(void *, struct ifnet *, u16);
static void     em_setup_vlan_hw_support(struct adapter *);
static int      em_xmit(struct tx_ring *, struct mbuf **);
static int      em_dma_malloc(struct adapter *, bus_size_t,
                    struct em_dma_alloc *, int);
static void     em_dma_free(struct adapter *, struct em_dma_alloc *);
static int      em_sysctl_nvm_info(SYSCTL_HANDLER_ARGS);
static void     em_print_nvm_info(struct adapter *);
static int      em_sysctl_debug_info(SYSCTL_HANDLER_ARGS);
static void     em_print_debug_info(struct adapter *);
static int      em_is_valid_ether_addr(u8 *);
static int      em_sysctl_int_delay(SYSCTL_HANDLER_ARGS);
static void     em_add_int_delay_sysctl(struct adapter *, const char *,
                    const char *, struct em_int_delay_info *, int, int);
/* Management and WOL Support */
static void     em_init_manageability(struct adapter *);
static void     em_release_manageability(struct adapter *);
static void     em_get_hw_control(struct adapter *);
static void     em_release_hw_control(struct adapter *);
static void     em_get_wakeup(device_t);
static void     em_enable_wakeup(device_t);
static int      em_enable_phy_wakeup(struct adapter *);
static void     em_led_func(void *, int);
static void     em_disable_aspm(struct adapter *);

static int      em_irq_fast(void *);

/* MSIX handlers */
static void     em_msix_tx(void *);
static void     em_msix_rx(void *);
static void     em_msix_link(void *);
static void     em_handle_tx(void *context, int pending);
static void     em_handle_rx(void *context, int pending);
static void     em_handle_link(void *context, int pending);

static void     em_set_sysctl_value(struct adapter *, const char *,
                    const char *, int *, int);
static int      em_set_flowcntl(SYSCTL_HANDLER_ARGS);
static int      em_sysctl_eee(SYSCTL_HANDLER_ARGS);

static __inline void em_rx_discard(struct rx_ring *, int);

#ifdef DEVICE_POLLING
static poll_handler_t em_poll;
#endif /* DEVICE_POLLING */

/*********************************************************************
 *  FreeBSD Device Interface Entry Points
 *********************************************************************/

static device_method_t em_methods[] = {
        /* Device interface */
        DEVMETHOD(device_probe, em_probe),
        DEVMETHOD(device_attach, em_attach),
        DEVMETHOD(device_detach, em_detach),
        DEVMETHOD(device_shutdown, em_shutdown),
        DEVMETHOD(device_suspend, em_suspend),
        DEVMETHOD(device_resume, em_resume),
        DEVMETHOD_END
};

static driver_t em_driver = {
        "em", em_methods, sizeof(struct adapter),
};

devclass_t em_devclass;
DRIVER_MODULE(em, pci, em_driver, em_devclass, 0, 0);
MODULE_DEPEND(em, pci, 1, 1, 1);
MODULE_DEPEND(em, ether, 1, 1, 1);

/*********************************************************************
 *  Tunable default values.
 *********************************************************************/

#define EM_TICKS_TO_USECS(ticks)        ((1024 * (ticks) + 500) / 1000)
#define EM_USECS_TO_TICKS(usecs)        ((1000 * (usecs) + 512) / 1024)
#define M_TSO_LEN                       66

#define MAX_INTS_PER_SEC        8000
#define DEFAULT_ITR             (1000000000/(MAX_INTS_PER_SEC * 256))
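/*
 * Worked example (illustrative): the ITR register counts in units of
 * 256 nanoseconds, so DEFAULT_ITR = 10^9 / (8000 * 256) ~= 488, i.e.
 * at most one interrupt every ~125 usecs, or about 8000 per second.
 * Likewise the EM_TICKS_TO_USECS/EM_USECS_TO_TICKS macros round-convert
 * the hardware's 1.024 usec ticks, e.g. EM_USECS_TO_TICKS(1000) =
 * (1000 * 1000 + 512) / 1024 = 977 ticks.
 */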

/* Allow common code without TSO */
#ifndef CSUM_TSO
#define CSUM_TSO        0
#endif

static SYSCTL_NODE(_hw, OID_AUTO, em, CTLFLAG_RD, 0, "EM driver parameters");

static int em_tx_int_delay_dflt = EM_TICKS_TO_USECS(EM_TIDV);
static int em_rx_int_delay_dflt = EM_TICKS_TO_USECS(EM_RDTR);
TUNABLE_INT("hw.em.tx_int_delay", &em_tx_int_delay_dflt);
TUNABLE_INT("hw.em.rx_int_delay", &em_rx_int_delay_dflt);
SYSCTL_INT(_hw_em, OID_AUTO, tx_int_delay, CTLFLAG_RDTUN, &em_tx_int_delay_dflt,
    0, "Default transmit interrupt delay in usecs");
SYSCTL_INT(_hw_em, OID_AUTO, rx_int_delay, CTLFLAG_RDTUN, &em_rx_int_delay_dflt,
    0, "Default receive interrupt delay in usecs");
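/*
 * Usage sketch (illustrative values): being CTLFLAG_RDTUN, these are
 * set as boot-time tunables rather than at runtime, e.g. in
 * /boot/loader.conf:
 *
 *      hw.em.tx_int_delay="66"
 *      hw.em.rx_int_delay="0"
 *
 * and read back with sysctl(8), e.g. "sysctl hw.em.rx_int_delay".
 */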

static int em_tx_abs_int_delay_dflt = EM_TICKS_TO_USECS(EM_TADV);
static int em_rx_abs_int_delay_dflt = EM_TICKS_TO_USECS(EM_RADV);
TUNABLE_INT("hw.em.tx_abs_int_delay", &em_tx_abs_int_delay_dflt);
TUNABLE_INT("hw.em.rx_abs_int_delay", &em_rx_abs_int_delay_dflt);
SYSCTL_INT(_hw_em, OID_AUTO, tx_abs_int_delay, CTLFLAG_RDTUN,
    &em_tx_abs_int_delay_dflt, 0,
    "Default transmit interrupt delay limit in usecs");
SYSCTL_INT(_hw_em, OID_AUTO, rx_abs_int_delay, CTLFLAG_RDTUN,
    &em_rx_abs_int_delay_dflt, 0,
    "Default receive interrupt delay limit in usecs");

static int em_rxd = EM_DEFAULT_RXD;
static int em_txd = EM_DEFAULT_TXD;
TUNABLE_INT("hw.em.rxd", &em_rxd);
TUNABLE_INT("hw.em.txd", &em_txd);
SYSCTL_INT(_hw_em, OID_AUTO, rxd, CTLFLAG_RDTUN, &em_rxd, 0,
    "Number of receive descriptors per queue");
SYSCTL_INT(_hw_em, OID_AUTO, txd, CTLFLAG_RDTUN, &em_txd, 0,
    "Number of transmit descriptors per queue");

static int em_smart_pwr_down = FALSE;
TUNABLE_INT("hw.em.smart_pwr_down", &em_smart_pwr_down);
SYSCTL_INT(_hw_em, OID_AUTO, smart_pwr_down, CTLFLAG_RDTUN, &em_smart_pwr_down,
    0, "Set to true to leave smart power down enabled on newer adapters");

/* Controls whether promiscuous also shows bad packets */
static int em_debug_sbp = FALSE;
TUNABLE_INT("hw.em.sbp", &em_debug_sbp);
SYSCTL_INT(_hw_em, OID_AUTO, sbp, CTLFLAG_RDTUN, &em_debug_sbp, 0,
    "Show bad packets in promiscuous mode");

static int em_enable_msix = TRUE;
TUNABLE_INT("hw.em.enable_msix", &em_enable_msix);
SYSCTL_INT(_hw_em, OID_AUTO, enable_msix, CTLFLAG_RDTUN, &em_enable_msix, 0,
    "Enable MSI-X interrupts");

/* How many packets rxeof tries to clean at a time */
static int em_rx_process_limit = 100;
TUNABLE_INT("hw.em.rx_process_limit", &em_rx_process_limit);
SYSCTL_INT(_hw_em, OID_AUTO, rx_process_limit, CTLFLAG_RDTUN,
    &em_rx_process_limit, 0,
    "Maximum number of received packets to process "
    "at a time, -1 means unlimited");
/* Energy Efficient Ethernet - default to OFF; the value is copied
   to eee_disable at attach, so a nonzero setting disables EEE */
static int eee_setting = 1;
TUNABLE_INT("hw.em.eee_setting", &eee_setting);
SYSCTL_INT(_hw_em, OID_AUTO, eee_setting, CTLFLAG_RDTUN, &eee_setting, 0,
    "Enable Energy Efficient Ethernet");

/* Global used in WOL setup with multiport cards */
static int global_quad_port_a = 0;

#ifdef DEV_NETMAP       /* see ixgbe.c for details */
#include <dev/netmap/if_em_netmap.h>
#endif /* DEV_NETMAP */

/*********************************************************************
 *  Device identification routine
 *
 *  em_probe determines if the driver should be loaded on the
 *  adapter, based on the PCI vendor/device ID of the adapter.
 *
 *  return BUS_PROBE_DEFAULT on success, positive on failure
 *********************************************************************/
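/*
 * For example (illustrative): a matching 82574L adapter would have its
 * description set to "Intel(R) PRO/1000 Network Connection 7.3.8",
 * that is, em_strings[ent->index] followed by em_driver_version.
 */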

static int
em_probe(device_t dev)
{
        char            adapter_name[60];
        u16             pci_vendor_id = 0;
        u16             pci_device_id = 0;
        u16             pci_subvendor_id = 0;
        u16             pci_subdevice_id = 0;
        em_vendor_info_t *ent;

        INIT_DEBUGOUT("em_probe: begin");

        pci_vendor_id = pci_get_vendor(dev);
        if (pci_vendor_id != EM_VENDOR_ID)
                return (ENXIO);

        pci_device_id = pci_get_device(dev);
        pci_subvendor_id = pci_get_subvendor(dev);
        pci_subdevice_id = pci_get_subdevice(dev);

        ent = em_vendor_info_array;
        while (ent->vendor_id != 0) {
                if ((pci_vendor_id == ent->vendor_id) &&
                    (pci_device_id == ent->device_id) &&

                    ((pci_subvendor_id == ent->subvendor_id) ||
                    (ent->subvendor_id == PCI_ANY_ID)) &&

                    ((pci_subdevice_id == ent->subdevice_id) ||
                    (ent->subdevice_id == PCI_ANY_ID))) {
                        sprintf(adapter_name, "%s %s",
                                em_strings[ent->index],
                                em_driver_version);
                        device_set_desc_copy(dev, adapter_name);
                        return (BUS_PROBE_DEFAULT);
                }
                ent++;
        }

        return (ENXIO);
}

/*********************************************************************
 *  Device initialization routine
 *
 *  The attach entry point is called when the driver is being loaded.
 *  This routine identifies the type of hardware, allocates all resources
 *  and initializes the hardware.
 *
 *  return 0 on success, positive on failure
 *********************************************************************/

static int
em_attach(device_t dev)
{
        struct adapter  *adapter;
        struct e1000_hw *hw;
        int             error = 0;

        INIT_DEBUGOUT("em_attach: begin");

        if (resource_disabled("em", device_get_unit(dev))) {
                device_printf(dev, "Disabled by device hint\n");
                return (ENXIO);
        }

        adapter = device_get_softc(dev);
        adapter->dev = adapter->osdep.dev = dev;
        hw = &adapter->hw;
        EM_CORE_LOCK_INIT(adapter, device_get_nameunit(dev));

        /* SYSCTL stuff */
        SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
            SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
            OID_AUTO, "nvm", CTLTYPE_INT|CTLFLAG_RW, adapter, 0,
            em_sysctl_nvm_info, "I", "NVM Information");

        SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
            SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
            OID_AUTO, "debug", CTLTYPE_INT|CTLFLAG_RW, adapter, 0,
            em_sysctl_debug_info, "I", "Debug Information");

        SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
            SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
            OID_AUTO, "fc", CTLTYPE_INT|CTLFLAG_RW, adapter, 0,
            em_set_flowcntl, "I", "Flow Control");

        callout_init_mtx(&adapter->timer, &adapter->core_mtx, 0);

        /* Determine hardware and mac info */
        em_identify_hardware(adapter);

        /* Setup PCI resources */
        if (em_allocate_pci_resources(adapter)) {
                device_printf(dev, "Allocation of PCI resources failed\n");
                error = ENXIO;
                goto err_pci;
        }

        /*
        ** For ICH8 and family we need to
        ** map the flash memory, and this
        ** must happen after the MAC is
        ** identified
        */
        if ((hw->mac.type == e1000_ich8lan) ||
            (hw->mac.type == e1000_ich9lan) ||
            (hw->mac.type == e1000_ich10lan) ||
            (hw->mac.type == e1000_pchlan) ||
            (hw->mac.type == e1000_pch2lan) ||
            (hw->mac.type == e1000_pch_lpt)) {
                int rid = EM_BAR_TYPE_FLASH;
                adapter->flash = bus_alloc_resource_any(dev,
                    SYS_RES_MEMORY, &rid, RF_ACTIVE);
                if (adapter->flash == NULL) {
                        device_printf(dev, "Mapping of Flash failed\n");
                        error = ENXIO;
                        goto err_pci;
                }
                /* This is used in the shared code */
                hw->flash_address = (u8 *)adapter->flash;
                adapter->osdep.flash_bus_space_tag =
                    rman_get_bustag(adapter->flash);
                adapter->osdep.flash_bus_space_handle =
                    rman_get_bushandle(adapter->flash);
        }

        /* Do Shared Code initialization */
        if (e1000_setup_init_funcs(hw, TRUE)) {
                device_printf(dev, "Setup of Shared code failed\n");
                error = ENXIO;
                goto err_pci;
        }

        e1000_get_bus_info(hw);

        /* Set up some sysctls for the tunable interrupt delays */
        em_add_int_delay_sysctl(adapter, "rx_int_delay",
            "receive interrupt delay in usecs", &adapter->rx_int_delay,
            E1000_REGISTER(hw, E1000_RDTR), em_rx_int_delay_dflt);
        em_add_int_delay_sysctl(adapter, "tx_int_delay",
            "transmit interrupt delay in usecs", &adapter->tx_int_delay,
            E1000_REGISTER(hw, E1000_TIDV), em_tx_int_delay_dflt);
        em_add_int_delay_sysctl(adapter, "rx_abs_int_delay",
            "receive interrupt delay limit in usecs",
            &adapter->rx_abs_int_delay,
            E1000_REGISTER(hw, E1000_RADV),
            em_rx_abs_int_delay_dflt);
        em_add_int_delay_sysctl(adapter, "tx_abs_int_delay",
            "transmit interrupt delay limit in usecs",
            &adapter->tx_abs_int_delay,
            E1000_REGISTER(hw, E1000_TADV),
            em_tx_abs_int_delay_dflt);
        em_add_int_delay_sysctl(adapter, "itr",
            "interrupt delay limit in usecs/4",
            &adapter->tx_itr,
            E1000_REGISTER(hw, E1000_ITR),
            DEFAULT_ITR);

        /* Sysctl for limiting the amount of work done in the taskqueue */
        em_set_sysctl_value(adapter, "rx_processing_limit",
            "max number of rx packets to process", &adapter->rx_process_limit,
            em_rx_process_limit);

        /*
         * Validate the number of transmit and receive descriptors. It
         * must not exceed the hardware maximum, and the ring size in
         * bytes must be a multiple of EM_DBA_ALIGN.
         */
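        /*
         * Concretely (a sketch, assuming the 16-byte legacy descriptor
         * and the EM_DBA_ALIGN of 128 from if_em.h): the ring size in
         * bytes must be 128-byte aligned, so the descriptor count must
         * be a multiple of 128 / 16 = 8.
         */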
        if (((em_txd * sizeof(struct e1000_tx_desc)) % EM_DBA_ALIGN) != 0 ||
            (em_txd > EM_MAX_TXD) || (em_txd < EM_MIN_TXD)) {
                device_printf(dev, "Using %d TX descriptors instead of %d!\n",
                    EM_DEFAULT_TXD, em_txd);
                adapter->num_tx_desc = EM_DEFAULT_TXD;
        } else
                adapter->num_tx_desc = em_txd;

        if (((em_rxd * sizeof(struct e1000_rx_desc)) % EM_DBA_ALIGN) != 0 ||
            (em_rxd > EM_MAX_RXD) || (em_rxd < EM_MIN_RXD)) {
                device_printf(dev, "Using %d RX descriptors instead of %d!\n",
                    EM_DEFAULT_RXD, em_rxd);
                adapter->num_rx_desc = EM_DEFAULT_RXD;
        } else
                adapter->num_rx_desc = em_rxd;

        hw->mac.autoneg = DO_AUTO_NEG;
        hw->phy.autoneg_wait_to_complete = FALSE;
        hw->phy.autoneg_advertised = AUTONEG_ADV_DEFAULT;

        /* Copper options */
        if (hw->phy.media_type == e1000_media_type_copper) {
                hw->phy.mdix = AUTO_ALL_MODES;
                hw->phy.disable_polarity_correction = FALSE;
                hw->phy.ms_type = EM_MASTER_SLAVE;
        }

        /*
         * Set the frame limits assuming
         * standard ethernet sized frames.
         */
        adapter->hw.mac.max_frame_size =
            ETHERMTU + ETHER_HDR_LEN + ETHERNET_FCS_SIZE;

        /*
         * This controls when hardware reports transmit completion
         * status.
         */
        hw->mac.report_tx_early = 1;

        /*
        ** Get queue/ring memory
        */
        if (em_allocate_queues(adapter)) {
                error = ENOMEM;
                goto err_pci;
        }

        /* Allocate multicast array memory. */
        adapter->mta = malloc(sizeof(u8) * ETH_ADDR_LEN *
            MAX_NUM_MULTICAST_ADDRESSES, M_DEVBUF, M_NOWAIT);
        if (adapter->mta == NULL) {
                device_printf(dev, "Can not allocate multicast setup array\n");
                error = ENOMEM;
                goto err_late;
        }

        /* Check SOL/IDER usage */
        if (e1000_check_reset_block(hw))
                device_printf(dev, "PHY reset is blocked"
                    " due to SOL/IDER session.\n");

        /* Sysctl for setting Energy Efficient Ethernet */
        hw->dev_spec.ich8lan.eee_disable = eee_setting;
        SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
            SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
            OID_AUTO, "eee_control", CTLTYPE_INT|CTLFLAG_RW,
            adapter, 0, em_sysctl_eee, "I",
            "Disable Energy Efficient Ethernet");

        /*
        ** Start from a known state: reset the hardware
        ** before reading the NVM and MAC address from it.
        */
        e1000_reset_hw(hw);


        /* Make sure we have a good EEPROM before we read from it */
        if (e1000_validate_nvm_checksum(hw) < 0) {
                /*
                ** Some PCI-E parts fail the first check due to
                ** the link being in a sleep state; call it again.
                ** If it fails a second time, it is a real issue.
                */
                if (e1000_validate_nvm_checksum(hw) < 0) {
                        device_printf(dev,
                            "The EEPROM Checksum Is Not Valid\n");
                        error = EIO;
                        goto err_late;
                }
        }

        /* Copy the permanent MAC address out of the EEPROM */
        if (e1000_read_mac_addr(hw) < 0) {
                device_printf(dev, "EEPROM read error while reading MAC"
                    " address\n");
                error = EIO;
                goto err_late;
        }

        if (!em_is_valid_ether_addr(hw->mac.addr)) {
                device_printf(dev, "Invalid MAC address\n");
                error = EIO;
                goto err_late;
        }

        /*
        **  Do interrupt configuration
        */
        if (adapter->msix > 1) /* Do MSIX */
                error = em_allocate_msix(adapter);
        else  /* MSI or Legacy */
                error = em_allocate_legacy(adapter);
        if (error)
                goto err_late;

        /*
         * Get Wake-on-Lan and Management info for later use
         */
        em_get_wakeup(dev);

        /* Setup OS specific network interface */
        if (em_setup_interface(dev, adapter) != 0)
                goto err_late;

        em_reset(adapter);

        /* Initialize statistics */
        em_update_stats_counters(adapter);

        hw->mac.get_link_status = 1;
        em_update_link_status(adapter);

        /* Register for VLAN events */
        adapter->vlan_attach = EVENTHANDLER_REGISTER(vlan_config,
            em_register_vlan, adapter, EVENTHANDLER_PRI_FIRST);
        adapter->vlan_detach = EVENTHANDLER_REGISTER(vlan_unconfig,
            em_unregister_vlan, adapter, EVENTHANDLER_PRI_FIRST);

        em_add_hw_stats(adapter);

        /* Non-AMT based hardware can now take control from firmware */
        if (adapter->has_manage && !adapter->has_amt)
                em_get_hw_control(adapter);

        /* Tell the stack that the interface is not active */
        adapter->ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
        adapter->ifp->if_drv_flags |= IFF_DRV_OACTIVE;

        adapter->led_dev = led_create(em_led_func, adapter,
            device_get_nameunit(dev));
#ifdef DEV_NETMAP
        em_netmap_attach(adapter);
#endif /* DEV_NETMAP */

        INIT_DEBUGOUT("em_attach: end");

        return (0);

err_late:
        em_free_transmit_structures(adapter);
        em_free_receive_structures(adapter);
        em_release_hw_control(adapter);
        if (adapter->ifp != NULL)
                if_free(adapter->ifp);
err_pci:
        em_free_pci_resources(adapter);
        free(adapter->mta, M_DEVBUF);
        EM_CORE_LOCK_DESTROY(adapter);

        return (error);
}

/*********************************************************************
 *  Device removal routine
 *
 *  The detach entry point is called when the driver is being removed.
 *  This routine stops the adapter and deallocates all the resources
 *  that were allocated for driver operation.
 *
 *  return 0 on success, positive on failure
 *********************************************************************/

static int
em_detach(device_t dev)
{
        struct adapter  *adapter = device_get_softc(dev);
        struct ifnet    *ifp = adapter->ifp;

        INIT_DEBUGOUT("em_detach: begin");

        /* Make sure VLANS are not using driver */
        if (adapter->ifp->if_vlantrunk != NULL) {
                device_printf(dev,"Vlan in use, detach first\n");
                return (EBUSY);
        }

#ifdef DEVICE_POLLING
        if (ifp->if_capenable & IFCAP_POLLING)
                ether_poll_deregister(ifp);
#endif

        if (adapter->led_dev != NULL)
                led_destroy(adapter->led_dev);

        EM_CORE_LOCK(adapter);
        adapter->in_detach = 1;
        em_stop(adapter);
        EM_CORE_UNLOCK(adapter);
        EM_CORE_LOCK_DESTROY(adapter);

        e1000_phy_hw_reset(&adapter->hw);

        em_release_manageability(adapter);
        em_release_hw_control(adapter);

        /* Unregister VLAN events */
        if (adapter->vlan_attach != NULL)
                EVENTHANDLER_DEREGISTER(vlan_config, adapter->vlan_attach);
        if (adapter->vlan_detach != NULL)
                EVENTHANDLER_DEREGISTER(vlan_unconfig, adapter->vlan_detach);

        ether_ifdetach(adapter->ifp);
        callout_drain(&adapter->timer);

#ifdef DEV_NETMAP
        netmap_detach(ifp);
#endif /* DEV_NETMAP */

        em_free_pci_resources(adapter);
        bus_generic_detach(dev);
        if_free(ifp);

        em_free_transmit_structures(adapter);
        em_free_receive_structures(adapter);

        em_release_hw_control(adapter);
        free(adapter->mta, M_DEVBUF);

        return (0);
}

/*********************************************************************
 *
 *  Shutdown entry point
 *
 **********************************************************************/

static int
em_shutdown(device_t dev)
{
        return em_suspend(dev);
}

/*
 * Suspend/resume device methods.
 */
static int
em_suspend(device_t dev)
{
        struct adapter *adapter = device_get_softc(dev);

        EM_CORE_LOCK(adapter);

        em_release_manageability(adapter);
        em_release_hw_control(adapter);
        em_enable_wakeup(dev);

        EM_CORE_UNLOCK(adapter);

        return bus_generic_suspend(dev);
}

static int
em_resume(device_t dev)
{
        struct adapter *adapter = device_get_softc(dev);
        struct tx_ring  *txr = adapter->tx_rings;
        struct ifnet *ifp = adapter->ifp;

        EM_CORE_LOCK(adapter);
        if (adapter->hw.mac.type == e1000_pch2lan)
                e1000_resume_workarounds_pchlan(&adapter->hw);
        em_init_locked(adapter);
        em_init_manageability(adapter);

        if ((ifp->if_flags & IFF_UP) &&
            (ifp->if_drv_flags & IFF_DRV_RUNNING) && adapter->link_active) {
                for (int i = 0; i < adapter->num_queues; i++, txr++) {
                        EM_TX_LOCK(txr);
#ifdef EM_MULTIQUEUE
                        if (!drbr_empty(ifp, txr->br))
                                em_mq_start_locked(ifp, txr, NULL);
#else
                        if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
                                em_start_locked(ifp, txr);
#endif
                        EM_TX_UNLOCK(txr);
                }
        }
        EM_CORE_UNLOCK(adapter);

        return bus_generic_resume(dev);
}


#ifdef EM_MULTIQUEUE
/*********************************************************************
 *  Multiqueue Transmit routines
 *
 *  em_mq_start is called by the stack to initiate a transmit.
 *  However, if the hardware is busy the driver can queue the
 *  request rather than doing an immediate send; that queueing,
 *  rather than multiple TX queues, is the advantage of this path.
 **********************************************************************/
static int
em_mq_start_locked(struct ifnet *ifp, struct tx_ring *txr, struct mbuf *m)
{
        struct adapter  *adapter = txr->adapter;
        struct mbuf     *next;
        int             err = 0, enq = 0;

        if ((ifp->if_drv_flags & (IFF_DRV_RUNNING | IFF_DRV_OACTIVE)) !=
            IFF_DRV_RUNNING || adapter->link_active == 0) {
                if (m != NULL)
                        err = drbr_enqueue(ifp, txr->br, m);
                return (err);
        }

        enq = 0;
        if (m != NULL) {
                err = drbr_enqueue(ifp, txr->br, m);
                if (err)
                        return (err);
        }

        /* Process the queue */
        while ((next = drbr_peek(ifp, txr->br)) != NULL) {
                if ((err = em_xmit(txr, &next)) != 0) {
                        if (next == NULL)
                                drbr_advance(ifp, txr->br);
                        else
                                drbr_putback(ifp, txr->br, next);
                        break;
                }
                drbr_advance(ifp, txr->br);
                enq++;
                ifp->if_obytes += next->m_pkthdr.len;
                if (next->m_flags & M_MCAST)
                        ifp->if_omcasts++;
                ETHER_BPF_MTAP(ifp, next);
                if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
                        break;
        }

        if (enq > 0) {
                /* Set the watchdog */
                txr->queue_status = EM_QUEUE_WORKING;
                txr->watchdog_time = ticks;
        }

        if (txr->tx_avail < EM_MAX_SCATTER)
                em_txeof(txr);
        if (txr->tx_avail < EM_MAX_SCATTER)
                ifp->if_drv_flags |= IFF_DRV_OACTIVE;
        return (err);
}

/*
** Multiqueue capable stack interface
*/
static int
em_mq_start(struct ifnet *ifp, struct mbuf *m)
{
        struct adapter  *adapter = ifp->if_softc;
        struct tx_ring  *txr = adapter->tx_rings;
        int             error;

        if (EM_TX_TRYLOCK(txr)) {
                error = em_mq_start_locked(ifp, txr, m);
                EM_TX_UNLOCK(txr);
        } else
                error = drbr_enqueue(ifp, txr->br, m);

        return (error);
}

/*
** Flush all ring buffers
*/
static void
em_qflush(struct ifnet *ifp)
{
        struct adapter  *adapter = ifp->if_softc;
        struct tx_ring  *txr = adapter->tx_rings;
        struct mbuf     *m;

        for (int i = 0; i < adapter->num_queues; i++, txr++) {
                EM_TX_LOCK(txr);
                while ((m = buf_ring_dequeue_sc(txr->br)) != NULL)
                        m_freem(m);
                EM_TX_UNLOCK(txr);
        }
        if_qflush(ifp);
}
#else  /* !EM_MULTIQUEUE */

static void
em_start_locked(struct ifnet *ifp, struct tx_ring *txr)
{
        struct adapter  *adapter = ifp->if_softc;
        struct mbuf     *m_head;

        EM_TX_LOCK_ASSERT(txr);

        if ((ifp->if_drv_flags & (IFF_DRV_RUNNING|IFF_DRV_OACTIVE)) !=
            IFF_DRV_RUNNING)
                return;

        if (!adapter->link_active)
                return;

        while (!IFQ_DRV_IS_EMPTY(&ifp->if_snd)) {
                /* Call cleanup if number of TX descriptors low */
                if (txr->tx_avail <= EM_TX_CLEANUP_THRESHOLD)
                        em_txeof(txr);
                if (txr->tx_avail < EM_MAX_SCATTER) {
                        ifp->if_drv_flags |= IFF_DRV_OACTIVE;
                        break;
                }
                IFQ_DRV_DEQUEUE(&ifp->if_snd, m_head);
                if (m_head == NULL)
                        break;
                /*
                 *  Encapsulation can modify our pointer, and/or make it
                 *  NULL on failure.  In that event, we can't requeue.
                 */
                if (em_xmit(txr, &m_head)) {
                        if (m_head == NULL)
                                break;
                        IFQ_DRV_PREPEND(&ifp->if_snd, m_head);
                        break;
                }

                /* Send a copy of the frame to the BPF listener */
                ETHER_BPF_MTAP(ifp, m_head);

                /* Set timeout in case hardware has problems transmitting. */
                txr->watchdog_time = ticks;
                txr->queue_status = EM_QUEUE_WORKING;
        }

        return;
}

static void
em_start(struct ifnet *ifp)
{
        struct adapter  *adapter = ifp->if_softc;
        struct tx_ring  *txr = adapter->tx_rings;

        if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
                EM_TX_LOCK(txr);
                em_start_locked(ifp, txr);
                EM_TX_UNLOCK(txr);
        }
        return;
}
#endif /* EM_MULTIQUEUE */

/*********************************************************************
 *  Ioctl entry point
 *
 *  em_ioctl is called when the user wants to configure the
 *  interface.
 *
 *  return 0 on success, positive on failure
 **********************************************************************/

static int
em_ioctl(struct ifnet *ifp, u_long command, caddr_t data)
{
        struct adapter  *adapter = ifp->if_softc;
        struct ifreq    *ifr = (struct ifreq *)data;
#if defined(INET) || defined(INET6)
        struct ifaddr   *ifa = (struct ifaddr *)data;
#endif
        bool            avoid_reset = FALSE;
        int             error = 0;

        if (adapter->in_detach)
                return (error);

        switch (command) {
        case SIOCSIFADDR:
#ifdef INET
                if (ifa->ifa_addr->sa_family == AF_INET)
                        avoid_reset = TRUE;
#endif
#ifdef INET6
                if (ifa->ifa_addr->sa_family == AF_INET6)
                        avoid_reset = TRUE;
#endif
                /*
                ** Calling init results in link renegotiation,
                ** so we avoid doing it when possible.
                */
                if (avoid_reset) {
                        ifp->if_flags |= IFF_UP;
                        if (!(ifp->if_drv_flags & IFF_DRV_RUNNING))
                                em_init(adapter);
#ifdef INET
                        if (!(ifp->if_flags & IFF_NOARP))
                                arp_ifinit(ifp, ifa);
#endif
                } else
                        error = ether_ioctl(ifp, command, data);
                break;
        case SIOCSIFMTU:
            {
                int max_frame_size;

                IOCTL_DEBUGOUT("ioctl rcv'd: SIOCSIFMTU (Set Interface MTU)");

                EM_CORE_LOCK(adapter);
                switch (adapter->hw.mac.type) {
                case e1000_82571:
                case e1000_82572:
                case e1000_ich9lan:
                case e1000_ich10lan:
                case e1000_pch2lan:
                case e1000_pch_lpt:
                case e1000_82574:
                case e1000_82583:
                case e1000_80003es2lan: /* 9K Jumbo Frame size */
                        max_frame_size = 9234;
                        break;
                case e1000_pchlan:
                        max_frame_size = 4096;
                        break;
                        /* Adapters that do not support jumbo frames */
                case e1000_ich8lan:
                        max_frame_size = ETHER_MAX_LEN;
                        break;
                default:
                        max_frame_size = MAX_JUMBO_FRAME_SIZE;
                }
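                /*
                 * E.g. (illustrative): with max_frame_size = 9234 the
                 * largest MTU accepted below is 9234 - ETHER_HDR_LEN
                 * (14) - ETHER_CRC_LEN (4) = 9216 bytes.
                 */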
                if (ifr->ifr_mtu > max_frame_size - ETHER_HDR_LEN -
                    ETHER_CRC_LEN) {
                        EM_CORE_UNLOCK(adapter);
                        error = EINVAL;
                        break;
                }

                ifp->if_mtu = ifr->ifr_mtu;
                adapter->hw.mac.max_frame_size =
                    ifp->if_mtu + ETHER_HDR_LEN + ETHER_CRC_LEN;
                em_init_locked(adapter);
                EM_CORE_UNLOCK(adapter);
                break;
            }
        case SIOCSIFFLAGS:
                IOCTL_DEBUGOUT("ioctl rcv'd: "
                    "SIOCSIFFLAGS (Set Interface Flags)");
                EM_CORE_LOCK(adapter);
                if (ifp->if_flags & IFF_UP) {
                        if ((ifp->if_drv_flags & IFF_DRV_RUNNING)) {
                                if ((ifp->if_flags ^ adapter->if_flags) &
                                    (IFF_PROMISC | IFF_ALLMULTI)) {
                                        em_disable_promisc(adapter);
                                        em_set_promisc(adapter);
                                }
                        } else
                                em_init_locked(adapter);
                } else
                        if (ifp->if_drv_flags & IFF_DRV_RUNNING)
                                em_stop(adapter);
                adapter->if_flags = ifp->if_flags;
                EM_CORE_UNLOCK(adapter);
                break;
        case SIOCADDMULTI:
        case SIOCDELMULTI:
                IOCTL_DEBUGOUT("ioctl rcv'd: SIOC(ADD|DEL)MULTI");
                if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
                        EM_CORE_LOCK(adapter);
                        em_disable_intr(adapter);
                        em_set_multi(adapter);
#ifdef DEVICE_POLLING
                        if (!(ifp->if_capenable & IFCAP_POLLING))
#endif
                                em_enable_intr(adapter);
                        EM_CORE_UNLOCK(adapter);
                }
                break;
        case SIOCSIFMEDIA:
                /* Check SOL/IDER usage */
                EM_CORE_LOCK(adapter);
                if (e1000_check_reset_block(&adapter->hw)) {
                        EM_CORE_UNLOCK(adapter);
                        device_printf(adapter->dev, "Media change is"
                            " blocked due to SOL/IDER session.\n");
                        break;
                }
                EM_CORE_UNLOCK(adapter);
                /* falls thru */
        case SIOCGIFMEDIA:
                IOCTL_DEBUGOUT("ioctl rcv'd: "
                    "SIOCxIFMEDIA (Get/Set Interface Media)");
                error = ifmedia_ioctl(ifp, ifr, &adapter->media, command);
                break;
        case SIOCSIFCAP:
            {
                int mask, reinit;

                IOCTL_DEBUGOUT("ioctl rcv'd: SIOCSIFCAP (Set Capabilities)");
                reinit = 0;
                mask = ifr->ifr_reqcap ^ ifp->if_capenable;
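                /*
                 * Usage note (a sketch): on a kernel built with
                 * "options DEVICE_POLLING", this capability is toggled
                 * from userland via SIOCSIFCAP, e.g.
                 *      ifconfig em0 polling
                 *      ifconfig em0 -polling
                 */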
#ifdef DEVICE_POLLING
                if (mask & IFCAP_POLLING) {
                        if (ifr->ifr_reqcap & IFCAP_POLLING) {
                                error = ether_poll_register(em_poll, ifp);
                                if (error)
                                        return (error);
                                EM_CORE_LOCK(adapter);
                                em_disable_intr(adapter);
                                ifp->if_capenable |= IFCAP_POLLING;
                                EM_CORE_UNLOCK(adapter);
                        } else {
                                error = ether_poll_deregister(ifp);
                                /* Enable interrupt even in error case */
                                EM_CORE_LOCK(adapter);
                                em_enable_intr(adapter);
                                ifp->if_capenable &= ~IFCAP_POLLING;
                                EM_CORE_UNLOCK(adapter);
                        }
                }
#endif
                if (mask & IFCAP_HWCSUM) {
                        ifp->if_capenable ^= IFCAP_HWCSUM;
                        reinit = 1;
                }
                if (mask & IFCAP_TSO4) {
                        ifp->if_capenable ^= IFCAP_TSO4;
                        reinit = 1;
                }
                if (mask & IFCAP_VLAN_HWTAGGING) {
                        ifp->if_capenable ^= IFCAP_VLAN_HWTAGGING;
                        reinit = 1;
                }
                if (mask & IFCAP_VLAN_HWFILTER) {
                        ifp->if_capenable ^= IFCAP_VLAN_HWFILTER;
                        reinit = 1;
                }
                if (mask & IFCAP_VLAN_HWTSO) {
                        ifp->if_capenable ^= IFCAP_VLAN_HWTSO;
                        reinit = 1;
                }
                if ((mask & IFCAP_WOL) &&
                    (ifp->if_capabilities & IFCAP_WOL) != 0) {
                        if (mask & IFCAP_WOL_MCAST)
                                ifp->if_capenable ^= IFCAP_WOL_MCAST;
                        if (mask & IFCAP_WOL_MAGIC)
                                ifp->if_capenable ^= IFCAP_WOL_MAGIC;
                }
                if (reinit && (ifp->if_drv_flags & IFF_DRV_RUNNING))
                        em_init(adapter);
                VLAN_CAPABILITIES(ifp);
                break;
            }

        default:
                error = ether_ioctl(ifp, command, data);
                break;
        }

        return (error);
}


/*********************************************************************
 *  Init entry point
 *
 *  This routine is used in two ways. It is used by the stack as
 *  init entry point in network interface structure. It is also used
 *  by the driver as a hw/sw initialization routine to get to a
 *  consistent state.
 *
 *  return 0 on success, positive on failure
 **********************************************************************/

static void
em_init_locked(struct adapter *adapter)
{
        struct ifnet    *ifp = adapter->ifp;
        device_t        dev = adapter->dev;

        INIT_DEBUGOUT("em_init: begin");

        EM_CORE_LOCK_ASSERT(adapter);

        em_disable_intr(adapter);
        callout_stop(&adapter->timer);

        /* Get the latest mac address, User can use a LAA */
        bcopy(IF_LLADDR(adapter->ifp), adapter->hw.mac.addr,
              ETHER_ADDR_LEN);

        /* Put the address into the Receive Address Array */
        e1000_rar_set(&adapter->hw, adapter->hw.mac.addr, 0);

        /*
         * With the 82571 adapter, RAR[0] may be overwritten
         * when the other port is reset, so we make a duplicate
         * in RAR[14] for that eventuality; this ensures
         * the interface continues to function.
         */
        if (adapter->hw.mac.type == e1000_82571) {
                e1000_set_laa_state_82571(&adapter->hw, TRUE);
                e1000_rar_set(&adapter->hw, adapter->hw.mac.addr,
                    E1000_RAR_ENTRIES - 1);
        }

        /* Initialize the hardware */
        em_reset(adapter);
        em_update_link_status(adapter);

        /* Setup VLAN support, basic and offload if available */
        E1000_WRITE_REG(&adapter->hw, E1000_VET, ETHERTYPE_VLAN);

        /* Set hardware offload abilities */
        ifp->if_hwassist = 0;
        if (ifp->if_capenable & IFCAP_TXCSUM)
                ifp->if_hwassist |= (CSUM_TCP | CSUM_UDP);
        if (ifp->if_capenable & IFCAP_TSO4)
                ifp->if_hwassist |= CSUM_TSO;

        /* Configure for OS presence */
        em_init_manageability(adapter);

        /* Prepare transmit descriptors and buffers */
        em_setup_transmit_structures(adapter);
        em_initialize_transmit_unit(adapter);

        /* Setup Multicast table */
        em_set_multi(adapter);

        /*
        ** Figure out the desired mbuf
        ** pool for doing jumbos
        */
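        /*
         * (On a typical configuration MCLBYTES is 2048, MJUMPAGESIZE
         * is one page, usually 4096, and MJUM9BYTES is 9216.)
         */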
1344         if (adapter->hw.mac.max_frame_size <= 2048)
1345                 adapter->rx_mbuf_sz = MCLBYTES;
1346         else if (adapter->hw.mac.max_frame_size <= 4096)
1347                 adapter->rx_mbuf_sz = MJUMPAGESIZE;
1348         else
1349                 adapter->rx_mbuf_sz = MJUM9BYTES;

        /* Prepare receive descriptors and buffers */
        if (em_setup_receive_structures(adapter)) {
                device_printf(dev, "Could not setup receive structures\n");
                em_stop(adapter);
                return;
        }
        em_initialize_receive_unit(adapter);

        /* Use real VLAN Filter support? */
        if (ifp->if_capenable & IFCAP_VLAN_HWTAGGING) {
                if (ifp->if_capenable & IFCAP_VLAN_HWFILTER)
                        /* Use real VLAN Filter support */
                        em_setup_vlan_hw_support(adapter);
                else {
                        u32 ctrl;
                        ctrl = E1000_READ_REG(&adapter->hw, E1000_CTRL);
                        ctrl |= E1000_CTRL_VME;
                        E1000_WRITE_REG(&adapter->hw, E1000_CTRL, ctrl);
                }
        }
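
        /*
         * Note: with only IFCAP_VLAN_HWTAGGING set, CTRL.VME enables
         * hardware VLAN tag stripping/insertion without any filtering;
         * IFCAP_VLAN_HWFILTER additionally programs the VLAN filter
         * table so frames with unregistered VLAN IDs are rejected in
         * hardware.
         */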

        /* Don't lose promiscuous settings */
        em_set_promisc(adapter);

        /* Set the interface as ACTIVE */
        ifp->if_drv_flags |= IFF_DRV_RUNNING;
        ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;

        callout_reset(&adapter->timer, hz, em_local_timer, adapter);
        e1000_clear_hw_cntrs_base_generic(&adapter->hw);

        /* MSI/X configuration for 82574 */
        if (adapter->hw.mac.type == e1000_82574) {
                int tmp;
                tmp = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
                tmp |= E1000_CTRL_EXT_PBA_CLR;
                E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT, tmp);
                /* Set the IVAR - interrupt vector routing. */
                E1000_WRITE_REG(&adapter->hw, E1000_IVAR, adapter->ivars);
        }

#ifdef DEVICE_POLLING
        /*
         * Only enable interrupts if we are not polling; make sure
         * they are off otherwise.
         */
        if (ifp->if_capenable & IFCAP_POLLING)
                em_disable_intr(adapter);
        else
#endif /* DEVICE_POLLING */
                em_enable_intr(adapter);

        /* AMT based hardware can now take control from firmware */
        if (adapter->has_manage && adapter->has_amt)
                em_get_hw_control(adapter);
}

static void
em_init(void *arg)
{
        struct adapter *adapter = arg;

        EM_CORE_LOCK(adapter);
        em_init_locked(adapter);
        EM_CORE_UNLOCK(adapter);
}

#ifdef DEVICE_POLLING
/*********************************************************************
 *
 *  Legacy polling routine: note this only works with single queue
 *
 *********************************************************************/
static int
em_poll(struct ifnet *ifp, enum poll_cmd cmd, int count)
{
        struct adapter *adapter = ifp->if_softc;
        struct tx_ring  *txr = adapter->tx_rings;
        struct rx_ring  *rxr = adapter->rx_rings;
        u32             reg_icr;
        int             rx_done;

        EM_CORE_LOCK(adapter);
        if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) {
                EM_CORE_UNLOCK(adapter);
                return (0);
        }

        if (cmd == POLL_AND_CHECK_STATUS) {
                reg_icr = E1000_READ_REG(&adapter->hw, E1000_ICR);
                if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
                        callout_stop(&adapter->timer);
                        adapter->hw.mac.get_link_status = 1;
                        em_update_link_status(adapter);
                        callout_reset(&adapter->timer, hz,
                            em_local_timer, adapter);
                }
        }
        EM_CORE_UNLOCK(adapter);

        em_rxeof(rxr, count, &rx_done);

        EM_TX_LOCK(txr);
        em_txeof(txr);
#ifdef EM_MULTIQUEUE
        if (!drbr_empty(ifp, txr->br))
                em_mq_start_locked(ifp, txr, NULL);
#else
        if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
                em_start_locked(ifp, txr);
#endif
        EM_TX_UNLOCK(txr);

        return (rx_done);
}
#endif /* DEVICE_POLLING */
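
/*
 * Usage sketch (assumes a kernel built with "options DEVICE_POLLING"):
 * per-interface polling is toggled from userland with
 *   # ifconfig em0 polling
 *   # ifconfig em0 -polling
 * which flips IFCAP_POLLING via the capabilities ioctl handled in
 * em_ioctl() above.
 */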

/*********************************************************************
 *
 *  Fast Legacy/MSI Combined Interrupt Service routine
 *
 *********************************************************************/
static int
em_irq_fast(void *arg)
{
        struct adapter  *adapter = arg;
        struct ifnet    *ifp;
        u32             reg_icr;

        ifp = adapter->ifp;

        reg_icr = E1000_READ_REG(&adapter->hw, E1000_ICR);

        /* Hot eject?  */
        if (reg_icr == 0xffffffff)
                return FILTER_STRAY;

        /* Definitely not our interrupt.  */
        if (reg_icr == 0x0)
                return FILTER_STRAY;

        /*
         * Starting with the 82571 chip, bit 31 should be used to
         * determine whether the interrupt belongs to us.
         */
        if (adapter->hw.mac.type >= e1000_82571 &&
            (reg_icr & E1000_ICR_INT_ASSERTED) == 0)
                return FILTER_STRAY;

        em_disable_intr(adapter);
        taskqueue_enqueue(adapter->tq, &adapter->que_task);

        /* Link status change */
        if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
                adapter->hw.mac.get_link_status = 1;
                taskqueue_enqueue(taskqueue_fast, &adapter->link_task);
        }

        if (reg_icr & E1000_ICR_RXO)
                adapter->rx_overruns++;
        return FILTER_HANDLED;
}
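
/*
 * Design note: em_irq_fast runs as a filter (primary) interrupt
 * handler, so it must not sleep or acquire regular mutexes; it only
 * claims the interrupt, masks further interrupts, and defers the
 * actual RX/TX work to the em_handle_que task below.
 */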

/* Combined RX/TX handler, used by Legacy and MSI */
static void
em_handle_que(void *context, int pending)
{
        struct adapter  *adapter = context;
        struct ifnet    *ifp = adapter->ifp;
        struct tx_ring  *txr = adapter->tx_rings;
        struct rx_ring  *rxr = adapter->rx_rings;

        if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
                bool more = em_rxeof(rxr, adapter->rx_process_limit, NULL);
                EM_TX_LOCK(txr);
                em_txeof(txr);
#ifdef EM_MULTIQUEUE
                if (!drbr_empty(ifp, txr->br))
                        em_mq_start_locked(ifp, txr, NULL);
#else
                if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
                        em_start_locked(ifp, txr);
#endif
                EM_TX_UNLOCK(txr);
                if (more) {
                        taskqueue_enqueue(adapter->tq, &adapter->que_task);
                        return;
                }
        }

        em_enable_intr(adapter);
        return;
}


/*********************************************************************
 *
 *  MSIX Interrupt Service Routines
 *
 **********************************************************************/
static void
em_msix_tx(void *arg)
{
        struct tx_ring *txr = arg;
        struct adapter *adapter = txr->adapter;
        struct ifnet    *ifp = adapter->ifp;

        ++txr->tx_irq;
        EM_TX_LOCK(txr);
        em_txeof(txr);
#ifdef EM_MULTIQUEUE
        if (!drbr_empty(ifp, txr->br))
                em_mq_start_locked(ifp, txr, NULL);
#else
        if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
                em_start_locked(ifp, txr);
#endif
        /* Reenable this interrupt */
        E1000_WRITE_REG(&adapter->hw, E1000_IMS, txr->ims);
        EM_TX_UNLOCK(txr);
        return;
}

/*********************************************************************
 *
 *  MSIX RX Interrupt Service routine
 *
 **********************************************************************/

static void
em_msix_rx(void *arg)
{
        struct rx_ring  *rxr = arg;
        struct adapter  *adapter = rxr->adapter;
        bool            more;

        ++rxr->rx_irq;
        if (!(adapter->ifp->if_drv_flags & IFF_DRV_RUNNING))
                return;
        more = em_rxeof(rxr, adapter->rx_process_limit, NULL);
        if (more)
                taskqueue_enqueue(rxr->tq, &rxr->rx_task);
        else
                /* Reenable this interrupt */
                E1000_WRITE_REG(&adapter->hw, E1000_IMS, rxr->ims);
        return;
}

/*********************************************************************
 *
 *  MSIX Link Fast Interrupt Service routine
 *
 **********************************************************************/
static void
em_msix_link(void *arg)
{
        struct adapter  *adapter = arg;
        u32             reg_icr;

        ++adapter->link_irq;
        reg_icr = E1000_READ_REG(&adapter->hw, E1000_ICR);

        if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
                adapter->hw.mac.get_link_status = 1;
                em_handle_link(adapter, 0);
        } else
                E1000_WRITE_REG(&adapter->hw, E1000_IMS,
                    EM_MSIX_LINK | E1000_IMS_LSC);
        return;
}
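
/*
 * Note: when a link event is taken, the LSC cause is re-armed at the
 * end of em_handle_link() once the link state has been refreshed;
 * otherwise it is re-armed immediately above.
 */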

static void
em_handle_rx(void *context, int pending)
{
        struct rx_ring  *rxr = context;
        struct adapter  *adapter = rxr->adapter;
        bool            more;

        more = em_rxeof(rxr, adapter->rx_process_limit, NULL);
        if (more)
                taskqueue_enqueue(rxr->tq, &rxr->rx_task);
        else
                /* Reenable this interrupt */
                E1000_WRITE_REG(&adapter->hw, E1000_IMS, rxr->ims);
}

static void
em_handle_tx(void *context, int pending)
{
        struct tx_ring  *txr = context;
        struct adapter  *adapter = txr->adapter;
        struct ifnet    *ifp = adapter->ifp;

        EM_TX_LOCK(txr);
        em_txeof(txr);
#ifdef EM_MULTIQUEUE
        if (!drbr_empty(ifp, txr->br))
                em_mq_start_locked(ifp, txr, NULL);
#else
        if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
                em_start_locked(ifp, txr);
#endif
        E1000_WRITE_REG(&adapter->hw, E1000_IMS, txr->ims);
        EM_TX_UNLOCK(txr);
}

static void
em_handle_link(void *context, int pending)
{
        struct adapter  *adapter = context;
        struct tx_ring  *txr = adapter->tx_rings;
        struct ifnet *ifp = adapter->ifp;

        if (!(ifp->if_drv_flags & IFF_DRV_RUNNING))
                return;

        EM_CORE_LOCK(adapter);
        callout_stop(&adapter->timer);
        em_update_link_status(adapter);
        callout_reset(&adapter->timer, hz, em_local_timer, adapter);
        E1000_WRITE_REG(&adapter->hw, E1000_IMS,
            EM_MSIX_LINK | E1000_IMS_LSC);
        if (adapter->link_active) {
                for (int i = 0; i < adapter->num_queues; i++, txr++) {
                        EM_TX_LOCK(txr);
#ifdef EM_MULTIQUEUE
                        if (!drbr_empty(ifp, txr->br))
                                em_mq_start_locked(ifp, txr, NULL);
#else
                        if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
                                em_start_locked(ifp, txr);
#endif
                        EM_TX_UNLOCK(txr);
                }
        }
        EM_CORE_UNLOCK(adapter);
}

/*********************************************************************
 *
 *  Media Ioctl callback
 *
 *  This routine is called whenever the user queries the status of
 *  the interface using ifconfig.
 *
 **********************************************************************/
static void
em_media_status(struct ifnet *ifp, struct ifmediareq *ifmr)
{
        struct adapter *adapter = ifp->if_softc;
        u_char fiber_type = IFM_1000_SX;

        INIT_DEBUGOUT("em_media_status: begin");

        EM_CORE_LOCK(adapter);
        em_update_link_status(adapter);

        ifmr->ifm_status = IFM_AVALID;
        ifmr->ifm_active = IFM_ETHER;

        if (!adapter->link_active) {
                EM_CORE_UNLOCK(adapter);
                return;
        }

        ifmr->ifm_status |= IFM_ACTIVE;

        if ((adapter->hw.phy.media_type == e1000_media_type_fiber) ||
            (adapter->hw.phy.media_type == e1000_media_type_internal_serdes)) {
                ifmr->ifm_active |= fiber_type | IFM_FDX;
        } else {
                switch (adapter->link_speed) {
                case 10:
                        ifmr->ifm_active |= IFM_10_T;
                        break;
                case 100:
                        ifmr->ifm_active |= IFM_100_TX;
                        break;
                case 1000:
                        ifmr->ifm_active |= IFM_1000_T;
                        break;
                }
                if (adapter->link_duplex == FULL_DUPLEX)
                        ifmr->ifm_active |= IFM_FDX;
                else
                        ifmr->ifm_active |= IFM_HDX;
        }
        EM_CORE_UNLOCK(adapter);
}

/*********************************************************************
 *
 *  Media Ioctl callback
 *
 *  This routine is called when the user changes speed/duplex using
 *  the media/mediaopt options with ifconfig.
 *
 **********************************************************************/
static int
em_media_change(struct ifnet *ifp)
{
        struct adapter *adapter = ifp->if_softc;
        struct ifmedia  *ifm = &adapter->media;

        INIT_DEBUGOUT("em_media_change: begin");

        if (IFM_TYPE(ifm->ifm_media) != IFM_ETHER)
                return (EINVAL);

        EM_CORE_LOCK(adapter);
        switch (IFM_SUBTYPE(ifm->ifm_media)) {
        case IFM_AUTO:
                adapter->hw.mac.autoneg = DO_AUTO_NEG;
                adapter->hw.phy.autoneg_advertised = AUTONEG_ADV_DEFAULT;
                break;
        case IFM_1000_LX:
        case IFM_1000_SX:
        case IFM_1000_T:
                adapter->hw.mac.autoneg = DO_AUTO_NEG;
                adapter->hw.phy.autoneg_advertised = ADVERTISE_1000_FULL;
                break;
        case IFM_100_TX:
                adapter->hw.mac.autoneg = FALSE;
                adapter->hw.phy.autoneg_advertised = 0;
                if ((ifm->ifm_media & IFM_GMASK) == IFM_FDX)
                        adapter->hw.mac.forced_speed_duplex = ADVERTISE_100_FULL;
                else
                        adapter->hw.mac.forced_speed_duplex = ADVERTISE_100_HALF;
                break;
        case IFM_10_T:
                adapter->hw.mac.autoneg = FALSE;
                adapter->hw.phy.autoneg_advertised = 0;
                if ((ifm->ifm_media & IFM_GMASK) == IFM_FDX)
                        adapter->hw.mac.forced_speed_duplex = ADVERTISE_10_FULL;
                else
                        adapter->hw.mac.forced_speed_duplex = ADVERTISE_10_HALF;
                break;
        default:
                device_printf(adapter->dev, "Unsupported media type\n");
        }

        em_init_locked(adapter);
        EM_CORE_UNLOCK(adapter);

        return (0);
}
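
/*
 * Usage sketch from userland, via standard ifconfig(8) syntax:
 *   # ifconfig em0 media 100baseTX mediaopt full-duplex
 *   # ifconfig em0 media autoselect
 * Forced 10/100 selections disable autonegotiation above; gigabit
 * and autoselect always negotiate.
 */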

/*********************************************************************
 *
 *  This routine maps the mbufs to tx descriptors.
 *
 *  return 0 on success, positive on failure
 **********************************************************************/

static int
em_xmit(struct tx_ring *txr, struct mbuf **m_headp)
{
        struct adapter          *adapter = txr->adapter;
        bus_dma_segment_t       segs[EM_MAX_SCATTER];
        bus_dmamap_t            map;
        struct em_buffer        *tx_buffer, *tx_buffer_mapped;
        struct e1000_tx_desc    *ctxd = NULL;
        struct mbuf             *m_head;
        struct ether_header     *eh;
        struct ip               *ip = NULL;
        struct tcphdr           *tp = NULL;
        u32                     txd_upper, txd_lower, txd_used, txd_saved;
        int                     ip_off, poff;
        int                     nsegs, i, j, first, last = 0;
        int                     error, do_tso, tso_desc = 0, remap = 1;

retry:
        m_head = *m_headp;
        txd_upper = txd_lower = txd_used = txd_saved = 0;
        do_tso = ((m_head->m_pkthdr.csum_flags & CSUM_TSO) != 0);
        ip_off = poff = 0;

        /*
         * Intel recommends that the entire IP/TCP header length reside
         * in a single buffer.  If multiple descriptors are used to
         * describe the IP and TCP header, each descriptor should
         * describe one or more complete headers; descriptors
         * referencing only parts of headers are not supported.  If all
         * layer headers are not coalesced into a single buffer, each
         * buffer should not cross a 4KB boundary, or be larger than
         * the maximum read request size.
         * The controller also requires modifying the IP/TCP header to
         * make TSO work, so we first get a writable mbuf chain, then
         * coalesce the ethernet/IP/TCP header into a single buffer to
         * meet the controller's requirement.  This also simplifies
         * IP/TCP/UDP checksum offloading, which has similar
         * restrictions.
         */
        if (do_tso || m_head->m_pkthdr.csum_flags & CSUM_OFFLOAD) {
                if (do_tso || (m_head->m_next != NULL &&
                    m_head->m_pkthdr.csum_flags & CSUM_OFFLOAD)) {
                        if (M_WRITABLE(*m_headp) == 0) {
                                m_head = m_dup(*m_headp, M_NOWAIT);
                                m_freem(*m_headp);
                                if (m_head == NULL) {
                                        *m_headp = NULL;
                                        return (ENOBUFS);
                                }
                                *m_headp = m_head;
                        }
                }
                /*
                 * XXX
                 * Assume IPv4; we don't have TSO/checksum offload
                 * support for IPv6 yet.
                 */
                ip_off = sizeof(struct ether_header);
                m_head = m_pullup(m_head, ip_off);
                if (m_head == NULL) {
                        *m_headp = NULL;
                        return (ENOBUFS);
                }
                eh = mtod(m_head, struct ether_header *);
                if (eh->ether_type == htons(ETHERTYPE_VLAN)) {
                        ip_off = sizeof(struct ether_vlan_header);
                        m_head = m_pullup(m_head, ip_off);
                        if (m_head == NULL) {
                                *m_headp = NULL;
                                return (ENOBUFS);
                        }
                }
                m_head = m_pullup(m_head, ip_off + sizeof(struct ip));
                if (m_head == NULL) {
                        *m_headp = NULL;
                        return (ENOBUFS);
                }
                ip = (struct ip *)(mtod(m_head, char *) + ip_off);
                poff = ip_off + (ip->ip_hl << 2);
                if (do_tso) {
                        m_head = m_pullup(m_head, poff + sizeof(struct tcphdr));
                        if (m_head == NULL) {
                                *m_headp = NULL;
                                return (ENOBUFS);
                        }
                        tp = (struct tcphdr *)(mtod(m_head, char *) + poff);
                        /*
                         * TSO workaround:
                         *   pull 4 more bytes of data into
                         *   the header mbuf.
                         */
                        m_head = m_pullup(m_head, poff + (tp->th_off << 2) + 4);
                        if (m_head == NULL) {
                                *m_headp = NULL;
                                return (ENOBUFS);
                        }
                        ip = (struct ip *)(mtod(m_head, char *) + ip_off);
                        ip->ip_len = 0;
                        ip->ip_sum = 0;
                        /*
                         * The TCP pseudo checksum for TSO must not
                         * include the TCP payload length, so the
                         * driver recomputes it here to the value the
                         * hardware expects, as required by Microsoft's
                         * Large Send specification.
                         */
                        tp = (struct tcphdr *)(mtod(m_head, char *) + poff);
                        tp->th_sum = in_pseudo(ip->ip_src.s_addr,
                            ip->ip_dst.s_addr, htons(IPPROTO_TCP));
                } else if (m_head->m_pkthdr.csum_flags & CSUM_TCP) {
                        m_head = m_pullup(m_head, poff + sizeof(struct tcphdr));
                        if (m_head == NULL) {
                                *m_headp = NULL;
                                return (ENOBUFS);
                        }
                        tp = (struct tcphdr *)(mtod(m_head, char *) + poff);
                        m_head = m_pullup(m_head, poff + (tp->th_off << 2));
                        if (m_head == NULL) {
                                *m_headp = NULL;
                                return (ENOBUFS);
                        }
                        ip = (struct ip *)(mtod(m_head, char *) + ip_off);
                        tp = (struct tcphdr *)(mtod(m_head, char *) + poff);
                } else if (m_head->m_pkthdr.csum_flags & CSUM_UDP) {
                        m_head = m_pullup(m_head, poff + sizeof(struct udphdr));
                        if (m_head == NULL) {
                                *m_headp = NULL;
                                return (ENOBUFS);
                        }
                        ip = (struct ip *)(mtod(m_head, char *) + ip_off);
                }
                *m_headp = m_head;
        }
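
        /*
         * Illustrative note on the TSO seed above (an assumption drawn
         * from the LSO convention, not restated from the datasheet):
         * for plain checksum offload the stack seeds th_sum with the
         * full pseudo-header checksum including the TCP length, while
         * for TSO the hardware derives each segment's length itself,
         * so the seed is reduced to
         *   in_pseudo(ip_src, ip_dst, htons(IPPROTO_TCP));
         */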

        /*
         * Map the packet for DMA
         *
         * Capture the first descriptor index,
         * this descriptor will have the index
         * of the EOP which is the only one that
         * now gets a DONE bit writeback.
         */
        first = txr->next_avail_desc;
        tx_buffer = &txr->tx_buffers[first];
        tx_buffer_mapped = tx_buffer;
        map = tx_buffer->map;

        error = bus_dmamap_load_mbuf_sg(txr->txtag, map,
            *m_headp, segs, &nsegs, BUS_DMA_NOWAIT);

        /*
         * There are two types of errors we can (try) to handle:
         * - EFBIG means the mbuf chain was too long and bus_dma ran
         *   out of segments.  Defragment the mbuf chain and try again.
         * - ENOMEM means bus_dma could not obtain enough bounce buffers
         *   at this point in time.  Defer sending and try again later.
         * All other errors, in particular EINVAL, are fatal and prevent the
         * mbuf chain from ever going through.  Drop it and report error.
         */
        if (error == EFBIG && remap) {
                struct mbuf *m;

                m = m_defrag(*m_headp, M_NOWAIT);
                if (m == NULL) {
                        adapter->mbuf_alloc_failed++;
                        m_freem(*m_headp);
                        *m_headp = NULL;
                        return (ENOBUFS);
                }
                *m_headp = m;

                /* Try it again, but only once */
                remap = 0;
                goto retry;
        } else if (error == ENOMEM) {
                adapter->no_tx_dma_setup++;
                return (error);
        } else if (error != 0) {
                adapter->no_tx_dma_setup++;
                m_freem(*m_headp);
                *m_headp = NULL;
                return (error);
        }

        /*
         * TSO Hardware workaround, if this packet is not
         * TSO, and is only a single descriptor long, and
         * it follows a TSO burst, then we need to add a
         * sentinel descriptor to prevent premature writeback.
         */
        if ((do_tso == 0) && (txr->tx_tso == TRUE)) {
                if (nsegs == 1)
                        tso_desc = TRUE;
                txr->tx_tso = FALSE;
        }

        if (nsegs > (txr->tx_avail - 2)) {
                txr->no_desc_avail++;
                bus_dmamap_unload(txr->txtag, map);
                return (ENOBUFS);
        }
        m_head = *m_headp;

        /* Do hardware assists */
        if (m_head->m_pkthdr.csum_flags & CSUM_TSO) {
                em_tso_setup(txr, m_head, ip_off, ip, tp,
                    &txd_upper, &txd_lower);
                /* we need to make a final sentinel transmit desc */
                tso_desc = TRUE;
        } else if (m_head->m_pkthdr.csum_flags & CSUM_OFFLOAD)
                em_transmit_checksum_setup(txr, m_head,
                    ip_off, ip, &txd_upper, &txd_lower);

        if (m_head->m_flags & M_VLANTAG) {
                /* Set the vlan id. */
                txd_upper |=
                    (htole16(m_head->m_pkthdr.ether_vtag) << 16);
                /* Tell hardware to add tag */
                txd_lower |= htole32(E1000_TXD_CMD_VLE);
        }
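
        /*
         * Legacy TX descriptor layout reminder (per the e1000_tx_desc
         * definition in the shared code): the upper dword's top 16
         * bits are the "special" field, which carries the 802.1q tag
         * that hardware inserts when the VLE command bit is set in the
         * lower dword.
         */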

        i = txr->next_avail_desc;

        /* Set up our transmit descriptors */
        for (j = 0; j < nsegs; j++) {
                bus_size_t seg_len;
                bus_addr_t seg_addr;

                tx_buffer = &txr->tx_buffers[i];
                ctxd = &txr->tx_base[i];
                seg_addr = segs[j].ds_addr;
                seg_len  = segs[j].ds_len;
                /*
                ** TSO Workaround:
                ** If this is the last descriptor, we want to
                ** split it so we have a small final sentinel
                */
                if (tso_desc && (j == (nsegs - 1)) && (seg_len > 8)) {
                        seg_len -= 4;
                        ctxd->buffer_addr = htole64(seg_addr);
                        ctxd->lower.data = htole32(
                            adapter->txd_cmd | txd_lower | seg_len);
                        ctxd->upper.data = htole32(txd_upper);
                        if (++i == adapter->num_tx_desc)
                                i = 0;
                        /* Now make the sentinel */
                        ++txd_used; /* using an extra txd */
                        ctxd = &txr->tx_base[i];
                        tx_buffer = &txr->tx_buffers[i];
                        ctxd->buffer_addr = htole64(seg_addr + seg_len);
                        ctxd->lower.data = htole32(
                            adapter->txd_cmd | txd_lower | 4);
                        ctxd->upper.data = htole32(txd_upper);
                        last = i;
                        if (++i == adapter->num_tx_desc)
                                i = 0;
                } else {
                        ctxd->buffer_addr = htole64(seg_addr);
                        ctxd->lower.data = htole32(
                            adapter->txd_cmd | txd_lower | seg_len);
                        ctxd->upper.data = htole32(txd_upper);
                        last = i;
                        if (++i == adapter->num_tx_desc)
                                i = 0;
                }
                tx_buffer->m_head = NULL;
                tx_buffer->next_eop = -1;
        }

        txr->next_avail_desc = i;
        txr->tx_avail -= nsegs;
        if (tso_desc) /* TSO used an extra for sentinel */
                txr->tx_avail -= txd_used;

        tx_buffer->m_head = m_head;
        /*
        ** Here we swap the map so the last descriptor,
        ** which gets the completion interrupt, has the
        ** real map, and the first descriptor gets the
        ** unused map from this descriptor.
        */
        tx_buffer_mapped->map = tx_buffer->map;
        tx_buffer->map = map;
        bus_dmamap_sync(txr->txtag, map, BUS_DMASYNC_PREWRITE);

        /*
         * Last Descriptor of Packet
         * needs End Of Packet (EOP)
         * and Report Status (RS)
         */
        ctxd->lower.data |=
            htole32(E1000_TXD_CMD_EOP | E1000_TXD_CMD_RS);
        /*
         * Record in the first buffer which
         * descriptor will be written back
         */
        tx_buffer = &txr->tx_buffers[first];
        tx_buffer->next_eop = last;
        /* Update the watchdog time early and often */
        txr->watchdog_time = ticks;

        /*
         * Advance the Transmit Descriptor Tail (TDT), this tells the E1000
         * that this frame is available to transmit.
         */
        bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
            BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
        E1000_WRITE_REG(&adapter->hw, E1000_TDT(txr->me), i);

        return (0);
}
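
/*
 * Ring-advance note: writing the new tail index to TDT hands every
 * descriptor up to (but not including) that index to the hardware.
 * Completion is later detected in em_txeof() via the DD status bit of
 * the descriptor that carried the RS command above.
 */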

static void
em_set_promisc(struct adapter *adapter)
{
        struct ifnet    *ifp = adapter->ifp;
        u32             reg_rctl;

        reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);

        if (ifp->if_flags & IFF_PROMISC) {
                reg_rctl |= (E1000_RCTL_UPE | E1000_RCTL_MPE);
                /* Turn this on if you want to see bad packets */
                if (em_debug_sbp)
                        reg_rctl |= E1000_RCTL_SBP;
                E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
        } else if (ifp->if_flags & IFF_ALLMULTI) {
                reg_rctl |= E1000_RCTL_MPE;
                reg_rctl &= ~E1000_RCTL_UPE;
                E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
        }
}
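
/*
 * Note: there is no "else" path above; clearing the unicast and
 * multicast promiscuous bits is handled separately by
 * em_disable_promisc() below.
 */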

static void
em_disable_promisc(struct adapter *adapter)
{
        struct ifnet    *ifp = adapter->ifp;
        u32             reg_rctl;
        int             mcnt = 0;

        reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
        reg_rctl &= (~E1000_RCTL_UPE);
        if (ifp->if_flags & IFF_ALLMULTI)
                mcnt = MAX_NUM_MULTICAST_ADDRESSES;
        else {
                struct  ifmultiaddr *ifma;
#if __FreeBSD_version < 800000
                IF_ADDR_LOCK(ifp);
#else
                if_maddr_rlock(ifp);
#endif
                TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
                        if (ifma->ifma_addr->sa_family != AF_LINK)
                                continue;
                        if (mcnt == MAX_NUM_MULTICAST_ADDRESSES)
                                break;
                        mcnt++;
                }
#if __FreeBSD_version < 800000
                IF_ADDR_UNLOCK(ifp);
#else
                if_maddr_runlock(ifp);
#endif
        }
        /* Don't disable if in MAX groups */
        if (mcnt < MAX_NUM_MULTICAST_ADDRESSES)
                reg_rctl &= (~E1000_RCTL_MPE);
        reg_rctl &= (~E1000_RCTL_SBP);
        E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
}

/*********************************************************************
 *  Multicast Update
 *
 *  This routine is called whenever the multicast address list is
 *  updated.
 *
 **********************************************************************/

static void
em_set_multi(struct adapter *adapter)
{
        struct ifnet    *ifp = adapter->ifp;
        struct ifmultiaddr *ifma;
        u32 reg_rctl = 0;
        u8  *mta; /* Multicast array memory */
        int mcnt = 0;

        IOCTL_DEBUGOUT("em_set_multi: begin");

        mta = adapter->mta;
        bzero(mta, sizeof(u8) * ETH_ADDR_LEN * MAX_NUM_MULTICAST_ADDRESSES);

        if (adapter->hw.mac.type == e1000_82542 &&
            adapter->hw.revision_id == E1000_REVISION_2) {
                reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
                if (adapter->hw.bus.pci_cmd_word & CMD_MEM_WRT_INVALIDATE)
                        e1000_pci_clear_mwi(&adapter->hw);
                reg_rctl |= E1000_RCTL_RST;
                E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
                msec_delay(5);
        }

#if __FreeBSD_version < 800000
        IF_ADDR_LOCK(ifp);
#else
        if_maddr_rlock(ifp);
#endif
        TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
                if (ifma->ifma_addr->sa_family != AF_LINK)
                        continue;

                if (mcnt == MAX_NUM_MULTICAST_ADDRESSES)
                        break;

                bcopy(LLADDR((struct sockaddr_dl *)ifma->ifma_addr),
                    &mta[mcnt * ETH_ADDR_LEN], ETH_ADDR_LEN);
                mcnt++;
        }
#if __FreeBSD_version < 800000
        IF_ADDR_UNLOCK(ifp);
#else
        if_maddr_runlock(ifp);
#endif
        if (mcnt >= MAX_NUM_MULTICAST_ADDRESSES) {
                reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
                reg_rctl |= E1000_RCTL_MPE;
                E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
        } else
                e1000_update_mc_addr_list(&adapter->hw, mta, mcnt);

        if (adapter->hw.mac.type == e1000_82542 &&
            adapter->hw.revision_id == E1000_REVISION_2) {
                reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
                reg_rctl &= ~E1000_RCTL_RST;
                E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
                msec_delay(5);
                if (adapter->hw.bus.pci_cmd_word & CMD_MEM_WRT_INVALIDATE)
                        e1000_pci_set_mwi(&adapter->hw);
        }
}
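
/*
 * The RCTL_RST bracketing above is an 82542 rev 2.0 workaround: that
 * part requires the receiver to be held in reset (with MWI disabled)
 * while the multicast address list is rewritten, after which both are
 * restored.
 */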

/*********************************************************************
 *  Timer routine
 *
 *  This routine checks for link status and updates statistics.
 *
 **********************************************************************/

static void
em_local_timer(void *arg)
{
        struct adapter  *adapter = arg;
        struct ifnet    *ifp = adapter->ifp;
        struct tx_ring  *txr = adapter->tx_rings;
        struct rx_ring  *rxr = adapter->rx_rings;
        u32             trigger;

        EM_CORE_LOCK_ASSERT(adapter);

        em_update_link_status(adapter);
        em_update_stats_counters(adapter);

        /* Reset LAA into RAR[0] on 82571 */
        if ((adapter->hw.mac.type == e1000_82571) &&
            e1000_get_laa_state_82571(&adapter->hw))
                e1000_rar_set(&adapter->hw, adapter->hw.mac.addr, 0);

        /* Mask to use in the irq trigger */
        if (adapter->msix_mem)
                trigger = rxr->ims;
        else
                trigger = E1000_ICS_RXDMT0;

        /*
        ** Check on the state of the TX queue(s); this
        ** can be done without the lock because it's RO
        ** and the HUNG state will be static if set.
        */
        for (int i = 0; i < adapter->num_queues; i++, txr++) {
                if ((txr->queue_status == EM_QUEUE_HUNG) &&
                    (adapter->pause_frames == 0))
                        goto hung;
                /* Schedule a TX task if needed */
                if (txr->tx_avail <= EM_MAX_SCATTER)
                        taskqueue_enqueue(txr->tq, &txr->tx_task);
        }

        adapter->pause_frames = 0;
        callout_reset(&adapter->timer, hz, em_local_timer, adapter);
#ifndef DEVICE_POLLING
        /* Trigger an RX interrupt to guarantee mbuf refresh */
        E1000_WRITE_REG(&adapter->hw, E1000_ICS, trigger);
#endif
        return;
hung:
        /* Looks like we're hung */
        device_printf(adapter->dev, "Watchdog timeout -- resetting\n");
        device_printf(adapter->dev,
            "Queue(%d) tdh = %d, hw tdt = %d\n", txr->me,
            E1000_READ_REG(&adapter->hw, E1000_TDH(txr->me)),
            E1000_READ_REG(&adapter->hw, E1000_TDT(txr->me)));
        device_printf(adapter->dev, "TX(%d) desc avail = %d, "
            "Next TX to Clean = %d\n",
            txr->me, txr->tx_avail, txr->next_to_clean);
        ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
        adapter->watchdog_events++;
        adapter->pause_frames = 0;
        em_init_locked(adapter);
}
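
/*
 * Hang-detection note: queue_status is promoted to EM_QUEUE_HUNG by
 * the TX cleanup path when a queue stops making progress; the
 * pause_frames test above avoids declaring a hang while the link
 * partner is legitimately stalling us with 802.3x flow control.
 */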

static void
em_update_link_status(struct adapter *adapter)
{
        struct e1000_hw *hw = &adapter->hw;
        struct ifnet *ifp = adapter->ifp;
        device_t dev = adapter->dev;
        struct tx_ring *txr = adapter->tx_rings;
        u32 link_check = 0;

        /* Get the cached link value or read phy for real */
        switch (hw->phy.media_type) {
        case e1000_media_type_copper:
                if (hw->mac.get_link_status) {
                        /* Do the work to read phy */
                        e1000_check_for_link(hw);
                        link_check = !hw->mac.get_link_status;
                        if (link_check) /* ESB2 fix */
                                e1000_cfg_on_link_up(hw);
                } else
                        link_check = TRUE;
                break;
        case e1000_media_type_fiber:
                e1000_check_for_link(hw);
                link_check = (E1000_READ_REG(hw, E1000_STATUS) &
                                 E1000_STATUS_LU);
                break;
        case e1000_media_type_internal_serdes:
                e1000_check_for_link(hw);
                link_check = adapter->hw.mac.serdes_has_link;
                break;
        default:
        case e1000_media_type_unknown:
                break;
        }

        /* Now check for a transition */
        if (link_check && (adapter->link_active == 0)) {
                e1000_get_speed_and_duplex(hw, &adapter->link_speed,
                    &adapter->link_duplex);
                /* Check if we must disable SPEED_MODE bit on PCI-E */
                if ((adapter->link_speed != SPEED_1000) &&
                    ((hw->mac.type == e1000_82571) ||
                    (hw->mac.type == e1000_82572))) {
                        int tarc0;
                        tarc0 = E1000_READ_REG(hw, E1000_TARC(0));
                        tarc0 &= ~SPEED_MODE_BIT;
                        E1000_WRITE_REG(hw, E1000_TARC(0), tarc0);
                }
                if (bootverbose)
                        device_printf(dev, "Link is up %d Mbps %s\n",
                            adapter->link_speed,
                            ((adapter->link_duplex == FULL_DUPLEX) ?
                            "Full Duplex" : "Half Duplex"));
                adapter->link_active = 1;
                adapter->smartspeed = 0;
                ifp->if_baudrate = adapter->link_speed * 1000000;
                if_link_state_change(ifp, LINK_STATE_UP);
        } else if (!link_check && (adapter->link_active == 1)) {
                ifp->if_baudrate = adapter->link_speed = 0;
                adapter->link_duplex = 0;
                if (bootverbose)
                        device_printf(dev, "Link is Down\n");
                adapter->link_active = 0;
                /* Link down, disable watchdog */
                for (int i = 0; i < adapter->num_queues; i++, txr++)
                        txr->queue_status = EM_QUEUE_IDLE;
                if_link_state_change(ifp, LINK_STATE_DOWN);
        }
}

/*********************************************************************
 *
 *  This routine disables all traffic on the adapter by issuing a
 *  global reset on the MAC and deallocates TX/RX buffers.
 *
 *  This routine should always be called with BOTH the CORE
 *  and TX locks.
 **********************************************************************/

static void
em_stop(void *arg)
{
        struct adapter  *adapter = arg;
        struct ifnet    *ifp = adapter->ifp;
        struct tx_ring  *txr = adapter->tx_rings;

        EM_CORE_LOCK_ASSERT(adapter);

        INIT_DEBUGOUT("em_stop: begin");

        em_disable_intr(adapter);
        callout_stop(&adapter->timer);

        /* Tell the stack that the interface is no longer active */
        ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
        ifp->if_drv_flags |= IFF_DRV_OACTIVE;

        /* Unarm watchdog timer. */
        for (int i = 0; i < adapter->num_queues; i++, txr++) {
                EM_TX_LOCK(txr);
                txr->queue_status = EM_QUEUE_IDLE;
                EM_TX_UNLOCK(txr);
        }

        e1000_reset_hw(&adapter->hw);
        E1000_WRITE_REG(&adapter->hw, E1000_WUC, 0);

        e1000_led_off(&adapter->hw);
        e1000_cleanup_led(&adapter->hw);
}


/*********************************************************************
 *
 *  Determine hardware revision.
 *
 **********************************************************************/
static void
em_identify_hardware(struct adapter *adapter)
{
        device_t dev = adapter->dev;

        /* Make sure our PCI config space has the necessary stuff set */
        pci_enable_busmaster(dev);
        adapter->hw.bus.pci_cmd_word = pci_read_config(dev, PCIR_COMMAND, 2);

        /* Save off the information about this board */
        adapter->hw.vendor_id = pci_get_vendor(dev);
        adapter->hw.device_id = pci_get_device(dev);
        adapter->hw.revision_id = pci_read_config(dev, PCIR_REVID, 1);
        adapter->hw.subsystem_vendor_id =
            pci_read_config(dev, PCIR_SUBVEND_0, 2);
        adapter->hw.subsystem_device_id =
            pci_read_config(dev, PCIR_SUBDEV_0, 2);

        /* Do Shared Code Init and Setup */
        if (e1000_set_mac_type(&adapter->hw)) {
                device_printf(dev, "Setup init failure\n");
                return;
        }
}

static int
em_allocate_pci_resources(struct adapter *adapter)
{
        device_t        dev = adapter->dev;
        int             rid;

        rid = PCIR_BAR(0);
        adapter->memory = bus_alloc_resource_any(dev, SYS_RES_MEMORY,
            &rid, RF_ACTIVE);
        if (adapter->memory == NULL) {
                device_printf(dev, "Unable to allocate bus resource: memory\n");
                return (ENXIO);
        }
        adapter->osdep.mem_bus_space_tag =
            rman_get_bustag(adapter->memory);
        adapter->osdep.mem_bus_space_handle =
            rman_get_bushandle(adapter->memory);
        adapter->hw.hw_addr = (u8 *)&adapter->osdep.mem_bus_space_handle;

        /* Default to a single queue */
        adapter->num_queues = 1;

        /*
         * Setup MSI/X or MSI if PCI Express
         */
        adapter->msix = em_setup_msix(adapter);

        adapter->hw.back = &adapter->osdep;

        return (0);
}
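
/*
 * Note on hw_addr above: it stores a pointer to the bus space handle
 * rather than a direct CPU mapping; the shared-code register macros
 * dereference it through the osdep bus_space tag/handle pair, so all
 * register access remains bus_space-mediated.
 */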

/*********************************************************************
 *
 *  Setup the Legacy or MSI Interrupt handler
 *
 **********************************************************************/
int
em_allocate_legacy(struct adapter *adapter)
{
        device_t dev = adapter->dev;
        struct tx_ring  *txr = adapter->tx_rings;
        int error, rid = 0;

        /* Manually turn off all interrupts */
        E1000_WRITE_REG(&adapter->hw, E1000_IMC, 0xffffffff);

        if (adapter->msix == 1) /* using MSI */
                rid = 1;
        /* We allocate a single interrupt resource */
        adapter->res = bus_alloc_resource_any(dev,
            SYS_RES_IRQ, &rid, RF_SHAREABLE | RF_ACTIVE);
        if (adapter->res == NULL) {
                device_printf(dev, "Unable to allocate bus resource: "
                    "interrupt\n");
                return (ENXIO);
        }

        /*
         * Allocate a fast interrupt and the associated
         * deferred processing contexts.
         */
        TASK_INIT(&adapter->que_task, 0, em_handle_que, adapter);
        adapter->tq = taskqueue_create_fast("em_taskq", M_NOWAIT,
            taskqueue_thread_enqueue, &adapter->tq);
        taskqueue_start_threads(&adapter->tq, 1, PI_NET, "%s que",
            device_get_nameunit(adapter->dev));
        /* Use a TX only task for the local timer */
        TASK_INIT(&txr->tx_task, 0, em_handle_tx, txr);
        txr->tq = taskqueue_create_fast("em_txq", M_NOWAIT,
            taskqueue_thread_enqueue, &txr->tq);
        taskqueue_start_threads(&txr->tq, 1, PI_NET, "%s txq",
            device_get_nameunit(adapter->dev));
        TASK_INIT(&adapter->link_task, 0, em_handle_link, adapter);
        if ((error = bus_setup_intr(dev, adapter->res, INTR_TYPE_NET,
            em_irq_fast, NULL, adapter, &adapter->tag)) != 0) {
                device_printf(dev, "Failed to register fast interrupt "
                            "handler: %d\n", error);
                taskqueue_free(adapter->tq);
                adapter->tq = NULL;
                return (error);
        }

        return (0);
}
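
/*
 * Note: with MSI the IRQ resource uses rid 1 (the first message),
 * while the legacy INTx line is rid 0; that is the only difference in
 * resource setup between the two modes here.
 */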

/*********************************************************************
 *
 *  Setup the MSIX Interrupt handlers
 *   This is not really Multiqueue, rather
 *   it's just separate interrupt vectors
 *   for TX, RX, and Link.
 *
 **********************************************************************/
int
em_allocate_msix(struct adapter *adapter)
{
        device_t        dev = adapter->dev;
        struct          tx_ring *txr = adapter->tx_rings;
        struct          rx_ring *rxr = adapter->rx_rings;
        int             error, rid, vector = 0;

        /* Make sure all interrupts are disabled */
        E1000_WRITE_REG(&adapter->hw, E1000_IMC, 0xffffffff);

        /* First set up ring resources */
        for (int i = 0; i < adapter->num_queues; i++, txr++, rxr++) {

                /* RX ring */
                rid = vector + 1;

                rxr->res = bus_alloc_resource_any(dev,
                    SYS_RES_IRQ, &rid, RF_ACTIVE);
                if (rxr->res == NULL) {
                        device_printf(dev,
                            "Unable to allocate bus resource: "
                            "RX MSIX Interrupt %d\n", i);
                        return (ENXIO);
                }
                if ((error = bus_setup_intr(dev, rxr->res,
                    INTR_TYPE_NET | INTR_MPSAFE, NULL, em_msix_rx,
                    rxr, &rxr->tag)) != 0) {
                        device_printf(dev, "Failed to register RX handler");
                        return (error);
                }
#if __FreeBSD_version >= 800504
                bus_describe_intr(dev, rxr->res, rxr->tag, "rx %d", i);
#endif
                rxr->msix = vector++; /* NOTE increment vector for TX */
                TASK_INIT(&rxr->rx_task, 0, em_handle_rx, rxr);
                rxr->tq = taskqueue_create_fast("em_rxq", M_NOWAIT,
                    taskqueue_thread_enqueue, &rxr->tq);
                taskqueue_start_threads(&rxr->tq, 1, PI_NET, "%s rxq",
                    device_get_nameunit(adapter->dev));
                /*
                ** Set the bit to enable interrupt
                ** in E1000_IMS -- bits 20 and 21
                ** are for RX0 and RX1; note this has
                ** NOTHING to do with the MSIX vector
                */
                rxr->ims = 1 << (20 + i);
                adapter->ivars |= (8 | rxr->msix) << (i * 4);

                /* TX ring */
                rid = vector + 1;
                txr->res = bus_alloc_resource_any(dev,
                    SYS_RES_IRQ, &rid, RF_ACTIVE);
                if (txr->res == NULL) {
                        device_printf(dev,
                            "Unable to allocate bus resource: "
                            "TX MSIX Interrupt %d\n", i);
                        return (ENXIO);
                }
                if ((error = bus_setup_intr(dev, txr->res,
                    INTR_TYPE_NET | INTR_MPSAFE, NULL, em_msix_tx,
                    txr, &txr->tag)) != 0) {
                        device_printf(dev, "Failed to register TX handler");
                        return (error);
                }
#if __FreeBSD_version >= 800504
                bus_describe_intr(dev, txr->res, txr->tag, "tx %d", i);
#endif
                txr->msix = vector++; /* Increment vector for next pass */
                TASK_INIT(&txr->tx_task, 0, em_handle_tx, txr);
                txr->tq = taskqueue_create_fast("em_txq", M_NOWAIT,
                    taskqueue_thread_enqueue, &txr->tq);
                taskqueue_start_threads(&txr->tq, 1, PI_NET, "%s txq",
                    device_get_nameunit(adapter->dev));
                /*
                ** Set the bit to enable interrupt
                ** in E1000_IMS -- bits 22 and 23
                ** are for TX0 and TX1; note this has
                ** NOTHING to do with the MSIX vector
                */
                txr->ims = 1 << (22 + i);
                adapter->ivars |= (8 | txr->msix) << (8 + (i * 4));
        }

        /* Link interrupt */
        ++rid;
        adapter->res = bus_alloc_resource_any(dev,
            SYS_RES_IRQ, &rid, RF_ACTIVE);
        if (!adapter->res) {
                device_printf(dev, "Unable to allocate "
                    "bus resource: Link interrupt [%d]\n", rid);
                return (ENXIO);
        }
        /* Set the link handler function */
        error = bus_setup_intr(dev, adapter->res,
            INTR_TYPE_NET | INTR_MPSAFE, NULL,
            em_msix_link, adapter, &adapter->tag);
        if (error) {
                adapter->res = NULL;
                device_printf(dev, "Failed to register LINK handler");
                return (error);
        }
#if __FreeBSD_version >= 800504
        bus_describe_intr(dev, adapter->res, adapter->tag, "link");
#endif
        adapter->linkvec = vector;
        adapter->ivars |= (8 | vector) << 16;
        adapter->ivars |= 0x80000000;
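
        /*
         * IVAR recap (as programmed above): each cause gets a 4-bit
         * field whose low 3 bits select the MSI-X vector and whose
         * high bit (the "8") marks the field valid.  Bits 0-7 route
         * the RX queues, bits 8-15 the TX queues, and bits 16-19 the
         * link/other cause; the top bit of the register is set last,
         * as this mode requires.
         */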

        return (0);
}

static void
em_free_pci_resources(struct adapter *adapter)
{
        device_t        dev = adapter->dev;
        struct tx_ring  *txr;
        struct rx_ring  *rxr;
        int             rid;

        /*
        ** Release all the queue interrupt resources:
        */
        for (int i = 0; i < adapter->num_queues; i++) {
                txr = &adapter->tx_rings[i];
                rxr = &adapter->rx_rings[i];
                /* an early abort? */
                if ((txr == NULL) || (rxr == NULL))
                        break;
                rid = txr->msix + 1;
                if (txr->tag != NULL) {
                        bus_teardown_intr(dev, txr->res, txr->tag);
                        txr->tag = NULL;
                }
                if (txr->res != NULL)
                        bus_release_resource(dev, SYS_RES_IRQ,
                            rid, txr->res);
                rid = rxr->msix + 1;
                if (rxr->tag != NULL) {
                        bus_teardown_intr(dev, rxr->res, rxr->tag);
                        rxr->tag = NULL;
                }
                if (rxr->res != NULL)
                        bus_release_resource(dev, SYS_RES_IRQ,
                            rid, rxr->res);
        }

        if (adapter->linkvec) /* we are doing MSIX */
                rid = adapter->linkvec + 1;
        else
                rid = (adapter->msix != 0) ? 1 : 0;

        if (adapter->tag != NULL) {
                bus_teardown_intr(dev, adapter->res, adapter->tag);
                adapter->tag = NULL;
        }

        if (adapter->res != NULL)
                bus_release_resource(dev, SYS_RES_IRQ, rid, adapter->res);

        if (adapter->msix)
                pci_release_msi(dev);

        if (adapter->msix_mem != NULL)
                bus_release_resource(dev, SYS_RES_MEMORY,
                    PCIR_BAR(EM_MSIX_BAR), adapter->msix_mem);

        if (adapter->memory != NULL)
                bus_release_resource(dev, SYS_RES_MEMORY,
                    PCIR_BAR(0), adapter->memory);

        if (adapter->flash != NULL)
                bus_release_resource(dev, SYS_RES_MEMORY,
                    EM_FLASH, adapter->flash);
}
2737
2738 /*
2739  * Setup MSI or MSI/X
2740  */
2741 static int
2742 em_setup_msix(struct adapter *adapter)
2743 {
2744         device_t dev = adapter->dev;
2745         int val;
2746
2747         /*
2748         ** Setup MSI/X for Hartwell: tests have shown
2749         ** use of two queues to be unstable, and to
2750         ** provide no great gain anyway, so we simply
        ** separate the interrupts and use a single queue.
2752         */
2753         if ((adapter->hw.mac.type == e1000_82574) &&
2754             (em_enable_msix == TRUE)) {
2755                 /* Map the MSIX BAR */
2756                 int rid = PCIR_BAR(EM_MSIX_BAR);
2757                 adapter->msix_mem = bus_alloc_resource_any(dev,
2758                     SYS_RES_MEMORY, &rid, RF_ACTIVE);
2759                 if (adapter->msix_mem == NULL) {
2760                         /* May not be enabled */
2761                         device_printf(adapter->dev,
                            "Unable to map MSIX table\n");
2763                         goto msi;
2764                 }
2765                 val = pci_msix_count(dev); 
2766                 /* We only need/want 3 vectors */
2767                 if (val >= 3)
2768                         val = 3;
2769                 else {
2770                         device_printf(adapter->dev,
2771                             "MSIX: insufficient vectors, using MSI\n");
2772                         goto msi;
2773                 }
2774
2775                 if ((pci_alloc_msix(dev, &val) == 0) && (val == 3)) {
2776                         device_printf(adapter->dev,
2777                             "Using MSIX interrupts "
2778                             "with %d vectors\n", val);
2779                         return (val);
2780                 }
2781
2782                 /*
2783                 ** If MSIX alloc failed or provided us with
2784                 ** less than needed, free and fall through to MSI
2785                 */
2786                 pci_release_msi(dev);
2787         }
2788 msi:
2789         if (adapter->msix_mem != NULL) {
2790                 bus_release_resource(dev, SYS_RES_MEMORY,
2791                     PCIR_BAR(EM_MSIX_BAR), adapter->msix_mem);
2792                 adapter->msix_mem = NULL;
2793         }
2794         val = 1;
2795         if (pci_alloc_msi(dev, &val) == 0) {
2796                 device_printf(adapter->dev,"Using an MSI interrupt\n");
2797                 return (val);
2798         } 
2799         /* Should only happen due to manual configuration */
        device_printf(adapter->dev,"No MSI/MSIX, using a Legacy IRQ\n");
2801         return (0);
2802 }
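/*
** A sketch of how the return value is consumed (hedged, based on the
** attach path in this file): em_setup_msix() reports the number of
** vectors obtained, which attach records in adapter->msix. Zero means
** fall back to a legacy INTx setup, one means a single MSI message,
** and three selects the separate TX/RX/link handlers wired up by
** em_allocate_msix() above.
*/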
2803
2804
2805 /*********************************************************************
2806  *
2807  *  Initialize the hardware to a configuration
2808  *  as specified by the adapter structure.
2809  *
2810  **********************************************************************/
2811 static void
2812 em_reset(struct adapter *adapter)
2813 {
2814         device_t        dev = adapter->dev;
2815         struct ifnet    *ifp = adapter->ifp;
2816         struct e1000_hw *hw = &adapter->hw;
2817         u16             rx_buffer_size;
2818         u32             pba;
2819
2820         INIT_DEBUGOUT("em_reset: begin");
2821
2822         /* Set up smart power down as default off on newer adapters. */
2823         if (!em_smart_pwr_down && (hw->mac.type == e1000_82571 ||
2824             hw->mac.type == e1000_82572)) {
2825                 u16 phy_tmp = 0;
2826
2827                 /* Speed up time to link by disabling smart power down. */
2828                 e1000_read_phy_reg(hw, IGP02E1000_PHY_POWER_MGMT, &phy_tmp);
2829                 phy_tmp &= ~IGP02E1000_PM_SPD;
2830                 e1000_write_phy_reg(hw, IGP02E1000_PHY_POWER_MGMT, phy_tmp);
2831         }
2832
2833         /*
2834          * Packet Buffer Allocation (PBA)
         * Writing PBA sets the receive portion of the buffer;
         * the remainder is used for the transmit buffer.
2837          */
2838         switch (hw->mac.type) {
2839         /* Total Packet Buffer on these is 48K */
2840         case e1000_82571:
2841         case e1000_82572:
2842         case e1000_80003es2lan:
                pba = E1000_PBA_32K; /* 32K for Rx, 16K for Tx */
2844                 break;
2845         case e1000_82573: /* 82573: Total Packet Buffer is 32K */
                pba = E1000_PBA_12K; /* 12K for Rx, 20K for Tx */
2847                 break;
2848         case e1000_82574:
2849         case e1000_82583:
                pba = E1000_PBA_20K; /* 20K for Rx, 20K for Tx */
2851                 break;
2852         case e1000_ich8lan:
2853                 pba = E1000_PBA_8K;
2854                 break;
2855         case e1000_ich9lan:
2856         case e1000_ich10lan:
2857                 /* Boost Receive side for jumbo frames */
2858                 if (adapter->hw.mac.max_frame_size > 4096)
2859                         pba = E1000_PBA_14K;
2860                 else
2861                         pba = E1000_PBA_10K;
2862                 break;
2863         case e1000_pchlan:
2864         case e1000_pch2lan:
2865         case e1000_pch_lpt:
2866                 pba = E1000_PBA_26K;
2867                 break;
2868         default:
2869                 if (adapter->hw.mac.max_frame_size > 8192)
2870                         pba = E1000_PBA_40K; /* 40K for Rx, 24K for Tx */
2871                 else
2872                         pba = E1000_PBA_48K; /* 48K for Rx, 16K for Tx */
2873         }
2874         E1000_WRITE_REG(&adapter->hw, E1000_PBA, pba);
2875
2876         /*
2877          * These parameters control the automatic generation (Tx) and
2878          * response (Rx) to Ethernet PAUSE frames.
2879          * - High water mark should allow for at least two frames to be
2880          *   received after sending an XOFF.
2881          * - Low water mark works best when it is very near the high water mark.
2882          *   This allows the receiver to restart by sending XON when it has
         *   drained a bit. Here we use an arbitrary value of 1500 which will
2884          *   restart after one full frame is pulled from the buffer. There
2885          *   could be several smaller frames in the buffer and if so they will
2886          *   not trigger the XON until their total number reduces the buffer
2887          *   by 1500.
2888          * - The pause time is fairly large at 1000 x 512ns = 512 usec.
2889          */
2890         rx_buffer_size = ((E1000_READ_REG(hw, E1000_PBA) & 0xffff) << 10 );
2891         hw->fc.high_water = rx_buffer_size -
2892             roundup2(adapter->hw.mac.max_frame_size, 1024);
2893         hw->fc.low_water = hw->fc.high_water - 1500;
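        /*
        ** Worked example with illustrative numbers: a 32KB RX allocation
        ** reads back as 32, so rx_buffer_size = 32 << 10 = 32768 bytes.
        ** With a standard 1522-byte max frame, roundup2(1522, 1024) is
        ** 2048, giving high_water = 30720 and low_water = 29220.
        */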
2894
2895         if (adapter->fc) /* locally set flow control value? */
2896                 hw->fc.requested_mode = adapter->fc;
2897         else
2898                 hw->fc.requested_mode = e1000_fc_full;
2899
2900         if (hw->mac.type == e1000_80003es2lan)
2901                 hw->fc.pause_time = 0xFFFF;
2902         else
2903                 hw->fc.pause_time = EM_FC_PAUSE_TIME;
2904
2905         hw->fc.send_xon = TRUE;
2906
2907         /* Device specific overrides/settings */
2908         switch (hw->mac.type) {
2909         case e1000_pchlan:
2910                 /* Workaround: no TX flow ctrl for PCH */
2911                 hw->fc.requested_mode = e1000_fc_rx_pause;
2912                 hw->fc.pause_time = 0xFFFF; /* override */
2913                 if (ifp->if_mtu > ETHERMTU) {
2914                         hw->fc.high_water = 0x3500;
2915                         hw->fc.low_water = 0x1500;
2916                 } else {
2917                         hw->fc.high_water = 0x5000;
2918                         hw->fc.low_water = 0x3000;
2919                 }
2920                 hw->fc.refresh_time = 0x1000;
2921                 break;
2922         case e1000_pch2lan:
2923         case e1000_pch_lpt:
2924                 hw->fc.high_water = 0x5C20;
2925                 hw->fc.low_water = 0x5048;
2926                 hw->fc.pause_time = 0x0650;
2927                 hw->fc.refresh_time = 0x0400;
2928                 /* Jumbos need adjusted PBA */
2929                 if (ifp->if_mtu > ETHERMTU)
2930                         E1000_WRITE_REG(hw, E1000_PBA, 12);
2931                 else
2932                         E1000_WRITE_REG(hw, E1000_PBA, 26);
2933                 break;
2934         case e1000_ich9lan:
2935         case e1000_ich10lan:
2936                 if (ifp->if_mtu > ETHERMTU) {
2937                         hw->fc.high_water = 0x2800;
2938                         hw->fc.low_water = hw->fc.high_water - 8;
2939                         break;
2940                 } 
2941                 /* else fall thru */
2942         default:
2943                 if (hw->mac.type == e1000_80003es2lan)
2944                         hw->fc.pause_time = 0xFFFF;
2945                 break;
2946         }
2947
2948         /* Issue a global reset */
2949         e1000_reset_hw(hw);
2950         E1000_WRITE_REG(hw, E1000_WUC, 0);
2951         em_disable_aspm(adapter);
2952         /* and a re-init */
2953         if (e1000_init_hw(hw) < 0) {
2954                 device_printf(dev, "Hardware Initialization Failed\n");
2955                 return;
2956         }
2957
2958         E1000_WRITE_REG(hw, E1000_VET, ETHERTYPE_VLAN);
2959         e1000_get_phy_info(hw);
2960         e1000_check_for_link(hw);
2961         return;
2962 }
2963
2964 /*********************************************************************
2965  *
2966  *  Setup networking device structure and register an interface.
2967  *
2968  **********************************************************************/
2969 static int
2970 em_setup_interface(device_t dev, struct adapter *adapter)
2971 {
2972         struct ifnet   *ifp;
2973
2974         INIT_DEBUGOUT("em_setup_interface: begin");
2975
2976         ifp = adapter->ifp = if_alloc(IFT_ETHER);
2977         if (ifp == NULL) {
2978                 device_printf(dev, "can not allocate ifnet structure\n");
2979                 return (-1);
2980         }
2981         if_initname(ifp, device_get_name(dev), device_get_unit(dev));
2982         ifp->if_init =  em_init;
2983         ifp->if_softc = adapter;
2984         ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
2985         ifp->if_ioctl = em_ioctl;
2986 #ifdef EM_MULTIQUEUE
2987         /* Multiqueue stack interface */
2988         ifp->if_transmit = em_mq_start;
2989         ifp->if_qflush = em_qflush;
2990 #else
2991         ifp->if_start = em_start;
2992         IFQ_SET_MAXLEN(&ifp->if_snd, adapter->num_tx_desc - 1);
2993         ifp->if_snd.ifq_drv_maxlen = adapter->num_tx_desc - 1;
2994         IFQ_SET_READY(&ifp->if_snd);
2995 #endif  
2996
2997         ether_ifattach(ifp, adapter->hw.mac.addr);
2998
2999         ifp->if_capabilities = ifp->if_capenable = 0;
3000
3001
3002         ifp->if_capabilities |= IFCAP_HWCSUM | IFCAP_VLAN_HWCSUM;
3003         ifp->if_capabilities |= IFCAP_TSO4;
3004         /*
3005          * Tell the upper layer(s) we
3006          * support full VLAN capability
3007          */
3008         ifp->if_data.ifi_hdrlen = sizeof(struct ether_vlan_header);
3009         ifp->if_capabilities |= IFCAP_VLAN_HWTAGGING
3010                              |  IFCAP_VLAN_HWTSO
3011                              |  IFCAP_VLAN_MTU;
3012         ifp->if_capenable = ifp->if_capabilities;
3013
3014         /*
        ** Don't turn this on by default: if vlans are
        ** created on another pseudo device (e.g. lagg)
        ** then vlan events are not passed through, breaking
        ** operation, but with HW FILTER off it works. If
        ** using vlans directly on the em driver you can
        ** enable this and get full hardware tag filtering.
3021         */
3022         ifp->if_capabilities |= IFCAP_VLAN_HWFILTER;
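        /*
        ** (Hedged usage note: this capability can be toggled from
        ** userland at runtime, e.g. "ifconfig em0 -vlanhwfilter" to
        ** disable hardware VLAN filtering when running vlans over lagg.)
        */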
3023
3024 #ifdef DEVICE_POLLING
3025         ifp->if_capabilities |= IFCAP_POLLING;
3026 #endif
3027
3028         /* Enable only WOL MAGIC by default */
3029         if (adapter->wol) {
3030                 ifp->if_capabilities |= IFCAP_WOL;
3031                 ifp->if_capenable |= IFCAP_WOL_MAGIC;
3032         }
3033                 
3034         /*
3035          * Specify the media types supported by this adapter and register
3036          * callbacks to update media and link information
3037          */
3038         ifmedia_init(&adapter->media, IFM_IMASK,
3039             em_media_change, em_media_status);
3040         if ((adapter->hw.phy.media_type == e1000_media_type_fiber) ||
3041             (adapter->hw.phy.media_type == e1000_media_type_internal_serdes)) {
3042                 u_char fiber_type = IFM_1000_SX;        /* default type */
3043
3044                 ifmedia_add(&adapter->media, IFM_ETHER | fiber_type | IFM_FDX, 
3045                             0, NULL);
3046                 ifmedia_add(&adapter->media, IFM_ETHER | fiber_type, 0, NULL);
3047         } else {
3048                 ifmedia_add(&adapter->media, IFM_ETHER | IFM_10_T, 0, NULL);
3049                 ifmedia_add(&adapter->media, IFM_ETHER | IFM_10_T | IFM_FDX,
3050                             0, NULL);
3051                 ifmedia_add(&adapter->media, IFM_ETHER | IFM_100_TX,
3052                             0, NULL);
3053                 ifmedia_add(&adapter->media, IFM_ETHER | IFM_100_TX | IFM_FDX,
3054                             0, NULL);
3055                 if (adapter->hw.phy.type != e1000_phy_ife) {
3056                         ifmedia_add(&adapter->media,
3057                                 IFM_ETHER | IFM_1000_T | IFM_FDX, 0, NULL);
3058                         ifmedia_add(&adapter->media,
3059                                 IFM_ETHER | IFM_1000_T, 0, NULL);
3060                 }
3061         }
3062         ifmedia_add(&adapter->media, IFM_ETHER | IFM_AUTO, 0, NULL);
3063         ifmedia_set(&adapter->media, IFM_ETHER | IFM_AUTO);
3064         return (0);
3065 }
3066
3067
3068 /*
3069  * Manage DMA'able memory.
3070  */
3071 static void
3072 em_dmamap_cb(void *arg, bus_dma_segment_t *segs, int nseg, int error)
3073 {
3074         if (error)
3075                 return;
3076         *(bus_addr_t *) arg = segs[0].ds_addr;
3077 }
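/*
** The callback fires synchronously from bus_dmamap_load() below; since
** the tag is created with nsegments = 1, segs[0] is the one and only
** segment, so stashing its bus address through the opaque arg is all
** the bookkeeping required.
*/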
3078
3079 static int
3080 em_dma_malloc(struct adapter *adapter, bus_size_t size,
3081         struct em_dma_alloc *dma, int mapflags)
3082 {
3083         int error;
3084
3085         error = bus_dma_tag_create(bus_get_dma_tag(adapter->dev), /* parent */
3086                                 EM_DBA_ALIGN, 0,        /* alignment, bounds */
3087                                 BUS_SPACE_MAXADDR,      /* lowaddr */
3088                                 BUS_SPACE_MAXADDR,      /* highaddr */
3089                                 NULL, NULL,             /* filter, filterarg */
3090                                 size,                   /* maxsize */
3091                                 1,                      /* nsegments */
3092                                 size,                   /* maxsegsize */
3093                                 0,                      /* flags */
3094                                 NULL,                   /* lockfunc */
3095                                 NULL,                   /* lockarg */
3096                                 &dma->dma_tag);
3097         if (error) {
3098                 device_printf(adapter->dev,
3099                     "%s: bus_dma_tag_create failed: %d\n",
3100                     __func__, error);
3101                 goto fail_0;
3102         }
3103
3104         error = bus_dmamem_alloc(dma->dma_tag, (void**) &dma->dma_vaddr,
3105             BUS_DMA_NOWAIT | BUS_DMA_COHERENT, &dma->dma_map);
3106         if (error) {
3107                 device_printf(adapter->dev,
3108                     "%s: bus_dmamem_alloc(%ju) failed: %d\n",
3109                     __func__, (uintmax_t)size, error);
                /* Memory was never allocated, so only destroy the tag */
                bus_dma_tag_destroy(dma->dma_tag);
                goto fail_0;
3111         }
3112
3113         dma->dma_paddr = 0;
3114         error = bus_dmamap_load(dma->dma_tag, dma->dma_map, dma->dma_vaddr,
3115             size, em_dmamap_cb, &dma->dma_paddr, mapflags | BUS_DMA_NOWAIT);
3116         if (error || dma->dma_paddr == 0) {
3117                 device_printf(adapter->dev,
3118                     "%s: bus_dmamap_load failed: %d\n",
3119                     __func__, error);
3120                 goto fail_3;
3121         }
3122
3123         return (0);
3124
3125 fail_3:
3126         bus_dmamap_unload(dma->dma_tag, dma->dma_map);
3127 fail_2:
3128         bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map);
3129         bus_dma_tag_destroy(dma->dma_tag);
3130 fail_0:
3131         dma->dma_map = NULL;
3132         dma->dma_tag = NULL;
3133
3134         return (error);
3135 }
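/*
** Typical pairing, as used by the ring setup below (a sketch):
**
**      if (em_dma_malloc(adapter, tsize, &txr->txdma, BUS_DMA_NOWAIT))
**              goto err;
**      ...
**      em_dma_free(adapter, &txr->txdma);
*/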
3136
3137 static void
3138 em_dma_free(struct adapter *adapter, struct em_dma_alloc *dma)
3139 {
3140         if (dma->dma_tag == NULL)
3141                 return;
3142         if (dma->dma_map != NULL) {
3143                 bus_dmamap_sync(dma->dma_tag, dma->dma_map,
3144                     BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
3145                 bus_dmamap_unload(dma->dma_tag, dma->dma_map);
3146                 bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map);
3147                 dma->dma_map = NULL;
3148         }
3149         bus_dma_tag_destroy(dma->dma_tag);
3150         dma->dma_tag = NULL;
3151 }
3152
3153
3154 /*********************************************************************
3155  *
3156  *  Allocate memory for the transmit and receive rings, and then
3157  *  the descriptors associated with each, called only once at attach.
3158  *
3159  **********************************************************************/
3160 static int
3161 em_allocate_queues(struct adapter *adapter)
3162 {
3163         device_t                dev = adapter->dev;
3164         struct tx_ring          *txr = NULL;
3165         struct rx_ring          *rxr = NULL;
3166         int rsize, tsize, error = E1000_SUCCESS;
3167         int txconf = 0, rxconf = 0;
3168
3169
3170         /* Allocate the TX ring struct memory */
3171         if (!(adapter->tx_rings =
3172             (struct tx_ring *) malloc(sizeof(struct tx_ring) *
3173             adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
3174                 device_printf(dev, "Unable to allocate TX ring memory\n");
3175                 error = ENOMEM;
3176                 goto fail;
3177         }
3178
3179         /* Now allocate the RX */
3180         if (!(adapter->rx_rings =
3181             (struct rx_ring *) malloc(sizeof(struct rx_ring) *
3182             adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
3183                 device_printf(dev, "Unable to allocate RX ring memory\n");
3184                 error = ENOMEM;
3185                 goto rx_fail;
3186         }
3187
3188         tsize = roundup2(adapter->num_tx_desc *
3189             sizeof(struct e1000_tx_desc), EM_DBA_ALIGN);
3190         /*
         * Now set up the TX queues; txconf tracks how many are
         * configured so that, if anything fails midcourse, we
         * can unwind the allocations gracefully.
3194          */ 
3195         for (int i = 0; i < adapter->num_queues; i++, txconf++) {
3196                 /* Set up some basics */
3197                 txr = &adapter->tx_rings[i];
3198                 txr->adapter = adapter;
3199                 txr->me = i;
3200
3201                 /* Initialize the TX lock */
3202                 snprintf(txr->mtx_name, sizeof(txr->mtx_name), "%s:tx(%d)",
3203                     device_get_nameunit(dev), txr->me);
3204                 mtx_init(&txr->tx_mtx, txr->mtx_name, NULL, MTX_DEF);
3205
3206                 if (em_dma_malloc(adapter, tsize,
3207                         &txr->txdma, BUS_DMA_NOWAIT)) {
3208                         device_printf(dev,
3209                             "Unable to allocate TX Descriptor memory\n");
3210                         error = ENOMEM;
3211                         goto err_tx_desc;
3212                 }
3213                 txr->tx_base = (struct e1000_tx_desc *)txr->txdma.dma_vaddr;
3214                 bzero((void *)txr->tx_base, tsize);
3215
3216                 if (em_allocate_transmit_buffers(txr)) {
3217                         device_printf(dev,
3218                             "Critical Failure setting up transmit buffers\n");
3219                         error = ENOMEM;
3220                         goto err_tx_desc;
3221                 }
3222 #if __FreeBSD_version >= 800000
3223                 /* Allocate a buf ring */
3224                 txr->br = buf_ring_alloc(4096, M_DEVBUF,
3225                     M_WAITOK, &txr->tx_mtx);
3226 #endif
3227         }
3228
3229         /*
3230          * Next the RX queues...
3231          */ 
3232         rsize = roundup2(adapter->num_rx_desc *
3233             sizeof(struct e1000_rx_desc), EM_DBA_ALIGN);
3234         for (int i = 0; i < adapter->num_queues; i++, rxconf++) {
3235                 rxr = &adapter->rx_rings[i];
3236                 rxr->adapter = adapter;
3237                 rxr->me = i;
3238
3239                 /* Initialize the RX lock */
3240                 snprintf(rxr->mtx_name, sizeof(rxr->mtx_name), "%s:rx(%d)",
                    device_get_nameunit(dev), rxr->me);
3242                 mtx_init(&rxr->rx_mtx, rxr->mtx_name, NULL, MTX_DEF);
3243
3244                 if (em_dma_malloc(adapter, rsize,
3245                         &rxr->rxdma, BUS_DMA_NOWAIT)) {
3246                         device_printf(dev,
                            "Unable to allocate RX Descriptor memory\n");
3248                         error = ENOMEM;
3249                         goto err_rx_desc;
3250                 }
3251                 rxr->rx_base = (struct e1000_rx_desc *)rxr->rxdma.dma_vaddr;
3252                 bzero((void *)rxr->rx_base, rsize);
3253
3254                 /* Allocate receive buffers for the ring*/
3255                 if (em_allocate_receive_buffers(rxr)) {
3256                         device_printf(dev,
3257                             "Critical Failure setting up receive buffers\n");
3258                         error = ENOMEM;
3259                         goto err_rx_desc;
3260                 }
3261         }
3262
3263         return (0);
3264
err_rx_desc:
        for (rxr = adapter->rx_rings; rxconf > 0; rxr++, rxconf--)
                em_dma_free(adapter, &rxr->rxdma);
err_tx_desc:
        for (txr = adapter->tx_rings; txconf > 0; txr++, txconf--) {
                em_dma_free(adapter, &txr->txdma);
#if __FreeBSD_version >= 800000
                /* Free the buf ring if this queue had one allocated */
                if (txr->br != NULL)
                        buf_ring_free(txr->br, M_DEVBUF);
#endif
        }
        free(adapter->rx_rings, M_DEVBUF);
rx_fail:
        free(adapter->tx_rings, M_DEVBUF);
fail:
        return (error);
3279 }
3280
3281
3282 /*********************************************************************
3283  *
3284  *  Allocate memory for tx_buffer structures. The tx_buffer stores all
3285  *  the information needed to transmit a packet on the wire. This is
3286  *  called only once at attach, setup is done every reset.
3287  *
3288  **********************************************************************/
3289 static int
3290 em_allocate_transmit_buffers(struct tx_ring *txr)
3291 {
3292         struct adapter *adapter = txr->adapter;
3293         device_t dev = adapter->dev;
3294         struct em_buffer *txbuf;
3295         int error, i;
3296
3297         /*
3298          * Setup DMA descriptor areas.
3299          */
3300         if ((error = bus_dma_tag_create(bus_get_dma_tag(dev),
3301                                1, 0,                    /* alignment, bounds */
3302                                BUS_SPACE_MAXADDR,       /* lowaddr */
3303                                BUS_SPACE_MAXADDR,       /* highaddr */
3304                                NULL, NULL,              /* filter, filterarg */
3305                                EM_TSO_SIZE,             /* maxsize */
3306                                EM_MAX_SCATTER,          /* nsegments */
3307                                PAGE_SIZE,               /* maxsegsize */
3308                                0,                       /* flags */
3309                                NULL,                    /* lockfunc */
3310                                NULL,                    /* lockfuncarg */
3311                                &txr->txtag))) {
3312                 device_printf(dev,"Unable to allocate TX DMA tag\n");
3313                 goto fail;
3314         }
3315
3316         if (!(txr->tx_buffers =
3317             (struct em_buffer *) malloc(sizeof(struct em_buffer) *
3318             adapter->num_tx_desc, M_DEVBUF, M_NOWAIT | M_ZERO))) {
3319                 device_printf(dev, "Unable to allocate tx_buffer memory\n");
3320                 error = ENOMEM;
3321                 goto fail;
3322         }
3323
3324         /* Create the descriptor buffer dma maps */
3325         txbuf = txr->tx_buffers;
3326         for (i = 0; i < adapter->num_tx_desc; i++, txbuf++) {
3327                 error = bus_dmamap_create(txr->txtag, 0, &txbuf->map);
3328                 if (error != 0) {
3329                         device_printf(dev, "Unable to create TX DMA map\n");
3330                         goto fail;
3331                 }
3332         }
3333
3334         return 0;
3335 fail:
        /* Free everything; this handles a failure partway through setup */
3337         em_free_transmit_structures(adapter);
3338         return (error);
3339 }
3340
3341 /*********************************************************************
3342  *
3343  *  Initialize a transmit ring.
3344  *
3345  **********************************************************************/
3346 static void
3347 em_setup_transmit_ring(struct tx_ring *txr)
3348 {
3349         struct adapter *adapter = txr->adapter;
3350         struct em_buffer *txbuf;
3351         int i;
3352 #ifdef DEV_NETMAP
3353         struct netmap_adapter *na = NA(adapter->ifp);
3354         struct netmap_slot *slot;
3355 #endif /* DEV_NETMAP */
3356
3357         /* Clear the old descriptor contents */
3358         EM_TX_LOCK(txr);
3359 #ifdef DEV_NETMAP
3360         slot = netmap_reset(na, NR_TX, txr->me, 0);
3361 #endif /* DEV_NETMAP */
3362
3363         bzero((void *)txr->tx_base,
3364               (sizeof(struct e1000_tx_desc)) * adapter->num_tx_desc);
3365         /* Reset indices */
3366         txr->next_avail_desc = 0;
3367         txr->next_to_clean = 0;
3368
3369         /* Free any existing tx buffers. */
3370         txbuf = txr->tx_buffers;
3371         for (i = 0; i < adapter->num_tx_desc; i++, txbuf++) {
3372                 if (txbuf->m_head != NULL) {
3373                         bus_dmamap_sync(txr->txtag, txbuf->map,
3374                             BUS_DMASYNC_POSTWRITE);
3375                         bus_dmamap_unload(txr->txtag, txbuf->map);
3376                         m_freem(txbuf->m_head);
3377                         txbuf->m_head = NULL;
3378                 }
3379 #ifdef DEV_NETMAP
3380                 if (slot) {
3381                         int si = netmap_idx_n2k(&na->tx_rings[txr->me], i);
3382                         uint64_t paddr;
3383                         void *addr;
3384
3385                         addr = PNMB(slot + si, &paddr);
3386                         txr->tx_base[i].buffer_addr = htole64(paddr);
3387                         /* reload the map for netmap mode */
3388                         netmap_load_map(txr->txtag, txbuf->map, addr);
3389                 }
3390 #endif /* DEV_NETMAP */
3391
3392                 /* clear the watch index */
3393                 txbuf->next_eop = -1;
3394         }
3395
3396         /* Set number of descriptors available */
3397         txr->tx_avail = adapter->num_tx_desc;
3398         txr->queue_status = EM_QUEUE_IDLE;
3399
3400         /* Clear checksum offload context. */
3401         txr->last_hw_offload = 0;
3402         txr->last_hw_ipcss = 0;
3403         txr->last_hw_ipcso = 0;
3404         txr->last_hw_tucss = 0;
3405         txr->last_hw_tucso = 0;
3406
3407         bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
3408             BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
3409         EM_TX_UNLOCK(txr);
3410 }
3411
3412 /*********************************************************************
3413  *
3414  *  Initialize all transmit rings.
3415  *
3416  **********************************************************************/
3417 static void
3418 em_setup_transmit_structures(struct adapter *adapter)
3419 {
3420         struct tx_ring *txr = adapter->tx_rings;
3421
3422         for (int i = 0; i < adapter->num_queues; i++, txr++)
3423                 em_setup_transmit_ring(txr);
3424
3425         return;
3426 }
3427
3428 /*********************************************************************
3429  *
3430  *  Enable transmit unit.
3431  *
3432  **********************************************************************/
3433 static void
3434 em_initialize_transmit_unit(struct adapter *adapter)
3435 {
3436         struct tx_ring  *txr = adapter->tx_rings;
3437         struct e1000_hw *hw = &adapter->hw;
3438         u32     tctl, tarc, tipg = 0;
3439
        INIT_DEBUGOUT("em_initialize_transmit_unit: begin");
3441
3442         for (int i = 0; i < adapter->num_queues; i++, txr++) {
3443                 u64 bus_addr = txr->txdma.dma_paddr;
3444                 /* Base and Len of TX Ring */
3445                 E1000_WRITE_REG(hw, E1000_TDLEN(i),
3446                     adapter->num_tx_desc * sizeof(struct e1000_tx_desc));
3447                 E1000_WRITE_REG(hw, E1000_TDBAH(i),
3448                     (u32)(bus_addr >> 32));
3449                 E1000_WRITE_REG(hw, E1000_TDBAL(i),
3450                     (u32)bus_addr);
3451                 /* Init the HEAD/TAIL indices */
3452                 E1000_WRITE_REG(hw, E1000_TDT(i), 0);
3453                 E1000_WRITE_REG(hw, E1000_TDH(i), 0);
3454
3455                 HW_DEBUGOUT2("Base = %x, Length = %x\n",
3456                     E1000_READ_REG(&adapter->hw, E1000_TDBAL(i)),
3457                     E1000_READ_REG(&adapter->hw, E1000_TDLEN(i)));
3458
3459                 txr->queue_status = EM_QUEUE_IDLE;
3460         }
3461
3462         /* Set the default values for the Tx Inter Packet Gap timer */
3463         switch (adapter->hw.mac.type) {
3464         case e1000_80003es2lan:
3465                 tipg = DEFAULT_82543_TIPG_IPGR1;
3466                 tipg |= DEFAULT_80003ES2LAN_TIPG_IPGR2 <<
3467                     E1000_TIPG_IPGR2_SHIFT;
3468                 break;
3469         default:
3470                 if ((adapter->hw.phy.media_type == e1000_media_type_fiber) ||
3471                     (adapter->hw.phy.media_type ==
3472                     e1000_media_type_internal_serdes))
3473                         tipg = DEFAULT_82543_TIPG_IPGT_FIBER;
3474                 else
3475                         tipg = DEFAULT_82543_TIPG_IPGT_COPPER;
3476                 tipg |= DEFAULT_82543_TIPG_IPGR1 << E1000_TIPG_IPGR1_SHIFT;
3477                 tipg |= DEFAULT_82543_TIPG_IPGR2 << E1000_TIPG_IPGR2_SHIFT;
3478         }
3479
3480         E1000_WRITE_REG(&adapter->hw, E1000_TIPG, tipg);
3481         E1000_WRITE_REG(&adapter->hw, E1000_TIDV, adapter->tx_int_delay.value);
3482
        if (adapter->hw.mac.type >= e1000_82540)
3484                 E1000_WRITE_REG(&adapter->hw, E1000_TADV,
3485                     adapter->tx_abs_int_delay.value);
3486
3487         if ((adapter->hw.mac.type == e1000_82571) ||
3488             (adapter->hw.mac.type == e1000_82572)) {
3489                 tarc = E1000_READ_REG(&adapter->hw, E1000_TARC(0));
3490                 tarc |= SPEED_MODE_BIT;
3491                 E1000_WRITE_REG(&adapter->hw, E1000_TARC(0), tarc);
3492         } else if (adapter->hw.mac.type == e1000_80003es2lan) {
3493                 tarc = E1000_READ_REG(&adapter->hw, E1000_TARC(0));
3494                 tarc |= 1;
3495                 E1000_WRITE_REG(&adapter->hw, E1000_TARC(0), tarc);
3496                 tarc = E1000_READ_REG(&adapter->hw, E1000_TARC(1));
3497                 tarc |= 1;
3498                 E1000_WRITE_REG(&adapter->hw, E1000_TARC(1), tarc);
3499         }
3500
3501         adapter->txd_cmd = E1000_TXD_CMD_IFCS;
3502         if (adapter->tx_int_delay.value > 0)
3503                 adapter->txd_cmd |= E1000_TXD_CMD_IDE;
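        /*
        ** IDE (Interrupt Delay Enable) asks the hardware to honor the
        ** TIDV/TADV delay timers programmed above instead of raising
        ** an interrupt on every descriptor writeback.
        */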
3504
3505         /* Program the Transmit Control Register */
3506         tctl = E1000_READ_REG(&adapter->hw, E1000_TCTL);
3507         tctl &= ~E1000_TCTL_CT;
3508         tctl |= (E1000_TCTL_PSP | E1000_TCTL_RTLC | E1000_TCTL_EN |
3509                    (E1000_COLLISION_THRESHOLD << E1000_CT_SHIFT));
3510
3511         if (adapter->hw.mac.type >= e1000_82571)
3512                 tctl |= E1000_TCTL_MULR;
3513
3514         /* This write will effectively turn on the transmit unit. */
3515         E1000_WRITE_REG(&adapter->hw, E1000_TCTL, tctl);
3516
3517 }
3518
3519
3520 /*********************************************************************
3521  *
3522  *  Free all transmit rings.
3523  *
3524  **********************************************************************/
3525 static void
3526 em_free_transmit_structures(struct adapter *adapter)
3527 {
3528         struct tx_ring *txr = adapter->tx_rings;
3529
3530         for (int i = 0; i < adapter->num_queues; i++, txr++) {
3531                 EM_TX_LOCK(txr);
3532                 em_free_transmit_buffers(txr);
3533                 em_dma_free(adapter, &txr->txdma);
3534                 EM_TX_UNLOCK(txr);
3535                 EM_TX_LOCK_DESTROY(txr);
3536         }
3537
3538         free(adapter->tx_rings, M_DEVBUF);
3539 }
3540
3541 /*********************************************************************
3542  *
3543  *  Free transmit ring related data structures.
3544  *
3545  **********************************************************************/
3546 static void
3547 em_free_transmit_buffers(struct tx_ring *txr)
3548 {
3549         struct adapter          *adapter = txr->adapter;
3550         struct em_buffer        *txbuf;
3551
3552         INIT_DEBUGOUT("free_transmit_ring: begin");
3553
3554         if (txr->tx_buffers == NULL)
3555                 return;
3556
3557         for (int i = 0; i < adapter->num_tx_desc; i++) {
3558                 txbuf = &txr->tx_buffers[i];
3559                 if (txbuf->m_head != NULL) {
3560                         bus_dmamap_sync(txr->txtag, txbuf->map,
3561                             BUS_DMASYNC_POSTWRITE);
3562                         bus_dmamap_unload(txr->txtag,
3563                             txbuf->map);
3564                         m_freem(txbuf->m_head);
3565                         txbuf->m_head = NULL;
3566                         if (txbuf->map != NULL) {
3567                                 bus_dmamap_destroy(txr->txtag,
3568                                     txbuf->map);
3569                                 txbuf->map = NULL;
3570                         }
3571                 } else if (txbuf->map != NULL) {
3572                         bus_dmamap_unload(txr->txtag,
3573                             txbuf->map);
3574                         bus_dmamap_destroy(txr->txtag,
3575                             txbuf->map);
3576                         txbuf->map = NULL;
3577                 }
3578         }
3579 #if __FreeBSD_version >= 800000
3580         if (txr->br != NULL)
3581                 buf_ring_free(txr->br, M_DEVBUF);
3582 #endif
3583         if (txr->tx_buffers != NULL) {
3584                 free(txr->tx_buffers, M_DEVBUF);
3585                 txr->tx_buffers = NULL;
3586         }
3587         if (txr->txtag != NULL) {
3588                 bus_dma_tag_destroy(txr->txtag);
3589                 txr->txtag = NULL;
3590         }
3591         return;
3592 }
3593
3594
3595 /*********************************************************************
3596  *  The offload context is protocol specific (TCP/UDP) and thus
3597  *  only needs to be set when the protocol changes. The occasion
3598  *  of a context change can be a performance detriment, and
3599  *  might be better just disabled. The reason arises in the way
3600  *  in which the controller supports pipelined requests from the
3601  *  Tx data DMA. Up to four requests can be pipelined, and they may
3602  *  belong to the same packet or to multiple packets. However all
3603  *  requests for one packet are issued before a request is issued
3604  *  for a subsequent packet and if a request for the next packet
3605  *  requires a context change, that request will be stalled
3606  *  until the previous request completes. This means setting up
3607  *  a new context effectively disables pipelined Tx data DMA which
 *  in turn greatly slows down performance when sending small-sized
 *  frames.
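 *
 *  For example, a long burst of segments from one TCP connection can
 *  reuse a single context, while traffic alternating between TCP and
 *  UDP flows pays a context descriptor per protocol switch and gives
 *  up the pipelining.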
3610  **********************************************************************/
3611 static void
3612 em_transmit_checksum_setup(struct tx_ring *txr, struct mbuf *mp, int ip_off,
3613     struct ip *ip, u32 *txd_upper, u32 *txd_lower)
3614 {
3615         struct adapter                  *adapter = txr->adapter;
3616         struct e1000_context_desc       *TXD = NULL;
3617         struct em_buffer                *tx_buffer;
3618         int                             cur, hdr_len;
3619         u32                             cmd = 0;
3620         u16                             offload = 0;
3621         u8                              ipcso, ipcss, tucso, tucss;
3622
3623         ipcss = ipcso = tucss = tucso = 0;
3624         hdr_len = ip_off + (ip->ip_hl << 2);
3625         cur = txr->next_avail_desc;
3626
3627         /* Setup of IP header checksum. */
3628         if (mp->m_pkthdr.csum_flags & CSUM_IP) {
3629                 *txd_upper |= E1000_TXD_POPTS_IXSM << 8;
3630                 offload |= CSUM_IP;
3631                 ipcss = ip_off;
3632                 ipcso = ip_off + offsetof(struct ip, ip_sum);
3633                 /*
3634                  * Start offset for header checksum calculation.
3635                  * End offset for header checksum calculation.
3636                  * Offset of place to put the checksum.
3637                  */
3638                 TXD = (struct e1000_context_desc *)&txr->tx_base[cur];
3639                 TXD->lower_setup.ip_fields.ipcss = ipcss;
3640                 TXD->lower_setup.ip_fields.ipcse = htole16(hdr_len);
3641                 TXD->lower_setup.ip_fields.ipcso = ipcso;
3642                 cmd |= E1000_TXD_CMD_IP;
3643         }
3644
3645         if (mp->m_pkthdr.csum_flags & CSUM_TCP) {
3646                 *txd_lower = E1000_TXD_CMD_DEXT | E1000_TXD_DTYP_D;
3647                 *txd_upper |= E1000_TXD_POPTS_TXSM << 8;
3648                 offload |= CSUM_TCP;
3649                 tucss = hdr_len;
3650                 tucso = hdr_len + offsetof(struct tcphdr, th_sum);
3651                 /*
                 * Setting up a new checksum offload context for every
                 * frame takes a lot of processing time for the hardware.
                 * This also hurts performance a lot for small frames,
                 * so avoid it if the driver can reuse a previously
                 * configured checksum offload context.
3657                  */
3658                 if (txr->last_hw_offload == offload) {
3659                         if (offload & CSUM_IP) {
3660                                 if (txr->last_hw_ipcss == ipcss &&
3661                                     txr->last_hw_ipcso == ipcso &&
3662                                     txr->last_hw_tucss == tucss &&
3663                                     txr->last_hw_tucso == tucso)
3664                                         return;
3665                         } else {
3666                                 if (txr->last_hw_tucss == tucss &&
3667                                     txr->last_hw_tucso == tucso)
3668                                         return;
3669                         }
3670                 }
3671                 txr->last_hw_offload = offload;
3672                 txr->last_hw_tucss = tucss;
3673                 txr->last_hw_tucso = tucso;
3674                 /*
3675                  * Start offset for payload checksum calculation.
3676                  * End offset for payload checksum calculation.
3677                  * Offset of place to put the checksum.
3678                  */
3679                 TXD = (struct e1000_context_desc *)&txr->tx_base[cur];
                TXD->upper_setup.tcp_fields.tucss = tucss;
3681                 TXD->upper_setup.tcp_fields.tucse = htole16(0);
3682                 TXD->upper_setup.tcp_fields.tucso = tucso;
3683                 cmd |= E1000_TXD_CMD_TCP;
3684         } else if (mp->m_pkthdr.csum_flags & CSUM_UDP) {
3685                 *txd_lower = E1000_TXD_CMD_DEXT | E1000_TXD_DTYP_D;
3686                 *txd_upper |= E1000_TXD_POPTS_TXSM << 8;
3687                 tucss = hdr_len;
3688                 tucso = hdr_len + offsetof(struct udphdr, uh_sum);
3689                 /*
                 * Setting up a new checksum offload context for every
                 * frame takes a lot of processing time for the hardware.
                 * This also hurts performance a lot for small frames,
                 * so avoid it if the driver can reuse a previously
                 * configured checksum offload context.
3695                  */
3696                 if (txr->last_hw_offload == offload) {
3697                         if (offload & CSUM_IP) {
3698                                 if (txr->last_hw_ipcss == ipcss &&
3699                                     txr->last_hw_ipcso == ipcso &&
3700                                     txr->last_hw_tucss == tucss &&
3701                                     txr->last_hw_tucso == tucso)
3702                                         return;
3703                         } else {
3704                                 if (txr->last_hw_tucss == tucss &&
3705                                     txr->last_hw_tucso == tucso)
3706                                         return;
3707                         }
3708                 }
3709                 txr->last_hw_offload = offload;
3710                 txr->last_hw_tucss = tucss;
3711                 txr->last_hw_tucso = tucso;
3712                 /*
                 * Start offset for payload checksum calculation.
                 * End offset for payload checksum calculation.
3715                  * Offset of place to put the checksum.
3716                  */
3717                 TXD = (struct e1000_context_desc *)&txr->tx_base[cur];
3718                 TXD->upper_setup.tcp_fields.tucss = tucss;
3719                 TXD->upper_setup.tcp_fields.tucse = htole16(0);
3720                 TXD->upper_setup.tcp_fields.tucso = tucso;
3721         }
3722   
3723         if (offload & CSUM_IP) {
3724                 txr->last_hw_ipcss = ipcss;
3725                 txr->last_hw_ipcso = ipcso;
3726         }
3727
3728         TXD->tcp_seg_setup.data = htole32(0);
3729         TXD->cmd_and_length =
3730             htole32(adapter->txd_cmd | E1000_TXD_CMD_DEXT | cmd);
3731         tx_buffer = &txr->tx_buffers[cur];
3732         tx_buffer->m_head = NULL;
3733         tx_buffer->next_eop = -1;
3734
3735         if (++cur == adapter->num_tx_desc)
3736                 cur = 0;
3737
3738         txr->tx_avail--;
3739         txr->next_avail_desc = cur;
3740 }
3741
3742
3743 /**********************************************************************
3744  *
3745  *  Setup work for hardware segmentation offload (TSO)
3746  *
3747  **********************************************************************/
3748 static void
3749 em_tso_setup(struct tx_ring *txr, struct mbuf *mp, int ip_off,
3750     struct ip *ip, struct tcphdr *tp, u32 *txd_upper, u32 *txd_lower)
3751 {
3752         struct adapter                  *adapter = txr->adapter;
3753         struct e1000_context_desc       *TXD;
3754         struct em_buffer                *tx_buffer;
3755         int cur, hdr_len;
3756
3757         /*
         * In theory we can use the same TSO context if and only if the
         * frame is the same type (IP/TCP) and has the same MSS. However,
         * checking whether a frame has the same IP/TCP structure is a
         * hard thing, so just ignore that and always re-establish a
         * new TSO context.
3763          */
3764         hdr_len = ip_off + (ip->ip_hl << 2) + (tp->th_off << 2);
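        /*
        ** e.g. for a plain Ethernet + IPv4 + TCP frame with no options:
        ** ip_off = 14, ip_hl = 5 (20 bytes) and th_off = 5 (20 bytes),
        ** so hdr_len = 54 and the segmented payload starts at byte 54.
        */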
3765         *txd_lower = (E1000_TXD_CMD_DEXT |      /* Extended descr type */
3766                       E1000_TXD_DTYP_D |        /* Data descr type */
3767                       E1000_TXD_CMD_TSE);       /* Do TSE on this packet */
3768
3769         /* IP and/or TCP header checksum calculation and insertion. */
3770         *txd_upper = (E1000_TXD_POPTS_IXSM | E1000_TXD_POPTS_TXSM) << 8;
3771
3772         cur = txr->next_avail_desc;
3773         tx_buffer = &txr->tx_buffers[cur];
3774         TXD = (struct e1000_context_desc *) &txr->tx_base[cur];
3775
3776         /*
3777          * Start offset for header checksum calculation.
3778          * End offset for header checksum calculation.
         * Offset of place to put the checksum.
3780          */
3781         TXD->lower_setup.ip_fields.ipcss = ip_off;
3782         TXD->lower_setup.ip_fields.ipcse =
3783             htole16(ip_off + (ip->ip_hl << 2) - 1);
3784         TXD->lower_setup.ip_fields.ipcso = ip_off + offsetof(struct ip, ip_sum);
3785         /*
3786          * Start offset for payload checksum calculation.
3787          * End offset for payload checksum calculation.
3788          * Offset of place to put the checksum.
3789          */
3790         TXD->upper_setup.tcp_fields.tucss = ip_off + (ip->ip_hl << 2);
3791         TXD->upper_setup.tcp_fields.tucse = 0;
3792         TXD->upper_setup.tcp_fields.tucso =
3793             ip_off + (ip->ip_hl << 2) + offsetof(struct tcphdr, th_sum);
3794         /*
3795          * Payload size per packet w/o any headers.
3796          * Length of all headers up to payload.
3797          */
3798         TXD->tcp_seg_setup.fields.mss = htole16(mp->m_pkthdr.tso_segsz);
3799         TXD->tcp_seg_setup.fields.hdr_len = hdr_len;
3800
3801         TXD->cmd_and_length = htole32(adapter->txd_cmd |
3802                                 E1000_TXD_CMD_DEXT |    /* Extended descr */
3803                                 E1000_TXD_CMD_TSE |     /* TSE context */
3804                                 E1000_TXD_CMD_IP |      /* Do IP csum */
3805                                 E1000_TXD_CMD_TCP |     /* Do TCP checksum */
3806                                 (mp->m_pkthdr.len - (hdr_len))); /* Total len */
3807
3808         tx_buffer->m_head = NULL;
3809         tx_buffer->next_eop = -1;
3810
3811         if (++cur == adapter->num_tx_desc)
3812                 cur = 0;
3813
3814         txr->tx_avail--;
3815         txr->next_avail_desc = cur;
3816         txr->tx_tso = TRUE;
3817 }
3818
3819
3820 /**********************************************************************
3821  *
3822  *  Examine each tx_buffer in the used queue. If the hardware is done
3823  *  processing the packet then free associated resources. The
3824  *  tx_buffer is put back on the free queue.
3825  *
3826  **********************************************************************/
3827 static void
3828 em_txeof(struct tx_ring *txr)
3829 {
3830         struct adapter  *adapter = txr->adapter;
3831         int first, last, done, processed;
3832         struct em_buffer *tx_buffer;
3833         struct e1000_tx_desc   *tx_desc, *eop_desc;
3834         struct ifnet   *ifp = adapter->ifp;
3835
3836         EM_TX_LOCK_ASSERT(txr);
3837 #ifdef DEV_NETMAP
3838         if (netmap_tx_irq(ifp, txr->me |
3839             (NETMAP_LOCKED_ENTER | NETMAP_LOCKED_EXIT)))
3840                 return;
3841 #endif /* DEV_NETMAP */
3842
3843         /* No work, make sure watchdog is off */
3844         if (txr->tx_avail == adapter->num_tx_desc) {
3845                 txr->queue_status = EM_QUEUE_IDLE;
3846                 return;
3847         }
3848
3849         processed = 0;
3850         first = txr->next_to_clean;
3851         tx_desc = &txr->tx_base[first];
3852         tx_buffer = &txr->tx_buffers[first];
3853         last = tx_buffer->next_eop;
3854         eop_desc = &txr->tx_base[last];
3855
3856         /*
         * Get the index of the first descriptor AFTER
         * the EOP of the first packet; that way we can
         * do the simple comparison in the inner while loop.
3861          */
3862         if (++last == adapter->num_tx_desc)
3863                 last = 0;
3864         done = last;
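        /*
        ** e.g. a packet occupying descriptors 10..12 has next_eop = 12,
        ** so done becomes 13 and the inner loop below cleans 10, 11 and
        ** 12, stopping once first reaches 13.
        */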
3865
3866         bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
3867             BUS_DMASYNC_POSTREAD);
3868
3869         while (eop_desc->upper.fields.status & E1000_TXD_STAT_DD) {
3870                 /* We clean the range of the packet */
3871                 while (first != done) {
3872                         tx_desc->upper.data = 0;
3873                         tx_desc->lower.data = 0;
3874                         tx_desc->buffer_addr = 0;
3875                         ++txr->tx_avail;
3876                         ++processed;
3877
3878                         if (tx_buffer->m_head) {
3879                                 bus_dmamap_sync(txr->txtag,
3880                                     tx_buffer->map,
3881                                     BUS_DMASYNC_POSTWRITE);
3882                                 bus_dmamap_unload(txr->txtag,
3883                                     tx_buffer->map);
3884                                 m_freem(tx_buffer->m_head);
3885                                 tx_buffer->m_head = NULL;
3886                         }
3887                         tx_buffer->next_eop = -1;
3888                         txr->watchdog_time = ticks;
3889
3890                         if (++first == adapter->num_tx_desc)
3891                                 first = 0;
3892
3893                         tx_buffer = &txr->tx_buffers[first];
3894                         tx_desc = &txr->tx_base[first];
3895                 }
3896                 ++ifp->if_opackets;
3897                 /* See if we can continue to the next packet */
3898                 last = tx_buffer->next_eop;
3899                 if (last != -1) {
3900                         eop_desc = &txr->tx_base[last];
3901                         /* Get new done point */
                        if (++last == adapter->num_tx_desc)
                                last = 0;
3903                         done = last;
3904                 } else
3905                         break;
3906         }
3907         bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
3908             BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
3909
3910         txr->next_to_clean = first;
3911
3912         /*
        ** Watchdog calculation: we know there's work
        ** outstanding or the first return would have been
        ** taken, so nothing processed for too long indicates
        ** a hang. The local timer will examine this and do
        ** a reset if needed.
3918         */
3919         if ((!processed) && ((ticks - txr->watchdog_time) > EM_WATCHDOG))
3920                 txr->queue_status = EM_QUEUE_HUNG;
3921
3922         /*
3923          * If we have a minimum free, clear IFF_DRV_OACTIVE
3924          * to tell the stack that it is OK to send packets.
3925          * Notice that all writes of OACTIVE happen under the
3926          * TX lock which, with a single queue, guarantees 
3927          * sanity.
3928          */
3929         if (txr->tx_avail >= EM_MAX_SCATTER)
3930                 ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;
3931
3932         /* Disable watchdog if all clean */
3933         if (txr->tx_avail == adapter->num_tx_desc) {
3934                 txr->queue_status = EM_QUEUE_IDLE;
3935         } 
3936 }
3937
3938
3939 /*********************************************************************
3940  *
3941  *  Refresh RX descriptor mbufs from system mbuf buffer pool.
3942  *
3943  **********************************************************************/
3944 static void
3945 em_refresh_mbufs(struct rx_ring *rxr, int limit)
3946 {
3947         struct adapter          *adapter = rxr->adapter;
3948         struct mbuf             *m;
3949         bus_dma_segment_t       segs[1];
3950         struct em_buffer        *rxbuf;
3951         int                     i, j, error, nsegs;
3952         bool                    cleaned = FALSE;
3953
3954         i = j = rxr->next_to_refresh;
3955         /*
3956         ** Get one descriptor beyond
3957         ** our work mark to control
3958         ** the loop.
3959         */
3960         if (++j == adapter->num_rx_desc)
3961                 j = 0;
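        /*
        ** i trails j by one slot: i indexes the descriptor being
        ** refreshed while j is its precomputed successor. Stopping when
        ** j hits the caller's limit means the refresh can never wrap
        ** fully around onto descriptors it has just written.
        */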
3962
3963         while (j != limit) {
3964                 rxbuf = &rxr->rx_buffers[i];
3965                 if (rxbuf->m_head == NULL) {
3966                         m = m_getjcl(M_NOWAIT, MT_DATA,
3967                             M_PKTHDR, adapter->rx_mbuf_sz);
3968                         /*
3969                         ** If we have a temporary resource shortage
                        ** that causes a failure, just abort the refresh
                        ** for now; we will return to this point when
                        ** reinvoked from em_rxeof.
3973                         */
3974                         if (m == NULL)
3975                                 goto update;
3976                 } else
3977                         m = rxbuf->m_head;
3978
3979                 m->m_len = m->m_pkthdr.len = adapter->rx_mbuf_sz;
3980                 m->m_flags |= M_PKTHDR;
3981                 m->m_data = m->m_ext.ext_buf;
3982
3983                 /* Use bus_dma machinery to setup the memory mapping  */
3984                 error = bus_dmamap_load_mbuf_sg(rxr->rxtag, rxbuf->map,
3985                     m, segs, &nsegs, BUS_DMA_NOWAIT);
3986                 if (error != 0) {
3987                         printf("Refresh mbufs: hdr dmamap load"
3988                             " failure - %d\n", error);
3989                         m_free(m);
3990                         rxbuf->m_head = NULL;
3991                         goto update;
3992                 }
3993                 rxbuf->m_head = m;
3994                 bus_dmamap_sync(rxr->rxtag,
3995                     rxbuf->map, BUS_DMASYNC_PREREAD);
3996                 rxr->rx_base[i].buffer_addr = htole64(segs[0].ds_addr);
3997                 cleaned = TRUE;
3998
                i = j; /* Next is precalculated for us */
4000                 rxr->next_to_refresh = i;
4001                 /* Calculate next controlling index */
4002                 if (++j == adapter->num_rx_desc)
4003                         j = 0;
4004         }
4005 update:
4006         /*
        ** Update the tail pointer only if we refreshed
        ** anything, and only as far as we have refreshed.
4009         */
4010         if (cleaned)
4011                 E1000_WRITE_REG(&adapter->hw,
4012                     E1000_RDT(rxr->me), rxr->next_to_refresh);
4013
4014         return;
4015 }
4016
4017
4018 /*********************************************************************
4019  *
4020  *  Allocate memory for rx_buffer structures. Since we use one
 *  rx_buffer per received packet, the maximum number of rx_buffers
4022  *  that we'll need is equal to the number of receive descriptors
4023  *  that we've allocated.
4024  *
4025  **********************************************************************/
4026 static int
4027 em_allocate_receive_buffers(struct rx_ring *rxr)
4028 {
4029         struct adapter          *adapter = rxr->adapter;
4030         device_t                dev = adapter->dev;
4031         struct em_buffer        *rxbuf;
4032         int                     error;
4033
4034         rxr->rx_buffers = malloc(sizeof(struct em_buffer) *
4035             adapter->num_rx_desc, M_DEVBUF, M_NOWAIT | M_ZERO);
4036         if (rxr->rx_buffers == NULL) {
4037                 device_printf(dev, "Unable to allocate rx_buffer memory\n");
4038                 return (ENOMEM);
4039         }
4040
4041         error = bus_dma_tag_create(bus_get_dma_tag(dev), /* parent */
4042                                 1, 0,                   /* alignment, bounds */
4043                                 BUS_SPACE_MAXADDR,      /* lowaddr */
4044                                 BUS_SPACE_MAXADDR,      /* highaddr */
4045                                 NULL, NULL,             /* filter, filterarg */
4046                                 MJUM9BYTES,             /* maxsize */
4047                                 1,                      /* nsegments */
4048                                 MJUM9BYTES,             /* maxsegsize */
4049                                 0,                      /* flags */
4050                                 NULL,                   /* lockfunc */
4051                                 NULL,                   /* lockarg */
4052                                 &rxr->rxtag);
4053         if (error) {
4054                 device_printf(dev, "%s: bus_dma_tag_create failed %d\n",
4055                     __func__, error);
4056                 goto fail;
4057         }
4058
4059         rxbuf = rxr->rx_buffers;
4060         for (int i = 0; i < adapter->num_rx_desc; i++) {
4061                 rxbuf = &rxr->rx_buffers[i];
4062                 error = bus_dmamap_create(rxr->rxtag, BUS_DMA_NOWAIT,
4063                     &rxbuf->map);
4064                 if (error) {
4065                         device_printf(dev, "%s: bus_dmamap_create failed: %d\n",
4066                             __func__, error);
4067                         goto fail;
4068                 }
4069         }
4070
4071         return (0);
4072
4073 fail:
4074         em_free_receive_structures(adapter);
4075         return (error);
4076 }
4077
4078
4079 /*********************************************************************
4080  *
4081  *  Initialize a receive ring and its buffers.
4082  *
4083  **********************************************************************/
4084 static int
4085 em_setup_receive_ring(struct rx_ring *rxr)
4086 {
4087         struct  adapter         *adapter = rxr->adapter;
4088         struct em_buffer        *rxbuf;
4089         bus_dma_segment_t       seg[1];
4090         int                     rsize, nsegs, error = 0;
4091 #ifdef DEV_NETMAP
4092         struct netmap_adapter *na = NA(adapter->ifp);
4093         struct netmap_slot *slot;
4094 #endif
4095
4096
4097         /* Clear the ring contents */
4098         EM_RX_LOCK(rxr);
4099         rsize = roundup2(adapter->num_rx_desc *
4100             sizeof(struct e1000_rx_desc), EM_DBA_ALIGN);
4101         bzero((void *)rxr->rx_base, rsize);
4102 #ifdef DEV_NETMAP
4103         slot = netmap_reset(na, NR_RX, 0, 0);
4104 #endif
4105
4106         /*
4107         ** Free current RX buffer structs and their mbufs
4108         */
4109         for (int i = 0; i < adapter->num_rx_desc; i++) {
4110                 rxbuf = &rxr->rx_buffers[i];
4111                 if (rxbuf->m_head != NULL) {
4112                         bus_dmamap_sync(rxr->rxtag, rxbuf->map,
4113                             BUS_DMASYNC_POSTREAD);
4114                         bus_dmamap_unload(rxr->rxtag, rxbuf->map);
4115                         m_freem(rxbuf->m_head);
4116                         rxbuf->m_head = NULL; /* mark as freed */
4117                 }
4118         }
4119
4120         /* Now replenish the mbufs */
4121         for (int j = 0; j != adapter->num_rx_desc; ++j) {
4122                 rxbuf = &rxr->rx_buffers[j];
4123 #ifdef DEV_NETMAP
4124                 if (slot) {
4125                         int si = netmap_idx_n2k(&na->rx_rings[rxr->me], j);
4126                         uint64_t paddr;
4127                         void *addr;
4128
4129                         addr = PNMB(slot + si, &paddr);
4130                         netmap_load_map(rxr->rxtag, rxbuf->map, addr);
4131                         /* Update descriptor */
4132                         rxr->rx_base[j].buffer_addr = htole64(paddr);
4133                         continue;
4134                 }
4135 #endif /* DEV_NETMAP */
4136                 rxbuf->m_head = m_getjcl(M_NOWAIT, MT_DATA,
4137                     M_PKTHDR, adapter->rx_mbuf_sz);
4138                 if (rxbuf->m_head == NULL) {
4139                         error = ENOBUFS;
4140                         goto fail;
4141                 }
4142                 rxbuf->m_head->m_len = adapter->rx_mbuf_sz;
4143                 rxbuf->m_head->m_flags &= ~M_HASFCS; /* we strip it */
4144                 rxbuf->m_head->m_pkthdr.len = adapter->rx_mbuf_sz;
4145
4146                 /* Get the memory mapping */
4147                 error = bus_dmamap_load_mbuf_sg(rxr->rxtag,
4148                     rxbuf->map, rxbuf->m_head, seg,
4149                     &nsegs, BUS_DMA_NOWAIT);
4150                 if (error != 0) {
4151                         m_freem(rxbuf->m_head);
4152                         rxbuf->m_head = NULL;
4153                         goto fail;
4154                 }
4155                 bus_dmamap_sync(rxr->rxtag,
4156                     rxbuf->map, BUS_DMASYNC_PREREAD);
4157
4158                 /* Update descriptor */
4159                 rxr->rx_base[j].buffer_addr = htole64(seg[0].ds_addr);
4160         }
4161         rxr->next_to_check = 0;
4162         rxr->next_to_refresh = 0;
4163         bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
4164             BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
4165
4166 fail:
4167         EM_RX_UNLOCK(rxr);
4168         return (error);
4169 }
4170
4171 /*********************************************************************
4172  *
4173  *  Initialize all receive rings.
4174  *
4175  **********************************************************************/
4176 static int
4177 em_setup_receive_structures(struct adapter *adapter)
4178 {
4179         struct rx_ring *rxr = adapter->rx_rings;
4180         int q;
4181
4182         for (q = 0; q < adapter->num_queues; q++, rxr++)
4183                 if (em_setup_receive_ring(rxr))
4184                         goto fail;
4185
4186         return (0);
4187 fail:
4188         /*
4189          * Free the RX buffers allocated so far; we only handle
4190          * the rings that completed, since the failing case will
4191          * have cleaned up after itself. 'q' failed, so it's the terminus.
4192          */
4193         for (int i = 0; i < q; ++i) {
4194                 rxr = &adapter->rx_rings[i];
4195                 for (int n = 0; n < adapter->num_rx_desc; n++) {
4196                         struct em_buffer *rxbuf;
4197                         rxbuf = &rxr->rx_buffers[n];
4198                         if (rxbuf->m_head != NULL) {
4199                                 bus_dmamap_sync(rxr->rxtag, rxbuf->map,
4200                                   BUS_DMASYNC_POSTREAD);
4201                                 bus_dmamap_unload(rxr->rxtag, rxbuf->map);
4202                                 m_freem(rxbuf->m_head);
4203                                 rxbuf->m_head = NULL;
4204                         }
4205                 }
4206                 rxr->next_to_check = 0;
4207                 rxr->next_to_refresh = 0;
4208         }
4209
4210         return (ENOBUFS);
4211 }
4212
4213 /*********************************************************************
4214  *
4215  *  Free all receive rings.
4216  *
4217  **********************************************************************/
4218 static void
4219 em_free_receive_structures(struct adapter *adapter)
4220 {
4221         struct rx_ring *rxr = adapter->rx_rings;
4222
4223         for (int i = 0; i < adapter->num_queues; i++, rxr++) {
4224                 em_free_receive_buffers(rxr);
4225                 /* Free the ring memory as well */
4226                 em_dma_free(adapter, &rxr->rxdma);
4227                 EM_RX_LOCK_DESTROY(rxr);
4228         }
4229
4230         free(adapter->rx_rings, M_DEVBUF);
4231 }
4232
4233
4234 /*********************************************************************
4235  *
4236  *  Free receive ring data structures
4237  *
4238  **********************************************************************/
4239 static void
4240 em_free_receive_buffers(struct rx_ring *rxr)
4241 {
4242         struct adapter          *adapter = rxr->adapter;
4243         struct em_buffer        *rxbuf = NULL;
4244
4245         INIT_DEBUGOUT("free_receive_buffers: begin");
4246
4247         if (rxr->rx_buffers != NULL) {
4248                 for (int i = 0; i < adapter->num_rx_desc; i++) {
4249                         rxbuf = &rxr->rx_buffers[i];
4250                         if (rxbuf->map != NULL) {
4251                                 bus_dmamap_sync(rxr->rxtag, rxbuf->map,
4252                                     BUS_DMASYNC_POSTREAD);
4253                                 bus_dmamap_unload(rxr->rxtag, rxbuf->map);
4254                                 bus_dmamap_destroy(rxr->rxtag, rxbuf->map);
4255                         }
4256                         if (rxbuf->m_head != NULL) {
4257                                 m_freem(rxbuf->m_head);
4258                                 rxbuf->m_head = NULL;
4259                         }
4260                 }
4261                 free(rxr->rx_buffers, M_DEVBUF);
4262                 rxr->rx_buffers = NULL;
4263                 rxr->next_to_check = 0;
4264                 rxr->next_to_refresh = 0;
4265         }
4266
4267         if (rxr->rxtag != NULL) {
4268                 bus_dma_tag_destroy(rxr->rxtag);
4269                 rxr->rxtag = NULL;
4270         }
4271
4272         return;
4273 }
4274
4275
4276 /*********************************************************************
4277  *
4278  *  Enable receive unit.
4279  *
4280  **********************************************************************/
4281
4282 static void
4283 em_initialize_receive_unit(struct adapter *adapter)
4284 {
4285         struct rx_ring  *rxr = adapter->rx_rings;
4286         struct ifnet    *ifp = adapter->ifp;
4287         struct e1000_hw *hw = &adapter->hw;
4288         u64     bus_addr;
4289         u32     rctl, rxcsum;
4290
4291         INIT_DEBUGOUT("em_initialize_receive_units: begin");
4292
4293         /*
4294          * Make sure receives are disabled while setting
4295          * up the descriptor ring
4296          */
4297         rctl = E1000_READ_REG(hw, E1000_RCTL);
4298         /* On 82574/82583, never disable the RX unit once it has been enabled */
4299         if ((hw->mac.type != e1000_82574) && (hw->mac.type != e1000_82583))
4300                 E1000_WRITE_REG(hw, E1000_RCTL, rctl & ~E1000_RCTL_EN);
4301
4302         E1000_WRITE_REG(&adapter->hw, E1000_RADV,
4303             adapter->rx_abs_int_delay.value);
4304         /*
4305          * Set the interrupt throttling rate. Value is calculated
4306          * as DEFAULT_ITR = 1/(MAX_INTS_PER_SEC * 256ns)
4307          */
4308         E1000_WRITE_REG(hw, E1000_ITR, DEFAULT_ITR);
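        /*
         * Worked example (assuming MAX_INTS_PER_SEC is 8000, as defined
         * in if_em.h): DEFAULT_ITR = 1000000000 / (8000 * 256) = ~488 in
         * 256ns units, i.e. an interval of roughly 125us, or about 8000
         * interrupts per second.
         */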
4309
4310         /*
4311         ** When using MSIX interrupts we need to throttle
4312         ** using the EITR register (82574 only)
4313         */
4314         if (hw->mac.type == e1000_82574) {
4315                 for (int i = 0; i < 4; i++)
4316                         E1000_WRITE_REG(hw, E1000_EITR_82574(i),
4317                             DEFAULT_ITR);
4318                 /* Disable accelerated acknowledge */
4319                 E1000_WRITE_REG(hw, E1000_RFCTL, E1000_RFCTL_ACK_DIS);
4320         }
4321
4322         rxcsum = E1000_READ_REG(hw, E1000_RXCSUM);
4323         if (ifp->if_capenable & IFCAP_RXCSUM)
4324                 rxcsum |= E1000_RXCSUM_TUOFL;
4325         else
4326                 rxcsum &= ~E1000_RXCSUM_TUOFL;
4327         E1000_WRITE_REG(hw, E1000_RXCSUM, rxcsum);
4328
4329         /*
4330         ** XXX TEMPORARY WORKAROUND: on some systems with 82573
4331         ** long latencies are observed, like Lenovo X60. This
4332         ** change eliminates the problem, but since having positive
4333         ** values in RDTR is a known source of problems on other
4334         ** platforms another solution is being sought.
4335         */
4336         if (hw->mac.type == e1000_82573)
4337                 E1000_WRITE_REG(hw, E1000_RDTR, 0x20);
4338
4339         for (int i = 0; i < adapter->num_queues; i++, rxr++) {
4340                 /* Setup the Base and Length of the Rx Descriptor Ring */
4341                 u32 rdt = adapter->num_rx_desc - 1; /* default */
4342
4343                 bus_addr = rxr->rxdma.dma_paddr;
4344                 E1000_WRITE_REG(hw, E1000_RDLEN(i),
4345                     adapter->num_rx_desc * sizeof(struct e1000_rx_desc));
4346                 E1000_WRITE_REG(hw, E1000_RDBAH(i), (u32)(bus_addr >> 32));
4347                 E1000_WRITE_REG(hw, E1000_RDBAL(i), (u32)bus_addr);
4348                 /* Setup the Head and Tail Descriptor Pointers */
4349                 E1000_WRITE_REG(hw, E1000_RDH(i), 0);
4350 #ifdef DEV_NETMAP
4351                 /*
4352                  * an init() while a netmap client is active must
4353                  * preserve the rx buffers passed to userspace.
4354                  */
4355                 if (ifp->if_capenable & IFCAP_NETMAP)
4356                         rdt -= NA(adapter->ifp)->rx_rings[i].nr_hwavail;
4357 #endif /* DEV_NETMAP */
4358                 E1000_WRITE_REG(hw, E1000_RDT(i), rdt);
4359         }
4360
4361         /* Set PTHRESH for improved jumbo performance */
4362         if (((adapter->hw.mac.type == e1000_ich9lan) ||
4363             (adapter->hw.mac.type == e1000_pch2lan) ||
4364             (adapter->hw.mac.type == e1000_ich10lan)) &&
4365             (ifp->if_mtu > ETHERMTU)) {
4366                 u32 rxdctl = E1000_READ_REG(hw, E1000_RXDCTL(0));
4367                 E1000_WRITE_REG(hw, E1000_RXDCTL(0), rxdctl | 3);
4368         }
4369                 
4370         if (adapter->hw.mac.type >= e1000_pch2lan) {
4371                 if (ifp->if_mtu > ETHERMTU)
4372                         e1000_lv_jumbo_workaround_ich8lan(hw, TRUE);
4373                 else
4374                         e1000_lv_jumbo_workaround_ich8lan(hw, FALSE);
4375         }
4376
4377         /* Setup the Receive Control Register */
4378         rctl &= ~(3 << E1000_RCTL_MO_SHIFT);
4379         rctl |= E1000_RCTL_EN | E1000_RCTL_BAM |
4380             E1000_RCTL_LBM_NO | E1000_RCTL_RDMTS_HALF |
4381             (hw->mac.mc_filter_type << E1000_RCTL_MO_SHIFT);
4382
4383         /* Strip the CRC */
4384         rctl |= E1000_RCTL_SECRC;
4385
4386         /* Make sure VLAN Filters are off */
4387         rctl &= ~E1000_RCTL_VFE;
4388         rctl &= ~E1000_RCTL_SBP;
4389
4390         if (adapter->rx_mbuf_sz == MCLBYTES)
4391                 rctl |= E1000_RCTL_SZ_2048;
4392         else if (adapter->rx_mbuf_sz == MJUMPAGESIZE)
4393                 rctl |= E1000_RCTL_SZ_4096 | E1000_RCTL_BSEX;
4394         else if (adapter->rx_mbuf_sz > MJUMPAGESIZE)
4395                 rctl |= E1000_RCTL_SZ_8192 | E1000_RCTL_BSEX;
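        /*
         * Worked example of the size mapping above: a standard 2048-byte
         * cluster (MCLBYTES) selects SZ_2048; a page-sized jumbo cluster
         * (MJUMPAGESIZE) selects SZ_4096 with the buffer size extension
         * (BSEX); anything larger, such as a 9K cluster (MJUM9BYTES),
         * falls into the SZ_8192 | BSEX case.
         */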
4396
4397         if (ifp->if_mtu > ETHERMTU)
4398                 rctl |= E1000_RCTL_LPE;
4399         else
4400                 rctl &= ~E1000_RCTL_LPE;
4401
4402         /* Write out the settings */
4403         E1000_WRITE_REG(hw, E1000_RCTL, rctl);
4404
4405         return;
4406 }
4407
4408
4409 /*********************************************************************
4410  *
4411  *  This routine executes in interrupt context. It replenishes
4412  *  the mbufs in the descriptor ring and sends data which has
4413  *  been dma'ed into host memory to the upper layer.
4414  *
4415  *  We loop at most count times if count is > 0, or until done if
4416  *  count < 0.
4417  *  
4418  *  For polling we also return the number of cleaned packets.
4419  *********************************************************************/
4420 static bool
4421 em_rxeof(struct rx_ring *rxr, int count, int *done)
4422 {
4423         struct adapter          *adapter = rxr->adapter;
4424         struct ifnet            *ifp = adapter->ifp;
4425         struct mbuf             *mp, *sendmp;
4426         u8                      status = 0;
4427         u16                     len;
4428         int                     i, processed, rxdone = 0;
4429         bool                    eop;
4430         struct e1000_rx_desc    *cur;
4431
4432         EM_RX_LOCK(rxr);
4433
4434 #ifdef DEV_NETMAP
4435         if (netmap_rx_irq(ifp, rxr->me | NETMAP_LOCKED_ENTER, &processed))
4436                 return (FALSE);
4437 #endif /* DEV_NETMAP */
4438
4439         for (i = rxr->next_to_check, processed = 0; count != 0;) {
4440
4441                 if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
4442                         break;
4443
4444                 bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
4445                     BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
4446
4447                 cur = &rxr->rx_base[i];
4448                 status = cur->status;
4449                 mp = sendmp = NULL;
4450
4451                 if ((status & E1000_RXD_STAT_DD) == 0)
4452                         break;
4453
4454                 len = le16toh(cur->length);
4455                 eop = (status & E1000_RXD_STAT_EOP) != 0;
4456
4457                 if ((cur->errors & E1000_RXD_ERR_FRAME_ERR_MASK) ||
4458                     (rxr->discard == TRUE)) {
4459                         adapter->dropped_pkts++;
4460                         ++rxr->rx_discarded;
4461                         if (!eop) /* Catch subsequent segs */
4462                                 rxr->discard = TRUE;
4463                         else
4464                                 rxr->discard = FALSE;
4465                         em_rx_discard(rxr, i);
4466                         goto next_desc;
4467                 }
4468
4469                 /* Assign correct length to the current fragment */
4470                 mp = rxr->rx_buffers[i].m_head;
4471                 mp->m_len = len;
4472
4473                 /* Trigger for refresh */
4474                 rxr->rx_buffers[i].m_head = NULL;
4475
4476                 /* First segment? */
4477                 if (rxr->fmp == NULL) {
4478                         mp->m_pkthdr.len = len;
4479                         rxr->fmp = rxr->lmp = mp;
4480                 } else {
4481                         /* Chain mbuf's together */
4482                         mp->m_flags &= ~M_PKTHDR;
4483                         rxr->lmp->m_next = mp;
4484                         rxr->lmp = mp;
4485                         rxr->fmp->m_pkthdr.len += len;
4486                 }
4487
4488                 if (eop) {
4489                         --count;
4490                         sendmp = rxr->fmp;
4491                         sendmp->m_pkthdr.rcvif = ifp;
4492                         ifp->if_ipackets++;
4493                         em_receive_checksum(cur, sendmp);
4494 #ifndef __NO_STRICT_ALIGNMENT
4495                         if (adapter->hw.mac.max_frame_size >
4496                             (MCLBYTES - ETHER_ALIGN) &&
4497                             em_fixup_rx(rxr) != 0)
4498                                 goto skip;
4499 #endif
4500                         if (status & E1000_RXD_STAT_VP) {
4501                                 sendmp->m_pkthdr.ether_vtag =
4502                                     le16toh(cur->special);
4503                                 sendmp->m_flags |= M_VLANTAG;
4504                         }
4505 #ifndef __NO_STRICT_ALIGNMENT
4506 skip:
4507 #endif
4508                         rxr->fmp = rxr->lmp = NULL;
4509                 }
4510 next_desc:
4511                 /* Zero out the receive descriptors status. */
4512                 cur->status = 0;
4513                 ++rxdone;       /* cumulative for POLL */
4514                 ++processed;
4515
4516                 /* Advance our pointers to the next descriptor. */
4517                 if (++i == adapter->num_rx_desc)
4518                         i = 0;
4519
4520                 /* Send to the stack */
4521                 if (sendmp != NULL) {
4522                         rxr->next_to_check = i;
4523                         EM_RX_UNLOCK(rxr);
4524                         (*ifp->if_input)(ifp, sendmp);
4525                         EM_RX_LOCK(rxr);
4526                         i = rxr->next_to_check;
4527                 }
4528
4529                 /* Only refresh mbufs every 8 descriptors */
4530                 if (processed == 8) {
4531                         em_refresh_mbufs(rxr, i);
4532                         processed = 0;
4533                 }
4534         }
4535
4536         /* Catch any remaining refresh work */
4537         if (e1000_rx_unrefreshed(rxr))
4538                 em_refresh_mbufs(rxr, i);
4539
4540         rxr->next_to_check = i;
4541         if (done != NULL)
4542                 *done = rxdone;
4543         EM_RX_UNLOCK(rxr);
4544
4545         return ((status & E1000_RXD_STAT_DD) ? TRUE : FALSE);
4546 }
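/*
 * Illustrative sketch (hypothetical helper, not part of the driver): a
 * polling consumer of em_rxeof() passes a positive budget and reads the
 * per-call packet count back through 'done', much as em_poll() does.
 */
#if 0
static void
em_poll_sketch(struct rx_ring *rxr, int budget)
{
        int rx_done;

        /* Clean at most 'budget' packets; rx_done reports how many. */
        (void) em_rxeof(rxr, budget, &rx_done);
}
#endif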
4547
4548 static __inline void
4549 em_rx_discard(struct rx_ring *rxr, int i)
4550 {
4551         struct em_buffer        *rbuf;
4552
4553         rbuf = &rxr->rx_buffers[i];
4554         /* Free any previous pieces */
4555         if (rxr->fmp != NULL) {
4556                 rxr->fmp->m_flags |= M_PKTHDR;
4557                 m_freem(rxr->fmp);
4558                 rxr->fmp = NULL;
4559                 rxr->lmp = NULL;
4560         }
4561         /*
4562         ** Free buffer and allow em_refresh_mbufs()
4563         ** to clean up and recharge buffer.
4564         */
4565         if (rbuf->m_head) {
4566                 m_free(rbuf->m_head);
4567                 rbuf->m_head = NULL;
4568         }
4569         return;
4570 }
4571
4572 #ifndef __NO_STRICT_ALIGNMENT
4573 /*
4574  * When jumbo frames are enabled we should realign the entire payload on
4575  * architectures with strict alignment. This is a serious design flaw of the
4576  * 8254x, as it defeats the benefit of DMA: it only allows RX buffer sizes
4577  * of 2048/4096/8192/16384, while what we really want is 2048 - ETHER_ALIGN,
4578  * which would align the payload. On architectures without strict alignment
4579  * restrictions the 8254x still performs unaligned memory accesses, which
4580  * reduce performance as well. To avoid copying an entire frame to realign
4581  * it, we allocate a new mbuf, copy the ethernet header into it, and prepend
4582  * the new mbuf to the existing mbuf chain.
4583  *
4584  * Be aware that the best performance of the 8254x is achieved only when
4585  * jumbo frames are not used at all on architectures with strict alignment.
4586  */
4587 static int
4588 em_fixup_rx(struct rx_ring *rxr)
4589 {
4590         struct adapter *adapter = rxr->adapter;
4591         struct mbuf *m, *n;
4592         int error;
4593
4594         error = 0;
4595         m = rxr->fmp;
4596         if (m->m_len <= (MCLBYTES - ETHER_HDR_LEN)) {
4597                 bcopy(m->m_data, m->m_data + ETHER_HDR_LEN, m->m_len);
4598                 m->m_data += ETHER_HDR_LEN;
4599         } else {
4600                 MGETHDR(n, M_NOWAIT, MT_DATA);
4601                 if (n != NULL) {
4602                         bcopy(m->m_data, n->m_data, ETHER_HDR_LEN);
4603                         m->m_data += ETHER_HDR_LEN;
4604                         m->m_len -= ETHER_HDR_LEN;
4605                         n->m_len = ETHER_HDR_LEN;
4606                         M_MOVE_PKTHDR(n, m);
4607                         n->m_next = m;
4608                         rxr->fmp = n;
4609                 } else {
4610                         adapter->dropped_pkts++;
4611                         m_freem(rxr->fmp);
4612                         rxr->fmp = NULL;
4613                         error = ENOMEM;
4614                 }
4615         }
4616
4617         return (error);
4618 }
4619 #endif
4620
4621 /*********************************************************************
4622  *
4623  *  Verify that the hardware indicated that the checksum is valid.
4624  *  Inform the stack about the status of checksum so that stack
4625  *  doesn't spend time verifying the checksum.
4626  *
4627  *********************************************************************/
4628 static void
4629 em_receive_checksum(struct e1000_rx_desc *rx_desc, struct mbuf *mp)
4630 {
4631         mp->m_pkthdr.csum_flags = 0;
4632
4633         /* Ignore Checksum bit is set */
4634         if (rx_desc->status & E1000_RXD_STAT_IXSM)
4635                 return;
4636
4637         if (rx_desc->errors & (E1000_RXD_ERR_TCPE | E1000_RXD_ERR_IPE))
4638                 return;
4639
4640         /* IP Checksum Good? */
4641         if (rx_desc->status & E1000_RXD_STAT_IPCS)
4642                 mp->m_pkthdr.csum_flags = (CSUM_IP_CHECKED | CSUM_IP_VALID);
4643
4644         /* TCP or UDP checksum */
4645         if (rx_desc->status & (E1000_RXD_STAT_TCPCS | E1000_RXD_STAT_UDPCS)) {
4646                 mp->m_pkthdr.csum_flags |= (CSUM_DATA_VALID | CSUM_PSEUDO_HDR);
4647                 mp->m_pkthdr.csum_data = htons(0xffff);
4648         }
4649 }
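/*
 * Illustrative sketch (hypothetical helper, not part of the driver): a
 * consumer of the flags set above can skip its software checksum when
 * both data-valid bits are present.
 */
#if 0
static int
em_hw_csum_ok_sketch(struct mbuf *mp)
{
        /* Both bits set means hardware validated the TCP/UDP checksum;
         * csum_data already holds 0xffff for the pseudo-header check. */
        return ((mp->m_pkthdr.csum_flags &
            (CSUM_DATA_VALID | CSUM_PSEUDO_HDR)) ==
            (CSUM_DATA_VALID | CSUM_PSEUDO_HDR));
}
#endif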
4650
4651 /*
4652  * This routine is run via an vlan
4653  * This routine is run via a vlan
4654  */
4655 static void
4656 em_register_vlan(void *arg, struct ifnet *ifp, u16 vtag)
4657 {
4658         struct adapter  *adapter = ifp->if_softc;
4659         u32             index, bit;
4660
4661         if (ifp->if_softc !=  arg)   /* Not our event */
4662                 return;
4663
4664         if ((vtag == 0) || (vtag > 4095))       /* Invalid ID */
4665                 return;
4666
4667         EM_CORE_LOCK(adapter);
4668         index = (vtag >> 5) & 0x7F;
4669         bit = vtag & 0x1F;
4670         adapter->shadow_vfta[index] |= (1 << bit);
4671         ++adapter->num_vlans;
4672         /* Re-init to load the changes */
4673         if (ifp->if_capenable & IFCAP_VLAN_HWFILTER)
4674                 em_init_locked(adapter);
4675         EM_CORE_UNLOCK(adapter);
4676 }
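/*
 * Worked example of the VFTA indexing above: for vtag 100,
 * index = (100 >> 5) & 0x7F = 3 and bit = 100 & 0x1F = 4, so bit 4 of
 * shadow_vfta[3] is set; em_unregister_vlan() below clears the same bit.
 */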
4677
4678 /*
4679  * This routine is run via a vlan
4680  * unconfig EVENT
4681  */
4682 static void
4683 em_unregister_vlan(void *arg, struct ifnet *ifp, u16 vtag)
4684 {
4685         struct adapter  *adapter = ifp->if_softc;
4686         u32             index, bit;
4687
4688         if (ifp->if_softc !=  arg)
4689                 return;
4690
4691         if ((vtag == 0) || (vtag > 4095))       /* Invalid */
4692                 return;
4693
4694         EM_CORE_LOCK(adapter);
4695         index = (vtag >> 5) & 0x7F;
4696         bit = vtag & 0x1F;
4697         adapter->shadow_vfta[index] &= ~(1 << bit);
4698         --adapter->num_vlans;
4699         /* Re-init to load the changes */
4700         if (ifp->if_capenable & IFCAP_VLAN_HWFILTER)
4701                 em_init_locked(adapter);
4702         EM_CORE_UNLOCK(adapter);
4703 }
4704
4705 static void
4706 em_setup_vlan_hw_support(struct adapter *adapter)
4707 {
4708         struct e1000_hw *hw = &adapter->hw;
4709         u32             reg;
4710
4711         /*
4712         ** We get here through init_locked, meaning a
4713         ** soft reset has already cleared the VFTA and
4714         ** other state, so if no vlans have been
4715         ** registered there is nothing to do.
4716         */
4717         if (adapter->num_vlans == 0)
4718                 return;
4719
4720         /*
4721         ** A soft reset zeroes out the VFTA, so
4722         ** we need to repopulate it now.
4723         */
4724         for (int i = 0; i < EM_VFTA_SIZE; i++)
4725                 if (adapter->shadow_vfta[i] != 0)
4726                         E1000_WRITE_REG_ARRAY(hw, E1000_VFTA,
4727                             i, adapter->shadow_vfta[i]);
4728
4729         reg = E1000_READ_REG(hw, E1000_CTRL);
4730         reg |= E1000_CTRL_VME;
4731         E1000_WRITE_REG(hw, E1000_CTRL, reg);
4732
4733         /* Enable the Filter Table */
4734         reg = E1000_READ_REG(hw, E1000_RCTL);
4735         reg &= ~E1000_RCTL_CFIEN;
4736         reg |= E1000_RCTL_VFE;
4737         E1000_WRITE_REG(hw, E1000_RCTL, reg);
4738 }
4739
4740 static void
4741 em_enable_intr(struct adapter *adapter)
4742 {
4743         struct e1000_hw *hw = &adapter->hw;
4744         u32 ims_mask = IMS_ENABLE_MASK;
4745
4746         if (hw->mac.type == e1000_82574) {
4747                 E1000_WRITE_REG(hw, EM_EIAC, EM_MSIX_MASK);
4748                 ims_mask |= EM_MSIX_MASK;
4749         } 
4750         E1000_WRITE_REG(hw, E1000_IMS, ims_mask);
4751 }
4752
4753 static void
4754 em_disable_intr(struct adapter *adapter)
4755 {
4756         struct e1000_hw *hw = &adapter->hw;
4757
4758         if (hw->mac.type == e1000_82574)
4759                 E1000_WRITE_REG(hw, EM_EIAC, 0);
4760         E1000_WRITE_REG(&adapter->hw, E1000_IMC, 0xffffffff);
4761 }
4762
4763 /*
4764  * Bit of a misnomer: what this really means is
4765  * to enable OS management of the system, i.e.
4766  * to disable special hardware management features.
4767  */
4768 static void
4769 em_init_manageability(struct adapter *adapter)
4770 {
4771         /* A shared code workaround */
4772 #define E1000_82542_MANC2H E1000_MANC2H
4773         if (adapter->has_manage) {
4774                 int manc2h = E1000_READ_REG(&adapter->hw, E1000_MANC2H);
4775                 int manc = E1000_READ_REG(&adapter->hw, E1000_MANC);
4776
4777                 /* disable hardware interception of ARP */
4778                 manc &= ~(E1000_MANC_ARP_EN);
4779
4780                 /* enable receiving management packets to the host */
4781                 manc |= E1000_MANC_EN_MNG2HOST;
4782 #define E1000_MNG2HOST_PORT_623 (1 << 5)
4783 #define E1000_MNG2HOST_PORT_664 (1 << 6)
4784                 manc2h |= E1000_MNG2HOST_PORT_623;
4785                 manc2h |= E1000_MNG2HOST_PORT_664;
4786                 E1000_WRITE_REG(&adapter->hw, E1000_MANC2H, manc2h);
4787                 E1000_WRITE_REG(&adapter->hw, E1000_MANC, manc);
4788         }
4789 }
4790
4791 /*
4792  * Give control back to hardware management
4793  * controller if there is one.
4794  */
4795 static void
4796 em_release_manageability(struct adapter *adapter)
4797 {
4798         if (adapter->has_manage) {
4799                 int manc = E1000_READ_REG(&adapter->hw, E1000_MANC);
4800
4801                 /* re-enable hardware interception of ARP */
4802                 manc |= E1000_MANC_ARP_EN;
4803                 manc &= ~E1000_MANC_EN_MNG2HOST;
4804
4805                 E1000_WRITE_REG(&adapter->hw, E1000_MANC, manc);
4806         }
4807 }
4808
4809 /*
4810  * em_get_hw_control sets the {CTRL_EXT|FWSM}:DRV_LOAD bit.
4811  * For ASF and Pass Through versions of f/w this means
4812  * that the driver is loaded. For AMT version type f/w
4813  * this means that the network i/f is open.
4814  */
4815 static void
4816 em_get_hw_control(struct adapter *adapter)
4817 {
4818         u32 ctrl_ext, swsm;
4819
4820         if (adapter->hw.mac.type == e1000_82573) {
4821                 swsm = E1000_READ_REG(&adapter->hw, E1000_SWSM);
4822                 E1000_WRITE_REG(&adapter->hw, E1000_SWSM,
4823                     swsm | E1000_SWSM_DRV_LOAD);
4824                 return;
4825         }
4826         /* else */
4827         ctrl_ext = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
4828         E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT,
4829             ctrl_ext | E1000_CTRL_EXT_DRV_LOAD);
4830         return;
4831 }
4832
4833 /*
4834  * em_release_hw_control resets {CTRL_EXT|FWSM}:DRV_LOAD bit.
4835  * For ASF and Pass Through versions of f/w this means that
4836  * the driver is no longer loaded. For AMT versions of the
4837  * f/w this means that the network i/f is closed.
4838  */
4839 static void
4840 em_release_hw_control(struct adapter *adapter)
4841 {
4842         u32 ctrl_ext, swsm;
4843
4844         if (!adapter->has_manage)
4845                 return;
4846
4847         if (adapter->hw.mac.type == e1000_82573) {
4848                 swsm = E1000_READ_REG(&adapter->hw, E1000_SWSM);
4849                 E1000_WRITE_REG(&adapter->hw, E1000_SWSM,
4850                     swsm & ~E1000_SWSM_DRV_LOAD);
4851                 return;
4852         }
4853         /* else */
4854         ctrl_ext = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
4855         E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT,
4856             ctrl_ext & ~E1000_CTRL_EXT_DRV_LOAD);
4857         return;
4858 }
4859
4860 static int
4861 em_is_valid_ether_addr(u8 *addr)
4862 {
4863         char zero_addr[6] = { 0, 0, 0, 0, 0, 0 };
4864
4865         if ((addr[0] & 1) || (!bcmp(addr, zero_addr, ETHER_ADDR_LEN))) {
4866                 return (FALSE);
4867         }
4868
4869         return (TRUE);
4870 }
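/*
 * Example of the test above: addr[0] & 1 is the IEEE group (multicast)
 * bit, so 01:00:5e:00:00:01 fails the check, as does the all-zero
 * address; a unicast address such as 02:00:00:00:00:01 passes.
 */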
4871
4872 /*
4873 ** Parse the interface capabilities with regard
4874 ** to both system management and wake-on-lan for
4875 ** later use.
4876 */
4877 static void
4878 em_get_wakeup(device_t dev)
4879 {
4880         struct adapter  *adapter = device_get_softc(dev);
4881         u16             eeprom_data = 0, device_id, apme_mask;
4882
4883         adapter->has_manage = e1000_enable_mng_pass_thru(&adapter->hw);
4884         apme_mask = EM_EEPROM_APME;
4885
4886         switch (adapter->hw.mac.type) {
4887         case e1000_82573:
4888         case e1000_82583:
4889                 adapter->has_amt = TRUE;
4890                 /* Falls thru */
4891         case e1000_82571:
4892         case e1000_82572:
4893         case e1000_80003es2lan:
4894                 if (adapter->hw.bus.func == 1) {
4895                         e1000_read_nvm(&adapter->hw,
4896                             NVM_INIT_CONTROL3_PORT_B, 1, &eeprom_data);
4897                         break;
4898                 } else
4899                         e1000_read_nvm(&adapter->hw,
4900                             NVM_INIT_CONTROL3_PORT_A, 1, &eeprom_data);
4901                 break;
4902         case e1000_ich8lan:
4903         case e1000_ich9lan:
4904         case e1000_ich10lan:
4905         case e1000_pchlan:
4906         case e1000_pch2lan:
4907                 apme_mask = E1000_WUC_APME;
4908                 adapter->has_amt = TRUE;
4909                 eeprom_data = E1000_READ_REG(&adapter->hw, E1000_WUC);
4910                 break;
4911         default:
4912                 e1000_read_nvm(&adapter->hw,
4913                     NVM_INIT_CONTROL3_PORT_A, 1, &eeprom_data);
4914                 break;
4915         }
4916         if (eeprom_data & apme_mask)
4917                 adapter->wol = (E1000_WUFC_MAG | E1000_WUFC_MC);
4918         /*
4919          * We have the eeprom settings, now apply the special cases
4920          * where the eeprom may be wrong or the board won't support
4921          * wake on lan on a particular port
4922          */
4923         device_id = pci_get_device(dev);
4924         switch (device_id) {
4925         case E1000_DEV_ID_82571EB_FIBER:
4926                 /* Wake events only supported on port A for dual fiber
4927                  * regardless of eeprom setting */
4928                 if (E1000_READ_REG(&adapter->hw, E1000_STATUS) &
4929                     E1000_STATUS_FUNC_1)
4930                         adapter->wol = 0;
4931                 break;
4932         case E1000_DEV_ID_82571EB_QUAD_COPPER:
4933         case E1000_DEV_ID_82571EB_QUAD_FIBER:
4934         case E1000_DEV_ID_82571EB_QUAD_COPPER_LP:
4935                 /* if quad port adapter, disable WoL on all but port A */
4936                 if (global_quad_port_a != 0)
4937                         adapter->wol = 0;
4938                 /* Reset for multiple quad port adapters */
4939                 if (++global_quad_port_a == 4)
4940                         global_quad_port_a = 0;
4941                 break;
4942         }
4943         return;
4944 }
4945
4946
4947 /*
4948  * Enable PCI Wake On Lan capability
4949  */
4950 static void
4951 em_enable_wakeup(device_t dev)
4952 {
4953         struct adapter  *adapter = device_get_softc(dev);
4954         struct ifnet    *ifp = adapter->ifp;
4955         u32             pmc, ctrl, ctrl_ext, rctl;
4956         u16             status;
4957
4958         if ((pci_find_cap(dev, PCIY_PMG, &pmc) != 0))
4959                 return;
4960
4961         /* Advertise the wakeup capability */
4962         ctrl = E1000_READ_REG(&adapter->hw, E1000_CTRL);
4963         ctrl |= (E1000_CTRL_SWDPIN2 | E1000_CTRL_SWDPIN3);
4964         E1000_WRITE_REG(&adapter->hw, E1000_CTRL, ctrl);
4965         E1000_WRITE_REG(&adapter->hw, E1000_WUC, E1000_WUC_PME_EN);
4966
4967         if ((adapter->hw.mac.type == e1000_ich8lan) ||
4968             (adapter->hw.mac.type == e1000_pchlan) ||
4969             (adapter->hw.mac.type == e1000_ich9lan) ||
4970             (adapter->hw.mac.type == e1000_ich10lan))
4971                 e1000_suspend_workarounds_ich8lan(&adapter->hw);
4972
4973         /* Keep the laser running on Fiber adapters */
4974         if (adapter->hw.phy.media_type == e1000_media_type_fiber ||
4975             adapter->hw.phy.media_type == e1000_media_type_internal_serdes) {
4976                 ctrl_ext = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
4977                 ctrl_ext |= E1000_CTRL_EXT_SDP3_DATA;
4978                 E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT, ctrl_ext);
4979         }
4980
4981         /*
4982         ** Determine type of Wakeup: note that wol
4983         ** is set with all bits on by default.
4984         */
4985         if ((ifp->if_capenable & IFCAP_WOL_MAGIC) == 0)
4986                 adapter->wol &= ~E1000_WUFC_MAG;
4987
4988         if ((ifp->if_capenable & IFCAP_WOL_MCAST) == 0)
4989                 adapter->wol &= ~E1000_WUFC_MC;
4990         else {
4991                 rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
4992                 rctl |= E1000_RCTL_MPE;
4993                 E1000_WRITE_REG(&adapter->hw, E1000_RCTL, rctl);
4994         }
4995
4996         if ((adapter->hw.mac.type == e1000_pchlan) ||
4997             (adapter->hw.mac.type == e1000_pch2lan)) {
4998                 if (em_enable_phy_wakeup(adapter))
4999                         return;
5000         } else {
5001                 E1000_WRITE_REG(&adapter->hw, E1000_WUC, E1000_WUC_PME_EN);
5002                 E1000_WRITE_REG(&adapter->hw, E1000_WUFC, adapter->wol);
5003         }
5004
5005         if (adapter->hw.phy.type == e1000_phy_igp_3)
5006                 e1000_igp3_phy_powerdown_workaround_ich8lan(&adapter->hw);
5007
5008         /* Request PME */
5009         status = pci_read_config(dev, pmc + PCIR_POWER_STATUS, 2);
5010         status &= ~(PCIM_PSTAT_PME | PCIM_PSTAT_PMEENABLE);
5011         if (ifp->if_capenable & IFCAP_WOL)
5012                 status |= PCIM_PSTAT_PME | PCIM_PSTAT_PMEENABLE;
5013         pci_write_config(dev, pmc + PCIR_POWER_STATUS, status, 2);
5014
5015         return;
5016 }
5017
5018 /*
5019 ** WOL on the newer chipset interfaces (pchlan)
5020 ** requires settings to be copied into the PHY
5021 */
5022 static int
5023 em_enable_phy_wakeup(struct adapter *adapter)
5024 {
5025         struct e1000_hw *hw = &adapter->hw;
5026         u32 mreg, ret = 0;
5027         u16 preg;
5028
5029         /* copy MAC RARs to PHY RARs */
5030         e1000_copy_rx_addrs_to_phy_ich8lan(hw);
5031
5032         /* copy MAC MTA to PHY MTA */
5033         for (int i = 0; i < adapter->hw.mac.mta_reg_count; i++) {
5034                 mreg = E1000_READ_REG_ARRAY(hw, E1000_MTA, i);
5035                 e1000_write_phy_reg(hw, BM_MTA(i), (u16)(mreg & 0xFFFF));
5036                 e1000_write_phy_reg(hw, BM_MTA(i) + 1,
5037                     (u16)((mreg >> 16) & 0xFFFF));
5038         }
5039
5040         /* configure PHY Rx Control register */
5041         e1000_read_phy_reg(&adapter->hw, BM_RCTL, &preg);
5042         mreg = E1000_READ_REG(hw, E1000_RCTL);
5043         if (mreg & E1000_RCTL_UPE)
5044                 preg |= BM_RCTL_UPE;
5045         if (mreg & E1000_RCTL_MPE)
5046                 preg |= BM_RCTL_MPE;
5047         preg &= ~(BM_RCTL_MO_MASK);
5048         if (mreg & E1000_RCTL_MO_3)
5049                 preg |= (((mreg & E1000_RCTL_MO_3) >> E1000_RCTL_MO_SHIFT)
5050                                 << BM_RCTL_MO_SHIFT);
5051         if (mreg & E1000_RCTL_BAM)
5052                 preg |= BM_RCTL_BAM;
5053         if (mreg & E1000_RCTL_PMCF)
5054                 preg |= BM_RCTL_PMCF;
5055         mreg = E1000_READ_REG(hw, E1000_CTRL);
5056         if (mreg & E1000_CTRL_RFCE)
5057                 preg |= BM_RCTL_RFCE;
5058         e1000_write_phy_reg(&adapter->hw, BM_RCTL, preg);
5059
5060         /* enable PHY wakeup in MAC register */
5061         E1000_WRITE_REG(hw, E1000_WUC,
5062             E1000_WUC_PHY_WAKE | E1000_WUC_PME_EN);
5063         E1000_WRITE_REG(hw, E1000_WUFC, adapter->wol);
5064
5065         /* configure and enable PHY wakeup in PHY registers */
5066         e1000_write_phy_reg(&adapter->hw, BM_WUFC, adapter->wol);
5067         e1000_write_phy_reg(&adapter->hw, BM_WUC, E1000_WUC_PME_EN);
5068
5069         /* activate PHY wakeup */
5070         ret = hw->phy.ops.acquire(hw);
5071         if (ret) {
5072                 printf("Could not acquire PHY\n");
5073                 return ret;
5074         }
5075         e1000_write_phy_reg_mdic(hw, IGP01E1000_PHY_PAGE_SELECT,
5076                                  (BM_WUC_ENABLE_PAGE << IGP_PAGE_SHIFT));
5077         ret = e1000_read_phy_reg_mdic(hw, BM_WUC_ENABLE_REG, &preg);
5078         if (ret) {
5079                 printf("Could not read PHY page 769\n");
5080                 goto out;
5081         }
5082         preg |= BM_WUC_ENABLE_BIT | BM_WUC_HOST_WU_BIT;
5083         ret = e1000_write_phy_reg_mdic(hw, BM_WUC_ENABLE_REG, preg);
5084         if (ret)
5085                 printf("Could not set PHY Host Wakeup bit\n");
5086 out:
5087         hw->phy.ops.release(hw);
5088
5089         return ret;
5090 }
5091
5092 static void
5093 em_led_func(void *arg, int onoff)
5094 {
5095         struct adapter  *adapter = arg;
5096  
5097         EM_CORE_LOCK(adapter);
5098         if (onoff) {
5099                 e1000_setup_led(&adapter->hw);
5100                 e1000_led_on(&adapter->hw);
5101         } else {
5102                 e1000_led_off(&adapter->hw);
5103                 e1000_cleanup_led(&adapter->hw);
5104         }
5105         EM_CORE_UNLOCK(adapter);
5106 }
5107
5108 /*
5109 ** Disable the L0s and L1 ASPM link states
5110 */
5111 static void
5112 em_disable_aspm(struct adapter *adapter)
5113 {
5114         int             base, reg;
5115         u16             link_cap, link_ctrl;
5116         device_t        dev = adapter->dev;
5117
5118         switch (adapter->hw.mac.type) {
5119                 case e1000_82573:
5120                 case e1000_82574:
5121                 case e1000_82583:
5122                         break;
5123                 default:
5124                         return;
5125         }
5126         if (pci_find_cap(dev, PCIY_EXPRESS, &base) != 0)
5127                 return;
5128         reg = base + PCIER_LINK_CAP;
5129         link_cap = pci_read_config(dev, reg, 2);
5130         if ((link_cap & PCIEM_LINK_CAP_ASPM) == 0)
5131                 return;
5132         reg = base + PCIER_LINK_CTL;
5133         link_ctrl = pci_read_config(dev, reg, 2);
5134         link_ctrl &= ~PCIEM_LINK_CTL_ASPMC;
5135         pci_write_config(dev, reg, link_ctrl, 2);
5136         return;
5137 }
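/*
 * Illustrative detail: PCIEM_LINK_CTL_ASPMC covers the two low-order
 * ASPM control bits of the PCIe Link Control register (01 = L0s,
 * 10 = L1, 11 = both), so clearing that field above disables both
 * link states.
 */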
5138
5139 /**********************************************************************
5140  *
5141  *  Update the board statistics counters.
5142  *
5143  **********************************************************************/
5144 static void
5145 em_update_stats_counters(struct adapter *adapter)
5146 {
5147         struct ifnet   *ifp;
5148
5149         if (adapter->hw.phy.media_type == e1000_media_type_copper ||
5150            (E1000_READ_REG(&adapter->hw, E1000_STATUS) & E1000_STATUS_LU)) {
5151                 adapter->stats.symerrs += E1000_READ_REG(&adapter->hw, E1000_SYMERRS);
5152                 adapter->stats.sec += E1000_READ_REG(&adapter->hw, E1000_SEC);
5153         }
5154         adapter->stats.crcerrs += E1000_READ_REG(&adapter->hw, E1000_CRCERRS);
5155         adapter->stats.mpc += E1000_READ_REG(&adapter->hw, E1000_MPC);
5156         adapter->stats.scc += E1000_READ_REG(&adapter->hw, E1000_SCC);
5157         adapter->stats.ecol += E1000_READ_REG(&adapter->hw, E1000_ECOL);
5158
5159         adapter->stats.mcc += E1000_READ_REG(&adapter->hw, E1000_MCC);
5160         adapter->stats.latecol += E1000_READ_REG(&adapter->hw, E1000_LATECOL);
5161         adapter->stats.colc += E1000_READ_REG(&adapter->hw, E1000_COLC);
5162         adapter->stats.dc += E1000_READ_REG(&adapter->hw, E1000_DC);
5163         adapter->stats.rlec += E1000_READ_REG(&adapter->hw, E1000_RLEC);
5164         adapter->stats.xonrxc += E1000_READ_REG(&adapter->hw, E1000_XONRXC);
5165         adapter->stats.xontxc += E1000_READ_REG(&adapter->hw, E1000_XONTXC);
5166         /*
5167         ** For watchdog management we need to know if we have been
5168         ** paused during the last interval, so capture that here.
5169         */
5170         adapter->pause_frames = E1000_READ_REG(&adapter->hw, E1000_XOFFRXC);
5171         adapter->stats.xoffrxc += adapter->pause_frames;
5172         adapter->stats.xofftxc += E1000_READ_REG(&adapter->hw, E1000_XOFFTXC);
5173         adapter->stats.fcruc += E1000_READ_REG(&adapter->hw, E1000_FCRUC);
5174         adapter->stats.prc64 += E1000_READ_REG(&adapter->hw, E1000_PRC64);
5175         adapter->stats.prc127 += E1000_READ_REG(&adapter->hw, E1000_PRC127);
5176         adapter->stats.prc255 += E1000_READ_REG(&adapter->hw, E1000_PRC255);
5177         adapter->stats.prc511 += E1000_READ_REG(&adapter->hw, E1000_PRC511);
5178         adapter->stats.prc1023 += E1000_READ_REG(&adapter->hw, E1000_PRC1023);
5179         adapter->stats.prc1522 += E1000_READ_REG(&adapter->hw, E1000_PRC1522);
5180         adapter->stats.gprc += E1000_READ_REG(&adapter->hw, E1000_GPRC);
5181         adapter->stats.bprc += E1000_READ_REG(&adapter->hw, E1000_BPRC);
5182         adapter->stats.mprc += E1000_READ_REG(&adapter->hw, E1000_MPRC);
5183         adapter->stats.gptc += E1000_READ_REG(&adapter->hw, E1000_GPTC);
5184
5185         /* For the 64-bit byte counters the low dword must be read first. */
5186         /* Both registers clear on the read of the high dword */
5187
5188         adapter->stats.gorc += E1000_READ_REG(&adapter->hw, E1000_GORCL) +
5189             ((u64)E1000_READ_REG(&adapter->hw, E1000_GORCH) << 32);
5190         adapter->stats.gotc += E1000_READ_REG(&adapter->hw, E1000_GOTCL) +
5191             ((u64)E1000_READ_REG(&adapter->hw, E1000_GOTCH) << 32);
5192
5193         adapter->stats.rnbc += E1000_READ_REG(&adapter->hw, E1000_RNBC);
5194         adapter->stats.ruc += E1000_READ_REG(&adapter->hw, E1000_RUC);
5195         adapter->stats.rfc += E1000_READ_REG(&adapter->hw, E1000_RFC);
5196         adapter->stats.roc += E1000_READ_REG(&adapter->hw, E1000_ROC);
5197         adapter->stats.rjc += E1000_READ_REG(&adapter->hw, E1000_RJC);
5198
5199         adapter->stats.tor += E1000_READ_REG(&adapter->hw, E1000_TORH);
5200         adapter->stats.tot += E1000_READ_REG(&adapter->hw, E1000_TOTH);
5201
5202         adapter->stats.tpr += E1000_READ_REG(&adapter->hw, E1000_TPR);
5203         adapter->stats.tpt += E1000_READ_REG(&adapter->hw, E1000_TPT);
5204         adapter->stats.ptc64 += E1000_READ_REG(&adapter->hw, E1000_PTC64);
5205         adapter->stats.ptc127 += E1000_READ_REG(&adapter->hw, E1000_PTC127);
5206         adapter->stats.ptc255 += E1000_READ_REG(&adapter->hw, E1000_PTC255);
5207         adapter->stats.ptc511 += E1000_READ_REG(&adapter->hw, E1000_PTC511);
5208         adapter->stats.ptc1023 += E1000_READ_REG(&adapter->hw, E1000_PTC1023);
5209         adapter->stats.ptc1522 += E1000_READ_REG(&adapter->hw, E1000_PTC1522);
5210         adapter->stats.mptc += E1000_READ_REG(&adapter->hw, E1000_MPTC);
5211         adapter->stats.bptc += E1000_READ_REG(&adapter->hw, E1000_BPTC);
5212
5213         /* Interrupt Counts */
5214
5215         adapter->stats.iac += E1000_READ_REG(&adapter->hw, E1000_IAC);
5216         adapter->stats.icrxptc += E1000_READ_REG(&adapter->hw, E1000_ICRXPTC);
5217         adapter->stats.icrxatc += E1000_READ_REG(&adapter->hw, E1000_ICRXATC);
5218         adapter->stats.ictxptc += E1000_READ_REG(&adapter->hw, E1000_ICTXPTC);
5219         adapter->stats.ictxatc += E1000_READ_REG(&adapter->hw, E1000_ICTXATC);
5220         adapter->stats.ictxqec += E1000_READ_REG(&adapter->hw, E1000_ICTXQEC);
5221         adapter->stats.ictxqmtc += E1000_READ_REG(&adapter->hw, E1000_ICTXQMTC);
5222         adapter->stats.icrxdmtc += E1000_READ_REG(&adapter->hw, E1000_ICRXDMTC);
5223         adapter->stats.icrxoc += E1000_READ_REG(&adapter->hw, E1000_ICRXOC);
5224
5225         if (adapter->hw.mac.type >= e1000_82543) {
5226                 adapter->stats.algnerrc += 
5227                 E1000_READ_REG(&adapter->hw, E1000_ALGNERRC);
5228                 adapter->stats.rxerrc += 
5229                 E1000_READ_REG(&adapter->hw, E1000_RXERRC);
5230                 adapter->stats.tncrs += 
5231                 E1000_READ_REG(&adapter->hw, E1000_TNCRS);
5232                 adapter->stats.cexterr += 
5233                 E1000_READ_REG(&adapter->hw, E1000_CEXTERR);
5234                 adapter->stats.tsctc += 
5235                 E1000_READ_REG(&adapter->hw, E1000_TSCTC);
5236                 adapter->stats.tsctfc += 
5237                 E1000_READ_REG(&adapter->hw, E1000_TSCTFC);
5238         }
5239         ifp = adapter->ifp;
5240
5241         ifp->if_collisions = adapter->stats.colc;
5242
5243         /* Rx Errors */
5244         ifp->if_ierrors = adapter->dropped_pkts + adapter->stats.rxerrc +
5245             adapter->stats.crcerrs + adapter->stats.algnerrc +
5246             adapter->stats.ruc + adapter->stats.roc +
5247             adapter->stats.mpc + adapter->stats.cexterr;
5248
5249         /* Tx Errors */
5250         ifp->if_oerrors = adapter->stats.ecol +
5251             adapter->stats.latecol + adapter->watchdog_events;
5252 }
5253
5254 /* Export a single 32-bit register via a read-only sysctl. */
5255 static int
5256 em_sysctl_reg_handler(SYSCTL_HANDLER_ARGS)
5257 {
5258         struct adapter *adapter;
5259         u_int val;
5260
5261         adapter = oidp->oid_arg1;
5262         val = E1000_READ_REG(&adapter->hw, oidp->oid_arg2);
5263         return (sysctl_handle_int(oidp, &val, 0, req));
5264 }
5265
5266 /*
5267  * Add sysctl variables, one per statistic, to the system.
5268  */
5269 static void
5270 em_add_hw_stats(struct adapter *adapter)
5271 {
5272         device_t dev = adapter->dev;
5273
5274         struct tx_ring *txr = adapter->tx_rings;
5275         struct rx_ring *rxr = adapter->rx_rings;
5276
5277         struct sysctl_ctx_list *ctx = device_get_sysctl_ctx(dev);
5278         struct sysctl_oid *tree = device_get_sysctl_tree(dev);
5279         struct sysctl_oid_list *child = SYSCTL_CHILDREN(tree);
5280         struct e1000_hw_stats *stats = &adapter->stats;
5281
5282         struct sysctl_oid *stat_node, *queue_node, *int_node;
5283         struct sysctl_oid_list *stat_list, *queue_list, *int_list;
5284
5285 #define QUEUE_NAME_LEN 32
5286         char namebuf[QUEUE_NAME_LEN];
5287         
5288         /* Driver Statistics */
5289         SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "link_irq",
5290                         CTLFLAG_RD, &adapter->link_irq,
5291                         "Link MSIX IRQ Handled");
5292         SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "mbuf_alloc_fail", 
5293                          CTLFLAG_RD, &adapter->mbuf_alloc_failed,
5294                          "Std mbuf failed");
5295         SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "cluster_alloc_fail", 
5296                          CTLFLAG_RD, &adapter->mbuf_cluster_failed,
5297                          "Std mbuf cluster failed");
5298         SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "dropped", 
5299                         CTLFLAG_RD, &adapter->dropped_pkts,
5300                         "Driver dropped packets");
5301         SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "tx_dma_fail", 
5302                         CTLFLAG_RD, &adapter->no_tx_dma_setup,
5303                         "Driver tx dma failure in xmit");
5304         SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "rx_overruns",
5305                         CTLFLAG_RD, &adapter->rx_overruns,
5306                         "RX overruns");
5307         SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "watchdog_timeouts",
5308                         CTLFLAG_RD, &adapter->watchdog_events,
5309                         "Watchdog timeouts");
5310         
5311         SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "device_control",
5312                         CTLTYPE_UINT | CTLFLAG_RD, adapter, E1000_CTRL,
5313                         em_sysctl_reg_handler, "IU",
5314                         "Device Control Register");
5315         SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "rx_control",
5316                         CTLTYPE_UINT | CTLFLAG_RD, adapter, E1000_RCTL,
5317                         em_sysctl_reg_handler, "IU",
5318                         "Receiver Control Register");
5319         SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "fc_high_water",
5320                         CTLFLAG_RD, &adapter->hw.fc.high_water, 0,
5321                         "Flow Control High Watermark");
5322         SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "fc_low_water", 
5323                         CTLFLAG_RD, &adapter->hw.fc.low_water, 0,
5324                         "Flow Control Low Watermark");
5325
5326         for (int i = 0; i < adapter->num_queues; i++, rxr++, txr++) {
5327                 snprintf(namebuf, QUEUE_NAME_LEN, "queue%d", i);
5328                 queue_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, namebuf,
5329                                             CTLFLAG_RD, NULL, "Queue Name");
5330                 queue_list = SYSCTL_CHILDREN(queue_node);
5331
5332                 SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "txd_head", 
5333                                 CTLTYPE_UINT | CTLFLAG_RD, adapter,
5334                                 E1000_TDH(txr->me),
5335                                 em_sysctl_reg_handler, "IU",
5336                                 "Transmit Descriptor Head");
5337                 SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "txd_tail", 
5338                                 CTLTYPE_UINT | CTLFLAG_RD, adapter,
5339                                 E1000_TDT(txr->me),
5340                                 em_sysctl_reg_handler, "IU",
5341                                 "Transmit Descriptor Tail");
5342                 SYSCTL_ADD_ULONG(ctx, queue_list, OID_AUTO, "tx_irq",
5343                                 CTLFLAG_RD, &txr->tx_irq,
5344                                 "Queue MSI-X Transmit Interrupts");
5345                 SYSCTL_ADD_ULONG(ctx, queue_list, OID_AUTO, "no_desc_avail", 
5346                                 CTLFLAG_RD, &txr->no_desc_avail,
5347                                 "Queue No Descriptor Available");
5348                 
5349                 SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "rxd_head", 
5350                                 CTLTYPE_UINT | CTLFLAG_RD, adapter,
5351                                 E1000_RDH(rxr->me),
5352                                 em_sysctl_reg_handler, "IU",
5353                                 "Receive Descriptor Head");
5354                 SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "rxd_tail", 
5355                                 CTLTYPE_UINT | CTLFLAG_RD, adapter,
5356                                 E1000_RDT(rxr->me),
5357                                 em_sysctl_reg_handler, "IU",
5358                                 "Receive Descriptor Tail");
5359                 SYSCTL_ADD_ULONG(ctx, queue_list, OID_AUTO, "rx_irq",
5360                                 CTLFLAG_RD, &rxr->rx_irq,
5361                                 "Queue MSI-X Receive Interrupts");
5362         }

        /* MAC stats get their own sub node */

        stat_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "mac_stats",
                                    CTLFLAG_RD, NULL, "Statistics");
        stat_list = SYSCTL_CHILDREN(stat_node);

        SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "excess_coll",
                        CTLFLAG_RD, &stats->ecol,
                        "Excessive collisions");
        SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "single_coll",
                        CTLFLAG_RD, &stats->scc,
                        "Single collisions");
        SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "multiple_coll",
                        CTLFLAG_RD, &stats->mcc,
                        "Multiple collisions");
        SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "late_coll",
                        CTLFLAG_RD, &stats->latecol,
                        "Late collisions");
        SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "collision_count",
                        CTLFLAG_RD, &stats->colc,
                        "Collision Count");
        SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "symbol_errors",
                        CTLFLAG_RD, &adapter->stats.symerrs,
                        "Symbol Errors");
        SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "sequence_errors",
                        CTLFLAG_RD, &adapter->stats.sec,
                        "Sequence Errors");
        SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "defer_count",
                        CTLFLAG_RD, &adapter->stats.dc,
                        "Defer Count");
        SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "missed_packets",
                        CTLFLAG_RD, &adapter->stats.mpc,
                        "Missed Packets");
        SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_no_buff",
                        CTLFLAG_RD, &adapter->stats.rnbc,
                        "Receive No Buffers");
        SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_undersize",
                        CTLFLAG_RD, &adapter->stats.ruc,
                        "Receive Undersize");
        SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_fragmented",
                        CTLFLAG_RD, &adapter->stats.rfc,
                        "Fragmented Packets Received");
        SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_oversize",
                        CTLFLAG_RD, &adapter->stats.roc,
                        "Oversized Packets Received");
        SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_jabber",
                        CTLFLAG_RD, &adapter->stats.rjc,
                        "Received Jabber");
        SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_errs",
                        CTLFLAG_RD, &adapter->stats.rxerrc,
                        "Receive Errors");
        SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "crc_errs",
                        CTLFLAG_RD, &adapter->stats.crcerrs,
                        "CRC errors");
        SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "alignment_errs",
                        CTLFLAG_RD, &adapter->stats.algnerrc,
                        "Alignment Errors");
        /* On 82575 these are collision counts */
        SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "coll_ext_errs",
                        CTLFLAG_RD, &adapter->stats.cexterr,
                        "Collision/Carrier extension errors");
        SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "xon_recvd",
                        CTLFLAG_RD, &adapter->stats.xonrxc,
                        "XON Received");
        SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "xon_txd",
                        CTLFLAG_RD, &adapter->stats.xontxc,
                        "XON Transmitted");
        SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "xoff_recvd",
                        CTLFLAG_RD, &adapter->stats.xoffrxc,
                        "XOFF Received");
        SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "xoff_txd",
                        CTLFLAG_RD, &adapter->stats.xofftxc,
                        "XOFF Transmitted");

        /* Packet Reception Stats */
        SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "total_pkts_recvd",
                        CTLFLAG_RD, &adapter->stats.tpr,
                        "Total Packets Received");
        SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_pkts_recvd",
                        CTLFLAG_RD, &adapter->stats.gprc,
                        "Good Packets Received");
        SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "bcast_pkts_recvd",
                        CTLFLAG_RD, &adapter->stats.bprc,
                        "Broadcast Packets Received");
        SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "mcast_pkts_recvd",
                        CTLFLAG_RD, &adapter->stats.mprc,
                        "Multicast Packets Received");
        SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_64",
                        CTLFLAG_RD, &adapter->stats.prc64,
                        "64 byte frames received");
        SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_65_127",
                        CTLFLAG_RD, &adapter->stats.prc127,
                        "65-127 byte frames received");
        SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_128_255",
                        CTLFLAG_RD, &adapter->stats.prc255,
                        "128-255 byte frames received");
        SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_256_511",
                        CTLFLAG_RD, &adapter->stats.prc511,
                        "256-511 byte frames received");
        SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_512_1023",
                        CTLFLAG_RD, &adapter->stats.prc1023,
                        "512-1023 byte frames received");
        SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_1024_1522",
                        CTLFLAG_RD, &adapter->stats.prc1522,
                        "1024-1522 byte frames received");
        SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_octets_recvd",
                        CTLFLAG_RD, &adapter->stats.gorc,
                        "Good Octets Received");

        /* Packet Transmission Stats */
        SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_octets_txd",
                        CTLFLAG_RD, &adapter->stats.gotc,
                        "Good Octets Transmitted");
        SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "total_pkts_txd",
                        CTLFLAG_RD, &adapter->stats.tpt,
                        "Total Packets Transmitted");
        SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_pkts_txd",
                        CTLFLAG_RD, &adapter->stats.gptc,
                        "Good Packets Transmitted");
        SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "bcast_pkts_txd",
                        CTLFLAG_RD, &adapter->stats.bptc,
                        "Broadcast Packets Transmitted");
        SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "mcast_pkts_txd",
                        CTLFLAG_RD, &adapter->stats.mptc,
                        "Multicast Packets Transmitted");
        SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_64",
                        CTLFLAG_RD, &adapter->stats.ptc64,
                        "64 byte frames transmitted");
        SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_65_127",
                        CTLFLAG_RD, &adapter->stats.ptc127,
                        "65-127 byte frames transmitted");
        SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_128_255",
                        CTLFLAG_RD, &adapter->stats.ptc255,
                        "128-255 byte frames transmitted");
        SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_256_511",
                        CTLFLAG_RD, &adapter->stats.ptc511,
                        "256-511 byte frames transmitted");
        SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_512_1023",
                        CTLFLAG_RD, &adapter->stats.ptc1023,
                        "512-1023 byte frames transmitted");
        SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_1024_1522",
                        CTLFLAG_RD, &adapter->stats.ptc1522,
                        "1024-1522 byte frames transmitted");
        SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tso_txd",
                        CTLFLAG_RD, &adapter->stats.tsctc,
                        "TSO Contexts Transmitted");
        SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tso_ctx_fail",
                        CTLFLAG_RD, &adapter->stats.tsctfc,
                        "TSO Contexts Failed");

        /* Interrupt Stats */

        int_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "interrupts",
                                    CTLFLAG_RD, NULL, "Interrupt Statistics");
        int_list = SYSCTL_CHILDREN(int_node);

        SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "asserts",
                        CTLFLAG_RD, &adapter->stats.iac,
                        "Interrupt Assertion Count");

        SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "rx_pkt_timer",
                        CTLFLAG_RD, &adapter->stats.icrxptc,
                        "Interrupt Cause Rx Pkt Timer Expire Count");

        SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "rx_abs_timer",
                        CTLFLAG_RD, &adapter->stats.icrxatc,
                        "Interrupt Cause Rx Abs Timer Expire Count");

        SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "tx_pkt_timer",
                        CTLFLAG_RD, &adapter->stats.ictxptc,
                        "Interrupt Cause Tx Pkt Timer Expire Count");

        SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "tx_abs_timer",
                        CTLFLAG_RD, &adapter->stats.ictxatc,
                        "Interrupt Cause Tx Abs Timer Expire Count");

        SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "tx_queue_empty",
                        CTLFLAG_RD, &adapter->stats.ictxqec,
                        "Interrupt Cause Tx Queue Empty Count");

        SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "tx_queue_min_thresh",
                        CTLFLAG_RD, &adapter->stats.ictxqmtc,
                        "Interrupt Cause Tx Queue Min Thresh Count");

        SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "rx_desc_min_thresh",
                        CTLFLAG_RD, &adapter->stats.icrxdmtc,
                        "Interrupt Cause Rx Desc Min Thresh Count");

        SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "rx_overrun",
                        CTLFLAG_RD, &adapter->stats.icrxoc,
                        "Interrupt Cause Receiver Overrun Count");
}
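
/*
 * Sketch of the resulting sysctl layout (node names taken from above;
 * the dev.em.<unit> prefix assumes the standard per-device sysctl tree,
 * and the unit number is hypothetical):
 *
 *   dev.em.0.mac_stats.excess_coll
 *   dev.em.0.mac_stats.good_pkts_recvd
 *   dev.em.0.interrupts.asserts
 */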

/**********************************************************************
 *
 *  This routine provides a way to dump out the adapter EEPROM,
 *  often a useful debug/service tool.  It dumps only the first
 *  32 words; the data that matters lives within that range.
 *
 **********************************************************************/
static int
em_sysctl_nvm_info(SYSCTL_HANDLER_ARGS)
{
        struct adapter *adapter = (struct adapter *)arg1;
        int error;
        int result;

        result = -1;
        error = sysctl_handle_int(oidp, &result, 0, req);

        if (error || !req->newptr)
                return (error);

        /*
         * This value will cause a hex dump of the
         * first 32 16-bit words of the EEPROM to
         * the screen.
         */
        if (result == 1)
                em_print_nvm_info(adapter);

        return (error);
}
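
/*
 * Example (a sketch; assumes this handler is attached as an "nvm" node
 * under the device tree, and the unit number is hypothetical): writing 1
 * triggers the dump to the console:
 *
 *   sysctl dev.em.0.nvm=1
 */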

static void
em_print_nvm_info(struct adapter *adapter)
{
        u16     eeprom_data;
        int     i, j, row = 0;

        /* It's a bit crude, but it gets the job done */
        printf("\nInterface EEPROM Dump:\n");
        printf("Offset\n0x0000  ");
        for (i = 0, j = 0; i < 32; i++, j++) {
                if (j == 8) { /* Make the offset block */
                        j = 0; ++row;
                        printf("\n0x00%x0  ", row);
                }
                e1000_read_nvm(&adapter->hw, i, 1, &eeprom_data);
                printf("%04x ", eeprom_data);
        }
        printf("\n");
}
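
/*
 * The dump prints eight 16-bit words per row; the output looks roughly
 * like this (word values hypothetical):
 *
 *   Interface EEPROM Dump:
 *   Offset
 *   0x0000  001b 2134 56ab 10d3 ffff ffff 0420 b46a
 *   0x0010  ...
 */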

static int
em_sysctl_int_delay(SYSCTL_HANDLER_ARGS)
{
        struct em_int_delay_info *info;
        struct adapter *adapter;
        u32 regval;
        int error, usecs, ticks;

        info = (struct em_int_delay_info *)arg1;
        usecs = info->value;
        error = sysctl_handle_int(oidp, &usecs, 0, req);
        if (error != 0 || req->newptr == NULL)
                return (error);
        if (usecs < 0 || usecs > EM_TICKS_TO_USECS(65535))
                return (EINVAL);
        info->value = usecs;
        ticks = EM_USECS_TO_TICKS(usecs);
        if (info->offset == E1000_ITR)  /* ITR counts in 256ns units, */
                ticks *= 4;             /* a quarter of the 1024ns tick */

        adapter = info->adapter;

        EM_CORE_LOCK(adapter);
        regval = E1000_READ_OFFSET(&adapter->hw, info->offset);
        regval = (regval & ~0xffff) | (ticks & 0xffff);
        /* Handle a few special cases. */
        switch (info->offset) {
        case E1000_RDTR:
                break;
        case E1000_TIDV:
                if (ticks == 0) {
                        adapter->txd_cmd &= ~E1000_TXD_CMD_IDE;
                        /* Don't write 0 into the TIDV register. */
                        regval++;
                } else
                        adapter->txd_cmd |= E1000_TXD_CMD_IDE;
                break;
        }
        E1000_WRITE_OFFSET(&adapter->hw, info->offset, regval);
        EM_CORE_UNLOCK(adapter);
        return (0);
}
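
/*
 * Worked example of the conversion above (a sketch): the delay timers
 * count in 1024ns ticks, while the E1000_ITR field counts in 256ns
 * units, which is why ITR gets "ticks *= 4".  A request of usecs = 100
 * thus becomes roughly 98 ticks (100000ns / 1024ns), or 392 once
 * scaled for ITR.
 */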

static void
em_add_int_delay_sysctl(struct adapter *adapter, const char *name,
        const char *description, struct em_int_delay_info *info,
        int offset, int value)
{
        info->adapter = adapter;
        info->offset = offset;
        info->value = value;
        SYSCTL_ADD_PROC(device_get_sysctl_ctx(adapter->dev),
            SYSCTL_CHILDREN(device_get_sysctl_tree(adapter->dev)),
            OID_AUTO, name, CTLTYPE_INT|CTLFLAG_RW,
            info, 0, em_sysctl_int_delay, "I", description);
}
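
/*
 * Typical invocation (a sketch that mirrors how the attach path in this
 * driver wires up the receive interrupt-delay knob; exact names may
 * differ):
 *
 *   em_add_int_delay_sysctl(adapter, "rx_int_delay",
 *       "receive interrupt delay in usecs", &adapter->rx_int_delay,
 *       E1000_REGISTER(&adapter->hw, E1000_RDTR), em_rx_int_delay_dflt);
 */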

static void
em_set_sysctl_value(struct adapter *adapter, const char *name,
        const char *description, int *limit, int value)
{
        *limit = value;
        SYSCTL_ADD_INT(device_get_sysctl_ctx(adapter->dev),
            SYSCTL_CHILDREN(device_get_sysctl_tree(adapter->dev)),
            OID_AUTO, name, CTLFLAG_RW, limit, value, description);
}
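
/*
 * Typical invocation (a sketch; the attach path uses this helper for
 * simple integer tunables, e.g. the receive processing limit -- exact
 * names may differ):
 *
 *   em_set_sysctl_value(adapter, "rx_processing_limit",
 *       "max number of rx packets to process",
 *       &adapter->rx_process_limit, em_rx_process_limit);
 */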

/*
** Set flow control using sysctl:
** Flow control values:
**      0 - off
**      1 - rx pause
**      2 - tx pause
**      3 - full
*/
static int
em_set_flowcntl(SYSCTL_HANDLER_ARGS)
{
        struct adapter  *adapter = (struct adapter *) arg1;
        int             error;
        int             input;

        /* Seed with the current setting so reads report per-adapter state. */
        input = adapter->fc;
        error = sysctl_handle_int(oidp, &input, 0, req);

        if ((error) || (req->newptr == NULL))
                return (error);

        if (input == adapter->fc) /* no change? */
                return (error);

        switch (input) {
        case e1000_fc_rx_pause:
        case e1000_fc_tx_pause:
        case e1000_fc_full:
        case e1000_fc_none:
                adapter->hw.fc.requested_mode = input;
                adapter->fc = input;
                break;
        default:
                /* Do nothing */
                return (error);
        }

        adapter->hw.fc.current_mode = adapter->hw.fc.requested_mode;
        e1000_force_mac_fc(&adapter->hw);
        return (error);
}
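
/*
 * Example (a sketch; assumes this handler is attached as an "fc" node
 * and the unit number is hypothetical): the accepted values match the
 * e1000_fc_* modes listed above, so full flow control is:
 *
 *   sysctl dev.em.0.fc=3
 */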

/*
** Manage Energy Efficient Ethernet:
** Control values:
**      0 - enable EEE
**      1 - disable EEE
*/
static int
em_sysctl_eee(SYSCTL_HANDLER_ARGS)
{
        struct adapter *adapter = (struct adapter *) arg1;
        int             error, value;

        value = adapter->hw.dev_spec.ich8lan.eee_disable;
        error = sysctl_handle_int(oidp, &value, 0, req);
        if (error || req->newptr == NULL)
                return (error);
        EM_CORE_LOCK(adapter);
        adapter->hw.dev_spec.ich8lan.eee_disable = (value != 0);
        em_init_locked(adapter);
        EM_CORE_UNLOCK(adapter);
        return (0);
}
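
/*
 * Example (a sketch; assumes this handler is attached as an
 * "eee_control" node): the value written is the *disable* flag, so this
 * turns EEE off and reinitializes the interface:
 *
 *   sysctl dev.em.0.eee_control=1
 */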

static int
em_sysctl_debug_info(SYSCTL_HANDLER_ARGS)
{
        struct adapter *adapter;
        int error;
        int result;

        result = -1;
        error = sysctl_handle_int(oidp, &result, 0, req);

        if (error || !req->newptr)
                return (error);

        if (result == 1) {
                adapter = (struct adapter *)arg1;
                em_print_debug_info(adapter);
        }

        return (error);
}
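
/*
 * Example (a sketch; assumes this handler is attached as a "debug" node
 * and the unit number is hypothetical): writing 1 dumps the state below
 * to the console:
 *
 *   sysctl dev.em.0.debug=1
 */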

/*
** This routine is meant to be fluid; add whatever is
** needed for debugging a problem.  -jfv
*/
static void
em_print_debug_info(struct adapter *adapter)
{
        device_t dev = adapter->dev;
        struct tx_ring *txr = adapter->tx_rings;
        struct rx_ring *rxr = adapter->rx_rings;

        if (adapter->ifp->if_drv_flags & IFF_DRV_RUNNING)
                printf("Interface is RUNNING ");
        else
                printf("Interface is NOT RUNNING ");

        if (adapter->ifp->if_drv_flags & IFF_DRV_OACTIVE)
                printf("and INACTIVE\n");
        else
                printf("and ACTIVE\n");

        device_printf(dev, "hw tdh = %d, hw tdt = %d\n",
            E1000_READ_REG(&adapter->hw, E1000_TDH(0)),
            E1000_READ_REG(&adapter->hw, E1000_TDT(0)));
        device_printf(dev, "hw rdh = %d, hw rdt = %d\n",
            E1000_READ_REG(&adapter->hw, E1000_RDH(0)),
            E1000_READ_REG(&adapter->hw, E1000_RDT(0)));
        device_printf(dev, "Tx Queue Status = %d\n", txr->queue_status);
        device_printf(dev, "TX descriptors avail = %d\n",
            txr->tx_avail);
        device_printf(dev, "Tx Descriptors avail failure = %lu\n",
            txr->no_desc_avail);
        device_printf(dev, "RX discarded packets = %lu\n",
            rxr->rx_discarded);
        device_printf(dev, "RX Next to Check = %d\n", rxr->next_to_check);
        device_printf(dev, "RX Next to Refresh = %d\n", rxr->next_to_refresh);
}