/******************************************************************************

  Copyright (c) 2001-2013, Intel Corporation
  All rights reserved.

  Redistribution and use in source and binary forms, with or without
  modification, are permitted provided that the following conditions are met:

   1. Redistributions of source code must retain the above copyright notice,
      this list of conditions and the following disclaimer.

   2. Redistributions in binary form must reproduce the above copyright
      notice, this list of conditions and the following disclaimer in the
      documentation and/or other materials provided with the distribution.

   3. Neither the name of the Intel Corporation nor the names of its
      contributors may be used to endorse or promote products derived from
      this software without specific prior written permission.

  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
  AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
  LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
  INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
  CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
  ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
  POSSIBILITY OF SUCH DAMAGE.

******************************************************************************/
/*$FreeBSD$*/

#ifdef HAVE_KERNEL_OPTION_HEADERS
#include "opt_device_polling.h"
#include "opt_inet.h"
#include "opt_inet6.h"
#endif

#include <sys/param.h>
#include <sys/systm.h>
#if __FreeBSD_version >= 800000
#include <sys/buf_ring.h>
#endif
#include <sys/bus.h>
#include <sys/endian.h>
#include <sys/kernel.h>
#include <sys/kthread.h>
#include <sys/malloc.h>
#include <sys/mbuf.h>
#include <sys/module.h>
#include <sys/rman.h>
#include <sys/socket.h>
#include <sys/sockio.h>
#include <sys/sysctl.h>
#include <sys/taskqueue.h>
#include <sys/eventhandler.h>
#include <machine/bus.h>
#include <machine/resource.h>

#include <net/bpf.h>
#include <net/ethernet.h>
#include <net/if.h>
#include <net/if_arp.h>
#include <net/if_dl.h>
#include <net/if_media.h>

#include <net/if_types.h>
#include <net/if_vlan_var.h>

#include <netinet/in_systm.h>
#include <netinet/in.h>
#include <netinet/if_ether.h>
#include <netinet/ip.h>
#include <netinet/ip6.h>
#include <netinet/tcp.h>
#include <netinet/udp.h>

#include <machine/in_cksum.h>
#include <dev/led/led.h>
#include <dev/pci/pcivar.h>
#include <dev/pci/pcireg.h>

#include "e1000_api.h"
#include "e1000_82571.h"
#include "if_em.h"

/*********************************************************************
 *  Set this to one to display debug statistics
 *********************************************************************/
int     em_display_debug_stats = 0;

/*********************************************************************
 *  Driver version:
 *********************************************************************/
char em_driver_version[] = "7.3.7";

/*********************************************************************
 *  PCI Device ID Table
 *
 *  Used by probe to select devices to load on
 *  Last field stores an index into e1000_strings
 *  Last entry must be all 0s
 *
 *  { Vendor ID, Device ID, SubVendor ID, SubDevice ID, String Index }
 *********************************************************************/

static em_vendor_info_t em_vendor_info_array[] =
{
        /* Intel(R) PRO/1000 Network Connection */
        { 0x8086, E1000_DEV_ID_82571EB_COPPER,  PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82571EB_FIBER,   PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82571EB_SERDES,  PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82571EB_SERDES_DUAL,
                                                PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82571EB_SERDES_QUAD,
                                                PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82571EB_QUAD_COPPER,
                                                PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82571EB_QUAD_COPPER_LP,
                                                PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82571EB_QUAD_FIBER,
                                                PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82571PT_QUAD_COPPER,
                                                PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82572EI_COPPER,  PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82572EI_FIBER,   PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82572EI_SERDES,  PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82572EI,         PCI_ANY_ID, PCI_ANY_ID, 0},

        { 0x8086, E1000_DEV_ID_82573E,          PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82573E_IAMT,     PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82573L,          PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82583V,          PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_80003ES2LAN_COPPER_SPT,
                                                PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_80003ES2LAN_SERDES_SPT,
                                                PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_80003ES2LAN_COPPER_DPT,
                                                PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_80003ES2LAN_SERDES_DPT,
                                                PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_ICH8_IGP_M_AMT,  PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_ICH8_IGP_AMT,    PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_ICH8_IGP_C,      PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_ICH8_IFE,        PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_ICH8_IFE_GT,     PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_ICH8_IFE_G,      PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_ICH8_IGP_M,      PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_ICH8_82567V_3,   PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_ICH9_IGP_M_AMT,  PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_ICH9_IGP_AMT,    PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_ICH9_IGP_C,      PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_ICH9_IGP_M,      PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_ICH9_IGP_M_V,    PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_ICH9_IFE,        PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_ICH9_IFE_GT,     PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_ICH9_IFE_G,      PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_ICH9_BM,         PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82574L,          PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82574LA,         PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_ICH10_R_BM_LM,   PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_ICH10_R_BM_LF,   PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_ICH10_R_BM_V,    PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_ICH10_D_BM_LM,   PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_ICH10_D_BM_LF,   PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_ICH10_D_BM_V,    PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_PCH_M_HV_LM,     PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_PCH_M_HV_LC,     PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_PCH_D_HV_DM,     PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_PCH_D_HV_DC,     PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_PCH2_LV_LM,      PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_PCH2_LV_V,       PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_PCH_LPT_I217_LM, PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_PCH_LPT_I217_V,  PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_PCH_LPTLP_I218_LM,
                                                PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_PCH_LPTLP_I218_V,
                                                PCI_ANY_ID, PCI_ANY_ID, 0},
        /* required last entry */
        { 0, 0, 0, 0, 0}
};
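
/*
 * Illustrative only: support for an additional adapter is added by
 * inserting an entry before the all-zero terminator above, e.g.
 * (hypothetical device ID macro):
 *
 *      { 0x8086, E1000_DEV_ID_NEW_PART, PCI_ANY_ID, PCI_ANY_ID, 0},
 *
 * em_probe() walks this table matching on vendor and device ID, with
 * PCI_ANY_ID acting as a wildcard for the subsystem IDs.
 */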

/*********************************************************************
 *  Table of branding strings for all supported NICs.
 *********************************************************************/

static char *em_strings[] = {
        "Intel(R) PRO/1000 Network Connection"
};

/*********************************************************************
 *  Function prototypes
 *********************************************************************/
static int      em_probe(device_t);
static int      em_attach(device_t);
static int      em_detach(device_t);
static int      em_shutdown(device_t);
static int      em_suspend(device_t);
static int      em_resume(device_t);
#ifdef EM_MULTIQUEUE
static int      em_mq_start(struct ifnet *, struct mbuf *);
static int      em_mq_start_locked(struct ifnet *,
                    struct tx_ring *, struct mbuf *);
static void     em_qflush(struct ifnet *);
#else
static void     em_start(struct ifnet *);
static void     em_start_locked(struct ifnet *, struct tx_ring *);
#endif
static int      em_ioctl(struct ifnet *, u_long, caddr_t);
static void     em_init(void *);
static void     em_init_locked(struct adapter *);
static void     em_stop(void *);
static void     em_media_status(struct ifnet *, struct ifmediareq *);
static int      em_media_change(struct ifnet *);
static void     em_identify_hardware(struct adapter *);
static int      em_allocate_pci_resources(struct adapter *);
static int      em_allocate_legacy(struct adapter *);
static int      em_allocate_msix(struct adapter *);
static int      em_allocate_queues(struct adapter *);
static int      em_setup_msix(struct adapter *);
static void     em_free_pci_resources(struct adapter *);
static void     em_local_timer(void *);
static void     em_reset(struct adapter *);
static int      em_setup_interface(device_t, struct adapter *);

static void     em_setup_transmit_structures(struct adapter *);
static void     em_initialize_transmit_unit(struct adapter *);
static int      em_allocate_transmit_buffers(struct tx_ring *);
static void     em_free_transmit_structures(struct adapter *);
static void     em_free_transmit_buffers(struct tx_ring *);

static int      em_setup_receive_structures(struct adapter *);
static int      em_allocate_receive_buffers(struct rx_ring *);
static void     em_initialize_receive_unit(struct adapter *);
static void     em_free_receive_structures(struct adapter *);
static void     em_free_receive_buffers(struct rx_ring *);

static void     em_enable_intr(struct adapter *);
static void     em_disable_intr(struct adapter *);
static void     em_update_stats_counters(struct adapter *);
static void     em_add_hw_stats(struct adapter *adapter);
static void     em_txeof(struct tx_ring *);
static bool     em_rxeof(struct rx_ring *, int, int *);
#ifndef __NO_STRICT_ALIGNMENT
static int      em_fixup_rx(struct rx_ring *);
#endif
static void     em_receive_checksum(struct e1000_rx_desc *, struct mbuf *);
static void     em_transmit_checksum_setup(struct tx_ring *, struct mbuf *, int,
                    struct ip *, u32 *, u32 *);
static void     em_tso_setup(struct tx_ring *, struct mbuf *, int, struct ip *,
                    struct tcphdr *, u32 *, u32 *);
static void     em_set_promisc(struct adapter *);
static void     em_disable_promisc(struct adapter *);
static void     em_set_multi(struct adapter *);
static void     em_update_link_status(struct adapter *);
static void     em_refresh_mbufs(struct rx_ring *, int);
static void     em_register_vlan(void *, struct ifnet *, u16);
static void     em_unregister_vlan(void *, struct ifnet *, u16);
static void     em_setup_vlan_hw_support(struct adapter *);
static int      em_xmit(struct tx_ring *, struct mbuf **);
static int      em_dma_malloc(struct adapter *, bus_size_t,
                    struct em_dma_alloc *, int);
static void     em_dma_free(struct adapter *, struct em_dma_alloc *);
static int      em_sysctl_nvm_info(SYSCTL_HANDLER_ARGS);
static void     em_print_nvm_info(struct adapter *);
static int      em_sysctl_debug_info(SYSCTL_HANDLER_ARGS);
static void     em_print_debug_info(struct adapter *);
static int      em_is_valid_ether_addr(u8 *);
static int      em_sysctl_int_delay(SYSCTL_HANDLER_ARGS);
static void     em_add_int_delay_sysctl(struct adapter *, const char *,
                    const char *, struct em_int_delay_info *, int, int);
/* Management and WOL Support */
static void     em_init_manageability(struct adapter *);
static void     em_release_manageability(struct adapter *);
static void     em_get_hw_control(struct adapter *);
static void     em_release_hw_control(struct adapter *);
static void     em_get_wakeup(device_t);
static void     em_enable_wakeup(device_t);
static int      em_enable_phy_wakeup(struct adapter *);
static void     em_led_func(void *, int);
static void     em_disable_aspm(struct adapter *);

static int      em_irq_fast(void *);

/* MSIX handlers */
static void     em_msix_tx(void *);
static void     em_msix_rx(void *);
static void     em_msix_link(void *);
static void     em_handle_tx(void *context, int pending);
static void     em_handle_rx(void *context, int pending);
static void     em_handle_link(void *context, int pending);

static void     em_set_sysctl_value(struct adapter *, const char *,
                    const char *, int *, int);
static int      em_set_flowcntl(SYSCTL_HANDLER_ARGS);
static int      em_sysctl_eee(SYSCTL_HANDLER_ARGS);

static __inline void em_rx_discard(struct rx_ring *, int);

#ifdef DEVICE_POLLING
static poll_handler_t em_poll;
#endif /* DEVICE_POLLING */

/*********************************************************************
 *  FreeBSD Device Interface Entry Points
 *********************************************************************/

static device_method_t em_methods[] = {
        /* Device interface */
        DEVMETHOD(device_probe, em_probe),
        DEVMETHOD(device_attach, em_attach),
        DEVMETHOD(device_detach, em_detach),
        DEVMETHOD(device_shutdown, em_shutdown),
        DEVMETHOD(device_suspend, em_suspend),
        DEVMETHOD(device_resume, em_resume),
        DEVMETHOD_END
};

static driver_t em_driver = {
        "em", em_methods, sizeof(struct adapter),
};

devclass_t em_devclass;
DRIVER_MODULE(em, pci, em_driver, em_devclass, 0, 0);
MODULE_DEPEND(em, pci, 1, 1, 1);
MODULE_DEPEND(em, ether, 1, 1, 1);

/*********************************************************************
 *  Tunable default values.
 *********************************************************************/

#define EM_TICKS_TO_USECS(ticks)        ((1024 * (ticks) + 500) / 1000)
#define EM_USECS_TO_TICKS(usecs)        ((1000 * (usecs) + 512) / 1024)
#define M_TSO_LEN                       66
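
/*
 * Worked example (illustrative): the conversion factor in the macros
 * above is 1024/1000, i.e. one hardware timer tick is 1.024 usecs, so
 * EM_TICKS_TO_USECS(64) = (1024 * 64 + 500) / 1000 = 66 usecs, and
 * EM_USECS_TO_TICKS(66) = (1000 * 66 + 512) / 1024 = 64 ticks.  The
 * +500 and +512 terms make the integer division round to nearest
 * rather than truncate.
 */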

/* Allow common code without TSO */
#ifndef CSUM_TSO
#define CSUM_TSO        0
#endif

static SYSCTL_NODE(_hw, OID_AUTO, em, CTLFLAG_RD, 0, "EM driver parameters");

static int em_tx_int_delay_dflt = EM_TICKS_TO_USECS(EM_TIDV);
static int em_rx_int_delay_dflt = EM_TICKS_TO_USECS(EM_RDTR);
TUNABLE_INT("hw.em.tx_int_delay", &em_tx_int_delay_dflt);
TUNABLE_INT("hw.em.rx_int_delay", &em_rx_int_delay_dflt);
SYSCTL_INT(_hw_em, OID_AUTO, tx_int_delay, CTLFLAG_RDTUN, &em_tx_int_delay_dflt,
    0, "Default transmit interrupt delay in usecs");
SYSCTL_INT(_hw_em, OID_AUTO, rx_int_delay, CTLFLAG_RDTUN, &em_rx_int_delay_dflt,
    0, "Default receive interrupt delay in usecs");

static int em_tx_abs_int_delay_dflt = EM_TICKS_TO_USECS(EM_TADV);
static int em_rx_abs_int_delay_dflt = EM_TICKS_TO_USECS(EM_RADV);
TUNABLE_INT("hw.em.tx_abs_int_delay", &em_tx_abs_int_delay_dflt);
TUNABLE_INT("hw.em.rx_abs_int_delay", &em_rx_abs_int_delay_dflt);
SYSCTL_INT(_hw_em, OID_AUTO, tx_abs_int_delay, CTLFLAG_RDTUN,
    &em_tx_abs_int_delay_dflt, 0,
    "Default transmit interrupt delay limit in usecs");
SYSCTL_INT(_hw_em, OID_AUTO, rx_abs_int_delay, CTLFLAG_RDTUN,
    &em_rx_abs_int_delay_dflt, 0,
    "Default receive interrupt delay limit in usecs");

static int em_rxd = EM_DEFAULT_RXD;
static int em_txd = EM_DEFAULT_TXD;
TUNABLE_INT("hw.em.rxd", &em_rxd);
TUNABLE_INT("hw.em.txd", &em_txd);
SYSCTL_INT(_hw_em, OID_AUTO, rxd, CTLFLAG_RDTUN, &em_rxd, 0,
    "Number of receive descriptors per queue");
SYSCTL_INT(_hw_em, OID_AUTO, txd, CTLFLAG_RDTUN, &em_txd, 0,
    "Number of transmit descriptors per queue");

static int em_smart_pwr_down = FALSE;
TUNABLE_INT("hw.em.smart_pwr_down", &em_smart_pwr_down);
SYSCTL_INT(_hw_em, OID_AUTO, smart_pwr_down, CTLFLAG_RDTUN, &em_smart_pwr_down,
    0, "Set to true to leave smart power down enabled on newer adapters");

/* Controls whether promiscuous also shows bad packets */
static int em_debug_sbp = FALSE;
TUNABLE_INT("hw.em.sbp", &em_debug_sbp);
SYSCTL_INT(_hw_em, OID_AUTO, sbp, CTLFLAG_RDTUN, &em_debug_sbp, 0,
    "Show bad packets in promiscuous mode");

static int em_enable_msix = TRUE;
TUNABLE_INT("hw.em.enable_msix", &em_enable_msix);
SYSCTL_INT(_hw_em, OID_AUTO, enable_msix, CTLFLAG_RDTUN, &em_enable_msix, 0,
    "Enable MSI-X interrupts");

/* How many packets rxeof tries to clean at a time */
static int em_rx_process_limit = 100;
TUNABLE_INT("hw.em.rx_process_limit", &em_rx_process_limit);
SYSCTL_INT(_hw_em, OID_AUTO, rx_process_limit, CTLFLAG_RDTUN,
    &em_rx_process_limit, 0,
    "Maximum number of received packets to process "
    "at a time, -1 means unlimited");

/* Energy efficient ethernet - default to OFF */
static int eee_setting = 1;
TUNABLE_INT("hw.em.eee_setting", &eee_setting);
SYSCTL_INT(_hw_em, OID_AUTO, eee_setting, CTLFLAG_RDTUN, &eee_setting, 0,
    "Disable Energy Efficient Ethernet");

/* Global used in WOL setup with multiport cards */
static int global_quad_port_a = 0;

#ifdef DEV_NETMAP       /* see ixgbe.c for details */
#include <dev/netmap/if_em_netmap.h>
#endif /* DEV_NETMAP */

/*********************************************************************
 *  Device identification routine
 *
 *  em_probe determines if the driver should be loaded on the
 *  adapter, based on the PCI vendor/device ID of the adapter.
 *
 *  return BUS_PROBE_DEFAULT on success, positive on failure
 *********************************************************************/

static int
em_probe(device_t dev)
{
        char            adapter_name[60];
        u16             pci_vendor_id = 0;
        u16             pci_device_id = 0;
        u16             pci_subvendor_id = 0;
        u16             pci_subdevice_id = 0;
        em_vendor_info_t *ent;

        INIT_DEBUGOUT("em_probe: begin");

        pci_vendor_id = pci_get_vendor(dev);
        if (pci_vendor_id != EM_VENDOR_ID)
                return (ENXIO);

        pci_device_id = pci_get_device(dev);
        pci_subvendor_id = pci_get_subvendor(dev);
        pci_subdevice_id = pci_get_subdevice(dev);

        ent = em_vendor_info_array;
        while (ent->vendor_id != 0) {
                if ((pci_vendor_id == ent->vendor_id) &&
                    (pci_device_id == ent->device_id) &&
                    ((pci_subvendor_id == ent->subvendor_id) ||
                    (ent->subvendor_id == PCI_ANY_ID)) &&
                    ((pci_subdevice_id == ent->subdevice_id) ||
                    (ent->subdevice_id == PCI_ANY_ID))) {
                        sprintf(adapter_name, "%s %s",
                                em_strings[ent->index],
                                em_driver_version);
                        device_set_desc_copy(dev, adapter_name);
                        return (BUS_PROBE_DEFAULT);
                }
                ent++;
        }

        return (ENXIO);
}

/*********************************************************************
 *  Device initialization routine
 *
 *  The attach entry point is called when the driver is being loaded.
 *  This routine identifies the type of hardware, allocates all resources
 *  and initializes the hardware.
 *
 *  return 0 on success, positive on failure
 *********************************************************************/

static int
em_attach(device_t dev)
{
        struct adapter  *adapter;
        struct e1000_hw *hw;
        int             error = 0;

        INIT_DEBUGOUT("em_attach: begin");

        if (resource_disabled("em", device_get_unit(dev))) {
                device_printf(dev, "Disabled by device hint\n");
                return (ENXIO);
        }

        adapter = device_get_softc(dev);
        adapter->dev = adapter->osdep.dev = dev;
        hw = &adapter->hw;
        EM_CORE_LOCK_INIT(adapter, device_get_nameunit(dev));

        /* SYSCTL stuff */
        SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
            SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
            OID_AUTO, "nvm", CTLTYPE_INT|CTLFLAG_RW, adapter, 0,
            em_sysctl_nvm_info, "I", "NVM Information");

        SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
            SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
            OID_AUTO, "debug", CTLTYPE_INT|CTLFLAG_RW, adapter, 0,
            em_sysctl_debug_info, "I", "Debug Information");

        SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
            SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
            OID_AUTO, "fc", CTLTYPE_INT|CTLFLAG_RW, adapter, 0,
            em_set_flowcntl, "I", "Flow Control");

        callout_init_mtx(&adapter->timer, &adapter->core_mtx, 0);

        /* Determine hardware and mac info */
        em_identify_hardware(adapter);

        /* Setup PCI resources */
        if (em_allocate_pci_resources(adapter)) {
                device_printf(dev, "Allocation of PCI resources failed\n");
                error = ENXIO;
                goto err_pci;
        }

        /*
        ** For ICH8 and family we need to
        ** map the flash memory, and this
        ** must happen after the MAC is
        ** identified
        */
        if ((hw->mac.type == e1000_ich8lan) ||
            (hw->mac.type == e1000_ich9lan) ||
            (hw->mac.type == e1000_ich10lan) ||
            (hw->mac.type == e1000_pchlan) ||
            (hw->mac.type == e1000_pch2lan) ||
            (hw->mac.type == e1000_pch_lpt)) {
                int rid = EM_BAR_TYPE_FLASH;
                adapter->flash = bus_alloc_resource_any(dev,
                    SYS_RES_MEMORY, &rid, RF_ACTIVE);
                if (adapter->flash == NULL) {
                        device_printf(dev, "Mapping of Flash failed\n");
                        error = ENXIO;
                        goto err_pci;
                }
                /* This is used in the shared code */
                hw->flash_address = (u8 *)adapter->flash;
                adapter->osdep.flash_bus_space_tag =
                    rman_get_bustag(adapter->flash);
                adapter->osdep.flash_bus_space_handle =
                    rman_get_bushandle(adapter->flash);
        }

        /* Do Shared Code initialization */
        if (e1000_setup_init_funcs(hw, TRUE)) {
                device_printf(dev, "Setup of Shared code failed\n");
                error = ENXIO;
                goto err_pci;
        }

        e1000_get_bus_info(hw);

        /* Set up some sysctls for the tunable interrupt delays */
        em_add_int_delay_sysctl(adapter, "rx_int_delay",
            "receive interrupt delay in usecs", &adapter->rx_int_delay,
            E1000_REGISTER(hw, E1000_RDTR), em_rx_int_delay_dflt);
        em_add_int_delay_sysctl(adapter, "tx_int_delay",
            "transmit interrupt delay in usecs", &adapter->tx_int_delay,
            E1000_REGISTER(hw, E1000_TIDV), em_tx_int_delay_dflt);
        em_add_int_delay_sysctl(adapter, "rx_abs_int_delay",
            "receive interrupt delay limit in usecs",
            &adapter->rx_abs_int_delay,
            E1000_REGISTER(hw, E1000_RADV),
            em_rx_abs_int_delay_dflt);
        em_add_int_delay_sysctl(adapter, "tx_abs_int_delay",
            "transmit interrupt delay limit in usecs",
            &adapter->tx_abs_int_delay,
            E1000_REGISTER(hw, E1000_TADV),
            em_tx_abs_int_delay_dflt);

        /* Sysctl for limiting the amount of work done in the taskqueue */
        em_set_sysctl_value(adapter, "rx_processing_limit",
            "max number of rx packets to process", &adapter->rx_process_limit,
            em_rx_process_limit);

        /*
         * Validate the number of transmit and receive descriptors. They
         * must not exceed the hardware maximum, and must be a multiple
         * of EM_DBA_ALIGN.
         */
        if (((em_txd * sizeof(struct e1000_tx_desc)) % EM_DBA_ALIGN) != 0 ||
            (em_txd > EM_MAX_TXD) || (em_txd < EM_MIN_TXD)) {
                device_printf(dev, "Using %d TX descriptors instead of %d!\n",
                    EM_DEFAULT_TXD, em_txd);
                adapter->num_tx_desc = EM_DEFAULT_TXD;
        } else
                adapter->num_tx_desc = em_txd;

        if (((em_rxd * sizeof(struct e1000_rx_desc)) % EM_DBA_ALIGN) != 0 ||
            (em_rxd > EM_MAX_RXD) || (em_rxd < EM_MIN_RXD)) {
                device_printf(dev, "Using %d RX descriptors instead of %d!\n",
                    EM_DEFAULT_RXD, em_rxd);
                adapter->num_rx_desc = EM_DEFAULT_RXD;
        } else
                adapter->num_rx_desc = em_rxd;

        hw->mac.autoneg = DO_AUTO_NEG;
        hw->phy.autoneg_wait_to_complete = FALSE;
        hw->phy.autoneg_advertised = AUTONEG_ADV_DEFAULT;

        /* Copper options */
        if (hw->phy.media_type == e1000_media_type_copper) {
                hw->phy.mdix = AUTO_ALL_MODES;
                hw->phy.disable_polarity_correction = FALSE;
                hw->phy.ms_type = EM_MASTER_SLAVE;
        }

        /*
         * Set the frame limits assuming
         * standard ethernet sized frames.
         */
        adapter->hw.mac.max_frame_size =
            ETHERMTU + ETHER_HDR_LEN + ETHERNET_FCS_SIZE;

        /*
         * This controls when hardware reports transmit completion
         * status.
         */
        hw->mac.report_tx_early = 1;

        /*
        ** Get queue/ring memory
        */
        if (em_allocate_queues(adapter)) {
                error = ENOMEM;
                goto err_pci;
        }

        /* Allocate multicast array memory. */
        adapter->mta = malloc(sizeof(u8) * ETH_ADDR_LEN *
            MAX_NUM_MULTICAST_ADDRESSES, M_DEVBUF, M_NOWAIT);
        if (adapter->mta == NULL) {
                device_printf(dev, "Can not allocate multicast setup array\n");
                error = ENOMEM;
                goto err_late;
        }

        /* Check SOL/IDER usage */
        if (e1000_check_reset_block(hw))
                device_printf(dev, "PHY reset is blocked"
                    " due to SOL/IDER session.\n");

        /* Sysctl for setting Energy Efficient Ethernet */
        hw->dev_spec.ich8lan.eee_disable = eee_setting;
        SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
            SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
            OID_AUTO, "eee_control", CTLTYPE_INT|CTLFLAG_RW,
            adapter, 0, em_sysctl_eee, "I",
            "Disable Energy Efficient Ethernet");

        /*
        ** Start from a known state; this is
        ** important for reading the NVM and
        ** MAC address from it.
        */
        e1000_reset_hw(hw);


        /* Make sure we have a good EEPROM before we read from it */
        if (e1000_validate_nvm_checksum(hw) < 0) {
                /*
                ** Some PCI-E parts fail the first check due to
                ** the link being in a sleep state; call it again,
                ** and if it fails a second time it's a real issue.
                */
                if (e1000_validate_nvm_checksum(hw) < 0) {
                        device_printf(dev,
                            "The EEPROM Checksum Is Not Valid\n");
                        error = EIO;
                        goto err_late;
                }
        }

        /* Copy the permanent MAC address out of the EEPROM */
        if (e1000_read_mac_addr(hw) < 0) {
                device_printf(dev, "EEPROM read error while reading MAC"
                    " address\n");
                error = EIO;
                goto err_late;
        }

        if (!em_is_valid_ether_addr(hw->mac.addr)) {
                device_printf(dev, "Invalid MAC address\n");
                error = EIO;
                goto err_late;
        }

        /*
        **  Do interrupt configuration
        */
        if (adapter->msix > 1) /* Do MSIX */
                error = em_allocate_msix(adapter);
        else  /* MSI or Legacy */
                error = em_allocate_legacy(adapter);
        if (error)
                goto err_late;

        /*
         * Get Wake-on-Lan and Management info for later use
         */
        em_get_wakeup(dev);

        /* Setup OS specific network interface */
        if (em_setup_interface(dev, adapter) != 0)
                goto err_late;

        em_reset(adapter);

        /* Initialize statistics */
        em_update_stats_counters(adapter);

        hw->mac.get_link_status = 1;
        em_update_link_status(adapter);

        /* Register for VLAN events */
        adapter->vlan_attach = EVENTHANDLER_REGISTER(vlan_config,
            em_register_vlan, adapter, EVENTHANDLER_PRI_FIRST);
        adapter->vlan_detach = EVENTHANDLER_REGISTER(vlan_unconfig,
            em_unregister_vlan, adapter, EVENTHANDLER_PRI_FIRST);

        em_add_hw_stats(adapter);

        /* Non-AMT based hardware can now take control from firmware */
        if (adapter->has_manage && !adapter->has_amt)
                em_get_hw_control(adapter);

        /* Tell the stack that the interface is not active */
        adapter->ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
        adapter->ifp->if_drv_flags |= IFF_DRV_OACTIVE;

        adapter->led_dev = led_create(em_led_func, adapter,
            device_get_nameunit(dev));
#ifdef DEV_NETMAP
        em_netmap_attach(adapter);
#endif /* DEV_NETMAP */

        INIT_DEBUGOUT("em_attach: end");

        return (0);

err_late:
        em_free_transmit_structures(adapter);
        em_free_receive_structures(adapter);
        em_release_hw_control(adapter);
        if (adapter->ifp != NULL)
                if_free(adapter->ifp);
err_pci:
        em_free_pci_resources(adapter);
        free(adapter->mta, M_DEVBUF);
        EM_CORE_LOCK_DESTROY(adapter);

        return (error);
}

/*********************************************************************
 *  Device removal routine
 *
 *  The detach entry point is called when the driver is being removed.
 *  This routine stops the adapter and deallocates all the resources
 *  that were allocated for driver operation.
 *
 *  return 0 on success, positive on failure
 *********************************************************************/

static int
em_detach(device_t dev)
{
        struct adapter  *adapter = device_get_softc(dev);
        struct ifnet    *ifp = adapter->ifp;

        INIT_DEBUGOUT("em_detach: begin");

        /* Make sure VLANS are not using driver */
        if (adapter->ifp->if_vlantrunk != NULL) {
                device_printf(dev,"Vlan in use, detach first\n");
                return (EBUSY);
        }

#ifdef DEVICE_POLLING
        if (ifp->if_capenable & IFCAP_POLLING)
                ether_poll_deregister(ifp);
#endif

        if (adapter->led_dev != NULL)
                led_destroy(adapter->led_dev);

        EM_CORE_LOCK(adapter);
        adapter->in_detach = 1;
        em_stop(adapter);
        EM_CORE_UNLOCK(adapter);
        EM_CORE_LOCK_DESTROY(adapter);

        e1000_phy_hw_reset(&adapter->hw);

        em_release_manageability(adapter);
        em_release_hw_control(adapter);

        /* Unregister VLAN events */
        if (adapter->vlan_attach != NULL)
                EVENTHANDLER_DEREGISTER(vlan_config, adapter->vlan_attach);
        if (adapter->vlan_detach != NULL)
                EVENTHANDLER_DEREGISTER(vlan_unconfig, adapter->vlan_detach);

        ether_ifdetach(adapter->ifp);
        callout_drain(&adapter->timer);

#ifdef DEV_NETMAP
        netmap_detach(ifp);
#endif /* DEV_NETMAP */

        em_free_pci_resources(adapter);
        bus_generic_detach(dev);
        if_free(ifp);

        em_free_transmit_structures(adapter);
        em_free_receive_structures(adapter);

        em_release_hw_control(adapter);
        free(adapter->mta, M_DEVBUF);

        return (0);
}

/*********************************************************************
 *
 *  Shutdown entry point
 *
 **********************************************************************/

static int
em_shutdown(device_t dev)
{
        return em_suspend(dev);
}

/*
 * Suspend/resume device methods.
 */
static int
em_suspend(device_t dev)
{
        struct adapter *adapter = device_get_softc(dev);

        EM_CORE_LOCK(adapter);

        em_release_manageability(adapter);
        em_release_hw_control(adapter);
        em_enable_wakeup(dev);

        EM_CORE_UNLOCK(adapter);

        return bus_generic_suspend(dev);
}

static int
em_resume(device_t dev)
{
        struct adapter *adapter = device_get_softc(dev);
        struct tx_ring  *txr = adapter->tx_rings;
        struct ifnet *ifp = adapter->ifp;

        EM_CORE_LOCK(adapter);
        if (adapter->hw.mac.type == e1000_pch2lan)
                e1000_resume_workarounds_pchlan(&adapter->hw);
        em_init_locked(adapter);
        em_init_manageability(adapter);

        if ((ifp->if_flags & IFF_UP) &&
            (ifp->if_drv_flags & IFF_DRV_RUNNING) && adapter->link_active) {
                for (int i = 0; i < adapter->num_queues; i++, txr++) {
                        EM_TX_LOCK(txr);
#ifdef EM_MULTIQUEUE
                        if (!drbr_empty(ifp, txr->br))
                                em_mq_start_locked(ifp, txr, NULL);
#else
                        if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
                                em_start_locked(ifp, txr);
#endif
                        EM_TX_UNLOCK(txr);
                }
        }
        EM_CORE_UNLOCK(adapter);

        return bus_generic_resume(dev);
}


#ifdef EM_MULTIQUEUE
/*********************************************************************
 *  Multiqueue Transmit routines
 *
 *  em_mq_start is called by the stack to initiate a transmit.
 *  However, if the ring is busy the driver can queue the request
 *  rather than do an immediate send. It is this queueing that is
 *  the advantage in this driver, rather than also having multiple
 *  tx queues.
 **********************************************************************/
static int
em_mq_start_locked(struct ifnet *ifp, struct tx_ring *txr, struct mbuf *m)
{
        struct adapter  *adapter = txr->adapter;
        struct mbuf     *next;
        int             err = 0, enq = 0;

        if ((ifp->if_drv_flags & (IFF_DRV_RUNNING | IFF_DRV_OACTIVE)) !=
            IFF_DRV_RUNNING || adapter->link_active == 0) {
                if (m != NULL)
                        err = drbr_enqueue(ifp, txr->br, m);
                return (err);
        }

        enq = 0;
        if (m != NULL) {
                err = drbr_enqueue(ifp, txr->br, m);
                if (err)
                        return (err);
        }

        /* Process the queue */
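        /*
         * Note (descriptive): drbr_peek() leaves the mbuf on the ring,
         * so on an em_xmit() failure it can either be returned with
         * drbr_putback() or, if em_xmit() already freed it (next ==
         * NULL), consumed with drbr_advance().  Only a successful
         * transmit advances the ring unconditionally.
         */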
        while ((next = drbr_peek(ifp, txr->br)) != NULL) {
                if ((err = em_xmit(txr, &next)) != 0) {
                        if (next == NULL)
                                drbr_advance(ifp, txr->br);
                        else
                                drbr_putback(ifp, txr->br, next);
                        break;
                }
                drbr_advance(ifp, txr->br);
                enq++;
                ifp->if_obytes += next->m_pkthdr.len;
                if (next->m_flags & M_MCAST)
                        ifp->if_omcasts++;
                ETHER_BPF_MTAP(ifp, next);
                if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
                        break;
        }

        if (enq > 0) {
                /* Set the watchdog */
                txr->queue_status = EM_QUEUE_WORKING;
                txr->watchdog_time = ticks;
        }

        if (txr->tx_avail < EM_MAX_SCATTER)
                em_txeof(txr);
        if (txr->tx_avail < EM_MAX_SCATTER)
                ifp->if_drv_flags |= IFF_DRV_OACTIVE;
        return (err);
}

/*
** Multiqueue capable stack interface
*/
static int
em_mq_start(struct ifnet *ifp, struct mbuf *m)
{
        struct adapter  *adapter = ifp->if_softc;
        struct tx_ring  *txr = adapter->tx_rings;
        int             error;

        if (EM_TX_TRYLOCK(txr)) {
                error = em_mq_start_locked(ifp, txr, m);
                EM_TX_UNLOCK(txr);
        } else
                error = drbr_enqueue(ifp, txr->br, m);

        return (error);
}

/*
** Flush all ring buffers
*/
static void
em_qflush(struct ifnet *ifp)
{
        struct adapter  *adapter = ifp->if_softc;
        struct tx_ring  *txr = adapter->tx_rings;
        struct mbuf     *m;

        for (int i = 0; i < adapter->num_queues; i++, txr++) {
                EM_TX_LOCK(txr);
                while ((m = buf_ring_dequeue_sc(txr->br)) != NULL)
                        m_freem(m);
                EM_TX_UNLOCK(txr);
        }
        if_qflush(ifp);
}
#else  /* !EM_MULTIQUEUE */

static void
em_start_locked(struct ifnet *ifp, struct tx_ring *txr)
{
        struct adapter  *adapter = ifp->if_softc;
        struct mbuf     *m_head;

        EM_TX_LOCK_ASSERT(txr);

        if ((ifp->if_drv_flags & (IFF_DRV_RUNNING|IFF_DRV_OACTIVE)) !=
            IFF_DRV_RUNNING)
                return;

        if (!adapter->link_active)
                return;

        while (!IFQ_DRV_IS_EMPTY(&ifp->if_snd)) {
                /* Call cleanup if number of TX descriptors low */
                if (txr->tx_avail <= EM_TX_CLEANUP_THRESHOLD)
                        em_txeof(txr);
                if (txr->tx_avail < EM_MAX_SCATTER) {
                        ifp->if_drv_flags |= IFF_DRV_OACTIVE;
                        break;
                }
                IFQ_DRV_DEQUEUE(&ifp->if_snd, m_head);
                if (m_head == NULL)
                        break;
                /*
                 *  Encapsulation can modify our pointer, and/or make it
                 *  NULL on failure.  In that event, we can't requeue.
                 */
                if (em_xmit(txr, &m_head)) {
                        if (m_head == NULL)
                                break;
                        IFQ_DRV_PREPEND(&ifp->if_snd, m_head);
                        break;
                }

                /* Send a copy of the frame to the BPF listener */
                ETHER_BPF_MTAP(ifp, m_head);

                /* Set timeout in case hardware has problems transmitting. */
                txr->watchdog_time = ticks;
                txr->queue_status = EM_QUEUE_WORKING;
        }

        return;
}

static void
em_start(struct ifnet *ifp)
{
        struct adapter  *adapter = ifp->if_softc;
        struct tx_ring  *txr = adapter->tx_rings;

        if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
                EM_TX_LOCK(txr);
                em_start_locked(ifp, txr);
                EM_TX_UNLOCK(txr);
        }
        return;
}
#endif /* EM_MULTIQUEUE */

/*********************************************************************
 *  Ioctl entry point
 *
 *  em_ioctl is called when the user wants to configure the
 *  interface.
 *
 *  return 0 on success, positive on failure
 **********************************************************************/

static int
em_ioctl(struct ifnet *ifp, u_long command, caddr_t data)
{
        struct adapter  *adapter = ifp->if_softc;
        struct ifreq    *ifr = (struct ifreq *)data;
#if defined(INET) || defined(INET6)
        struct ifaddr   *ifa = (struct ifaddr *)data;
#endif
        bool            avoid_reset = FALSE;
        int             error = 0;

        if (adapter->in_detach)
                return (error);

        switch (command) {
        case SIOCSIFADDR:
#ifdef INET
                if (ifa->ifa_addr->sa_family == AF_INET)
                        avoid_reset = TRUE;
#endif
#ifdef INET6
                if (ifa->ifa_addr->sa_family == AF_INET6)
                        avoid_reset = TRUE;
#endif
                /*
                ** Calling init results in link renegotiation,
                ** so we avoid doing it when possible.
                */
                if (avoid_reset) {
                        ifp->if_flags |= IFF_UP;
                        if (!(ifp->if_drv_flags & IFF_DRV_RUNNING))
                                em_init(adapter);
#ifdef INET
                        if (!(ifp->if_flags & IFF_NOARP))
                                arp_ifinit(ifp, ifa);
#endif
                } else
                        error = ether_ioctl(ifp, command, data);
                break;
        case SIOCSIFMTU:
            {
                int max_frame_size;

                IOCTL_DEBUGOUT("ioctl rcv'd: SIOCSIFMTU (Set Interface MTU)");

                EM_CORE_LOCK(adapter);
                switch (adapter->hw.mac.type) {
                case e1000_82571:
                case e1000_82572:
                case e1000_ich9lan:
                case e1000_ich10lan:
                case e1000_pch2lan:
                case e1000_pch_lpt:
                case e1000_82574:
                case e1000_82583:
                case e1000_80003es2lan: /* 9K Jumbo Frame size */
                        max_frame_size = 9234;
                        break;
                case e1000_pchlan:
                        max_frame_size = 4096;
                        break;
                /* Adapters that do not support jumbo frames */
                case e1000_ich8lan:
                        max_frame_size = ETHER_MAX_LEN;
                        break;
                default:
                        max_frame_size = MAX_JUMBO_FRAME_SIZE;
                }
                if (ifr->ifr_mtu > max_frame_size - ETHER_HDR_LEN -
                    ETHER_CRC_LEN) {
                        EM_CORE_UNLOCK(adapter);
                        error = EINVAL;
                        break;
                }

                ifp->if_mtu = ifr->ifr_mtu;
                adapter->hw.mac.max_frame_size =
                    ifp->if_mtu + ETHER_HDR_LEN + ETHER_CRC_LEN;
                em_init_locked(adapter);
                EM_CORE_UNLOCK(adapter);
                break;
            }
        case SIOCSIFFLAGS:
                IOCTL_DEBUGOUT("ioctl rcv'd: SIOCSIFFLAGS (Set Interface Flags)");
                EM_CORE_LOCK(adapter);
                if (ifp->if_flags & IFF_UP) {
                        if ((ifp->if_drv_flags & IFF_DRV_RUNNING)) {
                                if ((ifp->if_flags ^ adapter->if_flags) &
                                    (IFF_PROMISC | IFF_ALLMULTI)) {
                                        em_disable_promisc(adapter);
                                        em_set_promisc(adapter);
                                }
                        } else
                                em_init_locked(adapter);
                } else
                        if (ifp->if_drv_flags & IFF_DRV_RUNNING)
                                em_stop(adapter);
                adapter->if_flags = ifp->if_flags;
                EM_CORE_UNLOCK(adapter);
                break;
        case SIOCADDMULTI:
        case SIOCDELMULTI:
                IOCTL_DEBUGOUT("ioctl rcv'd: SIOC(ADD|DEL)MULTI");
                if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
                        EM_CORE_LOCK(adapter);
                        em_disable_intr(adapter);
                        em_set_multi(adapter);
#ifdef DEVICE_POLLING
                        if (!(ifp->if_capenable & IFCAP_POLLING))
#endif
                                em_enable_intr(adapter);
                        EM_CORE_UNLOCK(adapter);
                }
                break;
        case SIOCSIFMEDIA:
                /* Check SOL/IDER usage */
                EM_CORE_LOCK(adapter);
                if (e1000_check_reset_block(&adapter->hw)) {
                        EM_CORE_UNLOCK(adapter);
                        device_printf(adapter->dev, "Media change is"
                            " blocked due to SOL/IDER session.\n");
                        break;
                }
                EM_CORE_UNLOCK(adapter);
                /* falls thru */
        case SIOCGIFMEDIA:
                IOCTL_DEBUGOUT("ioctl rcv'd: SIOCxIFMEDIA (Get/Set Interface Media)");
                error = ifmedia_ioctl(ifp, ifr, &adapter->media, command);
                break;
        case SIOCSIFCAP:
            {
                int mask, reinit;

                IOCTL_DEBUGOUT("ioctl rcv'd: SIOCSIFCAP (Set Capabilities)");
                reinit = 0;
                mask = ifr->ifr_reqcap ^ ifp->if_capenable;
#ifdef DEVICE_POLLING
                if (mask & IFCAP_POLLING) {
                        if (ifr->ifr_reqcap & IFCAP_POLLING) {
                                error = ether_poll_register(em_poll, ifp);
                                if (error)
                                        return (error);
                                EM_CORE_LOCK(adapter);
                                em_disable_intr(adapter);
                                ifp->if_capenable |= IFCAP_POLLING;
                                EM_CORE_UNLOCK(adapter);
                        } else {
                                error = ether_poll_deregister(ifp);
                                /* Enable interrupt even in error case */
                                EM_CORE_LOCK(adapter);
                                em_enable_intr(adapter);
                                ifp->if_capenable &= ~IFCAP_POLLING;
                                EM_CORE_UNLOCK(adapter);
                        }
                }
#endif
                if (mask & IFCAP_HWCSUM) {
                        ifp->if_capenable ^= IFCAP_HWCSUM;
                        reinit = 1;
                }
                if (mask & IFCAP_TSO4) {
                        ifp->if_capenable ^= IFCAP_TSO4;
                        reinit = 1;
                }
                if (mask & IFCAP_VLAN_HWTAGGING) {
                        ifp->if_capenable ^= IFCAP_VLAN_HWTAGGING;
                        reinit = 1;
                }
                if (mask & IFCAP_VLAN_HWFILTER) {
                        ifp->if_capenable ^= IFCAP_VLAN_HWFILTER;
                        reinit = 1;
                }
                if (mask & IFCAP_VLAN_HWTSO) {
                        ifp->if_capenable ^= IFCAP_VLAN_HWTSO;
                        reinit = 1;
                }
                if ((mask & IFCAP_WOL) &&
                    (ifp->if_capabilities & IFCAP_WOL) != 0) {
                        if (mask & IFCAP_WOL_MCAST)
                                ifp->if_capenable ^= IFCAP_WOL_MCAST;
                        if (mask & IFCAP_WOL_MAGIC)
                                ifp->if_capenable ^= IFCAP_WOL_MAGIC;
                }
                if (reinit && (ifp->if_drv_flags & IFF_DRV_RUNNING))
                        em_init(adapter);
                VLAN_CAPABILITIES(ifp);
                break;
            }

        default:
                error = ether_ioctl(ifp, command, data);
                break;
        }

        return (error);
}
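
/*
 * Usage note (illustrative): these ioctl paths are normally exercised
 * through ifconfig(8); e.g. "ifconfig em0 mtu 9000" arrives here as
 * SIOCSIFMTU, and "ifconfig em0 -txcsum" as SIOCSIFCAP.  The interface
 * name "em0" is only an example.
 */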


/*********************************************************************
 *  Init entry point
 *
 *  This routine is used in two ways. It is used by the stack as
 *  the init entry point in the network interface structure. It is
 *  also used by the driver as a hw/sw initialization routine to
 *  get to a consistent state.
 **********************************************************************/

static void
em_init_locked(struct adapter *adapter)
{
        struct ifnet    *ifp = adapter->ifp;
        device_t        dev = adapter->dev;

        INIT_DEBUGOUT("em_init: begin");

        EM_CORE_LOCK_ASSERT(adapter);

        em_disable_intr(adapter);
        callout_stop(&adapter->timer);

        /* Get the latest mac address, User can use a LAA */
        bcopy(IF_LLADDR(adapter->ifp), adapter->hw.mac.addr,
              ETHER_ADDR_LEN);

        /* Put the address into the Receive Address Array */
        e1000_rar_set(&adapter->hw, adapter->hw.mac.addr, 0);

        /*
         * With the 82571 adapter, RAR[0] may be overwritten
         * when the other port is reset, so we keep a duplicate
         * in RAR[14] for that eventuality; this assures
         * the interface continues to function.
1300          */
1301         if (adapter->hw.mac.type == e1000_82571) {
1302                 e1000_set_laa_state_82571(&adapter->hw, TRUE);
1303                 e1000_rar_set(&adapter->hw, adapter->hw.mac.addr,
1304                     E1000_RAR_ENTRIES - 1);
1305         }
1306
1307         /* Initialize the hardware */
1308         em_reset(adapter);
1309         em_update_link_status(adapter);
1310
1311         /* Setup VLAN support, basic and offload if available */
1312         E1000_WRITE_REG(&adapter->hw, E1000_VET, ETHERTYPE_VLAN);
1313
1314         /* Set hardware offload abilities */
1315         ifp->if_hwassist = 0;
1316         if (ifp->if_capenable & IFCAP_TXCSUM)
1317                 ifp->if_hwassist |= (CSUM_TCP | CSUM_UDP);
1318         if (ifp->if_capenable & IFCAP_TSO4)
1319                 ifp->if_hwassist |= CSUM_TSO;
1320
1321         /* Configure for OS presence */
1322         em_init_manageability(adapter);
1323
1324         /* Prepare transmit descriptors and buffers */
1325         em_setup_transmit_structures(adapter);
1326         em_initialize_transmit_unit(adapter);
1327
1328         /* Setup Multicast table */
1329         em_set_multi(adapter);
1330
1331         /*
1332         ** Figure out the desired mbuf
1333         ** pool for doing jumbos
1334         */
1335         if (adapter->hw.mac.max_frame_size <= 2048)
1336                 adapter->rx_mbuf_sz = MCLBYTES;
1337         else if (adapter->hw.mac.max_frame_size <= 4096)
1338                 adapter->rx_mbuf_sz = MJUMPAGESIZE;
1339         else
1340                 adapter->rx_mbuf_sz = MJUM9BYTES;
1341
1342         /* Prepare receive descriptors and buffers */
1343         if (em_setup_receive_structures(adapter)) {
1344                 device_printf(dev, "Could not setup receive structures\n");
1345                 em_stop(adapter);
1346                 return;
1347         }
1348         em_initialize_receive_unit(adapter);
1349
1350         /* Use real VLAN Filter support? */
1351         if (ifp->if_capenable & IFCAP_VLAN_HWTAGGING) {
1352                 if (ifp->if_capenable & IFCAP_VLAN_HWFILTER)
1353                         /* Use real VLAN Filter support */
1354                         em_setup_vlan_hw_support(adapter);
1355                 else {
1356                         u32 ctrl;
1357                         ctrl = E1000_READ_REG(&adapter->hw, E1000_CTRL);
1358                         ctrl |= E1000_CTRL_VME;
1359                         E1000_WRITE_REG(&adapter->hw, E1000_CTRL, ctrl);
1360                 }
1361         }
1362
1363         /* Don't lose promiscuous settings */
1364         em_set_promisc(adapter);
1365
1366         /* Set the interface as ACTIVE */
1367         ifp->if_drv_flags |= IFF_DRV_RUNNING;
1368         ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;
1369
1370         callout_reset(&adapter->timer, hz, em_local_timer, adapter);
1371         e1000_clear_hw_cntrs_base_generic(&adapter->hw);
1372
1373         /* MSI/X configuration for 82574 */
1374         if (adapter->hw.mac.type == e1000_82574) {
1375                 int tmp;
1376                 tmp = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
1377                 tmp |= E1000_CTRL_EXT_PBA_CLR;
1378                 E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT, tmp);
1379                 /* Set the IVAR - interrupt vector routing. */
1380                 E1000_WRITE_REG(&adapter->hw, E1000_IVAR, adapter->ivars);
1381         }
1382
1383 #ifdef DEVICE_POLLING
1384         /*
1385          * Only enable interrupts if we are not polling; make sure
1386          * they are off otherwise.
1387          */
1388         if (ifp->if_capenable & IFCAP_POLLING)
1389                 em_disable_intr(adapter);
1390         else
1391 #endif /* DEVICE_POLLING */
1392                 em_enable_intr(adapter);
1393
1394         /* AMT based hardware can now take control from firmware */
1395         if (adapter->has_manage && adapter->has_amt)
1396                 em_get_hw_control(adapter);
1397 }
1398
1399 static void
1400 em_init(void *arg)
1401 {
1402         struct adapter *adapter = arg;
1403
1404         EM_CORE_LOCK(adapter);
1405         em_init_locked(adapter);
1406         EM_CORE_UNLOCK(adapter);
1407 }
1408
1409
1410 #ifdef DEVICE_POLLING
1411 /*********************************************************************
1412  *
1413  *  Legacy polling routine: note this only works with a single queue
1414  *
1415  *********************************************************************/
1416 static int
1417 em_poll(struct ifnet *ifp, enum poll_cmd cmd, int count)
1418 {
1419         struct adapter *adapter = ifp->if_softc;
1420         struct tx_ring  *txr = adapter->tx_rings;
1421         struct rx_ring  *rxr = adapter->rx_rings;
1422         u32             reg_icr;
1423         int             rx_done;
1424
1425         EM_CORE_LOCK(adapter);
1426         if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) {
1427                 EM_CORE_UNLOCK(adapter);
1428                 return (0);
1429         }
1430
1431         if (cmd == POLL_AND_CHECK_STATUS) {
1432                 reg_icr = E1000_READ_REG(&adapter->hw, E1000_ICR);
1433                 if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
1434                         callout_stop(&adapter->timer);
1435                         adapter->hw.mac.get_link_status = 1;
1436                         em_update_link_status(adapter);
1437                         callout_reset(&adapter->timer, hz,
1438                             em_local_timer, adapter);
1439                 }
1440         }
1441         EM_CORE_UNLOCK(adapter);
1442
1443         em_rxeof(rxr, count, &rx_done);
1444
1445         EM_TX_LOCK(txr);
1446         em_txeof(txr);
1447 #ifdef EM_MULTIQUEUE
1448         if (!drbr_empty(ifp, txr->br))
1449                 em_mq_start_locked(ifp, txr, NULL);
1450 #else
1451         if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
1452                 em_start_locked(ifp, txr);
1453 #endif
1454         EM_TX_UNLOCK(txr);
1455
1456         return (rx_done);
1457 }
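/*
 * Editor's note: the value returned is the packet count consumed by
 * em_rxeof(), which the DEVICE_POLLING framework can use when
 * accounting the per-call "count" budget (an assumption about the
 * framework's adaptive accounting, not stated in this file).
 */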
1458 #endif /* DEVICE_POLLING */
1459
1460
1461 /*********************************************************************
1462  *
1463  *  Fast Legacy/MSI Combined Interrupt Service routine  
1464  *
1465  *********************************************************************/
1466 static int
1467 em_irq_fast(void *arg)
1468 {
1469         struct adapter  *adapter = arg;
1470         struct ifnet    *ifp;
1471         u32             reg_icr;
1472
1473         ifp = adapter->ifp;
1474
1475         reg_icr = E1000_READ_REG(&adapter->hw, E1000_ICR);
1476
1477         /* Hot eject?  */
1478         if (reg_icr == 0xffffffff)
1479                 return FILTER_STRAY;
1480
1481         /* Definitely not our interrupt.  */
1482         if (reg_icr == 0x0)
1483                 return FILTER_STRAY;
1484
1485         /*
1486          * Starting with the 82571 chip, bit 31 should be used to
1487          * determine whether the interrupt belongs to us.
1488          */
1489         if (adapter->hw.mac.type >= e1000_82571 &&
1490             (reg_icr & E1000_ICR_INT_ASSERTED) == 0)
1491                 return FILTER_STRAY;
1492
1493         em_disable_intr(adapter);
1494         taskqueue_enqueue(adapter->tq, &adapter->que_task);
1495
1496         /* Link status change */
1497         if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
1498                 adapter->hw.mac.get_link_status = 1;
1499                 taskqueue_enqueue(taskqueue_fast, &adapter->link_task);
1500         }
1501
1502         if (reg_icr & E1000_ICR_RXO)
1503                 adapter->rx_overruns++;
1504         return FILTER_HANDLED;
1505 }
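/*
 * Editor's note: em_irq_fast() runs as an interrupt filter, so it only
 * reads and classifies ICR, masks further interrupts, and defers the
 * real RX/TX work to the em_handle_que() taskqueue below, which
 * re-enables interrupts once the rings have been serviced.
 */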
1506
1507 /* Combined RX/TX handler, used by Legacy and MSI */
1508 static void
1509 em_handle_que(void *context, int pending)
1510 {
1511         struct adapter  *adapter = context;
1512         struct ifnet    *ifp = adapter->ifp;
1513         struct tx_ring  *txr = adapter->tx_rings;
1514         struct rx_ring  *rxr = adapter->rx_rings;
1515
1516
1517         if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
1518                 bool more = em_rxeof(rxr, adapter->rx_process_limit, NULL);
1519                 EM_TX_LOCK(txr);
1520                 em_txeof(txr);
1521 #ifdef EM_MULTIQUEUE
1522                 if (!drbr_empty(ifp, txr->br))
1523                         em_mq_start_locked(ifp, txr, NULL);
1524 #else
1525                 if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
1526                         em_start_locked(ifp, txr);
1527 #endif
1528                 EM_TX_UNLOCK(txr);
1529                 if (more) {
1530                         taskqueue_enqueue(adapter->tq, &adapter->que_task);
1531                         return;
1532                 }
1533         }
1534
1535         em_enable_intr(adapter);
1536         return;
1537 }
1538
1539
1540 /*********************************************************************
1541  *
1542  *  MSIX Interrupt Service Routines
1543  *
1544  **********************************************************************/
1545 static void
1546 em_msix_tx(void *arg)
1547 {
1548         struct tx_ring *txr = arg;
1549         struct adapter *adapter = txr->adapter;
1550         struct ifnet    *ifp = adapter->ifp;
1551
1552         ++txr->tx_irq;
1553         EM_TX_LOCK(txr);
1554         em_txeof(txr);
1555 #ifdef EM_MULTIQUEUE
1556         if (!drbr_empty(ifp, txr->br))
1557                 em_mq_start_locked(ifp, txr, NULL);
1558 #else
1559         if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
1560                 em_start_locked(ifp, txr);
1561 #endif
1562         /* Reenable this interrupt */
1563         E1000_WRITE_REG(&adapter->hw, E1000_IMS, txr->ims);
1564         EM_TX_UNLOCK(txr);
1565         return;
1566 }
1567
1568 /*********************************************************************
1569  *
1570  *  MSIX RX Interrupt Service routine
1571  *
1572  **********************************************************************/
1573
1574 static void
1575 em_msix_rx(void *arg)
1576 {
1577         struct rx_ring  *rxr = arg;
1578         struct adapter  *adapter = rxr->adapter;
1579         bool            more;
1580
1581         ++rxr->rx_irq;
1582         if (!(adapter->ifp->if_drv_flags & IFF_DRV_RUNNING))
1583                 return;
1584         more = em_rxeof(rxr, adapter->rx_process_limit, NULL);
1585         if (more)
1586                 taskqueue_enqueue(rxr->tq, &rxr->rx_task);
1587         else
1588                 /* Reenable this interrupt */
1589                 E1000_WRITE_REG(&adapter->hw, E1000_IMS, rxr->ims);
1590         return;
1591 }
1592
1593 /*********************************************************************
1594  *
1595  *  MSIX Link Fast Interrupt Service routine
1596  *
1597  **********************************************************************/
1598 static void
1599 em_msix_link(void *arg)
1600 {
1601         struct adapter  *adapter = arg;
1602         u32             reg_icr;
1603
1604         ++adapter->link_irq;
1605         reg_icr = E1000_READ_REG(&adapter->hw, E1000_ICR);
1606
1607         if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
1608                 adapter->hw.mac.get_link_status = 1;
1609                 em_handle_link(adapter, 0);
1610         } else
1611                 E1000_WRITE_REG(&adapter->hw, E1000_IMS,
1612                     EM_MSIX_LINK | E1000_IMS_LSC);
1613         return;
1614 }
1615
1616 static void
1617 em_handle_rx(void *context, int pending)
1618 {
1619         struct rx_ring  *rxr = context;
1620         struct adapter  *adapter = rxr->adapter;
1621         bool            more;
1622
1623         more = em_rxeof(rxr, adapter->rx_process_limit, NULL);
1624         if (more)
1625                 taskqueue_enqueue(rxr->tq, &rxr->rx_task);
1626         else
1627                 /* Reenable this interrupt */
1628                 E1000_WRITE_REG(&adapter->hw, E1000_IMS, rxr->ims);
1629 }
1630
1631 static void
1632 em_handle_tx(void *context, int pending)
1633 {
1634         struct tx_ring  *txr = context;
1635         struct adapter  *adapter = txr->adapter;
1636         struct ifnet    *ifp = adapter->ifp;
1637
1638         EM_TX_LOCK(txr);
1639         em_txeof(txr);
1640 #ifdef EM_MULTIQUEUE
1641         if (!drbr_empty(ifp, txr->br))
1642                 em_mq_start_locked(ifp, txr, NULL);
1643 #else
1644         if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
1645                 em_start_locked(ifp, txr);
1646 #endif
1647         E1000_WRITE_REG(&adapter->hw, E1000_IMS, txr->ims);
1648         EM_TX_UNLOCK(txr);
1649 }
1650
1651 static void
1652 em_handle_link(void *context, int pending)
1653 {
1654         struct adapter  *adapter = context;
1655         struct tx_ring  *txr = adapter->tx_rings;
1656         struct ifnet *ifp = adapter->ifp;
1657
1658         if (!(ifp->if_drv_flags & IFF_DRV_RUNNING))
1659                 return;
1660
1661         EM_CORE_LOCK(adapter);
1662         callout_stop(&adapter->timer);
1663         em_update_link_status(adapter);
1664         callout_reset(&adapter->timer, hz, em_local_timer, adapter);
1665         E1000_WRITE_REG(&adapter->hw, E1000_IMS,
1666             EM_MSIX_LINK | E1000_IMS_LSC);
1667         if (adapter->link_active) {
1668                 for (int i = 0; i < adapter->num_queues; i++, txr++) {
1669                         EM_TX_LOCK(txr);
1670 #ifdef EM_MULTIQUEUE
1671                         if (!drbr_empty(ifp, txr->br))
1672                                 em_mq_start_locked(ifp, txr, NULL);
1673 #else
1674                         if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
1675                                 em_start_locked(ifp, txr);
1676 #endif
1677                         EM_TX_UNLOCK(txr);
1678                 }
1679         }
1680         EM_CORE_UNLOCK(adapter);
1681 }
1682
1683
1684 /*********************************************************************
1685  *
1686  *  Media Ioctl callback
1687  *
1688  *  This routine is called whenever the user queries the status of
1689  *  the interface using ifconfig.
1690  *
1691  **********************************************************************/
1692 static void
1693 em_media_status(struct ifnet *ifp, struct ifmediareq *ifmr)
1694 {
1695         struct adapter *adapter = ifp->if_softc;
1696         u_char fiber_type = IFM_1000_SX;
1697
1698         INIT_DEBUGOUT("em_media_status: begin");
1699
1700         EM_CORE_LOCK(adapter);
1701         em_update_link_status(adapter);
1702
1703         ifmr->ifm_status = IFM_AVALID;
1704         ifmr->ifm_active = IFM_ETHER;
1705
1706         if (!adapter->link_active) {
1707                 EM_CORE_UNLOCK(adapter);
1708                 return;
1709         }
1710
1711         ifmr->ifm_status |= IFM_ACTIVE;
1712
1713         if ((adapter->hw.phy.media_type == e1000_media_type_fiber) ||
1714             (adapter->hw.phy.media_type == e1000_media_type_internal_serdes)) {
1715                 ifmr->ifm_active |= fiber_type | IFM_FDX;
1716         } else {
1717                 switch (adapter->link_speed) {
1718                 case 10:
1719                         ifmr->ifm_active |= IFM_10_T;
1720                         break;
1721                 case 100:
1722                         ifmr->ifm_active |= IFM_100_TX;
1723                         break;
1724                 case 1000:
1725                         ifmr->ifm_active |= IFM_1000_T;
1726                         break;
1727                 }
1728                 if (adapter->link_duplex == FULL_DUPLEX)
1729                         ifmr->ifm_active |= IFM_FDX;
1730                 else
1731                         ifmr->ifm_active |= IFM_HDX;
1732         }
1733         EM_CORE_UNLOCK(adapter);
1734 }
1735
1736 /*********************************************************************
1737  *
1738  *  Media Ioctl callback
1739  *
1740  *  This routine is called when the user changes speed/duplex using
1741  *  the media/mediaopt options with ifconfig.
1742  *
1743  **********************************************************************/
1744 static int
1745 em_media_change(struct ifnet *ifp)
1746 {
1747         struct adapter *adapter = ifp->if_softc;
1748         struct ifmedia  *ifm = &adapter->media;
1749
1750         INIT_DEBUGOUT("em_media_change: begin");
1751
1752         if (IFM_TYPE(ifm->ifm_media) != IFM_ETHER)
1753                 return (EINVAL);
1754
1755         EM_CORE_LOCK(adapter);
1756         switch (IFM_SUBTYPE(ifm->ifm_media)) {
1757         case IFM_AUTO:
1758                 adapter->hw.mac.autoneg = DO_AUTO_NEG;
1759                 adapter->hw.phy.autoneg_advertised = AUTONEG_ADV_DEFAULT;
1760                 break;
1761         case IFM_1000_LX:
1762         case IFM_1000_SX:
1763         case IFM_1000_T:
1764                 adapter->hw.mac.autoneg = DO_AUTO_NEG;
1765                 adapter->hw.phy.autoneg_advertised = ADVERTISE_1000_FULL;
1766                 break;
1767         case IFM_100_TX:
1768                 adapter->hw.mac.autoneg = FALSE;
1769                 adapter->hw.phy.autoneg_advertised = 0;
1770                 if ((ifm->ifm_media & IFM_GMASK) == IFM_FDX)
1771                         adapter->hw.mac.forced_speed_duplex = ADVERTISE_100_FULL;
1772                 else
1773                         adapter->hw.mac.forced_speed_duplex = ADVERTISE_100_HALF;
1774                 break;
1775         case IFM_10_T:
1776                 adapter->hw.mac.autoneg = FALSE;
1777                 adapter->hw.phy.autoneg_advertised = 0;
1778                 if ((ifm->ifm_media & IFM_GMASK) == IFM_FDX)
1779                         adapter->hw.mac.forced_speed_duplex = ADVERTISE_10_FULL;
1780                 else
1781                         adapter->hw.mac.forced_speed_duplex = ADVERTISE_10_HALF;
1782                 break;
1783         default:
1784                 device_printf(adapter->dev, "Unsupported media type\n");
1785         }
1786
1787         em_init_locked(adapter);
1788         EM_CORE_UNLOCK(adapter);
1789
1790         return (0);
1791 }
1792
1793 /*********************************************************************
1794  *
1795  *  This routine maps the mbufs to tx descriptors.
1796  *
1797  *  return 0 on success, positive errno on failure
1798  **********************************************************************/
1799
1800 static int
1801 em_xmit(struct tx_ring *txr, struct mbuf **m_headp)
1802 {
1803         struct adapter          *adapter = txr->adapter;
1804         bus_dma_segment_t       segs[EM_MAX_SCATTER];
1805         bus_dmamap_t            map;
1806         struct em_buffer        *tx_buffer, *tx_buffer_mapped;
1807         struct e1000_tx_desc    *ctxd = NULL;
1808         struct mbuf             *m_head;
1809         struct ether_header     *eh;
1810         struct ip               *ip = NULL;
1811         struct tcphdr           *tp = NULL;
1812         u32                     txd_upper, txd_lower, txd_used, txd_saved;
1813         int                     ip_off, poff;
1814         int                     nsegs, i, j, first, last = 0;
1815         int                     error, do_tso, tso_desc = 0, remap = 1;
1816
1817 retry:
1818         m_head = *m_headp;
1819         txd_upper = txd_lower = txd_used = txd_saved = 0;
1820         do_tso = ((m_head->m_pkthdr.csum_flags & CSUM_TSO) != 0);
1821         ip_off = poff = 0;
1822
1823         /*
1824          * Intel recommends that the entire IP/TCP header reside in a single
1825          * buffer. If multiple descriptors are used to describe the IP and
1826          * TCP header, each descriptor should describe one or more
1827          * complete headers; descriptors referencing only parts of headers
1828          * are not supported. If all layer headers are not coalesced into
1829          * a single buffer, each buffer should not cross a 4KB boundary,
1830          * or be larger than the maximum read request size.
1831          * The controller also requires modifying the IP/TCP header for
1832          * TSO to work, so we first obtain a writable mbuf chain and then
1833          * coalesce the ethernet/IP/TCP headers into a single buffer to
1834          * meet the controller's requirement. This also simplifies
1835          * IP/TCP/UDP checksum offloading, which has similar restrictions.
1836          */
1837         if (do_tso || m_head->m_pkthdr.csum_flags & CSUM_OFFLOAD) {
1838                 if (do_tso || (m_head->m_next != NULL && 
1839                     m_head->m_pkthdr.csum_flags & CSUM_OFFLOAD)) {
1840                         if (M_WRITABLE(*m_headp) == 0) {
1841                                 m_head = m_dup(*m_headp, M_NOWAIT);
1842                                 m_freem(*m_headp);
1843                                 if (m_head == NULL) {
1844                                         *m_headp = NULL;
1845                                         return (ENOBUFS);
1846                                 }
1847                                 *m_headp = m_head;
1848                         }
1849                 }
1850                 /*
1851                  * XXX
1852                  * Assume IPv4, we don't have TSO/checksum offload support
1853                  * for IPv6 yet.
1854                  */
1855                 ip_off = sizeof(struct ether_header);
1856                 m_head = m_pullup(m_head, ip_off);
1857                 if (m_head == NULL) {
1858                         *m_headp = NULL;
1859                         return (ENOBUFS);
1860                 }
1861                 eh = mtod(m_head, struct ether_header *);
1862                 if (eh->ether_type == htons(ETHERTYPE_VLAN)) {
1863                         ip_off = sizeof(struct ether_vlan_header);
1864                         m_head = m_pullup(m_head, ip_off);
1865                         if (m_head == NULL) {
1866                                 *m_headp = NULL;
1867                                 return (ENOBUFS);
1868                         }
1869                 }
1870                 m_head = m_pullup(m_head, ip_off + sizeof(struct ip));
1871                 if (m_head == NULL) {
1872                         *m_headp = NULL;
1873                         return (ENOBUFS);
1874                 }
1875                 ip = (struct ip *)(mtod(m_head, char *) + ip_off);
1876                 poff = ip_off + (ip->ip_hl << 2);
1877                 if (do_tso) {
1878                         m_head = m_pullup(m_head, poff + sizeof(struct tcphdr));
1879                         if (m_head == NULL) {
1880                                 *m_headp = NULL;
1881                                 return (ENOBUFS);
1882                         }
1883                         tp = (struct tcphdr *)(mtod(m_head, char *) + poff);
1884                         /*
1885                          * TSO workaround: pull the complete TCP header
1886                          * plus 4 extra bytes of data into the first mbuf.
1887                          */
1888                         m_head = m_pullup(m_head, poff + (tp->th_off << 2) + 4);
1889                         if (m_head == NULL) {
1890                                 *m_headp = NULL;
1891                                 return (ENOBUFS);
1892                         }
1893                         ip = (struct ip *)(mtod(m_head, char *) + ip_off);
1894                         ip->ip_len = 0;
1895                         ip->ip_sum = 0;
1896                         /*
1897                          * The TCP pseudo-header checksum must not include
1898                          * the TCP payload length, so the driver recomputes
1899                          * it here into the form the hardware expects, per
1900                          * Microsoft's Large Send specification.
1901                          */
1902                         tp = (struct tcphdr *)(mtod(m_head, char *) + poff);
1903                         tp->th_sum = in_pseudo(ip->ip_src.s_addr,
1904                             ip->ip_dst.s_addr, htons(IPPROTO_TCP));
1905                 } else if (m_head->m_pkthdr.csum_flags & CSUM_TCP) {
1906                         m_head = m_pullup(m_head, poff + sizeof(struct tcphdr));
1907                         if (m_head == NULL) {
1908                                 *m_headp = NULL;
1909                                 return (ENOBUFS);
1910                         }
1911                         tp = (struct tcphdr *)(mtod(m_head, char *) + poff);
1912                         m_head = m_pullup(m_head, poff + (tp->th_off << 2));
1913                         if (m_head == NULL) {
1914                                 *m_headp = NULL;
1915                                 return (ENOBUFS);
1916                         }
1917                         ip = (struct ip *)(mtod(m_head, char *) + ip_off);
1918                         tp = (struct tcphdr *)(mtod(m_head, char *) + poff);
1919                 } else if (m_head->m_pkthdr.csum_flags & CSUM_UDP) {
1920                         m_head = m_pullup(m_head, poff + sizeof(struct udphdr));
1921                         if (m_head == NULL) {
1922                                 *m_headp = NULL;
1923                                 return (ENOBUFS);
1924                         }
1925                         ip = (struct ip *)(mtod(m_head, char *) + ip_off);
1926                 }
1927                 *m_headp = m_head;
1928         }
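        /*
         * Editor's note: at this point the leading mbuf holds the
         * Ethernet, IP and (for TSO/TCP) TCP headers contiguously, so
         * the ip/tp pointers computed above stay valid for the offload
         * setup calls further down.
         */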
1929
1930         /*
1931          * Map the packet for DMA
1932          *
1933          * Capture the first descriptor index,
1934          * this descriptor will have the index
1935          * of the EOP which is the only one that
1936          * now gets a DONE bit writeback.
1937          */
1938         first = txr->next_avail_desc;
1939         tx_buffer = &txr->tx_buffers[first];
1940         tx_buffer_mapped = tx_buffer;
1941         map = tx_buffer->map;
1942
1943         error = bus_dmamap_load_mbuf_sg(txr->txtag, map,
1944             *m_headp, segs, &nsegs, BUS_DMA_NOWAIT);
1945
1946         /*
1947          * There are two types of errors we can (try) to handle:
1948          * - EFBIG means the mbuf chain was too long and bus_dma ran
1949          *   out of segments.  Defragment the mbuf chain and try again.
1950          * - ENOMEM means bus_dma could not obtain enough bounce buffers
1951          *   at this point in time.  Defer sending and try again later.
1952          * All other errors, in particular EINVAL, are fatal and prevent the
1953          * mbuf chain from ever going through.  Drop it and report error.
1954          */
1955         if (error == EFBIG && remap) {
1956                 struct mbuf *m;
1957
1958                 m = m_defrag(*m_headp, M_NOWAIT);
1959                 if (m == NULL) {
1960                         adapter->mbuf_alloc_failed++;
1961                         m_freem(*m_headp);
1962                         *m_headp = NULL;
1963                         return (ENOBUFS);
1964                 }
1965                 *m_headp = m;
1966
1967                 /* Try it again, but only once */
1968                 remap = 0;
1969                 goto retry;
1970         } else if (error == ENOMEM) {
1971                 adapter->no_tx_dma_setup++;
1972                 return (error);
1973         } else if (error != 0) {
1974                 adapter->no_tx_dma_setup++;
1975                 m_freem(*m_headp);
1976                 *m_headp = NULL;
1977                 return (error);
1978         }
1979
1980         /*
1981          * TSO Hardware workaround, if this packet is not
1982          * TSO, and is only a single descriptor long, and
1983          * it follows a TSO burst, then we need to add a
1984          * sentinel descriptor to prevent premature writeback.
1985          */
1986         if ((do_tso == 0) && (txr->tx_tso == TRUE)) {
1987                 if (nsegs == 1)
1988                         tso_desc = TRUE;
1989                 txr->tx_tso = FALSE;
1990         }
1991
1992         if (nsegs > (txr->tx_avail - 2)) {
1993                 txr->no_desc_avail++;
1994                 bus_dmamap_unload(txr->txtag, map);
1995                 return (ENOBUFS);
1996         }
1997         m_head = *m_headp;
1998
1999         /* Do hardware assists */
2000         if (m_head->m_pkthdr.csum_flags & CSUM_TSO) {
2001                 em_tso_setup(txr, m_head, ip_off, ip, tp,
2002                     &txd_upper, &txd_lower);
2003                 /* we need to make a final sentinel transmit desc */
2004                 tso_desc = TRUE;
2005         } else if (m_head->m_pkthdr.csum_flags & CSUM_OFFLOAD)
2006                 em_transmit_checksum_setup(txr, m_head,
2007                     ip_off, ip, &txd_upper, &txd_lower);
2008
2009         if (m_head->m_flags & M_VLANTAG) {
2010                 /* Set the vlan id. */
2011                 txd_upper |=
2012                     (htole16(m_head->m_pkthdr.ether_vtag) << 16);
2013                 /* Tell hardware to add tag */
2014                 txd_lower |= htole32(E1000_TXD_CMD_VLE);
2015         }
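        /*
         * Editor's note (illustrative): the high 16 bits of the legacy
         * TX descriptor's upper dword carry the 802.1Q tag, so on a
         * little-endian host VLAN id 100 becomes txd_upper |= 100 << 16,
         * and the VLE command bit asks the hardware to insert the tag
         * on the wire.
         */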
2016
2017         i = txr->next_avail_desc;
2018
2019         /* Set up our transmit descriptors */
2020         for (j = 0; j < nsegs; j++) {
2021                 bus_size_t seg_len;
2022                 bus_addr_t seg_addr;
2023
2024                 tx_buffer = &txr->tx_buffers[i];
2025                 ctxd = &txr->tx_base[i];
2026                 seg_addr = segs[j].ds_addr;
2027                 seg_len  = segs[j].ds_len;
2028                 /*
2029                 ** TSO Workaround:
2030                 ** If this is the last descriptor, we want to
2031                 ** split it so we have a small final sentinel
2032                 */
2033                 if (tso_desc && (j == (nsegs -1)) && (seg_len > 8)) {
2034                         seg_len -= 4;
2035                         ctxd->buffer_addr = htole64(seg_addr);
2036                         ctxd->lower.data = htole32(
2037                         adapter->txd_cmd | txd_lower | seg_len);
2038                         ctxd->upper.data =
2039                             htole32(txd_upper);
2040                         if (++i == adapter->num_tx_desc)
2041                                 i = 0;
2042                         /* Now make the sentinel */     
2043                         ++txd_used; /* using an extra txd */
2044                         ctxd = &txr->tx_base[i];
2045                         tx_buffer = &txr->tx_buffers[i];
2046                         ctxd->buffer_addr =
2047                             htole64(seg_addr + seg_len);
2048                         ctxd->lower.data = htole32(
2049                         adapter->txd_cmd | txd_lower | 4);
2050                         ctxd->upper.data =
2051                             htole32(txd_upper);
2052                         last = i;
2053                         if (++i == adapter->num_tx_desc)
2054                                 i = 0;
2055                 } else {
2056                         ctxd->buffer_addr = htole64(seg_addr);
2057                         ctxd->lower.data = htole32(
2058                         adapter->txd_cmd | txd_lower | seg_len);
2059                         ctxd->upper.data =
2060                             htole32(txd_upper);
2061                         last = i;
2062                         if (++i == adapter->num_tx_desc)
2063                                 i = 0;
2064                 }
2065                 tx_buffer->m_head = NULL;
2066                 tx_buffer->next_eop = -1;
2067         }
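        /*
         * Worked example (editor's note): if the final segment of a TSO
         * burst is 1514 bytes, the split above emits a 1510-byte
         * descriptor followed by a 4-byte sentinel descriptor, keeping
         * the DONE writeback from landing before the whole segment has
         * been fetched.
         */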
2068
2069         txr->next_avail_desc = i;
2070         txr->tx_avail -= nsegs;
2071         if (tso_desc) /* TSO used an extra for sentinel */
2072                 txr->tx_avail -= txd_used;
2073
2074         tx_buffer->m_head = m_head;
2075         /*
2076         ** Here we swap the map so the last descriptor,
2077         ** which gets the completion interrupt, has the
2078         ** real map, and the first descriptor gets the
2079         ** unused map from this descriptor.
2080         */
2081         tx_buffer_mapped->map = tx_buffer->map;
2082         tx_buffer->map = map;
2083         bus_dmamap_sync(txr->txtag, map, BUS_DMASYNC_PREWRITE);
2084
2085         /*
2086          * Last Descriptor of Packet
2087          * needs End Of Packet (EOP)
2088          * and Report Status (RS)
2089          */
2090         ctxd->lower.data |=
2091             htole32(E1000_TXD_CMD_EOP | E1000_TXD_CMD_RS);
2092         /*
2093          * Keep track in the first buffer which
2094          * descriptor will be written back
2095          */
2096         tx_buffer = &txr->tx_buffers[first];
2097         tx_buffer->next_eop = last;
2098         /* Update the watchdog time early and often */
2099         txr->watchdog_time = ticks;
2100
2101         /*
2102          * Advance the Transmit Descriptor Tail (TDT); this tells the E1000
2103          * that this frame is available to transmit.
2104          */
2105         bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
2106             BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
2107         E1000_WRITE_REG(&adapter->hw, E1000_TDT(txr->me), i);
2108
2109         return (0);
2110 }
2111
2112 static void
2113 em_set_promisc(struct adapter *adapter)
2114 {
2115         struct ifnet    *ifp = adapter->ifp;
2116         u32             reg_rctl;
2117
2118         reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
2119
2120         if (ifp->if_flags & IFF_PROMISC) {
2121                 reg_rctl |= (E1000_RCTL_UPE | E1000_RCTL_MPE);
2122                 /* Turn this on if you want to see bad packets */
2123                 if (em_debug_sbp)
2124                         reg_rctl |= E1000_RCTL_SBP;
2125                 E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2126         } else if (ifp->if_flags & IFF_ALLMULTI) {
2127                 reg_rctl |= E1000_RCTL_MPE;
2128                 reg_rctl &= ~E1000_RCTL_UPE;
2129                 E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2130         }
2131 }
2132
2133 static void
2134 em_disable_promisc(struct adapter *adapter)
2135 {
2136         u32     reg_rctl;
2137
2138         reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
2139
2140         reg_rctl &=  (~E1000_RCTL_UPE);
2141         reg_rctl &=  (~E1000_RCTL_MPE);
2142         reg_rctl &=  (~E1000_RCTL_SBP);
2143         E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2144 }
2145
2146
2147 /*********************************************************************
2148  *  Multicast Update
2149  *
2150  *  This routine is called whenever the multicast address list is updated.
2151  *
2152  **********************************************************************/
2153
2154 static void
2155 em_set_multi(struct adapter *adapter)
2156 {
2157         struct ifnet    *ifp = adapter->ifp;
2158         struct ifmultiaddr *ifma;
2159         u32 reg_rctl = 0;
2160         u8  *mta; /* Multicast array memory */
2161         int mcnt = 0;
2162
2163         IOCTL_DEBUGOUT("em_set_multi: begin");
2164
2165         mta = adapter->mta;
2166         bzero(mta, sizeof(u8) * ETH_ADDR_LEN * MAX_NUM_MULTICAST_ADDRESSES);
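        /*
         * Editor's note: mta is a flat array of 6-byte link-level
         * addresses; entry i occupies bytes [i * ETH_ADDR_LEN,
         * (i + 1) * ETH_ADDR_LEN) and mcnt below counts the entries
         * handed to e1000_update_mc_addr_list().
         */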
2167
2168         if (adapter->hw.mac.type == e1000_82542 && 
2169             adapter->hw.revision_id == E1000_REVISION_2) {
2170                 reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
2171                 if (adapter->hw.bus.pci_cmd_word & CMD_MEM_WRT_INVALIDATE)
2172                         e1000_pci_clear_mwi(&adapter->hw);
2173                 reg_rctl |= E1000_RCTL_RST;
2174                 E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2175                 msec_delay(5);
2176         }
2177
2178 #if __FreeBSD_version < 800000
2179         IF_ADDR_LOCK(ifp);
2180 #else
2181         if_maddr_rlock(ifp);
2182 #endif
2183         TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
2184                 if (ifma->ifma_addr->sa_family != AF_LINK)
2185                         continue;
2186
2187                 if (mcnt == MAX_NUM_MULTICAST_ADDRESSES)
2188                         break;
2189
2190                 bcopy(LLADDR((struct sockaddr_dl *)ifma->ifma_addr),
2191                     &mta[mcnt * ETH_ADDR_LEN], ETH_ADDR_LEN);
2192                 mcnt++;
2193         }
2194 #if __FreeBSD_version < 800000
2195         IF_ADDR_UNLOCK(ifp);
2196 #else
2197         if_maddr_runlock(ifp);
2198 #endif
2199         if (mcnt >= MAX_NUM_MULTICAST_ADDRESSES) {
2200                 reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
2201                 reg_rctl |= E1000_RCTL_MPE;
2202                 E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2203         } else
2204                 e1000_update_mc_addr_list(&adapter->hw, mta, mcnt);
2205
2206         if (adapter->hw.mac.type == e1000_82542 && 
2207             adapter->hw.revision_id == E1000_REVISION_2) {
2208                 reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
2209                 reg_rctl &= ~E1000_RCTL_RST;
2210                 E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2211                 msec_delay(5);
2212                 if (adapter->hw.bus.pci_cmd_word & CMD_MEM_WRT_INVALIDATE)
2213                         e1000_pci_set_mwi(&adapter->hw);
2214         }
2215 }
2216
2217
2218 /*********************************************************************
2219  *  Timer routine
2220  *
2221  *  This routine checks for link status and updates statistics.
2222  *
2223  **********************************************************************/
2224
2225 static void
2226 em_local_timer(void *arg)
2227 {
2228         struct adapter  *adapter = arg;
2229         struct ifnet    *ifp = adapter->ifp;
2230         struct tx_ring  *txr = adapter->tx_rings;
2231         struct rx_ring  *rxr = adapter->rx_rings;
2232         u32             trigger;
2233
2234         EM_CORE_LOCK_ASSERT(adapter);
2235
2236         em_update_link_status(adapter);
2237         em_update_stats_counters(adapter);
2238
2239         /* Reset LAA into RAR[0] on 82571 */
2240         if ((adapter->hw.mac.type == e1000_82571) &&
2241             e1000_get_laa_state_82571(&adapter->hw))
2242                 e1000_rar_set(&adapter->hw, adapter->hw.mac.addr, 0);
2243
2244         /* Mask to use in the irq trigger */
2245         if (adapter->msix_mem)
2246                 trigger = rxr->ims; /* RX for 82574 */
2247         else
2248                 trigger = E1000_ICS_RXDMT0;
2249
2250         /*
2251         ** Check on the state of the TX queue(s); this
2252         ** can be done without the lock because it is read-only
2253         ** and the HUNG state will be static if set.
2254         */
2255         for (int i = 0; i < adapter->num_queues; i++, txr++) {
2256                 if ((txr->queue_status == EM_QUEUE_HUNG) &&
2257                     (adapter->pause_frames == 0))
2258                         goto hung;
2259                 /* Schedule a TX tasklet if needed */
2260                 if (txr->tx_avail <= EM_MAX_SCATTER)
2261                         taskqueue_enqueue(txr->tq, &txr->tx_task);
2262         }
2263         
2264         adapter->pause_frames = 0;
2265         callout_reset(&adapter->timer, hz, em_local_timer, adapter);
2266 #ifndef DEVICE_POLLING
2267         /* Trigger an RX interrupt to guarantee mbuf refresh */
2268         E1000_WRITE_REG(&adapter->hw, E1000_ICS, trigger);
2269 #endif
2270         return;
2271 hung:
2272         /* Looks like we're hung */
2273         device_printf(adapter->dev, "Watchdog timeout -- resetting\n");
2274         device_printf(adapter->dev,
2275             "Queue(%d) tdh = %d, hw tdt = %d\n", txr->me,
2276             E1000_READ_REG(&adapter->hw, E1000_TDH(txr->me)),
2277             E1000_READ_REG(&adapter->hw, E1000_TDT(txr->me)));
2278         device_printf(adapter->dev, "TX(%d) desc avail = %d, "
2279             "Next TX to Clean = %d\n",
2280             txr->me, txr->tx_avail, txr->next_to_clean);
2281         ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
2282         adapter->watchdog_events++;
2283         adapter->pause_frames = 0;
2284         em_init_locked(adapter);
2285 }
2286
2287
2288 static void
2289 em_update_link_status(struct adapter *adapter)
2290 {
2291         struct e1000_hw *hw = &adapter->hw;
2292         struct ifnet *ifp = adapter->ifp;
2293         device_t dev = adapter->dev;
2294         struct tx_ring *txr = adapter->tx_rings;
2295         u32 link_check = 0;
2296
2297         /* Get the cached link value or read phy for real */
2298         switch (hw->phy.media_type) {
2299         case e1000_media_type_copper:
2300                 if (hw->mac.get_link_status) {
2301                         /* Do the work to read phy */
2302                         e1000_check_for_link(hw);
2303                         link_check = !hw->mac.get_link_status;
2304                         if (link_check) /* ESB2 fix */
2305                                 e1000_cfg_on_link_up(hw);
2306                 } else
2307                         link_check = TRUE;
2308                 break;
2309         case e1000_media_type_fiber:
2310                 e1000_check_for_link(hw);
2311                 link_check = (E1000_READ_REG(hw, E1000_STATUS) &
2312                                  E1000_STATUS_LU);
2313                 break;
2314         case e1000_media_type_internal_serdes:
2315                 e1000_check_for_link(hw);
2316                 link_check = adapter->hw.mac.serdes_has_link;
2317                 break;
2318         default:
2319         case e1000_media_type_unknown:
2320                 break;
2321         }
2322
2323         /* Now check for a transition */
2324         if (link_check && (adapter->link_active == 0)) {
2325                 e1000_get_speed_and_duplex(hw, &adapter->link_speed,
2326                     &adapter->link_duplex);
2327                 /* Check if we must disable SPEED_MODE bit on PCI-E */
2328                 if ((adapter->link_speed != SPEED_1000) &&
2329                     ((hw->mac.type == e1000_82571) ||
2330                     (hw->mac.type == e1000_82572))) {
2331                         int tarc0;
2332                         tarc0 = E1000_READ_REG(hw, E1000_TARC(0));
2333                         tarc0 &= ~SPEED_MODE_BIT;
2334                         E1000_WRITE_REG(hw, E1000_TARC(0), tarc0);
2335                 }
2336                 if (bootverbose)
2337                         device_printf(dev, "Link is up %d Mbps %s\n",
2338                             adapter->link_speed,
2339                             ((adapter->link_duplex == FULL_DUPLEX) ?
2340                             "Full Duplex" : "Half Duplex"));
2341                 adapter->link_active = 1;
2342                 adapter->smartspeed = 0;
2343                 ifp->if_baudrate = adapter->link_speed * 1000000;
2344                 if_link_state_change(ifp, LINK_STATE_UP);
2345         } else if (!link_check && (adapter->link_active == 1)) {
2346                 ifp->if_baudrate = adapter->link_speed = 0;
2347                 adapter->link_duplex = 0;
2348                 if (bootverbose)
2349                         device_printf(dev, "Link is Down\n");
2350                 adapter->link_active = 0;
2351                 /* Link down, disable watchdog */
2352                 for (int i = 0; i < adapter->num_queues; i++, txr++)
2353                         txr->queue_status = EM_QUEUE_IDLE;
2354                 if_link_state_change(ifp, LINK_STATE_DOWN);
2355         }
2356 }
2357
2358 /*********************************************************************
2359  *
2360  *  This routine disables all traffic on the adapter by issuing a
2361  *  global reset on the MAC and deallocates TX/RX buffers.
2362  *
2363  *  This routine should always be called with the CORE lock held;
2364  *  it acquires and releases each TX lock itself below.
2365  **********************************************************************/
2366
2367 static void
2368 em_stop(void *arg)
2369 {
2370         struct adapter  *adapter = arg;
2371         struct ifnet    *ifp = adapter->ifp;
2372         struct tx_ring  *txr = adapter->tx_rings;
2373
2374         EM_CORE_LOCK_ASSERT(adapter);
2375
2376         INIT_DEBUGOUT("em_stop: begin");
2377
2378         em_disable_intr(adapter);
2379         callout_stop(&adapter->timer);
2380
2381         /* Tell the stack that the interface is no longer active */
2382         ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
2383         ifp->if_drv_flags |= IFF_DRV_OACTIVE;
2384
2385         /* Unarm watchdog timer. */
2386         for (int i = 0; i < adapter->num_queues; i++, txr++) {
2387                 EM_TX_LOCK(txr);
2388                 txr->queue_status = EM_QUEUE_IDLE;
2389                 EM_TX_UNLOCK(txr);
2390         }
2391
2392         e1000_reset_hw(&adapter->hw);
2393         E1000_WRITE_REG(&adapter->hw, E1000_WUC, 0);
2394
2395         e1000_led_off(&adapter->hw);
2396         e1000_cleanup_led(&adapter->hw);
2397 }
2398
2399
2400 /*********************************************************************
2401  *
2402  *  Determine hardware revision.
2403  *
2404  **********************************************************************/
2405 static void
2406 em_identify_hardware(struct adapter *adapter)
2407 {
2408         device_t dev = adapter->dev;
2409
2410         /* Make sure our PCI config space has the necessary stuff set */
2411         adapter->hw.bus.pci_cmd_word = pci_read_config(dev, PCIR_COMMAND, 2);
2412         if (!((adapter->hw.bus.pci_cmd_word & PCIM_CMD_BUSMASTEREN) &&
2413             (adapter->hw.bus.pci_cmd_word & PCIM_CMD_MEMEN))) {
2414                 device_printf(dev, "Memory Access and/or Bus Master bits "
2415                     "were not set!\n");
2416                 adapter->hw.bus.pci_cmd_word |=
2417                 (PCIM_CMD_BUSMASTEREN | PCIM_CMD_MEMEN);
2418                 pci_write_config(dev, PCIR_COMMAND,
2419                     adapter->hw.bus.pci_cmd_word, 2);
2420         }
2421
2422         /* Save off the information about this board */
2423         adapter->hw.vendor_id = pci_get_vendor(dev);
2424         adapter->hw.device_id = pci_get_device(dev);
2425         adapter->hw.revision_id = pci_read_config(dev, PCIR_REVID, 1);
2426         adapter->hw.subsystem_vendor_id =
2427             pci_read_config(dev, PCIR_SUBVEND_0, 2);
2428         adapter->hw.subsystem_device_id =
2429             pci_read_config(dev, PCIR_SUBDEV_0, 2);
2430
2431         /* Do Shared Code Init and Setup */
2432         if (e1000_set_mac_type(&adapter->hw)) {
2433                 device_printf(dev, "Setup init failure\n");
2434                 return;
2435         }
2436 }
2437
2438 static int
2439 em_allocate_pci_resources(struct adapter *adapter)
2440 {
2441         device_t        dev = adapter->dev;
2442         int             rid;
2443
2444         rid = PCIR_BAR(0);
2445         adapter->memory = bus_alloc_resource_any(dev, SYS_RES_MEMORY,
2446             &rid, RF_ACTIVE);
2447         if (adapter->memory == NULL) {
2448                 device_printf(dev, "Unable to allocate bus resource: memory\n");
2449                 return (ENXIO);
2450         }
2451         adapter->osdep.mem_bus_space_tag =
2452             rman_get_bustag(adapter->memory);
2453         adapter->osdep.mem_bus_space_handle =
2454             rman_get_bushandle(adapter->memory);
2455         adapter->hw.hw_addr = (u8 *)&adapter->osdep.mem_bus_space_handle;
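        /*
         * Editor's note: hw_addr is not a direct register pointer here;
         * it points at the saved bus_space handle, and the shared code's
         * E1000_READ/WRITE_REG macros resolve register access through
         * the osdep tag/handle pair stored just above.
         */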
2456
2457         /* Default to a single queue */
2458         adapter->num_queues = 1;
2459
2460         /*
2461          * Setup MSI/X or MSI if PCI Express
2462          */
2463         adapter->msix = em_setup_msix(adapter);
2464
2465         adapter->hw.back = &adapter->osdep;
2466
2467         return (0);
2468 }
2469
2470 /*********************************************************************
2471  *
2472  *  Setup the Legacy or MSI Interrupt handler
2473  *
2474  **********************************************************************/
2475 int
2476 em_allocate_legacy(struct adapter *adapter)
2477 {
2478         device_t dev = adapter->dev;
2479         struct tx_ring  *txr = adapter->tx_rings;
2480         int error, rid = 0;
2481
2482         /* Manually turn off all interrupts */
2483         E1000_WRITE_REG(&adapter->hw, E1000_IMC, 0xffffffff);
2484
2485         if (adapter->msix == 1) /* using MSI */
2486                 rid = 1;
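        /*
         * Editor's note: SYS_RES_IRQ rid 0 selects the legacy INTx
         * line, while rid 1 is the single MSI message allocated in
         * em_setup_msix() below.
         */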
2487         /* We allocate a single interrupt resource */
2488         adapter->res = bus_alloc_resource_any(dev,
2489             SYS_RES_IRQ, &rid, RF_SHAREABLE | RF_ACTIVE);
2490         if (adapter->res == NULL) {
2491                 device_printf(dev, "Unable to allocate bus resource: "
2492                     "interrupt\n");
2493                 return (ENXIO);
2494         }
2495
2496         /*
2497          * Allocate a fast interrupt and the associated
2498          * deferred processing contexts.
2499          */
2500         TASK_INIT(&adapter->que_task, 0, em_handle_que, adapter);
2501         adapter->tq = taskqueue_create_fast("em_taskq", M_NOWAIT,
2502             taskqueue_thread_enqueue, &adapter->tq);
2503         taskqueue_start_threads(&adapter->tq, 1, PI_NET, "%s que",
2504             device_get_nameunit(adapter->dev));
2505         /* Use a TX only tasklet for local timer */
2506         TASK_INIT(&txr->tx_task, 0, em_handle_tx, txr);
2507         txr->tq = taskqueue_create_fast("em_txq", M_NOWAIT,
2508             taskqueue_thread_enqueue, &txr->tq);
2509         taskqueue_start_threads(&txr->tq, 1, PI_NET, "%s txq",
2510             device_get_nameunit(adapter->dev));
2511         TASK_INIT(&adapter->link_task, 0, em_handle_link, adapter);
2512         if ((error = bus_setup_intr(dev, adapter->res, INTR_TYPE_NET,
2513             em_irq_fast, NULL, adapter, &adapter->tag)) != 0) {
2514                 device_printf(dev, "Failed to register fast interrupt "
2515                             "handler: %d\n", error);
2516                 taskqueue_free(adapter->tq);
2517                 adapter->tq = NULL;
2518                 return (error);
2519         }
2520         
2521         return (0);
2522 }
2523
2524 /*********************************************************************
2525  *
2526  *  Setup the MSIX Interrupt handlers
2527  *   This is not really multiqueue; rather,
2528  *   it is just separate interrupt vectors
2529  *   for TX, RX, and Link.
2530  *
2531  **********************************************************************/
2532 int
2533 em_allocate_msix(struct adapter *adapter)
2534 {
2535         device_t        dev = adapter->dev;
2536         struct          tx_ring *txr = adapter->tx_rings;
2537         struct          rx_ring *rxr = adapter->rx_rings;
2538         int             error, rid, vector = 0;
2539
2540
2541         /* Make sure all interrupts are disabled */
2542         E1000_WRITE_REG(&adapter->hw, E1000_IMC, 0xffffffff);
2543
2544         /* First set up ring resources */
2545         for (int i = 0; i < adapter->num_queues; i++, txr++, rxr++) {
2546
2547                 /* RX ring */
2548                 rid = vector + 1;
2549
2550                 rxr->res = bus_alloc_resource_any(dev,
2551                     SYS_RES_IRQ, &rid, RF_ACTIVE);
2552                 if (rxr->res == NULL) {
2553                         device_printf(dev,
2554                             "Unable to allocate bus resource: "
2555                             "RX MSIX Interrupt %d\n", i);
2556                         return (ENXIO);
2557                 }
2558                 if ((error = bus_setup_intr(dev, rxr->res,
2559                     INTR_TYPE_NET | INTR_MPSAFE, NULL, em_msix_rx,
2560                     rxr, &rxr->tag)) != 0) {
2561                         device_printf(dev, "Failed to register RX handler");
2562                         return (error);
2563                 }
2564 #if __FreeBSD_version >= 800504
2565                 bus_describe_intr(dev, rxr->res, rxr->tag, "rx %d", i);
2566 #endif
2567                 rxr->msix = vector++; /* NOTE increment vector for TX */
2568                 TASK_INIT(&rxr->rx_task, 0, em_handle_rx, rxr);
2569                 rxr->tq = taskqueue_create_fast("em_rxq", M_NOWAIT,
2570                     taskqueue_thread_enqueue, &rxr->tq);
2571                 taskqueue_start_threads(&rxr->tq, 1, PI_NET, "%s rxq",
2572                     device_get_nameunit(adapter->dev));
2573                 /*
2574                 ** Set the bit to enable interrupt
2575                 ** in E1000_IMS -- bits 20 and 21
2576                 ** are for RX0 and RX1; note this has
2577                 ** NOTHING to do with the MSIX vector
2578                 */
2579                 rxr->ims = 1 << (20 + i);
2580                 adapter->ivars |= (8 | rxr->msix) << (i * 4);
2581
2582                 /* TX ring */
2583                 rid = vector + 1;
2584                 txr->res = bus_alloc_resource_any(dev,
2585                     SYS_RES_IRQ, &rid, RF_ACTIVE);
2586                 if (txr->res == NULL) {
2587                         device_printf(dev,
2588                             "Unable to allocate bus resource: "
2589                             "TX MSIX Interrupt %d\n", i);
2590                         return (ENXIO);
2591                 }
2592                 if ((error = bus_setup_intr(dev, txr->res,
2593                     INTR_TYPE_NET | INTR_MPSAFE, NULL, em_msix_tx,
2594                     txr, &txr->tag)) != 0) {
2595                         device_printf(dev, "Failed to register TX handler");
2596                         return (error);
2597                 }
2598 #if __FreeBSD_version >= 800504
2599                 bus_describe_intr(dev, txr->res, txr->tag, "tx %d", i);
2600 #endif
2601                 txr->msix = vector++; /* Increment vector for next pass */
2602                 TASK_INIT(&txr->tx_task, 0, em_handle_tx, txr);
2603                 txr->tq = taskqueue_create_fast("em_txq", M_NOWAIT,
2604                     taskqueue_thread_enqueue, &txr->tq);
2605                 taskqueue_start_threads(&txr->tq, 1, PI_NET, "%s txq",
2606                     device_get_nameunit(adapter->dev));
2607                 /*
2608                 ** Set the bit to enable interrupt
2609                 ** in E1000_IMS -- bits 22 and 23
2610                 ** are for TX0 and TX1; note this has
2611                 ** NOTHING to do with the MSIX vector
2612                 */
2613                 txr->ims = 1 << (22 + i);
2614                 adapter->ivars |= (8 | txr->msix) << (8 + (i * 4));
2615         }
2616
2617         /* Link interrupt */
2618         ++rid;
2619         adapter->res = bus_alloc_resource_any(dev,
2620             SYS_RES_IRQ, &rid, RF_ACTIVE);
2621         if (!adapter->res) {
2622                 device_printf(dev,"Unable to allocate "
2623                     "bus resource: Link interrupt [%d]\n", rid);
2624                 return (ENXIO);
2625         }
2626         /* Set the link handler function */
2627         error = bus_setup_intr(dev, adapter->res,
2628             INTR_TYPE_NET | INTR_MPSAFE, NULL,
2629             em_msix_link, adapter, &adapter->tag);
2630         if (error) {
2631                 adapter->res = NULL;
2632                 device_printf(dev, "Failed to register LINK handler");
2633                 return (error);
2634         }
2635 #if __FreeBSD_version >= 800504
2636                 bus_describe_intr(dev, adapter->res, adapter->tag, "link");
2637 #endif
2638         adapter->linkvec = vector;
2639         adapter->ivars |=  (8 | vector) << 16;
2640         adapter->ivars |= 0x80000000;
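        /*
         * Editor's note (worked example): with one queue the vectors
         * assigned above are RX = 0, TX = 1, link = 2, so ivars ends up
         * as 0x800A0908: (8|0) in bits 3:0 routes RX0, (8|1) in bits
         * 11:8 routes TX0, (8|2) in bits 19:16 routes the link cause,
         * and bit 31 activates the IVAR when written in em_init_locked().
         */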
2641
2642         return (0);
2643 }
2644
2645
2646 static void
2647 em_free_pci_resources(struct adapter *adapter)
2648 {
2649         device_t        dev = adapter->dev;
2650         struct tx_ring  *txr;
2651         struct rx_ring  *rxr;
2652         int             rid;
2653
2654
2655         /*
2656         ** Release all the queue interrupt resources:
2657         */
2658         for (int i = 0; i < adapter->num_queues; i++) {
2659                 txr = &adapter->tx_rings[i];
2660                 rxr = &adapter->rx_rings[i];
2661                 /* an early abort? */
2662                 if ((txr == NULL) || (rxr == NULL))
2663                         break;
2664                 rid = txr->msix +1;
2665                 if (txr->tag != NULL) {
2666                         bus_teardown_intr(dev, txr->res, txr->tag);
2667                         txr->tag = NULL;
2668                 }
2669                 if (txr->res != NULL)
2670                         bus_release_resource(dev, SYS_RES_IRQ,
2671                             rid, txr->res);
2672                 rid = rxr->msix +1;
2673                 if (rxr->tag != NULL) {
2674                         bus_teardown_intr(dev, rxr->res, rxr->tag);
2675                         rxr->tag = NULL;
2676                 }
2677                 if (rxr->res != NULL)
2678                         bus_release_resource(dev, SYS_RES_IRQ,
2679                             rid, rxr->res);
2680         }
2681
2682         if (adapter->linkvec) /* we are doing MSIX */
2683                 rid = adapter->linkvec + 1;
2684         else
2685                 rid = (adapter->msix != 0) ? 1 : 0;
2686
2687         if (adapter->tag != NULL) {
2688                 bus_teardown_intr(dev, adapter->res, adapter->tag);
2689                 adapter->tag = NULL;
2690         }
2691
2692         if (adapter->res != NULL)
2693                 bus_release_resource(dev, SYS_RES_IRQ, rid, adapter->res);
2694
2695
2696         if (adapter->msix)
2697                 pci_release_msi(dev);
2698
2699         if (adapter->msix_mem != NULL)
2700                 bus_release_resource(dev, SYS_RES_MEMORY,
2701                     PCIR_BAR(EM_MSIX_BAR), adapter->msix_mem);
2702
2703         if (adapter->memory != NULL)
2704                 bus_release_resource(dev, SYS_RES_MEMORY,
2705                     PCIR_BAR(0), adapter->memory);
2706
2707         if (adapter->flash != NULL)
2708                 bus_release_resource(dev, SYS_RES_MEMORY,
2709                     EM_FLASH, adapter->flash);
2710 }
2711
2712 /*
2713  * Setup MSI or MSI/X
2714  */
2715 static int
2716 em_setup_msix(struct adapter *adapter)
2717 {
2718         device_t dev = adapter->dev;
2719         int val = 0;
2720
2721         /*
2722         ** Setup MSI/X for Hartwell: tests have shown
2723         ** use of two queues to be unstable, and to
2724         ** provide no great gain anyway, so we simply
2725         ** separate the interrupts and use a single queue.
2726         */
2727         if ((adapter->hw.mac.type == e1000_82574) &&
2728             (em_enable_msix == TRUE)) {
2729                 /* Map the MSIX BAR */
2730                 int rid = PCIR_BAR(EM_MSIX_BAR);
2731                 adapter->msix_mem = bus_alloc_resource_any(dev,
2732                     SYS_RES_MEMORY, &rid, RF_ACTIVE);
2733                 if (!adapter->msix_mem) {
2734                         /* May not be enabled */
2735                         device_printf(adapter->dev,
2736                             "Unable to map MSIX table\n");
2737                         goto msi;
2738                 }
2739                 val = pci_msix_count(dev); 
2740         /* We only need 3 vectors */
2741         if (val > 3)
2742                 val = 3;
2743         if (val != 3) {         /* fewer than 3 available, fall back */
2744                         bus_release_resource(dev, SYS_RES_MEMORY,
2745                             PCIR_BAR(EM_MSIX_BAR), adapter->msix_mem);
2746                         adapter->msix_mem = NULL;
2747                         device_printf(adapter->dev,
2748                             "MSIX: incorrect vectors, using MSI\n");
2749                         goto msi;
2750                 }
2751
2752                 if (pci_alloc_msix(dev, &val) != 0) {
2753                         device_printf(adapter->dev,
2754                             "MSIX allocation failed, using MSI\n");
2755                         goto msi;
2756                 }
2757                 device_printf(adapter->dev, "Using MSIX interrupts with %d vectors\n", val);
2758                 return (val);
2759         }
2760 msi:
2761         val = pci_msi_count(dev);
2762         if (val == 1 && pci_alloc_msi(dev, &val) == 0) {
2763                 adapter->msix = 1;
2764                 device_printf(adapter->dev, "Using an MSI interrupt\n");
2765                 return (val);
2766         } 
2767         /* Should only happen due to manual configuration */
2768         device_printf(adapter->dev, "No MSI/MSI-X, using a Legacy IRQ\n");
2769         return (0);
2770 }
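/*
 * Editor's note (illustrative): the value returned here becomes the
 * adapter's vector count, so the attach path sees 3 (MSI-X on the 82574),
 * 1 (a single MSI message) or 0 (legacy INTx) and -- assuming the callers
 * outside this excerpt follow the usual em pattern -- selects the matching
 * interrupt setup routine accordingly.
 */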
2771
2772
2773 /*********************************************************************
2774  *
2775  *  Initialize the hardware to a configuration
2776  *  as specified by the adapter structure.
2777  *
2778  **********************************************************************/
2779 static void
2780 em_reset(struct adapter *adapter)
2781 {
2782         device_t        dev = adapter->dev;
2783         struct ifnet    *ifp = adapter->ifp;
2784         struct e1000_hw *hw = &adapter->hw;
2785         u16             rx_buffer_size;
2786         u32             pba;
2787
2788         INIT_DEBUGOUT("em_reset: begin");
2789
2790         /* Set up smart power down as default off on newer adapters. */
2791         if (!em_smart_pwr_down && (hw->mac.type == e1000_82571 ||
2792             hw->mac.type == e1000_82572)) {
2793                 u16 phy_tmp = 0;
2794
2795                 /* Speed up time to link by disabling smart power down. */
2796                 e1000_read_phy_reg(hw, IGP02E1000_PHY_POWER_MGMT, &phy_tmp);
2797                 phy_tmp &= ~IGP02E1000_PM_SPD;
2798                 e1000_write_phy_reg(hw, IGP02E1000_PHY_POWER_MGMT, phy_tmp);
2799         }
2800
2801         /*
2802          * Packet Buffer Allocation (PBA)
2803          * Writing PBA sets the receive portion of the buffer;
2804          * the remainder is used for the transmit buffer.
2805          */
2806         switch (hw->mac.type) {
2807         /* Total Packet Buffer on these is 48K */
2808         case e1000_82571:
2809         case e1000_82572:
2810         case e1000_80003es2lan:
2811                 pba = E1000_PBA_32K; /* 32K for Rx, 16K for Tx */
2812                 break;
2813         case e1000_82573: /* 82573: Total Packet Buffer is 32K */
2814                 pba = E1000_PBA_12K; /* 12K for Rx, 20K for Tx */
2815                 break;
2816         case e1000_82574:
2817         case e1000_82583:
2818                 pba = E1000_PBA_20K; /* 20K for Rx, 20K for Tx */
2819                 break;
2820         case e1000_ich8lan:
2821                 pba = E1000_PBA_8K;
2822                 break;
2823         case e1000_ich9lan:
2824         case e1000_ich10lan:
2825                 /* Boost Receive side for jumbo frames */
2826                 if (adapter->hw.mac.max_frame_size > 4096)
2827                         pba = E1000_PBA_14K;
2828                 else
2829                         pba = E1000_PBA_10K;
2830                 break;
2831         case e1000_pchlan:
2832         case e1000_pch2lan:
2833         case e1000_pch_lpt:
2834                 pba = E1000_PBA_26K;
2835                 break;
2836         default:
2837                 if (adapter->hw.mac.max_frame_size > 8192)
2838                         pba = E1000_PBA_40K; /* 40K for Rx, 24K for Tx */
2839                 else
2840                         pba = E1000_PBA_48K; /* 48K for Rx, 16K for Tx */
2841         }
2842         E1000_WRITE_REG(&adapter->hw, E1000_PBA, pba);
2843
2844         /*
2845          * These parameters control the automatic generation (Tx) and
2846          * response (Rx) to Ethernet PAUSE frames.
2847          * - High water mark should allow for at least two frames to be
2848          *   received after sending an XOFF.
2849          * - Low water mark works best when it is very near the high water mark.
2850          *   This allows the receiver to restart by sending XON when it has
2851          *   drained a bit. Here we use an arbitrary value of 1500 which will
2852          *   restart after one full frame is pulled from the buffer. There
2853          *   could be several smaller frames in the buffer and if so they will
2854          *   not trigger the XON until their total number reduces the buffer
2855          *   by 1500.
2856          * - The pause time is fairly large at 1000 x 512ns = 512 usec.
2857          */
2858         rx_buffer_size = ((E1000_READ_REG(hw, E1000_PBA) & 0xffff) << 10 );
2859         hw->fc.high_water = rx_buffer_size -
2860             roundup2(adapter->hw.mac.max_frame_size, 1024);
2861         hw->fc.low_water = hw->fc.high_water - 1500;
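        /*
         * Editor's worked example (illustrative, assuming E1000_PBA_48K is
         * 0x0030 and a standard 1518-byte max frame):
         *
         *   rx_buffer_size = (0x0030 & 0xffff) << 10        = 49152
         *   high_water     = 49152 - roundup2(1518, 1024)   = 47104
         *   low_water      = 47104 - 1500                   = 45604
         *
         * i.e. XOFF is signalled once less than 2048 bytes of the 48K
         * receive buffer remain free, and XON once another 1500 bytes
         * have drained.
         */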
2862
2863         if (adapter->fc) /* locally set flow control value? */
2864                 hw->fc.requested_mode = adapter->fc;
2865         else
2866                 hw->fc.requested_mode = e1000_fc_full;
2867
2868         if (hw->mac.type == e1000_80003es2lan)
2869                 hw->fc.pause_time = 0xFFFF;
2870         else
2871                 hw->fc.pause_time = EM_FC_PAUSE_TIME;
2872
2873         hw->fc.send_xon = TRUE;
2874
2875         /* Device specific overrides/settings */
2876         switch (hw->mac.type) {
2877         case e1000_pchlan:
2878                 /* Workaround: no TX flow ctrl for PCH */
2879                 hw->fc.requested_mode = e1000_fc_rx_pause;
2880                 hw->fc.pause_time = 0xFFFF; /* override */
2881                 if (ifp->if_mtu > ETHERMTU) {
2882                         hw->fc.high_water = 0x3500;
2883                         hw->fc.low_water = 0x1500;
2884                 } else {
2885                         hw->fc.high_water = 0x5000;
2886                         hw->fc.low_water = 0x3000;
2887                 }
2888                 hw->fc.refresh_time = 0x1000;
2889                 break;
2890         case e1000_pch2lan:
2891         case e1000_pch_lpt:
2892                 hw->fc.high_water = 0x5C20;
2893                 hw->fc.low_water = 0x5048;
2894                 hw->fc.pause_time = 0x0650;
2895                 hw->fc.refresh_time = 0x0400;
2896                 /* Jumbos need adjusted PBA */
2897                 if (ifp->if_mtu > ETHERMTU)
2898                         E1000_WRITE_REG(hw, E1000_PBA, 12);
2899                 else
2900                         E1000_WRITE_REG(hw, E1000_PBA, 26);
2901                 break;
2902         case e1000_ich9lan:
2903         case e1000_ich10lan:
2904                 if (ifp->if_mtu > ETHERMTU) {
2905                         hw->fc.high_water = 0x2800;
2906                         hw->fc.low_water = hw->fc.high_water - 8;
2907                         break;
2908                 } 
2909                 /* else fall thru */
2910         default:
2911                 if (hw->mac.type == e1000_80003es2lan)
2912                         hw->fc.pause_time = 0xFFFF;
2913                 break;
2914         }
2915
2916         /* Issue a global reset */
2917         e1000_reset_hw(hw);
2918         E1000_WRITE_REG(hw, E1000_WUC, 0);
2919         em_disable_aspm(adapter);
2920         /* and a re-init */
2921         if (e1000_init_hw(hw) < 0) {
2922                 device_printf(dev, "Hardware Initialization Failed\n");
2923                 return;
2924         }
2925
2926         E1000_WRITE_REG(hw, E1000_VET, ETHERTYPE_VLAN);
2927         e1000_get_phy_info(hw);
2928         e1000_check_for_link(hw);
2929         return;
2930 }
2931
2932 /*********************************************************************
2933  *
2934  *  Setup networking device structure and register an interface.
2935  *
2936  **********************************************************************/
2937 static int
2938 em_setup_interface(device_t dev, struct adapter *adapter)
2939 {
2940         struct ifnet   *ifp;
2941
2942         INIT_DEBUGOUT("em_setup_interface: begin");
2943
2944         ifp = adapter->ifp = if_alloc(IFT_ETHER);
2945         if (ifp == NULL) {
2946                 device_printf(dev, "can not allocate ifnet structure\n");
2947                 return (-1);
2948         }
2949         if_initname(ifp, device_get_name(dev), device_get_unit(dev));
2950         ifp->if_init =  em_init;
2951         ifp->if_softc = adapter;
2952         ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
2953         ifp->if_ioctl = em_ioctl;
2954 #ifdef EM_MULTIQUEUE
2955         /* Multiqueue stack interface */
2956         ifp->if_transmit = em_mq_start;
2957         ifp->if_qflush = em_qflush;
2958 #else
2959         ifp->if_start = em_start;
2960         IFQ_SET_MAXLEN(&ifp->if_snd, adapter->num_tx_desc - 1);
2961         ifp->if_snd.ifq_drv_maxlen = adapter->num_tx_desc - 1;
2962         IFQ_SET_READY(&ifp->if_snd);
2963 #endif  
2964
2965         ether_ifattach(ifp, adapter->hw.mac.addr);
2966
2967         ifp->if_capabilities = ifp->if_capenable = 0;
2968
2969
2970         ifp->if_capabilities |= IFCAP_HWCSUM | IFCAP_VLAN_HWCSUM;
2971         ifp->if_capabilities |= IFCAP_TSO4;
2972         /*
2973          * Tell the upper layer(s) we
2974          * support full VLAN capability
2975          */
2976         ifp->if_data.ifi_hdrlen = sizeof(struct ether_vlan_header);
2977         ifp->if_capabilities |= IFCAP_VLAN_HWTAGGING
2978                              |  IFCAP_VLAN_HWTSO
2979                              |  IFCAP_VLAN_MTU;
2980         ifp->if_capenable = ifp->if_capabilities;
2981
2982         /*
2983         ** Don't turn this on by default: if vlans are
2984         ** created on another pseudo device (eg. lagg)
2985         ** then vlan events are not passed thru, breaking
2986         ** operation, but with HW FILTER off it works. If
2987         ** using vlans directly on the em driver you can
2988         ** enable this and get full hardware tag filtering.
2989         */
2990         ifp->if_capabilities |= IFCAP_VLAN_HWFILTER;
2991
2992 #ifdef DEVICE_POLLING
2993         ifp->if_capabilities |= IFCAP_POLLING;
2994 #endif
2995
2996         /* Enable only WOL MAGIC by default */
2997         if (adapter->wol) {
2998                 ifp->if_capabilities |= IFCAP_WOL;
2999                 ifp->if_capenable |= IFCAP_WOL_MAGIC;
3000         }
3001                 
3002         /*
3003          * Specify the media types supported by this adapter and register
3004          * callbacks to update media and link information
3005          */
3006         ifmedia_init(&adapter->media, IFM_IMASK,
3007             em_media_change, em_media_status);
3008         if ((adapter->hw.phy.media_type == e1000_media_type_fiber) ||
3009             (adapter->hw.phy.media_type == e1000_media_type_internal_serdes)) {
3010                 u_char fiber_type = IFM_1000_SX;        /* default type */
3011
3012                 ifmedia_add(&adapter->media, IFM_ETHER | fiber_type | IFM_FDX, 
3013                             0, NULL);
3014                 ifmedia_add(&adapter->media, IFM_ETHER | fiber_type, 0, NULL);
3015         } else {
3016                 ifmedia_add(&adapter->media, IFM_ETHER | IFM_10_T, 0, NULL);
3017                 ifmedia_add(&adapter->media, IFM_ETHER | IFM_10_T | IFM_FDX,
3018                             0, NULL);
3019                 ifmedia_add(&adapter->media, IFM_ETHER | IFM_100_TX,
3020                             0, NULL);
3021                 ifmedia_add(&adapter->media, IFM_ETHER | IFM_100_TX | IFM_FDX,
3022                             0, NULL);
3023                 if (adapter->hw.phy.type != e1000_phy_ife) {
3024                         ifmedia_add(&adapter->media,
3025                                 IFM_ETHER | IFM_1000_T | IFM_FDX, 0, NULL);
3026                         ifmedia_add(&adapter->media,
3027                                 IFM_ETHER | IFM_1000_T, 0, NULL);
3028                 }
3029         }
3030         ifmedia_add(&adapter->media, IFM_ETHER | IFM_AUTO, 0, NULL);
3031         ifmedia_set(&adapter->media, IFM_ETHER | IFM_AUTO);
3032         return (0);
3033 }
3034
3035
3036 /*
3037  * Manage DMA'able memory.
3038  */
3039 static void
3040 em_dmamap_cb(void *arg, bus_dma_segment_t *segs, int nseg, int error)
3041 {
3042         if (error)
3043                 return;
3044         *(bus_addr_t *) arg = segs[0].ds_addr;
3045 }
3046
3047 static int
3048 em_dma_malloc(struct adapter *adapter, bus_size_t size,
3049         struct em_dma_alloc *dma, int mapflags)
3050 {
3051         int error;
3052
3053         error = bus_dma_tag_create(bus_get_dma_tag(adapter->dev), /* parent */
3054                                 EM_DBA_ALIGN, 0,        /* alignment, bounds */
3055                                 BUS_SPACE_MAXADDR,      /* lowaddr */
3056                                 BUS_SPACE_MAXADDR,      /* highaddr */
3057                                 NULL, NULL,             /* filter, filterarg */
3058                                 size,                   /* maxsize */
3059                                 1,                      /* nsegments */
3060                                 size,                   /* maxsegsize */
3061                                 0,                      /* flags */
3062                                 NULL,                   /* lockfunc */
3063                                 NULL,                   /* lockarg */
3064                                 &dma->dma_tag);
3065         if (error) {
3066                 device_printf(adapter->dev,
3067                     "%s: bus_dma_tag_create failed: %d\n",
3068                     __func__, error);
3069                 goto fail_0;
3070         }
3071
3072         error = bus_dmamem_alloc(dma->dma_tag, (void**) &dma->dma_vaddr,
3073             BUS_DMA_NOWAIT | BUS_DMA_COHERENT, &dma->dma_map);
3074         if (error) {
3075                 device_printf(adapter->dev,
3076                     "%s: bus_dmamem_alloc(%ju) failed: %d\n",
3077                     __func__, (uintmax_t)size, error);
3078                 goto fail_1;
3079         }
3080
3081         dma->dma_paddr = 0;
3082         error = bus_dmamap_load(dma->dma_tag, dma->dma_map, dma->dma_vaddr,
3083             size, em_dmamap_cb, &dma->dma_paddr, mapflags | BUS_DMA_NOWAIT);
3084         if (error || dma->dma_paddr == 0) {
3085                 device_printf(adapter->dev,
3086                     "%s: bus_dmamap_load failed: %d\n",
3087                     __func__, error);
3088                 goto fail_3;
3089         }
3090
3091         return (0);
3092
3093 fail_3:
3094         bus_dmamap_unload(dma->dma_tag, dma->dma_map);
3095 fail_2:
3096         bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map);
3097 fail_1: bus_dma_tag_destroy(dma->dma_tag); /* alloc failed: tag only */
3098 fail_0:
3099         dma->dma_map = NULL;
3100         dma->dma_tag = NULL;
3101
3102         return (error);
3103 }
3104
3105 static void
3106 em_dma_free(struct adapter *adapter, struct em_dma_alloc *dma)
3107 {
3108         if (dma->dma_tag == NULL)
3109                 return;
3110         if (dma->dma_map != NULL) {
3111                 bus_dmamap_sync(dma->dma_tag, dma->dma_map,
3112                     BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
3113                 bus_dmamap_unload(dma->dma_tag, dma->dma_map);
3114                 bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map);
3115                 dma->dma_map = NULL;
3116         }
3117         bus_dma_tag_destroy(dma->dma_tag);
3118         dma->dma_tag = NULL;
3119 }
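#if 0   /* Editor's sketch (not compiled): typical use of the helpers above. */
static int
em_dma_usage_example(struct adapter *adapter)
{
        struct em_dma_alloc dma;
        int error;

        /* One page of DMA-safe memory, mapped and loaded immediately. */
        error = em_dma_malloc(adapter, PAGE_SIZE, &dma, BUS_DMA_NOWAIT);
        if (error != 0)
                return (error);
        /*
         * dma.dma_vaddr is the kernel VA; dma.dma_paddr was filled in by
         * em_dmamap_cb() when the map was loaded.
         */
        bzero(dma.dma_vaddr, PAGE_SIZE);
        em_dma_free(adapter, &dma);
        return (0);
}
#endif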
3120
3121
3122 /*********************************************************************
3123  *
3124  *  Allocate memory for the transmit and receive rings, and then
3125  *  the descriptors associated with each, called only once at attach.
3126  *
3127  **********************************************************************/
3128 static int
3129 em_allocate_queues(struct adapter *adapter)
3130 {
3131         device_t                dev = adapter->dev;
3132         struct tx_ring          *txr = NULL;
3133         struct rx_ring          *rxr = NULL;
3134         int rsize, tsize, error = E1000_SUCCESS;
3135         int txconf = 0, rxconf = 0;
3136
3137
3138         /* Allocate the TX ring struct memory */
3139         if (!(adapter->tx_rings =
3140             (struct tx_ring *) malloc(sizeof(struct tx_ring) *
3141             adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
3142                 device_printf(dev, "Unable to allocate TX ring memory\n");
3143                 error = ENOMEM;
3144                 goto fail;
3145         }
3146
3147         /* Now allocate the RX */
3148         if (!(adapter->rx_rings =
3149             (struct rx_ring *) malloc(sizeof(struct rx_ring) *
3150             adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
3151                 device_printf(dev, "Unable to allocate RX ring memory\n");
3152                 error = ENOMEM;
3153                 goto rx_fail;
3154         }
3155
3156         tsize = roundup2(adapter->num_tx_desc *
3157             sizeof(struct e1000_tx_desc), EM_DBA_ALIGN);
3158         /*
3159          * Now set up the TX queues, txconf is needed to handle the
3160          * possibility that things fail midcourse and we need to
3161          * undo the memory allocations gracefully
3162          */ 
3163         for (int i = 0; i < adapter->num_queues; i++, txconf++) {
3164                 /* Set up some basics */
3165                 txr = &adapter->tx_rings[i];
3166                 txr->adapter = adapter;
3167                 txr->me = i;
3168
3169                 /* Initialize the TX lock */
3170                 snprintf(txr->mtx_name, sizeof(txr->mtx_name), "%s:tx(%d)",
3171                     device_get_nameunit(dev), txr->me);
3172                 mtx_init(&txr->tx_mtx, txr->mtx_name, NULL, MTX_DEF);
3173
3174                 if (em_dma_malloc(adapter, tsize,
3175                         &txr->txdma, BUS_DMA_NOWAIT)) {
3176                         device_printf(dev,
3177                             "Unable to allocate TX Descriptor memory\n");
3178                         error = ENOMEM;
3179                         goto err_tx_desc;
3180                 }
3181                 txr->tx_base = (struct e1000_tx_desc *)txr->txdma.dma_vaddr;
3182                 bzero((void *)txr->tx_base, tsize);
3183
3184                 if (em_allocate_transmit_buffers(txr)) {
3185                         device_printf(dev,
3186                             "Critical Failure setting up transmit buffers\n");
3187                         error = ENOMEM;
3188                         goto err_tx_desc;
3189                 }
3190 #if __FreeBSD_version >= 800000
3191                 /* Allocate a buf ring */
3192                 txr->br = buf_ring_alloc(4096, M_DEVBUF,
3193                     M_WAITOK, &txr->tx_mtx);
3194 #endif
3195         }
3196
3197         /*
3198          * Next the RX queues...
3199          */ 
3200         rsize = roundup2(adapter->num_rx_desc *
3201             sizeof(struct e1000_rx_desc), EM_DBA_ALIGN);
3202         for (int i = 0; i < adapter->num_queues; i++, rxconf++) {
3203                 rxr = &adapter->rx_rings[i];
3204                 rxr->adapter = adapter;
3205                 rxr->me = i;
3206
3207                 /* Initialize the RX lock */
3208                 snprintf(rxr->mtx_name, sizeof(rxr->mtx_name), "%s:rx(%d)",
3209                     device_get_nameunit(dev), rxr->me);
3210                 mtx_init(&rxr->rx_mtx, rxr->mtx_name, NULL, MTX_DEF);
3211
3212                 if (em_dma_malloc(adapter, rsize,
3213                         &rxr->rxdma, BUS_DMA_NOWAIT)) {
3214                         device_printf(dev,
3215                             "Unable to allocate RX Descriptor memory\n");
3216                         error = ENOMEM;
3217                         goto err_rx_desc;
3218                 }
3219                 rxr->rx_base = (struct e1000_rx_desc *)rxr->rxdma.dma_vaddr;
3220                 bzero((void *)rxr->rx_base, rsize);
3221
3222                 /* Allocate receive buffers for the ring*/
3223                 if (em_allocate_receive_buffers(rxr)) {
3224                         device_printf(dev,
3225                             "Critical Failure setting up receive buffers\n");
3226                         error = ENOMEM;
3227                         goto err_rx_desc;
3228                 }
3229         }
3230
3231         return (0);
3232
3233 err_rx_desc:
3234         for (rxr = adapter->rx_rings; rxconf > 0; rxr++, rxconf--)
3235                 em_dma_free(adapter, &rxr->rxdma);
3236 err_tx_desc:
3237         for (txr = adapter->tx_rings; txconf > 0; txr++, txconf--)
3238                 em_dma_free(adapter, &txr->txdma);
3239         free(adapter->rx_rings, M_DEVBUF);
3240 rx_fail:
3241 #if __FreeBSD_version >= 800000
3242         buf_ring_free(txr->br, M_DEVBUF);
3243 #endif
3244         free(adapter->tx_rings, M_DEVBUF);
3245 fail:
3246         return (error);
3247 }
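/*
 * Editor's note (illustrative): txconf/rxconf count fully-initialized
 * rings, so the unwind labels above free descriptor DMA only for rings
 * that completed setup -- e.g. a failure on RX ring 2 of 4 frees rxdma
 * for rings 0 and 1, then txdma for every TX ring, then both arrays.
 */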
3248
3249
3250 /*********************************************************************
3251  *
3252  *  Allocate memory for tx_buffer structures. The tx_buffer stores all
3253  *  the information needed to transmit a packet on the wire. This is
3254  *  called only once at attach, setup is done every reset.
3255  *
3256  **********************************************************************/
3257 static int
3258 em_allocate_transmit_buffers(struct tx_ring *txr)
3259 {
3260         struct adapter *adapter = txr->adapter;
3261         device_t dev = adapter->dev;
3262         struct em_buffer *txbuf;
3263         int error, i;
3264
3265         /*
3266          * Setup DMA descriptor areas.
3267          */
3268         if ((error = bus_dma_tag_create(bus_get_dma_tag(dev),
3269                                1, 0,                    /* alignment, bounds */
3270                                BUS_SPACE_MAXADDR,       /* lowaddr */
3271                                BUS_SPACE_MAXADDR,       /* highaddr */
3272                                NULL, NULL,              /* filter, filterarg */
3273                                EM_TSO_SIZE,             /* maxsize */
3274                                EM_MAX_SCATTER,          /* nsegments */
3275                                PAGE_SIZE,               /* maxsegsize */
3276                                0,                       /* flags */
3277                                NULL,                    /* lockfunc */
3278                                NULL,                    /* lockfuncarg */
3279                                &txr->txtag))) {
3280                 device_printf(dev,"Unable to allocate TX DMA tag\n");
3281                 goto fail;
3282         }
3283
3284         if (!(txr->tx_buffers =
3285             (struct em_buffer *) malloc(sizeof(struct em_buffer) *
3286             adapter->num_tx_desc, M_DEVBUF, M_NOWAIT | M_ZERO))) {
3287                 device_printf(dev, "Unable to allocate tx_buffer memory\n");
3288                 error = ENOMEM;
3289                 goto fail;
3290         }
3291
3292         /* Create the descriptor buffer dma maps */
3293         txbuf = txr->tx_buffers;
3294         for (i = 0; i < adapter->num_tx_desc; i++, txbuf++) {
3295                 error = bus_dmamap_create(txr->txtag, 0, &txbuf->map);
3296                 if (error != 0) {
3297                         device_printf(dev, "Unable to create TX DMA map\n");
3298                         goto fail;
3299                 }
3300         }
3301
3302         return 0;
3303 fail:
3304         /* We free everything; this handles the case where we failed partway through */
3305         em_free_transmit_structures(adapter);
3306         return (error);
3307 }
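/*
 * Editor's note (illustrative): the TX tag above bounds a single dmamap
 * load to EM_TSO_SIZE total bytes across at most EM_MAX_SCATTER segments
 * of at most PAGE_SIZE each; the exact EM_* values are defined in
 * if_em.h, outside this excerpt.
 */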
3308
3309 /*********************************************************************
3310  *
3311  *  Initialize a transmit ring.
3312  *
3313  **********************************************************************/
3314 static void
3315 em_setup_transmit_ring(struct tx_ring *txr)
3316 {
3317         struct adapter *adapter = txr->adapter;
3318         struct em_buffer *txbuf;
3319         int i;
3320 #ifdef DEV_NETMAP
3321         struct netmap_adapter *na = NA(adapter->ifp);
3322         struct netmap_slot *slot;
3323 #endif /* DEV_NETMAP */
3324
3325         /* Clear the old descriptor contents */
3326         EM_TX_LOCK(txr);
3327 #ifdef DEV_NETMAP
3328         slot = netmap_reset(na, NR_TX, txr->me, 0);
3329 #endif /* DEV_NETMAP */
3330
3331         bzero((void *)txr->tx_base,
3332               (sizeof(struct e1000_tx_desc)) * adapter->num_tx_desc);
3333         /* Reset indices */
3334         txr->next_avail_desc = 0;
3335         txr->next_to_clean = 0;
3336
3337         /* Free any existing tx buffers. */
3338         txbuf = txr->tx_buffers;
3339         for (i = 0; i < adapter->num_tx_desc; i++, txbuf++) {
3340                 if (txbuf->m_head != NULL) {
3341                         bus_dmamap_sync(txr->txtag, txbuf->map,
3342                             BUS_DMASYNC_POSTWRITE);
3343                         bus_dmamap_unload(txr->txtag, txbuf->map);
3344                         m_freem(txbuf->m_head);
3345                         txbuf->m_head = NULL;
3346                 }
3347 #ifdef DEV_NETMAP
3348                 if (slot) {
3349                         int si = netmap_idx_n2k(&na->tx_rings[txr->me], i);
3350                         uint64_t paddr;
3351                         void *addr;
3352
3353                         addr = PNMB(slot + si, &paddr);
3354                         txr->tx_base[i].buffer_addr = htole64(paddr);
3355                         /* reload the map for netmap mode */
3356                         netmap_load_map(txr->txtag, txbuf->map, addr);
3357                 }
3358 #endif /* DEV_NETMAP */
3359
3360                 /* clear the watch index */
3361                 txbuf->next_eop = -1;
3362         }
3363
3364         /* Set number of descriptors available */
3365         txr->tx_avail = adapter->num_tx_desc;
3366         txr->queue_status = EM_QUEUE_IDLE;
3367
3368         /* Clear checksum offload context. */
3369         txr->last_hw_offload = 0;
3370         txr->last_hw_ipcss = 0;
3371         txr->last_hw_ipcso = 0;
3372         txr->last_hw_tucss = 0;
3373         txr->last_hw_tucso = 0;
3374
3375         bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
3376             BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
3377         EM_TX_UNLOCK(txr);
3378 }
3379
3380 /*********************************************************************
3381  *
3382  *  Initialize all transmit rings.
3383  *
3384  **********************************************************************/
3385 static void
3386 em_setup_transmit_structures(struct adapter *adapter)
3387 {
3388         struct tx_ring *txr = adapter->tx_rings;
3389
3390         for (int i = 0; i < adapter->num_queues; i++, txr++)
3391                 em_setup_transmit_ring(txr);
3392
3393         return;
3394 }
3395
3396 /*********************************************************************
3397  *
3398  *  Enable transmit unit.
3399  *
3400  **********************************************************************/
3401 static void
3402 em_initialize_transmit_unit(struct adapter *adapter)
3403 {
3404         struct tx_ring  *txr = adapter->tx_rings;
3405         struct e1000_hw *hw = &adapter->hw;
3406         u32     tctl, tarc, tipg = 0;
3407
3408         INIT_DEBUGOUT("em_initialize_transmit_unit: begin");
3409
3410         for (int i = 0; i < adapter->num_queues; i++, txr++) {
3411                 u64 bus_addr = txr->txdma.dma_paddr;
3412                 /* Base and Len of TX Ring */
3413                 E1000_WRITE_REG(hw, E1000_TDLEN(i),
3414                     adapter->num_tx_desc * sizeof(struct e1000_tx_desc));
3415                 E1000_WRITE_REG(hw, E1000_TDBAH(i),
3416                     (u32)(bus_addr >> 32));
3417                 E1000_WRITE_REG(hw, E1000_TDBAL(i),
3418                     (u32)bus_addr);
3419                 /* Init the HEAD/TAIL indices */
3420                 E1000_WRITE_REG(hw, E1000_TDT(i), 0);
3421                 E1000_WRITE_REG(hw, E1000_TDH(i), 0);
3422
3423                 HW_DEBUGOUT2("Base = %x, Length = %x\n",
3424                     E1000_READ_REG(&adapter->hw, E1000_TDBAL(i)),
3425                     E1000_READ_REG(&adapter->hw, E1000_TDLEN(i)));
3426
3427                 txr->queue_status = EM_QUEUE_IDLE;
3428         }
3429
3430         /* Set the default values for the Tx Inter Packet Gap timer */
3431         switch (adapter->hw.mac.type) {
3432         case e1000_80003es2lan:
3433                 tipg = DEFAULT_82543_TIPG_IPGR1;
3434                 tipg |= DEFAULT_80003ES2LAN_TIPG_IPGR2 <<
3435                     E1000_TIPG_IPGR2_SHIFT;
3436                 break;
3437         default:
3438                 if ((adapter->hw.phy.media_type == e1000_media_type_fiber) ||
3439                     (adapter->hw.phy.media_type ==
3440                     e1000_media_type_internal_serdes))
3441                         tipg = DEFAULT_82543_TIPG_IPGT_FIBER;
3442                 else
3443                         tipg = DEFAULT_82543_TIPG_IPGT_COPPER;
3444                 tipg |= DEFAULT_82543_TIPG_IPGR1 << E1000_TIPG_IPGR1_SHIFT;
3445                 tipg |= DEFAULT_82543_TIPG_IPGR2 << E1000_TIPG_IPGR2_SHIFT;
3446         }
3447
3448         E1000_WRITE_REG(&adapter->hw, E1000_TIPG, tipg);
3449         E1000_WRITE_REG(&adapter->hw, E1000_TIDV, adapter->tx_int_delay.value);
3450
3451         if(adapter->hw.mac.type >= e1000_82540)
3452                 E1000_WRITE_REG(&adapter->hw, E1000_TADV,
3453                     adapter->tx_abs_int_delay.value);
3454
3455         if ((adapter->hw.mac.type == e1000_82571) ||
3456             (adapter->hw.mac.type == e1000_82572)) {
3457                 tarc = E1000_READ_REG(&adapter->hw, E1000_TARC(0));
3458                 tarc |= SPEED_MODE_BIT;
3459                 E1000_WRITE_REG(&adapter->hw, E1000_TARC(0), tarc);
3460         } else if (adapter->hw.mac.type == e1000_80003es2lan) {
3461                 tarc = E1000_READ_REG(&adapter->hw, E1000_TARC(0));
3462                 tarc |= 1;
3463                 E1000_WRITE_REG(&adapter->hw, E1000_TARC(0), tarc);
3464                 tarc = E1000_READ_REG(&adapter->hw, E1000_TARC(1));
3465                 tarc |= 1;
3466                 E1000_WRITE_REG(&adapter->hw, E1000_TARC(1), tarc);
3467         }
3468
3469         adapter->txd_cmd = E1000_TXD_CMD_IFCS;
3470         if (adapter->tx_int_delay.value > 0)
3471                 adapter->txd_cmd |= E1000_TXD_CMD_IDE;
3472
3473         /* Program the Transmit Control Register */
3474         tctl = E1000_READ_REG(&adapter->hw, E1000_TCTL);
3475         tctl &= ~E1000_TCTL_CT;
3476         tctl |= (E1000_TCTL_PSP | E1000_TCTL_RTLC | E1000_TCTL_EN |
3477                    (E1000_COLLISION_THRESHOLD << E1000_CT_SHIFT));
3478
3479         if (adapter->hw.mac.type >= e1000_82571)
3480                 tctl |= E1000_TCTL_MULR;
3481
3482         /* This write will effectively turn on the transmit unit. */
3483         E1000_WRITE_REG(&adapter->hw, E1000_TCTL, tctl);
3484
3485 }
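/*
 * Editor's note (illustrative): TIPG packs three inter-packet gap timers
 * into one register, so the assembly above amounts to
 *
 *   tipg = IPGT | (IPGR1 << E1000_TIPG_IPGR1_SHIFT)
 *               | (IPGR2 << E1000_TIPG_IPGR2_SHIFT);
 *
 * with the shift constants (commonly 10 and 20) coming from the shared
 * e1000 headers, outside this excerpt.
 */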
3486
3487
3488 /*********************************************************************
3489  *
3490  *  Free all transmit rings.
3491  *
3492  **********************************************************************/
3493 static void
3494 em_free_transmit_structures(struct adapter *adapter)
3495 {
3496         struct tx_ring *txr = adapter->tx_rings;
3497
3498         for (int i = 0; i < adapter->num_queues; i++, txr++) {
3499                 EM_TX_LOCK(txr);
3500                 em_free_transmit_buffers(txr);
3501                 em_dma_free(adapter, &txr->txdma);
3502                 EM_TX_UNLOCK(txr);
3503                 EM_TX_LOCK_DESTROY(txr);
3504         }
3505
3506         free(adapter->tx_rings, M_DEVBUF);
3507 }
3508
3509 /*********************************************************************
3510  *
3511  *  Free transmit ring related data structures.
3512  *
3513  **********************************************************************/
3514 static void
3515 em_free_transmit_buffers(struct tx_ring *txr)
3516 {
3517         struct adapter          *adapter = txr->adapter;
3518         struct em_buffer        *txbuf;
3519
3520         INIT_DEBUGOUT("free_transmit_ring: begin");
3521
3522         if (txr->tx_buffers == NULL)
3523                 return;
3524
3525         for (int i = 0; i < adapter->num_tx_desc; i++) {
3526                 txbuf = &txr->tx_buffers[i];
3527                 if (txbuf->m_head != NULL) {
3528                         bus_dmamap_sync(txr->txtag, txbuf->map,
3529                             BUS_DMASYNC_POSTWRITE);
3530                         bus_dmamap_unload(txr->txtag,
3531                             txbuf->map);
3532                         m_freem(txbuf->m_head);
3533                         txbuf->m_head = NULL;
3534                         if (txbuf->map != NULL) {
3535                                 bus_dmamap_destroy(txr->txtag,
3536                                     txbuf->map);
3537                                 txbuf->map = NULL;
3538                         }
3539                 } else if (txbuf->map != NULL) {
3540                         bus_dmamap_unload(txr->txtag,
3541                             txbuf->map);
3542                         bus_dmamap_destroy(txr->txtag,
3543                             txbuf->map);
3544                         txbuf->map = NULL;
3545                 }
3546         }
3547 #if __FreeBSD_version >= 800000
3548         if (txr->br != NULL)
3549                 buf_ring_free(txr->br, M_DEVBUF);
3550 #endif
3551         if (txr->tx_buffers != NULL) {
3552                 free(txr->tx_buffers, M_DEVBUF);
3553                 txr->tx_buffers = NULL;
3554         }
3555         if (txr->txtag != NULL) {
3556                 bus_dma_tag_destroy(txr->txtag);
3557                 txr->txtag = NULL;
3558         }
3559         return;
3560 }
3561
3562
3563 /*********************************************************************
3564  *  The offload context is protocol specific (TCP/UDP) and thus
3565  *  only needs to be set when the protocol changes. Context
3566  *  changes can be a performance detriment, and the feature
3567  *  might be better just disabled. The reason arises in the way
3568  *  in which the controller supports pipelined requests from the
3569  *  Tx data DMA. Up to four requests can be pipelined, and they may
3570  *  belong to the same packet or to multiple packets. However all
3571  *  requests for one packet are issued before a request is issued
3572  *  for a subsequent packet and if a request for the next packet
3573  *  requires a context change, that request will be stalled
3574  *  until the previous request completes. This means setting up
3575  *  a new context effectively disables pipelined Tx data DMA, which
3576  *  in turn greatly slows down performance when sending small sized
3577  *  frames. 
3578  **********************************************************************/
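/*
 * Editor's worked example (illustrative): for an untagged Ethernet/IPv4/TCP
 * frame, ip_off = 14 and ip_hl = 5, so the offsets computed below are
 *
 *   ipcss   = 14                                      (IP header start)
 *   ipcso   = 14 + offsetof(struct ip, ip_sum)        = 24
 *   hdr_len = 14 + (5 << 2)                           = 34
 *   tucss   = 34                                      (TCP header start)
 *   tucso   = 34 + offsetof(struct tcphdr, th_sum)    = 50
 */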
3579 static void
3580 em_transmit_checksum_setup(struct tx_ring *txr, struct mbuf *mp, int ip_off,
3581     struct ip *ip, u32 *txd_upper, u32 *txd_lower)
3582 {
3583         struct adapter                  *adapter = txr->adapter;
3584         struct e1000_context_desc       *TXD = NULL;
3585         struct em_buffer                *tx_buffer;
3586         int                             cur, hdr_len;
3587         u32                             cmd = 0;
3588         u16                             offload = 0;
3589         u8                              ipcso, ipcss, tucso, tucss;
3590
3591         ipcss = ipcso = tucss = tucso = 0;
3592         hdr_len = ip_off + (ip->ip_hl << 2);
3593         cur = txr->next_avail_desc;
3594
3595         /* Setup of IP header checksum. */
3596         if (mp->m_pkthdr.csum_flags & CSUM_IP) {
3597                 *txd_upper |= E1000_TXD_POPTS_IXSM << 8;
3598                 offload |= CSUM_IP;
3599                 ipcss = ip_off;
3600                 ipcso = ip_off + offsetof(struct ip, ip_sum);
3601                 /*
3602                  * Start offset for header checksum calculation.
3603                  * End offset for header checksum calculation.
3604                  * Offset of place to put the checksum.
3605                  */
3606                 TXD = (struct e1000_context_desc *)&txr->tx_base[cur];
3607                 TXD->lower_setup.ip_fields.ipcss = ipcss;
3608                 TXD->lower_setup.ip_fields.ipcse = htole16(hdr_len);
3609                 TXD->lower_setup.ip_fields.ipcso = ipcso;
3610                 cmd |= E1000_TXD_CMD_IP;
3611         }
3612
3613         if (mp->m_pkthdr.csum_flags & CSUM_TCP) {
3614                 *txd_lower = E1000_TXD_CMD_DEXT | E1000_TXD_DTYP_D;
3615                 *txd_upper |= E1000_TXD_POPTS_TXSM << 8;
3616                 offload |= CSUM_TCP;
3617                 tucss = hdr_len;
3618                 tucso = hdr_len + offsetof(struct tcphdr, th_sum);
3619                 /*
3620                  * Setting up a new checksum offload context for every frame
3621                  * takes a lot of processing time for hardware. This also
3622                  * reduces performance a lot for small sized frames so avoid
3623                  * it if driver can use previously configured checksum
3624                  * offload context.
3625                  */
3626                 if (txr->last_hw_offload == offload) {
3627                         if (offload & CSUM_IP) {
3628                                 if (txr->last_hw_ipcss == ipcss &&
3629                                     txr->last_hw_ipcso == ipcso &&
3630                                     txr->last_hw_tucss == tucss &&
3631                                     txr->last_hw_tucso == tucso)
3632                                         return;
3633                         } else {
3634                                 if (txr->last_hw_tucss == tucss &&
3635                                     txr->last_hw_tucso == tucso)
3636                                         return;
3637                         }
3638                 }
3639                 txr->last_hw_offload = offload;
3640                 txr->last_hw_tucss = tucss;
3641                 txr->last_hw_tucso = tucso;
3642                 /*
3643                  * Start offset for payload checksum calculation.
3644                  * End offset for payload checksum calculation.
3645                  * Offset of place to put the checksum.
3646                  */
3647                 TXD = (struct e1000_context_desc *)&txr->tx_base[cur];
3648                 TXD->upper_setup.tcp_fields.tucss = tucss;
3649                 TXD->upper_setup.tcp_fields.tucse = htole16(0);
3650                 TXD->upper_setup.tcp_fields.tucso = tucso;
3651                 cmd |= E1000_TXD_CMD_TCP;
3652         } else if (mp->m_pkthdr.csum_flags & CSUM_UDP) {
3653                 *txd_lower = E1000_TXD_CMD_DEXT | E1000_TXD_DTYP_D;
3654                 *txd_upper |= E1000_TXD_POPTS_TXSM << 8;
3655                 tucss = hdr_len;
3656                 tucso = hdr_len + offsetof(struct udphdr, uh_sum);
3657                 /*
3658                  * Setting up a new checksum offload context for every frame
3659                  * takes a lot of processing time for hardware. This also
3660                  * reduces performance a lot for small sized frames so avoid
3661                  * it if driver can use previously configured checksum
3662                  * offload context.
3663                  */
3664                 if (txr->last_hw_offload == offload) {
3665                         if (offload & CSUM_IP) {
3666                                 if (txr->last_hw_ipcss == ipcss &&
3667                                     txr->last_hw_ipcso == ipcso &&
3668                                     txr->last_hw_tucss == tucss &&
3669                                     txr->last_hw_tucso == tucso)
3670                                         return;
3671                         } else {
3672                                 if (txr->last_hw_tucss == tucss &&
3673                                     txr->last_hw_tucso == tucso)
3674                                         return;
3675                         }
3676                 }
3677                 txr->last_hw_offload = offload;
3678                 txr->last_hw_tucss = tucss;
3679                 txr->last_hw_tucso = tucso;
3680                 /*
3681                  * Start offset for header checksum calculation.
3682                  * End offset for header checksum calculation.
3683                  * Offset of place to put the checksum.
3684                  */
3685                 TXD = (struct e1000_context_desc *)&txr->tx_base[cur];
3686                 TXD->upper_setup.tcp_fields.tucss = tucss;
3687                 TXD->upper_setup.tcp_fields.tucse = htole16(0);
3688                 TXD->upper_setup.tcp_fields.tucso = tucso;
3689         }
3690   
3691         if (offload & CSUM_IP) {
3692                 txr->last_hw_ipcss = ipcss;
3693                 txr->last_hw_ipcso = ipcso;
3694         }
3695
3696         TXD->tcp_seg_setup.data = htole32(0);
3697         TXD->cmd_and_length =
3698             htole32(adapter->txd_cmd | E1000_TXD_CMD_DEXT | cmd);
3699         tx_buffer = &txr->tx_buffers[cur];
3700         tx_buffer->m_head = NULL;
3701         tx_buffer->next_eop = -1;
3702
3703         if (++cur == adapter->num_tx_desc)
3704                 cur = 0;
3705
3706         txr->tx_avail--;
3707         txr->next_avail_desc = cur;
3708 }
3709
3710
3711 /**********************************************************************
3712  *
3713  *  Setup work for hardware segmentation offload (TSO)
3714  *
3715  **********************************************************************/
3716 static void
3717 em_tso_setup(struct tx_ring *txr, struct mbuf *mp, int ip_off,
3718     struct ip *ip, struct tcphdr *tp, u32 *txd_upper, u32 *txd_lower)
3719 {
3720         struct adapter                  *adapter = txr->adapter;
3721         struct e1000_context_desc       *TXD;
3722         struct em_buffer                *tx_buffer;
3723         int cur, hdr_len;
3724
3725         /*
3726          * In theory we can use the same TSO context if and only if
3727          * the frame is the same type (IP/TCP) and has the same MSS. However,
3728          * checking whether a frame has the same IP/TCP structure is a
3729          * hard thing, so just ignore that and always re-establish a
3730          * new TSO context.
3731          */
3732         hdr_len = ip_off + (ip->ip_hl << 2) + (tp->th_off << 2);
3733         *txd_lower = (E1000_TXD_CMD_DEXT |      /* Extended descr type */
3734                       E1000_TXD_DTYP_D |        /* Data descr type */
3735                       E1000_TXD_CMD_TSE);       /* Do TSE on this packet */
3736
3737         /* IP and/or TCP header checksum calculation and insertion. */
3738         *txd_upper = (E1000_TXD_POPTS_IXSM | E1000_TXD_POPTS_TXSM) << 8;
3739
3740         cur = txr->next_avail_desc;
3741         tx_buffer = &txr->tx_buffers[cur];
3742         TXD = (struct e1000_context_desc *) &txr->tx_base[cur];
3743
3744         /*
3745          * Start offset for header checksum calculation.
3746          * End offset for header checksum calculation.
3747          * Offset of place to put the checksum.
3748          */
3749         TXD->lower_setup.ip_fields.ipcss = ip_off;
3750         TXD->lower_setup.ip_fields.ipcse =
3751             htole16(ip_off + (ip->ip_hl << 2) - 1);
3752         TXD->lower_setup.ip_fields.ipcso = ip_off + offsetof(struct ip, ip_sum);
3753         /*
3754          * Start offset for payload checksum calculation.
3755          * End offset for payload checksum calculation.
3756          * Offset of place to put the checksum.
3757          */
3758         TXD->upper_setup.tcp_fields.tucss = ip_off + (ip->ip_hl << 2);
3759         TXD->upper_setup.tcp_fields.tucse = 0;
3760         TXD->upper_setup.tcp_fields.tucso =
3761             ip_off + (ip->ip_hl << 2) + offsetof(struct tcphdr, th_sum);
3762         /*
3763          * Payload size per packet w/o any headers.
3764          * Length of all headers up to payload.
3765          */
3766         TXD->tcp_seg_setup.fields.mss = htole16(mp->m_pkthdr.tso_segsz);
3767         TXD->tcp_seg_setup.fields.hdr_len = hdr_len;
3768
3769         TXD->cmd_and_length = htole32(adapter->txd_cmd |
3770                                 E1000_TXD_CMD_DEXT |    /* Extended descr */
3771                                 E1000_TXD_CMD_TSE |     /* TSE context */
3772                                 E1000_TXD_CMD_IP |      /* Do IP csum */
3773                                 E1000_TXD_CMD_TCP |     /* Do TCP checksum */
3774                                 (mp->m_pkthdr.len - (hdr_len))); /* Total len */
3775
3776         tx_buffer->m_head = NULL;
3777         tx_buffer->next_eop = -1;
3778
3779         if (++cur == adapter->num_tx_desc)
3780                 cur = 0;
3781
3782         txr->tx_avail--;
3783         txr->next_avail_desc = cur;
3784         txr->tx_tso = TRUE;
3785 }
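/*
 * Editor's worked example (illustrative): a TSO send with ip_off = 14,
 * a 20-byte IP header, a 20-byte TCP header and tso_segsz = 1448 gives
 *
 *   hdr_len = 14 + 20 + 20 = 54,  mss = 1448,
 *   cmd_and_length payload = m_pkthdr.len - 54
 *
 * and the MAC then cuts that payload into 1448-byte frames, replicating
 * the 54-byte header and fixing up the IP length/id and TCP sequence
 * numbers and checksums for each frame.
 */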
3786
3787
3788 /**********************************************************************
3789  *
3790  *  Examine each tx_buffer in the used queue. If the hardware is done
3791  *  processing the packet then free associated resources. The
3792  *  tx_buffer is put back on the free queue.
3793  *
3794  **********************************************************************/
3795 static void
3796 em_txeof(struct tx_ring *txr)
3797 {
3798         struct adapter  *adapter = txr->adapter;
3799         int first, last, done, processed;
3800         struct em_buffer *tx_buffer;
3801         struct e1000_tx_desc   *tx_desc, *eop_desc;
3802         struct ifnet   *ifp = adapter->ifp;
3803
3804         EM_TX_LOCK_ASSERT(txr);
3805 #ifdef DEV_NETMAP
3806         if (ifp->if_capenable & IFCAP_NETMAP) {
3807                 struct netmap_adapter *na = NA(ifp);
3808
3809                 selwakeuppri(&na->tx_rings[txr->me].si, PI_NET);
3810                 EM_TX_UNLOCK(txr);
3811                 EM_CORE_LOCK(adapter);
3812                 selwakeuppri(&na->tx_si, PI_NET);
3813                 EM_CORE_UNLOCK(adapter);
3814                 EM_TX_LOCK(txr);
3815                 return;
3816         }
3817 #endif /* DEV_NETMAP */
3818
3819         /* No work, make sure watchdog is off */
3820         if (txr->tx_avail == adapter->num_tx_desc) {
3821                 txr->queue_status = EM_QUEUE_IDLE;
3822                 return;
3823         }
3824
3825         processed = 0;
3826         first = txr->next_to_clean;
3827         tx_desc = &txr->tx_base[first];
3828         tx_buffer = &txr->tx_buffers[first];
3829         last = tx_buffer->next_eop;
3830         eop_desc = &txr->tx_base[last];
3831
3832         /*
3833          * What this does is get the index of the
3834          * first descriptor AFTER the EOP of the 
3835          * first packet, that way we can do the
3836          * simple comparison on the inner while loop.
3837          */
3838         if (++last == adapter->num_tx_desc)
3839                 last = 0;
3840         done = last;
3841
3842         bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
3843             BUS_DMASYNC_POSTREAD);
3844
3845         while (eop_desc->upper.fields.status & E1000_TXD_STAT_DD) {
3846                 /* We clean the range of the packet */
3847                 while (first != done) {
3848                         tx_desc->upper.data = 0;
3849                         tx_desc->lower.data = 0;
3850                         tx_desc->buffer_addr = 0;
3851                         ++txr->tx_avail;
3852                         ++processed;
3853
3854                         if (tx_buffer->m_head) {
3855                                 bus_dmamap_sync(txr->txtag,
3856                                     tx_buffer->map,
3857                                     BUS_DMASYNC_POSTWRITE);
3858                                 bus_dmamap_unload(txr->txtag,
3859                                     tx_buffer->map);
3860                                 m_freem(tx_buffer->m_head);
3861                                 tx_buffer->m_head = NULL;
3862                         }
3863                         tx_buffer->next_eop = -1;
3864                         txr->watchdog_time = ticks;
3865
3866                         if (++first == adapter->num_tx_desc)
3867                                 first = 0;
3868
3869                         tx_buffer = &txr->tx_buffers[first];
3870                         tx_desc = &txr->tx_base[first];
3871                 }
3872                 ++ifp->if_opackets;
3873                 /* See if we can continue to the next packet */
3874                 last = tx_buffer->next_eop;
3875                 if (last != -1) {
3876                         eop_desc = &txr->tx_base[last];
3877                         /* Get new done point */
3878                         if (++last == adapter->num_tx_desc) last = 0;
3879                         done = last;
3880                 } else
3881                         break;
3882         }
3883         bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
3884             BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
3885
3886         txr->next_to_clean = first;
3887
3888         /*
3889         ** Watchdog calculation: we know there's
3890         ** work outstanding or the first return
3891         ** would have been taken, so nothing processed
3892         ** for too long indicates a hang. The local timer
3893         ** will examine this and do a reset if needed.
3894         */
3895         if ((!processed) && ((ticks - txr->watchdog_time) > EM_WATCHDOG))
3896                 txr->queue_status = EM_QUEUE_HUNG;
3897
3898         /*
3899          * If we have a minimum free, clear IFF_DRV_OACTIVE
3900          * to tell the stack that it is OK to send packets.
3901          * Notice that all writes of OACTIVE happen under the
3902          * TX lock which, with a single queue, guarantees 
3903          * sanity.
3904          */
3905         if (txr->tx_avail >= EM_MAX_SCATTER)
3906                 ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;
3907
3908         /* Disable watchdog if all clean */
3909         if (txr->tx_avail == adapter->num_tx_desc) {
3910                 txr->queue_status = EM_QUEUE_IDLE;
3911         } 
3912 }
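/*
 * Editor's illustration (hypothetical 8-slot ring): a 3-segment packet
 * occupying slots 2..4 has next_eop = 4, so "done" above advances to 5
 * and the inner loop frees slots 2, 3 and 4 before checking the next
 * packet's EOP descriptor for its DD (descriptor done) status bit.
 */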
3913
3914
3915 /*********************************************************************
3916  *
3917  *  Refresh RX descriptor mbufs from system mbuf buffer pool.
3918  *
3919  **********************************************************************/
3920 static void
3921 em_refresh_mbufs(struct rx_ring *rxr, int limit)
3922 {
3923         struct adapter          *adapter = rxr->adapter;
3924         struct mbuf             *m;
3925         bus_dma_segment_t       segs[1];
3926         struct em_buffer        *rxbuf;
3927         int                     i, j, error, nsegs;
3928         bool                    cleaned = FALSE;
3929
3930         i = j = rxr->next_to_refresh;
3931         /*
3932         ** Get one descriptor beyond
3933         ** our work mark to control
3934         ** the loop.
3935         */
3936         if (++j == adapter->num_rx_desc)
3937                 j = 0;
3938
3939         while (j != limit) {
3940                 rxbuf = &rxr->rx_buffers[i];
3941                 if (rxbuf->m_head == NULL) {
3942                         m = m_getjcl(M_NOWAIT, MT_DATA,
3943                             M_PKTHDR, adapter->rx_mbuf_sz);
3944                         /*
3945                         ** If we have a temporary resource shortage
3946                         ** that causes a failure, just abort refresh
3947                         ** for now, we will return to this point when
3948                         ** reinvoked from em_rxeof.
3949                         */
3950                         if (m == NULL)
3951                                 goto update;
3952                 } else
3953                         m = rxbuf->m_head;
3954
3955                 m->m_len = m->m_pkthdr.len = adapter->rx_mbuf_sz;
3956                 m->m_flags |= M_PKTHDR;
3957                 m->m_data = m->m_ext.ext_buf;
3958
3959                 /* Use bus_dma machinery to setup the memory mapping  */
3960                 error = bus_dmamap_load_mbuf_sg(rxr->rxtag, rxbuf->map,
3961                     m, segs, &nsegs, BUS_DMA_NOWAIT);
3962                 if (error != 0) {
3963                         printf("Refresh mbufs: dmamap load"
3964                             " failure - %d\n", error);
3965                         m_free(m);
3966                         rxbuf->m_head = NULL;
3967                         goto update;
3968                 }
3969                 rxbuf->m_head = m;
3970                 bus_dmamap_sync(rxr->rxtag,
3971                     rxbuf->map, BUS_DMASYNC_PREREAD);
3972                 rxr->rx_base[i].buffer_addr = htole64(segs[0].ds_addr);
3973                 cleaned = TRUE;
3974
3975                 i = j; /* Next is precalculated for us */
3976                 rxr->next_to_refresh = i;
3977                 /* Calculate next controlling index */
3978                 if (++j == adapter->num_rx_desc)
3979                         j = 0;
3980         }
3981 update:
3982         /*
3983         ** Update the tail pointer only if,
3984         ** and only as far as, we have refreshed.
3985         */
3986         if (cleaned)
3987                 E1000_WRITE_REG(&adapter->hw,
3988                     E1000_RDT(rxr->me), rxr->next_to_refresh);
3989
3990         return;
3991 }
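/*
 * Editor's illustration (hypothetical 4-descriptor ring): with
 * next_to_refresh = 3 and limit = 2, the loop above refreshes slots 3
 * and 0 (j running one slot ahead and wrapping to 0), leaving
 * next_to_refresh = 1, so the refresh never catches up to the caller's
 * work mark.
 */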
3992
3993
3994 /*********************************************************************
3995  *
3996  *  Allocate memory for rx_buffer structures. Since we use one
3997  *  rx_buffer per received packet, the maximum number of rx_buffer's
3998  *  that we'll need is equal to the number of receive descriptors
3999  *  that we've allocated.
4000  *
4001  **********************************************************************/
4002 static int
4003 em_allocate_receive_buffers(struct rx_ring *rxr)
4004 {
4005         struct adapter          *adapter = rxr->adapter;
4006         device_t                dev = adapter->dev;
4007         struct em_buffer        *rxbuf;
4008         int                     error;
4009
4010         rxr->rx_buffers = malloc(sizeof(struct em_buffer) *
4011             adapter->num_rx_desc, M_DEVBUF, M_NOWAIT | M_ZERO);
4012         if (rxr->rx_buffers == NULL) {
4013                 device_printf(dev, "Unable to allocate rx_buffer memory\n");
4014                 return (ENOMEM);
4015         }
4016
4017         error = bus_dma_tag_create(bus_get_dma_tag(dev), /* parent */
4018                                 1, 0,                   /* alignment, bounds */
4019                                 BUS_SPACE_MAXADDR,      /* lowaddr */
4020                                 BUS_SPACE_MAXADDR,      /* highaddr */
4021                                 NULL, NULL,             /* filter, filterarg */
4022                                 MJUM9BYTES,             /* maxsize */
4023                                 1,                      /* nsegments */
4024                                 MJUM9BYTES,             /* maxsegsize */
4025                                 0,                      /* flags */
4026                                 NULL,                   /* lockfunc */
4027                                 NULL,                   /* lockarg */
4028                                 &rxr->rxtag);
4029         if (error) {
4030                 device_printf(dev, "%s: bus_dma_tag_create failed %d\n",
4031                     __func__, error);
4032                 goto fail;
4033         }
4034
4035         /* Create a DMA map for each receive buffer */
4036         for (int i = 0; i < adapter->num_rx_desc; i++) {
4037                 rxbuf = &rxr->rx_buffers[i];
4038                 error = bus_dmamap_create(rxr->rxtag, BUS_DMA_NOWAIT,
4039                     &rxbuf->map);
4040                 if (error) {
4041                         device_printf(dev, "%s: bus_dmamap_create failed: %d\n",
4042                             __func__, error);
4043                         goto fail;
4044                 }
4045         }
4046
4047         return (0);
4048
4049 fail:
4050         em_free_receive_structures(adapter);
4051         return (error);
4052 }
4053
4054
4055 /*********************************************************************
4056  *
4057  *  Initialize a receive ring and its buffers.
4058  *
4059  **********************************************************************/
4060 static int
4061 em_setup_receive_ring(struct rx_ring *rxr)
4062 {
4063         struct  adapter         *adapter = rxr->adapter;
4064         struct em_buffer        *rxbuf;
4065         bus_dma_segment_t       seg[1];
4066         int                     rsize, nsegs, error = 0;
4067 #ifdef DEV_NETMAP
4068         struct netmap_adapter *na = NA(adapter->ifp);
4069         struct netmap_slot *slot;
4070 #endif
4071
4072
4073         /* Clear the ring contents */
4074         EM_RX_LOCK(rxr);
4075         rsize = roundup2(adapter->num_rx_desc *
4076             sizeof(struct e1000_rx_desc), EM_DBA_ALIGN);
4077         bzero((void *)rxr->rx_base, rsize);
4078 #ifdef DEV_NETMAP
4079         slot = netmap_reset(na, NR_RX, 0, 0);
4080 #endif
4081
4082         /*
4083         ** Free current RX buffer structs and their mbufs
4084         */
4085         for (int i = 0; i < adapter->num_rx_desc; i++) {
4086                 rxbuf = &rxr->rx_buffers[i];
4087                 if (rxbuf->m_head != NULL) {
4088                         bus_dmamap_sync(rxr->rxtag, rxbuf->map,
4089                             BUS_DMASYNC_POSTREAD);
4090                         bus_dmamap_unload(rxr->rxtag, rxbuf->map);
4091                         m_freem(rxbuf->m_head);
4092                         rxbuf->m_head = NULL; /* mark as freed */
4093                 }
4094         }
4095
4096         /* Now replenish the mbufs */
4097         for (int j = 0; j != adapter->num_rx_desc; ++j) {
4098                 rxbuf = &rxr->rx_buffers[j];
4099 #ifdef DEV_NETMAP
4100                 if (slot) {
4101                         int si = netmap_idx_n2k(&na->rx_rings[rxr->me], j);
4102                         uint64_t paddr;
4103                         void *addr;
4104
4105                         addr = PNMB(slot + si, &paddr);
4106                         netmap_load_map(rxr->rxtag, rxbuf->map, addr);
4107                         /* Update descriptor */
4108                         rxr->rx_base[j].buffer_addr = htole64(paddr);
4109                         continue;
4110                 }
4111 #endif /* DEV_NETMAP */
4112                 rxbuf->m_head = m_getjcl(M_NOWAIT, MT_DATA,
4113                     M_PKTHDR, adapter->rx_mbuf_sz);
4114                 if (rxbuf->m_head == NULL) {
4115                         error = ENOBUFS;
4116                         goto fail;
4117                 }
4118                 rxbuf->m_head->m_len = adapter->rx_mbuf_sz;
4119                 rxbuf->m_head->m_flags &= ~M_HASFCS; /* we strip it */
4120                 rxbuf->m_head->m_pkthdr.len = adapter->rx_mbuf_sz;
4121
4122                 /* Get the memory mapping */
4123                 error = bus_dmamap_load_mbuf_sg(rxr->rxtag,
4124                     rxbuf->map, rxbuf->m_head, seg,
4125                     &nsegs, BUS_DMA_NOWAIT);
4126                 if (error != 0) {
4127                         m_freem(rxbuf->m_head);
4128                         rxbuf->m_head = NULL;
4129                         goto fail;
4130                 }
4131                 bus_dmamap_sync(rxr->rxtag,
4132                     rxbuf->map, BUS_DMASYNC_PREREAD);
4133
4134                 /* Update descriptor */
4135                 rxr->rx_base[j].buffer_addr = htole64(seg[0].ds_addr);
4136         }
4137         rxr->next_to_check = 0;
4138         rxr->next_to_refresh = 0;
4139         bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
4140             BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
4141
4142 fail:
4143         EM_RX_UNLOCK(rxr);
4144         return (error);
4145 }
4146
4147 /*********************************************************************
4148  *
4149  *  Initialize all receive rings.
4150  *
4151  **********************************************************************/
4152 static int
4153 em_setup_receive_structures(struct adapter *adapter)
4154 {
4155         struct rx_ring *rxr = adapter->rx_rings;
4156         int q;
4157
4158         for (q = 0; q < adapter->num_queues; q++, rxr++)
4159                 if (em_setup_receive_ring(rxr))
4160                         goto fail;
4161
4162         return (0);
4163 fail:
4164         /*
4165          * Free the RX buffers allocated so far; we need only handle
4166          * the rings that completed, since the failing case will have
4167          * cleaned up after itself. 'q' failed, so it is the terminus.
4168          */
4169         for (int i = 0; i < q; ++i) {
4170                 rxr = &adapter->rx_rings[i];
4171                 for (int n = 0; n < adapter->num_rx_desc; n++) {
4172                         struct em_buffer *rxbuf;
4173                         rxbuf = &rxr->rx_buffers[n];
4174                         if (rxbuf->m_head != NULL) {
4175                                 bus_dmamap_sync(rxr->rxtag, rxbuf->map,
4176                                   BUS_DMASYNC_POSTREAD);
4177                                 bus_dmamap_unload(rxr->rxtag, rxbuf->map);
4178                                 m_freem(rxbuf->m_head);
4179                                 rxbuf->m_head = NULL;
4180                         }
4181                 }
4182                 rxr->next_to_check = 0;
4183                 rxr->next_to_refresh = 0;
4184         }
4185
4186         return (ENOBUFS);
4187 }
4188
4189 /*********************************************************************
4190  *
4191  *  Free all receive rings.
4192  *
4193  **********************************************************************/
4194 static void
4195 em_free_receive_structures(struct adapter *adapter)
4196 {
4197         struct rx_ring *rxr = adapter->rx_rings;
4198
4199         for (int i = 0; i < adapter->num_queues; i++, rxr++) {
4200                 em_free_receive_buffers(rxr);
4201                 /* Free the ring memory as well */
4202                 em_dma_free(adapter, &rxr->rxdma);
4203                 EM_RX_LOCK_DESTROY(rxr);
4204         }
4205
4206         free(adapter->rx_rings, M_DEVBUF);
4207 }
4208
4209
4210 /*********************************************************************
4211  *
4212  *  Free receive ring data structures
4213  *
4214  **********************************************************************/
4215 static void
4216 em_free_receive_buffers(struct rx_ring *rxr)
4217 {
4218         struct adapter          *adapter = rxr->adapter;
4219         struct em_buffer        *rxbuf = NULL;
4220
4221         INIT_DEBUGOUT("free_receive_buffers: begin");
4222
4223         if (rxr->rx_buffers != NULL) {
4224                 for (int i = 0; i < adapter->num_rx_desc; i++) {
4225                         rxbuf = &rxr->rx_buffers[i];
4226                         if (rxbuf->map != NULL) {
4227                                 bus_dmamap_sync(rxr->rxtag, rxbuf->map,
4228                                     BUS_DMASYNC_POSTREAD);
4229                                 bus_dmamap_unload(rxr->rxtag, rxbuf->map);
4230                                 bus_dmamap_destroy(rxr->rxtag, rxbuf->map);
4231                         }
4232                         if (rxbuf->m_head != NULL) {
4233                                 m_freem(rxbuf->m_head);
4234                                 rxbuf->m_head = NULL;
4235                         }
4236                 }
4237                 free(rxr->rx_buffers, M_DEVBUF);
4238                 rxr->rx_buffers = NULL;
4239                 rxr->next_to_check = 0;
4240                 rxr->next_to_refresh = 0;
4241         }
4242
4243         if (rxr->rxtag != NULL) {
4244                 bus_dma_tag_destroy(rxr->rxtag);
4245                 rxr->rxtag = NULL;
4246         }
4247
4248         return;
4249 }
4250
4251
4252 /*********************************************************************
4253  *
4254  *  Enable receive unit.
4255  *
4256  **********************************************************************/
4257 #define MAX_INTS_PER_SEC        8000
4258 #define DEFAULT_ITR          (1000000000/(MAX_INTS_PER_SEC * 256))
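/*
 * Worked example of the arithmetic above: the ITR register counts in
 * 256 ns units, so DEFAULT_ITR = 1000000000 / (8000 * 256) = 488 in
 * integer division, i.e. at most one interrupt every 488 * 256 ns,
 * roughly 125 us, which matches the MAX_INTS_PER_SEC = 8000 target.
 */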
4259
4260 static void
4261 em_initialize_receive_unit(struct adapter *adapter)
4262 {
4263         struct rx_ring  *rxr = adapter->rx_rings;
4264         struct ifnet    *ifp = adapter->ifp;
4265         struct e1000_hw *hw = &adapter->hw;
4266         u64     bus_addr;
4267         u32     rctl, rxcsum;
4268
4269         INIT_DEBUGOUT("em_initialize_receive_units: begin");
4270
4271         /*
4272          * Make sure receives are disabled while setting
4273          * up the descriptor ring
4274          */
4275         rctl = E1000_READ_REG(hw, E1000_RCTL);
4276         /* Do not disable if ever enabled on this hardware */
4277         if ((hw->mac.type != e1000_82574) && (hw->mac.type != e1000_82583))
4278                 E1000_WRITE_REG(hw, E1000_RCTL, rctl & ~E1000_RCTL_EN);
4279
4280         E1000_WRITE_REG(&adapter->hw, E1000_RADV,
4281             adapter->rx_abs_int_delay.value);
4282         /*
4283          * Set the interrupt throttling rate. Value is calculated
4284          * as DEFAULT_ITR = 1/(MAX_INTS_PER_SEC * 256ns)
4285          */
4286         E1000_WRITE_REG(hw, E1000_ITR, DEFAULT_ITR);
4287
4288         /*
4289         ** When using MSIX interrupts we need to throttle
4290         ** using the EITR register (82574 only)
4291         */
4292         if (hw->mac.type == e1000_82574) {
4293                 for (int i = 0; i < 4; i++)
4294                         E1000_WRITE_REG(hw, E1000_EITR_82574(i),
4295                             DEFAULT_ITR);
4296                 /* Disable accelerated acknowledge */
4297                 E1000_WRITE_REG(hw, E1000_RFCTL, E1000_RFCTL_ACK_DIS);
4298         }
4299
4300         if (ifp->if_capenable & IFCAP_RXCSUM) {
4301                 rxcsum = E1000_READ_REG(hw, E1000_RXCSUM);
4302                 rxcsum |= (E1000_RXCSUM_IPOFL | E1000_RXCSUM_TUOFL);
4303                 E1000_WRITE_REG(hw, E1000_RXCSUM, rxcsum);
4304         }
4305
4306         /*
4307         ** XXX TEMPORARY WORKAROUND: on some systems with 82573
4308         ** long latencies are observed, like Lenovo X60. This
4309         ** change eliminates the problem, but since having positive
4310         ** values in RDTR is a known source of problems on other
4311         ** platforms another solution is being sought.
4312         */
4313         if (hw->mac.type == e1000_82573)
4314                 E1000_WRITE_REG(hw, E1000_RDTR, 0x20);
4315
4316         for (int i = 0; i < adapter->num_queues; i++, rxr++) {
4317                 /* Setup the Base and Length of the Rx Descriptor Ring */
4318                 bus_addr = rxr->rxdma.dma_paddr;
4319                 E1000_WRITE_REG(hw, E1000_RDLEN(i),
4320                     adapter->num_rx_desc * sizeof(struct e1000_rx_desc));
4321                 E1000_WRITE_REG(hw, E1000_RDBAH(i), (u32)(bus_addr >> 32));
4322                 E1000_WRITE_REG(hw, E1000_RDBAL(i), (u32)bus_addr);
4323                 /* Setup the Head and Tail Descriptor Pointers */
4324                 E1000_WRITE_REG(hw, E1000_RDH(i), 0);
4325 #ifdef DEV_NETMAP
4326                 /*
4327                  * an init() while a netmap client is active must
4328                  * preserve the rx buffers passed to userspace.
4329                  * In this driver it means we adjust RDT to
4330                  * something different from na->num_rx_desc - 1.
4331                  */
4332                 if (ifp->if_capenable & IFCAP_NETMAP) {
4333                         struct netmap_adapter *na = NA(adapter->ifp);
4334                         struct netmap_kring *kring = &na->rx_rings[i];
4335                         int t = na->num_rx_desc - 1 - kring->nr_hwavail;
4336
4337                         E1000_WRITE_REG(hw, E1000_RDT(i), t);
4338                 } else
4339 #endif /* DEV_NETMAP */
4340                 E1000_WRITE_REG(hw, E1000_RDT(i), adapter->num_rx_desc - 1);
4341         }
4342
4343         /* Set PTHRESH for improved jumbo performance */
4344         if (((adapter->hw.mac.type == e1000_ich9lan) ||
4345             (adapter->hw.mac.type == e1000_pch2lan) ||
4346             (adapter->hw.mac.type == e1000_ich10lan)) &&
4347             (ifp->if_mtu > ETHERMTU)) {
4348                 u32 rxdctl = E1000_READ_REG(hw, E1000_RXDCTL(0));
4349                 E1000_WRITE_REG(hw, E1000_RXDCTL(0), rxdctl | 3);
4350         }
4351                 
4352         if (adapter->hw.mac.type >= e1000_pch2lan) {
4353                 if (ifp->if_mtu > ETHERMTU)
4354                         e1000_lv_jumbo_workaround_ich8lan(hw, TRUE);
4355                 else
4356                         e1000_lv_jumbo_workaround_ich8lan(hw, FALSE);
4357         }
4358
4359         /* Setup the Receive Control Register */
4360         rctl &= ~(3 << E1000_RCTL_MO_SHIFT);
4361         rctl |= E1000_RCTL_EN | E1000_RCTL_BAM |
4362             E1000_RCTL_LBM_NO | E1000_RCTL_RDMTS_HALF |
4363             (hw->mac.mc_filter_type << E1000_RCTL_MO_SHIFT);
4364
4365         /* Strip the CRC */
4366         rctl |= E1000_RCTL_SECRC;
4367
4368         /* Make sure VLAN Filters are off */
4369         rctl &= ~E1000_RCTL_VFE;
4370         rctl &= ~E1000_RCTL_SBP;
4371
4372         if (adapter->rx_mbuf_sz == MCLBYTES)
4373                 rctl |= E1000_RCTL_SZ_2048;
4374         else if (adapter->rx_mbuf_sz == MJUMPAGESIZE)
4375                 rctl |= E1000_RCTL_SZ_4096 | E1000_RCTL_BSEX;
4376         else if (adapter->rx_mbuf_sz > MJUMPAGESIZE)
4377                 rctl |= E1000_RCTL_SZ_8192 | E1000_RCTL_BSEX;
4378
4379         if (ifp->if_mtu > ETHERMTU)
4380                 rctl |= E1000_RCTL_LPE;
4381         else
4382                 rctl &= ~E1000_RCTL_LPE;
4383
4384         /* Write out the settings */
4385         E1000_WRITE_REG(hw, E1000_RCTL, rctl);
4386
4387         return;
4388 }
4389
4390
4391 /*********************************************************************
4392  *
4393  *  This routine executes in interrupt context. It replenishes
4394  *  the mbufs in the descriptor and sends data which has been
4395  *  dma'ed into host memory to upper layer.
4396  *
4397  *  We loop at most count times if count is > 0, or until done if
4398  *  count < 0.
4399  *  
4400  *  For polling we also now return the number of cleaned packets
4401  *********************************************************************/
4402 static bool
4403 em_rxeof(struct rx_ring *rxr, int count, int *done)
4404 {
4405         struct adapter          *adapter = rxr->adapter;
4406         struct ifnet            *ifp = adapter->ifp;
4407         struct mbuf             *mp, *sendmp;
4408         u8                      status = 0;
4409         u16                     len;
4410         int                     i, processed, rxdone = 0;
4411         bool                    eop;
4412         struct e1000_rx_desc    *cur;
4413
4414         EM_RX_LOCK(rxr);
4415
4416 #ifdef DEV_NETMAP
4417         if (ifp->if_capenable & IFCAP_NETMAP) {
4418                 struct netmap_adapter *na = NA(ifp);
4419
4420                 na->rx_rings[rxr->me].nr_kflags |= NKR_PENDINTR;
4421                 selwakeuppri(&na->rx_rings[rxr->me].si, PI_NET);
4422                 EM_RX_UNLOCK(rxr);
4423                 EM_CORE_LOCK(adapter);
4424                 selwakeuppri(&na->rx_si, PI_NET);
4425                 EM_CORE_UNLOCK(adapter);
4426                 return (FALSE);
4427         }
4428 #endif /* DEV_NETMAP */
4429
4430         for (i = rxr->next_to_check, processed = 0; count != 0;) {
4431
4432                 if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
4433                         break;
4434
4435                 bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
4436                     BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
4437
4438                 cur = &rxr->rx_base[i];
4439                 status = cur->status;
4440                 mp = sendmp = NULL;
4441
4442                 if ((status & E1000_RXD_STAT_DD) == 0)
4443                         break;
4444
4445                 len = le16toh(cur->length);
4446                 eop = (status & E1000_RXD_STAT_EOP) != 0;
4447
4448                 if ((cur->errors & E1000_RXD_ERR_FRAME_ERR_MASK) ||
4449                     (rxr->discard == TRUE)) {
4450                         adapter->dropped_pkts++;
4451                         ++rxr->rx_discarded;
4452                         if (!eop) /* Catch subsequent segs */
4453                                 rxr->discard = TRUE;
4454                         else
4455                                 rxr->discard = FALSE;
4456                         em_rx_discard(rxr, i);
4457                         goto next_desc;
4458                 }
4459
4460                 /* Assign correct length to the current fragment */
4461                 mp = rxr->rx_buffers[i].m_head;
4462                 mp->m_len = len;
4463
4464                 /* Trigger for refresh */
4465                 rxr->rx_buffers[i].m_head = NULL;
4466
4467                 /* First segment? */
4468                 if (rxr->fmp == NULL) {
4469                         mp->m_pkthdr.len = len;
4470                         rxr->fmp = rxr->lmp = mp;
4471                 } else {
4472                         /* Chain mbuf's together */
4473                         mp->m_flags &= ~M_PKTHDR;
4474                         rxr->lmp->m_next = mp;
4475                         rxr->lmp = mp;
4476                         rxr->fmp->m_pkthdr.len += len;
4477                 }
4478
4479                 if (eop) {
4480                         --count;
4481                         sendmp = rxr->fmp;
4482                         sendmp->m_pkthdr.rcvif = ifp;
4483                         ifp->if_ipackets++;
4484                         em_receive_checksum(cur, sendmp);
4485 #ifndef __NO_STRICT_ALIGNMENT
4486                         if (adapter->hw.mac.max_frame_size >
4487                             (MCLBYTES - ETHER_ALIGN) &&
4488                             em_fixup_rx(rxr) != 0)
4489                                 goto skip;
4490 #endif
4491                         if (status & E1000_RXD_STAT_VP) {
4492                                 sendmp->m_pkthdr.ether_vtag =
4493                                     le16toh(cur->special);
4494                                 sendmp->m_flags |= M_VLANTAG;
4495                         }
4496 #ifndef __NO_STRICT_ALIGNMENT
4497 skip:
4498 #endif
4499                         rxr->fmp = rxr->lmp = NULL;
4500                 }
4501 next_desc:
4502                 /* Zero out the receive descriptors status. */
4503                 cur->status = 0;
4504                 ++rxdone;       /* cumulative for POLL */
4505                 ++processed;
4506
4507                 /* Advance our pointers to the next descriptor. */
4508                 if (++i == adapter->num_rx_desc)
4509                         i = 0;
4510
4511                 /* Send to the stack */
4512                 if (sendmp != NULL) {
4513                         rxr->next_to_check = i;
4514                         EM_RX_UNLOCK(rxr);
4515                         (*ifp->if_input)(ifp, sendmp);
4516                         EM_RX_LOCK(rxr);
4517                         i = rxr->next_to_check;
4518                 }
4519
4520                 /* Only refresh mbufs every 8 descriptors */
4521                 if (processed == 8) {
4522                         em_refresh_mbufs(rxr, i);
4523                         processed = 0;
4524                 }
4525         }
4526
4527         /* Catch any remaining refresh work */
4528         if (e1000_rx_unrefreshed(rxr))
4529                 em_refresh_mbufs(rxr, i);
4530
4531         rxr->next_to_check = i;
4532         if (done != NULL)
4533                 *done = rxdone;
4534         EM_RX_UNLOCK(rxr);
4535
4536         return ((status & E1000_RXD_STAT_DD) ? TRUE : FALSE);
4537 }
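
/*
 * Worked example of the EOP chaining in em_rxeof() above (illustrative):
 * with 2048-byte clusters, a 5000-byte frame arrives as three
 * descriptors.  The first two have DD set but not EOP, so their mbufs
 * are linked through fmp/lmp; the third sets EOP, the lengths
 * 2048 + 2048 + 904 accumulate into m_pkthdr.len == 5000, and the
 * completed chain is handed to (*ifp->if_input)().
 */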
4538
4539 static __inline void
4540 em_rx_discard(struct rx_ring *rxr, int i)
4541 {
4542         struct em_buffer        *rbuf;
4543
4544         rbuf = &rxr->rx_buffers[i];
4545         /* Free any previous pieces */
4546         if (rxr->fmp != NULL) {
4547                 rxr->fmp->m_flags |= M_PKTHDR;
4548                 m_freem(rxr->fmp);
4549                 rxr->fmp = NULL;
4550                 rxr->lmp = NULL;
4551         }
4552         /*
4553         ** Free the buffer and allow em_refresh_mbufs()
4554         ** to clean up and recharge it.
4555         */
4556         if (rbuf->m_head) {
4557                 m_free(rbuf->m_head);
4558                 rbuf->m_head = NULL;
4559         }
4560         return;
4561 }
4562
4563 #ifndef __NO_STRICT_ALIGNMENT
4564 /*
4565  * When jumbo frames are enabled we should realign the entire payload on
4566  * architectures with strict alignment. This is a serious design mistake of
4567  * the 8254x, as it nullifies the gain from DMA. The 8254x only allows the RX
4568  * buffer size to be 2048/4096/8192/16384. What we really want is
4569  * 2048 - ETHER_ALIGN, which would align its payload. On architectures without
4570  * strict alignment restrictions the 8254x still performs unaligned memory
4571  * accesses, which reduces performance too.
4572  * To avoid copying an entire frame to realign it, we allocate a new mbuf and
4573  * copy just the ethernet header into it. The new mbuf is then prepended to
4574  * the existing mbuf chain.
4575  *
4576  * Be aware that the best performance of the 8254x is achieved only when
4577  * jumbo frames are not used at all on architectures with strict alignment.
4577  */
4578 static int
4579 em_fixup_rx(struct rx_ring *rxr)
4580 {
4581         struct adapter *adapter = rxr->adapter;
4582         struct mbuf *m, *n;
4583         int error;
4584
4585         error = 0;
4586         m = rxr->fmp;
4587         if (m->m_len <= (MCLBYTES - ETHER_HDR_LEN)) {
4588                 bcopy(m->m_data, m->m_data + ETHER_HDR_LEN, m->m_len);
4589                 m->m_data += ETHER_HDR_LEN;
4590         } else {
4591                 MGETHDR(n, M_NOWAIT, MT_DATA);
4592                 if (n != NULL) {
4593                         bcopy(m->m_data, n->m_data, ETHER_HDR_LEN);
4594                         m->m_data += ETHER_HDR_LEN;
4595                         m->m_len -= ETHER_HDR_LEN;
4596                         n->m_len = ETHER_HDR_LEN;
4597                         M_MOVE_PKTHDR(n, m);
4598                         n->m_next = m;
4599                         rxr->fmp = n;
4600                 } else {
4601                         adapter->dropped_pkts++;
4602                         m_freem(rxr->fmp);
4603                         rxr->fmp = NULL;
4604                         error = ENOMEM;
4605                 }
4606         }
4607
4608         return (error);
4609 }
4610 #endif
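
/*
 * Worked example for the in-place branch of em_fixup_rx() (illustrative):
 * a frame DMA'd to the start of a 4-byte-aligned cluster leaves the IP
 * header at offset ETHER_HDR_LEN = 14, two bytes off a 32-bit boundary.
 * Shifting the frame forward by another 14 bytes moves the IP header to
 * offset 28, and 28 % 4 == 0, so strict-alignment architectures can then
 * dereference the header fields safely.
 */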
4611
4612 /*********************************************************************
4613  *
4614  *  Verify that the hardware indicated that the checksum is valid.
4615  *  Inform the stack about the status of checksum so that stack
4616  *  doesn't spend time verifying the checksum.
4617  *
4618  *********************************************************************/
4619 static void
4620 em_receive_checksum(struct e1000_rx_desc *rx_desc, struct mbuf *mp)
4621 {
4622         /* Ignore Checksum bit is set */
4623         if (rx_desc->status & E1000_RXD_STAT_IXSM) {
4624                 mp->m_pkthdr.csum_flags = 0;
4625                 return;
4626         }
4627
4628         if (rx_desc->status & E1000_RXD_STAT_IPCS) {
4629                 /* Did it pass? */
4630                 if (!(rx_desc->errors & E1000_RXD_ERR_IPE)) {
4631                         /* IP Checksum Good */
4632                         mp->m_pkthdr.csum_flags = CSUM_IP_CHECKED;
4633                         mp->m_pkthdr.csum_flags |= CSUM_IP_VALID;
4634
4635                 } else {
4636                         mp->m_pkthdr.csum_flags = 0;
4637                 }
4638         }
4639
4640         if (rx_desc->status & E1000_RXD_STAT_TCPCS) {
4641                 /* Did it pass? */
4642                 if (!(rx_desc->errors & E1000_RXD_ERR_TCPE)) {
4643                         mp->m_pkthdr.csum_flags |=
4644                         (CSUM_DATA_VALID | CSUM_PSEUDO_HDR);
4645                         mp->m_pkthdr.csum_data = htons(0xffff);
4646                 }
4647         }
4648 }
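
/*
 * Sketch of what the flags above mean upstream (an assumption about the
 * stack, not part of this driver): a frame that passed both checks is
 * delivered with
 *
 *      m->m_pkthdr.csum_flags == (CSUM_IP_CHECKED | CSUM_IP_VALID |
 *          CSUM_DATA_VALID | CSUM_PSEUDO_HDR);
 *      m->m_pkthdr.csum_data == 0xffff;
 *
 * so the TCP/UDP input paths can skip in_cksum(); csum_data == 0xffff
 * stands in for a fully verified pseudo-header checksum.
 */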
4649
4650 /*
4651  * This routine is run via a vlan
4652  * config EVENT
4653  */
4654 static void
4655 em_register_vlan(void *arg, struct ifnet *ifp, u16 vtag)
4656 {
4657         struct adapter  *adapter = ifp->if_softc;
4658         u32             index, bit;
4659
4660         if (ifp->if_softc !=  arg)   /* Not our event */
4661                 return;
4662
4663         if ((vtag == 0) || (vtag > 4095))       /* Invalid ID */
4664                 return;
4665
4666         EM_CORE_LOCK(adapter);
4667         index = (vtag >> 5) & 0x7F;
4668         bit = vtag & 0x1F;
4669         adapter->shadow_vfta[index] |= (1 << bit);
4670         ++adapter->num_vlans;
4671         /* Re-init to load the changes */
4672         if (ifp->if_capenable & IFCAP_VLAN_HWFILTER)
4673                 em_init_locked(adapter);
4674         EM_CORE_UNLOCK(adapter);
4675 }
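
/*
 * Worked example of the VFTA indexing above: 4096 possible VLAN IDs map
 * onto 128 32-bit table entries, the low five bits selecting the bit and
 * the next seven bits selecting the word.  For vtag = 100:
 *
 *      index = (100 >> 5) & 0x7F;      // == 3
 *      bit   = 100 & 0x1F;             // == 4
 *      shadow_vfta[3] |= (1 << 4);
 */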
4676
4677 /*
4678  * This routine is run via a vlan
4679  * unconfig EVENT
4680  */
4681 static void
4682 em_unregister_vlan(void *arg, struct ifnet *ifp, u16 vtag)
4683 {
4684         struct adapter  *adapter = ifp->if_softc;
4685         u32             index, bit;
4686
4687         if (ifp->if_softc !=  arg)
4688                 return;
4689
4690         if ((vtag == 0) || (vtag > 4095))       /* Invalid */
4691                 return;
4692
4693         EM_CORE_LOCK(adapter);
4694         index = (vtag >> 5) & 0x7F;
4695         bit = vtag & 0x1F;
4696         adapter->shadow_vfta[index] &= ~(1 << bit);
4697         --adapter->num_vlans;
4698         /* Re-init to load the changes */
4699         if (ifp->if_capenable & IFCAP_VLAN_HWFILTER)
4700                 em_init_locked(adapter);
4701         EM_CORE_UNLOCK(adapter);
4702 }
4703
4704 static void
4705 em_setup_vlan_hw_support(struct adapter *adapter)
4706 {
4707         struct e1000_hw *hw = &adapter->hw;
4708         u32             reg;
4709
4710         /*
4711         ** We get here thru init_locked, meaning
4712         ** a soft reset; this has already cleared
4713         ** the VFTA and other state, so if no
4714         ** vlans have been registered, do nothing.
4715         */
4716         if (adapter->num_vlans == 0)
4717                 return;
4718
4719         /*
4720         ** A soft reset zeroes out the VFTA, so
4721         ** we need to repopulate it now.
4722         */
4723         for (int i = 0; i < EM_VFTA_SIZE; i++)
4724                 if (adapter->shadow_vfta[i] != 0)
4725                         E1000_WRITE_REG_ARRAY(hw, E1000_VFTA,
4726                             i, adapter->shadow_vfta[i]);
4727
4728         reg = E1000_READ_REG(hw, E1000_CTRL);
4729         reg |= E1000_CTRL_VME;
4730         E1000_WRITE_REG(hw, E1000_CTRL, reg);
4731
4732         /* Enable the Filter Table */
4733         reg = E1000_READ_REG(hw, E1000_RCTL);
4734         reg &= ~E1000_RCTL_CFIEN;
4735         reg |= E1000_RCTL_VFE;
4736         E1000_WRITE_REG(hw, E1000_RCTL, reg);
4737 }
4738
4739 static void
4740 em_enable_intr(struct adapter *adapter)
4741 {
4742         struct e1000_hw *hw = &adapter->hw;
4743         u32 ims_mask = IMS_ENABLE_MASK;
4744
4745         if (hw->mac.type == e1000_82574) {
4746                 E1000_WRITE_REG(hw, EM_EIAC, EM_MSIX_MASK);
4747                 ims_mask |= EM_MSIX_MASK;
4748         } 
4749         E1000_WRITE_REG(hw, E1000_IMS, ims_mask);
4750 }
4751
4752 static void
4753 em_disable_intr(struct adapter *adapter)
4754 {
4755         struct e1000_hw *hw = &adapter->hw;
4756
4757         if (hw->mac.type == e1000_82574)
4758                 E1000_WRITE_REG(hw, EM_EIAC, 0);
4759         E1000_WRITE_REG(&adapter->hw, E1000_IMC, 0xffffffff);
4760 }
4761
4762 /*
4763  * Bit of a misnomer: what this really means is
4764  * to enable OS management of the system... aka
4765  * to disable special hardware management features 
4766  */
4767 static void
4768 em_init_manageability(struct adapter *adapter)
4769 {
4770         /* A shared code workaround */
4771 #define E1000_82542_MANC2H E1000_MANC2H
4772         if (adapter->has_manage) {
4773                 int manc2h = E1000_READ_REG(&adapter->hw, E1000_MANC2H);
4774                 int manc = E1000_READ_REG(&adapter->hw, E1000_MANC);
4775
4776                 /* disable hardware interception of ARP */
4777                 manc &= ~(E1000_MANC_ARP_EN);
4778
4779                 /* enable receiving management packets to the host */
4780                 manc |= E1000_MANC_EN_MNG2HOST;
4781 #define E1000_MNG2HOST_PORT_623 (1 << 5)
4782 #define E1000_MNG2HOST_PORT_664 (1 << 6)
4783                 manc2h |= E1000_MNG2HOST_PORT_623;
4784                 manc2h |= E1000_MNG2HOST_PORT_664;
4785                 E1000_WRITE_REG(&adapter->hw, E1000_MANC2H, manc2h);
4786                 E1000_WRITE_REG(&adapter->hw, E1000_MANC, manc);
4787         }
4788 }
4789
4790 /*
4791  * Give control back to hardware management
4792  * controller if there is one.
4793  */
4794 static void
4795 em_release_manageability(struct adapter *adapter)
4796 {
4797         if (adapter->has_manage) {
4798                 int manc = E1000_READ_REG(&adapter->hw, E1000_MANC);
4799
4800                 /* re-enable hardware interception of ARP */
4801                 manc |= E1000_MANC_ARP_EN;
4802                 manc &= ~E1000_MANC_EN_MNG2HOST;
4803
4804                 E1000_WRITE_REG(&adapter->hw, E1000_MANC, manc);
4805         }
4806 }
4807
4808 /*
4809  * em_get_hw_control sets the {CTRL_EXT|FWSM}:DRV_LOAD bit.
4810  * For ASF and Pass Through versions of f/w this means
4811  * that the driver is loaded. For AMT version type f/w
4812  * this means that the network i/f is open.
4813  */
4814 static void
4815 em_get_hw_control(struct adapter *adapter)
4816 {
4817         u32 ctrl_ext, swsm;
4818
4819         if (adapter->hw.mac.type == e1000_82573) {
4820                 swsm = E1000_READ_REG(&adapter->hw, E1000_SWSM);
4821                 E1000_WRITE_REG(&adapter->hw, E1000_SWSM,
4822                     swsm | E1000_SWSM_DRV_LOAD);
4823                 return;
4824         }
4825         /* else */
4826         ctrl_ext = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
4827         E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT,
4828             ctrl_ext | E1000_CTRL_EXT_DRV_LOAD);
4829         return;
4830 }
4831
4832 /*
4833  * em_release_hw_control resets {CTRL_EXT|FWSM}:DRV_LOAD bit.
4834  * For ASF and Pass Through versions of f/w this means that
4835  * the driver is no longer loaded. For AMT versions of the
4836  * f/w this means that the network i/f is closed.
4837  */
4838 static void
4839 em_release_hw_control(struct adapter *adapter)
4840 {
4841         u32 ctrl_ext, swsm;
4842
4843         if (!adapter->has_manage)
4844                 return;
4845
4846         if (adapter->hw.mac.type == e1000_82573) {
4847                 swsm = E1000_READ_REG(&adapter->hw, E1000_SWSM);
4848                 E1000_WRITE_REG(&adapter->hw, E1000_SWSM,
4849                     swsm & ~E1000_SWSM_DRV_LOAD);
4850                 return;
4851         }
4852         /* else */
4853         ctrl_ext = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
4854         E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT,
4855             ctrl_ext & ~E1000_CTRL_EXT_DRV_LOAD);
4856         return;
4857 }
4858
4859 static int
4860 em_is_valid_ether_addr(u8 *addr)
4861 {
4862         char zero_addr[6] = { 0, 0, 0, 0, 0, 0 };
4863
4864         if ((addr[0] & 1) || (!bcmp(addr, zero_addr, ETHER_ADDR_LEN))) {
4865                 return (FALSE);
4866         }
4867
4868         return (TRUE);
4869 }
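
/*
 * Example of the test above (illustrative): the low bit of the first
 * octet is the group/multicast bit, so 01:00:5e:00:00:01 fails the
 * (addr[0] & 1) check, 00:1b:21:12:34:56 passes, and the all-zero
 * address is rejected by the bcmp().
 */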
4870
4871 /*
4872 ** Parse the interface capabilities with regard
4873 ** to both system management and wake-on-lan for
4874 ** later use.
4875 */
4876 static void
4877 em_get_wakeup(device_t dev)
4878 {
4879         struct adapter  *adapter = device_get_softc(dev);
4880         u16             eeprom_data = 0, device_id, apme_mask;
4881
4882         adapter->has_manage = e1000_enable_mng_pass_thru(&adapter->hw);
4883         apme_mask = EM_EEPROM_APME;
4884
4885         switch (adapter->hw.mac.type) {
4886         case e1000_82573:
4887         case e1000_82583:
4888                 adapter->has_amt = TRUE;
4889                 /* Falls thru */
4890         case e1000_82571:
4891         case e1000_82572:
4892         case e1000_80003es2lan:
4893                 if (adapter->hw.bus.func == 1) {
4894                         e1000_read_nvm(&adapter->hw,
4895                             NVM_INIT_CONTROL3_PORT_B, 1, &eeprom_data);
4896                         break;
4897                 } else
4898                         e1000_read_nvm(&adapter->hw,
4899                             NVM_INIT_CONTROL3_PORT_A, 1, &eeprom_data);
4900                 break;
4901         case e1000_ich8lan:
4902         case e1000_ich9lan:
4903         case e1000_ich10lan:
4904         case e1000_pchlan:
4905         case e1000_pch2lan:
4906                 apme_mask = E1000_WUC_APME;
4907                 adapter->has_amt = TRUE;
4908                 eeprom_data = E1000_READ_REG(&adapter->hw, E1000_WUC);
4909                 break;
4910         default:
4911                 e1000_read_nvm(&adapter->hw,
4912                     NVM_INIT_CONTROL3_PORT_A, 1, &eeprom_data);
4913                 break;
4914         }
4915         if (eeprom_data & apme_mask)
4916                 adapter->wol = (E1000_WUFC_MAG | E1000_WUFC_MC);
4917         /*
4918          * We have the eeprom settings, now apply the special cases
4919          * where the eeprom may be wrong or the board won't support
4920          * wake on lan on a particular port
4921          */
4922         device_id = pci_get_device(dev);
4923         switch (device_id) {
4924         case E1000_DEV_ID_82571EB_FIBER:
4925                 /* Wake events only supported on port A for dual fiber
4926                  * regardless of eeprom setting */
4927                 if (E1000_READ_REG(&adapter->hw, E1000_STATUS) &
4928                     E1000_STATUS_FUNC_1)
4929                         adapter->wol = 0;
4930                 break;
4931         case E1000_DEV_ID_82571EB_QUAD_COPPER:
4932         case E1000_DEV_ID_82571EB_QUAD_FIBER:
4933         case E1000_DEV_ID_82571EB_QUAD_COPPER_LP:
4934                 /* if quad port adapter, disable WoL on all but port A */
4935                 if (global_quad_port_a != 0)
4936                         adapter->wol = 0;
4937                 /* Reset for multiple quad port adapters */
4938                 if (++global_quad_port_a == 4)
4939                         global_quad_port_a = 0;
4940                 break;
4941         }
4942         return;
4943 }
4944
4945
4946 /*
4947  * Enable PCI Wake On Lan capability
4948  */
4949 static void
4950 em_enable_wakeup(device_t dev)
4951 {
4952         struct adapter  *adapter = device_get_softc(dev);
4953         struct ifnet    *ifp = adapter->ifp;
4954         u32             pmc, ctrl, ctrl_ext, rctl;
4955         u16             status;
4956
4957         if ((pci_find_cap(dev, PCIY_PMG, &pmc) != 0))
4958                 return;
4959
4960         /* Advertise the wakeup capability */
4961         ctrl = E1000_READ_REG(&adapter->hw, E1000_CTRL);
4962         ctrl |= (E1000_CTRL_SWDPIN2 | E1000_CTRL_SWDPIN3);
4963         E1000_WRITE_REG(&adapter->hw, E1000_CTRL, ctrl);
4964         E1000_WRITE_REG(&adapter->hw, E1000_WUC, E1000_WUC_PME_EN);
4965
4966         if ((adapter->hw.mac.type == e1000_ich8lan) ||
4967             (adapter->hw.mac.type == e1000_pchlan) ||
4968             (adapter->hw.mac.type == e1000_ich9lan) ||
4969             (adapter->hw.mac.type == e1000_ich10lan))
4970                 e1000_suspend_workarounds_ich8lan(&adapter->hw);
4971
4972         /* Keep the laser running on Fiber adapters */
4973         if (adapter->hw.phy.media_type == e1000_media_type_fiber ||
4974             adapter->hw.phy.media_type == e1000_media_type_internal_serdes) {
4975                 ctrl_ext = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
4976                 ctrl_ext |= E1000_CTRL_EXT_SDP3_DATA;
4977                 E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT, ctrl_ext);
4978         }
4979
4980         /*
4981         ** Determine type of Wakeup: note that wol
4982         ** is set with all bits on by default.
4983         */
4984         if ((ifp->if_capenable & IFCAP_WOL_MAGIC) == 0)
4985                 adapter->wol &= ~E1000_WUFC_MAG;
4986
4987         if ((ifp->if_capenable & IFCAP_WOL_MCAST) == 0)
4988                 adapter->wol &= ~E1000_WUFC_MC;
4989         else {
4990                 rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
4991                 rctl |= E1000_RCTL_MPE;
4992                 E1000_WRITE_REG(&adapter->hw, E1000_RCTL, rctl);
4993         }
4994
4995         if ((adapter->hw.mac.type == e1000_pchlan) ||
4996             (adapter->hw.mac.type == e1000_pch2lan)) {
4997                 if (em_enable_phy_wakeup(adapter))
4998                         return;
4999         } else {
5000                 E1000_WRITE_REG(&adapter->hw, E1000_WUC, E1000_WUC_PME_EN);
5001                 E1000_WRITE_REG(&adapter->hw, E1000_WUFC, adapter->wol);
5002         }
5003
5004         if (adapter->hw.phy.type == e1000_phy_igp_3)
5005                 e1000_igp3_phy_powerdown_workaround_ich8lan(&adapter->hw);
5006
5007         /* Request PME */
5008         status = pci_read_config(dev, pmc + PCIR_POWER_STATUS, 2);
5009         status &= ~(PCIM_PSTAT_PME | PCIM_PSTAT_PMEENABLE);
5010         if (ifp->if_capenable & IFCAP_WOL)
5011                 status |= PCIM_PSTAT_PME | PCIM_PSTAT_PMEENABLE;
5012         pci_write_config(dev, pmc + PCIR_POWER_STATUS, status, 2);
5013
5014         return;
5015 }
5016
5017 /*
5018 ** WOL in the newer chipset interfaces (pchlan)
5019 ** requires things to be copied into the PHY
5020 */
5021 static int
5022 em_enable_phy_wakeup(struct adapter *adapter)
5023 {
5024         struct e1000_hw *hw = &adapter->hw;
5025         u32 mreg, ret = 0;
5026         u16 preg;
5027
5028         /* copy MAC RARs to PHY RARs */
5029         e1000_copy_rx_addrs_to_phy_ich8lan(hw);
5030
5031         /* copy MAC MTA to PHY MTA */
5032         for (int i = 0; i < adapter->hw.mac.mta_reg_count; i++) {
5033                 mreg = E1000_READ_REG_ARRAY(hw, E1000_MTA, i);
5034                 e1000_write_phy_reg(hw, BM_MTA(i), (u16)(mreg & 0xFFFF));
5035                 e1000_write_phy_reg(hw, BM_MTA(i) + 1,
5036                     (u16)((mreg >> 16) & 0xFFFF));
5037         }
5038
5039         /* configure PHY Rx Control register */
5040         e1000_read_phy_reg(&adapter->hw, BM_RCTL, &preg);
5041         mreg = E1000_READ_REG(hw, E1000_RCTL);
5042         if (mreg & E1000_RCTL_UPE)
5043                 preg |= BM_RCTL_UPE;
5044         if (mreg & E1000_RCTL_MPE)
5045                 preg |= BM_RCTL_MPE;
5046         preg &= ~(BM_RCTL_MO_MASK);
5047         if (mreg & E1000_RCTL_MO_3)
5048                 preg |= (((mreg & E1000_RCTL_MO_3) >> E1000_RCTL_MO_SHIFT)
5049                                 << BM_RCTL_MO_SHIFT);
5050         if (mreg & E1000_RCTL_BAM)
5051                 preg |= BM_RCTL_BAM;
5052         if (mreg & E1000_RCTL_PMCF)
5053                 preg |= BM_RCTL_PMCF;
5054         mreg = E1000_READ_REG(hw, E1000_CTRL);
5055         if (mreg & E1000_CTRL_RFCE)
5056                 preg |= BM_RCTL_RFCE;
5057         e1000_write_phy_reg(&adapter->hw, BM_RCTL, preg);
5058
5059         /* enable PHY wakeup in MAC register */
5060         E1000_WRITE_REG(hw, E1000_WUC,
5061             E1000_WUC_PHY_WAKE | E1000_WUC_PME_EN);
5062         E1000_WRITE_REG(hw, E1000_WUFC, adapter->wol);
5063
5064         /* configure and enable PHY wakeup in PHY registers */
5065         e1000_write_phy_reg(&adapter->hw, BM_WUFC, adapter->wol);
5066         e1000_write_phy_reg(&adapter->hw, BM_WUC, E1000_WUC_PME_EN);
5067
5068         /* activate PHY wakeup */
5069         ret = hw->phy.ops.acquire(hw);
5070         if (ret) {
5071                 printf("Could not acquire PHY\n");
5072                 return ret;
5073         }
5074         e1000_write_phy_reg_mdic(hw, IGP01E1000_PHY_PAGE_SELECT,
5075                                  (BM_WUC_ENABLE_PAGE << IGP_PAGE_SHIFT));
5076         ret = e1000_read_phy_reg_mdic(hw, BM_WUC_ENABLE_REG, &preg);
5077         if (ret) {
5078                 printf("Could not read PHY page 769\n");
5079                 goto out;
5080         }
5081         preg |= BM_WUC_ENABLE_BIT | BM_WUC_HOST_WU_BIT;
5082         ret = e1000_write_phy_reg_mdic(hw, BM_WUC_ENABLE_REG, preg);
5083         if (ret)
5084                 printf("Could not set PHY Host Wakeup bit\n");
5085 out:
5086         hw->phy.ops.release(hw);
5087
5088         return ret;
5089 }
5090
5091 static void
5092 em_led_func(void *arg, int onoff)
5093 {
5094         struct adapter  *adapter = arg;
5095  
5096         EM_CORE_LOCK(adapter);
5097         if (onoff) {
5098                 e1000_setup_led(&adapter->hw);
5099                 e1000_led_on(&adapter->hw);
5100         } else {
5101                 e1000_led_off(&adapter->hw);
5102                 e1000_cleanup_led(&adapter->hw);
5103         }
5104         EM_CORE_UNLOCK(adapter);
5105 }
5106
5107 /*
5108 ** Disable the L0s and L1 LINK states
5109 */
5110 static void
5111 em_disable_aspm(struct adapter *adapter)
5112 {
5113         int             base, reg;
5114         u16             link_cap, link_ctrl;
5115         device_t        dev = adapter->dev;
5116
5117         switch (adapter->hw.mac.type) {
5118                 case e1000_82573:
5119                 case e1000_82574:
5120                 case e1000_82583:
5121                         break;
5122                 default:
5123                         return;
5124         }
5125         if (pci_find_cap(dev, PCIY_EXPRESS, &base) != 0)
5126                 return;
5127         reg = base + PCIER_LINK_CAP;
5128         link_cap = pci_read_config(dev, reg, 2);
5129         if ((link_cap & PCIEM_LINK_CAP_ASPM) == 0)
5130                 return;
5131         reg = base + PCIER_LINK_CTL;
5132         link_ctrl = pci_read_config(dev, reg, 2);
5133         link_ctrl &= ~PCIEM_LINK_CTL_ASPMC;
5134         pci_write_config(dev, reg, link_ctrl, 2);
5135         return;
5136 }
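
/*
 * Worked example for the ASPM disable above: per the PCIe spec,
 * PCIEM_LINK_CTL_ASPMC covers bits 1:0 of the Link Control register
 * (01b = L0s, 11b = L0s + L1).  With a hypothetical starting value:
 *
 *      link_ctrl = 0x0043;             // ASPM L0s + L1 enabled
 *      link_ctrl &= ~PCIEM_LINK_CTL_ASPMC;
 *      // link_ctrl == 0x0040; both link states now disabled
 */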
5137
5138 /**********************************************************************
5139  *
5140  *  Update the board statistics counters.
5141  *
5142  **********************************************************************/
5143 static void
5144 em_update_stats_counters(struct adapter *adapter)
5145 {
5146         struct ifnet   *ifp;
5147
5148         if (adapter->hw.phy.media_type == e1000_media_type_copper ||
5149            (E1000_READ_REG(&adapter->hw, E1000_STATUS) & E1000_STATUS_LU)) {
5150                 adapter->stats.symerrs += E1000_READ_REG(&adapter->hw, E1000_SYMERRS);
5151                 adapter->stats.sec += E1000_READ_REG(&adapter->hw, E1000_SEC);
5152         }
5153         adapter->stats.crcerrs += E1000_READ_REG(&adapter->hw, E1000_CRCERRS);
5154         adapter->stats.mpc += E1000_READ_REG(&adapter->hw, E1000_MPC);
5155         adapter->stats.scc += E1000_READ_REG(&adapter->hw, E1000_SCC);
5156         adapter->stats.ecol += E1000_READ_REG(&adapter->hw, E1000_ECOL);
5157
5158         adapter->stats.mcc += E1000_READ_REG(&adapter->hw, E1000_MCC);
5159         adapter->stats.latecol += E1000_READ_REG(&adapter->hw, E1000_LATECOL);
5160         adapter->stats.colc += E1000_READ_REG(&adapter->hw, E1000_COLC);
5161         adapter->stats.dc += E1000_READ_REG(&adapter->hw, E1000_DC);
5162         adapter->stats.rlec += E1000_READ_REG(&adapter->hw, E1000_RLEC);
5163         adapter->stats.xonrxc += E1000_READ_REG(&adapter->hw, E1000_XONRXC);
5164         adapter->stats.xontxc += E1000_READ_REG(&adapter->hw, E1000_XONTXC);
5165         /*
5166         ** For watchdog management we need to know if we have been
5167         ** paused during the last interval, so capture that here.
5168         */
5169         adapter->pause_frames = E1000_READ_REG(&adapter->hw, E1000_XOFFRXC);
5170         adapter->stats.xoffrxc += adapter->pause_frames;
5171         adapter->stats.xofftxc += E1000_READ_REG(&adapter->hw, E1000_XOFFTXC);
5172         adapter->stats.fcruc += E1000_READ_REG(&adapter->hw, E1000_FCRUC);
5173         adapter->stats.prc64 += E1000_READ_REG(&adapter->hw, E1000_PRC64);
5174         adapter->stats.prc127 += E1000_READ_REG(&adapter->hw, E1000_PRC127);
5175         adapter->stats.prc255 += E1000_READ_REG(&adapter->hw, E1000_PRC255);
5176         adapter->stats.prc511 += E1000_READ_REG(&adapter->hw, E1000_PRC511);
5177         adapter->stats.prc1023 += E1000_READ_REG(&adapter->hw, E1000_PRC1023);
5178         adapter->stats.prc1522 += E1000_READ_REG(&adapter->hw, E1000_PRC1522);
5179         adapter->stats.gprc += E1000_READ_REG(&adapter->hw, E1000_GPRC);
5180         adapter->stats.bprc += E1000_READ_REG(&adapter->hw, E1000_BPRC);
5181         adapter->stats.mprc += E1000_READ_REG(&adapter->hw, E1000_MPRC);
5182         adapter->stats.gptc += E1000_READ_REG(&adapter->hw, E1000_GPTC);
5183
5184         /* For the 64-bit byte counters the low dword must be read first. */
5185         /* Both registers clear on the read of the high dword */
5186
5187         adapter->stats.gorc += E1000_READ_REG(&adapter->hw, E1000_GORCL) +
5188             ((u64)E1000_READ_REG(&adapter->hw, E1000_GORCH) << 32);
5189         adapter->stats.gotc += E1000_READ_REG(&adapter->hw, E1000_GOTCL) +
5190             ((u64)E1000_READ_REG(&adapter->hw, E1000_GOTCH) << 32);
5191
5192         adapter->stats.rnbc += E1000_READ_REG(&adapter->hw, E1000_RNBC);
5193         adapter->stats.ruc += E1000_READ_REG(&adapter->hw, E1000_RUC);
5194         adapter->stats.rfc += E1000_READ_REG(&adapter->hw, E1000_RFC);
5195         adapter->stats.roc += E1000_READ_REG(&adapter->hw, E1000_ROC);
5196         adapter->stats.rjc += E1000_READ_REG(&adapter->hw, E1000_RJC);
5197
5198         adapter->stats.tor += E1000_READ_REG(&adapter->hw, E1000_TORH);
5199         adapter->stats.tot += E1000_READ_REG(&adapter->hw, E1000_TOTH);
5200
5201         adapter->stats.tpr += E1000_READ_REG(&adapter->hw, E1000_TPR);
5202         adapter->stats.tpt += E1000_READ_REG(&adapter->hw, E1000_TPT);
5203         adapter->stats.ptc64 += E1000_READ_REG(&adapter->hw, E1000_PTC64);
5204         adapter->stats.ptc127 += E1000_READ_REG(&adapter->hw, E1000_PTC127);
5205         adapter->stats.ptc255 += E1000_READ_REG(&adapter->hw, E1000_PTC255);
5206         adapter->stats.ptc511 += E1000_READ_REG(&adapter->hw, E1000_PTC511);
5207         adapter->stats.ptc1023 += E1000_READ_REG(&adapter->hw, E1000_PTC1023);
5208         adapter->stats.ptc1522 += E1000_READ_REG(&adapter->hw, E1000_PTC1522);
5209         adapter->stats.mptc += E1000_READ_REG(&adapter->hw, E1000_MPTC);
5210         adapter->stats.bptc += E1000_READ_REG(&adapter->hw, E1000_BPTC);
5211
5212         /* Interrupt Counts */
5213
5214         adapter->stats.iac += E1000_READ_REG(&adapter->hw, E1000_IAC);
5215         adapter->stats.icrxptc += E1000_READ_REG(&adapter->hw, E1000_ICRXPTC);
5216         adapter->stats.icrxatc += E1000_READ_REG(&adapter->hw, E1000_ICRXATC);
5217         adapter->stats.ictxptc += E1000_READ_REG(&adapter->hw, E1000_ICTXPTC);
5218         adapter->stats.ictxatc += E1000_READ_REG(&adapter->hw, E1000_ICTXATC);
5219         adapter->stats.ictxqec += E1000_READ_REG(&adapter->hw, E1000_ICTXQEC);
5220         adapter->stats.ictxqmtc += E1000_READ_REG(&adapter->hw, E1000_ICTXQMTC);
5221         adapter->stats.icrxdmtc += E1000_READ_REG(&adapter->hw, E1000_ICRXDMTC);
5222         adapter->stats.icrxoc += E1000_READ_REG(&adapter->hw, E1000_ICRXOC);
5223
5224         if (adapter->hw.mac.type >= e1000_82543) {
5225                 adapter->stats.algnerrc +=
5226                     E1000_READ_REG(&adapter->hw, E1000_ALGNERRC);
5227                 adapter->stats.rxerrc +=
5228                     E1000_READ_REG(&adapter->hw, E1000_RXERRC);
5229                 adapter->stats.tncrs +=
5230                     E1000_READ_REG(&adapter->hw, E1000_TNCRS);
5231                 adapter->stats.cexterr +=
5232                     E1000_READ_REG(&adapter->hw, E1000_CEXTERR);
5233                 adapter->stats.tsctc +=
5234                     E1000_READ_REG(&adapter->hw, E1000_TSCTC);
5235                 adapter->stats.tsctfc +=
5236                     E1000_READ_REG(&adapter->hw, E1000_TSCTFC);
5237         }
5238         ifp = adapter->ifp;
5239
5240         ifp->if_collisions = adapter->stats.colc;
5241
5242         /* Rx Errors */
5243         ifp->if_ierrors = adapter->dropped_pkts + adapter->stats.rxerrc +
5244             adapter->stats.crcerrs + adapter->stats.algnerrc +
5245             adapter->stats.ruc + adapter->stats.roc +
5246             adapter->stats.mpc + adapter->stats.cexterr;
5247
5248         /* Tx Errors */
5249         ifp->if_oerrors = adapter->stats.ecol +
5250             adapter->stats.latecol + adapter->watchdog_events;
5251 }
5252
5253 /* Export a single 32-bit register via a read-only sysctl. */
5254 static int
5255 em_sysctl_reg_handler(SYSCTL_HANDLER_ARGS)
5256 {
5257         struct adapter *adapter;
5258         u_int val;
5259
5260         adapter = oidp->oid_arg1;
5261         val = E1000_READ_REG(&adapter->hw, oidp->oid_arg2);
5262         return (sysctl_handle_int(oidp, &val, 0, req));
5263 }
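
/*
 * Usage sketch (unit and queue numbers hypothetical): registers exported
 * through this handler are read fresh on every access, e.g.
 *
 *      sysctl dev.em.0.device_control
 *      sysctl dev.em.0.queue0.rxd_head
 *
 * both land here with oid_arg2 set to E1000_CTRL or E1000_RDH(0).
 */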
5264
5265 /*
5266  * Add sysctl variables, one per statistic, to the system.
5267  */
5268 static void
5269 em_add_hw_stats(struct adapter *adapter)
5270 {
5271         device_t dev = adapter->dev;
5272
5273         struct tx_ring *txr = adapter->tx_rings;
5274         struct rx_ring *rxr = adapter->rx_rings;
5275
5276         struct sysctl_ctx_list *ctx = device_get_sysctl_ctx(dev);
5277         struct sysctl_oid *tree = device_get_sysctl_tree(dev);
5278         struct sysctl_oid_list *child = SYSCTL_CHILDREN(tree);
5279         struct e1000_hw_stats *stats = &adapter->stats;
5280
5281         struct sysctl_oid *stat_node, *queue_node, *int_node;
5282         struct sysctl_oid_list *stat_list, *queue_list, *int_list;
5283
5284 #define QUEUE_NAME_LEN 32
5285         char namebuf[QUEUE_NAME_LEN];
5286         
5287         /* Driver Statistics */
5288         SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "link_irq",
5289                         CTLFLAG_RD, &adapter->link_irq,
5290                         "Link MSIX IRQ Handled");
5291         SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "mbuf_alloc_fail", 
5292                          CTLFLAG_RD, &adapter->mbuf_alloc_failed,
5293                          "Std mbuf failed");
5294         SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "cluster_alloc_fail", 
5295                          CTLFLAG_RD, &adapter->mbuf_cluster_failed,
5296                          "Std mbuf cluster failed");
5297         SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "dropped", 
5298                         CTLFLAG_RD, &adapter->dropped_pkts,
5299                         "Driver dropped packets");
5300         SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "tx_dma_fail", 
5301                         CTLFLAG_RD, &adapter->no_tx_dma_setup,
5302                         "Driver tx dma failure in xmit");
5303         SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "rx_overruns",
5304                         CTLFLAG_RD, &adapter->rx_overruns,
5305                         "RX overruns");
5306         SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "watchdog_timeouts",
5307                         CTLFLAG_RD, &adapter->watchdog_events,
5308                         "Watchdog timeouts");
5309         
5310         SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "device_control",
5311                         CTLTYPE_UINT | CTLFLAG_RD, adapter, E1000_CTRL,
5312                         em_sysctl_reg_handler, "IU",
5313                         "Device Control Register");
5314         SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "rx_control",
5315                         CTLTYPE_UINT | CTLFLAG_RD, adapter, E1000_RCTL,
5316                         em_sysctl_reg_handler, "IU",
5317                         "Receiver Control Register");
5318         SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "fc_high_water",
5319                         CTLFLAG_RD, &adapter->hw.fc.high_water, 0,
5320                         "Flow Control High Watermark");
5321         SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "fc_low_water", 
5322                         CTLFLAG_RD, &adapter->hw.fc.low_water, 0,
5323                         "Flow Control Low Watermark");
5324
5325         for (int i = 0; i < adapter->num_queues; i++, rxr++, txr++) {
5326                 snprintf(namebuf, QUEUE_NAME_LEN, "queue%d", i);
5327                 queue_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, namebuf,
5328                                             CTLFLAG_RD, NULL, "Queue Name");
5329                 queue_list = SYSCTL_CHILDREN(queue_node);
5330
5331                 SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "txd_head", 
5332                                 CTLTYPE_UINT | CTLFLAG_RD, adapter,
5333                                 E1000_TDH(txr->me),
5334                                 em_sysctl_reg_handler, "IU",
5335                                 "Transmit Descriptor Head");
5336                 SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "txd_tail", 
5337                                 CTLTYPE_UINT | CTLFLAG_RD, adapter,
5338                                 E1000_TDT(txr->me),
5339                                 em_sysctl_reg_handler, "IU",
5340                                 "Transmit Descriptor Tail");
5341                 SYSCTL_ADD_ULONG(ctx, queue_list, OID_AUTO, "tx_irq",
5342                                 CTLFLAG_RD, &txr->tx_irq,
5343                                 "Queue MSI-X Transmit Interrupts");
5344                 SYSCTL_ADD_ULONG(ctx, queue_list, OID_AUTO, "no_desc_avail", 
5345                                 CTLFLAG_RD, &txr->no_desc_avail,
5346                                 "Queue No Descriptor Available");
5347                 
5348                 SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "rxd_head", 
5349                                 CTLTYPE_UINT | CTLFLAG_RD, adapter,
5350                                 E1000_RDH(rxr->me),
5351                                 em_sysctl_reg_handler, "IU",
5352                                 "Receive Descriptor Head");
5353                 SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "rxd_tail", 
5354                                 CTLTYPE_UINT | CTLFLAG_RD, adapter,
5355                                 E1000_RDT(rxr->me),
5356                                 em_sysctl_reg_handler, "IU",
5357                                 "Receive Descriptor Tail");
5358                 SYSCTL_ADD_ULONG(ctx, queue_list, OID_AUTO, "rx_irq",
5359                                 CTLFLAG_RD, &rxr->rx_irq,
5360                                 "Queue MSI-X Receive Interrupts");
5361         }
5362
5363         /* MAC stats get their own sub node */
5364
5365         stat_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "mac_stats", 
5366                                     CTLFLAG_RD, NULL, "Statistics");
5367         stat_list = SYSCTL_CHILDREN(stat_node);
5368
5369         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "excess_coll",
5370                         CTLFLAG_RD, &stats->ecol,
5371                         "Excessive collisions");
5372         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "single_coll",
5373                         CTLFLAG_RD, &stats->scc,
5374                         "Single collisions");
5375         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "multiple_coll",
5376                         CTLFLAG_RD, &stats->mcc,
5377                         "Multiple collisions");
5378         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "late_coll",
5379                         CTLFLAG_RD, &stats->latecol,
5380                         "Late collisions");
5381         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "collision_count",
5382                         CTLFLAG_RD, &stats->colc,
5383                         "Collision Count");
5384         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "symbol_errors",
5385                         CTLFLAG_RD, &stats->symerrs,
5386                         "Symbol Errors");
5387         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "sequence_errors",
5388                         CTLFLAG_RD, &stats->sec,
5389                         "Sequence Errors");
5390         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "defer_count",
5391                         CTLFLAG_RD, &stats->dc,
5392                         "Defer Count");
5393         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "missed_packets",
5394                         CTLFLAG_RD, &stats->mpc,
5395                         "Missed Packets");
5396         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_no_buff",
5397                         CTLFLAG_RD, &stats->rnbc,
5398                         "Receive No Buffers");
5399         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_undersize",
5400                         CTLFLAG_RD, &stats->ruc,
5401                         "Receive Undersize");
5402         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_fragmented",
5403                         CTLFLAG_RD, &stats->rfc,
5404                         "Fragmented Packets Received");
5405         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_oversize",
5406                         CTLFLAG_RD, &stats->roc,
5407                         "Oversized Packets Received");
5408         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_jabber",
5409                         CTLFLAG_RD, &stats->rjc,
5410                         "Received Jabber");
5411         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_errs",
5412                         CTLFLAG_RD, &stats->rxerrc,
5413                         "Receive Errors");
5414         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "crc_errs",
5415                         CTLFLAG_RD, &stats->crcerrs,
5416                         "CRC errors");
5417         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "alignment_errs",
5418                         CTLFLAG_RD, &stats->algnerrc,
5419                         "Alignment Errors");
5420         /* On 82575 these are collision counts */
5421         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "coll_ext_errs",
5422                         CTLFLAG_RD, &stats->cexterr,
5423                         "Collision/Carrier extension errors");
5424         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "xon_recvd",
5425                         CTLFLAG_RD, &stats->xonrxc,
5426                         "XON Received");
5427         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "xon_txd",
5428                         CTLFLAG_RD, &stats->xontxc,
5429                         "XON Transmitted");
5430         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "xoff_recvd",
5431                         CTLFLAG_RD, &stats->xoffrxc,
5432                         "XOFF Received");
5433         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "xoff_txd",
5434                         CTLFLAG_RD, &stats->xofftxc,
5435                         "XOFF Transmitted");
5436
5437         /* Packet Reception Stats */
5438         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "total_pkts_recvd",
5439                         CTLFLAG_RD, &stats->tpr,
5440                         "Total Packets Received");
5441         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_pkts_recvd",
5442                         CTLFLAG_RD, &stats->gprc,
5443                         "Good Packets Received");
5444         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "bcast_pkts_recvd",
5445                         CTLFLAG_RD, &stats->bprc,
5446                         "Broadcast Packets Received");
5447         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "mcast_pkts_recvd",
5448                         CTLFLAG_RD, &stats->mprc,
5449                         "Multicast Packets Received");
5450         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_64",
5451                         CTLFLAG_RD, &stats->prc64,
5452                         "64 byte frames received");
5453         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_65_127",
5454                         CTLFLAG_RD, &stats->prc127,
5455                         "65-127 byte frames received");
5456         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_128_255",
5457                         CTLFLAG_RD, &stats->prc255,
5458                         "128-255 byte frames received");
5459         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_256_511",
5460                         CTLFLAG_RD, &stats->prc511,
5461                         "256-511 byte frames received");
5462         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_512_1023",
5463                         CTLFLAG_RD, &stats->prc1023,
5464                         "512-1023 byte frames received");
5465         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_1024_1522",
5466                         CTLFLAG_RD, &stats->prc1522,
5467                         "1024-1522 byte frames received");
5468         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_octets_recvd",
5469                         CTLFLAG_RD, &stats->gorc,
5470                         "Good Octets Received");
5471
5472         /* Packet Transmission Stats */
5473         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_octets_txd",
5474                         CTLFLAG_RD, &stats->gotc,
5475                         "Good Octets Transmitted");
5476         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "total_pkts_txd",
5477                         CTLFLAG_RD, &stats->tpt,
5478                         "Total Packets Transmitted");
5479         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_pkts_txd",
5480                         CTLFLAG_RD, &stats->gptc,
5481                         "Good Packets Transmitted");
5482         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "bcast_pkts_txd",
5483                         CTLFLAG_RD, &stats->bptc,
5484                         "Broadcast Packets Transmitted");
5485         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "mcast_pkts_txd",
5486                         CTLFLAG_RD, &stats->mptc,
5487                         "Multicast Packets Transmitted");
5488         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_64",
5489                         CTLFLAG_RD, &stats->ptc64,
5490                         "64 byte frames transmitted");
5491         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_65_127",
5492                         CTLFLAG_RD, &stats->ptc127,
5493                         "65-127 byte frames transmitted");
5494         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_128_255",
5495                         CTLFLAG_RD, &stats->ptc255,
5496                         "128-255 byte frames transmitted");
5497         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_256_511",
5498                         CTLFLAG_RD, &stats->ptc511,
5499                         "256-511 byte frames transmitted");
5500         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_512_1023",
5501                         CTLFLAG_RD, &stats->ptc1023,
5502                         "512-1023 byte frames transmitted");
5503         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_1024_1522",
5504                         CTLFLAG_RD, &stats->ptc1522,
5505                         "1024-1522 byte frames transmitted");
5506         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tso_txd",
5507                         CTLFLAG_RD, &stats->tsctc,
5508                         "TSO Contexts Transmitted");
5509         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tso_ctx_fail",
5510                         CTLFLAG_RD, &stats->tsctfc,
5511                         "TSO Contexts Failed");
5512
5513
5514         /* Interrupt Stats */
5515
5516         int_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "interrupts", 
5517                                     CTLFLAG_RD, NULL, "Interrupt Statistics");
5518         int_list = SYSCTL_CHILDREN(int_node);
5519
5520         SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "asserts",
5521                         CTLFLAG_RD, &stats->iac,
5522                         "Interrupt Assertion Count");
5523
5524         SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "rx_pkt_timer",
5525                         CTLFLAG_RD, &stats->icrxptc,
5526                         "Interrupt Cause Rx Pkt Timer Expire Count");
5527
5528         SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "rx_abs_timer",
5529                         CTLFLAG_RD, &stats->icrxatc,
5530                         "Interrupt Cause Rx Abs Timer Expire Count");
5531
5532         SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "tx_pkt_timer",
5533                         CTLFLAG_RD, &stats->ictxptc,
5534                         "Interrupt Cause Tx Pkt Timer Expire Count");
5535
5536         SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "tx_abs_timer",
5537                         CTLFLAG_RD, &stats->ictxatc,
5538                         "Interrupt Cause Tx Abs Timer Expire Count");
5539
5540         SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "tx_queue_empty",
5541                         CTLFLAG_RD, &stats->ictxqec,
5542                         "Interrupt Cause Tx Queue Empty Count");
5543
5544         SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "tx_queue_min_thresh",
5545                         CTLFLAG_RD, &stats->ictxqmtc,
5546                         "Interrupt Cause Tx Queue Min Thresh Count");
5547
5548         SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "rx_desc_min_thresh",
5549                         CTLFLAG_RD, &stats->icrxdmtc,
5550                         "Interrupt Cause Rx Desc Min Thresh Count");
5551
5552         SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "rx_overrun",
5553                         CTLFLAG_RD, &stats->icrxoc,
5554                         "Interrupt Cause Receiver Overrun Count");
5555 }
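
/*
** Illustrative only: after attach, the nodes created above hang off the
** device's sysctl tree and can be read with sysctl(8).  The "dev.em.0"
** prefix below assumes unit 0 and is purely for the example:
**
**      sysctl dev.em.0.fc_high_water
**      sysctl dev.em.0.queue0.txd_head
**      sysctl dev.em.0.mac_stats.crc_errs
**      sysctl dev.em.0.interrupts.asserts
*/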
5556
5557 /**********************************************************************
5558  *
5559  *  This routine provides a way to dump out the adapter eeprom,
5560  *  often a useful debug/service tool. Only the first 32 words are
5561  *  dumped; the data that matters lives within that extent.
5562  *
5563  **********************************************************************/
5564 static int
5565 em_sysctl_nvm_info(SYSCTL_HANDLER_ARGS)
5566 {
5567         struct adapter *adapter = (struct adapter *)arg1;
5568         int error;
5569         int result;
5570
5571         result = -1;
5572         error = sysctl_handle_int(oidp, &result, 0, req);
5573
5574         if (error || !req->newptr)
5575                 return (error);
5576
5577         /*
5578          * This value will cause a hex dump of the
5579          * first 32 16-bit words of the EEPROM to
5580          * the screen.
5581          */
5582         if (result == 1)
5583                 em_print_nvm_info(adapter);
5584
5585         return (error);
5586 }
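
/*
** Usage sketch, assuming this handler is registered under the node name
** "nvm" elsewhere in this file: writing 1 hex-dumps the first 32 EEPROM
** words to the console, any other value is a no-op.
**
**      sysctl dev.em.0.nvm=1
*/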
5587
5588 static void
5589 em_print_nvm_info(struct adapter *adapter)
5590 {
5591         u16     eeprom_data;
5592         int     i, j, row = 0;
5593
5594         /* It's a bit crude, but it gets the job done */
5595         printf("\nInterface EEPROM Dump:\n");
5596         printf("Offset\n0x0000  ");
5597         for (i = 0, j = 0; i < 32; i++, j++) {
5598                 if (j == 8) { /* Make the offset block */
5599                         j = 0; ++row;
5600                         printf("\n0x00%x0  ", row);
5601                 }
5602                 e1000_read_nvm(&adapter->hw, i, 1, &eeprom_data);
5603                 printf("%04x ", eeprom_data);
5604         }
5605         printf("\n");
5606 }
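
/*
** The loop above produces four rows of eight words, with the printed
** byte offset advancing 0x10 per row; schematically (the values shown
** are placeholders):
**
**      Interface EEPROM Dump:
**      Offset
**      0x0000  xxxx xxxx xxxx xxxx xxxx xxxx xxxx xxxx
**      0x0010  xxxx xxxx xxxx xxxx xxxx xxxx xxxx xxxx
*/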
5607
5608 static int
5609 em_sysctl_int_delay(SYSCTL_HANDLER_ARGS)
5610 {
5611         struct em_int_delay_info *info;
5612         struct adapter *adapter;
5613         u32 regval;
5614         int error, usecs, ticks;
5615
5616         info = (struct em_int_delay_info *)arg1;
5617         usecs = info->value;
5618         error = sysctl_handle_int(oidp, &usecs, 0, req);
5619         if (error != 0 || req->newptr == NULL)
5620                 return (error);
5621         if (usecs < 0 || usecs > EM_TICKS_TO_USECS(65535))
5622                 return (EINVAL);
5623         info->value = usecs;
5624         ticks = EM_USECS_TO_TICKS(usecs);
5625
5626         adapter = info->adapter;
5627         
5628         EM_CORE_LOCK(adapter);
5629         regval = E1000_READ_OFFSET(&adapter->hw, info->offset);
5630         regval = (regval & ~0xffff) | (ticks & 0xffff);
5631         /* Handle a few special cases. */
5632         switch (info->offset) {
5633         case E1000_RDTR:
5634                 break;
5635         case E1000_TIDV:
5636                 if (ticks == 0) {
5637                         adapter->txd_cmd &= ~E1000_TXD_CMD_IDE;
5638                         /* Don't write 0 into the TIDV register. */
5639                         regval++;
5640                 } else
5641                         adapter->txd_cmd |= E1000_TXD_CMD_IDE;
5642                 break;
5643         }
5644         E1000_WRITE_OFFSET(&adapter->hw, info->offset, regval);
5645         EM_CORE_UNLOCK(adapter);
5646         return (0);
5647 }
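
/*
** Worked example, assuming EM_USECS_TO_TICKS()/EM_TICKS_TO_USECS() in
** if_em.h convert to and from the hardware's 1.024 usec timer units: a
** request of 100 usecs becomes roughly 100 / 1.024 ~= 98 ticks, which
** is masked into the low 16 bits of the delay register above; the range
** check correspondingly rejects any usecs value whose tick count would
** not fit in 16 bits.
*/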
5648
5649 static void
5650 em_add_int_delay_sysctl(struct adapter *adapter, const char *name,
5651         const char *description, struct em_int_delay_info *info,
5652         int offset, int value)
5653 {
5654         info->adapter = adapter;
5655         info->offset = offset;
5656         info->value = value;
5657         SYSCTL_ADD_PROC(device_get_sysctl_ctx(adapter->dev),
5658             SYSCTL_CHILDREN(device_get_sysctl_tree(adapter->dev)),
5659             OID_AUTO, name, CTLTYPE_INT|CTLFLAG_RW,
5660             info, 0, em_sysctl_int_delay, "I", description);
5661 }
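
/*
** Attach-time registration typically looks like the sketch below; the
** register offset and zero default here are assumptions for
** illustration only:
**
**      em_add_int_delay_sysctl(adapter, "rx_int_delay",
**          "receive interrupt delay in usecs", &adapter->rx_int_delay,
**          E1000_REGISTER(&adapter->hw, E1000_RDTR), 0);
*/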
5662
5663 static void
5664 em_set_sysctl_value(struct adapter *adapter, const char *name,
5665         const char *description, int *limit, int value)
5666 {
5667         *limit = value;
5668         SYSCTL_ADD_INT(device_get_sysctl_ctx(adapter->dev),
5669             SYSCTL_CHILDREN(device_get_sysctl_tree(adapter->dev)),
5670             OID_AUTO, name, CTLTYPE_INT|CTLFLAG_RW, limit, value, description);
5671 }
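
/*
** A minimal use sketch; the limit pointer, node name and value here are
** illustrative, the real attach code supplies its own tunables:
**
**      em_set_sysctl_value(adapter, "rx_processing_limit",
**          "max number of rx packets to process",
**          &adapter->rx_process_limit, 100);
*/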
5672
5673
5674 /*
5675 ** Set flow control using sysctl:
5676 ** Flow control values:
5677 **      0 - off
5678 **      1 - rx pause
5679 **      2 - tx pause
5680 **      3 - full
5681 */
5682 static int
5683 em_set_flowcntl(SYSCTL_HANDLER_ARGS)
5684 {
5685         int             error, input;
5686         struct adapter  *adapter = (struct adapter *) arg1;
5687
5688         /* Report this adapter's own mode, not a static shared by all units */
5689         input = adapter->fc;
5690         error = sysctl_handle_int(oidp, &input, 0, req);
5691         if (error || req->newptr == NULL)
5692                 return (error);
5693
5694         if (input == adapter->fc) /* no change? */
5695                 return (error);
5696
5697         switch (input) {
5698                 case e1000_fc_rx_pause:
5699                 case e1000_fc_tx_pause:
5700                 case e1000_fc_full:
5701                 case e1000_fc_none:
5702                         adapter->hw.fc.requested_mode = input;
5703                         adapter->fc = input;
5704                         break;
5705                 default:
5706                         /* Do nothing */
5707                         return (error);
5708         }
5709
5710         adapter->hw.fc.current_mode = adapter->hw.fc.requested_mode;
5711         e1000_force_mac_fc(&adapter->hw);
5712         return (error);
5713 }
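
/*
** Example, assuming the handler is registered as "fc" on the device
** tree; the values map to the e1000_fc_* modes listed above:
**
**      sysctl dev.em.0.fc=3    (full: honor and send pause frames)
**      sysctl dev.em.0.fc=0    (none: flow control off)
*/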
5714
5715 /*
5716 ** Manage Energy Efficient Ethernet:
5717 ** Control values:
5718 **     0/1 - enabled/disabled
5719 */
5720 static int
5721 em_sysctl_eee(SYSCTL_HANDLER_ARGS)
5722 {
5723         struct adapter *adapter = (struct adapter *) arg1;
5724         int             error, value;
5725
5726         value = adapter->hw.dev_spec.ich8lan.eee_disable;
5727         error = sysctl_handle_int(oidp, &value, 0, req);
5728         if (error || req->newptr == NULL)
5729                 return (error);
5730         EM_CORE_LOCK(adapter);
5731         adapter->hw.dev_spec.ich8lan.eee_disable = (value != 0);
5732         em_init_locked(adapter);
5733         EM_CORE_UNLOCK(adapter);
5734         return (0);
5735 }
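
/*
** Example, node name assumed: the value lands in eee_disable, so
** writing 1 turns EEE off and re-initializes the interface, while 0
** turns it back on.
**
**      sysctl dev.em.0.eee_control=1
*/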
5736
5737 static int
5738 em_sysctl_debug_info(SYSCTL_HANDLER_ARGS)
5739 {
5740         struct adapter *adapter;
5741         int error;
5742         int result;
5743
5744         result = -1;
5745         error = sysctl_handle_int(oidp, &result, 0, req);
5746
5747         if (error || !req->newptr)
5748                 return (error);
5749
5750         if (result == 1) {
5751                 adapter = (struct adapter *)arg1;
5752                 em_print_debug_info(adapter);
5753         }
5754
5755         return (error);
5756 }
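
/*
** Example, assuming registration as "debug": writing 1 dumps the state
** printed by em_print_debug_info() below; reads return -1 and change
** nothing.
**
**      sysctl dev.em.0.debug=1
*/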
5757
5758 /*
5759 ** This routine is meant to be fluid; add whatever is
5760 ** needed for debugging a problem.  -jfv
5761 */
5762 static void
5763 em_print_debug_info(struct adapter *adapter)
5764 {
5765         device_t dev = adapter->dev;
5766         struct tx_ring *txr = adapter->tx_rings;
5767         struct rx_ring *rxr = adapter->rx_rings;
5768
5769         if (adapter->ifp->if_drv_flags & IFF_DRV_RUNNING)
5770                 printf("Interface is RUNNING ");
5771         else
5772                 printf("Interface is NOT RUNNING ");
5773
5774         if (adapter->ifp->if_drv_flags & IFF_DRV_OACTIVE)
5775                 printf("and INACTIVE\n");
5776         else
5777                 printf("and ACTIVE\n");
5778
5779         device_printf(dev, "hw tdh = %d, hw tdt = %d\n",
5780             E1000_READ_REG(&adapter->hw, E1000_TDH(0)),
5781             E1000_READ_REG(&adapter->hw, E1000_TDT(0)));
5782         device_printf(dev, "hw rdh = %d, hw rdt = %d\n",
5783             E1000_READ_REG(&adapter->hw, E1000_RDH(0)),
5784             E1000_READ_REG(&adapter->hw, E1000_RDT(0)));
5785         device_printf(dev, "TX Queue Status = %d\n", txr->queue_status);
5786         device_printf(dev, "TX descriptors avail = %d\n",
5787             txr->tx_avail);
5788         device_printf(dev, "TX descriptor avail failures = %ld\n",
5789             txr->no_desc_avail);
5790         device_printf(dev, "RX discarded packets = %ld\n",
5791             rxr->rx_discarded);
5792         device_printf(dev, "RX Next to Check = %d\n", rxr->next_to_check);
5793         device_printf(dev, "RX Next to Refresh = %d\n", rxr->next_to_refresh);
5794 }