1 /******************************************************************************
2
3   Copyright (c) 2001-2011, Intel Corporation 
4   All rights reserved.
5   
6   Redistribution and use in source and binary forms, with or without 
7   modification, are permitted provided that the following conditions are met:
8   
9    1. Redistributions of source code must retain the above copyright notice, 
10       this list of conditions and the following disclaimer.
11   
12    2. Redistributions in binary form must reproduce the above copyright 
13       notice, this list of conditions and the following disclaimer in the 
14       documentation and/or other materials provided with the distribution.
15   
16    3. Neither the name of the Intel Corporation nor the names of its 
17       contributors may be used to endorse or promote products derived from 
18       this software without specific prior written permission.
19   
20   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
21   AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 
22   IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 
23   ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE 
24   LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 
25   CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 
26   SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 
27   INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 
28   CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 
29   ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
30   POSSIBILITY OF SUCH DAMAGE.
31
32 ******************************************************************************/
33 /*$FreeBSD$*/
34
35 #ifdef HAVE_KERNEL_OPTION_HEADERS
36 #include "opt_device_polling.h"
37 #include "opt_inet.h"
38 #include "opt_inet6.h"
39 #endif
40
41 #include <sys/param.h>
42 #include <sys/systm.h>
43 #if __FreeBSD_version >= 800000
44 #include <sys/buf_ring.h>
45 #endif
46 #include <sys/bus.h>
47 #include <sys/endian.h>
48 #include <sys/kernel.h>
49 #include <sys/kthread.h>
50 #include <sys/malloc.h>
51 #include <sys/mbuf.h>
52 #include <sys/module.h>
53 #include <sys/rman.h>
54 #include <sys/socket.h>
55 #include <sys/sockio.h>
56 #include <sys/sysctl.h>
57 #include <sys/taskqueue.h>
58 #include <sys/eventhandler.h>
59 #include <machine/bus.h>
60 #include <machine/resource.h>
61
62 #include <net/bpf.h>
63 #include <net/ethernet.h>
64 #include <net/if.h>
65 #include <net/if_arp.h>
66 #include <net/if_dl.h>
67 #include <net/if_media.h>
68
69 #include <net/if_types.h>
70 #include <net/if_vlan_var.h>
71
72 #include <netinet/in_systm.h>
73 #include <netinet/in.h>
74 #include <netinet/if_ether.h>
75 #include <netinet/ip.h>
76 #include <netinet/ip6.h>
77 #include <netinet/tcp.h>
78 #include <netinet/udp.h>
79
80 #include <machine/in_cksum.h>
81 #include <dev/led/led.h>
82 #include <dev/pci/pcivar.h>
83 #include <dev/pci/pcireg.h>
84
85 #include "e1000_api.h"
86 #include "e1000_82571.h"
87 #include "if_em.h"
88
89 /*********************************************************************
90  *  Set this to one to display debug statistics
91  *********************************************************************/
92 int     em_display_debug_stats = 0;
93
94 /*********************************************************************
95  *  Driver version:
96  *********************************************************************/
97 char em_driver_version[] = "7.3.2";
98
99 /*********************************************************************
100  *  PCI Device ID Table
101  *
102  *  Used by em_probe to decide whether the driver should attach.
103  *  The last field stores an index into em_strings.
104  *  Last entry must be all 0s
105  *
106  *  { Vendor ID, Device ID, SubVendor ID, SubDevice ID, String Index }
107  *********************************************************************/
108
109 static em_vendor_info_t em_vendor_info_array[] =
110 {
111         /* Intel(R) PRO/1000 Network Connection */
112         { 0x8086, E1000_DEV_ID_82571EB_COPPER,  PCI_ANY_ID, PCI_ANY_ID, 0},
113         { 0x8086, E1000_DEV_ID_82571EB_FIBER,   PCI_ANY_ID, PCI_ANY_ID, 0},
114         { 0x8086, E1000_DEV_ID_82571EB_SERDES,  PCI_ANY_ID, PCI_ANY_ID, 0},
115         { 0x8086, E1000_DEV_ID_82571EB_SERDES_DUAL,
116                                                 PCI_ANY_ID, PCI_ANY_ID, 0},
117         { 0x8086, E1000_DEV_ID_82571EB_SERDES_QUAD,
118                                                 PCI_ANY_ID, PCI_ANY_ID, 0},
119         { 0x8086, E1000_DEV_ID_82571EB_QUAD_COPPER,
120                                                 PCI_ANY_ID, PCI_ANY_ID, 0},
121         { 0x8086, E1000_DEV_ID_82571EB_QUAD_COPPER_LP,
122                                                 PCI_ANY_ID, PCI_ANY_ID, 0},
123         { 0x8086, E1000_DEV_ID_82571EB_QUAD_FIBER,
124                                                 PCI_ANY_ID, PCI_ANY_ID, 0},
125         { 0x8086, E1000_DEV_ID_82571PT_QUAD_COPPER,
126                                                 PCI_ANY_ID, PCI_ANY_ID, 0},
127         { 0x8086, E1000_DEV_ID_82572EI_COPPER,  PCI_ANY_ID, PCI_ANY_ID, 0},
128         { 0x8086, E1000_DEV_ID_82572EI_FIBER,   PCI_ANY_ID, PCI_ANY_ID, 0},
129         { 0x8086, E1000_DEV_ID_82572EI_SERDES,  PCI_ANY_ID, PCI_ANY_ID, 0},
130         { 0x8086, E1000_DEV_ID_82572EI,         PCI_ANY_ID, PCI_ANY_ID, 0},
131
132         { 0x8086, E1000_DEV_ID_82573E,          PCI_ANY_ID, PCI_ANY_ID, 0},
133         { 0x8086, E1000_DEV_ID_82573E_IAMT,     PCI_ANY_ID, PCI_ANY_ID, 0},
134         { 0x8086, E1000_DEV_ID_82573L,          PCI_ANY_ID, PCI_ANY_ID, 0},
135         { 0x8086, E1000_DEV_ID_82583V,          PCI_ANY_ID, PCI_ANY_ID, 0},
136         { 0x8086, E1000_DEV_ID_80003ES2LAN_COPPER_SPT,
137                                                 PCI_ANY_ID, PCI_ANY_ID, 0},
138         { 0x8086, E1000_DEV_ID_80003ES2LAN_SERDES_SPT,
139                                                 PCI_ANY_ID, PCI_ANY_ID, 0},
140         { 0x8086, E1000_DEV_ID_80003ES2LAN_COPPER_DPT,
141                                                 PCI_ANY_ID, PCI_ANY_ID, 0},
142         { 0x8086, E1000_DEV_ID_80003ES2LAN_SERDES_DPT,
143                                                 PCI_ANY_ID, PCI_ANY_ID, 0},
144         { 0x8086, E1000_DEV_ID_ICH8_IGP_M_AMT,  PCI_ANY_ID, PCI_ANY_ID, 0},
145         { 0x8086, E1000_DEV_ID_ICH8_IGP_AMT,    PCI_ANY_ID, PCI_ANY_ID, 0},
146         { 0x8086, E1000_DEV_ID_ICH8_IGP_C,      PCI_ANY_ID, PCI_ANY_ID, 0},
147         { 0x8086, E1000_DEV_ID_ICH8_IFE,        PCI_ANY_ID, PCI_ANY_ID, 0},
148         { 0x8086, E1000_DEV_ID_ICH8_IFE_GT,     PCI_ANY_ID, PCI_ANY_ID, 0},
149         { 0x8086, E1000_DEV_ID_ICH8_IFE_G,      PCI_ANY_ID, PCI_ANY_ID, 0},
150         { 0x8086, E1000_DEV_ID_ICH8_IGP_M,      PCI_ANY_ID, PCI_ANY_ID, 0},
151         { 0x8086, E1000_DEV_ID_ICH8_82567V_3,   PCI_ANY_ID, PCI_ANY_ID, 0},
152         { 0x8086, E1000_DEV_ID_ICH9_IGP_M_AMT,  PCI_ANY_ID, PCI_ANY_ID, 0},
153         { 0x8086, E1000_DEV_ID_ICH9_IGP_AMT,    PCI_ANY_ID, PCI_ANY_ID, 0},
154         { 0x8086, E1000_DEV_ID_ICH9_IGP_C,      PCI_ANY_ID, PCI_ANY_ID, 0},
155         { 0x8086, E1000_DEV_ID_ICH9_IGP_M,      PCI_ANY_ID, PCI_ANY_ID, 0},
156         { 0x8086, E1000_DEV_ID_ICH9_IGP_M_V,    PCI_ANY_ID, PCI_ANY_ID, 0},
157         { 0x8086, E1000_DEV_ID_ICH9_IFE,        PCI_ANY_ID, PCI_ANY_ID, 0},
158         { 0x8086, E1000_DEV_ID_ICH9_IFE_GT,     PCI_ANY_ID, PCI_ANY_ID, 0},
159         { 0x8086, E1000_DEV_ID_ICH9_IFE_G,      PCI_ANY_ID, PCI_ANY_ID, 0},
160         { 0x8086, E1000_DEV_ID_ICH9_BM,         PCI_ANY_ID, PCI_ANY_ID, 0},
161         { 0x8086, E1000_DEV_ID_82574L,          PCI_ANY_ID, PCI_ANY_ID, 0},
162         { 0x8086, E1000_DEV_ID_82574LA,         PCI_ANY_ID, PCI_ANY_ID, 0},
163         { 0x8086, E1000_DEV_ID_ICH10_R_BM_LM,   PCI_ANY_ID, PCI_ANY_ID, 0},
164         { 0x8086, E1000_DEV_ID_ICH10_R_BM_LF,   PCI_ANY_ID, PCI_ANY_ID, 0},
165         { 0x8086, E1000_DEV_ID_ICH10_R_BM_V,    PCI_ANY_ID, PCI_ANY_ID, 0},
166         { 0x8086, E1000_DEV_ID_ICH10_D_BM_LM,   PCI_ANY_ID, PCI_ANY_ID, 0},
167         { 0x8086, E1000_DEV_ID_ICH10_D_BM_LF,   PCI_ANY_ID, PCI_ANY_ID, 0},
168         { 0x8086, E1000_DEV_ID_ICH10_D_BM_V,    PCI_ANY_ID, PCI_ANY_ID, 0},
169         { 0x8086, E1000_DEV_ID_PCH_M_HV_LM,     PCI_ANY_ID, PCI_ANY_ID, 0},
170         { 0x8086, E1000_DEV_ID_PCH_M_HV_LC,     PCI_ANY_ID, PCI_ANY_ID, 0},
171         { 0x8086, E1000_DEV_ID_PCH_D_HV_DM,     PCI_ANY_ID, PCI_ANY_ID, 0},
172         { 0x8086, E1000_DEV_ID_PCH_D_HV_DC,     PCI_ANY_ID, PCI_ANY_ID, 0},
173         { 0x8086, E1000_DEV_ID_PCH2_LV_LM,      PCI_ANY_ID, PCI_ANY_ID, 0},
174         { 0x8086, E1000_DEV_ID_PCH2_LV_V,       PCI_ANY_ID, PCI_ANY_ID, 0},
175         /* required last entry */
176         { 0, 0, 0, 0, 0}
177 };
178
179 /*********************************************************************
180  *  Table of branding strings for all supported NICs.
181  *********************************************************************/
182
183 static char *em_strings[] = {
184         "Intel(R) PRO/1000 Network Connection"
185 };
186
187 /*********************************************************************
188  *  Function prototypes
189  *********************************************************************/
190 static int      em_probe(device_t);
191 static int      em_attach(device_t);
192 static int      em_detach(device_t);
193 static int      em_shutdown(device_t);
194 static int      em_suspend(device_t);
195 static int      em_resume(device_t);
196 #ifdef EM_MULTIQUEUE
197 static int      em_mq_start(struct ifnet *, struct mbuf *);
198 static int      em_mq_start_locked(struct ifnet *,
199                     struct tx_ring *, struct mbuf *);
200 static void     em_qflush(struct ifnet *);
201 #else
202 static void     em_start(struct ifnet *);
203 static void     em_start_locked(struct ifnet *, struct tx_ring *);
204 #endif
205 static int      em_ioctl(struct ifnet *, u_long, caddr_t);
206 static void     em_init(void *);
207 static void     em_init_locked(struct adapter *);
208 static void     em_stop(void *);
209 static void     em_media_status(struct ifnet *, struct ifmediareq *);
210 static int      em_media_change(struct ifnet *);
211 static void     em_identify_hardware(struct adapter *);
212 static int      em_allocate_pci_resources(struct adapter *);
213 static int      em_allocate_legacy(struct adapter *);
214 static int      em_allocate_msix(struct adapter *);
215 static int      em_allocate_queues(struct adapter *);
216 static int      em_setup_msix(struct adapter *);
217 static void     em_free_pci_resources(struct adapter *);
218 static void     em_local_timer(void *);
219 static void     em_reset(struct adapter *);
220 static int      em_setup_interface(device_t, struct adapter *);
221
222 static void     em_setup_transmit_structures(struct adapter *);
223 static void     em_initialize_transmit_unit(struct adapter *);
224 static int      em_allocate_transmit_buffers(struct tx_ring *);
225 static void     em_free_transmit_structures(struct adapter *);
226 static void     em_free_transmit_buffers(struct tx_ring *);
227
228 static int      em_setup_receive_structures(struct adapter *);
229 static int      em_allocate_receive_buffers(struct rx_ring *);
230 static void     em_initialize_receive_unit(struct adapter *);
231 static void     em_free_receive_structures(struct adapter *);
232 static void     em_free_receive_buffers(struct rx_ring *);
233
234 static void     em_enable_intr(struct adapter *);
235 static void     em_disable_intr(struct adapter *);
236 static void     em_update_stats_counters(struct adapter *);
237 static void     em_add_hw_stats(struct adapter *adapter);
238 static void     em_txeof(struct tx_ring *);
239 static bool     em_rxeof(struct rx_ring *, int, int *);
240 #ifndef __NO_STRICT_ALIGNMENT
241 static int      em_fixup_rx(struct rx_ring *);
242 #endif
243 static void     em_receive_checksum(struct e1000_rx_desc *, struct mbuf *);
244 static void     em_transmit_checksum_setup(struct tx_ring *, struct mbuf *, int,
245                     struct ip *, u32 *, u32 *);
246 static void     em_tso_setup(struct tx_ring *, struct mbuf *, int, struct ip *,
247                     struct tcphdr *, u32 *, u32 *);
248 static void     em_set_promisc(struct adapter *);
249 static void     em_disable_promisc(struct adapter *);
250 static void     em_set_multi(struct adapter *);
251 static void     em_update_link_status(struct adapter *);
252 static void     em_refresh_mbufs(struct rx_ring *, int);
253 static void     em_register_vlan(void *, struct ifnet *, u16);
254 static void     em_unregister_vlan(void *, struct ifnet *, u16);
255 static void     em_setup_vlan_hw_support(struct adapter *);
256 static int      em_xmit(struct tx_ring *, struct mbuf **);
257 static int      em_dma_malloc(struct adapter *, bus_size_t,
258                     struct em_dma_alloc *, int);
259 static void     em_dma_free(struct adapter *, struct em_dma_alloc *);
260 static int      em_sysctl_nvm_info(SYSCTL_HANDLER_ARGS);
261 static void     em_print_nvm_info(struct adapter *);
262 static int      em_sysctl_debug_info(SYSCTL_HANDLER_ARGS);
263 static void     em_print_debug_info(struct adapter *);
264 static int      em_is_valid_ether_addr(u8 *);
265 static int      em_sysctl_int_delay(SYSCTL_HANDLER_ARGS);
266 static void     em_add_int_delay_sysctl(struct adapter *, const char *,
267                     const char *, struct em_int_delay_info *, int, int);
268 /* Management and WOL Support */
269 static void     em_init_manageability(struct adapter *);
270 static void     em_release_manageability(struct adapter *);
271 static void     em_get_hw_control(struct adapter *);
272 static void     em_release_hw_control(struct adapter *);
273 static void     em_get_wakeup(device_t);
274 static void     em_enable_wakeup(device_t);
275 static int      em_enable_phy_wakeup(struct adapter *);
276 static void     em_led_func(void *, int);
277 static void     em_disable_aspm(struct adapter *);
278
279 static int      em_irq_fast(void *);
280
281 /* MSIX handlers */
282 static void     em_msix_tx(void *);
283 static void     em_msix_rx(void *);
284 static void     em_msix_link(void *);
285 static void     em_handle_tx(void *context, int pending);
286 static void     em_handle_rx(void *context, int pending);
287 static void     em_handle_link(void *context, int pending);
288
289 static void     em_set_sysctl_value(struct adapter *, const char *,
290                     const char *, int *, int);
291 static int      em_set_flowcntl(SYSCTL_HANDLER_ARGS);
292
293 static __inline void em_rx_discard(struct rx_ring *, int);
294
295 #ifdef DEVICE_POLLING
296 static poll_handler_t em_poll;
297 #endif /* DEVICE_POLLING */
298
299 /*********************************************************************
300  *  FreeBSD Device Interface Entry Points
301  *********************************************************************/
302
303 static device_method_t em_methods[] = {
304         /* Device interface */
305         DEVMETHOD(device_probe, em_probe),
306         DEVMETHOD(device_attach, em_attach),
307         DEVMETHOD(device_detach, em_detach),
308         DEVMETHOD(device_shutdown, em_shutdown),
309         DEVMETHOD(device_suspend, em_suspend),
310         DEVMETHOD(device_resume, em_resume),
311         {0, 0}
312 };
313
314 static driver_t em_driver = {
315         "em", em_methods, sizeof(struct adapter),
316 };
317
318 devclass_t em_devclass;
319 DRIVER_MODULE(em, pci, em_driver, em_devclass, 0, 0);
320 MODULE_DEPEND(em, pci, 1, 1, 1);
321 MODULE_DEPEND(em, ether, 1, 1, 1);
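
/*
 * Aside, for readers new to newbus (a standard reading of the macros
 * above, not spelled out in the original): DRIVER_MODULE() registers
 * the "em" driver on the pci bus, with each instance getting a softc
 * of sizeof(struct adapter); the MODULE_DEPEND() lines record
 * load-order dependencies on the pci and ether modules, with min,
 * preferred and max interface version all 1.
 */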
322
323 /*********************************************************************
324  *  Tunable default values.
325  *********************************************************************/
326
327 #define EM_TICKS_TO_USECS(ticks)        ((1024 * (ticks) + 500) / 1000)
328 #define EM_USECS_TO_TICKS(usecs)        ((1000 * (usecs) + 512) / 1024)
329 #define M_TSO_LEN                       66
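
/*
 * A worked example of the conversion macros above (the 1024/1000
 * scaling reflects the hardware delay timers ticking in 1.024 usec
 * units, with the +500/+512 terms giving round-to-nearest):
 *
 *   EM_USECS_TO_TICKS(100) = (1000 * 100 + 512) / 1024 = 98 ticks
 *   EM_TICKS_TO_USECS(98)  = (1024 * 98 + 500) / 1000  = 100 usecs
 */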
330
331 /* Allow common code without TSO */
332 #ifndef CSUM_TSO
333 #define CSUM_TSO        0
334 #endif
335
336 static SYSCTL_NODE(_hw, OID_AUTO, em, CTLFLAG_RD, 0, "EM driver parameters");
337
338 static int em_tx_int_delay_dflt = EM_TICKS_TO_USECS(EM_TIDV);
339 static int em_rx_int_delay_dflt = EM_TICKS_TO_USECS(EM_RDTR);
340 TUNABLE_INT("hw.em.tx_int_delay", &em_tx_int_delay_dflt);
341 TUNABLE_INT("hw.em.rx_int_delay", &em_rx_int_delay_dflt);
342 SYSCTL_INT(_hw_em, OID_AUTO, tx_int_delay, CTLFLAG_RDTUN, &em_tx_int_delay_dflt,
343     0, "Default transmit interrupt delay in usecs");
344 SYSCTL_INT(_hw_em, OID_AUTO, rx_int_delay, CTLFLAG_RDTUN, &em_rx_int_delay_dflt,
345     0, "Default receive interrupt delay in usecs");
346
347 static int em_tx_abs_int_delay_dflt = EM_TICKS_TO_USECS(EM_TADV);
348 static int em_rx_abs_int_delay_dflt = EM_TICKS_TO_USECS(EM_RADV);
349 TUNABLE_INT("hw.em.tx_abs_int_delay", &em_tx_abs_int_delay_dflt);
350 TUNABLE_INT("hw.em.rx_abs_int_delay", &em_rx_abs_int_delay_dflt);
351 SYSCTL_INT(_hw_em, OID_AUTO, tx_abs_int_delay, CTLFLAG_RDTUN,
352     &em_tx_abs_int_delay_dflt, 0,
353     "Default transmit interrupt delay limit in usecs");
354 SYSCTL_INT(_hw_em, OID_AUTO, rx_abs_int_delay, CTLFLAG_RDTUN,
355     &em_rx_abs_int_delay_dflt, 0,
356     "Default receive interrupt delay limit in usecs");
357
358 static int em_rxd = EM_DEFAULT_RXD;
359 static int em_txd = EM_DEFAULT_TXD;
360 TUNABLE_INT("hw.em.rxd", &em_rxd);
361 TUNABLE_INT("hw.em.txd", &em_txd);
362 SYSCTL_INT(_hw_em, OID_AUTO, rxd, CTLFLAG_RDTUN, &em_rxd, 0,
363     "Number of receive descriptors per queue");
364 SYSCTL_INT(_hw_em, OID_AUTO, txd, CTLFLAG_RDTUN, &em_txd, 0,
365     "Number of transmit descriptors per queue");
366
367 static int em_smart_pwr_down = FALSE;
368 TUNABLE_INT("hw.em.smart_pwr_down", &em_smart_pwr_down);
369 SYSCTL_INT(_hw_em, OID_AUTO, smart_pwr_down, CTLFLAG_RDTUN, &em_smart_pwr_down,
370     0, "Set to true to leave smart power down enabled on newer adapters");
371
372 /* Controls whether promiscuous mode also shows bad packets */
373 static int em_debug_sbp = FALSE;
374 TUNABLE_INT("hw.em.sbp", &em_debug_sbp);
375 SYSCTL_INT(_hw_em, OID_AUTO, sbp, CTLFLAG_RDTUN, &em_debug_sbp, 0,
376     "Show bad packets in promiscuous mode");
377
378 static int em_enable_msix = TRUE;
379 TUNABLE_INT("hw.em.enable_msix", &em_enable_msix);
380 SYSCTL_INT(_hw_em, OID_AUTO, enable_msix, CTLFLAG_RDTUN, &em_enable_msix, 0,
381     "Enable MSI-X interrupts");
382
383 /* How many packets rxeof tries to clean at a time */
384 static int em_rx_process_limit = 100;
385 TUNABLE_INT("hw.em.rx_process_limit", &em_rx_process_limit);
386 SYSCTL_INT(_hw_em, OID_AUTO, rx_process_limit, CTLFLAG_RDTUN,
387     &em_rx_process_limit, 0,
388     "Maximum number of received packets to process "
389     "at a time, -1 means unlimited");
390
391 /* Energy Efficient Ethernet - default to OFF */
392 static int eee_setting = 0;
393 TUNABLE_INT("hw.em.eee_setting", &eee_setting);
394 SYSCTL_INT(_hw_em, OID_AUTO, eee_setting, CTLFLAG_RDTUN, &eee_setting, 0,
395     "Enable Energy Efficient Ethernet");
396
397 /* Global used in WOL setup with multiport cards */
398 static int global_quad_port_a = 0;
399
400 #ifdef DEV_NETMAP       /* see ixgbe.c for details */
401 #include <dev/netmap/if_em_netmap.h>
402 #endif /* DEV_NETMAP */
403
404 /*********************************************************************
405  *  Device identification routine
406  *
407  *  em_probe determines whether the driver should be loaded on an
408  *  adapter, based on the PCI vendor/device ID of that adapter.
409  *
410  *  return BUS_PROBE_DEFAULT on success, positive on failure
411  *********************************************************************/
412
413 static int
414 em_probe(device_t dev)
415 {
416         char            adapter_name[60];
417         u16             pci_vendor_id = 0;
418         u16             pci_device_id = 0;
419         u16             pci_subvendor_id = 0;
420         u16             pci_subdevice_id = 0;
421         em_vendor_info_t *ent;
422
423         INIT_DEBUGOUT("em_probe: begin");
424
425         pci_vendor_id = pci_get_vendor(dev);
426         if (pci_vendor_id != EM_VENDOR_ID)
427                 return (ENXIO);
428
429         pci_device_id = pci_get_device(dev);
430         pci_subvendor_id = pci_get_subvendor(dev);
431         pci_subdevice_id = pci_get_subdevice(dev);
432
433         ent = em_vendor_info_array;
434         while (ent->vendor_id != 0) {
435                 if ((pci_vendor_id == ent->vendor_id) &&
436                     (pci_device_id == ent->device_id) &&
437
438                     ((pci_subvendor_id == ent->subvendor_id) ||
439                     (ent->subvendor_id == PCI_ANY_ID)) &&
440
441                     ((pci_subdevice_id == ent->subdevice_id) ||
442                     (ent->subdevice_id == PCI_ANY_ID))) {
443                         sprintf(adapter_name, "%s %s",
444                                 em_strings[ent->index],
445                                 em_driver_version);
446                         device_set_desc_copy(dev, adapter_name);
447                         return (BUS_PROBE_DEFAULT);
448                 }
449                 ent++;
450         }
451
452         return (ENXIO);
453 }
454
455 /*********************************************************************
456  *  Device initialization routine
457  *
458  *  The attach entry point is called when the driver is being loaded.
459  *  This routine identifies the type of hardware, allocates all resources
460  *  and initializes the hardware.
461  *
462  *  return 0 on success, positive on failure
463  *********************************************************************/
464
465 static int
466 em_attach(device_t dev)
467 {
468         struct adapter  *adapter;
469         struct e1000_hw *hw;
470         int             error = 0;
471
472         INIT_DEBUGOUT("em_attach: begin");
473
474         if (resource_disabled("em", device_get_unit(dev))) {
475                 device_printf(dev, "Disabled by device hint\n");
476                 return (ENXIO);
477         }
478
479         adapter = device_get_softc(dev);
480         adapter->dev = adapter->osdep.dev = dev;
481         hw = &adapter->hw;
482         EM_CORE_LOCK_INIT(adapter, device_get_nameunit(dev));
483
484         /* SYSCTL stuff */
485         SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
486             SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
487             OID_AUTO, "nvm", CTLTYPE_INT|CTLFLAG_RW, adapter, 0,
488             em_sysctl_nvm_info, "I", "NVM Information");
489
490         SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
491             SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
492             OID_AUTO, "debug", CTLTYPE_INT|CTLFLAG_RW, adapter, 0,
493             em_sysctl_debug_info, "I", "Debug Information");
494
495         SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
496             SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
497             OID_AUTO, "fc", CTLTYPE_INT|CTLFLAG_RW, adapter, 0,
498             em_set_flowcntl, "I", "Flow Control");
499
500         callout_init_mtx(&adapter->timer, &adapter->core_mtx, 0);
501
502         /* Determine hardware and mac info */
503         em_identify_hardware(adapter);
504
505         /* Setup PCI resources */
506         if (em_allocate_pci_resources(adapter)) {
507                 device_printf(dev, "Allocation of PCI resources failed\n");
508                 error = ENXIO;
509                 goto err_pci;
510         }
511
512         /*
513         ** For ICH8-family parts we need to
514         ** map the flash memory; this must
515         ** happen after the MAC type has
516         ** been identified
517         */
518         if ((hw->mac.type == e1000_ich8lan) ||
519             (hw->mac.type == e1000_ich9lan) ||
520             (hw->mac.type == e1000_ich10lan) ||
521             (hw->mac.type == e1000_pchlan) ||
522             (hw->mac.type == e1000_pch2lan)) {
523                 int rid = EM_BAR_TYPE_FLASH;
524                 adapter->flash = bus_alloc_resource_any(dev,
525                     SYS_RES_MEMORY, &rid, RF_ACTIVE);
526                 if (adapter->flash == NULL) {
527                         device_printf(dev, "Mapping of Flash failed\n");
528                         error = ENXIO;
529                         goto err_pci;
530                 }
531                 /* This is used in the shared code */
532                 hw->flash_address = (u8 *)adapter->flash;
533                 adapter->osdep.flash_bus_space_tag =
534                     rman_get_bustag(adapter->flash);
535                 adapter->osdep.flash_bus_space_handle =
536                     rman_get_bushandle(adapter->flash);
537         }
538
539         /* Do Shared Code initialization */
540         if (e1000_setup_init_funcs(hw, TRUE)) {
541                 device_printf(dev, "Setup of Shared code failed\n");
542                 error = ENXIO;
543                 goto err_pci;
544         }
545
546         e1000_get_bus_info(hw);
547
548         /* Set up some sysctls for the tunable interrupt delays */
549         em_add_int_delay_sysctl(adapter, "rx_int_delay",
550             "receive interrupt delay in usecs", &adapter->rx_int_delay,
551             E1000_REGISTER(hw, E1000_RDTR), em_rx_int_delay_dflt);
552         em_add_int_delay_sysctl(adapter, "tx_int_delay",
553             "transmit interrupt delay in usecs", &adapter->tx_int_delay,
554             E1000_REGISTER(hw, E1000_TIDV), em_tx_int_delay_dflt);
555         em_add_int_delay_sysctl(adapter, "rx_abs_int_delay",
556             "receive interrupt delay limit in usecs",
557             &adapter->rx_abs_int_delay,
558             E1000_REGISTER(hw, E1000_RADV),
559             em_rx_abs_int_delay_dflt);
560         em_add_int_delay_sysctl(adapter, "tx_abs_int_delay",
561             "transmit interrupt delay limit in usecs",
562             &adapter->tx_abs_int_delay,
563             E1000_REGISTER(hw, E1000_TADV),
564             em_tx_abs_int_delay_dflt);
565
566         /* Sysctl for limiting the amount of work done in the taskqueue */
567         em_set_sysctl_value(adapter, "rx_processing_limit",
568             "max number of rx packets to process", &adapter->rx_process_limit,
569             em_rx_process_limit);
570
571         /*
572          * Validate the number of transmit and receive descriptors.
573          * They must not exceed the hardware maximum and must be a
574          * multiple of EM_DBA_ALIGN.
575          */
576         if (((em_txd * sizeof(struct e1000_tx_desc)) % EM_DBA_ALIGN) != 0 ||
577             (em_txd > EM_MAX_TXD) || (em_txd < EM_MIN_TXD)) {
578                 device_printf(dev, "Using %d TX descriptors instead of %d!\n",
579                     EM_DEFAULT_TXD, em_txd);
580                 adapter->num_tx_desc = EM_DEFAULT_TXD;
581         } else
582                 adapter->num_tx_desc = em_txd;
583
584         if (((em_rxd * sizeof(struct e1000_rx_desc)) % EM_DBA_ALIGN) != 0 ||
585             (em_rxd > EM_MAX_RXD) || (em_rxd < EM_MIN_RXD)) {
586                 device_printf(dev, "Using %d RX descriptors instead of %d!\n",
587                     EM_DEFAULT_RXD, em_rxd);
588                 adapter->num_rx_desc = EM_DEFAULT_RXD;
589         } else
590                 adapter->num_rx_desc = em_rxd;
591
592         hw->mac.autoneg = DO_AUTO_NEG;
593         hw->phy.autoneg_wait_to_complete = FALSE;
594         hw->phy.autoneg_advertised = AUTONEG_ADV_DEFAULT;
595
596         /* Copper options */
597         if (hw->phy.media_type == e1000_media_type_copper) {
598                 hw->phy.mdix = AUTO_ALL_MODES;
599                 hw->phy.disable_polarity_correction = FALSE;
600                 hw->phy.ms_type = EM_MASTER_SLAVE;
601         }
602
603         /*
604          * Set the frame limits assuming
605          * standard Ethernet-sized frames.
606          */
607         adapter->max_frame_size = ETHERMTU + ETHER_HDR_LEN + ETHERNET_FCS_SIZE;
608         adapter->min_frame_size = ETH_ZLEN + ETHERNET_FCS_SIZE;
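
        /*
         * With the stock constants (ETHERMTU 1500, ETHER_HDR_LEN 14,
         * a 4-byte FCS and ETH_ZLEN 60 -- the usual values, assumed
         * here) that is 1500 + 14 + 4 = 1518 bytes for the maximum
         * and 60 + 4 = 64 bytes for the minimum frame.
         */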
609
610         /*
611          * This controls when hardware reports transmit completion
612          * status.
613          */
614         hw->mac.report_tx_early = 1;
615
616         /* 
617         ** Get queue/ring memory
618         */
619         if (em_allocate_queues(adapter)) {
620                 error = ENOMEM;
621                 goto err_pci;
622         }
623
624         /* Allocate multicast array memory. */
625         adapter->mta = malloc(sizeof(u8) * ETH_ADDR_LEN *
626             MAX_NUM_MULTICAST_ADDRESSES, M_DEVBUF, M_NOWAIT);
627         if (adapter->mta == NULL) {
628                 device_printf(dev, "Can not allocate multicast setup array\n");
629                 error = ENOMEM;
630                 goto err_late;
631         }
632
633         /* Check SOL/IDER usage */
634         if (e1000_check_reset_block(hw))
635                 device_printf(dev, "PHY reset is blocked"
636                     " due to SOL/IDER session.\n");
637
638         /* Sysctl for setting Energy Efficient Ethernet */
639         em_set_sysctl_value(adapter, "eee_control",
640             "enable Energy Efficient Ethernet",
641             &hw->dev_spec.ich8lan.eee_disable, eee_setting);
642
643         /*
644         ** Start from a known state; this is
645         ** important for reading the NVM and
646         ** MAC address reliably afterwards.
647         */
648         e1000_reset_hw(hw);
649
650
651         /* Make sure we have a good EEPROM before we read from it */
652         if (e1000_validate_nvm_checksum(hw) < 0) {
653                 /*
654                 ** Some PCI-E parts fail the first check due to
655                 ** the link being in a sleep state; retry once, and
656                 ** if it fails a second time it's a real issue.
657                 */
658                 if (e1000_validate_nvm_checksum(hw) < 0) {
659                         device_printf(dev,
660                             "The EEPROM Checksum Is Not Valid\n");
661                         error = EIO;
662                         goto err_late;
663                 }
664         }
665
666         /* Copy the permanent MAC address out of the EEPROM */
667         if (e1000_read_mac_addr(hw) < 0) {
668                 device_printf(dev, "EEPROM read error while reading MAC"
669                     " address\n");
670                 error = EIO;
671                 goto err_late;
672         }
673
674         if (!em_is_valid_ether_addr(hw->mac.addr)) {
675                 device_printf(dev, "Invalid MAC address\n");
676                 error = EIO;
677                 goto err_late;
678         }
679
680         /*
681         **  Do interrupt configuration
682         */
683         if (adapter->msix > 1) /* Do MSIX */
684                 error = em_allocate_msix(adapter);
685         else  /* MSI or Legacy */
686                 error = em_allocate_legacy(adapter);
687         if (error)
688                 goto err_late;
689
690         /*
691          * Get Wake-on-Lan and Management info for later use
692          */
693         em_get_wakeup(dev);
694
695         /* Setup OS specific network interface */
696         if (em_setup_interface(dev, adapter) != 0)
697                 goto err_late;
698
699         em_reset(adapter);
700
701         /* Initialize statistics */
702         em_update_stats_counters(adapter);
703
704         hw->mac.get_link_status = 1;
705         em_update_link_status(adapter);
706
707         /* Register for VLAN events */
708         adapter->vlan_attach = EVENTHANDLER_REGISTER(vlan_config,
709             em_register_vlan, adapter, EVENTHANDLER_PRI_FIRST);
710         adapter->vlan_detach = EVENTHANDLER_REGISTER(vlan_unconfig,
711             em_unregister_vlan, adapter, EVENTHANDLER_PRI_FIRST); 
712
713         em_add_hw_stats(adapter);
714
715         /* Non-AMT based hardware can now take control from firmware */
716         if (adapter->has_manage && !adapter->has_amt)
717                 em_get_hw_control(adapter);
718
719         /* Tell the stack that the interface is not active */
720         adapter->ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
721         adapter->ifp->if_drv_flags |= IFF_DRV_OACTIVE;
722
723         adapter->led_dev = led_create(em_led_func, adapter,
724             device_get_nameunit(dev));
725 #ifdef DEV_NETMAP
726         em_netmap_attach(adapter);
727 #endif /* DEV_NETMAP */
728
729         INIT_DEBUGOUT("em_attach: end");
730
731         return (0);
732
733 err_late:
734         em_free_transmit_structures(adapter);
735         em_free_receive_structures(adapter);
736         em_release_hw_control(adapter);
737         if (adapter->ifp != NULL)
738                 if_free(adapter->ifp);
739 err_pci:
740         em_free_pci_resources(adapter);
741         free(adapter->mta, M_DEVBUF);
742         EM_CORE_LOCK_DESTROY(adapter);
743
744         return (error);
745 }
746
747 /*********************************************************************
748  *  Device removal routine
749  *
750  *  The detach entry point is called when the driver is being removed.
751  *  This routine stops the adapter and deallocates all the resources
752  *  that were allocated for driver operation.
753  *
754  *  return 0 on success, positive on failure
755  *********************************************************************/
756
757 static int
758 em_detach(device_t dev)
759 {
760         struct adapter  *adapter = device_get_softc(dev);
761         struct ifnet    *ifp = adapter->ifp;
762
763         INIT_DEBUGOUT("em_detach: begin");
764
765         /* Make sure VLANs are not using the driver */
766         if (adapter->ifp->if_vlantrunk != NULL) {
767                 device_printf(dev,"Vlan in use, detach first\n");
768                 return (EBUSY);
769         }
770
771 #ifdef DEVICE_POLLING
772         if (ifp->if_capenable & IFCAP_POLLING)
773                 ether_poll_deregister(ifp);
774 #endif
775
776         if (adapter->led_dev != NULL)
777                 led_destroy(adapter->led_dev);
778
779         EM_CORE_LOCK(adapter);
780         adapter->in_detach = 1;
781         em_stop(adapter);
782         EM_CORE_UNLOCK(adapter);
783         EM_CORE_LOCK_DESTROY(adapter);
784
785         e1000_phy_hw_reset(&adapter->hw);
786
787         em_release_manageability(adapter);
788         em_release_hw_control(adapter);
789
790         /* Unregister VLAN events */
791         if (adapter->vlan_attach != NULL)
792                 EVENTHANDLER_DEREGISTER(vlan_config, adapter->vlan_attach);
793         if (adapter->vlan_detach != NULL)
794                 EVENTHANDLER_DEREGISTER(vlan_unconfig, adapter->vlan_detach); 
795
796         ether_ifdetach(adapter->ifp);
797         callout_drain(&adapter->timer);
798
799 #ifdef DEV_NETMAP
800         netmap_detach(ifp);
801 #endif /* DEV_NETMAP */
802
803         em_free_pci_resources(adapter);
804         bus_generic_detach(dev);
805         if_free(ifp);
806
807         em_free_transmit_structures(adapter);
808         em_free_receive_structures(adapter);
809
810         em_release_hw_control(adapter);
811         free(adapter->mta, M_DEVBUF);
812
813         return (0);
814 }
815
816 /*********************************************************************
817  *
818  *  Shutdown entry point
819  *
820  **********************************************************************/
821
822 static int
823 em_shutdown(device_t dev)
824 {
825         return em_suspend(dev);
826 }
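
/*
 * Shutdown simply reuses the suspend path: em_suspend() releases
 * manageability and hardware control and then arms wakeup, so Wake On
 * LAN (if configured) keeps working once the box powers down.
 */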
827
828 /*
829  * Suspend/resume device methods.
830  */
831 static int
832 em_suspend(device_t dev)
833 {
834         struct adapter *adapter = device_get_softc(dev);
835
836         EM_CORE_LOCK(adapter);
837
838         em_release_manageability(adapter);
839         em_release_hw_control(adapter);
840         em_enable_wakeup(dev);
841
842         EM_CORE_UNLOCK(adapter);
843
844         return bus_generic_suspend(dev);
845 }
846
847 static int
848 em_resume(device_t dev)
849 {
850         struct adapter *adapter = device_get_softc(dev);
851         struct tx_ring  *txr = adapter->tx_rings;
852         struct ifnet *ifp = adapter->ifp;
853
854         EM_CORE_LOCK(adapter);
855         if (adapter->hw.mac.type == e1000_pch2lan)
856                 e1000_resume_workarounds_pchlan(&adapter->hw);
857         em_init_locked(adapter);
858         em_init_manageability(adapter);
859
860         if ((ifp->if_flags & IFF_UP) &&
861             (ifp->if_drv_flags & IFF_DRV_RUNNING) && adapter->link_active) {
862                 for (int i = 0; i < adapter->num_queues; i++, txr++) {
863                         EM_TX_LOCK(txr);
864 #ifdef EM_MULTIQUEUE
865                         if (!drbr_empty(ifp, txr->br))
866                                 em_mq_start_locked(ifp, txr, NULL);
867 #else
868                         if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
869                                 em_start_locked(ifp, txr);
870 #endif
871                         EM_TX_UNLOCK(txr);
872                 }
873         }
874         EM_CORE_UNLOCK(adapter);
875
876         return bus_generic_resume(dev);
877 }
878
879
880 #ifdef EM_MULTIQUEUE
881 /*********************************************************************
882  *  Multiqueue Transmit routines 
883  *
884  *  em_mq_start is called by the stack to initiate a transmit.
885  *  However, if the hardware is busy the driver can queue the
886  *  request rather than do an immediate send; that deferral, more
887  *  than the multiple TX queues themselves, is the advantage here.
888  **********************************************************************/
889 static int
890 em_mq_start_locked(struct ifnet *ifp, struct tx_ring *txr, struct mbuf *m)
891 {
892         struct adapter  *adapter = txr->adapter;
893         struct mbuf     *next;
894         int             err = 0, enq = 0;
895
896         if ((ifp->if_drv_flags & (IFF_DRV_RUNNING | IFF_DRV_OACTIVE)) !=
897             IFF_DRV_RUNNING || adapter->link_active == 0) {
898                 if (m != NULL)
899                         err = drbr_enqueue(ifp, txr->br, m);
900                 return (err);
901         }
902
903         enq = 0;
904         if (m == NULL) {
905                 next = drbr_dequeue(ifp, txr->br);
906         } else if (drbr_needs_enqueue(ifp, txr->br)) {
907                 if ((err = drbr_enqueue(ifp, txr->br, m)) != 0)
908                         return (err);
909                 next = drbr_dequeue(ifp, txr->br);
910         } else
911                 next = m;
912
913         /* Process the queue */
914         while (next != NULL) {
915                 if ((err = em_xmit(txr, &next)) != 0) {
916                         if (next != NULL)
917                                 err = drbr_enqueue(ifp, txr->br, next);
918                         break;
919                 }
920                 enq++;
921                 drbr_stats_update(ifp, next->m_pkthdr.len, next->m_flags);
922                 ETHER_BPF_MTAP(ifp, next);
923                 if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
924                         break;
925                 next = drbr_dequeue(ifp, txr->br);
926         }
927
928         if (enq > 0) {
929                 /* Set the watchdog */
930                 txr->queue_status = EM_QUEUE_WORKING;
931                 txr->watchdog_time = ticks;
932         }
933
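        /*
         * Descriptor housekeeping (per the logic below): when we run
         * low, first reclaim completed descriptors via em_txeof(); if
         * still under EM_MAX_SCATTER -- the most segments one frame
         * may need -- set OACTIVE so the stack stops feeding us.
         */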
934         if (txr->tx_avail < EM_MAX_SCATTER)
935                 em_txeof(txr);
936         if (txr->tx_avail < EM_MAX_SCATTER)
937                 ifp->if_drv_flags |= IFF_DRV_OACTIVE;
938         return (err);
939 }
940
941 /*
942 ** Multiqueue capable stack interface
943 */
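/*
** Design note, inferred from the code below: if the TX lock is
** contested we do not spin; the frame is pushed onto the buf_ring
** instead and will be drained later by whichever thread holds the
** lock in em_mq_start_locked().
*/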
944 static int
945 em_mq_start(struct ifnet *ifp, struct mbuf *m)
946 {
947         struct adapter  *adapter = ifp->if_softc;
948         struct tx_ring  *txr = adapter->tx_rings;
949         int             error;
950
951         if (EM_TX_TRYLOCK(txr)) {
952                 error = em_mq_start_locked(ifp, txr, m);
953                 EM_TX_UNLOCK(txr);
954         } else 
955                 error = drbr_enqueue(ifp, txr->br, m);
956
957         return (error);
958 }
959
960 /*
961 ** Flush all ring buffers
962 */
963 static void
964 em_qflush(struct ifnet *ifp)
965 {
966         struct adapter  *adapter = ifp->if_softc;
967         struct tx_ring  *txr = adapter->tx_rings;
968         struct mbuf     *m;
969
970         for (int i = 0; i < adapter->num_queues; i++, txr++) {
971                 EM_TX_LOCK(txr);
972                 while ((m = buf_ring_dequeue_sc(txr->br)) != NULL)
973                         m_freem(m);
974                 EM_TX_UNLOCK(txr);
975         }
976         if_qflush(ifp);
977 }
978 #else  /* !EM_MULTIQUEUE */
979
980 static void
981 em_start_locked(struct ifnet *ifp, struct tx_ring *txr)
982 {
983         struct adapter  *adapter = ifp->if_softc;
984         struct mbuf     *m_head;
985
986         EM_TX_LOCK_ASSERT(txr);
987
988         if ((ifp->if_drv_flags & (IFF_DRV_RUNNING|IFF_DRV_OACTIVE)) !=
989             IFF_DRV_RUNNING)
990                 return;
991
992         if (!adapter->link_active)
993                 return;
994
995         while (!IFQ_DRV_IS_EMPTY(&ifp->if_snd)) {
996                 /* Call cleanup if number of TX descriptors low */
997                 if (txr->tx_avail <= EM_TX_CLEANUP_THRESHOLD)
998                         em_txeof(txr);
999                 if (txr->tx_avail < EM_MAX_SCATTER) {
1000                         ifp->if_drv_flags |= IFF_DRV_OACTIVE;
1001                         break;
1002                 }
1003                 IFQ_DRV_DEQUEUE(&ifp->if_snd, m_head);
1004                 if (m_head == NULL)
1005                         break;
1006                 /*
1007                  *  Encapsulation can modify our pointer, and/or make it
1008                  *  NULL on failure.  In that event, we can't requeue.
1009                  */
1010                 if (em_xmit(txr, &m_head)) {
1011                         if (m_head == NULL)
1012                                 break;
1013                         IFQ_DRV_PREPEND(&ifp->if_snd, m_head);
1014                         break;
1015                 }
1016
1017                 /* Send a copy of the frame to the BPF listener */
1018                 ETHER_BPF_MTAP(ifp, m_head);
1019
1020                 /* Set timeout in case hardware has problems transmitting. */
1021                 txr->watchdog_time = ticks;
1022                 txr->queue_status = EM_QUEUE_WORKING;
1023         }
1024
1025         return;
1026 }
1027
1028 static void
1029 em_start(struct ifnet *ifp)
1030 {
1031         struct adapter  *adapter = ifp->if_softc;
1032         struct tx_ring  *txr = adapter->tx_rings;
1033
1034         if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
1035                 EM_TX_LOCK(txr);
1036                 em_start_locked(ifp, txr);
1037                 EM_TX_UNLOCK(txr);
1038         }
1039         return;
1040 }
1041 #endif /* EM_MULTIQUEUE */
1042
1043 /*********************************************************************
1044  *  Ioctl entry point
1045  *
1046  *  em_ioctl is called when the user wants to configure the
1047  *  interface.
1048  *
1049  *  return 0 on success, positive on failure
1050  **********************************************************************/
1051
1052 static int
1053 em_ioctl(struct ifnet *ifp, u_long command, caddr_t data)
1054 {
1055         struct adapter  *adapter = ifp->if_softc;
1056         struct ifreq    *ifr = (struct ifreq *)data;
1057 #if defined(INET) || defined(INET6)
1058         struct ifaddr   *ifa = (struct ifaddr *)data;
1059 #endif
1060         bool            avoid_reset = FALSE;
1061         int             error = 0;
1062
1063         if (adapter->in_detach)
1064                 return (error);
1065
1066         switch (command) {
1067         case SIOCSIFADDR:
1068 #ifdef INET
1069                 if (ifa->ifa_addr->sa_family == AF_INET)
1070                         avoid_reset = TRUE;
1071 #endif
1072 #ifdef INET6
1073                 if (ifa->ifa_addr->sa_family == AF_INET6)
1074                         avoid_reset = TRUE;
1075 #endif
1076                 /*
1077                 ** Calling init results in link renegotiation,
1078                 ** so we avoid doing it when possible.
1079                 */
1080                 if (avoid_reset) {
1081                         ifp->if_flags |= IFF_UP;
1082                         if (!(ifp->if_drv_flags & IFF_DRV_RUNNING))
1083                                 em_init(adapter);
1084 #ifdef INET
1085                         if (!(ifp->if_flags & IFF_NOARP))
1086                                 arp_ifinit(ifp, ifa);
1087 #endif
1088                 } else
1089                         error = ether_ioctl(ifp, command, data);
1090                 break;
1091         case SIOCSIFMTU:
1092             {
1093                 int max_frame_size;
1094
1095                 IOCTL_DEBUGOUT("ioctl rcv'd: SIOCSIFMTU (Set Interface MTU)");
1096
1097                 EM_CORE_LOCK(adapter);
1098                 switch (adapter->hw.mac.type) {
1099                 case e1000_82571:
1100                 case e1000_82572:
1101                 case e1000_ich9lan:
1102                 case e1000_ich10lan:
1103                 case e1000_pch2lan:
1104                 case e1000_82574:
1105                 case e1000_82583:
1106                 case e1000_80003es2lan: /* 9K Jumbo Frame size */
1107                         max_frame_size = 9234;
1108                         break;
1109                 case e1000_pchlan:
1110                         max_frame_size = 4096;
1111                         break;
1112                         /* Adapters that do not support jumbo frames */
1113                 case e1000_ich8lan:
1114                         max_frame_size = ETHER_MAX_LEN;
1115                         break;
1116                 default:
1117                         max_frame_size = MAX_JUMBO_FRAME_SIZE;
1118                 }
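                /*
                 * The check below turns the frame limit into an MTU by
                 * subtracting the 14-byte header and 4-byte CRC; e.g. a
                 * 9234-byte jumbo frame permits an MTU of up to 9216.
                 */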
1119                 if (ifr->ifr_mtu > max_frame_size - ETHER_HDR_LEN -
1120                     ETHER_CRC_LEN) {
1121                         EM_CORE_UNLOCK(adapter);
1122                         error = EINVAL;
1123                         break;
1124                 }
1125
1126                 ifp->if_mtu = ifr->ifr_mtu;
1127                 adapter->max_frame_size =
1128                     ifp->if_mtu + ETHER_HDR_LEN + ETHER_CRC_LEN;
1129                 em_init_locked(adapter);
1130                 EM_CORE_UNLOCK(adapter);
1131                 break;
1132             }
1133         case SIOCSIFFLAGS:
1134                 IOCTL_DEBUGOUT("ioctl rcv'd: "
1135                     "SIOCSIFFLAGS (Set Interface Flags)");
1136                 EM_CORE_LOCK(adapter);
1137                 if (ifp->if_flags & IFF_UP) {
1138                         if ((ifp->if_drv_flags & IFF_DRV_RUNNING)) {
1139                                 if ((ifp->if_flags ^ adapter->if_flags) &
1140                                     (IFF_PROMISC | IFF_ALLMULTI)) {
1141                                         em_disable_promisc(adapter);
1142                                         em_set_promisc(adapter);
1143                                 }
1144                         } else
1145                                 em_init_locked(adapter);
1146                 } else
1147                         if (ifp->if_drv_flags & IFF_DRV_RUNNING)
1148                                 em_stop(adapter);
1149                 adapter->if_flags = ifp->if_flags;
1150                 EM_CORE_UNLOCK(adapter);
1151                 break;
1152         case SIOCADDMULTI:
1153         case SIOCDELMULTI:
1154                 IOCTL_DEBUGOUT("ioctl rcv'd: SIOC(ADD|DEL)MULTI");
1155                 if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
1156                         EM_CORE_LOCK(adapter);
1157                         em_disable_intr(adapter);
1158                         em_set_multi(adapter);
1159 #ifdef DEVICE_POLLING
1160                         if (!(ifp->if_capenable & IFCAP_POLLING))
1161 #endif
1162                                 em_enable_intr(adapter);
1163                         EM_CORE_UNLOCK(adapter);
1164                 }
1165                 break;
1166         case SIOCSIFMEDIA:
1167                 /* Check SOL/IDER usage */
1168                 EM_CORE_LOCK(adapter);
1169                 if (e1000_check_reset_block(&adapter->hw)) {
1170                         EM_CORE_UNLOCK(adapter);
1171                         device_printf(adapter->dev, "Media change is"
1172                             " blocked due to SOL/IDER session.\n");
1173                         break;
1174                 }
1175                 EM_CORE_UNLOCK(adapter);
1176                 /* falls thru */
1177         case SIOCGIFMEDIA:
1178                 IOCTL_DEBUGOUT("ioctl rcv'd: "
1179                     "SIOCxIFMEDIA (Get/Set Interface Media)");
1180                 error = ifmedia_ioctl(ifp, ifr, &adapter->media, command);
1181                 break;
1182         case SIOCSIFCAP:
1183             {
1184                 int mask, reinit;
1185
1186                 IOCTL_DEBUGOUT("ioctl rcv'd: SIOCSIFCAP (Set Capabilities)");
1187                 reinit = 0;
1188                 mask = ifr->ifr_reqcap ^ ifp->if_capenable;
1189 #ifdef DEVICE_POLLING
1190                 if (mask & IFCAP_POLLING) {
1191                         if (ifr->ifr_reqcap & IFCAP_POLLING) {
1192                                 error = ether_poll_register(em_poll, ifp);
1193                                 if (error)
1194                                         return (error);
1195                                 EM_CORE_LOCK(adapter);
1196                                 em_disable_intr(adapter);
1197                                 ifp->if_capenable |= IFCAP_POLLING;
1198                                 EM_CORE_UNLOCK(adapter);
1199                         } else {
1200                                 error = ether_poll_deregister(ifp);
1201                                 /* Enable interrupt even in error case */
1202                                 EM_CORE_LOCK(adapter);
1203                                 em_enable_intr(adapter);
1204                                 ifp->if_capenable &= ~IFCAP_POLLING;
1205                                 EM_CORE_UNLOCK(adapter);
1206                         }
1207                 }
1208 #endif
1209                 if (mask & IFCAP_HWCSUM) {
1210                         ifp->if_capenable ^= IFCAP_HWCSUM;
1211                         reinit = 1;
1212                 }
1213                 if (mask & IFCAP_TSO4) {
1214                         ifp->if_capenable ^= IFCAP_TSO4;
1215                         reinit = 1;
1216                 }
1217                 if (mask & IFCAP_VLAN_HWTAGGING) {
1218                         ifp->if_capenable ^= IFCAP_VLAN_HWTAGGING;
1219                         reinit = 1;
1220                 }
1221                 if (mask & IFCAP_VLAN_HWFILTER) {
1222                         ifp->if_capenable ^= IFCAP_VLAN_HWFILTER;
1223                         reinit = 1;
1224                 }
1225                 if (mask & IFCAP_VLAN_HWTSO) {
1226                         ifp->if_capenable ^= IFCAP_VLAN_HWTSO;
1227                         reinit = 1;
1228                 }
1229                 if ((mask & IFCAP_WOL) &&
1230                     (ifp->if_capabilities & IFCAP_WOL) != 0) {
1231                         if (mask & IFCAP_WOL_MCAST)
1232                                 ifp->if_capenable ^= IFCAP_WOL_MCAST;
1233                         if (mask & IFCAP_WOL_MAGIC)
1234                                 ifp->if_capenable ^= IFCAP_WOL_MAGIC;
1235                 }
1236                 if (reinit && (ifp->if_drv_flags & IFF_DRV_RUNNING))
1237                         em_init(adapter);
1238                 VLAN_CAPABILITIES(ifp);
1239                 break;
1240             }
1241
1242         default:
1243                 error = ether_ioctl(ifp, command, data);
1244                 break;
1245         }
1246
1247         return (error);
1248 }
1249
1250
1251 /*********************************************************************
1252  *  Init entry point
1253  *
1254  *  This routine is used in two ways. It is used by the stack as
1255  *  the init entry point in the network interface structure. It is
1256  *  also used by the driver as a hw/sw initialization routine to
1257  *  get to a consistent state.
1258  *
1259  *  (Returns nothing; on failure the adapter is simply stopped.)
1260  **********************************************************************/
1261
1262 static void
1263 em_init_locked(struct adapter *adapter)
1264 {
1265         struct ifnet    *ifp = adapter->ifp;
1266         device_t        dev = adapter->dev;
1267
1268         INIT_DEBUGOUT("em_init: begin");
1269
1270         EM_CORE_LOCK_ASSERT(adapter);
1271
1272         em_disable_intr(adapter);
1273         callout_stop(&adapter->timer);
1274
1275         /* Get the latest mac address, User can use a LAA */
1276         bcopy(IF_LLADDR(adapter->ifp), adapter->hw.mac.addr,
1277               ETHER_ADDR_LEN);
1278
1279         /* Put the address into the Receive Address Array */
1280         e1000_rar_set(&adapter->hw, adapter->hw.mac.addr, 0);
1281
1282         /*
1283          * With the 82571 adapter, RAR[0] may be overwritten
1284          * when the other port is reset; we make a duplicate
1285          * in RAR[14] for that eventuality, which assures
1286          * the interface continues to function.
1287          */
1288         if (adapter->hw.mac.type == e1000_82571) {
1289                 e1000_set_laa_state_82571(&adapter->hw, TRUE);
1290                 e1000_rar_set(&adapter->hw, adapter->hw.mac.addr,
1291                     E1000_RAR_ENTRIES - 1);
1292         }
1293
1294         /* Initialize the hardware */
1295         em_reset(adapter);
1296         em_update_link_status(adapter);
1297
1298         /* Setup VLAN support, basic and offload if available */
1299         E1000_WRITE_REG(&adapter->hw, E1000_VET, ETHERTYPE_VLAN);
1300
1301         /* Set hardware offload abilities */
1302         ifp->if_hwassist = 0;
1303         if (ifp->if_capenable & IFCAP_TXCSUM)
1304                 ifp->if_hwassist |= (CSUM_TCP | CSUM_UDP);
1305         if (ifp->if_capenable & IFCAP_TSO4)
1306                 ifp->if_hwassist |= CSUM_TSO;
1307
1308         /* Configure for OS presence */
1309         em_init_manageability(adapter);
1310
1311         /* Prepare transmit descriptors and buffers */
1312         em_setup_transmit_structures(adapter);
1313         em_initialize_transmit_unit(adapter);
1314
1315         /* Setup Multicast table */
1316         em_set_multi(adapter);
1317
1318         /*
1319         ** Figure out the desired mbuf
1320         ** pool for doing jumbos
1321         */
1322         if (adapter->max_frame_size <= 2048)
1323                 adapter->rx_mbuf_sz = MCLBYTES;
1324         else if (adapter->max_frame_size <= 4096)
1325                 adapter->rx_mbuf_sz = MJUMPAGESIZE;
1326         else
1327                 adapter->rx_mbuf_sz = MJUM9BYTES;
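
        /*
         * Sizing note, assuming stock kernel constants: MCLBYTES is a
         * 2K cluster, MJUMPAGESIZE a page-sized (typically 4K) cluster,
         * and MJUM9BYTES a 9K cluster, so a maximum-sized frame always
         * fits in a single receive buffer.
         */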
1328
1329         /* Prepare receive descriptors and buffers */
1330         if (em_setup_receive_structures(adapter)) {
1331                 device_printf(dev, "Could not setup receive structures\n");
1332                 em_stop(adapter);
1333                 return;
1334         }
1335         em_initialize_receive_unit(adapter);
1336
1337         /* Use real VLAN Filter support? */
1338         if (ifp->if_capenable & IFCAP_VLAN_HWTAGGING) {
1339                 if (ifp->if_capenable & IFCAP_VLAN_HWFILTER)
1340                         /* Use real VLAN Filter support */
1341                         em_setup_vlan_hw_support(adapter);
1342                 else {
1343                         u32 ctrl;
1344                         ctrl = E1000_READ_REG(&adapter->hw, E1000_CTRL);
1345                         ctrl |= E1000_CTRL_VME;
1346                         E1000_WRITE_REG(&adapter->hw, E1000_CTRL, ctrl);
1347                 }
1348         }
1349
1350         /* Don't lose promiscuous settings */
1351         em_set_promisc(adapter);
1352
1353         /* Set the interface as ACTIVE */
1354         ifp->if_drv_flags |= IFF_DRV_RUNNING;
1355         ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;
1356
1357         callout_reset(&adapter->timer, hz, em_local_timer, adapter);
1358         e1000_clear_hw_cntrs_base_generic(&adapter->hw);
1359
1360         /* MSI/X configuration for 82574 */
1361         if (adapter->hw.mac.type == e1000_82574) {
1362                 int tmp;
1363                 tmp = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
1364                 tmp |= E1000_CTRL_EXT_PBA_CLR;
1365                 E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT, tmp);
1366                 /* Set the IVAR - interrupt vector routing. */
1367                 E1000_WRITE_REG(&adapter->hw, E1000_IVAR, adapter->ivars);
1368         }
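                     /*
                      * (PBA_CLR is understood to make the MSI-X Pending Bit
                      * Array self-clear as causes are serviced; adapter->ivars
                      * itself is assembled in em_allocate_msix().)
                      */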
1369
1370 #ifdef DEVICE_POLLING
1371         /*
1372          * Only enable interrupts if we are not polling; make sure
1373          * they are off otherwise.
1374          */
1375         if (ifp->if_capenable & IFCAP_POLLING)
1376                 em_disable_intr(adapter);
1377         else
1378 #endif /* DEVICE_POLLING */
1379                 em_enable_intr(adapter);
1380
1381         /* AMT based hardware can now take control from firmware */
1382         if (adapter->has_manage && adapter->has_amt)
1383                 em_get_hw_control(adapter);
1384 }
1385
1386 static void
1387 em_init(void *arg)
1388 {
1389         struct adapter *adapter = arg;
1390
1391         EM_CORE_LOCK(adapter);
1392         em_init_locked(adapter);
1393         EM_CORE_UNLOCK(adapter);
1394 }
1395
1396
1397 #ifdef DEVICE_POLLING
1398 /*********************************************************************
1399  *
1400  *  Legacy polling routine: note this only works with a single queue
1401  *
1402  *********************************************************************/
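     /* The 'count' argument bounds the RX work done per call; the return
      * value reports the number of RX packets processed back to the
      * polling framework. */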
1403 static int
1404 em_poll(struct ifnet *ifp, enum poll_cmd cmd, int count)
1405 {
1406         struct adapter *adapter = ifp->if_softc;
1407         struct tx_ring  *txr = adapter->tx_rings;
1408         struct rx_ring  *rxr = adapter->rx_rings;
1409         u32             reg_icr;
1410         int             rx_done;
1411
1412         EM_CORE_LOCK(adapter);
1413         if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) {
1414                 EM_CORE_UNLOCK(adapter);
1415                 return (0);
1416         }
1417
1418         if (cmd == POLL_AND_CHECK_STATUS) {
1419                 reg_icr = E1000_READ_REG(&adapter->hw, E1000_ICR);
1420                 if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
1421                         callout_stop(&adapter->timer);
1422                         adapter->hw.mac.get_link_status = 1;
1423                         em_update_link_status(adapter);
1424                         callout_reset(&adapter->timer, hz,
1425                             em_local_timer, adapter);
1426                 }
1427         }
1428         EM_CORE_UNLOCK(adapter);
1429
1430         em_rxeof(rxr, count, &rx_done);
1431
1432         EM_TX_LOCK(txr);
1433         em_txeof(txr);
1434 #ifdef EM_MULTIQUEUE
1435         if (!drbr_empty(ifp, txr->br))
1436                 em_mq_start_locked(ifp, txr, NULL);
1437 #else
1438         if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
1439                 em_start_locked(ifp, txr);
1440 #endif
1441         EM_TX_UNLOCK(txr);
1442
1443         return (rx_done);
1444 }
1445 #endif /* DEVICE_POLLING */
1446
1447
1448 /*********************************************************************
1449  *
1450  *  Fast Legacy/MSI Combined Interrupt Service routine  
1451  *
1452  *********************************************************************/
1453 static int
1454 em_irq_fast(void *arg)
1455 {
1456         struct adapter  *adapter = arg;
1457         struct ifnet    *ifp;
1458         u32             reg_icr;
1459
1460         ifp = adapter->ifp;
1461
1462         reg_icr = E1000_READ_REG(&adapter->hw, E1000_ICR);
1463
1464         /* Hot eject?  */
1465         if (reg_icr == 0xffffffff)
1466                 return FILTER_STRAY;
1467
1468         /* Definitely not our interrupt.  */
1469         if (reg_icr == 0x0)
1470                 return FILTER_STRAY;
1471
1472         /*
1473          * Starting with the 82571 chip, bit 31 should be used to
1474          * determine whether the interrupt belongs to us.
1475          */
1476         if (adapter->hw.mac.type >= e1000_82571 &&
1477             (reg_icr & E1000_ICR_INT_ASSERTED) == 0)
1478                 return FILTER_STRAY;
1479
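             /*
              * Claim the interrupt: mask the device and defer RX/TX work to
              * the taskqueue; em_handle_que() re-enables interrupts once the
              * deferred work is drained.
              */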
1480         em_disable_intr(adapter);
1481         taskqueue_enqueue(adapter->tq, &adapter->que_task);
1482
1483         /* Link status change */
1484         if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
1485                 adapter->hw.mac.get_link_status = 1;
1486                 taskqueue_enqueue(taskqueue_fast, &adapter->link_task);
1487         }
1488
1489         if (reg_icr & E1000_ICR_RXO)
1490                 adapter->rx_overruns++;
1491         return FILTER_HANDLED;
1492 }
1493
1494 /* Combined RX/TX handler, used by Legacy and MSI */
1495 static void
1496 em_handle_que(void *context, int pending)
1497 {
1498         struct adapter  *adapter = context;
1499         struct ifnet    *ifp = adapter->ifp;
1500         struct tx_ring  *txr = adapter->tx_rings;
1501         struct rx_ring  *rxr = adapter->rx_rings;
1502
1503
1504         if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
1505                 bool more = em_rxeof(rxr, adapter->rx_process_limit, NULL);
1506                 EM_TX_LOCK(txr);
1507                 em_txeof(txr);
1508 #ifdef EM_MULTIQUEUE
1509                 if (!drbr_empty(ifp, txr->br))
1510                         em_mq_start_locked(ifp, txr, NULL);
1511 #else
1512                 if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
1513                         em_start_locked(ifp, txr);
1514 #endif
1515                 EM_TX_UNLOCK(txr);
1516                 if (more) {
1517                         taskqueue_enqueue(adapter->tq, &adapter->que_task);
1518                         return;
1519                 }
1520         }
1521
1522         em_enable_intr(adapter);
1523         return;
1524 }
1525
1526
1527 /*********************************************************************
1528  *
1529  *  MSIX Interrupt Service Routines
1530  *
1531  **********************************************************************/
1532 static void
1533 em_msix_tx(void *arg)
1534 {
1535         struct tx_ring *txr = arg;
1536         struct adapter *adapter = txr->adapter;
1537         struct ifnet    *ifp = adapter->ifp;
1538
1539         ++txr->tx_irq;
1540         EM_TX_LOCK(txr);
1541         em_txeof(txr);
1542 #ifdef EM_MULTIQUEUE
1543         if (!drbr_empty(ifp, txr->br))
1544                 em_mq_start_locked(ifp, txr, NULL);
1545 #else
1546         if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
1547                 em_start_locked(ifp, txr);
1548 #endif
1549         /* Reenable this interrupt */
1550         E1000_WRITE_REG(&adapter->hw, E1000_IMS, txr->ims);
1551         EM_TX_UNLOCK(txr);
1552         return;
1553 }
1554
1555 /*********************************************************************
1556  *
1557  *  MSIX RX Interrupt Service routine
1558  *
1559  **********************************************************************/
1560
1561 static void
1562 em_msix_rx(void *arg)
1563 {
1564         struct rx_ring  *rxr = arg;
1565         struct adapter  *adapter = rxr->adapter;
1566         bool            more;
1567
1568         ++rxr->rx_irq;
1569         more = em_rxeof(rxr, adapter->rx_process_limit, NULL);
1570         if (more)
1571                 taskqueue_enqueue(rxr->tq, &rxr->rx_task);
1572         else
1573                 /* Reenable this interrupt */
1574                 E1000_WRITE_REG(&adapter->hw, E1000_IMS, rxr->ims);
1575         return;
1576 }
1577
1578 /*********************************************************************
1579  *
1580  *  MSIX Link Fast Interrupt Service routine
1581  *
1582  **********************************************************************/
1583 static void
1584 em_msix_link(void *arg)
1585 {
1586         struct adapter  *adapter = arg;
1587         u32             reg_icr;
1588
1589         ++adapter->link_irq;
1590         reg_icr = E1000_READ_REG(&adapter->hw, E1000_ICR);
1591
1592         if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
1593                 adapter->hw.mac.get_link_status = 1;
1594                 em_handle_link(adapter, 0);
1595         } else
1596                 E1000_WRITE_REG(&adapter->hw, E1000_IMS,
1597                     EM_MSIX_LINK | E1000_IMS_LSC);
1598         return;
1599 }
1600
1601 static void
1602 em_handle_rx(void *context, int pending)
1603 {
1604         struct rx_ring  *rxr = context;
1605         struct adapter  *adapter = rxr->adapter;
1606         bool            more;
1607
1608         more = em_rxeof(rxr, adapter->rx_process_limit, NULL);
1609         if (more)
1610                 taskqueue_enqueue(rxr->tq, &rxr->rx_task);
1611         else
1612                 /* Reenable this interrupt */
1613                 E1000_WRITE_REG(&adapter->hw, E1000_IMS, rxr->ims);
1614 }
1615
1616 static void
1617 em_handle_tx(void *context, int pending)
1618 {
1619         struct tx_ring  *txr = context;
1620         struct adapter  *adapter = txr->adapter;
1621         struct ifnet    *ifp = adapter->ifp;
1622
1623         EM_TX_LOCK(txr);
1624         em_txeof(txr);
1625 #ifdef EM_MULTIQUEUE
1626         if (!drbr_empty(ifp, txr->br))
1627                 em_mq_start_locked(ifp, txr, NULL);
1628 #else
1629         if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
1630                 em_start_locked(ifp, txr);
1631 #endif
1632         E1000_WRITE_REG(&adapter->hw, E1000_IMS, txr->ims);
1633         EM_TX_UNLOCK(txr);
1634 }
1635
1636 static void
1637 em_handle_link(void *context, int pending)
1638 {
1639         struct adapter  *adapter = context;
1640         struct tx_ring  *txr = adapter->tx_rings;
1641         struct ifnet *ifp = adapter->ifp;
1642
1643         if (!(ifp->if_drv_flags & IFF_DRV_RUNNING))
1644                 return;
1645
1646         EM_CORE_LOCK(adapter);
1647         callout_stop(&adapter->timer);
1648         em_update_link_status(adapter);
1649         callout_reset(&adapter->timer, hz, em_local_timer, adapter);
1650         E1000_WRITE_REG(&adapter->hw, E1000_IMS,
1651             EM_MSIX_LINK | E1000_IMS_LSC);
1652         if (adapter->link_active) {
1653                 for (int i = 0; i < adapter->num_queues; i++, txr++) {
1654                         EM_TX_LOCK(txr);
1655 #ifdef EM_MULTIQUEUE
1656                         if (!drbr_empty(ifp, txr->br))
1657                                 em_mq_start_locked(ifp, txr, NULL);
1658 #else
1659                         if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
1660                                 em_start_locked(ifp, txr);
1661 #endif
1662                         EM_TX_UNLOCK(txr);
1663                 }
1664         }
1665         EM_CORE_UNLOCK(adapter);
1666 }
1667
1668
1669 /*********************************************************************
1670  *
1671  *  Media Ioctl callback
1672  *
1673  *  This routine is called whenever the user queries the status of
1674  *  the interface using ifconfig.
1675  *
1676  **********************************************************************/
1677 static void
1678 em_media_status(struct ifnet *ifp, struct ifmediareq *ifmr)
1679 {
1680         struct adapter *adapter = ifp->if_softc;
1681         u_char fiber_type = IFM_1000_SX;
1682
1683         INIT_DEBUGOUT("em_media_status: begin");
1684
1685         EM_CORE_LOCK(adapter);
1686         em_update_link_status(adapter);
1687
1688         ifmr->ifm_status = IFM_AVALID;
1689         ifmr->ifm_active = IFM_ETHER;
1690
1691         if (!adapter->link_active) {
1692                 EM_CORE_UNLOCK(adapter);
1693                 return;
1694         }
1695
1696         ifmr->ifm_status |= IFM_ACTIVE;
1697
1698         if ((adapter->hw.phy.media_type == e1000_media_type_fiber) ||
1699             (adapter->hw.phy.media_type == e1000_media_type_internal_serdes)) {
1700                 ifmr->ifm_active |= fiber_type | IFM_FDX;
1701         } else {
1702                 switch (adapter->link_speed) {
1703                 case 10:
1704                         ifmr->ifm_active |= IFM_10_T;
1705                         break;
1706                 case 100:
1707                         ifmr->ifm_active |= IFM_100_TX;
1708                         break;
1709                 case 1000:
1710                         ifmr->ifm_active |= IFM_1000_T;
1711                         break;
1712                 }
1713                 if (adapter->link_duplex == FULL_DUPLEX)
1714                         ifmr->ifm_active |= IFM_FDX;
1715                 else
1716                         ifmr->ifm_active |= IFM_HDX;
1717         }
1718         EM_CORE_UNLOCK(adapter);
1719 }
1720
1721 /*********************************************************************
1722  *
1723  *  Media Ioctl callback
1724  *
1725  *  This routine is called when the user changes speed/duplex using
1726  *  the media/mediaopt options with ifconfig.
1727  *
1728  **********************************************************************/
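     /* For example (hypothetical device unit):
      *   ifconfig em0 media 100baseTX mediaopt full-duplex
      *   ifconfig em0 media autoselect
      */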
1729 static int
1730 em_media_change(struct ifnet *ifp)
1731 {
1732         struct adapter *adapter = ifp->if_softc;
1733         struct ifmedia  *ifm = &adapter->media;
1734
1735         INIT_DEBUGOUT("em_media_change: begin");
1736
1737         if (IFM_TYPE(ifm->ifm_media) != IFM_ETHER)
1738                 return (EINVAL);
1739
1740         EM_CORE_LOCK(adapter);
1741         switch (IFM_SUBTYPE(ifm->ifm_media)) {
1742         case IFM_AUTO:
1743                 adapter->hw.mac.autoneg = DO_AUTO_NEG;
1744                 adapter->hw.phy.autoneg_advertised = AUTONEG_ADV_DEFAULT;
1745                 break;
1746         case IFM_1000_LX:
1747         case IFM_1000_SX:
1748         case IFM_1000_T:
1749                 adapter->hw.mac.autoneg = DO_AUTO_NEG;
1750                 adapter->hw.phy.autoneg_advertised = ADVERTISE_1000_FULL;
1751                 break;
1752         case IFM_100_TX:
1753                 adapter->hw.mac.autoneg = FALSE;
1754                 adapter->hw.phy.autoneg_advertised = 0;
1755                 if ((ifm->ifm_media & IFM_GMASK) == IFM_FDX)
1756                         adapter->hw.mac.forced_speed_duplex = ADVERTISE_100_FULL;
1757                 else
1758                         adapter->hw.mac.forced_speed_duplex = ADVERTISE_100_HALF;
1759                 break;
1760         case IFM_10_T:
1761                 adapter->hw.mac.autoneg = FALSE;
1762                 adapter->hw.phy.autoneg_advertised = 0;
1763                 if ((ifm->ifm_media & IFM_GMASK) == IFM_FDX)
1764                         adapter->hw.mac.forced_speed_duplex = ADVERTISE_10_FULL;
1765                 else
1766                         adapter->hw.mac.forced_speed_duplex = ADVERTISE_10_HALF;
1767                 break;
1768         default:
1769                 device_printf(adapter->dev, "Unsupported media type\n");
1770         }
1771
1772         em_init_locked(adapter);
1773         EM_CORE_UNLOCK(adapter);
1774
1775         return (0);
1776 }
1777
1778 /*********************************************************************
1779  *
1780  *  This routine maps the mbufs to tx descriptors.
1781  *
1782  *  return 0 on success, positive on failure
1783  **********************************************************************/
1784
1785 static int
1786 em_xmit(struct tx_ring *txr, struct mbuf **m_headp)
1787 {
1788         struct adapter          *adapter = txr->adapter;
1789         bus_dma_segment_t       segs[EM_MAX_SCATTER];
1790         bus_dmamap_t            map;
1791         struct em_buffer        *tx_buffer, *tx_buffer_mapped;
1792         struct e1000_tx_desc    *ctxd = NULL;
1793         struct mbuf             *m_head;
1794         struct ether_header     *eh;
1795         struct ip               *ip = NULL;
1796         struct tcphdr           *tp = NULL;
1797         u32                     txd_upper, txd_lower, txd_used, txd_saved;
1798         int                     ip_off, poff;
1799         int                     nsegs, i, j, first, last = 0;
1800         int                     error, do_tso, tso_desc = 0, remap = 1;
1801
1802 retry:
1803         m_head = *m_headp;
1804         txd_upper = txd_lower = txd_used = txd_saved = 0;
1805         do_tso = ((m_head->m_pkthdr.csum_flags & CSUM_TSO) != 0);
1806         ip_off = poff = 0;
1807
1808         /*
1809          * Intel recommends that the entire IP/TCP header length reside
1810          * in a single buffer. If multiple descriptors are used to
1811          * describe the IP and TCP header, each descriptor should describe
1812          * one or more complete headers; descriptors referencing only
1813          * parts of headers are not supported. If all layer headers are
1814          * not coalesced into a single buffer, each buffer should not
1815          * cross a 4KB boundary, or be larger than the maximum read
1816          * request size.
1817          * The controller also requires modifying the IP/TCP header to
1818          * make TSO work, so we first get a writable mbuf chain, then
1819          * coalesce the ethernet/IP/TCP header into a single buffer to
1820          * meet the controller's requirement. This also simplifies
1821          * IP/TCP/UDP checksum offloading, which has similar restrictions.
1822          */
1822         if (do_tso || m_head->m_pkthdr.csum_flags & CSUM_OFFLOAD) {
1823                 if (do_tso || (m_head->m_next != NULL && 
1824                     m_head->m_pkthdr.csum_flags & CSUM_OFFLOAD)) {
1825                         if (M_WRITABLE(*m_headp) == 0) {
1826                                 m_head = m_dup(*m_headp, M_DONTWAIT);
1827                                 m_freem(*m_headp);
1828                                 if (m_head == NULL) {
1829                                         *m_headp = NULL;
1830                                         return (ENOBUFS);
1831                                 }
1832                                 *m_headp = m_head;
1833                         }
1834                 }
1835                 /*
1836                  * XXX
1837                  * Assume IPv4, we don't have TSO/checksum offload support
1838                  * for IPv6 yet.
1839                  */
1840                 ip_off = sizeof(struct ether_header);
1841                 m_head = m_pullup(m_head, ip_off);
1842                 if (m_head == NULL) {
1843                         *m_headp = NULL;
1844                         return (ENOBUFS);
1845                 }
1846                 eh = mtod(m_head, struct ether_header *);
1847                 if (eh->ether_type == htons(ETHERTYPE_VLAN)) {
1848                         ip_off = sizeof(struct ether_vlan_header);
1849                         m_head = m_pullup(m_head, ip_off);
1850                         if (m_head == NULL) {
1851                                 *m_headp = NULL;
1852                                 return (ENOBUFS);
1853                         }
1854                 }
1855                 m_head = m_pullup(m_head, ip_off + sizeof(struct ip));
1856                 if (m_head == NULL) {
1857                         *m_headp = NULL;
1858                         return (ENOBUFS);
1859                 }
1860                 ip = (struct ip *)(mtod(m_head, char *) + ip_off);
1861                 poff = ip_off + (ip->ip_hl << 2);
1862                 if (do_tso) {
1863                         m_head = m_pullup(m_head, poff + sizeof(struct tcphdr));
1864                         if (m_head == NULL) {
1865                                 *m_headp = NULL;
1866                                 return (ENOBUFS);
1867                         }
1868                         tp = (struct tcphdr *)(mtod(m_head, char *) + poff);
1869                         /*
1870                          * TSO workaround:
1871                          *   pull 4 more bytes of payload into the header mbuf.
1872                          */
1873                         m_head = m_pullup(m_head, poff + (tp->th_off << 2) + 4);
1874                         if (m_head == NULL) {
1875                                 *m_headp = NULL;
1876                                 return (ENOBUFS);
1877                         }
1878                         ip = (struct ip *)(mtod(m_head, char *) + ip_off);
1879                         ip->ip_len = 0;
1880                         ip->ip_sum = 0;
1881                         /*
1882                          * The TCP pseudo-header checksum must not include
1883                          * the TCP payload length, so the driver recomputes
1884                          * it here to match what the hardware expects, per
1885                          * Microsoft's Large Send specification.
1886                          */
1887                         tp = (struct tcphdr *)(mtod(m_head, char *) + poff);
1888                         tp->th_sum = in_pseudo(ip->ip_src.s_addr,
1889                             ip->ip_dst.s_addr, htons(IPPROTO_TCP));
1890                 } else if (m_head->m_pkthdr.csum_flags & CSUM_TCP) {
1891                         m_head = m_pullup(m_head, poff + sizeof(struct tcphdr));
1892                         if (m_head == NULL) {
1893                                 *m_headp = NULL;
1894                                 return (ENOBUFS);
1895                         }
1896                         tp = (struct tcphdr *)(mtod(m_head, char *) + poff);
1897                         m_head = m_pullup(m_head, poff + (tp->th_off << 2));
1898                         if (m_head == NULL) {
1899                                 *m_headp = NULL;
1900                                 return (ENOBUFS);
1901                         }
1902                         ip = (struct ip *)(mtod(m_head, char *) + ip_off);
1903                         tp = (struct tcphdr *)(mtod(m_head, char *) + poff);
1904                 } else if (m_head->m_pkthdr.csum_flags & CSUM_UDP) {
1905                         m_head = m_pullup(m_head, poff + sizeof(struct udphdr));
1906                         if (m_head == NULL) {
1907                                 *m_headp = NULL;
1908                                 return (ENOBUFS);
1909                         }
1910                         ip = (struct ip *)(mtod(m_head, char *) + ip_off);
1911                 }
1912                 *m_headp = m_head;
1913         }
1914
1915         /*
1916          * Map the packet for DMA.
1917          *
1918          * Capture the first descriptor index;
1919          * this descriptor will later hold the
1920          * index of the EOP descriptor, which is
1921          * the only one that gets a DONE bit writeback.
1922          */
1923         first = txr->next_avail_desc;
1924         tx_buffer = &txr->tx_buffers[first];
1925         tx_buffer_mapped = tx_buffer;
1926         map = tx_buffer->map;
1927
1928         error = bus_dmamap_load_mbuf_sg(txr->txtag, map,
1929             *m_headp, segs, &nsegs, BUS_DMA_NOWAIT);
1930
1931         /*
1932          * There are two types of errors we can (try) to handle:
1933          * - EFBIG means the mbuf chain was too long and bus_dma ran
1934          *   out of segments.  Defragment the mbuf chain and try again.
1935          * - ENOMEM means bus_dma could not obtain enough bounce buffers
1936          *   at this point in time.  Defer sending and try again later.
1937          * All other errors, in particular EINVAL, are fatal and prevent the
1938          * mbuf chain from ever going through.  Drop it and report error.
1939          */
1940         if (error == EFBIG && remap) {
1941                 struct mbuf *m;
1942
1943                 m = m_defrag(*m_headp, M_DONTWAIT);
1944                 if (m == NULL) {
1945                         adapter->mbuf_alloc_failed++;
1946                         m_freem(*m_headp);
1947                         *m_headp = NULL;
1948                         return (ENOBUFS);
1949                 }
1950                 *m_headp = m;
1951
1952                 /* Try it again, but only once */
1953                 remap = 0;
1954                 goto retry;
1955         } else if (error == ENOMEM) {
1956                 adapter->no_tx_dma_setup++;
1957                 return (error);
1958         } else if (error != 0) {
1959                 adapter->no_tx_dma_setup++;
1960                 m_freem(*m_headp);
1961                 *m_headp = NULL;
1962                 return (error);
1963         }
1964
1965         /*
1966          * TSO hardware workaround: if this packet is not
1967          * TSO, is only a single descriptor long, and
1968          * follows a TSO burst, then we need to add a
1969          * sentinel descriptor to prevent premature writeback.
1970          */
1971         if ((do_tso == 0) && (txr->tx_tso == TRUE)) {
1972                 if (nsegs == 1)
1973                         tso_desc = TRUE;
1974                 txr->tx_tso = FALSE;
1975         }
1976
1977         if (nsegs > (txr->tx_avail - 2)) {
1978                 txr->no_desc_avail++;
1979                 bus_dmamap_unload(txr->txtag, map);
1980                 return (ENOBUFS);
1981         }
1982         m_head = *m_headp;
1983
1984         /* Do hardware assists */
1985         if (m_head->m_pkthdr.csum_flags & CSUM_TSO) {
1986                 em_tso_setup(txr, m_head, ip_off, ip, tp,
1987                     &txd_upper, &txd_lower);
1988                 /* we need to make a final sentinel transmit desc */
1989                 tso_desc = TRUE;
1990         } else if (m_head->m_pkthdr.csum_flags & CSUM_OFFLOAD)
1991                 em_transmit_checksum_setup(txr, m_head,
1992                     ip_off, ip, &txd_upper, &txd_lower);
1993
1994         if (m_head->m_flags & M_VLANTAG) {
1995                 /* Set the vlan id. */
1996                 txd_upper |=
1997                     (htole16(m_head->m_pkthdr.ether_vtag) << 16);
1998                 /* Tell hardware to add tag */
1999                 txd_lower |= htole32(E1000_TXD_CMD_VLE);
2000         }
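             /*
              * (The VLAN tag lands in the legacy descriptor's "special"
              * field -- the upper 16 bits of upper.data -- and
              * E1000_TXD_CMD_VLE asks the MAC to insert it on transmit.)
              */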
2001
2002         i = txr->next_avail_desc;
2003
2004         /* Set up our transmit descriptors */
2005         for (j = 0; j < nsegs; j++) {
2006                 bus_size_t seg_len;
2007                 bus_addr_t seg_addr;
2008
2009                 tx_buffer = &txr->tx_buffers[i];
2010                 ctxd = &txr->tx_base[i];
2011                 seg_addr = segs[j].ds_addr;
2012                 seg_len  = segs[j].ds_len;
2013                 /*
2014                 ** TSO Workaround:
2015                 ** If this is the last descriptor, we want to
2016                 ** split it so we have a small final sentinel
2017                 */
2018                 if (tso_desc && (j == (nsegs -1)) && (seg_len > 8)) {
2019                         seg_len -= 4;
2020                         ctxd->buffer_addr = htole64(seg_addr);
2021                         ctxd->lower.data = htole32(
2022                             adapter->txd_cmd | txd_lower | seg_len);
2023                         ctxd->upper.data =
2024                             htole32(txd_upper);
2025                         if (++i == adapter->num_tx_desc)
2026                                 i = 0;
2027                         /* Now make the sentinel */     
2028                         ++txd_used; /* using an extra txd */
2029                         ctxd = &txr->tx_base[i];
2030                         tx_buffer = &txr->tx_buffers[i];
2031                         ctxd->buffer_addr =
2032                             htole64(seg_addr + seg_len);
2033                         ctxd->lower.data = htole32(
2034                             adapter->txd_cmd | txd_lower | 4);
2035                         ctxd->upper.data =
2036                             htole32(txd_upper);
2037                         last = i;
2038                         if (++i == adapter->num_tx_desc)
2039                                 i = 0;
2040                 } else {
2041                         ctxd->buffer_addr = htole64(seg_addr);
2042                         ctxd->lower.data = htole32(
2043                             adapter->txd_cmd | txd_lower | seg_len);
2044                         ctxd->upper.data =
2045                             htole32(txd_upper);
2046                         last = i;
2047                         if (++i == adapter->num_tx_desc)
2048                                 i = 0;
2049                 }
2050                 tx_buffer->m_head = NULL;
2051                 tx_buffer->next_eop = -1;
2052         }
2053
2054         txr->next_avail_desc = i;
2055         txr->tx_avail -= nsegs;
2056         if (tso_desc) /* TSO used an extra for sentinel */
2057                 txr->tx_avail -= txd_used;
2058
2059         tx_buffer->m_head = m_head;
2060         /*
2061         ** Here we swap the map so the last descriptor,
2062         ** which gets the completion interrupt, has the
2063         ** real map, and the first descriptor gets the
2064         ** unused map from this descriptor.
2065         */
2066         tx_buffer_mapped->map = tx_buffer->map;
2067         tx_buffer->map = map;
2068         bus_dmamap_sync(txr->txtag, map, BUS_DMASYNC_PREWRITE);
2069
2070         /*
2071          * Last Descriptor of Packet
2072          * needs End Of Packet (EOP)
2073          * and Report Status (RS)
2074          */
2075         ctxd->lower.data |=
2076             htole32(E1000_TXD_CMD_EOP | E1000_TXD_CMD_RS);
2077         /*
2078          * Keep track in the first buffer which
2079          * descriptor will be written back
2080          */
2081         tx_buffer = &txr->tx_buffers[first];
2082         tx_buffer->next_eop = last;
2083         /* Update the watchdog time early and often */
2084         txr->watchdog_time = ticks;
2085
2086         /*
2087          * Advance the Transmit Descriptor Tail (TDT); this tells
2088          * the E1000 that this frame is available to transmit.
2089          */
2090         bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
2091             BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
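             /*
              * The PREWRITE sync ensures the updated descriptors are visible
              * to the device before the tail register is advanced.
              */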
2092         E1000_WRITE_REG(&adapter->hw, E1000_TDT(txr->me), i);
2093
2094         return (0);
2095 }
2096
2097 static void
2098 em_set_promisc(struct adapter *adapter)
2099 {
2100         struct ifnet    *ifp = adapter->ifp;
2101         u32             reg_rctl;
2102
2103         reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
2104
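             /*
              * RCTL bits used here: UPE = unicast promiscuous,
              * MPE = multicast promiscuous, SBP = store bad packets.
              */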
2105         if (ifp->if_flags & IFF_PROMISC) {
2106                 reg_rctl |= (E1000_RCTL_UPE | E1000_RCTL_MPE);
2107                 /* Turn this on if you want to see bad packets */
2108                 if (em_debug_sbp)
2109                         reg_rctl |= E1000_RCTL_SBP;
2110                 E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2111         } else if (ifp->if_flags & IFF_ALLMULTI) {
2112                 reg_rctl |= E1000_RCTL_MPE;
2113                 reg_rctl &= ~E1000_RCTL_UPE;
2114                 E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2115         }
2116 }
2117
2118 static void
2119 em_disable_promisc(struct adapter *adapter)
2120 {
2121         u32     reg_rctl;
2122
2123         reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
2124
2125         reg_rctl &=  (~E1000_RCTL_UPE);
2126         reg_rctl &=  (~E1000_RCTL_MPE);
2127         reg_rctl &=  (~E1000_RCTL_SBP);
2128         E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2129 }
2130
2131
2132 /*********************************************************************
2133  *  Multicast Update
2134  *
2135  *  This routine is called whenever the multicast address list is updated.
2136  *
2137  **********************************************************************/
2138
2139 static void
2140 em_set_multi(struct adapter *adapter)
2141 {
2142         struct ifnet    *ifp = adapter->ifp;
2143         struct ifmultiaddr *ifma;
2144         u32 reg_rctl = 0;
2145         u8  *mta; /* Multicast array memory */
2146         int mcnt = 0;
2147
2148         IOCTL_DEBUGOUT("em_set_multi: begin");
2149
2150         mta = adapter->mta;
2151         bzero(mta, sizeof(u8) * ETH_ADDR_LEN * MAX_NUM_MULTICAST_ADDRESSES);
2152
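             /*
              * (82542 rev 2.0 silicon reportedly requires the receiver to be
              * held in reset, with MWI disabled, while the multicast table
              * is rewritten; the mirror-image block after the update below
              * undoes this.)
              */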
2153         if (adapter->hw.mac.type == e1000_82542 && 
2154             adapter->hw.revision_id == E1000_REVISION_2) {
2155                 reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
2156                 if (adapter->hw.bus.pci_cmd_word & CMD_MEM_WRT_INVALIDATE)
2157                         e1000_pci_clear_mwi(&adapter->hw);
2158                 reg_rctl |= E1000_RCTL_RST;
2159                 E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2160                 msec_delay(5);
2161         }
2162
2163 #if __FreeBSD_version < 800000
2164         IF_ADDR_LOCK(ifp);
2165 #else
2166         if_maddr_rlock(ifp);
2167 #endif
2168         TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
2169                 if (ifma->ifma_addr->sa_family != AF_LINK)
2170                         continue;
2171
2172                 if (mcnt == MAX_NUM_MULTICAST_ADDRESSES)
2173                         break;
2174
2175                 bcopy(LLADDR((struct sockaddr_dl *)ifma->ifma_addr),
2176                     &mta[mcnt * ETH_ADDR_LEN], ETH_ADDR_LEN);
2177                 mcnt++;
2178         }
2179 #if __FreeBSD_version < 800000
2180         IF_ADDR_UNLOCK(ifp);
2181 #else
2182         if_maddr_runlock(ifp);
2183 #endif
2184         if (mcnt >= MAX_NUM_MULTICAST_ADDRESSES) {
2185                 reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
2186                 reg_rctl |= E1000_RCTL_MPE;
2187                 E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2188         } else
2189                 e1000_update_mc_addr_list(&adapter->hw, mta, mcnt);
2190
2191         if (adapter->hw.mac.type == e1000_82542 && 
2192             adapter->hw.revision_id == E1000_REVISION_2) {
2193                 reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
2194                 reg_rctl &= ~E1000_RCTL_RST;
2195                 E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2196                 msec_delay(5);
2197                 if (adapter->hw.bus.pci_cmd_word & CMD_MEM_WRT_INVALIDATE)
2198                         e1000_pci_set_mwi(&adapter->hw);
2199         }
2200 }
2201
2202
2203 /*********************************************************************
2204  *  Timer routine
2205  *
2206  *  This routine checks for link status and updates statistics.
2207  *
2208  **********************************************************************/
2209
2210 static void
2211 em_local_timer(void *arg)
2212 {
2213         struct adapter  *adapter = arg;
2214         struct ifnet    *ifp = adapter->ifp;
2215         struct tx_ring  *txr = adapter->tx_rings;
2216         struct rx_ring  *rxr = adapter->rx_rings;
2217         u32             trigger;
2218
2219         EM_CORE_LOCK_ASSERT(adapter);
2220
2221         em_update_link_status(adapter);
2222         em_update_stats_counters(adapter);
2223
2224         /* Reset LAA into RAR[0] on 82571 */
2225         if ((adapter->hw.mac.type == e1000_82571) &&
2226             e1000_get_laa_state_82571(&adapter->hw))
2227                 e1000_rar_set(&adapter->hw, adapter->hw.mac.addr, 0);
2228
2229         /* Mask to use in the irq trigger */
2230         if (adapter->msix_mem)
2231                 trigger = rxr->ims; /* RX for 82574 */
2232         else
2233                 trigger = E1000_ICS_RXDMT0;
2234
2235         /*
2236         ** Check on the state of the TX queue(s); this
2237         ** can be done without the lock because it's RO
2238         ** and the HUNG state will be static if set.
2239         */
2240         for (int i = 0; i < adapter->num_queues; i++, txr++) {
2241                 if ((txr->queue_status == EM_QUEUE_HUNG) &&
2242                     (adapter->pause_frames == 0))
2243                         goto hung;
2244                 /* Schedule a TX tasklet if needed */
2245                 if (txr->tx_avail <= EM_MAX_SCATTER)
2246                         taskqueue_enqueue(txr->tq, &txr->tx_task);
2247         }
2248         
2249         adapter->pause_frames = 0;
2250         callout_reset(&adapter->timer, hz, em_local_timer, adapter);
2251 #ifndef DEVICE_POLLING
2252         /* Trigger an RX interrupt to guarantee mbuf refresh */
2253         E1000_WRITE_REG(&adapter->hw, E1000_ICS, trigger);
2254 #endif
2255         return;
2256 hung:
2257         /* Looks like we're hung */
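             /* (TDH is the hardware's consumer/head index and TDT the
              * driver's producer/tail index for this TX descriptor ring.) */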
2258         device_printf(adapter->dev, "Watchdog timeout -- resetting\n");
2259         device_printf(adapter->dev,
2260             "Queue(%d) tdh = %d, hw tdt = %d\n", txr->me,
2261             E1000_READ_REG(&adapter->hw, E1000_TDH(txr->me)),
2262             E1000_READ_REG(&adapter->hw, E1000_TDT(txr->me)));
2263         device_printf(adapter->dev,"TX(%d) desc avail = %d,"
2264             "Next TX to Clean = %d\n",
2265             txr->me, txr->tx_avail, txr->next_to_clean);
2266         ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
2267         adapter->watchdog_events++;
2268         adapter->pause_frames = 0;
2269         em_init_locked(adapter);
2270 }
2271
2272
2273 static void
2274 em_update_link_status(struct adapter *adapter)
2275 {
2276         struct e1000_hw *hw = &adapter->hw;
2277         struct ifnet *ifp = adapter->ifp;
2278         device_t dev = adapter->dev;
2279         struct tx_ring *txr = adapter->tx_rings;
2280         u32 link_check = 0;
2281
2282         /* Get the cached link value or read phy for real */
2283         switch (hw->phy.media_type) {
2284         case e1000_media_type_copper:
2285                 if (hw->mac.get_link_status) {
2286                         /* Do the work to read phy */
2287                         e1000_check_for_link(hw);
2288                         link_check = !hw->mac.get_link_status;
2289                         if (link_check) /* ESB2 fix */
2290                                 e1000_cfg_on_link_up(hw);
2291                 } else
2292                         link_check = TRUE;
2293                 break;
2294         case e1000_media_type_fiber:
2295                 e1000_check_for_link(hw);
2296                 link_check = (E1000_READ_REG(hw, E1000_STATUS) &
2297                                  E1000_STATUS_LU);
2298                 break;
2299         case e1000_media_type_internal_serdes:
2300                 e1000_check_for_link(hw);
2301                 link_check = adapter->hw.mac.serdes_has_link;
2302                 break;
2303         default:
2304         case e1000_media_type_unknown:
2305                 break;
2306         }
2307
2308         /* Now check for a transition */
2309         if (link_check && (adapter->link_active == 0)) {
2310                 e1000_get_speed_and_duplex(hw, &adapter->link_speed,
2311                     &adapter->link_duplex);
2312                 /* Check if we must disable SPEED_MODE bit on PCI-E */
2313                 if ((adapter->link_speed != SPEED_1000) &&
2314                     ((hw->mac.type == e1000_82571) ||
2315                     (hw->mac.type == e1000_82572))) {
2316                         int tarc0;
2317                         tarc0 = E1000_READ_REG(hw, E1000_TARC(0));
2318                         tarc0 &= ~SPEED_MODE_BIT;
2319                         E1000_WRITE_REG(hw, E1000_TARC(0), tarc0);
2320                 }
2321                 if (bootverbose)
2322                         device_printf(dev, "Link is up %d Mbps %s\n",
2323                             adapter->link_speed,
2324                             ((adapter->link_duplex == FULL_DUPLEX) ?
2325                             "Full Duplex" : "Half Duplex"));
2326                 adapter->link_active = 1;
2327                 adapter->smartspeed = 0;
2328                 ifp->if_baudrate = adapter->link_speed * 1000000;
2329                 if_link_state_change(ifp, LINK_STATE_UP);
2330         } else if (!link_check && (adapter->link_active == 1)) {
2331                 ifp->if_baudrate = adapter->link_speed = 0;
2332                 adapter->link_duplex = 0;
2333                 if (bootverbose)
2334                         device_printf(dev, "Link is Down\n");
2335                 adapter->link_active = 0;
2336                 /* Link down, disable watchdog */
2337                 for (int i = 0; i < adapter->num_queues; i++, txr++)
2338                         txr->queue_status = EM_QUEUE_IDLE;
2339                 if_link_state_change(ifp, LINK_STATE_DOWN);
2340         }
2341 }
2342
2343 /*********************************************************************
2344  *
2345  *  This routine disables all traffic on the adapter by issuing a
2346  *  global reset on the MAC and deallocates TX/RX buffers.
2347  *
2348  *  This routine should always be called with BOTH the CORE
2349  *  and TX locks held.
2350  **********************************************************************/
2351
2352 static void
2353 em_stop(void *arg)
2354 {
2355         struct adapter  *adapter = arg;
2356         struct ifnet    *ifp = adapter->ifp;
2357         struct tx_ring  *txr = adapter->tx_rings;
2358
2359         EM_CORE_LOCK_ASSERT(adapter);
2360
2361         INIT_DEBUGOUT("em_stop: begin");
2362
2363         em_disable_intr(adapter);
2364         callout_stop(&adapter->timer);
2365
2366         /* Tell the stack that the interface is no longer active */
2367         ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
2368         ifp->if_drv_flags |= IFF_DRV_OACTIVE;
2369
2370         /* Unarm watchdog timer. */
2371         for (int i = 0; i < adapter->num_queues; i++, txr++) {
2372                 EM_TX_LOCK(txr);
2373                 txr->queue_status = EM_QUEUE_IDLE;
2374                 EM_TX_UNLOCK(txr);
2375         }
2376
2377         e1000_reset_hw(&adapter->hw);
2378         E1000_WRITE_REG(&adapter->hw, E1000_WUC, 0);
2379
2380         e1000_led_off(&adapter->hw);
2381         e1000_cleanup_led(&adapter->hw);
2382 }
2383
2384
2385 /*********************************************************************
2386  *
2387  *  Determine hardware revision.
2388  *
2389  **********************************************************************/
2390 static void
2391 em_identify_hardware(struct adapter *adapter)
2392 {
2393         device_t dev = adapter->dev;
2394
2395         /* Make sure bus mastering and memory access are enabled in PCI config */
2396         adapter->hw.bus.pci_cmd_word = pci_read_config(dev, PCIR_COMMAND, 2);
2397         if (!((adapter->hw.bus.pci_cmd_word & PCIM_CMD_BUSMASTEREN) &&
2398             (adapter->hw.bus.pci_cmd_word & PCIM_CMD_MEMEN))) {
2399                 device_printf(dev, "Memory Access and/or Bus Master bits "
2400                     "were not set!\n");
2401                 adapter->hw.bus.pci_cmd_word |=
2402                     (PCIM_CMD_BUSMASTEREN | PCIM_CMD_MEMEN);
2403                 pci_write_config(dev, PCIR_COMMAND,
2404                     adapter->hw.bus.pci_cmd_word, 2);
2405         }
2406
2407         /* Save off the information about this board */
2408         adapter->hw.vendor_id = pci_get_vendor(dev);
2409         adapter->hw.device_id = pci_get_device(dev);
2410         adapter->hw.revision_id = pci_read_config(dev, PCIR_REVID, 1);
2411         adapter->hw.subsystem_vendor_id =
2412             pci_read_config(dev, PCIR_SUBVEND_0, 2);
2413         adapter->hw.subsystem_device_id =
2414             pci_read_config(dev, PCIR_SUBDEV_0, 2);
2415
2416         /* Do Shared Code Init and Setup */
2417         if (e1000_set_mac_type(&adapter->hw)) {
2418                 device_printf(dev, "Setup init failure\n");
2419                 return;
2420         }
2421 }
2422
2423 static int
2424 em_allocate_pci_resources(struct adapter *adapter)
2425 {
2426         device_t        dev = adapter->dev;
2427         int             rid;
2428
2429         rid = PCIR_BAR(0);
2430         adapter->memory = bus_alloc_resource_any(dev, SYS_RES_MEMORY,
2431             &rid, RF_ACTIVE);
2432         if (adapter->memory == NULL) {
2433                 device_printf(dev, "Unable to allocate bus resource: memory\n");
2434                 return (ENXIO);
2435         }
2436         adapter->osdep.mem_bus_space_tag =
2437             rman_get_bustag(adapter->memory);
2438         adapter->osdep.mem_bus_space_handle =
2439             rman_get_bushandle(adapter->memory);
2440         adapter->hw.hw_addr = (u8 *)&adapter->osdep.mem_bus_space_handle;
2441
2442         /* Default to a single queue */
2443         adapter->num_queues = 1;
2444
2445         /*
2446          * Setup MSI/X or MSI if PCI Express
2447          */
2448         adapter->msix = em_setup_msix(adapter);
2449
2450         adapter->hw.back = &adapter->osdep;
2451
2452         return (0);
2453 }
2454
2455 /*********************************************************************
2456  *
2457  *  Setup the Legacy or MSI Interrupt handler
2458  *
2459  **********************************************************************/
2460 int
2461 em_allocate_legacy(struct adapter *adapter)
2462 {
2463         device_t dev = adapter->dev;
2464         struct tx_ring  *txr = adapter->tx_rings;
2465         int error, rid = 0;
2466
2467         /* Manually turn off all interrupts */
2468         E1000_WRITE_REG(&adapter->hw, E1000_IMC, 0xffffffff);
2469
2470         if (adapter->msix == 1) /* using MSI */
2471                 rid = 1;
2472         /* We allocate a single interrupt resource */
2473         adapter->res = bus_alloc_resource_any(dev,
2474             SYS_RES_IRQ, &rid, RF_SHAREABLE | RF_ACTIVE);
2475         if (adapter->res == NULL) {
2476                 device_printf(dev, "Unable to allocate bus resource: "
2477                     "interrupt\n");
2478                 return (ENXIO);
2479         }
2480
2481         /*
2482          * Allocate a fast interrupt and the associated
2483          * deferred processing contexts.
2484          */
2485         TASK_INIT(&adapter->que_task, 0, em_handle_que, adapter);
2486         adapter->tq = taskqueue_create_fast("em_taskq", M_NOWAIT,
2487             taskqueue_thread_enqueue, &adapter->tq);
2488         taskqueue_start_threads(&adapter->tq, 1, PI_NET, "%s que",
2489             device_get_nameunit(adapter->dev));
2490         /* Use a TX only tasklet for local timer */
2491         TASK_INIT(&txr->tx_task, 0, em_handle_tx, txr);
2492         txr->tq = taskqueue_create_fast("em_txq", M_NOWAIT,
2493             taskqueue_thread_enqueue, &txr->tq);
2494         taskqueue_start_threads(&txr->tq, 1, PI_NET, "%s txq",
2495             device_get_nameunit(adapter->dev));
2496         TASK_INIT(&adapter->link_task, 0, em_handle_link, adapter);
2497         if ((error = bus_setup_intr(dev, adapter->res, INTR_TYPE_NET,
2498             em_irq_fast, NULL, adapter, &adapter->tag)) != 0) {
2499                 device_printf(dev, "Failed to register fast interrupt "
2500                             "handler: %d\n", error);
2501                 taskqueue_free(adapter->tq);
2502                 adapter->tq = NULL;
2503                 return (error);
2504         }
2505         
2506         return (0);
2507 }
2508
2509 /*********************************************************************
2510  *
2511  *  Setup the MSIX Interrupt handlers
2512  *   This is not really multiqueue; rather,
2513  *   it's just separate interrupt vectors
2514  *   for TX, RX, and Link.
2515  *
2516  **********************************************************************/
2517 int
2518 em_allocate_msix(struct adapter *adapter)
2519 {
2520         device_t        dev = adapter->dev;
2521         struct          tx_ring *txr = adapter->tx_rings;
2522         struct          rx_ring *rxr = adapter->rx_rings;
2523         int             error, rid, vector = 0;
2524
2525
2526         /* Make sure all interrupts are disabled */
2527         E1000_WRITE_REG(&adapter->hw, E1000_IMC, 0xffffffff);
2528
2529         /* First set up ring resources */
2530         for (int i = 0; i < adapter->num_queues; i++, txr++, rxr++) {
2531
2532                 /* RX ring */
2533                 rid = vector + 1;
2534
2535                 rxr->res = bus_alloc_resource_any(dev,
2536                     SYS_RES_IRQ, &rid, RF_ACTIVE);
2537                 if (rxr->res == NULL) {
2538                         device_printf(dev,
2539                             "Unable to allocate bus resource: "
2540                             "RX MSIX Interrupt %d\n", i);
2541                         return (ENXIO);
2542                 }
2543                 if ((error = bus_setup_intr(dev, rxr->res,
2544                     INTR_TYPE_NET | INTR_MPSAFE, NULL, em_msix_rx,
2545                     rxr, &rxr->tag)) != 0) {
2546                         device_printf(dev, "Failed to register RX handler");
2547                         return (error);
2548                 }
2549 #if __FreeBSD_version >= 800504
2550                 bus_describe_intr(dev, rxr->res, rxr->tag, "rx %d", i);
2551 #endif
2552                 rxr->msix = vector++; /* NOTE increment vector for TX */
2553                 TASK_INIT(&rxr->rx_task, 0, em_handle_rx, rxr);
2554                 rxr->tq = taskqueue_create_fast("em_rxq", M_NOWAIT,
2555                     taskqueue_thread_enqueue, &rxr->tq);
2556                 taskqueue_start_threads(&rxr->tq, 1, PI_NET, "%s rxq",
2557                     device_get_nameunit(adapter->dev));
2558                 /*
2559                 ** Set the bit that enables this interrupt
2560                 ** in E1000_IMS -- bits 20 and 21 are for
2561                 ** RX0 and RX1; note this has NOTHING to do
2562                 ** with the MSIX vector number.
2563                 */
2564                 rxr->ims = 1 << (20 + i);
2565                 adapter->ivars |= (8 | rxr->msix) << (i * 4);
2566
2567                 /* TX ring */
2568                 rid = vector + 1;
2569                 txr->res = bus_alloc_resource_any(dev,
2570                     SYS_RES_IRQ, &rid, RF_ACTIVE);
2571                 if (txr->res == NULL) {
2572                         device_printf(dev,
2573                             "Unable to allocate bus resource: "
2574                             "TX MSIX Interrupt %d\n", i);
2575                         return (ENXIO);
2576                 }
2577                 if ((error = bus_setup_intr(dev, txr->res,
2578                     INTR_TYPE_NET | INTR_MPSAFE, NULL, em_msix_tx,
2579                     txr, &txr->tag)) != 0) {
2580                         device_printf(dev, "Failed to register TX handler");
2581                         return (error);
2582                 }
2583 #if __FreeBSD_version >= 800504
2584                 bus_describe_intr(dev, txr->res, txr->tag, "tx %d", i);
2585 #endif
2586                 txr->msix = vector++; /* Increment vector for next pass */
2587                 TASK_INIT(&txr->tx_task, 0, em_handle_tx, txr);
2588                 txr->tq = taskqueue_create_fast("em_txq", M_NOWAIT,
2589                     taskqueue_thread_enqueue, &txr->tq);
2590                 taskqueue_start_threads(&txr->tq, 1, PI_NET, "%s txq",
2591                     device_get_nameunit(adapter->dev));
2592                 /*
2593                 ** Set the bit that enables this interrupt
2594                 ** in E1000_IMS -- bits 22 and 23 are for
2595                 ** TX0 and TX1; note this has NOTHING to do
2596                 ** with the MSIX vector number.
2597                 */
2598                 txr->ims = 1 << (22 + i);
2599                 adapter->ivars |= (8 | txr->msix) << (8 + (i * 4));
2600         }
2601
2602         /* Link interrupt */
2603         ++rid;
2604         adapter->res = bus_alloc_resource_any(dev,
2605             SYS_RES_IRQ, &rid, RF_ACTIVE);
2606         if (!adapter->res) {
2607                 device_printf(dev,"Unable to allocate "
2608                     "bus resource: Link interrupt [%d]\n", rid);
2609                 return (ENXIO);
2610         }
2611         /* Set the link handler function */
2612         error = bus_setup_intr(dev, adapter->res,
2613             INTR_TYPE_NET | INTR_MPSAFE, NULL,
2614             em_msix_link, adapter, &adapter->tag);
2615         if (error) {
2616                 adapter->res = NULL;
2617                 device_printf(dev, "Failed to register LINK handler");
2618                 return (error);
2619         }
2620 #if __FreeBSD_version >= 800504
2621                 bus_describe_intr(dev, adapter->res, adapter->tag, "link");
2622 #endif
2623         adapter->linkvec = vector;
2624         adapter->ivars |=  (8 | vector) << 16;
2625         adapter->ivars |= 0x80000000;
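             /*
              * Sketch of the assembled 82574 IVAR: each 4-bit cell is
              * (0x8 "valid" | vector) -- bits 3:0 RX0, 7:4 RX1, 11:8 TX0,
              * 15:12 TX1, 19:16 other/link.  Bit 31, set just above, is
              * believed to request a TX interrupt on every descriptor
              * write back (see the 82574 datasheet).
              */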
2626
2627         return (0);
2628 }
2629
2630
2631 static void
2632 em_free_pci_resources(struct adapter *adapter)
2633 {
2634         device_t        dev = adapter->dev;
2635         struct tx_ring  *txr;
2636         struct rx_ring  *rxr;
2637         int             rid;
2638
2639
2640         /*
2641         ** Release all the queue interrupt resources:
2642         */
2643         for (int i = 0; i < adapter->num_queues; i++) {
2644                 txr = &adapter->tx_rings[i];
2645                 rxr = &adapter->rx_rings[i];
2646                 /* an early abort? */
2647                 if ((txr == NULL) || (rxr == NULL))
2648                         break;
2649                 rid = txr->msix +1;
2650                 if (txr->tag != NULL) {
2651                         bus_teardown_intr(dev, txr->res, txr->tag);
2652                         txr->tag = NULL;
2653                 }
2654                 if (txr->res != NULL)
2655                         bus_release_resource(dev, SYS_RES_IRQ,
2656                             rid, txr->res);
2657                 rid = rxr->msix +1;
2658                 if (rxr->tag != NULL) {
2659                         bus_teardown_intr(dev, rxr->res, rxr->tag);
2660                         rxr->tag = NULL;
2661                 }
2662                 if (rxr->res != NULL)
2663                         bus_release_resource(dev, SYS_RES_IRQ,
2664                             rid, rxr->res);
2665         }
2666
2667         if (adapter->linkvec) /* we are doing MSIX */
2668                 rid = adapter->linkvec + 1;
2669         else
2670                 rid = (adapter->msix != 0) ? 1 : 0;
2671
2672         if (adapter->tag != NULL) {
2673                 bus_teardown_intr(dev, adapter->res, adapter->tag);
2674                 adapter->tag = NULL;
2675         }
2676
2677         if (adapter->res != NULL)
2678                 bus_release_resource(dev, SYS_RES_IRQ, rid, adapter->res);
2679
2680
2681         if (adapter->msix)
2682                 pci_release_msi(dev);
2683
2684         if (adapter->msix_mem != NULL)
2685                 bus_release_resource(dev, SYS_RES_MEMORY,
2686                     PCIR_BAR(EM_MSIX_BAR), adapter->msix_mem);
2687
2688         if (adapter->memory != NULL)
2689                 bus_release_resource(dev, SYS_RES_MEMORY,
2690                     PCIR_BAR(0), adapter->memory);
2691
2692         if (adapter->flash != NULL)
2693                 bus_release_resource(dev, SYS_RES_MEMORY,
2694                     EM_FLASH, adapter->flash);
2695 }
2696
2697 /*
2698  * Setup MSI or MSI/X
2699  */
2700 static int
2701 em_setup_msix(struct adapter *adapter)
2702 {
2703         device_t dev = adapter->dev;
2704         int val = 0;
2705
2706         /*
2707         ** Setup MSI/X for Hartwell: tests have shown
2708         ** use of two queues to be unstable, and to
2709         ** provide no great gain anyway, so we simply
2710         ** separate the interrupts and use a single queue.
2711         */
2712         if ((adapter->hw.mac.type == e1000_82574) &&
2713             (em_enable_msix == TRUE)) {
2714                 /* Map the MSIX BAR */
2715                 int rid = PCIR_BAR(EM_MSIX_BAR);
2716                 adapter->msix_mem = bus_alloc_resource_any(dev,
2717                     SYS_RES_MEMORY, &rid, RF_ACTIVE);
2718                 if (!adapter->msix_mem) {
2719                         /* May not be enabled */
2720                         device_printf(adapter->dev,
2721                             "Unable to map MSIX table \n");
2722                         goto msi;
2723                 }
2724                 val = pci_msix_count(dev);
2725                 /* We only need 3 vectors: RX, TX, and link */
2726                 if (val > 3)
2727                         val = 3;
2728                 if (val != 3) {
2729                         bus_release_resource(dev, SYS_RES_MEMORY,
2730                             PCIR_BAR(EM_MSIX_BAR), adapter->msix_mem);
2731                         adapter->msix_mem = NULL;
2732                         device_printf(adapter->dev,
2733                             "MSIX: incorrect vectors, using MSI\n");
2734                         goto msi;
2735                 }
2736
2737                 if (pci_alloc_msix(dev, &val) == 0) {
2738                         device_printf(adapter->dev,
2739                             "Using MSIX interrupts "
2740                             "with %d vectors\n", val);
2741                         return (val);
2742                 }
2743                 /* Allocation failed: release the BAR and fall back to MSI */
                bus_release_resource(dev, SYS_RES_MEMORY,
                    PCIR_BAR(EM_MSIX_BAR), adapter->msix_mem);
                adapter->msix_mem = NULL;
2744         }
2745 msi:
2746         val = pci_msi_count(dev);
2747         if (val == 1 && pci_alloc_msi(dev, &val) == 0) {
2748                 adapter->msix = 1;
2749                 device_printf(adapter->dev, "Using an MSI interrupt\n");
2750                 return (val);
2751         } 
2752         /* Should only happen due to manual configuration */
2753         device_printf(adapter->dev, "No MSI/MSIX, using a Legacy IRQ\n");
2754         return (0);
2755 }
2756
2757
2758 /*********************************************************************
2759  *
2760  *  Initialize the hardware to a configuration
2761  *  as specified by the adapter structure.
2762  *
2763  **********************************************************************/
2764 static void
2765 em_reset(struct adapter *adapter)
2766 {
2767         device_t        dev = adapter->dev;
2768         struct ifnet    *ifp = adapter->ifp;
2769         struct e1000_hw *hw = &adapter->hw;
2770         u16             rx_buffer_size;
2771         u32             pba;
2772
2773         INIT_DEBUGOUT("em_reset: begin");
2774
2775         /* Set up smart power down as default off on newer adapters. */
2776         if (!em_smart_pwr_down && (hw->mac.type == e1000_82571 ||
2777             hw->mac.type == e1000_82572)) {
2778                 u16 phy_tmp = 0;
2779
2780                 /* Speed up time to link by disabling smart power down. */
2781                 e1000_read_phy_reg(hw, IGP02E1000_PHY_POWER_MGMT, &phy_tmp);
2782                 phy_tmp &= ~IGP02E1000_PM_SPD;
2783                 e1000_write_phy_reg(hw, IGP02E1000_PHY_POWER_MGMT, phy_tmp);
2784         }
2785
2786         /*
2787          * Packet Buffer Allocation (PBA)
2788          * Writing PBA sets the receive portion of the buffer;
2789          * the remainder is used for the transmit buffer.
2790          */
2791         switch (hw->mac.type) {
2792         /* Total Packet Buffer on these is 48K */
2793         case e1000_82571:
2794         case e1000_82572:
2795         case e1000_80003es2lan:
2796                 pba = E1000_PBA_32K; /* 32K for Rx, 16K for Tx */
2797                 break;
2798         case e1000_82573: /* 82573: Total Packet Buffer is 32K */
2799                 pba = E1000_PBA_12K; /* 12K for Rx, 20K for Tx */
2800                 break;
2801         case e1000_82574:
2802         case e1000_82583:
2803                 pba = E1000_PBA_20K; /* 20K for Rx, 20K for Tx */
2804                 break;
2805         case e1000_ich8lan:
2806                 pba = E1000_PBA_8K;
2807                 break;
2808         case e1000_ich9lan:
2809         case e1000_ich10lan:
2810                 /* Boost Receive side for jumbo frames */
2811                 if (adapter->max_frame_size > 4096)
2812                         pba = E1000_PBA_14K;
2813                 else
2814                         pba = E1000_PBA_10K;
2815                 break;
2816         case e1000_pchlan:
2817         case e1000_pch2lan:
2818                 pba = E1000_PBA_26K;
2819                 break;
2820         default:
2821                 if (adapter->max_frame_size > 8192)
2822                         pba = E1000_PBA_40K; /* 40K for Rx, 24K for Tx */
2823                 else
2824                         pba = E1000_PBA_48K; /* 48K for Rx, 16K for Tx */
2825         }
2826         E1000_WRITE_REG(&adapter->hw, E1000_PBA, pba);
2827
2828         /*
2829          * These parameters control the automatic generation (Tx) and
2830          * response (Rx) to Ethernet PAUSE frames.
2831          * - High water mark should allow for at least two frames to be
2832          *   received after sending an XOFF.
2833          * - Low water mark works best when it is very near the high water mark.
2834          *   This allows the receiver to restart by sending XON when it has
2835          *   drained a bit. Here we use an arbitrary value of 1500 which will
2836          *   restart after one full frame is pulled from the buffer. There
2837          *   could be several smaller frames in the buffer and if so they will
2838          *   not trigger the XON until their total number reduces the buffer
2839          *   by 1500.
2840          * - The pause time is fairly large at 1000 x 512ns = 512 usec.
2841          */
2842         rx_buffer_size = ((E1000_READ_REG(hw, E1000_PBA) & 0xffff) << 10);
2843         hw->fc.high_water = rx_buffer_size -
2844             roundup2(adapter->max_frame_size, 1024);
2845         hw->fc.low_water = hw->fc.high_water - 1500;
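        /*
         * Worked example (a sketch, exact values vary by MAC): with 32 KB
         * of Rx packet buffer, rx_buffer_size = 32 << 10 = 32768 bytes.
         * A standard 1518-byte max frame rounds up to 2048, giving
         * high_water = 30720 and low_water = 29220 bytes.
         */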
2846
2847         if (adapter->fc) /* locally set flow control value? */
2848                 hw->fc.requested_mode = adapter->fc;
2849         else
2850                 hw->fc.requested_mode = e1000_fc_full;
2851
2852         if (hw->mac.type == e1000_80003es2lan)
2853                 hw->fc.pause_time = 0xFFFF;
2854         else
2855                 hw->fc.pause_time = EM_FC_PAUSE_TIME;
2856
2857         hw->fc.send_xon = TRUE;
2858
2859         /* Device specific overrides/settings */
2860         switch (hw->mac.type) {
2861         case e1000_pchlan:
2862                 /* Workaround: no TX flow ctrl for PCH */
2863                 hw->fc.requested_mode = e1000_fc_rx_pause;
2864                 hw->fc.pause_time = 0xFFFF; /* override */
2865                 if (ifp->if_mtu > ETHERMTU) {
2866                         hw->fc.high_water = 0x3500;
2867                         hw->fc.low_water = 0x1500;
2868                 } else {
2869                         hw->fc.high_water = 0x5000;
2870                         hw->fc.low_water = 0x3000;
2871                 }
2872                 hw->fc.refresh_time = 0x1000;
2873                 break;
2874         case e1000_pch2lan:
2875                 hw->fc.high_water = 0x5C20;
2876                 hw->fc.low_water = 0x5048;
2877                 hw->fc.pause_time = 0x0650;
2878                 hw->fc.refresh_time = 0x0400;
2879                 /* Jumbos need adjusted PBA */
2880                 if (ifp->if_mtu > ETHERMTU)
2881                         E1000_WRITE_REG(hw, E1000_PBA, 12);
2882                 else
2883                         E1000_WRITE_REG(hw, E1000_PBA, 26);
2884                 break;
2885         case e1000_ich9lan:
2886         case e1000_ich10lan:
2887                 if (ifp->if_mtu > ETHERMTU) {
2888                         hw->fc.high_water = 0x2800;
2889                         hw->fc.low_water = hw->fc.high_water - 8;
2890                         break;
2891                 } 
2892                 /* else fall thru */
2893         default:
2894                 if (hw->mac.type == e1000_80003es2lan)
2895                         hw->fc.pause_time = 0xFFFF;
2896                 break;
2897         }
2898
2899         /* Issue a global reset */
2900         e1000_reset_hw(hw);
2901         E1000_WRITE_REG(hw, E1000_WUC, 0);
2902         em_disable_aspm(adapter);
2903         /* and a re-init */
2904         if (e1000_init_hw(hw) < 0) {
2905                 device_printf(dev, "Hardware Initialization Failed\n");
2906                 return;
2907         }
2908
2909         E1000_WRITE_REG(hw, E1000_VET, ETHERTYPE_VLAN);
2910         e1000_get_phy_info(hw);
2911         e1000_check_for_link(hw);
2912         return;
2913 }
2914
2915 /*********************************************************************
2916  *
2917  *  Setup networking device structure and register an interface.
2918  *
2919  **********************************************************************/
2920 static int
2921 em_setup_interface(device_t dev, struct adapter *adapter)
2922 {
2923         struct ifnet   *ifp;
2924
2925         INIT_DEBUGOUT("em_setup_interface: begin");
2926
2927         ifp = adapter->ifp = if_alloc(IFT_ETHER);
2928         if (ifp == NULL) {
2929                 device_printf(dev, "cannot allocate ifnet structure\n");
2930                 return (-1);
2931         }
2932         if_initname(ifp, device_get_name(dev), device_get_unit(dev));
2933         ifp->if_init = em_init;
2934         ifp->if_softc = adapter;
2935         ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
2936         ifp->if_ioctl = em_ioctl;
2937 #ifdef EM_MULTIQUEUE
2938         /* Multiqueue stack interface */
2939         ifp->if_transmit = em_mq_start;
2940         ifp->if_qflush = em_qflush;
2941 #else
2942         ifp->if_start = em_start;
2943         IFQ_SET_MAXLEN(&ifp->if_snd, adapter->num_tx_desc - 1);
2944         ifp->if_snd.ifq_drv_maxlen = adapter->num_tx_desc - 1;
2945         IFQ_SET_READY(&ifp->if_snd);
2946 #endif  
2947
2948         ether_ifattach(ifp, adapter->hw.mac.addr);
2949
2950         ifp->if_capabilities = ifp->if_capenable = 0;
2951
2952
2953         ifp->if_capabilities |= IFCAP_HWCSUM | IFCAP_VLAN_HWCSUM;
2954         ifp->if_capabilities |= IFCAP_TSO4;
2955         /*
2956          * Tell the upper layer(s) we
2957          * support full VLAN capability
2958          */
2959         ifp->if_data.ifi_hdrlen = sizeof(struct ether_vlan_header);
2960         ifp->if_capabilities |= IFCAP_VLAN_HWTAGGING
2961                              |  IFCAP_VLAN_HWTSO
2962                              |  IFCAP_VLAN_MTU;
2963         ifp->if_capenable = ifp->if_capabilities;
2964
2965         /*
2966         ** Don't turn this on by default: if vlans are
2967         ** created on another pseudo device (e.g. lagg),
2968         ** then vlan events are not passed through, breaking
2969         ** operation, but with HW FILTER off it works. If
2970         ** using vlans directly on the em driver you can
2971         ** enable this and get full hardware tag filtering.
2972         */
2973         ifp->if_capabilities |= IFCAP_VLAN_HWFILTER;
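        /* It can then be enabled at runtime, e.g. ifconfig em0 vlanhwfilter */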
2974
2975 #ifdef DEVICE_POLLING
2976         ifp->if_capabilities |= IFCAP_POLLING;
2977 #endif
2978
2979         /* Enable only WOL MAGIC by default */
2980         if (adapter->wol) {
2981                 ifp->if_capabilities |= IFCAP_WOL;
2982                 ifp->if_capenable |= IFCAP_WOL_MAGIC;
2983         }
2984                 
2985         /*
2986          * Specify the media types supported by this adapter and register
2987          * callbacks to update media and link information
2988          */
2989         ifmedia_init(&adapter->media, IFM_IMASK,
2990             em_media_change, em_media_status);
2991         if ((adapter->hw.phy.media_type == e1000_media_type_fiber) ||
2992             (adapter->hw.phy.media_type == e1000_media_type_internal_serdes)) {
2993                 u_char fiber_type = IFM_1000_SX;        /* default type */
2994
2995                 ifmedia_add(&adapter->media, IFM_ETHER | fiber_type | IFM_FDX, 
2996                             0, NULL);
2997                 ifmedia_add(&adapter->media, IFM_ETHER | fiber_type, 0, NULL);
2998         } else {
2999                 ifmedia_add(&adapter->media, IFM_ETHER | IFM_10_T, 0, NULL);
3000                 ifmedia_add(&adapter->media, IFM_ETHER | IFM_10_T | IFM_FDX,
3001                             0, NULL);
3002                 ifmedia_add(&adapter->media, IFM_ETHER | IFM_100_TX,
3003                             0, NULL);
3004                 ifmedia_add(&adapter->media, IFM_ETHER | IFM_100_TX | IFM_FDX,
3005                             0, NULL);
3006                 if (adapter->hw.phy.type != e1000_phy_ife) {
3007                         ifmedia_add(&adapter->media,
3008                                 IFM_ETHER | IFM_1000_T | IFM_FDX, 0, NULL);
3009                         ifmedia_add(&adapter->media,
3010                                 IFM_ETHER | IFM_1000_T, 0, NULL);
3011                 }
3012         }
3013         ifmedia_add(&adapter->media, IFM_ETHER | IFM_AUTO, 0, NULL);
3014         ifmedia_set(&adapter->media, IFM_ETHER | IFM_AUTO);
3015         return (0);
3016 }
3017
3018
3019 /*
3020  * Manage DMA'able memory.
3021  */
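/*
 * em_dmamap_cb is invoked by bus_dmamap_load() with the resolved
 * segment list; the tags used here set nsegments to 1, so saving
 * the first segment's bus address is all that is needed.
 */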
3022 static void
3023 em_dmamap_cb(void *arg, bus_dma_segment_t *segs, int nseg, int error)
3024 {
3025         if (error)
3026                 return;
3027         *(bus_addr_t *) arg = segs[0].ds_addr;
3028 }
3029
3030 static int
3031 em_dma_malloc(struct adapter *adapter, bus_size_t size,
3032         struct em_dma_alloc *dma, int mapflags)
3033 {
3034         int error;
3035
3036         error = bus_dma_tag_create(bus_get_dma_tag(adapter->dev), /* parent */
3037                                 EM_DBA_ALIGN, 0,        /* alignment, bounds */
3038                                 BUS_SPACE_MAXADDR,      /* lowaddr */
3039                                 BUS_SPACE_MAXADDR,      /* highaddr */
3040                                 NULL, NULL,             /* filter, filterarg */
3041                                 size,                   /* maxsize */
3042                                 1,                      /* nsegments */
3043                                 size,                   /* maxsegsize */
3044                                 0,                      /* flags */
3045                                 NULL,                   /* lockfunc */
3046                                 NULL,                   /* lockarg */
3047                                 &dma->dma_tag);
3048         if (error) {
3049                 device_printf(adapter->dev,
3050                     "%s: bus_dma_tag_create failed: %d\n",
3051                     __func__, error);
3052                 goto fail_0;
3053         }
3054
3055         error = bus_dmamem_alloc(dma->dma_tag, (void**) &dma->dma_vaddr,
3056             BUS_DMA_NOWAIT | BUS_DMA_COHERENT, &dma->dma_map);
3057         if (error) {
3058                 device_printf(adapter->dev,
3059                     "%s: bus_dmamem_alloc(%ju) failed: %d\n",
3060                     __func__, (uintmax_t)size, error);
3061                 goto fail_1;    /* nothing allocated yet, only the tag */
3062         }
3063
3064         dma->dma_paddr = 0;
3065         error = bus_dmamap_load(dma->dma_tag, dma->dma_map, dma->dma_vaddr,
3066             size, em_dmamap_cb, &dma->dma_paddr, mapflags | BUS_DMA_NOWAIT);
3067         if (error || dma->dma_paddr == 0) {
3068                 device_printf(adapter->dev,
3069                     "%s: bus_dmamap_load failed: %d\n",
3070                     __func__, error);
3071                 goto fail_3;
3072         }
3073
3074         return (0);
3075
3076 fail_3:
3077         bus_dmamap_unload(dma->dma_tag, dma->dma_map);
3078 fail_2:
3079         bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map);
fail_1:
3080         bus_dma_tag_destroy(dma->dma_tag);
3081 fail_0:
3082         dma->dma_map = NULL;
3083         dma->dma_tag = NULL;
3084
3085         return (error);
3086 }
3087
3088 static void
3089 em_dma_free(struct adapter *adapter, struct em_dma_alloc *dma)
3090 {
3091         if (dma->dma_tag == NULL)
3092                 return;
3093         if (dma->dma_map != NULL) {
3094                 bus_dmamap_sync(dma->dma_tag, dma->dma_map,
3095                     BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
3096                 bus_dmamap_unload(dma->dma_tag, dma->dma_map);
3097                 bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map);
3098                 dma->dma_map = NULL;
3099         }
3100         bus_dma_tag_destroy(dma->dma_tag);
3101         dma->dma_tag = NULL;
3102 }
3103
3104
3105 /*********************************************************************
3106  *
3107  *  Allocate memory for the transmit and receive rings, and then
3108  *  the descriptors associated with each, called only once at attach.
3109  *
3110  **********************************************************************/
3111 static int
3112 em_allocate_queues(struct adapter *adapter)
3113 {
3114         device_t                dev = adapter->dev;
3115         struct tx_ring          *txr = NULL;
3116         struct rx_ring          *rxr = NULL;
3117         int rsize, tsize, error = E1000_SUCCESS;
3118         int txconf = 0, rxconf = 0;
3119
3120
3121         /* Allocate the TX ring struct memory */
3122         if (!(adapter->tx_rings =
3123             (struct tx_ring *) malloc(sizeof(struct tx_ring) *
3124             adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
3125                 device_printf(dev, "Unable to allocate TX ring memory\n");
3126                 error = ENOMEM;
3127                 goto fail;
3128         }
3129
3130         /* Now allocate the RX */
3131         if (!(adapter->rx_rings =
3132             (struct rx_ring *) malloc(sizeof(struct rx_ring) *
3133             adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
3134                 device_printf(dev, "Unable to allocate RX ring memory\n");
3135                 error = ENOMEM;
3136                 goto rx_fail;
3137         }
3138
3139         tsize = roundup2(adapter->num_tx_desc *
3140             sizeof(struct e1000_tx_desc), EM_DBA_ALIGN);
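        /*
         * e.g. with the default of 1024 descriptors and the 16-byte
         * struct e1000_tx_desc, tsize is 16384, already a multiple of
         * EM_DBA_ALIGN (128), so the roundup2() is a no-op here.
         */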
3141         /*
3142          * Now set up the TX queues, txconf is needed to handle the
3143          * possibility that things fail midcourse and we need to
3144          * undo memory gracefully
3145          */ 
3146         for (int i = 0; i < adapter->num_queues; i++, txconf++) {
3147                 /* Set up some basics */
3148                 txr = &adapter->tx_rings[i];
3149                 txr->adapter = adapter;
3150                 txr->me = i;
3151
3152                 /* Initialize the TX lock */
3153                 snprintf(txr->mtx_name, sizeof(txr->mtx_name), "%s:tx(%d)",
3154                     device_get_nameunit(dev), txr->me);
3155                 mtx_init(&txr->tx_mtx, txr->mtx_name, NULL, MTX_DEF);
3156
3157                 if (em_dma_malloc(adapter, tsize,
3158                         &txr->txdma, BUS_DMA_NOWAIT)) {
3159                         device_printf(dev,
3160                             "Unable to allocate TX Descriptor memory\n");
3161                         error = ENOMEM;
3162                         goto err_tx_desc;
3163                 }
3164                 txr->tx_base = (struct e1000_tx_desc *)txr->txdma.dma_vaddr;
3165                 bzero((void *)txr->tx_base, tsize);
3166
3167                 if (em_allocate_transmit_buffers(txr)) {
3168                         device_printf(dev,
3169                             "Critical Failure setting up transmit buffers\n");
3170                         error = ENOMEM;
3171                         goto err_tx_desc;
3172                 }
3173 #if __FreeBSD_version >= 800000
3174                 /* Allocate a buf ring */
3175                 txr->br = buf_ring_alloc(4096, M_DEVBUF,
3176                     M_WAITOK, &txr->tx_mtx);
3177 #endif
3178         }
3179
3180         /*
3181          * Next the RX queues...
3182          */ 
3183         rsize = roundup2(adapter->num_rx_desc *
3184             sizeof(struct e1000_rx_desc), EM_DBA_ALIGN);
3185         for (int i = 0; i < adapter->num_queues; i++, rxconf++) {
3186                 rxr = &adapter->rx_rings[i];
3187                 rxr->adapter = adapter;
3188                 rxr->me = i;
3189
3190                 /* Initialize the RX lock */
3191                 snprintf(rxr->mtx_name, sizeof(rxr->mtx_name), "%s:rx(%d)",
3192                     device_get_nameunit(dev), rxr->me);
3193                 mtx_init(&rxr->rx_mtx, rxr->mtx_name, NULL, MTX_DEF);
3194
3195                 if (em_dma_malloc(adapter, rsize,
3196                         &rxr->rxdma, BUS_DMA_NOWAIT)) {
3197                         device_printf(dev,
3198                             "Unable to allocate RX Descriptor memory\n");
3199                         error = ENOMEM;
3200                         goto err_rx_desc;
3201                 }
3202                 rxr->rx_base = (struct e1000_rx_desc *)rxr->rxdma.dma_vaddr;
3203                 bzero((void *)rxr->rx_base, rsize);
3204
3205                 /* Allocate receive buffers for the ring */
3206                 if (em_allocate_receive_buffers(rxr)) {
3207                         device_printf(dev,
3208                             "Critical Failure setting up receive buffers\n");
3209                         error = ENOMEM;
3210                         goto err_rx_desc;
3211                 }
3212         }
3213
3214         return (0);
3215
3216 err_rx_desc:
3217         for (rxr = adapter->rx_rings; rxconf > 0; rxr++, rxconf--)
3218                 em_dma_free(adapter, &rxr->rxdma);
3219 err_tx_desc:
3220         for (txr = adapter->tx_rings; txconf > 0; txr++, txconf--)
3221                 em_dma_free(adapter, &txr->txdma);
3222         free(adapter->rx_rings, M_DEVBUF);
3223 rx_fail:
3224 #if __FreeBSD_version >= 800000
        /* Free any buf rings allocated before the failure; unused are NULL */
3225         for (int i = 0; i < adapter->num_queues; i++)
                if (adapter->tx_rings[i].br != NULL)
                        buf_ring_free(adapter->tx_rings[i].br, M_DEVBUF);
3226 #endif
3227         free(adapter->tx_rings, M_DEVBUF);
3228 fail:
3229         return (error);
3230 }
3231
3232
3233 /*********************************************************************
3234  *
3235  *  Allocate memory for tx_buffer structures. The tx_buffer stores all
3236  *  the information needed to transmit a packet on the wire. This is
3237  *  called only once at attach, setup is done every reset.
3238  *
3239  **********************************************************************/
3240 static int
3241 em_allocate_transmit_buffers(struct tx_ring *txr)
3242 {
3243         struct adapter *adapter = txr->adapter;
3244         device_t dev = adapter->dev;
3245         struct em_buffer *txbuf;
3246         int error, i;
3247
3248         /*
3249          * Setup DMA descriptor areas.
3250          */
3251         if ((error = bus_dma_tag_create(bus_get_dma_tag(dev),
3252                                1, 0,                    /* alignment, bounds */
3253                                BUS_SPACE_MAXADDR,       /* lowaddr */
3254                                BUS_SPACE_MAXADDR,       /* highaddr */
3255                                NULL, NULL,              /* filter, filterarg */
3256                                EM_TSO_SIZE,             /* maxsize */
3257                                EM_MAX_SCATTER,          /* nsegments */
3258                                PAGE_SIZE,               /* maxsegsize */
3259                                0,                       /* flags */
3260                                NULL,                    /* lockfunc */
3261                                NULL,                    /* lockfuncarg */
3262                                &txr->txtag))) {
3263                 device_printf(dev,"Unable to allocate TX DMA tag\n");
3264                 goto fail;
3265         }
3266
3267         if (!(txr->tx_buffers =
3268             (struct em_buffer *) malloc(sizeof(struct em_buffer) *
3269             adapter->num_tx_desc, M_DEVBUF, M_NOWAIT | M_ZERO))) {
3270                 device_printf(dev, "Unable to allocate tx_buffer memory\n");
3271                 error = ENOMEM;
3272                 goto fail;
3273         }
3274
3275         /* Create the descriptor buffer dma maps */
3276         txbuf = txr->tx_buffers;
3277         for (i = 0; i < adapter->num_tx_desc; i++, txbuf++) {
3278                 error = bus_dmamap_create(txr->txtag, 0, &txbuf->map);
3279                 if (error != 0) {
3280                         device_printf(dev, "Unable to create TX DMA map\n");
3281                         goto fail;
3282                 }
3283         }
3284
3285         return 0;
3286 fail:
3287         /* We free all, it handles the case where we are in the middle */
3288         em_free_transmit_structures(adapter);
3289         return (error);
3290 }
3291
3292 /*********************************************************************
3293  *
3294  *  Initialize a transmit ring.
3295  *
3296  **********************************************************************/
3297 static void
3298 em_setup_transmit_ring(struct tx_ring *txr)
3299 {
3300         struct adapter *adapter = txr->adapter;
3301         struct em_buffer *txbuf;
3302         int i;
3303 #ifdef DEV_NETMAP
3304         struct netmap_adapter *na = NA(adapter->ifp);
3305         struct netmap_slot *slot;
3306 #endif /* DEV_NETMAP */
3307
3308         /* Clear the old descriptor contents */
3309         EM_TX_LOCK(txr);
3310 #ifdef DEV_NETMAP
3311         slot = netmap_reset(na, NR_TX, txr->me, 0);
3312 #endif /* DEV_NETMAP */
3313
3314         bzero((void *)txr->tx_base,
3315               (sizeof(struct e1000_tx_desc)) * adapter->num_tx_desc);
3316         /* Reset indices */
3317         txr->next_avail_desc = 0;
3318         txr->next_to_clean = 0;
3319
3320         /* Free any existing tx buffers. */
3321         txbuf = txr->tx_buffers;
3322         for (i = 0; i < adapter->num_tx_desc; i++, txbuf++) {
3323                 if (txbuf->m_head != NULL) {
3324                         bus_dmamap_sync(txr->txtag, txbuf->map,
3325                             BUS_DMASYNC_POSTWRITE);
3326                         bus_dmamap_unload(txr->txtag, txbuf->map);
3327                         m_freem(txbuf->m_head);
3328                         txbuf->m_head = NULL;
3329                 }
3330 #ifdef DEV_NETMAP
3331                 if (slot) {
3332                         int si = netmap_idx_n2k(&na->tx_rings[txr->me], i);
3333                         uint64_t paddr;
3334                         void *addr;
3335
3336                         addr = PNMB(slot + si, &paddr);
3337                         txr->tx_base[i].buffer_addr = htole64(paddr);
3338                         /* reload the map for netmap mode */
3339                         netmap_load_map(txr->txtag, txbuf->map, addr);
3340                 }
3341 #endif /* DEV_NETMAP */
3342
3343                 /* clear the watch index */
3344                 txbuf->next_eop = -1;
3345         }
3346
3347         /* Set number of descriptors available */
3348         txr->tx_avail = adapter->num_tx_desc;
3349         txr->queue_status = EM_QUEUE_IDLE;
3350
3351         /* Clear checksum offload context. */
3352         txr->last_hw_offload = 0;
3353         txr->last_hw_ipcss = 0;
3354         txr->last_hw_ipcso = 0;
3355         txr->last_hw_tucss = 0;
3356         txr->last_hw_tucso = 0;
3357
3358         bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
3359             BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
3360         EM_TX_UNLOCK(txr);
3361 }
3362
3363 /*********************************************************************
3364  *
3365  *  Initialize all transmit rings.
3366  *
3367  **********************************************************************/
3368 static void
3369 em_setup_transmit_structures(struct adapter *adapter)
3370 {
3371         struct tx_ring *txr = adapter->tx_rings;
3372
3373         for (int i = 0; i < adapter->num_queues; i++, txr++)
3374                 em_setup_transmit_ring(txr);
3375
3376         return;
3377 }
3378
3379 /*********************************************************************
3380  *
3381  *  Enable transmit unit.
3382  *
3383  **********************************************************************/
3384 static void
3385 em_initialize_transmit_unit(struct adapter *adapter)
3386 {
3387         struct tx_ring  *txr = adapter->tx_rings;
3388         struct e1000_hw *hw = &adapter->hw;
3389         u32     tctl, tarc, tipg = 0;
3390
3391         INIT_DEBUGOUT("em_initialize_transmit_unit: begin");
3392
3393         for (int i = 0; i < adapter->num_queues; i++, txr++) {
3394                 u64 bus_addr = txr->txdma.dma_paddr;
3395                 /* Base and Len of TX Ring */
3396                 E1000_WRITE_REG(hw, E1000_TDLEN(i),
3397                     adapter->num_tx_desc * sizeof(struct e1000_tx_desc));
3398                 E1000_WRITE_REG(hw, E1000_TDBAH(i),
3399                     (u32)(bus_addr >> 32));
3400                 E1000_WRITE_REG(hw, E1000_TDBAL(i),
3401                     (u32)bus_addr);
3402                 /* Init the HEAD/TAIL indices */
3403                 E1000_WRITE_REG(hw, E1000_TDT(i), 0);
3404                 E1000_WRITE_REG(hw, E1000_TDH(i), 0);
3405
3406                 HW_DEBUGOUT2("Base = %x, Length = %x\n",
3407                     E1000_READ_REG(&adapter->hw, E1000_TDBAL(i)),
3408                     E1000_READ_REG(&adapter->hw, E1000_TDLEN(i)));
3409
3410                 txr->queue_status = EM_QUEUE_IDLE;
3411         }
3412
3413         /* Set the default values for the Tx Inter Packet Gap timer */
3414         switch (adapter->hw.mac.type) {
3415         case e1000_80003es2lan:
3416                 tipg = DEFAULT_82543_TIPG_IPGR1;
3417                 tipg |= DEFAULT_80003ES2LAN_TIPG_IPGR2 <<
3418                     E1000_TIPG_IPGR2_SHIFT;
3419                 break;
3420         default:
3421                 if ((adapter->hw.phy.media_type == e1000_media_type_fiber) ||
3422                     (adapter->hw.phy.media_type ==
3423                     e1000_media_type_internal_serdes))
3424                         tipg = DEFAULT_82543_TIPG_IPGT_FIBER;
3425                 else
3426                         tipg = DEFAULT_82543_TIPG_IPGT_COPPER;
3427                 tipg |= DEFAULT_82543_TIPG_IPGR1 << E1000_TIPG_IPGR1_SHIFT;
3428                 tipg |= DEFAULT_82543_TIPG_IPGR2 << E1000_TIPG_IPGR2_SHIFT;
3429         }
3430
3431         E1000_WRITE_REG(&adapter->hw, E1000_TIPG, tipg);
3432         E1000_WRITE_REG(&adapter->hw, E1000_TIDV, adapter->tx_int_delay.value);
3433
3434         if (adapter->hw.mac.type >= e1000_82540)
3435                 E1000_WRITE_REG(&adapter->hw, E1000_TADV,
3436                     adapter->tx_abs_int_delay.value);
3437
3438         if ((adapter->hw.mac.type == e1000_82571) ||
3439             (adapter->hw.mac.type == e1000_82572)) {
3440                 tarc = E1000_READ_REG(&adapter->hw, E1000_TARC(0));
3441                 tarc |= SPEED_MODE_BIT;
3442                 E1000_WRITE_REG(&adapter->hw, E1000_TARC(0), tarc);
3443         } else if (adapter->hw.mac.type == e1000_80003es2lan) {
3444                 tarc = E1000_READ_REG(&adapter->hw, E1000_TARC(0));
3445                 tarc |= 1;
3446                 E1000_WRITE_REG(&adapter->hw, E1000_TARC(0), tarc);
3447                 tarc = E1000_READ_REG(&adapter->hw, E1000_TARC(1));
3448                 tarc |= 1;
3449                 E1000_WRITE_REG(&adapter->hw, E1000_TARC(1), tarc);
3450         }
3451
3452         adapter->txd_cmd = E1000_TXD_CMD_IFCS;
3453         if (adapter->tx_int_delay.value > 0)
3454                 adapter->txd_cmd |= E1000_TXD_CMD_IDE;
3455
3456         /* Program the Transmit Control Register */
3457         tctl = E1000_READ_REG(&adapter->hw, E1000_TCTL);
3458         tctl &= ~E1000_TCTL_CT;
3459         tctl |= (E1000_TCTL_PSP | E1000_TCTL_RTLC | E1000_TCTL_EN |
3460                    (E1000_COLLISION_THRESHOLD << E1000_CT_SHIFT));
3461
3462         if (adapter->hw.mac.type >= e1000_82571)
3463                 tctl |= E1000_TCTL_MULR;
3464
3465         /* This write will effectively turn on the transmit unit. */
3466         E1000_WRITE_REG(&adapter->hw, E1000_TCTL, tctl);
3467
3468 }
3469
3470
3471 /*********************************************************************
3472  *
3473  *  Free all transmit rings.
3474  *
3475  **********************************************************************/
3476 static void
3477 em_free_transmit_structures(struct adapter *adapter)
3478 {
3479         struct tx_ring *txr = adapter->tx_rings;
3480
3481         for (int i = 0; i < adapter->num_queues; i++, txr++) {
3482                 EM_TX_LOCK(txr);
3483                 em_free_transmit_buffers(txr);
3484                 em_dma_free(adapter, &txr->txdma);
3485                 EM_TX_UNLOCK(txr);
3486                 EM_TX_LOCK_DESTROY(txr);
3487         }
3488
3489         free(adapter->tx_rings, M_DEVBUF);
3490 }
3491
3492 /*********************************************************************
3493  *
3494  *  Free transmit ring related data structures.
3495  *
3496  **********************************************************************/
3497 static void
3498 em_free_transmit_buffers(struct tx_ring *txr)
3499 {
3500         struct adapter          *adapter = txr->adapter;
3501         struct em_buffer        *txbuf;
3502
3503         INIT_DEBUGOUT("free_transmit_ring: begin");
3504
3505         if (txr->tx_buffers == NULL)
3506                 return;
3507
3508         for (int i = 0; i < adapter->num_tx_desc; i++) {
3509                 txbuf = &txr->tx_buffers[i];
3510                 if (txbuf->m_head != NULL) {
3511                         bus_dmamap_sync(txr->txtag, txbuf->map,
3512                             BUS_DMASYNC_POSTWRITE);
3513                         bus_dmamap_unload(txr->txtag,
3514                             txbuf->map);
3515                         m_freem(txbuf->m_head);
3516                         txbuf->m_head = NULL;
3517                         if (txbuf->map != NULL) {
3518                                 bus_dmamap_destroy(txr->txtag,
3519                                     txbuf->map);
3520                                 txbuf->map = NULL;
3521                         }
3522                 } else if (txbuf->map != NULL) {
3523                         bus_dmamap_unload(txr->txtag,
3524                             txbuf->map);
3525                         bus_dmamap_destroy(txr->txtag,
3526                             txbuf->map);
3527                         txbuf->map = NULL;
3528                 }
3529         }
3530 #if __FreeBSD_version >= 800000
3531         if (txr->br != NULL)
3532                 buf_ring_free(txr->br, M_DEVBUF);
3533 #endif
3534         if (txr->tx_buffers != NULL) {
3535                 free(txr->tx_buffers, M_DEVBUF);
3536                 txr->tx_buffers = NULL;
3537         }
3538         if (txr->txtag != NULL) {
3539                 bus_dma_tag_destroy(txr->txtag);
3540                 txr->txtag = NULL;
3541         }
3542         return;
3543 }
3544
3545
3546 /*********************************************************************
3547  *  The offload context is protocol specific (TCP/UDP) and thus
3548  *  only needs to be set when the protocol changes. A context
3549  *  change is a performance detriment, however, and offload
3550  *  might be better left disabled. The reason lies in the way
3551  *  in which the controller supports pipelined requests from the
3552  *  Tx data DMA. Up to four requests can be pipelined, and they may
3553  *  belong to the same packet or to multiple packets. However all
3554  *  requests for one packet are issued before a request is issued
3555  *  for a subsequent packet and if a request for the next packet
3556  *  requires a context change, that request will be stalled
3557  *  until the previous request completes. This means setting up
3558  *  a new context effectively disables pipelined Tx data DMA, which
3559  *  in turn greatly slows down performance when sending small
3560  *  frames.
3561  **********************************************************************/
3562 static void
3563 em_transmit_checksum_setup(struct tx_ring *txr, struct mbuf *mp, int ip_off,
3564     struct ip *ip, u32 *txd_upper, u32 *txd_lower)
3565 {
3566         struct adapter                  *adapter = txr->adapter;
3567         struct e1000_context_desc       *TXD = NULL;
3568         struct em_buffer                *tx_buffer;
3569         int                             cur, hdr_len;
3570         u32                             cmd = 0;
3571         u16                             offload = 0;
3572         u8                              ipcso, ipcss, tucso, tucss;
3573
3574         ipcss = ipcso = tucss = tucso = 0;
3575         hdr_len = ip_off + (ip->ip_hl << 2);
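        /*
         * e.g. a plain Ethernet frame has ip_off = 14, and with no IP
         * options ip_hl = 5 (5 * 4 = 20 bytes), so hdr_len = 34.
         */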
3576         cur = txr->next_avail_desc;
3577
3578         /* Setup of IP header checksum. */
3579         if (mp->m_pkthdr.csum_flags & CSUM_IP) {
3580                 *txd_upper |= E1000_TXD_POPTS_IXSM << 8;
3581                 offload |= CSUM_IP;
3582                 ipcss = ip_off;
3583                 ipcso = ip_off + offsetof(struct ip, ip_sum);
3584                 /*
3585                  * Start offset for header checksum calculation.
3586                  * End offset for header checksum calculation.
3587                  * Offset of place to put the checksum.
3588                  */
3589                 TXD = (struct e1000_context_desc *)&txr->tx_base[cur];
3590                 TXD->lower_setup.ip_fields.ipcss = ipcss;
3591                 TXD->lower_setup.ip_fields.ipcse = htole16(hdr_len);
3592                 TXD->lower_setup.ip_fields.ipcso = ipcso;
3593                 cmd |= E1000_TXD_CMD_IP;
3594         }
3595
3596         if (mp->m_pkthdr.csum_flags & CSUM_TCP) {
3597                 *txd_lower = E1000_TXD_CMD_DEXT | E1000_TXD_DTYP_D;
3598                 *txd_upper |= E1000_TXD_POPTS_TXSM << 8;
3599                 offload |= CSUM_TCP;
3600                 tucss = hdr_len;
3601                 tucso = hdr_len + offsetof(struct tcphdr, th_sum);
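                /*
                 * e.g. with hdr_len = 34 and th_sum 16 bytes into the
                 * TCP header, the checksum lands at byte offset 50.
                 */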
3602                 /*
3603                  * Setting up a new checksum offload context for every frame
3604                  * takes a lot of processing time for hardware. This also
3605                  * reduces performance a lot for small sized frames so avoid
3606                  * it if driver can use previously configured checksum
3607                  * offload context.
3608                  */
3609                 if (txr->last_hw_offload == offload) {
3610                         if (offload & CSUM_IP) {
3611                                 if (txr->last_hw_ipcss == ipcss &&
3612                                     txr->last_hw_ipcso == ipcso &&
3613                                     txr->last_hw_tucss == tucss &&
3614                                     txr->last_hw_tucso == tucso)
3615                                         return;
3616                         } else {
3617                                 if (txr->last_hw_tucss == tucss &&
3618                                     txr->last_hw_tucso == tucso)
3619                                         return;
3620                         }
3621                 }
3622                 txr->last_hw_offload = offload;
3623                 txr->last_hw_tucss = tucss;
3624                 txr->last_hw_tucso = tucso;
3625                 /*
3626                  * Start offset for payload checksum calculation.
3627                  * End offset for payload checksum calculation.
3628                  * Offset of place to put the checksum.
3629                  */
3630                 TXD = (struct e1000_context_desc *)&txr->tx_base[cur];
3631                 TXD->upper_setup.tcp_fields.tucss = tucss;
3632                 TXD->upper_setup.tcp_fields.tucse = htole16(0);
3633                 TXD->upper_setup.tcp_fields.tucso = tucso;
3634                 cmd |= E1000_TXD_CMD_TCP;
3635         } else if (mp->m_pkthdr.csum_flags & CSUM_UDP) {
3636                 *txd_lower = E1000_TXD_CMD_DEXT | E1000_TXD_DTYP_D;
3637                 *txd_upper |= E1000_TXD_POPTS_TXSM << 8;
3638                 tucss = hdr_len;
3639                 tucso = hdr_len + offsetof(struct udphdr, uh_sum);
3640                 /*
3641                  * Setting up a new checksum offload context for every frame
3642                  * takes a lot of processing time for hardware. This also
3643                  * reduces performance a lot for small sized frames so avoid
3644                  * it if driver can use previously configured checksum
3645                  * offload context.
3646                  */
3647                 if (txr->last_hw_offload == offload) {
3648                         if (offload & CSUM_IP) {
3649                                 if (txr->last_hw_ipcss == ipcss &&
3650                                     txr->last_hw_ipcso == ipcso &&
3651                                     txr->last_hw_tucss == tucss &&
3652                                     txr->last_hw_tucso == tucso)
3653                                         return;
3654                         } else {
3655                                 if (txr->last_hw_tucss == tucss &&
3656                                     txr->last_hw_tucso == tucso)
3657                                         return;
3658                         }
3659                 }
3660                 txr->last_hw_offload = offload;
3661                 txr->last_hw_tucss = tucss;
3662                 txr->last_hw_tucso = tucso;
3663                 /*
3664                  * Start offset for header checksum calculation.
3665                  * End offset for header checksum calculation.
3666                  * Offset of place to put the checksum.
3667                  */
3668                 TXD = (struct e1000_context_desc *)&txr->tx_base[cur];
3669                 TXD->upper_setup.tcp_fields.tucss = tucss;
3670                 TXD->upper_setup.tcp_fields.tucse = htole16(0);
3671                 TXD->upper_setup.tcp_fields.tucso = tucso;
3672         }
3673   
3674         if (offload & CSUM_IP) {
3675                 txr->last_hw_ipcss = ipcss;
3676                 txr->last_hw_ipcso = ipcso;
3677         }
3678
3679         TXD->tcp_seg_setup.data = htole32(0);
3680         TXD->cmd_and_length =
3681             htole32(adapter->txd_cmd | E1000_TXD_CMD_DEXT | cmd);
3682         tx_buffer = &txr->tx_buffers[cur];
3683         tx_buffer->m_head = NULL;
3684         tx_buffer->next_eop = -1;
3685
3686         if (++cur == adapter->num_tx_desc)
3687                 cur = 0;
3688
3689         txr->tx_avail--;
3690         txr->next_avail_desc = cur;
3691 }
3692
3693
3694 /**********************************************************************
3695  *
3696  *  Setup work for hardware segmentation offload (TSO)
3697  *
3698  **********************************************************************/
3699 static void
3700 em_tso_setup(struct tx_ring *txr, struct mbuf *mp, int ip_off,
3701     struct ip *ip, struct tcphdr *tp, u32 *txd_upper, u32 *txd_lower)
3702 {
3703         struct adapter                  *adapter = txr->adapter;
3704         struct e1000_context_desc       *TXD;
3705         struct em_buffer                *tx_buffer;
3706         int cur, hdr_len;
3707
3708         /*
3709          * In theory we can use the same TSO context if and only if
3710          * the frame is the same type (IP/TCP) and has the same MSS. However,
3711          * checking whether a frame has the same IP/TCP structure is a
3712          * hard thing to do, so ignore that and always establish a
3713          * new TSO context.
3714          */
3715         hdr_len = ip_off + (ip->ip_hl << 2) + (tp->th_off << 2);
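        /*
         * e.g. ip_off = 14 plus a 20-byte IP header (ip_hl = 5) and a
         * 20-byte TCP header (th_off = 5) gives hdr_len = 54.
         */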
3716         *txd_lower = (E1000_TXD_CMD_DEXT |      /* Extended descr type */
3717                       E1000_TXD_DTYP_D |        /* Data descr type */
3718                       E1000_TXD_CMD_TSE);       /* Do TSE on this packet */
3719
3720         /* IP and/or TCP header checksum calculation and insertion. */
3721         *txd_upper = (E1000_TXD_POPTS_IXSM | E1000_TXD_POPTS_TXSM) << 8;
3722
3723         cur = txr->next_avail_desc;
3724         tx_buffer = &txr->tx_buffers[cur];
3725         TXD = (struct e1000_context_desc *) &txr->tx_base[cur];
3726
3727         /*
3728          * Start offset for header checksum calculation.
3729          * End offset for header checksum calculation.
3730          * Offset of place to put the checksum.
3731          */
3732         TXD->lower_setup.ip_fields.ipcss = ip_off;
3733         TXD->lower_setup.ip_fields.ipcse =
3734             htole16(ip_off + (ip->ip_hl << 2) - 1);
3735         TXD->lower_setup.ip_fields.ipcso = ip_off + offsetof(struct ip, ip_sum);
3736         /*
3737          * Start offset for payload checksum calculation.
3738          * End offset for payload checksum calculation.
3739          * Offset of place to put the checksum.
3740          */
3741         TXD->upper_setup.tcp_fields.tucss = ip_off + (ip->ip_hl << 2);
3742         TXD->upper_setup.tcp_fields.tucse = 0;
3743         TXD->upper_setup.tcp_fields.tucso =
3744             ip_off + (ip->ip_hl << 2) + offsetof(struct tcphdr, th_sum);
3745         /*
3746          * Payload size per packet w/o any headers.
3747          * Length of all headers up to payload.
3748          */
3749         TXD->tcp_seg_setup.fields.mss = htole16(mp->m_pkthdr.tso_segsz);
3750         TXD->tcp_seg_setup.fields.hdr_len = hdr_len;
3751
3752         TXD->cmd_and_length = htole32(adapter->txd_cmd |
3753                                 E1000_TXD_CMD_DEXT |    /* Extended descr */
3754                                 E1000_TXD_CMD_TSE |     /* TSE context */
3755                                 E1000_TXD_CMD_IP |      /* Do IP csum */
3756                                 E1000_TXD_CMD_TCP |     /* Do TCP checksum */
3757                                 (mp->m_pkthdr.len - (hdr_len))); /* Total payload len */
3758
3759         tx_buffer->m_head = NULL;
3760         tx_buffer->next_eop = -1;
3761
3762         if (++cur == adapter->num_tx_desc)
3763                 cur = 0;
3764
3765         txr->tx_avail--;
3766         txr->next_avail_desc = cur;
3767         txr->tx_tso = TRUE;
3768 }
3769
3770
3771 /**********************************************************************
3772  *
3773  *  Examine each tx_buffer in the used queue. If the hardware is done
3774  *  processing the packet then free associated resources. The
3775  *  tx_buffer is put back on the free queue.
3776  *
3777  **********************************************************************/
3778 static void
3779 em_txeof(struct tx_ring *txr)
3780 {
3781         struct adapter  *adapter = txr->adapter;
3782         int first, last, done, processed;
3783         struct em_buffer *tx_buffer;
3784         struct e1000_tx_desc   *tx_desc, *eop_desc;
3785         struct ifnet   *ifp = adapter->ifp;
3786
3787         EM_TX_LOCK_ASSERT(txr);
3788 #ifdef DEV_NETMAP
3789         if (ifp->if_capenable & IFCAP_NETMAP) {
3790                 struct netmap_adapter *na = NA(ifp);
3791
3792                 selwakeuppri(&na->tx_rings[txr->me].si, PI_NET);
3793                 EM_TX_UNLOCK(txr);
3794                 EM_CORE_LOCK(adapter);
3795                 selwakeuppri(&na->tx_si, PI_NET);
3796                 EM_CORE_UNLOCK(adapter);
3797                 EM_TX_LOCK(txr);
3798                 return;
3799         }
3800 #endif /* DEV_NETMAP */
3801
3802         /* No work, make sure watchdog is off */
3803         if (txr->tx_avail == adapter->num_tx_desc) {
3804                 txr->queue_status = EM_QUEUE_IDLE;
3805                 return;
3806         }
3807
3808         processed = 0;
3809         first = txr->next_to_clean;
3810         tx_desc = &txr->tx_base[first];
3811         tx_buffer = &txr->tx_buffers[first];
3812         last = tx_buffer->next_eop;
3813         eop_desc = &txr->tx_base[last];
3814
3815         /*
3816          * What this does is get the index of the
3817          * first descriptor AFTER the EOP of the 
3818          * first packet, that way we can do the
3819          * simple comparison on the inner while loop.
3820          */
3821         if (++last == adapter->num_tx_desc)
3822                 last = 0;
3823         done = last;
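        /*
         * e.g. in an 8-descriptor ring with first = 2 and the packet's
         * EOP at index 4, done becomes 5 and the loop below cleans
         * descriptors 2, 3 and 4.
         */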
3824
3825         bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
3826             BUS_DMASYNC_POSTREAD);
3827
3828         while (eop_desc->upper.fields.status & E1000_TXD_STAT_DD) {
3829                 /* We clean the range of the packet */
3830                 while (first != done) {
3831                         tx_desc->upper.data = 0;
3832                         tx_desc->lower.data = 0;
3833                         tx_desc->buffer_addr = 0;
3834                         ++txr->tx_avail;
3835                         ++processed;
3836
3837                         if (tx_buffer->m_head) {
3838                                 bus_dmamap_sync(txr->txtag,
3839                                     tx_buffer->map,
3840                                     BUS_DMASYNC_POSTWRITE);
3841                                 bus_dmamap_unload(txr->txtag,
3842                                     tx_buffer->map);
3843                                 m_freem(tx_buffer->m_head);
3844                                 tx_buffer->m_head = NULL;
3845                         }
3846                         tx_buffer->next_eop = -1;
3847                         txr->watchdog_time = ticks;
3848
3849                         if (++first == adapter->num_tx_desc)
3850                                 first = 0;
3851
3852                         tx_buffer = &txr->tx_buffers[first];
3853                         tx_desc = &txr->tx_base[first];
3854                 }
3855                 ++ifp->if_opackets;
3856                 /* See if we can continue to the next packet */
3857                 last = tx_buffer->next_eop;
3858                 if (last != -1) {
3859                         eop_desc = &txr->tx_base[last];
3860                         /* Get new done point */
3861                         if (++last == adapter->num_tx_desc)
                                last = 0;
3862                         done = last;
3863                 } else
3864                         break;
3865         }
3866         bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
3867             BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
3868
3869         txr->next_to_clean = first;
3870
3871         /*
3872         ** Watchdog calculation, we know there's
3873         ** work outstanding or the first return
3874         ** would have been taken, so none processed
3875         ** for too long indicates a hang. The local timer
3876         ** will examine this and do a reset if needed.
3877         */
3878         if ((!processed) && ((ticks - txr->watchdog_time) > EM_WATCHDOG))
3879                 txr->queue_status = EM_QUEUE_HUNG;
3880
3881         /*
3882          * If we have a minimum free, clear IFF_DRV_OACTIVE
3883          * to tell the stack that it is OK to send packets.
3884          * Notice that all writes of OACTIVE happen under the
3885          * TX lock which, with a single queue, guarantees 
3886          * sanity.
3887          */
3888         if (txr->tx_avail >= EM_MAX_SCATTER)
3889                 ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;
3890
3891         /* Disable watchdog if all clean */
3892         if (txr->tx_avail == adapter->num_tx_desc) {
3893                 txr->queue_status = EM_QUEUE_IDLE;
3894         } 
3895 }
3896
3897
3898 /*********************************************************************
3899  *
3900  *  Refresh RX descriptor mbufs from system mbuf buffer pool.
3901  *
3902  **********************************************************************/
3903 static void
3904 em_refresh_mbufs(struct rx_ring *rxr, int limit)
3905 {
3906         struct adapter          *adapter = rxr->adapter;
3907         struct mbuf             *m;
3908         bus_dma_segment_t       segs[1];
3909         struct em_buffer        *rxbuf;
3910         int                     i, j, error, nsegs;
3911         bool                    cleaned = FALSE;
3912
3913         i = j = rxr->next_to_refresh;
3914         /*
3915         ** Get one descriptor beyond
3916         ** our work mark to control
3917         ** the loop.
3918         */
3919         if (++j == adapter->num_rx_desc)
3920                 j = 0;
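        /*
         * e.g. with 1024 descriptors and next_to_refresh = 1023, j wraps
         * to 0 and the loop below runs until j catches up with the
         * caller-supplied limit.
         */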
3921
3922         while (j != limit) {
3923                 rxbuf = &rxr->rx_buffers[i];
3924                 if (rxbuf->m_head == NULL) {
3925                         m = m_getjcl(M_DONTWAIT, MT_DATA,
3926                             M_PKTHDR, adapter->rx_mbuf_sz);
3927                         /*
3928                         ** If we have a temporary resource shortage
3929                         ** that causes a failure, just abort refresh
3930                         ** for now, we will return to this point when
3931                         ** reinvoked from em_rxeof.
3932                         */
3933                         if (m == NULL)
3934                                 goto update;
3935                 } else
3936                         m = rxbuf->m_head;
3937
3938                 m->m_len = m->m_pkthdr.len = adapter->rx_mbuf_sz;
3939                 m->m_flags |= M_PKTHDR;
3940                 m->m_data = m->m_ext.ext_buf;
3941
3942                 /* Use bus_dma machinery to setup the memory mapping  */
3943                 error = bus_dmamap_load_mbuf_sg(rxr->rxtag, rxbuf->map,
3944                     m, segs, &nsegs, BUS_DMA_NOWAIT);
3945                 if (error != 0) {
3946                         printf("Refresh mbufs: dmamap load"
3947                             " failure - %d\n", error);
3948                         m_free(m);
3949                         rxbuf->m_head = NULL;
3950                         goto update;
3951                 }
3952                 rxbuf->m_head = m;
3953                 bus_dmamap_sync(rxr->rxtag,
3954                     rxbuf->map, BUS_DMASYNC_PREREAD);
3955                 rxr->rx_base[i].buffer_addr = htole64(segs[0].ds_addr);
3956                 cleaned = TRUE;
3957
3958                 i = j; /* Next is precalculated for us */
3959                 rxr->next_to_refresh = i;
3960                 /* Calculate next controlling index */
3961                 if (++j == adapter->num_rx_desc)
3962                         j = 0;
3963         }
3964 update:
3965         /*
3966         ** Update the tail pointer only if,
3967         ** and as far as, we have refreshed.
3968         */
3969         if (cleaned)
3970                 E1000_WRITE_REG(&adapter->hw,
3971                     E1000_RDT(rxr->me), rxr->next_to_refresh);
3972
3973         return;
3974 }
3975
3976
3977 /*********************************************************************
3978  *
3979  *  Allocate memory for rx_buffer structures. Since we use one
3980  *  rx_buffer per received packet, the maximum number of rx_buffer's
3981  *  that we'll need is equal to the number of receive descriptors
3982  *  that we've allocated.
3983  *
3984  **********************************************************************/
3985 static int
3986 em_allocate_receive_buffers(struct rx_ring *rxr)
3987 {
3988         struct adapter          *adapter = rxr->adapter;
3989         device_t                dev = adapter->dev;
3990         struct em_buffer        *rxbuf;
3991         int                     error;
3992
3993         rxr->rx_buffers = malloc(sizeof(struct em_buffer) *
3994             adapter->num_rx_desc, M_DEVBUF, M_NOWAIT | M_ZERO);
3995         if (rxr->rx_buffers == NULL) {
3996                 device_printf(dev, "Unable to allocate rx_buffer memory\n");
3997                 return (ENOMEM);
3998         }
3999
4000         error = bus_dma_tag_create(bus_get_dma_tag(dev), /* parent */
4001                                 1, 0,                   /* alignment, bounds */
4002                                 BUS_SPACE_MAXADDR,      /* lowaddr */
4003                                 BUS_SPACE_MAXADDR,      /* highaddr */
4004                                 NULL, NULL,             /* filter, filterarg */
4005                                 MJUM9BYTES,             /* maxsize */
4006                                 1,                      /* nsegments */
4007                                 MJUM9BYTES,             /* maxsegsize */
4008                                 0,                      /* flags */
4009                                 NULL,                   /* lockfunc */
4010                                 NULL,                   /* lockarg */
4011                                 &rxr->rxtag);
4012         if (error) {
4013                 device_printf(dev, "%s: bus_dma_tag_create failed %d\n",
4014                     __func__, error);
4015                 goto fail;
4016         }
4017
4018         rxbuf = rxr->rx_buffers;
4019         for (int i = 0; i < adapter->num_rx_desc; i++, rxbuf++) {
4021                 error = bus_dmamap_create(rxr->rxtag, BUS_DMA_NOWAIT,
4022                     &rxbuf->map);
4023                 if (error) {
4024                         device_printf(dev, "%s: bus_dmamap_create failed: %d\n",
4025                             __func__, error);
4026                         goto fail;
4027                 }
4028         }
4029
4030         return (0);
4031
4032 fail:
4033         em_free_receive_structures(adapter);
4034         return (error);
4035 }
4036
4037
4038 /*********************************************************************
4039  *
4040  *  Initialize a receive ring and its buffers.
4041  *
4042  **********************************************************************/
4043 static int
4044 em_setup_receive_ring(struct rx_ring *rxr)
4045 {
4046         struct  adapter         *adapter = rxr->adapter;
4047         struct em_buffer        *rxbuf;
4048         bus_dma_segment_t       seg[1];
4049         int                     rsize, nsegs, error = 0;
4050 #ifdef DEV_NETMAP
4051         struct netmap_adapter *na = NA(adapter->ifp);
4052         struct netmap_slot *slot;
4053 #endif
4054
4056         /* Clear the ring contents */
4057         EM_RX_LOCK(rxr);
4058         rsize = roundup2(adapter->num_rx_desc *
4059             sizeof(struct e1000_rx_desc), EM_DBA_ALIGN);
4060         bzero((void *)rxr->rx_base, rsize);
4061 #ifdef DEV_NETMAP
4062         slot = netmap_reset(na, NR_RX, 0, 0);
4063 #endif
4064
4065         /*
4066         ** Free current RX buffer structs and their mbufs
4067         */
4068         for (int i = 0; i < adapter->num_rx_desc; i++) {
4069                 rxbuf = &rxr->rx_buffers[i];
4070                 if (rxbuf->m_head != NULL) {
4071                         bus_dmamap_sync(rxr->rxtag, rxbuf->map,
4072                             BUS_DMASYNC_POSTREAD);
4073                         bus_dmamap_unload(rxr->rxtag, rxbuf->map);
4074                         m_freem(rxbuf->m_head);
4075                         rxbuf->m_head = NULL; /* mark as freed */
4076                 }
4077         }
4078
4079         /* Now replenish the mbufs */
4080         for (int j = 0; j != adapter->num_rx_desc; ++j) {
4081                 rxbuf = &rxr->rx_buffers[j];
4082 #ifdef DEV_NETMAP
4083                 if (slot) {
4084                         int si = netmap_idx_n2k(&na->rx_rings[rxr->me], j);
4085                         uint64_t paddr;
4086                         void *addr;
4087
4088                         addr = PNMB(slot + si, &paddr);
4089                         netmap_load_map(rxr->rxtag, rxbuf->map, addr);
4090                         /* Update descriptor */
4091                         rxr->rx_base[j].buffer_addr = htole64(paddr);
4092                         continue;
4093                 }
4094 #endif /* DEV_NETMAP */
4095                 rxbuf->m_head = m_getjcl(M_DONTWAIT, MT_DATA,
4096                     M_PKTHDR, adapter->rx_mbuf_sz);
4097                 if (rxbuf->m_head == NULL) {
4098                         error = ENOBUFS;
4099                         goto fail;
4100                 }
4101                 rxbuf->m_head->m_len = adapter->rx_mbuf_sz;
4102                 rxbuf->m_head->m_flags &= ~M_HASFCS; /* we strip it */
4103                 rxbuf->m_head->m_pkthdr.len = adapter->rx_mbuf_sz;
4104
4105                 /* Get the memory mapping */
4106                 error = bus_dmamap_load_mbuf_sg(rxr->rxtag,
4107                     rxbuf->map, rxbuf->m_head, seg,
4108                     &nsegs, BUS_DMA_NOWAIT);
4109                 if (error != 0) {
4110                         m_freem(rxbuf->m_head);
4111                         rxbuf->m_head = NULL;
4112                         goto fail;
4113                 }
4114                 bus_dmamap_sync(rxr->rxtag,
4115                     rxbuf->map, BUS_DMASYNC_PREREAD);
4116
4117                 /* Update descriptor */
4118                 rxr->rx_base[j].buffer_addr = htole64(seg[0].ds_addr);
4119         }
4120         rxr->next_to_check = 0;
4121         rxr->next_to_refresh = 0;
4122         bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
4123             BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
4124
4125 fail:
4126         EM_RX_UNLOCK(rxr);
4127         return (error);
4128 }
4129
4130 /*********************************************************************
4131  *
4132  *  Initialize all receive rings.
4133  *
4134  **********************************************************************/
4135 static int
4136 em_setup_receive_structures(struct adapter *adapter)
4137 {
4138         struct rx_ring *rxr = adapter->rx_rings;
4139         int q;
4140
4141         for (q = 0; q < adapter->num_queues; q++, rxr++)
4142                 if (em_setup_receive_ring(rxr))
4143                         goto fail;
4144
4145         return (0);
4146 fail:
4147         /*
4148          * Free the RX buffers allocated so far; we only handle
4149          * the rings that completed, since the failing ring has
4150          * cleaned up after itself. Ring 'q' failed, so it is the terminus.
4151          */
4152         for (int i = 0; i < q; ++i) {
4153                 rxr = &adapter->rx_rings[i];
4154                 for (int n = 0; n < adapter->num_rx_desc; n++) {
4155                         struct em_buffer *rxbuf;
4156                         rxbuf = &rxr->rx_buffers[n];
4157                         if (rxbuf->m_head != NULL) {
4158                                 bus_dmamap_sync(rxr->rxtag, rxbuf->map,
4159                                   BUS_DMASYNC_POSTREAD);
4160                                 bus_dmamap_unload(rxr->rxtag, rxbuf->map);
4161                                 m_freem(rxbuf->m_head);
4162                                 rxbuf->m_head = NULL;
4163                         }
4164                 }
4165                 rxr->next_to_check = 0;
4166                 rxr->next_to_refresh = 0;
4167         }
4168
4169         return (ENOBUFS);
4170 }
4171
4172 /*********************************************************************
4173  *
4174  *  Free all receive rings.
4175  *
4176  **********************************************************************/
4177 static void
4178 em_free_receive_structures(struct adapter *adapter)
4179 {
4180         struct rx_ring *rxr = adapter->rx_rings;
4181
4182         for (int i = 0; i < adapter->num_queues; i++, rxr++) {
4183                 em_free_receive_buffers(rxr);
4184                 /* Free the ring memory as well */
4185                 em_dma_free(adapter, &rxr->rxdma);
4186                 EM_RX_LOCK_DESTROY(rxr);
4187         }
4188
4189         free(adapter->rx_rings, M_DEVBUF);
4190 }
4191
4192
4193 /*********************************************************************
4194  *
4195  *  Free receive ring data structures
4196  *
4197  **********************************************************************/
4198 static void
4199 em_free_receive_buffers(struct rx_ring *rxr)
4200 {
4201         struct adapter          *adapter = rxr->adapter;
4202         struct em_buffer        *rxbuf = NULL;
4203
4204         INIT_DEBUGOUT("free_receive_buffers: begin");
4205
4206         if (rxr->rx_buffers != NULL) {
4207                 for (int i = 0; i < adapter->num_rx_desc; i++) {
4208                         rxbuf = &rxr->rx_buffers[i];
4209                         if (rxbuf->map != NULL) {
4210                                 bus_dmamap_sync(rxr->rxtag, rxbuf->map,
4211                                     BUS_DMASYNC_POSTREAD);
4212                                 bus_dmamap_unload(rxr->rxtag, rxbuf->map);
4213                                 bus_dmamap_destroy(rxr->rxtag, rxbuf->map);
4214                         }
4215                         if (rxbuf->m_head != NULL) {
4216                                 m_freem(rxbuf->m_head);
4217                                 rxbuf->m_head = NULL;
4218                         }
4219                 }
4220                 free(rxr->rx_buffers, M_DEVBUF);
4221                 rxr->rx_buffers = NULL;
4222                 rxr->next_to_check = 0;
4223                 rxr->next_to_refresh = 0;
4224         }
4225
4226         if (rxr->rxtag != NULL) {
4227                 bus_dma_tag_destroy(rxr->rxtag);
4228                 rxr->rxtag = NULL;
4229         }
4230
4231         return;
4232 }
4233
4234
4235 /*********************************************************************
4236  *
4237  *  Enable receive unit.
4238  *
4239  **********************************************************************/
4240 #define MAX_INTS_PER_SEC        8000
4241 #define DEFAULT_ITR          (1000000000/(MAX_INTS_PER_SEC * 256))
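/*
** Editor's note: the arithmetic, worked through.  The ITR register counts
** in 256ns units, so with MAX_INTS_PER_SEC = 8000:
**
**      DEFAULT_ITR = 1000000000 / (8000 * 256) = 488
**
** giving at least 488 * 256ns (~125us) between interrupts, which caps the
** rate at roughly 8000 interrupts per second.
*/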
4242
4243 static void
4244 em_initialize_receive_unit(struct adapter *adapter)
4245 {
4246         struct rx_ring  *rxr = adapter->rx_rings;
4247         struct ifnet    *ifp = adapter->ifp;
4248         struct e1000_hw *hw = &adapter->hw;
4249         u64     bus_addr;
4250         u32     rctl, rxcsum;
4251
4252         INIT_DEBUGOUT("em_initialize_receive_unit: begin");
4253
4254         /*
4255          * Make sure receives are disabled while setting
4256          * up the descriptor ring
4257          */
4258         rctl = E1000_READ_REG(hw, E1000_RCTL);
4259         /* Never disable receives on these parts once they have been enabled */
4260         if ((hw->mac.type != e1000_82574) && (hw->mac.type != e1000_82583))
4261                 E1000_WRITE_REG(hw, E1000_RCTL, rctl & ~E1000_RCTL_EN);
4262
4263         E1000_WRITE_REG(&adapter->hw, E1000_RADV,
4264             adapter->rx_abs_int_delay.value);
4265         /*
4266          * Set the interrupt throttling rate. Value is calculated
4267          * as DEFAULT_ITR = 1/(MAX_INTS_PER_SEC * 256ns)
4268          */
4269         E1000_WRITE_REG(hw, E1000_ITR, DEFAULT_ITR);
4270
4271         /*
4272         ** When using MSIX interrupts we need to throttle
4273         ** using the EITR register (82574 only)
4274         */
4275         if (hw->mac.type == e1000_82574) {
4276                 for (int i = 0; i < 4; i++)
4277                         E1000_WRITE_REG(hw, E1000_EITR_82574(i),
4278                             DEFAULT_ITR);
4279                 /* Disable accelerated acknowledge */
4280                 E1000_WRITE_REG(hw, E1000_RFCTL, E1000_RFCTL_ACK_DIS);
4281         }
4282
4283         if (ifp->if_capenable & IFCAP_RXCSUM) {
4284                 rxcsum = E1000_READ_REG(hw, E1000_RXCSUM);
4285                 rxcsum |= (E1000_RXCSUM_IPOFL | E1000_RXCSUM_TUOFL);
4286                 E1000_WRITE_REG(hw, E1000_RXCSUM, rxcsum);
4287         }
4288
4289         /*
4290         ** XXX TEMPORARY WORKAROUND: on some systems with the 82573
4291         ** (e.g. the Lenovo X60), long latencies are observed. This
4292         ** change eliminates the problem, but since positive values
4293         ** in RDTR are a known source of problems on other platforms,
4294         ** another solution is being sought.
4295         */
4296         if (hw->mac.type == e1000_82573)
4297                 E1000_WRITE_REG(hw, E1000_RDTR, 0x20);
4298
4299         for (int i = 0; i < adapter->num_queues; i++, rxr++) {
4300                 /* Setup the Base and Length of the Rx Descriptor Ring */
4301                 bus_addr = rxr->rxdma.dma_paddr;
4302                 E1000_WRITE_REG(hw, E1000_RDLEN(i),
4303                     adapter->num_rx_desc * sizeof(struct e1000_rx_desc));
4304                 E1000_WRITE_REG(hw, E1000_RDBAH(i), (u32)(bus_addr >> 32));
4305                 E1000_WRITE_REG(hw, E1000_RDBAL(i), (u32)bus_addr);
4306                 /* Setup the Head and Tail Descriptor Pointers */
4307                 E1000_WRITE_REG(hw, E1000_RDH(i), 0);
4308 #ifdef DEV_NETMAP
4309                 /*
4310                  * an init() while a netmap client is active must
4311                  * preserve the rx buffers passed to userspace.
4312                  * In this driver it means we adjust RDT to
4313                  * something different from na->num_rx_desc - 1.
4314                  */
4315                 if (ifp->if_capenable & IFCAP_NETMAP) {
4316                         struct netmap_adapter *na = NA(adapter->ifp);
4317                         struct netmap_kring *kring = &na->rx_rings[i];
4318                         int t = na->num_rx_desc - 1 - kring->nr_hwavail;
4319
4320                         E1000_WRITE_REG(hw, E1000_RDT(i), t);
4321                 } else
4322 #endif /* DEV_NETMAP */
4323                 E1000_WRITE_REG(hw, E1000_RDT(i), adapter->num_rx_desc - 1);
4324         }
4325
4326         /* Set PTHRESH for improved jumbo performance */
4327         if (((adapter->hw.mac.type == e1000_ich9lan) ||
4328             (adapter->hw.mac.type == e1000_pch2lan) ||
4329             (adapter->hw.mac.type == e1000_ich10lan)) &&
4330             (ifp->if_mtu > ETHERMTU)) {
4331                 u32 rxdctl = E1000_READ_REG(hw, E1000_RXDCTL(0));
4332                 E1000_WRITE_REG(hw, E1000_RXDCTL(0), rxdctl | 3);
4333         }
4334                 
4335         if (adapter->hw.mac.type == e1000_pch2lan) {
4336                 if (ifp->if_mtu > ETHERMTU)
4337                         e1000_lv_jumbo_workaround_ich8lan(hw, TRUE);
4338                 else
4339                         e1000_lv_jumbo_workaround_ich8lan(hw, FALSE);
4340         }
4341
4342         /* Setup the Receive Control Register */
4343         rctl &= ~(3 << E1000_RCTL_MO_SHIFT);
4344         rctl |= E1000_RCTL_EN | E1000_RCTL_BAM |
4345             E1000_RCTL_LBM_NO | E1000_RCTL_RDMTS_HALF |
4346             (hw->mac.mc_filter_type << E1000_RCTL_MO_SHIFT);
4347
4348         /* Strip the CRC */
4349         rctl |= E1000_RCTL_SECRC;
4350
4351         /* Make sure VLAN Filters are off */
4352         rctl &= ~E1000_RCTL_VFE;
4353         rctl &= ~E1000_RCTL_SBP;
4354
4355         if (adapter->rx_mbuf_sz == MCLBYTES)
4356                 rctl |= E1000_RCTL_SZ_2048;
4357         else if (adapter->rx_mbuf_sz == MJUMPAGESIZE)
4358                 rctl |= E1000_RCTL_SZ_4096 | E1000_RCTL_BSEX;
4359         else if (adapter->rx_mbuf_sz > MJUMPAGESIZE)
4360                 rctl |= E1000_RCTL_SZ_8192 | E1000_RCTL_BSEX;
4361
4362         if (ifp->if_mtu > ETHERMTU)
4363                 rctl |= E1000_RCTL_LPE;
4364         else
4365                 rctl &= ~E1000_RCTL_LPE;
4366
4367         /* Write out the settings */
4368         E1000_WRITE_REG(hw, E1000_RCTL, rctl);
4369
4370         return;
4371 }
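/*
** Editor's note on the netmap RDT computation above, worked through: with
** a 256-descriptor ring and 16 buffers still held by userspace
** (kring->nr_hwavail == 16), RDT is set to 256 - 1 - 16 = 239, keeping
** those 16 slots out of the hardware's reach until netmap returns them.
*/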
4372
4373
4374 /*********************************************************************
4375  *
4376  *  This routine executes in interrupt context. It replenishes
4377  *  the mbufs in the descriptors and passes data which has been
4378  *  DMA'ed into host memory up to the stack.
4379  *
4380  *  We loop at most count times if count is > 0, or until done if
4381  *  count < 0.
4382  *
4383  *  For polling we also return the count of cleaned packets via *done.
4384  *********************************************************************/
4385 static bool
4386 em_rxeof(struct rx_ring *rxr, int count, int *done)
4387 {
4388         struct adapter          *adapter = rxr->adapter;
4389         struct ifnet            *ifp = adapter->ifp;
4390         struct mbuf             *mp, *sendmp;
4391         u8                      status = 0;
4392         u16                     len;
4393         int                     i, processed, rxdone = 0;
4394         bool                    eop;
4395         struct e1000_rx_desc    *cur;
4396
4397         EM_RX_LOCK(rxr);
4398
4399 #ifdef DEV_NETMAP
4400         if (ifp->if_capenable & IFCAP_NETMAP) {
4401                 struct netmap_adapter *na = NA(ifp);
4402
4403                 na->rx_rings[rxr->me].nr_kflags |= NKR_PENDINTR;
4404                 selwakeuppri(&na->rx_rings[rxr->me].si, PI_NET);
4405                 EM_RX_UNLOCK(rxr);
4406                 EM_CORE_LOCK(adapter);
4407                 selwakeuppri(&na->rx_si, PI_NET);
4408                 EM_CORE_UNLOCK(adapter);
4409                 return (0);
4410         }
4411 #endif /* DEV_NETMAP */
4412
4413         for (i = rxr->next_to_check, processed = 0; count != 0;) {
4414
4415                 if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
4416                         break;
4417
4418                 bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
4419                     BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
4420
4421                 cur = &rxr->rx_base[i];
4422                 status = cur->status;
4423                 mp = sendmp = NULL;
4424
4425                 if ((status & E1000_RXD_STAT_DD) == 0)
4426                         break;
4427
4428                 len = le16toh(cur->length);
4429                 eop = (status & E1000_RXD_STAT_EOP) != 0;
4430
4431                 if ((cur->errors & E1000_RXD_ERR_FRAME_ERR_MASK) ||
4432                     (rxr->discard == TRUE)) {
4433                         ifp->if_ierrors++;
4434                         ++rxr->rx_discarded;
4435                         if (!eop) /* Catch subsequent segs */
4436                                 rxr->discard = TRUE;
4437                         else
4438                                 rxr->discard = FALSE;
4439                         em_rx_discard(rxr, i);
4440                         goto next_desc;
4441                 }
4442
4443                 /* Assign correct length to the current fragment */
4444                 mp = rxr->rx_buffers[i].m_head;
4445                 mp->m_len = len;
4446
4447                 /* Trigger for refresh */
4448                 rxr->rx_buffers[i].m_head = NULL;
4449
4450                 /* First segment? */
4451                 if (rxr->fmp == NULL) {
4452                         mp->m_pkthdr.len = len;
4453                         rxr->fmp = rxr->lmp = mp;
4454                 } else {
4455                         /* Chain mbuf's together */
4456                         mp->m_flags &= ~M_PKTHDR;
4457                         rxr->lmp->m_next = mp;
4458                         rxr->lmp = mp;
4459                         rxr->fmp->m_pkthdr.len += len;
4460                 }
4461
4462                 if (eop) {
4463                         --count;
4464                         sendmp = rxr->fmp;
4465                         sendmp->m_pkthdr.rcvif = ifp;
4466                         ifp->if_ipackets++;
4467                         em_receive_checksum(cur, sendmp);
4468 #ifndef __NO_STRICT_ALIGNMENT
4469                         if (adapter->max_frame_size >
4470                             (MCLBYTES - ETHER_ALIGN) &&
4471                             em_fixup_rx(rxr) != 0)
4472                                 goto skip;
4473 #endif
4474                         if (status & E1000_RXD_STAT_VP) {
4475                                 sendmp->m_pkthdr.ether_vtag =
4476                                     le16toh(cur->special);
4477                                 sendmp->m_flags |= M_VLANTAG;
4478                         }
4479 #ifndef __NO_STRICT_ALIGNMENT
4480 skip:
4481 #endif
4482                         rxr->fmp = rxr->lmp = NULL;
4483                 }
4484 next_desc:
4485                 /* Zero out the receive descriptors status. */
4486                 cur->status = 0;
4487                 ++rxdone;       /* cumulative for POLL */
4488                 ++processed;
4489
4490                 /* Advance our pointers to the next descriptor. */
4491                 if (++i == adapter->num_rx_desc)
4492                         i = 0;
4493
4494                 /* Send to the stack */
4495                 if (sendmp != NULL) {
4496                         rxr->next_to_check = i;
4497                         EM_RX_UNLOCK(rxr);
4498                         (*ifp->if_input)(ifp, sendmp);
4499                         EM_RX_LOCK(rxr);
4500                         i = rxr->next_to_check;
4501                 }
4502
4503                 /* Only refresh mbufs every 8 descriptors */
4504                 if (processed == 8) {
4505                         em_refresh_mbufs(rxr, i);
4506                         processed = 0;
4507                 }
4508         }
4509
4510         /* Catch any remaining refresh work */
4511         if (e1000_rx_unrefreshed(rxr))
4512                 em_refresh_mbufs(rxr, i);
4513
4514         rxr->next_to_check = i;
4515         if (done != NULL)
4516                 *done = rxdone;
4517         EM_RX_UNLOCK(rxr);
4518
4519         return ((status & E1000_RXD_STAT_DD) ? TRUE : FALSE);
4520 }
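/*
** Editor's sketch of the count/done contract (hypothetical caller, modeled
** on the DEVICE_POLLING path; 'budget', 'rx_done' and 'more' are assumed
** local names, not part of this driver):
*/
#if 0
        int budget = 64, rx_done = 0;
        bool more;

        /* poll path: process at most 'budget' packets, report via rx_done */
        more = em_rxeof(rxr, budget, &rx_done);

        /* interrupt path: a negative count runs until the ring is clean */
        (void) em_rxeof(rxr, -1, NULL);
#endif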
4521
4522 static __inline void
4523 em_rx_discard(struct rx_ring *rxr, int i)
4524 {
4525         struct em_buffer        *rbuf;
4526
4527         rbuf = &rxr->rx_buffers[i];
4528         /* Free any previous pieces */
4529         if (rxr->fmp != NULL) {
4530                 rxr->fmp->m_flags |= M_PKTHDR;
4531                 m_freem(rxr->fmp);
4532                 rxr->fmp = NULL;
4533                 rxr->lmp = NULL;
4534         }
4535         /*
4536         ** Free buffer and allow em_refresh_mbufs()
4537         ** to clean up and recharge buffer.
4538         */
4539         if (rbuf->m_head) {
4540                 m_free(rbuf->m_head);
4541                 rbuf->m_head = NULL;
4542         }
4543         return;
4544 }
4545
4546 #ifndef __NO_STRICT_ALIGNMENT
4547 /*
4548  * When jumbo frames are enabled we should realign the entire payload on
4549  * architectures with strict alignment. This is a serious design mistake of
4550  * the 8254x, as it negates the benefit of DMA. The 8254x only allows the RX
4551  * buffer size to be 2048/4096/8192/16384; what we really want is
4552  * 2048 - ETHER_ALIGN, which would align the payload. On architectures
4553  * without strict alignment restrictions the 8254x still performs unaligned
4554  * memory accesses, which also reduces performance. To avoid copying over an
4555  * entire frame to realign it, we allocate a new mbuf and copy the ethernet
4556  * header into it. The new mbuf is prepended onto the existing mbuf chain.
4557  *
4558  * Be aware, the best performance of the 8254x is achieved only when jumbo
4559  * frames are not used at all on architectures with strict alignment.
4560  */
4561 static int
4562 em_fixup_rx(struct rx_ring *rxr)
4563 {
4564         struct adapter *adapter = rxr->adapter;
4565         struct mbuf *m, *n;
4566         int error;
4567
4568         error = 0;
4569         m = rxr->fmp;
4570         if (m->m_len <= (MCLBYTES - ETHER_HDR_LEN)) {
4571                 bcopy(m->m_data, m->m_data + ETHER_HDR_LEN, m->m_len);
4572                 m->m_data += ETHER_HDR_LEN;
4573         } else {
4574                 MGETHDR(n, M_DONTWAIT, MT_DATA);
4575                 if (n != NULL) {
4576                         bcopy(m->m_data, n->m_data, ETHER_HDR_LEN);
4577                         m->m_data += ETHER_HDR_LEN;
4578                         m->m_len -= ETHER_HDR_LEN;
4579                         n->m_len = ETHER_HDR_LEN;
4580                         M_MOVE_PKTHDR(n, m);
4581                         n->m_next = m;
4582                         rxr->fmp = n;
4583                 } else {
4584                         adapter->dropped_pkts++;
4585                         m_freem(rxr->fmp);
4586                         rxr->fmp = NULL;
4587                         error = ENOMEM;
4588                 }
4589         }
4590
4591         return (error);
4592 }
4593 #endif
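/*
** Editor's note: the alignment problem above, worked through.  An ethernet
** header is ETHER_HDR_LEN (14) bytes, so a frame received into a 4-byte
** aligned buffer leaves the IP header at offset 14, misaligned for 32-bit
** loads.  The usual fix is to start the buffer ETHER_ALIGN (2) bytes in,
** placing the IP header at offset 16; the 8254x's fixed 2K/4K/8K/16K
** buffer sizes make that offset impossible, hence the header copy above.
*/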
4594
4595 /*********************************************************************
4596  *
4597  *  Verify that the hardware indicated that the checksum is valid.
4598  *  Inform the stack about the status of the checksum so that the
4599  *  stack doesn't spend time verifying it again.
4600  *
4601  *********************************************************************/
4602 static void
4603 em_receive_checksum(struct e1000_rx_desc *rx_desc, struct mbuf *mp)
4604 {
4605         /* Ignore Checksum bit is set */
4606         if (rx_desc->status & E1000_RXD_STAT_IXSM) {
4607                 mp->m_pkthdr.csum_flags = 0;
4608                 return;
4609         }
4610
4611         if (rx_desc->status & E1000_RXD_STAT_IPCS) {
4612                 /* Did it pass? */
4613                 if (!(rx_desc->errors & E1000_RXD_ERR_IPE)) {
4614                         /* IP Checksum Good */
4615                         mp->m_pkthdr.csum_flags = CSUM_IP_CHECKED;
4616                         mp->m_pkthdr.csum_flags |= CSUM_IP_VALID;
4617
4618                 } else {
4619                         mp->m_pkthdr.csum_flags = 0;
4620                 }
4621         }
4622
4623         if (rx_desc->status & E1000_RXD_STAT_TCPCS) {
4624                 /* Did it pass? */
4625                 if (!(rx_desc->errors & E1000_RXD_ERR_TCPE)) {
4626                         mp->m_pkthdr.csum_flags |=
4627                         (CSUM_DATA_VALID | CSUM_PSEUDO_HDR);
4628                         mp->m_pkthdr.csum_data = htons(0xffff);
4629                 }
4630         }
4631 }
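/*
** Editor's sketch (simplified illustration of the standard FreeBSD
** convention, not code from this driver) of how an upper layer consumes
** the flags set above:
*/
#if 0
        if ((mp->m_pkthdr.csum_flags &
            (CSUM_DATA_VALID | CSUM_PSEUDO_HDR)) ==
            (CSUM_DATA_VALID | CSUM_PSEUDO_HDR) &&
            mp->m_pkthdr.csum_data == 0xffff) {
                /* hardware verified the TCP/UDP checksum; skip software check */
        }
#endif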
4632
4633 /*
4634  * This routine is run via a vlan
4635  * config EVENT
4636  */
4637 static void
4638 em_register_vlan(void *arg, struct ifnet *ifp, u16 vtag)
4639 {
4640         struct adapter  *adapter = ifp->if_softc;
4641         u32             index, bit;
4642
4643         if (ifp->if_softc !=  arg)   /* Not our event */
4644                 return;
4645
4646         if ((vtag == 0) || (vtag > 4095))       /* Invalid ID */
4647                 return;
4648
4649         EM_CORE_LOCK(adapter);
4650         index = (vtag >> 5) & 0x7F;
4651         bit = vtag & 0x1F;
4652         adapter->shadow_vfta[index] |= (1 << bit);
4653         ++adapter->num_vlans;
4654         /* Re-init to load the changes */
4655         if (ifp->if_capenable & IFCAP_VLAN_HWFILTER)
4656                 em_init_locked(adapter);
4657         EM_CORE_UNLOCK(adapter);
4658 }
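/*
** Editor's note: the VFTA is a 4096-bit table stored as 128 32-bit words,
** so the index/bit split above is a plain bit-array lookup.  Worked
** example: vtag 100 -> index = (100 >> 5) & 0x7F = 3, bit = 100 & 0x1F = 4,
** i.e. bit 4 of shadow_vfta[3].
*/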
4659
4660 /*
4661  * This routine is run via a vlan
4662  * unconfig EVENT
4663  */
4664 static void
4665 em_unregister_vlan(void *arg, struct ifnet *ifp, u16 vtag)
4666 {
4667         struct adapter  *adapter = ifp->if_softc;
4668         u32             index, bit;
4669
4670         if (ifp->if_softc !=  arg)
4671                 return;
4672
4673         if ((vtag == 0) || (vtag > 4095))       /* Invalid */
4674                 return;
4675
4676         EM_CORE_LOCK(adapter);
4677         index = (vtag >> 5) & 0x7F;
4678         bit = vtag & 0x1F;
4679         adapter->shadow_vfta[index] &= ~(1 << bit);
4680         --adapter->num_vlans;
4681         /* Re-init to load the changes */
4682         if (ifp->if_capenable & IFCAP_VLAN_HWFILTER)
4683                 em_init_locked(adapter);
4684         EM_CORE_UNLOCK(adapter);
4685 }
4686
4687 static void
4688 em_setup_vlan_hw_support(struct adapter *adapter)
4689 {
4690         struct e1000_hw *hw = &adapter->hw;
4691         u32             reg;
4692
4693         /*
4694         ** We get here thru init_locked, meaning a
4695         ** soft reset, which has already cleared the
4696         ** VFTA and other state; if no vlans have
4697         ** been registered, do nothing.
4698         */
4699         if (adapter->num_vlans == 0)
4700                 return;
4701
4702         /*
4703         ** A soft reset zeros out the VFTA, so
4704         ** we need to repopulate it now.
4705         */
4706         for (int i = 0; i < EM_VFTA_SIZE; i++)
4707                 if (adapter->shadow_vfta[i] != 0)
4708                         E1000_WRITE_REG_ARRAY(hw, E1000_VFTA,
4709                             i, adapter->shadow_vfta[i]);
4710
4711         reg = E1000_READ_REG(hw, E1000_CTRL);
4712         reg |= E1000_CTRL_VME;
4713         E1000_WRITE_REG(hw, E1000_CTRL, reg);
4714
4715         /* Enable the Filter Table */
4716         reg = E1000_READ_REG(hw, E1000_RCTL);
4717         reg &= ~E1000_RCTL_CFIEN;
4718         reg |= E1000_RCTL_VFE;
4719         E1000_WRITE_REG(hw, E1000_RCTL, reg);
4720 }
4721
4722 static void
4723 em_enable_intr(struct adapter *adapter)
4724 {
4725         struct e1000_hw *hw = &adapter->hw;
4726         u32 ims_mask = IMS_ENABLE_MASK;
4727
4728         if (hw->mac.type == e1000_82574) {
4729                 E1000_WRITE_REG(hw, EM_EIAC, EM_MSIX_MASK);
4730                 ims_mask |= EM_MSIX_MASK;
4731         } 
4732         E1000_WRITE_REG(hw, E1000_IMS, ims_mask);
4733 }
4734
4735 static void
4736 em_disable_intr(struct adapter *adapter)
4737 {
4738         struct e1000_hw *hw = &adapter->hw;
4739
4740         if (hw->mac.type == e1000_82574)
4741                 E1000_WRITE_REG(hw, EM_EIAC, 0);
4742         E1000_WRITE_REG(&adapter->hw, E1000_IMC, 0xffffffff);
4743 }
4744
4745 /*
4746  * Bit of a misnomer, what this really means is
4747  * to enable OS management of the system... aka
4748  * to disable special hardware management features 
4749  */
4750 static void
4751 em_init_manageability(struct adapter *adapter)
4752 {
4753         /* A shared code workaround */
4754 #define E1000_82542_MANC2H E1000_MANC2H
4755         if (adapter->has_manage) {
4756                 int manc2h = E1000_READ_REG(&adapter->hw, E1000_MANC2H);
4757                 int manc = E1000_READ_REG(&adapter->hw, E1000_MANC);
4758
4759                 /* disable hardware interception of ARP */
4760                 manc &= ~(E1000_MANC_ARP_EN);
4761
4762                 /* enable receiving management packets to the host */
4763                 manc |= E1000_MANC_EN_MNG2HOST;
4764 #define E1000_MNG2HOST_PORT_623 (1 << 5)
4765 #define E1000_MNG2HOST_PORT_664 (1 << 6)
4766                 manc2h |= E1000_MNG2HOST_PORT_623;
4767                 manc2h |= E1000_MNG2HOST_PORT_664;
4768                 E1000_WRITE_REG(&adapter->hw, E1000_MANC2H, manc2h);
4769                 E1000_WRITE_REG(&adapter->hw, E1000_MANC, manc);
4770         }
4771 }
4772
4773 /*
4774  * Give control back to hardware management
4775  * controller if there is one.
4776  */
4777 static void
4778 em_release_manageability(struct adapter *adapter)
4779 {
4780         if (adapter->has_manage) {
4781                 int manc = E1000_READ_REG(&adapter->hw, E1000_MANC);
4782
4783                 /* re-enable hardware interception of ARP */
4784                 manc |= E1000_MANC_ARP_EN;
4785                 manc &= ~E1000_MANC_EN_MNG2HOST;
4786
4787                 E1000_WRITE_REG(&adapter->hw, E1000_MANC, manc);
4788         }
4789 }
4790
4791 /*
4792  * em_get_hw_control sets the {CTRL_EXT|FWSM}:DRV_LOAD bit.
4793  * For ASF and Pass Through versions of f/w this means
4794  * that the driver is loaded. For AMT version type f/w
4795  * this means that the network i/f is open.
4796  */
4797 static void
4798 em_get_hw_control(struct adapter *adapter)
4799 {
4800         u32 ctrl_ext, swsm;
4801
4802         if (adapter->hw.mac.type == e1000_82573) {
4803                 swsm = E1000_READ_REG(&adapter->hw, E1000_SWSM);
4804                 E1000_WRITE_REG(&adapter->hw, E1000_SWSM,
4805                     swsm | E1000_SWSM_DRV_LOAD);
4806                 return;
4807         }
4808         /* else */
4809         ctrl_ext = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
4810         E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT,
4811             ctrl_ext | E1000_CTRL_EXT_DRV_LOAD);
4812         return;
4813 }
4814
4815 /*
4816  * em_release_hw_control resets {CTRL_EXT|FWSM}:DRV_LOAD bit.
4817  * For ASF and Pass Through versions of f/w this means that
4818  * the driver is no longer loaded. For AMT versions of the
4819  * f/w this means that the network i/f is closed.
4820  */
4821 static void
4822 em_release_hw_control(struct adapter *adapter)
4823 {
4824         u32 ctrl_ext, swsm;
4825
4826         if (!adapter->has_manage)
4827                 return;
4828
4829         if (adapter->hw.mac.type == e1000_82573) {
4830                 swsm = E1000_READ_REG(&adapter->hw, E1000_SWSM);
4831                 E1000_WRITE_REG(&adapter->hw, E1000_SWSM,
4832                     swsm & ~E1000_SWSM_DRV_LOAD);
4833                 return;
4834         }
4835         /* else */
4836         ctrl_ext = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
4837         E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT,
4838             ctrl_ext & ~E1000_CTRL_EXT_DRV_LOAD);
4839         return;
4840 }
4841
4842 static int
4843 em_is_valid_ether_addr(u8 *addr)
4844 {
4845         char zero_addr[6] = { 0, 0, 0, 0, 0, 0 };
4846
4847         if ((addr[0] & 1) || (!bcmp(addr, zero_addr, ETHER_ADDR_LEN))) {
4848                 return (FALSE);
4849         }
4850
4851         return (TRUE);
4852 }
4853
4854 /*
4855 ** Parse the interface capabilities with regard
4856 ** to both system management and wake-on-lan for
4857 ** later use.
4858 */
4859 static void
4860 em_get_wakeup(device_t dev)
4861 {
4862         struct adapter  *adapter = device_get_softc(dev);
4863         u16             eeprom_data = 0, device_id, apme_mask;
4864
4865         adapter->has_manage = e1000_enable_mng_pass_thru(&adapter->hw);
4866         apme_mask = EM_EEPROM_APME;
4867
4868         switch (adapter->hw.mac.type) {
4869         case e1000_82573:
4870         case e1000_82583:
4871                 adapter->has_amt = TRUE;
4872                 /* Falls thru */
4873         case e1000_82571:
4874         case e1000_82572:
4875         case e1000_80003es2lan:
4876                 if (adapter->hw.bus.func == 1) {
4877                         e1000_read_nvm(&adapter->hw,
4878                             NVM_INIT_CONTROL3_PORT_B, 1, &eeprom_data);
4879                         break;
4880                 } else
4881                         e1000_read_nvm(&adapter->hw,
4882                             NVM_INIT_CONTROL3_PORT_A, 1, &eeprom_data);
4883                 break;
4884         case e1000_ich8lan:
4885         case e1000_ich9lan:
4886         case e1000_ich10lan:
4887         case e1000_pchlan:
4888         case e1000_pch2lan:
4889                 apme_mask = E1000_WUC_APME;
4890                 adapter->has_amt = TRUE;
4891                 eeprom_data = E1000_READ_REG(&adapter->hw, E1000_WUC);
4892                 break;
4893         default:
4894                 e1000_read_nvm(&adapter->hw,
4895                     NVM_INIT_CONTROL3_PORT_A, 1, &eeprom_data);
4896                 break;
4897         }
4898         if (eeprom_data & apme_mask)
4899                 adapter->wol = (E1000_WUFC_MAG | E1000_WUFC_MC);
4900         /*
4901          * We have the eeprom settings, now apply the special cases
4902          * where the eeprom may be wrong or the board won't support
4903          * wake on lan on a particular port
4904          */
4905         device_id = pci_get_device(dev);
4906         switch (device_id) {
4907         case E1000_DEV_ID_82571EB_FIBER:
4908                 /* Wake events only supported on port A for dual fiber
4909                  * regardless of eeprom setting */
4910                 if (E1000_READ_REG(&adapter->hw, E1000_STATUS) &
4911                     E1000_STATUS_FUNC_1)
4912                         adapter->wol = 0;
4913                 break;
4914         case E1000_DEV_ID_82571EB_QUAD_COPPER:
4915         case E1000_DEV_ID_82571EB_QUAD_FIBER:
4916         case E1000_DEV_ID_82571EB_QUAD_COPPER_LP:
4917                 /* if quad port adapter, disable WoL on all but port A */
4918                 if (global_quad_port_a != 0)
4919                         adapter->wol = 0;
4920                 /* Reset for multiple quad port adapters */
4921                 if (++global_quad_port_a == 4)
4922                         global_quad_port_a = 0;
4923                 break;
4924         }
4925         return;
4926 }
4927
4928
4929 /*
4930  * Enable PCI Wake On Lan capability
4931  */
4932 static void
4933 em_enable_wakeup(device_t dev)
4934 {
4935         struct adapter  *adapter = device_get_softc(dev);
4936         struct ifnet    *ifp = adapter->ifp;
4937         u32             pmc, ctrl, ctrl_ext, rctl;
4938         u16             status;
4939
4940         if (pci_find_cap(dev, PCIY_PMG, &pmc) != 0)
4941                 return;
4942
4943         /* Advertise the wakeup capability */
4944         ctrl = E1000_READ_REG(&adapter->hw, E1000_CTRL);
4945         ctrl |= (E1000_CTRL_SWDPIN2 | E1000_CTRL_SWDPIN3);
4946         E1000_WRITE_REG(&adapter->hw, E1000_CTRL, ctrl);
4947         E1000_WRITE_REG(&adapter->hw, E1000_WUC, E1000_WUC_PME_EN);
4948
4949         if ((adapter->hw.mac.type == e1000_ich8lan) ||
4950             (adapter->hw.mac.type == e1000_pchlan) ||
4951             (adapter->hw.mac.type == e1000_ich9lan) ||
4952             (adapter->hw.mac.type == e1000_ich10lan))
4953                 e1000_suspend_workarounds_ich8lan(&adapter->hw);
4954
4955         /* Keep the laser running on Fiber adapters */
4956         if (adapter->hw.phy.media_type == e1000_media_type_fiber ||
4957             adapter->hw.phy.media_type == e1000_media_type_internal_serdes) {
4958                 ctrl_ext = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
4959                 ctrl_ext |= E1000_CTRL_EXT_SDP3_DATA;
4960                 E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT, ctrl_ext);
4961         }
4962
4963         /*
4964         ** Determine type of Wakeup: note that wol
4965         ** is set with all bits on by default.
4966         */
4967         if ((ifp->if_capenable & IFCAP_WOL_MAGIC) == 0)
4968                 adapter->wol &= ~E1000_WUFC_MAG;
4969
4970         if ((ifp->if_capenable & IFCAP_WOL_MCAST) == 0)
4971                 adapter->wol &= ~E1000_WUFC_MC;
4972         else {
4973                 rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
4974                 rctl |= E1000_RCTL_MPE;
4975                 E1000_WRITE_REG(&adapter->hw, E1000_RCTL, rctl);
4976         }
4977
4978         if ((adapter->hw.mac.type == e1000_pchlan) ||
4979             (adapter->hw.mac.type == e1000_pch2lan)) {
4980                 if (em_enable_phy_wakeup(adapter))
4981                         return;
4982         } else {
4983                 E1000_WRITE_REG(&adapter->hw, E1000_WUC, E1000_WUC_PME_EN);
4984                 E1000_WRITE_REG(&adapter->hw, E1000_WUFC, adapter->wol);
4985         }
4986
4987         if (adapter->hw.phy.type == e1000_phy_igp_3)
4988                 e1000_igp3_phy_powerdown_workaround_ich8lan(&adapter->hw);
4989
4990         /* Request PME */
4991         status = pci_read_config(dev, pmc + PCIR_POWER_STATUS, 2);
4992         status &= ~(PCIM_PSTAT_PME | PCIM_PSTAT_PMEENABLE);
4993         if (ifp->if_capenable & IFCAP_WOL)
4994                 status |= PCIM_PSTAT_PME | PCIM_PSTAT_PMEENABLE;
4995         pci_write_config(dev, pmc + PCIR_POWER_STATUS, status, 2);
4996
4997         return;
4998 }
4999
5000 /*
5001 ** WOL in the newer chipset interfaces (pchlan)
5002 ** requires things to be copied into the PHY
5003 */
5004 static int
5005 em_enable_phy_wakeup(struct adapter *adapter)
5006 {
5007         struct e1000_hw *hw = &adapter->hw;
5008         u32 mreg, ret = 0;
5009         u16 preg;
5010
5011         /* copy MAC RARs to PHY RARs */
5012         e1000_copy_rx_addrs_to_phy_ich8lan(hw);
5013
5014         /* copy MAC MTA to PHY MTA */
5015         for (int i = 0; i < adapter->hw.mac.mta_reg_count; i++) {
5016                 mreg = E1000_READ_REG_ARRAY(hw, E1000_MTA, i);
5017                 e1000_write_phy_reg(hw, BM_MTA(i), (u16)(mreg & 0xFFFF));
5018                 e1000_write_phy_reg(hw, BM_MTA(i) + 1,
5019                     (u16)((mreg >> 16) & 0xFFFF));
5020         }
5021
5022         /* configure PHY Rx Control register */
5023         e1000_read_phy_reg(&adapter->hw, BM_RCTL, &preg);
5024         mreg = E1000_READ_REG(hw, E1000_RCTL);
5025         if (mreg & E1000_RCTL_UPE)
5026                 preg |= BM_RCTL_UPE;
5027         if (mreg & E1000_RCTL_MPE)
5028                 preg |= BM_RCTL_MPE;
5029         preg &= ~(BM_RCTL_MO_MASK);
5030         if (mreg & E1000_RCTL_MO_3)
5031                 preg |= (((mreg & E1000_RCTL_MO_3) >> E1000_RCTL_MO_SHIFT)
5032                                 << BM_RCTL_MO_SHIFT);
5033         if (mreg & E1000_RCTL_BAM)
5034                 preg |= BM_RCTL_BAM;
5035         if (mreg & E1000_RCTL_PMCF)
5036                 preg |= BM_RCTL_PMCF;
5037         mreg = E1000_READ_REG(hw, E1000_CTRL);
5038         if (mreg & E1000_CTRL_RFCE)
5039                 preg |= BM_RCTL_RFCE;
5040         e1000_write_phy_reg(&adapter->hw, BM_RCTL, preg);
5041
5042         /* enable PHY wakeup in MAC register */
5043         E1000_WRITE_REG(hw, E1000_WUC,
5044             E1000_WUC_PHY_WAKE | E1000_WUC_PME_EN);
5045         E1000_WRITE_REG(hw, E1000_WUFC, adapter->wol);
5046
5047         /* configure and enable PHY wakeup in PHY registers */
5048         e1000_write_phy_reg(&adapter->hw, BM_WUFC, adapter->wol);
5049         e1000_write_phy_reg(&adapter->hw, BM_WUC, E1000_WUC_PME_EN);
5050
5051         /* activate PHY wakeup */
5052         ret = hw->phy.ops.acquire(hw);
5053         if (ret) {
5054                 printf("Could not acquire PHY\n");
5055                 return (ret);
5056         }
5057         e1000_write_phy_reg_mdic(hw, IGP01E1000_PHY_PAGE_SELECT,
5058                                  (BM_WUC_ENABLE_PAGE << IGP_PAGE_SHIFT));
5059         ret = e1000_read_phy_reg_mdic(hw, BM_WUC_ENABLE_REG, &preg);
5060         if (ret) {
5061                 printf("Could not read PHY page 769\n");
5062                 goto out;
5063         }
5064         preg |= BM_WUC_ENABLE_BIT | BM_WUC_HOST_WU_BIT;
5065         ret = e1000_write_phy_reg_mdic(hw, BM_WUC_ENABLE_REG, preg);
5066         if (ret)
5067                 printf("Could not set PHY Host Wakeup bit\n");
5068 out:
5069         hw->phy.ops.release(hw);
5070
5071         return (ret);
5072 }
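/*
** Editor's note on the MTA copy above: PHY registers are 16 bits wide, so
** each 32-bit MTA word is split across two BM_MTA registers.  Worked
** example: mreg = 0xDEADBEEF writes 0xBEEF to BM_MTA(i) and 0xDEAD to
** BM_MTA(i) + 1.
*/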
5073
5074 static void
5075 em_led_func(void *arg, int onoff)
5076 {
5077         struct adapter  *adapter = arg;
5078  
5079         EM_CORE_LOCK(adapter);
5080         if (onoff) {
5081                 e1000_setup_led(&adapter->hw);
5082                 e1000_led_on(&adapter->hw);
5083         } else {
5084                 e1000_led_off(&adapter->hw);
5085                 e1000_cleanup_led(&adapter->hw);
5086         }
5087         EM_CORE_UNLOCK(adapter);
5088 }
5089
5090 /*
5091 ** Disable the L0s and L1 link states
5092 */
5093 static void
5094 em_disable_aspm(struct adapter *adapter)
5095 {
5096         int             base, reg;
5097         u16             link_cap, link_ctrl;
5098         device_t        dev = adapter->dev;
5099
5100         switch (adapter->hw.mac.type) {
5101                 case e1000_82573:
5102                 case e1000_82574:
5103                 case e1000_82583:
5104                         break;
5105                 default:
5106                         return;
5107         }
5108         if (pci_find_cap(dev, PCIY_EXPRESS, &base) != 0)
5109                 return;
5110         reg = base + PCIR_EXPRESS_LINK_CAP;
5111         link_cap = pci_read_config(dev, reg, 2);
5112         if ((link_cap & PCIM_LINK_CAP_ASPM) == 0)
5113                 return;
5114         reg = base + PCIR_EXPRESS_LINK_CTL;
5115         link_ctrl = pci_read_config(dev, reg, 2);
5116         link_ctrl &= 0xFFFC; /* clear ASPM control bits [1:0] (L0s and L1) */
5117         pci_write_config(dev, reg, link_ctrl, 2);
5118         return;
5119 }
5120
5121 /**********************************************************************
5122  *
5123  *  Update the board statistics counters.
5124  *
5125  **********************************************************************/
5126 static void
5127 em_update_stats_counters(struct adapter *adapter)
5128 {
5129         struct ifnet   *ifp;
5130
5131         if (adapter->hw.phy.media_type == e1000_media_type_copper ||
5132            (E1000_READ_REG(&adapter->hw, E1000_STATUS) & E1000_STATUS_LU)) {
5133                 adapter->stats.symerrs += E1000_READ_REG(&adapter->hw, E1000_SYMERRS);
5134                 adapter->stats.sec += E1000_READ_REG(&adapter->hw, E1000_SEC);
5135         }
5136         adapter->stats.crcerrs += E1000_READ_REG(&adapter->hw, E1000_CRCERRS);
5137         adapter->stats.mpc += E1000_READ_REG(&adapter->hw, E1000_MPC);
5138         adapter->stats.scc += E1000_READ_REG(&adapter->hw, E1000_SCC);
5139         adapter->stats.ecol += E1000_READ_REG(&adapter->hw, E1000_ECOL);
5140
5141         adapter->stats.mcc += E1000_READ_REG(&adapter->hw, E1000_MCC);
5142         adapter->stats.latecol += E1000_READ_REG(&adapter->hw, E1000_LATECOL);
5143         adapter->stats.colc += E1000_READ_REG(&adapter->hw, E1000_COLC);
5144         adapter->stats.dc += E1000_READ_REG(&adapter->hw, E1000_DC);
5145         adapter->stats.rlec += E1000_READ_REG(&adapter->hw, E1000_RLEC);
5146         adapter->stats.xonrxc += E1000_READ_REG(&adapter->hw, E1000_XONRXC);
5147         adapter->stats.xontxc += E1000_READ_REG(&adapter->hw, E1000_XONTXC);
5148         /*
5149         ** For watchdog management we need to know if we have been
5150         ** paused during the last interval, so capture that here.
5151         */
5152         adapter->pause_frames = E1000_READ_REG(&adapter->hw, E1000_XOFFRXC);
5153         adapter->stats.xoffrxc += adapter->pause_frames;
5154         adapter->stats.xofftxc += E1000_READ_REG(&adapter->hw, E1000_XOFFTXC);
5155         adapter->stats.fcruc += E1000_READ_REG(&adapter->hw, E1000_FCRUC);
5156         adapter->stats.prc64 += E1000_READ_REG(&adapter->hw, E1000_PRC64);
5157         adapter->stats.prc127 += E1000_READ_REG(&adapter->hw, E1000_PRC127);
5158         adapter->stats.prc255 += E1000_READ_REG(&adapter->hw, E1000_PRC255);
5159         adapter->stats.prc511 += E1000_READ_REG(&adapter->hw, E1000_PRC511);
5160         adapter->stats.prc1023 += E1000_READ_REG(&adapter->hw, E1000_PRC1023);
5161         adapter->stats.prc1522 += E1000_READ_REG(&adapter->hw, E1000_PRC1522);
5162         adapter->stats.gprc += E1000_READ_REG(&adapter->hw, E1000_GPRC);
5163         adapter->stats.bprc += E1000_READ_REG(&adapter->hw, E1000_BPRC);
5164         adapter->stats.mprc += E1000_READ_REG(&adapter->hw, E1000_MPRC);
5165         adapter->stats.gptc += E1000_READ_REG(&adapter->hw, E1000_GPTC);
5166
5167         /* For the 64-bit byte counters the low dword must be read first. */
5168         /* Both registers clear on the read of the high dword */
5169
5170         adapter->stats.gorc += E1000_READ_REG(&adapter->hw, E1000_GORCL) +
5171             ((u64)E1000_READ_REG(&adapter->hw, E1000_GORCH) << 32);
5172         adapter->stats.gotc += E1000_READ_REG(&adapter->hw, E1000_GOTCL) +
5173             ((u64)E1000_READ_REG(&adapter->hw, E1000_GOTCH) << 32);
5174
5175         adapter->stats.rnbc += E1000_READ_REG(&adapter->hw, E1000_RNBC);
5176         adapter->stats.ruc += E1000_READ_REG(&adapter->hw, E1000_RUC);
5177         adapter->stats.rfc += E1000_READ_REG(&adapter->hw, E1000_RFC);
5178         adapter->stats.roc += E1000_READ_REG(&adapter->hw, E1000_ROC);
5179         adapter->stats.rjc += E1000_READ_REG(&adapter->hw, E1000_RJC);
5180
5181         adapter->stats.tor += E1000_READ_REG(&adapter->hw, E1000_TORL) + ((u64)E1000_READ_REG(&adapter->hw, E1000_TORH) << 32);
5182         adapter->stats.tot += E1000_READ_REG(&adapter->hw, E1000_TOTL) + ((u64)E1000_READ_REG(&adapter->hw, E1000_TOTH) << 32);
5183
5184         adapter->stats.tpr += E1000_READ_REG(&adapter->hw, E1000_TPR);
5185         adapter->stats.tpt += E1000_READ_REG(&adapter->hw, E1000_TPT);
5186         adapter->stats.ptc64 += E1000_READ_REG(&adapter->hw, E1000_PTC64);
5187         adapter->stats.ptc127 += E1000_READ_REG(&adapter->hw, E1000_PTC127);
5188         adapter->stats.ptc255 += E1000_READ_REG(&adapter->hw, E1000_PTC255);
5189         adapter->stats.ptc511 += E1000_READ_REG(&adapter->hw, E1000_PTC511);
5190         adapter->stats.ptc1023 += E1000_READ_REG(&adapter->hw, E1000_PTC1023);
5191         adapter->stats.ptc1522 += E1000_READ_REG(&adapter->hw, E1000_PTC1522);
5192         adapter->stats.mptc += E1000_READ_REG(&adapter->hw, E1000_MPTC);
5193         adapter->stats.bptc += E1000_READ_REG(&adapter->hw, E1000_BPTC);
5194
5195         /* Interrupt Counts */
5196
5197         adapter->stats.iac += E1000_READ_REG(&adapter->hw, E1000_IAC);
5198         adapter->stats.icrxptc += E1000_READ_REG(&adapter->hw, E1000_ICRXPTC);
5199         adapter->stats.icrxatc += E1000_READ_REG(&adapter->hw, E1000_ICRXATC);
5200         adapter->stats.ictxptc += E1000_READ_REG(&adapter->hw, E1000_ICTXPTC);
5201         adapter->stats.ictxatc += E1000_READ_REG(&adapter->hw, E1000_ICTXATC);
5202         adapter->stats.ictxqec += E1000_READ_REG(&adapter->hw, E1000_ICTXQEC);
5203         adapter->stats.ictxqmtc += E1000_READ_REG(&adapter->hw, E1000_ICTXQMTC);
5204         adapter->stats.icrxdmtc += E1000_READ_REG(&adapter->hw, E1000_ICRXDMTC);
5205         adapter->stats.icrxoc += E1000_READ_REG(&adapter->hw, E1000_ICRXOC);
5206
5207         if (adapter->hw.mac.type >= e1000_82543) {
5208                 adapter->stats.algnerrc +=
5209                     E1000_READ_REG(&adapter->hw, E1000_ALGNERRC);
5210                 adapter->stats.rxerrc +=
5211                     E1000_READ_REG(&adapter->hw, E1000_RXERRC);
5212                 adapter->stats.tncrs +=
5213                     E1000_READ_REG(&adapter->hw, E1000_TNCRS);
5214                 adapter->stats.cexterr +=
5215                     E1000_READ_REG(&adapter->hw, E1000_CEXTERR);
5216                 adapter->stats.tsctc +=
5217                     E1000_READ_REG(&adapter->hw, E1000_TSCTC);
5218                 adapter->stats.tsctfc +=
5219                     E1000_READ_REG(&adapter->hw, E1000_TSCTFC);
5220         }
5221         ifp = adapter->ifp;
5222
5223         ifp->if_collisions = adapter->stats.colc;
5224
5225         /* Rx Errors */
5226         ifp->if_ierrors = adapter->dropped_pkts + adapter->stats.rxerrc +
5227             adapter->stats.crcerrs + adapter->stats.algnerrc +
5228             adapter->stats.ruc + adapter->stats.roc +
5229             adapter->stats.mpc + adapter->stats.cexterr;
5230
5231         /* Tx Errors */
5232         ifp->if_oerrors = adapter->stats.ecol +
5233             adapter->stats.latecol + adapter->watchdog_events;
5234 }
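/*
** Editor's sketch of the 64-bit counter protocol noted in the function
** above (illustrative; mirrors the GORC/GOTC reads): the low dword is
** read first, and the pair clears on the high-dword read.
*/
#if 0
        u64 octets;

        octets  = E1000_READ_REG(&adapter->hw, E1000_GORCL);
        octets |= (u64)E1000_READ_REG(&adapter->hw, E1000_GORCH) << 32;
#endif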
5235
5236 /* Export a single 32-bit register via a read-only sysctl. */
5237 static int
5238 em_sysctl_reg_handler(SYSCTL_HANDLER_ARGS)
5239 {
5240         struct adapter *adapter;
5241         u_int val;
5242
5243         adapter = oidp->oid_arg1;
5244         val = E1000_READ_REG(&adapter->hw, oidp->oid_arg2);
5245         return (sysctl_handle_int(oidp, &val, 0, req));
5246 }
5247
5248 /*
5249  * Add sysctl variables, one per statistic, to the system.
5250  */
5251 static void
5252 em_add_hw_stats(struct adapter *adapter)
5253 {
5254         device_t dev = adapter->dev;
5255
5256         struct tx_ring *txr = adapter->tx_rings;
5257         struct rx_ring *rxr = adapter->rx_rings;
5258
5259         struct sysctl_ctx_list *ctx = device_get_sysctl_ctx(dev);
5260         struct sysctl_oid *tree = device_get_sysctl_tree(dev);
5261         struct sysctl_oid_list *child = SYSCTL_CHILDREN(tree);
5262         struct e1000_hw_stats *stats = &adapter->stats;
5263
5264         struct sysctl_oid *stat_node, *queue_node, *int_node;
5265         struct sysctl_oid_list *stat_list, *queue_list, *int_list;
5266
5267 #define QUEUE_NAME_LEN 32
5268         char namebuf[QUEUE_NAME_LEN];
5269         
5270         /* Driver Statistics */
5271         SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "link_irq",
5272                         CTLFLAG_RD, &adapter->link_irq,
5273                         "Link MSIX IRQ Handled");
5274         SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "mbuf_alloc_fail", 
5275                          CTLFLAG_RD, &adapter->mbuf_alloc_failed,
5276                          "Std mbuf failed");
5277         SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "cluster_alloc_fail", 
5278                          CTLFLAG_RD, &adapter->mbuf_cluster_failed,
5279                          "Std mbuf cluster failed");
5280         SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "dropped", 
5281                         CTLFLAG_RD, &adapter->dropped_pkts,
5282                         "Driver dropped packets");
5283         SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "tx_dma_fail", 
5284                         CTLFLAG_RD, &adapter->no_tx_dma_setup,
5285                         "Driver tx dma failure in xmit");
5286         SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "rx_overruns",
5287                         CTLFLAG_RD, &adapter->rx_overruns,
5288                         "RX overruns");
5289         SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "watchdog_timeouts",
5290                         CTLFLAG_RD, &adapter->watchdog_events,
5291                         "Watchdog timeouts");
5292         
5293         SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "device_control",
5294                         CTLTYPE_UINT | CTLFLAG_RD, adapter, E1000_CTRL,
5295                         em_sysctl_reg_handler, "IU",
5296                         "Device Control Register");
5297         SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "rx_control",
5298                         CTLTYPE_UINT | CTLFLAG_RD, adapter, E1000_RCTL,
5299                         em_sysctl_reg_handler, "IU",
5300                         "Receiver Control Register");
5301         SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "fc_high_water",
5302                         CTLFLAG_RD, &adapter->hw.fc.high_water, 0,
5303                         "Flow Control High Watermark");
5304         SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "fc_low_water", 
5305                         CTLFLAG_RD, &adapter->hw.fc.low_water, 0,
5306                         "Flow Control Low Watermark");
5307
5308         for (int i = 0; i < adapter->num_queues; i++, rxr++, txr++) {
5309                 snprintf(namebuf, QUEUE_NAME_LEN, "queue%d", i);
5310                 queue_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, namebuf,
5311                                             CTLFLAG_RD, NULL, "Queue Name");
5312                 queue_list = SYSCTL_CHILDREN(queue_node);
5313
5314                 SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "txd_head", 
5315                                 CTLTYPE_UINT | CTLFLAG_RD, adapter,
5316                                 E1000_TDH(txr->me),
5317                                 em_sysctl_reg_handler, "IU",
5318                                 "Transmit Descriptor Head");
5319                 SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "txd_tail", 
5320                                 CTLTYPE_UINT | CTLFLAG_RD, adapter,
5321                                 E1000_TDT(txr->me),
5322                                 em_sysctl_reg_handler, "IU",
5323                                 "Transmit Descriptor Tail");
5324                 SYSCTL_ADD_ULONG(ctx, queue_list, OID_AUTO, "tx_irq",
5325                                 CTLFLAG_RD, &txr->tx_irq,
5326                                 "Queue MSI-X Transmit Interrupts");
5327                 SYSCTL_ADD_ULONG(ctx, queue_list, OID_AUTO, "no_desc_avail", 
5328                                 CTLFLAG_RD, &txr->no_desc_avail,
5329                                 "Queue No Descriptor Available");
5330                 
5331                 SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "rxd_head", 
5332                                 CTLTYPE_UINT | CTLFLAG_RD, adapter,
5333                                 E1000_RDH(rxr->me),
5334                                 em_sysctl_reg_handler, "IU",
5335                                 "Receive Descriptor Head");
5336                 SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "rxd_tail", 
5337                                 CTLTYPE_UINT | CTLFLAG_RD, adapter,
5338                                 E1000_RDT(rxr->me),
5339                                 em_sysctl_reg_handler, "IU",
5340                                 "Receive Descriptor Tail");
5341                 SYSCTL_ADD_ULONG(ctx, queue_list, OID_AUTO, "rx_irq",
5342                                 CTLFLAG_RD, &rxr->rx_irq,
5343                                 "Queue MSI-X Receive Interrupts");
5344         }

        /* MAC stats get their own sub node */

        stat_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "mac_stats",
                                    CTLFLAG_RD, NULL, "Statistics");
        stat_list = SYSCTL_CHILDREN(stat_node);

        SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "excess_coll",
                        CTLFLAG_RD, &stats->ecol,
                        "Excessive collisions");
        SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "single_coll",
                        CTLFLAG_RD, &stats->scc,
                        "Single collisions");
        SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "multiple_coll",
                        CTLFLAG_RD, &stats->mcc,
                        "Multiple collisions");
        SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "late_coll",
                        CTLFLAG_RD, &stats->latecol,
                        "Late collisions");
        SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "collision_count",
                        CTLFLAG_RD, &stats->colc,
                        "Collision Count");
        SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "symbol_errors",
                        CTLFLAG_RD, &adapter->stats.symerrs,
                        "Symbol Errors");
        SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "sequence_errors",
                        CTLFLAG_RD, &adapter->stats.sec,
                        "Sequence Errors");
        SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "defer_count",
                        CTLFLAG_RD, &adapter->stats.dc,
                        "Defer Count");
        SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "missed_packets",
                        CTLFLAG_RD, &adapter->stats.mpc,
                        "Missed Packets");
        SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_no_buff",
                        CTLFLAG_RD, &adapter->stats.rnbc,
                        "Receive No Buffers");
        SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_undersize",
                        CTLFLAG_RD, &adapter->stats.ruc,
                        "Receive Undersize");
        SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_fragmented",
                        CTLFLAG_RD, &adapter->stats.rfc,
                        "Fragmented Packets Received");
        SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_oversize",
                        CTLFLAG_RD, &adapter->stats.roc,
                        "Oversized Packets Received");
        SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_jabber",
                        CTLFLAG_RD, &adapter->stats.rjc,
                        "Received Jabber");
        SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_errs",
                        CTLFLAG_RD, &adapter->stats.rxerrc,
                        "Receive Errors");
        SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "crc_errs",
                        CTLFLAG_RD, &adapter->stats.crcerrs,
                        "CRC errors");
        SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "alignment_errs",
                        CTLFLAG_RD, &adapter->stats.algnerrc,
                        "Alignment Errors");
        /* On 82575 these are collision counts */
        SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "coll_ext_errs",
                        CTLFLAG_RD, &adapter->stats.cexterr,
                        "Collision/Carrier extension errors");
        SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "xon_recvd",
                        CTLFLAG_RD, &adapter->stats.xonrxc,
                        "XON Received");
        SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "xon_txd",
                        CTLFLAG_RD, &adapter->stats.xontxc,
                        "XON Transmitted");
        SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "xoff_recvd",
                        CTLFLAG_RD, &adapter->stats.xoffrxc,
                        "XOFF Received");
        SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "xoff_txd",
                        CTLFLAG_RD, &adapter->stats.xofftxc,
                        "XOFF Transmitted");

        /* Packet Reception Stats */
        SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "total_pkts_recvd",
                        CTLFLAG_RD, &adapter->stats.tpr,
                        "Total Packets Received");
        SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_pkts_recvd",
                        CTLFLAG_RD, &adapter->stats.gprc,
                        "Good Packets Received");
        SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "bcast_pkts_recvd",
                        CTLFLAG_RD, &adapter->stats.bprc,
                        "Broadcast Packets Received");
        SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "mcast_pkts_recvd",
                        CTLFLAG_RD, &adapter->stats.mprc,
                        "Multicast Packets Received");
        SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_64",
                        CTLFLAG_RD, &adapter->stats.prc64,
                        "64 byte frames received");
        SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_65_127",
                        CTLFLAG_RD, &adapter->stats.prc127,
                        "65-127 byte frames received");
        SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_128_255",
                        CTLFLAG_RD, &adapter->stats.prc255,
                        "128-255 byte frames received");
        SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_256_511",
                        CTLFLAG_RD, &adapter->stats.prc511,
                        "256-511 byte frames received");
        SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_512_1023",
                        CTLFLAG_RD, &adapter->stats.prc1023,
                        "512-1023 byte frames received");
        SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_1024_1522",
                        CTLFLAG_RD, &adapter->stats.prc1522,
                        "1024-1522 byte frames received");
        SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_octets_recvd",
                        CTLFLAG_RD, &adapter->stats.gorc,
                        "Good Octets Received");

        /* Packet Transmission Stats */
        SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_octets_txd",
                        CTLFLAG_RD, &adapter->stats.gotc,
                        "Good Octets Transmitted");
        SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "total_pkts_txd",
                        CTLFLAG_RD, &adapter->stats.tpt,
                        "Total Packets Transmitted");
        SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_pkts_txd",
                        CTLFLAG_RD, &adapter->stats.gptc,
                        "Good Packets Transmitted");
        SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "bcast_pkts_txd",
                        CTLFLAG_RD, &adapter->stats.bptc,
                        "Broadcast Packets Transmitted");
        SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "mcast_pkts_txd",
                        CTLFLAG_RD, &adapter->stats.mptc,
                        "Multicast Packets Transmitted");
        SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_64",
                        CTLFLAG_RD, &adapter->stats.ptc64,
                        "64 byte frames transmitted");
        SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_65_127",
                        CTLFLAG_RD, &adapter->stats.ptc127,
                        "65-127 byte frames transmitted");
        SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_128_255",
                        CTLFLAG_RD, &adapter->stats.ptc255,
                        "128-255 byte frames transmitted");
        SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_256_511",
                        CTLFLAG_RD, &adapter->stats.ptc511,
                        "256-511 byte frames transmitted");
        SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_512_1023",
                        CTLFLAG_RD, &adapter->stats.ptc1023,
                        "512-1023 byte frames transmitted");
        SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_1024_1522",
                        CTLFLAG_RD, &adapter->stats.ptc1522,
                        "1024-1522 byte frames transmitted");
        SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tso_txd",
                        CTLFLAG_RD, &adapter->stats.tsctc,
                        "TSO Contexts Transmitted");
        SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tso_ctx_fail",
                        CTLFLAG_RD, &adapter->stats.tsctfc,
                        "TSO Contexts Failed");

        /* Interrupt Stats */

        int_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "interrupts",
                                    CTLFLAG_RD, NULL, "Interrupt Statistics");
        int_list = SYSCTL_CHILDREN(int_node);

        SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "asserts",
                        CTLFLAG_RD, &adapter->stats.iac,
                        "Interrupt Assertion Count");

        SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "rx_pkt_timer",
                        CTLFLAG_RD, &adapter->stats.icrxptc,
                        "Interrupt Cause Rx Pkt Timer Expire Count");

        SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "rx_abs_timer",
                        CTLFLAG_RD, &adapter->stats.icrxatc,
                        "Interrupt Cause Rx Abs Timer Expire Count");

        SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "tx_pkt_timer",
                        CTLFLAG_RD, &adapter->stats.ictxptc,
                        "Interrupt Cause Tx Pkt Timer Expire Count");

        SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "tx_abs_timer",
                        CTLFLAG_RD, &adapter->stats.ictxatc,
                        "Interrupt Cause Tx Abs Timer Expire Count");

        SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "tx_queue_empty",
                        CTLFLAG_RD, &adapter->stats.ictxqec,
                        "Interrupt Cause Tx Queue Empty Count");

        SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "tx_queue_min_thresh",
                        CTLFLAG_RD, &adapter->stats.ictxqmtc,
                        "Interrupt Cause Tx Queue Min Thresh Count");

        SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "rx_desc_min_thresh",
                        CTLFLAG_RD, &adapter->stats.icrxdmtc,
                        "Interrupt Cause Rx Desc Min Thresh Count");

        SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "rx_overrun",
                        CTLFLAG_RD, &adapter->stats.icrxoc,
                        "Interrupt Cause Receiver Overrun Count");
}
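
/*
 * Usage sketch: once the device is attached, the statistics registered
 * above are readable with sysctl(8) under the device's tree.  Assuming
 * unit 0 (the unit number is an illustration, not fixed), e.g.:
 *
 *      # sysctl dev.em.0.mac_stats.good_pkts_recvd
 *      # sysctl dev.em.0.queue0.txd_head
 *      # sysctl dev.em.0.interrupts.asserts
 *
 * The "dev.em.<unit>" prefix is the standard newbus device sysctl tree;
 * the leaf names match the OID strings passed to the SYSCTL_ADD_* calls.
 */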

/**********************************************************************
 *
 *  This routine provides a way to dump out the adapter eeprom,
 *  often a useful debug/service tool.  Only the first 32 words are
 *  dumped; the data that matters lives within that range.
 *
 **********************************************************************/
static int
em_sysctl_nvm_info(SYSCTL_HANDLER_ARGS)
{
        struct adapter *adapter = (struct adapter *)arg1;
        int error;
        int result;

        result = -1;
        error = sysctl_handle_int(oidp, &result, 0, req);

        if (error || !req->newptr)
                return (error);

        /*
         * This value will cause a hex dump of the
         * first 32 16-bit words of the EEPROM to
         * the screen.
         */
        if (result == 1)
                em_print_nvm_info(adapter);

        return (error);
}
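
/*
 * Example (a sketch: assumes this handler is registered under the device
 * tree with the OID name used elsewhere in this file; "nvm" and unit 0
 * are shown only for illustration):
 *
 *      # sysctl dev.em.0.nvm=1
 *
 * Writing 1 triggers em_print_nvm_info(); any other value is ignored.
 */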

static void
em_print_nvm_info(struct adapter *adapter)
{
        u16     eeprom_data;
        int     i, j, row = 0;

        /* It's a bit crude, but it gets the job done */
        printf("\nInterface EEPROM Dump:\n");
        printf("Offset\n0x0000  ");
        for (i = 0, j = 0; i < 32; i++, j++) {
                if (j == 8) { /* Make the offset block */
                        j = 0; ++row;
                        printf("\n0x00%x0  ", row);
                }
                e1000_read_nvm(&adapter->hw, i, 1, &eeprom_data);
                printf("%04x ", eeprom_data);
        }
        printf("\n");
}

static int
em_sysctl_int_delay(SYSCTL_HANDLER_ARGS)
{
        struct em_int_delay_info *info;
        struct adapter *adapter;
        u32 regval;
        int error, usecs, ticks;

        info = (struct em_int_delay_info *)arg1;
        usecs = info->value;
        error = sysctl_handle_int(oidp, &usecs, 0, req);
        if (error != 0 || req->newptr == NULL)
                return (error);
        if (usecs < 0 || usecs > EM_TICKS_TO_USECS(65535))
                return (EINVAL);
        info->value = usecs;
        ticks = EM_USECS_TO_TICKS(usecs);
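        /*
         * Note: the delay registers count in units of roughly 1.024us
         * (per the EM_USECS_TO_TICKS()/EM_TICKS_TO_USECS() conversion
         * macros in if_em.h), so a request of e.g. 1000 usecs programs
         * roughly 977 hardware ticks.  The register field is 16 bits
         * wide, hence the EM_TICKS_TO_USECS(65535) bound above.
         */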

        adapter = info->adapter;

        EM_CORE_LOCK(adapter);
        regval = E1000_READ_OFFSET(&adapter->hw, info->offset);
        regval = (regval & ~0xffff) | (ticks & 0xffff);
        /* Handle a few special cases. */
        switch (info->offset) {
        case E1000_RDTR:
                break;
        case E1000_TIDV:
                if (ticks == 0) {
                        adapter->txd_cmd &= ~E1000_TXD_CMD_IDE;
                        /* Don't write 0 into the TIDV register. */
                        regval++;
                } else
                        adapter->txd_cmd |= E1000_TXD_CMD_IDE;
                break;
        }
        E1000_WRITE_OFFSET(&adapter->hw, info->offset, regval);
        EM_CORE_UNLOCK(adapter);
        return (0);
}

static void
em_add_int_delay_sysctl(struct adapter *adapter, const char *name,
        const char *description, struct em_int_delay_info *info,
        int offset, int value)
{
        info->adapter = adapter;
        info->offset = offset;
        info->value = value;
        SYSCTL_ADD_PROC(device_get_sysctl_ctx(adapter->dev),
            SYSCTL_CHILDREN(device_get_sysctl_tree(adapter->dev)),
            OID_AUTO, name, CTLTYPE_INT|CTLFLAG_RW,
            info, 0, em_sysctl_int_delay, "I", description);
}
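
/*
 * Sketch of how this helper is typically invoked from attach; the OID
 * name, description and default shown here are illustrative rather
 * than a definitive list of the knobs this driver registers:
 *
 *      em_add_int_delay_sysctl(adapter, "rx_int_delay",
 *          "receive interrupt delay in usecs", &adapter->rx_int_delay,
 *          E1000_REGISTER(&adapter->hw, E1000_RDTR),
 *          em_rx_int_delay_dflt);
 */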

static void
em_set_sysctl_value(struct adapter *adapter, const char *name,
        const char *description, int *limit, int value)
{
        *limit = value;
        SYSCTL_ADD_INT(device_get_sysctl_ctx(adapter->dev),
            SYSCTL_CHILDREN(device_get_sysctl_tree(adapter->dev)),
            OID_AUTO, name, CTLTYPE_INT|CTLFLAG_RW, limit, value, description);
}

/*
** Set flow control using sysctl:
** Flow control values:
**      0 - off
**      1 - rx pause
**      2 - tx pause
**      3 - full
*/
static int
em_set_flowcntl(SYSCTL_HANDLER_ARGS)
{
        int             error;
        static int      input = 3; /* default is full */
        struct adapter  *adapter = (struct adapter *) arg1;

        error = sysctl_handle_int(oidp, &input, 0, req);

        if ((error) || (req->newptr == NULL))
                return (error);

        if (input == adapter->fc) /* no change? */
                return (error);

        switch (input) {
        case e1000_fc_rx_pause:
        case e1000_fc_tx_pause:
        case e1000_fc_full:
        case e1000_fc_none:
                adapter->hw.fc.requested_mode = input;
                adapter->fc = input;
                break;
        default:
                /* Do nothing */
                return (error);
        }

        adapter->hw.fc.current_mode = adapter->hw.fc.requested_mode;
        e1000_force_mac_fc(&adapter->hw);
        return (error);
}
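
/*
 * Usage sketch: assuming this handler is attached under the device tree
 * with an OID name such as "fc" (the registration lives elsewhere in
 * this file), full flow control on unit 0 would be requested with:
 *
 *      # sysctl dev.em.0.fc=3
 *
 * The accepted values follow the e1000_fc_* enum: 0 none, 1 rx pause,
 * 2 tx pause, 3 full, matching the table in the comment above.
 */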

static int
em_sysctl_debug_info(SYSCTL_HANDLER_ARGS)
{
        struct adapter *adapter;
        int error;
        int result;

        result = -1;
        error = sysctl_handle_int(oidp, &result, 0, req);

        if (error || !req->newptr)
                return (error);

        if (result == 1) {
                adapter = (struct adapter *)arg1;
                em_print_debug_info(adapter);
        }

        return (error);
}
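
/*
 * Usage sketch: with this handler registered in the device's sysctl
 * tree (the OID name, commonly "debug", and unit 0 are assumptions for
 * illustration):
 *
 *      # sysctl dev.em.0.debug=1
 *
 * dumps the state printed by em_print_debug_info() to the console.
 */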

/*
** This routine is meant to be fluid; add whatever is
** needed for debugging a problem.  -jfv
*/
static void
em_print_debug_info(struct adapter *adapter)
{
        device_t dev = adapter->dev;
        struct tx_ring *txr = adapter->tx_rings;
        struct rx_ring *rxr = adapter->rx_rings;

        if (adapter->ifp->if_drv_flags & IFF_DRV_RUNNING)
                printf("Interface is RUNNING ");
        else
                printf("Interface is NOT RUNNING\n");

        if (adapter->ifp->if_drv_flags & IFF_DRV_OACTIVE)
                printf("and INACTIVE\n");
        else
                printf("and ACTIVE\n");

        device_printf(dev, "hw tdh = %d, hw tdt = %d\n",
            E1000_READ_REG(&adapter->hw, E1000_TDH(0)),
            E1000_READ_REG(&adapter->hw, E1000_TDT(0)));
        device_printf(dev, "hw rdh = %d, hw rdt = %d\n",
            E1000_READ_REG(&adapter->hw, E1000_RDH(0)),
            E1000_READ_REG(&adapter->hw, E1000_RDT(0)));
        device_printf(dev, "Tx Queue Status = %d\n", txr->queue_status);
        device_printf(dev, "TX descriptors avail = %d\n",
            txr->tx_avail);
        device_printf(dev, "Tx Descriptors avail failure = %ld\n",
            txr->no_desc_avail);
        device_printf(dev, "RX discarded packets = %ld\n",
            rxr->rx_discarded);
        device_printf(dev, "RX Next to Check = %d\n", rxr->next_to_check);
        device_printf(dev, "RX Next to Refresh = %d\n", rxr->next_to_refresh);
}