/******************************************************************************

  Copyright (c) 2001-2011, Intel Corporation
  All rights reserved.

  Redistribution and use in source and binary forms, with or without
  modification, are permitted provided that the following conditions are met:

   1. Redistributions of source code must retain the above copyright notice,
      this list of conditions and the following disclaimer.

   2. Redistributions in binary form must reproduce the above copyright
      notice, this list of conditions and the following disclaimer in the
      documentation and/or other materials provided with the distribution.

   3. Neither the name of the Intel Corporation nor the names of its
      contributors may be used to endorse or promote products derived from
      this software without specific prior written permission.

  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
  AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
  LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
  INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
  CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
  ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
  POSSIBILITY OF SUCH DAMAGE.

******************************************************************************/
/*$FreeBSD$*/

#ifdef HAVE_KERNEL_OPTION_HEADERS
#include "opt_device_polling.h"
#include "opt_inet.h"
#endif

#include <sys/param.h>
#include <sys/systm.h>
#if __FreeBSD_version >= 800000
#include <sys/buf_ring.h>
#endif
#include <sys/bus.h>
#include <sys/endian.h>
#include <sys/kernel.h>
#include <sys/kthread.h>
#include <sys/malloc.h>
#include <sys/mbuf.h>
#include <sys/module.h>
#include <sys/rman.h>
#include <sys/socket.h>
#include <sys/sockio.h>
#include <sys/sysctl.h>
#include <sys/taskqueue.h>
#include <sys/eventhandler.h>
#include <machine/bus.h>
#include <machine/resource.h>

#include <net/bpf.h>
#include <net/ethernet.h>
#include <net/if.h>
#include <net/if_arp.h>
#include <net/if_dl.h>
#include <net/if_media.h>

#include <net/if_types.h>
#include <net/if_vlan_var.h>

#include <netinet/in_systm.h>
#include <netinet/in.h>
#include <netinet/if_ether.h>
#include <netinet/ip.h>
#include <netinet/ip6.h>
#include <netinet/tcp.h>
#include <netinet/udp.h>

#include <machine/in_cksum.h>
#include <dev/led/led.h>
#include <dev/pci/pcivar.h>
#include <dev/pci/pcireg.h>

#include "e1000_api.h"
#include "e1000_82571.h"
#include "if_em.h"

/*********************************************************************
 *  Set this to one to display debug statistics
 *********************************************************************/
int     em_display_debug_stats = 0;

/*********************************************************************
 *  Driver version:
 *********************************************************************/
char em_driver_version[] = "7.2.3";

/*********************************************************************
 *  PCI Device ID Table
 *
 *  Used by probe to select devices to load on
 *  Last field stores an index into e1000_strings
 *  Last entry must be all 0s
 *
 *  { Vendor ID, Device ID, SubVendor ID, SubDevice ID, String Index }
 *********************************************************************/

static em_vendor_info_t em_vendor_info_array[] =
{
        /* Intel(R) PRO/1000 Network Connection */
        { 0x8086, E1000_DEV_ID_82571EB_COPPER,  PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82571EB_FIBER,   PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82571EB_SERDES,  PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82571EB_SERDES_DUAL,
                                                PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82571EB_SERDES_QUAD,
                                                PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82571EB_QUAD_COPPER,
                                                PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82571EB_QUAD_COPPER_LP,
                                                PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82571EB_QUAD_FIBER,
                                                PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82571PT_QUAD_COPPER,
                                                PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82572EI_COPPER,  PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82572EI_FIBER,   PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82572EI_SERDES,  PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82572EI,         PCI_ANY_ID, PCI_ANY_ID, 0},

        { 0x8086, E1000_DEV_ID_82573E,          PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82573E_IAMT,     PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82573L,          PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82583V,          PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_80003ES2LAN_COPPER_SPT,
                                                PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_80003ES2LAN_SERDES_SPT,
                                                PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_80003ES2LAN_COPPER_DPT,
                                                PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_80003ES2LAN_SERDES_DPT,
                                                PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_ICH8_IGP_M_AMT,  PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_ICH8_IGP_AMT,    PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_ICH8_IGP_C,      PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_ICH8_IFE,        PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_ICH8_IFE_GT,     PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_ICH8_IFE_G,      PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_ICH8_IGP_M,      PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_ICH8_82567V_3,   PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_ICH9_IGP_M_AMT,  PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_ICH9_IGP_AMT,    PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_ICH9_IGP_C,      PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_ICH9_IGP_M,      PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_ICH9_IGP_M_V,    PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_ICH9_IFE,        PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_ICH9_IFE_GT,     PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_ICH9_IFE_G,      PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_ICH9_BM,         PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82574L,          PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82574LA,         PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_ICH10_R_BM_LM,   PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_ICH10_R_BM_LF,   PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_ICH10_R_BM_V,    PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_ICH10_D_BM_LM,   PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_ICH10_D_BM_LF,   PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_ICH10_D_BM_V,    PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_PCH_M_HV_LM,     PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_PCH_M_HV_LC,     PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_PCH_D_HV_DM,     PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_PCH_D_HV_DC,     PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_PCH2_LV_LM,      PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_PCH2_LV_V,       PCI_ANY_ID, PCI_ANY_ID, 0},
        /* required last entry */
        { 0, 0, 0, 0, 0}
};

/*********************************************************************
 *  Table of branding strings for all supported NICs.
 *********************************************************************/

static char *em_strings[] = {
        "Intel(R) PRO/1000 Network Connection"
};

/*********************************************************************
 *  Function prototypes
 *********************************************************************/
static int      em_probe(device_t);
static int      em_attach(device_t);
static int      em_detach(device_t);
static int      em_shutdown(device_t);
static int      em_suspend(device_t);
static int      em_resume(device_t);
static void     em_start(struct ifnet *);
static void     em_start_locked(struct ifnet *, struct tx_ring *);
#ifdef EM_MULTIQUEUE
static int      em_mq_start(struct ifnet *, struct mbuf *);
static int      em_mq_start_locked(struct ifnet *,
                    struct tx_ring *, struct mbuf *);
static void     em_qflush(struct ifnet *);
#endif
static int      em_ioctl(struct ifnet *, u_long, caddr_t);
static void     em_init(void *);
static void     em_init_locked(struct adapter *);
static void     em_stop(void *);
static void     em_media_status(struct ifnet *, struct ifmediareq *);
static int      em_media_change(struct ifnet *);
static void     em_identify_hardware(struct adapter *);
static int      em_allocate_pci_resources(struct adapter *);
static int      em_allocate_legacy(struct adapter *);
static int      em_allocate_msix(struct adapter *);
static int      em_allocate_queues(struct adapter *);
static int      em_setup_msix(struct adapter *);
static void     em_free_pci_resources(struct adapter *);
static void     em_local_timer(void *);
static void     em_reset(struct adapter *);
static int      em_setup_interface(device_t, struct adapter *);

static void     em_setup_transmit_structures(struct adapter *);
static void     em_initialize_transmit_unit(struct adapter *);
static int      em_allocate_transmit_buffers(struct tx_ring *);
static void     em_free_transmit_structures(struct adapter *);
static void     em_free_transmit_buffers(struct tx_ring *);

static int      em_setup_receive_structures(struct adapter *);
static int      em_allocate_receive_buffers(struct rx_ring *);
static void     em_initialize_receive_unit(struct adapter *);
static void     em_free_receive_structures(struct adapter *);
static void     em_free_receive_buffers(struct rx_ring *);

static void     em_enable_intr(struct adapter *);
static void     em_disable_intr(struct adapter *);
static void     em_update_stats_counters(struct adapter *);
static void     em_add_hw_stats(struct adapter *adapter);
static bool     em_txeof(struct tx_ring *);
static bool     em_rxeof(struct rx_ring *, int, int *);
#ifndef __NO_STRICT_ALIGNMENT
static int      em_fixup_rx(struct rx_ring *);
#endif
static void     em_receive_checksum(struct e1000_rx_desc *, struct mbuf *);
static void     em_transmit_checksum_setup(struct tx_ring *, struct mbuf *, int,
                    struct ip *, u32 *, u32 *);
static void     em_tso_setup(struct tx_ring *, struct mbuf *, int, struct ip *,
                    struct tcphdr *, u32 *, u32 *);
static void     em_set_promisc(struct adapter *);
static void     em_disable_promisc(struct adapter *);
static void     em_set_multi(struct adapter *);
static void     em_update_link_status(struct adapter *);
static void     em_refresh_mbufs(struct rx_ring *, int);
static void     em_register_vlan(void *, struct ifnet *, u16);
static void     em_unregister_vlan(void *, struct ifnet *, u16);
static void     em_setup_vlan_hw_support(struct adapter *);
static int      em_xmit(struct tx_ring *, struct mbuf **);
static int      em_dma_malloc(struct adapter *, bus_size_t,
                    struct em_dma_alloc *, int);
static void     em_dma_free(struct adapter *, struct em_dma_alloc *);
static int      em_sysctl_nvm_info(SYSCTL_HANDLER_ARGS);
static void     em_print_nvm_info(struct adapter *);
static int      em_sysctl_debug_info(SYSCTL_HANDLER_ARGS);
static void     em_print_debug_info(struct adapter *);
static int      em_is_valid_ether_addr(u8 *);
static int      em_sysctl_int_delay(SYSCTL_HANDLER_ARGS);
static void     em_add_int_delay_sysctl(struct adapter *, const char *,
                    const char *, struct em_int_delay_info *, int, int);
/* Management and WOL Support */
static void     em_init_manageability(struct adapter *);
static void     em_release_manageability(struct adapter *);
static void     em_get_hw_control(struct adapter *);
static void     em_release_hw_control(struct adapter *);
static void     em_get_wakeup(device_t);
static void     em_enable_wakeup(device_t);
static int      em_enable_phy_wakeup(struct adapter *);
static void     em_led_func(void *, int);
static void     em_disable_aspm(struct adapter *);

static int      em_irq_fast(void *);

/* MSIX handlers */
static void     em_msix_tx(void *);
static void     em_msix_rx(void *);
static void     em_msix_link(void *);
static void     em_handle_tx(void *context, int pending);
static void     em_handle_rx(void *context, int pending);
static void     em_handle_link(void *context, int pending);

static void     em_set_sysctl_value(struct adapter *, const char *,
                    const char *, int *, int);

static __inline void em_rx_discard(struct rx_ring *, int);

#ifdef DEVICE_POLLING
static poll_handler_t em_poll;
#endif /* DEVICE_POLLING */

/*********************************************************************
 *  FreeBSD Device Interface Entry Points
 *********************************************************************/

static device_method_t em_methods[] = {
        /* Device interface */
        DEVMETHOD(device_probe, em_probe),
        DEVMETHOD(device_attach, em_attach),
        DEVMETHOD(device_detach, em_detach),
        DEVMETHOD(device_shutdown, em_shutdown),
        DEVMETHOD(device_suspend, em_suspend),
        DEVMETHOD(device_resume, em_resume),
        {0, 0}
};

static driver_t em_driver = {
        "em", em_methods, sizeof(struct adapter),
};

devclass_t em_devclass;
DRIVER_MODULE(em, pci, em_driver, em_devclass, 0, 0);
MODULE_DEPEND(em, pci, 1, 1, 1);
MODULE_DEPEND(em, ether, 1, 1, 1);

/*********************************************************************
 *  Tunable default values.
 *********************************************************************/

#define EM_TICKS_TO_USECS(ticks)        ((1024 * (ticks) + 500) / 1000)
#define EM_USECS_TO_TICKS(usecs)        ((1000 * (usecs) + 512) / 1024)
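/*
 * Worked example (for illustration; assumes the usual 1.024 usec
 * hardware timer granularity): the macros above round-convert between
 * hardware ticks and microseconds, e.g.
 *   EM_TICKS_TO_USECS(8)  = (1024 * 8  + 500) / 1000 = 8 usecs
 *   EM_USECS_TO_TICKS(32) = (1000 * 32 + 512) / 1024 = 31 ticks
 */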
#define M_TSO_LEN                       66

/* Allow common code without TSO */
#ifndef CSUM_TSO
#define CSUM_TSO        0
#endif

SYSCTL_NODE(_hw, OID_AUTO, em, CTLFLAG_RD, 0, "EM driver parameters");

static int em_tx_int_delay_dflt = EM_TICKS_TO_USECS(EM_TIDV);
static int em_rx_int_delay_dflt = EM_TICKS_TO_USECS(EM_RDTR);
TUNABLE_INT("hw.em.tx_int_delay", &em_tx_int_delay_dflt);
TUNABLE_INT("hw.em.rx_int_delay", &em_rx_int_delay_dflt);
SYSCTL_INT(_hw_em, OID_AUTO, tx_int_delay, CTLFLAG_RDTUN, &em_tx_int_delay_dflt,
    0, "Default transmit interrupt delay in usecs");
SYSCTL_INT(_hw_em, OID_AUTO, rx_int_delay, CTLFLAG_RDTUN, &em_rx_int_delay_dflt,
    0, "Default receive interrupt delay in usecs");

static int em_tx_abs_int_delay_dflt = EM_TICKS_TO_USECS(EM_TADV);
static int em_rx_abs_int_delay_dflt = EM_TICKS_TO_USECS(EM_RADV);
TUNABLE_INT("hw.em.tx_abs_int_delay", &em_tx_abs_int_delay_dflt);
TUNABLE_INT("hw.em.rx_abs_int_delay", &em_rx_abs_int_delay_dflt);
SYSCTL_INT(_hw_em, OID_AUTO, tx_abs_int_delay, CTLFLAG_RDTUN,
    &em_tx_abs_int_delay_dflt, 0,
    "Default transmit interrupt delay limit in usecs");
SYSCTL_INT(_hw_em, OID_AUTO, rx_abs_int_delay, CTLFLAG_RDTUN,
    &em_rx_abs_int_delay_dflt, 0,
    "Default receive interrupt delay limit in usecs");

static int em_rxd = EM_DEFAULT_RXD;
static int em_txd = EM_DEFAULT_TXD;
TUNABLE_INT("hw.em.rxd", &em_rxd);
TUNABLE_INT("hw.em.txd", &em_txd);
SYSCTL_INT(_hw_em, OID_AUTO, rxd, CTLFLAG_RDTUN, &em_rxd, 0,
    "Number of receive descriptors per queue");
SYSCTL_INT(_hw_em, OID_AUTO, txd, CTLFLAG_RDTUN, &em_txd, 0,
    "Number of transmit descriptors per queue");

static int em_smart_pwr_down = FALSE;
TUNABLE_INT("hw.em.smart_pwr_down", &em_smart_pwr_down);
SYSCTL_INT(_hw_em, OID_AUTO, smart_pwr_down, CTLFLAG_RDTUN, &em_smart_pwr_down,
    0, "Set to true to leave smart power down enabled on newer adapters");

/* Controls whether promiscuous also shows bad packets */
static int em_debug_sbp = FALSE;
TUNABLE_INT("hw.em.sbp", &em_debug_sbp);
SYSCTL_INT(_hw_em, OID_AUTO, sbp, CTLFLAG_RDTUN, &em_debug_sbp, 0,
    "Show bad packets in promiscuous mode");

static int em_enable_msix = TRUE;
TUNABLE_INT("hw.em.enable_msix", &em_enable_msix);
SYSCTL_INT(_hw_em, OID_AUTO, enable_msix, CTLFLAG_RDTUN, &em_enable_msix, 0,
    "Enable MSI-X interrupts");

/* How many packets rxeof tries to clean at a time */
static int em_rx_process_limit = 100;
TUNABLE_INT("hw.em.rx_process_limit", &em_rx_process_limit);
SYSCTL_INT(_hw_em, OID_AUTO, rx_process_limit, CTLFLAG_RDTUN,
    &em_rx_process_limit, 0,
    "Maximum number of received packets to process at a time, -1 means unlimited");

/* Flow control setting - default to FULL */
static int em_fc_setting = e1000_fc_full;
TUNABLE_INT("hw.em.fc_setting", &em_fc_setting);
SYSCTL_INT(_hw_em, OID_AUTO, fc_setting, CTLFLAG_RDTUN, &em_fc_setting, 0,
    "Flow control");

/* Energy efficient ethernet - default to OFF */
static int eee_setting = 0;
TUNABLE_INT("hw.em.eee_setting", &eee_setting);
SYSCTL_INT(_hw_em, OID_AUTO, eee_setting, CTLFLAG_RDTUN, &eee_setting, 0,
    "Enable Energy Efficient Ethernet");

/* Global used in WOL setup with multiport cards */
static int global_quad_port_a = 0;

/*********************************************************************
 *  Device identification routine
 *
 *  em_probe determines whether the driver should be loaded on an
 *  adapter, based on the adapter's PCI vendor/device id.
 *
 *  return BUS_PROBE_DEFAULT on success, positive on failure
 *********************************************************************/

static int
em_probe(device_t dev)
{
        char            adapter_name[60];
        u16             pci_vendor_id = 0;
        u16             pci_device_id = 0;
        u16             pci_subvendor_id = 0;
        u16             pci_subdevice_id = 0;
        em_vendor_info_t *ent;

        INIT_DEBUGOUT("em_probe: begin");

        pci_vendor_id = pci_get_vendor(dev);
        if (pci_vendor_id != EM_VENDOR_ID)
                return (ENXIO);

        pci_device_id = pci_get_device(dev);
        pci_subvendor_id = pci_get_subvendor(dev);
        pci_subdevice_id = pci_get_subdevice(dev);

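        /*
         * Match rule, by way of example: vendor and device ids must
         * match an entry exactly, while subvendor/subdevice ids match
         * either exactly or via the PCI_ANY_ID wildcard that every
         * entry in em_vendor_info_array uses.
         */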
        ent = em_vendor_info_array;
        while (ent->vendor_id != 0) {
                if ((pci_vendor_id == ent->vendor_id) &&
                    (pci_device_id == ent->device_id) &&

                    ((pci_subvendor_id == ent->subvendor_id) ||
                    (ent->subvendor_id == PCI_ANY_ID)) &&

                    ((pci_subdevice_id == ent->subdevice_id) ||
                    (ent->subdevice_id == PCI_ANY_ID))) {
                        sprintf(adapter_name, "%s %s",
                                em_strings[ent->index],
                                em_driver_version);
                        device_set_desc_copy(dev, adapter_name);
                        return (BUS_PROBE_DEFAULT);
                }
                ent++;
        }

        return (ENXIO);
}

/*********************************************************************
 *  Device initialization routine
 *
 *  The attach entry point is called when the driver is being loaded.
 *  This routine identifies the type of hardware, allocates all resources
 *  and initializes the hardware.
 *
 *  return 0 on success, positive on failure
 *********************************************************************/

static int
em_attach(device_t dev)
{
        struct adapter  *adapter;
        struct e1000_hw *hw;
        int             error = 0;

        INIT_DEBUGOUT("em_attach: begin");

        adapter = device_get_softc(dev);
        adapter->dev = adapter->osdep.dev = dev;
        hw = &adapter->hw;
        EM_CORE_LOCK_INIT(adapter, device_get_nameunit(dev));

        /* SYSCTL stuff */
        SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
            SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
            OID_AUTO, "nvm", CTLTYPE_INT|CTLFLAG_RW, adapter, 0,
            em_sysctl_nvm_info, "I", "NVM Information");

        SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
            SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
            OID_AUTO, "debug", CTLTYPE_INT|CTLFLAG_RW, adapter, 0,
            em_sysctl_debug_info, "I", "Debug Information");

        callout_init_mtx(&adapter->timer, &adapter->core_mtx, 0);

        /* Determine hardware and mac info */
        em_identify_hardware(adapter);

        /* Setup PCI resources */
        if (em_allocate_pci_resources(adapter)) {
                device_printf(dev, "Allocation of PCI resources failed\n");
                error = ENXIO;
                goto err_pci;
        }

        /*
        ** For ICH8 and family we need to
        ** map the flash memory, and this
        ** must happen after the MAC is
        ** identified
        */
        if ((hw->mac.type == e1000_ich8lan) ||
            (hw->mac.type == e1000_ich9lan) ||
            (hw->mac.type == e1000_ich10lan) ||
            (hw->mac.type == e1000_pchlan) ||
            (hw->mac.type == e1000_pch2lan)) {
                int rid = EM_BAR_TYPE_FLASH;
                adapter->flash = bus_alloc_resource_any(dev,
                    SYS_RES_MEMORY, &rid, RF_ACTIVE);
                if (adapter->flash == NULL) {
                        device_printf(dev, "Mapping of Flash failed\n");
                        error = ENXIO;
                        goto err_pci;
                }
                /* This is used in the shared code */
                hw->flash_address = (u8 *)adapter->flash;
                adapter->osdep.flash_bus_space_tag =
                    rman_get_bustag(adapter->flash);
                adapter->osdep.flash_bus_space_handle =
                    rman_get_bushandle(adapter->flash);
        }

        /* Do Shared Code initialization */
        if (e1000_setup_init_funcs(hw, TRUE)) {
                device_printf(dev, "Setup of Shared code failed\n");
                error = ENXIO;
                goto err_pci;
        }

        e1000_get_bus_info(hw);

        /* Set up some sysctls for the tunable interrupt delays */
        em_add_int_delay_sysctl(adapter, "rx_int_delay",
            "receive interrupt delay in usecs", &adapter->rx_int_delay,
            E1000_REGISTER(hw, E1000_RDTR), em_rx_int_delay_dflt);
        em_add_int_delay_sysctl(adapter, "tx_int_delay",
            "transmit interrupt delay in usecs", &adapter->tx_int_delay,
            E1000_REGISTER(hw, E1000_TIDV), em_tx_int_delay_dflt);
        em_add_int_delay_sysctl(adapter, "rx_abs_int_delay",
            "receive interrupt delay limit in usecs",
            &adapter->rx_abs_int_delay,
            E1000_REGISTER(hw, E1000_RADV),
            em_rx_abs_int_delay_dflt);
        em_add_int_delay_sysctl(adapter, "tx_abs_int_delay",
            "transmit interrupt delay limit in usecs",
            &adapter->tx_abs_int_delay,
            E1000_REGISTER(hw, E1000_TADV),
            em_tx_abs_int_delay_dflt);

        /* Sysctl for limiting the amount of work done in the taskqueue */
        em_set_sysctl_value(adapter, "rx_processing_limit",
            "max number of rx packets to process", &adapter->rx_process_limit,
            em_rx_process_limit);

        /* Sysctl for setting the interface flow control */
        em_set_sysctl_value(adapter, "flow_control",
            "configure flow control",
            &adapter->fc_setting, em_fc_setting);

        /*
         * Validate the number of transmit and receive descriptors. It
         * must not exceed the hardware maximum and must be a multiple
         * of EM_DBA_ALIGN.
         */
        if (((em_txd * sizeof(struct e1000_tx_desc)) % EM_DBA_ALIGN) != 0 ||
            (em_txd > EM_MAX_TXD) || (em_txd < EM_MIN_TXD)) {
                device_printf(dev, "Using %d TX descriptors instead of %d!\n",
                    EM_DEFAULT_TXD, em_txd);
                adapter->num_tx_desc = EM_DEFAULT_TXD;
        } else
                adapter->num_tx_desc = em_txd;

        if (((em_rxd * sizeof(struct e1000_rx_desc)) % EM_DBA_ALIGN) != 0 ||
            (em_rxd > EM_MAX_RXD) || (em_rxd < EM_MIN_RXD)) {
                device_printf(dev, "Using %d RX descriptors instead of %d!\n",
                    EM_DEFAULT_RXD, em_rxd);
                adapter->num_rx_desc = EM_DEFAULT_RXD;
        } else
                adapter->num_rx_desc = em_rxd;

        hw->mac.autoneg = DO_AUTO_NEG;
        hw->phy.autoneg_wait_to_complete = FALSE;
        hw->phy.autoneg_advertised = AUTONEG_ADV_DEFAULT;

        /* Copper options */
        if (hw->phy.media_type == e1000_media_type_copper) {
                hw->phy.mdix = AUTO_ALL_MODES;
                hw->phy.disable_polarity_correction = FALSE;
                hw->phy.ms_type = EM_MASTER_SLAVE;
        }

        /*
         * Set the frame limits assuming
         * standard ethernet sized frames.
         */
        adapter->max_frame_size = ETHERMTU + ETHER_HDR_LEN + ETHERNET_FCS_SIZE;
        adapter->min_frame_size = ETH_ZLEN + ETHERNET_FCS_SIZE;
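        /*
         * For example, with the standard constants this works out to
         * 1500 + 14 + 4 = 1518 bytes for max_frame_size and
         * 60 + 4 = 64 bytes for min_frame_size.
         */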

        /*
         * This controls when hardware reports transmit completion
         * status.
         */
        hw->mac.report_tx_early = 1;

        /*
        ** Get queue/ring memory
        */
        if (em_allocate_queues(adapter)) {
                error = ENOMEM;
                goto err_pci;
        }

        /* Allocate multicast array memory. */
        adapter->mta = malloc(sizeof(u8) * ETH_ADDR_LEN *
            MAX_NUM_MULTICAST_ADDRESSES, M_DEVBUF, M_NOWAIT);
        if (adapter->mta == NULL) {
                device_printf(dev, "Can not allocate multicast setup array\n");
                error = ENOMEM;
                goto err_late;
        }

        /* Check SOL/IDER usage */
        if (e1000_check_reset_block(hw))
                device_printf(dev, "PHY reset is blocked"
                    " due to SOL/IDER session.\n");

        /* Sysctl for setting Energy Efficient Ethernet */
        em_set_sysctl_value(adapter, "eee_control",
            "enable Energy Efficient Ethernet",
            &hw->dev_spec.ich8lan.eee_disable, eee_setting);

        /*
        ** Start from a known state; this is
        ** important when reading the NVM and
        ** MAC address from it.
        */
        e1000_reset_hw(hw);

        /* Make sure we have a good EEPROM before we read from it */
        if (e1000_validate_nvm_checksum(hw) < 0) {
                /*
                ** Some PCI-E parts fail the first check due to
                ** the link being in a sleep state; call it again
                ** and, if it fails a second time, it's a real issue.
                */
                if (e1000_validate_nvm_checksum(hw) < 0) {
                        device_printf(dev,
                            "The EEPROM Checksum Is Not Valid\n");
                        error = EIO;
                        goto err_late;
                }
        }

        /* Copy the permanent MAC address out of the EEPROM */
        if (e1000_read_mac_addr(hw) < 0) {
                device_printf(dev, "EEPROM read error while reading MAC"
                    " address\n");
                error = EIO;
                goto err_late;
        }

        if (!em_is_valid_ether_addr(hw->mac.addr)) {
                device_printf(dev, "Invalid MAC address\n");
                error = EIO;
                goto err_late;
        }

        /*
        **  Do interrupt configuration
        */
        if (adapter->msix > 1) /* Do MSIX */
                error = em_allocate_msix(adapter);
        else  /* MSI or Legacy */
                error = em_allocate_legacy(adapter);
        if (error)
                goto err_late;

        /*
         * Get Wake-on-Lan and Management info for later use
         */
        em_get_wakeup(dev);

        /* Setup OS specific network interface */
        if (em_setup_interface(dev, adapter) != 0)
                goto err_late;

        em_reset(adapter);

        /* Initialize statistics */
        em_update_stats_counters(adapter);

        hw->mac.get_link_status = 1;
        em_update_link_status(adapter);

        /* Register for VLAN events */
        adapter->vlan_attach = EVENTHANDLER_REGISTER(vlan_config,
            em_register_vlan, adapter, EVENTHANDLER_PRI_FIRST);
        adapter->vlan_detach = EVENTHANDLER_REGISTER(vlan_unconfig,
            em_unregister_vlan, adapter, EVENTHANDLER_PRI_FIRST);

        em_add_hw_stats(adapter);

        /* Non-AMT based hardware can now take control from firmware */
        if (adapter->has_manage && !adapter->has_amt)
                em_get_hw_control(adapter);

        /* Tell the stack that the interface is not active */
        adapter->ifp->if_drv_flags &= ~(IFF_DRV_RUNNING | IFF_DRV_OACTIVE);

        adapter->led_dev = led_create(em_led_func, adapter,
            device_get_nameunit(dev));

        INIT_DEBUGOUT("em_attach: end");

        return (0);

err_late:
        em_free_transmit_structures(adapter);
        em_free_receive_structures(adapter);
        em_release_hw_control(adapter);
        if (adapter->ifp != NULL)
                if_free(adapter->ifp);
err_pci:
        em_free_pci_resources(adapter);
        free(adapter->mta, M_DEVBUF);
        EM_CORE_LOCK_DESTROY(adapter);

        return (error);
}

/*********************************************************************
 *  Device removal routine
 *
 *  The detach entry point is called when the driver is being removed.
 *  This routine stops the adapter and deallocates all the resources
 *  that were allocated for driver operation.
 *
 *  return 0 on success, positive on failure
 *********************************************************************/

static int
em_detach(device_t dev)
{
        struct adapter  *adapter = device_get_softc(dev);
        struct ifnet    *ifp = adapter->ifp;

        INIT_DEBUGOUT("em_detach: begin");

        /* Make sure VLANs are not using the driver */
        if (adapter->ifp->if_vlantrunk != NULL) {
                device_printf(dev,"Vlan in use, detach first\n");
                return (EBUSY);
        }

#ifdef DEVICE_POLLING
        if (ifp->if_capenable & IFCAP_POLLING)
                ether_poll_deregister(ifp);
#endif

        if (adapter->led_dev != NULL)
                led_destroy(adapter->led_dev);

        EM_CORE_LOCK(adapter);
        adapter->in_detach = 1;
        em_stop(adapter);
        EM_CORE_UNLOCK(adapter);
        EM_CORE_LOCK_DESTROY(adapter);

        e1000_phy_hw_reset(&adapter->hw);

        em_release_manageability(adapter);
        em_release_hw_control(adapter);

        /* Unregister VLAN events */
        if (adapter->vlan_attach != NULL)
                EVENTHANDLER_DEREGISTER(vlan_config, adapter->vlan_attach);
        if (adapter->vlan_detach != NULL)
                EVENTHANDLER_DEREGISTER(vlan_unconfig, adapter->vlan_detach);

        ether_ifdetach(adapter->ifp);
        callout_drain(&adapter->timer);

        em_free_pci_resources(adapter);
        bus_generic_detach(dev);
        if_free(ifp);

        em_free_transmit_structures(adapter);
        em_free_receive_structures(adapter);

        em_release_hw_control(adapter);
        free(adapter->mta, M_DEVBUF);

        return (0);
}

/*********************************************************************
 *
 *  Shutdown entry point
 *
 **********************************************************************/

static int
em_shutdown(device_t dev)
{
        return em_suspend(dev);
}

/*
 * Suspend/resume device methods.
 */
static int
em_suspend(device_t dev)
{
        struct adapter *adapter = device_get_softc(dev);

        EM_CORE_LOCK(adapter);

        em_release_manageability(adapter);
        em_release_hw_control(adapter);
        em_enable_wakeup(dev);

        EM_CORE_UNLOCK(adapter);

        return bus_generic_suspend(dev);
}

static int
em_resume(device_t dev)
{
        struct adapter *adapter = device_get_softc(dev);
        struct ifnet *ifp = adapter->ifp;

        EM_CORE_LOCK(adapter);
        em_init_locked(adapter);
        em_init_manageability(adapter);
        EM_CORE_UNLOCK(adapter);
        em_start(ifp);

        return bus_generic_resume(dev);
}

/*********************************************************************
 *  Transmit entry point
 *
 *  em_start is called by the stack to initiate a transmit.
 *  The driver will remain in this routine as long as there are
 *  packets to transmit and transmit resources are available.
 *  In case resources are not available, the stack is notified
 *  and the packet is requeued.
 **********************************************************************/

#ifdef EM_MULTIQUEUE
static int
em_mq_start_locked(struct ifnet *ifp, struct tx_ring *txr, struct mbuf *m)
{
        struct adapter  *adapter = txr->adapter;
        struct mbuf     *next;
        int             err = 0, enq = 0;

        if ((ifp->if_drv_flags & (IFF_DRV_RUNNING | IFF_DRV_OACTIVE)) !=
            IFF_DRV_RUNNING || adapter->link_active == 0) {
                if (m != NULL)
                        err = drbr_enqueue(ifp, txr->br, m);
                return (err);
        }

        /* Call cleanup if number of TX descriptors low */
        if (txr->tx_avail <= EM_TX_CLEANUP_THRESHOLD)
                em_txeof(txr);

        enq = 0;
        if (m == NULL) {
                next = drbr_dequeue(ifp, txr->br);
        } else if (drbr_needs_enqueue(ifp, txr->br)) {
                if ((err = drbr_enqueue(ifp, txr->br, m)) != 0)
                        return (err);
                next = drbr_dequeue(ifp, txr->br);
        } else
                next = m;

        /* Process the queue */
        while (next != NULL) {
                if ((err = em_xmit(txr, &next)) != 0) {
                        if (next != NULL)
                                err = drbr_enqueue(ifp, txr->br, next);
                        break;
                }
                enq++;
                drbr_stats_update(ifp, next->m_pkthdr.len, next->m_flags);
                ETHER_BPF_MTAP(ifp, next);
                if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
                        break;
                if (txr->tx_avail < EM_MAX_SCATTER) {
                        ifp->if_drv_flags |= IFF_DRV_OACTIVE;
                        break;
                }
                next = drbr_dequeue(ifp, txr->br);
        }

        if (enq > 0) {
                /* Set the watchdog */
                txr->queue_status = EM_QUEUE_WORKING;
                txr->watchdog_time = ticks;
        }
        return (err);
}

/*
** Multiqueue capable stack interface
*/
static int
em_mq_start(struct ifnet *ifp, struct mbuf *m)
{
        struct adapter  *adapter = ifp->if_softc;
        struct tx_ring  *txr = adapter->tx_rings;
        int             error;

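        /*
         * The TRYLOCK below avoids blocking here: if the TX lock is
         * contended, the frame is simply queued to the buf_ring and
         * is typically drained by the thread currently running
         * em_mq_start_locked(), or on a later start/txeof pass.
         */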
        if (EM_TX_TRYLOCK(txr)) {
                error = em_mq_start_locked(ifp, txr, m);
                EM_TX_UNLOCK(txr);
        } else
                error = drbr_enqueue(ifp, txr->br, m);

        return (error);
}

/*
** Flush all ring buffers
*/
static void
em_qflush(struct ifnet *ifp)
{
        struct adapter  *adapter = ifp->if_softc;
        struct tx_ring  *txr = adapter->tx_rings;
        struct mbuf     *m;

        for (int i = 0; i < adapter->num_queues; i++, txr++) {
                EM_TX_LOCK(txr);
                while ((m = buf_ring_dequeue_sc(txr->br)) != NULL)
                        m_freem(m);
                EM_TX_UNLOCK(txr);
        }
        if_qflush(ifp);
}

#endif /* EM_MULTIQUEUE */

static void
em_start_locked(struct ifnet *ifp, struct tx_ring *txr)
{
        struct adapter  *adapter = ifp->if_softc;
        struct mbuf     *m_head;

        EM_TX_LOCK_ASSERT(txr);

        if ((ifp->if_drv_flags & (IFF_DRV_RUNNING|IFF_DRV_OACTIVE)) !=
            IFF_DRV_RUNNING)
                return;

        if (!adapter->link_active)
                return;

        while (!IFQ_DRV_IS_EMPTY(&ifp->if_snd)) {
                /* Call cleanup if number of TX descriptors low */
                if (txr->tx_avail <= EM_TX_CLEANUP_THRESHOLD)
                        em_txeof(txr);
                if (txr->tx_avail < EM_MAX_SCATTER) {
                        ifp->if_drv_flags |= IFF_DRV_OACTIVE;
                        break;
                }
                IFQ_DRV_DEQUEUE(&ifp->if_snd, m_head);
                if (m_head == NULL)
                        break;
                /*
                 *  Encapsulation can modify our pointer, and/or make it
                 *  NULL on failure.  In that event, we can't requeue.
                 */
                if (em_xmit(txr, &m_head)) {
                        if (m_head == NULL)
                                break;
                        ifp->if_drv_flags |= IFF_DRV_OACTIVE;
                        IFQ_DRV_PREPEND(&ifp->if_snd, m_head);
                        break;
                }

                /* Send a copy of the frame to the BPF listener */
                ETHER_BPF_MTAP(ifp, m_head);

                /* Set timeout in case hardware has problems transmitting. */
                txr->watchdog_time = ticks;
                txr->queue_status = EM_QUEUE_WORKING;
        }

        return;
}

static void
em_start(struct ifnet *ifp)
{
        struct adapter  *adapter = ifp->if_softc;
        struct tx_ring  *txr = adapter->tx_rings;

        if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
                EM_TX_LOCK(txr);
                em_start_locked(ifp, txr);
                EM_TX_UNLOCK(txr);
        }
        return;
}

/*********************************************************************
 *  Ioctl entry point
 *
 *  em_ioctl is called when the user wants to configure the
 *  interface.
 *
 *  return 0 on success, positive on failure
 **********************************************************************/

static int
em_ioctl(struct ifnet *ifp, u_long command, caddr_t data)
{
        struct adapter  *adapter = ifp->if_softc;
        struct ifreq *ifr = (struct ifreq *)data;
#ifdef INET
        struct ifaddr *ifa = (struct ifaddr *)data;
#endif
        int error = 0;

        if (adapter->in_detach)
                return (error);

        switch (command) {
        case SIOCSIFADDR:
#ifdef INET
                if (ifa->ifa_addr->sa_family == AF_INET) {
                        /*
                         * XXX
                         * Since resetting hardware takes a very long time
                         * and results in link renegotiation, we only
                         * initialize the hardware when it is absolutely
                         * required.
                         */
                        ifp->if_flags |= IFF_UP;
                        if (!(ifp->if_drv_flags & IFF_DRV_RUNNING)) {
                                EM_CORE_LOCK(adapter);
                                em_init_locked(adapter);
                                EM_CORE_UNLOCK(adapter);
                        }
                        arp_ifinit(ifp, ifa);
                } else
#endif
                        error = ether_ioctl(ifp, command, data);
                break;
        case SIOCSIFMTU:
            {
                int max_frame_size;

                IOCTL_DEBUGOUT("ioctl rcv'd: SIOCSIFMTU (Set Interface MTU)");

                EM_CORE_LOCK(adapter);
                switch (adapter->hw.mac.type) {
                case e1000_82571:
                case e1000_82572:
                case e1000_ich9lan:
                case e1000_ich10lan:
                case e1000_pch2lan:
                case e1000_82574:
                case e1000_80003es2lan: /* 9K Jumbo Frame size */
                        max_frame_size = 9234;
                        break;
                case e1000_pchlan:
                        max_frame_size = 4096;
                        break;
                        /* Adapters that do not support jumbo frames */
                case e1000_82583:
                case e1000_ich8lan:
                        max_frame_size = ETHER_MAX_LEN;
                        break;
                default:
                        max_frame_size = MAX_JUMBO_FRAME_SIZE;
                }
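                /*
                 * The check below subtracts the 14-byte Ethernet header
                 * and the 4-byte CRC from the hard frame limit, so e.g.
                 * a 9234-byte limit allows an MTU of up to
                 * 9234 - 14 - 4 = 9216 bytes.
                 */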
                if (ifr->ifr_mtu > max_frame_size - ETHER_HDR_LEN -
                    ETHER_CRC_LEN) {
                        EM_CORE_UNLOCK(adapter);
                        error = EINVAL;
                        break;
                }

                ifp->if_mtu = ifr->ifr_mtu;
                adapter->max_frame_size =
                    ifp->if_mtu + ETHER_HDR_LEN + ETHER_CRC_LEN;
                em_init_locked(adapter);
                EM_CORE_UNLOCK(adapter);
                break;
            }
        case SIOCSIFFLAGS:
                IOCTL_DEBUGOUT("ioctl rcv'd:\
                    SIOCSIFFLAGS (Set Interface Flags)");
                EM_CORE_LOCK(adapter);
                if (ifp->if_flags & IFF_UP) {
                        if ((ifp->if_drv_flags & IFF_DRV_RUNNING)) {
                                if ((ifp->if_flags ^ adapter->if_flags) &
                                    (IFF_PROMISC | IFF_ALLMULTI)) {
                                        em_disable_promisc(adapter);
                                        em_set_promisc(adapter);
                                }
                        } else
                                em_init_locked(adapter);
                } else
                        if (ifp->if_drv_flags & IFF_DRV_RUNNING)
                                em_stop(adapter);
                adapter->if_flags = ifp->if_flags;
                EM_CORE_UNLOCK(adapter);
                break;
        case SIOCADDMULTI:
        case SIOCDELMULTI:
                IOCTL_DEBUGOUT("ioctl rcv'd: SIOC(ADD|DEL)MULTI");
                if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
                        EM_CORE_LOCK(adapter);
                        em_disable_intr(adapter);
                        em_set_multi(adapter);
#ifdef DEVICE_POLLING
                        if (!(ifp->if_capenable & IFCAP_POLLING))
#endif
                                em_enable_intr(adapter);
                        EM_CORE_UNLOCK(adapter);
                }
                break;
        case SIOCSIFMEDIA:
                /*
                ** As the speed/duplex settings are being
                ** changed, we need to reset the PHY.
                */
                adapter->hw.phy.reset_disable = FALSE;
                /* Check SOL/IDER usage */
                EM_CORE_LOCK(adapter);
                if (e1000_check_reset_block(&adapter->hw)) {
                        EM_CORE_UNLOCK(adapter);
                        device_printf(adapter->dev, "Media change is"
                            " blocked due to SOL/IDER session.\n");
                        break;
                }
                EM_CORE_UNLOCK(adapter);
                /* falls thru */
        case SIOCGIFMEDIA:
                IOCTL_DEBUGOUT("ioctl rcv'd: \
                    SIOCxIFMEDIA (Get/Set Interface Media)");
                error = ifmedia_ioctl(ifp, ifr, &adapter->media, command);
                break;
        case SIOCSIFCAP:
            {
                int mask, reinit;

                IOCTL_DEBUGOUT("ioctl rcv'd: SIOCSIFCAP (Set Capabilities)");
                reinit = 0;
                mask = ifr->ifr_reqcap ^ ifp->if_capenable;
#ifdef DEVICE_POLLING
                if (mask & IFCAP_POLLING) {
                        if (ifr->ifr_reqcap & IFCAP_POLLING) {
                                error = ether_poll_register(em_poll, ifp);
                                if (error)
                                        return (error);
                                EM_CORE_LOCK(adapter);
                                em_disable_intr(adapter);
                                ifp->if_capenable |= IFCAP_POLLING;
                                EM_CORE_UNLOCK(adapter);
                        } else {
                                error = ether_poll_deregister(ifp);
                                /* Enable interrupt even in error case */
                                EM_CORE_LOCK(adapter);
                                em_enable_intr(adapter);
                                ifp->if_capenable &= ~IFCAP_POLLING;
                                EM_CORE_UNLOCK(adapter);
                        }
                }
#endif
                if (mask & IFCAP_HWCSUM) {
                        ifp->if_capenable ^= IFCAP_HWCSUM;
                        reinit = 1;
                }
                if (mask & IFCAP_TSO4) {
                        ifp->if_capenable ^= IFCAP_TSO4;
                        reinit = 1;
                }
                if (mask & IFCAP_VLAN_HWTAGGING) {
                        ifp->if_capenable ^= IFCAP_VLAN_HWTAGGING;
                        reinit = 1;
                }
                if (mask & IFCAP_VLAN_HWFILTER) {
                        ifp->if_capenable ^= IFCAP_VLAN_HWFILTER;
                        reinit = 1;
                }
                if ((mask & IFCAP_WOL) &&
                    (ifp->if_capabilities & IFCAP_WOL) != 0) {
                        if (mask & IFCAP_WOL_MCAST)
                                ifp->if_capenable ^= IFCAP_WOL_MCAST;
                        if (mask & IFCAP_WOL_MAGIC)
                                ifp->if_capenable ^= IFCAP_WOL_MAGIC;
                }
                if (reinit && (ifp->if_drv_flags & IFF_DRV_RUNNING))
                        em_init(adapter);
                VLAN_CAPABILITIES(ifp);
                break;
            }

        default:
                error = ether_ioctl(ifp, command, data);
                break;
        }

        return (error);
}

/*********************************************************************
 *  Init entry point
 *
 *  This routine is used in two ways. It is used by the stack as
 *  the init entry point in the network interface structure. It is
 *  also used by the driver as a hw/sw initialization routine to
 *  get to a consistent state.
 *
 *  return 0 on success, positive on failure
 **********************************************************************/

static void
em_init_locked(struct adapter *adapter)
{
        struct ifnet    *ifp = adapter->ifp;
        device_t        dev = adapter->dev;
        u32             pba;

        INIT_DEBUGOUT("em_init: begin");

        EM_CORE_LOCK_ASSERT(adapter);

        em_disable_intr(adapter);
        callout_stop(&adapter->timer);

        /*
         * Packet Buffer Allocation (PBA)
         * Writing PBA sets the receive portion of the buffer;
         * the remainder is used for the transmit buffer.
         */
        switch (adapter->hw.mac.type) {
        /* Total Packet Buffer on these is 48K */
        case e1000_82571:
        case e1000_82572:
        case e1000_80003es2lan:
                pba = E1000_PBA_32K; /* 32K for Rx, 16K for Tx */
                break;
        case e1000_82573: /* 82573: Total Packet Buffer is 32K */
                pba = E1000_PBA_12K; /* 12K for Rx, 20K for Tx */
                break;
        case e1000_82574:
        case e1000_82583:
                pba = E1000_PBA_20K; /* 20K for Rx, 20K for Tx */
                break;
        case e1000_ich8lan:
                pba = E1000_PBA_8K;
                break;
        case e1000_ich9lan:
        case e1000_ich10lan:
                pba = E1000_PBA_10K;
                break;
        case e1000_pchlan:
        case e1000_pch2lan:
                pba = E1000_PBA_26K;
                break;
        default:
                if (adapter->max_frame_size > 8192)
                        pba = E1000_PBA_40K; /* 40K for Rx, 24K for Tx */
                else
                        pba = E1000_PBA_48K; /* 48K for Rx, 16K for Tx */
        }
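        /*
         * Reading the inline comments above (an assumption, not stated
         * elsewhere in this file): the E1000_PBA_* values are in KB
         * units, so e.g. E1000_PBA_32K carves a 32 KB receive
         * allocation out of a 48 KB packet buffer, leaving 16 KB
         * for transmit.
         */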

        INIT_DEBUGOUT1("em_init: pba=%dK",pba);
        E1000_WRITE_REG(&adapter->hw, E1000_PBA, pba);

        /* Get the latest mac address; user can use a LAA */
        bcopy(IF_LLADDR(adapter->ifp), adapter->hw.mac.addr,
              ETHER_ADDR_LEN);

        /* Put the address into the Receive Address Array */
        e1000_rar_set(&adapter->hw, adapter->hw.mac.addr, 0);

        /*
         * With the 82571 adapter, RAR[0] may be overwritten
         * when the other port is reset. We make a duplicate
         * in RAR[14] for that eventuality, which assures
         * the interface continues to function.
         */
        if (adapter->hw.mac.type == e1000_82571) {
                e1000_set_laa_state_82571(&adapter->hw, TRUE);
                e1000_rar_set(&adapter->hw, adapter->hw.mac.addr,
                    E1000_RAR_ENTRIES - 1);
        }

        /* Initialize the hardware */
        em_reset(adapter);
        em_update_link_status(adapter);

        /* Setup VLAN support, basic and offload if available */
        E1000_WRITE_REG(&adapter->hw, E1000_VET, ETHERTYPE_VLAN);

        /* Set hardware offload abilities */
        ifp->if_hwassist = 0;
        if (ifp->if_capenable & IFCAP_TXCSUM)
                ifp->if_hwassist |= (CSUM_TCP | CSUM_UDP);
        if (ifp->if_capenable & IFCAP_TSO4)
                ifp->if_hwassist |= CSUM_TSO;

        /* Configure for OS presence */
        em_init_manageability(adapter);

        /* Prepare transmit descriptors and buffers */
        em_setup_transmit_structures(adapter);
        em_initialize_transmit_unit(adapter);

        /* Setup Multicast table */
        em_set_multi(adapter);

        /*
        ** Figure out the desired mbuf
        ** pool for doing jumbos
        */
        if (adapter->max_frame_size <= 2048)
                adapter->rx_mbuf_sz = MCLBYTES;
        else if (adapter->max_frame_size <= 4096)
                adapter->rx_mbuf_sz = MJUMPAGESIZE;
        else
                adapter->rx_mbuf_sz = MJUM9BYTES;
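        /*
         * For example, a 9000-byte MTU gives a max_frame_size of
         * 9000 + 14 + 4 = 9018 bytes, which selects the 9 KB
         * (MJUM9BYTES) cluster pool above.
         */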
1340
1341         /* Prepare receive descriptors and buffers */
1342         if (em_setup_receive_structures(adapter)) {
1343                 device_printf(dev, "Could not setup receive structures\n");
1344                 em_stop(adapter);
1345                 return;
1346         }
1347         em_initialize_receive_unit(adapter);
1348
1349         /* Use real VLAN Filter support? */
1350         if (ifp->if_capenable & IFCAP_VLAN_HWTAGGING) {
1351                 if (ifp->if_capenable & IFCAP_VLAN_HWFILTER)
1352                         /* Use real VLAN Filter support */
1353                         em_setup_vlan_hw_support(adapter);
1354                 else {
1355                         u32 ctrl;
1356                         ctrl = E1000_READ_REG(&adapter->hw, E1000_CTRL);
1357                         ctrl |= E1000_CTRL_VME;
1358                         E1000_WRITE_REG(&adapter->hw, E1000_CTRL, ctrl);
1359                 }
1360         }
1361
1362         /* Don't lose promiscuous settings */
1363         em_set_promisc(adapter);
1364
1365         ifp->if_drv_flags |= IFF_DRV_RUNNING;
1366         ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;
1367
1368         callout_reset(&adapter->timer, hz, em_local_timer, adapter);
1369         e1000_clear_hw_cntrs_base_generic(&adapter->hw);
1370
1371         /* MSI/X configuration for 82574 */
1372         if (adapter->hw.mac.type == e1000_82574) {
1373                 int tmp;
1374                 tmp = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
1375                 tmp |= E1000_CTRL_EXT_PBA_CLR;
1376                 E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT, tmp);
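                     /*
                      * Note: PBA_CLR here refers to the MSI-X Pending Bit
                      * Array, not the E1000_PBA packet buffer register above.
                      */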
1377                 /* Set the IVAR - interrupt vector routing. */
1378                 E1000_WRITE_REG(&adapter->hw, E1000_IVAR, adapter->ivars);
1379         }
1380
1381 #ifdef DEVICE_POLLING
1382         /*
1383          * Only enable interrupts if we are not polling; make sure
1384          * they are off otherwise.
1385          */
1386         if (ifp->if_capenable & IFCAP_POLLING)
1387                 em_disable_intr(adapter);
1388         else
1389 #endif /* DEVICE_POLLING */
1390                 em_enable_intr(adapter);
1391
1392         /* AMT based hardware can now take control from firmware */
1393         if (adapter->has_manage && adapter->has_amt)
1394                 em_get_hw_control(adapter);
1395
1396         /* Don't reset the phy next time init gets called */
1397         adapter->hw.phy.reset_disable = TRUE;
1398 }
1399
1400 static void
1401 em_init(void *arg)
1402 {
1403         struct adapter *adapter = arg;
1404
1405         EM_CORE_LOCK(adapter);
1406         em_init_locked(adapter);
1407         EM_CORE_UNLOCK(adapter);
1408 }
1409
1410
1411 #ifdef DEVICE_POLLING
1412 /*********************************************************************
1413  *
1414  *  Legacy polling routine: note this only works with a single queue
1415  *
1416  *********************************************************************/
1417 static int
1418 em_poll(struct ifnet *ifp, enum poll_cmd cmd, int count)
1419 {
1420         struct adapter *adapter = ifp->if_softc;
1421         struct tx_ring  *txr = adapter->tx_rings;
1422         struct rx_ring  *rxr = adapter->rx_rings;
1423         u32             reg_icr;
1424         int             rx_done;
1425
1426         EM_CORE_LOCK(adapter);
1427         if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) {
1428                 EM_CORE_UNLOCK(adapter);
1429                 return (0);
1430         }
1431
1432         if (cmd == POLL_AND_CHECK_STATUS) {
1433                 reg_icr = E1000_READ_REG(&adapter->hw, E1000_ICR);
1434                 if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
1435                         callout_stop(&adapter->timer);
1436                         adapter->hw.mac.get_link_status = 1;
1437                         em_update_link_status(adapter);
1438                         callout_reset(&adapter->timer, hz,
1439                             em_local_timer, adapter);
1440                 }
1441         }
1442         EM_CORE_UNLOCK(adapter);
1443
1444         em_rxeof(rxr, count, &rx_done);
1445
1446         EM_TX_LOCK(txr);
1447         em_txeof(txr);
1448 #ifdef EM_MULTIQUEUE
1449         if (!drbr_empty(ifp, txr->br))
1450                 em_mq_start_locked(ifp, txr, NULL);
1451 #else
1452         em_start_locked(ifp, txr);
1453 #endif
1454         EM_TX_UNLOCK(txr);
1455
1456         return (rx_done);
1457 }
1458 #endif /* DEVICE_POLLING */
1459
1460
1461 /*********************************************************************
1462  *
1463  *  Fast Legacy/MSI Combined Interrupt Service routine  
1464  *
1465  *********************************************************************/
1466 static int
1467 em_irq_fast(void *arg)
1468 {
1469         struct adapter  *adapter = arg;
1470         struct ifnet    *ifp;
1471         u32             reg_icr;
1472
1473         ifp = adapter->ifp;
1474
1475         reg_icr = E1000_READ_REG(&adapter->hw, E1000_ICR);
1476
1477         /* Hot eject?  */
1478         if (reg_icr == 0xffffffff)
1479                 return FILTER_STRAY;
1480
1481         /* Definitely not our interrupt.  */
1482         if (reg_icr == 0x0)
1483                 return FILTER_STRAY;
1484
1485         /*
1486          * Starting with the 82571 chip, bit 31 should be used to
1487          * determine whether the interrupt belongs to us.
1488          */
1489         if (adapter->hw.mac.type >= e1000_82571 &&
1490             (reg_icr & E1000_ICR_INT_ASSERTED) == 0)
1491                 return FILTER_STRAY;
1492
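             /*
              * Mask further interrupts and defer all RX/TX processing to
              * the taskqueue; em_handle_que() re-enables interrupts once
              * the deferred work is done.
              */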
1493         em_disable_intr(adapter);
1494         taskqueue_enqueue(adapter->tq, &adapter->que_task);
1495
1496         /* Link status change */
1497         if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
1498                 adapter->hw.mac.get_link_status = 1;
1499                 taskqueue_enqueue(taskqueue_fast, &adapter->link_task);
1500         }
1501
1502         if (reg_icr & E1000_ICR_RXO)
1503                 adapter->rx_overruns++;
1504         return FILTER_HANDLED;
1505 }
1506
1507 /* Combined RX/TX handler, used by Legacy and MSI */
1508 static void
1509 em_handle_que(void *context, int pending)
1510 {
1511         struct adapter  *adapter = context;
1512         struct ifnet    *ifp = adapter->ifp;
1513         struct tx_ring  *txr = adapter->tx_rings;
1514         struct rx_ring  *rxr = adapter->rx_rings;
1515
1516
1517         if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
1518                 bool more = em_rxeof(rxr, adapter->rx_process_limit, NULL);
1519                 EM_TX_LOCK(txr);
1520                 em_txeof(txr);
1521 #ifdef EM_MULTIQUEUE
1522                 if (!drbr_empty(ifp, txr->br))
1523                         em_mq_start_locked(ifp, txr, NULL);
1524 #else
1525                 em_start_locked(ifp, txr);
1526 #endif
1527                 EM_TX_UNLOCK(txr);
1528                 if (more || (ifp->if_drv_flags & IFF_DRV_OACTIVE)) {
1529                         taskqueue_enqueue(adapter->tq, &adapter->que_task);
1530                         return;
1531                 }
1532         }
1533
1534         em_enable_intr(adapter);
1535         return;
1536 }
1537
1538
1539 /*********************************************************************
1540  *
1541  *  MSIX Interrupt Service Routines
1542  *
1543  **********************************************************************/
1544 static void
1545 em_msix_tx(void *arg)
1546 {
1547         struct tx_ring *txr = arg;
1548         struct adapter *adapter = txr->adapter;
1549         bool            more;
1550
1551         ++txr->tx_irq;
1552         EM_TX_LOCK(txr);
1553         more = em_txeof(txr);
1554         EM_TX_UNLOCK(txr);
1555         if (more)
1556                 taskqueue_enqueue(txr->tq, &txr->tx_task);
1557         else
1558                 /* Reenable this interrupt */
1559                 E1000_WRITE_REG(&adapter->hw, E1000_IMS, txr->ims);
1560         return;
1561 }
1562
1563 /*********************************************************************
1564  *
1565  *  MSIX RX Interrupt Service routine
1566  *
1567  **********************************************************************/
1568
1569 static void
1570 em_msix_rx(void *arg)
1571 {
1572         struct rx_ring  *rxr = arg;
1573         struct adapter  *adapter = rxr->adapter;
1574         bool            more;
1575
1576         ++rxr->rx_irq;
1577         more = em_rxeof(rxr, adapter->rx_process_limit, NULL);
1578         if (more)
1579                 taskqueue_enqueue(rxr->tq, &rxr->rx_task);
1580         else
1581                 /* Reenable this interrupt */
1582                 E1000_WRITE_REG(&adapter->hw, E1000_IMS, rxr->ims);
1583         return;
1584 }
1585
1586 /*********************************************************************
1587  *
1588  *  MSIX Link Fast Interrupt Service routine
1589  *
1590  **********************************************************************/
1591 static void
1592 em_msix_link(void *arg)
1593 {
1594         struct adapter  *adapter = arg;
1595         u32             reg_icr;
1596
1597         ++adapter->link_irq;
1598         reg_icr = E1000_READ_REG(&adapter->hw, E1000_ICR);
1599
1600         if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
1601                 adapter->hw.mac.get_link_status = 1;
1602                 em_handle_link(adapter, 0);
1603         } else
1604                 E1000_WRITE_REG(&adapter->hw, E1000_IMS,
1605                     EM_MSIX_LINK | E1000_IMS_LSC);
1606         return;
1607 }
1608
1609 static void
1610 em_handle_rx(void *context, int pending)
1611 {
1612         struct rx_ring  *rxr = context;
1613         struct adapter  *adapter = rxr->adapter;
1614         bool            more;
1615
1616         more = em_rxeof(rxr, adapter->rx_process_limit, NULL);
1617         if (more)
1618                 taskqueue_enqueue(rxr->tq, &rxr->rx_task);
1619         else
1620                 /* Reenable this interrupt */
1621                 E1000_WRITE_REG(&adapter->hw, E1000_IMS, rxr->ims);
1622 }
1623
1624 static void
1625 em_handle_tx(void *context, int pending)
1626 {
1627         struct tx_ring  *txr = context;
1628         struct adapter  *adapter = txr->adapter;
1629         struct ifnet    *ifp = adapter->ifp;
1630
1631         EM_TX_LOCK(txr);
1632         em_txeof(txr);
1633 #ifdef EM_MULTIQUEUE
1634         if (!drbr_empty(ifp, txr->br))
1635                 em_mq_start_locked(ifp, txr, NULL);
1636 #else
1637         em_start_locked(ifp, txr);
1638 #endif
1639         E1000_WRITE_REG(&adapter->hw, E1000_IMS, txr->ims);
1640         EM_TX_UNLOCK(txr);
1641 }
1642
1643 static void
1644 em_handle_link(void *context, int pending)
1645 {
1646         struct adapter  *adapter = context;
1647         struct ifnet *ifp = adapter->ifp;
1648
1649         if (!(ifp->if_drv_flags & IFF_DRV_RUNNING))
1650                 return;
1651
1652         EM_CORE_LOCK(adapter);
1653         callout_stop(&adapter->timer);
1654         em_update_link_status(adapter);
1655         callout_reset(&adapter->timer, hz, em_local_timer, adapter);
1656         E1000_WRITE_REG(&adapter->hw, E1000_IMS,
1657             EM_MSIX_LINK | E1000_IMS_LSC);
1658         EM_CORE_UNLOCK(adapter);
1659 }
1660
1661
1662 /*********************************************************************
1663  *
1664  *  Media Ioctl callback
1665  *
1666  *  This routine is called whenever the user queries the status of
1667  *  the interface using ifconfig.
1668  *
1669  **********************************************************************/
1670 static void
1671 em_media_status(struct ifnet *ifp, struct ifmediareq *ifmr)
1672 {
1673         struct adapter *adapter = ifp->if_softc;
1674         u_char fiber_type = IFM_1000_SX;
1675
1676         INIT_DEBUGOUT("em_media_status: begin");
1677
1678         EM_CORE_LOCK(adapter);
1679         em_update_link_status(adapter);
1680
1681         ifmr->ifm_status = IFM_AVALID;
1682         ifmr->ifm_active = IFM_ETHER;
1683
1684         if (!adapter->link_active) {
1685                 EM_CORE_UNLOCK(adapter);
1686                 return;
1687         }
1688
1689         ifmr->ifm_status |= IFM_ACTIVE;
1690
1691         if ((adapter->hw.phy.media_type == e1000_media_type_fiber) ||
1692             (adapter->hw.phy.media_type == e1000_media_type_internal_serdes)) {
1693                 ifmr->ifm_active |= fiber_type | IFM_FDX;
1694         } else {
1695                 switch (adapter->link_speed) {
1696                 case 10:
1697                         ifmr->ifm_active |= IFM_10_T;
1698                         break;
1699                 case 100:
1700                         ifmr->ifm_active |= IFM_100_TX;
1701                         break;
1702                 case 1000:
1703                         ifmr->ifm_active |= IFM_1000_T;
1704                         break;
1705                 }
1706                 if (adapter->link_duplex == FULL_DUPLEX)
1707                         ifmr->ifm_active |= IFM_FDX;
1708                 else
1709                         ifmr->ifm_active |= IFM_HDX;
1710         }
1711         EM_CORE_UNLOCK(adapter);
1712 }
1713
1714 /*********************************************************************
1715  *
1716  *  Media Ioctl callback
1717  *
1718  *  This routine is called when the user changes speed/duplex using
1719  *  media/mediaopt options with ifconfig.
1720  *
1721  **********************************************************************/
1722 static int
1723 em_media_change(struct ifnet *ifp)
1724 {
1725         struct adapter *adapter = ifp->if_softc;
1726         struct ifmedia  *ifm = &adapter->media;
1727
1728         INIT_DEBUGOUT("em_media_change: begin");
1729
1730         if (IFM_TYPE(ifm->ifm_media) != IFM_ETHER)
1731                 return (EINVAL);
1732
1733         EM_CORE_LOCK(adapter);
1734         switch (IFM_SUBTYPE(ifm->ifm_media)) {
1735         case IFM_AUTO:
1736                 adapter->hw.mac.autoneg = DO_AUTO_NEG;
1737                 adapter->hw.phy.autoneg_advertised = AUTONEG_ADV_DEFAULT;
1738                 break;
1739         case IFM_1000_LX:
1740         case IFM_1000_SX:
1741         case IFM_1000_T:
1742                 adapter->hw.mac.autoneg = DO_AUTO_NEG;
1743                 adapter->hw.phy.autoneg_advertised = ADVERTISE_1000_FULL;
1744                 break;
1745         case IFM_100_TX:
1746                 adapter->hw.mac.autoneg = FALSE;
1747                 adapter->hw.phy.autoneg_advertised = 0;
1748                 if ((ifm->ifm_media & IFM_GMASK) == IFM_FDX)
1749                         adapter->hw.mac.forced_speed_duplex = ADVERTISE_100_FULL;
1750                 else
1751                         adapter->hw.mac.forced_speed_duplex = ADVERTISE_100_HALF;
1752                 break;
1753         case IFM_10_T:
1754                 adapter->hw.mac.autoneg = FALSE;
1755                 adapter->hw.phy.autoneg_advertised = 0;
1756                 if ((ifm->ifm_media & IFM_GMASK) == IFM_FDX)
1757                         adapter->hw.mac.forced_speed_duplex = ADVERTISE_10_FULL;
1758                 else
1759                         adapter->hw.mac.forced_speed_duplex = ADVERTISE_10_HALF;
1760                 break;
1761         default:
1762                 device_printf(adapter->dev, "Unsupported media type\n");
1763         }
1764
1765         em_init_locked(adapter);
1766         EM_CORE_UNLOCK(adapter);
1767
1768         return (0);
1769 }
1770
1771 /*********************************************************************
1772  *
1773  *  This routine maps the mbufs to tx descriptors.
1774  *
1775  *  return 0 on success, positive on failure
1776  **********************************************************************/
1777
1778 static int
1779 em_xmit(struct tx_ring *txr, struct mbuf **m_headp)
1780 {
1781         struct adapter          *adapter = txr->adapter;
1782         bus_dma_segment_t       segs[EM_MAX_SCATTER];
1783         bus_dmamap_t            map;
1784         struct em_buffer        *tx_buffer, *tx_buffer_mapped;
1785         struct e1000_tx_desc    *ctxd = NULL;
1786         struct mbuf             *m_head;
1787         struct ether_header     *eh;
1788         struct ip               *ip = NULL;
1789         struct tcphdr           *tp = NULL;
1790         u32                     txd_upper, txd_lower, txd_used, txd_saved;
1791         int                     ip_off, poff;
1792         int                     nsegs, i, j, first, last = 0;
1793         int                     error, do_tso, tso_desc = 0, remap = 1;
1794
1795 retry:
1796         m_head = *m_headp;
1797         txd_upper = txd_lower = txd_used = txd_saved = 0;
1798         do_tso = ((m_head->m_pkthdr.csum_flags & CSUM_TSO) != 0);
1799         ip_off = poff = 0;
1800
1801         /*
1802          * Intel recommends that the entire IP/TCP header length reside in
1803          * a single buffer. If multiple descriptors are used to describe
1804          * the IP and TCP header, each descriptor should describe one or
1805          * more complete headers; descriptors that reference only part of
1806          * a header are not supported. If the layer headers are not
1807          * coalesced into a single buffer, each buffer should not cross a
1808          * 4KB boundary or exceed the maximum read request size. The
1809          * controller also requires modifying the IP/TCP header for TSO,
1810          * so we first get a writable mbuf chain and then coalesce the
1811          * ethernet/IP/TCP headers into a single buffer to meet its
1812          * needs. This also simplifies IP/TCP/UDP checksum offloading,
1813          * which has similar restrictions.
1814          */
1815         if (do_tso || m_head->m_pkthdr.csum_flags & CSUM_OFFLOAD) {
1816                 if (do_tso || (m_head->m_next != NULL && 
1817                     m_head->m_pkthdr.csum_flags & CSUM_OFFLOAD)) {
1818                         if (M_WRITABLE(*m_headp) == 0) {
1819                                 m_head = m_dup(*m_headp, M_DONTWAIT);
1820                                 m_freem(*m_headp);
1821                                 if (m_head == NULL) {
1822                                         *m_headp = NULL;
1823                                         return (ENOBUFS);
1824                                 }
1825                                 *m_headp = m_head;
1826                         }
1827                 }
1828                 /*
1829                  * XXX
1830                  * Assume IPv4, we don't have TSO/checksum offload support
1831                  * for IPv6 yet.
1832                  */
1833                 ip_off = sizeof(struct ether_header);
1834                 m_head = m_pullup(m_head, ip_off);
1835                 if (m_head == NULL) {
1836                         *m_headp = NULL;
1837                         return (ENOBUFS);
1838                 }
1839                 eh = mtod(m_head, struct ether_header *);
1840                 if (eh->ether_type == htons(ETHERTYPE_VLAN)) {
1841                         ip_off = sizeof(struct ether_vlan_header);
1842                         m_head = m_pullup(m_head, ip_off);
1843                         if (m_head == NULL) {
1844                                 *m_headp = NULL;
1845                                 return (ENOBUFS);
1846                         }
1847                 }
1848                 m_head = m_pullup(m_head, ip_off + sizeof(struct ip));
1849                 if (m_head == NULL) {
1850                         *m_headp = NULL;
1851                         return (ENOBUFS);
1852                 }
1853                 ip = (struct ip *)(mtod(m_head, char *) + ip_off);
1854                 poff = ip_off + (ip->ip_hl << 2);
1855                 if (do_tso) {
1856                         m_head = m_pullup(m_head, poff + sizeof(struct tcphdr));
1857                         if (m_head == NULL) {
1858                                 *m_headp = NULL;
1859                                 return (ENOBUFS);
1860                         }
1861                         tp = (struct tcphdr *)(mtod(m_head, char *) + poff);
1862                         /*
1863                          * TSO workaround: pull 4 bytes beyond the
1864                          * TCP header into the first mbuf as well.
1865                          */
1866                         m_head = m_pullup(m_head, poff + (tp->th_off << 2) + 4);
1867                         if (m_head == NULL) {
1868                                 *m_headp = NULL;
1869                                 return (ENOBUFS);
1870                         }
1871                         ip = (struct ip *)(mtod(m_head, char *) + ip_off);
1872                         ip->ip_len = 0;
1873                         ip->ip_sum = 0;
1874                         /*
1875                          * The TCP pseudo-header checksum must not include
1876                          * the TCP payload length, so recompute it here as
1877                          * the hardware expects, in adherence to Microsoft's
1878                          * Large Send specification.
1879                          */
1880                         tp = (struct tcphdr *)(mtod(m_head, char *) + poff);
1881                         tp->th_sum = in_pseudo(ip->ip_src.s_addr,
1882                             ip->ip_dst.s_addr, htons(IPPROTO_TCP));
1883                 } else if (m_head->m_pkthdr.csum_flags & CSUM_TCP) {
1884                         m_head = m_pullup(m_head, poff + sizeof(struct tcphdr));
1885                         if (m_head == NULL) {
1886                                 *m_headp = NULL;
1887                                 return (ENOBUFS);
1888                         }
1889                         tp = (struct tcphdr *)(mtod(m_head, char *) + poff);
1890                         m_head = m_pullup(m_head, poff + (tp->th_off << 2));
1891                         if (m_head == NULL) {
1892                                 *m_headp = NULL;
1893                                 return (ENOBUFS);
1894                         }
1895                         ip = (struct ip *)(mtod(m_head, char *) + ip_off);
1896                         tp = (struct tcphdr *)(mtod(m_head, char *) + poff);
1897                 } else if (m_head->m_pkthdr.csum_flags & CSUM_UDP) {
1898                         m_head = m_pullup(m_head, poff + sizeof(struct udphdr));
1899                         if (m_head == NULL) {
1900                                 *m_headp = NULL;
1901                                 return (ENOBUFS);
1902                         }
1903                         ip = (struct ip *)(mtod(m_head, char *) + ip_off);
1904                 }
1905                 *m_headp = m_head;
1906         }
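             /*
              * If offload was requested, the layer headers now all reside
              * in the first mbuf and ip/tp point at their writable
              * locations.
              */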
1907
1908         /*
1909          * Map the packet for DMA.
1910          *
1911          * Capture the first descriptor index;
1912          * this descriptor will later record the
1913          * index of the EOP, which is the only one
1914          * that gets a DONE bit writeback.
1915          */
1916         first = txr->next_avail_desc;
1917         tx_buffer = &txr->tx_buffers[first];
1918         tx_buffer_mapped = tx_buffer;
1919         map = tx_buffer->map;
1920
1921         error = bus_dmamap_load_mbuf_sg(txr->txtag, map,
1922             *m_headp, segs, &nsegs, BUS_DMA_NOWAIT);
1923
1924         /*
1925          * There are two types of errors we can (try) to handle:
1926          * - EFBIG means the mbuf chain was too long and bus_dma ran
1927          *   out of segments.  Defragment the mbuf chain and try again.
1928          * - ENOMEM means bus_dma could not obtain enough bounce buffers
1929          *   at this point in time.  Defer sending and try again later.
1930          * All other errors, in particular EINVAL, are fatal and prevent the
1931          * mbuf chain from ever going through.  Drop it and report error.
1932          */
1933         if (error == EFBIG && remap) {
1934                 struct mbuf *m;
1935
1936                 m = m_defrag(*m_headp, M_DONTWAIT);
1937                 if (m == NULL) {
1938                         adapter->mbuf_alloc_failed++;
1939                         m_freem(*m_headp);
1940                         *m_headp = NULL;
1941                         return (ENOBUFS);
1942                 }
1943                 *m_headp = m;
1944
1945                 /* Try it again, but only once */
1946                 remap = 0;
1947                 goto retry;
1948         } else if (error == ENOMEM) {
1949                 adapter->no_tx_dma_setup++;
1950                 return (error);
1951         } else if (error != 0) {
1952                 adapter->no_tx_dma_setup++;
1953                 m_freem(*m_headp);
1954                 *m_headp = NULL;
1955                 return (error);
1956         }
1957
1958         /*
1959          * TSO hardware workaround: if this packet is not
1960          * TSO, is only a single descriptor long, and
1961          * follows a TSO burst, then we need to add a
1962          * sentinel descriptor to prevent premature writeback.
1963          */
1964         if ((do_tso == 0) && (txr->tx_tso == TRUE)) {
1965                 if (nsegs == 1)
1966                         tso_desc = TRUE;
1967                 txr->tx_tso = FALSE;
1968         }
1969
1970         if (nsegs > (txr->tx_avail - 2)) {
1971                 txr->no_desc_avail++;
1972                 bus_dmamap_unload(txr->txtag, map);
1973                 return (ENOBUFS);
1974         }
1975         m_head = *m_headp;
1976
1977         /* Do hardware assists */
1978         if (m_head->m_pkthdr.csum_flags & CSUM_TSO) {
1979                 em_tso_setup(txr, m_head, ip_off, ip, tp,
1980                     &txd_upper, &txd_lower);
1981                 /* we need to make a final sentinel transmit desc */
1982                 tso_desc = TRUE;
1983         } else if (m_head->m_pkthdr.csum_flags & CSUM_OFFLOAD)
1984                 em_transmit_checksum_setup(txr, m_head,
1985                     ip_off, ip, &txd_upper, &txd_lower);
1986
1987         i = txr->next_avail_desc;
1988
1989         /* Set up our transmit descriptors */
1990         for (j = 0; j < nsegs; j++) {
1991                 bus_size_t seg_len;
1992                 bus_addr_t seg_addr;
1993
1994                 tx_buffer = &txr->tx_buffers[i];
1995                 ctxd = &txr->tx_base[i];
1996                 seg_addr = segs[j].ds_addr;
1997                 seg_len  = segs[j].ds_len;
1998                 /*
1999                 ** TSO Workaround:
2000                 ** If this is the last descriptor, we want to
2001                 ** split it so we have a small final sentinel
2002                 */
2003                 if (tso_desc && (j == (nsegs -1)) && (seg_len > 8)) {
2004                         seg_len -= 4;
2005                         ctxd->buffer_addr = htole64(seg_addr);
2006                         ctxd->lower.data = htole32(
2007                             adapter->txd_cmd | txd_lower | seg_len);
2008                         ctxd->upper.data =
2009                             htole32(txd_upper);
2010                         if (++i == adapter->num_tx_desc)
2011                                 i = 0;
2012                         /* Now make the sentinel */     
2013                         ++txd_used; /* using an extra txd */
2014                         ctxd = &txr->tx_base[i];
2015                         tx_buffer = &txr->tx_buffers[i];
2016                         ctxd->buffer_addr =
2017                             htole64(seg_addr + seg_len);
2018                         ctxd->lower.data = htole32(
2019                             adapter->txd_cmd | txd_lower | 4);
2020                         ctxd->upper.data =
2021                             htole32(txd_upper);
2022                         last = i;
2023                         if (++i == adapter->num_tx_desc)
2024                                 i = 0;
2025                 } else {
2026                         ctxd->buffer_addr = htole64(seg_addr);
2027                         ctxd->lower.data = htole32(
2028                             adapter->txd_cmd | txd_lower | seg_len);
2029                         ctxd->upper.data =
2030                             htole32(txd_upper);
2031                         last = i;
2032                         if (++i == adapter->num_tx_desc)
2033                                 i = 0;
2034                 }
2035                 tx_buffer->m_head = NULL;
2036                 tx_buffer->next_eop = -1;
2037         }
2038
2039         txr->next_avail_desc = i;
2040         txr->tx_avail -= nsegs;
2041         if (tso_desc) /* TSO used an extra for sentinel */
2042                 txr->tx_avail -= txd_used;
2043
2044         if (m_head->m_flags & M_VLANTAG) {
2045                 /* Set the vlan id. */
2046                 ctxd->upper.fields.special =
2047                     htole16(m_head->m_pkthdr.ether_vtag);
2048                 /* Tell hardware to add tag */
2049                 ctxd->lower.data |= htole32(E1000_TXD_CMD_VLE);
2050         }
2051
2052         tx_buffer->m_head = m_head;
2053         tx_buffer_mapped->map = tx_buffer->map;
2054         tx_buffer->map = map;
2055         bus_dmamap_sync(txr->txtag, map, BUS_DMASYNC_PREWRITE);
2056
2057         /*
2058          * Last Descriptor of Packet
2059          * needs End Of Packet (EOP)
2060          * and Report Status (RS)
2061          */
2062         ctxd->lower.data |=
2063             htole32(E1000_TXD_CMD_EOP | E1000_TXD_CMD_RS);
2064         /*
2065          * Record in the first buffer which
2066          * descriptor will be written back.
2067          */
2068         tx_buffer = &txr->tx_buffers[first];
2069         tx_buffer->next_eop = last;
2070         /* Update the watchdog time early and often */
2071         txr->watchdog_time = ticks;
2072
2073         /*
2074          * Advance the Transmit Descriptor Tail (TDT); this tells the E1000
2075          * that this frame is available to transmit.
2076          */
2077         bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
2078             BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
2079         E1000_WRITE_REG(&adapter->hw, E1000_TDT(txr->me), i);
2080
2081         return (0);
2082 }
2083
2084 static void
2085 em_set_promisc(struct adapter *adapter)
2086 {
2087         struct ifnet    *ifp = adapter->ifp;
2088         u32             reg_rctl;
2089
2090         reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
2091
2092         if (ifp->if_flags & IFF_PROMISC) {
2093                 reg_rctl |= (E1000_RCTL_UPE | E1000_RCTL_MPE);
2094                 /* Turn this on if you want to see bad packets */
2095                 if (em_debug_sbp)
2096                         reg_rctl |= E1000_RCTL_SBP;
2097                 E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2098         } else if (ifp->if_flags & IFF_ALLMULTI) {
2099                 reg_rctl |= E1000_RCTL_MPE;
2100                 reg_rctl &= ~E1000_RCTL_UPE;
2101                 E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2102         }
2103 }
2104
2105 static void
2106 em_disable_promisc(struct adapter *adapter)
2107 {
2108         u32     reg_rctl;
2109
2110         reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
2111
2112         reg_rctl &=  (~E1000_RCTL_UPE);
2113         reg_rctl &=  (~E1000_RCTL_MPE);
2114         reg_rctl &=  (~E1000_RCTL_SBP);
2115         E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2116 }
2117
2118
2119 /*********************************************************************
2120  *  Multicast Update
2121  *
2122  *  This routine is called whenever the multicast address list is updated.
2123  *
2124  **********************************************************************/
2125
2126 static void
2127 em_set_multi(struct adapter *adapter)
2128 {
2129         struct ifnet    *ifp = adapter->ifp;
2130         struct ifmultiaddr *ifma;
2131         u32 reg_rctl = 0;
2132         u8  *mta; /* Multicast array memory */
2133         int mcnt = 0;
2134
2135         IOCTL_DEBUGOUT("em_set_multi: begin");
2136
2137         mta = adapter->mta;
2138         bzero(mta, sizeof(u8) * ETH_ADDR_LEN * MAX_NUM_MULTICAST_ADDRESSES);
2139
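             /*
              * For 82542 rev 2.0, the receiver is held in reset (RCTL_RST)
              * and MWI is disabled while the multicast table array is
              * updated; both are restored below once the list is written.
              */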
2140         if (adapter->hw.mac.type == e1000_82542 && 
2141             adapter->hw.revision_id == E1000_REVISION_2) {
2142                 reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
2143                 if (adapter->hw.bus.pci_cmd_word & CMD_MEM_WRT_INVALIDATE)
2144                         e1000_pci_clear_mwi(&adapter->hw);
2145                 reg_rctl |= E1000_RCTL_RST;
2146                 E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2147                 msec_delay(5);
2148         }
2149
2150 #if __FreeBSD_version < 800000
2151         IF_ADDR_LOCK(ifp);
2152 #else
2153         if_maddr_rlock(ifp);
2154 #endif
2155         TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
2156                 if (ifma->ifma_addr->sa_family != AF_LINK)
2157                         continue;
2158
2159                 if (mcnt == MAX_NUM_MULTICAST_ADDRESSES)
2160                         break;
2161
2162                 bcopy(LLADDR((struct sockaddr_dl *)ifma->ifma_addr),
2163                     &mta[mcnt * ETH_ADDR_LEN], ETH_ADDR_LEN);
2164                 mcnt++;
2165         }
2166 #if __FreeBSD_version < 800000
2167         IF_ADDR_UNLOCK(ifp);
2168 #else
2169         if_maddr_runlock(ifp);
2170 #endif
2171         if (mcnt >= MAX_NUM_MULTICAST_ADDRESSES) {
2172                 reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
2173                 reg_rctl |= E1000_RCTL_MPE;
2174                 E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2175         } else
2176                 e1000_update_mc_addr_list(&adapter->hw, mta, mcnt);
2177
2178         if (adapter->hw.mac.type == e1000_82542 && 
2179             adapter->hw.revision_id == E1000_REVISION_2) {
2180                 reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
2181                 reg_rctl &= ~E1000_RCTL_RST;
2182                 E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2183                 msec_delay(5);
2184                 if (adapter->hw.bus.pci_cmd_word & CMD_MEM_WRT_INVALIDATE)
2185                         e1000_pci_set_mwi(&adapter->hw);
2186         }
2187 }
2188
2189
2190 /*********************************************************************
2191  *  Timer routine
2192  *
2193  *  This routine checks for link status and updates statistics.
2194  *
2195  **********************************************************************/
2196
2197 static void
2198 em_local_timer(void *arg)
2199 {
2200         struct adapter  *adapter = arg;
2201         struct ifnet    *ifp = adapter->ifp;
2202         struct tx_ring  *txr = adapter->tx_rings;
2203         struct rx_ring  *rxr = adapter->rx_rings;
2204         u32             trigger;
2205
2206         EM_CORE_LOCK_ASSERT(adapter);
2207
2208         em_update_link_status(adapter);
2209         em_update_stats_counters(adapter);
2210
2211         /* Reset LAA into RAR[0] on 82571 */
2212         if ((adapter->hw.mac.type == e1000_82571) &&
2213             e1000_get_laa_state_82571(&adapter->hw))
2214                 e1000_rar_set(&adapter->hw, adapter->hw.mac.addr, 0);
2215
2216         /* Mask to use in the irq trigger */
2217         if (adapter->msix_mem)
2218                 trigger = rxr->ims; /* RX for 82574 */
2219         else
2220                 trigger = E1000_ICS_RXDMT0;
2221
2222         /* 
2223         ** Don't do TX watchdog check if we've been paused
2224         */
2225         if (adapter->pause_frames) {
2226                 adapter->pause_frames = 0;
2227                 goto out;
2228         }
2229         /*
2230         ** Check on the state of the TX queue(s); this
2231         ** can be done without the lock because it is
2232         ** read-only here and the HUNG state is static once set.
2233         */
2234         for (int i = 0; i < adapter->num_queues; i++, txr++)
2235                 if (txr->queue_status == EM_QUEUE_HUNG)
2236                         goto hung;
2237 out:
2238         callout_reset(&adapter->timer, hz, em_local_timer, adapter);
2239 #ifndef DEVICE_POLLING
2240         /* Trigger an RX interrupt to guarantee mbuf refresh */
2241         E1000_WRITE_REG(&adapter->hw, E1000_ICS, trigger);
2242 #endif
2243         return;
2244 hung:
2245         /* Looks like we're hung */
2246         device_printf(adapter->dev, "Watchdog timeout -- resetting\n");
2247         device_printf(adapter->dev,
2248             "Queue(%d) tdh = %d, hw tdt = %d\n", txr->me,
2249             E1000_READ_REG(&adapter->hw, E1000_TDH(txr->me)),
2250             E1000_READ_REG(&adapter->hw, E1000_TDT(txr->me)));
2251         device_printf(adapter->dev, "TX(%d) desc avail = %d, "
2252             "Next TX to Clean = %d\n",
2253             txr->me, txr->tx_avail, txr->next_to_clean);
2254         ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
2255         adapter->watchdog_events++;
2256         em_init_locked(adapter);
2257 }
2258
2259
2260 static void
2261 em_update_link_status(struct adapter *adapter)
2262 {
2263         struct e1000_hw *hw = &adapter->hw;
2264         struct ifnet *ifp = adapter->ifp;
2265         device_t dev = adapter->dev;
2266         struct tx_ring *txr = adapter->tx_rings;
2267         u32 link_check = 0;
2268
2269         /* Get the cached link value or read phy for real */
2270         switch (hw->phy.media_type) {
2271         case e1000_media_type_copper:
2272                 if (hw->mac.get_link_status) {
2273                         /* Do the work to read phy */
2274                         e1000_check_for_link(hw);
2275                         link_check = !hw->mac.get_link_status;
2276                         if (link_check) /* ESB2 fix */
2277                                 e1000_cfg_on_link_up(hw);
2278                 } else
2279                         link_check = TRUE;
2280                 break;
2281         case e1000_media_type_fiber:
2282                 e1000_check_for_link(hw);
2283                 link_check = (E1000_READ_REG(hw, E1000_STATUS) &
2284                                  E1000_STATUS_LU);
2285                 break;
2286         case e1000_media_type_internal_serdes:
2287                 e1000_check_for_link(hw);
2288                 link_check = adapter->hw.mac.serdes_has_link;
2289                 break;
2290         default:
2291         case e1000_media_type_unknown:
2292                 break;
2293         }
2294
2295         /* Now check for a transition */
2296         if (link_check && (adapter->link_active == 0)) {
2297                 e1000_get_speed_and_duplex(hw, &adapter->link_speed,
2298                     &adapter->link_duplex);
2299                 /* Check if we must disable SPEED_MODE bit on PCI-E */
2300                 if ((adapter->link_speed != SPEED_1000) &&
2301                     ((hw->mac.type == e1000_82571) ||
2302                     (hw->mac.type == e1000_82572))) {
2303                         int tarc0;
2304                         tarc0 = E1000_READ_REG(hw, E1000_TARC(0));
2305                         tarc0 &= ~SPEED_MODE_BIT;
2306                         E1000_WRITE_REG(hw, E1000_TARC(0), tarc0);
2307                 }
2308                 if (bootverbose)
2309                         device_printf(dev, "Link is up %d Mbps %s\n",
2310                             adapter->link_speed,
2311                             ((adapter->link_duplex == FULL_DUPLEX) ?
2312                             "Full Duplex" : "Half Duplex"));
2313                 adapter->link_active = 1;
2314                 adapter->smartspeed = 0;
2315                 ifp->if_baudrate = adapter->link_speed * 1000000;
2316                 if_link_state_change(ifp, LINK_STATE_UP);
2317         } else if (!link_check && (adapter->link_active == 1)) {
2318                 ifp->if_baudrate = adapter->link_speed = 0;
2319                 adapter->link_duplex = 0;
2320                 if (bootverbose)
2321                         device_printf(dev, "Link is Down\n");
2322                 adapter->link_active = 0;
2323                 /* Link down, disable watchdog */
2324                 for (int i = 0; i < adapter->num_queues; i++, txr++)
2325                         txr->queue_status = EM_QUEUE_IDLE;
2326                 if_link_state_change(ifp, LINK_STATE_DOWN);
2327         }
2328 }
2329
2330 /*********************************************************************
2331  *
2332  *  This routine disables all traffic on the adapter by issuing a
2333  *  global reset on the MAC and deallocates TX/RX buffers.
2334  *
2335  *  This routine should always be called with BOTH the CORE
2336  *  and TX locks.
2337  **********************************************************************/
2338
2339 static void
2340 em_stop(void *arg)
2341 {
2342         struct adapter  *adapter = arg;
2343         struct ifnet    *ifp = adapter->ifp;
2344         struct tx_ring  *txr = adapter->tx_rings;
2345
2346         EM_CORE_LOCK_ASSERT(adapter);
2347
2348         INIT_DEBUGOUT("em_stop: begin");
2349
2350         em_disable_intr(adapter);
2351         callout_stop(&adapter->timer);
2352
2353         /* Tell the stack that the interface is no longer active */
2354         ifp->if_drv_flags &= ~(IFF_DRV_RUNNING | IFF_DRV_OACTIVE);
2355
2356         /* Unarm watchdog timer. */
2357         for (int i = 0; i < adapter->num_queues; i++, txr++) {
2358                 EM_TX_LOCK(txr);
2359                 txr->queue_status = EM_QUEUE_IDLE;
2360                 EM_TX_UNLOCK(txr);
2361         }
2362
2363         e1000_reset_hw(&adapter->hw);
2364         E1000_WRITE_REG(&adapter->hw, E1000_WUC, 0);
2365
2366         e1000_led_off(&adapter->hw);
2367         e1000_cleanup_led(&adapter->hw);
2368 }
2369
2370
2371 /*********************************************************************
2372  *
2373  *  Determine hardware revision.
2374  *
2375  **********************************************************************/
2376 static void
2377 em_identify_hardware(struct adapter *adapter)
2378 {
2379         device_t dev = adapter->dev;
2380
2381         /* Make sure our PCI config space has bus mastering and memory access set */
2382         adapter->hw.bus.pci_cmd_word = pci_read_config(dev, PCIR_COMMAND, 2);
2383         if (!((adapter->hw.bus.pci_cmd_word & PCIM_CMD_BUSMASTEREN) &&
2384             (adapter->hw.bus.pci_cmd_word & PCIM_CMD_MEMEN))) {
2385                 device_printf(dev, "Memory Access and/or Bus Master bits "
2386                     "were not set!\n");
2387                 adapter->hw.bus.pci_cmd_word |=
2388                     (PCIM_CMD_BUSMASTEREN | PCIM_CMD_MEMEN);
2389                 pci_write_config(dev, PCIR_COMMAND,
2390                     adapter->hw.bus.pci_cmd_word, 2);
2391         }
2392
2393         /* Save off the information about this board */
2394         adapter->hw.vendor_id = pci_get_vendor(dev);
2395         adapter->hw.device_id = pci_get_device(dev);
2396         adapter->hw.revision_id = pci_read_config(dev, PCIR_REVID, 1);
2397         adapter->hw.subsystem_vendor_id =
2398             pci_read_config(dev, PCIR_SUBVEND_0, 2);
2399         adapter->hw.subsystem_device_id =
2400             pci_read_config(dev, PCIR_SUBDEV_0, 2);
2401
2402         /* Do Shared Code Init and Setup */
2403         if (e1000_set_mac_type(&adapter->hw)) {
2404                 device_printf(dev, "Setup init failure\n");
2405                 return;
2406         }
2407 }
2408
2409 static int
2410 em_allocate_pci_resources(struct adapter *adapter)
2411 {
2412         device_t        dev = adapter->dev;
2413         int             rid;
2414
2415         rid = PCIR_BAR(0);
2416         adapter->memory = bus_alloc_resource_any(dev, SYS_RES_MEMORY,
2417             &rid, RF_ACTIVE);
2418         if (adapter->memory == NULL) {
2419                 device_printf(dev, "Unable to allocate bus resource: memory\n");
2420                 return (ENXIO);
2421         }
2422         adapter->osdep.mem_bus_space_tag =
2423             rman_get_bustag(adapter->memory);
2424         adapter->osdep.mem_bus_space_handle =
2425             rman_get_bushandle(adapter->memory);
2426         adapter->hw.hw_addr = (u8 *)&adapter->osdep.mem_bus_space_handle;
2427
2428         /* Default to a single queue */
2429         adapter->num_queues = 1;
2430
2431         /*
2432          * Setup MSI/X or MSI if PCI Express
2433          */
2434         adapter->msix = em_setup_msix(adapter);
2435
2436         adapter->hw.back = &adapter->osdep;
2437
2438         return (0);
2439 }
2440
2441 /*********************************************************************
2442  *
2443  *  Setup the Legacy or MSI Interrupt handler
2444  *
2445  **********************************************************************/
2446 int
2447 em_allocate_legacy(struct adapter *adapter)
2448 {
2449         device_t dev = adapter->dev;
2450         int error, rid = 0;
2451
2452         /* Manually turn off all interrupts */
2453         E1000_WRITE_REG(&adapter->hw, E1000_IMC, 0xffffffff);
2454
2455         if (adapter->msix == 1) /* using MSI */
2456                 rid = 1;
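             /* (MSI messages use rid 1; the legacy INTx line is rid 0.) */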
2457         /* We allocate a single interrupt resource */
2458         adapter->res = bus_alloc_resource_any(dev,
2459             SYS_RES_IRQ, &rid, RF_SHAREABLE | RF_ACTIVE);
2460         if (adapter->res == NULL) {
2461                 device_printf(dev, "Unable to allocate bus resource: "
2462                     "interrupt\n");
2463                 return (ENXIO);
2464         }
2465
2466         /*
2467          * Allocate a fast interrupt and the associated
2468          * deferred processing contexts.
2469          */
2470         TASK_INIT(&adapter->que_task, 0, em_handle_que, adapter);
2471         TASK_INIT(&adapter->link_task, 0, em_handle_link, adapter);
2472         adapter->tq = taskqueue_create_fast("em_taskq", M_NOWAIT,
2473             taskqueue_thread_enqueue, &adapter->tq);
2474         taskqueue_start_threads(&adapter->tq, 1, PI_NET, "%s taskq",
2475             device_get_nameunit(adapter->dev));
2476         if ((error = bus_setup_intr(dev, adapter->res, INTR_TYPE_NET,
2477             em_irq_fast, NULL, adapter, &adapter->tag)) != 0) {
2478                 device_printf(dev, "Failed to register fast interrupt "
2479                             "handler: %d\n", error);
2480                 taskqueue_free(adapter->tq);
2481                 adapter->tq = NULL;
2482                 return (error);
2483         }
2484         
2485         return (0);
2486 }
2487
2488 /*********************************************************************
2489  *
2490  *  Setup the MSIX Interrupt handlers
2491  *   This is not really multiqueue; rather,
2492  *   it is just multiple interrupt vectors.
2493  *
2494  **********************************************************************/
2495 int
2496 em_allocate_msix(struct adapter *adapter)
2497 {
2498         device_t        dev = adapter->dev;
2499         struct          tx_ring *txr = adapter->tx_rings;
2500         struct          rx_ring *rxr = adapter->rx_rings;
2501         int             error, rid, vector = 0;
2502
2503
2504         /* Make sure all interrupts are disabled */
2505         E1000_WRITE_REG(&adapter->hw, E1000_IMC, 0xffffffff);
2506
2507         /* First set up ring resources */
2508         for (int i = 0; i < adapter->num_queues; i++, txr++, rxr++) {
2509
2510                 /* RX ring */
2511                 rid = vector + 1;
2512
2513                 rxr->res = bus_alloc_resource_any(dev,
2514                     SYS_RES_IRQ, &rid, RF_ACTIVE);
2515                 if (rxr->res == NULL) {
2516                         device_printf(dev,
2517                             "Unable to allocate bus resource: "
2518                             "RX MSIX Interrupt %d\n", i);
2519                         return (ENXIO);
2520                 }
2521                 if ((error = bus_setup_intr(dev, rxr->res,
2522                     INTR_TYPE_NET | INTR_MPSAFE, NULL, em_msix_rx,
2523                     rxr, &rxr->tag)) != 0) {
2524                         device_printf(dev, "Failed to register RX handler");
2525                         return (error);
2526                 }
2527 #if __FreeBSD_version >= 800504
2528                 bus_describe_intr(dev, rxr->res, rxr->tag, "rx %d", i);
2529 #endif
2530                 rxr->msix = vector++; /* NOTE increment vector for TX */
2531                 TASK_INIT(&rxr->rx_task, 0, em_handle_rx, rxr);
2532                 rxr->tq = taskqueue_create_fast("em_rxq", M_NOWAIT,
2533                     taskqueue_thread_enqueue, &rxr->tq);
2534                 taskqueue_start_threads(&rxr->tq, 1, PI_NET, "%s rxq",
2535                     device_get_nameunit(adapter->dev));
2536                 /*
2537                 ** Set the bit to enable interrupt
2538                 ** in E1000_IMS -- bits 20 and 21
2539                 ** are for RX0 and RX1, note this has
2540                 ** NOTHING to do with the MSIX vector
2541                 */
2542                 rxr->ims = 1 << (20 + i);
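                     /*
                      * Each IVAR cause field is 4 bits wide: the low 3 bits
                      * select the MSIX vector and bit 3 (the 8 below) marks
                      * the entry valid.
                      */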
2543                 adapter->ivars |= (8 | rxr->msix) << (i * 4);
2544
2545                 /* TX ring */
2546                 rid = vector + 1;
2547                 txr->res = bus_alloc_resource_any(dev,
2548                     SYS_RES_IRQ, &rid, RF_ACTIVE);
2549                 if (txr->res == NULL) {
2550                         device_printf(dev,
2551                             "Unable to allocate bus resource: "
2552                             "TX MSIX Interrupt %d\n", i);
2553                         return (ENXIO);
2554                 }
2555                 if ((error = bus_setup_intr(dev, txr->res,
2556                     INTR_TYPE_NET | INTR_MPSAFE, NULL, em_msix_tx,
2557                     txr, &txr->tag)) != 0) {
2558                         device_printf(dev, "Failed to register TX handler");
2559                         return (error);
2560                 }
2561 #if __FreeBSD_version >= 800504
2562                 bus_describe_intr(dev, txr->res, txr->tag, "tx %d", i);
2563 #endif
2564                 txr->msix = vector++; /* Increment vector for next pass */
2565                 TASK_INIT(&txr->tx_task, 0, em_handle_tx, txr);
2566                 txr->tq = taskqueue_create_fast("em_txq", M_NOWAIT,
2567                     taskqueue_thread_enqueue, &txr->tq);
2568                 taskqueue_start_threads(&txr->tq, 1, PI_NET, "%s txq",
2569                     device_get_nameunit(adapter->dev));
2570                 /*
2571                 ** Set the bit to enable interrupt
2572                 ** in E1000_IMS -- bits 22 and 23
2573                 ** are for TX0 and TX1, note this has
2574                 ** NOTHING to do with the MSIX vector
2575                 */
2576                 txr->ims = 1 << (22 + i);
2577                 adapter->ivars |= (8 | txr->msix) << (8 + (i * 4));
2578         }
2579
2580         /* Link interrupt */
2581         ++rid;
2582         adapter->res = bus_alloc_resource_any(dev,
2583             SYS_RES_IRQ, &rid, RF_ACTIVE);
2584         if (!adapter->res) {
2585                 device_printf(dev,"Unable to allocate "
2586                     "bus resource: Link interrupt [%d]\n", rid);
2587                 return (ENXIO);
2588         }
2589         /* Set the link handler function */
2590         error = bus_setup_intr(dev, adapter->res,
2591             INTR_TYPE_NET | INTR_MPSAFE, NULL,
2592             em_msix_link, adapter, &adapter->tag);
2593         if (error) {
2594                 adapter->res = NULL;
2595                 device_printf(dev, "Failed to register LINK handler");
2596                 return (error);
2597         }
2598 #if __FreeBSD_version >= 800504
2599         bus_describe_intr(dev, adapter->res, adapter->tag, "link");
2600 #endif
2601         adapter->linkvec = vector;
2602         adapter->ivars |=  (8 | vector) << 16;
2603         adapter->ivars |= 0x80000000;
2604
2605         return (0);
2606 }
2607
2608
2609 static void
2610 em_free_pci_resources(struct adapter *adapter)
2611 {
2612         device_t        dev = adapter->dev;
2613         struct tx_ring  *txr;
2614         struct rx_ring  *rxr;
2615         int             rid;
2616
2617
2618         /*
2619         ** Release all the queue interrupt resources:
2620         */
2621         for (int i = 0; i < adapter->num_queues; i++) {
2622                 txr = &adapter->tx_rings[i];
2623                 rxr = &adapter->rx_rings[i];
2624                 /* an early abort? */
2625                 if ((txr == NULL) || (rxr == NULL))
2626                         break;
2627                 rid = txr->msix +1;
2628                 if (txr->tag != NULL) {
2629                         bus_teardown_intr(dev, txr->res, txr->tag);
2630                         txr->tag = NULL;
2631                 }
2632                 if (txr->res != NULL)
2633                         bus_release_resource(dev, SYS_RES_IRQ,
2634                             rid, txr->res);
2635                 rid = rxr->msix +1;
2636                 if (rxr->tag != NULL) {
2637                         bus_teardown_intr(dev, rxr->res, rxr->tag);
2638                         rxr->tag = NULL;
2639                 }
2640                 if (rxr->res != NULL)
2641                         bus_release_resource(dev, SYS_RES_IRQ,
2642                             rid, rxr->res);
2643         }
2644
2645         if (adapter->linkvec) /* we are doing MSIX */
2646                 rid = adapter->linkvec + 1;
2647         else
2648                 rid = (adapter->msix != 0) ? 1 : 0;
2649
2650         if (adapter->tag != NULL) {
2651                 bus_teardown_intr(dev, adapter->res, adapter->tag);
2652                 adapter->tag = NULL;
2653         }
2654
2655         if (adapter->res != NULL)
2656                 bus_release_resource(dev, SYS_RES_IRQ, rid, adapter->res);
2657
2658
2659         if (adapter->msix)
2660                 pci_release_msi(dev);
2661
2662         if (adapter->msix_mem != NULL)
2663                 bus_release_resource(dev, SYS_RES_MEMORY,
2664                     PCIR_BAR(EM_MSIX_BAR), adapter->msix_mem);
2665
2666         if (adapter->memory != NULL)
2667                 bus_release_resource(dev, SYS_RES_MEMORY,
2668                     PCIR_BAR(0), adapter->memory);
2669
2670         if (adapter->flash != NULL)
2671                 bus_release_resource(dev, SYS_RES_MEMORY,
2672                     EM_FLASH, adapter->flash);
2673 }
2674
2675 /*
2676  * Setup MSI or MSI/X
2677  */
2678 static int
2679 em_setup_msix(struct adapter *adapter)
2680 {
2681         device_t dev = adapter->dev;
2682         int val = 0;
2683
2684
2685         /*
2686         ** Setup MSI/X for Hartwell: tests have shown
2687         ** use of two queues to be unstable, and to
2688         ** provide no great gain anyway, so we simply
2689         ** separate the interrupts and use a single queue.
2690         */
2691         if ((adapter->hw.mac.type == e1000_82574) &&
2692             (em_enable_msix == TRUE)) {
2693                 /* Map the MSIX BAR */
2694                 int rid = PCIR_BAR(EM_MSIX_BAR);
2695                 adapter->msix_mem = bus_alloc_resource_any(dev,
2696                     SYS_RES_MEMORY, &rid, RF_ACTIVE);
2697                 if (!adapter->msix_mem) {
2698                         /* May not be enabled */
2699                         device_printf(adapter->dev,
2700                             "Unable to map MSIX table\n");
2701                         goto msi;
2702                 }
2703                 val = pci_msix_count(dev); 
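                     /* One vector each is needed for RX, TX and link. */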
2704                 if (val < 3) {
2705                         bus_release_resource(dev, SYS_RES_MEMORY,
2706                             PCIR_BAR(EM_MSIX_BAR), adapter->msix_mem);
2707                         adapter->msix_mem = NULL;
2708                         device_printf(adapter->dev,
2709                             "MSIX: insufficient vectors, using MSI\n");
2710                         goto msi;
2711                 }
2712                 val = 3;
2713                 adapter->num_queues = 1;
2714                 if (pci_alloc_msix(dev, &val) == 0) {
2715                         device_printf(adapter->dev,
2716                             "Using MSIX interrupts "
2717                             "with %d vectors\n", val);
2718                 }
2719
2720                 return (val);
2721         }
2722 msi:
2723         val = pci_msi_count(dev);
2724         if (val == 1 && pci_alloc_msi(dev, &val) == 0) {
2725                 adapter->msix = 1;
2726                 device_printf(adapter->dev,"Using an MSI interrupt\n");
2727                 return (val);
2728         } 
2729         /* Should only happen due to manual configuration */
2730         device_printf(adapter->dev, "No MSI/MSIX, using a Legacy IRQ\n");
2731         return (0);
2732 }


/*********************************************************************
 *
 *  Initialize the hardware to a configuration
 *  as specified by the adapter structure.
 *
 **********************************************************************/
static void
em_reset(struct adapter *adapter)
{
        device_t        dev = adapter->dev;
        struct ifnet    *ifp = adapter->ifp;
        struct e1000_hw *hw = &adapter->hw;
        u16             rx_buffer_size;

        INIT_DEBUGOUT("em_reset: begin");

        /* Set up smart power down as default off on newer adapters. */
        if (!em_smart_pwr_down && (hw->mac.type == e1000_82571 ||
            hw->mac.type == e1000_82572)) {
                u16 phy_tmp = 0;

                /* Speed up time to link by disabling smart power down. */
                e1000_read_phy_reg(hw, IGP02E1000_PHY_POWER_MGMT, &phy_tmp);
                phy_tmp &= ~IGP02E1000_PM_SPD;
                e1000_write_phy_reg(hw, IGP02E1000_PHY_POWER_MGMT, phy_tmp);
        }

        /*
         * These parameters control the automatic generation (Tx) and
         * response (Rx) to Ethernet PAUSE frames.
         * - High water mark should allow for at least two frames to be
         *   received after sending an XOFF.
         * - Low water mark works best when it is very near the high water mark.
         *   This allows the receiver to restart by sending XON when it has
         *   drained a bit. Here we use an arbitrary value of 1500 which will
         *   restart after one full frame is pulled from the buffer. There
         *   could be several smaller frames in the buffer and if so they will
         *   not trigger the XON until their total number reduces the buffer
         *   by 1500.
         * - The pause time is fairly large at 1000 x 512ns = 512 usec.
         */
        rx_buffer_size = ((E1000_READ_REG(hw, E1000_PBA) & 0xffff) << 10);

        hw->fc.high_water = rx_buffer_size -
            roundup2(adapter->max_frame_size, 1024);
        hw->fc.low_water = hw->fc.high_water - 1500;

        if (hw->mac.type == e1000_80003es2lan)
                hw->fc.pause_time = 0xFFFF;
        else
                hw->fc.pause_time = EM_FC_PAUSE_TIME;

        hw->fc.send_xon = TRUE;

        /* Set flow control, using the tunable if it is sane */
        hw->fc.requested_mode = adapter->fc_setting;

        /* Workaround: no TX flow ctrl for PCH */
        if (hw->mac.type == e1000_pchlan)
                hw->fc.requested_mode = e1000_fc_rx_pause;

        /* Override - hardcoded (magic) settings for PCH2LAN */
        if (hw->mac.type == e1000_pch2lan) {
                hw->fc.high_water = 0x5C20;
                hw->fc.low_water = 0x5048;
                hw->fc.pause_time = 0x0650;
                hw->fc.refresh_time = 0x0400;
                /* Jumbos need an adjusted PBA */
                if (ifp->if_mtu > ETHERMTU)
                        E1000_WRITE_REG(hw, E1000_PBA, 12);
                else
                        E1000_WRITE_REG(hw, E1000_PBA, 26);
        }

        /* Issue a global reset */
        e1000_reset_hw(hw);
        E1000_WRITE_REG(hw, E1000_WUC, 0);
        em_disable_aspm(adapter);

        if (e1000_init_hw(hw) < 0) {
                device_printf(dev, "Hardware Initialization Failed\n");
                return;
        }

        E1000_WRITE_REG(hw, E1000_VET, ETHERTYPE_VLAN);
        e1000_get_phy_info(hw);
        e1000_check_for_link(hw);
        return;
}
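
/*
 * Worked example of the watermark arithmetic above (illustrative values):
 * a PBA readout of 0x30 means a 48 KB RX packet buffer, so rx_buffer_size
 * is 48 << 10 = 49152 bytes.  With a standard 1518-byte max frame,
 * roundup2(1518, 1024) is 2048, giving high_water = 49152 - 2048 = 47104
 * and low_water = 47104 - 1500 = 45604.
 */
#if 0
        u32 rx_buf = (0x30 & 0xffff) << 10;             /* 49152 bytes */
        u32 high   = rx_buf - roundup2(1518, 1024);     /* 47104 */
        u32 low    = high - 1500;                       /* 45604 */
#endif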

/*********************************************************************
 *
 *  Setup networking device structure and register an interface.
 *
 **********************************************************************/
static int
em_setup_interface(device_t dev, struct adapter *adapter)
{
        struct ifnet   *ifp;

        INIT_DEBUGOUT("em_setup_interface: begin");

        ifp = adapter->ifp = if_alloc(IFT_ETHER);
        if (ifp == NULL) {
                device_printf(dev, "cannot allocate ifnet structure\n");
                return (-1);
        }
        if_initname(ifp, device_get_name(dev), device_get_unit(dev));
        ifp->if_mtu = ETHERMTU;
        ifp->if_init = em_init;
        ifp->if_softc = adapter;
        ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
        ifp->if_ioctl = em_ioctl;
        ifp->if_start = em_start;
        IFQ_SET_MAXLEN(&ifp->if_snd, adapter->num_tx_desc - 1);
        ifp->if_snd.ifq_drv_maxlen = adapter->num_tx_desc - 1;
        IFQ_SET_READY(&ifp->if_snd);

        ether_ifattach(ifp, adapter->hw.mac.addr);

        ifp->if_capabilities = ifp->if_capenable = 0;

#ifdef EM_MULTIQUEUE
        /* Multiqueue tx functions */
        ifp->if_transmit = em_mq_start;
        ifp->if_qflush = em_qflush;
#endif

        ifp->if_capabilities |= IFCAP_HWCSUM | IFCAP_VLAN_HWCSUM;
        ifp->if_capenable |= IFCAP_HWCSUM | IFCAP_VLAN_HWCSUM;

        /* Enable TSO by default, can disable with ifconfig */
        ifp->if_capabilities |= IFCAP_TSO4;
        ifp->if_capenable |= IFCAP_TSO4;

        /*
         * Tell the upper layer(s) we
         * support full VLAN capability
         */
        ifp->if_data.ifi_hdrlen = sizeof(struct ether_vlan_header);
        ifp->if_capabilities |= IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_MTU;
        ifp->if_capenable |= IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_MTU;

        /*
        ** Don't turn this on by default: if vlans are
        ** created on another pseudo device (eg. lagg)
        ** then vlan events are not passed through, breaking
        ** operation, but with HW FILTER off it works. If
        ** using vlans directly on the em driver you can
        ** enable this and get full hardware tag filtering.
        */
        ifp->if_capabilities |= IFCAP_VLAN_HWFILTER;

#ifdef DEVICE_POLLING
        ifp->if_capabilities |= IFCAP_POLLING;
#endif

        /* Enable only WOL MAGIC by default */
        if (adapter->wol) {
                ifp->if_capabilities |= IFCAP_WOL;
                ifp->if_capenable |= IFCAP_WOL_MAGIC;
        }

        /*
         * Specify the media types supported by this adapter and register
         * callbacks to update media and link information
         */
        ifmedia_init(&adapter->media, IFM_IMASK,
            em_media_change, em_media_status);
        if ((adapter->hw.phy.media_type == e1000_media_type_fiber) ||
            (adapter->hw.phy.media_type == e1000_media_type_internal_serdes)) {
                u_char fiber_type = IFM_1000_SX;        /* default type */

                ifmedia_add(&adapter->media, IFM_ETHER | fiber_type | IFM_FDX,
                            0, NULL);
                ifmedia_add(&adapter->media, IFM_ETHER | fiber_type, 0, NULL);
        } else {
                ifmedia_add(&adapter->media, IFM_ETHER | IFM_10_T, 0, NULL);
                ifmedia_add(&adapter->media, IFM_ETHER | IFM_10_T | IFM_FDX,
                            0, NULL);
                ifmedia_add(&adapter->media, IFM_ETHER | IFM_100_TX,
                            0, NULL);
                ifmedia_add(&adapter->media, IFM_ETHER | IFM_100_TX | IFM_FDX,
                            0, NULL);
                if (adapter->hw.phy.type != e1000_phy_ife) {
                        ifmedia_add(&adapter->media,
                                IFM_ETHER | IFM_1000_T | IFM_FDX, 0, NULL);
                        ifmedia_add(&adapter->media,
                                IFM_ETHER | IFM_1000_T, 0, NULL);
                }
        }
        ifmedia_add(&adapter->media, IFM_ETHER | IFM_AUTO, 0, NULL);
        ifmedia_set(&adapter->media, IFM_ETHER | IFM_AUTO);
        return (0);
}
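
/*
 * The capability bits published above can be toggled at runtime from
 * userland (e.g. "ifconfig em0 -txcsum").  A hedged sketch of the
 * ioctl-side handling, loosely modeled on the SIOCSIFCAP case in
 * em_ioctl(); the exact set of flags handled there may differ.
 */
#if 0
        int mask = ifr->ifr_reqcap ^ ifp->if_capenable, reinit = 0;

        if (mask & IFCAP_HWCSUM) {
                ifp->if_capenable ^= IFCAP_HWCSUM;
                reinit = 1;     /* offload changes require a re-init */
        }
        if (mask & IFCAP_TSO4) {
                ifp->if_capenable ^= IFCAP_TSO4;
                reinit = 1;
        }
        if (reinit && (ifp->if_drv_flags & IFF_DRV_RUNNING))
                em_init(adapter);
#endif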


/*
 * Manage DMA'able memory.
 */
static void
em_dmamap_cb(void *arg, bus_dma_segment_t *segs, int nseg, int error)
{
        if (error)
                return;
        *(bus_addr_t *) arg = segs[0].ds_addr;
}

static int
em_dma_malloc(struct adapter *adapter, bus_size_t size,
        struct em_dma_alloc *dma, int mapflags)
{
        int error;

        error = bus_dma_tag_create(bus_get_dma_tag(adapter->dev), /* parent */
                                EM_DBA_ALIGN, 0,        /* alignment, bounds */
                                BUS_SPACE_MAXADDR,      /* lowaddr */
                                BUS_SPACE_MAXADDR,      /* highaddr */
                                NULL, NULL,             /* filter, filterarg */
                                size,                   /* maxsize */
                                1,                      /* nsegments */
                                size,                   /* maxsegsize */
                                0,                      /* flags */
                                NULL,                   /* lockfunc */
                                NULL,                   /* lockarg */
                                &dma->dma_tag);
        if (error) {
                device_printf(adapter->dev,
                    "%s: bus_dma_tag_create failed: %d\n",
                    __func__, error);
                goto fail_0;
        }

        error = bus_dmamem_alloc(dma->dma_tag, (void**) &dma->dma_vaddr,
            BUS_DMA_NOWAIT | BUS_DMA_COHERENT, &dma->dma_map);
        if (error) {
                device_printf(adapter->dev,
                    "%s: bus_dmamem_alloc(%ju) failed: %d\n",
                    __func__, (uintmax_t)size, error);
                /* Nothing was allocated, so only the tag needs cleanup. */
                goto fail_1;
        }

        dma->dma_paddr = 0;
        error = bus_dmamap_load(dma->dma_tag, dma->dma_map, dma->dma_vaddr,
            size, em_dmamap_cb, &dma->dma_paddr, mapflags | BUS_DMA_NOWAIT);
        if (error || dma->dma_paddr == 0) {
                device_printf(adapter->dev,
                    "%s: bus_dmamap_load failed: %d\n",
                    __func__, error);
                goto fail_3;
        }

        return (0);

fail_3:
        bus_dmamap_unload(dma->dma_tag, dma->dma_map);
        bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map);
fail_1:
        bus_dma_tag_destroy(dma->dma_tag);
fail_0:
        dma->dma_map = NULL;
        dma->dma_tag = NULL;

        return (error);
}

static void
em_dma_free(struct adapter *adapter, struct em_dma_alloc *dma)
{
        if (dma->dma_tag == NULL)
                return;
        if (dma->dma_map != NULL) {
                bus_dmamap_sync(dma->dma_tag, dma->dma_map,
                    BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
                bus_dmamap_unload(dma->dma_tag, dma->dma_map);
                bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map);
                dma->dma_map = NULL;
        }
        bus_dma_tag_destroy(dma->dma_tag);
        dma->dma_tag = NULL;
}
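
/*
 * Hedged usage sketch for the helpers above: size a descriptor ring,
 * allocate it, and tear it down again.  This mirrors what
 * em_allocate_queues() below does for the real rings.
 */
#if 0
        struct em_dma_alloc ring;
        int tsize = roundup2(adapter->num_tx_desc *
            sizeof(struct e1000_tx_desc), EM_DBA_ALIGN);

        if (em_dma_malloc(adapter, tsize, &ring, BUS_DMA_NOWAIT) == 0) {
                /* ring.dma_vaddr: KVA; ring.dma_paddr: bus address */
                em_dma_free(adapter, &ring);
        }
#endif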


/*********************************************************************
 *
 *  Allocate memory for the transmit and receive rings, and then
 *  the descriptors associated with each, called only once at attach.
 *
 **********************************************************************/
static int
em_allocate_queues(struct adapter *adapter)
{
        device_t                dev = adapter->dev;
        struct tx_ring          *txr = NULL;
        struct rx_ring          *rxr = NULL;
        int rsize, tsize, error = E1000_SUCCESS;
        int txconf = 0, rxconf = 0;

        /* Allocate the TX ring struct memory */
        if (!(adapter->tx_rings =
            (struct tx_ring *) malloc(sizeof(struct tx_ring) *
            adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
                device_printf(dev, "Unable to allocate TX ring memory\n");
                error = ENOMEM;
                goto fail;
        }

        /* Now allocate the RX */
        if (!(adapter->rx_rings =
            (struct rx_ring *) malloc(sizeof(struct rx_ring) *
            adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
                device_printf(dev, "Unable to allocate RX ring memory\n");
                error = ENOMEM;
                goto rx_fail;
        }

        tsize = roundup2(adapter->num_tx_desc *
            sizeof(struct e1000_tx_desc), EM_DBA_ALIGN);
        /*
         * Now set up the TX queues; txconf is needed to handle the
         * possibility that things fail midcourse and we need to
         * undo memory gracefully
         */
        for (int i = 0; i < adapter->num_queues; i++, txconf++) {
                /* Set up some basics */
                txr = &adapter->tx_rings[i];
                txr->adapter = adapter;
                txr->me = i;

                /* Initialize the TX lock */
                snprintf(txr->mtx_name, sizeof(txr->mtx_name), "%s:tx(%d)",
                    device_get_nameunit(dev), txr->me);
                mtx_init(&txr->tx_mtx, txr->mtx_name, NULL, MTX_DEF);

                if (em_dma_malloc(adapter, tsize,
                        &txr->txdma, BUS_DMA_NOWAIT)) {
                        device_printf(dev,
                            "Unable to allocate TX Descriptor memory\n");
                        error = ENOMEM;
                        goto err_tx_desc;
                }
                txr->tx_base = (struct e1000_tx_desc *)txr->txdma.dma_vaddr;
                bzero((void *)txr->tx_base, tsize);

                if (em_allocate_transmit_buffers(txr)) {
                        device_printf(dev,
                            "Critical Failure setting up transmit buffers\n");
                        error = ENOMEM;
                        goto err_tx_desc;
                }
#if __FreeBSD_version >= 800000
                /* Allocate a buf ring */
                txr->br = buf_ring_alloc(4096, M_DEVBUF,
                    M_WAITOK, &txr->tx_mtx);
#endif
        }

        /*
         * Next the RX queues...
         */
        rsize = roundup2(adapter->num_rx_desc *
            sizeof(struct e1000_rx_desc), EM_DBA_ALIGN);
        for (int i = 0; i < adapter->num_queues; i++, rxconf++) {
                rxr = &adapter->rx_rings[i];
                rxr->adapter = adapter;
                rxr->me = i;

                /* Initialize the RX lock */
                snprintf(rxr->mtx_name, sizeof(rxr->mtx_name), "%s:rx(%d)",
                    device_get_nameunit(dev), rxr->me);
                mtx_init(&rxr->rx_mtx, rxr->mtx_name, NULL, MTX_DEF);

                if (em_dma_malloc(adapter, rsize,
                        &rxr->rxdma, BUS_DMA_NOWAIT)) {
                        device_printf(dev,
                            "Unable to allocate RX Descriptor memory\n");
                        error = ENOMEM;
                        goto err_rx_desc;
                }
                rxr->rx_base = (struct e1000_rx_desc *)rxr->rxdma.dma_vaddr;
                bzero((void *)rxr->rx_base, rsize);

                /* Allocate receive buffers for the ring */
                if (em_allocate_receive_buffers(rxr)) {
                        device_printf(dev,
                            "Critical Failure setting up receive buffers\n");
                        error = ENOMEM;
                        goto err_rx_desc;
                }
        }

        return (0);

err_rx_desc:
        for (rxr = adapter->rx_rings; rxconf > 0; rxr++, rxconf--)
                em_dma_free(adapter, &rxr->rxdma);
err_tx_desc:
        for (txr = adapter->tx_rings; txconf > 0; txr++, txconf--)
                em_dma_free(adapter, &txr->txdma);
        free(adapter->rx_rings, M_DEVBUF);
rx_fail:
#if __FreeBSD_version >= 800000
        /* txr is NULL if the RX ring array allocation itself failed */
        if (txr != NULL && txr->br != NULL)
                buf_ring_free(txr->br, M_DEVBUF);
#endif
        free(adapter->tx_rings, M_DEVBUF);
fail:
        return (error);
}
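
/*
 * Note on the ring sizing above: roundup2(x, y) rounds x up to the next
 * multiple of the power-of-two y, i.e. (x + (y - 1)) & ~(y - 1).  With
 * 1024 legacy descriptors of 16 bytes each the ring is already a
 * multiple of EM_DBA_ALIGN (128), so the roundup is a no-op:
 */
#if 0
        int a = roundup2(1024 * 16, 128);       /* 16384: already aligned */
        int b = roundup2(16385, 128);           /* 16512: next multiple   */
#endif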


/*********************************************************************
 *
 *  Allocate memory for tx_buffer structures. The tx_buffer stores all
 *  the information needed to transmit a packet on the wire. This is
 *  called only once at attach, setup is done every reset.
 *
 **********************************************************************/
static int
em_allocate_transmit_buffers(struct tx_ring *txr)
{
        struct adapter *adapter = txr->adapter;
        device_t dev = adapter->dev;
        struct em_buffer *txbuf;
        int error, i;

        /*
         * Setup DMA descriptor areas.
         */
        if ((error = bus_dma_tag_create(bus_get_dma_tag(dev),
                               1, 0,                    /* alignment, bounds */
                               BUS_SPACE_MAXADDR,       /* lowaddr */
                               BUS_SPACE_MAXADDR,       /* highaddr */
                               NULL, NULL,              /* filter, filterarg */
                               EM_TSO_SIZE,             /* maxsize */
                               EM_MAX_SCATTER,          /* nsegments */
                               PAGE_SIZE,               /* maxsegsize */
                               0,                       /* flags */
                               NULL,                    /* lockfunc */
                               NULL,                    /* lockfuncarg */
                               &txr->txtag))) {
                device_printf(dev, "Unable to allocate TX DMA tag\n");
                goto fail;
        }

        if (!(txr->tx_buffers =
            (struct em_buffer *) malloc(sizeof(struct em_buffer) *
            adapter->num_tx_desc, M_DEVBUF, M_NOWAIT | M_ZERO))) {
                device_printf(dev, "Unable to allocate tx_buffer memory\n");
                error = ENOMEM;
                goto fail;
        }

        /* Create the descriptor buffer dma maps */
        txbuf = txr->tx_buffers;
        for (i = 0; i < adapter->num_tx_desc; i++, txbuf++) {
                error = bus_dmamap_create(txr->txtag, 0, &txbuf->map);
                if (error != 0) {
                        device_printf(dev, "Unable to create TX DMA map\n");
                        goto fail;
                }
        }

        return (0);
fail:
        /* We free all, it handles case where we are in the middle */
        em_free_transmit_structures(adapter);
        return (error);
}

/*********************************************************************
 *
 *  Initialize a transmit ring.
 *
 **********************************************************************/
static void
em_setup_transmit_ring(struct tx_ring *txr)
{
        struct adapter *adapter = txr->adapter;
        struct em_buffer *txbuf;
        int i;

        /* Clear the old descriptor contents */
        EM_TX_LOCK(txr);
        bzero((void *)txr->tx_base,
              (sizeof(struct e1000_tx_desc)) * adapter->num_tx_desc);
        /* Reset indices */
        txr->next_avail_desc = 0;
        txr->next_to_clean = 0;

        /* Free any existing tx buffers. */
        txbuf = txr->tx_buffers;
        for (i = 0; i < adapter->num_tx_desc; i++, txbuf++) {
                if (txbuf->m_head != NULL) {
                        bus_dmamap_sync(txr->txtag, txbuf->map,
                            BUS_DMASYNC_POSTWRITE);
                        bus_dmamap_unload(txr->txtag, txbuf->map);
                        m_freem(txbuf->m_head);
                        txbuf->m_head = NULL;
                }
                /* clear the watch index */
                txbuf->next_eop = -1;
        }

        /* Set number of descriptors available */
        txr->tx_avail = adapter->num_tx_desc;
        txr->queue_status = EM_QUEUE_IDLE;

        /* Clear checksum offload context. */
        txr->last_hw_offload = 0;
        txr->last_hw_ipcss = 0;
        txr->last_hw_ipcso = 0;
        txr->last_hw_tucss = 0;
        txr->last_hw_tucso = 0;

        bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
            BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
        EM_TX_UNLOCK(txr);
}

/*********************************************************************
 *
 *  Initialize all transmit rings.
 *
 **********************************************************************/
static void
em_setup_transmit_structures(struct adapter *adapter)
{
        struct tx_ring *txr = adapter->tx_rings;

        for (int i = 0; i < adapter->num_queues; i++, txr++)
                em_setup_transmit_ring(txr);

        return;
}

/*********************************************************************
 *
 *  Enable transmit unit.
 *
 **********************************************************************/
static void
em_initialize_transmit_unit(struct adapter *adapter)
{
        struct tx_ring  *txr = adapter->tx_rings;
        struct e1000_hw *hw = &adapter->hw;
        u32     tctl, tarc, tipg = 0;

        INIT_DEBUGOUT("em_initialize_transmit_unit: begin");

        for (int i = 0; i < adapter->num_queues; i++, txr++) {
                u64 bus_addr = txr->txdma.dma_paddr;
                /* Base and Len of TX Ring */
                E1000_WRITE_REG(hw, E1000_TDLEN(i),
                    adapter->num_tx_desc * sizeof(struct e1000_tx_desc));
                E1000_WRITE_REG(hw, E1000_TDBAH(i),
                    (u32)(bus_addr >> 32));
                E1000_WRITE_REG(hw, E1000_TDBAL(i),
                    (u32)bus_addr);
                /* Init the HEAD/TAIL indices */
                E1000_WRITE_REG(hw, E1000_TDT(i), 0);
                E1000_WRITE_REG(hw, E1000_TDH(i), 0);

                HW_DEBUGOUT2("Base = %x, Length = %x\n",
                    E1000_READ_REG(&adapter->hw, E1000_TDBAL(i)),
                    E1000_READ_REG(&adapter->hw, E1000_TDLEN(i)));

                txr->queue_status = EM_QUEUE_IDLE;
        }

        /* Set the default values for the Tx Inter Packet Gap timer */
        switch (adapter->hw.mac.type) {
        case e1000_82542:
                tipg = DEFAULT_82542_TIPG_IPGT;
                tipg |= DEFAULT_82542_TIPG_IPGR1 << E1000_TIPG_IPGR1_SHIFT;
                tipg |= DEFAULT_82542_TIPG_IPGR2 << E1000_TIPG_IPGR2_SHIFT;
                break;
        case e1000_80003es2lan:
                tipg = DEFAULT_82543_TIPG_IPGR1;
                tipg |= DEFAULT_80003ES2LAN_TIPG_IPGR2 <<
                    E1000_TIPG_IPGR2_SHIFT;
                break;
        default:
                if ((adapter->hw.phy.media_type == e1000_media_type_fiber) ||
                    (adapter->hw.phy.media_type ==
                    e1000_media_type_internal_serdes))
                        tipg = DEFAULT_82543_TIPG_IPGT_FIBER;
                else
                        tipg = DEFAULT_82543_TIPG_IPGT_COPPER;
                tipg |= DEFAULT_82543_TIPG_IPGR1 << E1000_TIPG_IPGR1_SHIFT;
                tipg |= DEFAULT_82543_TIPG_IPGR2 << E1000_TIPG_IPGR2_SHIFT;
        }

        E1000_WRITE_REG(&adapter->hw, E1000_TIPG, tipg);
        E1000_WRITE_REG(&adapter->hw, E1000_TIDV, adapter->tx_int_delay.value);

        if (adapter->hw.mac.type >= e1000_82540)
                E1000_WRITE_REG(&adapter->hw, E1000_TADV,
                    adapter->tx_abs_int_delay.value);

        if ((adapter->hw.mac.type == e1000_82571) ||
            (adapter->hw.mac.type == e1000_82572)) {
                tarc = E1000_READ_REG(&adapter->hw, E1000_TARC(0));
                tarc |= SPEED_MODE_BIT;
                E1000_WRITE_REG(&adapter->hw, E1000_TARC(0), tarc);
        } else if (adapter->hw.mac.type == e1000_80003es2lan) {
                tarc = E1000_READ_REG(&adapter->hw, E1000_TARC(0));
                tarc |= 1;
                E1000_WRITE_REG(&adapter->hw, E1000_TARC(0), tarc);
                tarc = E1000_READ_REG(&adapter->hw, E1000_TARC(1));
                tarc |= 1;
                E1000_WRITE_REG(&adapter->hw, E1000_TARC(1), tarc);
        }

        adapter->txd_cmd = E1000_TXD_CMD_IFCS;
        if (adapter->tx_int_delay.value > 0)
                adapter->txd_cmd |= E1000_TXD_CMD_IDE;

        /* Program the Transmit Control Register */
        tctl = E1000_READ_REG(&adapter->hw, E1000_TCTL);
        tctl &= ~E1000_TCTL_CT;
        tctl |= (E1000_TCTL_PSP | E1000_TCTL_RTLC | E1000_TCTL_EN |
                   (E1000_COLLISION_THRESHOLD << E1000_CT_SHIFT));

        if (adapter->hw.mac.type >= e1000_82571)
                tctl |= E1000_TCTL_MULR;

        /* This write will effectively turn on the transmit unit. */
        E1000_WRITE_REG(&adapter->hw, E1000_TCTL, tctl);
}
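
/*
 * Worked example of the TIPG packing above (hedged: the DEFAULT_* values
 * live in the e1000 headers; from those, the copper defaults are IPGT=8,
 * IPGR1=8 and IPGR2=6, with the IPGR fields shifted to bits 10 and 20):
 */
#if 0
        u32 tipg_example = 8 |                  /* IPGT  (bits 0-9)   */
            (8 << E1000_TIPG_IPGR1_SHIFT) |     /* IPGR1 (bits 10-19) */
            (6 << E1000_TIPG_IPGR2_SHIFT);      /* IPGR2 (bits 20-29) */
#endif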


/*********************************************************************
 *
 *  Free all transmit rings.
 *
 **********************************************************************/
static void
em_free_transmit_structures(struct adapter *adapter)
{
        struct tx_ring *txr = adapter->tx_rings;

        for (int i = 0; i < adapter->num_queues; i++, txr++) {
                EM_TX_LOCK(txr);
                em_free_transmit_buffers(txr);
                em_dma_free(adapter, &txr->txdma);
                EM_TX_UNLOCK(txr);
                EM_TX_LOCK_DESTROY(txr);
        }

        free(adapter->tx_rings, M_DEVBUF);
}

/*********************************************************************
 *
 *  Free transmit ring related data structures.
 *
 **********************************************************************/
static void
em_free_transmit_buffers(struct tx_ring *txr)
{
        struct adapter          *adapter = txr->adapter;
        struct em_buffer        *txbuf;

        INIT_DEBUGOUT("free_transmit_ring: begin");

        if (txr->tx_buffers == NULL)
                return;

        for (int i = 0; i < adapter->num_tx_desc; i++) {
                txbuf = &txr->tx_buffers[i];
                if (txbuf->m_head != NULL) {
                        bus_dmamap_sync(txr->txtag, txbuf->map,
                            BUS_DMASYNC_POSTWRITE);
                        bus_dmamap_unload(txr->txtag,
                            txbuf->map);
                        m_freem(txbuf->m_head);
                        txbuf->m_head = NULL;
                        if (txbuf->map != NULL) {
                                bus_dmamap_destroy(txr->txtag,
                                    txbuf->map);
                                txbuf->map = NULL;
                        }
                } else if (txbuf->map != NULL) {
                        bus_dmamap_unload(txr->txtag,
                            txbuf->map);
                        bus_dmamap_destroy(txr->txtag,
                            txbuf->map);
                        txbuf->map = NULL;
                }
        }
#if __FreeBSD_version >= 800000
        if (txr->br != NULL)
                buf_ring_free(txr->br, M_DEVBUF);
#endif
        if (txr->tx_buffers != NULL) {
                free(txr->tx_buffers, M_DEVBUF);
                txr->tx_buffers = NULL;
        }
        if (txr->txtag != NULL) {
                bus_dma_tag_destroy(txr->txtag);
                txr->txtag = NULL;
        }
        return;
}


/*********************************************************************
 *  The offload context is protocol specific (TCP/UDP) and thus
 *  only needs to be set when the protocol changes. The occasion
 *  of a context change can be a performance detriment, and
 *  might be better just disabled. The reason arises in the way
 *  in which the controller supports pipelined requests from the
 *  Tx data DMA. Up to four requests can be pipelined, and they may
 *  belong to the same packet or to multiple packets. However all
 *  requests for one packet are issued before a request is issued
 *  for a subsequent packet and if a request for the next packet
 *  requires a context change, that request will be stalled
 *  until the previous request completes. This means setting up
 *  a new context effectively disables pipelined Tx data DMA which
 *  in turn greatly slows down performance when sending small sized
 *  frames.
 **********************************************************************/
static void
em_transmit_checksum_setup(struct tx_ring *txr, struct mbuf *mp, int ip_off,
    struct ip *ip, u32 *txd_upper, u32 *txd_lower)
{
        struct adapter                  *adapter = txr->adapter;
        struct e1000_context_desc       *TXD = NULL;
        struct em_buffer                *tx_buffer;
        int                             cur, hdr_len;
        u32                             cmd = 0;
        u16                             offload = 0;
        u8                              ipcso, ipcss, tucso, tucss;

        ipcss = ipcso = tucss = tucso = 0;
        hdr_len = ip_off + (ip->ip_hl << 2);
        cur = txr->next_avail_desc;

        /* Setup of IP header checksum. */
        if (mp->m_pkthdr.csum_flags & CSUM_IP) {
                *txd_upper |= E1000_TXD_POPTS_IXSM << 8;
                offload |= CSUM_IP;
                ipcss = ip_off;
                ipcso = ip_off + offsetof(struct ip, ip_sum);
                /*
                 * Start offset for header checksum calculation.
                 * End offset for header checksum calculation.
                 * Offset of place to put the checksum.
                 */
                TXD = (struct e1000_context_desc *)&txr->tx_base[cur];
                TXD->lower_setup.ip_fields.ipcss = ipcss;
                TXD->lower_setup.ip_fields.ipcse = htole16(hdr_len);
                TXD->lower_setup.ip_fields.ipcso = ipcso;
                cmd |= E1000_TXD_CMD_IP;
        }

        if (mp->m_pkthdr.csum_flags & CSUM_TCP) {
                *txd_lower = E1000_TXD_CMD_DEXT | E1000_TXD_DTYP_D;
                *txd_upper |= E1000_TXD_POPTS_TXSM << 8;
                offload |= CSUM_TCP;
                tucss = hdr_len;
                tucso = hdr_len + offsetof(struct tcphdr, th_sum);
                /*
                 * Setting up a new checksum offload context for every frame
                 * takes a lot of processing time for the hardware. This also
                 * reduces performance a lot for small sized frames, so avoid
                 * it if the driver can use the previously configured checksum
                 * offload context.
                 */
                if (txr->last_hw_offload == offload) {
                        if (offload & CSUM_IP) {
                                if (txr->last_hw_ipcss == ipcss &&
                                    txr->last_hw_ipcso == ipcso &&
                                    txr->last_hw_tucss == tucss &&
                                    txr->last_hw_tucso == tucso)
                                        return;
                        } else {
                                if (txr->last_hw_tucss == tucss &&
                                    txr->last_hw_tucso == tucso)
                                        return;
                        }
                }
                txr->last_hw_offload = offload;
                txr->last_hw_tucss = tucss;
                txr->last_hw_tucso = tucso;
                /*
                 * Start offset for payload checksum calculation.
                 * End offset for payload checksum calculation.
                 * Offset of place to put the checksum.
                 */
                TXD = (struct e1000_context_desc *)&txr->tx_base[cur];
                TXD->upper_setup.tcp_fields.tucss = tucss;
                TXD->upper_setup.tcp_fields.tucse = htole16(0);
                TXD->upper_setup.tcp_fields.tucso = tucso;
                cmd |= E1000_TXD_CMD_TCP;
        } else if (mp->m_pkthdr.csum_flags & CSUM_UDP) {
                *txd_lower = E1000_TXD_CMD_DEXT | E1000_TXD_DTYP_D;
                *txd_upper |= E1000_TXD_POPTS_TXSM << 8;
                offload |= CSUM_UDP;    /* so UDP contexts are not confused with others */
                tucss = hdr_len;
                tucso = hdr_len + offsetof(struct udphdr, uh_sum);
                /*
                 * Setting up a new checksum offload context for every frame
                 * takes a lot of processing time for the hardware. This also
                 * reduces performance a lot for small sized frames, so avoid
                 * it if the driver can use the previously configured checksum
                 * offload context.
                 */
                if (txr->last_hw_offload == offload) {
                        if (offload & CSUM_IP) {
                                if (txr->last_hw_ipcss == ipcss &&
                                    txr->last_hw_ipcso == ipcso &&
                                    txr->last_hw_tucss == tucss &&
                                    txr->last_hw_tucso == tucso)
                                        return;
                        } else {
                                if (txr->last_hw_tucss == tucss &&
                                    txr->last_hw_tucso == tucso)
                                        return;
                        }
                }
                txr->last_hw_offload = offload;
                txr->last_hw_tucss = tucss;
                txr->last_hw_tucso = tucso;
                /*
                 * Start offset for payload checksum calculation.
                 * End offset for payload checksum calculation.
                 * Offset of place to put the checksum.
                 */
                TXD = (struct e1000_context_desc *)&txr->tx_base[cur];
                TXD->upper_setup.tcp_fields.tucss = tucss;
                TXD->upper_setup.tcp_fields.tucse = htole16(0);
                TXD->upper_setup.tcp_fields.tucso = tucso;
        }

        if (offload & CSUM_IP) {
                txr->last_hw_ipcss = ipcss;
                txr->last_hw_ipcso = ipcso;
        }

        TXD->tcp_seg_setup.data = htole32(0);
        TXD->cmd_and_length =
            htole32(adapter->txd_cmd | E1000_TXD_CMD_DEXT | cmd);
        tx_buffer = &txr->tx_buffers[cur];
        tx_buffer->m_head = NULL;
        tx_buffer->next_eop = -1;

        if (++cur == adapter->num_tx_desc)
                cur = 0;

        txr->tx_avail--;
        txr->next_avail_desc = cur;
}
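
/*
 * Hedged sketch of what a caller establishes before invoking the routine
 * above: ip_off is the offset of the IP header within the frame (14 for
 * plain Ethernet, 18 with an 802.1Q tag) and hdr_len then follows from
 * the IP header-length field.  This assumes the headers are contiguous
 * in the first mbuf, which the transmit path has to arrange.
 */
#if 0
        int ip_off = sizeof(struct ether_header);               /* 14 */
        struct ip *ip = (struct ip *)(mtod(mp, char *) + ip_off);
        int hdr_len = ip_off + (ip->ip_hl << 2);                /* 14+20=34 */
#endif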


/**********************************************************************
 *
 *  Setup work for hardware segmentation offload (TSO)
 *
 **********************************************************************/
static void
em_tso_setup(struct tx_ring *txr, struct mbuf *mp, int ip_off,
    struct ip *ip, struct tcphdr *tp, u32 *txd_upper, u32 *txd_lower)
{
        struct adapter                  *adapter = txr->adapter;
        struct e1000_context_desc       *TXD;
        struct em_buffer                *tx_buffer;
        int cur, hdr_len;

        /*
         * In theory we could use the same TSO context if and only if the
         * frame is the same type (IP/TCP) and has the same MSS. However,
         * checking whether a frame has the same IP/TCP structure is a
         * hard thing, so just ignore that and always establish a
         * new TSO context.
         */
        hdr_len = ip_off + (ip->ip_hl << 2) + (tp->th_off << 2);
        *txd_lower = (E1000_TXD_CMD_DEXT |      /* Extended descr type */
                      E1000_TXD_DTYP_D |        /* Data descr type */
                      E1000_TXD_CMD_TSE);       /* Do TSE on this packet */

        /* IP and/or TCP header checksum calculation and insertion. */
        *txd_upper = (E1000_TXD_POPTS_IXSM | E1000_TXD_POPTS_TXSM) << 8;

        cur = txr->next_avail_desc;
        tx_buffer = &txr->tx_buffers[cur];
        TXD = (struct e1000_context_desc *) &txr->tx_base[cur];

        /*
         * Start offset for header checksum calculation.
         * End offset for header checksum calculation.
         * Offset of place to put the checksum.
         */
        TXD->lower_setup.ip_fields.ipcss = ip_off;
        TXD->lower_setup.ip_fields.ipcse =
            htole16(ip_off + (ip->ip_hl << 2) - 1);
        TXD->lower_setup.ip_fields.ipcso = ip_off + offsetof(struct ip, ip_sum);
        /*
         * Start offset for payload checksum calculation.
         * End offset for payload checksum calculation.
         * Offset of place to put the checksum.
         */
        TXD->upper_setup.tcp_fields.tucss = ip_off + (ip->ip_hl << 2);
        TXD->upper_setup.tcp_fields.tucse = 0;
        TXD->upper_setup.tcp_fields.tucso =
            ip_off + (ip->ip_hl << 2) + offsetof(struct tcphdr, th_sum);
        /*
         * Payload size per packet w/o any headers.
         * Length of all headers up to payload.
         */
        TXD->tcp_seg_setup.fields.mss = htole16(mp->m_pkthdr.tso_segsz);
        TXD->tcp_seg_setup.fields.hdr_len = hdr_len;

        TXD->cmd_and_length = htole32(adapter->txd_cmd |
                                E1000_TXD_CMD_DEXT |    /* Extended descr */
                                E1000_TXD_CMD_TSE |     /* TSE context */
                                E1000_TXD_CMD_IP |      /* Do IP csum */
                                E1000_TXD_CMD_TCP |     /* Do TCP checksum */
                                (mp->m_pkthdr.len - (hdr_len))); /* Total len */

        tx_buffer->m_head = NULL;
        tx_buffer->next_eop = -1;

        if (++cur == adapter->num_tx_desc)
                cur = 0;

        txr->tx_avail--;
        txr->next_avail_desc = cur;
        txr->tx_tso = TRUE;
}
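
/*
 * Worked example for the TSO context above: with plain Ethernet
 * (ip_off = 14), a 20-byte IP header and a 20-byte TCP header, hdr_len is
 * 14 + 20 + 20 = 54.  The length field in cmd_and_length covers only the
 * TCP payload, which the hardware slices into MSS-sized segments:
 */
#if 0
        int hdr_len = 14 + (5 << 2) + (5 << 2);         /* 54 bytes */
        int paylen  = mp->m_pkthdr.len - hdr_len;       /* what gets segmented */
        int nsegs   = howmany(paylen, mp->m_pkthdr.tso_segsz); /* frames out */
#endif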


/**********************************************************************
 *
 *  Examine each tx_buffer in the used queue. If the hardware is done
 *  processing the packet then free associated resources. The
 *  tx_buffer is put back on the free queue.
 *
 **********************************************************************/
static bool
em_txeof(struct tx_ring *txr)
{
        struct adapter  *adapter = txr->adapter;
        int first, last, done, processed;
        struct em_buffer *tx_buffer;
        struct e1000_tx_desc   *tx_desc, *eop_desc;
        struct ifnet   *ifp = adapter->ifp;

        EM_TX_LOCK_ASSERT(txr);

        /* No work, make sure watchdog is off */
        if (txr->tx_avail == adapter->num_tx_desc) {
                txr->queue_status = EM_QUEUE_IDLE;
                return (FALSE);
        }

        processed = 0;
        first = txr->next_to_clean;
        tx_desc = &txr->tx_base[first];
        tx_buffer = &txr->tx_buffers[first];
        last = tx_buffer->next_eop;
        eop_desc = &txr->tx_base[last];

        /*
         * Get the index of the first descriptor AFTER the EOP of the
         * first packet, so that we can do the simple comparison in
         * the inner while loop.
         */
        if (++last == adapter->num_tx_desc)
                last = 0;
        done = last;

        bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
            BUS_DMASYNC_POSTREAD);

        while (eop_desc->upper.fields.status & E1000_TXD_STAT_DD) {
                /* We clean the range of the packet */
                while (first != done) {
                        tx_desc->upper.data = 0;
                        tx_desc->lower.data = 0;
                        tx_desc->buffer_addr = 0;
                        ++txr->tx_avail;
                        ++processed;

                        if (tx_buffer->m_head) {
                                bus_dmamap_sync(txr->txtag,
                                    tx_buffer->map,
                                    BUS_DMASYNC_POSTWRITE);
                                bus_dmamap_unload(txr->txtag,
                                    tx_buffer->map);
                                m_freem(tx_buffer->m_head);
                                tx_buffer->m_head = NULL;
                        }
                        tx_buffer->next_eop = -1;
                        txr->watchdog_time = ticks;

                        if (++first == adapter->num_tx_desc)
                                first = 0;

                        tx_buffer = &txr->tx_buffers[first];
                        tx_desc = &txr->tx_base[first];
                }
                ++ifp->if_opackets;
                /* See if we can continue to the next packet */
                last = tx_buffer->next_eop;
                if (last != -1) {
                        eop_desc = &txr->tx_base[last];
                        /* Get new done point */
                        if (++last == adapter->num_tx_desc)
                                last = 0;
                        done = last;
                } else
                        break;
        }
        bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
            BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);

        txr->next_to_clean = first;

        /*
        ** Watchdog calculation: we know there's work outstanding
        ** or the first return would have been taken, so nothing
        ** processed for too long indicates a hang. The local timer
        ** will examine this and do a reset if needed.
        */
        if ((!processed) && ((ticks - txr->watchdog_time) > EM_WATCHDOG))
                txr->queue_status = EM_QUEUE_HUNG;

        /*
         * If we have a minimum free, clear IFF_DRV_OACTIVE
         * to tell the stack that it is OK to send packets.
         */
        if (txr->tx_avail > EM_MAX_SCATTER)
                ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;

        /* Disable watchdog if all clean */
        if (txr->tx_avail == adapter->num_tx_desc) {
                txr->queue_status = EM_QUEUE_IDLE;
                return (FALSE);
        }

        return (TRUE);
}
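
/*
 * The "done" index trick above, in isolation: precomputing the slot one
 * past a packet's EOP descriptor lets the cleanup loop use a plain
 * inequality on a circular ring.  A minimal sketch of the idiom:
 */
#if 0
        int idx = txr->next_to_clean;
        int eop = tx_buffer->next_eop;          /* packet's last descriptor */
        int done = (eop + 1 == adapter->num_tx_desc) ? 0 : eop + 1;

        while (idx != done) {                   /* reclaim [next_to_clean, eop] */
                /* ... release descriptor 'idx' ... */
                if (++idx == adapter->num_tx_desc)
                        idx = 0;
        }
#endif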


/*********************************************************************
 *
 *  Refresh RX descriptor mbufs from system mbuf buffer pool.
 *
 **********************************************************************/
static void
em_refresh_mbufs(struct rx_ring *rxr, int limit)
{
        struct adapter          *adapter = rxr->adapter;
        struct mbuf             *m;
        bus_dma_segment_t       segs[1];
        struct em_buffer        *rxbuf;
        int                     i, j, error, nsegs;
        bool                    cleaned = FALSE;

        i = j = rxr->next_to_refresh;
        /*
        ** Get one descriptor beyond
        ** our work mark to control
        ** the loop.
        */
        if (++j == adapter->num_rx_desc)
                j = 0;

        while (j != limit) {
                rxbuf = &rxr->rx_buffers[i];
                if (rxbuf->m_head == NULL) {
                        m = m_getjcl(M_DONTWAIT, MT_DATA,
                            M_PKTHDR, adapter->rx_mbuf_sz);
                        /*
                        ** If we have a temporary resource shortage
                        ** that causes a failure, just abort refresh
                        ** for now, we will return to this point when
                        ** reinvoked from em_rxeof.
                        */
                        if (m == NULL)
                                goto update;
                } else
                        m = rxbuf->m_head;

                m->m_len = m->m_pkthdr.len = adapter->rx_mbuf_sz;
                m->m_flags |= M_PKTHDR;
                m->m_data = m->m_ext.ext_buf;

                /* Use bus_dma machinery to set up the memory mapping */
                error = bus_dmamap_load_mbuf_sg(rxr->rxtag, rxbuf->map,
                    m, segs, &nsegs, BUS_DMA_NOWAIT);
                if (error != 0) {
                        printf("Refresh mbufs: hdr dmamap load"
                            " failure - %d\n", error);
                        m_free(m);
                        rxbuf->m_head = NULL;
                        goto update;
                }
                rxbuf->m_head = m;
                bus_dmamap_sync(rxr->rxtag,
                    rxbuf->map, BUS_DMASYNC_PREREAD);
                rxr->rx_base[i].buffer_addr = htole64(segs[0].ds_addr);
                cleaned = TRUE;

                i = j; /* Next is precalculated for us */
                rxr->next_to_refresh = i;
                /* Calculate next controlling index */
                if (++j == adapter->num_rx_desc)
                        j = 0;
        }
update:
        /*
        ** Update the tail pointer only if,
        ** and only as far as, we have refreshed.
        */
        if (cleaned)
                E1000_WRITE_REG(&adapter->hw,
                    E1000_RDT(rxr->me), rxr->next_to_refresh);

        return;
}


/*********************************************************************
 *
 *  Allocate memory for rx_buffer structures. Since we use one
 *  rx_buffer per received packet, the maximum number of rx_buffer's
 *  that we'll need is equal to the number of receive descriptors
 *  that we've allocated.
 *
 **********************************************************************/
static int
em_allocate_receive_buffers(struct rx_ring *rxr)
{
        struct adapter          *adapter = rxr->adapter;
        device_t                dev = adapter->dev;
        struct em_buffer        *rxbuf;
        int                     error;

        rxr->rx_buffers = malloc(sizeof(struct em_buffer) *
            adapter->num_rx_desc, M_DEVBUF, M_NOWAIT | M_ZERO);
        if (rxr->rx_buffers == NULL) {
                device_printf(dev, "Unable to allocate rx_buffer memory\n");
                return (ENOMEM);
        }

        error = bus_dma_tag_create(bus_get_dma_tag(dev), /* parent */
                                1, 0,                   /* alignment, bounds */
                                BUS_SPACE_MAXADDR,      /* lowaddr */
                                BUS_SPACE_MAXADDR,      /* highaddr */
                                NULL, NULL,             /* filter, filterarg */
                                MJUM9BYTES,             /* maxsize */
                                1,                      /* nsegments */
                                MJUM9BYTES,             /* maxsegsize */
                                0,                      /* flags */
                                NULL,                   /* lockfunc */
                                NULL,                   /* lockarg */
                                &rxr->rxtag);
        if (error) {
                device_printf(dev, "%s: bus_dma_tag_create failed %d\n",
                    __func__, error);
                goto fail;
        }

        rxbuf = rxr->rx_buffers;
        for (int i = 0; i < adapter->num_rx_desc; i++, rxbuf++) {
                error = bus_dmamap_create(rxr->rxtag, BUS_DMA_NOWAIT,
                    &rxbuf->map);
                if (error) {
                        device_printf(dev, "%s: bus_dmamap_create failed: %d\n",
                            __func__, error);
                        goto fail;
                }
        }

        return (0);

fail:
        em_free_receive_structures(adapter);
        return (error);
}


/*********************************************************************
 *
 *  Initialize a receive ring and its buffers.
 *
 **********************************************************************/
static int
em_setup_receive_ring(struct rx_ring *rxr)
{
        struct  adapter         *adapter = rxr->adapter;
        struct em_buffer        *rxbuf;
        bus_dma_segment_t       seg[1];
        int                     i, j, nsegs, error = 0;

        /* Clear the ring contents */
        EM_RX_LOCK(rxr);

        /* Invalidate all descriptors */
        for (i = 0; i < adapter->num_rx_desc; i++) {
                struct e1000_rx_desc *cur;
                cur = &rxr->rx_base[i];
                cur->status = 0;
        }

        /* Now replenish the mbufs */
        i = j = rxr->next_to_refresh;
        if (++j == adapter->num_rx_desc)
                j = 0;

        while (j != rxr->next_to_check) {
                rxbuf = &rxr->rx_buffers[i];
                rxbuf->m_head = m_getjcl(M_DONTWAIT, MT_DATA,
                    M_PKTHDR, adapter->rx_mbuf_sz);
                if (rxbuf->m_head == NULL) {
                        error = ENOBUFS;
                        goto fail;
                }
                rxbuf->m_head->m_len = adapter->rx_mbuf_sz;
                rxbuf->m_head->m_flags &= ~M_HASFCS; /* we strip it */
                rxbuf->m_head->m_pkthdr.len = adapter->rx_mbuf_sz;

                /* Get the memory mapping */
                error = bus_dmamap_load_mbuf_sg(rxr->rxtag,
                    rxbuf->map, rxbuf->m_head, seg,
                    &nsegs, BUS_DMA_NOWAIT);
                if (error != 0) {
                        m_freem(rxbuf->m_head);
                        rxbuf->m_head = NULL;
                        goto fail;
                }
                bus_dmamap_sync(rxr->rxtag,
                    rxbuf->map, BUS_DMASYNC_PREREAD);

                /* Update descriptor */
                rxr->rx_base[i].buffer_addr = htole64(seg[0].ds_addr);
                i = j;
                if (++j == adapter->num_rx_desc)
                        j = 0;
        }

fail:
        rxr->next_to_refresh = i;
        bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
            BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
        EM_RX_UNLOCK(rxr);
        return (error);
}

/*********************************************************************
 *
 *  Initialize all receive rings.
 *
 **********************************************************************/
static int
em_setup_receive_structures(struct adapter *adapter)
{
        struct rx_ring *rxr = adapter->rx_rings;
        int q;

        for (q = 0; q < adapter->num_queues; q++, rxr++)
                if (em_setup_receive_ring(rxr))
                        goto fail;

        return (0);
fail:
        /*
         * Free RX buffers allocated so far; we will only handle
         * the rings that completed, the failing case will have
         * cleaned up for itself. 'q' failed, so it's the terminus.
         */
        for (int i = 0, n = 0; i < q; ++i) {
                rxr = &adapter->rx_rings[i];
                n = rxr->next_to_check;
                while (n != rxr->next_to_refresh) {
                        struct em_buffer *rxbuf;
                        rxbuf = &rxr->rx_buffers[n];
                        if (rxbuf->m_head != NULL) {
                                bus_dmamap_sync(rxr->rxtag, rxbuf->map,
                                  BUS_DMASYNC_POSTREAD);
                                bus_dmamap_unload(rxr->rxtag, rxbuf->map);
                                m_freem(rxbuf->m_head);
                                rxbuf->m_head = NULL;
                        }
                        if (++n == adapter->num_rx_desc)
                                n = 0;
                }
                rxr->next_to_check = 0;
                rxr->next_to_refresh = 0;
        }

        return (ENOBUFS);
}

/*********************************************************************
 *
 *  Free all receive rings.
 *
 **********************************************************************/
static void
em_free_receive_structures(struct adapter *adapter)
{
        struct rx_ring *rxr = adapter->rx_rings;

        for (int i = 0; i < adapter->num_queues; i++, rxr++) {
                em_free_receive_buffers(rxr);
                /* Free the ring memory as well */
                em_dma_free(adapter, &rxr->rxdma);
                EM_RX_LOCK_DESTROY(rxr);
        }

        free(adapter->rx_rings, M_DEVBUF);
}
4054
4055
4056 /*********************************************************************
4057  *
4058  *  Free receive ring data structures
4059  *
4060  **********************************************************************/
4061 static void
4062 em_free_receive_buffers(struct rx_ring *rxr)
4063 {
4064         struct adapter          *adapter = rxr->adapter;
4065         struct em_buffer        *rxbuf = NULL;
4066
4067         INIT_DEBUGOUT("free_receive_buffers: begin");
4068
4069         if (rxr->rx_buffers != NULL) {
4070                 int i = rxr->next_to_check;
4071                 while (i != rxr->next_to_refresh) {
4072                         rxbuf = &rxr->rx_buffers[i];
4073                         if (rxbuf->map != NULL) {
4074                                 bus_dmamap_sync(rxr->rxtag, rxbuf->map,
4075                                     BUS_DMASYNC_POSTREAD);
4076                                 bus_dmamap_unload(rxr->rxtag, rxbuf->map);
4077                                 bus_dmamap_destroy(rxr->rxtag, rxbuf->map);
4078                         }
4079                         if (rxbuf->m_head != NULL) {
4080                                 m_freem(rxbuf->m_head);
4081                                 rxbuf->m_head = NULL;
4082                         }
4083                         if (++i == adapter->num_rx_desc)
4084                                 i = 0;
4085                 }
4086                 free(rxr->rx_buffers, M_DEVBUF);
4087                 rxr->rx_buffers = NULL;
4088                 rxr->next_to_check = 0;
4089                 rxr->next_to_refresh = 0;
4090         }
4091
4092         if (rxr->rxtag != NULL) {
4093                 bus_dma_tag_destroy(rxr->rxtag);
4094                 rxr->rxtag = NULL;
4095         }
4096
4097         return;
4098 }
4099
4100
4101 /*********************************************************************
4102  *
4103  *  Enable receive unit.
4104  *
4105  **********************************************************************/
4106 #define MAX_INTS_PER_SEC        8000
4107 #define DEFAULT_ITR          1000000000/(MAX_INTS_PER_SEC * 256)
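/*
 * Worked example (a sketch of the arithmetic only): the ITR register
 * counts in units of 256 ns, so DEFAULT_ITR evaluates to
 * 1,000,000,000 / (8000 * 256) ~= 488 units, which throttles the
 * device to roughly MAX_INTS_PER_SEC interrupts per second.
 */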
4108
4109 static void
4110 em_initialize_receive_unit(struct adapter *adapter)
4111 {
4112         struct rx_ring  *rxr = adapter->rx_rings;
4113         struct ifnet    *ifp = adapter->ifp;
4114         struct e1000_hw *hw = &adapter->hw;
4115         u64     bus_addr;
4116         u32     rctl, rxcsum;
4117
4118         INIT_DEBUGOUT("em_initialize_receive_unit: begin");
4119
4120         /*
4121          * Make sure receives are disabled while setting
4122          * up the descriptor ring
4123          */
4124         rctl = E1000_READ_REG(hw, E1000_RCTL);
4125         E1000_WRITE_REG(hw, E1000_RCTL, rctl & ~E1000_RCTL_EN);
4126
4127         E1000_WRITE_REG(&adapter->hw, E1000_RADV,
4128             adapter->rx_abs_int_delay.value);
4129         /*
4130          * Set the interrupt throttling rate. Value is calculated
4131          * as DEFAULT_ITR = 1/(MAX_INTS_PER_SEC * 256ns)
4132          */
4133         E1000_WRITE_REG(hw, E1000_ITR, DEFAULT_ITR);
4134
4135         /*
4136         ** When using MSIX interrupts we need to throttle
4137         ** using the EITR register (82574 only)
4138         */
4139         if (hw->mac.type == e1000_82574)
4140                 for (int i = 0; i < 4; i++)
4141                         E1000_WRITE_REG(hw, E1000_EITR_82574(i),
4142                             DEFAULT_ITR);
4143
4144         /* Disable accelerated acknowledgement */
4145         if (adapter->hw.mac.type == e1000_82574)
4146                 E1000_WRITE_REG(hw, E1000_RFCTL, E1000_RFCTL_ACK_DIS);
4147
4148         if (ifp->if_capenable & IFCAP_RXCSUM) {
4149                 rxcsum = E1000_READ_REG(hw, E1000_RXCSUM);
4150                 rxcsum |= (E1000_RXCSUM_IPOFL | E1000_RXCSUM_TUOFL);
4151                 E1000_WRITE_REG(hw, E1000_RXCSUM, rxcsum);
4152         }
4153
4154         /*
4155         ** XXX TEMPORARY WORKAROUND: on some systems with the 82573
4156         ** (the Lenovo X60, for example) long latencies are observed.
4157         ** This change eliminates the problem, but since positive
4158         ** RDTR values are a known source of problems on other
4159         ** platforms, another solution is being sought.
4160         */
4161         if (hw->mac.type == e1000_82573)
4162                 E1000_WRITE_REG(hw, E1000_RDTR, 0x20);
4163
4164         for (int i = 0; i < adapter->num_queues; i++, rxr++) {
4165                 /* Setup the Base and Length of the Rx Descriptor Ring */
4166                 bus_addr = rxr->rxdma.dma_paddr;
4167                 E1000_WRITE_REG(hw, E1000_RDLEN(i),
4168                     adapter->num_rx_desc * sizeof(struct e1000_rx_desc));
4169                 E1000_WRITE_REG(hw, E1000_RDBAH(i), (u32)(bus_addr >> 32));
4170                 E1000_WRITE_REG(hw, E1000_RDBAL(i), (u32)bus_addr);
4171                 /* Setup the Head and Tail Descriptor Pointers */
4172                 E1000_WRITE_REG(hw, E1000_RDH(i), rxr->next_to_check);
4173                 E1000_WRITE_REG(hw, E1000_RDT(i), rxr->next_to_refresh);
4174         }
4175
4176         /* Set early receive threshold on appropriate hw */
4177         if (((adapter->hw.mac.type == e1000_ich9lan) ||
4178             (adapter->hw.mac.type == e1000_pch2lan) ||
4179             (adapter->hw.mac.type == e1000_ich10lan)) &&
4180             (ifp->if_mtu > ETHERMTU)) {
4181                 u32 rxdctl = E1000_READ_REG(hw, E1000_RXDCTL(0));
4182                 E1000_WRITE_REG(hw, E1000_RXDCTL(0), rxdctl | 3);
4183                 E1000_WRITE_REG(hw, E1000_ERT, 0x100 | (1 << 13));
4184         }
4185                 
4186         if (adapter->hw.mac.type == e1000_pch2lan) {
4187                 if (ifp->if_mtu > ETHERMTU)
4188                         e1000_lv_jumbo_workaround_ich8lan(hw, TRUE);
4189                 else
4190                         e1000_lv_jumbo_workaround_ich8lan(hw, FALSE);
4191         }
4192
4193         /* Setup the Receive Control Register */
4194         rctl &= ~(3 << E1000_RCTL_MO_SHIFT);
4195         rctl |= E1000_RCTL_EN | E1000_RCTL_BAM |
4196             E1000_RCTL_LBM_NO | E1000_RCTL_RDMTS_HALF |
4197             (hw->mac.mc_filter_type << E1000_RCTL_MO_SHIFT);
4198
4199         /* Strip the CRC */
4200         rctl |= E1000_RCTL_SECRC;
4201
4202         /* Make sure VLAN Filters are off */
4203         rctl &= ~E1000_RCTL_VFE;
4204         rctl &= ~E1000_RCTL_SBP;
4205
4206         if (adapter->rx_mbuf_sz == MCLBYTES)
4207                 rctl |= E1000_RCTL_SZ_2048;
4208         else if (adapter->rx_mbuf_sz == MJUMPAGESIZE)
4209                 rctl |= E1000_RCTL_SZ_4096 | E1000_RCTL_BSEX;
4210         else if (adapter->rx_mbuf_sz > MJUMPAGESIZE)
4211                 rctl |= E1000_RCTL_SZ_8192 | E1000_RCTL_BSEX;
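        /*
         * Note on BSEX (an illustrative aside, based on the RCTL BSIZE
         * encoding): setting E1000_RCTL_BSEX scales the buffer-size
         * field by 16, so the encodings for 256/512/1024 bytes select
         * 4096/8192/16384 bytes instead, which is why the 4K and 8K
         * cases above also set BSEX.
         */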
4212
4213         if (ifp->if_mtu > ETHERMTU)
4214                 rctl |= E1000_RCTL_LPE;
4215         else
4216                 rctl &= ~E1000_RCTL_LPE;
4217
4218         /* Write out the settings */
4219         E1000_WRITE_REG(hw, E1000_RCTL, rctl);
4220
4221         return;
4222 }
4223
4224
4225 /*********************************************************************
4226  *
4227  *  This routine executes in interrupt context. It replenishes
4228  *  the mbufs in the descriptor ring and sends data which has been
4229  *  DMA'ed into host memory up to the stack.
4230  *
4231  *  We loop at most count times if count is > 0, or until done if
4232  *  count < 0.
4233  *  
4234  *  For polling we also return the number of cleaned packets via *done.
4235  *********************************************************************/
4236 static bool
4237 em_rxeof(struct rx_ring *rxr, int count, int *done)
4238 {
4239         struct adapter          *adapter = rxr->adapter;
4240         struct ifnet            *ifp = adapter->ifp;
4241         struct mbuf             *mp, *sendmp;
4242         u8                      status = 0;
4243         u16                     len;
4244         int                     i, processed, rxdone = 0;
4245         bool                    eop;
4246         struct e1000_rx_desc    *cur;
4247
4248         EM_RX_LOCK(rxr);
4249
4250         for (i = rxr->next_to_check, processed = 0; count != 0;) {
4251
4252                 if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
4253                         break;
4254
4255                 bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
4256                     BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
4257
4258                 cur = &rxr->rx_base[i];
4259                 status = cur->status;
4260                 mp = sendmp = NULL;
4261
4262                 if ((status & E1000_RXD_STAT_DD) == 0)
4263                         break;
4264
4265                 len = le16toh(cur->length);
4266                 eop = (status & E1000_RXD_STAT_EOP) != 0;
4267
4268                 if ((cur->errors & E1000_RXD_ERR_FRAME_ERR_MASK) ||
4269                     (rxr->discard == TRUE)) {
4270                         ifp->if_ierrors++;
4271                         ++rxr->rx_discarded;
4272                         if (!eop) /* Catch subsequent segs */
4273                                 rxr->discard = TRUE;
4274                         else
4275                                 rxr->discard = FALSE;
4276                         em_rx_discard(rxr, i);
4277                         goto next_desc;
4278                 }
4279
4280                 /* Assign correct length to the current fragment */
4281                 mp = rxr->rx_buffers[i].m_head;
4282                 mp->m_len = len;
4283
4284                 /* Trigger for refresh */
4285                 rxr->rx_buffers[i].m_head = NULL;
4286
4287                 /* First segment? */
4288                 if (rxr->fmp == NULL) {
4289                         mp->m_pkthdr.len = len;
4290                         rxr->fmp = rxr->lmp = mp;
4291                 } else {
4292                         /* Chain mbuf's together */
4293                         mp->m_flags &= ~M_PKTHDR;
4294                         rxr->lmp->m_next = mp;
4295                         rxr->lmp = mp;
4296                         rxr->fmp->m_pkthdr.len += len;
4297                 }
4298
4299                 if (eop) {
4300                         --count;
4301                         sendmp = rxr->fmp;
4302                         sendmp->m_pkthdr.rcvif = ifp;
4303                         ifp->if_ipackets++;
4304                         em_receive_checksum(cur, sendmp);
4305 #ifndef __NO_STRICT_ALIGNMENT
4306                         if (adapter->max_frame_size >
4307                             (MCLBYTES - ETHER_ALIGN) &&
4308                             em_fixup_rx(rxr) != 0)
4309                                 goto skip;
4310 #endif
4311                         if (status & E1000_RXD_STAT_VP) {
4312                                 sendmp->m_pkthdr.ether_vtag =
4313                                     (le16toh(cur->special) &
4314                                     E1000_RXD_SPC_VLAN_MASK);
4315                                 sendmp->m_flags |= M_VLANTAG;
4316                         }
4317 #ifdef EM_MULTIQUEUE
4318                         sendmp->m_pkthdr.flowid = rxr->msix;
4319                         sendmp->m_flags |= M_FLOWID;
4320 #endif
4321 #ifndef __NO_STRICT_ALIGNMENT
4322 skip:
4323 #endif
4324                         rxr->fmp = rxr->lmp = NULL;
4325                 }
4326 next_desc:
4327                 /* Zero out the receive descriptors status. */
4328                 cur->status = 0;
4329                 ++rxdone;       /* cumulative for POLL */
4330                 ++processed;
4331
4332                 /* Advance our pointers to the next descriptor. */
4333                 if (++i == adapter->num_rx_desc)
4334                         i = 0;
4335
4336                 /* Send to the stack */
4337                 if (sendmp != NULL) {
4338                         rxr->next_to_check = i;
4339                         EM_RX_UNLOCK(rxr);
4340                         (*ifp->if_input)(ifp, sendmp);
4341                         EM_RX_LOCK(rxr);
4342                         i = rxr->next_to_check;
4343                 }
4344
4345                 /* Only refresh mbufs every 8 descriptors */
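                /*
                 * Batching the refresh amortizes the cost of replenishing
                 * buffers and of the tail-pointer update it implies across
                 * several descriptors (a sketch of the rationale, assuming
                 * em_refresh_mbufs() advances RDT).
                 */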
4346                 if (processed == 8) {
4347                         em_refresh_mbufs(rxr, i);
4348                         processed = 0;
4349                 }
4350         }
4351
4352         /* Catch any remaining refresh work */
4353         if (e1000_rx_unrefreshed(rxr))
4354                 em_refresh_mbufs(rxr, i);
4355
4356         rxr->next_to_check = i;
4357         if (done != NULL)
4358                 *done = rxdone;
4359         EM_RX_UNLOCK(rxr);
4360
4361         return ((status & E1000_RXD_STAT_DD) ? TRUE : FALSE);
4362 }
4363
4364 static __inline void
4365 em_rx_discard(struct rx_ring *rxr, int i)
4366 {
4367         struct em_buffer        *rbuf;
4368
4369         rbuf = &rxr->rx_buffers[i];
4370         /* Free any previous pieces */
4371         if (rxr->fmp != NULL) {
4372                 rxr->fmp->m_flags |= M_PKTHDR;
4373                 m_freem(rxr->fmp);
4374                 rxr->fmp = NULL;
4375                 rxr->lmp = NULL;
4376         }
4377         /*
4378         ** Free the buffer and allow em_refresh_mbufs()
4379         ** to clean up and recharge it.
4380         */
4381         if (rbuf->m_head) {
4382                 m_free(rbuf->m_head);
4383                 rbuf->m_head = NULL;
4384         }
4385         return;
4386 }
4387
4388 #ifndef __NO_STRICT_ALIGNMENT
4389 /*
4390  * When jumbo frames are enabled we should realign the entire payload on
4391  * architectures with strict alignment. This is a serious design mistake of
4392  * the 8254x, as it nullifies the benefit of DMA. The 8254x only allows the
4393  * RX buffer size to be 2048/4096/8192/16384; what we really want is
4394  * 2048 - ETHER_ALIGN, which would align the payload. On architectures
4395  * without strict alignment restrictions the 8254x still performs unaligned
4396  * memory accesses, which reduce performance as well. To avoid copying an
4397  * entire frame to realign it, we allocate a new mbuf, copy the ethernet
4398  * header into it, and prepend the new mbuf to the existing mbuf chain.
4399  *
4400  * Be aware that the best performance of the 8254x is achieved only when
4401  * jumbo frames are not used at all on architectures with strict alignment.
4402  */
4403 static int
4404 em_fixup_rx(struct rx_ring *rxr)
4405 {
4406         struct adapter *adapter = rxr->adapter;
4407         struct mbuf *m, *n;
4408         int error;
4409
4410         error = 0;
4411         m = rxr->fmp;
4412         if (m->m_len <= (MCLBYTES - ETHER_HDR_LEN)) {
4413                 bcopy(m->m_data, m->m_data + ETHER_HDR_LEN, m->m_len);
4414                 m->m_data += ETHER_HDR_LEN;
4415         } else {
4416                 MGETHDR(n, M_DONTWAIT, MT_DATA);
4417                 if (n != NULL) {
4418                         bcopy(m->m_data, n->m_data, ETHER_HDR_LEN);
4419                         m->m_data += ETHER_HDR_LEN;
4420                         m->m_len -= ETHER_HDR_LEN;
4421                         n->m_len = ETHER_HDR_LEN;
4422                         M_MOVE_PKTHDR(n, m);
4423                         n->m_next = m;
4424                         rxr->fmp = n;
4425                 } else {
4426                         adapter->dropped_pkts++;
4427                         m_freem(rxr->fmp);
4428                         rxr->fmp = NULL;
4429                         error = ENOMEM;
4430                 }
4431         }
4432
4433         return (error);
4434 }
4435 #endif
4436
4437 /*********************************************************************
4438  *
4439  *  Verify that the hardware indicated that the checksum is valid.
4440  *  Inform the stack about the status of the checksum so that the
4441  *  stack doesn't spend time re-verifying it.
4442  *
4443  *********************************************************************/
4444 static void
4445 em_receive_checksum(struct e1000_rx_desc *rx_desc, struct mbuf *mp)
4446 {
4447         /* If the Ignore Checksum bit is set, report nothing */
4448         if (rx_desc->status & E1000_RXD_STAT_IXSM) {
4449                 mp->m_pkthdr.csum_flags = 0;
4450                 return;
4451         }
4452
4453         if (rx_desc->status & E1000_RXD_STAT_IPCS) {
4454                 /* Did it pass? */
4455                 if (!(rx_desc->errors & E1000_RXD_ERR_IPE)) {
4456                         /* IP Checksum Good */
4457                         mp->m_pkthdr.csum_flags = CSUM_IP_CHECKED;
4458                         mp->m_pkthdr.csum_flags |= CSUM_IP_VALID;
4459
4460                 } else {
4461                         mp->m_pkthdr.csum_flags = 0;
4462                 }
4463         }
4464
4465         if (rx_desc->status & E1000_RXD_STAT_TCPCS) {
4466                 /* Did it pass? */
4467                 if (!(rx_desc->errors & E1000_RXD_ERR_TCPE)) {
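                        /*
                         * CSUM_DATA_VALID | CSUM_PSEUDO_HDR with csum_data
                         * set to 0xffff tells the stack that the full
                         * TCP/UDP checksum, pseudo-header included, has
                         * already been verified by the hardware.
                         */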
4468                         mp->m_pkthdr.csum_flags |=
4469                         (CSUM_DATA_VALID | CSUM_PSEUDO_HDR);
4470                         mp->m_pkthdr.csum_data = htons(0xffff);
4471                 }
4472         }
4473 }
4474
4475 /*
4476  * This routine is run via a vlan
4477  * config EVENT
4478  */
4479 static void
4480 em_register_vlan(void *arg, struct ifnet *ifp, u16 vtag)
4481 {
4482         struct adapter  *adapter = ifp->if_softc;
4483         u32             index, bit;
4484
4485         if (ifp->if_softc !=  arg)   /* Not our event */
4486                 return;
4487
4488         if ((vtag == 0) || (vtag > 4095))       /* Invalid ID */
4489                 return;
4490
4491         EM_CORE_LOCK(adapter);
4492         index = (vtag >> 5) & 0x7F;
4493         bit = vtag & 0x1F;
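        /*
         * Example: the VFTA is 128 32-bit words covering 4096 VLAN IDs,
         * so vtag 100 lands in word (100 >> 5) = 3 at bit
         * (100 & 0x1F) = 4.
         */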
4494         adapter->shadow_vfta[index] |= (1 << bit);
4495         ++adapter->num_vlans;
4496         /* Re-init to load the changes */
4497         if (ifp->if_capenable & IFCAP_VLAN_HWFILTER)
4498                 em_init_locked(adapter);
4499         EM_CORE_UNLOCK(adapter);
4500 }
4501
4502 /*
4503  * This routine is run via a vlan
4504  * unconfig EVENT
4505  */
4506 static void
4507 em_unregister_vlan(void *arg, struct ifnet *ifp, u16 vtag)
4508 {
4509         struct adapter  *adapter = ifp->if_softc;
4510         u32             index, bit;
4511
4512         if (ifp->if_softc !=  arg)
4513                 return;
4514
4515         if ((vtag == 0) || (vtag > 4095))       /* Invalid */
4516                 return;
4517
4518         EM_CORE_LOCK(adapter);
4519         index = (vtag >> 5) & 0x7F;
4520         bit = vtag & 0x1F;
4521         adapter->shadow_vfta[index] &= ~(1 << bit);
4522         --adapter->num_vlans;
4523         /* Re-init to load the changes */
4524         if (ifp->if_capenable & IFCAP_VLAN_HWFILTER)
4525                 em_init_locked(adapter);
4526         EM_CORE_UNLOCK(adapter);
4527 }
4528
4529 static void
4530 em_setup_vlan_hw_support(struct adapter *adapter)
4531 {
4532         struct e1000_hw *hw = &adapter->hw;
4533         u32             reg;
4534
4535         /*
4536         ** We get here through init_locked, meaning a
4537         ** soft reset has already cleared the VFTA and
4538         ** other state, so if no vlans have been
4539         ** registered there is nothing to do.
4540         */
4541         if (adapter->num_vlans == 0)
4542                 return;
4543
4544         /*
4545         ** A soft reset zeroes out the VFTA, so
4546         ** we need to repopulate it now.
4547         */
4548         for (int i = 0; i < EM_VFTA_SIZE; i++)
4549                 if (adapter->shadow_vfta[i] != 0)
4550                         E1000_WRITE_REG_ARRAY(hw, E1000_VFTA,
4551                             i, adapter->shadow_vfta[i]);
4552
4553         reg = E1000_READ_REG(hw, E1000_CTRL);
4554         reg |= E1000_CTRL_VME;
4555         E1000_WRITE_REG(hw, E1000_CTRL, reg);
4556
4557         /* Enable the Filter Table */
4558         reg = E1000_READ_REG(hw, E1000_RCTL);
4559         reg &= ~E1000_RCTL_CFIEN;
4560         reg |= E1000_RCTL_VFE;
4561         E1000_WRITE_REG(hw, E1000_RCTL, reg);
4562 }
4563
4564 static void
4565 em_enable_intr(struct adapter *adapter)
4566 {
4567         struct e1000_hw *hw = &adapter->hw;
4568         u32 ims_mask = IMS_ENABLE_MASK;
4569
4570         if (hw->mac.type == e1000_82574) {
4571                 E1000_WRITE_REG(hw, EM_EIAC, EM_MSIX_MASK);
4572                 ims_mask |= EM_MSIX_MASK;
4573         } 
4574         E1000_WRITE_REG(hw, E1000_IMS, ims_mask);
4575 }
4576
4577 static void
4578 em_disable_intr(struct adapter *adapter)
4579 {
4580         struct e1000_hw *hw = &adapter->hw;
4581
4582         if (hw->mac.type == e1000_82574)
4583                 E1000_WRITE_REG(hw, EM_EIAC, 0);
4584         E1000_WRITE_REG(&adapter->hw, E1000_IMC, 0xffffffff);
4585 }
4586
4587 /*
4588  * A bit of a misnomer: what this really means is
4589  * to enable OS management of the system, i.e.
4590  * to disable special hardware management features.
4591  */
4592 static void
4593 em_init_manageability(struct adapter *adapter)
4594 {
4595         /* A shared code workaround */
4596 #define E1000_82542_MANC2H E1000_MANC2H
4597         if (adapter->has_manage) {
4598                 int manc2h = E1000_READ_REG(&adapter->hw, E1000_MANC2H);
4599                 int manc = E1000_READ_REG(&adapter->hw, E1000_MANC);
4600
4601                 /* disable hardware interception of ARP */
4602                 manc &= ~(E1000_MANC_ARP_EN);
4603
4604                 /* enable receiving management packets to the host */
4605                 manc |= E1000_MANC_EN_MNG2HOST;
4606 #define E1000_MNG2HOST_PORT_623 (1 << 5)
4607 #define E1000_MNG2HOST_PORT_664 (1 << 6)
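                /*
                 * Ports 623 and 664 are the standard RMCP/ASF management
                 * ports; setting these MANC2H bits forwards such packets
                 * up to the host stack as well.
                 */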
4608                 manc2h |= E1000_MNG2HOST_PORT_623;
4609                 manc2h |= E1000_MNG2HOST_PORT_664;
4610                 E1000_WRITE_REG(&adapter->hw, E1000_MANC2H, manc2h);
4611                 E1000_WRITE_REG(&adapter->hw, E1000_MANC, manc);
4612         }
4613 }
4614
4615 /*
4616  * Give control back to hardware management
4617  * controller if there is one.
4618  */
4619 static void
4620 em_release_manageability(struct adapter *adapter)
4621 {
4622         if (adapter->has_manage) {
4623                 int manc = E1000_READ_REG(&adapter->hw, E1000_MANC);
4624
4625                 /* re-enable hardware interception of ARP */
4626                 manc |= E1000_MANC_ARP_EN;
4627                 manc &= ~E1000_MANC_EN_MNG2HOST;
4628
4629                 E1000_WRITE_REG(&adapter->hw, E1000_MANC, manc);
4630         }
4631 }
4632
4633 /*
4634  * em_get_hw_control sets the {CTRL_EXT|FWSM}:DRV_LOAD bit.
4635  * For ASF and Pass Through versions of f/w this means
4636  * that the driver is loaded. For AMT versions of the f/w
4637  * this means that the network i/f is open.
4638  */
4639 static void
4640 em_get_hw_control(struct adapter *adapter)
4641 {
4642         u32 ctrl_ext, swsm;
4643
4644         if (adapter->hw.mac.type == e1000_82573) {
4645                 swsm = E1000_READ_REG(&adapter->hw, E1000_SWSM);
4646                 E1000_WRITE_REG(&adapter->hw, E1000_SWSM,
4647                     swsm | E1000_SWSM_DRV_LOAD);
4648                 return;
4649         }
4650         /* else */
4651         ctrl_ext = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
4652         E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT,
4653             ctrl_ext | E1000_CTRL_EXT_DRV_LOAD);
4654         return;
4655 }
4656
4657 /*
4658  * em_release_hw_control resets {CTRL_EXT|FWSM}:DRV_LOAD bit.
4659  * For ASF and Pass Through versions of f/w this means that
4660  * the driver is no longer loaded. For AMT versions of the
4661  * f/w this means that the network i/f is closed.
4662  */
4663 static void
4664 em_release_hw_control(struct adapter *adapter)
4665 {
4666         u32 ctrl_ext, swsm;
4667
4668         if (!adapter->has_manage)
4669                 return;
4670
4671         if (adapter->hw.mac.type == e1000_82573) {
4672                 swsm = E1000_READ_REG(&adapter->hw, E1000_SWSM);
4673                 E1000_WRITE_REG(&adapter->hw, E1000_SWSM,
4674                     swsm & ~E1000_SWSM_DRV_LOAD);
4675                 return;
4676         }
4677         /* else */
4678         ctrl_ext = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
4679         E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT,
4680             ctrl_ext & ~E1000_CTRL_EXT_DRV_LOAD);
4681         return;
4682 }
4683
4684 static int
4685 em_is_valid_ether_addr(u8 *addr)
4686 {
4687         char zero_addr[6] = { 0, 0, 0, 0, 0, 0 };
4688
4689         if ((addr[0] & 1) || (!bcmp(addr, zero_addr, ETHER_ADDR_LEN))) {
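        /*
         * Reject group (multicast/broadcast) addresses, indicated by the
         * I/G bit in the first octet, as well as the all-zero address.
         */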
4690                 return (FALSE);
4691         }
4692
4693         return (TRUE);
4694 }
4695
4696 /*
4697 ** Parse the interface capabilities with regard
4698 ** to both system management and wake-on-lan for
4699 ** later use.
4700 */
4701 static void
4702 em_get_wakeup(device_t dev)
4703 {
4704         struct adapter  *adapter = device_get_softc(dev);
4705         u16             eeprom_data = 0, device_id, apme_mask;
4706
4707         adapter->has_manage = e1000_enable_mng_pass_thru(&adapter->hw);
4708         apme_mask = EM_EEPROM_APME;
4709
4710         switch (adapter->hw.mac.type) {
4711         case e1000_82573:
4712         case e1000_82583:
4713                 adapter->has_amt = TRUE;
4714                 /* Falls thru */
4715         case e1000_82571:
4716         case e1000_82572:
4717         case e1000_80003es2lan:
4718                 if (adapter->hw.bus.func == 1) {
4719                         e1000_read_nvm(&adapter->hw,
4720                             NVM_INIT_CONTROL3_PORT_B, 1, &eeprom_data);
4721                         break;
4722                 } else
4723                         e1000_read_nvm(&adapter->hw,
4724                             NVM_INIT_CONTROL3_PORT_A, 1, &eeprom_data);
4725                 break;
4726         case e1000_ich8lan:
4727         case e1000_ich9lan:
4728         case e1000_ich10lan:
4729         case e1000_pchlan:
4730         case e1000_pch2lan:
4731                 apme_mask = E1000_WUC_APME;
4732                 adapter->has_amt = TRUE;
4733                 eeprom_data = E1000_READ_REG(&adapter->hw, E1000_WUC);
4734                 break;
4735         default:
4736                 e1000_read_nvm(&adapter->hw,
4737                     NVM_INIT_CONTROL3_PORT_A, 1, &eeprom_data);
4738                 break;
4739         }
4740         if (eeprom_data & apme_mask)
4741                 adapter->wol = (E1000_WUFC_MAG | E1000_WUFC_MC);
4742         /*
4743          * We have the eeprom settings, now apply the special cases
4744          * where the eeprom may be wrong or the board won't support
4745          * wake on lan on a particular port
4746          */
4747         device_id = pci_get_device(dev);
4748         switch (device_id) {
4749         case E1000_DEV_ID_82571EB_FIBER:
4750                 /* Wake events only supported on port A for dual fiber
4751                  * regardless of eeprom setting */
4752                 if (E1000_READ_REG(&adapter->hw, E1000_STATUS) &
4753                     E1000_STATUS_FUNC_1)
4754                         adapter->wol = 0;
4755                 break;
4756         case E1000_DEV_ID_82571EB_QUAD_COPPER:
4757         case E1000_DEV_ID_82571EB_QUAD_FIBER:
4758         case E1000_DEV_ID_82571EB_QUAD_COPPER_LP:
4759                 /* if quad port adapter, disable WoL on all but port A */
4760                 if (global_quad_port_a != 0)
4761                         adapter->wol = 0;
4762                 /* Reset for multiple quad port adapters */
4763                 if (++global_quad_port_a == 4)
4764                         global_quad_port_a = 0;
4765                 break;
4766         }
4767         return;
4768 }
4769
4770
4771 /*
4772  * Enable PCI Wake On Lan capability
4773  */
4774 static void
4775 em_enable_wakeup(device_t dev)
4776 {
4777         struct adapter  *adapter = device_get_softc(dev);
4778         struct ifnet    *ifp = adapter->ifp;
4779         u32             pmc, ctrl, ctrl_ext, rctl;
4780         u16             status;
4781
4782         if ((pci_find_cap(dev, PCIY_PMG, &pmc) != 0))
4783                 return;
4784
4785         /* Advertise the wakeup capability */
4786         ctrl = E1000_READ_REG(&adapter->hw, E1000_CTRL);
4787         ctrl |= (E1000_CTRL_SWDPIN2 | E1000_CTRL_SWDPIN3);
4788         E1000_WRITE_REG(&adapter->hw, E1000_CTRL, ctrl);
4789         E1000_WRITE_REG(&adapter->hw, E1000_WUC, E1000_WUC_PME_EN);
4790
4791         if ((adapter->hw.mac.type == e1000_ich8lan) ||
4792             (adapter->hw.mac.type == e1000_pchlan) ||
4793             (adapter->hw.mac.type == e1000_ich9lan) ||
4794             (adapter->hw.mac.type == e1000_ich10lan))
4795                 e1000_disable_gig_wol_ich8lan(&adapter->hw);
4796
4797         /* Keep the laser running on Fiber adapters */
4798         if (adapter->hw.phy.media_type == e1000_media_type_fiber ||
4799             adapter->hw.phy.media_type == e1000_media_type_internal_serdes) {
4800                 ctrl_ext = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
4801                 ctrl_ext |= E1000_CTRL_EXT_SDP3_DATA;
4802                 E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT, ctrl_ext);
4803         }
4804
4805         /*
4806         ** Determine type of Wakeup: note that wol
4807         ** is set with all bits on by default.
4808         */
4809         if ((ifp->if_capenable & IFCAP_WOL_MAGIC) == 0)
4810                 adapter->wol &= ~E1000_WUFC_MAG;
4811
4812         if ((ifp->if_capenable & IFCAP_WOL_MCAST) == 0)
4813                 adapter->wol &= ~E1000_WUFC_MC;
4814         else {
4815                 rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
4816                 rctl |= E1000_RCTL_MPE;
4817                 E1000_WRITE_REG(&adapter->hw, E1000_RCTL, rctl);
4818         }
4819
4820         if ((adapter->hw.mac.type == e1000_pchlan) ||
4821             (adapter->hw.mac.type == e1000_pch2lan)) {
4822                 if (em_enable_phy_wakeup(adapter))
4823                         return;
4824         } else {
4825                 E1000_WRITE_REG(&adapter->hw, E1000_WUC, E1000_WUC_PME_EN);
4826                 E1000_WRITE_REG(&adapter->hw, E1000_WUFC, adapter->wol);
4827         }
4828
4829         if (adapter->hw.phy.type == e1000_phy_igp_3)
4830                 e1000_igp3_phy_powerdown_workaround_ich8lan(&adapter->hw);
4831
4832         /* Request PME */
4833         status = pci_read_config(dev, pmc + PCIR_POWER_STATUS, 2);
4834         status &= ~(PCIM_PSTAT_PME | PCIM_PSTAT_PMEENABLE);
4835         if (ifp->if_capenable & IFCAP_WOL)
4836                 status |= PCIM_PSTAT_PME | PCIM_PSTAT_PMEENABLE;
4837         pci_write_config(dev, pmc + PCIR_POWER_STATUS, status, 2);
4838
4839         return;
4840 }
4841
4842 /*
4843 ** WOL in the newer chipset interfaces (pchlan)
4844 ** requires the wakeup state to be copied into the PHY
4845 */
4846 static int
4847 em_enable_phy_wakeup(struct adapter *adapter)
4848 {
4849         struct e1000_hw *hw = &adapter->hw;
4850         u32 mreg, ret = 0;
4851         u16 preg;
4852
4853         /* copy MAC RARs to PHY RARs */
4854         e1000_copy_rx_addrs_to_phy_ich8lan(hw);
4855
4856         /* copy MAC MTA to PHY MTA */
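        /*
         * Each 32-bit MTA register is written as two 16-bit PHY
         * registers: the low word to BM_MTA(i) and the high word to
         * BM_MTA(i) + 1.
         */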
4857         for (int i = 0; i < adapter->hw.mac.mta_reg_count; i++) {
4858                 mreg = E1000_READ_REG_ARRAY(hw, E1000_MTA, i);
4859                 e1000_write_phy_reg(hw, BM_MTA(i), (u16)(mreg & 0xFFFF));
4860                 e1000_write_phy_reg(hw, BM_MTA(i) + 1,
4861                     (u16)((mreg >> 16) & 0xFFFF));
4862         }
4863
4864         /* configure PHY Rx Control register */
4865         e1000_read_phy_reg(&adapter->hw, BM_RCTL, &preg);
4866         mreg = E1000_READ_REG(hw, E1000_RCTL);
4867         if (mreg & E1000_RCTL_UPE)
4868                 preg |= BM_RCTL_UPE;
4869         if (mreg & E1000_RCTL_MPE)
4870                 preg |= BM_RCTL_MPE;
4871         preg &= ~(BM_RCTL_MO_MASK);
4872         if (mreg & E1000_RCTL_MO_3)
4873                 preg |= (((mreg & E1000_RCTL_MO_3) >> E1000_RCTL_MO_SHIFT)
4874                                 << BM_RCTL_MO_SHIFT);
4875         if (mreg & E1000_RCTL_BAM)
4876                 preg |= BM_RCTL_BAM;
4877         if (mreg & E1000_RCTL_PMCF)
4878                 preg |= BM_RCTL_PMCF;
4879         mreg = E1000_READ_REG(hw, E1000_CTRL);
4880         if (mreg & E1000_CTRL_RFCE)
4881                 preg |= BM_RCTL_RFCE;
4882         e1000_write_phy_reg(&adapter->hw, BM_RCTL, preg);
4883
4884         /* enable PHY wakeup in MAC register */
4885         E1000_WRITE_REG(hw, E1000_WUC,
4886             E1000_WUC_PHY_WAKE | E1000_WUC_PME_EN);
4887         E1000_WRITE_REG(hw, E1000_WUFC, adapter->wol);
4888
4889         /* configure and enable PHY wakeup in PHY registers */
4890         e1000_write_phy_reg(&adapter->hw, BM_WUFC, adapter->wol);
4891         e1000_write_phy_reg(&adapter->hw, BM_WUC, E1000_WUC_PME_EN);
4892
4893         /* activate PHY wakeup */
4894         ret = hw->phy.ops.acquire(hw);
4895         if (ret) {
4896                 printf("Could not acquire PHY\n");
4897                 return ret;
4898         }
4899         e1000_write_phy_reg_mdic(hw, IGP01E1000_PHY_PAGE_SELECT,
4900                                  (BM_WUC_ENABLE_PAGE << IGP_PAGE_SHIFT));
4901         ret = e1000_read_phy_reg_mdic(hw, BM_WUC_ENABLE_REG, &preg);
4902         if (ret) {
4903                 printf("Could not read PHY page 769\n");
4904                 goto out;
4905         }
4906         preg |= BM_WUC_ENABLE_BIT | BM_WUC_HOST_WU_BIT;
4907         ret = e1000_write_phy_reg_mdic(hw, BM_WUC_ENABLE_REG, preg);
4908         if (ret)
4909                 printf("Could not set PHY Host Wakeup bit\n");
4910 out:
4911         hw->phy.ops.release(hw);
4912
4913         return ret;
4914 }
4915
4916 static void
4917 em_led_func(void *arg, int onoff)
4918 {
4919         struct adapter  *adapter = arg;
4920  
4921         EM_CORE_LOCK(adapter);
4922         if (onoff) {
4923                 e1000_setup_led(&adapter->hw);
4924                 e1000_led_on(&adapter->hw);
4925         } else {
4926                 e1000_led_off(&adapter->hw);
4927                 e1000_cleanup_led(&adapter->hw);
4928         }
4929         EM_CORE_UNLOCK(adapter);
4930 }
4931
4932 /*
4933 ** Disable the L0S and L1 LINK states
4934 */
4935 static void
4936 em_disable_aspm(struct adapter *adapter)
4937 {
4938         int             base, reg;
4939         u16             link_cap, link_ctrl;
4940         device_t        dev = adapter->dev;
4941
4942         switch (adapter->hw.mac.type) {
4943                 case e1000_82573:
4944                 case e1000_82574:
4945                 case e1000_82583:
4946                         break;
4947                 default:
4948                         return;
4949         }
4950         if (pci_find_cap(dev, PCIY_EXPRESS, &base) != 0)
4951                 return;
4952         reg = base + PCIR_EXPRESS_LINK_CAP;
4953         link_cap = pci_read_config(dev, reg, 2);
4954         if ((link_cap & PCIM_LINK_CAP_ASPM) == 0)
4955                 return;
4956         reg = base + PCIR_EXPRESS_LINK_CTL;
4957         link_ctrl = pci_read_config(dev, reg, 2);
4958         link_ctrl &= 0xFFFC; /* clear bits 0 and 1, the ASPM L0s and L1 enables */
4959         pci_write_config(dev, reg, link_ctrl, 2);
4960         return;
4961 }
4962
4963 /**********************************************************************
4964  *
4965  *  Update the board statistics counters.
4966  *
4967  **********************************************************************/
4968 static void
4969 em_update_stats_counters(struct adapter *adapter)
4970 {
4971         struct ifnet   *ifp;
4972
4973         if (adapter->hw.phy.media_type == e1000_media_type_copper ||
4974            (E1000_READ_REG(&adapter->hw, E1000_STATUS) & E1000_STATUS_LU)) {
4975                 adapter->stats.symerrs += E1000_READ_REG(&adapter->hw, E1000_SYMERRS);
4976                 adapter->stats.sec += E1000_READ_REG(&adapter->hw, E1000_SEC);
4977         }
4978         adapter->stats.crcerrs += E1000_READ_REG(&adapter->hw, E1000_CRCERRS);
4979         adapter->stats.mpc += E1000_READ_REG(&adapter->hw, E1000_MPC);
4980         adapter->stats.scc += E1000_READ_REG(&adapter->hw, E1000_SCC);
4981         adapter->stats.ecol += E1000_READ_REG(&adapter->hw, E1000_ECOL);
4982
4983         adapter->stats.mcc += E1000_READ_REG(&adapter->hw, E1000_MCC);
4984         adapter->stats.latecol += E1000_READ_REG(&adapter->hw, E1000_LATECOL);
4985         adapter->stats.colc += E1000_READ_REG(&adapter->hw, E1000_COLC);
4986         adapter->stats.dc += E1000_READ_REG(&adapter->hw, E1000_DC);
4987         adapter->stats.rlec += E1000_READ_REG(&adapter->hw, E1000_RLEC);
4988         adapter->stats.xonrxc += E1000_READ_REG(&adapter->hw, E1000_XONRXC);
4989         adapter->stats.xontxc += E1000_READ_REG(&adapter->hw, E1000_XONTXC);
4990         /*
4991         ** For watchdog management we need to know if we have been
4992         ** paused during the last interval, so capture that here.
4993         */
4994         adapter->pause_frames = E1000_READ_REG(&adapter->hw, E1000_XOFFRXC);
4995         adapter->stats.xoffrxc += adapter->pause_frames;
4996         adapter->stats.xofftxc += E1000_READ_REG(&adapter->hw, E1000_XOFFTXC);
4997         adapter->stats.fcruc += E1000_READ_REG(&adapter->hw, E1000_FCRUC);
4998         adapter->stats.prc64 += E1000_READ_REG(&adapter->hw, E1000_PRC64);
4999         adapter->stats.prc127 += E1000_READ_REG(&adapter->hw, E1000_PRC127);
5000         adapter->stats.prc255 += E1000_READ_REG(&adapter->hw, E1000_PRC255);
5001         adapter->stats.prc511 += E1000_READ_REG(&adapter->hw, E1000_PRC511);
5002         adapter->stats.prc1023 += E1000_READ_REG(&adapter->hw, E1000_PRC1023);
5003         adapter->stats.prc1522 += E1000_READ_REG(&adapter->hw, E1000_PRC1522);
5004         adapter->stats.gprc += E1000_READ_REG(&adapter->hw, E1000_GPRC);
5005         adapter->stats.bprc += E1000_READ_REG(&adapter->hw, E1000_BPRC);
5006         adapter->stats.mprc += E1000_READ_REG(&adapter->hw, E1000_MPRC);
5007         adapter->stats.gptc += E1000_READ_REG(&adapter->hw, E1000_GPTC);
5008
5009         /* For the 64-bit byte counters the low dword must be read first. */
5010         /* Both registers clear on the read of the high dword */
5011
5012         adapter->stats.gorc += E1000_READ_REG(&adapter->hw, E1000_GORCL) +
5013             ((u64)E1000_READ_REG(&adapter->hw, E1000_GORCH) << 32);
5014         adapter->stats.gotc += E1000_READ_REG(&adapter->hw, E1000_GOTCL) +
5015             ((u64)E1000_READ_REG(&adapter->hw, E1000_GOTCH) << 32);
5016
5017         adapter->stats.rnbc += E1000_READ_REG(&adapter->hw, E1000_RNBC);
5018         adapter->stats.ruc += E1000_READ_REG(&adapter->hw, E1000_RUC);
5019         adapter->stats.rfc += E1000_READ_REG(&adapter->hw, E1000_RFC);
5020         adapter->stats.roc += E1000_READ_REG(&adapter->hw, E1000_ROC);
5021         adapter->stats.rjc += E1000_READ_REG(&adapter->hw, E1000_RJC);
5022
5023         adapter->stats.tor += E1000_READ_REG(&adapter->hw, E1000_TORH);
5024         adapter->stats.tot += E1000_READ_REG(&adapter->hw, E1000_TOTH);
5025
5026         adapter->stats.tpr += E1000_READ_REG(&adapter->hw, E1000_TPR);
5027         adapter->stats.tpt += E1000_READ_REG(&adapter->hw, E1000_TPT);
5028         adapter->stats.ptc64 += E1000_READ_REG(&adapter->hw, E1000_PTC64);
5029         adapter->stats.ptc127 += E1000_READ_REG(&adapter->hw, E1000_PTC127);
5030         adapter->stats.ptc255 += E1000_READ_REG(&adapter->hw, E1000_PTC255);
5031         adapter->stats.ptc511 += E1000_READ_REG(&adapter->hw, E1000_PTC511);
5032         adapter->stats.ptc1023 += E1000_READ_REG(&adapter->hw, E1000_PTC1023);
5033         adapter->stats.ptc1522 += E1000_READ_REG(&adapter->hw, E1000_PTC1522);
5034         adapter->stats.mptc += E1000_READ_REG(&adapter->hw, E1000_MPTC);
5035         adapter->stats.bptc += E1000_READ_REG(&adapter->hw, E1000_BPTC);
5036
5037         /* Interrupt Counts */
5038
5039         adapter->stats.iac += E1000_READ_REG(&adapter->hw, E1000_IAC);
5040         adapter->stats.icrxptc += E1000_READ_REG(&adapter->hw, E1000_ICRXPTC);
5041         adapter->stats.icrxatc += E1000_READ_REG(&adapter->hw, E1000_ICRXATC);
5042         adapter->stats.ictxptc += E1000_READ_REG(&adapter->hw, E1000_ICTXPTC);
5043         adapter->stats.ictxatc += E1000_READ_REG(&adapter->hw, E1000_ICTXATC);
5044         adapter->stats.ictxqec += E1000_READ_REG(&adapter->hw, E1000_ICTXQEC);
5045         adapter->stats.ictxqmtc += E1000_READ_REG(&adapter->hw, E1000_ICTXQMTC);
5046         adapter->stats.icrxdmtc += E1000_READ_REG(&adapter->hw, E1000_ICRXDMTC);
5047         adapter->stats.icrxoc += E1000_READ_REG(&adapter->hw, E1000_ICRXOC);
5048
5049         if (adapter->hw.mac.type >= e1000_82543) {
5050                 adapter->stats.algnerrc +=
5051                     E1000_READ_REG(&adapter->hw, E1000_ALGNERRC);
5052                 adapter->stats.rxerrc +=
5053                     E1000_READ_REG(&adapter->hw, E1000_RXERRC);
5054                 adapter->stats.tncrs +=
5055                     E1000_READ_REG(&adapter->hw, E1000_TNCRS);
5056                 adapter->stats.cexterr +=
5057                     E1000_READ_REG(&adapter->hw, E1000_CEXTERR);
5058                 adapter->stats.tsctc +=
5059                     E1000_READ_REG(&adapter->hw, E1000_TSCTC);
5060                 adapter->stats.tsctfc +=
5061                     E1000_READ_REG(&adapter->hw, E1000_TSCTFC);
5062         }
5063         ifp = adapter->ifp;
5064
5065         ifp->if_collisions = adapter->stats.colc;
5066
5067         /* Rx Errors */
5068         ifp->if_ierrors = adapter->dropped_pkts + adapter->stats.rxerrc +
5069             adapter->stats.crcerrs + adapter->stats.algnerrc +
5070             adapter->stats.ruc + adapter->stats.roc +
5071             adapter->stats.mpc + adapter->stats.cexterr;
5072
5073         /* Tx Errors */
5074         ifp->if_oerrors = adapter->stats.ecol +
5075             adapter->stats.latecol + adapter->watchdog_events;
5076 }
5077
5078 /* Export a single 32-bit register via a read-only sysctl. */
5079 static int
5080 em_sysctl_reg_handler(SYSCTL_HANDLER_ARGS)
5081 {
5082         struct adapter *adapter;
5083         u_int val;
5084
5085         adapter = oidp->oid_arg1;
5086         val = E1000_READ_REG(&adapter->hw, oidp->oid_arg2);
5087         return (sysctl_handle_int(oidp, &val, 0, req));
5088 }
5089
5090 /*
5091  * Add sysctl variables, one per statistic, to the system.
5092  */
5093 static void
5094 em_add_hw_stats(struct adapter *adapter)
5095 {
5096         device_t dev = adapter->dev;
5097
5098         struct tx_ring *txr = adapter->tx_rings;
5099         struct rx_ring *rxr = adapter->rx_rings;
5100
5101         struct sysctl_ctx_list *ctx = device_get_sysctl_ctx(dev);
5102         struct sysctl_oid *tree = device_get_sysctl_tree(dev);
5103         struct sysctl_oid_list *child = SYSCTL_CHILDREN(tree);
5104         struct e1000_hw_stats *stats = &adapter->stats;
5105
5106         struct sysctl_oid *stat_node, *queue_node, *int_node;
5107         struct sysctl_oid_list *stat_list, *queue_list, *int_list;
5108
5109 #define QUEUE_NAME_LEN 32
5110         char namebuf[QUEUE_NAME_LEN];
5111         
5112         /* Driver Statistics */
5113         SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "link_irq",
5114                         CTLFLAG_RD, &adapter->link_irq,
5115                         "Link MSIX IRQ Handled");
5116         SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "mbuf_alloc_fail", 
5117                          CTLFLAG_RD, &adapter->mbuf_alloc_failed,
5118                          "Std mbuf failed");
5119         SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "cluster_alloc_fail", 
5120                          CTLFLAG_RD, &adapter->mbuf_cluster_failed,
5121                          "Std mbuf cluster failed");
5122         SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "dropped", 
5123                         CTLFLAG_RD, &adapter->dropped_pkts,
5124                         "Driver dropped packets");
5125         SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "tx_dma_fail", 
5126                         CTLFLAG_RD, &adapter->no_tx_dma_setup,
5127                         "Driver tx dma failure in xmit");
5128         SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "rx_overruns",
5129                         CTLFLAG_RD, &adapter->rx_overruns,
5130                         "RX overruns");
5131         SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "watchdog_timeouts",
5132                         CTLFLAG_RD, &adapter->watchdog_events,
5133                         "Watchdog timeouts");
5134         
5135         SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "device_control",
5136                         CTLTYPE_UINT | CTLFLAG_RD, adapter, E1000_CTRL,
5137                         em_sysctl_reg_handler, "IU",
5138                         "Device Control Register");
5139         SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "rx_control",
5140                         CTLTYPE_UINT | CTLFLAG_RD, adapter, E1000_RCTL,
5141                         em_sysctl_reg_handler, "IU",
5142                         "Receiver Control Register");
5143         SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "fc_high_water",
5144                         CTLFLAG_RD, &adapter->hw.fc.high_water, 0,
5145                         "Flow Control High Watermark");
5146         SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "fc_low_water", 
5147                         CTLFLAG_RD, &adapter->hw.fc.low_water, 0,
5148                         "Flow Control Low Watermark");
5149
5150         for (int i = 0; i < adapter->num_queues; i++, rxr++, txr++) {
5151                 snprintf(namebuf, QUEUE_NAME_LEN, "queue%d", i);
5152                 queue_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, namebuf,
5153                                             CTLFLAG_RD, NULL, "Queue Name");
5154                 queue_list = SYSCTL_CHILDREN(queue_node);
5155
5156                 SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "txd_head", 
5157                                 CTLTYPE_UINT | CTLFLAG_RD, adapter,
5158                                 E1000_TDH(txr->me),
5159                                 em_sysctl_reg_handler, "IU",
5160                                 "Transmit Descriptor Head");
5161                 SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "txd_tail", 
5162                                 CTLTYPE_UINT | CTLFLAG_RD, adapter,
5163                                 E1000_TDT(txr->me),
5164                                 em_sysctl_reg_handler, "IU",
5165                                 "Transmit Descriptor Tail");
5166                 SYSCTL_ADD_ULONG(ctx, queue_list, OID_AUTO, "tx_irq",
5167                                 CTLFLAG_RD, &txr->tx_irq,
5168                                 "Queue MSI-X Transmit Interrupts");
5169                 SYSCTL_ADD_ULONG(ctx, queue_list, OID_AUTO, "no_desc_avail", 
5170                                 CTLFLAG_RD, &txr->no_desc_avail,
5171                                 "Queue No Descriptor Available");
5172                 
5173                 SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "rxd_head", 
5174                                 CTLTYPE_UINT | CTLFLAG_RD, adapter,
5175                                 E1000_RDH(rxr->me),
5176                                 em_sysctl_reg_handler, "IU",
5177                                 "Receive Descriptor Head");
5178                 SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "rxd_tail", 
5179                                 CTLTYPE_UINT | CTLFLAG_RD, adapter,
5180                                 E1000_RDT(rxr->me),
5181                                 em_sysctl_reg_handler, "IU",
5182                                 "Receive Descriptor Tail");
5183                 SYSCTL_ADD_ULONG(ctx, queue_list, OID_AUTO, "rx_irq",
5184                                 CTLFLAG_RD, &rxr->rx_irq,
5185                                 "Queue MSI-X Receive Interrupts");
5186         }
5187
5188         /* MAC stats get their own sub node */
5189
5190         stat_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "mac_stats", 
5191                                     CTLFLAG_RD, NULL, "Statistics");
5192         stat_list = SYSCTL_CHILDREN(stat_node);
5193
5194         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "excess_coll",
5195                         CTLFLAG_RD, &stats->ecol,
5196                         "Excessive collisions");
5197         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "single_coll",
5198                         CTLFLAG_RD, &stats->scc,
5199                         "Single collisions");
5200         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "multiple_coll",
5201                         CTLFLAG_RD, &stats->mcc,
5202                         "Multiple collisions");
5203         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "late_coll",
5204                         CTLFLAG_RD, &stats->latecol,
5205                         "Late collisions");
5206         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "collision_count",
5207                         CTLFLAG_RD, &stats->colc,
5208                         "Collision Count");
5209         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "symbol_errors",
5210                         CTLFLAG_RD, &adapter->stats.symerrs,
5211                         "Symbol Errors");
5212         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "sequence_errors",
5213                         CTLFLAG_RD, &adapter->stats.sec,
5214                         "Sequence Errors");
5215         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "defer_count",
5216                         CTLFLAG_RD, &adapter->stats.dc,
5217                         "Defer Count");
5218         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "missed_packets",
5219                         CTLFLAG_RD, &adapter->stats.mpc,
5220                         "Missed Packets");
5221         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_no_buff",
5222                         CTLFLAG_RD, &adapter->stats.rnbc,
5223                         "Receive No Buffers");
5224         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_undersize",
5225                         CTLFLAG_RD, &adapter->stats.ruc,
5226                         "Receive Undersize");
5227         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_fragmented",
5228                         CTLFLAG_RD, &adapter->stats.rfc,
5229                         "Fragmented Packets Received");
5230         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_oversize",
5231                         CTLFLAG_RD, &adapter->stats.roc,
5232                         "Oversized Packets Received");
5233         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_jabber",
5234                         CTLFLAG_RD, &adapter->stats.rjc,
5235                         "Received Jabber");
5236         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_errs",
5237                         CTLFLAG_RD, &adapter->stats.rxerrc,
5238                         "Receive Errors");
5239         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "crc_errs",
5240                         CTLFLAG_RD, &adapter->stats.crcerrs,
5241                         "CRC errors");
5242         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "alignment_errs",
5243                         CTLFLAG_RD, &adapter->stats.algnerrc,
5244                         "Alignment Errors");
5245         /* On 82575 these are collision counts */
5246         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "coll_ext_errs",
5247                         CTLFLAG_RD, &adapter->stats.cexterr,
5248                         "Collision/Carrier extension errors");
5249         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "xon_recvd",
5250                         CTLFLAG_RD, &adapter->stats.xonrxc,
5251                         "XON Received");
5252         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "xon_txd",
5253                         CTLFLAG_RD, &adapter->stats.xontxc,
5254                         "XON Transmitted");
5255         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "xoff_recvd",
5256                         CTLFLAG_RD, &adapter->stats.xoffrxc,
5257                         "XOFF Received");
5258         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "xoff_txd",
5259                         CTLFLAG_RD, &adapter->stats.xofftxc,
5260                         "XOFF Transmitted");
5261
5262         /* Packet Reception Stats */
5263         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "total_pkts_recvd",
5264                         CTLFLAG_RD, &adapter->stats.tpr,
5265                         "Total Packets Received ");
5266         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_pkts_recvd",
5267                         CTLFLAG_RD, &adapter->stats.gprc,
5268                         "Good Packets Received");
5269         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "bcast_pkts_recvd",
5270                         CTLFLAG_RD, &adapter->stats.bprc,
5271                         "Broadcast Packets Received");
5272         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "mcast_pkts_recvd",
5273                         CTLFLAG_RD, &adapter->stats.mprc,
5274                         "Multicast Packets Received");
5275         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_64",
5276                         CTLFLAG_RD, &adapter->stats.prc64,
5277                         "64 byte frames received ");
5278         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_65_127",
5279                         CTLFLAG_RD, &adapter->stats.prc127,
5280                         "65-127 byte frames received");
5281         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_128_255",
5282                         CTLFLAG_RD, &adapter->stats.prc255,
5283                         "128-255 byte frames received");
5284         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_256_511",
5285                         CTLFLAG_RD, &adapter->stats.prc511,
5286                         "256-511 byte frames received");
5287         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_512_1023",
5288                         CTLFLAG_RD, &adapter->stats.prc1023,
5289                         "512-1023 byte frames received");
5290         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_1024_1522",
5291                         CTLFLAG_RD, &adapter->stats.prc1522,
5292                         "1024-1522 byte frames received");
5293         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_octets_recvd",
5294                         CTLFLAG_RD, &adapter->stats.gorc, 
5295                         "Good Octets Received"); 
5296
5297         /* Packet Transmission Stats */
5298         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_octets_txd",
5299                         CTLFLAG_RD, &adapter->stats.gotc, 
5300                         "Good Octets Transmitted"); 
5301         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "total_pkts_txd",
5302                         CTLFLAG_RD, &adapter->stats.tpt,
5303                         "Total Packets Transmitted");
5304         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_pkts_txd",
5305                         CTLFLAG_RD, &adapter->stats.gptc,
5306                         "Good Packets Transmitted");
5307         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "bcast_pkts_txd",
5308                         CTLFLAG_RD, &adapter->stats.bptc,
5309                         "Broadcast Packets Transmitted");
5310         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "mcast_pkts_txd",
5311                         CTLFLAG_RD, &adapter->stats.mptc,
5312                         "Multicast Packets Transmitted");
5313         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_64",
5314                         CTLFLAG_RD, &adapter->stats.ptc64,
5315                         "64 byte frames transmitted ");
5316         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_65_127",
5317                         CTLFLAG_RD, &adapter->stats.ptc127,
5318                         "65-127 byte frames transmitted");
5319         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_128_255",
5320                         CTLFLAG_RD, &adapter->stats.ptc255,
5321                         "128-255 byte frames transmitted");
5322         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_256_511",
5323                         CTLFLAG_RD, &adapter->stats.ptc511,
5324                         "256-511 byte frames transmitted");
5325         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_512_1023",
5326                         CTLFLAG_RD, &adapter->stats.ptc1023,
5327                         "512-1023 byte frames transmitted");
5328         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_1024_1522",
5329                         CTLFLAG_RD, &adapter->stats.ptc1522,
5330                         "1024-1522 byte frames transmitted");
5331         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tso_txd",
5332                         CTLFLAG_RD, &adapter->stats.tsctc,
5333                         "TSO Contexts Transmitted");
5334         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tso_ctx_fail",
5335                         CTLFLAG_RD, &adapter->stats.tsctfc,
5336                         "TSO Contexts Failed");
5337
5338
5339         /* Interrupt Stats */
5340
5341         int_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "interrupts", 
5342                                     CTLFLAG_RD, NULL, "Interrupt Statistics");
5343         int_list = SYSCTL_CHILDREN(int_node);
5344
5345         SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "asserts",
5346                         CTLFLAG_RD, &adapter->stats.iac,
5347                         "Interrupt Assertion Count");
5348
5349         SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "rx_pkt_timer",
5350                         CTLFLAG_RD, &adapter->stats.icrxptc,
5351                         "Interrupt Cause Rx Pkt Timer Expire Count");
5352
5353         SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "rx_abs_timer",
5354                         CTLFLAG_RD, &adapter->stats.icrxatc,
5355                         "Interrupt Cause Rx Abs Timer Expire Count");
5356
5357         SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "tx_pkt_timer",
5358                         CTLFLAG_RD, &adapter->stats.ictxptc,
5359                         "Interrupt Cause Tx Pkt Timer Expire Count");
5360
5361         SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "tx_abs_timer",
5362                         CTLFLAG_RD, &adapter->stats.ictxatc,
5363                         "Interrupt Cause Tx Abs Timer Expire Count");
5364
5365         SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "tx_queue_empty",
5366                         CTLFLAG_RD, &adapter->stats.ictxqec,
5367                         "Interrupt Cause Tx Queue Empty Count");
5368
5369         SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "tx_queue_min_thresh",
5370                         CTLFLAG_RD, &adapter->stats.ictxqmtc,
5371                         "Interrupt Cause Tx Queue Min Thresh Count");
5372
5373         SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "rx_desc_min_thresh",
5374                         CTLFLAG_RD, &adapter->stats.icrxdmtc,
5375                         "Interrupt Cause Rx Desc Min Thresh Count");
5376
5377         SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "rx_overrun",
5378                         CTLFLAG_RD, &adapter->stats.icrxoc,
5379                         "Interrupt Cause Receiver Overrun Count");
5380 }
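/*
 * A minimal userland sketch (illustrative, not part of the driver): the
 * u64 counters registered above surface under the device's sysctl tree
 * and can be read with sysctlbyname(3).  The OID path below assumes
 * unit 0 and the node names used in this file; both are assumptions.
 */
#if 0
#include <sys/types.h>
#include <sys/sysctl.h>
#include <stdint.h>
#include <stdio.h>

int
main(void)
{
	uint64_t val;
	size_t len = sizeof(val);

	/* Hypothetical OID path: unit number and node names may differ. */
	if (sysctlbyname("dev.em.0.mac_stats.good_pkts_txd",
	    &val, &len, NULL, 0) == -1) {
		perror("sysctlbyname");
		return (1);
	}
	printf("good_pkts_txd: %ju\n", (uintmax_t)val);
	return (0);
}
#endif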
5381
5382 /**********************************************************************
5383  *
5384  *  This routine provides a way to dump out the adapter EEPROM,
5385  *  often a useful debug/service tool.  Only the first 32 words
5386  *  are dumped; the data that matters lies within that range.
5387  *
5388  **********************************************************************/
5389 static int
5390 em_sysctl_nvm_info(SYSCTL_HANDLER_ARGS)
5391 {
5392         struct adapter *adapter;
5393         int error;
5394         int result;
5395
5396         result = -1;
5397         error = sysctl_handle_int(oidp, &result, 0, req);
5398
5399         if (error || !req->newptr)
5400                 return (error);
5401
5402         /*
5403          * This value will cause a hex dump of the
5404          * first 32 16-bit words of the EEPROM to
5405          * the screen.
5406          */
5407         if (result == 1) {
5408                 adapter = (struct adapter *)arg1;
5409                 em_print_nvm_info(adapter);
5410         }
5411
5412         return (error);
5413 }
5414
5415 static void
5416 em_print_nvm_info(struct adapter *adapter)
5417 {
5418         u16     eeprom_data;
5419         int     i, j, row = 0;
5420
5421         /* It's a bit crude, but it gets the job done. */
5422         printf("\nInterface EEPROM Dump:\n");
5423         printf("Offset\n0x0000  ");
5424         for (i = 0, j = 0; i < 32; i++, j++) {
5425                 if (j == 8) { /* Make the offset block */
5426                         j = 0; ++row;
5427                         printf("\n0x00%x0  ", row);
5428                 }
5429                 e1000_read_nvm(&adapter->hw, i, 1, &eeprom_data);
5430                 printf("%04x ", eeprom_data);
5431         }
5432         printf("\n");
5433 }
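/*
 * Usage sketch: em_sysctl_nvm_info() above is wired to a CTLTYPE_INT
 * proc node elsewhere in this file; writing 1 to it triggers the dump.
 * The node name ("nvm") and unit number below are assumptions.
 *
 *	# sysctl dev.em.0.nvm=1
 */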
5434
5435 static int
5436 em_sysctl_int_delay(SYSCTL_HANDLER_ARGS)
5437 {
5438         struct em_int_delay_info *info;
5439         struct adapter *adapter;
5440         u32 regval;
5441         int error, usecs, ticks;
5442
5443         info = (struct em_int_delay_info *)arg1;
5444         usecs = info->value;
5445         error = sysctl_handle_int(oidp, &usecs, 0, req);
5446         if (error != 0 || req->newptr == NULL)
5447                 return (error);
5448         if (usecs < 0 || usecs > EM_TICKS_TO_USECS(65535))
5449                 return (EINVAL);
5450         info->value = usecs;
5451         ticks = EM_USECS_TO_TICKS(usecs);
5452
5453         adapter = info->adapter;
5454         
5455         EM_CORE_LOCK(adapter);
5456         regval = E1000_READ_OFFSET(&adapter->hw, info->offset);
5457         regval = (regval & ~0xffff) | (ticks & 0xffff);
5458         /* Handle a few special cases. */
5459         switch (info->offset) {
5460         case E1000_RDTR:
5461                 break;
5462         case E1000_TIDV:
5463                 if (ticks == 0) {
5464                         adapter->txd_cmd &= ~E1000_TXD_CMD_IDE;
5465                         /* Don't write 0 into the TIDV register. */
5466                         regval++;
5467                 } else
5468                         adapter->txd_cmd |= E1000_TXD_CMD_IDE;
5469                 break;
5470         }
5471         E1000_WRITE_OFFSET(&adapter->hw, info->offset, regval);
5472         EM_CORE_UNLOCK(adapter);
5473         return (0);
5474 }
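/*
 * Worked example (assuming the EM_USECS_TO_TICKS() definition from
 * if_em.h, where one tick is ~1.024 usecs): a request of 100 usecs
 * maps to (1000 * 100 + 512) / 1024 = 98 ticks.  Note that only the
 * low 16 bits of the delay register are rewritten above, so any
 * control bits in the upper half of the register are preserved.
 */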
5475
5476 static void
5477 em_add_int_delay_sysctl(struct adapter *adapter, const char *name,
5478         const char *description, struct em_int_delay_info *info,
5479         int offset, int value)
5480 {
5481         info->adapter = adapter;
5482         info->offset = offset;
5483         info->value = value;
5484         SYSCTL_ADD_PROC(device_get_sysctl_ctx(adapter->dev),
5485             SYSCTL_CHILDREN(device_get_sysctl_tree(adapter->dev)),
5486             OID_AUTO, name, CTLTYPE_INT|CTLFLAG_RW,
5487             info, 0, em_sysctl_int_delay, "I", description);
5488 }
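/*
 * Representative call site (a sketch of how em_attach() registers the
 * receive delay knob; the default-value name is an assumption from
 * if_em.h):
 *
 *	em_add_int_delay_sysctl(adapter, "rx_int_delay",
 *	    "receive interrupt delay in usecs", &adapter->rx_int_delay,
 *	    E1000_REGISTER(&adapter->hw, E1000_RDTR), em_rx_int_delay_dflt);
 */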
5489
5490 static void
5491 em_set_sysctl_value(struct adapter *adapter, const char *name,
5492         const char *description, int *limit, int value)
5493 {
5494         *limit = value;
5495         SYSCTL_ADD_INT(device_get_sysctl_ctx(adapter->dev),
5496             SYSCTL_CHILDREN(device_get_sysctl_tree(adapter->dev)),
5497             OID_AUTO, name, CTLFLAG_RW, limit, value, description);
5498 }
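/*
 * Representative call site (a sketch; the limit field and default
 * shown are assumptions):
 *
 *	em_set_sysctl_value(adapter, "rx_processing_limit",
 *	    "max number of rx packets to process",
 *	    &adapter->rx_process_limit, em_rx_process_limit);
 */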
5499
5500 static int
5501 em_sysctl_debug_info(SYSCTL_HANDLER_ARGS)
5502 {
5503         struct adapter *adapter;
5504         int error;
5505         int result;
5506
5507         result = -1;
5508         error = sysctl_handle_int(oidp, &result, 0, req);
5509
5510         if (error || !req->newptr)
5511                 return (error);
5512
5513         if (result == 1) {
5514                 adapter = (struct adapter *)arg1;
5515                 em_print_debug_info(adapter);
5516         }
5517
5518         return (error);
5519 }
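/*
 * Usage sketch: like the NVM handler, em_sysctl_debug_info() is
 * reached through a proc node registered elsewhere in this file;
 * writing 1 triggers the dump below.  The node name ("debug") and
 * unit number are assumptions.
 *
 *	# sysctl dev.em.0.debug=1
 */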
5520
5521 /*
5522 ** This routine is meant to be fluid; add whatever is
5523 ** needed for debugging a problem.  -jfv
5524 */
5525 static void
5526 em_print_debug_info(struct adapter *adapter)
5527 {
5528         device_t dev = adapter->dev;
5529         struct tx_ring *txr = adapter->tx_rings;
5530         struct rx_ring *rxr = adapter->rx_rings;
5531
5532         if (adapter->ifp->if_drv_flags & IFF_DRV_RUNNING)
5533                 printf("Interface is RUNNING ");
5534         else
5535                 printf("Interface is NOT RUNNING ");
5536         if (adapter->ifp->if_drv_flags & IFF_DRV_OACTIVE)
5537                 printf("and ACTIVE\n");
5538         else
5539                 printf("and INACTIVE\n");
5540
5541         device_printf(dev, "hw tdh = %d, hw tdt = %d\n",
5542             E1000_READ_REG(&adapter->hw, E1000_TDH(0)),
5543             E1000_READ_REG(&adapter->hw, E1000_TDT(0)));
5544         device_printf(dev, "hw rdh = %d, hw rdt = %d\n",
5545             E1000_READ_REG(&adapter->hw, E1000_RDH(0)),
5546             E1000_READ_REG(&adapter->hw, E1000_RDT(0)));
5547         device_printf(dev, "Tx Queue Status = %d\n", txr->queue_status);
5548         device_printf(dev, "TX descriptors avail = %d\n",
5549             txr->tx_avail);
5550         device_printf(dev, "TX descriptor avail failures = %ld\n",
5551             txr->no_desc_avail);
5552         device_printf(dev, "RX discarded packets = %ld\n",
5553             rxr->rx_discarded);
5554         device_printf(dev, "RX Next to Check = %d\n", rxr->next_to_check);
5555         device_printf(dev, "RX Next to Refresh = %d\n", rxr->next_to_refresh);
5556 }