/******************************************************************************

  Copyright (c) 2001-2010, Intel Corporation
  All rights reserved.

  Redistribution and use in source and binary forms, with or without
  modification, are permitted provided that the following conditions are met:

   1. Redistributions of source code must retain the above copyright notice,
      this list of conditions and the following disclaimer.

   2. Redistributions in binary form must reproduce the above copyright
      notice, this list of conditions and the following disclaimer in the
      documentation and/or other materials provided with the distribution.

   3. Neither the name of the Intel Corporation nor the names of its
      contributors may be used to endorse or promote products derived from
      this software without specific prior written permission.

  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
  AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
  LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
  INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
  CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
  ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
  POSSIBILITY OF SUCH DAMAGE.

******************************************************************************/
/*$FreeBSD$*/

#ifdef HAVE_KERNEL_OPTION_HEADERS
#include "opt_device_polling.h"
#include "opt_inet.h"
#endif

#include <sys/param.h>
#include <sys/systm.h>
#if __FreeBSD_version >= 800000
#include <sys/buf_ring.h>
#endif
#include <sys/bus.h>
#include <sys/endian.h>
#include <sys/kernel.h>
#include <sys/kthread.h>
#include <sys/malloc.h>
#include <sys/mbuf.h>
#include <sys/module.h>
#include <sys/rman.h>
#include <sys/socket.h>
#include <sys/sockio.h>
#include <sys/sysctl.h>
#include <sys/taskqueue.h>
#include <sys/eventhandler.h>
#include <machine/bus.h>
#include <machine/resource.h>

#include <net/bpf.h>
#include <net/ethernet.h>
#include <net/if.h>
#include <net/if_arp.h>
#include <net/if_dl.h>
#include <net/if_media.h>

#include <net/if_types.h>
#include <net/if_vlan_var.h>

#include <netinet/in_systm.h>
#include <netinet/in.h>
#include <netinet/if_ether.h>
#include <netinet/ip.h>
#include <netinet/ip6.h>
#include <netinet/tcp.h>
#include <netinet/udp.h>

#include <machine/in_cksum.h>
#include <dev/led/led.h>
#include <dev/pci/pcivar.h>
#include <dev/pci/pcireg.h>

#include "e1000_api.h"
#include "e1000_82571.h"
#include "if_em.h"

/*********************************************************************
 *  Set this to one to display debug statistics
 *********************************************************************/
int     em_display_debug_stats = 0;

/*********************************************************************
 *  Driver version:
 *********************************************************************/
char em_driver_version[] = "7.1.9";

/*********************************************************************
 *  PCI Device ID Table
 *
 *  Used by probe to select the devices to attach to.
 *  The last field stores an index into em_strings.
 *  The last entry must be all zeros.
 *
 *  { Vendor ID, Device ID, SubVendor ID, SubDevice ID, String Index }
 *********************************************************************/

static em_vendor_info_t em_vendor_info_array[] =
{
        /* Intel(R) PRO/1000 Network Connection */
        { 0x8086, E1000_DEV_ID_82571EB_COPPER,  PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82571EB_FIBER,   PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82571EB_SERDES,  PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82571EB_SERDES_DUAL,
                                                PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82571EB_SERDES_QUAD,
                                                PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82571EB_QUAD_COPPER,
                                                PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82571EB_QUAD_COPPER_LP,
                                                PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82571EB_QUAD_FIBER,
                                                PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82571PT_QUAD_COPPER,
                                                PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82572EI_COPPER,  PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82572EI_FIBER,   PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82572EI_SERDES,  PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82572EI,         PCI_ANY_ID, PCI_ANY_ID, 0},

        { 0x8086, E1000_DEV_ID_82573E,          PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82573E_IAMT,     PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82573L,          PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82583V,          PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_80003ES2LAN_COPPER_SPT,
                                                PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_80003ES2LAN_SERDES_SPT,
                                                PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_80003ES2LAN_COPPER_DPT,
                                                PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_80003ES2LAN_SERDES_DPT,
                                                PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_ICH8_IGP_M_AMT,  PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_ICH8_IGP_AMT,    PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_ICH8_IGP_C,      PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_ICH8_IFE,        PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_ICH8_IFE_GT,     PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_ICH8_IFE_G,      PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_ICH8_IGP_M,      PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_ICH8_82567V_3,   PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_ICH9_IGP_M_AMT,  PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_ICH9_IGP_AMT,    PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_ICH9_IGP_C,      PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_ICH9_IGP_M,      PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_ICH9_IGP_M_V,    PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_ICH9_IFE,        PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_ICH9_IFE_GT,     PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_ICH9_IFE_G,      PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_ICH9_BM,         PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82574L,          PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82574LA,         PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_ICH10_R_BM_LM,   PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_ICH10_R_BM_LF,   PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_ICH10_R_BM_V,    PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_ICH10_D_BM_LM,   PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_ICH10_D_BM_LF,   PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_ICH10_D_BM_V,    PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_PCH_M_HV_LM,     PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_PCH_M_HV_LC,     PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_PCH_D_HV_DM,     PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_PCH_D_HV_DC,     PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_PCH2_LV_LM,      PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_PCH2_LV_V,       PCI_ANY_ID, PCI_ANY_ID, 0},
        /* required last entry */
        { 0, 0, 0, 0, 0}
};

/*********************************************************************
 *  Table of branding strings for all supported NICs.
 *********************************************************************/

static char *em_strings[] = {
        "Intel(R) PRO/1000 Network Connection"
};

/*********************************************************************
 *  Function prototypes
 *********************************************************************/
static int      em_probe(device_t);
static int      em_attach(device_t);
static int      em_detach(device_t);
static int      em_shutdown(device_t);
static int      em_suspend(device_t);
static int      em_resume(device_t);
static void     em_start(struct ifnet *);
static void     em_start_locked(struct ifnet *, struct tx_ring *);
#ifdef EM_MULTIQUEUE
static int      em_mq_start(struct ifnet *, struct mbuf *);
static int      em_mq_start_locked(struct ifnet *,
                    struct tx_ring *, struct mbuf *);
static void     em_qflush(struct ifnet *);
#endif
static int      em_ioctl(struct ifnet *, u_long, caddr_t);
static void     em_init(void *);
static void     em_init_locked(struct adapter *);
static void     em_stop(void *);
static void     em_media_status(struct ifnet *, struct ifmediareq *);
static int      em_media_change(struct ifnet *);
static void     em_identify_hardware(struct adapter *);
static int      em_allocate_pci_resources(struct adapter *);
static int      em_allocate_legacy(struct adapter *);
static int      em_allocate_msix(struct adapter *);
static int      em_allocate_queues(struct adapter *);
static int      em_setup_msix(struct adapter *);
static void     em_free_pci_resources(struct adapter *);
static void     em_local_timer(void *);
static void     em_reset(struct adapter *);
static int      em_setup_interface(device_t, struct adapter *);

static void     em_setup_transmit_structures(struct adapter *);
static void     em_initialize_transmit_unit(struct adapter *);
static int      em_allocate_transmit_buffers(struct tx_ring *);
static void     em_free_transmit_structures(struct adapter *);
static void     em_free_transmit_buffers(struct tx_ring *);

static int      em_setup_receive_structures(struct adapter *);
static int      em_allocate_receive_buffers(struct rx_ring *);
static void     em_initialize_receive_unit(struct adapter *);
static void     em_free_receive_structures(struct adapter *);
static void     em_free_receive_buffers(struct rx_ring *);

static void     em_enable_intr(struct adapter *);
static void     em_disable_intr(struct adapter *);
static void     em_update_stats_counters(struct adapter *);
static void     em_add_hw_stats(struct adapter *adapter);
static bool     em_txeof(struct tx_ring *);
static bool     em_rxeof(struct rx_ring *, int, int *);
#ifndef __NO_STRICT_ALIGNMENT
static int      em_fixup_rx(struct rx_ring *);
#endif
static void     em_receive_checksum(struct e1000_rx_desc *, struct mbuf *);
static void     em_transmit_checksum_setup(struct tx_ring *, struct mbuf *, int,
                    struct ip *, u32 *, u32 *);
static void     em_tso_setup(struct tx_ring *, struct mbuf *, int, struct ip *,
                    struct tcphdr *, u32 *, u32 *);
static void     em_set_promisc(struct adapter *);
static void     em_disable_promisc(struct adapter *);
static void     em_set_multi(struct adapter *);
static void     em_update_link_status(struct adapter *);
static void     em_refresh_mbufs(struct rx_ring *, int);
static void     em_register_vlan(void *, struct ifnet *, u16);
static void     em_unregister_vlan(void *, struct ifnet *, u16);
static void     em_setup_vlan_hw_support(struct adapter *);
static int      em_xmit(struct tx_ring *, struct mbuf **);
static int      em_dma_malloc(struct adapter *, bus_size_t,
                    struct em_dma_alloc *, int);
static void     em_dma_free(struct adapter *, struct em_dma_alloc *);
static int      em_sysctl_nvm_info(SYSCTL_HANDLER_ARGS);
static void     em_print_nvm_info(struct adapter *);
static int      em_sysctl_debug_info(SYSCTL_HANDLER_ARGS);
static void     em_print_debug_info(struct adapter *);
static int      em_is_valid_ether_addr(u8 *);
static int      em_sysctl_int_delay(SYSCTL_HANDLER_ARGS);
static void     em_add_int_delay_sysctl(struct adapter *, const char *,
                    const char *, struct em_int_delay_info *, int, int);
/* Management and WOL Support */
static void     em_init_manageability(struct adapter *);
static void     em_release_manageability(struct adapter *);
static void     em_get_hw_control(struct adapter *);
static void     em_release_hw_control(struct adapter *);
static void     em_get_wakeup(device_t);
static void     em_enable_wakeup(device_t);
static int      em_enable_phy_wakeup(struct adapter *);
static void     em_led_func(void *, int);
static void     em_disable_aspm(struct adapter *);

static int      em_irq_fast(void *);

/* MSIX handlers */
static void     em_msix_tx(void *);
static void     em_msix_rx(void *);
static void     em_msix_link(void *);
static void     em_handle_tx(void *context, int pending);
static void     em_handle_rx(void *context, int pending);
static void     em_handle_link(void *context, int pending);

static void     em_add_rx_process_limit(struct adapter *, const char *,
                    const char *, int *, int);
static void     em_set_flow_cntrl(struct adapter *, const char *,
                    const char *, int *, int);

static __inline void em_rx_discard(struct rx_ring *, int);

#ifdef DEVICE_POLLING
static poll_handler_t em_poll;
#endif /* DEVICE_POLLING */

/*********************************************************************
 *  FreeBSD Device Interface Entry Points
 *********************************************************************/

static device_method_t em_methods[] = {
        /* Device interface */
        DEVMETHOD(device_probe, em_probe),
        DEVMETHOD(device_attach, em_attach),
        DEVMETHOD(device_detach, em_detach),
        DEVMETHOD(device_shutdown, em_shutdown),
        DEVMETHOD(device_suspend, em_suspend),
        DEVMETHOD(device_resume, em_resume),
        {0, 0}
};

static driver_t em_driver = {
        "em", em_methods, sizeof(struct adapter),
};

devclass_t em_devclass;
DRIVER_MODULE(em, pci, em_driver, em_devclass, 0, 0);
MODULE_DEPEND(em, pci, 1, 1, 1);
MODULE_DEPEND(em, ether, 1, 1, 1);

/*********************************************************************
 *  Tunable default values.
 *********************************************************************/

#define EM_TICKS_TO_USECS(ticks)        ((1024 * (ticks) + 500) / 1000)
#define EM_USECS_TO_TICKS(usecs)        ((1000 * (usecs) + 512) / 1024)
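
/*
 * The hardware delay registers count in 1.024 usec units (hence the
 * 1024/1000 factors above), so these macros round-convert between
 * register ticks and microseconds.  For example:
 *   EM_TICKS_TO_USECS(8)  = (1024 * 8 + 500) / 1000  = 8
 *   EM_USECS_TO_TICKS(66) = (1000 * 66 + 512) / 1024 = 64
 */
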
#define M_TSO_LEN                       66

/* Allow common code without TSO */
#ifndef CSUM_TSO
#define CSUM_TSO        0
#endif

static int em_tx_int_delay_dflt = EM_TICKS_TO_USECS(EM_TIDV);
static int em_rx_int_delay_dflt = EM_TICKS_TO_USECS(EM_RDTR);
TUNABLE_INT("hw.em.tx_int_delay", &em_tx_int_delay_dflt);
TUNABLE_INT("hw.em.rx_int_delay", &em_rx_int_delay_dflt);

static int em_tx_abs_int_delay_dflt = EM_TICKS_TO_USECS(EM_TADV);
static int em_rx_abs_int_delay_dflt = EM_TICKS_TO_USECS(EM_RADV);
TUNABLE_INT("hw.em.tx_abs_int_delay", &em_tx_abs_int_delay_dflt);
TUNABLE_INT("hw.em.rx_abs_int_delay", &em_rx_abs_int_delay_dflt);
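
/*
 * The tunables above are read from the loader environment at boot;
 * for example, "hw.em.rx_int_delay=32" in /boot/loader.conf overrides
 * the compiled-in default (the delay values are in microseconds).
 */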

static int em_rxd = EM_DEFAULT_RXD;
static int em_txd = EM_DEFAULT_TXD;
TUNABLE_INT("hw.em.rxd", &em_rxd);
TUNABLE_INT("hw.em.txd", &em_txd);

static int em_smart_pwr_down = FALSE;
TUNABLE_INT("hw.em.smart_pwr_down", &em_smart_pwr_down);

/* Controls whether promiscuous also shows bad packets */
static int em_debug_sbp = FALSE;
TUNABLE_INT("hw.em.sbp", &em_debug_sbp);

static int em_enable_msix = TRUE;
TUNABLE_INT("hw.em.enable_msix", &em_enable_msix);

/* How many packets rxeof tries to clean at a time */
static int em_rx_process_limit = 100;
TUNABLE_INT("hw.em.rx_process_limit", &em_rx_process_limit);

/* Flow control setting - default to FULL */
static int em_fc_setting = e1000_fc_full;
TUNABLE_INT("hw.em.fc_setting", &em_fc_setting);
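
/*
 * The value is interpreted as an e1000_fc_mode constant from the
 * shared code; there 0 disables flow control, 1 is Rx pause only,
 * 2 is Tx pause only, and 3 (e1000_fc_full) enables both.
 */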

/* Global used in WOL setup with multiport cards */
static int global_quad_port_a = 0;

/*********************************************************************
 *  Device identification routine
 *
 *  em_probe determines whether the driver should be loaded for a
 *  given adapter, based on the adapter's PCI vendor/device ID.
 *
 *  return BUS_PROBE_DEFAULT on success, positive on failure
 *********************************************************************/

static int
em_probe(device_t dev)
{
        char            adapter_name[60];
        u16             pci_vendor_id = 0;
        u16             pci_device_id = 0;
        u16             pci_subvendor_id = 0;
        u16             pci_subdevice_id = 0;
        em_vendor_info_t *ent;

        INIT_DEBUGOUT("em_probe: begin");

        pci_vendor_id = pci_get_vendor(dev);
        if (pci_vendor_id != EM_VENDOR_ID)
                return (ENXIO);

        pci_device_id = pci_get_device(dev);
        pci_subvendor_id = pci_get_subvendor(dev);
        pci_subdevice_id = pci_get_subdevice(dev);

        ent = em_vendor_info_array;
        while (ent->vendor_id != 0) {
                if ((pci_vendor_id == ent->vendor_id) &&
                    (pci_device_id == ent->device_id) &&

                    ((pci_subvendor_id == ent->subvendor_id) ||
                    (ent->subvendor_id == PCI_ANY_ID)) &&

                    ((pci_subdevice_id == ent->subdevice_id) ||
                    (ent->subdevice_id == PCI_ANY_ID))) {
                        sprintf(adapter_name, "%s %s",
                                em_strings[ent->index],
                                em_driver_version);
                        device_set_desc_copy(dev, adapter_name);
                        return (BUS_PROBE_DEFAULT);
                }
                ent++;
        }

        return (ENXIO);
}

/*********************************************************************
 *  Device initialization routine
 *
 *  The attach entry point is called when the driver is being loaded.
 *  This routine identifies the type of hardware, allocates all resources
 *  and initializes the hardware.
 *
 *  return 0 on success, positive on failure
 *********************************************************************/

static int
em_attach(device_t dev)
{
        struct adapter  *adapter;
        int             error = 0;

        INIT_DEBUGOUT("em_attach: begin");

        adapter = device_get_softc(dev);
        adapter->dev = adapter->osdep.dev = dev;
        EM_CORE_LOCK_INIT(adapter, device_get_nameunit(dev));

        /* SYSCTL stuff */
        SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
            SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
            OID_AUTO, "nvm", CTLTYPE_INT|CTLFLAG_RW, adapter, 0,
            em_sysctl_nvm_info, "I", "NVM Information");

        SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
            SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
            OID_AUTO, "debug", CTLTYPE_INT|CTLFLAG_RW, adapter, 0,
            em_sysctl_debug_info, "I", "Debug Information");

        callout_init_mtx(&adapter->timer, &adapter->core_mtx, 0);

        /* Determine hardware and mac info */
        em_identify_hardware(adapter);

        /* Setup PCI resources */
        if (em_allocate_pci_resources(adapter)) {
                device_printf(dev, "Allocation of PCI resources failed\n");
                error = ENXIO;
                goto err_pci;
        }

        /*
        ** For ICH8 and family we need to
        ** map the flash memory, and this
        ** must happen after the MAC is
        ** identified
        */
        if ((adapter->hw.mac.type == e1000_ich8lan) ||
            (adapter->hw.mac.type == e1000_ich9lan) ||
            (adapter->hw.mac.type == e1000_ich10lan) ||
            (adapter->hw.mac.type == e1000_pchlan) ||
            (adapter->hw.mac.type == e1000_pch2lan)) {
                int rid = EM_BAR_TYPE_FLASH;
                adapter->flash = bus_alloc_resource_any(dev,
                    SYS_RES_MEMORY, &rid, RF_ACTIVE);
                if (adapter->flash == NULL) {
                        device_printf(dev, "Mapping of Flash failed\n");
                        error = ENXIO;
                        goto err_pci;
                }
                /* This is used in the shared code */
                adapter->hw.flash_address = (u8 *)adapter->flash;
                adapter->osdep.flash_bus_space_tag =
                    rman_get_bustag(adapter->flash);
                adapter->osdep.flash_bus_space_handle =
                    rman_get_bushandle(adapter->flash);
        }

        /* Do Shared Code initialization */
        if (e1000_setup_init_funcs(&adapter->hw, TRUE)) {
                device_printf(dev, "Setup of Shared code failed\n");
                error = ENXIO;
                goto err_pci;
        }

        e1000_get_bus_info(&adapter->hw);

        /* Set up some sysctls for the tunable interrupt delays */
        em_add_int_delay_sysctl(adapter, "rx_int_delay",
            "receive interrupt delay in usecs", &adapter->rx_int_delay,
            E1000_REGISTER(&adapter->hw, E1000_RDTR), em_rx_int_delay_dflt);
        em_add_int_delay_sysctl(adapter, "tx_int_delay",
            "transmit interrupt delay in usecs", &adapter->tx_int_delay,
            E1000_REGISTER(&adapter->hw, E1000_TIDV), em_tx_int_delay_dflt);
        em_add_int_delay_sysctl(adapter, "rx_abs_int_delay",
            "receive interrupt delay limit in usecs",
            &adapter->rx_abs_int_delay,
            E1000_REGISTER(&adapter->hw, E1000_RADV),
            em_rx_abs_int_delay_dflt);
        em_add_int_delay_sysctl(adapter, "tx_abs_int_delay",
            "transmit interrupt delay limit in usecs",
            &adapter->tx_abs_int_delay,
            E1000_REGISTER(&adapter->hw, E1000_TADV),
            em_tx_abs_int_delay_dflt);

        /* Sysctl for limiting the amount of work done in the taskqueue */
        em_add_rx_process_limit(adapter, "rx_processing_limit",
            "max number of rx packets to process", &adapter->rx_process_limit,
            em_rx_process_limit);

        /* Sysctl for setting the interface flow control */
        em_set_flow_cntrl(adapter, "flow_control",
            "configure flow control",
            &adapter->fc_setting, em_fc_setting);

        /*
         * Validate the number of transmit and receive descriptors.
         * They must not exceed the hardware maximum and must be a
         * multiple of EM_DBA_ALIGN.
         */
        if (((em_txd * sizeof(struct e1000_tx_desc)) % EM_DBA_ALIGN) != 0 ||
            (em_txd > EM_MAX_TXD) || (em_txd < EM_MIN_TXD)) {
                device_printf(dev, "Using %d TX descriptors instead of %d!\n",
                    EM_DEFAULT_TXD, em_txd);
                adapter->num_tx_desc = EM_DEFAULT_TXD;
        } else
                adapter->num_tx_desc = em_txd;

        if (((em_rxd * sizeof(struct e1000_rx_desc)) % EM_DBA_ALIGN) != 0 ||
            (em_rxd > EM_MAX_RXD) || (em_rxd < EM_MIN_RXD)) {
                device_printf(dev, "Using %d RX descriptors instead of %d!\n",
                    EM_DEFAULT_RXD, em_rxd);
                adapter->num_rx_desc = EM_DEFAULT_RXD;
        } else
                adapter->num_rx_desc = em_rxd;

        adapter->hw.mac.autoneg = DO_AUTO_NEG;
        adapter->hw.phy.autoneg_wait_to_complete = FALSE;
        adapter->hw.phy.autoneg_advertised = AUTONEG_ADV_DEFAULT;

        /* Copper options */
        if (adapter->hw.phy.media_type == e1000_media_type_copper) {
                adapter->hw.phy.mdix = AUTO_ALL_MODES;
                adapter->hw.phy.disable_polarity_correction = FALSE;
                adapter->hw.phy.ms_type = EM_MASTER_SLAVE;
        }

        /*
         * Set the frame limits assuming
         * standard Ethernet-sized frames.
         */
        adapter->max_frame_size = ETHERMTU + ETHER_HDR_LEN + ETHERNET_FCS_SIZE;
        adapter->min_frame_size = ETH_ZLEN + ETHERNET_FCS_SIZE;
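        /*
         * For standard frames this works out to 1518 bytes maximum
         * (1500 MTU + 14-byte header + 4-byte FCS) and, assuming the
         * usual 60-byte ETH_ZLEN, a 64-byte minimum.
         */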

        /*
         * This controls when hardware reports transmit completion
         * status.
         */
        adapter->hw.mac.report_tx_early = 1;

        /*
        ** Get queue/ring memory
        */
        if (em_allocate_queues(adapter)) {
                error = ENOMEM;
                goto err_pci;
        }

        /* Allocate multicast array memory. */
        adapter->mta = malloc(sizeof(u8) * ETH_ADDR_LEN *
            MAX_NUM_MULTICAST_ADDRESSES, M_DEVBUF, M_NOWAIT);
        if (adapter->mta == NULL) {
                device_printf(dev, "Can not allocate multicast setup array\n");
                error = ENOMEM;
                goto err_late;
        }

        /* Check SOL/IDER usage */
        if (e1000_check_reset_block(&adapter->hw))
                device_printf(dev, "PHY reset is blocked"
                    " due to SOL/IDER session.\n");

        /*
        ** Start from a known state; this is
        ** important for reading the NVM and
        ** MAC address that follow.
        */
        e1000_reset_hw(&adapter->hw);

        /* Make sure we have a good EEPROM before we read from it */
        if (e1000_validate_nvm_checksum(&adapter->hw) < 0) {
                /*
                ** Some PCI-E parts fail the first check due to
                ** the link being in a sleep state; call it again.
                ** If it fails a second time, it is a real issue.
                */
                if (e1000_validate_nvm_checksum(&adapter->hw) < 0) {
                        device_printf(dev,
                            "The EEPROM Checksum Is Not Valid\n");
                        error = EIO;
                        goto err_late;
                }
        }

        /* Copy the permanent MAC address out of the EEPROM */
        if (e1000_read_mac_addr(&adapter->hw) < 0) {
                device_printf(dev, "EEPROM read error while reading MAC"
                    " address\n");
                error = EIO;
                goto err_late;
        }

        if (!em_is_valid_ether_addr(adapter->hw.mac.addr)) {
                device_printf(dev, "Invalid MAC address\n");
                error = EIO;
                goto err_late;
        }

        /*
        **  Do interrupt configuration
        */
        if (adapter->msix > 1) /* Do MSIX */
                error = em_allocate_msix(adapter);
        else  /* MSI or Legacy */
                error = em_allocate_legacy(adapter);
        if (error)
                goto err_late;

        /*
         * Get Wake-on-Lan and Management info for later use
         */
        em_get_wakeup(dev);

        /* Setup OS specific network interface */
        if (em_setup_interface(dev, adapter) != 0)
                goto err_late;

        em_reset(adapter);

        /* Initialize statistics */
        em_update_stats_counters(adapter);

        adapter->hw.mac.get_link_status = 1;
        em_update_link_status(adapter);

        /* Register for VLAN events */
        adapter->vlan_attach = EVENTHANDLER_REGISTER(vlan_config,
            em_register_vlan, adapter, EVENTHANDLER_PRI_FIRST);
        adapter->vlan_detach = EVENTHANDLER_REGISTER(vlan_unconfig,
            em_unregister_vlan, adapter, EVENTHANDLER_PRI_FIRST);

        em_add_hw_stats(adapter);

        /* Non-AMT based hardware can now take control from firmware */
        if (adapter->has_manage && !adapter->has_amt)
                em_get_hw_control(adapter);

        /* Tell the stack that the interface is not active */
        adapter->ifp->if_drv_flags &= ~(IFF_DRV_RUNNING | IFF_DRV_OACTIVE);

        adapter->led_dev = led_create(em_led_func, adapter,
            device_get_nameunit(dev));

        INIT_DEBUGOUT("em_attach: end");

        return (0);

err_late:
        em_free_transmit_structures(adapter);
        em_free_receive_structures(adapter);
        em_release_hw_control(adapter);
        if (adapter->ifp != NULL)
                if_free(adapter->ifp);
err_pci:
        em_free_pci_resources(adapter);
        free(adapter->mta, M_DEVBUF);
        EM_CORE_LOCK_DESTROY(adapter);

        return (error);
}

/*********************************************************************
 *  Device removal routine
 *
 *  The detach entry point is called when the driver is being removed.
 *  This routine stops the adapter and deallocates all the resources
 *  that were allocated for driver operation.
 *
 *  return 0 on success, positive on failure
 *********************************************************************/

static int
em_detach(device_t dev)
{
        struct adapter  *adapter = device_get_softc(dev);
        struct ifnet    *ifp = adapter->ifp;

        INIT_DEBUGOUT("em_detach: begin");

        /* Make sure VLANs are not using the driver */
        if (adapter->ifp->if_vlantrunk != NULL) {
                device_printf(dev,"Vlan in use, detach first\n");
                return (EBUSY);
        }

#ifdef DEVICE_POLLING
        if (ifp->if_capenable & IFCAP_POLLING)
                ether_poll_deregister(ifp);
#endif

        if (adapter->led_dev != NULL)
                led_destroy(adapter->led_dev);

        EM_CORE_LOCK(adapter);
        adapter->in_detach = 1;
        em_stop(adapter);
        EM_CORE_UNLOCK(adapter);
        EM_CORE_LOCK_DESTROY(adapter);

        e1000_phy_hw_reset(&adapter->hw);

        em_release_manageability(adapter);
        em_release_hw_control(adapter);

        /* Unregister VLAN events */
        if (adapter->vlan_attach != NULL)
                EVENTHANDLER_DEREGISTER(vlan_config, adapter->vlan_attach);
        if (adapter->vlan_detach != NULL)
                EVENTHANDLER_DEREGISTER(vlan_unconfig, adapter->vlan_detach);

        ether_ifdetach(adapter->ifp);
        callout_drain(&adapter->timer);

        em_free_pci_resources(adapter);
        bus_generic_detach(dev);
        if_free(ifp);

        em_free_transmit_structures(adapter);
        em_free_receive_structures(adapter);

        em_release_hw_control(adapter);
        free(adapter->mta, M_DEVBUF);

        return (0);
}

/*********************************************************************
 *
 *  Shutdown entry point
 *
 **********************************************************************/

static int
em_shutdown(device_t dev)
{
        return em_suspend(dev);
}

/*
 * Suspend/resume device methods.
 */
static int
em_suspend(device_t dev)
{
        struct adapter *adapter = device_get_softc(dev);

        EM_CORE_LOCK(adapter);

        em_release_manageability(adapter);
        em_release_hw_control(adapter);
        em_enable_wakeup(dev);

        EM_CORE_UNLOCK(adapter);

        return bus_generic_suspend(dev);
}

static int
em_resume(device_t dev)
{
        struct adapter *adapter = device_get_softc(dev);
        struct ifnet *ifp = adapter->ifp;

        EM_CORE_LOCK(adapter);
        em_init_locked(adapter);
        em_init_manageability(adapter);
        EM_CORE_UNLOCK(adapter);
        em_start(ifp);

        return bus_generic_resume(dev);
}


/*********************************************************************
 *  Transmit entry point
 *
 *  em_start is called by the stack to initiate a transmit.
 *  The driver will remain in this routine as long as there are
 *  packets to transmit and transmit resources are available.
 *  If resources are not available, the stack is notified and
 *  the packet is requeued.
 **********************************************************************/

#ifdef EM_MULTIQUEUE
static int
em_mq_start_locked(struct ifnet *ifp, struct tx_ring *txr, struct mbuf *m)
{
        struct adapter  *adapter = txr->adapter;
        struct mbuf     *next;
        int             err = 0, enq = 0;

        if ((ifp->if_drv_flags & (IFF_DRV_RUNNING | IFF_DRV_OACTIVE)) !=
            IFF_DRV_RUNNING || adapter->link_active == 0) {
                if (m != NULL)
                        err = drbr_enqueue(ifp, txr->br, m);
                return (err);
        }

        /* Call cleanup if number of TX descriptors low */
        if (txr->tx_avail <= EM_TX_CLEANUP_THRESHOLD)
                em_txeof(txr);

        enq = 0;
        if (m == NULL) {
                next = drbr_dequeue(ifp, txr->br);
        } else if (drbr_needs_enqueue(ifp, txr->br)) {
                if ((err = drbr_enqueue(ifp, txr->br, m)) != 0)
                        return (err);
                next = drbr_dequeue(ifp, txr->br);
        } else
                next = m;

        /* Process the queue */
        while (next != NULL) {
                if ((err = em_xmit(txr, &next)) != 0) {
                        if (next != NULL)
                                err = drbr_enqueue(ifp, txr->br, next);
                        break;
                }
                enq++;
                drbr_stats_update(ifp, next->m_pkthdr.len, next->m_flags);
                ETHER_BPF_MTAP(ifp, next);
                if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
                        break;
                if (txr->tx_avail < EM_MAX_SCATTER) {
                        ifp->if_drv_flags |= IFF_DRV_OACTIVE;
                        break;
                }
                next = drbr_dequeue(ifp, txr->br);
        }

        if (enq > 0) {
                /* Set the watchdog */
                txr->queue_status = EM_QUEUE_WORKING;
                txr->watchdog_time = ticks;
        }
        return (err);
}

/*
** Multiqueue capable stack interface
*/
static int
em_mq_start(struct ifnet *ifp, struct mbuf *m)
{
        struct adapter  *adapter = ifp->if_softc;
        struct tx_ring  *txr = adapter->tx_rings;
        int             error;

        if (EM_TX_TRYLOCK(txr)) {
                error = em_mq_start_locked(ifp, txr, m);
                EM_TX_UNLOCK(txr);
        } else
                error = drbr_enqueue(ifp, txr->br, m);

        return (error);
}

/*
** Flush all ring buffers
*/
static void
em_qflush(struct ifnet *ifp)
{
        struct adapter  *adapter = ifp->if_softc;
        struct tx_ring  *txr = adapter->tx_rings;
        struct mbuf     *m;

        for (int i = 0; i < adapter->num_queues; i++, txr++) {
                EM_TX_LOCK(txr);
                while ((m = buf_ring_dequeue_sc(txr->br)) != NULL)
                        m_freem(m);
                EM_TX_UNLOCK(txr);
        }
        if_qflush(ifp);
}

#endif /* EM_MULTIQUEUE */

static void
em_start_locked(struct ifnet *ifp, struct tx_ring *txr)
{
        struct adapter  *adapter = ifp->if_softc;
        struct mbuf     *m_head;

        EM_TX_LOCK_ASSERT(txr);

        if ((ifp->if_drv_flags & (IFF_DRV_RUNNING|IFF_DRV_OACTIVE)) !=
            IFF_DRV_RUNNING)
                return;

        if (!adapter->link_active)
                return;

        /* Call cleanup if number of TX descriptors low */
        if (txr->tx_avail <= EM_TX_CLEANUP_THRESHOLD)
                em_txeof(txr);

        while (!IFQ_DRV_IS_EMPTY(&ifp->if_snd)) {
                if (txr->tx_avail < EM_MAX_SCATTER) {
                        ifp->if_drv_flags |= IFF_DRV_OACTIVE;
                        break;
                }
                IFQ_DRV_DEQUEUE(&ifp->if_snd, m_head);
                if (m_head == NULL)
                        break;
                /*
                 *  Encapsulation can modify our pointer, and/or make it
                 *  NULL on failure.  In that event, we can't requeue.
                 */
                if (em_xmit(txr, &m_head)) {
                        if (m_head == NULL)
                                break;
                        ifp->if_drv_flags |= IFF_DRV_OACTIVE;
                        IFQ_DRV_PREPEND(&ifp->if_snd, m_head);
                        break;
                }

                /* Send a copy of the frame to the BPF listener */
                ETHER_BPF_MTAP(ifp, m_head);

                /* Set timeout in case hardware has problems transmitting. */
                txr->watchdog_time = ticks;
                txr->queue_status = EM_QUEUE_WORKING;
        }

        return;
}

static void
em_start(struct ifnet *ifp)
{
        struct adapter  *adapter = ifp->if_softc;
        struct tx_ring  *txr = adapter->tx_rings;

        if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
                EM_TX_LOCK(txr);
                em_start_locked(ifp, txr);
                EM_TX_UNLOCK(txr);
        }
        return;
}

/*********************************************************************
 *  Ioctl entry point
 *
 *  em_ioctl is called when the user wants to configure the
 *  interface.
 *
 *  return 0 on success, positive on failure
 **********************************************************************/

static int
em_ioctl(struct ifnet *ifp, u_long command, caddr_t data)
{
        struct adapter  *adapter = ifp->if_softc;
        struct ifreq *ifr = (struct ifreq *)data;
#ifdef INET
        struct ifaddr *ifa = (struct ifaddr *)data;
#endif
        int error = 0;

        if (adapter->in_detach)
                return (error);

        switch (command) {
        case SIOCSIFADDR:
#ifdef INET
                if (ifa->ifa_addr->sa_family == AF_INET) {
                        /*
                         * XXX
                         * Since resetting hardware takes a very long time
                         * and results in link renegotiation, we only
                         * initialize the hardware when it is absolutely
                         * required.
                         */
                        ifp->if_flags |= IFF_UP;
                        if (!(ifp->if_drv_flags & IFF_DRV_RUNNING)) {
                                EM_CORE_LOCK(adapter);
                                em_init_locked(adapter);
                                EM_CORE_UNLOCK(adapter);
                        }
                        arp_ifinit(ifp, ifa);
                } else
#endif
                        error = ether_ioctl(ifp, command, data);
                break;
        case SIOCSIFMTU:
            {
                int max_frame_size;

                IOCTL_DEBUGOUT("ioctl rcv'd: SIOCSIFMTU (Set Interface MTU)");

                EM_CORE_LOCK(adapter);
                switch (adapter->hw.mac.type) {
                case e1000_82571:
                case e1000_82572:
                case e1000_ich9lan:
                case e1000_ich10lan:
                case e1000_pch2lan:
                case e1000_82574:
                case e1000_80003es2lan: /* 9K Jumbo Frame size */
                        max_frame_size = 9234;
                        break;
                case e1000_pchlan:
                        max_frame_size = 4096;
                        break;
                        /* Adapters that do not support jumbo frames */
                case e1000_82583:
                case e1000_ich8lan:
                        max_frame_size = ETHER_MAX_LEN;
                        break;
                default:
                        max_frame_size = MAX_JUMBO_FRAME_SIZE;
                }
                if (ifr->ifr_mtu > max_frame_size - ETHER_HDR_LEN -
                    ETHER_CRC_LEN) {
                        EM_CORE_UNLOCK(adapter);
                        error = EINVAL;
                        break;
                }

                ifp->if_mtu = ifr->ifr_mtu;
                adapter->max_frame_size =
                    ifp->if_mtu + ETHER_HDR_LEN + ETHER_CRC_LEN;
                em_init_locked(adapter);
                EM_CORE_UNLOCK(adapter);
                break;
            }
        case SIOCSIFFLAGS:
                IOCTL_DEBUGOUT("ioctl rcv'd:\
                    SIOCSIFFLAGS (Set Interface Flags)");
                EM_CORE_LOCK(adapter);
                if (ifp->if_flags & IFF_UP) {
                        if ((ifp->if_drv_flags & IFF_DRV_RUNNING)) {
                                if ((ifp->if_flags ^ adapter->if_flags) &
                                    (IFF_PROMISC | IFF_ALLMULTI)) {
                                        em_disable_promisc(adapter);
                                        em_set_promisc(adapter);
                                }
                        } else
                                em_init_locked(adapter);
                } else
                        if (ifp->if_drv_flags & IFF_DRV_RUNNING)
                                em_stop(adapter);
                adapter->if_flags = ifp->if_flags;
                EM_CORE_UNLOCK(adapter);
                break;
        case SIOCADDMULTI:
        case SIOCDELMULTI:
                IOCTL_DEBUGOUT("ioctl rcv'd: SIOC(ADD|DEL)MULTI");
                if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
                        EM_CORE_LOCK(adapter);
                        em_disable_intr(adapter);
                        em_set_multi(adapter);
#ifdef DEVICE_POLLING
                        if (!(ifp->if_capenable & IFCAP_POLLING))
#endif
                                em_enable_intr(adapter);
                        EM_CORE_UNLOCK(adapter);
                }
                break;
        case SIOCSIFMEDIA:
                /*
                ** As the speed/duplex settings are being
                ** changed, we need to reset the PHY.
                */
                adapter->hw.phy.reset_disable = FALSE;
                /* Check SOL/IDER usage */
                EM_CORE_LOCK(adapter);
                if (e1000_check_reset_block(&adapter->hw)) {
                        EM_CORE_UNLOCK(adapter);
                        device_printf(adapter->dev, "Media change is"
                            " blocked due to SOL/IDER session.\n");
                        break;
                }
                EM_CORE_UNLOCK(adapter);
                /* falls thru */
        case SIOCGIFMEDIA:
                IOCTL_DEBUGOUT("ioctl rcv'd: \
                    SIOCxIFMEDIA (Get/Set Interface Media)");
                error = ifmedia_ioctl(ifp, ifr, &adapter->media, command);
                break;
        case SIOCSIFCAP:
            {
                int mask, reinit;

                IOCTL_DEBUGOUT("ioctl rcv'd: SIOCSIFCAP (Set Capabilities)");
                reinit = 0;
                mask = ifr->ifr_reqcap ^ ifp->if_capenable;
#ifdef DEVICE_POLLING
                if (mask & IFCAP_POLLING) {
                        if (ifr->ifr_reqcap & IFCAP_POLLING) {
                                error = ether_poll_register(em_poll, ifp);
                                if (error)
                                        return (error);
                                EM_CORE_LOCK(adapter);
                                em_disable_intr(adapter);
                                ifp->if_capenable |= IFCAP_POLLING;
                                EM_CORE_UNLOCK(adapter);
                        } else {
                                error = ether_poll_deregister(ifp);
                                /* Enable interrupt even in error case */
                                EM_CORE_LOCK(adapter);
                                em_enable_intr(adapter);
                                ifp->if_capenable &= ~IFCAP_POLLING;
                                EM_CORE_UNLOCK(adapter);
                        }
                }
#endif
                if (mask & IFCAP_HWCSUM) {
                        ifp->if_capenable ^= IFCAP_HWCSUM;
                        reinit = 1;
                }
                if (mask & IFCAP_TSO4) {
                        ifp->if_capenable ^= IFCAP_TSO4;
                        reinit = 1;
                }
                if (mask & IFCAP_VLAN_HWTAGGING) {
                        ifp->if_capenable ^= IFCAP_VLAN_HWTAGGING;
                        reinit = 1;
                }
                if (mask & IFCAP_VLAN_HWFILTER) {
                        ifp->if_capenable ^= IFCAP_VLAN_HWFILTER;
                        reinit = 1;
                }
                if ((mask & IFCAP_WOL) &&
                    (ifp->if_capabilities & IFCAP_WOL) != 0) {
                        if (mask & IFCAP_WOL_MCAST)
                                ifp->if_capenable ^= IFCAP_WOL_MCAST;
                        if (mask & IFCAP_WOL_MAGIC)
                                ifp->if_capenable ^= IFCAP_WOL_MAGIC;
                }
                if (reinit && (ifp->if_drv_flags & IFF_DRV_RUNNING))
                        em_init(adapter);
                VLAN_CAPABILITIES(ifp);
                break;
            }

        default:
                error = ether_ioctl(ifp, command, data);
                break;
        }

        return (error);
}


/*********************************************************************
 *  Init entry point
 *
 *  This routine is used in two ways. It is used by the stack as
 *  the init entry point in the network interface structure. It is
 *  also used by the driver as a hw/sw initialization routine to
 *  get to a consistent state.
 **********************************************************************/

static void
em_init_locked(struct adapter *adapter)
{
        struct ifnet    *ifp = adapter->ifp;
        device_t        dev = adapter->dev;
        u32             pba;

        INIT_DEBUGOUT("em_init: begin");

        EM_CORE_LOCK_ASSERT(adapter);

        em_disable_intr(adapter);
        callout_stop(&adapter->timer);

        /*
         * Packet Buffer Allocation (PBA)
         * Writing PBA sets the receive portion of the buffer;
         * the remainder is used for the transmit buffer.
         */
        switch (adapter->hw.mac.type) {
        /* Total Packet Buffer on these is 48K */
        case e1000_82571:
        case e1000_82572:
        case e1000_80003es2lan:
                pba = E1000_PBA_32K; /* 32K for Rx, 16K for Tx */
                break;
        case e1000_82573: /* 82573: Total Packet Buffer is 32K */
                pba = E1000_PBA_12K; /* 12K for Rx, 20K for Tx */
                break;
        case e1000_82574:
        case e1000_82583:
                pba = E1000_PBA_20K; /* 20K for Rx, 20K for Tx */
                break;
        case e1000_ich8lan:
                pba = E1000_PBA_8K;
                break;
        case e1000_ich9lan:
        case e1000_ich10lan:
                pba = E1000_PBA_10K;
                break;
        case e1000_pchlan:
        case e1000_pch2lan:
                pba = E1000_PBA_26K;
                break;
        default:
                if (adapter->max_frame_size > 8192)
                        pba = E1000_PBA_40K; /* 40K for Rx, 24K for Tx */
                else
                        pba = E1000_PBA_48K; /* 48K for Rx, 16K for Tx */
        }
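
        /*
         * The PBA value chosen above is the receive allocation in KB;
         * e.g. on the 48K parts E1000_PBA_32K gives 32K to Rx, which
         * leaves 16K of the packet buffer for Tx.
         */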

        INIT_DEBUGOUT1("em_init: pba=%dK",pba);
        E1000_WRITE_REG(&adapter->hw, E1000_PBA, pba);

        /* Get the latest MAC address; the user may have set an LAA */
        bcopy(IF_LLADDR(adapter->ifp), adapter->hw.mac.addr,
              ETHER_ADDR_LEN);

        /* Put the address into the Receive Address Array */
        e1000_rar_set(&adapter->hw, adapter->hw.mac.addr, 0);

        /*
         * With the 82571 adapter, RAR[0] may be overwritten
         * when the other port is reset. We keep a duplicate
         * in RAR[14] for that eventuality; this ensures
         * the interface continues to function.
         */
        if (adapter->hw.mac.type == e1000_82571) {
                e1000_set_laa_state_82571(&adapter->hw, TRUE);
                e1000_rar_set(&adapter->hw, adapter->hw.mac.addr,
                    E1000_RAR_ENTRIES - 1);
        }

        /* Initialize the hardware */
        em_reset(adapter);
        em_update_link_status(adapter);

        /* Setup VLAN support, basic and offload if available */
        E1000_WRITE_REG(&adapter->hw, E1000_VET, ETHERTYPE_VLAN);

        /* Set hardware offload abilities */
        ifp->if_hwassist = 0;
        if (ifp->if_capenable & IFCAP_TXCSUM)
                ifp->if_hwassist |= (CSUM_TCP | CSUM_UDP);
        if (ifp->if_capenable & IFCAP_TSO4)
                ifp->if_hwassist |= CSUM_TSO;

        /* Configure for OS presence */
        em_init_manageability(adapter);

        /* Prepare transmit descriptors and buffers */
        em_setup_transmit_structures(adapter);
        em_initialize_transmit_unit(adapter);

        /* Setup Multicast table */
        em_set_multi(adapter);

        /*
        ** Figure out the desired mbuf
        ** pool for doing jumbo frames.
        */
        if (adapter->max_frame_size <= 2048)
                adapter->rx_mbuf_sz = MCLBYTES;
        else if (adapter->max_frame_size <= 4096)
                adapter->rx_mbuf_sz = MJUMPAGESIZE;
        else
                adapter->rx_mbuf_sz = MJUM9BYTES;
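
        /*
         * For reference, the pool sizes used above are MCLBYTES (2K),
         * MJUMPAGESIZE (one page, 4K on most platforms) and
         * MJUM9BYTES (9K).
         */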

        /* Prepare receive descriptors and buffers */
        if (em_setup_receive_structures(adapter)) {
                device_printf(dev, "Could not setup receive structures\n");
                em_stop(adapter);
                return;
        }
        em_initialize_receive_unit(adapter);

        /* Use real VLAN Filter support? */
        if (ifp->if_capenable & IFCAP_VLAN_HWTAGGING) {
                if (ifp->if_capenable & IFCAP_VLAN_HWFILTER)
                        /* Use real VLAN Filter support */
                        em_setup_vlan_hw_support(adapter);
                else {
                        u32 ctrl;
                        ctrl = E1000_READ_REG(&adapter->hw, E1000_CTRL);
                        ctrl |= E1000_CTRL_VME;
                        E1000_WRITE_REG(&adapter->hw, E1000_CTRL, ctrl);
                }
        }

        /* Don't lose promiscuous settings */
        em_set_promisc(adapter);

        ifp->if_drv_flags |= IFF_DRV_RUNNING;
        ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;

        callout_reset(&adapter->timer, hz, em_local_timer, adapter);
        e1000_clear_hw_cntrs_base_generic(&adapter->hw);

        /* MSI/X configuration for 82574 */
        if (adapter->hw.mac.type == e1000_82574) {
                int tmp;
                tmp = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
                tmp |= E1000_CTRL_EXT_PBA_CLR;
                E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT, tmp);
                /* Set the IVAR - interrupt vector routing. */
                E1000_WRITE_REG(&adapter->hw, E1000_IVAR, adapter->ivars);
        }
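
        /*
         * The IVAR write above steers each interrupt cause (Rx queue,
         * Tx queue, link/other) to its MSI-X vector; adapter->ivars
         * was assembled when the MSI-X vectors were assigned.
         */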

#ifdef DEVICE_POLLING
        /*
         * Only enable interrupts if we are not polling; make sure
         * they are off otherwise.
         */
        if (ifp->if_capenable & IFCAP_POLLING)
                em_disable_intr(adapter);
        else
#endif /* DEVICE_POLLING */
                em_enable_intr(adapter);

        /* AMT based hardware can now take control from firmware */
        if (adapter->has_manage && adapter->has_amt)
                em_get_hw_control(adapter);

        /* Don't reset the phy next time init gets called */
        adapter->hw.phy.reset_disable = TRUE;
}

static void
em_init(void *arg)
{
        struct adapter *adapter = arg;

        EM_CORE_LOCK(adapter);
        em_init_locked(adapter);
        EM_CORE_UNLOCK(adapter);
}


1373 #ifdef DEVICE_POLLING
1374 /*********************************************************************
1375  *
1376  *  Legacy polling routine: note this only works with a single queue
1377  *
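 *  A usage note: polling requires 'options DEVICE_POLLING' in the kernel
 *  config and is toggled per interface, e.g. 'ifconfig em0 polling'
 *  (em0 being an example name).
 *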
1378  *********************************************************************/
1379 static int
1380 em_poll(struct ifnet *ifp, enum poll_cmd cmd, int count)
1381 {
1382         struct adapter *adapter = ifp->if_softc;
1383         struct tx_ring  *txr = adapter->tx_rings;
1384         struct rx_ring  *rxr = adapter->rx_rings;
1385         u32             reg_icr;
1386         int             rx_done;
1387
1388         EM_CORE_LOCK(adapter);
1389         if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) {
1390                 EM_CORE_UNLOCK(adapter);
1391                 return (0);
1392         }
1393
1394         if (cmd == POLL_AND_CHECK_STATUS) {
1395                 reg_icr = E1000_READ_REG(&adapter->hw, E1000_ICR);
1396                 if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
1397                         callout_stop(&adapter->timer);
1398                         adapter->hw.mac.get_link_status = 1;
1399                         em_update_link_status(adapter);
1400                         callout_reset(&adapter->timer, hz,
1401                             em_local_timer, adapter);
1402                 }
1403         }
1404         EM_CORE_UNLOCK(adapter);
1405
1406         em_rxeof(rxr, count, &rx_done);
1407
1408         EM_TX_LOCK(txr);
1409         em_txeof(txr);
1410 #ifdef EM_MULTIQUEUE
1411         if (!drbr_empty(ifp, txr->br))
1412                 em_mq_start_locked(ifp, txr, NULL);
1413 #else
1414         if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
1415                 em_start_locked(ifp, txr);
1416 #endif
1417         EM_TX_UNLOCK(txr);
1418
1419         return (rx_done);
1420 }
1421 #endif /* DEVICE_POLLING */
1422
1423
1424 /*********************************************************************
1425  *
1426  *  Fast Legacy/MSI Combined Interrupt Service routine  
1427  *
1428  *********************************************************************/
1429 static int
1430 em_irq_fast(void *arg)
1431 {
1432         struct adapter  *adapter = arg;
1433         struct ifnet    *ifp;
1434         u32             reg_icr;
1435
1436         ifp = adapter->ifp;
1437
1438         reg_icr = E1000_READ_REG(&adapter->hw, E1000_ICR);
1439
1440         /* Hot eject?  */
1441         if (reg_icr == 0xffffffff)
1442                 return FILTER_STRAY;
1443
1444         /* Definitely not our interrupt.  */
1445         if (reg_icr == 0x0)
1446                 return FILTER_STRAY;
1447
1448         /*
1449          * Starting with the 82571 chip, bit 31 should be used to
1450          * determine whether the interrupt belongs to us.
1451          */
1452         if (adapter->hw.mac.type >= e1000_82571 &&
1453             (reg_icr & E1000_ICR_INT_ASSERTED) == 0)
1454                 return FILTER_STRAY;
1455
1456         em_disable_intr(adapter);
1457         taskqueue_enqueue(adapter->tq, &adapter->que_task);
1458
1459         /* Link status change */
1460         if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
1461                 adapter->hw.mac.get_link_status = 1;
1462                 taskqueue_enqueue(taskqueue_fast, &adapter->link_task);
1463         }
1464
1465         if (reg_icr & E1000_ICR_RXO)
1466                 adapter->rx_overruns++;
1467         return FILTER_HANDLED;
1468 }
1469
1470 /* Combined RX/TX handler, used by Legacy and MSI */
1471 static void
1472 em_handle_que(void *context, int pending)
1473 {
1474         struct adapter  *adapter = context;
1475         struct ifnet    *ifp = adapter->ifp;
1476         struct tx_ring  *txr = adapter->tx_rings;
1477         struct rx_ring  *rxr = adapter->rx_rings;
1478         bool            more;
1479
1480
1481         if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
1482                 more = em_rxeof(rxr, adapter->rx_process_limit, NULL);
1483
1484                 EM_TX_LOCK(txr);
1485                 em_txeof(txr);
1486 #ifdef EM_MULTIQUEUE
1487                 if (!drbr_empty(ifp, txr->br))
1488                         em_mq_start_locked(ifp, txr, NULL);
1489 #else
1490                 if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
1491                         em_start_locked(ifp, txr);
1492 #endif
1493                 em_txeof(txr);
1494                 EM_TX_UNLOCK(txr);
1495                 if (more) {
1496                         taskqueue_enqueue(adapter->tq, &adapter->que_task);
1497                         return;
1498                 }
1499         }
1500
1501         em_enable_intr(adapter);
1502         return;
1503 }
1504
1505
1506 /*********************************************************************
1507  *
1508  *  MSIX Interrupt Service Routines
1509  *
1510  **********************************************************************/
1511 static void
1512 em_msix_tx(void *arg)
1513 {
1514         struct tx_ring *txr = arg;
1515         struct adapter *adapter = txr->adapter;
1516         bool            more;
1517
1518         ++txr->tx_irq;
1519         EM_TX_LOCK(txr);
1520         more = em_txeof(txr);
1521         EM_TX_UNLOCK(txr);
1522         if (more)
1523                 taskqueue_enqueue(txr->tq, &txr->tx_task);
1524         else
1525                 /* Reenable this interrupt */
1526                 E1000_WRITE_REG(&adapter->hw, E1000_IMS, txr->ims);
1527         return;
1528 }
1529
1530 /*********************************************************************
1531  *
1532  *  MSIX RX Interrupt Service routine
1533  *
1534  **********************************************************************/
1535
1536 static void
1537 em_msix_rx(void *arg)
1538 {
1539         struct rx_ring  *rxr = arg;
1540         struct adapter  *adapter = rxr->adapter;
1541         bool            more;
1542
1543         ++rxr->rx_irq;
1544         more = em_rxeof(rxr, adapter->rx_process_limit, NULL);
1545         if (more)
1546                 taskqueue_enqueue(rxr->tq, &rxr->rx_task);
1547         else
1548                 /* Reenable this interrupt */
1549                 E1000_WRITE_REG(&adapter->hw, E1000_IMS, rxr->ims);
1550         return;
1551 }
1552
1553 /*********************************************************************
1554  *
1555  *  MSIX Link Fast Interrupt Service routine
1556  *
1557  **********************************************************************/
1558 static void
1559 em_msix_link(void *arg)
1560 {
1561         struct adapter  *adapter = arg;
1562         u32             reg_icr;
1563
1564         ++adapter->link_irq;
1565         reg_icr = E1000_READ_REG(&adapter->hw, E1000_ICR);
1566
1567         if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
1568                 adapter->hw.mac.get_link_status = 1;
1569                 em_handle_link(adapter, 0);
1570         } else
1571                 E1000_WRITE_REG(&adapter->hw, E1000_IMS,
1572                     EM_MSIX_LINK | E1000_IMS_LSC);
1573         return;
1574 }
1575
1576 static void
1577 em_handle_rx(void *context, int pending)
1578 {
1579         struct rx_ring  *rxr = context;
1580         struct adapter  *adapter = rxr->adapter;
1581         bool            more;
1582
1583         more = em_rxeof(rxr, adapter->rx_process_limit, NULL);
1584         if (more)
1585                 taskqueue_enqueue(rxr->tq, &rxr->rx_task);
1586         else
1587                 /* Reenable this interrupt */
1588                 E1000_WRITE_REG(&adapter->hw, E1000_IMS, rxr->ims);
1589 }
1590
1591 static void
1592 em_handle_tx(void *context, int pending)
1593 {
1594         struct tx_ring  *txr = context;
1595         struct adapter  *adapter = txr->adapter;
1596         struct ifnet    *ifp = adapter->ifp;
1597
1598         EM_TX_LOCK(txr);
1599         em_txeof(txr);
1600 #ifdef EM_MULTIQUEUE
1601         if (!drbr_empty(ifp, txr->br))
1602                 em_mq_start_locked(ifp, txr, NULL);
1603 #else
1604         if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
1605                 em_start_locked(ifp, txr);
1606 #endif
1607         em_txeof(txr);
1608         E1000_WRITE_REG(&adapter->hw, E1000_IMS, txr->ims);
1609         EM_TX_UNLOCK(txr);
1610 }
1611
1612 static void
1613 em_handle_link(void *context, int pending)
1614 {
1615         struct adapter  *adapter = context;
1616         struct ifnet *ifp = adapter->ifp;
1617
1618         if (!(ifp->if_drv_flags & IFF_DRV_RUNNING))
1619                 return;
1620
1621         EM_CORE_LOCK(adapter);
1622         callout_stop(&adapter->timer);
1623         em_update_link_status(adapter);
1624         callout_reset(&adapter->timer, hz, em_local_timer, adapter);
1625         E1000_WRITE_REG(&adapter->hw, E1000_IMS,
1626             EM_MSIX_LINK | E1000_IMS_LSC);
1627         EM_CORE_UNLOCK(adapter);
1628 }
1629
1630
1631 /*********************************************************************
1632  *
1633  *  Media Ioctl callback
1634  *
1635  *  This routine is called whenever the user queries the status of
1636  *  the interface using ifconfig.
1637  *
1638  **********************************************************************/
1639 static void
1640 em_media_status(struct ifnet *ifp, struct ifmediareq *ifmr)
1641 {
1642         struct adapter *adapter = ifp->if_softc;
1643         u_char fiber_type = IFM_1000_SX;
1644
1645         INIT_DEBUGOUT("em_media_status: begin");
1646
1647         EM_CORE_LOCK(adapter);
1648         em_update_link_status(adapter);
1649
1650         ifmr->ifm_status = IFM_AVALID;
1651         ifmr->ifm_active = IFM_ETHER;
1652
1653         if (!adapter->link_active) {
1654                 EM_CORE_UNLOCK(adapter);
1655                 return;
1656         }
1657
1658         ifmr->ifm_status |= IFM_ACTIVE;
1659
1660         if ((adapter->hw.phy.media_type == e1000_media_type_fiber) ||
1661             (adapter->hw.phy.media_type == e1000_media_type_internal_serdes)) {
1662                 ifmr->ifm_active |= fiber_type | IFM_FDX;
1663         } else {
1664                 switch (adapter->link_speed) {
1665                 case 10:
1666                         ifmr->ifm_active |= IFM_10_T;
1667                         break;
1668                 case 100:
1669                         ifmr->ifm_active |= IFM_100_TX;
1670                         break;
1671                 case 1000:
1672                         ifmr->ifm_active |= IFM_1000_T;
1673                         break;
1674                 }
1675                 if (adapter->link_duplex == FULL_DUPLEX)
1676                         ifmr->ifm_active |= IFM_FDX;
1677                 else
1678                         ifmr->ifm_active |= IFM_HDX;
1679         }
1680         EM_CORE_UNLOCK(adapter);
1681 }
1682
1683 /*********************************************************************
1684  *
1685  *  Media Ioctl callback
1686  *
1687  *  This routine is called when the user changes speed/duplex using
1688  *  the media/mediaopt options with ifconfig.
1689  *
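 *  For example (em0 being an example interface name):
 *      ifconfig em0 media 100baseTX mediaopt full-duplex
 *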
1690  **********************************************************************/
1691 static int
1692 em_media_change(struct ifnet *ifp)
1693 {
1694         struct adapter *adapter = ifp->if_softc;
1695         struct ifmedia  *ifm = &adapter->media;
1696
1697         INIT_DEBUGOUT("em_media_change: begin");
1698
1699         if (IFM_TYPE(ifm->ifm_media) != IFM_ETHER)
1700                 return (EINVAL);
1701
1702         EM_CORE_LOCK(adapter);
1703         switch (IFM_SUBTYPE(ifm->ifm_media)) {
1704         case IFM_AUTO:
1705                 adapter->hw.mac.autoneg = DO_AUTO_NEG;
1706                 adapter->hw.phy.autoneg_advertised = AUTONEG_ADV_DEFAULT;
1707                 break;
1708         case IFM_1000_LX:
1709         case IFM_1000_SX:
1710         case IFM_1000_T:
1711                 adapter->hw.mac.autoneg = DO_AUTO_NEG;
1712                 adapter->hw.phy.autoneg_advertised = ADVERTISE_1000_FULL;
1713                 break;
1714         case IFM_100_TX:
1715                 adapter->hw.mac.autoneg = FALSE;
1716                 adapter->hw.phy.autoneg_advertised = 0;
1717                 if ((ifm->ifm_media & IFM_GMASK) == IFM_FDX)
1718                         adapter->hw.mac.forced_speed_duplex = ADVERTISE_100_FULL;
1719                 else
1720                         adapter->hw.mac.forced_speed_duplex = ADVERTISE_100_HALF;
1721                 break;
1722         case IFM_10_T:
1723                 adapter->hw.mac.autoneg = FALSE;
1724                 adapter->hw.phy.autoneg_advertised = 0;
1725                 if ((ifm->ifm_media & IFM_GMASK) == IFM_FDX)
1726                         adapter->hw.mac.forced_speed_duplex = ADVERTISE_10_FULL;
1727                 else
1728                         adapter->hw.mac.forced_speed_duplex = ADVERTISE_10_HALF;
1729                 break;
1730         default:
1731                 device_printf(adapter->dev, "Unsupported media type\n");
1732         }
1733
1734         em_init_locked(adapter);
1735         EM_CORE_UNLOCK(adapter);
1736
1737         return (0);
1738 }
1739
1740 /*********************************************************************
1741  *
1742  *  This routine maps the mbufs to tx descriptors.
1743  *
1744  *  return 0 on success, positive on failure
1745  **********************************************************************/
1746
1747 static int
1748 em_xmit(struct tx_ring *txr, struct mbuf **m_headp)
1749 {
1750         struct adapter          *adapter = txr->adapter;
1751         bus_dma_segment_t       segs[EM_MAX_SCATTER];
1752         bus_dmamap_t            map;
1753         struct em_buffer        *tx_buffer, *tx_buffer_mapped;
1754         struct e1000_tx_desc    *ctxd = NULL;
1755         struct mbuf             *m_head;
1756         struct ether_header     *eh;
1757         struct ip               *ip = NULL;
1758         struct tcphdr           *tp = NULL;
1759         u32                     txd_upper, txd_lower, txd_used, txd_saved;
1760         int                     ip_off, poff;
1761         int                     nsegs, i, j, first, last = 0;
1762         int                     error, do_tso, tso_desc = 0;
1763
1764         m_head = *m_headp;
1765         txd_upper = txd_lower = txd_used = txd_saved = 0;
1766         do_tso = ((m_head->m_pkthdr.csum_flags & CSUM_TSO) != 0);
1767         ip_off = poff = 0;
1768
1769         /*
1770          * Intel recommends entire IP/TCP header length reside in a single
1771          * buffer. If multiple descriptors are used to describe the IP and
1772          * TCP header, each descriptor should describe one or more
1773          * complete headers; descriptors referencing only parts of headers
1774          * are not supported. If all layer headers are not coalesced into
1775          * a single buffer, each buffer should not cross a 4KB boundary,
1776          * or be larger than the maximum read request size.
1777          * The controller also requires modifying the IP/TCP header to
1778          * make TSO work, so we first get a writable mbuf chain and then
1779          * coalesce the ethernet/IP/TCP header into a single buffer to
1780          * meet the controller's requirement. This also simplifies
1781          * IP/TCP/UDP checksum offloading, which has similar restrictions.
1782          */
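        /*
         * A sketch of the intended layout once the pullups below succeed:
         *   mbuf 0: [ ether | ip | tcp hdr (+4 bytes when doing TSO) ]
         *   mbuf 1..n: payload
         */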
1783         if (do_tso || m_head->m_pkthdr.csum_flags & CSUM_OFFLOAD) {
1784                 if (do_tso || (m_head->m_next != NULL && 
1785                     m_head->m_pkthdr.csum_flags & CSUM_OFFLOAD)) {
1786                         if (M_WRITABLE(*m_headp) == 0) {
1787                                 m_head = m_dup(*m_headp, M_DONTWAIT);
1788                                 m_freem(*m_headp);
1789                                 if (m_head == NULL) {
1790                                         *m_headp = NULL;
1791                                         return (ENOBUFS);
1792                                 }
1793                                 *m_headp = m_head;
1794                         }
1795                 }
1796                 /*
1797                  * XXX
1798                  * Assume IPv4; we don't have TSO/checksum offload support
1799                  * for IPv6 yet.
1800                  */
1801                 ip_off = sizeof(struct ether_header);
1802                 m_head = m_pullup(m_head, ip_off);
1803                 if (m_head == NULL) {
1804                         *m_headp = NULL;
1805                         return (ENOBUFS);
1806                 }
1807                 eh = mtod(m_head, struct ether_header *);
1808                 if (eh->ether_type == htons(ETHERTYPE_VLAN)) {
1809                         ip_off = sizeof(struct ether_vlan_header);
1810                         m_head = m_pullup(m_head, ip_off);
1811                         if (m_head == NULL) {
1812                                 *m_headp = NULL;
1813                                 return (ENOBUFS);
1814                         }
1815                 }
1816                 m_head = m_pullup(m_head, ip_off + sizeof(struct ip));
1817                 if (m_head == NULL) {
1818                         *m_headp = NULL;
1819                         return (ENOBUFS);
1820                 }
1821                 ip = (struct ip *)(mtod(m_head, char *) + ip_off);
1822                 poff = ip_off + (ip->ip_hl << 2);
1823                 if (do_tso) {
1824                         m_head = m_pullup(m_head, poff + sizeof(struct tcphdr));
1825                         if (m_head == NULL) {
1826                                 *m_headp = NULL;
1827                                 return (ENOBUFS);
1828                         }
1829                         tp = (struct tcphdr *)(mtod(m_head, char *) + poff);
1830                         /*
1831                          * TSO workaround:
1832                          *   pull 4 more bytes of data into the header mbuf.
1833                          */
1834                         m_head = m_pullup(m_head, poff + (tp->th_off << 2) + 4);
1835                         if (m_head == NULL) {
1836                                 *m_headp = NULL;
1837                                 return (ENOBUFS);
1838                         }
1839                         ip = (struct ip *)(mtod(m_head, char *) + ip_off);
1840                         ip->ip_len = 0;
1841                         ip->ip_sum = 0;
1842                         /*
1843                          * The pseudo TCP checksum does not include the TCP
1844                          * payload length, so the driver must recompute it here
1845                          * as the hardware expects to see it. This follows
1846                          * Microsoft's Large Send specification.
1847                          */
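                        /*
                         * (A note on the call below: in_pseudo() is given only
                         * the addresses and the protocol, with no length word,
                         * so the payload length is indeed excluded from the
                         * sum; the hardware folds in the per-segment length.)
                         */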
1848                         tp = (struct tcphdr *)(mtod(m_head, char *) + poff);
1849                         tp->th_sum = in_pseudo(ip->ip_src.s_addr,
1850                             ip->ip_dst.s_addr, htons(IPPROTO_TCP));
1851                 } else if (m_head->m_pkthdr.csum_flags & CSUM_TCP) {
1852                         m_head = m_pullup(m_head, poff + sizeof(struct tcphdr));
1853                         if (m_head == NULL) {
1854                                 *m_headp = NULL;
1855                                 return (ENOBUFS);
1856                         }
1857                         tp = (struct tcphdr *)(mtod(m_head, char *) + poff);
1858                         m_head = m_pullup(m_head, poff + (tp->th_off << 2));
1859                         if (m_head == NULL) {
1860                                 *m_headp = NULL;
1861                                 return (ENOBUFS);
1862                         }
1863                         ip = (struct ip *)(mtod(m_head, char *) + ip_off);
1864                         tp = (struct tcphdr *)(mtod(m_head, char *) + poff);
1865                 } else if (m_head->m_pkthdr.csum_flags & CSUM_UDP) {
1866                         m_head = m_pullup(m_head, poff + sizeof(struct udphdr));
1867                         if (m_head == NULL) {
1868                                 *m_headp = NULL;
1869                                 return (ENOBUFS);
1870                         }
1871                         ip = (struct ip *)(mtod(m_head, char *) + ip_off);
1872                 }
1873                 *m_headp = m_head;
1874         }
1875
1876         /*
1877          * Map the packet for DMA
1878          *
1879          * Capture the first descriptor index;
1880          * this descriptor will hold the index
1881          * of the EOP, which is the only one that
1882          * now gets a DONE bit writeback.
1883          */
1884         first = txr->next_avail_desc;
1885         tx_buffer = &txr->tx_buffers[first];
1886         tx_buffer_mapped = tx_buffer;
1887         map = tx_buffer->map;
1888
1889         error = bus_dmamap_load_mbuf_sg(txr->txtag, map,
1890             *m_headp, segs, &nsegs, BUS_DMA_NOWAIT);
1891
1892         /*
1893          * There are two types of errors we can (try) to handle:
1894          * - EFBIG means the mbuf chain was too long and bus_dma ran
1895          *   out of segments.  Defragment the mbuf chain and try again.
1896          * - ENOMEM means bus_dma could not obtain enough bounce buffers
1897          *   at this point in time.  Defer sending and try again later.
1898          * All other errors, in particular EINVAL, are fatal and prevent the
1899          * mbuf chain from ever going through.  Drop it and report error.
1900          */
1901         if (error == EFBIG) {
1902                 struct mbuf *m;
1903
1904                 m = m_defrag(*m_headp, M_DONTWAIT);
1905                 if (m == NULL) {
1906                         adapter->mbuf_alloc_failed++;
1907                         m_freem(*m_headp);
1908                         *m_headp = NULL;
1909                         return (ENOBUFS);
1910                 }
1911                 *m_headp = m;
1912
1913                 /* Try it again */
1914                 error = bus_dmamap_load_mbuf_sg(txr->txtag, map,
1915                     *m_headp, segs, &nsegs, BUS_DMA_NOWAIT);
1916
1917                 if (error == ENOMEM) {
1918                         adapter->no_tx_dma_setup++;
1919                         return (error);
1920                 } else if (error != 0) {
1921                         adapter->no_tx_dma_setup++;
1922                         m_freem(*m_headp);
1923                         *m_headp = NULL;
1924                         return (error);
1925                 }
1926
1927         } else if (error == ENOMEM) {
1928                 adapter->no_tx_dma_setup++;
1929                 return (error);
1930         } else if (error != 0) {
1931                 adapter->no_tx_dma_setup++;
1932                 m_freem(*m_headp);
1933                 *m_headp = NULL;
1934                 return (error);
1935         }
1936
1937         /*
1938          * TSO Hardware workaround, if this packet is not
1939          * TSO, and is only a single descriptor long, and
1940          * it follows a TSO burst, then we need to add a
1941          * sentinel descriptor to prevent premature writeback.
1942          */
1943         if ((do_tso == 0) && (txr->tx_tso == TRUE)) {
1944                 if (nsegs == 1)
1945                         tso_desc = TRUE;
1946                 txr->tx_tso = FALSE;
1947         }
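        /*
         * An illustrative case: a 1514-byte frame mapping to a single
         * DMA segment right after a TSO burst is split below into a
         * 1510-byte descriptor plus a 4-byte sentinel descriptor.
         */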
1948
1949         if (nsegs > (txr->tx_avail - 2)) {
1950                 txr->no_desc_avail++;
1951                 bus_dmamap_unload(txr->txtag, map);
1952                 return (ENOBUFS);
1953         }
1954         m_head = *m_headp;
1955
1956         /* Do hardware assists */
1957         if (m_head->m_pkthdr.csum_flags & CSUM_TSO) {
1958                 em_tso_setup(txr, m_head, ip_off, ip, tp,
1959                     &txd_upper, &txd_lower);
1960                 /* we need to make a final sentinel transmit desc */
1961                 tso_desc = TRUE;
1962         } else if (m_head->m_pkthdr.csum_flags & CSUM_OFFLOAD)
1963                 em_transmit_checksum_setup(txr, m_head,
1964                     ip_off, ip, &txd_upper, &txd_lower);
1965
1966         i = txr->next_avail_desc;
1967
1968         /* Set up our transmit descriptors */
1969         for (j = 0; j < nsegs; j++) {
1970                 bus_size_t seg_len;
1971                 bus_addr_t seg_addr;
1972
1973                 tx_buffer = &txr->tx_buffers[i];
1974                 ctxd = &txr->tx_base[i];
1975                 seg_addr = segs[j].ds_addr;
1976                 seg_len  = segs[j].ds_len;
1977                 /*
1978                 ** TSO Workaround:
1979                 ** If this is the last descriptor, we want to
1980                 ** split it so we have a small final sentinel
1981                 */
1982                 if (tso_desc && (j == (nsegs -1)) && (seg_len > 8)) {
1983                         seg_len -= 4;
1984                         ctxd->buffer_addr = htole64(seg_addr);
1985                         ctxd->lower.data = htole32(
1986                             adapter->txd_cmd | txd_lower | seg_len);
1987                         ctxd->upper.data =
1988                             htole32(txd_upper);
1989                         if (++i == adapter->num_tx_desc)
1990                                 i = 0;
1991                         /* Now make the sentinel */     
1992                         ++txd_used; /* using an extra txd */
1993                         ctxd = &txr->tx_base[i];
1994                         tx_buffer = &txr->tx_buffers[i];
1995                         ctxd->buffer_addr =
1996                             htole64(seg_addr + seg_len);
1997                         ctxd->lower.data = htole32(
1998                             adapter->txd_cmd | txd_lower | 4);
1999                         ctxd->upper.data =
2000                             htole32(txd_upper);
2001                         last = i;
2002                         if (++i == adapter->num_tx_desc)
2003                                 i = 0;
2004                 } else {
2005                         ctxd->buffer_addr = htole64(seg_addr);
2006                         ctxd->lower.data = htole32(
2007                             adapter->txd_cmd | txd_lower | seg_len);
2008                         ctxd->upper.data =
2009                             htole32(txd_upper);
2010                         last = i;
2011                         if (++i == adapter->num_tx_desc)
2012                                 i = 0;
2013                 }
2014                 tx_buffer->m_head = NULL;
2015                 tx_buffer->next_eop = -1;
2016         }
2017
2018         txr->next_avail_desc = i;
2019         txr->tx_avail -= nsegs;
2020         if (tso_desc) /* TSO used an extra for sentinel */
2021                 txr->tx_avail -= txd_used;
2022
2023         if (m_head->m_flags & M_VLANTAG) {
2024                 /* Set the vlan id. */
2025                 ctxd->upper.fields.special =
2026                     htole16(m_head->m_pkthdr.ether_vtag);
2027                 /* Tell hardware to add tag */
2028                 ctxd->lower.data |= htole32(E1000_TXD_CMD_VLE);
2029         }
2030
2031         tx_buffer->m_head = m_head;
2032         tx_buffer_mapped->map = tx_buffer->map;
2033         tx_buffer->map = map;
2034         bus_dmamap_sync(txr->txtag, map, BUS_DMASYNC_PREWRITE);
2035
2036         /*
2037          * Last Descriptor of Packet
2038          * needs End Of Packet (EOP)
2039          * and Report Status (RS)
2040          */
2041         ctxd->lower.data |=
2042             htole32(E1000_TXD_CMD_EOP | E1000_TXD_CMD_RS);
2043         /*
2044          * Keep track, in the first buffer, of which
2045          * descriptor will be written back.
2046          */
2047         tx_buffer = &txr->tx_buffers[first];
2048         tx_buffer->next_eop = last;
2049         /* Update the watchdog time early and often */
2050         txr->watchdog_time = ticks;
2051
2052         /*
2053          * Advance the Transmit Descriptor Tail (TDT); this tells the E1000
2054          * that this frame is available to transmit.
2055          */
2056         bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
2057             BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
2058         E1000_WRITE_REG(&adapter->hw, E1000_TDT(txr->me), i);
2059
2060         return (0);
2061 }
2062
2063 static void
2064 em_set_promisc(struct adapter *adapter)
2065 {
2066         struct ifnet    *ifp = adapter->ifp;
2067         u32             reg_rctl;
2068
2069         reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
2070
2071         if (ifp->if_flags & IFF_PROMISC) {
2072                 reg_rctl |= (E1000_RCTL_UPE | E1000_RCTL_MPE);
2073                 /* Turn this on if you want to see bad packets */
2074                 if (em_debug_sbp)
2075                         reg_rctl |= E1000_RCTL_SBP;
2076                 E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2077         } else if (ifp->if_flags & IFF_ALLMULTI) {
2078                 reg_rctl |= E1000_RCTL_MPE;
2079                 reg_rctl &= ~E1000_RCTL_UPE;
2080                 E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2081         }
2082 }
2083
2084 static void
2085 em_disable_promisc(struct adapter *adapter)
2086 {
2087         u32     reg_rctl;
2088
2089         reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
2090
2091         reg_rctl &= ~E1000_RCTL_UPE;
2092         reg_rctl &= ~E1000_RCTL_MPE;
2093         reg_rctl &= ~E1000_RCTL_SBP;
2094         E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2095 }
2096
2097
2098 /*********************************************************************
2099  *  Multicast Update
2100  *
2101  *  This routine is called whenever the multicast address list is updated.
2102  *
2103  **********************************************************************/
2104
2105 static void
2106 em_set_multi(struct adapter *adapter)
2107 {
2108         struct ifnet    *ifp = adapter->ifp;
2109         struct ifmultiaddr *ifma;
2110         u32 reg_rctl = 0;
2111         u8  *mta; /* Multicast array memory */
2112         int mcnt = 0;
2113
2114         IOCTL_DEBUGOUT("em_set_multi: begin");
2115
2116         mta = adapter->mta;
2117         bzero(mta, sizeof(u8) * ETH_ADDR_LEN * MAX_NUM_MULTICAST_ADDRESSES);
2118
2119         if (adapter->hw.mac.type == e1000_82542 && 
2120             adapter->hw.revision_id == E1000_REVISION_2) {
2121                 reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
2122                 if (adapter->hw.bus.pci_cmd_word & CMD_MEM_WRT_INVALIDATE)
2123                         e1000_pci_clear_mwi(&adapter->hw);
2124                 reg_rctl |= E1000_RCTL_RST;
2125                 E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2126                 msec_delay(5);
2127         }
2128
2129 #if __FreeBSD_version < 800000
2130         IF_ADDR_LOCK(ifp);
2131 #else
2132         if_maddr_rlock(ifp);
2133 #endif
2134         TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
2135                 if (ifma->ifma_addr->sa_family != AF_LINK)
2136                         continue;
2137
2138                 if (mcnt == MAX_NUM_MULTICAST_ADDRESSES)
2139                         break;
2140
2141                 bcopy(LLADDR((struct sockaddr_dl *)ifma->ifma_addr),
2142                     &mta[mcnt * ETH_ADDR_LEN], ETH_ADDR_LEN);
2143                 mcnt++;
2144         }
2145 #if __FreeBSD_version < 800000
2146         IF_ADDR_UNLOCK(ifp);
2147 #else
2148         if_maddr_runlock(ifp);
2149 #endif
2150         if (mcnt >= MAX_NUM_MULTICAST_ADDRESSES) {
2151                 reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
2152                 reg_rctl |= E1000_RCTL_MPE;
2153                 E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2154         } else
2155                 e1000_update_mc_addr_list(&adapter->hw, mta, mcnt);
2156
2157         if (adapter->hw.mac.type == e1000_82542 && 
2158             adapter->hw.revision_id == E1000_REVISION_2) {
2159                 reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
2160                 reg_rctl &= ~E1000_RCTL_RST;
2161                 E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2162                 msec_delay(5);
2163                 if (adapter->hw.bus.pci_cmd_word & CMD_MEM_WRT_INVALIDATE)
2164                         e1000_pci_set_mwi(&adapter->hw);
2165         }
2166 }
2167
2168
2169 /*********************************************************************
2170  *  Timer routine
2171  *
2172  *  This routine checks for link status and updates statistics.
2173  *
2174  **********************************************************************/
2175
2176 static void
2177 em_local_timer(void *arg)
2178 {
2179         struct adapter  *adapter = arg;
2180         struct ifnet    *ifp = adapter->ifp;
2181         struct tx_ring  *txr = adapter->tx_rings;
2182
2183         EM_CORE_LOCK_ASSERT(adapter);
2184
2185         em_update_link_status(adapter);
2186         em_update_stats_counters(adapter);
2187
2188         /* Reset LAA into RAR[0] on 82571 */
2189         if ((adapter->hw.mac.type == e1000_82571) &&
2190             e1000_get_laa_state_82571(&adapter->hw))
2191                 e1000_rar_set(&adapter->hw, adapter->hw.mac.addr, 0);
2192
2193         /* 
2194         ** Don't do TX watchdog check if we've been paused
2195         */
2196         if (adapter->pause_frames) {
2197                 adapter->pause_frames = 0;
2198                 goto out;
2199         }
2200         /*
2201         ** Check on the state of the TX queue(s); this
2202         ** can be done without the lock because it is
2203         ** read-only and the HUNG state is static once set.
2204         */
2205         for (int i = 0; i < adapter->num_queues; i++, txr++)
2206                 if (txr->queue_status == EM_QUEUE_HUNG)
2207                         goto hung;
2208 out:
2209         callout_reset(&adapter->timer, hz, em_local_timer, adapter);
2210         return;
2211 hung:
2212         /* Looks like we're hung */
2213         device_printf(adapter->dev, "Watchdog timeout -- resetting\n");
2214         device_printf(adapter->dev,
2215             "Queue(%d) tdh = %d, hw tdt = %d\n", txr->me,
2216             E1000_READ_REG(&adapter->hw, E1000_TDH(txr->me)),
2217             E1000_READ_REG(&adapter->hw, E1000_TDT(txr->me)));
2218         device_printf(adapter->dev, "TX(%d) desc avail = %d, "
2219             "Next TX to Clean = %d\n",
2220             txr->me, txr->tx_avail, txr->next_to_clean);
2221         ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
2222         adapter->watchdog_events++;
2223         em_init_locked(adapter);
2224 }
2225
2226
2227 static void
2228 em_update_link_status(struct adapter *adapter)
2229 {
2230         struct e1000_hw *hw = &adapter->hw;
2231         struct ifnet *ifp = adapter->ifp;
2232         device_t dev = adapter->dev;
2233         struct tx_ring *txr = adapter->tx_rings;
2234         u32 link_check = 0;
2235
2236         /* Get the cached link value or read phy for real */
2237         switch (hw->phy.media_type) {
2238         case e1000_media_type_copper:
2239                 if (hw->mac.get_link_status) {
2240                         /* Do the work to read phy */
2241                         e1000_check_for_link(hw);
2242                         link_check = !hw->mac.get_link_status;
2243                         if (link_check) /* ESB2 fix */
2244                                 e1000_cfg_on_link_up(hw);
2245                 } else
2246                         link_check = TRUE;
2247                 break;
2248         case e1000_media_type_fiber:
2249                 e1000_check_for_link(hw);
2250                 link_check = (E1000_READ_REG(hw, E1000_STATUS) &
2251                                  E1000_STATUS_LU);
2252                 break;
2253         case e1000_media_type_internal_serdes:
2254                 e1000_check_for_link(hw);
2255                 link_check = adapter->hw.mac.serdes_has_link;
2256                 break;
2257         default:
2258         case e1000_media_type_unknown:
2259                 break;
2260         }
2261
2262         /* Now check for a transition */
2263         if (link_check && (adapter->link_active == 0)) {
2264                 e1000_get_speed_and_duplex(hw, &adapter->link_speed,
2265                     &adapter->link_duplex);
2266                 /* Check if we must disable SPEED_MODE bit on PCI-E */
2267                 if ((adapter->link_speed != SPEED_1000) &&
2268                     ((hw->mac.type == e1000_82571) ||
2269                     (hw->mac.type == e1000_82572))) {
2270                         int tarc0;
2271                         tarc0 = E1000_READ_REG(hw, E1000_TARC(0));
2272                         tarc0 &= ~SPEED_MODE_BIT;
2273                         E1000_WRITE_REG(hw, E1000_TARC(0), tarc0);
2274                 }
2275                 if (bootverbose)
2276                         device_printf(dev, "Link is up %d Mbps %s\n",
2277                             adapter->link_speed,
2278                             ((adapter->link_duplex == FULL_DUPLEX) ?
2279                             "Full Duplex" : "Half Duplex"));
2280                 adapter->link_active = 1;
2281                 adapter->smartspeed = 0;
2282                 ifp->if_baudrate = adapter->link_speed * 1000000;
2283                 if_link_state_change(ifp, LINK_STATE_UP);
2284         } else if (!link_check && (adapter->link_active == 1)) {
2285                 ifp->if_baudrate = adapter->link_speed = 0;
2286                 adapter->link_duplex = 0;
2287                 if (bootverbose)
2288                         device_printf(dev, "Link is Down\n");
2289                 adapter->link_active = 0;
2290                 /* Link down, disable watchdog */
2291                 for (int i = 0; i < adapter->num_queues; i++, txr++)
2292                         txr->queue_status = EM_QUEUE_IDLE;
2293                 if_link_state_change(ifp, LINK_STATE_DOWN);
2294         }
2295 }
2296
2297 /*********************************************************************
2298  *
2299  *  This routine disables all traffic on the adapter by issuing a
2300  *  global reset on the MAC and by deallocating the TX/RX buffers.
2301  *
2302  *  This routine should always be called with BOTH the CORE
2303  *  and TX locks.
2304  **********************************************************************/
2305
2306 static void
2307 em_stop(void *arg)
2308 {
2309         struct adapter  *adapter = arg;
2310         struct ifnet    *ifp = adapter->ifp;
2311         struct tx_ring  *txr = adapter->tx_rings;
2312
2313         EM_CORE_LOCK_ASSERT(adapter);
2314
2315         INIT_DEBUGOUT("em_stop: begin");
2316
2317         em_disable_intr(adapter);
2318         callout_stop(&adapter->timer);
2319
2320         /* Tell the stack that the interface is no longer active */
2321         ifp->if_drv_flags &= ~(IFF_DRV_RUNNING | IFF_DRV_OACTIVE);
2322
2323         /* Unarm watchdog timer. */
2324         for (int i = 0; i < adapter->num_queues; i++, txr++) {
2325                 EM_TX_LOCK(txr);
2326                 txr->queue_status = EM_QUEUE_IDLE;
2327                 EM_TX_UNLOCK(txr);
2328         }
2329
2330         e1000_reset_hw(&adapter->hw);
2331         E1000_WRITE_REG(&adapter->hw, E1000_WUC, 0);
2332
2333         e1000_led_off(&adapter->hw);
2334         e1000_cleanup_led(&adapter->hw);
2335 }
2336
2337
2338 /*********************************************************************
2339  *
2340  *  Determine hardware revision.
2341  *
2342  **********************************************************************/
2343 static void
2344 em_identify_hardware(struct adapter *adapter)
2345 {
2346         device_t dev = adapter->dev;
2347
2348         /* Make sure our PCI config space has the necessary stuff set */
2349         adapter->hw.bus.pci_cmd_word = pci_read_config(dev, PCIR_COMMAND, 2);
2350         if (!((adapter->hw.bus.pci_cmd_word & PCIM_CMD_BUSMASTEREN) &&
2351             (adapter->hw.bus.pci_cmd_word & PCIM_CMD_MEMEN))) {
2352                 device_printf(dev, "Memory Access and/or Bus Master bits "
2353                     "were not set!\n");
2354                 adapter->hw.bus.pci_cmd_word |=
2355                     (PCIM_CMD_BUSMASTEREN | PCIM_CMD_MEMEN);
2356                 pci_write_config(dev, PCIR_COMMAND,
2357                     adapter->hw.bus.pci_cmd_word, 2);
2358         }
2359
2360         /* Save off the information about this board */
2361         adapter->hw.vendor_id = pci_get_vendor(dev);
2362         adapter->hw.device_id = pci_get_device(dev);
2363         adapter->hw.revision_id = pci_read_config(dev, PCIR_REVID, 1);
2364         adapter->hw.subsystem_vendor_id =
2365             pci_read_config(dev, PCIR_SUBVEND_0, 2);
2366         adapter->hw.subsystem_device_id =
2367             pci_read_config(dev, PCIR_SUBDEV_0, 2);
2368
2369         /* Do Shared Code Init and Setup */
2370         if (e1000_set_mac_type(&adapter->hw)) {
2371                 device_printf(dev, "Setup init failure\n");
2372                 return;
2373         }
2374 }
2375
2376 static int
2377 em_allocate_pci_resources(struct adapter *adapter)
2378 {
2379         device_t        dev = adapter->dev;
2380         int             rid;
2381
2382         rid = PCIR_BAR(0);
2383         adapter->memory = bus_alloc_resource_any(dev, SYS_RES_MEMORY,
2384             &rid, RF_ACTIVE);
2385         if (adapter->memory == NULL) {
2386                 device_printf(dev, "Unable to allocate bus resource: memory\n");
2387                 return (ENXIO);
2388         }
2389         adapter->osdep.mem_bus_space_tag =
2390             rman_get_bustag(adapter->memory);
2391         adapter->osdep.mem_bus_space_handle =
2392             rman_get_bushandle(adapter->memory);
2393         adapter->hw.hw_addr = (u8 *)&adapter->osdep.mem_bus_space_handle;
2394
2395         /* Default to a single queue */
2396         adapter->num_queues = 1;
2397
2398         /*
2399          * Setup MSI/X or MSI if PCI Express
2400          */
2401         adapter->msix = em_setup_msix(adapter);
2402
2403         adapter->hw.back = &adapter->osdep;
2404
2405         return (0);
2406 }
2407
2408 /*********************************************************************
2409  *
2410  *  Setup the Legacy or MSI Interrupt handler
2411  *
2412  **********************************************************************/
2413 int
2414 em_allocate_legacy(struct adapter *adapter)
2415 {
2416         device_t dev = adapter->dev;
2417         int error, rid = 0;
2418
2419         /* Manually turn off all interrupts */
2420         E1000_WRITE_REG(&adapter->hw, E1000_IMC, 0xffffffff);
2421
2422         if (adapter->msix == 1) /* using MSI */
2423                 rid = 1;
2424         /* We allocate a single interrupt resource */
2425         adapter->res = bus_alloc_resource_any(dev,
2426             SYS_RES_IRQ, &rid, RF_SHAREABLE | RF_ACTIVE);
2427         if (adapter->res == NULL) {
2428                 device_printf(dev, "Unable to allocate bus resource: "
2429                     "interrupt\n");
2430                 return (ENXIO);
2431         }
2432
2433         /*
2434          * Allocate a fast interrupt and the associated
2435          * deferred processing contexts.
2436          */
2437         TASK_INIT(&adapter->que_task, 0, em_handle_que, adapter);
2438         TASK_INIT(&adapter->link_task, 0, em_handle_link, adapter);
2439         adapter->tq = taskqueue_create_fast("em_taskq", M_NOWAIT,
2440             taskqueue_thread_enqueue, &adapter->tq);
2441         taskqueue_start_threads(&adapter->tq, 1, PI_NET, "%s taskq",
2442             device_get_nameunit(adapter->dev));
2443         if ((error = bus_setup_intr(dev, adapter->res, INTR_TYPE_NET,
2444             em_irq_fast, NULL, adapter, &adapter->tag)) != 0) {
2445                 device_printf(dev, "Failed to register fast interrupt "
2446                             "handler: %d\n", error);
2447                 taskqueue_free(adapter->tq);
2448                 adapter->tq = NULL;
2449                 return (error);
2450         }
2451         
2452         return (0);
2453 }
2454
2455 /*********************************************************************
2456  *
2457  *  Setup the MSIX Interrupt handlers
2458  *   This is not really multiqueue; rather,
2459  *   it just uses multiple interrupt vectors.
2460  *
2461  **********************************************************************/
2462 int
2463 em_allocate_msix(struct adapter *adapter)
2464 {
2465         device_t        dev = adapter->dev;
2466         struct          tx_ring *txr = adapter->tx_rings;
2467         struct          rx_ring *rxr = adapter->rx_rings;
2468         int             error, rid, vector = 0;
2469
2470
2471         /* Make sure all interrupts are disabled */
2472         E1000_WRITE_REG(&adapter->hw, E1000_IMC, 0xffffffff);
2473
2474         /* First set up ring resources */
2475         for (int i = 0; i < adapter->num_queues; i++, txr++, rxr++) {
2476
2477                 /* RX ring */
2478                 rid = vector + 1;
2479
2480                 rxr->res = bus_alloc_resource_any(dev,
2481                     SYS_RES_IRQ, &rid, RF_ACTIVE);
2482                 if (rxr->res == NULL) {
2483                         device_printf(dev,
2484                             "Unable to allocate bus resource: "
2485                             "RX MSIX Interrupt %d\n", i);
2486                         return (ENXIO);
2487                 }
2488                 if ((error = bus_setup_intr(dev, rxr->res,
2489                     INTR_TYPE_NET | INTR_MPSAFE, NULL, em_msix_rx,
2490                     rxr, &rxr->tag)) != 0) {
2491                         device_printf(dev, "Failed to register RX handler");
2492                         return (error);
2493                 }
2494 #if __FreeBSD_version >= 800504
2495                 bus_describe_intr(dev, rxr->res, rxr->tag, "rx %d", i);
2496 #endif
2497                 rxr->msix = vector++; /* NOTE increment vector for TX */
2498                 TASK_INIT(&rxr->rx_task, 0, em_handle_rx, rxr);
2499                 rxr->tq = taskqueue_create_fast("em_rxq", M_NOWAIT,
2500                     taskqueue_thread_enqueue, &rxr->tq);
2501                 taskqueue_start_threads(&rxr->tq, 1, PI_NET, "%s rxq",
2502                     device_get_nameunit(adapter->dev));
2503                 /*
2504                 ** Set the bit to enable interrupt
2505                 ** in E1000_IMS -- bits 20 and 21
2506                 ** are for RX0 and RX1, note this has
2507                 ** NOTHING to do with the MSIX vector
2508                 */
2509                 rxr->ims = 1 << (20 + i);
2510                 adapter->ivars |= (8 | rxr->msix) << (i * 4);
2511
2512                 /* TX ring */
2513                 rid = vector + 1;
2514                 txr->res = bus_alloc_resource_any(dev,
2515                     SYS_RES_IRQ, &rid, RF_ACTIVE);
2516                 if (txr->res == NULL) {
2517                         device_printf(dev,
2518                             "Unable to allocate bus resource: "
2519                             "TX MSIX Interrupt %d\n", i);
2520                         return (ENXIO);
2521                 }
2522                 if ((error = bus_setup_intr(dev, txr->res,
2523                     INTR_TYPE_NET | INTR_MPSAFE, NULL, em_msix_tx,
2524                     txr, &txr->tag)) != 0) {
2525                         device_printf(dev, "Failed to register TX handler");
2526                         return (error);
2527                 }
2528 #if __FreeBSD_version >= 800504
2529                 bus_describe_intr(dev, txr->res, txr->tag, "tx %d", i);
2530 #endif
2531                 txr->msix = vector++; /* Increment vector for next pass */
2532                 TASK_INIT(&txr->tx_task, 0, em_handle_tx, txr);
2533                 txr->tq = taskqueue_create_fast("em_txq", M_NOWAIT,
2534                     taskqueue_thread_enqueue, &txr->tq);
2535                 taskqueue_start_threads(&txr->tq, 1, PI_NET, "%s txq",
2536                     device_get_nameunit(adapter->dev));
2537                 /*
2538                 ** Set the bit to enable interrupt
2539                 ** in E1000_IMS -- bits 22 and 23
2540                 ** are for TX0 and TX1, note this has
2541                 ** NOTHING to do with the MSIX vector
2542                 */
2543                 txr->ims = 1 << (22 + i);
2544                 adapter->ivars |= (8 | txr->msix) << (8 + (i * 4));
2545         }
2546
2547         /* Link interrupt */
2548         ++rid;
2549         adapter->res = bus_alloc_resource_any(dev,
2550             SYS_RES_IRQ, &rid, RF_ACTIVE);
2551         if (!adapter->res) {
2552                 device_printf(dev, "Unable to allocate "
2553                     "bus resource: Link interrupt [%d]\n", rid);
2554                 return (ENXIO);
2555         }
2556         /* Set the link handler function */
2557         error = bus_setup_intr(dev, adapter->res,
2558             INTR_TYPE_NET | INTR_MPSAFE, NULL,
2559             em_msix_link, adapter, &adapter->tag);
2560         if (error) {
2561                 adapter->res = NULL;
2562                 device_printf(dev, "Failed to register LINK handler");
2563                 return (error);
2564         }
2565 #if __FreeBSD_version >= 800504
2566         bus_describe_intr(dev, adapter->res, adapter->tag, "link");
2567 #endif
2568         adapter->linkvec = vector;
2569         adapter->ivars |=  (8 | vector) << 16;
2570         adapter->ivars |= 0x80000000;
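        /*
         * A worked example (one queue; vectors rx=0, tx=1, link=2):
         *   ivars = (8|0) | ((8|1) << 8) | ((8|2) << 16) | 0x80000000
         *         = 0x800a0908
         * i.e. each 4-bit field carries a valid bit (8) plus its vector.
         */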
2571
2572         return (0);
2573 }
2574
2575
2576 static void
2577 em_free_pci_resources(struct adapter *adapter)
2578 {
2579         device_t        dev = adapter->dev;
2580         struct tx_ring  *txr;
2581         struct rx_ring  *rxr;
2582         int             rid;
2583
2584
2585         /*
2586         ** Release all the queue interrupt resources:
2587         */
2588         for (int i = 0; i < adapter->num_queues; i++) {
2589                 txr = &adapter->tx_rings[i];
2590                 rxr = &adapter->rx_rings[i];
2591                 /* an early abort? */
2592                 if ((txr == NULL) || (rxr == NULL))
2593                         break;
2594                 rid = txr->msix + 1;
2595                 if (txr->tag != NULL) {
2596                         bus_teardown_intr(dev, txr->res, txr->tag);
2597                         txr->tag = NULL;
2598                 }
2599                 if (txr->res != NULL)
2600                         bus_release_resource(dev, SYS_RES_IRQ,
2601                             rid, txr->res);
2602                 rid = rxr->msix + 1;
2603                 if (rxr->tag != NULL) {
2604                         bus_teardown_intr(dev, rxr->res, rxr->tag);
2605                         rxr->tag = NULL;
2606                 }
2607                 if (rxr->res != NULL)
2608                         bus_release_resource(dev, SYS_RES_IRQ,
2609                             rid, rxr->res);
2610         }
2611
2612         if (adapter->linkvec) /* we are doing MSIX */
2613                 rid = adapter->linkvec + 1;
2614         else
2615                 rid = (adapter->msix != 0) ? 1 : 0;
2616
2617         if (adapter->tag != NULL) {
2618                 bus_teardown_intr(dev, adapter->res, adapter->tag);
2619                 adapter->tag = NULL;
2620         }
2621
2622         if (adapter->res != NULL)
2623                 bus_release_resource(dev, SYS_RES_IRQ, rid, adapter->res);
2624
2625
2626         if (adapter->msix)
2627                 pci_release_msi(dev);
2628
2629         if (adapter->msix_mem != NULL)
2630                 bus_release_resource(dev, SYS_RES_MEMORY,
2631                     PCIR_BAR(EM_MSIX_BAR), adapter->msix_mem);
2632
2633         if (adapter->memory != NULL)
2634                 bus_release_resource(dev, SYS_RES_MEMORY,
2635                     PCIR_BAR(0), adapter->memory);
2636
2637         if (adapter->flash != NULL)
2638                 bus_release_resource(dev, SYS_RES_MEMORY,
2639                     EM_FLASH, adapter->flash);
2640 }
2641
2642 /*
2643  * Setup MSI or MSI/X
2644  */
2645 static int
2646 em_setup_msix(struct adapter *adapter)
2647 {
2648         device_t dev = adapter->dev;
2649         int val = 0;
2650
2651
2652         /*
2653         ** Setup MSI/X for Hartwell: tests have shown
2654         ** use of two queues to be unstable, and to
2655         ** provide no great gain anyway, so we simply
2656         ** separate the interrupts and use a single queue.
2657         */
2658         if ((adapter->hw.mac.type == e1000_82574) &&
2659             (em_enable_msix == TRUE)) {
2660                 /* Map the MSIX BAR */
2661                 int rid = PCIR_BAR(EM_MSIX_BAR);
2662                 adapter->msix_mem = bus_alloc_resource_any(dev,
2663                     SYS_RES_MEMORY, &rid, RF_ACTIVE);
2664                 if (!adapter->msix_mem) {
2665                         /* May not be enabled */
2666                         device_printf(adapter->dev,
2667                             "Unable to map MSIX table\n");
2668                         goto msi;
2669                 }
2670                 val = pci_msix_count(dev); 
2671                 if (val < 3) {
2672                         bus_release_resource(dev, SYS_RES_MEMORY,
2673                             PCIR_BAR(EM_MSIX_BAR), adapter->msix_mem);
2674                         adapter->msix_mem = NULL;
2675                         device_printf(adapter->dev,
2676                             "MSIX: insufficient vectors, using MSI\n");
2677                         goto msi;
2678                 }
2679                 val = 3;
2680                 adapter->num_queues = 1;
2681                 if (pci_alloc_msix(dev, &val) == 0) {
2682                         device_printf(adapter->dev,
2683                             "Using MSIX interrupts "
2684                             "with %d vectors\n", val);
2685                 }
2686
2687                 return (val);
2688         }
2689 msi:
2690         val = pci_msi_count(dev);
2691         if (val == 1 && pci_alloc_msi(dev, &val) == 0) {
2692                 adapter->msix = 1;
2693                 device_printf(adapter->dev, "Using an MSI interrupt\n");
2694                 return (val);
2695         }
2696         /* Should only happen due to manual configuration */
2697         device_printf(adapter->dev, "No MSI/MSIX; using a Legacy IRQ\n");
2698         return (0);
2699 }
2700
2701
2702 /*********************************************************************
2703  *
2704  *  Initialize the hardware to a configuration
2705  *  as specified by the adapter structure.
2706  *
2707  **********************************************************************/
2708 static void
2709 em_reset(struct adapter *adapter)
2710 {
2711         device_t        dev = adapter->dev;
2712         struct ifnet    *ifp = adapter->ifp;
2713         struct e1000_hw *hw = &adapter->hw;
2714         u16             rx_buffer_size;
2715
2716         INIT_DEBUGOUT("em_reset: begin");
2717
2718         /* By default, disable smart power down on newer adapters. */
2719         if (!em_smart_pwr_down && (hw->mac.type == e1000_82571 ||
2720             hw->mac.type == e1000_82572)) {
2721                 u16 phy_tmp = 0;
2722
2723                 /* Speed up time to link by disabling smart power down. */
2724                 e1000_read_phy_reg(hw, IGP02E1000_PHY_POWER_MGMT, &phy_tmp);
2725                 phy_tmp &= ~IGP02E1000_PM_SPD;
2726                 e1000_write_phy_reg(hw, IGP02E1000_PHY_POWER_MGMT, phy_tmp);
2727         }
2728
2729         /*
2730          * These parameters control the automatic generation (Tx) and
2731          * response (Rx) to Ethernet PAUSE frames.
2732          * - High water mark should allow for at least two frames to be
2733          *   received after sending an XOFF.
2734          * - Low water mark works best when it is very near the high water mark.
2735          *   This allows the receiver to restart by sending XON when it has
2736          *   drained a bit. Here we use an arbitrary value of 1500 which will
2737          *   restart after one full frame is pulled from the buffer. There
2738          *   could be several smaller frames in the buffer and if so they will
2739          *   not trigger the XON until their total number reduces the buffer
2740          *   by 1500.
2741          * - The pause time is fairly large at 1000 x 512ns = 512 usec.
2742          */
2743         rx_buffer_size = ((E1000_READ_REG(hw, E1000_PBA) & 0xffff) << 10 );
2744
2745         hw->fc.high_water = rx_buffer_size -
2746             roundup2(adapter->max_frame_size, 1024);
2747         hw->fc.low_water = hw->fc.high_water - 1500;
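
        /*
         * Worked example (editorial; the PBA value is hypothetical): if
         * the PBA register reports 24 KB of RX buffer, rx_buffer_size is
         * 24 * 1024 = 24576 bytes.  With a 1522-byte max frame,
         * roundup2(1522, 1024) = 2048, so high_water = 24576 - 2048 =
         * 22528 and low_water = 22528 - 1500 = 21028.
         */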
2748
2749         if (hw->mac.type == e1000_80003es2lan)
2750                 hw->fc.pause_time = 0xFFFF;
2751         else
2752                 hw->fc.pause_time = EM_FC_PAUSE_TIME;
2753
2754         hw->fc.send_xon = TRUE;
2755
2756         /* Set flow control; use the tunable value if it is sane */
2757         hw->fc.requested_mode = adapter->fc_setting;
2758
2759         /* Workaround: no TX flow ctrl for PCH */
2760         if (hw->mac.type == e1000_pchlan)
2761                 hw->fc.requested_mode = e1000_fc_rx_pause;
2762
2763         /* Override: hardware-specific magic settings for PCH2LAN */
2764         if (hw->mac.type == e1000_pch2lan) {
2765                 hw->fc.high_water = 0x5C20;
2766                 hw->fc.low_water = 0x5048;
2767                 hw->fc.pause_time = 0x0650;
2768                 hw->fc.refresh_time = 0x0400;
2769                 /* Jumbos need adjusted PBA */
2770                 if (ifp->if_mtu > ETHERMTU)
2771                         E1000_WRITE_REG(hw, E1000_PBA, 12);
2772                 else
2773                         E1000_WRITE_REG(hw, E1000_PBA, 26);
2774         }
2775
2776         /* Issue a global reset */
2777         e1000_reset_hw(hw);
2778         E1000_WRITE_REG(hw, E1000_WUC, 0);
2779         em_disable_aspm(adapter);
2780
2781         if (e1000_init_hw(hw) < 0) {
2782                 device_printf(dev, "Hardware Initialization Failed\n");
2783                 return;
2784         }
2785
2786         E1000_WRITE_REG(hw, E1000_VET, ETHERTYPE_VLAN);
2787         e1000_get_phy_info(hw);
2788         e1000_check_for_link(hw);
2789         return;
2790 }
2791
2792 /*********************************************************************
2793  *
2794  *  Setup networking device structure and register an interface.
2795  *
2796  **********************************************************************/
2797 static int
2798 em_setup_interface(device_t dev, struct adapter *adapter)
2799 {
2800         struct ifnet   *ifp;
2801
2802         INIT_DEBUGOUT("em_setup_interface: begin");
2803
2804         ifp = adapter->ifp = if_alloc(IFT_ETHER);
2805         if (ifp == NULL) {
2806                 device_printf(dev, "can not allocate ifnet structure\n");
2807                 return (-1);
2808         }
2809         if_initname(ifp, device_get_name(dev), device_get_unit(dev));
2810         ifp->if_mtu = ETHERMTU;
2811         ifp->if_init =  em_init;
2812         ifp->if_softc = adapter;
2813         ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
2814         ifp->if_ioctl = em_ioctl;
2815         ifp->if_start = em_start;
2816         IFQ_SET_MAXLEN(&ifp->if_snd, adapter->num_tx_desc - 1);
2817         ifp->if_snd.ifq_drv_maxlen = adapter->num_tx_desc - 1;
2818         IFQ_SET_READY(&ifp->if_snd);
2819
2820         ether_ifattach(ifp, adapter->hw.mac.addr);
2821
2822         ifp->if_capabilities = ifp->if_capenable = 0;
2823
2824 #ifdef EM_MULTIQUEUE
2825         /* Multiqueue tx functions */
2826         ifp->if_transmit = em_mq_start;
2827         ifp->if_qflush = em_qflush;
2828 #endif  
2829
2830         ifp->if_capabilities |= IFCAP_HWCSUM | IFCAP_VLAN_HWCSUM;
2831         ifp->if_capenable |= IFCAP_HWCSUM | IFCAP_VLAN_HWCSUM;
2832
2833         /* Enable TSO by default, can disable with ifconfig */
2834         ifp->if_capabilities |= IFCAP_TSO4;
2835         ifp->if_capenable |= IFCAP_TSO4;
2836
2837         /*
2838          * Tell the upper layer(s) we
2839          * support full VLAN capability
2840          */
2841         ifp->if_data.ifi_hdrlen = sizeof(struct ether_vlan_header);
2842         ifp->if_capabilities |= IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_MTU;
2843         ifp->if_capenable |= IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_MTU;
2844
2845         /*
2846         ** Don't turn this on by default: if vlans are
2847         ** created on another pseudo device (e.g. lagg),
2848         ** vlan events are not passed through and operation
2849         ** breaks; with HW FILTER off it works. If you are
2850         ** using vlans directly on the em driver you can
2851         ** enable this to get full hardware tag filtering.
2852         */
2853         ifp->if_capabilities |= IFCAP_VLAN_HWFILTER;
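        /* e.g. "ifconfig em0 vlanhwfilter" can then enable it at runtime */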
2854
2855 #ifdef DEVICE_POLLING
2856         ifp->if_capabilities |= IFCAP_POLLING;
2857 #endif
2858
2859         /* Enable only WOL MAGIC by default */
2860         if (adapter->wol) {
2861                 ifp->if_capabilities |= IFCAP_WOL;
2862                 ifp->if_capenable |= IFCAP_WOL_MAGIC;
2863         }
2864                 
2865         /*
2866          * Specify the media types supported by this adapter and register
2867          * callbacks to update media and link information
2868          */
2869         ifmedia_init(&adapter->media, IFM_IMASK,
2870             em_media_change, em_media_status);
2871         if ((adapter->hw.phy.media_type == e1000_media_type_fiber) ||
2872             (adapter->hw.phy.media_type == e1000_media_type_internal_serdes)) {
2873                 u_char fiber_type = IFM_1000_SX;        /* default type */
2874
2875                 ifmedia_add(&adapter->media, IFM_ETHER | fiber_type | IFM_FDX, 
2876                             0, NULL);
2877                 ifmedia_add(&adapter->media, IFM_ETHER | fiber_type, 0, NULL);
2878         } else {
2879                 ifmedia_add(&adapter->media, IFM_ETHER | IFM_10_T, 0, NULL);
2880                 ifmedia_add(&adapter->media, IFM_ETHER | IFM_10_T | IFM_FDX,
2881                             0, NULL);
2882                 ifmedia_add(&adapter->media, IFM_ETHER | IFM_100_TX,
2883                             0, NULL);
2884                 ifmedia_add(&adapter->media, IFM_ETHER | IFM_100_TX | IFM_FDX,
2885                             0, NULL);
2886                 if (adapter->hw.phy.type != e1000_phy_ife) {
2887                         ifmedia_add(&adapter->media,
2888                                 IFM_ETHER | IFM_1000_T | IFM_FDX, 0, NULL);
2889                         ifmedia_add(&adapter->media,
2890                                 IFM_ETHER | IFM_1000_T, 0, NULL);
2891                 }
2892         }
2893         ifmedia_add(&adapter->media, IFM_ETHER | IFM_AUTO, 0, NULL);
2894         ifmedia_set(&adapter->media, IFM_ETHER | IFM_AUTO);
2895         return (0);
2896 }
2897
2898
2899 /*
2900  * Manage DMA'able memory.
2901  */
2902 static void
2903 em_dmamap_cb(void *arg, bus_dma_segment_t *segs, int nseg, int error)
2904 {
2905         if (error)
2906                 return;
2907         *(bus_addr_t *) arg = segs[0].ds_addr;
2908 }
2909
2910 static int
2911 em_dma_malloc(struct adapter *adapter, bus_size_t size,
2912         struct em_dma_alloc *dma, int mapflags)
2913 {
2914         int error;
2915
2916         error = bus_dma_tag_create(bus_get_dma_tag(adapter->dev), /* parent */
2917                                 EM_DBA_ALIGN, 0,        /* alignment, bounds */
2918                                 BUS_SPACE_MAXADDR,      /* lowaddr */
2919                                 BUS_SPACE_MAXADDR,      /* highaddr */
2920                                 NULL, NULL,             /* filter, filterarg */
2921                                 size,                   /* maxsize */
2922                                 1,                      /* nsegments */
2923                                 size,                   /* maxsegsize */
2924                                 0,                      /* flags */
2925                                 NULL,                   /* lockfunc */
2926                                 NULL,                   /* lockarg */
2927                                 &dma->dma_tag);
2928         if (error) {
2929                 device_printf(adapter->dev,
2930                     "%s: bus_dma_tag_create failed: %d\n",
2931                     __func__, error);
2932                 goto fail_0;
2933         }
2934
2935         error = bus_dmamem_alloc(dma->dma_tag, (void**) &dma->dma_vaddr,
2936             BUS_DMA_NOWAIT | BUS_DMA_COHERENT, &dma->dma_map);
2937         if (error) {
2938                 device_printf(adapter->dev,
2939                     "%s: bus_dmamem_alloc(%ju) failed: %d\n",
2940                     __func__, (uintmax_t)size, error);
2941                 goto fail_2;
2942         }
2943
2944         dma->dma_paddr = 0;
2945         error = bus_dmamap_load(dma->dma_tag, dma->dma_map, dma->dma_vaddr,
2946             size, em_dmamap_cb, &dma->dma_paddr, mapflags | BUS_DMA_NOWAIT);
2947         if (error || dma->dma_paddr == 0) {
2948                 device_printf(adapter->dev,
2949                     "%s: bus_dmamap_load failed: %d\n",
2950                     __func__, error);
2951                 goto fail_3;
2952         }
2953
2954         return (0);
2955
2956 fail_3:
2957         bus_dmamap_unload(dma->dma_tag, dma->dma_map);
2958 fail_2:
2959         bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map);
2960         bus_dma_tag_destroy(dma->dma_tag);
2961 fail_0:
2962         dma->dma_map = NULL;
2963         dma->dma_tag = NULL;
2964
2965         return (error);
2966 }
2967
2968 static void
2969 em_dma_free(struct adapter *adapter, struct em_dma_alloc *dma)
2970 {
2971         if (dma->dma_tag == NULL)
2972                 return;
2973         if (dma->dma_map != NULL) {
2974                 bus_dmamap_sync(dma->dma_tag, dma->dma_map,
2975                     BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
2976                 bus_dmamap_unload(dma->dma_tag, dma->dma_map);
2977                 bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map);
2978                 dma->dma_map = NULL;
2979         }
2980         bus_dma_tag_destroy(dma->dma_tag);
2981         dma->dma_tag = NULL;
2982 }
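
/*
 * Illustrative sketch (editorial, not driver code): the typical
 * allocate/load/free pattern for the em_dma_alloc helpers above.
 * The 4096-byte size here is hypothetical.
 */
#if 0
        struct em_dma_alloc dma;

        if (em_dma_malloc(adapter, 4096, &dma, BUS_DMA_NOWAIT) == 0) {
                /* dma.dma_vaddr is the KVA, dma.dma_paddr the bus address */
                bzero(dma.dma_vaddr, 4096);
                /* hand dma.dma_paddr to the hardware, use it, then free */
                em_dma_free(adapter, &dma);
        }
#endif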
2983
2984
2985 /*********************************************************************
2986  *
2987  *  Allocate memory for the transmit and receive rings, and then
2988  *  the descriptors associated with each, called only once at attach.
2989  *
2990  **********************************************************************/
2991 static int
2992 em_allocate_queues(struct adapter *adapter)
2993 {
2994         device_t                dev = adapter->dev;
2995         struct tx_ring          *txr = NULL;
2996         struct rx_ring          *rxr = NULL;
2997         int rsize, tsize, error = E1000_SUCCESS;
2998         int txconf = 0, rxconf = 0;
2999
3000
3001         /* Allocate the TX ring struct memory */
3002         if (!(adapter->tx_rings =
3003             (struct tx_ring *) malloc(sizeof(struct tx_ring) *
3004             adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
3005                 device_printf(dev, "Unable to allocate TX ring memory\n");
3006                 error = ENOMEM;
3007                 goto fail;
3008         }
3009
3010         /* Now allocate the RX */
3011         if (!(adapter->rx_rings =
3012             (struct rx_ring *) malloc(sizeof(struct rx_ring) *
3013             adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
3014                 device_printf(dev, "Unable to allocate RX ring memory\n");
3015                 error = ENOMEM;
3016                 goto rx_fail;
3017         }
3018
3019         tsize = roundup2(adapter->num_tx_desc *
3020             sizeof(struct e1000_tx_desc), EM_DBA_ALIGN);
3021         /*
3022          * Now set up the TX queues. txconf is needed to handle the
3023          * possibility that things fail midway through and we need
3024          * to unwind the allocations gracefully.
3025          */ 
3026         for (int i = 0; i < adapter->num_queues; i++, txconf++) {
3027                 /* Set up some basics */
3028                 txr = &adapter->tx_rings[i];
3029                 txr->adapter = adapter;
3030                 txr->me = i;
3031
3032                 /* Initialize the TX lock */
3033                 snprintf(txr->mtx_name, sizeof(txr->mtx_name), "%s:tx(%d)",
3034                     device_get_nameunit(dev), txr->me);
3035                 mtx_init(&txr->tx_mtx, txr->mtx_name, NULL, MTX_DEF);
3036
3037                 if (em_dma_malloc(adapter, tsize,
3038                         &txr->txdma, BUS_DMA_NOWAIT)) {
3039                         device_printf(dev,
3040                             "Unable to allocate TX Descriptor memory\n");
3041                         error = ENOMEM;
3042                         goto err_tx_desc;
3043                 }
3044                 txr->tx_base = (struct e1000_tx_desc *)txr->txdma.dma_vaddr;
3045                 bzero((void *)txr->tx_base, tsize);
3046
3047                 if (em_allocate_transmit_buffers(txr)) {
3048                         device_printf(dev,
3049                             "Critical Failure setting up transmit buffers\n");
3050                         error = ENOMEM;
3051                         goto err_tx_desc;
3052                 }
3053 #if __FreeBSD_version >= 800000
3054                 /* Allocate a buf ring */
3055                 txr->br = buf_ring_alloc(4096, M_DEVBUF,
3056                     M_WAITOK, &txr->tx_mtx);
3057 #endif
3058         }
3059
3060         /*
3061          * Next the RX queues...
3062          */ 
3063         rsize = roundup2(adapter->num_rx_desc *
3064             sizeof(struct e1000_rx_desc), EM_DBA_ALIGN);
3065         for (int i = 0; i < adapter->num_queues; i++, rxconf++) {
3066                 rxr = &adapter->rx_rings[i];
3067                 rxr->adapter = adapter;
3068                 rxr->me = i;
3069
3070                 /* Initialize the RX lock */
3071                 snprintf(rxr->mtx_name, sizeof(rxr->mtx_name), "%s:rx(%d)",
3072                     device_get_nameunit(dev), rxr->me);
3073                 mtx_init(&rxr->rx_mtx, rxr->mtx_name, NULL, MTX_DEF);
3074
3075                 if (em_dma_malloc(adapter, rsize,
3076                         &rxr->rxdma, BUS_DMA_NOWAIT)) {
3077                         device_printf(dev,
3078                             "Unable to allocate RX Descriptor memory\n");
3079                         error = ENOMEM;
3080                         goto err_rx_desc;
3081                 }
3082                 rxr->rx_base = (struct e1000_rx_desc *)rxr->rxdma.dma_vaddr;
3083                 bzero((void *)rxr->rx_base, rsize);
3084
3085                 /* Allocate receive buffers for the ring */
3086                 if (em_allocate_receive_buffers(rxr)) {
3087                         device_printf(dev,
3088                             "Critical Failure setting up receive buffers\n");
3089                         error = ENOMEM;
3090                         goto err_rx_desc;
3091                 }
3092         }
3093
3094         return (0);
3095
3096 err_rx_desc:
3097         for (rxr = adapter->rx_rings; rxconf > 0; rxr++, rxconf--)
3098                 em_dma_free(adapter, &rxr->rxdma);
3099 err_tx_desc:
3100         for (txr = adapter->tx_rings; txconf > 0; txr++, txconf--)
3101                 em_dma_free(adapter, &txr->txdma);
3102         free(adapter->rx_rings, M_DEVBUF);
3103 rx_fail:
3104 #if __FreeBSD_version >= 800000
3105         /* txr may be NULL here if the RX ring malloc failed first */
3106         if (txr != NULL && txr->br != NULL)
3107                 buf_ring_free(txr->br, M_DEVBUF);
3108 #endif
3107         free(adapter->tx_rings, M_DEVBUF);
3108 fail:
3109         return (error);
3110 }
3111
3112
3113 /*********************************************************************
3114  *
3115  *  Allocate memory for tx_buffer structures. The tx_buffer stores all
3116  *  the information needed to transmit a packet on the wire. This is
3117  *  called only once at attach, setup is done every reset.
3118  *
3119  **********************************************************************/
3120 static int
3121 em_allocate_transmit_buffers(struct tx_ring *txr)
3122 {
3123         struct adapter *adapter = txr->adapter;
3124         device_t dev = adapter->dev;
3125         struct em_buffer *txbuf;
3126         int error, i;
3127
3128         /*
3129          * Setup DMA descriptor areas.
3130          */
3131         if ((error = bus_dma_tag_create(bus_get_dma_tag(dev),
3132                                1, 0,                    /* alignment, bounds */
3133                                BUS_SPACE_MAXADDR,       /* lowaddr */
3134                                BUS_SPACE_MAXADDR,       /* highaddr */
3135                                NULL, NULL,              /* filter, filterarg */
3136                                EM_TSO_SIZE,             /* maxsize */
3137                                EM_MAX_SCATTER,          /* nsegments */
3138                                PAGE_SIZE,               /* maxsegsize */
3139                                0,                       /* flags */
3140                                NULL,                    /* lockfunc */
3141                                NULL,                    /* lockfuncarg */
3142                                &txr->txtag))) {
3143                 device_printf(dev, "Unable to allocate TX DMA tag\n");
3144                 goto fail;
3145         }
3146
3147         if (!(txr->tx_buffers =
3148             (struct em_buffer *) malloc(sizeof(struct em_buffer) *
3149             adapter->num_tx_desc, M_DEVBUF, M_NOWAIT | M_ZERO))) {
3150                 device_printf(dev, "Unable to allocate tx_buffer memory\n");
3151                 error = ENOMEM;
3152                 goto fail;
3153         }
3154
3155         /* Create the descriptor buffer dma maps */
3156         txbuf = txr->tx_buffers;
3157         for (i = 0; i < adapter->num_tx_desc; i++, txbuf++) {
3158                 error = bus_dmamap_create(txr->txtag, 0, &txbuf->map);
3159                 if (error != 0) {
3160                         device_printf(dev, "Unable to create TX DMA map\n");
3161                         goto fail;
3162                 }
3163         }
3164
3165         return (0);
3166 fail:
3167         /* We free everything; this handles the case of a partial failure */
3168         em_free_transmit_structures(adapter);
3169         return (error);
3170 }
3171
3172 /*********************************************************************
3173  *
3174  *  Initialize a transmit ring.
3175  *
3176  **********************************************************************/
3177 static void
3178 em_setup_transmit_ring(struct tx_ring *txr)
3179 {
3180         struct adapter *adapter = txr->adapter;
3181         struct em_buffer *txbuf;
3182         int i;
3183
3184         /* Clear the old descriptor contents */
3185         EM_TX_LOCK(txr);
3186         bzero((void *)txr->tx_base,
3187               (sizeof(struct e1000_tx_desc)) * adapter->num_tx_desc);
3188         /* Reset indices */
3189         txr->next_avail_desc = 0;
3190         txr->next_to_clean = 0;
3191
3192         /* Free any existing tx buffers. */
3193         txbuf = txr->tx_buffers;
3194         for (i = 0; i < adapter->num_tx_desc; i++, txbuf++) {
3195                 if (txbuf->m_head != NULL) {
3196                         bus_dmamap_sync(txr->txtag, txbuf->map,
3197                             BUS_DMASYNC_POSTWRITE);
3198                         bus_dmamap_unload(txr->txtag, txbuf->map);
3199                         m_freem(txbuf->m_head);
3200                         txbuf->m_head = NULL;
3201                 }
3202                 /* clear the watch index */
3203                 txbuf->next_eop = -1;
3204         }
3205
3206         /* Set number of descriptors available */
3207         txr->tx_avail = adapter->num_tx_desc;
3208         txr->queue_status = EM_QUEUE_IDLE;
3209
3210         /* Clear checksum offload context. */
3211         txr->last_hw_offload = 0;
3212         txr->last_hw_ipcss = 0;
3213         txr->last_hw_ipcso = 0;
3214         txr->last_hw_tucss = 0;
3215         txr->last_hw_tucso = 0;
3216
3217         bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
3218             BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
3219         EM_TX_UNLOCK(txr);
3220 }
3221
3222 /*********************************************************************
3223  *
3224  *  Initialize all transmit rings.
3225  *
3226  **********************************************************************/
3227 static void
3228 em_setup_transmit_structures(struct adapter *adapter)
3229 {
3230         struct tx_ring *txr = adapter->tx_rings;
3231
3232         for (int i = 0; i < adapter->num_queues; i++, txr++)
3233                 em_setup_transmit_ring(txr);
3234
3235         return;
3236 }
3237
3238 /*********************************************************************
3239  *
3240  *  Enable transmit unit.
3241  *
3242  **********************************************************************/
3243 static void
3244 em_initialize_transmit_unit(struct adapter *adapter)
3245 {
3246         struct tx_ring  *txr = adapter->tx_rings;
3247         struct e1000_hw *hw = &adapter->hw;
3248         u32     tctl, tarc, tipg = 0;
3249
3250         INIT_DEBUGOUT("em_initialize_transmit_unit: begin");
3251
3252         for (int i = 0; i < adapter->num_queues; i++, txr++) {
3253                 u64 bus_addr = txr->txdma.dma_paddr;
3254                 /* Base and Len of TX Ring */
3255                 E1000_WRITE_REG(hw, E1000_TDLEN(i),
3256                     adapter->num_tx_desc * sizeof(struct e1000_tx_desc));
3257                 E1000_WRITE_REG(hw, E1000_TDBAH(i),
3258                     (u32)(bus_addr >> 32));
3259                 E1000_WRITE_REG(hw, E1000_TDBAL(i),
3260                     (u32)bus_addr);
3261                 /* Init the HEAD/TAIL indices */
3262                 E1000_WRITE_REG(hw, E1000_TDT(i), 0);
3263                 E1000_WRITE_REG(hw, E1000_TDH(i), 0);
3264
3265                 HW_DEBUGOUT2("Base = %x, Length = %x\n",
3266                     E1000_READ_REG(&adapter->hw, E1000_TDBAL(i)),
3267                     E1000_READ_REG(&adapter->hw, E1000_TDLEN(i)));
3268
3269                 txr->queue_status = EM_QUEUE_IDLE;
3270         }
3271
3272         /* Set the default values for the Tx Inter Packet Gap timer */
3273         switch (adapter->hw.mac.type) {
3274         case e1000_82542:
3275                 tipg = DEFAULT_82542_TIPG_IPGT;
3276                 tipg |= DEFAULT_82542_TIPG_IPGR1 << E1000_TIPG_IPGR1_SHIFT;
3277                 tipg |= DEFAULT_82542_TIPG_IPGR2 << E1000_TIPG_IPGR2_SHIFT;
3278                 break;
3279         case e1000_80003es2lan:
3280                 tipg = DEFAULT_82543_TIPG_IPGR1;
3281                 tipg |= DEFAULT_80003ES2LAN_TIPG_IPGR2 <<
3282                     E1000_TIPG_IPGR2_SHIFT;
3283                 break;
3284         default:
3285                 if ((adapter->hw.phy.media_type == e1000_media_type_fiber) ||
3286                     (adapter->hw.phy.media_type ==
3287                     e1000_media_type_internal_serdes))
3288                         tipg = DEFAULT_82543_TIPG_IPGT_FIBER;
3289                 else
3290                         tipg = DEFAULT_82543_TIPG_IPGT_COPPER;
3291                 tipg |= DEFAULT_82543_TIPG_IPGR1 << E1000_TIPG_IPGR1_SHIFT;
3292                 tipg |= DEFAULT_82543_TIPG_IPGR2 << E1000_TIPG_IPGR2_SHIFT;
3293         }
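
        /*
         * Example (editorial, assuming the classic copper defaults of
         * IPGT = 8, IPGR1 = 8 and IPGR2 = 6, with shifts of 10 and 20):
         * tipg = 8 | (8 << 10) | (6 << 20) = 0x00602008.
         */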
3294
3295         E1000_WRITE_REG(&adapter->hw, E1000_TIPG, tipg);
3296         E1000_WRITE_REG(&adapter->hw, E1000_TIDV, adapter->tx_int_delay.value);
3297
3298         if (adapter->hw.mac.type >= e1000_82540)
3299                 E1000_WRITE_REG(&adapter->hw, E1000_TADV,
3300                     adapter->tx_abs_int_delay.value);
3301
3302         if ((adapter->hw.mac.type == e1000_82571) ||
3303             (adapter->hw.mac.type == e1000_82572)) {
3304                 tarc = E1000_READ_REG(&adapter->hw, E1000_TARC(0));
3305                 tarc |= SPEED_MODE_BIT;
3306                 E1000_WRITE_REG(&adapter->hw, E1000_TARC(0), tarc);
3307         } else if (adapter->hw.mac.type == e1000_80003es2lan) {
3308                 tarc = E1000_READ_REG(&adapter->hw, E1000_TARC(0));
3309                 tarc |= 1;
3310                 E1000_WRITE_REG(&adapter->hw, E1000_TARC(0), tarc);
3311                 tarc = E1000_READ_REG(&adapter->hw, E1000_TARC(1));
3312                 tarc |= 1;
3313                 E1000_WRITE_REG(&adapter->hw, E1000_TARC(1), tarc);
3314         }
3315
3316         adapter->txd_cmd = E1000_TXD_CMD_IFCS;
3317         if (adapter->tx_int_delay.value > 0)
3318                 adapter->txd_cmd |= E1000_TXD_CMD_IDE;
3319
3320         /* Program the Transmit Control Register */
3321         tctl = E1000_READ_REG(&adapter->hw, E1000_TCTL);
3322         tctl &= ~E1000_TCTL_CT;
3323         tctl |= (E1000_TCTL_PSP | E1000_TCTL_RTLC | E1000_TCTL_EN |
3324                    (E1000_COLLISION_THRESHOLD << E1000_CT_SHIFT));
3325
3326         if (adapter->hw.mac.type >= e1000_82571)
3327                 tctl |= E1000_TCTL_MULR;
3328
3329         /* This write will effectively turn on the transmit unit. */
3330         E1000_WRITE_REG(&adapter->hw, E1000_TCTL, tctl);
3331
3332 }
3333
3334
3335 /*********************************************************************
3336  *
3337  *  Free all transmit rings.
3338  *
3339  **********************************************************************/
3340 static void
3341 em_free_transmit_structures(struct adapter *adapter)
3342 {
3343         struct tx_ring *txr = adapter->tx_rings;
3344
3345         for (int i = 0; i < adapter->num_queues; i++, txr++) {
3346                 EM_TX_LOCK(txr);
3347                 em_free_transmit_buffers(txr);
3348                 em_dma_free(adapter, &txr->txdma);
3349                 EM_TX_UNLOCK(txr);
3350                 EM_TX_LOCK_DESTROY(txr);
3351         }
3352
3353         free(adapter->tx_rings, M_DEVBUF);
3354 }
3355
3356 /*********************************************************************
3357  *
3358  *  Free transmit ring related data structures.
3359  *
3360  **********************************************************************/
3361 static void
3362 em_free_transmit_buffers(struct tx_ring *txr)
3363 {
3364         struct adapter          *adapter = txr->adapter;
3365         struct em_buffer        *txbuf;
3366
3367         INIT_DEBUGOUT("free_transmit_ring: begin");
3368
3369         if (txr->tx_buffers == NULL)
3370                 return;
3371
3372         for (int i = 0; i < adapter->num_tx_desc; i++) {
3373                 txbuf = &txr->tx_buffers[i];
3374                 if (txbuf->m_head != NULL) {
3375                         bus_dmamap_sync(txr->txtag, txbuf->map,
3376                             BUS_DMASYNC_POSTWRITE);
3377                         bus_dmamap_unload(txr->txtag,
3378                             txbuf->map);
3379                         m_freem(txbuf->m_head);
3380                         txbuf->m_head = NULL;
3381                         if (txbuf->map != NULL) {
3382                                 bus_dmamap_destroy(txr->txtag,
3383                                     txbuf->map);
3384                                 txbuf->map = NULL;
3385                         }
3386                 } else if (txbuf->map != NULL) {
3387                         bus_dmamap_unload(txr->txtag,
3388                             txbuf->map);
3389                         bus_dmamap_destroy(txr->txtag,
3390                             txbuf->map);
3391                         txbuf->map = NULL;
3392                 }
3393         }
3394 #if __FreeBSD_version >= 800000
3395         if (txr->br != NULL)
3396                 buf_ring_free(txr->br, M_DEVBUF);
3397 #endif
3398         if (txr->tx_buffers != NULL) {
3399                 free(txr->tx_buffers, M_DEVBUF);
3400                 txr->tx_buffers = NULL;
3401         }
3402         if (txr->txtag != NULL) {
3403                 bus_dma_tag_destroy(txr->txtag);
3404                 txr->txtag = NULL;
3405         }
3406         return;
3407 }
3408
3409
3410 /*********************************************************************
3411  *  The offload context is protocol specific (TCP/UDP) and thus
3412  *  only needs to be set when the protocol changes. A context
3413  *  change can be a performance detriment, and might be better
3414  *  just disabled. The reason lies in the way the controller
3415  *  pipelines requests from the Tx data DMA: up to four requests
3416  *  can be pipelined, and they may belong to the same packet or
3417  *  to multiple packets. However, all requests for one packet are
3418  *  issued before a request is issued for a subsequent packet,
3419  *  and if a request for the next packet requires a context
3420  *  change, that request stalls until the previous request
3421  *  completes. Setting up a new context thus effectively disables
3422  *  pipelined Tx data DMA, which greatly slows down the sending
3423  *  of small frames. (See the illustrative caller sketch after
3424  *  this function.)
3425  **********************************************************************/
3426 static void
3427 em_transmit_checksum_setup(struct tx_ring *txr, struct mbuf *mp, int ip_off,
3428     struct ip *ip, u32 *txd_upper, u32 *txd_lower)
3429 {
3430         struct adapter                  *adapter = txr->adapter;
3431         struct e1000_context_desc       *TXD = NULL;
3432         struct em_buffer                *tx_buffer;
3433         int                             cur, hdr_len;
3434         u32                             cmd = 0;
3435         u16                             offload = 0;
3436         u8                              ipcso, ipcss, tucso, tucss;
3437
3438         ipcss = ipcso = tucss = tucso = 0;
3439         hdr_len = ip_off + (ip->ip_hl << 2);
3440         cur = txr->next_avail_desc;
3441
3442         /* Setup of IP header checksum. */
3443         if (mp->m_pkthdr.csum_flags & CSUM_IP) {
3444                 *txd_upper |= E1000_TXD_POPTS_IXSM << 8;
3445                 offload |= CSUM_IP;
3446                 ipcss = ip_off;
3447                 ipcso = ip_off + offsetof(struct ip, ip_sum);
3448                 /*
3449                  * Start offset for header checksum calculation.
3450                  * End offset for header checksum calculation.
3451                  * Offset of place to put the checksum.
3452                  */
3453                 TXD = (struct e1000_context_desc *)&txr->tx_base[cur];
3454                 TXD->lower_setup.ip_fields.ipcss = ipcss;
3455                 TXD->lower_setup.ip_fields.ipcse = htole16(hdr_len);
3456                 TXD->lower_setup.ip_fields.ipcso = ipcso;
3457                 cmd |= E1000_TXD_CMD_IP;
3458         }
3459
3460         if (mp->m_pkthdr.csum_flags & CSUM_TCP) {
3461                 *txd_lower = E1000_TXD_CMD_DEXT | E1000_TXD_DTYP_D;
3462                 *txd_upper |= E1000_TXD_POPTS_TXSM << 8;
3463                 offload |= CSUM_TCP;
3464                 tucss = hdr_len;
3465                 tucso = hdr_len + offsetof(struct tcphdr, th_sum);
3466                 /*
3467                  * Setting up a new checksum offload context for every
3468                  * frame takes a lot of processing time in hardware, and
3469                  * hurts performance badly for small frames, so avoid it
3470                  * when the driver can reuse the previously configured
3471                  * checksum offload context.
3472                  */
3473                 if (txr->last_hw_offload == offload) {
3474                         if (offload & CSUM_IP) {
3475                                 if (txr->last_hw_ipcss == ipcss &&
3476                                     txr->last_hw_ipcso == ipcso &&
3477                                     txr->last_hw_tucss == tucss &&
3478                                     txr->last_hw_tucso == tucso)
3479                                         return;
3480                         } else {
3481                                 if (txr->last_hw_tucss == tucss &&
3482                                     txr->last_hw_tucso == tucso)
3483                                         return;
3484                         }
3485                 }
3486                 txr->last_hw_offload = offload;
3487                 txr->last_hw_tucss = tucss;
3488                 txr->last_hw_tucso = tucso;
3489                 /*
3490                  * Start offset for payload checksum calculation.
3491                  * End offset for payload checksum calculation.
3492                  * Offset of place to put the checksum.
3493                  */
3494                 TXD = (struct e1000_context_desc *)&txr->tx_base[cur];
3495                 TXD->upper_setup.tcp_fields.tucss = hdr_len;
3496                 TXD->upper_setup.tcp_fields.tucse = htole16(0);
3497                 TXD->upper_setup.tcp_fields.tucso = tucso;
3498                 cmd |= E1000_TXD_CMD_TCP;
3499         } else if (mp->m_pkthdr.csum_flags & CSUM_UDP) {
3500                 *txd_lower = E1000_TXD_CMD_DEXT | E1000_TXD_DTYP_D;
3501                 *txd_upper |= E1000_TXD_POPTS_TXSM << 8;
3502                 tucss = hdr_len;
3503                 tucso = hdr_len + offsetof(struct udphdr, uh_sum);
3504                 /*
3505                  * Setting up a new checksum offload context for every
3506                  * frame takes a lot of processing time in hardware, and
3507                  * hurts performance badly for small frames, so avoid it
3508                  * when the driver can reuse the previously configured
3509                  * checksum offload context.
3510                  */
3511                 if (txr->last_hw_offload == offload) {
3512                         if (offload & CSUM_IP) {
3513                                 if (txr->last_hw_ipcss == ipcss &&
3514                                     txr->last_hw_ipcso == ipcso &&
3515                                     txr->last_hw_tucss == tucss &&
3516                                     txr->last_hw_tucso == tucso)
3517                                         return;
3518                         } else {
3519                                 if (txr->last_hw_tucss == tucss &&
3520                                     txr->last_hw_tucso == tucso)
3521                                         return;
3522                         }
3523                 }
3524                 txr->last_hw_offload = offload;
3525                 txr->last_hw_tucss = tucss;
3526                 txr->last_hw_tucso = tucso;
3527                 /*
3528                  * Start offset for header checksum calculation.
3529                  * End offset for header checksum calculation.
3530                  * Offset of place to put the checksum.
3531                  */
3532                 TXD = (struct e1000_context_desc *)&txr->tx_base[cur];
3533                 TXD->upper_setup.tcp_fields.tucss = tucss;
3534                 TXD->upper_setup.tcp_fields.tucse = htole16(0);
3535                 TXD->upper_setup.tcp_fields.tucso = tucso;
3536         }
3537   
3538         if (offload & CSUM_IP) {
3539                 txr->last_hw_ipcss = ipcss;
3540                 txr->last_hw_ipcso = ipcso;
3541         }
3542
3543         TXD->tcp_seg_setup.data = htole32(0);
3544         TXD->cmd_and_length =
3545             htole32(adapter->txd_cmd | E1000_TXD_CMD_DEXT | cmd);
3546         tx_buffer = &txr->tx_buffers[cur];
3547         tx_buffer->m_head = NULL;
3548         tx_buffer->next_eop = -1;
3549
3550         if (++cur == adapter->num_tx_desc)
3551                 cur = 0;
3552
3553         txr->tx_avail--;
3554         txr->next_avail_desc = cur;
3555 }
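
/*
 * Illustrative caller sketch (editorial, not driver code): roughly how the
 * transmit path chooses between the TSO and checksum context setups, based
 * on the csum_flags the stack placed in the mbuf header.  The local
 * variables are assumed to be set up as in the driver's transmit routine.
 */
#if 0
        if (mp->m_pkthdr.csum_flags & CSUM_TSO)
                em_tso_setup(txr, mp, ip_off, ip, tp,
                    &txd_upper, &txd_lower);
        else if (mp->m_pkthdr.csum_flags & (CSUM_IP | CSUM_TCP | CSUM_UDP))
                em_transmit_checksum_setup(txr, mp, ip_off, ip,
                    &txd_upper, &txd_lower);
#endif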
3556
3557
3558 /**********************************************************************
3559  *
3560  *  Setup work for hardware segmentation offload (TSO)
3561  *
3562  **********************************************************************/
3563 static void
3564 em_tso_setup(struct tx_ring *txr, struct mbuf *mp, int ip_off,
3565     struct ip *ip, struct tcphdr *tp, u32 *txd_upper, u32 *txd_lower)
3566 {
3567         struct adapter                  *adapter = txr->adapter;
3568         struct e1000_context_desc       *TXD;
3569         struct em_buffer                *tx_buffer;
3570         int cur, hdr_len;
3571
3572         /*
3573          * In theory we could reuse the same TSO context if and only
3574          * if the frame is the same type (IP/TCP) and has the same
3575          * MSS. However, checking whether a frame has the same IP/TCP
3576          * structure is hard, so just ignore that and always establish
3577          * a new TSO context.
3578          */
3579         hdr_len = ip_off + (ip->ip_hl << 2) + (tp->th_off << 2);
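        /*
         * Example (editorial): for a plain Ethernet + IPv4 + TCP frame
         * with no options, ip_off = 14, ip_hl = 5 (20 bytes) and
         * th_off = 5 (20 bytes), so hdr_len = 14 + 20 + 20 = 54.
         */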
3580         *txd_lower = (E1000_TXD_CMD_DEXT |      /* Extended descr type */
3581                       E1000_TXD_DTYP_D |        /* Data descr type */
3582                       E1000_TXD_CMD_TSE);       /* Do TSE on this packet */
3583
3584         /* IP and/or TCP header checksum calculation and insertion. */
3585         *txd_upper = (E1000_TXD_POPTS_IXSM | E1000_TXD_POPTS_TXSM) << 8;
3586
3587         cur = txr->next_avail_desc;
3588         tx_buffer = &txr->tx_buffers[cur];
3589         TXD = (struct e1000_context_desc *) &txr->tx_base[cur];
3590
3591         /*
3592          * Start offset for header checksum calculation.
3593          * End offset for header checksum calculation.
3594          * Offset of place to put the checksum.
3595          */
3596         TXD->lower_setup.ip_fields.ipcss = ip_off;
3597         TXD->lower_setup.ip_fields.ipcse =
3598             htole16(ip_off + (ip->ip_hl << 2) - 1);
3599         TXD->lower_setup.ip_fields.ipcso = ip_off + offsetof(struct ip, ip_sum);
3600         /*
3601          * Start offset for payload checksum calculation.
3602          * End offset for payload checksum calculation.
3603          * Offset of place to put the checksum.
3604          */
3605         TXD->upper_setup.tcp_fields.tucss = ip_off + (ip->ip_hl << 2);
3606         TXD->upper_setup.tcp_fields.tucse = 0;
3607         TXD->upper_setup.tcp_fields.tucso =
3608             ip_off + (ip->ip_hl << 2) + offsetof(struct tcphdr, th_sum);
3609         /*
3610          * Payload size per packet w/o any headers.
3611          * Length of all headers up to payload.
3612          */
3613         TXD->tcp_seg_setup.fields.mss = htole16(mp->m_pkthdr.tso_segsz);
3614         TXD->tcp_seg_setup.fields.hdr_len = hdr_len;
3615
3616         TXD->cmd_and_length = htole32(adapter->txd_cmd |
3617                                 E1000_TXD_CMD_DEXT |    /* Extended descr */
3618                                 E1000_TXD_CMD_TSE |     /* TSE context */
3619                                 E1000_TXD_CMD_IP |      /* Do IP csum */
3620                                 E1000_TXD_CMD_TCP |     /* Do TCP checksum */
3621                                 (mp->m_pkthdr.len - (hdr_len))); /* Total len */
3622
3623         tx_buffer->m_head = NULL;
3624         tx_buffer->next_eop = -1;
3625
3626         if (++cur == adapter->num_tx_desc)
3627                 cur = 0;
3628
3629         txr->tx_avail--;
3630         txr->next_avail_desc = cur;
3631         txr->tx_tso = TRUE;
3632 }
3633
3634
3635 /**********************************************************************
3636  *
3637  *  Examine each tx_buffer in the used queue. If the hardware is done
3638  *  processing the packet then free associated resources. The
3639  *  tx_buffer is put back on the free queue.
3640  *
3641  **********************************************************************/
3642 static bool
3643 em_txeof(struct tx_ring *txr)
3644 {
3645         struct adapter  *adapter = txr->adapter;
3646         int first, last, done, processed;
3647         struct em_buffer *tx_buffer;
3648         struct e1000_tx_desc   *tx_desc, *eop_desc;
3649         struct ifnet   *ifp = adapter->ifp;
3650
3651         EM_TX_LOCK_ASSERT(txr);
3652
3653         /* No work, make sure watchdog is off */
3654         if (txr->tx_avail == adapter->num_tx_desc) {
3655                 txr->queue_status = EM_QUEUE_IDLE;
3656                 return (FALSE);
3657         }
3658
3659         processed = 0;
3660         first = txr->next_to_clean;
3661         tx_desc = &txr->tx_base[first];
3662         tx_buffer = &txr->tx_buffers[first];
3663         last = tx_buffer->next_eop;
3664         eop_desc = &txr->tx_base[last];
3665
3666         /*
3667          * What this does is get the index of the
3668          * first descriptor AFTER the EOP of the 
3669          * first packet, that way we can do the
3670          * simple comparison on the inner while loop.
3671          */
3672         if (++last == adapter->num_tx_desc)
3673                 last = 0;
3674         done = last;
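
        /*
         * Example (editorial): if the first packet occupies descriptors
         * 10..12 (EOP at 12), then done = 13 and the inner loop below
         * cleans 10, 11 and 12 before moving on to the next packet.
         */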
3675
3676         bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
3677             BUS_DMASYNC_POSTREAD);
3678
3679         while (eop_desc->upper.fields.status & E1000_TXD_STAT_DD) {
3680                 /* We clean the range of the packet */
3681                 while (first != done) {
3682                         tx_desc->upper.data = 0;
3683                         tx_desc->lower.data = 0;
3684                         tx_desc->buffer_addr = 0;
3685                         ++txr->tx_avail;
3686                         ++processed;
3687
3688                         if (tx_buffer->m_head) {
3689                                 bus_dmamap_sync(txr->txtag,
3690                                     tx_buffer->map,
3691                                     BUS_DMASYNC_POSTWRITE);
3692                                 bus_dmamap_unload(txr->txtag,
3693                                     tx_buffer->map);
3694                                 m_freem(tx_buffer->m_head);
3695                                 tx_buffer->m_head = NULL;
3696                         }
3697                         tx_buffer->next_eop = -1;
3698                         txr->watchdog_time = ticks;
3699
3700                         if (++first == adapter->num_tx_desc)
3701                                 first = 0;
3702
3703                         tx_buffer = &txr->tx_buffers[first];
3704                         tx_desc = &txr->tx_base[first];
3705                 }
3706                 ++ifp->if_opackets;
3707                 /* See if we can continue to the next packet */
3708                 last = tx_buffer->next_eop;
3709                 if (last != -1) {
3710                         eop_desc = &txr->tx_base[last];
3711                         /* Get new done point */
3712                         if (++last == adapter->num_tx_desc) last = 0;
3713                         done = last;
3714                 } else
3715                         break;
3716         }
3717         bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
3718             BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
3719
3720         txr->next_to_clean = first;
3721
3722         /*
3723         ** Watchdog calculation: we know there is work
3724         ** outstanding, or the early return above would
3725         ** have been taken, so nothing processed for too
3726         ** long indicates a hang. The local timer will
3727         ** examine this and reset the adapter if needed.
3728         */
3729         if ((!processed) && ((ticks - txr->watchdog_time) > EM_WATCHDOG))
3730                 txr->queue_status = EM_QUEUE_HUNG;
3731
3732         /*
3733          * If we have enough room, clear IFF_DRV_OACTIVE
3734          * to tell the stack that it is OK to send packets.
3735          */
3736         if (txr->tx_avail > EM_TX_CLEANUP_THRESHOLD) {                
3737                 ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;
3738                 /* Disable watchdog if all clean */
3739                 if (txr->tx_avail == adapter->num_tx_desc) {
3740                         txr->queue_status = EM_QUEUE_IDLE;
3741                         return (FALSE);
3742                 } 
3743         }
3744
3745         return (TRUE);
3746 }
3747
3748
3749 /*********************************************************************
3750  *
3751  *  Refresh RX descriptor mbufs from system mbuf buffer pool.
3752  *
3753  **********************************************************************/
3754 static void
3755 em_refresh_mbufs(struct rx_ring *rxr, int limit)
3756 {
3757         struct adapter          *adapter = rxr->adapter;
3758         struct mbuf             *m;
3759         bus_dma_segment_t       segs[1];
3760         struct em_buffer        *rxbuf;
3761         int                     i, error, nsegs, cleaned;
3762
3763         i = rxr->next_to_refresh;
3764         cleaned = -1;
3765         while (i != limit) {
3766                 rxbuf = &rxr->rx_buffers[i];
3767                 if (rxbuf->m_head == NULL) {
3768                         m = m_getjcl(M_DONTWAIT, MT_DATA,
3769                             M_PKTHDR, adapter->rx_mbuf_sz);
3770                         /*
3771                         ** If we have a temporary resource shortage
3772                         ** that causes a failure, just abort refresh
3773                         ** for now, we will return to this point when
3774                         ** reinvoked from em_rxeof.
3775                         */
3776                         if (m == NULL)
3777                                 goto update;
3778                 } else
3779                         m = rxbuf->m_head;
3780
3781                 m->m_len = m->m_pkthdr.len = adapter->rx_mbuf_sz;
3782                 m->m_flags |= M_PKTHDR;
3783                 m->m_data = m->m_ext.ext_buf;
3784
3785                 /* Use bus_dma machinery to setup the memory mapping  */
3786                 error = bus_dmamap_load_mbuf_sg(rxr->rxtag, rxbuf->map,
3787                     m, segs, &nsegs, BUS_DMA_NOWAIT);
3788                 if (error != 0) {
3789                         printf("Refresh mbufs: mbuf dmamap load"
3790                             " failure - %d\n", error);
3791                         m_free(m);
3792                         rxbuf->m_head = NULL;
3793                         goto update;
3794                 }
3795                 rxbuf->m_head = m;
3796                 bus_dmamap_sync(rxr->rxtag,
3797                     rxbuf->map, BUS_DMASYNC_PREREAD);
3798                 rxr->rx_base[i].buffer_addr = htole64(segs[0].ds_addr);
3799
3800                 cleaned = i;
3801                 /* Calculate next index */
3802                 if (++i == adapter->num_rx_desc)
3803                         i = 0;
3804                 rxr->next_to_refresh = i;
3805         }
3806 update:
3807         /*
3808         ** Update the tail pointer only if, and only
3809         ** as far as, we have actually refreshed.
3810         */
3811         if (cleaned != -1) /* Update tail index */
3812                 E1000_WRITE_REG(&adapter->hw,
3813                     E1000_RDT(rxr->me), cleaned);
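
        /*
         * Example (editorial): if descriptors 5 through 8 were just
         * refreshed, cleaned == 8 and the tail register is advanced to
         * 8, handing the freshly mapped buffers back to the hardware.
         */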
3814
3815         return;
3816 }
3817
3818
3819 /*********************************************************************
3820  *
3821  *  Allocate memory for rx_buffer structures. Since we use one
3822  *  rx_buffer per received packet, the maximum number of rx_buffer's
3823  *  that we'll need is equal to the number of receive descriptors
3824  *  that we've allocated.
3825  *
3826  **********************************************************************/
3827 static int
3828 em_allocate_receive_buffers(struct rx_ring *rxr)
3829 {
3830         struct adapter          *adapter = rxr->adapter;
3831         device_t                dev = adapter->dev;
3832         struct em_buffer        *rxbuf;
3833         int                     error;
3834
3835         rxr->rx_buffers = malloc(sizeof(struct em_buffer) *
3836             adapter->num_rx_desc, M_DEVBUF, M_NOWAIT | M_ZERO);
3837         if (rxr->rx_buffers == NULL) {
3838                 device_printf(dev, "Unable to allocate rx_buffer memory\n");
3839                 return (ENOMEM);
3840         }
3841
3842         error = bus_dma_tag_create(bus_get_dma_tag(dev), /* parent */
3843                                 1, 0,                   /* alignment, bounds */
3844                                 BUS_SPACE_MAXADDR,      /* lowaddr */
3845                                 BUS_SPACE_MAXADDR,      /* highaddr */
3846                                 NULL, NULL,             /* filter, filterarg */
3847                                 MJUM9BYTES,             /* maxsize */
3848                                 1,                      /* nsegments */
3849                                 MJUM9BYTES,             /* maxsegsize */
3850                                 0,                      /* flags */
3851                                 NULL,                   /* lockfunc */
3852                                 NULL,                   /* lockarg */
3853                                 &rxr->rxtag);
3854         if (error) {
3855                 device_printf(dev, "%s: bus_dma_tag_create failed %d\n",
3856                     __func__, error);
3857                 goto fail;
3858         }
3859
3860         for (int i = 0; i < adapter->num_rx_desc; i++) {
3861                 rxbuf = &rxr->rx_buffers[i];
3863                 error = bus_dmamap_create(rxr->rxtag, BUS_DMA_NOWAIT,
3864                     &rxbuf->map);
3865                 if (error) {
3866                         device_printf(dev, "%s: bus_dmamap_create failed: %d\n",
3867                             __func__, error);
3868                         goto fail;
3869                 }
3870         }
3871
3872         return (0);
3873
3874 fail:
3875         em_free_receive_structures(adapter);
3876         return (error);
3877 }
3878
3879
3880 /*********************************************************************
3881  *
3882  *  Initialize a receive ring and its buffers.
3883  *
3884  **********************************************************************/
3885 static int
3886 em_setup_receive_ring(struct rx_ring *rxr)
3887 {
3888         struct  adapter         *adapter = rxr->adapter;
3889         struct em_buffer        *rxbuf;
3890         bus_dma_segment_t       seg[1];
3891         int                     rsize, nsegs, error;
3892
3893
3894         /* Clear the ring contents */
3895         EM_RX_LOCK(rxr);
3896         rsize = roundup2(adapter->num_rx_desc *
3897             sizeof(struct e1000_rx_desc), EM_DBA_ALIGN);
3898         bzero((void *)rxr->rx_base, rsize);
3899
3900         /*
3901         ** Free current RX buffer structs and their mbufs
3902         */
3903         for (int i = 0; i < adapter->num_rx_desc; i++) {
3904                 rxbuf = &rxr->rx_buffers[i];
3905                 if (rxbuf->m_head != NULL) {
3906                         bus_dmamap_sync(rxr->rxtag, rxbuf->map,
3907                             BUS_DMASYNC_POSTREAD);
3908                         bus_dmamap_unload(rxr->rxtag, rxbuf->map);
3909                         m_freem(rxbuf->m_head);
3910                 }
3911         }
3912
3913         /* Now replenish the mbufs */
3914         for (int j = 0; j != adapter->num_rx_desc; ++j) {
3915
3916                 rxbuf = &rxr->rx_buffers[j];
3917                 rxbuf->m_head = m_getjcl(M_DONTWAIT, MT_DATA,
3918                     M_PKTHDR, adapter->rx_mbuf_sz);
3919                 if (rxbuf->m_head == NULL) {
3920                         EM_RX_UNLOCK(rxr);
3921                         return (ENOBUFS);
3922                 }
3921                 rxbuf->m_head->m_len = adapter->rx_mbuf_sz;
3922                 rxbuf->m_head->m_flags &= ~M_HASFCS; /* we strip it */
3923                 rxbuf->m_head->m_pkthdr.len = adapter->rx_mbuf_sz;
3924
3925                 /* Get the memory mapping */
3926                 error = bus_dmamap_load_mbuf_sg(rxr->rxtag,
3927                     rxbuf->map, rxbuf->m_head, seg,
3928                     &nsegs, BUS_DMA_NOWAIT);
3929                 if (error != 0) {
3930                         m_freem(rxbuf->m_head);
3931                         rxbuf->m_head = NULL;
3932                         EM_RX_UNLOCK(rxr);
3933                         return (error);
3933                 }
3934                 bus_dmamap_sync(rxr->rxtag,
3935                     rxbuf->map, BUS_DMASYNC_PREREAD);
3936
3937                 /* Update descriptor */
3938                 rxr->rx_base[j].buffer_addr = htole64(seg[0].ds_addr);
3939         }
3940
3941
3942         /* Setup our descriptor indices */
3943         rxr->next_to_check = 0;
3944         rxr->next_to_refresh = 0;
3945
3946         bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
3947             BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
3948
3949         EM_RX_UNLOCK(rxr);
3950         return (0);
3951 }
3952
3953 /*********************************************************************
3954  *
3955  *  Initialize all receive rings.
3956  *
3957  **********************************************************************/
3958 static int
3959 em_setup_receive_structures(struct adapter *adapter)
3960 {
3961         struct rx_ring *rxr = adapter->rx_rings;
3962         int j;
3963
3964         for (j = 0; j < adapter->num_queues; j++, rxr++)
3965                 if (em_setup_receive_ring(rxr))
3966                         goto fail;
3967
3968         return (0);
3969 fail:
3970         /*
3971          * Free the RX buffers allocated so far. We only handle the
3972          * rings that completed; the failing ring will have cleaned
3973          * up after itself. Ring 'j' failed, so it is the terminus.
3974          */
3975         for (int i = 0; i < j; ++i) {
3976                 rxr = &adapter->rx_rings[i];
3977                 for (int n = 0; n < adapter->num_rx_desc; n++) {
3978                         struct em_buffer *rxbuf;
3979                         rxbuf = &rxr->rx_buffers[n];
3980                         if (rxbuf->m_head != NULL) {
3981                                 bus_dmamap_sync(rxr->rxtag, rxbuf->map,
3982                                   BUS_DMASYNC_POSTREAD);
3983                                 bus_dmamap_unload(rxr->rxtag, rxbuf->map);
3984                                 m_freem(rxbuf->m_head);
3985                                 rxbuf->m_head = NULL;
3986                         }
3987                 }
3988         }
3989
3990         return (ENOBUFS);
3991 }
3992
3993 /*********************************************************************
3994  *
3995  *  Free all receive rings.
3996  *
3997  **********************************************************************/
3998 static void
3999 em_free_receive_structures(struct adapter *adapter)
4000 {
4001         struct rx_ring *rxr = adapter->rx_rings;
4002
4003         for (int i = 0; i < adapter->num_queues; i++, rxr++) {
4004                 em_free_receive_buffers(rxr);
4005                 /* Free the ring memory as well */
4006                 em_dma_free(adapter, &rxr->rxdma);
4007                 EM_RX_LOCK_DESTROY(rxr);
4008         }
4009
4010         free(adapter->rx_rings, M_DEVBUF);
4011 }
4012
4013
4014 /*********************************************************************
4015  *
4016  *  Free receive ring data structures
4017  *
4018  **********************************************************************/
4019 static void
4020 em_free_receive_buffers(struct rx_ring *rxr)
4021 {
4022         struct adapter          *adapter = rxr->adapter;
4023         struct em_buffer        *rxbuf = NULL;
4024
4025         INIT_DEBUGOUT("free_receive_buffers: begin");
4026
4027         if (rxr->rx_buffers != NULL) {
4028                 for (int i = 0; i < adapter->num_rx_desc; i++) {
4029                         rxbuf = &rxr->rx_buffers[i];
4030                         if (rxbuf->map != NULL) {
4031                                 bus_dmamap_sync(rxr->rxtag, rxbuf->map,
4032                                     BUS_DMASYNC_POSTREAD);
4033                                 bus_dmamap_unload(rxr->rxtag, rxbuf->map);
4034                                 bus_dmamap_destroy(rxr->rxtag, rxbuf->map);
4035                         }
4036                         if (rxbuf->m_head != NULL) {
4037                                 m_freem(rxbuf->m_head);
4038                                 rxbuf->m_head = NULL;
4039                         }
4040                 }
4041                 free(rxr->rx_buffers, M_DEVBUF);
4042                 rxr->rx_buffers = NULL;
4043         }
4044
4045         if (rxr->rxtag != NULL) {
4046                 bus_dma_tag_destroy(rxr->rxtag);
4047                 rxr->rxtag = NULL;
4048         }
4049
4050         return;
4051 }
4052
4053
4054 /*********************************************************************
4055  *
4056  *  Enable receive unit.
4057  *
4058  **********************************************************************/
4059 #define MAX_INTS_PER_SEC        8000
4060 #define DEFAULT_ITR          1000000000/(MAX_INTS_PER_SEC * 256)
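/*
 * Illustrative arithmetic: the ITR register counts in 256ns units, so
 * DEFAULT_ITR = 10^9 / (8000 * 256) ~= 488, i.e. a minimum gap of about
 * 488 * 256ns ~= 125us between interrupts, capping the device at
 * roughly MAX_INTS_PER_SEC interrupts per second.
 */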
4061
4062 static void
4063 em_initialize_receive_unit(struct adapter *adapter)
4064 {
4065         struct rx_ring  *rxr = adapter->rx_rings;
4066         struct ifnet    *ifp = adapter->ifp;
4067         struct e1000_hw *hw = &adapter->hw;
4068         u64     bus_addr;
4069         u32     rctl, rxcsum;
4070
4071         INIT_DEBUGOUT("em_initialize_receive_unit: begin");
4072
4073         /*
4074          * Make sure receives are disabled while setting
4075          * up the descriptor ring
4076          */
4077         rctl = E1000_READ_REG(hw, E1000_RCTL);
4078         E1000_WRITE_REG(hw, E1000_RCTL, rctl & ~E1000_RCTL_EN);
4079
4080         E1000_WRITE_REG(&adapter->hw, E1000_RADV,
4081             adapter->rx_abs_int_delay.value);
4082         /*
4083          * Set the interrupt throttling rate. Value is calculated
4084          * as DEFAULT_ITR = 1 second / (MAX_INTS_PER_SEC * 256ns)
4085          */
4086         E1000_WRITE_REG(hw, E1000_ITR, DEFAULT_ITR);
4087
4088         /*
4089         ** When using MSIX interrupts we need to throttle
4090         ** using the EITR register (82574 only)
4091         */
4092         if (hw->mac.type == e1000_82574)
4093                 for (int i = 0; i < 4; i++)
4094                         E1000_WRITE_REG(hw, E1000_EITR_82574(i),
4095                             DEFAULT_ITR);
4096
4097         /* Disable accelerated acknowledge */
4098         if (adapter->hw.mac.type == e1000_82574)
4099                 E1000_WRITE_REG(hw, E1000_RFCTL, E1000_RFCTL_ACK_DIS);
4100
4101         if (ifp->if_capenable & IFCAP_RXCSUM) {
4102                 rxcsum = E1000_READ_REG(hw, E1000_RXCSUM);
4103                 rxcsum |= (E1000_RXCSUM_IPOFL | E1000_RXCSUM_TUOFL);
4104                 E1000_WRITE_REG(hw, E1000_RXCSUM, rxcsum);
4105         }
4106
4107         /*
4108         ** XXX TEMPORARY WORKAROUND: on some systems with 82573,
4109         ** long latencies are observed, e.g. on the Lenovo X60. This
4110         ** change eliminates the problem, but since having positive
4111         ** values in RDTR is a known source of problems on other
4112         ** platforms, another solution is being sought.
4113         */
4114         if (hw->mac.type == e1000_82573)
4115                 E1000_WRITE_REG(hw, E1000_RDTR, 0x20);
4116
4117         for (int i = 0; i < adapter->num_queues; i++, rxr++) {
4118                 /* Setup the Base and Length of the Rx Descriptor Ring */
4119                 bus_addr = rxr->rxdma.dma_paddr;
4120                 E1000_WRITE_REG(hw, E1000_RDLEN(i),
4121                     adapter->num_rx_desc * sizeof(struct e1000_rx_desc));
4122                 E1000_WRITE_REG(hw, E1000_RDBAH(i), (u32)(bus_addr >> 32));
4123                 E1000_WRITE_REG(hw, E1000_RDBAL(i), (u32)bus_addr);
4124                 /* Setup the Head and Tail Descriptor Pointers */
4125                 E1000_WRITE_REG(hw, E1000_RDH(i), 0);
4126                 E1000_WRITE_REG(hw, E1000_RDT(i), adapter->num_rx_desc - 1);
4127         }
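        /*
         * Worked example (illustrative): a legacy e1000_rx_desc is 16
         * bytes, so a ring of 1024 descriptors programs RDLEN = 1024 *
         * 16 = 16384 bytes. The head starts at 0 and the tail at
         * num_rx_desc - 1, handing all but one descriptor to the
         * hardware, which stalls when the head catches the tail.
         */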
4128
4129         /* Set early receive threshold on appropriate hw */
4130         if (((adapter->hw.mac.type == e1000_ich9lan) ||
4131             (adapter->hw.mac.type == e1000_pch2lan) ||
4132             (adapter->hw.mac.type == e1000_ich10lan)) &&
4133             (ifp->if_mtu > ETHERMTU)) {
4134                 u32 rxdctl = E1000_READ_REG(hw, E1000_RXDCTL(0));
4135                 E1000_WRITE_REG(hw, E1000_RXDCTL(0), rxdctl | 3);
4136                 E1000_WRITE_REG(hw, E1000_ERT, 0x100 | (1 << 13));
4137         }
4138                 
4139         if (adapter->hw.mac.type == e1000_pch2lan) {
4140                 if (ifp->if_mtu > ETHERMTU)
4141                         e1000_lv_jumbo_workaround_ich8lan(hw, TRUE);
4142                 else
4143                         e1000_lv_jumbo_workaround_ich8lan(hw, FALSE);
4144         }
4145
4146         /* Setup the Receive Control Register */
4147         rctl &= ~(3 << E1000_RCTL_MO_SHIFT);
4148         rctl |= E1000_RCTL_EN | E1000_RCTL_BAM |
4149             E1000_RCTL_LBM_NO | E1000_RCTL_RDMTS_HALF |
4150             (hw->mac.mc_filter_type << E1000_RCTL_MO_SHIFT);
4151
4152         /* Strip the CRC */
4153         rctl |= E1000_RCTL_SECRC;
4154
4155         /* Make sure VLAN Filters are off */
4156         rctl &= ~E1000_RCTL_VFE;
4157         rctl &= ~E1000_RCTL_SBP;
4158
4159         if (adapter->rx_mbuf_sz == MCLBYTES)
4160                 rctl |= E1000_RCTL_SZ_2048;
4161         else if (adapter->rx_mbuf_sz == MJUMPAGESIZE)
4162                 rctl |= E1000_RCTL_SZ_4096 | E1000_RCTL_BSEX;
4163         else if (adapter->rx_mbuf_sz > MJUMPAGESIZE)
4164                 rctl |= E1000_RCTL_SZ_8192 | E1000_RCTL_BSEX;
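        /*
         * Note (assumption for clarity): E1000_RCTL_BSEX selects the
         * extended buffer-size table, so the SZ_4096/SZ_8192 encodings
         * are only meaningful with BSEX set (base sizes of 256/512
         * scaled by 16); plain 2048-byte clusters use the base table.
         */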
4165
4166         if (ifp->if_mtu > ETHERMTU)
4167                 rctl |= E1000_RCTL_LPE;
4168         else
4169                 rctl &= ~E1000_RCTL_LPE;
4170
4171         /* Write out the settings */
4172         E1000_WRITE_REG(hw, E1000_RCTL, rctl);
4173
4174         return;
4175 }
4176
4177
4178 /*********************************************************************
4179  *
4180  *  This routine executes in interrupt context. It replenishes
4181  *  the mbufs in the descriptor and sends data which has been
4182  *  dma'ed into host memory to upper layer.
4183  *
4184  *  We loop at most count times if count is > 0, or until done if
4185  *  count < 0.
4186  *  
4187  *  For polling we also now return the number of cleaned packets
4188  *********************************************************************/
4189 static bool
4190 em_rxeof(struct rx_ring *rxr, int count, int *done)
4191 {
4192         struct adapter          *adapter = rxr->adapter;
4193         struct ifnet            *ifp = adapter->ifp;
4194         struct mbuf             *mp, *sendmp;
4195         u8                      status = 0;
4196         u16                     len;
4197         int                     i, processed, rxdone = 0;
4198         bool                    eop;
4199         struct e1000_rx_desc    *cur;
4200
4201         EM_RX_LOCK(rxr);
4202
4203         for (i = rxr->next_to_check, processed = 0; count != 0;) {
4204
4205                 if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
4206                         break;
4207
4208                 bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
4209                     BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
4210
4211                 cur = &rxr->rx_base[i];
4212                 status = cur->status;
4213                 mp = sendmp = NULL;
4214
4215                 if ((status & E1000_RXD_STAT_DD) == 0)
4216                         break;
4217
4218                 len = le16toh(cur->length);
4219                 eop = (status & E1000_RXD_STAT_EOP) != 0;
4220
4221                 if ((cur->errors & E1000_RXD_ERR_FRAME_ERR_MASK) ||
4222                     (rxr->discard == TRUE)) {
4223                         ifp->if_ierrors++;
4224                         ++rxr->rx_discarded;
4225                         if (!eop) /* Catch subsequent segs */
4226                                 rxr->discard = TRUE;
4227                         else
4228                                 rxr->discard = FALSE;
4229                         em_rx_discard(rxr, i);
4230                         goto next_desc;
4231                 }
4232
4233                 /* Assign correct length to the current fragment */
4234                 mp = rxr->rx_buffers[i].m_head;
4235                 mp->m_len = len;
4236
4237                 /* Trigger for refresh */
4238                 rxr->rx_buffers[i].m_head = NULL;
4239
4240                 /* First segment? */
4241                 if (rxr->fmp == NULL) {
4242                         mp->m_pkthdr.len = len;
4243                         rxr->fmp = rxr->lmp = mp;
4244                 } else {
4245                         /* Chain mbuf's together */
4246                         mp->m_flags &= ~M_PKTHDR;
4247                         rxr->lmp->m_next = mp;
4248                         rxr->lmp = mp;
4249                         rxr->fmp->m_pkthdr.len += len;
4250                 }
4251
4252                 if (eop) {
4253                         --count;
4254                         sendmp = rxr->fmp;
4255                         sendmp->m_pkthdr.rcvif = ifp;
4256                         ifp->if_ipackets++;
4257                         em_receive_checksum(cur, sendmp);
4258 #ifndef __NO_STRICT_ALIGNMENT
4259                         if (adapter->max_frame_size >
4260                             (MCLBYTES - ETHER_ALIGN) &&
4261                             em_fixup_rx(rxr) != 0)
4262                                 goto skip;
4263 #endif
4264                         if (status & E1000_RXD_STAT_VP) {
4265                                 sendmp->m_pkthdr.ether_vtag =
4266                                     (le16toh(cur->special) &
4267                                     E1000_RXD_SPC_VLAN_MASK);
4268                                 sendmp->m_flags |= M_VLANTAG;
4269                         }
4270 #ifdef EM_MULTIQUEUE
4271                         sendmp->m_pkthdr.flowid = rxr->msix;
4272                         sendmp->m_flags |= M_FLOWID;
4273 #endif
4274 #ifndef __NO_STRICT_ALIGNMENT
4275 skip:
4276 #endif
4277                         rxr->fmp = rxr->lmp = NULL;
4278                 }
4279 next_desc:
4280                 /* Zero out the receive descriptors status. */
4281                 cur->status = 0;
4282                 ++rxdone;       /* cumulative for POLL */
4283                 ++processed;
4284
4285                 /* Advance our pointers to the next descriptor. */
4286                 if (++i == adapter->num_rx_desc)
4287                         i = 0;
4288
4289                 /* Send to the stack */
4290                 if (sendmp != NULL) {
4291                         rxr->next_to_check = i;
4292                         EM_RX_UNLOCK(rxr);
4293                         (*ifp->if_input)(ifp, sendmp);
4294                         EM_RX_LOCK(rxr);
4295                         i = rxr->next_to_check;
4296                 }
4297
4298                 /* Only refresh mbufs every 8 descriptors */
4299                 if (processed == 8) {
4300                         em_refresh_mbufs(rxr, i);
4301                         processed = 0;
4302                 }
4303         }
4304
4305         /* Catch any remaining refresh work */
4306         em_refresh_mbufs(rxr, i);
4307
4308         rxr->next_to_check = i;
4309         if (done != NULL)
4310                 *done = rxdone;
4311         EM_RX_UNLOCK(rxr);
4312
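        /*
         * The return value reports whether the last descriptor examined
         * still had DD set, letting the caller know more completed
         * descriptors remain and another cleanup pass is worthwhile.
         */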
4313         return ((status & E1000_RXD_STAT_DD) ? TRUE : FALSE);
4314 }
4315
4316 static __inline void
4317 em_rx_discard(struct rx_ring *rxr, int i)
4318 {
4319         struct em_buffer        *rbuf;
4320
4321         rbuf = &rxr->rx_buffers[i];
4322         /* Free any previous pieces */
4323         if (rxr->fmp != NULL) {
4324                 rxr->fmp->m_flags |= M_PKTHDR;
4325                 m_freem(rxr->fmp);
4326                 rxr->fmp = NULL;
4327                 rxr->lmp = NULL;
4328         }
4329         /*
4330         ** Free buffer and allow em_refresh_mbufs()
4331         ** to clean up and recharge buffer.
4332         */
4333         if (rbuf->m_head) {
4334                 m_free(rbuf->m_head);
4335                 rbuf->m_head = NULL;
4336         }
4337         return;
4338 }
4339
4340 #ifndef __NO_STRICT_ALIGNMENT
4341 /*
4342  * When jumbo frames are enabled we must realign the entire payload on
4343  * architectures with strict alignment; this is a serious design mistake
4344  * in the 8254x as it defeats the benefit of DMA. The chip only allows RX
4345  * buffer sizes of 2048/4096/8192/16384, while what we really want is
4346  * 2048 - ETHER_ALIGN to align the payload. On architectures without
4347  * strict alignment restrictions the 8254x still performs unaligned
4348  * accesses, which also reduces performance. Rather than copy an entire
4349  * frame to realign it, we allocate a new mbuf, copy the ethernet header
4350  * into it, and prepend the new mbuf to the existing mbuf chain.
4351  *
4352  * Be aware that the best 8254x performance is achieved only when jumbo
4353  * frames are not used at all on architectures with strict alignment.
4354  */
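/*
 * A sketch of the effect (illustrative): the frame is DMA'd flush to the
 * start of the cluster, so the 14-byte ethernet header leaves the IP
 * header on a 2-byte boundary. Small frames are simply slid forward by
 * ETHER_HDR_LEN bytes in place; larger ones get a header-only mbuf
 * prepended, so at most ETHER_HDR_LEN bytes are ever copied.
 */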
4355 static int
4356 em_fixup_rx(struct rx_ring *rxr)
4357 {
4358         struct adapter *adapter = rxr->adapter;
4359         struct mbuf *m, *n;
4360         int error;
4361
4362         error = 0;
4363         m = rxr->fmp;
4364         if (m->m_len <= (MCLBYTES - ETHER_HDR_LEN)) {
4365                 bcopy(m->m_data, m->m_data + ETHER_HDR_LEN, m->m_len);
4366                 m->m_data += ETHER_HDR_LEN;
4367         } else {
4368                 MGETHDR(n, M_DONTWAIT, MT_DATA);
4369                 if (n != NULL) {
4370                         bcopy(m->m_data, n->m_data, ETHER_HDR_LEN);
4371                         m->m_data += ETHER_HDR_LEN;
4372                         m->m_len -= ETHER_HDR_LEN;
4373                         n->m_len = ETHER_HDR_LEN;
4374                         M_MOVE_PKTHDR(n, m);
4375                         n->m_next = m;
4376                         rxr->fmp = n;
4377                 } else {
4378                         adapter->dropped_pkts++;
4379                         m_freem(rxr->fmp);
4380                         rxr->fmp = NULL;
4381                         error = ENOMEM;
4382                 }
4383         }
4384
4385         return (error);
4386 }
4387 #endif
4388
4389 /*********************************************************************
4390  *
4391  *  Verify that the hardware indicated that the checksum is valid.
4392  *  Inform the stack about the status of checksum so that stack
4393  *  doesn't spend time verifying the checksum.
4394  *
4395  *********************************************************************/
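/*
 * Note: in FreeBSD's mbuf checksum convention, CSUM_DATA_VALID |
 * CSUM_PSEUDO_HDR together with a csum_data of 0xffff tells the stack
 * that the hardware verified the full TCP/UDP checksum, so no software
 * verification is needed.
 */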
4396 static void
4397 em_receive_checksum(struct e1000_rx_desc *rx_desc, struct mbuf *mp)
4398 {
4399         /* Ignore Checksum bit is set */
4400         if (rx_desc->status & E1000_RXD_STAT_IXSM) {
4401                 mp->m_pkthdr.csum_flags = 0;
4402                 return;
4403         }
4404
4405         if (rx_desc->status & E1000_RXD_STAT_IPCS) {
4406                 /* Did it pass? */
4407                 if (!(rx_desc->errors & E1000_RXD_ERR_IPE)) {
4408                         /* IP Checksum Good */
4409                         mp->m_pkthdr.csum_flags = CSUM_IP_CHECKED;
4410                         mp->m_pkthdr.csum_flags |= CSUM_IP_VALID;
4411
4412                 } else {
4413                         mp->m_pkthdr.csum_flags = 0;
4414                 }
4415         }
4416
4417         if (rx_desc->status & E1000_RXD_STAT_TCPCS) {
4418                 /* Did it pass? */
4419                 if (!(rx_desc->errors & E1000_RXD_ERR_TCPE)) {
4420                         mp->m_pkthdr.csum_flags |=
4421                         (CSUM_DATA_VALID | CSUM_PSEUDO_HDR);
4422                         mp->m_pkthdr.csum_data = htons(0xffff);
4423                 }
4424         }
4425 }
4426
4427 /*
4428  * This routine is run via a vlan
4429  * config EVENT
4430  */
4431 static void
4432 em_register_vlan(void *arg, struct ifnet *ifp, u16 vtag)
4433 {
4434         struct adapter  *adapter = ifp->if_softc;
4435         u32             index, bit;
4436
4437         if (ifp->if_softc !=  arg)   /* Not our event */
4438                 return;
4439
4440         if ((vtag == 0) || (vtag > 4095))       /* Invalid ID */
4441                 return;
4442
4443         EM_CORE_LOCK(adapter);
4444         index = (vtag >> 5) & 0x7F;
4445         bit = vtag & 0x1F;
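        /*
         * Illustrative mapping: the 4096 possible VLAN IDs are tracked
         * as 128 32-bit words, so e.g. vtag 100 lands in
         * shadow_vfta[100 >> 5] = shadow_vfta[3], bit 100 & 0x1F = 4.
         */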
4446         adapter->shadow_vfta[index] |= (1 << bit);
4447         ++adapter->num_vlans;
4448         /* Re-init to load the changes */
4449         if (ifp->if_capenable & IFCAP_VLAN_HWFILTER)
4450                 em_init_locked(adapter);
4451         EM_CORE_UNLOCK(adapter);
4452 }
4453
4454 /*
4455  * This routine is run via a vlan
4456  * unconfig EVENT
4457  */
4458 static void
4459 em_unregister_vlan(void *arg, struct ifnet *ifp, u16 vtag)
4460 {
4461         struct adapter  *adapter = ifp->if_softc;
4462         u32             index, bit;
4463
4464         if (ifp->if_softc !=  arg)
4465                 return;
4466
4467         if ((vtag == 0) || (vtag > 4095))       /* Invalid */
4468                 return;
4469
4470         EM_CORE_LOCK(adapter);
4471         index = (vtag >> 5) & 0x7F;
4472         bit = vtag & 0x1F;
4473         adapter->shadow_vfta[index] &= ~(1 << bit);
4474         --adapter->num_vlans;
4475         /* Re-init to load the changes */
4476         if (ifp->if_capenable & IFCAP_VLAN_HWFILTER)
4477                 em_init_locked(adapter);
4478         EM_CORE_UNLOCK(adapter);
4479 }
4480
4481 static void
4482 em_setup_vlan_hw_support(struct adapter *adapter)
4483 {
4484         struct e1000_hw *hw = &adapter->hw;
4485         u32             reg;
4486
4487         /*
4488         ** We get here through init_locked, meaning
4489         ** a soft reset; this has already cleared
4490         ** the VFTA and other state, so if no
4491         ** vlans have been registered, do nothing.
4492         */
4493         if (adapter->num_vlans == 0)
4494                 return;
4495
4496         /*
4497         ** A soft reset zeroes out the VFTA, so
4498         ** we need to repopulate it now.
4499         */
4500         for (int i = 0; i < EM_VFTA_SIZE; i++)
4501                 if (adapter->shadow_vfta[i] != 0)
4502                         E1000_WRITE_REG_ARRAY(hw, E1000_VFTA,
4503                             i, adapter->shadow_vfta[i]);
4504
4505         reg = E1000_READ_REG(hw, E1000_CTRL);
4506         reg |= E1000_CTRL_VME;
4507         E1000_WRITE_REG(hw, E1000_CTRL, reg);
4508
4509         /* Enable the Filter Table */
4510         reg = E1000_READ_REG(hw, E1000_RCTL);
4511         reg &= ~E1000_RCTL_CFIEN;
4512         reg |= E1000_RCTL_VFE;
4513         E1000_WRITE_REG(hw, E1000_RCTL, reg);
4514 }
4515
4516 static void
4517 em_enable_intr(struct adapter *adapter)
4518 {
4519         struct e1000_hw *hw = &adapter->hw;
4520         u32 ims_mask = IMS_ENABLE_MASK;
4521
4522         if (hw->mac.type == e1000_82574) {
4523                 E1000_WRITE_REG(hw, EM_EIAC, EM_MSIX_MASK);
4524                 ims_mask |= EM_MSIX_MASK;
4525         } 
4526         E1000_WRITE_REG(hw, E1000_IMS, ims_mask);
4527 }
4528
4529 static void
4530 em_disable_intr(struct adapter *adapter)
4531 {
4532         struct e1000_hw *hw = &adapter->hw;
4533
4534         if (hw->mac.type == e1000_82574)
4535                 E1000_WRITE_REG(hw, EM_EIAC, 0);
4536         E1000_WRITE_REG(hw, E1000_IMC, 0xffffffff);
4537 }
4538
4539 /*
4540  * Bit of a misnomer: what this really means is
4541  * to enable OS management of the system, i.e.
4542  * to disable the special hardware management features.
4543  */
4544 static void
4545 em_init_manageability(struct adapter *adapter)
4546 {
4547         /* A shared code workaround */
4548 #define E1000_82542_MANC2H E1000_MANC2H
4549         if (adapter->has_manage) {
4550                 int manc2h = E1000_READ_REG(&adapter->hw, E1000_MANC2H);
4551                 int manc = E1000_READ_REG(&adapter->hw, E1000_MANC);
4552
4553                 /* disable hardware interception of ARP */
4554                 manc &= ~(E1000_MANC_ARP_EN);
4555
4556                 /* enable receiving management packets to the host */
4557                 manc |= E1000_MANC_EN_MNG2HOST;
4558 #define E1000_MNG2HOST_PORT_623 (1 << 5)
4559 #define E1000_MNG2HOST_PORT_664 (1 << 6)
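                /*
                 * Ports 623 and 664 are the standard ASF/RMCP remote
                 * management ports; setting these MANC2H bits forwards
                 * management packets on those ports up to the host.
                 */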
4560                 manc2h |= E1000_MNG2HOST_PORT_623;
4561                 manc2h |= E1000_MNG2HOST_PORT_664;
4562                 E1000_WRITE_REG(&adapter->hw, E1000_MANC2H, manc2h);
4563                 E1000_WRITE_REG(&adapter->hw, E1000_MANC, manc);
4564         }
4565 }
4566
4567 /*
4568  * Give control back to hardware management
4569  * controller if there is one.
4570  */
4571 static void
4572 em_release_manageability(struct adapter *adapter)
4573 {
4574         if (adapter->has_manage) {
4575                 int manc = E1000_READ_REG(&adapter->hw, E1000_MANC);
4576
4577                 /* re-enable hardware interception of ARP */
4578                 manc |= E1000_MANC_ARP_EN;
4579                 manc &= ~E1000_MANC_EN_MNG2HOST;
4580
4581                 E1000_WRITE_REG(&adapter->hw, E1000_MANC, manc);
4582         }
4583 }
4584
4585 /*
4586  * em_get_hw_control sets the {CTRL_EXT|FWSM}:DRV_LOAD bit.
4587  * For ASF and Pass Through versions of f/w this means
4588  * that the driver is loaded. For AMT version type f/w
4589  * this means that the network i/f is open.
4590  */
4591 static void
4592 em_get_hw_control(struct adapter *adapter)
4593 {
4594         u32 ctrl_ext, swsm;
4595
4596         if (adapter->hw.mac.type == e1000_82573) {
4597                 swsm = E1000_READ_REG(&adapter->hw, E1000_SWSM);
4598                 E1000_WRITE_REG(&adapter->hw, E1000_SWSM,
4599                     swsm | E1000_SWSM_DRV_LOAD);
4600                 return;
4601         }
4602         /* else */
4603         ctrl_ext = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
4604         E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT,
4605             ctrl_ext | E1000_CTRL_EXT_DRV_LOAD);
4606         return;
4607 }
4608
4609 /*
4610  * em_release_hw_control resets {CTRL_EXT|FWSM}:DRV_LOAD bit.
4611  * For ASF and Pass Through versions of f/w this means that
4612  * the driver is no longer loaded. For AMT versions of the
4613  * f/w this means that the network i/f is closed.
4614  */
4615 static void
4616 em_release_hw_control(struct adapter *adapter)
4617 {
4618         u32 ctrl_ext, swsm;
4619
4620         if (!adapter->has_manage)
4621                 return;
4622
4623         if (adapter->hw.mac.type == e1000_82573) {
4624                 swsm = E1000_READ_REG(&adapter->hw, E1000_SWSM);
4625                 E1000_WRITE_REG(&adapter->hw, E1000_SWSM,
4626                     swsm & ~E1000_SWSM_DRV_LOAD);
4627                 return;
4628         }
4629         /* else */
4630         ctrl_ext = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
4631         E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT,
4632             ctrl_ext & ~E1000_CTRL_EXT_DRV_LOAD);
4633         return;
4634 }
4635
4636 static int
4637 em_is_valid_ether_addr(u8 *addr)
4638 {
4639         char zero_addr[6] = { 0, 0, 0, 0, 0, 0 };
4640
4641         if ((addr[0] & 1) || (!bcmp(addr, zero_addr, ETHER_ADDR_LEN))) {
4642                 return (FALSE);
4643         }
4644
4645         return (TRUE);
4646 }
4647
4648 /*
4649 ** Parse the interface capabilities with regard
4650 ** to both system management and wake-on-lan for
4651 ** later use.
4652 */
4653 static void
4654 em_get_wakeup(device_t dev)
4655 {
4656         struct adapter  *adapter = device_get_softc(dev);
4657         u16             eeprom_data = 0, device_id, apme_mask;
4658
4659         adapter->has_manage = e1000_enable_mng_pass_thru(&adapter->hw);
4660         apme_mask = EM_EEPROM_APME;
4661
4662         switch (adapter->hw.mac.type) {
4663         case e1000_82573:
4664         case e1000_82583:
4665                 adapter->has_amt = TRUE;
4666                 /* Falls thru */
4667         case e1000_82571:
4668         case e1000_82572:
4669         case e1000_80003es2lan:
4670                 if (adapter->hw.bus.func == 1) {
4671                         e1000_read_nvm(&adapter->hw,
4672                             NVM_INIT_CONTROL3_PORT_B, 1, &eeprom_data);
4673                         break;
4674                 } else
4675                         e1000_read_nvm(&adapter->hw,
4676                             NVM_INIT_CONTROL3_PORT_A, 1, &eeprom_data);
4677                 break;
4678         case e1000_ich8lan:
4679         case e1000_ich9lan:
4680         case e1000_ich10lan:
4681         case e1000_pchlan:
4682         case e1000_pch2lan:
4683                 apme_mask = E1000_WUC_APME;
4684                 adapter->has_amt = TRUE;
4685                 eeprom_data = E1000_READ_REG(&adapter->hw, E1000_WUC);
4686                 break;
4687         default:
4688                 e1000_read_nvm(&adapter->hw,
4689                     NVM_INIT_CONTROL3_PORT_A, 1, &eeprom_data);
4690                 break;
4691         }
4692         if (eeprom_data & apme_mask)
4693                 adapter->wol = (E1000_WUFC_MAG | E1000_WUFC_MC);
4694         /*
4695          * We have the eeprom settings, now apply the special cases
4696          * where the eeprom may be wrong or the board won't support
4697          * wake on lan on a particular port
4698          */
4699         device_id = pci_get_device(dev);
4700         switch (device_id) {
4701         case E1000_DEV_ID_82571EB_FIBER:
4702                 /* Wake events only supported on port A for dual fiber
4703                  * regardless of eeprom setting */
4704                 if (E1000_READ_REG(&adapter->hw, E1000_STATUS) &
4705                     E1000_STATUS_FUNC_1)
4706                         adapter->wol = 0;
4707                 break;
4708         case E1000_DEV_ID_82571EB_QUAD_COPPER:
4709         case E1000_DEV_ID_82571EB_QUAD_FIBER:
4710         case E1000_DEV_ID_82571EB_QUAD_COPPER_LP:
4711                 /* if quad port adapter, disable WoL on all but port A */
4712                 if (global_quad_port_a != 0)
4713                         adapter->wol = 0;
4714                 /* Reset for multiple quad port adapters */
4715                 if (++global_quad_port_a == 4)
4716                         global_quad_port_a = 0;
4717                 break;
4718         }
4719         return;
4720 }
4721
4722
4723 /*
4724  * Enable PCI Wake On Lan capability
4725  */
4726 static void
4727 em_enable_wakeup(device_t dev)
4728 {
4729         struct adapter  *adapter = device_get_softc(dev);
4730         struct ifnet    *ifp = adapter->ifp;
4731         u32             pmc, ctrl, ctrl_ext, rctl;
4732         u16             status;
4733
4734         if ((pci_find_extcap(dev, PCIY_PMG, &pmc) != 0))
4735                 return;
4736
4737         /* Advertise the wakeup capability */
4738         ctrl = E1000_READ_REG(&adapter->hw, E1000_CTRL);
4739         ctrl |= (E1000_CTRL_SWDPIN2 | E1000_CTRL_SWDPIN3);
4740         E1000_WRITE_REG(&adapter->hw, E1000_CTRL, ctrl);
4741         E1000_WRITE_REG(&adapter->hw, E1000_WUC, E1000_WUC_PME_EN);
4742
4743         if ((adapter->hw.mac.type == e1000_ich8lan) ||
4744             (adapter->hw.mac.type == e1000_pchlan) ||
4745             (adapter->hw.mac.type == e1000_ich9lan) ||
4746             (adapter->hw.mac.type == e1000_ich10lan)) {
4747                 e1000_disable_gig_wol_ich8lan(&adapter->hw);
4748                 e1000_hv_phy_powerdown_workaround_ich8lan(&adapter->hw);
4749         }
4750
4751         /* Keep the laser running on Fiber adapters */
4752         if (adapter->hw.phy.media_type == e1000_media_type_fiber ||
4753             adapter->hw.phy.media_type == e1000_media_type_internal_serdes) {
4754                 ctrl_ext = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
4755                 ctrl_ext |= E1000_CTRL_EXT_SDP3_DATA;
4756                 E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT, ctrl_ext);
4757         }
4758
4759         /*
4760         ** Determine type of Wakeup: note that wol
4761         ** is set with all bits on by default.
4762         */
4763         if ((ifp->if_capenable & IFCAP_WOL_MAGIC) == 0)
4764                 adapter->wol &= ~E1000_WUFC_MAG;
4765
4766         if ((ifp->if_capenable & IFCAP_WOL_MCAST) == 0)
4767                 adapter->wol &= ~E1000_WUFC_MC;
4768         else {
4769                 rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
4770                 rctl |= E1000_RCTL_MPE;
4771                 E1000_WRITE_REG(&adapter->hw, E1000_RCTL, rctl);
4772         }
4773
4774         if ((adapter->hw.mac.type == e1000_pchlan) ||
4775             (adapter->hw.mac.type == e1000_pch2lan)) {
4776                 if (em_enable_phy_wakeup(adapter))
4777                         return;
4778         } else {
4779                 E1000_WRITE_REG(&adapter->hw, E1000_WUC, E1000_WUC_PME_EN);
4780                 E1000_WRITE_REG(&adapter->hw, E1000_WUFC, adapter->wol);
4781         }
4782
4783         if (adapter->hw.phy.type == e1000_phy_igp_3)
4784                 e1000_igp3_phy_powerdown_workaround_ich8lan(&adapter->hw);
4785
4786         /* Request PME */
4787         status = pci_read_config(dev, pmc + PCIR_POWER_STATUS, 2);
4788         status &= ~(PCIM_PSTAT_PME | PCIM_PSTAT_PMEENABLE);
4789         if (ifp->if_capenable & IFCAP_WOL)
4790                 status |= PCIM_PSTAT_PME | PCIM_PSTAT_PMEENABLE;
4791         pci_write_config(dev, pmc + PCIR_POWER_STATUS, status, 2);
4792
4793         return;
4794 }
4795
4796 /*
4797 ** WOL in the newer chipset interfaces (pchlan)
4798 ** requires things to be copied into the PHY
4799 */
4800 static int
4801 em_enable_phy_wakeup(struct adapter *adapter)
4802 {
4803         struct e1000_hw *hw = &adapter->hw;
4804         u32 mreg, ret = 0;
4805         u16 preg;
4806
4807         /* copy MAC RARs to PHY RARs */
4808         e1000_copy_rx_addrs_to_phy_ich8lan(hw);
4809
4810         /* copy MAC MTA to PHY MTA */
4811         for (int i = 0; i < adapter->hw.mac.mta_reg_count; i++) {
4812                 mreg = E1000_READ_REG_ARRAY(hw, E1000_MTA, i);
4813                 e1000_write_phy_reg(hw, BM_MTA(i), (u16)(mreg & 0xFFFF));
4814                 e1000_write_phy_reg(hw, BM_MTA(i) + 1,
4815                     (u16)((mreg >> 16) & 0xFFFF));
4816         }
4817
4818         /* configure PHY Rx Control register */
4819         e1000_read_phy_reg(&adapter->hw, BM_RCTL, &preg);
4820         mreg = E1000_READ_REG(hw, E1000_RCTL);
4821         if (mreg & E1000_RCTL_UPE)
4822                 preg |= BM_RCTL_UPE;
4823         if (mreg & E1000_RCTL_MPE)
4824                 preg |= BM_RCTL_MPE;
4825         preg &= ~(BM_RCTL_MO_MASK);
4826         if (mreg & E1000_RCTL_MO_3)
4827                 preg |= (((mreg & E1000_RCTL_MO_3) >> E1000_RCTL_MO_SHIFT)
4828                                 << BM_RCTL_MO_SHIFT);
4829         if (mreg & E1000_RCTL_BAM)
4830                 preg |= BM_RCTL_BAM;
4831         if (mreg & E1000_RCTL_PMCF)
4832                 preg |= BM_RCTL_PMCF;
4833         mreg = E1000_READ_REG(hw, E1000_CTRL);
4834         if (mreg & E1000_CTRL_RFCE)
4835                 preg |= BM_RCTL_RFCE;
4836         e1000_write_phy_reg(&adapter->hw, BM_RCTL, preg);
4837
4838         /* enable PHY wakeup in MAC register */
4839         E1000_WRITE_REG(hw, E1000_WUC,
4840             E1000_WUC_PHY_WAKE | E1000_WUC_PME_EN);
4841         E1000_WRITE_REG(hw, E1000_WUFC, adapter->wol);
4842
4843         /* configure and enable PHY wakeup in PHY registers */
4844         e1000_write_phy_reg(&adapter->hw, BM_WUFC, adapter->wol);
4845         e1000_write_phy_reg(&adapter->hw, BM_WUC, E1000_WUC_PME_EN);
4846
4847         /* activate PHY wakeup */
4848         ret = hw->phy.ops.acquire(hw);
4849         if (ret) {
4850                 printf("Could not acquire PHY\n");
4851                 return ret;
4852         }
4853         e1000_write_phy_reg_mdic(hw, IGP01E1000_PHY_PAGE_SELECT,
4854                                  (BM_WUC_ENABLE_PAGE << IGP_PAGE_SHIFT));
4855         ret = e1000_read_phy_reg_mdic(hw, BM_WUC_ENABLE_REG, &preg);
4856         if (ret) {
4857                 printf("Could not read PHY page 769\n");
4858                 goto out;
4859         }
4860         preg |= BM_WUC_ENABLE_BIT | BM_WUC_HOST_WU_BIT;
4861         ret = e1000_write_phy_reg_mdic(hw, BM_WUC_ENABLE_REG, preg);
4862         if (ret)
4863                 printf("Could not set PHY Host Wakeup bit\n");
4864 out:
4865         hw->phy.ops.release(hw);
4866
4867         return ret;
4868 }
4869
4870 static void
4871 em_led_func(void *arg, int onoff)
4872 {
4873         struct adapter  *adapter = arg;
4874  
4875         EM_CORE_LOCK(adapter);
4876         if (onoff) {
4877                 e1000_setup_led(&adapter->hw);
4878                 e1000_led_on(&adapter->hw);
4879         } else {
4880                 e1000_led_off(&adapter->hw);
4881                 e1000_cleanup_led(&adapter->hw);
4882         }
4883         EM_CORE_UNLOCK(adapter);
4884 }
4885
4886 /*
4887 ** Disable the L0S and L1 LINK states
4888 */
4889 static void
4890 em_disable_aspm(struct adapter *adapter)
4891 {
4892         int             base, reg;
4893         u16             link_cap, link_ctrl;
4894         device_t        dev = adapter->dev;
4895
4896         switch (adapter->hw.mac.type) {
4897                 case e1000_82573:
4898                 case e1000_82574:
4899                 case e1000_82583:
4900                         break;
4901                 default:
4902                         return;
4903         }
4904         if (pci_find_extcap(dev, PCIY_EXPRESS, &base) != 0)
4905                 return;
4906         reg = base + PCIR_EXPRESS_LINK_CAP;
4907         link_cap = pci_read_config(dev, reg, 2);
4908         if ((link_cap & PCIM_LINK_CAP_ASPM) == 0)
4909                 return;
4910         reg = base + PCIR_EXPRESS_LINK_CTL;
4911         link_ctrl = pci_read_config(dev, reg, 2);
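        /*
         * Bits 1:0 of the PCIe Link Control register select ASPM:
         * 00 = disabled, 01 = L0s only, 10 = L1 only, 11 = L0s and L1.
         */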
4912         link_ctrl &= 0xFFFC; /* clear ASPM control bits 1:0 */
4913         pci_write_config(dev, reg, link_ctrl, 2);
4914         return;
4915 }
4916
4917 /**********************************************************************
4918  *
4919  *  Update the board statistics counters.
4920  *
4921  **********************************************************************/
4922 static void
4923 em_update_stats_counters(struct adapter *adapter)
4924 {
4925         struct ifnet   *ifp;
4926
4927         if (adapter->hw.phy.media_type == e1000_media_type_copper ||
4928            (E1000_READ_REG(&adapter->hw, E1000_STATUS) & E1000_STATUS_LU)) {
4929                 adapter->stats.symerrs += E1000_READ_REG(&adapter->hw, E1000_SYMERRS);
4930                 adapter->stats.sec += E1000_READ_REG(&adapter->hw, E1000_SEC);
4931         }
4932         adapter->stats.crcerrs += E1000_READ_REG(&adapter->hw, E1000_CRCERRS);
4933         adapter->stats.mpc += E1000_READ_REG(&adapter->hw, E1000_MPC);
4934         adapter->stats.scc += E1000_READ_REG(&adapter->hw, E1000_SCC);
4935         adapter->stats.ecol += E1000_READ_REG(&adapter->hw, E1000_ECOL);
4936
4937         adapter->stats.mcc += E1000_READ_REG(&adapter->hw, E1000_MCC);
4938         adapter->stats.latecol += E1000_READ_REG(&adapter->hw, E1000_LATECOL);
4939         adapter->stats.colc += E1000_READ_REG(&adapter->hw, E1000_COLC);
4940         adapter->stats.dc += E1000_READ_REG(&adapter->hw, E1000_DC);
4941         adapter->stats.rlec += E1000_READ_REG(&adapter->hw, E1000_RLEC);
4942         adapter->stats.xonrxc += E1000_READ_REG(&adapter->hw, E1000_XONRXC);
4943         adapter->stats.xontxc += E1000_READ_REG(&adapter->hw, E1000_XONTXC);
4944         /*
4945         ** For watchdog management we need to know if we have been
4946         ** paused during the last interval, so capture that here.
4947         */
4948         adapter->pause_frames = E1000_READ_REG(&adapter->hw, E1000_XOFFRXC);
4949         adapter->stats.xoffrxc += adapter->pause_frames;
4950         adapter->stats.xofftxc += E1000_READ_REG(&adapter->hw, E1000_XOFFTXC);
4951         adapter->stats.fcruc += E1000_READ_REG(&adapter->hw, E1000_FCRUC);
4952         adapter->stats.prc64 += E1000_READ_REG(&adapter->hw, E1000_PRC64);
4953         adapter->stats.prc127 += E1000_READ_REG(&adapter->hw, E1000_PRC127);
4954         adapter->stats.prc255 += E1000_READ_REG(&adapter->hw, E1000_PRC255);
4955         adapter->stats.prc511 += E1000_READ_REG(&adapter->hw, E1000_PRC511);
4956         adapter->stats.prc1023 += E1000_READ_REG(&adapter->hw, E1000_PRC1023);
4957         adapter->stats.prc1522 += E1000_READ_REG(&adapter->hw, E1000_PRC1522);
4958         adapter->stats.gprc += E1000_READ_REG(&adapter->hw, E1000_GPRC);
4959         adapter->stats.bprc += E1000_READ_REG(&adapter->hw, E1000_BPRC);
4960         adapter->stats.mprc += E1000_READ_REG(&adapter->hw, E1000_MPRC);
4961         adapter->stats.gptc += E1000_READ_REG(&adapter->hw, E1000_GPTC);
4962
4963         /* For the 64-bit byte counters the low dword must be read first. */
4964         /* Both registers clear on the read of the high dword */
4965
4966         adapter->stats.gorc += E1000_READ_REG(&adapter->hw, E1000_GORCL) +
4967             ((u64)E1000_READ_REG(&adapter->hw, E1000_GORCH) << 32);
4968         adapter->stats.gotc += E1000_READ_REG(&adapter->hw, E1000_GOTCL) +
4969             ((u64)E1000_READ_REG(&adapter->hw, E1000_GOTCH) << 32);
4970
4971         adapter->stats.rnbc += E1000_READ_REG(&adapter->hw, E1000_RNBC);
4972         adapter->stats.ruc += E1000_READ_REG(&adapter->hw, E1000_RUC);
4973         adapter->stats.rfc += E1000_READ_REG(&adapter->hw, E1000_RFC);
4974         adapter->stats.roc += E1000_READ_REG(&adapter->hw, E1000_ROC);
4975         adapter->stats.rjc += E1000_READ_REG(&adapter->hw, E1000_RJC);
4976
4977         adapter->stats.tor += E1000_READ_REG(&adapter->hw, E1000_TORH);
4978         adapter->stats.tot += E1000_READ_REG(&adapter->hw, E1000_TOTH);
4979
4980         adapter->stats.tpr += E1000_READ_REG(&adapter->hw, E1000_TPR);
4981         adapter->stats.tpt += E1000_READ_REG(&adapter->hw, E1000_TPT);
4982         adapter->stats.ptc64 += E1000_READ_REG(&adapter->hw, E1000_PTC64);
4983         adapter->stats.ptc127 += E1000_READ_REG(&adapter->hw, E1000_PTC127);
4984         adapter->stats.ptc255 += E1000_READ_REG(&adapter->hw, E1000_PTC255);
4985         adapter->stats.ptc511 += E1000_READ_REG(&adapter->hw, E1000_PTC511);
4986         adapter->stats.ptc1023 += E1000_READ_REG(&adapter->hw, E1000_PTC1023);
4987         adapter->stats.ptc1522 += E1000_READ_REG(&adapter->hw, E1000_PTC1522);
4988         adapter->stats.mptc += E1000_READ_REG(&adapter->hw, E1000_MPTC);
4989         adapter->stats.bptc += E1000_READ_REG(&adapter->hw, E1000_BPTC);
4990
4991         /* Interrupt Counts */
4992
4993         adapter->stats.iac += E1000_READ_REG(&adapter->hw, E1000_IAC);
4994         adapter->stats.icrxptc += E1000_READ_REG(&adapter->hw, E1000_ICRXPTC);
4995         adapter->stats.icrxatc += E1000_READ_REG(&adapter->hw, E1000_ICRXATC);
4996         adapter->stats.ictxptc += E1000_READ_REG(&adapter->hw, E1000_ICTXPTC);
4997         adapter->stats.ictxatc += E1000_READ_REG(&adapter->hw, E1000_ICTXATC);
4998         adapter->stats.ictxqec += E1000_READ_REG(&adapter->hw, E1000_ICTXQEC);
4999         adapter->stats.ictxqmtc += E1000_READ_REG(&adapter->hw, E1000_ICTXQMTC);
5000         adapter->stats.icrxdmtc += E1000_READ_REG(&adapter->hw, E1000_ICRXDMTC);
5001         adapter->stats.icrxoc += E1000_READ_REG(&adapter->hw, E1000_ICRXOC);
5002
5003         if (adapter->hw.mac.type >= e1000_82543) {
5004                 adapter->stats.algnerrc +=
5005                     E1000_READ_REG(&adapter->hw, E1000_ALGNERRC);
5006                 adapter->stats.rxerrc +=
5007                     E1000_READ_REG(&adapter->hw, E1000_RXERRC);
5008                 adapter->stats.tncrs +=
5009                     E1000_READ_REG(&adapter->hw, E1000_TNCRS);
5010                 adapter->stats.cexterr +=
5011                     E1000_READ_REG(&adapter->hw, E1000_CEXTERR);
5012                 adapter->stats.tsctc +=
5013                     E1000_READ_REG(&adapter->hw, E1000_TSCTC);
5014                 adapter->stats.tsctfc +=
5015                     E1000_READ_REG(&adapter->hw, E1000_TSCTFC);
5016         }
5017         ifp = adapter->ifp;
5018
5019         ifp->if_collisions = adapter->stats.colc;
5020
5021         /* Rx Errors */
5022         ifp->if_ierrors = adapter->dropped_pkts + adapter->stats.rxerrc +
5023             adapter->stats.crcerrs + adapter->stats.algnerrc +
5024             adapter->stats.ruc + adapter->stats.roc +
5025             adapter->stats.mpc + adapter->stats.cexterr;
5026
5027         /* Tx Errors */
5028         ifp->if_oerrors = adapter->stats.ecol +
5029             adapter->stats.latecol + adapter->watchdog_events;
5030 }
5031
5032 /* Export a single 32-bit register via a read-only sysctl. */
5033 static int
5034 em_sysctl_reg_handler(SYSCTL_HANDLER_ARGS)
5035 {
5036         struct adapter *adapter;
5037         u_int val;
5038
5039         adapter = oidp->oid_arg1;
5040         val = E1000_READ_REG(&adapter->hw, oidp->oid_arg2);
5041         return (sysctl_handle_int(oidp, &val, 0, req));
5042 }
5043
5044 /*
5045  * Add sysctl variables, one per statistic, to the system.
5046  */
5047 static void
5048 em_add_hw_stats(struct adapter *adapter)
5049 {
5050         device_t dev = adapter->dev;
5051
5052         struct tx_ring *txr = adapter->tx_rings;
5053         struct rx_ring *rxr = adapter->rx_rings;
5054
5055         struct sysctl_ctx_list *ctx = device_get_sysctl_ctx(dev);
5056         struct sysctl_oid *tree = device_get_sysctl_tree(dev);
5057         struct sysctl_oid_list *child = SYSCTL_CHILDREN(tree);
5058         struct e1000_hw_stats *stats = &adapter->stats;
5059
5060         struct sysctl_oid *stat_node, *queue_node, *int_node;
5061         struct sysctl_oid_list *stat_list, *queue_list, *int_list;
5062
5063 #define QUEUE_NAME_LEN 32
5064         char namebuf[QUEUE_NAME_LEN];
5065         
5066         /* Driver Statistics */
5067         SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "link_irq", 
5068                         CTLFLAG_RD, &adapter->link_irq, 0,
5069                         "Link MSIX IRQ Handled");
5070         SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "mbuf_alloc_fail", 
5071                          CTLFLAG_RD, &adapter->mbuf_alloc_failed,
5072                          "Std mbuf failed");
5073         SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "cluster_alloc_fail", 
5074                          CTLFLAG_RD, &adapter->mbuf_cluster_failed,
5075                          "Std mbuf cluster failed");
5076         SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "dropped", 
5077                         CTLFLAG_RD, &adapter->dropped_pkts,
5078                         "Driver dropped packets");
5079         SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "tx_dma_fail", 
5080                         CTLFLAG_RD, &adapter->no_tx_dma_setup,
5081                         "Driver tx dma failure in xmit");
5082         SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "rx_overruns",
5083                         CTLFLAG_RD, &adapter->rx_overruns,
5084                         "RX overruns");
5085         SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "watchdog_timeouts",
5086                         CTLFLAG_RD, &adapter->watchdog_events,
5087                         "Watchdog timeouts");
5088         
5089         SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "device_control",
5090                         CTLFLAG_RD, adapter, E1000_CTRL,
5091                         em_sysctl_reg_handler, "IU",
5092                         "Device Control Register");
5093         SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "rx_control",
5094                         CTLFLAG_RD, adapter, E1000_RCTL,
5095                         em_sysctl_reg_handler, "IU",
5096                         "Receiver Control Register");
5097         SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "fc_high_water",
5098                         CTLFLAG_RD, &adapter->hw.fc.high_water, 0,
5099                         "Flow Control High Watermark");
5100         SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "fc_low_water", 
5101                         CTLFLAG_RD, &adapter->hw.fc.low_water, 0,
5102                         "Flow Control Low Watermark");
5103
5104         for (int i = 0; i < adapter->num_queues; i++, rxr++, txr++) {
5105                 snprintf(namebuf, QUEUE_NAME_LEN, "queue%d", i);
5106                 queue_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, namebuf,
5107                                             CTLFLAG_RD, NULL, "Queue Name");
5108                 queue_list = SYSCTL_CHILDREN(queue_node);
5109
5110                 SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "txd_head", 
5111                                 CTLFLAG_RD, adapter, E1000_TDH(txr->me),
5112                                 em_sysctl_reg_handler, "IU",
5113                                 "Transmit Descriptor Head");
5114                 SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "txd_tail", 
5115                                 CTLFLAG_RD, adapter, E1000_TDT(txr->me),
5116                                 em_sysctl_reg_handler, "IU",
5117                                 "Transmit Descriptor Tail");
5118                 SYSCTL_ADD_ULONG(ctx, queue_list, OID_AUTO, "tx_irq",
5119                                 CTLFLAG_RD, &txr->tx_irq,
5120                                 "Queue MSI-X Transmit Interrupts");
5121                 SYSCTL_ADD_ULONG(ctx, queue_list, OID_AUTO, "no_desc_avail", 
5122                                 CTLFLAG_RD, &txr->no_desc_avail,
5123                                 "Queue No Descriptor Available");
5124                 
5125                 SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "rxd_head", 
5126                                 CTLFLAG_RD, adapter, E1000_RDH(rxr->me),
5127                                 em_sysctl_reg_handler, "IU",
5128                                 "Receive Descriptor Head");
5129                 SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "rxd_tail", 
5130                                 CTLFLAG_RD, adapter, E1000_RDT(rxr->me),
5131                                 em_sysctl_reg_handler, "IU",
5132                                 "Receive Descriptor Tail");
5133                 SYSCTL_ADD_ULONG(ctx, queue_list, OID_AUTO, "rx_irq",
5134                                 CTLFLAG_RD, &rxr->rx_irq,
5135                                 "Queue MSI-X Receive Interrupts");
5136         }
5137
5138         /* MAC stats get their own sub node */
5139
5140         stat_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "mac_stats", 
5141                                     CTLFLAG_RD, NULL, "Statistics");
5142         stat_list = SYSCTL_CHILDREN(stat_node);
5143
5144         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "excess_coll", 
5145                         CTLFLAG_RD, &stats->ecol,
5146                         "Excessive collisions");
5147         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "single_coll", 
5148                         CTLFLAG_RD, &stats->scc,
5149                         "Single collisions");
5150         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "multiple_coll", 
5151                         CTLFLAG_RD, &stats->mcc,
5152                         "Multiple collisions");
5153         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "late_coll", 
5154                         CTLFLAG_RD, &stats->latecol,
5155                         "Late collisions");
5156         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "collision_count", 
5157                         CTLFLAG_RD, &stats->colc,
5158                         "Collision Count");
5159         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "symbol_errors",
5160                         CTLFLAG_RD, &adapter->stats.symerrs,
5161                         "Symbol Errors");
5162         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "sequence_errors",
5163                         CTLFLAG_RD, &adapter->stats.sec,
5164                         "Sequence Errors");
5165         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "defer_count",
5166                         CTLFLAG_RD, &adapter->stats.dc,
5167                         "Defer Count");
5168         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "missed_packets",
5169                         CTLFLAG_RD, &adapter->stats.mpc,
5170                         "Missed Packets");
5171         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "recv_no_buff",
5172                         CTLFLAG_RD, &adapter->stats.rnbc,
5173                         "Receive No Buffers");
5174         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "recv_undersize",
5175                         CTLFLAG_RD, &adapter->stats.ruc,
5176                         "Receive Undersize");
5177         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "recv_fragmented",
5178                         CTLFLAG_RD, &adapter->stats.rfc,
5179                         "Fragmented Packets Received");
5180         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "recv_oversize",
5181                         CTLFLAG_RD, &adapter->stats.roc,
5182                         "Oversized Packets Received");
5183         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "recv_jabber",
5184                         CTLFLAG_RD, &adapter->stats.rjc,
5185                         "Received Jabber");
5186         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "recv_errs",
5187                         CTLFLAG_RD, &adapter->stats.rxerrc,
5188                         "Receive Errors");
5189         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "crc_errs",
5190                         CTLFLAG_RD, &adapter->stats.crcerrs,
5191                         "CRC errors");
5192         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "alignment_errs",
5193                         CTLFLAG_RD, &adapter->stats.algnerrc,
5194                         "Alignment Errors");
5195         /* On 82575 these are collision counts */
5196         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "coll_ext_errs",
5197                         CTLFLAG_RD, &adapter->stats.cexterr,
5198                         "Collision/Carrier extension errors");
5199         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "xon_recvd",
5200                         CTLFLAG_RD, &adapter->stats.xonrxc,
5201                         "XON Received");
5202         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "xon_txd",
5203                         CTLFLAG_RD, &adapter->stats.xontxc,
5204                         "XON Transmitted");
5205         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "xoff_recvd",
5206                         CTLFLAG_RD, &adapter->stats.xoffrxc,
5207                         "XOFF Received");
5208         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "xoff_txd",
5209                         CTLFLAG_RD, &adapter->stats.xofftxc,
5210                         "XOFF Transmitted");
5211
5212         /* Packet Reception Stats */
5213         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "total_pkts_recvd",
5214                         CTLFLAG_RD, &adapter->stats.tpr,
5215                         "Total Packets Received");
5216         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "good_pkts_recvd",
5217                         CTLFLAG_RD, &adapter->stats.gprc,
5218                         "Good Packets Received");
5219         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "bcast_pkts_recvd",
5220                         CTLFLAG_RD, &adapter->stats.bprc,
5221                         "Broadcast Packets Received");
5222         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "mcast_pkts_recvd",
5223                         CTLFLAG_RD, &adapter->stats.mprc,
5224                         "Multicast Packets Received");
5225         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "rx_frames_64",
5226                         CTLFLAG_RD, &adapter->stats.prc64,
5227                         "64 byte frames received");
5228         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "rx_frames_65_127",
5229                         CTLFLAG_RD, &adapter->stats.prc127,
5230                         "65-127 byte frames received");
5231         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "rx_frames_128_255",
5232                         CTLFLAG_RD, &adapter->stats.prc255,
5233                         "128-255 byte frames received");
5234         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "rx_frames_256_511",
5235                         CTLFLAG_RD, &adapter->stats.prc511,
5236                         "256-511 byte frames received");
5237         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "rx_frames_512_1023",
5238                         CTLFLAG_RD, &adapter->stats.prc1023,
5239                         "512-1023 byte frames received");
5240         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "rx_frames_1024_1522",
5241                         CTLFLAG_RD, &adapter->stats.prc1522,
5242                         "1024-1522 byte frames received");
5243         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "good_octets_recvd", 
5244                         CTLFLAG_RD, &adapter->stats.gorc, 
5245                         "Good Octets Received"); 
5246
5247         /* Packet Transmission Stats */
5248         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "good_octets_txd", 
5249                         CTLFLAG_RD, &adapter->stats.gotc, 
5250                         "Good Octets Transmitted"); 
5251         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "total_pkts_txd",
5252                         CTLFLAG_RD, &adapter->stats.tpt,
5253                         "Total Packets Transmitted");
5254         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "good_pkts_txd",
5255                         CTLFLAG_RD, &adapter->stats.gptc,
5256                         "Good Packets Transmitted");
5257         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "bcast_pkts_txd",
5258                         CTLFLAG_RD, &adapter->stats.bptc,
5259                         "Broadcast Packets Transmitted");
5260         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "mcast_pkts_txd",
5261                         CTLFLAG_RD, &adapter->stats.mptc,
5262                         "Multicast Packets Transmitted");
5263         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tx_frames_64",
5264                         CTLFLAG_RD, &adapter->stats.ptc64,
5265                         "64 byte frames transmitted");
5266         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tx_frames_65_127",
5267                         CTLFLAG_RD, &adapter->stats.ptc127,
5268                         "65-127 byte frames transmitted");
5269         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tx_frames_128_255",
5270                         CTLFLAG_RD, &adapter->stats.ptc255,
5271                         "128-255 byte frames transmitted");
5272         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tx_frames_256_511",
5273                         CTLFLAG_RD, &adapter->stats.ptc511,
5274                         "256-511 byte frames transmitted");
5275         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tx_frames_512_1023",
5276                         CTLFLAG_RD, &adapter->stats.ptc1023,
5277                         "512-1023 byte frames transmitted");
5278         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tx_frames_1024_1522",
5279                         CTLFLAG_RD, &adapter->stats.ptc1522,
5280                         "1024-1522 byte frames transmitted");
5281         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tso_txd",
5282                         CTLFLAG_RD, &adapter->stats.tsctc,
5283                         "TSO Contexts Transmitted");
5284         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tso_ctx_fail",
5285                         CTLFLAG_RD, &adapter->stats.tsctfc,
5286                         "TSO Contexts Failed");
5287
5288
5289         /* Interrupt Stats */
5290
5291         int_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "interrupts", 
5292                                     CTLFLAG_RD, NULL, "Interrupt Statistics");
5293         int_list = SYSCTL_CHILDREN(int_node);
5294
5295         SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "asserts",
5296                         CTLFLAG_RD, &adapter->stats.iac,
5297                         "Interrupt Assertion Count");
5298
5299         SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "rx_pkt_timer",
5300                         CTLFLAG_RD, &adapter->stats.icrxptc,
5301                         "Interrupt Cause Rx Pkt Timer Expire Count");
5302
5303         SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "rx_abs_timer",
5304                         CTLFLAG_RD, &adapter->stats.icrxatc,
5305                         "Interrupt Cause Rx Abs Timer Expire Count");
5306
5307         SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "tx_pkt_timer",
5308                         CTLFLAG_RD, &adapter->stats.ictxptc,
5309                         "Interrupt Cause Tx Pkt Timer Expire Count");
5310
5311         SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "tx_abs_timer",
5312                         CTLFLAG_RD, &adapter->stats.ictxatc,
5313                         "Interrupt Cause Tx Abs Timer Expire Count");
5314
5315         SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "tx_queue_empty",
5316                         CTLFLAG_RD, &adapter->stats.ictxqec,
5317                         "Interrupt Cause Tx Queue Empty Count");
5318
5319         SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "tx_queue_min_thresh",
5320                         CTLFLAG_RD, &adapter->stats.ictxqmtc,
5321                         "Interrupt Cause Tx Queue Min Thresh Count");
5322
5323         SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "rx_desc_min_thresh",
5324                         CTLFLAG_RD, &adapter->stats.icrxdmtc,
5325                         "Interrupt Cause Rx Desc Min Thresh Count");
5326
5327         SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "rx_overrun",
5328                         CTLFLAG_RD, &adapter->stats.icrxoc,
5329                         "Interrupt Cause Receiver Overrun Count");
5330 }
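
/*
 * A minimal userland sketch (not part of this driver) showing how one
 * of the 64-bit counters registered above could be read once the
 * device is attached; the "dev.em.0" prefix assumes unit 0, while the
 * "interrupts.asserts" leaf matches the node and OID created above.
 */
#if 0   /* illustrative only; build as a standalone userland program */
#include <sys/types.h>
#include <sys/sysctl.h>

#include <stdint.h>
#include <stdio.h>

int
main(void)
{
        uint64_t asserts;
        size_t len = sizeof(asserts);

        /* SYSCTL_ADD_QUAD exposes a read-only 64-bit counter. */
        if (sysctlbyname("dev.em.0.interrupts.asserts",
            &asserts, &len, NULL, 0) == -1) {
                perror("sysctlbyname");
                return (1);
        }
        printf("interrupt assertions: %ju\n", (uintmax_t)asserts);
        return (0);
}
#endif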
5331
5332 /**********************************************************************
5333  *
5334  *  This routine provides a way to dump out the adapter EEPROM,
5335  *  often a useful debug/service tool. It dumps only the first
5336  *  32 words; the data that matters lies within that extent.
5337  *
5338  **********************************************************************/
5339 static int
5340 em_sysctl_nvm_info(SYSCTL_HANDLER_ARGS)
5341 {
5342         struct adapter *adapter;
5343         int error;
5344         int result;
5345
5346         result = -1;
5347         error = sysctl_handle_int(oidp, &result, 0, req);
5348
5349         if (error || !req->newptr)
5350                 return (error);
5351
5352         /*
5353          * This value will cause a hex dump of the
5354          * first 32 16-bit words of the EEPROM to
5355          * the screen.
5356          */
5357         if (result == 1) {
5358                 adapter = (struct adapter *)arg1;
5359                 em_print_nvm_info(adapter);
5360         }
5361
5362         return (error);
5363 }
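
/*
 * Writing 1 to the OID backed by this handler triggers the dump; the
 * value arrives through req->newptr above.  A minimal userland sketch,
 * assuming the handler is attached under the device tree as
 * "dev.em.0.nvm" (its registration lives elsewhere in this file); the
 * same write-1 pattern drives em_sysctl_debug_info() further below.
 */
#if 0   /* illustrative only */
#include <sys/types.h>
#include <sys/sysctl.h>

#include <stdio.h>

int
main(void)
{
        int one = 1;

        /* No old value wanted, so oldp/oldlenp are NULL. */
        if (sysctlbyname("dev.em.0.nvm", NULL, NULL,
            &one, sizeof(one)) == -1) {
                perror("sysctlbyname");
                return (1);
        }
        return (0);     /* the dump prints to the system console */
}
#endif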
5364
5365 static void
5366 em_print_nvm_info(struct adapter *adapter)
5367 {
5368         u16     eeprom_data;
5369         int     i, j, row = 0;
5370
5371         /* It's a bit crude, but it gets the job done */
5372         printf("\nInterface EEPROM Dump:\n");
5373         printf("Offset\n0x0000  ");
5374         for (i = 0, j = 0; i < 32; i++, j++) {
5375                 if (j == 8) { /* New row: label is the byte offset (8 words = 0x10 bytes) */
5376                         j = 0; ++row;
5377                         printf("\n0x00%x0  ", row);
5378                 }
5379                 e1000_read_nvm(&adapter->hw, i, 1, &eeprom_data);
5380                 printf("%04x ", eeprom_data);
5381         }
5382         printf("\n");
5383 }
5384
5385 static int
5386 em_sysctl_int_delay(SYSCTL_HANDLER_ARGS)
5387 {
5388         struct em_int_delay_info *info;
5389         struct adapter *adapter;
5390         u32 regval;
5391         int error, usecs, ticks;
5392
5393         info = (struct em_int_delay_info *)arg1;
5394         usecs = info->value;
5395         error = sysctl_handle_int(oidp, &usecs, 0, req);
5396         if (error != 0 || req->newptr == NULL)
5397                 return (error);
5398         if (usecs < 0 || usecs > EM_TICKS_TO_USECS(65535))
5399                 return (EINVAL);
5400         info->value = usecs;
5401         ticks = EM_USECS_TO_TICKS(usecs);
5402
5403         adapter = info->adapter;
5404
5405         EM_CORE_LOCK(adapter);
5406         regval = E1000_READ_OFFSET(&adapter->hw, info->offset);
5407         regval = (regval & ~0xffff) | (ticks & 0xffff);
5408         /* Handle a few special cases. */
5409         switch (info->offset) {
5410         case E1000_RDTR:
5411                 break;
5412         case E1000_TIDV:
5413                 if (ticks == 0) {
5414                         adapter->txd_cmd &= ~E1000_TXD_CMD_IDE;
5415                         /* Don't write 0 into the TIDV register. */
5416                         regval++;
5417                 } else
5418                         adapter->txd_cmd |= E1000_TXD_CMD_IDE;
5419                 break;
5420         }
5421         E1000_WRITE_OFFSET(&adapter->hw, info->offset, regval);
5422         EM_CORE_UNLOCK(adapter);
5423         return (0);
5424 }
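
/*
 * The delay timers occupy only the low 16 bits of their registers, so
 * the handler above merges the new tick count while preserving the
 * upper bits.  A standalone sketch of that mask-and-merge step; the
 * sample values are arbitrary, and the usec-to-tick scaling itself is
 * hidden behind EM_USECS_TO_TICKS in if_em.h.
 */
#if 0   /* illustrative only */
#include <stdint.h>
#include <stdio.h>

int
main(void)
{
        uint32_t regval = 0xabcd1234;   /* pretend register contents */
        uint32_t ticks = 0x0042;        /* new 16-bit delay, in ticks */

        /* Keep bits 31..16, replace bits 15..0, as the handler does. */
        regval = (regval & ~0xffffU) | (ticks & 0xffffU);
        printf("0x%08x\n", regval);     /* prints 0xabcd0042 */
        return (0);
}
#endif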
5425
5426 static void
5427 em_add_int_delay_sysctl(struct adapter *adapter, const char *name,
5428         const char *description, struct em_int_delay_info *info,
5429         int offset, int value)
5430 {
5431         info->adapter = adapter;
5432         info->offset = offset;
5433         info->value = value;
5434         SYSCTL_ADD_PROC(device_get_sysctl_ctx(adapter->dev),
5435             SYSCTL_CHILDREN(device_get_sysctl_tree(adapter->dev)),
5436             OID_AUTO, name, CTLTYPE_INT|CTLFLAG_RW,
5437             info, 0, em_sysctl_int_delay, "I", description);
5438 }
5439
5440 static void
5441 em_add_rx_process_limit(struct adapter *adapter, const char *name,
5442         const char *description, int *limit, int value)
5443 {
5444         *limit = value;
5445         SYSCTL_ADD_INT(device_get_sysctl_ctx(adapter->dev),
5446             SYSCTL_CHILDREN(device_get_sysctl_tree(adapter->dev)),
5447             OID_AUTO, name, CTLTYPE_INT|CTLFLAG_RW, limit, value, description);
5448 }
5449
5450 static void
5451 em_set_flow_cntrl(struct adapter *adapter, const char *name,
5452         const char *description, int *limit, int value)
5453 {
5454         *limit = value;
5455         SYSCTL_ADD_INT(device_get_sysctl_ctx(adapter->dev),
5456             SYSCTL_CHILDREN(device_get_sysctl_tree(adapter->dev)),
5457             OID_AUTO, name, CTLTYPE_INT|CTLFLAG_RW, limit, value, description);
5458 }
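
/*
 * Both helpers above export a plain read/write int, so the value can
 * be inspected and changed at runtime.  A minimal userland sketch; the
 * OID name "dev.em.0.rx_processing_limit" is an assumption about how
 * em_add_rx_process_limit() is invoked elsewhere in this file.
 */
#if 0   /* illustrative only */
#include <sys/types.h>
#include <sys/sysctl.h>

#include <stdio.h>

int
main(void)
{
        int limit, newlimit = 200;
        size_t len = sizeof(limit);

        /* Fetch the current limit and install a new one in one call. */
        if (sysctlbyname("dev.em.0.rx_processing_limit",
            &limit, &len, &newlimit, sizeof(newlimit)) == -1) {
                perror("sysctlbyname");
                return (1);
        }
        printf("old limit %d, new limit %d\n", limit, newlimit);
        return (0);
}
#endif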
5459
5460 static int
5461 em_sysctl_debug_info(SYSCTL_HANDLER_ARGS)
5462 {
5463         struct adapter *adapter;
5464         int error;
5465         int result;
5466
5467         result = -1;
5468         error = sysctl_handle_int(oidp, &result, 0, req);
5469
5470         if (error || !req->newptr)
5471                 return (error);
5472
5473         if (result == 1) {
5474                 adapter = (struct adapter *)arg1;
5475                 em_print_debug_info(adapter);
5476         }
5477
5478         return (error);
5479 }
5480
5481 /*
5482 ** This routine is meant to be fluid; add whatever is
5483 ** needed for debugging a problem.  -jfv
5484 */
5485 static void
5486 em_print_debug_info(struct adapter *adapter)
5487 {
5488         device_t dev = adapter->dev;
5489         struct tx_ring *txr = adapter->tx_rings;
5490         struct rx_ring *rxr = adapter->rx_rings;
5491
5492         if (adapter->ifp->if_drv_flags & IFF_DRV_RUNNING)
5493                 printf("Interface is RUNNING ");
5494         else
5495                 printf("Interface is NOT RUNNING ");
5496         if (adapter->ifp->if_drv_flags & IFF_DRV_OACTIVE)
5497                 printf("and ACTIVE\n");
5498         else
5499                 printf("and INACTIVE\n");
5500
5501         device_printf(dev, "hw tdh = %d, hw tdt = %d\n",
5502             E1000_READ_REG(&adapter->hw, E1000_TDH(0)),
5503             E1000_READ_REG(&adapter->hw, E1000_TDT(0)));
5504         device_printf(dev, "hw rdh = %d, hw rdt = %d\n",
5505             E1000_READ_REG(&adapter->hw, E1000_RDH(0)),
5506             E1000_READ_REG(&adapter->hw, E1000_RDT(0)));
5507         device_printf(dev, "Tx Queue Status = %d\n", txr->queue_status);
5508         device_printf(dev, "TX descriptors avail = %d\n",
5509             txr->tx_avail);
5510         device_printf(dev, "TX descriptor avail failures = %ld\n",
5511             txr->no_desc_avail);
5512         device_printf(dev, "RX discarded packets = %ld\n",
5513             rxr->rx_discarded);
5514         device_printf(dev, "RX Next to Check = %d\n", rxr->next_to_check);
5515         device_printf(dev, "RX Next to Refresh = %d\n", rxr->next_to_refresh);
5516 }
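
/*
 * TDH/TDT (and RDH/RDT) printed above are the head and tail indices of
 * a circular descriptor ring, so the number of descriptors the
 * hardware still owns follows from the two values.  A standalone
 * sketch of that arithmetic; the ring size and indices are arbitrary
 * examples, not values taken from this driver.
 */
#if 0   /* illustrative only */
#include <stdio.h>

int
main(void)
{
        int ring_size = 256;    /* example ring size */
        int tdh = 100;          /* head: next descriptor hw consumes */
        int tdt = 30;           /* tail: next slot sw will fill */

        /* Descriptors queued for hardware, accounting for wraparound. */
        int pending = (tdt - tdh + ring_size) % ring_size;
        printf("%d descriptors pending\n", pending);    /* 186 */
        return (0);
}
#endif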