/******************************************************************************

  Copyright (c) 2001-2010, Intel Corporation
  All rights reserved.

  Redistribution and use in source and binary forms, with or without
  modification, are permitted provided that the following conditions are met:

   1. Redistributions of source code must retain the above copyright notice,
      this list of conditions and the following disclaimer.

   2. Redistributions in binary form must reproduce the above copyright
      notice, this list of conditions and the following disclaimer in the
      documentation and/or other materials provided with the distribution.

   3. Neither the name of the Intel Corporation nor the names of its
      contributors may be used to endorse or promote products derived from
      this software without specific prior written permission.

  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
  AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
  LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
  INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
  CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
  ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
  POSSIBILITY OF SUCH DAMAGE.

******************************************************************************/
/*$FreeBSD$*/

#ifdef HAVE_KERNEL_OPTION_HEADERS
#include "opt_device_polling.h"
#include "opt_inet.h"
#endif

#include <sys/param.h>
#include <sys/systm.h>
#if __FreeBSD_version >= 800000
#include <sys/buf_ring.h>
#endif
#include <sys/bus.h>
#include <sys/endian.h>
#include <sys/kernel.h>
#include <sys/kthread.h>
#include <sys/malloc.h>
#include <sys/mbuf.h>
#include <sys/module.h>
#include <sys/rman.h>
#include <sys/socket.h>
#include <sys/sockio.h>
#include <sys/sysctl.h>
#include <sys/taskqueue.h>
#include <sys/eventhandler.h>
#include <machine/bus.h>
#include <machine/resource.h>

#include <net/bpf.h>
#include <net/ethernet.h>
#include <net/if.h>
#include <net/if_arp.h>
#include <net/if_dl.h>
#include <net/if_media.h>

#include <net/if_types.h>
#include <net/if_vlan_var.h>

#include <netinet/in_systm.h>
#include <netinet/in.h>
#include <netinet/if_ether.h>
#include <netinet/ip.h>
#include <netinet/ip6.h>
#include <netinet/tcp.h>
#include <netinet/udp.h>

#include <machine/in_cksum.h>
#include <dev/led/led.h>
#include <dev/pci/pcivar.h>
#include <dev/pci/pcireg.h>

#include "e1000_api.h"
#include "e1000_82571.h"
#include "if_em.h"

/*********************************************************************
 *  Set this to one to display debug statistics
 *********************************************************************/
int     em_display_debug_stats = 0;

/*********************************************************************
 *  Driver version:
 *********************************************************************/
char em_driver_version[] = "7.0.5";


/*********************************************************************
 *  PCI Device ID Table
 *
 *  Used by probe to select the devices to load on.
 *  The last field stores an index into em_strings.
 *  The last entry must be all 0s.
 *
 *  { Vendor ID, Device ID, SubVendor ID, SubDevice ID, String Index }
 *********************************************************************/

static em_vendor_info_t em_vendor_info_array[] =
{
        /* Intel(R) PRO/1000 Network Connection */
        { 0x8086, E1000_DEV_ID_82571EB_COPPER,  PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82571EB_FIBER,   PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82571EB_SERDES,  PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82571EB_SERDES_DUAL,
                                                PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82571EB_SERDES_QUAD,
                                                PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82571EB_QUAD_COPPER,
                                                PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82571EB_QUAD_COPPER_LP,
                                                PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82571EB_QUAD_FIBER,
                                                PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82571PT_QUAD_COPPER,
                                                PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82572EI_COPPER,  PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82572EI_FIBER,   PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82572EI_SERDES,  PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82572EI,         PCI_ANY_ID, PCI_ANY_ID, 0},

        { 0x8086, E1000_DEV_ID_82573E,          PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82573E_IAMT,     PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82573L,          PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82583V,          PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_80003ES2LAN_COPPER_SPT,
                                                PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_80003ES2LAN_SERDES_SPT,
                                                PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_80003ES2LAN_COPPER_DPT,
                                                PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_80003ES2LAN_SERDES_DPT,
                                                PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_ICH8_IGP_M_AMT,  PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_ICH8_IGP_AMT,    PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_ICH8_IGP_C,      PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_ICH8_IFE,        PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_ICH8_IFE_GT,     PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_ICH8_IFE_G,      PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_ICH8_IGP_M,      PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_ICH8_82567V_3,   PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_ICH9_IGP_M_AMT,  PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_ICH9_IGP_AMT,    PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_ICH9_IGP_C,      PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_ICH9_IGP_M,      PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_ICH9_IGP_M_V,    PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_ICH9_IFE,        PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_ICH9_IFE_GT,     PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_ICH9_IFE_G,      PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_ICH9_BM,         PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82574L,          PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82574LA,         PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_ICH10_R_BM_LM,   PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_ICH10_R_BM_LF,   PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_ICH10_R_BM_V,    PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_ICH10_D_BM_LM,   PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_ICH10_D_BM_LF,   PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_PCH_M_HV_LM,     PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_PCH_M_HV_LC,     PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_PCH_D_HV_DM,     PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_PCH_D_HV_DC,     PCI_ANY_ID, PCI_ANY_ID, 0},
        /* required last entry */
        { 0, 0, 0, 0, 0}
};
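
/*
** Supporting an additional adapter in this driver is normally just
** a matter of appending a row above the all-zero terminator, e.g.
** (hypothetical device macro shown):
**
**      { 0x8086, E1000_DEV_ID_NEW_PART, PCI_ANY_ID, PCI_ANY_ID, 0},
*/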

/*********************************************************************
 *  Table of branding strings for all supported NICs.
 *********************************************************************/

static char *em_strings[] = {
        "Intel(R) PRO/1000 Network Connection"
};

/*********************************************************************
 *  Function prototypes
 *********************************************************************/
static int      em_probe(device_t);
static int      em_attach(device_t);
static int      em_detach(device_t);
static int      em_shutdown(device_t);
static int      em_suspend(device_t);
static int      em_resume(device_t);
static void     em_start(struct ifnet *);
static void     em_start_locked(struct ifnet *, struct tx_ring *);
#ifdef EM_MULTIQUEUE
static int      em_mq_start(struct ifnet *, struct mbuf *);
static int      em_mq_start_locked(struct ifnet *,
                    struct tx_ring *, struct mbuf *);
static void     em_qflush(struct ifnet *);
#endif
static int      em_ioctl(struct ifnet *, u_long, caddr_t);
static void     em_init(void *);
static void     em_init_locked(struct adapter *);
static void     em_stop(void *);
static void     em_media_status(struct ifnet *, struct ifmediareq *);
static int      em_media_change(struct ifnet *);
static void     em_identify_hardware(struct adapter *);
static int      em_allocate_pci_resources(struct adapter *);
static int      em_allocate_legacy(struct adapter *);
static int      em_allocate_msix(struct adapter *);
static int      em_allocate_queues(struct adapter *);
static int      em_setup_msix(struct adapter *);
static void     em_free_pci_resources(struct adapter *);
static void     em_local_timer(void *);
static void     em_reset(struct adapter *);
static void     em_setup_interface(device_t, struct adapter *);

static void     em_setup_transmit_structures(struct adapter *);
static void     em_initialize_transmit_unit(struct adapter *);
static int      em_allocate_transmit_buffers(struct tx_ring *);
static void     em_free_transmit_structures(struct adapter *);
static void     em_free_transmit_buffers(struct tx_ring *);

static int      em_setup_receive_structures(struct adapter *);
static int      em_allocate_receive_buffers(struct rx_ring *);
static void     em_initialize_receive_unit(struct adapter *);
static void     em_free_receive_structures(struct adapter *);
static void     em_free_receive_buffers(struct rx_ring *);

static void     em_enable_intr(struct adapter *);
static void     em_disable_intr(struct adapter *);
static void     em_update_stats_counters(struct adapter *);
static void     em_add_hw_stats(struct adapter *adapter);
static bool     em_txeof(struct tx_ring *);
static bool     em_rxeof(struct rx_ring *, int, int *);
#ifndef __NO_STRICT_ALIGNMENT
static int      em_fixup_rx(struct rx_ring *);
#endif
static void     em_receive_checksum(struct e1000_rx_desc *, struct mbuf *);
static void     em_transmit_checksum_setup(struct tx_ring *, struct mbuf *,
                    u32 *, u32 *);
static bool     em_tso_setup(struct tx_ring *, struct mbuf *, u32 *, u32 *);
static void     em_set_promisc(struct adapter *);
static void     em_disable_promisc(struct adapter *);
static void     em_set_multi(struct adapter *);
static void     em_update_link_status(struct adapter *);
static void     em_refresh_mbufs(struct rx_ring *, int);
static void     em_register_vlan(void *, struct ifnet *, u16);
static void     em_unregister_vlan(void *, struct ifnet *, u16);
static void     em_setup_vlan_hw_support(struct adapter *);
static int      em_xmit(struct tx_ring *, struct mbuf **);
static int      em_dma_malloc(struct adapter *, bus_size_t,
                    struct em_dma_alloc *, int);
static void     em_dma_free(struct adapter *, struct em_dma_alloc *);
static int      em_sysctl_nvm_info(SYSCTL_HANDLER_ARGS);
static void     em_print_nvm_info(struct adapter *);
static int      em_is_valid_ether_addr(u8 *);
static int      em_sysctl_int_delay(SYSCTL_HANDLER_ARGS);
static void     em_add_int_delay_sysctl(struct adapter *, const char *,
                    const char *, struct em_int_delay_info *, int, int);
/* Management and WOL Support */
static void     em_init_manageability(struct adapter *);
static void     em_release_manageability(struct adapter *);
static void     em_get_hw_control(struct adapter *);
static void     em_release_hw_control(struct adapter *);
static void     em_get_wakeup(device_t);
static void     em_enable_wakeup(device_t);
static int      em_enable_phy_wakeup(struct adapter *);
static void     em_led_func(void *, int);

static int      em_irq_fast(void *);

/* MSIX handlers */
static void     em_msix_tx(void *);
static void     em_msix_rx(void *);
static void     em_msix_link(void *);
static void     em_handle_tx(void *context, int pending);
static void     em_handle_rx(void *context, int pending);
static void     em_handle_link(void *context, int pending);
static void     em_handle_que(void *context, int pending);

static void     em_add_rx_process_limit(struct adapter *, const char *,
                    const char *, int *, int);

#ifdef DEVICE_POLLING
static poll_handler_t em_poll;
#endif /* DEVICE_POLLING */

/*********************************************************************
 *  FreeBSD Device Interface Entry Points
 *********************************************************************/

static device_method_t em_methods[] = {
        /* Device interface */
        DEVMETHOD(device_probe, em_probe),
        DEVMETHOD(device_attach, em_attach),
        DEVMETHOD(device_detach, em_detach),
        DEVMETHOD(device_shutdown, em_shutdown),
        DEVMETHOD(device_suspend, em_suspend),
        DEVMETHOD(device_resume, em_resume),
        {0, 0}
};

static driver_t em_driver = {
        "em", em_methods, sizeof(struct adapter),
};

devclass_t em_devclass;
DRIVER_MODULE(em, pci, em_driver, em_devclass, 0, 0);
MODULE_DEPEND(em, pci, 1, 1, 1);
MODULE_DEPEND(em, ether, 1, 1, 1);

/*********************************************************************
 *  Tunable default values.
 *********************************************************************/

#define EM_TICKS_TO_USECS(ticks)        ((1024 * (ticks) + 500) / 1000)
#define EM_USECS_TO_TICKS(usecs)        ((1000 * (usecs) + 512) / 1024)
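
/*
** The hardware interrupt-delay registers count time in units of
** 1.024 usecs, hence the rounded conversions above.  A quick
** worked example (illustrative values only):
**
**      EM_TICKS_TO_USECS(100) == (1024 * 100 + 500) / 1000 == 102
**      EM_USECS_TO_TICKS(102) == (1000 * 102 + 512) / 1024 == 100
*/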
#define M_TSO_LEN                       66

/* Allow common code without TSO */
#ifndef CSUM_TSO
#define CSUM_TSO        0
#endif

static int em_tx_int_delay_dflt = EM_TICKS_TO_USECS(EM_TIDV);
static int em_rx_int_delay_dflt = EM_TICKS_TO_USECS(EM_RDTR);
TUNABLE_INT("hw.em.tx_int_delay", &em_tx_int_delay_dflt);
TUNABLE_INT("hw.em.rx_int_delay", &em_rx_int_delay_dflt);

static int em_tx_abs_int_delay_dflt = EM_TICKS_TO_USECS(EM_TADV);
static int em_rx_abs_int_delay_dflt = EM_TICKS_TO_USECS(EM_RADV);
TUNABLE_INT("hw.em.tx_abs_int_delay", &em_tx_abs_int_delay_dflt);
TUNABLE_INT("hw.em.rx_abs_int_delay", &em_rx_abs_int_delay_dflt);

static int em_rxd = EM_DEFAULT_RXD;
static int em_txd = EM_DEFAULT_TXD;
TUNABLE_INT("hw.em.rxd", &em_rxd);
TUNABLE_INT("hw.em.txd", &em_txd);

static int em_smart_pwr_down = FALSE;
TUNABLE_INT("hw.em.smart_pwr_down", &em_smart_pwr_down);

/* Controls whether promiscuous also shows bad packets */
static int em_debug_sbp = FALSE;
TUNABLE_INT("hw.em.sbp", &em_debug_sbp);

/* Local controls for MSI/MSIX */
#ifdef EM_MULTIQUEUE
static int em_enable_msix = TRUE;
static int em_msix_queues = 2; /* for 82574, can be 1 or 2 */
#else
static int em_enable_msix = FALSE;
static int em_msix_queues = 0; /* disable */
#endif
TUNABLE_INT("hw.em.enable_msix", &em_enable_msix);
TUNABLE_INT("hw.em.msix_queues", &em_msix_queues);

/* How many packets rxeof tries to clean at a time */
static int em_rx_process_limit = 100;
TUNABLE_INT("hw.em.rx_process_limit", &em_rx_process_limit);

/* Flow control setting - default to FULL */
static int em_fc_setting = e1000_fc_full;
TUNABLE_INT("hw.em.fc_setting", &em_fc_setting);
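
/*
** All of the hw.em.* knobs above are loader tunables; they can be
** set before the driver loads, e.g. from /boot/loader.conf (values
** below are illustrative only):
**
**      hw.em.rx_int_delay=0
**      hw.em.fc_setting=0      # e1000_fc_none: disable flow control
*/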

/*
** Shadow VFTA table: the real VLAN filter table is cleared
** during a soft reset, so the driver keeps this copy in order
** to repopulate the hardware afterwards.
*/
static u32 em_shadow_vfta[EM_VFTA_SIZE];
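
/*
** Each bit of the VFTA corresponds to one VLAN ID: the upper seven
** bits of the ID index one of the 128 32-bit table entries and the
** low five bits select the bit within it.  A minimal sketch of how
** an ID would be marked in the shadow copy (illustrative only,
** excluded from compilation):
*/
#if 0
        u32 index = (vid >> 5) & 0x7F;                  /* which 32-bit entry */
        em_shadow_vfta[index] |= 1 << (vid & 0x1F);     /* which bit within it */
#endif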

/* Global used in WOL setup with multiport cards */
static int global_quad_port_a = 0;

/*********************************************************************
 *  Device identification routine
 *
 *  em_probe determines if the driver should be loaded on an
 *  adapter based on its PCI vendor/device ID.
 *
 *  return BUS_PROBE_DEFAULT on success, positive on failure
 *********************************************************************/

static int
em_probe(device_t dev)
{
        char            adapter_name[60];
        u16             pci_vendor_id = 0;
        u16             pci_device_id = 0;
        u16             pci_subvendor_id = 0;
        u16             pci_subdevice_id = 0;
        em_vendor_info_t *ent;

        INIT_DEBUGOUT("em_probe: begin");

        pci_vendor_id = pci_get_vendor(dev);
        if (pci_vendor_id != EM_VENDOR_ID)
                return (ENXIO);

        pci_device_id = pci_get_device(dev);
        pci_subvendor_id = pci_get_subvendor(dev);
        pci_subdevice_id = pci_get_subdevice(dev);

        ent = em_vendor_info_array;
        while (ent->vendor_id != 0) {
                if ((pci_vendor_id == ent->vendor_id) &&
                    (pci_device_id == ent->device_id) &&
                    ((pci_subvendor_id == ent->subvendor_id) ||
                    (ent->subvendor_id == PCI_ANY_ID)) &&
                    ((pci_subdevice_id == ent->subdevice_id) ||
                    (ent->subdevice_id == PCI_ANY_ID))) {
                        sprintf(adapter_name, "%s %s",
                                em_strings[ent->index],
                                em_driver_version);
                        device_set_desc_copy(dev, adapter_name);
                        return (BUS_PROBE_DEFAULT);
                }
                ent++;
        }

        return (ENXIO);
}

/*********************************************************************
 *  Device initialization routine
 *
 *  The attach entry point is called when the driver is being loaded.
 *  This routine identifies the type of hardware, allocates all resources
 *  and initializes the hardware.
 *
 *  return 0 on success, positive on failure
 *********************************************************************/

static int
em_attach(device_t dev)
{
        struct adapter  *adapter;
        int             error = 0;

        INIT_DEBUGOUT("em_attach: begin");

        adapter = device_get_softc(dev);
        adapter->dev = adapter->osdep.dev = dev;
        EM_CORE_LOCK_INIT(adapter, device_get_nameunit(dev));

        /* SYSCTL stuff */
        SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
            SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
            OID_AUTO, "nvm", CTLTYPE_INT|CTLFLAG_RW, adapter, 0,
            em_sysctl_nvm_info, "I", "NVM Information");

        callout_init_mtx(&adapter->timer, &adapter->core_mtx, 0);

        /* Determine hardware and mac info */
        em_identify_hardware(adapter);

        /* Setup PCI resources */
        if (em_allocate_pci_resources(adapter)) {
                device_printf(dev, "Allocation of PCI resources failed\n");
                error = ENXIO;
                goto err_pci;
        }

        /*
        ** For ICH8 and family we need to
        ** map the flash memory, and this
        ** must happen after the MAC is
        ** identified
        */
        if ((adapter->hw.mac.type == e1000_ich8lan) ||
            (adapter->hw.mac.type == e1000_pchlan) ||
            (adapter->hw.mac.type == e1000_ich9lan) ||
            (adapter->hw.mac.type == e1000_ich10lan)) {
                int rid = EM_BAR_TYPE_FLASH;
                adapter->flash = bus_alloc_resource_any(dev,
                    SYS_RES_MEMORY, &rid, RF_ACTIVE);
                if (adapter->flash == NULL) {
                        device_printf(dev, "Mapping of Flash failed\n");
                        error = ENXIO;
                        goto err_pci;
                }
                /* This is used in the shared code */
                adapter->hw.flash_address = (u8 *)adapter->flash;
                adapter->osdep.flash_bus_space_tag =
                    rman_get_bustag(adapter->flash);
                adapter->osdep.flash_bus_space_handle =
                    rman_get_bushandle(adapter->flash);
        }

        /* Do Shared Code initialization */
        if (e1000_setup_init_funcs(&adapter->hw, TRUE)) {
                device_printf(dev, "Setup of Shared code failed\n");
                error = ENXIO;
                goto err_pci;
        }

        e1000_get_bus_info(&adapter->hw);

        /* Set up some sysctls for the tunable interrupt delays */
        em_add_int_delay_sysctl(adapter, "rx_int_delay",
            "receive interrupt delay in usecs", &adapter->rx_int_delay,
            E1000_REGISTER(&adapter->hw, E1000_RDTR), em_rx_int_delay_dflt);
        em_add_int_delay_sysctl(adapter, "tx_int_delay",
            "transmit interrupt delay in usecs", &adapter->tx_int_delay,
            E1000_REGISTER(&adapter->hw, E1000_TIDV), em_tx_int_delay_dflt);
        em_add_int_delay_sysctl(adapter, "rx_abs_int_delay",
            "receive interrupt delay limit in usecs",
            &adapter->rx_abs_int_delay,
            E1000_REGISTER(&adapter->hw, E1000_RADV),
            em_rx_abs_int_delay_dflt);
        em_add_int_delay_sysctl(adapter, "tx_abs_int_delay",
            "transmit interrupt delay limit in usecs",
            &adapter->tx_abs_int_delay,
            E1000_REGISTER(&adapter->hw, E1000_TADV),
            em_tx_abs_int_delay_dflt);

        /* Sysctls for limiting the amount of work done in the taskqueue */
        em_add_rx_process_limit(adapter, "rx_processing_limit",
            "max number of rx packets to process", &adapter->rx_process_limit,
            em_rx_process_limit);

        /*
         * Validate the number of transmit and receive descriptors.  It
         * must not exceed the hardware maximum and must be a multiple
         * of EM_DBA_ALIGN.
         */
        if (((em_txd * sizeof(struct e1000_tx_desc)) % EM_DBA_ALIGN) != 0 ||
            (em_txd > EM_MAX_TXD) || (em_txd < EM_MIN_TXD)) {
                device_printf(dev, "Using %d TX descriptors instead of %d!\n",
                    EM_DEFAULT_TXD, em_txd);
                adapter->num_tx_desc = EM_DEFAULT_TXD;
        } else
                adapter->num_tx_desc = em_txd;

        if (((em_rxd * sizeof(struct e1000_rx_desc)) % EM_DBA_ALIGN) != 0 ||
            (em_rxd > EM_MAX_RXD) || (em_rxd < EM_MIN_RXD)) {
                device_printf(dev, "Using %d RX descriptors instead of %d!\n",
                    EM_DEFAULT_RXD, em_rxd);
                adapter->num_rx_desc = EM_DEFAULT_RXD;
        } else
                adapter->num_rx_desc = em_rxd;

        adapter->hw.mac.autoneg = DO_AUTO_NEG;
        adapter->hw.phy.autoneg_wait_to_complete = FALSE;
        adapter->hw.phy.autoneg_advertised = AUTONEG_ADV_DEFAULT;

        /* Copper options */
        if (adapter->hw.phy.media_type == e1000_media_type_copper) {
                adapter->hw.phy.mdix = AUTO_ALL_MODES;
                adapter->hw.phy.disable_polarity_correction = FALSE;
                adapter->hw.phy.ms_type = EM_MASTER_SLAVE;
        }

        /*
         * Set the frame limits assuming
         * standard ethernet sized frames.
         */
        adapter->max_frame_size = ETHERMTU + ETHER_HDR_LEN + ETHERNET_FCS_SIZE;
        adapter->min_frame_size = ETH_ZLEN + ETHERNET_FCS_SIZE;

        /*
         * This controls when hardware reports transmit completion
         * status.
         */
        adapter->hw.mac.report_tx_early = 1;

        /*
        ** Get queue/ring memory
        */
        if (em_allocate_queues(adapter)) {
                error = ENOMEM;
                goto err_pci;
        }

        /*
        ** Start from a known state; this is
        ** important for reading the NVM and
        ** MAC address from it.
        */
        e1000_reset_hw(&adapter->hw);

        /* Make sure we have a good EEPROM before we read from it */
        if (e1000_validate_nvm_checksum(&adapter->hw) < 0) {
                /*
                ** Some PCI-E parts fail the first check due to
                ** the link being in sleep state; call it again,
                ** if it fails a second time it's a real issue.
                */
                if (e1000_validate_nvm_checksum(&adapter->hw) < 0) {
                        device_printf(dev,
                            "The EEPROM Checksum Is Not Valid\n");
                        error = EIO;
                        goto err_late;
                }
        }

        /* Copy the permanent MAC address out of the EEPROM */
        if (e1000_read_mac_addr(&adapter->hw) < 0) {
                device_printf(dev, "EEPROM read error while reading MAC"
                    " address\n");
                error = EIO;
                goto err_late;
        }

        if (!em_is_valid_ether_addr(adapter->hw.mac.addr)) {
                device_printf(dev, "Invalid MAC address\n");
                error = EIO;
                goto err_late;
        }

        /*
        **  Do interrupt configuration
        */
        if (adapter->msix > 1) /* Do MSIX */
                error = em_allocate_msix(adapter);
        else  /* MSI or Legacy */
                error = em_allocate_legacy(adapter);
        if (error)
                goto err_late;

        /*
         * Get Wake-on-Lan and Management info for later use
         */
        em_get_wakeup(dev);

        /* Setup OS specific network interface */
        em_setup_interface(dev, adapter);

        em_reset(adapter);

        /* Initialize statistics */
        em_update_stats_counters(adapter);

        adapter->hw.mac.get_link_status = 1;
        em_update_link_status(adapter);

        /* Indicate SOL/IDER usage */
        if (e1000_check_reset_block(&adapter->hw))
                device_printf(dev,
                    "PHY reset is blocked due to SOL/IDER session.\n");

        /* Register for VLAN events */
        adapter->vlan_attach = EVENTHANDLER_REGISTER(vlan_config,
            em_register_vlan, adapter, EVENTHANDLER_PRI_FIRST);
        adapter->vlan_detach = EVENTHANDLER_REGISTER(vlan_unconfig,
            em_unregister_vlan, adapter, EVENTHANDLER_PRI_FIRST);

        em_add_hw_stats(adapter);

        /* Non-AMT based hardware can now take control from firmware */
        if (adapter->has_manage && !adapter->has_amt)
                em_get_hw_control(adapter);

        /* Tell the stack that the interface is not active */
        adapter->ifp->if_drv_flags &= ~(IFF_DRV_RUNNING | IFF_DRV_OACTIVE);

        adapter->led_dev = led_create(em_led_func, adapter,
            device_get_nameunit(dev));

        INIT_DEBUGOUT("em_attach: end");

        return (0);

err_late:
        em_free_transmit_structures(adapter);
        em_free_receive_structures(adapter);
        em_release_hw_control(adapter);
err_pci:
        em_free_pci_resources(adapter);
        EM_CORE_LOCK_DESTROY(adapter);

        return (error);
}

/*********************************************************************
 *  Device removal routine
 *
 *  The detach entry point is called when the driver is being removed.
 *  This routine stops the adapter and deallocates all the resources
 *  that were allocated for driver operation.
 *
 *  return 0 on success, positive on failure
 *********************************************************************/

static int
em_detach(device_t dev)
{
        struct adapter  *adapter = device_get_softc(dev);
        struct ifnet    *ifp = adapter->ifp;

        INIT_DEBUGOUT("em_detach: begin");

        /* Make sure VLANs are not using the driver */
        if (adapter->ifp->if_vlantrunk != NULL) {
                device_printf(dev,"Vlan in use, detach first\n");
                return (EBUSY);
        }

#ifdef DEVICE_POLLING
        if (ifp->if_capenable & IFCAP_POLLING)
                ether_poll_deregister(ifp);
#endif

        if (adapter->led_dev != NULL)
                led_destroy(adapter->led_dev);

        EM_CORE_LOCK(adapter);
        adapter->in_detach = 1;
        em_stop(adapter);
        EM_CORE_UNLOCK(adapter);
        EM_CORE_LOCK_DESTROY(adapter);

        e1000_phy_hw_reset(&adapter->hw);

        em_release_manageability(adapter);
        em_release_hw_control(adapter);

        /* Unregister VLAN events */
        if (adapter->vlan_attach != NULL)
                EVENTHANDLER_DEREGISTER(vlan_config, adapter->vlan_attach);
        if (adapter->vlan_detach != NULL)
                EVENTHANDLER_DEREGISTER(vlan_unconfig, adapter->vlan_detach);

        ether_ifdetach(adapter->ifp);
        callout_drain(&adapter->timer);

        em_free_pci_resources(adapter);
        bus_generic_detach(dev);
        if_free(ifp);

        em_free_transmit_structures(adapter);
        em_free_receive_structures(adapter);

        em_release_hw_control(adapter);

        return (0);
}

/*********************************************************************
 *
 *  Shutdown entry point
 *
 **********************************************************************/

static int
em_shutdown(device_t dev)
{
        return em_suspend(dev);
}

/*
 * Suspend/resume device methods.
 */
static int
em_suspend(device_t dev)
{
        struct adapter *adapter = device_get_softc(dev);

        EM_CORE_LOCK(adapter);

        em_release_manageability(adapter);
        em_release_hw_control(adapter);
        em_enable_wakeup(dev);

        EM_CORE_UNLOCK(adapter);

        return bus_generic_suspend(dev);
}

static int
em_resume(device_t dev)
{
        struct adapter *adapter = device_get_softc(dev);
        struct ifnet *ifp = adapter->ifp;

        EM_CORE_LOCK(adapter);
        em_init_locked(adapter);
        em_init_manageability(adapter);
        EM_CORE_UNLOCK(adapter);
        em_start(ifp);

        return bus_generic_resume(dev);
}


/*********************************************************************
 *  Transmit entry point
 *
 *  em_start is called by the stack to initiate a transmit.
 *  The driver will remain in this routine as long as there are
 *  packets to transmit and transmit resources are available.
 *  In case resources are not available, the stack is notified
 *  and the packet is requeued.
 **********************************************************************/

#ifdef EM_MULTIQUEUE
static int
em_mq_start_locked(struct ifnet *ifp, struct tx_ring *txr, struct mbuf *m)
{
        struct adapter  *adapter = txr->adapter;
        struct mbuf     *next;
        int             err = 0, enq = 0;

        if ((ifp->if_drv_flags & (IFF_DRV_RUNNING | IFF_DRV_OACTIVE)) !=
            IFF_DRV_RUNNING || adapter->link_active == 0) {
                if (m != NULL)
                        err = drbr_enqueue(ifp, txr->br, m);
                return (err);
        }

        /* Call cleanup if number of TX descriptors low */
        if (txr->tx_avail <= EM_TX_CLEANUP_THRESHOLD)
                em_txeof(txr);

        enq = 0;
        if (m == NULL) {
                next = drbr_dequeue(ifp, txr->br);
        } else if (drbr_needs_enqueue(ifp, txr->br)) {
                if ((err = drbr_enqueue(ifp, txr->br, m)) != 0)
                        return (err);
                next = drbr_dequeue(ifp, txr->br);
        } else
                next = m;

        /* Process the queue */
        while (next != NULL) {
                if ((err = em_xmit(txr, &next)) != 0) {
                        if (next != NULL)
                                err = drbr_enqueue(ifp, txr->br, next);
                        break;
                }
                enq++;
                drbr_stats_update(ifp, next->m_pkthdr.len, next->m_flags);
                ETHER_BPF_MTAP(ifp, next);
                if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
                        break;
                if (txr->tx_avail < EM_MAX_SCATTER) {
                        ifp->if_drv_flags |= IFF_DRV_OACTIVE;
                        break;
                }
                next = drbr_dequeue(ifp, txr->br);
        }

        if (enq > 0) {
                /* Set the watchdog */
                txr->watchdog_check = TRUE;
                txr->watchdog_time = ticks;
        }
        return (err);
}

/*
** Multiqueue capable stack interface;
** this is not yet truly multiqueue, but
** that is coming...
*/
static int
em_mq_start(struct ifnet *ifp, struct mbuf *m)
{
        struct adapter  *adapter = ifp->if_softc;
        struct tx_ring  *txr;
        int             i, error = 0;

        /* Which queue to use */
        if ((m->m_flags & M_FLOWID) != 0)
                i = m->m_pkthdr.flowid % adapter->num_queues;
        else
                i = curcpu % adapter->num_queues;

        txr = &adapter->tx_rings[i];

        if (EM_TX_TRYLOCK(txr)) {
                error = em_mq_start_locked(ifp, txr, m);
                EM_TX_UNLOCK(txr);
        } else
                error = drbr_enqueue(ifp, txr->br, m);

        return (error);
}

/*
** Flush all ring buffers
*/
static void
em_qflush(struct ifnet *ifp)
{
        struct adapter  *adapter = ifp->if_softc;
        struct tx_ring  *txr = adapter->tx_rings;
        struct mbuf     *m;

        for (int i = 0; i < adapter->num_queues; i++, txr++) {
                EM_TX_LOCK(txr);
                while ((m = buf_ring_dequeue_sc(txr->br)) != NULL)
                        m_freem(m);
                EM_TX_UNLOCK(txr);
        }
        if_qflush(ifp);
}

#endif /* EM_MULTIQUEUE */

static void
em_start_locked(struct ifnet *ifp, struct tx_ring *txr)
{
        struct adapter  *adapter = ifp->if_softc;
        struct mbuf     *m_head;

        EM_TX_LOCK_ASSERT(txr);

        if ((ifp->if_drv_flags & (IFF_DRV_RUNNING|IFF_DRV_OACTIVE)) !=
            IFF_DRV_RUNNING)
                return;

        if (!adapter->link_active)
                return;

        /* Call cleanup if number of TX descriptors low */
        if (txr->tx_avail <= EM_TX_CLEANUP_THRESHOLD)
                em_txeof(txr);

        while (!IFQ_DRV_IS_EMPTY(&ifp->if_snd)) {
                if (txr->tx_avail < EM_MAX_SCATTER) {
                        ifp->if_drv_flags |= IFF_DRV_OACTIVE;
                        break;
                }
                IFQ_DRV_DEQUEUE(&ifp->if_snd, m_head);
                if (m_head == NULL)
                        break;
                /*
                 *  Encapsulation can modify our pointer, and/or make it
                 *  NULL on failure.  In that event, we can't requeue.
                 */
                if (em_xmit(txr, &m_head)) {
                        if (m_head == NULL)
                                break;
                        ifp->if_drv_flags |= IFF_DRV_OACTIVE;
                        IFQ_DRV_PREPEND(&ifp->if_snd, m_head);
                        break;
                }

                /* Send a copy of the frame to the BPF listener */
                ETHER_BPF_MTAP(ifp, m_head);

                /* Set timeout in case hardware has problems transmitting. */
                txr->watchdog_time = ticks;
                txr->watchdog_check = TRUE;
        }

        return;
}

static void
em_start(struct ifnet *ifp)
{
        struct adapter  *adapter = ifp->if_softc;
        struct tx_ring  *txr = adapter->tx_rings;

        if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
                EM_TX_LOCK(txr);
                em_start_locked(ifp, txr);
                EM_TX_UNLOCK(txr);
        }
        return;
}

/*********************************************************************
 *  Ioctl entry point
 *
 *  em_ioctl is called when the user wants to configure the
 *  interface.
 *
 *  return 0 on success, positive on failure
 **********************************************************************/

static int
em_ioctl(struct ifnet *ifp, u_long command, caddr_t data)
{
        struct adapter  *adapter = ifp->if_softc;
        struct ifreq *ifr = (struct ifreq *)data;
#ifdef INET
        struct ifaddr *ifa = (struct ifaddr *)data;
#endif
        int error = 0;

        if (adapter->in_detach)
                return (error);

        switch (command) {
        case SIOCSIFADDR:
#ifdef INET
                if (ifa->ifa_addr->sa_family == AF_INET) {
                        /*
                         * XXX
                         * Since resetting hardware takes a very long time
                         * and results in link renegotiation, we only
                         * initialize the hardware when it is absolutely
                         * required.
                         */
                        ifp->if_flags |= IFF_UP;
                        if (!(ifp->if_drv_flags & IFF_DRV_RUNNING)) {
                                EM_CORE_LOCK(adapter);
                                em_init_locked(adapter);
                                EM_CORE_UNLOCK(adapter);
                        }
                        arp_ifinit(ifp, ifa);
                } else
#endif
                        error = ether_ioctl(ifp, command, data);
                break;
        case SIOCSIFMTU:
            {
                int max_frame_size;

                IOCTL_DEBUGOUT("ioctl rcv'd: SIOCSIFMTU (Set Interface MTU)");

                EM_CORE_LOCK(adapter);
                switch (adapter->hw.mac.type) {
                case e1000_82571:
                case e1000_82572:
                case e1000_ich9lan:
                case e1000_ich10lan:
                case e1000_82574:
                case e1000_80003es2lan: /* 9K Jumbo Frame size */
                        max_frame_size = 9234;
                        break;
                case e1000_pchlan:
                        max_frame_size = 4096;
                        break;
                        /* Adapters that do not support jumbo frames */
                case e1000_82583:
                case e1000_ich8lan:
                        max_frame_size = ETHER_MAX_LEN;
                        break;
                default:
                        max_frame_size = MAX_JUMBO_FRAME_SIZE;
                }
                if (ifr->ifr_mtu > max_frame_size - ETHER_HDR_LEN -
                    ETHER_CRC_LEN) {
                        EM_CORE_UNLOCK(adapter);
                        error = EINVAL;
                        break;
                }

                ifp->if_mtu = ifr->ifr_mtu;
                adapter->max_frame_size =
                    ifp->if_mtu + ETHER_HDR_LEN + ETHER_CRC_LEN;
                em_init_locked(adapter);
                EM_CORE_UNLOCK(adapter);
                break;
            }
        case SIOCSIFFLAGS:
                IOCTL_DEBUGOUT("ioctl rcv'd: SIOCSIFFLAGS (Set Interface Flags)");
                EM_CORE_LOCK(adapter);
                if (ifp->if_flags & IFF_UP) {
                        if ((ifp->if_drv_flags & IFF_DRV_RUNNING)) {
                                if ((ifp->if_flags ^ adapter->if_flags) &
                                    (IFF_PROMISC | IFF_ALLMULTI)) {
                                        em_disable_promisc(adapter);
                                        em_set_promisc(adapter);
                                }
                        } else
                                em_init_locked(adapter);
                } else
                        if (ifp->if_drv_flags & IFF_DRV_RUNNING)
                                em_stop(adapter);
                adapter->if_flags = ifp->if_flags;
                EM_CORE_UNLOCK(adapter);
                break;
        case SIOCADDMULTI:
        case SIOCDELMULTI:
                IOCTL_DEBUGOUT("ioctl rcv'd: SIOC(ADD|DEL)MULTI");
                if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
                        EM_CORE_LOCK(adapter);
                        em_disable_intr(adapter);
                        em_set_multi(adapter);
#ifdef DEVICE_POLLING
                        if (!(ifp->if_capenable & IFCAP_POLLING))
#endif
                                em_enable_intr(adapter);
                        EM_CORE_UNLOCK(adapter);
                }
                break;
        case SIOCSIFMEDIA:
                /* Check SOL/IDER usage */
                EM_CORE_LOCK(adapter);
                if (e1000_check_reset_block(&adapter->hw)) {
                        EM_CORE_UNLOCK(adapter);
                        device_printf(adapter->dev, "Media change is"
                            " blocked due to SOL/IDER session.\n");
                        break;
                }
                EM_CORE_UNLOCK(adapter);
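                /* FALLTHROUGH */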
        case SIOCGIFMEDIA:
                IOCTL_DEBUGOUT("ioctl rcv'd: SIOCxIFMEDIA (Get/Set Interface Media)");
                error = ifmedia_ioctl(ifp, ifr, &adapter->media, command);
                break;
        case SIOCSIFCAP:
            {
                int mask, reinit;

                IOCTL_DEBUGOUT("ioctl rcv'd: SIOCSIFCAP (Set Capabilities)");
                reinit = 0;
                mask = ifr->ifr_reqcap ^ ifp->if_capenable;
#ifdef DEVICE_POLLING
                if (mask & IFCAP_POLLING) {
                        if (ifr->ifr_reqcap & IFCAP_POLLING) {
                                error = ether_poll_register(em_poll, ifp);
                                if (error)
                                        return (error);
                                EM_CORE_LOCK(adapter);
                                em_disable_intr(adapter);
                                ifp->if_capenable |= IFCAP_POLLING;
                                EM_CORE_UNLOCK(adapter);
                        } else {
                                error = ether_poll_deregister(ifp);
                                /* Enable interrupt even in error case */
                                EM_CORE_LOCK(adapter);
                                em_enable_intr(adapter);
                                ifp->if_capenable &= ~IFCAP_POLLING;
                                EM_CORE_UNLOCK(adapter);
                        }
                }
#endif
                if (mask & IFCAP_HWCSUM) {
                        ifp->if_capenable ^= IFCAP_HWCSUM;
                        reinit = 1;
                }
                if (mask & IFCAP_TSO4) {
                        ifp->if_capenable ^= IFCAP_TSO4;
                        reinit = 1;
                }
                if (mask & IFCAP_VLAN_HWTAGGING) {
                        ifp->if_capenable ^= IFCAP_VLAN_HWTAGGING;
                        reinit = 1;
                }
                if (mask & IFCAP_VLAN_HWFILTER) {
                        ifp->if_capenable ^= IFCAP_VLAN_HWFILTER;
                        reinit = 1;
                }
                if ((mask & IFCAP_WOL) &&
                    (ifp->if_capabilities & IFCAP_WOL) != 0) {
                        if (mask & IFCAP_WOL_MCAST)
                                ifp->if_capenable ^= IFCAP_WOL_MCAST;
                        if (mask & IFCAP_WOL_MAGIC)
                                ifp->if_capenable ^= IFCAP_WOL_MAGIC;
                }
                if (reinit && (ifp->if_drv_flags & IFF_DRV_RUNNING))
                        em_init(adapter);
                VLAN_CAPABILITIES(ifp);
                break;
            }

        default:
                error = ether_ioctl(ifp, command, data);
                break;
        }

        return (error);
}


/*********************************************************************
 *  Init entry point
 *
 *  This routine is used in two ways. It is used by the stack as
 *  the init entry point in the network interface structure. It is
 *  also used by the driver as a hw/sw initialization routine to
 *  get the hardware and software to a consistent state.
 *
 **********************************************************************/

static void
em_init_locked(struct adapter *adapter)
{
        struct ifnet    *ifp = adapter->ifp;
        device_t        dev = adapter->dev;
        u32             pba;

        INIT_DEBUGOUT("em_init: begin");

        EM_CORE_LOCK_ASSERT(adapter);

        em_disable_intr(adapter);
        callout_stop(&adapter->timer);

        /*
         * Packet Buffer Allocation (PBA)
         * Writing PBA sets the receive portion of the buffer;
         * the remainder is used for the transmit buffer.
         */
        switch (adapter->hw.mac.type) {
        /* Total Packet Buffer on these is 48K */
        case e1000_82571:
        case e1000_82572:
        case e1000_80003es2lan:
                pba = E1000_PBA_32K; /* 32K for Rx, 16K for Tx */
                break;
        case e1000_82573: /* 82573: Total Packet Buffer is 32K */
                pba = E1000_PBA_12K; /* 12K for Rx, 20K for Tx */
                break;
        case e1000_82574:
        case e1000_82583:
                pba = E1000_PBA_20K; /* 20K for Rx, 20K for Tx */
                break;
        case e1000_ich9lan:
        case e1000_ich10lan:
        case e1000_pchlan:
                pba = E1000_PBA_10K;
                break;
        case e1000_ich8lan:
                pba = E1000_PBA_8K;
                break;
        default:
                if (adapter->max_frame_size > 8192)
                        pba = E1000_PBA_40K; /* 40K for Rx, 24K for Tx */
                else
                        pba = E1000_PBA_48K; /* 48K for Rx, 16K for Tx */
        }

        INIT_DEBUGOUT1("em_init: pba=%dK", pba);
        E1000_WRITE_REG(&adapter->hw, E1000_PBA, pba);

        /* Get the latest mac address, User can use a LAA */
        bcopy(IF_LLADDR(adapter->ifp), adapter->hw.mac.addr,
              ETHER_ADDR_LEN);

        /* Put the address into the Receive Address Array */
        e1000_rar_set(&adapter->hw, adapter->hw.mac.addr, 0);

        /*
         * With the 82571 adapter, RAR[0] may be overwritten
         * when the other port is reset; we make a duplicate
         * in RAR[14] for that eventuality, which assures
         * the interface continues to function.
         */
        if (adapter->hw.mac.type == e1000_82571) {
                e1000_set_laa_state_82571(&adapter->hw, TRUE);
                e1000_rar_set(&adapter->hw, adapter->hw.mac.addr,
                    E1000_RAR_ENTRIES - 1);
        }

        /* Initialize the hardware */
        em_reset(adapter);
        em_update_link_status(adapter);

        /* Setup VLAN support, basic and offload if available */
        E1000_WRITE_REG(&adapter->hw, E1000_VET, ETHERTYPE_VLAN);

        /* Use real VLAN Filter support? */
        if (ifp->if_capenable & IFCAP_VLAN_HWTAGGING) {
                if (ifp->if_capenable & IFCAP_VLAN_HWFILTER)
                        /* Use real VLAN Filter support */
                        em_setup_vlan_hw_support(adapter);
                else {
                        u32 ctrl;
                        ctrl = E1000_READ_REG(&adapter->hw, E1000_CTRL);
                        ctrl |= E1000_CTRL_VME;
                        E1000_WRITE_REG(&adapter->hw, E1000_CTRL, ctrl);
                }
        }

        /* Set hardware offload abilities */
        ifp->if_hwassist = 0;
        if (ifp->if_capenable & IFCAP_TXCSUM)
                ifp->if_hwassist |= (CSUM_TCP | CSUM_UDP);
        if (ifp->if_capenable & IFCAP_TSO4)
                ifp->if_hwassist |= CSUM_TSO;

        /* Configure for OS presence */
        em_init_manageability(adapter);

        /* Prepare transmit descriptors and buffers */
        em_setup_transmit_structures(adapter);
        em_initialize_transmit_unit(adapter);

        /* Setup Multicast table */
        em_set_multi(adapter);

        /* Prepare receive descriptors and buffers */
        if (em_setup_receive_structures(adapter)) {
                device_printf(dev, "Could not setup receive structures\n");
                em_stop(adapter);
                return;
        }
        em_initialize_receive_unit(adapter);

        /* Don't lose promiscuous settings */
        em_set_promisc(adapter);

        ifp->if_drv_flags |= IFF_DRV_RUNNING;
        ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;

        callout_reset(&adapter->timer, hz, em_local_timer, adapter);
        e1000_clear_hw_cntrs_base_generic(&adapter->hw);

        /* MSI/X configuration for 82574 */
        if (adapter->hw.mac.type == e1000_82574) {
                int tmp;
                tmp = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
                tmp |= E1000_CTRL_EXT_PBA_CLR;
                E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT, tmp);
                /* Set the IVAR - interrupt vector routing. */
                E1000_WRITE_REG(&adapter->hw, E1000_IVAR, adapter->ivars);
        }

#ifdef DEVICE_POLLING
        /*
         * Only enable interrupts if we are not polling, make sure
         * they are off otherwise.
         */
        if (ifp->if_capenable & IFCAP_POLLING)
                em_disable_intr(adapter);
        else
#endif /* DEVICE_POLLING */
                em_enable_intr(adapter);

        /* AMT based hardware can now take control from firmware */
        if (adapter->has_manage && adapter->has_amt)
                em_get_hw_control(adapter);

        /* Don't reset the phy next time init gets called */
        adapter->hw.phy.reset_disable = TRUE;
}

static void
em_init(void *arg)
{
        struct adapter *adapter = arg;

        EM_CORE_LOCK(adapter);
        em_init_locked(adapter);
        EM_CORE_UNLOCK(adapter);
}


#ifdef DEVICE_POLLING
/*********************************************************************
 *
 *  Legacy polling routine: note this only works with single queue
 *
 *********************************************************************/
static int
em_poll(struct ifnet *ifp, enum poll_cmd cmd, int count)
{
        struct adapter *adapter = ifp->if_softc;
        struct tx_ring  *txr = adapter->tx_rings;
        struct rx_ring  *rxr = adapter->rx_rings;
        u32             reg_icr;
        int             rx_done;

        EM_CORE_LOCK(adapter);
        if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) {
                EM_CORE_UNLOCK(adapter);
                return (0);
        }

        if (cmd == POLL_AND_CHECK_STATUS) {
                reg_icr = E1000_READ_REG(&adapter->hw, E1000_ICR);
                if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
                        callout_stop(&adapter->timer);
                        adapter->hw.mac.get_link_status = 1;
                        em_update_link_status(adapter);
                        callout_reset(&adapter->timer, hz,
                            em_local_timer, adapter);
                }
        }
        EM_CORE_UNLOCK(adapter);

        em_rxeof(rxr, count, &rx_done);

        EM_TX_LOCK(txr);
        em_txeof(txr);
#ifdef EM_MULTIQUEUE
        if (!drbr_empty(ifp, txr->br))
                em_mq_start_locked(ifp, txr, NULL);
#else
        if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
                em_start_locked(ifp, txr);
#endif
        EM_TX_UNLOCK(txr);

        return (rx_done);
}
#endif /* DEVICE_POLLING */
1391
1392
1393 /*********************************************************************
1394  *
1395  *  Fast Legacy/MSI Combined Interrupt Service routine  
1396  *
1397  *********************************************************************/
1398 static int
1399 em_irq_fast(void *arg)
1400 {
1401         struct adapter  *adapter = arg;
1402         struct ifnet    *ifp;
1403         u32             reg_icr;
1404
1405         ifp = adapter->ifp;
1406
1407         reg_icr = E1000_READ_REG(&adapter->hw, E1000_ICR);
1408
1409         /* Hot eject?  */
1410         if (reg_icr == 0xffffffff)
1411                 return (FILTER_STRAY);
1412
1413         /* Definitely not our interrupt.  */
1414         if (reg_icr == 0x0)
1415                 return (FILTER_STRAY);
1416
1417         /*
1418          * Starting with the 82571 chip, bit 31 should be used to
1419          * determine whether the interrupt belongs to us.
1420          */
1421         if (adapter->hw.mac.type >= e1000_82571 &&
1422             (reg_icr & E1000_ICR_INT_ASSERTED) == 0)
1423                 return (FILTER_STRAY);
1424
1425         em_disable_intr(adapter);
1426         taskqueue_enqueue(adapter->tq, &adapter->que_task);
1427
1428         /* Link status change */
1429         if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
1430                 adapter->hw.mac.get_link_status = 1;
1431                 taskqueue_enqueue(taskqueue_fast, &adapter->link_task);
1432         }
1433
1434         if (reg_icr & E1000_ICR_RXO)
1435                 adapter->rx_overruns++;
1436         return (FILTER_HANDLED);
1437 }
1438
1439 /* Combined RX/TX handler, used by Legacy and MSI */
1440 static void
1441 em_handle_que(void *context, int pending)
1442 {
1443         struct adapter  *adapter = context;
1444         struct ifnet    *ifp = adapter->ifp;
1445         struct tx_ring  *txr = adapter->tx_rings;
1446         struct rx_ring  *rxr = adapter->rx_rings;
1447         bool            more;
1448
1449
1450         if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
1451                 more = em_rxeof(rxr, adapter->rx_process_limit, NULL);
1452
1453                 EM_TX_LOCK(txr);
1454                 if (em_txeof(txr))
1455                         more = TRUE;
1456 #ifdef EM_MULTIQUEUE
1457                 if (!drbr_empty(ifp, txr->br))
1458                         em_mq_start_locked(ifp, txr, NULL);
1459 #else
1460                 if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
1461                         em_start_locked(ifp, txr);
1462 #endif
1463                 EM_TX_UNLOCK(txr);
1464                 if (more) {
1465                         taskqueue_enqueue(adapter->tq, &adapter->que_task);
1466                         return;
1467                 }
1468         }
1469
1470         em_enable_intr(adapter);
1471         return;
1472 }
1473
1474
1475 /*********************************************************************
1476  *
1477  *  MSIX Interrupt Service Routines
1478  *
1479  **********************************************************************/
1480 static void
1481 em_msix_tx(void *arg)
1482 {
1483         struct tx_ring *txr = arg;
1484         struct adapter *adapter = txr->adapter;
1485         bool            more;
1486
1487         ++txr->tx_irq;
1488         EM_TX_LOCK(txr);
1489         more = em_txeof(txr);
1490         EM_TX_UNLOCK(txr);
1491         if (more)
1492                 taskqueue_enqueue(txr->tq, &txr->tx_task);
1493         else
1494                 /* Reenable this interrupt */
1495                 E1000_WRITE_REG(&adapter->hw, E1000_IMS, txr->ims);
1496         return;
1497 }
1498
1499 /*********************************************************************
1500  *
1501  *  MSIX RX Interrupt Service routine
1502  *
1503  **********************************************************************/
1504
1505 static void
1506 em_msix_rx(void *arg)
1507 {
1508         struct rx_ring  *rxr = arg;
1509         struct adapter  *adapter = rxr->adapter;
1510         bool            more;
1511
1512         ++rxr->rx_irq;
1513         more = em_rxeof(rxr, adapter->rx_process_limit, NULL);
1514         if (more)
1515                 taskqueue_enqueue(rxr->tq, &rxr->rx_task);
1516         else
1517                 /* Reenable this interrupt */
1518                 E1000_WRITE_REG(&adapter->hw, E1000_IMS, rxr->ims);
1519         return;
1520 }
1521
1522 /*********************************************************************
1523  *
1524  *  MSIX Link Fast Interrupt Service routine
1525  *
1526  **********************************************************************/
1527 static void
1528 em_msix_link(void *arg)
1529 {
1530         struct adapter  *adapter = arg;
1531         u32             reg_icr;
1532
1533         ++adapter->link_irq;
1534         reg_icr = E1000_READ_REG(&adapter->hw, E1000_ICR);
1535
1536         if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
1537                 adapter->hw.mac.get_link_status = 1;
1538                 em_handle_link(adapter, 0);
1539         } else
1540                 E1000_WRITE_REG(&adapter->hw, E1000_IMS,
1541                     EM_MSIX_LINK | E1000_IMS_LSC);
1542         return;
1543 }
1544
1545 static void
1546 em_handle_rx(void *context, int pending)
1547 {
1548         struct rx_ring  *rxr = context;
1549         struct adapter  *adapter = rxr->adapter;
1550         bool            more;
1551
1552         more = em_rxeof(rxr, adapter->rx_process_limit, NULL);
1553         if (more)
1554                 taskqueue_enqueue(rxr->tq, &rxr->rx_task);
1555         else
1556                 /* Reenable this interrupt */
1557                 E1000_WRITE_REG(&adapter->hw, E1000_IMS, rxr->ims);
1558 }
1559
1560 static void
1561 em_handle_tx(void *context, int pending)
1562 {
1563         struct tx_ring  *txr = context;
1564         struct adapter  *adapter = txr->adapter;
1565         struct ifnet    *ifp = adapter->ifp;
1566
1567         if (!EM_TX_TRYLOCK(txr))
1568                 return;
1569
1570         em_txeof(txr);
1571
1572 #ifdef EM_MULTIQUEUE
1573         if (!drbr_empty(ifp, txr->br))
1574                 em_mq_start_locked(ifp, txr, NULL);
1575 #else
1576         if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
1577                 em_start_locked(ifp, txr);
1578 #endif
1579         E1000_WRITE_REG(&adapter->hw, E1000_IMS, txr->ims);
1580         EM_TX_UNLOCK(txr);
1581 }
1582
1583 static void
1584 em_handle_link(void *context, int pending)
1585 {
1586         struct adapter  *adapter = context;
1587         struct ifnet *ifp = adapter->ifp;
1588
1589         if (!(ifp->if_drv_flags & IFF_DRV_RUNNING))
1590                 return;
1591
1592         EM_CORE_LOCK(adapter);
1593         callout_stop(&adapter->timer);
1594         em_update_link_status(adapter);
1595         callout_reset(&adapter->timer, hz, em_local_timer, adapter);
1596         E1000_WRITE_REG(&adapter->hw, E1000_IMS,
1597             EM_MSIX_LINK | E1000_IMS_LSC);
1598         EM_CORE_UNLOCK(adapter);
1599 }
1600
1601
1602 /*********************************************************************
1603  *
1604  *  Media Ioctl callback
1605  *
1606  *  This routine is called whenever the user queries the status of
1607  *  the interface using ifconfig.
1608  *
1609  **********************************************************************/
1610 static void
1611 em_media_status(struct ifnet *ifp, struct ifmediareq *ifmr)
1612 {
1613         struct adapter *adapter = ifp->if_softc;
1614         u_char fiber_type = IFM_1000_SX;
1615
1616         INIT_DEBUGOUT("em_media_status: begin");
1617
1618         EM_CORE_LOCK(adapter);
1619         em_update_link_status(adapter);
1620
1621         ifmr->ifm_status = IFM_AVALID;
1622         ifmr->ifm_active = IFM_ETHER;
1623
1624         if (!adapter->link_active) {
1625                 EM_CORE_UNLOCK(adapter);
1626                 return;
1627         }
1628
1629         ifmr->ifm_status |= IFM_ACTIVE;
1630
1631         if ((adapter->hw.phy.media_type == e1000_media_type_fiber) ||
1632             (adapter->hw.phy.media_type == e1000_media_type_internal_serdes)) {
1633                 ifmr->ifm_active |= fiber_type | IFM_FDX;
1634         } else {
1635                 switch (adapter->link_speed) {
1636                 case 10:
1637                         ifmr->ifm_active |= IFM_10_T;
1638                         break;
1639                 case 100:
1640                         ifmr->ifm_active |= IFM_100_TX;
1641                         break;
1642                 case 1000:
1643                         ifmr->ifm_active |= IFM_1000_T;
1644                         break;
1645                 }
1646                 if (adapter->link_duplex == FULL_DUPLEX)
1647                         ifmr->ifm_active |= IFM_FDX;
1648                 else
1649                         ifmr->ifm_active |= IFM_HDX;
1650         }
1651         EM_CORE_UNLOCK(adapter);
1652 }
1653
1654 /*********************************************************************
1655  *
1656  *  Media Ioctl callback
1657  *
1658  *  This routine is called when the user changes speed/duplex using
1659  *  the media/mediaopt options with ifconfig.
1660  *
1661  **********************************************************************/
1662 static int
1663 em_media_change(struct ifnet *ifp)
1664 {
1665         struct adapter *adapter = ifp->if_softc;
1666         struct ifmedia  *ifm = &adapter->media;
1667
1668         INIT_DEBUGOUT("em_media_change: begin");
1669
1670         if (IFM_TYPE(ifm->ifm_media) != IFM_ETHER)
1671                 return (EINVAL);
1672
1673         EM_CORE_LOCK(adapter);
1674         switch (IFM_SUBTYPE(ifm->ifm_media)) {
1675         case IFM_AUTO:
1676                 adapter->hw.mac.autoneg = DO_AUTO_NEG;
1677                 adapter->hw.phy.autoneg_advertised = AUTONEG_ADV_DEFAULT;
1678                 break;
1679         case IFM_1000_LX:
1680         case IFM_1000_SX:
1681         case IFM_1000_T:
1682                 adapter->hw.mac.autoneg = DO_AUTO_NEG;
1683                 adapter->hw.phy.autoneg_advertised = ADVERTISE_1000_FULL;
1684                 break;
1685         case IFM_100_TX:
1686                 adapter->hw.mac.autoneg = FALSE;
1687                 adapter->hw.phy.autoneg_advertised = 0;
1688                 if ((ifm->ifm_media & IFM_GMASK) == IFM_FDX)
1689                         adapter->hw.mac.forced_speed_duplex = ADVERTISE_100_FULL;
1690                 else
1691                         adapter->hw.mac.forced_speed_duplex = ADVERTISE_100_HALF;
1692                 break;
1693         case IFM_10_T:
1694                 adapter->hw.mac.autoneg = FALSE;
1695                 adapter->hw.phy.autoneg_advertised = 0;
1696                 if ((ifm->ifm_media & IFM_GMASK) == IFM_FDX)
1697                         adapter->hw.mac.forced_speed_duplex = ADVERTISE_10_FULL;
1698                 else
1699                         adapter->hw.mac.forced_speed_duplex = ADVERTISE_10_HALF;
1700                 break;
1701         default:
1702                 device_printf(adapter->dev, "Unsupported media type\n");
1703         }
1704
1705         /* As the speed/duplex settings may have changed, we need to
1706          * reset the PHY.
1707          */
1708         adapter->hw.phy.reset_disable = FALSE;
1709
1710         em_init_locked(adapter);
1711         EM_CORE_UNLOCK(adapter);
1712
1713         return (0);
1714 }
1715
1716 /*********************************************************************
1717  *
1718  *  This routine maps the mbufs to tx descriptors.
1719  *
1720  *  return 0 on success, positive on failure
1721  **********************************************************************/
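     /*
      * Illustrative caller sketch (an assumption drawn from how the
      * em_start_locked()/em_mq_start_locked() paths use this routine,
      * not verbatim driver code):
      *
      *      IFQ_DRV_DEQUEUE(&ifp->if_snd, m_head);
      *      if (em_xmit(txr, &m_head) != 0) {
      *              if (m_head == NULL)
      *                      break;          (the chain was freed for us)
      *              ifp->if_drv_flags |= IFF_DRV_OACTIVE;
      *              IFQ_DRV_PREPEND(&ifp->if_snd, m_head);
      *              break;
      *      }
      *
      * On failure *m_headp is either set to NULL (the chain was consumed
      * and freed here) or left intact so the caller can requeue it.
      */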
1722
1723 static int
1724 em_xmit(struct tx_ring *txr, struct mbuf **m_headp)
1725 {
1726         struct adapter          *adapter = txr->adapter;
1727         bus_dma_segment_t       segs[EM_MAX_SCATTER];
1728         bus_dmamap_t            map;
1729         struct em_buffer        *tx_buffer, *tx_buffer_mapped;
1730         struct e1000_tx_desc    *ctxd = NULL;
1731         struct mbuf             *m_head;
1732         u32                     txd_upper, txd_lower, txd_used, txd_saved;
1733         int                     nsegs, i, j, first, last = 0;
1734         int                     error, do_tso, tso_desc = 0;
1735
1736         m_head = *m_headp;
1737         txd_upper = txd_lower = txd_used = txd_saved = 0;
1738         do_tso = ((m_head->m_pkthdr.csum_flags & CSUM_TSO) != 0);
1739
1740         /*
1741          * TSO workaround:
1742          *  if an mbuf contains only the header, we need
1743          *  to pull 4 bytes of data into it.
1744          */
1745         if (do_tso && (m_head->m_len <= M_TSO_LEN)) {
1746                 m_head = m_pullup(m_head, M_TSO_LEN + 4);
1747                 *m_headp = m_head;
1748                 if (m_head == NULL)
1749                         return (ENOBUFS);
1750         }
1751
1752         /*
1753          * Map the packet for DMA
1754          *
1755          * Capture the first descriptor index,
1756          * this descriptor will have the index
1757          * of the EOP which is the only one that
1758          * now gets a DONE bit writeback.
1759          */
1760         first = txr->next_avail_desc;
1761         tx_buffer = &txr->tx_buffers[first];
1762         tx_buffer_mapped = tx_buffer;
1763         map = tx_buffer->map;
1764
1765         error = bus_dmamap_load_mbuf_sg(txr->txtag, map,
1766             *m_headp, segs, &nsegs, BUS_DMA_NOWAIT);
1767
1768         /*
1769          * There are two types of errors we can (try) to handle:
1770          * - EFBIG means the mbuf chain was too long and bus_dma ran
1771          *   out of segments.  Defragment the mbuf chain and try again.
1772          * - ENOMEM means bus_dma could not obtain enough bounce buffers
1773          *   at this point in time.  Defer sending and try again later.
1774          * All other errors, in particular EINVAL, are fatal and prevent the
1775          * mbuf chain from ever going through.  Drop it and report error.
1776          */
1777         if (error == EFBIG) {
1778                 struct mbuf *m;
1779
1780                 m = m_defrag(*m_headp, M_DONTWAIT);
1781                 if (m == NULL) {
1782                         adapter->mbuf_alloc_failed++;
1783                         m_freem(*m_headp);
1784                         *m_headp = NULL;
1785                         return (ENOBUFS);
1786                 }
1787                 *m_headp = m;
1788
1789                 /* Try it again */
1790                 error = bus_dmamap_load_mbuf_sg(txr->txtag, map,
1791                     *m_headp, segs, &nsegs, BUS_DMA_NOWAIT);
1792
1793                 if (error) {
1794                         adapter->no_tx_dma_setup++;
1795                         m_freem(*m_headp);
1796                         *m_headp = NULL;
1797                         return (error);
1798                 }
1799         } else if (error == ENOMEM) {
1800                 adapter->no_tx_dma_setup++;
1801                 return (error);
1802         } else if (error != 0) {
                     adapter->no_tx_dma_setup++;
                     /* Fatal (e.g. EINVAL): drop the chain, as documented above */
                     m_freem(*m_headp);
                     *m_headp = NULL;
                     return (error);
             }
1803
1804         /*
1805          * TSO Hardware workaround, if this packet is not
1806          * TSO, and is only a single descriptor long, and
1807          * it follows a TSO burst, then we need to add a
1808          * sentinel descriptor to prevent premature writeback.
1809          */
1810         if ((do_tso == 0) && (txr->tx_tso == TRUE)) {
1811                 if (nsegs == 1)
1812                         tso_desc = TRUE;
1813                 txr->tx_tso = FALSE;
1814         }
1815
1816         if (nsegs > (txr->tx_avail - 2)) {
1817                 txr->no_desc_avail++;
1818                 bus_dmamap_unload(txr->txtag, map);
1819                 return (ENOBUFS);
1820         }
1821         m_head = *m_headp;
1822
1823         /* Do hardware assists */
1824 #if __FreeBSD_version >= 700000
1825         if (m_head->m_pkthdr.csum_flags & CSUM_TSO) {
1826                 error = em_tso_setup(txr, m_head, &txd_upper, &txd_lower);
1827                 if (error != TRUE) {
                             /* Unwind the DMA mapping before bailing out */
                             bus_dmamap_unload(txr->txtag, map);
1828                         return (ENXIO); /* something foobar */
                     }
1829                 /* we need to make a final sentinel transmit desc */
1830                 tso_desc = TRUE;
1831         } else
1832 #endif
1833         if (m_head->m_pkthdr.csum_flags & CSUM_OFFLOAD)
1834                 em_transmit_checksum_setup(txr, m_head,
1835                     &txd_upper, &txd_lower);
1836
1837         i = txr->next_avail_desc;
1838
1839         /* Set up our transmit descriptors */
1840         for (j = 0; j < nsegs; j++) {
1841                 bus_size_t seg_len;
1842                 bus_addr_t seg_addr;
1843
1844                 tx_buffer = &txr->tx_buffers[i];
1845                 ctxd = &txr->tx_base[i];
1846                 seg_addr = segs[j].ds_addr;
1847                 seg_len  = segs[j].ds_len;
1848                 /*
1849                 ** TSO Workaround:
1850                 ** If this is the last descriptor, we want to
1851                 ** split it so we have a small final sentinel
1852                 */
1853                 if (tso_desc && (j == (nsegs - 1)) && (seg_len > 8)) {
1854                         seg_len -= 4;
1855                         ctxd->buffer_addr = htole64(seg_addr);
1856                         ctxd->lower.data = htole32(
1857                             adapter->txd_cmd | txd_lower | seg_len);
1858                         ctxd->upper.data =
1859                             htole32(txd_upper);
1860                         if (++i == adapter->num_tx_desc)
1861                                 i = 0;
1862                         /* Now make the sentinel */
1863                         ++txd_used; /* using an extra txd */
1864                         ctxd = &txr->tx_base[i];
1865                         tx_buffer = &txr->tx_buffers[i];
1866                         ctxd->buffer_addr =
1867                             htole64(seg_addr + seg_len);
1868                         ctxd->lower.data = htole32(
1869                             adapter->txd_cmd | txd_lower | 4);
1870                         ctxd->upper.data =
1871                             htole32(txd_upper);
1872                         last = i;
1873                         if (++i == adapter->num_tx_desc)
1874                                 i = 0;
1875                 } else {
1876                         ctxd->buffer_addr = htole64(seg_addr);
1877                         ctxd->lower.data = htole32(
1878                             adapter->txd_cmd | txd_lower | seg_len);
1879                         ctxd->upper.data =
1880                             htole32(txd_upper);
1881                         last = i;
1882                         if (++i == adapter->num_tx_desc)
1883                                 i = 0;
1884                 }
1885                 tx_buffer->m_head = NULL;
1886                 tx_buffer->next_eop = -1;
1887         }
1888
1889         txr->next_avail_desc = i;
1890         txr->tx_avail -= nsegs;
1891         if (tso_desc) /* TSO used an extra for sentinel */
1892                 txr->tx_avail -= txd_used;
1893
1894         if (m_head->m_flags & M_VLANTAG) {
1895                 /* Set the vlan id. */
1896                 ctxd->upper.fields.special =
1897                     htole16(m_head->m_pkthdr.ether_vtag);
1898                 /* Tell hardware to add tag */
1899                 ctxd->lower.data |= htole32(E1000_TXD_CMD_VLE);
1900         }
1901
1902         tx_buffer->m_head = m_head;
1903         tx_buffer_mapped->map = tx_buffer->map;
1904         tx_buffer->map = map;
1905         bus_dmamap_sync(txr->txtag, map, BUS_DMASYNC_PREWRITE);
1906
1907         /*
1908          * Last Descriptor of Packet
1909          * needs End Of Packet (EOP)
1910          * and Report Status (RS)
1911          */
1912         ctxd->lower.data |=
1913             htole32(E1000_TXD_CMD_EOP | E1000_TXD_CMD_RS);
1914         /*
1915          * Keep track in the first buffer which
1916          * descriptor will be written back
1917          */
1918         tx_buffer = &txr->tx_buffers[first];
1919         tx_buffer->next_eop = last;
1920
1921         /*
1922          * Advance the Transmit Descriptor Tail (TDT), this tells the E1000
1923          * that this frame is available to transmit.
1924          */
1925         bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
1926             BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
1927         E1000_WRITE_REG(&adapter->hw, E1000_TDT(txr->me), i);
1928
1929         return (0);
1930 }
1931
1932 static void
1933 em_set_promisc(struct adapter *adapter)
1934 {
1935         struct ifnet    *ifp = adapter->ifp;
1936         u32             reg_rctl;
1937
1938         reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
1939
1940         if (ifp->if_flags & IFF_PROMISC) {
1941                 reg_rctl |= (E1000_RCTL_UPE | E1000_RCTL_MPE);
1942                 /* Turn this on if you want to see bad packets */
1943                 if (em_debug_sbp)
1944                         reg_rctl |= E1000_RCTL_SBP;
1945                 E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
1946         } else if (ifp->if_flags & IFF_ALLMULTI) {
1947                 reg_rctl |= E1000_RCTL_MPE;
1948                 reg_rctl &= ~E1000_RCTL_UPE;
1949                 E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
1950         }
1951 }
1952
1953 static void
1954 em_disable_promisc(struct adapter *adapter)
1955 {
1956         u32     reg_rctl;
1957
1958         reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
1959
1960         reg_rctl &= ~E1000_RCTL_UPE;
1961         reg_rctl &= ~E1000_RCTL_MPE;
1962         reg_rctl &= ~E1000_RCTL_SBP;
1963         E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
1964 }
1965
1966
1967 /*********************************************************************
1968  *  Multicast Update
1969  *
1970  *  This routine is called whenever the multicast address list is updated.
1971  *
1972  **********************************************************************/
1973
1974 static void
1975 em_set_multi(struct adapter *adapter)
1976 {
1977         struct ifnet    *ifp = adapter->ifp;
1978         struct ifmultiaddr *ifma;
1979         u32 reg_rctl = 0;
1980         u8  *mta; /* Multicast array memory */
1981         int mcnt = 0;
1982
1983         IOCTL_DEBUGOUT("em_set_multi: begin");
1984
1985         if (adapter->hw.mac.type == e1000_82542 && 
1986             adapter->hw.revision_id == E1000_REVISION_2) {
1987                 reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
1988                 if (adapter->hw.bus.pci_cmd_word & CMD_MEM_WRT_INVALIDATE)
1989                         e1000_pci_clear_mwi(&adapter->hw);
1990                 reg_rctl |= E1000_RCTL_RST;
1991                 E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
1992                 msec_delay(5);
1993         }
1994
1995         /* Allocate temporary memory to setup array */
1996         mta = malloc(sizeof(u8) *
1997             (ETH_ADDR_LEN * MAX_NUM_MULTICAST_ADDRESSES),
1998             M_DEVBUF, M_NOWAIT | M_ZERO);
1999         if (mta == NULL)
2000                 panic("em_set_multi memory failure\n");
2001
2002 #if __FreeBSD_version < 800000
2003         IF_ADDR_LOCK(ifp);
2004 #else
2005         if_maddr_rlock(ifp);
2006 #endif
2007         TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
2008                 if (ifma->ifma_addr->sa_family != AF_LINK)
2009                         continue;
2010
2011                 if (mcnt == MAX_NUM_MULTICAST_ADDRESSES)
2012                         break;
2013
2014                 bcopy(LLADDR((struct sockaddr_dl *)ifma->ifma_addr),
2015                     &mta[mcnt * ETH_ADDR_LEN], ETH_ADDR_LEN);
2016                 mcnt++;
2017         }
2018 #if __FreeBSD_version < 800000
2019         IF_ADDR_UNLOCK(ifp);
2020 #else
2021         if_maddr_runlock(ifp);
2022 #endif
2023         if (mcnt >= MAX_NUM_MULTICAST_ADDRESSES) {
2024                 reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
2025                 reg_rctl |= E1000_RCTL_MPE;
2026                 E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2027         } else
2028                 e1000_update_mc_addr_list(&adapter->hw, mta, mcnt);
2029
2030         if (adapter->hw.mac.type == e1000_82542 && 
2031             adapter->hw.revision_id == E1000_REVISION_2) {
2032                 reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
2033                 reg_rctl &= ~E1000_RCTL_RST;
2034                 E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2035                 msec_delay(5);
2036                 if (adapter->hw.bus.pci_cmd_word & CMD_MEM_WRT_INVALIDATE)
2037                         e1000_pci_set_mwi(&adapter->hw);
2038         }
2039         free(mta, M_DEVBUF);
2040 }
2041
2042
2043 /*********************************************************************
2044  *  Timer routine
2045  *
2046  *  This routine checks for link status and updates statistics.
2047  *
2048  **********************************************************************/
2049
2050 static void
2051 em_local_timer(void *arg)
2052 {
2053         struct adapter  *adapter = arg;
2054         struct ifnet    *ifp = adapter->ifp;
2055         struct tx_ring  *txr = adapter->tx_rings;
2056
2057         EM_CORE_LOCK_ASSERT(adapter);
2058
2059         em_update_link_status(adapter);
2060         em_update_stats_counters(adapter);
2061
2062         /* Reset LAA into RAR[0] on 82571 */
2063         if (e1000_get_laa_state_82571(&adapter->hw) == TRUE)
2064                 e1000_rar_set(&adapter->hw, adapter->hw.mac.addr, 0);
2065
2066         /*
2067         ** Check the time since any descriptor was last cleaned
2068         */
2069         for (int i = 0; i < adapter->num_queues; i++, txr++) {
2070                 EM_TX_LOCK(txr);
2071                 if (txr->watchdog_check == FALSE) {
2072                         EM_TX_UNLOCK(txr);
2073                         continue;
2074                 }
2075                 if ((ticks - txr->watchdog_time) > EM_WATCHDOG)
2076                         goto hung;
2077                 EM_TX_UNLOCK(txr);
2078         }
2079
2080         callout_reset(&adapter->timer, hz, em_local_timer, adapter);
2081         return;
2082 hung:
2083         device_printf(adapter->dev, "Watchdog timeout -- resetting\n");
2084         ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
2085         adapter->watchdog_events++;
2086         EM_TX_UNLOCK(txr);
2087         em_init_locked(adapter);
2088 }
2089
2090
2091 static void
2092 em_update_link_status(struct adapter *adapter)
2093 {
2094         struct e1000_hw *hw = &adapter->hw;
2095         struct ifnet *ifp = adapter->ifp;
2096         device_t dev = adapter->dev;
2097         u32 link_check = 0;
2098
2099         /* Get the cached link value or read phy for real */
2100         switch (hw->phy.media_type) {
2101         case e1000_media_type_copper:
2102                 if (hw->mac.get_link_status) {
2103                         /* Do the work to read phy */
2104                         e1000_check_for_link(hw);
2105                         link_check = !hw->mac.get_link_status;
2106                         if (link_check) /* ESB2 fix */
2107                                 e1000_cfg_on_link_up(hw);
2108                 } else
2109                         link_check = TRUE;
2110                 break;
2111         case e1000_media_type_fiber:
2112                 e1000_check_for_link(hw);
2113                 link_check = (E1000_READ_REG(hw, E1000_STATUS) &
2114                                  E1000_STATUS_LU);
2115                 break;
2116         case e1000_media_type_internal_serdes:
2117                 e1000_check_for_link(hw);
2118                 link_check = adapter->hw.mac.serdes_has_link;
2119                 break;
2120         default:
2121         case e1000_media_type_unknown:
2122                 break;
2123         }
2124
2125         /* Now check for a transition */
2126         if (link_check && (adapter->link_active == 0)) {
2127                 e1000_get_speed_and_duplex(hw, &adapter->link_speed,
2128                     &adapter->link_duplex);
2129                 /* Check if we must disable SPEED_MODE bit on PCI-E */
2130                 if ((adapter->link_speed != SPEED_1000) &&
2131                     ((hw->mac.type == e1000_82571) ||
2132                     (hw->mac.type == e1000_82572))) {
2133                         int tarc0;
2134                         tarc0 = E1000_READ_REG(hw, E1000_TARC(0));
2135                         tarc0 &= ~SPEED_MODE_BIT;
2136                         E1000_WRITE_REG(hw, E1000_TARC(0), tarc0);
2137                 }
2138                 if (bootverbose)
2139                         device_printf(dev, "Link is up %d Mbps %s\n",
2140                             adapter->link_speed,
2141                             ((adapter->link_duplex == FULL_DUPLEX) ?
2142                             "Full Duplex" : "Half Duplex"));
2143                 adapter->link_active = 1;
2144                 adapter->smartspeed = 0;
2145                 ifp->if_baudrate = adapter->link_speed * 1000000;
2146                 if_link_state_change(ifp, LINK_STATE_UP);
2147         } else if (!link_check && (adapter->link_active == 1)) {
2148                 ifp->if_baudrate = adapter->link_speed = 0;
2149                 adapter->link_duplex = 0;
2150                 if (bootverbose)
2151                         device_printf(dev, "Link is Down\n");
2152                 adapter->link_active = 0;
2153                 /* Link down, disable watchdog */
2154                 /* JFV: change later */
2155                 /* adapter->watchdog_check = FALSE; */
2156                 if_link_state_change(ifp, LINK_STATE_DOWN);
2157         }
2158 }
2159
2160 /*********************************************************************
2161  *
2162  *  This routine disables all traffic on the adapter by issuing a
2163  *  global reset on the MAC and deallocates TX/RX buffers.
2164  *
2165  *  This routine must be called with the CORE lock held; it
2166  *  takes each TX lock itself while disarming the watchdogs.
2167  **********************************************************************/
2168
2169 static void
2170 em_stop(void *arg)
2171 {
2172         struct adapter  *adapter = arg;
2173         struct ifnet    *ifp = adapter->ifp;
2174         struct tx_ring  *txr = adapter->tx_rings;
2175
2176         EM_CORE_LOCK_ASSERT(adapter);
2177
2178         INIT_DEBUGOUT("em_stop: begin");
2179
2180         em_disable_intr(adapter);
2181         callout_stop(&adapter->timer);
2182
2183         /* Tell the stack that the interface is no longer active */
2184         ifp->if_drv_flags &= ~(IFF_DRV_RUNNING | IFF_DRV_OACTIVE);
2185
2186         /* Unarm watchdog timer. */
2187         for (int i = 0; i < adapter->num_queues; i++, txr++) {
2188                 EM_TX_LOCK(txr);
2189                 txr->watchdog_check = FALSE;
2190                 EM_TX_UNLOCK(txr);
2191         }
2192
2193         e1000_reset_hw(&adapter->hw);
2194         E1000_WRITE_REG(&adapter->hw, E1000_WUC, 0);
2195
2196         e1000_led_off(&adapter->hw);
2197         e1000_cleanup_led(&adapter->hw);
2198 }
2199
2200
2201 /*********************************************************************
2202  *
2203  *  Determine hardware revision.
2204  *
2205  **********************************************************************/
2206 static void
2207 em_identify_hardware(struct adapter *adapter)
2208 {
2209         device_t dev = adapter->dev;
2210
2211         /* Make sure our PCI config space has the necessary stuff set */
2212         adapter->hw.bus.pci_cmd_word = pci_read_config(dev, PCIR_COMMAND, 2);
2213         if (!((adapter->hw.bus.pci_cmd_word & PCIM_CMD_BUSMASTEREN) &&
2214             (adapter->hw.bus.pci_cmd_word & PCIM_CMD_MEMEN))) {
2215                 device_printf(dev, "Memory Access and/or Bus Master bits "
2216                     "were not set!\n");
2217                 adapter->hw.bus.pci_cmd_word |=
2218                     (PCIM_CMD_BUSMASTEREN | PCIM_CMD_MEMEN);
2219                 pci_write_config(dev, PCIR_COMMAND,
2220                     adapter->hw.bus.pci_cmd_word, 2);
2221         }
2222
2223         /* Save off the information about this board */
2224         adapter->hw.vendor_id = pci_get_vendor(dev);
2225         adapter->hw.device_id = pci_get_device(dev);
2226         adapter->hw.revision_id = pci_read_config(dev, PCIR_REVID, 1);
2227         adapter->hw.subsystem_vendor_id =
2228             pci_read_config(dev, PCIR_SUBVEND_0, 2);
2229         adapter->hw.subsystem_device_id =
2230             pci_read_config(dev, PCIR_SUBDEV_0, 2);
2231
2232         /* Do Shared Code Init and Setup */
2233         if (e1000_set_mac_type(&adapter->hw)) {
2234                 device_printf(dev, "Setup init failure\n");
2235                 return;
2236         }
2237 }
2238
2239 static int
2240 em_allocate_pci_resources(struct adapter *adapter)
2241 {
2242         device_t        dev = adapter->dev;
2243         int             rid;
2244
2245         rid = PCIR_BAR(0);
2246         adapter->memory = bus_alloc_resource_any(dev, SYS_RES_MEMORY,
2247             &rid, RF_ACTIVE);
2248         if (adapter->memory == NULL) {
2249                 device_printf(dev, "Unable to allocate bus resource: memory\n");
2250                 return (ENXIO);
2251         }
2252         adapter->osdep.mem_bus_space_tag =
2253             rman_get_bustag(adapter->memory);
2254         adapter->osdep.mem_bus_space_handle =
2255             rman_get_bushandle(adapter->memory);
2256         adapter->hw.hw_addr = (u8 *)&adapter->osdep.mem_bus_space_handle;
2257
2258         /* Default to a single queue */
2259         adapter->num_queues = 1;
2260
2261         /*
2262          * Setup MSI/X or MSI if PCI Express
2263          */
2264         adapter->msix = em_setup_msix(adapter);
2265
2266         adapter->hw.back = &adapter->osdep;
2267
2268         return (0);
2269 }
2270
2271 /*********************************************************************
2272  *
2273  *  Setup the Legacy or MSI Interrupt handler
2274  *
2275  **********************************************************************/
2276 int
2277 em_allocate_legacy(struct adapter *adapter)
2278 {
2279         device_t dev = adapter->dev;
2280         int error, rid = 0;
2281
2282         /* Manually turn off all interrupts */
2283         E1000_WRITE_REG(&adapter->hw, E1000_IMC, 0xffffffff);
2284
2285         if (adapter->msix == 1) /* using MSI */
2286                 rid = 1;
2287         /* We allocate a single interrupt resource */
2288         adapter->res = bus_alloc_resource_any(dev,
2289             SYS_RES_IRQ, &rid, RF_SHAREABLE | RF_ACTIVE);
2290         if (adapter->res == NULL) {
2291                 device_printf(dev, "Unable to allocate bus resource: "
2292                     "interrupt\n");
2293                 return (ENXIO);
2294         }
2295
2296         /*
2297          * Allocate a fast interrupt and the associated
2298          * deferred processing contexts.
2299          */
2300         TASK_INIT(&adapter->que_task, 0, em_handle_que, adapter);
2301         TASK_INIT(&adapter->link_task, 0, em_handle_link, adapter);
2302         adapter->tq = taskqueue_create_fast("em_taskq", M_NOWAIT,
2303             taskqueue_thread_enqueue, &adapter->tq);
2304         taskqueue_start_threads(&adapter->tq, 1, PI_NET, "%s taskq",
2305             device_get_nameunit(adapter->dev));
2306         if ((error = bus_setup_intr(dev, adapter->res, INTR_TYPE_NET,
2307             em_irq_fast, NULL, adapter, &adapter->tag)) != 0) {
2308                 device_printf(dev, "Failed to register fast interrupt "
2309                             "handler: %d\n", error);
2310                 taskqueue_free(adapter->tq);
2311                 adapter->tq = NULL;
2312                 return (error);
2313         }
2314         
2315         return (0);
2316 }
2317
2318 /*********************************************************************
2319  *
2320  *  Setup the MSIX Interrupt handlers
2321  *   This is not really multiqueue, rather
2322  *   it's just multiple interrupt vectors.
2323  *
2324  **********************************************************************/
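     /*
      * Editor's note (illustrative): with the default single-queue 82574
      * setup the loop below produces
      *
      *      RX ring  -> MSI-X vector 0, rid 1
      *      TX ring  -> MSI-X vector 1, rid 2
      *      link     -> MSI-X vector 2, rid 3
      *
      * MSI-X resource IDs are 1-based, hence rid = vector + 1 throughout.
      */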
2325 int
2326 em_allocate_msix(struct adapter *adapter)
2327 {
2328         device_t        dev = adapter->dev;
2329         struct          tx_ring *txr = adapter->tx_rings;
2330         struct          rx_ring *rxr = adapter->rx_rings;
2331         int             error, rid, vector = 0;
2332
2333
2334         /* Make sure all interrupts are disabled */
2335         E1000_WRITE_REG(&adapter->hw, E1000_IMC, 0xffffffff);
2336
2337         /* First set up ring resources */
2338         for (int i = 0; i < adapter->num_queues; i++, txr++, rxr++) {
2339
2340                 /* RX ring */
2341                 rid = vector + 1;
2342
2343                 rxr->res = bus_alloc_resource_any(dev,
2344                     SYS_RES_IRQ, &rid, RF_ACTIVE);
2345                 if (rxr->res == NULL) {
2346                         device_printf(dev,
2347                             "Unable to allocate bus resource: "
2348                             "RX MSIX Interrupt %d\n", i);
2349                         return (ENXIO);
2350                 }
2351                 if ((error = bus_setup_intr(dev, rxr->res,
2352                     INTR_TYPE_NET | INTR_MPSAFE, NULL, em_msix_rx,
2353                     rxr, &rxr->tag)) != 0) {
2354                         device_printf(dev, "Failed to register RX handler");
2355                         return (error);
2356                 }
2357                 rxr->msix = vector++; /* NOTE increment vector for TX */
2358                 TASK_INIT(&rxr->rx_task, 0, em_handle_rx, rxr);
2359                 rxr->tq = taskqueue_create_fast("em_rxq", M_NOWAIT,
2360                     taskqueue_thread_enqueue, &rxr->tq);
2361                 taskqueue_start_threads(&rxr->tq, 1, PI_NET, "%s rxq",
2362                     device_get_nameunit(adapter->dev));
2363                 /*
2364                 ** Set the bit to enable interrupt
2365                 ** in E1000_IMS -- bits 20 and 21
2366                 ** are for RX0 and RX1, note this has
2367                 ** NOTHING to do with the MSIX vector
2368                 */
2369                 rxr->ims = 1 << (20 + i);
2370                 adapter->ivars |= (8 | rxr->msix) << (i * 4);
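                     /*
                      * e.g. for i = 0 this yields ims = 0x00100000 (bit 20)
                      * and an ivars nibble of (8 | 0) in bits 3:0.
                      */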
2371
2372                 /* TX ring */
2373                 rid = vector + 1;
2374                 txr->res = bus_alloc_resource_any(dev,
2375                     SYS_RES_IRQ, &rid, RF_ACTIVE);
2376                 if (txr->res == NULL) {
2377                         device_printf(dev,
2378                             "Unable to allocate bus resource: "
2379                             "TX MSIX Interrupt %d\n", i);
2380                         return (ENXIO);
2381                 }
2382                 if ((error = bus_setup_intr(dev, txr->res,
2383                     INTR_TYPE_NET | INTR_MPSAFE, NULL, em_msix_tx,
2384                     txr, &txr->tag)) != 0) {
2385                         device_printf(dev, "Failed to register TX handler");
2386                         return (error);
2387                 }
2388                 txr->msix = vector++; /* Increment vector for next pass */
2389                 TASK_INIT(&txr->tx_task, 0, em_handle_tx, txr);
2390                 txr->tq = taskqueue_create_fast("em_txq", M_NOWAIT,
2391                     taskqueue_thread_enqueue, &txr->tq);
2392                 taskqueue_start_threads(&txr->tq, 1, PI_NET, "%s txq",
2393                     device_get_nameunit(adapter->dev));
2394                 /*
2395                 ** Set the bit to enable interrupt
2396                 ** in E1000_IMS -- bits 22 and 23
2397                 ** are for TX0 and TX1, note this has
2398                 ** NOTHING to do with the MSIX vector
2399                 */
2400                 txr->ims = 1 << (22 + i);
2401                 adapter->ivars |= (8 | txr->msix) << (8 + (i * 4));
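                     /*
                      * e.g. for i = 0 this yields ims = 0x00400000 (bit 22)
                      * and an ivars nibble of (8 | 1) in bits 11:8.
                      */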
2402         }
2403
2404         /* Link interrupt */
2405         ++rid;
2406         adapter->res = bus_alloc_resource_any(dev,
2407             SYS_RES_IRQ, &rid, RF_ACTIVE);
2408         if (!adapter->res) {
2409                 device_printf(dev, "Unable to allocate "
2410                     "bus resource: Link interrupt [%d]\n", rid);
2411                 return (ENXIO);
2412         }
2413         /* Set the link handler function */
2414         error = bus_setup_intr(dev, adapter->res,
2415             INTR_TYPE_NET | INTR_MPSAFE, NULL,
2416             em_msix_link, adapter, &adapter->tag);
2417         if (error) {
2418                 adapter->res = NULL;
2419                 device_printf(dev, "Failed to register LINK handler");
2420                 return (error);
2421         }
2422         adapter->linkvec = vector;
2423         adapter->ivars |=  (8 | vector) << 16;
2424         adapter->ivars |= 0x80000000;
2425
2426         return (0);
2427 }
2428
2429
2430 static void
2431 em_free_pci_resources(struct adapter *adapter)
2432 {
2433         device_t        dev = adapter->dev;
2434         struct tx_ring  *txr;
2435         struct rx_ring  *rxr;
2436         int             rid;
2437
2438
2439         /*
2440         ** Release all the queue interrupt resources:
2441         */
2442         for (int i = 0; i < adapter->num_queues; i++) {
2443                 txr = &adapter->tx_rings[i];
2444                 rxr = &adapter->rx_rings[i];
2445                 rid = txr->msix + 1;
2446                 if (txr->tag != NULL) {
2447                         bus_teardown_intr(dev, txr->res, txr->tag);
2448                         txr->tag = NULL;
2449                 }
2450                 if (txr->res != NULL)
2451                         bus_release_resource(dev, SYS_RES_IRQ,
2452                             rid, txr->res);
2453                 rid = rxr->msix + 1;
2454                 if (rxr->tag != NULL) {
2455                         bus_teardown_intr(dev, rxr->res, rxr->tag);
2456                         rxr->tag = NULL;
2457                 }
2458                 if (rxr->res != NULL)
2459                         bus_release_resource(dev, SYS_RES_IRQ,
2460                             rid, rxr->res);
2461         }
2462
2463         if (adapter->linkvec) /* we are doing MSIX */
2464                 rid = adapter->linkvec + 1;
2465         else
2466                 rid = (adapter->msix != 0) ? 1 : 0;
2467
2468         if (adapter->tag != NULL) {
2469                 bus_teardown_intr(dev, adapter->res, adapter->tag);
2470                 adapter->tag = NULL;
2471         }
2472
2473         if (adapter->res != NULL)
2474                 bus_release_resource(dev, SYS_RES_IRQ, rid, adapter->res);
2475
2476
2477         if (adapter->msix)
2478                 pci_release_msi(dev);
2479
2480         if (adapter->msix_mem != NULL)
2481                 bus_release_resource(dev, SYS_RES_MEMORY,
2482                     PCIR_BAR(EM_MSIX_BAR), adapter->msix_mem);
2483
2484         if (adapter->memory != NULL)
2485                 bus_release_resource(dev, SYS_RES_MEMORY,
2486                     PCIR_BAR(0), adapter->memory);
2487
2488         if (adapter->flash != NULL)
2489                 bus_release_resource(dev, SYS_RES_MEMORY,
2490                     EM_FLASH, adapter->flash);
2491 }
2492
2493 /*
2494  * Setup MSI or MSI/X
2495  */
2496 static int
2497 em_setup_msix(struct adapter *adapter)
2498 {
2499         device_t dev = adapter->dev;
2500         int val = 0;
2501
2502
2503         /* Setup MSI/X for Hartwell */
2504         if ((adapter->hw.mac.type == e1000_82574) &&
2505             (em_enable_msix == TRUE)) {
2506                 /* Map the MSIX BAR */
2507                 int rid = PCIR_BAR(EM_MSIX_BAR);
2508                 adapter->msix_mem = bus_alloc_resource_any(dev,
2509                     SYS_RES_MEMORY, &rid, RF_ACTIVE);
2510                 if (!adapter->msix_mem) {
2511                         /* May not be enabled */
2512                         device_printf(adapter->dev,
2513                     "Unable to map MSIX table\n");
2514                         goto msi;
2515                 }
2516                 val = pci_msix_count(dev); 
2517                 if (val != 5) {
2518                         bus_release_resource(dev, SYS_RES_MEMORY,
2519                             PCIR_BAR(EM_MSIX_BAR), adapter->msix_mem);
2520                         adapter->msix_mem = NULL;
2521                         device_printf(adapter->dev,
2522                             "MSIX vectors wrong, using MSI\n");
2523                         goto msi;
2524                 }
2525                 if (em_msix_queues == 2) {
2526                         val = 5;
2527                         adapter->num_queues = 2;
2528                 } else {
2529                         val = 3;
2530                         adapter->num_queues = 1;
2531                 }
2532                 if (pci_alloc_msix(dev, &val) == 0) {
2533                         device_printf(adapter->dev,
2534                             "Using MSIX interrupts "
2535                             "with %d vectors\n", val);
2536                 }
2537
2538                 return (val);
2539         }
2540 msi:
2541         val = pci_msi_count(dev);
2542         if (val == 1 && pci_alloc_msi(dev, &val) == 0) {
2543                 adapter->msix = 1;
2544         device_printf(adapter->dev, "Using MSI interrupt\n");
2545                 return (val);
2546         } 
2547         /* Should only happen due to manual intervention */
2548         device_printf(adapter->dev, "Setup MSIX failure\n");
2549         return (0);
2550 }
2551
2552
2553 /*********************************************************************
2554  *
2555  *  Initialize the hardware to a configuration
2556  *  as specified by the adapter structure.
2557  *
2558  **********************************************************************/
2559 static void
2560 em_reset(struct adapter *adapter)
2561 {
2562         device_t        dev = adapter->dev;
2563         struct e1000_hw *hw = &adapter->hw;
2564         u16             rx_buffer_size;
2565
2566         INIT_DEBUGOUT("em_reset: begin");
2567
2568         /* Set up smart power down as default off on newer adapters. */
2569         if (!em_smart_pwr_down && (hw->mac.type == e1000_82571 ||
2570             hw->mac.type == e1000_82572)) {
2571                 u16 phy_tmp = 0;
2572
2573                 /* Speed up time to link by disabling smart power down. */
2574                 e1000_read_phy_reg(hw, IGP02E1000_PHY_POWER_MGMT, &phy_tmp);
2575                 phy_tmp &= ~IGP02E1000_PM_SPD;
2576                 e1000_write_phy_reg(hw, IGP02E1000_PHY_POWER_MGMT, phy_tmp);
2577         }
2578
2579         /*
2580          * These parameters control the automatic generation (Tx) and
2581          * response (Rx) to Ethernet PAUSE frames.
2582          * - High water mark should allow for at least two frames to be
2583          *   received after sending an XOFF.
2584          * - Low water mark works best when it is very near the high water mark.
2585          *   This allows the receiver to restart by sending XON when it has
2586          *   drained a bit. Here we use an arbitrary value of 1500 which will
2587          *   restart after one full frame is pulled from the buffer. There
2588          *   could be several smaller frames in the buffer and if so they will
2589          *   not trigger the XON until their total number reduces the buffer
2590          *   by 1500.
2591          * - The pause time is fairly large at 1000 x 512ns = 512 usec.
2592          */
2593         rx_buffer_size = ((E1000_READ_REG(hw, E1000_PBA) & 0xffff) << 10);
2594
2595         hw->fc.high_water = rx_buffer_size -
2596             roundup2(adapter->max_frame_size, 1024);
2597         hw->fc.low_water = hw->fc.high_water - 1500;
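             /*
              * Worked example (assumed figures, for illustration only):
              * a PBA value of 48 (KB of RX packet buffer) gives
              * rx_buffer_size = 48 << 10 = 49152 bytes.  With a standard
              * 1518-byte max frame, roundup2(1518, 1024) = 2048, so
              * high_water = 49152 - 2048 = 47104 and
              * low_water  = 47104 - 1500 = 45604.
              */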
2598
2599         if (hw->mac.type == e1000_80003es2lan)
2600                 hw->fc.pause_time = 0xFFFF;
2601         else
2602                 hw->fc.pause_time = EM_FC_PAUSE_TIME;
2603
2604         hw->fc.send_xon = TRUE;
2605
2606         /* Set flow control, honoring the tunable only if it is sane */
2607         if ((em_fc_setting >= 0) && (em_fc_setting < 4))
2608                 hw->fc.requested_mode = em_fc_setting;
2609         else
2610                 hw->fc.requested_mode = e1000_fc_none;
2611
2612         /* Override - workaround for PCHLAN issue */
2613         if (hw->mac.type == e1000_pchlan)
2614                 hw->fc.requested_mode = e1000_fc_rx_pause;
2615
2616         /* Issue a global reset */
2617         e1000_reset_hw(hw);
2618         E1000_WRITE_REG(hw, E1000_WUC, 0);
2619
2620         if (e1000_init_hw(hw) < 0) {
2621                 device_printf(dev, "Hardware Initialization Failed\n");
2622                 return;
2623         }
2624
2625         E1000_WRITE_REG(hw, E1000_VET, ETHERTYPE_VLAN);
2626         e1000_get_phy_info(hw);
2627         e1000_check_for_link(hw);
2628         return;
2629 }
2630
2631 /*********************************************************************
2632  *
2633  *  Setup networking device structure and register an interface.
2634  *
2635  **********************************************************************/
2636 static void
2637 em_setup_interface(device_t dev, struct adapter *adapter)
2638 {
2639         struct ifnet   *ifp;
2640
2641         INIT_DEBUGOUT("em_setup_interface: begin");
2642
2643         ifp = adapter->ifp = if_alloc(IFT_ETHER);
2644         if (ifp == NULL)
2645                 panic("%s: can not if_alloc()", device_get_nameunit(dev));
2646         if_initname(ifp, device_get_name(dev), device_get_unit(dev));
2647         ifp->if_mtu = ETHERMTU;
2648         ifp->if_init =  em_init;
2649         ifp->if_softc = adapter;
2650         ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
2651         ifp->if_ioctl = em_ioctl;
2652         ifp->if_start = em_start;
2653         IFQ_SET_MAXLEN(&ifp->if_snd, adapter->num_tx_desc - 1);
2654         ifp->if_snd.ifq_drv_maxlen = adapter->num_tx_desc - 1;
2655         IFQ_SET_READY(&ifp->if_snd);
2656
2657         ether_ifattach(ifp, adapter->hw.mac.addr);
2658
2659         ifp->if_capabilities = ifp->if_capenable = 0;
2660
2661 #ifdef EM_MULTIQUEUE
2662         /* Multiqueue tx functions */
2663         ifp->if_transmit = em_mq_start;
2664         ifp->if_qflush = em_qflush;
2665 #endif  
2666
2667         ifp->if_capabilities |= IFCAP_HWCSUM | IFCAP_VLAN_HWCSUM;
2668         ifp->if_capenable |= IFCAP_HWCSUM | IFCAP_VLAN_HWCSUM;
2669
2670         /* Enable TSO by default, can disable with ifconfig */
2671         ifp->if_capabilities |= IFCAP_TSO4;
2672         ifp->if_capenable |= IFCAP_TSO4;
2673
2674         /*
2675          * Tell the upper layer(s) we
2676          * support full VLAN capability
2677          */
2678         ifp->if_data.ifi_hdrlen = sizeof(struct ether_vlan_header);
2679         ifp->if_capabilities |= IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_MTU;
2680         ifp->if_capenable |= IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_MTU;
2681
2682         /*
2683         ** Don't turn this on by default: if vlans are
2684         ** created on another pseudo device (e.g. lagg),
2685         ** vlan events are not passed through, breaking
2686         ** operation, but with HW FILTER off it works. If
2687         ** you use vlans directly on the em driver you can
2688         ** enable this and get full hardware tag filtering.
2689         */
2690         ifp->if_capabilities |= IFCAP_VLAN_HWFILTER;
2691
2692 #ifdef DEVICE_POLLING
2693         ifp->if_capabilities |= IFCAP_POLLING;
2694 #endif
2695
2696         /* Enable only WOL MAGIC by default */
2697         if (adapter->wol) {
2698                 ifp->if_capabilities |= IFCAP_WOL;
2699                 ifp->if_capenable |= IFCAP_WOL_MAGIC;
2700         }
2701                 
2702         /*
2703          * Specify the media types supported by this adapter and register
2704          * callbacks to update media and link information
2705          */
2706         ifmedia_init(&adapter->media, IFM_IMASK,
2707             em_media_change, em_media_status);
2708         if ((adapter->hw.phy.media_type == e1000_media_type_fiber) ||
2709             (adapter->hw.phy.media_type == e1000_media_type_internal_serdes)) {
2710                 u_char fiber_type = IFM_1000_SX;        /* default type */
2711
2712                 ifmedia_add(&adapter->media, IFM_ETHER | fiber_type | IFM_FDX, 
2713                             0, NULL);
2714                 ifmedia_add(&adapter->media, IFM_ETHER | fiber_type, 0, NULL);
2715         } else {
2716                 ifmedia_add(&adapter->media, IFM_ETHER | IFM_10_T, 0, NULL);
2717                 ifmedia_add(&adapter->media, IFM_ETHER | IFM_10_T | IFM_FDX,
2718                             0, NULL);
2719                 ifmedia_add(&adapter->media, IFM_ETHER | IFM_100_TX,
2720                             0, NULL);
2721                 ifmedia_add(&adapter->media, IFM_ETHER | IFM_100_TX | IFM_FDX,
2722                             0, NULL);
2723                 if (adapter->hw.phy.type != e1000_phy_ife) {
2724                         ifmedia_add(&adapter->media,
2725                                 IFM_ETHER | IFM_1000_T | IFM_FDX, 0, NULL);
2726                         ifmedia_add(&adapter->media,
2727                                 IFM_ETHER | IFM_1000_T, 0, NULL);
2728                 }
2729         }
2730         ifmedia_add(&adapter->media, IFM_ETHER | IFM_AUTO, 0, NULL);
2731         ifmedia_set(&adapter->media, IFM_ETHER | IFM_AUTO);
2732 }
2733
2734
2735 /*
2736  * Manage DMA'able memory.
2737  */
2738 static void
2739 em_dmamap_cb(void *arg, bus_dma_segment_t *segs, int nseg, int error)
2740 {
2741         if (error)
2742                 return;
2743         *(bus_addr_t *) arg = segs[0].ds_addr;
2744 }
2745
2746 static int
2747 em_dma_malloc(struct adapter *adapter, bus_size_t size,
2748         struct em_dma_alloc *dma, int mapflags)
2749 {
2750         int error;
2751
2752         error = bus_dma_tag_create(bus_get_dma_tag(adapter->dev), /* parent */
2753                                 EM_DBA_ALIGN, 0,        /* alignment, bounds */
2754                                 BUS_SPACE_MAXADDR,      /* lowaddr */
2755                                 BUS_SPACE_MAXADDR,      /* highaddr */
2756                                 NULL, NULL,             /* filter, filterarg */
2757                                 size,                   /* maxsize */
2758                                 1,                      /* nsegments */
2759                                 size,                   /* maxsegsize */
2760                                 0,                      /* flags */
2761                                 NULL,                   /* lockfunc */
2762                                 NULL,                   /* lockarg */
2763                                 &dma->dma_tag);
2764         if (error) {
2765                 device_printf(adapter->dev,
2766                     "%s: bus_dma_tag_create failed: %d\n",
2767                     __func__, error);
2768                 goto fail_0;
2769         }
2770
2771         error = bus_dmamem_alloc(dma->dma_tag, (void**) &dma->dma_vaddr,
2772             BUS_DMA_NOWAIT | BUS_DMA_COHERENT, &dma->dma_map);
2773         if (error) {
2774                 device_printf(adapter->dev,
2775                     "%s: bus_dmamem_alloc(%ju) failed: %d\n",
2776                     __func__, (uintmax_t)size, error);
2777                 goto fail_1;
2778         }
2779
2780         dma->dma_paddr = 0;
2781         error = bus_dmamap_load(dma->dma_tag, dma->dma_map, dma->dma_vaddr,
2782             size, em_dmamap_cb, &dma->dma_paddr, mapflags | BUS_DMA_NOWAIT);
2783         if (error || dma->dma_paddr == 0) {
2784                 device_printf(adapter->dev,
2785                     "%s: bus_dmamap_load failed: %d\n",
2786                     __func__, error);
                     /* Don't report success if the callback never ran */
                     if (error == 0)
                             error = ENOMEM;
2787                 goto fail_3;
2788         }
2789
2790         return (0);
2791
2792 fail_3:
2793         bus_dmamap_unload(dma->dma_tag, dma->dma_map);
2794         bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map);
2795 fail_1:
2796         bus_dma_tag_destroy(dma->dma_tag);
2797 fail_0:
2798         dma->dma_map = NULL;
2799         dma->dma_tag = NULL;
2800
2801         return (error);
2802 }
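
/*
 * A minimal usage sketch: em_dma_malloc() returns 0 on success, leaving
 * the kernel virtual address in dma->dma_vaddr and the bus address in
 * dma->dma_paddr, e.g.:
 *
 *      if (em_dma_malloc(adapter, tsize, &txr->txdma, BUS_DMA_NOWAIT))
 *              return (ENOMEM);
 */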
2803
2804 static void
2805 em_dma_free(struct adapter *adapter, struct em_dma_alloc *dma)
2806 {
2807         if (dma->dma_tag == NULL)
2808                 return;
2809         if (dma->dma_map != NULL) {
2810                 bus_dmamap_sync(dma->dma_tag, dma->dma_map,
2811                     BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
2812                 bus_dmamap_unload(dma->dma_tag, dma->dma_map);
2813                 bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map);
2814                 dma->dma_map = NULL;
2815         }
2816         bus_dma_tag_destroy(dma->dma_tag);
2817         dma->dma_tag = NULL;
2818 }
2819
2820
2821 /*********************************************************************
2822  *
2823  *  Allocate memory for the transmit and receive rings, and then
2824  *  the descriptors associated with each, called only once at attach.
2825  *
2826  **********************************************************************/
2827 static int
2828 em_allocate_queues(struct adapter *adapter)
2829 {
2830         device_t                dev = adapter->dev;
2831         struct tx_ring          *txr = NULL;
2832         struct rx_ring          *rxr = NULL;
2833         int rsize, tsize, error = E1000_SUCCESS;
2834         int txconf = 0, rxconf = 0;
2835
2836
2837         /* Allocate the TX ring struct memory */
2838         if (!(adapter->tx_rings =
2839             (struct tx_ring *) malloc(sizeof(struct tx_ring) *
2840             adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
2841                 device_printf(dev, "Unable to allocate TX ring memory\n");
2842                 error = ENOMEM;
2843                 goto fail;
2844         }
2845
2846         /* Now allocate the RX */
2847         if (!(adapter->rx_rings =
2848             (struct rx_ring *) malloc(sizeof(struct rx_ring) *
2849             adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
2850                 device_printf(dev, "Unable to allocate RX ring memory\n");
2851                 error = ENOMEM;
2852                 goto rx_fail;
2853         }
2854
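        /*
         * Each legacy descriptor is 16 bytes; round the ring size up to
         * EM_DBA_ALIGN so the base address programmed into TDBAL/TDBAH
         * satisfies the hardware's alignment requirement.
         */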
2855         tsize = roundup2(adapter->num_tx_desc *
2856             sizeof(struct e1000_tx_desc), EM_DBA_ALIGN);
        /*
         * Now set up the TX queues; txconf tracks how many have been
         * configured so we can unwind the allocations gracefully if
         * anything fails midway.
         */
2862         for (int i = 0; i < adapter->num_queues; i++, txconf++) {
2863                 /* Set up some basics */
2864                 txr = &adapter->tx_rings[i];
2865                 txr->adapter = adapter;
2866                 txr->me = i;
2867
2868                 /* Initialize the TX lock */
2869                 snprintf(txr->mtx_name, sizeof(txr->mtx_name), "%s:tx(%d)",
2870                     device_get_nameunit(dev), txr->me);
2871                 mtx_init(&txr->tx_mtx, txr->mtx_name, NULL, MTX_DEF);
2872
2873                 if (em_dma_malloc(adapter, tsize,
2874                         &txr->txdma, BUS_DMA_NOWAIT)) {
2875                         device_printf(dev,
2876                             "Unable to allocate TX Descriptor memory\n");
2877                         error = ENOMEM;
2878                         goto err_tx_desc;
2879                 }
2880                 txr->tx_base = (struct e1000_tx_desc *)txr->txdma.dma_vaddr;
2881                 bzero((void *)txr->tx_base, tsize);
2882
2883                 if (em_allocate_transmit_buffers(txr)) {
2884                         device_printf(dev,
2885                             "Critical Failure setting up transmit buffers\n");
2886                         error = ENOMEM;
2887                         goto err_tx_desc;
2888                 }
2889 #if __FreeBSD_version >= 800000
2890                 /* Allocate a buf ring */
2891                 txr->br = buf_ring_alloc(4096, M_DEVBUF,
2892                     M_WAITOK, &txr->tx_mtx);
2893 #endif
2894         }
2895
2896         /*
2897          * Next the RX queues...
2898          */ 
2899         rsize = roundup2(adapter->num_rx_desc *
2900             sizeof(struct e1000_rx_desc), EM_DBA_ALIGN);
2901         for (int i = 0; i < adapter->num_queues; i++, rxconf++) {
2902                 rxr = &adapter->rx_rings[i];
2903                 rxr->adapter = adapter;
2904                 rxr->me = i;
2905
2906                 /* Initialize the RX lock */
2907                 snprintf(rxr->mtx_name, sizeof(rxr->mtx_name), "%s:rx(%d)",
                    device_get_nameunit(dev), rxr->me);
2909                 mtx_init(&rxr->rx_mtx, rxr->mtx_name, NULL, MTX_DEF);
2910
2911                 if (em_dma_malloc(adapter, rsize,
2912                         &rxr->rxdma, BUS_DMA_NOWAIT)) {
2913                         device_printf(dev,
2914                             "Unable to allocate RxDescriptor memory\n");
2915                         error = ENOMEM;
2916                         goto err_rx_desc;
2917                 }
2918                 rxr->rx_base = (struct e1000_rx_desc *)rxr->rxdma.dma_vaddr;
2919                 bzero((void *)rxr->rx_base, rsize);
2920
                /* Allocate receive buffers for the ring */
2922                 if (em_allocate_receive_buffers(rxr)) {
2923                         device_printf(dev,
2924                             "Critical Failure setting up receive buffers\n");
2925                         error = ENOMEM;
2926                         goto err_rx_desc;
2927                 }
2928         }
2929
2930         return (0);
2931
2932 err_rx_desc:
2933         for (rxr = adapter->rx_rings; rxconf > 0; rxr++, rxconf--)
2934                 em_dma_free(adapter, &rxr->rxdma);
err_tx_desc:
        for (txr = adapter->tx_rings; txconf > 0; txr++, txconf--) {
                em_dma_free(adapter, &txr->txdma);
#if __FreeBSD_version >= 800000
                /* Release the buf ring of each configured TX queue. */
                if (txr->br != NULL)
                        buf_ring_free(txr->br, M_DEVBUF);
#endif
        }
        free(adapter->rx_rings, M_DEVBUF);
rx_fail:
        free(adapter->tx_rings, M_DEVBUF);
2944 fail:
2945         return (error);
2946 }
2947
2948
2949 /*********************************************************************
2950  *
2951  *  Allocate memory for tx_buffer structures. The tx_buffer stores all
2952  *  the information needed to transmit a packet on the wire. This is
2953  *  called only once at attach, setup is done every reset.
2954  *
2955  **********************************************************************/
2956 static int
2957 em_allocate_transmit_buffers(struct tx_ring *txr)
2958 {
2959         struct adapter *adapter = txr->adapter;
2960         device_t dev = adapter->dev;
2961         struct em_buffer *txbuf;
2962         int error, i;
2963
2964         /*
2965          * Setup DMA descriptor areas.
2966          */
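        /*
         * A single tag is shared by all TX buffers: up to EM_MAX_SCATTER
         * segments of at most PAGE_SIZE each, bounded by EM_TSO_SIZE in
         * total, so one map can describe the largest TSO chain accepted.
         */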
2967         if ((error = bus_dma_tag_create(bus_get_dma_tag(dev),
2968                                1, 0,                    /* alignment, bounds */
2969                                BUS_SPACE_MAXADDR,       /* lowaddr */
2970                                BUS_SPACE_MAXADDR,       /* highaddr */
2971                                NULL, NULL,              /* filter, filterarg */
2972                                EM_TSO_SIZE,             /* maxsize */
2973                                EM_MAX_SCATTER,          /* nsegments */
2974                                PAGE_SIZE,               /* maxsegsize */
2975                                0,                       /* flags */
2976                                NULL,                    /* lockfunc */
2977                                NULL,                    /* lockfuncarg */
2978                                &txr->txtag))) {
2979                 device_printf(dev,"Unable to allocate TX DMA tag\n");
2980                 goto fail;
2981         }
2982
2983         if (!(txr->tx_buffers =
2984             (struct em_buffer *) malloc(sizeof(struct em_buffer) *
2985             adapter->num_tx_desc, M_DEVBUF, M_NOWAIT | M_ZERO))) {
2986                 device_printf(dev, "Unable to allocate tx_buffer memory\n");
2987                 error = ENOMEM;
2988                 goto fail;
2989         }
2990
2991         /* Create the descriptor buffer dma maps */
2992         txbuf = txr->tx_buffers;
2993         for (i = 0; i < adapter->num_tx_desc; i++, txbuf++) {
2994                 error = bus_dmamap_create(txr->txtag, 0, &txbuf->map);
2995                 if (error != 0) {
2996                         device_printf(dev, "Unable to create TX DMA map\n");
2997                         goto fail;
2998                 }
2999         }
3000
3001         return 0;
3002 fail:
        /* Free everything; this handles the case where we failed midway. */
3004         em_free_transmit_structures(adapter);
3005         return (error);
3006 }
3007
3008 /*********************************************************************
3009  *
3010  *  Initialize a transmit ring.
3011  *
3012  **********************************************************************/
3013 static void
3014 em_setup_transmit_ring(struct tx_ring *txr)
3015 {
3016         struct adapter *adapter = txr->adapter;
3017         struct em_buffer *txbuf;
3018         int i;
3019
3020         /* Clear the old descriptor contents */
3021         EM_TX_LOCK(txr);
3022         bzero((void *)txr->tx_base,
3023               (sizeof(struct e1000_tx_desc)) * adapter->num_tx_desc);
3024         /* Reset indices */
3025         txr->next_avail_desc = 0;
3026         txr->next_to_clean = 0;
3027
3028         /* Free any existing tx buffers. */
3029         txbuf = txr->tx_buffers;
3030         for (i = 0; i < adapter->num_tx_desc; i++, txbuf++) {
3031                 if (txbuf->m_head != NULL) {
3032                         bus_dmamap_sync(txr->txtag, txbuf->map,
3033                             BUS_DMASYNC_POSTWRITE);
3034                         bus_dmamap_unload(txr->txtag, txbuf->map);
3035                         m_freem(txbuf->m_head);
3036                         txbuf->m_head = NULL;
3037                 }
3038                 /* clear the watch index */
3039                 txbuf->next_eop = -1;
3040         }
3041
3042         /* Set number of descriptors available */
3043         txr->tx_avail = adapter->num_tx_desc;
3044
3045         bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
3046             BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
3047         EM_TX_UNLOCK(txr);
3048 }
3049
3050 /*********************************************************************
3051  *
3052  *  Initialize all transmit rings.
3053  *
3054  **********************************************************************/
3055 static void
3056 em_setup_transmit_structures(struct adapter *adapter)
3057 {
3058         struct tx_ring *txr = adapter->tx_rings;
3059
3060         for (int i = 0; i < adapter->num_queues; i++, txr++)
3061                 em_setup_transmit_ring(txr);
3062
3063         return;
3064 }
3065
3066 /*********************************************************************
3067  *
3068  *  Enable transmit unit.
3069  *
3070  **********************************************************************/
3071 static void
3072 em_initialize_transmit_unit(struct adapter *adapter)
3073 {
3074         struct tx_ring  *txr = adapter->tx_rings;
3075         struct e1000_hw *hw = &adapter->hw;
3076         u32     tctl, tarc, tipg = 0;
3077
        INIT_DEBUGOUT("em_initialize_transmit_unit: begin");
3079
3080         for (int i = 0; i < adapter->num_queues; i++, txr++) {
3081                 u64 bus_addr = txr->txdma.dma_paddr;
3082                 /* Base and Len of TX Ring */
3083                 E1000_WRITE_REG(hw, E1000_TDLEN(i),
3084                     adapter->num_tx_desc * sizeof(struct e1000_tx_desc));
3085                 E1000_WRITE_REG(hw, E1000_TDBAH(i),
3086                     (u32)(bus_addr >> 32));
3087                 E1000_WRITE_REG(hw, E1000_TDBAL(i),
3088                     (u32)bus_addr);
3089                 /* Init the HEAD/TAIL indices */
3090                 E1000_WRITE_REG(hw, E1000_TDT(i), 0);
3091                 E1000_WRITE_REG(hw, E1000_TDH(i), 0);
3092
3093                 HW_DEBUGOUT2("Base = %x, Length = %x\n",
3094                     E1000_READ_REG(&adapter->hw, E1000_TDBAL(i)),
3095                     E1000_READ_REG(&adapter->hw, E1000_TDLEN(i)));
3096
3097                 txr->watchdog_check = FALSE;
3098         }
3099
3100         /* Set the default values for the Tx Inter Packet Gap timer */
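        /*
         * TIPG packs three inter-packet gap fields into one register:
         * IPGT in the low bits, with IPGR1 and IPGR2 OR'd in at their
         * respective shifts, hence the shift-and-OR construction in
         * each case below.
         */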
3101         switch (adapter->hw.mac.type) {
3102         case e1000_82542:
3103                 tipg = DEFAULT_82542_TIPG_IPGT;
3104                 tipg |= DEFAULT_82542_TIPG_IPGR1 << E1000_TIPG_IPGR1_SHIFT;
3105                 tipg |= DEFAULT_82542_TIPG_IPGR2 << E1000_TIPG_IPGR2_SHIFT;
3106                 break;
3107         case e1000_80003es2lan:
3108                 tipg = DEFAULT_82543_TIPG_IPGR1;
3109                 tipg |= DEFAULT_80003ES2LAN_TIPG_IPGR2 <<
3110                     E1000_TIPG_IPGR2_SHIFT;
3111                 break;
3112         default:
3113                 if ((adapter->hw.phy.media_type == e1000_media_type_fiber) ||
3114                     (adapter->hw.phy.media_type ==
3115                     e1000_media_type_internal_serdes))
3116                         tipg = DEFAULT_82543_TIPG_IPGT_FIBER;
3117                 else
3118                         tipg = DEFAULT_82543_TIPG_IPGT_COPPER;
3119                 tipg |= DEFAULT_82543_TIPG_IPGR1 << E1000_TIPG_IPGR1_SHIFT;
3120                 tipg |= DEFAULT_82543_TIPG_IPGR2 << E1000_TIPG_IPGR2_SHIFT;
3121         }
3122
3123         E1000_WRITE_REG(&adapter->hw, E1000_TIPG, tipg);
3124         E1000_WRITE_REG(&adapter->hw, E1000_TIDV, adapter->tx_int_delay.value);
3125
        if (adapter->hw.mac.type >= e1000_82540)
3127                 E1000_WRITE_REG(&adapter->hw, E1000_TADV,
3128                     adapter->tx_abs_int_delay.value);
3129
3130         if ((adapter->hw.mac.type == e1000_82571) ||
3131             (adapter->hw.mac.type == e1000_82572)) {
3132                 tarc = E1000_READ_REG(&adapter->hw, E1000_TARC(0));
3133                 tarc |= SPEED_MODE_BIT;
3134                 E1000_WRITE_REG(&adapter->hw, E1000_TARC(0), tarc);
3135         } else if (adapter->hw.mac.type == e1000_80003es2lan) {
3136                 tarc = E1000_READ_REG(&adapter->hw, E1000_TARC(0));
3137                 tarc |= 1;
3138                 E1000_WRITE_REG(&adapter->hw, E1000_TARC(0), tarc);
3139                 tarc = E1000_READ_REG(&adapter->hw, E1000_TARC(1));
3140                 tarc |= 1;
3141                 E1000_WRITE_REG(&adapter->hw, E1000_TARC(1), tarc);
3142         }
3143
3144         adapter->txd_cmd = E1000_TXD_CMD_IFCS;
3145         if (adapter->tx_int_delay.value > 0)
3146                 adapter->txd_cmd |= E1000_TXD_CMD_IDE;
3147
3148         /* Program the Transmit Control Register */
3149         tctl = E1000_READ_REG(&adapter->hw, E1000_TCTL);
3150         tctl &= ~E1000_TCTL_CT;
3151         tctl |= (E1000_TCTL_PSP | E1000_TCTL_RTLC | E1000_TCTL_EN |
3152                    (E1000_COLLISION_THRESHOLD << E1000_CT_SHIFT));
3153
3154         if (adapter->hw.mac.type >= e1000_82571)
3155                 tctl |= E1000_TCTL_MULR;
3156
3157         /* This write will effectively turn on the transmit unit. */
3158         E1000_WRITE_REG(&adapter->hw, E1000_TCTL, tctl);
3159
3160 }
3161
3162
3163 /*********************************************************************
3164  *
3165  *  Free all transmit rings.
3166  *
3167  **********************************************************************/
3168 static void
3169 em_free_transmit_structures(struct adapter *adapter)
3170 {
3171         struct tx_ring *txr = adapter->tx_rings;
3172
3173         for (int i = 0; i < adapter->num_queues; i++, txr++) {
3174                 EM_TX_LOCK(txr);
3175                 em_free_transmit_buffers(txr);
3176                 em_dma_free(adapter, &txr->txdma);
3177                 EM_TX_UNLOCK(txr);
3178                 EM_TX_LOCK_DESTROY(txr);
3179         }
3180
3181         free(adapter->tx_rings, M_DEVBUF);
3182 }
3183
3184 /*********************************************************************
3185  *
3186  *  Free transmit ring related data structures.
3187  *
3188  **********************************************************************/
3189 static void
3190 em_free_transmit_buffers(struct tx_ring *txr)
3191 {
3192         struct adapter          *adapter = txr->adapter;
3193         struct em_buffer        *txbuf;
3194
3195         INIT_DEBUGOUT("free_transmit_ring: begin");
3196
3197         if (txr->tx_buffers == NULL)
3198                 return;
3199
3200         for (int i = 0; i < adapter->num_tx_desc; i++) {
3201                 txbuf = &txr->tx_buffers[i];
3202                 if (txbuf->m_head != NULL) {
3203                         bus_dmamap_sync(txr->txtag, txbuf->map,
3204                             BUS_DMASYNC_POSTWRITE);
3205                         bus_dmamap_unload(txr->txtag,
3206                             txbuf->map);
3207                         m_freem(txbuf->m_head);
3208                         txbuf->m_head = NULL;
3209                         if (txbuf->map != NULL) {
3210                                 bus_dmamap_destroy(txr->txtag,
3211                                     txbuf->map);
3212                                 txbuf->map = NULL;
3213                         }
3214                 } else if (txbuf->map != NULL) {
3215                         bus_dmamap_unload(txr->txtag,
3216                             txbuf->map);
3217                         bus_dmamap_destroy(txr->txtag,
3218                             txbuf->map);
3219                         txbuf->map = NULL;
3220                 }
3221         }
3222 #if __FreeBSD_version >= 800000
3223         if (txr->br != NULL)
3224                 buf_ring_free(txr->br, M_DEVBUF);
3225 #endif
3226         if (txr->tx_buffers != NULL) {
3227                 free(txr->tx_buffers, M_DEVBUF);
3228                 txr->tx_buffers = NULL;
3229         }
3230         if (txr->txtag != NULL) {
3231                 bus_dma_tag_destroy(txr->txtag);
3232                 txr->txtag = NULL;
3233         }
3234         return;
3235 }
3236
3237
3238 /*********************************************************************
3239  *
3240  *  The offload context needs to be set when we transfer the first
3241  *  packet of a particular protocol (TCP/UDP). This routine has been
3242  *  enhanced to deal with inserted VLAN headers, and IPV6 (not complete)
3243  *
3244  *  Added back the old method of keeping the current context type
3245  *  and not setting if unnecessary, as this is reported to be a
3246  *  big performance win.  -jfv
3247  **********************************************************************/
3248 static void
3249 em_transmit_checksum_setup(struct tx_ring *txr, struct mbuf *mp,
3250     u32 *txd_upper, u32 *txd_lower)
3251 {
3252         struct adapter                  *adapter = txr->adapter;
3253         struct e1000_context_desc       *TXD = NULL;
3254         struct em_buffer *tx_buffer;
3255         struct ether_vlan_header *eh;
3256         struct ip *ip = NULL;
3257         struct ip6_hdr *ip6;
3258         int cur, ehdrlen;
3259         u32 cmd, hdr_len, ip_hlen;
3260         u16 etype;
3261         u8 ipproto;
3262
3263
3264         cmd = hdr_len = ipproto = 0;
3265         cur = txr->next_avail_desc;
3266
3267         /*
3268          * Determine where frame payload starts.
3269          * Jump over vlan headers if already present,
3270          * helpful for QinQ too.
3271          */
3272         eh = mtod(mp, struct ether_vlan_header *);
3273         if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
3274                 etype = ntohs(eh->evl_proto);
3275                 ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
3276         } else {
3277                 etype = ntohs(eh->evl_encap_proto);
3278                 ehdrlen = ETHER_HDR_LEN;
3279         }
3280
3281         /*
3282          * We only support TCP/UDP for IPv4 and IPv6 for the moment.
3283          * TODO: Support SCTP too when it hits the tree.
3284          */
3285         switch (etype) {
        case ETHERTYPE_IP:
                /* Don't touch the IP header before we know it is there. */
                if (mp->m_len < ehdrlen + sizeof(struct ip))
                        return; /* failure */
                ip = (struct ip *)(mp->m_data + ehdrlen);
                ip_hlen = ip->ip_hl << 2;
3289
3290                 /* Setup of IP header checksum. */
3291                 if (mp->m_pkthdr.csum_flags & CSUM_IP) {
3292                         /*
3293                          * Start offset for header checksum calculation.
3294                          * End offset for header checksum calculation.
3295                          * Offset of place to put the checksum.
3296                          */
3297                         TXD = (struct e1000_context_desc *)
3298                             &txr->tx_base[cur];
3299                         TXD->lower_setup.ip_fields.ipcss = ehdrlen;
3300                         TXD->lower_setup.ip_fields.ipcse =
3301                             htole16(ehdrlen + ip_hlen);
3302                         TXD->lower_setup.ip_fields.ipcso =
3303                             ehdrlen + offsetof(struct ip, ip_sum);
3304                         cmd |= E1000_TXD_CMD_IP;
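                        /*
                         * POPTS occupies bits 15:8 of the data
                         * descriptor's upper dword, hence the shift.
                         */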
3305                         *txd_upper |= E1000_TXD_POPTS_IXSM << 8;
3306                 }
3307
3308                 if (mp->m_len < ehdrlen + ip_hlen)
3309                         return; /* failure */
3310
3311                 hdr_len = ehdrlen + ip_hlen;
3312                 ipproto = ip->ip_p;
3313
3314                 break;
3315         case ETHERTYPE_IPV6:
3316                 ip6 = (struct ip6_hdr *)(mp->m_data + ehdrlen);
3317                 ip_hlen = sizeof(struct ip6_hdr); /* XXX: No header stacking. */
3318
3319                 if (mp->m_len < ehdrlen + ip_hlen)
3320                         return; /* failure */
3321
3322                 /* IPv6 doesn't have a header checksum. */
3323
3324                 hdr_len = ehdrlen + ip_hlen;
3325                 ipproto = ip6->ip6_nxt;
3326
3327                 break;
3328         default:
3329                 *txd_upper = 0;
3330                 *txd_lower = 0;
3331                 return;
3332         }
3333
3334         switch (ipproto) {
3335         case IPPROTO_TCP:
3336                 if (mp->m_pkthdr.csum_flags & CSUM_TCP) {
3337                         *txd_lower = E1000_TXD_CMD_DEXT | E1000_TXD_DTYP_D;
3338                         *txd_upper |= E1000_TXD_POPTS_TXSM << 8;
3339                         /* no need for context if already set */
3340                         if (txr->last_hw_offload == CSUM_TCP)
3341                                 return;
3342                         txr->last_hw_offload = CSUM_TCP;
3343                         /*
3344                          * Start offset for payload checksum calculation.
3345                          * End offset for payload checksum calculation.
3346                          * Offset of place to put the checksum.
3347                          */
3348                         TXD = (struct e1000_context_desc *)
3349                             &txr->tx_base[cur];
3350                         TXD->upper_setup.tcp_fields.tucss = hdr_len;
3351                         TXD->upper_setup.tcp_fields.tucse = htole16(0);
3352                         TXD->upper_setup.tcp_fields.tucso =
3353                             hdr_len + offsetof(struct tcphdr, th_sum);
3354                         cmd |= E1000_TXD_CMD_TCP;
3355                 }
3356                 break;
3357         case IPPROTO_UDP:
3358         {
3359                 if (mp->m_pkthdr.csum_flags & CSUM_UDP) {
3360                         *txd_lower = E1000_TXD_CMD_DEXT | E1000_TXD_DTYP_D;
3361                         *txd_upper |= E1000_TXD_POPTS_TXSM << 8;
3362                         /* no need for context if already set */
3363                         if (txr->last_hw_offload == CSUM_UDP)
3364                                 return;
3365                         txr->last_hw_offload = CSUM_UDP;
3366                         /*
3367                          * Start offset for header checksum calculation.
3368                          * End offset for header checksum calculation.
3369                          * Offset of place to put the checksum.
3370                          */
3371                         TXD = (struct e1000_context_desc *)
3372                             &txr->tx_base[cur];
3373                         TXD->upper_setup.tcp_fields.tucss = hdr_len;
3374                         TXD->upper_setup.tcp_fields.tucse = htole16(0);
3375                         TXD->upper_setup.tcp_fields.tucso =
3376                             hdr_len + offsetof(struct udphdr, uh_sum);
3377                 }
3378                 /* Fall Thru */
3379         }
3380         default:
3381                 break;
3382         }
3383
        /* If no context descriptor was set up, there is nothing to write. */
        if (TXD == NULL)
                return;
        TXD->tcp_seg_setup.data = htole32(0);
3385         TXD->cmd_and_length =
3386             htole32(adapter->txd_cmd | E1000_TXD_CMD_DEXT | cmd);
3387         tx_buffer = &txr->tx_buffers[cur];
3388         tx_buffer->m_head = NULL;
3389         tx_buffer->next_eop = -1;
3390
3391         if (++cur == adapter->num_tx_desc)
3392                 cur = 0;
3393
3394         txr->tx_avail--;
3395         txr->next_avail_desc = cur;
3396 }
3397
3398
3399 /**********************************************************************
3400  *
3401  *  Setup work for hardware segmentation offload (TSO)
3402  *
3403  **********************************************************************/
3404 static bool
3405 em_tso_setup(struct tx_ring *txr, struct mbuf *mp, u32 *txd_upper,
3406    u32 *txd_lower)
3407 {
3408         struct adapter                  *adapter = txr->adapter;
3409         struct e1000_context_desc       *TXD;
3410         struct em_buffer                *tx_buffer;
3411         struct ether_vlan_header        *eh;
3412         struct ip                       *ip;
3413         struct ip6_hdr                  *ip6;
3414         struct tcphdr                   *th;
3415         int cur, ehdrlen, hdr_len, ip_hlen, isip6;
3416         u16 etype;
3417
3418         /*
3419          * This function could/should be extended to support IP/IPv6
3420          * fragmentation as well.  But as they say, one step at a time.
3421          */
3422
3423         /*
3424          * Determine where frame payload starts.
3425          * Jump over vlan headers if already present,
3426          * helpful for QinQ too.
3427          */
3428         eh = mtod(mp, struct ether_vlan_header *);
3429         if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
3430                 etype = ntohs(eh->evl_proto);
3431                 ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
3432         } else {
3433                 etype = ntohs(eh->evl_encap_proto);
3434                 ehdrlen = ETHER_HDR_LEN;
3435         }
3436
3437         /* Ensure we have at least the IP+TCP header in the first mbuf. */
3438         if (mp->m_len < ehdrlen + sizeof(struct ip) + sizeof(struct tcphdr))
3439                 return FALSE;   /* -1 */
3440
3441         /*
         * We only support TCP over IPv4 for the moment; IPv6 is not yet done.
3443          * TODO: Support SCTP too when it hits the tree.
3444          */
3445         switch (etype) {
3446         case ETHERTYPE_IP:
3447                 isip6 = 0;
3448                 ip = (struct ip *)(mp->m_data + ehdrlen);
3449                 if (ip->ip_p != IPPROTO_TCP)
3450                         return FALSE;   /* 0 */
3451                 ip->ip_len = 0;
3452                 ip->ip_sum = 0;
3453                 ip_hlen = ip->ip_hl << 2;
3454                 if (mp->m_len < ehdrlen + ip_hlen + sizeof(struct tcphdr))
3455                         return FALSE;   /* -1 */
3456                 th = (struct tcphdr *)((caddr_t)ip + ip_hlen);
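                /*
                 * Hardware TSO expects th_sum to be seeded with the
                 * pseudo-header checksum (addresses and protocol, but
                 * no length), which in_pseudo() computes here.
                 */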
3457 #if 1
3458                 th->th_sum = in_pseudo(ip->ip_src.s_addr,
3459                     ip->ip_dst.s_addr, htons(IPPROTO_TCP));
3460 #else
3461                 th->th_sum = mp->m_pkthdr.csum_data;
3462 #endif
3463                 break;
3464         case ETHERTYPE_IPV6:
3465                 isip6 = 1;
3466                 return FALSE;                   /* Not supported yet. */
3467                 ip6 = (struct ip6_hdr *)(mp->m_data + ehdrlen);
3468                 if (ip6->ip6_nxt != IPPROTO_TCP)
3469                         return FALSE;   /* 0 */
3470                 ip6->ip6_plen = 0;
3471                 ip_hlen = sizeof(struct ip6_hdr); /* XXX: no header stacking. */
3472                 if (mp->m_len < ehdrlen + ip_hlen + sizeof(struct tcphdr))
3473                         return FALSE;   /* -1 */
3474                 th = (struct tcphdr *)((caddr_t)ip6 + ip_hlen);
3475 #if 0
                th->th_sum = in6_pseudo(ip6->ip6_src, ip6->ip6_dst,
                    htons(IPPROTO_TCP));        /* XXX: no such function yet. */
3478 #else
3479                 th->th_sum = mp->m_pkthdr.csum_data;
3480 #endif
3481                 break;
3482         default:
3483                 return FALSE;
3484         }
3485         hdr_len = ehdrlen + ip_hlen + (th->th_off << 2);
3486
3487         *txd_lower = (E1000_TXD_CMD_DEXT |      /* Extended descr type */
3488                       E1000_TXD_DTYP_D |        /* Data descr type */
3489                       E1000_TXD_CMD_TSE);       /* Do TSE on this packet */
3490
3491         /* IP and/or TCP header checksum calculation and insertion. */
3492         *txd_upper = ((isip6 ? 0 : E1000_TXD_POPTS_IXSM) |
3493                       E1000_TXD_POPTS_TXSM) << 8;
3494
3495         cur = txr->next_avail_desc;
3496         tx_buffer = &txr->tx_buffers[cur];
3497         TXD = (struct e1000_context_desc *) &txr->tx_base[cur];
3498
3499         /* IPv6 doesn't have a header checksum. */
3500         if (!isip6) {
3501                 /*
3502                  * Start offset for header checksum calculation.
3503                  * End offset for header checksum calculation.
                 * Offset of place to put the checksum.
3505                  */
3506                 TXD->lower_setup.ip_fields.ipcss = ehdrlen;
3507                 TXD->lower_setup.ip_fields.ipcse =
3508                     htole16(ehdrlen + ip_hlen - 1);
3509                 TXD->lower_setup.ip_fields.ipcso =
3510                     ehdrlen + offsetof(struct ip, ip_sum);
3511         }
3512         /*
3513          * Start offset for payload checksum calculation.
3514          * End offset for payload checksum calculation.
3515          * Offset of place to put the checksum.
3516          */
3517         TXD->upper_setup.tcp_fields.tucss =
3518             ehdrlen + ip_hlen;
3519         TXD->upper_setup.tcp_fields.tucse = 0;
3520         TXD->upper_setup.tcp_fields.tucso =
3521             ehdrlen + ip_hlen + offsetof(struct tcphdr, th_sum);
3522         /*
3523          * Payload size per packet w/o any headers.
3524          * Length of all headers up to payload.
3525          */
3526         TXD->tcp_seg_setup.fields.mss = htole16(mp->m_pkthdr.tso_segsz);
3527         TXD->tcp_seg_setup.fields.hdr_len = hdr_len;
3528
3529         TXD->cmd_and_length = htole32(adapter->txd_cmd |
3530                                 E1000_TXD_CMD_DEXT |    /* Extended descr */
3531                                 E1000_TXD_CMD_TSE |     /* TSE context */
3532                                 (isip6 ? 0 : E1000_TXD_CMD_IP) | 
3533                                 E1000_TXD_CMD_TCP |     /* Do TCP checksum */
3534                                 (mp->m_pkthdr.len - (hdr_len))); /* Total len */
3535
3536         tx_buffer->m_head = NULL;
3537         tx_buffer->next_eop = -1;
3538
3539         if (++cur == adapter->num_tx_desc)
3540                 cur = 0;
3541
3542         txr->tx_avail--;
3543         txr->next_avail_desc = cur;
3544         txr->tx_tso = TRUE;
3545
3546         return TRUE;
3547 }
3548
3549
3550 /**********************************************************************
3551  *
3552  *  Examine each tx_buffer in the used queue. If the hardware is done
3553  *  processing the packet then free associated resources. The
3554  *  tx_buffer is put back on the free queue.
3555  *
3556  **********************************************************************/
3557 static bool
3558 em_txeof(struct tx_ring *txr)
3559 {
3560         struct adapter  *adapter = txr->adapter;
3561         int first, last, done, num_avail;
3562         struct em_buffer *tx_buffer;
3563         struct e1000_tx_desc   *tx_desc, *eop_desc;
3564         struct ifnet   *ifp = adapter->ifp;
3565
3566         EM_TX_LOCK_ASSERT(txr);
3567
3568         if (txr->tx_avail == adapter->num_tx_desc)
3569                 return (FALSE);
3570
3571         num_avail = txr->tx_avail;
3572         first = txr->next_to_clean;
3573         tx_desc = &txr->tx_base[first];
3574         tx_buffer = &txr->tx_buffers[first];
3575         last = tx_buffer->next_eop;
3576         eop_desc = &txr->tx_base[last];
3577
        /*
         * Get the index of the first descriptor AFTER the EOP of
         * the first packet, so the inner while loop can use a
         * simple comparison as its terminating condition.
         */
3584         if (++last == adapter->num_tx_desc)
3585                 last = 0;
3586         done = last;
3587
3588         bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
3589             BUS_DMASYNC_POSTREAD);
3590
3591         while (eop_desc->upper.fields.status & E1000_TXD_STAT_DD) {
3592                 /* We clean the range of the packet */
3593                 while (first != done) {
3594                         tx_desc->upper.data = 0;
3595                         tx_desc->lower.data = 0;
3596                         tx_desc->buffer_addr = 0;
3597                         ++num_avail;
3598
3599                         if (tx_buffer->m_head) {
3600                                 ifp->if_opackets++;
3601                                 bus_dmamap_sync(txr->txtag,
3602                                     tx_buffer->map,
3603                                     BUS_DMASYNC_POSTWRITE);
3604                                 bus_dmamap_unload(txr->txtag,
3605                                     tx_buffer->map);
3606
3607                                 m_freem(tx_buffer->m_head);
3608                                 tx_buffer->m_head = NULL;
3609                         }
3610                         tx_buffer->next_eop = -1;
3611                         txr->watchdog_time = ticks;
3612
3613                         if (++first == adapter->num_tx_desc)
3614                                 first = 0;
3615
3616                         tx_buffer = &txr->tx_buffers[first];
3617                         tx_desc = &txr->tx_base[first];
3618                 }
3619                 /* See if we can continue to the next packet */
3620                 last = tx_buffer->next_eop;
3621                 if (last != -1) {
3622                         eop_desc = &txr->tx_base[last];
3623                         /* Get new done point */
3624                         if (++last == adapter->num_tx_desc) last = 0;
3625                         done = last;
3626                 } else
3627                         break;
3628         }
3629         bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
3630             BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
3631
3632         txr->next_to_clean = first;
3633
3634         /*
3635          * If we have enough room, clear IFF_DRV_OACTIVE to
3636          * tell the stack that it is OK to send packets.
3637          * If there are no pending descriptors, clear the watchdog.
3638          */
3639         if (num_avail > EM_TX_CLEANUP_THRESHOLD) {                
3640                 ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;
3641                 if (num_avail == adapter->num_tx_desc) {
3642                         txr->watchdog_check = FALSE;
3643                         txr->tx_avail = num_avail;
3644                         return (FALSE);
3645                 } 
3646         }
3647
3648         txr->tx_avail = num_avail;
3649         return (TRUE);
3650 }
3651
3652
3653 /*********************************************************************
3654  *
3655  *  Refresh RX descriptor mbufs from system mbuf buffer pool.
3656  *
3657  **********************************************************************/
3658 static void
3659 em_refresh_mbufs(struct rx_ring *rxr, int limit)
3660 {
3661         struct adapter          *adapter = rxr->adapter;
3662         struct mbuf             *m;
3663         bus_dma_segment_t       segs[1];
3664         bus_dmamap_t            map;
3665         struct em_buffer        *rxbuf;
3666         int                     i, error, nsegs, cleaned;
3667
3668         i = rxr->next_to_refresh;
3669         cleaned = -1;
3670         while (i != limit) {
3671                 m = m_getcl(M_DONTWAIT, MT_DATA, M_PKTHDR);
3672                 if (m == NULL)
3673                         goto update;
3674                 m->m_len = m->m_pkthdr.len = MCLBYTES;
3675
3676                 if (adapter->max_frame_size <= (MCLBYTES - ETHER_ALIGN))
3677                         m_adj(m, ETHER_ALIGN);
3678
3679                 /*
3680                  * Using memory from the mbuf cluster pool, invoke the
3681                  * bus_dma machinery to arrange the memory mapping.
3682                  */
3683                 error = bus_dmamap_load_mbuf_sg(rxr->rxtag, rxr->rx_sparemap,
3684                     m, segs, &nsegs, BUS_DMA_NOWAIT);
3685                 if (error != 0) {
3686                         m_free(m);
3687                         goto update;
3688                 }
3689
3690                 /* If nsegs is wrong then the stack is corrupt. */
3691                 KASSERT(nsegs == 1, ("Too many segments returned!"));
3692         
3693                 rxbuf = &rxr->rx_buffers[i];
3694                 if (rxbuf->m_head != NULL)
3695                         bus_dmamap_unload(rxr->rxtag, rxbuf->map);
3696         
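                /*
                 * Swap the just-loaded spare map into this ring slot
                 * and recycle the slot's old map as the new spare,
                 * avoiding a map create/destroy on every refresh.
                 */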
3697                 map = rxbuf->map;
3698                 rxbuf->map = rxr->rx_sparemap;
3699                 rxr->rx_sparemap = map;
3700                 bus_dmamap_sync(rxr->rxtag,
3701                     rxbuf->map, BUS_DMASYNC_PREREAD);
3702                 rxbuf->m_head = m;
3703                 rxr->rx_base[i].buffer_addr = htole64(segs[0].ds_addr);
3704
3705                 cleaned = i;
3706                 /* Calculate next index */
3707                 if (++i == adapter->num_rx_desc)
3708                         i = 0;
3709                 /* This is the work marker for refresh */
3710                 rxr->next_to_refresh = i;
3711         }
3712 update:
3713         bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
3714             BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
3715         if (cleaned != -1) /* Update tail index */
3716                 E1000_WRITE_REG(&adapter->hw,
3717                     E1000_RDT(rxr->me), cleaned);
3718
3719         return;
3720 }
3721
3722
3723 /*********************************************************************
3724  *
3725  *  Allocate memory for rx_buffer structures. Since we use one
3726  *  rx_buffer per received packet, the maximum number of rx_buffer's
3727  *  that we'll need is equal to the number of receive descriptors
3728  *  that we've allocated.
3729  *
3730  **********************************************************************/
3731 static int
3732 em_allocate_receive_buffers(struct rx_ring *rxr)
3733 {
3734         struct adapter          *adapter = rxr->adapter;
3735         device_t                dev = adapter->dev;
3736         struct em_buffer        *rxbuf;
3737         int                     error;
3738
3739         rxr->rx_buffers = malloc(sizeof(struct em_buffer) *
3740             adapter->num_rx_desc, M_DEVBUF, M_NOWAIT | M_ZERO);
3741         if (rxr->rx_buffers == NULL) {
3742                 device_printf(dev, "Unable to allocate rx_buffer memory\n");
3743                 return (ENOMEM);
3744         }
3745
3746         error = bus_dma_tag_create(bus_get_dma_tag(dev), /* parent */
3747                                 1, 0,                   /* alignment, bounds */
3748                                 BUS_SPACE_MAXADDR,      /* lowaddr */
3749                                 BUS_SPACE_MAXADDR,      /* highaddr */
3750                                 NULL, NULL,             /* filter, filterarg */
3751                                 MCLBYTES,               /* maxsize */
3752                                 1,                      /* nsegments */
3753                                 MCLBYTES,               /* maxsegsize */
3754                                 0,                      /* flags */
3755                                 NULL,                   /* lockfunc */
3756                                 NULL,                   /* lockarg */
3757                                 &rxr->rxtag);
3758         if (error) {
3759                 device_printf(dev, "%s: bus_dma_tag_create failed %d\n",
3760                     __func__, error);
3761                 goto fail;
3762         }
3763
3764         /* Create the spare map (used by getbuf) */
3765         error = bus_dmamap_create(rxr->rxtag, BUS_DMA_NOWAIT,
3766              &rxr->rx_sparemap);
3767         if (error) {
3768                 device_printf(dev, "%s: bus_dmamap_create failed: %d\n",
3769                     __func__, error);
3770                 goto fail;
3771         }
3772
        for (int i = 0; i < adapter->num_rx_desc; i++) {
                rxbuf = &rxr->rx_buffers[i];
3776                 error = bus_dmamap_create(rxr->rxtag, BUS_DMA_NOWAIT,
3777                     &rxbuf->map);
3778                 if (error) {
3779                         device_printf(dev, "%s: bus_dmamap_create failed: %d\n",
3780                             __func__, error);
3781                         goto fail;
3782                 }
3783         }
3784
3785         return (0);
3786
3787 fail:
3788         em_free_receive_structures(adapter);
3789         return (error);
3790 }
3791
3792
3793 /*********************************************************************
3794  *
3795  *  Initialize a receive ring and its buffers.
3796  *
3797  **********************************************************************/
3798 static int
3799 em_setup_receive_ring(struct rx_ring *rxr)
3800 {
3801         struct  adapter         *adapter = rxr->adapter;
3802         struct em_buffer        *rxbuf;
3803         bus_dma_segment_t       seg[1];
3804         int                     rsize, nsegs, error;
3805
3806
3807         /* Clear the ring contents */
3808         EM_RX_LOCK(rxr);
3809         rsize = roundup2(adapter->num_rx_desc *
3810             sizeof(struct e1000_rx_desc), EM_DBA_ALIGN);
3811         bzero((void *)rxr->rx_base, rsize);
3812
3813         /*
3814         ** Free current RX buffer structs and their mbufs
3815         */
3816         for (int i = 0; i < adapter->num_rx_desc; i++) {
3817                 rxbuf = &rxr->rx_buffers[i];
3818                 if (rxbuf->m_head != NULL) {
3819                         bus_dmamap_sync(rxr->rxtag, rxbuf->map,
3820                             BUS_DMASYNC_POSTREAD);
3821                         bus_dmamap_unload(rxr->rxtag, rxbuf->map);
3822                         m_freem(rxbuf->m_head);
3823                 }
3824         }
3825
3826         /* Now replenish the mbufs */
3827         for (int j = 0; j != adapter->num_rx_desc; ++j) {
3828
3829                 rxbuf = &rxr->rx_buffers[j];
3830                 rxbuf->m_head = m_getcl(M_DONTWAIT, MT_DATA, M_PKTHDR);
3831                 if (rxbuf->m_head == NULL)
3832                         panic("RX ring hdr initialization failed!\n");
3833                 rxbuf->m_head->m_len = MCLBYTES;
3834                 rxbuf->m_head->m_flags &= ~M_HASFCS; /* we strip it */
3835                 rxbuf->m_head->m_pkthdr.len = MCLBYTES;
3836
3837                 /* Get the memory mapping */
3838                 error = bus_dmamap_load_mbuf_sg(rxr->rxtag,
3839                     rxbuf->map, rxbuf->m_head, seg,
3840                     &nsegs, BUS_DMA_NOWAIT);
3841                 if (error != 0)
3842                         panic("RX ring dma initialization failed!\n");
3843                 bus_dmamap_sync(rxr->rxtag,
3844                     rxbuf->map, BUS_DMASYNC_PREREAD);
3845
3846                 /* Update descriptor */
3847                 rxr->rx_base[j].buffer_addr = htole64(seg[0].ds_addr);
3848         }
3849
3850
3851         /* Setup our descriptor indices */
3852         rxr->next_to_check = 0;
3853         rxr->next_to_refresh = 0;
3854
3855         bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
3856             BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
3857
3858         EM_RX_UNLOCK(rxr);
3859         return (0);
3860 }
3861
3862 /*********************************************************************
3863  *
3864  *  Initialize all receive rings.
3865  *
3866  **********************************************************************/
3867 static int
3868 em_setup_receive_structures(struct adapter *adapter)
3869 {
3870         struct rx_ring *rxr = adapter->rx_rings;
3871         int j;
3872
3873         for (j = 0; j < adapter->num_queues; j++, rxr++)
3874                 if (em_setup_receive_ring(rxr))
3875                         goto fail;
3876
3877         return (0);
3878 fail:
3879         /*
         * Free RX buffers allocated so far; we only handle the rings
         * that completed, since the failing case will have cleaned up
         * after itself. 'j' failed, so it's the terminus.
3883          */
3884         for (int i = 0; i < j; ++i) {
3885                 rxr = &adapter->rx_rings[i];
3886                 for (int n = 0; n < adapter->num_rx_desc; n++) {
3887                         struct em_buffer *rxbuf;
3888                         rxbuf = &rxr->rx_buffers[n];
3889                         if (rxbuf->m_head != NULL) {
3890                                 bus_dmamap_sync(rxr->rxtag, rxbuf->map,
3891                                   BUS_DMASYNC_POSTREAD);
3892                                 bus_dmamap_unload(rxr->rxtag, rxbuf->map);
3893                                 m_freem(rxbuf->m_head);
3894                                 rxbuf->m_head = NULL;
3895                         }
3896                 }
3897         }
3898
3899         return (ENOBUFS);
3900 }
3901
3902 /*********************************************************************
3903  *
3904  *  Free all receive rings.
3905  *
3906  **********************************************************************/
3907 static void
3908 em_free_receive_structures(struct adapter *adapter)
3909 {
3910         struct rx_ring *rxr = adapter->rx_rings;
3911
3912         for (int i = 0; i < adapter->num_queues; i++, rxr++) {
3913                 em_free_receive_buffers(rxr);
3914                 /* Free the ring memory as well */
3915                 em_dma_free(adapter, &rxr->rxdma);
3916                 EM_RX_LOCK_DESTROY(rxr);
3917         }
3918
3919         free(adapter->rx_rings, M_DEVBUF);
3920 }
3921
3922
3923 /*********************************************************************
3924  *
3925  *  Free receive ring data structures
3926  *
3927  **********************************************************************/
3928 static void
3929 em_free_receive_buffers(struct rx_ring *rxr)
3930 {
3931         struct adapter          *adapter = rxr->adapter;
3932         struct em_buffer        *rxbuf = NULL;
3933
3934         INIT_DEBUGOUT("free_receive_buffers: begin");
3935
3936         if (rxr->rx_sparemap) {
3937                 bus_dmamap_destroy(rxr->rxtag, rxr->rx_sparemap);
3938                 rxr->rx_sparemap = NULL;
3939         }
3940
3941         if (rxr->rx_buffers != NULL) {
3942                 for (int i = 0; i < adapter->num_rx_desc; i++) {
3943                         rxbuf = &rxr->rx_buffers[i];
3944                         if (rxbuf->map != NULL) {
3945                                 bus_dmamap_sync(rxr->rxtag, rxbuf->map,
3946                                     BUS_DMASYNC_POSTREAD);
3947                                 bus_dmamap_unload(rxr->rxtag, rxbuf->map);
3948                                 bus_dmamap_destroy(rxr->rxtag, rxbuf->map);
3949                         }
3950                         if (rxbuf->m_head != NULL) {
3951                                 m_freem(rxbuf->m_head);
3952                                 rxbuf->m_head = NULL;
3953                         }
3954                 }
3955                 free(rxr->rx_buffers, M_DEVBUF);
3956                 rxr->rx_buffers = NULL;
3957         }
3958
3959         if (rxr->rxtag != NULL) {
3960                 bus_dma_tag_destroy(rxr->rxtag);
3961                 rxr->rxtag = NULL;
3962         }
3963
3964         return;
3965 }
3966
3967
3968 /*********************************************************************
3969  *
3970  *  Enable receive unit.
3971  *
3972  **********************************************************************/
3973 #define MAX_INTS_PER_SEC        8000
#define DEFAULT_ITR             (1000000000/(MAX_INTS_PER_SEC * 256))
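
/*
 * The ITR register counts in units of 256ns, so the macro above works
 * out to 1000000000 / (8000 * 256) ~= 488, i.e. at most one interrupt
 * every ~125us, matching MAX_INTS_PER_SEC.
 */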
3975
3976 static void
3977 em_initialize_receive_unit(struct adapter *adapter)
3978 {
3979         struct rx_ring  *rxr = adapter->rx_rings;
3980         struct ifnet    *ifp = adapter->ifp;
3981         struct e1000_hw *hw = &adapter->hw;
3982         u64     bus_addr;
3983         u32     rctl, rxcsum;
3984
3985         INIT_DEBUGOUT("em_initialize_receive_units: begin");
3986
3987         /*
3988          * Make sure receives are disabled while setting
3989          * up the descriptor ring
3990          */
3991         rctl = E1000_READ_REG(hw, E1000_RCTL);
3992         E1000_WRITE_REG(hw, E1000_RCTL, rctl & ~E1000_RCTL_EN);
3993
3994         E1000_WRITE_REG(&adapter->hw, E1000_RADV,
3995             adapter->rx_abs_int_delay.value);
3996         /*
3997          * Set the interrupt throttling rate. Value is calculated
3998          * as DEFAULT_ITR = 1/(MAX_INTS_PER_SEC * 256ns)
3999          */
4000         E1000_WRITE_REG(hw, E1000_ITR, DEFAULT_ITR);
4001
4002         /*
4003         ** When using MSIX interrupts we need to throttle
4004         ** using the EITR register (82574 only)
4005         */
4006         if (hw->mac.type == e1000_82574)
4007                 for (int i = 0; i < 4; i++)
4008                         E1000_WRITE_REG(hw, E1000_EITR_82574(i),
4009                             DEFAULT_ITR);
4010
        /* Disable accelerated acknowledgment */
4012         if (adapter->hw.mac.type == e1000_82574)
4013                 E1000_WRITE_REG(hw, E1000_RFCTL, E1000_RFCTL_ACK_DIS);
4014
4015         if (ifp->if_capenable & IFCAP_RXCSUM) {
4016                 rxcsum = E1000_READ_REG(hw, E1000_RXCSUM);
4017                 rxcsum |= (E1000_RXCSUM_IPOFL | E1000_RXCSUM_TUOFL);
4018                 E1000_WRITE_REG(hw, E1000_RXCSUM, rxcsum);
4019         }
4020
4021         /*
4022         ** XXX TEMPORARY WORKAROUND: on some systems with 82573
4023         ** long latencies are observed, like Lenovo X60. This
4024         ** change eliminates the problem, but since having positive
4025         ** values in RDTR is a known source of problems on other
4026         ** platforms another solution is being sought.
4027         */
4028         if (hw->mac.type == e1000_82573)
4029                 E1000_WRITE_REG(hw, E1000_RDTR, 0x20);
4030
4031         for (int i = 0; i < adapter->num_queues; i++, rxr++) {
4032                 /* Setup the Base and Length of the Rx Descriptor Ring */
4033                 bus_addr = rxr->rxdma.dma_paddr;
4034                 E1000_WRITE_REG(hw, E1000_RDLEN(i),
4035                     adapter->num_rx_desc * sizeof(struct e1000_rx_desc));
4036                 E1000_WRITE_REG(hw, E1000_RDBAH(i), (u32)(bus_addr >> 32));
4037                 E1000_WRITE_REG(hw, E1000_RDBAL(i), (u32)bus_addr);
4038                 /* Setup the Head and Tail Descriptor Pointers */
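                /* Head 0 / tail N-1 presents a fully stocked ring. */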
4039                 E1000_WRITE_REG(hw, E1000_RDH(i), 0);
4040                 E1000_WRITE_REG(hw, E1000_RDT(i), adapter->num_rx_desc - 1);
4041         }
4042
4043         /* Setup the Receive Control Register */
4044         rctl &= ~(3 << E1000_RCTL_MO_SHIFT);
4045         rctl |= E1000_RCTL_EN | E1000_RCTL_BAM |
4046             E1000_RCTL_LBM_NO | E1000_RCTL_RDMTS_HALF |
4047             (hw->mac.mc_filter_type << E1000_RCTL_MO_SHIFT);
4048
4049         /* Strip the CRC */
4050         rctl |= E1000_RCTL_SECRC;
4051
4052         /* Make sure VLAN Filters are off */
4053         rctl &= ~E1000_RCTL_VFE;
4054         rctl &= ~E1000_RCTL_SBP;
4055         rctl |= E1000_RCTL_SZ_2048;
4056         if (ifp->if_mtu > ETHERMTU)
4057                 rctl |= E1000_RCTL_LPE;
4058         else
4059                 rctl &= ~E1000_RCTL_LPE;
4060
4061         /* Write out the settings */
4062         E1000_WRITE_REG(hw, E1000_RCTL, rctl);
4063
4064         return;
4065 }
4066
4067
4068 /*********************************************************************
4069  *
4070  *  This routine executes in interrupt context. It replenishes
4071  *  the mbufs in the descriptor and sends data which has been
4072  *  dma'ed into host memory to upper layer.
4073  *
4074  *  We loop at most count times if count is > 0, or until done if
4075  *  count < 0.
4076  *  
 *  For polling we also return the number of packets cleaned.
4078  *********************************************************************/
4079 static bool
4080 em_rxeof(struct rx_ring *rxr, int count, int *done)
4081 {
4082         struct adapter          *adapter = rxr->adapter;
4083         struct ifnet            *ifp = adapter->ifp;
4084         struct mbuf             *mp, *sendmp;
4085         u8                      status = 0;
4086         u16                     len;
4087         int                     i, processed, rxdone = 0;
4088         bool                    eop;
4089         struct e1000_rx_desc    *cur;
4090
4091         EM_RX_LOCK(rxr);
4092
4093         for (i = rxr->next_to_check, processed = 0; count != 0;) {
4094
4095                 if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
4096                         break;
4097
4098                 bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
4099                     BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
4100
4101                 cur = &rxr->rx_base[i];
4102                 status = cur->status;
4103                 mp = sendmp = NULL;
4104
4105                 if ((status & E1000_RXD_STAT_DD) == 0)
4106                         break;
4107
4108                 len = le16toh(cur->length);
4109                 eop = (status & E1000_RXD_STAT_EOP) != 0;
4110                 count--;
4111
4112                 if ((cur->errors & E1000_RXD_ERR_FRAME_ERR_MASK) == 0) {
4113
4114                         /* Assign correct length to the current fragment */
4115                         mp = rxr->rx_buffers[i].m_head;
4116                         mp->m_len = len;
4117
4118                         if (rxr->fmp == NULL) {
4119                                 mp->m_pkthdr.len = len;
4120                                 rxr->fmp = mp; /* Store the first mbuf */
4121                                 rxr->lmp = mp;
4122                         } else {
4123                                 /* Chain mbuf's together */
4124                                 mp->m_flags &= ~M_PKTHDR;
4125                                 rxr->lmp->m_next = mp;
4126                                 rxr->lmp = rxr->lmp->m_next;
4127                                 rxr->fmp->m_pkthdr.len += len;
4128                         }
4129
4130                         if (eop) {
4131                                 rxr->fmp->m_pkthdr.rcvif = ifp;
4132                                 ifp->if_ipackets++;
4133                                 em_receive_checksum(cur, rxr->fmp);
4134 #ifndef __NO_STRICT_ALIGNMENT
4135                                 if (adapter->max_frame_size >
4136                                     (MCLBYTES - ETHER_ALIGN) &&
4137                                     em_fixup_rx(rxr) != 0)
4138                                         goto skip;
4139 #endif
4140                                 if (status & E1000_RXD_STAT_VP) {
4141                                         rxr->fmp->m_pkthdr.ether_vtag =
4142                                             (le16toh(cur->special) &
4143                                             E1000_RXD_SPC_VLAN_MASK);
4144                                         rxr->fmp->m_flags |= M_VLANTAG;
4145                                 }
4146 #ifdef EM_MULTIQUEUE
4147                                 rxr->fmp->m_pkthdr.flowid = curcpu;
4148                                 rxr->fmp->m_flags |= M_FLOWID;
4149 #endif
4150 #ifndef __NO_STRICT_ALIGNMENT
4151 skip:
4152 #endif
4153                                 sendmp = rxr->fmp;
4154                                 rxr->fmp = NULL;
4155                                 rxr->lmp = NULL;
4156                         }
4157                 } else {
4158                         ifp->if_ierrors++;
4159                         /* Reuse loaded DMA map and just update mbuf chain */
4160                         mp = rxr->rx_buffers[i].m_head;
4161                         mp->m_len = mp->m_pkthdr.len = MCLBYTES;
4162                         mp->m_data = mp->m_ext.ext_buf;
4163                         mp->m_next = NULL;
4164                         if (adapter->max_frame_size <=
4165                             (MCLBYTES - ETHER_ALIGN))
4166                                 m_adj(mp, ETHER_ALIGN);
4167                         if (rxr->fmp != NULL) {
4168                                 m_freem(rxr->fmp);
4169                                 rxr->fmp = NULL;
4170                                 rxr->lmp = NULL;
4171                         }
4172                         sendmp = NULL;
4173                 }
4174
4175                 /* Zero out the receive descriptor's status. */
4176                 cur->status = 0;
4177                 ++rxdone;       /* cumulative for POLL */
4178                 ++processed;
4179
4180                 /* Advance our pointers to the next descriptor. */
4181                 if (++i == adapter->num_rx_desc)
4182                         i = 0;
4183
4184                 /* Send to the stack */
4185                 if (sendmp != NULL) {
4186                         rxr->next_to_check = i;
4187                         EM_RX_UNLOCK(rxr);
4188                         (*ifp->if_input)(ifp, sendmp);
4189                         EM_RX_LOCK(rxr);
4190                         i = rxr->next_to_check;
4191                 }
4192
4193                 /* Only refresh mbufs every 8 descriptors */
4194                 if (processed == 8) {
4195                         em_refresh_mbufs(rxr, i);
4196                         processed = 0;
4197                 }
4198         }
4199
4200         /* Catch any remaining refresh work */
4201         if (processed != 0) {
4202                 em_refresh_mbufs(rxr, i);
4203                 processed = 0;
4204         }
4205
4206         rxr->next_to_check = i;
4207         if (done != NULL)
4208                 *done = rxdone;
4209         EM_RX_UNLOCK(rxr);
4210
4211         return ((status & E1000_RXD_STAT_DD) ? TRUE : FALSE);
4212 }
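/*
 * A minimal usage sketch (not part of the driver) showing how a
 * DEVICE_POLLING handler can consume the *done count returned by
 * em_rxeof(); the driver's real polling entry point is defined
 * elsewhere in this file, and the single-ring access below is an
 * illustrative assumption.
 */
#if 0
static int
em_poll_sketch(struct ifnet *ifp, enum poll_cmd cmd, int count)
{
	struct adapter *adapter = ifp->if_softc;
	int rx_done = 0;

	/* Clean at most 'count' descriptors on the first RX ring */
	em_rxeof(adapter->rx_rings, count, &rx_done);

	/* Report back how many packets were actually cleaned */
	return (rx_done);
}
#endif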
4213
4214 #ifndef __NO_STRICT_ALIGNMENT
4215 /*
4216  * When jumbo frames are enabled we should realign the entire payload
4217  * on architectures with strict alignment. This is a serious design
4218  * mistake of the 8254x as it nullifies the benefit of DMA: the chip
4219  * only allows RX buffer sizes of 2048/4096/8192/16384, while what we
4220  * really want is 2048 - ETHER_ALIGN so the payload comes out aligned.
4221  * On architectures without strict alignment the 8254x still performs
4222  * unaligned accesses, which reduces performance too. To avoid copying
4223  * an entire frame to realign it, we allocate a new mbuf, copy the
4224  * ethernet header into it, and prepend the new mbuf to the chain.
4225  *
4226  * Be aware that the 8254x performs best when jumbo frames are not
4227  * used at all on architectures with strict alignment.
4228  */
4229 static int
4230 em_fixup_rx(struct rx_ring *rxr)
4231 {
4232         struct adapter *adapter = rxr->adapter;
4233         struct mbuf *m, *n;
4234         int error;
4235
4236         error = 0;
4237         m = rxr->fmp;
4238         if (m->m_len <= (MCLBYTES - ETHER_HDR_LEN)) {
4239                 bcopy(m->m_data, m->m_data + ETHER_HDR_LEN, m->m_len);
4240                 m->m_data += ETHER_HDR_LEN;
4241         } else {
4242                 MGETHDR(n, M_DONTWAIT, MT_DATA);
4243                 if (n != NULL) {
4244                         bcopy(m->m_data, n->m_data, ETHER_HDR_LEN);
4245                         m->m_data += ETHER_HDR_LEN;
4246                         m->m_len -= ETHER_HDR_LEN;
4247                         n->m_len = ETHER_HDR_LEN;
4248                         M_MOVE_PKTHDR(n, m);
4249                         n->m_next = m;
4250                         rxr->fmp = n;
4251                 } else {
4252                         adapter->dropped_pkts++;
4253                         m_freem(rxr->fmp);
4254                         rxr->fmp = NULL;
4255                         error = ENOMEM;
4256                 }
4257         }
4258
4259         return (error);
4260 }
4261 #endif
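/*
 * A worked example of the fixup above (illustrative numbers): an mbuf
 * cluster sits at a 4-byte-aligned address A, so a frame DMA'ed to
 * offset 0 leaves the IP header at A + ETHER_HDR_LEN = A + 14, and
 * 14 % 4 == 2, i.e. misaligned.  Shifting the frame forward by another
 * ETHER_HDR_LEN puts the IP header at A + 28, and 28 % 4 == 0, so the
 * payload is naturally aligned afterwards.
 */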
4262
4263 /*********************************************************************
4264  *
4265  *  Verify that the hardware indicated that the checksum is valid.
4266  *  Inform the stack about the status of the checksum so that the
4267  *  stack doesn't spend time verifying it again.
4268  *
4269  *********************************************************************/
4270 static void
4271 em_receive_checksum(struct e1000_rx_desc *rx_desc, struct mbuf *mp)
4272 {
4273         /* The Ignore Checksum indication bit is set */
4274         if (rx_desc->status & E1000_RXD_STAT_IXSM) {
4275                 mp->m_pkthdr.csum_flags = 0;
4276                 return;
4277         }
4278
4279         if (rx_desc->status & E1000_RXD_STAT_IPCS) {
4280                 /* Did it pass? */
4281                 if (!(rx_desc->errors & E1000_RXD_ERR_IPE)) {
4282                         /* IP Checksum Good */
4283                         mp->m_pkthdr.csum_flags = CSUM_IP_CHECKED;
4284                         mp->m_pkthdr.csum_flags |= CSUM_IP_VALID;
4285
4286                 } else {
4287                         mp->m_pkthdr.csum_flags = 0;
4288                 }
4289         }
4290
4291         if (rx_desc->status & E1000_RXD_STAT_TCPCS) {
4292                 /* Did it pass? */
4293                 if (!(rx_desc->errors & E1000_RXD_ERR_TCPE)) {
4294                         mp->m_pkthdr.csum_flags |=
4295                         (CSUM_DATA_VALID | CSUM_PSEUDO_HDR);
4296                         mp->m_pkthdr.csum_data = htons(0xffff);
4297                 }
4298         }
4299 }
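/*
 * A rough sketch (not driver code, loosely modeled on the TCP input
 * path) of how an upper layer consumes the flags set above.  Because
 * the driver stores 0xffff in csum_data, the xor below yields 0,
 * meaning the payload checksum is already verified in hardware.
 */
#if 0
static void
em_csum_consumer_sketch(struct mbuf *m, struct tcphdr *th)
{
	if (m->m_pkthdr.csum_flags & CSUM_DATA_VALID) {
		if (m->m_pkthdr.csum_flags & CSUM_PSEUDO_HDR)
			/* 0xffff ^ 0xffff == 0: nothing left to verify */
			th->th_sum = m->m_pkthdr.csum_data ^ 0xffff;
		/* else: fold a software pseudo-header sum into csum_data */
	}
}
#endif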
4300
4301 /*
4302  * This routine is run via a vlan
4303  * config EVENT
4304  */
4305 static void
4306 em_register_vlan(void *arg, struct ifnet *ifp, u16 vtag)
4307 {
4308         struct adapter  *adapter = ifp->if_softc;
4309         u32             index, bit;
4310
4311         if (ifp->if_softc !=  arg)   /* Not our event */
4312                 return;
4313
4314         if ((vtag == 0) || (vtag > 4095))       /* Invalid ID */
4315                 return;
4316
4317         index = (vtag >> 5) & 0x7F;
4318         bit = vtag & 0x1F;
4319         em_shadow_vfta[index] |= (1 << bit);
4320         ++adapter->num_vlans;
4321         /* Re-init to load the changes */
4322         em_init(adapter);
4323 }
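/*
 * VFTA indexing worked through (illustrative): the 4096 possible VLAN
 * IDs map onto 128 32-bit shadow registers.  For vtag = 100:
 * index = (100 >> 5) & 0x7F = 3 and bit = 100 & 0x1F = 4, so the
 * routine above sets bit 4 of em_shadow_vfta[3].
 */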
4324
4325 /*
4326  * This routine is run via a vlan
4327  * unconfig EVENT
4328  */
4329 static void
4330 em_unregister_vlan(void *arg, struct ifnet *ifp, u16 vtag)
4331 {
4332         struct adapter  *adapter = ifp->if_softc;
4333         u32             index, bit;
4334
4335         if (ifp->if_softc !=  arg)
4336                 return;
4337
4338         if ((vtag == 0) || (vtag > 4095))       /* Invalid */
4339                 return;
4340
4341         index = (vtag >> 5) & 0x7F;
4342         bit = vtag & 0x1F;
4343         em_shadow_vfta[index] &= ~(1 << bit);
4344         --adapter->num_vlans;
4345         /* Re-init to load the changes */
4346         em_init(adapter);
4347 }
4348
4349 static void
4350 em_setup_vlan_hw_support(struct adapter *adapter)
4351 {
4352         struct e1000_hw *hw = &adapter->hw;
4353         u32             reg;
4354
4355         /*
4356         ** We get here through init_locked, meaning
4357         ** a soft reset; this has already cleared
4358         ** the VFTA and other state, so if no vlans
4359         ** have been registered, do nothing.
4360         */
4361         if (adapter->num_vlans == 0)
4362                 return;
4363
4364         /*
4365         ** A soft reset zeroes out the VFTA, so
4366         ** we need to repopulate it now.
4367         */
4368         for (int i = 0; i < EM_VFTA_SIZE; i++)
4369                 if (em_shadow_vfta[i] != 0)
4370                         E1000_WRITE_REG_ARRAY(hw, E1000_VFTA,
4371                             i, em_shadow_vfta[i]);
4372
4373         reg = E1000_READ_REG(hw, E1000_CTRL);
4374         reg |= E1000_CTRL_VME;
4375         E1000_WRITE_REG(hw, E1000_CTRL, reg);
4376
4377         /* Enable the Filter Table */
4378         reg = E1000_READ_REG(hw, E1000_RCTL);
4379         reg &= ~E1000_RCTL_CFIEN;
4380         reg |= E1000_RCTL_VFE;
4381         E1000_WRITE_REG(hw, E1000_RCTL, reg);
4382
4383         /* Update the frame size */
4384         E1000_WRITE_REG(&adapter->hw, E1000_RLPML,
4385             adapter->max_frame_size + VLAN_TAG_SIZE);
4386 }
4387
4388 static void
4389 em_enable_intr(struct adapter *adapter)
4390 {
4391         struct e1000_hw *hw = &adapter->hw;
4392         u32 ims_mask = IMS_ENABLE_MASK;
4393
4394         if (hw->mac.type == e1000_82574) {
4395                 E1000_WRITE_REG(hw, EM_EIAC, EM_MSIX_MASK);
4396                 ims_mask |= EM_MSIX_MASK;
4397         } 
4398         E1000_WRITE_REG(hw, E1000_IMS, ims_mask);
4399 }
4400
4401 static void
4402 em_disable_intr(struct adapter *adapter)
4403 {
4404         struct e1000_hw *hw = &adapter->hw;
4405
4406         if (hw->mac.type == e1000_82574)
4407                 E1000_WRITE_REG(hw, EM_EIAC, 0);
4408         E1000_WRITE_REG(&adapter->hw, E1000_IMC, 0xffffffff);
4409 }
4410
4411 /*
4412  * Bit of a misnomer: what this really means is
4413  * to enable OS management of the system, i.e.
4414  * to disable the special hardware management features
4415  */
4416 static void
4417 em_init_manageability(struct adapter *adapter)
4418 {
4419         /* A shared code workaround */
4420 #define E1000_82542_MANC2H E1000_MANC2H
4421         if (adapter->has_manage) {
4422                 int manc2h = E1000_READ_REG(&adapter->hw, E1000_MANC2H);
4423                 int manc = E1000_READ_REG(&adapter->hw, E1000_MANC);
4424
4425                 /* disable hardware interception of ARP */
4426                 manc &= ~(E1000_MANC_ARP_EN);
4427
4428                 /* enable receiving management packets to the host */
4429                 manc |= E1000_MANC_EN_MNG2HOST;
4430 #define E1000_MNG2HOST_PORT_623 (1 << 5)
4431 #define E1000_MNG2HOST_PORT_664 (1 << 6)
4432                 manc2h |= E1000_MNG2HOST_PORT_623;
4433                 manc2h |= E1000_MNG2HOST_PORT_664;
4434                 E1000_WRITE_REG(&adapter->hw, E1000_MANC2H, manc2h);
4435                 E1000_WRITE_REG(&adapter->hw, E1000_MANC, manc);
4436         }
4437 }
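/*
 * For context: 623 and 664 are the standard RMCP/ASF management ports,
 * so the MANC2H bits set above ask the firmware to forward management
 * traffic on those ports up to the host stack.
 */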
4438
4439 /*
4440  * Give control back to hardware management
4441  * controller if there is one.
4442  */
4443 static void
4444 em_release_manageability(struct adapter *adapter)
4445 {
4446         if (adapter->has_manage) {
4447                 int manc = E1000_READ_REG(&adapter->hw, E1000_MANC);
4448
4449                 /* re-enable hardware interception of ARP */
4450                 manc |= E1000_MANC_ARP_EN;
4451                 manc &= ~E1000_MANC_EN_MNG2HOST;
4452
4453                 E1000_WRITE_REG(&adapter->hw, E1000_MANC, manc);
4454         }
4455 }
4456
4457 /*
4458  * em_get_hw_control sets the {CTRL_EXT|FWSM}:DRV_LOAD bit.
4459  * For ASF and Pass Through versions of f/w this means
4460  * that the driver is loaded. For AMT versions of the f/w
4461  * this means that the network i/f is open.
4462  */
4463 static void
4464 em_get_hw_control(struct adapter *adapter)
4465 {
4466         u32 ctrl_ext, swsm;
4467
4468         if (adapter->hw.mac.type == e1000_82573) {
4469                 swsm = E1000_READ_REG(&adapter->hw, E1000_SWSM);
4470                 E1000_WRITE_REG(&adapter->hw, E1000_SWSM,
4471                     swsm | E1000_SWSM_DRV_LOAD);
4472                 return;
4473         }
4474         /* else */
4475         ctrl_ext = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
4476         E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT,
4477             ctrl_ext | E1000_CTRL_EXT_DRV_LOAD);
4478         return;
4479 }
4480
4481 /*
4482  * em_release_hw_control resets {CTRL_EXT|FWSM}:DRV_LOAD bit.
4483  * For ASF and Pass Through versions of f/w this means that
4484  * the driver is no longer loaded. For AMT versions of the
4485  * f/w this means that the network i/f is closed.
4486  */
4487 static void
4488 em_release_hw_control(struct adapter *adapter)
4489 {
4490         u32 ctrl_ext, swsm;
4491
4492         if (!adapter->has_manage)
4493                 return;
4494
4495         if (adapter->hw.mac.type == e1000_82573) {
4496                 swsm = E1000_READ_REG(&adapter->hw, E1000_SWSM);
4497                 E1000_WRITE_REG(&adapter->hw, E1000_SWSM,
4498                     swsm & ~E1000_SWSM_DRV_LOAD);
4499                 return;
4500         }
4501         /* else */
4502         ctrl_ext = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
4503         E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT,
4504             ctrl_ext & ~E1000_CTRL_EXT_DRV_LOAD);
4505         return;
4506 }
4507
4508 static int
4509 em_is_valid_ether_addr(u8 *addr)
4510 {
4511         char zero_addr[6] = { 0, 0, 0, 0, 0, 0 };
4512
4513         if ((addr[0] & 1) || (!bcmp(addr, zero_addr, ETHER_ADDR_LEN))) {
4514                 return (FALSE);
4515         }
4516
4517         return (TRUE);
4518 }
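/*
 * Two illustrative rejects for the check above: 01:00:5e:00:00:01 (an
 * IPv4 multicast MAC) fails because the low bit of the first octet is
 * the multicast/group bit, and 00:00:00:00:00:00 fails the bcmp()
 * against zero_addr.
 */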
4519
4520 /*
4521 ** Parse the interface capabilities with regard
4522 ** to both system management and wake-on-lan for
4523 ** later use.
4524 */
4525 static void
4526 em_get_wakeup(device_t dev)
4527 {
4528         struct adapter  *adapter = device_get_softc(dev);
4529         u16             eeprom_data = 0, device_id, apme_mask;
4530
4531         adapter->has_manage = e1000_enable_mng_pass_thru(&adapter->hw);
4532         apme_mask = EM_EEPROM_APME;
4533
4534         switch (adapter->hw.mac.type) {
4535         case e1000_82573:
4536         case e1000_82583:
4537                 adapter->has_amt = TRUE;
4538                 /* Falls thru */
4539         case e1000_82571:
4540         case e1000_82572:
4541         case e1000_80003es2lan:
4542                 if (adapter->hw.bus.func == 1) {
4543                         e1000_read_nvm(&adapter->hw,
4544                             NVM_INIT_CONTROL3_PORT_B, 1, &eeprom_data);
4545                         break;
4546                 } else
4547                         e1000_read_nvm(&adapter->hw,
4548                             NVM_INIT_CONTROL3_PORT_A, 1, &eeprom_data);
4549                 break;
4550         case e1000_ich8lan:
4551         case e1000_ich9lan:
4552         case e1000_ich10lan:
4553         case e1000_pchlan:
4554                 apme_mask = E1000_WUC_APME;
4555                 adapter->has_amt = TRUE;
4556                 eeprom_data = E1000_READ_REG(&adapter->hw, E1000_WUC);
4557                 break;
4558         default:
4559                 e1000_read_nvm(&adapter->hw,
4560                     NVM_INIT_CONTROL3_PORT_A, 1, &eeprom_data);
4561                 break;
4562         }
4563         if (eeprom_data & apme_mask)
4564                 adapter->wol = (E1000_WUFC_MAG | E1000_WUFC_MC);
4565         /*
4566          * We have the eeprom settings, now apply the special cases
4567          * where the eeprom may be wrong or the board won't support
4568          * wake on lan on a particular port
4569          */
4570         device_id = pci_get_device(dev);
4571         switch (device_id) {
4572         case E1000_DEV_ID_82571EB_FIBER:
4573                 /* Wake events only supported on port A for dual fiber
4574                  * regardless of eeprom setting */
4575                 if (E1000_READ_REG(&adapter->hw, E1000_STATUS) &
4576                     E1000_STATUS_FUNC_1)
4577                         adapter->wol = 0;
4578                 break;
4579         case E1000_DEV_ID_82571EB_QUAD_COPPER:
4580         case E1000_DEV_ID_82571EB_QUAD_FIBER:
4581         case E1000_DEV_ID_82571EB_QUAD_COPPER_LP:
4582                 /* if quad port adapter, disable WoL on all but port A */
4583                 if (global_quad_port_a != 0)
4584                         adapter->wol = 0;
4585                 /* Reset for multiple quad port adapters */
4586                 if (++global_quad_port_a == 4)
4587                         global_quad_port_a = 0;
4588                 break;
4589         }
4590         return;
4591 }
4592
4593
4594 /*
4595  * Enable PCI Wake On Lan capability
4596  */
4597 static void
4598 em_enable_wakeup(device_t dev)
4599 {
4600         struct adapter  *adapter = device_get_softc(dev);
4601         struct ifnet    *ifp = adapter->ifp;
4602         u32             pmc, ctrl, ctrl_ext, rctl;
4603         u16             status;
4604
4605         if ((pci_find_extcap(dev, PCIY_PMG, &pmc) != 0))
4606                 return;
4607
4608         /* Advertise the wakeup capability */
4609         ctrl = E1000_READ_REG(&adapter->hw, E1000_CTRL);
4610         ctrl |= (E1000_CTRL_SWDPIN2 | E1000_CTRL_SWDPIN3);
4611         E1000_WRITE_REG(&adapter->hw, E1000_CTRL, ctrl);
4612         E1000_WRITE_REG(&adapter->hw, E1000_WUC, E1000_WUC_PME_EN);
4613
4614         if ((adapter->hw.mac.type == e1000_ich8lan) ||
4615             (adapter->hw.mac.type == e1000_pchlan) ||
4616             (adapter->hw.mac.type == e1000_ich9lan) ||
4617             (adapter->hw.mac.type == e1000_ich10lan)) {
4618                 e1000_disable_gig_wol_ich8lan(&adapter->hw);
4619                 e1000_hv_phy_powerdown_workaround_ich8lan(&adapter->hw);
4620         }
4621
4622         /* Keep the laser running on Fiber adapters */
4623         if (adapter->hw.phy.media_type == e1000_media_type_fiber ||
4624             adapter->hw.phy.media_type == e1000_media_type_internal_serdes) {
4625                 ctrl_ext = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
4626                 ctrl_ext |= E1000_CTRL_EXT_SDP3_DATA;
4627                 E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT, ctrl_ext);
4628         }
4629
4630         /*
4631         ** Determine type of Wakeup: note that wol
4632         ** is set with all bits on by default.
4633         */
4634         if ((ifp->if_capenable & IFCAP_WOL_MAGIC) == 0)
4635                 adapter->wol &= ~E1000_WUFC_MAG;
4636
4637         if ((ifp->if_capenable & IFCAP_WOL_MCAST) == 0)
4638                 adapter->wol &= ~E1000_WUFC_MC;
4639         else {
4640                 rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
4641                 rctl |= E1000_RCTL_MPE;
4642                 E1000_WRITE_REG(&adapter->hw, E1000_RCTL, rctl);
4643         }
4644
4645         if (adapter->hw.mac.type == e1000_pchlan) {
4646                 if (em_enable_phy_wakeup(adapter))
4647                         return;
4648         } else {
4649                 E1000_WRITE_REG(&adapter->hw, E1000_WUC, E1000_WUC_PME_EN);
4650                 E1000_WRITE_REG(&adapter->hw, E1000_WUFC, adapter->wol);
4651         }
4652
4653         if (adapter->hw.phy.type == e1000_phy_igp_3)
4654                 e1000_igp3_phy_powerdown_workaround_ich8lan(&adapter->hw);
4655
4656         /* Request PME */
4657         status = pci_read_config(dev, pmc + PCIR_POWER_STATUS, 2);
4658         status &= ~(PCIM_PSTAT_PME | PCIM_PSTAT_PMEENABLE);
4659         if (ifp->if_capenable & IFCAP_WOL)
4660                 status |= PCIM_PSTAT_PME | PCIM_PSTAT_PMEENABLE;
4661         pci_write_config(dev, pmc + PCIR_POWER_STATUS, status, 2);
4662
4663         return;
4664 }
4665
4666 /*
4667 ** WOL in the newer chipset interfaces (pchlan)
4668 ** requires things to be copied into the phy
4669 */
4670 static int
4671 em_enable_phy_wakeup(struct adapter *adapter)
4672 {
4673         struct e1000_hw *hw = &adapter->hw;
4674         u32 mreg, ret = 0;
4675         u16 preg;
4676
4677         /* copy MAC RARs to PHY RARs */
4678         for (int i = 0; i < adapter->hw.mac.rar_entry_count; i++) {
4679                 mreg = E1000_READ_REG(hw, E1000_RAL(i));
4680                 e1000_write_phy_reg(hw, BM_RAR_L(i), (u16)(mreg & 0xFFFF));
4681                 e1000_write_phy_reg(hw, BM_RAR_M(i),
4682                     (u16)((mreg >> 16) & 0xFFFF));
4683                 mreg = E1000_READ_REG(hw, E1000_RAH(i));
4684                 e1000_write_phy_reg(hw, BM_RAR_H(i), (u16)(mreg & 0xFFFF));
4685                 e1000_write_phy_reg(hw, BM_RAR_CTRL(i),
4686                     (u16)((mreg >> 16) & 0xFFFF));
4687         }
4688
4689         /* copy MAC MTA to PHY MTA */
4690         for (int i = 0; i < adapter->hw.mac.mta_reg_count; i++) {
4691                 mreg = E1000_READ_REG_ARRAY(hw, E1000_MTA, i);
4692                 e1000_write_phy_reg(hw, BM_MTA(i), (u16)(mreg & 0xFFFF));
4693                 e1000_write_phy_reg(hw, BM_MTA(i) + 1,
4694                     (u16)((mreg >> 16) & 0xFFFF));
4695         }
4696
4697         /* configure PHY Rx Control register */
4698         e1000_read_phy_reg(&adapter->hw, BM_RCTL, &preg);
4699         mreg = E1000_READ_REG(hw, E1000_RCTL);
4700         if (mreg & E1000_RCTL_UPE)
4701                 preg |= BM_RCTL_UPE;
4702         if (mreg & E1000_RCTL_MPE)
4703                 preg |= BM_RCTL_MPE;
4704         preg &= ~(BM_RCTL_MO_MASK);
4705         if (mreg & E1000_RCTL_MO_3)
4706                 preg |= (((mreg & E1000_RCTL_MO_3) >> E1000_RCTL_MO_SHIFT)
4707                                 << BM_RCTL_MO_SHIFT);
4708         if (mreg & E1000_RCTL_BAM)
4709                 preg |= BM_RCTL_BAM;
4710         if (mreg & E1000_RCTL_PMCF)
4711                 preg |= BM_RCTL_PMCF;
4712         mreg = E1000_READ_REG(hw, E1000_CTRL);
4713         if (mreg & E1000_CTRL_RFCE)
4714                 preg |= BM_RCTL_RFCE;
4715         e1000_write_phy_reg(&adapter->hw, BM_RCTL, preg);
4716
4717         /* enable PHY wakeup in MAC register */
4718         E1000_WRITE_REG(hw, E1000_WUC,
4719             E1000_WUC_PHY_WAKE | E1000_WUC_PME_EN);
4720         E1000_WRITE_REG(hw, E1000_WUFC, adapter->wol);
4721
4722         /* configure and enable PHY wakeup in PHY registers */
4723         e1000_write_phy_reg(&adapter->hw, BM_WUFC, adapter->wol);
4724         e1000_write_phy_reg(&adapter->hw, BM_WUC, E1000_WUC_PME_EN);
4725
4726         /* activate PHY wakeup */
4727         ret = hw->phy.ops.acquire(hw);
4728         if (ret) {
4729                 printf("Could not acquire PHY\n");
4730                 return ret;
4731         }
4732         e1000_write_phy_reg_mdic(hw, IGP01E1000_PHY_PAGE_SELECT,
4733                                  (BM_WUC_ENABLE_PAGE << IGP_PAGE_SHIFT));
4734         ret = e1000_read_phy_reg_mdic(hw, BM_WUC_ENABLE_REG, &preg);
4735         if (ret) {
4736                 printf("Could not read PHY page 769\n");
4737                 goto out;
4738         }
4739         preg |= BM_WUC_ENABLE_BIT | BM_WUC_HOST_WU_BIT;
4740         ret = e1000_write_phy_reg_mdic(hw, BM_WUC_ENABLE_REG, preg);
4741         if (ret)
4742                 printf("Could not set PHY Host Wakeup bit\n");
4743 out:
4744         hw->phy.ops.release(hw);
4745
4746         return ret;
4747 }
4748
4749 static void
4750 em_led_func(void *arg, int onoff)
4751 {
4752         struct adapter  *adapter = arg;
4753  
4754         EM_CORE_LOCK(adapter);
4755         if (onoff) {
4756                 e1000_setup_led(&adapter->hw);
4757                 e1000_led_on(&adapter->hw);
4758         } else {
4759                 e1000_led_off(&adapter->hw);
4760                 e1000_cleanup_led(&adapter->hw);
4761         }
4762         EM_CORE_UNLOCK(adapter);
4763 }
4764
4765 /**********************************************************************
4766  *
4767  *  Update the board statistics counters.
4768  *
4769  **********************************************************************/
4770 static void
4771 em_update_stats_counters(struct adapter *adapter)
4772 {
4773         struct ifnet   *ifp;
4774
4775         if(adapter->hw.phy.media_type == e1000_media_type_copper ||
4776            (E1000_READ_REG(&adapter->hw, E1000_STATUS) & E1000_STATUS_LU)) {
4777                 adapter->stats.symerrs += E1000_READ_REG(&adapter->hw, E1000_SYMERRS);
4778                 adapter->stats.sec += E1000_READ_REG(&adapter->hw, E1000_SEC);
4779         }
4780         adapter->stats.crcerrs += E1000_READ_REG(&adapter->hw, E1000_CRCERRS);
4781         adapter->stats.mpc += E1000_READ_REG(&adapter->hw, E1000_MPC);
4782         adapter->stats.scc += E1000_READ_REG(&adapter->hw, E1000_SCC);
4783         adapter->stats.ecol += E1000_READ_REG(&adapter->hw, E1000_ECOL);
4784
4785         adapter->stats.mcc += E1000_READ_REG(&adapter->hw, E1000_MCC);
4786         adapter->stats.latecol += E1000_READ_REG(&adapter->hw, E1000_LATECOL);
4787         adapter->stats.colc += E1000_READ_REG(&adapter->hw, E1000_COLC);
4788         adapter->stats.dc += E1000_READ_REG(&adapter->hw, E1000_DC);
4789         adapter->stats.rlec += E1000_READ_REG(&adapter->hw, E1000_RLEC);
4790         adapter->stats.xonrxc += E1000_READ_REG(&adapter->hw, E1000_XONRXC);
4791         adapter->stats.xontxc += E1000_READ_REG(&adapter->hw, E1000_XONTXC);
4792         adapter->stats.xoffrxc += E1000_READ_REG(&adapter->hw, E1000_XOFFRXC);
4793         adapter->stats.xofftxc += E1000_READ_REG(&adapter->hw, E1000_XOFFTXC);
4794         adapter->stats.fcruc += E1000_READ_REG(&adapter->hw, E1000_FCRUC);
4795         adapter->stats.prc64 += E1000_READ_REG(&adapter->hw, E1000_PRC64);
4796         adapter->stats.prc127 += E1000_READ_REG(&adapter->hw, E1000_PRC127);
4797         adapter->stats.prc255 += E1000_READ_REG(&adapter->hw, E1000_PRC255);
4798         adapter->stats.prc511 += E1000_READ_REG(&adapter->hw, E1000_PRC511);
4799         adapter->stats.prc1023 += E1000_READ_REG(&adapter->hw, E1000_PRC1023);
4800         adapter->stats.prc1522 += E1000_READ_REG(&adapter->hw, E1000_PRC1522);
4801         adapter->stats.gprc += E1000_READ_REG(&adapter->hw, E1000_GPRC);
4802         adapter->stats.bprc += E1000_READ_REG(&adapter->hw, E1000_BPRC);
4803         adapter->stats.mprc += E1000_READ_REG(&adapter->hw, E1000_MPRC);
4804         adapter->stats.gptc += E1000_READ_REG(&adapter->hw, E1000_GPTC);
4805
4806         /* For the 64-bit byte counters the low dword must be read */
4807         /* first; both registers clear on the read of the high dword. */
4808         adapter->stats.gorc += E1000_READ_REG(&adapter->hw, E1000_GORCL) +
4809             ((u64)E1000_READ_REG(&adapter->hw, E1000_GORCH) << 32);
4810         adapter->stats.gotc += E1000_READ_REG(&adapter->hw, E1000_GOTCL) +
4811             ((u64)E1000_READ_REG(&adapter->hw, E1000_GOTCH) << 32);
4812         adapter->stats.rnbc += E1000_READ_REG(&adapter->hw, E1000_RNBC);
4813         adapter->stats.ruc += E1000_READ_REG(&adapter->hw, E1000_RUC);
4814         adapter->stats.rfc += E1000_READ_REG(&adapter->hw, E1000_RFC);
4815         adapter->stats.roc += E1000_READ_REG(&adapter->hw, E1000_ROC);
4816         adapter->stats.rjc += E1000_READ_REG(&adapter->hw, E1000_RJC);
4817         adapter->stats.tor += E1000_READ_REG(&adapter->hw, E1000_TORL) +
4818             ((u64)E1000_READ_REG(&adapter->hw, E1000_TORH) << 32);
4819         adapter->stats.tot += E1000_READ_REG(&adapter->hw, E1000_TOTL) +
4820             ((u64)E1000_READ_REG(&adapter->hw, E1000_TOTH) << 32);
4821         adapter->stats.tpr += E1000_READ_REG(&adapter->hw, E1000_TPR);
4822         adapter->stats.tpt += E1000_READ_REG(&adapter->hw, E1000_TPT);
4823         adapter->stats.ptc64 += E1000_READ_REG(&adapter->hw, E1000_PTC64);
4824         adapter->stats.ptc127 += E1000_READ_REG(&adapter->hw, E1000_PTC127);
4825         adapter->stats.ptc255 += E1000_READ_REG(&adapter->hw, E1000_PTC255);
4826         adapter->stats.ptc511 += E1000_READ_REG(&adapter->hw, E1000_PTC511);
4827         adapter->stats.ptc1023 += E1000_READ_REG(&adapter->hw, E1000_PTC1023);
4828         adapter->stats.ptc1522 += E1000_READ_REG(&adapter->hw, E1000_PTC1522);
4829         adapter->stats.mptc += E1000_READ_REG(&adapter->hw, E1000_MPTC);
4830         adapter->stats.bptc += E1000_READ_REG(&adapter->hw, E1000_BPTC);
4831
4832         if (adapter->hw.mac.type >= e1000_82543) {
4833                 adapter->stats.algnerrc += 
4834                 E1000_READ_REG(&adapter->hw, E1000_ALGNERRC);
4835                 adapter->stats.rxerrc += 
4836                 E1000_READ_REG(&adapter->hw, E1000_RXERRC);
4837                 adapter->stats.tncrs += 
4838                 E1000_READ_REG(&adapter->hw, E1000_TNCRS);
4839                 adapter->stats.cexterr += 
4840                 E1000_READ_REG(&adapter->hw, E1000_CEXTERR);
4841                 adapter->stats.tsctc += 
4842                 E1000_READ_REG(&adapter->hw, E1000_TSCTC);
4843                 adapter->stats.tsctfc += 
4844                 E1000_READ_REG(&adapter->hw, E1000_TSCTFC);
4845         }
4846         ifp = adapter->ifp;
4847
4848         ifp->if_collisions = adapter->stats.colc;
4849
4850         /* Rx Errors */
4851         ifp->if_ierrors = adapter->dropped_pkts + adapter->stats.rxerrc +
4852             adapter->stats.crcerrs + adapter->stats.algnerrc +
4853             adapter->stats.ruc + adapter->stats.roc +
4854             adapter->stats.mpc + adapter->stats.cexterr;
4855
4856         /* Tx Errors */
4857         ifp->if_oerrors = adapter->stats.ecol +
4858             adapter->stats.latecol + adapter->watchdog_events;
4859 }
4860
4861
4862 /*
4863  * Add sysctl variables, one per statistic, to the system.
4864  */
4865 static void
4866 em_add_hw_stats(struct adapter *adapter)
4867 {
4868
4869         device_t dev = adapter->dev;
4870
4871         struct sysctl_ctx_list *ctx = device_get_sysctl_ctx(dev);
4872         struct sysctl_oid *tree = device_get_sysctl_tree(dev);
4873         struct sysctl_oid_list *child = SYSCTL_CHILDREN(tree);
4874         struct e1000_hw_stats *stats = &adapter->stats;
4875
4876         struct sysctl_oid *stat_node, *int_node, *host_node;
4877         struct sysctl_oid_list *stat_list, *int_list, *host_list;
4878
4879         /* Driver Statistics */
4880         SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "link_irq", 
4881                         CTLFLAG_RD, &adapter->link_irq, 0,
4882                         "Link MSIX IRQ Handled");
4883         SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "mbuf_alloc_fail", 
4884                          CTLFLAG_RD, &adapter->mbuf_alloc_failed,
4885                          "Std mbuf failed");
4886         SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "cluster_alloc_fail", 
4887                          CTLFLAG_RD, &adapter->mbuf_cluster_failed,
4888                          "Std mbuf cluster failed");
4889         SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "dropped", 
4890                         CTLFLAG_RD, &adapter->dropped_pkts,
4891                         "Driver dropped packets");
4892         SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "tx_dma_fail", 
4893                         CTLFLAG_RD, &adapter->no_tx_dma_setup,
4894                         "Driver tx dma failure in xmit");
4895
4896         SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "fc_high_water",
4897                         CTLFLAG_RD, &adapter->hw.fc.high_water, 0,
4898                         "Flow Control High Watermark");
4899         SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "fc_low_water", 
4900                         CTLFLAG_RD, &adapter->hw.fc.low_water, 0,
4901                         "Flow Control Low Watermark");
4902
4903         /* MAC stats get their own sub node */
4904
4905         stat_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "mac_stats", 
4906                                     CTLFLAG_RD, NULL, "Statistics");
4907         stat_list = SYSCTL_CHILDREN(stat_node);
4908
4909         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "excess_coll", 
4910                         CTLFLAG_RD, &stats->ecol,
4911                         "Excessive collisions");
4912         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "symbol_errors",
4913                         CTLFLAG_RD, &adapter->stats.symerrs,
4914                         "Symbol Errors");
4915         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "sequence_errors",
4916                         CTLFLAG_RD, &adapter->stats.sec,
4917                         "Sequence Errors");
4918         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "defer_count",
4919                         CTLFLAG_RD, &adapter->stats.dc,
4920                         "Defer Count");
4921         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "missed_packets",
4922                         CTLFLAG_RD, &adapter->stats.mpc,
4923                         "Missed Packets");
4924         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "recv_no_buff",
4925                         CTLFLAG_RD, &adapter->stats.rnbc,
4926                         "Receive No Buffers");
4927         /* RLEC is inaccurate on some hardware, calculate our own. */
4928 /*      SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "recv_len_errs", */
4929 /*                      CTLFLAG_RD, adapter->stats.roc + adapter->stats.ruc, */
4930 /*                      "Receive Length Errors"); */
4931
4932         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "recv_errs",
4933                         CTLFLAG_RD, &adapter->stats.rxerrc,
4934                         "Receive Errors");
4935         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "crc_errs",
4936                         CTLFLAG_RD, &adapter->stats.crcerrs,
4937                         "CRC errors");
4938         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "alignment_errs",
4939                         CTLFLAG_RD, &adapter->stats.algnerrc,
4940                         "Alignment Errors");
4941         /* On 82575 these are collision counts */
4942         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "coll_ext_errs",
4943                         CTLFLAG_RD, &adapter->stats.cexterr,
4944                         "Collision/Carrier extension errors");
4945         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "rx_overruns",
4946                         CTLFLAG_RD, &adapter->rx_overruns,
4947                         "RX overruns");
4948         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "watchdog_timeouts",
4949                         CTLFLAG_RD, &adapter->watchdog_events,
4950                         "Watchdog timeouts");
4951         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "xon_recvd",
4952                         CTLFLAG_RD, &adapter->stats.xonrxc,
4953                         "XON Received");
4954         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "xon_txd",
4955                         CTLFLAG_RD, &adapter->stats.xontxc,
4956                         "XON Transmitted");
4957         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "xoff_recvd",
4958                         CTLFLAG_RD, &adapter->stats.xoffrxc,
4959                         "XOFF Received");
4960         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "xoff_txd",
4961                         CTLFLAG_RD, &adapter->stats.xofftxc,
4962                         "XOFF Transmitted");
4963
4964         /* Packet Reception Stats */
4965         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "total_pkts_recvd",
4966                         CTLFLAG_RD, &adapter->stats.tpr,
4967                         "Total Packets Received ");
4968         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "good_pkts_recvd",
4969                         CTLFLAG_RD, &adapter->stats.gprc,
4970                         "Good Packets Received");
4971         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "bcast_pkts_recvd",
4972                         CTLFLAG_RD, &adapter->stats.bprc,
4973                         "Broadcast Packets Received");
4974         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "mcast_pkts_recvd",
4975                         CTLFLAG_RD, &adapter->stats.mprc,
4976                         "Multicast Packets Received");
4977         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "rx_frames_64",
4978                         CTLFLAG_RD, &adapter->stats.prc64,
4979                         "64 byte frames received ");
4980         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "rx_frames_65_127",
4981                         CTLFLAG_RD, &adapter->stats.prc127,
4982                         "65-127 byte frames received");
4983         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "rx_frames_128_255",
4984                         CTLFLAG_RD, &adapter->stats.prc255,
4985                         "128-255 byte frames received");
4986         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "rx_frames_256_511",
4987                         CTLFLAG_RD, &adapter->stats.prc511,
4988                         "256-511 byte frames received");
4989         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "rx_frames_512_1023",
4990                         CTLFLAG_RD, &adapter->stats.prc1023,
4991                         "512-1023 byte frames received");
4992         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "rx_frames_1024_1522",
4993                         CTLFLAG_RD, &adapter->stats.prc1522,
4994                         "1024-1522 byte frames received");
4995         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "good_octets_recvd", 
4996                         CTLFLAG_RD, &adapter->stats.gorc, 
4997                         "Good Octets Received"); 
4998
4999         /* Packet Transmission Stats */
5000         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "good_octets_txd",
5001                         CTLFLAG_RD, &adapter->stats.gotc,
5002                         "Good Octets Transmitted");
5003         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "total_pkts_txd",
5004                         CTLFLAG_RD, &adapter->stats.tpt,
5005                         "Total Packets Transmitted");
5006         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "good_pkts_txd",
5007                         CTLFLAG_RD, &adapter->stats.gptc,
5008                         "Good Packets Transmitted");
5009         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "bcast_pkts_txd",
5010                         CTLFLAG_RD, &adapter->stats.bptc,
5011                         "Broadcast Packets Transmitted");
5012         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "mcast_pkts_txd",
5013                         CTLFLAG_RD, &adapter->stats.mptc,
5014                         "Multicast Packets Transmitted");
5015         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tx_frames_64",
5016                         CTLFLAG_RD, &adapter->stats.ptc64,
5017                         "64 byte frames transmitted ");
5018         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tx_frames_65_127",
5019                         CTLFLAG_RD, &adapter->stats.ptc127,
5020                         "65-127 byte frames transmitted");
5021         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tx_frames_128_255",
5022                         CTLFLAG_RD, &adapter->stats.ptc255,
5023                         "128-255 byte frames transmitted");
5024         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tx_frames_256_511",
5025                         CTLFLAG_RD, &adapter->stats.ptc511,
5026                         "256-511 byte frames transmitted");
5027         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tx_frames_512_1023",
5028                         CTLFLAG_RD, &adapter->stats.ptc1023,
5029                         "512-1023 byte frames transmitted");
5030         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tx_frames_1024_1522",
5031                         CTLFLAG_RD, &adapter->stats.ptc1522,
5032                         "1024-1522 byte frames transmitted");
5033         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tso_txd",
5034                         CTLFLAG_RD, &adapter->stats.tsctc,
5035                         "TSO Contexts Transmitted");
5036         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tso_ctx_fail",
5037                         CTLFLAG_RD, &adapter->stats.tsctfc,
5038                         "TSO Contexts Failed");
5039
5040
5041         /* Interrupt Stats */
5042
5043         int_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "interrupts", 
5044                                     CTLFLAG_RD, NULL, "Interrupt Statistics");
5045         int_list = SYSCTL_CHILDREN(int_node);
5046
5047         SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "asserts",
5048                         CTLFLAG_RD, &adapter->stats.iac,
5049                         "Interrupt Assertion Count");
5050
5051         SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "rx_pkt_timer",
5052                         CTLFLAG_RD, &adapter->stats.icrxptc,
5053                         "Interrupt Cause Rx Pkt Timer Expire Count");
5054
5055         SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "rx_abs_timer",
5056                         CTLFLAG_RD, &adapter->stats.icrxatc,
5057                         "Interrupt Cause Rx Abs Timer Expire Count");
5058
5059         SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "tx_pkt_timer",
5060                         CTLFLAG_RD, &adapter->stats.ictxptc,
5061                         "Interrupt Cause Tx Pkt Timer Expire Count");
5062
5063         SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "tx_abs_timer",
5064                         CTLFLAG_RD, &adapter->stats.ictxatc,
5065                         "Interrupt Cause Tx Abs Timer Expire Count");
5066
5067         SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "tx_queue_empty",
5068                         CTLFLAG_RD, &adapter->stats.ictxqec,
5069                         "Interrupt Cause Tx Queue Empty Count");
5070
5071         SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "tx_queue_min_thresh",
5072                         CTLFLAG_RD, &adapter->stats.ictxqmtc,
5073                         "Interrupt Cause Tx Queue Min Thresh Count");
5074
5075         SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "rx_desc_min_thresh",
5076                         CTLFLAG_RD, &adapter->stats.icrxdmtc,
5077                         "Interrupt Cause Rx Desc Min Thresh Count");
5078
5079         SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "rx_overrun",
5080                         CTLFLAG_RD, &adapter->stats.icrxoc,
5081                         "Interrupt Cause Receiver Overrun Count");
5082
5083         /* Host to Card Stats */
5084
5085         host_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "host", 
5086                                     CTLFLAG_RD, NULL, 
5087                                     "Host to Card Statistics");
5088
5089         host_list = SYSCTL_CHILDREN(host_node);
5090
5091         SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "breaker_tx_pkt",
5092                         CTLFLAG_RD, &adapter->stats.cbtmpc,
5093                         "Circuit Breaker Tx Packet Count");
5094
5095         SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "host_tx_pkt_discard",
5096                         CTLFLAG_RD, &adapter->stats.htdpmc,
5097                         "Host Transmit Discarded Packets");
5098
5099         SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "rx_pkt",
5100                         CTLFLAG_RD, &adapter->stats.rpthc,
5101                         "Rx Packets To Host");
5102
5103         SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "breaker_rx_pkts",
5104                         CTLFLAG_RD, &adapter->stats.cbrmpc,
5105                         "Circuit Breaker Rx Packet Count");
5106
5107         SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "breaker_rx_pkt_drop",
5108                         CTLFLAG_RD, &adapter->stats.cbrdpc,
5109                         "Circuit Breaker Rx Dropped Count");
5110
5111         SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "tx_good_pkt",
5112                         CTLFLAG_RD, &adapter->stats.hgptc,
5113                         "Host Good Packets Tx Count");
5114
5115         SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "breaker_tx_pkt_drop",
5116                         CTLFLAG_RD, &adapter->stats.htcbdpc,
5117                         "Host Tx Circuit Breaker Dropped Count");
5118
5119         SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "rx_good_bytes",
5120                         CTLFLAG_RD, &adapter->stats.hgorc,
5121                         "Host Good Octets Received Count");
5122
5123         SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "tx_good_bytes",
5124                         CTLFLAG_RD, &adapter->stats.hgotc,
5125                         "Host Good Octets Transmit Count");
5126
5127         SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "length_errors",
5128                         CTLFLAG_RD, &adapter->stats.lenerrs,
5129                         "Length Errors");
5130
5131         SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "serdes_violation_pkt",
5132                         CTLFLAG_RD, &adapter->stats.scvpc,
5133                         "SerDes/SGMII Code Violation Pkt Count");
5134
5135         SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "header_redir_missed",
5136                         CTLFLAG_RD, &adapter->stats.hrmpc,
5137                         "Header Redirection Missed Packet Count");
5138
5139
5140
5141 }
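/*
 * The nodes above hang off the device's sysctl tree, so (assuming the
 * usual dev.em.<unit> naming) the counters can be read with e.g.:
 *
 *	sysctl dev.em.0.mac_stats.good_pkts_recvd
 *	sysctl dev.em.0.interrupts.asserts
 *	sysctl dev.em.0.host.rx_good_bytes
 */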
5142
5143 /**********************************************************************
5144  *
5145  *  This routine provides a way to dump out the adapter eeprom,
5146  *  often a useful debug/service tool. This only dumps the first
5147  *  often a useful debug/service tool. Only the first 32 words are
5148  *  dumped; the data that matters lives within that extent.
5149  **********************************************************************/
5150
5151 static int
5152 em_sysctl_nvm_info(SYSCTL_HANDLER_ARGS)
5153 {
5154         struct adapter *adapter;
5155         int error;
5156         int result;
5157
5158         result = -1;
5159         error = sysctl_handle_int(oidp, &result, 0, req);
5160
5161         if (error || !req->newptr)
5162                 return (error);
5163
5164         /*
5165          * This value will cause a hex dump of the
5166          * first 32 16-bit words of the EEPROM to
5167          * the screen.
5168          */
5169         if (result == 1) {
5170                 adapter = (struct adapter *)arg1;
5171                 em_print_nvm_info(adapter);
5172         }
5173
5174         return (error);
5175 }
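/*
 * Usage sketch, assuming the handler is registered as an "nvm" node
 * under the device's sysctl tree elsewhere in the driver: writing 1
 * triggers the hex dump to the console.
 *
 *	sysctl dev.em.0.nvm=1
 */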
5176
5177 static void
5178 em_print_nvm_info(struct adapter *adapter)
5179 {
5180         u16     eeprom_data;
5181         int     i, j, row = 0;
5182
5183         /* It's a bit crude, but it gets the job done */
5184         printf("\nInterface EEPROM Dump:\n");
5185         printf("Offset\n0x0000  ");
5186         for (i = 0, j = 0; i < 32; i++, j++) {
5187                 if (j == 8) { /* Make the offset block */
5188                         j = 0; ++row;
5189                         printf("\n0x00%x0  ", row);
5190                 }
5191                 e1000_read_nvm(&adapter->hw, i, 1, &eeprom_data);
5192                 printf("%04x ", eeprom_data);
5193         }
5194         printf("\n");
5195 }
5196
5197 static int
5198 em_sysctl_int_delay(SYSCTL_HANDLER_ARGS)
5199 {
5200         struct em_int_delay_info *info;
5201         struct adapter *adapter;
5202         u32 regval;
5203         int error, usecs, ticks;
5204
5205         info = (struct em_int_delay_info *)arg1;
5206         usecs = info->value;
5207         error = sysctl_handle_int(oidp, &usecs, 0, req);
5208         if (error != 0 || req->newptr == NULL)
5209                 return (error);
5210         if (usecs < 0 || usecs > EM_TICKS_TO_USECS(65535))
5211                 return (EINVAL);
5212         info->value = usecs;
5213         ticks = EM_USECS_TO_TICKS(usecs);
5214
5215         adapter = info->adapter;
5216         
5217         EM_CORE_LOCK(adapter);
5218         regval = E1000_READ_OFFSET(&adapter->hw, info->offset);
5219         regval = (regval & ~0xffff) | (ticks & 0xffff);
5220         /* Handle a few special cases. */
5221         switch (info->offset) {
5222         case E1000_RDTR:
5223                 break;
5224         case E1000_TIDV:
5225                 if (ticks == 0) {
5226                         adapter->txd_cmd &= ~E1000_TXD_CMD_IDE;
5227                         /* Don't write 0 into the TIDV register. */
5228                         regval++;
5229                 } else
5230                         adapter->txd_cmd |= E1000_TXD_CMD_IDE;
5231                 break;
5232         }
5233         E1000_WRITE_OFFSET(&adapter->hw, info->offset, regval);
5234         EM_CORE_UNLOCK(adapter);
5235         return (0);
5236 }
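/*
 * A worked conversion for the handler above (assuming the usual
 * EM_USECS_TO_TICKS definition, where one tick is 1.024 usec): a
 * request of 128 usec becomes 125 ticks, which replaces the low 16
 * bits of the delay register while the upper bits are preserved.
 */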
5237
5238 static void
5239 em_add_int_delay_sysctl(struct adapter *adapter, const char *name,
5240         const char *description, struct em_int_delay_info *info,
5241         int offset, int value)
5242 {
5243         info->adapter = adapter;
5244         info->offset = offset;
5245         info->value = value;
5246         SYSCTL_ADD_PROC(device_get_sysctl_ctx(adapter->dev),
5247             SYSCTL_CHILDREN(device_get_sysctl_tree(adapter->dev)),
5248             OID_AUTO, name, CTLTYPE_INT|CTLFLAG_RW,
5249             info, 0, em_sysctl_int_delay, "I", description);
5250 }
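/*
 * An illustrative registration (a sketch of how the attach path uses
 * this helper; the default-value name below is an assumption):
 */
#if 0
	/* ... in em_attach() context ... */
	em_add_int_delay_sysctl(adapter, "rx_int_delay",
	    "receive interrupt delay in usecs", &adapter->rx_int_delay,
	    E1000_REGISTER(&adapter->hw, E1000_RDTR), em_rx_int_delay_dflt);
#endif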
5251
5252 static void
5253 em_add_rx_process_limit(struct adapter *adapter, const char *name,
5254         const char *description, int *limit, int value)
5255 {
5256         *limit = value;
5257         SYSCTL_ADD_INT(device_get_sysctl_ctx(adapter->dev),
5258             SYSCTL_CHILDREN(device_get_sysctl_tree(adapter->dev)),
5259             OID_AUTO, name, CTLTYPE_INT|CTLFLAG_RW, limit, value, description);
5260 }
5261
5262