/******************************************************************************

  Copyright (c) 2001-2010, Intel Corporation
  All rights reserved.

  Redistribution and use in source and binary forms, with or without
  modification, are permitted provided that the following conditions are met:

   1. Redistributions of source code must retain the above copyright notice,
      this list of conditions and the following disclaimer.

   2. Redistributions in binary form must reproduce the above copyright
      notice, this list of conditions and the following disclaimer in the
      documentation and/or other materials provided with the distribution.

   3. Neither the name of the Intel Corporation nor the names of its
      contributors may be used to endorse or promote products derived from
      this software without specific prior written permission.

  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
  AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
  LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
  INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
  CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
  ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
  POSSIBILITY OF SUCH DAMAGE.

******************************************************************************/
/*$FreeBSD$*/

#ifdef HAVE_KERNEL_OPTION_HEADERS
#include "opt_device_polling.h"
#include "opt_inet.h"
#endif

#include <sys/param.h>
#include <sys/systm.h>
#if __FreeBSD_version >= 800000
#include <sys/buf_ring.h>
#endif
#include <sys/bus.h>
#include <sys/endian.h>
#include <sys/kernel.h>
#include <sys/kthread.h>
#include <sys/malloc.h>
#include <sys/mbuf.h>
#include <sys/module.h>
#include <sys/rman.h>
#include <sys/socket.h>
#include <sys/sockio.h>
#include <sys/sysctl.h>
#include <sys/taskqueue.h>
#include <sys/eventhandler.h>
#include <machine/bus.h>
#include <machine/resource.h>

#include <net/bpf.h>
#include <net/ethernet.h>
#include <net/if.h>
#include <net/if_arp.h>
#include <net/if_dl.h>
#include <net/if_media.h>

#include <net/if_types.h>
#include <net/if_vlan_var.h>

#include <netinet/in_systm.h>
#include <netinet/in.h>
#include <netinet/if_ether.h>
#include <netinet/ip.h>
#include <netinet/ip6.h>
#include <netinet/tcp.h>
#include <netinet/udp.h>

#include <machine/in_cksum.h>
#include <dev/led/led.h>
#include <dev/pci/pcivar.h>
#include <dev/pci/pcireg.h>

#include "e1000_api.h"
#include "e1000_82571.h"
#include "if_em.h"

/*********************************************************************
 *  Set this to one to display debug statistics
 *********************************************************************/
int     em_display_debug_stats = 0;

/*********************************************************************
 *  Driver version:
 *********************************************************************/
char em_driver_version[] = "7.0.5";

/*********************************************************************
 *  PCI Device ID Table
 *
 *  Used by em_probe to select which devices the driver attaches to.
 *  The last field stores an index into e1000_strings.
 *  The last entry must be all 0s.
 *
 *  { Vendor ID, Device ID, SubVendor ID, SubDevice ID, String Index }
 *********************************************************************/

static em_vendor_info_t em_vendor_info_array[] =
{
        /* Intel(R) PRO/1000 Network Connection */
        { 0x8086, E1000_DEV_ID_82571EB_COPPER,  PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82571EB_FIBER,   PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82571EB_SERDES,  PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82571EB_SERDES_DUAL,
                                                PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82571EB_SERDES_QUAD,
                                                PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82571EB_QUAD_COPPER,
                                                PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82571EB_QUAD_COPPER_LP,
                                                PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82571EB_QUAD_FIBER,
                                                PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82571PT_QUAD_COPPER,
                                                PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82572EI_COPPER,  PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82572EI_FIBER,   PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82572EI_SERDES,  PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82572EI,         PCI_ANY_ID, PCI_ANY_ID, 0},

        { 0x8086, E1000_DEV_ID_82573E,          PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82573E_IAMT,     PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82573L,          PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82583V,          PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_80003ES2LAN_COPPER_SPT,
                                                PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_80003ES2LAN_SERDES_SPT,
                                                PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_80003ES2LAN_COPPER_DPT,
                                                PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_80003ES2LAN_SERDES_DPT,
                                                PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_ICH8_IGP_M_AMT,  PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_ICH8_IGP_AMT,    PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_ICH8_IGP_C,      PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_ICH8_IFE,        PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_ICH8_IFE_GT,     PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_ICH8_IFE_G,      PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_ICH8_IGP_M,      PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_ICH8_82567V_3,   PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_ICH9_IGP_M_AMT,  PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_ICH9_IGP_AMT,    PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_ICH9_IGP_C,      PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_ICH9_IGP_M,      PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_ICH9_IGP_M_V,    PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_ICH9_IFE,        PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_ICH9_IFE_GT,     PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_ICH9_IFE_G,      PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_ICH9_BM,         PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82574L,          PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82574LA,         PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_ICH10_R_BM_LM,   PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_ICH10_R_BM_LF,   PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_ICH10_R_BM_V,    PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_ICH10_D_BM_LM,   PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_ICH10_D_BM_LF,   PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_PCH_M_HV_LM,     PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_PCH_M_HV_LC,     PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_PCH_D_HV_DM,     PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_PCH_D_HV_DC,     PCI_ANY_ID, PCI_ANY_ID, 0},
        /* required last entry */
        { 0, 0, 0, 0, 0}
};

/*********************************************************************
 *  Table of branding strings for all supported NICs.
 *********************************************************************/

static char *em_strings[] = {
        "Intel(R) PRO/1000 Network Connection"
};

/*********************************************************************
 *  Function prototypes
 *********************************************************************/
static int      em_probe(device_t);
static int      em_attach(device_t);
static int      em_detach(device_t);
static int      em_shutdown(device_t);
static int      em_suspend(device_t);
static int      em_resume(device_t);
static void     em_start(struct ifnet *);
static void     em_start_locked(struct ifnet *, struct tx_ring *);
#ifdef EM_MULTIQUEUE
static int      em_mq_start(struct ifnet *, struct mbuf *);
static int      em_mq_start_locked(struct ifnet *,
                    struct tx_ring *, struct mbuf *);
static void     em_qflush(struct ifnet *);
#endif
static int      em_ioctl(struct ifnet *, u_long, caddr_t);
static void     em_init(void *);
static void     em_init_locked(struct adapter *);
static void     em_stop(void *);
static void     em_media_status(struct ifnet *, struct ifmediareq *);
static int      em_media_change(struct ifnet *);
static void     em_identify_hardware(struct adapter *);
static int      em_allocate_pci_resources(struct adapter *);
static int      em_allocate_legacy(struct adapter *);
static int      em_allocate_msix(struct adapter *);
static int      em_allocate_queues(struct adapter *);
static int      em_setup_msix(struct adapter *);
static void     em_free_pci_resources(struct adapter *);
static void     em_local_timer(void *);
static void     em_reset(struct adapter *);
static void     em_setup_interface(device_t, struct adapter *);

static void     em_setup_transmit_structures(struct adapter *);
static void     em_initialize_transmit_unit(struct adapter *);
static int      em_allocate_transmit_buffers(struct tx_ring *);
static void     em_free_transmit_structures(struct adapter *);
static void     em_free_transmit_buffers(struct tx_ring *);

static int      em_setup_receive_structures(struct adapter *);
static int      em_allocate_receive_buffers(struct rx_ring *);
static void     em_initialize_receive_unit(struct adapter *);
static void     em_free_receive_structures(struct adapter *);
static void     em_free_receive_buffers(struct rx_ring *);

static void     em_enable_intr(struct adapter *);
static void     em_disable_intr(struct adapter *);
static void     em_update_stats_counters(struct adapter *);
static bool     em_txeof(struct tx_ring *);
static int      em_rxeof(struct rx_ring *, int);
#ifndef __NO_STRICT_ALIGNMENT
static int      em_fixup_rx(struct rx_ring *);
#endif
static void     em_receive_checksum(struct e1000_rx_desc *, struct mbuf *);
static void     em_transmit_checksum_setup(struct tx_ring *, struct mbuf *,
                    u32 *, u32 *);
static bool     em_tso_setup(struct tx_ring *, struct mbuf *, u32 *, u32 *);
static void     em_set_promisc(struct adapter *);
static void     em_disable_promisc(struct adapter *);
static void     em_set_multi(struct adapter *);
static void     em_print_hw_stats(struct adapter *);
static void     em_update_link_status(struct adapter *);
static void     em_refresh_mbufs(struct rx_ring *, int);
static void     em_register_vlan(void *, struct ifnet *, u16);
static void     em_unregister_vlan(void *, struct ifnet *, u16);
static void     em_setup_vlan_hw_support(struct adapter *);
static int      em_xmit(struct tx_ring *, struct mbuf **);
static int      em_dma_malloc(struct adapter *, bus_size_t,
                    struct em_dma_alloc *, int);
static void     em_dma_free(struct adapter *, struct em_dma_alloc *);
static void     em_print_debug_info(struct adapter *);
static void     em_print_nvm_info(struct adapter *);
static int      em_is_valid_ether_addr(u8 *);
static int      em_sysctl_stats(SYSCTL_HANDLER_ARGS);
static int      em_sysctl_debug_info(SYSCTL_HANDLER_ARGS);
static int      em_sysctl_int_delay(SYSCTL_HANDLER_ARGS);
static void     em_add_int_delay_sysctl(struct adapter *, const char *,
                    const char *, struct em_int_delay_info *, int, int);
/* Management and WOL Support */
static void     em_init_manageability(struct adapter *);
static void     em_release_manageability(struct adapter *);
static void     em_get_hw_control(struct adapter *);
static void     em_release_hw_control(struct adapter *);
static void     em_get_wakeup(device_t);
static void     em_enable_wakeup(device_t);
static int      em_enable_phy_wakeup(struct adapter *);
static void     em_led_func(void *, int);

static int      em_irq_fast(void *);

/* MSIX handlers */
static void     em_msix_tx(void *);
static void     em_msix_rx(void *);
static void     em_msix_link(void *);
static void     em_handle_tx(void *context, int pending);
static void     em_handle_rx(void *context, int pending);
static void     em_handle_link(void *context, int pending);

static void     em_add_rx_process_limit(struct adapter *, const char *,
                    const char *, int *, int);

#ifdef DEVICE_POLLING
static poll_handler_t em_poll;
#endif /* DEVICE_POLLING */

/*********************************************************************
 *  FreeBSD Device Interface Entry Points
 *********************************************************************/

static device_method_t em_methods[] = {
        /* Device interface */
        DEVMETHOD(device_probe, em_probe),
        DEVMETHOD(device_attach, em_attach),
        DEVMETHOD(device_detach, em_detach),
        DEVMETHOD(device_shutdown, em_shutdown),
        DEVMETHOD(device_suspend, em_suspend),
        DEVMETHOD(device_resume, em_resume),
        {0, 0}
};

static driver_t em_driver = {
        "em", em_methods, sizeof(struct adapter),
};

devclass_t em_devclass;
DRIVER_MODULE(em, pci, em_driver, em_devclass, 0, 0);
MODULE_DEPEND(em, pci, 1, 1, 1);
MODULE_DEPEND(em, ether, 1, 1, 1);
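
/*
 * Example (illustrative, not part of the driver): with the module
 * registered above, the driver is typically loaded at boot by adding
 *
 *      if_em_load="YES"
 *
 * to /boot/loader.conf, or at runtime with "kldload if_em".  The
 * MODULE_DEPEND() lines ensure the pci and ether code is present
 * before this module initializes.
 */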

/*********************************************************************
 *  Tunable default values.
 *********************************************************************/

#define EM_TICKS_TO_USECS(ticks)        ((1024 * (ticks) + 500) / 1000)
#define EM_USECS_TO_TICKS(usecs)        ((1000 * (usecs) + 512) / 1024)
#define M_TSO_LEN                       66
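
/*
 * Worked example (illustrative): the interrupt delay registers count
 * in units of 1.024 usec, which the two macros above convert with
 * round-to-nearest rather than truncation.  Assuming EM_TIDV is 64:
 *
 *      EM_TICKS_TO_USECS(64) = (1024 * 64 + 500) / 1000 = 66 usec
 *      EM_USECS_TO_TICKS(66) = (1000 * 66 + 512) / 1024 = 64 ticks
 */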

/* Allow common code without TSO */
#ifndef CSUM_TSO
#define CSUM_TSO        0
#endif

static int em_tx_int_delay_dflt = EM_TICKS_TO_USECS(EM_TIDV);
static int em_rx_int_delay_dflt = EM_TICKS_TO_USECS(EM_RDTR);
TUNABLE_INT("hw.em.tx_int_delay", &em_tx_int_delay_dflt);
TUNABLE_INT("hw.em.rx_int_delay", &em_rx_int_delay_dflt);

static int em_tx_abs_int_delay_dflt = EM_TICKS_TO_USECS(EM_TADV);
static int em_rx_abs_int_delay_dflt = EM_TICKS_TO_USECS(EM_RADV);
TUNABLE_INT("hw.em.tx_abs_int_delay", &em_tx_abs_int_delay_dflt);
TUNABLE_INT("hw.em.rx_abs_int_delay", &em_rx_abs_int_delay_dflt);

static int em_rxd = EM_DEFAULT_RXD;
static int em_txd = EM_DEFAULT_TXD;
TUNABLE_INT("hw.em.rxd", &em_rxd);
TUNABLE_INT("hw.em.txd", &em_txd);

static int em_smart_pwr_down = FALSE;
TUNABLE_INT("hw.em.smart_pwr_down", &em_smart_pwr_down);

/* Controls whether promiscuous also shows bad packets */
static int em_debug_sbp = FALSE;
TUNABLE_INT("hw.em.sbp", &em_debug_sbp);

/* Local controls for MSI/MSIX */
#ifdef EM_MULTIQUEUE
static int em_enable_msix = TRUE;
static int em_msix_queues = 2; /* for 82574, can be 1 or 2 */
#else
static int em_enable_msix = FALSE;
static int em_msix_queues = 0; /* disable */
#endif
TUNABLE_INT("hw.em.enable_msix", &em_enable_msix);
TUNABLE_INT("hw.em.msix_queues", &em_msix_queues);

/* How many packets rxeof tries to clean at a time */
static int em_rx_process_limit = 100;
TUNABLE_INT("hw.em.rx_process_limit", &em_rx_process_limit);

/* Flow control setting - default to FULL */
static int em_fc_setting = e1000_fc_full;
TUNABLE_INT("hw.em.fc_setting", &em_fc_setting);
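
/*
 * Example (illustrative): each TUNABLE_INT() above is read from the
 * kernel environment at boot, so the defaults can be overridden from
 * /boot/loader.conf, e.g.:
 *
 *      hw.em.rxd="1024"
 *      hw.em.txd="1024"
 *      hw.em.rx_process_limit="200"
 *      hw.em.fc_setting="0"    (assumes e1000_fc_none == 0)
 */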

/*
** Shadow VFTA table: needed because the real VLAN filter
** table gets cleared during a soft reset, and the driver
** must be able to repopulate it afterwards.
*/
static u32 em_shadow_vfta[EM_VFTA_SIZE];
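
/*
 * Sketch (illustrative, assuming the usual e1000 VFTA layout of
 * EM_VFTA_SIZE 32-bit words with one bit per VLAN ID): a VLAN id
 * "vid" maps into the shadow table as
 *
 *      em_shadow_vfta[(vid >> 5) & (EM_VFTA_SIZE - 1)] |= 1 << (vid & 0x1F);
 *
 * and after a soft reset the saved words are simply written back to
 * the hardware VFTA registers to restore the filter state.
 */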

/* Global used in WOL setup with multiport cards */
static int global_quad_port_a = 0;

/*********************************************************************
 *  Device identification routine
 *
 *  em_probe determines whether the driver should be loaded on an
 *  adapter, based on the adapter's PCI vendor/device ID.
 *
 *  return BUS_PROBE_DEFAULT on success, positive on failure
 *********************************************************************/

static int
em_probe(device_t dev)
{
        char            adapter_name[60];
        u16             pci_vendor_id = 0;
        u16             pci_device_id = 0;
        u16             pci_subvendor_id = 0;
        u16             pci_subdevice_id = 0;
        em_vendor_info_t *ent;

        INIT_DEBUGOUT("em_probe: begin");

        pci_vendor_id = pci_get_vendor(dev);
        if (pci_vendor_id != EM_VENDOR_ID)
                return (ENXIO);

        pci_device_id = pci_get_device(dev);
        pci_subvendor_id = pci_get_subvendor(dev);
        pci_subdevice_id = pci_get_subdevice(dev);

        ent = em_vendor_info_array;
        while (ent->vendor_id != 0) {
                if ((pci_vendor_id == ent->vendor_id) &&
                    (pci_device_id == ent->device_id) &&
                    ((pci_subvendor_id == ent->subvendor_id) ||
                    (ent->subvendor_id == PCI_ANY_ID)) &&
                    ((pci_subdevice_id == ent->subdevice_id) ||
                    (ent->subdevice_id == PCI_ANY_ID))) {
                        sprintf(adapter_name, "%s %s",
                                em_strings[ent->index],
                                em_driver_version);
                        device_set_desc_copy(dev, adapter_name);
                        return (BUS_PROBE_DEFAULT);
                }
                ent++;
        }

        return (ENXIO);
}
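
/*
 * Example (illustrative): an 82574L reports vendor 0x8086 and device
 * E1000_DEV_ID_82574L; because its table entry uses PCI_ANY_ID for the
 * subvendor/subdevice fields, both subsystem comparisons above succeed
 * regardless of the board maker, and the probe returns
 * BUS_PROBE_DEFAULT with the branding string as the device description.
 */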

/*********************************************************************
 *  Device initialization routine
 *
 *  The attach entry point is called when the driver is being loaded.
 *  This routine identifies the type of hardware, allocates all resources
 *  and initializes the hardware.
 *
 *  return 0 on success, positive on failure
 *********************************************************************/

static int
em_attach(device_t dev)
{
        struct adapter  *adapter;
        int             error = 0;

        INIT_DEBUGOUT("em_attach: begin");

        adapter = device_get_softc(dev);
        adapter->dev = adapter->osdep.dev = dev;
        EM_CORE_LOCK_INIT(adapter, device_get_nameunit(dev));

        /* SYSCTL stuff */
        SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
            SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
            OID_AUTO, "debug", CTLTYPE_INT|CTLFLAG_RW, adapter, 0,
            em_sysctl_debug_info, "I", "Debug Information");

        SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
            SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
            OID_AUTO, "stats", CTLTYPE_INT|CTLFLAG_RW, adapter, 0,
            em_sysctl_stats, "I", "Statistics");

        callout_init_mtx(&adapter->timer, &adapter->core_mtx, 0);

        /* Determine hardware and mac info */
        em_identify_hardware(adapter);

        /* Setup PCI resources */
        if (em_allocate_pci_resources(adapter)) {
                device_printf(dev, "Allocation of PCI resources failed\n");
                error = ENXIO;
                goto err_pci;
        }

        /*
        ** For ICH8 and family we need to
        ** map the flash memory, and this
        ** must happen after the MAC is
        ** identified
        */
        if ((adapter->hw.mac.type == e1000_ich8lan) ||
            (adapter->hw.mac.type == e1000_pchlan) ||
            (adapter->hw.mac.type == e1000_ich9lan) ||
            (adapter->hw.mac.type == e1000_ich10lan)) {
                int rid = EM_BAR_TYPE_FLASH;
                adapter->flash = bus_alloc_resource_any(dev,
                    SYS_RES_MEMORY, &rid, RF_ACTIVE);
                if (adapter->flash == NULL) {
                        device_printf(dev, "Mapping of Flash failed\n");
                        error = ENXIO;
                        goto err_pci;
                }
                /* This is used in the shared code */
                adapter->hw.flash_address = (u8 *)adapter->flash;
                adapter->osdep.flash_bus_space_tag =
                    rman_get_bustag(adapter->flash);
                adapter->osdep.flash_bus_space_handle =
                    rman_get_bushandle(adapter->flash);
        }

        /* Do Shared Code initialization */
        if (e1000_setup_init_funcs(&adapter->hw, TRUE)) {
                device_printf(dev, "Setup of Shared code failed\n");
                error = ENXIO;
                goto err_pci;
        }

        e1000_get_bus_info(&adapter->hw);

        /* Set up some sysctls for the tunable interrupt delays */
        em_add_int_delay_sysctl(adapter, "rx_int_delay",
            "receive interrupt delay in usecs", &adapter->rx_int_delay,
            E1000_REGISTER(&adapter->hw, E1000_RDTR), em_rx_int_delay_dflt);
        em_add_int_delay_sysctl(adapter, "tx_int_delay",
            "transmit interrupt delay in usecs", &adapter->tx_int_delay,
            E1000_REGISTER(&adapter->hw, E1000_TIDV), em_tx_int_delay_dflt);
        em_add_int_delay_sysctl(adapter, "rx_abs_int_delay",
            "receive interrupt delay limit in usecs",
            &adapter->rx_abs_int_delay,
            E1000_REGISTER(&adapter->hw, E1000_RADV),
            em_rx_abs_int_delay_dflt);
        em_add_int_delay_sysctl(adapter, "tx_abs_int_delay",
            "transmit interrupt delay limit in usecs",
            &adapter->tx_abs_int_delay,
            E1000_REGISTER(&adapter->hw, E1000_TADV),
            em_tx_abs_int_delay_dflt);

        /* Sysctls for limiting the amount of work done in the taskqueue */
        em_add_rx_process_limit(adapter, "rx_processing_limit",
            "max number of rx packets to process", &adapter->rx_process_limit,
            em_rx_process_limit);

        /*
         * Validate number of transmit and receive descriptors. It
         * must not exceed hardware maximum, and must be a multiple
         * of EM_DBA_ALIGN.
         */
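        /*
         * Worked example (illustrative, assuming EM_DBA_ALIGN is 128):
         * a legacy descriptor is 16 bytes, so the ring stays aligned
         * whenever the count is a multiple of 8; e.g. em_txd = 1024
         * gives 1024 * 16 = 16384 bytes, and 16384 % 128 == 0, so the
         * requested value is accepted.
         */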
        if (((em_txd * sizeof(struct e1000_tx_desc)) % EM_DBA_ALIGN) != 0 ||
            (em_txd > EM_MAX_TXD) || (em_txd < EM_MIN_TXD)) {
                device_printf(dev, "Using %d TX descriptors instead of %d!\n",
                    EM_DEFAULT_TXD, em_txd);
                adapter->num_tx_desc = EM_DEFAULT_TXD;
        } else
                adapter->num_tx_desc = em_txd;

        if (((em_rxd * sizeof(struct e1000_rx_desc)) % EM_DBA_ALIGN) != 0 ||
            (em_rxd > EM_MAX_RXD) || (em_rxd < EM_MIN_RXD)) {
                device_printf(dev, "Using %d RX descriptors instead of %d!\n",
                    EM_DEFAULT_RXD, em_rxd);
                adapter->num_rx_desc = EM_DEFAULT_RXD;
        } else
                adapter->num_rx_desc = em_rxd;

        adapter->hw.mac.autoneg = DO_AUTO_NEG;
        adapter->hw.phy.autoneg_wait_to_complete = FALSE;
        adapter->hw.phy.autoneg_advertised = AUTONEG_ADV_DEFAULT;

        /* Copper options */
        if (adapter->hw.phy.media_type == e1000_media_type_copper) {
                adapter->hw.phy.mdix = AUTO_ALL_MODES;
                adapter->hw.phy.disable_polarity_correction = FALSE;
                adapter->hw.phy.ms_type = EM_MASTER_SLAVE;
        }

        /*
         * Set the frame limits assuming
         * standard ethernet sized frames.
         */
        adapter->max_frame_size = ETHERMTU + ETHER_HDR_LEN + ETHERNET_FCS_SIZE;
        adapter->min_frame_size = ETH_ZLEN + ETHERNET_FCS_SIZE;

        /*
         * This controls when hardware reports transmit completion
         * status.
         */
        adapter->hw.mac.report_tx_early = 1;

        /*
        ** Get queue/ring memory
        */
        if (em_allocate_queues(adapter)) {
                error = ENOMEM;
                goto err_pci;
        }

        /*
        ** Start from a known state: this is
        ** important when reading the NVM and
        ** MAC address from it.
        */
        e1000_reset_hw(&adapter->hw);

        /* Make sure we have a good EEPROM before we read from it */
        if (e1000_validate_nvm_checksum(&adapter->hw) < 0) {
                /*
                ** Some PCI-E parts fail the first check due to
                ** the link being in sleep state; call it again,
                ** and if it fails a second time it's a real issue.
                */
                if (e1000_validate_nvm_checksum(&adapter->hw) < 0) {
                        device_printf(dev,
                            "The EEPROM Checksum Is Not Valid\n");
                        error = EIO;
                        goto err_late;
                }
        }

        /* Copy the permanent MAC address out of the EEPROM */
        if (e1000_read_mac_addr(&adapter->hw) < 0) {
                device_printf(dev, "EEPROM read error while reading MAC"
                    " address\n");
                error = EIO;
                goto err_late;
        }

        if (!em_is_valid_ether_addr(adapter->hw.mac.addr)) {
                device_printf(dev, "Invalid MAC address\n");
                error = EIO;
                goto err_late;
        }

        /*
        ** Do interrupt configuration
        */
        if (adapter->msix > 1) /* Do MSIX */
                error = em_allocate_msix(adapter);
        else  /* MSI or Legacy */
                error = em_allocate_legacy(adapter);
        if (error)
                goto err_late;

        /*
         * Get Wake-on-Lan and Management info for later use
         */
        em_get_wakeup(dev);

        /* Setup OS specific network interface */
        em_setup_interface(dev, adapter);

        em_reset(adapter);

        /* Initialize statistics */
        em_update_stats_counters(adapter);

        adapter->hw.mac.get_link_status = 1;
        em_update_link_status(adapter);

        /* Indicate SOL/IDER usage */
        if (e1000_check_reset_block(&adapter->hw))
                device_printf(dev,
                    "PHY reset is blocked due to SOL/IDER session.\n");

        /* Register for VLAN events */
        adapter->vlan_attach = EVENTHANDLER_REGISTER(vlan_config,
            em_register_vlan, adapter, EVENTHANDLER_PRI_FIRST);
        adapter->vlan_detach = EVENTHANDLER_REGISTER(vlan_unconfig,
            em_unregister_vlan, adapter, EVENTHANDLER_PRI_FIRST);

        /* Non-AMT based hardware can now take control from firmware */
        if (adapter->has_manage && !adapter->has_amt)
                em_get_hw_control(adapter);

        /* Tell the stack that the interface is not active */
        adapter->ifp->if_drv_flags &= ~(IFF_DRV_RUNNING | IFF_DRV_OACTIVE);

        adapter->led_dev = led_create(em_led_func, adapter,
            device_get_nameunit(dev));

        INIT_DEBUGOUT("em_attach: end");

        return (0);

err_late:
        em_free_transmit_structures(adapter);
        em_free_receive_structures(adapter);
        em_release_hw_control(adapter);
err_pci:
        em_free_pci_resources(adapter);
        EM_CORE_LOCK_DESTROY(adapter);

        return (error);
}

/*********************************************************************
 *  Device removal routine
 *
 *  The detach entry point is called when the driver is being removed.
 *  This routine stops the adapter and deallocates all the resources
 *  that were allocated for driver operation.
 *
 *  return 0 on success, positive on failure
 *********************************************************************/

static int
em_detach(device_t dev)
{
        struct adapter  *adapter = device_get_softc(dev);
        struct ifnet    *ifp = adapter->ifp;

        INIT_DEBUGOUT("em_detach: begin");

        /* Make sure VLANs are not using the driver */
        if (adapter->ifp->if_vlantrunk != NULL) {
                device_printf(dev, "VLAN in use, detach first\n");
                return (EBUSY);
        }

#ifdef DEVICE_POLLING
        if (ifp->if_capenable & IFCAP_POLLING)
                ether_poll_deregister(ifp);
#endif

        if (adapter->led_dev != NULL)
                led_destroy(adapter->led_dev);

        EM_CORE_LOCK(adapter);
        adapter->in_detach = 1;
        em_stop(adapter);
        EM_CORE_UNLOCK(adapter);
        EM_CORE_LOCK_DESTROY(adapter);

        e1000_phy_hw_reset(&adapter->hw);

        em_release_manageability(adapter);
        em_release_hw_control(adapter);

        /* Unregister VLAN events */
        if (adapter->vlan_attach != NULL)
                EVENTHANDLER_DEREGISTER(vlan_config, adapter->vlan_attach);
        if (adapter->vlan_detach != NULL)
                EVENTHANDLER_DEREGISTER(vlan_unconfig, adapter->vlan_detach);

        ether_ifdetach(adapter->ifp);
        callout_drain(&adapter->timer);

        em_free_pci_resources(adapter);
        bus_generic_detach(dev);
        if_free(ifp);

        em_free_transmit_structures(adapter);
        em_free_receive_structures(adapter);

        em_release_hw_control(adapter);

        return (0);
}

/*********************************************************************
 *
 *  Shutdown entry point
 *
 **********************************************************************/

static int
em_shutdown(device_t dev)
{
        return em_suspend(dev);
}

/*
 * Suspend/resume device methods.
 */
static int
em_suspend(device_t dev)
{
        struct adapter *adapter = device_get_softc(dev);

        EM_CORE_LOCK(adapter);

        em_release_manageability(adapter);
        em_release_hw_control(adapter);
        em_enable_wakeup(dev);

        EM_CORE_UNLOCK(adapter);

        return bus_generic_suspend(dev);
}

static int
em_resume(device_t dev)
{
        struct adapter *adapter = device_get_softc(dev);
        struct ifnet *ifp = adapter->ifp;

        EM_CORE_LOCK(adapter);
        em_init_locked(adapter);
        em_init_manageability(adapter);
        EM_CORE_UNLOCK(adapter);
        em_start(ifp);

        return bus_generic_resume(dev);
}

/*********************************************************************
 *  Transmit entry point
 *
 *  em_start is called by the stack to initiate a transmit.
 *  The driver will remain in this routine as long as there are
 *  packets to transmit and transmit resources are available.
 *  If resources are not available, the stack is notified and
 *  the packet is requeued.
 **********************************************************************/

#ifdef EM_MULTIQUEUE
static int
em_mq_start_locked(struct ifnet *ifp, struct tx_ring *txr, struct mbuf *m)
{
        struct adapter  *adapter = txr->adapter;
        struct mbuf     *next;
        int             err = 0, enq = 0;

        if ((ifp->if_drv_flags & (IFF_DRV_RUNNING | IFF_DRV_OACTIVE)) !=
            IFF_DRV_RUNNING || adapter->link_active == 0) {
                if (m != NULL)
                        err = drbr_enqueue(ifp, txr->br, m);
                return (err);
        }

        /* Call cleanup if number of TX descriptors low */
        if (txr->tx_avail <= EM_TX_CLEANUP_THRESHOLD)
                em_txeof(txr);

        enq = 0;
        if (m == NULL) {
                next = drbr_dequeue(ifp, txr->br);
        } else if (drbr_needs_enqueue(ifp, txr->br)) {
                if ((err = drbr_enqueue(ifp, txr->br, m)) != 0)
                        return (err);
                next = drbr_dequeue(ifp, txr->br);
        } else
                next = m;

        /* Process the queue */
        while (next != NULL) {
                if ((err = em_xmit(txr, &next)) != 0) {
                        if (next != NULL)
                                err = drbr_enqueue(ifp, txr->br, next);
                        break;
                }
                enq++;
                drbr_stats_update(ifp, next->m_pkthdr.len, next->m_flags);
                ETHER_BPF_MTAP(ifp, next);
                if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
                        break;
                if (txr->tx_avail < EM_MAX_SCATTER) {
                        ifp->if_drv_flags |= IFF_DRV_OACTIVE;
                        break;
                }
                next = drbr_dequeue(ifp, txr->br);
        }

        if (enq > 0) {
                /* Set the watchdog */
                txr->watchdog_check = TRUE;
                txr->watchdog_time = ticks;
        }
        return (err);
}

/*
** Multiqueue-capable stack interface; this is not
** yet truly multiqueue, but that is coming...
*/
static int
em_mq_start(struct ifnet *ifp, struct mbuf *m)
{
        struct adapter  *adapter = ifp->if_softc;
        struct tx_ring  *txr;
        int             i, error = 0;

        /* Which queue to use */
        if ((m->m_flags & M_FLOWID) != 0)
                i = m->m_pkthdr.flowid % adapter->num_queues;
        else
                i = curcpu % adapter->num_queues;

        txr = &adapter->tx_rings[i];

        if (EM_TX_TRYLOCK(txr)) {
                error = em_mq_start_locked(ifp, txr, m);
                EM_TX_UNLOCK(txr);
        } else
                error = drbr_enqueue(ifp, txr->br, m);

        return (error);
}
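
/*
 * Example (illustrative): on a two-queue 82574, a packet whose mbuf
 * has M_FLOWID set and m_pkthdr.flowid == 5 selects ring 5 % 2 == 1,
 * while untagged packets fall back to curcpu % num_queues; either way
 * the ring is only locked opportunistically, with drbr_enqueue()
 * buffering the frame when the lock is contended.
 */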

/*
** Flush all ring buffers
*/
static void
em_qflush(struct ifnet *ifp)
{
        struct adapter  *adapter = ifp->if_softc;
        struct tx_ring  *txr = adapter->tx_rings;
        struct mbuf     *m;

        for (int i = 0; i < adapter->num_queues; i++, txr++) {
                EM_TX_LOCK(txr);
                while ((m = buf_ring_dequeue_sc(txr->br)) != NULL)
                        m_freem(m);
                EM_TX_UNLOCK(txr);
        }
        if_qflush(ifp);
}

#endif /* EM_MULTIQUEUE */

static void
em_start_locked(struct ifnet *ifp, struct tx_ring *txr)
{
        struct adapter  *adapter = ifp->if_softc;
        struct mbuf     *m_head;

        EM_TX_LOCK_ASSERT(txr);

        if ((ifp->if_drv_flags & (IFF_DRV_RUNNING|IFF_DRV_OACTIVE)) !=
            IFF_DRV_RUNNING)
                return;

        if (!adapter->link_active)
                return;

        /* Call cleanup if number of TX descriptors low */
        if (txr->tx_avail <= EM_TX_CLEANUP_THRESHOLD)
                em_txeof(txr);

        while (!IFQ_DRV_IS_EMPTY(&ifp->if_snd)) {
                if (txr->tx_avail < EM_MAX_SCATTER) {
                        ifp->if_drv_flags |= IFF_DRV_OACTIVE;
                        break;
                }
                IFQ_DRV_DEQUEUE(&ifp->if_snd, m_head);
                if (m_head == NULL)
                        break;
                /*
                 *  Encapsulation can modify our pointer, and/or make it
                 *  NULL on failure.  In that event, we can't requeue.
                 */
                if (em_xmit(txr, &m_head)) {
                        if (m_head == NULL)
                                break;
                        ifp->if_drv_flags |= IFF_DRV_OACTIVE;
                        IFQ_DRV_PREPEND(&ifp->if_snd, m_head);
                        break;
                }

                /* Send a copy of the frame to the BPF listener */
                ETHER_BPF_MTAP(ifp, m_head);

                /* Set timeout in case hardware has problems transmitting. */
                txr->watchdog_time = ticks;
                txr->watchdog_check = TRUE;
        }

        return;
}

static void
em_start(struct ifnet *ifp)
{
        struct adapter  *adapter = ifp->if_softc;
        struct tx_ring  *txr = adapter->tx_rings;

        if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
                EM_TX_LOCK(txr);
                em_start_locked(ifp, txr);
                EM_TX_UNLOCK(txr);
        }
        return;
}

/*********************************************************************
 *  Ioctl entry point
 *
 *  em_ioctl is called when the user wants to configure the
 *  interface.
 *
 *  return 0 on success, positive on failure
 **********************************************************************/

static int
em_ioctl(struct ifnet *ifp, u_long command, caddr_t data)
{
        struct adapter  *adapter = ifp->if_softc;
        struct ifreq *ifr = (struct ifreq *)data;
#ifdef INET
        struct ifaddr *ifa = (struct ifaddr *)data;
#endif
        int error = 0;

        if (adapter->in_detach)
                return (error);

        switch (command) {
        case SIOCSIFADDR:
#ifdef INET
                if (ifa->ifa_addr->sa_family == AF_INET) {
                        /*
                         * XXX
                         * Since resetting hardware takes a very long time
                         * and results in link renegotiation, we only
                         * initialize the hardware when it is absolutely
                         * required.
                         */
                        ifp->if_flags |= IFF_UP;
                        if (!(ifp->if_drv_flags & IFF_DRV_RUNNING)) {
                                EM_CORE_LOCK(adapter);
                                em_init_locked(adapter);
                                EM_CORE_UNLOCK(adapter);
                        }
                        arp_ifinit(ifp, ifa);
                } else
#endif
                        error = ether_ioctl(ifp, command, data);
                break;
        case SIOCSIFMTU:
            {
                int max_frame_size;

                IOCTL_DEBUGOUT("ioctl rcv'd: SIOCSIFMTU (Set Interface MTU)");

                EM_CORE_LOCK(adapter);
                switch (adapter->hw.mac.type) {
                case e1000_82571:
                case e1000_82572:
                case e1000_ich9lan:
                case e1000_ich10lan:
                case e1000_82574:
                case e1000_80003es2lan: /* 9K Jumbo Frame size */
                        max_frame_size = 9234;
                        break;
                case e1000_pchlan:
                        max_frame_size = 4096;
                        break;
                /* Adapters that do not support jumbo frames */
                case e1000_82583:
                case e1000_ich8lan:
                        max_frame_size = ETHER_MAX_LEN;
                        break;
                default:
                        max_frame_size = MAX_JUMBO_FRAME_SIZE;
                }
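                /*
                 * Worked example (illustrative): with max_frame_size of
                 * 9234 (the 82571 class above), the largest MTU accepted
                 * below is 9234 - ETHER_HDR_LEN (14) - ETHER_CRC_LEN (4),
                 * i.e. 9216 bytes.
                 */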
                if (ifr->ifr_mtu > max_frame_size - ETHER_HDR_LEN -
                    ETHER_CRC_LEN) {
                        EM_CORE_UNLOCK(adapter);
                        error = EINVAL;
                        break;
                }

                ifp->if_mtu = ifr->ifr_mtu;
                adapter->max_frame_size =
                    ifp->if_mtu + ETHER_HDR_LEN + ETHER_CRC_LEN;
                em_init_locked(adapter);
                EM_CORE_UNLOCK(adapter);
                break;
            }
        case SIOCSIFFLAGS:
                IOCTL_DEBUGOUT("ioctl rcv'd:\
                    SIOCSIFFLAGS (Set Interface Flags)");
                EM_CORE_LOCK(adapter);
                if (ifp->if_flags & IFF_UP) {
                        if ((ifp->if_drv_flags & IFF_DRV_RUNNING)) {
                                if ((ifp->if_flags ^ adapter->if_flags) &
                                    (IFF_PROMISC | IFF_ALLMULTI)) {
                                        em_disable_promisc(adapter);
                                        em_set_promisc(adapter);
                                }
                        } else
                                em_init_locked(adapter);
                } else
                        if (ifp->if_drv_flags & IFF_DRV_RUNNING)
                                em_stop(adapter);
                adapter->if_flags = ifp->if_flags;
                EM_CORE_UNLOCK(adapter);
                break;
        case SIOCADDMULTI:
        case SIOCDELMULTI:
                IOCTL_DEBUGOUT("ioctl rcv'd: SIOC(ADD|DEL)MULTI");
                if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
                        EM_CORE_LOCK(adapter);
                        em_disable_intr(adapter);
                        em_set_multi(adapter);
#ifdef DEVICE_POLLING
                        if (!(ifp->if_capenable & IFCAP_POLLING))
#endif
                                em_enable_intr(adapter);
                        EM_CORE_UNLOCK(adapter);
                }
                break;
        case SIOCSIFMEDIA:
                /* Check SOL/IDER usage */
                EM_CORE_LOCK(adapter);
                if (e1000_check_reset_block(&adapter->hw)) {
                        EM_CORE_UNLOCK(adapter);
                        device_printf(adapter->dev, "Media change is"
                            " blocked due to SOL/IDER session.\n");
                        break;
                }
                EM_CORE_UNLOCK(adapter);
                /* FALLTHROUGH */
        case SIOCGIFMEDIA:
                IOCTL_DEBUGOUT("ioctl rcv'd: \
                    SIOCxIFMEDIA (Get/Set Interface Media)");
                error = ifmedia_ioctl(ifp, ifr, &adapter->media, command);
                break;
        case SIOCSIFCAP:
            {
                int mask, reinit;

                IOCTL_DEBUGOUT("ioctl rcv'd: SIOCSIFCAP (Set Capabilities)");
                reinit = 0;
                mask = ifr->ifr_reqcap ^ ifp->if_capenable;
#ifdef DEVICE_POLLING
                if (mask & IFCAP_POLLING) {
                        if (ifr->ifr_reqcap & IFCAP_POLLING) {
                                error = ether_poll_register(em_poll, ifp);
                                if (error)
                                        return (error);
                                EM_CORE_LOCK(adapter);
                                em_disable_intr(adapter);
                                ifp->if_capenable |= IFCAP_POLLING;
                                EM_CORE_UNLOCK(adapter);
                        } else {
                                error = ether_poll_deregister(ifp);
                                /* Enable interrupt even in error case */
                                EM_CORE_LOCK(adapter);
                                em_enable_intr(adapter);
                                ifp->if_capenable &= ~IFCAP_POLLING;
                                EM_CORE_UNLOCK(adapter);
                        }
                }
#endif
                if (mask & IFCAP_HWCSUM) {
                        ifp->if_capenable ^= IFCAP_HWCSUM;
                        reinit = 1;
                }
                if (mask & IFCAP_TSO4) {
                        ifp->if_capenable ^= IFCAP_TSO4;
                        reinit = 1;
                }
                if (mask & IFCAP_VLAN_HWTAGGING) {
                        ifp->if_capenable ^= IFCAP_VLAN_HWTAGGING;
                        reinit = 1;
                }
                if (mask & IFCAP_VLAN_HWFILTER) {
                        ifp->if_capenable ^= IFCAP_VLAN_HWFILTER;
                        reinit = 1;
                }
                if ((mask & IFCAP_WOL) &&
                    (ifp->if_capabilities & IFCAP_WOL) != 0) {
                        if (mask & IFCAP_WOL_MCAST)
                                ifp->if_capenable ^= IFCAP_WOL_MCAST;
                        if (mask & IFCAP_WOL_MAGIC)
                                ifp->if_capenable ^= IFCAP_WOL_MAGIC;
                }
                if (reinit && (ifp->if_drv_flags & IFF_DRV_RUNNING))
                        em_init(adapter);
                VLAN_CAPABILITIES(ifp);
                break;
            }

        default:
                error = ether_ioctl(ifp, command, data);
                break;
        }

        return (error);
}

/*********************************************************************
 *  Init entry point
 *
 *  This routine is used in two ways. It is used by the stack as
 *  the init entry point in the network interface structure. It is
 *  also used by the driver as a hw/sw initialization routine to
 *  get to a consistent state.
 *
 **********************************************************************/

static void
em_init_locked(struct adapter *adapter)
{
        struct ifnet    *ifp = adapter->ifp;
        device_t        dev = adapter->dev;
        u32             pba;

        INIT_DEBUGOUT("em_init: begin");

        EM_CORE_LOCK_ASSERT(adapter);

        em_disable_intr(adapter);
        callout_stop(&adapter->timer);

        /*
         * Packet Buffer Allocation (PBA)
         * Writing PBA sets the receive portion of the buffer;
         * the remainder is used for the transmit buffer.
         */
        switch (adapter->hw.mac.type) {
        /* Total Packet Buffer on these is 48K */
        case e1000_82571:
        case e1000_82572:
        case e1000_80003es2lan:
                pba = E1000_PBA_32K; /* 32K for Rx, 16K for Tx */
                break;
        case e1000_82573: /* 82573: Total Packet Buffer is 32K */
                pba = E1000_PBA_12K; /* 12K for Rx, 20K for Tx */
                break;
        case e1000_82574:
        case e1000_82583:
                pba = E1000_PBA_20K; /* 20K for Rx, 20K for Tx */
                break;
        case e1000_ich9lan:
        case e1000_ich10lan:
        case e1000_pchlan:
                pba = E1000_PBA_10K;
                break;
        case e1000_ich8lan:
                pba = E1000_PBA_8K;
                break;
        default:
                if (adapter->max_frame_size > 8192)
                        pba = E1000_PBA_40K; /* 40K for Rx, 24K for Tx */
                else
                        pba = E1000_PBA_48K; /* 48K for Rx, 16K for Tx */
        }

        INIT_DEBUGOUT1("em_init: pba=%dK", pba);
        E1000_WRITE_REG(&adapter->hw, E1000_PBA, pba);

        /* Get the latest mac address, User can use a LAA */
        bcopy(IF_LLADDR(adapter->ifp), adapter->hw.mac.addr,
              ETHER_ADDR_LEN);

        /* Put the address into the Receive Address Array */
        e1000_rar_set(&adapter->hw, adapter->hw.mac.addr, 0);

        /*
         * With the 82571 adapter, RAR[0] may be overwritten
         * when the other port is reset, so we keep a duplicate
         * in RAR[14] for that eventuality; this ensures that
         * the interface continues to function.
         */
        if (adapter->hw.mac.type == e1000_82571) {
                e1000_set_laa_state_82571(&adapter->hw, TRUE);
                e1000_rar_set(&adapter->hw, adapter->hw.mac.addr,
                    E1000_RAR_ENTRIES - 1);
        }

        /* Initialize the hardware */
        em_reset(adapter);
        em_update_link_status(adapter);

        /* Setup VLAN support, basic and offload if available */
        E1000_WRITE_REG(&adapter->hw, E1000_VET, ETHERTYPE_VLAN);

        /* Use real VLAN Filter support? */
        if (ifp->if_capenable & IFCAP_VLAN_HWTAGGING) {
                if (ifp->if_capenable & IFCAP_VLAN_HWFILTER)
                        /* Use real VLAN Filter support */
                        em_setup_vlan_hw_support(adapter);
                else {
                        u32 ctrl;
                        ctrl = E1000_READ_REG(&adapter->hw, E1000_CTRL);
                        ctrl |= E1000_CTRL_VME;
                        E1000_WRITE_REG(&adapter->hw, E1000_CTRL, ctrl);
                }
        }

        /* Set hardware offload abilities */
        ifp->if_hwassist = 0;
        if (ifp->if_capenable & IFCAP_TXCSUM)
                ifp->if_hwassist |= (CSUM_TCP | CSUM_UDP);
        if (ifp->if_capenable & IFCAP_TSO4)
                ifp->if_hwassist |= CSUM_TSO;

        /* Configure for OS presence */
        em_init_manageability(adapter);

        /* Prepare transmit descriptors and buffers */
        em_setup_transmit_structures(adapter);
        em_initialize_transmit_unit(adapter);

        /* Setup Multicast table */
        em_set_multi(adapter);

        /* Prepare receive descriptors and buffers */
        if (em_setup_receive_structures(adapter)) {
                device_printf(dev, "Could not setup receive structures\n");
                em_stop(adapter);
                return;
        }
        em_initialize_receive_unit(adapter);

        /* Don't lose promiscuous settings */
        em_set_promisc(adapter);

        ifp->if_drv_flags |= IFF_DRV_RUNNING;
        ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;

        callout_reset(&adapter->timer, hz, em_local_timer, adapter);
        e1000_clear_hw_cntrs_base_generic(&adapter->hw);

        /* MSI/X configuration for 82574 */
        if (adapter->hw.mac.type == e1000_82574) {
                int tmp;
                tmp = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
                tmp |= E1000_CTRL_EXT_PBA_CLR;
                E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT, tmp);
                /* Set the IVAR - interrupt vector routing. */
                E1000_WRITE_REG(&adapter->hw, E1000_IVAR, adapter->ivars);
        }

#ifdef DEVICE_POLLING
        /*
         * Only enable interrupts if we are not polling; make sure
         * they are off otherwise.
         */
        if (ifp->if_capenable & IFCAP_POLLING)
                em_disable_intr(adapter);
        else
#endif /* DEVICE_POLLING */
                em_enable_intr(adapter);

        /* AMT based hardware can now take control from firmware */
        if (adapter->has_manage && adapter->has_amt)
                em_get_hw_control(adapter);

        /* Don't reset the phy next time init gets called */
        adapter->hw.phy.reset_disable = TRUE;
}

static void
em_init(void *arg)
{
        struct adapter *adapter = arg;

        EM_CORE_LOCK(adapter);
        em_init_locked(adapter);
        EM_CORE_UNLOCK(adapter);
}

#ifdef DEVICE_POLLING
/*********************************************************************
 *
 *  Legacy polling routine: note this only works with a single queue
 *
 *********************************************************************/
1353 static int
1354 em_poll(struct ifnet *ifp, enum poll_cmd cmd, int count)
1355 {
1356         struct adapter *adapter = ifp->if_softc;
1357         struct tx_ring  *txr = adapter->tx_rings;
1358         struct rx_ring  *rxr = adapter->rx_rings;
1359         u32             reg_icr, rx_done = 0;
1360
1361         EM_CORE_LOCK(adapter);
1362         if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) {
1363                 EM_CORE_UNLOCK(adapter);
1364                 return (rx_done);
1365         }
1366
1367         if (cmd == POLL_AND_CHECK_STATUS) {
1368                 reg_icr = E1000_READ_REG(&adapter->hw, E1000_ICR);
1369                 if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
1370                         callout_stop(&adapter->timer);
1371                         adapter->hw.mac.get_link_status = 1;
1372                         em_update_link_status(adapter);
1373                         callout_reset(&adapter->timer, hz,
1374                             em_local_timer, adapter);
1375                 }
1376         }
1377         EM_CORE_UNLOCK(adapter);
1378
1379         rx_done = em_rxeof(rxr, count);
1380
1381         EM_TX_LOCK(txr);
1382         em_txeof(txr);
1383 #ifdef EM_MULTIQUEUE
1384         if (!drbr_empty(ifp, txr->br))
1385                 em_mq_start_locked(ifp, txr, NULL);
1386 #else
1387         if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
1388                 em_start_locked(ifp, txr);
1389 #endif
1390         EM_TX_UNLOCK(txr);
1391
1392         return (rx_done);
1393 }
1394 #endif /* DEVICE_POLLING */
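/*
 * Usage note (editorial addition, not in the original source): polling
 * requires "options DEVICE_POLLING" in the kernel configuration; it is
 * then toggled per interface, e.g. "ifconfig em0 polling" /
 * "ifconfig em0 -polling", which flips IFCAP_POLLING in if_capenable
 * and steers em_init_locked() above to leave interrupts disabled.
 */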
1395
1396
1397 /*********************************************************************
1398  *
1399  *  Fast Legacy/MSI Combined Interrupt Service routine  
1400  *
1401  *********************************************************************/
1402 static int
1403 em_irq_fast(void *arg)
1404 {
1405         struct adapter  *adapter = arg;
1406         struct ifnet    *ifp;
1407         u32             reg_icr;
1408
1409         ifp = adapter->ifp;
1410
1411         reg_icr = E1000_READ_REG(&adapter->hw, E1000_ICR);
1412
1413         /* Hot eject?  */
1414         if (reg_icr == 0xffffffff)
1415                 return FILTER_STRAY;
1416
1417         /* Definitely not our interrupt.  */
1418         if (reg_icr == 0x0)
1419                 return FILTER_STRAY;
1420
1421         /*
1422          * Starting with the 82571 chip, bit 31 should be used to
1423          * determine whether the interrupt belongs to us.
1424          */
1425         if (adapter->hw.mac.type >= e1000_82571 &&
1426             (reg_icr & E1000_ICR_INT_ASSERTED) == 0)
1427                 return FILTER_STRAY;
1428
1429         em_disable_intr(adapter);
1430         taskqueue_enqueue(adapter->tq, &adapter->que_task);
1431
1432         /* Link status change */
1433         if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
1434                 adapter->hw.mac.get_link_status = 1;
1435                 taskqueue_enqueue(taskqueue_fast, &adapter->link_task);
1436         }
1437
1438         if (reg_icr & E1000_ICR_RXO)
1439                 adapter->rx_overruns++;
1440         return FILTER_HANDLED;
1441 }
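/*
 * Editorial note: em_irq_fast() is the filter half of a split
 * interrupt.  It runs in interrupt context, so it only classifies the
 * cause and masks further interrupts; the heavy RX/TX cleanup runs in
 * em_handle_que() below from a taskqueue thread, which re-enables
 * interrupts once the rings are drained.
 */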
1442
1443 /* Combined RX/TX handler, used by Legacy and MSI */
1444 static void
1445 em_handle_que(void *context, int pending)
1446 {
1447         struct adapter  *adapter = context;
1448         struct ifnet    *ifp = adapter->ifp;
1449         struct tx_ring  *txr = adapter->tx_rings;
1450         struct rx_ring  *rxr = adapter->rx_rings;
1451         bool            more_rx;
1452
1453
1454         if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
1455                 more_rx = em_rxeof(rxr, adapter->rx_process_limit);
1456
1457                 EM_TX_LOCK(txr);
1458                 em_txeof(txr);
1459 #ifdef EM_MULTIQUEUE
1460                 if (!drbr_empty(ifp, txr->br))
1461                         em_mq_start_locked(ifp, txr, NULL);
1462 #else
1463                 if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
1464                         em_start_locked(ifp, txr);
1465 #endif
1466                 EM_TX_UNLOCK(txr);
1467                 if (more_rx) {
1468                         taskqueue_enqueue(adapter->tq, &adapter->que_task);
1469                         return;
1470                 }
1471         }
1472
1473         em_enable_intr(adapter);
1474         return;
1475 }
1476
1477
1478 /*********************************************************************
1479  *
1480  *  MSIX Interrupt Service Routines
1481  *
1482  **********************************************************************/
1483 static void
1484 em_msix_tx(void *arg)
1485 {
1486         struct tx_ring *txr = arg;
1487         struct adapter *adapter = txr->adapter;
1488         bool            more;
1489
1490         ++txr->tx_irq;
1491         EM_TX_LOCK(txr);
1492         more = em_txeof(txr);
1493         EM_TX_UNLOCK(txr);
1494         if (more)
1495                 taskqueue_enqueue(txr->tq, &txr->tx_task);
1496         else
1497                 /* Reenable this interrupt */
1498                 E1000_WRITE_REG(&adapter->hw, E1000_IMS, txr->ims);
1499         return;
1500 }
1501
1502 /*********************************************************************
1503  *
1504  *  MSIX RX Interrupt Service routine
1505  *
1506  **********************************************************************/
1507
1508 static void
1509 em_msix_rx(void *arg)
1510 {
1511         struct rx_ring  *rxr = arg;
1512         struct adapter  *adapter = rxr->adapter;
1513         bool            more;
1514
1515         ++rxr->rx_irq;
1516         more = em_rxeof(rxr, adapter->rx_process_limit);
1517         if (more)
1518                 taskqueue_enqueue(rxr->tq, &rxr->rx_task);
1519         else
1520                 /* Reenable this interrupt */
1521                 E1000_WRITE_REG(&adapter->hw, E1000_IMS, rxr->ims);
1522         return;
1523 }
1524
1525 /*********************************************************************
1526  *
1527  *  MSIX Link Fast Interrupt Service routine
1528  *
1529  **********************************************************************/
1530 static void
1531 em_msix_link(void *arg)
1532 {
1533         struct adapter  *adapter = arg;
1534         u32             reg_icr;
1535
1536         ++adapter->link_irq;
1537         reg_icr = E1000_READ_REG(&adapter->hw, E1000_ICR);
1538
1539         if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
1540                 adapter->hw.mac.get_link_status = 1;
1541                 taskqueue_enqueue(taskqueue_fast, &adapter->link_task);
1542         } else
1543                 E1000_WRITE_REG(&adapter->hw, E1000_IMS,
1544                     EM_MSIX_LINK | E1000_IMS_LSC);
1545         return;
1546 }
1547
1548 static void
1549 em_handle_rx(void *context, int pending)
1550 {
1551         struct rx_ring  *rxr = context;
1552         struct adapter  *adapter = rxr->adapter;
1553         bool            more;
1554
1555         more = em_rxeof(rxr, adapter->rx_process_limit);
1556         if (more)
1557                 taskqueue_enqueue(rxr->tq, &rxr->rx_task);
1558         else
1559                 /* Reenable this interrupt */
1560                 E1000_WRITE_REG(&adapter->hw, E1000_IMS, rxr->ims);
1561 }
1562
1563 static void
1564 em_handle_tx(void *context, int pending)
1565 {
1566         struct tx_ring  *txr = context;
1567         struct adapter  *adapter = txr->adapter;
1568         struct ifnet    *ifp = adapter->ifp;
1569
1570         if (!EM_TX_TRYLOCK(txr))
1571                 return;
1572
1573         em_txeof(txr);
1574
1575 #ifdef EM_MULTIQUEUE
1576         if (!drbr_empty(ifp, txr->br))
1577                 em_mq_start_locked(ifp, txr, NULL);
1578 #else
1579         if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
1580                 em_start_locked(ifp, txr);
1581 #endif
1582         E1000_WRITE_REG(&adapter->hw, E1000_IMS, txr->ims);
1583         EM_TX_UNLOCK(txr);
1584 }
1585
1586 static void
1587 em_handle_link(void *context, int pending)
1588 {
1589         struct adapter  *adapter = context;
1590         struct ifnet *ifp = adapter->ifp;
1591
1592         if (!(ifp->if_drv_flags & IFF_DRV_RUNNING))
1593                 return;
1594
1595         EM_CORE_LOCK(adapter);
1596         callout_stop(&adapter->timer);
1597         em_update_link_status(adapter);
1598         callout_reset(&adapter->timer, hz, em_local_timer, adapter);
1599         E1000_WRITE_REG(&adapter->hw, E1000_IMS,
1600             EM_MSIX_LINK | E1000_IMS_LSC);
1601         EM_CORE_UNLOCK(adapter);
1602 }
1603
1604
1605 /*********************************************************************
1606  *
1607  *  Media Ioctl callback
1608  *
1609  *  This routine is called whenever the user queries the status of
1610  *  the interface using ifconfig.
1611  *
1612  **********************************************************************/
1613 static void
1614 em_media_status(struct ifnet *ifp, struct ifmediareq *ifmr)
1615 {
1616         struct adapter *adapter = ifp->if_softc;
1617         u_char fiber_type = IFM_1000_SX;
1618
1619         INIT_DEBUGOUT("em_media_status: begin");
1620
1621         EM_CORE_LOCK(adapter);
1622         em_update_link_status(adapter);
1623
1624         ifmr->ifm_status = IFM_AVALID;
1625         ifmr->ifm_active = IFM_ETHER;
1626
1627         if (!adapter->link_active) {
1628                 EM_CORE_UNLOCK(adapter);
1629                 return;
1630         }
1631
1632         ifmr->ifm_status |= IFM_ACTIVE;
1633
1634         if ((adapter->hw.phy.media_type == e1000_media_type_fiber) ||
1635             (adapter->hw.phy.media_type == e1000_media_type_internal_serdes)) {
1636                 ifmr->ifm_active |= fiber_type | IFM_FDX;
1637         } else {
1638                 switch (adapter->link_speed) {
1639                 case 10:
1640                         ifmr->ifm_active |= IFM_10_T;
1641                         break;
1642                 case 100:
1643                         ifmr->ifm_active |= IFM_100_TX;
1644                         break;
1645                 case 1000:
1646                         ifmr->ifm_active |= IFM_1000_T;
1647                         break;
1648                 }
1649                 if (adapter->link_duplex == FULL_DUPLEX)
1650                         ifmr->ifm_active |= IFM_FDX;
1651                 else
1652                         ifmr->ifm_active |= IFM_HDX;
1653         }
1654         EM_CORE_UNLOCK(adapter);
1655 }
1656
1657 /*********************************************************************
1658  *
1659  *  Media Ioctl callback
1660  *
1661  *  This routine is called when the user changes speed/duplex using
1662  *  media/mediaopt option with ifconfig.
1663  *
1664  **********************************************************************/
1665 static int
1666 em_media_change(struct ifnet *ifp)
1667 {
1668         struct adapter *adapter = ifp->if_softc;
1669         struct ifmedia  *ifm = &adapter->media;
1670
1671         INIT_DEBUGOUT("em_media_change: begin");
1672
1673         if (IFM_TYPE(ifm->ifm_media) != IFM_ETHER)
1674                 return (EINVAL);
1675
1676         EM_CORE_LOCK(adapter);
1677         switch (IFM_SUBTYPE(ifm->ifm_media)) {
1678         case IFM_AUTO:
1679                 adapter->hw.mac.autoneg = DO_AUTO_NEG;
1680                 adapter->hw.phy.autoneg_advertised = AUTONEG_ADV_DEFAULT;
1681                 break;
1682         case IFM_1000_LX:
1683         case IFM_1000_SX:
1684         case IFM_1000_T:
1685                 adapter->hw.mac.autoneg = DO_AUTO_NEG;
1686                 adapter->hw.phy.autoneg_advertised = ADVERTISE_1000_FULL;
1687                 break;
1688         case IFM_100_TX:
1689                 adapter->hw.mac.autoneg = FALSE;
1690                 adapter->hw.phy.autoneg_advertised = 0;
1691                 if ((ifm->ifm_media & IFM_GMASK) == IFM_FDX)
1692                         adapter->hw.mac.forced_speed_duplex = ADVERTISE_100_FULL;
1693                 else
1694                         adapter->hw.mac.forced_speed_duplex = ADVERTISE_100_HALF;
1695                 break;
1696         case IFM_10_T:
1697                 adapter->hw.mac.autoneg = FALSE;
1698                 adapter->hw.phy.autoneg_advertised = 0;
1699                 if ((ifm->ifm_media & IFM_GMASK) == IFM_FDX)
1700                         adapter->hw.mac.forced_speed_duplex = ADVERTISE_10_FULL;
1701                 else
1702                         adapter->hw.mac.forced_speed_duplex = ADVERTISE_10_HALF;
1703                 break;
1704         default:
1705                 device_printf(adapter->dev, "Unsupported media type\n");
1706         }
1707
1708         /* As the speed/duplex settings may have changed, we need to
1709          * reset the PHY.
1710          */
1711         adapter->hw.phy.reset_disable = FALSE;
1712
1713         em_init_locked(adapter);
1714         EM_CORE_UNLOCK(adapter);
1715
1716         return (0);
1717 }
1718
1719 /*********************************************************************
1720  *
1721  *  This routine maps the mbufs to tx descriptors.
1722  *
1723  *  return 0 on success, positive on failure
1724  **********************************************************************/
1725
1726 static int
1727 em_xmit(struct tx_ring *txr, struct mbuf **m_headp)
1728 {
1729         struct adapter          *adapter = txr->adapter;
1730         bus_dma_segment_t       segs[EM_MAX_SCATTER];
1731         bus_dmamap_t            map;
1732         struct em_buffer        *tx_buffer, *tx_buffer_mapped;
1733         struct e1000_tx_desc    *ctxd = NULL;
1734         struct mbuf             *m_head;
1735         u32                     txd_upper, txd_lower, txd_used, txd_saved;
1736         int                     nsegs, i, j, first, last = 0;
1737         int                     error, do_tso, tso_desc = 0;
1738
1739         m_head = *m_headp;
1740         txd_upper = txd_lower = txd_used = txd_saved = 0;
1741         do_tso = ((m_head->m_pkthdr.csum_flags & CSUM_TSO) != 0);
1742
1743         /*
1744          * TSO workaround:
1745          *  If an mbuf contains only a header, we need
1746          *  to pull 4 bytes of data into it.
1747          */
1748         if (do_tso && (m_head->m_len <= M_TSO_LEN)) {
1749                 m_head = m_pullup(m_head, M_TSO_LEN + 4);
1750                 *m_headp = m_head;
1751                 if (m_head == NULL)
1752                         return (ENOBUFS);
1753         }
1754
1755         /*
1756          * Map the packet for DMA
1757          *
1758          * Capture the first descriptor index,
1759          * this descriptor will have the index
1760          * of the EOP which is the only one that
1761          * now gets a DONE bit writeback.
1762          */
1763         first = txr->next_avail_desc;
1764         tx_buffer = &txr->tx_buffers[first];
1765         tx_buffer_mapped = tx_buffer;
1766         map = tx_buffer->map;
1767
1768         error = bus_dmamap_load_mbuf_sg(txr->txtag, map,
1769             *m_headp, segs, &nsegs, BUS_DMA_NOWAIT);
1770
1771         /*
1772          * There are two types of errors we can (try) to handle:
1773          * - EFBIG means the mbuf chain was too long and bus_dma ran
1774          *   out of segments.  Defragment the mbuf chain and try again.
1775          * - ENOMEM means bus_dma could not obtain enough bounce buffers
1776          *   at this point in time.  Defer sending and try again later.
1777          * All other errors, in particular EINVAL, are fatal and prevent the
1778          * mbuf chain from ever going through.  Drop it and report error.
1779          */
1780         if (error == EFBIG) {
1781                 struct mbuf *m;
1782
1783                 m = m_defrag(*m_headp, M_DONTWAIT);
1784                 if (m == NULL) {
1785                         adapter->mbuf_alloc_failed++;
1786                         m_freem(*m_headp);
1787                         *m_headp = NULL;
1788                         return (ENOBUFS);
1789                 }
1790                 *m_headp = m;
1791
1792                 /* Try it again */
1793                 error = bus_dmamap_load_mbuf_sg(txr->txtag, map,
1794                     *m_headp, segs, &nsegs, BUS_DMA_NOWAIT);
1795
1796                 if (error) {
1797                         adapter->no_tx_dma_setup++;
1798                         m_freem(*m_headp);
1799                         *m_headp = NULL;
1800                         return (error);
1801                 }
        } else if (error == ENOMEM) {
                /* Defer; the mbuf chain is intact and may be retried later */
                adapter->no_tx_dma_setup++;
                return (error);
        } else if (error != 0) {
                /* Fatal (e.g. EINVAL); drop the chain as described above */
                adapter->no_tx_dma_setup++;
                m_freem(*m_headp);
                *m_headp = NULL;
                return (error);
        }
1806
1807         /*
1808          * TSO Hardware workaround, if this packet is not
1809          * TSO, and is only a single descriptor long, and
1810          * it follows a TSO burst, then we need to add a
1811          * sentinel descriptor to prevent premature writeback.
1812          */
1813         if ((do_tso == 0) && (txr->tx_tso == TRUE)) {
1814                 if (nsegs == 1)
1815                         tso_desc = TRUE;
1816                 txr->tx_tso = FALSE;
1817         }
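        /*
         * Illustrative example (editorial): after a TSO burst, a small
         * single-segment frame such as a 42-byte ARP reply reaches this
         * point with do_tso == 0 and nsegs == 1, so tso_desc is set and
         * the loop below emits two descriptors for it -- the payload
         * minus 4 bytes, then a 4-byte sentinel -- which keeps the DD
         * writeback from being reported prematurely.
         */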
1818
1819         if (nsegs > (txr->tx_avail - 2)) {
1820                 txr->no_desc_avail++;
1821                 bus_dmamap_unload(txr->txtag, map);
1822                 return (ENOBUFS);
1823         }
1824         m_head = *m_headp;
1825
1826         /* Do hardware assists */
1827 #if __FreeBSD_version >= 700000
1828         if (m_head->m_pkthdr.csum_flags & CSUM_TSO) {
1829                 error = em_tso_setup(txr, m_head, &txd_upper, &txd_lower);
1830                 if (error != TRUE)
1831                         return (ENXIO); /* something foobar */
1832                 /* we need to make a final sentinel transmit desc */
1833                 tso_desc = TRUE;
1834         } else
1835 #endif
1836         if (m_head->m_pkthdr.csum_flags & CSUM_OFFLOAD)
1837                 em_transmit_checksum_setup(txr,  m_head,
1838                     &txd_upper, &txd_lower);
1839
1840         i = txr->next_avail_desc;
1841
1842         /* Set up our transmit descriptors */
1843         for (j = 0; j < nsegs; j++) {
1844                 bus_size_t seg_len;
1845                 bus_addr_t seg_addr;
1846
1847                 tx_buffer = &txr->tx_buffers[i];
1848                 ctxd = &txr->tx_base[i];
1849                 seg_addr = segs[j].ds_addr;
1850                 seg_len  = segs[j].ds_len;
1851                 /*
1852                 ** TSO Workaround:
1853                 ** If this is the last descriptor, we want to
1854                 ** split it so we have a small final sentinel
1855                 */
1856                 if (tso_desc && (j == (nsegs -1)) && (seg_len > 8)) {
1857                         seg_len -= 4;
1858                         ctxd->buffer_addr = htole64(seg_addr);
1859                         ctxd->lower.data = htole32(
1860                         adapter->txd_cmd | txd_lower | seg_len);
1861                         ctxd->upper.data =
1862                             htole32(txd_upper);
1863                         if (++i == adapter->num_tx_desc)
1864                                 i = 0;
1865                         /* Now make the sentinel */     
1866                         ++txd_used; /* using an extra txd */
1867                         ctxd = &txr->tx_base[i];
1868                         tx_buffer = &txr->tx_buffers[i];
1869                         ctxd->buffer_addr =
1870                             htole64(seg_addr + seg_len);
1871                         ctxd->lower.data = htole32(
1872                         adapter->txd_cmd | txd_lower | 4);
1873                         ctxd->upper.data =
1874                             htole32(txd_upper);
1875                         last = i;
1876                         if (++i == adapter->num_tx_desc)
1877                                 i = 0;
1878                 } else {
1879                         ctxd->buffer_addr = htole64(seg_addr);
1880                         ctxd->lower.data = htole32(
1881                         adapter->txd_cmd | txd_lower | seg_len);
1882                         ctxd->upper.data =
1883                             htole32(txd_upper);
1884                         last = i;
1885                         if (++i == adapter->num_tx_desc)
1886                                 i = 0;
1887                 }
1888                 tx_buffer->m_head = NULL;
1889                 tx_buffer->next_eop = -1;
1890         }
1891
1892         txr->next_avail_desc = i;
1893         txr->tx_avail -= nsegs;
1894         if (tso_desc) /* TSO used an extra for sentinel */
1895                 txr->tx_avail -= txd_used;
1896
1897         if (m_head->m_flags & M_VLANTAG) {
1898                 /* Set the vlan id. */
1899                 ctxd->upper.fields.special =
1900                     htole16(m_head->m_pkthdr.ether_vtag);
1901                 /* Tell hardware to add tag */
1902                 ctxd->lower.data |= htole32(E1000_TXD_CMD_VLE);
1903         }
1904
1905         tx_buffer->m_head = m_head;
1906         tx_buffer_mapped->map = tx_buffer->map;
1907         tx_buffer->map = map;
1908         bus_dmamap_sync(txr->txtag, map, BUS_DMASYNC_PREWRITE);
1909
1910         /*
1911          * Last Descriptor of Packet
1912          * needs End Of Packet (EOP)
1913          * and Report Status (RS)
1914          */
1915         ctxd->lower.data |=
1916             htole32(E1000_TXD_CMD_EOP | E1000_TXD_CMD_RS);
1917         /*
1918          * Keep track in the first buffer which
1919          * descriptor will be written back
1920          */
1921         tx_buffer = &txr->tx_buffers[first];
1922         tx_buffer->next_eop = last;
1923
1924         /*
1925          * Advance the Transmit Descriptor Tail (TDT), this tells the E1000
1926          * that this frame is available to transmit.
1927          */
1928         bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
1929             BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
1930         E1000_WRITE_REG(&adapter->hw, E1000_TDT(txr->me), i);
1931
1932         return (0);
1933 }
1934
1935 static void
1936 em_set_promisc(struct adapter *adapter)
1937 {
1938         struct ifnet    *ifp = adapter->ifp;
1939         u32             reg_rctl;
1940
1941         reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
1942
1943         if (ifp->if_flags & IFF_PROMISC) {
1944                 reg_rctl |= (E1000_RCTL_UPE | E1000_RCTL_MPE);
1945                 /* Turn this on if you want to see bad packets */
1946                 if (em_debug_sbp)
1947                         reg_rctl |= E1000_RCTL_SBP;
1948                 E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
1949         } else if (ifp->if_flags & IFF_ALLMULTI) {
1950                 reg_rctl |= E1000_RCTL_MPE;
1951                 reg_rctl &= ~E1000_RCTL_UPE;
1952                 E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
1953         }
1954 }
1955
1956 static void
1957 em_disable_promisc(struct adapter *adapter)
1958 {
1959         u32     reg_rctl;
1960
1961         reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
1962
1963         reg_rctl &=  (~E1000_RCTL_UPE);
1964         reg_rctl &=  (~E1000_RCTL_MPE);
1965         reg_rctl &=  (~E1000_RCTL_SBP);
1966         E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
1967 }
1968
1969
1970 /*********************************************************************
1971  *  Multicast Update
1972  *
1973  *  This routine is called whenever multicast address list is updated.
1974  *
1975  **********************************************************************/
1976
1977 static void
1978 em_set_multi(struct adapter *adapter)
1979 {
1980         struct ifnet    *ifp = adapter->ifp;
1981         struct ifmultiaddr *ifma;
1982         u32 reg_rctl = 0;
1983         u8  *mta; /* Multicast array memory */
1984         int mcnt = 0;
1985
1986         IOCTL_DEBUGOUT("em_set_multi: begin");
1987
1988         if (adapter->hw.mac.type == e1000_82542 && 
1989             adapter->hw.revision_id == E1000_REVISION_2) {
1990                 reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
1991                 if (adapter->hw.bus.pci_cmd_word & CMD_MEM_WRT_INVALIDATE)
1992                         e1000_pci_clear_mwi(&adapter->hw);
1993                 reg_rctl |= E1000_RCTL_RST;
1994                 E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
1995                 msec_delay(5);
1996         }
1997
1998         /* Allocate temporary memory to setup array */
1999         mta = malloc(sizeof(u8) *
2000             (ETH_ADDR_LEN * MAX_NUM_MULTICAST_ADDRESSES),
2001             M_DEVBUF, M_NOWAIT | M_ZERO);
2002         if (mta == NULL)
2003                 panic("em_set_multi memory failure\n");
2004
2005 #if __FreeBSD_version < 800000
2006         IF_ADDR_LOCK(ifp);
2007 #else
2008         if_maddr_rlock(ifp);
2009 #endif
2010         TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
2011                 if (ifma->ifma_addr->sa_family != AF_LINK)
2012                         continue;
2013
2014                 if (mcnt == MAX_NUM_MULTICAST_ADDRESSES)
2015                         break;
2016
2017                 bcopy(LLADDR((struct sockaddr_dl *)ifma->ifma_addr),
2018                     &mta[mcnt * ETH_ADDR_LEN], ETH_ADDR_LEN);
2019                 mcnt++;
2020         }
2021 #if __FreeBSD_version < 800000
2022         IF_ADDR_UNLOCK(ifp);
2023 #else
2024         if_maddr_runlock(ifp);
2025 #endif
2026         if (mcnt >= MAX_NUM_MULTICAST_ADDRESSES) {
2027                 reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
2028                 reg_rctl |= E1000_RCTL_MPE;
2029                 E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2030         } else
2031                 e1000_update_mc_addr_list(&adapter->hw, mta, mcnt);
2032
2033         if (adapter->hw.mac.type == e1000_82542 && 
2034             adapter->hw.revision_id == E1000_REVISION_2) {
2035                 reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
2036                 reg_rctl &= ~E1000_RCTL_RST;
2037                 E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2038                 msec_delay(5);
2039                 if (adapter->hw.bus.pci_cmd_word & CMD_MEM_WRT_INVALIDATE)
2040                         e1000_pci_set_mwi(&adapter->hw);
2041         }
2042         free(mta, M_DEVBUF);
2043 }
2044
2045
2046 /*********************************************************************
2047  *  Timer routine
2048  *
2049  *  This routine checks for link status and updates statistics.
2050  *
2051  **********************************************************************/
2052
2053 static void
2054 em_local_timer(void *arg)
2055 {
2056         struct adapter  *adapter = arg;
2057         struct ifnet    *ifp = adapter->ifp;
2058         struct tx_ring  *txr = adapter->tx_rings;
2059
2060         EM_CORE_LOCK_ASSERT(adapter);
2061
2062         em_update_link_status(adapter);
2063         em_update_stats_counters(adapter);
2064
2065         /* Reset LAA into RAR[0] on 82571 */
2066         if (e1000_get_laa_state_82571(&adapter->hw) == TRUE)
2067                 e1000_rar_set(&adapter->hw, adapter->hw.mac.addr, 0);
2068
2069         if (em_display_debug_stats && (ifp->if_drv_flags & IFF_DRV_RUNNING))
2070                 em_print_hw_stats(adapter);
2071
2072         /*
2073         ** Check for time since any descriptor was cleaned
2074         */
2075         for (int i = 0; i < adapter->num_queues; i++, txr++) {
2076                 EM_TX_LOCK(txr);
2077                 if (txr->watchdog_check == FALSE) {
2078                         EM_TX_UNLOCK(txr);
2079                         continue;
2080                 }
2081                 if ((ticks - txr->watchdog_time) > EM_WATCHDOG)
2082                         goto hung;
2083                 EM_TX_UNLOCK(txr);
2084         }
2085
2086         callout_reset(&adapter->timer, hz, em_local_timer, adapter);
2087         return;
2088 hung:
2089         device_printf(adapter->dev, "Watchdog timeout -- resetting\n");
2090         ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
2091         adapter->watchdog_events++;
2092         EM_TX_UNLOCK(txr);
2093         em_init_locked(adapter);
2094 }
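/*
 * Editorial note: this per-ring check replaces the stack's old
 * if_watchdog mechanism.  txr->watchdog_time is expected to be
 * refreshed by em_txeof() whenever descriptors complete, so the "hung"
 * path above fires only when a ring has made no progress for
 * EM_WATCHDOG ticks.
 */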
2095
2096
2097 static void
2098 em_update_link_status(struct adapter *adapter)
2099 {
2100         struct e1000_hw *hw = &adapter->hw;
2101         struct ifnet *ifp = adapter->ifp;
2102         device_t dev = adapter->dev;
2103         u32 link_check = 0;
2104
2105         /* Get the cached link value or read phy for real */
2106         switch (hw->phy.media_type) {
2107         case e1000_media_type_copper:
2108                 if (hw->mac.get_link_status) {
2109                         /* Do the work to read phy */
2110                         e1000_check_for_link(hw);
2111                         link_check = !hw->mac.get_link_status;
2112                         if (link_check) /* ESB2 fix */
2113                                 e1000_cfg_on_link_up(hw);
2114                 } else
2115                         link_check = TRUE;
2116                 break;
2117         case e1000_media_type_fiber:
2118                 e1000_check_for_link(hw);
2119                 link_check = (E1000_READ_REG(hw, E1000_STATUS) &
2120                                  E1000_STATUS_LU);
2121                 break;
2122         case e1000_media_type_internal_serdes:
2123                 e1000_check_for_link(hw);
2124                 link_check = adapter->hw.mac.serdes_has_link;
2125                 break;
2126         default:
2127         case e1000_media_type_unknown:
2128                 break;
2129         }
2130
2131         /* Now check for a transition */
2132         if (link_check && (adapter->link_active == 0)) {
2133                 e1000_get_speed_and_duplex(hw, &adapter->link_speed,
2134                     &adapter->link_duplex);
2135                 /* Check if we must disable SPEED_MODE bit on PCI-E */
2136                 if ((adapter->link_speed != SPEED_1000) &&
2137                     ((hw->mac.type == e1000_82571) ||
2138                     (hw->mac.type == e1000_82572))) {
2139                         int tarc0;
2140                         tarc0 = E1000_READ_REG(hw, E1000_TARC(0));
2141                         tarc0 &= ~SPEED_MODE_BIT;
2142                         E1000_WRITE_REG(hw, E1000_TARC(0), tarc0);
2143                 }
2144                 if (bootverbose)
2145                         device_printf(dev, "Link is up %d Mbps %s\n",
2146                             adapter->link_speed,
2147                             ((adapter->link_duplex == FULL_DUPLEX) ?
2148                             "Full Duplex" : "Half Duplex"));
2149                 adapter->link_active = 1;
2150                 adapter->smartspeed = 0;
2151                 ifp->if_baudrate = adapter->link_speed * 1000000;
2152                 if_link_state_change(ifp, LINK_STATE_UP);
2153         } else if (!link_check && (adapter->link_active == 1)) {
2154                 ifp->if_baudrate = adapter->link_speed = 0;
2155                 adapter->link_duplex = 0;
2156                 if (bootverbose)
2157                         device_printf(dev, "Link is Down\n");
2158                 adapter->link_active = 0;
2159                 /* Link down, disable watchdog */
2160                 // JFV change later
2161                 //adapter->watchdog_check = FALSE;
2162                 if_link_state_change(ifp, LINK_STATE_DOWN);
2163         }
2164 }
2165
2166 /*********************************************************************
2167  *
2168  *  This routine disables all traffic on the adapter by issuing a
2169  *  global reset on the MAC and deallocates TX/RX buffers.
2170  *
2171  *  This routine should always be called with BOTH the CORE
2172  *  and TX locks.
2173  **********************************************************************/
2174
2175 static void
2176 em_stop(void *arg)
2177 {
2178         struct adapter  *adapter = arg;
2179         struct ifnet    *ifp = adapter->ifp;
2180         struct tx_ring  *txr = adapter->tx_rings;
2181
2182         EM_CORE_LOCK_ASSERT(adapter);
2183
2184         INIT_DEBUGOUT("em_stop: begin");
2185
2186         em_disable_intr(adapter);
2187         callout_stop(&adapter->timer);
2188
2189         /* Tell the stack that the interface is no longer active */
2190         ifp->if_drv_flags &= ~(IFF_DRV_RUNNING | IFF_DRV_OACTIVE);
2191
2192         /* Unarm watchdog timer. */
2193         for (int i = 0; i < adapter->num_queues; i++, txr++) {
2194                 EM_TX_LOCK(txr);
2195                 txr->watchdog_check = FALSE;
2196                 EM_TX_UNLOCK(txr);
2197         }
2198
2199         e1000_reset_hw(&adapter->hw);
2200         E1000_WRITE_REG(&adapter->hw, E1000_WUC, 0);
2201
2202         e1000_led_off(&adapter->hw);
2203         e1000_cleanup_led(&adapter->hw);
2204 }
2205
2206
2207 /*********************************************************************
2208  *
2209  *  Determine hardware revision.
2210  *
2211  **********************************************************************/
2212 static void
2213 em_identify_hardware(struct adapter *adapter)
2214 {
2215         device_t dev = adapter->dev;
2216
2217         /* Make sure our PCI config space has the necessary stuff set */
2218         adapter->hw.bus.pci_cmd_word = pci_read_config(dev, PCIR_COMMAND, 2);
2219         if (!((adapter->hw.bus.pci_cmd_word & PCIM_CMD_BUSMASTEREN) &&
2220             (adapter->hw.bus.pci_cmd_word & PCIM_CMD_MEMEN))) {
2221                 device_printf(dev, "Memory Access and/or Bus Master bits "
2222                     "were not set!\n");
2223                 adapter->hw.bus.pci_cmd_word |=
2224                 (PCIM_CMD_BUSMASTEREN | PCIM_CMD_MEMEN);
2225                 pci_write_config(dev, PCIR_COMMAND,
2226                     adapter->hw.bus.pci_cmd_word, 2);
2227         }
2228
2229         /* Save off the information about this board */
2230         adapter->hw.vendor_id = pci_get_vendor(dev);
2231         adapter->hw.device_id = pci_get_device(dev);
2232         adapter->hw.revision_id = pci_read_config(dev, PCIR_REVID, 1);
2233         adapter->hw.subsystem_vendor_id =
2234             pci_read_config(dev, PCIR_SUBVEND_0, 2);
2235         adapter->hw.subsystem_device_id =
2236             pci_read_config(dev, PCIR_SUBDEV_0, 2);
2237
2238         /* Do Shared Code Init and Setup */
2239         if (e1000_set_mac_type(&adapter->hw)) {
2240                 device_printf(dev, "Setup init failure\n");
2241                 return;
2242         }
2243 }
2244
2245 static int
2246 em_allocate_pci_resources(struct adapter *adapter)
2247 {
2248         device_t        dev = adapter->dev;
2249         int             rid;
2250
2251         rid = PCIR_BAR(0);
2252         adapter->memory = bus_alloc_resource_any(dev, SYS_RES_MEMORY,
2253             &rid, RF_ACTIVE);
2254         if (adapter->memory == NULL) {
2255                 device_printf(dev, "Unable to allocate bus resource: memory\n");
2256                 return (ENXIO);
2257         }
2258         adapter->osdep.mem_bus_space_tag =
2259             rman_get_bustag(adapter->memory);
2260         adapter->osdep.mem_bus_space_handle =
2261             rman_get_bushandle(adapter->memory);
2262         adapter->hw.hw_addr = (u8 *)&adapter->osdep.mem_bus_space_handle;
2263
2264         /* Default to a single queue */
2265         adapter->num_queues = 1;
2266
2267         /*
2268          * Setup MSI/X or MSI if PCI Express
2269          */
2270         adapter->msix = em_setup_msix(adapter);
2271
2272         adapter->hw.back = &adapter->osdep;
2273
2274         return (0);
2275 }
2276
2277 /*********************************************************************
2278  *
2279  *  Setup the Legacy or MSI Interrupt handler
2280  *
2281  **********************************************************************/
2282 int
2283 em_allocate_legacy(struct adapter *adapter)
2284 {
2285         device_t dev = adapter->dev;
2286         int error, rid = 0;
2287
2288         /* Manually turn off all interrupts */
2289         E1000_WRITE_REG(&adapter->hw, E1000_IMC, 0xffffffff);
2290
2291         if (adapter->msix == 1) /* using MSI */
2292                 rid = 1;
2293         /* We allocate a single interrupt resource */
2294         adapter->res = bus_alloc_resource_any(dev,
2295             SYS_RES_IRQ, &rid, RF_SHAREABLE | RF_ACTIVE);
2296         if (adapter->res == NULL) {
2297                 device_printf(dev, "Unable to allocate bus resource: "
2298                     "interrupt\n");
2299                 return (ENXIO);
2300         }
2301
2302         /*
2303          * Allocate a fast interrupt and the associated
2304          * deferred processing contexts.
2305          */
2306         TASK_INIT(&adapter->que_task, 0, em_handle_que, adapter);
2307         TASK_INIT(&adapter->link_task, 0, em_handle_link, adapter);
2308         adapter->tq = taskqueue_create_fast("em_taskq", M_NOWAIT,
2309             taskqueue_thread_enqueue, &adapter->tq);
2310         taskqueue_start_threads(&adapter->tq, 1, PI_NET, "%s taskq",
2311             device_get_nameunit(adapter->dev));
2312         if ((error = bus_setup_intr(dev, adapter->res, INTR_TYPE_NET,
2313             em_irq_fast, NULL, adapter, &adapter->tag)) != 0) {
2314                 device_printf(dev, "Failed to register fast interrupt "
2315                             "handler: %d\n", error);
2316                 taskqueue_free(adapter->tq);
2317                 adapter->tq = NULL;
2318                 return (error);
2319         }
2320         
2321         return (0);
2322 }
2323
2324 /*********************************************************************
2325  *
2326  *  Setup the MSIX Interrupt handlers
2327  *   This is not really multiqueue, rather
2328  *   it's just multiple interrupt vectors.
2329  *
2330  **********************************************************************/
2331 int
2332 em_allocate_msix(struct adapter *adapter)
2333 {
2334         device_t        dev = adapter->dev;
2335         struct          tx_ring *txr = adapter->tx_rings;
2336         struct          rx_ring *rxr = adapter->rx_rings;
2337         int             error, rid, vector = 0;
2338
2339
2340         /* Make sure all interrupts are disabled */
2341         E1000_WRITE_REG(&adapter->hw, E1000_IMC, 0xffffffff);
2342
2343         /* First set up ring resources */
2344         for (int i = 0; i < adapter->num_queues; i++, txr++, rxr++) {
2345
2346                 /* RX ring */
2347                 rid = vector + 1;
2348
2349                 rxr->res = bus_alloc_resource_any(dev,
2350                     SYS_RES_IRQ, &rid, RF_ACTIVE);
2351                 if (rxr->res == NULL) {
2352                         device_printf(dev,
2353                             "Unable to allocate bus resource: "
2354                             "RX MSIX Interrupt %d\n", i);
2355                         return (ENXIO);
2356                 }
2357                 if ((error = bus_setup_intr(dev, rxr->res,
2358                     INTR_TYPE_NET | INTR_MPSAFE, NULL, em_msix_rx,
2359                     rxr, &rxr->tag)) != 0) {
2360                         device_printf(dev, "Failed to register RX handler");
2361                         return (error);
2362                 }
2363                 rxr->msix = vector++; /* NOTE increment vector for TX */
2364                 TASK_INIT(&rxr->rx_task, 0, em_handle_rx, rxr);
2365                 rxr->tq = taskqueue_create_fast("em_rxq", M_NOWAIT,
2366                     taskqueue_thread_enqueue, &rxr->tq);
2367                 taskqueue_start_threads(&rxr->tq, 1, PI_NET, "%s rxq",
2368                     device_get_nameunit(adapter->dev));
2369                 /*
2370                 ** Set the bit to enable interrupt
2371                 ** in E1000_IMS -- bits 20 and 21
2372                 ** are for RX0 and RX1, note this has
2373                 ** NOTHING to do with the MSIX vector
2374                 */
2375                 rxr->ims = 1 << (20 + i);
2376                 adapter->ivars |= (8 | rxr->msix) << (i * 4);
2377
2378                 /* TX ring */
2379                 rid = vector + 1;
2380                 txr->res = bus_alloc_resource_any(dev,
2381                     SYS_RES_IRQ, &rid, RF_ACTIVE);
2382                 if (txr->res == NULL) {
2383                         device_printf(dev,
2384                             "Unable to allocate bus resource: "
2385                             "TX MSIX Interrupt %d\n", i);
2386                         return (ENXIO);
2387                 }
2388                 if ((error = bus_setup_intr(dev, txr->res,
2389                     INTR_TYPE_NET | INTR_MPSAFE, NULL, em_msix_tx,
2390                     txr, &txr->tag)) != 0) {
2391                         device_printf(dev, "Failed to register TX handler");
2392                         return (error);
2393                 }
2394                 txr->msix = vector++; /* Increment vector for next pass */
2395                 TASK_INIT(&txr->tx_task, 0, em_handle_tx, txr);
2396                 txr->tq = taskqueue_create_fast("em_txq", M_NOWAIT,
2397                     taskqueue_thread_enqueue, &txr->tq);
2398                 taskqueue_start_threads(&txr->tq, 1, PI_NET, "%s txq",
2399                     device_get_nameunit(adapter->dev));
2400                 /*
2401                 ** Set the bit to enable interrupt
2402                 ** in E1000_IMS -- bits 22 and 23
2403                 ** are for TX0 and TX1, note this has
2404                 ** NOTHING to do with the MSIX vector
2405                 */
2406                 txr->ims = 1 << (22 + i);
2407                 adapter->ivars |= (8 | txr->msix) << (8 + (i * 4));
2408         }
2409
2410         /* Link interrupt */
2411         ++rid;
2412         adapter->res = bus_alloc_resource_any(dev,
2413             SYS_RES_IRQ, &rid, RF_ACTIVE);
2414         if (!adapter->res) {
2415                 device_printf(dev,"Unable to allocate "
2416                     "bus resource: Link interrupt [%d]\n", rid);
2417                 return (ENXIO);
2418         }
2419         /* Set the link handler function */
2420         error = bus_setup_intr(dev, adapter->res,
2421             INTR_TYPE_NET | INTR_MPSAFE, NULL,
2422             em_msix_link, adapter, &adapter->tag);
2423         if (error) {
2424                 adapter->res = NULL;
2425                 device_printf(dev, "Failed to register LINK handler");
2426                 return (error);
2427         }
2428         adapter->linkvec = vector;
2429         adapter->ivars |=  (8 | vector) << 16;
2430         adapter->ivars |= 0x80000000;
2431         TASK_INIT(&adapter->link_task, 0, em_handle_link, adapter);
2432         adapter->tq = taskqueue_create_fast("em_link", M_NOWAIT,
2433             taskqueue_thread_enqueue, &adapter->tq);
2434         taskqueue_start_threads(&adapter->tq, 1, PI_NET, "%s linkq",
2435             device_get_nameunit(adapter->dev));
2436
2437         return (0);
2438 }
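/*
 * Worked example (editorial; the usual single-queue case): with
 * num_queues == 1 the loop above assigns MSIX vector 0 to RX0 and
 * vector 1 to TX0, and the link handler gets vector 2.  Each 4-bit
 * IVAR field is (valid bit 8 | vector), so the word composed here is
 *
 *     ivars = 0x80000000              (high bit, as set by this driver)
 *           | (8 | 2) << 16           (link)
 *           | (8 | 1) << 8            (TX0)
 *           | (8 | 0)                 (RX0)
 *           = 0x800a0908
 *
 * which em_init_locked() writes to E1000_IVAR on the 82574.  The IMS
 * bits are independent of the vector numbers: rxr->ims = 1 << 20 and
 * txr->ims = 1 << 22 for queue 0.
 */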
2439
2440
2441 static void
2442 em_free_pci_resources(struct adapter *adapter)
2443 {
2444         device_t        dev = adapter->dev;
2445         struct tx_ring  *txr;
2446         struct rx_ring  *rxr;
2447         int             rid;
2448
2449
2450         /*
2451         ** Release all the queue interrupt resources:
2452         */
2453         for (int i = 0; i < adapter->num_queues; i++) {
2454                 txr = &adapter->tx_rings[i];
2455                 rxr = &adapter->rx_rings[i];
2456                 rid = txr->msix + 1;
2457                 if (txr->tag != NULL) {
2458                         bus_teardown_intr(dev, txr->res, txr->tag);
2459                         txr->tag = NULL;
2460                 }
2461                 if (txr->res != NULL)
2462                         bus_release_resource(dev, SYS_RES_IRQ,
2463                             rid, txr->res);
2464                 rid = rxr->msix + 1;
2465                 if (rxr->tag != NULL) {
2466                         bus_teardown_intr(dev, rxr->res, rxr->tag);
2467                         rxr->tag = NULL;
2468                 }
2469                 if (rxr->res != NULL)
2470                         bus_release_resource(dev, SYS_RES_IRQ,
2471                             rid, rxr->res);
2472         }
2473
2474         if (adapter->linkvec) /* we are doing MSIX */
2475                 rid = adapter->linkvec + 1;
2476         else
2477                 rid = (adapter->msix != 0) ? 1 : 0;
2478
2479         if (adapter->tag != NULL) {
2480                 bus_teardown_intr(dev, adapter->res, adapter->tag);
2481                 adapter->tag = NULL;
2482         }
2483
2484         if (adapter->res != NULL)
2485                 bus_release_resource(dev, SYS_RES_IRQ, rid, adapter->res);
2486
2487
2488         if (adapter->msix)
2489                 pci_release_msi(dev);
2490
2491         if (adapter->msix_mem != NULL)
2492                 bus_release_resource(dev, SYS_RES_MEMORY,
2493                     PCIR_BAR(EM_MSIX_BAR), adapter->msix_mem);
2494
2495         if (adapter->memory != NULL)
2496                 bus_release_resource(dev, SYS_RES_MEMORY,
2497                     PCIR_BAR(0), adapter->memory);
2498
2499         if (adapter->flash != NULL)
2500                 bus_release_resource(dev, SYS_RES_MEMORY,
2501                     EM_FLASH, adapter->flash);
2502 }
2503
2504 /*
2505  * Setup MSI or MSI/X
2506  */
2507 static int
2508 em_setup_msix(struct adapter *adapter)
2509 {
2510         device_t dev = adapter->dev;
2511         int val = 0;
2512
2513
2514         /* Setup MSI/X for Hartwell */
2515         if ((adapter->hw.mac.type == e1000_82574) &&
2516             (em_enable_msix == TRUE)) {
2517                 /* Map the MSIX BAR */
2518                 int rid = PCIR_BAR(EM_MSIX_BAR);
2519                 adapter->msix_mem = bus_alloc_resource_any(dev,
2520                     SYS_RES_MEMORY, &rid, RF_ACTIVE);
2521                 if (!adapter->msix_mem) {
2522                         /* May not be enabled */
2523                         device_printf(adapter->dev,
2524                             "Unable to map MSIX table \n");
2525                         goto msi;
2526                 }
2527                 val = pci_msix_count(dev); 
2528                 if (val != 5) {
2529                         bus_release_resource(dev, SYS_RES_MEMORY,
2530                             PCIR_BAR(EM_MSIX_BAR), adapter->msix_mem);
2531                         adapter->msix_mem = NULL;
2532                         device_printf(adapter->dev,
2533                             "MSIX vectors wrong, using MSI \n");
2534                         goto msi;
2535                 }
2536                 if (em_msix_queues == 2) {
2537                         val = 5;
2538                         adapter->num_queues = 2;
2539                 } else {
2540                         val = 3;
2541                         adapter->num_queues = 1;
2542                 }
2543                 if (pci_alloc_msix(dev, &val) == 0) {
2544                         device_printf(adapter->dev,
2545                             "Using MSIX interrupts "
2546                             "with %d vectors\n", val);
2547                 }
2548
2549                 return (val);
2550         }
2551 msi:
2552         val = pci_msi_count(dev);
2553         if (val == 1 && pci_alloc_msi(dev, &val) == 0) {
2554                 adapter->msix = 1;
2555                 device_printf(adapter->dev, "Using MSI interrupt\n");
2556                 return (val);
2557         } 
2558         /* Should only happen due to manual intervention */
2559         device_printf(adapter->dev, "Setup MSIX failure\n");
2560         return (0);
2561 }
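/*
 * Editorial summary: the 82574 ("Hartwell") exposes five MSIX vectors.
 * With em_msix_queues == 2 all five are requested (2 RX + 2 TX + link);
 * otherwise three (1 RX + 1 TX + link).  Any other hardware, or any
 * failure above, falls back to a single MSI vector, and finally to a
 * legacy INTx interrupt when even MSI is unavailable.
 */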
2562
2563
2564 /*********************************************************************
2565  *
2566  *  Initialize the hardware to a configuration
2567  *  as specified by the adapter structure.
2568  *
2569  **********************************************************************/
2570 static void
2571 em_reset(struct adapter *adapter)
2572 {
2573         device_t        dev = adapter->dev;
2574         struct e1000_hw *hw = &adapter->hw;
2575         u16             rx_buffer_size;
2576
2577         INIT_DEBUGOUT("em_reset: begin");
2578
2579         /* Set up smart power down as default off on newer adapters. */
2580         if (!em_smart_pwr_down && (hw->mac.type == e1000_82571 ||
2581             hw->mac.type == e1000_82572)) {
2582                 u16 phy_tmp = 0;
2583
2584                 /* Speed up time to link by disabling smart power down. */
2585                 e1000_read_phy_reg(hw, IGP02E1000_PHY_POWER_MGMT, &phy_tmp);
2586                 phy_tmp &= ~IGP02E1000_PM_SPD;
2587                 e1000_write_phy_reg(hw, IGP02E1000_PHY_POWER_MGMT, phy_tmp);
2588         }
2589
2590         /*
2591          * These parameters control the automatic generation (Tx) and
2592          * response (Rx) to Ethernet PAUSE frames.
2593          * - High water mark should allow for at least two frames to be
2594          *   received after sending an XOFF.
2595          * - Low water mark works best when it is very near the high water mark.
2596          *   This allows the receiver to restart by sending XON when it has
2597          *   drained a bit. Here we use an arbitrary value of 1500, which
2598          *   restarts after one full frame is pulled from the buffer. There
2599          *   could be several smaller frames in the buffer, and if so the
2600          *   XON is not triggered until together they reduce the buffer
2601          *   occupancy by 1500 bytes.
2602          * - The pause time is fairly large at 1000 x 512ns = 512 usec.
2603          */
2604         rx_buffer_size = ((E1000_READ_REG(hw, E1000_PBA) & 0xffff) << 10 );
2605
2606         hw->fc.high_water = rx_buffer_size -
2607             roundup2(adapter->max_frame_size, 1024);
2608         hw->fc.low_water = hw->fc.high_water - 1500;
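        /*
         * Worked example (editorial; the numbers are illustrative, not
         * read from any particular part): if the low 16 bits of
         * E1000_PBA read as 24, the RX packet buffer is 24 << 10 =
         * 24576 bytes.  With a max frame of, say, 1522 bytes,
         * roundup2(1522, 1024) = 2048, so high_water = 24576 - 2048 =
         * 22528 and low_water = 22528 - 1500 = 21028.
         */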
2609
2610         if (hw->mac.type == e1000_80003es2lan)
2611                 hw->fc.pause_time = 0xFFFF;
2612         else
2613                 hw->fc.pause_time = EM_FC_PAUSE_TIME;
2614
2615         hw->fc.send_xon = TRUE;
2616
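        /*
         * Editorial note: the sane em_fc_setting values map onto the
         * shared-code e1000_fc_mode enum, which in the stock e1000
         * shared code is 0 = e1000_fc_none, 1 = e1000_fc_rx_pause,
         * 2 = e1000_fc_tx_pause and 3 = e1000_fc_full.
         */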
2617         /* Set flow control, using the tunable value if it is sane */
2618         if ((em_fc_setting >= 0) && (em_fc_setting < 4))
2619                 hw->fc.requested_mode = em_fc_setting;
2620         else
2621                 hw->fc.requested_mode = e1000_fc_none;
2622
2623         /* Override - workaround for PCHLAN issue */
2624         if (hw->mac.type == e1000_pchlan)
2625                 hw->fc.requested_mode = e1000_fc_rx_pause;
2626
2627         /* Issue a global reset */
2628         e1000_reset_hw(hw);
2629         E1000_WRITE_REG(hw, E1000_WUC, 0);
2630
2631         if (e1000_init_hw(hw) < 0) {
2632                 device_printf(dev, "Hardware Initialization Failed\n");
2633                 return;
2634         }
2635
2636         E1000_WRITE_REG(hw, E1000_VET, ETHERTYPE_VLAN);
2637         e1000_get_phy_info(hw);
2638         e1000_check_for_link(hw);
2639         return;
2640 }
2641
2642 /*********************************************************************
2643  *
2644  *  Setup networking device structure and register an interface.
2645  *
2646  **********************************************************************/
2647 static void
2648 em_setup_interface(device_t dev, struct adapter *adapter)
2649 {
2650         struct ifnet   *ifp;
2651
2652         INIT_DEBUGOUT("em_setup_interface: begin");
2653
2654         ifp = adapter->ifp = if_alloc(IFT_ETHER);
2655         if (ifp == NULL)
2656                 panic("%s: can not if_alloc()", device_get_nameunit(dev));
2657         if_initname(ifp, device_get_name(dev), device_get_unit(dev));
2658         ifp->if_mtu = ETHERMTU;
2659         ifp->if_init =  em_init;
2660         ifp->if_softc = adapter;
2661         ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
2662         ifp->if_ioctl = em_ioctl;
2663         ifp->if_start = em_start;
2664         IFQ_SET_MAXLEN(&ifp->if_snd, adapter->num_tx_desc - 1);
2665         ifp->if_snd.ifq_drv_maxlen = adapter->num_tx_desc - 1;
2666         IFQ_SET_READY(&ifp->if_snd);
2667
2668         ether_ifattach(ifp, adapter->hw.mac.addr);
2669
2670         ifp->if_capabilities = ifp->if_capenable = 0;
2671
2672 #ifdef EM_MULTIQUEUE
2673         /* Multiqueue tx functions */
2674         ifp->if_transmit = em_mq_start;
2675         ifp->if_qflush = em_qflush;
2676 #endif  
2677
2678         ifp->if_capabilities |= IFCAP_HWCSUM | IFCAP_VLAN_HWCSUM;
2679         ifp->if_capenable |= IFCAP_HWCSUM | IFCAP_VLAN_HWCSUM;
2680
2681         /* Enable TSO by default, can disable with ifconfig */
2682         ifp->if_capabilities |= IFCAP_TSO4;
2683         ifp->if_capenable |= IFCAP_TSO4;
2684
2685         /*
2686          * Tell the upper layer(s) we
2687          * support full VLAN capability
2688          */
2689         ifp->if_data.ifi_hdrlen = sizeof(struct ether_vlan_header);
2690         ifp->if_capabilities |= IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_MTU;
2691         ifp->if_capenable |= IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_MTU;
2692
2693         /*
2694         ** Don't turn this on by default: if vlans are
2695         ** created on another pseudo device (e.g. lagg)
2696         ** then vlan events are not passed through, breaking
2697         ** operation, but with HW FILTER off it works. If
2698         ** using vlans directly on the em driver you can
2699         ** enable this and get full hardware tag filtering.
2700         */
2701         ifp->if_capabilities |= IFCAP_VLAN_HWFILTER;
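        /*
         * Usage sketch (editorial): since only the capability bit is
         * advertised here, an administrator opts in at runtime, e.g.
         * "ifconfig em0 vlanhwfilter" (assuming the stock ifconfig
         * capability verbs), which sets IFCAP_VLAN_HWFILTER in
         * if_capenable via the driver ioctl path.
         */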
2702
2703 #ifdef DEVICE_POLLING
2704         ifp->if_capabilities |= IFCAP_POLLING;
2705 #endif
2706
2707         /* Enable only WOL MAGIC by default */
2708         if (adapter->wol) {
2709                 ifp->if_capabilities |= IFCAP_WOL;
2710                 ifp->if_capenable |= IFCAP_WOL_MAGIC;
2711         }
2712                 
2713         /*
2714          * Specify the media types supported by this adapter and register
2715          * callbacks to update media and link information
2716          */
2717         ifmedia_init(&adapter->media, IFM_IMASK,
2718             em_media_change, em_media_status);
2719         if ((adapter->hw.phy.media_type == e1000_media_type_fiber) ||
2720             (adapter->hw.phy.media_type == e1000_media_type_internal_serdes)) {
2721                 u_char fiber_type = IFM_1000_SX;        /* default type */
2722
2723                 ifmedia_add(&adapter->media, IFM_ETHER | fiber_type | IFM_FDX, 
2724                             0, NULL);
2725                 ifmedia_add(&adapter->media, IFM_ETHER | fiber_type, 0, NULL);
2726         } else {
2727                 ifmedia_add(&adapter->media, IFM_ETHER | IFM_10_T, 0, NULL);
2728                 ifmedia_add(&adapter->media, IFM_ETHER | IFM_10_T | IFM_FDX,
2729                             0, NULL);
2730                 ifmedia_add(&adapter->media, IFM_ETHER | IFM_100_TX,
2731                             0, NULL);
2732                 ifmedia_add(&adapter->media, IFM_ETHER | IFM_100_TX | IFM_FDX,
2733                             0, NULL);
2734                 if (adapter->hw.phy.type != e1000_phy_ife) {
2735                         ifmedia_add(&adapter->media,
2736                                 IFM_ETHER | IFM_1000_T | IFM_FDX, 0, NULL);
2737                         ifmedia_add(&adapter->media,
2738                                 IFM_ETHER | IFM_1000_T, 0, NULL);
2739                 }
2740         }
2741         ifmedia_add(&adapter->media, IFM_ETHER | IFM_AUTO, 0, NULL);
2742         ifmedia_set(&adapter->media, IFM_ETHER | IFM_AUTO);
2743 }
2744
2745
2746 /*
2747  * Manage DMA'able memory.
2748  */
2749 static void
2750 em_dmamap_cb(void *arg, bus_dma_segment_t *segs, int nseg, int error)
2751 {
2752         if (error)
2753                 return;
2754         *(bus_addr_t *) arg = segs[0].ds_addr;
2755 }
2756
2757 static int
2758 em_dma_malloc(struct adapter *adapter, bus_size_t size,
2759         struct em_dma_alloc *dma, int mapflags)
2760 {
2761         int error;
2762
2763         error = bus_dma_tag_create(bus_get_dma_tag(adapter->dev), /* parent */
2764                                 EM_DBA_ALIGN, 0,        /* alignment, bounds */
2765                                 BUS_SPACE_MAXADDR,      /* lowaddr */
2766                                 BUS_SPACE_MAXADDR,      /* highaddr */
2767                                 NULL, NULL,             /* filter, filterarg */
2768                                 size,                   /* maxsize */
2769                                 1,                      /* nsegments */
2770                                 size,                   /* maxsegsize */
2771                                 0,                      /* flags */
2772                                 NULL,                   /* lockfunc */
2773                                 NULL,                   /* lockarg */
2774                                 &dma->dma_tag);
2775         if (error) {
2776                 device_printf(adapter->dev,
2777                     "%s: bus_dma_tag_create failed: %d\n",
2778                     __func__, error);
2779                 goto fail_0;
2780         }
2781
2782         error = bus_dmamem_alloc(dma->dma_tag, (void**) &dma->dma_vaddr,
2783             BUS_DMA_NOWAIT | BUS_DMA_COHERENT, &dma->dma_map);
2784         if (error) {
2785                 device_printf(adapter->dev,
2786                     "%s: bus_dmamem_alloc(%ju) failed: %d\n",
2787                     __func__, (uintmax_t)size, error);
2788                 goto fail_1;
2789         }
2790
2791         dma->dma_paddr = 0;
2792         error = bus_dmamap_load(dma->dma_tag, dma->dma_map, dma->dma_vaddr,
2793             size, em_dmamap_cb, &dma->dma_paddr, mapflags | BUS_DMA_NOWAIT);
2794         if (error || dma->dma_paddr == 0) {
2795                 device_printf(adapter->dev,
2796                     "%s: bus_dmamap_load failed: %d\n",
2797                     __func__, error);
2798                 goto fail_3;
2799         }
2800
2801         return (0);
2802
2803 fail_3:
2804         bus_dmamap_unload(dma->dma_tag, dma->dma_map);
2805 fail_2:
2806         bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map);
fail_1:
2807         bus_dma_tag_destroy(dma->dma_tag);
2808 fail_0:
2809         dma->dma_map = NULL;
2810         dma->dma_tag = NULL;
2811
2812         return (error);
2813 }
2814
2815 static void
2816 em_dma_free(struct adapter *adapter, struct em_dma_alloc *dma)
2817 {
2818         if (dma->dma_tag == NULL)
2819                 return;
2820         if (dma->dma_map != NULL) {
2821                 bus_dmamap_sync(dma->dma_tag, dma->dma_map,
2822                     BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
2823                 bus_dmamap_unload(dma->dma_tag, dma->dma_map);
2824                 bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map);
2825                 dma->dma_map = NULL;
2826         }
2827         bus_dma_tag_destroy(dma->dma_tag);
2828         dma->dma_tag = NULL;
2829 }
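
/*
 * Illustrative usage sketch (example only, not part of the driver):
 * how a caller typically pairs em_dma_malloc()/em_dma_free().  The
 * ring size below is an assumed example value; the real callers in
 * em_allocate_queues() compute it with roundup2().
 */
#if 0
static int
example_ring_alloc(struct adapter *adapter, struct em_dma_alloc *dma)
{
        bus_size_t size = 256 * sizeof(struct e1000_tx_desc);
        int error;

        /* One contiguous, EM_DBA_ALIGN-aligned, single-segment mapping */
        error = em_dma_malloc(adapter, size, dma, BUS_DMA_NOWAIT);
        if (error)
                return (error);
        /* dma->dma_vaddr is the KVA, dma->dma_paddr the bus address */
        em_dma_free(adapter, dma);      /* also safe after partial setup */
        return (0);
}
#endif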
2830
2831
2832 /*********************************************************************
2833  *
2834  *  Allocate memory for the transmit and receive rings, and then
2835  *  the descriptors associated with each, called only once at attach.
2836  *
2837  **********************************************************************/
2838 static int
2839 em_allocate_queues(struct adapter *adapter)
2840 {
2841         device_t                dev = adapter->dev;
2842         struct tx_ring          *txr = NULL;
2843         struct rx_ring          *rxr = NULL;
2844         int rsize, tsize, error = E1000_SUCCESS;
2845         int txconf = 0, rxconf = 0;
2846
2847
2848         /* Allocate the TX ring struct memory */
2849         if (!(adapter->tx_rings =
2850             (struct tx_ring *) malloc(sizeof(struct tx_ring) *
2851             adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
2852                 device_printf(dev, "Unable to allocate TX ring memory\n");
2853                 error = ENOMEM;
2854                 goto fail;
2855         }
2856
2857         /* Now allocate the RX */
2858         if (!(adapter->rx_rings =
2859             (struct rx_ring *) malloc(sizeof(struct rx_ring) *
2860             adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
2861                 device_printf(dev, "Unable to allocate RX ring memory\n");
2862                 error = ENOMEM;
2863                 goto rx_fail;
2864         }
2865
2866         tsize = roundup2(adapter->num_tx_desc *
2867             sizeof(struct e1000_tx_desc), EM_DBA_ALIGN);
2868         /*
2869          * Now set up the TX queues, txconf is needed to handle the
2870          * possibility that things fail midcourse and we need to
2871          * undo memory gracefully
2872          */ 
2873         for (int i = 0; i < adapter->num_queues; i++, txconf++) {
2874                 /* Set up some basics */
2875                 txr = &adapter->tx_rings[i];
2876                 txr->adapter = adapter;
2877                 txr->me = i;
2878
2879                 /* Initialize the TX lock */
2880                 snprintf(txr->mtx_name, sizeof(txr->mtx_name), "%s:tx(%d)",
2881                     device_get_nameunit(dev), txr->me);
2882                 mtx_init(&txr->tx_mtx, txr->mtx_name, NULL, MTX_DEF);
2883
2884                 if (em_dma_malloc(adapter, tsize,
2885                         &txr->txdma, BUS_DMA_NOWAIT)) {
2886                         device_printf(dev,
2887                             "Unable to allocate TX Descriptor memory\n");
2888                         error = ENOMEM;
2889                         goto err_tx_desc;
2890                 }
2891                 txr->tx_base = (struct e1000_tx_desc *)txr->txdma.dma_vaddr;
2892                 bzero((void *)txr->tx_base, tsize);
2893
2894                 if (em_allocate_transmit_buffers(txr)) {
2895                         device_printf(dev,
2896                             "Critical Failure setting up transmit buffers\n");
2897                         error = ENOMEM;
2898                         goto err_tx_desc;
2899                 }
2900 #if __FreeBSD_version >= 800000
2901                 /* Allocate a buf ring */
2902                 txr->br = buf_ring_alloc(4096, M_DEVBUF,
2903                     M_WAITOK, &txr->tx_mtx);
2904 #endif
2905         }
2906
2907         /*
2908          * Next the RX queues...
2909          */ 
2910         rsize = roundup2(adapter->num_rx_desc *
2911             sizeof(struct e1000_rx_desc), EM_DBA_ALIGN);
2912         for (int i = 0; i < adapter->num_queues; i++, rxconf++) {
2913                 rxr = &adapter->rx_rings[i];
2914                 rxr->adapter = adapter;
2915                 rxr->me = i;
2916
2917                 /* Initialize the RX lock */
2918                 snprintf(rxr->mtx_name, sizeof(rxr->mtx_name), "%s:rx(%d)",
2919                     device_get_nameunit(dev), txr->me);
2920                 mtx_init(&rxr->rx_mtx, rxr->mtx_name, NULL, MTX_DEF);
2921
2922                 if (em_dma_malloc(adapter, rsize,
2923                         &rxr->rxdma, BUS_DMA_NOWAIT)) {
2924                         device_printf(dev,
2925                             "Unable to allocate RX Descriptor memory\n");
2926                         error = ENOMEM;
2927                         goto err_rx_desc;
2928                 }
2929                 rxr->rx_base = (struct e1000_rx_desc *)rxr->rxdma.dma_vaddr;
2930                 bzero((void *)rxr->rx_base, rsize);
2931
2932                 /* Allocate receive buffers for the ring*/
2933                 if (em_allocate_receive_buffers(rxr)) {
2934                         device_printf(dev,
2935                             "Critical Failure setting up receive buffers\n");
2936                         error = ENOMEM;
2937                         goto err_rx_desc;
2938                 }
2939         }
2940
2941         return (0);
2942
2943 err_rx_desc:
2944         for (rxr = adapter->rx_rings; rxconf > 0; rxr++, rxconf--)
2945                 em_dma_free(adapter, &rxr->rxdma);
2946 err_tx_desc:
2947         for (txr = adapter->tx_rings; txconf > 0; txr++, txconf--) {
2948                 em_dma_free(adapter, &txr->txdma);
#if __FreeBSD_version >= 800000
                /* Free the buf ring if this queue had allocated one */
                if (txr->br != NULL)
                        buf_ring_free(txr->br, M_DEVBUF);
#endif
        }
2949         free(adapter->rx_rings, M_DEVBUF);
2950 rx_fail:
2954         free(adapter->tx_rings, M_DEVBUF);
2955 fail:
2956         return (error);
2957 }
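
/*
 * Note on the unwind pattern above: txconf/rxconf count how many rings
 * completed setup, so the error path tears down exactly that many.  A
 * generic sketch of the idiom (setup(), teardown(), ring[] and n are
 * hypothetical placeholders, example only):
 */
#if 0
        int conf = 0;

        for (int i = 0; i < n; i++, conf++)
                if (setup(&ring[i]) != 0)
                        goto err;       /* ring[i] cleaned up after itself */
        return (0);
err:
        while (conf-- > 0)              /* unwind only completed rings */
                teardown(&ring[conf]);
        return (ENOMEM);
#endif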
2958
2959
2960 /*********************************************************************
2961  *
2962  *  Allocate memory for tx_buffer structures. The tx_buffer stores all
2963  *  the information needed to transmit a packet on the wire. This is
2964  *  called only once at attach, setup is done every reset.
2965  *
2966  **********************************************************************/
2967 static int
2968 em_allocate_transmit_buffers(struct tx_ring *txr)
2969 {
2970         struct adapter *adapter = txr->adapter;
2971         device_t dev = adapter->dev;
2972         struct em_buffer *txbuf;
2973         int error, i;
2974
2975         /*
2976          * Setup DMA descriptor areas.
2977          */
2978         if ((error = bus_dma_tag_create(bus_get_dma_tag(dev),
2979                                1, 0,                    /* alignment, bounds */
2980                                BUS_SPACE_MAXADDR,       /* lowaddr */
2981                                BUS_SPACE_MAXADDR,       /* highaddr */
2982                                NULL, NULL,              /* filter, filterarg */
2983                                EM_TSO_SIZE,             /* maxsize */
2984                                EM_MAX_SCATTER,          /* nsegments */
2985                                PAGE_SIZE,               /* maxsegsize */
2986                                0,                       /* flags */
2987                                NULL,                    /* lockfunc */
2988                                NULL,                    /* lockfuncarg */
2989                                &txr->txtag))) {
2990                 device_printf(dev,"Unable to allocate TX DMA tag\n");
2991                 goto fail;
2992         }
2993
2994         if (!(txr->tx_buffers =
2995             (struct em_buffer *) malloc(sizeof(struct em_buffer) *
2996             adapter->num_tx_desc, M_DEVBUF, M_NOWAIT | M_ZERO))) {
2997                 device_printf(dev, "Unable to allocate tx_buffer memory\n");
2998                 error = ENOMEM;
2999                 goto fail;
3000         }
3001
3002         /* Create the descriptor buffer dma maps */
3003         txbuf = txr->tx_buffers;
3004         for (i = 0; i < adapter->num_tx_desc; i++, txbuf++) {
3005                 error = bus_dmamap_create(txr->txtag, 0, &txbuf->map);
3006                 if (error != 0) {
3007                         device_printf(dev, "Unable to create TX DMA map\n");
3008                         goto fail;
3009                 }
3010         }
3011
3012         return 0;
3013 fail:
3014         /* Free everything; this also handles a partial allocation */
3015         em_free_transmit_structures(adapter);
3016         return (error);
3017 }
3018
3019 /*********************************************************************
3020  *
3021  *  Initialize a transmit ring.
3022  *
3023  **********************************************************************/
3024 static void
3025 em_setup_transmit_ring(struct tx_ring *txr)
3026 {
3027         struct adapter *adapter = txr->adapter;
3028         struct em_buffer *txbuf;
3029         int i;
3030
3031         /* Clear the old descriptor contents */
3032         EM_TX_LOCK(txr);
3033         bzero((void *)txr->tx_base,
3034               (sizeof(struct e1000_tx_desc)) * adapter->num_tx_desc);
3035         /* Reset indices */
3036         txr->next_avail_desc = 0;
3037         txr->next_to_clean = 0;
3038
3039         /* Free any existing tx buffers. */
3040         txbuf = txr->tx_buffers;
3041         for (i = 0; i < adapter->num_tx_desc; i++, txbuf++) {
3042                 if (txbuf->m_head != NULL) {
3043                         bus_dmamap_sync(txr->txtag, txbuf->map,
3044                             BUS_DMASYNC_POSTWRITE);
3045                         bus_dmamap_unload(txr->txtag, txbuf->map);
3046                         m_freem(txbuf->m_head);
3047                         txbuf->m_head = NULL;
3048                 }
3049                 /* clear the watch index */
3050                 txbuf->next_eop = -1;
3051         }
3052
3053         /* Set number of descriptors available */
3054         txr->tx_avail = adapter->num_tx_desc;
3055
3056         bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
3057             BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
3058         EM_TX_UNLOCK(txr);
3059 }
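
/*
 * Illustrative invariant for the reset above (example only): right
 * after em_setup_transmit_ring() the ring is empty, so a hypothetical
 * sanity check inside that function could read:
 */
#if 0
        KASSERT(txr->tx_avail == adapter->num_tx_desc &&
            txr->next_avail_desc == txr->next_to_clean,
            ("tx ring not empty after setup"));
#endif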
3060
3061 /*********************************************************************
3062  *
3063  *  Initialize all transmit rings.
3064  *
3065  **********************************************************************/
3066 static void
3067 em_setup_transmit_structures(struct adapter *adapter)
3068 {
3069         struct tx_ring *txr = adapter->tx_rings;
3070
3071         for (int i = 0; i < adapter->num_queues; i++, txr++)
3072                 em_setup_transmit_ring(txr);
3073
3074         return;
3075 }
3076
3077 /*********************************************************************
3078  *
3079  *  Enable transmit unit.
3080  *
3081  **********************************************************************/
3082 static void
3083 em_initialize_transmit_unit(struct adapter *adapter)
3084 {
3085         struct tx_ring  *txr = adapter->tx_rings;
3086         struct e1000_hw *hw = &adapter->hw;
3087         u32     tctl, tarc, tipg = 0;
3088
3089         INIT_DEBUGOUT("em_initialize_transmit_unit: begin");
3090
3091         for (int i = 0; i < adapter->num_queues; i++, txr++) {
3092                 u64 bus_addr = txr->txdma.dma_paddr;
3093                 /* Base and Len of TX Ring */
3094                 E1000_WRITE_REG(hw, E1000_TDLEN(i),
3095                     adapter->num_tx_desc * sizeof(struct e1000_tx_desc));
3096                 E1000_WRITE_REG(hw, E1000_TDBAH(i),
3097                     (u32)(bus_addr >> 32));
3098                 E1000_WRITE_REG(hw, E1000_TDBAL(i),
3099                     (u32)bus_addr);
3100                 /* Init the HEAD/TAIL indices */
3101                 E1000_WRITE_REG(hw, E1000_TDT(i), 0);
3102                 E1000_WRITE_REG(hw, E1000_TDH(i), 0);
3103
3104                 HW_DEBUGOUT2("Base = %x, Length = %x\n",
3105                     E1000_READ_REG(&adapter->hw, E1000_TDBAL(i)),
3106                     E1000_READ_REG(&adapter->hw, E1000_TDLEN(i)));
3107
3108                 txr->watchdog_check = FALSE;
3109         }
3110
3111         /* Set the default values for the Tx Inter Packet Gap timer */
3112         switch (adapter->hw.mac.type) {
3113         case e1000_82542:
3114                 tipg = DEFAULT_82542_TIPG_IPGT;
3115                 tipg |= DEFAULT_82542_TIPG_IPGR1 << E1000_TIPG_IPGR1_SHIFT;
3116                 tipg |= DEFAULT_82542_TIPG_IPGR2 << E1000_TIPG_IPGR2_SHIFT;
3117                 break;
3118         case e1000_80003es2lan:
3119                 tipg = DEFAULT_82543_TIPG_IPGR1;
3120                 tipg |= DEFAULT_80003ES2LAN_TIPG_IPGR2 <<
3121                     E1000_TIPG_IPGR2_SHIFT;
3122                 break;
3123         default:
3124                 if ((adapter->hw.phy.media_type == e1000_media_type_fiber) ||
3125                     (adapter->hw.phy.media_type ==
3126                     e1000_media_type_internal_serdes))
3127                         tipg = DEFAULT_82543_TIPG_IPGT_FIBER;
3128                 else
3129                         tipg = DEFAULT_82543_TIPG_IPGT_COPPER;
3130                 tipg |= DEFAULT_82543_TIPG_IPGR1 << E1000_TIPG_IPGR1_SHIFT;
3131                 tipg |= DEFAULT_82543_TIPG_IPGR2 << E1000_TIPG_IPGR2_SHIFT;
3132         }
3133
3134         E1000_WRITE_REG(&adapter->hw, E1000_TIPG, tipg);
3135         E1000_WRITE_REG(&adapter->hw, E1000_TIDV, adapter->tx_int_delay.value);
3136
3137         if (adapter->hw.mac.type >= e1000_82540)
3138                 E1000_WRITE_REG(&adapter->hw, E1000_TADV,
3139                     adapter->tx_abs_int_delay.value);
3140
3141         if ((adapter->hw.mac.type == e1000_82571) ||
3142             (adapter->hw.mac.type == e1000_82572)) {
3143                 tarc = E1000_READ_REG(&adapter->hw, E1000_TARC(0));
3144                 tarc |= SPEED_MODE_BIT;
3145                 E1000_WRITE_REG(&adapter->hw, E1000_TARC(0), tarc);
3146         } else if (adapter->hw.mac.type == e1000_80003es2lan) {
3147                 tarc = E1000_READ_REG(&adapter->hw, E1000_TARC(0));
3148                 tarc |= 1;
3149                 E1000_WRITE_REG(&adapter->hw, E1000_TARC(0), tarc);
3150                 tarc = E1000_READ_REG(&adapter->hw, E1000_TARC(1));
3151                 tarc |= 1;
3152                 E1000_WRITE_REG(&adapter->hw, E1000_TARC(1), tarc);
3153         }
3154
3155         adapter->txd_cmd = E1000_TXD_CMD_IFCS;
3156         if (adapter->tx_int_delay.value > 0)
3157                 adapter->txd_cmd |= E1000_TXD_CMD_IDE;
3158
3159         /* Program the Transmit Control Register */
3160         tctl = E1000_READ_REG(&adapter->hw, E1000_TCTL);
3161         tctl &= ~E1000_TCTL_CT;
3162         tctl |= (E1000_TCTL_PSP | E1000_TCTL_RTLC | E1000_TCTL_EN |
3163                    (E1000_COLLISION_THRESHOLD << E1000_CT_SHIFT));
3164
3165         if (adapter->hw.mac.type >= e1000_82571)
3166                 tctl |= E1000_TCTL_MULR;
3167
3168         /* This write will effectively turn on the transmit unit. */
3169         E1000_WRITE_REG(&adapter->hw, E1000_TCTL, tctl);
3170
3171 }
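
/*
 * Worked example of the TIPG packing above (illustrative): the three
 * inter-packet-gap timers occupy disjoint bit fields of one 32-bit
 * register, IPGT in the low bits and IPGR1/IPGR2 at their shifts, so
 *
 *      tipg = IPGT | (IPGR1 << E1000_TIPG_IPGR1_SHIFT) |
 *             (IPGR2 << E1000_TIPG_IPGR2_SHIFT);
 *
 * is a plain OR of non-overlapping fields.
 */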
3172
3173
3174 /*********************************************************************
3175  *
3176  *  Free all transmit rings.
3177  *
3178  **********************************************************************/
3179 static void
3180 em_free_transmit_structures(struct adapter *adapter)
3181 {
3182         struct tx_ring *txr = adapter->tx_rings;
3183
3184         for (int i = 0; i < adapter->num_queues; i++, txr++) {
3185                 EM_TX_LOCK(txr);
3186                 em_free_transmit_buffers(txr);
3187                 em_dma_free(adapter, &txr->txdma);
3188                 EM_TX_UNLOCK(txr);
3189                 EM_TX_LOCK_DESTROY(txr);
3190         }
3191
3192         free(adapter->tx_rings, M_DEVBUF);
3193 }
3194
3195 /*********************************************************************
3196  *
3197  *  Free transmit ring related data structures.
3198  *
3199  **********************************************************************/
3200 static void
3201 em_free_transmit_buffers(struct tx_ring *txr)
3202 {
3203         struct adapter          *adapter = txr->adapter;
3204         struct em_buffer        *txbuf;
3205
3206         INIT_DEBUGOUT("free_transmit_ring: begin");
3207
3208         if (txr->tx_buffers == NULL)
3209                 return;
3210
3211         for (int i = 0; i < adapter->num_tx_desc; i++) {
3212                 txbuf = &txr->tx_buffers[i];
3213                 if (txbuf->m_head != NULL) {
3214                         bus_dmamap_sync(txr->txtag, txbuf->map,
3215                             BUS_DMASYNC_POSTWRITE);
3216                         bus_dmamap_unload(txr->txtag,
3217                             txbuf->map);
3218                         m_freem(txbuf->m_head);
3219                         txbuf->m_head = NULL;
3220                         if (txbuf->map != NULL) {
3221                                 bus_dmamap_destroy(txr->txtag,
3222                                     txbuf->map);
3223                                 txbuf->map = NULL;
3224                         }
3225                 } else if (txbuf->map != NULL) {
3226                         bus_dmamap_unload(txr->txtag,
3227                             txbuf->map);
3228                         bus_dmamap_destroy(txr->txtag,
3229                             txbuf->map);
3230                         txbuf->map = NULL;
3231                 }
3232         }
3233 #if __FreeBSD_version >= 800000
3234         if (txr->br != NULL)
3235                 buf_ring_free(txr->br, M_DEVBUF);
3236 #endif
3237         if (txr->tx_buffers != NULL) {
3238                 free(txr->tx_buffers, M_DEVBUF);
3239                 txr->tx_buffers = NULL;
3240         }
3241         if (txr->txtag != NULL) {
3242                 bus_dma_tag_destroy(txr->txtag);
3243                 txr->txtag = NULL;
3244         }
3245         return;
3246 }
3247
3248
3249 /*********************************************************************
3250  *
3251  *  The offload context needs to be set when we transfer the first
3252  *  packet of a particular protocol (TCP/UDP). This routine has been
3253  *  enhanced to deal with inserted VLAN headers and IPv6 (incomplete).
3254  *
3255  *  Added back the old method of keeping the current context type
3256  *  and not setting if unnecessary, as this is reported to be a
3257  *  big performance win.  -jfv
3258  **********************************************************************/
3259 static void
3260 em_transmit_checksum_setup(struct tx_ring *txr, struct mbuf *mp,
3261     u32 *txd_upper, u32 *txd_lower)
3262 {
3263         struct adapter                  *adapter = txr->adapter;
3264         struct e1000_context_desc       *TXD = NULL;
3265         struct em_buffer *tx_buffer;
3266         struct ether_vlan_header *eh;
3267         struct ip *ip = NULL;
3268         struct ip6_hdr *ip6;
3269         int cur, ehdrlen;
3270         u32 cmd, hdr_len, ip_hlen;
3271         u16 etype;
3272         u8 ipproto;
3273
3274
3275         cmd = hdr_len = ipproto = 0;
3276         cur = txr->next_avail_desc;
3277
3278         /*
3279          * Determine where frame payload starts.
3280          * Jump over vlan headers if already present,
3281          * helpful for QinQ too.
3282          */
3283         eh = mtod(mp, struct ether_vlan_header *);
3284         if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
3285                 etype = ntohs(eh->evl_proto);
3286                 ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
3287         } else {
3288                 etype = ntohs(eh->evl_encap_proto);
3289                 ehdrlen = ETHER_HDR_LEN;
3290         }
3291
3292         /*
3293          * We only support TCP/UDP for IPv4 and IPv6 for the moment.
3294          * TODO: Support SCTP too when it hits the tree.
3295          */
3296         switch (etype) {
3297         case ETHERTYPE_IP:
3298                 ip = (struct ip *)(mp->m_data + ehdrlen);
3299                 ip_hlen = ip->ip_hl << 2;
3300
3301                 /* Setup of IP header checksum. */
3302                 if (mp->m_pkthdr.csum_flags & CSUM_IP) {
3303                         /*
3304                          * Start offset for header checksum calculation.
3305                          * End offset for header checksum calculation.
3306                          * Offset of place to put the checksum.
3307                          */
3308                         TXD = (struct e1000_context_desc *)
3309                             &txr->tx_base[cur];
3310                         TXD->lower_setup.ip_fields.ipcss = ehdrlen;
3311                         TXD->lower_setup.ip_fields.ipcse =
3312                             htole16(ehdrlen + ip_hlen);
3313                         TXD->lower_setup.ip_fields.ipcso =
3314                             ehdrlen + offsetof(struct ip, ip_sum);
3315                         cmd |= E1000_TXD_CMD_IP;
3316                         *txd_upper |= E1000_TXD_POPTS_IXSM << 8;
3317                 }
3318
3319                 if (mp->m_len < ehdrlen + ip_hlen)
3320                         return; /* failure */
3321
3322                 hdr_len = ehdrlen + ip_hlen;
3323                 ipproto = ip->ip_p;
3324
3325                 break;
3326         case ETHERTYPE_IPV6:
3327                 ip6 = (struct ip6_hdr *)(mp->m_data + ehdrlen);
3328                 ip_hlen = sizeof(struct ip6_hdr); /* XXX: No header stacking. */
3329
3330                 if (mp->m_len < ehdrlen + ip_hlen)
3331                         return; /* failure */
3332
3333                 /* IPv6 doesn't have a header checksum. */
3334
3335                 hdr_len = ehdrlen + ip_hlen;
3336                 ipproto = ip6->ip6_nxt;
3337
3338                 break;
3339         default:
3340                 *txd_upper = 0;
3341                 *txd_lower = 0;
3342                 return;
3343         }
3344
3345         switch (ipproto) {
3346         case IPPROTO_TCP:
3347                 if (mp->m_pkthdr.csum_flags & CSUM_TCP) {
3348                         *txd_lower = E1000_TXD_CMD_DEXT | E1000_TXD_DTYP_D;
3349                         *txd_upper |= E1000_TXD_POPTS_TXSM << 8;
3350                         /* no need for context if already set */
3351                         if (txr->last_hw_offload == CSUM_TCP)
3352                                 return;
3353                         txr->last_hw_offload = CSUM_TCP;
3354                         /*
3355                          * Start offset for payload checksum calculation.
3356                          * End offset for payload checksum calculation.
3357                          * Offset of place to put the checksum.
3358                          */
3359                         TXD = (struct e1000_context_desc *)
3360                             &txr->tx_base[cur];
3361                         TXD->upper_setup.tcp_fields.tucss = hdr_len;
3362                         TXD->upper_setup.tcp_fields.tucse = htole16(0);
3363                         TXD->upper_setup.tcp_fields.tucso =
3364                             hdr_len + offsetof(struct tcphdr, th_sum);
3365                         cmd |= E1000_TXD_CMD_TCP;
3366                 }
3367                 break;
3368         case IPPROTO_UDP:
3369         {
3370                 if (mp->m_pkthdr.csum_flags & CSUM_UDP) {
3371                         *txd_lower = E1000_TXD_CMD_DEXT | E1000_TXD_DTYP_D;
3372                         *txd_upper |= E1000_TXD_POPTS_TXSM << 8;
3373                         /* no need for context if already set */
3374                         if (txr->last_hw_offload == CSUM_UDP)
3375                                 return;
3376                         txr->last_hw_offload = CSUM_UDP;
3377                         /*
3378                          * Start offset for header checksum calculation.
3379                          * End offset for header checksum calculation.
3380                          * Offset of place to put the checksum.
3381                          */
3382                         TXD = (struct e1000_context_desc *)
3383                             &txr->tx_base[cur];
3384                         TXD->upper_setup.tcp_fields.tucss = hdr_len;
3385                         TXD->upper_setup.tcp_fields.tucse = htole16(0);
3386                         TXD->upper_setup.tcp_fields.tucso =
3387                             hdr_len + offsetof(struct udphdr, uh_sum);
3388                 }
3389                 /* Fall Thru */
3390         }
3391         default:
3392                 break;
3393         }
3394
        /* If no offload context was set up above, there is nothing to commit */
        if (TXD == NULL)
                return;
3395         TXD->tcp_seg_setup.data = htole32(0);
3396         TXD->cmd_and_length =
3397             htole32(adapter->txd_cmd | E1000_TXD_CMD_DEXT | cmd);
3398         tx_buffer = &txr->tx_buffers[cur];
3399         tx_buffer->m_head = NULL;
3400         tx_buffer->next_eop = -1;
3401
3402         if (++cur == adapter->num_tx_desc)
3403                 cur = 0;
3404
3405         txr->tx_avail--;
3406         txr->next_avail_desc = cur;
3407 }
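
/*
 * Worked example for the context descriptor above (illustrative
 * numbers): an untagged TCP/IPv4 frame with a 20-byte IP header gives
 * ehdrlen = 14 and ip_hlen = 20, so
 *
 *      ipcss = 14, ipcse = 14 + 20 = 34, ipcso = 14 + 10 = 24
 *      tucss = 34, tucse = 0 (checksum to end of frame),
 *      tucso = 34 + offsetof(struct tcphdr, th_sum) = 34 + 16 = 50
 *
 * i.e. the hardware inserts the TCP checksum 50 bytes into the frame.
 */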
3408
3409
3410 /**********************************************************************
3411  *
3412  *  Setup work for hardware segmentation offload (TSO)
3413  *
3414  **********************************************************************/
3415 static bool
3416 em_tso_setup(struct tx_ring *txr, struct mbuf *mp, u32 *txd_upper,
3417    u32 *txd_lower)
3418 {
3419         struct adapter                  *adapter = txr->adapter;
3420         struct e1000_context_desc       *TXD;
3421         struct em_buffer                *tx_buffer;
3422         struct ether_vlan_header        *eh;
3423         struct ip                       *ip;
3424         struct ip6_hdr                  *ip6;
3425         struct tcphdr                   *th;
3426         int cur, ehdrlen, hdr_len, ip_hlen, isip6;
3427         u16 etype;
3428
3429         /*
3430          * This function could/should be extended to support IP/IPv6
3431          * fragmentation as well.  But as they say, one step at a time.
3432          */
3433
3434         /*
3435          * Determine where frame payload starts.
3436          * Jump over vlan headers if already present,
3437          * helpful for QinQ too.
3438          */
3439         eh = mtod(mp, struct ether_vlan_header *);
3440         if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
3441                 etype = ntohs(eh->evl_proto);
3442                 ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
3443         } else {
3444                 etype = ntohs(eh->evl_encap_proto);
3445                 ehdrlen = ETHER_HDR_LEN;
3446         }
3447
3448         /* Ensure we have at least the IP+TCP header in the first mbuf. */
3449         if (mp->m_len < ehdrlen + sizeof(struct ip) + sizeof(struct tcphdr))
3450                 return FALSE;   /* -1 */
3451
3452         /*
3453          * We only support TCP for IPv4; TSO over IPv6 is not yet supported.
3454          * TODO: Support SCTP too when it hits the tree.
3455          */
3456         switch (etype) {
3457         case ETHERTYPE_IP:
3458                 isip6 = 0;
3459                 ip = (struct ip *)(mp->m_data + ehdrlen);
3460                 if (ip->ip_p != IPPROTO_TCP)
3461                         return FALSE;   /* 0 */
3462                 ip->ip_len = 0;
3463                 ip->ip_sum = 0;
3464                 ip_hlen = ip->ip_hl << 2;
3465                 if (mp->m_len < ehdrlen + ip_hlen + sizeof(struct tcphdr))
3466                         return FALSE;   /* -1 */
3467                 th = (struct tcphdr *)((caddr_t)ip + ip_hlen);
3468 #if 1
3469                 th->th_sum = in_pseudo(ip->ip_src.s_addr,
3470                     ip->ip_dst.s_addr, htons(IPPROTO_TCP));
3471 #else
3472                 th->th_sum = mp->m_pkthdr.csum_data;
3473 #endif
3474                 break;
3475         case ETHERTYPE_IPV6:
3476                 isip6 = 1;
3477                 return FALSE;                   /* Not supported yet. */
3478                 ip6 = (struct ip6_hdr *)(mp->m_data + ehdrlen);
3479                 if (ip6->ip6_nxt != IPPROTO_TCP)
3480                         return FALSE;   /* 0 */
3481                 ip6->ip6_plen = 0;
3482                 ip_hlen = sizeof(struct ip6_hdr); /* XXX: no header stacking. */
3483                 if (mp->m_len < ehdrlen + ip_hlen + sizeof(struct tcphdr))
3484                         return FALSE;   /* -1 */
3485                 th = (struct tcphdr *)((caddr_t)ip6 + ip_hlen);
3486 #if 0
3487                 th->th_sum = in6_pseudo(ip6->ip6_src, ip6->ip6_dst,
3488                     htons(IPPROTO_TCP));        /* XXX: function notyet. */
3489 #else
3490                 th->th_sum = mp->m_pkthdr.csum_data;
3491 #endif
3492                 break;
3493         default:
3494                 return FALSE;
3495         }
3496         hdr_len = ehdrlen + ip_hlen + (th->th_off << 2);
3497
3498         *txd_lower = (E1000_TXD_CMD_DEXT |      /* Extended descr type */
3499                       E1000_TXD_DTYP_D |        /* Data descr type */
3500                       E1000_TXD_CMD_TSE);       /* Do TSE on this packet */
3501
3502         /* IP and/or TCP header checksum calculation and insertion. */
3503         *txd_upper = ((isip6 ? 0 : E1000_TXD_POPTS_IXSM) |
3504                       E1000_TXD_POPTS_TXSM) << 8;
3505
3506         cur = txr->next_avail_desc;
3507         tx_buffer = &txr->tx_buffers[cur];
3508         TXD = (struct e1000_context_desc *) &txr->tx_base[cur];
3509
3510         /* IPv6 doesn't have a header checksum. */
3511         if (!isip6) {
3512                 /*
3513                  * Start offset for header checksum calculation.
3514                  * End offset for header checksum calculation.
3515                  * Offset of place to put the checksum.
3516                  */
3517                 TXD->lower_setup.ip_fields.ipcss = ehdrlen;
3518                 TXD->lower_setup.ip_fields.ipcse =
3519                     htole16(ehdrlen + ip_hlen - 1);
3520                 TXD->lower_setup.ip_fields.ipcso =
3521                     ehdrlen + offsetof(struct ip, ip_sum);
3522         }
3523         /*
3524          * Start offset for payload checksum calculation.
3525          * End offset for payload checksum calculation.
3526          * Offset of place to put the checksum.
3527          */
3528         TXD->upper_setup.tcp_fields.tucss =
3529             ehdrlen + ip_hlen;
3530         TXD->upper_setup.tcp_fields.tucse = 0;
3531         TXD->upper_setup.tcp_fields.tucso =
3532             ehdrlen + ip_hlen + offsetof(struct tcphdr, th_sum);
3533         /*
3534          * Payload size per packet w/o any headers.
3535          * Length of all headers up to payload.
3536          */
3537         TXD->tcp_seg_setup.fields.mss = htole16(mp->m_pkthdr.tso_segsz);
3538         TXD->tcp_seg_setup.fields.hdr_len = hdr_len;
3539
3540         TXD->cmd_and_length = htole32(adapter->txd_cmd |
3541                                 E1000_TXD_CMD_DEXT |    /* Extended descr */
3542                                 E1000_TXD_CMD_TSE |     /* TSE context */
3543                                 (isip6 ? 0 : E1000_TXD_CMD_IP) | 
3544                                 E1000_TXD_CMD_TCP |     /* Do TCP checksum */
3545                                 (mp->m_pkthdr.len - (hdr_len))); /* Total len */
3546
3547         tx_buffer->m_head = NULL;
3548         tx_buffer->next_eop = -1;
3549
3550         if (++cur == adapter->num_tx_desc)
3551                 cur = 0;
3552
3553         txr->tx_avail--;
3554         txr->next_avail_desc = cur;
3555         txr->tx_tso = TRUE;
3556
3557         return TRUE;
3558 }
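
/*
 * Worked TSO example (illustrative numbers): an untagged IPv4/TCP
 * packet with 20-byte IP and TCP headers gives hdr_len = 14 + 20 +
 * 20 = 54.  With tso_segsz (MSS) = 1448 and a 10054-byte mbuf chain,
 * cmd_and_length carries the 10000-byte payload length and the
 * hardware emits ceil(10000/1448) = 7 frames, each with headers
 * rebuilt from this one context descriptor.
 */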
3559
3560
3561 /**********************************************************************
3562  *
3563  *  Examine each tx_buffer in the used queue. If the hardware is done
3564  *  processing the packet then free associated resources. The
3565  *  tx_buffer is put back on the free queue.
3566  *
3567  **********************************************************************/
3568 static bool
3569 em_txeof(struct tx_ring *txr)
3570 {
3571         struct adapter  *adapter = txr->adapter;
3572         int first, last, done, num_avail;
3573         struct em_buffer *tx_buffer;
3574         struct e1000_tx_desc   *tx_desc, *eop_desc;
3575         struct ifnet   *ifp = adapter->ifp;
3576
3577         EM_TX_LOCK_ASSERT(txr);
3578
3579         if (txr->tx_avail == adapter->num_tx_desc)
3580                 return (FALSE);
3581
3582         num_avail = txr->tx_avail;
3583         first = txr->next_to_clean;
3584         tx_desc = &txr->tx_base[first];
3585         tx_buffer = &txr->tx_buffers[first];
3586         last = tx_buffer->next_eop;
3587         eop_desc = &txr->tx_base[last];
3588
3589         /*
3590          * What this does is get the index of the
3591          * first descriptor AFTER the EOP of the 
3592          * first packet, that way we can do the
3593          * simple comparison on the inner while loop.
3594          */
3595         if (++last == adapter->num_tx_desc)
3596                 last = 0;
3597         done = last;
3598
3599         bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
3600             BUS_DMASYNC_POSTREAD);
3601
3602         while (eop_desc->upper.fields.status & E1000_TXD_STAT_DD) {
3603                 /* We clean the range of the packet */
3604                 while (first != done) {
3605                         tx_desc->upper.data = 0;
3606                         tx_desc->lower.data = 0;
3607                         tx_desc->buffer_addr = 0;
3608                         ++num_avail;
3609
3610                         if (tx_buffer->m_head) {
3611                                 ifp->if_opackets++;
3612                                 bus_dmamap_sync(txr->txtag,
3613                                     tx_buffer->map,
3614                                     BUS_DMASYNC_POSTWRITE);
3615                                 bus_dmamap_unload(txr->txtag,
3616                                     tx_buffer->map);
3617
3618                                 m_freem(tx_buffer->m_head);
3619                                 tx_buffer->m_head = NULL;
3620                         }
3621                         tx_buffer->next_eop = -1;
3622                         txr->watchdog_time = ticks;
3623
3624                         if (++first == adapter->num_tx_desc)
3625                                 first = 0;
3626
3627                         tx_buffer = &txr->tx_buffers[first];
3628                         tx_desc = &txr->tx_base[first];
3629                 }
3630                 /* See if we can continue to the next packet */
3631                 last = tx_buffer->next_eop;
3632                 if (last != -1) {
3633                         eop_desc = &txr->tx_base[last];
3634                         /* Get new done point */
3635                         if (++last == adapter->num_tx_desc)
                                        last = 0;
3636                         done = last;
3637                 } else
3638                         break;
3639         }
3640         bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
3641             BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
3642
3643         txr->next_to_clean = first;
3644
3645         /*
3646          * If we have enough room, clear IFF_DRV_OACTIVE to
3647          * tell the stack that it is OK to send packets.
3648          * If there are no pending descriptors, clear the watchdog.
3649          */
3650         if (num_avail > EM_TX_CLEANUP_THRESHOLD) {                
3651                 ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;
3652                 if (num_avail == adapter->num_tx_desc) {
3653                         txr->watchdog_check = FALSE;
3654                         txr->tx_avail = num_avail;
3655                         return (FALSE);
3656                 } 
3657         }
3658
3659         txr->tx_avail = num_avail;
3660         return (TRUE);
3661 }
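
/*
 * Illustrative picture of the cleanup walk above (assume an 8-entry
 * ring): with first = 6 and a packet whose EOP is in slot 1, last
 * becomes 1 and done becomes 2, so the inner loop reclaims slots
 * 6, 7, 0 and 1, stopping when first wraps around to done.
 */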
3662
3663
3664 /*********************************************************************
3665  *
3666  *  Refresh RX descriptor mbufs from system mbuf buffer pool.
3667  *
3668  **********************************************************************/
3669 static void
3670 em_refresh_mbufs(struct rx_ring *rxr, int limit)
3671 {
3672         struct adapter          *adapter = rxr->adapter;
3673         struct mbuf             *m;
3674         bus_dma_segment_t       segs[1];
3675         bus_dmamap_t            map;
3676         struct em_buffer        *rxbuf;
3677         int                     i, error, nsegs, cleaned;
3678
3679         i = rxr->next_to_refresh;
3680         cleaned = -1;
3681         while (i != limit) {
3682                 m = m_getcl(M_DONTWAIT, MT_DATA, M_PKTHDR);
3683                 if (m == NULL)
3684                         goto update;
3685                 m->m_len = m->m_pkthdr.len = MCLBYTES;
3686
3687                 if (adapter->max_frame_size <= (MCLBYTES - ETHER_ALIGN))
3688                         m_adj(m, ETHER_ALIGN);
3689
3690                 /*
3691                  * Using memory from the mbuf cluster pool, invoke the
3692                  * bus_dma machinery to arrange the memory mapping.
3693                  */
3694                 error = bus_dmamap_load_mbuf_sg(rxr->rxtag, rxr->rx_sparemap,
3695                     m, segs, &nsegs, BUS_DMA_NOWAIT);
3696                 if (error != 0) {
3697                         m_free(m);
3698                         goto update;
3699                 }
3700
3701                 /* If nsegs is wrong then the stack is corrupt. */
3702                 KASSERT(nsegs == 1, ("Too many segments returned!"));
3703         
3704                 rxbuf = &rxr->rx_buffers[i];
3705                 if (rxbuf->m_head != NULL)
3706                         bus_dmamap_unload(rxr->rxtag, rxbuf->map);
3707         
3708                 map = rxbuf->map;
3709                 rxbuf->map = rxr->rx_sparemap;
3710                 rxr->rx_sparemap = map;
3711                 bus_dmamap_sync(rxr->rxtag,
3712                     rxbuf->map, BUS_DMASYNC_PREREAD);
3713                 rxbuf->m_head = m;
3714                 rxr->rx_base[i].buffer_addr = htole64(segs[0].ds_addr);
3715
3716                 cleaned = i;
3717                 /* Calculate next index */
3718                 if (++i == adapter->num_rx_desc)
3719                         i = 0;
3720                 /* This is the work marker for refresh */
3721                 rxr->next_to_refresh = i;
3722         }
3723 update:
3724         bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
3725             BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
3726         if (cleaned != -1) /* Update tail index */
3727                 E1000_WRITE_REG(&adapter->hw,
3728                     E1000_RDT(rxr->me), cleaned);
3729
3730         return;
3731 }
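
/*
 * Note on the spare-map swap above (illustrative): the new mbuf is
 * loaded into rx_sparemap first, so a bus_dmamap_load_mbuf_sg()
 * failure leaves the ring slot's old, still-loaded map untouched.
 * Only on success are the two maps exchanged, and the slot's old map
 * becomes the next spare.
 */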
3732
3733
3734 /*********************************************************************
3735  *
3736  *  Allocate memory for rx_buffer structures. Since we use one
3737  *  rx_buffer per received packet, the maximum number of rx_buffer's
3738  *  that we'll need is equal to the number of receive descriptors
3739  *  that we've allocated.
3740  *
3741  **********************************************************************/
3742 static int
3743 em_allocate_receive_buffers(struct rx_ring *rxr)
3744 {
3745         struct adapter          *adapter = rxr->adapter;
3746         device_t                dev = adapter->dev;
3747         struct em_buffer        *rxbuf;
3748         int                     error;
3749
3750         rxr->rx_buffers = malloc(sizeof(struct em_buffer) *
3751             adapter->num_rx_desc, M_DEVBUF, M_NOWAIT | M_ZERO);
3752         if (rxr->rx_buffers == NULL) {
3753                 device_printf(dev, "Unable to allocate rx_buffer memory\n");
3754                 return (ENOMEM);
3755         }
3756
3757         error = bus_dma_tag_create(bus_get_dma_tag(dev), /* parent */
3758                                 1, 0,                   /* alignment, bounds */
3759                                 BUS_SPACE_MAXADDR,      /* lowaddr */
3760                                 BUS_SPACE_MAXADDR,      /* highaddr */
3761                                 NULL, NULL,             /* filter, filterarg */
3762                                 MCLBYTES,               /* maxsize */
3763                                 1,                      /* nsegments */
3764                                 MCLBYTES,               /* maxsegsize */
3765                                 0,                      /* flags */
3766                                 NULL,                   /* lockfunc */
3767                                 NULL,                   /* lockarg */
3768                                 &rxr->rxtag);
3769         if (error) {
3770                 device_printf(dev, "%s: bus_dma_tag_create failed %d\n",
3771                     __func__, error);
3772                 goto fail;
3773         }
3774
3775         /* Create the spare map (used by getbuf) */
3776         error = bus_dmamap_create(rxr->rxtag, BUS_DMA_NOWAIT,
3777              &rxr->rx_sparemap);
3778         if (error) {
3779                 device_printf(dev, "%s: bus_dmamap_create failed: %d\n",
3780                     __func__, error);
3781                 goto fail;
3782         }
3783
3784         rxbuf = rxr->rx_buffers;
3785         for (int i = 0; i < adapter->num_rx_desc; i++, rxbuf++) {
3787                 error = bus_dmamap_create(rxr->rxtag, BUS_DMA_NOWAIT,
3788                     &rxbuf->map);
3789                 if (error) {
3790                         device_printf(dev, "%s: bus_dmamap_create failed: %d\n",
3791                             __func__, error);
3792                         goto fail;
3793                 }
3794         }
3795
3796         return (0);
3797
3798 fail:
3799         em_free_receive_structures(adapter);
3800         return (error);
3801 }
3802
3803
3804 /*********************************************************************
3805  *
3806  *  Initialize a receive ring and its buffers.
3807  *
3808  **********************************************************************/
3809 static int
3810 em_setup_receive_ring(struct rx_ring *rxr)
3811 {
3812         struct  adapter         *adapter = rxr->adapter;
3813         struct em_buffer        *rxbuf;
3814         bus_dma_segment_t       seg[1];
3815         int                     rsize, nsegs, error;
3816
3817
3818         /* Clear the ring contents */
3819         EM_RX_LOCK(rxr);
3820         rsize = roundup2(adapter->num_rx_desc *
3821             sizeof(struct e1000_rx_desc), EM_DBA_ALIGN);
3822         bzero((void *)rxr->rx_base, rsize);
3823
3824         /*
3825         ** Free current RX buffer structs and their mbufs
3826         */
3827         for (int i = 0; i < adapter->num_rx_desc; i++) {
3828                 rxbuf = &rxr->rx_buffers[i];
3829                 if (rxbuf->m_head != NULL) {
3830                         bus_dmamap_sync(rxr->rxtag, rxbuf->map,
3831                             BUS_DMASYNC_POSTREAD);
3832                         bus_dmamap_unload(rxr->rxtag, rxbuf->map);
3833                         m_freem(rxbuf->m_head);
3834                 }
3835         }
3836
3837         /* Now replenish the mbufs */
3838         for (int j = 0; j != adapter->num_rx_desc; ++j) {
3839
3840                 rxbuf = &rxr->rx_buffers[j];
3841                 rxbuf->m_head = m_getcl(M_DONTWAIT, MT_DATA, M_PKTHDR);
3842                 if (rxbuf->m_head == NULL)
3843                         panic("RX ring hdr initialization failed!\n");
3844                 rxbuf->m_head->m_len = MCLBYTES;
3845                 rxbuf->m_head->m_flags &= ~M_HASFCS; /* we strip it */
3846                 rxbuf->m_head->m_pkthdr.len = MCLBYTES;
3847
3848                 /* Get the memory mapping */
3849                 error = bus_dmamap_load_mbuf_sg(rxr->rxtag,
3850                     rxbuf->map, rxbuf->m_head, seg,
3851                     &nsegs, BUS_DMA_NOWAIT);
3852                 if (error != 0)
3853                         panic("RX ring dma initialization failed!\n");
3854                 bus_dmamap_sync(rxr->rxtag,
3855                     rxbuf->map, BUS_DMASYNC_PREREAD);
3856
3857                 /* Update descriptor */
3858                 rxr->rx_base[j].buffer_addr = htole64(seg[0].ds_addr);
3859         }
3860
3861
3862         /* Setup our descriptor indices */
3863         rxr->next_to_check = 0;
3864         rxr->next_to_refresh = 0;
3865
3866         bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
3867             BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
3868
3869         EM_RX_UNLOCK(rxr);
3870         return (0);
3871 }
3872
3873 /*********************************************************************
3874  *
3875  *  Initialize all receive rings.
3876  *
3877  **********************************************************************/
3878 static int
3879 em_setup_receive_structures(struct adapter *adapter)
3880 {
3881         struct rx_ring *rxr = adapter->rx_rings;
3882         int j;
3883
3884         for (j = 0; j < adapter->num_queues; j++, rxr++)
3885                 if (em_setup_receive_ring(rxr))
3886                         goto fail;
3887
3888         return (0);
3889 fail:
3890         /*
3891          * Free RX buffers allocated so far, we will only handle
3892          * the rings that completed, the failing case will have
3893          * cleaned up after itself. Ring 'j' failed, so it is the terminus.
3894          */
3895         for (int i = 0; i < j; ++i) {
3896                 rxr = &adapter->rx_rings[i];
3897                 for (int n = 0; n < adapter->num_rx_desc; n++) {
3898                         struct em_buffer *rxbuf;
3899                         rxbuf = &rxr->rx_buffers[n];
3900                         if (rxbuf->m_head != NULL) {
3901                                 bus_dmamap_sync(rxr->rxtag, rxbuf->map,
3902                                   BUS_DMASYNC_POSTREAD);
3903                                 bus_dmamap_unload(rxr->rxtag, rxbuf->map);
3904                                 m_freem(rxbuf->m_head);
3905                                 rxbuf->m_head = NULL;
3906                         }
3907                 }
3908         }
3909
3910         return (ENOBUFS);
3911 }
3912
3913 /*********************************************************************
3914  *
3915  *  Free all receive rings.
3916  *
3917  **********************************************************************/
3918 static void
3919 em_free_receive_structures(struct adapter *adapter)
3920 {
3921         struct rx_ring *rxr = adapter->rx_rings;
3922
3923         for (int i = 0; i < adapter->num_queues; i++, rxr++) {
3924                 em_free_receive_buffers(rxr);
3925                 /* Free the ring memory as well */
3926                 em_dma_free(adapter, &rxr->rxdma);
3927                 EM_RX_LOCK_DESTROY(rxr);
3928         }
3929
3930         free(adapter->rx_rings, M_DEVBUF);
3931 }
3932
3933
3934 /*********************************************************************
3935  *
3936  *  Free receive ring data structures
3937  *
3938  **********************************************************************/
3939 static void
3940 em_free_receive_buffers(struct rx_ring *rxr)
3941 {
3942         struct adapter          *adapter = rxr->adapter;
3943         struct em_buffer        *rxbuf = NULL;
3944
3945         INIT_DEBUGOUT("free_receive_buffers: begin");
3946
3947         if (rxr->rx_sparemap) {
3948                 bus_dmamap_destroy(rxr->rxtag, rxr->rx_sparemap);
3949                 rxr->rx_sparemap = NULL;
3950         }
3951
3952         if (rxr->rx_buffers != NULL) {
3953                 for (int i = 0; i < adapter->num_rx_desc; i++) {
3954                         rxbuf = &rxr->rx_buffers[i];
3955                         if (rxbuf->map != NULL) {
3956                                 bus_dmamap_sync(rxr->rxtag, rxbuf->map,
3957                                     BUS_DMASYNC_POSTREAD);
3958                                 bus_dmamap_unload(rxr->rxtag, rxbuf->map);
3959                                 bus_dmamap_destroy(rxr->rxtag, rxbuf->map);
3960                         }
3961                         if (rxbuf->m_head != NULL) {
3962                                 m_freem(rxbuf->m_head);
3963                                 rxbuf->m_head = NULL;
3964                         }
3965                 }
3966                 free(rxr->rx_buffers, M_DEVBUF);
3967                 rxr->rx_buffers = NULL;
3968         }
3969
3970         if (rxr->rxtag != NULL) {
3971                 bus_dma_tag_destroy(rxr->rxtag);
3972                 rxr->rxtag = NULL;
3973         }
3974
3975         return;
3976 }
3977
3978
3979 /*********************************************************************
3980  *
3981  *  Enable receive unit.
3982  *
3983  **********************************************************************/
3984 #define MAX_INTS_PER_SEC        8000
3985 #define DEFAULT_ITR          (1000000000/(MAX_INTS_PER_SEC * 256))
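
/*
 * Worked example (illustrative): the ITR register counts 256ns units,
 * so DEFAULT_ITR = 10^9 / (8000 * 256) ~= 488, i.e. the hardware waits
 * at least 488 * 256ns ~= 125us between interrupts, capping the rate
 * at roughly MAX_INTS_PER_SEC.
 */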
3986
3987 static void
3988 em_initialize_receive_unit(struct adapter *adapter)
3989 {
3990         struct rx_ring  *rxr = adapter->rx_rings;
3991         struct ifnet    *ifp = adapter->ifp;
3992         struct e1000_hw *hw = &adapter->hw;
3993         u64     bus_addr;
3994         u32     rctl, rxcsum;
3995
3996         INIT_DEBUGOUT("em_initialize_receive_unit: begin");
3997
3998         /*
3999          * Make sure receives are disabled while setting
4000          * up the descriptor ring
4001          */
4002         rctl = E1000_READ_REG(hw, E1000_RCTL);
4003         E1000_WRITE_REG(hw, E1000_RCTL, rctl & ~E1000_RCTL_EN);
4004
4005         E1000_WRITE_REG(&adapter->hw, E1000_RADV,
4006             adapter->rx_abs_int_delay.value);
4007         /*
4008          * Set the interrupt throttling rate. Value is calculated
4009          * as DEFAULT_ITR = 1/(MAX_INTS_PER_SEC * 256ns)
4010          */
4011         E1000_WRITE_REG(hw, E1000_ITR, DEFAULT_ITR);
4012
4013         /*
4014         ** When using MSIX interrupts we need to throttle
4015         ** using the EITR register (82574 only)
4016         */
4017         if (hw->mac.type == e1000_82574)
4018                 for (int i = 0; i < 4; i++)
4019                         E1000_WRITE_REG(hw, E1000_EITR_82574(i),
4020                             DEFAULT_ITR);
4021
4022         /* Disable accelerated acknowledgement */
4023         if (adapter->hw.mac.type == e1000_82574)
4024                 E1000_WRITE_REG(hw, E1000_RFCTL, E1000_RFCTL_ACK_DIS);
4025
4026         if (ifp->if_capenable & IFCAP_RXCSUM) {
4027                 rxcsum = E1000_READ_REG(hw, E1000_RXCSUM);
4028                 rxcsum |= (E1000_RXCSUM_IPOFL | E1000_RXCSUM_TUOFL);
4029                 E1000_WRITE_REG(hw, E1000_RXCSUM, rxcsum);
4030         }
4031
4032         /*
4033         ** XXX TEMPORARY WORKAROUND: on some systems with 82573
4034         ** long latencies are observed, like Lenovo X60. This
4035         ** change eliminates the problem, but since having positive
4036         ** values in RDTR is a known source of problems on other
4037         ** platforms another solution is being sought.
4038         */
4039         if (hw->mac.type == e1000_82573)
4040                 E1000_WRITE_REG(hw, E1000_RDTR, 0x20);
4041
4042         for (int i = 0; i < adapter->num_queues; i++, rxr++) {
4043                 /* Setup the Base and Length of the Rx Descriptor Ring */
4044                 bus_addr = rxr->rxdma.dma_paddr;
4045                 E1000_WRITE_REG(hw, E1000_RDLEN(i),
4046                     adapter->num_rx_desc * sizeof(struct e1000_rx_desc));
4047                 E1000_WRITE_REG(hw, E1000_RDBAH(i), (u32)(bus_addr >> 32));
4048                 E1000_WRITE_REG(hw, E1000_RDBAL(i), (u32)bus_addr);
4049                 /* Setup the Head and Tail Descriptor Pointers */
4050                 E1000_WRITE_REG(hw, E1000_RDH(i), 0);
4051                 E1000_WRITE_REG(hw, E1000_RDT(i), adapter->num_rx_desc - 1);
4052         }
4053
4054         /* Setup the Receive Control Register */
4055         rctl &= ~(3 << E1000_RCTL_MO_SHIFT);
4056         rctl |= E1000_RCTL_EN | E1000_RCTL_BAM |
4057             E1000_RCTL_LBM_NO | E1000_RCTL_RDMTS_HALF |
4058             (hw->mac.mc_filter_type << E1000_RCTL_MO_SHIFT);
4059
4060         /* Strip the CRC */
4061         rctl |= E1000_RCTL_SECRC;
4062
4063         /* Make sure VLAN Filters are off */
4064         rctl &= ~E1000_RCTL_VFE;
4065         rctl &= ~E1000_RCTL_SBP;
4066         rctl |= E1000_RCTL_SZ_2048;
4067         if (ifp->if_mtu > ETHERMTU)
4068                 rctl |= E1000_RCTL_LPE;
4069         else
4070                 rctl &= ~E1000_RCTL_LPE;
4071
4072         /* Write out the settings */
4073         E1000_WRITE_REG(hw, E1000_RCTL, rctl);
4074
4075         return;
4076 }
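
/*
 * Note on the head/tail initialization above (illustrative): with
 * RDH = 0 and RDT = num_rx_desc - 1 the hardware owns all but one
 * descriptor; one slot is always kept back so head == tail can
 * unambiguously mean "no descriptors available" rather than "full".
 */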
4077
4078
4079 /*********************************************************************
4080  *
4081  *  This routine executes in interrupt context. It replenishes
4082  *  the mbufs in the descriptor ring and passes data that has
4083  *  been DMA'ed into host memory up to the stack.
4084  *
4085  *  We loop at most count times if count is > 0, or until done if
4086  *  count < 0.
4087  *  
4088  *  For polling we also now return the number of cleaned packets.
4089  *********************************************************************/
4090 static int
4091 em_rxeof(struct rx_ring *rxr, int count)
4092 {
4093         struct adapter          *adapter = rxr->adapter;
4094         struct ifnet            *ifp = adapter->ifp;
4095         struct mbuf             *mp, *sendmp;
4096         u8                      status = 0;
4097         u16                     len;
4098         int                     i, processed, rxdone = 0;
4099         bool                    eop;
4100         struct e1000_rx_desc    *cur;
4101
4102         EM_RX_LOCK(rxr);
4103
4104         for (i = rxr->next_to_check, processed = 0; count != 0;) {
4105
4106                 if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
4107                         break;
4108
4109                 bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
4110                     BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
4111
4112                 cur = &rxr->rx_base[i];
4113                 status = cur->status;
4114                 mp = sendmp = NULL;
4115
4116                 if ((status & E1000_RXD_STAT_DD) == 0)
4117                         break;
4118
4119                 len = le16toh(cur->length);
4120                 eop = (status & E1000_RXD_STAT_EOP) != 0;
4121                 count--;
4122
4123                 if ((cur->errors & E1000_RXD_ERR_FRAME_ERR_MASK) == 0) {
4124
4125                         /* Assign correct length to the current fragment */
4126                         mp = rxr->rx_buffers[i].m_head;
4127                         mp->m_len = len;
4128
4129                         if (rxr->fmp == NULL) {
4130                                 mp->m_pkthdr.len = len;
4131                                 rxr->fmp = mp; /* Store the first mbuf */
4132                                 rxr->lmp = mp;
4133                         } else {
4134                                 /* Chain mbuf's together */
4135                                 mp->m_flags &= ~M_PKTHDR;
4136                                 rxr->lmp->m_next = mp;
4137                                 rxr->lmp = rxr->lmp->m_next;
4138                                 rxr->fmp->m_pkthdr.len += len;
4139                         }
4140
4141                         if (eop) {
4142                                 rxr->fmp->m_pkthdr.rcvif = ifp;
4143                                 ifp->if_ipackets++;
4144                                 em_receive_checksum(cur, rxr->fmp);
4145 #ifndef __NO_STRICT_ALIGNMENT
4146                                 if (adapter->max_frame_size >
4147                                     (MCLBYTES - ETHER_ALIGN) &&
4148                                     em_fixup_rx(rxr) != 0)
4149                                         goto skip;
4150 #endif
4151                                 if (status & E1000_RXD_STAT_VP) {
4152                                         rxr->fmp->m_pkthdr.ether_vtag =
4153                                             (le16toh(cur->special) &
4154                                             E1000_RXD_SPC_VLAN_MASK);
4155                                         rxr->fmp->m_flags |= M_VLANTAG;
4156                                 }
4157 #ifdef EM_MULTIQUEUE
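                                /*
                                 * Tag the packet with the CPU that
                                 * serviced it; the stack can use the
                                 * flowid for transmit queue selection.
                                 */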
4158                                 rxr->fmp->m_pkthdr.flowid = curcpu;
4159                                 rxr->fmp->m_flags |= M_FLOWID;
4160 #endif
4161 #ifndef __NO_STRICT_ALIGNMENT
4162 skip:
4163 #endif
4164                                 sendmp = rxr->fmp;
4165                                 rxr->fmp = NULL;
4166                                 rxr->lmp = NULL;
4167                         }
4168                 } else {
4169                         ifp->if_ierrors++;
4170                         /* Reuse loaded DMA map and just update mbuf chain */
4171                         mp = rxr->rx_buffers[i].m_head;
4172                         mp->m_len = mp->m_pkthdr.len = MCLBYTES;
4173                         mp->m_data = mp->m_ext.ext_buf;
4174                         mp->m_next = NULL;
4175                         if (adapter->max_frame_size <=
4176                             (MCLBYTES - ETHER_ALIGN))
4177                                 m_adj(mp, ETHER_ALIGN);
4178                         if (rxr->fmp != NULL) {
4179                                 m_freem(rxr->fmp);
4180                                 rxr->fmp = NULL;
4181                                 rxr->lmp = NULL;
4182                         }
4183                         sendmp = NULL;
4184                 }
4185
4186                 /* Zero out the receive descriptors status. */
4187                 cur->status = 0;
4188                 ++rxdone;       /* cumulative for POLL */
4189                 ++processed;
4190
4191                 /* Advance our pointers to the next descriptor. */
4192                 if (++i == adapter->num_rx_desc)
4193                         i = 0;
4194
4195                 /* Send to the stack */
4196                 if (sendmp != NULL) {
4197                         rxr->next_to_check = i;
4198                         EM_RX_UNLOCK(rxr);
4199                         (*ifp->if_input)(ifp, sendmp);
4200                         EM_RX_LOCK(rxr);
4201                         i = rxr->next_to_check;
4202                 }
4203
4204                 /* Only refresh mbufs every 8 descriptors */
4205                 if (processed == 8) {
4206                         em_refresh_mbufs(rxr, i);
4207                         processed = 0;
4208                 }
4209         }
4210
4211         /* Catch any remaining refresh work */
4212         if (processed != 0) {
4213                 em_refresh_mbufs(rxr, i);
4214                 processed = 0;
4215         }
4216
4217         rxr->next_to_check = i;
4218         EM_RX_UNLOCK(rxr);
4219
4220 #ifdef DEVICE_POLLING
4221         return (rxdone);
4222 #else
4223         return ((status & E1000_RXD_STAT_DD) ? TRUE : FALSE);
4224 #endif
4225 }
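
/*
 * Illustrative sketch only (hypothetical, not compiled into the driver):
 * a DEVICE_POLLING handler would bound em_rxeof's per-call work via the
 * count argument supplied by the polling framework, e.g.:
 */
#if 0
static int
em_poll_sketch(struct ifnet *ifp, enum poll_cmd cmd, int count)
{
        struct adapter *adapter = ifp->if_softc;

        /* Clean at most 'count' packets from the first RX ring. */
        return (em_rxeof(adapter->rx_rings, count));
}
#endif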
4226
4227 #ifndef __NO_STRICT_ALIGNMENT
4228 /*
4229  * When jumbo frames are enabled we should realign the entire payload on
4230  * architectures with strict alignment. This is a serious design mistake in
4231  * the 8254x, as it nullifies the benefit of DMA. The 8254x only allows the
4232  * RX buffer size to be 2048/4096/8192/16384; what we really want is
4233  * 2048 - ETHER_ALIGN so the payload comes out aligned. On architectures
4234  * without strict alignment restrictions the 8254x still performs unaligned
4235  * memory accesses, which reduces performance as well. To avoid copying an
4236  * entire frame just to align it, we allocate a new mbuf and copy the
4237  * ethernet header into it; the new mbuf is then prepended to the existing
4238  * mbuf chain.
4239  *
4240  * Be aware that the best performance of the 8254x is achieved only when
4241  * jumbo frames are not used at all on architectures with strict alignment.
4241  */
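/*
 * Worked example for the in-place branch below, assuming the cluster
 * start is 4-byte aligned: the IP header initially begins at byte 14
 * (14 % 4 == 2, misaligned); shifting the frame up by ETHER_HDR_LEN
 * moves it to byte 28 (28 % 4 == 0, aligned).
 */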
4242 static int
4243 em_fixup_rx(struct rx_ring *rxr)
4244 {
4245         struct adapter *adapter = rxr->adapter;
4246         struct mbuf *m, *n;
4247         int error;
4248
4249         error = 0;
4250         m = rxr->fmp;
4251         if (m->m_len <= (MCLBYTES - ETHER_HDR_LEN)) {
4252                 bcopy(m->m_data, m->m_data + ETHER_HDR_LEN, m->m_len);
4253                 m->m_data += ETHER_HDR_LEN;
4254         } else {
4255                 MGETHDR(n, M_DONTWAIT, MT_DATA);
4256                 if (n != NULL) {
4257                         bcopy(m->m_data, n->m_data, ETHER_HDR_LEN);
4258                         m->m_data += ETHER_HDR_LEN;
4259                         m->m_len -= ETHER_HDR_LEN;
4260                         n->m_len = ETHER_HDR_LEN;
4261                         M_MOVE_PKTHDR(n, m);
4262                         n->m_next = m;
4263                         rxr->fmp = n;
4264                 } else {
4265                         adapter->dropped_pkts++;
4266                         m_freem(rxr->fmp);
4267                         rxr->fmp = NULL;
4268                         error = ENOMEM;
4269                 }
4270         }
4271
4272         return (error);
4273 }
4274 #endif
4275
4276 /*********************************************************************
4277  *
4278  *  Verify that the hardware indicated that the checksum is valid.
4279  *  Inform the stack about the status of the checksum so that
4280  *  the stack doesn't spend time verifying it again.
4281  *
4282  *********************************************************************/
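/*
 * In short: IPCS set with IPE clear marks the mbuf CSUM_IP_CHECKED |
 * CSUM_IP_VALID; TCPCS set with TCPE clear marks it CSUM_DATA_VALID |
 * CSUM_PSEUDO_HDR and forces csum_data to 0xffff, the value the stack
 * takes to mean a fully verified checksum.
 */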
4283 static void
4284 em_receive_checksum(struct e1000_rx_desc *rx_desc, struct mbuf *mp)
4285 {
4286         /* If the Ignore Checksum bit is set, leave the mbuf unmarked */
4287         if (rx_desc->status & E1000_RXD_STAT_IXSM) {
4288                 mp->m_pkthdr.csum_flags = 0;
4289                 return;
4290         }
4291
4292         if (rx_desc->status & E1000_RXD_STAT_IPCS) {
4293                 /* Did it pass? */
4294                 if (!(rx_desc->errors & E1000_RXD_ERR_IPE)) {
4295                         /* IP Checksum Good */
4296                         mp->m_pkthdr.csum_flags = CSUM_IP_CHECKED;
4297                         mp->m_pkthdr.csum_flags |= CSUM_IP_VALID;
4298
4299                 } else {
4300                         mp->m_pkthdr.csum_flags = 0;
4301                 }
4302         }
4303
4304         if (rx_desc->status & E1000_RXD_STAT_TCPCS) {
4305                 /* Did it pass? */
4306                 if (!(rx_desc->errors & E1000_RXD_ERR_TCPE)) {
4307                         mp->m_pkthdr.csum_flags |=
4308                         (CSUM_DATA_VALID | CSUM_PSEUDO_HDR);
4309                         mp->m_pkthdr.csum_data = htons(0xffff);
4310                 }
4311         }
4312 }
4313
4314 /*
4315  * This routine is run via a VLAN
4316  * config EVENT
4317  */
4318 static void
4319 em_register_vlan(void *arg, struct ifnet *ifp, u16 vtag)
4320 {
4321         struct adapter  *adapter = ifp->if_softc;
4322         u32             index, bit;
4323
4324         if (ifp->if_softc != arg)       /* Not our event */
4325                 return;
4326
4327         if ((vtag == 0) || (vtag > 4095))       /* Invalid ID */
4328                 return;
4329
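        /*
         * The VFTA is a 128-word x 32-bit bitmap, one bit per VLAN ID:
         * bits 11..5 of the tag select the word, bits 4..0 select the
         * bit.  E.g. vtag 100 -> index 3 (100 >> 5), bit 4 (100 & 0x1F).
         */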
4330         index = (vtag >> 5) & 0x7F;
4331         bit = vtag & 0x1F;
4332         em_shadow_vfta[index] |= (1 << bit);
4333         ++adapter->num_vlans;
4334         /* Re-init to load the changes */
4335         em_init(adapter);
4336 }
4337
4338 /*
4339  * This routine is run via a VLAN
4340  * unconfig EVENT
4341  */
4342 static void
4343 em_unregister_vlan(void *arg, struct ifnet *ifp, u16 vtag)
4344 {
4345         struct adapter  *adapter = ifp->if_softc;
4346         u32             index, bit;
4347
4348         if (ifp->if_softc != arg)
4349                 return;
4350
4351         if ((vtag == 0) || (vtag > 4095))       /* Invalid */
4352                 return;
4353
4354         index = (vtag >> 5) & 0x7F;
4355         bit = vtag & 0x1F;
4356         em_shadow_vfta[index] &= ~(1 << bit);
4357         --adapter->num_vlans;
4358         /* Re-init to load the changes */
4359         em_init(adapter);
4360 }
4361
4362 static void
4363 em_setup_vlan_hw_support(struct adapter *adapter)
4364 {
4365         struct e1000_hw *hw = &adapter->hw;
4366         u32             reg;
4367
4368         /*
4369         ** We get here thru init_locked, meaning
4370         ** a soft reset; that has already cleared
4371         ** the VFTA and other state, so if no
4372         ** VLANs have been registered, do nothing.
4373         */
4374         if (adapter->num_vlans == 0)
4375                 return;
4376
4377         /*
4378         ** A soft reset zeroes out the VFTA, so
4379         ** we need to repopulate it now.
4380         */
4381         for (int i = 0; i < EM_VFTA_SIZE; i++)
4382                 if (em_shadow_vfta[i] != 0)
4383                         E1000_WRITE_REG_ARRAY(hw, E1000_VFTA,
4384                             i, em_shadow_vfta[i]);
4385
4386         reg = E1000_READ_REG(hw, E1000_CTRL);
4387         reg |= E1000_CTRL_VME;
4388         E1000_WRITE_REG(hw, E1000_CTRL, reg);
4389
4390         /* Enable the Filter Table */
4391         reg = E1000_READ_REG(hw, E1000_RCTL);
4392         reg &= ~E1000_RCTL_CFIEN;
4393         reg |= E1000_RCTL_VFE;
4394         E1000_WRITE_REG(hw, E1000_RCTL, reg);
4395
4396         /* Update the frame size */
4397         E1000_WRITE_REG(&adapter->hw, E1000_RLPML,
4398             adapter->max_frame_size + VLAN_TAG_SIZE);
4399 }
4400
4401 static void
4402 em_enable_intr(struct adapter *adapter)
4403 {
4404         struct e1000_hw *hw = &adapter->hw;
4405         u32 ims_mask = IMS_ENABLE_MASK;
4406
4407         if (hw->mac.type == e1000_82574) {
4408                 E1000_WRITE_REG(hw, EM_EIAC, EM_MSIX_MASK);
4409                 ims_mask |= EM_MSIX_MASK;
4410         } 
4411         E1000_WRITE_REG(hw, E1000_IMS, ims_mask);
4412 }
4413
4414 static void
4415 em_disable_intr(struct adapter *adapter)
4416 {
4417         struct e1000_hw *hw = &adapter->hw;
4418
4419         if (hw->mac.type == e1000_82574)
4420                 E1000_WRITE_REG(hw, EM_EIAC, 0);
4421         E1000_WRITE_REG(&adapter->hw, E1000_IMC, 0xffffffff);
4422 }
4423
4424 /*
4425  * Bit of a misnomer: what this really means is
4426  * to enable OS management of the system, i.e.
4427  * to disable the special hardware management features.
4428  */
4429 static void
4430 em_init_manageability(struct adapter *adapter)
4431 {
4432         /* A shared code workaround */
4433 #define E1000_82542_MANC2H E1000_MANC2H
4434         if (adapter->has_manage) {
4435                 int manc2h = E1000_READ_REG(&adapter->hw, E1000_MANC2H);
4436                 int manc = E1000_READ_REG(&adapter->hw, E1000_MANC);
4437
4438                 /* disable hardware interception of ARP */
4439                 manc &= ~(E1000_MANC_ARP_EN);
4440
4441                 /* enable receiving management packets to the host */
4442                 manc |= E1000_MANC_EN_MNG2HOST;
4443 #define E1000_MNG2HOST_PORT_623 (1 << 5)
4444 #define E1000_MNG2HOST_PORT_664 (1 << 6)
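                /* UDP ports 623 (ASF-RMCP) and 664 (ASF secure RMCP). */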
4445                 manc2h |= E1000_MNG2HOST_PORT_623;
4446                 manc2h |= E1000_MNG2HOST_PORT_664;
4447                 E1000_WRITE_REG(&adapter->hw, E1000_MANC2H, manc2h);
4448                 E1000_WRITE_REG(&adapter->hw, E1000_MANC, manc);
4449         }
4450 }
4451
4452 /*
4453  * Give control back to hardware management
4454  * controller if there is one.
4455  */
4456 static void
4457 em_release_manageability(struct adapter *adapter)
4458 {
4459         if (adapter->has_manage) {
4460                 int manc = E1000_READ_REG(&adapter->hw, E1000_MANC);
4461
4462                 /* re-enable hardware interception of ARP */
4463                 manc |= E1000_MANC_ARP_EN;
4464                 manc &= ~E1000_MANC_EN_MNG2HOST;
4465
4466                 E1000_WRITE_REG(&adapter->hw, E1000_MANC, manc);
4467         }
4468 }
4469
4470 /*
4471  * em_get_hw_control sets the {CTRL_EXT|FWSM}:DRV_LOAD bit.
4472  * For ASF and Pass Through versions of f/w this means
4473  * that the driver is loaded. For AMT version type f/w
4474  * this means that the network i/f is open.
4475  */
4476 static void
4477 em_get_hw_control(struct adapter *adapter)
4478 {
4479         u32 ctrl_ext, swsm;
4480
4481         if (adapter->hw.mac.type == e1000_82573) {
4482                 swsm = E1000_READ_REG(&adapter->hw, E1000_SWSM);
4483                 E1000_WRITE_REG(&adapter->hw, E1000_SWSM,
4484                     swsm | E1000_SWSM_DRV_LOAD);
4485                 return;
4486         }
4487         /* else */
4488         ctrl_ext = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
4489         E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT,
4490             ctrl_ext | E1000_CTRL_EXT_DRV_LOAD);
4491         return;
4492 }
4493
4494 /*
4495  * em_release_hw_control resets {CTRL_EXT|FWSM}:DRV_LOAD bit.
4496  * For ASF and Pass Through versions of f/w this means that
4497  * the driver is no longer loaded. For AMT versions of the
4498  * f/w this means that the network i/f is closed.
4499  */
4500 static void
4501 em_release_hw_control(struct adapter *adapter)
4502 {
4503         u32 ctrl_ext, swsm;
4504
4505         if (!adapter->has_manage)
4506                 return;
4507
4508         if (adapter->hw.mac.type == e1000_82573) {
4509                 swsm = E1000_READ_REG(&adapter->hw, E1000_SWSM);
4510                 E1000_WRITE_REG(&adapter->hw, E1000_SWSM,
4511                     swsm & ~E1000_SWSM_DRV_LOAD);
4512                 return;
4513         }
4514         /* else */
4515         ctrl_ext = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
4516         E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT,
4517             ctrl_ext & ~E1000_CTRL_EXT_DRV_LOAD);
4518         return;
4519 }
4520
4521 static int
4522 em_is_valid_ether_addr(u8 *addr)
4523 {
4524         char zero_addr[6] = { 0, 0, 0, 0, 0, 0 };
4525
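        /* Reject multicast addresses (I/G bit set) and the all-zero address. */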
4526         if ((addr[0] & 1) || (!bcmp(addr, zero_addr, ETHER_ADDR_LEN))) {
4527                 return (FALSE);
4528         }
4529
4530         return (TRUE);
4531 }
4532
4533 /*
4534 ** Parse the interface capabilities with regard
4535 ** to both system management and wake-on-lan for
4536 ** later use.
4537 */
4538 static void
4539 em_get_wakeup(device_t dev)
4540 {
4541         struct adapter  *adapter = device_get_softc(dev);
4542         u16             eeprom_data = 0, device_id, apme_mask;
4543
4544         adapter->has_manage = e1000_enable_mng_pass_thru(&adapter->hw);
4545         apme_mask = EM_EEPROM_APME;
4546
4547         switch (adapter->hw.mac.type) {
4548         case e1000_82573:
4549         case e1000_82583:
4550                 adapter->has_amt = TRUE;
4551                 /* Falls thru */
4552         case e1000_82571:
4553         case e1000_82572:
4554         case e1000_80003es2lan:
4555                 if (adapter->hw.bus.func == 1) {
4556                         e1000_read_nvm(&adapter->hw,
4557                             NVM_INIT_CONTROL3_PORT_B, 1, &eeprom_data);
4558                         break;
4559                 } else
4560                         e1000_read_nvm(&adapter->hw,
4561                             NVM_INIT_CONTROL3_PORT_A, 1, &eeprom_data);
4562                 break;
4563         case e1000_ich8lan:
4564         case e1000_ich9lan:
4565         case e1000_ich10lan:
4566         case e1000_pchlan:
4567                 apme_mask = E1000_WUC_APME;
4568                 adapter->has_amt = TRUE;
4569                 eeprom_data = E1000_READ_REG(&adapter->hw, E1000_WUC);
4570                 break;
4571         default:
4572                 e1000_read_nvm(&adapter->hw,
4573                     NVM_INIT_CONTROL3_PORT_A, 1, &eeprom_data);
4574                 break;
4575         }
4576         if (eeprom_data & apme_mask)
4577                 adapter->wol = (E1000_WUFC_MAG | E1000_WUFC_MC);
4578         /*
4579          * We have the eeprom settings, now apply the special cases
4580          * where the eeprom may be wrong or the board won't support
4581          * wake on lan on a particular port
4582          */
4583         device_id = pci_get_device(dev);
4584         switch (device_id) {
4585         case E1000_DEV_ID_82571EB_FIBER:
4586                 /* Wake events only supported on port A for dual fiber
4587                  * regardless of eeprom setting */
4588                 if (E1000_READ_REG(&adapter->hw, E1000_STATUS) &
4589                     E1000_STATUS_FUNC_1)
4590                         adapter->wol = 0;
4591                 break;
4592         case E1000_DEV_ID_82571EB_QUAD_COPPER:
4593         case E1000_DEV_ID_82571EB_QUAD_FIBER:
4594         case E1000_DEV_ID_82571EB_QUAD_COPPER_LP:
4595                 /* if quad port adapter, disable WoL on all but port A */
4596                 if (global_quad_port_a != 0)
4597                         adapter->wol = 0;
4598                 /* Reset for multiple quad port adapters */
4599                 if (++global_quad_port_a == 4)
4600                         global_quad_port_a = 0;
4601                 break;
4602         }
4603         return;
4604 }
4605
4606
4607 /*
4608  * Enable PCI Wake On Lan capability
4609  */
4610 static void
4611 em_enable_wakeup(device_t dev)
4612 {
4613         struct adapter  *adapter = device_get_softc(dev);
4614         struct ifnet    *ifp = adapter->ifp;
4615         u32             pmc, ctrl, ctrl_ext, rctl;
4616         u16             status;
4617
4618         if ((pci_find_extcap(dev, PCIY_PMG, &pmc) != 0))
4619                 return;
4620
4621         /* Advertise the wakeup capability */
4622         ctrl = E1000_READ_REG(&adapter->hw, E1000_CTRL);
4623         ctrl |= (E1000_CTRL_SWDPIN2 | E1000_CTRL_SWDPIN3);
4624         E1000_WRITE_REG(&adapter->hw, E1000_CTRL, ctrl);
4625         E1000_WRITE_REG(&adapter->hw, E1000_WUC, E1000_WUC_PME_EN);
4626
4627         if ((adapter->hw.mac.type == e1000_ich8lan) ||
4628             (adapter->hw.mac.type == e1000_pchlan) ||
4629             (adapter->hw.mac.type == e1000_ich9lan) ||
4630             (adapter->hw.mac.type == e1000_ich10lan)) {
4631                 e1000_disable_gig_wol_ich8lan(&adapter->hw);
4632                 e1000_hv_phy_powerdown_workaround_ich8lan(&adapter->hw);
4633         }
4634
4635         /* Keep the laser running on Fiber adapters */
4636         if (adapter->hw.phy.media_type == e1000_media_type_fiber ||
4637             adapter->hw.phy.media_type == e1000_media_type_internal_serdes) {
4638                 ctrl_ext = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
4639                 ctrl_ext |= E1000_CTRL_EXT_SDP3_DATA;
4640                 E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT, ctrl_ext);
4641         }
4642
4643         /*
4644         ** Determine type of Wakeup: note that wol
4645         ** is set with all bits on by default.
4646         */
4647         if ((ifp->if_capenable & IFCAP_WOL_MAGIC) == 0)
4648                 adapter->wol &= ~E1000_WUFC_MAG;
4649
4650         if ((ifp->if_capenable & IFCAP_WOL_MCAST) == 0)
4651                 adapter->wol &= ~E1000_WUFC_MC;
4652         else {
4653                 rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
4654                 rctl |= E1000_RCTL_MPE;
4655                 E1000_WRITE_REG(&adapter->hw, E1000_RCTL, rctl);
4656         }
4657
4658         if (adapter->hw.mac.type == e1000_pchlan) {
4659                 if (em_enable_phy_wakeup(adapter))
4660                         return;
4661         } else {
4662                 E1000_WRITE_REG(&adapter->hw, E1000_WUC, E1000_WUC_PME_EN);
4663                 E1000_WRITE_REG(&adapter->hw, E1000_WUFC, adapter->wol);
4664         }
4665
4666         if (adapter->hw.phy.type == e1000_phy_igp_3)
4667                 e1000_igp3_phy_powerdown_workaround_ich8lan(&adapter->hw);
4668
4669         /* Request PME */
4670         status = pci_read_config(dev, pmc + PCIR_POWER_STATUS, 2);
4671         status &= ~(PCIM_PSTAT_PME | PCIM_PSTAT_PMEENABLE);
4672         if (ifp->if_capenable & IFCAP_WOL)
4673                 status |= PCIM_PSTAT_PME | PCIM_PSTAT_PMEENABLE;
4674         pci_write_config(dev, pmc + PCIR_POWER_STATUS, status, 2);
4675
4676         return;
4677 }
4678
4679 /*
4680 ** WOL in the newer chipset interfaces (pchlan)
4681 ** requires MAC state to be copied into the PHY
4682 */
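/*
** The sequence below: mirror the MAC receive-address registers and the
** multicast table into the PHY's BM wakeup register set, mirror the
** relevant RCTL bits, arm wakeup in the MAC (WUC/WUFC), then arm it in
** the PHY and set the host-wakeup enable bit on PHY page 769.
*/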
4683 static int
4684 em_enable_phy_wakeup(struct adapter *adapter)
4685 {
4686         struct e1000_hw *hw = &adapter->hw;
4687         u32 mreg, ret = 0;
4688         u16 preg;
4689
4690         /* copy MAC RARs to PHY RARs */
4691         for (int i = 0; i < adapter->hw.mac.rar_entry_count; i++) {
4692                 mreg = E1000_READ_REG(hw, E1000_RAL(i));
4693                 e1000_write_phy_reg(hw, BM_RAR_L(i), (u16)(mreg & 0xFFFF));
4694                 e1000_write_phy_reg(hw, BM_RAR_M(i),
4695                     (u16)((mreg >> 16) & 0xFFFF));
4696                 mreg = E1000_READ_REG(hw, E1000_RAH(i));
4697                 e1000_write_phy_reg(hw, BM_RAR_H(i), (u16)(mreg & 0xFFFF));
4698                 e1000_write_phy_reg(hw, BM_RAR_CTRL(i),
4699                     (u16)((mreg >> 16) & 0xFFFF));
4700         }
4701
4702         /* copy MAC MTA to PHY MTA */
4703         for (int i = 0; i < adapter->hw.mac.mta_reg_count; i++) {
4704                 mreg = E1000_READ_REG_ARRAY(hw, E1000_MTA, i);
4705                 e1000_write_phy_reg(hw, BM_MTA(i), (u16)(mreg & 0xFFFF));
4706                 e1000_write_phy_reg(hw, BM_MTA(i) + 1,
4707                     (u16)((mreg >> 16) & 0xFFFF));
4708         }
4709
4710         /* configure PHY Rx Control register */
4711         e1000_read_phy_reg(&adapter->hw, BM_RCTL, &preg);
4712         mreg = E1000_READ_REG(hw, E1000_RCTL);
4713         if (mreg & E1000_RCTL_UPE)
4714                 preg |= BM_RCTL_UPE;
4715         if (mreg & E1000_RCTL_MPE)
4716                 preg |= BM_RCTL_MPE;
4717         preg &= ~(BM_RCTL_MO_MASK);
4718         if (mreg & E1000_RCTL_MO_3)
4719                 preg |= (((mreg & E1000_RCTL_MO_3) >> E1000_RCTL_MO_SHIFT)
4720                                 << BM_RCTL_MO_SHIFT);
4721         if (mreg & E1000_RCTL_BAM)
4722                 preg |= BM_RCTL_BAM;
4723         if (mreg & E1000_RCTL_PMCF)
4724                 preg |= BM_RCTL_PMCF;
4725         mreg = E1000_READ_REG(hw, E1000_CTRL);
4726         if (mreg & E1000_CTRL_RFCE)
4727                 preg |= BM_RCTL_RFCE;
4728         e1000_write_phy_reg(&adapter->hw, BM_RCTL, preg);
4729
4730         /* enable PHY wakeup in MAC register */
4731         E1000_WRITE_REG(hw, E1000_WUC,
4732             E1000_WUC_PHY_WAKE | E1000_WUC_PME_EN);
4733         E1000_WRITE_REG(hw, E1000_WUFC, adapter->wol);
4734
4735         /* configure and enable PHY wakeup in PHY registers */
4736         e1000_write_phy_reg(&adapter->hw, BM_WUFC, adapter->wol);
4737         e1000_write_phy_reg(&adapter->hw, BM_WUC, E1000_WUC_PME_EN);
4738
4739         /* activate PHY wakeup */
4740         ret = hw->phy.ops.acquire(hw);
4741         if (ret) {
4742                 printf("Could not acquire PHY\n");
4743                 return (ret);
4744         }
4745         e1000_write_phy_reg_mdic(hw, IGP01E1000_PHY_PAGE_SELECT,
4746                                  (BM_WUC_ENABLE_PAGE << IGP_PAGE_SHIFT));
4747         ret = e1000_read_phy_reg_mdic(hw, BM_WUC_ENABLE_REG, &preg);
4748         if (ret) {
4749                 printf("Could not read PHY page 769\n");
4750                 goto out;
4751         }
4752         preg |= BM_WUC_ENABLE_BIT | BM_WUC_HOST_WU_BIT;
4753         ret = e1000_write_phy_reg_mdic(hw, BM_WUC_ENABLE_REG, preg);
4754         if (ret)
4755                 printf("Could not set PHY Host Wakeup bit\n");
4756 out:
4757         hw->phy.ops.release(hw);
4758
4759         return (ret);
4760 }
4761
4762 static void
4763 em_led_func(void *arg, int onoff)
4764 {
4765         struct adapter  *adapter = arg;
4766  
4767         EM_CORE_LOCK(adapter);
4768         if (onoff) {
4769                 e1000_setup_led(&adapter->hw);
4770                 e1000_led_on(&adapter->hw);
4771         } else {
4772                 e1000_led_off(&adapter->hw);
4773                 e1000_cleanup_led(&adapter->hw);
4774         }
4775         EM_CORE_UNLOCK(adapter);
4776 }
4777
4778 /**********************************************************************
4779  *
4780  *  Update the board statistics counters.
4781  *
4782  **********************************************************************/
4783 static void
4784 em_update_stats_counters(struct adapter *adapter)
4785 {
4786         struct ifnet   *ifp;
4787
4788         if(adapter->hw.phy.media_type == e1000_media_type_copper ||
4789            (E1000_READ_REG(&adapter->hw, E1000_STATUS) & E1000_STATUS_LU)) {
4790                 adapter->stats.symerrs += E1000_READ_REG(&adapter->hw, E1000_SYMERRS);
4791                 adapter->stats.sec += E1000_READ_REG(&adapter->hw, E1000_SEC);
4792         }
4793         adapter->stats.crcerrs += E1000_READ_REG(&adapter->hw, E1000_CRCERRS);
4794         adapter->stats.mpc += E1000_READ_REG(&adapter->hw, E1000_MPC);
4795         adapter->stats.scc += E1000_READ_REG(&adapter->hw, E1000_SCC);
4796         adapter->stats.ecol += E1000_READ_REG(&adapter->hw, E1000_ECOL);
4797
4798         adapter->stats.mcc += E1000_READ_REG(&adapter->hw, E1000_MCC);
4799         adapter->stats.latecol += E1000_READ_REG(&adapter->hw, E1000_LATECOL);
4800         adapter->stats.colc += E1000_READ_REG(&adapter->hw, E1000_COLC);
4801         adapter->stats.dc += E1000_READ_REG(&adapter->hw, E1000_DC);
4802         adapter->stats.rlec += E1000_READ_REG(&adapter->hw, E1000_RLEC);
4803         adapter->stats.xonrxc += E1000_READ_REG(&adapter->hw, E1000_XONRXC);
4804         adapter->stats.xontxc += E1000_READ_REG(&adapter->hw, E1000_XONTXC);
4805         adapter->stats.xoffrxc += E1000_READ_REG(&adapter->hw, E1000_XOFFRXC);
4806         adapter->stats.xofftxc += E1000_READ_REG(&adapter->hw, E1000_XOFFTXC);
4807         adapter->stats.fcruc += E1000_READ_REG(&adapter->hw, E1000_FCRUC);
4808         adapter->stats.prc64 += E1000_READ_REG(&adapter->hw, E1000_PRC64);
4809         adapter->stats.prc127 += E1000_READ_REG(&adapter->hw, E1000_PRC127);
4810         adapter->stats.prc255 += E1000_READ_REG(&adapter->hw, E1000_PRC255);
4811         adapter->stats.prc511 += E1000_READ_REG(&adapter->hw, E1000_PRC511);
4812         adapter->stats.prc1023 += E1000_READ_REG(&adapter->hw, E1000_PRC1023);
4813         adapter->stats.prc1522 += E1000_READ_REG(&adapter->hw, E1000_PRC1522);
4814         adapter->stats.gprc += E1000_READ_REG(&adapter->hw, E1000_GPRC);
4815         adapter->stats.bprc += E1000_READ_REG(&adapter->hw, E1000_BPRC);
4816         adapter->stats.mprc += E1000_READ_REG(&adapter->hw, E1000_MPRC);
4817         adapter->stats.gptc += E1000_READ_REG(&adapter->hw, E1000_GPTC);
4818
4819         /* For the 64-bit byte counters the low dword must be read first. */
4820         /* Both registers clear on the read of the high dword */
4821
4822         adapter->stats.gorc += E1000_READ_REG(&adapter->hw, E1000_GORCL) +
                 ((u64)E1000_READ_REG(&adapter->hw, E1000_GORCH) << 32);
4823         adapter->stats.gotc += E1000_READ_REG(&adapter->hw, E1000_GOTCL) +
                 ((u64)E1000_READ_REG(&adapter->hw, E1000_GOTCH) << 32);
4824
4825         adapter->stats.rnbc += E1000_READ_REG(&adapter->hw, E1000_RNBC);
4826         adapter->stats.ruc += E1000_READ_REG(&adapter->hw, E1000_RUC);
4827         adapter->stats.rfc += E1000_READ_REG(&adapter->hw, E1000_RFC);
4828         adapter->stats.roc += E1000_READ_REG(&adapter->hw, E1000_ROC);
4829         adapter->stats.rjc += E1000_READ_REG(&adapter->hw, E1000_RJC);
4830
4831         adapter->stats.tor += E1000_READ_REG(&adapter->hw, E1000_TORL) +
                 ((u64)E1000_READ_REG(&adapter->hw, E1000_TORH) << 32);
4832         adapter->stats.tot += E1000_READ_REG(&adapter->hw, E1000_TOTL) +
                 ((u64)E1000_READ_REG(&adapter->hw, E1000_TOTH) << 32);
4833
4834         adapter->stats.tpr += E1000_READ_REG(&adapter->hw, E1000_TPR);
4835         adapter->stats.tpt += E1000_READ_REG(&adapter->hw, E1000_TPT);
4836         adapter->stats.ptc64 += E1000_READ_REG(&adapter->hw, E1000_PTC64);
4837         adapter->stats.ptc127 += E1000_READ_REG(&adapter->hw, E1000_PTC127);
4838         adapter->stats.ptc255 += E1000_READ_REG(&adapter->hw, E1000_PTC255);
4839         adapter->stats.ptc511 += E1000_READ_REG(&adapter->hw, E1000_PTC511);
4840         adapter->stats.ptc1023 += E1000_READ_REG(&adapter->hw, E1000_PTC1023);
4841         adapter->stats.ptc1522 += E1000_READ_REG(&adapter->hw, E1000_PTC1522);
4842         adapter->stats.mptc += E1000_READ_REG(&adapter->hw, E1000_MPTC);
4843         adapter->stats.bptc += E1000_READ_REG(&adapter->hw, E1000_BPTC);
4844
4845         if (adapter->hw.mac.type >= e1000_82543) {
4846                 adapter->stats.algnerrc += 
4847                 E1000_READ_REG(&adapter->hw, E1000_ALGNERRC);
4848                 adapter->stats.rxerrc += 
4849                 E1000_READ_REG(&adapter->hw, E1000_RXERRC);
4850                 adapter->stats.tncrs += 
4851                 E1000_READ_REG(&adapter->hw, E1000_TNCRS);
4852                 adapter->stats.cexterr += 
4853                 E1000_READ_REG(&adapter->hw, E1000_CEXTERR);
4854                 adapter->stats.tsctc += 
4855                 E1000_READ_REG(&adapter->hw, E1000_TSCTC);
4856                 adapter->stats.tsctfc += 
4857                 E1000_READ_REG(&adapter->hw, E1000_TSCTFC);
4858         }
4859         ifp = adapter->ifp;
4860
4861         ifp->if_collisions = adapter->stats.colc;
4862
4863         /* Rx Errors */
4864         ifp->if_ierrors = adapter->dropped_pkts + adapter->stats.rxerrc +
4865             adapter->stats.crcerrs + adapter->stats.algnerrc +
4866             adapter->stats.ruc + adapter->stats.roc +
4867             adapter->stats.mpc + adapter->stats.cexterr;
4868
4869         /* Tx Errors */
4870         ifp->if_oerrors = adapter->stats.ecol +
4871             adapter->stats.latecol + adapter->watchdog_events;
4872 }
4873
4874
4875 /**********************************************************************
4876  *
4877  *  This routine is called only when em_display_debug_stats is enabled.
4878  *  This routine provides a way to take a look at important statistics
4879  *  maintained by the driver and hardware.
4880  *
4881  **********************************************************************/
4882 static void
4883 em_print_debug_info(struct adapter *adapter)
4884 {
4885         device_t dev = adapter->dev;
4886         u8 *hw_addr = adapter->hw.hw_addr;
4887         struct rx_ring *rxr = adapter->rx_rings;
4888         struct tx_ring *txr = adapter->tx_rings;
4889
4890         device_printf(dev, "Adapter hardware address = %p \n", hw_addr);
4891         device_printf(dev, "CTRL = 0x%x RCTL = 0x%x \n",
4892             E1000_READ_REG(&adapter->hw, E1000_CTRL),
4893             E1000_READ_REG(&adapter->hw, E1000_RCTL));
4894         device_printf(dev, "Packet buffer = Tx=%dk Rx=%dk \n",
4895             ((E1000_READ_REG(&adapter->hw, E1000_PBA) & 0xffff0000) >> 16),
4896             (E1000_READ_REG(&adapter->hw, E1000_PBA) & 0xffff));
4897         device_printf(dev, "Flow control watermarks high = %d low = %d\n",
4898             adapter->hw.fc.high_water,
4899             adapter->hw.fc.low_water);
4900         device_printf(dev, "tx_int_delay = %d, tx_abs_int_delay = %d\n",
4901             E1000_READ_REG(&adapter->hw, E1000_TIDV),
4902             E1000_READ_REG(&adapter->hw, E1000_TADV));
4903         device_printf(dev, "rx_int_delay = %d, rx_abs_int_delay = %d\n",
4904             E1000_READ_REG(&adapter->hw, E1000_RDTR),
4905             E1000_READ_REG(&adapter->hw, E1000_RADV));
4906
4907         for (int i = 0; i < adapter->num_queues; i++, txr++) {
4908                 device_printf(dev, "Queue(%d) tdh = %d, tdt = %d\n", i,
4909                     E1000_READ_REG(&adapter->hw, E1000_TDH(i)),
4910                     E1000_READ_REG(&adapter->hw, E1000_TDT(i)));
4911                 device_printf(dev, "TX(%d) no descriptors avail event = %ld\n",
4912                     txr->me, txr->no_desc_avail);
4913                 device_printf(dev, "TX(%d) MSIX IRQ Handled = %ld\n",
4914                     txr->me, txr->tx_irq);
4915                 device_printf(dev, "Num Tx descriptors avail = %d\n",
4916                     txr->tx_avail);
4917                 device_printf(dev, "Tx Descriptors not avail1 = %ld\n",
4918                     txr->no_desc_avail);
4919         }
4920         for (int i = 0; i < adapter->num_queues; i++, rxr++) {
4921                 device_printf(dev, "RX(%d) MSIX IRQ Handled = %ld\n",
4922                     rxr->me, rxr->rx_irq);
4923                 device_printf(dev, "hw rdh = %d, hw rdt = %d\n",
4924                     E1000_READ_REG(&adapter->hw, E1000_RDH(i)),
4925                     E1000_READ_REG(&adapter->hw, E1000_RDT(i)));
4926         }
4927         device_printf(dev, "Std mbuf failed = %ld\n",
4928             adapter->mbuf_alloc_failed);
4929         device_printf(dev, "Std mbuf cluster failed = %ld\n",
4930             adapter->mbuf_cluster_failed);
4931         device_printf(dev, "Driver dropped packets = %ld\n",
4932             adapter->dropped_pkts);
4933 }
4934
4935 static void
4936 em_print_hw_stats(struct adapter *adapter)
4937 {
4938         device_t dev = adapter->dev;
4939
4940         device_printf(dev, "Excessive collisions = %lld\n",
4941             (long long)adapter->stats.ecol);
4942 #if     (DEBUG_HW > 0)  /* Don't output these errors normally */
4943         device_printf(dev, "Symbol errors = %lld\n",
4944             (long long)adapter->stats.symerrs);
4945 #endif
4946         device_printf(dev, "Sequence errors = %lld\n",
4947             (long long)adapter->stats.sec);
4948         device_printf(dev, "Defer count = %lld\n",
4949             (long long)adapter->stats.dc);
4950         device_printf(dev, "Missed Packets = %lld\n",
4951             (long long)adapter->stats.mpc);
4952         device_printf(dev, "Receive No Buffers = %lld\n",
4953             (long long)adapter->stats.rnbc);
4954         /* RLEC is inaccurate on some hardware, calculate our own. */
4955         device_printf(dev, "Receive Length Errors = %lld\n",
4956             ((long long)adapter->stats.roc + (long long)adapter->stats.ruc));
4957         device_printf(dev, "Receive errors = %lld\n",
4958             (long long)adapter->stats.rxerrc);
4959         device_printf(dev, "Crc errors = %lld\n",
4960             (long long)adapter->stats.crcerrs);
4961         device_printf(dev, "Alignment errors = %lld\n",
4962             (long long)adapter->stats.algnerrc);
4963         device_printf(dev, "Collision/Carrier extension errors = %lld\n",
4964             (long long)adapter->stats.cexterr);
4965         device_printf(dev, "watchdog timeouts = %ld\n",
4966             adapter->watchdog_events);
4967         device_printf(dev, "XON Rcvd = %lld\n",
4968             (long long)adapter->stats.xonrxc);
4969         device_printf(dev, "XON Xmtd = %lld\n",
4970             (long long)adapter->stats.xontxc);
4971         device_printf(dev, "XOFF Rcvd = %lld\n",
4972             (long long)adapter->stats.xoffrxc);
4973         device_printf(dev, "XOFF Xmtd = %lld\n",
4974             (long long)adapter->stats.xofftxc);
4975         device_printf(dev, "Good Packets Rcvd = %lld\n",
4976             (long long)adapter->stats.gprc);
4977         device_printf(dev, "Good Packets Xmtd = %lld\n",
4978             (long long)adapter->stats.gptc);
4979         device_printf(dev, "TSO Contexts Xmtd = %lld\n",
4980             (long long)adapter->stats.tsctc);
4981         device_printf(dev, "TSO Contexts Failed = %lld\n",
4982             (long long)adapter->stats.tsctfc);
4983 }
4984
4985 /**********************************************************************
4986  *
4987  *  This routine provides a way to dump out the adapter eeprom,
4988  *  often a useful debug/service tool. This only dumps the first
4989  *  32 words; the fields that matter live within that extent.
4990  *
4991  **********************************************************************/
4992 static void
4993 em_print_nvm_info(struct adapter *adapter)
4994 {
4995         u16     eeprom_data;
4996         int     i, j, row = 0;
4997
4998         /* It's a bit crude, but it gets the job done */
4999         printf("\nInterface EEPROM Dump:\n");
5000         printf("Offset\n0x0000  ");
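        /* Eight 16-bit words per row; row labels are byte offsets. */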
5001         for (i = 0, j = 0; i < 32; i++, j++) {
5002                 if (j == 8) { /* Make the offset block */
5003                         j = 0; ++row;
5004                         printf("\n0x00%x0  ", row);
5005                 }
5006                 e1000_read_nvm(&adapter->hw, i, 1, &eeprom_data);
5007                 printf("%04x ", eeprom_data);
5008         }
5009         printf("\n");
5010 }
5011
5012 static int
5013 em_sysctl_debug_info(SYSCTL_HANDLER_ARGS)
5014 {
5015         struct adapter *adapter;
5016         int error;
5017         int result;
5018
5019         result = -1;
5020         error = sysctl_handle_int(oidp, &result, 0, req);
5021
5022         if (error || !req->newptr)
5023                 return (error);
5024
5025         if (result == 1) {
5026                 adapter = (struct adapter *)arg1;
5027                 em_print_debug_info(adapter);
5028         }
5029         /*
5030          * This value will cause a hex dump of the
5031          * first 32 16-bit words of the EEPROM to
5032          * the screen.
5033          */
5034         if (result == 2) {
5035                 adapter = (struct adapter *)arg1;
5036                 em_print_nvm_info(adapter);
5037         }
5038
5039         return (error);
5040 }
5041
5042
5043 static int
5044 em_sysctl_stats(SYSCTL_HANDLER_ARGS)
5045 {
5046         struct adapter *adapter;
5047         int error;
5048         int result;
5049
5050         result = -1;
5051         error = sysctl_handle_int(oidp, &result, 0, req);
5052
5053         if (error || !req->newptr)
5054                 return (error);
5055
5056         if (result == 1) {
5057                 adapter = (struct adapter *)arg1;
5058                 em_print_hw_stats(adapter);
5059         }
5060
5061         return (error);
5062 }
5063
5064 static int
5065 em_sysctl_int_delay(SYSCTL_HANDLER_ARGS)
5066 {
5067         struct em_int_delay_info *info;
5068         struct adapter *adapter;
5069         u32 regval;
5070         int error, usecs, ticks;
5071
5072         info = (struct em_int_delay_info *)arg1;
5073         usecs = info->value;
5074         error = sysctl_handle_int(oidp, &usecs, 0, req);
5075         if (error != 0 || req->newptr == NULL)
5076                 return (error);
5077         if (usecs < 0 || usecs > EM_TICKS_TO_USECS(65535))
5078                 return (EINVAL);
5079         info->value = usecs;
5080         ticks = EM_USECS_TO_TICKS(usecs);
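        /*
         * The delay registers count in 1.024 usec units; the
         * EM_USECS_TO_TICKS()/EM_TICKS_TO_USECS() macros convert, and
         * the 16-bit register field gives the 65535-tick bound above.
         */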
5081
5082         adapter = info->adapter;
5083         
5084         EM_CORE_LOCK(adapter);
5085         regval = E1000_READ_OFFSET(&adapter->hw, info->offset);
5086         regval = (regval & ~0xffff) | (ticks & 0xffff);
5087         /* Handle a few special cases. */
5088         switch (info->offset) {
5089         case E1000_RDTR:
5090                 break;
5091         case E1000_TIDV:
5092                 if (ticks == 0) {
5093                         adapter->txd_cmd &= ~E1000_TXD_CMD_IDE;
5094                         /* Don't write 0 into the TIDV register. */
5095                         regval++;
5096                 } else
5097                         adapter->txd_cmd |= E1000_TXD_CMD_IDE;
5098                 break;
5099         }
5100         E1000_WRITE_OFFSET(&adapter->hw, info->offset, regval);
5101         EM_CORE_UNLOCK(adapter);
5102         return (0);
5103 }
5104
5105 static void
5106 em_add_int_delay_sysctl(struct adapter *adapter, const char *name,
5107         const char *description, struct em_int_delay_info *info,
5108         int offset, int value)
5109 {
5110         info->adapter = adapter;
5111         info->offset = offset;
5112         info->value = value;
5113         SYSCTL_ADD_PROC(device_get_sysctl_ctx(adapter->dev),
5114             SYSCTL_CHILDREN(device_get_sysctl_tree(adapter->dev)),
5115             OID_AUTO, name, CTLTYPE_INT|CTLFLAG_RW,
5116             info, 0, em_sysctl_int_delay, "I", description);
5117 }
5118
5119 static void
5120 em_add_rx_process_limit(struct adapter *adapter, const char *name,
5121         const char *description, int *limit, int value)
5122 {
5123         *limit = value;
5124         SYSCTL_ADD_INT(device_get_sysctl_ctx(adapter->dev),
5125             SYSCTL_CHILDREN(device_get_sysctl_tree(adapter->dev)),
5126             OID_AUTO, name, CTLTYPE_INT|CTLFLAG_RW, limit, value, description);
5127 }
5128
5129