/******************************************************************************

  Copyright (c) 2001-2010, Intel Corporation
  All rights reserved.

  Redistribution and use in source and binary forms, with or without
  modification, are permitted provided that the following conditions are met:

   1. Redistributions of source code must retain the above copyright notice,
      this list of conditions and the following disclaimer.

   2. Redistributions in binary form must reproduce the above copyright
      notice, this list of conditions and the following disclaimer in the
      documentation and/or other materials provided with the distribution.

   3. Neither the name of the Intel Corporation nor the names of its
      contributors may be used to endorse or promote products derived from
      this software without specific prior written permission.

  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
  AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
  LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
  INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
  CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
  ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
  POSSIBILITY OF SUCH DAMAGE.

******************************************************************************/
/*$FreeBSD$*/

#ifdef HAVE_KERNEL_OPTION_HEADERS
#include "opt_device_polling.h"
#include "opt_inet.h"
#endif

#include <sys/param.h>
#include <sys/systm.h>
#if __FreeBSD_version >= 800000
#include <sys/buf_ring.h>
#endif
#include <sys/bus.h>
#include <sys/endian.h>
#include <sys/kernel.h>
#include <sys/kthread.h>
#include <sys/malloc.h>
#include <sys/mbuf.h>
#include <sys/module.h>
#include <sys/rman.h>
#include <sys/socket.h>
#include <sys/sockio.h>
#include <sys/sysctl.h>
#include <sys/taskqueue.h>
#include <sys/eventhandler.h>
#include <machine/bus.h>
#include <machine/resource.h>

#include <net/bpf.h>
#include <net/ethernet.h>
#include <net/if.h>
#include <net/if_arp.h>
#include <net/if_dl.h>
#include <net/if_media.h>

#include <net/if_types.h>
#include <net/if_vlan_var.h>

#include <netinet/in_systm.h>
#include <netinet/in.h>
#include <netinet/if_ether.h>
#include <netinet/ip.h>
#include <netinet/ip6.h>
#include <netinet/tcp.h>
#include <netinet/udp.h>

#include <machine/in_cksum.h>
#include <dev/led/led.h>
#include <dev/pci/pcivar.h>
#include <dev/pci/pcireg.h>

#include "e1000_api.h"
#include "e1000_82571.h"
#include "if_em.h"

/*********************************************************************
 *  Set this to one to display debug statistics
 *********************************************************************/
int     em_display_debug_stats = 0;

/*********************************************************************
 *  Driver version:
 *********************************************************************/
char em_driver_version[] = "7.0.5";


/*********************************************************************
 *  PCI Device ID Table
 *
 *  Used by probe to select the devices to attach to.
 *  The last field stores an index into em_strings.
 *  The last entry must be all 0s.
 *
 *  { Vendor ID, Device ID, SubVendor ID, SubDevice ID, String Index }
 *********************************************************************/

static em_vendor_info_t em_vendor_info_array[] =
{
        /* Intel(R) PRO/1000 Network Connection */
        { 0x8086, E1000_DEV_ID_82571EB_COPPER,  PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82571EB_FIBER,   PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82571EB_SERDES,  PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82571EB_SERDES_DUAL,
                                                PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82571EB_SERDES_QUAD,
                                                PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82571EB_QUAD_COPPER,
                                                PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82571EB_QUAD_COPPER_LP,
                                                PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82571EB_QUAD_FIBER,
                                                PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82571PT_QUAD_COPPER,
                                                PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82572EI_COPPER,  PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82572EI_FIBER,   PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82572EI_SERDES,  PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82572EI,         PCI_ANY_ID, PCI_ANY_ID, 0},

        { 0x8086, E1000_DEV_ID_82573E,          PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82573E_IAMT,     PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82573L,          PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82583V,          PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_80003ES2LAN_COPPER_SPT,
                                                PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_80003ES2LAN_SERDES_SPT,
                                                PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_80003ES2LAN_COPPER_DPT,
                                                PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_80003ES2LAN_SERDES_DPT,
                                                PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_ICH8_IGP_M_AMT,  PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_ICH8_IGP_AMT,    PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_ICH8_IGP_C,      PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_ICH8_IFE,        PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_ICH8_IFE_GT,     PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_ICH8_IFE_G,      PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_ICH8_IGP_M,      PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_ICH8_82567V_3,   PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_ICH9_IGP_M_AMT,  PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_ICH9_IGP_AMT,    PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_ICH9_IGP_C,      PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_ICH9_IGP_M,      PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_ICH9_IGP_M_V,    PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_ICH9_IFE,        PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_ICH9_IFE_GT,     PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_ICH9_IFE_G,      PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_ICH9_BM,         PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82574L,          PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82574LA,         PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_ICH10_R_BM_LM,   PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_ICH10_R_BM_LF,   PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_ICH10_R_BM_V,    PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_ICH10_D_BM_LM,   PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_ICH10_D_BM_LF,   PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_PCH_M_HV_LM,     PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_PCH_M_HV_LC,     PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_PCH_D_HV_DM,     PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_PCH_D_HV_DC,     PCI_ANY_ID, PCI_ANY_ID, 0},
        /* required last entry */
        { 0, 0, 0, 0, 0}
};

/*********************************************************************
 *  Table of branding strings for all supported NICs.
 *********************************************************************/

static char *em_strings[] = {
        "Intel(R) PRO/1000 Network Connection"
};

/*********************************************************************
 *  Function prototypes
 *********************************************************************/
static int      em_probe(device_t);
static int      em_attach(device_t);
static int      em_detach(device_t);
static int      em_shutdown(device_t);
static int      em_suspend(device_t);
static int      em_resume(device_t);
static void     em_start(struct ifnet *);
static void     em_start_locked(struct ifnet *, struct tx_ring *);
#ifdef EM_MULTIQUEUE
static int      em_mq_start(struct ifnet *, struct mbuf *);
static int      em_mq_start_locked(struct ifnet *,
                    struct tx_ring *, struct mbuf *);
static void     em_qflush(struct ifnet *);
#endif
static int      em_ioctl(struct ifnet *, u_long, caddr_t);
static void     em_init(void *);
static void     em_init_locked(struct adapter *);
static void     em_stop(void *);
static void     em_media_status(struct ifnet *, struct ifmediareq *);
static int      em_media_change(struct ifnet *);
static void     em_identify_hardware(struct adapter *);
static int      em_allocate_pci_resources(struct adapter *);
static int      em_allocate_legacy(struct adapter *);
static int      em_allocate_msix(struct adapter *);
static int      em_allocate_queues(struct adapter *);
static int      em_setup_msix(struct adapter *);
static void     em_free_pci_resources(struct adapter *);
static void     em_local_timer(void *);
static void     em_reset(struct adapter *);
static void     em_setup_interface(device_t, struct adapter *);

static void     em_setup_transmit_structures(struct adapter *);
static void     em_initialize_transmit_unit(struct adapter *);
static int      em_allocate_transmit_buffers(struct tx_ring *);
static void     em_free_transmit_structures(struct adapter *);
static void     em_free_transmit_buffers(struct tx_ring *);

static int      em_setup_receive_structures(struct adapter *);
static int      em_allocate_receive_buffers(struct rx_ring *);
static void     em_initialize_receive_unit(struct adapter *);
static void     em_free_receive_structures(struct adapter *);
static void     em_free_receive_buffers(struct rx_ring *);

static void     em_enable_intr(struct adapter *);
static void     em_disable_intr(struct adapter *);
static void     em_update_stats_counters(struct adapter *);
static bool     em_txeof(struct tx_ring *);
static int      em_rxeof(struct rx_ring *, int);
#ifndef __NO_STRICT_ALIGNMENT
static int      em_fixup_rx(struct rx_ring *);
#endif
static void     em_receive_checksum(struct e1000_rx_desc *, struct mbuf *);
static void     em_transmit_checksum_setup(struct tx_ring *, struct mbuf *,
                    u32 *, u32 *);
static bool     em_tso_setup(struct tx_ring *, struct mbuf *, u32 *, u32 *);
static void     em_set_promisc(struct adapter *);
static void     em_disable_promisc(struct adapter *);
static void     em_set_multi(struct adapter *);
static void     em_print_hw_stats(struct adapter *);
static void     em_update_link_status(struct adapter *);
static void     em_refresh_mbufs(struct rx_ring *, int);
static void     em_register_vlan(void *, struct ifnet *, u16);
static void     em_unregister_vlan(void *, struct ifnet *, u16);
static void     em_setup_vlan_hw_support(struct adapter *);
static int      em_xmit(struct tx_ring *, struct mbuf **);
static int      em_dma_malloc(struct adapter *, bus_size_t,
                    struct em_dma_alloc *, int);
static void     em_dma_free(struct adapter *, struct em_dma_alloc *);
static void     em_print_debug_info(struct adapter *);
static void     em_print_nvm_info(struct adapter *);
static int      em_is_valid_ether_addr(u8 *);
static int      em_sysctl_stats(SYSCTL_HANDLER_ARGS);
static int      em_sysctl_debug_info(SYSCTL_HANDLER_ARGS);
static int      em_sysctl_int_delay(SYSCTL_HANDLER_ARGS);
static void     em_add_int_delay_sysctl(struct adapter *, const char *,
                    const char *, struct em_int_delay_info *, int, int);
/* Management and WOL Support */
static void     em_init_manageability(struct adapter *);
static void     em_release_manageability(struct adapter *);
static void     em_get_hw_control(struct adapter *);
static void     em_release_hw_control(struct adapter *);
static void     em_get_wakeup(device_t);
static void     em_enable_wakeup(device_t);
static int      em_enable_phy_wakeup(struct adapter *);
static void     em_led_func(void *, int);

static int      em_irq_fast(void *);
static void     em_handle_que(void *context, int pending);

/* MSIX handlers */
static void     em_msix_tx(void *);
static void     em_msix_rx(void *);
static void     em_msix_link(void *);
static void     em_handle_tx(void *context, int pending);
static void     em_handle_rx(void *context, int pending);
static void     em_handle_link(void *context, int pending);

static void     em_add_rx_process_limit(struct adapter *, const char *,
                    const char *, int *, int);

#ifdef DEVICE_POLLING
static poll_handler_t em_poll;
#endif /* DEVICE_POLLING */

/*********************************************************************
 *  FreeBSD Device Interface Entry Points
 *********************************************************************/

static device_method_t em_methods[] = {
        /* Device interface */
        DEVMETHOD(device_probe, em_probe),
        DEVMETHOD(device_attach, em_attach),
        DEVMETHOD(device_detach, em_detach),
        DEVMETHOD(device_shutdown, em_shutdown),
        DEVMETHOD(device_suspend, em_suspend),
        DEVMETHOD(device_resume, em_resume),
        {0, 0}
};

static driver_t em_driver = {
        "em", em_methods, sizeof(struct adapter),
};

devclass_t em_devclass;
DRIVER_MODULE(em, pci, em_driver, em_devclass, 0, 0);
MODULE_DEPEND(em, pci, 1, 1, 1);
MODULE_DEPEND(em, ether, 1, 1, 1);

/*********************************************************************
 *  Tunable default values.
 *********************************************************************/

#define EM_TICKS_TO_USECS(ticks)        ((1024 * (ticks) + 500) / 1000)
#define EM_USECS_TO_TICKS(usecs)        ((1000 * (usecs) + 512) / 1024)
#define M_TSO_LEN                       66
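
/*
 * Worked example of the conversions above: the interrupt-delay registers
 * count in units of 1.024 usecs, so with the rounding terms
 * EM_TICKS_TO_USECS(64) = (1024 * 64 + 500) / 1000 = 66 usecs, and going
 * the other way EM_USECS_TO_TICKS(66) = (1000 * 66 + 512) / 1024 = 64.
 */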

/* Allow common code without TSO */
#ifndef CSUM_TSO
#define CSUM_TSO        0
#endif

static int em_tx_int_delay_dflt = EM_TICKS_TO_USECS(EM_TIDV);
static int em_rx_int_delay_dflt = EM_TICKS_TO_USECS(EM_RDTR);
TUNABLE_INT("hw.em.tx_int_delay", &em_tx_int_delay_dflt);
TUNABLE_INT("hw.em.rx_int_delay", &em_rx_int_delay_dflt);

static int em_tx_abs_int_delay_dflt = EM_TICKS_TO_USECS(EM_TADV);
static int em_rx_abs_int_delay_dflt = EM_TICKS_TO_USECS(EM_RADV);
TUNABLE_INT("hw.em.tx_abs_int_delay", &em_tx_abs_int_delay_dflt);
TUNABLE_INT("hw.em.rx_abs_int_delay", &em_rx_abs_int_delay_dflt);

static int em_rxd = EM_DEFAULT_RXD;
static int em_txd = EM_DEFAULT_TXD;
TUNABLE_INT("hw.em.rxd", &em_rxd);
TUNABLE_INT("hw.em.txd", &em_txd);

static int em_smart_pwr_down = FALSE;
TUNABLE_INT("hw.em.smart_pwr_down", &em_smart_pwr_down);

/* Controls whether promiscuous also shows bad packets */
static int em_debug_sbp = FALSE;
TUNABLE_INT("hw.em.sbp", &em_debug_sbp);

/* Local controls for MSI/MSIX */
#ifdef EM_MULTIQUEUE
static int em_enable_msix = TRUE;
static int em_msix_queues = 2; /* for 82574, can be 1 or 2 */
#else
static int em_enable_msix = FALSE;
static int em_msix_queues = 0; /* disable */
#endif
TUNABLE_INT("hw.em.enable_msix", &em_enable_msix);
TUNABLE_INT("hw.em.msix_queues", &em_msix_queues);

/* How many packets rxeof tries to clean at a time */
static int em_rx_process_limit = 100;
TUNABLE_INT("hw.em.rx_process_limit", &em_rx_process_limit);

/* Flow control setting - default to FULL */
static int em_fc_setting = e1000_fc_full;
TUNABLE_INT("hw.em.fc_setting", &em_fc_setting);
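
/*
** All the hw.em.* knobs above are boot-time tunables; a sketch of
** typical /boot/loader.conf usage (illustrative values, not tuned
** recommendations):
**
**      hw.em.rx_int_delay="66"
**      hw.em.rxd="1024"
**      hw.em.fc_setting="3"            # e1000_fc_full
*/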

/*
** Shadow VFTA table: this copy is needed because the
** real VLAN filter table is cleared during a soft reset,
** and the driver must be able to repopulate it afterwards.
*/
static u32 em_shadow_vfta[EM_VFTA_SIZE];
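/*
** For reference, a VLAN ID selects a bit here the same way it does in
** the hardware VFTA: word = (vtag >> 5) & 0x7F, bit = vtag & 0x1F, so
** for example VLAN 100 is bit 4 of em_shadow_vfta[3].
*/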

/* Global used in WOL setup with multiport cards */
static int global_quad_port_a = 0;

/*********************************************************************
 *  Device identification routine
 *
 *  em_probe determines whether the driver should be loaded on a
 *  given adapter based on that adapter's PCI vendor/device ID.
 *
 *  return BUS_PROBE_DEFAULT on success, positive on failure
 *********************************************************************/

static int
em_probe(device_t dev)
{
        char            adapter_name[60];
        u16             pci_vendor_id = 0;
        u16             pci_device_id = 0;
        u16             pci_subvendor_id = 0;
        u16             pci_subdevice_id = 0;
        em_vendor_info_t *ent;

        INIT_DEBUGOUT("em_probe: begin");

        pci_vendor_id = pci_get_vendor(dev);
        if (pci_vendor_id != EM_VENDOR_ID)
                return (ENXIO);

        pci_device_id = pci_get_device(dev);
        pci_subvendor_id = pci_get_subvendor(dev);
        pci_subdevice_id = pci_get_subdevice(dev);

        ent = em_vendor_info_array;
        while (ent->vendor_id != 0) {
                if ((pci_vendor_id == ent->vendor_id) &&
                    (pci_device_id == ent->device_id) &&

                    ((pci_subvendor_id == ent->subvendor_id) ||
                    (ent->subvendor_id == PCI_ANY_ID)) &&

                    ((pci_subdevice_id == ent->subdevice_id) ||
                    (ent->subdevice_id == PCI_ANY_ID))) {
                        sprintf(adapter_name, "%s %s",
                                em_strings[ent->index],
                                em_driver_version);
                        device_set_desc_copy(dev, adapter_name);
                        return (BUS_PROBE_DEFAULT);
                }
                ent++;
        }

        return (ENXIO);
}

/*********************************************************************
 *  Device initialization routine
 *
 *  The attach entry point is called when the driver is being loaded.
 *  This routine identifies the type of hardware, allocates all resources
 *  and initializes the hardware.
 *
 *  return 0 on success, positive on failure
 *********************************************************************/

static int
em_attach(device_t dev)
{
        struct adapter  *adapter;
        int             error = 0;

        INIT_DEBUGOUT("em_attach: begin");

        adapter = device_get_softc(dev);
        adapter->dev = adapter->osdep.dev = dev;
        EM_CORE_LOCK_INIT(adapter, device_get_nameunit(dev));

        /* SYSCTL stuff */
        SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
            SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
            OID_AUTO, "debug", CTLTYPE_INT|CTLFLAG_RW, adapter, 0,
            em_sysctl_debug_info, "I", "Debug Information");

        SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
            SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
            OID_AUTO, "stats", CTLTYPE_INT|CTLFLAG_RW, adapter, 0,
            em_sysctl_stats, "I", "Statistics");
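
        /*
         * Both handlers show up under the per-device sysctl tree; e.g.
         * on unit 0, setting dev.em.0.debug=1 or dev.em.0.stats=1
         * dumps the corresponding report to the console.
         */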

        callout_init_mtx(&adapter->timer, &adapter->core_mtx, 0);

        /* Determine hardware and mac info */
        em_identify_hardware(adapter);

        /* Setup PCI resources */
        if (em_allocate_pci_resources(adapter)) {
                device_printf(dev, "Allocation of PCI resources failed\n");
                error = ENXIO;
                goto err_pci;
        }

        /*
        ** For ICH8 and family we need to
        ** map the flash memory, and this
        ** must happen after the MAC is
        ** identified
        */
        if ((adapter->hw.mac.type == e1000_ich8lan) ||
            (adapter->hw.mac.type == e1000_pchlan) ||
            (adapter->hw.mac.type == e1000_ich9lan) ||
            (adapter->hw.mac.type == e1000_ich10lan)) {
                int rid = EM_BAR_TYPE_FLASH;
                adapter->flash = bus_alloc_resource_any(dev,
                    SYS_RES_MEMORY, &rid, RF_ACTIVE);
                if (adapter->flash == NULL) {
                        device_printf(dev, "Mapping of Flash failed\n");
                        error = ENXIO;
                        goto err_pci;
                }
                /* This is used in the shared code */
                adapter->hw.flash_address = (u8 *)adapter->flash;
                adapter->osdep.flash_bus_space_tag =
                    rman_get_bustag(adapter->flash);
                adapter->osdep.flash_bus_space_handle =
                    rman_get_bushandle(adapter->flash);
        }

        /* Do Shared Code initialization */
        if (e1000_setup_init_funcs(&adapter->hw, TRUE)) {
                device_printf(dev, "Setup of Shared code failed\n");
                error = ENXIO;
                goto err_pci;
        }

        e1000_get_bus_info(&adapter->hw);

        /* Set up some sysctls for the tunable interrupt delays */
        em_add_int_delay_sysctl(adapter, "rx_int_delay",
            "receive interrupt delay in usecs", &adapter->rx_int_delay,
            E1000_REGISTER(&adapter->hw, E1000_RDTR), em_rx_int_delay_dflt);
        em_add_int_delay_sysctl(adapter, "tx_int_delay",
            "transmit interrupt delay in usecs", &adapter->tx_int_delay,
            E1000_REGISTER(&adapter->hw, E1000_TIDV), em_tx_int_delay_dflt);
        em_add_int_delay_sysctl(adapter, "rx_abs_int_delay",
            "receive interrupt delay limit in usecs",
            &adapter->rx_abs_int_delay,
            E1000_REGISTER(&adapter->hw, E1000_RADV),
            em_rx_abs_int_delay_dflt);
        em_add_int_delay_sysctl(adapter, "tx_abs_int_delay",
            "transmit interrupt delay limit in usecs",
            &adapter->tx_abs_int_delay,
            E1000_REGISTER(&adapter->hw, E1000_TADV),
            em_tx_abs_int_delay_dflt);

        /* Sysctls for limiting the amount of work done in the taskqueue */
        em_add_rx_process_limit(adapter, "rx_processing_limit",
            "max number of rx packets to process", &adapter->rx_process_limit,
            em_rx_process_limit);

        /*
         * Validate the number of transmit and receive descriptors. It
         * must not exceed the hardware maximum, and the ring size in
         * bytes must be a multiple of EM_DBA_ALIGN.
         */
        if (((em_txd * sizeof(struct e1000_tx_desc)) % EM_DBA_ALIGN) != 0 ||
            (em_txd > EM_MAX_TXD) || (em_txd < EM_MIN_TXD)) {
                device_printf(dev, "Using %d TX descriptors instead of %d!\n",
                    EM_DEFAULT_TXD, em_txd);
                adapter->num_tx_desc = EM_DEFAULT_TXD;
        } else
                adapter->num_tx_desc = em_txd;

        if (((em_rxd * sizeof(struct e1000_rx_desc)) % EM_DBA_ALIGN) != 0 ||
            (em_rxd > EM_MAX_RXD) || (em_rxd < EM_MIN_RXD)) {
                device_printf(dev, "Using %d RX descriptors instead of %d!\n",
                    EM_DEFAULT_RXD, em_rxd);
                adapter->num_rx_desc = EM_DEFAULT_RXD;
        } else
                adapter->num_rx_desc = em_rxd;
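
        /*
         * Example: with the 16-byte legacy descriptors used here and an
         * EM_DBA_ALIGN of 128, the alignment test above requires the
         * counts to be multiples of 8 (8 * 16 == 128), so 1024 passes
         * while 1020 would fall back to the default.
         */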

        adapter->hw.mac.autoneg = DO_AUTO_NEG;
        adapter->hw.phy.autoneg_wait_to_complete = FALSE;
        adapter->hw.phy.autoneg_advertised = AUTONEG_ADV_DEFAULT;

        /* Copper options */
        if (adapter->hw.phy.media_type == e1000_media_type_copper) {
                adapter->hw.phy.mdix = AUTO_ALL_MODES;
                adapter->hw.phy.disable_polarity_correction = FALSE;
                adapter->hw.phy.ms_type = EM_MASTER_SLAVE;
        }

        /*
         * Set the frame limits assuming
         * standard Ethernet-sized frames.
         */
        adapter->max_frame_size = ETHERMTU + ETHER_HDR_LEN + ETHERNET_FCS_SIZE;
        adapter->min_frame_size = ETH_ZLEN + ETHERNET_FCS_SIZE;
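        /*
         * For the standard case that works out to 1500 + 14 + 4 = 1518
         * bytes maximum and 60 + 4 = 64 bytes minimum, the classic
         * Ethernet frame-size limits.
         */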

        /*
         * This controls when hardware reports transmit completion
         * status.
         */
        adapter->hw.mac.report_tx_early = 1;

        /*
        ** Get queue/ring memory
        */
        if (em_allocate_queues(adapter)) {
                error = ENOMEM;
                goto err_pci;
        }

        /*
        ** Start from a known state; this is
        ** important before reading the NVM
        ** and MAC address.
        */
        e1000_reset_hw(&adapter->hw);

        /* Make sure we have a good EEPROM before we read from it */
        if (e1000_validate_nvm_checksum(&adapter->hw) < 0) {
                /*
                ** Some PCI-E parts fail the first check due to
                ** the link being in a sleep state; call it again,
                ** and if it fails a second time it's a real issue.
                */
                if (e1000_validate_nvm_checksum(&adapter->hw) < 0) {
                        device_printf(dev,
                            "The EEPROM Checksum Is Not Valid\n");
                        error = EIO;
                        goto err_late;
                }
        }

        /* Copy the permanent MAC address out of the EEPROM */
        if (e1000_read_mac_addr(&adapter->hw) < 0) {
                device_printf(dev, "EEPROM read error while reading MAC"
                    " address\n");
                error = EIO;
                goto err_late;
        }

        if (!em_is_valid_ether_addr(adapter->hw.mac.addr)) {
                device_printf(dev, "Invalid MAC address\n");
                error = EIO;
                goto err_late;
        }

        /*
        **  Do interrupt configuration
        */
        if (adapter->msix > 1) /* Do MSIX */
                error = em_allocate_msix(adapter);
        else  /* MSI or Legacy */
                error = em_allocate_legacy(adapter);
        if (error)
                goto err_late;

        /*
         * Get Wake-on-Lan and Management info for later use
         */
        em_get_wakeup(dev);

        /* Setup OS specific network interface */
        em_setup_interface(dev, adapter);

        em_reset(adapter);

        /* Initialize statistics */
        em_update_stats_counters(adapter);

        adapter->hw.mac.get_link_status = 1;
        em_update_link_status(adapter);

        /* Indicate SOL/IDER usage */
        if (e1000_check_reset_block(&adapter->hw))
                device_printf(dev,
                    "PHY reset is blocked due to SOL/IDER session.\n");

        /* Register for VLAN events */
        adapter->vlan_attach = EVENTHANDLER_REGISTER(vlan_config,
            em_register_vlan, adapter, EVENTHANDLER_PRI_FIRST);
        adapter->vlan_detach = EVENTHANDLER_REGISTER(vlan_unconfig,
            em_unregister_vlan, adapter, EVENTHANDLER_PRI_FIRST);

        /* Non-AMT based hardware can now take control from firmware */
        if (adapter->has_manage && !adapter->has_amt)
                em_get_hw_control(adapter);

        /* Tell the stack that the interface is not active */
        adapter->ifp->if_drv_flags &= ~(IFF_DRV_RUNNING | IFF_DRV_OACTIVE);

        adapter->led_dev = led_create(em_led_func, adapter,
            device_get_nameunit(dev));

        INIT_DEBUGOUT("em_attach: end");

        return (0);

err_late:
        em_free_transmit_structures(adapter);
        em_free_receive_structures(adapter);
        em_release_hw_control(adapter);
err_pci:
        em_free_pci_resources(adapter);
        EM_CORE_LOCK_DESTROY(adapter);

        return (error);
}

/*********************************************************************
 *  Device removal routine
 *
 *  The detach entry point is called when the driver is being removed.
 *  This routine stops the adapter and deallocates all the resources
 *  that were allocated for driver operation.
 *
 *  return 0 on success, positive on failure
 *********************************************************************/

static int
em_detach(device_t dev)
{
        struct adapter  *adapter = device_get_softc(dev);
        struct ifnet    *ifp = adapter->ifp;

        INIT_DEBUGOUT("em_detach: begin");

        /* Make sure VLANs are not using the driver */
        if (adapter->ifp->if_vlantrunk != NULL) {
                device_printf(dev,"Vlan in use, detach first\n");
                return (EBUSY);
        }

#ifdef DEVICE_POLLING
        if (ifp->if_capenable & IFCAP_POLLING)
                ether_poll_deregister(ifp);
#endif

        EM_CORE_LOCK(adapter);
        adapter->in_detach = 1;
        em_stop(adapter);
        EM_CORE_UNLOCK(adapter);
        EM_CORE_LOCK_DESTROY(adapter);

        e1000_phy_hw_reset(&adapter->hw);

        em_release_manageability(adapter);
        em_release_hw_control(adapter);

        /* Unregister VLAN events */
        if (adapter->vlan_attach != NULL)
                EVENTHANDLER_DEREGISTER(vlan_config, adapter->vlan_attach);
        if (adapter->vlan_detach != NULL)
                EVENTHANDLER_DEREGISTER(vlan_unconfig, adapter->vlan_detach);

        ether_ifdetach(adapter->ifp);
        callout_drain(&adapter->timer);

        em_free_pci_resources(adapter);
        bus_generic_detach(dev);
        if_free(ifp);

        em_free_transmit_structures(adapter);
        em_free_receive_structures(adapter);

        em_release_hw_control(adapter);

        return (0);
}

/*********************************************************************
 *
 *  Shutdown entry point
 *
 **********************************************************************/

static int
em_shutdown(device_t dev)
{
        return em_suspend(dev);
}

/*
 * Suspend/resume device methods.
 */
static int
em_suspend(device_t dev)
{
        struct adapter *adapter = device_get_softc(dev);

        EM_CORE_LOCK(adapter);

        em_release_manageability(adapter);
        em_release_hw_control(adapter);
        em_enable_wakeup(dev);

        EM_CORE_UNLOCK(adapter);

        return bus_generic_suspend(dev);
}

static int
em_resume(device_t dev)
{
        struct adapter *adapter = device_get_softc(dev);
        struct ifnet *ifp = adapter->ifp;

        if (adapter->led_dev != NULL)
                led_destroy(adapter->led_dev);

        EM_CORE_LOCK(adapter);
        em_init_locked(adapter);
        em_init_manageability(adapter);
        EM_CORE_UNLOCK(adapter);
        em_start(ifp);

        return bus_generic_resume(dev);
}


/*********************************************************************
 *  Transmit entry point
 *
 *  em_start is called by the stack to initiate a transmit.
 *  The driver will remain in this routine as long as there are
 *  packets to transmit and transmit resources are available.
 *  If resources are not available, the stack is notified and
 *  the packet is requeued.
 **********************************************************************/

#ifdef EM_MULTIQUEUE
static int
em_mq_start_locked(struct ifnet *ifp, struct tx_ring *txr, struct mbuf *m)
{
        struct adapter  *adapter = txr->adapter;
        struct mbuf     *next;
        int             err = 0, enq = 0;

        if ((ifp->if_drv_flags & (IFF_DRV_RUNNING | IFF_DRV_OACTIVE)) !=
            IFF_DRV_RUNNING || adapter->link_active == 0) {
                if (m != NULL)
                        err = drbr_enqueue(ifp, txr->br, m);
                return (err);
        }

        /* Call cleanup if number of TX descriptors low */
        if (txr->tx_avail <= EM_TX_CLEANUP_THRESHOLD)
                em_txeof(txr);

        enq = 0;
        if (m == NULL) {
                next = drbr_dequeue(ifp, txr->br);
        } else if (drbr_needs_enqueue(ifp, txr->br)) {
                if ((err = drbr_enqueue(ifp, txr->br, m)) != 0)
                        return (err);
                next = drbr_dequeue(ifp, txr->br);
        } else
                next = m;

        /* Process the queue */
        while (next != NULL) {
                if ((err = em_xmit(txr, &next)) != 0) {
                        if (next != NULL)
                                err = drbr_enqueue(ifp, txr->br, next);
                        break;
                }
                enq++;
                drbr_stats_update(ifp, next->m_pkthdr.len, next->m_flags);
                ETHER_BPF_MTAP(ifp, next);
                if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
                        break;
                if (txr->tx_avail < EM_MAX_SCATTER) {
                        ifp->if_drv_flags |= IFF_DRV_OACTIVE;
                        break;
                }
                next = drbr_dequeue(ifp, txr->br);
        }

        if (enq > 0) {
                /* Set the watchdog */
                txr->watchdog_check = TRUE;
                txr->watchdog_time = ticks;
        }
        return (err);
}
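
/*
** Note that drbr_dequeue() above uses the single-consumer buf_ring
** dequeue, so callers must hold the TX ring lock, as both callers of
** this function do.
*/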

/*
** Multiqueue-capable stack interface; this is not
** yet truly multiqueue, but that is coming...
*/
static int
em_mq_start(struct ifnet *ifp, struct mbuf *m)
{
        struct adapter  *adapter = ifp->if_softc;
        struct tx_ring  *txr;
        int             i, error = 0;

        /* Which queue to use */
        if ((m->m_flags & M_FLOWID) != 0)
                i = m->m_pkthdr.flowid % adapter->num_queues;
        else
                i = curcpu % adapter->num_queues;

        txr = &adapter->tx_rings[i];

        if (EM_TX_TRYLOCK(txr)) {
                error = em_mq_start_locked(ifp, txr, m);
                EM_TX_UNLOCK(txr);
        } else
                error = drbr_enqueue(ifp, txr->br, m);

        return (error);
}

/*
** Flush all ring buffers
*/
static void
em_qflush(struct ifnet *ifp)
{
        struct adapter  *adapter = ifp->if_softc;
        struct tx_ring  *txr = adapter->tx_rings;
        struct mbuf     *m;

        for (int i = 0; i < adapter->num_queues; i++, txr++) {
                EM_TX_LOCK(txr);
                while ((m = buf_ring_dequeue_sc(txr->br)) != NULL)
                        m_freem(m);
                EM_TX_UNLOCK(txr);
        }
        if_qflush(ifp);
}

#endif /* EM_MULTIQUEUE */

static void
em_start_locked(struct ifnet *ifp, struct tx_ring *txr)
{
        struct adapter  *adapter = ifp->if_softc;
        struct mbuf     *m_head;

        EM_TX_LOCK_ASSERT(txr);

        if ((ifp->if_drv_flags & (IFF_DRV_RUNNING|IFF_DRV_OACTIVE)) !=
            IFF_DRV_RUNNING)
                return;

        if (!adapter->link_active)
                return;

        /* Call cleanup if number of TX descriptors low */
        if (txr->tx_avail <= EM_TX_CLEANUP_THRESHOLD)
                em_txeof(txr);

        while (!IFQ_DRV_IS_EMPTY(&ifp->if_snd)) {
                if (txr->tx_avail < EM_MAX_SCATTER) {
                        ifp->if_drv_flags |= IFF_DRV_OACTIVE;
                        break;
                }
                IFQ_DRV_DEQUEUE(&ifp->if_snd, m_head);
                if (m_head == NULL)
                        break;
                /*
                 *  Encapsulation can modify our pointer, and/or make it
                 *  NULL on failure.  In that event, we can't requeue.
                 */
                if (em_xmit(txr, &m_head)) {
                        if (m_head == NULL)
                                break;
                        ifp->if_drv_flags |= IFF_DRV_OACTIVE;
                        IFQ_DRV_PREPEND(&ifp->if_snd, m_head);
                        break;
                }

                /* Send a copy of the frame to the BPF listener */
                ETHER_BPF_MTAP(ifp, m_head);

                /* Set timeout in case hardware has problems transmitting. */
                txr->watchdog_time = ticks;
                txr->watchdog_check = TRUE;
        }

        return;
}

static void
em_start(struct ifnet *ifp)
{
        struct adapter  *adapter = ifp->if_softc;
        struct tx_ring  *txr = adapter->tx_rings;

        if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
                EM_TX_LOCK(txr);
                em_start_locked(ifp, txr);
                EM_TX_UNLOCK(txr);
        }
        return;
}

/*********************************************************************
 *  Ioctl entry point
 *
 *  em_ioctl is called when the user wants to configure the
 *  interface.
 *
 *  return 0 on success, positive on failure
 **********************************************************************/

static int
em_ioctl(struct ifnet *ifp, u_long command, caddr_t data)
{
        struct adapter  *adapter = ifp->if_softc;
        struct ifreq *ifr = (struct ifreq *)data;
#ifdef INET
        struct ifaddr *ifa = (struct ifaddr *)data;
#endif
        int error = 0;

        if (adapter->in_detach)
                return (error);

        switch (command) {
        case SIOCSIFADDR:
#ifdef INET
                if (ifa->ifa_addr->sa_family == AF_INET) {
                        /*
                         * XXX
                         * Since resetting hardware takes a very long time
                         * and results in link renegotiation, we
                         * initialize the hardware only when it is
                         * absolutely required.
                         */
                        ifp->if_flags |= IFF_UP;
                        if (!(ifp->if_drv_flags & IFF_DRV_RUNNING)) {
                                EM_CORE_LOCK(adapter);
                                em_init_locked(adapter);
                                EM_CORE_UNLOCK(adapter);
                        }
                        arp_ifinit(ifp, ifa);
                } else
#endif
                        error = ether_ioctl(ifp, command, data);
                break;
        case SIOCSIFMTU:
            {
                int max_frame_size;

                IOCTL_DEBUGOUT("ioctl rcv'd: SIOCSIFMTU (Set Interface MTU)");

                EM_CORE_LOCK(adapter);
                switch (adapter->hw.mac.type) {
                case e1000_82571:
                case e1000_82572:
                case e1000_ich9lan:
                case e1000_ich10lan:
                case e1000_82574:
                case e1000_80003es2lan: /* 9K Jumbo Frame size */
                        max_frame_size = 9234;
                        break;
                case e1000_pchlan:
                        max_frame_size = 4096;
                        break;
                        /* Adapters that do not support jumbo frames */
                case e1000_82583:
                case e1000_ich8lan:
                        max_frame_size = ETHER_MAX_LEN;
                        break;
                default:
                        max_frame_size = MAX_JUMBO_FRAME_SIZE;
                }
                if (ifr->ifr_mtu > max_frame_size - ETHER_HDR_LEN -
                    ETHER_CRC_LEN) {
                        EM_CORE_UNLOCK(adapter);
                        error = EINVAL;
                        break;
                }

                ifp->if_mtu = ifr->ifr_mtu;
                adapter->max_frame_size =
                    ifp->if_mtu + ETHER_HDR_LEN + ETHER_CRC_LEN;
                em_init_locked(adapter);
                EM_CORE_UNLOCK(adapter);
                break;
            }
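        /*
         * The resulting MTU limit is the frame limit less the 18 bytes
         * of header and CRC; e.g. on an 82571 "ifconfig em0 mtu 9216"
         * (9234 - 18) is accepted, while an ICH8 part stays at the
         * standard 1500.
         */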
        case SIOCSIFFLAGS:
                IOCTL_DEBUGOUT("ioctl rcv'd:\
                    SIOCSIFFLAGS (Set Interface Flags)");
                EM_CORE_LOCK(adapter);
                if (ifp->if_flags & IFF_UP) {
                        if ((ifp->if_drv_flags & IFF_DRV_RUNNING)) {
                                if ((ifp->if_flags ^ adapter->if_flags) &
                                    (IFF_PROMISC | IFF_ALLMULTI)) {
                                        em_disable_promisc(adapter);
                                        em_set_promisc(adapter);
                                }
                        } else
                                em_init_locked(adapter);
                } else
                        if (ifp->if_drv_flags & IFF_DRV_RUNNING)
                                em_stop(adapter);
                adapter->if_flags = ifp->if_flags;
                EM_CORE_UNLOCK(adapter);
                break;
        case SIOCADDMULTI:
        case SIOCDELMULTI:
                IOCTL_DEBUGOUT("ioctl rcv'd: SIOC(ADD|DEL)MULTI");
                if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
                        EM_CORE_LOCK(adapter);
                        em_disable_intr(adapter);
                        em_set_multi(adapter);
#ifdef DEVICE_POLLING
                        if (!(ifp->if_capenable & IFCAP_POLLING))
#endif
                                em_enable_intr(adapter);
                        EM_CORE_UNLOCK(adapter);
                }
                break;
        case SIOCSIFMEDIA:
                /* Check SOL/IDER usage */
                EM_CORE_LOCK(adapter);
                if (e1000_check_reset_block(&adapter->hw)) {
                        EM_CORE_UNLOCK(adapter);
                        device_printf(adapter->dev, "Media change is"
                            " blocked due to SOL/IDER session.\n");
                        break;
                }
                EM_CORE_UNLOCK(adapter);
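                /* FALLTHROUGH */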
        case SIOCGIFMEDIA:
                IOCTL_DEBUGOUT("ioctl rcv'd: \
                    SIOCxIFMEDIA (Get/Set Interface Media)");
                error = ifmedia_ioctl(ifp, ifr, &adapter->media, command);
                break;
        case SIOCSIFCAP:
            {
                int mask, reinit;

                IOCTL_DEBUGOUT("ioctl rcv'd: SIOCSIFCAP (Set Capabilities)");
                reinit = 0;
                mask = ifr->ifr_reqcap ^ ifp->if_capenable;
#ifdef DEVICE_POLLING
                if (mask & IFCAP_POLLING) {
                        if (ifr->ifr_reqcap & IFCAP_POLLING) {
                                error = ether_poll_register(em_poll, ifp);
                                if (error)
                                        return (error);
                                EM_CORE_LOCK(adapter);
                                em_disable_intr(adapter);
                                ifp->if_capenable |= IFCAP_POLLING;
                                EM_CORE_UNLOCK(adapter);
                        } else {
                                error = ether_poll_deregister(ifp);
                                /* Enable interrupt even in error case */
                                EM_CORE_LOCK(adapter);
                                em_enable_intr(adapter);
                                ifp->if_capenable &= ~IFCAP_POLLING;
                                EM_CORE_UNLOCK(adapter);
                        }
                }
#endif
                if (mask & IFCAP_HWCSUM) {
                        ifp->if_capenable ^= IFCAP_HWCSUM;
                        reinit = 1;
                }
                if (mask & IFCAP_TSO4) {
                        ifp->if_capenable ^= IFCAP_TSO4;
                        reinit = 1;
                }
                if (mask & IFCAP_VLAN_HWTAGGING) {
                        ifp->if_capenable ^= IFCAP_VLAN_HWTAGGING;
                        reinit = 1;
                }
                if (mask & IFCAP_VLAN_HWFILTER) {
                        ifp->if_capenable ^= IFCAP_VLAN_HWFILTER;
                        reinit = 1;
                }
                if ((mask & IFCAP_WOL) &&
                    (ifp->if_capabilities & IFCAP_WOL) != 0) {
                        if (mask & IFCAP_WOL_MCAST)
                                ifp->if_capenable ^= IFCAP_WOL_MCAST;
                        if (mask & IFCAP_WOL_MAGIC)
                                ifp->if_capenable ^= IFCAP_WOL_MAGIC;
                }
                if (reinit && (ifp->if_drv_flags & IFF_DRV_RUNNING))
                        em_init(adapter);
                VLAN_CAPABILITIES(ifp);
                break;
            }

        default:
                error = ether_ioctl(ifp, command, data);
                break;
        }

        return (error);
}


/*********************************************************************
 *  Init entry point
 *
 *  This routine is used in two ways: the stack uses it as the
 *  init entry point in the network interface structure, and the
 *  driver uses it as a hw/sw initialization routine to get the
 *  hardware and software to a consistent state.
 *
 **********************************************************************/

static void
em_init_locked(struct adapter *adapter)
{
        struct ifnet    *ifp = adapter->ifp;
        device_t        dev = adapter->dev;
        u32             pba;

        INIT_DEBUGOUT("em_init: begin");

        EM_CORE_LOCK_ASSERT(adapter);

        em_disable_intr(adapter);
        callout_stop(&adapter->timer);

        /*
         * Packet Buffer Allocation (PBA):
         * writing PBA sets the receive portion of the buffer;
         * the remainder is used for the transmit buffer.
         */
        switch (adapter->hw.mac.type) {
        /* Total Packet Buffer on these is 48K */
        case e1000_82571:
        case e1000_82572:
        case e1000_80003es2lan:
                pba = E1000_PBA_32K; /* 32K for Rx, 16K for Tx */
                break;
        case e1000_82573: /* 82573: Total Packet Buffer is 32K */
                pba = E1000_PBA_12K; /* 12K for Rx, 20K for Tx */
                break;
        case e1000_82574:
        case e1000_82583:
                pba = E1000_PBA_20K; /* 20K for Rx, 20K for Tx */
                break;
        case e1000_ich9lan:
        case e1000_ich10lan:
        case e1000_pchlan:
                pba = E1000_PBA_10K;
                break;
        case e1000_ich8lan:
                pba = E1000_PBA_8K;
                break;
        default:
                if (adapter->max_frame_size > 8192)
                        pba = E1000_PBA_40K; /* 40K for Rx, 24K for Tx */
                else
                        pba = E1000_PBA_48K; /* 48K for Rx, 16K for Tx */
        }

        INIT_DEBUGOUT1("em_init: pba=%dK", pba);
        E1000_WRITE_REG(&adapter->hw, E1000_PBA, pba);

        /* Get the latest MAC address; the user may have set a LAA */
        bcopy(IF_LLADDR(adapter->ifp), adapter->hw.mac.addr,
              ETHER_ADDR_LEN);

        /* Put the address into the Receive Address Array */
        e1000_rar_set(&adapter->hw, adapter->hw.mac.addr, 0);

        /*
         * With the 82571 adapter, RAR[0] may be overwritten
         * when the other port is reset; we make a duplicate
         * in RAR[14] for that eventuality, which assures
         * the interface continues to function.
         */
        if (adapter->hw.mac.type == e1000_82571) {
                e1000_set_laa_state_82571(&adapter->hw, TRUE);
                e1000_rar_set(&adapter->hw, adapter->hw.mac.addr,
                    E1000_RAR_ENTRIES - 1);
        }

        /* Initialize the hardware */
        em_reset(adapter);
        em_update_link_status(adapter);

        /* Setup VLAN support, basic and offload if available */
        E1000_WRITE_REG(&adapter->hw, E1000_VET, ETHERTYPE_VLAN);

        /* Use real VLAN Filter support? */
        if (ifp->if_capenable & IFCAP_VLAN_HWTAGGING) {
                if (ifp->if_capenable & IFCAP_VLAN_HWFILTER)
                        /* Use real VLAN Filter support */
                        em_setup_vlan_hw_support(adapter);
                else {
                        u32 ctrl;
                        ctrl = E1000_READ_REG(&adapter->hw, E1000_CTRL);
                        ctrl |= E1000_CTRL_VME;
                        E1000_WRITE_REG(&adapter->hw, E1000_CTRL, ctrl);
                }
        }

        /* Set hardware offload abilities */
        ifp->if_hwassist = 0;
        if (ifp->if_capenable & IFCAP_TXCSUM)
                ifp->if_hwassist |= (CSUM_TCP | CSUM_UDP);
        if (ifp->if_capenable & IFCAP_TSO4)
                ifp->if_hwassist |= CSUM_TSO;

        /* Configure for OS presence */
        em_init_manageability(adapter);

        /* Prepare transmit descriptors and buffers */
        em_setup_transmit_structures(adapter);
        em_initialize_transmit_unit(adapter);

        /* Setup Multicast table */
        em_set_multi(adapter);

        /* Prepare receive descriptors and buffers */
        if (em_setup_receive_structures(adapter)) {
                device_printf(dev, "Could not setup receive structures\n");
                em_stop(adapter);
                return;
        }
        em_initialize_receive_unit(adapter);

        /* Don't lose promiscuous settings */
        em_set_promisc(adapter);

        ifp->if_drv_flags |= IFF_DRV_RUNNING;
        ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;

        callout_reset(&adapter->timer, hz, em_local_timer, adapter);
        e1000_clear_hw_cntrs_base_generic(&adapter->hw);

        /* MSI/X configuration for 82574 */
        if (adapter->hw.mac.type == e1000_82574) {
                int tmp;
                tmp = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
                tmp |= E1000_CTRL_EXT_PBA_CLR;
                E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT, tmp);
                /* Set the IVAR - interrupt vector routing. */
                E1000_WRITE_REG(&adapter->hw, E1000_IVAR, adapter->ivars);
        }

#ifdef DEVICE_POLLING
        /*
         * Only enable interrupts if we are not polling; make sure
         * they are off otherwise.
         */
1322         if (ifp->if_capenable & IFCAP_POLLING)
1323                 em_disable_intr(adapter);
1324         else
1325 #endif /* DEVICE_POLLING */
1326                 em_enable_intr(adapter);
1327
1328         /* AMT based hardware can now take control from firmware */
1329         if (adapter->has_manage && adapter->has_amt)
1330                 em_get_hw_control(adapter);
1331
1332         /* Don't reset the phy next time init gets called */
1333         adapter->hw.phy.reset_disable = TRUE;
1334 }
1335
1336 static void
1337 em_init(void *arg)
1338 {
1339         struct adapter *adapter = arg;
1340
1341         EM_CORE_LOCK(adapter);
1342         em_init_locked(adapter);
1343         EM_CORE_UNLOCK(adapter);
1344 }
1345
1346
1347 #ifdef DEVICE_POLLING
1348 /*********************************************************************
1349  *
1350  *  Legacy polling routine: note this only works with single queue
1351  *
1352  *********************************************************************/
1353 static int
1354 em_poll(struct ifnet *ifp, enum poll_cmd cmd, int count)
1355 {
1356         struct adapter *adapter = ifp->if_softc;
1357         struct tx_ring  *txr = adapter->tx_rings;
1358         struct rx_ring  *rxr = adapter->rx_rings;
1359         u32             reg_icr, rx_done = 0;
1360
1361         EM_CORE_LOCK(adapter);
1362         if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) {
1363                 EM_CORE_UNLOCK(adapter);
1364                 return (rx_done);
1365         }
1366
1367         if (cmd == POLL_AND_CHECK_STATUS) {
1368                 reg_icr = E1000_READ_REG(&adapter->hw, E1000_ICR);
1369                 if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
1370                         callout_stop(&adapter->timer);
1371                         adapter->hw.mac.get_link_status = 1;
1372                         em_update_link_status(adapter);
1373                         callout_reset(&adapter->timer, hz,
1374                             em_local_timer, adapter);
1375                 }
1376         }
1377         EM_CORE_UNLOCK(adapter);
1378
1379         rx_done = em_rxeof(rxr, count);
1380
1381         EM_TX_LOCK(txr);
1382         em_txeof(txr);
1383 #ifdef EM_MULTIQUEUE
1384         if (!drbr_empty(ifp, txr->br))
1385                 em_mq_start_locked(ifp, txr, NULL);
1386 #else
1387         if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
1388                 em_start_locked(ifp, txr);
1389 #endif
1390         EM_TX_UNLOCK(txr);
1391
1392         return (rx_done);
1393 }
1394 #endif /* DEVICE_POLLING */
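/*
 * Illustrative usage note (not from the original sources): with
 * DEVICE_POLLING compiled into the kernel, polling is toggled per
 * interface through the ifconfig capability flag, e.g.:
 *
 *      ifconfig em0 polling            (switch em0 to polled mode)
 *      ifconfig em0 -polling           (return to interrupt mode)
 *
 * em_init_locked() above then leaves interrupts masked whenever
 * IFCAP_POLLING is set in if_capenable.
 */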
1395
1396
1397 /*********************************************************************
1398  *
1399  *  Fast Legacy/MSI Combined Interrupt Service routine  
1400  *
1401  *********************************************************************/
1402 static int
1403 em_irq_fast(void *arg)
1404 {
1405         struct adapter  *adapter = arg;
1406         struct ifnet    *ifp;
1407         u32             reg_icr;
1408
1409         ifp = adapter->ifp;
1410
1411         reg_icr = E1000_READ_REG(&adapter->hw, E1000_ICR);
1412
1413         /* Hot eject?  */
1414         if (reg_icr == 0xffffffff)
1415                 return FILTER_STRAY;
1416
1417         /* Definitely not our interrupt.  */
1418         if (reg_icr == 0x0)
1419                 return FILTER_STRAY;
1420
1421         /*
1422          * Starting with the 82571 chip, bit 31 should be used to
1423          * determine whether the interrupt belongs to us.
1424          */
1425         if (adapter->hw.mac.type >= e1000_82571 &&
1426             (reg_icr & E1000_ICR_INT_ASSERTED) == 0)
1427                 return FILTER_STRAY;
1428
1429         em_disable_intr(adapter);
1430         taskqueue_enqueue(adapter->tq, &adapter->que_task);
1431
1432         /* Link status change */
1433         if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
1434                 adapter->hw.mac.get_link_status = 1;
1435                 taskqueue_enqueue(taskqueue_fast, &adapter->link_task);
1436         }
1437
1438         if (reg_icr & E1000_ICR_RXO)
1439                 adapter->rx_overruns++;
1440         return FILTER_HANDLED;
1441 }
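/*
 * Flow sketch (illustrative): em_irq_fast() runs in filter context,
 * so it only samples ICR, masks the device and defers the real work:
 *
 *      em_irq_fast() --taskqueue--> em_handle_que()
 *
 * em_handle_que() then cleans RX/TX and either re-enqueues itself
 * (more RX pending) or calls em_enable_intr() to unmask the device.
 */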
1442
1443 /* Combined RX/TX handler, used by Legacy and MSI */
1444 static void
1445 em_handle_que(void *context, int pending)
1446 {
1447         struct adapter  *adapter = context;
1448         struct ifnet    *ifp = adapter->ifp;
1449         struct tx_ring  *txr = adapter->tx_rings;
1450         struct rx_ring  *rxr = adapter->rx_rings;
1451         bool            more_rx;
1452
1453
1454         if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
1455                 more_rx = em_rxeof(rxr, adapter->rx_process_limit);
1456
1457                 EM_TX_LOCK(txr);
1458                 em_txeof(txr);
1459 #ifdef EM_MULTIQUEUE
1460                 if (!drbr_empty(ifp, txr->br))
1461                         em_mq_start_locked(ifp, txr, NULL);
1462 #else
1463                 if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
1464                         em_start_locked(ifp, txr);
1465 #endif
1466                 EM_TX_UNLOCK(txr);
1467                 if (more_rx) {
1468                         taskqueue_enqueue(adapter->tq, &adapter->que_task);
1469                         return;
1470                 }
1471         }
1472
1473         em_enable_intr(adapter);
1474         return;
1475 }
1476
1477
1478 /*********************************************************************
1479  *
1480  *  MSIX Interrupt Service Routines
1481  *
1482  **********************************************************************/
1483 static void
1484 em_msix_tx(void *arg)
1485 {
1486         struct tx_ring *txr = arg;
1487         struct adapter *adapter = txr->adapter;
1488         bool            more;
1489
1490         ++txr->tx_irq;
1491         EM_TX_LOCK(txr);
1492         more = em_txeof(txr);
1493         EM_TX_UNLOCK(txr);
1494         if (more)
1495                 taskqueue_enqueue(txr->tq, &txr->tx_task);
1496         else
1497                 /* Reenable this interrupt */
1498                 E1000_WRITE_REG(&adapter->hw, E1000_IMS, txr->ims);
1499         return;
1500 }
1501
1502 /*********************************************************************
1503  *
1504  *  MSIX RX Interrupt Service routine
1505  *
1506  **********************************************************************/
1507
1508 static void
1509 em_msix_rx(void *arg)
1510 {
1511         struct rx_ring  *rxr = arg;
1512         struct adapter  *adapter = rxr->adapter;
1513         bool            more;
1514
1515         ++rxr->rx_irq;
1516         more = em_rxeof(rxr, adapter->rx_process_limit);
1517         if (more)
1518                 taskqueue_enqueue(rxr->tq, &rxr->rx_task);
1519         else
1520                 /* Reenable this interrupt */
1521                 E1000_WRITE_REG(&adapter->hw, E1000_IMS, rxr->ims);
1522         return;
1523 }
1524
1525 /*********************************************************************
1526  *
1527  *  MSIX Link Fast Interrupt Service routine
1528  *
1529  **********************************************************************/
1530 static void
1531 em_msix_link(void *arg)
1532 {
1533         struct adapter  *adapter = arg;
1534         u32             reg_icr;
1535
1536         ++adapter->link_irq;
1537         reg_icr = E1000_READ_REG(&adapter->hw, E1000_ICR);
1538
1539         if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
1540                 adapter->hw.mac.get_link_status = 1;
1541                 taskqueue_enqueue(taskqueue_fast, &adapter->link_task);
1542         } else
1543                 E1000_WRITE_REG(&adapter->hw, E1000_IMS,
1544                     EM_MSIX_LINK | E1000_IMS_LSC);
1545         return;
1546 }
1547
1548 static void
1549 em_handle_rx(void *context, int pending)
1550 {
1551         struct rx_ring  *rxr = context;
1552         struct adapter  *adapter = rxr->adapter;
1553         bool            more;
1554
1555         more = em_rxeof(rxr, adapter->rx_process_limit);
1556         if (more)
1557                 taskqueue_enqueue(rxr->tq, &rxr->rx_task);
1558         else
1559                 /* Reenable this interrupt */
1560                 E1000_WRITE_REG(&adapter->hw, E1000_IMS, rxr->ims);
1561 }
1562
1563 static void
1564 em_handle_tx(void *context, int pending)
1565 {
1566         struct tx_ring  *txr = context;
1567         struct adapter  *adapter = txr->adapter;
1568         struct ifnet    *ifp = adapter->ifp;
1569
1570         if (!EM_TX_TRYLOCK(txr))
1571                 return;
1572
1573         em_txeof(txr);
1574
1575 #ifdef EM_MULTIQUEUE
1576         if (!drbr_empty(ifp, txr->br))
1577                 em_mq_start_locked(ifp, txr, NULL);
1578 #else
1579         if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
1580                 em_start_locked(ifp, txr);
1581 #endif
1582         E1000_WRITE_REG(&adapter->hw, E1000_IMS, txr->ims);
1583         EM_TX_UNLOCK(txr);
1584 }
1585
1586 static void
1587 em_handle_link(void *context, int pending)
1588 {
1589         struct adapter  *adapter = context;
1590         struct ifnet *ifp = adapter->ifp;
1591
1592         if (!(ifp->if_drv_flags & IFF_DRV_RUNNING))
1593                 return;
1594
1595         EM_CORE_LOCK(adapter);
1596         callout_stop(&adapter->timer);
1597         em_update_link_status(adapter);
1598         callout_reset(&adapter->timer, hz, em_local_timer, adapter);
1599         E1000_WRITE_REG(&adapter->hw, E1000_IMS,
1600             EM_MSIX_LINK | E1000_IMS_LSC);
1601         EM_CORE_UNLOCK(adapter);
1602 }
1603
1604
1605 /*********************************************************************
1606  *
1607  *  Media Ioctl callback
1608  *
1609  *  This routine is called whenever the user queries the status of
1610  *  the interface using ifconfig.
1611  *
1612  **********************************************************************/
1613 static void
1614 em_media_status(struct ifnet *ifp, struct ifmediareq *ifmr)
1615 {
1616         struct adapter *adapter = ifp->if_softc;
1617         u_char fiber_type = IFM_1000_SX;
1618
1619         INIT_DEBUGOUT("em_media_status: begin");
1620
1621         EM_CORE_LOCK(adapter);
1622         em_update_link_status(adapter);
1623
1624         ifmr->ifm_status = IFM_AVALID;
1625         ifmr->ifm_active = IFM_ETHER;
1626
1627         if (!adapter->link_active) {
1628                 EM_CORE_UNLOCK(adapter);
1629                 return;
1630         }
1631
1632         ifmr->ifm_status |= IFM_ACTIVE;
1633
1634         if ((adapter->hw.phy.media_type == e1000_media_type_fiber) ||
1635             (adapter->hw.phy.media_type == e1000_media_type_internal_serdes)) {
1636                 ifmr->ifm_active |= fiber_type | IFM_FDX;
1637         } else {
1638                 switch (adapter->link_speed) {
1639                 case 10:
1640                         ifmr->ifm_active |= IFM_10_T;
1641                         break;
1642                 case 100:
1643                         ifmr->ifm_active |= IFM_100_TX;
1644                         break;
1645                 case 1000:
1646                         ifmr->ifm_active |= IFM_1000_T;
1647                         break;
1648                 }
1649                 if (adapter->link_duplex == FULL_DUPLEX)
1650                         ifmr->ifm_active |= IFM_FDX;
1651                 else
1652                         ifmr->ifm_active |= IFM_HDX;
1653         }
1654         EM_CORE_UNLOCK(adapter);
1655 }
1656
1657 /*********************************************************************
1658  *
1659  *  Media Ioctl callback
1660  *
1661  *  This routine is called when the user changes speed/duplex using
1662  *  media/mediaopt option with ifconfig.
1663  *
1664  **********************************************************************/
1665 static int
1666 em_media_change(struct ifnet *ifp)
1667 {
1668         struct adapter *adapter = ifp->if_softc;
1669         struct ifmedia  *ifm = &adapter->media;
1670
1671         INIT_DEBUGOUT("em_media_change: begin");
1672
1673         if (IFM_TYPE(ifm->ifm_media) != IFM_ETHER)
1674                 return (EINVAL);
1675
1676         EM_CORE_LOCK(adapter);
1677         switch (IFM_SUBTYPE(ifm->ifm_media)) {
1678         case IFM_AUTO:
1679                 adapter->hw.mac.autoneg = DO_AUTO_NEG;
1680                 adapter->hw.phy.autoneg_advertised = AUTONEG_ADV_DEFAULT;
1681                 break;
1682         case IFM_1000_LX:
1683         case IFM_1000_SX:
1684         case IFM_1000_T:
1685                 adapter->hw.mac.autoneg = DO_AUTO_NEG;
1686                 adapter->hw.phy.autoneg_advertised = ADVERTISE_1000_FULL;
1687                 break;
1688         case IFM_100_TX:
1689                 adapter->hw.mac.autoneg = FALSE;
1690                 adapter->hw.phy.autoneg_advertised = 0;
1691                 if ((ifm->ifm_media & IFM_GMASK) == IFM_FDX)
1692                         adapter->hw.mac.forced_speed_duplex = ADVERTISE_100_FULL;
1693                 else
1694                         adapter->hw.mac.forced_speed_duplex = ADVERTISE_100_HALF;
1695                 break;
1696         case IFM_10_T:
1697                 adapter->hw.mac.autoneg = FALSE;
1698                 adapter->hw.phy.autoneg_advertised = 0;
1699                 if ((ifm->ifm_media & IFM_GMASK) == IFM_FDX)
1700                         adapter->hw.mac.forced_speed_duplex = ADVERTISE_10_FULL;
1701                 else
1702                         adapter->hw.mac.forced_speed_duplex = ADVERTISE_10_HALF;
1703                 break;
1704         default:
1705                 device_printf(adapter->dev, "Unsupported media type\n");
1706         }
1707
1708         /* As the speed/duplex settings may have changed we need to
1709          * reset the PHY.
1710          */
1711         adapter->hw.phy.reset_disable = FALSE;
1712
1713         em_init_locked(adapter);
1714         EM_CORE_UNLOCK(adapter);
1715
1716         return (0);
1717 }
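/*
 * Illustrative examples (not from the original sources) of ifconfig
 * invocations that reach em_media_change() through the media ioctl:
 *
 *      ifconfig em0 media autoselect
 *      ifconfig em0 media 100baseTX mediaopt full-duplex
 *
 * The first restores autonegotiation (IFM_AUTO); the second forces
 * 100 Mbps full duplex (IFM_100_TX with IFM_FDX in the media word).
 */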
1718
1719 /*********************************************************************
1720  *
1721  *  This routine maps the mbufs to tx descriptors.
1722  *
1723  *  return 0 on success, positive on failure
1724  **********************************************************************/
1725
1726 static int
1727 em_xmit(struct tx_ring *txr, struct mbuf **m_headp)
1728 {
1729         struct adapter          *adapter = txr->adapter;
1730         bus_dma_segment_t       segs[EM_MAX_SCATTER];
1731         bus_dmamap_t            map;
1732         struct em_buffer        *tx_buffer, *tx_buffer_mapped;
1733         struct e1000_tx_desc    *ctxd = NULL;
1734         struct mbuf             *m_head;
1735         u32                     txd_upper, txd_lower, txd_used, txd_saved;
1736         int                     nsegs, i, j, first, last = 0;
1737         int                     error, do_tso, tso_desc = 0;
1738
1739         m_head = *m_headp;
1740         txd_upper = txd_lower = txd_used = txd_saved = 0;
1741         do_tso = ((m_head->m_pkthdr.csum_flags & CSUM_TSO) != 0);
1742
1743         /*
1744          * TSO workaround:
1745          *  If an mbuf contains only the header, we need
1746          *  to pull 4 bytes of data into it.
1747          */
1748         if (do_tso && (m_head->m_len <= M_TSO_LEN)) {
1749                 m_head = m_pullup(m_head, M_TSO_LEN + 4);
1750                 *m_headp = m_head;
1751                 if (m_head == NULL)
1752                         return (ENOBUFS);
1753         }
1754
1755         /*
1756          * Map the packet for DMA
1757          *
1758          * Capture the first descriptor index;
1759          * this descriptor will have the index
1760          * of the EOP, which is the only one that
1761          * now gets a DONE bit writeback.
1762          */
1763         first = txr->next_avail_desc;
1764         tx_buffer = &txr->tx_buffers[first];
1765         tx_buffer_mapped = tx_buffer;
1766         map = tx_buffer->map;
1767
1768         error = bus_dmamap_load_mbuf_sg(txr->txtag, map,
1769             *m_headp, segs, &nsegs, BUS_DMA_NOWAIT);
1770
1771         /*
1772          * There are two types of errors we can (try) to handle:
1773          * - EFBIG means the mbuf chain was too long and bus_dma ran
1774          *   out of segments.  Defragment the mbuf chain and try again.
1775          * - ENOMEM means bus_dma could not obtain enough bounce buffers
1776          *   at this point in time.  Defer sending and try again later.
1777          * All other errors, in particular EINVAL, are fatal and prevent the
1778          * mbuf chain from ever going through.  Drop it and report error.
1779          */
1780         if (error == EFBIG) {
1781                 struct mbuf *m;
1782
1783                 m = m_defrag(*m_headp, M_DONTWAIT);
1784                 if (m == NULL) {
1785                         adapter->mbuf_alloc_failed++;
1786                         m_freem(*m_headp);
1787                         *m_headp = NULL;
1788                         return (ENOBUFS);
1789                 }
1790                 *m_headp = m;
1791
1792                 /* Try it again */
1793                 error = bus_dmamap_load_mbuf_sg(txr->txtag, map,
1794                     *m_headp, segs, &nsegs, BUS_DMA_NOWAIT);
1795
1796                 if (error) {
1797                         adapter->no_tx_dma_setup++;
1798                         m_freem(*m_headp);
1799                         *m_headp = NULL;
1800                         return (error);
1801                 }
1802         } else if (error != 0) {
1803                 adapter->no_tx_dma_setup++;
1804                 return (error);
1805         }
1806
1807         /*
1808          * TSO Hardware workaround, if this packet is not
1809          * TSO, and is only a single descriptor long, and
1810          * it follows a TSO burst, then we need to add a
1811          * sentinel descriptor to prevent premature writeback.
1812          */
1813         if ((do_tso == 0) && (txr->tx_tso == TRUE)) {
1814                 if (nsegs == 1)
1815                         tso_desc = TRUE;
1816                 txr->tx_tso = FALSE;
1817         }
1818
1819         if (nsegs > (txr->tx_avail - 2)) {
1820                 txr->no_desc_avail++;
1821                 bus_dmamap_unload(txr->txtag, map);
1822                 return (ENOBUFS);
1823         }
1824         m_head = *m_headp;
1825
1826         /* Do hardware assists */
1827 #if __FreeBSD_version >= 700000
1828         if (m_head->m_pkthdr.csum_flags & CSUM_TSO) {
1829                 error = em_tso_setup(txr, m_head, &txd_upper, &txd_lower);
1830                 if (error != TRUE)
1831                         return (ENXIO); /* something foobar */
1832                 /* we need to make a final sentinel transmit desc */
1833                 tso_desc = TRUE;
1834         } else
1835 #endif
1836         if (m_head->m_pkthdr.csum_flags & CSUM_OFFLOAD)
1837                 em_transmit_checksum_setup(txr,  m_head,
1838                     &txd_upper, &txd_lower);
1839
1840         i = txr->next_avail_desc;
1841
1842         /* Set up our transmit descriptors */
1843         for (j = 0; j < nsegs; j++) {
1844                 bus_size_t seg_len;
1845                 bus_addr_t seg_addr;
1846
1847                 tx_buffer = &txr->tx_buffers[i];
1848                 ctxd = &txr->tx_base[i];
1849                 seg_addr = segs[j].ds_addr;
1850                 seg_len  = segs[j].ds_len;
1851                 /*
1852                 ** TSO Workaround:
1853                 ** If this is the last descriptor, we want to
1854                 ** split it so we have a small final sentinel
1855                 */
1856                 if (tso_desc && (j == (nsegs -1)) && (seg_len > 8)) {
1857                         seg_len -= 4;
1858                         ctxd->buffer_addr = htole64(seg_addr);
1859                         ctxd->lower.data = htole32(
1860                             adapter->txd_cmd | txd_lower | seg_len);
1861                         ctxd->upper.data =
1862                             htole32(txd_upper);
1863                         if (++i == adapter->num_tx_desc)
1864                                 i = 0;
1865                         /* Now make the sentinel */     
1866                         ++txd_used; /* using an extra txd */
1867                         ctxd = &txr->tx_base[i];
1868                         tx_buffer = &txr->tx_buffers[i];
1869                         ctxd->buffer_addr =
1870                             htole64(seg_addr + seg_len);
1871                         ctxd->lower.data = htole32(
1872                             adapter->txd_cmd | txd_lower | 4);
1873                         ctxd->upper.data =
1874                             htole32(txd_upper);
1875                         last = i;
1876                         if (++i == adapter->num_tx_desc)
1877                                 i = 0;
1878                 } else {
1879                         ctxd->buffer_addr = htole64(seg_addr);
1880                         ctxd->lower.data = htole32(
1881                             adapter->txd_cmd | txd_lower | seg_len);
1882                         ctxd->upper.data =
1883                             htole32(txd_upper);
1884                         last = i;
1885                         if (++i == adapter->num_tx_desc)
1886                                 i = 0;
1887                 }
1888                 tx_buffer->m_head = NULL;
1889                 tx_buffer->next_eop = -1;
1890         }
1891
1892         txr->next_avail_desc = i;
1893         txr->tx_avail -= nsegs;
1894         if (tso_desc) /* TSO used an extra for sentinel */
1895                 txr->tx_avail -= txd_used;
1896
1897         if (m_head->m_flags & M_VLANTAG) {
1898                 /* Set the vlan id. */
1899                 ctxd->upper.fields.special =
1900                     htole16(m_head->m_pkthdr.ether_vtag);
1901                 /* Tell hardware to add tag */
1902                 ctxd->lower.data |= htole32(E1000_TXD_CMD_VLE);
1903         }
1904
1905         tx_buffer->m_head = m_head;
1906         tx_buffer_mapped->map = tx_buffer->map;
1907         tx_buffer->map = map;
1908         bus_dmamap_sync(txr->txtag, map, BUS_DMASYNC_PREWRITE);
1909
1910         /*
1911          * Last Descriptor of Packet
1912          * needs End Of Packet (EOP)
1913          * and Report Status (RS)
1914          */
1915         ctxd->lower.data |=
1916             htole32(E1000_TXD_CMD_EOP | E1000_TXD_CMD_RS);
1917         /*
1918          * Keep track in the first buffer which
1919          * descriptor will be written back
1920          */
1921         tx_buffer = &txr->tx_buffers[first];
1922         tx_buffer->next_eop = last;
1923
1924         /*
1925          * Advance the Transmit Descriptor Tail (TDT), this tells the E1000
1926          * that this frame is available to transmit.
1927          */
1928         bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
1929             BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
1930         E1000_WRITE_REG(&adapter->hw, E1000_TDT(txr->me), i);
1931
1932         return (0);
1933 }
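/*
 * Worked example of the sentinel split above (illustrative only):
 * if the final segment of a TSO packet is 1460 bytes at bus address A,
 * em_xmit() emits two descriptors instead of one:
 *
 *      desc n:     buffer_addr = A,        length = 1456
 *      desc n+1:   buffer_addr = A + 1456, length = 4   (sentinel, EOP|RS)
 *
 * The tiny sentinel keeps the hardware from reporting DONE on the
 * packet before the preceding TSO burst has fully completed.
 */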
1934
1935 static void
1936 em_set_promisc(struct adapter *adapter)
1937 {
1938         struct ifnet    *ifp = adapter->ifp;
1939         u32             reg_rctl;
1940
1941         reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
1942
1943         if (ifp->if_flags & IFF_PROMISC) {
1944                 reg_rctl |= (E1000_RCTL_UPE | E1000_RCTL_MPE);
1945                 /* Turn this on if you want to see bad packets */
1946                 if (em_debug_sbp)
1947                         reg_rctl |= E1000_RCTL_SBP;
1948                 E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
1949         } else if (ifp->if_flags & IFF_ALLMULTI) {
1950                 reg_rctl |= E1000_RCTL_MPE;
1951                 reg_rctl &= ~E1000_RCTL_UPE;
1952                 E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
1953         }
1954 }
1955
1956 static void
1957 em_disable_promisc(struct adapter *adapter)
1958 {
1959         u32     reg_rctl;
1960
1961         reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
1962
1963         reg_rctl &=  (~E1000_RCTL_UPE);
1964         reg_rctl &=  (~E1000_RCTL_MPE);
1965         reg_rctl &=  (~E1000_RCTL_SBP);
1966         E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
1967 }
1968
1969
1970 /*********************************************************************
1971  *  Multicast Update
1972  *
1973  *  This routine is called whenever multicast address list is updated.
1974  *
1975  **********************************************************************/
1976
1977 static void
1978 em_set_multi(struct adapter *adapter)
1979 {
1980         struct ifnet    *ifp = adapter->ifp;
1981         struct ifmultiaddr *ifma;
1982         u32 reg_rctl = 0;
1983         u8  *mta; /* Multicast array memory */
1984         int mcnt = 0;
1985
1986         IOCTL_DEBUGOUT("em_set_multi: begin");
1987
1988         if (adapter->hw.mac.type == e1000_82542 && 
1989             adapter->hw.revision_id == E1000_REVISION_2) {
1990                 reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
1991                 if (adapter->hw.bus.pci_cmd_word & CMD_MEM_WRT_INVALIDATE)
1992                         e1000_pci_clear_mwi(&adapter->hw);
1993                 reg_rctl |= E1000_RCTL_RST;
1994                 E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
1995                 msec_delay(5);
1996         }
1997
1998         /* Allocate temporary memory to setup array */
1999         mta = malloc(sizeof(u8) *
2000             (ETH_ADDR_LEN * MAX_NUM_MULTICAST_ADDRESSES),
2001             M_DEVBUF, M_NOWAIT | M_ZERO);
2002         if (mta == NULL)
2003                 panic("em_set_multi memory failure\n");
2004
2005 #if __FreeBSD_version < 800000
2006         IF_ADDR_LOCK(ifp);
2007 #else
2008         if_maddr_rlock(ifp);
2009 #endif
2010         TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
2011                 if (ifma->ifma_addr->sa_family != AF_LINK)
2012                         continue;
2013
2014                 if (mcnt == MAX_NUM_MULTICAST_ADDRESSES)
2015                         break;
2016
2017                 bcopy(LLADDR((struct sockaddr_dl *)ifma->ifma_addr),
2018                     &mta[mcnt * ETH_ADDR_LEN], ETH_ADDR_LEN);
2019                 mcnt++;
2020         }
2021 #if __FreeBSD_version < 800000
2022         IF_ADDR_UNLOCK(ifp);
2023 #else
2024         if_maddr_runlock(ifp);
2025 #endif
2026         if (mcnt >= MAX_NUM_MULTICAST_ADDRESSES) {
2027                 reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
2028                 reg_rctl |= E1000_RCTL_MPE;
2029                 E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2030         } else
2031                 e1000_update_mc_addr_list(&adapter->hw, mta, mcnt);
2032
2033         if (adapter->hw.mac.type == e1000_82542 && 
2034             adapter->hw.revision_id == E1000_REVISION_2) {
2035                 reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
2036                 reg_rctl &= ~E1000_RCTL_RST;
2037                 E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2038                 msec_delay(5);
2039                 if (adapter->hw.bus.pci_cmd_word & CMD_MEM_WRT_INVALIDATE)
2040                         e1000_pci_set_mwi(&adapter->hw);
2041         }
2042         free(mta, M_DEVBUF);
2043 }
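/*
 * Layout sketch (illustrative): mta[] is a flat array of packed
 * 6-byte (ETH_ADDR_LEN) entries, so with mcnt == 2:
 *
 *      mta[0..5]   first multicast MAC
 *      mta[6..11]  second multicast MAC
 *
 * e1000_update_mc_addr_list() expects exactly this packed format.
 */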
2044
2045
2046 /*********************************************************************
2047  *  Timer routine
2048  *
2049  *  This routine checks for link status and updates statistics.
2050  *
2051  **********************************************************************/
2052
2053 static void
2054 em_local_timer(void *arg)
2055 {
2056         struct adapter  *adapter = arg;
2057         struct ifnet    *ifp = adapter->ifp;
2058         struct tx_ring  *txr = adapter->tx_rings;
2059
2060         EM_CORE_LOCK_ASSERT(adapter);
2061
2062         em_update_link_status(adapter);
2063         em_update_stats_counters(adapter);
2064
2065         /* Reset LAA into RAR[0] on 82571 */
2066         if (e1000_get_laa_state_82571(&adapter->hw) == TRUE)
2067                 e1000_rar_set(&adapter->hw, adapter->hw.mac.addr, 0);
2068
2069         if (em_display_debug_stats && (ifp->if_drv_flags & IFF_DRV_RUNNING))
2070                 em_print_hw_stats(adapter);
2071
2072         /*
2073         ** Check for time since any descriptor was cleaned
2074         */
2075         for (int i = 0; i < adapter->num_queues; i++, txr++) {
2076                 EM_TX_LOCK(txr);
2077                 if (txr->watchdog_check == FALSE) {
2078                         EM_TX_UNLOCK(txr);
2079                         continue;
2080                 }
2081                 if ((ticks - txr->watchdog_time) > EM_WATCHDOG)
2082                         goto hung;
2083                 EM_TX_UNLOCK(txr);
2084         }
2085
2086         callout_reset(&adapter->timer, hz, em_local_timer, adapter);
2087         return;
2088 hung:
2089         device_printf(adapter->dev, "Watchdog timeout -- resetting\n");
2090         ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
2091         adapter->watchdog_events++;
2092         EM_TX_UNLOCK(txr);
2093         em_init_locked(adapter);
2094 }
2095
2096
2097 static void
2098 em_update_link_status(struct adapter *adapter)
2099 {
2100         struct e1000_hw *hw = &adapter->hw;
2101         struct ifnet *ifp = adapter->ifp;
2102         device_t dev = adapter->dev;
2103         u32 link_check = 0;
2104
2105         /* Get the cached link value or read phy for real */
2106         switch (hw->phy.media_type) {
2107         case e1000_media_type_copper:
2108                 if (hw->mac.get_link_status) {
2109                         /* Do the work to read phy */
2110                         e1000_check_for_link(hw);
2111                         link_check = !hw->mac.get_link_status;
2112                         if (link_check) /* ESB2 fix */
2113                                 e1000_cfg_on_link_up(hw);
2114                 } else
2115                         link_check = TRUE;
2116                 break;
2117         case e1000_media_type_fiber:
2118                 e1000_check_for_link(hw);
2119                 link_check = (E1000_READ_REG(hw, E1000_STATUS) &
2120                                  E1000_STATUS_LU);
2121                 break;
2122         case e1000_media_type_internal_serdes:
2123                 e1000_check_for_link(hw);
2124                 link_check = adapter->hw.mac.serdes_has_link;
2125                 break;
2126         default:
2127         case e1000_media_type_unknown:
2128                 break;
2129         }
2130
2131         /* Now check for a transition */
2132         if (link_check && (adapter->link_active == 0)) {
2133                 e1000_get_speed_and_duplex(hw, &adapter->link_speed,
2134                     &adapter->link_duplex);
2135                 /* Check if we must disable SPEED_MODE bit on PCI-E */
2136                 if ((adapter->link_speed != SPEED_1000) &&
2137                     ((hw->mac.type == e1000_82571) ||
2138                     (hw->mac.type == e1000_82572))) {
2139                         int tarc0;
2140                         tarc0 = E1000_READ_REG(hw, E1000_TARC(0));
2141                         tarc0 &= ~SPEED_MODE_BIT;
2142                         E1000_WRITE_REG(hw, E1000_TARC(0), tarc0);
2143                 }
2144                 if (bootverbose)
2145                         device_printf(dev, "Link is up %d Mbps %s\n",
2146                             adapter->link_speed,
2147                             ((adapter->link_duplex == FULL_DUPLEX) ?
2148                             "Full Duplex" : "Half Duplex"));
2149                 adapter->link_active = 1;
2150                 adapter->smartspeed = 0;
2151                 ifp->if_baudrate = adapter->link_speed * 1000000;
2152                 if_link_state_change(ifp, LINK_STATE_UP);
2153         } else if (!link_check && (adapter->link_active == 1)) {
2154                 ifp->if_baudrate = adapter->link_speed = 0;
2155                 adapter->link_duplex = 0;
2156                 if (bootverbose)
2157                         device_printf(dev, "Link is Down\n");
2158                 adapter->link_active = 0;
2159                 /* Link down, disable watchdog */
2160                 // JFV change later
2161                 //adapter->watchdog_check = FALSE;
2162                 if_link_state_change(ifp, LINK_STATE_DOWN);
2163         }
2164 }
2165
2166 /*********************************************************************
2167  *
2168  *  This routine disables all traffic on the adapter by issuing a
2169  *  global reset on the MAC and deallocates TX/RX buffers.
2170  *
2171  *  This routine should always be called with BOTH the CORE
2172  *  and TX locks.
2173  **********************************************************************/
2174
2175 static void
2176 em_stop(void *arg)
2177 {
2178         struct adapter  *adapter = arg;
2179         struct ifnet    *ifp = adapter->ifp;
2180         struct tx_ring  *txr = adapter->tx_rings;
2181
2182         EM_CORE_LOCK_ASSERT(adapter);
2183
2184         INIT_DEBUGOUT("em_stop: begin");
2185
2186         em_disable_intr(adapter);
2187         callout_stop(&adapter->timer);
2188
2189         /* Tell the stack that the interface is no longer active */
2190         ifp->if_drv_flags &= ~(IFF_DRV_RUNNING | IFF_DRV_OACTIVE);
2191
2192         /* Unarm watchdog timer. */
2193         for (int i = 0; i < adapter->num_queues; i++, txr++) {
2194                 EM_TX_LOCK(txr);
2195                 txr->watchdog_check = FALSE;
2196                 EM_TX_UNLOCK(txr);
2197         }
2198
2199         e1000_reset_hw(&adapter->hw);
2200         E1000_WRITE_REG(&adapter->hw, E1000_WUC, 0);
2201
2202         e1000_led_off(&adapter->hw);
2203         e1000_cleanup_led(&adapter->hw);
2204 }
2205
2206
2207 /*********************************************************************
2208  *
2209  *  Determine hardware revision.
2210  *
2211  **********************************************************************/
2212 static void
2213 em_identify_hardware(struct adapter *adapter)
2214 {
2215         device_t dev = adapter->dev;
2216
2217         /* Make sure our PCI config space has the necessary stuff set */
2218         adapter->hw.bus.pci_cmd_word = pci_read_config(dev, PCIR_COMMAND, 2);
2219         if (!((adapter->hw.bus.pci_cmd_word & PCIM_CMD_BUSMASTEREN) &&
2220             (adapter->hw.bus.pci_cmd_word & PCIM_CMD_MEMEN))) {
2221                 device_printf(dev, "Memory Access and/or Bus Master bits "
2222                     "were not set!\n");
2223                 adapter->hw.bus.pci_cmd_word |=
2224                 (PCIM_CMD_BUSMASTEREN | PCIM_CMD_MEMEN);
2225                 pci_write_config(dev, PCIR_COMMAND,
2226                     adapter->hw.bus.pci_cmd_word, 2);
2227         }
2228
2229         /* Save off the information about this board */
2230         adapter->hw.vendor_id = pci_get_vendor(dev);
2231         adapter->hw.device_id = pci_get_device(dev);
2232         adapter->hw.revision_id = pci_read_config(dev, PCIR_REVID, 1);
2233         adapter->hw.subsystem_vendor_id =
2234             pci_read_config(dev, PCIR_SUBVEND_0, 2);
2235         adapter->hw.subsystem_device_id =
2236             pci_read_config(dev, PCIR_SUBDEV_0, 2);
2237
2238         /* Do Shared Code Init and Setup */
2239         if (e1000_set_mac_type(&adapter->hw)) {
2240                 device_printf(dev, "Setup init failure\n");
2241                 return;
2242         }
2243 }
2244
2245 static int
2246 em_allocate_pci_resources(struct adapter *adapter)
2247 {
2248         device_t        dev = adapter->dev;
2249         int             rid;
2250
2251         rid = PCIR_BAR(0);
2252         adapter->memory = bus_alloc_resource_any(dev, SYS_RES_MEMORY,
2253             &rid, RF_ACTIVE);
2254         if (adapter->memory == NULL) {
2255                 device_printf(dev, "Unable to allocate bus resource: memory\n");
2256                 return (ENXIO);
2257         }
2258         adapter->osdep.mem_bus_space_tag =
2259             rman_get_bustag(adapter->memory);
2260         adapter->osdep.mem_bus_space_handle =
2261             rman_get_bushandle(adapter->memory);
2262         adapter->hw.hw_addr = (u8 *)&adapter->osdep.mem_bus_space_handle;
2263
2264         /* Default to a single queue */
2265         adapter->num_queues = 1;
2266
2267         /*
2268          * Setup MSI/X or MSI if PCI Express
2269          */
2270         adapter->msix = em_setup_msix(adapter);
2271
2272         adapter->hw.back = &adapter->osdep;
2273
2274         return (0);
2275 }
2276
2277 /*********************************************************************
2278  *
2279  *  Setup the Legacy or MSI Interrupt handler
2280  *
2281  **********************************************************************/
2282 int
2283 em_allocate_legacy(struct adapter *adapter)
2284 {
2285         device_t dev = adapter->dev;
2286         int error, rid = 0;
2287
2288         /* Manually turn off all interrupts */
2289         E1000_WRITE_REG(&adapter->hw, E1000_IMC, 0xffffffff);
2290
2291         if (adapter->msix == 1) /* using MSI */
2292                 rid = 1;
2293         /* We allocate a single interrupt resource */
2294         adapter->res = bus_alloc_resource_any(dev,
2295             SYS_RES_IRQ, &rid, RF_SHAREABLE | RF_ACTIVE);
2296         if (adapter->res == NULL) {
2297                 device_printf(dev, "Unable to allocate bus resource: "
2298                     "interrupt\n");
2299                 return (ENXIO);
2300         }
2301
2302         /*
2303          * Allocate a fast interrupt and the associated
2304          * deferred processing contexts.
2305          */
2306         TASK_INIT(&adapter->que_task, 0, em_handle_que, adapter);
2307         TASK_INIT(&adapter->link_task, 0, em_handle_link, adapter);
2308         adapter->tq = taskqueue_create_fast("em_taskq", M_NOWAIT,
2309             taskqueue_thread_enqueue, &adapter->tq);
2310         taskqueue_start_threads(&adapter->tq, 1, PI_NET, "%s taskq",
2311             device_get_nameunit(adapter->dev));
2312         if ((error = bus_setup_intr(dev, adapter->res, INTR_TYPE_NET,
2313             em_irq_fast, NULL, adapter, &adapter->tag)) != 0) {
2314                 device_printf(dev, "Failed to register fast interrupt "
2315                             "handler: %d\n", error);
2316                 taskqueue_free(adapter->tq);
2317                 adapter->tq = NULL;
2318                 return (error);
2319         }
2320         
2321         return (0);
2322 }
2323
2324 /*********************************************************************
2325  *
2326  *  Setup the MSIX Interrupt handlers
2327  *   This is not really Multiqueue, rather
2328  *   it's just multiple interrupt vectors.
2329  *
2330  **********************************************************************/
2331 int
2332 em_allocate_msix(struct adapter *adapter)
2333 {
2334         device_t        dev = adapter->dev;
2335         struct          tx_ring *txr = adapter->tx_rings;
2336         struct          rx_ring *rxr = adapter->rx_rings;
2337         int             error, rid, vector = 0;
2338
2339
2340         /* Make sure all interrupts are disabled */
2341         E1000_WRITE_REG(&adapter->hw, E1000_IMC, 0xffffffff);
2342
2343         /* First set up ring resources */
2344         for (int i = 0; i < adapter->num_queues; i++, txr++, rxr++) {
2345
2346                 /* RX ring */
2347                 rid = vector + 1;
2348
2349                 rxr->res = bus_alloc_resource_any(dev,
2350                     SYS_RES_IRQ, &rid, RF_ACTIVE);
2351                 if (rxr->res == NULL) {
2352                         device_printf(dev,
2353                             "Unable to allocate bus resource: "
2354                             "RX MSIX Interrupt %d\n", i);
2355                         return (ENXIO);
2356                 }
2357                 if ((error = bus_setup_intr(dev, rxr->res,
2358                     INTR_TYPE_NET | INTR_MPSAFE, NULL, em_msix_rx,
2359                     rxr, &rxr->tag)) != 0) {
2360                         device_printf(dev, "Failed to register RX handler");
2361                         return (error);
2362                 }
2363                 rxr->msix = vector++; /* NOTE increment vector for TX */
2364                 TASK_INIT(&rxr->rx_task, 0, em_handle_rx, rxr);
2365                 rxr->tq = taskqueue_create_fast("em_rxq", M_NOWAIT,
2366                     taskqueue_thread_enqueue, &rxr->tq);
2367                 taskqueue_start_threads(&rxr->tq, 1, PI_NET, "%s rxq",
2368                     device_get_nameunit(adapter->dev));
2369                 /*
2370                 ** Set the bit to enable interrupt
2371                 ** in E1000_IMS -- bits 20 and 21
2372                 ** are for RX0 and RX1, note this has
2373                 ** NOTHING to do with the MSIX vector
2374                 */
2375                 rxr->ims = 1 << (20 + i);
2376                 adapter->ivars |= (8 | rxr->msix) << (i * 4);
2377
2378                 /* TX ring */
2379                 rid = vector + 1;
2380                 txr->res = bus_alloc_resource_any(dev,
2381                     SYS_RES_IRQ, &rid, RF_ACTIVE);
2382                 if (txr->res == NULL) {
2383                         device_printf(dev,
2384                             "Unable to allocate bus resource: "
2385                             "TX MSIX Interrupt %d\n", i);
2386                         return (ENXIO);
2387                 }
2388                 if ((error = bus_setup_intr(dev, txr->res,
2389                     INTR_TYPE_NET | INTR_MPSAFE, NULL, em_msix_tx,
2390                     txr, &txr->tag)) != 0) {
2391                         device_printf(dev, "Failed to register TX handler");
2392                         return (error);
2393                 }
2394                 txr->msix = vector++; /* Increment vector for next pass */
2395                 TASK_INIT(&txr->tx_task, 0, em_handle_tx, txr);
2396                 txr->tq = taskqueue_create_fast("em_txq", M_NOWAIT,
2397                     taskqueue_thread_enqueue, &txr->tq);
2398                 taskqueue_start_threads(&txr->tq, 1, PI_NET, "%s txq",
2399                     device_get_nameunit(adapter->dev));
2400                 /*
2401                 ** Set the bit to enable interrupt
2402                 ** in E1000_IMS -- bits 22 and 23
2403                 ** are for TX0 and TX1, note this has
2404                 ** NOTHING to do with the MSIX vector
2405                 */
2406                 txr->ims = 1 << (22 + i);
2407                 adapter->ivars |= (8 | txr->msix) << (8 + (i * 4));
2408         }
2409
2410         /* Link interrupt */
2411         ++rid;
2412         adapter->res = bus_alloc_resource_any(dev,
2413             SYS_RES_IRQ, &rid, RF_ACTIVE);
2414         if (!adapter->res) {
2415                 device_printf(dev, "Unable to allocate "
2416                     "bus resource: Link interrupt [%d]\n", rid);
2417                 return (ENXIO);
2418         }
2419         /* Set the link handler function */
2420         error = bus_setup_intr(dev, adapter->res,
2421             INTR_TYPE_NET | INTR_MPSAFE, NULL,
2422             em_msix_link, adapter, &adapter->tag);
2423         if (error) {
2424                 adapter->res = NULL;
2425                 device_printf(dev, "Failed to register LINK handler");
2426                 return (error);
2427         }
2428         adapter->linkvec = vector;
2429         adapter->ivars |=  (8 | vector) << 16;
2430         adapter->ivars |= 0x80000000;
2431         TASK_INIT(&adapter->link_task, 0, em_handle_link, adapter);
2432         adapter->tq = taskqueue_create_fast("em_link", M_NOWAIT,
2433             taskqueue_thread_enqueue, &adapter->tq);
2434         taskqueue_start_threads(&adapter->tq, 1, PI_NET, "%s linkq",
2435             device_get_nameunit(adapter->dev));
2436
2437         return (0);
2438 }
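/*
 * Worked example (illustrative) of the IVAR value assembled above
 * for the single-queue case, with RX0 on vector 0, TX0 on vector 1
 * and link on vector 2:
 *
 *      ivars  = (8 | 0) << 0;          RX0:  0x00000008
 *      ivars |= (8 | 1) << 8;          TX0:  0x00000908
 *      ivars |= (8 | 2) << 16;         link: 0x000A0908
 *      ivars |= 0x80000000;            ->    0x800A0908
 *
 * em_init_locked() later writes this value to E1000_IVAR on the 82574.
 */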
2439
2440
2441 static void
2442 em_free_pci_resources(struct adapter *adapter)
2443 {
2444         device_t        dev = adapter->dev;
2445         struct tx_ring  *txr;
2446         struct rx_ring  *rxr;
2447         int             rid;
2448
2449
2450         /*
2451         ** Release all the queue interrupt resources:
2452         */
2453         for (int i = 0; i < adapter->num_queues; i++) {
2454                 txr = &adapter->tx_rings[i];
2455                 rxr = &adapter->rx_rings[i];
2456                 rid = txr->msix + 1;
2457                 if (txr->tag != NULL) {
2458                         bus_teardown_intr(dev, txr->res, txr->tag);
2459                         txr->tag = NULL;
2460                 }
2461                 if (txr->res != NULL)
2462                         bus_release_resource(dev, SYS_RES_IRQ,
2463                             rid, txr->res);
2464                 rid = rxr->msix + 1;
2465                 if (rxr->tag != NULL) {
2466                         bus_teardown_intr(dev, rxr->res, rxr->tag);
2467                         rxr->tag = NULL;
2468                 }
2469                 if (rxr->res != NULL)
2470                         bus_release_resource(dev, SYS_RES_IRQ,
2471                             rid, rxr->res);
2472         }
2473
2474         if (adapter->linkvec) /* we are doing MSIX */
2475                 rid = adapter->linkvec + 1;
2476         else
2477                 rid = (adapter->msix != 0) ? 1 : 0;
2478
2479         if (adapter->tag != NULL) {
2480                 bus_teardown_intr(dev, adapter->res, adapter->tag);
2481                 adapter->tag = NULL;
2482         }
2483
2484         if (adapter->res != NULL)
2485                 bus_release_resource(dev, SYS_RES_IRQ, rid, adapter->res);
2486
2487
2488         if (adapter->msix)
2489                 pci_release_msi(dev);
2490
2491         if (adapter->msix_mem != NULL)
2492                 bus_release_resource(dev, SYS_RES_MEMORY,
2493                     PCIR_BAR(EM_MSIX_BAR), adapter->msix_mem);
2494
2495         if (adapter->memory != NULL)
2496                 bus_release_resource(dev, SYS_RES_MEMORY,
2497                     PCIR_BAR(0), adapter->memory);
2498
2499         if (adapter->flash != NULL)
2500                 bus_release_resource(dev, SYS_RES_MEMORY,
2501                     EM_FLASH, adapter->flash);
2502 }
2503
2504 /*
2505  * Setup MSI or MSI/X
2506  */
2507 static int
2508 em_setup_msix(struct adapter *adapter)
2509 {
2510         device_t dev = adapter->dev;
2511         int val = 0;
2512
2513
2514         /* Setup MSI/X for Hartwell */
2515         if ((adapter->hw.mac.type == e1000_82574) &&
2516             (em_enable_msix == TRUE)) {
2517                 /* Map the MSIX BAR */
2518                 int rid = PCIR_BAR(EM_MSIX_BAR);
2519                 adapter->msix_mem = bus_alloc_resource_any(dev,
2520                     SYS_RES_MEMORY, &rid, RF_ACTIVE);
2521                 if (!adapter->msix_mem) {
2522                         /* May not be enabled */
2523                         device_printf(adapter->dev,
2524                             "Unable to map MSIX table\n");
2525                         goto msi;
2526                 }
2527                 val = pci_msix_count(dev); 
2528                 if (val != 5) {
2529                         bus_release_resource(dev, SYS_RES_MEMORY,
2530                             PCIR_BAR(EM_MSIX_BAR), adapter->msix_mem);
2531                         adapter->msix_mem = NULL;
2532                         device_printf(adapter->dev,
2533                             "MSIX vectors wrong, using MSI\n");
2534                         goto msi;
2535                 }
2536                 if (em_msix_queues == 2) {
2537                         val = 5;
2538                         adapter->num_queues = 2;
2539                 } else {
2540                         val = 3;
2541                         adapter->num_queues = 1;
2542                 }
2543                 if (pci_alloc_msix(dev, &val) == 0) {
2544                         device_printf(adapter->dev,
2545                             "Using MSIX interrupts "
2546                             "with %d vectors\n", val);
2547                 }
2548
2549                 return (val);
2550         }
2551 msi:
2552         val = pci_msi_count(dev);
2553         if (val == 1 && pci_alloc_msi(dev, &val) == 0) {
2554                 adapter->msix = 1;
2555                 device_printf(adapter->dev, "Using MSI interrupt\n");
2556                 return (val);
2557         } 
2558         /* Should only happen due to manual intervention */
2559         device_printf(adapter->dev, "Setup MSIX failure\n");
2560         return (0);
2561 }
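/*
 * Resulting 82574 vector layout (illustrative summary of the logic
 * above):
 *
 *      em_msix_queues == 2:  5 vectors -- RX0, TX0, RX1, TX1, link
 *      otherwise:            3 vectors -- RX0, TX0, link
 *
 * pci_msix_count() must report exactly 5 vectors, or the driver
 * falls back to plain MSI.
 */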
2562
2563
2564 /*********************************************************************
2565  *
2566  *  Initialize the hardware to a configuration
2567  *  as specified by the adapter structure.
2568  *
2569  **********************************************************************/
2570 static void
2571 em_reset(struct adapter *adapter)
2572 {
2573         device_t        dev = adapter->dev;
2574         struct e1000_hw *hw = &adapter->hw;
2575         u16             rx_buffer_size;
2576
2577         INIT_DEBUGOUT("em_reset: begin");
2578
2579         /* Set up smart power down as default off on newer adapters. */
2580         if (!em_smart_pwr_down && (hw->mac.type == e1000_82571 ||
2581             hw->mac.type == e1000_82572)) {
2582                 u16 phy_tmp = 0;
2583
2584                 /* Speed up time to link by disabling smart power down. */
2585                 e1000_read_phy_reg(hw, IGP02E1000_PHY_POWER_MGMT, &phy_tmp);
2586                 phy_tmp &= ~IGP02E1000_PM_SPD;
2587                 e1000_write_phy_reg(hw, IGP02E1000_PHY_POWER_MGMT, phy_tmp);
2588         }
2589
2590         /*
2591          * These parameters control the automatic generation (Tx) and
2592          * response (Rx) to Ethernet PAUSE frames.
2593          * - High water mark should allow for at least two frames to be
2594          *   received after sending an XOFF.
2595          * - Low water mark works best when it is very near the high water mark.
2596          *   This allows the receiver to restart by sending XON when it has
2597          *   drained a bit. Here we use an arbitrary value of 1500 which will
2598          *   restart after one full frame is pulled from the buffer. There
2599          *   could be several smaller frames in the buffer and if so they will
2600          *   not trigger the XON until their total number reduces the buffer
2601          *   by 1500.
2602          * - The pause time is fairly large at 1000 x 512ns = 512 usec.
2603          */
2604         rx_buffer_size = ((E1000_READ_REG(hw, E1000_PBA) & 0xffff) << 10 );
2605
2606         hw->fc.high_water = rx_buffer_size -
2607             roundup2(adapter->max_frame_size, 1024);
2608         hw->fc.low_water = hw->fc.high_water - 1500;
2609
2610         if (hw->mac.type == e1000_80003es2lan)
2611                 hw->fc.pause_time = 0xFFFF;
2612         else
2613                 hw->fc.pause_time = EM_FC_PAUSE_TIME;
2614
2615         hw->fc.send_xon = TRUE;
2616
2617         /* Set Flow control, use the tunable location if sane */
2618         if ((em_fc_setting >= 0) && (em_fc_setting < 4))
2619                 hw->fc.requested_mode = em_fc_setting;
2620         else
2621                 hw->fc.requested_mode = e1000_fc_none;
2622
2623         /* Override - workaround for PCHLAN issue */
2624         if (hw->mac.type == e1000_pchlan)
2625                 hw->fc.requested_mode = e1000_fc_rx_pause;
2626
2627         /* Issue a global reset */
2628         e1000_reset_hw(hw);
2629         E1000_WRITE_REG(hw, E1000_WUC, 0);
2630
2631         if (e1000_init_hw(hw) < 0) {
2632                 device_printf(dev, "Hardware Initialization Failed\n");
2633                 return;
2634         }
2635
2636         E1000_WRITE_REG(hw, E1000_VET, ETHERTYPE_VLAN);
2637         e1000_get_phy_info(hw);
2638         e1000_check_for_link(hw);
2639         return;
2640 }
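/*
 * Worked example (illustrative) of the watermark math above, assuming
 * a 20 KB RX packet buffer (PBA & 0xffff == 20, so rx_buffer_size =
 * 20 << 10 = 20480) and a standard 1522-byte max frame:
 *
 *      high_water = 20480 - roundup2(1522, 1024)
 *                 = 20480 - 2048 = 18432
 *      low_water  = 18432 - 1500 = 16932
 *
 * XOFF is thus sent while at least two full frames still fit, and
 * XON resumes once roughly one frame has drained.
 */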
2641
2642 /*********************************************************************
2643  *
2644  *  Setup networking device structure and register an interface.
2645  *
2646  **********************************************************************/
2647 static void
2648 em_setup_interface(device_t dev, struct adapter *adapter)
2649 {
2650         struct ifnet   *ifp;
2651
2652         INIT_DEBUGOUT("em_setup_interface: begin");
2653
2654         ifp = adapter->ifp = if_alloc(IFT_ETHER);
2655         if (ifp == NULL)
2656                 panic("%s: can not if_alloc()", device_get_nameunit(dev));
2657         if_initname(ifp, device_get_name(dev), device_get_unit(dev));
2658         ifp->if_mtu = ETHERMTU;
2659         ifp->if_init =  em_init;
2660         ifp->if_softc = adapter;
2661         ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
2662         ifp->if_ioctl = em_ioctl;
2663         ifp->if_start = em_start;
2664         IFQ_SET_MAXLEN(&ifp->if_snd, adapter->num_tx_desc - 1);
2665         ifp->if_snd.ifq_drv_maxlen = adapter->num_tx_desc - 1;
2666         IFQ_SET_READY(&ifp->if_snd);
2667
2668         ether_ifattach(ifp, adapter->hw.mac.addr);
2669
2670         ifp->if_capabilities = ifp->if_capenable = 0;
2671
2672 #ifdef EM_MULTIQUEUE
2673         /* Multiqueue tx functions */
2674         ifp->if_transmit = em_mq_start;
2675         ifp->if_qflush = em_qflush;
2676 #endif  
2677
2678         ifp->if_capabilities |= IFCAP_HWCSUM | IFCAP_VLAN_HWCSUM;
2679         ifp->if_capenable |= IFCAP_HWCSUM | IFCAP_VLAN_HWCSUM;
2680
2681         /* Enable TSO by default, can disable with ifconfig */
2682         ifp->if_capabilities |= IFCAP_TSO4;
2683         ifp->if_capenable |= IFCAP_TSO4;
2684
2685         /*
2686          * Tell the upper layer(s) we
2687          * support full VLAN capability
2688          */
2689         ifp->if_data.ifi_hdrlen = sizeof(struct ether_vlan_header);
2690         ifp->if_capabilities |= IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_MTU;
2691         ifp->if_capenable |= IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_MTU;
2692
2693         /*
2694         ** Don't turn this on by default. If vlans are
2695         ** created on another pseudo device (eg. lagg),
2696         ** then vlan events are not passed through, breaking
2697         ** operation, but with HW FILTER off it works. If
2698         ** using vlans directly on the em driver you can
2699         ** enable this and get full hardware tag filtering.
2700         */
2701         ifp->if_capabilities |= IFCAP_VLAN_HWFILTER;
2702
2703 #ifdef DEVICE_POLLING
2704         ifp->if_capabilities |= IFCAP_POLLING;
2705 #endif
2706
2707         /* Enable only WOL MAGIC by default */
2708         if (adapter->wol) {
2709                 ifp->if_capabilities |= IFCAP_WOL;
2710                 ifp->if_capenable |= IFCAP_WOL_MAGIC;
2711         }
2712                 
2713         /*
2714          * Specify the media types supported by this adapter and register
2715          * callbacks to update media and link information
2716          */
2717         ifmedia_init(&adapter->media, IFM_IMASK,
2718             em_media_change, em_media_status);
2719         if ((adapter->hw.phy.media_type == e1000_media_type_fiber) ||
2720             (adapter->hw.phy.media_type == e1000_media_type_internal_serdes)) {
2721                 u_char fiber_type = IFM_1000_SX;        /* default type */
2722
2723                 ifmedia_add(&adapter->media, IFM_ETHER | fiber_type | IFM_FDX, 
2724                             0, NULL);
2725                 ifmedia_add(&adapter->media, IFM_ETHER | fiber_type, 0, NULL);
2726         } else {
2727                 ifmedia_add(&adapter->media, IFM_ETHER | IFM_10_T, 0, NULL);
2728                 ifmedia_add(&adapter->media, IFM_ETHER | IFM_10_T | IFM_FDX,
2729                             0, NULL);
2730                 ifmedia_add(&adapter->media, IFM_ETHER | IFM_100_TX,
2731                             0, NULL);
2732                 ifmedia_add(&adapter->media, IFM_ETHER | IFM_100_TX | IFM_FDX,
2733                             0, NULL);
2734                 if (adapter->hw.phy.type != e1000_phy_ife) {
2735                         ifmedia_add(&adapter->media,
2736                                 IFM_ETHER | IFM_1000_T | IFM_FDX, 0, NULL);
2737                         ifmedia_add(&adapter->media,
2738                                 IFM_ETHER | IFM_1000_T, 0, NULL);
2739                 }
2740         }
2741         ifmedia_add(&adapter->media, IFM_ETHER | IFM_AUTO, 0, NULL);
2742         ifmedia_set(&adapter->media, IFM_ETHER | IFM_AUTO);
2743 }
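/*
 * Illustrative note (not from the original sources): since only
 * if_capabilities gains IFCAP_VLAN_HWFILTER above, hardware VLAN
 * filtering stays off until the administrator opts in, e.g.:
 *
 *      ifconfig em0 vlanhwfilter
 *
 * which sets the bit in if_capenable via the SIOCSIFCAP ioctl.
 */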
2744
2745
2746 /*
2747  * Manage DMA'able memory.
2748  */
2749 static void
2750 em_dmamap_cb(void *arg, bus_dma_segment_t *segs, int nseg, int error)
2751 {
2752         if (error)
2753                 return;
2754         *(bus_addr_t *) arg = segs[0].ds_addr;
2755 }
2756
2757 static int
2758 em_dma_malloc(struct adapter *adapter, bus_size_t size,
2759         struct em_dma_alloc *dma, int mapflags)
2760 {
2761         int error;
2762
2763         error = bus_dma_tag_create(bus_get_dma_tag(adapter->dev), /* parent */
2764                                 EM_DBA_ALIGN, 0,        /* alignment, bounds */
2765                                 BUS_SPACE_MAXADDR,      /* lowaddr */
2766                                 BUS_SPACE_MAXADDR,      /* highaddr */
2767                                 NULL, NULL,             /* filter, filterarg */
2768                                 size,                   /* maxsize */
2769                                 1,                      /* nsegments */
2770                                 size,                   /* maxsegsize */
2771                                 0,                      /* flags */
2772                                 NULL,                   /* lockfunc */
2773                                 NULL,                   /* lockarg */
2774                                 &dma->dma_tag);
2775         if (error) {
2776                 device_printf(adapter->dev,
2777                     "%s: bus_dma_tag_create failed: %d\n",
2778                     __func__, error);
2779                 goto fail_0;
2780         }
2781
2782         error = bus_dmamem_alloc(dma->dma_tag, (void**) &dma->dma_vaddr,
2783             BUS_DMA_NOWAIT | BUS_DMA_COHERENT, &dma->dma_map);
2784         if (error) {
2785                 device_printf(adapter->dev,
2786                     "%s: bus_dmamem_alloc(%ju) failed: %d\n",
2787                     __func__, (uintmax_t)size, error);
2788                 goto fail_2;
2789         }
2790
2791         dma->dma_paddr = 0;
2792         error = bus_dmamap_load(dma->dma_tag, dma->dma_map, dma->dma_vaddr,
2793             size, em_dmamap_cb, &dma->dma_paddr, mapflags | BUS_DMA_NOWAIT);
2794         if (error || dma->dma_paddr == 0) {
2795                 device_printf(adapter->dev,
2796                     "%s: bus_dmamap_load failed: %d\n",
2797                     __func__, error);
2798                 goto fail_3;
2799         }
2800
2801         return (0);
2802
2803 fail_3:
2804         bus_dmamap_unload(dma->dma_tag, dma->dma_map);
2805 fail_2:
2806         bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map);
fail_1:         /* alloc failed: nothing to unload or free */
2807         bus_dma_tag_destroy(dma->dma_tag);
2808 fail_0:
2809         dma->dma_map = NULL;
2810         dma->dma_tag = NULL;
2811
2812         return (error);
2813 }
2814
2815 static void
2816 em_dma_free(struct adapter *adapter, struct em_dma_alloc *dma)
2817 {
2818         if (dma->dma_tag == NULL)
2819                 return;
2820         if (dma->dma_map != NULL) {
2821                 bus_dmamap_sync(dma->dma_tag, dma->dma_map,
2822                     BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
2823                 bus_dmamap_unload(dma->dma_tag, dma->dma_map);
2824                 bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map);
2825                 dma->dma_map = NULL;
2826         }
2827         bus_dma_tag_destroy(dma->dma_tag);
2828         dma->dma_tag = NULL;
2829 }
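
/*
 * Minimal usage sketch (hypothetical caller): em_dma_malloc() and
 * em_dma_free() pair up for a one-off DMA area; "adapter" stands in
 * for the softc a real caller would already hold.
 */
#if 0
        struct em_dma_alloc scratch;

        if (em_dma_malloc(adapter, 4096, &scratch, BUS_DMA_NOWAIT) == 0) {
                /* scratch.dma_vaddr is the kernel VA; scratch.dma_paddr
                 * is the bus address to program into the hardware. */
                em_dma_free(adapter, &scratch);
        }
#endif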
2830
2831
2832 /*********************************************************************
2833  *
2834  *  Allocate memory for the transmit and receive rings, and then
2835  *  the descriptors associated with each, called only once at attach.
2836  *
2837  **********************************************************************/
2838 static int
2839 em_allocate_queues(struct adapter *adapter)
2840 {
2841         device_t                dev = adapter->dev;
2842         struct tx_ring          *txr = NULL;
2843         struct rx_ring          *rxr = NULL;
2844         int rsize, tsize, error = E1000_SUCCESS;
2845         int txconf = 0, rxconf = 0;
2846
2847
2848         /* Allocate the TX ring struct memory */
2849         if (!(adapter->tx_rings =
2850             (struct tx_ring *) malloc(sizeof(struct tx_ring) *
2851             adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
2852                 device_printf(dev, "Unable to allocate TX ring memory\n");
2853                 error = ENOMEM;
2854                 goto fail;
2855         }
2856
2857         /* Now allocate the RX ring struct memory */
2858         if (!(adapter->rx_rings =
2859             (struct rx_ring *) malloc(sizeof(struct rx_ring) *
2860             adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
2861                 device_printf(dev, "Unable to allocate RX ring memory\n");
2862                 error = ENOMEM;
2863                 goto rx_fail;
2864         }
2865
2866         tsize = roundup2(adapter->num_tx_desc *
2867             sizeof(struct e1000_tx_desc), EM_DBA_ALIGN);
2868         /*
2869          * Now set up the TX queues; txconf tracks how many have been
2870          * configured so that, if anything fails midway, we can unwind
2871          * the allocations gracefully.
2872          */
2873         for (int i = 0; i < adapter->num_queues; i++, txconf++) {
2874                 /* Set up some basics */
2875                 txr = &adapter->tx_rings[i];
2876                 txr->adapter = adapter;
2877                 txr->me = i;
2878
2879                 /* Initialize the TX lock */
2880                 snprintf(txr->mtx_name, sizeof(txr->mtx_name), "%s:tx(%d)",
2881                     device_get_nameunit(dev), txr->me);
2882                 mtx_init(&txr->tx_mtx, txr->mtx_name, NULL, MTX_DEF);
2883
2884                 if (em_dma_malloc(adapter, tsize,
2885                         &txr->txdma, BUS_DMA_NOWAIT)) {
2886                         device_printf(dev,
2887                             "Unable to allocate TX Descriptor memory\n");
2888                         error = ENOMEM;
2889                         goto err_tx_desc;
2890                 }
2891                 txr->tx_base = (struct e1000_tx_desc *)txr->txdma.dma_vaddr;
2892                 bzero((void *)txr->tx_base, tsize);
2893
2894                 if (em_allocate_transmit_buffers(txr)) {
2895                         device_printf(dev,
2896                             "Critical Failure setting up transmit buffers\n");
2897                         error = ENOMEM;
2898                         goto err_tx_desc;
2899                 }
2900 #if __FreeBSD_version >= 800000
2901                 /* Allocate a buf ring */
2902                 txr->br = buf_ring_alloc(4096, M_DEVBUF,
2903                     M_WAITOK, &txr->tx_mtx);
2904 #endif
2905         }
2906
2907         /*
2908          * Next the RX queues...
2909          */ 
2910         rsize = roundup2(adapter->num_rx_desc *
2911             sizeof(struct e1000_rx_desc), EM_DBA_ALIGN);
2912         for (int i = 0; i < adapter->num_queues; i++, rxconf++) {
2913                 rxr = &adapter->rx_rings[i];
2914                 rxr->adapter = adapter;
2915                 rxr->me = i;
2916
2917                 /* Initialize the RX lock */
2918                 snprintf(rxr->mtx_name, sizeof(rxr->mtx_name), "%s:rx(%d)",
2919             device_get_nameunit(dev), rxr->me);
2920                 mtx_init(&rxr->rx_mtx, rxr->mtx_name, NULL, MTX_DEF);
2921
2922                 if (em_dma_malloc(adapter, rsize,
2923                         &rxr->rxdma, BUS_DMA_NOWAIT)) {
2924                         device_printf(dev,
2925                             "Unable to allocate RX Descriptor memory\n");
2926                         error = ENOMEM;
2927                         goto err_rx_desc;
2928                 }
2929                 rxr->rx_base = (struct e1000_rx_desc *)rxr->rxdma.dma_vaddr;
2930                 bzero((void *)rxr->rx_base, rsize);
2931
2932                 /* Allocate receive buffers for the ring */
2933                 if (em_allocate_receive_buffers(rxr)) {
2934                         device_printf(dev,
2935                             "Critical Failure setting up receive buffers\n");
2936                         error = ENOMEM;
2937                         goto err_rx_desc;
2938                 }
2939         }
2940
2941         return (0);
2942
2943 err_rx_desc:
2944         for (rxr = adapter->rx_rings; rxconf > 0; rxr++, rxconf--)
2945                 em_dma_free(adapter, &rxr->rxdma);
2946 err_tx_desc:
2947         for (txr = adapter->tx_rings; txconf > 0; txr++, txconf--) {
2948                 em_dma_free(adapter, &txr->txdma);
#if __FreeBSD_version >= 800000
                /* Free this queue's buf ring, if it was allocated */
                if (txr->br != NULL)
                        buf_ring_free(txr->br, M_DEVBUF);
#endif
        }
2949         free(adapter->rx_rings, M_DEVBUF);
2950 rx_fail:
2952         free(adapter->tx_rings, M_DEVBUF);
2953 fail:
2954         return (error);
2955 }
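
/*
 * Sizing example (hypothetical ring size): with 1024 TX descriptors,
 * tsize above is roundup2(1024 * sizeof(struct e1000_tx_desc), EM_DBA_ALIGN)
 * = roundup2(1024 * 16, 128) = 16384 bytes; the roundup is a no-op here
 * since 16384 is already 128-byte aligned.
 */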
2956
2957
2958 /*********************************************************************
2959  *
2960  *  Allocate memory for tx_buffer structures. The tx_buffer stores all
2961  *  the information needed to transmit a packet on the wire. This is
2962  *  called only once at attach, setup is done every reset.
2963  *
2964  **********************************************************************/
2965 static int
2966 em_allocate_transmit_buffers(struct tx_ring *txr)
2967 {
2968         struct adapter *adapter = txr->adapter;
2969         device_t dev = adapter->dev;
2970         struct em_buffer *txbuf;
2971         int error, i;
2972
2973         /*
2974          * Setup DMA descriptor areas.
2975          */
2976         if ((error = bus_dma_tag_create(bus_get_dma_tag(dev),
2977                                1, 0,                    /* alignment, bounds */
2978                                BUS_SPACE_MAXADDR,       /* lowaddr */
2979                                BUS_SPACE_MAXADDR,       /* highaddr */
2980                                NULL, NULL,              /* filter, filterarg */
2981                                EM_TSO_SIZE,             /* maxsize */
2982                                EM_MAX_SCATTER,          /* nsegments */
2983                                PAGE_SIZE,               /* maxsegsize */
2984                                0,                       /* flags */
2985                                NULL,                    /* lockfunc */
2986                                NULL,                    /* lockfuncarg */
2987                                &txr->txtag))) {
2988                 device_printf(dev,"Unable to allocate TX DMA tag\n");
2989                 goto fail;
2990         }
2991
2992         if (!(txr->tx_buffers =
2993             (struct em_buffer *) malloc(sizeof(struct em_buffer) *
2994             adapter->num_tx_desc, M_DEVBUF, M_NOWAIT | M_ZERO))) {
2995                 device_printf(dev, "Unable to allocate tx_buffer memory\n");
2996                 error = ENOMEM;
2997                 goto fail;
2998         }
2999
3000         /* Create the descriptor buffer dma maps */
3001         txbuf = txr->tx_buffers;
3002         for (i = 0; i < adapter->num_tx_desc; i++, txbuf++) {
3003                 error = bus_dmamap_create(txr->txtag, 0, &txbuf->map);
3004                 if (error != 0) {
3005                         device_printf(dev, "Unable to create TX DMA map\n");
3006                         goto fail;
3007                 }
3008         }
3009
3010         return (0);
3011 fail:
3012         /* We free everything; this handles a failure partway through */
3013         em_free_transmit_structures(adapter);
3014         return (error);
3015 }
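
/*
 * Tag-parameter sketch: the TX tag above allows one mapping of up to
 * EM_TSO_SIZE bytes spread over at most EM_MAX_SCATTER segments, each
 * no larger than PAGE_SIZE.  A 64KB TSO payload on 4KB pages therefore
 * needs at least 64KB / 4KB = 16 segments, plus slop for unaligned mbufs.
 */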
3016
3017 /*********************************************************************
3018  *
3019  *  Initialize a transmit ring.
3020  *
3021  **********************************************************************/
3022 static void
3023 em_setup_transmit_ring(struct tx_ring *txr)
3024 {
3025         struct adapter *adapter = txr->adapter;
3026         struct em_buffer *txbuf;
3027         int i;
3028
3029         /* Clear the old descriptor contents */
3030         EM_TX_LOCK(txr);
3031         bzero((void *)txr->tx_base,
3032               (sizeof(struct e1000_tx_desc)) * adapter->num_tx_desc);
3033         /* Reset indices */
3034         txr->next_avail_desc = 0;
3035         txr->next_to_clean = 0;
3036
3037         /* Free any existing tx buffers. */
3038         txbuf = txr->tx_buffers;
3039         for (i = 0; i < adapter->num_tx_desc; i++, txbuf++) {
3040                 if (txbuf->m_head != NULL) {
3041                         bus_dmamap_sync(txr->txtag, txbuf->map,
3042                             BUS_DMASYNC_POSTWRITE);
3043                         bus_dmamap_unload(txr->txtag, txbuf->map);
3044                         m_freem(txbuf->m_head);
3045                         txbuf->m_head = NULL;
3046                 }
3047                 /* clear the watch index */
3048                 txbuf->next_eop = -1;
3049         }
3050
3051         /* Set number of descriptors available */
3052         txr->tx_avail = adapter->num_tx_desc;
3053
3054         bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
3055             BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
3056         EM_TX_UNLOCK(txr);
3057 }
3058
3059 /*********************************************************************
3060  *
3061  *  Initialize all transmit rings.
3062  *
3063  **********************************************************************/
3064 static void
3065 em_setup_transmit_structures(struct adapter *adapter)
3066 {
3067         struct tx_ring *txr = adapter->tx_rings;
3068
3069         for (int i = 0; i < adapter->num_queues; i++, txr++)
3070                 em_setup_transmit_ring(txr);
3071
3072         return;
3073 }
3074
3075 /*********************************************************************
3076  *
3077  *  Enable transmit unit.
3078  *
3079  **********************************************************************/
3080 static void
3081 em_initialize_transmit_unit(struct adapter *adapter)
3082 {
3083         struct tx_ring  *txr = adapter->tx_rings;
3084         struct e1000_hw *hw = &adapter->hw;
3085         u32     tctl, tarc, tipg = 0;
3086
3087         INIT_DEBUGOUT("em_initialize_transmit_unit: begin");
3088
3089         for (int i = 0; i < adapter->num_queues; i++, txr++) {
3090                 u64 bus_addr = txr->txdma.dma_paddr;
3091                 /* Base and Len of TX Ring */
3092                 E1000_WRITE_REG(hw, E1000_TDLEN(i),
3093                     adapter->num_tx_desc * sizeof(struct e1000_tx_desc));
3094                 E1000_WRITE_REG(hw, E1000_TDBAH(i),
3095                     (u32)(bus_addr >> 32));
3096                 E1000_WRITE_REG(hw, E1000_TDBAL(i),
3097                     (u32)bus_addr);
3098                 /* Init the HEAD/TAIL indices */
3099                 E1000_WRITE_REG(hw, E1000_TDT(i), 0);
3100                 E1000_WRITE_REG(hw, E1000_TDH(i), 0);
3101
3102                 HW_DEBUGOUT2("Base = %x, Length = %x\n",
3103                     E1000_READ_REG(&adapter->hw, E1000_TDBAL(i)),
3104                     E1000_READ_REG(&adapter->hw, E1000_TDLEN(i)));
3105
3106                 txr->watchdog_check = FALSE;
3107         }
3108
3109         /* Set the default values for the Tx Inter Packet Gap timer */
3110         switch (adapter->hw.mac.type) {
3111         case e1000_82542:
3112                 tipg = DEFAULT_82542_TIPG_IPGT;
3113                 tipg |= DEFAULT_82542_TIPG_IPGR1 << E1000_TIPG_IPGR1_SHIFT;
3114                 tipg |= DEFAULT_82542_TIPG_IPGR2 << E1000_TIPG_IPGR2_SHIFT;
3115                 break;
3116         case e1000_80003es2lan:
3117                 tipg = DEFAULT_82543_TIPG_IPGR1;
3118                 tipg |= DEFAULT_80003ES2LAN_TIPG_IPGR2 <<
3119                     E1000_TIPG_IPGR2_SHIFT;
3120                 break;
3121         default:
3122                 if ((adapter->hw.phy.media_type == e1000_media_type_fiber) ||
3123                     (adapter->hw.phy.media_type ==
3124                     e1000_media_type_internal_serdes))
3125                         tipg = DEFAULT_82543_TIPG_IPGT_FIBER;
3126                 else
3127                         tipg = DEFAULT_82543_TIPG_IPGT_COPPER;
3128                 tipg |= DEFAULT_82543_TIPG_IPGR1 << E1000_TIPG_IPGR1_SHIFT;
3129                 tipg |= DEFAULT_82543_TIPG_IPGR2 << E1000_TIPG_IPGR2_SHIFT;
3130         }
3131
3132         E1000_WRITE_REG(&adapter->hw, E1000_TIPG, tipg);
3133         E1000_WRITE_REG(&adapter->hw, E1000_TIDV, adapter->tx_int_delay.value);
3134
3135         if (adapter->hw.mac.type >= e1000_82540)
3136                 E1000_WRITE_REG(&adapter->hw, E1000_TADV,
3137                     adapter->tx_abs_int_delay.value);
3138
3139         if ((adapter->hw.mac.type == e1000_82571) ||
3140             (adapter->hw.mac.type == e1000_82572)) {
3141                 tarc = E1000_READ_REG(&adapter->hw, E1000_TARC(0));
3142                 tarc |= SPEED_MODE_BIT;
3143                 E1000_WRITE_REG(&adapter->hw, E1000_TARC(0), tarc);
3144         } else if (adapter->hw.mac.type == e1000_80003es2lan) {
3145                 tarc = E1000_READ_REG(&adapter->hw, E1000_TARC(0));
3146                 tarc |= 1;
3147                 E1000_WRITE_REG(&adapter->hw, E1000_TARC(0), tarc);
3148                 tarc = E1000_READ_REG(&adapter->hw, E1000_TARC(1));
3149                 tarc |= 1;
3150                 E1000_WRITE_REG(&adapter->hw, E1000_TARC(1), tarc);
3151         }
3152
3153         adapter->txd_cmd = E1000_TXD_CMD_IFCS;
3154         if (adapter->tx_int_delay.value > 0)
3155                 adapter->txd_cmd |= E1000_TXD_CMD_IDE;
3156
3157         /* Program the Transmit Control Register */
3158         tctl = E1000_READ_REG(&adapter->hw, E1000_TCTL);
3159         tctl &= ~E1000_TCTL_CT;
3160         tctl |= (E1000_TCTL_PSP | E1000_TCTL_RTLC | E1000_TCTL_EN |
3161                    (E1000_COLLISION_THRESHOLD << E1000_CT_SHIFT));
3162
3163         if (adapter->hw.mac.type >= e1000_82571)
3164                 tctl |= E1000_TCTL_MULR;
3165
3166         /* This write will effectively turn on the transmit unit. */
3167         E1000_WRITE_REG(&adapter->hw, E1000_TCTL, tctl);
3168
3169 }
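
/*
 * Register sketch (hypothetical address): a 64-bit ring base address is
 * split across the high/low base registers exactly as in the loop above.
 */
#if 0
        u64 bus_addr = 0x00000001c0000000ULL;
        u32 hi = (u32)(bus_addr >> 32); /* 0x00000001 -> E1000_TDBAH */
        u32 lo = (u32)bus_addr;         /* 0xc0000000 -> E1000_TDBAL */
#endif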
3170
3171
3172 /*********************************************************************
3173  *
3174  *  Free all transmit rings.
3175  *
3176  **********************************************************************/
3177 static void
3178 em_free_transmit_structures(struct adapter *adapter)
3179 {
3180         struct tx_ring *txr = adapter->tx_rings;
3181
3182         for (int i = 0; i < adapter->num_queues; i++, txr++) {
3183                 EM_TX_LOCK(txr);
3184                 em_free_transmit_buffers(txr);
3185                 em_dma_free(adapter, &txr->txdma);
3186                 EM_TX_UNLOCK(txr);
3187                 EM_TX_LOCK_DESTROY(txr);
3188         }
3189
3190         free(adapter->tx_rings, M_DEVBUF);
3191 }
3192
3193 /*********************************************************************
3194  *
3195  *  Free transmit ring related data structures.
3196  *
3197  **********************************************************************/
3198 static void
3199 em_free_transmit_buffers(struct tx_ring *txr)
3200 {
3201         struct adapter          *adapter = txr->adapter;
3202         struct em_buffer        *txbuf;
3203
3204         INIT_DEBUGOUT("free_transmit_ring: begin");
3205
3206         if (txr->tx_buffers == NULL)
3207                 return;
3208
3209         for (int i = 0; i < adapter->num_tx_desc; i++) {
3210                 txbuf = &txr->tx_buffers[i];
3211                 if (txbuf->m_head != NULL) {
3212                         bus_dmamap_sync(txr->txtag, txbuf->map,
3213                             BUS_DMASYNC_POSTWRITE);
3214                         bus_dmamap_unload(txr->txtag,
3215                             txbuf->map);
3216                         m_freem(txbuf->m_head);
3217                         txbuf->m_head = NULL;
3218                         if (txbuf->map != NULL) {
3219                                 bus_dmamap_destroy(txr->txtag,
3220                                     txbuf->map);
3221                                 txbuf->map = NULL;
3222                         }
3223                 } else if (txbuf->map != NULL) {
3224                         bus_dmamap_unload(txr->txtag,
3225                             txbuf->map);
3226                         bus_dmamap_destroy(txr->txtag,
3227                             txbuf->map);
3228                         txbuf->map = NULL;
3229                 }
3230         }
3231 #if __FreeBSD_version >= 800000
3232         if (txr->br != NULL)
3233                 buf_ring_free(txr->br, M_DEVBUF);
3234 #endif
3235         if (txr->tx_buffers != NULL) {
3236                 free(txr->tx_buffers, M_DEVBUF);
3237                 txr->tx_buffers = NULL;
3238         }
3239         if (txr->txtag != NULL) {
3240                 bus_dma_tag_destroy(txr->txtag);
3241                 txr->txtag = NULL;
3242         }
3243         return;
3244 }
3245
3246
3247 /*********************************************************************
3248  *
3249  *  The offload context needs to be set when we transfer the first
3250  *  packet of a particular protocol (TCP/UDP). This routine has been
3251  *  enhanced to deal with inserted VLAN headers, and IPV6 (not complete)
3252  *
3253  *  Added back the old method of keeping the current context type
3254  *  and not setting if unnecessary, as this is reported to be a
3255  *  big performance win.  -jfv
3256  **********************************************************************/
3257 static void
3258 em_transmit_checksum_setup(struct tx_ring *txr, struct mbuf *mp,
3259     u32 *txd_upper, u32 *txd_lower)
3260 {
3261         struct adapter                  *adapter = txr->adapter;
3262         struct e1000_context_desc       *TXD = NULL;
3263         struct em_buffer *tx_buffer;
3264         struct ether_vlan_header *eh;
3265         struct ip *ip = NULL;
3266         struct ip6_hdr *ip6;
3267         int cur, ehdrlen;
3268         u32 cmd, hdr_len, ip_hlen;
3269         u16 etype;
3270         u8 ipproto;
3271
3272
3273         cmd = hdr_len = ipproto = 0;
3274         cur = txr->next_avail_desc;
3275
3276         /*
3277          * Determine where frame payload starts.
3278          * Jump over vlan headers if already present,
3279          * helpful for QinQ too.
3280          */
3281         eh = mtod(mp, struct ether_vlan_header *);
3282         if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
3283                 etype = ntohs(eh->evl_proto);
3284                 ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
3285         } else {
3286                 etype = ntohs(eh->evl_encap_proto);
3287                 ehdrlen = ETHER_HDR_LEN;
3288         }
3289
3290         /*
3291          * We only support TCP/UDP for IPv4 and IPv6 for the moment.
3292          * TODO: Support SCTP too when it hits the tree.
3293          */
3294         switch (etype) {
3295         case ETHERTYPE_IP:
3296                 ip = (struct ip *)(mp->m_data + ehdrlen);
3297                 ip_hlen = ip->ip_hl << 2;
3298
3299                 /* Setup of IP header checksum. */
3300                 if (mp->m_pkthdr.csum_flags & CSUM_IP) {
3301                         /*
3302                          * Start offset for header checksum calculation.
3303                          * End offset for header checksum calculation.
3304                          * Offset of place to put the checksum.
3305                          */
3306                         TXD = (struct e1000_context_desc *)
3307                             &txr->tx_base[cur];
3308                         TXD->lower_setup.ip_fields.ipcss = ehdrlen;
3309                         TXD->lower_setup.ip_fields.ipcse =
3310                             htole16(ehdrlen + ip_hlen);
3311                         TXD->lower_setup.ip_fields.ipcso =
3312                             ehdrlen + offsetof(struct ip, ip_sum);
3313                         cmd |= E1000_TXD_CMD_IP;
3314                         *txd_upper |= E1000_TXD_POPTS_IXSM << 8;
3315                 }
3316
3317                 if (mp->m_len < ehdrlen + ip_hlen)
3318                         return; /* failure */
3319
3320                 hdr_len = ehdrlen + ip_hlen;
3321                 ipproto = ip->ip_p;
3322
3323                 break;
3324         case ETHERTYPE_IPV6:
3325                 ip6 = (struct ip6_hdr *)(mp->m_data + ehdrlen);
3326                 ip_hlen = sizeof(struct ip6_hdr); /* XXX: No header stacking. */
3327
3328                 if (mp->m_len < ehdrlen + ip_hlen)
3329                         return; /* failure */
3330
3331                 /* IPv6 doesn't have a header checksum. */
3332
3333                 hdr_len = ehdrlen + ip_hlen;
3334                 ipproto = ip6->ip6_nxt;
3335
3336                 break;
3337         default:
3338                 *txd_upper = 0;
3339                 *txd_lower = 0;
3340                 return;
3341         }
3342
3343         switch (ipproto) {
3344         case IPPROTO_TCP:
3345                 if (mp->m_pkthdr.csum_flags & CSUM_TCP) {
3346                         *txd_lower = E1000_TXD_CMD_DEXT | E1000_TXD_DTYP_D;
3347                         *txd_upper |= E1000_TXD_POPTS_TXSM << 8;
3348                         /* no need for context if already set */
3349                         if (txr->last_hw_offload == CSUM_TCP)
3350                                 return;
3351                         txr->last_hw_offload = CSUM_TCP;
3352                         /*
3353                          * Start offset for payload checksum calculation.
3354                          * End offset for payload checksum calculation.
3355                          * Offset of place to put the checksum.
3356                          */
3357                         TXD = (struct e1000_context_desc *)
3358                             &txr->tx_base[cur];
3359                         TXD->upper_setup.tcp_fields.tucss = hdr_len;
3360                         TXD->upper_setup.tcp_fields.tucse = htole16(0);
3361                         TXD->upper_setup.tcp_fields.tucso =
3362                             hdr_len + offsetof(struct tcphdr, th_sum);
3363                         cmd |= E1000_TXD_CMD_TCP;
3364                 }
3365                 break;
3366         case IPPROTO_UDP:
3367         {
3368                 if (mp->m_pkthdr.csum_flags & CSUM_UDP) {
3369                         *txd_lower = E1000_TXD_CMD_DEXT | E1000_TXD_DTYP_D;
3370                         *txd_upper |= E1000_TXD_POPTS_TXSM << 8;
3371                         /* no need for context if already set */
3372                         if (txr->last_hw_offload == CSUM_UDP)
3373                                 return;
3374                         txr->last_hw_offload = CSUM_UDP;
3375                         /*
3376                          * Start offset for header checksum calculation.
3377                          * End offset for header checksum calculation.
3378                          * Offset of place to put the checksum.
3379                          */
3380                         TXD = (struct e1000_context_desc *)
3381                             &txr->tx_base[cur];
3382                         TXD->upper_setup.tcp_fields.tucss = hdr_len;
3383                         TXD->upper_setup.tcp_fields.tucse = htole16(0);
3384                         TXD->upper_setup.tcp_fields.tucso =
3385                             hdr_len + offsetof(struct udphdr, uh_sum);
3386                 }
3387                 /* Fall Thru */
3388         }
3389         default:
3390                 break;
3391         }
3392
        if (TXD == NULL)        /* no context descriptor was set up */
                return;
3393         TXD->tcp_seg_setup.data = htole32(0);
3394         TXD->cmd_and_length =
3395             htole32(adapter->txd_cmd | E1000_TXD_CMD_DEXT | cmd);
3396         tx_buffer = &txr->tx_buffers[cur];
3397         tx_buffer->m_head = NULL;
3398         tx_buffer->next_eop = -1;
3399
3400         if (++cur == adapter->num_tx_desc)
3401                 cur = 0;
3402
3403         txr->tx_avail--;
3404         txr->next_avail_desc = cur;
3405 }
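
/*
 * Offset sketch: for an untagged IPv4/TCP frame with a 20-byte IP header,
 * the context fields written above evaluate to:
 *   ipcss = 14 (ETHER_HDR_LEN), ipcse = 34, ipcso = 14 + 10 = 24
 *   tucss = 34, tucso = 34 + offsetof(struct tcphdr, th_sum) = 50
 */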
3406
3407
3408 /**********************************************************************
3409  *
3410  *  Setup work for hardware segmentation offload (TSO)
3411  *
3412  **********************************************************************/
3413 static bool
3414 em_tso_setup(struct tx_ring *txr, struct mbuf *mp, u32 *txd_upper,
3415    u32 *txd_lower)
3416 {
3417         struct adapter                  *adapter = txr->adapter;
3418         struct e1000_context_desc       *TXD;
3419         struct em_buffer                *tx_buffer;
3420         struct ether_vlan_header        *eh;
3421         struct ip                       *ip;
3422         struct ip6_hdr                  *ip6;
3423         struct tcphdr                   *th;
3424         int cur, ehdrlen, hdr_len, ip_hlen, isip6;
3425         u16 etype;
3426
3427         /*
3428          * This function could/should be extended to support IP/IPv6
3429          * fragmentation as well.  But as they say, one step at a time.
3430          */
3431
3432         /*
3433          * Determine where frame payload starts.
3434          * Jump over vlan headers if already present,
3435          * helpful for QinQ too.
3436          */
3437         eh = mtod(mp, struct ether_vlan_header *);
3438         if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
3439                 etype = ntohs(eh->evl_proto);
3440                 ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
3441         } else {
3442                 etype = ntohs(eh->evl_encap_proto);
3443                 ehdrlen = ETHER_HDR_LEN;
3444         }
3445
3446         /* Ensure we have at least the IP+TCP header in the first mbuf. */
3447         if (mp->m_len < ehdrlen + sizeof(struct ip) + sizeof(struct tcphdr))
3448                 return FALSE;   /* -1 */
3449
3450         /*
3451          * We only support TCP for the moment; IPv4 now, IPv6 not yet.
3452          * TODO: Support SCTP too when it hits the tree.
3453          */
3454         switch (etype) {
3455         case ETHERTYPE_IP:
3456                 isip6 = 0;
3457                 ip = (struct ip *)(mp->m_data + ehdrlen);
3458                 if (ip->ip_p != IPPROTO_TCP)
3459                         return FALSE;   /* 0 */
3460                 ip->ip_len = 0;
3461                 ip->ip_sum = 0;
3462                 ip_hlen = ip->ip_hl << 2;
3463                 if (mp->m_len < ehdrlen + ip_hlen + sizeof(struct tcphdr))
3464                         return FALSE;   /* -1 */
3465                 th = (struct tcphdr *)((caddr_t)ip + ip_hlen);
3466 #if 1
3467                 th->th_sum = in_pseudo(ip->ip_src.s_addr,
3468                     ip->ip_dst.s_addr, htons(IPPROTO_TCP));
3469 #else
3470                 th->th_sum = mp->m_pkthdr.csum_data;
3471 #endif
3472                 break;
3473         case ETHERTYPE_IPV6:
3474                 isip6 = 1;
3475                 return FALSE;                   /* Not supported yet. */
3476                 ip6 = (struct ip6_hdr *)(mp->m_data + ehdrlen);
3477                 if (ip6->ip6_nxt != IPPROTO_TCP)
3478                         return FALSE;   /* 0 */
3479                 ip6->ip6_plen = 0;
3480                 ip_hlen = sizeof(struct ip6_hdr); /* XXX: no header stacking. */
3481                 if (mp->m_len < ehdrlen + ip_hlen + sizeof(struct tcphdr))
3482                         return FALSE;   /* -1 */
3483                 th = (struct tcphdr *)((caddr_t)ip6 + ip_hlen);
3484 #if 0
3485                 th->th_sum = in6_pseudo(ip6->ip6_src, ip6->ip6_dst,
3486                     htons(IPPROTO_TCP));        /* XXX: function notyet. */
3487 #else
3488                 th->th_sum = mp->m_pkthdr.csum_data;
3489 #endif
3490                 break;
3491         default:
3492                 return FALSE;
3493         }
3494         hdr_len = ehdrlen + ip_hlen + (th->th_off << 2);
3495
3496         *txd_lower = (E1000_TXD_CMD_DEXT |      /* Extended descr type */
3497                       E1000_TXD_DTYP_D |        /* Data descr type */
3498                       E1000_TXD_CMD_TSE);       /* Do TSE on this packet */
3499
3500         /* IP and/or TCP header checksum calculation and insertion. */
3501         *txd_upper = ((isip6 ? 0 : E1000_TXD_POPTS_IXSM) |
3502                       E1000_TXD_POPTS_TXSM) << 8;
3503
3504         cur = txr->next_avail_desc;
3505         tx_buffer = &txr->tx_buffers[cur];
3506         TXD = (struct e1000_context_desc *) &txr->tx_base[cur];
3507
3508         /* IPv6 doesn't have a header checksum. */
3509         if (!isip6) {
3510                 /*
3511                  * Start offset for header checksum calculation.
3512                  * End offset for header checksum calculation.
3513                  * Offset of place to put the checksum.
3514                  */
3515                 TXD->lower_setup.ip_fields.ipcss = ehdrlen;
3516                 TXD->lower_setup.ip_fields.ipcse =
3517                     htole16(ehdrlen + ip_hlen - 1);
3518                 TXD->lower_setup.ip_fields.ipcso =
3519                     ehdrlen + offsetof(struct ip, ip_sum);
3520         }
3521         /*
3522          * Start offset for payload checksum calculation.
3523          * End offset for payload checksum calculation.
3524          * Offset of place to put the checksum.
3525          */
3526         TXD->upper_setup.tcp_fields.tucss =
3527             ehdrlen + ip_hlen;
3528         TXD->upper_setup.tcp_fields.tucse = 0;
3529         TXD->upper_setup.tcp_fields.tucso =
3530             ehdrlen + ip_hlen + offsetof(struct tcphdr, th_sum);
3531         /*
3532          * Payload size per packet w/o any headers.
3533          * Length of all headers up to payload.
3534          */
3535         TXD->tcp_seg_setup.fields.mss = htole16(mp->m_pkthdr.tso_segsz);
3536         TXD->tcp_seg_setup.fields.hdr_len = hdr_len;
3537
3538         TXD->cmd_and_length = htole32(adapter->txd_cmd |
3539                                 E1000_TXD_CMD_DEXT |    /* Extended descr */
3540                                 E1000_TXD_CMD_TSE |     /* TSE context */
3541                                 (isip6 ? 0 : E1000_TXD_CMD_IP) | 
3542                                 E1000_TXD_CMD_TCP |     /* Do TCP checksum */
3543                                 (mp->m_pkthdr.len - (hdr_len))); /* Total len */
3544
3545         tx_buffer->m_head = NULL;
3546         tx_buffer->next_eop = -1;
3547
3548         if (++cur == adapter->num_tx_desc)
3549                 cur = 0;
3550
3551         txr->tx_avail--;
3552         txr->next_avail_desc = cur;
3553         txr->tx_tso = TRUE;
3554
3555         return TRUE;
3556 }
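
/*
 * Segmentation arithmetic: given the context above, the hardware emits
 * roughly howmany(m_pkthdr.len - hdr_len, tso_segsz) frames, each
 * carrying a copy of the hdr_len header bytes plus up to tso_segsz
 * bytes of payload.
 */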
3557
3558
3559 /**********************************************************************
3560  *
3561  *  Examine each tx_buffer in the used queue. If the hardware is done
3562  *  processing the packet then free associated resources. The
3563  *  tx_buffer is put back on the free queue.
3564  *
3565  **********************************************************************/
3566 static bool
3567 em_txeof(struct tx_ring *txr)
3568 {
3569         struct adapter  *adapter = txr->adapter;
3570         int first, last, done, num_avail;
3571         struct em_buffer *tx_buffer;
3572         struct e1000_tx_desc   *tx_desc, *eop_desc;
3573         struct ifnet   *ifp = adapter->ifp;
3574
3575         EM_TX_LOCK_ASSERT(txr);
3576
3577         if (txr->tx_avail == adapter->num_tx_desc)
3578                 return (FALSE);
3579
3580         num_avail = txr->tx_avail;
3581         first = txr->next_to_clean;
3582         tx_desc = &txr->tx_base[first];
3583         tx_buffer = &txr->tx_buffers[first];
3584         last = tx_buffer->next_eop;
3585         eop_desc = &txr->tx_base[last];
3586
3587         /*
3588          * Get the index of the first descriptor AFTER the EOP
3589          * of the first packet; that way the inner while loop
3590          * below can use a simple comparison to detect the end
3591          * of the cleaned range.
3592          */
3593         if (++last == adapter->num_tx_desc)
3594                 last = 0;
3595         done = last;
3596
3597         bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
3598             BUS_DMASYNC_POSTREAD);
3599
3600         while (eop_desc->upper.fields.status & E1000_TXD_STAT_DD) {
3601                 /* We clean the range of the packet */
3602                 while (first != done) {
3603                         tx_desc->upper.data = 0;
3604                         tx_desc->lower.data = 0;
3605                         tx_desc->buffer_addr = 0;
3606                         ++num_avail;
3607
3608                         if (tx_buffer->m_head) {
3609                                 ifp->if_opackets++;
3610                                 bus_dmamap_sync(txr->txtag,
3611                                     tx_buffer->map,
3612                                     BUS_DMASYNC_POSTWRITE);
3613                                 bus_dmamap_unload(txr->txtag,
3614                                     tx_buffer->map);
3615
3616                                 m_freem(tx_buffer->m_head);
3617                                 tx_buffer->m_head = NULL;
3618                         }
3619                         tx_buffer->next_eop = -1;
3620                         txr->watchdog_time = ticks;
3621
3622                         if (++first == adapter->num_tx_desc)
3623                                 first = 0;
3624
3625                         tx_buffer = &txr->tx_buffers[first];
3626                         tx_desc = &txr->tx_base[first];
3627                 }
3628                 /* See if we can continue to the next packet */
3629                 last = tx_buffer->next_eop;
3630                 if (last != -1) {
3631                         eop_desc = &txr->tx_base[last];
3632                         /* Get new done point */
3633                         if (++last == adapter->num_tx_desc) last = 0;
3634                         done = last;
3635                 } else
3636                         break;
3637         }
3638         bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
3639             BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
3640
3641         txr->next_to_clean = first;
3642
3643         /*
3644          * If we have enough room, clear IFF_DRV_OACTIVE to
3645          * tell the stack that it is OK to send packets.
3646          * If there are no pending descriptors, clear the watchdog.
3647          */
3648         if (num_avail > EM_TX_CLEANUP_THRESHOLD) {                
3649                 ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;
3650                 if (num_avail == adapter->num_tx_desc) {
3651                         txr->watchdog_check = FALSE;
3652                         txr->tx_avail = num_avail;
3653                         return (FALSE);
3654                 } 
3655         }
3656
3657         txr->tx_avail = num_avail;
3658         return (TRUE);
3659 }
3660
3661
3662 /*********************************************************************
3663  *
3664  *  Refresh RX descriptor mbufs from system mbuf buffer pool.
3665  *
3666  **********************************************************************/
3667 static void
3668 em_refresh_mbufs(struct rx_ring *rxr, int limit)
3669 {
3670         struct adapter          *adapter = rxr->adapter;
3671         struct mbuf             *m;
3672         bus_dma_segment_t       segs[1];
3673         bus_dmamap_t            map;
3674         struct em_buffer        *rxbuf;
3675         int                     i, error, nsegs, cleaned;
3676
3677         i = rxr->next_to_refresh;
3678         cleaned = -1;
3679         while (i != limit) {
3680                 m = m_getcl(M_DONTWAIT, MT_DATA, M_PKTHDR);
3681                 if (m == NULL)
3682                         goto update;
3683                 m->m_len = m->m_pkthdr.len = MCLBYTES;
3684
3685                 if (adapter->max_frame_size <= (MCLBYTES - ETHER_ALIGN))
3686                         m_adj(m, ETHER_ALIGN);
3687
3688                 /*
3689                  * Using memory from the mbuf cluster pool, invoke the
3690                  * bus_dma machinery to arrange the memory mapping.
3691                  */
3692                 error = bus_dmamap_load_mbuf_sg(rxr->rxtag, rxr->rx_sparemap,
3693                     m, segs, &nsegs, BUS_DMA_NOWAIT);
3694                 if (error != 0) {
3695                         m_free(m);
3696                         goto update;
3697                 }
3698
3699                 /* If nsegs is wrong then the stack is corrupt. */
3700                 KASSERT(nsegs == 1, ("Too many segments returned!"));
3701         
3702                 rxbuf = &rxr->rx_buffers[i];
3703                 if (rxbuf->m_head != NULL)
3704                         bus_dmamap_unload(rxr->rxtag, rxbuf->map);
3705         
3706                 map = rxbuf->map;
3707                 rxbuf->map = rxr->rx_sparemap;
3708                 rxr->rx_sparemap = map;
3709                 bus_dmamap_sync(rxr->rxtag,
3710                     rxbuf->map, BUS_DMASYNC_PREREAD);
3711                 rxbuf->m_head = m;
3712                 rxr->rx_base[i].buffer_addr = htole64(segs[0].ds_addr);
3713
3714                 cleaned = i;
3715                 /* Calculate next index */
3716                 if (++i == adapter->num_rx_desc)
3717                         i = 0;
3718                 /* This is the work marker for refresh */
3719                 rxr->next_to_refresh = i;
3720         }
3721 update:
3722         bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
3723             BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
3724         if (cleaned != -1) /* Update tail index */
3725                 E1000_WRITE_REG(&adapter->hw,
3726                     E1000_RDT(rxr->me), cleaned);
3727
3728         return;
3729 }
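
/*
 * Index-wrap note: ring walkers in this file advance with
 *   if (++i == adapter->num_rx_desc)
 *           i = 0;
 * which matches i = (i + 1) % num_rx_desc without the divide; the RDT
 * write above advertises the most recently refreshed slot to the hardware.
 */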
3730
3731
3732 /*********************************************************************
3733  *
3734  *  Allocate memory for rx_buffer structures. Since we use one
3735  *  rx_buffer per received packet, the maximum number of rx_buffers
3736  *  that we'll need is equal to the number of receive descriptors
3737  *  that we've allocated.
3738  *
3739  **********************************************************************/
3740 static int
3741 em_allocate_receive_buffers(struct rx_ring *rxr)
3742 {
3743         struct adapter          *adapter = rxr->adapter;
3744         device_t                dev = adapter->dev;
3745         struct em_buffer        *rxbuf;
3746         int                     error;
3747
3748         rxr->rx_buffers = malloc(sizeof(struct em_buffer) *
3749             adapter->num_rx_desc, M_DEVBUF, M_NOWAIT | M_ZERO);
3750         if (rxr->rx_buffers == NULL) {
3751                 device_printf(dev, "Unable to allocate rx_buffer memory\n");
3752                 return (ENOMEM);
3753         }
3754
3755         error = bus_dma_tag_create(bus_get_dma_tag(dev), /* parent */
3756                                 1, 0,                   /* alignment, bounds */
3757                                 BUS_SPACE_MAXADDR,      /* lowaddr */
3758                                 BUS_SPACE_MAXADDR,      /* highaddr */
3759                                 NULL, NULL,             /* filter, filterarg */
3760                                 MCLBYTES,               /* maxsize */
3761                                 1,                      /* nsegments */
3762                                 MCLBYTES,               /* maxsegsize */
3763                                 0,                      /* flags */
3764                                 NULL,                   /* lockfunc */
3765                                 NULL,                   /* lockarg */
3766                                 &rxr->rxtag);
3767         if (error) {
3768                 device_printf(dev, "%s: bus_dma_tag_create failed %d\n",
3769                     __func__, error);
3770                 goto fail;
3771         }
3772
3773         /* Create the spare map (used by the mbuf refresh path) */
3774         error = bus_dmamap_create(rxr->rxtag, BUS_DMA_NOWAIT,
3775              &rxr->rx_sparemap);
3776         if (error) {
3777                 device_printf(dev, "%s: bus_dmamap_create failed: %d\n",
3778                     __func__, error);
3779                 goto fail;
3780         }
3781
3782         /* Create a DMA map for each receive descriptor's buffer */
3783         for (int i = 0; i < adapter->num_rx_desc; i++) {
3784                 rxbuf = &rxr->rx_buffers[i];
3785                 error = bus_dmamap_create(rxr->rxtag, BUS_DMA_NOWAIT,
3786                     &rxbuf->map);
3787                 if (error) {
3788                         device_printf(dev, "%s: bus_dmamap_create failed: %d\n",
3789                             __func__, error);
3790                         goto fail;
3791                 }
3792         }
3793
3794         return (0);
3795
3796 fail:
3797         em_free_receive_structures(adapter);
3798         return (error);
3799 }
3800
3801
3802 /*********************************************************************
3803  *
3804  *  Initialize a receive ring and its buffers.
3805  *
3806  **********************************************************************/
3807 static int
3808 em_setup_receive_ring(struct rx_ring *rxr)
3809 {
3810         struct  adapter         *adapter = rxr->adapter;
3811         struct em_buffer        *rxbuf;
3812         bus_dma_segment_t       seg[1];
3813         int                     rsize, nsegs, error;
3814
3815
3816         /* Clear the ring contents */
3817         EM_RX_LOCK(rxr);
3818         rsize = roundup2(adapter->num_rx_desc *
3819             sizeof(struct e1000_rx_desc), EM_DBA_ALIGN);
3820         bzero((void *)rxr->rx_base, rsize);
3821
3822         /*
3823         ** Free current RX buffer structs and their mbufs
3824         */
3825         for (int i = 0; i < adapter->num_rx_desc; i++) {
3826                 rxbuf = &rxr->rx_buffers[i];
3827                 if (rxbuf->m_head != NULL) {
3828                         bus_dmamap_sync(rxr->rxtag, rxbuf->map,
3829                             BUS_DMASYNC_POSTREAD);
3830                         bus_dmamap_unload(rxr->rxtag, rxbuf->map);
3831                         m_freem(rxbuf->m_head);
3832                 }
3833         }
3834
3835         /* Now replenish the mbufs */
3836         for (int j = 0; j != adapter->num_rx_desc; ++j) {
3837
3838                 rxbuf = &rxr->rx_buffers[j];
3839                 rxbuf->m_head = m_getcl(M_DONTWAIT, MT_DATA, M_PKTHDR);
3840                 if (rxbuf->m_head == NULL)
3841                         panic("RX ring hdr initialization failed!");
3842                 rxbuf->m_head->m_len = MCLBYTES;
3843                 rxbuf->m_head->m_flags &= ~M_HASFCS; /* we strip it */
3844                 rxbuf->m_head->m_pkthdr.len = MCLBYTES;
3845
3846                 /* Get the memory mapping */
3847                 error = bus_dmamap_load_mbuf_sg(rxr->rxtag,
3848                     rxbuf->map, rxbuf->m_head, seg,
3849                     &nsegs, BUS_DMA_NOWAIT);
3850                 if (error != 0)
3851                         panic("RX ring dma initialization failed!");
3852                 bus_dmamap_sync(rxr->rxtag,
3853                     rxbuf->map, BUS_DMASYNC_PREREAD);
3854
3855                 /* Update descriptor */
3856                 rxr->rx_base[j].buffer_addr = htole64(seg[0].ds_addr);
3857         }
3858
3859
3860         /* Setup our descriptor indices */
3861         rxr->next_to_check = 0;
3862         rxr->next_to_refresh = 0;
3863
3864         bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
3865             BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
3866
3867         EM_RX_UNLOCK(rxr);
3868         return (0);
3869 }
3870
3871 /*********************************************************************
3872  *
3873  *  Initialize all receive rings.
3874  *
3875  **********************************************************************/
3876 static int
3877 em_setup_receive_structures(struct adapter *adapter)
3878 {
3879         struct rx_ring *rxr = adapter->rx_rings;
3880         int j;
3881
3882         for (j = 0; j < adapter->num_queues; j++, rxr++)
3883                 if (em_setup_receive_ring(rxr))
3884                         goto fail;
3885
3886         return (0);
3887 fail:
3888         /*
3889          * Free the RX buffers allocated so far; we only handle the
3890          * rings that completed, since the failing ring cleaned up
3891          * after itself. Ring 'j' failed, so it is the terminus.
3892          */
3893         for (int i = 0; i < j; ++i) {
3894                 rxr = &adapter->rx_rings[i];
3895                 for (int n = 0; n < adapter->num_rx_desc; n++) {
3896                         struct em_buffer *rxbuf;
3897                         rxbuf = &rxr->rx_buffers[n];
3898                         if (rxbuf->m_head != NULL) {
3899                                 bus_dmamap_sync(rxr->rxtag, rxbuf->map,
3900                                   BUS_DMASYNC_POSTREAD);
3901                                 bus_dmamap_unload(rxr->rxtag, rxbuf->map);
3902                                 m_freem(rxbuf->m_head);
3903                                 rxbuf->m_head = NULL;
3904                         }
3905                 }
3906         }
3907
3908         return (ENOBUFS);
3909 }
3910
3911 /*********************************************************************
3912  *
3913  *  Free all receive rings.
3914  *
3915  **********************************************************************/
3916 static void
3917 em_free_receive_structures(struct adapter *adapter)
3918 {
3919         struct rx_ring *rxr = adapter->rx_rings;
3920
3921         for (int i = 0; i < adapter->num_queues; i++, rxr++) {
3922                 em_free_receive_buffers(rxr);
3923                 /* Free the ring memory as well */
3924                 em_dma_free(adapter, &rxr->rxdma);
3925                 EM_RX_LOCK_DESTROY(rxr);
3926         }
3927
3928         free(adapter->rx_rings, M_DEVBUF);
3929 }
3930
3931
3932 /*********************************************************************
3933  *
3934  *  Free receive ring data structures
3935  *
3936  **********************************************************************/
3937 static void
3938 em_free_receive_buffers(struct rx_ring *rxr)
3939 {
3940         struct adapter          *adapter = rxr->adapter;
3941         struct em_buffer        *rxbuf = NULL;
3942
3943         INIT_DEBUGOUT("free_receive_buffers: begin");
3944
3945         if (rxr->rx_sparemap) {
3946                 bus_dmamap_destroy(rxr->rxtag, rxr->rx_sparemap);
3947                 rxr->rx_sparemap = NULL;
3948         }
3949
3950         if (rxr->rx_buffers != NULL) {
3951                 for (int i = 0; i < adapter->num_rx_desc; i++) {
3952                         rxbuf = &rxr->rx_buffers[i];
3953                         if (rxbuf->map != NULL) {
3954                                 bus_dmamap_sync(rxr->rxtag, rxbuf->map,
3955                                     BUS_DMASYNC_POSTREAD);
3956                                 bus_dmamap_unload(rxr->rxtag, rxbuf->map);
3957                                 bus_dmamap_destroy(rxr->rxtag, rxbuf->map);
3958                         }
3959                         if (rxbuf->m_head != NULL) {
3960                                 m_freem(rxbuf->m_head);
3961                                 rxbuf->m_head = NULL;
3962                         }
3963                 }
3964                 free(rxr->rx_buffers, M_DEVBUF);
3965                 rxr->rx_buffers = NULL;
3966         }
3967
3968         if (rxr->rxtag != NULL) {
3969                 bus_dma_tag_destroy(rxr->rxtag);
3970                 rxr->rxtag = NULL;
3971         }
3972
3973         return;
3974 }
3975
3976
3977 /*********************************************************************
3978  *
3979  *  Enable receive unit.
3980  *
3981  **********************************************************************/
3982 #define MAX_INTS_PER_SEC        8000
3983 #define DEFAULT_ITR             (1000000000/(MAX_INTS_PER_SEC * 256))
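/*
 * Worked example: with MAX_INTS_PER_SEC = 8000 this evaluates to
 * 1000000000 / (8000 * 256) = 488 units of 256ns each, i.e. an
 * interrupt at most roughly every 125us.
 */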
3984
3985 static void
3986 em_initialize_receive_unit(struct adapter *adapter)
3987 {
3988         struct rx_ring  *rxr = adapter->rx_rings;
3989         struct ifnet    *ifp = adapter->ifp;
3990         struct e1000_hw *hw = &adapter->hw;
3991         u64     bus_addr;
3992         u32     rctl, rxcsum;
3993
3994         INIT_DEBUGOUT("em_initialize_receive_unit: begin");
3995
3996         /*
3997          * Make sure receives are disabled while setting
3998          * up the descriptor ring
3999          */
4000         rctl = E1000_READ_REG(hw, E1000_RCTL);
4001         E1000_WRITE_REG(hw, E1000_RCTL, rctl & ~E1000_RCTL_EN);
4002
4003         E1000_WRITE_REG(&adapter->hw, E1000_RADV,
4004             adapter->rx_abs_int_delay.value);
4005         /*
4006          * Set the interrupt throttling rate, in 256ns units:
4007          * DEFAULT_ITR = 10^9 ns / (MAX_INTS_PER_SEC * 256 ns)
4008          */
4009         E1000_WRITE_REG(hw, E1000_ITR, DEFAULT_ITR);
4010
4011         /*
4012         ** When using MSIX interrupts we need to throttle
4013         ** using the EITR register (82574 only)
4014         */
4015         if (hw->mac.type == e1000_82574)
4016                 for (int i = 0; i < 4; i++)
4017                         E1000_WRITE_REG(hw, E1000_EITR_82574(i),
4018                             DEFAULT_ITR);
4019
4020         /* Disable accelerated acknowledgement */
4021         if (adapter->hw.mac.type == e1000_82574)
4022                 E1000_WRITE_REG(hw, E1000_RFCTL, E1000_RFCTL_ACK_DIS);
4023
4024         if (ifp->if_capenable & IFCAP_RXCSUM) {
4025                 rxcsum = E1000_READ_REG(hw, E1000_RXCSUM);
4026                 rxcsum |= (E1000_RXCSUM_IPOFL | E1000_RXCSUM_TUOFL);
4027                 E1000_WRITE_REG(hw, E1000_RXCSUM, rxcsum);
4028         }
4029
4030         /*
4031         ** XXX TEMPORARY WORKAROUND: on some systems with 82573,
4032         ** long latencies are observed (e.g. on the Lenovo X60). This
4033         ** change eliminates the problem, but since having positive
4034         ** values in RDTR is a known source of problems on other
4035         ** platforms another solution is being sought.
4036         */
4037         if (hw->mac.type == e1000_82573)
4038                 E1000_WRITE_REG(hw, E1000_RDTR, 0x20);
4039
4040         for (int i = 0; i < adapter->num_queues; i++, rxr++) {
4041                 /* Setup the Base and Length of the Rx Descriptor Ring */
4042                 bus_addr = rxr->rxdma.dma_paddr;
4043                 E1000_WRITE_REG(hw, E1000_RDLEN(i),
4044                     adapter->num_rx_desc * sizeof(struct e1000_rx_desc));
4045                 E1000_WRITE_REG(hw, E1000_RDBAH(i), (u32)(bus_addr >> 32));
4046                 E1000_WRITE_REG(hw, E1000_RDBAL(i), (u32)bus_addr);
4047                 /* Setup the Head and Tail Descriptor Pointers */
4048                 E1000_WRITE_REG(hw, E1000_RDH(i), 0);
4049                 E1000_WRITE_REG(hw, E1000_RDT(i), adapter->num_rx_desc - 1);
4050         }
4051
4052         /* Setup the Receive Control Register */
4053         rctl &= ~(3 << E1000_RCTL_MO_SHIFT);
4054         rctl |= E1000_RCTL_EN | E1000_RCTL_BAM |
4055             E1000_RCTL_LBM_NO | E1000_RCTL_RDMTS_HALF |
4056             (hw->mac.mc_filter_type << E1000_RCTL_MO_SHIFT);
4057
4058         /* Strip the CRC */
4059         rctl |= E1000_RCTL_SECRC;
4060
4061         /* Make sure VLAN Filters are off */
4062         rctl &= ~E1000_RCTL_VFE;
4063         rctl &= ~E1000_RCTL_SBP;
4064         rctl |= E1000_RCTL_SZ_2048;
4065         if (ifp->if_mtu > ETHERMTU)
4066                 rctl |= E1000_RCTL_LPE;
4067         else
4068                 rctl &= ~E1000_RCTL_LPE;
4069
4070         /* Write out the settings */
4071         E1000_WRITE_REG(hw, E1000_RCTL, rctl);
4072
4073         return;
4074 }
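
/*
 * Buffer-size note: with a standard 1500-byte MTU the code above leaves
 * E1000_RCTL_LPE clear and selects E1000_RCTL_SZ_2048, matching the
 * MCLBYTES-sized clusters posted by em_refresh_mbufs().
 */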
4075
4076
4077 /*********************************************************************
4078  *
4079  *  This routine executes in interrupt context. It replenishes
4080  *  the mbufs in the descriptor ring and passes data which has
4081  *  been dma'ed into host memory up to the upper layer.
4082  *
4083  *  We loop at most count times if count is > 0, or until done if
4084  *  count < 0.
4085  *  
4086  *  For polling we also now return the number of cleaned packets
4087  *********************************************************************/
4088 static int
4089 em_rxeof(struct rx_ring *rxr, int count)
4090 {
4091         struct adapter          *adapter = rxr->adapter;
4092         struct ifnet            *ifp = adapter->ifp;
4093         struct mbuf             *mp, *sendmp;
4094         u8                      status = 0;
4095         u16                     len;
4096         int                     i, processed, rxdone = 0;
4097         bool                    eop;
4098         struct e1000_rx_desc    *cur;
4099
4100         EM_RX_LOCK(rxr);
4101
4102         for (i = rxr->next_to_check, processed = 0; count != 0;) {
4103
4104                 if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
4105                         break;
4106
4107                 bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
4108                     BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
4109
4110                 cur = &rxr->rx_base[i];
4111                 status = cur->status;
4112                 mp = sendmp = NULL;
4113
4114                 if ((status & E1000_RXD_STAT_DD) == 0)
4115                         break;
4116
4117                 len = le16toh(cur->length);
4118                 eop = (status & E1000_RXD_STAT_EOP) != 0;
4119                 count--;
4120
4121                 if ((cur->errors & E1000_RXD_ERR_FRAME_ERR_MASK) == 0) {
4122
4123                         /* Assign correct length to the current fragment */
4124                         mp = rxr->rx_buffers[i].m_head;
4125                         mp->m_len = len;
4126
4127                         if (rxr->fmp == NULL) {
4128                                 mp->m_pkthdr.len = len;
4129                                 rxr->fmp = mp; /* Store the first mbuf */
4130                                 rxr->lmp = mp;
4131                         } else {
4132                                 /* Chain mbufs together */
4133                                 mp->m_flags &= ~M_PKTHDR;
4134                                 rxr->lmp->m_next = mp;
4135                                 rxr->lmp = rxr->lmp->m_next;
4136                                 rxr->fmp->m_pkthdr.len += len;
4137                         }
4138
4139                         if (eop) {
4140                                 rxr->fmp->m_pkthdr.rcvif = ifp;
4141                                 ifp->if_ipackets++;
4142                                 em_receive_checksum(cur, rxr->fmp);
4143 #ifndef __NO_STRICT_ALIGNMENT
4144                                 if (adapter->max_frame_size >
4145                                     (MCLBYTES - ETHER_ALIGN) &&
4146                                     em_fixup_rx(rxr) != 0)
4147                                         goto skip;
4148 #endif
4149                                 if (status & E1000_RXD_STAT_VP) {
4150                                         rxr->fmp->m_pkthdr.ether_vtag =
4151                                             (le16toh(cur->special) &
4152                                             E1000_RXD_SPC_VLAN_MASK);
4153                                         rxr->fmp->m_flags |= M_VLANTAG;
4154                                 }
4155 #ifdef EM_MULTIQUEUE
4156                                 rxr->fmp->m_pkthdr.flowid = curcpu;
4157                                 rxr->fmp->m_flags |= M_FLOWID;
4158 #endif
4159 #ifndef __NO_STRICT_ALIGNMENT
4160 skip:
4161 #endif
4162                                 sendmp = rxr->fmp;
4163                                 rxr->fmp = NULL;
4164                                 rxr->lmp = NULL;
4165                         }
4166                 } else {
4167                         ifp->if_ierrors++;
4168                         /* Reuse loaded DMA map and just update mbuf chain */
4169                         mp = rxr->rx_buffers[i].m_head;
4170                         mp->m_len = mp->m_pkthdr.len = MCLBYTES;
4171                         mp->m_data = mp->m_ext.ext_buf;
4172                         mp->m_next = NULL;
4173                         if (adapter->max_frame_size <=
4174                             (MCLBYTES - ETHER_ALIGN))
4175                                 m_adj(mp, ETHER_ALIGN);
4176                         if (rxr->fmp != NULL) {
4177                                 m_freem(rxr->fmp);
4178                                 rxr->fmp = NULL;
4179                                 rxr->lmp = NULL;
4180                         }
4181                         sendmp = NULL;
4182                 }
4183
4184                 /* Zero out the receive descriptors status. */
4185                 cur->status = 0;
4186                 ++rxdone;       /* cumulative for POLL */
4187                 ++processed;
4188
4189                 /* Advance our pointers to the next descriptor. */
4190                 if (++i == adapter->num_rx_desc)
4191                         i = 0;
4192
4193                 /* Send to the stack */
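                      /* The RX lock is dropped around if_input() so the
                       * stack can run without it; next_to_check is saved
                       * first and reloaded after, since it may have moved
                       * while the lock was released. */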
4194                 if (sendmp != NULL) {
4195                         rxr->next_to_check = i;
4196                         EM_RX_UNLOCK(rxr);
4197                         (*ifp->if_input)(ifp, sendmp);
4198                         EM_RX_LOCK(rxr);
4199                         i = rxr->next_to_check;
4200                 }
4201
4202                 /* Only refresh mbufs every 8 descriptors */
4203                 if (processed == 8) {
4204                         em_refresh_mbufs(rxr, i);
4205                         processed = 0;
4206                 }
4207         }
4208
4209         /* Catch any remaining refresh work */
4210         if (processed != 0) {
4211                 em_refresh_mbufs(rxr, i);
4212                 processed = 0;
4213         }
4214
4215         rxr->next_to_check = i;
4216         EM_RX_UNLOCK(rxr);
4217
4218 #ifdef DEVICE_POLLING
4219         return (rxdone);
4220 #else
4221         return ((status & E1000_RXD_STAT_DD) ? TRUE : FALSE);
4222 #endif
4223 }
4224
4225 #ifndef __NO_STRICT_ALIGNMENT
4226 /*
4227  * When jumbo frames are enabled we must realign the entire payload on
4228  * architectures with strict alignment requirements. This is a serious
4229  * design flaw in the 8254x, as it largely defeats the benefit of DMA:
4230  * the chip only allows RX buffer sizes of 2048/4096/8192/16384 bytes,
4231  * while 2048 - ETHER_ALIGN is what would leave the payload aligned.
4232  * Even without strict alignment restrictions the 8254x performs
4233  * unaligned memory accesses, which also hurts performance. To avoid
4234  * copying an entire frame into alignment we allocate a new mbuf, copy
4235  * just the Ethernet header into it, and prepend it to the chain.
4236  *
4237  * Be aware that the 8254x performs best when jumbo frames are not
4238  * used at all on architectures with strict alignment.
4239  */
4240 static int
4241 em_fixup_rx(struct rx_ring *rxr)
4242 {
4243         struct adapter *adapter = rxr->adapter;
4244         struct mbuf *m, *n;
4245         int error;
4246
4247         error = 0;
4248         m = rxr->fmp;
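             /*
              * Two cases: if the frame leaves ETHER_HDR_LEN bytes of
              * slack in the cluster, slide the whole frame forward so
              * the payload ends up aligned; otherwise prepend a fresh
              * mbuf that carries just the Ethernet header.
              */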
4249         if (m->m_len <= (MCLBYTES - ETHER_HDR_LEN)) {
4250                 bcopy(m->m_data, m->m_data + ETHER_HDR_LEN, m->m_len);
4251                 m->m_data += ETHER_HDR_LEN;
4252         } else {
4253                 MGETHDR(n, M_DONTWAIT, MT_DATA);
4254                 if (n != NULL) {
4255                         bcopy(m->m_data, n->m_data, ETHER_HDR_LEN);
4256                         m->m_data += ETHER_HDR_LEN;
4257                         m->m_len -= ETHER_HDR_LEN;
4258                         n->m_len = ETHER_HDR_LEN;
4259                         M_MOVE_PKTHDR(n, m);
4260                         n->m_next = m;
4261                         rxr->fmp = n;
4262                 } else {
4263                         adapter->dropped_pkts++;
4264                         m_freem(rxr->fmp);
4265                         rxr->fmp = NULL;
4266                         error = ENOMEM;
4267                 }
4268         }
4269
4270         return (error);
4271 }
4272 #endif
4273
4274 /*********************************************************************
4275  *
4276  *  Verify that the hardware indicated that the checksum is valid.
4277  *  Inform the stack of the checksum status so that the stack
4278  *  doesn't spend time re-verifying it.
4279  *
4280  *********************************************************************/
4281 static void
4282 em_receive_checksum(struct e1000_rx_desc *rx_desc, struct mbuf *mp)
4283 {
4284         /* If the Ignore Checksum bit is set, report nothing */
4285         if (rx_desc->status & E1000_RXD_STAT_IXSM) {
4286                 mp->m_pkthdr.csum_flags = 0;
4287                 return;
4288         }
4289
4290         if (rx_desc->status & E1000_RXD_STAT_IPCS) {
4291                 /* Did it pass? */
4292                 if (!(rx_desc->errors & E1000_RXD_ERR_IPE)) {
4293                         /* IP Checksum Good */
4294                         mp->m_pkthdr.csum_flags = CSUM_IP_CHECKED;
4295                         mp->m_pkthdr.csum_flags |= CSUM_IP_VALID;
4296
4297                 } else {
4298                         mp->m_pkthdr.csum_flags = 0;
4299                 }
4300         }
4301
4302         if (rx_desc->status & E1000_RXD_STAT_TCPCS) {
4303                 /* Did it pass? */
4304                 if (!(rx_desc->errors & E1000_RXD_ERR_TCPE)) {
4305                         mp->m_pkthdr.csum_flags |=
4306                         (CSUM_DATA_VALID | CSUM_PSEUDO_HDR);
4307                         mp->m_pkthdr.csum_data = htons(0xffff);
4308                 }
4309         }
4310 }
4311
4312 /*
4313  * This routine is run via a vlan
4314  * config EVENT.
4315  */
4316 static void
4317 em_register_vlan(void *arg, struct ifnet *ifp, u16 vtag)
4318 {
4319         struct adapter  *adapter = ifp->if_softc;
4320         u32             index, bit;
4321
4322         if (ifp->if_softc != arg)       /* Not our event */
4323                 return;
4324
4325         if ((vtag == 0) || (vtag > 4095))       /* Invalid ID */
4326                 return;
4327
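             /* The VFTA is 128 32-bit words: bits 11:5 of the tag
              * select the word, bits 4:0 the bit within it. */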
4328         index = (vtag >> 5) & 0x7F;
4329         bit = vtag & 0x1F;
4330         em_shadow_vfta[index] |= (1 << bit);
4331         ++adapter->num_vlans;
4332         /* Re-init to load the changes */
4333         em_init(adapter);
4334 }
4335
4336 /*
4337  * This routine is run via a vlan
4338  * unconfig EVENT.
4339  */
4340 static void
4341 em_unregister_vlan(void *arg, struct ifnet *ifp, u16 vtag)
4342 {
4343         struct adapter  *adapter = ifp->if_softc;
4344         u32             index, bit;
4345
4346         if (ifp->if_softc != arg)
4347                 return;
4348
4349         if ((vtag == 0) || (vtag > 4095))       /* Invalid */
4350                 return;
4351
4352         index = (vtag >> 5) & 0x7F;
4353         bit = vtag & 0x1F;
4354         em_shadow_vfta[index] &= ~(1 << bit);
4355         --adapter->num_vlans;
4356         /* Re-init to load the changes */
4357         em_init(adapter);
4358 }
4359
4360 static void
4361 em_setup_vlan_hw_support(struct adapter *adapter)
4362 {
4363         struct e1000_hw *hw = &adapter->hw;
4364         u32             reg;
4365
4366         /*
4367         ** We get here via init_locked, which means a soft
4368         ** reset has already cleared the VFTA and other
4369         ** state, so if no vlans have been registered
4370         ** there is nothing to do.
4371         */
4372         if (adapter->num_vlans == 0)
4373                 return;
4374
4375         /*
4376         ** A soft reset zeroes out the VFTA, so
4377         ** we need to repopulate it now.
4378         */
4379         for (int i = 0; i < EM_VFTA_SIZE; i++)
4380                 if (em_shadow_vfta[i] != 0)
4381                         E1000_WRITE_REG_ARRAY(hw, E1000_VFTA,
4382                             i, em_shadow_vfta[i]);
4383
4384         reg = E1000_READ_REG(hw, E1000_CTRL);
4385         reg |= E1000_CTRL_VME;
4386         E1000_WRITE_REG(hw, E1000_CTRL, reg);
4387
4388         /* Enable the Filter Table */
4389         reg = E1000_READ_REG(hw, E1000_RCTL);
4390         reg &= ~E1000_RCTL_CFIEN;
4391         reg |= E1000_RCTL_VFE;
4392         E1000_WRITE_REG(hw, E1000_RCTL, reg);
4393
4394         /* Update the frame size */
4395         E1000_WRITE_REG(&adapter->hw, E1000_RLPML,
4396             adapter->max_frame_size + VLAN_TAG_SIZE);
4397 }
4398
4399 static void
4400 em_enable_intr(struct adapter *adapter)
4401 {
4402         struct e1000_hw *hw = &adapter->hw;
4403         u32 ims_mask = IMS_ENABLE_MASK;
4404
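             /* The 82574 delivers interrupts via MSIX vectors: unmask
              * those causes as well and let EIAC auto-clear them. */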
4405         if (hw->mac.type == e1000_82574) {
4406                 E1000_WRITE_REG(hw, EM_EIAC, EM_MSIX_MASK);
4407                 ims_mask |= EM_MSIX_MASK;
4408         } 
4409         E1000_WRITE_REG(hw, E1000_IMS, ims_mask);
4410 }
4411
4412 static void
4413 em_disable_intr(struct adapter *adapter)
4414 {
4415         struct e1000_hw *hw = &adapter->hw;
4416
4417         if (hw->mac.type == e1000_82574)
4418                 E1000_WRITE_REG(hw, EM_EIAC, 0);
4419         E1000_WRITE_REG(hw, E1000_IMC, 0xffffffff);
4420 }
4421
4422 /*
4423  * Bit of a misnomer: what this really means is
4424  * to enable OS management of the system, i.e.
4425  * to disable the special hardware management features.
4426  */
4427 static void
4428 em_init_manageability(struct adapter *adapter)
4429 {
4430         /* A shared code workaround */
4431 #define E1000_82542_MANC2H E1000_MANC2H
4432         if (adapter->has_manage) {
4433                 int manc2h = E1000_READ_REG(&adapter->hw, E1000_MANC2H);
4434                 int manc = E1000_READ_REG(&adapter->hw, E1000_MANC);
4435
4436                 /* disable hardware interception of ARP */
4437                 manc &= ~(E1000_MANC_ARP_EN);
4438
4439                 /* enable receiving management packets to the host */
4440                 manc |= E1000_MANC_EN_MNG2HOST;
4441 #define E1000_MNG2HOST_PORT_623 (1 << 5)
4442 #define E1000_MNG2HOST_PORT_664 (1 << 6)
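                      /* Forward traffic on UDP ports 623 and 664 (the
                       * standard ASF/RMCP management ports) to the host */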
4443                 manc2h |= E1000_MNG2HOST_PORT_623;
4444                 manc2h |= E1000_MNG2HOST_PORT_664;
4445                 E1000_WRITE_REG(&adapter->hw, E1000_MANC2H, manc2h);
4446                 E1000_WRITE_REG(&adapter->hw, E1000_MANC, manc);
4447         }
4448 }
4449
4450 /*
4451  * Give control back to hardware management
4452  * controller if there is one.
4453  */
4454 static void
4455 em_release_manageability(struct adapter *adapter)
4456 {
4457         if (adapter->has_manage) {
4458                 int manc = E1000_READ_REG(&adapter->hw, E1000_MANC);
4459
4460                 /* re-enable hardware interception of ARP */
4461                 manc |= E1000_MANC_ARP_EN;
4462                 manc &= ~E1000_MANC_EN_MNG2HOST;
4463
4464                 E1000_WRITE_REG(&adapter->hw, E1000_MANC, manc);
4465         }
4466 }
4467
4468 /*
4469  * em_get_hw_control sets the {CTRL_EXT|FWSM}:DRV_LOAD bit.
4470  * For ASF and Pass Through versions of f/w this means
4471  * that the driver is loaded. For AMT version type f/w
4472  * this means that the network i/f is open.
4473  */
4474 static void
4475 em_get_hw_control(struct adapter *adapter)
4476 {
4477         u32 ctrl_ext, swsm;
4478
4479         if (adapter->hw.mac.type == e1000_82573) {
4480                 swsm = E1000_READ_REG(&adapter->hw, E1000_SWSM);
4481                 E1000_WRITE_REG(&adapter->hw, E1000_SWSM,
4482                     swsm | E1000_SWSM_DRV_LOAD);
4483                 return;
4484         }
4485         /* else */
4486         ctrl_ext = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
4487         E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT,
4488             ctrl_ext | E1000_CTRL_EXT_DRV_LOAD);
4489         return;
4490 }
4491
4492 /*
4493  * em_release_hw_control resets {CTRL_EXT|FWSM}:DRV_LOAD bit.
4494  * For ASF and Pass Through versions of f/w this means that
4495  * the driver is no longer loaded. For AMT versions of the
4496  * f/w this means that the network i/f is closed.
4497  */
4498 static void
4499 em_release_hw_control(struct adapter *adapter)
4500 {
4501         u32 ctrl_ext, swsm;
4502
4503         if (!adapter->has_manage)
4504                 return;
4505
4506         if (adapter->hw.mac.type == e1000_82573) {
4507                 swsm = E1000_READ_REG(&adapter->hw, E1000_SWSM);
4508                 E1000_WRITE_REG(&adapter->hw, E1000_SWSM,
4509                     swsm & ~E1000_SWSM_DRV_LOAD);
4510                 return;
4511         }
4512         /* else */
4513         ctrl_ext = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
4514         E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT,
4515             ctrl_ext & ~E1000_CTRL_EXT_DRV_LOAD);
4516         return;
4517 }
4518
4519 static int
4520 em_is_valid_ether_addr(u8 *addr)
4521 {
4522         char zero_addr[6] = { 0, 0, 0, 0, 0, 0 };
4523
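             /* Reject addresses with the multicast/group bit set,
              * and the all-zeros address. */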
4524         if ((addr[0] & 1) || (!bcmp(addr, zero_addr, ETHER_ADDR_LEN))) {
4525                 return (FALSE);
4526         }
4527
4528         return (TRUE);
4529 }
4530
4531 /*
4532 ** Parse the interface capabilities with regard
4533 ** to both system management and wake-on-lan for
4534 ** later use.
4535 */
4536 static void
4537 em_get_wakeup(device_t dev)
4538 {
4539         struct adapter  *adapter = device_get_softc(dev);
4540         u16             eeprom_data = 0, device_id, apme_mask;
4541
4542         adapter->has_manage = e1000_enable_mng_pass_thru(&adapter->hw);
4543         apme_mask = EM_EEPROM_APME;
4544
4545         switch (adapter->hw.mac.type) {
4546         case e1000_82573:
4547         case e1000_82583:
4548                 adapter->has_amt = TRUE;
4549                 /* FALLTHROUGH */
4550         case e1000_82571:
4551         case e1000_82572:
4552         case e1000_80003es2lan:
4553                 if (adapter->hw.bus.func == 1) {
4554                         e1000_read_nvm(&adapter->hw,
4555                             NVM_INIT_CONTROL3_PORT_B, 1, &eeprom_data);
4556                         break;
4557                 } else
4558                         e1000_read_nvm(&adapter->hw,
4559                             NVM_INIT_CONTROL3_PORT_A, 1, &eeprom_data);
4560                 break;
4561         case e1000_ich8lan:
4562         case e1000_ich9lan:
4563         case e1000_ich10lan:
4564         case e1000_pchlan:
4565                 apme_mask = E1000_WUC_APME;
4566                 adapter->has_amt = TRUE;
4567                 eeprom_data = E1000_READ_REG(&adapter->hw, E1000_WUC);
4568                 break;
4569         default:
4570                 e1000_read_nvm(&adapter->hw,
4571                     NVM_INIT_CONTROL3_PORT_A, 1, &eeprom_data);
4572                 break;
4573         }
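             /* APME set in the NVM (or in WUC on ICH/PCH parts) means
              * wake-up is enabled; default to magic packet and
              * multicast wake. */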
4574         if (eeprom_data & apme_mask)
4575                 adapter->wol = (E1000_WUFC_MAG | E1000_WUFC_MC);
4576         /*
4577          * We have the eeprom settings; now apply the special cases
4578          * where the eeprom may be wrong or the board doesn't support
4579          * wake on lan on a particular port.
4580          */
4581         device_id = pci_get_device(dev);
4582         switch (device_id) {
4583         case E1000_DEV_ID_82571EB_FIBER:
4584                 /* Wake events only supported on port A for dual fiber
4585                  * regardless of eeprom setting */
4586                 if (E1000_READ_REG(&adapter->hw, E1000_STATUS) &
4587                     E1000_STATUS_FUNC_1)
4588                         adapter->wol = 0;
4589                 break;
4590         case E1000_DEV_ID_82571EB_QUAD_COPPER:
4591         case E1000_DEV_ID_82571EB_QUAD_FIBER:
4592         case E1000_DEV_ID_82571EB_QUAD_COPPER_LP:
4593                 /* if quad port adapter, disable WoL on all but port A */
4594                 if (global_quad_port_a != 0)
4595                         adapter->wol = 0;
4596                 /* Reset for multiple quad port adapters */
4597                 if (++global_quad_port_a == 4)
4598                         global_quad_port_a = 0;
4599                 break;
4600         }
4601         return;
4602 }
4603
4604
4605 /*
4606  * Enable PCI Wake On Lan capability
4607  */
4608 static void
4609 em_enable_wakeup(device_t dev)
4610 {
4611         struct adapter  *adapter = device_get_softc(dev);
4612         struct ifnet    *ifp = adapter->ifp;
4613         u32             pmc, ctrl, ctrl_ext, rctl;
4614         u16             status;
4615
4616         if (pci_find_extcap(dev, PCIY_PMG, &pmc) != 0)
4617                 return;
4618
4619         /* Advertise the wakeup capability */
4620         ctrl = E1000_READ_REG(&adapter->hw, E1000_CTRL);
4621         ctrl |= (E1000_CTRL_SWDPIN2 | E1000_CTRL_SWDPIN3);
4622         E1000_WRITE_REG(&adapter->hw, E1000_CTRL, ctrl);
4623         E1000_WRITE_REG(&adapter->hw, E1000_WUC, E1000_WUC_PME_EN);
4624
4625         if ((adapter->hw.mac.type == e1000_ich8lan) ||
4626             (adapter->hw.mac.type == e1000_pchlan) ||
4627             (adapter->hw.mac.type == e1000_ich9lan) ||
4628             (adapter->hw.mac.type == e1000_ich10lan)) {
4629                 e1000_disable_gig_wol_ich8lan(&adapter->hw);
4630                 e1000_hv_phy_powerdown_workaround_ich8lan(&adapter->hw);
4631         }
4632
4633         /* Keep the laser running on Fiber adapters */
4634         if (adapter->hw.phy.media_type == e1000_media_type_fiber ||
4635             adapter->hw.phy.media_type == e1000_media_type_internal_serdes) {
4636                 ctrl_ext = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
4637                 ctrl_ext |= E1000_CTRL_EXT_SDP3_DATA;
4638                 E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT, ctrl_ext);
4639         }
4640
4641         /*
4642         ** Determine type of Wakeup: note that wol
4643         ** is set with all bits on by default.
4644         */
4645         if ((ifp->if_capenable & IFCAP_WOL_MAGIC) == 0)
4646                 adapter->wol &= ~E1000_WUFC_MAG;
4647
4648         if ((ifp->if_capenable & IFCAP_WOL_MCAST) == 0)
4649                 adapter->wol &= ~E1000_WUFC_MC;
4650         else {
4651                 rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
4652                 rctl |= E1000_RCTL_MPE;
4653                 E1000_WRITE_REG(&adapter->hw, E1000_RCTL, rctl);
4654         }
4655
4656         if (adapter->hw.mac.type == e1000_pchlan) {
4657                 if (em_enable_phy_wakeup(adapter))
4658                         return;
4659         } else {
4660                 E1000_WRITE_REG(&adapter->hw, E1000_WUC, E1000_WUC_PME_EN);
4661                 E1000_WRITE_REG(&adapter->hw, E1000_WUFC, adapter->wol);
4662         }
4663
4664         if (adapter->hw.phy.type == e1000_phy_igp_3)
4665                 e1000_igp3_phy_powerdown_workaround_ich8lan(&adapter->hw);
4666
4667         /* Request PME */
4668         status = pci_read_config(dev, pmc + PCIR_POWER_STATUS, 2);
4669         status &= ~(PCIM_PSTAT_PME | PCIM_PSTAT_PMEENABLE);
4670         if (ifp->if_capenable & IFCAP_WOL)
4671                 status |= PCIM_PSTAT_PME | PCIM_PSTAT_PMEENABLE;
4672         pci_write_config(dev, pmc + PCIR_POWER_STATUS, status, 2);
4673
4674         return;
4675 }
4676
4677 /*
4678 ** WOL in the newer chipset interfaces (pchlan)
4679 ** requires the settings to be copied into the PHY.
4680 */
4681 static int
4682 em_enable_phy_wakeup(struct adapter *adapter)
4683 {
4684         struct e1000_hw *hw = &adapter->hw;
4685         u32 mreg, ret = 0;
4686         u16 preg;
4687
4688         /* copy MAC RARs to PHY RARs */
4689         for (int i = 0; i < adapter->hw.mac.rar_entry_count; i++) {
4690                 mreg = E1000_READ_REG(hw, E1000_RAL(i));
4691                 e1000_write_phy_reg(hw, BM_RAR_L(i), (u16)(mreg & 0xFFFF));
4692                 e1000_write_phy_reg(hw, BM_RAR_M(i),
4693                     (u16)((mreg >> 16) & 0xFFFF));
4694                 mreg = E1000_READ_REG(hw, E1000_RAH(i));
4695                 e1000_write_phy_reg(hw, BM_RAR_H(i), (u16)(mreg & 0xFFFF));
4696                 e1000_write_phy_reg(hw, BM_RAR_CTRL(i),
4697                     (u16)((mreg >> 16) & 0xFFFF));
4698         }
4699
4700         /* copy MAC MTA to PHY MTA */
4701         for (int i = 0; i < adapter->hw.mac.mta_reg_count; i++) {
4702                 mreg = E1000_READ_REG_ARRAY(hw, E1000_MTA, i);
4703                 e1000_write_phy_reg(hw, BM_MTA(i), (u16)(mreg & 0xFFFF));
4704                 e1000_write_phy_reg(hw, BM_MTA(i) + 1,
4705                     (u16)((mreg >> 16) & 0xFFFF));
4706         }
4707
4708         /* configure PHY Rx Control register */
4709         e1000_read_phy_reg(&adapter->hw, BM_RCTL, &preg);
4710         mreg = E1000_READ_REG(hw, E1000_RCTL);
4711         if (mreg & E1000_RCTL_UPE)
4712                 preg |= BM_RCTL_UPE;
4713         if (mreg & E1000_RCTL_MPE)
4714                 preg |= BM_RCTL_MPE;
4715         preg &= ~(BM_RCTL_MO_MASK);
4716         if (mreg & E1000_RCTL_MO_3)
4717                 preg |= (((mreg & E1000_RCTL_MO_3) >> E1000_RCTL_MO_SHIFT)
4718                                 << BM_RCTL_MO_SHIFT);
4719         if (mreg & E1000_RCTL_BAM)
4720                 preg |= BM_RCTL_BAM;
4721         if (mreg & E1000_RCTL_PMCF)
4722                 preg |= BM_RCTL_PMCF;
4723         mreg = E1000_READ_REG(hw, E1000_CTRL);
4724         if (mreg & E1000_CTRL_RFCE)
4725                 preg |= BM_RCTL_RFCE;
4726         e1000_write_phy_reg(&adapter->hw, BM_RCTL, preg);
4727
4728         /* enable PHY wakeup in MAC register */
4729         E1000_WRITE_REG(hw, E1000_WUC,
4730             E1000_WUC_PHY_WAKE | E1000_WUC_PME_EN);
4731         E1000_WRITE_REG(hw, E1000_WUFC, adapter->wol);
4732
4733         /* configure and enable PHY wakeup in PHY registers */
4734         e1000_write_phy_reg(&adapter->hw, BM_WUFC, adapter->wol);
4735         e1000_write_phy_reg(&adapter->hw, BM_WUC, E1000_WUC_PME_EN);
4736
4737         /* activate PHY wakeup */
4738         ret = hw->phy.ops.acquire(hw);
4739         if (ret) {
4740                 printf("Could not acquire PHY\n");
4741                 return (ret);
4742         }
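             /* Select BM wakeup page 769 before touching the
              * wakeup-enable register that lives on it. */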
4743         e1000_write_phy_reg_mdic(hw, IGP01E1000_PHY_PAGE_SELECT,
4744                                  (BM_WUC_ENABLE_PAGE << IGP_PAGE_SHIFT));
4745         ret = e1000_read_phy_reg_mdic(hw, BM_WUC_ENABLE_REG, &preg);
4746         if (ret) {
4747                 printf("Could not read PHY page 769\n");
4748                 goto out;
4749         }
4750         preg |= BM_WUC_ENABLE_BIT | BM_WUC_HOST_WU_BIT;
4751         ret = e1000_write_phy_reg_mdic(hw, BM_WUC_ENABLE_REG, preg);
4752         if (ret)
4753                 printf("Could not set PHY Host Wakeup bit\n");
4754 out:
4755         hw->phy.ops.release(hw);
4756
4757         return (ret);
4758 }
4759
4760 static void
4761 em_led_func(void *arg, int onoff)
4762 {
4763         struct adapter  *adapter = arg;
4764  
4765         EM_CORE_LOCK(adapter);
4766         if (onoff) {
4767                 e1000_setup_led(&adapter->hw);
4768                 e1000_led_on(&adapter->hw);
4769         } else {
4770                 e1000_led_off(&adapter->hw);
4771                 e1000_cleanup_led(&adapter->hw);
4772         }
4773         EM_CORE_UNLOCK(adapter);
4774 }
4775
4776 /**********************************************************************
4777  *
4778  *  Update the board statistics counters.
4779  *
4780  **********************************************************************/
4781 static void
4782 em_update_stats_counters(struct adapter *adapter)
4783 {
4784         struct ifnet   *ifp;
4785
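             /* Symbol/sequence errors are only sampled on copper, or
              * once link is up; on a down fiber/serdes link they
              * presumably just count line noise. */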
4786         if (adapter->hw.phy.media_type == e1000_media_type_copper ||
4787            (E1000_READ_REG(&adapter->hw, E1000_STATUS) & E1000_STATUS_LU)) {
4788                 adapter->stats.symerrs += E1000_READ_REG(&adapter->hw, E1000_SYMERRS);
4789                 adapter->stats.sec += E1000_READ_REG(&adapter->hw, E1000_SEC);
4790         }
4791         adapter->stats.crcerrs += E1000_READ_REG(&adapter->hw, E1000_CRCERRS);
4792         adapter->stats.mpc += E1000_READ_REG(&adapter->hw, E1000_MPC);
4793         adapter->stats.scc += E1000_READ_REG(&adapter->hw, E1000_SCC);
4794         adapter->stats.ecol += E1000_READ_REG(&adapter->hw, E1000_ECOL);
4795
4796         adapter->stats.mcc += E1000_READ_REG(&adapter->hw, E1000_MCC);
4797         adapter->stats.latecol += E1000_READ_REG(&adapter->hw, E1000_LATECOL);
4798         adapter->stats.colc += E1000_READ_REG(&adapter->hw, E1000_COLC);
4799         adapter->stats.dc += E1000_READ_REG(&adapter->hw, E1000_DC);
4800         adapter->stats.rlec += E1000_READ_REG(&adapter->hw, E1000_RLEC);
4801         adapter->stats.xonrxc += E1000_READ_REG(&adapter->hw, E1000_XONRXC);
4802         adapter->stats.xontxc += E1000_READ_REG(&adapter->hw, E1000_XONTXC);
4803         adapter->stats.xoffrxc += E1000_READ_REG(&adapter->hw, E1000_XOFFRXC);
4804         adapter->stats.xofftxc += E1000_READ_REG(&adapter->hw, E1000_XOFFTXC);
4805         adapter->stats.fcruc += E1000_READ_REG(&adapter->hw, E1000_FCRUC);
4806         adapter->stats.prc64 += E1000_READ_REG(&adapter->hw, E1000_PRC64);
4807         adapter->stats.prc127 += E1000_READ_REG(&adapter->hw, E1000_PRC127);
4808         adapter->stats.prc255 += E1000_READ_REG(&adapter->hw, E1000_PRC255);
4809         adapter->stats.prc511 += E1000_READ_REG(&adapter->hw, E1000_PRC511);
4810         adapter->stats.prc1023 += E1000_READ_REG(&adapter->hw, E1000_PRC1023);
4811         adapter->stats.prc1522 += E1000_READ_REG(&adapter->hw, E1000_PRC1522);
4812         adapter->stats.gprc += E1000_READ_REG(&adapter->hw, E1000_GPRC);
4813         adapter->stats.bprc += E1000_READ_REG(&adapter->hw, E1000_BPRC);
4814         adapter->stats.mprc += E1000_READ_REG(&adapter->hw, E1000_MPRC);
4815         adapter->stats.gptc += E1000_READ_REG(&adapter->hw, E1000_GPTC);
4816
4817         /* For the 64-bit byte counters the low dword must be read first;
4818          * both registers clear on the read of the high dword. */
4819
4820         adapter->stats.gorc += E1000_READ_REG(&adapter->hw, E1000_GORCL) +
                 ((u64)E1000_READ_REG(&adapter->hw, E1000_GORCH) << 32);
4821         adapter->stats.gotc += E1000_READ_REG(&adapter->hw, E1000_GOTCL) +
                 ((u64)E1000_READ_REG(&adapter->hw, E1000_GOTCH) << 32);
4822
4823         adapter->stats.rnbc += E1000_READ_REG(&adapter->hw, E1000_RNBC);
4824         adapter->stats.ruc += E1000_READ_REG(&adapter->hw, E1000_RUC);
4825         adapter->stats.rfc += E1000_READ_REG(&adapter->hw, E1000_RFC);
4826         adapter->stats.roc += E1000_READ_REG(&adapter->hw, E1000_ROC);
4827         adapter->stats.rjc += E1000_READ_REG(&adapter->hw, E1000_RJC);
4828
4829         adapter->stats.tor += E1000_READ_REG(&adapter->hw, E1000_TORL) +
                 ((u64)E1000_READ_REG(&adapter->hw, E1000_TORH) << 32);
4830         adapter->stats.tot += E1000_READ_REG(&adapter->hw, E1000_TOTL) +
                 ((u64)E1000_READ_REG(&adapter->hw, E1000_TOTH) << 32);
4831
4832         adapter->stats.tpr += E1000_READ_REG(&adapter->hw, E1000_TPR);
4833         adapter->stats.tpt += E1000_READ_REG(&adapter->hw, E1000_TPT);
4834         adapter->stats.ptc64 += E1000_READ_REG(&adapter->hw, E1000_PTC64);
4835         adapter->stats.ptc127 += E1000_READ_REG(&adapter->hw, E1000_PTC127);
4836         adapter->stats.ptc255 += E1000_READ_REG(&adapter->hw, E1000_PTC255);
4837         adapter->stats.ptc511 += E1000_READ_REG(&adapter->hw, E1000_PTC511);
4838         adapter->stats.ptc1023 += E1000_READ_REG(&adapter->hw, E1000_PTC1023);
4839         adapter->stats.ptc1522 += E1000_READ_REG(&adapter->hw, E1000_PTC1522);
4840         adapter->stats.mptc += E1000_READ_REG(&adapter->hw, E1000_MPTC);
4841         adapter->stats.bptc += E1000_READ_REG(&adapter->hw, E1000_BPTC);
4842
4843         if (adapter->hw.mac.type >= e1000_82543) {
4844                 adapter->stats.algnerrc +=
4845                     E1000_READ_REG(&adapter->hw, E1000_ALGNERRC);
4846                 adapter->stats.rxerrc +=
4847                     E1000_READ_REG(&adapter->hw, E1000_RXERRC);
4848                 adapter->stats.tncrs +=
4849                     E1000_READ_REG(&adapter->hw, E1000_TNCRS);
4850                 adapter->stats.cexterr +=
4851                     E1000_READ_REG(&adapter->hw, E1000_CEXTERR);
4852                 adapter->stats.tsctc +=
4853                     E1000_READ_REG(&adapter->hw, E1000_TSCTC);
4854                 adapter->stats.tsctfc +=
4855                     E1000_READ_REG(&adapter->hw, E1000_TSCTFC);
4856         }
4857         ifp = adapter->ifp;
4858
4859         ifp->if_collisions = adapter->stats.colc;
4860
4861         /* Rx Errors */
4862         ifp->if_ierrors = adapter->dropped_pkts + adapter->stats.rxerrc +
4863             adapter->stats.crcerrs + adapter->stats.algnerrc +
4864             adapter->stats.ruc + adapter->stats.roc +
4865             adapter->stats.mpc + adapter->stats.cexterr;
4866
4867         /* Tx Errors */
4868         ifp->if_oerrors = adapter->stats.ecol +
4869             adapter->stats.latecol + adapter->watchdog_events;
4870 }
4871
4872
4873 /**********************************************************************
4874  *
4875  *  This routine is called only when em_display_debug_stats is enabled.
4876  *  This routine provides a way to take a look at important statistics
4877  *  maintained by the driver and hardware.
4878  *
4879  **********************************************************************/
4880 static void
4881 em_print_debug_info(struct adapter *adapter)
4882 {
4883         device_t dev = adapter->dev;
4884         u8 *hw_addr = adapter->hw.hw_addr;
4885         struct rx_ring *rxr = adapter->rx_rings;
4886         struct tx_ring *txr = adapter->tx_rings;
4887
4888         device_printf(dev, "Adapter hardware address = %p\n", hw_addr);
4889         device_printf(dev, "CTRL = 0x%x RCTL = 0x%x\n",
4890             E1000_READ_REG(&adapter->hw, E1000_CTRL),
4891             E1000_READ_REG(&adapter->hw, E1000_RCTL));
4892         device_printf(dev, "Packet buffer = Tx=%dk Rx=%dk\n",
4893             ((E1000_READ_REG(&adapter->hw, E1000_PBA) & 0xffff0000) >> 16),
4894             (E1000_READ_REG(&adapter->hw, E1000_PBA) & 0xffff));
4895         device_printf(dev, "Flow control watermarks high = %d low = %d\n",
4896             adapter->hw.fc.high_water,
4897             adapter->hw.fc.low_water);
4898         device_printf(dev, "tx_int_delay = %d, tx_abs_int_delay = %d\n",
4899             E1000_READ_REG(&adapter->hw, E1000_TIDV),
4900             E1000_READ_REG(&adapter->hw, E1000_TADV));
4901         device_printf(dev, "rx_int_delay = %d, rx_abs_int_delay = %d\n",
4902             E1000_READ_REG(&adapter->hw, E1000_RDTR),
4903             E1000_READ_REG(&adapter->hw, E1000_RADV));
4904
4905         for (int i = 0; i < adapter->num_queues; i++, txr++) {
4906                 device_printf(dev, "Queue(%d) tdh = %d, tdt = %d\n", i,
4907                     E1000_READ_REG(&adapter->hw, E1000_TDH(i)),
4908                     E1000_READ_REG(&adapter->hw, E1000_TDT(i)));
4909                 device_printf(dev, "TX(%d) no descriptors avail event = %ld\n",
4910                     txr->me, txr->no_desc_avail);
4911                 device_printf(dev, "TX(%d) MSIX IRQ Handled = %ld\n",
4912                     txr->me, txr->tx_irq);
4913                 device_printf(dev, "Num Tx descriptors avail = %d\n",
4914                     txr->tx_avail);
4915                 device_printf(dev, "Tx Descriptors not avail1 = %ld\n",
4916                     txr->no_desc_avail);
4917         }
4918         for (int i = 0; i < adapter->num_queues; i++, rxr++) {
4919                 device_printf(dev, "RX(%d) MSIX IRQ Handled = %ld\n",
4920                     rxr->me, rxr->rx_irq);
4921                 device_printf(dev, "hw rdh = %d, hw rdt = %d\n",
4922                     E1000_READ_REG(&adapter->hw, E1000_RDH(i)),
4923                     E1000_READ_REG(&adapter->hw, E1000_RDT(i)));
4924         }
4925         device_printf(dev, "Std mbuf failed = %ld\n",
4926             adapter->mbuf_alloc_failed);
4927         device_printf(dev, "Std mbuf cluster failed = %ld\n",
4928             adapter->mbuf_cluster_failed);
4929         device_printf(dev, "Driver dropped packets = %ld\n",
4930             adapter->dropped_pkts);
4931 }
4932
4933 static void
4934 em_print_hw_stats(struct adapter *adapter)
4935 {
4936         device_t dev = adapter->dev;
4937
4938         device_printf(dev, "Excessive collisions = %lld\n",
4939             (long long)adapter->stats.ecol);
4940 #if     (DEBUG_HW > 0)  /* Don't output these errors normally */
4941         device_printf(dev, "Symbol errors = %lld\n",
4942             (long long)adapter->stats.symerrs);
4943 #endif
4944         device_printf(dev, "Sequence errors = %lld\n",
4945             (long long)adapter->stats.sec);
4946         device_printf(dev, "Defer count = %lld\n",
4947             (long long)adapter->stats.dc);
4948         device_printf(dev, "Missed Packets = %lld\n",
4949             (long long)adapter->stats.mpc);
4950         device_printf(dev, "Receive No Buffers = %lld\n",
4951             (long long)adapter->stats.rnbc);
4952         /* RLEC is inaccurate on some hardware, calculate our own. */
4953         device_printf(dev, "Receive Length Errors = %lld\n",
4954             ((long long)adapter->stats.roc + (long long)adapter->stats.ruc));
4955         device_printf(dev, "Receive errors = %lld\n",
4956             (long long)adapter->stats.rxerrc);
4957         device_printf(dev, "Crc errors = %lld\n",
4958             (long long)adapter->stats.crcerrs);
4959         device_printf(dev, "Alignment errors = %lld\n",
4960             (long long)adapter->stats.algnerrc);
4961         device_printf(dev, "Collision/Carrier extension errors = %lld\n",
4962             (long long)adapter->stats.cexterr);
4963         device_printf(dev, "watchdog timeouts = %ld\n",
4964             adapter->watchdog_events);
4965         device_printf(dev, "XON Rcvd = %lld\n",
4966             (long long)adapter->stats.xonrxc);
4967         device_printf(dev, "XON Xmtd = %lld\n",
4968             (long long)adapter->stats.xontxc);
4969         device_printf(dev, "XOFF Rcvd = %lld\n",
4970             (long long)adapter->stats.xoffrxc);
4971         device_printf(dev, "XOFF Xmtd = %lld\n",
4972             (long long)adapter->stats.xofftxc);
4973         device_printf(dev, "Good Packets Rcvd = %lld\n",
4974             (long long)adapter->stats.gprc);
4975         device_printf(dev, "Good Packets Xmtd = %lld\n",
4976             (long long)adapter->stats.gptc);
4977         device_printf(dev, "TSO Contexts Xmtd = %lld\n",
4978             (long long)adapter->stats.tsctc);
4979         device_printf(dev, "TSO Contexts Failed = %lld\n",
4980             (long long)adapter->stats.tsctfc);
4981 }
4982
4983 /**********************************************************************
4984  *
4985  *  This routine provides a way to dump out the adapter eeprom,
4986  *  often a useful debug/service tool. Only the first 32 words are
4987  *  dumped; everything that matters lives in that range.
4988  *
4989  **********************************************************************/
4990 static void
4991 em_print_nvm_info(struct adapter *adapter)
4992 {
4993         u16     eeprom_data;
4994         int     i, j, row = 0;
4995
4996         /* It's a bit crude, but it gets the job done */
4997         printf("\nInterface EEPROM Dump:\n");
4998         printf("Offset\n0x0000  ");
4999         for (i = 0, j = 0; i < 32; i++, j++) {
5000                 if (j == 8) { /* Make the offset block */
5001                         j = 0; ++row;
5002                         printf("\n0x00%x0  ", row);
5003                 }
5004                 e1000_read_nvm(&adapter->hw, i, 1, &eeprom_data);
5005                 printf("%04x ", eeprom_data);
5006         }
5007         printf("\n");
5008 }
5009
5010 static int
5011 em_sysctl_debug_info(SYSCTL_HANDLER_ARGS)
5012 {
5013         struct adapter *adapter;
5014         int error;
5015         int result;
5016
5017         result = -1;
5018         error = sysctl_handle_int(oidp, &result, 0, req);
5019
5020         if (error || !req->newptr)
5021                 return (error);
5022
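             /* A value of 1 dumps the adapter debug info to the
              * console. */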
5023         if (result == 1) {
5024                 adapter = (struct adapter *)arg1;
5025                 em_print_debug_info(adapter);
5026         }
5027         /*
5028          * This value will cause a hex dump of the
5029          * first 32 16-bit words of the EEPROM to
5030          * the screen.
5031          */
5032         if (result == 2) {
5033                 adapter = (struct adapter *)arg1;
5034                 em_print_nvm_info(adapter);
5035         }
5036
5037         return (error);
5038 }
5039
5040
5041 static int
5042 em_sysctl_stats(SYSCTL_HANDLER_ARGS)
5043 {
5044         struct adapter *adapter;
5045         int error;
5046         int result;
5047
5048         result = -1;
5049         error = sysctl_handle_int(oidp, &result, 0, req);
5050
5051         if (error || !req->newptr)
5052                 return (error);
5053
5054         if (result == 1) {
5055                 adapter = (struct adapter *)arg1;
5056                 em_print_hw_stats(adapter);
5057         }
5058
5059         return (error);
5060 }
5061
5062 static int
5063 em_sysctl_int_delay(SYSCTL_HANDLER_ARGS)
5064 {
5065         struct em_int_delay_info *info;
5066         struct adapter *adapter;
5067         u32 regval;
5068         int error, usecs, ticks;
5069
5070         info = (struct em_int_delay_info *)arg1;
5071         usecs = info->value;
5072         error = sysctl_handle_int(oidp, &usecs, 0, req);
5073         if (error != 0 || req->newptr == NULL)
5074                 return (error);
5075         if (usecs < 0 || usecs > EM_TICKS_TO_USECS(65535))
5076                 return (EINVAL);
5077         info->value = usecs;
5078         ticks = EM_USECS_TO_TICKS(usecs);
5079
5080         adapter = info->adapter;
5081         
5082         EM_CORE_LOCK(adapter);
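             /* The delay timer occupies the low 16 bits of the
              * register; preserve the upper bits. */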
5083         regval = E1000_READ_OFFSET(&adapter->hw, info->offset);
5084         regval = (regval & ~0xffff) | (ticks & 0xffff);
5085         /* Handle a few special cases. */
5086         switch (info->offset) {
5087         case E1000_RDTR:
5088                 break;
5089         case E1000_TIDV:
5090                 if (ticks == 0) {
5091                         adapter->txd_cmd &= ~E1000_TXD_CMD_IDE;
5092                         /* Don't write 0 into the TIDV register. */
5093                         regval++;
5094                 } else
5095                         adapter->txd_cmd |= E1000_TXD_CMD_IDE;
5096                 break;
5097         }
5098         E1000_WRITE_OFFSET(&adapter->hw, info->offset, regval);
5099         EM_CORE_UNLOCK(adapter);
5100         return (0);
5101 }
5102
5103 static void
5104 em_add_int_delay_sysctl(struct adapter *adapter, const char *name,
5105         const char *description, struct em_int_delay_info *info,
5106         int offset, int value)
5107 {
5108         info->adapter = adapter;
5109         info->offset = offset;
5110         info->value = value;
5111         SYSCTL_ADD_PROC(device_get_sysctl_ctx(adapter->dev),
5112             SYSCTL_CHILDREN(device_get_sysctl_tree(adapter->dev)),
5113             OID_AUTO, name, CTLTYPE_INT|CTLFLAG_RW,
5114             info, 0, em_sysctl_int_delay, "I", description);
5115 }
5116
5117 static void
5118 em_add_rx_process_limit(struct adapter *adapter, const char *name,
5119         const char *description, int *limit, int value)
5120 {
5121         *limit = value;
5122         SYSCTL_ADD_INT(device_get_sysctl_ctx(adapter->dev),
5123             SYSCTL_CHILDREN(device_get_sysctl_tree(adapter->dev)),
5124             OID_AUTO, name, CTLTYPE_INT|CTLFLAG_RW, limit, value, description);
5125 }
5126
5127