/******************************************************************************

  Copyright (c) 2001-2010, Intel Corporation
  All rights reserved.

  Redistribution and use in source and binary forms, with or without
  modification, are permitted provided that the following conditions are met:

   1. Redistributions of source code must retain the above copyright notice,
      this list of conditions and the following disclaimer.

   2. Redistributions in binary form must reproduce the above copyright
      notice, this list of conditions and the following disclaimer in the
      documentation and/or other materials provided with the distribution.

   3. Neither the name of the Intel Corporation nor the names of its
      contributors may be used to endorse or promote products derived from
      this software without specific prior written permission.

  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
  AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
  LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
  INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
  CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
  ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
  POSSIBILITY OF SUCH DAMAGE.

******************************************************************************/
/*$FreeBSD$*/

#ifdef HAVE_KERNEL_OPTION_HEADERS
#include "opt_device_polling.h"
#include "opt_inet.h"
#endif

#include <sys/param.h>
#include <sys/systm.h>
#if __FreeBSD_version >= 800000
#include <sys/buf_ring.h>
#endif
#include <sys/bus.h>
#include <sys/endian.h>
#include <sys/kernel.h>
#include <sys/kthread.h>
#include <sys/malloc.h>
#include <sys/mbuf.h>
#include <sys/module.h>
#include <sys/rman.h>
#include <sys/socket.h>
#include <sys/sockio.h>
#include <sys/sysctl.h>
#include <sys/taskqueue.h>
#include <sys/eventhandler.h>
#include <machine/bus.h>
#include <machine/resource.h>

#include <net/bpf.h>
#include <net/ethernet.h>
#include <net/if.h>
#include <net/if_arp.h>
#include <net/if_dl.h>
#include <net/if_media.h>

#include <net/if_types.h>
#include <net/if_vlan_var.h>

#include <netinet/in_systm.h>
#include <netinet/in.h>
#include <netinet/if_ether.h>
#include <netinet/ip.h>
#include <netinet/ip6.h>
#include <netinet/tcp.h>
#include <netinet/udp.h>

#include <machine/in_cksum.h>
#include <dev/led/led.h>
#include <dev/pci/pcivar.h>
#include <dev/pci/pcireg.h>

#include "e1000_api.h"
#include "e1000_82571.h"
#include "if_em.h"

/*********************************************************************
 *  Set this to one to display debug statistics
 *********************************************************************/
int     em_display_debug_stats = 0;

/*********************************************************************
 *  Driver version:
 *********************************************************************/
char em_driver_version[] = "7.0.5";

/*********************************************************************
 *  PCI Device ID Table
 *
 *  Used by probe to select the devices to load on.
 *  The last field stores an index into em_strings.
 *  The last entry must be all 0s.
 *
 *  { Vendor ID, Device ID, SubVendor ID, SubDevice ID, String Index }
 *********************************************************************/
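/*
 * Illustrative note (not part of the original table commentary): an entry
 * such as { 0x8086, E1000_DEV_ID_82574L, PCI_ANY_ID, PCI_ANY_ID, 0 } below
 * matches an 82574L with any subsystem vendor/device ID, and its last
 * field selects em_strings[0] as the branding string.
 */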

static em_vendor_info_t em_vendor_info_array[] =
{
        /* Intel(R) PRO/1000 Network Connection */
        { 0x8086, E1000_DEV_ID_82571EB_COPPER,  PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82571EB_FIBER,   PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82571EB_SERDES,  PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82571EB_SERDES_DUAL,
                                                PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82571EB_SERDES_QUAD,
                                                PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82571EB_QUAD_COPPER,
                                                PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82571EB_QUAD_COPPER_LP,
                                                PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82571EB_QUAD_FIBER,
                                                PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82571PT_QUAD_COPPER,
                                                PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82572EI_COPPER,  PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82572EI_FIBER,   PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82572EI_SERDES,  PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82572EI,         PCI_ANY_ID, PCI_ANY_ID, 0},

        { 0x8086, E1000_DEV_ID_82573E,          PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82573E_IAMT,     PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82573L,          PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82583V,          PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_80003ES2LAN_COPPER_SPT,
                                                PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_80003ES2LAN_SERDES_SPT,
                                                PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_80003ES2LAN_COPPER_DPT,
                                                PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_80003ES2LAN_SERDES_DPT,
                                                PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_ICH8_IGP_M_AMT,  PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_ICH8_IGP_AMT,    PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_ICH8_IGP_C,      PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_ICH8_IFE,        PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_ICH8_IFE_GT,     PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_ICH8_IFE_G,      PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_ICH8_IGP_M,      PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_ICH8_82567V_3,   PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_ICH9_IGP_M_AMT,  PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_ICH9_IGP_AMT,    PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_ICH9_IGP_C,      PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_ICH9_IGP_M,      PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_ICH9_IGP_M_V,    PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_ICH9_IFE,        PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_ICH9_IFE_GT,     PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_ICH9_IFE_G,      PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_ICH9_BM,         PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82574L,          PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82574LA,         PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_ICH10_R_BM_LM,   PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_ICH10_R_BM_LF,   PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_ICH10_R_BM_V,    PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_ICH10_D_BM_LM,   PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_ICH10_D_BM_LF,   PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_PCH_M_HV_LM,     PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_PCH_M_HV_LC,     PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_PCH_D_HV_DM,     PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_PCH_D_HV_DC,     PCI_ANY_ID, PCI_ANY_ID, 0},
        /* required last entry */
        { 0, 0, 0, 0, 0}
};

/*********************************************************************
 *  Table of branding strings for all supported NICs.
 *********************************************************************/

static char *em_strings[] = {
        "Intel(R) PRO/1000 Network Connection"
};

/*********************************************************************
 *  Function prototypes
 *********************************************************************/
static int      em_probe(device_t);
static int      em_attach(device_t);
static int      em_detach(device_t);
static int      em_shutdown(device_t);
static int      em_suspend(device_t);
static int      em_resume(device_t);
static void     em_start(struct ifnet *);
static void     em_start_locked(struct ifnet *, struct tx_ring *);
#ifdef EM_MULTIQUEUE
static int      em_mq_start(struct ifnet *, struct mbuf *);
static int      em_mq_start_locked(struct ifnet *,
                    struct tx_ring *, struct mbuf *);
static void     em_qflush(struct ifnet *);
#endif
static int      em_ioctl(struct ifnet *, u_long, caddr_t);
static void     em_init(void *);
static void     em_init_locked(struct adapter *);
static void     em_stop(void *);
static void     em_media_status(struct ifnet *, struct ifmediareq *);
static int      em_media_change(struct ifnet *);
static void     em_identify_hardware(struct adapter *);
static int      em_allocate_pci_resources(struct adapter *);
static int      em_allocate_legacy(struct adapter *);
static int      em_allocate_msix(struct adapter *);
static int      em_allocate_queues(struct adapter *);
static int      em_setup_msix(struct adapter *);
static void     em_free_pci_resources(struct adapter *);
static void     em_local_timer(void *);
static void     em_reset(struct adapter *);
static void     em_setup_interface(device_t, struct adapter *);

static void     em_setup_transmit_structures(struct adapter *);
static void     em_initialize_transmit_unit(struct adapter *);
static int      em_allocate_transmit_buffers(struct tx_ring *);
static void     em_free_transmit_structures(struct adapter *);
static void     em_free_transmit_buffers(struct tx_ring *);

static int      em_setup_receive_structures(struct adapter *);
static int      em_allocate_receive_buffers(struct rx_ring *);
static void     em_initialize_receive_unit(struct adapter *);
static void     em_free_receive_structures(struct adapter *);
static void     em_free_receive_buffers(struct rx_ring *);

static void     em_enable_intr(struct adapter *);
static void     em_disable_intr(struct adapter *);
static void     em_update_stats_counters(struct adapter *);
static bool     em_txeof(struct tx_ring *);
static int      em_rxeof(struct rx_ring *, int);
#ifndef __NO_STRICT_ALIGNMENT
static int      em_fixup_rx(struct rx_ring *);
#endif
static void     em_receive_checksum(struct e1000_rx_desc *, struct mbuf *);
static void     em_transmit_checksum_setup(struct tx_ring *, struct mbuf *,
                    u32 *, u32 *);
static bool     em_tso_setup(struct tx_ring *, struct mbuf *, u32 *, u32 *);
static void     em_set_promisc(struct adapter *);
static void     em_disable_promisc(struct adapter *);
static void     em_set_multi(struct adapter *);
static void     em_print_hw_stats(struct adapter *);
static void     em_update_link_status(struct adapter *);
static void     em_refresh_mbufs(struct rx_ring *, int);
static void     em_register_vlan(void *, struct ifnet *, u16);
static void     em_unregister_vlan(void *, struct ifnet *, u16);
static void     em_setup_vlan_hw_support(struct adapter *);
static int      em_xmit(struct tx_ring *, struct mbuf **);
static int      em_dma_malloc(struct adapter *, bus_size_t,
                    struct em_dma_alloc *, int);
static void     em_dma_free(struct adapter *, struct em_dma_alloc *);
static void     em_print_debug_info(struct adapter *);
static void     em_print_nvm_info(struct adapter *);
static int      em_is_valid_ether_addr(u8 *);
static int      em_sysctl_stats(SYSCTL_HANDLER_ARGS);
static int      em_sysctl_debug_info(SYSCTL_HANDLER_ARGS);
static int      em_sysctl_int_delay(SYSCTL_HANDLER_ARGS);
static void     em_add_int_delay_sysctl(struct adapter *, const char *,
                    const char *, struct em_int_delay_info *, int, int);
/* Management and WOL Support */
static void     em_init_manageability(struct adapter *);
static void     em_release_manageability(struct adapter *);
static void     em_get_hw_control(struct adapter *);
static void     em_release_hw_control(struct adapter *);
static void     em_get_wakeup(device_t);
static void     em_enable_wakeup(device_t);
static int      em_enable_phy_wakeup(struct adapter *);
static void     em_led_func(void *, int);

static int      em_irq_fast(void *);

/* MSIX handlers */
static void     em_msix_tx(void *);
static void     em_msix_rx(void *);
static void     em_msix_link(void *);
static void     em_handle_tx(void *context, int pending);
static void     em_handle_rx(void *context, int pending);
static void     em_handle_link(void *context, int pending);

static void     em_add_rx_process_limit(struct adapter *, const char *,
                    const char *, int *, int);

#ifdef DEVICE_POLLING
static poll_handler_t em_poll;
#endif /* DEVICE_POLLING */

/*********************************************************************
 *  FreeBSD Device Interface Entry Points
 *********************************************************************/

static device_method_t em_methods[] = {
        /* Device interface */
        DEVMETHOD(device_probe, em_probe),
        DEVMETHOD(device_attach, em_attach),
        DEVMETHOD(device_detach, em_detach),
        DEVMETHOD(device_shutdown, em_shutdown),
        DEVMETHOD(device_suspend, em_suspend),
        DEVMETHOD(device_resume, em_resume),
        {0, 0}
};

static driver_t em_driver = {
        "em", em_methods, sizeof(struct adapter),
};

devclass_t em_devclass;
DRIVER_MODULE(em, pci, em_driver, em_devclass, 0, 0);
MODULE_DEPEND(em, pci, 1, 1, 1);
MODULE_DEPEND(em, ether, 1, 1, 1);

/*********************************************************************
 *  Tunable default values.
 *********************************************************************/

#define EM_TICKS_TO_USECS(ticks)        ((1024 * (ticks) + 500) / 1000)
#define EM_USECS_TO_TICKS(usecs)        ((1000 * (usecs) + 512) / 1024)
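/*
 * Worked example of the conversions above (illustrative only; the
 * hardware timer granularity these macros encode is 1.024 usecs):
 * EM_USECS_TO_TICKS(100) = (1000 * 100 + 512) / 1024 = 98 ticks, and
 * EM_TICKS_TO_USECS(98)  = (1024 * 98  + 500) / 1000 = 100 usecs,
 * so typical delay values round-trip cleanly.
 */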
#define M_TSO_LEN                       66

/* Allow common code without TSO */
#ifndef CSUM_TSO
#define CSUM_TSO        0
#endif

static int em_tx_int_delay_dflt = EM_TICKS_TO_USECS(EM_TIDV);
static int em_rx_int_delay_dflt = EM_TICKS_TO_USECS(EM_RDTR);
TUNABLE_INT("hw.em.tx_int_delay", &em_tx_int_delay_dflt);
TUNABLE_INT("hw.em.rx_int_delay", &em_rx_int_delay_dflt);

static int em_tx_abs_int_delay_dflt = EM_TICKS_TO_USECS(EM_TADV);
static int em_rx_abs_int_delay_dflt = EM_TICKS_TO_USECS(EM_RADV);
TUNABLE_INT("hw.em.tx_abs_int_delay", &em_tx_abs_int_delay_dflt);
TUNABLE_INT("hw.em.rx_abs_int_delay", &em_rx_abs_int_delay_dflt);

static int em_rxd = EM_DEFAULT_RXD;
static int em_txd = EM_DEFAULT_TXD;
TUNABLE_INT("hw.em.rxd", &em_rxd);
TUNABLE_INT("hw.em.txd", &em_txd);

static int em_smart_pwr_down = FALSE;
TUNABLE_INT("hw.em.smart_pwr_down", &em_smart_pwr_down);

/* Controls whether promiscuous also shows bad packets */
static int em_debug_sbp = FALSE;
TUNABLE_INT("hw.em.sbp", &em_debug_sbp);

/* Local controls for MSI/MSIX */
static int em_enable_msix = TRUE;
static int em_msix_queues = 2; /* for 82574, can be 1 or 2 */
TUNABLE_INT("hw.em.enable_msix", &em_enable_msix);
TUNABLE_INT("hw.em.msix_queues", &em_msix_queues);

/* How many packets rxeof tries to clean at a time */
static int em_rx_process_limit = 100;
TUNABLE_INT("hw.em.rx_process_limit", &em_rx_process_limit);

/* Flow control setting - default to FULL */
static int em_fc_setting = e1000_fc_full;
TUNABLE_INT("hw.em.fc_setting", &em_fc_setting);
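
/*
 * The tunables above are read at boot. A hypothetical /boot/loader.conf
 * (values are illustrative, not recommendations) might contain:
 *
 *   hw.em.txd="1024"
 *   hw.em.rxd="1024"
 *   hw.em.rx_process_limit="200"
 *   hw.em.fc_setting="0"    (e1000_fc_none in the shared-code enum)
 */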

/*
** Shadow VFTA table: this is needed because the real VLAN
** filter table gets cleared during a soft reset, and the
** driver needs to be able to repopulate it.
*/
static u32 em_shadow_vfta[EM_VFTA_SIZE];

/* Global used in WOL setup with multiport cards */
static int global_quad_port_a = 0;

/*********************************************************************
 *  Device identification routine
 *
 *  em_probe determines whether the driver should be loaded on
 *  an adapter based on its PCI vendor/device ID.
 *
 *  return BUS_PROBE_DEFAULT on success, positive on failure
 *********************************************************************/

static int
em_probe(device_t dev)
{
        char            adapter_name[60];
        u16             pci_vendor_id = 0;
        u16             pci_device_id = 0;
        u16             pci_subvendor_id = 0;
        u16             pci_subdevice_id = 0;
        em_vendor_info_t *ent;

        INIT_DEBUGOUT("em_probe: begin");

        pci_vendor_id = pci_get_vendor(dev);
        if (pci_vendor_id != EM_VENDOR_ID)
                return (ENXIO);

        pci_device_id = pci_get_device(dev);
        pci_subvendor_id = pci_get_subvendor(dev);
        pci_subdevice_id = pci_get_subdevice(dev);

        ent = em_vendor_info_array;
        while (ent->vendor_id != 0) {
                if ((pci_vendor_id == ent->vendor_id) &&
                    (pci_device_id == ent->device_id) &&

                    ((pci_subvendor_id == ent->subvendor_id) ||
                    (ent->subvendor_id == PCI_ANY_ID)) &&

                    ((pci_subdevice_id == ent->subdevice_id) ||
                    (ent->subdevice_id == PCI_ANY_ID))) {
                        sprintf(adapter_name, "%s %s",
                                em_strings[ent->index],
                                em_driver_version);
                        device_set_desc_copy(dev, adapter_name);
                        return (BUS_PROBE_DEFAULT);
                }
                ent++;
        }

        return (ENXIO);
}

/*********************************************************************
 *  Device initialization routine
 *
 *  The attach entry point is called when the driver is being loaded.
 *  This routine identifies the type of hardware, allocates all resources
 *  and initializes the hardware.
 *
 *  return 0 on success, positive on failure
 *********************************************************************/

static int
em_attach(device_t dev)
{
        struct adapter  *adapter;
        int             error = 0;

        INIT_DEBUGOUT("em_attach: begin");

        adapter = device_get_softc(dev);
        adapter->dev = adapter->osdep.dev = dev;
        EM_CORE_LOCK_INIT(adapter, device_get_nameunit(dev));

        /* SYSCTL stuff */
        SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
            SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
            OID_AUTO, "debug", CTLTYPE_INT|CTLFLAG_RW, adapter, 0,
            em_sysctl_debug_info, "I", "Debug Information");

        SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
            SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
            OID_AUTO, "stats", CTLTYPE_INT|CTLFLAG_RW, adapter, 0,
            em_sysctl_stats, "I", "Statistics");

        callout_init_mtx(&adapter->timer, &adapter->core_mtx, 0);

        /* Determine hardware and mac info */
        em_identify_hardware(adapter);

        /* Setup PCI resources */
        if (em_allocate_pci_resources(adapter)) {
                device_printf(dev, "Allocation of PCI resources failed\n");
                error = ENXIO;
                goto err_pci;
        }

        /*
        ** For ICH8 and family we need to
        ** map the flash memory, and this
        ** must happen after the MAC is
        ** identified.
        */
        if ((adapter->hw.mac.type == e1000_ich8lan) ||
            (adapter->hw.mac.type == e1000_pchlan) ||
            (adapter->hw.mac.type == e1000_ich9lan) ||
            (adapter->hw.mac.type == e1000_ich10lan)) {
                int rid = EM_BAR_TYPE_FLASH;
                adapter->flash = bus_alloc_resource_any(dev,
                    SYS_RES_MEMORY, &rid, RF_ACTIVE);
                if (adapter->flash == NULL) {
                        device_printf(dev, "Mapping of Flash failed\n");
                        error = ENXIO;
                        goto err_pci;
                }
                /* This is used in the shared code */
                adapter->hw.flash_address = (u8 *)adapter->flash;
                adapter->osdep.flash_bus_space_tag =
                    rman_get_bustag(adapter->flash);
                adapter->osdep.flash_bus_space_handle =
                    rman_get_bushandle(adapter->flash);
        }

        /* Do Shared Code initialization */
        if (e1000_setup_init_funcs(&adapter->hw, TRUE)) {
                device_printf(dev, "Setup of Shared code failed\n");
                error = ENXIO;
                goto err_pci;
        }

        e1000_get_bus_info(&adapter->hw);

        /* Set up some sysctls for the tunable interrupt delays */
        em_add_int_delay_sysctl(adapter, "rx_int_delay",
            "receive interrupt delay in usecs", &adapter->rx_int_delay,
            E1000_REGISTER(&adapter->hw, E1000_RDTR), em_rx_int_delay_dflt);
        em_add_int_delay_sysctl(adapter, "tx_int_delay",
            "transmit interrupt delay in usecs", &adapter->tx_int_delay,
            E1000_REGISTER(&adapter->hw, E1000_TIDV), em_tx_int_delay_dflt);
        em_add_int_delay_sysctl(adapter, "rx_abs_int_delay",
            "receive interrupt delay limit in usecs",
            &adapter->rx_abs_int_delay,
            E1000_REGISTER(&adapter->hw, E1000_RADV),
            em_rx_abs_int_delay_dflt);
        em_add_int_delay_sysctl(adapter, "tx_abs_int_delay",
            "transmit interrupt delay limit in usecs",
            &adapter->tx_abs_int_delay,
            E1000_REGISTER(&adapter->hw, E1000_TADV),
            em_tx_abs_int_delay_dflt);

        /* Sysctls for limiting the amount of work done in the taskqueue */
        em_add_rx_process_limit(adapter, "rx_processing_limit",
            "max number of rx packets to process", &adapter->rx_process_limit,
            em_rx_process_limit);

        /*
         * Validate the number of transmit and receive descriptors. The
         * ring size in bytes must not exceed the hardware maximum and
         * must be a multiple of EM_DBA_ALIGN.
         */
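        /*
         * Worked example, assuming the usual 16-byte legacy descriptor
         * and an EM_DBA_ALIGN of 128: em_txd * 16 must be a multiple of
         * 128, so em_txd itself must be a multiple of 8. 1024
         * descriptors (16KB) pass the check below; 1020 would not.
         */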
        if (((em_txd * sizeof(struct e1000_tx_desc)) % EM_DBA_ALIGN) != 0 ||
            (em_txd > EM_MAX_TXD) || (em_txd < EM_MIN_TXD)) {
                device_printf(dev, "Using %d TX descriptors instead of %d!\n",
                    EM_DEFAULT_TXD, em_txd);
                adapter->num_tx_desc = EM_DEFAULT_TXD;
        } else
                adapter->num_tx_desc = em_txd;

        if (((em_rxd * sizeof(struct e1000_rx_desc)) % EM_DBA_ALIGN) != 0 ||
            (em_rxd > EM_MAX_RXD) || (em_rxd < EM_MIN_RXD)) {
                device_printf(dev, "Using %d RX descriptors instead of %d!\n",
                    EM_DEFAULT_RXD, em_rxd);
                adapter->num_rx_desc = EM_DEFAULT_RXD;
        } else
                adapter->num_rx_desc = em_rxd;

        adapter->hw.mac.autoneg = DO_AUTO_NEG;
        adapter->hw.phy.autoneg_wait_to_complete = FALSE;
        adapter->hw.phy.autoneg_advertised = AUTONEG_ADV_DEFAULT;

        /* Copper options */
        if (adapter->hw.phy.media_type == e1000_media_type_copper) {
                adapter->hw.phy.mdix = AUTO_ALL_MODES;
                adapter->hw.phy.disable_polarity_correction = FALSE;
                adapter->hw.phy.ms_type = EM_MASTER_SLAVE;
        }

        /*
         * Set the frame limits assuming
         * standard ethernet sized frames.
         */
        adapter->max_frame_size = ETHERMTU + ETHER_HDR_LEN + ETHERNET_FCS_SIZE;
        adapter->min_frame_size = ETH_ZLEN + ETHERNET_FCS_SIZE;

        /*
         * This controls when hardware reports transmit completion
         * status.
         */
        adapter->hw.mac.report_tx_early = 1;

        /*
        ** Get queue/ring memory
        */
        if (em_allocate_queues(adapter)) {
                error = ENOMEM;
                goto err_pci;
        }

        /*
        ** Start from a known state; this is
        ** important for reading the NVM and
        ** MAC address afterwards.
        */
        e1000_reset_hw(&adapter->hw);

        /* Make sure we have a good EEPROM before we read from it */
        if (e1000_validate_nvm_checksum(&adapter->hw) < 0) {
                /*
                ** Some PCI-E parts fail the first check due to
                ** the link being in a sleep state; call it again.
                ** If it fails a second time, it is a real issue.
                */
                if (e1000_validate_nvm_checksum(&adapter->hw) < 0) {
                        device_printf(dev,
                            "The EEPROM Checksum Is Not Valid\n");
                        error = EIO;
                        goto err_late;
                }
        }

        /* Copy the permanent MAC address out of the EEPROM */
        if (e1000_read_mac_addr(&adapter->hw) < 0) {
                device_printf(dev, "EEPROM read error while reading MAC"
                    " address\n");
                error = EIO;
                goto err_late;
        }

        if (!em_is_valid_ether_addr(adapter->hw.mac.addr)) {
                device_printf(dev, "Invalid MAC address\n");
                error = EIO;
                goto err_late;
        }

        /*
        **  Do interrupt configuration
        */
        if (adapter->msix > 1) /* Do MSIX */
                error = em_allocate_msix(adapter);
        else  /* MSI or Legacy */
                error = em_allocate_legacy(adapter);
        if (error)
                goto err_late;

        /*
         * Get Wake-on-Lan and Management info for later use
         */
        em_get_wakeup(dev);

        /* Setup OS specific network interface */
        em_setup_interface(dev, adapter);

        em_reset(adapter);

        /* Initialize statistics */
        em_update_stats_counters(adapter);

        adapter->hw.mac.get_link_status = 1;
        em_update_link_status(adapter);

        /* Indicate SOL/IDER usage */
        if (e1000_check_reset_block(&adapter->hw))
                device_printf(dev,
                    "PHY reset is blocked due to SOL/IDER session.\n");

        /* Register for VLAN events */
        adapter->vlan_attach = EVENTHANDLER_REGISTER(vlan_config,
            em_register_vlan, adapter, EVENTHANDLER_PRI_FIRST);
        adapter->vlan_detach = EVENTHANDLER_REGISTER(vlan_unconfig,
            em_unregister_vlan, adapter, EVENTHANDLER_PRI_FIRST);

        /* Non-AMT based hardware can now take control from firmware */
        if (adapter->has_manage && !adapter->has_amt)
                em_get_hw_control(adapter);

        /* Tell the stack that the interface is not active */
        adapter->ifp->if_drv_flags &= ~(IFF_DRV_RUNNING | IFF_DRV_OACTIVE);

        adapter->led_dev = led_create(em_led_func, adapter,
            device_get_nameunit(dev));

        INIT_DEBUGOUT("em_attach: end");

        return (0);

err_late:
        em_free_transmit_structures(adapter);
        em_free_receive_structures(adapter);
        em_release_hw_control(adapter);
err_pci:
        em_free_pci_resources(adapter);
        EM_CORE_LOCK_DESTROY(adapter);

        return (error);
}

/*********************************************************************
 *  Device removal routine
 *
 *  The detach entry point is called when the driver is being removed.
 *  This routine stops the adapter and deallocates all the resources
 *  that were allocated for driver operation.
 *
 *  return 0 on success, positive on failure
 *********************************************************************/

static int
em_detach(device_t dev)
{
        struct adapter  *adapter = device_get_softc(dev);
        struct ifnet    *ifp = adapter->ifp;

        INIT_DEBUGOUT("em_detach: begin");

        /* Make sure VLANs are not using the driver */
        if (adapter->ifp->if_vlantrunk != NULL) {
                device_printf(dev, "Vlan in use, detach first\n");
                return (EBUSY);
        }

#ifdef DEVICE_POLLING
        if (ifp->if_capenable & IFCAP_POLLING)
                ether_poll_deregister(ifp);
#endif

        if (adapter->led_dev != NULL)
                led_destroy(adapter->led_dev);

        EM_CORE_LOCK(adapter);
        adapter->in_detach = 1;
        em_stop(adapter);
        EM_CORE_UNLOCK(adapter);
        EM_CORE_LOCK_DESTROY(adapter);

        e1000_phy_hw_reset(&adapter->hw);

        em_release_manageability(adapter);
        em_release_hw_control(adapter);

        /* Unregister VLAN events */
        if (adapter->vlan_attach != NULL)
                EVENTHANDLER_DEREGISTER(vlan_config, adapter->vlan_attach);
        if (adapter->vlan_detach != NULL)
                EVENTHANDLER_DEREGISTER(vlan_unconfig, adapter->vlan_detach);

        ether_ifdetach(adapter->ifp);
        callout_drain(&adapter->timer);

        em_free_pci_resources(adapter);
        bus_generic_detach(dev);
        if_free(ifp);

        em_free_transmit_structures(adapter);
        em_free_receive_structures(adapter);

        em_release_hw_control(adapter);

        return (0);
}

/*********************************************************************
 *
 *  Shutdown entry point
 *
 **********************************************************************/

static int
em_shutdown(device_t dev)
{
        return em_suspend(dev);
}

/*
 * Suspend/resume device methods.
 */
static int
em_suspend(device_t dev)
{
        struct adapter *adapter = device_get_softc(dev);

        EM_CORE_LOCK(adapter);

        em_release_manageability(adapter);
        em_release_hw_control(adapter);
        em_enable_wakeup(dev);

        EM_CORE_UNLOCK(adapter);

        return bus_generic_suspend(dev);
}

static int
em_resume(device_t dev)
{
        struct adapter *adapter = device_get_softc(dev);
        struct ifnet *ifp = adapter->ifp;

        EM_CORE_LOCK(adapter);
        em_init_locked(adapter);
        em_init_manageability(adapter);
        EM_CORE_UNLOCK(adapter);
        em_start(ifp);

        return bus_generic_resume(dev);
}


/*********************************************************************
 *  Transmit entry point
 *
 *  em_start is called by the stack to initiate a transmit.
 *  The driver will remain in this routine as long as there are
 *  packets to transmit and transmit resources are available.
 *  If resources are not available, the stack is notified and
 *  the packet is requeued.
 **********************************************************************/

#ifdef EM_MULTIQUEUE
static int
em_mq_start_locked(struct ifnet *ifp, struct tx_ring *txr, struct mbuf *m)
{
        struct adapter  *adapter = txr->adapter;
        struct mbuf     *next;
        int             err = 0, enq = 0;

        if ((ifp->if_drv_flags & (IFF_DRV_RUNNING | IFF_DRV_OACTIVE)) !=
            IFF_DRV_RUNNING || adapter->link_active == 0) {
                if (m != NULL)
                        err = drbr_enqueue(ifp, txr->br, m);
                return (err);
        }

        /* Call cleanup if the number of TX descriptors is low */
        if (txr->tx_avail <= EM_TX_CLEANUP_THRESHOLD)
                em_txeof(txr);

        enq = 0;
        if (m == NULL) {
                next = drbr_dequeue(ifp, txr->br);
        } else if (drbr_needs_enqueue(ifp, txr->br)) {
                if ((err = drbr_enqueue(ifp, txr->br, m)) != 0)
                        return (err);
                next = drbr_dequeue(ifp, txr->br);
        } else
                next = m;

        /* Process the queue */
        while (next != NULL) {
                if ((err = em_xmit(txr, &next)) != 0) {
                        if (next != NULL)
                                err = drbr_enqueue(ifp, txr->br, next);
                        break;
                }
                enq++;
                drbr_stats_update(ifp, next->m_pkthdr.len, next->m_flags);
                ETHER_BPF_MTAP(ifp, next);
                if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
                        break;
                if (txr->tx_avail < EM_MAX_SCATTER) {
                        ifp->if_drv_flags |= IFF_DRV_OACTIVE;
                        break;
                }
                next = drbr_dequeue(ifp, txr->br);
        }

        if (enq > 0) {
                /* Set the watchdog */
                txr->watchdog_check = TRUE;
                txr->watchdog_time = ticks;
        }
        return (err);
}

857 ** Multiqueue capable stack interface, this is not
858 ** yet truely multiqueue, but that is coming...
859 */
860 static int
861 em_mq_start(struct ifnet *ifp, struct mbuf *m)
862 {
863         struct adapter  *adapter = ifp->if_softc;
864         struct tx_ring  *txr;
865         int             i, error = 0;
866
867         /* Which queue to use */
868         if ((m->m_flags & M_FLOWID) != 0)
869                 i = m->m_pkthdr.flowid % adapter->num_queues;
870         else
871                 i = curcpu % adapter->num_queues;
872
873         txr = &adapter->tx_rings[i];
874
875         if (EM_TX_TRYLOCK(txr)) {
876                 error = em_mq_start_locked(ifp, txr, m);
877                 EM_TX_UNLOCK(txr);
878         } else 
879                 error = drbr_enqueue(ifp, txr->br, m);
880
881         return (error);
882 }
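
/*
 * Illustration of the ring selection above: with adapter->num_queues == 2,
 * an mbuf carrying flowid 5 maps to tx_rings[1], keeping each flow on a
 * single ring; without M_FLOWID the sending CPU index is used instead.
 */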

/*
** Flush all ring buffers
*/
static void
em_qflush(struct ifnet *ifp)
{
        struct adapter  *adapter = ifp->if_softc;
        struct tx_ring  *txr = adapter->tx_rings;
        struct mbuf     *m;

        for (int i = 0; i < adapter->num_queues; i++, txr++) {
                EM_TX_LOCK(txr);
                while ((m = buf_ring_dequeue_sc(txr->br)) != NULL)
                        m_freem(m);
                EM_TX_UNLOCK(txr);
        }
        if_qflush(ifp);
}

#endif /* EM_MULTIQUEUE */

static void
em_start_locked(struct ifnet *ifp, struct tx_ring *txr)
{
        struct adapter  *adapter = ifp->if_softc;
        struct mbuf     *m_head;

        EM_TX_LOCK_ASSERT(txr);

        if ((ifp->if_drv_flags & (IFF_DRV_RUNNING|IFF_DRV_OACTIVE)) !=
            IFF_DRV_RUNNING)
                return;

        if (!adapter->link_active)
                return;

        /* Call cleanup if the number of TX descriptors is low */
        if (txr->tx_avail <= EM_TX_CLEANUP_THRESHOLD)
                em_txeof(txr);

        while (!IFQ_DRV_IS_EMPTY(&ifp->if_snd)) {
                if (txr->tx_avail < EM_MAX_SCATTER) {
                        ifp->if_drv_flags |= IFF_DRV_OACTIVE;
                        break;
                }
                IFQ_DRV_DEQUEUE(&ifp->if_snd, m_head);
                if (m_head == NULL)
                        break;
                /*
                 *  Encapsulation can modify our pointer, and/or make it
                 *  NULL on failure.  In that event, we can't requeue.
                 */
                if (em_xmit(txr, &m_head)) {
                        if (m_head == NULL)
                                break;
                        ifp->if_drv_flags |= IFF_DRV_OACTIVE;
                        IFQ_DRV_PREPEND(&ifp->if_snd, m_head);
                        break;
                }

                /* Send a copy of the frame to the BPF listener */
                ETHER_BPF_MTAP(ifp, m_head);

                /* Set timeout in case hardware has problems transmitting. */
                txr->watchdog_time = ticks;
                txr->watchdog_check = TRUE;
        }

        return;
}

static void
em_start(struct ifnet *ifp)
{
        struct adapter  *adapter = ifp->if_softc;
        struct tx_ring  *txr = adapter->tx_rings;

        if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
                EM_TX_LOCK(txr);
                em_start_locked(ifp, txr);
                EM_TX_UNLOCK(txr);
        }
        return;
}

/*********************************************************************
 *  Ioctl entry point
 *
 *  em_ioctl is called when the user wants to configure the
 *  interface.
 *
 *  return 0 on success, positive on failure
 **********************************************************************/

static int
em_ioctl(struct ifnet *ifp, u_long command, caddr_t data)
{
        struct adapter  *adapter = ifp->if_softc;
        struct ifreq *ifr = (struct ifreq *)data;
#ifdef INET
        struct ifaddr *ifa = (struct ifaddr *)data;
#endif
        int error = 0;

        if (adapter->in_detach)
                return (error);

        switch (command) {
        case SIOCSIFADDR:
#ifdef INET
                if (ifa->ifa_addr->sa_family == AF_INET) {
                        /*
                         * XXX
                         * Since resetting hardware takes a very long time
                         * and results in link renegotiation, we only
                         * initialize the hardware when it is absolutely
                         * required.
                         */
                        ifp->if_flags |= IFF_UP;
                        if (!(ifp->if_drv_flags & IFF_DRV_RUNNING)) {
                                EM_CORE_LOCK(adapter);
                                em_init_locked(adapter);
                                EM_CORE_UNLOCK(adapter);
                        }
                        arp_ifinit(ifp, ifa);
                } else
#endif
                        error = ether_ioctl(ifp, command, data);
                break;
        case SIOCSIFMTU:
            {
                int max_frame_size;

                IOCTL_DEBUGOUT("ioctl rcv'd: SIOCSIFMTU (Set Interface MTU)");

                EM_CORE_LOCK(adapter);
                switch (adapter->hw.mac.type) {
                case e1000_82571:
                case e1000_82572:
                case e1000_ich9lan:
                case e1000_ich10lan:
                case e1000_82574:
                case e1000_80003es2lan: /* 9K Jumbo Frame size */
                        max_frame_size = 9234;
                        break;
                case e1000_pchlan:
                        max_frame_size = 4096;
                        break;
                        /* Adapters that do not support jumbo frames */
                case e1000_82583:
                case e1000_ich8lan:
                        max_frame_size = ETHER_MAX_LEN;
                        break;
                default:
                        max_frame_size = MAX_JUMBO_FRAME_SIZE;
                }
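                /*
                 * Illustration: with a 9234-byte max_frame_size (the
                 * 82571 class above), the largest MTU accepted by the
                 * check below is 9234 - ETHER_HDR_LEN (14) -
                 * ETHER_CRC_LEN (4) = 9216 bytes.
                 */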
                if (ifr->ifr_mtu > max_frame_size - ETHER_HDR_LEN -
                    ETHER_CRC_LEN) {
                        EM_CORE_UNLOCK(adapter);
                        error = EINVAL;
                        break;
                }

                ifp->if_mtu = ifr->ifr_mtu;
                adapter->max_frame_size =
                    ifp->if_mtu + ETHER_HDR_LEN + ETHER_CRC_LEN;
                em_init_locked(adapter);
                EM_CORE_UNLOCK(adapter);
                break;
            }
        case SIOCSIFFLAGS:
                IOCTL_DEBUGOUT("ioctl rcv'd:\
                    SIOCSIFFLAGS (Set Interface Flags)");
                EM_CORE_LOCK(adapter);
                if (ifp->if_flags & IFF_UP) {
                        if ((ifp->if_drv_flags & IFF_DRV_RUNNING)) {
                                if ((ifp->if_flags ^ adapter->if_flags) &
                                    (IFF_PROMISC | IFF_ALLMULTI)) {
                                        em_disable_promisc(adapter);
                                        em_set_promisc(adapter);
                                }
                        } else
                                em_init_locked(adapter);
                } else
                        if (ifp->if_drv_flags & IFF_DRV_RUNNING)
                                em_stop(adapter);
                adapter->if_flags = ifp->if_flags;
                EM_CORE_UNLOCK(adapter);
                break;
        case SIOCADDMULTI:
        case SIOCDELMULTI:
                IOCTL_DEBUGOUT("ioctl rcv'd: SIOC(ADD|DEL)MULTI");
                if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
                        EM_CORE_LOCK(adapter);
                        em_disable_intr(adapter);
                        em_set_multi(adapter);
#ifdef DEVICE_POLLING
                        if (!(ifp->if_capenable & IFCAP_POLLING))
#endif
                                em_enable_intr(adapter);
                        EM_CORE_UNLOCK(adapter);
                }
                break;
        case SIOCSIFMEDIA:
                /* Check SOL/IDER usage */
                EM_CORE_LOCK(adapter);
                if (e1000_check_reset_block(&adapter->hw)) {
                        EM_CORE_UNLOCK(adapter);
                        device_printf(adapter->dev, "Media change is"
                            " blocked due to SOL/IDER session.\n");
                        break;
                }
                EM_CORE_UNLOCK(adapter);
                /* FALLTHROUGH */
        case SIOCGIFMEDIA:
                IOCTL_DEBUGOUT("ioctl rcv'd: \
                    SIOCxIFMEDIA (Get/Set Interface Media)");
                error = ifmedia_ioctl(ifp, ifr, &adapter->media, command);
                break;
        case SIOCSIFCAP:
            {
                int mask, reinit;

                IOCTL_DEBUGOUT("ioctl rcv'd: SIOCSIFCAP (Set Capabilities)");
                reinit = 0;
                mask = ifr->ifr_reqcap ^ ifp->if_capenable;
#ifdef DEVICE_POLLING
                if (mask & IFCAP_POLLING) {
                        if (ifr->ifr_reqcap & IFCAP_POLLING) {
                                error = ether_poll_register(em_poll, ifp);
                                if (error)
                                        return (error);
                                EM_CORE_LOCK(adapter);
                                em_disable_intr(adapter);
                                ifp->if_capenable |= IFCAP_POLLING;
                                EM_CORE_UNLOCK(adapter);
                        } else {
                                error = ether_poll_deregister(ifp);
                                /* Enable interrupt even in error case */
                                EM_CORE_LOCK(adapter);
                                em_enable_intr(adapter);
                                ifp->if_capenable &= ~IFCAP_POLLING;
                                EM_CORE_UNLOCK(adapter);
                        }
                }
#endif
                if (mask & IFCAP_HWCSUM) {
                        ifp->if_capenable ^= IFCAP_HWCSUM;
                        reinit = 1;
                }
                if (mask & IFCAP_TSO4) {
                        ifp->if_capenable ^= IFCAP_TSO4;
                        reinit = 1;
                }
                if (mask & IFCAP_VLAN_HWTAGGING) {
                        ifp->if_capenable ^= IFCAP_VLAN_HWTAGGING;
                        reinit = 1;
                }
                if (mask & IFCAP_VLAN_HWFILTER) {
                        ifp->if_capenable ^= IFCAP_VLAN_HWFILTER;
                        reinit = 1;
                }
                if ((mask & IFCAP_WOL) &&
                    (ifp->if_capabilities & IFCAP_WOL) != 0) {
                        if (mask & IFCAP_WOL_MCAST)
                                ifp->if_capenable ^= IFCAP_WOL_MCAST;
                        if (mask & IFCAP_WOL_MAGIC)
                                ifp->if_capenable ^= IFCAP_WOL_MAGIC;
                }
                if (reinit && (ifp->if_drv_flags & IFF_DRV_RUNNING))
                        em_init(adapter);
                VLAN_CAPABILITIES(ifp);
                break;
            }

        default:
                error = ether_ioctl(ifp, command, data);
                break;
        }

        return (error);
}


/*********************************************************************
 *  Init entry point
 *
 *  This routine is used in two ways: by the stack, as the init entry
 *  point in the network interface structure, and by the driver, as a
 *  hw/sw initialization routine to get to a consistent state.
 **********************************************************************/

static void
em_init_locked(struct adapter *adapter)
{
        struct ifnet    *ifp = adapter->ifp;
        device_t        dev = adapter->dev;
        u32             pba;

        INIT_DEBUGOUT("em_init: begin");

        EM_CORE_LOCK_ASSERT(adapter);

        em_disable_intr(adapter);
        callout_stop(&adapter->timer);

        /*
         * Packet Buffer Allocation (PBA)
         * Writing PBA sets the receive portion of the buffer;
         * the remainder is used for the transmit buffer.
         */
        switch (adapter->hw.mac.type) {
        /* Total Packet Buffer on these is 48K */
        case e1000_82571:
        case e1000_82572:
        case e1000_80003es2lan:
                pba = E1000_PBA_32K; /* 32K for Rx, 16K for Tx */
                break;
        case e1000_82573: /* 82573: Total Packet Buffer is 32K */
                pba = E1000_PBA_12K; /* 12K for Rx, 20K for Tx */
                break;
        case e1000_82574:
        case e1000_82583:
                pba = E1000_PBA_20K; /* 20K for Rx, 20K for Tx */
                break;
        case e1000_ich9lan:
        case e1000_ich10lan:
        case e1000_pchlan:
                pba = E1000_PBA_10K;
                break;
        case e1000_ich8lan:
                pba = E1000_PBA_8K;
                break;
        default:
                if (adapter->max_frame_size > 8192)
                        pba = E1000_PBA_40K; /* 40K for Rx, 24K for Tx */
                else
                        pba = E1000_PBA_48K; /* 48K for Rx, 16K for Tx */
        }

        INIT_DEBUGOUT1("em_init: pba=%dK", pba);
        E1000_WRITE_REG(&adapter->hw, E1000_PBA, pba);

        /* Get the latest MAC address; the user may have set a LAA */
        bcopy(IF_LLADDR(adapter->ifp), adapter->hw.mac.addr,
              ETHER_ADDR_LEN);

        /* Put the address into the Receive Address Array */
        e1000_rar_set(&adapter->hw, adapter->hw.mac.addr, 0);

        /*
         * With the 82571 adapter, RAR[0] may be overwritten
         * when the other port is reset, so we keep a duplicate
         * in RAR[14] for that eventuality; this ensures that
         * the interface continues to function.
         */
        if (adapter->hw.mac.type == e1000_82571) {
                e1000_set_laa_state_82571(&adapter->hw, TRUE);
                e1000_rar_set(&adapter->hw, adapter->hw.mac.addr,
                    E1000_RAR_ENTRIES - 1);
        }

        /* Initialize the hardware */
        em_reset(adapter);
        em_update_link_status(adapter);

        /* Setup VLAN support, basic and offload if available */
        E1000_WRITE_REG(&adapter->hw, E1000_VET, ETHERTYPE_VLAN);

        /* Use real VLAN Filter support? */
        if (ifp->if_capenable & IFCAP_VLAN_HWTAGGING) {
                if (ifp->if_capenable & IFCAP_VLAN_HWFILTER)
                        /* Use real VLAN Filter support */
                        em_setup_vlan_hw_support(adapter);
                else {
                        u32 ctrl;
                        ctrl = E1000_READ_REG(&adapter->hw, E1000_CTRL);
                        ctrl |= E1000_CTRL_VME;
                        E1000_WRITE_REG(&adapter->hw, E1000_CTRL, ctrl);
                }
        }

        /* Set hardware offload abilities */
        ifp->if_hwassist = 0;
        if (ifp->if_capenable & IFCAP_TXCSUM)
                ifp->if_hwassist |= (CSUM_TCP | CSUM_UDP);
        if (ifp->if_capenable & IFCAP_TSO4)
                ifp->if_hwassist |= CSUM_TSO;

        /* Configure for OS presence */
        em_init_manageability(adapter);

        /* Prepare transmit descriptors and buffers */
        em_setup_transmit_structures(adapter);
        em_initialize_transmit_unit(adapter);

        /* Setup Multicast table */
        em_set_multi(adapter);

        /* Prepare receive descriptors and buffers */
        if (em_setup_receive_structures(adapter)) {
                device_printf(dev, "Could not setup receive structures\n");
                em_stop(adapter);
                return;
        }
        em_initialize_receive_unit(adapter);

        /* Don't lose promiscuous settings */
        em_set_promisc(adapter);

        ifp->if_drv_flags |= IFF_DRV_RUNNING;
        ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;

        callout_reset(&adapter->timer, hz, em_local_timer, adapter);
        e1000_clear_hw_cntrs_base_generic(&adapter->hw);

        /* MSI/X configuration for 82574 */
        if (adapter->hw.mac.type == e1000_82574) {
                int tmp;
                tmp = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
                tmp |= E1000_CTRL_EXT_PBA_CLR;
                E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT, tmp);
                /* Set the IVAR - interrupt vector routing. */
                E1000_WRITE_REG(&adapter->hw, E1000_IVAR, adapter->ivars);
        }

#ifdef DEVICE_POLLING
        /*
         * Only enable interrupts if we are not polling; make sure
         * they are off otherwise.
         */
        if (ifp->if_capenable & IFCAP_POLLING)
                em_disable_intr(adapter);
        else
#endif /* DEVICE_POLLING */
                em_enable_intr(adapter);

        /* AMT based hardware can now take control from firmware */
        if (adapter->has_manage && adapter->has_amt)
                em_get_hw_control(adapter);

        /* Don't reset the phy next time init gets called */
        adapter->hw.phy.reset_disable = TRUE;
}

static void
em_init(void *arg)
{
        struct adapter *adapter = arg;

        EM_CORE_LOCK(adapter);
        em_init_locked(adapter);
        EM_CORE_UNLOCK(adapter);
}


#ifdef DEVICE_POLLING
/*********************************************************************
 *
 *  Legacy polling routine: note this only works with a single queue
 *
 *********************************************************************/
1348 static int
1349 em_poll(struct ifnet *ifp, enum poll_cmd cmd, int count)
1350 {
1351         struct adapter *adapter = ifp->if_softc;
1352         struct tx_ring  *txr = adapter->tx_rings;
1353         struct rx_ring  *rxr = adapter->rx_rings;
1354         u32             reg_icr, rx_done = 0;
1355
1356         EM_CORE_LOCK(adapter);
1357         if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) {
1358                 EM_CORE_UNLOCK(adapter);
1359                 return (rx_done);
1360         }
1361
1362         if (cmd == POLL_AND_CHECK_STATUS) {
1363                 reg_icr = E1000_READ_REG(&adapter->hw, E1000_ICR);
1364                 if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
1365                         callout_stop(&adapter->timer);
1366                         adapter->hw.mac.get_link_status = 1;
1367                         em_update_link_status(adapter);
1368                         callout_reset(&adapter->timer, hz,
1369                             em_local_timer, adapter);
1370                 }
1371         }
1372         EM_CORE_UNLOCK(adapter);
1373
1374         EM_RX_LOCK(rxr);
1375         rx_done = em_rxeof(rxr, count);
1376         EM_RX_UNLOCK(rxr);
1377
1378         EM_TX_LOCK(txr);
1379         em_txeof(txr);
1380 #ifdef EM_MULTIQUEUE
1381         if (!drbr_empty(ifp, txr->br))
1382                 em_mq_start_locked(ifp, txr, NULL);
1383 #else
1384         if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
1385                 em_start_locked(ifp, txr);
1386 #endif
1387         EM_TX_UNLOCK(txr);
1388
1389         return (rx_done);
1390 }
1391 #endif /* DEVICE_POLLING */
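/*
 * Usage note: DEVICE_POLLING requires "options DEVICE_POLLING" in the
 * kernel configuration; polling is then toggled per interface, e.g.
 * "ifconfig em0 polling" / "ifconfig em0 -polling", which flips
 * IFCAP_POLLING in if_capenable and is honored by em_init_locked().
 */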
1392
1393
1394 /*********************************************************************
1395  *
1396  *  Fast Legacy/MSI Combined Interrupt Service routine  
1397  *
1398  *********************************************************************/
1399 static int
1400 em_irq_fast(void *arg)
1401 {
1402         struct adapter  *adapter = arg;
1403         struct ifnet    *ifp;
1404         u32             reg_icr;
1405
1406         ifp = adapter->ifp;
1407
1408         reg_icr = E1000_READ_REG(&adapter->hw, E1000_ICR);
1409
1410         /* Hot eject?  */
1411         if (reg_icr == 0xffffffff)
1412                 return FILTER_STRAY;
1413
1414         /* Definitely not our interrupt.  */
1415         if (reg_icr == 0x0)
1416                 return FILTER_STRAY;
1417
1418         /*
1419          * Starting with the 82571 chip, bit 31 should be used to
1420          * determine whether the interrupt belongs to us.
1421          */
1422         if (adapter->hw.mac.type >= e1000_82571 &&
1423             (reg_icr & E1000_ICR_INT_ASSERTED) == 0)
1424                 return FILTER_STRAY;
1425
1426         em_disable_intr(adapter);
1427         taskqueue_enqueue(adapter->tq, &adapter->que_task);
1428
1429         /* Link status change */
1430         if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
1431                 adapter->hw.mac.get_link_status = 1;
1432                 taskqueue_enqueue(taskqueue_fast, &adapter->link_task);
1433         }
1434
1435         if (reg_icr & E1000_ICR_RXO)
1436                 adapter->rx_overruns++;
1437         return FILTER_HANDLED;
1438 }
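/*
 * Note: em_irq_fast() runs as an interrupt filter, so it must not
 * sleep or acquire regular mutexes; it only reads ICR, masks further
 * interrupts with em_disable_intr(), and defers the real work to the
 * que taskqueue. em_handle_que() re-enables interrupts once the
 * deferred processing is done.
 */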
1439
1440 /* Combined RX/TX handler, used by Legacy and MSI */
1441 static void
1442 em_handle_que(void *context, int pending)
1443 {
1444         struct adapter  *adapter = context;
1445         struct ifnet    *ifp = adapter->ifp;
1446         struct tx_ring  *txr = adapter->tx_rings;
1447         struct rx_ring  *rxr = adapter->rx_rings;
1448         bool            more_rx;
1449
1450
1451         if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
1452                 EM_RX_LOCK(rxr);
1453                 more_rx = em_rxeof(rxr, adapter->rx_process_limit);
1454                 EM_RX_UNLOCK(rxr);
1455
1456                 EM_TX_LOCK(txr);
1457                 em_txeof(txr);
1458 #ifdef EM_MULTIQUEUE
1459                 if (!drbr_empty(ifp, txr->br))
1460                         em_mq_start_locked(ifp, txr, NULL);
1461 #else
1462                 if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
1463                         em_start_locked(ifp, txr);
1464 #endif
1465                 EM_TX_UNLOCK(txr);
1466                 if (more_rx) {
1467                         taskqueue_enqueue(adapter->tq, &adapter->que_task);
1468                         return;
1469                 }
1470         }
1471
1472         em_enable_intr(adapter);
1473         return;
1474 }
1475
1476
1477 /*********************************************************************
1478  *
1479  *  MSIX Interrupt Service Routines
1480  *
1481  **********************************************************************/
1482 static void
1483 em_msix_tx(void *arg)
1484 {
1485         struct tx_ring *txr = arg;
1486         struct adapter *adapter = txr->adapter;
1487         bool            more;
1488
1489         ++txr->tx_irq;
1490         EM_TX_LOCK(txr);
1491         more = em_txeof(txr);
1492         EM_TX_UNLOCK(txr);
1493         if (more)
1494                 taskqueue_enqueue(txr->tq, &txr->tx_task);
1495         else
1496                 /* Reenable this interrupt */
1497                 E1000_WRITE_REG(&adapter->hw, E1000_IMS, txr->ims);
1498         return;
1499 }
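/*
 * Note on the re-arm pattern used by these MSIX handlers: while a ring
 * still has work pending, the task is re-queued and the vector stays
 * masked; only when the ring is drained is the cause unmasked again by
 * writing its IMS bit (txr->ims = 1 << (22 + i), rxr->ims =
 * 1 << (20 + i), as assigned in em_allocate_msix()).
 */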
1500
1501 /*********************************************************************
1502  *
1503  *  MSIX RX Interrupt Service routine
1504  *
1505  **********************************************************************/
1506
1507 static void
1508 em_msix_rx(void *arg)
1509 {
1510         struct rx_ring  *rxr = arg;
1511         struct adapter  *adapter = rxr->adapter;
1512         bool            more;
1513
1514         EM_RX_LOCK(rxr);
1515         ++rxr->rx_irq;
1516         more = em_rxeof(rxr, adapter->rx_process_limit);
1517         EM_RX_UNLOCK(rxr);
1518         if (more)
1519                 taskqueue_enqueue(rxr->tq, &rxr->rx_task);
1520         else
1521                 /* Reenable this interrupt */
1522                 E1000_WRITE_REG(&adapter->hw, E1000_IMS, rxr->ims);
1523         return;
1524 }
1525
1526 /*********************************************************************
1527  *
1528  *  MSIX Link Fast Interrupt Service routine
1529  *
1530  **********************************************************************/
1531 static void
1532 em_msix_link(void *arg)
1533 {
1534         struct adapter  *adapter = arg;
1535         u32             reg_icr;
1536
1537         ++adapter->link_irq;
1538         reg_icr = E1000_READ_REG(&adapter->hw, E1000_ICR);
1539
1540         if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
1541                 adapter->hw.mac.get_link_status = 1;
1542                 taskqueue_enqueue(taskqueue_fast, &adapter->link_task);
1543         } else
1544                 E1000_WRITE_REG(&adapter->hw, E1000_IMS,
1545                     EM_MSIX_LINK | E1000_IMS_LSC);
1546         return;
1547 }
1548
1549 static void
1550 em_handle_rx(void *context, int pending)
1551 {
1552         struct rx_ring  *rxr = context;
1553         struct adapter  *adapter = rxr->adapter;
1554         bool            more;
1555
1556         EM_RX_LOCK(rxr);
1557         more = em_rxeof(rxr, adapter->rx_process_limit);
1558         EM_RX_UNLOCK(rxr);
1559         if (more)
1560                 taskqueue_enqueue(rxr->tq, &rxr->rx_task);
1561         else
1562                 /* Reenable this interrupt */
1563                 E1000_WRITE_REG(&adapter->hw, E1000_IMS, rxr->ims);
1564 }
1565
1566 static void
1567 em_handle_tx(void *context, int pending)
1568 {
1569         struct tx_ring  *txr = context;
1570         struct adapter  *adapter = txr->adapter;
1571         struct ifnet    *ifp = adapter->ifp;
1572
1573         if (!EM_TX_TRYLOCK(txr))
1574                 return;
1575
1576         em_txeof(txr);
1577
1578 #ifdef EM_MULTIQUEUE
1579         if (!drbr_empty(ifp, txr->br))
1580                 em_mq_start_locked(ifp, txr, NULL);
1581 #else
1582         if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
1583                 em_start_locked(ifp, txr);
1584 #endif
1585         E1000_WRITE_REG(&adapter->hw, E1000_IMS, txr->ims);
1586         EM_TX_UNLOCK(txr);
1587 }
1588
1589 static void
1590 em_handle_link(void *context, int pending)
1591 {
1592         struct adapter  *adapter = context;
1593         struct ifnet *ifp = adapter->ifp;
1594
1595         if (!(ifp->if_drv_flags & IFF_DRV_RUNNING))
1596                 return;
1597
1598         EM_CORE_LOCK(adapter);
1599         callout_stop(&adapter->timer);
1600         em_update_link_status(adapter);
1601         callout_reset(&adapter->timer, hz, em_local_timer, adapter);
1602         E1000_WRITE_REG(&adapter->hw, E1000_IMS,
1603             EM_MSIX_LINK | E1000_IMS_LSC);
1604         EM_CORE_UNLOCK(adapter);
1605 }
1606
1607
1608 /*********************************************************************
1609  *
1610  *  Media Ioctl callback
1611  *
1612  *  This routine is called whenever the user queries the status of
1613  *  the interface using ifconfig.
1614  *
1615  **********************************************************************/
1616 static void
1617 em_media_status(struct ifnet *ifp, struct ifmediareq *ifmr)
1618 {
1619         struct adapter *adapter = ifp->if_softc;
1620         u_char fiber_type = IFM_1000_SX;
1621
1622         INIT_DEBUGOUT("em_media_status: begin");
1623
1624         EM_CORE_LOCK(adapter);
1625         em_update_link_status(adapter);
1626
1627         ifmr->ifm_status = IFM_AVALID;
1628         ifmr->ifm_active = IFM_ETHER;
1629
1630         if (!adapter->link_active) {
1631                 EM_CORE_UNLOCK(adapter);
1632                 return;
1633         }
1634
1635         ifmr->ifm_status |= IFM_ACTIVE;
1636
1637         if ((adapter->hw.phy.media_type == e1000_media_type_fiber) ||
1638             (adapter->hw.phy.media_type == e1000_media_type_internal_serdes)) {
1639                 ifmr->ifm_active |= fiber_type | IFM_FDX;
1640         } else {
1641                 switch (adapter->link_speed) {
1642                 case 10:
1643                         ifmr->ifm_active |= IFM_10_T;
1644                         break;
1645                 case 100:
1646                         ifmr->ifm_active |= IFM_100_TX;
1647                         break;
1648                 case 1000:
1649                         ifmr->ifm_active |= IFM_1000_T;
1650                         break;
1651                 }
1652                 if (adapter->link_duplex == FULL_DUPLEX)
1653                         ifmr->ifm_active |= IFM_FDX;
1654                 else
1655                         ifmr->ifm_active |= IFM_HDX;
1656         }
1657         EM_CORE_UNLOCK(adapter);
1658 }
1659
1660 /*********************************************************************
1661  *
1662  *  Media Ioctl callback
1663  *
1664  *  This routine is called when the user changes speed/duplex using
1665  *  media/mediopt option with ifconfig.
1666  *  the media/mediaopt options with ifconfig.
1667  **********************************************************************/
1668 static int
1669 em_media_change(struct ifnet *ifp)
1670 {
1671         struct adapter *adapter = ifp->if_softc;
1672         struct ifmedia  *ifm = &adapter->media;
1673
1674         INIT_DEBUGOUT("em_media_change: begin");
1675
1676         if (IFM_TYPE(ifm->ifm_media) != IFM_ETHER)
1677                 return (EINVAL);
1678
1679         EM_CORE_LOCK(adapter);
1680         switch (IFM_SUBTYPE(ifm->ifm_media)) {
1681         case IFM_AUTO:
1682                 adapter->hw.mac.autoneg = DO_AUTO_NEG;
1683                 adapter->hw.phy.autoneg_advertised = AUTONEG_ADV_DEFAULT;
1684                 break;
1685         case IFM_1000_LX:
1686         case IFM_1000_SX:
1687         case IFM_1000_T:
1688                 adapter->hw.mac.autoneg = DO_AUTO_NEG;
1689                 adapter->hw.phy.autoneg_advertised = ADVERTISE_1000_FULL;
1690                 break;
1691         case IFM_100_TX:
1692                 adapter->hw.mac.autoneg = FALSE;
1693                 adapter->hw.phy.autoneg_advertised = 0;
1694                 if ((ifm->ifm_media & IFM_GMASK) == IFM_FDX)
1695                         adapter->hw.mac.forced_speed_duplex = ADVERTISE_100_FULL;
1696                 else
1697                         adapter->hw.mac.forced_speed_duplex = ADVERTISE_100_HALF;
1698                 break;
1699         case IFM_10_T:
1700                 adapter->hw.mac.autoneg = FALSE;
1701                 adapter->hw.phy.autoneg_advertised = 0;
1702                 if ((ifm->ifm_media & IFM_GMASK) == IFM_FDX)
1703                         adapter->hw.mac.forced_speed_duplex = ADVERTISE_10_FULL;
1704                 else
1705                         adapter->hw.mac.forced_speed_duplex = ADVERTISE_10_HALF;
1706                 break;
1707         default:
1708                 device_printf(adapter->dev, "Unsupported media type\n");
1709         }
1710
1711         /* As the speed/duplex settings may have changed, we need to
1712          * reset the PHY.
1713          */
1714         adapter->hw.phy.reset_disable = FALSE;
1715
1716         em_init_locked(adapter);
1717         EM_CORE_UNLOCK(adapter);
1718
1719         return (0);
1720 }
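/*
 * Usage sketch: these two callbacks back the ifconfig media commands,
 * e.g. "ifconfig em0 media 100baseTX mediaopt full-duplex" lands in
 * em_media_change(), while the status line printed by plain
 * "ifconfig em0" comes from em_media_status().
 */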
1721
1722 /*********************************************************************
1723  *
1724  *  This routine maps the mbufs to tx descriptors.
1725  *
1726  *  return 0 on success, positive on failure
1727  **********************************************************************/
1728
1729 static int
1730 em_xmit(struct tx_ring *txr, struct mbuf **m_headp)
1731 {
1732         struct adapter          *adapter = txr->adapter;
1733         bus_dma_segment_t       segs[EM_MAX_SCATTER];
1734         bus_dmamap_t            map;
1735         struct em_buffer        *tx_buffer, *tx_buffer_mapped;
1736         struct e1000_tx_desc    *ctxd = NULL;
1737         struct mbuf             *m_head;
1738         u32                     txd_upper, txd_lower, txd_used, txd_saved;
1739         int                     nsegs, i, j, first, last = 0;
1740         int                     error, do_tso, tso_desc = 0;
1741
1742         m_head = *m_headp;
1743         txd_upper = txd_lower = txd_used = txd_saved = 0;
1744         do_tso = ((m_head->m_pkthdr.csum_flags & CSUM_TSO) != 0);
1745
1746         /*
1747          * TSO workaround:
1748          *  If an mbuf contains only the headers, we need
1749          *  to pull 4 bytes of payload data into it.
1750          */
1751         if (do_tso && (m_head->m_len <= M_TSO_LEN)) {
1752                 m_head = m_pullup(m_head, M_TSO_LEN + 4);
1753                 *m_headp = m_head;
1754                 if (m_head == NULL)
1755                         return (ENOBUFS);
1756         }
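        /*
         * Note: m_pullup() may substitute a new mbuf for the chain head
         * (and frees the chain entirely on failure), which is why
         * *m_headp is updated before the NULL check above.
         */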
1757
1758         /*
1759          * Map the packet for DMA
1760          *
1761          * Capture the first descriptor index,
1762          * this descriptor will have the index
1763          * of the EOP which is the only one that
1764          * now gets a DONE bit writeback.
1765          */
1766         first = txr->next_avail_desc;
1767         tx_buffer = &txr->tx_buffers[first];
1768         tx_buffer_mapped = tx_buffer;
1769         map = tx_buffer->map;
1770
1771         error = bus_dmamap_load_mbuf_sg(txr->txtag, map,
1772             *m_headp, segs, &nsegs, BUS_DMA_NOWAIT);
1773
1774         /*
1775          * There are two types of errors we can (try) to handle:
1776          * - EFBIG means the mbuf chain was too long and bus_dma ran
1777          *   out of segments.  Defragment the mbuf chain and try again.
1778          * - ENOMEM means bus_dma could not obtain enough bounce buffers
1779          *   at this point in time.  Defer sending and try again later.
1780          * All other errors, in particular EINVAL, are fatal and prevent the
1781          * mbuf chain from ever going through.  Drop it and report error.
1782          */
1783         if (error == EFBIG) {
1784                 struct mbuf *m;
1785
1786                 m = m_defrag(*m_headp, M_DONTWAIT);
1787                 if (m == NULL) {
1788                         adapter->mbuf_alloc_failed++;
1789                         m_freem(*m_headp);
1790                         *m_headp = NULL;
1791                         return (ENOBUFS);
1792                 }
1793                 *m_headp = m;
1794
1795                 /* Try it again */
1796                 error = bus_dmamap_load_mbuf_sg(txr->txtag, map,
1797                     *m_headp, segs, &nsegs, BUS_DMA_NOWAIT);
1798
1799                 if (error) {
1800                         adapter->no_tx_dma_setup++;
1801                         m_freem(*m_headp);
1802                         *m_headp = NULL;
1803                         return (error);
1804                 }
1805         } else if (error != 0) {
1806                 adapter->no_tx_dma_setup++;
1807                 return (error);
1808         }
1809
1810         /*
1811          * TSO Hardware workaround, if this packet is not
1812          * TSO, and is only a single descriptor long, and
1813          * it follows a TSO burst, then we need to add a
1814          * sentinel descriptor to prevent premature writeback.
1815          */
1816         if ((do_tso == 0) && (txr->tx_tso == TRUE)) {
1817                 if (nsegs == 1)
1818                         tso_desc = TRUE;
1819                 txr->tx_tso = FALSE;
1820         }
1821
1822         if (nsegs > (txr->tx_avail - 2)) {
1823                 txr->no_desc_avail++;
1824                 bus_dmamap_unload(txr->txtag, map);
1825                 return (ENOBUFS);
1826         }
1827         m_head = *m_headp;
1828
1829         /* Do hardware assists */
1830 #if __FreeBSD_version >= 700000
1831         if (m_head->m_pkthdr.csum_flags & CSUM_TSO) {
1832                 error = em_tso_setup(txr, m_head, &txd_upper, &txd_lower);
1833                 if (error != TRUE)
1834                         return (ENXIO); /* something foobar */
1835                 /* we need to make a final sentinel transmit desc */
1836                 tso_desc = TRUE;
1837         } else
1838 #endif
1839         if (m_head->m_pkthdr.csum_flags & CSUM_OFFLOAD)
1840                 em_transmit_checksum_setup(txr, m_head,
1841                     &txd_upper, &txd_lower);
1842
1843         i = txr->next_avail_desc;
1844
1845         /* Set up our transmit descriptors */
1846         for (j = 0; j < nsegs; j++) {
1847                 bus_size_t seg_len;
1848                 bus_addr_t seg_addr;
1849
1850                 tx_buffer = &txr->tx_buffers[i];
1851                 ctxd = &txr->tx_base[i];
1852                 seg_addr = segs[j].ds_addr;
1853                 seg_len  = segs[j].ds_len;
1854                 /*
1855                 ** TSO Workaround:
1856                 ** If this is the last descriptor, we want to
1857                 ** split it so we have a small final sentinel
1858                 */
1859                 if (tso_desc && (j == (nsegs -1)) && (seg_len > 8)) {
1860                         seg_len -= 4;
1861                         ctxd->buffer_addr = htole64(seg_addr);
1862                         ctxd->lower.data = htole32(
1863                             adapter->txd_cmd | txd_lower | seg_len);
1864                         ctxd->upper.data =
1865                             htole32(txd_upper);
1866                         if (++i == adapter->num_tx_desc)
1867                                 i = 0;
1868                         /* Now make the sentinel */
1869                         ++txd_used; /* using an extra txd */
1870                         ctxd = &txr->tx_base[i];
1871                         tx_buffer = &txr->tx_buffers[i];
1872                         ctxd->buffer_addr =
1873                             htole64(seg_addr + seg_len);
1874                         ctxd->lower.data = htole32(
1875                             adapter->txd_cmd | txd_lower | 4);
1876                         ctxd->upper.data =
1877                             htole32(txd_upper);
1878                         last = i;
1879                         if (++i == adapter->num_tx_desc)
1880                                 i = 0;
1881                 } else {
1882                         ctxd->buffer_addr = htole64(seg_addr);
1883                         ctxd->lower.data = htole32(
1884                             adapter->txd_cmd | txd_lower | seg_len);
1885                         ctxd->upper.data =
1886                             htole32(txd_upper);
1887                         last = i;
1888                         if (++i == adapter->num_tx_desc)
1889                                 i = 0;
1890                 }
1891                 tx_buffer->m_head = NULL;
1892                 tx_buffer->next_eop = -1;
1893         }
1894
1895         txr->next_avail_desc = i;
1896         txr->tx_avail -= nsegs;
1897         if (tso_desc) /* TSO used an extra for sentinel */
1898                 txr->tx_avail -= txd_used;
1899
1900         if (m_head->m_flags & M_VLANTAG) {
1901                 /* Set the vlan id. */
1902                 ctxd->upper.fields.special =
1903                     htole16(m_head->m_pkthdr.ether_vtag);
1904                 /* Tell hardware to add tag */
1905                 ctxd->lower.data |= htole32(E1000_TXD_CMD_VLE);
1906         }
1907
1908         tx_buffer->m_head = m_head;
1909         tx_buffer_mapped->map = tx_buffer->map;
1910         tx_buffer->map = map;
1911         bus_dmamap_sync(txr->txtag, map, BUS_DMASYNC_PREWRITE);
1912
1913         /*
1914          * Last Descriptor of Packet
1915          * needs End Of Packet (EOP)
1916          * and Report Status (RS)
1917          */
1918         ctxd->lower.data |=
1919             htole32(E1000_TXD_CMD_EOP | E1000_TXD_CMD_RS);
1920         /*
1921          * Keep track in the first buffer which
1922          * descriptor will be written back
1923          */
1924         tx_buffer = &txr->tx_buffers[first];
1925         tx_buffer->next_eop = last;
1926
1927         /*
1928          * Advance the Transmit Descriptor Tail (TDT), this tells the E1000
1929          * that this frame is available to transmit.
1930          */
1931         bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
1932             BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
1933         E1000_WRITE_REG(&adapter->hw, E1000_TDT(txr->me), i);
1934
1935         return (0);
1936 }
1937
1938 static void
1939 em_set_promisc(struct adapter *adapter)
1940 {
1941         struct ifnet    *ifp = adapter->ifp;
1942         u32             reg_rctl;
1943
1944         reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
1945
1946         if (ifp->if_flags & IFF_PROMISC) {
1947                 reg_rctl |= (E1000_RCTL_UPE | E1000_RCTL_MPE);
1948                 /* Turn this on if you want to see bad packets */
1949                 if (em_debug_sbp)
1950                         reg_rctl |= E1000_RCTL_SBP;
1951                 E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
1952         } else if (ifp->if_flags & IFF_ALLMULTI) {
1953                 reg_rctl |= E1000_RCTL_MPE;
1954                 reg_rctl &= ~E1000_RCTL_UPE;
1955                 E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
1956         }
1957 }
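/*
 * Note: em_set_promisc() only ever sets filter bits; clearing
 * UPE/MPE/SBP when the flags are switched off is the job of
 * em_disable_promisc(), which the SIOCSIFFLAGS ioctl path runs before
 * re-applying the current flags.
 */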
1958
1959 static void
1960 em_disable_promisc(struct adapter *adapter)
1961 {
1962         u32     reg_rctl;
1963
1964         reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
1965
1966         reg_rctl &=  (~E1000_RCTL_UPE);
1967         reg_rctl &=  (~E1000_RCTL_MPE);
1968         reg_rctl &=  (~E1000_RCTL_SBP);
1969         E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
1970 }
1971
1972
1973 /*********************************************************************
1974  *  Multicast Update
1975  *
1976  *  This routine is called whenever the multicast address list is updated.
1977  *
1978  **********************************************************************/
1979
1980 static void
1981 em_set_multi(struct adapter *adapter)
1982 {
1983         struct ifnet    *ifp = adapter->ifp;
1984         struct ifmultiaddr *ifma;
1985         u32 reg_rctl = 0;
1986         u8  *mta; /* Multicast array memory */
1987         int mcnt = 0;
1988
1989         IOCTL_DEBUGOUT("em_set_multi: begin");
1990
1991         if (adapter->hw.mac.type == e1000_82542 && 
1992             adapter->hw.revision_id == E1000_REVISION_2) {
1993                 reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
1994                 if (adapter->hw.bus.pci_cmd_word & CMD_MEM_WRT_INVALIDATE)
1995                         e1000_pci_clear_mwi(&adapter->hw);
1996                 reg_rctl |= E1000_RCTL_RST;
1997                 E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
1998                 msec_delay(5);
1999         }
2000
2001         /* Allocate temporary memory to setup array */
2002         mta = malloc(sizeof(u8) *
2003             (ETH_ADDR_LEN * MAX_NUM_MULTICAST_ADDRESSES),
2004             M_DEVBUF, M_NOWAIT | M_ZERO);
2005         if (mta == NULL)
2006                 panic("em_set_multi memory failure\n");
2007
2008 #if __FreeBSD_version < 800000
2009         IF_ADDR_LOCK(ifp);
2010 #else
2011         if_maddr_rlock(ifp);
2012 #endif
2013         TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
2014                 if (ifma->ifma_addr->sa_family != AF_LINK)
2015                         continue;
2016
2017                 if (mcnt == MAX_NUM_MULTICAST_ADDRESSES)
2018                         break;
2019
2020                 bcopy(LLADDR((struct sockaddr_dl *)ifma->ifma_addr),
2021                     &mta[mcnt * ETH_ADDR_LEN], ETH_ADDR_LEN);
2022                 mcnt++;
2023         }
2024 #if __FreeBSD_version < 800000
2025         IF_ADDR_UNLOCK(ifp);
2026 #else
2027         if_maddr_runlock(ifp);
2028 #endif
2029         if (mcnt >= MAX_NUM_MULTICAST_ADDRESSES) {
2030                 reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
2031                 reg_rctl |= E1000_RCTL_MPE;
2032                 E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2033         } else
2034                 e1000_update_mc_addr_list(&adapter->hw, mta, mcnt);
2035
2036         if (adapter->hw.mac.type == e1000_82542 && 
2037             adapter->hw.revision_id == E1000_REVISION_2) {
2038                 reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
2039                 reg_rctl &= ~E1000_RCTL_RST;
2040                 E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2041                 msec_delay(5);
2042                 if (adapter->hw.bus.pci_cmd_word & CMD_MEM_WRT_INVALIDATE)
2043                         e1000_pci_set_mwi(&adapter->hw);
2044         }
2045         free(mta, M_DEVBUF);
2046 }
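/*
 * Note: e1000_update_mc_addr_list() (shared code) programs the
 * gathered addresses into the hardware multicast filter; when the list
 * overflows MAX_NUM_MULTICAST_ADDRESSES we instead accept all
 * multicast traffic by setting RCTL.MPE above.
 */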
2047
2048
2049 /*********************************************************************
2050  *  Timer routine
2051  *
2052  *  This routine checks for link status and updates statistics.
2053  *
2054  **********************************************************************/
2055
2056 static void
2057 em_local_timer(void *arg)
2058 {
2059         struct adapter  *adapter = arg;
2060         struct ifnet    *ifp = adapter->ifp;
2061         struct tx_ring  *txr = adapter->tx_rings;
2062
2063         EM_CORE_LOCK_ASSERT(adapter);
2064
2065         em_update_link_status(adapter);
2066         em_update_stats_counters(adapter);
2067
2068         /* Reset LAA into RAR[0] on 82571 */
2069         if (e1000_get_laa_state_82571(&adapter->hw) == TRUE)
2070                 e1000_rar_set(&adapter->hw, adapter->hw.mac.addr, 0);
2071
2072         if (em_display_debug_stats && (ifp->if_drv_flags & IFF_DRV_RUNNING))
2073                 em_print_hw_stats(adapter);
2074
2075         /*
2076         ** Check for time since any descriptor was cleaned
2077         */
2078         for (int i = 0; i < adapter->num_queues; i++, txr++) {
2079                 EM_TX_LOCK(txr);
2080                 if (txr->watchdog_check == FALSE) {
2081                         EM_TX_UNLOCK(txr);
2082                         continue;
2083                 }
2084                 if ((ticks - txr->watchdog_time) > EM_WATCHDOG)
2085                         goto hung;
2086                 EM_TX_UNLOCK(txr);
2087         }
2088
2089         callout_reset(&adapter->timer, hz, em_local_timer, adapter);
2090         return;
2091 hung:
2092         device_printf(adapter->dev, "Watchdog timeout -- resetting\n");
2093         ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
2094         adapter->watchdog_events++;
2095         EM_TX_UNLOCK(txr);
2096         em_init_locked(adapter);
2097 }
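/*
 * Watchdog note: txr->watchdog_time is refreshed whenever em_txeof()
 * makes cleaning progress; if a ring with watchdog_check set shows no
 * progress for EM_WATCHDOG ticks, the hang path above reinitializes
 * the adapter.
 */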
2098
2099
2100 static void
2101 em_update_link_status(struct adapter *adapter)
2102 {
2103         struct e1000_hw *hw = &adapter->hw;
2104         struct ifnet *ifp = adapter->ifp;
2105         device_t dev = adapter->dev;
2106         u32 link_check = 0;
2107
2108         /* Get the cached link value or read phy for real */
2109         switch (hw->phy.media_type) {
2110         case e1000_media_type_copper:
2111                 if (hw->mac.get_link_status) {
2112                         /* Do the work to read phy */
2113                         e1000_check_for_link(hw);
2114                         link_check = !hw->mac.get_link_status;
2115                         if (link_check) /* ESB2 fix */
2116                                 e1000_cfg_on_link_up(hw);
2117                 } else
2118                         link_check = TRUE;
2119                 break;
2120         case e1000_media_type_fiber:
2121                 e1000_check_for_link(hw);
2122                 link_check = (E1000_READ_REG(hw, E1000_STATUS) &
2123                                  E1000_STATUS_LU);
2124                 break;
2125         case e1000_media_type_internal_serdes:
2126                 e1000_check_for_link(hw);
2127                 link_check = adapter->hw.mac.serdes_has_link;
2128                 break;
2129         default:
2130         case e1000_media_type_unknown:
2131                 break;
2132         }
2133
2134         /* Now check for a transition */
2135         if (link_check && (adapter->link_active == 0)) {
2136                 e1000_get_speed_and_duplex(hw, &adapter->link_speed,
2137                     &adapter->link_duplex);
2138                 /* Check if we must disable SPEED_MODE bit on PCI-E */
2139                 if ((adapter->link_speed != SPEED_1000) &&
2140                     ((hw->mac.type == e1000_82571) ||
2141                     (hw->mac.type == e1000_82572))) {
2142                         int tarc0;
2143                         tarc0 = E1000_READ_REG(hw, E1000_TARC(0));
2144                         tarc0 &= ~SPEED_MODE_BIT;
2145                         E1000_WRITE_REG(hw, E1000_TARC(0), tarc0);
2146                 }
2147                 if (bootverbose)
2148                         device_printf(dev, "Link is up %d Mbps %s\n",
2149                             adapter->link_speed,
2150                             ((adapter->link_duplex == FULL_DUPLEX) ?
2151                             "Full Duplex" : "Half Duplex"));
2152                 adapter->link_active = 1;
2153                 adapter->smartspeed = 0;
2154                 ifp->if_baudrate = adapter->link_speed * 1000000;
2155                 if_link_state_change(ifp, LINK_STATE_UP);
2156         } else if (!link_check && (adapter->link_active == 1)) {
2157                 ifp->if_baudrate = adapter->link_speed = 0;
2158                 adapter->link_duplex = 0;
2159                 if (bootverbose)
2160                         device_printf(dev, "Link is Down\n");
2161                 adapter->link_active = 0;
2162                 /* Link down, disable watchdog */
2163                 /* JFV: change later */
2164                 /* adapter->watchdog_check = FALSE; */
2165                 if_link_state_change(ifp, LINK_STATE_DOWN);
2166         }
2167 }
2168
2169 /*********************************************************************
2170  *
2171  *  This routine disables all traffic on the adapter by issuing a
2172  *  global reset on the MAC and deallocates TX/RX buffers.
2173  *
2174  *  This routine should always be called with BOTH the CORE
2175  *  and TX locks.
2176  **********************************************************************/
2177
2178 static void
2179 em_stop(void *arg)
2180 {
2181         struct adapter  *adapter = arg;
2182         struct ifnet    *ifp = adapter->ifp;
2183         struct tx_ring  *txr = adapter->tx_rings;
2184
2185         EM_CORE_LOCK_ASSERT(adapter);
2186
2187         INIT_DEBUGOUT("em_stop: begin");
2188
2189         em_disable_intr(adapter);
2190         callout_stop(&adapter->timer);
2191
2192         /* Tell the stack that the interface is no longer active */
2193         ifp->if_drv_flags &= ~(IFF_DRV_RUNNING | IFF_DRV_OACTIVE);
2194
2195         /* Unarm watchdog timer. */
2196         for (int i = 0; i < adapter->num_queues; i++, txr++) {
2197                 EM_TX_LOCK(txr);
2198                 txr->watchdog_check = FALSE;
2199                 EM_TX_UNLOCK(txr);
2200         }
2201
2202         e1000_reset_hw(&adapter->hw);
2203         E1000_WRITE_REG(&adapter->hw, E1000_WUC, 0);
2204
2205         e1000_led_off(&adapter->hw);
2206         e1000_cleanup_led(&adapter->hw);
2207 }
2208
2209
2210 /*********************************************************************
2211  *
2212  *  Determine hardware revision.
2213  *
2214  **********************************************************************/
2215 static void
2216 em_identify_hardware(struct adapter *adapter)
2217 {
2218         device_t dev = adapter->dev;
2219
2220         /* Make sure our PCI config space has the necessary stuff set */
2221         adapter->hw.bus.pci_cmd_word = pci_read_config(dev, PCIR_COMMAND, 2);
2222         if (!((adapter->hw.bus.pci_cmd_word & PCIM_CMD_BUSMASTEREN) &&
2223             (adapter->hw.bus.pci_cmd_word & PCIM_CMD_MEMEN))) {
2224                 device_printf(dev, "Memory Access and/or Bus Master bits "
2225                     "were not set!\n");
2226                 adapter->hw.bus.pci_cmd_word |=
2227                     (PCIM_CMD_BUSMASTEREN | PCIM_CMD_MEMEN);
2228                 pci_write_config(dev, PCIR_COMMAND,
2229                     adapter->hw.bus.pci_cmd_word, 2);
2230         }
2231
2232         /* Save off the information about this board */
2233         adapter->hw.vendor_id = pci_get_vendor(dev);
2234         adapter->hw.device_id = pci_get_device(dev);
2235         adapter->hw.revision_id = pci_read_config(dev, PCIR_REVID, 1);
2236         adapter->hw.subsystem_vendor_id =
2237             pci_read_config(dev, PCIR_SUBVEND_0, 2);
2238         adapter->hw.subsystem_device_id =
2239             pci_read_config(dev, PCIR_SUBDEV_0, 2);
2240
2241         /* Do Shared Code Init and Setup */
2242         if (e1000_set_mac_type(&adapter->hw)) {
2243                 device_printf(dev, "Setup init failure\n");
2244                 return;
2245         }
2246 }
2247
2248 static int
2249 em_allocate_pci_resources(struct adapter *adapter)
2250 {
2251         device_t        dev = adapter->dev;
2252         int             rid;
2253
2254         rid = PCIR_BAR(0);
2255         adapter->memory = bus_alloc_resource_any(dev, SYS_RES_MEMORY,
2256             &rid, RF_ACTIVE);
2257         if (adapter->memory == NULL) {
2258                 device_printf(dev, "Unable to allocate bus resource: memory\n");
2259                 return (ENXIO);
2260         }
2261         adapter->osdep.mem_bus_space_tag =
2262             rman_get_bustag(adapter->memory);
2263         adapter->osdep.mem_bus_space_handle =
2264             rman_get_bushandle(adapter->memory);
2265         adapter->hw.hw_addr = (u8 *)&adapter->osdep.mem_bus_space_handle;
2266
2267         /* Default to a single queue */
2268         adapter->num_queues = 1;
2269
2270         /*
2271          * Setup MSI/X or MSI if PCI Express
2272          */
2273         adapter->msix = em_setup_msix(adapter);
2274
2275         adapter->hw.back = &adapter->osdep;
2276
2277         return (0);
2278 }
2279
2280 /*********************************************************************
2281  *
2282  *  Setup the Legacy or MSI Interrupt handler
2283  *
2284  **********************************************************************/
2285 int
2286 em_allocate_legacy(struct adapter *adapter)
2287 {
2288         device_t dev = adapter->dev;
2289         int error, rid = 0;
2290
2291         /* Manually turn off all interrupts */
2292         E1000_WRITE_REG(&adapter->hw, E1000_IMC, 0xffffffff);
2293
2294         if (adapter->msix == 1) /* using MSI */
2295                 rid = 1;
2296         /* We allocate a single interrupt resource */
2297         adapter->res = bus_alloc_resource_any(dev,
2298             SYS_RES_IRQ, &rid, RF_SHAREABLE | RF_ACTIVE);
2299         if (adapter->res == NULL) {
2300                 device_printf(dev, "Unable to allocate bus resource: "
2301                     "interrupt\n");
2302                 return (ENXIO);
2303         }
2304
2305         /*
2306          * Allocate a fast interrupt and the associated
2307          * deferred processing contexts.
2308          */
2309         TASK_INIT(&adapter->que_task, 0, em_handle_que, adapter);
2310         TASK_INIT(&adapter->link_task, 0, em_handle_link, adapter);
2311         adapter->tq = taskqueue_create_fast("em_taskq", M_NOWAIT,
2312             taskqueue_thread_enqueue, &adapter->tq);
2313         taskqueue_start_threads(&adapter->tq, 1, PI_NET, "%s taskq",
2314             device_get_nameunit(adapter->dev));
2315         if ((error = bus_setup_intr(dev, adapter->res, INTR_TYPE_NET,
2316             em_irq_fast, NULL, adapter, &adapter->tag)) != 0) {
2317                 device_printf(dev, "Failed to register fast interrupt "
2318                             "handler: %d\n", error);
2319                 taskqueue_free(adapter->tq);
2320                 adapter->tq = NULL;
2321                 return (error);
2322         }
2323         
2324         return (0);
2325 }
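/*
 * Note: em_irq_fast is passed to bus_setup_intr() as the filter
 * argument (the ithread handler is NULL), so it executes in primary
 * interrupt context and is limited to the filter-safe operations
 * described at its definition.
 */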
2326
2327 /*********************************************************************
2328  *
2329  *  Setup the MSIX Interrupt handlers
2330  *   This is not really Multiqueue, rather
2331  *   it's just multiple interrupt vectors.
2332  *
2333  **********************************************************************/
2334 int
2335 em_allocate_msix(struct adapter *adapter)
2336 {
2337         device_t        dev = adapter->dev;
2338         struct          tx_ring *txr = adapter->tx_rings;
2339         struct          rx_ring *rxr = adapter->rx_rings;
2340         int             error, rid, vector = 0;
2341
2342
2343         /* Make sure all interrupts are disabled */
2344         E1000_WRITE_REG(&adapter->hw, E1000_IMC, 0xffffffff);
2345
2346         /* First set up ring resources */
2347         for (int i = 0; i < adapter->num_queues; i++, txr++, rxr++) {
2348
2349                 /* RX ring */
2350                 rid = vector + 1;
2351
2352                 rxr->res = bus_alloc_resource_any(dev,
2353                     SYS_RES_IRQ, &rid, RF_ACTIVE);
2354                 if (rxr->res == NULL) {
2355                         device_printf(dev,
2356                             "Unable to allocate bus resource: "
2357                             "RX MSIX Interrupt %d\n", i);
2358                         return (ENXIO);
2359                 }
2360                 if ((error = bus_setup_intr(dev, rxr->res,
2361                     INTR_TYPE_NET | INTR_MPSAFE, NULL, em_msix_rx,
2362                     rxr, &rxr->tag)) != 0) {
2363                         device_printf(dev, "Failed to register RX handler\n");
2364                         return (error);
2365                 }
2366                 rxr->msix = vector++; /* NOTE increment vector for TX */
2367                 TASK_INIT(&rxr->rx_task, 0, em_handle_rx, rxr);
2368                 rxr->tq = taskqueue_create_fast("em_rxq", M_NOWAIT,
2369                     taskqueue_thread_enqueue, &rxr->tq);
2370                 taskqueue_start_threads(&rxr->tq, 1, PI_NET, "%s rxq",
2371                     device_get_nameunit(adapter->dev));
2372                 /*
2373                 ** Set the bit to enable interrupt
2374                 ** in E1000_IMS -- bits 20 and 21
2375                 ** are for RX0 and RX1, note this has
2376                 ** NOTHING to do with the MSIX vector
2377                 */
2378                 rxr->ims = 1 << (20 + i);
2379                 adapter->ivars |= (8 | rxr->msix) << (i * 4);
2380
2381                 /* TX ring */
2382                 rid = vector + 1;
2383                 txr->res = bus_alloc_resource_any(dev,
2384                     SYS_RES_IRQ, &rid, RF_ACTIVE);
2385                 if (txr->res == NULL) {
2386                         device_printf(dev,
2387                             "Unable to allocate bus resource: "
2388                             "TX MSIX Interrupt %d\n", i);
2389                         return (ENXIO);
2390                 }
2391                 if ((error = bus_setup_intr(dev, txr->res,
2392                     INTR_TYPE_NET | INTR_MPSAFE, NULL, em_msix_tx,
2393                     txr, &txr->tag)) != 0) {
2394                         device_printf(dev, "Failed to register TX handler\n");
2395                         return (error);
2396                 }
2397                 txr->msix = vector++; /* Increment vector for next pass */
2398                 TASK_INIT(&txr->tx_task, 0, em_handle_tx, txr);
2399                 txr->tq = taskqueue_create_fast("em_txq", M_NOWAIT,
2400                     taskqueue_thread_enqueue, &txr->tq);
2401                 taskqueue_start_threads(&txr->tq, 1, PI_NET, "%s txq",
2402                     device_get_nameunit(adapter->dev));
2403                 /*
2404                 ** Set the bit to enable interrupt
2405                 ** in E1000_IMS -- bits 22 and 23
2406                 ** are for TX0 and TX1, note this has
2407                 ** NOTHING to do with the MSIX vector
2408                 */
2409                 txr->ims = 1 << (22 + i);
2410                 adapter->ivars |= (8 | txr->msix) << (8 + (i * 4));
2411         }
2412
2413         /* Link interrupt */
2414         ++rid;
2415         adapter->res = bus_alloc_resource_any(dev,
2416             SYS_RES_IRQ, &rid, RF_ACTIVE);
2417         if (!adapter->res) {
2418                 device_printf(dev, "Unable to allocate "
2419                     "bus resource: Link interrupt [%d]\n", rid);
2420                 return (ENXIO);
2421         }
2422         /* Set the link handler function */
2423         error = bus_setup_intr(dev, adapter->res,
2424             INTR_TYPE_NET | INTR_MPSAFE, NULL,
2425             em_msix_link, adapter, &adapter->tag);
2426         if (error) {
2427                 adapter->res = NULL;
2428                 device_printf(dev, "Failed to register LINK handler\n");
2429                 return (error);
2430         }
2431         adapter->linkvec = vector;
2432         adapter->ivars |=  (8 | vector) << 16;
2433         adapter->ivars |= 0x80000000;
2434         TASK_INIT(&adapter->link_task, 0, em_handle_link, adapter);
2435         adapter->tq = taskqueue_create_fast("em_link", M_NOWAIT,
2436             taskqueue_thread_enqueue, &adapter->tq);
2437         taskqueue_start_threads(&adapter->tq, 1, PI_NET, "%s linkq",
2438             device_get_nameunit(adapter->dev));
2439
2440         return (0);
2441 }
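/*
 * Worked example, derived from the assignments above, for the
 * two-queue 82574 case: vectors are handed out as RX0=0, TX0=1,
 * RX1=2, TX1=3, link=4 (rid is always vector + 1), yielding
 *
 *   ivars = (8|0)<<0 | (8|2)<<4 | (8|1)<<8 | (8|3)<<12
 *         | (8|4)<<16 | 0x80000000 = 0x800cb9a8
 */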
2442
2443
2444 static void
2445 em_free_pci_resources(struct adapter *adapter)
2446 {
2447         device_t        dev = adapter->dev;
2448         struct tx_ring  *txr;
2449         struct rx_ring  *rxr;
2450         int             rid;
2451
2452
2453         /*
2454         ** Release all the queue interrupt resources:
2455         */
2456         for (int i = 0; i < adapter->num_queues; i++) {
2457                 txr = &adapter->tx_rings[i];
2458                 rxr = &adapter->rx_rings[i];
2459                 rid = txr->msix + 1;
2460                 if (txr->tag != NULL) {
2461                         bus_teardown_intr(dev, txr->res, txr->tag);
2462                         txr->tag = NULL;
2463                 }
2464                 if (txr->res != NULL)
2465                         bus_release_resource(dev, SYS_RES_IRQ,
2466                             rid, txr->res);
2467                 rid = rxr->msix + 1;
2468                 if (rxr->tag != NULL) {
2469                         bus_teardown_intr(dev, rxr->res, rxr->tag);
2470                         rxr->tag = NULL;
2471                 }
2472                 if (rxr->res != NULL)
2473                         bus_release_resource(dev, SYS_RES_IRQ,
2474                             rid, rxr->res);
2475         }
2476
2477         if (adapter->linkvec) /* we are doing MSIX */
2478                 rid = adapter->linkvec + 1;
2479         else
2480                 rid = (adapter->msix != 0) ? 1 : 0;
2481
2482         if (adapter->tag != NULL) {
2483                 bus_teardown_intr(dev, adapter->res, adapter->tag);
2484                 adapter->tag = NULL;
2485         }
2486
2487         if (adapter->res != NULL)
2488                 bus_release_resource(dev, SYS_RES_IRQ, rid, adapter->res);
2489
2490
2491         if (adapter->msix)
2492                 pci_release_msi(dev);
2493
2494         if (adapter->msix_mem != NULL)
2495                 bus_release_resource(dev, SYS_RES_MEMORY,
2496                     PCIR_BAR(EM_MSIX_BAR), adapter->msix_mem);
2497
2498         if (adapter->memory != NULL)
2499                 bus_release_resource(dev, SYS_RES_MEMORY,
2500                     PCIR_BAR(0), adapter->memory);
2501
2502         if (adapter->flash != NULL)
2503                 bus_release_resource(dev, SYS_RES_MEMORY,
2504                     EM_FLASH, adapter->flash);
2505 }
2506
2507 /*
2508  * Setup MSI or MSI/X
2509  */
2510 static int
2511 em_setup_msix(struct adapter *adapter)
2512 {
2513         device_t dev = adapter->dev;
2514         int val = 0;
2515
2516
2517         /* Setup MSI/X for Hartwell */
2518         if ((adapter->hw.mac.type == e1000_82574) &&
2519             (em_enable_msix == TRUE)) {
2520                 /* Map the MSIX BAR */
2521                 int rid = PCIR_BAR(EM_MSIX_BAR);
2522                 adapter->msix_mem = bus_alloc_resource_any(dev,
2523                     SYS_RES_MEMORY, &rid, RF_ACTIVE);
2524                 if (!adapter->msix_mem) {
2525                         /* May not be enabled */
2526                         device_printf(adapter->dev,
2527                             "Unable to map MSIX table\n");
2528                         goto msi;
2529                 }
2530                 val = pci_msix_count(dev); 
2531                 if (val != 5) {
2532                         bus_release_resource(dev, SYS_RES_MEMORY,
2533                             PCIR_BAR(EM_MSIX_BAR), adapter->msix_mem);
2534                         adapter->msix_mem = NULL;
2535                         device_printf(adapter->dev,
2536                             "MSIX vectors wrong, using MSI\n");
2537                         goto msi;
2538                 }
2539                 if (em_msix_queues == 2) {
2540                         val = 5;
2541                         adapter->num_queues = 2;
2542                 } else {
2543                         val = 3;
2544                         adapter->num_queues = 1;
2545                 }
2546                 if (pci_alloc_msix(dev, &val) == 0) {
2547                         device_printf(adapter->dev,
2548                             "Using MSIX interrupts "
2549                             "with %d vectors\n", val);
2550                 }
2551
2552                 return (val);
2553         }
2554 msi:
2555         val = pci_msi_count(dev);
2556         if (val == 1 && pci_alloc_msi(dev, &val) == 0) {
2557                 adapter->msix = 1;
2558                 device_printf(adapter->dev, "Using MSI interrupt\n");
2559                 return (val);
2560         } 
2561         /* Should only happen due to manual intervention */
2562         device_printf(adapter->dev, "Setup MSIX failure\n");
2563         return (0);
2564 }
2565
2566
2567 /*********************************************************************
2568  *
2569  *  Initialize the hardware to a configuration
2570  *  as specified by the adapter structure.
2571  *
2572  **********************************************************************/
2573 static void
2574 em_reset(struct adapter *adapter)
2575 {
2576         device_t        dev = adapter->dev;
2577         struct e1000_hw *hw = &adapter->hw;
2578         u16             rx_buffer_size;
2579
2580         INIT_DEBUGOUT("em_reset: begin");
2581
2582         /* Set up smart power down as default off on newer adapters. */
2583         if (!em_smart_pwr_down && (hw->mac.type == e1000_82571 ||
2584             hw->mac.type == e1000_82572)) {
2585                 u16 phy_tmp = 0;
2586
2587                 /* Speed up time to link by disabling smart power down. */
2588                 e1000_read_phy_reg(hw, IGP02E1000_PHY_POWER_MGMT, &phy_tmp);
2589                 phy_tmp &= ~IGP02E1000_PM_SPD;
2590                 e1000_write_phy_reg(hw, IGP02E1000_PHY_POWER_MGMT, phy_tmp);
2591         }
2592
2593         /*
2594          * These parameters control the automatic generation (Tx) and
2595          * response (Rx) to Ethernet PAUSE frames.
2596          * - High water mark should allow for at least two frames to be
2597          *   received after sending an XOFF.
2598          * - Low water mark works best when it is very near the high water mark.
2599          *   This allows the receiver to restart by sending XON when it has
2600          *   drained a bit. Here we use an arbitrary value of 1500 which will
2601          *   restart after one full frame is pulled from the buffer. There
2602          *   could be several smaller frames in the buffer and if so they will
2603          *   not trigger the XON until their total number reduces the buffer
2604          *   by 1500.
2605          * - The pause time is fairly large at 1000 x 512ns = 512 usec.
2606          */
2607         rx_buffer_size = ((E1000_READ_REG(hw, E1000_PBA) & 0xffff) << 10);
2608
2609         hw->fc.high_water = rx_buffer_size -
2610             roundup2(adapter->max_frame_size, 1024);
2611         hw->fc.low_water = hw->fc.high_water - 1500;
2612
2613         if (hw->mac.type == e1000_80003es2lan)
2614                 hw->fc.pause_time = 0xFFFF;
2615         else
2616                 hw->fc.pause_time = EM_FC_PAUSE_TIME;
2617
2618         hw->fc.send_xon = TRUE;
2619
2620         /* Set Flow control, use the tunable location if sane */
2621         if ((em_fc_setting >= 0) && (em_fc_setting < 4))
2622                 hw->fc.requested_mode = em_fc_setting;
2623         else
2624                 hw->fc.requested_mode = e1000_fc_none;
2625
2626         /* Override - workaround for PCHLAN issue */
2627         if (hw->mac.type == e1000_pchlan)
2628                 hw->fc.requested_mode = e1000_fc_rx_pause;
2629
2630         /* Issue a global reset */
2631         e1000_reset_hw(hw);
2632         E1000_WRITE_REG(hw, E1000_WUC, 0);
2633
2634         if (e1000_init_hw(hw) < 0) {
2635                 device_printf(dev, "Hardware Initialization Failed\n");
2636                 return;
2637         }
2638
2639         E1000_WRITE_REG(hw, E1000_VET, ETHERTYPE_VLAN);
2640         e1000_get_phy_info(hw);
2641         e1000_check_for_link(hw);
2642         return;
2643 }
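/*
 * Worked example (illustrative numbers only): if the PBA RX field
 * reads 24 (KB), rx_buffer_size = 24 << 10 = 24576 bytes; with a
 * 1522-byte max frame, high_water = 24576 - roundup2(1522, 1024) =
 * 24576 - 2048 = 22528 and low_water = 22528 - 1500 = 21028.
 */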
2644
2645 /*********************************************************************
2646  *
2647  *  Setup networking device structure and register an interface.
2648  *
2649  **********************************************************************/
2650 static void
2651 em_setup_interface(device_t dev, struct adapter *adapter)
2652 {
2653         struct ifnet   *ifp;
2654
2655         INIT_DEBUGOUT("em_setup_interface: begin");
2656
2657         ifp = adapter->ifp = if_alloc(IFT_ETHER);
2658         if (ifp == NULL)
2659                 panic("%s: can not if_alloc()", device_get_nameunit(dev));
2660         if_initname(ifp, device_get_name(dev), device_get_unit(dev));
2661         ifp->if_mtu = ETHERMTU;
2662         ifp->if_init =  em_init;
2663         ifp->if_softc = adapter;
2664         ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
2665         ifp->if_ioctl = em_ioctl;
2666         ifp->if_start = em_start;
2667         IFQ_SET_MAXLEN(&ifp->if_snd, adapter->num_tx_desc - 1);
2668         ifp->if_snd.ifq_drv_maxlen = adapter->num_tx_desc - 1;
2669         IFQ_SET_READY(&ifp->if_snd);
2670
2671         ether_ifattach(ifp, adapter->hw.mac.addr);
2672
2673         ifp->if_capabilities = ifp->if_capenable = 0;
2674
2675 #ifdef EM_MULTIQUEUE
2676         /* Multiqueue tx functions */
2677         ifp->if_transmit = em_mq_start;
2678         ifp->if_qflush = em_qflush;
2679 #endif  
2680
2681         ifp->if_capabilities |= IFCAP_HWCSUM | IFCAP_VLAN_HWCSUM;
2682         ifp->if_capenable |= IFCAP_HWCSUM | IFCAP_VLAN_HWCSUM;
2683
2684         /* Enable TSO by default, can disable with ifconfig */
2685         ifp->if_capabilities |= IFCAP_TSO4;
2686         ifp->if_capenable |= IFCAP_TSO4;
2687
2688         /*
2689          * Tell the upper layer(s) we
2690          * support full VLAN capability
2691          */
2692         ifp->if_data.ifi_hdrlen = sizeof(struct ether_vlan_header);
2693         ifp->if_capabilities |= IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_MTU;
2694         ifp->if_capenable |= IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_MTU;
2695
2696         /*
2697         ** Don't turn this on by default: if VLANs are
2698         ** created on another pseudo device (e.g. lagg),
2699         ** VLAN events are not passed through, breaking
2700         ** operation; with HW FILTER off it works. If
2701         ** using VLANs directly on the em driver you can
2702         ** enable this and get full hardware tag filtering.
2703         */
2704         ifp->if_capabilities |= IFCAP_VLAN_HWFILTER;
2705
2706 #ifdef DEVICE_POLLING
2707         ifp->if_capabilities |= IFCAP_POLLING;
2708 #endif
2709
2710         /* Enable only WOL MAGIC by default */
2711         if (adapter->wol) {
2712                 ifp->if_capabilities |= IFCAP_WOL;
2713                 ifp->if_capenable |= IFCAP_WOL_MAGIC;
2714         }
2715                 
2716         /*
2717          * Specify the media types supported by this adapter and register
2718          * callbacks to update media and link information
2719          */
2720         ifmedia_init(&adapter->media, IFM_IMASK,
2721             em_media_change, em_media_status);
2722         if ((adapter->hw.phy.media_type == e1000_media_type_fiber) ||
2723             (adapter->hw.phy.media_type == e1000_media_type_internal_serdes)) {
2724                 u_char fiber_type = IFM_1000_SX;        /* default type */
2725
2726                 ifmedia_add(&adapter->media, IFM_ETHER | fiber_type | IFM_FDX, 
2727                             0, NULL);
2728                 ifmedia_add(&adapter->media, IFM_ETHER | fiber_type, 0, NULL);
2729         } else {
2730                 ifmedia_add(&adapter->media, IFM_ETHER | IFM_10_T, 0, NULL);
2731                 ifmedia_add(&adapter->media, IFM_ETHER | IFM_10_T | IFM_FDX,
2732                             0, NULL);
2733                 ifmedia_add(&adapter->media, IFM_ETHER | IFM_100_TX,
2734                             0, NULL);
2735                 ifmedia_add(&adapter->media, IFM_ETHER | IFM_100_TX | IFM_FDX,
2736                             0, NULL);
2737                 if (adapter->hw.phy.type != e1000_phy_ife) {
2738                         ifmedia_add(&adapter->media,
2739                                 IFM_ETHER | IFM_1000_T | IFM_FDX, 0, NULL);
2740                         ifmedia_add(&adapter->media,
2741                                 IFM_ETHER | IFM_1000_T, 0, NULL);
2742                 }
2743         }
2744         ifmedia_add(&adapter->media, IFM_ETHER | IFM_AUTO, 0, NULL);
2745         ifmedia_set(&adapter->media, IFM_ETHER | IFM_AUTO);
2746 }
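/*
 * Usage sketch: the capabilities advertised above map directly to
 * ifconfig toggles, e.g. "ifconfig em0 -tso4" to disable TSO or
 * "ifconfig em0 vlanhwfilter" to opt in to the VLAN hardware filter
 * discussed above.
 */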
2747
2748
2749 /*
2750  * Manage DMA'able memory.
2751  */
2752 static void
2753 em_dmamap_cb(void *arg, bus_dma_segment_t *segs, int nseg, int error)
2754 {
2755         if (error)
2756                 return;
2757         *(bus_addr_t *) arg = segs[0].ds_addr;
2758 }
2759
2760 static int
2761 em_dma_malloc(struct adapter *adapter, bus_size_t size,
2762         struct em_dma_alloc *dma, int mapflags)
2763 {
2764         int error;
2765
2766         error = bus_dma_tag_create(bus_get_dma_tag(adapter->dev), /* parent */
2767                                 EM_DBA_ALIGN, 0,        /* alignment, bounds */
2768                                 BUS_SPACE_MAXADDR,      /* lowaddr */
2769                                 BUS_SPACE_MAXADDR,      /* highaddr */
2770                                 NULL, NULL,             /* filter, filterarg */
2771                                 size,                   /* maxsize */
2772                                 1,                      /* nsegments */
2773                                 size,                   /* maxsegsize */
2774                                 0,                      /* flags */
2775                                 NULL,                   /* lockfunc */
2776                                 NULL,                   /* lockarg */
2777                                 &dma->dma_tag);
2778         if (error) {
2779                 device_printf(adapter->dev,
2780                     "%s: bus_dma_tag_create failed: %d\n",
2781                     __func__, error);
2782                 goto fail_0;
2783         }
2784
2785         error = bus_dmamem_alloc(dma->dma_tag, (void**) &dma->dma_vaddr,
2786             BUS_DMA_NOWAIT | BUS_DMA_COHERENT, &dma->dma_map);
2787         if (error) {
2788                 device_printf(adapter->dev,
2789                     "%s: bus_dmamem_alloc(%ju) failed: %d\n",
2790                     __func__, (uintmax_t)size, error);
2791                 goto fail_1;    /* nothing was allocated to free */
2792         }
2793
2794         dma->dma_paddr = 0;
2795         error = bus_dmamap_load(dma->dma_tag, dma->dma_map, dma->dma_vaddr,
2796             size, em_dmamap_cb, &dma->dma_paddr, mapflags | BUS_DMA_NOWAIT);
2797         if (error || dma->dma_paddr == 0) {
2798                 device_printf(adapter->dev,
2799                     "%s: bus_dmamap_load failed: %d\n",
2800                     __func__, error);
2801                 goto fail_3;
2802         }
2803
2804         return (0);
2805
2806 fail_3:
2807         bus_dmamap_unload(dma->dma_tag, dma->dma_map);
2808 fail_2:
2809         bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map);
     fail_1:
2810         bus_dma_tag_destroy(dma->dma_tag);
2811 fail_0:
2812         dma->dma_map = NULL;
2813         dma->dma_tag = NULL;
2814
2815         return (error);
2816 }
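
     /*
      * Typical use, as in em_allocate_queues() below:
      *
      *      if (em_dma_malloc(adapter, tsize, &txr->txdma, BUS_DMA_NOWAIT))
      *              ... fail with ENOMEM ...
      *
      * with em_dma_free() releasing the map, memory and tag in reverse.
      */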
2817
2818 static void
2819 em_dma_free(struct adapter *adapter, struct em_dma_alloc *dma)
2820 {
2821         if (dma->dma_tag == NULL)
2822                 return;
2823         if (dma->dma_map != NULL) {
2824                 bus_dmamap_sync(dma->dma_tag, dma->dma_map,
2825                     BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
2826                 bus_dmamap_unload(dma->dma_tag, dma->dma_map);
2827                 bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map);
2828                 dma->dma_map = NULL;
2829         }
2830         bus_dma_tag_destroy(dma->dma_tag);
2831         dma->dma_tag = NULL;
2832 }
2833
2834
2835 /*********************************************************************
2836  *
2837  *  Allocate memory for the transmit and receive rings, and then
2838  *  the descriptors associated with each; called only once at attach.
2839  *
2840  **********************************************************************/
2841 static int
2842 em_allocate_queues(struct adapter *adapter)
2843 {
2844         device_t                dev = adapter->dev;
2845         struct tx_ring          *txr = NULL;
2846         struct rx_ring          *rxr = NULL;
2847         int rsize, tsize, error = E1000_SUCCESS;
2848         int txconf = 0, rxconf = 0;
2849
2850
2851         /* Allocate the TX ring struct memory */
2852         if (!(adapter->tx_rings =
2853             (struct tx_ring *) malloc(sizeof(struct tx_ring) *
2854             adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
2855                 device_printf(dev, "Unable to allocate TX ring memory\n");
2856                 error = ENOMEM;
2857                 goto fail;
2858         }
2859
2860         /* Now allocate the RX */
2861         if (!(adapter->rx_rings =
2862             (struct rx_ring *) malloc(sizeof(struct rx_ring) *
2863             adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
2864                 device_printf(dev, "Unable to allocate RX ring memory\n");
2865                 error = ENOMEM;
2866                 goto rx_fail;
2867         }
2868
2869         tsize = roundup2(adapter->num_tx_desc *
2870             sizeof(struct e1000_tx_desc), EM_DBA_ALIGN);
2871         /*
2872          * Now set up the TX queues, txconf is needed to handle the
2873          * possibility that things fail midcourse and we need to
2874          * undo memory gracefully
2875          */ 
2876         for (int i = 0; i < adapter->num_queues; i++, txconf++) {
2877                 /* Set up some basics */
2878                 txr = &adapter->tx_rings[i];
2879                 txr->adapter = adapter;
2880                 txr->me = i;
2881
2882                 /* Initialize the TX lock */
2883                 snprintf(txr->mtx_name, sizeof(txr->mtx_name), "%s:tx(%d)",
2884                     device_get_nameunit(dev), txr->me);
2885                 mtx_init(&txr->tx_mtx, txr->mtx_name, NULL, MTX_DEF);
2886
2887                 if (em_dma_malloc(adapter, tsize,
2888                         &txr->txdma, BUS_DMA_NOWAIT)) {
2889                         device_printf(dev,
2890                             "Unable to allocate TX Descriptor memory\n");
2891                         error = ENOMEM;
2892                         goto err_tx_desc;
2893                 }
2894                 txr->tx_base = (struct e1000_tx_desc *)txr->txdma.dma_vaddr;
2895                 bzero((void *)txr->tx_base, tsize);
2896
2897                 if (em_allocate_transmit_buffers(txr)) {
2898                         device_printf(dev,
2899                             "Critical Failure setting up transmit buffers\n");
2900                         error = ENOMEM;
2901                         goto err_tx_desc;
2902                 }
2903 #if __FreeBSD_version >= 800000
2904                 /* Allocate a buf ring */
2905                 txr->br = buf_ring_alloc(4096, M_DEVBUF,
2906                     M_WAITOK, &txr->tx_mtx);
2907 #endif
2908         }
2909
2910         /*
2911          * Next the RX queues...
2912          */ 
2913         rsize = roundup2(adapter->num_rx_desc *
2914             sizeof(struct e1000_rx_desc), EM_DBA_ALIGN);
2915         for (int i = 0; i < adapter->num_queues; i++, rxconf++) {
2916                 rxr = &adapter->rx_rings[i];
2917                 rxr->adapter = adapter;
2918                 rxr->me = i;
2919
2920                 /* Initialize the RX lock */
2921                 snprintf(rxr->mtx_name, sizeof(rxr->mtx_name), "%s:rx(%d)",
2922                     device_get_nameunit(dev), rxr->me);
2923                 mtx_init(&rxr->rx_mtx, rxr->mtx_name, NULL, MTX_DEF);
2924
2925                 if (em_dma_malloc(adapter, rsize,
2926                         &rxr->rxdma, BUS_DMA_NOWAIT)) {
2927                         device_printf(dev,
2928                             "Unable to allocate RX Descriptor memory\n");
2929                         error = ENOMEM;
2930                         goto err_rx_desc;
2931                 }
2932                 rxr->rx_base = (struct e1000_rx_desc *)rxr->rxdma.dma_vaddr;
2933                 bzero((void *)rxr->rx_base, rsize);
2934
2935                 /* Allocate receive buffers for the ring */
2936                 if (em_allocate_receive_buffers(rxr)) {
2937                         device_printf(dev,
2938                             "Critical Failure setting up receive buffers\n");
2939                         error = ENOMEM;
2940                         goto err_rx_desc;
2941                 }
2942         }
2943
2944         return (0);
2945
2946 err_rx_desc:
2947         for (rxr = adapter->rx_rings; rxconf > 0; rxr++, rxconf--)
2948                 em_dma_free(adapter, &rxr->rxdma);
2949 err_tx_desc:
2950         for (txr = adapter->tx_rings; txconf > 0; txr++, txconf--) {
2951 #if __FreeBSD_version >= 800000
2952                 /* each configured ring owns a buf_ring; free them all */
2953                 if (txr->br != NULL)
2954                         buf_ring_free(txr->br, M_DEVBUF);
2955 #endif
2956                 em_dma_free(adapter, &txr->txdma);
2957         }
             free(adapter->rx_rings, M_DEVBUF);
     rx_fail:
             free(adapter->tx_rings, M_DEVBUF);
2958 fail:
2959         return (error);
2960 }
2961
2962
2963 /*********************************************************************
2964  *
2965  *  Allocate memory for tx_buffer structures. The tx_buffer stores all
2966  *  the information needed to transmit a packet on the wire. This is
2967  *  called only once at attach; setup is done on every reset.
2968  *
2969  **********************************************************************/
2970 static int
2971 em_allocate_transmit_buffers(struct tx_ring *txr)
2972 {
2973         struct adapter *adapter = txr->adapter;
2974         device_t dev = adapter->dev;
2975         struct em_buffer *txbuf;
2976         int error, i;
2977
2978         /*
2979          * Setup DMA descriptor areas.
2980          */
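             /*
              * One map per tx_buffer comes from this tag: a mapping may
              * span up to EM_TSO_SIZE bytes in at most EM_MAX_SCATTER
              * segments of PAGE_SIZE each, i.e. sized for a full TSO send.
              */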
2981         if ((error = bus_dma_tag_create(bus_get_dma_tag(dev),
2982                                1, 0,                    /* alignment, bounds */
2983                                BUS_SPACE_MAXADDR,       /* lowaddr */
2984                                BUS_SPACE_MAXADDR,       /* highaddr */
2985                                NULL, NULL,              /* filter, filterarg */
2986                                EM_TSO_SIZE,             /* maxsize */
2987                                EM_MAX_SCATTER,          /* nsegments */
2988                                PAGE_SIZE,               /* maxsegsize */
2989                                0,                       /* flags */
2990                                NULL,                    /* lockfunc */
2991                                NULL,                    /* lockfuncarg */
2992                                &txr->txtag))) {
2993                 device_printf(dev,"Unable to allocate TX DMA tag\n");
2994                 goto fail;
2995         }
2996
2997         if (!(txr->tx_buffers =
2998             (struct em_buffer *) malloc(sizeof(struct em_buffer) *
2999             adapter->num_tx_desc, M_DEVBUF, M_NOWAIT | M_ZERO))) {
3000                 device_printf(dev, "Unable to allocate tx_buffer memory\n");
3001                 error = ENOMEM;
3002                 goto fail;
3003         }
3004
3005         /* Create the descriptor buffer dma maps */
3006         txbuf = txr->tx_buffers;
3007         for (i = 0; i < adapter->num_tx_desc; i++, txbuf++) {
3008                 error = bus_dmamap_create(txr->txtag, 0, &txbuf->map);
3009                 if (error != 0) {
3010                         device_printf(dev, "Unable to create TX DMA map\n");
3011                         goto fail;
3012                 }
3013         }
3014
3015         return 0;
3016 fail:
3017         /* Free everything; this handles the case where we failed partway */
3018         em_free_transmit_structures(adapter);
3019         return (error);
3020 }
3021
3022 /*********************************************************************
3023  *
3024  *  Initialize a transmit ring.
3025  *
3026  **********************************************************************/
3027 static void
3028 em_setup_transmit_ring(struct tx_ring *txr)
3029 {
3030         struct adapter *adapter = txr->adapter;
3031         struct em_buffer *txbuf;
3032         int i;
3033
3034         /* Clear the old descriptor contents */
3035         EM_TX_LOCK(txr);
3036         bzero((void *)txr->tx_base,
3037               (sizeof(struct e1000_tx_desc)) * adapter->num_tx_desc);
3038         /* Reset indices */
3039         txr->next_avail_desc = 0;
3040         txr->next_to_clean = 0;
3041
3042         /* Free any existing tx buffers. */
3043         txbuf = txr->tx_buffers;
3044         for (i = 0; i < adapter->num_tx_desc; i++, txbuf++) {
3045                 if (txbuf->m_head != NULL) {
3046                         bus_dmamap_sync(txr->txtag, txbuf->map,
3047                             BUS_DMASYNC_POSTWRITE);
3048                         bus_dmamap_unload(txr->txtag, txbuf->map);
3049                         m_freem(txbuf->m_head);
3050                         txbuf->m_head = NULL;
3051                 }
3052                 /* clear the watch index */
3053                 txbuf->next_eop = -1;
3054         }
3055
3056         /* Set number of descriptors available */
3057         txr->tx_avail = adapter->num_tx_desc;
3058
3059         bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
3060             BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
3061         EM_TX_UNLOCK(txr);
3062 }
3063
3064 /*********************************************************************
3065  *
3066  *  Initialize all transmit rings.
3067  *
3068  **********************************************************************/
3069 static void
3070 em_setup_transmit_structures(struct adapter *adapter)
3071 {
3072         struct tx_ring *txr = adapter->tx_rings;
3073
3074         for (int i = 0; i < adapter->num_queues; i++, txr++)
3075                 em_setup_transmit_ring(txr);
3076
3077         return;
3078 }
3079
3080 /*********************************************************************
3081  *
3082  *  Enable transmit unit.
3083  *
3084  **********************************************************************/
3085 static void
3086 em_initialize_transmit_unit(struct adapter *adapter)
3087 {
3088         struct tx_ring  *txr = adapter->tx_rings;
3089         struct e1000_hw *hw = &adapter->hw;
3090         u32     tctl, tarc, tipg = 0;
3091
3092         INIT_DEBUGOUT("em_initialize_transmit_unit: begin");
3093
3094         for (int i = 0; i < adapter->num_queues; i++, txr++) {
3095                 u64 bus_addr = txr->txdma.dma_paddr;
3096                 /* Base and Len of TX Ring */
3097                 E1000_WRITE_REG(hw, E1000_TDLEN(i),
3098                     adapter->num_tx_desc * sizeof(struct e1000_tx_desc));
3099                 E1000_WRITE_REG(hw, E1000_TDBAH(i),
3100                     (u32)(bus_addr >> 32));
3101                 E1000_WRITE_REG(hw, E1000_TDBAL(i),
3102                     (u32)bus_addr);
3103                 /* Init the HEAD/TAIL indices */
3104                 E1000_WRITE_REG(hw, E1000_TDT(i), 0);
3105                 E1000_WRITE_REG(hw, E1000_TDH(i), 0);
3106
3107                 HW_DEBUGOUT2("Base = %x, Length = %x\n",
3108                     E1000_READ_REG(&adapter->hw, E1000_TDBAL(i)),
3109                     E1000_READ_REG(&adapter->hw, E1000_TDLEN(i)));
3110
3111                 txr->watchdog_check = FALSE;
3112         }
3113
3114         /* Set the default values for the Tx Inter Packet Gap timer */
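             /*
              * TIPG packs three gap fields into one register: IPGT in the
              * low bits, with IPGR1 and IPGR2 OR'd in at their respective
              * shifts; that is what each case below assembles.
              */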
3115         switch (adapter->hw.mac.type) {
3116         case e1000_82542:
3117                 tipg = DEFAULT_82542_TIPG_IPGT;
3118                 tipg |= DEFAULT_82542_TIPG_IPGR1 << E1000_TIPG_IPGR1_SHIFT;
3119                 tipg |= DEFAULT_82542_TIPG_IPGR2 << E1000_TIPG_IPGR2_SHIFT;
3120                 break;
3121         case e1000_80003es2lan:
3122                 tipg = DEFAULT_82543_TIPG_IPGR1;
3123                 tipg |= DEFAULT_80003ES2LAN_TIPG_IPGR2 <<
3124                     E1000_TIPG_IPGR2_SHIFT;
3125                 break;
3126         default:
3127                 if ((adapter->hw.phy.media_type == e1000_media_type_fiber) ||
3128                     (adapter->hw.phy.media_type ==
3129                     e1000_media_type_internal_serdes))
3130                         tipg = DEFAULT_82543_TIPG_IPGT_FIBER;
3131                 else
3132                         tipg = DEFAULT_82543_TIPG_IPGT_COPPER;
3133                 tipg |= DEFAULT_82543_TIPG_IPGR1 << E1000_TIPG_IPGR1_SHIFT;
3134                 tipg |= DEFAULT_82543_TIPG_IPGR2 << E1000_TIPG_IPGR2_SHIFT;
3135         }
3136
3137         E1000_WRITE_REG(&adapter->hw, E1000_TIPG, tipg);
3138         E1000_WRITE_REG(&adapter->hw, E1000_TIDV, adapter->tx_int_delay.value);
3139
3140         if (adapter->hw.mac.type >= e1000_82540)
3141                 E1000_WRITE_REG(&adapter->hw, E1000_TADV,
3142                     adapter->tx_abs_int_delay.value);
3143
3144         if ((adapter->hw.mac.type == e1000_82571) ||
3145             (adapter->hw.mac.type == e1000_82572)) {
3146                 tarc = E1000_READ_REG(&adapter->hw, E1000_TARC(0));
3147                 tarc |= SPEED_MODE_BIT;
3148                 E1000_WRITE_REG(&adapter->hw, E1000_TARC(0), tarc);
3149         } else if (adapter->hw.mac.type == e1000_80003es2lan) {
3150                 tarc = E1000_READ_REG(&adapter->hw, E1000_TARC(0));
3151                 tarc |= 1;
3152                 E1000_WRITE_REG(&adapter->hw, E1000_TARC(0), tarc);
3153                 tarc = E1000_READ_REG(&adapter->hw, E1000_TARC(1));
3154                 tarc |= 1;
3155                 E1000_WRITE_REG(&adapter->hw, E1000_TARC(1), tarc);
3156         }
3157
3158         adapter->txd_cmd = E1000_TXD_CMD_IFCS;
3159         if (adapter->tx_int_delay.value > 0)
3160                 adapter->txd_cmd |= E1000_TXD_CMD_IDE;
3161
3162         /* Program the Transmit Control Register */
3163         tctl = E1000_READ_REG(&adapter->hw, E1000_TCTL);
3164         tctl &= ~E1000_TCTL_CT;
3165         tctl |= (E1000_TCTL_PSP | E1000_TCTL_RTLC | E1000_TCTL_EN |
3166                    (E1000_COLLISION_THRESHOLD << E1000_CT_SHIFT));
3167
3168         if (adapter->hw.mac.type >= e1000_82571)
3169                 tctl |= E1000_TCTL_MULR;
3170
3171         /* This write will effectively turn on the transmit unit. */
3172         E1000_WRITE_REG(&adapter->hw, E1000_TCTL, tctl);
3173
3174 }
3175
3176
3177 /*********************************************************************
3178  *
3179  *  Free all transmit rings.
3180  *
3181  **********************************************************************/
3182 static void
3183 em_free_transmit_structures(struct adapter *adapter)
3184 {
3185         struct tx_ring *txr = adapter->tx_rings;
3186
3187         for (int i = 0; i < adapter->num_queues; i++, txr++) {
3188                 EM_TX_LOCK(txr);
3189                 em_free_transmit_buffers(txr);
3190                 em_dma_free(adapter, &txr->txdma);
3191                 EM_TX_UNLOCK(txr);
3192                 EM_TX_LOCK_DESTROY(txr);
3193         }
3194
3195         free(adapter->tx_rings, M_DEVBUF);
3196 }
3197
3198 /*********************************************************************
3199  *
3200  *  Free transmit ring related data structures.
3201  *
3202  **********************************************************************/
3203 static void
3204 em_free_transmit_buffers(struct tx_ring *txr)
3205 {
3206         struct adapter          *adapter = txr->adapter;
3207         struct em_buffer        *txbuf;
3208
3209         INIT_DEBUGOUT("free_transmit_ring: begin");
3210
3211         if (txr->tx_buffers == NULL)
3212                 return;
3213
3214         for (int i = 0; i < adapter->num_tx_desc; i++) {
3215                 txbuf = &txr->tx_buffers[i];
3216                 if (txbuf->m_head != NULL) {
3217                         bus_dmamap_sync(txr->txtag, txbuf->map,
3218                             BUS_DMASYNC_POSTWRITE);
3219                         bus_dmamap_unload(txr->txtag,
3220                             txbuf->map);
3221                         m_freem(txbuf->m_head);
3222                         txbuf->m_head = NULL;
3223                         if (txbuf->map != NULL) {
3224                                 bus_dmamap_destroy(txr->txtag,
3225                                     txbuf->map);
3226                                 txbuf->map = NULL;
3227                         }
3228                 } else if (txbuf->map != NULL) {
3229                         bus_dmamap_unload(txr->txtag,
3230                             txbuf->map);
3231                         bus_dmamap_destroy(txr->txtag,
3232                             txbuf->map);
3233                         txbuf->map = NULL;
3234                 }
3235         }
3236 #if __FreeBSD_version >= 800000
3237         if (txr->br != NULL)
3238                 buf_ring_free(txr->br, M_DEVBUF);
3239 #endif
3240         if (txr->tx_buffers != NULL) {
3241                 free(txr->tx_buffers, M_DEVBUF);
3242                 txr->tx_buffers = NULL;
3243         }
3244         if (txr->txtag != NULL) {
3245                 bus_dma_tag_destroy(txr->txtag);
3246                 txr->txtag = NULL;
3247         }
3248         return;
3249 }
3250
3251
3252 /*********************************************************************
3253  *
3254  *  The offload context needs to be set when we transfer the first
3255  *  packet of a particular protocol (TCP/UDP). This routine has been
3256  *  enhanced to deal with inserted VLAN headers, and IPV6 (not complete)
3257  *
3258  *  Added back the old method of keeping the current context type
3259  *  and not setting if unnecessary, as this is reported to be a
3260  *  big performance win.  -jfv
3261  **********************************************************************/
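     /*
      * Note that a context descriptor occupies a TX slot just like a
      * data descriptor, so skipping the rewrite when last_hw_offload is
      * unchanged saves both a slot and a descriptor write per packet.
      */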
3262 static void
3263 em_transmit_checksum_setup(struct tx_ring *txr, struct mbuf *mp,
3264     u32 *txd_upper, u32 *txd_lower)
3265 {
3266         struct adapter                  *adapter = txr->adapter;
3267         struct e1000_context_desc       *TXD = NULL;
3268         struct em_buffer *tx_buffer;
3269         struct ether_vlan_header *eh;
3270         struct ip *ip = NULL;
3271         struct ip6_hdr *ip6;
3272         int cur, ehdrlen;
3273         u32 cmd, hdr_len, ip_hlen;
3274         u16 etype;
3275         u8 ipproto;
3276
3277
3278         cmd = hdr_len = ipproto = 0;
3279         cur = txr->next_avail_desc;
3280
3281         /*
3282          * Determine where frame payload starts.
3283          * Jump over vlan headers if already present,
3284          * helpful for QinQ too.
3285          */
3286         eh = mtod(mp, struct ether_vlan_header *);
3287         if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
3288                 etype = ntohs(eh->evl_proto);
3289                 ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
3290         } else {
3291                 etype = ntohs(eh->evl_encap_proto);
3292                 ehdrlen = ETHER_HDR_LEN;
3293         }
3294
3295         /*
3296          * We only support TCP/UDP for IPv4 and IPv6 for the moment.
3297          * TODO: Support SCTP too when it hits the tree.
3298          */
3299         switch (etype) {
3300         case ETHERTYPE_IP:
3301                 ip = (struct ip *)(mp->m_data + ehdrlen);
3302                 ip_hlen = ip->ip_hl << 2;
3303
3304                 /* Setup of IP header checksum. */
3305                 if (mp->m_pkthdr.csum_flags & CSUM_IP) {
3306                         /*
3307                          * Start offset for header checksum calculation.
3308                          * End offset for header checksum calculation.
3309                          * Offset of place to put the checksum.
3310                          */
3311                         TXD = (struct e1000_context_desc *)
3312                             &txr->tx_base[cur];
3313                         TXD->lower_setup.ip_fields.ipcss = ehdrlen;
3314                         TXD->lower_setup.ip_fields.ipcse =
3315                             htole16(ehdrlen + ip_hlen);
3316                         TXD->lower_setup.ip_fields.ipcso =
3317                             ehdrlen + offsetof(struct ip, ip_sum);
3318                         cmd |= E1000_TXD_CMD_IP;
3319                         *txd_upper |= E1000_TXD_POPTS_IXSM << 8;
3320                 }
3321
3322                 if (mp->m_len < ehdrlen + ip_hlen)
3323                         return; /* failure */
3324
3325                 hdr_len = ehdrlen + ip_hlen;
3326                 ipproto = ip->ip_p;
3327
3328                 break;
3329         case ETHERTYPE_IPV6:
3330                 ip6 = (struct ip6_hdr *)(mp->m_data + ehdrlen);
3331                 ip_hlen = sizeof(struct ip6_hdr); /* XXX: No header stacking. */
3332
3333                 if (mp->m_len < ehdrlen + ip_hlen)
3334                         return; /* failure */
3335
3336                 /* IPv6 doesn't have a header checksum. */
3337
3338                 hdr_len = ehdrlen + ip_hlen;
3339                 ipproto = ip6->ip6_nxt;
3340
3341                 break;
3342         default:
3343                 *txd_upper = 0;
3344                 *txd_lower = 0;
3345                 return;
3346         }
3347
3348         switch (ipproto) {
3349         case IPPROTO_TCP:
3350                 if (mp->m_pkthdr.csum_flags & CSUM_TCP) {
3351                         *txd_lower = E1000_TXD_CMD_DEXT | E1000_TXD_DTYP_D;
3352                         *txd_upper |= E1000_TXD_POPTS_TXSM << 8;
3353                         /* no need for context if already set */
3354                         if (txr->last_hw_offload == CSUM_TCP)
3355                                 return;
3356                         txr->last_hw_offload = CSUM_TCP;
3357                         /*
3358                          * Start offset for payload checksum calculation.
3359                          * End offset for payload checksum calculation.
3360                          * Offset of place to put the checksum.
3361                          */
3362                         TXD = (struct e1000_context_desc *)
3363                             &txr->tx_base[cur];
3364                         TXD->upper_setup.tcp_fields.tucss = hdr_len;
3365                         TXD->upper_setup.tcp_fields.tucse = htole16(0);
3366                         TXD->upper_setup.tcp_fields.tucso =
3367                             hdr_len + offsetof(struct tcphdr, th_sum);
3368                         cmd |= E1000_TXD_CMD_TCP;
3369                 }
3370                 break;
3371         case IPPROTO_UDP:
3372         {
3373                 if (mp->m_pkthdr.csum_flags & CSUM_UDP) {
3374                         *txd_lower = E1000_TXD_CMD_DEXT | E1000_TXD_DTYP_D;
3375                         *txd_upper |= E1000_TXD_POPTS_TXSM << 8;
3376                         /* no need for context if already set */
3377                         if (txr->last_hw_offload == CSUM_UDP)
3378                                 return;
3379                         txr->last_hw_offload = CSUM_UDP;
3380                         /*
3381                          * Start offset for header checksum calculation.
3382                          * End offset for header checksum calculation.
3383                          * Offset of place to put the checksum.
3384                          */
3385                         TXD = (struct e1000_context_desc *)
3386                             &txr->tx_base[cur];
3387                         TXD->upper_setup.tcp_fields.tucss = hdr_len;
3388                         TXD->upper_setup.tcp_fields.tucse = htole16(0);
3389                         TXD->upper_setup.tcp_fields.tucso =
3390                             hdr_len + offsetof(struct udphdr, uh_sum);
3391                 }
3392                 /* Fall Thru */
3393         }
3394         default:
3395                 break;
3396         }
3397
             if (TXD == NULL)        /* no offload matched, no context built */
                     return;
3398         TXD->tcp_seg_setup.data = htole32(0);
3399         TXD->cmd_and_length =
3400             htole32(adapter->txd_cmd | E1000_TXD_CMD_DEXT | cmd);
3401         tx_buffer = &txr->tx_buffers[cur];
3402         tx_buffer->m_head = NULL;
3403         tx_buffer->next_eop = -1;
3404
3405         if (++cur == adapter->num_tx_desc)
3406                 cur = 0;
3407
3408         txr->tx_avail--;
3409         txr->next_avail_desc = cur;
3410 }
3411
3412
3413 /**********************************************************************
3414  *
3415  *  Setup work for hardware segmentation offload (TSO)
3416  *
3417  **********************************************************************/
3418 static bool
3419 em_tso_setup(struct tx_ring *txr, struct mbuf *mp, u32 *txd_upper,
3420    u32 *txd_lower)
3421 {
3422         struct adapter                  *adapter = txr->adapter;
3423         struct e1000_context_desc       *TXD;
3424         struct em_buffer                *tx_buffer;
3425         struct ether_vlan_header        *eh;
3426         struct ip                       *ip;
3427         struct ip6_hdr                  *ip6;
3428         struct tcphdr                   *th;
3429         int cur, ehdrlen, hdr_len, ip_hlen, isip6;
3430         u16 etype;
3431
3432         /*
3433          * This function could/should be extended to support IP/IPv6
3434          * fragmentation as well.  But as they say, one step at a time.
3435          */
3436
3437         /*
3438          * Determine where frame payload starts.
3439          * Jump over vlan headers if already present,
3440          * helpful for QinQ too.
3441          */
3442         eh = mtod(mp, struct ether_vlan_header *);
3443         if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
3444                 etype = ntohs(eh->evl_proto);
3445                 ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
3446         } else {
3447                 etype = ntohs(eh->evl_encap_proto);
3448                 ehdrlen = ETHER_HDR_LEN;
3449         }
3450
3451         /* Ensure we have at least the IP+TCP header in the first mbuf. */
3452         if (mp->m_len < ehdrlen + sizeof(struct ip) + sizeof(struct tcphdr))
3453                 return FALSE;   /* -1 */
3454
3455         /*
3456          * We only support TCP over IPv4 for now; the IPv6 path below
3457          * is not yet enabled.  TODO: Support SCTP when it hits the tree.
3458          */
3459         switch (etype) {
3460         case ETHERTYPE_IP:
3461                 isip6 = 0;
3462                 ip = (struct ip *)(mp->m_data + ehdrlen);
3463                 if (ip->ip_p != IPPROTO_TCP)
3464                         return FALSE;   /* 0 */
3465                 ip->ip_len = 0;
3466                 ip->ip_sum = 0;
3467                 ip_hlen = ip->ip_hl << 2;
3468                 if (mp->m_len < ehdrlen + ip_hlen + sizeof(struct tcphdr))
3469                         return FALSE;   /* -1 */
3470                 th = (struct tcphdr *)((caddr_t)ip + ip_hlen);
3471 #if 1
3472                 th->th_sum = in_pseudo(ip->ip_src.s_addr,
3473                     ip->ip_dst.s_addr, htons(IPPROTO_TCP));
3474 #else
3475                 th->th_sum = mp->m_pkthdr.csum_data;
3476 #endif
3477                 break;
3478         case ETHERTYPE_IPV6:
3479                 isip6 = 1;
3480                 return FALSE;                   /* Not supported yet. */
3481                 ip6 = (struct ip6_hdr *)(mp->m_data + ehdrlen);
3482                 if (ip6->ip6_nxt != IPPROTO_TCP)
3483                         return FALSE;   /* 0 */
3484                 ip6->ip6_plen = 0;
3485                 ip_hlen = sizeof(struct ip6_hdr); /* XXX: no header stacking. */
3486                 if (mp->m_len < ehdrlen + ip_hlen + sizeof(struct tcphdr))
3487                         return FALSE;   /* -1 */
3488                 th = (struct tcphdr *)((caddr_t)ip6 + ip_hlen);
3489 #if 0
3490                 th->th_sum = in6_pseudo(ip6->ip6_src, ip6->ip6_dst,
3491                     htons(IPPROTO_TCP));        /* XXX: function notyet. */
3492 #else
3493                 th->th_sum = mp->m_pkthdr.csum_data;
3494 #endif
3495                 break;
3496         default:
3497                 return FALSE;
3498         }
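             /* th_off counts 32-bit words, so "<< 2" converts it to bytes */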
3499         hdr_len = ehdrlen + ip_hlen + (th->th_off << 2);
3500
3501         *txd_lower = (E1000_TXD_CMD_DEXT |      /* Extended descr type */
3502                       E1000_TXD_DTYP_D |        /* Data descr type */
3503                       E1000_TXD_CMD_TSE);       /* Do TSE on this packet */
3504
3505         /* IP and/or TCP header checksum calculation and insertion. */
3506         *txd_upper = ((isip6 ? 0 : E1000_TXD_POPTS_IXSM) |
3507                       E1000_TXD_POPTS_TXSM) << 8;
3508
3509         cur = txr->next_avail_desc;
3510         tx_buffer = &txr->tx_buffers[cur];
3511         TXD = (struct e1000_context_desc *) &txr->tx_base[cur];
3512
3513         /* IPv6 doesn't have a header checksum. */
3514         if (!isip6) {
3515                 /*
3516                  * Start offset for header checksum calculation.
3517                  * End offset for header checksum calculation.
3518                  * Offset of place to put the checksum.
3519                  */
3520                 TXD->lower_setup.ip_fields.ipcss = ehdrlen;
3521                 TXD->lower_setup.ip_fields.ipcse =
3522                     htole16(ehdrlen + ip_hlen - 1);
3523                 TXD->lower_setup.ip_fields.ipcso =
3524                     ehdrlen + offsetof(struct ip, ip_sum);
3525         }
3526         /*
3527          * Start offset for payload checksum calculation.
3528          * End offset for payload checksum calculation.
3529          * Offset of place to put the checksum.
3530          */
3531         TXD->upper_setup.tcp_fields.tucss =
3532             ehdrlen + ip_hlen;
3533         TXD->upper_setup.tcp_fields.tucse = 0;
3534         TXD->upper_setup.tcp_fields.tucso =
3535             ehdrlen + ip_hlen + offsetof(struct tcphdr, th_sum);
3536         /*
3537          * Payload size per packet w/o any headers.
3538          * Length of all headers up to payload.
3539          */
3540         TXD->tcp_seg_setup.fields.mss = htole16(mp->m_pkthdr.tso_segsz);
3541         TXD->tcp_seg_setup.fields.hdr_len = hdr_len;
3542
3543         TXD->cmd_and_length = htole32(adapter->txd_cmd |
3544                                 E1000_TXD_CMD_DEXT |    /* Extended descr */
3545                                 E1000_TXD_CMD_TSE |     /* TSE context */
3546                                 (isip6 ? 0 : E1000_TXD_CMD_IP) | 
3547                                 E1000_TXD_CMD_TCP |     /* Do TCP checksum */
3548                                 (mp->m_pkthdr.len - (hdr_len))); /* Total len */
3549
3550         tx_buffer->m_head = NULL;
3551         tx_buffer->next_eop = -1;
3552
3553         if (++cur == adapter->num_tx_desc)
3554                 cur = 0;
3555
3556         txr->tx_avail--;
3557         txr->next_avail_desc = cur;
3558         txr->tx_tso = TRUE;
3559
3560         return TRUE;
3561 }
3562
3563
3564 /**********************************************************************
3565  *
3566  *  Examine each tx_buffer in the used queue. If the hardware is done
3567  *  processing the packet then free associated resources. The
3568  *  tx_buffer is put back on the free queue.
3569  *
3570  **********************************************************************/
3571 static bool
3572 em_txeof(struct tx_ring *txr)
3573 {
3574         struct adapter  *adapter = txr->adapter;
3575         int first, last, done, num_avail;
3576         struct em_buffer *tx_buffer;
3577         struct e1000_tx_desc   *tx_desc, *eop_desc;
3578         struct ifnet   *ifp = adapter->ifp;
3579
3580         EM_TX_LOCK_ASSERT(txr);
3581
3582         if (txr->tx_avail == adapter->num_tx_desc)
3583                 return (FALSE);
3584
3585         num_avail = txr->tx_avail;
3586         first = txr->next_to_clean;
3587         tx_desc = &txr->tx_base[first];
3588         tx_buffer = &txr->tx_buffers[first];
3589         last = tx_buffer->next_eop;
3590         eop_desc = &txr->tx_base[last];
3591
3592         /*
3593          * Advance "last" to the index of the first descriptor
3594          * AFTER the EOP of the first packet; that way the
3595          * cleanup loop below can use a simple comparison
3596          * against "done".
3597          */
3598         if (++last == adapter->num_tx_desc)
3599                 last = 0;
3600         done = last;
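             /*
              * e.g. in a 1024-descriptor ring, a next_eop of 1023 wraps
              * "done" to 0, so the inner loop cleans through index 1023
              * and exits once "first" wraps to 0.
              */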
3601
3602         bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
3603             BUS_DMASYNC_POSTREAD);
3604
3605         while (eop_desc->upper.fields.status & E1000_TXD_STAT_DD) {
3606                 /* We clean the range of the packet */
3607                 while (first != done) {
3608                         tx_desc->upper.data = 0;
3609                         tx_desc->lower.data = 0;
3610                         tx_desc->buffer_addr = 0;
3611                         ++num_avail;
3612
3613                         if (tx_buffer->m_head) {
3614                                 ifp->if_opackets++;
3615                                 bus_dmamap_sync(txr->txtag,
3616                                     tx_buffer->map,
3617                                     BUS_DMASYNC_POSTWRITE);
3618                                 bus_dmamap_unload(txr->txtag,
3619                                     tx_buffer->map);
3620
3621                                 m_freem(tx_buffer->m_head);
3622                                 tx_buffer->m_head = NULL;
3623                         }
3624                         tx_buffer->next_eop = -1;
3625                         txr->watchdog_time = ticks;
3626
3627                         if (++first == adapter->num_tx_desc)
3628                                 first = 0;
3629
3630                         tx_buffer = &txr->tx_buffers[first];
3631                         tx_desc = &txr->tx_base[first];
3632                 }
3633                 /* See if we can continue to the next packet */
3634                 last = tx_buffer->next_eop;
3635                 if (last != -1) {
3636                         eop_desc = &txr->tx_base[last];
3637                         /* Get new done point */
3638                         if (++last == adapter->num_tx_desc) last = 0;
3639                         done = last;
3640                 } else
3641                         break;
3642         }
3643         bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
3644             BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
3645
3646         txr->next_to_clean = first;
3647
3648         /*
3649          * If we have enough room, clear IFF_DRV_OACTIVE to
3650          * tell the stack that it is OK to send packets.
3651          * If there are no pending descriptors, clear the watchdog.
3652          */
3653         if (num_avail > EM_TX_CLEANUP_THRESHOLD) {                
3654                 ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;
3655                 if (num_avail == adapter->num_tx_desc) {
3656                         txr->watchdog_check = FALSE;
3657                         txr->tx_avail = num_avail;
3658                         return (FALSE);
3659                 } 
3660         }
3661
3662         txr->tx_avail = num_avail;
3663         return (TRUE);
3664 }
3665
3666
3667 /*********************************************************************
3668  *
3669  *  Refresh RX descriptor mbufs from system mbuf buffer pool.
3670  *
3671  **********************************************************************/
3672 static void
3673 em_refresh_mbufs(struct rx_ring *rxr, int limit)
3674 {
3675         struct adapter          *adapter = rxr->adapter;
3676         struct mbuf             *m;
3677         bus_dma_segment_t       segs[1];
3678         bus_dmamap_t            map;
3679         struct em_buffer        *rxbuf;
3680         int                     i, error, nsegs, cleaned;
3681
3682         i = rxr->next_to_refresh;
3683         cleaned = -1;
3684         while (i != limit) {
3685                 m = m_getcl(M_DONTWAIT, MT_DATA, M_PKTHDR);
3686                 if (m == NULL)
3687                         goto update;
3688                 m->m_len = m->m_pkthdr.len = MCLBYTES;
3689
3690                 if (adapter->max_frame_size <= (MCLBYTES - ETHER_ALIGN))
3691                         m_adj(m, ETHER_ALIGN);
3692
3693                 /*
3694                  * Using memory from the mbuf cluster pool, invoke the
3695                  * bus_dma machinery to arrange the memory mapping.
3696                  */
3697                 error = bus_dmamap_load_mbuf_sg(rxr->rxtag, rxr->rx_sparemap,
3698                     m, segs, &nsegs, BUS_DMA_NOWAIT);
3699                 if (error != 0) {
3700                         m_free(m);
3701                         goto update;
3702                 }
3703
3704                 /* If nsegs is wrong then the stack is corrupt. */
3705                 KASSERT(nsegs == 1, ("Too many segments returned!"));
3706         
3707                 rxbuf = &rxr->rx_buffers[i];
3708                 if (rxbuf->m_head != NULL)
3709                         bus_dmamap_unload(rxr->rxtag, rxbuf->map);
3710         
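                     /*
                      * The fresh mbuf was loaded into rx_sparemap above; swap
                      * that map into place so this buffer's old map becomes
                      * the new spare, avoiding a create/destroy per refresh.
                      */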
3711                 map = rxbuf->map;
3712                 rxbuf->map = rxr->rx_sparemap;
3713                 rxr->rx_sparemap = map;
3714                 bus_dmamap_sync(rxr->rxtag,
3715                     rxbuf->map, BUS_DMASYNC_PREREAD);
3716                 rxbuf->m_head = m;
3717                 rxr->rx_base[i].buffer_addr = htole64(segs[0].ds_addr);
3718
3719                 cleaned = i;
3720                 /* Calculate next index */
3721                 if (++i == adapter->num_rx_desc)
3722                         i = 0;
3723                 /* This is the work marker for refresh */
3724                 rxr->next_to_refresh = i;
3725         }
3726 update:
3727         bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
3728             BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
3729         if (cleaned != -1) /* Update tail index */
3730                 E1000_WRITE_REG(&adapter->hw,
3731                     E1000_RDT(rxr->me), cleaned);
3732
3733         return;
3734 }
3735
3736
3737 /*********************************************************************
3738  *
3739  *  Allocate memory for rx_buffer structures. Since we use one
3740  *  rx_buffer per received packet, the maximum number of rx_buffer's
3741  *  that we'll need is equal to the number of receive descriptors
3742  *  that we've allocated.
3743  *
3744  **********************************************************************/
3745 static int
3746 em_allocate_receive_buffers(struct rx_ring *rxr)
3747 {
3748         struct adapter          *adapter = rxr->adapter;
3749         device_t                dev = adapter->dev;
3750         struct em_buffer        *rxbuf;
3751         int                     error;
3752
3753         rxr->rx_buffers = malloc(sizeof(struct em_buffer) *
3754             adapter->num_rx_desc, M_DEVBUF, M_NOWAIT | M_ZERO);
3755         if (rxr->rx_buffers == NULL) {
3756                 device_printf(dev, "Unable to allocate rx_buffer memory\n");
3757                 return (ENOMEM);
3758         }
3759
3760         error = bus_dma_tag_create(bus_get_dma_tag(dev), /* parent */
3761                                 1, 0,                   /* alignment, bounds */
3762                                 BUS_SPACE_MAXADDR,      /* lowaddr */
3763                                 BUS_SPACE_MAXADDR,      /* highaddr */
3764                                 NULL, NULL,             /* filter, filterarg */
3765                                 MCLBYTES,               /* maxsize */
3766                                 1,                      /* nsegments */
3767                                 MCLBYTES,               /* maxsegsize */
3768                                 0,                      /* flags */
3769                                 NULL,                   /* lockfunc */
3770                                 NULL,                   /* lockarg */
3771                                 &rxr->rxtag);
3772         if (error) {
3773                 device_printf(dev, "%s: bus_dma_tag_create failed %d\n",
3774                     __func__, error);
3775                 goto fail;
3776         }
3777
3778         /* Create the spare map (used by getbuf) */
3779         error = bus_dmamap_create(rxr->rxtag, BUS_DMA_NOWAIT,
3780              &rxr->rx_sparemap);
3781         if (error) {
3782                 device_printf(dev, "%s: bus_dmamap_create failed: %d\n",
3783                     __func__, error);
3784                 goto fail;
3785         }
3786
3787         rxbuf = rxr->rx_buffers;
3788         for (int i = 0; i < adapter->num_rx_desc; i++, rxbuf++) {
3790                 error = bus_dmamap_create(rxr->rxtag, BUS_DMA_NOWAIT,
3791                     &rxbuf->map);
3792                 if (error) {
3793                         device_printf(dev, "%s: bus_dmamap_create failed: %d\n",
3794                             __func__, error);
3795                         goto fail;
3796                 }
3797         }
3798
3799         return (0);
3800
3801 fail:
3802         em_free_receive_structures(adapter);
3803         return (error);
3804 }
3805
3806
3807 /*********************************************************************
3808  *
3809  *  Initialize a receive ring and its buffers.
3810  *
3811  **********************************************************************/
3812 static int
3813 em_setup_receive_ring(struct rx_ring *rxr)
3814 {
3815         struct  adapter         *adapter = rxr->adapter;
3816         struct em_buffer        *rxbuf;
3817         bus_dma_segment_t       seg[1];
3818         int                     rsize, nsegs, error;
3819
3820
3821         /* Clear the ring contents */
3822         EM_RX_LOCK(rxr);
3823         rsize = roundup2(adapter->num_rx_desc *
3824             sizeof(struct e1000_rx_desc), EM_DBA_ALIGN);
3825         bzero((void *)rxr->rx_base, rsize);
3826
3827         /*
3828         ** Free current RX buffer structs and their mbufs
3829         */
3830         for (int i = 0; i < adapter->num_rx_desc; i++) {
3831                 rxbuf = &rxr->rx_buffers[i];
3832                 if (rxbuf->m_head != NULL) {
3833                         bus_dmamap_sync(rxr->rxtag, rxbuf->map,
3834                             BUS_DMASYNC_POSTREAD);
3835                         bus_dmamap_unload(rxr->rxtag, rxbuf->map);
3836                         m_freem(rxbuf->m_head);
3837                 }
3838         }
3839
3840         /* Now replenish the mbufs */
3841         for (int j = 0; j != adapter->num_rx_desc; ++j) {
3842
3843                 rxbuf = &rxr->rx_buffers[j];
3844                 rxbuf->m_head = m_getcl(M_DONTWAIT, MT_DATA, M_PKTHDR);
3845                 if (rxbuf->m_head == NULL)
3846                         panic("RX ring mbuf allocation failed!\n");
3847                 rxbuf->m_head->m_len = MCLBYTES;
3848                 rxbuf->m_head->m_flags &= ~M_HASFCS; /* we strip it */
3849                 rxbuf->m_head->m_pkthdr.len = MCLBYTES;
3850
3851                 /* Get the memory mapping */
3852                 error = bus_dmamap_load_mbuf_sg(rxr->rxtag,
3853                     rxbuf->map, rxbuf->m_head, seg,
3854                     &nsegs, BUS_DMA_NOWAIT);
3855                 if (error != 0)
3856                         panic("RX ring dma initialization failed!\n");
3857                 bus_dmamap_sync(rxr->rxtag,
3858                     rxbuf->map, BUS_DMASYNC_PREREAD);
3859
3860                 /* Update descriptor */
3861                 rxr->rx_base[j].buffer_addr = htole64(seg[0].ds_addr);
3862         }
3863
3864
3865         /* Setup our descriptor indices */
3866         rxr->next_to_check = 0;
3867         rxr->next_to_refresh = 0;
3868
3869         bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
3870             BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
3871
3872         EM_RX_UNLOCK(rxr);
3873         return (0);
3874 }
3875
3876 /*********************************************************************
3877  *
3878  *  Initialize all receive rings.
3879  *
3880  **********************************************************************/
3881 static int
3882 em_setup_receive_structures(struct adapter *adapter)
3883 {
3884         struct rx_ring *rxr = adapter->rx_rings;
3885         int j;
3886
3887         for (j = 0; j < adapter->num_queues; j++, rxr++)
3888                 if (em_setup_receive_ring(rxr))
3889                         goto fail;
3890
3891         return (0);
3892 fail:
3893         /*
3894          * Free the RX buffers allocated so far; we handle only the
3895          * rings that completed, since the failing ring cleans up
3896          * after itself.  Ring 'j' failed, so it's the terminus.
3897          */
3898         for (int i = 0; i < j; ++i) {
3899                 rxr = &adapter->rx_rings[i];
3900                 for (int n = 0; n < adapter->num_rx_desc; n++) {
3901                         struct em_buffer *rxbuf;
3902                         rxbuf = &rxr->rx_buffers[n];
3903                         if (rxbuf->m_head != NULL) {
3904                                 bus_dmamap_sync(rxr->rxtag, rxbuf->map,
3905                                   BUS_DMASYNC_POSTREAD);
3906                                 bus_dmamap_unload(rxr->rxtag, rxbuf->map);
3907                                 m_freem(rxbuf->m_head);
3908                                 rxbuf->m_head = NULL;
3909                         }
3910                 }
3911         }
3912
3913         return (ENOBUFS);
3914 }
3915
3916 /*********************************************************************
3917  *
3918  *  Free all receive rings.
3919  *
3920  **********************************************************************/
3921 static void
3922 em_free_receive_structures(struct adapter *adapter)
3923 {
3924         struct rx_ring *rxr = adapter->rx_rings;
3925
3926         for (int i = 0; i < adapter->num_queues; i++, rxr++) {
3927                 em_free_receive_buffers(rxr);
3928                 /* Free the ring memory as well */
3929                 em_dma_free(adapter, &rxr->rxdma);
3930                 EM_RX_LOCK_DESTROY(rxr);
3931         }
3932
3933         free(adapter->rx_rings, M_DEVBUF);
3934 }
3935
3936
3937 /*********************************************************************
3938  *
3939  *  Free receive ring data structures
3940  *
3941  **********************************************************************/
3942 static void
3943 em_free_receive_buffers(struct rx_ring *rxr)
3944 {
3945         struct adapter          *adapter = rxr->adapter;
3946         struct em_buffer        *rxbuf = NULL;
3947
3948         INIT_DEBUGOUT("free_receive_buffers: begin");
3949
3950         if (rxr->rx_sparemap) {
3951                 bus_dmamap_destroy(rxr->rxtag, rxr->rx_sparemap);
3952                 rxr->rx_sparemap = NULL;
3953         }
3954
3955         if (rxr->rx_buffers != NULL) {
3956                 for (int i = 0; i < adapter->num_rx_desc; i++) {
3957                         rxbuf = &rxr->rx_buffers[i];
3958                         if (rxbuf->map != NULL) {
3959                                 bus_dmamap_sync(rxr->rxtag, rxbuf->map,
3960                                     BUS_DMASYNC_POSTREAD);
3961                                 bus_dmamap_unload(rxr->rxtag, rxbuf->map);
3962                                 bus_dmamap_destroy(rxr->rxtag, rxbuf->map);
3963                         }
3964                         if (rxbuf->m_head != NULL) {
3965                                 m_freem(rxbuf->m_head);
3966                                 rxbuf->m_head = NULL;
3967                         }
3968                 }
3969                 free(rxr->rx_buffers, M_DEVBUF);
3970                 rxr->rx_buffers = NULL;
3971         }
3972
3973         if (rxr->rxtag != NULL) {
3974                 bus_dma_tag_destroy(rxr->rxtag);
3975                 rxr->rxtag = NULL;
3976         }
3977
3978         return;
3979 }
3980
3981
3982 /*********************************************************************
3983  *
3984  *  Enable receive unit.
3985  *
3986  **********************************************************************/
3987 #define MAX_INTS_PER_SEC        8000
3988 #define DEFAULT_ITR          1000000000/(MAX_INTS_PER_SEC * 256)
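     /* Each ITR tick is 256ns, so DEFAULT_ITR = 10^9/(8000 * 256) = 488 */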
3989
3990 static void
3991 em_initialize_receive_unit(struct adapter *adapter)
3992 {
3993         struct rx_ring  *rxr = adapter->rx_rings;
3994         struct ifnet    *ifp = adapter->ifp;
3995         struct e1000_hw *hw = &adapter->hw;
3996         u64     bus_addr;
3997         u32     rctl, rxcsum;
3998
3999         INIT_DEBUGOUT("em_initialize_receive_unit: begin");
4000
4001         /*
4002          * Make sure receives are disabled while setting
4003          * up the descriptor ring
4004          */
4005         rctl = E1000_READ_REG(hw, E1000_RCTL);
4006         E1000_WRITE_REG(hw, E1000_RCTL, rctl & ~E1000_RCTL_EN);
4007
4008         E1000_WRITE_REG(&adapter->hw, E1000_RADV,
4009             adapter->rx_abs_int_delay.value);
4010         /*
4011          * Set the interrupt throttling rate. Value is calculated
4012          * as DEFAULT_ITR = 1/(MAX_INTS_PER_SEC * 256ns)
4013          */
4014         E1000_WRITE_REG(hw, E1000_ITR, DEFAULT_ITR);
4015
4016         /*
4017         ** When using MSIX interrupts we need to throttle
4018         ** using the EITR register (82574 only)
4019         */
4020         if (hw->mac.type == e1000_82574)
4021                 for (int i = 0; i < 4; i++)
4022                         E1000_WRITE_REG(hw, E1000_EITR_82574(i),
4023                             DEFAULT_ITR);
4024
4025         /* Disable accelerated acknowledgment */
4026         if (adapter->hw.mac.type == e1000_82574)
4027                 E1000_WRITE_REG(hw, E1000_RFCTL, E1000_RFCTL_ACK_DIS);
4028
4029         if (ifp->if_capenable & IFCAP_RXCSUM) {
4030                 rxcsum = E1000_READ_REG(hw, E1000_RXCSUM);
4031                 rxcsum |= (E1000_RXCSUM_IPOFL | E1000_RXCSUM_TUOFL);
4032                 E1000_WRITE_REG(hw, E1000_RXCSUM, rxcsum);
4033         }
4034
4035         /*
4036         ** XXX TEMPORARY WORKAROUND: on some systems with 82573
4037         ** long latencies are observed, like Lenovo X60. This
4038         ** change eliminates the problem, but since having positive
4039         ** values in RDTR is a known source of problems on other
4040         ** platforms another solution is being sought.
4041         */
4042         if (hw->mac.type == e1000_82573)
4043                 E1000_WRITE_REG(hw, E1000_RDTR, 0x20);
4044
4045         for (int i = 0; i < adapter->num_queues; i++, rxr++) {
4046                 /* Setup the Base and Length of the Rx Descriptor Ring */
4047                 bus_addr = rxr->rxdma.dma_paddr;
4048                 E1000_WRITE_REG(hw, E1000_RDLEN(i),
4049                     adapter->num_rx_desc * sizeof(struct e1000_rx_desc));
4050                 E1000_WRITE_REG(hw, E1000_RDBAH(i), (u32)(bus_addr >> 32));
4051                 E1000_WRITE_REG(hw, E1000_RDBAL(i), (u32)bus_addr);
4052                 /* Setup the Head and Tail Descriptor Pointers */
4053                 E1000_WRITE_REG(hw, E1000_RDH(i), 0);
4054                 E1000_WRITE_REG(hw, E1000_RDT(i), adapter->num_rx_desc - 1);
4055         }
4056
4057         /* Setup the Receive Control Register */
4058         rctl &= ~(3 << E1000_RCTL_MO_SHIFT);
4059         rctl |= E1000_RCTL_EN | E1000_RCTL_BAM |
4060             E1000_RCTL_LBM_NO | E1000_RCTL_RDMTS_HALF |
4061             (hw->mac.mc_filter_type << E1000_RCTL_MO_SHIFT);
4062
4063         /* Strip the CRC */
4064         rctl |= E1000_RCTL_SECRC;
4065
4066         /* Make sure VLAN Filters are off */
4067         rctl &= ~E1000_RCTL_VFE;
4068         rctl &= ~E1000_RCTL_SBP;
4069         rctl |= E1000_RCTL_SZ_2048;
4070         if (ifp->if_mtu > ETHERMTU)
4071                 rctl |= E1000_RCTL_LPE;
4072         else
4073                 rctl &= ~E1000_RCTL_LPE;
4074
4075         /* Write out the settings */
4076         E1000_WRITE_REG(hw, E1000_RCTL, rctl);
4077
4078         return;
4079 }
4080
4081
4082 /*********************************************************************
4083  *
4084  *  This routine executes in interrupt context. It replenishes
4085  *  the mbufs in the descriptor ring and passes data that has been
4086  *  DMA'ed into host memory up to the network stack.
4087  *
4088  *  We loop at most count times if count is > 0, or until done if
4089  *  count < 0.
4090  *  
4091  *  For polling we also now return the number of cleaned packets
4092  *********************************************************************/
4093 static int
4094 em_rxeof(struct rx_ring *rxr, int count)
4095 {
4096         struct adapter          *adapter = rxr->adapter;
4097         struct ifnet            *ifp = adapter->ifp;
4098         struct mbuf             *mp, *sendmp;
4099         u8                      status = 0;
4100         u16                     len;
4101         int                     i, processed, rxdone = 0;
4102         bool                    eop;
4103         struct e1000_rx_desc    *cur;
4104
4105         EM_RX_LOCK_ASSERT(rxr);
4106
4107         for (i = rxr->next_to_check, processed = 0; count != 0;) {
4108
4109                 if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
4110                         break;
4111
4112                 bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
4113                     BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
4114
4115                 cur = &rxr->rx_base[i];
4116                 status = cur->status;
4117                 mp = sendmp = NULL;
4118
4119                 if ((status & E1000_RXD_STAT_DD) == 0)
4120                         break;
4121
4122                 len = le16toh(cur->length);
4123                 eop = (status & E1000_RXD_STAT_EOP) != 0;
4124                 count--;
4125
4126                 if ((cur->errors & E1000_RXD_ERR_FRAME_ERR_MASK) == 0) {
4127
4128                         /* Assign correct length to the current fragment */
4129                         mp = rxr->rx_buffers[i].m_head;
4130                         mp->m_len = len;
4131
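                             /*
                              * fmp points at the first mbuf of the frame being
                              * assembled, lmp at the chain's tail, so frames
                              * spanning several descriptors link up in order.
                              */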
4132                         if (rxr->fmp == NULL) {
4133                                 mp->m_pkthdr.len = len;
4134                                 rxr->fmp = mp; /* Store the first mbuf */
4135                                 rxr->lmp = mp;
4136                         } else {
4137                                 /* Chain mbuf's together */
4138                                 mp->m_flags &= ~M_PKTHDR;
4139                                 rxr->lmp->m_next = mp;
4140                                 rxr->lmp = rxr->lmp->m_next;
4141                                 rxr->fmp->m_pkthdr.len += len;
4142                         }
4143
4144                         if (eop) {
4145                                 rxr->fmp->m_pkthdr.rcvif = ifp;
4146                                 ifp->if_ipackets++;
4147                                 em_receive_checksum(cur, rxr->fmp);
4148 #ifndef __NO_STRICT_ALIGNMENT
4149                                 if (adapter->max_frame_size >
4150                                     (MCLBYTES - ETHER_ALIGN) &&
4151                                     em_fixup_rx(rxr) != 0)
4152                                         goto skip;
4153 #endif
4154                                 if (status & E1000_RXD_STAT_VP) {
4155                                         rxr->fmp->m_pkthdr.ether_vtag =
4156                                             (le16toh(cur->special) &
4157                                             E1000_RXD_SPC_VLAN_MASK);
4158                                         rxr->fmp->m_flags |= M_VLANTAG;
4159                                 }
4160 #ifdef EM_MULTIQUEUE
4161                                 rxr->fmp->m_pkthdr.flowid = curcpu;
4162                                 rxr->fmp->m_flags |= M_FLOWID;
4163 #endif
4164 #ifndef __NO_STRICT_ALIGNMENT
4165 skip:
4166 #endif
4167                                 sendmp = rxr->fmp;
4168                                 rxr->fmp = NULL;
4169                                 rxr->lmp = NULL;
4170                         }
4171                 } else {
4172                         ifp->if_ierrors++;
4173                         /* Reuse loaded DMA map and just update mbuf chain */
4174                         mp = rxr->rx_buffers[i].m_head;
4175                         mp->m_len = mp->m_pkthdr.len = MCLBYTES;
4176                         mp->m_data = mp->m_ext.ext_buf;
4177                         mp->m_next = NULL;
4178                         if (adapter->max_frame_size <=
4179                             (MCLBYTES - ETHER_ALIGN))
4180                                 m_adj(mp, ETHER_ALIGN);
4181                         if (rxr->fmp != NULL) {
4182                                 m_freem(rxr->fmp);
4183                                 rxr->fmp = NULL;
4184                                 rxr->lmp = NULL;
4185                         }
4186                         sendmp = NULL;
4187                 }
4188
4189                 /* Zero out the receive descriptors status. */
4190                 cur->status = 0;
4191                 ++rxdone;       /* cumulative for POLL */
4192                 ++processed;
4193
4194                 /* Advance our pointers to the next descriptor. */
4195                 if (++i == adapter->num_rx_desc)
4196                         i = 0;
4197
4198                 /* Send to the stack */
4199                 if (sendmp != NULL)
4200                         (*ifp->if_input)(ifp, sendmp);
4201
4202                 /* Only refresh mbufs every 8 descriptors */
4203                 if (processed == 8) {
4204                         em_refresh_mbufs(rxr, i);
4205                         processed = 0;
4206                 }
4207         }
4208
4209         /* Catch any remaining refresh work */
4210         if (processed != 0) {
4211                 em_refresh_mbufs(rxr, i);
4212                 processed = 0;
4213         }
4214
4215         rxr->next_to_check = i;
4216
4217 #ifdef DEVICE_POLLING
4218         return (rxdone);
4219 #else
4220         return ((status & E1000_RXD_STAT_DD) ? TRUE : FALSE);
4221 #endif
4222 }
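/*
 * Sketch (not compiled): how a DEVICE_POLLING handler might consume
 * the cleaned-packet count returned above. The authoritative entry
 * point is em_poll() elsewhere in this file; the names below are
 * placeholders only.
 */
#if 0
static int
example_poll(struct ifnet *ifp, enum poll_cmd cmd, int count)
{
        struct adapter  *adapter = ifp->if_softc;
        struct rx_ring  *rxr = adapter->rx_rings;
        int             rx_done;

        EM_RX_LOCK(rxr);
        rx_done = em_rxeof(rxr, count); /* clean at most 'count' frames */
        EM_RX_UNLOCK(rxr);
        return (rx_done);
}
#endif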
4223
4224 #ifndef __NO_STRICT_ALIGNMENT
4225 /*
4226  * When jumbo frames are enabled we should realign the entire payload on
4227  * architectures with strict alignment. This is a serious design flaw in the
4228  * 8254x, as it nullifies the benefit of DMA. The 8254x only allows the RX
4229  * buffer size to be 2048/4096/8192/16384 bytes; what we really want is
4230  * 2048 - ETHER_ALIGN, which would align the payload. On architectures
4231  * without strict alignment restrictions the 8254x still performs unaligned
4232  * memory accesses, which reduces performance as well. To avoid copying an
4233  * entire frame just to realign it, we allocate a new mbuf, copy the
4234  * ethernet header into it, and prepend it to the existing mbuf chain.
4235  *
4236  * Be aware that the best performance of the 8254x is achieved only when
4237  * jumbo frames are not used at all on architectures with strict alignment.
4238  */
4239 static int
4240 em_fixup_rx(struct rx_ring *rxr)
4241 {
4242         struct adapter *adapter = rxr->adapter;
4243         struct mbuf *m, *n;
4244         int error;
4245
4246         error = 0;
4247         m = rxr->fmp;
4248         if (m->m_len <= (MCLBYTES - ETHER_HDR_LEN)) {
4249                 bcopy(m->m_data, m->m_data + ETHER_HDR_LEN, m->m_len);
4250                 m->m_data += ETHER_HDR_LEN;
4251         } else {
4252                 MGETHDR(n, M_DONTWAIT, MT_DATA);
4253                 if (n != NULL) {
4254                         bcopy(m->m_data, n->m_data, ETHER_HDR_LEN);
4255                         m->m_data += ETHER_HDR_LEN;
4256                         m->m_len -= ETHER_HDR_LEN;
4257                         n->m_len = ETHER_HDR_LEN;
4258                         M_MOVE_PKTHDR(n, m);
4259                         n->m_next = m;
4260                         rxr->fmp = n;
4261                 } else {
4262                         adapter->dropped_pkts++;
4263                         m_freem(rxr->fmp);
4264                         rxr->fmp = NULL;
4265                         error = ENOMEM;
4266                 }
4267         }
4268
4269         return (error);
4270 }
4271 #endif
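/*
 * Worked example of the alignment problem handled above, assuming a
 * standard 2048-byte cluster that starts on a 4-byte boundary:
 */
#if 0
        /* The 14-byte ethernet header leaves the IP header at offset
         * ETHER_HDR_LEN, i.e. only 2-byte aligned -- a fault on
         * strict-alignment CPUs: */
        struct ip *ip = (struct ip *)(mtod(m, char *) + ETHER_HDR_LEN);
        /* em_fixup_rx() shifts the header forward by ETHER_HDR_LEN (or
         * prepends a fresh mbuf), after which the IP header is 4-byte
         * aligned again. */
#endif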
4272
4273 /*********************************************************************
4274  *
4275  *  Verify that the hardware indicated that the checksum is valid.
4276  *  Inform the stack about the status of the checksum so that the
4277  *  stack doesn't spend time verifying it.
4278  *
4279  *********************************************************************/
4280 static void
4281 em_receive_checksum(struct e1000_rx_desc *rx_desc, struct mbuf *mp)
4282 {
4283         /* The Ignore Checksum bit is set; report nothing */
4284         if (rx_desc->status & E1000_RXD_STAT_IXSM) {
4285                 mp->m_pkthdr.csum_flags = 0;
4286                 return;
4287         }
4288
4289         if (rx_desc->status & E1000_RXD_STAT_IPCS) {
4290                 /* Did it pass? */
4291                 if (!(rx_desc->errors & E1000_RXD_ERR_IPE)) {
4292                         /* IP Checksum Good */
4293                         mp->m_pkthdr.csum_flags = CSUM_IP_CHECKED;
4294                         mp->m_pkthdr.csum_flags |= CSUM_IP_VALID;
4295
4296                 } else {
4297                         mp->m_pkthdr.csum_flags = 0;
4298                 }
4299         }
4300
4301         if (rx_desc->status & E1000_RXD_STAT_TCPCS) {
4302                 /* Did it pass? */
4303                 if (!(rx_desc->errors & E1000_RXD_ERR_TCPE)) {
4304                         mp->m_pkthdr.csum_flags |=
4305                         (CSUM_DATA_VALID | CSUM_PSEUDO_HDR);
4306                         mp->m_pkthdr.csum_data = htons(0xffff);
4307                 }
4308         }
4309 }
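/*
 * Sketch (for illustration; the authoritative checks live in the
 * netinet input paths): how an upper layer consumes the flags set
 * above.
 */
#if 0
        if (mp->m_pkthdr.csum_flags & CSUM_DATA_VALID) {
                /* Hardware verified the TCP/UDP checksum; since
                 * CSUM_PSEUDO_HDR is also set, csum_data already holds
                 * the final 0xffff and no software checksum is run. */
        }
#endif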
4310
4311 /*
4312  * This routine is run via a vlan
4313  * config EVENT.
4314  */
4315 static void
4316 em_register_vlan(void *arg, struct ifnet *ifp, u16 vtag)
4317 {
4318         struct adapter  *adapter = ifp->if_softc;
4319         u32             index, bit;
4320
4321         if (ifp->if_softc != arg)   /* Not our event */
4322                 return;
4323
4324         if ((vtag == 0) || (vtag > 4095))       /* Invalid ID */
4325                 return;
4326
4327         index = (vtag >> 5) & 0x7F;
4328         bit = vtag & 0x1F;
4329         em_shadow_vfta[index] |= (1 << bit);
4330         ++adapter->num_vlans;
4331         /* Re-init to load the changes */
4332         em_init(adapter);
4333 }
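/*
 * Worked example of the shadow VFTA indexing above: for vtag 100,
 * index = (100 >> 5) & 0x7F = 3 and bit = 100 & 0x1F = 4, so VLAN 100
 * sets bit 4 of em_shadow_vfta[3].
 */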
4334
4335 /*
4336  * This routine is run via a vlan
4337  * unconfig EVENT.
4338  */
4339 static void
4340 em_unregister_vlan(void *arg, struct ifnet *ifp, u16 vtag)
4341 {
4342         struct adapter  *adapter = ifp->if_softc;
4343         u32             index, bit;
4344
4345         if (ifp->if_softc != arg)
4346                 return;
4347
4348         if ((vtag == 0) || (vtag > 4095))       /* Invalid */
4349                 return;
4350
4351         index = (vtag >> 5) & 0x7F;
4352         bit = vtag & 0x1F;
4353         em_shadow_vfta[index] &= ~(1 << bit);
4354         --adapter->num_vlans;
4355         /* Re-init to load the changes */
4356         em_init(adapter);
4357 }
4358
4359 static void
4360 em_setup_vlan_hw_support(struct adapter *adapter)
4361 {
4362         struct e1000_hw *hw = &adapter->hw;
4363         u32             reg;
4364
4365         /*
4366         ** We get here through init_locked, meaning a
4367         ** soft reset, which has already cleared the
4368         ** VFTA and other state; if no vlans have been
4369         ** registered, there is nothing to do.
4370         */
4371         if (adapter->num_vlans == 0)
4372                 return;
4373
4374         /*
4375         ** A soft reset zeroes out the VFTA, so
4376         ** we need to repopulate it now.
4377         */
4378         for (int i = 0; i < EM_VFTA_SIZE; i++)
4379                 if (em_shadow_vfta[i] != 0)
4380                         E1000_WRITE_REG_ARRAY(hw, E1000_VFTA,
4381                             i, em_shadow_vfta[i]);
4382
4383         reg = E1000_READ_REG(hw, E1000_CTRL);
4384         reg |= E1000_CTRL_VME;
4385         E1000_WRITE_REG(hw, E1000_CTRL, reg);
4386
4387         /* Enable the Filter Table */
4388         reg = E1000_READ_REG(hw, E1000_RCTL);
4389         reg &= ~E1000_RCTL_CFIEN;
4390         reg |= E1000_RCTL_VFE;
4391         E1000_WRITE_REG(hw, E1000_RCTL, reg);
4392
4393         /* Update the frame size */
4394         E1000_WRITE_REG(hw, E1000_RLPML,
4395             adapter->max_frame_size + VLAN_TAG_SIZE);
4396 }
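/*
 * Note: growing RLPML by VLAN_TAG_SIZE (4 bytes) keeps maximum-sized
 * tagged frames from being rejected as oversized once VFE filtering
 * is enabled.
 */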
4397
4398 static void
4399 em_enable_intr(struct adapter *adapter)
4400 {
4401         struct e1000_hw *hw = &adapter->hw;
4402         u32 ims_mask = IMS_ENABLE_MASK;
4403
4404         if (hw->mac.type == e1000_82574) {
4405                 E1000_WRITE_REG(hw, EM_EIAC, EM_MSIX_MASK);
4406                 ims_mask |= EM_MSIX_MASK;
4407         } 
4408         E1000_WRITE_REG(hw, E1000_IMS, ims_mask);
4409 }
4410
4411 static void
4412 em_disable_intr(struct adapter *adapter)
4413 {
4414         struct e1000_hw *hw = &adapter->hw;
4415
4416         if (hw->mac.type == e1000_82574)
4417                 E1000_WRITE_REG(hw, EM_EIAC, 0);
4418         E1000_WRITE_REG(hw, E1000_IMC, 0xffffffff);
4419 }
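/*
 * Note: on the 82574, EIAC is the MSIX auto-clear mask, so
 * em_enable_intr() arms auto-clear for the MSIX vectors before
 * unmasking them in IMS, and em_disable_intr() zeroes it before
 * masking everything via IMC.
 */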
4420
4421 /*
4422  * Bit of a misnomer: what this really means is
4423  * to enable OS management of the system, i.e.
4424  * to disable special hardware management features.
4425  */
4426 static void
4427 em_init_manageability(struct adapter *adapter)
4428 {
4429         /* A shared code workaround */
4430 #define E1000_82542_MANC2H E1000_MANC2H
4431         if (adapter->has_manage) {
4432                 int manc2h = E1000_READ_REG(&adapter->hw, E1000_MANC2H);
4433                 int manc = E1000_READ_REG(&adapter->hw, E1000_MANC);
4434
4435                 /* disable hardware interception of ARP */
4436                 manc &= ~(E1000_MANC_ARP_EN);
4437
4438                 /* enable receiving management packets to the host */
4439                 manc |= E1000_MANC_EN_MNG2HOST;
4440 #define E1000_MNG2HOST_PORT_623 (1 << 5)
4441 #define E1000_MNG2HOST_PORT_664 (1 << 6)
4442                 manc2h |= E1000_MNG2HOST_PORT_623;
4443                 manc2h |= E1000_MNG2HOST_PORT_664;
4444                 E1000_WRITE_REG(&adapter->hw, E1000_MANC2H, manc2h);
4445                 E1000_WRITE_REG(&adapter->hw, E1000_MANC, manc);
4446         }
4447 }
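/*
 * Note: ports 623 and 664 above are the standard RMCP and secure-RMCP
 * management ports; the MANC2H bits route packets destined for those
 * UDP ports up to the host as well.
 */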
4448
4449 /*
4450  * Give control back to hardware management
4451  * controller if there is one.
4452  */
4453 static void
4454 em_release_manageability(struct adapter *adapter)
4455 {
4456         if (adapter->has_manage) {
4457                 int manc = E1000_READ_REG(&adapter->hw, E1000_MANC);
4458
4459                 /* re-enable hardware interception of ARP */
4460                 manc |= E1000_MANC_ARP_EN;
4461                 manc &= ~E1000_MANC_EN_MNG2HOST;
4462
4463                 E1000_WRITE_REG(&adapter->hw, E1000_MANC, manc);
4464         }
4465 }
4466
4467 /*
4468  * em_get_hw_control sets the {CTRL_EXT|FWSM}:DRV_LOAD bit.
4469  * For ASF and Pass Through versions of f/w this means
4470  * that the driver is loaded. For AMT versions of the f/w
4471  * this means that the network i/f is open.
4472  */
4473 static void
4474 em_get_hw_control(struct adapter *adapter)
4475 {
4476         u32 ctrl_ext, swsm;
4477
4478         if (adapter->hw.mac.type == e1000_82573) {
4479                 swsm = E1000_READ_REG(&adapter->hw, E1000_SWSM);
4480                 E1000_WRITE_REG(&adapter->hw, E1000_SWSM,
4481                     swsm | E1000_SWSM_DRV_LOAD);
4482                 return;
4483         }
4484         /* else */
4485         ctrl_ext = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
4486         E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT,
4487             ctrl_ext | E1000_CTRL_EXT_DRV_LOAD);
4488         return;
4489 }
4490
4491 /*
4492  * em_release_hw_control resets {CTRL_EXT|FWSM}:DRV_LOAD bit.
4493  * For ASF and Pass Through versions of f/w this means that
4494  * the driver is no longer loaded. For AMT versions of the
4495  * f/w this means that the network i/f is closed.
4496  */
4497 static void
4498 em_release_hw_control(struct adapter *adapter)
4499 {
4500         u32 ctrl_ext, swsm;
4501
4502         if (!adapter->has_manage)
4503                 return;
4504
4505         if (adapter->hw.mac.type == e1000_82573) {
4506                 swsm = E1000_READ_REG(&adapter->hw, E1000_SWSM);
4507                 E1000_WRITE_REG(&adapter->hw, E1000_SWSM,
4508                     swsm & ~E1000_SWSM_DRV_LOAD);
4509                 return;
4510         }
4511         /* else */
4512         ctrl_ext = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
4513         E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT,
4514             ctrl_ext & ~E1000_CTRL_EXT_DRV_LOAD);
4515         return;
4516 }
4517
4518 static int
4519 em_is_valid_ether_addr(u8 *addr)
4520 {
4521         char zero_addr[6] = { 0, 0, 0, 0, 0, 0 };
4522
4523         if ((addr[0] & 1) || (!bcmp(addr, zero_addr, ETHER_ADDR_LEN))) {
4524                 return (FALSE);
4525         }
4526
4527         return (TRUE);
4528 }
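/*
 * For example: 01:00:5e:00:00:01 fails (the multicast bit, addr[0] & 1,
 * is set) and 00:00:00:00:00:00 fails (all zero), while an ordinary
 * unicast address passes.
 */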
4529
4530 /*
4531 ** Parse the interface capabilities with regard
4532 ** to both system management and wake-on-lan for
4533 ** later use.
4534 */
4535 static void
4536 em_get_wakeup(device_t dev)
4537 {
4538         struct adapter  *adapter = device_get_softc(dev);
4539         u16             eeprom_data = 0, device_id, apme_mask;
4540
4541         adapter->has_manage = e1000_enable_mng_pass_thru(&adapter->hw);
4542         apme_mask = EM_EEPROM_APME;
4543
4544         switch (adapter->hw.mac.type) {
4545         case e1000_82573:
4546         case e1000_82583:
4547                 adapter->has_amt = TRUE;
4548                 /* Falls thru */
4549         case e1000_82571:
4550         case e1000_82572:
4551         case e1000_80003es2lan:
4552                 if (adapter->hw.bus.func == 1) {
4553                         e1000_read_nvm(&adapter->hw,
4554                             NVM_INIT_CONTROL3_PORT_B, 1, &eeprom_data);
4555                         break;
4556                 } else
4557                         e1000_read_nvm(&adapter->hw,
4558                             NVM_INIT_CONTROL3_PORT_A, 1, &eeprom_data);
4559                 break;
4560         case e1000_ich8lan:
4561         case e1000_ich9lan:
4562         case e1000_ich10lan:
4563         case e1000_pchlan:
4564                 apme_mask = E1000_WUC_APME;
4565                 adapter->has_amt = TRUE;
4566                 eeprom_data = E1000_READ_REG(&adapter->hw, E1000_WUC);
4567                 break;
4568         default:
4569                 e1000_read_nvm(&adapter->hw,
4570                     NVM_INIT_CONTROL3_PORT_A, 1, &eeprom_data);
4571                 break;
4572         }
4573         if (eeprom_data & apme_mask)
4574                 adapter->wol = (E1000_WUFC_MAG | E1000_WUFC_MC);
4575         /*
4576          * We have the eeprom settings; now apply the special cases
4577          * where the eeprom may be wrong or the board simply won't
4578          * support wake on lan on a particular port.
4579          */
4580         device_id = pci_get_device(dev);
4581         switch (device_id) {
4582         case E1000_DEV_ID_82571EB_FIBER:
4583                 /* Wake events only supported on port A for dual fiber
4584                  * regardless of eeprom setting */
4585                 if (E1000_READ_REG(&adapter->hw, E1000_STATUS) &
4586                     E1000_STATUS_FUNC_1)
4587                         adapter->wol = 0;
4588                 break;
4589         case E1000_DEV_ID_82571EB_QUAD_COPPER:
4590         case E1000_DEV_ID_82571EB_QUAD_FIBER:
4591         case E1000_DEV_ID_82571EB_QUAD_COPPER_LP:
4592                 /* if quad port adapter, disable WoL on all but port A */
4593                 if (global_quad_port_a != 0)
4594                         adapter->wol = 0;
4595                 /* Reset for multiple quad port adapters */
4596                 if (++global_quad_port_a == 4)
4597                         global_quad_port_a = 0;
4598                 break;
4599         }
4600         return;
4601 }
4602
4603
4604 /*
4605  * Enable PCI Wake On Lan capability
4606  */
4607 static void
4608 em_enable_wakeup(device_t dev)
4609 {
4610         struct adapter  *adapter = device_get_softc(dev);
4611         struct ifnet    *ifp = adapter->ifp;
4612         u32             pmc, ctrl, ctrl_ext, rctl;
4613         u16             status;
4614
4615         if (pci_find_extcap(dev, PCIY_PMG, &pmc) != 0)
4616                 return;
4617
4618         /* Advertise the wakeup capability */
4619         ctrl = E1000_READ_REG(&adapter->hw, E1000_CTRL);
4620         ctrl |= (E1000_CTRL_SWDPIN2 | E1000_CTRL_SWDPIN3);
4621         E1000_WRITE_REG(&adapter->hw, E1000_CTRL, ctrl);
4622         E1000_WRITE_REG(&adapter->hw, E1000_WUC, E1000_WUC_PME_EN);
4623
4624         if ((adapter->hw.mac.type == e1000_ich8lan) ||
4625             (adapter->hw.mac.type == e1000_pchlan) ||
4626             (adapter->hw.mac.type == e1000_ich9lan) ||
4627             (adapter->hw.mac.type == e1000_ich10lan)) {
4628                 e1000_disable_gig_wol_ich8lan(&adapter->hw);
4629                 e1000_hv_phy_powerdown_workaround_ich8lan(&adapter->hw);
4630         }
4631
4632         /* Keep the laser running on Fiber adapters */
4633         if (adapter->hw.phy.media_type == e1000_media_type_fiber ||
4634             adapter->hw.phy.media_type == e1000_media_type_internal_serdes) {
4635                 ctrl_ext = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
4636                 ctrl_ext |= E1000_CTRL_EXT_SDP3_DATA;
4637                 E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT, ctrl_ext);
4638         }
4639
4640         /*
4641         ** Determine type of Wakeup: note that wol
4642         ** is set with all bits on by default.
4643         */
4644         if ((ifp->if_capenable & IFCAP_WOL_MAGIC) == 0)
4645                 adapter->wol &= ~E1000_WUFC_MAG;
4646
4647         if ((ifp->if_capenable & IFCAP_WOL_MCAST) == 0)
4648                 adapter->wol &= ~E1000_WUFC_MC;
4649         else {
4650                 rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
4651                 rctl |= E1000_RCTL_MPE;
4652                 E1000_WRITE_REG(&adapter->hw, E1000_RCTL, rctl);
4653         }
4654
4655         if (adapter->hw.mac.type == e1000_pchlan) {
4656                 if (em_enable_phy_wakeup(adapter))
4657                         return;
4658         } else {
4659                 E1000_WRITE_REG(&adapter->hw, E1000_WUC, E1000_WUC_PME_EN);
4660                 E1000_WRITE_REG(&adapter->hw, E1000_WUFC, adapter->wol);
4661         }
4662
4663         if (adapter->hw.phy.type == e1000_phy_igp_3)
4664                 e1000_igp3_phy_powerdown_workaround_ich8lan(&adapter->hw);
4665
4666         /* Request PME */
4667         status = pci_read_config(dev, pmc + PCIR_POWER_STATUS, 2);
4668         status &= ~(PCIM_PSTAT_PME | PCIM_PSTAT_PMEENABLE);
4669         if (ifp->if_capenable & IFCAP_WOL)
4670                 status |= PCIM_PSTAT_PME | PCIM_PSTAT_PMEENABLE;
4671         pci_write_config(dev, pmc + PCIR_POWER_STATUS, status, 2);
4672
4673         return;
4674 }
4675
4676 /*
4677 ** WOL in the newer chipset interfaces (pchlan)
4678 ** requires the wakeup state to be copied into the PHY.
4679 */
4680 static int
4681 em_enable_phy_wakeup(struct adapter *adapter)
4682 {
4683         struct e1000_hw *hw = &adapter->hw;
4684         u32 mreg, ret = 0;
4685         u16 preg;
4686
4687         /* copy MAC RARs to PHY RARs */
4688         for (int i = 0; i < adapter->hw.mac.rar_entry_count; i++) {
4689                 mreg = E1000_READ_REG(hw, E1000_RAL(i));
4690                 e1000_write_phy_reg(hw, BM_RAR_L(i), (u16)(mreg & 0xFFFF));
4691                 e1000_write_phy_reg(hw, BM_RAR_M(i),
4692                     (u16)((mreg >> 16) & 0xFFFF));
4693                 mreg = E1000_READ_REG(hw, E1000_RAH(i));
4694                 e1000_write_phy_reg(hw, BM_RAR_H(i), (u16)(mreg & 0xFFFF));
4695                 e1000_write_phy_reg(hw, BM_RAR_CTRL(i),
4696                     (u16)((mreg >> 16) & 0xFFFF));
4697         }
4698
4699         /* copy MAC MTA to PHY MTA */
4700         for (int i = 0; i < adapter->hw.mac.mta_reg_count; i++) {
4701                 mreg = E1000_READ_REG_ARRAY(hw, E1000_MTA, i);
4702                 e1000_write_phy_reg(hw, BM_MTA(i), (u16)(mreg & 0xFFFF));
4703                 e1000_write_phy_reg(hw, BM_MTA(i) + 1,
4704                     (u16)((mreg >> 16) & 0xFFFF));
4705         }
4706
4707         /* configure PHY Rx Control register */
4708         e1000_read_phy_reg(&adapter->hw, BM_RCTL, &preg);
4709         mreg = E1000_READ_REG(hw, E1000_RCTL);
4710         if (mreg & E1000_RCTL_UPE)
4711                 preg |= BM_RCTL_UPE;
4712         if (mreg & E1000_RCTL_MPE)
4713                 preg |= BM_RCTL_MPE;
4714         preg &= ~(BM_RCTL_MO_MASK);
4715         if (mreg & E1000_RCTL_MO_3)
4716                 preg |= (((mreg & E1000_RCTL_MO_3) >> E1000_RCTL_MO_SHIFT)
4717                                 << BM_RCTL_MO_SHIFT);
4718         if (mreg & E1000_RCTL_BAM)
4719                 preg |= BM_RCTL_BAM;
4720         if (mreg & E1000_RCTL_PMCF)
4721                 preg |= BM_RCTL_PMCF;
4722         mreg = E1000_READ_REG(hw, E1000_CTRL);
4723         if (mreg & E1000_CTRL_RFCE)
4724                 preg |= BM_RCTL_RFCE;
4725         e1000_write_phy_reg(&adapter->hw, BM_RCTL, preg);
4726
4727         /* enable PHY wakeup in MAC register */
4728         E1000_WRITE_REG(hw, E1000_WUC,
4729             E1000_WUC_PHY_WAKE | E1000_WUC_PME_EN);
4730         E1000_WRITE_REG(hw, E1000_WUFC, adapter->wol);
4731
4732         /* configure and enable PHY wakeup in PHY registers */
4733         e1000_write_phy_reg(&adapter->hw, BM_WUFC, adapter->wol);
4734         e1000_write_phy_reg(&adapter->hw, BM_WUC, E1000_WUC_PME_EN);
4735
4736         /* activate PHY wakeup */
4737         ret = hw->phy.ops.acquire(hw);
4738         if (ret) {
4739                 printf("Could not acquire PHY\n");
4740                 return (ret);
4741         }
4742         e1000_write_phy_reg_mdic(hw, IGP01E1000_PHY_PAGE_SELECT,
4743                                  (BM_WUC_ENABLE_PAGE << IGP_PAGE_SHIFT));
4744         ret = e1000_read_phy_reg_mdic(hw, BM_WUC_ENABLE_REG, &preg);
4745         if (ret) {
4746                 printf("Could not read PHY page 769\n");
4747                 goto out;
4748         }
4749         preg |= BM_WUC_ENABLE_BIT | BM_WUC_HOST_WU_BIT;
4750         ret = e1000_write_phy_reg_mdic(hw, BM_WUC_ENABLE_REG, preg);
4751         if (ret)
4752                 printf("Could not set PHY Host Wakeup bit\n");
4753 out:
4754         hw->phy.ops.release(hw);
4755
4756         return (ret);
4757 }
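/*
 * Note: the MDIC accesses above target PHY page 769
 * (BM_WUC_ENABLE_PAGE). On pchlan parts the MAC may be powered down
 * while waiting for wake events, so the RAR/MTA/RCTL state copied
 * into the PHY is what actually matches and wakes the host.
 */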
4758
4759 static void
4760 em_led_func(void *arg, int onoff)
4761 {
4762         struct adapter  *adapter = arg;
4763  
4764         EM_CORE_LOCK(adapter);
4765         if (onoff) {
4766                 e1000_setup_led(&adapter->hw);
4767                 e1000_led_on(&adapter->hw);
4768         } else {
4769                 e1000_led_off(&adapter->hw);
4770                 e1000_cleanup_led(&adapter->hw);
4771         }
4772         EM_CORE_UNLOCK(adapter);
4773 }
4774
4775 /**********************************************************************
4776  *
4777  *  Update the board statistics counters.
4778  *
4779  **********************************************************************/
4780 static void
4781 em_update_stats_counters(struct adapter *adapter)
4782 {
4783         struct ifnet   *ifp;
4784
4785         if (adapter->hw.phy.media_type == e1000_media_type_copper ||
4786            (E1000_READ_REG(&adapter->hw, E1000_STATUS) & E1000_STATUS_LU)) {
4787                 adapter->stats.symerrs += E1000_READ_REG(&adapter->hw, E1000_SYMERRS);
4788                 adapter->stats.sec += E1000_READ_REG(&adapter->hw, E1000_SEC);
4789         }
4790         adapter->stats.crcerrs += E1000_READ_REG(&adapter->hw, E1000_CRCERRS);
4791         adapter->stats.mpc += E1000_READ_REG(&adapter->hw, E1000_MPC);
4792         adapter->stats.scc += E1000_READ_REG(&adapter->hw, E1000_SCC);
4793         adapter->stats.ecol += E1000_READ_REG(&adapter->hw, E1000_ECOL);
4794
4795         adapter->stats.mcc += E1000_READ_REG(&adapter->hw, E1000_MCC);
4796         adapter->stats.latecol += E1000_READ_REG(&adapter->hw, E1000_LATECOL);
4797         adapter->stats.colc += E1000_READ_REG(&adapter->hw, E1000_COLC);
4798         adapter->stats.dc += E1000_READ_REG(&adapter->hw, E1000_DC);
4799         adapter->stats.rlec += E1000_READ_REG(&adapter->hw, E1000_RLEC);
4800         adapter->stats.xonrxc += E1000_READ_REG(&adapter->hw, E1000_XONRXC);
4801         adapter->stats.xontxc += E1000_READ_REG(&adapter->hw, E1000_XONTXC);
4802         adapter->stats.xoffrxc += E1000_READ_REG(&adapter->hw, E1000_XOFFRXC);
4803         adapter->stats.xofftxc += E1000_READ_REG(&adapter->hw, E1000_XOFFTXC);
4804         adapter->stats.fcruc += E1000_READ_REG(&adapter->hw, E1000_FCRUC);
4805         adapter->stats.prc64 += E1000_READ_REG(&adapter->hw, E1000_PRC64);
4806         adapter->stats.prc127 += E1000_READ_REG(&adapter->hw, E1000_PRC127);
4807         adapter->stats.prc255 += E1000_READ_REG(&adapter->hw, E1000_PRC255);
4808         adapter->stats.prc511 += E1000_READ_REG(&adapter->hw, E1000_PRC511);
4809         adapter->stats.prc1023 += E1000_READ_REG(&adapter->hw, E1000_PRC1023);
4810         adapter->stats.prc1522 += E1000_READ_REG(&adapter->hw, E1000_PRC1522);
4811         adapter->stats.gprc += E1000_READ_REG(&adapter->hw, E1000_GPRC);
4812         adapter->stats.bprc += E1000_READ_REG(&adapter->hw, E1000_BPRC);
4813         adapter->stats.mprc += E1000_READ_REG(&adapter->hw, E1000_MPRC);
4814         adapter->stats.gptc += E1000_READ_REG(&adapter->hw, E1000_GPTC);
4815
4816         /* For the 64-bit byte counters the low dword must be read first. */
4817         /* Both registers clear on the read of the high dword */
4818
4819         adapter->stats.gorc += E1000_READ_REG(&adapter->hw, E1000_GORCL) + ((u64)E1000_READ_REG(&adapter->hw, E1000_GORCH) << 32);
4820         adapter->stats.gotc += E1000_READ_REG(&adapter->hw, E1000_GOTCL) + ((u64)E1000_READ_REG(&adapter->hw, E1000_GOTCH) << 32);
4821
4822         adapter->stats.rnbc += E1000_READ_REG(&adapter->hw, E1000_RNBC);
4823         adapter->stats.ruc += E1000_READ_REG(&adapter->hw, E1000_RUC);
4824         adapter->stats.rfc += E1000_READ_REG(&adapter->hw, E1000_RFC);
4825         adapter->stats.roc += E1000_READ_REG(&adapter->hw, E1000_ROC);
4826         adapter->stats.rjc += E1000_READ_REG(&adapter->hw, E1000_RJC);
4827
4828         adapter->stats.tor += E1000_READ_REG(&adapter->hw, E1000_TORL) + ((u64)E1000_READ_REG(&adapter->hw, E1000_TORH) << 32);
4829         adapter->stats.tot += E1000_READ_REG(&adapter->hw, E1000_TOTL) + ((u64)E1000_READ_REG(&adapter->hw, E1000_TOTH) << 32);
4830
4831         adapter->stats.tpr += E1000_READ_REG(&adapter->hw, E1000_TPR);
4832         adapter->stats.tpt += E1000_READ_REG(&adapter->hw, E1000_TPT);
4833         adapter->stats.ptc64 += E1000_READ_REG(&adapter->hw, E1000_PTC64);
4834         adapter->stats.ptc127 += E1000_READ_REG(&adapter->hw, E1000_PTC127);
4835         adapter->stats.ptc255 += E1000_READ_REG(&adapter->hw, E1000_PTC255);
4836         adapter->stats.ptc511 += E1000_READ_REG(&adapter->hw, E1000_PTC511);
4837         adapter->stats.ptc1023 += E1000_READ_REG(&adapter->hw, E1000_PTC1023);
4838         adapter->stats.ptc1522 += E1000_READ_REG(&adapter->hw, E1000_PTC1522);
4839         adapter->stats.mptc += E1000_READ_REG(&adapter->hw, E1000_MPTC);
4840         adapter->stats.bptc += E1000_READ_REG(&adapter->hw, E1000_BPTC);
4841
4842         if (adapter->hw.mac.type >= e1000_82543) {
4843                 adapter->stats.algnerrc +=
4844                     E1000_READ_REG(&adapter->hw, E1000_ALGNERRC);
4845                 adapter->stats.rxerrc +=
4846                     E1000_READ_REG(&adapter->hw, E1000_RXERRC);
4847                 adapter->stats.tncrs +=
4848                     E1000_READ_REG(&adapter->hw, E1000_TNCRS);
4849                 adapter->stats.cexterr +=
4850                     E1000_READ_REG(&adapter->hw, E1000_CEXTERR);
4851                 adapter->stats.tsctc +=
4852                     E1000_READ_REG(&adapter->hw, E1000_TSCTC);
4853                 adapter->stats.tsctfc +=
4854                     E1000_READ_REG(&adapter->hw, E1000_TSCTFC);
4855         }
4856         ifp = adapter->ifp;
4857
4858         ifp->if_collisions = adapter->stats.colc;
4859
4860         /* Rx Errors */
4861         ifp->if_ierrors = adapter->dropped_pkts + adapter->stats.rxerrc +
4862             adapter->stats.crcerrs + adapter->stats.algnerrc +
4863             adapter->stats.ruc + adapter->stats.roc +
4864             adapter->stats.mpc + adapter->stats.cexterr;
4865
4866         /* Tx Errors */
4867         ifp->if_oerrors = adapter->stats.ecol +
4868             adapter->stats.latecol + adapter->watchdog_events;
4869 }
4870
4871
4872 /**********************************************************************
4873  *
4874  *  This routine is called only when em_display_debug_stats is enabled.
4875  *  This routine provides a way to take a look at important statistics
4876  *  maintained by the driver and hardware.
4877  *
4878  **********************************************************************/
4879 static void
4880 em_print_debug_info(struct adapter *adapter)
4881 {
4882         device_t dev = adapter->dev;
4883         u8 *hw_addr = adapter->hw.hw_addr;
4884         struct rx_ring *rxr = adapter->rx_rings;
4885         struct tx_ring *txr = adapter->tx_rings;
4886
4887         device_printf(dev, "Adapter hardware address = %p \n", hw_addr);
4888         device_printf(dev, "CTRL = 0x%x RCTL = 0x%x \n",
4889             E1000_READ_REG(&adapter->hw, E1000_CTRL),
4890             E1000_READ_REG(&adapter->hw, E1000_RCTL));
4891         device_printf(dev, "Packet buffer = Tx=%dk Rx=%dk \n",
4892             ((E1000_READ_REG(&adapter->hw, E1000_PBA) & 0xffff0000) >> 16),
4893             (E1000_READ_REG(&adapter->hw, E1000_PBA) & 0xffff));
4894         device_printf(dev, "Flow control watermarks high = %d low = %d\n",
4895             adapter->hw.fc.high_water,
4896             adapter->hw.fc.low_water);
4897         device_printf(dev, "tx_int_delay = %d, tx_abs_int_delay = %d\n",
4898             E1000_READ_REG(&adapter->hw, E1000_TIDV),
4899             E1000_READ_REG(&adapter->hw, E1000_TADV));
4900         device_printf(dev, "rx_int_delay = %d, rx_abs_int_delay = %d\n",
4901             E1000_READ_REG(&adapter->hw, E1000_RDTR),
4902             E1000_READ_REG(&adapter->hw, E1000_RADV));
4903
4904         for (int i = 0; i < adapter->num_queues; i++, txr++) {
4905                 device_printf(dev, "Queue(%d) tdh = %d, tdt = %d\n", i,
4906                     E1000_READ_REG(&adapter->hw, E1000_TDH(i)),
4907                     E1000_READ_REG(&adapter->hw, E1000_TDT(i)));
4908                 device_printf(dev, "TX(%d) no descriptors avail event = %ld\n",
4909                     txr->me, txr->no_desc_avail);
4910                 device_printf(dev, "TX(%d) MSIX IRQ Handled = %ld\n",
4911                     txr->me, txr->tx_irq);
4912                 device_printf(dev, "Num Tx descriptors avail = %d\n",
4913                     txr->tx_avail);
4914                 device_printf(dev, "Tx Descriptors not avail1 = %ld\n",
4915                     txr->no_desc_avail);
4916         }
4917         for (int i = 0; i < adapter->num_queues; i++, rxr++) {
4918                 device_printf(dev, "RX(%d) MSIX IRQ Handled = %ld\n",
4919                     rxr->me, rxr->rx_irq);
4920                 device_printf(dev, "hw rdh = %d, hw rdt = %d\n",
4921                     E1000_READ_REG(&adapter->hw, E1000_RDH(i)),
4922                     E1000_READ_REG(&adapter->hw, E1000_RDT(i)));
4923         }
4924         device_printf(dev, "Std mbuf failed = %ld\n",
4925             adapter->mbuf_alloc_failed);
4926         device_printf(dev, "Std mbuf cluster failed = %ld\n",
4927             adapter->mbuf_cluster_failed);
4928         device_printf(dev, "Driver dropped packets = %ld\n",
4929             adapter->dropped_pkts);
4930 }
4931
4932 static void
4933 em_print_hw_stats(struct adapter *adapter)
4934 {
4935         device_t dev = adapter->dev;
4936
4937         device_printf(dev, "Excessive collisions = %lld\n",
4938             (long long)adapter->stats.ecol);
4939 #if     (DEBUG_HW > 0)  /* Don't output these errors normally */
4940         device_printf(dev, "Symbol errors = %lld\n",
4941             (long long)adapter->stats.symerrs);
4942 #endif
4943         device_printf(dev, "Sequence errors = %lld\n",
4944             (long long)adapter->stats.sec);
4945         device_printf(dev, "Defer count = %lld\n",
4946             (long long)adapter->stats.dc);
4947         device_printf(dev, "Missed Packets = %lld\n",
4948             (long long)adapter->stats.mpc);
4949         device_printf(dev, "Receive No Buffers = %lld\n",
4950             (long long)adapter->stats.rnbc);
4951         /* RLEC is inaccurate on some hardware, calculate our own. */
4952         device_printf(dev, "Receive Length Errors = %lld\n",
4953             ((long long)adapter->stats.roc + (long long)adapter->stats.ruc));
4954         device_printf(dev, "Receive errors = %lld\n",
4955             (long long)adapter->stats.rxerrc);
4956         device_printf(dev, "Crc errors = %lld\n",
4957             (long long)adapter->stats.crcerrs);
4958         device_printf(dev, "Alignment errors = %lld\n",
4959             (long long)adapter->stats.algnerrc);
4960         device_printf(dev, "Collision/Carrier extension errors = %lld\n",
4961             (long long)adapter->stats.cexterr);
4962         device_printf(dev, "watchdog timeouts = %ld\n",
4963             adapter->watchdog_events);
4964         device_printf(dev, "XON Rcvd = %lld\n",
4965             (long long)adapter->stats.xonrxc);
4966         device_printf(dev, "XON Xmtd = %lld\n",
4967             (long long)adapter->stats.xontxc);
4968         device_printf(dev, "XOFF Rcvd = %lld\n",
4969             (long long)adapter->stats.xoffrxc);
4970         device_printf(dev, "XOFF Xmtd = %lld\n",
4971             (long long)adapter->stats.xofftxc);
4972         device_printf(dev, "Good Packets Rcvd = %lld\n",
4973             (long long)adapter->stats.gprc);
4974         device_printf(dev, "Good Packets Xmtd = %lld\n",
4975             (long long)adapter->stats.gptc);
4976         device_printf(dev, "TSO Contexts Xmtd = %lld\n",
4977             (long long)adapter->stats.tsctc);
4978         device_printf(dev, "TSO Contexts Failed = %lld\n",
4979             (long long)adapter->stats.tsctfc);
4980 }
4981
4982 /**********************************************************************
4983  *
4984  *  This routine provides a way to dump out the adapter eeprom,
4985  *  often a useful debug/service tool. This only dumps the first
4986  *  32 words, stuff that matters is in that extent.
4987  *
4988  **********************************************************************/
4989 static void
4990 em_print_nvm_info(struct adapter *adapter)
4991 {
4992         u16     eeprom_data;
4993         int     i, j, row = 0;
4994
4995         /* It's a bit crude, but it gets the job done */
4996         printf("\nInterface EEPROM Dump:\n");
4997         printf("Offset\n0x0000  ");
4998         for (i = 0, j = 0; i < 32; i++, j++) {
4999                 if (j == 8) { /* Make the offset block */
5000                         j = 0; ++row;
5001                         printf("\n0x00%x0  ", row);
5002                 }
5003                 e1000_read_nvm(&adapter->hw, i, 1, &eeprom_data);
5004                 printf("%04x ", eeprom_data);
5005         }
5006         printf("\n");
5007 }
5008
5009 static int
5010 em_sysctl_debug_info(SYSCTL_HANDLER_ARGS)
5011 {
5012         struct adapter *adapter;
5013         int error;
5014         int result;
5015
5016         result = -1;
5017         error = sysctl_handle_int(oidp, &result, 0, req);
5018
5019         if (error || !req->newptr)
5020                 return (error);
5021
5022         if (result == 1) {
5023                 adapter = (struct adapter *)arg1;
5024                 em_print_debug_info(adapter);
5025         }
5026         /*
5027          * This value will cause a hex dump of the
5028          * first 32 16-bit words of the EEPROM to
5029          * the screen.
5030          */
5031         if (result == 2) {
5032                 adapter = (struct adapter *)arg1;
5033                 em_print_nvm_info(adapter);
5034         }
5035
5036         return (error);
5037 }
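/*
 * Usage sketch (assuming the OID is attached as dev.em.<unit>.debug
 * during driver attach):
 *
 *      sysctl dev.em.0.debug=1         # dump driver debug state
 *      sysctl dev.em.0.debug=2         # hex-dump first 32 EEPROM words
 */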
5038
5039
5040 static int
5041 em_sysctl_stats(SYSCTL_HANDLER_ARGS)
5042 {
5043         struct adapter *adapter;
5044         int error;
5045         int result;
5046
5047         result = -1;
5048         error = sysctl_handle_int(oidp, &result, 0, req);
5049
5050         if (error || !req->newptr)
5051                 return (error);
5052
5053         if (result == 1) {
5054                 adapter = (struct adapter *)arg1;
5055                 em_print_hw_stats(adapter);
5056         }
5057
5058         return (error);
5059 }
5060
5061 static int
5062 em_sysctl_int_delay(SYSCTL_HANDLER_ARGS)
5063 {
5064         struct em_int_delay_info *info;
5065         struct adapter *adapter;
5066         u32 regval;
5067         int error, usecs, ticks;
5068
5069         info = (struct em_int_delay_info *)arg1;
5070         usecs = info->value;
5071         error = sysctl_handle_int(oidp, &usecs, 0, req);
5072         if (error != 0 || req->newptr == NULL)
5073                 return (error);
5074         if (usecs < 0 || usecs > EM_TICKS_TO_USECS(65535))
5075                 return (EINVAL);
5076         info->value = usecs;
5077         ticks = EM_USECS_TO_TICKS(usecs);
5078
5079         adapter = info->adapter;
5080         
5081         EM_CORE_LOCK(adapter);
5082         regval = E1000_READ_OFFSET(&adapter->hw, info->offset);
5083         regval = (regval & ~0xffff) | (ticks & 0xffff);
5084         /* Handle a few special cases. */
5085         switch (info->offset) {
5086         case E1000_RDTR:
5087                 break;
5088         case E1000_TIDV:
5089                 if (ticks == 0) {
5090                         adapter->txd_cmd &= ~E1000_TXD_CMD_IDE;
5091                         /* Don't write 0 into the TIDV register. */
5092                         regval++;
5093                 } else
5094                         adapter->txd_cmd |= E1000_TXD_CMD_IDE;
5095                 break;
5096         }
5097         E1000_WRITE_OFFSET(&adapter->hw, info->offset, regval);
5098         EM_CORE_UNLOCK(adapter);
5099         return (0);
5100 }
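/*
 * Usage sketch (the OID names come from the em_add_int_delay_sysctl()
 * calls at attach time, e.g. "rx_int_delay"):
 *
 *      sysctl dev.em.0.rx_int_delay=32 # ~32 usec, converted to ticks
 *
 * Values are clamped to the register's 16-bit field; for TIDV a zero
 * is never written -- the IDE bit is cleared and regval bumped to 1
 * instead.
 */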
5101
5102 static void
5103 em_add_int_delay_sysctl(struct adapter *adapter, const char *name,
5104         const char *description, struct em_int_delay_info *info,
5105         int offset, int value)
5106 {
5107         info->adapter = adapter;
5108         info->offset = offset;
5109         info->value = value;
5110         SYSCTL_ADD_PROC(device_get_sysctl_ctx(adapter->dev),
5111             SYSCTL_CHILDREN(device_get_sysctl_tree(adapter->dev)),
5112             OID_AUTO, name, CTLTYPE_INT|CTLFLAG_RW,
5113             info, 0, em_sysctl_int_delay, "I", description);
5114 }
5115
5116 static void
5117 em_add_rx_process_limit(struct adapter *adapter, const char *name,
5118         const char *description, int *limit, int value)
5119 {
5120         *limit = value;
5121         SYSCTL_ADD_INT(device_get_sysctl_ctx(adapter->dev),
5122             SYSCTL_CHILDREN(device_get_sysctl_tree(adapter->dev)),
5123             OID_AUTO, name, CTLFLAG_RW, limit, value, description);
5124 }
5125
5126