/******************************************************************************

  Copyright (c) 2001-2010, Intel Corporation
  All rights reserved.

  Redistribution and use in source and binary forms, with or without
  modification, are permitted provided that the following conditions are met:

   1. Redistributions of source code must retain the above copyright notice,
      this list of conditions and the following disclaimer.

   2. Redistributions in binary form must reproduce the above copyright
      notice, this list of conditions and the following disclaimer in the
      documentation and/or other materials provided with the distribution.

   3. Neither the name of the Intel Corporation nor the names of its
      contributors may be used to endorse or promote products derived from
      this software without specific prior written permission.

  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
  AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
  LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
  INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
  CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
  ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
  POSSIBILITY OF SUCH DAMAGE.

******************************************************************************/
/*$FreeBSD$*/

#ifdef HAVE_KERNEL_OPTION_HEADERS
#include "opt_device_polling.h"
#include "opt_inet.h"
#endif

#include <sys/param.h>
#include <sys/systm.h>
#if __FreeBSD_version >= 800000
#include <sys/buf_ring.h>
#endif
#include <sys/bus.h>
#include <sys/endian.h>
#include <sys/kernel.h>
#include <sys/kthread.h>
#include <sys/malloc.h>
#include <sys/mbuf.h>
#include <sys/module.h>
#include <sys/rman.h>
#include <sys/socket.h>
#include <sys/sockio.h>
#include <sys/sysctl.h>
#include <sys/taskqueue.h>
#include <sys/eventhandler.h>
#include <machine/bus.h>
#include <machine/resource.h>

#include <net/bpf.h>
#include <net/ethernet.h>
#include <net/if.h>
#include <net/if_arp.h>
#include <net/if_dl.h>
#include <net/if_media.h>

#include <net/if_types.h>
#include <net/if_vlan_var.h>

#include <netinet/in_systm.h>
#include <netinet/in.h>
#include <netinet/if_ether.h>
#include <netinet/ip.h>
#include <netinet/ip6.h>
#include <netinet/tcp.h>
#include <netinet/udp.h>

#include <machine/in_cksum.h>
#include <dev/led/led.h>
#include <dev/pci/pcivar.h>
#include <dev/pci/pcireg.h>

#include "e1000_api.h"
#include "e1000_82571.h"
#include "if_em.h"

/*********************************************************************
 *  Set this to one to display debug statistics
 *********************************************************************/
int     em_display_debug_stats = 0;

/*********************************************************************
 *  Driver version:
 *********************************************************************/
char em_driver_version[] = "7.1.9";

/*********************************************************************
 *  PCI Device ID Table
 *
 *  Used by probe to select which devices to attach to.
 *  The last field stores an index into em_strings.
 *  The last entry must be all 0s.
 *
 *  { Vendor ID, Device ID, SubVendor ID, SubDevice ID, String Index }
 *********************************************************************/

static em_vendor_info_t em_vendor_info_array[] =
{
        /* Intel(R) PRO/1000 Network Connection */
        { 0x8086, E1000_DEV_ID_82571EB_COPPER,  PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82571EB_FIBER,   PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82571EB_SERDES,  PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82571EB_SERDES_DUAL,
                                                PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82571EB_SERDES_QUAD,
                                                PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82571EB_QUAD_COPPER,
                                                PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82571EB_QUAD_COPPER_LP,
                                                PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82571EB_QUAD_FIBER,
                                                PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82571PT_QUAD_COPPER,
                                                PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82572EI_COPPER,  PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82572EI_FIBER,   PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82572EI_SERDES,  PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82572EI,         PCI_ANY_ID, PCI_ANY_ID, 0},

        { 0x8086, E1000_DEV_ID_82573E,          PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82573E_IAMT,     PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82573L,          PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82583V,          PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_80003ES2LAN_COPPER_SPT,
                                                PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_80003ES2LAN_SERDES_SPT,
                                                PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_80003ES2LAN_COPPER_DPT,
                                                PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_80003ES2LAN_SERDES_DPT,
                                                PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_ICH8_IGP_M_AMT,  PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_ICH8_IGP_AMT,    PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_ICH8_IGP_C,      PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_ICH8_IFE,        PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_ICH8_IFE_GT,     PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_ICH8_IFE_G,      PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_ICH8_IGP_M,      PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_ICH8_82567V_3,   PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_ICH9_IGP_M_AMT,  PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_ICH9_IGP_AMT,    PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_ICH9_IGP_C,      PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_ICH9_IGP_M,      PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_ICH9_IGP_M_V,    PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_ICH9_IFE,        PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_ICH9_IFE_GT,     PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_ICH9_IFE_G,      PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_ICH9_BM,         PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82574L,          PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82574LA,         PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_ICH10_R_BM_LM,   PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_ICH10_R_BM_LF,   PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_ICH10_R_BM_V,    PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_ICH10_D_BM_LM,   PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_ICH10_D_BM_LF,   PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_ICH10_D_BM_V,    PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_PCH_M_HV_LM,     PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_PCH_M_HV_LC,     PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_PCH_D_HV_DM,     PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_PCH_D_HV_DC,     PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_PCH2_LV_LM,      PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_PCH2_LV_V,       PCI_ANY_ID, PCI_ANY_ID, 0},
        /* required last entry */
        { 0, 0, 0, 0, 0}
};

/*********************************************************************
 *  Table of branding strings for all supported NICs.
 *********************************************************************/

static char *em_strings[] = {
        "Intel(R) PRO/1000 Network Connection"
};

/*********************************************************************
 *  Function prototypes
 *********************************************************************/
static int      em_probe(device_t);
static int      em_attach(device_t);
static int      em_detach(device_t);
static int      em_shutdown(device_t);
static int      em_suspend(device_t);
static int      em_resume(device_t);
static void     em_start(struct ifnet *);
static void     em_start_locked(struct ifnet *, struct tx_ring *);
#ifdef EM_MULTIQUEUE
static int      em_mq_start(struct ifnet *, struct mbuf *);
static int      em_mq_start_locked(struct ifnet *,
                    struct tx_ring *, struct mbuf *);
static void     em_qflush(struct ifnet *);
#endif
static int      em_ioctl(struct ifnet *, u_long, caddr_t);
static void     em_init(void *);
static void     em_init_locked(struct adapter *);
static void     em_stop(void *);
static void     em_media_status(struct ifnet *, struct ifmediareq *);
static int      em_media_change(struct ifnet *);
static void     em_identify_hardware(struct adapter *);
static int      em_allocate_pci_resources(struct adapter *);
static int      em_allocate_legacy(struct adapter *);
static int      em_allocate_msix(struct adapter *);
static int      em_allocate_queues(struct adapter *);
static int      em_setup_msix(struct adapter *);
static void     em_free_pci_resources(struct adapter *);
static void     em_local_timer(void *);
static void     em_reset(struct adapter *);
static int      em_setup_interface(device_t, struct adapter *);

static void     em_setup_transmit_structures(struct adapter *);
static void     em_initialize_transmit_unit(struct adapter *);
static int      em_allocate_transmit_buffers(struct tx_ring *);
static void     em_free_transmit_structures(struct adapter *);
static void     em_free_transmit_buffers(struct tx_ring *);

static int      em_setup_receive_structures(struct adapter *);
static int      em_allocate_receive_buffers(struct rx_ring *);
static void     em_initialize_receive_unit(struct adapter *);
static void     em_free_receive_structures(struct adapter *);
static void     em_free_receive_buffers(struct rx_ring *);

static void     em_enable_intr(struct adapter *);
static void     em_disable_intr(struct adapter *);
static void     em_update_stats_counters(struct adapter *);
static void     em_add_hw_stats(struct adapter *adapter);
static bool     em_txeof(struct tx_ring *);
static bool     em_rxeof(struct rx_ring *, int, int *);
#ifndef __NO_STRICT_ALIGNMENT
static int      em_fixup_rx(struct rx_ring *);
#endif
static void     em_receive_checksum(struct e1000_rx_desc *, struct mbuf *);
static void     em_transmit_checksum_setup(struct tx_ring *, struct mbuf *, int,
                    struct ip *, u32 *, u32 *);
static void     em_tso_setup(struct tx_ring *, struct mbuf *, int, struct ip *,
                    struct tcphdr *, u32 *, u32 *);
static void     em_set_promisc(struct adapter *);
static void     em_disable_promisc(struct adapter *);
static void     em_set_multi(struct adapter *);
static void     em_update_link_status(struct adapter *);
static void     em_refresh_mbufs(struct rx_ring *, int);
static void     em_register_vlan(void *, struct ifnet *, u16);
static void     em_unregister_vlan(void *, struct ifnet *, u16);
static void     em_setup_vlan_hw_support(struct adapter *);
static int      em_xmit(struct tx_ring *, struct mbuf **);
static int      em_dma_malloc(struct adapter *, bus_size_t,
                    struct em_dma_alloc *, int);
static void     em_dma_free(struct adapter *, struct em_dma_alloc *);
static int      em_sysctl_nvm_info(SYSCTL_HANDLER_ARGS);
static void     em_print_nvm_info(struct adapter *);
static int      em_sysctl_debug_info(SYSCTL_HANDLER_ARGS);
static void     em_print_debug_info(struct adapter *);
static int      em_is_valid_ether_addr(u8 *);
static int      em_sysctl_int_delay(SYSCTL_HANDLER_ARGS);
static void     em_add_int_delay_sysctl(struct adapter *, const char *,
                    const char *, struct em_int_delay_info *, int, int);
/* Management and WOL Support */
static void     em_init_manageability(struct adapter *);
static void     em_release_manageability(struct adapter *);
static void     em_get_hw_control(struct adapter *);
static void     em_release_hw_control(struct adapter *);
static void     em_get_wakeup(device_t);
static void     em_enable_wakeup(device_t);
static int      em_enable_phy_wakeup(struct adapter *);
static void     em_led_func(void *, int);
static void     em_disable_aspm(struct adapter *);

static int      em_irq_fast(void *);

/* MSIX handlers */
static void     em_msix_tx(void *);
static void     em_msix_rx(void *);
static void     em_msix_link(void *);
static void     em_handle_tx(void *context, int pending);
static void     em_handle_rx(void *context, int pending);
static void     em_handle_link(void *context, int pending);

static void     em_add_rx_process_limit(struct adapter *, const char *,
                    const char *, int *, int);
static void     em_set_flow_cntrl(struct adapter *, const char *,
                    const char *, int *, int);

static __inline void em_rx_discard(struct rx_ring *, int);

#ifdef DEVICE_POLLING
static poll_handler_t em_poll;
#endif /* DEVICE_POLLING */

/*********************************************************************
 *  FreeBSD Device Interface Entry Points
 *********************************************************************/

static device_method_t em_methods[] = {
        /* Device interface */
        DEVMETHOD(device_probe, em_probe),
        DEVMETHOD(device_attach, em_attach),
        DEVMETHOD(device_detach, em_detach),
        DEVMETHOD(device_shutdown, em_shutdown),
        DEVMETHOD(device_suspend, em_suspend),
        DEVMETHOD(device_resume, em_resume),
        {0, 0}
};

static driver_t em_driver = {
        "em", em_methods, sizeof(struct adapter),
};

devclass_t em_devclass;
DRIVER_MODULE(em, pci, em_driver, em_devclass, 0, 0);
MODULE_DEPEND(em, pci, 1, 1, 1);
MODULE_DEPEND(em, ether, 1, 1, 1);

/*********************************************************************
 *  Tunable default values.
 *********************************************************************/

#define EM_TICKS_TO_USECS(ticks)        ((1024 * (ticks) + 500) / 1000)
#define EM_USECS_TO_TICKS(usecs)        ((1000 * (usecs) + 512) / 1024)
#define M_TSO_LEN                       66

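/*
 * The interrupt-delay registers these macros convert for count in units
 * of 1.024 usecs, which is where the 1024/1000 scaling above comes from.
 * A worked example (illustrative values only): EM_USECS_TO_TICKS(100) =
 * (1000 * 100 + 512) / 1024 = 98 ticks, and EM_TICKS_TO_USECS(98) =
 * (1024 * 98 + 500) / 1000 = 100 usecs, so a setting survives the round
 * trip with only integer-truncation error.
 */
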
/* Allow common code without TSO */
#ifndef CSUM_TSO
#define CSUM_TSO        0
#endif

static int em_tx_int_delay_dflt = EM_TICKS_TO_USECS(EM_TIDV);
static int em_rx_int_delay_dflt = EM_TICKS_TO_USECS(EM_RDTR);
TUNABLE_INT("hw.em.tx_int_delay", &em_tx_int_delay_dflt);
TUNABLE_INT("hw.em.rx_int_delay", &em_rx_int_delay_dflt);

static int em_tx_abs_int_delay_dflt = EM_TICKS_TO_USECS(EM_TADV);
static int em_rx_abs_int_delay_dflt = EM_TICKS_TO_USECS(EM_RADV);
TUNABLE_INT("hw.em.tx_abs_int_delay", &em_tx_abs_int_delay_dflt);
TUNABLE_INT("hw.em.rx_abs_int_delay", &em_rx_abs_int_delay_dflt);

static int em_rxd = EM_DEFAULT_RXD;
static int em_txd = EM_DEFAULT_TXD;
TUNABLE_INT("hw.em.rxd", &em_rxd);
TUNABLE_INT("hw.em.txd", &em_txd);

static int em_smart_pwr_down = FALSE;
TUNABLE_INT("hw.em.smart_pwr_down", &em_smart_pwr_down);

/* Controls whether promiscuous mode also shows bad packets */
static int em_debug_sbp = FALSE;
TUNABLE_INT("hw.em.sbp", &em_debug_sbp);

static int em_enable_msix = TRUE;
TUNABLE_INT("hw.em.enable_msix", &em_enable_msix);

/* How many packets rxeof tries to clean at a time */
static int em_rx_process_limit = 100;
TUNABLE_INT("hw.em.rx_process_limit", &em_rx_process_limit);

/* Flow control setting - default to FULL */
static int em_fc_setting = e1000_fc_full;
TUNABLE_INT("hw.em.fc_setting", &em_fc_setting);

/* Global used in WOL setup with multiport cards */
static int global_quad_port_a = 0;

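/*
 * The TUNABLE_INT knobs above are read at boot; a minimal loader.conf(5)
 * sketch (example values, not driver defaults; the flow-control encoding
 * assumes the shared-code e1000_fc_mode enum where 0 is e1000_fc_none):
 *
 *      hw.em.rxd="2048"                # RX descriptors per ring
 *      hw.em.txd="2048"                # TX descriptors per ring
 *      hw.em.rx_process_limit="200"    # packets cleaned per rxeof pass
 *      hw.em.enable_msix="0"           # force MSI/legacy interrupts
 *      hw.em.fc_setting="0"            # disable flow control
 */
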
/*********************************************************************
 *  Device identification routine
 *
 *  em_probe determines whether the driver should be loaded on an
 *  adapter, based on the PCI vendor/device ID of the adapter.
 *
 *  return BUS_PROBE_DEFAULT on success, positive on failure
 *********************************************************************/

static int
em_probe(device_t dev)
{
        char            adapter_name[60];
        u16             pci_vendor_id = 0;
        u16             pci_device_id = 0;
        u16             pci_subvendor_id = 0;
        u16             pci_subdevice_id = 0;
        em_vendor_info_t *ent;

        INIT_DEBUGOUT("em_probe: begin");

        pci_vendor_id = pci_get_vendor(dev);
        if (pci_vendor_id != EM_VENDOR_ID)
                return (ENXIO);

        pci_device_id = pci_get_device(dev);
        pci_subvendor_id = pci_get_subvendor(dev);
        pci_subdevice_id = pci_get_subdevice(dev);

        ent = em_vendor_info_array;
        while (ent->vendor_id != 0) {
                if ((pci_vendor_id == ent->vendor_id) &&
                    (pci_device_id == ent->device_id) &&
                    ((pci_subvendor_id == ent->subvendor_id) ||
                    (ent->subvendor_id == PCI_ANY_ID)) &&
                    ((pci_subdevice_id == ent->subdevice_id) ||
                    (ent->subdevice_id == PCI_ANY_ID))) {
                        sprintf(adapter_name, "%s %s",
                                em_strings[ent->index],
                                em_driver_version);
                        device_set_desc_copy(dev, adapter_name);
                        return (BUS_PROBE_DEFAULT);
                }
                ent++;
        }

        return (ENXIO);
}

/*********************************************************************
 *  Device initialization routine
 *
 *  The attach entry point is called when the driver is being loaded.
 *  This routine identifies the type of hardware, allocates all resources
 *  and initializes the hardware.
 *
 *  return 0 on success, positive on failure
 *********************************************************************/

static int
em_attach(device_t dev)
{
        struct adapter  *adapter;
        int             error = 0;

        INIT_DEBUGOUT("em_attach: begin");

        adapter = device_get_softc(dev);
        adapter->dev = adapter->osdep.dev = dev;
        EM_CORE_LOCK_INIT(adapter, device_get_nameunit(dev));

        /* SYSCTL stuff */
        SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
            SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
            OID_AUTO, "nvm", CTLTYPE_INT|CTLFLAG_RW, adapter, 0,
            em_sysctl_nvm_info, "I", "NVM Information");

        SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
            SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
            OID_AUTO, "debug", CTLTYPE_INT|CTLFLAG_RW, adapter, 0,
            em_sysctl_debug_info, "I", "Debug Information");

        callout_init_mtx(&adapter->timer, &adapter->core_mtx, 0);

        /* Determine hardware and mac info */
        em_identify_hardware(adapter);

        /* Setup PCI resources */
        if (em_allocate_pci_resources(adapter)) {
                device_printf(dev, "Allocation of PCI resources failed\n");
                error = ENXIO;
                goto err_pci;
        }

        /*
        ** For ICH8 and family we need to
        ** map the flash memory, and this
        ** must happen after the MAC is
        ** identified
        */
        if ((adapter->hw.mac.type == e1000_ich8lan) ||
            (adapter->hw.mac.type == e1000_ich9lan) ||
            (adapter->hw.mac.type == e1000_ich10lan) ||
            (adapter->hw.mac.type == e1000_pchlan) ||
            (adapter->hw.mac.type == e1000_pch2lan)) {
                int rid = EM_BAR_TYPE_FLASH;
                adapter->flash = bus_alloc_resource_any(dev,
                    SYS_RES_MEMORY, &rid, RF_ACTIVE);
                if (adapter->flash == NULL) {
                        device_printf(dev, "Mapping of Flash failed\n");
                        error = ENXIO;
                        goto err_pci;
                }
                /* This is used in the shared code */
                adapter->hw.flash_address = (u8 *)adapter->flash;
                adapter->osdep.flash_bus_space_tag =
                    rman_get_bustag(adapter->flash);
                adapter->osdep.flash_bus_space_handle =
                    rman_get_bushandle(adapter->flash);
        }

        /* Do Shared Code initialization */
        if (e1000_setup_init_funcs(&adapter->hw, TRUE)) {
                device_printf(dev, "Setup of Shared code failed\n");
                error = ENXIO;
                goto err_pci;
        }

        e1000_get_bus_info(&adapter->hw);

        /* Set up some sysctls for the tunable interrupt delays */
        em_add_int_delay_sysctl(adapter, "rx_int_delay",
            "receive interrupt delay in usecs", &adapter->rx_int_delay,
            E1000_REGISTER(&adapter->hw, E1000_RDTR), em_rx_int_delay_dflt);
        em_add_int_delay_sysctl(adapter, "tx_int_delay",
            "transmit interrupt delay in usecs", &adapter->tx_int_delay,
            E1000_REGISTER(&adapter->hw, E1000_TIDV), em_tx_int_delay_dflt);
        em_add_int_delay_sysctl(adapter, "rx_abs_int_delay",
            "receive interrupt delay limit in usecs",
            &adapter->rx_abs_int_delay,
            E1000_REGISTER(&adapter->hw, E1000_RADV),
            em_rx_abs_int_delay_dflt);
        em_add_int_delay_sysctl(adapter, "tx_abs_int_delay",
            "transmit interrupt delay limit in usecs",
            &adapter->tx_abs_int_delay,
            E1000_REGISTER(&adapter->hw, E1000_TADV),
            em_tx_abs_int_delay_dflt);

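        /*
         * These OIDs live under the device's sysctl tree, so once unit 0
         * attaches they can be adjusted at runtime; a hypothetical
         * sysctl(8) session (values are examples only):
         *
         *      # sysctl dev.em.0.rx_int_delay=32
         *      # sysctl dev.em.0.tx_abs_int_delay=64
         *
         * em_sysctl_int_delay() converts the entered usecs back into
         * hardware ticks and rewrites the matching delay register.
         */
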
        /* Sysctl for limiting the amount of work done in the taskqueue */
        em_add_rx_process_limit(adapter, "rx_processing_limit",
            "max number of rx packets to process", &adapter->rx_process_limit,
            em_rx_process_limit);

        /* Sysctl for setting the interface flow control */
        em_set_flow_cntrl(adapter, "flow_control",
            "configure flow control",
            &adapter->fc_setting, em_fc_setting);

        /*
         * Validate the number of transmit and receive descriptors. It
         * must not exceed the hardware maximum, and must be a multiple
         * of EM_DBA_ALIGN.
         */
        if (((em_txd * sizeof(struct e1000_tx_desc)) % EM_DBA_ALIGN) != 0 ||
            (em_txd > EM_MAX_TXD) || (em_txd < EM_MIN_TXD)) {
                device_printf(dev, "Using %d TX descriptors instead of %d!\n",
                    EM_DEFAULT_TXD, em_txd);
                adapter->num_tx_desc = EM_DEFAULT_TXD;
        } else
                adapter->num_tx_desc = em_txd;

        if (((em_rxd * sizeof(struct e1000_rx_desc)) % EM_DBA_ALIGN) != 0 ||
            (em_rxd > EM_MAX_RXD) || (em_rxd < EM_MIN_RXD)) {
                device_printf(dev, "Using %d RX descriptors instead of %d!\n",
                    EM_DEFAULT_RXD, em_rxd);
                adapter->num_rx_desc = EM_DEFAULT_RXD;
        } else
                adapter->num_rx_desc = em_rxd;

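        /*
         * A worked instance of the check above, assuming the conventional
         * 128-byte EM_DBA_ALIGN and 16-byte legacy descriptors (both
         * defined outside this file): hw.em.txd=1024 gives a 16384-byte
         * ring, which divides evenly by 128 and is accepted, while
         * hw.em.txd=1020 gives 16320 bytes (16320 % 128 == 64) and would
         * fall back to EM_DEFAULT_TXD.
         */
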
        adapter->hw.mac.autoneg = DO_AUTO_NEG;
        adapter->hw.phy.autoneg_wait_to_complete = FALSE;
        adapter->hw.phy.autoneg_advertised = AUTONEG_ADV_DEFAULT;

        /* Copper options */
        if (adapter->hw.phy.media_type == e1000_media_type_copper) {
                adapter->hw.phy.mdix = AUTO_ALL_MODES;
                adapter->hw.phy.disable_polarity_correction = FALSE;
                adapter->hw.phy.ms_type = EM_MASTER_SLAVE;
        }

        /*
         * Set the frame limits assuming
         * standard ethernet sized frames.
         */
        adapter->max_frame_size = ETHERMTU + ETHER_HDR_LEN + ETHERNET_FCS_SIZE;
        adapter->min_frame_size = ETH_ZLEN + ETHERNET_FCS_SIZE;

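        /*
         * With standard frames this works out to 1500 (ETHERMTU) + 14
         * (ETHER_HDR_LEN) + 4 (FCS) = 1518 bytes maximum and, assuming
         * the conventional 60-byte ETH_ZLEN, 60 + 4 = 64 bytes minimum;
         * illustrative arithmetic only.
         */
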
        /*
         * This controls when hardware reports transmit completion
         * status.
         */
        adapter->hw.mac.report_tx_early = 1;

        /*
        ** Get queue/ring memory
        */
        if (em_allocate_queues(adapter)) {
                error = ENOMEM;
                goto err_pci;
        }

        /* Allocate multicast array memory. */
        adapter->mta = malloc(sizeof(u8) * ETH_ADDR_LEN *
            MAX_NUM_MULTICAST_ADDRESSES, M_DEVBUF, M_NOWAIT);
        if (adapter->mta == NULL) {
                device_printf(dev, "Can not allocate multicast setup array\n");
                error = ENOMEM;
                goto err_late;
        }

        /* Check SOL/IDER usage */
        if (e1000_check_reset_block(&adapter->hw))
                device_printf(dev, "PHY reset is blocked"
                    " due to SOL/IDER session.\n");

        /*
        ** Start from a known state; this is
        ** important for reading the NVM and
        ** MAC address afterward.
        */
        e1000_reset_hw(&adapter->hw);

        /* Make sure we have a good EEPROM before we read from it */
        if (e1000_validate_nvm_checksum(&adapter->hw) < 0) {
                /*
                ** Some PCI-E parts fail the first check due to
                ** the link being in a sleep state; call it again.
                ** If it fails a second time, it's a real issue.
                */
                if (e1000_validate_nvm_checksum(&adapter->hw) < 0) {
                        device_printf(dev,
                            "The EEPROM Checksum Is Not Valid\n");
                        error = EIO;
                        goto err_late;
                }
        }

        /* Copy the permanent MAC address out of the EEPROM */
        if (e1000_read_mac_addr(&adapter->hw) < 0) {
                device_printf(dev, "EEPROM read error while reading MAC"
                    " address\n");
                error = EIO;
                goto err_late;
        }

        if (!em_is_valid_ether_addr(adapter->hw.mac.addr)) {
                device_printf(dev, "Invalid MAC address\n");
                error = EIO;
                goto err_late;
        }

        /*
        **  Do interrupt configuration
        */
        if (adapter->msix > 1) /* Do MSIX */
                error = em_allocate_msix(adapter);
        else  /* MSI or Legacy */
                error = em_allocate_legacy(adapter);
        if (error)
                goto err_late;

        /*
         * Get Wake-on-Lan and Management info for later use
         */
        em_get_wakeup(dev);

        /* Setup OS specific network interface */
        if (em_setup_interface(dev, adapter) != 0)
                goto err_late;

        em_reset(adapter);

        /* Initialize statistics */
        em_update_stats_counters(adapter);

        adapter->hw.mac.get_link_status = 1;
        em_update_link_status(adapter);

        /* Register for VLAN events */
        adapter->vlan_attach = EVENTHANDLER_REGISTER(vlan_config,
            em_register_vlan, adapter, EVENTHANDLER_PRI_FIRST);
        adapter->vlan_detach = EVENTHANDLER_REGISTER(vlan_unconfig,
            em_unregister_vlan, adapter, EVENTHANDLER_PRI_FIRST);

        em_add_hw_stats(adapter);

        /* Non-AMT based hardware can now take control from firmware */
        if (adapter->has_manage && !adapter->has_amt)
                em_get_hw_control(adapter);

        /* Tell the stack that the interface is not active */
        adapter->ifp->if_drv_flags &= ~(IFF_DRV_RUNNING | IFF_DRV_OACTIVE);

        adapter->led_dev = led_create(em_led_func, adapter,
            device_get_nameunit(dev));

        INIT_DEBUGOUT("em_attach: end");

        return (0);

err_late:
        em_free_transmit_structures(adapter);
        em_free_receive_structures(adapter);
        em_release_hw_control(adapter);
        if (adapter->ifp != NULL)
                if_free(adapter->ifp);
err_pci:
        em_free_pci_resources(adapter);
        free(adapter->mta, M_DEVBUF);
        EM_CORE_LOCK_DESTROY(adapter);

        return (error);
}

/*********************************************************************
 *  Device removal routine
 *
 *  The detach entry point is called when the driver is being removed.
 *  This routine stops the adapter and deallocates all the resources
 *  that were allocated for driver operation.
 *
 *  return 0 on success, positive on failure
 *********************************************************************/

static int
em_detach(device_t dev)
{
        struct adapter  *adapter = device_get_softc(dev);
        struct ifnet    *ifp = adapter->ifp;

        INIT_DEBUGOUT("em_detach: begin");

        /* Make sure VLANs are not using the driver */
        if (adapter->ifp->if_vlantrunk != NULL) {
                device_printf(dev,"Vlan in use, detach first\n");
                return (EBUSY);
        }

#ifdef DEVICE_POLLING
        if (ifp->if_capenable & IFCAP_POLLING)
                ether_poll_deregister(ifp);
#endif

        if (adapter->led_dev != NULL)
                led_destroy(adapter->led_dev);

        EM_CORE_LOCK(adapter);
        adapter->in_detach = 1;
        em_stop(adapter);
        EM_CORE_UNLOCK(adapter);
        EM_CORE_LOCK_DESTROY(adapter);

        e1000_phy_hw_reset(&adapter->hw);

        em_release_manageability(adapter);
        em_release_hw_control(adapter);

        /* Unregister VLAN events */
        if (adapter->vlan_attach != NULL)
                EVENTHANDLER_DEREGISTER(vlan_config, adapter->vlan_attach);
        if (adapter->vlan_detach != NULL)
                EVENTHANDLER_DEREGISTER(vlan_unconfig, adapter->vlan_detach);

        ether_ifdetach(adapter->ifp);
        callout_drain(&adapter->timer);

        em_free_pci_resources(adapter);
        bus_generic_detach(dev);
        if_free(ifp);

        em_free_transmit_structures(adapter);
        em_free_receive_structures(adapter);

        em_release_hw_control(adapter);
        free(adapter->mta, M_DEVBUF);

        return (0);
}

/*********************************************************************
 *
 *  Shutdown entry point
 *
 **********************************************************************/

static int
em_shutdown(device_t dev)
{
        return em_suspend(dev);
}

/*
 * Suspend/resume device methods.
 */
static int
em_suspend(device_t dev)
{
        struct adapter *adapter = device_get_softc(dev);

        EM_CORE_LOCK(adapter);

        em_release_manageability(adapter);
        em_release_hw_control(adapter);
        em_enable_wakeup(dev);

        EM_CORE_UNLOCK(adapter);

        return bus_generic_suspend(dev);
}

static int
em_resume(device_t dev)
{
        struct adapter *adapter = device_get_softc(dev);
        struct ifnet *ifp = adapter->ifp;

        EM_CORE_LOCK(adapter);
        em_init_locked(adapter);
        em_init_manageability(adapter);
        EM_CORE_UNLOCK(adapter);
        em_start(ifp);

        return bus_generic_resume(dev);
}


/*********************************************************************
 *  Transmit entry point
 *
 *  em_start is called by the stack to initiate a transmit.
 *  The driver will remain in this routine as long as there are
 *  packets to transmit and transmit resources are available.
 *  In case resources are not available, the stack is notified and
 *  the packet is requeued.
 **********************************************************************/

#ifdef EM_MULTIQUEUE
static int
em_mq_start_locked(struct ifnet *ifp, struct tx_ring *txr, struct mbuf *m)
{
        struct adapter  *adapter = txr->adapter;
        struct mbuf     *next;
        int             err = 0, enq = 0;

        if ((ifp->if_drv_flags & (IFF_DRV_RUNNING | IFF_DRV_OACTIVE)) !=
            IFF_DRV_RUNNING || adapter->link_active == 0) {
                if (m != NULL)
                        err = drbr_enqueue(ifp, txr->br, m);
                return (err);
        }

        /* Call cleanup if number of TX descriptors low */
        if (txr->tx_avail <= EM_TX_CLEANUP_THRESHOLD)
                em_txeof(txr);

        enq = 0;
        if (m == NULL) {
                next = drbr_dequeue(ifp, txr->br);
        } else if (drbr_needs_enqueue(ifp, txr->br)) {
                if ((err = drbr_enqueue(ifp, txr->br, m)) != 0)
                        return (err);
                next = drbr_dequeue(ifp, txr->br);
        } else
                next = m;

        /* Process the queue */
        while (next != NULL) {
                if ((err = em_xmit(txr, &next)) != 0) {
                        if (next != NULL)
                                err = drbr_enqueue(ifp, txr->br, next);
                        break;
                }
                enq++;
                drbr_stats_update(ifp, next->m_pkthdr.len, next->m_flags);
                ETHER_BPF_MTAP(ifp, next);
                if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
                        break;
                if (txr->tx_avail < EM_MAX_SCATTER) {
                        ifp->if_drv_flags |= IFF_DRV_OACTIVE;
                        break;
                }
                next = drbr_dequeue(ifp, txr->br);
        }

        if (enq > 0) {
                /* Set the watchdog */
                txr->queue_status = EM_QUEUE_WORKING;
                txr->watchdog_time = ticks;
        }
        return (err);
}

/*
** Multiqueue capable stack interface
*/
static int
em_mq_start(struct ifnet *ifp, struct mbuf *m)
{
        struct adapter  *adapter = ifp->if_softc;
        struct tx_ring  *txr = adapter->tx_rings;
        int             error;

        if (EM_TX_TRYLOCK(txr)) {
                error = em_mq_start_locked(ifp, txr, m);
                EM_TX_UNLOCK(txr);
        } else
                error = drbr_enqueue(ifp, txr->br, m);

        return (error);
}

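/*
** Design note: taking the TX lock opportunistically in em_mq_start()
** keeps the stack from blocking on a contended ring; when the trylock
** fails the mbuf is parked on the buf_ring and drained later by
** whichever context next calls em_mq_start_locked(), which dequeues
** from the ring when handed a NULL mbuf.
*/
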
/*
** Flush all ring buffers
*/
static void
em_qflush(struct ifnet *ifp)
{
        struct adapter  *adapter = ifp->if_softc;
        struct tx_ring  *txr = adapter->tx_rings;
        struct mbuf     *m;

        for (int i = 0; i < adapter->num_queues; i++, txr++) {
                EM_TX_LOCK(txr);
                while ((m = buf_ring_dequeue_sc(txr->br)) != NULL)
                        m_freem(m);
                EM_TX_UNLOCK(txr);
        }
        if_qflush(ifp);
}

#endif /* EM_MULTIQUEUE */

static void
em_start_locked(struct ifnet *ifp, struct tx_ring *txr)
{
        struct adapter  *adapter = ifp->if_softc;
        struct mbuf     *m_head;

        EM_TX_LOCK_ASSERT(txr);

        if ((ifp->if_drv_flags & (IFF_DRV_RUNNING|IFF_DRV_OACTIVE)) !=
            IFF_DRV_RUNNING)
                return;

        if (!adapter->link_active)
                return;

        /* Call cleanup if number of TX descriptors low */
        if (txr->tx_avail <= EM_TX_CLEANUP_THRESHOLD)
                em_txeof(txr);

        while (!IFQ_DRV_IS_EMPTY(&ifp->if_snd)) {
                if (txr->tx_avail < EM_MAX_SCATTER) {
                        ifp->if_drv_flags |= IFF_DRV_OACTIVE;
                        break;
                }
                IFQ_DRV_DEQUEUE(&ifp->if_snd, m_head);
                if (m_head == NULL)
                        break;
                /*
                 *  Encapsulation can modify our pointer, and/or make it
                 *  NULL on failure.  In that event, we can't requeue.
                 */
                if (em_xmit(txr, &m_head)) {
                        if (m_head == NULL)
                                break;
                        ifp->if_drv_flags |= IFF_DRV_OACTIVE;
                        IFQ_DRV_PREPEND(&ifp->if_snd, m_head);
                        break;
                }

                /* Send a copy of the frame to the BPF listener */
                ETHER_BPF_MTAP(ifp, m_head);

                /* Set timeout in case hardware has problems transmitting. */
                txr->watchdog_time = ticks;
                txr->queue_status = EM_QUEUE_WORKING;
        }

        return;
}

static void
em_start(struct ifnet *ifp)
{
        struct adapter  *adapter = ifp->if_softc;
        struct tx_ring  *txr = adapter->tx_rings;

        if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
                EM_TX_LOCK(txr);
                em_start_locked(ifp, txr);
                EM_TX_UNLOCK(txr);
        }
        return;
}

/*********************************************************************
 *  Ioctl entry point
 *
 *  em_ioctl is called when the user wants to configure the
 *  interface.
 *
 *  return 0 on success, positive on failure
 **********************************************************************/

static int
em_ioctl(struct ifnet *ifp, u_long command, caddr_t data)
{
        struct adapter  *adapter = ifp->if_softc;
        struct ifreq *ifr = (struct ifreq *)data;
#ifdef INET
        struct ifaddr *ifa = (struct ifaddr *)data;
#endif
        int error = 0;

        if (adapter->in_detach)
                return (error);

        switch (command) {
        case SIOCSIFADDR:
#ifdef INET
                if (ifa->ifa_addr->sa_family == AF_INET) {
                        /*
                         * XXX
                         * Since resetting the hardware takes a very long
                         * time and results in link renegotiation, we only
                         * initialize the hardware when it is absolutely
                         * required.
                         */
                        ifp->if_flags |= IFF_UP;
                        if (!(ifp->if_drv_flags & IFF_DRV_RUNNING)) {
                                EM_CORE_LOCK(adapter);
                                em_init_locked(adapter);
                                EM_CORE_UNLOCK(adapter);
                        }
                        arp_ifinit(ifp, ifa);
                } else
#endif
                        error = ether_ioctl(ifp, command, data);
                break;
        case SIOCSIFMTU:
            {
                int max_frame_size;

                IOCTL_DEBUGOUT("ioctl rcv'd: SIOCSIFMTU (Set Interface MTU)");

                EM_CORE_LOCK(adapter);
                switch (adapter->hw.mac.type) {
                case e1000_82571:
                case e1000_82572:
                case e1000_ich9lan:
                case e1000_ich10lan:
                case e1000_pch2lan:
                case e1000_82574:
                case e1000_80003es2lan: /* 9K Jumbo Frame size */
                        max_frame_size = 9234;
                        break;
                case e1000_pchlan:
                        max_frame_size = 4096;
                        break;
                        /* Adapters that do not support jumbo frames */
                case e1000_82583:
                case e1000_ich8lan:
                        max_frame_size = ETHER_MAX_LEN;
                        break;
                default:
                        max_frame_size = MAX_JUMBO_FRAME_SIZE;
                }
                if (ifr->ifr_mtu > max_frame_size - ETHER_HDR_LEN -
                    ETHER_CRC_LEN) {
                        EM_CORE_UNLOCK(adapter);
                        error = EINVAL;
                        break;
                }

                ifp->if_mtu = ifr->ifr_mtu;
                adapter->max_frame_size =
                    ifp->if_mtu + ETHER_HDR_LEN + ETHER_CRC_LEN;
                em_init_locked(adapter);
                EM_CORE_UNLOCK(adapter);
                break;
            }
        case SIOCSIFFLAGS:
                IOCTL_DEBUGOUT("ioctl rcv'd:\
                    SIOCSIFFLAGS (Set Interface Flags)");
                EM_CORE_LOCK(adapter);
                if (ifp->if_flags & IFF_UP) {
                        if ((ifp->if_drv_flags & IFF_DRV_RUNNING)) {
                                if ((ifp->if_flags ^ adapter->if_flags) &
                                    (IFF_PROMISC | IFF_ALLMULTI)) {
                                        em_disable_promisc(adapter);
                                        em_set_promisc(adapter);
                                }
                        } else
                                em_init_locked(adapter);
                } else
                        if (ifp->if_drv_flags & IFF_DRV_RUNNING)
                                em_stop(adapter);
                adapter->if_flags = ifp->if_flags;
                EM_CORE_UNLOCK(adapter);
                break;
        case SIOCADDMULTI:
        case SIOCDELMULTI:
                IOCTL_DEBUGOUT("ioctl rcv'd: SIOC(ADD|DEL)MULTI");
                if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
                        EM_CORE_LOCK(adapter);
                        em_disable_intr(adapter);
                        em_set_multi(adapter);
#ifdef DEVICE_POLLING
                        if (!(ifp->if_capenable & IFCAP_POLLING))
#endif
                                em_enable_intr(adapter);
                        EM_CORE_UNLOCK(adapter);
                }
                break;
        case SIOCSIFMEDIA:
                /*
                ** As the speed/duplex settings are being
                ** changed, we need to reset the PHY.
                */
                adapter->hw.phy.reset_disable = FALSE;
                /* Check SOL/IDER usage */
                EM_CORE_LOCK(adapter);
                if (e1000_check_reset_block(&adapter->hw)) {
                        EM_CORE_UNLOCK(adapter);
                        device_printf(adapter->dev, "Media change is"
                            " blocked due to SOL/IDER session.\n");
                        break;
                }
                EM_CORE_UNLOCK(adapter);
                /* falls thru */
        case SIOCGIFMEDIA:
                IOCTL_DEBUGOUT("ioctl rcv'd: \
                    SIOCxIFMEDIA (Get/Set Interface Media)");
                error = ifmedia_ioctl(ifp, ifr, &adapter->media, command);
                break;
        case SIOCSIFCAP:
            {
                int mask, reinit;

                IOCTL_DEBUGOUT("ioctl rcv'd: SIOCSIFCAP (Set Capabilities)");
                reinit = 0;
                mask = ifr->ifr_reqcap ^ ifp->if_capenable;
#ifdef DEVICE_POLLING
                if (mask & IFCAP_POLLING) {
                        if (ifr->ifr_reqcap & IFCAP_POLLING) {
                                error = ether_poll_register(em_poll, ifp);
                                if (error)
                                        return (error);
                                EM_CORE_LOCK(adapter);
                                em_disable_intr(adapter);
                                ifp->if_capenable |= IFCAP_POLLING;
                                EM_CORE_UNLOCK(adapter);
                        } else {
                                error = ether_poll_deregister(ifp);
                                /* Enable interrupt even in error case */
                                EM_CORE_LOCK(adapter);
                                em_enable_intr(adapter);
                                ifp->if_capenable &= ~IFCAP_POLLING;
                                EM_CORE_UNLOCK(adapter);
                        }
                }
#endif
                if (mask & IFCAP_HWCSUM) {
                        ifp->if_capenable ^= IFCAP_HWCSUM;
                        reinit = 1;
                }
                if (mask & IFCAP_TSO4) {
                        ifp->if_capenable ^= IFCAP_TSO4;
                        reinit = 1;
                }
                if (mask & IFCAP_VLAN_HWTAGGING) {
                        ifp->if_capenable ^= IFCAP_VLAN_HWTAGGING;
                        reinit = 1;
                }
                if (mask & IFCAP_VLAN_HWFILTER) {
                        ifp->if_capenable ^= IFCAP_VLAN_HWFILTER;
                        reinit = 1;
                }
                if ((mask & IFCAP_WOL) &&
                    (ifp->if_capabilities & IFCAP_WOL) != 0) {
                        if (mask & IFCAP_WOL_MCAST)
                                ifp->if_capenable ^= IFCAP_WOL_MCAST;
                        if (mask & IFCAP_WOL_MAGIC)
                                ifp->if_capenable ^= IFCAP_WOL_MAGIC;
                }
                if (reinit && (ifp->if_drv_flags & IFF_DRV_RUNNING))
                        em_init(adapter);
                VLAN_CAPABILITIES(ifp);
                break;
            }

        default:
                error = ether_ioctl(ifp, command, data);
                break;
        }

        return (error);
}


/*********************************************************************
 *  Init entry point
 *
 *  This routine is used in two ways. It is used by the stack as
 *  the init entry point in the network interface structure. It is
 *  also used by the driver as a hw/sw initialization routine to get
 *  to a consistent state.
 *
 **********************************************************************/

static void
em_init_locked(struct adapter *adapter)
{
        struct ifnet    *ifp = adapter->ifp;
        device_t        dev = adapter->dev;
        u32             pba;

        INIT_DEBUGOUT("em_init: begin");

        EM_CORE_LOCK_ASSERT(adapter);

        em_disable_intr(adapter);
        callout_stop(&adapter->timer);

        /*
         * Packet Buffer Allocation (PBA):
         * Writing PBA sets the receive portion of the buffer;
         * the remainder is used for the transmit buffer.
         */
        switch (adapter->hw.mac.type) {
        /* Total Packet Buffer on these is 48K */
        case e1000_82571:
        case e1000_82572:
        case e1000_80003es2lan:
                pba = E1000_PBA_32K; /* 32K for Rx, 16K for Tx */
                break;
        case e1000_82573: /* 82573: Total Packet Buffer is 32K */
                pba = E1000_PBA_12K; /* 12K for Rx, 20K for Tx */
                break;
        case e1000_82574:
        case e1000_82583:
                pba = E1000_PBA_20K; /* 20K for Rx, 20K for Tx */
                break;
        case e1000_ich8lan:
                pba = E1000_PBA_8K;
                break;
        case e1000_ich9lan:
        case e1000_ich10lan:
                pba = E1000_PBA_10K;
                break;
        case e1000_pchlan:
        case e1000_pch2lan:
                pba = E1000_PBA_26K;
                break;
        default:
                if (adapter->max_frame_size > 8192)
                        pba = E1000_PBA_40K; /* 40K for Rx, 24K for Tx */
                else
                        pba = E1000_PBA_48K; /* 48K for Rx, 16K for Tx */
        }

        INIT_DEBUGOUT1("em_init: pba=%dK",pba);
        E1000_WRITE_REG(&adapter->hw, E1000_PBA, pba);

        /* Get the latest MAC address; the user can use a LAA */
        bcopy(IF_LLADDR(adapter->ifp), adapter->hw.mac.addr,
              ETHER_ADDR_LEN);

        /* Put the address into the Receive Address Array */
        e1000_rar_set(&adapter->hw, adapter->hw.mac.addr, 0);

        /*
         * With the 82571 adapter, RAR[0] may be overwritten
         * when the other port is reset. We make a duplicate
         * in RAR[14] for that eventuality; this assures
         * the interface continues to function.
         */
        if (adapter->hw.mac.type == e1000_82571) {
                e1000_set_laa_state_82571(&adapter->hw, TRUE);
                e1000_rar_set(&adapter->hw, adapter->hw.mac.addr,
                    E1000_RAR_ENTRIES - 1);
        }

        /* Initialize the hardware */
        em_reset(adapter);
        em_update_link_status(adapter);

        /* Setup VLAN support, basic and offload if available */
        E1000_WRITE_REG(&adapter->hw, E1000_VET, ETHERTYPE_VLAN);

        /* Set hardware offload abilities */
        ifp->if_hwassist = 0;
        if (ifp->if_capenable & IFCAP_TXCSUM)
                ifp->if_hwassist |= (CSUM_TCP | CSUM_UDP);
        if (ifp->if_capenable & IFCAP_TSO4)
                ifp->if_hwassist |= CSUM_TSO;

        /* Configure for OS presence */
        em_init_manageability(adapter);

        /* Prepare transmit descriptors and buffers */
        em_setup_transmit_structures(adapter);
        em_initialize_transmit_unit(adapter);

        /* Setup Multicast table */
        em_set_multi(adapter);

        /*
        ** Figure out the desired mbuf
        ** pool for doing jumbos
        */
        if (adapter->max_frame_size <= 2048)
                adapter->rx_mbuf_sz = MCLBYTES;
        else if (adapter->max_frame_size <= 4096)
                adapter->rx_mbuf_sz = MJUMPAGESIZE;
        else
                adapter->rx_mbuf_sz = MJUM9BYTES;

        /* Prepare receive descriptors and buffers */
        if (em_setup_receive_structures(adapter)) {
                device_printf(dev, "Could not setup receive structures\n");
                em_stop(adapter);
                return;
        }
        em_initialize_receive_unit(adapter);

        /* Use real VLAN Filter support? */
        if (ifp->if_capenable & IFCAP_VLAN_HWTAGGING) {
                if (ifp->if_capenable & IFCAP_VLAN_HWFILTER)
                        /* Use real VLAN Filter support */
                        em_setup_vlan_hw_support(adapter);
                else {
                        u32 ctrl;
                        ctrl = E1000_READ_REG(&adapter->hw, E1000_CTRL);
                        ctrl |= E1000_CTRL_VME;
                        E1000_WRITE_REG(&adapter->hw, E1000_CTRL, ctrl);
                }
        }

        /* Don't lose promiscuous settings */
        em_set_promisc(adapter);

        ifp->if_drv_flags |= IFF_DRV_RUNNING;
        ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;

        callout_reset(&adapter->timer, hz, em_local_timer, adapter);
        e1000_clear_hw_cntrs_base_generic(&adapter->hw);

        /* MSI/X configuration for 82574 */
        if (adapter->hw.mac.type == e1000_82574) {
                int tmp;
                tmp = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
                tmp |= E1000_CTRL_EXT_PBA_CLR;
                E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT, tmp);
                /* Set the IVAR - interrupt vector routing. */
                E1000_WRITE_REG(&adapter->hw, E1000_IVAR, adapter->ivars);
        }

#ifdef DEVICE_POLLING
        /*
         * Only enable interrupts if we are not polling; make sure
         * they are off otherwise.
         */
        if (ifp->if_capenable & IFCAP_POLLING)
                em_disable_intr(adapter);
        else
#endif /* DEVICE_POLLING */
                em_enable_intr(adapter);

        /* AMT based hardware can now take control from firmware */
        if (adapter->has_manage && adapter->has_amt)
                em_get_hw_control(adapter);

        /* Don't reset the phy next time init gets called */
        adapter->hw.phy.reset_disable = TRUE;
}

static void
em_init(void *arg)
{
        struct adapter *adapter = arg;

        EM_CORE_LOCK(adapter);
        em_init_locked(adapter);
        EM_CORE_UNLOCK(adapter);
}


1373 #ifdef DEVICE_POLLING
1374 /*********************************************************************
1375  *
1376  *  Legacy polling routine: note this only works with a single queue
1377  *
1378  *********************************************************************/
1379 static int
1380 em_poll(struct ifnet *ifp, enum poll_cmd cmd, int count)
1381 {
1382         struct adapter *adapter = ifp->if_softc;
1383         struct tx_ring  *txr = adapter->tx_rings;
1384         struct rx_ring  *rxr = adapter->rx_rings;
1385         u32             reg_icr;
1386         int             rx_done;
1387
1388         EM_CORE_LOCK(adapter);
1389         if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) {
1390                 EM_CORE_UNLOCK(adapter);
1391                 return (0);
1392         }
1393
1394         if (cmd == POLL_AND_CHECK_STATUS) {
1395                 reg_icr = E1000_READ_REG(&adapter->hw, E1000_ICR);
1396                 if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
1397                         callout_stop(&adapter->timer);
1398                         adapter->hw.mac.get_link_status = 1;
1399                         em_update_link_status(adapter);
1400                         callout_reset(&adapter->timer, hz,
1401                             em_local_timer, adapter);
1402                 }
1403         }
1404         EM_CORE_UNLOCK(adapter);
1405
1406         em_rxeof(rxr, count, &rx_done);
1407
1408         EM_TX_LOCK(txr);
1409         em_txeof(txr);
1410 #ifdef EM_MULTIQUEUE
1411         if (!drbr_empty(ifp, txr->br))
1412                 em_mq_start_locked(ifp, txr, NULL);
1413 #else
1414         if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
1415                 em_start_locked(ifp, txr);
1416 #endif
1417         EM_TX_UNLOCK(txr);
1418
1419         return (rx_done);
1420 }
1421 #endif /* DEVICE_POLLING */
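/*
 * With a kernel built with "options DEVICE_POLLING", polling is toggled
 * per interface at runtime, for example:
 *
 *     ifconfig em0 polling        (enable polling mode)
 *     ifconfig em0 -polling       (return to interrupt mode)
 *
 * See polling(4) for the related kern.polling sysctl knobs.
 */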
1422
1423
1424 /*********************************************************************
1425  *
1426  *  Fast Legacy/MSI Combined Interrupt Service routine  
1427  *
1428  *********************************************************************/
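/*
 * Note: this runs as an interrupt filter, so it must not sleep or take
 * regular (non-spin) mutexes; it only reads and acknowledges ICR and
 * defers the real work to the que and link taskqueues.  FILTER_STRAY
 * tells the kernel the interrupt was not ours; FILTER_HANDLED claims it.
 */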
1429 static int
1430 em_irq_fast(void *arg)
1431 {
1432         struct adapter  *adapter = arg;
1433         struct ifnet    *ifp;
1434         u32             reg_icr;
1435
1436         ifp = adapter->ifp;
1437
1438         reg_icr = E1000_READ_REG(&adapter->hw, E1000_ICR);
1439
1440         /* Hot eject?  */
1441         if (reg_icr == 0xffffffff)
1442                 return FILTER_STRAY;
1443
1444         /* Definitely not our interrupt.  */
1445         if (reg_icr == 0x0)
1446                 return FILTER_STRAY;
1447
1448         /*
1449          * Starting with the 82571 chip, bit 31 should be used to
1450          * determine whether the interrupt belongs to us.
1451          */
1452         if (adapter->hw.mac.type >= e1000_82571 &&
1453             (reg_icr & E1000_ICR_INT_ASSERTED) == 0)
1454                 return FILTER_STRAY;
1455
1456         em_disable_intr(adapter);
1457         taskqueue_enqueue(adapter->tq, &adapter->que_task);
1458
1459         /* Link status change */
1460         if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
1461                 adapter->hw.mac.get_link_status = 1;
1462                 taskqueue_enqueue(taskqueue_fast, &adapter->link_task);
1463         }
1464
1465         if (reg_icr & E1000_ICR_RXO)
1466                 adapter->rx_overruns++;
1467         return FILTER_HANDLED;
1468 }
1469
1470 /* Combined RX/TX handler, used by Legacy and MSI */
1471 static void
1472 em_handle_que(void *context, int pending)
1473 {
1474         struct adapter  *adapter = context;
1475         struct ifnet    *ifp = adapter->ifp;
1476         struct tx_ring  *txr = adapter->tx_rings;
1477         struct rx_ring  *rxr = adapter->rx_rings;
1478         bool            more;
1479
1480
1481         if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
1482                 more = em_rxeof(rxr, adapter->rx_process_limit, NULL);
1483
1484                 EM_TX_LOCK(txr);
1485                 em_txeof(txr);
1486 #ifdef EM_MULTIQUEUE
1487                 if (!drbr_empty(ifp, txr->br))
1488                         em_mq_start_locked(ifp, txr, NULL);
1489 #else
1490                 if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
1491                         em_start_locked(ifp, txr);
1492 #endif
1493                 em_txeof(txr);
1494                 EM_TX_UNLOCK(txr);
1495                 if (more) {
1496                         taskqueue_enqueue(adapter->tq, &adapter->que_task);
1497                         return;
1498                 }
1499         }
1500
1501         em_enable_intr(adapter);
1502         return;
1503 }
1504
1505
1506 /*********************************************************************
1507  *
1508  *  MSIX Interrupt Service Routines
1509  *
1510  **********************************************************************/
1511 static void
1512 em_msix_tx(void *arg)
1513 {
1514         struct tx_ring *txr = arg;
1515         struct adapter *adapter = txr->adapter;
1516         bool            more;
1517
1518         ++txr->tx_irq;
1519         EM_TX_LOCK(txr);
1520         more = em_txeof(txr);
1521         EM_TX_UNLOCK(txr);
1522         if (more)
1523                 taskqueue_enqueue(txr->tq, &txr->tx_task);
1524         else
1525                 /* Reenable this interrupt */
1526                 E1000_WRITE_REG(&adapter->hw, E1000_IMS, txr->ims);
1527         return;
1528 }
1529
1530 /*********************************************************************
1531  *
1532  *  MSIX RX Interrupt Service routine
1533  *
1534  **********************************************************************/
1535
1536 static void
1537 em_msix_rx(void *arg)
1538 {
1539         struct rx_ring  *rxr = arg;
1540         struct adapter  *adapter = rxr->adapter;
1541         bool            more;
1542
1543         ++rxr->rx_irq;
1544         more = em_rxeof(rxr, adapter->rx_process_limit, NULL);
1545         if (more)
1546                 taskqueue_enqueue(rxr->tq, &rxr->rx_task);
1547         else
1548                 /* Reenable this interrupt */
1549                 E1000_WRITE_REG(&adapter->hw, E1000_IMS, rxr->ims);
1550         return;
1551 }
1552
1553 /*********************************************************************
1554  *
1555  *  MSIX Link Fast Interrupt Service routine
1556  *
1557  **********************************************************************/
1558 static void
1559 em_msix_link(void *arg)
1560 {
1561         struct adapter  *adapter = arg;
1562         u32             reg_icr;
1563
1564         ++adapter->link_irq;
1565         reg_icr = E1000_READ_REG(&adapter->hw, E1000_ICR);
1566
1567         if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
1568                 adapter->hw.mac.get_link_status = 1;
1569                 em_handle_link(adapter, 0);
1570         } else
1571                 E1000_WRITE_REG(&adapter->hw, E1000_IMS,
1572                     EM_MSIX_LINK | E1000_IMS_LSC);
1573         return;
1574 }
1575
1576 static void
1577 em_handle_rx(void *context, int pending)
1578 {
1579         struct rx_ring  *rxr = context;
1580         struct adapter  *adapter = rxr->adapter;
1581         bool            more;
1582
1583         more = em_rxeof(rxr, adapter->rx_process_limit, NULL);
1584         if (more)
1585                 taskqueue_enqueue(rxr->tq, &rxr->rx_task);
1586         else
1587                 /* Reenable this interrupt */
1588                 E1000_WRITE_REG(&adapter->hw, E1000_IMS, rxr->ims);
1589 }
1590
1591 static void
1592 em_handle_tx(void *context, int pending)
1593 {
1594         struct tx_ring  *txr = context;
1595         struct adapter  *adapter = txr->adapter;
1596         struct ifnet    *ifp = adapter->ifp;
1597
1598         EM_TX_LOCK(txr);
1599         em_txeof(txr);
1600 #ifdef EM_MULTIQUEUE
1601         if (!drbr_empty(ifp, txr->br))
1602                 em_mq_start_locked(ifp, txr, NULL);
1603 #else
1604         if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
1605                 em_start_locked(ifp, txr);
1606 #endif
1607         em_txeof(txr);
1608         E1000_WRITE_REG(&adapter->hw, E1000_IMS, txr->ims);
1609         EM_TX_UNLOCK(txr);
1610 }
1611
1612 static void
1613 em_handle_link(void *context, int pending)
1614 {
1615         struct adapter  *adapter = context;
1616         struct ifnet *ifp = adapter->ifp;
1617
1618         if (!(ifp->if_drv_flags & IFF_DRV_RUNNING))
1619                 return;
1620
1621         EM_CORE_LOCK(adapter);
1622         callout_stop(&adapter->timer);
1623         em_update_link_status(adapter);
1624         callout_reset(&adapter->timer, hz, em_local_timer, adapter);
1625         E1000_WRITE_REG(&adapter->hw, E1000_IMS,
1626             EM_MSIX_LINK | E1000_IMS_LSC);
1627         EM_CORE_UNLOCK(adapter);
1628 }
1629
1630
1631 /*********************************************************************
1632  *
1633  *  Media Ioctl callback
1634  *
1635  *  This routine is called whenever the user queries the status of
1636  *  the interface using ifconfig.
1637  *
1638  **********************************************************************/
1639 static void
1640 em_media_status(struct ifnet *ifp, struct ifmediareq *ifmr)
1641 {
1642         struct adapter *adapter = ifp->if_softc;
1643         u_char fiber_type = IFM_1000_SX;
1644
1645         INIT_DEBUGOUT("em_media_status: begin");
1646
1647         EM_CORE_LOCK(adapter);
1648         em_update_link_status(adapter);
1649
1650         ifmr->ifm_status = IFM_AVALID;
1651         ifmr->ifm_active = IFM_ETHER;
1652
1653         if (!adapter->link_active) {
1654                 EM_CORE_UNLOCK(adapter);
1655                 return;
1656         }
1657
1658         ifmr->ifm_status |= IFM_ACTIVE;
1659
1660         if ((adapter->hw.phy.media_type == e1000_media_type_fiber) ||
1661             (adapter->hw.phy.media_type == e1000_media_type_internal_serdes)) {
1662                 ifmr->ifm_active |= fiber_type | IFM_FDX;
1663         } else {
1664                 switch (adapter->link_speed) {
1665                 case 10:
1666                         ifmr->ifm_active |= IFM_10_T;
1667                         break;
1668                 case 100:
1669                         ifmr->ifm_active |= IFM_100_TX;
1670                         break;
1671                 case 1000:
1672                         ifmr->ifm_active |= IFM_1000_T;
1673                         break;
1674                 }
1675                 if (adapter->link_duplex == FULL_DUPLEX)
1676                         ifmr->ifm_active |= IFM_FDX;
1677                 else
1678                         ifmr->ifm_active |= IFM_HDX;
1679         }
1680         EM_CORE_UNLOCK(adapter);
1681 }
1682
1683 /*********************************************************************
1684  *
1685  *  Media Ioctl callback
1686  *
1687  *  This routine is called when the user changes speed/duplex using
1688  *  the media/mediaopt options with ifconfig.
1689  *
1690  **********************************************************************/
1691 static int
1692 em_media_change(struct ifnet *ifp)
1693 {
1694         struct adapter *adapter = ifp->if_softc;
1695         struct ifmedia  *ifm = &adapter->media;
1696
1697         INIT_DEBUGOUT("em_media_change: begin");
1698
1699         if (IFM_TYPE(ifm->ifm_media) != IFM_ETHER)
1700                 return (EINVAL);
1701
1702         EM_CORE_LOCK(adapter);
1703         switch (IFM_SUBTYPE(ifm->ifm_media)) {
1704         case IFM_AUTO:
1705                 adapter->hw.mac.autoneg = DO_AUTO_NEG;
1706                 adapter->hw.phy.autoneg_advertised = AUTONEG_ADV_DEFAULT;
1707                 break;
1708         case IFM_1000_LX:
1709         case IFM_1000_SX:
1710         case IFM_1000_T:
1711                 adapter->hw.mac.autoneg = DO_AUTO_NEG;
1712                 adapter->hw.phy.autoneg_advertised = ADVERTISE_1000_FULL;
1713                 break;
1714         case IFM_100_TX:
1715                 adapter->hw.mac.autoneg = FALSE;
1716                 adapter->hw.phy.autoneg_advertised = 0;
1717                 if ((ifm->ifm_media & IFM_GMASK) == IFM_FDX)
1718                         adapter->hw.mac.forced_speed_duplex = ADVERTISE_100_FULL;
1719                 else
1720                         adapter->hw.mac.forced_speed_duplex = ADVERTISE_100_HALF;
1721                 break;
1722         case IFM_10_T:
1723                 adapter->hw.mac.autoneg = FALSE;
1724                 adapter->hw.phy.autoneg_advertised = 0;
1725                 if ((ifm->ifm_media & IFM_GMASK) == IFM_FDX)
1726                         adapter->hw.mac.forced_speed_duplex = ADVERTISE_10_FULL;
1727                 else
1728                         adapter->hw.mac.forced_speed_duplex = ADVERTISE_10_HALF;
1729                 break;
1730         default:
1731                 device_printf(adapter->dev, "Unsupported media type\n");
1732         }
1733
1734         em_init_locked(adapter);
1735         EM_CORE_UNLOCK(adapter);
1736
1737         return (0);
1738 }
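/*
 * For example, forcing 100Mb/s full duplex, or returning to
 * autonegotiation, lands here via ifconfig:
 *
 *     ifconfig em0 media 100baseTX mediaopt full-duplex
 *     ifconfig em0 media autoselect
 */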
1739
1740 /*********************************************************************
1741  *
1742  *  This routine maps the mbufs to tx descriptors.
1743  *
1744  *  return 0 on success, positive on failure
1745  **********************************************************************/
1746
1747 static int
1748 em_xmit(struct tx_ring *txr, struct mbuf **m_headp)
1749 {
1750         struct adapter          *adapter = txr->adapter;
1751         bus_dma_segment_t       segs[EM_MAX_SCATTER];
1752         bus_dmamap_t            map;
1753         struct em_buffer        *tx_buffer, *tx_buffer_mapped;
1754         struct e1000_tx_desc    *ctxd = NULL;
1755         struct mbuf             *m_head;
1756         struct ether_header     *eh;
1757         struct ip               *ip = NULL;
1758         struct tcphdr           *tp = NULL;
1759         u32                     txd_upper, txd_lower, txd_used, txd_saved;
1760         int                     ip_off, poff;
1761         int                     nsegs, i, j, first, last = 0;
1762         int                     error, do_tso, tso_desc = 0;
1763
1764         m_head = *m_headp;
1765         txd_upper = txd_lower = txd_used = txd_saved = 0;
1766         do_tso = ((m_head->m_pkthdr.csum_flags & CSUM_TSO) != 0);
1767         ip_off = poff = 0;
1768
1769         /*
1770          * Intel recommends that the entire IP/TCP header reside in a single
1771          * buffer. If multiple descriptors are used to describe the IP and
1772          * TCP header, each descriptor should describe one or more
1773          * complete headers; descriptors referencing only parts of headers
1774          * are not supported. If all layer headers are not coalesced into
1775          * a single buffer, each buffer should not cross a 4KB boundary,
1776          * or be larger than the maximum read request size.
1777          * The controller also requires modifying the IP/TCP header for TSO
1778          * to work, so we first get a writable mbuf chain, then coalesce the
1779          * ethernet/IP/TCP headers into a single buffer to meet the
1780          * controller's requirement. This also simplifies IP/TCP/UDP checksum
1781          * offloading, which has similar restrictions.
1782          */
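        /*
         * For a plain untagged TCP/IPv4 frame with no options this works
         * out to ip_off = 14 (sizeof(struct ether_header)) and
         * poff = 14 + 20 = 34, so a 20-byte TCP header ends at byte 54;
         * the m_pullup() calls below gather that whole span into the
         * first mbuf.
         */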
1783         if (do_tso || m_head->m_pkthdr.csum_flags & CSUM_OFFLOAD) {
1784                 if (do_tso || (m_head->m_next != NULL && 
1785                     m_head->m_pkthdr.csum_flags & CSUM_OFFLOAD)) {
1786                         if (M_WRITABLE(*m_headp) == 0) {
1787                                 m_head = m_dup(*m_headp, M_DONTWAIT);
1788                                 m_freem(*m_headp);
1789                                 if (m_head == NULL) {
1790                                         *m_headp = NULL;
1791                                         return (ENOBUFS);
1792                                 }
1793                                 *m_headp = m_head;
1794                         }
1795                 }
1796                 /*
1797                  * XXX
1798                  * Assume IPv4; we don't have TSO/checksum offload support
1799                  * for IPv6 yet.
1800                  */
1801                 ip_off = sizeof(struct ether_header);
1802                 m_head = m_pullup(m_head, ip_off);
1803                 if (m_head == NULL) {
1804                         *m_headp = NULL;
1805                         return (ENOBUFS);
1806                 }
1807                 eh = mtod(m_head, struct ether_header *);
1808                 if (eh->ether_type == htons(ETHERTYPE_VLAN)) {
1809                         ip_off = sizeof(struct ether_vlan_header);
1810                         m_head = m_pullup(m_head, ip_off);
1811                         if (m_head == NULL) {
1812                                 *m_headp = NULL;
1813                                 return (ENOBUFS);
1814                         }
1815                 }
1816                 m_head = m_pullup(m_head, ip_off + sizeof(struct ip));
1817                 if (m_head == NULL) {
1818                         *m_headp = NULL;
1819                         return (ENOBUFS);
1820                 }
1821                 ip = (struct ip *)(mtod(m_head, char *) + ip_off);
1822                 poff = ip_off + (ip->ip_hl << 2);
1823                 m_head = m_pullup(m_head, poff + sizeof(struct tcphdr));
1824                 if (m_head == NULL) {
1825                         *m_headp = NULL;
1826                         return (ENOBUFS);
1827                 }
1828                 if (do_tso) {
1829                         tp = (struct tcphdr *)(mtod(m_head, char *) + poff);
1830                         /*
1831                          * TSO workaround:
1832                          *   pull 4 extra payload bytes into the header mbuf.
1833                          */
1834                         m_head = m_pullup(m_head, poff + (tp->th_off << 2) + 4);
1835                         if (m_head == NULL) {
1836                                 *m_headp = NULL;
1837                                 return (ENOBUFS);
1838                         }
1839                         ip = (struct ip *)(mtod(m_head, char *) + ip_off);
1840                         ip->ip_len = 0;
1841                         ip->ip_sum = 0;
1842                         /*
1843                          * The pseudo TCP checksum does not include the TCP
1844                          * payload length, so the driver must recompute it
1845                          * here to match what the hardware expects to see.
1846                          * This adheres to Microsoft's Large Send spec.
1847                          */
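                        /*
                         * in_pseudo() yields the 16-bit one's-complement
                         * sum of its three 32-bit arguments; here that is
                         * the source and destination addresses plus the
                         * protocol, deliberately omitting the length word
                         * a normal pseudo-header sum would include -- the
                         * hardware fills in the per-segment length itself.
                         */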
1848                         tp = (struct tcphdr *)(mtod(m_head, char *) + poff);
1849                         tp->th_sum = in_pseudo(ip->ip_src.s_addr,
1850                             ip->ip_dst.s_addr, htons(IPPROTO_TCP));
1851                 } else if (m_head->m_pkthdr.csum_flags & CSUM_TCP) {
1852                         tp = (struct tcphdr *)(mtod(m_head, char *) + poff);
1853                         m_head = m_pullup(m_head, poff + (tp->th_off << 2));
1854                         if (m_head == NULL) {
1855                                 *m_headp = NULL;
1856                                 return (ENOBUFS);
1857                         }
1858                         ip = (struct ip *)(mtod(m_head, char *) + ip_off);
1859                         tp = (struct tcphdr *)(mtod(m_head, char *) + poff);
1860                 } else if (m_head->m_pkthdr.csum_flags & CSUM_UDP) {
1861                         m_head = m_pullup(m_head, poff + sizeof(struct udphdr));
1862                         if (m_head == NULL) {
1863                                 *m_headp = NULL;
1864                                 return (ENOBUFS);
1865                         }
1866                         ip = (struct ip *)(mtod(m_head, char *) + ip_off);
1867                 }
1868                 *m_headp = m_head;
1869         }
1870
1871         /*
1872          * Map the packet for DMA
1873          *
1874          * Capture the first descriptor index;
1875          * this descriptor will store the index
1876          * of the EOP, which is the only one that
1877          * now gets a DONE bit writeback.
1878          */
1879         first = txr->next_avail_desc;
1880         tx_buffer = &txr->tx_buffers[first];
1881         tx_buffer_mapped = tx_buffer;
1882         map = tx_buffer->map;
1883
1884         error = bus_dmamap_load_mbuf_sg(txr->txtag, map,
1885             *m_headp, segs, &nsegs, BUS_DMA_NOWAIT);
1886
1887         /*
1888          * There are two types of errors we can (try) to handle:
1889          * - EFBIG means the mbuf chain was too long and bus_dma ran
1890          *   out of segments.  Defragment the mbuf chain and try again.
1891          * - ENOMEM means bus_dma could not obtain enough bounce buffers
1892          *   at this point in time.  Defer sending and try again later.
1893          * All other errors, in particular EINVAL, are fatal and prevent the
1894          * mbuf chain from ever going through.  Drop it and report error.
1895          */
1896         if (error == EFBIG) {
1897                 struct mbuf *m;
1898
1899                 m = m_defrag(*m_headp, M_DONTWAIT);
1900                 if (m == NULL) {
1901                         adapter->mbuf_alloc_failed++;
1902                         m_freem(*m_headp);
1903                         *m_headp = NULL;
1904                         return (ENOBUFS);
1905                 }
1906                 *m_headp = m;
1907
1908                 /* Try it again */
1909                 error = bus_dmamap_load_mbuf_sg(txr->txtag, map,
1910                     *m_headp, segs, &nsegs, BUS_DMA_NOWAIT);
1911
1912                 if (error == ENOMEM) {
1913                         adapter->no_tx_dma_setup++;
1914                         return (error);
1915                 } else if (error != 0) {
1916                         adapter->no_tx_dma_setup++;
1917                         m_freem(*m_headp);
1918                         *m_headp = NULL;
1919                         return (error);
1920                 }
1921
1922         } else if (error == ENOMEM) {
1923                 adapter->no_tx_dma_setup++;
1924                 return (error);
1925         } else if (error != 0) {
1926                 adapter->no_tx_dma_setup++;
1927                 m_freem(*m_headp);
1928                 *m_headp = NULL;
1929                 return (error);
1930         }
1931
1932         /*
1933          * TSO hardware workaround: if this packet is not
1934          * TSO, is only a single descriptor long, and
1935          * follows a TSO burst, then we need to add a
1936          * sentinel descriptor to prevent premature writeback.
1937          */
1938         if ((do_tso == 0) && (txr->tx_tso == TRUE)) {
1939                 if (nsegs == 1)
1940                         tso_desc = TRUE;
1941                 txr->tx_tso = FALSE;
1942         }
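        /*
         * Concretely: a short single-segment frame (e.g. a bare ACK)
         * queued right after a TSO burst is the case being guarded
         * against here; the descriptor split in the loop below keeps a
         * small final sentinel so the writeback cannot happen early.
         */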
1943
1944         if (nsegs > (txr->tx_avail - 2)) {
1945                 txr->no_desc_avail++;
1946                 bus_dmamap_unload(txr->txtag, map);
1947                 return (ENOBUFS);
1948         }
1949         m_head = *m_headp;
1950
1951         /* Do hardware assists */
1952         if (m_head->m_pkthdr.csum_flags & CSUM_TSO) {
1953                 em_tso_setup(txr, m_head, ip_off, ip, tp,
1954                     &txd_upper, &txd_lower);
1955                 /* we need to make a final sentinel transmit desc */
1956                 tso_desc = TRUE;
1957         } else if (m_head->m_pkthdr.csum_flags & CSUM_OFFLOAD)
1958                 em_transmit_checksum_setup(txr, m_head,
1959                     ip_off, ip, &txd_upper, &txd_lower);
1960
1961         i = txr->next_avail_desc;
1962
1963         /* Set up our transmit descriptors */
1964         for (j = 0; j < nsegs; j++) {
1965                 bus_size_t seg_len;
1966                 bus_addr_t seg_addr;
1967
1968                 tx_buffer = &txr->tx_buffers[i];
1969                 ctxd = &txr->tx_base[i];
1970                 seg_addr = segs[j].ds_addr;
1971                 seg_len  = segs[j].ds_len;
1972                 /*
1973                 ** TSO Workaround:
1974                 ** If this is the last descriptor, we want to
1975                 ** split it so we have a small final sentinel
1976                 */
1977                 if (tso_desc && (j == (nsegs -1)) && (seg_len > 8)) {
1978                         seg_len -= 4;
1979                         ctxd->buffer_addr = htole64(seg_addr);
1980                         ctxd->lower.data = htole32(
1981                             adapter->txd_cmd | txd_lower | seg_len);
1982                         ctxd->upper.data =
1983                             htole32(txd_upper);
1984                         if (++i == adapter->num_tx_desc)
1985                                 i = 0;
1986                         /* Now make the sentinel */     
1987                         ++txd_used; /* using an extra txd */
1988                         ctxd = &txr->tx_base[i];
1989                         tx_buffer = &txr->tx_buffers[i];
1990                         ctxd->buffer_addr =
1991                             htole64(seg_addr + seg_len);
1992                         ctxd->lower.data = htole32(
1993                             adapter->txd_cmd | txd_lower | 4);
1994                         ctxd->upper.data =
1995                             htole32(txd_upper);
1996                         last = i;
1997                         if (++i == adapter->num_tx_desc)
1998                                 i = 0;
1999                 } else {
2000                         ctxd->buffer_addr = htole64(seg_addr);
2001                         ctxd->lower.data = htole32(
2002                             adapter->txd_cmd | txd_lower | seg_len);
2003                         ctxd->upper.data =
2004                             htole32(txd_upper);
2005                         last = i;
2006                         if (++i == adapter->num_tx_desc)
2007                                 i = 0;
2008                 }
2009                 tx_buffer->m_head = NULL;
2010                 tx_buffer->next_eop = -1;
2011         }
2012
2013         txr->next_avail_desc = i;
2014         txr->tx_avail -= nsegs;
2015         if (tso_desc) /* TSO used an extra for sentinel */
2016                 txr->tx_avail -= txd_used;
2017
2018         if (m_head->m_flags & M_VLANTAG) {
2019                 /* Set the vlan id. */
2020                 ctxd->upper.fields.special =
2021                     htole16(m_head->m_pkthdr.ether_vtag);
2022                 /* Tell hardware to add tag */
2023                 ctxd->lower.data |= htole32(E1000_TXD_CMD_VLE);
2024         }
2025
2026         tx_buffer->m_head = m_head;
2027         tx_buffer_mapped->map = tx_buffer->map;
2028         tx_buffer->map = map;
2029         bus_dmamap_sync(txr->txtag, map, BUS_DMASYNC_PREWRITE);
2030
2031         /*
2032          * Last Descriptor of Packet
2033          * needs End Of Packet (EOP)
2034          * and Report Status (RS)
2035          */
2036         ctxd->lower.data |=
2037             htole32(E1000_TXD_CMD_EOP | E1000_TXD_CMD_RS);
2038         /*
2039          * Record in the first buffer which
2040          * descriptor will be written back.
2041          */
2042         tx_buffer = &txr->tx_buffers[first];
2043         tx_buffer->next_eop = last;
2044         /* Update the watchdog time early and often */
2045         txr->watchdog_time = ticks;
2046
2047         /*
2048          * Advance the Transmit Descriptor Tail (TDT); this tells the
2049          * E1000 that this frame is available to transmit.
2050          */
2051         bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
2052             BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
2053         E1000_WRITE_REG(&adapter->hw, E1000_TDT(txr->me), i);
2054
2055         return (0);
2056 }
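/*
 * A minimal usage sketch (not the driver's actual em_start_locked) of
 * how a start routine drains the interface send queue through
 * em_xmit(); the function name is illustrative only:
 */
#if 0
static void
example_start_locked(struct ifnet *ifp, struct tx_ring *txr)
{
        struct mbuf *m_head;

        while (!IFQ_DRV_IS_EMPTY(&ifp->if_snd)) {
                IFQ_DRV_DEQUEUE(&ifp->if_snd, m_head);
                if (m_head == NULL)
                        break;
                /*
                 * em_xmit() frees the mbuf and NULLs the pointer on
                 * fatal errors; on a transient shortage it leaves the
                 * mbuf intact so we can requeue it.
                 */
                if (em_xmit(txr, &m_head) != 0) {
                        if (m_head != NULL)
                                IFQ_DRV_PREPEND(&ifp->if_snd, m_head);
                        break;
                }
                /* Let BPF listeners see the frame */
                ETHER_BPF_MTAP(ifp, m_head);
        }
}
#endif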
2057
2058 static void
2059 em_set_promisc(struct adapter *adapter)
2060 {
2061         struct ifnet    *ifp = adapter->ifp;
2062         u32             reg_rctl;
2063
2064         reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
2065
2066         if (ifp->if_flags & IFF_PROMISC) {
2067                 reg_rctl |= (E1000_RCTL_UPE | E1000_RCTL_MPE);
2068                 /* Turn this on if you want to see bad packets */
2069                 if (em_debug_sbp)
2070                         reg_rctl |= E1000_RCTL_SBP;
2071                 E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2072         } else if (ifp->if_flags & IFF_ALLMULTI) {
2073                 reg_rctl |= E1000_RCTL_MPE;
2074                 reg_rctl &= ~E1000_RCTL_UPE;
2075                 E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2076         }
2077 }
2078
2079 static void
2080 em_disable_promisc(struct adapter *adapter)
2081 {
2082         u32     reg_rctl;
2083
2084         reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
2085
2086         reg_rctl &= ~E1000_RCTL_UPE;
2087         reg_rctl &= ~E1000_RCTL_MPE;
2088         reg_rctl &= ~E1000_RCTL_SBP;
2089         E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2090 }
2091
2092
2093 /*********************************************************************
2094  *  Multicast Update
2095  *
2096  *  This routine is called whenever the multicast address list is updated.
2097  *
2098  **********************************************************************/
2099
2100 static void
2101 em_set_multi(struct adapter *adapter)
2102 {
2103         struct ifnet    *ifp = adapter->ifp;
2104         struct ifmultiaddr *ifma;
2105         u32 reg_rctl = 0;
2106         u8  *mta; /* Multicast array memory */
2107         int mcnt = 0;
2108
2109         IOCTL_DEBUGOUT("em_set_multi: begin");
2110
2111         mta = adapter->mta;
2112         bzero(mta, sizeof(u8) * ETH_ADDR_LEN * MAX_NUM_MULTICAST_ADDRESSES);
2113
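        /*
         * The block below appears to implement the 82542 rev 2.0
         * errata workaround: hold the receiver in reset (RCTL_RST)
         * while the multicast table is rewritten, with MWI disabled
         * around the update.
         */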
2114         if (adapter->hw.mac.type == e1000_82542 && 
2115             adapter->hw.revision_id == E1000_REVISION_2) {
2116                 reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
2117                 if (adapter->hw.bus.pci_cmd_word & CMD_MEM_WRT_INVALIDATE)
2118                         e1000_pci_clear_mwi(&adapter->hw);
2119                 reg_rctl |= E1000_RCTL_RST;
2120                 E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2121                 msec_delay(5);
2122         }
2123
2124 #if __FreeBSD_version < 800000
2125         IF_ADDR_LOCK(ifp);
2126 #else
2127         if_maddr_rlock(ifp);
2128 #endif
2129         TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
2130                 if (ifma->ifma_addr->sa_family != AF_LINK)
2131                         continue;
2132
2133                 if (mcnt == MAX_NUM_MULTICAST_ADDRESSES)
2134                         break;
2135
2136                 bcopy(LLADDR((struct sockaddr_dl *)ifma->ifma_addr),
2137                     &mta[mcnt * ETH_ADDR_LEN], ETH_ADDR_LEN);
2138                 mcnt++;
2139         }
2140 #if __FreeBSD_version < 800000
2141         IF_ADDR_UNLOCK(ifp);
2142 #else
2143         if_maddr_runlock(ifp);
2144 #endif
2145         if (mcnt >= MAX_NUM_MULTICAST_ADDRESSES) {
2146                 reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
2147                 reg_rctl |= E1000_RCTL_MPE;
2148                 E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2149         } else
2150                 e1000_update_mc_addr_list(&adapter->hw, mta, mcnt);
2151
2152         if (adapter->hw.mac.type == e1000_82542 && 
2153             adapter->hw.revision_id == E1000_REVISION_2) {
2154                 reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
2155                 reg_rctl &= ~E1000_RCTL_RST;
2156                 E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2157                 msec_delay(5);
2158                 if (adapter->hw.bus.pci_cmd_word & CMD_MEM_WRT_INVALIDATE)
2159                         e1000_pci_set_mwi(&adapter->hw);
2160         }
2161 }
2162
2163
2164 /*********************************************************************
2165  *  Timer routine
2166  *
2167  *  This routine checks for link status and updates statistics.
2168  *
2169  **********************************************************************/
2170
2171 static void
2172 em_local_timer(void *arg)
2173 {
2174         struct adapter  *adapter = arg;
2175         struct ifnet    *ifp = adapter->ifp;
2176         struct tx_ring  *txr = adapter->tx_rings;
2177
2178         EM_CORE_LOCK_ASSERT(adapter);
2179
2180         em_update_link_status(adapter);
2181         em_update_stats_counters(adapter);
2182
2183         /* Reset LAA into RAR[0] on 82571 */
2184         if ((adapter->hw.mac.type == e1000_82571) &&
2185             e1000_get_laa_state_82571(&adapter->hw))
2186                 e1000_rar_set(&adapter->hw, adapter->hw.mac.addr, 0);
2187
2188         /* 
2189         ** Don't do TX watchdog check if we've been paused
2190         */
2191         if (adapter->pause_frames) {
2192                 adapter->pause_frames = 0;
2193                 goto out;
2194         }
2195         /*
2196         ** Check on the state of the TX queue(s); this
2197         ** can be done without the lock because it's read-only
2198         ** and the HUNG state will be static if set.
2199         */
2200         for (int i = 0; i < adapter->num_queues; i++, txr++)
2201                 if (txr->queue_status == EM_QUEUE_HUNG)
2202                         goto hung;
2203 out:
2204         callout_reset(&adapter->timer, hz, em_local_timer, adapter);
2205         return;
2206 hung:
2207         /* Looks like we're hung */
2208         device_printf(adapter->dev, "Watchdog timeout -- resetting\n");
2209         device_printf(adapter->dev,
2210             "Queue(%d) tdh = %d, hw tdt = %d\n", txr->me,
2211             E1000_READ_REG(&adapter->hw, E1000_TDH(txr->me)),
2212             E1000_READ_REG(&adapter->hw, E1000_TDT(txr->me)));
2213         device_printf(adapter->dev, "TX(%d) desc avail = %d, "
2214             "Next TX to Clean = %d\n",
2215             txr->me, txr->tx_avail, txr->next_to_clean);
2216         ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
2217         adapter->watchdog_events++;
2218         em_init_locked(adapter);
2219 }
2220
2221
2222 static void
2223 em_update_link_status(struct adapter *adapter)
2224 {
2225         struct e1000_hw *hw = &adapter->hw;
2226         struct ifnet *ifp = adapter->ifp;
2227         device_t dev = adapter->dev;
2228         struct tx_ring *txr = adapter->tx_rings;
2229         u32 link_check = 0;
2230
2231         /* Get the cached link value or read phy for real */
2232         switch (hw->phy.media_type) {
2233         case e1000_media_type_copper:
2234                 if (hw->mac.get_link_status) {
2235                         /* Do the work to read phy */
2236                         e1000_check_for_link(hw);
2237                         link_check = !hw->mac.get_link_status;
2238                         if (link_check) /* ESB2 fix */
2239                                 e1000_cfg_on_link_up(hw);
2240                 } else
2241                         link_check = TRUE;
2242                 break;
2243         case e1000_media_type_fiber:
2244                 e1000_check_for_link(hw);
2245                 link_check = (E1000_READ_REG(hw, E1000_STATUS) &
2246                                  E1000_STATUS_LU);
2247                 break;
2248         case e1000_media_type_internal_serdes:
2249                 e1000_check_for_link(hw);
2250                 link_check = adapter->hw.mac.serdes_has_link;
2251                 break;
2252         default:
2253         case e1000_media_type_unknown:
2254                 break;
2255         }
2256
2257         /* Now check for a transition */
2258         if (link_check && (adapter->link_active == 0)) {
2259                 e1000_get_speed_and_duplex(hw, &adapter->link_speed,
2260                     &adapter->link_duplex);
2261                 /* Check if we must disable SPEED_MODE bit on PCI-E */
2262                 if ((adapter->link_speed != SPEED_1000) &&
2263                     ((hw->mac.type == e1000_82571) ||
2264                     (hw->mac.type == e1000_82572))) {
2265                         int tarc0;
2266                         tarc0 = E1000_READ_REG(hw, E1000_TARC(0));
2267                         tarc0 &= ~SPEED_MODE_BIT;
2268                         E1000_WRITE_REG(hw, E1000_TARC(0), tarc0);
2269                 }
2270                 if (bootverbose)
2271                         device_printf(dev, "Link is up %d Mbps %s\n",
2272                             adapter->link_speed,
2273                             ((adapter->link_duplex == FULL_DUPLEX) ?
2274                             "Full Duplex" : "Half Duplex"));
2275                 adapter->link_active = 1;
2276                 adapter->smartspeed = 0;
2277                 ifp->if_baudrate = adapter->link_speed * 1000000;
2278                 if_link_state_change(ifp, LINK_STATE_UP);
2279         } else if (!link_check && (adapter->link_active == 1)) {
2280                 ifp->if_baudrate = adapter->link_speed = 0;
2281                 adapter->link_duplex = 0;
2282                 if (bootverbose)
2283                         device_printf(dev, "Link is Down\n");
2284                 adapter->link_active = 0;
2285                 /* Link down, disable watchdog */
2286                 for (int i = 0; i < adapter->num_queues; i++, txr++)
2287                         txr->queue_status = EM_QUEUE_IDLE;
2288                 if_link_state_change(ifp, LINK_STATE_DOWN);
2289         }
2290 }
2291
2292 /*********************************************************************
2293  *
2294  *  This routine disables all traffic on the adapter by issuing a
2295  *  global reset on the MAC and deallocates TX/RX buffers.
2296  *
2297  *  This routine should always be called with BOTH the CORE
2298  *  and TX locks.
2299  **********************************************************************/
2300
2301 static void
2302 em_stop(void *arg)
2303 {
2304         struct adapter  *adapter = arg;
2305         struct ifnet    *ifp = adapter->ifp;
2306         struct tx_ring  *txr = adapter->tx_rings;
2307
2308         EM_CORE_LOCK_ASSERT(adapter);
2309
2310         INIT_DEBUGOUT("em_stop: begin");
2311
2312         em_disable_intr(adapter);
2313         callout_stop(&adapter->timer);
2314
2315         /* Tell the stack that the interface is no longer active */
2316         ifp->if_drv_flags &= ~(IFF_DRV_RUNNING | IFF_DRV_OACTIVE);
2317
2318         /* Unarm watchdog timer. */
2319         for (int i = 0; i < adapter->num_queues; i++, txr++) {
2320                 EM_TX_LOCK(txr);
2321                 txr->queue_status = EM_QUEUE_IDLE;
2322                 EM_TX_UNLOCK(txr);
2323         }
2324
2325         e1000_reset_hw(&adapter->hw);
2326         E1000_WRITE_REG(&adapter->hw, E1000_WUC, 0);
2327
2328         e1000_led_off(&adapter->hw);
2329         e1000_cleanup_led(&adapter->hw);
2330 }
2331
2332
2333 /*********************************************************************
2334  *
2335  *  Determine hardware revision.
2336  *
2337  **********************************************************************/
2338 static void
2339 em_identify_hardware(struct adapter *adapter)
2340 {
2341         device_t dev = adapter->dev;
2342
2343         /* Make sure our PCI config space has the required bits set */
2344         adapter->hw.bus.pci_cmd_word = pci_read_config(dev, PCIR_COMMAND, 2);
2345         if (!((adapter->hw.bus.pci_cmd_word & PCIM_CMD_BUSMASTEREN) &&
2346             (adapter->hw.bus.pci_cmd_word & PCIM_CMD_MEMEN))) {
2347                 device_printf(dev, "Memory Access and/or Bus Master bits "
2348                     "were not set!\n");
2349                 adapter->hw.bus.pci_cmd_word |=
2350                     (PCIM_CMD_BUSMASTEREN | PCIM_CMD_MEMEN);
2351                 pci_write_config(dev, PCIR_COMMAND,
2352                     adapter->hw.bus.pci_cmd_word, 2);
2353         }
2354
2355         /* Save off the information about this board */
2356         adapter->hw.vendor_id = pci_get_vendor(dev);
2357         adapter->hw.device_id = pci_get_device(dev);
2358         adapter->hw.revision_id = pci_read_config(dev, PCIR_REVID, 1);
2359         adapter->hw.subsystem_vendor_id =
2360             pci_read_config(dev, PCIR_SUBVEND_0, 2);
2361         adapter->hw.subsystem_device_id =
2362             pci_read_config(dev, PCIR_SUBDEV_0, 2);
2363
2364         /* Do Shared Code Init and Setup */
2365         if (e1000_set_mac_type(&adapter->hw)) {
2366                 device_printf(dev, "Setup init failure\n");
2367                 return;
2368         }
2369 }
2370
2371 static int
2372 em_allocate_pci_resources(struct adapter *adapter)
2373 {
2374         device_t        dev = adapter->dev;
2375         int             rid;
2376
2377         rid = PCIR_BAR(0);
2378         adapter->memory = bus_alloc_resource_any(dev, SYS_RES_MEMORY,
2379             &rid, RF_ACTIVE);
2380         if (adapter->memory == NULL) {
2381                 device_printf(dev, "Unable to allocate bus resource: memory\n");
2382                 return (ENXIO);
2383         }
2384         adapter->osdep.mem_bus_space_tag =
2385             rman_get_bustag(adapter->memory);
2386         adapter->osdep.mem_bus_space_handle =
2387             rman_get_bushandle(adapter->memory);
2388         adapter->hw.hw_addr = (u8 *)&adapter->osdep.mem_bus_space_handle;
2389
2390         /* Default to a single queue */
2391         adapter->num_queues = 1;
2392
2393         /*
2394          * Setup MSI/X or MSI if PCI Express
2395          */
2396         adapter->msix = em_setup_msix(adapter);
2397
2398         adapter->hw.back = &adapter->osdep;
2399
2400         return (0);
2401 }
2402
2403 /*********************************************************************
2404  *
2405  *  Setup the Legacy or MSI Interrupt handler
2406  *
2407  **********************************************************************/
2408 int
2409 em_allocate_legacy(struct adapter *adapter)
2410 {
2411         device_t dev = adapter->dev;
2412         int error, rid = 0;
2413
2414         /* Manually turn off all interrupts */
2415         E1000_WRITE_REG(&adapter->hw, E1000_IMC, 0xffffffff);
2416
2417         if (adapter->msix == 1) /* using MSI */
2418                 rid = 1;
2419         /* We allocate a single interrupt resource */
2420         adapter->res = bus_alloc_resource_any(dev,
2421             SYS_RES_IRQ, &rid, RF_SHAREABLE | RF_ACTIVE);
2422         if (adapter->res == NULL) {
2423                 device_printf(dev, "Unable to allocate bus resource: "
2424                     "interrupt\n");
2425                 return (ENXIO);
2426         }
2427
2428         /*
2429          * Allocate a fast interrupt and the associated
2430          * deferred processing contexts.
2431          */
2432         TASK_INIT(&adapter->que_task, 0, em_handle_que, adapter);
2433         TASK_INIT(&adapter->link_task, 0, em_handle_link, adapter);
2434         adapter->tq = taskqueue_create_fast("em_taskq", M_NOWAIT,
2435             taskqueue_thread_enqueue, &adapter->tq);
2436         taskqueue_start_threads(&adapter->tq, 1, PI_NET, "%s taskq",
2437             device_get_nameunit(adapter->dev));
2438         if ((error = bus_setup_intr(dev, adapter->res, INTR_TYPE_NET,
2439             em_irq_fast, NULL, adapter, &adapter->tag)) != 0) {
2440                 device_printf(dev, "Failed to register fast interrupt "
2441                             "handler: %d\n", error);
2442                 taskqueue_free(adapter->tq);
2443                 adapter->tq = NULL;
2444                 return (error);
2445         }
2446         
2447         return (0);
2448 }
2449
2450 /*********************************************************************
2451  *
2452  *  Setup the MSIX Interrupt handlers
2453  *   This is not really multiqueue; rather,
2454  *   it's just multiple interrupt vectors.
2455  *
2456  **********************************************************************/
2457 int
2458 em_allocate_msix(struct adapter *adapter)
2459 {
2460         device_t        dev = adapter->dev;
2461         struct          tx_ring *txr = adapter->tx_rings;
2462         struct          rx_ring *rxr = adapter->rx_rings;
2463         int             error, rid, vector = 0;
2464
2465
2466         /* Make sure all interrupts are disabled */
2467         E1000_WRITE_REG(&adapter->hw, E1000_IMC, 0xffffffff);
2468
2469         /* First set up ring resources */
2470         for (int i = 0; i < adapter->num_queues; i++, txr++, rxr++) {
2471
2472                 /* RX ring */
2473                 rid = vector + 1;
2474
2475                 rxr->res = bus_alloc_resource_any(dev,
2476                     SYS_RES_IRQ, &rid, RF_ACTIVE);
2477                 if (rxr->res == NULL) {
2478                         device_printf(dev,
2479                             "Unable to allocate bus resource: "
2480                             "RX MSIX Interrupt %d\n", i);
2481                         return (ENXIO);
2482                 }
2483                 if ((error = bus_setup_intr(dev, rxr->res,
2484                     INTR_TYPE_NET | INTR_MPSAFE, NULL, em_msix_rx,
2485                     rxr, &rxr->tag)) != 0) {
2486                         device_printf(dev, "Failed to register RX handler");
2487                         return (error);
2488                 }
2489 #if __FreeBSD_version >= 800504
2490                 bus_describe_intr(dev, rxr->res, rxr->tag, "rx %d", i);
2491 #endif
2492                 rxr->msix = vector++; /* NOTE increment vector for TX */
2493                 TASK_INIT(&rxr->rx_task, 0, em_handle_rx, rxr);
2494                 rxr->tq = taskqueue_create_fast("em_rxq", M_NOWAIT,
2495                     taskqueue_thread_enqueue, &rxr->tq);
2496                 taskqueue_start_threads(&rxr->tq, 1, PI_NET, "%s rxq",
2497                     device_get_nameunit(adapter->dev));
2498                 /*
2499                 ** Set the bit to enable interrupt
2500                 ** in E1000_IMS -- bits 20 and 21
2501                 ** are for RX0 and RX1; note this has
2502                 ** NOTHING to do with the MSIX vector
2503                 */
2504                 rxr->ims = 1 << (20 + i);
2505                 adapter->ivars |= (8 | rxr->msix) << (i * 4);
2506
2507                 /* TX ring */
2508                 rid = vector + 1;
2509                 txr->res = bus_alloc_resource_any(dev,
2510                     SYS_RES_IRQ, &rid, RF_ACTIVE);
2511                 if (txr->res == NULL) {
2512                         device_printf(dev,
2513                             "Unable to allocate bus resource: "
2514                             "TX MSIX Interrupt %d\n", i);
2515                         return (ENXIO);
2516                 }
2517                 if ((error = bus_setup_intr(dev, txr->res,
2518                     INTR_TYPE_NET | INTR_MPSAFE, NULL, em_msix_tx,
2519                     txr, &txr->tag)) != 0) {
2520                         device_printf(dev, "Failed to register TX handler");
2521                         return (error);
2522                 }
2523 #if __FreeBSD_version >= 800504
2524                 bus_describe_intr(dev, txr->res, txr->tag, "tx %d", i);
2525 #endif
2526                 txr->msix = vector++; /* Increment vector for next pass */
2527                 TASK_INIT(&txr->tx_task, 0, em_handle_tx, txr);
2528                 txr->tq = taskqueue_create_fast("em_txq", M_NOWAIT,
2529                     taskqueue_thread_enqueue, &txr->tq);
2530                 taskqueue_start_threads(&txr->tq, 1, PI_NET, "%s txq",
2531                     device_get_nameunit(adapter->dev));
2532                 /*
2533                 ** Set the bit to enable interrupt
2534                 ** in E1000_IMS -- bits 22 and 23
2535                 ** are for TX0 and TX1; note this has
2536                 ** NOTHING to do with the MSIX vector
2537                 */
2538                 txr->ims = 1 << (22 + i);
2539                 adapter->ivars |= (8 | txr->msix) << (8 + (i * 4));
2540         }
2541
2542         /* Link interrupt */
2543         ++rid;
2544         adapter->res = bus_alloc_resource_any(dev,
2545             SYS_RES_IRQ, &rid, RF_ACTIVE);
2546         if (!adapter->res) {
2547                 device_printf(dev,"Unable to allocate "
2548                     "bus resource: Link interrupt [%d]\n", rid);
2549                 return (ENXIO);
2550         }
2551         /* Set the link handler function */
2552         error = bus_setup_intr(dev, adapter->res,
2553             INTR_TYPE_NET | INTR_MPSAFE, NULL,
2554             em_msix_link, adapter, &adapter->tag);
2555         if (error) {
2556                 adapter->res = NULL;
2557                 device_printf(dev, "Failed to register LINK handler");
2558                 return (error);
2559         }
2560 #if __FreeBSD_version >= 800504
2561         bus_describe_intr(dev, adapter->res, adapter->tag, "link");
2562 #endif
2563         adapter->linkvec = vector;
2564         adapter->ivars |=  (8 | vector) << 16;
2565         adapter->ivars |= 0x80000000;
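        /*
         * Worked example for the usual single-queue setup (vectors
         * 0..2): RX0 gets (8|0) << 0 = 0x00000008, TX0 gets
         * (8|1) << 8 = 0x00000900, link gets (8|2) << 16 = 0x000a0000;
         * OR-ed with 0x80000000 this yields IVAR = 0x800a0908.
         */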
2566
2567         return (0);
2568 }
2569
2570
2571 static void
2572 em_free_pci_resources(struct adapter *adapter)
2573 {
2574         device_t        dev = adapter->dev;
2575         struct tx_ring  *txr;
2576         struct rx_ring  *rxr;
2577         int             rid;
2578
2579
2580         /*
2581         ** Release all the queue interrupt resources:
2582         */
2583         for (int i = 0; i < adapter->num_queues; i++) {
2584                 txr = &adapter->tx_rings[i];
2585                 rxr = &adapter->rx_rings[i];
2586                 /* an early abort? */
2587                 if ((txr == NULL) || (rxr == NULL))
2588                         break;
2589                 rid = txr->msix + 1;
2590                 if (txr->tag != NULL) {
2591                         bus_teardown_intr(dev, txr->res, txr->tag);
2592                         txr->tag = NULL;
2593                 }
2594                 if (txr->res != NULL)
2595                         bus_release_resource(dev, SYS_RES_IRQ,
2596                             rid, txr->res);
2597                 rid = rxr->msix + 1;
2598                 if (rxr->tag != NULL) {
2599                         bus_teardown_intr(dev, rxr->res, rxr->tag);
2600                         rxr->tag = NULL;
2601                 }
2602                 if (rxr->res != NULL)
2603                         bus_release_resource(dev, SYS_RES_IRQ,
2604                             rid, rxr->res);
2605         }
2606
2607         if (adapter->linkvec) /* we are doing MSIX */
2608                 rid = adapter->linkvec + 1;
2609         else
2610                 rid = (adapter->msix != 0) ? 1 : 0;
2611
2612         if (adapter->tag != NULL) {
2613                 bus_teardown_intr(dev, adapter->res, adapter->tag);
2614                 adapter->tag = NULL;
2615         }
2616
2617         if (adapter->res != NULL)
2618                 bus_release_resource(dev, SYS_RES_IRQ, rid, adapter->res);
2619
2620
2621         if (adapter->msix)
2622                 pci_release_msi(dev);
2623
2624         if (adapter->msix_mem != NULL)
2625                 bus_release_resource(dev, SYS_RES_MEMORY,
2626                     PCIR_BAR(EM_MSIX_BAR), adapter->msix_mem);
2627
2628         if (adapter->memory != NULL)
2629                 bus_release_resource(dev, SYS_RES_MEMORY,
2630                     PCIR_BAR(0), adapter->memory);
2631
2632         if (adapter->flash != NULL)
2633                 bus_release_resource(dev, SYS_RES_MEMORY,
2634                     EM_FLASH, adapter->flash);
2635 }
2636
2637 /*
2638  * Setup MSI or MSI/X
2639  */
2640 static int
2641 em_setup_msix(struct adapter *adapter)
2642 {
2643         device_t dev = adapter->dev;
2644         int val = 0;
2645
2646
2647         /*
2648         ** Setup MSI/X for Hartwell: tests have shown
2649         ** use of two queues to be unstable, and to
2650         ** provide no great gain anyway, so we simply
2651                 ** separate the interrupts and use a single queue.
2652         */
2653         if ((adapter->hw.mac.type == e1000_82574) &&
2654             (em_enable_msix == TRUE)) {
2655                 /* Map the MSIX BAR */
2656                 int rid = PCIR_BAR(EM_MSIX_BAR);
2657                 adapter->msix_mem = bus_alloc_resource_any(dev,
2658                     SYS_RES_MEMORY, &rid, RF_ACTIVE);
2659                 if (!adapter->msix_mem) {
2660                         /* May not be enabled */
2661                         device_printf(adapter->dev,
2662                             "Unable to map MSIX table\n");
2663                         goto msi;
2664                 }
2665                 val = pci_msix_count(dev); 
2666                 if (val < 3) {
2667                         bus_release_resource(dev, SYS_RES_MEMORY,
2668                             PCIR_BAR(EM_MSIX_BAR), adapter->msix_mem);
2669                         adapter->msix_mem = NULL;
2670                         device_printf(adapter->dev,
2671                             "MSIX: insufficient vectors, using MSI\n");
2672                         goto msi;
2673                 }
2674                 val = 3;
2675                 adapter->num_queues = 1;
2676                 if (pci_alloc_msix(dev, &val) == 0) {
2677                         device_printf(adapter->dev,
2678                             "Using MSIX interrupts "
2679                             "with %d vectors\n", val);
2680                 }
2681
2682                 return (val);
2683         }
2684 msi:
2685         val = pci_msi_count(dev);
2686         if (val == 1 && pci_alloc_msi(dev, &val) == 0) {
2687                 adapter->msix = 1;
2688                 device_printf(adapter->dev,"Using an MSI interrupt\n");
2689                 return (val);
2690         } 
2691         /* Should only happen due to manual configuration */
2692         device_printf(adapter->dev, "No MSI/MSIX, using a Legacy IRQ\n");
2693         return (0);
2694 }
2695
2696
2697 /*********************************************************************
2698  *
2699  *  Initialize the hardware to a configuration
2700  *  as specified by the adapter structure.
2701  *
2702  **********************************************************************/
2703 static void
2704 em_reset(struct adapter *adapter)
2705 {
2706         device_t        dev = adapter->dev;
2707         struct ifnet    *ifp = adapter->ifp;
2708         struct e1000_hw *hw = &adapter->hw;
2709         u16             rx_buffer_size;
2710
2711         INIT_DEBUGOUT("em_reset: begin");
2712
2713         /* Set up smart power down as default off on newer adapters. */
2714         if (!em_smart_pwr_down && (hw->mac.type == e1000_82571 ||
2715             hw->mac.type == e1000_82572)) {
2716                 u16 phy_tmp = 0;
2717
2718                 /* Speed up time to link by disabling smart power down. */
2719                 e1000_read_phy_reg(hw, IGP02E1000_PHY_POWER_MGMT, &phy_tmp);
2720                 phy_tmp &= ~IGP02E1000_PM_SPD;
2721                 e1000_write_phy_reg(hw, IGP02E1000_PHY_POWER_MGMT, phy_tmp);
2722         }
2723
2724         /*
2725          * These parameters control the automatic generation (Tx) and
2726          * response (Rx) to Ethernet PAUSE frames.
2727          * - High water mark should allow for at least two frames to be
2728          *   received after sending an XOFF.
2729          * - Low water mark works best when it is very near the high water mark.
2730          *   This allows the receiver to restart by sending XON when it has
2731          *   drained a bit. Here we use an arbitrary value of 1500 which will
2732          *   restart after one full frame is pulled from the buffer. There
2733          *   could be several smaller frames in the buffer and if so they will
2734          *   not trigger the XON until their total number reduces the buffer
2735          *   by 1500.
2736          * - The pause time is fairly large at 1000 x 512ns = 512 usec.
2737          */
2738         rx_buffer_size = ((E1000_READ_REG(hw, E1000_PBA) & 0xffff) << 10 );
2739
2740         hw->fc.high_water = rx_buffer_size -
2741             roundup2(adapter->max_frame_size, 1024);
2742         hw->fc.low_water = hw->fc.high_water - 1500;
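        /*
         * For instance, a part reporting a 20KB RX packet buffer gives
         * rx_buffer_size = 20 << 10 = 20480 bytes; with the default
         * 1518-byte max_frame_size, roundup2(1518, 1024) = 2048, so
         * high_water = 18432 and low_water = 16932.
         */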
2743
2744         if (hw->mac.type == e1000_80003es2lan)
2745                 hw->fc.pause_time = 0xFFFF;
2746         else
2747                 hw->fc.pause_time = EM_FC_PAUSE_TIME;
2748
2749         hw->fc.send_xon = TRUE;
2750
2751         /* Set Flow control, use the tunable location if sane */
2752         hw->fc.requested_mode = adapter->fc_setting;
2753
2754         /* Workaround: no TX flow ctrl for PCH */
2755         if (hw->mac.type == e1000_pchlan)
2756                 hw->fc.requested_mode = e1000_fc_rx_pause;
2757
2758         /* Override - settings for PCH2LAN, yes, it's magic :) */
2759         if (hw->mac.type == e1000_pch2lan) {
2760                 hw->fc.high_water = 0x5C20;
2761                 hw->fc.low_water = 0x5048;
2762                 hw->fc.pause_time = 0x0650;
2763                 hw->fc.refresh_time = 0x0400;
2764                 /* Jumbos need adjusted PBA */
2765                 if (ifp->if_mtu > ETHERMTU)
2766                         E1000_WRITE_REG(hw, E1000_PBA, 12);
2767                 else
2768                         E1000_WRITE_REG(hw, E1000_PBA, 26);
2769         }
2770
2771         /* Issue a global reset */
2772         e1000_reset_hw(hw);
2773         E1000_WRITE_REG(hw, E1000_WUC, 0);
2774         em_disable_aspm(adapter);
2775
2776         if (e1000_init_hw(hw) < 0) {
2777                 device_printf(dev, "Hardware Initialization Failed\n");
2778                 return;
2779         }
2780
2781         E1000_WRITE_REG(hw, E1000_VET, ETHERTYPE_VLAN);
2782         e1000_get_phy_info(hw);
2783         e1000_check_for_link(hw);
2784         return;
2785 }
2786
2787 /*********************************************************************
2788  *
2789  *  Setup networking device structure and register an interface.
2790  *
2791  **********************************************************************/
2792 static int
2793 em_setup_interface(device_t dev, struct adapter *adapter)
2794 {
2795         struct ifnet   *ifp;
2796
2797         INIT_DEBUGOUT("em_setup_interface: begin");
2798
2799         ifp = adapter->ifp = if_alloc(IFT_ETHER);
2800         if (ifp == NULL) {
2801                 device_printf(dev, "can not allocate ifnet structure\n");
2802                 return (-1);
2803         }
2804         if_initname(ifp, device_get_name(dev), device_get_unit(dev));
2805         ifp->if_mtu = ETHERMTU;
2806         ifp->if_init =  em_init;
2807         ifp->if_softc = adapter;
2808         ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
2809         ifp->if_ioctl = em_ioctl;
2810         ifp->if_start = em_start;
2811         IFQ_SET_MAXLEN(&ifp->if_snd, adapter->num_tx_desc - 1);
2812         ifp->if_snd.ifq_drv_maxlen = adapter->num_tx_desc - 1;
2813         IFQ_SET_READY(&ifp->if_snd);
2814
2815         ether_ifattach(ifp, adapter->hw.mac.addr);
2816
2817         ifp->if_capabilities = ifp->if_capenable = 0;
2818
2819 #ifdef EM_MULTIQUEUE
2820         /* Multiqueue tx functions */
2821         ifp->if_transmit = em_mq_start;
2822         ifp->if_qflush = em_qflush;
2823 #endif  
2824
2825         ifp->if_capabilities |= IFCAP_HWCSUM | IFCAP_VLAN_HWCSUM;
2826         ifp->if_capenable |= IFCAP_HWCSUM | IFCAP_VLAN_HWCSUM;
2827
2828         /* Enable TSO by default, can disable with ifconfig */
2829         ifp->if_capabilities |= IFCAP_TSO4;
2830         ifp->if_capenable |= IFCAP_TSO4;
2831
2832         /*
2833          * Tell the upper layer(s) we
2834          * support full VLAN capability
2835          */
2836         ifp->if_data.ifi_hdrlen = sizeof(struct ether_vlan_header);
2837         ifp->if_capabilities |= IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_MTU;
2838         ifp->if_capenable |= IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_MTU;
2839
2840         /*
2841         ** Don't turn this on by default; if vlans are
2842         ** created on another pseudo device (e.g. lagg)
2843         ** then vlan events are not passed through, breaking
2844         ** operation, but with HW FILTER off it works. If
2845         ** using vlans directly on the em driver you can
2846         ** enable this and get full hardware tag filtering.
2847         */
2848         ifp->if_capabilities |= IFCAP_VLAN_HWFILTER;
2849
2850 #ifdef DEVICE_POLLING
2851         ifp->if_capabilities |= IFCAP_POLLING;
2852 #endif
2853
2854         /* Enable only WOL MAGIC by default */
2855         if (adapter->wol) {
2856                 ifp->if_capabilities |= IFCAP_WOL;
2857                 ifp->if_capenable |= IFCAP_WOL_MAGIC;
2858         }
2859                 
2860         /*
2861          * Specify the media types supported by this adapter and register
2862          * callbacks to update media and link information
2863          */
2864         ifmedia_init(&adapter->media, IFM_IMASK,
2865             em_media_change, em_media_status);
2866         if ((adapter->hw.phy.media_type == e1000_media_type_fiber) ||
2867             (adapter->hw.phy.media_type == e1000_media_type_internal_serdes)) {
2868                 u_char fiber_type = IFM_1000_SX;        /* default type */
2869
2870                 ifmedia_add(&adapter->media, IFM_ETHER | fiber_type | IFM_FDX, 
2871                             0, NULL);
2872                 ifmedia_add(&adapter->media, IFM_ETHER | fiber_type, 0, NULL);
2873         } else {
2874                 ifmedia_add(&adapter->media, IFM_ETHER | IFM_10_T, 0, NULL);
2875                 ifmedia_add(&adapter->media, IFM_ETHER | IFM_10_T | IFM_FDX,
2876                             0, NULL);
2877                 ifmedia_add(&adapter->media, IFM_ETHER | IFM_100_TX,
2878                             0, NULL);
2879                 ifmedia_add(&adapter->media, IFM_ETHER | IFM_100_TX | IFM_FDX,
2880                             0, NULL);
2881                 if (adapter->hw.phy.type != e1000_phy_ife) {
2882                         ifmedia_add(&adapter->media,
2883                                 IFM_ETHER | IFM_1000_T | IFM_FDX, 0, NULL);
2884                         ifmedia_add(&adapter->media,
2885                                 IFM_ETHER | IFM_1000_T, 0, NULL);
2886                 }
2887         }
2888         ifmedia_add(&adapter->media, IFM_ETHER | IFM_AUTO, 0, NULL);
2889         ifmedia_set(&adapter->media, IFM_ETHER | IFM_AUTO);
2890         return (0);
2891 }
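
/*
 * The capability bits published above are toggled from userland through
 * the SIOCSIFCAP ioctl, which lands in em_ioctl().  A minimal sketch of
 * such a caller (hypothetical example code, not part of this driver):
 *
 *      #include <sys/ioctl.h>
 *      #include <sys/socket.h>
 *      #include <net/if.h>
 *      #include <string.h>
 *
 *      int s = socket(AF_INET, SOCK_DGRAM, 0);
 *      struct ifreq ifr;
 *
 *      memset(&ifr, 0, sizeof(ifr));
 *      strlcpy(ifr.ifr_name, "em0", sizeof(ifr.ifr_name));
 *      ioctl(s, SIOCGIFCAP, &ifr);     // ifr_reqcap = supported bits,
 *                                      // ifr_curcap = enabled bits
 *      ifr.ifr_reqcap = ifr.ifr_curcap & ~IFCAP_TSO4;
 *      ioctl(s, SIOCSIFCAP, &ifr);     // same effect as "ifconfig em0 -tso"
 */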
2892
2893
2894 /*
2895  * Manage DMA'able memory.
2896  */
2897 static void
2898 em_dmamap_cb(void *arg, bus_dma_segment_t *segs, int nseg, int error)
2899 {
2900         if (error)
2901                 return;
2902         *(bus_addr_t *) arg = segs[0].ds_addr;
2903 }
2904
2905 static int
2906 em_dma_malloc(struct adapter *adapter, bus_size_t size,
2907         struct em_dma_alloc *dma, int mapflags)
2908 {
2909         int error;
2910
2911         error = bus_dma_tag_create(bus_get_dma_tag(adapter->dev), /* parent */
2912                                 EM_DBA_ALIGN, 0,        /* alignment, bounds */
2913                                 BUS_SPACE_MAXADDR,      /* lowaddr */
2914                                 BUS_SPACE_MAXADDR,      /* highaddr */
2915                                 NULL, NULL,             /* filter, filterarg */
2916                                 size,                   /* maxsize */
2917                                 1,                      /* nsegments */
2918                                 size,                   /* maxsegsize */
2919                                 0,                      /* flags */
2920                                 NULL,                   /* lockfunc */
2921                                 NULL,                   /* lockarg */
2922                                 &dma->dma_tag);
2923         if (error) {
2924                 device_printf(adapter->dev,
2925                     "%s: bus_dma_tag_create failed: %d\n",
2926                     __func__, error);
2927                 goto fail_0;
2928         }
2929
2930         error = bus_dmamem_alloc(dma->dma_tag, (void**) &dma->dma_vaddr,
2931             BUS_DMA_NOWAIT | BUS_DMA_COHERENT, &dma->dma_map);
2932         if (error) {
2933                 device_printf(adapter->dev,
2934                     "%s: bus_dmamem_alloc(%ju) failed: %d\n",
2935                     __func__, (uintmax_t)size, error);
2936                 goto fail_1;    /* nothing to free, only the tag exists */
2937         }
2938
2939         dma->dma_paddr = 0;
2940         error = bus_dmamap_load(dma->dma_tag, dma->dma_map, dma->dma_vaddr,
2941             size, em_dmamap_cb, &dma->dma_paddr, mapflags | BUS_DMA_NOWAIT);
2942         if (error || dma->dma_paddr == 0) {
2943                 device_printf(adapter->dev,
2944                     "%s: bus_dmamap_load failed: %d\n",
2945                     __func__, error);
2946                 goto fail_3;
2947         }
2948
2949         return (0);
2950
2951 fail_3:
2952         bus_dmamap_unload(dma->dma_tag, dma->dma_map);
2953 fail_2:
2954         bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map);
fail_1:
2955         bus_dma_tag_destroy(dma->dma_tag);
2956 fail_0:
2957         dma->dma_map = NULL;
2958         dma->dma_tag = NULL;
2959
2960         return (error);
2961 }
2962
2963 static void
2964 em_dma_free(struct adapter *adapter, struct em_dma_alloc *dma)
2965 {
2966         if (dma->dma_tag == NULL)
2967                 return;
2968         if (dma->dma_map != NULL) {
2969                 bus_dmamap_sync(dma->dma_tag, dma->dma_map,
2970                     BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
2971                 bus_dmamap_unload(dma->dma_tag, dma->dma_map);
2972                 bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map);
2973                 dma->dma_map = NULL;
2974         }
2975         bus_dma_tag_destroy(dma->dma_tag);
2976         dma->dma_tag = NULL;
2977 }
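
/*
 * Typical usage of the helper pair above (a sketch; "scratch" is a
 * hypothetical caller-owned object, not something this driver creates):
 *
 *      struct em_dma_alloc scratch;
 *
 *      if (em_dma_malloc(adapter, 4096, &scratch, BUS_DMA_NOWAIT) == 0) {
 *              // scratch.dma_vaddr is the kernel VA of the buffer and
 *              // scratch.dma_paddr the single bus address delivered by
 *              // the em_dmamap_cb() callback
 *              em_dma_free(adapter, &scratch);
 *      }
 *
 * The descriptor rings below obtain their physically contiguous,
 * EM_DBA_ALIGN-aligned backing store exactly this way.
 */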
2978
2979
2980 /*********************************************************************
2981  *
2982  *  Allocate memory for the transmit and receive rings, and then
2983  *  the descriptors associated with each, called only once at attach.
2984  *
2985  **********************************************************************/
2986 static int
2987 em_allocate_queues(struct adapter *adapter)
2988 {
2989         device_t                dev = adapter->dev;
2990         struct tx_ring          *txr = NULL;
2991         struct rx_ring          *rxr = NULL;
2992         int rsize, tsize, error = E1000_SUCCESS;
2993         int txconf = 0, rxconf = 0;
2994
2995
2996         /* Allocate the TX ring struct memory */
2997         if (!(adapter->tx_rings =
2998             (struct tx_ring *) malloc(sizeof(struct tx_ring) *
2999             adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
3000                 device_printf(dev, "Unable to allocate TX ring memory\n");
3001                 error = ENOMEM;
3002                 goto fail;
3003         }
3004
3005         /* Now allocate the RX */
3006         if (!(adapter->rx_rings =
3007             (struct rx_ring *) malloc(sizeof(struct rx_ring) *
3008             adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
3009                 device_printf(dev, "Unable to allocate RX ring memory\n");
3010                 error = ENOMEM;
3011                 goto rx_fail;
3012         }
3013
3014         tsize = roundup2(adapter->num_tx_desc *
3015             sizeof(struct e1000_tx_desc), EM_DBA_ALIGN);
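        /*
         * roundup2(x, y) rounds x up to the next multiple of the
         * power-of-two y; e.g. with the usual 1024-descriptor default,
         * 1024 * sizeof(struct e1000_tx_desc) = 1024 * 16 = 16384,
         * which is already a multiple of the 128-byte EM_DBA_ALIGN,
         * so tsize stays 16384.
         */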
3016         /*
3017          * Now set up the TX queues; txconf is needed to handle the
3018          * possibility that things fail midcourse and we need to
3019          * unwind the allocations gracefully.
3020          */ 
3021         for (int i = 0; i < adapter->num_queues; i++, txconf++) {
3022                 /* Set up some basics */
3023                 txr = &adapter->tx_rings[i];
3024                 txr->adapter = adapter;
3025                 txr->me = i;
3026
3027                 /* Initialize the TX lock */
3028                 snprintf(txr->mtx_name, sizeof(txr->mtx_name), "%s:tx(%d)",
3029                     device_get_nameunit(dev), txr->me);
3030                 mtx_init(&txr->tx_mtx, txr->mtx_name, NULL, MTX_DEF);
3031
3032                 if (em_dma_malloc(adapter, tsize,
3033                         &txr->txdma, BUS_DMA_NOWAIT)) {
3034                         device_printf(dev,
3035                             "Unable to allocate TX Descriptor memory\n");
3036                         error = ENOMEM;
3037                         goto err_tx_desc;
3038                 }
3039                 txr->tx_base = (struct e1000_tx_desc *)txr->txdma.dma_vaddr;
3040                 bzero((void *)txr->tx_base, tsize);
3041
3042                 if (em_allocate_transmit_buffers(txr)) {
3043                         device_printf(dev,
3044                             "Critical Failure setting up transmit buffers\n");
3045                         error = ENOMEM;
3046                         goto err_tx_desc;
3047                 }
3048 #if __FreeBSD_version >= 800000
3049                 /* Allocate a buf ring */
3050                 txr->br = buf_ring_alloc(4096, M_DEVBUF,
3051                     M_WAITOK, &txr->tx_mtx);
3052 #endif
3053         }
3054
3055         /*
3056          * Next the RX queues...
3057          */ 
3058         rsize = roundup2(adapter->num_rx_desc *
3059             sizeof(struct e1000_rx_desc), EM_DBA_ALIGN);
3060         for (int i = 0; i < adapter->num_queues; i++, rxconf++) {
3061                 rxr = &adapter->rx_rings[i];
3062                 rxr->adapter = adapter;
3063                 rxr->me = i;
3064
3065                 /* Initialize the RX lock */
3066                 snprintf(rxr->mtx_name, sizeof(rxr->mtx_name), "%s:rx(%d)",
3067                     device_get_nameunit(dev), rxr->me);
3068                 mtx_init(&rxr->rx_mtx, rxr->mtx_name, NULL, MTX_DEF);
3069
3070                 if (em_dma_malloc(adapter, rsize,
3071                         &rxr->rxdma, BUS_DMA_NOWAIT)) {
3072                         device_printf(dev,
3073                             "Unable to allocate RxDescriptor memory\n");
3074                         error = ENOMEM;
3075                         goto err_rx_desc;
3076                 }
3077                 rxr->rx_base = (struct e1000_rx_desc *)rxr->rxdma.dma_vaddr;
3078                 bzero((void *)rxr->rx_base, rsize);
3079
3080                 /* Allocate receive buffers for the ring*/
3081                 if (em_allocate_receive_buffers(rxr)) {
3082                         device_printf(dev,
3083                             "Critical Failure setting up receive buffers\n");
3084                         error = ENOMEM;
3085                         goto err_rx_desc;
3086                 }
3087         }
3088
3089         return (0);
3090
3091 err_rx_desc:
3092         for (rxr = adapter->rx_rings; rxconf > 0; rxr++, rxconf--)
3093                 em_dma_free(adapter, &rxr->rxdma);
3094 err_tx_desc:
3095         for (txr = adapter->tx_rings; txconf > 0; txr++, txconf--)
3096                 em_dma_free(adapter, &txr->txdma);
3097         free(adapter->rx_rings, M_DEVBUF);
3098 rx_fail:
3099 #if __FreeBSD_version >= 800000
3100         if (txr != NULL && txr->br != NULL)
                buf_ring_free(txr->br, M_DEVBUF);
3101 #endif
3102         free(adapter->tx_rings, M_DEVBUF);
3103 fail:
3104         return (error);
3105 }
3106
3107
3108 /*********************************************************************
3109  *
3110  *  Allocate memory for tx_buffer structures. The tx_buffer stores all
3111  *  the information needed to transmit a packet on the wire. This is
3112  *  called only once at attach, setup is done every reset.
3113  *
3114  **********************************************************************/
3115 static int
3116 em_allocate_transmit_buffers(struct tx_ring *txr)
3117 {
3118         struct adapter *adapter = txr->adapter;
3119         device_t dev = adapter->dev;
3120         struct em_buffer *txbuf;
3121         int error, i;
3122
3123         /*
3124          * Setup DMA descriptor areas.
3125          */
3126         if ((error = bus_dma_tag_create(bus_get_dma_tag(dev),
3127                                1, 0,                    /* alignment, bounds */
3128                                BUS_SPACE_MAXADDR,       /* lowaddr */
3129                                BUS_SPACE_MAXADDR,       /* highaddr */
3130                                NULL, NULL,              /* filter, filterarg */
3131                                EM_TSO_SIZE,             /* maxsize */
3132                                EM_MAX_SCATTER,          /* nsegments */
3133                                PAGE_SIZE,               /* maxsegsize */
3134                                0,                       /* flags */
3135                                NULL,                    /* lockfunc */
3136                                NULL,                    /* lockfuncarg */
3137                                &txr->txtag))) {
3138                 device_printf(dev,"Unable to allocate TX DMA tag\n");
3139                 goto fail;
3140         }
3141
3142         if (!(txr->tx_buffers =
3143             (struct em_buffer *) malloc(sizeof(struct em_buffer) *
3144             adapter->num_tx_desc, M_DEVBUF, M_NOWAIT | M_ZERO))) {
3145                 device_printf(dev, "Unable to allocate tx_buffer memory\n");
3146                 error = ENOMEM;
3147                 goto fail;
3148         }
3149
3150         /* Create the descriptor buffer dma maps */
3151         txbuf = txr->tx_buffers;
3152         for (i = 0; i < adapter->num_tx_desc; i++, txbuf++) {
3153                 error = bus_dmamap_create(txr->txtag, 0, &txbuf->map);
3154                 if (error != 0) {
3155                         device_printf(dev, "Unable to create TX DMA map\n");
3156                         goto fail;
3157                 }
3158         }
3159
3160         return 0;
3161 fail:
3162         /* We free all, it handles case where we are in the middle */
3163         em_free_transmit_structures(adapter);
3164         return (error);
3165 }
3166
3167 /*********************************************************************
3168  *
3169  *  Initialize a transmit ring.
3170  *
3171  **********************************************************************/
3172 static void
3173 em_setup_transmit_ring(struct tx_ring *txr)
3174 {
3175         struct adapter *adapter = txr->adapter;
3176         struct em_buffer *txbuf;
3177         int i;
3178
3179         /* Clear the old descriptor contents */
3180         EM_TX_LOCK(txr);
3181         bzero((void *)txr->tx_base,
3182               (sizeof(struct e1000_tx_desc)) * adapter->num_tx_desc);
3183         /* Reset indices */
3184         txr->next_avail_desc = 0;
3185         txr->next_to_clean = 0;
3186
3187         /* Free any existing tx buffers. */
3188         txbuf = txr->tx_buffers;
3189         for (i = 0; i < adapter->num_tx_desc; i++, txbuf++) {
3190                 if (txbuf->m_head != NULL) {
3191                         bus_dmamap_sync(txr->txtag, txbuf->map,
3192                             BUS_DMASYNC_POSTWRITE);
3193                         bus_dmamap_unload(txr->txtag, txbuf->map);
3194                         m_freem(txbuf->m_head);
3195                         txbuf->m_head = NULL;
3196                 }
3197                 /* clear the watch index */
3198                 txbuf->next_eop = -1;
3199         }
3200
3201         /* Set number of descriptors available */
3202         txr->tx_avail = adapter->num_tx_desc;
3203         txr->queue_status = EM_QUEUE_IDLE;
3204
3205         /* Clear checksum offload context. */
3206         txr->last_hw_offload = 0;
3207         txr->last_hw_ipcss = 0;
3208         txr->last_hw_ipcso = 0;
3209         txr->last_hw_tucss = 0;
3210         txr->last_hw_tucso = 0;
3211
3212         bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
3213             BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
3214         EM_TX_UNLOCK(txr);
3215 }
3216
3217 /*********************************************************************
3218  *
3219  *  Initialize all transmit rings.
3220  *
3221  **********************************************************************/
3222 static void
3223 em_setup_transmit_structures(struct adapter *adapter)
3224 {
3225         struct tx_ring *txr = adapter->tx_rings;
3226
3227         for (int i = 0; i < adapter->num_queues; i++, txr++)
3228                 em_setup_transmit_ring(txr);
3229
3230         return;
3231 }
3232
3233 /*********************************************************************
3234  *
3235  *  Enable transmit unit.
3236  *
3237  **********************************************************************/
3238 static void
3239 em_initialize_transmit_unit(struct adapter *adapter)
3240 {
3241         struct tx_ring  *txr = adapter->tx_rings;
3242         struct e1000_hw *hw = &adapter->hw;
3243         u32     tctl, tarc, tipg = 0;
3244
3245         INIT_DEBUGOUT("em_initialize_transmit_unit: begin");
3246
3247         for (int i = 0; i < adapter->num_queues; i++, txr++) {
3248                 u64 bus_addr = txr->txdma.dma_paddr;
3249                 /* Base and Len of TX Ring */
3250                 E1000_WRITE_REG(hw, E1000_TDLEN(i),
3251                     adapter->num_tx_desc * sizeof(struct e1000_tx_desc));
3252                 E1000_WRITE_REG(hw, E1000_TDBAH(i),
3253                     (u32)(bus_addr >> 32));
3254                 E1000_WRITE_REG(hw, E1000_TDBAL(i),
3255                     (u32)bus_addr);
3256                 /* Init the HEAD/TAIL indices */
3257                 E1000_WRITE_REG(hw, E1000_TDT(i), 0);
3258                 E1000_WRITE_REG(hw, E1000_TDH(i), 0);
3259
3260                 HW_DEBUGOUT2("Base = %x, Length = %x\n",
3261                     E1000_READ_REG(&adapter->hw, E1000_TDBAL(i)),
3262                     E1000_READ_REG(&adapter->hw, E1000_TDLEN(i)));
3263
3264                 txr->queue_status = EM_QUEUE_IDLE;
3265         }
3266
3267         /* Set the default values for the Tx Inter Packet Gap timer */
3268         switch (adapter->hw.mac.type) {
3269         case e1000_82542:
3270                 tipg = DEFAULT_82542_TIPG_IPGT;
3271                 tipg |= DEFAULT_82542_TIPG_IPGR1 << E1000_TIPG_IPGR1_SHIFT;
3272                 tipg |= DEFAULT_82542_TIPG_IPGR2 << E1000_TIPG_IPGR2_SHIFT;
3273                 break;
3274         case e1000_80003es2lan:
3275                 tipg = DEFAULT_82543_TIPG_IPGR1;
3276                 tipg |= DEFAULT_80003ES2LAN_TIPG_IPGR2 <<
3277                     E1000_TIPG_IPGR2_SHIFT;
3278                 break;
3279         default:
3280                 if ((adapter->hw.phy.media_type == e1000_media_type_fiber) ||
3281                     (adapter->hw.phy.media_type ==
3282                     e1000_media_type_internal_serdes))
3283                         tipg = DEFAULT_82543_TIPG_IPGT_FIBER;
3284                 else
3285                         tipg = DEFAULT_82543_TIPG_IPGT_COPPER;
3286                 tipg |= DEFAULT_82543_TIPG_IPGR1 << E1000_TIPG_IPGR1_SHIFT;
3287                 tipg |= DEFAULT_82543_TIPG_IPGR2 << E1000_TIPG_IPGR2_SHIFT;
3288         }
3289
3290         E1000_WRITE_REG(&adapter->hw, E1000_TIPG, tipg);
3291         E1000_WRITE_REG(&adapter->hw, E1000_TIDV, adapter->tx_int_delay.value);
3292
3293         if (adapter->hw.mac.type >= e1000_82540)
3294                 E1000_WRITE_REG(&adapter->hw, E1000_TADV,
3295                     adapter->tx_abs_int_delay.value);
3296
3297         if ((adapter->hw.mac.type == e1000_82571) ||
3298             (adapter->hw.mac.type == e1000_82572)) {
3299                 tarc = E1000_READ_REG(&adapter->hw, E1000_TARC(0));
3300                 tarc |= SPEED_MODE_BIT;
3301                 E1000_WRITE_REG(&adapter->hw, E1000_TARC(0), tarc);
3302         } else if (adapter->hw.mac.type == e1000_80003es2lan) {
3303                 tarc = E1000_READ_REG(&adapter->hw, E1000_TARC(0));
3304                 tarc |= 1;
3305                 E1000_WRITE_REG(&adapter->hw, E1000_TARC(0), tarc);
3306                 tarc = E1000_READ_REG(&adapter->hw, E1000_TARC(1));
3307                 tarc |= 1;
3308                 E1000_WRITE_REG(&adapter->hw, E1000_TARC(1), tarc);
3309         }
3310
3311         adapter->txd_cmd = E1000_TXD_CMD_IFCS;
3312         if (adapter->tx_int_delay.value > 0)
3313                 adapter->txd_cmd |= E1000_TXD_CMD_IDE;
3314
3315         /* Program the Transmit Control Register */
3316         tctl = E1000_READ_REG(&adapter->hw, E1000_TCTL);
3317         tctl &= ~E1000_TCTL_CT;
3318         tctl |= (E1000_TCTL_PSP | E1000_TCTL_RTLC | E1000_TCTL_EN |
3319                    (E1000_COLLISION_THRESHOLD << E1000_CT_SHIFT));
3320
3321         if (adapter->hw.mac.type >= e1000_82571)
3322                 tctl |= E1000_TCTL_MULR;
3323
3324         /* This write will effectively turn on the transmit unit. */
3325         E1000_WRITE_REG(&adapter->hw, E1000_TCTL, tctl);
3326
3327 }
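
/*
 * For reference, the TIPG packing assembled in the switch above: IPGT
 * occupies bits 9:0, IPGR1 is inserted at E1000_TIPG_IPGR1_SHIFT (10)
 * and IPGR2 at E1000_TIPG_IPGR2_SHIFT (20), so the three per-MAC
 * defaults are simply OR'd together into one 32-bit register value.
 */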
3328
3329
3330 /*********************************************************************
3331  *
3332  *  Free all transmit rings.
3333  *
3334  **********************************************************************/
3335 static void
3336 em_free_transmit_structures(struct adapter *adapter)
3337 {
3338         struct tx_ring *txr = adapter->tx_rings;
3339
3340         for (int i = 0; i < adapter->num_queues; i++, txr++) {
3341                 EM_TX_LOCK(txr);
3342                 em_free_transmit_buffers(txr);
3343                 em_dma_free(adapter, &txr->txdma);
3344                 EM_TX_UNLOCK(txr);
3345                 EM_TX_LOCK_DESTROY(txr);
3346         }
3347
3348         free(adapter->tx_rings, M_DEVBUF);
3349 }
3350
3351 /*********************************************************************
3352  *
3353  *  Free transmit ring related data structures.
3354  *
3355  **********************************************************************/
3356 static void
3357 em_free_transmit_buffers(struct tx_ring *txr)
3358 {
3359         struct adapter          *adapter = txr->adapter;
3360         struct em_buffer        *txbuf;
3361
3362         INIT_DEBUGOUT("free_transmit_ring: begin");
3363
3364         if (txr->tx_buffers == NULL)
3365                 return;
3366
3367         for (int i = 0; i < adapter->num_tx_desc; i++) {
3368                 txbuf = &txr->tx_buffers[i];
3369                 if (txbuf->m_head != NULL) {
3370                         bus_dmamap_sync(txr->txtag, txbuf->map,
3371                             BUS_DMASYNC_POSTWRITE);
3372                         bus_dmamap_unload(txr->txtag,
3373                             txbuf->map);
3374                         m_freem(txbuf->m_head);
3375                         txbuf->m_head = NULL;
3376                         if (txbuf->map != NULL) {
3377                                 bus_dmamap_destroy(txr->txtag,
3378                                     txbuf->map);
3379                                 txbuf->map = NULL;
3380                         }
3381                 } else if (txbuf->map != NULL) {
3382                         bus_dmamap_unload(txr->txtag,
3383                             txbuf->map);
3384                         bus_dmamap_destroy(txr->txtag,
3385                             txbuf->map);
3386                         txbuf->map = NULL;
3387                 }
3388         }
3389 #if __FreeBSD_version >= 800000
3390         if (txr->br != NULL)
3391                 buf_ring_free(txr->br, M_DEVBUF);
3392 #endif
3393         if (txr->tx_buffers != NULL) {
3394                 free(txr->tx_buffers, M_DEVBUF);
3395                 txr->tx_buffers = NULL;
3396         }
3397         if (txr->txtag != NULL) {
3398                 bus_dma_tag_destroy(txr->txtag);
3399                 txr->txtag = NULL;
3400         }
3401         return;
3402 }
3403
3404
3405 /*********************************************************************
3406  *  The offload context is protocol specific (TCP/UDP) and thus
3407  *  only needs to be set when the protocol changes. A context
3408  *  change can be a performance detriment, however, and offload
3409  *  might be better just disabled. The reason arises in the way
3410  *  in which the controller supports pipelined requests from the
3411  *  Tx data DMA. Up to four requests can be pipelined, and they may
3412  *  belong to the same packet or to multiple packets. However all
3413  *  requests for one packet are issued before a request is issued
3414  *  for a subsequent packet, and if a request for the next packet
3415  *  requires a context change, that request will be stalled
3416  *  until the previous request completes. This means setting up
3417  *  a new context effectively disables pipelined Tx data DMA,
3418  *  which in turn greatly slows down performance when sending
3419  *  small frames.
3420  **********************************************************************/
3421 static void
3422 em_transmit_checksum_setup(struct tx_ring *txr, struct mbuf *mp, int ip_off,
3423     struct ip *ip, u32 *txd_upper, u32 *txd_lower)
3424 {
3425         struct adapter                  *adapter = txr->adapter;
3426         struct e1000_context_desc       *TXD = NULL;
3427         struct em_buffer                *tx_buffer;
3428         int                             cur, hdr_len;
3429         u32                             cmd = 0;
3430         u16                             offload = 0;
3431         u8                              ipcso, ipcss, tucso, tucss;
3432
3433         ipcss = ipcso = tucss = tucso = 0;
3434         hdr_len = ip_off + (ip->ip_hl << 2);
3435         cur = txr->next_avail_desc;
3436
3437         /* Setup of IP header checksum. */
3438         if (mp->m_pkthdr.csum_flags & CSUM_IP) {
3439                 *txd_upper |= E1000_TXD_POPTS_IXSM << 8;
3440                 offload |= CSUM_IP;
3441                 ipcss = ip_off;
3442                 ipcso = ip_off + offsetof(struct ip, ip_sum);
3443                 /*
3444                  * Start offset for header checksum calculation.
3445                  * End offset for header checksum calculation.
3446                  * Offset of place to put the checksum.
3447                  */
3448                 TXD = (struct e1000_context_desc *)&txr->tx_base[cur];
3449                 TXD->lower_setup.ip_fields.ipcss = ipcss;
3450                 TXD->lower_setup.ip_fields.ipcse = htole16(hdr_len);
3451                 TXD->lower_setup.ip_fields.ipcso = ipcso;
3452                 cmd |= E1000_TXD_CMD_IP;
3453         }
3454
3455         if (mp->m_pkthdr.csum_flags & CSUM_TCP) {
3456                 *txd_lower = E1000_TXD_CMD_DEXT | E1000_TXD_DTYP_D;
3457                 *txd_upper |= E1000_TXD_POPTS_TXSM << 8;
3458                 offload |= CSUM_TCP;
3459                 tucss = hdr_len;
3460                 tucso = hdr_len + offsetof(struct tcphdr, th_sum);
3461                 /*
3462                  * Setting up a new checksum offload context for every frame
3463                  * takes a lot of processing time for the hardware. This also
3464                  * reduces performance a lot for small frames, so avoid it
3465                  * if the driver can reuse a previously configured checksum
3466                  * offload context.
3467                  */
3468                 if (txr->last_hw_offload == offload) {
3469                         if (offload & CSUM_IP) {
3470                                 if (txr->last_hw_ipcss == ipcss &&
3471                                     txr->last_hw_ipcso == ipcso &&
3472                                     txr->last_hw_tucss == tucss &&
3473                                     txr->last_hw_tucso == tucso)
3474                                         return;
3475                         } else {
3476                                 if (txr->last_hw_tucss == tucss &&
3477                                     txr->last_hw_tucso == tucso)
3478                                         return;
3479                         }
3480                 }
3481                 txr->last_hw_offload = offload;
3482                 txr->last_hw_tucss = tucss;
3483                 txr->last_hw_tucso = tucso;
3484                 /*
3485                  * Start offset for payload checksum calculation.
3486                  * End offset for payload checksum calculation.
3487                  * Offset of place to put the checksum.
3488                  */
3489                 TXD = (struct e1000_context_desc *)&txr->tx_base[cur];
3490                 TXD->upper_setup.tcp_fields.tucss = tucss;
3491                 TXD->upper_setup.tcp_fields.tucse = htole16(0);
3492                 TXD->upper_setup.tcp_fields.tucso = tucso;
3493                 cmd |= E1000_TXD_CMD_TCP;
3494         } else if (mp->m_pkthdr.csum_flags & CSUM_UDP) {
3495                 *txd_lower = E1000_TXD_CMD_DEXT | E1000_TXD_DTYP_D;
3496                 *txd_upper |= E1000_TXD_POPTS_TXSM << 8;
                offload |= CSUM_UDP;
3497                 tucss = hdr_len;
3498                 tucso = hdr_len + offsetof(struct udphdr, uh_sum);
3499                 /*
3500                  * Setting up a new checksum offload context for every frame
3501                  * takes a lot of processing time for the hardware. This also
3502                  * reduces performance a lot for small frames, so avoid it
3503                  * if the driver can reuse a previously configured checksum
3504                  * offload context.
3505                  */
3506                 if (txr->last_hw_offload == offload) {
3507                         if (offload & CSUM_IP) {
3508                                 if (txr->last_hw_ipcss == ipcss &&
3509                                     txr->last_hw_ipcso == ipcso &&
3510                                     txr->last_hw_tucss == tucss &&
3511                                     txr->last_hw_tucso == tucso)
3512                                         return;
3513                         } else {
3514                                 if (txr->last_hw_tucss == tucss &&
3515                                     txr->last_hw_tucso == tucso)
3516                                         return;
3517                         }
3518                 }
3519                 txr->last_hw_offload = offload;
3520                 txr->last_hw_tucss = tucss;
3521                 txr->last_hw_tucso = tucso;
3522                 /*
3523                  * Start offset for header checksum calculation.
3524                  * End offset for header checksum calculation.
3525                  * Offset of place to put the checksum.
3526                  */
3527                 TXD = (struct e1000_context_desc *)&txr->tx_base[cur];
3528                 TXD->upper_setup.tcp_fields.tucss = tucss;
3529                 TXD->upper_setup.tcp_fields.tucse = htole16(0);
3530                 TXD->upper_setup.tcp_fields.tucso = tucso;
3531         }
3532   
3533         if (offload & CSUM_IP) {
3534                 txr->last_hw_ipcss = ipcss;
3535                 txr->last_hw_ipcso = ipcso;
3536         }
3537
3538         TXD->tcp_seg_setup.data = htole32(0);
3539         TXD->cmd_and_length =
3540             htole32(adapter->txd_cmd | E1000_TXD_CMD_DEXT | cmd);
3541         tx_buffer = &txr->tx_buffers[cur];
3542         tx_buffer->m_head = NULL;
3543         tx_buffer->next_eop = -1;
3544
3545         if (++cur == adapter->num_tx_desc)
3546                 cur = 0;
3547
3548         txr->tx_avail--;
3549         txr->next_avail_desc = cur;
3550 }
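
/*
 * A concrete consequence of the context cache above: a workload that
 * interleaves TCP and UDP frames on one ring flips last_hw_offload on
 * every packet, so each frame consumes an extra context descriptor and
 * stalls the pipelined Tx DMA, while a single-protocol stream sets the
 * context up once and then takes the early-return path on every
 * subsequent frame.
 */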
3551
3552
3553 /**********************************************************************
3554  *
3555  *  Setup work for hardware segmentation offload (TSO)
3556  *
3557  **********************************************************************/
3558 static void
3559 em_tso_setup(struct tx_ring *txr, struct mbuf *mp, int ip_off,
3560     struct ip *ip, struct tcphdr *tp, u32 *txd_upper, u32 *txd_lower)
3561 {
3562         struct adapter                  *adapter = txr->adapter;
3563         struct e1000_context_desc       *TXD;
3564         struct em_buffer                *tx_buffer;
3565         int cur, hdr_len;
3566
3567         /*
3568          * In theory we can use the same TSO context if and only if
3569          * the frame is the same type (IP/TCP) and has the same MSS.
3570          * However, checking whether a frame has the same IP/TCP
3571          * header structure is hard, so just ignore that and always
3572          * reestablish a new TSO context.
3573          */
3574         hdr_len = ip_off + (ip->ip_hl << 2) + (tp->th_off << 2);
3575         *txd_lower = (E1000_TXD_CMD_DEXT |      /* Extended descr type */
3576                       E1000_TXD_DTYP_D |        /* Data descr type */
3577                       E1000_TXD_CMD_TSE);       /* Do TSE on this packet */
3578
3579         /* IP and/or TCP header checksum calculation and insertion. */
3580         *txd_upper = (E1000_TXD_POPTS_IXSM | E1000_TXD_POPTS_TXSM) << 8;
3581
3582         cur = txr->next_avail_desc;
3583         tx_buffer = &txr->tx_buffers[cur];
3584         TXD = (struct e1000_context_desc *) &txr->tx_base[cur];
3585
3586         /*
3587          * Start offset for header checksum calculation.
3588          * End offset for header checksum calculation.
3589          * Offset of place to put the checksum.
3590          */
3591         TXD->lower_setup.ip_fields.ipcss = ip_off;
3592         TXD->lower_setup.ip_fields.ipcse =
3593             htole16(ip_off + (ip->ip_hl << 2) - 1);
3594         TXD->lower_setup.ip_fields.ipcso = ip_off + offsetof(struct ip, ip_sum);
3595         /*
3596          * Start offset for payload checksum calculation.
3597          * End offset for payload checksum calculation.
3598          * Offset of place to put the checksum.
3599          */
3600         TXD->upper_setup.tcp_fields.tucss = ip_off + (ip->ip_hl << 2);
3601         TXD->upper_setup.tcp_fields.tucse = 0;
3602         TXD->upper_setup.tcp_fields.tucso =
3603             ip_off + (ip->ip_hl << 2) + offsetof(struct tcphdr, th_sum);
3604         /*
3605          * Payload size per packet w/o any headers.
3606          * Length of all headers up to payload.
3607          */
3608         TXD->tcp_seg_setup.fields.mss = htole16(mp->m_pkthdr.tso_segsz);
3609         TXD->tcp_seg_setup.fields.hdr_len = hdr_len;
3610
3611         TXD->cmd_and_length = htole32(adapter->txd_cmd |
3612                                 E1000_TXD_CMD_DEXT |    /* Extended descr */
3613                                 E1000_TXD_CMD_TSE |     /* TSE context */
3614                                 E1000_TXD_CMD_IP |      /* Do IP csum */
3615                                 E1000_TXD_CMD_TCP |     /* Do TCP checksum */
3616                                 (mp->m_pkthdr.len - (hdr_len))); /* Total len */
3617
3618         tx_buffer->m_head = NULL;
3619         tx_buffer->next_eop = -1;
3620
3621         if (++cur == adapter->num_tx_desc)
3622                 cur = 0;
3623
3624         txr->tx_avail--;
3625         txr->next_avail_desc = cur;
3626         txr->tx_tso = TRUE;
3627 }
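
/*
 * Worked example for the header math above: a plain Ethernet/IPv4/TCP
 * frame has ip_off = 14, ip_hl = 5 and th_off = 5, so
 * hdr_len = 14 + (5 << 2) + (5 << 2) = 54 bytes; the hardware then
 * replicates those 54 header bytes in front of each tso_segsz-sized
 * chunk it carves out of the remaining m_pkthdr.len - 54 payload bytes.
 */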
3628
3629
3630 /**********************************************************************
3631  *
3632  *  Examine each tx_buffer in the used queue. If the hardware is done
3633  *  processing the packet then free associated resources. The
3634  *  tx_buffer is put back on the free queue.
3635  *
3636  **********************************************************************/
3637 static bool
3638 em_txeof(struct tx_ring *txr)
3639 {
3640         struct adapter  *adapter = txr->adapter;
3641         int first, last, done, processed;
3642         struct em_buffer *tx_buffer;
3643         struct e1000_tx_desc   *tx_desc, *eop_desc;
3644         struct ifnet   *ifp = adapter->ifp;
3645
3646         EM_TX_LOCK_ASSERT(txr);
3647
3648         /* No work, make sure watchdog is off */
3649         if (txr->tx_avail == adapter->num_tx_desc) {
3650                 txr->queue_status = EM_QUEUE_IDLE;
3651                 return (FALSE);
3652         }
3653
3654         processed = 0;
3655         first = txr->next_to_clean;
3656         tx_desc = &txr->tx_base[first];
3657         tx_buffer = &txr->tx_buffers[first];
3658         last = tx_buffer->next_eop;
3659         eop_desc = &txr->tx_base[last];
3660
3661         /*
3662          * What this does is get the index of the
3663          * first descriptor AFTER the EOP of the 
3664          * first packet; that way we can do the
3665          * simple comparison in the inner while loop.
3666          */
3667         if (++last == adapter->num_tx_desc)
3668                 last = 0;
3669         done = last;
3670
3671         bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
3672             BUS_DMASYNC_POSTREAD);
3673
3674         while (eop_desc->upper.fields.status & E1000_TXD_STAT_DD) {
3675                 /* We clean the range of the packet */
3676                 while (first != done) {
3677                         tx_desc->upper.data = 0;
3678                         tx_desc->lower.data = 0;
3679                         tx_desc->buffer_addr = 0;
3680                         ++txr->tx_avail;
3681                         ++processed;
3682
3683                         if (tx_buffer->m_head) {
3684                                 bus_dmamap_sync(txr->txtag,
3685                                     tx_buffer->map,
3686                                     BUS_DMASYNC_POSTWRITE);
3687                                 bus_dmamap_unload(txr->txtag,
3688                                     tx_buffer->map);
3689                                 m_freem(tx_buffer->m_head);
3690                                 tx_buffer->m_head = NULL;
3691                         }
3692                         tx_buffer->next_eop = -1;
3693                         txr->watchdog_time = ticks;
3694
3695                         if (++first == adapter->num_tx_desc)
3696                                 first = 0;
3697
3698                         tx_buffer = &txr->tx_buffers[first];
3699                         tx_desc = &txr->tx_base[first];
3700                 }
3701                 ++ifp->if_opackets;
3702                 /* See if we can continue to the next packet */
3703                 last = tx_buffer->next_eop;
3704                 if (last != -1) {
3705                         eop_desc = &txr->tx_base[last];
3706                         /* Get new done point */
3707                         if (++last == adapter->num_tx_desc)
                                last = 0;
3708                         done = last;
3709                 } else
3710                         break;
3711         }
3712         bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
3713             BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
3714
3715         txr->next_to_clean = first;
3716
3717         /*
3718         ** Watchdog calculation: we know there's
3719         ** work outstanding or the first return
3720         ** would have been taken, so nothing processed
3721         ** for too long indicates a hang. The local timer
3722         ** will examine this and do a reset if needed.
3723         */
3724         if ((!processed) && ((ticks - txr->watchdog_time) > EM_WATCHDOG))
3725                 txr->queue_status = EM_QUEUE_HUNG;
3726
3727         /*
3728          * If we have enough room, clear IFF_DRV_OACTIVE
3729          * to tell the stack that it is OK to send packets.
3730          */
3731         if (txr->tx_avail > EM_TX_CLEANUP_THRESHOLD) {                
3732                 ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;
3733                 /* Disable watchdog if all clean */
3734                 if (txr->tx_avail == adapter->num_tx_desc) {
3735                         txr->queue_status = EM_QUEUE_IDLE;
3736                         return (FALSE);
3737                 } 
3738         }
3739
3740         return (TRUE);
3741 }
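
/*
 * Index-wrap example for the cleanup loop above, assuming the usual
 * 1024-descriptor ring: a packet occupying slots 1022, 1023, 0 and 1
 * has next_eop = 1, so 'done' becomes 2 and the inner loop cleans
 * 1022 -> 1023 -> 0 -> 1, with 'first' wrapping back to 0 through the
 * ++first == num_tx_desc test.
 */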
3742
3743
3744 /*********************************************************************
3745  *
3746  *  Refresh RX descriptor mbufs from system mbuf buffer pool.
3747  *
3748  **********************************************************************/
3749 static void
3750 em_refresh_mbufs(struct rx_ring *rxr, int limit)
3751 {
3752         struct adapter          *adapter = rxr->adapter;
3753         struct mbuf             *m;
3754         bus_dma_segment_t       segs[1];
3755         struct em_buffer        *rxbuf;
3756         int                     i, error, nsegs, cleaned;
3757
3758         i = rxr->next_to_refresh;
3759         cleaned = -1;
3760         while (i != limit) {
3761                 rxbuf = &rxr->rx_buffers[i];
3762                 if (rxbuf->m_head == NULL) {
3763                         m = m_getjcl(M_DONTWAIT, MT_DATA,
3764                             M_PKTHDR, adapter->rx_mbuf_sz);
3765                         /*
3766                         ** If we have a temporary resource shortage
3767                         ** that causes a failure, just abort refresh
3768                         ** for now, we will return to this point when
3769                         ** reinvoked from em_rxeof.
3770                         */
3771                         if (m == NULL)
3772                                 goto update;
3773                 } else
3774                         m = rxbuf->m_head;
3775
3776                 m->m_len = m->m_pkthdr.len = adapter->rx_mbuf_sz;
3777                 m->m_flags |= M_PKTHDR;
3778                 m->m_data = m->m_ext.ext_buf;
3779
3780                 /* Use bus_dma machinery to setup the memory mapping  */
3781                 error = bus_dmamap_load_mbuf_sg(rxr->rxtag, rxbuf->map,
3782                     m, segs, &nsegs, BUS_DMA_NOWAIT);
3783                 if (error != 0) {
3784                         printf("Refresh mbufs: dmamap load"
3785                             " failure - %d\n", error);
3786                         m_free(m);
3787                         rxbuf->m_head = NULL;
3788                         goto update;
3789                 }
3790                 rxbuf->m_head = m;
3791                 bus_dmamap_sync(rxr->rxtag,
3792                     rxbuf->map, BUS_DMASYNC_PREREAD);
3793                 rxr->rx_base[i].buffer_addr = htole64(segs[0].ds_addr);
3794
3795                 cleaned = i;
3796                 /* Calculate next index */
3797                 if (++i == adapter->num_rx_desc)
3798                         i = 0;
3799                 rxr->next_to_refresh = i;
3800         }
3801 update:
3802         /*
3803         ** Update the tail pointer only if,
3804         ** and only as far as, we have refreshed.
3805         */
3806         if (cleaned != -1) /* Update tail index */
3807                 E1000_WRITE_REG(&adapter->hw,
3808                     E1000_RDT(rxr->me), cleaned);
3809
3810         return;
3811 }
3812
3813
3814 /*********************************************************************
3815  *
3816  *  Allocate memory for rx_buffer structures. Since we use one
3817  *  rx_buffer per received packet, the maximum number of rx_buffer's
3818  *  that we'll need is equal to the number of receive descriptors
3819  *  that we've allocated.
3820  *
3821  **********************************************************************/
3822 static int
3823 em_allocate_receive_buffers(struct rx_ring *rxr)
3824 {
3825         struct adapter          *adapter = rxr->adapter;
3826         device_t                dev = adapter->dev;
3827         struct em_buffer        *rxbuf;
3828         int                     error;
3829
3830         rxr->rx_buffers = malloc(sizeof(struct em_buffer) *
3831             adapter->num_rx_desc, M_DEVBUF, M_NOWAIT | M_ZERO);
3832         if (rxr->rx_buffers == NULL) {
3833                 device_printf(dev, "Unable to allocate rx_buffer memory\n");
3834                 return (ENOMEM);
3835         }
3836
3837         error = bus_dma_tag_create(bus_get_dma_tag(dev), /* parent */
3838                                 1, 0,                   /* alignment, bounds */
3839                                 BUS_SPACE_MAXADDR,      /* lowaddr */
3840                                 BUS_SPACE_MAXADDR,      /* highaddr */
3841                                 NULL, NULL,             /* filter, filterarg */
3842                                 MJUM9BYTES,             /* maxsize */
3843                                 1,                      /* nsegments */
3844                                 MJUM9BYTES,             /* maxsegsize */
3845                                 0,                      /* flags */
3846                                 NULL,                   /* lockfunc */
3847                                 NULL,                   /* lockarg */
3848                                 &rxr->rxtag);
3849         if (error) {
3850                 device_printf(dev, "%s: bus_dma_tag_create failed %d\n",
3851                     __func__, error);
3852                 goto fail;
3853         }
3854
3855         rxbuf = rxr->rx_buffers;
3856         for (int i = 0; i < adapter->num_rx_desc; i++, rxbuf++) {
3858                 error = bus_dmamap_create(rxr->rxtag, BUS_DMA_NOWAIT,
3859                     &rxbuf->map);
3860                 if (error) {
3861                         device_printf(dev, "%s: bus_dmamap_create failed: %d\n",
3862                             __func__, error);
3863                         goto fail;
3864                 }
3865         }
3866
3867         return (0);
3868
3869 fail:
3870         em_free_receive_structures(adapter);
3871         return (error);
3872 }
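
/*
 * MJUM9BYTES (9 * 1024 = 9216) is large enough that even a 9000-byte
 * MTU jumbo frame (9018 bytes on the wire) fits in a single cluster,
 * which is why the tag above is created with nsegments = 1: each
 * receive descriptor maps exactly one contiguous buffer.
 */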
3873
3874
3875 /*********************************************************************
3876  *
3877  *  Initialize a receive ring and its buffers.
3878  *
3879  **********************************************************************/
3880 static int
3881 em_setup_receive_ring(struct rx_ring *rxr)
3882 {
3883         struct  adapter         *adapter = rxr->adapter;
3884         struct em_buffer        *rxbuf;
3885         bus_dma_segment_t       seg[1];
3886         int                     rsize, nsegs, error;
3887
3888
3889         /* Clear the ring contents */
3890         EM_RX_LOCK(rxr);
3891         rsize = roundup2(adapter->num_rx_desc *
3892             sizeof(struct e1000_rx_desc), EM_DBA_ALIGN);
3893         bzero((void *)rxr->rx_base, rsize);
3894
3895         /*
3896         ** Free current RX buffer structs and their mbufs
3897         */
3898         for (int i = 0; i < adapter->num_rx_desc; i++) {
3899                 rxbuf = &rxr->rx_buffers[i];
3900                 if (rxbuf->m_head != NULL) {
3901                         bus_dmamap_sync(rxr->rxtag, rxbuf->map,
3902                             BUS_DMASYNC_POSTREAD);
3903                         bus_dmamap_unload(rxr->rxtag, rxbuf->map);
3904                         m_freem(rxbuf->m_head);
3905                 }
3906         }
3907
3908         /* Now replenish the mbufs */
3909         for (int j = 0; j != adapter->num_rx_desc; ++j) {
3910
3911                 rxbuf = &rxr->rx_buffers[j];
3912                 rxbuf->m_head = m_getjcl(M_DONTWAIT, MT_DATA,
3913                     M_PKTHDR, adapter->rx_mbuf_sz);
3914                 if (rxbuf->m_head == NULL) {
                        EM_RX_UNLOCK(rxr);
3915                         return (ENOBUFS);
                }
3916                 rxbuf->m_head->m_len = adapter->rx_mbuf_sz;
3917                 rxbuf->m_head->m_flags &= ~M_HASFCS; /* we strip it */
3918                 rxbuf->m_head->m_pkthdr.len = adapter->rx_mbuf_sz;
3919
3920                 /* Get the memory mapping */
3921                 error = bus_dmamap_load_mbuf_sg(rxr->rxtag,
3922                     rxbuf->map, rxbuf->m_head, seg,
3923                     &nsegs, BUS_DMA_NOWAIT);
3924                 if (error != 0) {
3925                         m_freem(rxbuf->m_head);
3926                         rxbuf->m_head = NULL;
                        EM_RX_UNLOCK(rxr);
3927                         return (error);
3928                 }
3929                 bus_dmamap_sync(rxr->rxtag,
3930                     rxbuf->map, BUS_DMASYNC_PREREAD);
3931
3932                 /* Update descriptor */
3933                 rxr->rx_base[j].buffer_addr = htole64(seg[0].ds_addr);
3934         }
3935
3936
3937         /* Setup our descriptor indices */
3938         rxr->next_to_check = 0;
3939         rxr->next_to_refresh = 0;
3940
3941         bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
3942             BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
3943
3944         EM_RX_UNLOCK(rxr);
3945         return (0);
3946 }
3947
3948 /*********************************************************************
3949  *
3950  *  Initialize all receive rings.
3951  *
3952  **********************************************************************/
3953 static int
3954 em_setup_receive_structures(struct adapter *adapter)
3955 {
3956         struct rx_ring *rxr = adapter->rx_rings;
3957         int j;
3958
3959         for (j = 0; j < adapter->num_queues; j++, rxr++)
3960                 if (em_setup_receive_ring(rxr))
3961                         goto fail;
3962
3963         return (0);
3964 fail:
3965         /*
3966          * Free RX buffers allocated so far, we will only handle
3967          * the rings that completed, the failing case will have
3968          * cleaned up for itself. 'j' failed, so it's the terminus.
3969          */
3970         for (int i = 0; i < j; ++i) {
3971                 rxr = &adapter->rx_rings[i];
3972                 for (int n = 0; n < adapter->num_rx_desc; n++) {
3973                         struct em_buffer *rxbuf;
3974                         rxbuf = &rxr->rx_buffers[n];
3975                         if (rxbuf->m_head != NULL) {
3976                                 bus_dmamap_sync(rxr->rxtag, rxbuf->map,
3977                                   BUS_DMASYNC_POSTREAD);
3978                                 bus_dmamap_unload(rxr->rxtag, rxbuf->map);
3979                                 m_freem(rxbuf->m_head);
3980                                 rxbuf->m_head = NULL;
3981                         }
3982                 }
3983         }
3984
3985         return (ENOBUFS);
3986 }
3987
3988 /*********************************************************************
3989  *
3990  *  Free all receive rings.
3991  *
3992  **********************************************************************/
3993 static void
3994 em_free_receive_structures(struct adapter *adapter)
3995 {
3996         struct rx_ring *rxr = adapter->rx_rings;
3997
3998         for (int i = 0; i < adapter->num_queues; i++, rxr++) {
3999                 em_free_receive_buffers(rxr);
4000                 /* Free the ring memory as well */
4001                 em_dma_free(adapter, &rxr->rxdma);
4002                 EM_RX_LOCK_DESTROY(rxr);
4003         }
4004
4005         free(adapter->rx_rings, M_DEVBUF);
4006 }
4007
4008
4009 /*********************************************************************
4010  *
4011  *  Free receive ring data structures
4012  *
4013  **********************************************************************/
4014 static void
4015 em_free_receive_buffers(struct rx_ring *rxr)
4016 {
4017         struct adapter          *adapter = rxr->adapter;
4018         struct em_buffer        *rxbuf = NULL;
4019
4020         INIT_DEBUGOUT("free_receive_buffers: begin");
4021
4022         if (rxr->rx_buffers != NULL) {
4023                 for (int i = 0; i < adapter->num_rx_desc; i++) {
4024                         rxbuf = &rxr->rx_buffers[i];
4025                         if (rxbuf->map != NULL) {
4026                                 bus_dmamap_sync(rxr->rxtag, rxbuf->map,
4027                                     BUS_DMASYNC_POSTREAD);
4028                                 bus_dmamap_unload(rxr->rxtag, rxbuf->map);
4029                                 bus_dmamap_destroy(rxr->rxtag, rxbuf->map);
4030                         }
4031                         if (rxbuf->m_head != NULL) {
4032                                 m_freem(rxbuf->m_head);
4033                                 rxbuf->m_head = NULL;
4034                         }
4035                 }
4036                 free(rxr->rx_buffers, M_DEVBUF);
4037                 rxr->rx_buffers = NULL;
4038         }
4039
4040         if (rxr->rxtag != NULL) {
4041                 bus_dma_tag_destroy(rxr->rxtag);
4042                 rxr->rxtag = NULL;
4043         }
4044
4045         return;
4046 }
4047
4048
4049 /*********************************************************************
4050  *
4051  *  Enable receive unit.
4052  *
4053  **********************************************************************/
4054 #define MAX_INTS_PER_SEC        8000
4055 #define DEFAULT_ITR     (1000000000/(MAX_INTS_PER_SEC * 256))
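/*
 * The ITR register counts in 256 ns units, so the value written is
 * 1000000000 / (8000 * 256) = 488, i.e. a minimum gap of
 * 488 * 256 ns ~= 125 us between interrupts, capping the rate at
 * roughly MAX_INTS_PER_SEC = 8000 interrupts per second.
 */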
4056
4057 static void
4058 em_initialize_receive_unit(struct adapter *adapter)
4059 {
4060         struct rx_ring  *rxr = adapter->rx_rings;
4061         struct ifnet    *ifp = adapter->ifp;
4062         struct e1000_hw *hw = &adapter->hw;
4063         u64     bus_addr;
4064         u32     rctl, rxcsum;
4065
4066         INIT_DEBUGOUT("em_initialize_receive_unit: begin");
4067
4068         /*
4069          * Make sure receives are disabled while setting
4070          * up the descriptor ring
4071          */
4072         rctl = E1000_READ_REG(hw, E1000_RCTL);
4073         E1000_WRITE_REG(hw, E1000_RCTL, rctl & ~E1000_RCTL_EN);
4074
4075         E1000_WRITE_REG(&adapter->hw, E1000_RADV,
4076             adapter->rx_abs_int_delay.value);
4077         /*
4078          * Set the interrupt throttling rate. Value is calculated
4079          * as DEFAULT_ITR = 1/(MAX_INTS_PER_SEC * 256ns)
4080          */
4081         E1000_WRITE_REG(hw, E1000_ITR, DEFAULT_ITR);
4082
4083         /*
4084         ** When using MSIX interrupts we need to throttle
4085         ** using the EITR register (82574 only)
4086         */
4087         if (hw->mac.type == e1000_82574)
4088                 for (int i = 0; i < 4; i++)
4089                         E1000_WRITE_REG(hw, E1000_EITR_82574(i),
4090                             DEFAULT_ITR);
4091
4092         /* Disable accelerated acknowledge */
4093         if (adapter->hw.mac.type == e1000_82574)
4094                 E1000_WRITE_REG(hw, E1000_RFCTL, E1000_RFCTL_ACK_DIS);
4095
4096         if (ifp->if_capenable & IFCAP_RXCSUM) {
4097                 rxcsum = E1000_READ_REG(hw, E1000_RXCSUM);
4098                 rxcsum |= (E1000_RXCSUM_IPOFL | E1000_RXCSUM_TUOFL);
4099                 E1000_WRITE_REG(hw, E1000_RXCSUM, rxcsum);
4100         }
4101
4102         /*
4103         ** XXX TEMPORARY WORKAROUND: on some systems with the 82573,
4104         ** long latencies are observed (e.g. the Lenovo X60). This
4105         ** change eliminates the problem, but since having positive
4106         ** values in RDTR is a known source of problems on other
4107         ** platforms, another solution is being sought.
4108         */
4109         if (hw->mac.type == e1000_82573)
4110                 E1000_WRITE_REG(hw, E1000_RDTR, 0x20);
4111
4112         for (int i = 0; i < adapter->num_queues; i++, rxr++) {
4113                 /* Setup the Base and Length of the Rx Descriptor Ring */
4114                 bus_addr = rxr->rxdma.dma_paddr;
4115                 E1000_WRITE_REG(hw, E1000_RDLEN(i),
4116                     adapter->num_rx_desc * sizeof(struct e1000_rx_desc));
4117                 E1000_WRITE_REG(hw, E1000_RDBAH(i), (u32)(bus_addr >> 32));
4118                 E1000_WRITE_REG(hw, E1000_RDBAL(i), (u32)bus_addr);
4119                 /* Setup the Head and Tail Descriptor Pointers */
4120                 E1000_WRITE_REG(hw, E1000_RDH(i), 0);
4121                 E1000_WRITE_REG(hw, E1000_RDT(i), adapter->num_rx_desc - 1);
4122         }
4123
4124         /* Set early receive threshold on appropriate hw */
4125         if (((adapter->hw.mac.type == e1000_ich9lan) ||
4126             (adapter->hw.mac.type == e1000_pch2lan) ||
4127             (adapter->hw.mac.type == e1000_ich10lan)) &&
4128             (ifp->if_mtu > ETHERMTU)) {
4129                 u32 rxdctl = E1000_READ_REG(hw, E1000_RXDCTL(0));
4130                 E1000_WRITE_REG(hw, E1000_RXDCTL(0), rxdctl | 3);
4131                 E1000_WRITE_REG(hw, E1000_ERT, 0x100 | (1 << 13));
4132         }
4133                 
4134         if (adapter->hw.mac.type == e1000_pch2lan) {
4135                 if (ifp->if_mtu > ETHERMTU)
4136                         e1000_lv_jumbo_workaround_ich8lan(hw, TRUE);
4137                 else
4138                         e1000_lv_jumbo_workaround_ich8lan(hw, FALSE);
4139         }
4140
4141         /* Setup the Receive Control Register */
4142         rctl &= ~(3 << E1000_RCTL_MO_SHIFT);
4143         rctl |= E1000_RCTL_EN | E1000_RCTL_BAM |
4144             E1000_RCTL_LBM_NO | E1000_RCTL_RDMTS_HALF |
4145             (hw->mac.mc_filter_type << E1000_RCTL_MO_SHIFT);
4146
4147         /* Strip the CRC */
4148         rctl |= E1000_RCTL_SECRC;
4149
4150         /* Make sure VLAN Filters are off */
4151         rctl &= ~E1000_RCTL_VFE;
4152         rctl &= ~E1000_RCTL_SBP;
4153
4154         if (adapter->rx_mbuf_sz == MCLBYTES)
4155                 rctl |= E1000_RCTL_SZ_2048;
4156         else if (adapter->rx_mbuf_sz == MJUMPAGESIZE)
4157                 rctl |= E1000_RCTL_SZ_4096 | E1000_RCTL_BSEX;
4158         else if (adapter->rx_mbuf_sz > MJUMPAGESIZE)
4159                 rctl |= E1000_RCTL_SZ_8192 | E1000_RCTL_BSEX;
4160
4161         if (ifp->if_mtu > ETHERMTU)
4162                 rctl |= E1000_RCTL_LPE;
4163         else
4164                 rctl &= ~E1000_RCTL_LPE;
4165
4166         /* Write out the settings */
4167         E1000_WRITE_REG(hw, E1000_RCTL, rctl);
4168
4169         return;
4170 }
4171
4172
4173 /*********************************************************************
4174  *
4175  *  This routine executes in interrupt context. It replenishes
4176  *  the mbufs in the descriptor ring and sends data which has been
4177  *  DMA'd into host memory up to the stack.
4178  *
4179  *  We loop at most count times if count is > 0, or until done if
4180  *  count < 0.
4181  *
4182  *  For polling, the number of cleaned packets is also returned.
4183  *********************************************************************/
4184 static bool
4185 em_rxeof(struct rx_ring *rxr, int count, int *done)
4186 {
4187         struct adapter          *adapter = rxr->adapter;
4188         struct ifnet            *ifp = adapter->ifp;
4189         struct mbuf             *mp, *sendmp;
4190         u8                      status = 0;
4191         u16                     len;
4192         int                     i, processed, rxdone = 0;
4193         bool                    eop;
4194         struct e1000_rx_desc    *cur;
4195
4196         EM_RX_LOCK(rxr);
4197
4198         for (i = rxr->next_to_check, processed = 0; count != 0;) {
4199
4200                 if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
4201                         break;
4202
4203                 bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
4204                     BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
4205
4206                 cur = &rxr->rx_base[i];
4207                 status = cur->status;
4208                 mp = sendmp = NULL;
4209
4210                 if ((status & E1000_RXD_STAT_DD) == 0)
4211                         break;
4212
4213                 len = le16toh(cur->length);
4214                 eop = (status & E1000_RXD_STAT_EOP) != 0;
4215
4216                 if ((cur->errors & E1000_RXD_ERR_FRAME_ERR_MASK) ||
4217                     (rxr->discard == TRUE)) {
4218                         ifp->if_ierrors++;
4219                         ++rxr->rx_discarded;
4220                         if (!eop) /* Catch subsequent segs */
4221                                 rxr->discard = TRUE;
4222                         else
4223                                 rxr->discard = FALSE;
4224                         em_rx_discard(rxr, i);
4225                         goto next_desc;
4226                 }
4227
4228                 /* Assign correct length to the current fragment */
4229                 mp = rxr->rx_buffers[i].m_head;
4230                 mp->m_len = len;
4231
4232                 /* Trigger for refresh */
4233                 rxr->rx_buffers[i].m_head = NULL;
4234
4235                 /* First segment? */
4236                 if (rxr->fmp == NULL) {
4237                         mp->m_pkthdr.len = len;
4238                         rxr->fmp = rxr->lmp = mp;
4239                 } else {
4240                         /* Chain mbufs together */
4241                         mp->m_flags &= ~M_PKTHDR;
4242                         rxr->lmp->m_next = mp;
4243                         rxr->lmp = mp;
4244                         rxr->fmp->m_pkthdr.len += len;
4245                 }
4246
4247                 if (eop) {
4248                         --count;
4249                         sendmp = rxr->fmp;
4250                         sendmp->m_pkthdr.rcvif = ifp;
4251                         ifp->if_ipackets++;
4252                         em_receive_checksum(cur, sendmp);
4253 #ifndef __NO_STRICT_ALIGNMENT
4254                         if (adapter->max_frame_size >
4255                             (MCLBYTES - ETHER_ALIGN) &&
4256                             em_fixup_rx(rxr) != 0)
4257                                 goto skip;
4258 #endif
4259                         if (status & E1000_RXD_STAT_VP) {
4260                                 sendmp->m_pkthdr.ether_vtag =
4261                                     (le16toh(cur->special) &
4262                                     E1000_RXD_SPC_VLAN_MASK);
4263                                 sendmp->m_flags |= M_VLANTAG;
4264                         }
4265 #ifdef EM_MULTIQUEUE
4266                         sendmp->m_pkthdr.flowid = rxr->msix;
4267                         sendmp->m_flags |= M_FLOWID;
4268 #endif
4269 #ifndef __NO_STRICT_ALIGNMENT
4270 skip:
4271 #endif
4272                         rxr->fmp = rxr->lmp = NULL;
4273                 }
4274 next_desc:
4275                 /* Zero out the receive descriptor's status. */
4276                 cur->status = 0;
4277                 ++rxdone;       /* cumulative for POLL */
4278                 ++processed;
4279
4280                 /* Advance our pointers to the next descriptor. */
4281                 if (++i == adapter->num_rx_desc)
4282                         i = 0;
4283
4284                 /* Send to the stack */
4285                 if (sendmp != NULL) {
4286                         rxr->next_to_check = i;
4287                         EM_RX_UNLOCK(rxr);
4288                         (*ifp->if_input)(ifp, sendmp);
4289                         EM_RX_LOCK(rxr);
4290                         i = rxr->next_to_check;
4291                 }
4292
4293                 /* Only refresh mbufs every 8 descriptors */
4294                 if (processed == 8) {
4295                         em_refresh_mbufs(rxr, i);
4296                         processed = 0;
4297                 }
4298         }
4299
4300         /* Catch any remaining refresh work */
4301         em_refresh_mbufs(rxr, i);
4302
4303         rxr->next_to_check = i;
4304         if (done != NULL)
4305                 *done = rxdone;
4306         EM_RX_UNLOCK(rxr);
4307
4308         return ((status & E1000_RXD_STAT_DD) ? TRUE : FALSE);
4309 }
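
/*
 * Usage sketch (hypothetical caller, not the driver's actual polling
 * code): a polling-style consumer bounds the work done per call via
 * 'count' and reads back the number of packets cleaned through the
 * 'done' out-parameter.
 */
#if 0
static int
example_poll(struct ifnet *ifp, int budget)
{
        struct adapter *adapter = ifp->if_softc;
        int rx_done = 0;

        /* Clean at most 'budget' packets from the first RX ring. */
        (void) em_rxeof(adapter->rx_rings, budget, &rx_done);
        return (rx_done);
}
#endif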
4310
4311 static __inline void
4312 em_rx_discard(struct rx_ring *rxr, int i)
4313 {
4314         struct em_buffer        *rbuf;
4315
4316         rbuf = &rxr->rx_buffers[i];
4317         /* Free any previous pieces */
4318         if (rxr->fmp != NULL) {
4319                 rxr->fmp->m_flags |= M_PKTHDR;
4320                 m_freem(rxr->fmp);
4321                 rxr->fmp = NULL;
4322                 rxr->lmp = NULL;
4323         }
4324         /*
4325         ** Free buffer and allow em_refresh_mbufs()
4326         ** to clean up and recharge buffer.
4327         */
4328         if (rbuf->m_head) {
4329                 m_free(rbuf->m_head);
4330                 rbuf->m_head = NULL;
4331         }
4332         return;
4333 }
4334
4335 #ifndef __NO_STRICT_ALIGNMENT
4336 /*
4337  * When jumbo frames are enabled we should realign the entire payload on
4338  * architectures with strict alignment. This is a serious design mistake of
4339  * the 8254x, as it nullifies the benefit of DMA: the chip only allows RX
4340  * buffer sizes of 2048/4096/8192/16384, whereas what we really want is
4341  * 2048 - ETHER_ALIGN so the payload ends up aligned. On architectures
4342  * without strict alignment restrictions the 8254x still performs unaligned
4343  * memory accesses, which reduce performance too. To avoid copying an
4344  * entire frame to realign it, we allocate a new mbuf, copy the ethernet
4345  * header into it, and prepend the new mbuf onto the existing mbuf chain.
4346  *
4347  * Be aware, best performance of the 8254x is achieved only when jumbo
4348  * frames are not used at all on architectures with strict alignment.
4349  */
4350 static int
4351 em_fixup_rx(struct rx_ring *rxr)
4352 {
4353         struct adapter *adapter = rxr->adapter;
4354         struct mbuf *m, *n;
4355         int error;
4356
4357         error = 0;
4358         m = rxr->fmp;
4359         if (m->m_len <= (MCLBYTES - ETHER_HDR_LEN)) {
4360                 bcopy(m->m_data, m->m_data + ETHER_HDR_LEN, m->m_len);
4361                 m->m_data += ETHER_HDR_LEN;
4362         } else {
4363                 MGETHDR(n, M_DONTWAIT, MT_DATA);
4364                 if (n != NULL) {
4365                         bcopy(m->m_data, n->m_data, ETHER_HDR_LEN);
4366                         m->m_data += ETHER_HDR_LEN;
4367                         m->m_len -= ETHER_HDR_LEN;
4368                         n->m_len = ETHER_HDR_LEN;
4369                         M_MOVE_PKTHDR(n, m);
4370                         n->m_next = m;
4371                         rxr->fmp = n;
4372                 } else {
4373                         adapter->dropped_pkts++;
4374                         m_freem(rxr->fmp);
4375                         rxr->fmp = NULL;
4376                         error = ENOMEM;
4377                 }
4378         }
4379
4380         return (error);
4381 }
4382 #endif
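
/*
 * Alignment arithmetic behind em_fixup_rx() above: RX buffers begin
 * 4-byte aligned, so after the 14-byte Ethernet header the IP header
 * sits at offset 14 (14 % 4 == 2), which faults on strict-alignment
 * CPUs.  Shifting the frame forward by ETHER_HDR_LEN moves the IP
 * header to offset 28, restoring 4-byte alignment; the second branch
 * instead detaches the header into its own mbuf so the whole frame
 * need not be copied.
 */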
4383
4384 /*********************************************************************
4385  *
4386  *  Verify that the hardware indicated that the checksum is valid.
4387  *  Inform the stack about the status of checksum so that stack
4388  *  doesn't spend time verifying the checksum.
4389  *
4390  *********************************************************************/
4391 static void
4392 em_receive_checksum(struct e1000_rx_desc *rx_desc, struct mbuf *mp)
4393 {
4394         /* Ignore Checksum bit is set */
4395         if (rx_desc->status & E1000_RXD_STAT_IXSM) {
4396                 mp->m_pkthdr.csum_flags = 0;
4397                 return;
4398         }
4399
4400         if (rx_desc->status & E1000_RXD_STAT_IPCS) {
4401                 /* Did it pass? */
4402                 if (!(rx_desc->errors & E1000_RXD_ERR_IPE)) {
4403                         /* IP Checksum Good */
4404                         mp->m_pkthdr.csum_flags = CSUM_IP_CHECKED;
4405                         mp->m_pkthdr.csum_flags |= CSUM_IP_VALID;
4406
4407                 } else {
4408                         mp->m_pkthdr.csum_flags = 0;
4409                 }
4410         }
4411
4412         if (rx_desc->status & E1000_RXD_STAT_TCPCS) {
4413                 /* Did it pass? */
4414                 if (!(rx_desc->errors & E1000_RXD_ERR_TCPE)) {
4415                         mp->m_pkthdr.csum_flags |=
4416                         (CSUM_DATA_VALID | CSUM_PSEUDO_HDR);
4417                         mp->m_pkthdr.csum_data = htons(0xffff);
4418                 }
4419         }
4420 }
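
/*
 * For context: CSUM_DATA_VALID | CSUM_PSEUDO_HDR together with
 * csum_data = 0xffff tells the TCP/UDP input path that the
 * pseudo-header checksum has already been verified in hardware,
 * so the stack skips its own software checksum of the payload.
 */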
4421
4422 /*
4423  * This routine is run via a vlan
4424  * config EVENT
4425  */
4426 static void
4427 em_register_vlan(void *arg, struct ifnet *ifp, u16 vtag)
4428 {
4429         struct adapter  *adapter = ifp->if_softc;
4430         u32             index, bit;
4431
4432         if (ifp->if_softc !=  arg)   /* Not our event */
4433                 return;
4434
4435         if ((vtag == 0) || (vtag > 4095))       /* Invalid ID */
4436                 return;
4437
4438         EM_CORE_LOCK(adapter);
4439         index = (vtag >> 5) & 0x7F;
4440         bit = vtag & 0x1F;
4441         adapter->shadow_vfta[index] |= (1 << bit);
4442         ++adapter->num_vlans;
4443         /* Re-init to load the changes */
4444         if (ifp->if_capenable & IFCAP_VLAN_HWFILTER)
4445                 em_init_locked(adapter);
4446         EM_CORE_UNLOCK(adapter);
4447 }
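
/*
 * Worked example of the shadow-VFTA arithmetic above: the 4096
 * possible VLAN IDs map onto 128 32-bit words, so vtag = 100 gives
 * index = (100 >> 5) & 0x7F = 3 and bit = 100 & 0x1F = 4, i.e.
 * shadow_vfta[3] |= 0x00000010.
 */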
4448
4449 /*
4450  * This routine is run via a vlan
4451  * unconfig EVENT
4452  */
4453 static void
4454 em_unregister_vlan(void *arg, struct ifnet *ifp, u16 vtag)
4455 {
4456         struct adapter  *adapter = ifp->if_softc;
4457         u32             index, bit;
4458
4459         if (ifp->if_softc !=  arg)
4460                 return;
4461
4462         if ((vtag == 0) || (vtag > 4095))       /* Invalid */
4463                 return;
4464
4465         EM_CORE_LOCK(adapter);
4466         index = (vtag >> 5) & 0x7F;
4467         bit = vtag & 0x1F;
4468         adapter->shadow_vfta[index] &= ~(1 << bit);
4469         --adapter->num_vlans;
4470         /* Re-init to load the changes */
4471         if (ifp->if_capenable & IFCAP_VLAN_HWFILTER)
4472                 em_init_locked(adapter);
4473         EM_CORE_UNLOCK(adapter);
4474 }
4475
4476 static void
4477 em_setup_vlan_hw_support(struct adapter *adapter)
4478 {
4479         struct e1000_hw *hw = &adapter->hw;
4480         u32             reg;
4481
4482         /*
4483         ** We get here through init_locked, meaning
4484         ** a soft reset; this has already cleared
4485         ** the VFTA and other state, so if no
4486         ** vlans have been registered, do nothing.
4487         */
4488         if (adapter->num_vlans == 0)
4489                 return;
4490
4491         /*
4492         ** A soft reset zeroes out the VFTA, so
4493         ** we need to repopulate it now.
4494         */
4495         for (int i = 0; i < EM_VFTA_SIZE; i++)
4496                 if (adapter->shadow_vfta[i] != 0)
4497                         E1000_WRITE_REG_ARRAY(hw, E1000_VFTA,
4498                             i, adapter->shadow_vfta[i]);
4499
4500         reg = E1000_READ_REG(hw, E1000_CTRL);
4501         reg |= E1000_CTRL_VME;
4502         E1000_WRITE_REG(hw, E1000_CTRL, reg);
4503
4504         /* Enable the Filter Table */
4505         reg = E1000_READ_REG(hw, E1000_RCTL);
4506         reg &= ~E1000_RCTL_CFIEN;
4507         reg |= E1000_RCTL_VFE;
4508         E1000_WRITE_REG(hw, E1000_RCTL, reg);
4509 }
4510
4511 static void
4512 em_enable_intr(struct adapter *adapter)
4513 {
4514         struct e1000_hw *hw = &adapter->hw;
4515         u32 ims_mask = IMS_ENABLE_MASK;
4516
4517         if (hw->mac.type == e1000_82574) {
4518                 E1000_WRITE_REG(hw, EM_EIAC, EM_MSIX_MASK);
4519                 ims_mask |= EM_MSIX_MASK;
4520         } 
4521         E1000_WRITE_REG(hw, E1000_IMS, ims_mask);
4522 }
4523
4524 static void
4525 em_disable_intr(struct adapter *adapter)
4526 {
4527         struct e1000_hw *hw = &adapter->hw;
4528
4529         if (hw->mac.type == e1000_82574)
4530                 E1000_WRITE_REG(hw, EM_EIAC, 0);
4531         E1000_WRITE_REG(&adapter->hw, E1000_IMC, 0xffffffff);
4532 }
4533
4534 /*
4535  * A bit of a misnomer: what this really means is
4536  * to enable OS management of the system, i.e.
4537  * to disable the special hardware management features.
4538  */
4539 static void
4540 em_init_manageability(struct adapter *adapter)
4541 {
4542         /* A shared code workaround */
4543 #define E1000_82542_MANC2H E1000_MANC2H
4544         if (adapter->has_manage) {
4545                 int manc2h = E1000_READ_REG(&adapter->hw, E1000_MANC2H);
4546                 int manc = E1000_READ_REG(&adapter->hw, E1000_MANC);
4547
4548                 /* disable hardware interception of ARP */
4549                 manc &= ~(E1000_MANC_ARP_EN);
4550
4551                 /* enable receiving management packets to the host */
4552                 manc |= E1000_MANC_EN_MNG2HOST;
4553 #define E1000_MNG2HOST_PORT_623 (1 << 5)
4554 #define E1000_MNG2HOST_PORT_664 (1 << 6)
4555                 manc2h |= E1000_MNG2HOST_PORT_623;
4556                 manc2h |= E1000_MNG2HOST_PORT_664;
4557                 E1000_WRITE_REG(&adapter->hw, E1000_MANC2H, manc2h);
4558                 E1000_WRITE_REG(&adapter->hw, E1000_MANC, manc);
4559         }
4560 }
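
/*
 * Context for the two port filters above: UDP ports 623 and 664 are
 * the standard RMCP and secure-RMCP ports used by ASF/IPMI-style
 * management traffic, so setting those MANC2H bits routes such
 * packets up to the host as well.
 */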
4561
4562 /*
4563  * Give control back to hardware management
4564  * controller if there is one.
4565  */
4566 static void
4567 em_release_manageability(struct adapter *adapter)
4568 {
4569         if (adapter->has_manage) {
4570                 int manc = E1000_READ_REG(&adapter->hw, E1000_MANC);
4571
4572                 /* re-enable hardware interception of ARP */
4573                 manc |= E1000_MANC_ARP_EN;
4574                 manc &= ~E1000_MANC_EN_MNG2HOST;
4575
4576                 E1000_WRITE_REG(&adapter->hw, E1000_MANC, manc);
4577         }
4578 }
4579
4580 /*
4581  * em_get_hw_control sets the {CTRL_EXT|FWSM}:DRV_LOAD bit.
4582  * For ASF and Pass Through versions of f/w this means
4583  * that the driver is loaded. For AMT versions of the f/w
4584  * this means that the network i/f is open.
4585  */
4586 static void
4587 em_get_hw_control(struct adapter *adapter)
4588 {
4589         u32 ctrl_ext, swsm;
4590
4591         if (adapter->hw.mac.type == e1000_82573) {
4592                 swsm = E1000_READ_REG(&adapter->hw, E1000_SWSM);
4593                 E1000_WRITE_REG(&adapter->hw, E1000_SWSM,
4594                     swsm | E1000_SWSM_DRV_LOAD);
4595                 return;
4596         }
4597         /* else */
4598         ctrl_ext = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
4599         E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT,
4600             ctrl_ext | E1000_CTRL_EXT_DRV_LOAD);
4601         return;
4602 }
4603
4604 /*
4605  * em_release_hw_control resets {CTRL_EXT|FWSM}:DRV_LOAD bit.
4606  * For ASF and Pass Through versions of f/w this means that
4607  * the driver is no longer loaded. For AMT versions of the
4608  * f/w this means that the network i/f is closed.
4609  */
4610 static void
4611 em_release_hw_control(struct adapter *adapter)
4612 {
4613         u32 ctrl_ext, swsm;
4614
4615         if (!adapter->has_manage)
4616                 return;
4617
4618         if (adapter->hw.mac.type == e1000_82573) {
4619                 swsm = E1000_READ_REG(&adapter->hw, E1000_SWSM);
4620                 E1000_WRITE_REG(&adapter->hw, E1000_SWSM,
4621                     swsm & ~E1000_SWSM_DRV_LOAD);
4622                 return;
4623         }
4624         /* else */
4625         ctrl_ext = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
4626         E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT,
4627             ctrl_ext & ~E1000_CTRL_EXT_DRV_LOAD);
4628         return;
4629 }
4630
4631 static int
4632 em_is_valid_ether_addr(u8 *addr)
4633 {
4634         char zero_addr[6] = { 0, 0, 0, 0, 0, 0 };
4635
4636         if ((addr[0] & 1) || (!bcmp(addr, zero_addr, ETHER_ADDR_LEN))) {
4637                 return (FALSE);
4638         }
4639
4640         return (TRUE);
4641 }
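
/*
 * Note on the test above: addr[0] & 1 is the IEEE 802 I/G bit, so
 * any multicast address is rejected along with the all-zeros
 * address.  For example 01:00:5e:00:00:01 fails (group bit set),
 * while a locally administered unicast address such as
 * 02:00:00:00:00:01 passes.
 */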
4642
4643 /*
4644 ** Parse the interface capabilities with regard
4645 ** to both system management and wake-on-lan for
4646 ** later use.
4647 */
4648 static void
4649 em_get_wakeup(device_t dev)
4650 {
4651         struct adapter  *adapter = device_get_softc(dev);
4652         u16             eeprom_data = 0, device_id, apme_mask;
4653
4654         adapter->has_manage = e1000_enable_mng_pass_thru(&adapter->hw);
4655         apme_mask = EM_EEPROM_APME;
4656
4657         switch (adapter->hw.mac.type) {
4658         case e1000_82573:
4659         case e1000_82583:
4660                 adapter->has_amt = TRUE;
4661                 /* Falls thru */
4662         case e1000_82571:
4663         case e1000_82572:
4664         case e1000_80003es2lan:
4665                 if (adapter->hw.bus.func == 1) {
4666                         e1000_read_nvm(&adapter->hw,
4667                             NVM_INIT_CONTROL3_PORT_B, 1, &eeprom_data);
4668                         break;
4669                 } else
4670                         e1000_read_nvm(&adapter->hw,
4671                             NVM_INIT_CONTROL3_PORT_A, 1, &eeprom_data);
4672                 break;
4673         case e1000_ich8lan:
4674         case e1000_ich9lan:
4675         case e1000_ich10lan:
4676         case e1000_pchlan:
4677         case e1000_pch2lan:
4678                 apme_mask = E1000_WUC_APME;
4679                 adapter->has_amt = TRUE;
4680                 eeprom_data = E1000_READ_REG(&adapter->hw, E1000_WUC);
4681                 break;
4682         default:
4683                 e1000_read_nvm(&adapter->hw,
4684                     NVM_INIT_CONTROL3_PORT_A, 1, &eeprom_data);
4685                 break;
4686         }
4687         if (eeprom_data & apme_mask)
4688                 adapter->wol = (E1000_WUFC_MAG | E1000_WUFC_MC);
4689         /*
4690          * We have the eeprom settings, now apply the special cases
4691          * where the eeprom may be wrong or the board won't support
4692          * wake on lan on a particular port
4693          */
4694         device_id = pci_get_device(dev);
4695         switch (device_id) {
4696         case E1000_DEV_ID_82571EB_FIBER:
4697                 /* Wake events only supported on port A for dual fiber
4698                  * regardless of eeprom setting */
4699                 if (E1000_READ_REG(&adapter->hw, E1000_STATUS) &
4700                     E1000_STATUS_FUNC_1)
4701                         adapter->wol = 0;
4702                 break;
4703         case E1000_DEV_ID_82571EB_QUAD_COPPER:
4704         case E1000_DEV_ID_82571EB_QUAD_FIBER:
4705         case E1000_DEV_ID_82571EB_QUAD_COPPER_LP:
4706                 /* if quad port adapter, disable WoL on all but port A */
4707                 if (global_quad_port_a != 0)
4708                         adapter->wol = 0;
4709                 /* Reset for multiple quad port adapters */
4710                 if (++global_quad_port_a == 4)
4711                         global_quad_port_a = 0;
4712                 break;
4713         }
4714         return;
4715 }
4716
4717
4718 /*
4719  * Enable PCI Wake On Lan capability
4720  */
4721 static void
4722 em_enable_wakeup(device_t dev)
4723 {
4724         struct adapter  *adapter = device_get_softc(dev);
4725         struct ifnet    *ifp = adapter->ifp;
4726         u32             pmc, ctrl, ctrl_ext, rctl;
4727         u16             status;
4728
4729         if ((pci_find_extcap(dev, PCIY_PMG, &pmc) != 0))
4730                 return;
4731
4732         /* Advertise the wakeup capability */
4733         ctrl = E1000_READ_REG(&adapter->hw, E1000_CTRL);
4734         ctrl |= (E1000_CTRL_SWDPIN2 | E1000_CTRL_SWDPIN3);
4735         E1000_WRITE_REG(&adapter->hw, E1000_CTRL, ctrl);
4736         E1000_WRITE_REG(&adapter->hw, E1000_WUC, E1000_WUC_PME_EN);
4737
4738         if ((adapter->hw.mac.type == e1000_ich8lan) ||
4739             (adapter->hw.mac.type == e1000_pchlan) ||
4740             (adapter->hw.mac.type == e1000_ich9lan) ||
4741             (adapter->hw.mac.type == e1000_ich10lan)) {
4742                 e1000_disable_gig_wol_ich8lan(&adapter->hw);
4743                 e1000_hv_phy_powerdown_workaround_ich8lan(&adapter->hw);
4744         }
4745
4746         /* Keep the laser running on Fiber adapters */
4747         if (adapter->hw.phy.media_type == e1000_media_type_fiber ||
4748             adapter->hw.phy.media_type == e1000_media_type_internal_serdes) {
4749                 ctrl_ext = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
4750                 ctrl_ext |= E1000_CTRL_EXT_SDP3_DATA;
4751                 E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT, ctrl_ext);
4752         }
4753
4754         /*
4755         ** Determine type of Wakeup: note that wol
4756         ** is set with all bits on by default.
4757         */
4758         if ((ifp->if_capenable & IFCAP_WOL_MAGIC) == 0)
4759                 adapter->wol &= ~E1000_WUFC_MAG;
4760
4761         if ((ifp->if_capenable & IFCAP_WOL_MCAST) == 0)
4762                 adapter->wol &= ~E1000_WUFC_MC;
4763         else {
4764                 rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
4765                 rctl |= E1000_RCTL_MPE;
4766                 E1000_WRITE_REG(&adapter->hw, E1000_RCTL, rctl);
4767         }
4768
4769         if ((adapter->hw.mac.type == e1000_pchlan) ||
4770             (adapter->hw.mac.type == e1000_pch2lan)) {
4771                 if (em_enable_phy_wakeup(adapter))
4772                         return;
4773         } else {
4774                 E1000_WRITE_REG(&adapter->hw, E1000_WUC, E1000_WUC_PME_EN);
4775                 E1000_WRITE_REG(&adapter->hw, E1000_WUFC, adapter->wol);
4776         }
4777
4778         if (adapter->hw.phy.type == e1000_phy_igp_3)
4779                 e1000_igp3_phy_powerdown_workaround_ich8lan(&adapter->hw);
4780
4781         /* Request PME */
4782         status = pci_read_config(dev, pmc + PCIR_POWER_STATUS, 2);
4783         status &= ~(PCIM_PSTAT_PME | PCIM_PSTAT_PMEENABLE);
4784         if (ifp->if_capenable & IFCAP_WOL)
4785                 status |= PCIM_PSTAT_PME | PCIM_PSTAT_PMEENABLE;
4786         pci_write_config(dev, pmc + PCIR_POWER_STATUS, status, 2);
4787
4788         return;
4789 }
4790
4791 /*
4792 ** WOL in the newer chipset interfaces (pchlan)
4793 ** requires things to be copied into the PHY
4794 */
4795 static int
4796 em_enable_phy_wakeup(struct adapter *adapter)
4797 {
4798         struct e1000_hw *hw = &adapter->hw;
4799         u32 mreg, ret = 0;
4800         u16 preg;
4801
4802         /* copy MAC RARs to PHY RARs */
4803         e1000_copy_rx_addrs_to_phy_ich8lan(hw);
4804
4805         /* copy MAC MTA to PHY MTA */
4806         for (int i = 0; i < adapter->hw.mac.mta_reg_count; i++) {
4807                 mreg = E1000_READ_REG_ARRAY(hw, E1000_MTA, i);
4808                 e1000_write_phy_reg(hw, BM_MTA(i), (u16)(mreg & 0xFFFF));
4809                 e1000_write_phy_reg(hw, BM_MTA(i) + 1,
4810                     (u16)((mreg >> 16) & 0xFFFF));
4811         }
4812
4813         /* configure PHY Rx Control register */
4814         e1000_read_phy_reg(&adapter->hw, BM_RCTL, &preg);
4815         mreg = E1000_READ_REG(hw, E1000_RCTL);
4816         if (mreg & E1000_RCTL_UPE)
4817                 preg |= BM_RCTL_UPE;
4818         if (mreg & E1000_RCTL_MPE)
4819                 preg |= BM_RCTL_MPE;
4820         preg &= ~(BM_RCTL_MO_MASK);
4821         if (mreg & E1000_RCTL_MO_3)
4822                 preg |= (((mreg & E1000_RCTL_MO_3) >> E1000_RCTL_MO_SHIFT)
4823                                 << BM_RCTL_MO_SHIFT);
4824         if (mreg & E1000_RCTL_BAM)
4825                 preg |= BM_RCTL_BAM;
4826         if (mreg & E1000_RCTL_PMCF)
4827                 preg |= BM_RCTL_PMCF;
4828         mreg = E1000_READ_REG(hw, E1000_CTRL);
4829         if (mreg & E1000_CTRL_RFCE)
4830                 preg |= BM_RCTL_RFCE;
4831         e1000_write_phy_reg(&adapter->hw, BM_RCTL, preg);
4832
4833         /* enable PHY wakeup in MAC register */
4834         E1000_WRITE_REG(hw, E1000_WUC,
4835             E1000_WUC_PHY_WAKE | E1000_WUC_PME_EN);
4836         E1000_WRITE_REG(hw, E1000_WUFC, adapter->wol);
4837
4838         /* configure and enable PHY wakeup in PHY registers */
4839         e1000_write_phy_reg(&adapter->hw, BM_WUFC, adapter->wol);
4840         e1000_write_phy_reg(&adapter->hw, BM_WUC, E1000_WUC_PME_EN);
4841
4842         /* activate PHY wakeup */
4843         ret = hw->phy.ops.acquire(hw);
4844         if (ret) {
4845                 printf("Could not acquire PHY\n");
4846                 return ret;
4847         }
4848         e1000_write_phy_reg_mdic(hw, IGP01E1000_PHY_PAGE_SELECT,
4849                                  (BM_WUC_ENABLE_PAGE << IGP_PAGE_SHIFT));
4850         ret = e1000_read_phy_reg_mdic(hw, BM_WUC_ENABLE_REG, &preg);
4851         if (ret) {
4852                 printf("Could not read PHY page 769\n");
4853                 goto out;
4854         }
4855         preg |= BM_WUC_ENABLE_BIT | BM_WUC_HOST_WU_BIT;
4856         ret = e1000_write_phy_reg_mdic(hw, BM_WUC_ENABLE_REG, preg);
4857         if (ret)
4858                 printf("Could not set PHY Host Wakeup bit\n");
4859 out:
4860         hw->phy.ops.release(hw);
4861
4862         return ret;
4863 }
4864
4865 static void
4866 em_led_func(void *arg, int onoff)
4867 {
4868         struct adapter  *adapter = arg;
4869  
4870         EM_CORE_LOCK(adapter);
4871         if (onoff) {
4872                 e1000_setup_led(&adapter->hw);
4873                 e1000_led_on(&adapter->hw);
4874         } else {
4875                 e1000_led_off(&adapter->hw);
4876                 e1000_cleanup_led(&adapter->hw);
4877         }
4878         EM_CORE_UNLOCK(adapter);
4879 }
4880
4881 /*
4882 ** Disable the L0s and L1 link states
4883 */
4884 static void
4885 em_disable_aspm(struct adapter *adapter)
4886 {
4887         int             base, reg;
4888         u16             link_cap, link_ctrl;
4889         device_t        dev = adapter->dev;
4890
4891         switch (adapter->hw.mac.type) {
4892                 case e1000_82573:
4893                 case e1000_82574:
4894                 case e1000_82583:
4895                         break;
4896                 default:
4897                         return;
4898         }
4899         if (pci_find_extcap(dev, PCIY_EXPRESS, &base) != 0)
4900                 return;
4901         reg = base + PCIR_EXPRESS_LINK_CAP;
4902         link_cap = pci_read_config(dev, reg, 2);
4903         if ((link_cap & PCIM_LINK_CAP_ASPM) == 0)
4904                 return;
4905         reg = base + PCIR_EXPRESS_LINK_CTL;
4906         link_ctrl = pci_read_config(dev, reg, 2);
4907         link_ctrl &= 0xFFFC; /* clear bits 0 and 1 (ASPM L0s/L1) */
4908         pci_write_config(dev, reg, link_ctrl, 2);
4909         return;
4910 }
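
/*
 * For reference: in the PCIe Link Control register the ASPM Control
 * field occupies bits 1:0 (01b = L0s, 10b = L1, 11b = both), so
 * ANDing with 0xFFFC sets the field to 00b, disabling both states.
 */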
4911
4912 /**********************************************************************
4913  *
4914  *  Update the board statistics counters.
4915  *
4916  **********************************************************************/
4917 static void
4918 em_update_stats_counters(struct adapter *adapter)
4919 {
4920         struct ifnet   *ifp;
4921
4922         if (adapter->hw.phy.media_type == e1000_media_type_copper ||
4923            (E1000_READ_REG(&adapter->hw, E1000_STATUS) & E1000_STATUS_LU)) {
4924                 adapter->stats.symerrs += E1000_READ_REG(&adapter->hw, E1000_SYMERRS);
4925                 adapter->stats.sec += E1000_READ_REG(&adapter->hw, E1000_SEC);
4926         }
4927         adapter->stats.crcerrs += E1000_READ_REG(&adapter->hw, E1000_CRCERRS);
4928         adapter->stats.mpc += E1000_READ_REG(&adapter->hw, E1000_MPC);
4929         adapter->stats.scc += E1000_READ_REG(&adapter->hw, E1000_SCC);
4930         adapter->stats.ecol += E1000_READ_REG(&adapter->hw, E1000_ECOL);
4931
4932         adapter->stats.mcc += E1000_READ_REG(&adapter->hw, E1000_MCC);
4933         adapter->stats.latecol += E1000_READ_REG(&adapter->hw, E1000_LATECOL);
4934         adapter->stats.colc += E1000_READ_REG(&adapter->hw, E1000_COLC);
4935         adapter->stats.dc += E1000_READ_REG(&adapter->hw, E1000_DC);
4936         adapter->stats.rlec += E1000_READ_REG(&adapter->hw, E1000_RLEC);
4937         adapter->stats.xonrxc += E1000_READ_REG(&adapter->hw, E1000_XONRXC);
4938         adapter->stats.xontxc += E1000_READ_REG(&adapter->hw, E1000_XONTXC);
4939         /*
4940         ** For watchdog management we need to know if we have been
4941         ** paused during the last interval, so capture that here.
4942         */
4943         adapter->pause_frames = E1000_READ_REG(&adapter->hw, E1000_XOFFRXC);
4944         adapter->stats.xoffrxc += adapter->pause_frames;
4945         adapter->stats.xofftxc += E1000_READ_REG(&adapter->hw, E1000_XOFFTXC);
4946         adapter->stats.fcruc += E1000_READ_REG(&adapter->hw, E1000_FCRUC);
4947         adapter->stats.prc64 += E1000_READ_REG(&adapter->hw, E1000_PRC64);
4948         adapter->stats.prc127 += E1000_READ_REG(&adapter->hw, E1000_PRC127);
4949         adapter->stats.prc255 += E1000_READ_REG(&adapter->hw, E1000_PRC255);
4950         adapter->stats.prc511 += E1000_READ_REG(&adapter->hw, E1000_PRC511);
4951         adapter->stats.prc1023 += E1000_READ_REG(&adapter->hw, E1000_PRC1023);
4952         adapter->stats.prc1522 += E1000_READ_REG(&adapter->hw, E1000_PRC1522);
4953         adapter->stats.gprc += E1000_READ_REG(&adapter->hw, E1000_GPRC);
4954         adapter->stats.bprc += E1000_READ_REG(&adapter->hw, E1000_BPRC);
4955         adapter->stats.mprc += E1000_READ_REG(&adapter->hw, E1000_MPRC);
4956         adapter->stats.gptc += E1000_READ_REG(&adapter->hw, E1000_GPTC);
4957
4958         /* For the 64-bit byte counters the low dword must be read first. */
4959         /* Both registers clear on the read of the high dword */
4960
4961         adapter->stats.gorc += E1000_READ_REG(&adapter->hw, E1000_GORCL) +
4962             ((u64)E1000_READ_REG(&adapter->hw, E1000_GORCH) << 32);
4963         adapter->stats.gotc += E1000_READ_REG(&adapter->hw, E1000_GOTCL) +
4964             ((u64)E1000_READ_REG(&adapter->hw, E1000_GOTCH) << 32);
4965
4966         adapter->stats.rnbc += E1000_READ_REG(&adapter->hw, E1000_RNBC);
4967         adapter->stats.ruc += E1000_READ_REG(&adapter->hw, E1000_RUC);
4968         adapter->stats.rfc += E1000_READ_REG(&adapter->hw, E1000_RFC);
4969         adapter->stats.roc += E1000_READ_REG(&adapter->hw, E1000_ROC);
4970         adapter->stats.rjc += E1000_READ_REG(&adapter->hw, E1000_RJC);
4971
4972         adapter->stats.tor += E1000_READ_REG(&adapter->hw, E1000_TORH);
4973         adapter->stats.tot += E1000_READ_REG(&adapter->hw, E1000_TOTH);
4974
4975         adapter->stats.tpr += E1000_READ_REG(&adapter->hw, E1000_TPR);
4976         adapter->stats.tpt += E1000_READ_REG(&adapter->hw, E1000_TPT);
4977         adapter->stats.ptc64 += E1000_READ_REG(&adapter->hw, E1000_PTC64);
4978         adapter->stats.ptc127 += E1000_READ_REG(&adapter->hw, E1000_PTC127);
4979         adapter->stats.ptc255 += E1000_READ_REG(&adapter->hw, E1000_PTC255);
4980         adapter->stats.ptc511 += E1000_READ_REG(&adapter->hw, E1000_PTC511);
4981         adapter->stats.ptc1023 += E1000_READ_REG(&adapter->hw, E1000_PTC1023);
4982         adapter->stats.ptc1522 += E1000_READ_REG(&adapter->hw, E1000_PTC1522);
4983         adapter->stats.mptc += E1000_READ_REG(&adapter->hw, E1000_MPTC);
4984         adapter->stats.bptc += E1000_READ_REG(&adapter->hw, E1000_BPTC);
4985
4986         /* Interrupt Counts */
4987
4988         adapter->stats.iac += E1000_READ_REG(&adapter->hw, E1000_IAC);
4989         adapter->stats.icrxptc += E1000_READ_REG(&adapter->hw, E1000_ICRXPTC);
4990         adapter->stats.icrxatc += E1000_READ_REG(&adapter->hw, E1000_ICRXATC);
4991         adapter->stats.ictxptc += E1000_READ_REG(&adapter->hw, E1000_ICTXPTC);
4992         adapter->stats.ictxatc += E1000_READ_REG(&adapter->hw, E1000_ICTXATC);
4993         adapter->stats.ictxqec += E1000_READ_REG(&adapter->hw, E1000_ICTXQEC);
4994         adapter->stats.ictxqmtc += E1000_READ_REG(&adapter->hw, E1000_ICTXQMTC);
4995         adapter->stats.icrxdmtc += E1000_READ_REG(&adapter->hw, E1000_ICRXDMTC);
4996         adapter->stats.icrxoc += E1000_READ_REG(&adapter->hw, E1000_ICRXOC);
4997
4998         if (adapter->hw.mac.type >= e1000_82543) {
4999                 adapter->stats.algnerrc +=
5000                     E1000_READ_REG(&adapter->hw, E1000_ALGNERRC);
5001                 adapter->stats.rxerrc +=
5002                     E1000_READ_REG(&adapter->hw, E1000_RXERRC);
5003                 adapter->stats.tncrs +=
5004                     E1000_READ_REG(&adapter->hw, E1000_TNCRS);
5005                 adapter->stats.cexterr +=
5006                     E1000_READ_REG(&adapter->hw, E1000_CEXTERR);
5007                 adapter->stats.tsctc +=
5008                     E1000_READ_REG(&adapter->hw, E1000_TSCTC);
5009                 adapter->stats.tsctfc +=
5010                     E1000_READ_REG(&adapter->hw, E1000_TSCTFC);
5011         }
5012         ifp = adapter->ifp;
5013
5014         ifp->if_collisions = adapter->stats.colc;
5015
5016         /* Rx Errors */
5017         ifp->if_ierrors = adapter->dropped_pkts + adapter->stats.rxerrc +
5018             adapter->stats.crcerrs + adapter->stats.algnerrc +
5019             adapter->stats.ruc + adapter->stats.roc +
5020             adapter->stats.mpc + adapter->stats.cexterr;
5021
5022         /* Tx Errors */
5023         ifp->if_oerrors = adapter->stats.ecol +
5024             adapter->stats.latecol + adapter->watchdog_events;
5025 }
5026
5027 /* Export a single 32-bit register via a read-only sysctl. */
5028 static int
5029 em_sysctl_reg_handler(SYSCTL_HANDLER_ARGS)
5030 {
5031         struct adapter *adapter;
5032         u_int val;
5033
5034         adapter = oidp->oid_arg1;
5035         val = E1000_READ_REG(&adapter->hw, oidp->oid_arg2);
5036         return (sysctl_handle_int(oidp, &val, 0, req));
5037 }
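
/*
 * Usage sketch (unit number hypothetical): a register exported this
 * way appears read-only under the device's sysctl tree, e.g.
 *
 *      sysctl dev.em.0.device_control
 *
 * returns the live contents of E1000_CTRL as an unsigned int.
 */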
5038
5039 /*
5040  * Add sysctl variables, one per statistic, to the system.
5041  */
5042 static void
5043 em_add_hw_stats(struct adapter *adapter)
5044 {
5045         device_t dev = adapter->dev;
5046
5047         struct tx_ring *txr = adapter->tx_rings;
5048         struct rx_ring *rxr = adapter->rx_rings;
5049
5050         struct sysctl_ctx_list *ctx = device_get_sysctl_ctx(dev);
5051         struct sysctl_oid *tree = device_get_sysctl_tree(dev);
5052         struct sysctl_oid_list *child = SYSCTL_CHILDREN(tree);
5053         struct e1000_hw_stats *stats = &adapter->stats;
5054
5055         struct sysctl_oid *stat_node, *queue_node, *int_node;
5056         struct sysctl_oid_list *stat_list, *queue_list, *int_list;
5057
5058 #define QUEUE_NAME_LEN 32
5059         char namebuf[QUEUE_NAME_LEN];
5060         
5061         /* Driver Statistics */
5062         SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "link_irq", 
5063                         CTLFLAG_RD, &adapter->link_irq, 0,
5064                         "Link MSIX IRQ Handled");
5065         SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "mbuf_alloc_fail", 
5066                          CTLFLAG_RD, &adapter->mbuf_alloc_failed,
5067                          "Std mbuf failed");
5068         SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "cluster_alloc_fail", 
5069                          CTLFLAG_RD, &adapter->mbuf_cluster_failed,
5070                          "Std mbuf cluster failed");
5071         SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "dropped", 
5072                         CTLFLAG_RD, &adapter->dropped_pkts,
5073                         "Driver dropped packets");
5074         SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "tx_dma_fail", 
5075                         CTLFLAG_RD, &adapter->no_tx_dma_setup,
5076                         "Driver tx dma failure in xmit");
5077         SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "rx_overruns",
5078                         CTLFLAG_RD, &adapter->rx_overruns,
5079                         "RX overruns");
5080         SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "watchdog_timeouts",
5081                         CTLFLAG_RD, &adapter->watchdog_events,
5082                         "Watchdog timeouts");
5083         
5084         SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "device_control",
5085                         CTLFLAG_RD, adapter, E1000_CTRL,
5086                         em_sysctl_reg_handler, "IU",
5087                         "Device Control Register");
5088         SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "rx_control",
5089                         CTLFLAG_RD, adapter, E1000_RCTL,
5090                         em_sysctl_reg_handler, "IU",
5091                         "Receiver Control Register");
5092         SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "fc_high_water",
5093                         CTLFLAG_RD, &adapter->hw.fc.high_water, 0,
5094                         "Flow Control High Watermark");
5095         SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "fc_low_water", 
5096                         CTLFLAG_RD, &adapter->hw.fc.low_water, 0,
5097                         "Flow Control Low Watermark");
5098
5099         for (int i = 0; i < adapter->num_queues; i++, rxr++, txr++) {
5100                 snprintf(namebuf, QUEUE_NAME_LEN, "queue%d", i);
5101                 queue_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, namebuf,
5102                                             CTLFLAG_RD, NULL, "Queue Name");
5103                 queue_list = SYSCTL_CHILDREN(queue_node);
5104
5105                 SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "txd_head", 
5106                                 CTLFLAG_RD, adapter, E1000_TDH(txr->me),
5107                                 em_sysctl_reg_handler, "IU",
5108                                 "Transmit Descriptor Head");
5109                 SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "txd_tail", 
5110                                 CTLFLAG_RD, adapter, E1000_TDT(txr->me),
5111                                 em_sysctl_reg_handler, "IU",
5112                                 "Transmit Descriptor Tail");
5113                 SYSCTL_ADD_ULONG(ctx, queue_list, OID_AUTO, "tx_irq",
5114                                 CTLFLAG_RD, &txr->tx_irq,
5115                                 "Queue MSI-X Transmit Interrupts");
5116                 SYSCTL_ADD_ULONG(ctx, queue_list, OID_AUTO, "no_desc_avail", 
5117                                 CTLFLAG_RD, &txr->no_desc_avail,
5118                                 "Queue No Descriptor Available");
5119                 
5120                 SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "rxd_head", 
5121                                 CTLFLAG_RD, adapter, E1000_RDH(rxr->me),
5122                                 em_sysctl_reg_handler, "IU",
5123                                 "Receive Descriptor Head");
5124                 SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "rxd_tail", 
5125                                 CTLFLAG_RD, adapter, E1000_RDT(rxr->me),
5126                                 em_sysctl_reg_handler, "IU",
5127                                 "Receive Descriptor Tail");
5128                 SYSCTL_ADD_ULONG(ctx, queue_list, OID_AUTO, "rx_irq",
5129                                 CTLFLAG_RD, &rxr->rx_irq,
5130                                 "Queue MSI-X Receive Interrupts");
5131         }
5132
5133         /* MAC stats get their own sub node */
5134
5135         stat_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "mac_stats", 
5136                                     CTLFLAG_RD, NULL, "Statistics");
5137         stat_list = SYSCTL_CHILDREN(stat_node);
5138
5139         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "excess_coll", 
5140                         CTLFLAG_RD, &stats->ecol,
5141                         "Excessive collisions");
5142         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "single_coll", 
5143                         CTLFLAG_RD, &stats->scc,
5144                         "Single collisions");
5145         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "multiple_coll", 
5146                         CTLFLAG_RD, &stats->mcc,
5147                         "Multiple collisions");
5148         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "late_coll", 
5149                         CTLFLAG_RD, &stats->latecol,
5150                         "Late collisions");
5151         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "collision_count", 
5152                         CTLFLAG_RD, &stats->colc,
5153                         "Collision Count");
5154         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "symbol_errors",
5155                         CTLFLAG_RD, &adapter->stats.symerrs,
5156                         "Symbol Errors");
5157         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "sequence_errors",
5158                         CTLFLAG_RD, &adapter->stats.sec,
5159                         "Sequence Errors");
5160         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "defer_count",
5161                         CTLFLAG_RD, &adapter->stats.dc,
5162                         "Defer Count");
5163         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "missed_packets",
5164                         CTLFLAG_RD, &adapter->stats.mpc,
5165                         "Missed Packets");
5166         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "recv_no_buff",
5167                         CTLFLAG_RD, &adapter->stats.rnbc,
5168                         "Receive No Buffers");
5169         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "recv_undersize",
5170                         CTLFLAG_RD, &adapter->stats.ruc,
5171                         "Receive Undersize");
5172         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "recv_fragmented",
5173                         CTLFLAG_RD, &adapter->stats.rfc,
5174                         "Fragmented Packets Received ");
5175         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "recv_oversize",
5176                         CTLFLAG_RD, &adapter->stats.roc,
5177                         "Oversized Packets Received");
5178         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "recv_jabber",
5179                         CTLFLAG_RD, &adapter->stats.rjc,
5180                         "Recevied Jabber");
5181         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "recv_errs",
5182                         CTLFLAG_RD, &adapter->stats.rxerrc,
5183                         "Receive Errors");
5184         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "crc_errs",
5185                         CTLFLAG_RD, &adapter->stats.crcerrs,
5186                         "CRC errors");
5187         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "alignment_errs",
5188                         CTLFLAG_RD, &adapter->stats.algnerrc,
5189                         "Alignment Errors");
5190         /* On 82575 these are collision counts */
5191         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "coll_ext_errs",
5192                         CTLFLAG_RD, &adapter->stats.cexterr,
5193                         "Collision/Carrier extension errors");
5194         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "xon_recvd",
5195                         CTLFLAG_RD, &adapter->stats.xonrxc,
5196                         "XON Received");
5197         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "xon_txd",
5198                         CTLFLAG_RD, &adapter->stats.xontxc,
5199                         "XON Transmitted");
5200         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "xoff_recvd",
5201                         CTLFLAG_RD, &adapter->stats.xoffrxc,
5202                         "XOFF Received");
5203         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "xoff_txd",
5204                         CTLFLAG_RD, &adapter->stats.xofftxc,
5205                         "XOFF Transmitted");
5206
5207         /* Packet Reception Stats */
5208         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "total_pkts_recvd",
5209                         CTLFLAG_RD, &adapter->stats.tpr,
5210                         "Total Packets Received ");
5211         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "good_pkts_recvd",
5212                         CTLFLAG_RD, &adapter->stats.gprc,
5213                         "Good Packets Received");
5214         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "bcast_pkts_recvd",
5215                         CTLFLAG_RD, &adapter->stats.bprc,
5216                         "Broadcast Packets Received");
5217         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "mcast_pkts_recvd",
5218                         CTLFLAG_RD, &adapter->stats.mprc,
5219                         "Multicast Packets Received");
5220         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "rx_frames_64",
5221                         CTLFLAG_RD, &adapter->stats.prc64,
5222                         "64 byte frames received ");
5223         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "rx_frames_65_127",
5224                         CTLFLAG_RD, &adapter->stats.prc127,
5225                         "65-127 byte frames received");
5226         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "rx_frames_128_255",
5227                         CTLFLAG_RD, &adapter->stats.prc255,
5228                         "128-255 byte frames received");
5229         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "rx_frames_256_511",
5230                         CTLFLAG_RD, &adapter->stats.prc511,
5231                         "256-511 byte frames received");
5232         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "rx_frames_512_1023",
5233                         CTLFLAG_RD, &adapter->stats.prc1023,
5234                         "512-1023 byte frames received");
5235         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "rx_frames_1024_1522",
5236                         CTLFLAG_RD, &adapter->stats.prc1522,
5237                         "1023-1522 byte frames received");
5238         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "good_octets_recvd", 
5239                         CTLFLAG_RD, &adapter->stats.gorc, 
5240                         "Good Octets Received"); 
5241
5242         /* Packet Transmission Stats */
5243         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "good_octets_txd", 
5244                         CTLFLAG_RD, &adapter->stats.gotc, 
5245                         "Good Octets Transmitted"); 
5246         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "total_pkts_txd",
5247                         CTLFLAG_RD, &adapter->stats.tpt,
5248                         "Total Packets Transmitted");
5249         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "good_pkts_txd",
5250                         CTLFLAG_RD, &adapter->stats.gptc,
5251                         "Good Packets Transmitted");
5252         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "bcast_pkts_txd",
5253                         CTLFLAG_RD, &adapter->stats.bptc,
5254                         "Broadcast Packets Transmitted");
5255         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "mcast_pkts_txd",
5256                         CTLFLAG_RD, &adapter->stats.mptc,
5257                         "Multicast Packets Transmitted");
5258         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tx_frames_64",
5259                         CTLFLAG_RD, &adapter->stats.ptc64,
5260                         "64 byte frames transmitted ");
5261         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tx_frames_65_127",
5262                         CTLFLAG_RD, &adapter->stats.ptc127,
5263                         "65-127 byte frames transmitted");
5264         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tx_frames_128_255",
5265                         CTLFLAG_RD, &adapter->stats.ptc255,
5266                         "128-255 byte frames transmitted");
5267         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tx_frames_256_511",
5268                         CTLFLAG_RD, &adapter->stats.ptc511,
5269                         "256-511 byte frames transmitted");
5270         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tx_frames_512_1023",
5271                         CTLFLAG_RD, &adapter->stats.ptc1023,
5272                         "512-1023 byte frames transmitted");
5273         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tx_frames_1024_1522",
5274                         CTLFLAG_RD, &adapter->stats.ptc1522,
5275                         "1024-1522 byte frames transmitted");
5276         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tso_txd",
5277                         CTLFLAG_RD, &adapter->stats.tsctc,
5278                         "TSO Contexts Transmitted");
5279         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tso_ctx_fail",
5280                         CTLFLAG_RD, &adapter->stats.tsctfc,
5281                         "TSO Contexts Failed");
5282
5283
5284         /* Interrupt Stats */
5285
5286         int_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "interrupts", 
5287                                     CTLFLAG_RD, NULL, "Interrupt Statistics");
5288         int_list = SYSCTL_CHILDREN(int_node);
5289
5290         SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "asserts",
5291                         CTLFLAG_RD, &adapter->stats.iac,
5292                         "Interrupt Assertion Count");
5293
5294         SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "rx_pkt_timer",
5295                         CTLFLAG_RD, &adapter->stats.icrxptc,
5296                         "Interrupt Cause Rx Pkt Timer Expire Count");
5297
5298         SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "rx_abs_timer",
5299                         CTLFLAG_RD, &adapter->stats.icrxatc,
5300                         "Interrupt Cause Rx Abs Timer Expire Count");
5301
5302         SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "tx_pkt_timer",
5303                         CTLFLAG_RD, &adapter->stats.ictxptc,
5304                         "Interrupt Cause Tx Pkt Timer Expire Count");
5305
5306         SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "tx_abs_timer",
5307                         CTLFLAG_RD, &adapter->stats.ictxatc,
5308                         "Interrupt Cause Tx Abs Timer Expire Count");
5309
5310         SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "tx_queue_empty",
5311                         CTLFLAG_RD, &adapter->stats.ictxqec,
5312                         "Interrupt Cause Tx Queue Empty Count");
5313
5314         SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "tx_queue_min_thresh",
5315                         CTLFLAG_RD, &adapter->stats.ictxqmtc,
5316                         "Interrupt Cause Tx Queue Min Thresh Count");
5317
5318         SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "rx_desc_min_thresh",
5319                         CTLFLAG_RD, &adapter->stats.icrxdmtc,
5320                         "Interrupt Cause Rx Desc Min Thresh Count");
5321
5322         SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "rx_overrun",
5323                         CTLFLAG_RD, &adapter->stats.icrxoc,
5324                         "Interrupt Cause Receiver Overrun Count");
5325 }
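
     /*
      * Everything registered above surfaces as a read-only 64-bit
      * counter under the adapter's sysctl tree.  A minimal userland
      * sketch (example only, never compiled into the driver) that
      * reads the interrupt assertion counter; "dev.em.0" assumes the
      * first em(4) instance and that the "interrupts" node above
      * hangs off the device's own tree:
      */
     #if 0
     #include <sys/types.h>
     #include <sys/sysctl.h>
     #include <stdint.h>
     #include <stdio.h>

     int
     main(void)
     {
             uint64_t asserts;
             size_t len = sizeof(asserts);

             /* The leaf name matches the SYSCTL_ADD_QUAD("asserts") above. */
             if (sysctlbyname("dev.em.0.interrupts.asserts", &asserts,
                 &len, NULL, 0) == -1) {
                     perror("sysctlbyname");
                     return (1);
             }
             printf("interrupt assertions: %ju\n", (uintmax_t)asserts);
             return (0);
     }
     #endif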
5326
5327 /**********************************************************************
5328  *
5329  *  This routine provides a way to dump out the adapter EEPROM,
5330  *  often a useful debug/service tool.  It dumps only the first
5331  *  32 words; everything of interest lives in that range.
5332  *
5333  **********************************************************************/
5334 static int
5335 em_sysctl_nvm_info(SYSCTL_HANDLER_ARGS)
5336 {
5337         struct adapter *adapter;
5338         int error;
5339         int result;
5340
5341         result = -1;
5342         error = sysctl_handle_int(oidp, &result, 0, req);
5343
5344         if (error || !req->newptr)
5345                 return (error);
5346
5347         /*
5348          * A value of 1 triggers a hex dump of the
5349          * first 32 16-bit words of the EEPROM to
5350          * the console.
5351          */
5352         if (result == 1) {
5353                 adapter = (struct adapter *)arg1;
5354                 em_print_nvm_info(adapter);
5355         }
5356
5357         return (error);
5358 }
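
     /*
      * The dump is write-triggered: "sysctl dev.em.0.nvm=1" from the
      * shell does it, assuming unit 0 and that this handler is hooked
      * up under a leaf named "nvm" elsewhere in this driver.  The same
      * poke from C, as a sketch (example only, not part of the build):
      */
     #if 0
     #include <sys/types.h>
     #include <sys/sysctl.h>

     int
     trigger_nvm_dump(void)
     {
             int one = 1;

             /* Writing 1 makes the handler call em_print_nvm_info(). */
             return (sysctlbyname("dev.em.0.nvm", NULL, NULL,
                 &one, sizeof(one)));
     }
     #endif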
5359
5360 static void
5361 em_print_nvm_info(struct adapter *adapter)
5362 {
5363         u16     eeprom_data;
5364         int     i, j, row = 0;
5365
5366         /* It's a bit crude, but it gets the job done */
5367         printf("\nInterface EEPROM Dump:\n");
5368         printf("Offset\n0x0000  ");
5369         for (i = 0, j = 0; i < 32; i++, j++) {
5370                 if (j == 8) { /* start the next 8-word (16-byte) row */
5371                         j = 0; ++row;
5372                         printf("\n0x00%x0  ", row);
5373                 }
5374                 e1000_read_nvm(&adapter->hw, i, 1, &eeprom_data);
5375                 printf("%04x ", eeprom_data);
5376         }
5377         printf("\n");
5378 }
5379
5380 static int
5381 em_sysctl_int_delay(SYSCTL_HANDLER_ARGS)
5382 {
5383         struct em_int_delay_info *info;
5384         struct adapter *adapter;
5385         u32 regval;
5386         int error, usecs, ticks;
5387
5388         info = (struct em_int_delay_info *)arg1;
5389         usecs = info->value;
5390         error = sysctl_handle_int(oidp, &usecs, 0, req);
5391         if (error != 0 || req->newptr == NULL)
5392                 return (error);
5393         if (usecs < 0 || usecs > EM_TICKS_TO_USECS(65535))
5394                 return (EINVAL);
5395         info->value = usecs;
5396         ticks = EM_USECS_TO_TICKS(usecs);
5397
5398         adapter = info->adapter;
5399
5400         EM_CORE_LOCK(adapter);
5401         regval = E1000_READ_OFFSET(&adapter->hw, info->offset);
5402         regval = (regval & ~0xffff) | (ticks & 0xffff);
5403         /* Handle a few special cases. */
5404         switch (info->offset) {
5405         case E1000_RDTR:
5406                 break;
5407         case E1000_TIDV:
5408                 if (ticks == 0) {
5409                         adapter->txd_cmd &= ~E1000_TXD_CMD_IDE;
5410                         /* Don't write 0 into the TIDV register. */
5411                         regval++;
5412                 } else
5413                         adapter->txd_cmd |= E1000_TXD_CMD_IDE;
5414                 break;
5415         }
5416         E1000_WRITE_OFFSET(&adapter->hw, info->offset, regval);
5417         EM_CORE_UNLOCK(adapter);
5418         return (0);
5419 }
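
     /*
      * Worked example of the conversion above, assuming if_em.h defines
      * EM_USECS_TO_TICKS(usecs) as ((1000 * (usecs) + 512) / 1024),
      * i.e. one delay tick per 1.024 usecs: writing 100 to the sysctl
      * gives ticks = (1000 * 100 + 512) / 1024 = 98, and only the low
      * 16 bits are merged into the register -- hence the
      * EM_TICKS_TO_USECS(65535) upper bound enforced above.
      */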
5420
5421 static void
5422 em_add_int_delay_sysctl(struct adapter *adapter, const char *name,
5423         const char *description, struct em_int_delay_info *info,
5424         int offset, int value)
5425 {
5426         info->adapter = adapter;
5427         info->offset = offset;
5428         info->value = value;
5429         SYSCTL_ADD_PROC(device_get_sysctl_ctx(adapter->dev),
5430             SYSCTL_CHILDREN(device_get_sysctl_tree(adapter->dev)),
5431             OID_AUTO, name, CTLTYPE_INT|CTLFLAG_RW,
5432             info, 0, em_sysctl_int_delay, "I", description);
5433 }
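
     /*
      * For illustration, a typical attach-time call (argument names
      * follow those used elsewhere in this driver; treat it as a
      * sketch rather than a verbatim quote):
      *
      *      em_add_int_delay_sysctl(adapter, "rx_int_delay",
      *          "receive interrupt delay in usecs",
      *          &adapter->rx_int_delay,
      *          E1000_REGISTER(&adapter->hw, E1000_RDTR),
      *          em_rx_int_delay_dflt);
      */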
5434
5435 static void
5436 em_add_rx_process_limit(struct adapter *adapter, const char *name,
5437         const char *description, int *limit, int value)
5438 {
5439         *limit = value;
5440         SYSCTL_ADD_INT(device_get_sysctl_ctx(adapter->dev),
5441             SYSCTL_CHILDREN(device_get_sysctl_tree(adapter->dev)),
5442             OID_AUTO, name, CTLFLAG_RW, limit, value, description);
5443 }
5444
5445 static void
5446 em_set_flow_cntrl(struct adapter *adapter, const char *name,
5447         const char *description, int *limit, int value)
5448 {
5449         *limit = value;
5450         SYSCTL_ADD_INT(device_get_sysctl_ctx(adapter->dev),
5451             SYSCTL_CHILDREN(device_get_sysctl_tree(adapter->dev)),
5452             OID_AUTO, name, CTLFLAG_RW, limit, value, description);
5453 }
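
     /*
      * Note that em_set_flow_cntrl() is body-for-body identical to
      * em_add_rx_process_limit() above: each just publishes a plain
      * read/write integer tunable.  Once registered, either can be
      * tweaked at runtime, e.g. "sysctl dev.em.0.rx_processing_limit=200"
      * (leaf name assumed; it is assigned where the helper is called).
      */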
5454
5455 static int
5456 em_sysctl_debug_info(SYSCTL_HANDLER_ARGS)
5457 {
5458         struct adapter *adapter;
5459         int error;
5460         int result;
5461
5462         result = -1;
5463         error = sysctl_handle_int(oidp, &result, 0, req);
5464
5465         if (error || !req->newptr)
5466                 return (error);
5467
5468         if (result == 1) {
5469                 adapter = (struct adapter *)arg1;
5470                 em_print_debug_info(adapter);
5471         }
5472
5473         return (error);
5474 }
5475
5476 /*
5477 ** This routine is meant to be fluid; add whatever is
5478 ** needed for debugging a problem.  -jfv
5479 */
5480 static void
5481 em_print_debug_info(struct adapter *adapter)
5482 {
5483         device_t dev = adapter->dev;
5484         struct tx_ring *txr = adapter->tx_rings;
5485         struct rx_ring *rxr = adapter->rx_rings;
5486
5487         if (adapter->ifp->if_drv_flags & IFF_DRV_RUNNING)
5488                 printf("Interface is RUNNING ");
5489         else
5490                 printf("Interface is NOT RUNNING ");
5491         if (adapter->ifp->if_drv_flags & IFF_DRV_OACTIVE)
5492                 printf("and ACTIVE\n");
5493         else
5494                 printf("and INACTIVE\n");
5495
5496         device_printf(dev, "hw tdh = %d, hw tdt = %d\n",
5497             E1000_READ_REG(&adapter->hw, E1000_TDH(0)),
5498             E1000_READ_REG(&adapter->hw, E1000_TDT(0)));
5499         device_printf(dev, "hw rdh = %d, hw rdt = %d\n",
5500             E1000_READ_REG(&adapter->hw, E1000_RDH(0)),
5501             E1000_READ_REG(&adapter->hw, E1000_RDT(0)));
5502         device_printf(dev, "TX Queue Status = %d\n", txr->queue_status);
5503         device_printf(dev, "TX descriptors avail = %d\n",
5504             txr->tx_avail);
5505         device_printf(dev, "TX descriptor avail failures = %ld\n",
5506             txr->no_desc_avail);
5507         device_printf(dev, "RX discarded packets = %ld\n",
5508             rxr->rx_discarded);
5509         device_printf(dev, "RX Next to Check = %d\n", rxr->next_to_check);
5510         device_printf(dev, "RX Next to Refresh = %d\n", rxr->next_to_refresh);
5511 }