]> CyberLeo.Net >> Repos - FreeBSD/releng/8.2.git/blob - sys/dev/e1000/if_em.c
Copy stable/8 to releng/8.2 in preparation for FreeBSD-8.2 release.
[FreeBSD/releng/8.2.git] / sys / dev / e1000 / if_em.c
1 /******************************************************************************
2
3   Copyright (c) 2001-2010, Intel Corporation 
4   All rights reserved.
5   
6   Redistribution and use in source and binary forms, with or without 
7   modification, are permitted provided that the following conditions are met:
8   
9    1. Redistributions of source code must retain the above copyright notice, 
10       this list of conditions and the following disclaimer.
11   
12    2. Redistributions in binary form must reproduce the above copyright 
13       notice, this list of conditions and the following disclaimer in the 
14       documentation and/or other materials provided with the distribution.
15   
16    3. Neither the name of the Intel Corporation nor the names of its 
17       contributors may be used to endorse or promote products derived from 
18       this software without specific prior written permission.
19   
20   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
21   AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 
22   IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 
23   ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE 
24   LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 
25   CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 
26   SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 
27   INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 
28   CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 
29   ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
30   POSSIBILITY OF SUCH DAMAGE.
31
32 ******************************************************************************/
33 /*$FreeBSD$*/
34
35 #ifdef HAVE_KERNEL_OPTION_HEADERS
36 #include "opt_device_polling.h"
37 #include "opt_inet.h"
38 #endif
39
40 #include <sys/param.h>
41 #include <sys/systm.h>
42 #if __FreeBSD_version >= 800000
43 #include <sys/buf_ring.h>
44 #endif
45 #include <sys/bus.h>
46 #include <sys/endian.h>
47 #include <sys/kernel.h>
48 #include <sys/kthread.h>
49 #include <sys/malloc.h>
50 #include <sys/mbuf.h>
51 #include <sys/module.h>
52 #include <sys/rman.h>
53 #include <sys/socket.h>
54 #include <sys/sockio.h>
55 #include <sys/sysctl.h>
56 #include <sys/taskqueue.h>
57 #include <sys/eventhandler.h>
58 #include <machine/bus.h>
59 #include <machine/resource.h>
60
61 #include <net/bpf.h>
62 #include <net/ethernet.h>
63 #include <net/if.h>
64 #include <net/if_arp.h>
65 #include <net/if_dl.h>
66 #include <net/if_media.h>
67
68 #include <net/if_types.h>
69 #include <net/if_vlan_var.h>
70
71 #include <netinet/in_systm.h>
72 #include <netinet/in.h>
73 #include <netinet/if_ether.h>
74 #include <netinet/ip.h>
75 #include <netinet/ip6.h>
76 #include <netinet/tcp.h>
77 #include <netinet/udp.h>
78
79 #include <machine/in_cksum.h>
80 #include <dev/led/led.h>
81 #include <dev/pci/pcivar.h>
82 #include <dev/pci/pcireg.h>
83
84 #include "e1000_api.h"
85 #include "e1000_82571.h"
86 #include "if_em.h"
87
88 /*********************************************************************
89  *  Set this to one to display debug statistics
90  *********************************************************************/
91 int     em_display_debug_stats = 0;
92
93 /*********************************************************************
94  *  Driver version:
95  *********************************************************************/
96 char em_driver_version[] = "7.1.8";
97
98 /*********************************************************************
99  *  PCI Device ID Table
100  *
101  *  Used by probe to select devices to load on
102  *  Last field stores an index into e1000_strings
103  *  Last entry must be all 0s
104  *
105  *  { Vendor ID, Device ID, SubVendor ID, SubDevice ID, String Index }
106  *********************************************************************/
107
108 static em_vendor_info_t em_vendor_info_array[] =
109 {
110         /* Intel(R) PRO/1000 Network Connection */
111         { 0x8086, E1000_DEV_ID_82571EB_COPPER,  PCI_ANY_ID, PCI_ANY_ID, 0},
112         { 0x8086, E1000_DEV_ID_82571EB_FIBER,   PCI_ANY_ID, PCI_ANY_ID, 0},
113         { 0x8086, E1000_DEV_ID_82571EB_SERDES,  PCI_ANY_ID, PCI_ANY_ID, 0},
114         { 0x8086, E1000_DEV_ID_82571EB_SERDES_DUAL,
115                                                 PCI_ANY_ID, PCI_ANY_ID, 0},
116         { 0x8086, E1000_DEV_ID_82571EB_SERDES_QUAD,
117                                                 PCI_ANY_ID, PCI_ANY_ID, 0},
118         { 0x8086, E1000_DEV_ID_82571EB_QUAD_COPPER,
119                                                 PCI_ANY_ID, PCI_ANY_ID, 0},
120         { 0x8086, E1000_DEV_ID_82571EB_QUAD_COPPER_LP,
121                                                 PCI_ANY_ID, PCI_ANY_ID, 0},
122         { 0x8086, E1000_DEV_ID_82571EB_QUAD_FIBER,
123                                                 PCI_ANY_ID, PCI_ANY_ID, 0},
124         { 0x8086, E1000_DEV_ID_82571PT_QUAD_COPPER,
125                                                 PCI_ANY_ID, PCI_ANY_ID, 0},
126         { 0x8086, E1000_DEV_ID_82572EI_COPPER,  PCI_ANY_ID, PCI_ANY_ID, 0},
127         { 0x8086, E1000_DEV_ID_82572EI_FIBER,   PCI_ANY_ID, PCI_ANY_ID, 0},
128         { 0x8086, E1000_DEV_ID_82572EI_SERDES,  PCI_ANY_ID, PCI_ANY_ID, 0},
129         { 0x8086, E1000_DEV_ID_82572EI,         PCI_ANY_ID, PCI_ANY_ID, 0},
130
131         { 0x8086, E1000_DEV_ID_82573E,          PCI_ANY_ID, PCI_ANY_ID, 0},
132         { 0x8086, E1000_DEV_ID_82573E_IAMT,     PCI_ANY_ID, PCI_ANY_ID, 0},
133         { 0x8086, E1000_DEV_ID_82573L,          PCI_ANY_ID, PCI_ANY_ID, 0},
134         { 0x8086, E1000_DEV_ID_82583V,          PCI_ANY_ID, PCI_ANY_ID, 0},
135         { 0x8086, E1000_DEV_ID_80003ES2LAN_COPPER_SPT,
136                                                 PCI_ANY_ID, PCI_ANY_ID, 0},
137         { 0x8086, E1000_DEV_ID_80003ES2LAN_SERDES_SPT,
138                                                 PCI_ANY_ID, PCI_ANY_ID, 0},
139         { 0x8086, E1000_DEV_ID_80003ES2LAN_COPPER_DPT,
140                                                 PCI_ANY_ID, PCI_ANY_ID, 0},
141         { 0x8086, E1000_DEV_ID_80003ES2LAN_SERDES_DPT,
142                                                 PCI_ANY_ID, PCI_ANY_ID, 0},
143         { 0x8086, E1000_DEV_ID_ICH8_IGP_M_AMT,  PCI_ANY_ID, PCI_ANY_ID, 0},
144         { 0x8086, E1000_DEV_ID_ICH8_IGP_AMT,    PCI_ANY_ID, PCI_ANY_ID, 0},
145         { 0x8086, E1000_DEV_ID_ICH8_IGP_C,      PCI_ANY_ID, PCI_ANY_ID, 0},
146         { 0x8086, E1000_DEV_ID_ICH8_IFE,        PCI_ANY_ID, PCI_ANY_ID, 0},
147         { 0x8086, E1000_DEV_ID_ICH8_IFE_GT,     PCI_ANY_ID, PCI_ANY_ID, 0},
148         { 0x8086, E1000_DEV_ID_ICH8_IFE_G,      PCI_ANY_ID, PCI_ANY_ID, 0},
149         { 0x8086, E1000_DEV_ID_ICH8_IGP_M,      PCI_ANY_ID, PCI_ANY_ID, 0},
150         { 0x8086, E1000_DEV_ID_ICH8_82567V_3,   PCI_ANY_ID, PCI_ANY_ID, 0},
151         { 0x8086, E1000_DEV_ID_ICH9_IGP_M_AMT,  PCI_ANY_ID, PCI_ANY_ID, 0},
152         { 0x8086, E1000_DEV_ID_ICH9_IGP_AMT,    PCI_ANY_ID, PCI_ANY_ID, 0},
153         { 0x8086, E1000_DEV_ID_ICH9_IGP_C,      PCI_ANY_ID, PCI_ANY_ID, 0},
154         { 0x8086, E1000_DEV_ID_ICH9_IGP_M,      PCI_ANY_ID, PCI_ANY_ID, 0},
155         { 0x8086, E1000_DEV_ID_ICH9_IGP_M_V,    PCI_ANY_ID, PCI_ANY_ID, 0},
156         { 0x8086, E1000_DEV_ID_ICH9_IFE,        PCI_ANY_ID, PCI_ANY_ID, 0},
157         { 0x8086, E1000_DEV_ID_ICH9_IFE_GT,     PCI_ANY_ID, PCI_ANY_ID, 0},
158         { 0x8086, E1000_DEV_ID_ICH9_IFE_G,      PCI_ANY_ID, PCI_ANY_ID, 0},
159         { 0x8086, E1000_DEV_ID_ICH9_BM,         PCI_ANY_ID, PCI_ANY_ID, 0},
160         { 0x8086, E1000_DEV_ID_82574L,          PCI_ANY_ID, PCI_ANY_ID, 0},
161         { 0x8086, E1000_DEV_ID_82574LA,         PCI_ANY_ID, PCI_ANY_ID, 0},
162         { 0x8086, E1000_DEV_ID_ICH10_R_BM_LM,   PCI_ANY_ID, PCI_ANY_ID, 0},
163         { 0x8086, E1000_DEV_ID_ICH10_R_BM_LF,   PCI_ANY_ID, PCI_ANY_ID, 0},
164         { 0x8086, E1000_DEV_ID_ICH10_R_BM_V,    PCI_ANY_ID, PCI_ANY_ID, 0},
165         { 0x8086, E1000_DEV_ID_ICH10_D_BM_LM,   PCI_ANY_ID, PCI_ANY_ID, 0},
166         { 0x8086, E1000_DEV_ID_ICH10_D_BM_LF,   PCI_ANY_ID, PCI_ANY_ID, 0},
167         { 0x8086, E1000_DEV_ID_ICH10_D_BM_V,    PCI_ANY_ID, PCI_ANY_ID, 0},
168         { 0x8086, E1000_DEV_ID_PCH_M_HV_LM,     PCI_ANY_ID, PCI_ANY_ID, 0},
169         { 0x8086, E1000_DEV_ID_PCH_M_HV_LC,     PCI_ANY_ID, PCI_ANY_ID, 0},
170         { 0x8086, E1000_DEV_ID_PCH_D_HV_DM,     PCI_ANY_ID, PCI_ANY_ID, 0},
171         { 0x8086, E1000_DEV_ID_PCH_D_HV_DC,     PCI_ANY_ID, PCI_ANY_ID, 0},
172         { 0x8086, E1000_DEV_ID_PCH2_LV_LM,      PCI_ANY_ID, PCI_ANY_ID, 0},
173         { 0x8086, E1000_DEV_ID_PCH2_LV_V,       PCI_ANY_ID, PCI_ANY_ID, 0},
174         /* required last entry */
175         { 0, 0, 0, 0, 0}
176 };
177
178 /*********************************************************************
179  *  Table of branding strings for all supported NICs.
180  *********************************************************************/
181
182 static char *em_strings[] = {
183         "Intel(R) PRO/1000 Network Connection"
184 };
185
186 /*********************************************************************
187  *  Function prototypes
188  *********************************************************************/
189 static int      em_probe(device_t);
190 static int      em_attach(device_t);
191 static int      em_detach(device_t);
192 static int      em_shutdown(device_t);
193 static int      em_suspend(device_t);
194 static int      em_resume(device_t);
195 static void     em_start(struct ifnet *);
196 static void     em_start_locked(struct ifnet *, struct tx_ring *);
197 #ifdef EM_MULTIQUEUE
198 static int      em_mq_start(struct ifnet *, struct mbuf *);
199 static int      em_mq_start_locked(struct ifnet *,
200                     struct tx_ring *, struct mbuf *);
201 static void     em_qflush(struct ifnet *);
202 #endif
203 static int      em_ioctl(struct ifnet *, u_long, caddr_t);
204 static void     em_init(void *);
205 static void     em_init_locked(struct adapter *);
206 static void     em_stop(void *);
207 static void     em_media_status(struct ifnet *, struct ifmediareq *);
208 static int      em_media_change(struct ifnet *);
209 static void     em_identify_hardware(struct adapter *);
210 static int      em_allocate_pci_resources(struct adapter *);
211 static int      em_allocate_legacy(struct adapter *);
212 static int      em_allocate_msix(struct adapter *);
213 static int      em_allocate_queues(struct adapter *);
214 static int      em_setup_msix(struct adapter *);
215 static void     em_free_pci_resources(struct adapter *);
216 static void     em_local_timer(void *);
217 static void     em_reset(struct adapter *);
218 static int      em_setup_interface(device_t, struct adapter *);
219
220 static void     em_setup_transmit_structures(struct adapter *);
221 static void     em_initialize_transmit_unit(struct adapter *);
222 static int      em_allocate_transmit_buffers(struct tx_ring *);
223 static void     em_free_transmit_structures(struct adapter *);
224 static void     em_free_transmit_buffers(struct tx_ring *);
225
226 static int      em_setup_receive_structures(struct adapter *);
227 static int      em_allocate_receive_buffers(struct rx_ring *);
228 static void     em_initialize_receive_unit(struct adapter *);
229 static void     em_free_receive_structures(struct adapter *);
230 static void     em_free_receive_buffers(struct rx_ring *);
231
232 static void     em_enable_intr(struct adapter *);
233 static void     em_disable_intr(struct adapter *);
234 static void     em_update_stats_counters(struct adapter *);
235 static void     em_add_hw_stats(struct adapter *adapter);
236 static bool     em_txeof(struct tx_ring *);
237 static bool     em_rxeof(struct rx_ring *, int, int *);
238 #ifndef __NO_STRICT_ALIGNMENT
239 static int      em_fixup_rx(struct rx_ring *);
240 #endif
241 static void     em_receive_checksum(struct e1000_rx_desc *, struct mbuf *);
242 static void     em_transmit_checksum_setup(struct tx_ring *, struct mbuf *, int,
243                     struct ip *, u32 *, u32 *);
244 static void     em_tso_setup(struct tx_ring *, struct mbuf *, int, struct ip *,
245                     struct tcphdr *, u32 *, u32 *);
246 static void     em_set_promisc(struct adapter *);
247 static void     em_disable_promisc(struct adapter *);
248 static void     em_set_multi(struct adapter *);
249 static void     em_update_link_status(struct adapter *);
250 static void     em_refresh_mbufs(struct rx_ring *, int);
251 static void     em_register_vlan(void *, struct ifnet *, u16);
252 static void     em_unregister_vlan(void *, struct ifnet *, u16);
253 static void     em_setup_vlan_hw_support(struct adapter *);
254 static int      em_xmit(struct tx_ring *, struct mbuf **);
255 static int      em_dma_malloc(struct adapter *, bus_size_t,
256                     struct em_dma_alloc *, int);
257 static void     em_dma_free(struct adapter *, struct em_dma_alloc *);
258 static int      em_sysctl_nvm_info(SYSCTL_HANDLER_ARGS);
259 static void     em_print_nvm_info(struct adapter *);
260 static int      em_sysctl_debug_info(SYSCTL_HANDLER_ARGS);
261 static void     em_print_debug_info(struct adapter *);
262 static int      em_is_valid_ether_addr(u8 *);
263 static int      em_sysctl_int_delay(SYSCTL_HANDLER_ARGS);
264 static void     em_add_int_delay_sysctl(struct adapter *, const char *,
265                     const char *, struct em_int_delay_info *, int, int);
266 /* Management and WOL Support */
267 static void     em_init_manageability(struct adapter *);
268 static void     em_release_manageability(struct adapter *);
269 static void     em_get_hw_control(struct adapter *);
270 static void     em_release_hw_control(struct adapter *);
271 static void     em_get_wakeup(device_t);
272 static void     em_enable_wakeup(device_t);
273 static int      em_enable_phy_wakeup(struct adapter *);
274 static void     em_led_func(void *, int);
275 static void     em_disable_aspm(struct adapter *);
276
277 static int      em_irq_fast(void *);
278
279 /* MSIX handlers */
280 static void     em_msix_tx(void *);
281 static void     em_msix_rx(void *);
282 static void     em_msix_link(void *);
283 static void     em_handle_tx(void *context, int pending);
284 static void     em_handle_rx(void *context, int pending);
285 static void     em_handle_link(void *context, int pending);
286
287 static void     em_add_rx_process_limit(struct adapter *, const char *,
288                     const char *, int *, int);
289 static void     em_set_flow_cntrl(struct adapter *, const char *,
290                     const char *, int *, int);
291
292 static __inline void em_rx_discard(struct rx_ring *, int);
293
294 #ifdef DEVICE_POLLING
295 static poll_handler_t em_poll;
296 #endif /* POLLING */
297
298 /*********************************************************************
299  *  FreeBSD Device Interface Entry Points
300  *********************************************************************/
301
302 static device_method_t em_methods[] = {
303         /* Device interface */
304         DEVMETHOD(device_probe, em_probe),
305         DEVMETHOD(device_attach, em_attach),
306         DEVMETHOD(device_detach, em_detach),
307         DEVMETHOD(device_shutdown, em_shutdown),
308         DEVMETHOD(device_suspend, em_suspend),
309         DEVMETHOD(device_resume, em_resume),
310         {0, 0}
311 };
312
313 static driver_t em_driver = {
314         "em", em_methods, sizeof(struct adapter),
315 };
316
317 devclass_t em_devclass;
318 DRIVER_MODULE(em, pci, em_driver, em_devclass, 0, 0);
319 MODULE_DEPEND(em, pci, 1, 1, 1);
320 MODULE_DEPEND(em, ether, 1, 1, 1);
321
322 /*********************************************************************
323  *  Tunable default values.
324  *********************************************************************/
325
326 #define EM_TICKS_TO_USECS(ticks)        ((1024 * (ticks) + 500) / 1000)
327 #define EM_USECS_TO_TICKS(usecs)        ((1000 * (usecs) + 512) / 1024)
328 #define M_TSO_LEN                       66
329
330 /* Allow common code without TSO */
331 #ifndef CSUM_TSO
332 #define CSUM_TSO        0
333 #endif
334
335 static int em_tx_int_delay_dflt = EM_TICKS_TO_USECS(EM_TIDV);
336 static int em_rx_int_delay_dflt = EM_TICKS_TO_USECS(EM_RDTR);
337 TUNABLE_INT("hw.em.tx_int_delay", &em_tx_int_delay_dflt);
338 TUNABLE_INT("hw.em.rx_int_delay", &em_rx_int_delay_dflt);
339
340 static int em_tx_abs_int_delay_dflt = EM_TICKS_TO_USECS(EM_TADV);
341 static int em_rx_abs_int_delay_dflt = EM_TICKS_TO_USECS(EM_RADV);
342 TUNABLE_INT("hw.em.tx_abs_int_delay", &em_tx_abs_int_delay_dflt);
343 TUNABLE_INT("hw.em.rx_abs_int_delay", &em_rx_abs_int_delay_dflt);
344
345 static int em_rxd = EM_DEFAULT_RXD;
346 static int em_txd = EM_DEFAULT_TXD;
347 TUNABLE_INT("hw.em.rxd", &em_rxd);
348 TUNABLE_INT("hw.em.txd", &em_txd);
349
350 static int em_smart_pwr_down = FALSE;
351 TUNABLE_INT("hw.em.smart_pwr_down", &em_smart_pwr_down);
352
353 /* Controls whether promiscuous also shows bad packets */
354 static int em_debug_sbp = FALSE;
355 TUNABLE_INT("hw.em.sbp", &em_debug_sbp);
356
357 static int em_enable_msix = TRUE;
358 TUNABLE_INT("hw.em.enable_msix", &em_enable_msix);
359
360 /* How many packets rxeof tries to clean at a time */
361 static int em_rx_process_limit = 100;
362 TUNABLE_INT("hw.em.rx_process_limit", &em_rx_process_limit);
363
364 /* Flow control setting - default to FULL */
365 static int em_fc_setting = e1000_fc_full;
366 TUNABLE_INT("hw.em.fc_setting", &em_fc_setting);
367
368 /* Global used in WOL setup with multiport cards */
369 static int global_quad_port_a = 0;
370
371 /*********************************************************************
372  *  Device identification routine
373  *
374  *  em_probe determines if the driver should be loaded on
375  *  adapter based on PCI vendor/device id of the adapter.
376  *
377  *  return BUS_PROBE_DEFAULT on success, positive on failure
378  *********************************************************************/
379
380 static int
381 em_probe(device_t dev)
382 {
383         char            adapter_name[60];
384         u16             pci_vendor_id = 0;
385         u16             pci_device_id = 0;
386         u16             pci_subvendor_id = 0;
387         u16             pci_subdevice_id = 0;
388         em_vendor_info_t *ent;
389
390         INIT_DEBUGOUT("em_probe: begin");
391
392         pci_vendor_id = pci_get_vendor(dev);
393         if (pci_vendor_id != EM_VENDOR_ID)
394                 return (ENXIO);
395
396         pci_device_id = pci_get_device(dev);
397         pci_subvendor_id = pci_get_subvendor(dev);
398         pci_subdevice_id = pci_get_subdevice(dev);
399
400         ent = em_vendor_info_array;
401         while (ent->vendor_id != 0) {
402                 if ((pci_vendor_id == ent->vendor_id) &&
403                     (pci_device_id == ent->device_id) &&
404
405                     ((pci_subvendor_id == ent->subvendor_id) ||
406                     (ent->subvendor_id == PCI_ANY_ID)) &&
407
408                     ((pci_subdevice_id == ent->subdevice_id) ||
409                     (ent->subdevice_id == PCI_ANY_ID))) {
410                         sprintf(adapter_name, "%s %s",
411                                 em_strings[ent->index],
412                                 em_driver_version);
413                         device_set_desc_copy(dev, adapter_name);
414                         return (BUS_PROBE_DEFAULT);
415                 }
416                 ent++;
417         }
418
419         return (ENXIO);
420 }
421
422 /*********************************************************************
423  *  Device initialization routine
424  *
425  *  The attach entry point is called when the driver is being loaded.
426  *  This routine identifies the type of hardware, allocates all resources
427  *  and initializes the hardware.
428  *
429  *  return 0 on success, positive on failure
430  *********************************************************************/
431
432 static int
433 em_attach(device_t dev)
434 {
435         struct adapter  *adapter;
436         int             error = 0;
437
438         INIT_DEBUGOUT("em_attach: begin");
439
440         adapter = device_get_softc(dev);
441         adapter->dev = adapter->osdep.dev = dev;
442         EM_CORE_LOCK_INIT(adapter, device_get_nameunit(dev));
443
444         /* SYSCTL stuff */
445         SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
446             SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
447             OID_AUTO, "nvm", CTLTYPE_INT|CTLFLAG_RW, adapter, 0,
448             em_sysctl_nvm_info, "I", "NVM Information");
449
450         SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
451             SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
452             OID_AUTO, "debug", CTLTYPE_INT|CTLFLAG_RW, adapter, 0,
453             em_sysctl_debug_info, "I", "Debug Information");
454
455         callout_init_mtx(&adapter->timer, &adapter->core_mtx, 0);
456
457         /* Determine hardware and mac info */
458         em_identify_hardware(adapter);
459
460         /* Setup PCI resources */
461         if (em_allocate_pci_resources(adapter)) {
462                 device_printf(dev, "Allocation of PCI resources failed\n");
463                 error = ENXIO;
464                 goto err_pci;
465         }
466
467         /*
468         ** For ICH8 and family we need to
469         ** map the flash memory, and this
470         ** must happen after the MAC is 
471         ** identified
472         */
473         if ((adapter->hw.mac.type == e1000_ich8lan) ||
474             (adapter->hw.mac.type == e1000_ich9lan) ||
475             (adapter->hw.mac.type == e1000_ich10lan) ||
476             (adapter->hw.mac.type == e1000_pchlan) ||
477             (adapter->hw.mac.type == e1000_pch2lan)) {
478                 int rid = EM_BAR_TYPE_FLASH;
479                 adapter->flash = bus_alloc_resource_any(dev,
480                     SYS_RES_MEMORY, &rid, RF_ACTIVE);
481                 if (adapter->flash == NULL) {
482                         device_printf(dev, "Mapping of Flash failed\n");
483                         error = ENXIO;
484                         goto err_pci;
485                 }
486                 /* This is used in the shared code */
487                 adapter->hw.flash_address = (u8 *)adapter->flash;
488                 adapter->osdep.flash_bus_space_tag =
489                     rman_get_bustag(adapter->flash);
490                 adapter->osdep.flash_bus_space_handle =
491                     rman_get_bushandle(adapter->flash);
492         }
493
494         /* Do Shared Code initialization */
495         if (e1000_setup_init_funcs(&adapter->hw, TRUE)) {
496                 device_printf(dev, "Setup of Shared code failed\n");
497                 error = ENXIO;
498                 goto err_pci;
499         }
500
501         e1000_get_bus_info(&adapter->hw);
502
503         /* Set up some sysctls for the tunable interrupt delays */
504         em_add_int_delay_sysctl(adapter, "rx_int_delay",
505             "receive interrupt delay in usecs", &adapter->rx_int_delay,
506             E1000_REGISTER(&adapter->hw, E1000_RDTR), em_rx_int_delay_dflt);
507         em_add_int_delay_sysctl(adapter, "tx_int_delay",
508             "transmit interrupt delay in usecs", &adapter->tx_int_delay,
509             E1000_REGISTER(&adapter->hw, E1000_TIDV), em_tx_int_delay_dflt);
510         em_add_int_delay_sysctl(adapter, "rx_abs_int_delay",
511             "receive interrupt delay limit in usecs",
512             &adapter->rx_abs_int_delay,
513             E1000_REGISTER(&adapter->hw, E1000_RADV),
514             em_rx_abs_int_delay_dflt);
515         em_add_int_delay_sysctl(adapter, "tx_abs_int_delay",
516             "transmit interrupt delay limit in usecs",
517             &adapter->tx_abs_int_delay,
518             E1000_REGISTER(&adapter->hw, E1000_TADV),
519             em_tx_abs_int_delay_dflt);
520
521         /* Sysctl for limiting the amount of work done in the taskqueue */
522         em_add_rx_process_limit(adapter, "rx_processing_limit",
523             "max number of rx packets to process", &adapter->rx_process_limit,
524             em_rx_process_limit);
525
526         /* Sysctl for setting the interface flow control */
527         em_set_flow_cntrl(adapter, "flow_control",
528             "configure flow control",
529             &adapter->fc_setting, em_fc_setting);
530
531         /*
532          * Validate number of transmit and receive descriptors. It
533          * must not exceed hardware maximum, and must be multiple
534          * of E1000_DBA_ALIGN.
535          */
536         if (((em_txd * sizeof(struct e1000_tx_desc)) % EM_DBA_ALIGN) != 0 ||
537             (em_txd > EM_MAX_TXD) || (em_txd < EM_MIN_TXD)) {
538                 device_printf(dev, "Using %d TX descriptors instead of %d!\n",
539                     EM_DEFAULT_TXD, em_txd);
540                 adapter->num_tx_desc = EM_DEFAULT_TXD;
541         } else
542                 adapter->num_tx_desc = em_txd;
543
544         if (((em_rxd * sizeof(struct e1000_rx_desc)) % EM_DBA_ALIGN) != 0 ||
545             (em_rxd > EM_MAX_RXD) || (em_rxd < EM_MIN_RXD)) {
546                 device_printf(dev, "Using %d RX descriptors instead of %d!\n",
547                     EM_DEFAULT_RXD, em_rxd);
548                 adapter->num_rx_desc = EM_DEFAULT_RXD;
549         } else
550                 adapter->num_rx_desc = em_rxd;
551
552         adapter->hw.mac.autoneg = DO_AUTO_NEG;
553         adapter->hw.phy.autoneg_wait_to_complete = FALSE;
554         adapter->hw.phy.autoneg_advertised = AUTONEG_ADV_DEFAULT;
555
556         /* Copper options */
557         if (adapter->hw.phy.media_type == e1000_media_type_copper) {
558                 adapter->hw.phy.mdix = AUTO_ALL_MODES;
559                 adapter->hw.phy.disable_polarity_correction = FALSE;
560                 adapter->hw.phy.ms_type = EM_MASTER_SLAVE;
561         }
562
563         /*
564          * Set the frame limits assuming
565          * standard ethernet sized frames.
566          */
567         adapter->max_frame_size = ETHERMTU + ETHER_HDR_LEN + ETHERNET_FCS_SIZE;
568         adapter->min_frame_size = ETH_ZLEN + ETHERNET_FCS_SIZE;
569
570         /*
571          * This controls when hardware reports transmit completion
572          * status.
573          */
574         adapter->hw.mac.report_tx_early = 1;
575
576         /* 
577         ** Get queue/ring memory
578         */
579         if (em_allocate_queues(adapter)) {
580                 error = ENOMEM;
581                 goto err_pci;
582         }
583
584         /* Allocate multicast array memory. */
585         adapter->mta = malloc(sizeof(u8) * ETH_ADDR_LEN *
586             MAX_NUM_MULTICAST_ADDRESSES, M_DEVBUF, M_NOWAIT);
587         if (adapter->mta == NULL) {
588                 device_printf(dev, "Can not allocate multicast setup array\n");
589                 error = ENOMEM;
590                 goto err_late;
591         }
592
593         /* Check SOL/IDER usage */
594         if (e1000_check_reset_block(&adapter->hw))
595                 device_printf(dev, "PHY reset is blocked"
596                     " due to SOL/IDER session.\n");
597
598         /*
599         ** Start from a known state, this is
600         ** important in reading the nvm and
601         ** mac from that.
602         */
603         e1000_reset_hw(&adapter->hw);
604
605         /* Make sure we have a good EEPROM before we read from it */
606         if (e1000_validate_nvm_checksum(&adapter->hw) < 0) {
607                 /*
608                 ** Some PCI-E parts fail the first check due to
609                 ** the link being in sleep state, call it again,
610                 ** if it fails a second time its a real issue.
611                 */
612                 if (e1000_validate_nvm_checksum(&adapter->hw) < 0) {
613                         device_printf(dev,
614                             "The EEPROM Checksum Is Not Valid\n");
615                         error = EIO;
616                         goto err_late;
617                 }
618         }
619
620         /* Copy the permanent MAC address out of the EEPROM */
621         if (e1000_read_mac_addr(&adapter->hw) < 0) {
622                 device_printf(dev, "EEPROM read error while reading MAC"
623                     " address\n");
624                 error = EIO;
625                 goto err_late;
626         }
627
628         if (!em_is_valid_ether_addr(adapter->hw.mac.addr)) {
629                 device_printf(dev, "Invalid MAC address\n");
630                 error = EIO;
631                 goto err_late;
632         }
633
634         /*
635         **  Do interrupt configuration
636         */
637         if (adapter->msix > 1) /* Do MSIX */
638                 error = em_allocate_msix(adapter);
639         else  /* MSI or Legacy */
640                 error = em_allocate_legacy(adapter);
641         if (error)
642                 goto err_late;
643
644         /*
645          * Get Wake-on-Lan and Management info for later use
646          */
647         em_get_wakeup(dev);
648
649         /* Setup OS specific network interface */
650         if (em_setup_interface(dev, adapter) != 0)
651                 goto err_late;
652
653         em_reset(adapter);
654
655         /* Initialize statistics */
656         em_update_stats_counters(adapter);
657
658         adapter->hw.mac.get_link_status = 1;
659         em_update_link_status(adapter);
660
661         /* Register for VLAN events */
662         adapter->vlan_attach = EVENTHANDLER_REGISTER(vlan_config,
663             em_register_vlan, adapter, EVENTHANDLER_PRI_FIRST);
664         adapter->vlan_detach = EVENTHANDLER_REGISTER(vlan_unconfig,
665             em_unregister_vlan, adapter, EVENTHANDLER_PRI_FIRST); 
666
667         em_add_hw_stats(adapter);
668
669         /* Non-AMT based hardware can now take control from firmware */
670         if (adapter->has_manage && !adapter->has_amt)
671                 em_get_hw_control(adapter);
672
673         /* Tell the stack that the interface is not active */
674         adapter->ifp->if_drv_flags &= ~(IFF_DRV_RUNNING | IFF_DRV_OACTIVE);
675
676         adapter->led_dev = led_create(em_led_func, adapter,
677             device_get_nameunit(dev));
678
679         INIT_DEBUGOUT("em_attach: end");
680
681         return (0);
682
683 err_late:
684         em_free_transmit_structures(adapter);
685         em_free_receive_structures(adapter);
686         em_release_hw_control(adapter);
687         if (adapter->ifp != NULL)
688                 if_free(adapter->ifp);
689 err_pci:
690         em_free_pci_resources(adapter);
691         free(adapter->mta, M_DEVBUF);
692         EM_CORE_LOCK_DESTROY(adapter);
693
694         return (error);
695 }
696
697 /*********************************************************************
698  *  Device removal routine
699  *
700  *  The detach entry point is called when the driver is being removed.
701  *  This routine stops the adapter and deallocates all the resources
702  *  that were allocated for driver operation.
703  *
704  *  return 0 on success, positive on failure
705  *********************************************************************/
706
707 static int
708 em_detach(device_t dev)
709 {
710         struct adapter  *adapter = device_get_softc(dev);
711         struct ifnet    *ifp = adapter->ifp;
712
713         INIT_DEBUGOUT("em_detach: begin");
714
715         /* Make sure VLANS are not using driver */
716         if (adapter->ifp->if_vlantrunk != NULL) {
717                 device_printf(dev,"Vlan in use, detach first\n");
718                 return (EBUSY);
719         }
720
721 #ifdef DEVICE_POLLING
722         if (ifp->if_capenable & IFCAP_POLLING)
723                 ether_poll_deregister(ifp);
724 #endif
725
726         if (adapter->led_dev != NULL)
727                 led_destroy(adapter->led_dev);
728
729         EM_CORE_LOCK(adapter);
730         adapter->in_detach = 1;
731         em_stop(adapter);
732         EM_CORE_UNLOCK(adapter);
733         EM_CORE_LOCK_DESTROY(adapter);
734
735         e1000_phy_hw_reset(&adapter->hw);
736
737         em_release_manageability(adapter);
738         em_release_hw_control(adapter);
739
740         /* Unregister VLAN events */
741         if (adapter->vlan_attach != NULL)
742                 EVENTHANDLER_DEREGISTER(vlan_config, adapter->vlan_attach);
743         if (adapter->vlan_detach != NULL)
744                 EVENTHANDLER_DEREGISTER(vlan_unconfig, adapter->vlan_detach); 
745
746         ether_ifdetach(adapter->ifp);
747         callout_drain(&adapter->timer);
748
749         em_free_pci_resources(adapter);
750         bus_generic_detach(dev);
751         if_free(ifp);
752
753         em_free_transmit_structures(adapter);
754         em_free_receive_structures(adapter);
755
756         em_release_hw_control(adapter);
757         free(adapter->mta, M_DEVBUF);
758
759         return (0);
760 }
761
762 /*********************************************************************
763  *
764  *  Shutdown entry point
765  *
766  **********************************************************************/
767
768 static int
769 em_shutdown(device_t dev)
770 {
771         return em_suspend(dev);
772 }
773
774 /*
775  * Suspend/resume device methods.
776  */
777 static int
778 em_suspend(device_t dev)
779 {
780         struct adapter *adapter = device_get_softc(dev);
781
782         EM_CORE_LOCK(adapter);
783
784         em_release_manageability(adapter);
785         em_release_hw_control(adapter);
786         em_enable_wakeup(dev);
787
788         EM_CORE_UNLOCK(adapter);
789
790         return bus_generic_suspend(dev);
791 }
792
793 static int
794 em_resume(device_t dev)
795 {
796         struct adapter *adapter = device_get_softc(dev);
797         struct ifnet *ifp = adapter->ifp;
798
799         EM_CORE_LOCK(adapter);
800         em_init_locked(adapter);
801         em_init_manageability(adapter);
802         EM_CORE_UNLOCK(adapter);
803         em_start(ifp);
804
805         return bus_generic_resume(dev);
806 }
807
808
809 /*********************************************************************
810  *  Transmit entry point
811  *
812  *  em_start is called by the stack to initiate a transmit.
813  *  The driver will remain in this routine as long as there are
814  *  packets to transmit and transmit resources are available.
815  *  In case resources are not available stack is notified and
816  *  the packet is requeued.
817  **********************************************************************/
818
819 #ifdef EM_MULTIQUEUE
820 static int
821 em_mq_start_locked(struct ifnet *ifp, struct tx_ring *txr, struct mbuf *m)
822 {
823         struct adapter  *adapter = txr->adapter;
824         struct mbuf     *next;
825         int             err = 0, enq = 0;
826
827         if ((ifp->if_drv_flags & (IFF_DRV_RUNNING | IFF_DRV_OACTIVE)) !=
828             IFF_DRV_RUNNING || adapter->link_active == 0) {
829                 if (m != NULL)
830                         err = drbr_enqueue(ifp, txr->br, m);
831                 return (err);
832         }
833
834         /* Call cleanup if number of TX descriptors low */
835         if (txr->tx_avail <= EM_TX_CLEANUP_THRESHOLD)
836                 em_txeof(txr);
837
838         enq = 0;
839         if (m == NULL) {
840                 next = drbr_dequeue(ifp, txr->br);
841         } else if (drbr_needs_enqueue(ifp, txr->br)) {
842                 if ((err = drbr_enqueue(ifp, txr->br, m)) != 0)
843                         return (err);
844                 next = drbr_dequeue(ifp, txr->br);
845         } else
846                 next = m;
847
848         /* Process the queue */
849         while (next != NULL) {
850                 if ((err = em_xmit(txr, &next)) != 0) {
851                         if (next != NULL)
852                                 err = drbr_enqueue(ifp, txr->br, next);
853                         break;
854                 }
855                 enq++;
856                 drbr_stats_update(ifp, next->m_pkthdr.len, next->m_flags);
857                 ETHER_BPF_MTAP(ifp, next);
858                 if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
859                         break;
860                 if (txr->tx_avail < EM_MAX_SCATTER) {
861                         ifp->if_drv_flags |= IFF_DRV_OACTIVE;
862                         break;
863                 }
864                 next = drbr_dequeue(ifp, txr->br);
865         }
866
867         if (enq > 0) {
868                 /* Set the watchdog */
869                 txr->queue_status = EM_QUEUE_WORKING;
870                 txr->watchdog_time = ticks;
871         }
872         return (err);
873 }
874
875 /*
876 ** Multiqueue capable stack interface
877 */
878 static int
879 em_mq_start(struct ifnet *ifp, struct mbuf *m)
880 {
881         struct adapter  *adapter = ifp->if_softc;
882         struct tx_ring  *txr = adapter->tx_rings;
883         int             error;
884
885         if (EM_TX_TRYLOCK(txr)) {
886                 error = em_mq_start_locked(ifp, txr, m);
887                 EM_TX_UNLOCK(txr);
888         } else 
889                 error = drbr_enqueue(ifp, txr->br, m);
890
891         return (error);
892 }
893
894 /*
895 ** Flush all ring buffers
896 */
897 static void
898 em_qflush(struct ifnet *ifp)
899 {
900         struct adapter  *adapter = ifp->if_softc;
901         struct tx_ring  *txr = adapter->tx_rings;
902         struct mbuf     *m;
903
904         for (int i = 0; i < adapter->num_queues; i++, txr++) {
905                 EM_TX_LOCK(txr);
906                 while ((m = buf_ring_dequeue_sc(txr->br)) != NULL)
907                         m_freem(m);
908                 EM_TX_UNLOCK(txr);
909         }
910         if_qflush(ifp);
911 }
912
913 #endif /* EM_MULTIQUEUE */
914
915 static void
916 em_start_locked(struct ifnet *ifp, struct tx_ring *txr)
917 {
918         struct adapter  *adapter = ifp->if_softc;
919         struct mbuf     *m_head;
920
921         EM_TX_LOCK_ASSERT(txr);
922
923         if ((ifp->if_drv_flags & (IFF_DRV_RUNNING|IFF_DRV_OACTIVE)) !=
924             IFF_DRV_RUNNING)
925                 return;
926
927         if (!adapter->link_active)
928                 return;
929
930         /* Call cleanup if number of TX descriptors low */
931         if (txr->tx_avail <= EM_TX_CLEANUP_THRESHOLD)
932                 em_txeof(txr);
933
934         while (!IFQ_DRV_IS_EMPTY(&ifp->if_snd)) {
935                 if (txr->tx_avail < EM_MAX_SCATTER) {
936                         ifp->if_drv_flags |= IFF_DRV_OACTIVE;
937                         break;
938                 }
939                 IFQ_DRV_DEQUEUE(&ifp->if_snd, m_head);
940                 if (m_head == NULL)
941                         break;
942                 /*
943                  *  Encapsulation can modify our pointer, and or make it
944                  *  NULL on failure.  In that event, we can't requeue.
945                  */
946                 if (em_xmit(txr, &m_head)) {
947                         if (m_head == NULL)
948                                 break;
949                         ifp->if_drv_flags |= IFF_DRV_OACTIVE;
950                         IFQ_DRV_PREPEND(&ifp->if_snd, m_head);
951                         break;
952                 }
953
954                 /* Send a copy of the frame to the BPF listener */
955                 ETHER_BPF_MTAP(ifp, m_head);
956
957                 /* Set timeout in case hardware has problems transmitting. */
958                 txr->watchdog_time = ticks;
959                 txr->queue_status = EM_QUEUE_WORKING;
960         }
961
962         return;
963 }
964
965 static void
966 em_start(struct ifnet *ifp)
967 {
968         struct adapter  *adapter = ifp->if_softc;
969         struct tx_ring  *txr = adapter->tx_rings;
970
971         if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
972                 EM_TX_LOCK(txr);
973                 em_start_locked(ifp, txr);
974                 EM_TX_UNLOCK(txr);
975         }
976         return;
977 }
978
979 /*********************************************************************
980  *  Ioctl entry point
981  *
982  *  em_ioctl is called when the user wants to configure the
983  *  interface.
984  *
985  *  return 0 on success, positive on failure
986  **********************************************************************/
987
988 static int
989 em_ioctl(struct ifnet *ifp, u_long command, caddr_t data)
990 {
991         struct adapter  *adapter = ifp->if_softc;
992         struct ifreq *ifr = (struct ifreq *)data;
993 #ifdef INET
994         struct ifaddr *ifa = (struct ifaddr *)data;
995 #endif
996         int error = 0;
997
998         if (adapter->in_detach)
999                 return (error);
1000
1001         switch (command) {
1002         case SIOCSIFADDR:
1003 #ifdef INET
1004                 if (ifa->ifa_addr->sa_family == AF_INET) {
1005                         /*
1006                          * XXX
1007                          * Since resetting hardware takes a very long time
1008                          * and results in link renegotiation we only
1009                          * initialize the hardware only when it is absolutely
1010                          * required.
1011                          */
1012                         ifp->if_flags |= IFF_UP;
1013                         if (!(ifp->if_drv_flags & IFF_DRV_RUNNING)) {
1014                                 EM_CORE_LOCK(adapter);
1015                                 em_init_locked(adapter);
1016                                 EM_CORE_UNLOCK(adapter);
1017                         }
1018                         arp_ifinit(ifp, ifa);
1019                 } else
1020 #endif
1021                         error = ether_ioctl(ifp, command, data);
1022                 break;
1023         case SIOCSIFMTU:
1024             {
1025                 int max_frame_size;
1026
1027                 IOCTL_DEBUGOUT("ioctl rcv'd: SIOCSIFMTU (Set Interface MTU)");
1028
1029                 EM_CORE_LOCK(adapter);
1030                 switch (adapter->hw.mac.type) {
1031                 case e1000_82571:
1032                 case e1000_82572:
1033                 case e1000_ich9lan:
1034                 case e1000_ich10lan:
1035                 case e1000_pch2lan:
1036                 case e1000_82574:
1037                 case e1000_80003es2lan: /* 9K Jumbo Frame size */
1038                         max_frame_size = 9234;
1039                         break;
1040                 case e1000_pchlan:
1041                         max_frame_size = 4096;
1042                         break;
1043                         /* Adapters that do not support jumbo frames */
1044                 case e1000_82583:
1045                 case e1000_ich8lan:
1046                         max_frame_size = ETHER_MAX_LEN;
1047                         break;
1048                 default:
1049                         max_frame_size = MAX_JUMBO_FRAME_SIZE;
1050                 }
1051                 if (ifr->ifr_mtu > max_frame_size - ETHER_HDR_LEN -
1052                     ETHER_CRC_LEN) {
1053                         EM_CORE_UNLOCK(adapter);
1054                         error = EINVAL;
1055                         break;
1056                 }
1057
1058                 ifp->if_mtu = ifr->ifr_mtu;
1059                 adapter->max_frame_size =
1060                     ifp->if_mtu + ETHER_HDR_LEN + ETHER_CRC_LEN;
1061                 em_init_locked(adapter);
1062                 EM_CORE_UNLOCK(adapter);
1063                 break;
1064             }
1065         case SIOCSIFFLAGS:
1066                 IOCTL_DEBUGOUT("ioctl rcv'd:\
1067                     SIOCSIFFLAGS (Set Interface Flags)");
1068                 EM_CORE_LOCK(adapter);
1069                 if (ifp->if_flags & IFF_UP) {
1070                         if ((ifp->if_drv_flags & IFF_DRV_RUNNING)) {
1071                                 if ((ifp->if_flags ^ adapter->if_flags) &
1072                                     (IFF_PROMISC | IFF_ALLMULTI)) {
1073                                         em_disable_promisc(adapter);
1074                                         em_set_promisc(adapter);
1075                                 }
1076                         } else
1077                                 em_init_locked(adapter);
1078                 } else
1079                         if (ifp->if_drv_flags & IFF_DRV_RUNNING)
1080                                 em_stop(adapter);
1081                 adapter->if_flags = ifp->if_flags;
1082                 EM_CORE_UNLOCK(adapter);
1083                 break;
1084         case SIOCADDMULTI:
1085         case SIOCDELMULTI:
1086                 IOCTL_DEBUGOUT("ioctl rcv'd: SIOC(ADD|DEL)MULTI");
1087                 if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
1088                         EM_CORE_LOCK(adapter);
1089                         em_disable_intr(adapter);
1090                         em_set_multi(adapter);
1091 #ifdef DEVICE_POLLING
1092                         if (!(ifp->if_capenable & IFCAP_POLLING))
1093 #endif
1094                                 em_enable_intr(adapter);
1095                         EM_CORE_UNLOCK(adapter);
1096                 }
1097                 break;
1098         case SIOCSIFMEDIA:
1099                 /*
1100                 ** As the speed/duplex settings are being
1101                 ** changed, we need to reset the PHY.
1102                 */
1103                 adapter->hw.phy.reset_disable = FALSE;
1104                 /* Check SOL/IDER usage */
1105                 EM_CORE_LOCK(adapter);
1106                 if (e1000_check_reset_block(&adapter->hw)) {
1107                         EM_CORE_UNLOCK(adapter);
1108                         device_printf(adapter->dev, "Media change is"
1109                             " blocked due to SOL/IDER session.\n");
1110                         break;
1111                 }
1112                 EM_CORE_UNLOCK(adapter);
1113                 /* falls thru */
1114         case SIOCGIFMEDIA:
1115                 IOCTL_DEBUGOUT("ioctl rcv'd: \
1116                     SIOCxIFMEDIA (Get/Set Interface Media)");
1117                 error = ifmedia_ioctl(ifp, ifr, &adapter->media, command);
1118                 break;
1119         case SIOCSIFCAP:
1120             {
1121                 int mask, reinit;
1122
1123                 IOCTL_DEBUGOUT("ioctl rcv'd: SIOCSIFCAP (Set Capabilities)");
1124                 reinit = 0;
1125                 mask = ifr->ifr_reqcap ^ ifp->if_capenable;
1126 #ifdef DEVICE_POLLING
1127                 if (mask & IFCAP_POLLING) {
1128                         if (ifr->ifr_reqcap & IFCAP_POLLING) {
1129                                 error = ether_poll_register(em_poll, ifp);
1130                                 if (error)
1131                                         return (error);
1132                                 EM_CORE_LOCK(adapter);
1133                                 em_disable_intr(adapter);
1134                                 ifp->if_capenable |= IFCAP_POLLING;
1135                                 EM_CORE_UNLOCK(adapter);
1136                         } else {
1137                                 error = ether_poll_deregister(ifp);
1138                                 /* Enable interrupt even in error case */
1139                                 EM_CORE_LOCK(adapter);
1140                                 em_enable_intr(adapter);
1141                                 ifp->if_capenable &= ~IFCAP_POLLING;
1142                                 EM_CORE_UNLOCK(adapter);
1143                         }
1144                 }
1145 #endif
1146                 if (mask & IFCAP_HWCSUM) {
1147                         ifp->if_capenable ^= IFCAP_HWCSUM;
1148                         reinit = 1;
1149                 }
1150                 if (mask & IFCAP_TSO4) {
1151                         ifp->if_capenable ^= IFCAP_TSO4;
1152                         reinit = 1;
1153                 }
1154                 if (mask & IFCAP_VLAN_HWTAGGING) {
1155                         ifp->if_capenable ^= IFCAP_VLAN_HWTAGGING;
1156                         reinit = 1;
1157                 }
1158                 if (mask & IFCAP_VLAN_HWFILTER) {
1159                         ifp->if_capenable ^= IFCAP_VLAN_HWFILTER;
1160                         reinit = 1;
1161                 }
1162                 if ((mask & IFCAP_WOL) &&
1163                     (ifp->if_capabilities & IFCAP_WOL) != 0) {
1164                         if (mask & IFCAP_WOL_MCAST)
1165                                 ifp->if_capenable ^= IFCAP_WOL_MCAST;
1166                         if (mask & IFCAP_WOL_MAGIC)
1167                                 ifp->if_capenable ^= IFCAP_WOL_MAGIC;
1168                 }
1169                 if (reinit && (ifp->if_drv_flags & IFF_DRV_RUNNING))
1170                         em_init(adapter);
1171                 VLAN_CAPABILITIES(ifp);
1172                 break;
1173             }
1174
1175         default:
1176                 error = ether_ioctl(ifp, command, data);
1177                 break;
1178         }
1179
1180         return (error);
1181 }
1182
1183
1184 /*********************************************************************
1185  *  Init entry point
1186  *
1187  *  This routine is used in two ways. It is used by the stack as
1188  *  init entry point in network interface structure. It is also used
1189  *  by the driver as a hw/sw initialization routine to get to a
1190  *  consistent state.
1191  *
1192  *  return 0 on success, positive on failure
1193  **********************************************************************/
1194
1195 static void
1196 em_init_locked(struct adapter *adapter)
1197 {
1198         struct ifnet    *ifp = adapter->ifp;
1199         device_t        dev = adapter->dev;
1200         u32             pba;
1201
1202         INIT_DEBUGOUT("em_init: begin");
1203
1204         EM_CORE_LOCK_ASSERT(adapter);
1205
1206         em_disable_intr(adapter);
1207         callout_stop(&adapter->timer);
1208
1209         /*
1210          * Packet Buffer Allocation (PBA)
1211          * Writing PBA sets the receive portion of the buffer
1212          * the remainder is used for the transmit buffer.
1213          */
1214         switch (adapter->hw.mac.type) {
1215         /* Total Packet Buffer on these is 48K */
1216         case e1000_82571:
1217         case e1000_82572:
1218         case e1000_80003es2lan:
1219                         pba = E1000_PBA_32K; /* 32K for Rx, 16K for Tx */
1220                 break;
1221         case e1000_82573: /* 82573: Total Packet Buffer is 32K */
1222                         pba = E1000_PBA_12K; /* 12K for Rx, 20K for Tx */
1223                 break;
1224         case e1000_82574:
1225         case e1000_82583:
1226                         pba = E1000_PBA_20K; /* 20K for Rx, 20K for Tx */
1227                 break;
1228         case e1000_ich8lan:
1229                 pba = E1000_PBA_8K;
1230                 break;
1231         case e1000_ich9lan:
1232         case e1000_ich10lan:
1233                 pba = E1000_PBA_10K;
1234                 break;
1235         case e1000_pchlan:
1236         case e1000_pch2lan:
1237                 pba = E1000_PBA_26K;
1238                 break;
1239         default:
1240                 if (adapter->max_frame_size > 8192)
1241                         pba = E1000_PBA_40K; /* 40K for Rx, 24K for Tx */
1242                 else
1243                         pba = E1000_PBA_48K; /* 48K for Rx, 16K for Tx */
1244         }
1245
1246         INIT_DEBUGOUT1("em_init: pba=%dK",pba);
1247         E1000_WRITE_REG(&adapter->hw, E1000_PBA, pba);
1248         
1249         /* Get the latest mac address, User can use a LAA */
1250         bcopy(IF_LLADDR(adapter->ifp), adapter->hw.mac.addr,
1251               ETHER_ADDR_LEN);
1252
1253         /* Put the address into the Receive Address Array */
1254         e1000_rar_set(&adapter->hw, adapter->hw.mac.addr, 0);
1255
1256         /*
1257          * With the 82571 adapter, RAR[0] may be overwritten
1258          * when the other port is reset, we make a duplicate
1259          * in RAR[14] for that eventuality, this assures
1260          * the interface continues to function.
1261          */
1262         if (adapter->hw.mac.type == e1000_82571) {
1263                 e1000_set_laa_state_82571(&adapter->hw, TRUE);
1264                 e1000_rar_set(&adapter->hw, adapter->hw.mac.addr,
1265                     E1000_RAR_ENTRIES - 1);
1266         }
1267
1268         /* Initialize the hardware */
1269         em_reset(adapter);
1270         em_update_link_status(adapter);
1271
1272         /* Setup VLAN support, basic and offload if available */
1273         E1000_WRITE_REG(&adapter->hw, E1000_VET, ETHERTYPE_VLAN);
1274
1275         /* Set hardware offload abilities */
1276         ifp->if_hwassist = 0;
1277         if (ifp->if_capenable & IFCAP_TXCSUM)
1278                 ifp->if_hwassist |= (CSUM_TCP | CSUM_UDP);
1279         if (ifp->if_capenable & IFCAP_TSO4)
1280                 ifp->if_hwassist |= CSUM_TSO;
1281
1282         /* Configure for OS presence */
1283         em_init_manageability(adapter);
1284
1285         /* Prepare transmit descriptors and buffers */
1286         em_setup_transmit_structures(adapter);
1287         em_initialize_transmit_unit(adapter);
1288
1289         /* Setup Multicast table */
1290         em_set_multi(adapter);
1291
1292         /*
1293         ** Figure out the desired mbuf
1294         ** pool for doing jumbos
1295         */
1296         if (adapter->max_frame_size <= 2048)
1297                 adapter->rx_mbuf_sz = MCLBYTES;
1298         else if (adapter->max_frame_size <= 4096)
1299                 adapter->rx_mbuf_sz = MJUMPAGESIZE;
1300         else
1301                 adapter->rx_mbuf_sz = MJUM9BYTES;
1302
1303         /* Prepare receive descriptors and buffers */
1304         if (em_setup_receive_structures(adapter)) {
1305                 device_printf(dev, "Could not setup receive structures\n");
1306                 em_stop(adapter);
1307                 return;
1308         }
1309         em_initialize_receive_unit(adapter);
1310
1311         /* Use real VLAN Filter support? */
1312         if (ifp->if_capenable & IFCAP_VLAN_HWTAGGING) {
1313                 if (ifp->if_capenable & IFCAP_VLAN_HWFILTER)
1314                         /* Use real VLAN Filter support */
1315                         em_setup_vlan_hw_support(adapter);
1316                 else {
1317                         u32 ctrl;
1318                         ctrl = E1000_READ_REG(&adapter->hw, E1000_CTRL);
1319                         ctrl |= E1000_CTRL_VME;
1320                         E1000_WRITE_REG(&adapter->hw, E1000_CTRL, ctrl);
1321                 }
1322         }
1323
1324         /* Don't lose promiscuous settings */
1325         em_set_promisc(adapter);
1326
1327         ifp->if_drv_flags |= IFF_DRV_RUNNING;
1328         ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;
1329
1330         callout_reset(&adapter->timer, hz, em_local_timer, adapter);
1331         e1000_clear_hw_cntrs_base_generic(&adapter->hw);
1332
1333         /* MSI/X configuration for 82574 */
1334         if (adapter->hw.mac.type == e1000_82574) {
1335                 int tmp;
1336                 tmp = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
1337                 tmp |= E1000_CTRL_EXT_PBA_CLR;
1338                 E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT, tmp);
1339                 /* Set the IVAR - interrupt vector routing. */
1340                 E1000_WRITE_REG(&adapter->hw, E1000_IVAR, adapter->ivars);
1341         }
1342
1343 #ifdef DEVICE_POLLING
1344         /*
1345          * Only enable interrupts if we are not polling, make sure
1346          * they are off otherwise.
1347          */
1348         if (ifp->if_capenable & IFCAP_POLLING)
1349                 em_disable_intr(adapter);
1350         else
1351 #endif /* DEVICE_POLLING */
1352                 em_enable_intr(adapter);
1353
1354         /* AMT based hardware can now take control from firmware */
1355         if (adapter->has_manage && adapter->has_amt)
1356                 em_get_hw_control(adapter);
1357
1358         /* Don't reset the phy next time init gets called */
1359         adapter->hw.phy.reset_disable = TRUE;
1360 }
1361
1362 static void
1363 em_init(void *arg)
1364 {
1365         struct adapter *adapter = arg;
1366
1367         EM_CORE_LOCK(adapter);
1368         em_init_locked(adapter);
1369         EM_CORE_UNLOCK(adapter);
1370 }
1371
1372
1373 #ifdef DEVICE_POLLING
1374 /*********************************************************************
1375  *
1376  *  Legacy polling routine: note this only works with single queue
1377  *
1378  *********************************************************************/
1379 static int
1380 em_poll(struct ifnet *ifp, enum poll_cmd cmd, int count)
1381 {
1382         struct adapter *adapter = ifp->if_softc;
1383         struct tx_ring  *txr = adapter->tx_rings;
1384         struct rx_ring  *rxr = adapter->rx_rings;
1385         u32             reg_icr;
1386         int             rx_done;
1387
1388         EM_CORE_LOCK(adapter);
1389         if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) {
1390                 EM_CORE_UNLOCK(adapter);
1391                 return (0);
1392         }
1393
1394         if (cmd == POLL_AND_CHECK_STATUS) {
1395                 reg_icr = E1000_READ_REG(&adapter->hw, E1000_ICR);
1396                 if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
1397                         callout_stop(&adapter->timer);
1398                         adapter->hw.mac.get_link_status = 1;
1399                         em_update_link_status(adapter);
1400                         callout_reset(&adapter->timer, hz,
1401                             em_local_timer, adapter);
1402                 }
1403         }
1404         EM_CORE_UNLOCK(adapter);
1405
1406         em_rxeof(rxr, count, &rx_done);
1407
1408         EM_TX_LOCK(txr);
1409         em_txeof(txr);
1410 #ifdef EM_MULTIQUEUE
1411         if (!drbr_empty(ifp, txr->br))
1412                 em_mq_start_locked(ifp, txr, NULL);
1413 #else
1414         if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
1415                 em_start_locked(ifp, txr);
1416 #endif
1417         EM_TX_UNLOCK(txr);
1418
1419         return (rx_done);
1420 }
1421 #endif /* DEVICE_POLLING */
1422
1423
1424 /*********************************************************************
1425  *
1426  *  Fast Legacy/MSI Combined Interrupt Service routine  
1427  *
1428  *********************************************************************/
1429 static int
1430 em_irq_fast(void *arg)
1431 {
1432         struct adapter  *adapter = arg;
1433         struct ifnet    *ifp;
1434         u32             reg_icr;
1435
1436         ifp = adapter->ifp;
1437
1438         reg_icr = E1000_READ_REG(&adapter->hw, E1000_ICR);
1439
1440         /* Hot eject?  */
1441         if (reg_icr == 0xffffffff)
1442                 return FILTER_STRAY;
1443
1444         /* Definitely not our interrupt.  */
1445         if (reg_icr == 0x0)
1446                 return FILTER_STRAY;
1447
1448         /*
1449          * Starting with the 82571 chip, bit 31 should be used to
1450          * determine whether the interrupt belongs to us.
1451          */
1452         if (adapter->hw.mac.type >= e1000_82571 &&
1453             (reg_icr & E1000_ICR_INT_ASSERTED) == 0)
1454                 return FILTER_STRAY;
1455
1456         em_disable_intr(adapter);
1457         taskqueue_enqueue(adapter->tq, &adapter->que_task);
1458
1459         /* Link status change */
1460         if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
1461                 adapter->hw.mac.get_link_status = 1;
1462                 taskqueue_enqueue(taskqueue_fast, &adapter->link_task);
1463         }
1464
1465         if (reg_icr & E1000_ICR_RXO)
1466                 adapter->rx_overruns++;
1467         return FILTER_HANDLED;
1468 }
1469
1470 /* Combined RX/TX handler, used by Legacy and MSI */
1471 static void
1472 em_handle_que(void *context, int pending)
1473 {
1474         struct adapter  *adapter = context;
1475         struct ifnet    *ifp = adapter->ifp;
1476         struct tx_ring  *txr = adapter->tx_rings;
1477         struct rx_ring  *rxr = adapter->rx_rings;
1478         bool            more;
1479
1480
1481         if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
1482                 more = em_rxeof(rxr, adapter->rx_process_limit, NULL);
1483
1484                 EM_TX_LOCK(txr);
1485                 em_txeof(txr);
1486 #ifdef EM_MULTIQUEUE
1487                 if (!drbr_empty(ifp, txr->br))
1488                         em_mq_start_locked(ifp, txr, NULL);
1489 #else
1490                 if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
1491                         em_start_locked(ifp, txr);
1492 #endif
1493                 em_txeof(txr);
1494                 EM_TX_UNLOCK(txr);
1495                 if (more) {
1496                         taskqueue_enqueue(adapter->tq, &adapter->que_task);
1497                         return;
1498                 }
1499         }
1500
1501         em_enable_intr(adapter);
1502         return;
1503 }
1504
1505
1506 /*********************************************************************
1507  *
1508  *  MSIX Interrupt Service Routines
1509  *
1510  **********************************************************************/
1511 static void
1512 em_msix_tx(void *arg)
1513 {
1514         struct tx_ring *txr = arg;
1515         struct adapter *adapter = txr->adapter;
1516         bool            more;
1517
1518         ++txr->tx_irq;
1519         EM_TX_LOCK(txr);
1520         more = em_txeof(txr);
1521         EM_TX_UNLOCK(txr);
1522         if (more)
1523                 taskqueue_enqueue(txr->tq, &txr->tx_task);
1524         else
1525                 /* Reenable this interrupt */
1526                 E1000_WRITE_REG(&adapter->hw, E1000_IMS, txr->ims);
1527         return;
1528 }
1529
1530 /*********************************************************************
1531  *
1532  *  MSIX RX Interrupt Service routine
1533  *
1534  **********************************************************************/
1535
1536 static void
1537 em_msix_rx(void *arg)
1538 {
1539         struct rx_ring  *rxr = arg;
1540         struct adapter  *adapter = rxr->adapter;
1541         bool            more;
1542
1543         ++rxr->rx_irq;
1544         more = em_rxeof(rxr, adapter->rx_process_limit, NULL);
1545         if (more)
1546                 taskqueue_enqueue(rxr->tq, &rxr->rx_task);
1547         else
1548                 /* Reenable this interrupt */
1549                 E1000_WRITE_REG(&adapter->hw, E1000_IMS, rxr->ims);
1550         return;
1551 }
1552
1553 /*********************************************************************
1554  *
1555  *  MSIX Link Fast Interrupt Service routine
1556  *
1557  **********************************************************************/
1558 static void
1559 em_msix_link(void *arg)
1560 {
1561         struct adapter  *adapter = arg;
1562         u32             reg_icr;
1563
1564         ++adapter->link_irq;
1565         reg_icr = E1000_READ_REG(&adapter->hw, E1000_ICR);
1566
1567         if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
1568                 adapter->hw.mac.get_link_status = 1;
1569                 em_handle_link(adapter, 0);
1570         } else
1571                 E1000_WRITE_REG(&adapter->hw, E1000_IMS,
1572                     EM_MSIX_LINK | E1000_IMS_LSC);
1573         return;
1574 }
1575
1576 static void
1577 em_handle_rx(void *context, int pending)
1578 {
1579         struct rx_ring  *rxr = context;
1580         struct adapter  *adapter = rxr->adapter;
1581         bool            more;
1582
1583         more = em_rxeof(rxr, adapter->rx_process_limit, NULL);
1584         if (more)
1585                 taskqueue_enqueue(rxr->tq, &rxr->rx_task);
1586         else
1587                 /* Reenable this interrupt */
1588                 E1000_WRITE_REG(&adapter->hw, E1000_IMS, rxr->ims);
1589 }
1590
1591 static void
1592 em_handle_tx(void *context, int pending)
1593 {
1594         struct tx_ring  *txr = context;
1595         struct adapter  *adapter = txr->adapter;
1596         struct ifnet    *ifp = adapter->ifp;
1597
1598         EM_TX_LOCK(txr);
1599         em_txeof(txr);
1600 #ifdef EM_MULTIQUEUE
1601         if (!drbr_empty(ifp, txr->br))
1602                 em_mq_start_locked(ifp, txr, NULL);
1603 #else
1604         if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
1605                 em_start_locked(ifp, txr);
1606 #endif
1607         em_txeof(txr);
1608         E1000_WRITE_REG(&adapter->hw, E1000_IMS, txr->ims);
1609         EM_TX_UNLOCK(txr);
1610 }
1611
1612 static void
1613 em_handle_link(void *context, int pending)
1614 {
1615         struct adapter  *adapter = context;
1616         struct ifnet *ifp = adapter->ifp;
1617
1618         if (!(ifp->if_drv_flags & IFF_DRV_RUNNING))
1619                 return;
1620
1621         EM_CORE_LOCK(adapter);
1622         callout_stop(&adapter->timer);
1623         em_update_link_status(adapter);
1624         callout_reset(&adapter->timer, hz, em_local_timer, adapter);
1625         E1000_WRITE_REG(&adapter->hw, E1000_IMS,
1626             EM_MSIX_LINK | E1000_IMS_LSC);
1627         EM_CORE_UNLOCK(adapter);
1628 }
1629
1630
1631 /*********************************************************************
1632  *
1633  *  Media Ioctl callback
1634  *
1635  *  This routine is called whenever the user queries the status of
1636  *  the interface using ifconfig.
1637  *
1638  **********************************************************************/
1639 static void
1640 em_media_status(struct ifnet *ifp, struct ifmediareq *ifmr)
1641 {
1642         struct adapter *adapter = ifp->if_softc;
1643         u_char fiber_type = IFM_1000_SX;
1644
1645         INIT_DEBUGOUT("em_media_status: begin");
1646
1647         EM_CORE_LOCK(adapter);
1648         em_update_link_status(adapter);
1649
1650         ifmr->ifm_status = IFM_AVALID;
1651         ifmr->ifm_active = IFM_ETHER;
1652
1653         if (!adapter->link_active) {
1654                 EM_CORE_UNLOCK(adapter);
1655                 return;
1656         }
1657
1658         ifmr->ifm_status |= IFM_ACTIVE;
1659
1660         if ((adapter->hw.phy.media_type == e1000_media_type_fiber) ||
1661             (adapter->hw.phy.media_type == e1000_media_type_internal_serdes)) {
1662                 ifmr->ifm_active |= fiber_type | IFM_FDX;
1663         } else {
1664                 switch (adapter->link_speed) {
1665                 case 10:
1666                         ifmr->ifm_active |= IFM_10_T;
1667                         break;
1668                 case 100:
1669                         ifmr->ifm_active |= IFM_100_TX;
1670                         break;
1671                 case 1000:
1672                         ifmr->ifm_active |= IFM_1000_T;
1673                         break;
1674                 }
1675                 if (adapter->link_duplex == FULL_DUPLEX)
1676                         ifmr->ifm_active |= IFM_FDX;
1677                 else
1678                         ifmr->ifm_active |= IFM_HDX;
1679         }
1680         EM_CORE_UNLOCK(adapter);
1681 }
1682
1683 /*********************************************************************
1684  *
1685  *  Media Ioctl callback
1686  *
1687  *  This routine is called when the user changes speed/duplex using
1688  *  media/mediopt option with ifconfig.
1689  *
1690  **********************************************************************/
1691 static int
1692 em_media_change(struct ifnet *ifp)
1693 {
1694         struct adapter *adapter = ifp->if_softc;
1695         struct ifmedia  *ifm = &adapter->media;
1696
1697         INIT_DEBUGOUT("em_media_change: begin");
1698
1699         if (IFM_TYPE(ifm->ifm_media) != IFM_ETHER)
1700                 return (EINVAL);
1701
1702         EM_CORE_LOCK(adapter);
1703         switch (IFM_SUBTYPE(ifm->ifm_media)) {
1704         case IFM_AUTO:
1705                 adapter->hw.mac.autoneg = DO_AUTO_NEG;
1706                 adapter->hw.phy.autoneg_advertised = AUTONEG_ADV_DEFAULT;
1707                 break;
1708         case IFM_1000_LX:
1709         case IFM_1000_SX:
1710         case IFM_1000_T:
1711                 adapter->hw.mac.autoneg = DO_AUTO_NEG;
1712                 adapter->hw.phy.autoneg_advertised = ADVERTISE_1000_FULL;
1713                 break;
1714         case IFM_100_TX:
1715                 adapter->hw.mac.autoneg = FALSE;
1716                 adapter->hw.phy.autoneg_advertised = 0;
1717                 if ((ifm->ifm_media & IFM_GMASK) == IFM_FDX)
1718                         adapter->hw.mac.forced_speed_duplex = ADVERTISE_100_FULL;
1719                 else
1720                         adapter->hw.mac.forced_speed_duplex = ADVERTISE_100_HALF;
1721                 break;
1722         case IFM_10_T:
1723                 adapter->hw.mac.autoneg = FALSE;
1724                 adapter->hw.phy.autoneg_advertised = 0;
1725                 if ((ifm->ifm_media & IFM_GMASK) == IFM_FDX)
1726                         adapter->hw.mac.forced_speed_duplex = ADVERTISE_10_FULL;
1727                 else
1728                         adapter->hw.mac.forced_speed_duplex = ADVERTISE_10_HALF;
1729                 break;
1730         default:
1731                 device_printf(adapter->dev, "Unsupported media type\n");
1732         }
1733
1734         em_init_locked(adapter);
1735         EM_CORE_UNLOCK(adapter);
1736
1737         return (0);
1738 }
1739
1740 /*********************************************************************
1741  *
1742  *  This routine maps the mbufs to tx descriptors.
1743  *
1744  *  return 0 on success, positive on failure
1745  **********************************************************************/
1746
1747 static int
1748 em_xmit(struct tx_ring *txr, struct mbuf **m_headp)
1749 {
1750         struct adapter          *adapter = txr->adapter;
1751         bus_dma_segment_t       segs[EM_MAX_SCATTER];
1752         bus_dmamap_t            map;
1753         struct em_buffer        *tx_buffer, *tx_buffer_mapped;
1754         struct e1000_tx_desc    *ctxd = NULL;
1755         struct mbuf             *m_head;
1756         struct ether_header     *eh;
1757         struct ip               *ip = NULL;
1758         struct tcphdr           *tp = NULL;
1759         u32                     txd_upper, txd_lower, txd_used, txd_saved;
1760         int                     ip_off, poff;
1761         int                     nsegs, i, j, first, last = 0;
1762         int                     error, do_tso, tso_desc = 0;
1763
1764         m_head = *m_headp;
1765         txd_upper = txd_lower = txd_used = txd_saved = 0;
1766         do_tso = ((m_head->m_pkthdr.csum_flags & CSUM_TSO) != 0);
1767         ip_off = poff = 0;
1768
1769         /*
1770          * Intel recommends entire IP/TCP header length reside in a single
1771          * buffer. If multiple descriptors are used to describe the IP and
1772          * TCP header, each descriptor should describe one or more
1773          * complete headers; descriptors referencing only parts of headers
1774          * are not supported. If all layer headers are not coalesced into
1775          * a single buffer, each buffer should not cross a 4KB boundary,
1776          * or be larger than the maximum read request size.
1777          * Controller also requires modifing IP/TCP header to make TSO work
1778          * so we firstly get a writable mbuf chain then coalesce ethernet/
1779          * IP/TCP header into a single buffer to meet the requirement of
1780          * controller. This also simplifies IP/TCP/UDP checksum offloading
1781          * which also has similiar restrictions.
1782          */
1783         if (do_tso || m_head->m_pkthdr.csum_flags & CSUM_OFFLOAD) {
1784                 if (do_tso || (m_head->m_next != NULL && 
1785                     m_head->m_pkthdr.csum_flags & CSUM_OFFLOAD)) {
1786                         if (M_WRITABLE(*m_headp) == 0) {
1787                                 m_head = m_dup(*m_headp, M_DONTWAIT);
1788                                 m_freem(*m_headp);
1789                                 if (m_head == NULL) {
1790                                         *m_headp = NULL;
1791                                         return (ENOBUFS);
1792                                 }
1793                                 *m_headp = m_head;
1794                         }
1795                 }
1796                 /*
1797                  * XXX
1798                  * Assume IPv4, we don't have TSO/checksum offload support
1799                  * for IPv6 yet.
1800                  */
1801                 ip_off = sizeof(struct ether_header);
1802                 m_head = m_pullup(m_head, ip_off);
1803                 if (m_head == NULL) {
1804                         *m_headp = NULL;
1805                         return (ENOBUFS);
1806                 }
1807                 eh = mtod(m_head, struct ether_header *);
1808                 if (eh->ether_type == htons(ETHERTYPE_VLAN)) {
1809                         ip_off = sizeof(struct ether_vlan_header);
1810                         m_head = m_pullup(m_head, ip_off);
1811                         if (m_head == NULL) {
1812                                 *m_headp = NULL;
1813                                 return (ENOBUFS);
1814                         }
1815                 }
1816                 m_head = m_pullup(m_head, ip_off + sizeof(struct ip));
1817                 if (m_head == NULL) {
1818                         *m_headp = NULL;
1819                         return (ENOBUFS);
1820                 }
1821                 ip = (struct ip *)(mtod(m_head, char *) + ip_off);
1822                 poff = ip_off + (ip->ip_hl << 2);
1823                 m_head = m_pullup(m_head, poff + sizeof(struct tcphdr));
1824                 if (m_head == NULL) {
1825                         *m_headp = NULL;
1826                         return (ENOBUFS);
1827                 }
1828                 if (do_tso) {
1829                         tp = (struct tcphdr *)(mtod(m_head, char *) + poff);
1830                         /*
1831                          * TSO workaround:
1832                          *   pull 4 more bytes of data into it.
1833                          */
1834                         m_head = m_pullup(m_head, poff + (tp->th_off << 2) + 4);
1835                         if (m_head == NULL) {
1836                                 *m_headp = NULL;
1837                                 return (ENOBUFS);
1838                         }
1839                         ip = (struct ip *)(mtod(m_head, char *) + ip_off);
1840                         ip->ip_len = 0;
1841                         ip->ip_sum = 0;
1842                         /*
1843                          * The pseudo TCP checksum does not include TCP payload
1844                          * length so driver should recompute the checksum here
1845                          * what hardware expect to see. This is adherence of
1846                          * Microsoft's Large Send specification.
1847                          */
1848                         tp = (struct tcphdr *)(mtod(m_head, char *) + poff);
1849                         tp->th_sum = in_pseudo(ip->ip_src.s_addr,
1850                             ip->ip_dst.s_addr, htons(IPPROTO_TCP));
1851                 } else if (m_head->m_pkthdr.csum_flags & CSUM_TCP) {
1852                         tp = (struct tcphdr *)(mtod(m_head, char *) + poff);
1853                         m_head = m_pullup(m_head, poff + (tp->th_off << 2));
1854                         if (m_head == NULL) {
1855                                 *m_headp = NULL;
1856                                 return (ENOBUFS);
1857                         }
1858                         ip = (struct ip *)(mtod(m_head, char *) + ip_off);
1859                         tp = (struct tcphdr *)(mtod(m_head, char *) + poff);
1860                 } else if (m_head->m_pkthdr.csum_flags & CSUM_UDP) {
1861                         m_head = m_pullup(m_head, poff + sizeof(struct udphdr));
1862                         if (m_head == NULL) {
1863                                 *m_headp = NULL;
1864                                 return (ENOBUFS);
1865                         }
1866                         ip = (struct ip *)(mtod(m_head, char *) + ip_off);
1867                 }
1868                 *m_headp = m_head;
1869         }
1870
1871         /*
1872          * Map the packet for DMA
1873          *
1874          * Capture the first descriptor index,
1875          * this descriptor will have the index
1876          * of the EOP which is the only one that
1877          * now gets a DONE bit writeback.
1878          */
1879         first = txr->next_avail_desc;
1880         tx_buffer = &txr->tx_buffers[first];
1881         tx_buffer_mapped = tx_buffer;
1882         map = tx_buffer->map;
1883
1884         error = bus_dmamap_load_mbuf_sg(txr->txtag, map,
1885             *m_headp, segs, &nsegs, BUS_DMA_NOWAIT);
1886
1887         /*
1888          * There are two types of errors we can (try) to handle:
1889          * - EFBIG means the mbuf chain was too long and bus_dma ran
1890          *   out of segments.  Defragment the mbuf chain and try again.
1891          * - ENOMEM means bus_dma could not obtain enough bounce buffers
1892          *   at this point in time.  Defer sending and try again later.
1893          * All other errors, in particular EINVAL, are fatal and prevent the
1894          * mbuf chain from ever going through.  Drop it and report error.
1895          */
1896         if (error == EFBIG) {
1897                 struct mbuf *m;
1898
1899                 m = m_defrag(*m_headp, M_DONTWAIT);
1900                 if (m == NULL) {
1901                         adapter->mbuf_alloc_failed++;
1902                         m_freem(*m_headp);
1903                         *m_headp = NULL;
1904                         return (ENOBUFS);
1905                 }
1906                 *m_headp = m;
1907
1908                 /* Try it again */
1909                 error = bus_dmamap_load_mbuf_sg(txr->txtag, map,
1910                     *m_headp, segs, &nsegs, BUS_DMA_NOWAIT);
1911
1912                 if (error) {
1913                         adapter->no_tx_dma_setup++;
1914                         m_freem(*m_headp);
1915                         *m_headp = NULL;
1916                         return (error);
1917                 }
1918         } else if (error != 0) {
1919                 adapter->no_tx_dma_setup++;
1920                 return (error);
1921         }
1922
1923         /*
1924          * TSO Hardware workaround, if this packet is not
1925          * TSO, and is only a single descriptor long, and
1926          * it follows a TSO burst, then we need to add a
1927          * sentinel descriptor to prevent premature writeback.
1928          */
1929         if ((do_tso == 0) && (txr->tx_tso == TRUE)) {
1930                 if (nsegs == 1)
1931                         tso_desc = TRUE;
1932                 txr->tx_tso = FALSE;
1933         }
1934
1935         if (nsegs > (txr->tx_avail - 2)) {
1936                 txr->no_desc_avail++;
1937                 bus_dmamap_unload(txr->txtag, map);
1938                 return (ENOBUFS);
1939         }
1940         m_head = *m_headp;
1941
1942         /* Do hardware assists */
1943         if (m_head->m_pkthdr.csum_flags & CSUM_TSO) {
1944                 em_tso_setup(txr, m_head, ip_off, ip, tp,
1945                     &txd_upper, &txd_lower);
1946                 /* we need to make a final sentinel transmit desc */
1947                 tso_desc = TRUE;
1948         } else if (m_head->m_pkthdr.csum_flags & CSUM_OFFLOAD)
1949                 em_transmit_checksum_setup(txr, m_head,
1950                     ip_off, ip, &txd_upper, &txd_lower);
1951
1952         i = txr->next_avail_desc;
1953
1954         /* Set up our transmit descriptors */
1955         for (j = 0; j < nsegs; j++) {
1956                 bus_size_t seg_len;
1957                 bus_addr_t seg_addr;
1958
1959                 tx_buffer = &txr->tx_buffers[i];
1960                 ctxd = &txr->tx_base[i];
1961                 seg_addr = segs[j].ds_addr;
1962                 seg_len  = segs[j].ds_len;
1963                 /*
1964                 ** TSO Workaround:
1965                 ** If this is the last descriptor, we want to
1966                 ** split it so we have a small final sentinel
1967                 */
1968                 if (tso_desc && (j == (nsegs -1)) && (seg_len > 8)) {
1969                         seg_len -= 4;
1970                         ctxd->buffer_addr = htole64(seg_addr);
1971                         ctxd->lower.data = htole32(
1972                         adapter->txd_cmd | txd_lower | seg_len);
1973                         ctxd->upper.data =
1974                             htole32(txd_upper);
1975                         if (++i == adapter->num_tx_desc)
1976                                 i = 0;
1977                         /* Now make the sentinel */     
1978                         ++txd_used; /* using an extra txd */
1979                         ctxd = &txr->tx_base[i];
1980                         tx_buffer = &txr->tx_buffers[i];
1981                         ctxd->buffer_addr =
1982                             htole64(seg_addr + seg_len);
1983                         ctxd->lower.data = htole32(
1984                         adapter->txd_cmd | txd_lower | 4);
1985                         ctxd->upper.data =
1986                             htole32(txd_upper);
1987                         last = i;
1988                         if (++i == adapter->num_tx_desc)
1989                                 i = 0;
1990                 } else {
1991                         ctxd->buffer_addr = htole64(seg_addr);
1992                         ctxd->lower.data = htole32(
1993                         adapter->txd_cmd | txd_lower | seg_len);
1994                         ctxd->upper.data =
1995                             htole32(txd_upper);
1996                         last = i;
1997                         if (++i == adapter->num_tx_desc)
1998                                 i = 0;
1999                 }
2000                 tx_buffer->m_head = NULL;
2001                 tx_buffer->next_eop = -1;
2002         }
2003
2004         txr->next_avail_desc = i;
2005         txr->tx_avail -= nsegs;
2006         if (tso_desc) /* TSO used an extra for sentinel */
2007                 txr->tx_avail -= txd_used;
2008
2009         if (m_head->m_flags & M_VLANTAG) {
2010                 /* Set the vlan id. */
2011                 ctxd->upper.fields.special =
2012                     htole16(m_head->m_pkthdr.ether_vtag);
2013                 /* Tell hardware to add tag */
2014                 ctxd->lower.data |= htole32(E1000_TXD_CMD_VLE);
2015         }
2016
2017         tx_buffer->m_head = m_head;
2018         tx_buffer_mapped->map = tx_buffer->map;
2019         tx_buffer->map = map;
2020         bus_dmamap_sync(txr->txtag, map, BUS_DMASYNC_PREWRITE);
2021
2022         /*
2023          * Last Descriptor of Packet
2024          * needs End Of Packet (EOP)
2025          * and Report Status (RS)
2026          */
2027         ctxd->lower.data |=
2028             htole32(E1000_TXD_CMD_EOP | E1000_TXD_CMD_RS);
2029         /*
2030          * Keep track in the first buffer which
2031          * descriptor will be written back
2032          */
2033         tx_buffer = &txr->tx_buffers[first];
2034         tx_buffer->next_eop = last;
2035         /* Update the watchdog time early and often */
2036         txr->watchdog_time = ticks;
2037
2038         /*
2039          * Advance the Transmit Descriptor Tail (TDT), this tells the E1000
2040          * that this frame is available to transmit.
2041          */
2042         bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
2043             BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
2044         E1000_WRITE_REG(&adapter->hw, E1000_TDT(txr->me), i);
2045
2046         return (0);
2047 }
2048
2049 static void
2050 em_set_promisc(struct adapter *adapter)
2051 {
2052         struct ifnet    *ifp = adapter->ifp;
2053         u32             reg_rctl;
2054
2055         reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
2056
2057         if (ifp->if_flags & IFF_PROMISC) {
2058                 reg_rctl |= (E1000_RCTL_UPE | E1000_RCTL_MPE);
2059                 /* Turn this on if you want to see bad packets */
2060                 if (em_debug_sbp)
2061                         reg_rctl |= E1000_RCTL_SBP;
2062                 E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2063         } else if (ifp->if_flags & IFF_ALLMULTI) {
2064                 reg_rctl |= E1000_RCTL_MPE;
2065                 reg_rctl &= ~E1000_RCTL_UPE;
2066                 E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2067         }
2068 }
2069
2070 static void
2071 em_disable_promisc(struct adapter *adapter)
2072 {
2073         u32     reg_rctl;
2074
2075         reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
2076
2077         reg_rctl &=  (~E1000_RCTL_UPE);
2078         reg_rctl &=  (~E1000_RCTL_MPE);
2079         reg_rctl &=  (~E1000_RCTL_SBP);
2080         E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2081 }
2082
2083
2084 /*********************************************************************
2085  *  Multicast Update
2086  *
2087  *  This routine is called whenever multicast address list is updated.
2088  *
2089  **********************************************************************/
2090
2091 static void
2092 em_set_multi(struct adapter *adapter)
2093 {
2094         struct ifnet    *ifp = adapter->ifp;
2095         struct ifmultiaddr *ifma;
2096         u32 reg_rctl = 0;
2097         u8  *mta; /* Multicast array memory */
2098         int mcnt = 0;
2099
2100         IOCTL_DEBUGOUT("em_set_multi: begin");
2101
2102         mta = adapter->mta;
2103         bzero(mta, sizeof(u8) * ETH_ADDR_LEN * MAX_NUM_MULTICAST_ADDRESSES);
2104
2105         if (adapter->hw.mac.type == e1000_82542 && 
2106             adapter->hw.revision_id == E1000_REVISION_2) {
2107                 reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
2108                 if (adapter->hw.bus.pci_cmd_word & CMD_MEM_WRT_INVALIDATE)
2109                         e1000_pci_clear_mwi(&adapter->hw);
2110                 reg_rctl |= E1000_RCTL_RST;
2111                 E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2112                 msec_delay(5);
2113         }
2114
2115 #if __FreeBSD_version < 800000
2116         IF_ADDR_LOCK(ifp);
2117 #else
2118         if_maddr_rlock(ifp);
2119 #endif
2120         TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
2121                 if (ifma->ifma_addr->sa_family != AF_LINK)
2122                         continue;
2123
2124                 if (mcnt == MAX_NUM_MULTICAST_ADDRESSES)
2125                         break;
2126
2127                 bcopy(LLADDR((struct sockaddr_dl *)ifma->ifma_addr),
2128                     &mta[mcnt * ETH_ADDR_LEN], ETH_ADDR_LEN);
2129                 mcnt++;
2130         }
2131 #if __FreeBSD_version < 800000
2132         IF_ADDR_UNLOCK(ifp);
2133 #else
2134         if_maddr_runlock(ifp);
2135 #endif
2136         if (mcnt >= MAX_NUM_MULTICAST_ADDRESSES) {
2137                 reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
2138                 reg_rctl |= E1000_RCTL_MPE;
2139                 E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2140         } else
2141                 e1000_update_mc_addr_list(&adapter->hw, mta, mcnt);
2142
2143         if (adapter->hw.mac.type == e1000_82542 && 
2144             adapter->hw.revision_id == E1000_REVISION_2) {
2145                 reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
2146                 reg_rctl &= ~E1000_RCTL_RST;
2147                 E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2148                 msec_delay(5);
2149                 if (adapter->hw.bus.pci_cmd_word & CMD_MEM_WRT_INVALIDATE)
2150                         e1000_pci_set_mwi(&adapter->hw);
2151         }
2152 }
2153
2154
2155 /*********************************************************************
2156  *  Timer routine
2157  *
2158  *  This routine checks for link status and updates statistics.
2159  *
2160  **********************************************************************/
2161
2162 static void
2163 em_local_timer(void *arg)
2164 {
2165         struct adapter  *adapter = arg;
2166         struct ifnet    *ifp = adapter->ifp;
2167         struct tx_ring  *txr = adapter->tx_rings;
2168
2169         EM_CORE_LOCK_ASSERT(adapter);
2170
2171         em_update_link_status(adapter);
2172         em_update_stats_counters(adapter);
2173
2174         /* Reset LAA into RAR[0] on 82571 */
2175         if ((adapter->hw.mac.type == e1000_82571) &&
2176             e1000_get_laa_state_82571(&adapter->hw))
2177                 e1000_rar_set(&adapter->hw, adapter->hw.mac.addr, 0);
2178
2179         /* 
2180         ** Don't do TX watchdog check if we've been paused
2181         */
2182         if (adapter->pause_frames) {
2183                 adapter->pause_frames = 0;
2184                 goto out;
2185         }
2186         /*
2187         ** Check on the state of the TX queue(s), this 
2188         ** can be done without the lock because its RO
2189         ** and the HUNG state will be static if set.
2190         */
2191         for (int i = 0; i < adapter->num_queues; i++, txr++)
2192                 if (txr->queue_status == EM_QUEUE_HUNG)
2193                         goto hung;
2194 out:
2195         callout_reset(&adapter->timer, hz, em_local_timer, adapter);
2196         return;
2197 hung:
2198         /* Looks like we're hung */
2199         device_printf(adapter->dev, "Watchdog timeout -- resetting\n");
2200         device_printf(adapter->dev,
2201             "Queue(%d) tdh = %d, hw tdt = %d\n", txr->me,
2202             E1000_READ_REG(&adapter->hw, E1000_TDH(txr->me)),
2203             E1000_READ_REG(&adapter->hw, E1000_TDT(txr->me)));
2204         device_printf(adapter->dev,"TX(%d) desc avail = %d,"
2205             "Next TX to Clean = %d\n",
2206             txr->me, txr->tx_avail, txr->next_to_clean);
2207         ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
2208         adapter->watchdog_events++;
2209         EM_TX_UNLOCK(txr);
2210         em_init_locked(adapter);
2211 }
2212
2213
2214 static void
2215 em_update_link_status(struct adapter *adapter)
2216 {
2217         struct e1000_hw *hw = &adapter->hw;
2218         struct ifnet *ifp = adapter->ifp;
2219         device_t dev = adapter->dev;
2220         struct tx_ring *txr = adapter->tx_rings;
2221         u32 link_check = 0;
2222
2223         /* Get the cached link value or read phy for real */
2224         switch (hw->phy.media_type) {
2225         case e1000_media_type_copper:
2226                 if (hw->mac.get_link_status) {
2227                         /* Do the work to read phy */
2228                         e1000_check_for_link(hw);
2229                         link_check = !hw->mac.get_link_status;
2230                         if (link_check) /* ESB2 fix */
2231                                 e1000_cfg_on_link_up(hw);
2232                 } else
2233                         link_check = TRUE;
2234                 break;
2235         case e1000_media_type_fiber:
2236                 e1000_check_for_link(hw);
2237                 link_check = (E1000_READ_REG(hw, E1000_STATUS) &
2238                                  E1000_STATUS_LU);
2239                 break;
2240         case e1000_media_type_internal_serdes:
2241                 e1000_check_for_link(hw);
2242                 link_check = adapter->hw.mac.serdes_has_link;
2243                 break;
2244         default:
2245         case e1000_media_type_unknown:
2246                 break;
2247         }
2248
2249         /* Now check for a transition */
2250         if (link_check && (adapter->link_active == 0)) {
2251                 e1000_get_speed_and_duplex(hw, &adapter->link_speed,
2252                     &adapter->link_duplex);
2253                 /* Check if we must disable SPEED_MODE bit on PCI-E */
2254                 if ((adapter->link_speed != SPEED_1000) &&
2255                     ((hw->mac.type == e1000_82571) ||
2256                     (hw->mac.type == e1000_82572))) {
2257                         int tarc0;
2258                         tarc0 = E1000_READ_REG(hw, E1000_TARC(0));
2259                         tarc0 &= ~SPEED_MODE_BIT;
2260                         E1000_WRITE_REG(hw, E1000_TARC(0), tarc0);
2261                 }
2262                 if (bootverbose)
2263                         device_printf(dev, "Link is up %d Mbps %s\n",
2264                             adapter->link_speed,
2265                             ((adapter->link_duplex == FULL_DUPLEX) ?
2266                             "Full Duplex" : "Half Duplex"));
2267                 adapter->link_active = 1;
2268                 adapter->smartspeed = 0;
2269                 ifp->if_baudrate = adapter->link_speed * 1000000;
2270                 if_link_state_change(ifp, LINK_STATE_UP);
2271         } else if (!link_check && (adapter->link_active == 1)) {
2272                 ifp->if_baudrate = adapter->link_speed = 0;
2273                 adapter->link_duplex = 0;
2274                 if (bootverbose)
2275                         device_printf(dev, "Link is Down\n");
2276                 adapter->link_active = 0;
2277                 /* Link down, disable watchdog */
2278                 for (int i = 0; i < adapter->num_queues; i++, txr++)
2279                         txr->queue_status = EM_QUEUE_IDLE;
2280                 if_link_state_change(ifp, LINK_STATE_DOWN);
2281         }
2282 }
2283
2284 /*********************************************************************
2285  *
2286  *  This routine disables all traffic on the adapter by issuing a
2287  *  global reset on the MAC and deallocates TX/RX buffers.
2288  *
2289  *  This routine should always be called with BOTH the CORE
2290  *  and TX locks.
2291  **********************************************************************/
2292
2293 static void
2294 em_stop(void *arg)
2295 {
2296         struct adapter  *adapter = arg;
2297         struct ifnet    *ifp = adapter->ifp;
2298         struct tx_ring  *txr = adapter->tx_rings;
2299
2300         EM_CORE_LOCK_ASSERT(adapter);
2301
2302         INIT_DEBUGOUT("em_stop: begin");
2303
2304         em_disable_intr(adapter);
2305         callout_stop(&adapter->timer);
2306
2307         /* Tell the stack that the interface is no longer active */
2308         ifp->if_drv_flags &= ~(IFF_DRV_RUNNING | IFF_DRV_OACTIVE);
2309
2310         /* Unarm watchdog timer. */
2311         for (int i = 0; i < adapter->num_queues; i++, txr++) {
2312                 EM_TX_LOCK(txr);
2313                 txr->queue_status = EM_QUEUE_IDLE;
2314                 EM_TX_UNLOCK(txr);
2315         }
2316
2317         e1000_reset_hw(&adapter->hw);
2318         E1000_WRITE_REG(&adapter->hw, E1000_WUC, 0);
2319
2320         e1000_led_off(&adapter->hw);
2321         e1000_cleanup_led(&adapter->hw);
2322 }
2323
2324
2325 /*********************************************************************
2326  *
2327  *  Determine hardware revision.
2328  *
2329  **********************************************************************/
2330 static void
2331 em_identify_hardware(struct adapter *adapter)
2332 {
2333         device_t dev = adapter->dev;
2334
2335         /* Make sure our PCI config space has the necessary stuff set */
2336         adapter->hw.bus.pci_cmd_word = pci_read_config(dev, PCIR_COMMAND, 2);
2337         if (!((adapter->hw.bus.pci_cmd_word & PCIM_CMD_BUSMASTEREN) &&
2338             (adapter->hw.bus.pci_cmd_word & PCIM_CMD_MEMEN))) {
2339                 device_printf(dev, "Memory Access and/or Bus Master bits "
2340                     "were not set!\n");
2341                 adapter->hw.bus.pci_cmd_word |=
2342                 (PCIM_CMD_BUSMASTEREN | PCIM_CMD_MEMEN);
2343                 pci_write_config(dev, PCIR_COMMAND,
2344                     adapter->hw.bus.pci_cmd_word, 2);
2345         }
2346
2347         /* Save off the information about this board */
2348         adapter->hw.vendor_id = pci_get_vendor(dev);
2349         adapter->hw.device_id = pci_get_device(dev);
2350         adapter->hw.revision_id = pci_read_config(dev, PCIR_REVID, 1);
2351         adapter->hw.subsystem_vendor_id =
2352             pci_read_config(dev, PCIR_SUBVEND_0, 2);
2353         adapter->hw.subsystem_device_id =
2354             pci_read_config(dev, PCIR_SUBDEV_0, 2);
2355
2356         /* Do Shared Code Init and Setup */
2357         if (e1000_set_mac_type(&adapter->hw)) {
2358                 device_printf(dev, "Setup init failure\n");
2359                 return;
2360         }
2361 }
2362
2363 static int
2364 em_allocate_pci_resources(struct adapter *adapter)
2365 {
2366         device_t        dev = adapter->dev;
2367         int             rid;
2368
2369         rid = PCIR_BAR(0);
2370         adapter->memory = bus_alloc_resource_any(dev, SYS_RES_MEMORY,
2371             &rid, RF_ACTIVE);
2372         if (adapter->memory == NULL) {
2373                 device_printf(dev, "Unable to allocate bus resource: memory\n");
2374                 return (ENXIO);
2375         }
2376         adapter->osdep.mem_bus_space_tag =
2377             rman_get_bustag(adapter->memory);
2378         adapter->osdep.mem_bus_space_handle =
2379             rman_get_bushandle(adapter->memory);
2380         adapter->hw.hw_addr = (u8 *)&adapter->osdep.mem_bus_space_handle;
2381
2382         /* Default to a single queue */
2383         adapter->num_queues = 1;
2384
2385         /*
2386          * Setup MSI/X or MSI if PCI Express
2387          */
2388         adapter->msix = em_setup_msix(adapter);
2389
2390         adapter->hw.back = &adapter->osdep;
2391
2392         return (0);
2393 }
2394
2395 /*********************************************************************
2396  *
2397  *  Setup the Legacy or MSI Interrupt handler
2398  *
2399  **********************************************************************/
2400 int
2401 em_allocate_legacy(struct adapter *adapter)
2402 {
2403         device_t dev = adapter->dev;
2404         int error, rid = 0;
2405
2406         /* Manually turn off all interrupts */
2407         E1000_WRITE_REG(&adapter->hw, E1000_IMC, 0xffffffff);
2408
2409         if (adapter->msix == 1) /* using MSI */
2410                 rid = 1;
2411         /* We allocate a single interrupt resource */
2412         adapter->res = bus_alloc_resource_any(dev,
2413             SYS_RES_IRQ, &rid, RF_SHAREABLE | RF_ACTIVE);
2414         if (adapter->res == NULL) {
2415                 device_printf(dev, "Unable to allocate bus resource: "
2416                     "interrupt\n");
2417                 return (ENXIO);
2418         }
2419
2420         /*
2421          * Allocate a fast interrupt and the associated
2422          * deferred processing contexts.
2423          */
2424         TASK_INIT(&adapter->que_task, 0, em_handle_que, adapter);
2425         TASK_INIT(&adapter->link_task, 0, em_handle_link, adapter);
2426         adapter->tq = taskqueue_create_fast("em_taskq", M_NOWAIT,
2427             taskqueue_thread_enqueue, &adapter->tq);
2428         taskqueue_start_threads(&adapter->tq, 1, PI_NET, "%s taskq",
2429             device_get_nameunit(adapter->dev));
2430         if ((error = bus_setup_intr(dev, adapter->res, INTR_TYPE_NET,
2431             em_irq_fast, NULL, adapter, &adapter->tag)) != 0) {
2432                 device_printf(dev, "Failed to register fast interrupt "
2433                             "handler: %d\n", error);
2434                 taskqueue_free(adapter->tq);
2435                 adapter->tq = NULL;
2436                 return (error);
2437         }
2438         
2439         return (0);
2440 }
2441
2442 /*********************************************************************
2443  *
2444  *  Setup the MSIX Interrupt handlers
2445  *   This is not really Multiqueue, rather
2446  *   its just multiple interrupt vectors.
2447  *
2448  **********************************************************************/
2449 int
2450 em_allocate_msix(struct adapter *adapter)
2451 {
2452         device_t        dev = adapter->dev;
2453         struct          tx_ring *txr = adapter->tx_rings;
2454         struct          rx_ring *rxr = adapter->rx_rings;
2455         int             error, rid, vector = 0;
2456
2457
2458         /* Make sure all interrupts are disabled */
2459         E1000_WRITE_REG(&adapter->hw, E1000_IMC, 0xffffffff);
2460
2461         /* First set up ring resources */
2462         for (int i = 0; i < adapter->num_queues; i++, txr++, rxr++) {
2463
2464                 /* RX ring */
2465                 rid = vector + 1;
2466
2467                 rxr->res = bus_alloc_resource_any(dev,
2468                     SYS_RES_IRQ, &rid, RF_ACTIVE);
2469                 if (rxr->res == NULL) {
2470                         device_printf(dev,
2471                             "Unable to allocate bus resource: "
2472                             "RX MSIX Interrupt %d\n", i);
2473                         return (ENXIO);
2474                 }
2475                 if ((error = bus_setup_intr(dev, rxr->res,
2476                     INTR_TYPE_NET | INTR_MPSAFE, NULL, em_msix_rx,
2477                     rxr, &rxr->tag)) != 0) {
2478                         device_printf(dev, "Failed to register RX handler");
2479                         return (error);
2480                 }
2481 #if __FreeBSD_version >= 800504
2482                 bus_describe_intr(dev, rxr->res, rxr->tag, "rx %d", i);
2483 #endif
2484                 rxr->msix = vector++; /* NOTE increment vector for TX */
2485                 TASK_INIT(&rxr->rx_task, 0, em_handle_rx, rxr);
2486                 rxr->tq = taskqueue_create_fast("em_rxq", M_NOWAIT,
2487                     taskqueue_thread_enqueue, &rxr->tq);
2488                 taskqueue_start_threads(&rxr->tq, 1, PI_NET, "%s rxq",
2489                     device_get_nameunit(adapter->dev));
2490                 /*
2491                 ** Set the bit to enable interrupt
2492                 ** in E1000_IMS -- bits 20 and 21
2493                 ** are for RX0 and RX1, note this has
2494                 ** NOTHING to do with the MSIX vector
2495                 */
2496                 rxr->ims = 1 << (20 + i);
2497                 adapter->ivars |= (8 | rxr->msix) << (i * 4);
2498
2499                 /* TX ring */
2500                 rid = vector + 1;
2501                 txr->res = bus_alloc_resource_any(dev,
2502                     SYS_RES_IRQ, &rid, RF_ACTIVE);
2503                 if (txr->res == NULL) {
2504                         device_printf(dev,
2505                             "Unable to allocate bus resource: "
2506                             "TX MSIX Interrupt %d\n", i);
2507                         return (ENXIO);
2508                 }
2509                 if ((error = bus_setup_intr(dev, txr->res,
2510                     INTR_TYPE_NET | INTR_MPSAFE, NULL, em_msix_tx,
2511                     txr, &txr->tag)) != 0) {
2512                         device_printf(dev, "Failed to register TX handler");
2513                         return (error);
2514                 }
2515 #if __FreeBSD_version >= 800504
2516                 bus_describe_intr(dev, txr->res, txr->tag, "tx %d", i);
2517 #endif
2518                 txr->msix = vector++; /* Increment vector for next pass */
2519                 TASK_INIT(&txr->tx_task, 0, em_handle_tx, txr);
2520                 txr->tq = taskqueue_create_fast("em_txq", M_NOWAIT,
2521                     taskqueue_thread_enqueue, &txr->tq);
2522                 taskqueue_start_threads(&txr->tq, 1, PI_NET, "%s txq",
2523                     device_get_nameunit(adapter->dev));
2524                 /*
2525                 ** Set the bit to enable interrupt
2526                 ** in E1000_IMS -- bits 22 and 23
2527                 ** are for TX0 and TX1, note this has
2528                 ** NOTHING to do with the MSIX vector
2529                 */
2530                 txr->ims = 1 << (22 + i);
2531                 adapter->ivars |= (8 | txr->msix) << (8 + (i * 4));
2532         }
2533
2534         /* Link interrupt */
2535         ++rid;
2536         adapter->res = bus_alloc_resource_any(dev,
2537             SYS_RES_IRQ, &rid, RF_ACTIVE);
2538         if (!adapter->res) {
2539                 device_printf(dev,"Unable to allocate "
2540                     "bus resource: Link interrupt [%d]\n", rid);
2541                 return (ENXIO);
2542         }
2543         /* Set the link handler function */
2544         error = bus_setup_intr(dev, adapter->res,
2545             INTR_TYPE_NET | INTR_MPSAFE, NULL,
2546             em_msix_link, adapter, &adapter->tag);
2547         if (error) {
2548                 adapter->res = NULL;
2549                 device_printf(dev, "Failed to register LINK handler");
2550                 return (error);
2551         }
2552 #if __FreeBSD_version >= 800504
2553                 bus_describe_intr(dev, adapter->res, adapter->tag, "link");
2554 #endif
2555         adapter->linkvec = vector;
2556         adapter->ivars |=  (8 | vector) << 16;
2557         adapter->ivars |= 0x80000000;
2558
2559         return (0);
2560 }
2561
2562
2563 static void
2564 em_free_pci_resources(struct adapter *adapter)
2565 {
2566         device_t        dev = adapter->dev;
2567         struct tx_ring  *txr;
2568         struct rx_ring  *rxr;
2569         int             rid;
2570
2571
2572         /*
2573         ** Release all the queue interrupt resources:
2574         */
2575         for (int i = 0; i < adapter->num_queues; i++) {
2576                 txr = &adapter->tx_rings[i];
2577                 rxr = &adapter->rx_rings[i];
2578                 /* an early abort? */
2579                 if ((txr == NULL) || (rxr == NULL))
2580                         break;
2581                 rid = txr->msix +1;
2582                 if (txr->tag != NULL) {
2583                         bus_teardown_intr(dev, txr->res, txr->tag);
2584                         txr->tag = NULL;
2585                 }
2586                 if (txr->res != NULL)
2587                         bus_release_resource(dev, SYS_RES_IRQ,
2588                             rid, txr->res);
2589                 rid = rxr->msix +1;
2590                 if (rxr->tag != NULL) {
2591                         bus_teardown_intr(dev, rxr->res, rxr->tag);
2592                         rxr->tag = NULL;
2593                 }
2594                 if (rxr->res != NULL)
2595                         bus_release_resource(dev, SYS_RES_IRQ,
2596                             rid, rxr->res);
2597         }
2598
2599         if (adapter->linkvec) /* we are doing MSIX */
2600                 rid = adapter->linkvec + 1;
2601         else
2602                 (adapter->msix != 0) ? (rid = 1):(rid = 0);
2603
2604         if (adapter->tag != NULL) {
2605                 bus_teardown_intr(dev, adapter->res, adapter->tag);
2606                 adapter->tag = NULL;
2607         }
2608
2609         if (adapter->res != NULL)
2610                 bus_release_resource(dev, SYS_RES_IRQ, rid, adapter->res);
2611
2612
2613         if (adapter->msix)
2614                 pci_release_msi(dev);
2615
2616         if (adapter->msix_mem != NULL)
2617                 bus_release_resource(dev, SYS_RES_MEMORY,
2618                     PCIR_BAR(EM_MSIX_BAR), adapter->msix_mem);
2619
2620         if (adapter->memory != NULL)
2621                 bus_release_resource(dev, SYS_RES_MEMORY,
2622                     PCIR_BAR(0), adapter->memory);
2623
2624         if (adapter->flash != NULL)
2625                 bus_release_resource(dev, SYS_RES_MEMORY,
2626                     EM_FLASH, adapter->flash);
2627 }
2628
2629 /*
2630  * Setup MSI or MSI/X
2631  */
2632 static int
2633 em_setup_msix(struct adapter *adapter)
2634 {
2635         device_t dev = adapter->dev;
2636         int val = 0;
2637
2638
2639         /*
2640         ** Setup MSI/X for Hartwell: tests have shown
2641         ** use of two queues to be unstable, and to
2642         ** provide no great gain anyway, so we simply
2643         ** seperate the interrupts and use a single queue.
2644         */
2645         if ((adapter->hw.mac.type == e1000_82574) &&
2646             (em_enable_msix == TRUE)) {
2647                 /* Map the MSIX BAR */
2648                 int rid = PCIR_BAR(EM_MSIX_BAR);
2649                 adapter->msix_mem = bus_alloc_resource_any(dev,
2650                     SYS_RES_MEMORY, &rid, RF_ACTIVE);
2651                 if (!adapter->msix_mem) {
2652                         /* May not be enabled */
2653                         device_printf(adapter->dev,
2654                             "Unable to map MSIX table \n");
2655                         goto msi;
2656                 }
2657                 val = pci_msix_count(dev); 
2658                 if (val < 3) {
2659                         bus_release_resource(dev, SYS_RES_MEMORY,
2660                             PCIR_BAR(EM_MSIX_BAR), adapter->msix_mem);
2661                         adapter->msix_mem = NULL;
2662                         device_printf(adapter->dev,
2663                             "MSIX: insufficient vectors, using MSI\n");
2664                         goto msi;
2665                 }
2666                 val = 3;
2667                 adapter->num_queues = 1;
2668                 if (pci_alloc_msix(dev, &val) == 0) {
2669                         device_printf(adapter->dev,
2670                             "Using MSIX interrupts "
2671                             "with %d vectors\n", val);
2672                 }
2673
2674                 return (val);
2675         }
2676 msi:
2677         val = pci_msi_count(dev);
2678         if (val == 1 && pci_alloc_msi(dev, &val) == 0) {
2679                 adapter->msix = 1;
2680                 device_printf(adapter->dev,"Using an MSI interrupt\n");
2681                 return (val);
2682         } 
2683         /* Should only happen due to manual configuration */
2684         device_printf(adapter->dev,"No MSI/MSIX using a Legacy IRQ\n");
2685         return (0);
2686 }
2687
2688
2689 /*********************************************************************
2690  *
2691  *  Initialize the hardware to a configuration
2692  *  as specified by the adapter structure.
2693  *
2694  **********************************************************************/
2695 static void
2696 em_reset(struct adapter *adapter)
2697 {
2698         device_t        dev = adapter->dev;
2699         struct ifnet    *ifp = adapter->ifp;
2700         struct e1000_hw *hw = &adapter->hw;
2701         u16             rx_buffer_size;
2702
2703         INIT_DEBUGOUT("em_reset: begin");
2704
2705         /* Set up smart power down as default off on newer adapters. */
2706         if (!em_smart_pwr_down && (hw->mac.type == e1000_82571 ||
2707             hw->mac.type == e1000_82572)) {
2708                 u16 phy_tmp = 0;
2709
2710                 /* Speed up time to link by disabling smart power down. */
2711                 e1000_read_phy_reg(hw, IGP02E1000_PHY_POWER_MGMT, &phy_tmp);
2712                 phy_tmp &= ~IGP02E1000_PM_SPD;
2713                 e1000_write_phy_reg(hw, IGP02E1000_PHY_POWER_MGMT, phy_tmp);
2714         }
2715
2716         /*
2717          * These parameters control the automatic generation (Tx) and
2718          * response (Rx) to Ethernet PAUSE frames.
2719          * - High water mark should allow for at least two frames to be
2720          *   received after sending an XOFF.
2721          * - Low water mark works best when it is very near the high water mark.
2722          *   This allows the receiver to restart by sending XON when it has
2723          *   drained a bit. Here we use an arbitary value of 1500 which will
2724          *   restart after one full frame is pulled from the buffer. There
2725          *   could be several smaller frames in the buffer and if so they will
2726          *   not trigger the XON until their total number reduces the buffer
2727          *   by 1500.
2728          * - The pause time is fairly large at 1000 x 512ns = 512 usec.
2729          */
2730         rx_buffer_size = ((E1000_READ_REG(hw, E1000_PBA) & 0xffff) << 10 );
2731
2732         hw->fc.high_water = rx_buffer_size -
2733             roundup2(adapter->max_frame_size, 1024);
2734         hw->fc.low_water = hw->fc.high_water - 1500;
2735
2736         if (hw->mac.type == e1000_80003es2lan)
2737                 hw->fc.pause_time = 0xFFFF;
2738         else
2739                 hw->fc.pause_time = EM_FC_PAUSE_TIME;
2740
2741         hw->fc.send_xon = TRUE;
2742
2743         /* Set Flow control, use the tunable location if sane */
2744         hw->fc.requested_mode = adapter->fc_setting;
2745
2746         /* Workaround: no TX flow ctrl for PCH */
2747         if (hw->mac.type == e1000_pchlan)
2748                 hw->fc.requested_mode = e1000_fc_rx_pause;
2749
2750         /* Override - settings for PCH2LAN, ya its magic :) */
2751         if (hw->mac.type == e1000_pch2lan) {
2752                 hw->fc.high_water = 0x5C20;
2753                 hw->fc.low_water = 0x5048;
2754                 hw->fc.pause_time = 0x0650;
2755                 hw->fc.refresh_time = 0x0400;
2756                 /* Jumbos need adjusted PBA */
2757                 if (ifp->if_mtu > ETHERMTU)
2758                         E1000_WRITE_REG(hw, E1000_PBA, 12);
2759                 else
2760                         E1000_WRITE_REG(hw, E1000_PBA, 26);
2761         }
2762
2763         /* Issue a global reset */
2764         e1000_reset_hw(hw);
2765         E1000_WRITE_REG(hw, E1000_WUC, 0);
2766         em_disable_aspm(adapter);
2767
2768         if (e1000_init_hw(hw) < 0) {
2769                 device_printf(dev, "Hardware Initialization Failed\n");
2770                 return;
2771         }
2772
2773         E1000_WRITE_REG(hw, E1000_VET, ETHERTYPE_VLAN);
2774         e1000_get_phy_info(hw);
2775         e1000_check_for_link(hw);
2776         return;
2777 }
2778
2779 /*********************************************************************
2780  *
2781  *  Setup networking device structure and register an interface.
2782  *
2783  **********************************************************************/
2784 static int
2785 em_setup_interface(device_t dev, struct adapter *adapter)
2786 {
2787         struct ifnet   *ifp;
2788
2789         INIT_DEBUGOUT("em_setup_interface: begin");
2790
2791         ifp = adapter->ifp = if_alloc(IFT_ETHER);
2792         if (ifp == NULL) {
2793                 device_printf(dev, "can not allocate ifnet structure\n");
2794                 return (-1);
2795         }
2796         if_initname(ifp, device_get_name(dev), device_get_unit(dev));
2797         ifp->if_mtu = ETHERMTU;
2798         ifp->if_init =  em_init;
2799         ifp->if_softc = adapter;
2800         ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
2801         ifp->if_ioctl = em_ioctl;
2802         ifp->if_start = em_start;
2803         IFQ_SET_MAXLEN(&ifp->if_snd, adapter->num_tx_desc - 1);
2804         ifp->if_snd.ifq_drv_maxlen = adapter->num_tx_desc - 1;
2805         IFQ_SET_READY(&ifp->if_snd);
2806
2807         ether_ifattach(ifp, adapter->hw.mac.addr);
2808
2809         ifp->if_capabilities = ifp->if_capenable = 0;
2810
2811 #ifdef EM_MULTIQUEUE
2812         /* Multiqueue tx functions */
2813         ifp->if_transmit = em_mq_start;
2814         ifp->if_qflush = em_qflush;
2815 #endif  
2816
2817         ifp->if_capabilities |= IFCAP_HWCSUM | IFCAP_VLAN_HWCSUM;
2818         ifp->if_capenable |= IFCAP_HWCSUM | IFCAP_VLAN_HWCSUM;
2819
2820         /* Enable TSO by default, can disable with ifconfig */
2821         ifp->if_capabilities |= IFCAP_TSO4;
2822         ifp->if_capenable |= IFCAP_TSO4;
2823
2824         /*
2825          * Tell the upper layer(s) we
2826          * support full VLAN capability
2827          */
2828         ifp->if_data.ifi_hdrlen = sizeof(struct ether_vlan_header);
2829         ifp->if_capabilities |= IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_MTU;
2830         ifp->if_capenable |= IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_MTU;
2831
2832         /*
2833         ** Dont turn this on by default, if vlans are
2834         ** created on another pseudo device (eg. lagg)
2835         ** then vlan events are not passed thru, breaking
2836         ** operation, but with HW FILTER off it works. If
2837         ** using vlans directly on the em driver you can
2838         ** enable this and get full hardware tag filtering.
2839         */
2840         ifp->if_capabilities |= IFCAP_VLAN_HWFILTER;
2841
2842 #ifdef DEVICE_POLLING
2843         ifp->if_capabilities |= IFCAP_POLLING;
2844 #endif
2845
2846         /* Enable only WOL MAGIC by default */
2847         if (adapter->wol) {
2848                 ifp->if_capabilities |= IFCAP_WOL;
2849                 ifp->if_capenable |= IFCAP_WOL_MAGIC;
2850         }
2851                 
2852         /*
2853          * Specify the media types supported by this adapter and register
2854          * callbacks to update media and link information
2855          */
2856         ifmedia_init(&adapter->media, IFM_IMASK,
2857             em_media_change, em_media_status);
2858         if ((adapter->hw.phy.media_type == e1000_media_type_fiber) ||
2859             (adapter->hw.phy.media_type == e1000_media_type_internal_serdes)) {
2860                 u_char fiber_type = IFM_1000_SX;        /* default type */
2861
2862                 ifmedia_add(&adapter->media, IFM_ETHER | fiber_type | IFM_FDX, 
2863                             0, NULL);
2864                 ifmedia_add(&adapter->media, IFM_ETHER | fiber_type, 0, NULL);
2865         } else {
2866                 ifmedia_add(&adapter->media, IFM_ETHER | IFM_10_T, 0, NULL);
2867                 ifmedia_add(&adapter->media, IFM_ETHER | IFM_10_T | IFM_FDX,
2868                             0, NULL);
2869                 ifmedia_add(&adapter->media, IFM_ETHER | IFM_100_TX,
2870                             0, NULL);
2871                 ifmedia_add(&adapter->media, IFM_ETHER | IFM_100_TX | IFM_FDX,
2872                             0, NULL);
2873                 if (adapter->hw.phy.type != e1000_phy_ife) {
2874                         ifmedia_add(&adapter->media,
2875                                 IFM_ETHER | IFM_1000_T | IFM_FDX, 0, NULL);
2876                         ifmedia_add(&adapter->media,
2877                                 IFM_ETHER | IFM_1000_T, 0, NULL);
2878                 }
2879         }
2880         ifmedia_add(&adapter->media, IFM_ETHER | IFM_AUTO, 0, NULL);
2881         ifmedia_set(&adapter->media, IFM_ETHER | IFM_AUTO);
2882         return (0);
2883 }
2884
2885
2886 /*
2887  * Manage DMA'able memory.
2888  */
2889 static void
2890 em_dmamap_cb(void *arg, bus_dma_segment_t *segs, int nseg, int error)
2891 {
2892         if (error)
2893                 return;
2894         *(bus_addr_t *) arg = segs[0].ds_addr;
2895 }
2896
2897 static int
2898 em_dma_malloc(struct adapter *adapter, bus_size_t size,
2899         struct em_dma_alloc *dma, int mapflags)
2900 {
2901         int error;
2902
2903         error = bus_dma_tag_create(bus_get_dma_tag(adapter->dev), /* parent */
2904                                 EM_DBA_ALIGN, 0,        /* alignment, bounds */
2905                                 BUS_SPACE_MAXADDR,      /* lowaddr */
2906                                 BUS_SPACE_MAXADDR,      /* highaddr */
2907                                 NULL, NULL,             /* filter, filterarg */
2908                                 size,                   /* maxsize */
2909                                 1,                      /* nsegments */
2910                                 size,                   /* maxsegsize */
2911                                 0,                      /* flags */
2912                                 NULL,                   /* lockfunc */
2913                                 NULL,                   /* lockarg */
2914                                 &dma->dma_tag);
2915         if (error) {
2916                 device_printf(adapter->dev,
2917                     "%s: bus_dma_tag_create failed: %d\n",
2918                     __func__, error);
2919                 goto fail_0;
2920         }
2921
2922         error = bus_dmamem_alloc(dma->dma_tag, (void**) &dma->dma_vaddr,
2923             BUS_DMA_NOWAIT | BUS_DMA_COHERENT, &dma->dma_map);
2924         if (error) {
2925                 device_printf(adapter->dev,
2926                     "%s: bus_dmamem_alloc(%ju) failed: %d\n",
2927                     __func__, (uintmax_t)size, error);
2928                 goto fail_2;
2929         }
2930
2931         dma->dma_paddr = 0;
2932         error = bus_dmamap_load(dma->dma_tag, dma->dma_map, dma->dma_vaddr,
2933             size, em_dmamap_cb, &dma->dma_paddr, mapflags | BUS_DMA_NOWAIT);
2934         if (error || dma->dma_paddr == 0) {
2935                 device_printf(adapter->dev,
2936                     "%s: bus_dmamap_load failed: %d\n",
2937                     __func__, error);
2938                 goto fail_3;
2939         }
2940
2941         return (0);
2942
2943 fail_3:
2944         bus_dmamap_unload(dma->dma_tag, dma->dma_map);
2945 fail_2:
2946         bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map);
2947         bus_dma_tag_destroy(dma->dma_tag);
2948 fail_0:
2949         dma->dma_map = NULL;
2950         dma->dma_tag = NULL;
2951
2952         return (error);
2953 }
2954
2955 static void
2956 em_dma_free(struct adapter *adapter, struct em_dma_alloc *dma)
2957 {
2958         if (dma->dma_tag == NULL)
2959                 return;
2960         if (dma->dma_map != NULL) {
2961                 bus_dmamap_sync(dma->dma_tag, dma->dma_map,
2962                     BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
2963                 bus_dmamap_unload(dma->dma_tag, dma->dma_map);
2964                 bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map);
2965                 dma->dma_map = NULL;
2966         }
2967         bus_dma_tag_destroy(dma->dma_tag);
2968         dma->dma_tag = NULL;
2969 }
2970
2971
2972 /*********************************************************************
2973  *
2974  *  Allocate memory for the transmit and receive rings, and then
2975  *  the descriptors associated with each, called only once at attach.
2976  *
2977  **********************************************************************/
2978 static int
2979 em_allocate_queues(struct adapter *adapter)
2980 {
2981         device_t                dev = adapter->dev;
2982         struct tx_ring          *txr = NULL;
2983         struct rx_ring          *rxr = NULL;
2984         int rsize, tsize, error = E1000_SUCCESS;
2985         int txconf = 0, rxconf = 0;
2986
2987
2988         /* Allocate the TX ring struct memory */
2989         if (!(adapter->tx_rings =
2990             (struct tx_ring *) malloc(sizeof(struct tx_ring) *
2991             adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
2992                 device_printf(dev, "Unable to allocate TX ring memory\n");
2993                 error = ENOMEM;
2994                 goto fail;
2995         }
2996
2997         /* Now allocate the RX */
2998         if (!(adapter->rx_rings =
2999             (struct rx_ring *) malloc(sizeof(struct rx_ring) *
3000             adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
3001                 device_printf(dev, "Unable to allocate RX ring memory\n");
3002                 error = ENOMEM;
3003                 goto rx_fail;
3004         }
3005
3006         tsize = roundup2(adapter->num_tx_desc *
3007             sizeof(struct e1000_tx_desc), EM_DBA_ALIGN);
3008         /*
3009          * Now set up the TX queues, txconf is needed to handle the
3010          * possibility that things fail midcourse and we need to
3011          * undo memory gracefully
3012          */ 
3013         for (int i = 0; i < adapter->num_queues; i++, txconf++) {
3014                 /* Set up some basics */
3015                 txr = &adapter->tx_rings[i];
3016                 txr->adapter = adapter;
3017                 txr->me = i;
3018
3019                 /* Initialize the TX lock */
3020                 snprintf(txr->mtx_name, sizeof(txr->mtx_name), "%s:tx(%d)",
3021                     device_get_nameunit(dev), txr->me);
3022                 mtx_init(&txr->tx_mtx, txr->mtx_name, NULL, MTX_DEF);
3023
3024                 if (em_dma_malloc(adapter, tsize,
3025                         &txr->txdma, BUS_DMA_NOWAIT)) {
3026                         device_printf(dev,
3027                             "Unable to allocate TX Descriptor memory\n");
3028                         error = ENOMEM;
3029                         goto err_tx_desc;
3030                 }
3031                 txr->tx_base = (struct e1000_tx_desc *)txr->txdma.dma_vaddr;
3032                 bzero((void *)txr->tx_base, tsize);
3033
3034                 if (em_allocate_transmit_buffers(txr)) {
3035                         device_printf(dev,
3036                             "Critical Failure setting up transmit buffers\n");
3037                         error = ENOMEM;
3038                         goto err_tx_desc;
3039                 }
3040 #if __FreeBSD_version >= 800000
3041                 /* Allocate a buf ring */
3042                 txr->br = buf_ring_alloc(4096, M_DEVBUF,
3043                     M_WAITOK, &txr->tx_mtx);
3044 #endif
3045         }
3046
3047         /*
3048          * Next the RX queues...
3049          */ 
3050         rsize = roundup2(adapter->num_rx_desc *
3051             sizeof(struct e1000_rx_desc), EM_DBA_ALIGN);
3052         for (int i = 0; i < adapter->num_queues; i++, rxconf++) {
3053                 rxr = &adapter->rx_rings[i];
3054                 rxr->adapter = adapter;
3055                 rxr->me = i;
3056
3057                 /* Initialize the RX lock */
3058                 snprintf(rxr->mtx_name, sizeof(rxr->mtx_name), "%s:rx(%d)",
3059                     device_get_nameunit(dev), txr->me);
3060                 mtx_init(&rxr->rx_mtx, rxr->mtx_name, NULL, MTX_DEF);
3061
3062                 if (em_dma_malloc(adapter, rsize,
3063                         &rxr->rxdma, BUS_DMA_NOWAIT)) {
3064                         device_printf(dev,
3065                             "Unable to allocate RxDescriptor memory\n");
3066                         error = ENOMEM;
3067                         goto err_rx_desc;
3068                 }
3069                 rxr->rx_base = (struct e1000_rx_desc *)rxr->rxdma.dma_vaddr;
3070                 bzero((void *)rxr->rx_base, rsize);
3071
3072                 /* Allocate receive buffers for the ring*/
3073                 if (em_allocate_receive_buffers(rxr)) {
3074                         device_printf(dev,
3075                             "Critical Failure setting up receive buffers\n");
3076                         error = ENOMEM;
3077                         goto err_rx_desc;
3078                 }
3079         }
3080
3081         return (0);
3082
3083 err_rx_desc:
3084         for (rxr = adapter->rx_rings; rxconf > 0; rxr++, rxconf--)
3085                 em_dma_free(adapter, &rxr->rxdma);
3086 err_tx_desc:
3087         for (txr = adapter->tx_rings; txconf > 0; txr++, txconf--)
3088                 em_dma_free(adapter, &txr->txdma);
3089         free(adapter->rx_rings, M_DEVBUF);
3090 rx_fail:
3091 #if __FreeBSD_version >= 800000
3092         buf_ring_free(txr->br, M_DEVBUF);
3093 #endif
3094         free(adapter->tx_rings, M_DEVBUF);
3095 fail:
3096         return (error);
3097 }
3098
3099
3100 /*********************************************************************
3101  *
3102  *  Allocate memory for tx_buffer structures. The tx_buffer stores all
3103  *  the information needed to transmit a packet on the wire. This is
3104  *  called only once at attach, setup is done every reset.
3105  *
3106  **********************************************************************/
3107 static int
3108 em_allocate_transmit_buffers(struct tx_ring *txr)
3109 {
3110         struct adapter *adapter = txr->adapter;
3111         device_t dev = adapter->dev;
3112         struct em_buffer *txbuf;
3113         int error, i;
3114
3115         /*
3116          * Setup DMA descriptor areas.
3117          */
3118         if ((error = bus_dma_tag_create(bus_get_dma_tag(dev),
3119                                1, 0,                    /* alignment, bounds */
3120                                BUS_SPACE_MAXADDR,       /* lowaddr */
3121                                BUS_SPACE_MAXADDR,       /* highaddr */
3122                                NULL, NULL,              /* filter, filterarg */
3123                                EM_TSO_SIZE,             /* maxsize */
3124                                EM_MAX_SCATTER,          /* nsegments */
3125                                PAGE_SIZE,               /* maxsegsize */
3126                                0,                       /* flags */
3127                                NULL,                    /* lockfunc */
3128                                NULL,                    /* lockfuncarg */
3129                                &txr->txtag))) {
3130                 device_printf(dev,"Unable to allocate TX DMA tag\n");
3131                 goto fail;
3132         }
3133
3134         if (!(txr->tx_buffers =
3135             (struct em_buffer *) malloc(sizeof(struct em_buffer) *
3136             adapter->num_tx_desc, M_DEVBUF, M_NOWAIT | M_ZERO))) {
3137                 device_printf(dev, "Unable to allocate tx_buffer memory\n");
3138                 error = ENOMEM;
3139                 goto fail;
3140         }
3141
3142         /* Create the descriptor buffer dma maps */
3143         txbuf = txr->tx_buffers;
3144         for (i = 0; i < adapter->num_tx_desc; i++, txbuf++) {
3145                 error = bus_dmamap_create(txr->txtag, 0, &txbuf->map);
3146                 if (error != 0) {
3147                         device_printf(dev, "Unable to create TX DMA map\n");
3148                         goto fail;
3149                 }
3150         }
3151
3152         return 0;
3153 fail:
3154         /* We free all, it handles case where we are in the middle */
3155         em_free_transmit_structures(adapter);
3156         return (error);
3157 }
3158
3159 /*********************************************************************
3160  *
3161  *  Initialize a transmit ring.
3162  *
3163  **********************************************************************/
3164 static void
3165 em_setup_transmit_ring(struct tx_ring *txr)
3166 {
3167         struct adapter *adapter = txr->adapter;
3168         struct em_buffer *txbuf;
3169         int i;
3170
3171         /* Clear the old descriptor contents */
3172         EM_TX_LOCK(txr);
3173         bzero((void *)txr->tx_base,
3174               (sizeof(struct e1000_tx_desc)) * adapter->num_tx_desc);
3175         /* Reset indices */
3176         txr->next_avail_desc = 0;
3177         txr->next_to_clean = 0;
3178
3179         /* Free any existing tx buffers. */
3180         txbuf = txr->tx_buffers;
3181         for (i = 0; i < adapter->num_tx_desc; i++, txbuf++) {
3182                 if (txbuf->m_head != NULL) {
3183                         bus_dmamap_sync(txr->txtag, txbuf->map,
3184                             BUS_DMASYNC_POSTWRITE);
3185                         bus_dmamap_unload(txr->txtag, txbuf->map);
3186                         m_freem(txbuf->m_head);
3187                         txbuf->m_head = NULL;
3188                 }
3189                 /* clear the watch index */
3190                 txbuf->next_eop = -1;
3191         }
3192
3193         /* Set number of descriptors available */
3194         txr->tx_avail = adapter->num_tx_desc;
3195         txr->queue_status = EM_QUEUE_IDLE;
3196
3197         /* Clear checksum offload context. */
3198         txr->last_hw_offload = 0;
3199         txr->last_hw_ipcss = 0;
3200         txr->last_hw_ipcso = 0;
3201         txr->last_hw_tucss = 0;
3202         txr->last_hw_tucso = 0;
3203
3204         bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
3205             BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
3206         EM_TX_UNLOCK(txr);
3207 }
3208
3209 /*********************************************************************
3210  *
3211  *  Initialize all transmit rings.
3212  *
3213  **********************************************************************/
3214 static void
3215 em_setup_transmit_structures(struct adapter *adapter)
3216 {
3217         struct tx_ring *txr = adapter->tx_rings;
3218
3219         for (int i = 0; i < adapter->num_queues; i++, txr++)
3220                 em_setup_transmit_ring(txr);
3221
3222         return;
3223 }
3224
3225 /*********************************************************************
3226  *
3227  *  Enable transmit unit.
3228  *
3229  **********************************************************************/
3230 static void
3231 em_initialize_transmit_unit(struct adapter *adapter)
3232 {
3233         struct tx_ring  *txr = adapter->tx_rings;
3234         struct e1000_hw *hw = &adapter->hw;
3235         u32     tctl, tarc, tipg = 0;
3236
3237          INIT_DEBUGOUT("em_initialize_transmit_unit: begin");
3238
3239         for (int i = 0; i < adapter->num_queues; i++, txr++) {
3240                 u64 bus_addr = txr->txdma.dma_paddr;
3241                 /* Base and Len of TX Ring */
3242                 E1000_WRITE_REG(hw, E1000_TDLEN(i),
3243                     adapter->num_tx_desc * sizeof(struct e1000_tx_desc));
3244                 E1000_WRITE_REG(hw, E1000_TDBAH(i),
3245                     (u32)(bus_addr >> 32));
3246                 E1000_WRITE_REG(hw, E1000_TDBAL(i),
3247                     (u32)bus_addr);
3248                 /* Init the HEAD/TAIL indices */
3249                 E1000_WRITE_REG(hw, E1000_TDT(i), 0);
3250                 E1000_WRITE_REG(hw, E1000_TDH(i), 0);
3251
3252                 HW_DEBUGOUT2("Base = %x, Length = %x\n",
3253                     E1000_READ_REG(&adapter->hw, E1000_TDBAL(i)),
3254                     E1000_READ_REG(&adapter->hw, E1000_TDLEN(i)));
3255
3256                 txr->queue_status = EM_QUEUE_IDLE;
3257         }
3258
3259         /* Set the default values for the Tx Inter Packet Gap timer */
3260         switch (adapter->hw.mac.type) {
3261         case e1000_82542:
3262                 tipg = DEFAULT_82542_TIPG_IPGT;
3263                 tipg |= DEFAULT_82542_TIPG_IPGR1 << E1000_TIPG_IPGR1_SHIFT;
3264                 tipg |= DEFAULT_82542_TIPG_IPGR2 << E1000_TIPG_IPGR2_SHIFT;
3265                 break;
3266         case e1000_80003es2lan:
3267                 tipg = DEFAULT_82543_TIPG_IPGR1;
3268                 tipg |= DEFAULT_80003ES2LAN_TIPG_IPGR2 <<
3269                     E1000_TIPG_IPGR2_SHIFT;
3270                 break;
3271         default:
3272                 if ((adapter->hw.phy.media_type == e1000_media_type_fiber) ||
3273                     (adapter->hw.phy.media_type ==
3274                     e1000_media_type_internal_serdes))
3275                         tipg = DEFAULT_82543_TIPG_IPGT_FIBER;
3276                 else
3277                         tipg = DEFAULT_82543_TIPG_IPGT_COPPER;
3278                 tipg |= DEFAULT_82543_TIPG_IPGR1 << E1000_TIPG_IPGR1_SHIFT;
3279                 tipg |= DEFAULT_82543_TIPG_IPGR2 << E1000_TIPG_IPGR2_SHIFT;
3280         }
3281
3282         E1000_WRITE_REG(&adapter->hw, E1000_TIPG, tipg);
3283         E1000_WRITE_REG(&adapter->hw, E1000_TIDV, adapter->tx_int_delay.value);
3284
3285         if(adapter->hw.mac.type >= e1000_82540)
3286                 E1000_WRITE_REG(&adapter->hw, E1000_TADV,
3287                     adapter->tx_abs_int_delay.value);
3288
3289         if ((adapter->hw.mac.type == e1000_82571) ||
3290             (adapter->hw.mac.type == e1000_82572)) {
3291                 tarc = E1000_READ_REG(&adapter->hw, E1000_TARC(0));
3292                 tarc |= SPEED_MODE_BIT;
3293                 E1000_WRITE_REG(&adapter->hw, E1000_TARC(0), tarc);
3294         } else if (adapter->hw.mac.type == e1000_80003es2lan) {
3295                 tarc = E1000_READ_REG(&adapter->hw, E1000_TARC(0));
3296                 tarc |= 1;
3297                 E1000_WRITE_REG(&adapter->hw, E1000_TARC(0), tarc);
3298                 tarc = E1000_READ_REG(&adapter->hw, E1000_TARC(1));
3299                 tarc |= 1;
3300                 E1000_WRITE_REG(&adapter->hw, E1000_TARC(1), tarc);
3301         }
3302
3303         adapter->txd_cmd = E1000_TXD_CMD_IFCS;
3304         if (adapter->tx_int_delay.value > 0)
3305                 adapter->txd_cmd |= E1000_TXD_CMD_IDE;
3306
3307         /* Program the Transmit Control Register */
3308         tctl = E1000_READ_REG(&adapter->hw, E1000_TCTL);
3309         tctl &= ~E1000_TCTL_CT;
3310         tctl |= (E1000_TCTL_PSP | E1000_TCTL_RTLC | E1000_TCTL_EN |
3311                    (E1000_COLLISION_THRESHOLD << E1000_CT_SHIFT));
3312
3313         if (adapter->hw.mac.type >= e1000_82571)
3314                 tctl |= E1000_TCTL_MULR;
3315
3316         /* This write will effectively turn on the transmit unit. */
3317         E1000_WRITE_REG(&adapter->hw, E1000_TCTL, tctl);
3318
3319 }
3320
3321
3322 /*********************************************************************
3323  *
3324  *  Free all transmit rings.
3325  *
3326  **********************************************************************/
3327 static void
3328 em_free_transmit_structures(struct adapter *adapter)
3329 {
3330         struct tx_ring *txr = adapter->tx_rings;
3331
3332         for (int i = 0; i < adapter->num_queues; i++, txr++) {
3333                 EM_TX_LOCK(txr);
3334                 em_free_transmit_buffers(txr);
3335                 em_dma_free(adapter, &txr->txdma);
3336                 EM_TX_UNLOCK(txr);
3337                 EM_TX_LOCK_DESTROY(txr);
3338         }
3339
3340         free(adapter->tx_rings, M_DEVBUF);
3341 }
3342
3343 /*********************************************************************
3344  *
3345  *  Free transmit ring related data structures.
3346  *
3347  **********************************************************************/
3348 static void
3349 em_free_transmit_buffers(struct tx_ring *txr)
3350 {
3351         struct adapter          *adapter = txr->adapter;
3352         struct em_buffer        *txbuf;
3353
3354         INIT_DEBUGOUT("free_transmit_ring: begin");
3355
3356         if (txr->tx_buffers == NULL)
3357                 return;
3358
3359         for (int i = 0; i < adapter->num_tx_desc; i++) {
3360                 txbuf = &txr->tx_buffers[i];
3361                 if (txbuf->m_head != NULL) {
3362                         bus_dmamap_sync(txr->txtag, txbuf->map,
3363                             BUS_DMASYNC_POSTWRITE);
3364                         bus_dmamap_unload(txr->txtag,
3365                             txbuf->map);
3366                         m_freem(txbuf->m_head);
3367                         txbuf->m_head = NULL;
3368                         if (txbuf->map != NULL) {
3369                                 bus_dmamap_destroy(txr->txtag,
3370                                     txbuf->map);
3371                                 txbuf->map = NULL;
3372                         }
3373                 } else if (txbuf->map != NULL) {
3374                         bus_dmamap_unload(txr->txtag,
3375                             txbuf->map);
3376                         bus_dmamap_destroy(txr->txtag,
3377                             txbuf->map);
3378                         txbuf->map = NULL;
3379                 }
3380         }
3381 #if __FreeBSD_version >= 800000
3382         if (txr->br != NULL)
3383                 buf_ring_free(txr->br, M_DEVBUF);
3384 #endif
3385         if (txr->tx_buffers != NULL) {
3386                 free(txr->tx_buffers, M_DEVBUF);
3387                 txr->tx_buffers = NULL;
3388         }
3389         if (txr->txtag != NULL) {
3390                 bus_dma_tag_destroy(txr->txtag);
3391                 txr->txtag = NULL;
3392         }
3393         return;
3394 }
3395
3396
3397 /*********************************************************************
3398  *  The offload context is protocol specific (TCP/UDP) and thus
3399  *  only needs to be set when the protocol changes. The occasion
3400  *  of a context change can be a performance detriment, and
3401  *  might be better just disabled. The reason arises in the way
3402  *  in which the controller supports pipelined requests from the
3403  *  Tx data DMA. Up to four requests can be pipelined, and they may
3404  *  belong to the same packet or to multiple packets. However all
3405  *  requests for one packet are issued before a request is issued
3406  *  for a subsequent packet and if a request for the next packet
3407  *  requires a context change, that request will be stalled
3408  *  until the previous request completes. This means setting up
3409  *  a new context effectively disables pipelined Tx data DMA which
3410  *  in turn greatly slow down performance to send small sized
3411  *  frames. 
3412  **********************************************************************/
3413 static void
3414 em_transmit_checksum_setup(struct tx_ring *txr, struct mbuf *mp, int ip_off,
3415     struct ip *ip, u32 *txd_upper, u32 *txd_lower)
3416 {
3417         struct adapter                  *adapter = txr->adapter;
3418         struct e1000_context_desc       *TXD = NULL;
3419         struct em_buffer                *tx_buffer;
3420         int                             cur, hdr_len;
3421         u32                             cmd = 0;
3422         u16                             offload = 0;
3423         u8                              ipcso, ipcss, tucso, tucss;
3424
3425         ipcss = ipcso = tucss = tucso = 0;
3426         hdr_len = ip_off + (ip->ip_hl << 2);
3427         cur = txr->next_avail_desc;
3428
3429         /* Setup of IP header checksum. */
3430         if (mp->m_pkthdr.csum_flags & CSUM_IP) {
3431                 *txd_upper |= E1000_TXD_POPTS_IXSM << 8;
3432                 offload |= CSUM_IP;
3433                 ipcss = ip_off;
3434                 ipcso = ip_off + offsetof(struct ip, ip_sum);
3435                 /*
3436                  * Start offset for header checksum calculation.
3437                  * End offset for header checksum calculation.
3438                  * Offset of place to put the checksum.
3439                  */
3440                 TXD = (struct e1000_context_desc *)&txr->tx_base[cur];
3441                 TXD->lower_setup.ip_fields.ipcss = ipcss;
3442                 TXD->lower_setup.ip_fields.ipcse = htole16(hdr_len);
3443                 TXD->lower_setup.ip_fields.ipcso = ipcso;
3444                 cmd |= E1000_TXD_CMD_IP;
3445         }
3446
3447         if (mp->m_pkthdr.csum_flags & CSUM_TCP) {
3448                 *txd_lower = E1000_TXD_CMD_DEXT | E1000_TXD_DTYP_D;
3449                 *txd_upper |= E1000_TXD_POPTS_TXSM << 8;
3450                 offload |= CSUM_TCP;
3451                 tucss = hdr_len;
3452                 tucso = hdr_len + offsetof(struct tcphdr, th_sum);
3453                 /*
3454                  * Setting up new checksum offload context for every frames
3455                  * takes a lot of processing time for hardware. This also
3456                  * reduces performance a lot for small sized frames so avoid
3457                  * it if driver can use previously configured checksum
3458                  * offload context.
3459                  */
3460                 if (txr->last_hw_offload == offload) {
3461                         if (offload & CSUM_IP) {
3462                                 if (txr->last_hw_ipcss == ipcss &&
3463                                     txr->last_hw_ipcso == ipcso &&
3464                                     txr->last_hw_tucss == tucss &&
3465                                     txr->last_hw_tucso == tucso)
3466                                         return;
3467                         } else {
3468                                 if (txr->last_hw_tucss == tucss &&
3469                                     txr->last_hw_tucso == tucso)
3470                                         return;
3471                         }
3472                 }
3473                 txr->last_hw_offload = offload;
3474                 txr->last_hw_tucss = tucss;
3475                 txr->last_hw_tucso = tucso;
3476                 /*
3477                  * Start offset for payload checksum calculation.
3478                  * End offset for payload checksum calculation.
3479                  * Offset of place to put the checksum.
3480                  */
3481                 TXD = (struct e1000_context_desc *)&txr->tx_base[cur];
3482                 TXD->upper_setup.tcp_fields.tucss = hdr_len;
3483                 TXD->upper_setup.tcp_fields.tucse = htole16(0);
3484                 TXD->upper_setup.tcp_fields.tucso = tucso;
3485                 cmd |= E1000_TXD_CMD_TCP;
3486         } else if (mp->m_pkthdr.csum_flags & CSUM_UDP) {
3487                 *txd_lower = E1000_TXD_CMD_DEXT | E1000_TXD_DTYP_D;
3488                 *txd_upper |= E1000_TXD_POPTS_TXSM << 8;
3489                 tucss = hdr_len;
3490                 tucso = hdr_len + offsetof(struct udphdr, uh_sum);
3491                 /*
3492                  * Setting up new checksum offload context for every frames
3493                  * takes a lot of processing time for hardware. This also
3494                  * reduces performance a lot for small sized frames so avoid
3495                  * it if driver can use previously configured checksum
3496                  * offload context.
3497                  */
3498                 if (txr->last_hw_offload == offload) {
3499                         if (offload & CSUM_IP) {
3500                                 if (txr->last_hw_ipcss == ipcss &&
3501                                     txr->last_hw_ipcso == ipcso &&
3502                                     txr->last_hw_tucss == tucss &&
3503                                     txr->last_hw_tucso == tucso)
3504                                         return;
3505                         } else {
3506                                 if (txr->last_hw_tucss == tucss &&
3507                                     txr->last_hw_tucso == tucso)
3508                                         return;
3509                         }
3510                 }
3511                 txr->last_hw_offload = offload;
3512                 txr->last_hw_tucss = tucss;
3513                 txr->last_hw_tucso = tucso;
3514                 /*
3515                  * Start offset for header checksum calculation.
3516                  * End offset for header checksum calculation.
3517                  * Offset of place to put the checksum.
3518                  */
3519                 TXD = (struct e1000_context_desc *)&txr->tx_base[cur];
3520                 TXD->upper_setup.tcp_fields.tucss = tucss;
3521                 TXD->upper_setup.tcp_fields.tucse = htole16(0);
3522                 TXD->upper_setup.tcp_fields.tucso = tucso;
3523         }
3524   
3525         if (offload & CSUM_IP) {
3526                 txr->last_hw_ipcss = ipcss;
3527                 txr->last_hw_ipcso = ipcso;
3528         }
3529
3530         TXD->tcp_seg_setup.data = htole32(0);
3531         TXD->cmd_and_length =
3532             htole32(adapter->txd_cmd | E1000_TXD_CMD_DEXT | cmd);
3533         tx_buffer = &txr->tx_buffers[cur];
3534         tx_buffer->m_head = NULL;
3535         tx_buffer->next_eop = -1;
3536
3537         if (++cur == adapter->num_tx_desc)
3538                 cur = 0;
3539
3540         txr->tx_avail--;
3541         txr->next_avail_desc = cur;
3542 }
3543
3544
3545 /**********************************************************************
3546  *
3547  *  Setup work for hardware segmentation offload (TSO)
3548  *
3549  **********************************************************************/
3550 static void
3551 em_tso_setup(struct tx_ring *txr, struct mbuf *mp, int ip_off,
3552     struct ip *ip, struct tcphdr *tp, u32 *txd_upper, u32 *txd_lower)
3553 {
3554         struct adapter                  *adapter = txr->adapter;
3555         struct e1000_context_desc       *TXD;
3556         struct em_buffer                *tx_buffer;
3557         int cur, hdr_len;
3558
3559         /*
3560          * In theory we can use the same TSO context if and only if
3561          * frame is the same type(IP/TCP) and the same MSS. However
3562          * checking whether a frame has the same IP/TCP structure is
3563          * hard thing so just ignore that and always restablish a
3564          * new TSO context.
3565          */
3566         hdr_len = ip_off + (ip->ip_hl << 2) + (tp->th_off << 2);
3567         *txd_lower = (E1000_TXD_CMD_DEXT |      /* Extended descr type */
3568                       E1000_TXD_DTYP_D |        /* Data descr type */
3569                       E1000_TXD_CMD_TSE);       /* Do TSE on this packet */
3570
3571         /* IP and/or TCP header checksum calculation and insertion. */
3572         *txd_upper = (E1000_TXD_POPTS_IXSM | E1000_TXD_POPTS_TXSM) << 8;
3573
3574         cur = txr->next_avail_desc;
3575         tx_buffer = &txr->tx_buffers[cur];
3576         TXD = (struct e1000_context_desc *) &txr->tx_base[cur];
3577
3578         /*
3579          * Start offset for header checksum calculation.
3580          * End offset for header checksum calculation.
3581          * Offset of place put the checksum.
3582          */
3583         TXD->lower_setup.ip_fields.ipcss = ip_off;
3584         TXD->lower_setup.ip_fields.ipcse =
3585             htole16(ip_off + (ip->ip_hl << 2) - 1);
3586         TXD->lower_setup.ip_fields.ipcso = ip_off + offsetof(struct ip, ip_sum);
3587         /*
3588          * Start offset for payload checksum calculation.
3589          * End offset for payload checksum calculation.
3590          * Offset of place to put the checksum.
3591          */
3592         TXD->upper_setup.tcp_fields.tucss = ip_off + (ip->ip_hl << 2);
3593         TXD->upper_setup.tcp_fields.tucse = 0;
3594         TXD->upper_setup.tcp_fields.tucso =
3595             ip_off + (ip->ip_hl << 2) + offsetof(struct tcphdr, th_sum);
3596         /*
3597          * Payload size per packet w/o any headers.
3598          * Length of all headers up to payload.
3599          */
3600         TXD->tcp_seg_setup.fields.mss = htole16(mp->m_pkthdr.tso_segsz);
3601         TXD->tcp_seg_setup.fields.hdr_len = hdr_len;
3602
3603         TXD->cmd_and_length = htole32(adapter->txd_cmd |
3604                                 E1000_TXD_CMD_DEXT |    /* Extended descr */
3605                                 E1000_TXD_CMD_TSE |     /* TSE context */
3606                                 E1000_TXD_CMD_IP |      /* Do IP csum */
3607                                 E1000_TXD_CMD_TCP |     /* Do TCP checksum */
3608                                 (mp->m_pkthdr.len - (hdr_len))); /* Total len */
3609
3610         tx_buffer->m_head = NULL;
3611         tx_buffer->next_eop = -1;
3612
3613         if (++cur == adapter->num_tx_desc)
3614                 cur = 0;
3615
3616         txr->tx_avail--;
3617         txr->next_avail_desc = cur;
3618         txr->tx_tso = TRUE;
3619 }
3620
3621
3622 /**********************************************************************
3623  *
3624  *  Examine each tx_buffer in the used queue. If the hardware is done
3625  *  processing the packet then free associated resources. The
3626  *  tx_buffer is put back on the free queue.
3627  *
3628  **********************************************************************/
3629 static bool
3630 em_txeof(struct tx_ring *txr)
3631 {
3632         struct adapter  *adapter = txr->adapter;
3633         int first, last, done, processed;
3634         struct em_buffer *tx_buffer;
3635         struct e1000_tx_desc   *tx_desc, *eop_desc;
3636         struct ifnet   *ifp = adapter->ifp;
3637
3638         EM_TX_LOCK_ASSERT(txr);
3639
3640         /* No work, make sure watchdog is off */
3641         if (txr->tx_avail == adapter->num_tx_desc) {
3642                 txr->queue_status = EM_QUEUE_IDLE;
3643                 return (FALSE);
3644         }
3645
3646         processed = 0;
3647         first = txr->next_to_clean;
3648         tx_desc = &txr->tx_base[first];
3649         tx_buffer = &txr->tx_buffers[first];
3650         last = tx_buffer->next_eop;
3651         eop_desc = &txr->tx_base[last];
3652
3653         /*
3654          * What this does is get the index of the
3655          * first descriptor AFTER the EOP of the 
3656          * first packet, that way we can do the
3657          * simple comparison on the inner while loop.
3658          */
3659         if (++last == adapter->num_tx_desc)
3660                 last = 0;
3661         done = last;
3662
3663         bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
3664             BUS_DMASYNC_POSTREAD);
3665
3666         while (eop_desc->upper.fields.status & E1000_TXD_STAT_DD) {
3667                 /* We clean the range of the packet */
3668                 while (first != done) {
3669                         tx_desc->upper.data = 0;
3670                         tx_desc->lower.data = 0;
3671                         tx_desc->buffer_addr = 0;
3672                         ++txr->tx_avail;
3673                         ++processed;
3674
3675                         if (tx_buffer->m_head) {
3676                                 bus_dmamap_sync(txr->txtag,
3677                                     tx_buffer->map,
3678                                     BUS_DMASYNC_POSTWRITE);
3679                                 bus_dmamap_unload(txr->txtag,
3680                                     tx_buffer->map);
3681                                 m_freem(tx_buffer->m_head);
3682                                 tx_buffer->m_head = NULL;
3683                         }
3684                         tx_buffer->next_eop = -1;
3685                         txr->watchdog_time = ticks;
3686
3687                         if (++first == adapter->num_tx_desc)
3688                                 first = 0;
3689
3690                         tx_buffer = &txr->tx_buffers[first];
3691                         tx_desc = &txr->tx_base[first];
3692                 }
3693                 ++ifp->if_opackets;
3694                 /* See if we can continue to the next packet */
3695                 last = tx_buffer->next_eop;
3696                 if (last != -1) {
3697                         eop_desc = &txr->tx_base[last];
3698                         /* Get new done point */
3699                         if (++last == adapter->num_tx_desc) last = 0;
3700                         done = last;
3701                 } else
3702                         break;
3703         }
3704         bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
3705             BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
3706
3707         txr->next_to_clean = first;
3708
3709         /*
3710         ** Watchdog calculation, we know there's
3711         ** work outstanding or the first return
3712         ** would have been taken, so none processed
3713         ** for too long indicates a hang. local timer
3714         ** will examine this and do a reset if needed.
3715         */
3716         if ((!processed) && ((ticks - txr->watchdog_time) > EM_WATCHDOG))
3717                 txr->queue_status = EM_QUEUE_HUNG;
3718
3719         /*
3720          * If we have enough room, clear IFF_DRV_OACTIVE
3721          * to tell the stack that it is OK to send packets.
3722          */
3723         if (txr->tx_avail > EM_TX_CLEANUP_THRESHOLD) {                
3724                 ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;
3725                 /* Disable watchdog if all clean */
3726                 if (txr->tx_avail == adapter->num_tx_desc) {
3727                         txr->queue_status = EM_QUEUE_IDLE;
3728                         return (FALSE);
3729                 } 
3730         }
3731
3732         return (TRUE);
3733 }
3734
3735
3736 /*********************************************************************
3737  *
3738  *  Refresh RX descriptor mbufs from system mbuf buffer pool.
3739  *
3740  **********************************************************************/
3741 static void
3742 em_refresh_mbufs(struct rx_ring *rxr, int limit)
3743 {
3744         struct adapter          *adapter = rxr->adapter;
3745         struct mbuf             *m;
3746         bus_dma_segment_t       segs[1];
3747         struct em_buffer        *rxbuf;
3748         int                     i, error, nsegs, cleaned;
3749
3750         i = rxr->next_to_refresh;
3751         cleaned = -1;
3752         while (i != limit) {
3753                 rxbuf = &rxr->rx_buffers[i];
3754                 if (rxbuf->m_head == NULL) {
3755                         m = m_getjcl(M_DONTWAIT, MT_DATA,
3756                             M_PKTHDR, adapter->rx_mbuf_sz);
3757                         /*
3758                         ** If we have a temporary resource shortage
3759                         ** that causes a failure, just abort refresh
3760                         ** for now, we will return to this point when
3761                         ** reinvoked from em_rxeof.
3762                         */
3763                         if (m == NULL)
3764                                 goto update;
3765                 } else
3766                         m = rxbuf->m_head;
3767
3768                 m->m_len = m->m_pkthdr.len = adapter->rx_mbuf_sz;
3769                 m->m_flags |= M_PKTHDR;
3770                 m->m_data = m->m_ext.ext_buf;
3771
3772                 /* Use bus_dma machinery to setup the memory mapping  */
3773                 error = bus_dmamap_load_mbuf_sg(rxr->rxtag, rxbuf->map,
3774                     m, segs, &nsegs, BUS_DMA_NOWAIT);
3775                 if (error != 0) {
3776                         printf("Refresh mbufs: hdr dmamap load"
3777                             " failure - %d\n", error);
3778                         m_free(m);
3779                         rxbuf->m_head = NULL;
3780                         goto update;
3781                 }
3782                 rxbuf->m_head = m;
3783                 bus_dmamap_sync(rxr->rxtag,
3784                     rxbuf->map, BUS_DMASYNC_PREREAD);
3785                 rxr->rx_base[i].buffer_addr = htole64(segs[0].ds_addr);
3786
3787                 cleaned = i;
3788                 /* Calculate next index */
3789                 if (++i == adapter->num_rx_desc)
3790                         i = 0;
3791                 rxr->next_to_refresh = i;
3792         }
3793 update:
3794         /*
3795         ** Update the tail pointer only if,
3796         ** and as far as we have refreshed.
3797         */
3798         if (cleaned != -1) /* Update tail index */
3799                 E1000_WRITE_REG(&adapter->hw,
3800                     E1000_RDT(rxr->me), cleaned);
3801
3802         return;
3803 }
3804
3805
3806 /*********************************************************************
3807  *
3808  *  Allocate memory for rx_buffer structures. Since we use one
3809  *  rx_buffer per received packet, the maximum number of rx_buffer's
3810  *  that we'll need is equal to the number of receive descriptors
3811  *  that we've allocated.
3812  *
3813  **********************************************************************/
3814 static int
3815 em_allocate_receive_buffers(struct rx_ring *rxr)
3816 {
3817         struct adapter          *adapter = rxr->adapter;
3818         device_t                dev = adapter->dev;
3819         struct em_buffer        *rxbuf;
3820         int                     error;
3821
3822         rxr->rx_buffers = malloc(sizeof(struct em_buffer) *
3823             adapter->num_rx_desc, M_DEVBUF, M_NOWAIT | M_ZERO);
3824         if (rxr->rx_buffers == NULL) {
3825                 device_printf(dev, "Unable to allocate rx_buffer memory\n");
3826                 return (ENOMEM);
3827         }
3828
3829         error = bus_dma_tag_create(bus_get_dma_tag(dev), /* parent */
3830                                 1, 0,                   /* alignment, bounds */
3831                                 BUS_SPACE_MAXADDR,      /* lowaddr */
3832                                 BUS_SPACE_MAXADDR,      /* highaddr */
3833                                 NULL, NULL,             /* filter, filterarg */
3834                                 MJUM9BYTES,             /* maxsize */
3835                                 1,                      /* nsegments */
3836                                 MJUM9BYTES,             /* maxsegsize */
3837                                 0,                      /* flags */
3838                                 NULL,                   /* lockfunc */
3839                                 NULL,                   /* lockarg */
3840                                 &rxr->rxtag);
3841         if (error) {
3842                 device_printf(dev, "%s: bus_dma_tag_create failed %d\n",
3843                     __func__, error);
3844                 goto fail;
3845         }
3846
3847         rxbuf = rxr->rx_buffers;
3848         for (int i = 0; i < adapter->num_rx_desc; i++, rxbuf++) {
3849                 rxbuf = &rxr->rx_buffers[i];
3850                 error = bus_dmamap_create(rxr->rxtag, BUS_DMA_NOWAIT,
3851                     &rxbuf->map);
3852                 if (error) {
3853                         device_printf(dev, "%s: bus_dmamap_create failed: %d\n",
3854                             __func__, error);
3855                         goto fail;
3856                 }
3857         }
3858
3859         return (0);
3860
3861 fail:
3862         em_free_receive_structures(adapter);
3863         return (error);
3864 }
3865
3866
3867 /*********************************************************************
3868  *
3869  *  Initialize a receive ring and its buffers.
3870  *
3871  **********************************************************************/
3872 static int
3873 em_setup_receive_ring(struct rx_ring *rxr)
3874 {
3875         struct  adapter         *adapter = rxr->adapter;
3876         struct em_buffer        *rxbuf;
3877         bus_dma_segment_t       seg[1];
3878         int                     rsize, nsegs, error;
3879
3880
3881         /* Clear the ring contents */
3882         EM_RX_LOCK(rxr);
3883         rsize = roundup2(adapter->num_rx_desc *
3884             sizeof(struct e1000_rx_desc), EM_DBA_ALIGN);
3885         bzero((void *)rxr->rx_base, rsize);
3886
3887         /*
3888         ** Free current RX buffer structs and their mbufs
3889         */
3890         for (int i = 0; i < adapter->num_rx_desc; i++) {
3891                 rxbuf = &rxr->rx_buffers[i];
3892                 if (rxbuf->m_head != NULL) {
3893                         bus_dmamap_sync(rxr->rxtag, rxbuf->map,
3894                             BUS_DMASYNC_POSTREAD);
3895                         bus_dmamap_unload(rxr->rxtag, rxbuf->map);
3896                         m_freem(rxbuf->m_head);
3897                 }
3898         }
3899
3900         /* Now replenish the mbufs */
3901         for (int j = 0; j != adapter->num_rx_desc; ++j) {
3902
3903                 rxbuf = &rxr->rx_buffers[j];
3904                 rxbuf->m_head = m_getjcl(M_DONTWAIT, MT_DATA,
3905                     M_PKTHDR, adapter->rx_mbuf_sz);
3906                 if (rxbuf->m_head == NULL)
3907                         return (ENOBUFS);
3908                 rxbuf->m_head->m_len = adapter->rx_mbuf_sz;
3909                 rxbuf->m_head->m_flags &= ~M_HASFCS; /* we strip it */
3910                 rxbuf->m_head->m_pkthdr.len = adapter->rx_mbuf_sz;
3911
3912                 /* Get the memory mapping */
3913                 error = bus_dmamap_load_mbuf_sg(rxr->rxtag,
3914                     rxbuf->map, rxbuf->m_head, seg,
3915                     &nsegs, BUS_DMA_NOWAIT);
3916                 if (error != 0) {
3917                         m_freem(rxbuf->m_head);
3918                         rxbuf->m_head = NULL;
3919                         return (error);
3920                 }
3921                 bus_dmamap_sync(rxr->rxtag,
3922                     rxbuf->map, BUS_DMASYNC_PREREAD);
3923
3924                 /* Update descriptor */
3925                 rxr->rx_base[j].buffer_addr = htole64(seg[0].ds_addr);
3926         }
3927
3928
3929         /* Setup our descriptor indices */
3930         rxr->next_to_check = 0;
3931         rxr->next_to_refresh = 0;
3932
3933         bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
3934             BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
3935
3936         EM_RX_UNLOCK(rxr);
3937         return (0);
3938 }
3939
3940 /*********************************************************************
3941  *
3942  *  Initialize all receive rings.
3943  *
3944  **********************************************************************/
3945 static int
3946 em_setup_receive_structures(struct adapter *adapter)
3947 {
3948         struct rx_ring *rxr = adapter->rx_rings;
3949         int j;
3950
3951         for (j = 0; j < adapter->num_queues; j++, rxr++)
3952                 if (em_setup_receive_ring(rxr))
3953                         goto fail;
3954
3955         return (0);
3956 fail:
3957         /*
3958          * Free RX buffers allocated so far, we will only handle
3959          * the rings that completed, the failing case will have
3960          * cleaned up for itself. 'j' failed, so its the terminus.
3961          */
3962         for (int i = 0; i < j; ++i) {
3963                 rxr = &adapter->rx_rings[i];
3964                 for (int n = 0; n < adapter->num_rx_desc; n++) {
3965                         struct em_buffer *rxbuf;
3966                         rxbuf = &rxr->rx_buffers[n];
3967                         if (rxbuf->m_head != NULL) {
3968                                 bus_dmamap_sync(rxr->rxtag, rxbuf->map,
3969                                   BUS_DMASYNC_POSTREAD);
3970                                 bus_dmamap_unload(rxr->rxtag, rxbuf->map);
3971                                 m_freem(rxbuf->m_head);
3972                                 rxbuf->m_head = NULL;
3973                         }
3974                 }
3975         }
3976
3977         return (ENOBUFS);
3978 }
3979
3980 /*********************************************************************
3981  *
3982  *  Free all receive rings.
3983  *
3984  **********************************************************************/
3985 static void
3986 em_free_receive_structures(struct adapter *adapter)
3987 {
3988         struct rx_ring *rxr = adapter->rx_rings;
3989
3990         for (int i = 0; i < adapter->num_queues; i++, rxr++) {
3991                 em_free_receive_buffers(rxr);
3992                 /* Free the ring memory as well */
3993                 em_dma_free(adapter, &rxr->rxdma);
3994                 EM_RX_LOCK_DESTROY(rxr);
3995         }
3996
3997         free(adapter->rx_rings, M_DEVBUF);
3998 }
3999
4000
4001 /*********************************************************************
4002  *
4003  *  Free receive ring data structures
4004  *
4005  **********************************************************************/
4006 static void
4007 em_free_receive_buffers(struct rx_ring *rxr)
4008 {
4009         struct adapter          *adapter = rxr->adapter;
4010         struct em_buffer        *rxbuf = NULL;
4011
4012         INIT_DEBUGOUT("free_receive_buffers: begin");
4013
4014         if (rxr->rx_buffers != NULL) {
4015                 for (int i = 0; i < adapter->num_rx_desc; i++) {
4016                         rxbuf = &rxr->rx_buffers[i];
4017                         if (rxbuf->map != NULL) {
4018                                 bus_dmamap_sync(rxr->rxtag, rxbuf->map,
4019                                     BUS_DMASYNC_POSTREAD);
4020                                 bus_dmamap_unload(rxr->rxtag, rxbuf->map);
4021                                 bus_dmamap_destroy(rxr->rxtag, rxbuf->map);
4022                         }
4023                         if (rxbuf->m_head != NULL) {
4024                                 m_freem(rxbuf->m_head);
4025                                 rxbuf->m_head = NULL;
4026                         }
4027                 }
4028                 free(rxr->rx_buffers, M_DEVBUF);
4029                 rxr->rx_buffers = NULL;
4030         }
4031
4032         if (rxr->rxtag != NULL) {
4033                 bus_dma_tag_destroy(rxr->rxtag);
4034                 rxr->rxtag = NULL;
4035         }
4036
4037         return;
4038 }
4039
4040
4041 /*********************************************************************
4042  *
4043  *  Enable receive unit.
4044  *
4045  **********************************************************************/
4046 #define MAX_INTS_PER_SEC        8000
4047 #define DEFAULT_ITR          1000000000/(MAX_INTS_PER_SEC * 256)
4048
4049 static void
4050 em_initialize_receive_unit(struct adapter *adapter)
4051 {
4052         struct rx_ring  *rxr = adapter->rx_rings;
4053         struct ifnet    *ifp = adapter->ifp;
4054         struct e1000_hw *hw = &adapter->hw;
4055         u64     bus_addr;
4056         u32     rctl, rxcsum;
4057
4058         INIT_DEBUGOUT("em_initialize_receive_units: begin");
4059
4060         /*
4061          * Make sure receives are disabled while setting
4062          * up the descriptor ring
4063          */
4064         rctl = E1000_READ_REG(hw, E1000_RCTL);
4065         E1000_WRITE_REG(hw, E1000_RCTL, rctl & ~E1000_RCTL_EN);
4066
4067         E1000_WRITE_REG(&adapter->hw, E1000_RADV,
4068             adapter->rx_abs_int_delay.value);
4069         /*
4070          * Set the interrupt throttling rate. Value is calculated
4071          * as DEFAULT_ITR = 1/(MAX_INTS_PER_SEC * 256ns)
4072          */
4073         E1000_WRITE_REG(hw, E1000_ITR, DEFAULT_ITR);
4074
4075         /*
4076         ** When using MSIX interrupts we need to throttle
4077         ** using the EITR register (82574 only)
4078         */
4079         if (hw->mac.type == e1000_82574)
4080                 for (int i = 0; i < 4; i++)
4081                         E1000_WRITE_REG(hw, E1000_EITR_82574(i),
4082                             DEFAULT_ITR);
4083
4084         /* Disable accelerated ackknowledge */
4085         if (adapter->hw.mac.type == e1000_82574)
4086                 E1000_WRITE_REG(hw, E1000_RFCTL, E1000_RFCTL_ACK_DIS);
4087
4088         if (ifp->if_capenable & IFCAP_RXCSUM) {
4089                 rxcsum = E1000_READ_REG(hw, E1000_RXCSUM);
4090                 rxcsum |= (E1000_RXCSUM_IPOFL | E1000_RXCSUM_TUOFL);
4091                 E1000_WRITE_REG(hw, E1000_RXCSUM, rxcsum);
4092         }
4093
4094         /*
4095         ** XXX TEMPORARY WORKAROUND: on some systems with 82573
4096         ** long latencies are observed, like Lenovo X60. This
4097         ** change eliminates the problem, but since having positive
4098         ** values in RDTR is a known source of problems on other
4099         ** platforms another solution is being sought.
4100         */
4101         if (hw->mac.type == e1000_82573)
4102                 E1000_WRITE_REG(hw, E1000_RDTR, 0x20);
4103
4104         for (int i = 0; i < adapter->num_queues; i++, rxr++) {
4105                 /* Setup the Base and Length of the Rx Descriptor Ring */
4106                 bus_addr = rxr->rxdma.dma_paddr;
4107                 E1000_WRITE_REG(hw, E1000_RDLEN(i),
4108                     adapter->num_rx_desc * sizeof(struct e1000_rx_desc));
4109                 E1000_WRITE_REG(hw, E1000_RDBAH(i), (u32)(bus_addr >> 32));
4110                 E1000_WRITE_REG(hw, E1000_RDBAL(i), (u32)bus_addr);
4111                 /* Setup the Head and Tail Descriptor Pointers */
4112                 E1000_WRITE_REG(hw, E1000_RDH(i), 0);
4113                 E1000_WRITE_REG(hw, E1000_RDT(i), adapter->num_rx_desc - 1);
4114         }
4115
4116         /* Set early receive threshold on appropriate hw */
4117         if (((adapter->hw.mac.type == e1000_ich9lan) ||
4118             (adapter->hw.mac.type == e1000_pch2lan) ||
4119             (adapter->hw.mac.type == e1000_ich10lan)) &&
4120             (ifp->if_mtu > ETHERMTU)) {
4121                 u32 rxdctl = E1000_READ_REG(hw, E1000_RXDCTL(0));
4122                 E1000_WRITE_REG(hw, E1000_RXDCTL(0), rxdctl | 3);
4123                 E1000_WRITE_REG(hw, E1000_ERT, 0x100 | (1 << 13));
4124         }
4125                 
4126         if (adapter->hw.mac.type == e1000_pch2lan) {
4127                 if (ifp->if_mtu > ETHERMTU)
4128                         e1000_lv_jumbo_workaround_ich8lan(hw, TRUE);
4129                 else
4130                         e1000_lv_jumbo_workaround_ich8lan(hw, FALSE);
4131         }
4132
4133         /* Setup the Receive Control Register */
4134         rctl &= ~(3 << E1000_RCTL_MO_SHIFT);
4135         rctl |= E1000_RCTL_EN | E1000_RCTL_BAM |
4136             E1000_RCTL_LBM_NO | E1000_RCTL_RDMTS_HALF |
4137             (hw->mac.mc_filter_type << E1000_RCTL_MO_SHIFT);
4138
4139         /* Strip the CRC */
4140         rctl |= E1000_RCTL_SECRC;
4141
4142         /* Make sure VLAN Filters are off */
4143         rctl &= ~E1000_RCTL_VFE;
4144         rctl &= ~E1000_RCTL_SBP;
4145
4146         if (adapter->rx_mbuf_sz == MCLBYTES)
4147                 rctl |= E1000_RCTL_SZ_2048;
4148         else if (adapter->rx_mbuf_sz == MJUMPAGESIZE)
4149                 rctl |= E1000_RCTL_SZ_4096 | E1000_RCTL_BSEX;
4150         else if (adapter->rx_mbuf_sz > MJUMPAGESIZE)
4151                 rctl |= E1000_RCTL_SZ_8192 | E1000_RCTL_BSEX;
4152
4153         if (ifp->if_mtu > ETHERMTU)
4154                 rctl |= E1000_RCTL_LPE;
4155         else
4156                 rctl &= ~E1000_RCTL_LPE;
4157
4158         /* Write out the settings */
4159         E1000_WRITE_REG(hw, E1000_RCTL, rctl);
4160
4161         return;
4162 }
4163
4164
4165 /*********************************************************************
4166  *
4167  *  This routine executes in interrupt context. It replenishes
4168  *  the mbufs in the descriptor and sends data which has been
4169  *  dma'ed into host memory to upper layer.
4170  *
4171  *  We loop at most count times if count is > 0, or until done if
4172  *  count < 0.
4173  *  
4174  *  For polling we also now return the number of cleaned packets
4175  *********************************************************************/
4176 static bool
4177 em_rxeof(struct rx_ring *rxr, int count, int *done)
4178 {
4179         struct adapter          *adapter = rxr->adapter;
4180         struct ifnet            *ifp = adapter->ifp;
4181         struct mbuf             *mp, *sendmp;
4182         u8                      status = 0;
4183         u16                     len;
4184         int                     i, processed, rxdone = 0;
4185         bool                    eop;
4186         struct e1000_rx_desc    *cur;
4187
4188         EM_RX_LOCK(rxr);
4189
4190         for (i = rxr->next_to_check, processed = 0; count != 0;) {
4191
4192                 if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
4193                         break;
4194
4195                 bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
4196                     BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
4197
4198                 cur = &rxr->rx_base[i];
4199                 status = cur->status;
4200                 mp = sendmp = NULL;
4201
4202                 if ((status & E1000_RXD_STAT_DD) == 0)
4203                         break;
4204
4205                 len = le16toh(cur->length);
4206                 eop = (status & E1000_RXD_STAT_EOP) != 0;
4207
4208                 if ((cur->errors & E1000_RXD_ERR_FRAME_ERR_MASK) ||
4209                     (rxr->discard == TRUE)) {
4210                         ifp->if_ierrors++;
4211                         ++rxr->rx_discarded;
4212                         if (!eop) /* Catch subsequent segs */
4213                                 rxr->discard = TRUE;
4214                         else
4215                                 rxr->discard = FALSE;
4216                         em_rx_discard(rxr, i);
4217                         goto next_desc;
4218                 }
4219
4220                 /* Assign correct length to the current fragment */
4221                 mp = rxr->rx_buffers[i].m_head;
4222                 mp->m_len = len;
4223
4224                 /* Trigger for refresh */
4225                 rxr->rx_buffers[i].m_head = NULL;
4226
4227                 /* First segment? */
4228                 if (rxr->fmp == NULL) {
4229                         mp->m_pkthdr.len = len;
4230                         rxr->fmp = rxr->lmp = mp;
4231                 } else {
4232                         /* Chain mbuf's together */
4233                         mp->m_flags &= ~M_PKTHDR;
4234                         rxr->lmp->m_next = mp;
4235                         rxr->lmp = mp;
4236                         rxr->fmp->m_pkthdr.len += len;
4237                 }
4238
4239                 if (eop) {
4240                         --count;
4241                         sendmp = rxr->fmp;
4242                         sendmp->m_pkthdr.rcvif = ifp;
4243                         ifp->if_ipackets++;
4244                         em_receive_checksum(cur, sendmp);
4245 #ifndef __NO_STRICT_ALIGNMENT
4246                         if (adapter->max_frame_size >
4247                             (MCLBYTES - ETHER_ALIGN) &&
4248                             em_fixup_rx(rxr) != 0)
4249                                 goto skip;
4250 #endif
4251                         if (status & E1000_RXD_STAT_VP) {
4252                                 sendmp->m_pkthdr.ether_vtag =
4253                                     (le16toh(cur->special) &
4254                                     E1000_RXD_SPC_VLAN_MASK);
4255                                 sendmp->m_flags |= M_VLANTAG;
4256                         }
4257 #ifdef EM_MULTIQUEUE
4258                         sendmp->m_pkthdr.flowid = rxr->msix;
4259                         sendmp->m_flags |= M_FLOWID;
4260 #endif
4261 #ifndef __NO_STRICT_ALIGNMENT
4262 skip:
4263 #endif
4264                         rxr->fmp = rxr->lmp = NULL;
4265                 }
4266 next_desc:
4267                 /* Zero out the receive descriptors status. */
4268                 cur->status = 0;
4269                 ++rxdone;       /* cumulative for POLL */
4270                 ++processed;
4271
4272                 /* Advance our pointers to the next descriptor. */
4273                 if (++i == adapter->num_rx_desc)
4274                         i = 0;
4275
4276                 /* Send to the stack */
4277                 if (sendmp != NULL) {
4278                         rxr->next_to_check = i;
4279                         EM_RX_UNLOCK(rxr);
4280                         (*ifp->if_input)(ifp, sendmp);
4281                         EM_RX_LOCK(rxr);
4282                         i = rxr->next_to_check;
4283                 }
4284
4285                 /* Only refresh mbufs every 8 descriptors */
4286                 if (processed == 8) {
4287                         em_refresh_mbufs(rxr, i);
4288                         processed = 0;
4289                 }
4290         }
4291
4292         /* Catch any remaining refresh work */
4293         em_refresh_mbufs(rxr, i);
4294
4295         rxr->next_to_check = i;
4296         if (done != NULL)
4297                 *done = rxdone;
4298         EM_RX_UNLOCK(rxr);
4299
4300         return ((status & E1000_RXD_STAT_DD) ? TRUE : FALSE);
4301 }
4302
4303 static __inline void
4304 em_rx_discard(struct rx_ring *rxr, int i)
4305 {
4306         struct em_buffer        *rbuf;
4307
4308         rbuf = &rxr->rx_buffers[i];
4309         /* Free any previous pieces */
4310         if (rxr->fmp != NULL) {
4311                 rxr->fmp->m_flags |= M_PKTHDR;
4312                 m_freem(rxr->fmp);
4313                 rxr->fmp = NULL;
4314                 rxr->lmp = NULL;
4315         }
4316         /*
4317         ** Free buffer and allow em_refresh_mbufs()
4318         ** to clean up and recharge buffer.
4319         */
4320         if (rbuf->m_head) {
4321                 m_free(rbuf->m_head);
4322                 rbuf->m_head = NULL;
4323         }
4324         return;
4325 }
4326
4327 #ifndef __NO_STRICT_ALIGNMENT
4328 /*
4329  * When jumbo frames are enabled we should realign entire payload on
4330  * architecures with strict alignment. This is serious design mistake of 8254x
4331  * as it nullifies DMA operations. 8254x just allows RX buffer size to be
4332  * 2048/4096/8192/16384. What we really want is 2048 - ETHER_ALIGN to align its
4333  * payload. On architecures without strict alignment restrictions 8254x still
4334  * performs unaligned memory access which would reduce the performance too.
4335  * To avoid copying over an entire frame to align, we allocate a new mbuf and
4336  * copy ethernet header to the new mbuf. The new mbuf is prepended into the
4337  * existing mbuf chain.
4338  *
4339  * Be aware, best performance of the 8254x is achived only when jumbo frame is
4340  * not used at all on architectures with strict alignment.
4341  */
4342 static int
4343 em_fixup_rx(struct rx_ring *rxr)
4344 {
4345         struct adapter *adapter = rxr->adapter;
4346         struct mbuf *m, *n;
4347         int error;
4348
4349         error = 0;
4350         m = rxr->fmp;
4351         if (m->m_len <= (MCLBYTES - ETHER_HDR_LEN)) {
4352                 bcopy(m->m_data, m->m_data + ETHER_HDR_LEN, m->m_len);
4353                 m->m_data += ETHER_HDR_LEN;
4354         } else {
4355                 MGETHDR(n, M_DONTWAIT, MT_DATA);
4356                 if (n != NULL) {
4357                         bcopy(m->m_data, n->m_data, ETHER_HDR_LEN);
4358                         m->m_data += ETHER_HDR_LEN;
4359                         m->m_len -= ETHER_HDR_LEN;
4360                         n->m_len = ETHER_HDR_LEN;
4361                         M_MOVE_PKTHDR(n, m);
4362                         n->m_next = m;
4363                         rxr->fmp = n;
4364                 } else {
4365                         adapter->dropped_pkts++;
4366                         m_freem(rxr->fmp);
4367                         rxr->fmp = NULL;
4368                         error = ENOMEM;
4369                 }
4370         }
4371
4372         return (error);
4373 }
4374 #endif
4375
4376 /*********************************************************************
4377  *
4378  *  Verify that the hardware indicated that the checksum is valid.
4379  *  Inform the stack about the status of checksum so that stack
4380  *  doesn't spend time verifying the checksum.
4381  *
4382  *********************************************************************/
4383 static void
4384 em_receive_checksum(struct e1000_rx_desc *rx_desc, struct mbuf *mp)
4385 {
4386         /* Ignore Checksum bit is set */
4387         if (rx_desc->status & E1000_RXD_STAT_IXSM) {
4388                 mp->m_pkthdr.csum_flags = 0;
4389                 return;
4390         }
4391
4392         if (rx_desc->status & E1000_RXD_STAT_IPCS) {
4393                 /* Did it pass? */
4394                 if (!(rx_desc->errors & E1000_RXD_ERR_IPE)) {
4395                         /* IP Checksum Good */
4396                         mp->m_pkthdr.csum_flags = CSUM_IP_CHECKED;
4397                         mp->m_pkthdr.csum_flags |= CSUM_IP_VALID;
4398
4399                 } else {
4400                         mp->m_pkthdr.csum_flags = 0;
4401                 }
4402         }
4403
4404         if (rx_desc->status & E1000_RXD_STAT_TCPCS) {
4405                 /* Did it pass? */
4406                 if (!(rx_desc->errors & E1000_RXD_ERR_TCPE)) {
4407                         mp->m_pkthdr.csum_flags |=
4408                         (CSUM_DATA_VALID | CSUM_PSEUDO_HDR);
4409                         mp->m_pkthdr.csum_data = htons(0xffff);
4410                 }
4411         }
4412 }
4413
4414 /*
4415  * This routine is run via an vlan
4416  * config EVENT
4417  */
4418 static void
4419 em_register_vlan(void *arg, struct ifnet *ifp, u16 vtag)
4420 {
4421         struct adapter  *adapter = ifp->if_softc;
4422         u32             index, bit;
4423
4424         if (ifp->if_softc !=  arg)   /* Not our event */
4425                 return;
4426
4427         if ((vtag == 0) || (vtag > 4095))       /* Invalid ID */
4428                 return;
4429
4430         EM_CORE_LOCK(adapter);
4431         index = (vtag >> 5) & 0x7F;
4432         bit = vtag & 0x1F;
4433         adapter->shadow_vfta[index] |= (1 << bit);
4434         ++adapter->num_vlans;
4435         /* Re-init to load the changes */
4436         if (ifp->if_capenable & IFCAP_VLAN_HWFILTER)
4437                 em_init_locked(adapter);
4438         EM_CORE_UNLOCK(adapter);
4439 }
4440
4441 /*
4442  * This routine is run via an vlan
4443  * unconfig EVENT
4444  */
4445 static void
4446 em_unregister_vlan(void *arg, struct ifnet *ifp, u16 vtag)
4447 {
4448         struct adapter  *adapter = ifp->if_softc;
4449         u32             index, bit;
4450
4451         if (ifp->if_softc !=  arg)
4452                 return;
4453
4454         if ((vtag == 0) || (vtag > 4095))       /* Invalid */
4455                 return;
4456
4457         EM_CORE_LOCK(adapter);
4458         index = (vtag >> 5) & 0x7F;
4459         bit = vtag & 0x1F;
4460         adapter->shadow_vfta[index] &= ~(1 << bit);
4461         --adapter->num_vlans;
4462         /* Re-init to load the changes */
4463         if (ifp->if_capenable & IFCAP_VLAN_HWFILTER)
4464                 em_init_locked(adapter);
4465         EM_CORE_UNLOCK(adapter);
4466 }
4467
4468 static void
4469 em_setup_vlan_hw_support(struct adapter *adapter)
4470 {
4471         struct e1000_hw *hw = &adapter->hw;
4472         u32             reg;
4473
4474         /*
4475         ** We get here thru init_locked, meaning
4476         ** a soft reset, this has already cleared
4477         ** the VFTA and other state, so if there
4478         ** have been no vlan's registered do nothing.
4479         */
4480         if (adapter->num_vlans == 0)
4481                 return;
4482
4483         /*
4484         ** A soft reset zero's out the VFTA, so
4485         ** we need to repopulate it now.
4486         */
4487         for (int i = 0; i < EM_VFTA_SIZE; i++)
4488                 if (adapter->shadow_vfta[i] != 0)
4489                         E1000_WRITE_REG_ARRAY(hw, E1000_VFTA,
4490                             i, adapter->shadow_vfta[i]);
4491
4492         reg = E1000_READ_REG(hw, E1000_CTRL);
4493         reg |= E1000_CTRL_VME;
4494         E1000_WRITE_REG(hw, E1000_CTRL, reg);
4495
4496         /* Enable the Filter Table */
4497         reg = E1000_READ_REG(hw, E1000_RCTL);
4498         reg &= ~E1000_RCTL_CFIEN;
4499         reg |= E1000_RCTL_VFE;
4500         E1000_WRITE_REG(hw, E1000_RCTL, reg);
4501 }
4502
4503 static void
4504 em_enable_intr(struct adapter *adapter)
4505 {
4506         struct e1000_hw *hw = &adapter->hw;
4507         u32 ims_mask = IMS_ENABLE_MASK;
4508
4509         if (hw->mac.type == e1000_82574) {
4510                 E1000_WRITE_REG(hw, EM_EIAC, EM_MSIX_MASK);
4511                 ims_mask |= EM_MSIX_MASK;
4512         } 
4513         E1000_WRITE_REG(hw, E1000_IMS, ims_mask);
4514 }
4515
4516 static void
4517 em_disable_intr(struct adapter *adapter)
4518 {
4519         struct e1000_hw *hw = &adapter->hw;
4520
4521         if (hw->mac.type == e1000_82574)
4522                 E1000_WRITE_REG(hw, EM_EIAC, 0);
4523         E1000_WRITE_REG(&adapter->hw, E1000_IMC, 0xffffffff);
4524 }
4525
4526 /*
4527  * Bit of a misnomer, what this really means is
4528  * to enable OS management of the system... aka
4529  * to disable special hardware management features 
4530  */
4531 static void
4532 em_init_manageability(struct adapter *adapter)
4533 {
4534         /* A shared code workaround */
4535 #define E1000_82542_MANC2H E1000_MANC2H
4536         if (adapter->has_manage) {
4537                 int manc2h = E1000_READ_REG(&adapter->hw, E1000_MANC2H);
4538                 int manc = E1000_READ_REG(&adapter->hw, E1000_MANC);
4539
4540                 /* disable hardware interception of ARP */
4541                 manc &= ~(E1000_MANC_ARP_EN);
4542
4543                 /* enable receiving management packets to the host */
4544                 manc |= E1000_MANC_EN_MNG2HOST;
4545 #define E1000_MNG2HOST_PORT_623 (1 << 5)
4546 #define E1000_MNG2HOST_PORT_664 (1 << 6)
4547                 manc2h |= E1000_MNG2HOST_PORT_623;
4548                 manc2h |= E1000_MNG2HOST_PORT_664;
4549                 E1000_WRITE_REG(&adapter->hw, E1000_MANC2H, manc2h);
4550                 E1000_WRITE_REG(&adapter->hw, E1000_MANC, manc);
4551         }
4552 }
4553
4554 /*
4555  * Give control back to hardware management
4556  * controller if there is one.
4557  */
4558 static void
4559 em_release_manageability(struct adapter *adapter)
4560 {
4561         if (adapter->has_manage) {
4562                 int manc = E1000_READ_REG(&adapter->hw, E1000_MANC);
4563
4564                 /* re-enable hardware interception of ARP */
4565                 manc |= E1000_MANC_ARP_EN;
4566                 manc &= ~E1000_MANC_EN_MNG2HOST;
4567
4568                 E1000_WRITE_REG(&adapter->hw, E1000_MANC, manc);
4569         }
4570 }
4571
4572 /*
4573  * em_get_hw_control sets the {CTRL_EXT|FWSM}:DRV_LOAD bit.
4574  * For ASF and Pass Through versions of f/w this means
4575  * that the driver is loaded. For AMT version type f/w
4576  * this means that the network i/f is open.
4577  */
4578 static void
4579 em_get_hw_control(struct adapter *adapter)
4580 {
4581         u32 ctrl_ext, swsm;
4582
4583         if (adapter->hw.mac.type == e1000_82573) {
4584                 swsm = E1000_READ_REG(&adapter->hw, E1000_SWSM);
4585                 E1000_WRITE_REG(&adapter->hw, E1000_SWSM,
4586                     swsm | E1000_SWSM_DRV_LOAD);
4587                 return;
4588         }
4589         /* else */
4590         ctrl_ext = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
4591         E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT,
4592             ctrl_ext | E1000_CTRL_EXT_DRV_LOAD);
4593         return;
4594 }
4595
4596 /*
4597  * em_release_hw_control resets {CTRL_EXT|FWSM}:DRV_LOAD bit.
4598  * For ASF and Pass Through versions of f/w this means that
4599  * the driver is no longer loaded. For AMT versions of the
4600  * f/w this means that the network i/f is closed.
4601  */
4602 static void
4603 em_release_hw_control(struct adapter *adapter)
4604 {
4605         u32 ctrl_ext, swsm;
4606
4607         if (!adapter->has_manage)
4608                 return;
4609
4610         if (adapter->hw.mac.type == e1000_82573) {
4611                 swsm = E1000_READ_REG(&adapter->hw, E1000_SWSM);
4612                 E1000_WRITE_REG(&adapter->hw, E1000_SWSM,
4613                     swsm & ~E1000_SWSM_DRV_LOAD);
4614                 return;
4615         }
4616         /* else */
4617         ctrl_ext = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
4618         E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT,
4619             ctrl_ext & ~E1000_CTRL_EXT_DRV_LOAD);
4620         return;
4621 }
4622
4623 static int
4624 em_is_valid_ether_addr(u8 *addr)
4625 {
4626         char zero_addr[6] = { 0, 0, 0, 0, 0, 0 };
4627
4628         if ((addr[0] & 1) || (!bcmp(addr, zero_addr, ETHER_ADDR_LEN))) {
4629                 return (FALSE);
4630         }
4631
4632         return (TRUE);
4633 }
4634
4635 /*
4636 ** Parse the interface capabilities with regard
4637 ** to both system management and wake-on-lan for
4638 ** later use.
4639 */
4640 static void
4641 em_get_wakeup(device_t dev)
4642 {
4643         struct adapter  *adapter = device_get_softc(dev);
4644         u16             eeprom_data = 0, device_id, apme_mask;
4645
4646         adapter->has_manage = e1000_enable_mng_pass_thru(&adapter->hw);
4647         apme_mask = EM_EEPROM_APME;
4648
4649         switch (adapter->hw.mac.type) {
4650         case e1000_82573:
4651         case e1000_82583:
4652                 adapter->has_amt = TRUE;
4653                 /* Falls thru */
4654         case e1000_82571:
4655         case e1000_82572:
4656         case e1000_80003es2lan:
4657                 if (adapter->hw.bus.func == 1) {
4658                         e1000_read_nvm(&adapter->hw,
4659                             NVM_INIT_CONTROL3_PORT_B, 1, &eeprom_data);
4660                         break;
4661                 } else
4662                         e1000_read_nvm(&adapter->hw,
4663                             NVM_INIT_CONTROL3_PORT_A, 1, &eeprom_data);
4664                 break;
4665         case e1000_ich8lan:
4666         case e1000_ich9lan:
4667         case e1000_ich10lan:
4668         case e1000_pchlan:
4669         case e1000_pch2lan:
4670                 apme_mask = E1000_WUC_APME;
4671                 adapter->has_amt = TRUE;
4672                 eeprom_data = E1000_READ_REG(&adapter->hw, E1000_WUC);
4673                 break;
4674         default:
4675                 e1000_read_nvm(&adapter->hw,
4676                     NVM_INIT_CONTROL3_PORT_A, 1, &eeprom_data);
4677                 break;
4678         }
4679         if (eeprom_data & apme_mask)
4680                 adapter->wol = (E1000_WUFC_MAG | E1000_WUFC_MC);
4681         /*
4682          * We have the eeprom settings, now apply the special cases
4683          * where the eeprom may be wrong or the board won't support
4684          * wake on lan on a particular port
4685          */
4686         device_id = pci_get_device(dev);
4687         switch (device_id) {
4688         case E1000_DEV_ID_82571EB_FIBER:
4689                 /* Wake events only supported on port A for dual fiber
4690                  * regardless of eeprom setting */
4691                 if (E1000_READ_REG(&adapter->hw, E1000_STATUS) &
4692                     E1000_STATUS_FUNC_1)
4693                         adapter->wol = 0;
4694                 break;
4695         case E1000_DEV_ID_82571EB_QUAD_COPPER:
4696         case E1000_DEV_ID_82571EB_QUAD_FIBER:
4697         case E1000_DEV_ID_82571EB_QUAD_COPPER_LP:
4698                 /* if quad port adapter, disable WoL on all but port A */
4699                 if (global_quad_port_a != 0)
4700                         adapter->wol = 0;
4701                 /* Reset for multiple quad port adapters */
4702                 if (++global_quad_port_a == 4)
4703                         global_quad_port_a = 0;
4704                 break;
4705         }
4706         return;
4707 }
4708
4709
4710 /*
4711  * Enable PCI Wake On Lan capability
4712  */
4713 static void
4714 em_enable_wakeup(device_t dev)
4715 {
4716         struct adapter  *adapter = device_get_softc(dev);
4717         struct ifnet    *ifp = adapter->ifp;
4718         u32             pmc, ctrl, ctrl_ext, rctl;
4719         u16             status;
4720
4721         if ((pci_find_extcap(dev, PCIY_PMG, &pmc) != 0))
4722                 return;
4723
4724         /* Advertise the wakeup capability */
4725         ctrl = E1000_READ_REG(&adapter->hw, E1000_CTRL);
4726         ctrl |= (E1000_CTRL_SWDPIN2 | E1000_CTRL_SWDPIN3);
4727         E1000_WRITE_REG(&adapter->hw, E1000_CTRL, ctrl);
4728         E1000_WRITE_REG(&adapter->hw, E1000_WUC, E1000_WUC_PME_EN);
4729
4730         if ((adapter->hw.mac.type == e1000_ich8lan) ||
4731             (adapter->hw.mac.type == e1000_pchlan) ||
4732             (adapter->hw.mac.type == e1000_ich9lan) ||
4733             (adapter->hw.mac.type == e1000_ich10lan)) {
4734                 e1000_disable_gig_wol_ich8lan(&adapter->hw);
4735                 e1000_hv_phy_powerdown_workaround_ich8lan(&adapter->hw);
4736         }
4737
4738         /* Keep the laser running on Fiber adapters */
4739         if (adapter->hw.phy.media_type == e1000_media_type_fiber ||
4740             adapter->hw.phy.media_type == e1000_media_type_internal_serdes) {
4741                 ctrl_ext = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
4742                 ctrl_ext |= E1000_CTRL_EXT_SDP3_DATA;
4743                 E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT, ctrl_ext);
4744         }
4745
4746         /*
4747         ** Determine type of Wakeup: note that wol
4748         ** is set with all bits on by default.
4749         */
4750         if ((ifp->if_capenable & IFCAP_WOL_MAGIC) == 0)
4751                 adapter->wol &= ~E1000_WUFC_MAG;
4752
4753         if ((ifp->if_capenable & IFCAP_WOL_MCAST) == 0)
4754                 adapter->wol &= ~E1000_WUFC_MC;
4755         else {
4756                 rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
4757                 rctl |= E1000_RCTL_MPE;
4758                 E1000_WRITE_REG(&adapter->hw, E1000_RCTL, rctl);
4759         }
4760
4761         if ((adapter->hw.mac.type == e1000_pchlan) ||
4762             (adapter->hw.mac.type == e1000_pch2lan)) {
4763                 if (em_enable_phy_wakeup(adapter))
4764                         return;
4765         } else {
4766                 E1000_WRITE_REG(&adapter->hw, E1000_WUC, E1000_WUC_PME_EN);
4767                 E1000_WRITE_REG(&adapter->hw, E1000_WUFC, adapter->wol);
4768         }
4769
4770         if (adapter->hw.phy.type == e1000_phy_igp_3)
4771                 e1000_igp3_phy_powerdown_workaround_ich8lan(&adapter->hw);
4772
4773         /* Request PME */
4774         status = pci_read_config(dev, pmc + PCIR_POWER_STATUS, 2);
4775         status &= ~(PCIM_PSTAT_PME | PCIM_PSTAT_PMEENABLE);
4776         if (ifp->if_capenable & IFCAP_WOL)
4777                 status |= PCIM_PSTAT_PME | PCIM_PSTAT_PMEENABLE;
4778         pci_write_config(dev, pmc + PCIR_POWER_STATUS, status, 2);
4779
4780         return;
4781 }
4782
4783 /*
4784 ** WOL in the newer chipset interfaces (pchlan)
4785 ** require thing to be copied into the phy
4786 */
4787 static int
4788 em_enable_phy_wakeup(struct adapter *adapter)
4789 {
4790         struct e1000_hw *hw = &adapter->hw;
4791         u32 mreg, ret = 0;
4792         u16 preg;
4793
4794         /* copy MAC RARs to PHY RARs */
4795         e1000_copy_rx_addrs_to_phy_ich8lan(hw);
4796
4797         /* copy MAC MTA to PHY MTA */
4798         for (int i = 0; i < adapter->hw.mac.mta_reg_count; i++) {
4799                 mreg = E1000_READ_REG_ARRAY(hw, E1000_MTA, i);
4800                 e1000_write_phy_reg(hw, BM_MTA(i), (u16)(mreg & 0xFFFF));
4801                 e1000_write_phy_reg(hw, BM_MTA(i) + 1,
4802                     (u16)((mreg >> 16) & 0xFFFF));
4803         }
4804
4805         /* configure PHY Rx Control register */
4806         e1000_read_phy_reg(&adapter->hw, BM_RCTL, &preg);
4807         mreg = E1000_READ_REG(hw, E1000_RCTL);
4808         if (mreg & E1000_RCTL_UPE)
4809                 preg |= BM_RCTL_UPE;
4810         if (mreg & E1000_RCTL_MPE)
4811                 preg |= BM_RCTL_MPE;
4812         preg &= ~(BM_RCTL_MO_MASK);
4813         if (mreg & E1000_RCTL_MO_3)
4814                 preg |= (((mreg & E1000_RCTL_MO_3) >> E1000_RCTL_MO_SHIFT)
4815                                 << BM_RCTL_MO_SHIFT);
4816         if (mreg & E1000_RCTL_BAM)
4817                 preg |= BM_RCTL_BAM;
4818         if (mreg & E1000_RCTL_PMCF)
4819                 preg |= BM_RCTL_PMCF;
4820         mreg = E1000_READ_REG(hw, E1000_CTRL);
4821         if (mreg & E1000_CTRL_RFCE)
4822                 preg |= BM_RCTL_RFCE;
4823         e1000_write_phy_reg(&adapter->hw, BM_RCTL, preg);
4824
4825         /* enable PHY wakeup in MAC register */
4826         E1000_WRITE_REG(hw, E1000_WUC,
4827             E1000_WUC_PHY_WAKE | E1000_WUC_PME_EN);
4828         E1000_WRITE_REG(hw, E1000_WUFC, adapter->wol);
4829
4830         /* configure and enable PHY wakeup in PHY registers */
4831         e1000_write_phy_reg(&adapter->hw, BM_WUFC, adapter->wol);
4832         e1000_write_phy_reg(&adapter->hw, BM_WUC, E1000_WUC_PME_EN);
4833
4834         /* activate PHY wakeup */
4835         ret = hw->phy.ops.acquire(hw);
4836         if (ret) {
4837                 printf("Could not acquire PHY\n");
4838                 return ret;
4839         }
4840         e1000_write_phy_reg_mdic(hw, IGP01E1000_PHY_PAGE_SELECT,
4841                                  (BM_WUC_ENABLE_PAGE << IGP_PAGE_SHIFT));
4842         ret = e1000_read_phy_reg_mdic(hw, BM_WUC_ENABLE_REG, &preg);
4843         if (ret) {
4844                 printf("Could not read PHY page 769\n");
4845                 goto out;
4846         }
4847         preg |= BM_WUC_ENABLE_BIT | BM_WUC_HOST_WU_BIT;
4848         ret = e1000_write_phy_reg_mdic(hw, BM_WUC_ENABLE_REG, preg);
4849         if (ret)
4850                 printf("Could not set PHY Host Wakeup bit\n");
4851 out:
4852         hw->phy.ops.release(hw);
4853
4854         return ret;
4855 }
4856
4857 static void
4858 em_led_func(void *arg, int onoff)
4859 {
4860         struct adapter  *adapter = arg;
4861  
4862         EM_CORE_LOCK(adapter);
4863         if (onoff) {
4864                 e1000_setup_led(&adapter->hw);
4865                 e1000_led_on(&adapter->hw);
4866         } else {
4867                 e1000_led_off(&adapter->hw);
4868                 e1000_cleanup_led(&adapter->hw);
4869         }
4870         EM_CORE_UNLOCK(adapter);
4871 }
4872
4873 /*
4874 ** Disable the L0S and L1 LINK states
4875 */
4876 static void
4877 em_disable_aspm(struct adapter *adapter)
4878 {
4879         int             base, reg;
4880         u16             link_cap,link_ctrl;
4881         device_t        dev = adapter->dev;
4882
4883         switch (adapter->hw.mac.type) {
4884                 case e1000_82573:
4885                 case e1000_82574:
4886                 case e1000_82583:
4887                         break;
4888                 default:
4889                         return;
4890         }
4891         if (pci_find_extcap(dev, PCIY_EXPRESS, &base) != 0)
4892                 return;
4893         reg = base + PCIR_EXPRESS_LINK_CAP;
4894         link_cap = pci_read_config(dev, reg, 2);
4895         if ((link_cap & PCIM_LINK_CAP_ASPM) == 0)
4896                 return;
4897         reg = base + PCIR_EXPRESS_LINK_CTL;
4898         link_ctrl = pci_read_config(dev, reg, 2);
4899         link_ctrl &= 0xFFFC; /* turn off bit 1 and 2 */
4900         pci_write_config(dev, reg, link_ctrl, 2);
4901         return;
4902 }
4903
4904 /**********************************************************************
4905  *
4906  *  Update the board statistics counters.
4907  *
4908  **********************************************************************/
4909 static void
4910 em_update_stats_counters(struct adapter *adapter)
4911 {
4912         struct ifnet   *ifp;
4913
4914         if(adapter->hw.phy.media_type == e1000_media_type_copper ||
4915            (E1000_READ_REG(&adapter->hw, E1000_STATUS) & E1000_STATUS_LU)) {
4916                 adapter->stats.symerrs += E1000_READ_REG(&adapter->hw, E1000_SYMERRS);
4917                 adapter->stats.sec += E1000_READ_REG(&adapter->hw, E1000_SEC);
4918         }
4919         adapter->stats.crcerrs += E1000_READ_REG(&adapter->hw, E1000_CRCERRS);
4920         adapter->stats.mpc += E1000_READ_REG(&adapter->hw, E1000_MPC);
4921         adapter->stats.scc += E1000_READ_REG(&adapter->hw, E1000_SCC);
4922         adapter->stats.ecol += E1000_READ_REG(&adapter->hw, E1000_ECOL);
4923
4924         adapter->stats.mcc += E1000_READ_REG(&adapter->hw, E1000_MCC);
4925         adapter->stats.latecol += E1000_READ_REG(&adapter->hw, E1000_LATECOL);
4926         adapter->stats.colc += E1000_READ_REG(&adapter->hw, E1000_COLC);
4927         adapter->stats.dc += E1000_READ_REG(&adapter->hw, E1000_DC);
4928         adapter->stats.rlec += E1000_READ_REG(&adapter->hw, E1000_RLEC);
4929         adapter->stats.xonrxc += E1000_READ_REG(&adapter->hw, E1000_XONRXC);
4930         adapter->stats.xontxc += E1000_READ_REG(&adapter->hw, E1000_XONTXC);
4931         /*
4932         ** For watchdog management we need to know if we have been
4933         ** paused during the last interval, so capture that here.
4934         */
4935         adapter->pause_frames = E1000_READ_REG(&adapter->hw, E1000_XOFFRXC);
4936         adapter->stats.xoffrxc += adapter->pause_frames;
4937         adapter->stats.xofftxc += E1000_READ_REG(&adapter->hw, E1000_XOFFTXC);
4938         adapter->stats.fcruc += E1000_READ_REG(&adapter->hw, E1000_FCRUC);
4939         adapter->stats.prc64 += E1000_READ_REG(&adapter->hw, E1000_PRC64);
4940         adapter->stats.prc127 += E1000_READ_REG(&adapter->hw, E1000_PRC127);
4941         adapter->stats.prc255 += E1000_READ_REG(&adapter->hw, E1000_PRC255);
4942         adapter->stats.prc511 += E1000_READ_REG(&adapter->hw, E1000_PRC511);
4943         adapter->stats.prc1023 += E1000_READ_REG(&adapter->hw, E1000_PRC1023);
4944         adapter->stats.prc1522 += E1000_READ_REG(&adapter->hw, E1000_PRC1522);
4945         adapter->stats.gprc += E1000_READ_REG(&adapter->hw, E1000_GPRC);
4946         adapter->stats.bprc += E1000_READ_REG(&adapter->hw, E1000_BPRC);
4947         adapter->stats.mprc += E1000_READ_REG(&adapter->hw, E1000_MPRC);
4948         adapter->stats.gptc += E1000_READ_REG(&adapter->hw, E1000_GPTC);
4949
4950         /* For the 64-bit byte counters the low dword must be read first. */
4951         /* Both registers clear on the read of the high dword */
4952
4953         adapter->stats.gorc += E1000_READ_REG(&adapter->hw, E1000_GORCL) +
4954             ((u64)E1000_READ_REG(&adapter->hw, E1000_GORCH) << 32);
4955         adapter->stats.gotc += E1000_READ_REG(&adapter->hw, E1000_GOTCL) +
4956             ((u64)E1000_READ_REG(&adapter->hw, E1000_GOTCH) << 32);
4957
4958         adapter->stats.rnbc += E1000_READ_REG(&adapter->hw, E1000_RNBC);
4959         adapter->stats.ruc += E1000_READ_REG(&adapter->hw, E1000_RUC);
4960         adapter->stats.rfc += E1000_READ_REG(&adapter->hw, E1000_RFC);
4961         adapter->stats.roc += E1000_READ_REG(&adapter->hw, E1000_ROC);
4962         adapter->stats.rjc += E1000_READ_REG(&adapter->hw, E1000_RJC);
4963
4964         adapter->stats.tor += E1000_READ_REG(&adapter->hw, E1000_TORH);
4965         adapter->stats.tot += E1000_READ_REG(&adapter->hw, E1000_TOTH);
4966
4967         adapter->stats.tpr += E1000_READ_REG(&adapter->hw, E1000_TPR);
4968         adapter->stats.tpt += E1000_READ_REG(&adapter->hw, E1000_TPT);
4969         adapter->stats.ptc64 += E1000_READ_REG(&adapter->hw, E1000_PTC64);
4970         adapter->stats.ptc127 += E1000_READ_REG(&adapter->hw, E1000_PTC127);
4971         adapter->stats.ptc255 += E1000_READ_REG(&adapter->hw, E1000_PTC255);
4972         adapter->stats.ptc511 += E1000_READ_REG(&adapter->hw, E1000_PTC511);
4973         adapter->stats.ptc1023 += E1000_READ_REG(&adapter->hw, E1000_PTC1023);
4974         adapter->stats.ptc1522 += E1000_READ_REG(&adapter->hw, E1000_PTC1522);
4975         adapter->stats.mptc += E1000_READ_REG(&adapter->hw, E1000_MPTC);
4976         adapter->stats.bptc += E1000_READ_REG(&adapter->hw, E1000_BPTC);
4977
4978         /* Interrupt Counts */
4979
4980         adapter->stats.iac += E1000_READ_REG(&adapter->hw, E1000_IAC);
4981         adapter->stats.icrxptc += E1000_READ_REG(&adapter->hw, E1000_ICRXPTC);
4982         adapter->stats.icrxatc += E1000_READ_REG(&adapter->hw, E1000_ICRXATC);
4983         adapter->stats.ictxptc += E1000_READ_REG(&adapter->hw, E1000_ICTXPTC);
4984         adapter->stats.ictxatc += E1000_READ_REG(&adapter->hw, E1000_ICTXATC);
4985         adapter->stats.ictxqec += E1000_READ_REG(&adapter->hw, E1000_ICTXQEC);
4986         adapter->stats.ictxqmtc += E1000_READ_REG(&adapter->hw, E1000_ICTXQMTC);
4987         adapter->stats.icrxdmtc += E1000_READ_REG(&adapter->hw, E1000_ICRXDMTC);
4988         adapter->stats.icrxoc += E1000_READ_REG(&adapter->hw, E1000_ICRXOC);
4989
4990         if (adapter->hw.mac.type >= e1000_82543) {
4991                 adapter->stats.algnerrc += 
4992                 E1000_READ_REG(&adapter->hw, E1000_ALGNERRC);
4993                 adapter->stats.rxerrc += 
4994                 E1000_READ_REG(&adapter->hw, E1000_RXERRC);
4995                 adapter->stats.tncrs += 
4996                 E1000_READ_REG(&adapter->hw, E1000_TNCRS);
4997                 adapter->stats.cexterr += 
4998                 E1000_READ_REG(&adapter->hw, E1000_CEXTERR);
4999                 adapter->stats.tsctc += 
5000                 E1000_READ_REG(&adapter->hw, E1000_TSCTC);
5001                 adapter->stats.tsctfc += 
5002                 E1000_READ_REG(&adapter->hw, E1000_TSCTFC);
5003         }
5004         ifp = adapter->ifp;
5005
5006         ifp->if_collisions = adapter->stats.colc;
5007
5008         /* Rx Errors */
5009         ifp->if_ierrors = adapter->dropped_pkts + adapter->stats.rxerrc +
5010             adapter->stats.crcerrs + adapter->stats.algnerrc +
5011             adapter->stats.ruc + adapter->stats.roc +
5012             adapter->stats.mpc + adapter->stats.cexterr;
5013
5014         /* Tx Errors */
5015         ifp->if_oerrors = adapter->stats.ecol +
5016             adapter->stats.latecol + adapter->watchdog_events;
5017 }
5018
5019 /* Export a single 32-bit register via a read-only sysctl. */
5020 static int
5021 em_sysctl_reg_handler(SYSCTL_HANDLER_ARGS)
5022 {
5023         struct adapter *adapter;
5024         u_int val;
5025
5026         adapter = oidp->oid_arg1;
5027         val = E1000_READ_REG(&adapter->hw, oidp->oid_arg2);
5028         return (sysctl_handle_int(oidp, &val, 0, req));
5029 }
5030
5031 /*
5032  * Add sysctl variables, one per statistic, to the system.
5033  */
5034 static void
5035 em_add_hw_stats(struct adapter *adapter)
5036 {
5037         device_t dev = adapter->dev;
5038
5039         struct tx_ring *txr = adapter->tx_rings;
5040         struct rx_ring *rxr = adapter->rx_rings;
5041
5042         struct sysctl_ctx_list *ctx = device_get_sysctl_ctx(dev);
5043         struct sysctl_oid *tree = device_get_sysctl_tree(dev);
5044         struct sysctl_oid_list *child = SYSCTL_CHILDREN(tree);
5045         struct e1000_hw_stats *stats = &adapter->stats;
5046
5047         struct sysctl_oid *stat_node, *queue_node, *int_node;
5048         struct sysctl_oid_list *stat_list, *queue_list, *int_list;
5049
5050 #define QUEUE_NAME_LEN 32
5051         char namebuf[QUEUE_NAME_LEN];
5052         
5053         /* Driver Statistics */
5054         SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "link_irq", 
5055                         CTLFLAG_RD, &adapter->link_irq, 0,
5056                         "Link MSIX IRQ Handled");
5057         SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "mbuf_alloc_fail", 
5058                          CTLFLAG_RD, &adapter->mbuf_alloc_failed,
5059                          "Std mbuf failed");
5060         SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "cluster_alloc_fail", 
5061                          CTLFLAG_RD, &adapter->mbuf_cluster_failed,
5062                          "Std mbuf cluster failed");
5063         SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "dropped", 
5064                         CTLFLAG_RD, &adapter->dropped_pkts,
5065                         "Driver dropped packets");
5066         SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "tx_dma_fail", 
5067                         CTLFLAG_RD, &adapter->no_tx_dma_setup,
5068                         "Driver tx dma failure in xmit");
5069         SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "rx_overruns",
5070                         CTLFLAG_RD, &adapter->rx_overruns,
5071                         "RX overruns");
5072         SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "watchdog_timeouts",
5073                         CTLFLAG_RD, &adapter->watchdog_events,
5074                         "Watchdog timeouts");
5075         
5076         SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "device_control",
5077                         CTLFLAG_RD, adapter, E1000_CTRL,
5078                         em_sysctl_reg_handler, "IU",
5079                         "Device Control Register");
5080         SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "rx_control",
5081                         CTLFLAG_RD, adapter, E1000_RCTL,
5082                         em_sysctl_reg_handler, "IU",
5083                         "Receiver Control Register");
5084         SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "fc_high_water",
5085                         CTLFLAG_RD, &adapter->hw.fc.high_water, 0,
5086                         "Flow Control High Watermark");
5087         SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "fc_low_water", 
5088                         CTLFLAG_RD, &adapter->hw.fc.low_water, 0,
5089                         "Flow Control Low Watermark");
5090
5091         for (int i = 0; i < adapter->num_queues; i++, rxr++, txr++) {
5092                 snprintf(namebuf, QUEUE_NAME_LEN, "queue%d", i);
5093                 queue_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, namebuf,
5094                                             CTLFLAG_RD, NULL, "Queue Name");
5095                 queue_list = SYSCTL_CHILDREN(queue_node);
5096
5097                 SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "txd_head", 
5098                                 CTLFLAG_RD, adapter, E1000_TDH(txr->me),
5099                                 em_sysctl_reg_handler, "IU",
5100                                 "Transmit Descriptor Head");
5101                 SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "txd_tail", 
5102                                 CTLFLAG_RD, adapter, E1000_TDT(txr->me),
5103                                 em_sysctl_reg_handler, "IU",
5104                                 "Transmit Descriptor Tail");
5105                 SYSCTL_ADD_ULONG(ctx, queue_list, OID_AUTO, "tx_irq",
5106                                 CTLFLAG_RD, &txr->tx_irq,
5107                                 "Queue MSI-X Transmit Interrupts");
5108                 SYSCTL_ADD_ULONG(ctx, queue_list, OID_AUTO, "no_desc_avail", 
5109                                 CTLFLAG_RD, &txr->no_desc_avail,
5110                                 "Queue No Descriptor Available");
5111                 
5112                 SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "rxd_head", 
5113                                 CTLFLAG_RD, adapter, E1000_RDH(rxr->me),
5114                                 em_sysctl_reg_handler, "IU",
5115                                 "Receive Descriptor Head");
5116                 SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "rxd_tail", 
5117                                 CTLFLAG_RD, adapter, E1000_RDT(rxr->me),
5118                                 em_sysctl_reg_handler, "IU",
5119                                 "Receive Descriptor Tail");
5120                 SYSCTL_ADD_ULONG(ctx, queue_list, OID_AUTO, "rx_irq",
5121                                 CTLFLAG_RD, &rxr->rx_irq,
5122                                 "Queue MSI-X Receive Interrupts");
5123         }
5124
5125         /* MAC stats get their own sub node */
5126
5127         stat_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "mac_stats", 
5128                                     CTLFLAG_RD, NULL, "Statistics");
5129         stat_list = SYSCTL_CHILDREN(stat_node);
5130
5131         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "excess_coll", 
5132                         CTLFLAG_RD, &stats->ecol,
5133                         "Excessive collisions");
5134         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "single_coll", 
5135                         CTLFLAG_RD, &stats->scc,
5136                         "Single collisions");
5137         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "multiple_coll", 
5138                         CTLFLAG_RD, &stats->mcc,
5139                         "Multiple collisions");
5140         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "late_coll", 
5141                         CTLFLAG_RD, &stats->latecol,
5142                         "Late collisions");
5143         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "collision_count", 
5144                         CTLFLAG_RD, &stats->colc,
5145                         "Collision Count");
5146         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "symbol_errors",
5147                         CTLFLAG_RD, &adapter->stats.symerrs,
5148                         "Symbol Errors");
5149         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "sequence_errors",
5150                         CTLFLAG_RD, &adapter->stats.sec,
5151                         "Sequence Errors");
5152         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "defer_count",
5153                         CTLFLAG_RD, &adapter->stats.dc,
5154                         "Defer Count");
5155         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "missed_packets",
5156                         CTLFLAG_RD, &adapter->stats.mpc,
5157                         "Missed Packets");
5158         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "recv_no_buff",
5159                         CTLFLAG_RD, &adapter->stats.rnbc,
5160                         "Receive No Buffers");
5161         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "recv_undersize",
5162                         CTLFLAG_RD, &adapter->stats.ruc,
5163                         "Receive Undersize");
5164         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "recv_fragmented",
5165                         CTLFLAG_RD, &adapter->stats.rfc,
5166                         "Fragmented Packets Received ");
5167         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "recv_oversize",
5168                         CTLFLAG_RD, &adapter->stats.roc,
5169                         "Oversized Packets Received");
5170         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "recv_jabber",
5171                         CTLFLAG_RD, &adapter->stats.rjc,
5172                         "Recevied Jabber");
5173         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "recv_errs",
5174                         CTLFLAG_RD, &adapter->stats.rxerrc,
5175                         "Receive Errors");
5176         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "crc_errs",
5177                         CTLFLAG_RD, &adapter->stats.crcerrs,
5178                         "CRC errors");
5179         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "alignment_errs",
5180                         CTLFLAG_RD, &adapter->stats.algnerrc,
5181                         "Alignment Errors");
5182         /* On 82575 these are collision counts */
5183         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "coll_ext_errs",
5184                         CTLFLAG_RD, &adapter->stats.cexterr,
5185                         "Collision/Carrier extension errors");
5186         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "xon_recvd",
5187                         CTLFLAG_RD, &adapter->stats.xonrxc,
5188                         "XON Received");
5189         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "xon_txd",
5190                         CTLFLAG_RD, &adapter->stats.xontxc,
5191                         "XON Transmitted");
5192         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "xoff_recvd",
5193                         CTLFLAG_RD, &adapter->stats.xoffrxc,
5194                         "XOFF Received");
5195         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "xoff_txd",
5196                         CTLFLAG_RD, &adapter->stats.xofftxc,
5197                         "XOFF Transmitted");
5198
5199         /* Packet Reception Stats */
5200         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "total_pkts_recvd",
5201                         CTLFLAG_RD, &adapter->stats.tpr,
5202                         "Total Packets Received ");
5203         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "good_pkts_recvd",
5204                         CTLFLAG_RD, &adapter->stats.gprc,
5205                         "Good Packets Received");
5206         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "bcast_pkts_recvd",
5207                         CTLFLAG_RD, &adapter->stats.bprc,
5208                         "Broadcast Packets Received");
5209         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "mcast_pkts_recvd",
5210                         CTLFLAG_RD, &adapter->stats.mprc,
5211                         "Multicast Packets Received");
5212         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "rx_frames_64",
5213                         CTLFLAG_RD, &adapter->stats.prc64,
5214                         "64 byte frames received ");
5215         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "rx_frames_65_127",
5216                         CTLFLAG_RD, &adapter->stats.prc127,
5217                         "65-127 byte frames received");
5218         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "rx_frames_128_255",
5219                         CTLFLAG_RD, &adapter->stats.prc255,
5220                         "128-255 byte frames received");
5221         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "rx_frames_256_511",
5222                         CTLFLAG_RD, &adapter->stats.prc511,
5223                         "256-511 byte frames received");
5224         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "rx_frames_512_1023",
5225                         CTLFLAG_RD, &adapter->stats.prc1023,
5226                         "512-1023 byte frames received");
5227         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "rx_frames_1024_1522",
5228                         CTLFLAG_RD, &adapter->stats.prc1522,
5229                         "1023-1522 byte frames received");
5230         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "good_octets_recvd", 
5231                         CTLFLAG_RD, &adapter->stats.gorc, 
5232                         "Good Octets Received"); 
5233
5234         /* Packet Transmission Stats */
5235         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "good_octets_txd", 
5236                         CTLFLAG_RD, &adapter->stats.gotc, 
5237                         "Good Octets Transmitted"); 
5238         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "total_pkts_txd",
5239                         CTLFLAG_RD, &adapter->stats.tpt,
5240                         "Total Packets Transmitted");
5241         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "good_pkts_txd",
5242                         CTLFLAG_RD, &adapter->stats.gptc,
5243                         "Good Packets Transmitted");
5244         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "bcast_pkts_txd",
5245                         CTLFLAG_RD, &adapter->stats.bptc,
5246                         "Broadcast Packets Transmitted");
5247         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "mcast_pkts_txd",
5248                         CTLFLAG_RD, &adapter->stats.mptc,
5249                         "Multicast Packets Transmitted");
5250         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tx_frames_64",
5251                         CTLFLAG_RD, &adapter->stats.ptc64,
5252                         "64 byte frames transmitted ");
5253         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tx_frames_65_127",
5254                         CTLFLAG_RD, &adapter->stats.ptc127,
5255                         "65-127 byte frames transmitted");
5256         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tx_frames_128_255",
5257                         CTLFLAG_RD, &adapter->stats.ptc255,
5258                         "128-255 byte frames transmitted");
5259         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tx_frames_256_511",
5260                         CTLFLAG_RD, &adapter->stats.ptc511,
5261                         "256-511 byte frames transmitted");
5262         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tx_frames_512_1023",
5263                         CTLFLAG_RD, &adapter->stats.ptc1023,
5264                         "512-1023 byte frames transmitted");
5265         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tx_frames_1024_1522",
5266                         CTLFLAG_RD, &adapter->stats.ptc1522,
5267                         "1024-1522 byte frames transmitted");
5268         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tso_txd",
5269                         CTLFLAG_RD, &adapter->stats.tsctc,
5270                         "TSO Contexts Transmitted");
5271         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tso_ctx_fail",
5272                         CTLFLAG_RD, &adapter->stats.tsctfc,
5273                         "TSO Contexts Failed");
5274
5275
5276         /* Interrupt Stats */
5277
5278         int_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "interrupts", 
5279                                     CTLFLAG_RD, NULL, "Interrupt Statistics");
5280         int_list = SYSCTL_CHILDREN(int_node);
5281
5282         SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "asserts",
5283                         CTLFLAG_RD, &adapter->stats.iac,
5284                         "Interrupt Assertion Count");
5285
5286         SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "rx_pkt_timer",
5287                         CTLFLAG_RD, &adapter->stats.icrxptc,
5288                         "Interrupt Cause Rx Pkt Timer Expire Count");
5289
5290         SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "rx_abs_timer",
5291                         CTLFLAG_RD, &adapter->stats.icrxatc,
5292                         "Interrupt Cause Rx Abs Timer Expire Count");
5293
5294         SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "tx_pkt_timer",
5295                         CTLFLAG_RD, &adapter->stats.ictxptc,
5296                         "Interrupt Cause Tx Pkt Timer Expire Count");
5297
5298         SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "tx_abs_timer",
5299                         CTLFLAG_RD, &adapter->stats.ictxatc,
5300                         "Interrupt Cause Tx Abs Timer Expire Count");
5301
5302         SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "tx_queue_empty",
5303                         CTLFLAG_RD, &adapter->stats.ictxqec,
5304                         "Interrupt Cause Tx Queue Empty Count");
5305
5306         SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "tx_queue_min_thresh",
5307                         CTLFLAG_RD, &adapter->stats.ictxqmtc,
5308                         "Interrupt Cause Tx Queue Min Thresh Count");
5309
5310         SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "rx_desc_min_thresh",
5311                         CTLFLAG_RD, &adapter->stats.icrxdmtc,
5312                         "Interrupt Cause Rx Desc Min Thresh Count");
5313
5314         SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "rx_overrun",
5315                         CTLFLAG_RD, &adapter->stats.icrxoc,
5316                         "Interrupt Cause Receiver Overrun Count");
5317 }
5318
5319 /**********************************************************************
5320  *
5321  *  This routine provides a way to dump out the adapter eeprom,
5322  *  often a useful debug/service tool. This only dumps the first
5323  *  32 words, stuff that matters is in that extent.
5324  *
5325  **********************************************************************/
5326 static int
5327 em_sysctl_nvm_info(SYSCTL_HANDLER_ARGS)
5328 {
5329         struct adapter *adapter;
5330         int error;
5331         int result;
5332
5333         result = -1;
5334         error = sysctl_handle_int(oidp, &result, 0, req);
5335
5336         if (error || !req->newptr)
5337                 return (error);
5338
5339         /*
5340          * This value will cause a hex dump of the
5341          * first 32 16-bit words of the EEPROM to
5342          * the screen.
5343          */
5344         if (result == 1) {
5345                 adapter = (struct adapter *)arg1;
5346                 em_print_nvm_info(adapter);
5347         }
5348
5349         return (error);
5350 }
5351
5352 static void
5353 em_print_nvm_info(struct adapter *adapter)
5354 {
5355         u16     eeprom_data;
5356         int     i, j, row = 0;
5357
5358         /* Its a bit crude, but it gets the job done */
5359         printf("\nInterface EEPROM Dump:\n");
5360         printf("Offset\n0x0000  ");
5361         for (i = 0, j = 0; i < 32; i++, j++) {
5362                 if (j == 8) { /* Make the offset block */
5363                         j = 0; ++row;
5364                         printf("\n0x00%x0  ",row);
5365                 }
5366                 e1000_read_nvm(&adapter->hw, i, 1, &eeprom_data);
5367                 printf("%04x ", eeprom_data);
5368         }
5369         printf("\n");
5370 }
5371
5372 static int
5373 em_sysctl_int_delay(SYSCTL_HANDLER_ARGS)
5374 {
5375         struct em_int_delay_info *info;
5376         struct adapter *adapter;
5377         u32 regval;
5378         int error, usecs, ticks;
5379
5380         info = (struct em_int_delay_info *)arg1;
5381         usecs = info->value;
5382         error = sysctl_handle_int(oidp, &usecs, 0, req);
5383         if (error != 0 || req->newptr == NULL)
5384                 return (error);
5385         if (usecs < 0 || usecs > EM_TICKS_TO_USECS(65535))
5386                 return (EINVAL);
5387         info->value = usecs;
5388         ticks = EM_USECS_TO_TICKS(usecs);
5389
5390         adapter = info->adapter;
5391         
5392         EM_CORE_LOCK(adapter);
5393         regval = E1000_READ_OFFSET(&adapter->hw, info->offset);
5394         regval = (regval & ~0xffff) | (ticks & 0xffff);
5395         /* Handle a few special cases. */
5396         switch (info->offset) {
5397         case E1000_RDTR:
5398                 break;
5399         case E1000_TIDV:
5400                 if (ticks == 0) {
5401                         adapter->txd_cmd &= ~E1000_TXD_CMD_IDE;
5402                         /* Don't write 0 into the TIDV register. */
5403                         regval++;
5404                 } else
5405                         adapter->txd_cmd |= E1000_TXD_CMD_IDE;
5406                 break;
5407         }
5408         E1000_WRITE_OFFSET(&adapter->hw, info->offset, regval);
5409         EM_CORE_UNLOCK(adapter);
5410         return (0);
5411 }
5412
5413 static void
5414 em_add_int_delay_sysctl(struct adapter *adapter, const char *name,
5415         const char *description, struct em_int_delay_info *info,
5416         int offset, int value)
5417 {
5418         info->adapter = adapter;
5419         info->offset = offset;
5420         info->value = value;
5421         SYSCTL_ADD_PROC(device_get_sysctl_ctx(adapter->dev),
5422             SYSCTL_CHILDREN(device_get_sysctl_tree(adapter->dev)),
5423             OID_AUTO, name, CTLTYPE_INT|CTLFLAG_RW,
5424             info, 0, em_sysctl_int_delay, "I", description);
5425 }
5426
5427 static void
5428 em_add_rx_process_limit(struct adapter *adapter, const char *name,
5429         const char *description, int *limit, int value)
5430 {
5431         *limit = value;
5432         SYSCTL_ADD_INT(device_get_sysctl_ctx(adapter->dev),
5433             SYSCTL_CHILDREN(device_get_sysctl_tree(adapter->dev)),
5434             OID_AUTO, name, CTLTYPE_INT|CTLFLAG_RW, limit, value, description);
5435 }
5436
5437 static void
5438 em_set_flow_cntrl(struct adapter *adapter, const char *name,
5439         const char *description, int *limit, int value)
5440 {
5441         *limit = value;
5442         SYSCTL_ADD_INT(device_get_sysctl_ctx(adapter->dev),
5443             SYSCTL_CHILDREN(device_get_sysctl_tree(adapter->dev)),
5444             OID_AUTO, name, CTLTYPE_INT|CTLFLAG_RW, limit, value, description);
5445 }
5446
5447 static int
5448 em_sysctl_debug_info(SYSCTL_HANDLER_ARGS)
5449 {
5450         struct adapter *adapter;
5451         int error;
5452         int result;
5453
5454         result = -1;
5455         error = sysctl_handle_int(oidp, &result, 0, req);
5456
5457         if (error || !req->newptr)
5458                 return (error);
5459
5460         if (result == 1) {
5461                 adapter = (struct adapter *)arg1;
5462                 em_print_debug_info(adapter);
5463         }
5464
5465         return (error);
5466 }
5467
5468 /*
5469 ** This routine is meant to be fluid, add whatever is
5470 ** needed for debugging a problem.  -jfv
5471 */
5472 static void
5473 em_print_debug_info(struct adapter *adapter)
5474 {
5475         device_t dev = adapter->dev;
5476         struct tx_ring *txr = adapter->tx_rings;
5477         struct rx_ring *rxr = adapter->rx_rings;
5478
5479         if (adapter->ifp->if_drv_flags & IFF_DRV_RUNNING)
5480                 printf("Interface is RUNNING ");
5481         else
5482                 printf("Interface is NOT RUNNING\n");
5483         if (adapter->ifp->if_drv_flags & IFF_DRV_OACTIVE)
5484                 printf("and ACTIVE\n");
5485         else
5486                 printf("and INACTIVE\n");
5487
5488         device_printf(dev, "hw tdh = %d, hw tdt = %d\n",
5489             E1000_READ_REG(&adapter->hw, E1000_TDH(0)),
5490             E1000_READ_REG(&adapter->hw, E1000_TDT(0)));
5491         device_printf(dev, "hw rdh = %d, hw rdt = %d\n",
5492             E1000_READ_REG(&adapter->hw, E1000_RDH(0)),
5493             E1000_READ_REG(&adapter->hw, E1000_RDT(0)));
5494         device_printf(dev, "Tx Queue Status = %d\n", txr->queue_status);
5495         device_printf(dev, "TX descriptors avail = %d\n",
5496             txr->tx_avail);
5497         device_printf(dev, "Tx Descriptors avail failure = %ld\n",
5498             txr->no_desc_avail);
5499         device_printf(dev, "RX discarded packets = %ld\n",
5500             rxr->rx_discarded);
5501         device_printf(dev, "RX Next to Check = %d\n", rxr->next_to_check);
5502         device_printf(dev, "RX Next to Refresh = %d\n", rxr->next_to_refresh);
5503 }