]> CyberLeo.Net >> Repos - FreeBSD/FreeBSD.git/blob - sys/dev/e1000/if_em.c
Add support for sysctl knobs to live tune the tx packet processing limits
[FreeBSD/FreeBSD.git] / sys / dev / e1000 / if_em.c
1 /******************************************************************************
2
3   Copyright (c) 2001-2015, Intel Corporation 
4   All rights reserved.
5   
6   Redistribution and use in source and binary forms, with or without 
7   modification, are permitted provided that the following conditions are met:
8   
9    1. Redistributions of source code must retain the above copyright notice, 
10       this list of conditions and the following disclaimer.
11   
12    2. Redistributions in binary form must reproduce the above copyright 
13       notice, this list of conditions and the following disclaimer in the 
14       documentation and/or other materials provided with the distribution.
15   
16    3. Neither the name of the Intel Corporation nor the names of its 
17       contributors may be used to endorse or promote products derived from 
18       this software without specific prior written permission.
19   
20   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
21   AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 
22   IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 
23   ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE 
24   LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 
25   CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 
26   SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 
27   INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 
28   CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 
29   ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
30   POSSIBILITY OF SUCH DAMAGE.
31
32 ******************************************************************************/
33 /*$FreeBSD$*/
34
35 #include "opt_em.h"
36 #include "opt_ddb.h"
37 #include "opt_inet.h"
38 #include "opt_inet6.h"
39
40 #ifdef HAVE_KERNEL_OPTION_HEADERS
41 #include "opt_device_polling.h"
42 #endif
43
44 #include <sys/param.h>
45 #include <sys/systm.h>
46 #ifdef DDB
47 #include <sys/types.h>
48 #include <ddb/ddb.h>
49 #endif
50 #if __FreeBSD_version >= 800000
51 #include <sys/buf_ring.h>
52 #endif
53 #include <sys/bus.h>
54 #include <sys/endian.h>
55 #include <sys/kernel.h>
56 #include <sys/kthread.h>
57 #include <sys/malloc.h>
58 #include <sys/mbuf.h>
59 #include <sys/module.h>
60 #include <sys/rman.h>
61 #include <sys/smp.h>
62 #include <sys/socket.h>
63 #include <sys/sockio.h>
64 #include <sys/sysctl.h>
65 #include <sys/taskqueue.h>
66 #include <sys/eventhandler.h>
67 #include <machine/bus.h>
68 #include <machine/resource.h>
69
70 #include <net/bpf.h>
71 #include <net/ethernet.h>
72 #include <net/if.h>
73 #include <net/if_var.h>
74 #include <net/if_arp.h>
75 #include <net/if_dl.h>
76 #include <net/if_media.h>
77
78 #include <net/if_types.h>
79 #include <net/if_vlan_var.h>
80
81 #include <netinet/in_systm.h>
82 #include <netinet/in.h>
83 #include <netinet/if_ether.h>
84 #include <netinet/ip.h>
85 #include <netinet/ip6.h>
86 #include <netinet/tcp.h>
87 #include <netinet/udp.h>
88
89 #include <machine/in_cksum.h>
90 #include <dev/led/led.h>
91 #include <dev/pci/pcivar.h>
92 #include <dev/pci/pcireg.h>
93
94 #include "e1000_api.h"
95 #include "e1000_82571.h"
96 #include "if_em.h"
97
/*********************************************************************
 *  Set this to one to display debug statistics
 *********************************************************************/
/* Non-static: toggled externally (e.g. from ddb) to dump debug stats. */
int     em_display_debug_stats = 0;

/*********************************************************************
 *  Driver version:
 *********************************************************************/
/* Appended to the branding string reported by em_probe(). */
char em_driver_version[] = "7.4.2";
107
/*********************************************************************
 *  PCI Device ID Table
 *
 *  Used by probe to select devices to load on
 *  Last field stores an index into e1000_strings
 *  Last entry must be all 0s
 *
 *  { Vendor ID, Device ID, SubVendor ID, SubDevice ID, String Index }
 *********************************************************************/

static em_vendor_info_t em_vendor_info_array[] =
{
	/* Intel(R) PRO/1000 Network Connection */
	{ 0x8086, E1000_DEV_ID_82571EB_COPPER,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82571EB_FIBER,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82571EB_SERDES,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82571EB_SERDES_DUAL,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82571EB_SERDES_QUAD,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82571EB_QUAD_COPPER,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82571EB_QUAD_COPPER_LP,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82571EB_QUAD_FIBER,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82571PT_QUAD_COPPER,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82572EI_COPPER,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82572EI_FIBER,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82572EI_SERDES,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82572EI,		PCI_ANY_ID, PCI_ANY_ID, 0},

	{ 0x8086, E1000_DEV_ID_82573E,		PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82573E_IAMT,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82573L,		PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82583V,		PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_80003ES2LAN_COPPER_SPT,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_80003ES2LAN_SERDES_SPT,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_80003ES2LAN_COPPER_DPT,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_80003ES2LAN_SERDES_DPT,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH8_IGP_M_AMT,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH8_IGP_AMT,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH8_IGP_C,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH8_IFE,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH8_IFE_GT,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH8_IFE_G,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH8_IGP_M,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH8_82567V_3,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH9_IGP_M_AMT,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH9_IGP_AMT,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH9_IGP_C,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH9_IGP_M,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH9_IGP_M_V,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH9_IFE,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH9_IFE_GT,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH9_IFE_G,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH9_BM,		PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82574L,		PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82574LA,		PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH10_R_BM_LM,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH10_R_BM_LF,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH10_R_BM_V,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH10_D_BM_LM,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH10_D_BM_LF,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH10_D_BM_V,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_PCH_M_HV_LM,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_PCH_M_HV_LC,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_PCH_D_HV_DM,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_PCH_D_HV_DC,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_PCH2_LV_LM,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_PCH2_LV_V,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_PCH_LPT_I217_LM,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_PCH_LPT_I217_V,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_PCH_LPTLP_I218_LM,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_PCH_LPTLP_I218_V,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_PCH_I218_LM2,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_PCH_I218_V2,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_PCH_I218_LM3,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_PCH_I218_V3,	PCI_ANY_ID, PCI_ANY_ID, 0},
	/* required last entry: em_probe() stops at vendor_id == 0 */
	{ 0, 0, 0, 0, 0}
};
197
/*********************************************************************
 *  Table of branding strings for all supported NICs.
 *********************************************************************/

static char *em_strings[] = {
	/* Index 0: referenced via the String Index field of em_vendor_info_array */
	"Intel(R) PRO/1000 Network Connection"
};
205
206 /*********************************************************************
207  *  Function prototypes
208  *********************************************************************/
209 static int      em_probe(device_t);
210 static int      em_attach(device_t);
211 static int      em_detach(device_t);
212 static int      em_shutdown(device_t);
213 static int      em_suspend(device_t);
214 static int      em_resume(device_t);
215 #ifdef EM_MULTIQUEUE
216 static int      em_mq_start(if_t, struct mbuf *);
217 static int      em_mq_start_locked(if_t,
218                     struct tx_ring *);
219 static void     em_qflush(if_t);
220 #else
221 static void     em_start(if_t);
222 static void     em_start_locked(if_t, struct tx_ring *);
223 #endif
224 static int      em_ioctl(if_t, u_long, caddr_t);
225 static uint64_t em_get_counter(if_t, ift_counter);
226 static void     em_init(void *);
227 static void     em_init_locked(struct adapter *);
228 static void     em_stop(void *);
229 static void     em_media_status(if_t, struct ifmediareq *);
230 static int      em_media_change(if_t);
231 static void     em_identify_hardware(struct adapter *);
232 static int      em_allocate_pci_resources(struct adapter *);
233 static int      em_allocate_legacy(struct adapter *);
234 static int      em_allocate_msix(struct adapter *);
235 static int      em_allocate_queues(struct adapter *);
236 static int      em_setup_msix(struct adapter *);
237 static void     em_free_pci_resources(struct adapter *);
238 static void     em_local_timer(void *);
239 static void     em_reset(struct adapter *);
240 static int      em_setup_interface(device_t, struct adapter *);
241
242 static void     em_setup_transmit_structures(struct adapter *);
243 static void     em_initialize_transmit_unit(struct adapter *);
244 static int      em_allocate_transmit_buffers(struct tx_ring *);
245 static void     em_free_transmit_structures(struct adapter *);
246 static void     em_free_transmit_buffers(struct tx_ring *);
247
248 static int      em_setup_receive_structures(struct adapter *);
249 static int      em_allocate_receive_buffers(struct rx_ring *);
250 static void     em_initialize_receive_unit(struct adapter *);
251 static void     em_free_receive_structures(struct adapter *);
252 static void     em_free_receive_buffers(struct rx_ring *);
253
254 static void     em_enable_intr(struct adapter *);
255 static void     em_disable_intr(struct adapter *);
256 static void     em_update_stats_counters(struct adapter *);
257 static void     em_add_hw_stats(struct adapter *adapter);
258 static void     em_txeof(struct tx_ring *);
259 static bool     em_rxeof(struct rx_ring *, int, int *);
260 #ifndef __NO_STRICT_ALIGNMENT
261 static int      em_fixup_rx(struct rx_ring *);
262 #endif
263 static void     em_receive_checksum(struct e1000_rx_desc *, struct mbuf *);
264 static void     em_transmit_checksum_setup(struct tx_ring *, struct mbuf *, int,
265                     struct ip *, u32 *, u32 *);
266 static void     em_tso_setup(struct tx_ring *, struct mbuf *, int, struct ip *,
267                     struct tcphdr *, u32 *, u32 *);
268 static void     em_set_promisc(struct adapter *);
269 static void     em_disable_promisc(struct adapter *);
270 static void     em_set_multi(struct adapter *);
271 static void     em_update_link_status(struct adapter *);
272 static void     em_refresh_mbufs(struct rx_ring *, int);
273 static void     em_register_vlan(void *, if_t, u16);
274 static void     em_unregister_vlan(void *, if_t, u16);
275 static void     em_setup_vlan_hw_support(struct adapter *);
276 static int      em_xmit(struct tx_ring *, struct mbuf **);
277 static int      em_dma_malloc(struct adapter *, bus_size_t,
278                     struct em_dma_alloc *, int);
279 static void     em_dma_free(struct adapter *, struct em_dma_alloc *);
280 static int      em_sysctl_nvm_info(SYSCTL_HANDLER_ARGS);
281 static void     em_print_nvm_info(struct adapter *);
282 static int      em_sysctl_debug_info(SYSCTL_HANDLER_ARGS);
283 static void     em_print_debug_info(struct adapter *);
284 static int      em_is_valid_ether_addr(u8 *);
285 static int      em_sysctl_int_delay(SYSCTL_HANDLER_ARGS);
286 static void     em_add_int_delay_sysctl(struct adapter *, const char *,
287                     const char *, struct em_int_delay_info *, int, int);
288 /* Management and WOL Support */
289 static void     em_init_manageability(struct adapter *);
290 static void     em_release_manageability(struct adapter *);
291 static void     em_get_hw_control(struct adapter *);
292 static void     em_release_hw_control(struct adapter *);
293 static void     em_get_wakeup(device_t);
294 static void     em_enable_wakeup(device_t);
295 static int      em_enable_phy_wakeup(struct adapter *);
296 static void     em_led_func(void *, int);
297 static void     em_disable_aspm(struct adapter *);
298
299 static int      em_irq_fast(void *);
300
301 /* MSIX handlers */
302 static void     em_msix_tx(void *);
303 static void     em_msix_rx(void *);
304 static void     em_msix_link(void *);
305 static void     em_handle_tx(void *context, int pending);
306 static void     em_handle_rx(void *context, int pending);
307 static void     em_handle_link(void *context, int pending);
308
309 #ifdef EM_MULTIQUEUE
310 static void     em_enable_vectors_82574(struct adapter *);
311 #endif
312
313 static void     em_set_sysctl_value(struct adapter *, const char *,
314                     const char *, int *, int);
315 static int      em_set_flowcntl(SYSCTL_HANDLER_ARGS);
316 static int      em_sysctl_eee(SYSCTL_HANDLER_ARGS);
317
318 static __inline void em_rx_discard(struct rx_ring *, int);
319
320 #ifdef DEVICE_POLLING
321 static poll_handler_t em_poll;
322 #endif /* POLLING */
323
/*********************************************************************
 *  FreeBSD Device Interface Entry Points
 *********************************************************************/

static device_method_t em_methods[] = {
	/* Device interface */
	DEVMETHOD(device_probe, em_probe),
	DEVMETHOD(device_attach, em_attach),
	DEVMETHOD(device_detach, em_detach),
	DEVMETHOD(device_shutdown, em_shutdown),
	DEVMETHOD(device_suspend, em_suspend),
	DEVMETHOD(device_resume, em_resume),
	DEVMETHOD_END
};

/* Softc is a struct adapter; newbus allocates it per device instance. */
static driver_t em_driver = {
	"em", em_methods, sizeof(struct adapter),
};

devclass_t em_devclass;
/* Register the driver on the pci bus. */
DRIVER_MODULE(em, pci, em_driver, em_devclass, 0, 0);
MODULE_DEPEND(em, pci, 1, 1, 1);
MODULE_DEPEND(em, ether, 1, 1, 1);
#ifdef DEV_NETMAP
MODULE_DEPEND(em, netmap, 1, 1, 1);
#endif /* DEV_NETMAP */
350
/*********************************************************************
 *  Tunable default values.
 *********************************************************************/

/* Convert between usecs and the hardware's 1.024 usec timer ticks
 * (rounded to nearest). */
#define EM_TICKS_TO_USECS(ticks)	((1024 * (ticks) + 500) / 1000)
#define EM_USECS_TO_TICKS(usecs)	((1000 * (usecs) + 512) / 1024)
#define M_TSO_LEN			66

/* ITR counts in 256 ns units; program it for MAX_INTS_PER_SEC. */
#define MAX_INTS_PER_SEC	8000
#define DEFAULT_ITR		(1000000000/(MAX_INTS_PER_SEC * 256))

/* Allow common code without TSO */
#ifndef CSUM_TSO
#define CSUM_TSO	0
#endif

#define TSO_WORKAROUND	4

static SYSCTL_NODE(_hw, OID_AUTO, em, CTLFLAG_RD, 0, "EM driver parameters");

static int em_disable_crc_stripping = 0;
SYSCTL_INT(_hw_em, OID_AUTO, disable_crc_stripping, CTLFLAG_RDTUN,
    &em_disable_crc_stripping, 0, "Disable CRC Stripping");

static int em_tx_int_delay_dflt = EM_TICKS_TO_USECS(EM_TIDV);
static int em_rx_int_delay_dflt = EM_TICKS_TO_USECS(EM_RDTR);
SYSCTL_INT(_hw_em, OID_AUTO, tx_int_delay, CTLFLAG_RDTUN, &em_tx_int_delay_dflt,
    0, "Default transmit interrupt delay in usecs");
SYSCTL_INT(_hw_em, OID_AUTO, rx_int_delay, CTLFLAG_RDTUN, &em_rx_int_delay_dflt,
    0, "Default receive interrupt delay in usecs");

static int em_tx_abs_int_delay_dflt = EM_TICKS_TO_USECS(EM_TADV);
static int em_rx_abs_int_delay_dflt = EM_TICKS_TO_USECS(EM_RADV);
SYSCTL_INT(_hw_em, OID_AUTO, tx_abs_int_delay, CTLFLAG_RDTUN,
    &em_tx_abs_int_delay_dflt, 0,
    "Default transmit interrupt delay limit in usecs");
SYSCTL_INT(_hw_em, OID_AUTO, rx_abs_int_delay, CTLFLAG_RDTUN,
    &em_rx_abs_int_delay_dflt, 0,
    "Default receive interrupt delay limit in usecs");

/* Descriptor counts; validated against EM_MIN/MAX_* in em_attach(). */
static int em_rxd = EM_DEFAULT_RXD;
static int em_txd = EM_DEFAULT_TXD;
SYSCTL_INT(_hw_em, OID_AUTO, rxd, CTLFLAG_RDTUN, &em_rxd, 0,
    "Number of receive descriptors per queue");
SYSCTL_INT(_hw_em, OID_AUTO, txd, CTLFLAG_RDTUN, &em_txd, 0,
    "Number of transmit descriptors per queue");

static int em_smart_pwr_down = FALSE;
SYSCTL_INT(_hw_em, OID_AUTO, smart_pwr_down, CTLFLAG_RDTUN, &em_smart_pwr_down,
    0, "Set to true to leave smart power down enabled on newer adapters");

/* Controls whether promiscuous also shows bad packets */
static int em_debug_sbp = FALSE;
SYSCTL_INT(_hw_em, OID_AUTO, sbp, CTLFLAG_RDTUN, &em_debug_sbp, 0,
    "Show bad packets in promiscuous mode");

static int em_enable_msix = TRUE;
SYSCTL_INT(_hw_em, OID_AUTO, enable_msix, CTLFLAG_RDTUN, &em_enable_msix, 0,
    "Enable MSI-X interrupts");

#ifdef EM_MULTIQUEUE
static int em_num_queues = 1;
SYSCTL_INT(_hw_em, OID_AUTO, num_queues, CTLFLAG_RDTUN, &em_num_queues, 0,
    "82574 only: Number of queues to configure, 0 indicates autoconfigure");
#endif

/*
** Global variable to store last used CPU when binding queues
** to CPUs in em_allocate_msix (the old comment said igb_allocate_msix,
** a leftover from the igb driver).  Starts at CPU_FIRST and increments
** when a queue is bound to a cpu.
*/
static int em_last_bind_cpu = -1;

/* How many packets rxeof tries to clean at a time */
static int em_rx_process_limit = 100;
SYSCTL_INT(_hw_em, OID_AUTO, rx_process_limit, CTLFLAG_RDTUN,
    &em_rx_process_limit, 0,
    "Maximum number of received packets to process "
    "at a time, -1 means unlimited");

/*
 * Energy efficient ethernet - default to OFF.  NOTE: this value is
 * copied into hw->dev_spec.ich8lan.eee_disable in em_attach(), so
 * 1 means EEE is *disabled* despite the sysctl description string.
 */
static int eee_setting = 1;
SYSCTL_INT(_hw_em, OID_AUTO, eee_setting, CTLFLAG_RDTUN, &eee_setting, 0,
    "Enable Energy Efficient Ethernet");

/* Global used in WOL setup with multiport cards */
static int global_quad_port_a = 0;
438
439 #ifdef DEV_NETMAP       /* see ixgbe.c for details */
440 #include <dev/netmap/if_em_netmap.h>
441 #endif /* DEV_NETMAP */
442
443 /*********************************************************************
444  *  Device identification routine
445  *
446  *  em_probe determines if the driver should be loaded on
447  *  adapter based on PCI vendor/device id of the adapter.
448  *
449  *  return BUS_PROBE_DEFAULT on success, positive on failure
450  *********************************************************************/
451
452 static int
453 em_probe(device_t dev)
454 {
455         char            adapter_name[60];
456         uint16_t        pci_vendor_id = 0;
457         uint16_t        pci_device_id = 0;
458         uint16_t        pci_subvendor_id = 0;
459         uint16_t        pci_subdevice_id = 0;
460         em_vendor_info_t *ent;
461
462         INIT_DEBUGOUT("em_probe: begin");
463
464         pci_vendor_id = pci_get_vendor(dev);
465         if (pci_vendor_id != EM_VENDOR_ID)
466                 return (ENXIO);
467
468         pci_device_id = pci_get_device(dev);
469         pci_subvendor_id = pci_get_subvendor(dev);
470         pci_subdevice_id = pci_get_subdevice(dev);
471
472         ent = em_vendor_info_array;
473         while (ent->vendor_id != 0) {
474                 if ((pci_vendor_id == ent->vendor_id) &&
475                     (pci_device_id == ent->device_id) &&
476
477                     ((pci_subvendor_id == ent->subvendor_id) ||
478                     (ent->subvendor_id == PCI_ANY_ID)) &&
479
480                     ((pci_subdevice_id == ent->subdevice_id) ||
481                     (ent->subdevice_id == PCI_ANY_ID))) {
482                         sprintf(adapter_name, "%s %s",
483                                 em_strings[ent->index],
484                                 em_driver_version);
485                         device_set_desc_copy(dev, adapter_name);
486                         return (BUS_PROBE_DEFAULT);
487                 }
488                 ent++;
489         }
490
491         return (ENXIO);
492 }
493
494 /*********************************************************************
495  *  Device initialization routine
496  *
497  *  The attach entry point is called when the driver is being loaded.
498  *  This routine identifies the type of hardware, allocates all resources
499  *  and initializes the hardware.
500  *
501  *  return 0 on success, positive on failure
502  *********************************************************************/
503
504 static int
505 em_attach(device_t dev)
506 {
507         struct adapter  *adapter;
508         struct e1000_hw *hw;
509         int             error = 0;
510
511         INIT_DEBUGOUT("em_attach: begin");
512
513         if (resource_disabled("em", device_get_unit(dev))) {
514                 device_printf(dev, "Disabled by device hint\n");
515                 return (ENXIO);
516         }
517
518         adapter = device_get_softc(dev);
519         adapter->dev = adapter->osdep.dev = dev;
520         hw = &adapter->hw;
521         EM_CORE_LOCK_INIT(adapter, device_get_nameunit(dev));
522
523         /* SYSCTL stuff */
524         SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
525             SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
526             OID_AUTO, "nvm", CTLTYPE_INT|CTLFLAG_RW, adapter, 0,
527             em_sysctl_nvm_info, "I", "NVM Information");
528
529         SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
530             SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
531             OID_AUTO, "debug", CTLTYPE_INT|CTLFLAG_RW, adapter, 0,
532             em_sysctl_debug_info, "I", "Debug Information");
533
534         SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
535             SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
536             OID_AUTO, "fc", CTLTYPE_INT|CTLFLAG_RW, adapter, 0,
537             em_set_flowcntl, "I", "Flow Control");
538
539         callout_init_mtx(&adapter->timer, &adapter->core_mtx, 0);
540
541         /* Determine hardware and mac info */
542         em_identify_hardware(adapter);
543
544         /* Setup PCI resources */
545         if (em_allocate_pci_resources(adapter)) {
546                 device_printf(dev, "Allocation of PCI resources failed\n");
547                 error = ENXIO;
548                 goto err_pci;
549         }
550
551         /*
552         ** For ICH8 and family we need to
553         ** map the flash memory, and this
554         ** must happen after the MAC is 
555         ** identified
556         */
557         if ((hw->mac.type == e1000_ich8lan) ||
558             (hw->mac.type == e1000_ich9lan) ||
559             (hw->mac.type == e1000_ich10lan) ||
560             (hw->mac.type == e1000_pchlan) ||
561             (hw->mac.type == e1000_pch2lan) ||
562             (hw->mac.type == e1000_pch_lpt)) {
563                 int rid = EM_BAR_TYPE_FLASH;
564                 adapter->flash = bus_alloc_resource_any(dev,
565                     SYS_RES_MEMORY, &rid, RF_ACTIVE);
566                 if (adapter->flash == NULL) {
567                         device_printf(dev, "Mapping of Flash failed\n");
568                         error = ENXIO;
569                         goto err_pci;
570                 }
571                 /* This is used in the shared code */
572                 hw->flash_address = (u8 *)adapter->flash;
573                 adapter->osdep.flash_bus_space_tag =
574                     rman_get_bustag(adapter->flash);
575                 adapter->osdep.flash_bus_space_handle =
576                     rman_get_bushandle(adapter->flash);
577         }
578
579         /* Do Shared Code initialization */
580         if (e1000_setup_init_funcs(hw, TRUE)) {
581                 device_printf(dev, "Setup of Shared code failed\n");
582                 error = ENXIO;
583                 goto err_pci;
584         }
585
586         /*
587          * Setup MSI/X or MSI if PCI Express
588          */
589         adapter->msix = em_setup_msix(adapter);
590
591         e1000_get_bus_info(hw);
592
593         /* Set up some sysctls for the tunable interrupt delays */
594         em_add_int_delay_sysctl(adapter, "rx_int_delay",
595             "receive interrupt delay in usecs", &adapter->rx_int_delay,
596             E1000_REGISTER(hw, E1000_RDTR), em_rx_int_delay_dflt);
597         em_add_int_delay_sysctl(adapter, "tx_int_delay",
598             "transmit interrupt delay in usecs", &adapter->tx_int_delay,
599             E1000_REGISTER(hw, E1000_TIDV), em_tx_int_delay_dflt);
600         em_add_int_delay_sysctl(adapter, "rx_abs_int_delay",
601             "receive interrupt delay limit in usecs",
602             &adapter->rx_abs_int_delay,
603             E1000_REGISTER(hw, E1000_RADV),
604             em_rx_abs_int_delay_dflt);
605         em_add_int_delay_sysctl(adapter, "tx_abs_int_delay",
606             "transmit interrupt delay limit in usecs",
607             &adapter->tx_abs_int_delay,
608             E1000_REGISTER(hw, E1000_TADV),
609             em_tx_abs_int_delay_dflt);
610         em_add_int_delay_sysctl(adapter, "itr",
611             "interrupt delay limit in usecs/4",
612             &adapter->tx_itr,
613             E1000_REGISTER(hw, E1000_ITR),
614             DEFAULT_ITR);
615
616         /* Sysctl for limiting the amount of work done in the taskqueue */
617         em_set_sysctl_value(adapter, "rx_processing_limit",
618             "max number of rx packets to process", &adapter->rx_process_limit,
619             em_rx_process_limit);
620
621         /*
622          * Validate number of transmit and receive descriptors. It
623          * must not exceed hardware maximum, and must be multiple
624          * of E1000_DBA_ALIGN.
625          */
626         if (((em_txd * sizeof(struct e1000_tx_desc)) % EM_DBA_ALIGN) != 0 ||
627             (em_txd > EM_MAX_TXD) || (em_txd < EM_MIN_TXD)) {
628                 device_printf(dev, "Using %d TX descriptors instead of %d!\n",
629                     EM_DEFAULT_TXD, em_txd);
630                 adapter->num_tx_desc = EM_DEFAULT_TXD;
631         } else
632                 adapter->num_tx_desc = em_txd;
633
634         if (((em_rxd * sizeof(struct e1000_rx_desc)) % EM_DBA_ALIGN) != 0 ||
635             (em_rxd > EM_MAX_RXD) || (em_rxd < EM_MIN_RXD)) {
636                 device_printf(dev, "Using %d RX descriptors instead of %d!\n",
637                     EM_DEFAULT_RXD, em_rxd);
638                 adapter->num_rx_desc = EM_DEFAULT_RXD;
639         } else
640                 adapter->num_rx_desc = em_rxd;
641
642         hw->mac.autoneg = DO_AUTO_NEG;
643         hw->phy.autoneg_wait_to_complete = FALSE;
644         hw->phy.autoneg_advertised = AUTONEG_ADV_DEFAULT;
645
646         /* Copper options */
647         if (hw->phy.media_type == e1000_media_type_copper) {
648                 hw->phy.mdix = AUTO_ALL_MODES;
649                 hw->phy.disable_polarity_correction = FALSE;
650                 hw->phy.ms_type = EM_MASTER_SLAVE;
651         }
652
653         /*
654          * Set the frame limits assuming
655          * standard ethernet sized frames.
656          */
657         adapter->hw.mac.max_frame_size =
658             ETHERMTU + ETHER_HDR_LEN + ETHERNET_FCS_SIZE;
659
660         /*
661          * This controls when hardware reports transmit completion
662          * status.
663          */
664         hw->mac.report_tx_early = 1;
665
666         /* 
667         ** Get queue/ring memory
668         */
669         if (em_allocate_queues(adapter)) {
670                 error = ENOMEM;
671                 goto err_pci;
672         }
673
674         /* Allocate multicast array memory. */
675         adapter->mta = malloc(sizeof(u8) * ETH_ADDR_LEN *
676             MAX_NUM_MULTICAST_ADDRESSES, M_DEVBUF, M_NOWAIT);
677         if (adapter->mta == NULL) {
678                 device_printf(dev, "Can not allocate multicast setup array\n");
679                 error = ENOMEM;
680                 goto err_late;
681         }
682
683         /* Check SOL/IDER usage */
684         if (e1000_check_reset_block(hw))
685                 device_printf(dev, "PHY reset is blocked"
686                     " due to SOL/IDER session.\n");
687
688         /* Sysctl for setting Energy Efficient Ethernet */
689         hw->dev_spec.ich8lan.eee_disable = eee_setting;
690         SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
691             SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
692             OID_AUTO, "eee_control", CTLTYPE_INT|CTLFLAG_RW,
693             adapter, 0, em_sysctl_eee, "I",
694             "Disable Energy Efficient Ethernet");
695
696         /*
697         ** Start from a known state, this is
698         ** important in reading the nvm and
699         ** mac from that.
700         */
701         e1000_reset_hw(hw);
702
703
704         /* Make sure we have a good EEPROM before we read from it */
705         if (e1000_validate_nvm_checksum(hw) < 0) {
706                 /*
707                 ** Some PCI-E parts fail the first check due to
708                 ** the link being in sleep state, call it again,
709                 ** if it fails a second time its a real issue.
710                 */
711                 if (e1000_validate_nvm_checksum(hw) < 0) {
712                         device_printf(dev,
713                             "The EEPROM Checksum Is Not Valid\n");
714                         error = EIO;
715                         goto err_late;
716                 }
717         }
718
719         /* Copy the permanent MAC address out of the EEPROM */
720         if (e1000_read_mac_addr(hw) < 0) {
721                 device_printf(dev, "EEPROM read error while reading MAC"
722                     " address\n");
723                 error = EIO;
724                 goto err_late;
725         }
726
727         if (!em_is_valid_ether_addr(hw->mac.addr)) {
728                 device_printf(dev, "Invalid MAC address\n");
729                 error = EIO;
730                 goto err_late;
731         }
732
733         /* Disable ULP support */
734         e1000_disable_ulp_lpt_lp(hw, TRUE);
735
736         /*
737         **  Do interrupt configuration
738         */
739         if (adapter->msix > 1) /* Do MSIX */
740                 error = em_allocate_msix(adapter);
741         else  /* MSI or Legacy */
742                 error = em_allocate_legacy(adapter);
743         if (error)
744                 goto err_late;
745
746         /*
747          * Get Wake-on-Lan and Management info for later use
748          */
749         em_get_wakeup(dev);
750
751         /* Setup OS specific network interface */
752         if (em_setup_interface(dev, adapter) != 0)
753                 goto err_late;
754
755         em_reset(adapter);
756
757         /* Initialize statistics */
758         em_update_stats_counters(adapter);
759
760         hw->mac.get_link_status = 1;
761         em_update_link_status(adapter);
762
763         /* Register for VLAN events */
764         adapter->vlan_attach = EVENTHANDLER_REGISTER(vlan_config,
765             em_register_vlan, adapter, EVENTHANDLER_PRI_FIRST);
766         adapter->vlan_detach = EVENTHANDLER_REGISTER(vlan_unconfig,
767             em_unregister_vlan, adapter, EVENTHANDLER_PRI_FIRST); 
768
769         em_add_hw_stats(adapter);
770
771         /* Non-AMT based hardware can now take control from firmware */
772         if (adapter->has_manage && !adapter->has_amt)
773                 em_get_hw_control(adapter);
774
775         /* Tell the stack that the interface is not active */
776         if_setdrvflagbits(adapter->ifp, IFF_DRV_OACTIVE, IFF_DRV_RUNNING);
777
778         adapter->led_dev = led_create(em_led_func, adapter,
779             device_get_nameunit(dev));
780 #ifdef DEV_NETMAP
781         em_netmap_attach(adapter);
782 #endif /* DEV_NETMAP */
783
784         INIT_DEBUGOUT("em_attach: end");
785
786         return (0);
787
788 err_late:
789         em_free_transmit_structures(adapter);
790         em_free_receive_structures(adapter);
791         em_release_hw_control(adapter);
792         if (adapter->ifp != (void *)NULL)
793                 if_free(adapter->ifp);
794 err_pci:
795         em_free_pci_resources(adapter);
796         free(adapter->mta, M_DEVBUF);
797         EM_CORE_LOCK_DESTROY(adapter);
798
799         return (error);
800 }
801
802 /*********************************************************************
803  *  Device removal routine
804  *
805  *  The detach entry point is called when the driver is being removed.
806  *  This routine stops the adapter and deallocates all the resources
807  *  that were allocated for driver operation.
808  *
809  *  return 0 on success, positive on failure
810  *********************************************************************/
811
812 static int
813 em_detach(device_t dev)
814 {
815         struct adapter  *adapter = device_get_softc(dev);
816         if_t ifp = adapter->ifp;
817
818         INIT_DEBUGOUT("em_detach: begin");
819
820         /* Make sure VLANS are not using driver */
821         if (if_vlantrunkinuse(ifp)) {
822                 device_printf(dev,"Vlan in use, detach first\n");
823                 return (EBUSY);
824         }
825
826 #ifdef DEVICE_POLLING
827         if (if_getcapenable(ifp) & IFCAP_POLLING)
828                 ether_poll_deregister(ifp);
829 #endif
830
831         if (adapter->led_dev != NULL)
832                 led_destroy(adapter->led_dev);
833
834         EM_CORE_LOCK(adapter);
835         adapter->in_detach = 1;
836         em_stop(adapter);
837         EM_CORE_UNLOCK(adapter);
838         EM_CORE_LOCK_DESTROY(adapter);
839
840         e1000_phy_hw_reset(&adapter->hw);
841
842         em_release_manageability(adapter);
843         em_release_hw_control(adapter);
844
845         /* Unregister VLAN events */
846         if (adapter->vlan_attach != NULL)
847                 EVENTHANDLER_DEREGISTER(vlan_config, adapter->vlan_attach);
848         if (adapter->vlan_detach != NULL)
849                 EVENTHANDLER_DEREGISTER(vlan_unconfig, adapter->vlan_detach); 
850
851         ether_ifdetach(adapter->ifp);
852         callout_drain(&adapter->timer);
853
854 #ifdef DEV_NETMAP
855         netmap_detach(ifp);
856 #endif /* DEV_NETMAP */
857
858         em_free_pci_resources(adapter);
859         bus_generic_detach(dev);
860         if_free(ifp);
861
862         em_free_transmit_structures(adapter);
863         em_free_receive_structures(adapter);
864
865         em_release_hw_control(adapter);
866         free(adapter->mta, M_DEVBUF);
867
868         return (0);
869 }
870
871 /*********************************************************************
872  *
873  *  Shutdown entry point
874  *
875  **********************************************************************/
876
877 static int
878 em_shutdown(device_t dev)
879 {
880         return em_suspend(dev);
881 }
882
883 /*
884  * Suspend/resume device methods.
885  */
886 static int
887 em_suspend(device_t dev)
888 {
889         struct adapter *adapter = device_get_softc(dev);
890
891         EM_CORE_LOCK(adapter);
892
893         em_release_manageability(adapter);
894         em_release_hw_control(adapter);
895         em_enable_wakeup(dev);
896
897         EM_CORE_UNLOCK(adapter);
898
899         return bus_generic_suspend(dev);
900 }
901
static int
em_resume(device_t dev)
{
        struct adapter *adapter = device_get_softc(dev);
        struct tx_ring  *txr = adapter->tx_rings;
        if_t ifp = adapter->ifp;

        EM_CORE_LOCK(adapter);
        /* PCH2 parts need extra workarounds applied before re-init */
        if (adapter->hw.mac.type == e1000_pch2lan)
                e1000_resume_workarounds_pchlan(&adapter->hw);
        em_init_locked(adapter);
        em_init_manageability(adapter);

        /*
         * If the interface was up with an active link, kick each
         * TX ring to restart any transmission that was pending
         * when we suspended.
         */
        if ((if_getflags(ifp) & IFF_UP) &&
            (if_getdrvflags(ifp) & IFF_DRV_RUNNING) && adapter->link_active) {
                for (int i = 0; i < adapter->num_queues; i++, txr++) {
                        EM_TX_LOCK(txr);
#ifdef EM_MULTIQUEUE
                        if (!drbr_empty(ifp, txr->br))
                                em_mq_start_locked(ifp, txr);
#else
                        if (!if_sendq_empty(ifp))
                                em_start_locked(ifp, txr);
#endif
                        EM_TX_UNLOCK(txr);
                }
        }
        EM_CORE_UNLOCK(adapter);

        return bus_generic_resume(dev);
}
933
934
935 #ifndef EM_MULTIQUEUE
/*
 * Drain the interface send queue into TX ring "txr".
 * Caller must hold the ring's TX lock.  Stops early when
 * descriptors run low (setting OACTIVE) or on encap failure.
 */
static void
em_start_locked(if_t ifp, struct tx_ring *txr)
{
        struct adapter  *adapter = if_getsoftc(ifp);
        struct mbuf     *m_head;

        EM_TX_LOCK_ASSERT(txr);

        /* Nothing to do unless running and not flow-blocked */
        if ((if_getdrvflags(ifp) & (IFF_DRV_RUNNING|IFF_DRV_OACTIVE)) !=
            IFF_DRV_RUNNING)
                return;

        if (!adapter->link_active)
                return;

        while (!if_sendq_empty(ifp)) {
                /* Call cleanup if number of TX descriptors low */
                if (txr->tx_avail <= EM_TX_CLEANUP_THRESHOLD)
                        em_txeof(txr);
                /* Too few descriptors even after cleanup: back off */
                if (txr->tx_avail < EM_MAX_SCATTER) {
                        if_setdrvflagbits(ifp,IFF_DRV_OACTIVE, 0);
                        break;
                }
                m_head = if_dequeue(ifp);
                if (m_head == NULL)
                        break;
                /*
                 *  Encapsulation can modify our pointer, and or make it
                 *  NULL on failure.  In that event, we can't requeue.
                 */
                if (em_xmit(txr, &m_head)) {
                        if (m_head == NULL)
                                break;
                        if_sendq_prepend(ifp, m_head);
                        break;
                }

                /* Mark the queue as having work */
                if (txr->busy == EM_TX_IDLE)
                        txr->busy = EM_TX_BUSY;

                /* Send a copy of the frame to the BPF listener */
                ETHER_BPF_MTAP(ifp, m_head);

        }

        return;
}
984
985 static void
986 em_start(if_t ifp)
987 {
988         struct adapter  *adapter = if_getsoftc(ifp);
989         struct tx_ring  *txr = adapter->tx_rings;
990
991         if (if_getdrvflags(ifp) & IFF_DRV_RUNNING) {
992                 EM_TX_LOCK(txr);
993                 em_start_locked(ifp, txr);
994                 EM_TX_UNLOCK(txr);
995         }
996         return;
997 }
998 #else /* EM_MULTIQUEUE */
999 /*********************************************************************
1000  *  Multiqueue Transmit routines 
1001  *
1002  *  em_mq_start is called by the stack to initiate a transmit.
1003  *  however, if busy the driver can queue the request rather
1004  *  than do an immediate send. It is this that is an advantage
1005  *  in this driver, rather than also having multiple tx queues.
1006  **********************************************************************/
1007 /*
1008 ** Multiqueue capable stack interface
1009 */
1010 static int
1011 em_mq_start(if_t ifp, struct mbuf *m)
1012 {
1013         struct adapter  *adapter = if_getsoftc(ifp);
1014         struct tx_ring  *txr = adapter->tx_rings;
1015         unsigned int    i, error;
1016
1017         if (M_HASHTYPE_GET(m) != M_HASHTYPE_NONE)
1018                 i = m->m_pkthdr.flowid % adapter->num_queues;
1019         else
1020                 i = curcpu % adapter->num_queues;
1021
1022         txr = &adapter->tx_rings[i];
1023
1024         error = drbr_enqueue(ifp, txr->br, m);
1025         if (error)
1026                 return (error);
1027
1028         if (EM_TX_TRYLOCK(txr)) {
1029                 em_mq_start_locked(ifp, txr);
1030                 EM_TX_UNLOCK(txr);
1031         } else 
1032                 taskqueue_enqueue(txr->tq, &txr->tx_task);
1033
1034         return (0);
1035 }
1036
/*
 * Drain the ring's buf_ring; caller must hold the TX lock.
 * Returns 0 or the first em_xmit() error encountered.
 */
static int
em_mq_start_locked(if_t ifp, struct tx_ring *txr)
{
        struct adapter  *adapter = txr->adapter;
        struct mbuf     *next;
        int             err = 0, enq = 0;

        EM_TX_LOCK_ASSERT(txr);

        if (((if_getdrvflags(ifp) & IFF_DRV_RUNNING) == 0) ||
            adapter->link_active == 0) {
                return (ENETDOWN);
        }

        /* Process the queue */
        while ((next = drbr_peek(ifp, txr->br)) != NULL) {
                if ((err = em_xmit(txr, &next)) != 0) {
                        if (next == NULL) {
                                /* It was freed, move forward */
                                drbr_advance(ifp, txr->br);
                        } else {
                                /* 
                                 * Still have one left, it may not be
                                 * the same since the transmit function
                                 * may have changed it.
                                 */
                                drbr_putback(ifp, txr->br, next);
                        }
                        break;
                }
                /* Committed to hardware: account, then tap to BPF */
                drbr_advance(ifp, txr->br);
                enq++;
                if_inc_counter(ifp, IFCOUNTER_OBYTES, next->m_pkthdr.len);
                if (next->m_flags & M_MCAST)
                        if_inc_counter(ifp, IFCOUNTER_OMCASTS, 1);
                ETHER_BPF_MTAP(ifp, next);
                if ((if_getdrvflags(ifp) & IFF_DRV_RUNNING) == 0)
                        break;
        }

        /* Mark the queue as having work */
        if ((enq > 0) && (txr->busy == EM_TX_IDLE))
                txr->busy = EM_TX_BUSY;

        /* Reclaim descriptors; set OACTIVE if still too few remain */
        if (txr->tx_avail < EM_MAX_SCATTER)
                em_txeof(txr);
        if (txr->tx_avail < EM_MAX_SCATTER) {
                if_setdrvflagbits(ifp, IFF_DRV_OACTIVE,0);
        }
        return (err);
}
1088
1089 /*
1090 ** Flush all ring buffers
1091 */
1092 static void
1093 em_qflush(if_t ifp)
1094 {
1095         struct adapter  *adapter = if_getsoftc(ifp);
1096         struct tx_ring  *txr = adapter->tx_rings;
1097         struct mbuf     *m;
1098
1099         for (int i = 0; i < adapter->num_queues; i++, txr++) {
1100                 EM_TX_LOCK(txr);
1101                 while ((m = buf_ring_dequeue_sc(txr->br)) != NULL)
1102                         m_freem(m);
1103                 EM_TX_UNLOCK(txr);
1104         }
1105         if_qflush(ifp);
1106 }
1107 #endif /* EM_MULTIQUEUE */
1108
1109 /*********************************************************************
1110  *  Ioctl entry point
1111  *
1112  *  em_ioctl is called when the user wants to configure the
1113  *  interface.
1114  *
1115  *  return 0 on success, positive on failure
1116  **********************************************************************/
1117
static int
em_ioctl(if_t ifp, u_long command, caddr_t data)
{
        struct adapter  *adapter = if_getsoftc(ifp);
        struct ifreq    *ifr = (struct ifreq *)data;
#if defined(INET) || defined(INET6)
        struct ifaddr   *ifa = (struct ifaddr *)data;
#endif
        bool            avoid_reset = FALSE;
        int             error = 0;

        /* Refuse new work once detach has begun */
        if (adapter->in_detach)
                return (error);

        switch (command) {
        case SIOCSIFADDR:
#ifdef INET
                if (ifa->ifa_addr->sa_family == AF_INET)
                        avoid_reset = TRUE;
#endif
#ifdef INET6
                if (ifa->ifa_addr->sa_family == AF_INET6)
                        avoid_reset = TRUE;
#endif
                /*
                ** Calling init results in link renegotiation,
                ** so we avoid doing it when possible.
                */
                if (avoid_reset) {
                        if_setflagbits(ifp,IFF_UP,0);
                        if (!(if_getdrvflags(ifp)& IFF_DRV_RUNNING))
                                em_init(adapter);
#ifdef INET
                        if (!(if_getflags(ifp) & IFF_NOARP))
                                arp_ifinit(ifp, ifa);
#endif
                } else
                        error = ether_ioctl(ifp, command, data);
                break;
        case SIOCSIFMTU:
            {
                int max_frame_size;

                IOCTL_DEBUGOUT("ioctl rcv'd: SIOCSIFMTU (Set Interface MTU)");

                EM_CORE_LOCK(adapter);
                /* Per-MAC maximum frame size the hardware supports */
                switch (adapter->hw.mac.type) {
                case e1000_82571:
                case e1000_82572:
                case e1000_ich9lan:
                case e1000_ich10lan:
                case e1000_pch2lan:
                case e1000_pch_lpt:
                case e1000_82574:
                case e1000_82583:
                case e1000_80003es2lan: /* 9K Jumbo Frame size */
                        max_frame_size = 9234;
                        break;
                case e1000_pchlan:
                        max_frame_size = 4096;
                        break;
                        /* Adapters that do not support jumbo frames */
                case e1000_ich8lan:
                        max_frame_size = ETHER_MAX_LEN;
                        break;
                default:
                        max_frame_size = MAX_JUMBO_FRAME_SIZE;
                }
                if (ifr->ifr_mtu > max_frame_size - ETHER_HDR_LEN -
                    ETHER_CRC_LEN) {
                        EM_CORE_UNLOCK(adapter);
                        error = EINVAL;
                        break;
                }

                if_setmtu(ifp, ifr->ifr_mtu);
                adapter->hw.mac.max_frame_size =
                    if_getmtu(ifp) + ETHER_HDR_LEN + ETHER_CRC_LEN;
                /* Re-init so the new frame size takes effect */
                em_init_locked(adapter);
                EM_CORE_UNLOCK(adapter);
                break;
            }
        case SIOCSIFFLAGS:
                IOCTL_DEBUGOUT("ioctl rcv'd:\
                    SIOCSIFFLAGS (Set Interface Flags)");
                EM_CORE_LOCK(adapter);
                if (if_getflags(ifp) & IFF_UP) {
                        if (if_getdrvflags(ifp) & IFF_DRV_RUNNING) {
                                /*
                                 * Already running: only reprogram the
                                 * promisc/allmulti state if it changed,
                                 * avoiding a full re-init.
                                 */
                                if ((if_getflags(ifp) ^ adapter->if_flags) &
                                    (IFF_PROMISC | IFF_ALLMULTI)) {
                                        em_disable_promisc(adapter);
                                        em_set_promisc(adapter);
                                }
                        } else
                                em_init_locked(adapter);
                } else
                        if (if_getdrvflags(ifp) & IFF_DRV_RUNNING)
                                em_stop(adapter);
                adapter->if_flags = if_getflags(ifp);
                EM_CORE_UNLOCK(adapter);
                break;
        case SIOCADDMULTI:
        case SIOCDELMULTI:
                IOCTL_DEBUGOUT("ioctl rcv'd: SIOC(ADD|DEL)MULTI");
                if (if_getdrvflags(ifp) & IFF_DRV_RUNNING) {
                        EM_CORE_LOCK(adapter);
                        em_disable_intr(adapter);
                        em_set_multi(adapter);
#ifdef DEVICE_POLLING
                        if (!(if_getcapenable(ifp) & IFCAP_POLLING))
#endif
                                em_enable_intr(adapter);
                        EM_CORE_UNLOCK(adapter);
                }
                break;
        case SIOCSIFMEDIA:
                /* Check SOL/IDER usage */
                EM_CORE_LOCK(adapter);
                if (e1000_check_reset_block(&adapter->hw)) {
                        EM_CORE_UNLOCK(adapter);
                        device_printf(adapter->dev, "Media change is"
                            " blocked due to SOL/IDER session.\n");
                        break;
                }
                EM_CORE_UNLOCK(adapter);
                /* falls thru */
        case SIOCGIFMEDIA:
                IOCTL_DEBUGOUT("ioctl rcv'd: \
                    SIOCxIFMEDIA (Get/Set Interface Media)");
                error = ifmedia_ioctl(ifp, ifr, &adapter->media, command);
                break;
        case SIOCSIFCAP:
            {
                int mask, reinit;

                IOCTL_DEBUGOUT("ioctl rcv'd: SIOCSIFCAP (Set Capabilities)");
                reinit = 0;
                /* mask = set of capability bits being toggled */
                mask = ifr->ifr_reqcap ^ if_getcapenable(ifp);
#ifdef DEVICE_POLLING
                if (mask & IFCAP_POLLING) {
                        if (ifr->ifr_reqcap & IFCAP_POLLING) {
                                error = ether_poll_register(em_poll, ifp);
                                if (error)
                                        return (error);
                                EM_CORE_LOCK(adapter);
                                em_disable_intr(adapter);
                                if_setcapenablebit(ifp, IFCAP_POLLING, 0);
                                EM_CORE_UNLOCK(adapter);
                        } else {
                                error = ether_poll_deregister(ifp);
                                /* Enable interrupt even in error case */
                                EM_CORE_LOCK(adapter);
                                em_enable_intr(adapter);
                                if_setcapenablebit(ifp, 0, IFCAP_POLLING);
                                EM_CORE_UNLOCK(adapter);
                        }
                }
#endif
                if (mask & IFCAP_HWCSUM) {
                        if_togglecapenable(ifp,IFCAP_HWCSUM);
                        reinit = 1;
                }
                if (mask & IFCAP_TSO4) {
                        if_togglecapenable(ifp,IFCAP_TSO4);
                        reinit = 1;
                }
                if (mask & IFCAP_VLAN_HWTAGGING) {
                        if_togglecapenable(ifp,IFCAP_VLAN_HWTAGGING);
                        reinit = 1;
                }
                if (mask & IFCAP_VLAN_HWFILTER) {
                        if_togglecapenable(ifp, IFCAP_VLAN_HWFILTER);
                        reinit = 1;
                }
                if (mask & IFCAP_VLAN_HWTSO) {
                        if_togglecapenable(ifp, IFCAP_VLAN_HWTSO);
                        reinit = 1;
                }
                if ((mask & IFCAP_WOL) &&
                    (if_getcapabilities(ifp) & IFCAP_WOL) != 0) {
                        if (mask & IFCAP_WOL_MCAST)
                                if_togglecapenable(ifp, IFCAP_WOL_MCAST);
                        if (mask & IFCAP_WOL_MAGIC)
                                if_togglecapenable(ifp, IFCAP_WOL_MAGIC);
                }
                if (reinit && (if_getdrvflags(ifp) & IFF_DRV_RUNNING))
                        em_init(adapter);
                if_vlancap(ifp);
                break;
            }

        default:
                error = ether_ioctl(ifp, command, data);
                break;
        }

        return (error);
}
1316
1317
1318 /*********************************************************************
1319  *  Init entry point
1320  *
1321  *  This routine is used in two ways. It is used by the stack as
1322  *  init entry point in network interface structure. It is also used
1323  *  by the driver as a hw/sw initialization routine to get to a
1324  *  consistent state.
1325  *
1326  *  return 0 on success, positive on failure
1327  **********************************************************************/
1328
static void
em_init_locked(struct adapter *adapter)
{
        if_t ifp = adapter->ifp;
        device_t        dev = adapter->dev;

        INIT_DEBUGOUT("em_init: begin");

        EM_CORE_LOCK_ASSERT(adapter);

        /* Quiesce: mask interrupts and stop the timer while we reconfigure */
        em_disable_intr(adapter);
        callout_stop(&adapter->timer);

        /* Get the latest mac address, User can use a LAA */
        bcopy(if_getlladdr(adapter->ifp), adapter->hw.mac.addr,
              ETHER_ADDR_LEN);

        /* Put the address into the Receive Address Array */
        e1000_rar_set(&adapter->hw, adapter->hw.mac.addr, 0);

        /*
         * With the 82571 adapter, RAR[0] may be overwritten
         * when the other port is reset, we make a duplicate
         * in RAR[14] for that eventuality, this assures
         * the interface continues to function.
         */
        if (adapter->hw.mac.type == e1000_82571) {
                e1000_set_laa_state_82571(&adapter->hw, TRUE);
                e1000_rar_set(&adapter->hw, adapter->hw.mac.addr,
                    E1000_RAR_ENTRIES - 1);
        }

        /* Initialize the hardware */
        em_reset(adapter);
        em_update_link_status(adapter);

        /* Setup VLAN support, basic and offload if available */
        E1000_WRITE_REG(&adapter->hw, E1000_VET, ETHERTYPE_VLAN);

        /* Set hardware offload abilities from the enabled capabilities */
        if_clearhwassist(ifp);
        if (if_getcapenable(ifp) & IFCAP_TXCSUM)
                if_sethwassistbits(ifp, CSUM_TCP | CSUM_UDP, 0);
        if (if_getcapenable(ifp) & IFCAP_TSO4)
                if_sethwassistbits(ifp, CSUM_TSO, 0);

        /* Configure for OS presence */
        em_init_manageability(adapter);

        /* Prepare transmit descriptors and buffers */
        em_setup_transmit_structures(adapter);
        em_initialize_transmit_unit(adapter);

        /* Setup Multicast table */
        em_set_multi(adapter);

        /*
        ** Figure out the desired mbuf
        ** pool for doing jumbos
        */
        if (adapter->hw.mac.max_frame_size <= 2048)
                adapter->rx_mbuf_sz = MCLBYTES;
        else if (adapter->hw.mac.max_frame_size <= 4096)
                adapter->rx_mbuf_sz = MJUMPAGESIZE;
        else
                adapter->rx_mbuf_sz = MJUM9BYTES;

        /* Prepare receive descriptors and buffers */
        if (em_setup_receive_structures(adapter)) {
                device_printf(dev, "Could not setup receive structures\n");
                em_stop(adapter);
                return;
        }
        em_initialize_receive_unit(adapter);

        /* Use real VLAN Filter support? */
        if (if_getcapenable(ifp) & IFCAP_VLAN_HWTAGGING) {
                if (if_getcapenable(ifp) & IFCAP_VLAN_HWFILTER)
                        /* Use real VLAN Filter support */
                        em_setup_vlan_hw_support(adapter);
                else {
                        /* Just enable hardware VLAN tag stripping */
                        u32 ctrl;
                        ctrl = E1000_READ_REG(&adapter->hw, E1000_CTRL);
                        ctrl |= E1000_CTRL_VME;
                        E1000_WRITE_REG(&adapter->hw, E1000_CTRL, ctrl);
                }
        }

        /* Don't lose promiscuous settings */
        em_set_promisc(adapter);

        /* Set the interface as ACTIVE */
        if_setdrvflagbits(ifp, IFF_DRV_RUNNING, IFF_DRV_OACTIVE);

        callout_reset(&adapter->timer, hz, em_local_timer, adapter);
        e1000_clear_hw_cntrs_base_generic(&adapter->hw);

        /* MSI/X configuration for 82574 */
        if (adapter->hw.mac.type == e1000_82574) {
                int tmp;
                tmp = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
                tmp |= E1000_CTRL_EXT_PBA_CLR;
                E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT, tmp);
                /* Set the IVAR - interrupt vector routing. */
                E1000_WRITE_REG(&adapter->hw, E1000_IVAR, adapter->ivars);
        }

#ifdef DEVICE_POLLING
        /*
         * Only enable interrupts if we are not polling, make sure
         * they are off otherwise.
         */
        if (if_getcapenable(ifp) & IFCAP_POLLING)
                em_disable_intr(adapter);
        else
#endif /* DEVICE_POLLING */
                em_enable_intr(adapter);

        /* AMT based hardware can now take control from firmware */
        if (adapter->has_manage && adapter->has_amt)
                em_get_hw_control(adapter);
}
1451
/* Locking wrapper for em_init_locked(), used as the if_init entry */
static void
em_init(void *arg)
{
        struct adapter  *adapter = (struct adapter *)arg;

        EM_CORE_LOCK(adapter);
        em_init_locked(adapter);
        EM_CORE_UNLOCK(adapter);
}
1461
1462
1463 #ifdef DEVICE_POLLING
1464 /*********************************************************************
1465  *
1466  *  Legacy polling routine: note this only works with single queue
1467  *
1468  *********************************************************************/
static int
em_poll(if_t ifp, enum poll_cmd cmd, int count)
{
        struct adapter *adapter = if_getsoftc(ifp);
        struct tx_ring  *txr = adapter->tx_rings;
        struct rx_ring  *rxr = adapter->rx_rings;
        u32             reg_icr;
        int             rx_done;

        EM_CORE_LOCK(adapter);
        if ((if_getdrvflags(ifp) & IFF_DRV_RUNNING) == 0) {
                EM_CORE_UNLOCK(adapter);
                return (0);
        }

        if (cmd == POLL_AND_CHECK_STATUS) {
                reg_icr = E1000_READ_REG(&adapter->hw, E1000_ICR);
                /* Link event pending: refresh link state */
                if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
                        callout_stop(&adapter->timer);
                        adapter->hw.mac.get_link_status = 1;
                        em_update_link_status(adapter);
                        callout_reset(&adapter->timer, hz,
                            em_local_timer, adapter);
                }
        }
        EM_CORE_UNLOCK(adapter);

        /* Harvest up to "count" received packets from ring 0 */
        em_rxeof(rxr, count, &rx_done);

        /* Reclaim TX descriptors and restart any pending transmit */
        EM_TX_LOCK(txr);
        em_txeof(txr);
#ifdef EM_MULTIQUEUE
        if (!drbr_empty(ifp, txr->br))
                em_mq_start_locked(ifp, txr);
#else
        if (!if_sendq_empty(ifp))
                em_start_locked(ifp, txr);
#endif
        EM_TX_UNLOCK(txr);

        return (rx_done);
}
1511 #endif /* DEVICE_POLLING */
1512
1513
1514 /*********************************************************************
1515  *
1516  *  Fast Legacy/MSI Combined Interrupt Service routine  
1517  *
1518  *********************************************************************/
1519 static int
1520 em_irq_fast(void *arg)
1521 {
1522         struct adapter  *adapter = arg;
1523         if_t ifp;
1524         u32             reg_icr;
1525
1526         ifp = adapter->ifp;
1527
1528         reg_icr = E1000_READ_REG(&adapter->hw, E1000_ICR);
1529
1530         /* Hot eject?  */
1531         if (reg_icr == 0xffffffff)
1532                 return FILTER_STRAY;
1533
1534         /* Definitely not our interrupt.  */
1535         if (reg_icr == 0x0)
1536                 return FILTER_STRAY;
1537
1538         /*
1539          * Starting with the 82571 chip, bit 31 should be used to
1540          * determine whether the interrupt belongs to us.
1541          */
1542         if (adapter->hw.mac.type >= e1000_82571 &&
1543             (reg_icr & E1000_ICR_INT_ASSERTED) == 0)
1544                 return FILTER_STRAY;
1545
1546         em_disable_intr(adapter);
1547         taskqueue_enqueue(adapter->tq, &adapter->que_task);
1548
1549         /* Link status change */
1550         if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
1551                 adapter->hw.mac.get_link_status = 1;
1552                 taskqueue_enqueue(taskqueue_fast, &adapter->link_task);
1553         }
1554
1555         if (reg_icr & E1000_ICR_RXO)
1556                 adapter->rx_overruns++;
1557         return FILTER_HANDLED;
1558 }
1559
1560 /* Combined RX/TX handler, used by Legacy and MSI */
1561 static void
1562 em_handle_que(void *context, int pending)
1563 {
1564         struct adapter  *adapter = context;
1565         if_t ifp = adapter->ifp;
1566         struct tx_ring  *txr = adapter->tx_rings;
1567         struct rx_ring  *rxr = adapter->rx_rings;
1568
1569         if (if_getdrvflags(ifp) & IFF_DRV_RUNNING) {
1570                 bool more = em_rxeof(rxr, adapter->rx_process_limit, NULL);
1571
1572                 EM_TX_LOCK(txr);
1573                 em_txeof(txr);
1574 #ifdef EM_MULTIQUEUE
1575                 if (!drbr_empty(ifp, txr->br))
1576                         em_mq_start_locked(ifp, txr);
1577 #else
1578                 if (!if_sendq_empty(ifp))
1579                         em_start_locked(ifp, txr);
1580 #endif
1581                 EM_TX_UNLOCK(txr);
1582                 if (more) {
1583                         taskqueue_enqueue(adapter->tq, &adapter->que_task);
1584                         return;
1585                 }
1586         }
1587
1588         em_enable_intr(adapter);
1589         return;
1590 }
1591
1592
1593 /*********************************************************************
1594  *
1595  *  MSIX Interrupt Service Routines
1596  *
1597  **********************************************************************/
static void
em_msix_tx(void *arg)
{
	struct tx_ring *txr = arg;
	struct adapter *adapter = txr->adapter;
	if_t ifp = adapter->ifp;

	++txr->tx_irq;
	EM_TX_LOCK(txr);
	/* Reclaim completed transmit descriptors. */
	em_txeof(txr);
#ifdef EM_MULTIQUEUE
	/* Restart output if frames are waiting in this ring's buf_ring. */
	if (!drbr_empty(ifp, txr->br))
		em_mq_start_locked(ifp, txr);
#else
	/* Restart output if frames are waiting in the interface sendq. */
	if (!if_sendq_empty(ifp))
		em_start_locked(ifp, txr);
#endif

	/* Reenable this interrupt */
	E1000_WRITE_REG(&adapter->hw, E1000_IMS, txr->ims);
	EM_TX_UNLOCK(txr);
	return;
}
1621
1622 /*********************************************************************
1623  *
1624  *  MSIX RX Interrupt Service routine
1625  *
1626  **********************************************************************/
1627
1628 static void
1629 em_msix_rx(void *arg)
1630 {
1631         struct rx_ring  *rxr = arg;
1632         struct adapter  *adapter = rxr->adapter;
1633         bool            more;
1634
1635         ++rxr->rx_irq;
1636         if (!(if_getdrvflags(adapter->ifp) & IFF_DRV_RUNNING))
1637                 return;
1638         more = em_rxeof(rxr, adapter->rx_process_limit, NULL);
1639         if (more)
1640                 taskqueue_enqueue(rxr->tq, &rxr->rx_task);
1641         else {
1642                 /* Reenable this interrupt */
1643                 E1000_WRITE_REG(&adapter->hw, E1000_IMS, rxr->ims);
1644         }
1645         return;
1646 }
1647
1648 /*********************************************************************
1649  *
1650  *  MSIX Link Fast Interrupt Service routine
1651  *
1652  **********************************************************************/
static void
em_msix_link(void *arg)
{
	struct adapter	*adapter = arg;
	u32		reg_icr;

	++adapter->link_irq;
	reg_icr = E1000_READ_REG(&adapter->hw, E1000_ICR);

	/* Receive overruns are reported on this vector; just count them. */
	if (reg_icr & E1000_ICR_RXO)
		adapter->rx_overruns++;

	if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
		adapter->hw.mac.get_link_status = 1;
		/* Process the link change inline (also re-arms IMS). */
		em_handle_link(adapter, 0);
	} else
		E1000_WRITE_REG(&adapter->hw, E1000_IMS,
		    EM_MSIX_LINK | E1000_IMS_LSC);
	/*
	** Because we must read the ICR for this interrupt
	** it may clear other causes using autoclear, for
	** this reason we simply create a soft interrupt
	** for all these vectors.
	*/
	if (reg_icr) {
		E1000_WRITE_REG(&adapter->hw,
			E1000_ICS, adapter->ims);
	}
	return;
}
1683
1684 static void
1685 em_handle_rx(void *context, int pending)
1686 {
1687         struct rx_ring  *rxr = context;
1688         struct adapter  *adapter = rxr->adapter;
1689         bool            more;
1690
1691         more = em_rxeof(rxr, adapter->rx_process_limit, NULL);
1692         if (more)
1693                 taskqueue_enqueue(rxr->tq, &rxr->rx_task);
1694         else {
1695                 /* Reenable this interrupt */
1696                 E1000_WRITE_REG(&adapter->hw, E1000_IMS, rxr->ims);
1697         }
1698 }
1699
static void
em_handle_tx(void *context, int pending)
{
	struct tx_ring	*txr = context;
	struct adapter	*adapter = txr->adapter;
	if_t ifp = adapter->ifp;

	EM_TX_LOCK(txr);
	/* Reclaim completed descriptors, then push out any queued frames. */
	em_txeof(txr);
#ifdef EM_MULTIQUEUE
	if (!drbr_empty(ifp, txr->br))
		em_mq_start_locked(ifp, txr);
#else
	if (!if_sendq_empty(ifp))
		em_start_locked(ifp, txr);
#endif
	/* Re-arm this queue's interrupt before dropping the lock. */
	E1000_WRITE_REG(&adapter->hw, E1000_IMS, txr->ims);
	EM_TX_UNLOCK(txr);
}
1719
1720 static void
1721 em_handle_link(void *context, int pending)
1722 {
1723         struct adapter  *adapter = context;
1724         struct tx_ring  *txr = adapter->tx_rings;
1725         if_t ifp = adapter->ifp;
1726
1727         if (!(if_getdrvflags(ifp) & IFF_DRV_RUNNING))
1728                 return;
1729
1730         EM_CORE_LOCK(adapter);
1731         callout_stop(&adapter->timer);
1732         em_update_link_status(adapter);
1733         callout_reset(&adapter->timer, hz, em_local_timer, adapter);
1734         E1000_WRITE_REG(&adapter->hw, E1000_IMS,
1735             EM_MSIX_LINK | E1000_IMS_LSC);
1736         if (adapter->link_active) {
1737                 for (int i = 0; i < adapter->num_queues; i++, txr++) {
1738                         EM_TX_LOCK(txr);
1739 #ifdef EM_MULTIQUEUE
1740                         if (!drbr_empty(ifp, txr->br))
1741                                 em_mq_start_locked(ifp, txr);
1742 #else
1743                         if (if_sendq_empty(ifp))
1744                                 em_start_locked(ifp, txr);
1745 #endif
1746                         EM_TX_UNLOCK(txr);
1747                 }
1748         }
1749         EM_CORE_UNLOCK(adapter);
1750 }
1751
1752
1753 /*********************************************************************
1754  *
1755  *  Media Ioctl callback
1756  *
1757  *  This routine is called whenever the user queries the status of
1758  *  the interface using ifconfig.
1759  *
1760  **********************************************************************/
1761 static void
1762 em_media_status(if_t ifp, struct ifmediareq *ifmr)
1763 {
1764         struct adapter *adapter = if_getsoftc(ifp);
1765         u_char fiber_type = IFM_1000_SX;
1766
1767         INIT_DEBUGOUT("em_media_status: begin");
1768
1769         EM_CORE_LOCK(adapter);
1770         em_update_link_status(adapter);
1771
1772         ifmr->ifm_status = IFM_AVALID;
1773         ifmr->ifm_active = IFM_ETHER;
1774
1775         if (!adapter->link_active) {
1776                 EM_CORE_UNLOCK(adapter);
1777                 return;
1778         }
1779
1780         ifmr->ifm_status |= IFM_ACTIVE;
1781
1782         if ((adapter->hw.phy.media_type == e1000_media_type_fiber) ||
1783             (adapter->hw.phy.media_type == e1000_media_type_internal_serdes)) {
1784                 ifmr->ifm_active |= fiber_type | IFM_FDX;
1785         } else {
1786                 switch (adapter->link_speed) {
1787                 case 10:
1788                         ifmr->ifm_active |= IFM_10_T;
1789                         break;
1790                 case 100:
1791                         ifmr->ifm_active |= IFM_100_TX;
1792                         break;
1793                 case 1000:
1794                         ifmr->ifm_active |= IFM_1000_T;
1795                         break;
1796                 }
1797                 if (adapter->link_duplex == FULL_DUPLEX)
1798                         ifmr->ifm_active |= IFM_FDX;
1799                 else
1800                         ifmr->ifm_active |= IFM_HDX;
1801         }
1802         EM_CORE_UNLOCK(adapter);
1803 }
1804
/*********************************************************************
 *
 *  Media Ioctl callback
 *
 *  This routine is called when the user changes speed/duplex using
 *  the media/mediaopt option with ifconfig.
 *
 **********************************************************************/
1813 static int
1814 em_media_change(if_t ifp)
1815 {
1816         struct adapter *adapter = if_getsoftc(ifp);
1817         struct ifmedia  *ifm = &adapter->media;
1818
1819         INIT_DEBUGOUT("em_media_change: begin");
1820
1821         if (IFM_TYPE(ifm->ifm_media) != IFM_ETHER)
1822                 return (EINVAL);
1823
1824         EM_CORE_LOCK(adapter);
1825         switch (IFM_SUBTYPE(ifm->ifm_media)) {
1826         case IFM_AUTO:
1827                 adapter->hw.mac.autoneg = DO_AUTO_NEG;
1828                 adapter->hw.phy.autoneg_advertised = AUTONEG_ADV_DEFAULT;
1829                 break;
1830         case IFM_1000_LX:
1831         case IFM_1000_SX:
1832         case IFM_1000_T:
1833                 adapter->hw.mac.autoneg = DO_AUTO_NEG;
1834                 adapter->hw.phy.autoneg_advertised = ADVERTISE_1000_FULL;
1835                 break;
1836         case IFM_100_TX:
1837                 adapter->hw.mac.autoneg = FALSE;
1838                 adapter->hw.phy.autoneg_advertised = 0;
1839                 if ((ifm->ifm_media & IFM_GMASK) == IFM_FDX)
1840                         adapter->hw.mac.forced_speed_duplex = ADVERTISE_100_FULL;
1841                 else
1842                         adapter->hw.mac.forced_speed_duplex = ADVERTISE_100_HALF;
1843                 break;
1844         case IFM_10_T:
1845                 adapter->hw.mac.autoneg = FALSE;
1846                 adapter->hw.phy.autoneg_advertised = 0;
1847                 if ((ifm->ifm_media & IFM_GMASK) == IFM_FDX)
1848                         adapter->hw.mac.forced_speed_duplex = ADVERTISE_10_FULL;
1849                 else
1850                         adapter->hw.mac.forced_speed_duplex = ADVERTISE_10_HALF;
1851                 break;
1852         default:
1853                 device_printf(adapter->dev, "Unsupported media type\n");
1854         }
1855
1856         em_init_locked(adapter);
1857         EM_CORE_UNLOCK(adapter);
1858
1859         return (0);
1860 }
1861
1862 /*********************************************************************
1863  *
1864  *  This routine maps the mbufs to tx descriptors.
1865  *
1866  *  return 0 on success, positive on failure
1867  **********************************************************************/
1868
static int
em_xmit(struct tx_ring *txr, struct mbuf **m_headp)
{
	struct adapter		*adapter = txr->adapter;
	bus_dma_segment_t	segs[EM_MAX_SCATTER];
	bus_dmamap_t		map;
	struct em_buffer	*tx_buffer, *tx_buffer_mapped;
	struct e1000_tx_desc	*ctxd = NULL;
	struct mbuf		*m_head;
	struct ether_header	*eh;
	struct ip		*ip = NULL;
	struct tcphdr		*tp = NULL;
	u32			txd_upper = 0, txd_lower = 0;
	int			ip_off, poff;
	int			nsegs, i, j, first, last = 0;
	int			error;
	bool			do_tso, tso_desc, remap = TRUE;

	m_head = *m_headp;
	do_tso = (m_head->m_pkthdr.csum_flags & CSUM_TSO);
	tso_desc = FALSE;
	ip_off = poff = 0;

	/*
	 * Intel recommends entire IP/TCP header length reside in a single
	 * buffer. If multiple descriptors are used to describe the IP and
	 * TCP header, each descriptor should describe one or more
	 * complete headers; descriptors referencing only parts of headers
	 * are not supported. If all layer headers are not coalesced into
	 * a single buffer, each buffer should not cross a 4KB boundary,
	 * or be larger than the maximum read request size.
	 * Controller also requires modifying IP/TCP header to make TSO work
	 * so we firstly get a writable mbuf chain then coalesce ethernet/
	 * IP/TCP header into a single buffer to meet the requirement of
	 * controller. This also simplifies IP/TCP/UDP checksum offloading
	 * which also has similar restrictions.
	 */
	if (do_tso || m_head->m_pkthdr.csum_flags & CSUM_OFFLOAD) {
		if (do_tso || (m_head->m_next != NULL && 
		    m_head->m_pkthdr.csum_flags & CSUM_OFFLOAD)) {
			/* Headers will be modified: take a writable copy. */
			if (M_WRITABLE(*m_headp) == 0) {
				m_head = m_dup(*m_headp, M_NOWAIT);
				m_freem(*m_headp);
				if (m_head == NULL) {
					*m_headp = NULL;
					return (ENOBUFS);
				}
				*m_headp = m_head;
			}
		}
		/*
		 * XXX
		 * Assume IPv4, we don't have TSO/checksum offload support
		 * for IPv6 yet.
		 */
		ip_off = sizeof(struct ether_header);
		if (m_head->m_len < ip_off) {
			m_head = m_pullup(m_head, ip_off);
			if (m_head == NULL) {
				*m_headp = NULL;
				return (ENOBUFS);
			}
		}
		eh = mtod(m_head, struct ether_header *);
		/* Account for an 802.1Q tag preceding the IP header. */
		if (eh->ether_type == htons(ETHERTYPE_VLAN)) {
			ip_off = sizeof(struct ether_vlan_header);
			if (m_head->m_len < ip_off) {
				m_head = m_pullup(m_head, ip_off);
				if (m_head == NULL) {
					*m_headp = NULL;
					return (ENOBUFS);
				}
			}
		}
		/* Pull the IP header into the first mbuf. */
		if (m_head->m_len < ip_off + sizeof(struct ip)) {
			m_head = m_pullup(m_head, ip_off + sizeof(struct ip));
			if (m_head == NULL) {
				*m_headp = NULL;
				return (ENOBUFS);
			}
		}
		ip = (struct ip *)(mtod(m_head, char *) + ip_off);
		poff = ip_off + (ip->ip_hl << 2);

		if (do_tso || (m_head->m_pkthdr.csum_flags & CSUM_TCP)) {
			/* Pull the TCP header (and options) in as well. */
			if (m_head->m_len < poff + sizeof(struct tcphdr)) {
				m_head = m_pullup(m_head, poff +
				    sizeof(struct tcphdr));
				if (m_head == NULL) {
					*m_headp = NULL;
					return (ENOBUFS);
				}
			}
			tp = (struct tcphdr *)(mtod(m_head, char *) + poff);
			/*
			 * TSO workaround:
			 *   pull 4 more bytes of data into it.
			 */
			if (m_head->m_len < poff + (tp->th_off << 2)) {
				m_head = m_pullup(m_head, poff +
				                 (tp->th_off << 2) +
				                 TSO_WORKAROUND);
				if (m_head == NULL) {
					*m_headp = NULL;
					return (ENOBUFS);
				}
			}
			/* m_pullup may have moved data: refresh pointers. */
			ip = (struct ip *)(mtod(m_head, char *) + ip_off);
			tp = (struct tcphdr *)(mtod(m_head, char *) + poff);
			if (do_tso) {
				ip->ip_len = htons(m_head->m_pkthdr.tso_segsz +
				                  (ip->ip_hl << 2) +
				                  (tp->th_off << 2));
				ip->ip_sum = 0;
				/*
				 * The pseudo TCP checksum does not include
				 * the TCP payload length, so the driver must
				 * recompute the checksum the hardware expects
				 * to see.  This follows Microsoft's Large
				 * Send specification.
				 */
				tp->th_sum = in_pseudo(ip->ip_src.s_addr,
				    ip->ip_dst.s_addr, htons(IPPROTO_TCP));
			}
		} else if (m_head->m_pkthdr.csum_flags & CSUM_UDP) {
			/* UDP checksum offload: just pull in the header. */
			if (m_head->m_len < poff + sizeof(struct udphdr)) {
				m_head = m_pullup(m_head, poff +
				    sizeof(struct udphdr));
				if (m_head == NULL) {
					*m_headp = NULL;
					return (ENOBUFS);
				}
			}
			ip = (struct ip *)(mtod(m_head, char *) + ip_off);
		}
		*m_headp = m_head;
	}

	/*
	 * Map the packet for DMA
	 *
	 * Capture the first descriptor index,
	 * this descriptor will have the index
	 * of the EOP which is the only one that
	 * now gets a DONE bit writeback.
	 */
	first = txr->next_avail_desc;
	tx_buffer = &txr->tx_buffers[first];
	tx_buffer_mapped = tx_buffer;
	map = tx_buffer->map;

retry:
	error = bus_dmamap_load_mbuf_sg(txr->txtag, map,
	    *m_headp, segs, &nsegs, BUS_DMA_NOWAIT);

	/*
	 * There are two types of errors we can (try) to handle:
	 * - EFBIG means the mbuf chain was too long and bus_dma ran
	 *   out of segments.  Defragment the mbuf chain and try again.
	 * - ENOMEM means bus_dma could not obtain enough bounce buffers
	 *   at this point in time.  Defer sending and try again later.
	 * All other errors, in particular EINVAL, are fatal and prevent the
	 * mbuf chain from ever going through.  Drop it and report error.
	 */
	if (error == EFBIG && remap) {
		struct mbuf *m;

		m = m_defrag(*m_headp, M_NOWAIT);
		if (m == NULL) {
			adapter->mbuf_alloc_failed++;
			m_freem(*m_headp);
			*m_headp = NULL;
			return (ENOBUFS);
		}
		*m_headp = m;

		/* Try it again, but only once */
		remap = FALSE;
		goto retry;
	} else if (error != 0) {
		adapter->no_tx_dma_setup++;
		m_freem(*m_headp);
		*m_headp = NULL;
		return (error);
	}

	/*
	 * TSO Hardware workaround, if this packet is not
	 * TSO, and is only a single descriptor long, and
	 * it follows a TSO burst, then we need to add a
	 * sentinel descriptor to prevent premature writeback.
	 */
	if ((!do_tso) && (txr->tx_tso == TRUE)) {
		if (nsegs == 1)
			tso_desc = TRUE;
		txr->tx_tso = FALSE;
	}

	/* Keep EM_MAX_SCATTER descriptors in reserve. */
	if (nsegs > (txr->tx_avail - EM_MAX_SCATTER)) {
		txr->no_desc_avail++;
		bus_dmamap_unload(txr->txtag, map);
		return (ENOBUFS);
	}
	m_head = *m_headp;

	/* Do hardware assists */
	if (m_head->m_pkthdr.csum_flags & CSUM_TSO) {
		em_tso_setup(txr, m_head, ip_off, ip, tp,
		    &txd_upper, &txd_lower);
		/* we need to make a final sentinel transmit desc */
		tso_desc = TRUE;
	} else if (m_head->m_pkthdr.csum_flags & CSUM_OFFLOAD)
		em_transmit_checksum_setup(txr, m_head,
		    ip_off, ip, &txd_upper, &txd_lower);

	if (m_head->m_flags & M_VLANTAG) {
		/* Set the vlan id. */
		txd_upper |= htole16(if_getvtag(m_head)) << 16;
		/* Tell hardware to add tag */
		txd_lower |= htole32(E1000_TXD_CMD_VLE);
	}

	i = txr->next_avail_desc;

	/* Set up our transmit descriptors */
	for (j = 0; j < nsegs; j++) {
		bus_size_t seg_len;
		bus_addr_t seg_addr;

		tx_buffer = &txr->tx_buffers[i];
		ctxd = &txr->tx_base[i];
		seg_addr = segs[j].ds_addr;
		seg_len  = segs[j].ds_len;
		/*
		** TSO Workaround:
		** If this is the last descriptor, we want to
		** split it so we have a small final sentinel
		*/
		if (tso_desc && (j == (nsegs - 1)) && (seg_len > 8)) {
			seg_len -= TSO_WORKAROUND;
			ctxd->buffer_addr = htole64(seg_addr);
			ctxd->lower.data = htole32(
				adapter->txd_cmd | txd_lower | seg_len);
			ctxd->upper.data = htole32(txd_upper);
			if (++i == adapter->num_tx_desc)
				i = 0;

			/* Now make the sentinel */	
			txr->tx_avail--;
			ctxd = &txr->tx_base[i];
			tx_buffer = &txr->tx_buffers[i];
			ctxd->buffer_addr =
			    htole64(seg_addr + seg_len);
			ctxd->lower.data = htole32(
			adapter->txd_cmd | txd_lower | TSO_WORKAROUND);
			ctxd->upper.data =
			    htole32(txd_upper);
			last = i;
			if (++i == adapter->num_tx_desc)
				i = 0;
		} else {
			ctxd->buffer_addr = htole64(seg_addr);
			ctxd->lower.data = htole32(
			adapter->txd_cmd | txd_lower | seg_len);
			ctxd->upper.data = htole32(txd_upper);
			last = i;
			if (++i == adapter->num_tx_desc)
				i = 0;
		}
		tx_buffer->m_head = NULL;
		tx_buffer->next_eop = -1;
	}

	txr->next_avail_desc = i;
	txr->tx_avail -= nsegs;

	tx_buffer->m_head = m_head;
	/*
	** Here we swap the map so the last descriptor,
	** which gets the completion interrupt has the
	** real map, and the first descriptor gets the
	** unused map from this descriptor.
	*/
	tx_buffer_mapped->map = tx_buffer->map;
	tx_buffer->map = map;
	bus_dmamap_sync(txr->txtag, map, BUS_DMASYNC_PREWRITE);

	/*
	 * Last Descriptor of Packet
	 * needs End Of Packet (EOP)
	 * and Report Status (RS)
	 */
	ctxd->lower.data |=
	    htole32(E1000_TXD_CMD_EOP | E1000_TXD_CMD_RS);
	/*
	 * Keep track in the first buffer which
	 * descriptor will be written back
	 */
	tx_buffer = &txr->tx_buffers[first];
	tx_buffer->next_eop = last;

	/*
	 * Advance the Transmit Descriptor Tail (TDT), this tells the E1000
	 * that this frame is available to transmit.
	 */
	bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
	E1000_WRITE_REG(&adapter->hw, E1000_TDT(txr->me), i);

	return (0);
}
2180
2181 static void
2182 em_set_promisc(struct adapter *adapter)
2183 {
2184         if_t ifp = adapter->ifp;
2185         u32             reg_rctl;
2186
2187         reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
2188
2189         if (if_getflags(ifp) & IFF_PROMISC) {
2190                 reg_rctl |= (E1000_RCTL_UPE | E1000_RCTL_MPE);
2191                 /* Turn this on if you want to see bad packets */
2192                 if (em_debug_sbp)
2193                         reg_rctl |= E1000_RCTL_SBP;
2194                 E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2195         } else if (if_getflags(ifp) & IFF_ALLMULTI) {
2196                 reg_rctl |= E1000_RCTL_MPE;
2197                 reg_rctl &= ~E1000_RCTL_UPE;
2198                 E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2199         }
2200 }
2201
2202 static void
2203 em_disable_promisc(struct adapter *adapter)
2204 {
2205         if_t            ifp = adapter->ifp;
2206         u32             reg_rctl;
2207         int             mcnt = 0;
2208
2209         reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
2210         reg_rctl &=  (~E1000_RCTL_UPE);
2211         if (if_getflags(ifp) & IFF_ALLMULTI)
2212                 mcnt = MAX_NUM_MULTICAST_ADDRESSES;
2213         else
2214                 mcnt = if_multiaddr_count(ifp, MAX_NUM_MULTICAST_ADDRESSES);
2215         /* Don't disable if in MAX groups */
2216         if (mcnt < MAX_NUM_MULTICAST_ADDRESSES)
2217                 reg_rctl &=  (~E1000_RCTL_MPE);
2218         reg_rctl &=  (~E1000_RCTL_SBP);
2219         E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2220 }
2221
2222
2223 /*********************************************************************
2224  *  Multicast Update
2225  *
2226  *  This routine is called whenever multicast address list is updated.
2227  *
2228  **********************************************************************/
2229
static void
em_set_multi(struct adapter *adapter)
{
	if_t ifp = adapter->ifp;
	u32 reg_rctl = 0;
	u8  *mta; /* Multicast array memory */
	int mcnt = 0;

	IOCTL_DEBUGOUT("em_set_multi: begin");

	mta = adapter->mta;
	bzero(mta, sizeof(u8) * ETH_ADDR_LEN * MAX_NUM_MULTICAST_ADDRESSES);

	/*
	 * On 82542 rev2 the receiver is held in reset (RCTL_RST) and MWI
	 * is turned off while the filter table is rewritten; presumably
	 * a hardware workaround — restored at the bottom of the function.
	 */
	if (adapter->hw.mac.type == e1000_82542 && 
	    adapter->hw.revision_id == E1000_REVISION_2) {
		reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
		if (adapter->hw.bus.pci_cmd_word & CMD_MEM_WRT_INVALIDATE)
			e1000_pci_clear_mwi(&adapter->hw);
		reg_rctl |= E1000_RCTL_RST;
		E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
		msec_delay(5);
	}

	/* Collect the interface's current multicast list into mta. */
	if_multiaddr_array(ifp, mta, &mcnt, MAX_NUM_MULTICAST_ADDRESSES);

	if (mcnt >= MAX_NUM_MULTICAST_ADDRESSES) {
		/* Filter table overflow: fall back to accepting all multicast. */
		reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
		reg_rctl |= E1000_RCTL_MPE;
		E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
	} else
		e1000_update_mc_addr_list(&adapter->hw, mta, mcnt);

	/* Take the 82542 rev2 receiver back out of reset. */
	if (adapter->hw.mac.type == e1000_82542 && 
	    adapter->hw.revision_id == E1000_REVISION_2) {
		reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
		reg_rctl &= ~E1000_RCTL_RST;
		E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
		msec_delay(5);
		if (adapter->hw.bus.pci_cmd_word & CMD_MEM_WRT_INVALIDATE)
			e1000_pci_set_mwi(&adapter->hw);
	}
}
2272
2273
2274 /*********************************************************************
2275  *  Timer routine
2276  *
2277  *  This routine checks for link status and updates statistics.
2278  *
2279  **********************************************************************/
2280
static void
em_local_timer(void *arg)
{
	struct adapter	*adapter = arg;
	if_t ifp = adapter->ifp;
	struct tx_ring	*txr = adapter->tx_rings;
	struct rx_ring	*rxr = adapter->rx_rings;
	u32		trigger = 0;

	EM_CORE_LOCK_ASSERT(adapter);

	em_update_link_status(adapter);
	em_update_stats_counters(adapter);

	/* Reset LAA into RAR[0] on 82571 */
	if ((adapter->hw.mac.type == e1000_82571) &&
	    e1000_get_laa_state_82571(&adapter->hw))
		e1000_rar_set(&adapter->hw, adapter->hw.mac.addr, 0);

	/* Mask to use in the irq trigger: all RX vectors, or RXDMT0. */
	if (adapter->msix_mem) {
		for (int i = 0; i < adapter->num_queues; i++, rxr++)
			trigger |= rxr->ims;
		rxr = adapter->rx_rings;
	} else
		trigger = E1000_ICS_RXDMT0;

	/*
	** Check on the state of the TX queue(s), this 
	** can be done without the lock because its RO
	** and the HUNG state will be static if set.
	*/
	for (int i = 0; i < adapter->num_queues; i++, txr++) {
		if (txr->busy == EM_TX_HUNG)
			goto hung;
		/* Escalate to HUNG once the busy count hits the limit. */
		if (txr->busy >= EM_TX_MAXTRIES)
			txr->busy = EM_TX_HUNG;
		/* Schedule a TX tasklet if needed */
		if (txr->tx_avail <= EM_MAX_SCATTER)
			taskqueue_enqueue(txr->tq, &txr->tx_task);
	}
	
	callout_reset(&adapter->timer, hz, em_local_timer, adapter);
#ifndef DEVICE_POLLING
	/* Trigger an RX interrupt to guarantee mbuf refresh */
	E1000_WRITE_REG(&adapter->hw, E1000_ICS, trigger);
#endif
	return;
hung:
	/* Looks like we're hung: log, mark down, and reinitialize. */
	device_printf(adapter->dev, "Watchdog timeout Queue[%d]-- resetting\n",
			txr->me);
	em_print_debug_info(adapter);
	if_setdrvflagbits(ifp, 0, IFF_DRV_RUNNING);
	adapter->watchdog_events++;
	em_init_locked(adapter);
}
2338
2339
2340 static void
2341 em_update_link_status(struct adapter *adapter)
2342 {
2343         struct e1000_hw *hw = &adapter->hw;
2344         if_t ifp = adapter->ifp;
2345         device_t dev = adapter->dev;
2346         struct tx_ring *txr = adapter->tx_rings;
2347         u32 link_check = 0;
2348
2349         /* Get the cached link value or read phy for real */
2350         switch (hw->phy.media_type) {
2351         case e1000_media_type_copper:
2352                 if (hw->mac.get_link_status) {
2353                         /* Do the work to read phy */
2354                         e1000_check_for_link(hw);
2355                         link_check = !hw->mac.get_link_status;
2356                         if (link_check) /* ESB2 fix */
2357                                 e1000_cfg_on_link_up(hw);
2358                 } else
2359                         link_check = TRUE;
2360                 break;
2361         case e1000_media_type_fiber:
2362                 e1000_check_for_link(hw);
2363                 link_check = (E1000_READ_REG(hw, E1000_STATUS) &
2364                                  E1000_STATUS_LU);
2365                 break;
2366         case e1000_media_type_internal_serdes:
2367                 e1000_check_for_link(hw);
2368                 link_check = adapter->hw.mac.serdes_has_link;
2369                 break;
2370         default:
2371         case e1000_media_type_unknown:
2372                 break;
2373         }
2374
2375         /* Now check for a transition */
2376         if (link_check && (adapter->link_active == 0)) {
2377                 e1000_get_speed_and_duplex(hw, &adapter->link_speed,
2378                     &adapter->link_duplex);
2379                 /* Check if we must disable SPEED_MODE bit on PCI-E */
2380                 if ((adapter->link_speed != SPEED_1000) &&
2381                     ((hw->mac.type == e1000_82571) ||
2382                     (hw->mac.type == e1000_82572))) {
2383                         int tarc0;
2384                         tarc0 = E1000_READ_REG(hw, E1000_TARC(0));
2385                         tarc0 &= ~TARC_SPEED_MODE_BIT;
2386                         E1000_WRITE_REG(hw, E1000_TARC(0), tarc0);
2387                 }
2388                 if (bootverbose)
2389                         device_printf(dev, "Link is up %d Mbps %s\n",
2390                             adapter->link_speed,
2391                             ((adapter->link_duplex == FULL_DUPLEX) ?
2392                             "Full Duplex" : "Half Duplex"));
2393                 adapter->link_active = 1;
2394                 adapter->smartspeed = 0;
2395                 if_setbaudrate(ifp, adapter->link_speed * 1000000);
2396                 if_link_state_change(ifp, LINK_STATE_UP);
2397         } else if (!link_check && (adapter->link_active == 1)) {
2398                 if_setbaudrate(ifp, 0);
2399                 adapter->link_speed = 0;
2400                 adapter->link_duplex = 0;
2401                 if (bootverbose)
2402                         device_printf(dev, "Link is Down\n");
2403                 adapter->link_active = 0;
2404                 /* Link down, disable hang detection */
2405                 for (int i = 0; i < adapter->num_queues; i++, txr++)
2406                         txr->busy = EM_TX_IDLE;
2407                 if_link_state_change(ifp, LINK_STATE_DOWN);
2408         }
2409 }
2410
2411 /*********************************************************************
2412  *
2413  *  This routine disables all traffic on the adapter by issuing a
2414  *  global reset on the MAC and deallocates TX/RX buffers.
2415  *
2416  *  This routine should always be called with BOTH the CORE
2417  *  and TX locks.
2418  **********************************************************************/
2419
2420 static void
2421 em_stop(void *arg)
2422 {
2423         struct adapter  *adapter = arg;
2424         if_t ifp = adapter->ifp;
2425         struct tx_ring  *txr = adapter->tx_rings;
2426
2427         EM_CORE_LOCK_ASSERT(adapter);
2428
2429         INIT_DEBUGOUT("em_stop: begin");
2430
2431         em_disable_intr(adapter);
2432         callout_stop(&adapter->timer);
2433
2434         /* Tell the stack that the interface is no longer active */
2435         if_setdrvflagbits(ifp, IFF_DRV_OACTIVE, IFF_DRV_RUNNING);
2436
2437         /* Disarm Hang Detection. */
2438         for (int i = 0; i < adapter->num_queues; i++, txr++) {
2439                 EM_TX_LOCK(txr);
2440                 txr->busy = EM_TX_IDLE;
2441                 EM_TX_UNLOCK(txr);
2442         }
2443
2444         e1000_reset_hw(&adapter->hw);
2445         E1000_WRITE_REG(&adapter->hw, E1000_WUC, 0);
2446
2447         e1000_led_off(&adapter->hw);
2448         e1000_cleanup_led(&adapter->hw);
2449 }
2450
2451
2452 /*********************************************************************
2453  *
2454  *  Determine hardware revision.
2455  *
2456  **********************************************************************/
2457 static void
2458 em_identify_hardware(struct adapter *adapter)
2459 {
2460         device_t dev = adapter->dev;
2461
2462         /* Make sure our PCI config space has the necessary stuff set */
2463         pci_enable_busmaster(dev);
2464         adapter->hw.bus.pci_cmd_word = pci_read_config(dev, PCIR_COMMAND, 2);
2465
2466         /* Save off the information about this board */
2467         adapter->hw.vendor_id = pci_get_vendor(dev);
2468         adapter->hw.device_id = pci_get_device(dev);
2469         adapter->hw.revision_id = pci_read_config(dev, PCIR_REVID, 1);
2470         adapter->hw.subsystem_vendor_id =
2471             pci_read_config(dev, PCIR_SUBVEND_0, 2);
2472         adapter->hw.subsystem_device_id =
2473             pci_read_config(dev, PCIR_SUBDEV_0, 2);
2474
2475         /* Do Shared Code Init and Setup */
2476         if (e1000_set_mac_type(&adapter->hw)) {
2477                 device_printf(dev, "Setup init failure\n");
2478                 return;
2479         }
2480 }
2481
2482 static int
2483 em_allocate_pci_resources(struct adapter *adapter)
2484 {
2485         device_t        dev = adapter->dev;
2486         int             rid;
2487
2488         rid = PCIR_BAR(0);
2489         adapter->memory = bus_alloc_resource_any(dev, SYS_RES_MEMORY,
2490             &rid, RF_ACTIVE);
2491         if (adapter->memory == NULL) {
2492                 device_printf(dev, "Unable to allocate bus resource: memory\n");
2493                 return (ENXIO);
2494         }
2495         adapter->osdep.mem_bus_space_tag =
2496             rman_get_bustag(adapter->memory);
2497         adapter->osdep.mem_bus_space_handle =
2498             rman_get_bushandle(adapter->memory);
2499         adapter->hw.hw_addr = (u8 *)&adapter->osdep.mem_bus_space_handle;
2500
2501         adapter->hw.back = &adapter->osdep;
2502
2503         return (0);
2504 }
2505
2506 /*********************************************************************
2507  *
2508  *  Setup the Legacy or MSI Interrupt handler
2509  *
2510  **********************************************************************/
2511 int
2512 em_allocate_legacy(struct adapter *adapter)
2513 {
2514         device_t dev = adapter->dev;
2515         struct tx_ring  *txr = adapter->tx_rings;
2516         int error, rid = 0;
2517
2518         /* Manually turn off all interrupts */
2519         E1000_WRITE_REG(&adapter->hw, E1000_IMC, 0xffffffff);
2520
2521         if (adapter->msix == 1) /* using MSI */
2522                 rid = 1;
2523         /* We allocate a single interrupt resource */
2524         adapter->res = bus_alloc_resource_any(dev,
2525             SYS_RES_IRQ, &rid, RF_SHAREABLE | RF_ACTIVE);
2526         if (adapter->res == NULL) {
2527                 device_printf(dev, "Unable to allocate bus resource: "
2528                     "interrupt\n");
2529                 return (ENXIO);
2530         }
2531
2532         /*
2533          * Allocate a fast interrupt and the associated
2534          * deferred processing contexts.
2535          */
2536         TASK_INIT(&adapter->que_task, 0, em_handle_que, adapter);
2537         adapter->tq = taskqueue_create_fast("em_taskq", M_NOWAIT,
2538             taskqueue_thread_enqueue, &adapter->tq);
2539         taskqueue_start_threads(&adapter->tq, 1, PI_NET, "%s que",
2540             device_get_nameunit(adapter->dev));
2541         /* Use a TX only tasklet for local timer */
2542         TASK_INIT(&txr->tx_task, 0, em_handle_tx, txr);
2543         txr->tq = taskqueue_create_fast("em_txq", M_NOWAIT,
2544             taskqueue_thread_enqueue, &txr->tq);
2545         taskqueue_start_threads(&txr->tq, 1, PI_NET, "%s txq",
2546             device_get_nameunit(adapter->dev));
2547         TASK_INIT(&adapter->link_task, 0, em_handle_link, adapter);
2548         if ((error = bus_setup_intr(dev, adapter->res, INTR_TYPE_NET,
2549             em_irq_fast, NULL, adapter, &adapter->tag)) != 0) {
2550                 device_printf(dev, "Failed to register fast interrupt "
2551                             "handler: %d\n", error);
2552                 taskqueue_free(adapter->tq);
2553                 adapter->tq = NULL;
2554                 return (error);
2555         }
2556         
2557         return (0);
2558 }
2559
/*********************************************************************
 *
 *  Setup the MSIX Interrupt handlers
 *   This is not really Multiqueue, rather
 *   its just separate interrupt vectors
 *   for TX, RX, and Link.
 *
 *  Vectors are assigned in order: one per RX ring, one per TX ring,
 *  then a final one for link.  Each queue handler is bound to a CPU
 *  (round-robin via em_last_bind_cpu) and gets its own taskqueue.
 *  The 82574 IMS bit positions and IVAR encoding built up here are
 *  fixed by hardware and are unrelated to the MSIX vector numbers.
 *
 **********************************************************************/
int
em_allocate_msix(struct adapter *adapter)
{
        device_t        dev = adapter->dev;
        struct          tx_ring *txr = adapter->tx_rings;
        struct          rx_ring *rxr = adapter->rx_rings;
        int             error, rid, vector = 0;
        int             cpu_id = 0;


        /* Make sure all interrupts are disabled */
        E1000_WRITE_REG(&adapter->hw, E1000_IMC, 0xffffffff);

        /* First set up ring resources */
        for (int i = 0; i < adapter->num_queues; i++, rxr++, vector++) {

                /* RX ring */
                rid = vector + 1;       /* IRQ rids are 1-based */

                rxr->res = bus_alloc_resource_any(dev,
                    SYS_RES_IRQ, &rid, RF_ACTIVE);
                if (rxr->res == NULL) {
                        device_printf(dev,
                            "Unable to allocate bus resource: "
                            "RX MSIX Interrupt %d\n", i);
                        /* NOTE(review): caller is expected to release
                           already-allocated vectors on failure */
                        return (ENXIO);
                }
                if ((error = bus_setup_intr(dev, rxr->res,
                    INTR_TYPE_NET | INTR_MPSAFE, NULL, em_msix_rx,
                    rxr, &rxr->tag)) != 0) {
                        device_printf(dev, "Failed to register RX handler");
                        return (error);
                }
#if __FreeBSD_version >= 800504
                bus_describe_intr(dev, rxr->res, rxr->tag, "rx%d", i);
#endif
                rxr->msix = vector;

                /* Round-robin CPU binding for the queue interrupts */
                if (em_last_bind_cpu < 0)
                        em_last_bind_cpu = CPU_FIRST();
                cpu_id = em_last_bind_cpu;
                bus_bind_intr(dev, rxr->res, cpu_id);

                TASK_INIT(&rxr->rx_task, 0, em_handle_rx, rxr);
                rxr->tq = taskqueue_create_fast("em_rxq", M_NOWAIT,
                    taskqueue_thread_enqueue, &rxr->tq);
                taskqueue_start_threads(&rxr->tq, 1, PI_NET, "%s rxq (cpuid %d)",
                    device_get_nameunit(adapter->dev), cpu_id);
                /*
                ** Set the bit to enable interrupt
                ** in E1000_IMS -- bits 20 and 21
                ** are for RX0 and RX1, note this has
                ** NOTHING to do with the MSIX vector
                */
                rxr->ims = 1 << (20 + i);
                adapter->ims |= rxr->ims;
                /* IVAR nibble: valid bit (8) | vector, RX entries at 0/4 */
                adapter->ivars |= (8 | rxr->msix) << (i * 4);

                em_last_bind_cpu = CPU_NEXT(em_last_bind_cpu);
        }

        for (int i = 0; i < adapter->num_queues; i++, txr++, vector++) {
                /* TX ring */
                rid = vector + 1;
                txr->res = bus_alloc_resource_any(dev,
                    SYS_RES_IRQ, &rid, RF_ACTIVE);
                if (txr->res == NULL) {
                        device_printf(dev,
                            "Unable to allocate bus resource: "
                            "TX MSIX Interrupt %d\n", i);
                        return (ENXIO);
                }
                if ((error = bus_setup_intr(dev, txr->res,
                    INTR_TYPE_NET | INTR_MPSAFE, NULL, em_msix_tx,
                    txr, &txr->tag)) != 0) {
                        device_printf(dev, "Failed to register TX handler");
                        return (error);
                }
#if __FreeBSD_version >= 800504
                bus_describe_intr(dev, txr->res, txr->tag, "tx%d", i);
#endif
                txr->msix = vector;

                if (em_last_bind_cpu < 0)
                        em_last_bind_cpu = CPU_FIRST();
                cpu_id = em_last_bind_cpu;
                bus_bind_intr(dev, txr->res, cpu_id);

                TASK_INIT(&txr->tx_task, 0, em_handle_tx, txr);
                txr->tq = taskqueue_create_fast("em_txq", M_NOWAIT,
                    taskqueue_thread_enqueue, &txr->tq);
                taskqueue_start_threads(&txr->tq, 1, PI_NET, "%s txq (cpuid %d)",
                    device_get_nameunit(adapter->dev), cpu_id);
                /*
                ** Set the bit to enable interrupt
                ** in E1000_IMS -- bits 22 and 23
                ** are for TX0 and TX1, note this has
                ** NOTHING to do with the MSIX vector
                */
                txr->ims = 1 << (22 + i);
                adapter->ims |= txr->ims;
                /* TX IVAR entries start 8 bits above the RX entries */
                adapter->ivars |= (8 | txr->msix) << (8 + (i * 4));

                em_last_bind_cpu = CPU_NEXT(em_last_bind_cpu);
        }

        /* Link interrupt */
        rid = vector + 1;
        adapter->res = bus_alloc_resource_any(dev,
            SYS_RES_IRQ, &rid, RF_SHAREABLE | RF_ACTIVE);
        if (!adapter->res) {
                device_printf(dev,"Unable to allocate "
                    "bus resource: Link interrupt [%d]\n", rid);
                return (ENXIO);
        }
        /* Set the link handler function */
        error = bus_setup_intr(dev, adapter->res,
            INTR_TYPE_NET | INTR_MPSAFE, NULL,
            em_msix_link, adapter, &adapter->tag);
        if (error) {
                adapter->res = NULL;
                device_printf(dev, "Failed to register LINK handler");
                return (error);
        }
#if __FreeBSD_version >= 800504
        bus_describe_intr(dev, adapter->res, adapter->tag, "link");
#endif
        adapter->linkvec = vector;
        /* Link vector goes in IVAR bits 16-19; bit 31 per 82574 layout */
        adapter->ivars |=  (8 | vector) << 16;
        adapter->ivars |= 0x80000000;

        return (0);
}
2701
2702
2703 static void
2704 em_free_pci_resources(struct adapter *adapter)
2705 {
2706         device_t        dev = adapter->dev;
2707         struct tx_ring  *txr;
2708         struct rx_ring  *rxr;
2709         int             rid;
2710
2711
2712         /*
2713         ** Release all the queue interrupt resources:
2714         */
2715         for (int i = 0; i < adapter->num_queues; i++) {
2716                 txr = &adapter->tx_rings[i];
2717                 /* an early abort? */
2718                 if (txr == NULL)
2719                         break;
2720                 rid = txr->msix +1;
2721                 if (txr->tag != NULL) {
2722                         bus_teardown_intr(dev, txr->res, txr->tag);
2723                         txr->tag = NULL;
2724                 }
2725                 if (txr->res != NULL)
2726                         bus_release_resource(dev, SYS_RES_IRQ,
2727                             rid, txr->res);
2728
2729                 rxr = &adapter->rx_rings[i];
2730                 /* an early abort? */
2731                 if (rxr == NULL)
2732                         break;
2733                 rid = rxr->msix +1;
2734                 if (rxr->tag != NULL) {
2735                         bus_teardown_intr(dev, rxr->res, rxr->tag);
2736                         rxr->tag = NULL;
2737                 }
2738                 if (rxr->res != NULL)
2739                         bus_release_resource(dev, SYS_RES_IRQ,
2740                             rid, rxr->res);
2741         }
2742
2743         if (adapter->linkvec) /* we are doing MSIX */
2744                 rid = adapter->linkvec + 1;
2745         else
2746                 (adapter->msix != 0) ? (rid = 1):(rid = 0);
2747
2748         if (adapter->tag != NULL) {
2749                 bus_teardown_intr(dev, adapter->res, adapter->tag);
2750                 adapter->tag = NULL;
2751         }
2752
2753         if (adapter->res != NULL)
2754                 bus_release_resource(dev, SYS_RES_IRQ, rid, adapter->res);
2755
2756
2757         if (adapter->msix)
2758                 pci_release_msi(dev);
2759
2760         if (adapter->msix_mem != NULL)
2761                 bus_release_resource(dev, SYS_RES_MEMORY,
2762                     PCIR_BAR(EM_MSIX_BAR), adapter->msix_mem);
2763
2764         if (adapter->memory != NULL)
2765                 bus_release_resource(dev, SYS_RES_MEMORY,
2766                     PCIR_BAR(0), adapter->memory);
2767
2768         if (adapter->flash != NULL)
2769                 bus_release_resource(dev, SYS_RES_MEMORY,
2770                     EM_FLASH, adapter->flash);
2771 }
2772
/*
 * Setup MSI or MSI/X
 *
 * Decides how many interrupt vectors and queues this adapter will
 * use.  MSI-X is only attempted on 82574 ("Hartwell") when the
 * em_enable_msix tunable is set; multiqueue (2 queues, 5 vectors)
 * additionally requires EM_MULTIQUEUE.  Falls back to MSI, then to
 * a legacy IRQ.  Returns the number of vectors allocated (0 means
 * legacy).
 */
static int
em_setup_msix(struct adapter *adapter)
{
        device_t dev = adapter->dev;
        int val;

        /* Nearly always going to use one queue */
        adapter->num_queues = 1;

        /*
        ** Try using MSI-X for Hartwell adapters
        */
        if ((adapter->hw.mac.type == e1000_82574) &&
            (em_enable_msix == TRUE)) {
#ifdef EM_MULTIQUEUE
                adapter->num_queues = (em_num_queues == 1) ? 1 : 2;
                if (adapter->num_queues > 1)
                        em_enable_vectors_82574(adapter);
#endif
                /* Map the MSIX BAR */
                int rid = PCIR_BAR(EM_MSIX_BAR);
                adapter->msix_mem = bus_alloc_resource_any(dev,
                    SYS_RES_MEMORY, &rid, RF_ACTIVE);
                if (adapter->msix_mem == NULL) {
                        /* May not be enabled */
                        device_printf(adapter->dev,
                            "Unable to map MSIX table \n");
                        goto msi;
                }
                val = pci_msix_count(dev); 

#ifdef EM_MULTIQUEUE
                /* We need 5 vectors in the multiqueue case:
                 * 2 RX + 2 TX + 1 link */
                if (adapter->num_queues > 1 ) {
                        if (val >= 5)
                                val = 5;
                        else {
                                /* Not enough: drop back to one queue and
                                 * re-enter the single-queue sizing below */
                                adapter->num_queues = 1;
                                device_printf(adapter->dev,
                                    "Insufficient MSIX vectors for >1 queue, "
                                    "using single queue...\n");
                                goto msix_one;
                        }
                } else {
msix_one:
#endif
                        /* Single queue needs 3 vectors: RX + TX + link */
                        if (val >= 3)
                                val = 3;
                        else {
                                device_printf(adapter->dev,
                                "Insufficient MSIX vectors, using MSI\n");
                                goto msi;
                        }
#ifdef EM_MULTIQUEUE
                }
#endif

                if ((pci_alloc_msix(dev, &val) == 0)) {
                        device_printf(adapter->dev,
                            "Using MSIX interrupts "
                            "with %d vectors\n", val);
                        return (val);
                }

                /*
                ** If MSIX alloc failed or provided us with
                ** less than needed, free and fall through to MSI
                */
                pci_release_msi(dev);
        }
msi:
        /* Release the MSIX BAR mapping if we got this far with one */
        if (adapter->msix_mem != NULL) {
                bus_release_resource(dev, SYS_RES_MEMORY,
                    PCIR_BAR(EM_MSIX_BAR), adapter->msix_mem);
                adapter->msix_mem = NULL;
        }
        val = 1;
        if (pci_alloc_msi(dev, &val) == 0) {
                device_printf(adapter->dev, "Using an MSI interrupt\n");
                return (val);
        } 
        /* Should only happen due to manual configuration */
        device_printf(adapter->dev,"No MSI/MSIX using a Legacy IRQ\n");
        return (0);
}
2861
2862
/*********************************************************************
 *
 *  Initialize the hardware to a configuration
 *  as specified by the adapter structure.
 *
 *  Programs the packet buffer split (PBA), flow-control watermarks
 *  and pause timing per MAC type, then issues a global reset and
 *  re-initializes the MAC via the shared code.
 *
 **********************************************************************/
static void
em_reset(struct adapter *adapter)
{
        device_t        dev = adapter->dev;
        if_t ifp = adapter->ifp;
        struct e1000_hw *hw = &adapter->hw;
        u16             rx_buffer_size;
        u32             pba;

        INIT_DEBUGOUT("em_reset: begin");

        /* Set up smart power down as default off on newer adapters. */
        if (!em_smart_pwr_down && (hw->mac.type == e1000_82571 ||
            hw->mac.type == e1000_82572)) {
                u16 phy_tmp = 0;

                /* Speed up time to link by disabling smart power down. */
                e1000_read_phy_reg(hw, IGP02E1000_PHY_POWER_MGMT, &phy_tmp);
                phy_tmp &= ~IGP02E1000_PM_SPD;
                e1000_write_phy_reg(hw, IGP02E1000_PHY_POWER_MGMT, phy_tmp);
        }

        /*
         * Packet Buffer Allocation (PBA)
         * Writing PBA sets the receive portion of the buffer
         * the remainder is used for the transmit buffer.
         */
        switch (hw->mac.type) {
        /* Total Packet Buffer on these is 48K */
        case e1000_82571:
        case e1000_82572:
        case e1000_80003es2lan:
                        pba = E1000_PBA_32K; /* 32K for Rx, 16K for Tx */
                break;
        case e1000_82573: /* 82573: Total Packet Buffer is 32K */
                        pba = E1000_PBA_12K; /* 12K for Rx, 20K for Tx */
                break;
        case e1000_82574:
        case e1000_82583:
                        pba = E1000_PBA_20K; /* 20K for Rx, 20K for Tx */
                break;
        case e1000_ich8lan:
                pba = E1000_PBA_8K;
                break;
        case e1000_ich9lan:
        case e1000_ich10lan:
                /* Boost Receive side for jumbo frames */
                if (adapter->hw.mac.max_frame_size > 4096)
                        pba = E1000_PBA_14K;
                else
                        pba = E1000_PBA_10K;
                break;
        case e1000_pchlan:
        case e1000_pch2lan:
        case e1000_pch_lpt:
                pba = E1000_PBA_26K;
                break;
        default:
                if (adapter->hw.mac.max_frame_size > 8192)
                        pba = E1000_PBA_40K; /* 40K for Rx, 24K for Tx */
                else
                        pba = E1000_PBA_48K; /* 48K for Rx, 16K for Tx */
        }
        E1000_WRITE_REG(&adapter->hw, E1000_PBA, pba);

        /*
         * These parameters control the automatic generation (Tx) and
         * response (Rx) to Ethernet PAUSE frames.
         * - High water mark should allow for at least two frames to be
         *   received after sending an XOFF.
         * - Low water mark works best when it is very near the high water mark.
         *   This allows the receiver to restart by sending XON when it has
         *   drained a bit. Here we use an arbitrary value of 1500 which will
         *   restart after one full frame is pulled from the buffer. There
         *   could be several smaller frames in the buffer and if so they will
         *   not trigger the XON until their total number reduces the buffer
         *   by 1500.
         * - The pause time is fairly large at 1000 x 512ns = 512 usec.
         */
        /* PBA low 16 bits are the RX portion in KB; shift to bytes */
        rx_buffer_size = ((E1000_READ_REG(hw, E1000_PBA) & 0xffff) << 10 );
        hw->fc.high_water = rx_buffer_size -
            roundup2(adapter->hw.mac.max_frame_size, 1024);
        hw->fc.low_water = hw->fc.high_water - 1500;

        if (adapter->fc) /* locally set flow control value? */
                hw->fc.requested_mode = adapter->fc;
        else
                hw->fc.requested_mode = e1000_fc_full;

        if (hw->mac.type == e1000_80003es2lan)
                hw->fc.pause_time = 0xFFFF;
        else
                hw->fc.pause_time = EM_FC_PAUSE_TIME;

        hw->fc.send_xon = TRUE;

        /* Device specific overrides/settings */
        switch (hw->mac.type) {
        case e1000_pchlan:
                /* Workaround: no TX flow ctrl for PCH */
                hw->fc.requested_mode = e1000_fc_rx_pause;
                hw->fc.pause_time = 0xFFFF; /* override */
                if (if_getmtu(ifp) > ETHERMTU) {
                        hw->fc.high_water = 0x3500;
                        hw->fc.low_water = 0x1500;
                } else {
                        hw->fc.high_water = 0x5000;
                        hw->fc.low_water = 0x3000;
                }
                hw->fc.refresh_time = 0x1000;
                break;
        case e1000_pch2lan:
        case e1000_pch_lpt:
                hw->fc.high_water = 0x5C20;
                hw->fc.low_water = 0x5048;
                hw->fc.pause_time = 0x0650;
                hw->fc.refresh_time = 0x0400;
                /* Jumbos need adjusted PBA */
                if (if_getmtu(ifp) > ETHERMTU)
                        E1000_WRITE_REG(hw, E1000_PBA, 12);
                else
                        E1000_WRITE_REG(hw, E1000_PBA, 26);
                break;
        case e1000_ich9lan:
        case e1000_ich10lan:
                if (if_getmtu(ifp) > ETHERMTU) {
                        hw->fc.high_water = 0x2800;
                        hw->fc.low_water = hw->fc.high_water - 8;
                        break;
                } 
                /* else fall thru */
        default:
                if (hw->mac.type == e1000_80003es2lan)
                        hw->fc.pause_time = 0xFFFF;
                break;
        }

        /* Issue a global reset */
        e1000_reset_hw(hw);
        E1000_WRITE_REG(hw, E1000_WUC, 0);      /* clear wakeup control */
        em_disable_aspm(adapter);
        /* and a re-init */
        if (e1000_init_hw(hw) < 0) {
                device_printf(dev, "Hardware Initialization Failed\n");
                return;
        }

        /* Restore VLAN ethertype and refresh PHY/link state */
        E1000_WRITE_REG(hw, E1000_VET, ETHERTYPE_VLAN);
        e1000_get_phy_info(hw);
        e1000_check_for_link(hw);
        return;
}
3021
3022 /*********************************************************************
3023  *
3024  *  Setup networking device structure and register an interface.
3025  *
3026  **********************************************************************/
3027 static int
3028 em_setup_interface(device_t dev, struct adapter *adapter)
3029 {
3030         if_t ifp;
3031
3032         INIT_DEBUGOUT("em_setup_interface: begin");
3033
3034         ifp = adapter->ifp = if_gethandle(IFT_ETHER);
3035         if (ifp == 0) {
3036                 device_printf(dev, "can not allocate ifnet structure\n");
3037                 return (-1);
3038         }
3039         if_initname(ifp, device_get_name(dev), device_get_unit(dev));
3040         if_setdev(ifp, dev);
3041         if_setinitfn(ifp, em_init);
3042         if_setsoftc(ifp, adapter);
3043         if_setflags(ifp, IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST);
3044         if_setioctlfn(ifp, em_ioctl);
3045         if_setgetcounterfn(ifp, em_get_counter);
3046         /* TSO parameters */
3047         ifp->if_hw_tsomax = IP_MAXPACKET;
3048         ifp->if_hw_tsomaxsegcount = EM_MAX_SCATTER;
3049         ifp->if_hw_tsomaxsegsize = EM_TSO_SEG_SIZE;
3050
3051 #ifdef EM_MULTIQUEUE
3052         /* Multiqueue stack interface */
3053         if_settransmitfn(ifp, em_mq_start);
3054         if_setqflushfn(ifp, em_qflush);
3055 #else
3056         if_setstartfn(ifp, em_start);
3057         if_setsendqlen(ifp, adapter->num_tx_desc - 1);
3058         if_setsendqready(ifp);
3059 #endif  
3060
3061         ether_ifattach(ifp, adapter->hw.mac.addr);
3062
3063         if_setcapabilities(ifp, 0);
3064         if_setcapenable(ifp, 0);
3065
3066
3067         if_setcapabilitiesbit(ifp, IFCAP_HWCSUM | IFCAP_VLAN_HWCSUM |
3068             IFCAP_TSO4, 0);
3069         /*
3070          * Tell the upper layer(s) we
3071          * support full VLAN capability
3072          */
3073         if_setifheaderlen(ifp, sizeof(struct ether_vlan_header));
3074         if_setcapabilitiesbit(ifp, IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_HWTSO |
3075             IFCAP_VLAN_MTU, 0);
3076         if_setcapenable(ifp, if_getcapabilities(ifp));
3077
3078         /*
3079         ** Don't turn this on by default, if vlans are
3080         ** created on another pseudo device (eg. lagg)
3081         ** then vlan events are not passed thru, breaking
3082         ** operation, but with HW FILTER off it works. If
3083         ** using vlans directly on the em driver you can
3084         ** enable this and get full hardware tag filtering.
3085         */
3086         if_setcapabilitiesbit(ifp, IFCAP_VLAN_HWFILTER,0);
3087
3088 #ifdef DEVICE_POLLING
3089         if_setcapabilitiesbit(ifp, IFCAP_POLLING,0);
3090 #endif
3091
3092         /* Enable only WOL MAGIC by default */
3093         if (adapter->wol) {
3094                 if_setcapabilitiesbit(ifp, IFCAP_WOL, 0);
3095                 if_setcapenablebit(ifp, IFCAP_WOL_MAGIC, 0);
3096         }
3097                 
3098         /*
3099          * Specify the media types supported by this adapter and register
3100          * callbacks to update media and link information
3101          */
3102         ifmedia_init(&adapter->media, IFM_IMASK,
3103             em_media_change, em_media_status);
3104         if ((adapter->hw.phy.media_type == e1000_media_type_fiber) ||
3105             (adapter->hw.phy.media_type == e1000_media_type_internal_serdes)) {
3106                 u_char fiber_type = IFM_1000_SX;        /* default type */
3107
3108                 ifmedia_add(&adapter->media, IFM_ETHER | fiber_type | IFM_FDX, 
3109                             0, NULL);
3110                 ifmedia_add(&adapter->media, IFM_ETHER | fiber_type, 0, NULL);
3111         } else {
3112                 ifmedia_add(&adapter->media, IFM_ETHER | IFM_10_T, 0, NULL);
3113                 ifmedia_add(&adapter->media, IFM_ETHER | IFM_10_T | IFM_FDX,
3114                             0, NULL);
3115                 ifmedia_add(&adapter->media, IFM_ETHER | IFM_100_TX,
3116                             0, NULL);
3117                 ifmedia_add(&adapter->media, IFM_ETHER | IFM_100_TX | IFM_FDX,
3118                             0, NULL);
3119                 if (adapter->hw.phy.type != e1000_phy_ife) {
3120                         ifmedia_add(&adapter->media,
3121                                 IFM_ETHER | IFM_1000_T | IFM_FDX, 0, NULL);
3122                         ifmedia_add(&adapter->media,
3123                                 IFM_ETHER | IFM_1000_T, 0, NULL);
3124                 }
3125         }
3126         ifmedia_add(&adapter->media, IFM_ETHER | IFM_AUTO, 0, NULL);
3127         ifmedia_set(&adapter->media, IFM_ETHER | IFM_AUTO);
3128         return (0);
3129 }
3130
3131
3132 /*
3133  * Manage DMA'able memory.
3134  */
3135 static void
3136 em_dmamap_cb(void *arg, bus_dma_segment_t *segs, int nseg, int error)
3137 {
3138         if (error)
3139                 return;
3140         *(bus_addr_t *) arg = segs[0].ds_addr;
3141 }
3142
3143 static int
3144 em_dma_malloc(struct adapter *adapter, bus_size_t size,
3145         struct em_dma_alloc *dma, int mapflags)
3146 {
3147         int error;
3148
3149         error = bus_dma_tag_create(bus_get_dma_tag(adapter->dev), /* parent */
3150                                 EM_DBA_ALIGN, 0,        /* alignment, bounds */
3151                                 BUS_SPACE_MAXADDR,      /* lowaddr */
3152                                 BUS_SPACE_MAXADDR,      /* highaddr */
3153                                 NULL, NULL,             /* filter, filterarg */
3154                                 size,                   /* maxsize */
3155                                 1,                      /* nsegments */
3156                                 size,                   /* maxsegsize */
3157                                 0,                      /* flags */
3158                                 NULL,                   /* lockfunc */
3159                                 NULL,                   /* lockarg */
3160                                 &dma->dma_tag);
3161         if (error) {
3162                 device_printf(adapter->dev,
3163                     "%s: bus_dma_tag_create failed: %d\n",
3164                     __func__, error);
3165                 goto fail_0;
3166         }
3167
3168         error = bus_dmamem_alloc(dma->dma_tag, (void**) &dma->dma_vaddr,
3169             BUS_DMA_NOWAIT | BUS_DMA_COHERENT, &dma->dma_map);
3170         if (error) {
3171                 device_printf(adapter->dev,
3172                     "%s: bus_dmamem_alloc(%ju) failed: %d\n",
3173                     __func__, (uintmax_t)size, error);
3174                 goto fail_2;
3175         }
3176
3177         dma->dma_paddr = 0;
3178         error = bus_dmamap_load(dma->dma_tag, dma->dma_map, dma->dma_vaddr,
3179             size, em_dmamap_cb, &dma->dma_paddr, mapflags | BUS_DMA_NOWAIT);
3180         if (error || dma->dma_paddr == 0) {
3181                 device_printf(adapter->dev,
3182                     "%s: bus_dmamap_load failed: %d\n",
3183                     __func__, error);
3184                 goto fail_3;
3185         }
3186
3187         return (0);
3188
3189 fail_3:
3190         bus_dmamap_unload(dma->dma_tag, dma->dma_map);
3191 fail_2:
3192         bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map);
3193         bus_dma_tag_destroy(dma->dma_tag);
3194 fail_0:
3195         dma->dma_tag = NULL;
3196
3197         return (error);
3198 }
3199
3200 static void
3201 em_dma_free(struct adapter *adapter, struct em_dma_alloc *dma)
3202 {
3203         if (dma->dma_tag == NULL)
3204                 return;
3205         if (dma->dma_paddr != 0) {
3206                 bus_dmamap_sync(dma->dma_tag, dma->dma_map,
3207                     BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
3208                 bus_dmamap_unload(dma->dma_tag, dma->dma_map);
3209                 dma->dma_paddr = 0;
3210         }
3211         if (dma->dma_vaddr != NULL) {
3212                 bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map);
3213                 dma->dma_vaddr = NULL;
3214         }
3215         bus_dma_tag_destroy(dma->dma_tag);
3216         dma->dma_tag = NULL;
3217 }
3218
3219
3220 /*********************************************************************
3221  *
3222  *  Allocate memory for the transmit and receive rings, and then
3223  *  the descriptors associated with each, called only once at attach.
3224  *
3225  **********************************************************************/
3226 static int
3227 em_allocate_queues(struct adapter *adapter)
3228 {
3229         device_t                dev = adapter->dev;
3230         struct tx_ring          *txr = NULL;
3231         struct rx_ring          *rxr = NULL;
3232         int rsize, tsize, error = E1000_SUCCESS;
3233         int txconf = 0, rxconf = 0;
3234
3235
3236         /* Allocate the TX ring struct memory */
3237         if (!(adapter->tx_rings =
3238             (struct tx_ring *) malloc(sizeof(struct tx_ring) *
3239             adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
3240                 device_printf(dev, "Unable to allocate TX ring memory\n");
3241                 error = ENOMEM;
3242                 goto fail;
3243         }
3244
3245         /* Now allocate the RX */
3246         if (!(adapter->rx_rings =
3247             (struct rx_ring *) malloc(sizeof(struct rx_ring) *
3248             adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
3249                 device_printf(dev, "Unable to allocate RX ring memory\n");
3250                 error = ENOMEM;
3251                 goto rx_fail;
3252         }
3253
3254         tsize = roundup2(adapter->num_tx_desc *
3255             sizeof(struct e1000_tx_desc), EM_DBA_ALIGN);
3256         /*
3257          * Now set up the TX queues, txconf is needed to handle the
3258          * possibility that things fail midcourse and we need to
3259          * undo memory gracefully
3260          */ 
3261         for (int i = 0; i < adapter->num_queues; i++, txconf++) {
3262                 /* Set up some basics */
3263                 txr = &adapter->tx_rings[i];
3264                 txr->adapter = adapter;
3265                 txr->me = i;
3266
3267                 /* Initialize the TX lock */
3268                 snprintf(txr->mtx_name, sizeof(txr->mtx_name), "%s:tx(%d)",
3269                     device_get_nameunit(dev), txr->me);
3270                 mtx_init(&txr->tx_mtx, txr->mtx_name, NULL, MTX_DEF);
3271
3272                 if (em_dma_malloc(adapter, tsize,
3273                         &txr->txdma, BUS_DMA_NOWAIT)) {
3274                         device_printf(dev,
3275                             "Unable to allocate TX Descriptor memory\n");
3276                         error = ENOMEM;
3277                         goto err_tx_desc;
3278                 }
3279                 txr->tx_base = (struct e1000_tx_desc *)txr->txdma.dma_vaddr;
3280                 bzero((void *)txr->tx_base, tsize);
3281
3282                 if (em_allocate_transmit_buffers(txr)) {
3283                         device_printf(dev,
3284                             "Critical Failure setting up transmit buffers\n");
3285                         error = ENOMEM;
3286                         goto err_tx_desc;
3287                 }
3288 #if __FreeBSD_version >= 800000
3289                 /* Allocate a buf ring */
3290                 txr->br = buf_ring_alloc(4096, M_DEVBUF,
3291                     M_WAITOK, &txr->tx_mtx);
3292 #endif
3293         }
3294
3295         /*
3296          * Next the RX queues...
3297          */ 
3298         rsize = roundup2(adapter->num_rx_desc *
3299             sizeof(struct e1000_rx_desc), EM_DBA_ALIGN);
3300         for (int i = 0; i < adapter->num_queues; i++, rxconf++) {
3301                 rxr = &adapter->rx_rings[i];
3302                 rxr->adapter = adapter;
3303                 rxr->me = i;
3304
3305                 /* Initialize the RX lock */
3306                 snprintf(rxr->mtx_name, sizeof(rxr->mtx_name), "%s:rx(%d)",
3307                     device_get_nameunit(dev), txr->me);
3308                 mtx_init(&rxr->rx_mtx, rxr->mtx_name, NULL, MTX_DEF);
3309
3310                 if (em_dma_malloc(adapter, rsize,
3311                         &rxr->rxdma, BUS_DMA_NOWAIT)) {
3312                         device_printf(dev,
3313                             "Unable to allocate RxDescriptor memory\n");
3314                         error = ENOMEM;
3315                         goto err_rx_desc;
3316                 }
3317                 rxr->rx_base = (struct e1000_rx_desc *)rxr->rxdma.dma_vaddr;
3318                 bzero((void *)rxr->rx_base, rsize);
3319
3320                 /* Allocate receive buffers for the ring*/
3321                 if (em_allocate_receive_buffers(rxr)) {
3322                         device_printf(dev,
3323                             "Critical Failure setting up receive buffers\n");
3324                         error = ENOMEM;
3325                         goto err_rx_desc;
3326                 }
3327         }
3328
3329         return (0);
3330
3331 err_rx_desc:
3332         for (rxr = adapter->rx_rings; rxconf > 0; rxr++, rxconf--)
3333                 em_dma_free(adapter, &rxr->rxdma);
3334 err_tx_desc:
3335         for (txr = adapter->tx_rings; txconf > 0; txr++, txconf--)
3336                 em_dma_free(adapter, &txr->txdma);
3337         free(adapter->rx_rings, M_DEVBUF);
3338 rx_fail:
3339 #if __FreeBSD_version >= 800000
3340         buf_ring_free(txr->br, M_DEVBUF);
3341 #endif
3342         free(adapter->tx_rings, M_DEVBUF);
3343 fail:
3344         return (error);
3345 }
3346
3347
3348 /*********************************************************************
3349  *
3350  *  Allocate memory for tx_buffer structures. The tx_buffer stores all
3351  *  the information needed to transmit a packet on the wire. This is
3352  *  called only once at attach, setup is done every reset.
3353  *
3354  **********************************************************************/
3355 static int
3356 em_allocate_transmit_buffers(struct tx_ring *txr)
3357 {
3358         struct adapter *adapter = txr->adapter;
3359         device_t dev = adapter->dev;
3360         struct em_buffer *txbuf;
3361         int error, i;
3362
3363         /*
3364          * Setup DMA descriptor areas.
3365          */
3366         if ((error = bus_dma_tag_create(bus_get_dma_tag(dev),
3367                                1, 0,                    /* alignment, bounds */
3368                                BUS_SPACE_MAXADDR,       /* lowaddr */
3369                                BUS_SPACE_MAXADDR,       /* highaddr */
3370                                NULL, NULL,              /* filter, filterarg */
3371                                EM_TSO_SIZE,             /* maxsize */
3372                                EM_MAX_SCATTER,          /* nsegments */
3373                                PAGE_SIZE,               /* maxsegsize */
3374                                0,                       /* flags */
3375                                NULL,                    /* lockfunc */
3376                                NULL,                    /* lockfuncarg */
3377                                &txr->txtag))) {
3378                 device_printf(dev,"Unable to allocate TX DMA tag\n");
3379                 goto fail;
3380         }
3381
3382         if (!(txr->tx_buffers =
3383             (struct em_buffer *) malloc(sizeof(struct em_buffer) *
3384             adapter->num_tx_desc, M_DEVBUF, M_NOWAIT | M_ZERO))) {
3385                 device_printf(dev, "Unable to allocate tx_buffer memory\n");
3386                 error = ENOMEM;
3387                 goto fail;
3388         }
3389
3390         /* Create the descriptor buffer dma maps */
3391         txbuf = txr->tx_buffers;
3392         for (i = 0; i < adapter->num_tx_desc; i++, txbuf++) {
3393                 error = bus_dmamap_create(txr->txtag, 0, &txbuf->map);
3394                 if (error != 0) {
3395                         device_printf(dev, "Unable to create TX DMA map\n");
3396                         goto fail;
3397                 }
3398         }
3399
3400         return 0;
3401 fail:
3402         /* We free all, it handles case where we are in the middle */
3403         em_free_transmit_structures(adapter);
3404         return (error);
3405 }
3406
3407 /*********************************************************************
3408  *
3409  *  Initialize a transmit ring.
3410  *
3411  **********************************************************************/
3412 static void
3413 em_setup_transmit_ring(struct tx_ring *txr)
3414 {
3415         struct adapter *adapter = txr->adapter;
3416         struct em_buffer *txbuf;
3417         int i;
3418 #ifdef DEV_NETMAP
3419         struct netmap_slot *slot;
3420         struct netmap_adapter *na = netmap_getna(adapter->ifp);
3421 #endif /* DEV_NETMAP */
3422
3423         /* Clear the old descriptor contents */
3424         EM_TX_LOCK(txr);
3425 #ifdef DEV_NETMAP
3426         slot = netmap_reset(na, NR_TX, txr->me, 0);
3427 #endif /* DEV_NETMAP */
3428
3429         bzero((void *)txr->tx_base,
3430               (sizeof(struct e1000_tx_desc)) * adapter->num_tx_desc);
3431         /* Reset indices */
3432         txr->next_avail_desc = 0;
3433         txr->next_to_clean = 0;
3434
3435         /* Free any existing tx buffers. */
3436         txbuf = txr->tx_buffers;
3437         for (i = 0; i < adapter->num_tx_desc; i++, txbuf++) {
3438                 if (txbuf->m_head != NULL) {
3439                         bus_dmamap_sync(txr->txtag, txbuf->map,
3440                             BUS_DMASYNC_POSTWRITE);
3441                         bus_dmamap_unload(txr->txtag, txbuf->map);
3442                         m_freem(txbuf->m_head);
3443                         txbuf->m_head = NULL;
3444                 }
3445 #ifdef DEV_NETMAP
3446                 if (slot) {
3447                         int si = netmap_idx_n2k(&na->tx_rings[txr->me], i);
3448                         uint64_t paddr;
3449                         void *addr;
3450
3451                         addr = PNMB(na, slot + si, &paddr);
3452                         txr->tx_base[i].buffer_addr = htole64(paddr);
3453                         /* reload the map for netmap mode */
3454                         netmap_load_map(na, txr->txtag, txbuf->map, addr);
3455                 }
3456 #endif /* DEV_NETMAP */
3457
3458                 /* clear the watch index */
3459                 txbuf->next_eop = -1;
3460         }
3461
3462         /* Set number of descriptors available */
3463         txr->tx_avail = adapter->num_tx_desc;
3464         txr->busy = EM_TX_IDLE;
3465
3466         /* Clear checksum offload context. */
3467         txr->last_hw_offload = 0;
3468         txr->last_hw_ipcss = 0;
3469         txr->last_hw_ipcso = 0;
3470         txr->last_hw_tucss = 0;
3471         txr->last_hw_tucso = 0;
3472
3473         bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
3474             BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
3475         EM_TX_UNLOCK(txr);
3476 }
3477
3478 /*********************************************************************
3479  *
3480  *  Initialize all transmit rings.
3481  *
3482  **********************************************************************/
3483 static void
3484 em_setup_transmit_structures(struct adapter *adapter)
3485 {
3486         struct tx_ring *txr = adapter->tx_rings;
3487
3488         for (int i = 0; i < adapter->num_queues; i++, txr++)
3489                 em_setup_transmit_ring(txr);
3490
3491         return;
3492 }
3493
3494 /*********************************************************************
3495  *
3496  *  Enable transmit unit.
3497  *
3498  **********************************************************************/
3499 static void
3500 em_initialize_transmit_unit(struct adapter *adapter)
3501 {
3502         struct tx_ring  *txr = adapter->tx_rings;
3503         struct e1000_hw *hw = &adapter->hw;
3504         u32     tctl, txdctl = 0, tarc, tipg = 0;
3505
3506          INIT_DEBUGOUT("em_initialize_transmit_unit: begin");
3507
3508         for (int i = 0; i < adapter->num_queues; i++, txr++) {
3509                 u64 bus_addr = txr->txdma.dma_paddr;
3510                 /* Base and Len of TX Ring */
3511                 E1000_WRITE_REG(hw, E1000_TDLEN(i),
3512                     adapter->num_tx_desc * sizeof(struct e1000_tx_desc));
3513                 E1000_WRITE_REG(hw, E1000_TDBAH(i),
3514                     (u32)(bus_addr >> 32));
3515                 E1000_WRITE_REG(hw, E1000_TDBAL(i),
3516                     (u32)bus_addr);
3517                 /* Init the HEAD/TAIL indices */
3518                 E1000_WRITE_REG(hw, E1000_TDT(i), 0);
3519                 E1000_WRITE_REG(hw, E1000_TDH(i), 0);
3520
3521                 HW_DEBUGOUT2("Base = %x, Length = %x\n",
3522                     E1000_READ_REG(&adapter->hw, E1000_TDBAL(i)),
3523                     E1000_READ_REG(&adapter->hw, E1000_TDLEN(i)));
3524
3525                 txr->busy = EM_TX_IDLE;
3526                 txdctl = 0; /* clear txdctl */
3527                 txdctl |= 0x1f; /* PTHRESH */
3528                 txdctl |= 1 << 8; /* HTHRESH */
3529                 txdctl |= 1 << 16;/* WTHRESH */
3530                 txdctl |= 1 << 22; /* Reserved bit 22 must always be 1 */
3531                 txdctl |= E1000_TXDCTL_GRAN;
3532                 txdctl |= 1 << 25; /* LWTHRESH */
3533
3534                 E1000_WRITE_REG(hw, E1000_TXDCTL(i), txdctl);
3535         }
3536
3537         /* Set the default values for the Tx Inter Packet Gap timer */
3538         switch (adapter->hw.mac.type) {
3539         case e1000_80003es2lan:
3540                 tipg = DEFAULT_82543_TIPG_IPGR1;
3541                 tipg |= DEFAULT_80003ES2LAN_TIPG_IPGR2 <<
3542                     E1000_TIPG_IPGR2_SHIFT;
3543                 break;
3544         default:
3545                 if ((adapter->hw.phy.media_type == e1000_media_type_fiber) ||
3546                     (adapter->hw.phy.media_type ==
3547                     e1000_media_type_internal_serdes))
3548                         tipg = DEFAULT_82543_TIPG_IPGT_FIBER;
3549                 else
3550                         tipg = DEFAULT_82543_TIPG_IPGT_COPPER;
3551                 tipg |= DEFAULT_82543_TIPG_IPGR1 << E1000_TIPG_IPGR1_SHIFT;
3552                 tipg |= DEFAULT_82543_TIPG_IPGR2 << E1000_TIPG_IPGR2_SHIFT;
3553         }
3554
3555         E1000_WRITE_REG(&adapter->hw, E1000_TIPG, tipg);
3556         E1000_WRITE_REG(&adapter->hw, E1000_TIDV, adapter->tx_int_delay.value);
3557
3558         if(adapter->hw.mac.type >= e1000_82540)
3559                 E1000_WRITE_REG(&adapter->hw, E1000_TADV,
3560                     adapter->tx_abs_int_delay.value);
3561
3562         if ((adapter->hw.mac.type == e1000_82571) ||
3563             (adapter->hw.mac.type == e1000_82572)) {
3564                 tarc = E1000_READ_REG(&adapter->hw, E1000_TARC(0));
3565                 tarc |= TARC_SPEED_MODE_BIT;
3566                 E1000_WRITE_REG(&adapter->hw, E1000_TARC(0), tarc);
3567         } else if (adapter->hw.mac.type == e1000_80003es2lan) {
3568                 /* errata: program both queues to unweighted RR */
3569                 tarc = E1000_READ_REG(&adapter->hw, E1000_TARC(0));
3570                 tarc |= 1;
3571                 E1000_WRITE_REG(&adapter->hw, E1000_TARC(0), tarc);
3572                 tarc = E1000_READ_REG(&adapter->hw, E1000_TARC(1));
3573                 tarc |= 1;
3574                 E1000_WRITE_REG(&adapter->hw, E1000_TARC(1), tarc);
3575         } else if (adapter->hw.mac.type == e1000_82574) {
3576                 tarc = E1000_READ_REG(&adapter->hw, E1000_TARC(0));
3577                 tarc |= TARC_ERRATA_BIT;
3578                 if ( adapter->num_queues > 1) {
3579                         tarc |= (TARC_COMPENSATION_MODE | TARC_MQ_FIX);
3580                         E1000_WRITE_REG(&adapter->hw, E1000_TARC(0), tarc);
3581                         E1000_WRITE_REG(&adapter->hw, E1000_TARC(1), tarc);
3582                 } else
3583                         E1000_WRITE_REG(&adapter->hw, E1000_TARC(0), tarc);
3584         }
3585
3586         adapter->txd_cmd = E1000_TXD_CMD_IFCS;
3587         if (adapter->tx_int_delay.value > 0)
3588                 adapter->txd_cmd |= E1000_TXD_CMD_IDE;
3589
3590         /* Program the Transmit Control Register */
3591         tctl = E1000_READ_REG(&adapter->hw, E1000_TCTL);
3592         tctl &= ~E1000_TCTL_CT;
3593         tctl |= (E1000_TCTL_PSP | E1000_TCTL_RTLC | E1000_TCTL_EN |
3594                    (E1000_COLLISION_THRESHOLD << E1000_CT_SHIFT));
3595
3596         if (adapter->hw.mac.type >= e1000_82571)
3597                 tctl |= E1000_TCTL_MULR;
3598
3599         /* This write will effectively turn on the transmit unit. */
3600         E1000_WRITE_REG(&adapter->hw, E1000_TCTL, tctl);
3601
3602 }
3603
3604
3605 /*********************************************************************
3606  *
3607  *  Free all transmit rings.
3608  *
3609  **********************************************************************/
3610 static void
3611 em_free_transmit_structures(struct adapter *adapter)
3612 {
3613         struct tx_ring *txr = adapter->tx_rings;
3614
3615         for (int i = 0; i < adapter->num_queues; i++, txr++) {
3616                 EM_TX_LOCK(txr);
3617                 em_free_transmit_buffers(txr);
3618                 em_dma_free(adapter, &txr->txdma);
3619                 EM_TX_UNLOCK(txr);
3620                 EM_TX_LOCK_DESTROY(txr);
3621         }
3622
3623         free(adapter->tx_rings, M_DEVBUF);
3624 }
3625
3626 /*********************************************************************
3627  *
3628  *  Free transmit ring related data structures.
3629  *
3630  **********************************************************************/
3631 static void
3632 em_free_transmit_buffers(struct tx_ring *txr)
3633 {
3634         struct adapter          *adapter = txr->adapter;
3635         struct em_buffer        *txbuf;
3636
3637         INIT_DEBUGOUT("free_transmit_ring: begin");
3638
3639         if (txr->tx_buffers == NULL)
3640                 return;
3641
3642         for (int i = 0; i < adapter->num_tx_desc; i++) {
3643                 txbuf = &txr->tx_buffers[i];
3644                 if (txbuf->m_head != NULL) {
3645                         bus_dmamap_sync(txr->txtag, txbuf->map,
3646                             BUS_DMASYNC_POSTWRITE);
3647                         bus_dmamap_unload(txr->txtag,
3648                             txbuf->map);
3649                         m_freem(txbuf->m_head);
3650                         txbuf->m_head = NULL;
3651                         if (txbuf->map != NULL) {
3652                                 bus_dmamap_destroy(txr->txtag,
3653                                     txbuf->map);
3654                                 txbuf->map = NULL;
3655                         }
3656                 } else if (txbuf->map != NULL) {
3657                         bus_dmamap_unload(txr->txtag,
3658                             txbuf->map);
3659                         bus_dmamap_destroy(txr->txtag,
3660                             txbuf->map);
3661                         txbuf->map = NULL;
3662                 }
3663         }
3664 #if __FreeBSD_version >= 800000
3665         if (txr->br != NULL)
3666                 buf_ring_free(txr->br, M_DEVBUF);
3667 #endif
3668         if (txr->tx_buffers != NULL) {
3669                 free(txr->tx_buffers, M_DEVBUF);
3670                 txr->tx_buffers = NULL;
3671         }
3672         if (txr->txtag != NULL) {
3673                 bus_dma_tag_destroy(txr->txtag);
3674                 txr->txtag = NULL;
3675         }
3676         return;
3677 }
3678
3679
3680 /*********************************************************************
3681  *  The offload context is protocol specific (TCP/UDP) and thus
3682  *  only needs to be set when the protocol changes. The occasion
3683  *  of a context change can be a performance detriment, and
3684  *  might be better just disabled. The reason arises in the way
3685  *  in which the controller supports pipelined requests from the
3686  *  Tx data DMA. Up to four requests can be pipelined, and they may
3687  *  belong to the same packet or to multiple packets. However all
3688  *  requests for one packet are issued before a request is issued
3689  *  for a subsequent packet and if a request for the next packet
3690  *  requires a context change, that request will be stalled
3691  *  until the previous request completes. This means setting up
3692  *  a new context effectively disables pipelined Tx data DMA which
3693  *  in turn greatly slow down performance to send small sized
3694  *  frames. 
3695  **********************************************************************/
3696 static void
3697 em_transmit_checksum_setup(struct tx_ring *txr, struct mbuf *mp, int ip_off,
3698     struct ip *ip, u32 *txd_upper, u32 *txd_lower)
3699 {
3700         struct adapter                  *adapter = txr->adapter;
3701         struct e1000_context_desc       *TXD = NULL;
3702         struct em_buffer                *tx_buffer;
3703         int                             cur, hdr_len;
3704         u32                             cmd = 0;
3705         u16                             offload = 0;
3706         u8                              ipcso, ipcss, tucso, tucss;
3707
3708         ipcss = ipcso = tucss = tucso = 0;
3709         hdr_len = ip_off + (ip->ip_hl << 2);
3710         cur = txr->next_avail_desc;
3711
3712         /* Setup of IP header checksum. */
3713         if (mp->m_pkthdr.csum_flags & CSUM_IP) {
3714                 *txd_upper |= E1000_TXD_POPTS_IXSM << 8;
3715                 offload |= CSUM_IP;
3716                 ipcss = ip_off;
3717                 ipcso = ip_off + offsetof(struct ip, ip_sum);
3718                 /*
3719                  * Start offset for header checksum calculation.
3720                  * End offset for header checksum calculation.
3721                  * Offset of place to put the checksum.
3722                  */
3723                 TXD = (struct e1000_context_desc *)&txr->tx_base[cur];
3724                 TXD->lower_setup.ip_fields.ipcss = ipcss;
3725                 TXD->lower_setup.ip_fields.ipcse = htole16(hdr_len);
3726                 TXD->lower_setup.ip_fields.ipcso = ipcso;
3727                 cmd |= E1000_TXD_CMD_IP;
3728         }
3729
3730         if (mp->m_pkthdr.csum_flags & CSUM_TCP) {
3731                 *txd_lower = E1000_TXD_CMD_DEXT | E1000_TXD_DTYP_D;
3732                 *txd_upper |= E1000_TXD_POPTS_TXSM << 8;
3733                 offload |= CSUM_TCP;
3734                 tucss = hdr_len;
3735                 tucso = hdr_len + offsetof(struct tcphdr, th_sum);
3736                 /*
3737                  * Setting up new checksum offload context for every frames
3738                  * takes a lot of processing time for hardware. This also
3739                  * reduces performance a lot for small sized frames so avoid
3740                  * it if driver can use previously configured checksum
3741                  * offload context.
3742                  */
3743                 if (txr->last_hw_offload == offload) {
3744                         if (offload & CSUM_IP) {
3745                                 if (txr->last_hw_ipcss == ipcss &&
3746                                     txr->last_hw_ipcso == ipcso &&
3747                                     txr->last_hw_tucss == tucss &&
3748                                     txr->last_hw_tucso == tucso)
3749                                         return;
3750                         } else {
3751                                 if (txr->last_hw_tucss == tucss &&
3752                                     txr->last_hw_tucso == tucso)
3753                                         return;
3754                         }
3755                 }
3756                 txr->last_hw_offload = offload;
3757                 txr->last_hw_tucss = tucss;
3758                 txr->last_hw_tucso = tucso;
3759                 /*
3760                  * Start offset for payload checksum calculation.
3761                  * End offset for payload checksum calculation.
3762                  * Offset of place to put the checksum.
3763                  */
3764                 TXD = (struct e1000_context_desc *)&txr->tx_base[cur];
3765                 TXD->upper_setup.tcp_fields.tucss = hdr_len;
3766                 TXD->upper_setup.tcp_fields.tucse = htole16(0);
3767                 TXD->upper_setup.tcp_fields.tucso = tucso;
3768                 cmd |= E1000_TXD_CMD_TCP;
3769         } else if (mp->m_pkthdr.csum_flags & CSUM_UDP) {
3770                 *txd_lower = E1000_TXD_CMD_DEXT | E1000_TXD_DTYP_D;
3771                 *txd_upper |= E1000_TXD_POPTS_TXSM << 8;
3772                 tucss = hdr_len;
3773                 tucso = hdr_len + offsetof(struct udphdr, uh_sum);
3774                 /*
3775                  * Setting up new checksum offload context for every frames
3776                  * takes a lot of processing time for hardware. This also
3777                  * reduces performance a lot for small sized frames so avoid
3778                  * it if driver can use previously configured checksum
3779                  * offload context.
3780                  */
3781                 if (txr->last_hw_offload == offload) {
3782                         if (offload & CSUM_IP) {
3783                                 if (txr->last_hw_ipcss == ipcss &&
3784                                     txr->last_hw_ipcso == ipcso &&
3785                                     txr->last_hw_tucss == tucss &&
3786                                     txr->last_hw_tucso == tucso)
3787                                         return;
3788                         } else {
3789                                 if (txr->last_hw_tucss == tucss &&
3790                                     txr->last_hw_tucso == tucso)
3791                                         return;
3792                         }
3793                 }
3794                 txr->last_hw_offload = offload;
3795                 txr->last_hw_tucss = tucss;
3796                 txr->last_hw_tucso = tucso;
3797                 /*
3798                  * Start offset for header checksum calculation.
3799                  * End offset for header checksum calculation.
3800                  * Offset of place to put the checksum.
3801                  */
3802                 TXD = (struct e1000_context_desc *)&txr->tx_base[cur];
3803                 TXD->upper_setup.tcp_fields.tucss = tucss;
3804                 TXD->upper_setup.tcp_fields.tucse = htole16(0);
3805                 TXD->upper_setup.tcp_fields.tucso = tucso;
3806         }
3807   
3808         if (offload & CSUM_IP) {
3809                 txr->last_hw_ipcss = ipcss;
3810                 txr->last_hw_ipcso = ipcso;
3811         }
3812
3813         TXD->tcp_seg_setup.data = htole32(0);
3814         TXD->cmd_and_length =
3815             htole32(adapter->txd_cmd | E1000_TXD_CMD_DEXT | cmd);
3816         tx_buffer = &txr->tx_buffers[cur];
3817         tx_buffer->m_head = NULL;
3818         tx_buffer->next_eop = -1;
3819
3820         if (++cur == adapter->num_tx_desc)
3821                 cur = 0;
3822
3823         txr->tx_avail--;
3824         txr->next_avail_desc = cur;
3825 }
3826
3827
/**********************************************************************
 *
 *  Setup work for hardware segmentation offload (TSO)
 *
 *  Writes one context descriptor describing the IP/TCP header layout
 *  and the MSS so the MAC can split a large TCP payload into
 *  wire-sized frames.  Consumes one descriptor slot from the ring and
 *  returns (via txd_upper/txd_lower) the flag bits the caller must
 *  place in the subsequent data descriptors.
 *
 *  ip_off is the byte offset of the IP header within the frame;
 *  ip/tp point at the (already pulled-up) IP and TCP headers of mp.
 *
 **********************************************************************/
static void
em_tso_setup(struct tx_ring *txr, struct mbuf *mp, int ip_off,
    struct ip *ip, struct tcphdr *tp, u32 *txd_upper, u32 *txd_lower)
{
        struct adapter                  *adapter = txr->adapter;
        struct e1000_context_desc       *TXD;
        struct em_buffer                *tx_buffer;
        int cur, hdr_len;

        /*
         * In theory we can use the same TSO context if and only if
         * frame is the same type(IP/TCP) and the same MSS. However
         * checking whether a frame has the same IP/TCP structure is a
         * hard thing so just ignore that and always re-establish a
         * new TSO context.
         */
        /* Total header length: L2 headers + IP header + TCP header */
        hdr_len = ip_off + (ip->ip_hl << 2) + (tp->th_off << 2);
        *txd_lower = (E1000_TXD_CMD_DEXT |      /* Extended descr type */
                      E1000_TXD_DTYP_D |        /* Data descr type */
                      E1000_TXD_CMD_TSE);       /* Do TSE on this packet */

        /* IP and/or TCP header checksum calculation and insertion. */
        *txd_upper = (E1000_TXD_POPTS_IXSM | E1000_TXD_POPTS_TXSM) << 8;

        cur = txr->next_avail_desc;
        tx_buffer = &txr->tx_buffers[cur];
        TXD = (struct e1000_context_desc *) &txr->tx_base[cur];

        /*
         * IP checksum fields:
         * Start offset for header checksum calculation.
         * End offset for header checksum calculation.
         * Offset of place put the checksum.
         */
        TXD->lower_setup.ip_fields.ipcss = ip_off;
        TXD->lower_setup.ip_fields.ipcse =
            htole16(ip_off + (ip->ip_hl << 2) - 1);
        TXD->lower_setup.ip_fields.ipcso = ip_off + offsetof(struct ip, ip_sum);
        /*
         * TCP checksum fields:
         * Start offset for payload checksum calculation.
         * End offset for payload checksum calculation (0 = to end of packet).
         * Offset of place to put the checksum.
         */
        TXD->upper_setup.tcp_fields.tucss = ip_off + (ip->ip_hl << 2);
        TXD->upper_setup.tcp_fields.tucse = 0;
        TXD->upper_setup.tcp_fields.tucso =
            ip_off + (ip->ip_hl << 2) + offsetof(struct tcphdr, th_sum);
        /*
         * Payload size per packet w/o any headers.
         * Length of all headers up to payload.
         */
        TXD->tcp_seg_setup.fields.mss = htole16(mp->m_pkthdr.tso_segsz);
        TXD->tcp_seg_setup.fields.hdr_len = hdr_len;

        TXD->cmd_and_length = htole32(adapter->txd_cmd |
                                E1000_TXD_CMD_DEXT |    /* Extended descr */
                                E1000_TXD_CMD_TSE |     /* TSE context */
                                E1000_TXD_CMD_IP |      /* Do IP csum */
                                E1000_TXD_CMD_TCP |     /* Do TCP checksum */
                                (mp->m_pkthdr.len - (hdr_len))); /* Total len */

        /* A context descriptor carries no mbuf and is never an EOP. */
        tx_buffer->m_head = NULL;
        tx_buffer->next_eop = -1;

        if (++cur == adapter->num_tx_desc)
                cur = 0;

        txr->tx_avail--;
        txr->next_avail_desc = cur;
        txr->tx_tso = TRUE;
}
3903
3904
/**********************************************************************
 *
 *  Examine each tx_buffer in the used queue. If the hardware is done
 *  processing the packet then free associated resources. The
 *  tx_buffer is put back on the free queue.
 *
 *  Called with the TX ring lock held.  Also feeds the hang-detection
 *  state machine (txr->busy) consumed by the local timer.
 *
 **********************************************************************/
static void
em_txeof(struct tx_ring *txr)
{
        struct adapter  *adapter = txr->adapter;
        int first, last, done, processed;
        struct em_buffer *tx_buffer;
        struct e1000_tx_desc   *tx_desc, *eop_desc;
        if_t ifp = adapter->ifp;

        EM_TX_LOCK_ASSERT(txr);
#ifdef DEV_NETMAP
        /* In netmap mode completions are handled by the netmap stack. */
        if (netmap_tx_irq(ifp, txr->me))
                return;
#endif /* DEV_NETMAP */

        /* No work, make sure hang detection is disabled */
        if (txr->tx_avail == adapter->num_tx_desc) {
                txr->busy = EM_TX_IDLE;
                return;
        }

        processed = 0;
        first = txr->next_to_clean;
        tx_desc = &txr->tx_base[first];
        tx_buffer = &txr->tx_buffers[first];
        last = tx_buffer->next_eop;
        eop_desc = &txr->tx_base[last];

        /*
         * What this does is get the index of the
         * first descriptor AFTER the EOP of the 
         * first packet, that way we can do the
         * simple comparison on the inner while loop.
         */
        if (++last == adapter->num_tx_desc)
                last = 0;
        done = last;

        bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
            BUS_DMASYNC_POSTREAD);

        /* DD set on the EOP descriptor means the whole packet is done. */
        while (eop_desc->upper.fields.status & E1000_TXD_STAT_DD) {
                /* We clean the range of the packet */
                while (first != done) {
                        tx_desc->upper.data = 0;
                        tx_desc->lower.data = 0;
                        tx_desc->buffer_addr = 0;
                        ++txr->tx_avail;
                        ++processed;

                        /* Only descriptors that carried data own an mbuf. */
                        if (tx_buffer->m_head) {
                                bus_dmamap_sync(txr->txtag,
                                    tx_buffer->map,
                                    BUS_DMASYNC_POSTWRITE);
                                bus_dmamap_unload(txr->txtag,
                                    tx_buffer->map);
                                m_freem(tx_buffer->m_head);
                                tx_buffer->m_head = NULL;
                        }
                        tx_buffer->next_eop = -1;

                        if (++first == adapter->num_tx_desc)
                                first = 0;

                        tx_buffer = &txr->tx_buffers[first];
                        tx_desc = &txr->tx_base[first];
                }
                if_inc_counter(ifp, IFCOUNTER_OPACKETS, 1);
                /* See if we can continue to the next packet */
                last = tx_buffer->next_eop;
                if (last != -1) {
                        eop_desc = &txr->tx_base[last];
                        /* Get new done point */
                        if (++last == adapter->num_tx_desc) last = 0;
                        done = last;
                } else
                        break;
        }
        bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
            BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);

        txr->next_to_clean = first;

        /*
        ** Hang detection: we know there's work outstanding
        ** or the entry return would have been taken, so no
        ** descriptor processed here indicates a potential hang.
        ** The local timer will examine this and do a reset if needed.
        */
        if (processed == 0) {
                if (txr->busy != EM_TX_HUNG)
                        ++txr->busy;
        } else /* At least one descriptor was cleaned */
                txr->busy = EM_TX_BUSY; /* note this clears HUNG */

        /*
         * If we have a minimum free, clear IFF_DRV_OACTIVE
         * to tell the stack that it is OK to send packets.
         * Notice that all writes of OACTIVE happen under the
         * TX lock which, with a single queue, guarantees 
         * sanity.
         */
        if (txr->tx_avail >= EM_MAX_SCATTER) {
                if_setdrvflagbits(ifp, 0, IFF_DRV_OACTIVE);
        }

        /* Disable hang detection if all clean */
        if (txr->tx_avail == adapter->num_tx_desc)
                txr->busy = EM_TX_IDLE;
}
4022
4023
/*********************************************************************
 *
 *  Refresh RX descriptor mbufs from system mbuf buffer pool.
 *
 *  Re-arms descriptors from next_to_refresh up to (but not including)
 *  'limit', reusing a slot's mbuf when the stack did not take it, and
 *  finally advances the hardware tail (RDT) over whatever was
 *  actually refreshed.
 *
 **********************************************************************/
static void
em_refresh_mbufs(struct rx_ring *rxr, int limit)
{
        struct adapter          *adapter = rxr->adapter;
        struct mbuf             *m;
        bus_dma_segment_t       segs[1];
        struct em_buffer        *rxbuf;
        int                     i, j, error, nsegs;
        bool                    cleaned = FALSE;

        /* i = slot being refreshed, j = one slot ahead (loop sentinel) */
        i = j = rxr->next_to_refresh;
        /*
        ** Get one descriptor beyond
        ** our work mark to control
        ** the loop.
        */
        if (++j == adapter->num_rx_desc)
                j = 0;

        while (j != limit) {
                rxbuf = &rxr->rx_buffers[i];
                if (rxbuf->m_head == NULL) {
                        m = m_getjcl(M_NOWAIT, MT_DATA,
                            M_PKTHDR, adapter->rx_mbuf_sz);
                        /*
                        ** If we have a temporary resource shortage
                        ** that causes a failure, just abort refresh
                        ** for now, we will return to this point when
                        ** reinvoked from em_rxeof.
                        */
                        if (m == NULL)
                                goto update;
                } else
                        m = rxbuf->m_head;

                /* Reset the (possibly reused) mbuf to a full, empty buffer */
                m->m_len = m->m_pkthdr.len = adapter->rx_mbuf_sz;
                m->m_flags |= M_PKTHDR;
                m->m_data = m->m_ext.ext_buf;

                /* Use bus_dma machinery to setup the memory mapping  */
                error = bus_dmamap_load_mbuf_sg(rxr->rxtag, rxbuf->map,
                    m, segs, &nsegs, BUS_DMA_NOWAIT);
                if (error != 0) {
                        printf("Refresh mbufs: hdr dmamap load"
                            " failure - %d\n", error);
                        m_free(m);
                        rxbuf->m_head = NULL;
                        goto update;
                }
                rxbuf->m_head = m;
                bus_dmamap_sync(rxr->rxtag,
                    rxbuf->map, BUS_DMASYNC_PREREAD);
                rxr->rx_base[i].buffer_addr = htole64(segs[0].ds_addr);
                cleaned = TRUE;

                i = j; /* Next is precalulated for us */
                rxr->next_to_refresh = i;
                /* Calculate next controlling index */
                if (++j == adapter->num_rx_desc)
                        j = 0;
        }
update:
        /*
        ** Update the tail pointer only if,
        ** and as far as we have refreshed.
        */
        if (cleaned)
                E1000_WRITE_REG(&adapter->hw,
                    E1000_RDT(rxr->me), rxr->next_to_refresh);

        return;
}
4101
4102
4103 /*********************************************************************
4104  *
4105  *  Allocate memory for rx_buffer structures. Since we use one
4106  *  rx_buffer per received packet, the maximum number of rx_buffer's
4107  *  that we'll need is equal to the number of receive descriptors
4108  *  that we've allocated.
4109  *
4110  **********************************************************************/
4111 static int
4112 em_allocate_receive_buffers(struct rx_ring *rxr)
4113 {
4114         struct adapter          *adapter = rxr->adapter;
4115         device_t                dev = adapter->dev;
4116         struct em_buffer        *rxbuf;
4117         int                     error;
4118
4119         rxr->rx_buffers = malloc(sizeof(struct em_buffer) *
4120             adapter->num_rx_desc, M_DEVBUF, M_NOWAIT | M_ZERO);
4121         if (rxr->rx_buffers == NULL) {
4122                 device_printf(dev, "Unable to allocate rx_buffer memory\n");
4123                 return (ENOMEM);
4124         }
4125
4126         error = bus_dma_tag_create(bus_get_dma_tag(dev), /* parent */
4127                                 1, 0,                   /* alignment, bounds */
4128                                 BUS_SPACE_MAXADDR,      /* lowaddr */
4129                                 BUS_SPACE_MAXADDR,      /* highaddr */
4130                                 NULL, NULL,             /* filter, filterarg */
4131                                 MJUM9BYTES,             /* maxsize */
4132                                 1,                      /* nsegments */
4133                                 MJUM9BYTES,             /* maxsegsize */
4134                                 0,                      /* flags */
4135                                 NULL,                   /* lockfunc */
4136                                 NULL,                   /* lockarg */
4137                                 &rxr->rxtag);
4138         if (error) {
4139                 device_printf(dev, "%s: bus_dma_tag_create failed %d\n",
4140                     __func__, error);
4141                 goto fail;
4142         }
4143
4144         rxbuf = rxr->rx_buffers;
4145         for (int i = 0; i < adapter->num_rx_desc; i++, rxbuf++) {
4146                 rxbuf = &rxr->rx_buffers[i];
4147                 error = bus_dmamap_create(rxr->rxtag, 0, &rxbuf->map);
4148                 if (error) {
4149                         device_printf(dev, "%s: bus_dmamap_create failed: %d\n",
4150                             __func__, error);
4151                         goto fail;
4152                 }
4153         }
4154
4155         return (0);
4156
4157 fail:
4158         em_free_receive_structures(adapter);
4159         return (error);
4160 }
4161
4162
/*********************************************************************
 *
 *  Initialize a receive ring and its buffers.
 *
 *  Clears the descriptor ring, frees any previously attached mbufs,
 *  and attaches a freshly mapped jumbo-cluster mbuf to every slot
 *  (or, under netmap, the buffers owned by the netmap client).
 *  Returns 0 on success or ENOBUFS/DMA errno on failure; partially
 *  replenished slots are left for the caller's cleanup path.
 *
 **********************************************************************/
static int
em_setup_receive_ring(struct rx_ring *rxr)
{
        struct  adapter         *adapter = rxr->adapter;
        struct em_buffer        *rxbuf;
        bus_dma_segment_t       seg[1];
        int                     rsize, nsegs, error = 0;
#ifdef DEV_NETMAP
        struct netmap_slot *slot;
        struct netmap_adapter *na = netmap_getna(adapter->ifp);
#endif


        /* Clear the ring contents */
        EM_RX_LOCK(rxr);
        rsize = roundup2(adapter->num_rx_desc *
            sizeof(struct e1000_rx_desc), EM_DBA_ALIGN);
        bzero((void *)rxr->rx_base, rsize);
#ifdef DEV_NETMAP
        /* non-NULL slot means a netmap client owns the ring's buffers */
        slot = netmap_reset(na, NR_RX, rxr->me, 0);
#endif

        /*
        ** Free current RX buffer structs and their mbufs
        */
        for (int i = 0; i < adapter->num_rx_desc; i++) {
                rxbuf = &rxr->rx_buffers[i];
                if (rxbuf->m_head != NULL) {
                        bus_dmamap_sync(rxr->rxtag, rxbuf->map,
                            BUS_DMASYNC_POSTREAD);
                        bus_dmamap_unload(rxr->rxtag, rxbuf->map);
                        m_freem(rxbuf->m_head);
                        rxbuf->m_head = NULL; /* mark as freed */
                }
        }

        /* Now replenish the mbufs */
        for (int j = 0; j != adapter->num_rx_desc; ++j) {
                rxbuf = &rxr->rx_buffers[j];
#ifdef DEV_NETMAP
                if (slot) {
                        /* Point the descriptor at the netmap buffer instead */
                        int si = netmap_idx_n2k(&na->rx_rings[rxr->me], j);
                        uint64_t paddr;
                        void *addr;

                        addr = PNMB(na, slot + si, &paddr);
                        netmap_load_map(na, rxr->rxtag, rxbuf->map, addr);
                        /* Update descriptor */
                        rxr->rx_base[j].buffer_addr = htole64(paddr);
                        continue;
                }
#endif /* DEV_NETMAP */
                rxbuf->m_head = m_getjcl(M_NOWAIT, MT_DATA,
                    M_PKTHDR, adapter->rx_mbuf_sz);
                if (rxbuf->m_head == NULL) {
                        error = ENOBUFS;
                        goto fail;
                }
                rxbuf->m_head->m_len = adapter->rx_mbuf_sz;
                rxbuf->m_head->m_flags &= ~M_HASFCS; /* we strip it */
                rxbuf->m_head->m_pkthdr.len = adapter->rx_mbuf_sz;

                /* Get the memory mapping */
                error = bus_dmamap_load_mbuf_sg(rxr->rxtag,
                    rxbuf->map, rxbuf->m_head, seg,
                    &nsegs, BUS_DMA_NOWAIT);
                if (error != 0) {
                        m_freem(rxbuf->m_head);
                        rxbuf->m_head = NULL;
                        goto fail;
                }
                bus_dmamap_sync(rxr->rxtag,
                    rxbuf->map, BUS_DMASYNC_PREREAD);

                /* Update descriptor */
                rxr->rx_base[j].buffer_addr = htole64(seg[0].ds_addr);
        }
        rxr->next_to_check = 0;
        rxr->next_to_refresh = 0;
        bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
            BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);

fail:   /* NB: shared exit path - success falls through with error == 0 */
        EM_RX_UNLOCK(rxr);
        return (error);
}
4254
4255 /*********************************************************************
4256  *
4257  *  Initialize all receive rings.
4258  *
4259  **********************************************************************/
4260 static int
4261 em_setup_receive_structures(struct adapter *adapter)
4262 {
4263         struct rx_ring *rxr = adapter->rx_rings;
4264         int q;
4265
4266         for (q = 0; q < adapter->num_queues; q++, rxr++)
4267                 if (em_setup_receive_ring(rxr))
4268                         goto fail;
4269
4270         return (0);
4271 fail:
4272         /*
4273          * Free RX buffers allocated so far, we will only handle
4274          * the rings that completed, the failing case will have
4275          * cleaned up for itself. 'q' failed, so its the terminus.
4276          */
4277         for (int i = 0; i < q; ++i) {
4278                 rxr = &adapter->rx_rings[i];
4279                 for (int n = 0; n < adapter->num_rx_desc; n++) {
4280                         struct em_buffer *rxbuf;
4281                         rxbuf = &rxr->rx_buffers[n];
4282                         if (rxbuf->m_head != NULL) {
4283                                 bus_dmamap_sync(rxr->rxtag, rxbuf->map,
4284                                   BUS_DMASYNC_POSTREAD);
4285                                 bus_dmamap_unload(rxr->rxtag, rxbuf->map);
4286                                 m_freem(rxbuf->m_head);
4287                                 rxbuf->m_head = NULL;
4288                         }
4289                 }
4290                 rxr->next_to_check = 0;
4291                 rxr->next_to_refresh = 0;
4292         }
4293
4294         return (ENOBUFS);
4295 }
4296
4297 /*********************************************************************
4298  *
4299  *  Free all receive rings.
4300  *
4301  **********************************************************************/
4302 static void
4303 em_free_receive_structures(struct adapter *adapter)
4304 {
4305         struct rx_ring *rxr = adapter->rx_rings;
4306
4307         for (int i = 0; i < adapter->num_queues; i++, rxr++) {
4308                 em_free_receive_buffers(rxr);
4309                 /* Free the ring memory as well */
4310                 em_dma_free(adapter, &rxr->rxdma);
4311                 EM_RX_LOCK_DESTROY(rxr);
4312         }
4313
4314         free(adapter->rx_rings, M_DEVBUF);
4315 }
4316
4317
4318 /*********************************************************************
4319  *
4320  *  Free receive ring data structures
4321  *
4322  **********************************************************************/
4323 static void
4324 em_free_receive_buffers(struct rx_ring *rxr)
4325 {
4326         struct adapter          *adapter = rxr->adapter;
4327         struct em_buffer        *rxbuf = NULL;
4328
4329         INIT_DEBUGOUT("free_receive_buffers: begin");
4330
4331         if (rxr->rx_buffers != NULL) {
4332                 for (int i = 0; i < adapter->num_rx_desc; i++) {
4333                         rxbuf = &rxr->rx_buffers[i];
4334                         if (rxbuf->map != NULL) {
4335                                 bus_dmamap_sync(rxr->rxtag, rxbuf->map,
4336                                     BUS_DMASYNC_POSTREAD);
4337                                 bus_dmamap_unload(rxr->rxtag, rxbuf->map);
4338                                 bus_dmamap_destroy(rxr->rxtag, rxbuf->map);
4339                         }
4340                         if (rxbuf->m_head != NULL) {
4341                                 m_freem(rxbuf->m_head);
4342                                 rxbuf->m_head = NULL;
4343                         }
4344                 }
4345                 free(rxr->rx_buffers, M_DEVBUF);
4346                 rxr->rx_buffers = NULL;
4347                 rxr->next_to_check = 0;
4348                 rxr->next_to_refresh = 0;
4349         }
4350
4351         if (rxr->rxtag != NULL) {
4352                 bus_dma_tag_destroy(rxr->rxtag);
4353                 rxr->rxtag = NULL;
4354         }
4355
4356         return;
4357 }
4358
4359
/*********************************************************************
 *
 *  Enable receive unit.
 *
 *  Programs the RX side of the MAC: interrupt moderation, checksum
 *  offload, optional RSS (EM_MULTIQUEUE), per-queue descriptor ring
 *  base/head/tail registers, chip-specific jumbo workarounds, and
 *  finally the Receive Control register.  Register write order
 *  follows the hardware bring-up sequence; do not reorder casually.
 *
 **********************************************************************/

static void
em_initialize_receive_unit(struct adapter *adapter)
{
        struct rx_ring  *rxr = adapter->rx_rings;
        if_t ifp = adapter->ifp;
        struct e1000_hw *hw = &adapter->hw;
        u64     bus_addr;
        u32     rctl, rxcsum;

        INIT_DEBUGOUT("em_initialize_receive_units: begin");

        /*
         * Make sure receives are disabled while setting
         * up the descriptor ring
         */
        rctl = E1000_READ_REG(hw, E1000_RCTL);
        /* Do not disable if ever enabled on this hardware */
        if ((hw->mac.type != e1000_82574) && (hw->mac.type != e1000_82583))
                E1000_WRITE_REG(hw, E1000_RCTL, rctl & ~E1000_RCTL_EN);

        /* Absolute and packet interrupt delay timers */
        E1000_WRITE_REG(&adapter->hw, E1000_RADV,
            adapter->rx_abs_int_delay.value);

        E1000_WRITE_REG(&adapter->hw, E1000_RDTR,
            adapter->rx_int_delay.value);
        /*
         * Set the interrupt throttling rate. Value is calculated
         * as DEFAULT_ITR = 1/(MAX_INTS_PER_SEC * 256ns)
         */
        E1000_WRITE_REG(hw, E1000_ITR, DEFAULT_ITR);

        /*
        ** When using MSIX interrupts we need to throttle
        ** using the EITR register (82574 only)
        */
        if (hw->mac.type == e1000_82574) {
                u32 rfctl;
                for (int i = 0; i < 4; i++)
                        E1000_WRITE_REG(hw, E1000_EITR_82574(i),
                            DEFAULT_ITR);
                /* Disable accelerated acknowledge */
                rfctl = E1000_READ_REG(hw, E1000_RFCTL);
                rfctl |= E1000_RFCTL_ACK_DIS;
                E1000_WRITE_REG(hw, E1000_RFCTL, rfctl);
        }

        /* RX checksum offload: enabled only if the interface asked for it */
        rxcsum = E1000_READ_REG(hw, E1000_RXCSUM);
        if (if_getcapenable(ifp) & IFCAP_RXCSUM) {
#ifdef EM_MULTIQUEUE
                /* PCSD is required for RSS hash delivery */
                rxcsum |= E1000_RXCSUM_TUOFL |
                          E1000_RXCSUM_IPOFL |
                          E1000_RXCSUM_PCSD;
#else
                rxcsum |= E1000_RXCSUM_TUOFL;
#endif
        } else
                rxcsum &= ~E1000_RXCSUM_TUOFL;

        E1000_WRITE_REG(hw, E1000_RXCSUM, rxcsum);

#ifdef EM_MULTIQUEUE
        if (adapter->num_queues > 1) {
                uint32_t rss_key[10];
                uint32_t reta;
                int i;

                /*
                * Configure RSS key (random per boot)
                */
                arc4rand(rss_key, sizeof(rss_key), 0);
                for (i = 0; i < 10; ++i)
                        E1000_WRITE_REG_ARRAY(hw,E1000_RSSRK(0), i, rss_key[i]);

                /*
                * Configure RSS redirect table in following fashion:
                * (hash & ring_cnt_mask) == rdr_table[(hash & rdr_table_mask)]
                */
                reta = 0;
                for (i = 0; i < 4; ++i) {
                        uint32_t q;
                        q = (i % adapter->num_queues) << 7;
                        reta |= q << (8 * i);
                }
                /* The same 4-entry pattern fills all 32 RETA registers */
                for (i = 0; i < 32; ++i)
                        E1000_WRITE_REG(hw, E1000_RETA(i), reta);

                E1000_WRITE_REG(hw, E1000_MRQC, E1000_MRQC_RSS_ENABLE_2Q | 
                                E1000_MRQC_RSS_FIELD_IPV4_TCP |
                                E1000_MRQC_RSS_FIELD_IPV4 |
                                E1000_MRQC_RSS_FIELD_IPV6_TCP_EX |
                                E1000_MRQC_RSS_FIELD_IPV6_EX |
                                E1000_MRQC_RSS_FIELD_IPV6 |
                                E1000_MRQC_RSS_FIELD_IPV6_TCP);
        }
#endif
        /*
        ** XXX TEMPORARY WORKAROUND: on some systems with 82573
        ** long latencies are observed, like Lenovo X60. This
        ** change eliminates the problem, but since having positive
        ** values in RDTR is a known source of problems on other
        ** platforms another solution is being sought.
        */
        if (hw->mac.type == e1000_82573)
                E1000_WRITE_REG(hw, E1000_RDTR, 0x20);

        for (int i = 0; i < adapter->num_queues; i++, rxr++) {
                /* Setup the Base and Length of the Rx Descriptor Ring */
                u32 rdt = adapter->num_rx_desc - 1; /* default */

                bus_addr = rxr->rxdma.dma_paddr;
                E1000_WRITE_REG(hw, E1000_RDLEN(i),
                    adapter->num_rx_desc * sizeof(struct e1000_rx_desc));
                E1000_WRITE_REG(hw, E1000_RDBAH(i), (u32)(bus_addr >> 32));
                E1000_WRITE_REG(hw, E1000_RDBAL(i), (u32)bus_addr);
                /* Setup the Head and Tail Descriptor Pointers */
                E1000_WRITE_REG(hw, E1000_RDH(i), 0);
#ifdef DEV_NETMAP
                /*
                 * an init() while a netmap client is active must
                 * preserve the rx buffers passed to userspace.
                 */
                if (if_getcapenable(ifp) & IFCAP_NETMAP) {
                        struct netmap_adapter *na = netmap_getna(adapter->ifp);
                        rdt -= nm_kr_rxspace(&na->rx_rings[i]);
                }
#endif /* DEV_NETMAP */
                E1000_WRITE_REG(hw, E1000_RDT(i), rdt);
        }

        /*
         * Set PTHRESH for improved jumbo performance
         * According to 10.2.5.11 of Intel 82574 Datasheet,
         * RXDCTL(1) is written whenever RXDCTL(0) is written.
         * Only write to RXDCTL(1) if there is a need for different
         * settings.
         */
        if (((adapter->hw.mac.type == e1000_ich9lan) ||
            (adapter->hw.mac.type == e1000_pch2lan) ||
            (adapter->hw.mac.type == e1000_ich10lan)) &&
            (if_getmtu(ifp) > ETHERMTU)) {
                u32 rxdctl = E1000_READ_REG(hw, E1000_RXDCTL(0));
                E1000_WRITE_REG(hw, E1000_RXDCTL(0), rxdctl | 3);
        } else if ((adapter->hw.mac.type == e1000_82574) &&
                  (if_getmtu(ifp) > ETHERMTU)) {
                for (int i = 0; i < adapter->num_queues; i++) {
                        u32 rxdctl = E1000_READ_REG(hw, E1000_RXDCTL(i));

                        rxdctl |= 0x20; /* PTHRESH */
                        rxdctl |= 4 << 8; /* HTHRESH */
                        rxdctl |= 4 << 16;/* WTHRESH */
                        rxdctl |= 1 << 24; /* Switch to granularity */
                        E1000_WRITE_REG(hw, E1000_RXDCTL(i), rxdctl);
                }
        }
                
        /* PCH2 and later need the LV jumbo workaround toggled per MTU */
        if (adapter->hw.mac.type >= e1000_pch2lan) {
                if (if_getmtu(ifp) > ETHERMTU)
                        e1000_lv_jumbo_workaround_ich8lan(hw, TRUE);
                else
                        e1000_lv_jumbo_workaround_ich8lan(hw, FALSE);
        }

        /* Setup the Receive Control Register */
        rctl &= ~(3 << E1000_RCTL_MO_SHIFT);
        rctl |= E1000_RCTL_EN | E1000_RCTL_BAM |
            E1000_RCTL_LBM_NO | E1000_RCTL_RDMTS_HALF |
            (hw->mac.mc_filter_type << E1000_RCTL_MO_SHIFT);

        /* Strip the CRC */
        if (!em_disable_crc_stripping)
                rctl |= E1000_RCTL_SECRC;

        /* Make sure VLAN Filters are off */
        rctl &= ~E1000_RCTL_VFE;
        rctl &= ~E1000_RCTL_SBP;

        /* Receive buffer size must match the mbuf cluster size in use */
        if (adapter->rx_mbuf_sz == MCLBYTES)
                rctl |= E1000_RCTL_SZ_2048;
        else if (adapter->rx_mbuf_sz == MJUMPAGESIZE)
                rctl |= E1000_RCTL_SZ_4096 | E1000_RCTL_BSEX;
        else if (adapter->rx_mbuf_sz > MJUMPAGESIZE)
                rctl |= E1000_RCTL_SZ_8192 | E1000_RCTL_BSEX;

        /* Long Packet Enable for jumbo MTUs */
        if (if_getmtu(ifp) > ETHERMTU)
                rctl |= E1000_RCTL_LPE;
        else
                rctl &= ~E1000_RCTL_LPE;

        /* Write out the settings */
        E1000_WRITE_REG(hw, E1000_RCTL, rctl);

        return;
}
4559
4560
4561 /*********************************************************************
4562  *
4563  *  This routine executes in interrupt context. It replenishes
4564  *  the mbufs in the descriptor and sends data which has been
4565  *  dma'ed into host memory to upper layer.
4566  *
4567  *  We loop at most count times if count is > 0, or until done if
4568  *  count < 0.
4569  *  
4570  *  For polling we also now return the number of cleaned packets
4571  *********************************************************************/
4572 static bool
4573 em_rxeof(struct rx_ring *rxr, int count, int *done)
4574 {
4575         struct adapter          *adapter = rxr->adapter;
4576         if_t ifp = adapter->ifp;
4577         struct mbuf             *mp, *sendmp;
4578         u8                      status = 0;
4579         u16                     len;
4580         int                     i, processed, rxdone = 0;
4581         bool                    eop;
4582         struct e1000_rx_desc    *cur;
4583
4584         EM_RX_LOCK(rxr);
4585
4586         /* Sync the ring */
4587         bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
4588             BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
4589
4590
4591 #ifdef DEV_NETMAP
4592         if (netmap_rx_irq(ifp, rxr->me, &processed)) {
4593                 EM_RX_UNLOCK(rxr);
4594                 return (FALSE);
4595         }
4596 #endif /* DEV_NETMAP */
4597
4598         for (i = rxr->next_to_check, processed = 0; count != 0;) {
4599
4600                 if ((if_getdrvflags(ifp) & IFF_DRV_RUNNING) == 0)
4601                         break;
4602
4603                 cur = &rxr->rx_base[i];
4604                 status = cur->status;
4605                 mp = sendmp = NULL;
4606
4607                 if ((status & E1000_RXD_STAT_DD) == 0)
4608                         break;
4609
4610                 len = le16toh(cur->length);
4611                 eop = (status & E1000_RXD_STAT_EOP) != 0;
4612
4613                 if ((cur->errors & E1000_RXD_ERR_FRAME_ERR_MASK) ||
4614                     (rxr->discard == TRUE)) {
4615                         adapter->dropped_pkts++;
4616                         ++rxr->rx_discarded;
4617                         if (!eop) /* Catch subsequent segs */
4618                                 rxr->discard = TRUE;
4619                         else
4620                                 rxr->discard = FALSE;
4621                         em_rx_discard(rxr, i);
4622                         goto next_desc;
4623                 }
4624                 bus_dmamap_unload(rxr->rxtag, rxr->rx_buffers[i].map);
4625
4626                 /* Assign correct length to the current fragment */
4627                 mp = rxr->rx_buffers[i].m_head;
4628                 mp->m_len = len;
4629
4630                 /* Trigger for refresh */
4631                 rxr->rx_buffers[i].m_head = NULL;
4632
4633                 /* First segment? */
4634                 if (rxr->fmp == NULL) {
4635                         mp->m_pkthdr.len = len;
4636                         rxr->fmp = rxr->lmp = mp;
4637                 } else {
4638                         /* Chain mbuf's together */
4639                         mp->m_flags &= ~M_PKTHDR;
4640                         rxr->lmp->m_next = mp;
4641                         rxr->lmp = mp;
4642                         rxr->fmp->m_pkthdr.len += len;
4643                 }
4644
4645                 if (eop) {
4646                         --count;
4647                         sendmp = rxr->fmp;
4648                         if_setrcvif(sendmp, ifp);
4649                         if_inc_counter(ifp, IFCOUNTER_IPACKETS, 1);
4650                         em_receive_checksum(cur, sendmp);
4651 #ifndef __NO_STRICT_ALIGNMENT
4652                         if (adapter->hw.mac.max_frame_size >
4653                             (MCLBYTES - ETHER_ALIGN) &&
4654                             em_fixup_rx(rxr) != 0)
4655                                 goto skip;
4656 #endif
4657                         if (status & E1000_RXD_STAT_VP) {
4658                                 if_setvtag(sendmp, 
4659                                     le16toh(cur->special));
4660                                 sendmp->m_flags |= M_VLANTAG;
4661                         }
4662 #ifndef __NO_STRICT_ALIGNMENT
4663 skip:
4664 #endif
4665                         rxr->fmp = rxr->lmp = NULL;
4666                 }
4667 next_desc:
4668                 /* Sync the ring */
4669                 bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
4670                         BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
4671
4672                 /* Zero out the receive descriptors status. */
4673                 cur->status = 0;
4674                 ++rxdone;       /* cumulative for POLL */
4675                 ++processed;
4676
4677                 /* Advance our pointers to the next descriptor. */
4678                 if (++i == adapter->num_rx_desc)
4679                         i = 0;
4680
4681                 /* Send to the stack */
4682                 if (sendmp != NULL) {
4683                         rxr->next_to_check = i;
4684                         EM_RX_UNLOCK(rxr);
4685                         if_input(ifp, sendmp);
4686                         EM_RX_LOCK(rxr);
4687                         i = rxr->next_to_check;
4688                 }
4689
4690                 /* Only refresh mbufs every 8 descriptors */
4691                 if (processed == 8) {
4692                         em_refresh_mbufs(rxr, i);
4693                         processed = 0;
4694                 }
4695         }
4696
4697         /* Catch any remaining refresh work */
4698         if (e1000_rx_unrefreshed(rxr))
4699                 em_refresh_mbufs(rxr, i);
4700
4701         rxr->next_to_check = i;
4702         if (done != NULL)
4703                 *done = rxdone;
4704         EM_RX_UNLOCK(rxr);
4705
4706         return ((status & E1000_RXD_STAT_DD) ? TRUE : FALSE);
4707 }
4708
4709 static __inline void
4710 em_rx_discard(struct rx_ring *rxr, int i)
4711 {
4712         struct em_buffer        *rbuf;
4713
4714         rbuf = &rxr->rx_buffers[i];
4715         bus_dmamap_unload(rxr->rxtag, rbuf->map);
4716
4717         /* Free any previous pieces */
4718         if (rxr->fmp != NULL) {
4719                 rxr->fmp->m_flags |= M_PKTHDR;
4720                 m_freem(rxr->fmp);
4721                 rxr->fmp = NULL;
4722                 rxr->lmp = NULL;
4723         }
4724         /*
4725         ** Free buffer and allow em_refresh_mbufs()
4726         ** to clean up and recharge buffer.
4727         */
4728         if (rbuf->m_head) {
4729                 m_free(rbuf->m_head);
4730                 rbuf->m_head = NULL;
4731         }
4732         return;
4733 }
4734
4735 #ifndef __NO_STRICT_ALIGNMENT
4736 /*
4737  * When jumbo frames are enabled we should realign entire payload on
4738  * architecures with strict alignment. This is serious design mistake of 8254x
4739  * as it nullifies DMA operations. 8254x just allows RX buffer size to be
4740  * 2048/4096/8192/16384. What we really want is 2048 - ETHER_ALIGN to align its
4741  * payload. On architecures without strict alignment restrictions 8254x still
4742  * performs unaligned memory access which would reduce the performance too.
4743  * To avoid copying over an entire frame to align, we allocate a new mbuf and
4744  * copy ethernet header to the new mbuf. The new mbuf is prepended into the
4745  * existing mbuf chain.
4746  *
4747  * Be aware, best performance of the 8254x is achived only when jumbo frame is
4748  * not used at all on architectures with strict alignment.
4749  */
4750 static int
4751 em_fixup_rx(struct rx_ring *rxr)
4752 {
4753         struct adapter *adapter = rxr->adapter;
4754         struct mbuf *m, *n;
4755         int error;
4756
4757         error = 0;
4758         m = rxr->fmp;
4759         if (m->m_len <= (MCLBYTES - ETHER_HDR_LEN)) {
4760                 bcopy(m->m_data, m->m_data + ETHER_HDR_LEN, m->m_len);
4761                 m->m_data += ETHER_HDR_LEN;
4762         } else {
4763                 MGETHDR(n, M_NOWAIT, MT_DATA);
4764                 if (n != NULL) {
4765                         bcopy(m->m_data, n->m_data, ETHER_HDR_LEN);
4766                         m->m_data += ETHER_HDR_LEN;
4767                         m->m_len -= ETHER_HDR_LEN;
4768                         n->m_len = ETHER_HDR_LEN;
4769                         M_MOVE_PKTHDR(n, m);
4770                         n->m_next = m;
4771                         rxr->fmp = n;
4772                 } else {
4773                         adapter->dropped_pkts++;
4774                         m_freem(rxr->fmp);
4775                         rxr->fmp = NULL;
4776                         error = ENOMEM;
4777                 }
4778         }
4779
4780         return (error);
4781 }
4782 #endif
4783
4784 /*********************************************************************
4785  *
4786  *  Verify that the hardware indicated that the checksum is valid.
4787  *  Inform the stack about the status of checksum so that stack
4788  *  doesn't spend time verifying the checksum.
4789  *
4790  *********************************************************************/
4791 static void
4792 em_receive_checksum(struct e1000_rx_desc *rx_desc, struct mbuf *mp)
4793 {
4794         mp->m_pkthdr.csum_flags = 0;
4795
4796         /* Ignore Checksum bit is set */
4797         if (rx_desc->status & E1000_RXD_STAT_IXSM)
4798                 return;
4799
4800         if (rx_desc->errors & (E1000_RXD_ERR_TCPE | E1000_RXD_ERR_IPE))
4801                 return;
4802
4803         /* IP Checksum Good? */
4804         if (rx_desc->status & E1000_RXD_STAT_IPCS)
4805                 mp->m_pkthdr.csum_flags = (CSUM_IP_CHECKED | CSUM_IP_VALID);
4806
4807         /* TCP or UDP checksum */
4808         if (rx_desc->status & (E1000_RXD_STAT_TCPCS | E1000_RXD_STAT_UDPCS)) {
4809                 mp->m_pkthdr.csum_flags |= (CSUM_DATA_VALID | CSUM_PSEUDO_HDR);
4810                 mp->m_pkthdr.csum_data = htons(0xffff);
4811         }
4812 }
4813
4814 /*
4815  * This routine is run via an vlan
4816  * config EVENT
4817  */
4818 static void
4819 em_register_vlan(void *arg, if_t ifp, u16 vtag)
4820 {
4821         struct adapter  *adapter = if_getsoftc(ifp);
4822         u32             index, bit;
4823
4824         if ((void*)adapter !=  arg)   /* Not our event */
4825                 return;
4826
4827         if ((vtag == 0) || (vtag > 4095))       /* Invalid ID */
4828                 return;
4829
4830         EM_CORE_LOCK(adapter);
4831         index = (vtag >> 5) & 0x7F;
4832         bit = vtag & 0x1F;
4833         adapter->shadow_vfta[index] |= (1 << bit);
4834         ++adapter->num_vlans;
4835         /* Re-init to load the changes */
4836         if (if_getcapenable(ifp) & IFCAP_VLAN_HWFILTER)
4837                 em_init_locked(adapter);
4838         EM_CORE_UNLOCK(adapter);
4839 }
4840
4841 /*
4842  * This routine is run via an vlan
4843  * unconfig EVENT
4844  */
4845 static void
4846 em_unregister_vlan(void *arg, if_t ifp, u16 vtag)
4847 {
4848         struct adapter  *adapter = if_getsoftc(ifp);
4849         u32             index, bit;
4850
4851         if (adapter != arg)
4852                 return;
4853
4854         if ((vtag == 0) || (vtag > 4095))       /* Invalid */
4855                 return;
4856
4857         EM_CORE_LOCK(adapter);
4858         index = (vtag >> 5) & 0x7F;
4859         bit = vtag & 0x1F;
4860         adapter->shadow_vfta[index] &= ~(1 << bit);
4861         --adapter->num_vlans;
4862         /* Re-init to load the changes */
4863         if (if_getcapenable(ifp) & IFCAP_VLAN_HWFILTER)
4864                 em_init_locked(adapter);
4865         EM_CORE_UNLOCK(adapter);
4866 }
4867
4868 static void
4869 em_setup_vlan_hw_support(struct adapter *adapter)
4870 {
4871         struct e1000_hw *hw = &adapter->hw;
4872         u32             reg;
4873
4874         /*
4875         ** We get here thru init_locked, meaning
4876         ** a soft reset, this has already cleared
4877         ** the VFTA and other state, so if there
4878         ** have been no vlan's registered do nothing.
4879         */
4880         if (adapter->num_vlans == 0)
4881                 return;
4882
4883         /*
4884         ** A soft reset zero's out the VFTA, so
4885         ** we need to repopulate it now.
4886         */
4887         for (int i = 0; i < EM_VFTA_SIZE; i++)
4888                 if (adapter->shadow_vfta[i] != 0)
4889                         E1000_WRITE_REG_ARRAY(hw, E1000_VFTA,
4890                             i, adapter->shadow_vfta[i]);
4891
4892         reg = E1000_READ_REG(hw, E1000_CTRL);
4893         reg |= E1000_CTRL_VME;
4894         E1000_WRITE_REG(hw, E1000_CTRL, reg);
4895
4896         /* Enable the Filter Table */
4897         reg = E1000_READ_REG(hw, E1000_RCTL);
4898         reg &= ~E1000_RCTL_CFIEN;
4899         reg |= E1000_RCTL_VFE;
4900         E1000_WRITE_REG(hw, E1000_RCTL, reg);
4901 }
4902
4903 static void
4904 em_enable_intr(struct adapter *adapter)
4905 {
4906         struct e1000_hw *hw = &adapter->hw;
4907         u32 ims_mask = IMS_ENABLE_MASK;
4908
4909         if (hw->mac.type == e1000_82574) {
4910                 E1000_WRITE_REG(hw, EM_EIAC, adapter->ims);
4911                 ims_mask |= adapter->ims;
4912         } 
4913         E1000_WRITE_REG(hw, E1000_IMS, ims_mask);
4914 }
4915
4916 static void
4917 em_disable_intr(struct adapter *adapter)
4918 {
4919         struct e1000_hw *hw = &adapter->hw;
4920
4921         if (hw->mac.type == e1000_82574)
4922                 E1000_WRITE_REG(hw, EM_EIAC, 0);
4923         E1000_WRITE_REG(&adapter->hw, E1000_IMC, 0xffffffff);
4924 }
4925
4926 /*
4927  * Bit of a misnomer, what this really means is
4928  * to enable OS management of the system... aka
4929  * to disable special hardware management features 
4930  */
4931 static void
4932 em_init_manageability(struct adapter *adapter)
4933 {
4934         /* A shared code workaround */
4935 #define E1000_82542_MANC2H E1000_MANC2H
4936         if (adapter->has_manage) {
4937                 int manc2h = E1000_READ_REG(&adapter->hw, E1000_MANC2H);
4938                 int manc = E1000_READ_REG(&adapter->hw, E1000_MANC);
4939
4940                 /* disable hardware interception of ARP */
4941                 manc &= ~(E1000_MANC_ARP_EN);
4942
4943                 /* enable receiving management packets to the host */
4944                 manc |= E1000_MANC_EN_MNG2HOST;
4945 #define E1000_MNG2HOST_PORT_623 (1 << 5)
4946 #define E1000_MNG2HOST_PORT_664 (1 << 6)
4947                 manc2h |= E1000_MNG2HOST_PORT_623;
4948                 manc2h |= E1000_MNG2HOST_PORT_664;
4949                 E1000_WRITE_REG(&adapter->hw, E1000_MANC2H, manc2h);
4950                 E1000_WRITE_REG(&adapter->hw, E1000_MANC, manc);
4951         }
4952 }
4953
4954 /*
4955  * Give control back to hardware management
4956  * controller if there is one.
4957  */
4958 static void
4959 em_release_manageability(struct adapter *adapter)
4960 {
4961         if (adapter->has_manage) {
4962                 int manc = E1000_READ_REG(&adapter->hw, E1000_MANC);
4963
4964                 /* re-enable hardware interception of ARP */
4965                 manc |= E1000_MANC_ARP_EN;
4966                 manc &= ~E1000_MANC_EN_MNG2HOST;
4967
4968                 E1000_WRITE_REG(&adapter->hw, E1000_MANC, manc);
4969         }
4970 }
4971
4972 /*
4973  * em_get_hw_control sets the {CTRL_EXT|FWSM}:DRV_LOAD bit.
4974  * For ASF and Pass Through versions of f/w this means
4975  * that the driver is loaded. For AMT version type f/w
4976  * this means that the network i/f is open.
4977  */
4978 static void
4979 em_get_hw_control(struct adapter *adapter)
4980 {
4981         u32 ctrl_ext, swsm;
4982
4983         if (adapter->hw.mac.type == e1000_82573) {
4984                 swsm = E1000_READ_REG(&adapter->hw, E1000_SWSM);
4985                 E1000_WRITE_REG(&adapter->hw, E1000_SWSM,
4986                     swsm | E1000_SWSM_DRV_LOAD);
4987                 return;
4988         }
4989         /* else */
4990         ctrl_ext = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
4991         E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT,
4992             ctrl_ext | E1000_CTRL_EXT_DRV_LOAD);
4993         return;
4994 }
4995
4996 /*
4997  * em_release_hw_control resets {CTRL_EXT|FWSM}:DRV_LOAD bit.
4998  * For ASF and Pass Through versions of f/w this means that
4999  * the driver is no longer loaded. For AMT versions of the
5000  * f/w this means that the network i/f is closed.
5001  */
5002 static void
5003 em_release_hw_control(struct adapter *adapter)
5004 {
5005         u32 ctrl_ext, swsm;
5006
5007         if (!adapter->has_manage)
5008                 return;
5009
5010         if (adapter->hw.mac.type == e1000_82573) {
5011                 swsm = E1000_READ_REG(&adapter->hw, E1000_SWSM);
5012                 E1000_WRITE_REG(&adapter->hw, E1000_SWSM,
5013                     swsm & ~E1000_SWSM_DRV_LOAD);
5014                 return;
5015         }
5016         /* else */
5017         ctrl_ext = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
5018         E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT,
5019             ctrl_ext & ~E1000_CTRL_EXT_DRV_LOAD);
5020         return;
5021 }
5022
5023 static int
5024 em_is_valid_ether_addr(u8 *addr)
5025 {
5026         char zero_addr[6] = { 0, 0, 0, 0, 0, 0 };
5027
5028         if ((addr[0] & 1) || (!bcmp(addr, zero_addr, ETHER_ADDR_LEN))) {
5029                 return (FALSE);
5030         }
5031
5032         return (TRUE);
5033 }
5034
5035 /*
5036 ** Parse the interface capabilities with regard
5037 ** to both system management and wake-on-lan for
5038 ** later use.
5039 */
static void
em_get_wakeup(device_t dev)
{
	struct adapter	*adapter = device_get_softc(dev);
	u16		eeprom_data = 0, device_id, apme_mask;

	adapter->has_manage = e1000_enable_mng_pass_thru(&adapter->hw);
	apme_mask = EM_EEPROM_APME;

	/*
	 * Locate the APME (wake enable) bit: most MACs keep it in an
	 * NVM init control word, the ICH/PCH families in the WUC
	 * register.  has_amt is set for parts with AMT-style firmware.
	 */
	switch (adapter->hw.mac.type) {
	case e1000_82573:
	case e1000_82583:
		adapter->has_amt = TRUE;
		/* Falls thru */
	case e1000_82571:
	case e1000_82572:
	case e1000_80003es2lan:
		/* Function 1 is port B, which has its own control word. */
		if (adapter->hw.bus.func == 1) {
			e1000_read_nvm(&adapter->hw,
			    NVM_INIT_CONTROL3_PORT_B, 1, &eeprom_data);
			break;
		} else
			e1000_read_nvm(&adapter->hw,
			    NVM_INIT_CONTROL3_PORT_A, 1, &eeprom_data);
		break;
	case e1000_ich8lan:
	case e1000_ich9lan:
	case e1000_ich10lan:
	case e1000_pchlan:
	case e1000_pch2lan:
		apme_mask = E1000_WUC_APME;
		adapter->has_amt = TRUE;
		eeprom_data = E1000_READ_REG(&adapter->hw, E1000_WUC);
		break;
	default:
		e1000_read_nvm(&adapter->hw,
		    NVM_INIT_CONTROL3_PORT_A, 1, &eeprom_data);
		break;
	}
	/* Default wake events when APME is set: magic packet + multicast. */
	if (eeprom_data & apme_mask)
		adapter->wol = (E1000_WUFC_MAG | E1000_WUFC_MC);
	/*
	 * We have the eeprom settings, now apply the special cases
	 * where the eeprom may be wrong or the board won't support
	 * wake on lan on a particular port
	 */
	device_id = pci_get_device(dev);
	switch (device_id) {
	case E1000_DEV_ID_82571EB_FIBER:
		/* Wake events only supported on port A for dual fiber
		 * regardless of eeprom setting */
		if (E1000_READ_REG(&adapter->hw, E1000_STATUS) &
		    E1000_STATUS_FUNC_1)
			adapter->wol = 0;
		break;
	case E1000_DEV_ID_82571EB_QUAD_COPPER:
	case E1000_DEV_ID_82571EB_QUAD_FIBER:
	case E1000_DEV_ID_82571EB_QUAD_COPPER_LP:
		/* if quad port adapter, disable WoL on all but port A */
		if (global_quad_port_a != 0)
			adapter->wol = 0;
		/* Reset for multiple quad port adapters */
		if (++global_quad_port_a == 4)
			global_quad_port_a = 0;
		break;
	}
	return;
}
5108
5109
5110 /*
5111  * Enable PCI Wake On Lan capability
5112  */
static void
em_enable_wakeup(device_t dev)
{
	struct adapter	*adapter = device_get_softc(dev);
	if_t ifp = adapter->ifp;
	u32		pmc, ctrl, ctrl_ext, rctl;
	u16		status;

	/* Nothing to do without a PCI power-management capability. */
	if ((pci_find_cap(dev, PCIY_PMG, &pmc) != 0))
		return;

	/* Advertise the wakeup capability */
	ctrl = E1000_READ_REG(&adapter->hw, E1000_CTRL);
	ctrl |= (E1000_CTRL_SWDPIN2 | E1000_CTRL_SWDPIN3);
	E1000_WRITE_REG(&adapter->hw, E1000_CTRL, ctrl);
	/* NOTE(review): WUC is written again below for non-PCH parts --
	 * this early write may be redundant; confirm before removing. */
	E1000_WRITE_REG(&adapter->hw, E1000_WUC, E1000_WUC_PME_EN);

	if ((adapter->hw.mac.type == e1000_ich8lan) ||
	    (adapter->hw.mac.type == e1000_pchlan) ||
	    (adapter->hw.mac.type == e1000_ich9lan) ||
	    (adapter->hw.mac.type == e1000_ich10lan))
		e1000_suspend_workarounds_ich8lan(&adapter->hw);

	/* Keep the laser running on Fiber adapters */
	if (adapter->hw.phy.media_type == e1000_media_type_fiber ||
	    adapter->hw.phy.media_type == e1000_media_type_internal_serdes) {
		ctrl_ext = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
		ctrl_ext |= E1000_CTRL_EXT_SDP3_DATA;
		E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT, ctrl_ext);
	}

	/*
	** Determine type of Wakeup: note that wol
	** is set with all bits on by default.
	*/
	if ((if_getcapenable(ifp) & IFCAP_WOL_MAGIC) == 0)
		adapter->wol &= ~E1000_WUFC_MAG;

	if ((if_getcapenable(ifp) & IFCAP_WOL_MCAST) == 0)
		adapter->wol &= ~E1000_WUFC_MC;
	else {
		/* Multicast wake requires multicast promiscuous receive. */
		rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
		rctl |= E1000_RCTL_MPE;
		E1000_WRITE_REG(&adapter->hw, E1000_RCTL, rctl);
	}

	/* PCH parts program the wake state through the PHY instead. */
	if ((adapter->hw.mac.type == e1000_pchlan) ||
	    (adapter->hw.mac.type == e1000_pch2lan)) {
		/* NOTE(review): a nonzero return skips the PME request
		 * below -- confirm that is the intended failure mode. */
		if (em_enable_phy_wakeup(adapter))
			return;
	} else {
		E1000_WRITE_REG(&adapter->hw, E1000_WUC, E1000_WUC_PME_EN);
		E1000_WRITE_REG(&adapter->hw, E1000_WUFC, adapter->wol);
	}

	if (adapter->hw.phy.type == e1000_phy_igp_3)
		e1000_igp3_phy_powerdown_workaround_ich8lan(&adapter->hw);

	/* Request PME */
	status = pci_read_config(dev, pmc + PCIR_POWER_STATUS, 2);
	status &= ~(PCIM_PSTAT_PME | PCIM_PSTAT_PMEENABLE);
	if (if_getcapenable(ifp) & IFCAP_WOL)
		status |= PCIM_PSTAT_PME | PCIM_PSTAT_PMEENABLE;
	pci_write_config(dev, pmc + PCIR_POWER_STATUS, status, 2);

	return;
}
5180
5181 /*
5182 ** WOL in the newer chipset interfaces (pchlan)
5183 ** require thing to be copied into the phy
5184 */
static int
em_enable_phy_wakeup(struct adapter *adapter)
{
	struct e1000_hw *hw = &adapter->hw;
	u32 mreg, ret = 0;
	u16 preg;

	/* copy MAC RARs to PHY RARs */
	e1000_copy_rx_addrs_to_phy_ich8lan(hw);

	/* copy MAC MTA to PHY MTA (each 32-bit MTA entry spans two
	 * 16-bit PHY registers: low word then high word) */
	for (int i = 0; i < adapter->hw.mac.mta_reg_count; i++) {
		mreg = E1000_READ_REG_ARRAY(hw, E1000_MTA, i);
		e1000_write_phy_reg(hw, BM_MTA(i), (u16)(mreg & 0xFFFF));
		e1000_write_phy_reg(hw, BM_MTA(i) + 1,
		    (u16)((mreg >> 16) & 0xFFFF));
	}

	/* configure PHY Rx Control register: mirror the relevant MAC
	 * RCTL/CTRL bits into the PHY's BM_RCTL copy */
	e1000_read_phy_reg(&adapter->hw, BM_RCTL, &preg);
	mreg = E1000_READ_REG(hw, E1000_RCTL);
	if (mreg & E1000_RCTL_UPE)
		preg |= BM_RCTL_UPE;
	if (mreg & E1000_RCTL_MPE)
		preg |= BM_RCTL_MPE;
	preg &= ~(BM_RCTL_MO_MASK);
	if (mreg & E1000_RCTL_MO_3)
		preg |= (((mreg & E1000_RCTL_MO_3) >> E1000_RCTL_MO_SHIFT)
				<< BM_RCTL_MO_SHIFT);
	if (mreg & E1000_RCTL_BAM)
		preg |= BM_RCTL_BAM;
	if (mreg & E1000_RCTL_PMCF)
		preg |= BM_RCTL_PMCF;
	/* Flow control enable comes from CTRL, not RCTL. */
	mreg = E1000_READ_REG(hw, E1000_CTRL);
	if (mreg & E1000_CTRL_RFCE)
		preg |= BM_RCTL_RFCE;
	e1000_write_phy_reg(&adapter->hw, BM_RCTL, preg);

	/* enable PHY wakeup in MAC register */
	E1000_WRITE_REG(hw, E1000_WUC,
	    E1000_WUC_PHY_WAKE | E1000_WUC_PME_EN);
	E1000_WRITE_REG(hw, E1000_WUFC, adapter->wol);

	/* configure and enable PHY wakeup in PHY registers */
	e1000_write_phy_reg(&adapter->hw, BM_WUFC, adapter->wol);
	e1000_write_phy_reg(&adapter->hw, BM_WUC, E1000_WUC_PME_EN);

	/* activate PHY wakeup: must hold the PHY semaphore while
	 * touching the wakeup-control page via raw MDIC accesses */
	ret = hw->phy.ops.acquire(hw);
	if (ret) {
		printf("Could not acquire PHY\n");
		return ret;
	}
	/* Select the wakeup-control (769) page, then set the host
	 * wakeup enable bits in BM_WUC_ENABLE_REG. */
	e1000_write_phy_reg_mdic(hw, IGP01E1000_PHY_PAGE_SELECT,
				 (BM_WUC_ENABLE_PAGE << IGP_PAGE_SHIFT));
	ret = e1000_read_phy_reg_mdic(hw, BM_WUC_ENABLE_REG, &preg);
	if (ret) {
		printf("Could not read PHY page 769\n");
		goto out;
	}
	preg |= BM_WUC_ENABLE_BIT | BM_WUC_HOST_WU_BIT;
	ret = e1000_write_phy_reg_mdic(hw, BM_WUC_ENABLE_REG, preg);
	if (ret)
		printf("Could not set PHY Host Wakeup bit\n");
out:
	hw->phy.ops.release(hw);

	/* 0 on success, shared-code error otherwise. */
	return ret;
}
5254
5255 static void
5256 em_led_func(void *arg, int onoff)
5257 {
5258         struct adapter  *adapter = arg;
5259  
5260         EM_CORE_LOCK(adapter);
5261         if (onoff) {
5262                 e1000_setup_led(&adapter->hw);
5263                 e1000_led_on(&adapter->hw);
5264         } else {
5265                 e1000_led_off(&adapter->hw);
5266                 e1000_cleanup_led(&adapter->hw);
5267         }
5268         EM_CORE_UNLOCK(adapter);
5269 }
5270
5271 /*
5272 ** Disable the L0S and L1 LINK states
5273 */
5274 static void
5275 em_disable_aspm(struct adapter *adapter)
5276 {
5277         int             base, reg;
5278         u16             link_cap,link_ctrl;
5279         device_t        dev = adapter->dev;
5280
5281         switch (adapter->hw.mac.type) {
5282                 case e1000_82573:
5283                 case e1000_82574:
5284                 case e1000_82583:
5285                         break;
5286                 default:
5287                         return;
5288         }
5289         if (pci_find_cap(dev, PCIY_EXPRESS, &base) != 0)
5290                 return;
5291         reg = base + PCIER_LINK_CAP;
5292         link_cap = pci_read_config(dev, reg, 2);
5293         if ((link_cap & PCIEM_LINK_CAP_ASPM) == 0)
5294                 return;
5295         reg = base + PCIER_LINK_CTL;
5296         link_ctrl = pci_read_config(dev, reg, 2);
5297         link_ctrl &= ~PCIEM_LINK_CTL_ASPMC;
5298         pci_write_config(dev, reg, link_ctrl, 2);
5299         return;
5300 }
5301
5302 /**********************************************************************
5303  *
5304  *  Update the board statistics counters.
5305  *
5306  **********************************************************************/
5307 static void
5308 em_update_stats_counters(struct adapter *adapter)
5309 {
5310
5311         if(adapter->hw.phy.media_type == e1000_media_type_copper ||
5312            (E1000_READ_REG(&adapter->hw, E1000_STATUS) & E1000_STATUS_LU)) {
5313                 adapter->stats.symerrs += E1000_READ_REG(&adapter->hw, E1000_SYMERRS);
5314                 adapter->stats.sec += E1000_READ_REG(&adapter->hw, E1000_SEC);
5315         }
5316         adapter->stats.crcerrs += E1000_READ_REG(&adapter->hw, E1000_CRCERRS);
5317         adapter->stats.mpc += E1000_READ_REG(&adapter->hw, E1000_MPC);
5318         adapter->stats.scc += E1000_READ_REG(&adapter->hw, E1000_SCC);
5319         adapter->stats.ecol += E1000_READ_REG(&adapter->hw, E1000_ECOL);
5320
5321         adapter->stats.mcc += E1000_READ_REG(&adapter->hw, E1000_MCC);
5322         adapter->stats.latecol += E1000_READ_REG(&adapter->hw, E1000_LATECOL);
5323         adapter->stats.colc += E1000_READ_REG(&adapter->hw, E1000_COLC);
5324         adapter->stats.dc += E1000_READ_REG(&adapter->hw, E1000_DC);
5325         adapter->stats.rlec += E1000_READ_REG(&adapter->hw, E1000_RLEC);
5326         adapter->stats.xonrxc += E1000_READ_REG(&adapter->hw, E1000_XONRXC);
5327         adapter->stats.xontxc += E1000_READ_REG(&adapter->hw, E1000_XONTXC);
5328         adapter->stats.xoffrxc += E1000_READ_REG(&adapter->hw, E1000_XOFFRXC);
5329         adapter->stats.xofftxc += E1000_READ_REG(&adapter->hw, E1000_XOFFTXC);
5330         adapter->stats.fcruc += E1000_READ_REG(&adapter->hw, E1000_FCRUC);
5331         adapter->stats.prc64 += E1000_READ_REG(&adapter->hw, E1000_PRC64);
5332         adapter->stats.prc127 += E1000_READ_REG(&adapter->hw, E1000_PRC127);
5333         adapter->stats.prc255 += E1000_READ_REG(&adapter->hw, E1000_PRC255);
5334         adapter->stats.prc511 += E1000_READ_REG(&adapter->hw, E1000_PRC511);
5335         adapter->stats.prc1023 += E1000_READ_REG(&adapter->hw, E1000_PRC1023);
5336         adapter->stats.prc1522 += E1000_READ_REG(&adapter->hw, E1000_PRC1522);
5337         adapter->stats.gprc += E1000_READ_REG(&adapter->hw, E1000_GPRC);
5338         adapter->stats.bprc += E1000_READ_REG(&adapter->hw, E1000_BPRC);
5339         adapter->stats.mprc += E1000_READ_REG(&adapter->hw, E1000_MPRC);
5340         adapter->stats.gptc += E1000_READ_REG(&adapter->hw, E1000_GPTC);
5341
5342         /* For the 64-bit byte counters the low dword must be read first. */
5343         /* Both registers clear on the read of the high dword */
5344
5345         adapter->stats.gorc += E1000_READ_REG(&adapter->hw, E1000_GORCL) +
5346             ((u64)E1000_READ_REG(&adapter->hw, E1000_GORCH) << 32);
5347         adapter->stats.gotc += E1000_READ_REG(&adapter->hw, E1000_GOTCL) +
5348             ((u64)E1000_READ_REG(&adapter->hw, E1000_GOTCH) << 32);
5349
5350         adapter->stats.rnbc += E1000_READ_REG(&adapter->hw, E1000_RNBC);
5351         adapter->stats.ruc += E1000_READ_REG(&adapter->hw, E1000_RUC);
5352         adapter->stats.rfc += E1000_READ_REG(&adapter->hw, E1000_RFC);
5353         adapter->stats.roc += E1000_READ_REG(&adapter->hw, E1000_ROC);
5354         adapter->stats.rjc += E1000_READ_REG(&adapter->hw, E1000_RJC);
5355
5356         adapter->stats.tor += E1000_READ_REG(&adapter->hw, E1000_TORH);
5357         adapter->stats.tot += E1000_READ_REG(&adapter->hw, E1000_TOTH);
5358
5359         adapter->stats.tpr += E1000_READ_REG(&adapter->hw, E1000_TPR);
5360         adapter->stats.tpt += E1000_READ_REG(&adapter->hw, E1000_TPT);
5361         adapter->stats.ptc64 += E1000_READ_REG(&adapter->hw, E1000_PTC64);
5362         adapter->stats.ptc127 += E1000_READ_REG(&adapter->hw, E1000_PTC127);
5363         adapter->stats.ptc255 += E1000_READ_REG(&adapter->hw, E1000_PTC255);
5364         adapter->stats.ptc511 += E1000_READ_REG(&adapter->hw, E1000_PTC511);
5365         adapter->stats.ptc1023 += E1000_READ_REG(&adapter->hw, E1000_PTC1023);
5366         adapter->stats.ptc1522 += E1000_READ_REG(&adapter->hw, E1000_PTC1522);
5367         adapter->stats.mptc += E1000_READ_REG(&adapter->hw, E1000_MPTC);
5368         adapter->stats.bptc += E1000_READ_REG(&adapter->hw, E1000_BPTC);
5369
5370         /* Interrupt Counts */
5371
5372         adapter->stats.iac += E1000_READ_REG(&adapter->hw, E1000_IAC);
5373         adapter->stats.icrxptc += E1000_READ_REG(&adapter->hw, E1000_ICRXPTC);
5374         adapter->stats.icrxatc += E1000_READ_REG(&adapter->hw, E1000_ICRXATC);
5375         adapter->stats.ictxptc += E1000_READ_REG(&adapter->hw, E1000_ICTXPTC);
5376         adapter->stats.ictxatc += E1000_READ_REG(&adapter->hw, E1000_ICTXATC);
5377         adapter->stats.ictxqec += E1000_READ_REG(&adapter->hw, E1000_ICTXQEC);
5378         adapter->stats.ictxqmtc += E1000_READ_REG(&adapter->hw, E1000_ICTXQMTC);
5379         adapter->stats.icrxdmtc += E1000_READ_REG(&adapter->hw, E1000_ICRXDMTC);
5380         adapter->stats.icrxoc += E1000_READ_REG(&adapter->hw, E1000_ICRXOC);
5381
5382         if (adapter->hw.mac.type >= e1000_82543) {
5383                 adapter->stats.algnerrc += 
5384                 E1000_READ_REG(&adapter->hw, E1000_ALGNERRC);
5385                 adapter->stats.rxerrc += 
5386                 E1000_READ_REG(&adapter->hw, E1000_RXERRC);
5387                 adapter->stats.tncrs += 
5388                 E1000_READ_REG(&adapter->hw, E1000_TNCRS);
5389                 adapter->stats.cexterr += 
5390                 E1000_READ_REG(&adapter->hw, E1000_CEXTERR);
5391                 adapter->stats.tsctc += 
5392                 E1000_READ_REG(&adapter->hw, E1000_TSCTC);
5393                 adapter->stats.tsctfc += 
5394                 E1000_READ_REG(&adapter->hw, E1000_TSCTFC);
5395         }
5396 }
5397
5398 static uint64_t
5399 em_get_counter(if_t ifp, ift_counter cnt)
5400 {
5401         struct adapter *adapter;
5402
5403         adapter = if_getsoftc(ifp);
5404
5405         switch (cnt) {
5406         case IFCOUNTER_COLLISIONS:
5407                 return (adapter->stats.colc);
5408         case IFCOUNTER_IERRORS:
5409                 return (adapter->dropped_pkts + adapter->stats.rxerrc +
5410                     adapter->stats.crcerrs + adapter->stats.algnerrc +
5411                     adapter->stats.ruc + adapter->stats.roc +
5412                     adapter->stats.mpc + adapter->stats.cexterr);
5413         case IFCOUNTER_OERRORS:
5414                 return (adapter->stats.ecol + adapter->stats.latecol +
5415                     adapter->watchdog_events);
5416         default:
5417                 return (if_get_counter_default(ifp, cnt));
5418         }
5419 }
5420
5421 /* Export a single 32-bit register via a read-only sysctl. */
5422 static int
5423 em_sysctl_reg_handler(SYSCTL_HANDLER_ARGS)
5424 {
5425         struct adapter *adapter;
5426         u_int val;
5427
5428         adapter = oidp->oid_arg1;
5429         val = E1000_READ_REG(&adapter->hw, oidp->oid_arg2);
5430         return (sysctl_handle_int(oidp, &val, 0, req));
5431 }
5432
5433 /*
5434  * Add sysctl variables, one per statistic, to the system.
5435  */
5436 static void
5437 em_add_hw_stats(struct adapter *adapter)
5438 {
5439         device_t dev = adapter->dev;
5440
5441         struct tx_ring *txr = adapter->tx_rings;
5442         struct rx_ring *rxr = adapter->rx_rings;
5443
5444         struct sysctl_ctx_list *ctx = device_get_sysctl_ctx(dev);
5445         struct sysctl_oid *tree = device_get_sysctl_tree(dev);
5446         struct sysctl_oid_list *child = SYSCTL_CHILDREN(tree);
5447         struct e1000_hw_stats *stats = &adapter->stats;
5448
5449         struct sysctl_oid *stat_node, *queue_node, *int_node;
5450         struct sysctl_oid_list *stat_list, *queue_list, *int_list;
5451
5452 #define QUEUE_NAME_LEN 32
5453         char namebuf[QUEUE_NAME_LEN];
5454         
5455         /* Driver Statistics */
5456         SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "link_irq",
5457                         CTLFLAG_RD, &adapter->link_irq,
5458                         "Link MSIX IRQ Handled");
5459         SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "mbuf_alloc_fail", 
5460                          CTLFLAG_RD, &adapter->mbuf_alloc_failed,
5461                          "Std mbuf failed");
5462         SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "cluster_alloc_fail", 
5463                          CTLFLAG_RD, &adapter->mbuf_cluster_failed,
5464                          "Std mbuf cluster failed");
5465         SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "dropped", 
5466                         CTLFLAG_RD, &adapter->dropped_pkts,
5467                         "Driver dropped packets");
5468         SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "tx_dma_fail", 
5469                         CTLFLAG_RD, &adapter->no_tx_dma_setup,
5470                         "Driver tx dma failure in xmit");
5471         SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "rx_overruns",
5472                         CTLFLAG_RD, &adapter->rx_overruns,
5473                         "RX overruns");
5474         SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "watchdog_timeouts",
5475                         CTLFLAG_RD, &adapter->watchdog_events,
5476                         "Watchdog timeouts");
5477         
5478         SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "device_control",
5479                         CTLTYPE_UINT | CTLFLAG_RD, adapter, E1000_CTRL,
5480                         em_sysctl_reg_handler, "IU",
5481                         "Device Control Register");
5482         SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "rx_control",
5483                         CTLTYPE_UINT | CTLFLAG_RD, adapter, E1000_RCTL,
5484                         em_sysctl_reg_handler, "IU",
5485                         "Receiver Control Register");
5486         SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "fc_high_water",
5487                         CTLFLAG_RD, &adapter->hw.fc.high_water, 0,
5488                         "Flow Control High Watermark");
5489         SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "fc_low_water", 
5490                         CTLFLAG_RD, &adapter->hw.fc.low_water, 0,
5491                         "Flow Control Low Watermark");
5492
5493         for (int i = 0; i < adapter->num_queues; i++, txr++, rxr++) {
5494                 snprintf(namebuf, QUEUE_NAME_LEN, "queue_tx_%d", i);
5495                 queue_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, namebuf,
5496                                             CTLFLAG_RD, NULL, "TX Queue Name");
5497                 queue_list = SYSCTL_CHILDREN(queue_node);
5498
5499                 SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "txd_head", 
5500                                 CTLTYPE_UINT | CTLFLAG_RD, adapter,
5501                                 E1000_TDH(txr->me),
5502                                 em_sysctl_reg_handler, "IU",
5503                                 "Transmit Descriptor Head");
5504                 SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "txd_tail", 
5505                                 CTLTYPE_UINT | CTLFLAG_RD, adapter,
5506                                 E1000_TDT(txr->me),
5507                                 em_sysctl_reg_handler, "IU",
5508                                 "Transmit Descriptor Tail");
5509                 SYSCTL_ADD_ULONG(ctx, queue_list, OID_AUTO, "tx_irq",
5510                                 CTLFLAG_RD, &txr->tx_irq,
5511                                 "Queue MSI-X Transmit Interrupts");
5512                 SYSCTL_ADD_ULONG(ctx, queue_list, OID_AUTO, "no_desc_avail", 
5513                                 CTLFLAG_RD, &txr->no_desc_avail,
5514                                 "Queue No Descriptor Available");
5515
5516                 snprintf(namebuf, QUEUE_NAME_LEN, "queue_rx_%d", i);
5517                 queue_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, namebuf,
5518                                             CTLFLAG_RD, NULL, "RX Queue Name");
5519                 queue_list = SYSCTL_CHILDREN(queue_node);
5520
5521                 SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "rxd_head", 
5522                                 CTLTYPE_UINT | CTLFLAG_RD, adapter,
5523                                 E1000_RDH(rxr->me),
5524                                 em_sysctl_reg_handler, "IU",
5525                                 "Receive Descriptor Head");
5526                 SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "rxd_tail", 
5527                                 CTLTYPE_UINT | CTLFLAG_RD, adapter,
5528                                 E1000_RDT(rxr->me),
5529                                 em_sysctl_reg_handler, "IU",
5530                                 "Receive Descriptor Tail");
5531                 SYSCTL_ADD_ULONG(ctx, queue_list, OID_AUTO, "rx_irq",
5532                                 CTLFLAG_RD, &rxr->rx_irq,
5533                                 "Queue MSI-X Receive Interrupts");
5534         }
5535
5536         /* MAC stats get their own sub node */
5537
5538         stat_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "mac_stats", 
5539                                     CTLFLAG_RD, NULL, "Statistics");
5540         stat_list = SYSCTL_CHILDREN(stat_node);
5541
5542         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "excess_coll",
5543                         CTLFLAG_RD, &stats->ecol,
5544                         "Excessive collisions");
5545         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "single_coll",
5546                         CTLFLAG_RD, &stats->scc,
5547                         "Single collisions");
5548         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "multiple_coll",
5549                         CTLFLAG_RD, &stats->mcc,
5550                         "Multiple collisions");
5551         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "late_coll",
5552                         CTLFLAG_RD, &stats->latecol,
5553                         "Late collisions");
5554         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "collision_count",
5555                         CTLFLAG_RD, &stats->colc,
5556                         "Collision Count");
5557         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "symbol_errors",
5558                         CTLFLAG_RD, &adapter->stats.symerrs,
5559                         "Symbol Errors");
5560         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "sequence_errors",
5561                         CTLFLAG_RD, &adapter->stats.sec,
5562                         "Sequence Errors");
5563         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "defer_count",
5564                         CTLFLAG_RD, &adapter->stats.dc,
5565                         "Defer Count");
5566         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "missed_packets",
5567                         CTLFLAG_RD, &adapter->stats.mpc,
5568                         "Missed Packets");
5569         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_no_buff",
5570                         CTLFLAG_RD, &adapter->stats.rnbc,
5571                         "Receive No Buffers");
5572         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_undersize",
5573                         CTLFLAG_RD, &adapter->stats.ruc,
5574                         "Receive Undersize");
5575         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_fragmented",
5576                         CTLFLAG_RD, &adapter->stats.rfc,
5577                         "Fragmented Packets Received ");
5578         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_oversize",
5579                         CTLFLAG_RD, &adapter->stats.roc,
5580                         "Oversized Packets Received");
5581         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_jabber",
5582                         CTLFLAG_RD, &adapter->stats.rjc,
5583                         "Recevied Jabber");
5584         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_errs",
5585                         CTLFLAG_RD, &adapter->stats.rxerrc,
5586                         "Receive Errors");
5587         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "crc_errs",
5588                         CTLFLAG_RD, &adapter->stats.crcerrs,
5589                         "CRC errors");
5590         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "alignment_errs",
5591                         CTLFLAG_RD, &adapter->stats.algnerrc,
5592                         "Alignment Errors");
5593         /* On 82575 these are collision counts */
5594         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "coll_ext_errs",
5595                         CTLFLAG_RD, &adapter->stats.cexterr,
5596                         "Collision/Carrier extension errors");
5597         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "xon_recvd",
5598                         CTLFLAG_RD, &adapter->stats.xonrxc,
5599                         "XON Received");
5600         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "xon_txd",
5601                         CTLFLAG_RD, &adapter->stats.xontxc,
5602                         "XON Transmitted");
5603         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "xoff_recvd",
5604                         CTLFLAG_RD, &adapter->stats.xoffrxc,
5605                         "XOFF Received");
5606         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "xoff_txd",
5607                         CTLFLAG_RD, &adapter->stats.xofftxc,
5608                         "XOFF Transmitted");
5609
5610         /* Packet Reception Stats */
5611         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "total_pkts_recvd",
5612                         CTLFLAG_RD, &adapter->stats.tpr,
5613                         "Total Packets Received ");
5614         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_pkts_recvd",
5615                         CTLFLAG_RD, &adapter->stats.gprc,
5616                         "Good Packets Received");
5617         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "bcast_pkts_recvd",
5618                         CTLFLAG_RD, &adapter->stats.bprc,
5619                         "Broadcast Packets Received");
5620         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "mcast_pkts_recvd",
5621                         CTLFLAG_RD, &adapter->stats.mprc,
5622                         "Multicast Packets Received");
5623         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_64",
5624                         CTLFLAG_RD, &adapter->stats.prc64,
5625                         "64 byte frames received ");
5626         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_65_127",
5627                         CTLFLAG_RD, &adapter->stats.prc127,
5628                         "65-127 byte frames received");
5629         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_128_255",
5630                         CTLFLAG_RD, &adapter->stats.prc255,
5631                         "128-255 byte frames received");
5632         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_256_511",
5633                         CTLFLAG_RD, &adapter->stats.prc511,
5634                         "256-511 byte frames received");
5635         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_512_1023",
5636                         CTLFLAG_RD, &adapter->stats.prc1023,
5637                         "512-1023 byte frames received");
5638         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_1024_1522",
5639                         CTLFLAG_RD, &adapter->stats.prc1522,
5640                         "1023-1522 byte frames received");
5641         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_octets_recvd",
5642                         CTLFLAG_RD, &adapter->stats.gorc, 
5643                         "Good Octets Received"); 
5644
5645         /* Packet Transmission Stats */
5646         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_octets_txd",
5647                         CTLFLAG_RD, &adapter->stats.gotc, 
5648                         "Good Octets Transmitted"); 
5649         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "total_pkts_txd",
5650                         CTLFLAG_RD, &adapter->stats.tpt,
5651                         "Total Packets Transmitted");
5652         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_pkts_txd",
5653                         CTLFLAG_RD, &adapter->stats.gptc,
5654                         "Good Packets Transmitted");
5655         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "bcast_pkts_txd",
5656                         CTLFLAG_RD, &adapter->stats.bptc,
5657                         "Broadcast Packets Transmitted");
5658         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "mcast_pkts_txd",
5659                         CTLFLAG_RD, &adapter->stats.mptc,
5660                         "Multicast Packets Transmitted");
5661         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_64",
5662                         CTLFLAG_RD, &adapter->stats.ptc64,
5663                         "64 byte frames transmitted ");
5664         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_65_127",
5665                         CTLFLAG_RD, &adapter->stats.ptc127,
5666                         "65-127 byte frames transmitted");
5667         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_128_255",
5668                         CTLFLAG_RD, &adapter->stats.ptc255,
5669                         "128-255 byte frames transmitted");
5670         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_256_511",
5671                         CTLFLAG_RD, &adapter->stats.ptc511,
5672                         "256-511 byte frames transmitted");
5673         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_512_1023",
5674                         CTLFLAG_RD, &adapter->stats.ptc1023,
5675                         "512-1023 byte frames transmitted");
5676         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_1024_1522",
5677                         CTLFLAG_RD, &adapter->stats.ptc1522,
5678                         "1024-1522 byte frames transmitted");
5679         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tso_txd",
5680                         CTLFLAG_RD, &adapter->stats.tsctc,
5681                         "TSO Contexts Transmitted");
5682         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tso_ctx_fail",
5683                         CTLFLAG_RD, &adapter->stats.tsctfc,
5684                         "TSO Contexts Failed");
5685
5686
5687         /* Interrupt Stats */
5688
5689         int_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "interrupts", 
5690                                     CTLFLAG_RD, NULL, "Interrupt Statistics");
5691         int_list = SYSCTL_CHILDREN(int_node);
5692
5693         SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "asserts",
5694                         CTLFLAG_RD, &adapter->stats.iac,
5695                         "Interrupt Assertion Count");
5696
5697         SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "rx_pkt_timer",
5698                         CTLFLAG_RD, &adapter->stats.icrxptc,
5699                         "Interrupt Cause Rx Pkt Timer Expire Count");
5700
5701         SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "rx_abs_timer",
5702                         CTLFLAG_RD, &adapter->stats.icrxatc,
5703                         "Interrupt Cause Rx Abs Timer Expire Count");
5704
5705         SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "tx_pkt_timer",
5706                         CTLFLAG_RD, &adapter->stats.ictxptc,
5707                         "Interrupt Cause Tx Pkt Timer Expire Count");
5708
5709         SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "tx_abs_timer",
5710                         CTLFLAG_RD, &adapter->stats.ictxatc,
5711                         "Interrupt Cause Tx Abs Timer Expire Count");
5712
5713         SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "tx_queue_empty",
5714                         CTLFLAG_RD, &adapter->stats.ictxqec,
5715                         "Interrupt Cause Tx Queue Empty Count");
5716
5717         SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "tx_queue_min_thresh",
5718                         CTLFLAG_RD, &adapter->stats.ictxqmtc,
5719                         "Interrupt Cause Tx Queue Min Thresh Count");
5720
5721         SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "rx_desc_min_thresh",
5722                         CTLFLAG_RD, &adapter->stats.icrxdmtc,
5723                         "Interrupt Cause Rx Desc Min Thresh Count");
5724
5725         SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "rx_overrun",
5726                         CTLFLAG_RD, &adapter->stats.icrxoc,
5727                         "Interrupt Cause Receiver Overrun Count");
5728 }
5729
5730 /**********************************************************************
5731  *
5732  *  This routine provides a way to dump out the adapter eeprom,
5733  *  often a useful debug/service tool. This only dumps the first
5734  *  32 words, stuff that matters is in that extent.
5735  *
5736  **********************************************************************/
5737 static int
5738 em_sysctl_nvm_info(SYSCTL_HANDLER_ARGS)
5739 {
5740         struct adapter *adapter = (struct adapter *)arg1;
5741         int error;
5742         int result;
5743
5744         result = -1;
5745         error = sysctl_handle_int(oidp, &result, 0, req);
5746
5747         if (error || !req->newptr)
5748                 return (error);
5749
5750         /*
5751          * This value will cause a hex dump of the
5752          * first 32 16-bit words of the EEPROM to
5753          * the screen.
5754          */
5755         if (result == 1)
5756                 em_print_nvm_info(adapter);
5757
5758         return (error);
5759 }
5760
/*
 * Dump the first 32 16-bit EEPROM words to the console, eight words
 * per row.  Each row is labelled with its starting byte offset
 * (row 1 prints "0x0010" = byte 16 = word 8).
 */
static void
em_print_nvm_info(struct adapter *adapter)
{
	u16	eeprom_data;
	int	i, j, row = 0;

	/* Its a bit crude, but it gets the job done */
	printf("\nInterface EEPROM Dump:\n");
	printf("Offset\n0x0000  ");
	for (i = 0, j = 0; i < 32; i++, j++) {
		if (j == 8) { /* Make the offset block */
			j = 0; ++row;
			printf("\n0x00%x0  ",row);
		}
		/* Read one word at a time; errors leave eeprom_data stale. */
		e1000_read_nvm(&adapter->hw, i, 1, &eeprom_data);
		printf("%04x ", eeprom_data);
	}
	printf("\n");
}
5780
/*
 * Sysctl handler for the interrupt-delay knobs registered by
 * em_add_int_delay_sysctl().  The user-visible value is microseconds;
 * it is converted to hardware tick units and written into the low
 * 16 bits of the register named by info->offset.
 */
static int
em_sysctl_int_delay(SYSCTL_HANDLER_ARGS)
{
	struct em_int_delay_info *info;
	struct adapter *adapter;
	u32 regval;
	int error, usecs, ticks;

	info = (struct em_int_delay_info *)arg1;
	usecs = info->value;
	error = sysctl_handle_int(oidp, &usecs, 0, req);
	if (error != 0 || req->newptr == NULL)
		return (error);
	/* The hardware field is 16 bits wide; bound the request. */
	if (usecs < 0 || usecs > EM_TICKS_TO_USECS(65535))
		return (EINVAL);
	info->value = usecs;
	ticks = EM_USECS_TO_TICKS(usecs);
	if (info->offset == E1000_ITR)	/* units are 256ns here */
		ticks *= 4;

	adapter = info->adapter;

	EM_CORE_LOCK(adapter);
	/* Read-modify-write: only the low 16-bit delay field changes. */
	regval = E1000_READ_OFFSET(&adapter->hw, info->offset);
	regval = (regval & ~0xffff) | (ticks & 0xffff);
	/* Handle a few special cases. */
	switch (info->offset) {
	case E1000_RDTR:
		break;
	case E1000_TIDV:
		if (ticks == 0) {
			/* Delay of zero: stop requesting IDE on tx descriptors. */
			adapter->txd_cmd &= ~E1000_TXD_CMD_IDE;
			/* Don't write 0 into the TIDV register. */
			regval++;
		} else
			adapter->txd_cmd |= E1000_TXD_CMD_IDE;
		break;
	}
	E1000_WRITE_OFFSET(&adapter->hw, info->offset, regval);
	EM_CORE_UNLOCK(adapter);
	return (0);
}
5823
/*
 * Register one read/write interrupt-delay sysctl under the device's
 * tree and seed the em_int_delay_info cookie that
 * em_sysctl_int_delay() receives as arg1.
 */
static void
em_add_int_delay_sysctl(struct adapter *adapter, const char *name,
	const char *description, struct em_int_delay_info *info,
	int offset, int value)
{
	info->adapter = adapter;
	info->offset = offset;	/* device register this knob controls */
	info->value = value;	/* initial setting, in microseconds */
	SYSCTL_ADD_PROC(device_get_sysctl_ctx(adapter->dev),
	    SYSCTL_CHILDREN(device_get_sysctl_tree(adapter->dev)),
	    OID_AUTO, name, CTLTYPE_INT|CTLFLAG_RW,
	    info, 0, em_sysctl_int_delay, "I", description);
}
5837
/*
 * Initialize *limit to the supplied default and expose it as a
 * read/write integer sysctl under the device's tree; later writes to
 * the sysctl update *limit directly.
 */
static void
em_set_sysctl_value(struct adapter *adapter, const char *name,
	const char *description, int *limit, int value)
{
	*limit = value;
	SYSCTL_ADD_INT(device_get_sysctl_ctx(adapter->dev),
	    SYSCTL_CHILDREN(device_get_sysctl_tree(adapter->dev)),
	    OID_AUTO, name, CTLFLAG_RW, limit, value, description);
}
5847
5848
5849 /*
5850 ** Set flow control using sysctl:
5851 ** Flow control values:
5852 **      0 - off
5853 **      1 - rx pause
5854 **      2 - tx pause
5855 **      3 - full
5856 */
5857 static int
5858 em_set_flowcntl(SYSCTL_HANDLER_ARGS)
5859 {       
5860         int             error;
5861         static int      input = 3; /* default is full */
5862         struct adapter  *adapter = (struct adapter *) arg1;
5863                     
5864         error = sysctl_handle_int(oidp, &input, 0, req);
5865     
5866         if ((error) || (req->newptr == NULL))
5867                 return (error);
5868                 
5869         if (input == adapter->fc) /* no change? */
5870                 return (error);
5871
5872         switch (input) {
5873                 case e1000_fc_rx_pause:
5874                 case e1000_fc_tx_pause:
5875                 case e1000_fc_full:
5876                 case e1000_fc_none:
5877                         adapter->hw.fc.requested_mode = input;
5878                         adapter->fc = input;
5879                         break;
5880                 default:
5881                         /* Do nothing */
5882                         return (error);
5883         }
5884
5885         adapter->hw.fc.current_mode = adapter->hw.fc.requested_mode;
5886         e1000_force_mac_fc(&adapter->hw);
5887         return (error);
5888 }
5889
5890 /*
5891 ** Manage Energy Efficient Ethernet:
5892 ** Control values:
5893 **     0/1 - enabled/disabled
5894 */
static int
em_sysctl_eee(SYSCTL_HANDLER_ARGS)
{
       struct adapter *adapter = (struct adapter *) arg1;
       int             error, value;

       /* Report the current state: non-zero means EEE is disabled. */
       value = adapter->hw.dev_spec.ich8lan.eee_disable;
       error = sysctl_handle_int(oidp, &value, 0, req);
       if (error || req->newptr == NULL)
               return (error);
       EM_CORE_LOCK(adapter);
       adapter->hw.dev_spec.ich8lan.eee_disable = (value != 0);
       /* Reinitialize so the new EEE setting is programmed into hardware. */
       em_init_locked(adapter);
       EM_CORE_UNLOCK(adapter);
       return (0);
}
5911
5912 static int
5913 em_sysctl_debug_info(SYSCTL_HANDLER_ARGS)
5914 {
5915         struct adapter *adapter;
5916         int error;
5917         int result;
5918
5919         result = -1;
5920         error = sysctl_handle_int(oidp, &result, 0, req);
5921
5922         if (error || !req->newptr)
5923                 return (error);
5924
5925         if (result == 1) {
5926                 adapter = (struct adapter *)arg1;
5927                 em_print_debug_info(adapter);
5928         }
5929
5930         return (error);
5931 }
5932
5933 /*
5934 ** This routine is meant to be fluid, add whatever is
5935 ** needed for debugging a problem.  -jfv
5936 */
5937 static void
5938 em_print_debug_info(struct adapter *adapter)
5939 {
5940         device_t dev = adapter->dev;
5941         struct tx_ring *txr = adapter->tx_rings;
5942         struct rx_ring *rxr = adapter->rx_rings;
5943
5944         if (if_getdrvflags(adapter->ifp) & IFF_DRV_RUNNING)
5945                 printf("Interface is RUNNING ");
5946         else
5947                 printf("Interface is NOT RUNNING\n");
5948
5949         if (if_getdrvflags(adapter->ifp) & IFF_DRV_OACTIVE)
5950                 printf("and INACTIVE\n");
5951         else
5952                 printf("and ACTIVE\n");
5953
5954         for (int i = 0; i < adapter->num_queues; i++, txr++, rxr++) {
5955                 device_printf(dev, "TX Queue %d ------\n", i);
5956                 device_printf(dev, "hw tdh = %d, hw tdt = %d\n",
5957                         E1000_READ_REG(&adapter->hw, E1000_TDH(i)),
5958                         E1000_READ_REG(&adapter->hw, E1000_TDT(i)));
5959                 device_printf(dev, "Tx Queue Status = %d\n", txr->busy);
5960                 device_printf(dev, "TX descriptors avail = %d\n",
5961                         txr->tx_avail);
5962                 device_printf(dev, "Tx Descriptors avail failure = %ld\n",
5963                         txr->no_desc_avail);
5964                 device_printf(dev, "RX Queue %d ------\n", i);
5965                 device_printf(dev, "hw rdh = %d, hw rdt = %d\n",
5966                         E1000_READ_REG(&adapter->hw, E1000_RDH(i)),
5967                         E1000_READ_REG(&adapter->hw, E1000_RDT(i)));
5968                 device_printf(dev, "RX discarded packets = %ld\n",
5969                         rxr->rx_discarded);
5970                 device_printf(dev, "RX Next to Check = %d\n", rxr->next_to_check);
5971                 device_printf(dev, "RX Next to Refresh = %d\n", rxr->next_to_refresh);
5972         }
5973 }
5974
#ifdef EM_MULTIQUEUE
/*
 * 82574 only:
 * Write a new value to the EEPROM increasing the number of MSIX
 * vectors from 3 to 5, for proper multiqueue support.
 */
static void
em_enable_vectors_82574(struct adapter *adapter)
{
	struct e1000_hw *hw = &adapter->hw;
	device_t dev = adapter->dev;
	u16 edata;

	e1000_read_nvm(hw, EM_NVM_PCIE_CTRL, 1, &edata);
	/*
	 * Use device_printf so the message is attributed to the device,
	 * consistent with the rest of this function; the previous bare
	 * printf was a debug leftover.
	 */
	device_printf(dev, "Current cap: %#06x\n", edata);
	if (((edata & EM_NVM_MSIX_N_MASK) >> EM_NVM_MSIX_N_SHIFT) != 4) {
		device_printf(dev, "Writing to eeprom: increasing "
		    "reported MSIX vectors from 3 to 5...\n");
		edata &= ~(EM_NVM_MSIX_N_MASK);
		edata |= 4 << EM_NVM_MSIX_N_SHIFT;
		e1000_write_nvm(hw, EM_NVM_PCIE_CTRL, 1, &edata);
		e1000_update_nvm_checksum(hw);
		device_printf(dev, "Writing to eeprom: done\n");
	}
}
#endif
6001
6002 #ifdef DDB
6003 DB_COMMAND(em_reset_dev, em_ddb_reset_dev)
6004 {
6005         devclass_t      dc;
6006         int max_em;
6007
6008         dc = devclass_find("em");
6009         max_em = devclass_get_maxunit(dc);
6010
6011         for (int index = 0; index < (max_em - 1); index++) {
6012                 device_t dev;
6013                 dev = devclass_get_device(dc, index);
6014                 if (device_get_driver(dev) == &em_driver) {
6015                         struct adapter *adapter = device_get_softc(dev);
6016                         EM_CORE_LOCK(adapter);
6017                         em_init_locked(adapter);
6018                         EM_CORE_UNLOCK(adapter);
6019                 }
6020         }
6021 }
6022 DB_COMMAND(em_dump_queue, em_ddb_dump_queue)
6023 {
6024         devclass_t      dc;
6025         int max_em;
6026
6027         dc = devclass_find("em");
6028         max_em = devclass_get_maxunit(dc);
6029
6030         for (int index = 0; index < (max_em - 1); index++) {
6031                 device_t dev;
6032                 dev = devclass_get_device(dc, index);
6033                 if (device_get_driver(dev) == &em_driver)
6034                         em_print_debug_info(device_get_softc(dev));
6035         }
6036
6037 }
6038 #endif