/******************************************************************************

  Copyright (c) 2001-2015, Intel Corporation
  All rights reserved.

  Redistribution and use in source and binary forms, with or without
  modification, are permitted provided that the following conditions are met:

   1. Redistributions of source code must retain the above copyright notice,
      this list of conditions and the following disclaimer.

   2. Redistributions in binary form must reproduce the above copyright
      notice, this list of conditions and the following disclaimer in the
      documentation and/or other materials provided with the distribution.

   3. Neither the name of the Intel Corporation nor the names of its
      contributors may be used to endorse or promote products derived from
      this software without specific prior written permission.

  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
  AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
  LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
  INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
  CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
  ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
  POSSIBILITY OF SUCH DAMAGE.

******************************************************************************/
/*$FreeBSD$*/

#include "opt_em.h"
#include "opt_ddb.h"
#include "opt_inet.h"
#include "opt_inet6.h"

#ifdef HAVE_KERNEL_OPTION_HEADERS
#include "opt_device_polling.h"
#endif

#include <sys/param.h>
#include <sys/systm.h>
#ifdef DDB
#include <sys/types.h>
#include <ddb/ddb.h>
#endif
#if __FreeBSD_version >= 800000
#include <sys/buf_ring.h>
#endif
#include <sys/bus.h>
#include <sys/endian.h>
#include <sys/kernel.h>
#include <sys/kthread.h>
#include <sys/malloc.h>
#include <sys/mbuf.h>
#include <sys/module.h>
#include <sys/rman.h>
#include <sys/smp.h>
#include <sys/socket.h>
#include <sys/sockio.h>
#include <sys/sysctl.h>
#include <sys/taskqueue.h>
#include <sys/eventhandler.h>
#include <machine/bus.h>
#include <machine/resource.h>

#include <net/bpf.h>
#include <net/ethernet.h>
#include <net/if.h>
#include <net/if_arp.h>
#include <net/if_dl.h>
#include <net/if_media.h>

#include <net/if_types.h>
#include <net/if_vlan_var.h>

#include <netinet/in_systm.h>
#include <netinet/in.h>
#include <netinet/if_ether.h>
#include <netinet/ip.h>
#include <netinet/ip6.h>
#include <netinet/tcp.h>
#include <netinet/udp.h>

#include <machine/in_cksum.h>
#include <dev/led/led.h>
#include <dev/pci/pcivar.h>
#include <dev/pci/pcireg.h>

#include "e1000_api.h"
#include "e1000_82571.h"
#include "if_em.h"

/*********************************************************************
 *  Driver version:
 *********************************************************************/
char em_driver_version[] = "7.6.1-k";

/*********************************************************************
 *  PCI Device ID Table
 *
 *  Used by probe to select the devices to attach to.
 *  The last field stores an index into em_strings.
 *  The last entry must be all 0s.
 *
 *  { Vendor ID, Device ID, SubVendor ID, SubDevice ID, String Index }
 *********************************************************************/

static em_vendor_info_t em_vendor_info_array[] =
{
        /* Intel(R) PRO/1000 Network Connection */
        { 0x8086, E1000_DEV_ID_82571EB_COPPER,  PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82571EB_FIBER,   PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82571EB_SERDES,  PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82571EB_SERDES_DUAL,
                                                PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82571EB_SERDES_QUAD,
                                                PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82571EB_QUAD_COPPER,
                                                PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82571EB_QUAD_COPPER_LP,
                                                PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82571EB_QUAD_FIBER,
                                                PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82571PT_QUAD_COPPER,
                                                PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82572EI_COPPER,  PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82572EI_FIBER,   PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82572EI_SERDES,  PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82572EI,         PCI_ANY_ID, PCI_ANY_ID, 0},

        { 0x8086, E1000_DEV_ID_82573E,          PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82573E_IAMT,     PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82573L,          PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82583V,          PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_80003ES2LAN_COPPER_SPT,
                                                PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_80003ES2LAN_SERDES_SPT,
                                                PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_80003ES2LAN_COPPER_DPT,
                                                PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_80003ES2LAN_SERDES_DPT,
                                                PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_ICH8_IGP_M_AMT,  PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_ICH8_IGP_AMT,    PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_ICH8_IGP_C,      PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_ICH8_IFE,        PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_ICH8_IFE_GT,     PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_ICH8_IFE_G,      PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_ICH8_IGP_M,      PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_ICH8_82567V_3,   PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_ICH9_IGP_M_AMT,  PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_ICH9_IGP_AMT,    PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_ICH9_IGP_C,      PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_ICH9_IGP_M,      PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_ICH9_IGP_M_V,    PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_ICH9_IFE,        PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_ICH9_IFE_GT,     PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_ICH9_IFE_G,      PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_ICH9_BM,         PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82574L,          PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82574LA,         PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_ICH10_R_BM_LM,   PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_ICH10_R_BM_LF,   PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_ICH10_R_BM_V,    PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_ICH10_D_BM_LM,   PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_ICH10_D_BM_LF,   PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_ICH10_D_BM_V,    PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_PCH_M_HV_LM,     PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_PCH_M_HV_LC,     PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_PCH_D_HV_DM,     PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_PCH_D_HV_DC,     PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_PCH2_LV_LM,      PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_PCH2_LV_V,       PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_PCH_LPT_I217_LM, PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_PCH_LPT_I217_V,  PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_PCH_LPTLP_I218_LM,
                                                PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_PCH_LPTLP_I218_V,
                                                PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_PCH_I218_LM2,    PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_PCH_I218_V2,     PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_PCH_I218_LM3,    PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_PCH_I218_V3,     PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_PCH_SPT_I219_LM, PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_PCH_SPT_I219_V,  PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_PCH_SPT_I219_LM2,
                                                PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_PCH_SPT_I219_V2, PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_PCH_LBG_I219_LM3,
                                                PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_PCH_SPT_I219_LM4,
                                                PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_PCH_SPT_I219_V4, PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_PCH_SPT_I219_LM5,
                                                PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_PCH_SPT_I219_V5, PCI_ANY_ID, PCI_ANY_ID, 0},
        /* required last entry */
        { 0, 0, 0, 0, 0}
};

/*********************************************************************
 *  Table of branding strings for all supported NICs.
 *********************************************************************/

static char *em_strings[] = {
        "Intel(R) PRO/1000 Network Connection"
};

/*********************************************************************
 *  Function prototypes
 *********************************************************************/
static int      em_probe(device_t);
static int      em_attach(device_t);
static int      em_detach(device_t);
static int      em_shutdown(device_t);
static int      em_suspend(device_t);
static int      em_resume(device_t);
#ifdef EM_MULTIQUEUE
static int      em_mq_start(struct ifnet *, struct mbuf *);
static int      em_mq_start_locked(struct ifnet *,
                    struct tx_ring *);
static void     em_qflush(struct ifnet *);
#else
static void     em_start(struct ifnet *);
static void     em_start_locked(struct ifnet *, struct tx_ring *);
#endif
static int      em_ioctl(struct ifnet *, u_long, caddr_t);
static void     em_init(void *);
static void     em_init_locked(struct adapter *);
static void     em_stop(void *);
static void     em_media_status(struct ifnet *, struct ifmediareq *);
static int      em_media_change(struct ifnet *);
static void     em_identify_hardware(struct adapter *);
static int      em_allocate_pci_resources(struct adapter *);
static int      em_allocate_legacy(struct adapter *);
static int      em_allocate_msix(struct adapter *);
static int      em_allocate_queues(struct adapter *);
static int      em_setup_msix(struct adapter *);
static void     em_free_pci_resources(struct adapter *);
static void     em_local_timer(void *);
static void     em_reset(struct adapter *);
static int      em_setup_interface(device_t, struct adapter *);
static void     em_flush_desc_rings(struct adapter *);

static void     em_setup_transmit_structures(struct adapter *);
static void     em_initialize_transmit_unit(struct adapter *);
static int      em_allocate_transmit_buffers(struct tx_ring *);
static void     em_free_transmit_structures(struct adapter *);
static void     em_free_transmit_buffers(struct tx_ring *);

static int      em_setup_receive_structures(struct adapter *);
static int      em_allocate_receive_buffers(struct rx_ring *);
static void     em_initialize_receive_unit(struct adapter *);
static void     em_free_receive_structures(struct adapter *);
static void     em_free_receive_buffers(struct rx_ring *);

static void     em_enable_intr(struct adapter *);
static void     em_disable_intr(struct adapter *);
static void     em_update_stats_counters(struct adapter *);
static void     em_add_hw_stats(struct adapter *adapter);
static void     em_txeof(struct tx_ring *);
static bool     em_rxeof(struct rx_ring *, int, int *);
#ifndef __NO_STRICT_ALIGNMENT
static int      em_fixup_rx(struct rx_ring *);
#endif
static void     em_setup_rxdesc(union e1000_rx_desc_extended *,
                    const struct em_rxbuffer *rxbuf);
static void     em_receive_checksum(uint32_t status, struct mbuf *);
static void     em_transmit_checksum_setup(struct tx_ring *, struct mbuf *, int,
                    struct ip *, u32 *, u32 *);
static void     em_tso_setup(struct tx_ring *, struct mbuf *, int, struct ip *,
                    struct tcphdr *, u32 *, u32 *);
static void     em_set_promisc(struct adapter *);
static void     em_disable_promisc(struct adapter *);
static void     em_set_multi(struct adapter *);
static void     em_update_link_status(struct adapter *);
static void     em_refresh_mbufs(struct rx_ring *, int);
static void     em_register_vlan(void *, struct ifnet *, u16);
static void     em_unregister_vlan(void *, struct ifnet *, u16);
static void     em_setup_vlan_hw_support(struct adapter *);
static int      em_xmit(struct tx_ring *, struct mbuf **);
static int      em_dma_malloc(struct adapter *, bus_size_t,
                    struct em_dma_alloc *, int);
static void     em_dma_free(struct adapter *, struct em_dma_alloc *);
static int      em_sysctl_nvm_info(SYSCTL_HANDLER_ARGS);
static void     em_print_nvm_info(struct adapter *);
static int      em_sysctl_debug_info(SYSCTL_HANDLER_ARGS);
static void     em_print_debug_info(struct adapter *);
static int      em_is_valid_ether_addr(u8 *);
static int      em_sysctl_int_delay(SYSCTL_HANDLER_ARGS);
static void     em_add_int_delay_sysctl(struct adapter *, const char *,
                    const char *, struct em_int_delay_info *, int, int);
/* Management and WOL Support */
static void     em_init_manageability(struct adapter *);
static void     em_release_manageability(struct adapter *);
static void     em_get_hw_control(struct adapter *);
static void     em_release_hw_control(struct adapter *);
static void     em_get_wakeup(device_t);
static void     em_enable_wakeup(device_t);
static int      em_enable_phy_wakeup(struct adapter *);
static void     em_led_func(void *, int);
static void     em_disable_aspm(struct adapter *);

static int      em_irq_fast(void *);

/* MSIX handlers */
static void     em_msix_tx(void *);
static void     em_msix_rx(void *);
static void     em_msix_link(void *);
static void     em_handle_tx(void *context, int pending);
static void     em_handle_rx(void *context, int pending);
static void     em_handle_link(void *context, int pending);

#ifdef EM_MULTIQUEUE
static void     em_enable_vectors_82574(struct adapter *);
#endif

static void     em_set_sysctl_value(struct adapter *, const char *,
                    const char *, int *, int);
static int      em_set_flowcntl(SYSCTL_HANDLER_ARGS);
static int      em_sysctl_eee(SYSCTL_HANDLER_ARGS);

static __inline void em_rx_discard(struct rx_ring *, int);

#ifdef DEVICE_POLLING
static poll_handler_t em_poll;
#endif /* DEVICE_POLLING */

/*********************************************************************
 *  FreeBSD Device Interface Entry Points
 *********************************************************************/

static device_method_t em_methods[] = {
        /* Device interface */
        DEVMETHOD(device_probe, em_probe),
        DEVMETHOD(device_attach, em_attach),
        DEVMETHOD(device_detach, em_detach),
        DEVMETHOD(device_shutdown, em_shutdown),
        DEVMETHOD(device_suspend, em_suspend),
        DEVMETHOD(device_resume, em_resume),
        DEVMETHOD_END
};

static driver_t em_driver = {
        "em", em_methods, sizeof(struct adapter),
};

devclass_t em_devclass;
DRIVER_MODULE(em, pci, em_driver, em_devclass, 0, 0);
MODULE_DEPEND(em, pci, 1, 1, 1);
MODULE_DEPEND(em, ether, 1, 1, 1);

/*********************************************************************
 *  Tunable default values.
 *********************************************************************/

#define EM_TICKS_TO_USECS(ticks)        ((1024 * (ticks) + 500) / 1000)
#define EM_USECS_TO_TICKS(usecs)        ((1000 * (usecs) + 512) / 1024)
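
/*
 * The macros above convert between microseconds and the hardware's
 * delay-timer ticks, which on these MACs are 1.024 us each; the
 * +500/+512 terms round to nearest.  A worked example (illustrative
 * arithmetic only):
 *   EM_USECS_TO_TICKS(1000) = (1000 * 1000 + 512) / 1024 = 977 ticks
 *   EM_TICKS_TO_USECS(977)  = (1024 * 977 + 500) / 1000  = 1000 usecs
 */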
#define M_TSO_LEN                       66

#define MAX_INTS_PER_SEC        8000
#define DEFAULT_ITR             (1000000000/(MAX_INTS_PER_SEC * 256))
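
/*
 * A sketch of the DEFAULT_ITR arithmetic: the ITR register counts in
 * 256-nanosecond increments, so (integer division)
 *   1000000000 / (8000 * 256) = 488,
 * i.e. a ~125 us minimum interrupt spacing, which caps the device at
 * roughly MAX_INTS_PER_SEC interrupts per second.
 */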

/* Allow common code without TSO */
#ifndef CSUM_TSO
#define CSUM_TSO        0
#endif

#define TSO_WORKAROUND  4

static SYSCTL_NODE(_hw, OID_AUTO, em, CTLFLAG_RD, 0, "EM driver parameters");

static int em_disable_crc_stripping = 0;
SYSCTL_INT(_hw_em, OID_AUTO, disable_crc_stripping, CTLFLAG_RDTUN,
    &em_disable_crc_stripping, 0, "Disable CRC Stripping");

static int em_tx_int_delay_dflt = EM_TICKS_TO_USECS(EM_TIDV);
static int em_rx_int_delay_dflt = EM_TICKS_TO_USECS(EM_RDTR);
TUNABLE_INT("hw.em.tx_int_delay", &em_tx_int_delay_dflt);
TUNABLE_INT("hw.em.rx_int_delay", &em_rx_int_delay_dflt);
SYSCTL_INT(_hw_em, OID_AUTO, tx_int_delay, CTLFLAG_RDTUN, &em_tx_int_delay_dflt,
    0, "Default transmit interrupt delay in usecs");
SYSCTL_INT(_hw_em, OID_AUTO, rx_int_delay, CTLFLAG_RDTUN, &em_rx_int_delay_dflt,
    0, "Default receive interrupt delay in usecs");

static int em_tx_abs_int_delay_dflt = EM_TICKS_TO_USECS(EM_TADV);
static int em_rx_abs_int_delay_dflt = EM_TICKS_TO_USECS(EM_RADV);
TUNABLE_INT("hw.em.tx_abs_int_delay", &em_tx_abs_int_delay_dflt);
TUNABLE_INT("hw.em.rx_abs_int_delay", &em_rx_abs_int_delay_dflt);
SYSCTL_INT(_hw_em, OID_AUTO, tx_abs_int_delay, CTLFLAG_RDTUN,
    &em_tx_abs_int_delay_dflt, 0,
    "Default transmit interrupt delay limit in usecs");
SYSCTL_INT(_hw_em, OID_AUTO, rx_abs_int_delay, CTLFLAG_RDTUN,
    &em_rx_abs_int_delay_dflt, 0,
    "Default receive interrupt delay limit in usecs");

static int em_rxd = EM_DEFAULT_RXD;
static int em_txd = EM_DEFAULT_TXD;
TUNABLE_INT("hw.em.rxd", &em_rxd);
TUNABLE_INT("hw.em.txd", &em_txd);
SYSCTL_INT(_hw_em, OID_AUTO, rxd, CTLFLAG_RDTUN, &em_rxd, 0,
    "Number of receive descriptors per queue");
SYSCTL_INT(_hw_em, OID_AUTO, txd, CTLFLAG_RDTUN, &em_txd, 0,
    "Number of transmit descriptors per queue");
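
/*
 * The descriptor counts above are validated in em_attach: the byte
 * size of a ring (count times the 16-byte descriptor size used here)
 * must be a multiple of EM_DBA_ALIGN and the count must fall within
 * the EM_MIN/EM_MAX bounds.  For example (assuming a default count of
 * 1024), 1024 descriptors * 16 bytes = 16 KB per ring.
 */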

static int em_smart_pwr_down = FALSE;
TUNABLE_INT("hw.em.smart_pwr_down", &em_smart_pwr_down);
SYSCTL_INT(_hw_em, OID_AUTO, smart_pwr_down, CTLFLAG_RDTUN, &em_smart_pwr_down,
    0, "Set to true to leave smart power down enabled on newer adapters");

/* Controls whether promiscuous also shows bad packets */
static int em_debug_sbp = FALSE;
TUNABLE_INT("hw.em.sbp", &em_debug_sbp);
SYSCTL_INT(_hw_em, OID_AUTO, sbp, CTLFLAG_RDTUN, &em_debug_sbp, 0,
    "Show bad packets in promiscuous mode");

static int em_enable_msix = TRUE;
TUNABLE_INT("hw.em.enable_msix", &em_enable_msix);
SYSCTL_INT(_hw_em, OID_AUTO, enable_msix, CTLFLAG_RDTUN, &em_enable_msix, 0,
    "Enable MSI-X interrupts");

#ifdef EM_MULTIQUEUE
static int em_num_queues = 1;
TUNABLE_INT("hw.em.num_queues", &em_num_queues);
SYSCTL_INT(_hw_em, OID_AUTO, num_queues, CTLFLAG_RDTUN, &em_num_queues, 0,
    "82574 only: Number of queues to configure, 0 indicates autoconfigure");
#endif

/*
** Global variable to store the last CPU used when binding queues
** to CPUs in em_allocate_msix.  Starts at CPU_FIRST and increments
** each time a queue is bound to a CPU.
*/
static int em_last_bind_cpu = -1;

/* How many packets rxeof tries to clean at a time */
static int em_rx_process_limit = 100;
TUNABLE_INT("hw.em.rx_process_limit", &em_rx_process_limit);
SYSCTL_INT(_hw_em, OID_AUTO, rx_process_limit, CTLFLAG_RDTUN,
    &em_rx_process_limit, 0,
    "Maximum number of received packets to process "
    "at a time, -1 means unlimited");

/*
** Energy Efficient Ethernet - default to OFF.  Note the inverted
** sense: the value is copied to hw->dev_spec.ich8lan.eee_disable in
** em_attach, so 1 disables EEE.
*/
static int eee_setting = 1;
TUNABLE_INT("hw.em.eee_setting", &eee_setting);
SYSCTL_INT(_hw_em, OID_AUTO, eee_setting, CTLFLAG_RDTUN, &eee_setting, 0,
    "Disable Energy Efficient Ethernet");
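
/*
 * The tunables above are normally set from /boot/loader.conf.  An
 * illustrative (not prescriptive) example, using only knobs defined
 * in this file:
 *
 *      hw.em.txd=2048
 *      hw.em.rxd=2048
 *      hw.em.rx_int_delay=32
 *      hw.em.enable_msix=1
 */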

/* Global used in WOL setup with multiport cards */
static int global_quad_port_a = 0;

#ifdef DEV_NETMAP       /* see ixgbe.c for details */
#include <dev/netmap/if_em_netmap.h>
#endif /* DEV_NETMAP */

/*********************************************************************
 *  Device identification routine
 *
 *  em_probe determines whether the driver should be loaded on an
 *  adapter, based on the adapter's PCI vendor/device ID.
 *
 *  return BUS_PROBE_DEFAULT on success, positive on failure
 *********************************************************************/

static int
em_probe(device_t dev)
{
        char            adapter_name[60];
        uint16_t        pci_vendor_id = 0;
        uint16_t        pci_device_id = 0;
        uint16_t        pci_subvendor_id = 0;
        uint16_t        pci_subdevice_id = 0;
        em_vendor_info_t *ent;

        INIT_DEBUGOUT("em_probe: begin");

        pci_vendor_id = pci_get_vendor(dev);
        if (pci_vendor_id != EM_VENDOR_ID)
                return (ENXIO);

        pci_device_id = pci_get_device(dev);
        pci_subvendor_id = pci_get_subvendor(dev);
        pci_subdevice_id = pci_get_subdevice(dev);

        ent = em_vendor_info_array;
        while (ent->vendor_id != 0) {
                if ((pci_vendor_id == ent->vendor_id) &&
                    (pci_device_id == ent->device_id) &&

                    ((pci_subvendor_id == ent->subvendor_id) ||
                    (ent->subvendor_id == PCI_ANY_ID)) &&

                    ((pci_subdevice_id == ent->subdevice_id) ||
                    (ent->subdevice_id == PCI_ANY_ID))) {
                        sprintf(adapter_name, "%s %s",
                                em_strings[ent->index],
                                em_driver_version);
                        device_set_desc_copy(dev, adapter_name);
                        return (BUS_PROBE_DEFAULT);
                }
                ent++;
        }

        return (ENXIO);
}

/*********************************************************************
 *  Device initialization routine
 *
 *  The attach entry point is called when the driver is being loaded.
 *  This routine identifies the type of hardware, allocates all resources
 *  and initializes the hardware.
 *
 *  return 0 on success, positive on failure
 *********************************************************************/

static int
em_attach(device_t dev)
{
        struct adapter  *adapter;
        struct e1000_hw *hw;
        int             error = 0;

        INIT_DEBUGOUT("em_attach: begin");

        if (resource_disabled("em", device_get_unit(dev))) {
                device_printf(dev, "Disabled by device hint\n");
                return (ENXIO);
        }

        adapter = device_get_softc(dev);
        adapter->dev = adapter->osdep.dev = dev;
        hw = &adapter->hw;
        EM_CORE_LOCK_INIT(adapter, device_get_nameunit(dev));

        /* SYSCTL stuff */
        SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
            SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
            OID_AUTO, "nvm", CTLTYPE_INT|CTLFLAG_RW, adapter, 0,
            em_sysctl_nvm_info, "I", "NVM Information");

        SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
            SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
            OID_AUTO, "debug", CTLTYPE_INT|CTLFLAG_RW, adapter, 0,
            em_sysctl_debug_info, "I", "Debug Information");

        SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
            SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
            OID_AUTO, "fc", CTLTYPE_INT|CTLFLAG_RW, adapter, 0,
            em_set_flowcntl, "I", "Flow Control");

        callout_init_mtx(&adapter->timer, &adapter->core_mtx, 0);

        /* Determine hardware and mac info */
        em_identify_hardware(adapter);

        /* Setup PCI resources */
        if (em_allocate_pci_resources(adapter)) {
                device_printf(dev, "Allocation of PCI resources failed\n");
                error = ENXIO;
                goto err_pci;
        }

        /*
        ** For ICH8 and family we need to
        ** map the flash memory, and this
        ** must happen after the MAC is
        ** identified
        */
        if ((hw->mac.type == e1000_ich8lan) ||
            (hw->mac.type == e1000_ich9lan) ||
            (hw->mac.type == e1000_ich10lan) ||
            (hw->mac.type == e1000_pchlan) ||
            (hw->mac.type == e1000_pch2lan) ||
            (hw->mac.type == e1000_pch_lpt)) {
                int rid = EM_BAR_TYPE_FLASH;
                adapter->flash = bus_alloc_resource_any(dev,
                    SYS_RES_MEMORY, &rid, RF_ACTIVE);
                if (adapter->flash == NULL) {
                        device_printf(dev, "Mapping of Flash failed\n");
                        error = ENXIO;
                        goto err_pci;
                }
                /* This is used in the shared code */
                hw->flash_address = (u8 *)adapter->flash;
                adapter->osdep.flash_bus_space_tag =
                    rman_get_bustag(adapter->flash);
                adapter->osdep.flash_bus_space_handle =
                    rman_get_bushandle(adapter->flash);
        }
        /*
        ** In the new SPT device the flash is not a
        ** separate BAR; rather, it also lives in BAR0,
        ** so use the same tag and an offset handle for the
        ** FLASH read/write macros in the shared code.
        */
        else if (hw->mac.type == e1000_pch_spt) {
                adapter->osdep.flash_bus_space_tag =
                    adapter->osdep.mem_bus_space_tag;
                adapter->osdep.flash_bus_space_handle =
                    adapter->osdep.mem_bus_space_handle
                    + E1000_FLASH_BASE_ADDR;
        }

        /* Do Shared Code initialization */
        error = e1000_setup_init_funcs(hw, TRUE);
        if (error) {
                device_printf(dev, "Setup of Shared code failed, error %d\n",
                    error);
                error = ENXIO;
                goto err_pci;
        }

        /*
         * Setup MSI/X or MSI if PCI Express
         */
        adapter->msix = em_setup_msix(adapter);

        e1000_get_bus_info(hw);

        /* Set up some sysctls for the tunable interrupt delays */
        em_add_int_delay_sysctl(adapter, "rx_int_delay",
            "receive interrupt delay in usecs", &adapter->rx_int_delay,
            E1000_REGISTER(hw, E1000_RDTR), em_rx_int_delay_dflt);
        em_add_int_delay_sysctl(adapter, "tx_int_delay",
            "transmit interrupt delay in usecs", &adapter->tx_int_delay,
            E1000_REGISTER(hw, E1000_TIDV), em_tx_int_delay_dflt);
        em_add_int_delay_sysctl(adapter, "rx_abs_int_delay",
            "receive interrupt delay limit in usecs",
            &adapter->rx_abs_int_delay,
            E1000_REGISTER(hw, E1000_RADV),
            em_rx_abs_int_delay_dflt);
        em_add_int_delay_sysctl(adapter, "tx_abs_int_delay",
            "transmit interrupt delay limit in usecs",
            &adapter->tx_abs_int_delay,
            E1000_REGISTER(hw, E1000_TADV),
            em_tx_abs_int_delay_dflt);
        em_add_int_delay_sysctl(adapter, "itr",
            "interrupt delay limit in usecs/4",
            &adapter->tx_itr,
            E1000_REGISTER(hw, E1000_ITR),
            DEFAULT_ITR);

        /* Sysctl for limiting the amount of work done in the taskqueue */
        em_set_sysctl_value(adapter, "rx_processing_limit",
            "max number of rx packets to process", &adapter->rx_process_limit,
            em_rx_process_limit);

        /*
         * Validate the number of transmit and receive descriptors.  It
         * must not exceed the hardware maximum, and must be a multiple
         * of EM_DBA_ALIGN.
         */
        if (((em_txd * sizeof(struct e1000_tx_desc)) % EM_DBA_ALIGN) != 0 ||
            (em_txd > EM_MAX_TXD) || (em_txd < EM_MIN_TXD)) {
                device_printf(dev, "Using %d TX descriptors instead of %d!\n",
                    EM_DEFAULT_TXD, em_txd);
                adapter->num_tx_desc = EM_DEFAULT_TXD;
        } else
                adapter->num_tx_desc = em_txd;

        if (((em_rxd * sizeof(union e1000_rx_desc_extended)) % EM_DBA_ALIGN) != 0 ||
            (em_rxd > EM_MAX_RXD) || (em_rxd < EM_MIN_RXD)) {
                device_printf(dev, "Using %d RX descriptors instead of %d!\n",
                    EM_DEFAULT_RXD, em_rxd);
                adapter->num_rx_desc = EM_DEFAULT_RXD;
        } else
                adapter->num_rx_desc = em_rxd;

        hw->mac.autoneg = DO_AUTO_NEG;
        hw->phy.autoneg_wait_to_complete = FALSE;
        hw->phy.autoneg_advertised = AUTONEG_ADV_DEFAULT;

        /* Copper options */
        if (hw->phy.media_type == e1000_media_type_copper) {
                hw->phy.mdix = AUTO_ALL_MODES;
                hw->phy.disable_polarity_correction = FALSE;
                hw->phy.ms_type = EM_MASTER_SLAVE;
        }

        /*
         * Set the frame limits assuming
         * standard ethernet sized frames.
         */
        adapter->hw.mac.max_frame_size =
            ETHERMTU + ETHER_HDR_LEN + ETHERNET_FCS_SIZE;
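        /*
         * For the standard 1500-byte MTU this works out to
         * 1500 + 14 (Ethernet header) + 4 (FCS) = 1518 bytes.
         */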

        /*
         * This controls when hardware reports transmit completion
         * status.
         */
        hw->mac.report_tx_early = 1;

        /*
        ** Get queue/ring memory
        */
        if (em_allocate_queues(adapter)) {
                error = ENOMEM;
                goto err_pci;
        }

        /* Allocate multicast array memory. */
        adapter->mta = malloc(sizeof(u8) * ETH_ADDR_LEN *
            MAX_NUM_MULTICAST_ADDRESSES, M_DEVBUF, M_NOWAIT);
        if (adapter->mta == NULL) {
                device_printf(dev, "Can not allocate multicast setup array\n");
                error = ENOMEM;
                goto err_late;
        }

        /* Check SOL/IDER usage */
        if (e1000_check_reset_block(hw))
                device_printf(dev, "PHY reset is blocked"
                    " due to SOL/IDER session.\n");

        /* Sysctl for setting Energy Efficient Ethernet */
        hw->dev_spec.ich8lan.eee_disable = eee_setting;
        SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
            SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
            OID_AUTO, "eee_control", CTLTYPE_INT|CTLFLAG_RW,
            adapter, 0, em_sysctl_eee, "I",
            "Disable Energy Efficient Ethernet");

        /*
        ** Start from a known state; this is
        ** important for reading the NVM and
        ** MAC address afterwards.
        */
        e1000_reset_hw(hw);


        /* Make sure we have a good EEPROM before we read from it */
        if (e1000_validate_nvm_checksum(hw) < 0) {
                /*
                ** Some PCI-E parts fail the first check due to
                ** the link being in a sleep state; call it again.
                ** If it fails a second time, it's a real issue.
                */
                if (e1000_validate_nvm_checksum(hw) < 0) {
                        device_printf(dev,
                            "The EEPROM Checksum Is Not Valid\n");
                        error = EIO;
                        goto err_late;
                }
        }

        /* Copy the permanent MAC address out of the EEPROM */
        if (e1000_read_mac_addr(hw) < 0) {
                device_printf(dev, "EEPROM read error while reading MAC"
                    " address\n");
                error = EIO;
                goto err_late;
        }

        if (!em_is_valid_ether_addr(hw->mac.addr)) {
                device_printf(dev, "Invalid MAC address\n");
                error = EIO;
                goto err_late;
        }

        /* Disable ULP support */
        e1000_disable_ulp_lpt_lp(hw, TRUE);

        /*
        **  Do interrupt configuration
        */
        if (adapter->msix > 1) /* Do MSIX */
                error = em_allocate_msix(adapter);
        else  /* MSI or Legacy */
                error = em_allocate_legacy(adapter);
        if (error)
                goto err_late;

        /*
         * Get Wake-on-Lan and Management info for later use
         */
        em_get_wakeup(dev);

        /* Setup OS specific network interface */
        if (em_setup_interface(dev, adapter) != 0)
                goto err_late;

        em_reset(adapter);

        /* Initialize statistics */
        em_update_stats_counters(adapter);

        hw->mac.get_link_status = 1;
        em_update_link_status(adapter);

        /* Register for VLAN events */
        adapter->vlan_attach = EVENTHANDLER_REGISTER(vlan_config,
            em_register_vlan, adapter, EVENTHANDLER_PRI_FIRST);
        adapter->vlan_detach = EVENTHANDLER_REGISTER(vlan_unconfig,
            em_unregister_vlan, adapter, EVENTHANDLER_PRI_FIRST);

        em_add_hw_stats(adapter);

        /* Non-AMT based hardware can now take control from firmware */
        if (adapter->has_manage && !adapter->has_amt)
                em_get_hw_control(adapter);

        /* Tell the stack that the interface is not active */
        adapter->ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
        adapter->ifp->if_drv_flags |= IFF_DRV_OACTIVE;

        adapter->led_dev = led_create(em_led_func, adapter,
            device_get_nameunit(dev));
#ifdef DEV_NETMAP
        em_netmap_attach(adapter);
#endif /* DEV_NETMAP */

        INIT_DEBUGOUT("em_attach: end");

        return (0);

err_late:
        em_free_transmit_structures(adapter);
        em_free_receive_structures(adapter);
        em_release_hw_control(adapter);
        if (adapter->ifp != NULL)
                if_free(adapter->ifp);
err_pci:
        em_free_pci_resources(adapter);
        free(adapter->mta, M_DEVBUF);
        EM_CORE_LOCK_DESTROY(adapter);

        return (error);
}

/*********************************************************************
 *  Device removal routine
 *
 *  The detach entry point is called when the driver is being removed.
 *  This routine stops the adapter and deallocates all the resources
 *  that were allocated for driver operation.
 *
 *  return 0 on success, positive on failure
 *********************************************************************/

static int
em_detach(device_t dev)
{
        struct adapter  *adapter = device_get_softc(dev);
        struct ifnet    *ifp = adapter->ifp;

        INIT_DEBUGOUT("em_detach: begin");

        /* Make sure VLANS are not using driver */
        if (adapter->ifp->if_vlantrunk != NULL) {
                device_printf(dev,"Vlan in use, detach first\n");
                return (EBUSY);
        }

#ifdef DEVICE_POLLING
        if (ifp->if_capenable & IFCAP_POLLING)
                ether_poll_deregister(ifp);
#endif

        if (adapter->led_dev != NULL)
                led_destroy(adapter->led_dev);

        EM_CORE_LOCK(adapter);
        adapter->in_detach = 1;
        em_stop(adapter);
        EM_CORE_UNLOCK(adapter);
        EM_CORE_LOCK_DESTROY(adapter);

        e1000_phy_hw_reset(&adapter->hw);

        em_release_manageability(adapter);
        em_release_hw_control(adapter);

        /* Unregister VLAN events */
        if (adapter->vlan_attach != NULL)
                EVENTHANDLER_DEREGISTER(vlan_config, adapter->vlan_attach);
        if (adapter->vlan_detach != NULL)
                EVENTHANDLER_DEREGISTER(vlan_unconfig, adapter->vlan_detach);

        ether_ifdetach(adapter->ifp);
        callout_drain(&adapter->timer);

#ifdef DEV_NETMAP
        netmap_detach(ifp);
#endif /* DEV_NETMAP */

        em_free_pci_resources(adapter);
        bus_generic_detach(dev);
        if_free(ifp);

        em_free_transmit_structures(adapter);
        em_free_receive_structures(adapter);

        em_release_hw_control(adapter);
        free(adapter->mta, M_DEVBUF);

        return (0);
}

/*********************************************************************
 *
 *  Shutdown entry point
 *
 **********************************************************************/

static int
em_shutdown(device_t dev)
{
        return em_suspend(dev);
}

/*
 * Suspend/resume device methods.
 */
static int
em_suspend(device_t dev)
{
        struct adapter *adapter = device_get_softc(dev);

        EM_CORE_LOCK(adapter);

        em_release_manageability(adapter);
        em_release_hw_control(adapter);
        em_enable_wakeup(dev);

        EM_CORE_UNLOCK(adapter);

        return bus_generic_suspend(dev);
}

static int
em_resume(device_t dev)
{
        struct adapter *adapter = device_get_softc(dev);
        struct tx_ring  *txr = adapter->tx_rings;
        struct ifnet *ifp = adapter->ifp;

        EM_CORE_LOCK(adapter);
        if (adapter->hw.mac.type == e1000_pch2lan)
                e1000_resume_workarounds_pchlan(&adapter->hw);
        em_init_locked(adapter);
        em_init_manageability(adapter);

        if ((ifp->if_flags & IFF_UP) &&
            (ifp->if_drv_flags & IFF_DRV_RUNNING) && adapter->link_active) {
                for (int i = 0; i < adapter->num_queues; i++, txr++) {
                        EM_TX_LOCK(txr);
#ifdef EM_MULTIQUEUE
                        if (!drbr_empty(ifp, txr->br))
                                em_mq_start_locked(ifp, txr);
#else
                        if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
                                em_start_locked(ifp, txr);
#endif
                        EM_TX_UNLOCK(txr);
                }
        }
        EM_CORE_UNLOCK(adapter);

        return bus_generic_resume(dev);
}


#ifndef EM_MULTIQUEUE
static void
em_start_locked(struct ifnet *ifp, struct tx_ring *txr)
{
        struct adapter  *adapter = ifp->if_softc;
        struct mbuf     *m_head;

        EM_TX_LOCK_ASSERT(txr);

        if ((ifp->if_drv_flags & (IFF_DRV_RUNNING|IFF_DRV_OACTIVE)) !=
            IFF_DRV_RUNNING)
                return;

        if (!adapter->link_active)
                return;

        while (!IFQ_DRV_IS_EMPTY(&ifp->if_snd)) {
                /* Call cleanup if number of TX descriptors low */
                if (txr->tx_avail <= EM_TX_CLEANUP_THRESHOLD)
                        em_txeof(txr);
                if (txr->tx_avail < EM_MAX_SCATTER) {
                        ifp->if_drv_flags |= IFF_DRV_OACTIVE;
                        break;
                }
                IFQ_DRV_DEQUEUE(&ifp->if_snd, m_head);
                if (m_head == NULL)
                        break;
                /*
                 *  Encapsulation can modify our pointer, and/or make it
                 *  NULL on failure.  In that event, we can't requeue.
                 */
                if (em_xmit(txr, &m_head)) {
                        if (m_head == NULL)
                                break;
                        IFQ_DRV_PREPEND(&ifp->if_snd, m_head);
                        break;
                }

                /* Mark the queue as having work */
                if (txr->busy == EM_TX_IDLE)
                        txr->busy = EM_TX_BUSY;

                /* Send a copy of the frame to the BPF listener */
                ETHER_BPF_MTAP(ifp, m_head);

        }

        return;
}

static void
em_start(struct ifnet *ifp)
{
        struct adapter  *adapter = ifp->if_softc;
        struct tx_ring  *txr = adapter->tx_rings;

        if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
                EM_TX_LOCK(txr);
                em_start_locked(ifp, txr);
                EM_TX_UNLOCK(txr);
        }
        return;
}
#else /* EM_MULTIQUEUE */
/*********************************************************************
 *  Multiqueue Transmit routines
 *
 *  em_mq_start is called by the stack to initiate a transmit.
 *  However, if the ring is busy the driver can queue the request
 *  rather than doing an immediate send.  It is this queueing,
 *  rather than just having multiple tx queues, that is the
 *  advantage in this driver.
 **********************************************************************/
/*
** Multiqueue capable stack interface
*/
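/*
** Queue selection below is a simple modulus.  For illustration: with
** two queues, a flow carrying m_pkthdr.flowid = 7 maps to ring 1,
** while packets without a flowid hash are spread by the sending CPU
** (curcpu % num_queues).  (Illustrative numbers only.)
*/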
static int
em_mq_start(struct ifnet *ifp, struct mbuf *m)
{
        struct adapter  *adapter = ifp->if_softc;
        struct tx_ring  *txr = adapter->tx_rings;
        unsigned int    i, error;

        if (M_HASHTYPE_GET(m) != M_HASHTYPE_NONE)
                i = m->m_pkthdr.flowid % adapter->num_queues;
        else
                i = curcpu % adapter->num_queues;

        txr = &adapter->tx_rings[i];

        error = drbr_enqueue(ifp, txr->br, m);
        if (error)
                return (error);

        if (EM_TX_TRYLOCK(txr)) {
                em_mq_start_locked(ifp, txr);
                EM_TX_UNLOCK(txr);
        } else
                taskqueue_enqueue(txr->tq, &txr->tx_task);

        return (0);
}

static int
em_mq_start_locked(struct ifnet *ifp, struct tx_ring *txr)
{
        struct adapter  *adapter = txr->adapter;
        struct mbuf     *next;
        int             err = 0, enq = 0;

        if ((ifp->if_drv_flags & (IFF_DRV_RUNNING | IFF_DRV_OACTIVE)) !=
            IFF_DRV_RUNNING || adapter->link_active == 0) {
                return (ENETDOWN);
        }

        /* Process the queue */
        while ((next = drbr_peek(ifp, txr->br)) != NULL) {
                if ((err = em_xmit(txr, &next)) != 0) {
                        if (next == NULL) {
                                /* It was freed, move forward */
                                drbr_advance(ifp, txr->br);
                        } else {
                                /*
                                 * Still have one left; it may not be
                                 * the same mbuf, since the transmit
                                 * function may have changed it.
                                 */
                                drbr_putback(ifp, txr->br, next);
                        }
                        break;
                }
                drbr_advance(ifp, txr->br);
                enq++;
                ifp->if_obytes += next->m_pkthdr.len;
                if (next->m_flags & M_MCAST)
                        ifp->if_omcasts++;
                ETHER_BPF_MTAP(ifp, next);
                if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
                        break;
        }

        /* Mark the queue as having work */
        if ((enq > 0) && (txr->busy == EM_TX_IDLE))
                txr->busy = EM_TX_BUSY;

        if (txr->tx_avail < EM_MAX_SCATTER)
                em_txeof(txr);
        if (txr->tx_avail < EM_MAX_SCATTER) {
                ifp->if_drv_flags |= IFF_DRV_OACTIVE;
        }
        return (err);
}

/*
** Flush all ring buffers
*/
static void
em_qflush(struct ifnet *ifp)
{
        struct adapter  *adapter = ifp->if_softc;
        struct tx_ring  *txr = adapter->tx_rings;
        struct mbuf     *m;

        for (int i = 0; i < adapter->num_queues; i++, txr++) {
                EM_TX_LOCK(txr);
                while ((m = buf_ring_dequeue_sc(txr->br)) != NULL)
                        m_freem(m);
                EM_TX_UNLOCK(txr);
        }
        if_qflush(ifp);
}
#endif /* EM_MULTIQUEUE */

/*********************************************************************
 *  Ioctl entry point
 *
 *  em_ioctl is called when the user wants to configure the
 *  interface.
 *
 *  return 0 on success, positive on failure
 *********************************************************************/

static int
em_ioctl(struct ifnet *ifp, u_long command, caddr_t data)
{
        struct adapter  *adapter = ifp->if_softc;
        struct ifreq    *ifr = (struct ifreq *)data;
#if defined(INET) || defined(INET6)
        struct ifaddr   *ifa = (struct ifaddr *)data;
#endif
        bool            avoid_reset = FALSE;
        int             error = 0;

        if (adapter->in_detach)
                return (error);

        switch (command) {
        case SIOCSIFADDR:
#ifdef INET
                if (ifa->ifa_addr->sa_family == AF_INET)
                        avoid_reset = TRUE;
#endif
#ifdef INET6
                if (ifa->ifa_addr->sa_family == AF_INET6)
                        avoid_reset = TRUE;
#endif
                /*
                ** Calling init results in link renegotiation,
                ** so we avoid doing it when possible.
                */
                if (avoid_reset) {
                        ifp->if_flags |= IFF_UP;
                        if (!(ifp->if_drv_flags & IFF_DRV_RUNNING))
                                em_init(adapter);
#ifdef INET
                        if (!(ifp->if_flags & IFF_NOARP))
                                arp_ifinit(ifp, ifa);
#endif
                } else
                        error = ether_ioctl(ifp, command, data);
                break;
        case SIOCSIFMTU:
            {
                int max_frame_size;

                IOCTL_DEBUGOUT("ioctl rcv'd: SIOCSIFMTU (Set Interface MTU)");

                EM_CORE_LOCK(adapter);
                switch (adapter->hw.mac.type) {
                case e1000_82571:
                case e1000_82572:
                case e1000_ich9lan:
                case e1000_ich10lan:
                case e1000_pch2lan:
                case e1000_pch_lpt:
                case e1000_pch_spt:
                case e1000_82574:
                case e1000_82583:
                case e1000_80003es2lan: /* 9K Jumbo Frame size */
                        max_frame_size = 9234;
                        break;
                case e1000_pchlan:
                        max_frame_size = 4096;
                        break;
                        /* Adapters that do not support jumbo frames */
                case e1000_ich8lan:
                        max_frame_size = ETHER_MAX_LEN;
                        break;
                default:
                        max_frame_size = MAX_JUMBO_FRAME_SIZE;
                }
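                /*
                 * The largest acceptable MTU is the chosen frame size
                 * less the Ethernet header and CRC: with the 9234-byte
                 * limit above, 9234 - 14 - 4 = 9216 bytes.
                 */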
1219                 if (ifr->ifr_mtu > max_frame_size - ETHER_HDR_LEN -
1220                     ETHER_CRC_LEN) {
1221                         EM_CORE_UNLOCK(adapter);
1222                         error = EINVAL;
1223                         break;
1224                 }
1225
1226                 ifp->if_mtu = ifr->ifr_mtu;
1227                 adapter->hw.mac.max_frame_size =
1228                     ifp->if_mtu + ETHER_HDR_LEN + ETHER_CRC_LEN;
1229                 if (ifp->if_drv_flags & IFF_DRV_RUNNING)
1230                         em_init_locked(adapter);
1231                 EM_CORE_UNLOCK(adapter);
1232                 break;
1233             }
1234         case SIOCSIFFLAGS:
1235                 IOCTL_DEBUGOUT("ioctl rcv'd:\
1236                     SIOCSIFFLAGS (Set Interface Flags)");
1237                 EM_CORE_LOCK(adapter);
1238                 if (ifp->if_flags & IFF_UP) {
1239                         if ((ifp->if_drv_flags & IFF_DRV_RUNNING)) {
1240                                 if ((ifp->if_flags ^ adapter->if_flags) &
1241                                     (IFF_PROMISC | IFF_ALLMULTI)) {
1242                                         em_disable_promisc(adapter);
1243                                         em_set_promisc(adapter);
1244                                 }
1245                         } else
1246                                 em_init_locked(adapter);
1247                 } else
1248                         if (ifp->if_drv_flags & IFF_DRV_RUNNING)
1249                                 em_stop(adapter);
1250                 adapter->if_flags = ifp->if_flags;
1251                 EM_CORE_UNLOCK(adapter);
1252                 break;
1253         case SIOCADDMULTI:
1254         case SIOCDELMULTI:
1255                 IOCTL_DEBUGOUT("ioctl rcv'd: SIOC(ADD|DEL)MULTI");
1256                 if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
1257                         EM_CORE_LOCK(adapter);
1258                         em_disable_intr(adapter);
1259                         em_set_multi(adapter);
1260 #ifdef DEVICE_POLLING
1261                         if (!(ifp->if_capenable & IFCAP_POLLING))
1262 #endif
1263                                 em_enable_intr(adapter);
1264                         EM_CORE_UNLOCK(adapter);
1265                 }
1266                 break;
1267         case SIOCSIFMEDIA:
1268                 /* Check SOL/IDER usage */
1269                 EM_CORE_LOCK(adapter);
1270                 if (e1000_check_reset_block(&adapter->hw)) {
1271                         EM_CORE_UNLOCK(adapter);
1272                         device_printf(adapter->dev, "Media change is"
1273                             " blocked due to SOL/IDER session.\n");
1274                         break;
1275                 }
1276                 EM_CORE_UNLOCK(adapter);
1277                 /* falls thru */
1278         case SIOCGIFMEDIA:
1279                 IOCTL_DEBUGOUT("ioctl rcv'd: \
1280                     SIOCxIFMEDIA (Get/Set Interface Media)");
1281                 error = ifmedia_ioctl(ifp, ifr, &adapter->media, command);
1282                 break;
1283         case SIOCSIFCAP:
1284             {
1285                 int mask, reinit;
1286
1287                 IOCTL_DEBUGOUT("ioctl rcv'd: SIOCSIFCAP (Set Capabilities)");
1288                 reinit = 0;
1289                 mask = ifr->ifr_reqcap ^ ifp->if_capenable;
1290 #ifdef DEVICE_POLLING
1291                 if (mask & IFCAP_POLLING) {
1292                         if (ifr->ifr_reqcap & IFCAP_POLLING) {
1293                                 error = ether_poll_register(em_poll, ifp);
1294                                 if (error)
1295                                         return (error);
1296                                 EM_CORE_LOCK(adapter);
1297                                 em_disable_intr(adapter);
1298                                 ifp->if_capenable |= IFCAP_POLLING;
1299                                 EM_CORE_UNLOCK(adapter);
1300                         } else {
1301                                 error = ether_poll_deregister(ifp);
1302                                 /* Enable interrupt even in error case */
1303                                 EM_CORE_LOCK(adapter);
1304                                 em_enable_intr(adapter);
1305                                 ifp->if_capenable &= ~IFCAP_POLLING;
1306                                 EM_CORE_UNLOCK(adapter);
1307                         }
1308                 }
1309 #endif
1310                 if (mask & IFCAP_HWCSUM) {
1311                         ifp->if_capenable ^= IFCAP_HWCSUM;
1312                         reinit = 1;
1313                 }
1314                 if (mask & IFCAP_TSO4) {
1315                         ifp->if_capenable ^= IFCAP_TSO4;
1316                         reinit = 1;
1317                 }
1318                 if (mask & IFCAP_VLAN_HWTAGGING) {
1319                         ifp->if_capenable ^= IFCAP_VLAN_HWTAGGING;
1320                         reinit = 1;
1321                 }
1322                 if (mask & IFCAP_VLAN_HWFILTER) {
1323                         ifp->if_capenable ^= IFCAP_VLAN_HWFILTER;
1324                         reinit = 1;
1325                 }
1326                 if (mask & IFCAP_VLAN_HWTSO) {
1327                         ifp->if_capenable ^= IFCAP_VLAN_HWTSO;
1328                         reinit = 1;
1329                 }
1330                 if ((mask & IFCAP_WOL) &&
1331                     (ifp->if_capabilities & IFCAP_WOL) != 0) {
1332                         if (mask & IFCAP_WOL_MCAST)
1333                                 ifp->if_capenable ^= IFCAP_WOL_MCAST;
1334                         if (mask & IFCAP_WOL_MAGIC)
1335                                 ifp->if_capenable ^= IFCAP_WOL_MAGIC;
1336                 }
1337                 if (reinit && (ifp->if_drv_flags & IFF_DRV_RUNNING))
1338                         em_init(adapter);
1339                 VLAN_CAPABILITIES(ifp);
1340                 break;
1341             }
1342
1343         default:
1344                 error = ether_ioctl(ifp, command, data);
1345                 break;
1346         }
1347
1348         return (error);
1349 }
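/*
 * A note on the SIOCSIFCAP handling above: "reqcap ^ capenable" yields
 * exactly the capability bits the caller asked to change, and each bit
 * is then toggled individually so unrelated capabilities stay untouched.
 * A minimal worked example (values hypothetical):
 *
 *      capenable = IFCAP_HWCSUM | IFCAP_TSO4
 *      reqcap    = IFCAP_HWCSUM               (user disables TSO4)
 *      mask      = IFCAP_TSO4                 (only the changed bit set)
 */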
1350
1351
1352 /*********************************************************************
1353  *  Init entry point
1354  *
1355  *  This routine is used in two ways: by the stack as the init entry
1356  *  point in the network interface structure, and by the driver as a
1357  *  hw/sw initialization routine to bring the adapter to a consistent
1358  *  state.
1359  *
1360  *  em_init_locked() expects the CORE lock to be held.
1361  **********************************************************************/
1362
1363 static void
1364 em_init_locked(struct adapter *adapter)
1365 {
1366         struct ifnet    *ifp = adapter->ifp;
1367         device_t        dev = adapter->dev;
1368
1369         INIT_DEBUGOUT("em_init: begin");
1370
1371         EM_CORE_LOCK_ASSERT(adapter);
1372
1373         em_disable_intr(adapter);
1374         callout_stop(&adapter->timer);
1375
1376         /* Get the latest MAC address; the user may have set a LAA */
1377         bcopy(IF_LLADDR(adapter->ifp), adapter->hw.mac.addr,
1378               ETHER_ADDR_LEN);
1379
1380         /* Put the address into the Receive Address Array */
1381         e1000_rar_set(&adapter->hw, adapter->hw.mac.addr, 0);
1382
1383         /*
1384          * With the 82571 adapter, RAR[0] may be overwritten
1385          * when the other port is reset, so we keep a duplicate
1386          * in RAR[14] for that eventuality; this ensures
1387          * the interface continues to function.
1388          */
1389         if (adapter->hw.mac.type == e1000_82571) {
1390                 e1000_set_laa_state_82571(&adapter->hw, TRUE);
1391                 e1000_rar_set(&adapter->hw, adapter->hw.mac.addr,
1392                     E1000_RAR_ENTRIES - 1);
1393         }
1394
1395         /* Initialize the hardware */
1396         em_reset(adapter);
1397         em_update_link_status(adapter);
1398
1399         /* Setup VLAN support, basic and offload if available */
1400         E1000_WRITE_REG(&adapter->hw, E1000_VET, ETHERTYPE_VLAN);
1401
1402         /* Set hardware offload abilities */
1403         ifp->if_hwassist = 0;
1404         if (ifp->if_capenable & IFCAP_TXCSUM)
1405                 ifp->if_hwassist |= (CSUM_TCP | CSUM_UDP);
1406         /* 
1407         ** TSO has proven to be problematic when not running
1408         ** at full gigabit speed, so disable the assist automatically
1409         ** when at lower speeds.  -jfv
1410         */
1411         if (ifp->if_capenable & IFCAP_TSO4) {
1412                 if (adapter->link_speed == SPEED_1000)
1413                         ifp->if_hwassist |= CSUM_TSO;
1414         }
1415
1416         /* Configure for OS presence */
1417         em_init_manageability(adapter);
1418
1419         /* Prepare transmit descriptors and buffers */
1420         em_setup_transmit_structures(adapter);
1421         em_initialize_transmit_unit(adapter);
1422
1423         /* Setup Multicast table */
1424         em_set_multi(adapter);
1425
1426         /*
1427         ** Figure out the desired mbuf
1428         ** pool for handling jumbo frames
1429         */
1430         if (adapter->hw.mac.max_frame_size <= 2048)
1431                 adapter->rx_mbuf_sz = MCLBYTES;
1432         else if (adapter->hw.mac.max_frame_size <= 4096)
1433                 adapter->rx_mbuf_sz = MJUMPAGESIZE;
1434         else
1435                 adapter->rx_mbuf_sz = MJUM9BYTES;
1436
1437         /* Prepare receive descriptors and buffers */
1438         if (em_setup_receive_structures(adapter)) {
1439                 device_printf(dev, "Could not setup receive structures\n");
1440                 em_stop(adapter);
1441                 return;
1442         }
1443         em_initialize_receive_unit(adapter);
1444
1445         /* Use real VLAN Filter support? */
1446         if (ifp->if_capenable & IFCAP_VLAN_HWTAGGING) {
1447                 if (ifp->if_capenable & IFCAP_VLAN_HWFILTER)
1448                         /* Use real VLAN Filter support */
1449                         em_setup_vlan_hw_support(adapter);
1450                 else {
1451                         u32 ctrl;
1452                         ctrl = E1000_READ_REG(&adapter->hw, E1000_CTRL);
1453                         ctrl |= E1000_CTRL_VME;
1454                         E1000_WRITE_REG(&adapter->hw, E1000_CTRL, ctrl);
1455                 }
1456         }
1457
1458         /* Don't lose promiscuous settings */
1459         em_set_promisc(adapter);
1460
1461         /* Set the interface as ACTIVE */
1462         ifp->if_drv_flags |= IFF_DRV_RUNNING;
1463         ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;
1464
1465         callout_reset(&adapter->timer, hz, em_local_timer, adapter);
1466         e1000_clear_hw_cntrs_base_generic(&adapter->hw);
1467
1468         /* MSI/X configuration for 82574 */
1469         if (adapter->hw.mac.type == e1000_82574) {
1470                 int tmp;
1471                 tmp = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
1472                 tmp |= E1000_CTRL_EXT_PBA_CLR;
1473                 E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT, tmp);
1474                 /* Set the IVAR - interrupt vector routing. */
1475                 E1000_WRITE_REG(&adapter->hw, E1000_IVAR, adapter->ivars);
1476         }
1477
1478 #ifdef DEVICE_POLLING
1479         /*
1480          * Only enable interrupts if we are not polling; make sure
1481          * they are off otherwise.
1482          */
1483         if (ifp->if_capenable & IFCAP_POLLING)
1484                 em_disable_intr(adapter);
1485         else
1486 #endif /* DEVICE_POLLING */
1487                 em_enable_intr(adapter);
1488
1489         /* AMT based hardware can now take control from firmware */
1490         if (adapter->has_manage && adapter->has_amt)
1491                 em_get_hw_control(adapter);
1492 }
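/*
 * The receive mbuf sizing in em_init_locked() above picks the smallest
 * cluster that holds a complete frame. A minimal sketch of the same
 * mapping as a standalone helper; "em_pick_rx_mbuf_sz" is a hypothetical
 * name used only for illustration:
 */
#if 0   /* illustrative sketch, not compiled */
static int
em_pick_rx_mbuf_sz(u32 max_frame_size)
{
        if (max_frame_size <= 2048)
                return (MCLBYTES);      /* standard 2K cluster */
        else if (max_frame_size <= 4096)
                return (MJUMPAGESIZE);  /* page-sized jumbo cluster */
        else
                return (MJUM9BYTES);    /* 9K jumbo cluster */
}
#endif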
1493
1494 static void
1495 em_init(void *arg)
1496 {
1497         struct adapter *adapter = arg;
1498
1499         EM_CORE_LOCK(adapter);
1500         em_init_locked(adapter);
1501         EM_CORE_UNLOCK(adapter);
1502 }
1503
1504
1505 #ifdef DEVICE_POLLING
1506 /*********************************************************************
1507  *
1508  *  Legacy polling routine: note this only works with a single queue
1509  *
1510  *********************************************************************/
1511 static int
1512 em_poll(struct ifnet *ifp, enum poll_cmd cmd, int count)
1513 {
1514         struct adapter *adapter = ifp->if_softc;
1515         struct tx_ring  *txr = adapter->tx_rings;
1516         struct rx_ring  *rxr = adapter->rx_rings;
1517         u32             reg_icr;
1518         int             rx_done;
1519
1520         EM_CORE_LOCK(adapter);
1521         if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) {
1522                 EM_CORE_UNLOCK(adapter);
1523                 return (0);
1524         }
1525
1526         if (cmd == POLL_AND_CHECK_STATUS) {
1527                 reg_icr = E1000_READ_REG(&adapter->hw, E1000_ICR);
1528                 if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
1529                         callout_stop(&adapter->timer);
1530                         adapter->hw.mac.get_link_status = 1;
1531                         em_update_link_status(adapter);
1532                         callout_reset(&adapter->timer, hz,
1533                             em_local_timer, adapter);
1534                 }
1535         }
1536         EM_CORE_UNLOCK(adapter);
1537
1538         em_rxeof(rxr, count, &rx_done);
1539
1540         EM_TX_LOCK(txr);
1541         em_txeof(txr);
1542 #ifdef EM_MULTIQUEUE
1543         if (!drbr_empty(ifp, txr->br))
1544                 em_mq_start_locked(ifp, txr);
1545 #else
1546         if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
1547                 em_start_locked(ifp, txr);
1548 #endif
1549         EM_TX_UNLOCK(txr);
1550
1551         return (rx_done);
1552 }
1553 #endif /* DEVICE_POLLING */
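/*
 * em_poll() above implements the polling(4) contract: the stack calls
 * it with a packet budget ("count") instead of taking interrupts, and
 * periodically with POLL_AND_CHECK_STATUS so link changes are still
 * noticed; the return value reports how many RX packets were processed.
 */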
1554
1555
1556 /*********************************************************************
1557  *
1558  *  Fast Legacy/MSI Combined Interrupt Service routine  
1559  *
1560  *********************************************************************/
1561 static int
1562 em_irq_fast(void *arg)
1563 {
1564         struct adapter  *adapter = arg;
1565         struct ifnet    *ifp;
1566         u32             reg_icr;
1567
1568         ifp = adapter->ifp;
1569
1570         reg_icr = E1000_READ_REG(&adapter->hw, E1000_ICR);
1571
1572         /* Hot eject?  */
1573         if (reg_icr == 0xffffffff)
1574                 return FILTER_STRAY;
1575
1576         /* Definitely not our interrupt.  */
1577         if (reg_icr == 0x0)
1578                 return FILTER_STRAY;
1579
1580         /*
1581          * Starting with the 82571 chip, bit 31 should be used to
1582          * determine whether the interrupt belongs to us.
1583          */
1584         if (adapter->hw.mac.type >= e1000_82571 &&
1585             (reg_icr & E1000_ICR_INT_ASSERTED) == 0)
1586                 return FILTER_STRAY;
1587
1588         em_disable_intr(adapter);
1589         taskqueue_enqueue(adapter->tq, &adapter->que_task);
1590
1591         /* Link status change */
1592         if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
1593                 adapter->hw.mac.get_link_status = 1;
1594                 taskqueue_enqueue(taskqueue_fast, &adapter->link_task);
1595         }
1596
1597         if (reg_icr & E1000_ICR_RXO)
1598                 adapter->rx_overruns++;
1599         return FILTER_HANDLED;
1600 }
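/*
 * em_irq_fast() is registered as an interrupt filter (see
 * em_allocate_legacy() below), so it runs in primary interrupt context
 * and must not sleep or take regular mutexes. It therefore only reads
 * the ICR, masks further interrupts, and defers the real work to
 * taskqueues (que_task for RX/TX, link_task for link changes),
 * returning FILTER_HANDLED for our interrupts and FILTER_STRAY
 * otherwise.
 */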
1601
1602 /* Combined RX/TX handler, used by Legacy and MSI */
1603 static void
1604 em_handle_que(void *context, int pending)
1605 {
1606         struct adapter  *adapter = context;
1607         struct ifnet    *ifp = adapter->ifp;
1608         struct tx_ring  *txr = adapter->tx_rings;
1609         struct rx_ring  *rxr = adapter->rx_rings;
1610
1611         if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
1612                 bool more = em_rxeof(rxr, adapter->rx_process_limit, NULL);
1613
1614                 EM_TX_LOCK(txr);
1615                 em_txeof(txr);
1616 #ifdef EM_MULTIQUEUE
1617                 if (!drbr_empty(ifp, txr->br))
1618                         em_mq_start_locked(ifp, txr);
1619 #else
1620                 if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
1621                         em_start_locked(ifp, txr);
1622 #endif
1623                 EM_TX_UNLOCK(txr);
1624                 if (more) {
1625                         taskqueue_enqueue(adapter->tq, &adapter->que_task);
1626                         return;
1627                 }
1628         }
1629
1630         em_enable_intr(adapter);
1631         return;
1632 }
1633
1634
1635 /*********************************************************************
1636  *
1637  *  MSIX Interrupt Service Routines
1638  *
1639  **********************************************************************/
1640 static void
1641 em_msix_tx(void *arg)
1642 {
1643         struct tx_ring *txr = arg;
1644         struct adapter *adapter = txr->adapter;
1645         struct ifnet    *ifp = adapter->ifp;
1646
1647         ++txr->tx_irq;
1648         EM_TX_LOCK(txr);
1649         em_txeof(txr);
1650 #ifdef EM_MULTIQUEUE
1651         if (!drbr_empty(ifp, txr->br))
1652                 em_mq_start_locked(ifp, txr);
1653 #else
1654         if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
1655                 em_start_locked(ifp, txr);
1656 #endif
1657
1658         /* Reenable this interrupt */
1659         E1000_WRITE_REG(&adapter->hw, E1000_IMS, txr->ims);
1660         EM_TX_UNLOCK(txr);
1661         return;
1662 }
1663
1664 /*********************************************************************
1665  *
1666  *  MSIX RX Interrupt Service routine
1667  *
1668  **********************************************************************/
1669
1670 static void
1671 em_msix_rx(void *arg)
1672 {
1673         struct rx_ring  *rxr = arg;
1674         struct adapter  *adapter = rxr->adapter;
1675         bool            more;
1676
1677         ++rxr->rx_irq;
1678         if (!(adapter->ifp->if_drv_flags & IFF_DRV_RUNNING))
1679                 return;
1680         more = em_rxeof(rxr, adapter->rx_process_limit, NULL);
1681         if (more)
1682                 taskqueue_enqueue(rxr->tq, &rxr->rx_task);
1683         else {
1684                 /* Reenable this interrupt */
1685                 E1000_WRITE_REG(&adapter->hw, E1000_IMS, rxr->ims);
1686         }
1687         return;
1688 }
1689
1690 /*********************************************************************
1691  *
1692  *  MSIX Link Fast Interrupt Service routine
1693  *
1694  **********************************************************************/
1695 static void
1696 em_msix_link(void *arg)
1697 {
1698         struct adapter  *adapter = arg;
1699         u32             reg_icr;
1700
1701         ++adapter->link_irq;
1702         reg_icr = E1000_READ_REG(&adapter->hw, E1000_ICR);
1703
1704         if (reg_icr & E1000_ICR_RXO)
1705                 adapter->rx_overruns++;
1706
1707         if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
1708                 adapter->hw.mac.get_link_status = 1;
1709                 em_handle_link(adapter, 0);
1710         } else
1711                 E1000_WRITE_REG(&adapter->hw, E1000_IMS,
1712                     EM_MSIX_LINK | E1000_IMS_LSC);
1713         /*
1714         ** Because we must read the ICR for this interrupt,
1715         ** the read may clear other causes via autoclear; for
1716         ** this reason we simply re-raise a soft interrupt
1717         ** for all of those vectors.
1718         */
1719         if (reg_icr) {
1720                 E1000_WRITE_REG(&adapter->hw,
1721                         E1000_ICS, adapter->ims);
1722         }
1723         return;
1724 }
1725
1726 static void
1727 em_handle_rx(void *context, int pending)
1728 {
1729         struct rx_ring  *rxr = context;
1730         struct adapter  *adapter = rxr->adapter;
1731         bool            more;
1732
1733         more = em_rxeof(rxr, adapter->rx_process_limit, NULL);
1734         if (more)
1735                 taskqueue_enqueue(rxr->tq, &rxr->rx_task);
1736         else {
1737                 /* Reenable this interrupt */
1738                 E1000_WRITE_REG(&adapter->hw, E1000_IMS, rxr->ims);
1739         }
1740 }
1741
1742 static void
1743 em_handle_tx(void *context, int pending)
1744 {
1745         struct tx_ring  *txr = context;
1746         struct adapter  *adapter = txr->adapter;
1747         struct ifnet    *ifp = adapter->ifp;
1748
1749         EM_TX_LOCK(txr);
1750         em_txeof(txr);
1751 #ifdef EM_MULTIQUEUE
1752         if (!drbr_empty(ifp, txr->br))
1753                 em_mq_start_locked(ifp, txr);
1754 #else
1755         if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
1756                 em_start_locked(ifp, txr);
1757 #endif
1758         E1000_WRITE_REG(&adapter->hw, E1000_IMS, txr->ims);
1759         EM_TX_UNLOCK(txr);
1760 }
1761
1762 static void
1763 em_handle_link(void *context, int pending)
1764 {
1765         struct adapter  *adapter = context;
1766         struct tx_ring  *txr = adapter->tx_rings;
1767         struct ifnet *ifp = adapter->ifp;
1768
1769         if (!(ifp->if_drv_flags & IFF_DRV_RUNNING))
1770                 return;
1771
1772         EM_CORE_LOCK(adapter);
1773         callout_stop(&adapter->timer);
1774         em_update_link_status(adapter);
1775         callout_reset(&adapter->timer, hz, em_local_timer, adapter);
1776         E1000_WRITE_REG(&adapter->hw, E1000_IMS,
1777             EM_MSIX_LINK | E1000_IMS_LSC);
1778         if (adapter->link_active) {
1779                 for (int i = 0; i < adapter->num_queues; i++, txr++) {
1780                         EM_TX_LOCK(txr);
1781 #ifdef EM_MULTIQUEUE
1782                         if (!drbr_empty(ifp, txr->br))
1783                                 em_mq_start_locked(ifp, txr);
1784 #else
1785                         if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
1786                                 em_start_locked(ifp, txr);
1787 #endif
1788                         EM_TX_UNLOCK(txr);
1789                 }
1790         }
1791         EM_CORE_UNLOCK(adapter);
1792 }
1793
1794
1795 /*********************************************************************
1796  *
1797  *  Media Ioctl callback
1798  *
1799  *  This routine is called whenever the user queries the status of
1800  *  the interface using ifconfig.
1801  *
1802  **********************************************************************/
1803 static void
1804 em_media_status(struct ifnet *ifp, struct ifmediareq *ifmr)
1805 {
1806         struct adapter *adapter = ifp->if_softc;
1807         u_char fiber_type = IFM_1000_SX;
1808
1809         INIT_DEBUGOUT("em_media_status: begin");
1810
1811         EM_CORE_LOCK(adapter);
1812         em_update_link_status(adapter);
1813
1814         ifmr->ifm_status = IFM_AVALID;
1815         ifmr->ifm_active = IFM_ETHER;
1816
1817         if (!adapter->link_active) {
1818                 EM_CORE_UNLOCK(adapter);
1819                 return;
1820         }
1821
1822         ifmr->ifm_status |= IFM_ACTIVE;
1823
1824         if ((adapter->hw.phy.media_type == e1000_media_type_fiber) ||
1825             (adapter->hw.phy.media_type == e1000_media_type_internal_serdes)) {
1826                 ifmr->ifm_active |= fiber_type | IFM_FDX;
1827         } else {
1828                 switch (adapter->link_speed) {
1829                 case 10:
1830                         ifmr->ifm_active |= IFM_10_T;
1831                         break;
1832                 case 100:
1833                         ifmr->ifm_active |= IFM_100_TX;
1834                         break;
1835                 case 1000:
1836                         ifmr->ifm_active |= IFM_1000_T;
1837                         break;
1838                 }
1839                 if (adapter->link_duplex == FULL_DUPLEX)
1840                         ifmr->ifm_active |= IFM_FDX;
1841                 else
1842                         ifmr->ifm_active |= IFM_HDX;
1843         }
1844         EM_CORE_UNLOCK(adapter);
1845 }
1846
1847 /*********************************************************************
1848  *
1849  *  Media Ioctl callback
1850  *
1851  *  This routine is called when the user changes speed/duplex using
1852  *  media/mediaopt options with ifconfig.
1853  *
1854  **********************************************************************/
1855 static int
1856 em_media_change(struct ifnet *ifp)
1857 {
1858         struct adapter *adapter = ifp->if_softc;
1859         struct ifmedia  *ifm = &adapter->media;
1860
1861         INIT_DEBUGOUT("em_media_change: begin");
1862
1863         if (IFM_TYPE(ifm->ifm_media) != IFM_ETHER)
1864                 return (EINVAL);
1865
1866         EM_CORE_LOCK(adapter);
1867         switch (IFM_SUBTYPE(ifm->ifm_media)) {
1868         case IFM_AUTO:
1869                 adapter->hw.mac.autoneg = DO_AUTO_NEG;
1870                 adapter->hw.phy.autoneg_advertised = AUTONEG_ADV_DEFAULT;
1871                 break;
1872         case IFM_1000_LX:
1873         case IFM_1000_SX:
1874         case IFM_1000_T:
1875                 adapter->hw.mac.autoneg = DO_AUTO_NEG;
1876                 adapter->hw.phy.autoneg_advertised = ADVERTISE_1000_FULL;
1877                 break;
1878         case IFM_100_TX:
1879                 adapter->hw.mac.autoneg = FALSE;
1880                 adapter->hw.phy.autoneg_advertised = 0;
1881                 if ((ifm->ifm_media & IFM_GMASK) == IFM_FDX)
1882                         adapter->hw.mac.forced_speed_duplex = ADVERTISE_100_FULL;
1883                 else
1884                         adapter->hw.mac.forced_speed_duplex = ADVERTISE_100_HALF;
1885                 break;
1886         case IFM_10_T:
1887                 adapter->hw.mac.autoneg = FALSE;
1888                 adapter->hw.phy.autoneg_advertised = 0;
1889                 if ((ifm->ifm_media & IFM_GMASK) == IFM_FDX)
1890                         adapter->hw.mac.forced_speed_duplex = ADVERTISE_10_FULL;
1891                 else
1892                         adapter->hw.mac.forced_speed_duplex = ADVERTISE_10_HALF;
1893                 break;
1894         default:
1895                 device_printf(adapter->dev, "Unsupported media type\n");
1896         }
1897
1898         em_init_locked(adapter);
1899         EM_CORE_UNLOCK(adapter);
1900
1901         return (0);
1902 }
1903
1904 /*********************************************************************
1905  *
1906  *  This routine maps the mbufs to tx descriptors.
1907  *
1908  *  return 0 on success, positive on failure
1909  **********************************************************************/
1910
1911 static int
1912 em_xmit(struct tx_ring *txr, struct mbuf **m_headp)
1913 {
1914         struct adapter          *adapter = txr->adapter;
1915         bus_dma_segment_t       segs[EM_MAX_SCATTER];
1916         bus_dmamap_t            map;
1917         struct em_txbuffer      *tx_buffer, *tx_buffer_mapped;
1918         struct e1000_tx_desc    *ctxd = NULL;
1919         struct mbuf             *m_head;
1920         struct ether_header     *eh;
1921         struct ip               *ip = NULL;
1922         struct tcphdr           *tp = NULL;
1923         u32                     txd_upper = 0, txd_lower = 0;
1924         int                     ip_off, poff;
1925         int                     nsegs, i, j, first, last = 0;
1926         int                     error;
1927         bool                    do_tso, tso_desc, remap = TRUE;
1928
1929         m_head = *m_headp;
1930         do_tso = (m_head->m_pkthdr.csum_flags & CSUM_TSO);
1931         tso_desc = FALSE;
1932         ip_off = poff = 0;
1933
1934         /*
1935          * Intel recommends that the entire IP/TCP header reside in a
1936          * single buffer. If multiple descriptors are used to describe
1937          * the IP and TCP header, each descriptor should describe one or
1938          * more complete headers; descriptors referencing only parts of
1939          * headers are not supported. If all layer headers are not
1940          * coalesced into a single buffer, each buffer should not cross
1941          * a 4KB boundary or be larger than the maximum read request size.
1942          * The controller also requires modifying the IP/TCP header to
1943          * make TSO work, so we first get a writable mbuf chain and then
1944          * coalesce the ethernet/IP/TCP headers into a single buffer to
1945          * meet the controller's requirements. This also simplifies
1946          * IP/TCP/UDP checksum offloading, which has similar restrictions.
1947          */
1948         if (do_tso || m_head->m_pkthdr.csum_flags & CSUM_OFFLOAD) {
1949                 if (do_tso || (m_head->m_next != NULL && 
1950                     m_head->m_pkthdr.csum_flags & CSUM_OFFLOAD)) {
1951                         if (M_WRITABLE(*m_headp) == 0) {
1952                                 m_head = m_dup(*m_headp, M_NOWAIT);
1953                                 m_freem(*m_headp);
1954                                 if (m_head == NULL) {
1955                                         *m_headp = NULL;
1956                                         return (ENOBUFS);
1957                                 }
1958                                 *m_headp = m_head;
1959                         }
1960                 }
1961                 /*
1962                  * XXX
1963                  * Assume IPv4, we don't have TSO/checksum offload support
1964                  * for IPv6 yet.
1965                  */
1966                 ip_off = sizeof(struct ether_header);
1967                 if (m_head->m_len < ip_off) {
1968                         m_head = m_pullup(m_head, ip_off);
1969                         if (m_head == NULL) {
1970                                 *m_headp = NULL;
1971                                 return (ENOBUFS);
1972                         }
1973                 }
1974                 eh = mtod(m_head, struct ether_header *);
1975                 if (eh->ether_type == htons(ETHERTYPE_VLAN)) {
1976                         ip_off = sizeof(struct ether_vlan_header);
1977                         if (m_head->m_len < ip_off) {
1978                                 m_head = m_pullup(m_head, ip_off);
1979                                 if (m_head == NULL) {
1980                                         *m_headp = NULL;
1981                                         return (ENOBUFS);
1982                                 }
1983                         }
1984                 }
1985                 if (m_head->m_len < ip_off + sizeof(struct ip)) {
1986                         m_head = m_pullup(m_head, ip_off + sizeof(struct ip));
1987                         if (m_head == NULL) {
1988                                 *m_headp = NULL;
1989                                 return (ENOBUFS);
1990                         }
1991                 }
1992                 ip = (struct ip *)(mtod(m_head, char *) + ip_off);
1993                 poff = ip_off + (ip->ip_hl << 2);
1994
1995                 if (do_tso || (m_head->m_pkthdr.csum_flags & CSUM_TCP)) {
1996                         if (m_head->m_len < poff + sizeof(struct tcphdr)) {
1997                                 m_head = m_pullup(m_head, poff +
1998                                     sizeof(struct tcphdr));
1999                                 if (m_head == NULL) {
2000                                         *m_headp = NULL;
2001                                         return (ENOBUFS);
2002                                 }
2003                         }
2004                         tp = (struct tcphdr *)(mtod(m_head, char *) + poff);
2005                         /*
2006                          * TSO workaround:
2007                          *   pull TSO_WORKAROUND (4) more bytes into the first mbuf.
2008                          */
2009                         if (m_head->m_len < poff + (tp->th_off << 2)) {
2010                                 m_head = m_pullup(m_head, poff +
2011                                                  (tp->th_off << 2) +
2012                                                  TSO_WORKAROUND);
2013                                 if (m_head == NULL) {
2014                                         *m_headp = NULL;
2015                                         return (ENOBUFS);
2016                                 }
2017                         }
2018                         ip = (struct ip *)(mtod(m_head, char *) + ip_off);
2019                         tp = (struct tcphdr *)(mtod(m_head, char *) + poff);
2020                         if (do_tso) {
2021                                 ip->ip_len = htons(m_head->m_pkthdr.tso_segsz +
2022                                                   (ip->ip_hl << 2) +
2023                                                   (tp->th_off << 2));
2024                                 ip->ip_sum = 0;
2025                                 /*
2026                                  * The TCP pseudo-header checksum must not
2027                                  * include the TCP payload length, so the
2028                                  * driver recomputes it here as the hardware
2029                                  * expects to see it, in adherence to
2030                                  * Microsoft's Large Send specification.
2031                                  */
2032                                 tp->th_sum = in_pseudo(ip->ip_src.s_addr,
2033                                     ip->ip_dst.s_addr, htons(IPPROTO_TCP));
2034                         }
2035                 } else if (m_head->m_pkthdr.csum_flags & CSUM_UDP) {
2036                         if (m_head->m_len < poff + sizeof(struct udphdr)) {
2037                                 m_head = m_pullup(m_head, poff +
2038                                     sizeof(struct udphdr));
2039                                 if (m_head == NULL) {
2040                                         *m_headp = NULL;
2041                                         return (ENOBUFS);
2042                                 }
2043                         }
2044                         ip = (struct ip *)(mtod(m_head, char *) + ip_off);
2045                 }
2046                 *m_headp = m_head;
2047         }
2048
2049         /*
2050          * Map the packet for DMA
2051          *
2052          * Capture the first descriptor index;
2053          * this descriptor will have the index
2054          * of the EOP, which is the only one that
2055          * now gets a DONE bit writeback.
2056          */
2057         first = txr->next_avail_desc;
2058         tx_buffer = &txr->tx_buffers[first];
2059         tx_buffer_mapped = tx_buffer;
2060         map = tx_buffer->map;
2061
2062 retry:
2063         error = bus_dmamap_load_mbuf_sg(txr->txtag, map,
2064             *m_headp, segs, &nsegs, BUS_DMA_NOWAIT);
2065
2066         /*
2067          * There are two types of errors we can (try) to handle:
2068          * - EFBIG means the mbuf chain was too long and bus_dma ran
2069          *   out of segments.  Defragment the mbuf chain and try again.
2070          * - ENOMEM means bus_dma could not obtain enough bounce buffers
2071          *   at this point in time.  Defer sending and try again later.
2072          * All other errors, in particular EINVAL, are fatal and prevent the
2073          * mbuf chain from ever going through.  Drop it and report error.
2074          */
2075         if (error == EFBIG && remap) {
2076                 struct mbuf *m;
2077
2078                 m = m_collapse(*m_headp, M_NOWAIT, EM_MAX_SCATTER);
2079                 if (m == NULL) {
2080                         adapter->mbuf_defrag_failed++;
2081                         m_freem(*m_headp);
2082                         *m_headp = NULL;
2083                         return (ENOBUFS);
2084                 }
2085                 *m_headp = m;
2086
2087                 /* Try it again, but only once */
2088                 remap = FALSE;
2089                 goto retry;
2090         } else if (error != 0) {
2091                 adapter->no_tx_dma_setup++;
2092                 m_freem(*m_headp);
2093                 *m_headp = NULL;
2094                 return (error);
2095         }
2096
2097         /*
2098          * TSO Hardware workaround, if this packet is not
2099          * TSO, and is only a single descriptor long, and
2100          * it follows a TSO burst, then we need to add a
2101          * sentinel descriptor to prevent premature writeback.
2102          */
2103         if ((!do_tso) && (txr->tx_tso == TRUE)) {
2104                 if (nsegs == 1)
2105                         tso_desc = TRUE;
2106                 txr->tx_tso = FALSE;
2107         }
2108
2109         if (txr->tx_avail < (nsegs + EM_MAX_SCATTER)) {
2110                 txr->no_desc_avail++;
2111                 bus_dmamap_unload(txr->txtag, map);
2112                 return (ENOBUFS);
2113         }
2114         m_head = *m_headp;
2115
2116         /* Do hardware assists */
2117         if (m_head->m_pkthdr.csum_flags & CSUM_TSO) {
2118                 em_tso_setup(txr, m_head, ip_off, ip, tp,
2119                     &txd_upper, &txd_lower);
2120                 /* we need to make a final sentinel transmit desc */
2121                 tso_desc = TRUE;
2122         } else if (m_head->m_pkthdr.csum_flags & CSUM_OFFLOAD)
2123                 em_transmit_checksum_setup(txr, m_head,
2124                     ip_off, ip, &txd_upper, &txd_lower);
2125
2126         if (m_head->m_flags & M_VLANTAG) {
2127                 /* Set the vlan id. */
2128                 txd_upper |=
2129                     (htole16(m_head->m_pkthdr.ether_vtag) << 16);
2130                 /* Tell hardware to add tag */
2131                 txd_lower |= htole32(E1000_TXD_CMD_VLE);
2132         }
2133
2134         i = txr->next_avail_desc;
2135
2136         /* Set up our transmit descriptors */
2137         for (j = 0; j < nsegs; j++) {
2138                 bus_size_t seg_len;
2139                 bus_addr_t seg_addr;
2140
2141                 tx_buffer = &txr->tx_buffers[i];
2142                 ctxd = &txr->tx_base[i];
2143                 seg_addr = segs[j].ds_addr;
2144                 seg_len  = segs[j].ds_len;
2145                 /*
2146                 ** TSO Workaround:
2147                 ** If this is the last descriptor, we want to
2148                 ** split it so we have a small final sentinel
2149                 */
2150                 if (tso_desc && (j == (nsegs - 1)) && (seg_len > 8)) {
2151                         seg_len -= TSO_WORKAROUND;
2152                         ctxd->buffer_addr = htole64(seg_addr);
2153                         ctxd->lower.data = htole32(
2154                                 adapter->txd_cmd | txd_lower | seg_len);
2155                         ctxd->upper.data = htole32(txd_upper);
2156                         if (++i == adapter->num_tx_desc)
2157                                 i = 0;
2158
2159                         /* Now make the sentinel */     
2160                         txr->tx_avail--;
2161                         ctxd = &txr->tx_base[i];
2162                         tx_buffer = &txr->tx_buffers[i];
2163                         ctxd->buffer_addr =
2164                             htole64(seg_addr + seg_len);
2165                         ctxd->lower.data = htole32(
2166                             adapter->txd_cmd | txd_lower | TSO_WORKAROUND);
2167                         ctxd->upper.data =
2168                             htole32(txd_upper);
2169                         last = i;
2170                         if (++i == adapter->num_tx_desc)
2171                                 i = 0;
2172                 } else {
2173                         ctxd->buffer_addr = htole64(seg_addr);
2174                         ctxd->lower.data = htole32(
2175                             adapter->txd_cmd | txd_lower | seg_len);
2176                         ctxd->upper.data = htole32(txd_upper);
2177                         last = i;
2178                         if (++i == adapter->num_tx_desc)
2179                                 i = 0;
2180                 }
2181                 tx_buffer->m_head = NULL;
2182                 tx_buffer->next_eop = -1;
2183         }
2184
2185         txr->next_avail_desc = i;
2186         txr->tx_avail -= nsegs;
2187
2188         tx_buffer->m_head = m_head;
2189         /*
2190         ** Here we swap the maps so the last descriptor,
2191         ** which gets the completion interrupt, has the
2192         ** real map, and the first descriptor gets the
2193         ** unused map from this last descriptor.
2194         */
2195         tx_buffer_mapped->map = tx_buffer->map;
2196         tx_buffer->map = map;
2197         bus_dmamap_sync(txr->txtag, map, BUS_DMASYNC_PREWRITE);
2198
2199         /*
2200          * Last Descriptor of Packet
2201          * needs End Of Packet (EOP)
2202          * and Report Status (RS)
2203          */
2204         ctxd->lower.data |=
2205             htole32(E1000_TXD_CMD_EOP | E1000_TXD_CMD_RS);
2206         /*
2207          * Keep track in the first buffer which
2208          * descriptor will be written back
2209          */
2210         tx_buffer = &txr->tx_buffers[first];
2211         tx_buffer->next_eop = last;
2212
2213         /*
2214          * Advance the Transmit Descriptor Tail (TDT), this tells the E1000
2215          * that this frame is available to transmit.
2216          */
2217         bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
2218             BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
2219         E1000_WRITE_REG(&adapter->hw, E1000_TDT(txr->me), i);
2220
2221         return (0);
2222 }
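/*
 * The DMA mapping in em_xmit() follows a common load/defragment/retry
 * pattern: EFBIG means the chain had more segments than the tag allows,
 * so it is compacted once with m_collapse() and reloaded. A minimal
 * sketch of that pattern under the same assumptions; "example_load_mbuf"
 * is a hypothetical helper name:
 */
#if 0   /* illustrative sketch, not compiled */
static int
example_load_mbuf(bus_dma_tag_t tag, bus_dmamap_t map, struct mbuf **mp,
    bus_dma_segment_t *segs, int *nsegs)
{
        struct mbuf     *m;
        int             error;

        error = bus_dmamap_load_mbuf_sg(tag, map, *mp, segs, nsegs,
            BUS_DMA_NOWAIT);
        if (error == EFBIG) {
                /* Too many segments: compact the chain, then retry once. */
                m = m_collapse(*mp, M_NOWAIT, EM_MAX_SCATTER);
                if (m == NULL) {
                        m_freem(*mp);
                        *mp = NULL;
                        return (ENOBUFS);
                }
                *mp = m;
                error = bus_dmamap_load_mbuf_sg(tag, map, *mp, segs,
                    nsegs, BUS_DMA_NOWAIT);
        }
        return (error);
}
#endif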
2223
2224 static void
2225 em_set_promisc(struct adapter *adapter)
2226 {
2227         struct ifnet    *ifp = adapter->ifp;
2228         u32             reg_rctl;
2229
2230         reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
2231
2232         if (ifp->if_flags & IFF_PROMISC) {
2233                 reg_rctl |= (E1000_RCTL_UPE | E1000_RCTL_MPE);
2234                 /* Turn this on if you want to see bad packets */
2235                 if (em_debug_sbp)
2236                         reg_rctl |= E1000_RCTL_SBP;
2237                 E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2238         } else if (ifp->if_flags & IFF_ALLMULTI) {
2239                 reg_rctl |= E1000_RCTL_MPE;
2240                 reg_rctl &= ~E1000_RCTL_UPE;
2241                 E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2242         }
2243 }
2244
2245 static void
2246 em_disable_promisc(struct adapter *adapter)
2247 {
2248         struct ifnet    *ifp = adapter->ifp;
2249         u32             reg_rctl;
2250         int             mcnt = 0;
2251
2252         reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
2253         reg_rctl &=  (~E1000_RCTL_UPE);
2254         if (ifp->if_flags & IFF_ALLMULTI)
2255                 mcnt = MAX_NUM_MULTICAST_ADDRESSES;
2256         else {
2257                 struct  ifmultiaddr *ifma;
2258 #if __FreeBSD_version < 800000
2259                 IF_ADDR_LOCK(ifp);
2260 #else   
2261                 if_maddr_rlock(ifp);
2262 #endif
2263                 TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
2264                         if (ifma->ifma_addr->sa_family != AF_LINK)
2265                                 continue;
2266                         if (mcnt == MAX_NUM_MULTICAST_ADDRESSES)
2267                                 break;
2268                         mcnt++;
2269                 }
2270 #if __FreeBSD_version < 800000
2271                 IF_ADDR_UNLOCK(ifp);
2272 #else
2273                 if_maddr_runlock(ifp);
2274 #endif
2275         }
2276         /* Don't disable if in MAX groups */
2277         if (mcnt < MAX_NUM_MULTICAST_ADDRESSES)
2278                 reg_rctl &=  (~E1000_RCTL_MPE);
2279         reg_rctl &=  (~E1000_RCTL_SBP);
2280         E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2281 }
2282
2283
2284 /*********************************************************************
2285  *  Multicast Update
2286  *
2287  *  This routine is called whenever multicast address list is updated.
2288  *
2289  **********************************************************************/
2290
2291 static void
2292 em_set_multi(struct adapter *adapter)
2293 {
2294         struct ifnet    *ifp = adapter->ifp;
2295         struct ifmultiaddr *ifma;
2296         u32 reg_rctl = 0;
2297         u8  *mta; /* Multicast array memory */
2298         int mcnt = 0;
2299
2300         IOCTL_DEBUGOUT("em_set_multi: begin");
2301
2302         mta = adapter->mta;
2303         bzero(mta, sizeof(u8) * ETH_ADDR_LEN * MAX_NUM_MULTICAST_ADDRESSES);
2304
2305         if (adapter->hw.mac.type == e1000_82542 && 
2306             adapter->hw.revision_id == E1000_REVISION_2) {
2307                 reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
2308                 if (adapter->hw.bus.pci_cmd_word & CMD_MEM_WRT_INVALIDATE)
2309                         e1000_pci_clear_mwi(&adapter->hw);
2310                 reg_rctl |= E1000_RCTL_RST;
2311                 E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2312                 msec_delay(5);
2313         }
2314
2315 #if __FreeBSD_version < 800000
2316         IF_ADDR_LOCK(ifp);
2317 #else
2318         if_maddr_rlock(ifp);
2319 #endif
2320         TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
2321                 if (ifma->ifma_addr->sa_family != AF_LINK)
2322                         continue;
2323
2324                 if (mcnt == MAX_NUM_MULTICAST_ADDRESSES)
2325                         break;
2326
2327                 bcopy(LLADDR((struct sockaddr_dl *)ifma->ifma_addr),
2328                     &mta[mcnt * ETH_ADDR_LEN], ETH_ADDR_LEN);
2329                 mcnt++;
2330         }
2331 #if __FreeBSD_version < 800000
2332         IF_ADDR_UNLOCK(ifp);
2333 #else
2334         if_maddr_runlock(ifp);
2335 #endif
2336         if (mcnt >= MAX_NUM_MULTICAST_ADDRESSES) {
2337                 reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
2338                 reg_rctl |= E1000_RCTL_MPE;
2339                 E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2340         } else
2341                 e1000_update_mc_addr_list(&adapter->hw, mta, mcnt);
2342
2343         if (adapter->hw.mac.type == e1000_82542 && 
2344             adapter->hw.revision_id == E1000_REVISION_2) {
2345                 reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
2346                 reg_rctl &= ~E1000_RCTL_RST;
2347                 E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2348                 msec_delay(5);
2349                 if (adapter->hw.bus.pci_cmd_word & CMD_MEM_WRT_INVALIDATE)
2350                         e1000_pci_set_mwi(&adapter->hw);
2351         }
2352 }
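/*
 * The RCTL_RST bracketing above is an 82542 rev 2.0 workaround: the
 * multicast table can only be rewritten safely while receives are held
 * in reset, and memory-write-invalidate must be off across the reset,
 * so the routine clears MWI, sets RCTL.RST, updates the list, then
 * undoes both, with a short settle delay on each side.
 */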
2353
2354
2355 /*********************************************************************
2356  *  Timer routine
2357  *
2358  *  This routine checks for link status and updates statistics.
2359  *
2360  **********************************************************************/
2361
2362 static void
2363 em_local_timer(void *arg)
2364 {
2365         struct adapter  *adapter = arg;
2366         struct ifnet    *ifp = adapter->ifp;
2367         struct tx_ring  *txr = adapter->tx_rings;
2368         struct rx_ring  *rxr = adapter->rx_rings;
2369         u32             trigger = 0;
2370
2371         EM_CORE_LOCK_ASSERT(adapter);
2372
2373         em_update_link_status(adapter);
2374         em_update_stats_counters(adapter);
2375
2376         /* Reset LAA into RAR[0] on 82571 */
2377         if ((adapter->hw.mac.type == e1000_82571) &&
2378             e1000_get_laa_state_82571(&adapter->hw))
2379                 e1000_rar_set(&adapter->hw, adapter->hw.mac.addr, 0);
2380
2381         /* Mask to use in the irq trigger */
2382         if (adapter->msix_mem) {
2383                 for (int i = 0; i < adapter->num_queues; i++, rxr++)
2384                         trigger |= rxr->ims;
2385                 rxr = adapter->rx_rings;
2386         } else
2387                 trigger = E1000_ICS_RXDMT0;
2388
2389         /*
2390         ** Check on the state of the TX queue(s); this
2391         ** can be done without the lock because it's RO
2392         ** and the HUNG state will be static if set.
2393         */
2394         for (int i = 0; i < adapter->num_queues; i++, txr++) {
2395                 if (txr->busy == EM_TX_HUNG)
2396                         goto hung;
2397                 if (txr->busy >= EM_TX_MAXTRIES)
2398                         txr->busy = EM_TX_HUNG;
2399                 /* Schedule a TX tasklet if needed */
2400                 if (txr->tx_avail <= EM_MAX_SCATTER)
2401                         taskqueue_enqueue(txr->tq, &txr->tx_task);
2402         }
2403         
2404         callout_reset(&adapter->timer, hz, em_local_timer, adapter);
2405 #ifndef DEVICE_POLLING
2406         /* Trigger an RX interrupt to guarantee mbuf refresh */
2407         E1000_WRITE_REG(&adapter->hw, E1000_ICS, trigger);
2408 #endif
2409         return;
2410 hung:
2411         /* Looks like we're hung */
2412         device_printf(adapter->dev, "Watchdog timeout Queue[%d] -- resetting\n",
2413                         txr->me);
2414         em_print_debug_info(adapter);
2415         ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
2416         adapter->watchdog_events++;
2417         em_init_locked(adapter);
2418 }
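/*
 * The txr->busy field checked above acts as a small watchdog ladder
 * rather than a timestamp; as far as the checks here show, it moves
 * through:
 *
 *      EM_TX_IDLE       no work pending, nothing to watch
 *      1..MAXTRIES      timer ticks observed with cleanup outstanding
 *      EM_TX_MAXTRIES   promoted to EM_TX_HUNG on the next tick
 *      EM_TX_HUNG       the watchdog fires and the interface is reset
 */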
2419
2420
2421 static void
2422 em_update_link_status(struct adapter *adapter)
2423 {
2424         struct e1000_hw *hw = &adapter->hw;
2425         struct ifnet *ifp = adapter->ifp;
2426         device_t dev = adapter->dev;
2427         struct tx_ring *txr = adapter->tx_rings;
2428         u32 link_check = 0;
2429
2430         /* Get the cached link value or read phy for real */
2431         switch (hw->phy.media_type) {
2432         case e1000_media_type_copper:
2433                 if (hw->mac.get_link_status) {
2434                         if (hw->mac.type == e1000_pch_spt)
2435                                 msec_delay(50);
2436                         /* Do the work to read phy */
2437                         e1000_check_for_link(hw);
2438                         link_check = !hw->mac.get_link_status;
2439                         if (link_check) /* ESB2 fix */
2440                                 e1000_cfg_on_link_up(hw);
2441                 } else
2442                         link_check = TRUE;
2443                 break;
2444         case e1000_media_type_fiber:
2445                 e1000_check_for_link(hw);
2446                 link_check = (E1000_READ_REG(hw, E1000_STATUS) &
2447                                  E1000_STATUS_LU);
2448                 break;
2449         case e1000_media_type_internal_serdes:
2450                 e1000_check_for_link(hw);
2451                 link_check = adapter->hw.mac.serdes_has_link;
2452                 break;
2453         default:
2454         case e1000_media_type_unknown:
2455                 break;
2456         }
2457
2458         /* Now check for a transition */
2459         if (link_check && (adapter->link_active == 0)) {
2460                 e1000_get_speed_and_duplex(hw, &adapter->link_speed,
2461                     &adapter->link_duplex);
2462                 /* Check if we must disable SPEED_MODE bit on PCI-E */
2463                 if ((adapter->link_speed != SPEED_1000) &&
2464                     ((hw->mac.type == e1000_82571) ||
2465                     (hw->mac.type == e1000_82572))) {
2466                         int tarc0;
2467                         tarc0 = E1000_READ_REG(hw, E1000_TARC(0));
2468                         tarc0 &= ~TARC_SPEED_MODE_BIT;
2469                         E1000_WRITE_REG(hw, E1000_TARC(0), tarc0);
2470                 }
2471                 if (bootverbose)
2472                         device_printf(dev, "Link is up %d Mbps %s\n",
2473                             adapter->link_speed,
2474                             ((adapter->link_duplex == FULL_DUPLEX) ?
2475                             "Full Duplex" : "Half Duplex"));
2476                 adapter->link_active = 1;
2477                 adapter->smartspeed = 0;
2478                 ifp->if_baudrate = adapter->link_speed * 1000000;
2479                 if_link_state_change(ifp, LINK_STATE_UP);
2480         } else if (!link_check && (adapter->link_active == 1)) {
2481                 ifp->if_baudrate = adapter->link_speed = 0;
2482                 adapter->link_duplex = 0;
2483                 if (bootverbose)
2484                         device_printf(dev, "Link is Down\n");
2485                 adapter->link_active = 0;
2486                 /* Link down, disable hang detection */
2487                 for (int i = 0; i < adapter->num_queues; i++, txr++)
2488                         txr->busy = EM_TX_IDLE;
2489                 if_link_state_change(ifp, LINK_STATE_DOWN);
2490         }
2491 }
2492
2493 /*********************************************************************
2494  *
2495  *  This routine disables all traffic on the adapter by issuing a
2496  *  global reset on the MAC and deallocates TX/RX buffers.
2497  *
2498  *  This routine should always be called with BOTH the CORE
2499  *  and TX locks.
2500  **********************************************************************/
2501
2502 static void
2503 em_stop(void *arg)
2504 {
2505         struct adapter  *adapter = arg;
2506         struct ifnet    *ifp = adapter->ifp;
2507         struct tx_ring  *txr = adapter->tx_rings;
2508
2509         EM_CORE_LOCK_ASSERT(adapter);
2510
2511         INIT_DEBUGOUT("em_stop: begin");
2512
2513         em_disable_intr(adapter);
2514         callout_stop(&adapter->timer);
2515
2516         /* Tell the stack that the interface is no longer active */
2517         ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
2518         ifp->if_drv_flags |= IFF_DRV_OACTIVE;
2519
2520         /* Disarm Hang Detection. */
2521         for (int i = 0; i < adapter->num_queues; i++, txr++) {
2522                 EM_TX_LOCK(txr);
2523                 txr->busy = EM_TX_IDLE;
2524                 EM_TX_UNLOCK(txr);
2525         }
2526
2527         /* I219 needs some special flushing to avoid hangs */
2528         if (adapter->hw.mac.type == e1000_pch_spt)
2529                 em_flush_desc_rings(adapter);
2530
2531         e1000_reset_hw(&adapter->hw);
2532         E1000_WRITE_REG(&adapter->hw, E1000_WUC, 0);
2533
2534         e1000_led_off(&adapter->hw);
2535         e1000_cleanup_led(&adapter->hw);
2536 }
2537
2538
2539 /*********************************************************************
2540  *
2541  *  Determine hardware revision.
2542  *
2543  **********************************************************************/
2544 static void
2545 em_identify_hardware(struct adapter *adapter)
2546 {
2547         device_t dev = adapter->dev;
2548
2549         /* Make sure bus mastering is enabled in PCI config space */
2550         pci_enable_busmaster(dev);
2551         adapter->hw.bus.pci_cmd_word = pci_read_config(dev, PCIR_COMMAND, 2);
2552
2553         /* Save off the information about this board */
2554         adapter->hw.vendor_id = pci_get_vendor(dev);
2555         adapter->hw.device_id = pci_get_device(dev);
2556         adapter->hw.revision_id = pci_read_config(dev, PCIR_REVID, 1);
2557         adapter->hw.subsystem_vendor_id =
2558             pci_read_config(dev, PCIR_SUBVEND_0, 2);
2559         adapter->hw.subsystem_device_id =
2560             pci_read_config(dev, PCIR_SUBDEV_0, 2);
2561
2562         /* Do Shared Code Init and Setup */
2563         if (e1000_set_mac_type(&adapter->hw)) {
2564                 device_printf(dev, "Setup init failure\n");
2565                 return;
2566         }
2567 }
2568
2569 static int
2570 em_allocate_pci_resources(struct adapter *adapter)
2571 {
2572         device_t        dev = adapter->dev;
2573         int             rid;
2574
2575         rid = PCIR_BAR(0);
2576         adapter->memory = bus_alloc_resource_any(dev, SYS_RES_MEMORY,
2577             &rid, RF_ACTIVE);
2578         if (adapter->memory == NULL) {
2579                 device_printf(dev, "Unable to allocate bus resource: memory\n");
2580                 return (ENXIO);
2581         }
2582         adapter->osdep.mem_bus_space_tag =
2583             rman_get_bustag(adapter->memory);
2584         adapter->osdep.mem_bus_space_handle =
2585             rman_get_bushandle(adapter->memory);
2586         adapter->hw.hw_addr = (u8 *)&adapter->osdep.mem_bus_space_handle;
2587
2588         adapter->hw.back = &adapter->osdep;
2589
2590         return (0);
2591 }
2592
2593 /*********************************************************************
2594  *
2595  *  Setup the Legacy or MSI Interrupt handler
2596  *
2597  **********************************************************************/
2598 int
2599 em_allocate_legacy(struct adapter *adapter)
2600 {
2601         device_t dev = adapter->dev;
2602         struct tx_ring  *txr = adapter->tx_rings;
2603         int error, rid = 0;
2604
2605         /* Manually turn off all interrupts */
2606         E1000_WRITE_REG(&adapter->hw, E1000_IMC, 0xffffffff);
2607
2608         if (adapter->msix == 1) /* using MSI */
2609                 rid = 1;
2610         /* We allocate a single interrupt resource */
2611         adapter->res = bus_alloc_resource_any(dev,
2612             SYS_RES_IRQ, &rid, RF_SHAREABLE | RF_ACTIVE);
2613         if (adapter->res == NULL) {
2614                 device_printf(dev, "Unable to allocate bus resource: "
2615                     "interrupt\n");
2616                 return (ENXIO);
2617         }
2618
2619         /*
2620          * Allocate a fast interrupt and the associated
2621          * deferred processing contexts.
2622          */
2623         TASK_INIT(&adapter->que_task, 0, em_handle_que, adapter);
2624         adapter->tq = taskqueue_create_fast("em_taskq", M_NOWAIT,
2625             taskqueue_thread_enqueue, &adapter->tq);
2626         taskqueue_start_threads(&adapter->tq, 1, PI_NET, "%s que",
2627             device_get_nameunit(adapter->dev));
2628         /* Use a TX-only tasklet, kicked by the local timer */
2629         TASK_INIT(&txr->tx_task, 0, em_handle_tx, txr);
2630         txr->tq = taskqueue_create_fast("em_txq", M_NOWAIT,
2631             taskqueue_thread_enqueue, &txr->tq);
2632         taskqueue_start_threads(&txr->tq, 1, PI_NET, "%s txq",
2633             device_get_nameunit(adapter->dev));
2634         TASK_INIT(&adapter->link_task, 0, em_handle_link, adapter);
2635         if ((error = bus_setup_intr(dev, adapter->res, INTR_TYPE_NET,
2636             em_irq_fast, NULL, adapter, &adapter->tag)) != 0) {
2637                 device_printf(dev, "Failed to register fast interrupt "
2638                             "handler: %d\n", error);
2639                 taskqueue_free(adapter->tq);
2640                 adapter->tq = NULL;
2641                 return (error);
2642         }
2643         
2644         return (0);
2645 }
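/*
 * Note the bus_setup_intr() usage: this legacy/MSI path passes
 * em_irq_fast as the fourth (filter) argument with a NULL ithread
 * handler, while em_allocate_msix() below passes NULL filters and
 * registers em_msix_rx/em_msix_tx/em_msix_link as fifth-argument
 * ithread handlers, which are allowed to take regular mutexes.
 */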
2646
2647 /*********************************************************************
2648  *
2649  *  Setup the MSIX Interrupt handlers
2650  *   This is not really multiqueue; rather,
2651  *   it is just separate interrupt vectors
2652  *   for TX, RX, and Link.
2653  *
2654  **********************************************************************/
2655 int
2656 em_allocate_msix(struct adapter *adapter)
2657 {
2658         device_t        dev = adapter->dev;
2659         struct          tx_ring *txr = adapter->tx_rings;
2660         struct          rx_ring *rxr = adapter->rx_rings;
2661         int             error, rid, vector = 0;
2662         int             cpu_id = 0;
2663
2664
2665         /* Make sure all interrupts are disabled */
2666         E1000_WRITE_REG(&adapter->hw, E1000_IMC, 0xffffffff);
2667
2668         /* First set up ring resources */
2669         for (int i = 0; i < adapter->num_queues; i++, rxr++, vector++) {
2670
2671                 /* RX ring */
2672                 rid = vector + 1;
2673
2674                 rxr->res = bus_alloc_resource_any(dev,
2675                     SYS_RES_IRQ, &rid, RF_ACTIVE);
2676                 if (rxr->res == NULL) {
2677                         device_printf(dev,
2678                             "Unable to allocate bus resource: "
2679                             "RX MSIX Interrupt %d\n", i);
2680                         return (ENXIO);
2681                 }
2682                 if ((error = bus_setup_intr(dev, rxr->res,
2683                     INTR_TYPE_NET | INTR_MPSAFE, NULL, em_msix_rx,
2684                     rxr, &rxr->tag)) != 0) {
2685                         device_printf(dev, "Failed to register RX handler");
2686                         return (error);
2687                 }
2688 #if __FreeBSD_version >= 800504
2689                 bus_describe_intr(dev, rxr->res, rxr->tag, "rx%d", i);
2690 #endif
2691                 rxr->msix = vector;
2692
2693                 if (em_last_bind_cpu < 0)
2694                         em_last_bind_cpu = CPU_FIRST();
2695                 cpu_id = em_last_bind_cpu;
2696                 bus_bind_intr(dev, rxr->res, cpu_id);
2697
2698                 TASK_INIT(&rxr->rx_task, 0, em_handle_rx, rxr);
2699                 rxr->tq = taskqueue_create_fast("em_rxq", M_NOWAIT,
2700                     taskqueue_thread_enqueue, &rxr->tq);
2701                 taskqueue_start_threads(&rxr->tq, 1, PI_NET, "%s rxq (cpuid %d)",
2702                     device_get_nameunit(adapter->dev), cpu_id);
2703                 /*
2704                 ** Set the bit that enables this interrupt
2705                 ** in E1000_IMS -- bits 20 and 21
2706                 ** are for RX0 and RX1; note this has
2707                 ** NOTHING to do with the MSIX vector
2708                 */
2709                 rxr->ims = 1 << (20 + i);
2710                 adapter->ims |= rxr->ims;
2711                 adapter->ivars |= (8 | rxr->msix) << (i * 4);
2712
2713                 em_last_bind_cpu = CPU_NEXT(em_last_bind_cpu);
2714         }
2715
2716         for (int i = 0; i < adapter->num_queues; i++, txr++, vector++) {
2717                 /* TX ring */
2718                 rid = vector + 1;
2719                 txr->res = bus_alloc_resource_any(dev,
2720                     SYS_RES_IRQ, &rid, RF_ACTIVE);
2721                 if (txr->res == NULL) {
2722                         device_printf(dev,
2723                             "Unable to allocate bus resource: "
2724                             "TX MSIX Interrupt %d\n", i);
2725                         return (ENXIO);
2726                 }
2727                 if ((error = bus_setup_intr(dev, txr->res,
2728                     INTR_TYPE_NET | INTR_MPSAFE, NULL, em_msix_tx,
2729                     txr, &txr->tag)) != 0) {
2730                         device_printf(dev, "Failed to register TX handler");
2731                         return (error);
2732                 }
2733 #if __FreeBSD_version >= 800504
2734                 bus_describe_intr(dev, txr->res, txr->tag, "tx%d", i);
2735 #endif
2736                 txr->msix = vector;
2737
2738                 if (em_last_bind_cpu < 0)
2739                         em_last_bind_cpu = CPU_FIRST();
2740                 cpu_id = em_last_bind_cpu;
2741                 bus_bind_intr(dev, txr->res, cpu_id);
2742
2743                 TASK_INIT(&txr->tx_task, 0, em_handle_tx, txr);
2744                 txr->tq = taskqueue_create_fast("em_txq", M_NOWAIT,
2745                     taskqueue_thread_enqueue, &txr->tq);
2746                 taskqueue_start_threads(&txr->tq, 1, PI_NET, "%s txq (cpuid %d)",
2747                     device_get_nameunit(adapter->dev), cpu_id);
2748                 /*
2749                 ** Set the bit to enable interrupt
2750                 ** in E1000_IMS -- bits 22 and 23
2751                 ** are for TX0 and TX1, note this has
2752                 ** NOTHING to do with the MSIX vector
2753                 */
2754                 txr->ims = 1 << (22 + i);
2755                 adapter->ims |= txr->ims;
2756                 adapter->ivars |= (8 | txr->msix) << (8 + (i * 4));
2757
2758                 em_last_bind_cpu = CPU_NEXT(em_last_bind_cpu);
2759         }
2760
2761         /* Link interrupt */
2762         rid = vector + 1;
2763         adapter->res = bus_alloc_resource_any(dev,
2764             SYS_RES_IRQ, &rid, RF_SHAREABLE | RF_ACTIVE);
2765         if (!adapter->res) {
2766                 device_printf(dev,"Unable to allocate "
2767                     "bus resource: Link interrupt [%d]\n", rid);
2768                 return (ENXIO);
2769         }
2770         /* Set the link handler function */
2771         error = bus_setup_intr(dev, adapter->res,
2772             INTR_TYPE_NET | INTR_MPSAFE, NULL,
2773             em_msix_link, adapter, &adapter->tag);
2774         if (error) {
2775                 adapter->res = NULL;
2776                 device_printf(dev, "Failed to register LINK handler");
2777                 return (error);
2778         }
2779 #if __FreeBSD_version >= 800504
2780         bus_describe_intr(dev, adapter->res, adapter->tag, "link");
2781 #endif
2782         adapter->linkvec = vector;
2783         adapter->ivars |=  (8 | vector) << 16;
2784         adapter->ivars |= 0x80000000;
2785
2786         return (0);
2787 }
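/*
** Editor's worked example (illustrative only, not driver code): with the
** default single queue, the loops above assign vector 0 to RX0, vector 1
** to TX0 and vector 2 to the link interrupt, so the IVAR image that the
** driver later writes to E1000_IVAR is built as:
**
**      ivars = (8 | 0) << 0            RX0  -> vector 0 (bit 3 = valid)
**            | (8 | 1) << 8            TX0  -> vector 1
**            | (8 | 2) << 16           link -> vector 2
**            | 0x80000000;             "other cause" enable
**
** i.e. ivars == 0x800a0908.  The rxr->ims/txr->ims bits (20..23) select
** which E1000_IMS causes fire, independent of the vector numbering.
*/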
2788
2789
2790 static void
2791 em_free_pci_resources(struct adapter *adapter)
2792 {
2793         device_t        dev = adapter->dev;
2794         struct tx_ring  *txr;
2795         struct rx_ring  *rxr;
2796         int             rid;
2797
2798
2799         /*
2800         ** Release all the queue interrupt resources:
2801         */
2802         for (int i = 0; i < adapter->num_queues; i++) {
2803                 txr = &adapter->tx_rings[i];
2804                 /* an early abort? */
2805                 if (txr == NULL)
2806                         break;
2807                 rid = txr->msix + 1;
2808                 if (txr->tag != NULL) {
2809                         bus_teardown_intr(dev, txr->res, txr->tag);
2810                         txr->tag = NULL;
2811                 }
2812                 if (txr->res != NULL)
2813                         bus_release_resource(dev, SYS_RES_IRQ,
2814                             rid, txr->res);
2815
2816                 rxr = &adapter->rx_rings[i];
2817                 /* an early abort? */
2818                 if (rxr == NULL)
2819                         break;
2820                 rid = rxr->msix + 1;
2821                 if (rxr->tag != NULL) {
2822                         bus_teardown_intr(dev, rxr->res, rxr->tag);
2823                         rxr->tag = NULL;
2824                 }
2825                 if (rxr->res != NULL)
2826                         bus_release_resource(dev, SYS_RES_IRQ,
2827                             rid, rxr->res);
2828         }
2829
2830         if (adapter->linkvec) /* we are doing MSIX */
2831                 rid = adapter->linkvec + 1;
2832         else
2833                 rid = (adapter->msix != 0) ? 1 : 0;
2834
2835         if (adapter->tag != NULL) {
2836                 bus_teardown_intr(dev, adapter->res, adapter->tag);
2837                 adapter->tag = NULL;
2838         }
2839
2840         if (adapter->res != NULL)
2841                 bus_release_resource(dev, SYS_RES_IRQ, rid, adapter->res);
2842
2843
2844         if (adapter->msix)
2845                 pci_release_msi(dev);
2846
2847         if (adapter->msix_mem != NULL)
2848                 bus_release_resource(dev, SYS_RES_MEMORY,
2849                     adapter->memrid, adapter->msix_mem);
2850
2851         if (adapter->memory != NULL)
2852                 bus_release_resource(dev, SYS_RES_MEMORY,
2853                     PCIR_BAR(0), adapter->memory);
2854
2855         if (adapter->flash != NULL)
2856                 bus_release_resource(dev, SYS_RES_MEMORY,
2857                     EM_FLASH, adapter->flash);
2858 }
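/*
** Editor's note (illustrative): the rid arithmetic above mirrors the
** allocation side -- MSIX vectors were allocated with rid = vector + 1,
** plain MSI uses rid 1, and a legacy INTx line uses rid 0.
*/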
2859
2860 /*
2861  * Setup MSI or MSI/X
2862  */
2863 static int
2864 em_setup_msix(struct adapter *adapter)
2865 {
2866         device_t dev = adapter->dev;
2867         int val;
2868
2869         /* Nearly always going to use one queue */
2870         adapter->num_queues = 1;
2871
2872         /*
2873         ** Try using MSI-X for Hartwell adapters
2874         */
2875         if ((adapter->hw.mac.type == e1000_82574) &&
2876             (em_enable_msix == TRUE)) {
2877 #ifdef EM_MULTIQUEUE
2878                 adapter->num_queues = (em_num_queues == 1) ? 1 : 2;
2879                 if (adapter->num_queues > 1)
2880                         em_enable_vectors_82574(adapter);
2881 #endif
2882                 /* Map the MSIX BAR */
2883                 adapter->memrid = PCIR_BAR(EM_MSIX_BAR);
2884                 adapter->msix_mem = bus_alloc_resource_any(dev,
2885                     SYS_RES_MEMORY, &adapter->memrid, RF_ACTIVE);
2886                 if (adapter->msix_mem == NULL) {
2887                         /* May not be enabled */
2888                         device_printf(adapter->dev,
2889                             "Unable to map MSIX table \n");
2890                         goto msi;
2891                 }
2892                 val = pci_msix_count(dev); 
2893
2894 #ifdef EM_MULTIQUEUE
2895                 /* We need 5 vectors in the multiqueue case */
2896                 if (adapter->num_queues > 1 ) {
2897                         if (val >= 5)
2898                                 val = 5;
2899                         else {
2900                                 adapter->num_queues = 1;
2901                                 device_printf(adapter->dev,
2902                                     "Insufficient MSIX vectors for >1 queue, "
2903                                     "using single queue...\n");
2904                                 goto msix_one;
2905                         }
2906                 } else {
2907 msix_one:
2908 #endif
2909                         if (val >= 3)
2910                                 val = 3;
2911                         else {
2912                                 device_printf(adapter->dev,
2913                                 "Insufficient MSIX vectors, using MSI\n");
2914                                 goto msi;
2915                         }
2916 #ifdef EM_MULTIQUEUE
2917                 }
2918 #endif
2919
2920                 if ((pci_alloc_msix(dev, &val) == 0)) {
2921                         device_printf(adapter->dev,
2922                             "Using MSIX interrupts "
2923                             "with %d vectors\n", val);
2924                         return (val);
2925                 }
2926
2927                 /*
2928                 ** If MSIX alloc failed or provided us with
2929                 ** less than needed, free and fall through to MSI
2930                 */
2931                 pci_release_msi(dev);
2932         }
2933 msi:
2934         if (adapter->msix_mem != NULL) {
2935                 bus_release_resource(dev, SYS_RES_MEMORY,
2936                     adapter->memrid, adapter->msix_mem);
2937                 adapter->msix_mem = NULL;
2938         }
2939         val = 1;
2940         if (pci_alloc_msi(dev, &val) == 0) {
2941                 device_printf(adapter->dev, "Using an MSI interrupt\n");
2942                 return (val);
2943         } 
2944         /* Should only happen due to manual configuration */
2945         device_printf(adapter->dev,"No MSI/MSIX using a Legacy IRQ\n");
2946         return (0);
2947 }
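/*
** Editor's note (sketch of the arithmetic above): the vector budget
** follows directly from em_allocate_msix() -- one vector per RX ring,
** one per TX ring, plus one for link:
**
**      num_queues == 1  ->  1 RX + 1 TX + 1 link = 3 vectors
**      num_queues == 2  ->  2 RX + 2 TX + 1 link = 5 vectors
**
** The return value is the vector count actually allocated (presumably
** stored in adapter->msix by the caller); 0 means legacy INTx.
*/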
2948
2949
2950 /*
2951 ** The 3 following flush routines are used as a workaround in the
2952 ** I219 client parts and only for them.
2953 **
2954 ** em_flush_tx_ring - remove all descriptors from the tx_ring
2955 **
2956 ** We want to clear all pending descriptors from the TX ring; the
2957 ** clearing happens when the HW processes them. We assign the ring's own
2958 ** physical address as the dummy descriptor's buffer address; the data
2959 ** itself does not matter since we are about to reset the HW.
2960 */
2961 static void
2962 em_flush_tx_ring(struct adapter *adapter)
2963 {
2964         struct e1000_hw         *hw = &adapter->hw;
2965         struct tx_ring          *txr = adapter->tx_rings;
2966         struct e1000_tx_desc    *txd;
2967         u32                     tctl, txd_lower = E1000_TXD_CMD_IFCS;
2968         u16                     size = 512;
2969
2970         tctl = E1000_READ_REG(hw, E1000_TCTL);
2971         E1000_WRITE_REG(hw, E1000_TCTL, tctl | E1000_TCTL_EN);
2972
2973         txd = &txr->tx_base[txr->next_avail_desc++];
2974         if (txr->next_avail_desc == adapter->num_tx_desc)
2975                 txr->next_avail_desc = 0;
2976
2977         /* Just use the ring as a dummy buffer addr */
2978         txd->buffer_addr = txr->txdma.dma_paddr;
2979         txd->lower.data = htole32(txd_lower | size);
2980         txd->upper.data = 0;
2981
2982         /* flush descriptors to memory before notifying the HW */
2983         wmb();
2984
2985         E1000_WRITE_REG(hw, E1000_TDT(0), txr->next_avail_desc);
2986         mb();
2987         usec_delay(250);
2988 }
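/*
** Editor's note (sketch): advancing TDT past the dummy descriptor makes
** the DMA engine fetch and "transmit" it, which drains whatever stale
** descriptors the hardware still holds; the 250us delay gives the
** engine time to finish before the caller resets the device.
*/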
2989
2990 /*
2991 ** em_flush_rx_ring - remove all descriptors from the rx_ring
2992 **
2993 ** Mark all descriptors in the RX ring as consumed and disable the rx ring
2994 */
2995 static void
2996 em_flush_rx_ring(struct adapter *adapter)
2997 {
2998         struct e1000_hw *hw = &adapter->hw;
2999         u32             rctl, rxdctl;
3000
3001         rctl = E1000_READ_REG(hw, E1000_RCTL);
3002         E1000_WRITE_REG(hw, E1000_RCTL, rctl & ~E1000_RCTL_EN);
3003         E1000_WRITE_FLUSH(hw);
3004         usec_delay(150);
3005
3006         rxdctl = E1000_READ_REG(hw, E1000_RXDCTL(0));
3007         /* zero the lower 14 bits (prefetch and host thresholds) */
3008         rxdctl &= 0xffffc000;
3009         /*
3010          * update thresholds: prefetch threshold to 31, host threshold to 1
3011          * and make sure the granularity is "descriptors" and not "cache lines"
3012          */
3013         rxdctl |= (0x1F | (1 << 8) | E1000_RXDCTL_THRESH_UNIT_DESC);
3014         E1000_WRITE_REG(hw, E1000_RXDCTL(0), rxdctl);
3015
3016         /* momentarily enable the RX ring for the changes to take effect */
3017         E1000_WRITE_REG(hw, E1000_RCTL, rctl | E1000_RCTL_EN);
3018         E1000_WRITE_FLUSH(hw);
3019         usec_delay(150);
3020         E1000_WRITE_REG(hw, E1000_RCTL, rctl & ~E1000_RCTL_EN);
3021 }
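/*
** Editor's note on the RXDCTL value above (bit positions per my reading
** of the e1000 datasheets -- treat as an assumption): PTHRESH lives in
** bits 5:0 and HTHRESH in bits 13:8, which is why the mask 0xffffc000
** clears both; 0x1f then sets PTHRESH = 31, (1 << 8) sets HTHRESH = 1,
** and E1000_RXDCTL_THRESH_UNIT_DESC switches the granularity from
** cache lines to descriptors.
*/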
3022
3023 /*
3024 ** em_flush_desc_rings - remove all descriptors from the descriptor rings
3025 **
3026 ** On I219, the descriptor rings must be emptied before resetting the HW
3027 ** or before changing the device state to D3 during runtime (runtime PM).
3028 **
3029 ** Failure to do this will cause the HW to enter a unit hang state which can
3030 ** only be released by a PCI reset of the device
3031 **
3032 */
3033 static void
3034 em_flush_desc_rings(struct adapter *adapter)
3035 {
3036         struct e1000_hw *hw = &adapter->hw;
3037         device_t        dev = adapter->dev;
3038         u16             hang_state;
3039         u32             fext_nvm11, tdlen;
3040  
3041         /* First, disable MULR fix in FEXTNVM11 */
3042         fext_nvm11 = E1000_READ_REG(hw, E1000_FEXTNVM11);
3043         fext_nvm11 |= E1000_FEXTNVM11_DISABLE_MULR_FIX;
3044         E1000_WRITE_REG(hw, E1000_FEXTNVM11, fext_nvm11);
3045         
3046         /* do nothing if we're not in a faulty state, or if the queue is empty */
3047         tdlen = E1000_READ_REG(hw, E1000_TDLEN(0));
3048         hang_state = pci_read_config(dev, PCICFG_DESC_RING_STATUS, 2);
3049         if (!(hang_state & FLUSH_DESC_REQUIRED) || !tdlen)
3050                 return;
3051         em_flush_tx_ring(adapter);
3052
3053         /* recheck, maybe the fault is caused by the rx ring */
3054         hang_state = pci_read_config(dev, PCICFG_DESC_RING_STATUS, 2);
3055         if (hang_state & FLUSH_DESC_REQUIRED)
3056                 em_flush_rx_ring(adapter);
3057 }
3058
3059
3060 /*********************************************************************
3061  *
3062  *  Initialize the hardware to a configuration
3063  *  as specified by the adapter structure.
3064  *
3065  **********************************************************************/
3066 static void
3067 em_reset(struct adapter *adapter)
3068 {
3069         device_t        dev = adapter->dev;
3070         struct ifnet    *ifp = adapter->ifp;
3071         struct e1000_hw *hw = &adapter->hw;
3072         u16             rx_buffer_size;
3073         u32             pba;
3074
3075         INIT_DEBUGOUT("em_reset: begin");
3076
3077         /* Set up smart power down as default off on newer adapters. */
3078         if (!em_smart_pwr_down && (hw->mac.type == e1000_82571 ||
3079             hw->mac.type == e1000_82572)) {
3080                 u16 phy_tmp = 0;
3081
3082                 /* Speed up time to link by disabling smart power down. */
3083                 e1000_read_phy_reg(hw, IGP02E1000_PHY_POWER_MGMT, &phy_tmp);
3084                 phy_tmp &= ~IGP02E1000_PM_SPD;
3085                 e1000_write_phy_reg(hw, IGP02E1000_PHY_POWER_MGMT, phy_tmp);
3086         }
3087
3088         /*
3089          * Packet Buffer Allocation (PBA)
3090          * Writing PBA sets the receive portion of the buffer;
3091          * the remainder is used for the transmit buffer.
3092          */
3093         switch (hw->mac.type) {
3094         /* Total Packet Buffer on these is 48K */
3095         case e1000_82571:
3096         case e1000_82572:
3097         case e1000_80003es2lan:
3098                         pba = E1000_PBA_32K; /* 32K for Rx, 16K for Tx */
3099                 break;
3100         case e1000_82573: /* 82573: Total Packet Buffer is 32K */
3101                         pba = E1000_PBA_12K; /* 12K for Rx, 20K for Tx */
3102                 break;
3103         case e1000_82574:
3104         case e1000_82583:
3105                         pba = E1000_PBA_20K; /* 20K for Rx, 20K for Tx */
3106                 break;
3107         case e1000_ich8lan:
3108                 pba = E1000_PBA_8K;
3109                 break;
3110         case e1000_ich9lan:
3111         case e1000_ich10lan:
3112                 /* Boost Receive side for jumbo frames */
3113                 if (adapter->hw.mac.max_frame_size > 4096)
3114                         pba = E1000_PBA_14K;
3115                 else
3116                         pba = E1000_PBA_10K;
3117                 break;
3118         case e1000_pchlan:
3119         case e1000_pch2lan:
3120         case e1000_pch_lpt:
3121         case e1000_pch_spt:
3122                 pba = E1000_PBA_26K;
3123                 break;
3124         default:
3125                 if (adapter->hw.mac.max_frame_size > 8192)
3126                         pba = E1000_PBA_40K; /* 40K for Rx, 24K for Tx */
3127                 else
3128                         pba = E1000_PBA_48K; /* 48K for Rx, 16K for Tx */
3129         }
3130         E1000_WRITE_REG(&adapter->hw, E1000_PBA, pba);
3131
3132         /*
3133          * These parameters control the automatic generation (Tx) and
3134          * response (Rx) to Ethernet PAUSE frames.
3135          * - High water mark should allow for at least two frames to be
3136          *   received after sending an XOFF.
3137          * - Low water mark works best when it is very near the high water mark.
3138          *   This allows the receiver to restart by sending XON when it has
3139          *   drained a bit. Here we use an arbitrary value of 1500 which will
3140          *   restart after one full frame is pulled from the buffer. There
3141          *   could be several smaller frames in the buffer and if so they will
3142          *   not trigger the XON until, in total, they reduce the buffer
3143          *   by 1500 bytes.
3144          * - The pause time is fairly large at 1000 x 512ns = 512 usec.
3145          */
3146         rx_buffer_size = ((E1000_READ_REG(hw, E1000_PBA) & 0xffff) << 10 );
3147         hw->fc.high_water = rx_buffer_size -
3148             roundup2(adapter->hw.mac.max_frame_size, 1024);
3149         hw->fc.low_water = hw->fc.high_water - 1500;
3150
3151         if (adapter->fc) /* locally set flow control value? */
3152                 hw->fc.requested_mode = adapter->fc;
3153         else
3154                 hw->fc.requested_mode = e1000_fc_full;
3155
3156         if (hw->mac.type == e1000_80003es2lan)
3157                 hw->fc.pause_time = 0xFFFF;
3158         else
3159                 hw->fc.pause_time = EM_FC_PAUSE_TIME;
3160
3161         hw->fc.send_xon = TRUE;
3162
3163         /* Device specific overrides/settings */
3164         switch (hw->mac.type) {
3165         case e1000_pchlan:
3166                 /* Workaround: no TX flow ctrl for PCH */
3167                 hw->fc.requested_mode = e1000_fc_rx_pause;
3168                 hw->fc.pause_time = 0xFFFF; /* override */
3169                 if (ifp->if_mtu > ETHERMTU) {
3170                         hw->fc.high_water = 0x3500;
3171                         hw->fc.low_water = 0x1500;
3172                 } else {
3173                         hw->fc.high_water = 0x5000;
3174                         hw->fc.low_water = 0x3000;
3175                 }
3176                 hw->fc.refresh_time = 0x1000;
3177                 break;
3178         case e1000_pch2lan:
3179         case e1000_pch_lpt:
3180         case e1000_pch_spt:
3181                 hw->fc.high_water = 0x5C20;
3182                 hw->fc.low_water = 0x5048;
3183                 hw->fc.pause_time = 0x0650;
3184                 hw->fc.refresh_time = 0x0400;
3185                 /* Jumbos need adjusted PBA */
3186                 if (ifp->if_mtu > ETHERMTU)
3187                         E1000_WRITE_REG(hw, E1000_PBA, 12);
3188                 else
3189                         E1000_WRITE_REG(hw, E1000_PBA, 26);
3190                 break;
3191         case e1000_ich9lan:
3192         case e1000_ich10lan:
3193                 if (ifp->if_mtu > ETHERMTU) {
3194                         hw->fc.high_water = 0x2800;
3195                         hw->fc.low_water = hw->fc.high_water - 8;
3196                         break;
3197                 } 
3198                 /* else fall thru */
3199         default:
3200                 if (hw->mac.type == e1000_80003es2lan)
3201                         hw->fc.pause_time = 0xFFFF;
3202                 break;
3203         }
3204
3205         /* I219 needs some special flushing to avoid hangs */
3206         if (hw->mac.type == e1000_pch_spt)
3207                 em_flush_desc_rings(adapter);
3208
3209         /* Issue a global reset */
3210         e1000_reset_hw(hw);
3211         E1000_WRITE_REG(hw, E1000_WUC, 0);
3212         em_disable_aspm(adapter);
3213         /* and a re-init */
3214         if (e1000_init_hw(hw) < 0) {
3215                 device_printf(dev, "Hardware Initialization Failed\n");
3216                 return;
3217         }
3218
3219         E1000_WRITE_REG(hw, E1000_VET, ETHERTYPE_VLAN);
3220         e1000_get_phy_info(hw);
3221         e1000_check_for_link(hw);
3222         return;
3223 }
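/*
** Editor's worked example for the watermark math above (assumes an
** 82574 at the standard 1500-byte MTU, max_frame_size = 1518): PBA is
** E1000_PBA_20K, so rx_buffer_size = 20 * 1024 = 20480 and
**
**      high_water = 20480 - roundup2(1518, 1024) = 20480 - 2048 = 18432
**      low_water  = 18432 - 1500                 = 16932
**
** i.e. XOFF is generated once less than one rounded-up frame of buffer
** remains, and XON resumes after roughly one full frame has drained.
*/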
3224
3225 /*********************************************************************
3226  *
3227  *  Setup networking device structure and register an interface.
3228  *
3229  **********************************************************************/
3230 static int
3231 em_setup_interface(device_t dev, struct adapter *adapter)
3232 {
3233         struct ifnet   *ifp;
3234
3235         INIT_DEBUGOUT("em_setup_interface: begin");
3236
3237         ifp = adapter->ifp = if_alloc(IFT_ETHER);
3238         if (ifp == NULL) {
3239                 device_printf(dev, "can not allocate ifnet structure\n");
3240                 return (-1);
3241         }
3242         if_initname(ifp, device_get_name(dev), device_get_unit(dev));
3243         ifp->if_init =  em_init;
3244         ifp->if_softc = adapter;
3245         ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
3246         ifp->if_ioctl = em_ioctl;
3247
3248         /* TSO parameters */
3249         ifp->if_hw_tsomax = IP_MAXPACKET;
3250         /* Take m_pullup(9)'s in em_xmit() w/ TSO into account. */
3251         ifp->if_hw_tsomaxsegcount = EM_MAX_SCATTER - 5;
3252         ifp->if_hw_tsomaxsegsize = EM_TSO_SEG_SIZE;
3253
3254 #ifdef EM_MULTIQUEUE
3255         /* Multiqueue stack interface */
3256         ifp->if_transmit = em_mq_start;
3257         ifp->if_qflush = em_qflush;
3258 #else
3259         ifp->if_start = em_start;
3260         IFQ_SET_MAXLEN(&ifp->if_snd, adapter->num_tx_desc - 1);
3261         ifp->if_snd.ifq_drv_maxlen = adapter->num_tx_desc - 1;
3262         IFQ_SET_READY(&ifp->if_snd);
3263 #endif  
3264
3265         ether_ifattach(ifp, adapter->hw.mac.addr);
3266
3267         ifp->if_capabilities = ifp->if_capenable = 0;
3268
3269
3270         ifp->if_capabilities |= IFCAP_HWCSUM | IFCAP_VLAN_HWCSUM;
3271         ifp->if_capabilities |= IFCAP_TSO4;
3272         /*
3273          * Tell the upper layer(s) we
3274          * support full VLAN capability
3275          */
3276         ifp->if_data.ifi_hdrlen = sizeof(struct ether_vlan_header);
3277         ifp->if_capabilities |= IFCAP_VLAN_HWTAGGING
3278                              |  IFCAP_VLAN_HWTSO
3279                              |  IFCAP_VLAN_MTU;
3280         ifp->if_capenable = ifp->if_capabilities;
3281
3282         /*
3283         ** Don't turn this on by default: if vlans are
3284         ** created on another pseudo device (e.g. lagg),
3285         ** vlan events are not passed through, breaking
3286         ** operation, though with HW FILTER off it works.
3287         ** If using vlans directly on the em driver you can
3288         ** enable this and get full hardware tag filtering.
3289         */
3290         ifp->if_capabilities |= IFCAP_VLAN_HWFILTER;
3291
3292 #ifdef DEVICE_POLLING
3293         ifp->if_capabilities |= IFCAP_POLLING;
3294 #endif
3295
3296         /* Enable only WOL MAGIC by default */
3297         if (adapter->wol) {
3298                 ifp->if_capabilities |= IFCAP_WOL;
3299                 ifp->if_capenable |= IFCAP_WOL_MAGIC;
3300         }
3301                 
3302         /*
3303          * Specify the media types supported by this adapter and register
3304          * callbacks to update media and link information
3305          */
3306         ifmedia_init(&adapter->media, IFM_IMASK,
3307             em_media_change, em_media_status);
3308         if ((adapter->hw.phy.media_type == e1000_media_type_fiber) ||
3309             (adapter->hw.phy.media_type == e1000_media_type_internal_serdes)) {
3310                 u_char fiber_type = IFM_1000_SX;        /* default type */
3311
3312                 ifmedia_add(&adapter->media, IFM_ETHER | fiber_type | IFM_FDX, 
3313                             0, NULL);
3314                 ifmedia_add(&adapter->media, IFM_ETHER | fiber_type, 0, NULL);
3315         } else {
3316                 ifmedia_add(&adapter->media, IFM_ETHER | IFM_10_T, 0, NULL);
3317                 ifmedia_add(&adapter->media, IFM_ETHER | IFM_10_T | IFM_FDX,
3318                             0, NULL);
3319                 ifmedia_add(&adapter->media, IFM_ETHER | IFM_100_TX,
3320                             0, NULL);
3321                 ifmedia_add(&adapter->media, IFM_ETHER | IFM_100_TX | IFM_FDX,
3322                             0, NULL);
3323                 if (adapter->hw.phy.type != e1000_phy_ife) {
3324                         ifmedia_add(&adapter->media,
3325                                 IFM_ETHER | IFM_1000_T | IFM_FDX, 0, NULL);
3326                         ifmedia_add(&adapter->media,
3327                                 IFM_ETHER | IFM_1000_T, 0, NULL);
3328                 }
3329         }
3330         ifmedia_add(&adapter->media, IFM_ETHER | IFM_AUTO, 0, NULL);
3331         ifmedia_set(&adapter->media, IFM_ETHER | IFM_AUTO);
3332         return (0);
3333 }
3334
3335
3336 /*
3337  * Manage DMA'able memory.
3338  */
3339 static void
3340 em_dmamap_cb(void *arg, bus_dma_segment_t *segs, int nseg, int error)
3341 {
3342         if (error)
3343                 return;
3344         *(bus_addr_t *) arg = segs[0].ds_addr;
3345 }
3346
3347 static int
3348 em_dma_malloc(struct adapter *adapter, bus_size_t size,
3349         struct em_dma_alloc *dma, int mapflags)
3350 {
3351         int error;
3352
3353         error = bus_dma_tag_create(bus_get_dma_tag(adapter->dev), /* parent */
3354                                 EM_DBA_ALIGN, 0,        /* alignment, bounds */
3355                                 BUS_SPACE_MAXADDR,      /* lowaddr */
3356                                 BUS_SPACE_MAXADDR,      /* highaddr */
3357                                 NULL, NULL,             /* filter, filterarg */
3358                                 size,                   /* maxsize */
3359                                 1,                      /* nsegments */
3360                                 size,                   /* maxsegsize */
3361                                 0,                      /* flags */
3362                                 NULL,                   /* lockfunc */
3363                                 NULL,                   /* lockarg */
3364                                 &dma->dma_tag);
3365         if (error) {
3366                 device_printf(adapter->dev,
3367                     "%s: bus_dma_tag_create failed: %d\n",
3368                     __func__, error);
3369                 goto fail_0;
3370         }
3371
3372         error = bus_dmamem_alloc(dma->dma_tag, (void**) &dma->dma_vaddr,
3373             BUS_DMA_NOWAIT | BUS_DMA_COHERENT, &dma->dma_map);
3374         if (error) {
3375                 device_printf(adapter->dev,
3376                     "%s: bus_dmamem_alloc(%ju) failed: %d\n",
3377                     __func__, (uintmax_t)size, error);
3378                 goto fail_2;
3379         }
3380
3381         dma->dma_paddr = 0;
3382         error = bus_dmamap_load(dma->dma_tag, dma->dma_map, dma->dma_vaddr,
3383             size, em_dmamap_cb, &dma->dma_paddr, mapflags | BUS_DMA_NOWAIT);
3384         if (error || dma->dma_paddr == 0) {
3385                 device_printf(adapter->dev,
3386                     "%s: bus_dmamap_load failed: %d\n",
3387                     __func__, error);
3388                 goto fail_3;
3389         }
3390
3391         return (0);
3392
3393 fail_3:
3394         bus_dmamap_unload(dma->dma_tag, dma->dma_map);
3395 fail_2:
3396         bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map);
3397         bus_dma_tag_destroy(dma->dma_tag);
3398 fail_0:
3399         dma->dma_tag = NULL;
3400
3401         return (error);
3402 }
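/*
** Editor's usage sketch (illustrative only, not compiled; it mirrors
** what em_allocate_queues() does below):
*/
#if 0
        struct em_dma_alloc dma;
        int tsize = roundup2(adapter->num_tx_desc *
            sizeof(struct e1000_tx_desc), EM_DBA_ALIGN);

        /* one contiguous, EM_DBA_ALIGN-aligned descriptor area */
        if (em_dma_malloc(adapter, tsize, &dma, BUS_DMA_NOWAIT) != 0)
                return (ENOMEM);
        /* ... use dma.dma_vaddr (KVA) and dma.dma_paddr (bus addr) ... */
        em_dma_free(adapter, &dma);
#endif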
3403
3404 static void
3405 em_dma_free(struct adapter *adapter, struct em_dma_alloc *dma)
3406 {
3407         if (dma->dma_tag == NULL)
3408                 return;
3409         if (dma->dma_paddr != 0) {
3410                 bus_dmamap_sync(dma->dma_tag, dma->dma_map,
3411                     BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
3412                 bus_dmamap_unload(dma->dma_tag, dma->dma_map);
3413                 dma->dma_paddr = 0;
3414         }
3415         if (dma->dma_vaddr != NULL) {
3416                 bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map);
3417                 dma->dma_vaddr = NULL;
3418         }
3419         bus_dma_tag_destroy(dma->dma_tag);
3420         dma->dma_tag = NULL;
3421 }
3422
3423
3424 /*********************************************************************
3425  *
3426  *  Allocate memory for the transmit and receive rings, and then
3427  *  the descriptors associated with each, called only once at attach.
3428  *
3429  **********************************************************************/
3430 static int
3431 em_allocate_queues(struct adapter *adapter)
3432 {
3433         device_t                dev = adapter->dev;
3434         struct tx_ring          *txr = NULL;
3435         struct rx_ring          *rxr = NULL;
3436         int rsize, tsize, error = E1000_SUCCESS;
3437         int txconf = 0, rxconf = 0;
3438
3439
3440         /* Allocate the TX ring struct memory */
3441         if (!(adapter->tx_rings =
3442             (struct tx_ring *) malloc(sizeof(struct tx_ring) *
3443             adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
3444                 device_printf(dev, "Unable to allocate TX ring memory\n");
3445                 error = ENOMEM;
3446                 goto fail;
3447         }
3448
3449         /* Now allocate the RX */
3450         if (!(adapter->rx_rings =
3451             (struct rx_ring *) malloc(sizeof(struct rx_ring) *
3452             adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
3453                 device_printf(dev, "Unable to allocate RX ring memory\n");
3454                 error = ENOMEM;
3455                 goto rx_fail;
3456         }
3457
3458         tsize = roundup2(adapter->num_tx_desc *
3459             sizeof(struct e1000_tx_desc), EM_DBA_ALIGN);
3460         /*
3461          * Now set up the TX queues, txconf is needed to handle the
3462          * possibility that things fail midcourse and we need to
3463          * undo memory gracefully
3464          */ 
3465         for (int i = 0; i < adapter->num_queues; i++, txconf++) {
3466                 /* Set up some basics */
3467                 txr = &adapter->tx_rings[i];
3468                 txr->adapter = adapter;
3469                 txr->me = i;
3470
3471                 /* Initialize the TX lock */
3472                 snprintf(txr->mtx_name, sizeof(txr->mtx_name), "%s:tx(%d)",
3473                     device_get_nameunit(dev), txr->me);
3474                 mtx_init(&txr->tx_mtx, txr->mtx_name, NULL, MTX_DEF);
3475
3476                 if (em_dma_malloc(adapter, tsize,
3477                         &txr->txdma, BUS_DMA_NOWAIT)) {
3478                         device_printf(dev,
3479                             "Unable to allocate TX Descriptor memory\n");
3480                         error = ENOMEM;
3481                         goto err_tx_desc;
3482                 }
3483                 txr->tx_base = (struct e1000_tx_desc *)txr->txdma.dma_vaddr;
3484                 bzero((void *)txr->tx_base, tsize);
3485
3486                 if (em_allocate_transmit_buffers(txr)) {
3487                         device_printf(dev,
3488                             "Critical Failure setting up transmit buffers\n");
3489                         error = ENOMEM;
3490                         goto err_tx_desc;
3491                 }
3492 #if __FreeBSD_version >= 800000
3493                 /* Allocate a buf ring */
3494                 txr->br = buf_ring_alloc(4096, M_DEVBUF,
3495                     M_WAITOK, &txr->tx_mtx);
3496 #endif
3497         }
3498
3499         /*
3500          * Next the RX queues...
3501          */ 
3502         rsize = roundup2(adapter->num_rx_desc *
3503             sizeof(union e1000_rx_desc_extended), EM_DBA_ALIGN);
3504         for (int i = 0; i < adapter->num_queues; i++, rxconf++) {
3505                 rxr = &adapter->rx_rings[i];
3506                 rxr->adapter = adapter;
3507                 rxr->me = i;
3508
3509                 /* Initialize the RX lock */
3510                 snprintf(rxr->mtx_name, sizeof(rxr->mtx_name), "%s:rx(%d)",
3511             device_get_nameunit(dev), rxr->me);
3512                 mtx_init(&rxr->rx_mtx, rxr->mtx_name, NULL, MTX_DEF);
3513
3514                 if (em_dma_malloc(adapter, rsize,
3515                         &rxr->rxdma, BUS_DMA_NOWAIT)) {
3516                         device_printf(dev,
3517                             "Unable to allocate RxDescriptor memory\n");
3518                         error = ENOMEM;
3519                         goto err_rx_desc;
3520                 }
3521                 rxr->rx_base = (union e1000_rx_desc_extended *)rxr->rxdma.dma_vaddr;
3522                 bzero((void *)rxr->rx_base, rsize);
3523
3524                 /* Allocate receive buffers for the ring*/
3525                 if (em_allocate_receive_buffers(rxr)) {
3526                         device_printf(dev,
3527                             "Critical Failure setting up receive buffers\n");
3528                         error = ENOMEM;
3529                         goto err_rx_desc;
3530                 }
3531         }
3532
3533         return (0);
3534
3535 err_rx_desc:
3536         for (rxr = adapter->rx_rings; rxconf > 0; rxr++, rxconf--)
3537                 em_dma_free(adapter, &rxr->rxdma);
3538 err_tx_desc:
3539         for (txr = adapter->tx_rings; txconf > 0; txr++, txconf--)
3540                 em_dma_free(adapter, &txr->txdma);
3541         free(adapter->rx_rings, M_DEVBUF);
3542 rx_fail:
3543 #if __FreeBSD_version >= 800000
3544         buf_ring_free(txr->br, M_DEVBUF);
3545 #endif
3546         free(adapter->tx_rings, M_DEVBUF);
3547 fail:
3548         return (error);
3549 }
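/*
** Editor's note on the unwind above (illustrative): txconf/rxconf count
** rings whose loop iterations completed, so a failure while setting up
** ring i releases the descriptor areas of rings 0..i-1 of that type
** (plus all TX rings once err_tx_desc is reached).  E.g. with
** num_queues == 2, a failure in RX ring 1 runs err_rx_desc with
** rxconf == 1, freeing RX ring 0, then err_tx_desc with txconf == 2,
** freeing both TX rings.
*/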
3550
3551
3552 /*********************************************************************
3553  *
3554  *  Allocate memory for tx_buffer structures. The tx_buffer stores all
3555  *  the information needed to transmit a packet on the wire. This is
3556  *  called only once at attach, setup is done every reset.
3557  *
3558  **********************************************************************/
3559 static int
3560 em_allocate_transmit_buffers(struct tx_ring *txr)
3561 {
3562         struct adapter *adapter = txr->adapter;
3563         device_t dev = adapter->dev;
3564         struct em_txbuffer *txbuf;
3565         int error, i;
3566
3567         /*
3568          * Setup DMA descriptor areas.
3569          */
3570         if ((error = bus_dma_tag_create(bus_get_dma_tag(dev),
3571                                1, 0,                    /* alignment, bounds */
3572                                BUS_SPACE_MAXADDR,       /* lowaddr */
3573                                BUS_SPACE_MAXADDR,       /* highaddr */
3574                                NULL, NULL,              /* filter, filterarg */
3575                                EM_TSO_SIZE,             /* maxsize */
3576                                EM_MAX_SCATTER,          /* nsegments */
3577                                PAGE_SIZE,               /* maxsegsize */
3578                                0,                       /* flags */
3579                                NULL,                    /* lockfunc */
3580                                NULL,                    /* lockfuncarg */
3581                                &txr->txtag))) {
3582                 device_printf(dev,"Unable to allocate TX DMA tag\n");
3583                 goto fail;
3584         }
3585
3586         if (!(txr->tx_buffers =
3587             (struct em_txbuffer *) malloc(sizeof(struct em_txbuffer) *
3588             adapter->num_tx_desc, M_DEVBUF, M_NOWAIT | M_ZERO))) {
3589                 device_printf(dev, "Unable to allocate tx_buffer memory\n");
3590                 error = ENOMEM;
3591                 goto fail;
3592         }
3593
3594         /* Create the descriptor buffer dma maps */
3595         txbuf = txr->tx_buffers;
3596         for (i = 0; i < adapter->num_tx_desc; i++, txbuf++) {
3597                 error = bus_dmamap_create(txr->txtag, 0, &txbuf->map);
3598                 if (error != 0) {
3599                         device_printf(dev, "Unable to create TX DMA map\n");
3600                         goto fail;
3601                 }
3602         }
3603
3604         return 0;
3605 fail:
3606         /* We free all, it handles case where we are in the middle */
3607         em_free_transmit_structures(adapter);
3608         return (error);
3609 }
3610
3611 /*********************************************************************
3612  *
3613  *  Initialize a transmit ring.
3614  *
3615  **********************************************************************/
3616 static void
3617 em_setup_transmit_ring(struct tx_ring *txr)
3618 {
3619         struct adapter *adapter = txr->adapter;
3620         struct em_txbuffer *txbuf;
3621         int i;
3622 #ifdef DEV_NETMAP
3623         struct netmap_adapter *na = NA(adapter->ifp);
3624         struct netmap_slot *slot;
3625 #endif /* DEV_NETMAP */
3626
3627         /* Clear the old descriptor contents */
3628         EM_TX_LOCK(txr);
3629 #ifdef DEV_NETMAP
3630         slot = netmap_reset(na, NR_TX, txr->me, 0);
3631 #endif /* DEV_NETMAP */
3632
3633         bzero((void *)txr->tx_base,
3634               (sizeof(struct e1000_tx_desc)) * adapter->num_tx_desc);
3635         /* Reset indices */
3636         txr->next_avail_desc = 0;
3637         txr->next_to_clean = 0;
3638
3639         /* Free any existing tx buffers. */
3640         txbuf = txr->tx_buffers;
3641         for (i = 0; i < adapter->num_tx_desc; i++, txbuf++) {
3642                 if (txbuf->m_head != NULL) {
3643                         bus_dmamap_sync(txr->txtag, txbuf->map,
3644                             BUS_DMASYNC_POSTWRITE);
3645                         bus_dmamap_unload(txr->txtag, txbuf->map);
3646                         m_freem(txbuf->m_head);
3647                         txbuf->m_head = NULL;
3648                 }
3649 #ifdef DEV_NETMAP
3650                 if (slot) {
3651                         int si = netmap_idx_n2k(&na->tx_rings[txr->me], i);
3652                         uint64_t paddr;
3653                         void *addr;
3654
3655                         addr = PNMB(na, slot + si, &paddr);
3656                         txr->tx_base[i].buffer_addr = htole64(paddr);
3657                         /* reload the map for netmap mode */
3658                         netmap_load_map(na, txr->txtag, txbuf->map, addr);
3659                 }
3660 #endif /* DEV_NETMAP */
3661
3662                 /* clear the watch index */
3663                 txbuf->next_eop = -1;
3664         }
3665
3666         /* Set number of descriptors available */
3667         txr->tx_avail = adapter->num_tx_desc;
3668         txr->busy = EM_TX_IDLE;
3669
3670         /* Clear checksum offload context. */
3671         txr->last_hw_offload = 0;
3672         txr->last_hw_ipcss = 0;
3673         txr->last_hw_ipcso = 0;
3674         txr->last_hw_tucss = 0;
3675         txr->last_hw_tucso = 0;
3676
3677         bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
3678             BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
3679         EM_TX_UNLOCK(txr);
3680 }
3681
3682 /*********************************************************************
3683  *
3684  *  Initialize all transmit rings.
3685  *
3686  **********************************************************************/
3687 static void
3688 em_setup_transmit_structures(struct adapter *adapter)
3689 {
3690         struct tx_ring *txr = adapter->tx_rings;
3691
3692         for (int i = 0; i < adapter->num_queues; i++, txr++)
3693                 em_setup_transmit_ring(txr);
3694
3695         return;
3696 }
3697
3698 /*********************************************************************
3699  *
3700  *  Enable transmit unit.
3701  *
3702  **********************************************************************/
3703 static void
3704 em_initialize_transmit_unit(struct adapter *adapter)
3705 {
3706         struct tx_ring  *txr = adapter->tx_rings;
3707         struct e1000_hw *hw = &adapter->hw;
3708         u32     tctl, txdctl = 0, tarc, tipg = 0;
3709
3710         INIT_DEBUGOUT("em_initialize_transmit_unit: begin");
3711
3712         for (int i = 0; i < adapter->num_queues; i++, txr++) {
3713                 u64 bus_addr = txr->txdma.dma_paddr;
3714                 /* Base and Len of TX Ring */
3715                 E1000_WRITE_REG(hw, E1000_TDLEN(i),
3716                     adapter->num_tx_desc * sizeof(struct e1000_tx_desc));
3717                 E1000_WRITE_REG(hw, E1000_TDBAH(i),
3718                     (u32)(bus_addr >> 32));
3719                 E1000_WRITE_REG(hw, E1000_TDBAL(i),
3720                     (u32)bus_addr);
3721                 /* Init the HEAD/TAIL indices */
3722                 E1000_WRITE_REG(hw, E1000_TDT(i), 0);
3723                 E1000_WRITE_REG(hw, E1000_TDH(i), 0);
3724
3725                 HW_DEBUGOUT2("Base = %x, Length = %x\n",
3726                     E1000_READ_REG(&adapter->hw, E1000_TDBAL(i)),
3727                     E1000_READ_REG(&adapter->hw, E1000_TDLEN(i)));
3728
3729                 txr->busy = EM_TX_IDLE;
3730                 txdctl = 0; /* clear txdctl */
3731                 txdctl |= 0x1f; /* PTHRESH */
3732                 txdctl |= 1 << 8; /* HTHRESH */
3733                 txdctl |= 1 << 16;/* WTHRESH */
3734                 txdctl |= 1 << 22; /* Reserved bit 22 must always be 1 */
3735                 txdctl |= E1000_TXDCTL_GRAN;
3736                 txdctl |= 1 << 25; /* LWTHRESH */
3737
3738                 E1000_WRITE_REG(hw, E1000_TXDCTL(i), txdctl);
3739         }
3740
3741         /* Set the default values for the Tx Inter Packet Gap timer */
3742         switch (adapter->hw.mac.type) {
3743         case e1000_80003es2lan:
3744                 tipg = DEFAULT_82543_TIPG_IPGR1;
3745                 tipg |= DEFAULT_80003ES2LAN_TIPG_IPGR2 <<
3746                     E1000_TIPG_IPGR2_SHIFT;
3747                 break;
3748         default:
3749                 if ((adapter->hw.phy.media_type == e1000_media_type_fiber) ||
3750                     (adapter->hw.phy.media_type ==
3751                     e1000_media_type_internal_serdes))
3752                         tipg = DEFAULT_82543_TIPG_IPGT_FIBER;
3753                 else
3754                         tipg = DEFAULT_82543_TIPG_IPGT_COPPER;
3755                 tipg |= DEFAULT_82543_TIPG_IPGR1 << E1000_TIPG_IPGR1_SHIFT;
3756                 tipg |= DEFAULT_82543_TIPG_IPGR2 << E1000_TIPG_IPGR2_SHIFT;
3757         }
3758
3759         E1000_WRITE_REG(&adapter->hw, E1000_TIPG, tipg);
3760         E1000_WRITE_REG(&adapter->hw, E1000_TIDV, adapter->tx_int_delay.value);
3761
3762         if (adapter->hw.mac.type >= e1000_82540)
3763                 E1000_WRITE_REG(&adapter->hw, E1000_TADV,
3764                     adapter->tx_abs_int_delay.value);
3765
3766         if ((adapter->hw.mac.type == e1000_82571) ||
3767             (adapter->hw.mac.type == e1000_82572)) {
3768                 tarc = E1000_READ_REG(&adapter->hw, E1000_TARC(0));
3769                 tarc |= TARC_SPEED_MODE_BIT;
3770                 E1000_WRITE_REG(&adapter->hw, E1000_TARC(0), tarc);
3771         } else if (adapter->hw.mac.type == e1000_80003es2lan) {
3772                 /* errata: program both queues to unweighted RR */
3773                 tarc = E1000_READ_REG(&adapter->hw, E1000_TARC(0));
3774                 tarc |= 1;
3775                 E1000_WRITE_REG(&adapter->hw, E1000_TARC(0), tarc);
3776                 tarc = E1000_READ_REG(&adapter->hw, E1000_TARC(1));
3777                 tarc |= 1;
3778                 E1000_WRITE_REG(&adapter->hw, E1000_TARC(1), tarc);
3779         } else if (adapter->hw.mac.type == e1000_82574) {
3780                 tarc = E1000_READ_REG(&adapter->hw, E1000_TARC(0));
3781                 tarc |= TARC_ERRATA_BIT;
3782                 if (adapter->num_queues > 1) {
3783                         tarc |= (TARC_COMPENSATION_MODE | TARC_MQ_FIX);
3784                         E1000_WRITE_REG(&adapter->hw, E1000_TARC(0), tarc);
3785                         E1000_WRITE_REG(&adapter->hw, E1000_TARC(1), tarc);
3786                 } else
3787                         E1000_WRITE_REG(&adapter->hw, E1000_TARC(0), tarc);
3788         }
3789
3790         adapter->txd_cmd = E1000_TXD_CMD_IFCS;
3791         if (adapter->tx_int_delay.value > 0)
3792                 adapter->txd_cmd |= E1000_TXD_CMD_IDE;
3793
3794         /* Program the Transmit Control Register */
3795         tctl = E1000_READ_REG(&adapter->hw, E1000_TCTL);
3796         tctl &= ~E1000_TCTL_CT;
3797         tctl |= (E1000_TCTL_PSP | E1000_TCTL_RTLC | E1000_TCTL_EN |
3798                    (E1000_COLLISION_THRESHOLD << E1000_CT_SHIFT));
3799
3800         if (adapter->hw.mac.type >= e1000_82571)
3801                 tctl |= E1000_TCTL_MULR;
3802
3803         /* This write will effectively turn on the transmit unit. */
3804         E1000_WRITE_REG(&adapter->hw, E1000_TCTL, tctl);
3805
3806         if (hw->mac.type == e1000_pch_spt) {
3807                 u32 reg;
3808                 reg = E1000_READ_REG(hw, E1000_IOSFPC);
3809                 reg |= E1000_RCTL_RDMTS_HEX;
3810                 E1000_WRITE_REG(hw, E1000_IOSFPC, reg);
3811                 reg = E1000_READ_REG(hw, E1000_TARC(0));
3812                 reg |= E1000_TARC0_CB_MULTIQ_3_REQ;
3813                 E1000_WRITE_REG(hw, E1000_TARC(0), reg);
3814         }
3815 }
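/*
** Editor's worked value for the TXDCTL write above (assuming
** E1000_TXDCTL_GRAN is bit 24, as in the shared e1000 headers):
**
**      0x1f | 1 << 8 | 1 << 16 | 1 << 22 | 1 << 24 | 1 << 25
**      == 0x0341011f
**
** i.e. PTHRESH = 31, HTHRESH = 1, WTHRESH = 1, descriptor granularity,
** LWTHRESH = 1, with reserved bit 22 held at 1 as required.
*/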
3816
3817
3818 /*********************************************************************
3819  *
3820  *  Free all transmit rings.
3821  *
3822  **********************************************************************/
3823 static void
3824 em_free_transmit_structures(struct adapter *adapter)
3825 {
3826         struct tx_ring *txr = adapter->tx_rings;
3827
3828         for (int i = 0; i < adapter->num_queues; i++, txr++) {
3829                 EM_TX_LOCK(txr);
3830                 em_free_transmit_buffers(txr);
3831                 em_dma_free(adapter, &txr->txdma);
3832                 EM_TX_UNLOCK(txr);
3833                 EM_TX_LOCK_DESTROY(txr);
3834         }
3835
3836         free(adapter->tx_rings, M_DEVBUF);
3837 }
3838
3839 /*********************************************************************
3840  *
3841  *  Free transmit ring related data structures.
3842  *
3843  **********************************************************************/
3844 static void
3845 em_free_transmit_buffers(struct tx_ring *txr)
3846 {
3847         struct adapter          *adapter = txr->adapter;
3848         struct em_txbuffer      *txbuf;
3849
3850         INIT_DEBUGOUT("free_transmit_ring: begin");
3851
3852         if (txr->tx_buffers == NULL)
3853                 return;
3854
3855         for (int i = 0; i < adapter->num_tx_desc; i++) {
3856                 txbuf = &txr->tx_buffers[i];
3857                 if (txbuf->m_head != NULL) {
3858                         bus_dmamap_sync(txr->txtag, txbuf->map,
3859                             BUS_DMASYNC_POSTWRITE);
3860                         bus_dmamap_unload(txr->txtag,
3861                             txbuf->map);
3862                         m_freem(txbuf->m_head);
3863                         txbuf->m_head = NULL;
3864                         if (txbuf->map != NULL) {
3865                                 bus_dmamap_destroy(txr->txtag,
3866                                     txbuf->map);
3867                                 txbuf->map = NULL;
3868                         }
3869                 } else if (txbuf->map != NULL) {
3870                         bus_dmamap_unload(txr->txtag,
3871                             txbuf->map);
3872                         bus_dmamap_destroy(txr->txtag,
3873                             txbuf->map);
3874                         txbuf->map = NULL;
3875                 }
3876         }
3877 #if __FreeBSD_version >= 800000
3878         if (txr->br != NULL)
3879                 buf_ring_free(txr->br, M_DEVBUF);
3880 #endif
3881         if (txr->tx_buffers != NULL) {
3882                 free(txr->tx_buffers, M_DEVBUF);
3883                 txr->tx_buffers = NULL;
3884         }
3885         if (txr->txtag != NULL) {
3886                 bus_dma_tag_destroy(txr->txtag);
3887                 txr->txtag = NULL;
3888         }
3889         return;
3890 }
3891
3892
3893 /*********************************************************************
3894  *  The offload context is protocol specific (TCP/UDP) and thus
3895  *  only needs to be set when the protocol changes. The occasion
3896  *  of a context change can be a performance detriment, and it
3897  *  might be better just disabled. The reason arises in the way
3898  *  in which the controller supports pipelined requests from the
3899  *  Tx data DMA. Up to four requests can be pipelined, and they may
3900  *  belong to the same packet or to multiple packets. However all
3901  *  requests for one packet are issued before a request is issued
3902  *  for a subsequent packet and if a request for the next packet
3903  *  requires a context change, that request will be stalled
3904  *  until the previous request completes. This means setting up
3905  *  a new context effectively disables pipelined Tx data DMA which
3906  *  in turn greatly slows down performance when sending small-sized
3907  *  frames.
3908  **********************************************************************/
3909 static void
3910 em_transmit_checksum_setup(struct tx_ring *txr, struct mbuf *mp, int ip_off,
3911     struct ip *ip, u32 *txd_upper, u32 *txd_lower)
3912 {
3913         struct adapter                  *adapter = txr->adapter;
3914         struct e1000_context_desc       *TXD = NULL;
3915         struct em_txbuffer              *tx_buffer;
3916         int                             cur, hdr_len;
3917         u32                             cmd = 0;
3918         u16                             offload = 0;
3919         u8                              ipcso, ipcss, tucso, tucss;
3920
3921         ipcss = ipcso = tucss = tucso = 0;
3922         hdr_len = ip_off + (ip->ip_hl << 2);
3923         cur = txr->next_avail_desc;
3924
3925         /* Setup of IP header checksum. */
3926         if (mp->m_pkthdr.csum_flags & CSUM_IP) {
3927                 *txd_upper |= E1000_TXD_POPTS_IXSM << 8;
3928                 offload |= CSUM_IP;
3929                 ipcss = ip_off;
3930                 ipcso = ip_off + offsetof(struct ip, ip_sum);
3931                 /*
3932                  * Start offset for header checksum calculation.
3933                  * End offset for header checksum calculation.
3934                  * Offset of place to put the checksum.
3935                  */
3936                 TXD = (struct e1000_context_desc *)&txr->tx_base[cur];
3937                 TXD->lower_setup.ip_fields.ipcss = ipcss;
3938                 TXD->lower_setup.ip_fields.ipcse = htole16(hdr_len);
3939                 TXD->lower_setup.ip_fields.ipcso = ipcso;
3940                 cmd |= E1000_TXD_CMD_IP;
3941         }
3942
3943         if (mp->m_pkthdr.csum_flags & CSUM_TCP) {
3944                 *txd_lower = E1000_TXD_CMD_DEXT | E1000_TXD_DTYP_D;
3945                 *txd_upper |= E1000_TXD_POPTS_TXSM << 8;
3946                 offload |= CSUM_TCP;
3947                 tucss = hdr_len;
3948                 tucso = hdr_len + offsetof(struct tcphdr, th_sum);
3949                 /*
3950                  * The 82574L can only remember the *last* context used
3951                  * regardless of the queue it was used for.  We cannot reuse
3952                  * contexts on this hardware platform and must generate a new
3953                  * context every time.  82574L hardware spec, section 7.2.6,
3954                  * second note.
3955                  */
3956                 if (adapter->num_queues < 2) {
3957                         /*
3958                         * Setting up a new checksum offload context for every
3959                         * frame takes a lot of processing time for the hardware.
3960                         * This also reduces performance a lot for small-sized
3961                         * frames, so avoid it if the driver can use a previously
3962                         * configured checksum offload context.
3963                         */
3964                         if (txr->last_hw_offload == offload) {
3965                                 if (offload & CSUM_IP) {
3966                                         if (txr->last_hw_ipcss == ipcss &&
3967                                         txr->last_hw_ipcso == ipcso &&
3968                                         txr->last_hw_tucss == tucss &&
3969                                         txr->last_hw_tucso == tucso)
3970                                                 return;
3971                                 } else {
3972                                         if (txr->last_hw_tucss == tucss &&
3973                                         txr->last_hw_tucso == tucso)
3974                                                 return;
3975                                 }
3976                         }
3977                         txr->last_hw_offload = offload;
3978                         txr->last_hw_tucss = tucss;
3979                         txr->last_hw_tucso = tucso;
3980                 }
3981                 /*
3982                  * Start offset for payload checksum calculation.
3983                  * End offset for payload checksum calculation.
3984                  * Offset of place to put the checksum.
3985                  */
3986                 TXD = (struct e1000_context_desc *)&txr->tx_base[cur];
3987                 TXD->upper_setup.tcp_fields.tucss = hdr_len;
3988                 TXD->upper_setup.tcp_fields.tucse = htole16(0);
3989                 TXD->upper_setup.tcp_fields.tucso = tucso;
3990                 cmd |= E1000_TXD_CMD_TCP;
3991         } else if (mp->m_pkthdr.csum_flags & CSUM_UDP) {
3992                 *txd_lower = E1000_TXD_CMD_DEXT | E1000_TXD_DTYP_D;
3993                 *txd_upper |= E1000_TXD_POPTS_TXSM << 8;
3994                 tucss = hdr_len;
3995                 tucso = hdr_len + offsetof(struct udphdr, uh_sum);
3996                 /*
3997                  * The 82574L can only remember the *last* context used
3998                  * regardless of the queue it was used for.  We cannot reuse
3999                  * contexts on this hardware platform and must generate a new
4000                  * context every time.  82574L hardware spec, section 7.2.6,
4001                  * second note.
4002                  */
4003                 if (adapter->num_queues < 2) {
4004                         /*
4005                         * Setting up a new checksum offload context for every
4006                         * frame takes a lot of processing time for the hardware.
4007                         * This also reduces performance a lot for small-sized
4008                         * frames, so avoid it if the driver can use a previously
4009                         * configured checksum offload context.
4010                         */
4011                         if (txr->last_hw_offload == offload) {
4012                                 if (offload & CSUM_IP) {
4013                                         if (txr->last_hw_ipcss == ipcss &&
4014                                         txr->last_hw_ipcso == ipcso &&
4015                                         txr->last_hw_tucss == tucss &&
4016                                         txr->last_hw_tucso == tucso)
4017                                                 return;
4018                                 } else {
4019                                         if (txr->last_hw_tucss == tucss &&
4020                                         txr->last_hw_tucso == tucso)
4021                                                 return;
4022                                 }
4023                         }
4024                         txr->last_hw_offload = offload;
4025                         txr->last_hw_tucss = tucss;
4026                         txr->last_hw_tucso = tucso;
4027                 }
4028                 /*
4029                  * Start offset for payload checksum calculation.
4030                  * End offset for payload checksum calculation.
4031                  * Offset of place to put the checksum.
4032                  */
4033                 TXD = (struct e1000_context_desc *)&txr->tx_base[cur];
4034                 TXD->upper_setup.tcp_fields.tucss = tucss;
4035                 TXD->upper_setup.tcp_fields.tucse = htole16(0);
4036                 TXD->upper_setup.tcp_fields.tucso = tucso;
4037         }
4038   
4039         if (offload & CSUM_IP) {
4040                 txr->last_hw_ipcss = ipcss;
4041                 txr->last_hw_ipcso = ipcso;
4042         }
4043
4044         TXD->tcp_seg_setup.data = htole32(0);
4045         TXD->cmd_and_length =
4046             htole32(adapter->txd_cmd | E1000_TXD_CMD_DEXT | cmd);
4047         tx_buffer = &txr->tx_buffers[cur];
4048         tx_buffer->m_head = NULL;
4049         tx_buffer->next_eop = -1;
4050
4051         if (++cur == adapter->num_tx_desc)
4052                 cur = 0;
4053
4054         txr->tx_avail--;
4055         txr->next_avail_desc = cur;
4056 }
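/*
 * Illustrative offsets (hypothetical frame, not driver code): for an
 * untagged Ethernet + IPv4 + TCP frame, ip_off = 14 and ip_hl = 5, so
 * hdr_len = 14 + 20 = 34.  The context descriptor then carries
 * tucss = 34 (the transport checksum starts at the TCP header),
 * tucse = 0 (the checksum runs to the end of the frame) and
 * tucso = 34 + offsetof(struct tcphdr, th_sum) = 34 + 16 = 50, the
 * byte offset where the hardware writes the computed checksum.
 */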
4057
4058
4059 /**********************************************************************
4060  *
4061  *  Setup work for hardware segmentation offload (TSO)
4062  *
4063  **********************************************************************/
4064 static void
4065 em_tso_setup(struct tx_ring *txr, struct mbuf *mp, int ip_off,
4066     struct ip *ip, struct tcphdr *tp, u32 *txd_upper, u32 *txd_lower)
4067 {
4068         struct adapter                  *adapter = txr->adapter;
4069         struct e1000_context_desc       *TXD;
4070         struct em_txbuffer              *tx_buffer;
4071         int cur, hdr_len;
4072
4073         /*
4074          * In theory we can use the same TSO context if and only if
4075          * the frame is the same type (IP/TCP) and has the same MSS.
4076          * However, checking whether a frame has the same IP/TCP structure
4077          * is a hard thing, so just ignore that and always re-establish a
4078          * new TSO context.
4079          */
4080         hdr_len = ip_off + (ip->ip_hl << 2) + (tp->th_off << 2);
4081         *txd_lower = (E1000_TXD_CMD_DEXT |      /* Extended descr type */
4082                       E1000_TXD_DTYP_D |        /* Data descr type */
4083                       E1000_TXD_CMD_TSE);       /* Do TSE on this packet */
4084
4085         /* IP and/or TCP header checksum calculation and insertion. */
4086         *txd_upper = (E1000_TXD_POPTS_IXSM | E1000_TXD_POPTS_TXSM) << 8;
4087
4088         cur = txr->next_avail_desc;
4089         tx_buffer = &txr->tx_buffers[cur];
4090         TXD = (struct e1000_context_desc *) &txr->tx_base[cur];
4091
4092         /*
4093          * Start offset for header checksum calculation.
4094          * End offset for header checksum calculation.
4095          * Offset of place to put the checksum.
4096          */
4097         TXD->lower_setup.ip_fields.ipcss = ip_off;
4098         TXD->lower_setup.ip_fields.ipcse =
4099             htole16(ip_off + (ip->ip_hl << 2) - 1);
4100         TXD->lower_setup.ip_fields.ipcso = ip_off + offsetof(struct ip, ip_sum);
4101         /*
4102          * Start offset for payload checksum calculation.
4103          * End offset for payload checksum calculation.
4104          * Offset of place to put the checksum.
4105          */
4106         TXD->upper_setup.tcp_fields.tucss = ip_off + (ip->ip_hl << 2);
4107         TXD->upper_setup.tcp_fields.tucse = 0;
4108         TXD->upper_setup.tcp_fields.tucso =
4109             ip_off + (ip->ip_hl << 2) + offsetof(struct tcphdr, th_sum);
4110         /*
4111          * Payload size per packet w/o any headers.
4112          * Length of all headers up to payload.
4113          */
4114         TXD->tcp_seg_setup.fields.mss = htole16(mp->m_pkthdr.tso_segsz);
4115         TXD->tcp_seg_setup.fields.hdr_len = hdr_len;
4116
4117         TXD->cmd_and_length = htole32(adapter->txd_cmd |
4118                                 E1000_TXD_CMD_DEXT |    /* Extended descr */
4119                                 E1000_TXD_CMD_TSE |     /* TSE context */
4120                                 E1000_TXD_CMD_IP |      /* Do IP csum */
4121                                 E1000_TXD_CMD_TCP |     /* Do TCP checksum */
4122                                 (mp->m_pkthdr.len - (hdr_len))); /* Total len */
4123
4124         tx_buffer->m_head = NULL;
4125         tx_buffer->next_eop = -1;
4126
4127         if (++cur == adapter->num_tx_desc)
4128                 cur = 0;
4129
4130         txr->tx_avail--;
4131         txr->next_avail_desc = cur;
4132         txr->tx_tso = TRUE;
4133 }
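/*
 * Illustrative arithmetic (hypothetical values): for a 32034-byte TSO
 * chain with ip_off = 14, ip_hl = 5 and th_off = 8 (TCP timestamps),
 * hdr_len = 14 + 20 + 32 = 66 and the length field above carries
 * 32034 - 66 = 31968 payload bytes; with tso_segsz (MSS) = 1448 the
 * hardware then emits howmany(31968, 1448) = 23 frames, replicating
 * the prototype headers for each segment.
 */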
4134
4135
4136 /**********************************************************************
4137  *
4138  *  Examine each tx_buffer in the used queue. If the hardware is done
4139  *  processing the packet then free associated resources. The
4140  *  tx_buffer is put back on the free queue.
4141  *
4142  **********************************************************************/
4143 static void
4144 em_txeof(struct tx_ring *txr)
4145 {
4146         struct adapter  *adapter = txr->adapter;
4147         int first, last, done, processed;
4148         struct em_txbuffer *tx_buffer;
4149         struct e1000_tx_desc   *tx_desc, *eop_desc;
4150         struct ifnet   *ifp = adapter->ifp;
4151
4152         EM_TX_LOCK_ASSERT(txr);
4153 #ifdef DEV_NETMAP
4154         if (netmap_tx_irq(ifp, txr->me))
4155                 return;
4156 #endif /* DEV_NETMAP */
4157
4158         /* No work, make sure hang detection is disabled */
4159         if (txr->tx_avail == adapter->num_tx_desc) {
4160                 txr->busy = EM_TX_IDLE;
4161                 return;
4162         }
4163
4164         processed = 0;
4165         first = txr->next_to_clean;
4166         tx_desc = &txr->tx_base[first];
4167         tx_buffer = &txr->tx_buffers[first];
4168         last = tx_buffer->next_eop;
4169         eop_desc = &txr->tx_base[last];
4170
4171         /*
4172          * What this does is get the index of the
4173          * first descriptor AFTER the EOP of the
4174          * first packet, so that we can do a
4175          * simple comparison in the inner while loop.
4176          */
4177         if (++last == adapter->num_tx_desc)
4178                 last = 0;
4179         done = last;
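        /*
         * Illustrative wrap (hypothetical values): with num_tx_desc = 1024,
         * first = 1020 and the EOP at descriptor 1023, "done" wraps to 0,
         * so the inner loop below cleans 1020..1023 and stops when
         * first == done.
         */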
4180
4181         bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
4182             BUS_DMASYNC_POSTREAD);
4183
4184         while (eop_desc->upper.fields.status & E1000_TXD_STAT_DD) {
4185                 /* We clean the range of the packet */
4186                 while (first != done) {
4187                         tx_desc->upper.data = 0;
4188                         tx_desc->lower.data = 0;
4189                         tx_desc->buffer_addr = 0;
4190                         ++txr->tx_avail;
4191                         ++processed;
4192
4193                         if (tx_buffer->m_head) {
4194                                 bus_dmamap_sync(txr->txtag,
4195                                     tx_buffer->map,
4196                                     BUS_DMASYNC_POSTWRITE);
4197                                 bus_dmamap_unload(txr->txtag,
4198                                     tx_buffer->map);
4199                                 m_freem(tx_buffer->m_head);
4200                                 tx_buffer->m_head = NULL;
4201                         }
4202                         tx_buffer->next_eop = -1;
4203
4204                         if (++first == adapter->num_tx_desc)
4205                                 first = 0;
4206
4207                         tx_buffer = &txr->tx_buffers[first];
4208                         tx_desc = &txr->tx_base[first];
4209                 }
4210                 ++ifp->if_opackets;
4211                 /* See if we can continue to the next packet */
4212                 last = tx_buffer->next_eop;
4213                 if (last != -1) {
4214                         eop_desc = &txr->tx_base[last];
4215                         /* Get new done point */
4216                         if (++last == adapter->num_tx_desc) last = 0;
4217                         done = last;
4218                 } else
4219                         break;
4220         }
4221         bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
4222             BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
4223
4224         txr->next_to_clean = first;
4225
4226         /*
4227         ** Hang detection: we know there's work outstanding
4228         ** or the early return at entry would have been taken, so no
4229         ** descriptor processed here indicates a potential hang.
4230         ** The local timer will examine this and do a reset if needed.
4231         */
4232         if (processed == 0) {
4233                 if (txr->busy != EM_TX_HUNG)
4234                         ++txr->busy;
4235         } else /* At least one descriptor was cleaned */
4236                 txr->busy = EM_TX_BUSY; /* note this clears HUNG */
4237
4238         /*
4239          * If we have a minimum free, clear IFF_DRV_OACTIVE
4240          * to tell the stack that it is OK to send packets.
4241          * Notice that all writes of OACTIVE happen under the
4242          * TX lock which, with a single queue, guarantees 
4243          * sanity.
4244          */
4245         if (txr->tx_avail >= EM_MAX_SCATTER) {
4246                 ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;
4247         }
4248
4249         /* Disable hang detection if all clean */
4250         if (txr->tx_avail == adapter->num_tx_desc)
4251                 txr->busy = EM_TX_IDLE;
4252 }
4253
4254 /*********************************************************************
4255  *
4256  *  Refresh RX descriptor mbufs from system mbuf buffer pool.
4257  *
4258  **********************************************************************/
4259 static void
4260 em_refresh_mbufs(struct rx_ring *rxr, int limit)
4261 {
4262         struct adapter          *adapter = rxr->adapter;
4263         struct mbuf             *m;
4264         bus_dma_segment_t       segs;
4265         struct em_rxbuffer      *rxbuf;
4266         int                     i, j, error, nsegs;
4267         bool                    cleaned = FALSE;
4268
4269         i = j = rxr->next_to_refresh;
4270         /*
4271         ** Get one descriptor beyond
4272         ** our work mark to control
4273         ** the loop.
4274         */
4275         if (++j == adapter->num_rx_desc)
4276                 j = 0;
4277
4278         while (j != limit) {
4279                 rxbuf = &rxr->rx_buffers[i];
4280                 if (rxbuf->m_head == NULL) {
4281                         m = m_getjcl(M_NOWAIT, MT_DATA,
4282                             M_PKTHDR, adapter->rx_mbuf_sz);
4283                         /*
4284                         ** If we have a temporary resource shortage
4285                         ** that causes a failure, just abort refresh
4286                         ** for now; we will return to this point when
4287                         ** reinvoked from em_rxeof.
4288                         */
4289                         if (m == NULL)
4290                                 goto update;
4291                 } else
4292                         m = rxbuf->m_head;
4293
4294                 m->m_len = m->m_pkthdr.len = adapter->rx_mbuf_sz;
4295                 m->m_flags |= M_PKTHDR;
4296                 m->m_data = m->m_ext.ext_buf;
4297
4298                 /* Use bus_dma machinery to setup the memory mapping  */
4299                 error = bus_dmamap_load_mbuf_sg(rxr->rxtag, rxbuf->map,
4300                     m, &segs, &nsegs, BUS_DMA_NOWAIT);
4301                 if (error != 0) {
4302                         printf("Refresh mbufs: hdr dmamap load"
4303                             " failure - %d\n", error);
4304                         m_free(m);
4305                         rxbuf->m_head = NULL;
4306                         goto update;
4307                 }
4308                 rxbuf->m_head = m;
4309                 rxbuf->paddr = segs.ds_addr;
4310                 bus_dmamap_sync(rxr->rxtag,
4311                     rxbuf->map, BUS_DMASYNC_PREREAD);
4312                 em_setup_rxdesc(&rxr->rx_base[i], rxbuf);
4313                 cleaned = TRUE;
4314
4315                 i = j; /* Next is precalculated for us */
4316                 rxr->next_to_refresh = i;
4317                 /* Calculate next controlling index */
4318                 if (++j == adapter->num_rx_desc)
4319                         j = 0;
4320         }
4321 update:
4322         /*
4323         ** Update the tail pointer only if, and
4324         ** only as far as, we have refreshed.
4325         */
4326         if (cleaned)
4327                 E1000_WRITE_REG(&adapter->hw,
4328                     E1000_RDT(rxr->me), rxr->next_to_refresh);
4329
4330         return;
4331 }
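/*
 * Note on the RDT update above: the hardware owns descriptors from RDH up
 * to, but not including, RDT, so advancing RDT only as far as
 * next_to_refresh hands the NIC exactly the descriptors that received
 * fresh mbufs and never one still waiting on an allocation.
 */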
4332
4333
4334 /*********************************************************************
4335  *
4336  *  Allocate memory for rx_buffer structures. Since we use one
4337  *  rx_buffer per received packet, the maximum number of rx_buffer's
4338  *  that we'll need is equal to the number of receive descriptors
4339  *  that we've allocated.
4340  *
4341  **********************************************************************/
4342 static int
4343 em_allocate_receive_buffers(struct rx_ring *rxr)
4344 {
4345         struct adapter          *adapter = rxr->adapter;
4346         device_t                dev = adapter->dev;
4347         struct em_rxbuffer      *rxbuf;
4348         int                     error;
4349
4350         rxr->rx_buffers = malloc(sizeof(struct em_rxbuffer) *
4351             adapter->num_rx_desc, M_DEVBUF, M_NOWAIT | M_ZERO);
4352         if (rxr->rx_buffers == NULL) {
4353                 device_printf(dev, "Unable to allocate rx_buffer memory\n");
4354                 return (ENOMEM);
4355         }
4356
4357         error = bus_dma_tag_create(bus_get_dma_tag(dev), /* parent */
4358                                 1, 0,                   /* alignment, bounds */
4359                                 BUS_SPACE_MAXADDR,      /* lowaddr */
4360                                 BUS_SPACE_MAXADDR,      /* highaddr */
4361                                 NULL, NULL,             /* filter, filterarg */
4362                                 MJUM9BYTES,             /* maxsize */
4363                                 1,                      /* nsegments */
4364                                 MJUM9BYTES,             /* maxsegsize */
4365                                 0,                      /* flags */
4366                                 NULL,                   /* lockfunc */
4367                                 NULL,                   /* lockarg */
4368                                 &rxr->rxtag);
4369         if (error) {
4370                 device_printf(dev, "%s: bus_dma_tag_create failed %d\n",
4371                     __func__, error);
4372                 goto fail;
4373         }
4374
4375         rxbuf = rxr->rx_buffers;
4376         for (int i = 0; i < adapter->num_rx_desc; i++, rxbuf++) {
4377                 /* rxbuf already points at &rxr->rx_buffers[i] via the loop */
4378                 error = bus_dmamap_create(rxr->rxtag, 0, &rxbuf->map);
4379                 if (error) {
4380                         device_printf(dev, "%s: bus_dmamap_create failed: %d\n",
4381                             __func__, error);
4382                         goto fail;
4383                 }
4384         }
4385
4386         return (0);
4387
4388 fail:
4389         em_free_receive_structures(adapter);
4390         return (error);
4391 }
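/*
 * The tag created above maps each receive buffer as a single contiguous
 * DMA segment (nsegments = 1) of at most MJUM9BYTES, which is what allows
 * em_refresh_mbufs() to load one jumbo cluster per extended descriptor.
 */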
4392
4393
4394 /*********************************************************************
4395  *
4396  *  Initialize a receive ring and its buffers.
4397  *
4398  **********************************************************************/
4399 static int
4400 em_setup_receive_ring(struct rx_ring *rxr)
4401 {
4402         struct  adapter         *adapter = rxr->adapter;
4403         struct em_rxbuffer      *rxbuf;
4404         bus_dma_segment_t       seg[1];
4405         int                     rsize, nsegs, error = 0;
4406 #ifdef DEV_NETMAP
4407         struct netmap_adapter *na = NA(adapter->ifp);
4408         struct netmap_slot *slot;
4409 #endif
4410
4411
4412         /* Clear the ring contents */
4413         EM_RX_LOCK(rxr);
4414         rsize = roundup2(adapter->num_rx_desc *
4415             sizeof(union e1000_rx_desc_extended), EM_DBA_ALIGN);
4416         bzero((void *)rxr->rx_base, rsize);
4417 #ifdef DEV_NETMAP
4418         slot = netmap_reset(na, NR_RX, 0, 0);
4419 #endif
4420
4421         /*
4422         ** Free current RX buffer structs and their mbufs
4423         */
4424         for (int i = 0; i < adapter->num_rx_desc; i++) {
4425                 rxbuf = &rxr->rx_buffers[i];
4426                 if (rxbuf->m_head != NULL) {
4427                         bus_dmamap_sync(rxr->rxtag, rxbuf->map,
4428                             BUS_DMASYNC_POSTREAD);
4429                         bus_dmamap_unload(rxr->rxtag, rxbuf->map);
4430                         m_freem(rxbuf->m_head);
4431                         rxbuf->m_head = NULL; /* mark as freed */
4432                 }
4433         }
4434
4435         /* Now replenish the mbufs */
4436         for (int j = 0; j != adapter->num_rx_desc; ++j) {
4437                 rxbuf = &rxr->rx_buffers[j];
4438 #ifdef DEV_NETMAP
4439                 if (slot) {
4440                         int si = netmap_idx_n2k(&na->rx_rings[rxr->me], j);
4441                         uint64_t paddr;
4442                         void *addr;
4443
4444                         addr = PNMB(na, slot + si, &paddr);
4445                         netmap_load_map(na, rxr->rxtag, rxbuf->map, addr);
4446                         rxbuf->paddr = paddr;
4447                         em_setup_rxdesc(&rxr->rx_base[j], rxbuf);
4448                         continue;
4449                 }
4450 #endif /* DEV_NETMAP */
4451                 rxbuf->m_head = m_getjcl(M_NOWAIT, MT_DATA,
4452                     M_PKTHDR, adapter->rx_mbuf_sz);
4453                 if (rxbuf->m_head == NULL) {
4454                         error = ENOBUFS;
4455                         goto fail;
4456                 }
4457                 rxbuf->m_head->m_len = adapter->rx_mbuf_sz;
4458                 rxbuf->m_head->m_flags &= ~M_HASFCS; /* we strip it */
4459                 rxbuf->m_head->m_pkthdr.len = adapter->rx_mbuf_sz;
4460
4461                 /* Get the memory mapping */
4462                 error = bus_dmamap_load_mbuf_sg(rxr->rxtag,
4463                     rxbuf->map, rxbuf->m_head, seg,
4464                     &nsegs, BUS_DMA_NOWAIT);
4465                 if (error != 0) {
4466                         m_freem(rxbuf->m_head);
4467                         rxbuf->m_head = NULL;
4468                         goto fail;
4469                 }
4470                 bus_dmamap_sync(rxr->rxtag,
4471                     rxbuf->map, BUS_DMASYNC_PREREAD);
4472
4473                 rxbuf->paddr = seg[0].ds_addr;
4474                 em_setup_rxdesc(&rxr->rx_base[j], rxbuf);
4475         }
4476         rxr->next_to_check = 0;
4477         rxr->next_to_refresh = 0;
4478         bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
4479             BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
4480
4481 fail:
4482         EM_RX_UNLOCK(rxr);
4483         return (error);
4484 }
4485
4486 /*********************************************************************
4487  *
4488  *  Initialize all receive rings.
4489  *
4490  **********************************************************************/
4491 static int
4492 em_setup_receive_structures(struct adapter *adapter)
4493 {
4494         struct rx_ring *rxr = adapter->rx_rings;
4495         int q;
4496
4497         for (q = 0; q < adapter->num_queues; q++, rxr++)
4498                 if (em_setup_receive_ring(rxr))
4499                         goto fail;
4500
4501         return (0);
4502 fail:
4503         /*
4504          * Free RX buffers allocated so far; we only handle
4505          * the rings that completed, since the failing case will have
4506          * cleaned up for itself. 'q' failed, so it's the terminus.
4507          */
4508         for (int i = 0; i < q; ++i) {
4509                 rxr = &adapter->rx_rings[i];
4510                 for (int n = 0; n < adapter->num_rx_desc; n++) {
4511                         struct em_rxbuffer *rxbuf;
4512                         rxbuf = &rxr->rx_buffers[n];
4513                         if (rxbuf->m_head != NULL) {
4514                                 bus_dmamap_sync(rxr->rxtag, rxbuf->map,
4515                                   BUS_DMASYNC_POSTREAD);
4516                                 bus_dmamap_unload(rxr->rxtag, rxbuf->map);
4517                                 m_freem(rxbuf->m_head);
4518                                 rxbuf->m_head = NULL;
4519                         }
4520                 }
4521                 rxr->next_to_check = 0;
4522                 rxr->next_to_refresh = 0;
4523         }
4524
4525         return (ENOBUFS);
4526 }
4527
4528 /*********************************************************************
4529  *
4530  *  Free all receive rings.
4531  *
4532  **********************************************************************/
4533 static void
4534 em_free_receive_structures(struct adapter *adapter)
4535 {
4536         struct rx_ring *rxr = adapter->rx_rings;
4537
4538         for (int i = 0; i < adapter->num_queues; i++, rxr++) {
4539                 em_free_receive_buffers(rxr);
4540                 /* Free the ring memory as well */
4541                 em_dma_free(adapter, &rxr->rxdma);
4542                 EM_RX_LOCK_DESTROY(rxr);
4543         }
4544
4545         free(adapter->rx_rings, M_DEVBUF);
4546 }
4547
4548
4549 /*********************************************************************
4550  *
4551  *  Free receive ring data structures
4552  *
4553  **********************************************************************/
4554 static void
4555 em_free_receive_buffers(struct rx_ring *rxr)
4556 {
4557         struct adapter          *adapter = rxr->adapter;
4558         struct em_rxbuffer      *rxbuf = NULL;
4559
4560         INIT_DEBUGOUT("free_receive_buffers: begin");
4561
4562         if (rxr->rx_buffers != NULL) {
4563                 for (int i = 0; i < adapter->num_rx_desc; i++) {
4564                         rxbuf = &rxr->rx_buffers[i];
4565                         if (rxbuf->map != NULL) {
4566                                 bus_dmamap_sync(rxr->rxtag, rxbuf->map,
4567                                     BUS_DMASYNC_POSTREAD);
4568                                 bus_dmamap_unload(rxr->rxtag, rxbuf->map);
4569                                 bus_dmamap_destroy(rxr->rxtag, rxbuf->map);
4570                         }
4571                         if (rxbuf->m_head != NULL) {
4572                                 m_freem(rxbuf->m_head);
4573                                 rxbuf->m_head = NULL;
4574                         }
4575                 }
4576                 free(rxr->rx_buffers, M_DEVBUF);
4577                 rxr->rx_buffers = NULL;
4578                 rxr->next_to_check = 0;
4579                 rxr->next_to_refresh = 0;
4580         }
4581
4582         if (rxr->rxtag != NULL) {
4583                 bus_dma_tag_destroy(rxr->rxtag);
4584                 rxr->rxtag = NULL;
4585         }
4586
4587         return;
4588 }
4589
4590
4591 /*********************************************************************
4592  *
4593  *  Enable receive unit.
4594  *
4595  **********************************************************************/
4596
4597 static void
4598 em_initialize_receive_unit(struct adapter *adapter)
4599 {
4600         struct rx_ring *rxr = adapter->rx_rings;
4601         struct ifnet    *ifp = adapter->ifp;
4602         struct e1000_hw *hw = &adapter->hw;
4603         u32     rctl, rxcsum, rfctl;
4604
4605         INIT_DEBUGOUT("em_initialize_receive_units: begin");
4606
4607         /*
4608          * Make sure receives are disabled while setting
4609          * up the descriptor ring
4610          */
4611         rctl = E1000_READ_REG(hw, E1000_RCTL);
4612         /* On 82574/82583, never disable receives once they are enabled */
4613         if ((hw->mac.type != e1000_82574) && (hw->mac.type != e1000_82583))
4614                 E1000_WRITE_REG(hw, E1000_RCTL, rctl & ~E1000_RCTL_EN);
4615
4616         /* Setup the Receive Control Register */
4617         rctl &= ~(3 << E1000_RCTL_MO_SHIFT);
4618         rctl |= E1000_RCTL_EN | E1000_RCTL_BAM |
4619             E1000_RCTL_LBM_NO | E1000_RCTL_RDMTS_HALF |
4620             (hw->mac.mc_filter_type << E1000_RCTL_MO_SHIFT);
4621
4622         /* Do not store bad packets */
4623         rctl &= ~E1000_RCTL_SBP;
4624
4625         /* Enable Long Packet receive */
4626         if (ifp->if_mtu > ETHERMTU)
4627                 rctl |= E1000_RCTL_LPE;
4628         else
4629                 rctl &= ~E1000_RCTL_LPE;
4630
4631         /* Strip the CRC */
4632         if (!em_disable_crc_stripping)
4633                 rctl |= E1000_RCTL_SECRC;
4634
4635         E1000_WRITE_REG(&adapter->hw, E1000_RADV,
4636             adapter->rx_abs_int_delay.value);
4637
4638         E1000_WRITE_REG(&adapter->hw, E1000_RDTR,
4639             adapter->rx_int_delay.value);
4640         /*
4641          * Set the interrupt throttling rate. Value is calculated
4642          * as DEFAULT_ITR = 1/(MAX_INTS_PER_SEC * 256ns)
4643          */
4644         E1000_WRITE_REG(hw, E1000_ITR, DEFAULT_ITR);
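        /*
         * Illustrative arithmetic (assuming the driver's default
         * MAX_INTS_PER_SEC of 8000): DEFAULT_ITR = 10^9 / (8000 * 256)
         * = 488, i.e. at most one interrupt roughly every 125us.
         */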
4645
4646         /* Use extended rx descriptor formats */
4647         rfctl = E1000_READ_REG(hw, E1000_RFCTL);
4648         rfctl |= E1000_RFCTL_EXTEN;
4649         /*
4650         ** When using MSIX interrupts we need to throttle
4651         ** using the EITR register (82574 only)
4652         */
4653         if (hw->mac.type == e1000_82574) {
4654                 for (int i = 0; i < 4; i++)
4655                         E1000_WRITE_REG(hw, E1000_EITR_82574(i),
4656                             DEFAULT_ITR);
4657                 /* Disable accelerated acknowledge */
4658                 rfctl |= E1000_RFCTL_ACK_DIS;
4659         }
4660         E1000_WRITE_REG(hw, E1000_RFCTL, rfctl);
4661
4662         rxcsum = E1000_READ_REG(hw, E1000_RXCSUM);
4663         if (ifp->if_capenable & IFCAP_RXCSUM) {
4664 #ifdef EM_MULTIQUEUE
4665                 rxcsum |= E1000_RXCSUM_TUOFL |
4666                           E1000_RXCSUM_IPOFL |
4667                           E1000_RXCSUM_PCSD;
4668 #else
4669                 rxcsum |= E1000_RXCSUM_TUOFL;
4670 #endif
4671         } else
4672                 rxcsum &= ~E1000_RXCSUM_TUOFL;
4673
4674         E1000_WRITE_REG(hw, E1000_RXCSUM, rxcsum);
4675
4676 #ifdef EM_MULTIQUEUE
4677 #define RSSKEYLEN 10
4678         if (adapter->num_queues > 1) {
4679                 uint8_t  rss_key[4 * RSSKEYLEN];
4680                 uint32_t reta = 0;
4681                 int i;
4682
4683                 /*
4684                 * Configure RSS key
4685                 */
4686                 arc4rand(rss_key, sizeof(rss_key), 0);
4687                 for (i = 0; i < RSSKEYLEN; ++i) {
4688                         uint32_t rssrk = 0;
4689
4690                         rssrk = EM_RSSRK_VAL(rss_key, i);
4691                         E1000_WRITE_REG(hw,E1000_RSSRK(i), rssrk);
4692                 }
4693
4694                 /*
4695                 * Configure the RSS redirect table in the following fashion:
4696                 * (hash & ring_cnt_mask) == rdr_table[(hash & rdr_table_mask)]
4697                 */
4698                 for (i = 0; i < sizeof(reta); ++i) {
4699                         uint32_t q;
4700
4701                         q = (i % adapter->num_queues) << 7;
4702                         reta |= q << (8 * i);
4703                 }
4704
4705                 for (i = 0; i < 32; ++i) {
4706                         E1000_WRITE_REG(hw, E1000_RETA(i), reta);
4707                 }
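                /*
                 * Illustrative layout (hypothetical 2-queue case): each
                 * RETA byte selects a queue in bit 7, so the four bytes
                 * built above are 0x00, 0x80, 0x00, 0x80 and
                 * reta = 0x80008000; replicated across all 32 RETA
                 * registers this steers alternating hash values to
                 * queue 0 and queue 1.
                 */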
4708
4709                 E1000_WRITE_REG(hw, E1000_MRQC, E1000_MRQC_RSS_ENABLE_2Q | 
4710                                 E1000_MRQC_RSS_FIELD_IPV4_TCP |
4711                                 E1000_MRQC_RSS_FIELD_IPV4 |
4712                                 E1000_MRQC_RSS_FIELD_IPV6_TCP_EX |
4713                                 E1000_MRQC_RSS_FIELD_IPV6_EX |
4714                                 E1000_MRQC_RSS_FIELD_IPV6);
4715         }
4716 #endif
4717         /*
4718         ** XXX TEMPORARY WORKAROUND: on some systems with the 82573,
4719         ** long latencies are observed, e.g. on the Lenovo X60. This
4720         ** change eliminates the problem, but since having positive
4721         ** values in RDTR is a known source of problems on other
4722         ** platforms, another solution is being sought.
4723         */
4724         if (hw->mac.type == e1000_82573)
4725                 E1000_WRITE_REG(hw, E1000_RDTR, 0x20);
4726
4727         for (int i = 0; i < adapter->num_queues; i++, rxr++) {
4728                 /* Setup the Base and Length of the Rx Descriptor Ring */
4729                 u64 bus_addr = rxr->rxdma.dma_paddr;
4730                 u32 rdt = adapter->num_rx_desc - 1; /* default */
4731
4732                 E1000_WRITE_REG(hw, E1000_RDLEN(i),
4733                     adapter->num_rx_desc * sizeof(union e1000_rx_desc_extended));
4734                 E1000_WRITE_REG(hw, E1000_RDBAH(i), (u32)(bus_addr >> 32));
4735                 E1000_WRITE_REG(hw, E1000_RDBAL(i), (u32)bus_addr);
4736                 /* Setup the Head and Tail Descriptor Pointers */
4737                 E1000_WRITE_REG(hw, E1000_RDH(i), 0);
4738 #ifdef DEV_NETMAP
4739                 /*
4740                  * an init() while a netmap client is active must
4741                  * preserve the rx buffers passed to userspace.
4742                  */
4743                 if (ifp->if_capenable & IFCAP_NETMAP)
4744                         rdt -= nm_kr_rxspace(&NA(adapter->ifp)->rx_rings[i]);
4745 #endif /* DEV_NETMAP */
4746                 E1000_WRITE_REG(hw, E1000_RDT(i), rdt);
4747         }
4748
4749         /*
4750          * Set PTHRESH for improved jumbo performance.
4751          * According to 10.2.5.11 of Intel 82574 Datasheet,
4752          * RXDCTL(1) is written whenever RXDCTL(0) is written.
4753          * Only write to RXDCTL(1) if there is a need for different
4754          * settings.
4755          */
4756         if (((adapter->hw.mac.type == e1000_ich9lan) ||
4757             (adapter->hw.mac.type == e1000_pch2lan) ||
4758             (adapter->hw.mac.type == e1000_ich10lan)) &&
4759             (ifp->if_mtu > ETHERMTU)) {
4760                 u32 rxdctl = E1000_READ_REG(hw, E1000_RXDCTL(0));
4761                 E1000_WRITE_REG(hw, E1000_RXDCTL(0), rxdctl | 3);
4762         } else if (adapter->hw.mac.type == e1000_82574) {
4763                 for (int i = 0; i < adapter->num_queues; i++) {
4764                         u32 rxdctl = E1000_READ_REG(hw, E1000_RXDCTL(i));
4765
4766                         rxdctl |= 0x20; /* PTHRESH */
4767                         rxdctl |= 4 << 8; /* HTHRESH */
4768                         rxdctl |= 4 << 16;/* WTHRESH */
4769                         rxdctl |= 1 << 24; /* Switch to granularity */
4770                         E1000_WRITE_REG(hw, E1000_RXDCTL(i), rxdctl);
4771                 }
4772         }
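        /*
         * Decoding the 82574 RXDCTL value written above (field positions
         * as understood from the datasheet): PTHRESH = 0x20 in bits 5:0,
         * HTHRESH = 4 in bits 13:8, WTHRESH = 4 in bits 20:16, and bit 24
         * selects descriptor rather than cache-line granularity.
         */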
4773                 
4774         if (adapter->hw.mac.type >= e1000_pch2lan) {
4775                 if (ifp->if_mtu > ETHERMTU)
4776                         e1000_lv_jumbo_workaround_ich8lan(hw, TRUE);
4777                 else
4778                         e1000_lv_jumbo_workaround_ich8lan(hw, FALSE);
4779         }
4780
4781         /* Make sure VLAN Filters are off */
4782         rctl &= ~E1000_RCTL_VFE;
4783
4784         if (adapter->rx_mbuf_sz == MCLBYTES)
4785                 rctl |= E1000_RCTL_SZ_2048;
4786         else if (adapter->rx_mbuf_sz == MJUMPAGESIZE)
4787                 rctl |= E1000_RCTL_SZ_4096 | E1000_RCTL_BSEX;
4788         else if (adapter->rx_mbuf_sz > MJUMPAGESIZE)
4789                 rctl |= E1000_RCTL_SZ_8192 | E1000_RCTL_BSEX;
4790
4791         /* ensure we use a DTYPE of 00 here (clear the descriptor type bits) */
4792         rctl &= ~0x00000C00;
4793         /* Write out the settings */
4794         E1000_WRITE_REG(hw, E1000_RCTL, rctl);
4795
4796         return;
4797 }
4798
4799
4800 /*********************************************************************
4801  *
4802  *  This routine executes in interrupt context. It replenishes
4803  *  the mbufs in the descriptor ring and sends data which has been
4804  *  DMA'ed into host memory to the upper layer.
4805  *
4806  *  We loop at most count times if count is > 0, or until done if
4807  *  count < 0.
4808  *  
4809  *  For polling we also now return the number of cleaned packets.
4810  *********************************************************************/
4811 static bool
4812 em_rxeof(struct rx_ring *rxr, int count, int *done)
4813 {
4814         struct adapter          *adapter = rxr->adapter;
4815         struct ifnet            *ifp = adapter->ifp;
4816         struct mbuf             *mp, *sendmp;
4817         u32                     status = 0;
4818         u16                     len;
4819         int                     i, processed, rxdone = 0;
4820         bool                    eop;
4821         union e1000_rx_desc_extended    *cur;
4822
4823         EM_RX_LOCK(rxr);
4824
4825         /* Sync the ring */
4826         bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
4827             BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
4828
4829
4830 #ifdef DEV_NETMAP
4831         if (netmap_rx_irq(ifp, rxr->me, &processed)) {
4832                 EM_RX_UNLOCK(rxr);
4833                 return (FALSE);
4834         }
4835 #endif /* DEV_NETMAP */
4836
4837         for (i = rxr->next_to_check, processed = 0; count != 0;) {
4838                 if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
4839                         break;
4840
4841                 cur = &rxr->rx_base[i];
4842                 status = le32toh(cur->wb.upper.status_error);
4843                 mp = sendmp = NULL;
4844
4845                 if ((status & E1000_RXD_STAT_DD) == 0)
4846                         break;
4847
4848                 len = le16toh(cur->wb.upper.length);
4849                 eop = (status & E1000_RXD_STAT_EOP) != 0;
4850
4851                 if ((status & E1000_RXDEXT_ERR_FRAME_ERR_MASK) ||
4852                     (rxr->discard == TRUE)) {
4853                         adapter->dropped_pkts++;
4854                         ++rxr->rx_discarded;
4855                         if (!eop) /* Catch subsequent segs */
4856                                 rxr->discard = TRUE;
4857                         else
4858                                 rxr->discard = FALSE;
4859                         em_rx_discard(rxr, i);
4860                         goto next_desc;
4861                 }
4862                 bus_dmamap_unload(rxr->rxtag, rxr->rx_buffers[i].map);
4863
4864                 /* Assign correct length to the current fragment */
4865                 mp = rxr->rx_buffers[i].m_head;
4866                 mp->m_len = len;
4867
4868                 /* Trigger for refresh */
4869                 rxr->rx_buffers[i].m_head = NULL;
4870
4871                 /* First segment? */
4872                 if (rxr->fmp == NULL) {
4873                         mp->m_pkthdr.len = len;
4874                         rxr->fmp = rxr->lmp = mp;
4875                 } else {
4876                         /* Chain mbuf's together */
4877                         mp->m_flags &= ~M_PKTHDR;
4878                         rxr->lmp->m_next = mp;
4879                         rxr->lmp = mp;
4880                         rxr->fmp->m_pkthdr.len += len;
4881                 }
4882
4883                 if (eop) {
4884                         --count;
4885                         sendmp = rxr->fmp;
4886                         sendmp->m_pkthdr.rcvif = ifp;
4887                         ifp->if_ipackets++;
4888                         em_receive_checksum(status, sendmp);
4889 #ifndef __NO_STRICT_ALIGNMENT
4890                         if (adapter->hw.mac.max_frame_size >
4891                             (MCLBYTES - ETHER_ALIGN) &&
4892                             em_fixup_rx(rxr) != 0)
4893                                 goto skip;
4894 #endif
4895                         if (status & E1000_RXD_STAT_VP) {
4896                                 sendmp->m_pkthdr.ether_vtag =
4897                                     le16toh(cur->wb.upper.vlan);
4898                                 sendmp->m_flags |= M_VLANTAG;
4899                         }
4900 #ifndef __NO_STRICT_ALIGNMENT
4901 skip:
4902 #endif
4903                         rxr->fmp = rxr->lmp = NULL;
4904                 }
4905 next_desc:
4906                 /* Sync the ring */
4907                 bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
4908                         BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
4909
4910                 /* Zero out the receive descriptors status. */
4911                 cur->wb.upper.status_error &= htole32(~0xFF);
4912                 ++rxdone;       /* cumulative for POLL */
4913                 ++processed;
4914
4915                 /* Advance our pointers to the next descriptor. */
4916                 if (++i == adapter->num_rx_desc)
4917                         i = 0;
4918
4919                 /* Send to the stack */
4920                 if (sendmp != NULL) {
4921                         rxr->next_to_check = i;
4922                         EM_RX_UNLOCK(rxr);
4923                         (*ifp->if_input)(ifp, sendmp);
4924                         EM_RX_LOCK(rxr);
4925                         i = rxr->next_to_check;
4926                 }
4927
4928                 /* Only refresh mbufs every 8 descriptors */
4929                 if (processed == 8) {
4930                         em_refresh_mbufs(rxr, i);
4931                         processed = 0;
4932                 }
4933         }
4934
4935         /* Catch any remaining refresh work */
4936         if (e1000_rx_unrefreshed(rxr))
4937                 em_refresh_mbufs(rxr, i);
4938
4939         rxr->next_to_check = i;
4940         if (done != NULL)
4941                 *done = rxdone;
4942         EM_RX_UNLOCK(rxr);
4943
4944         return ((status & E1000_RXD_STAT_DD) ? TRUE : FALSE);
4945 }
4946
4947 static __inline void
4948 em_rx_discard(struct rx_ring *rxr, int i)
4949 {
4950         struct em_rxbuffer      *rbuf;
4951
4952         rbuf = &rxr->rx_buffers[i];
4953         bus_dmamap_unload(rxr->rxtag, rbuf->map);
4954
4955         /* Free any previous pieces */
4956         if (rxr->fmp != NULL) {
4957                 rxr->fmp->m_flags |= M_PKTHDR;
4958                 m_freem(rxr->fmp);
4959                 rxr->fmp = NULL;
4960                 rxr->lmp = NULL;
4961         }
4962         /*
4963         ** Free the buffer and allow em_refresh_mbufs()
4964         ** to clean up and recharge the buffer.
4965         */
4966         if (rbuf->m_head) {
4967                 m_free(rbuf->m_head);
4968                 rbuf->m_head = NULL;
4969         }
4970         return;
4971 }
4972
4973 #ifndef __NO_STRICT_ALIGNMENT
4974 /*
4975  * When jumbo frames are enabled we should realign the entire payload on
4976  * architectures with strict alignment. This is a serious design mistake of the
4977  * 8254x, as it nullifies the gain of DMA. The 8254x only allows RX buffer sizes
4978  * of 2048/4096/8192/16384. What we really want is 2048 - ETHER_ALIGN to align
4979  * its payload. On architectures without strict alignment restrictions the 8254x
4980  * still performs unaligned memory accesses, which reduce performance too.
4981  * To avoid copying over an entire frame to align, we allocate a new mbuf and
4982  * copy ethernet header to the new mbuf. The new mbuf is prepended into the
4983  * existing mbuf chain.
4984  *
4985  * Be aware, the best performance of the 8254x is achieved only when jumbo
4986  * frames are not used at all on architectures with strict alignment.
4987  */
4988 static int
4989 em_fixup_rx(struct rx_ring *rxr)
4990 {
4991         struct adapter *adapter = rxr->adapter;
4992         struct mbuf *m, *n;
4993         int error;
4994
4995         error = 0;
4996         m = rxr->fmp;
4997         if (m->m_len <= (MCLBYTES - ETHER_HDR_LEN)) {
4998                 bcopy(m->m_data, m->m_data + ETHER_HDR_LEN, m->m_len);
4999                 m->m_data += ETHER_HDR_LEN;
5000         } else {
5001                 MGETHDR(n, M_NOWAIT, MT_DATA);
5002                 if (n != NULL) {
5003                         bcopy(m->m_data, n->m_data, ETHER_HDR_LEN);
5004                         m->m_data += ETHER_HDR_LEN;
5005                         m->m_len -= ETHER_HDR_LEN;
5006                         n->m_len = ETHER_HDR_LEN;
5007                         M_MOVE_PKTHDR(n, m);
5008                         n->m_next = m;
5009                         rxr->fmp = n;
5010                 } else {
5011                         adapter->dropped_pkts++;
5012                         m_freem(rxr->fmp);
5013                         rxr->fmp = NULL;
5014                         error = ENOMEM;
5015                 }
5016         }
5017
5018         return (error);
5019 }
5020 #endif
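/*
 * Illustrative alignment math (hypothetical offsets): an IP header behind
 * a 14-byte Ethernet header sits at offset 14, which is not 32-bit
 * aligned.  Sliding the frame forward by ETHER_HDR_LEN in em_fixup_rx()
 * moves the IP header to offset 28, a multiple of 4, which satisfies
 * strict-alignment architectures.
 */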
5021
5022 static void
5023 em_setup_rxdesc(union e1000_rx_desc_extended *rxd, const struct em_rxbuffer *rxbuf)
5024 {
5025         rxd->read.buffer_addr = htole64(rxbuf->paddr);
5026         /* DD bits must be cleared */
5027         rxd->wb.upper.status_error = 0;
5028 }
5029
5030 /*********************************************************************
5031  *
5032  *  Verify that the hardware indicated that the checksum is valid.
5033  *  Inform the stack about the status of the checksum so that the
5034  *  stack doesn't spend time verifying it.
5035  *
5036  *********************************************************************/
5037 static void
5038 em_receive_checksum(uint32_t status, struct mbuf *mp)
5039 {
5040         mp->m_pkthdr.csum_flags = 0;
5041
5042         /* If the Ignore Checksum bit is set, report nothing */
5043         if (status & E1000_RXD_STAT_IXSM)
5044                 return;
5045
5046         /* If the IP checksum exists and there is no IP Checksum error */
5047         if ((status & (E1000_RXD_STAT_IPCS | E1000_RXDEXT_STATERR_IPE)) ==
5048                 E1000_RXD_STAT_IPCS) {
5049                 mp->m_pkthdr.csum_flags = (CSUM_IP_CHECKED | CSUM_IP_VALID);
5050         }
5051
5052         /* TCP or UDP checksum */
5053         if ((status & (E1000_RXD_STAT_TCPCS | E1000_RXDEXT_STATERR_TCPE)) ==
5054             E1000_RXD_STAT_TCPCS) {
5055                 mp->m_pkthdr.csum_flags |= (CSUM_DATA_VALID | CSUM_PSEUDO_HDR);
5056                 mp->m_pkthdr.csum_data = htons(0xffff);
5057         }
5058         if (status & E1000_RXD_STAT_UDPCS) {
5059                 mp->m_pkthdr.csum_flags |= (CSUM_DATA_VALID | CSUM_PSEUDO_HDR);
5060                 mp->m_pkthdr.csum_data = htons(0xffff);
5061         }
5062 }
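/*
 * Downstream effect (standard mbuf checksum convention): with
 * CSUM_DATA_VALID | CSUM_PSEUDO_HDR set and csum_data = 0xffff, the
 * TCP/UDP input paths treat the checksum as already verified and skip
 * the software in_cksum() pass entirely.
 */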
5063
5064 /*
5065  * This routine is run via a vlan
5066  * config EVENT
5067  */
5068 static void
5069 em_register_vlan(void *arg, struct ifnet *ifp, u16 vtag)
5070 {
5071         struct adapter  *adapter = ifp->if_softc;
5072         u32             index, bit;
5073
5074         if (ifp->if_softc !=  arg)   /* Not our event */
5075                 return;
5076
5077         if ((vtag == 0) || (vtag > 4095))       /* Invalid ID */
5078                 return;
5079
5080         EM_CORE_LOCK(adapter);
5081         index = (vtag >> 5) & 0x7F;
5082         bit = vtag & 0x1F;
5083         adapter->shadow_vfta[index] |= (1 << bit);
5084         ++adapter->num_vlans;
5085         /* Re-init to load the changes */
5086         if (ifp->if_capenable & IFCAP_VLAN_HWFILTER)
5087                 em_init_locked(adapter);
5088         EM_CORE_UNLOCK(adapter);
5089 }
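/*
 * Illustrative VFTA math (hypothetical tag): for vtag = 1234,
 * index = (1234 >> 5) & 0x7F = 38 and bit = 1234 & 0x1F = 18, so the
 * shadow table sets bit 18 of word 38; one bit for each of the 4096
 * possible VLAN IDs across the 128-word filter table.
 */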
5090
5091 /*
5092  * This routine is run via a vlan
5093  * unconfig EVENT
5094  */
5095 static void
5096 em_unregister_vlan(void *arg, struct ifnet *ifp, u16 vtag)
5097 {
5098         struct adapter  *adapter = ifp->if_softc;
5099         u32             index, bit;
5100
5101         if (ifp->if_softc !=  arg)
5102                 return;
5103
5104         if ((vtag == 0) || (vtag > 4095))       /* Invalid */
5105                 return;
5106
5107         EM_CORE_LOCK(adapter);
5108         index = (vtag >> 5) & 0x7F;
5109         bit = vtag & 0x1F;
5110         adapter->shadow_vfta[index] &= ~(1 << bit);
5111         --adapter->num_vlans;
5112         /* Re-init to load the changes */
5113         if (ifp->if_capenable & IFCAP_VLAN_HWFILTER)
5114                 em_init_locked(adapter);
5115         EM_CORE_UNLOCK(adapter);
5116 }
5117
5118 static void
5119 em_setup_vlan_hw_support(struct adapter *adapter)
5120 {
5121         struct e1000_hw *hw = &adapter->hw;
5122         u32             reg;
5123
5124         /*
5125         ** We get here thru init_locked, meaning
5126         ** a soft reset; this has already cleared
5127         ** the VFTA and other state, so if no
5128         ** vlans have been registered, do nothing.
5129         */
5130         if (adapter->num_vlans == 0)
5131                 return;
5132
5133         /*
5134         ** A soft reset zeroes out the VFTA, so
5135         ** we need to repopulate it now.
5136         */
5137         for (int i = 0; i < EM_VFTA_SIZE; i++)
5138                 if (adapter->shadow_vfta[i] != 0)
5139                         E1000_WRITE_REG_ARRAY(hw, E1000_VFTA,
5140                             i, adapter->shadow_vfta[i]);
5141
5142         reg = E1000_READ_REG(hw, E1000_CTRL);
5143         reg |= E1000_CTRL_VME;
5144         E1000_WRITE_REG(hw, E1000_CTRL, reg);
5145
5146         /* Enable the Filter Table */
5147         reg = E1000_READ_REG(hw, E1000_RCTL);
5148         reg &= ~E1000_RCTL_CFIEN;
5149         reg |= E1000_RCTL_VFE;
5150         E1000_WRITE_REG(hw, E1000_RCTL, reg);
5151 }
5152
5153 static void
5154 em_enable_intr(struct adapter *adapter)
5155 {
5156         struct e1000_hw *hw = &adapter->hw;
5157         u32 ims_mask = IMS_ENABLE_MASK;
5158
5159         if (hw->mac.type == e1000_82574) {
5160                 E1000_WRITE_REG(hw, EM_EIAC, EM_MSIX_MASK);
5161                 ims_mask |= EM_MSIX_MASK;
5162         } 
5163         E1000_WRITE_REG(hw, E1000_IMS, ims_mask);
5164 }
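/*
 * On the 82574 in MSI-X mode the same vector bits must appear in both
 * registers: EIAC marks them for auto-clear on interrupt assertion and
 * IMS unmasks them, so each RX/TX/link vector can fire without a
 * separate ICR read to clear it.
 */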
5165
5166 static void
5167 em_disable_intr(struct adapter *adapter)
5168 {
5169         struct e1000_hw *hw = &adapter->hw;
5170
5171         if (hw->mac.type == e1000_82574)
5172                 E1000_WRITE_REG(hw, EM_EIAC, 0);
5173         E1000_WRITE_REG(&adapter->hw, E1000_IMC, 0xffffffff);
5174 }
5175
5176 /*
5177  * Bit of a misnomer: what this really means is
5178  * to enable OS management of the system... aka
5179  * to disable special hardware management features.
5180  */
5181 static void
5182 em_init_manageability(struct adapter *adapter)
5183 {
5184         /* A shared code workaround */
5185 #define E1000_82542_MANC2H E1000_MANC2H
5186         if (adapter->has_manage) {
5187                 int manc2h = E1000_READ_REG(&adapter->hw, E1000_MANC2H);
5188                 int manc = E1000_READ_REG(&adapter->hw, E1000_MANC);
5189
5190                 /* disable hardware interception of ARP */
5191                 manc &= ~(E1000_MANC_ARP_EN);
5192
5193                 /* enable receiving management packets to the host */
5194                 manc |= E1000_MANC_EN_MNG2HOST;
5195 #define E1000_MNG2HOST_PORT_623 (1 << 5)
5196 #define E1000_MNG2HOST_PORT_664 (1 << 6)
5197                 manc2h |= E1000_MNG2HOST_PORT_623;
5198                 manc2h |= E1000_MNG2HOST_PORT_664;
5199                 E1000_WRITE_REG(&adapter->hw, E1000_MANC2H, manc2h);
5200                 E1000_WRITE_REG(&adapter->hw, E1000_MANC, manc);
5201         }
5202 }
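/*
 * Ports 623 and 664 are the ASF/RMCP management ports; the MANC2H bits
 * set above let packets addressed to them reach the host OS as well as
 * the management controller.
 */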
5203
5204 /*
5205  * Give control back to hardware management
5206  * controller if there is one.
5207  */
5208 static void
5209 em_release_manageability(struct adapter *adapter)
5210 {
5211         if (adapter->has_manage) {
5212                 int manc = E1000_READ_REG(&adapter->hw, E1000_MANC);
5213
5214                 /* re-enable hardware interception of ARP */
5215                 manc |= E1000_MANC_ARP_EN;
5216                 manc &= ~E1000_MANC_EN_MNG2HOST;
5217
5218                 E1000_WRITE_REG(&adapter->hw, E1000_MANC, manc);
5219         }
5220 }
5221
5222 /*
5223  * em_get_hw_control sets the {CTRL_EXT|FWSM}:DRV_LOAD bit.
5224  * For ASF and Pass Through versions of f/w this means
5225  * that the driver is loaded. For AMT versions of f/w
5226  * this means that the network i/f is open.
5227  */
5228 static void
5229 em_get_hw_control(struct adapter *adapter)
5230 {
5231         u32 ctrl_ext, swsm;
5232
5233         if (adapter->hw.mac.type == e1000_82573) {
5234                 swsm = E1000_READ_REG(&adapter->hw, E1000_SWSM);
5235                 E1000_WRITE_REG(&adapter->hw, E1000_SWSM,
5236                     swsm | E1000_SWSM_DRV_LOAD);
5237                 return;
5238         }
5239         /* else */
5240         ctrl_ext = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
5241         E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT,
5242             ctrl_ext | E1000_CTRL_EXT_DRV_LOAD);
5243         return;
5244 }
5245
5246 /*
5247  * em_release_hw_control resets {CTRL_EXT|FWSM}:DRV_LOAD bit.
5248  * For ASF and Pass Through versions of f/w this means that
5249  * the driver is no longer loaded. For AMT versions of the
5250  * f/w this means that the network i/f is closed.
5251  */
5252 static void
5253 em_release_hw_control(struct adapter *adapter)
5254 {
5255         u32 ctrl_ext, swsm;
5256
5257         if (!adapter->has_manage)
5258                 return;
5259
5260         if (adapter->hw.mac.type == e1000_82573) {
5261                 swsm = E1000_READ_REG(&adapter->hw, E1000_SWSM);
5262                 E1000_WRITE_REG(&adapter->hw, E1000_SWSM,
5263                     swsm & ~E1000_SWSM_DRV_LOAD);
5264                 return;
5265         }
5266         /* else */
5267         ctrl_ext = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
5268         E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT,
5269             ctrl_ext & ~E1000_CTRL_EXT_DRV_LOAD);
5270         return;
5271 }
5272
5273 static int
5274 em_is_valid_ether_addr(u8 *addr)
5275 {
5276         char zero_addr[6] = { 0, 0, 0, 0, 0, 0 };
5277
5278         if ((addr[0] & 1) || (!bcmp(addr, zero_addr, ETHER_ADDR_LEN))) {
5279                 return (FALSE);
5280         }
5281
5282         return (TRUE);
5283 }
5284
5285 /*
5286 ** Parse the interface capabilities with regard
5287 ** to both system management and wake-on-lan for
5288 ** later use.
5289 */
5290 static void
5291 em_get_wakeup(device_t dev)
5292 {
5293         struct adapter  *adapter = device_get_softc(dev);
5294         u16             eeprom_data = 0, device_id, apme_mask;
5295
5296         adapter->has_manage = e1000_enable_mng_pass_thru(&adapter->hw);
5297         apme_mask = EM_EEPROM_APME;
5298
5299         switch (adapter->hw.mac.type) {
5300         case e1000_82573:
5301         case e1000_82583:
5302                 adapter->has_amt = TRUE;
5303                 /* Falls thru */
5304         case e1000_82571:
5305         case e1000_82572:
5306         case e1000_80003es2lan:
5307                 if (adapter->hw.bus.func == 1) {
5308                         e1000_read_nvm(&adapter->hw,
5309                             NVM_INIT_CONTROL3_PORT_B, 1, &eeprom_data);
5310                         break;
5311                 } else
5312                         e1000_read_nvm(&adapter->hw,
5313                             NVM_INIT_CONTROL3_PORT_A, 1, &eeprom_data);
5314                 break;
5315         case e1000_ich8lan:
5316         case e1000_ich9lan:
5317         case e1000_ich10lan:
5318         case e1000_pchlan:
5319         case e1000_pch2lan:
5320                 apme_mask = E1000_WUC_APME;
5321                 adapter->has_amt = TRUE;
5322                 eeprom_data = E1000_READ_REG(&adapter->hw, E1000_WUC);
5323                 break;
5324         default:
5325                 e1000_read_nvm(&adapter->hw,
5326                     NVM_INIT_CONTROL3_PORT_A, 1, &eeprom_data);
5327                 break;
5328         }
5329         if (eeprom_data & apme_mask)
5330                 adapter->wol = (E1000_WUFC_MAG | E1000_WUFC_MC);
5331         /*
5332          * We have the eeprom settings, now apply the special cases
5333          * where the eeprom may be wrong or the board won't support
5334          * wake on lan on a particular port
5335          */
5336         device_id = pci_get_device(dev);
5337         switch (device_id) {
5338         case E1000_DEV_ID_82571EB_FIBER:
5339                 /* Wake events only supported on port A for dual fiber
5340                  * regardless of eeprom setting */
5341                 if (E1000_READ_REG(&adapter->hw, E1000_STATUS) &
5342                     E1000_STATUS_FUNC_1)
5343                         adapter->wol = 0;
5344                 break;
5345         case E1000_DEV_ID_82571EB_QUAD_COPPER:
5346         case E1000_DEV_ID_82571EB_QUAD_FIBER:
5347         case E1000_DEV_ID_82571EB_QUAD_COPPER_LP:
5348                 /* if quad port adapter, disable WoL on all but port A */
5349                 if (global_quad_port_a != 0)
5350                         adapter->wol = 0;
5351                 /* Reset for multiple quad port adapters */
5352                 if (++global_quad_port_a == 4)
5353                         global_quad_port_a = 0;
5354                 break;
5355         }
5356         return;
5357 }
5358
5359
5360 /*
5361  * Enable PCI Wake On Lan capability
5362  */
5363 static void
5364 em_enable_wakeup(device_t dev)
5365 {
5366         struct adapter  *adapter = device_get_softc(dev);
5367         struct ifnet    *ifp = adapter->ifp;
5368         u32             pmc, ctrl, ctrl_ext, rctl;
5369         u16             status;
5370
5371         if ((pci_find_cap(dev, PCIY_PMG, &pmc) != 0))
5372                 return;
5373
5374         /* Advertise the wakeup capability */
5375         ctrl = E1000_READ_REG(&adapter->hw, E1000_CTRL);
5376         ctrl |= (E1000_CTRL_SWDPIN2 | E1000_CTRL_SWDPIN3);
5377         E1000_WRITE_REG(&adapter->hw, E1000_CTRL, ctrl);
5378         E1000_WRITE_REG(&adapter->hw, E1000_WUC, E1000_WUC_PME_EN);
5379
5380         if ((adapter->hw.mac.type == e1000_ich8lan) ||
5381             (adapter->hw.mac.type == e1000_pchlan) ||
5382             (adapter->hw.mac.type == e1000_ich9lan) ||
5383             (adapter->hw.mac.type == e1000_ich10lan))
5384                 e1000_suspend_workarounds_ich8lan(&adapter->hw);
5385
5386         /* Keep the laser running on Fiber adapters */
5387         if (adapter->hw.phy.media_type == e1000_media_type_fiber ||
5388             adapter->hw.phy.media_type == e1000_media_type_internal_serdes) {
5389                 ctrl_ext = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
5390                 ctrl_ext |= E1000_CTRL_EXT_SDP3_DATA;
5391                 E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT, ctrl_ext);
5392         }
5393
5394         /*
5395         ** Determine type of Wakeup: note that wol
5396         ** is set with all bits on by default.
5397         */
5398         if ((ifp->if_capenable & IFCAP_WOL_MAGIC) == 0)
5399                 adapter->wol &= ~E1000_WUFC_MAG;
5400
5401         if ((ifp->if_capenable & IFCAP_WOL_MCAST) == 0)
5402                 adapter->wol &= ~E1000_WUFC_MC;
5403         else {
5404                 rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
5405                 rctl |= E1000_RCTL_MPE;
5406                 E1000_WRITE_REG(&adapter->hw, E1000_RCTL, rctl);
5407         }
5408
5409         if ((adapter->hw.mac.type == e1000_pchlan) ||
5410             (adapter->hw.mac.type == e1000_pch2lan)) {
5411                 if (em_enable_phy_wakeup(adapter))
5412                         return;
5413         } else {
5414                 E1000_WRITE_REG(&adapter->hw, E1000_WUC, E1000_WUC_PME_EN);
5415                 E1000_WRITE_REG(&adapter->hw, E1000_WUFC, adapter->wol);
5416         }
5417
5418         if (adapter->hw.phy.type == e1000_phy_igp_3)
5419                 e1000_igp3_phy_powerdown_workaround_ich8lan(&adapter->hw);
5420
5421         /* Request PME */
5422         status = pci_read_config(dev, pmc + PCIR_POWER_STATUS, 2);
5423         status &= ~(PCIM_PSTAT_PME | PCIM_PSTAT_PMEENABLE);
5424         if (ifp->if_capenable & IFCAP_WOL)
5425                 status |= PCIM_PSTAT_PME | PCIM_PSTAT_PMEENABLE;
5426         pci_write_config(dev, pmc + PCIR_POWER_STATUS, status, 2);
5427
5428         return;
5429 }
5430
5431 /*
5432 ** WOL in the newer chipset interfaces (pchlan)
5433 ** requires things to be copied into the PHY
5434 */
5435 static int
5436 em_enable_phy_wakeup(struct adapter *adapter)
5437 {
5438         struct e1000_hw *hw = &adapter->hw;
5439         u32 mreg, ret = 0;
5440         u16 preg;
5441
5442         /* copy MAC RARs to PHY RARs */
5443         e1000_copy_rx_addrs_to_phy_ich8lan(hw);
5444
5445         /* copy MAC MTA to PHY MTA */
5446         for (int i = 0; i < adapter->hw.mac.mta_reg_count; i++) {
5447                 mreg = E1000_READ_REG_ARRAY(hw, E1000_MTA, i);
5448                 e1000_write_phy_reg(hw, BM_MTA(i), (u16)(mreg & 0xFFFF));
5449                 e1000_write_phy_reg(hw, BM_MTA(i) + 1,
5450                     (u16)((mreg >> 16) & 0xFFFF));
5451         }
5452
5453         /* configure PHY Rx Control register */
5454         e1000_read_phy_reg(&adapter->hw, BM_RCTL, &preg);
5455         mreg = E1000_READ_REG(hw, E1000_RCTL);
5456         if (mreg & E1000_RCTL_UPE)
5457                 preg |= BM_RCTL_UPE;
5458         if (mreg & E1000_RCTL_MPE)
5459                 preg |= BM_RCTL_MPE;
5460         preg &= ~(BM_RCTL_MO_MASK);
5461         if (mreg & E1000_RCTL_MO_3)
5462                 preg |= (((mreg & E1000_RCTL_MO_3) >> E1000_RCTL_MO_SHIFT)
5463                                 << BM_RCTL_MO_SHIFT);
5464         if (mreg & E1000_RCTL_BAM)
5465                 preg |= BM_RCTL_BAM;
5466         if (mreg & E1000_RCTL_PMCF)
5467                 preg |= BM_RCTL_PMCF;
5468         mreg = E1000_READ_REG(hw, E1000_CTRL);
5469         if (mreg & E1000_CTRL_RFCE)
5470                 preg |= BM_RCTL_RFCE;
5471         e1000_write_phy_reg(&adapter->hw, BM_RCTL, preg);
5472
5473         /* enable PHY wakeup in MAC register */
5474         E1000_WRITE_REG(hw, E1000_WUC,
5475             E1000_WUC_PHY_WAKE | E1000_WUC_PME_EN);
5476         E1000_WRITE_REG(hw, E1000_WUFC, adapter->wol);
5477
5478         /* configure and enable PHY wakeup in PHY registers */
5479         e1000_write_phy_reg(&adapter->hw, BM_WUFC, adapter->wol);
5480         e1000_write_phy_reg(&adapter->hw, BM_WUC, E1000_WUC_PME_EN);
5481
5482         /* activate PHY wakeup */
5483         ret = hw->phy.ops.acquire(hw);
5484         if (ret) {
5485                 printf("Could not acquire PHY\n");
5486                 return ret;
5487         }
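        /*
        ** The BM wakeup-control registers live on PHY page 769
        ** (BM_WUC_ENABLE_PAGE); that page must be selected through
        ** the page-select register before the enable bits can be
        ** read-modify-written below.
        */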
5488         e1000_write_phy_reg_mdic(hw, IGP01E1000_PHY_PAGE_SELECT,
5489                                  (BM_WUC_ENABLE_PAGE << IGP_PAGE_SHIFT));
5490         ret = e1000_read_phy_reg_mdic(hw, BM_WUC_ENABLE_REG, &preg);
5491         if (ret) {
5492                 printf("Could not read PHY page 769\n");
5493                 goto out;
5494         }
5495         preg |= BM_WUC_ENABLE_BIT | BM_WUC_HOST_WU_BIT;
5496         ret = e1000_write_phy_reg_mdic(hw, BM_WUC_ENABLE_REG, preg);
5497         if (ret)
5498                 printf("Could not set PHY Host Wakeup bit\n");
5499 out:
5500         hw->phy.ops.release(hw);
5501
5502         return ret;
5503 }
5504
5505 static void
5506 em_led_func(void *arg, int onoff)
5507 {
5508         struct adapter  *adapter = arg;
5509  
5510         EM_CORE_LOCK(adapter);
5511         if (onoff) {
5512                 e1000_setup_led(&adapter->hw);
5513                 e1000_led_on(&adapter->hw);
5514         } else {
5515                 e1000_led_off(&adapter->hw);
5516                 e1000_cleanup_led(&adapter->hw);
5517         }
5518         EM_CORE_UNLOCK(adapter);
5519 }
5520
5521 /*
5522 ** Disable the PCIe L0s and L1 link states
5523 */
5524 static void
5525 em_disable_aspm(struct adapter *adapter)
5526 {
5527         int             base, reg;
5528         u16             link_cap, link_ctrl;
5529         device_t        dev = adapter->dev;
5530
5531         switch (adapter->hw.mac.type) {
5532                 case e1000_82573:
5533                 case e1000_82574:
5534                 case e1000_82583:
5535                         break;
5536                 default:
5537                         return;
5538         }
5539         if (pci_find_cap(dev, PCIY_EXPRESS, &base) != 0)
5540                 return;
5541         reg = base + PCIER_LINK_CAP;
5542         link_cap = pci_read_config(dev, reg, 2);
5543         if ((link_cap & PCIEM_LINK_CAP_ASPM) == 0)
5544                 return;
5545         reg = base + PCIER_LINK_CTL;
5546         link_ctrl = pci_read_config(dev, reg, 2);
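        /*
        ** The low two bits of the Link Control register
        ** (PCIEM_LINK_CTL_ASPMC) select L0s/L1 entry; clearing
        ** both disables ASPM on the link.
        */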
5547         link_ctrl &= ~PCIEM_LINK_CTL_ASPMC;
5548         pci_write_config(dev, reg, link_ctrl, 2);
5549         return;
5550 }
5551
5552 /**********************************************************************
5553  *
5554  *  Update the board statistics counters.
5555  *
5556  **********************************************************************/
5557 static void
5558 em_update_stats_counters(struct adapter *adapter)
5559 {
5560         struct ifnet   *ifp;
5561
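        /*
        ** The hardware statistics registers are clear-on-read,
        ** so each sample below is accumulated into the running
        ** totals kept in the softc.
        */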
5562         if (adapter->hw.phy.media_type == e1000_media_type_copper ||
5563             (E1000_READ_REG(&adapter->hw, E1000_STATUS) & E1000_STATUS_LU)) {
5564                 adapter->stats.symerrs += E1000_READ_REG(&adapter->hw, E1000_SYMERRS);
5565                 adapter->stats.sec += E1000_READ_REG(&adapter->hw, E1000_SEC);
5566         }
5567         adapter->stats.crcerrs += E1000_READ_REG(&adapter->hw, E1000_CRCERRS);
5568         adapter->stats.mpc += E1000_READ_REG(&adapter->hw, E1000_MPC);
5569         adapter->stats.scc += E1000_READ_REG(&adapter->hw, E1000_SCC);
5570         adapter->stats.ecol += E1000_READ_REG(&adapter->hw, E1000_ECOL);
5571
5572         adapter->stats.mcc += E1000_READ_REG(&adapter->hw, E1000_MCC);
5573         adapter->stats.latecol += E1000_READ_REG(&adapter->hw, E1000_LATECOL);
5574         adapter->stats.colc += E1000_READ_REG(&adapter->hw, E1000_COLC);
5575         adapter->stats.dc += E1000_READ_REG(&adapter->hw, E1000_DC);
5576         adapter->stats.rlec += E1000_READ_REG(&adapter->hw, E1000_RLEC);
5577         adapter->stats.xonrxc += E1000_READ_REG(&adapter->hw, E1000_XONRXC);
5578         adapter->stats.xontxc += E1000_READ_REG(&adapter->hw, E1000_XONTXC);
5579         adapter->stats.xoffrxc += E1000_READ_REG(&adapter->hw, E1000_XOFFRXC);
5580         adapter->stats.xofftxc += E1000_READ_REG(&adapter->hw, E1000_XOFFTXC);
5581         adapter->stats.fcruc += E1000_READ_REG(&adapter->hw, E1000_FCRUC);
5582         adapter->stats.prc64 += E1000_READ_REG(&adapter->hw, E1000_PRC64);
5583         adapter->stats.prc127 += E1000_READ_REG(&adapter->hw, E1000_PRC127);
5584         adapter->stats.prc255 += E1000_READ_REG(&adapter->hw, E1000_PRC255);
5585         adapter->stats.prc511 += E1000_READ_REG(&adapter->hw, E1000_PRC511);
5586         adapter->stats.prc1023 += E1000_READ_REG(&adapter->hw, E1000_PRC1023);
5587         adapter->stats.prc1522 += E1000_READ_REG(&adapter->hw, E1000_PRC1522);
5588         adapter->stats.gprc += E1000_READ_REG(&adapter->hw, E1000_GPRC);
5589         adapter->stats.bprc += E1000_READ_REG(&adapter->hw, E1000_BPRC);
5590         adapter->stats.mprc += E1000_READ_REG(&adapter->hw, E1000_MPRC);
5591         adapter->stats.gptc += E1000_READ_REG(&adapter->hw, E1000_GPTC);
5592
5593         /* For the 64-bit byte counters the low dword must be read first. */
5594         /* Both registers clear on the read of the high dword */
5595
5596         adapter->stats.gorc += E1000_READ_REG(&adapter->hw, E1000_GORCL) +
5597             ((u64)E1000_READ_REG(&adapter->hw, E1000_GORCH) << 32);
5598         adapter->stats.gotc += E1000_READ_REG(&adapter->hw, E1000_GOTCL) +
5599             ((u64)E1000_READ_REG(&adapter->hw, E1000_GOTCH) << 32);
5600
5601         adapter->stats.rnbc += E1000_READ_REG(&adapter->hw, E1000_RNBC);
5602         adapter->stats.ruc += E1000_READ_REG(&adapter->hw, E1000_RUC);
5603         adapter->stats.rfc += E1000_READ_REG(&adapter->hw, E1000_RFC);
5604         adapter->stats.roc += E1000_READ_REG(&adapter->hw, E1000_ROC);
5605         adapter->stats.rjc += E1000_READ_REG(&adapter->hw, E1000_RJC);
5606
5607         adapter->stats.tor += E1000_READ_REG(&adapter->hw, E1000_TORH);
5608         adapter->stats.tot += E1000_READ_REG(&adapter->hw, E1000_TOTH);
5609
5610         adapter->stats.tpr += E1000_READ_REG(&adapter->hw, E1000_TPR);
5611         adapter->stats.tpt += E1000_READ_REG(&adapter->hw, E1000_TPT);
5612         adapter->stats.ptc64 += E1000_READ_REG(&adapter->hw, E1000_PTC64);
5613         adapter->stats.ptc127 += E1000_READ_REG(&adapter->hw, E1000_PTC127);
5614         adapter->stats.ptc255 += E1000_READ_REG(&adapter->hw, E1000_PTC255);
5615         adapter->stats.ptc511 += E1000_READ_REG(&adapter->hw, E1000_PTC511);
5616         adapter->stats.ptc1023 += E1000_READ_REG(&adapter->hw, E1000_PTC1023);
5617         adapter->stats.ptc1522 += E1000_READ_REG(&adapter->hw, E1000_PTC1522);
5618         adapter->stats.mptc += E1000_READ_REG(&adapter->hw, E1000_MPTC);
5619         adapter->stats.bptc += E1000_READ_REG(&adapter->hw, E1000_BPTC);
5620
5621         /* Interrupt Counts */
5622
5623         adapter->stats.iac += E1000_READ_REG(&adapter->hw, E1000_IAC);
5624         adapter->stats.icrxptc += E1000_READ_REG(&adapter->hw, E1000_ICRXPTC);
5625         adapter->stats.icrxatc += E1000_READ_REG(&adapter->hw, E1000_ICRXATC);
5626         adapter->stats.ictxptc += E1000_READ_REG(&adapter->hw, E1000_ICTXPTC);
5627         adapter->stats.ictxatc += E1000_READ_REG(&adapter->hw, E1000_ICTXATC);
5628         adapter->stats.ictxqec += E1000_READ_REG(&adapter->hw, E1000_ICTXQEC);
5629         adapter->stats.ictxqmtc += E1000_READ_REG(&adapter->hw, E1000_ICTXQMTC);
5630         adapter->stats.icrxdmtc += E1000_READ_REG(&adapter->hw, E1000_ICRXDMTC);
5631         adapter->stats.icrxoc += E1000_READ_REG(&adapter->hw, E1000_ICRXOC);
5632
5633         if (adapter->hw.mac.type >= e1000_82543) {
5634                 adapter->stats.algnerrc +=
5635                     E1000_READ_REG(&adapter->hw, E1000_ALGNERRC);
5636                 adapter->stats.rxerrc +=
5637                     E1000_READ_REG(&adapter->hw, E1000_RXERRC);
5638                 adapter->stats.tncrs +=
5639                     E1000_READ_REG(&adapter->hw, E1000_TNCRS);
5640                 adapter->stats.cexterr +=
5641                     E1000_READ_REG(&adapter->hw, E1000_CEXTERR);
5642                 adapter->stats.tsctc +=
5643                     E1000_READ_REG(&adapter->hw, E1000_TSCTC);
5644                 adapter->stats.tsctfc +=
5645                     E1000_READ_REG(&adapter->hw, E1000_TSCTFC);
5646         }
5647         ifp = adapter->ifp;
5648
5649         ifp->if_collisions = adapter->stats.colc;
5650
5651         /* Rx Errors */
5652         ifp->if_ierrors = adapter->dropped_pkts + adapter->stats.rxerrc +
5653             adapter->stats.crcerrs + adapter->stats.algnerrc +
5654             adapter->stats.ruc + adapter->stats.roc +
5655             adapter->stats.mpc + adapter->stats.cexterr;
5656
5657         /* Tx Errors */
5658         ifp->if_oerrors = adapter->stats.ecol +
5659             adapter->stats.latecol + adapter->watchdog_events;
5660 }
5661
5662 /* Export a single 32-bit register via a read-only sysctl. */
5663 static int
5664 em_sysctl_reg_handler(SYSCTL_HANDLER_ARGS)
5665 {
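        /*
        ** arg1 carries the softc and arg2 the register offset,
        ** as wired up by the SYSCTL_ADD_PROC() calls in
        ** em_add_hw_stats(); each read returns a fresh snapshot.
        */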
5666         struct adapter *adapter;
5667         u_int val;
5668
5669         adapter = oidp->oid_arg1;
5670         val = E1000_READ_REG(&adapter->hw, oidp->oid_arg2);
5671         return (sysctl_handle_int(oidp, &val, 0, req));
5672 }
5673
5674 /*
5675  * Add sysctl variables, one per statistic, to the system.
5676  */
5677 static void
5678 em_add_hw_stats(struct adapter *adapter)
5679 {
5680         device_t dev = adapter->dev;
5681
5682         struct tx_ring *txr = adapter->tx_rings;
5683         struct rx_ring *rxr = adapter->rx_rings;
5684
5685         struct sysctl_ctx_list *ctx = device_get_sysctl_ctx(dev);
5686         struct sysctl_oid *tree = device_get_sysctl_tree(dev);
5687         struct sysctl_oid_list *child = SYSCTL_CHILDREN(tree);
5688         struct e1000_hw_stats *stats = &adapter->stats;
5689
5690         struct sysctl_oid *stat_node, *queue_node, *int_node;
5691         struct sysctl_oid_list *stat_list, *queue_list, *int_list;
5692
5693 #define QUEUE_NAME_LEN 32
5694         char namebuf[QUEUE_NAME_LEN];
5695         
5696         /* Driver Statistics */
5697         SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "dropped", 
5698                         CTLFLAG_RD, &adapter->dropped_pkts,
5699                         "Driver dropped packets");
5700         SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "link_irq",
5701                         CTLFLAG_RD, &adapter->link_irq,
5702                         "Link MSIX IRQ Handled");
5703         SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "mbuf_defrag_fail", 
5704                          CTLFLAG_RD, &adapter->mbuf_defrag_failed,
5705                          "Defragmenting mbuf chain failed");
5706         SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "tx_dma_fail", 
5707                         CTLFLAG_RD, &adapter->no_tx_dma_setup,
5708                         "Driver tx dma failure in xmit");
5709         SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "rx_overruns",
5710                         CTLFLAG_RD, &adapter->rx_overruns,
5711                         "RX overruns");
5712         SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "watchdog_timeouts",
5713                         CTLFLAG_RD, &adapter->watchdog_events,
5714                         "Watchdog timeouts");
5715         
5716         SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "device_control",
5717                         CTLTYPE_UINT | CTLFLAG_RD, adapter, E1000_CTRL,
5718                         em_sysctl_reg_handler, "IU",
5719                         "Device Control Register");
5720         SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "rx_control",
5721                         CTLTYPE_UINT | CTLFLAG_RD, adapter, E1000_RCTL,
5722                         em_sysctl_reg_handler, "IU",
5723                         "Receiver Control Register");
5724         SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "fc_high_water",
5725                         CTLFLAG_RD, &adapter->hw.fc.high_water, 0,
5726                         "Flow Control High Watermark");
5727         SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "fc_low_water", 
5728                         CTLFLAG_RD, &adapter->hw.fc.low_water, 0,
5729                         "Flow Control Low Watermark");
5730
5731         for (int i = 0; i < adapter->num_queues; i++, txr++, rxr++) {
5732                 snprintf(namebuf, QUEUE_NAME_LEN, "queue_tx_%d", i);
5733                 queue_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, namebuf,
5734                                             CTLFLAG_RD, NULL, "TX Queue Name");
5735                 queue_list = SYSCTL_CHILDREN(queue_node);
5736
5737                 SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "txd_head", 
5738                                 CTLTYPE_UINT | CTLFLAG_RD, adapter,
5739                                 E1000_TDH(txr->me),
5740                                 em_sysctl_reg_handler, "IU",
5741                                 "Transmit Descriptor Head");
5742                 SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "txd_tail", 
5743                                 CTLTYPE_UINT | CTLFLAG_RD, adapter,
5744                                 E1000_TDT(txr->me),
5745                                 em_sysctl_reg_handler, "IU",
5746                                 "Transmit Descriptor Tail");
5747                 SYSCTL_ADD_ULONG(ctx, queue_list, OID_AUTO, "tx_irq",
5748                                 CTLFLAG_RD, &txr->tx_irq,
5749                                 "Queue MSI-X Transmit Interrupts");
5750                 SYSCTL_ADD_ULONG(ctx, queue_list, OID_AUTO, "no_desc_avail", 
5751                                 CTLFLAG_RD, &txr->no_desc_avail,
5752                                 "Queue No Descriptor Available");
5753
5754                 snprintf(namebuf, QUEUE_NAME_LEN, "queue_rx_%d", i);
5755                 queue_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, namebuf,
5756                                             CTLFLAG_RD, NULL, "RX Queue Name");
5757                 queue_list = SYSCTL_CHILDREN(queue_node);
5758
5759                 SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "rxd_head", 
5760                                 CTLTYPE_UINT | CTLFLAG_RD, adapter,
5761                                 E1000_RDH(rxr->me),
5762                                 em_sysctl_reg_handler, "IU",
5763                                 "Receive Descriptor Head");
5764                 SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "rxd_tail", 
5765                                 CTLTYPE_UINT | CTLFLAG_RD, adapter,
5766                                 E1000_RDT(rxr->me),
5767                                 em_sysctl_reg_handler, "IU",
5768                                 "Receive Descriptor Tail");
5769                 SYSCTL_ADD_ULONG(ctx, queue_list, OID_AUTO, "rx_irq",
5770                                 CTLFLAG_RD, &rxr->rx_irq,
5771                                 "Queue MSI-X Receive Interrupts");
5772         }
5773
5774         /* MAC stats get their own sub node */
5775
5776         stat_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "mac_stats", 
5777                                     CTLFLAG_RD, NULL, "Statistics");
5778         stat_list = SYSCTL_CHILDREN(stat_node);
5779
5780         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "excess_coll",
5781                         CTLFLAG_RD, &stats->ecol,
5782                         "Excessive collisions");
5783         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "single_coll",
5784                         CTLFLAG_RD, &stats->scc,
5785                         "Single collisions");
5786         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "multiple_coll",
5787                         CTLFLAG_RD, &stats->mcc,
5788                         "Multiple collisions");
5789         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "late_coll",
5790                         CTLFLAG_RD, &stats->latecol,
5791                         "Late collisions");
5792         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "collision_count",
5793                         CTLFLAG_RD, &stats->colc,
5794                         "Collision Count");
5795         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "symbol_errors",
5796                         CTLFLAG_RD, &adapter->stats.symerrs,
5797                         "Symbol Errors");
5798         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "sequence_errors",
5799                         CTLFLAG_RD, &adapter->stats.sec,
5800                         "Sequence Errors");
5801         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "defer_count",
5802                         CTLFLAG_RD, &adapter->stats.dc,
5803                         "Defer Count");
5804         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "missed_packets",
5805                         CTLFLAG_RD, &adapter->stats.mpc,
5806                         "Missed Packets");
5807         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_no_buff",
5808                         CTLFLAG_RD, &adapter->stats.rnbc,
5809                         "Receive No Buffers");
5810         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_undersize",
5811                         CTLFLAG_RD, &adapter->stats.ruc,
5812                         "Receive Undersize");
5813         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_fragmented",
5814                         CTLFLAG_RD, &adapter->stats.rfc,
5815                         "Fragmented Packets Received");
5816         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_oversize",
5817                         CTLFLAG_RD, &adapter->stats.roc,
5818                         "Oversized Packets Received");
5819         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_jabber",
5820                         CTLFLAG_RD, &adapter->stats.rjc,
5821                         "Received Jabber");
5822         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_errs",
5823                         CTLFLAG_RD, &adapter->stats.rxerrc,
5824                         "Receive Errors");
5825         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "crc_errs",
5826                         CTLFLAG_RD, &adapter->stats.crcerrs,
5827                         "CRC errors");
5828         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "alignment_errs",
5829                         CTLFLAG_RD, &adapter->stats.algnerrc,
5830                         "Alignment Errors");
5831         /* On 82575 these are collision counts */
5832         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "coll_ext_errs",
5833                         CTLFLAG_RD, &adapter->stats.cexterr,
5834                         "Collision/Carrier extension errors");
5835         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "xon_recvd",
5836                         CTLFLAG_RD, &adapter->stats.xonrxc,
5837                         "XON Received");
5838         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "xon_txd",
5839                         CTLFLAG_RD, &adapter->stats.xontxc,
5840                         "XON Transmitted");
5841         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "xoff_recvd",
5842                         CTLFLAG_RD, &adapter->stats.xoffrxc,
5843                         "XOFF Received");
5844         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "xoff_txd",
5845                         CTLFLAG_RD, &adapter->stats.xofftxc,
5846                         "XOFF Transmitted");
5847
5848         /* Packet Reception Stats */
5849         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "total_pkts_recvd",
5850                         CTLFLAG_RD, &adapter->stats.tpr,
5851                         "Total Packets Received");
5852         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_pkts_recvd",
5853                         CTLFLAG_RD, &adapter->stats.gprc,
5854                         "Good Packets Received");
5855         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "bcast_pkts_recvd",
5856                         CTLFLAG_RD, &adapter->stats.bprc,
5857                         "Broadcast Packets Received");
5858         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "mcast_pkts_recvd",
5859                         CTLFLAG_RD, &adapter->stats.mprc,
5860                         "Multicast Packets Received");
5861         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_64",
5862                         CTLFLAG_RD, &adapter->stats.prc64,
5863                         "64 byte frames received");
5864         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_65_127",
5865                         CTLFLAG_RD, &adapter->stats.prc127,
5866                         "65-127 byte frames received");
5867         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_128_255",
5868                         CTLFLAG_RD, &adapter->stats.prc255,
5869                         "128-255 byte frames received");
5870         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_256_511",
5871                         CTLFLAG_RD, &adapter->stats.prc511,
5872                         "256-511 byte frames received");
5873         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_512_1023",
5874                         CTLFLAG_RD, &adapter->stats.prc1023,
5875                         "512-1023 byte frames received");
5876         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_1024_1522",
5877                         CTLFLAG_RD, &adapter->stats.prc1522,
5878                         "1024-1522 byte frames received");
5879         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_octets_recvd",
5880                         CTLFLAG_RD, &adapter->stats.gorc, 
5881                         "Good Octets Received"); 
5882
5883         /* Packet Transmission Stats */
5884         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_octets_txd",
5885                         CTLFLAG_RD, &adapter->stats.gotc, 
5886                         "Good Octets Transmitted"); 
5887         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "total_pkts_txd",
5888                         CTLFLAG_RD, &adapter->stats.tpt,
5889                         "Total Packets Transmitted");
5890         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_pkts_txd",
5891                         CTLFLAG_RD, &adapter->stats.gptc,
5892                         "Good Packets Transmitted");
5893         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "bcast_pkts_txd",
5894                         CTLFLAG_RD, &adapter->stats.bptc,
5895                         "Broadcast Packets Transmitted");
5896         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "mcast_pkts_txd",
5897                         CTLFLAG_RD, &adapter->stats.mptc,
5898                         "Multicast Packets Transmitted");
5899         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_64",
5900                         CTLFLAG_RD, &adapter->stats.ptc64,
5901                         "64 byte frames transmitted");
5902         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_65_127",
5903                         CTLFLAG_RD, &adapter->stats.ptc127,
5904                         "65-127 byte frames transmitted");
5905         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_128_255",
5906                         CTLFLAG_RD, &adapter->stats.ptc255,
5907                         "128-255 byte frames transmitted");
5908         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_256_511",
5909                         CTLFLAG_RD, &adapter->stats.ptc511,
5910                         "256-511 byte frames transmitted");
5911         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_512_1023",
5912                         CTLFLAG_RD, &adapter->stats.ptc1023,
5913                         "512-1023 byte frames transmitted");
5914         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_1024_1522",
5915                         CTLFLAG_RD, &adapter->stats.ptc1522,
5916                         "1024-1522 byte frames transmitted");
5917         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tso_txd",
5918                         CTLFLAG_RD, &adapter->stats.tsctc,
5919                         "TSO Contexts Transmitted");
5920         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tso_ctx_fail",
5921                         CTLFLAG_RD, &adapter->stats.tsctfc,
5922                         "TSO Contexts Failed");
5923
5924
5925         /* Interrupt Stats */
5926
5927         int_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "interrupts", 
5928                                     CTLFLAG_RD, NULL, "Interrupt Statistics");
5929         int_list = SYSCTL_CHILDREN(int_node);
5930
5931         SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "asserts",
5932                         CTLFLAG_RD, &adapter->stats.iac,
5933                         "Interrupt Assertion Count");
5934
5935         SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "rx_pkt_timer",
5936                         CTLFLAG_RD, &adapter->stats.icrxptc,
5937                         "Interrupt Cause Rx Pkt Timer Expire Count");
5938
5939         SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "rx_abs_timer",
5940                         CTLFLAG_RD, &adapter->stats.icrxatc,
5941                         "Interrupt Cause Rx Abs Timer Expire Count");
5942
5943         SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "tx_pkt_timer",
5944                         CTLFLAG_RD, &adapter->stats.ictxptc,
5945                         "Interrupt Cause Tx Pkt Timer Expire Count");
5946
5947         SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "tx_abs_timer",
5948                         CTLFLAG_RD, &adapter->stats.ictxatc,
5949                         "Interrupt Cause Tx Abs Timer Expire Count");
5950
5951         SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "tx_queue_empty",
5952                         CTLFLAG_RD, &adapter->stats.ictxqec,
5953                         "Interrupt Cause Tx Queue Empty Count");
5954
5955         SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "tx_queue_min_thresh",
5956                         CTLFLAG_RD, &adapter->stats.ictxqmtc,
5957                         "Interrupt Cause Tx Queue Min Thresh Count");
5958
5959         SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "rx_desc_min_thresh",
5960                         CTLFLAG_RD, &adapter->stats.icrxdmtc,
5961                         "Interrupt Cause Rx Desc Min Thresh Count");
5962
5963         SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "rx_overrun",
5964                         CTLFLAG_RD, &adapter->stats.icrxoc,
5965                         "Interrupt Cause Receiver Overrun Count");
5966 }
5967
5968 /**********************************************************************
5969  *
5970  *  This routine provides a way to dump out the adapter eeprom,
5972  *  often a useful debug/service tool. Only the first 32 words
5973  *  are dumped; the data that matters lies within that extent.
5973  *
5974  **********************************************************************/
5975 static int
5976 em_sysctl_nvm_info(SYSCTL_HANDLER_ARGS)
5977 {
5978         struct adapter *adapter = (struct adapter *)arg1;
5979         int error;
5980         int result;
5981
5982         result = -1;
5983         error = sysctl_handle_int(oidp, &result, 0, req);
5984
5985         if (error || !req->newptr)
5986                 return (error);
5987
5988         /*
5989          * This value will cause a hex dump of the
5990          * first 32 16-bit words of the EEPROM to
5991          * the screen.
5992          */
5993         if (result == 1)
5994                 em_print_nvm_info(adapter);
5995
5996         return (error);
5997 }
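/*
** Example usage (the sysctl node name and unit number are
** illustrative; the OID is registered in the attach path).
** Writing 1 triggers the dump to the console:
**   sysctl dev.em.0.nvm=1
*/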
5998
5999 static void
6000 em_print_nvm_info(struct adapter *adapter)
6001 {
6002         u16     eeprom_data;
6003         int     i, j, row = 0;
6004
6005         /* It's a bit crude, but it gets the job done */
6006         printf("\nInterface EEPROM Dump:\n");
6007         printf("Offset\n0x0000  ");
6008         for (i = 0, j = 0; i < 32; i++, j++) {
6009                 if (j == 8) { /* Make the offset block */
6010                         j = 0; ++row;
6011                         printf("\n0x00%x0  ", row);
6012                 }
6013                 e1000_read_nvm(&adapter->hw, i, 1, &eeprom_data);
6014                 printf("%04x ", eeprom_data);
6015         }
6016         printf("\n");
6017 }
6018
6019 static int
6020 em_sysctl_int_delay(SYSCTL_HANDLER_ARGS)
6021 {
6022         struct em_int_delay_info *info;
6023         struct adapter *adapter;
6024         u32 regval;
6025         int error, usecs, ticks;
6026
6027         info = (struct em_int_delay_info *)arg1;
6028         usecs = info->value;
6029         error = sysctl_handle_int(oidp, &usecs, 0, req);
6030         if (error != 0 || req->newptr == NULL)
6031                 return (error);
6032         if (usecs < 0 || usecs > EM_TICKS_TO_USECS(65535))
6033                 return (EINVAL);
6034         info->value = usecs;
6035         ticks = EM_USECS_TO_TICKS(usecs);
6036         if (info->offset == E1000_ITR)  /* units are 256ns here */
6037                 ticks *= 4;
6038
6039         adapter = info->adapter;
6040         
6041         EM_CORE_LOCK(adapter);
6042         regval = E1000_READ_OFFSET(&adapter->hw, info->offset);
6043         regval = (regval & ~0xffff) | (ticks & 0xffff);
6044         /* Handle a few special cases. */
6045         switch (info->offset) {
6046         case E1000_RDTR:
6047                 break;
6048         case E1000_TIDV:
6049                 if (ticks == 0) {
6050                         adapter->txd_cmd &= ~E1000_TXD_CMD_IDE;
6051                         /* Don't write 0 into the TIDV register. */
6052                         regval++;
6053                 } else
6054                         adapter->txd_cmd |= E1000_TXD_CMD_IDE;
6055                 break;
6056         }
6057         E1000_WRITE_OFFSET(&adapter->hw, info->offset, regval);
6058         EM_CORE_UNLOCK(adapter);
6059         return (0);
6060 }
6061
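/*
** Worked example (the conversion macros live in if_em.h and their
** exact definitions are assumed here): the interrupt timers count
** in 1.024 usec units, so a request of 128 usecs becomes
** EM_USECS_TO_TICKS(128) == 125 ticks; for E1000_ITR, whose
** granularity is 256 ns, the value written is 125 * 4 == 500.
*/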
6062 static void
6063 em_add_int_delay_sysctl(struct adapter *adapter, const char *name,
6064         const char *description, struct em_int_delay_info *info,
6065         int offset, int value)
6066 {
6067         info->adapter = adapter;
6068         info->offset = offset;
6069         info->value = value;
6070         SYSCTL_ADD_PROC(device_get_sysctl_ctx(adapter->dev),
6071             SYSCTL_CHILDREN(device_get_sysctl_tree(adapter->dev)),
6072             OID_AUTO, name, CTLTYPE_INT|CTLFLAG_RW,
6073             info, 0, em_sysctl_int_delay, "I", description);
6074 }
6075
6076 static void
6077 em_set_sysctl_value(struct adapter *adapter, const char *name,
6078         const char *description, int *limit, int value)
6079 {
6080         *limit = value;
6081         SYSCTL_ADD_INT(device_get_sysctl_ctx(adapter->dev),
6082             SYSCTL_CHILDREN(device_get_sysctl_tree(adapter->dev)),
6083             OID_AUTO, name, CTLFLAG_RW, limit, value, description);
6084 }
6085
6086
6087 /*
6088 ** Set flow control using sysctl:
6089 ** Flow control values:
6090 **      0 - off
6091 **      1 - rx pause
6092 **      2 - tx pause
6093 **      3 - full
6094 */
6095 static int
6096 em_set_flowcntl(SYSCTL_HANDLER_ARGS)
6097 {       
6098         int             error, input;
6099         struct adapter  *adapter = (struct adapter *) arg1;
6100
6101         input = adapter->fc; /* report the current mode */
6102         error = sysctl_handle_int(oidp, &input, 0, req);
6103
6104         if (error || req->newptr == NULL)
6105                 return (error);
6106
6107         if (input == adapter->fc) /* no change? */
6108                 return (error);
6109
6110         switch (input) {
6111                 case e1000_fc_rx_pause:
6112                 case e1000_fc_tx_pause:
6113                 case e1000_fc_full:
6114                 case e1000_fc_none:
6115                         adapter->hw.fc.requested_mode = input;
6116                         adapter->fc = input;
6117                         break;
6118                 default:
6119                         /* Do nothing */
6120                         return (error);
6121         }
6122
6123         adapter->hw.fc.current_mode = adapter->hw.fc.requested_mode;
6124         e1000_force_mac_fc(&adapter->hw);
6125         return (error);
6126 }
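/*
** Example usage from userland (the sysctl node name and unit
** number are illustrative; the OID itself is registered in the
** attach path):
**   sysctl dev.em.0.fc=3        # request full rx/tx pause
** The value is interpreted against the e1000_fc_* enum handled
** above.
*/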
6127
6128 /*
6129 ** Manage Energy Efficient Ethernet:
6130 ** Control values:
6131 **     0/1 - enabled/disabled
6132 */
6133 static int
6134 em_sysctl_eee(SYSCTL_HANDLER_ARGS)
6135 {
6136        struct adapter *adapter = (struct adapter *) arg1;
6137        int             error, value;
6138
6139        value = adapter->hw.dev_spec.ich8lan.eee_disable;
6140        error = sysctl_handle_int(oidp, &value, 0, req);
6141        if (error || req->newptr == NULL)
6142                return (error);
6143        EM_CORE_LOCK(adapter);
6144        adapter->hw.dev_spec.ich8lan.eee_disable = (value != 0);
6145        em_init_locked(adapter);
6146        EM_CORE_UNLOCK(adapter);
6147        return (0);
6148 }
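/*
** Example (node name illustrative; registration happens in the
** attach path). Writing 1 sets eee_disable and re-initializes
** the interface with EEE off:
**   sysctl dev.em.0.eee_control=1
*/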
6149
6150 static int
6151 em_sysctl_debug_info(SYSCTL_HANDLER_ARGS)
6152 {
6153         struct adapter *adapter;
6154         int error;
6155         int result;
6156
6157         result = -1;
6158         error = sysctl_handle_int(oidp, &result, 0, req);
6159
6160         if (error || !req->newptr)
6161                 return (error);
6162
6163         if (result == 1) {
6164                 adapter = (struct adapter *)arg1;
6165                 em_print_debug_info(adapter);
6166         }
6167
6168         return (error);
6169 }
6170
6171 /*
6172 ** This routine is meant to be fluid; add whatever is
6173 ** needed for debugging a problem.  -jfv
6174 */
6175 static void
6176 em_print_debug_info(struct adapter *adapter)
6177 {
6178         device_t dev = adapter->dev;
6179         struct tx_ring *txr = adapter->tx_rings;
6180         struct rx_ring *rxr = adapter->rx_rings;
6181
6182         if (adapter->ifp->if_drv_flags & IFF_DRV_RUNNING)
6183                 printf("Interface is RUNNING ");
6184         else
6185                 printf("Interface is NOT RUNNING ");
6186
6187         if (adapter->ifp->if_drv_flags & IFF_DRV_OACTIVE)
6188                 printf("and INACTIVE\n");
6189         else
6190                 printf("and ACTIVE\n");
6191
6192         for (int i = 0; i < adapter->num_queues; i++, txr++, rxr++) {
6193                 device_printf(dev, "TX Queue %d ------\n", i);
6194                 device_printf(dev, "hw tdh = %d, hw tdt = %d\n",
6195                         E1000_READ_REG(&adapter->hw, E1000_TDH(i)),
6196                         E1000_READ_REG(&adapter->hw, E1000_TDT(i)));
6197                 device_printf(dev, "Tx Queue Status = %d\n", txr->busy);
6198                 device_printf(dev, "TX descriptors avail = %d\n",
6199                         txr->tx_avail);
6200                 device_printf(dev, "Tx Descriptors avail failure = %ld\n",
6201                         txr->no_desc_avail);
6202                 device_printf(dev, "RX Queue %d ------\n", i);
6203                 device_printf(dev, "hw rdh = %d, hw rdt = %d\n",
6204                         E1000_READ_REG(&adapter->hw, E1000_RDH(i)),
6205                         E1000_READ_REG(&adapter->hw, E1000_RDT(i)));
6206                 device_printf(dev, "RX discarded packets = %ld\n",
6207                         rxr->rx_discarded);
6208                 device_printf(dev, "RX Next to Check = %d\n", rxr->next_to_check);
6209                 device_printf(dev, "RX Next to Refresh = %d\n", rxr->next_to_refresh);
6210         }
6211 }
6212
6213 #ifdef EM_MULTIQUEUE
6214 /*
6215  * 82574 only:
6216  * Write a new value to the EEPROM increasing the number of MSIX
6217  * vectors from 3 to 5, for proper multiqueue support.
6218  */
6219 static void
6220 em_enable_vectors_82574(struct adapter *adapter)
6221 {
6222         struct e1000_hw *hw = &adapter->hw;
6223         device_t dev = adapter->dev;
6224         u16 edata;
6225
6226         e1000_read_nvm(hw, EM_NVM_PCIE_CTRL, 1, &edata);
6227         device_printf(dev, "Current cap: %#06x\n", edata);
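        /*
        ** The NVM field appears to encode (vectors - 1): a stored
        ** value of 4 advertises the 5 MSIX vectors needed for
        ** multiqueue operation.
        */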
6228         if (((edata & EM_NVM_MSIX_N_MASK) >> EM_NVM_MSIX_N_SHIFT) != 4) {
6229                 device_printf(dev, "Writing to eeprom: increasing "
6230                     "reported MSIX vectors from 3 to 5...\n");
6231                 edata &= ~(EM_NVM_MSIX_N_MASK);
6232                 edata |= 4 << EM_NVM_MSIX_N_SHIFT;
6233                 e1000_write_nvm(hw, EM_NVM_PCIE_CTRL, 1, &edata);
6234                 e1000_update_nvm_checksum(hw);
6235                 device_printf(dev, "Writing to eeprom: done\n");
6236         }
6237 }
6238 #endif
6239
6240 #ifdef DDB
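/*
** These commands are entered from the in-kernel debugger prompt,
** e.g.:
**   db> em_reset_dev
**   db> em_dump_queue
*/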
6241 DB_COMMAND(em_reset_dev, em_ddb_reset_dev)
6242 {
6243         devclass_t      dc;
6244         int max_em;
6245
6246         dc = devclass_find("em");
6247         max_em = devclass_get_maxunit(dc);
6248
6249         for (int index = 0; index < max_em; index++) {
6250                 device_t dev;
6251                 dev = devclass_get_device(dc, index);
6252                 if (dev != NULL && device_get_driver(dev) == &em_driver) {
6253                         struct adapter *adapter = device_get_softc(dev);
6254                         EM_CORE_LOCK(adapter);
6255                         em_init_locked(adapter);
6256                         EM_CORE_UNLOCK(adapter);
6257                 }
6258         }
6259 }
6260 DB_COMMAND(em_dump_queue, em_ddb_dump_queue)
6261 {
6262         devclass_t      dc;
6263         int max_em;
6264
6265         dc = devclass_find("em");
6266         max_em = devclass_get_maxunit(dc);
6267
6268         for (int index = 0; index < max_em; index++) {
6269                 device_t dev;
6270                 dev = devclass_get_device(dc, index);
6271                 if (dev != NULL && device_get_driver(dev) == &em_driver)
6272                         em_print_debug_info(device_get_softc(dev));
6273         }
6274
6275 }
6276 #endif