1 /******************************************************************************
2
3   Copyright (c) 2001-2015, Intel Corporation 
4   All rights reserved.
5   
6   Redistribution and use in source and binary forms, with or without 
7   modification, are permitted provided that the following conditions are met:
8   
9    1. Redistributions of source code must retain the above copyright notice, 
10       this list of conditions and the following disclaimer.
11   
12    2. Redistributions in binary form must reproduce the above copyright 
13       notice, this list of conditions and the following disclaimer in the 
14       documentation and/or other materials provided with the distribution.
15   
16    3. Neither the name of the Intel Corporation nor the names of its 
17       contributors may be used to endorse or promote products derived from 
18       this software without specific prior written permission.
19   
20   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
21   AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 
22   IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 
23   ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE 
24   LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 
25   CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 
26   SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 
27   INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 
28   CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 
29   ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
30   POSSIBILITY OF SUCH DAMAGE.
31
32 ******************************************************************************/
33 /*$FreeBSD$*/
34
35 #include "opt_em.h"
36 #include "opt_ddb.h"
37 #include "opt_inet.h"
38 #include "opt_inet6.h"
39
40 #ifdef HAVE_KERNEL_OPTION_HEADERS
41 #include "opt_device_polling.h"
42 #endif
43
44 #include <sys/param.h>
45 #include <sys/systm.h>
46 #ifdef DDB
47 #include <sys/types.h>
48 #include <ddb/ddb.h>
49 #endif
50 #if __FreeBSD_version >= 800000
51 #include <sys/buf_ring.h>
52 #endif
53 #include <sys/bus.h>
54 #include <sys/endian.h>
55 #include <sys/kernel.h>
56 #include <sys/kthread.h>
57 #include <sys/malloc.h>
58 #include <sys/mbuf.h>
59 #include <sys/module.h>
60 #include <sys/rman.h>
61 #include <sys/smp.h>
62 #include <sys/socket.h>
63 #include <sys/sockio.h>
64 #include <sys/sysctl.h>
65 #include <sys/taskqueue.h>
66 #include <sys/eventhandler.h>
67 #include <machine/bus.h>
68 #include <machine/resource.h>
69
70 #include <net/bpf.h>
71 #include <net/ethernet.h>
72 #include <net/if.h>
73 #include <net/if_arp.h>
74 #include <net/if_dl.h>
75 #include <net/if_media.h>
76
77 #include <net/if_types.h>
78 #include <net/if_vlan_var.h>
79
80 #include <netinet/in_systm.h>
81 #include <netinet/in.h>
82 #include <netinet/if_ether.h>
83 #include <netinet/ip.h>
84 #include <netinet/ip6.h>
85 #include <netinet/tcp.h>
86 #include <netinet/udp.h>
87
88 #include <machine/in_cksum.h>
89 #include <dev/led/led.h>
90 #include <dev/pci/pcivar.h>
91 #include <dev/pci/pcireg.h>
92
93 #include "e1000_api.h"
94 #include "e1000_82571.h"
95 #include "if_em.h"
96
97 /*********************************************************************
98  *  Driver version:
99  *********************************************************************/
100 char em_driver_version[] = "7.6.1-k";
101
102 /*********************************************************************
103  *  PCI Device ID Table
104  *
105  *  Used by em_probe to select which devices to attach to
106  *  (a matching example follows the table).  The last field is
107  *  an index into em_strings; the last entry must be all 0s.
108  *
109  *  { Vendor ID, Device ID, SubVendor ID, SubDevice ID, String Index }
110  *********************************************************************/
111
112 static em_vendor_info_t em_vendor_info_array[] =
113 {
114         /* Intel(R) PRO/1000 Network Connection */
115         { 0x8086, E1000_DEV_ID_82571EB_COPPER,  PCI_ANY_ID, PCI_ANY_ID, 0},
116         { 0x8086, E1000_DEV_ID_82571EB_FIBER,   PCI_ANY_ID, PCI_ANY_ID, 0},
117         { 0x8086, E1000_DEV_ID_82571EB_SERDES,  PCI_ANY_ID, PCI_ANY_ID, 0},
118         { 0x8086, E1000_DEV_ID_82571EB_SERDES_DUAL,
119                                                 PCI_ANY_ID, PCI_ANY_ID, 0},
120         { 0x8086, E1000_DEV_ID_82571EB_SERDES_QUAD,
121                                                 PCI_ANY_ID, PCI_ANY_ID, 0},
122         { 0x8086, E1000_DEV_ID_82571EB_QUAD_COPPER,
123                                                 PCI_ANY_ID, PCI_ANY_ID, 0},
124         { 0x8086, E1000_DEV_ID_82571EB_QUAD_COPPER_LP,
125                                                 PCI_ANY_ID, PCI_ANY_ID, 0},
126         { 0x8086, E1000_DEV_ID_82571EB_QUAD_FIBER,
127                                                 PCI_ANY_ID, PCI_ANY_ID, 0},
128         { 0x8086, E1000_DEV_ID_82571PT_QUAD_COPPER,
129                                                 PCI_ANY_ID, PCI_ANY_ID, 0},
130         { 0x8086, E1000_DEV_ID_82572EI_COPPER,  PCI_ANY_ID, PCI_ANY_ID, 0},
131         { 0x8086, E1000_DEV_ID_82572EI_FIBER,   PCI_ANY_ID, PCI_ANY_ID, 0},
132         { 0x8086, E1000_DEV_ID_82572EI_SERDES,  PCI_ANY_ID, PCI_ANY_ID, 0},
133         { 0x8086, E1000_DEV_ID_82572EI,         PCI_ANY_ID, PCI_ANY_ID, 0},
134
135         { 0x8086, E1000_DEV_ID_82573E,          PCI_ANY_ID, PCI_ANY_ID, 0},
136         { 0x8086, E1000_DEV_ID_82573E_IAMT,     PCI_ANY_ID, PCI_ANY_ID, 0},
137         { 0x8086, E1000_DEV_ID_82573L,          PCI_ANY_ID, PCI_ANY_ID, 0},
138         { 0x8086, E1000_DEV_ID_82583V,          PCI_ANY_ID, PCI_ANY_ID, 0},
139         { 0x8086, E1000_DEV_ID_80003ES2LAN_COPPER_SPT,
140                                                 PCI_ANY_ID, PCI_ANY_ID, 0},
141         { 0x8086, E1000_DEV_ID_80003ES2LAN_SERDES_SPT,
142                                                 PCI_ANY_ID, PCI_ANY_ID, 0},
143         { 0x8086, E1000_DEV_ID_80003ES2LAN_COPPER_DPT,
144                                                 PCI_ANY_ID, PCI_ANY_ID, 0},
145         { 0x8086, E1000_DEV_ID_80003ES2LAN_SERDES_DPT,
146                                                 PCI_ANY_ID, PCI_ANY_ID, 0},
147         { 0x8086, E1000_DEV_ID_ICH8_IGP_M_AMT,  PCI_ANY_ID, PCI_ANY_ID, 0},
148         { 0x8086, E1000_DEV_ID_ICH8_IGP_AMT,    PCI_ANY_ID, PCI_ANY_ID, 0},
149         { 0x8086, E1000_DEV_ID_ICH8_IGP_C,      PCI_ANY_ID, PCI_ANY_ID, 0},
150         { 0x8086, E1000_DEV_ID_ICH8_IFE,        PCI_ANY_ID, PCI_ANY_ID, 0},
151         { 0x8086, E1000_DEV_ID_ICH8_IFE_GT,     PCI_ANY_ID, PCI_ANY_ID, 0},
152         { 0x8086, E1000_DEV_ID_ICH8_IFE_G,      PCI_ANY_ID, PCI_ANY_ID, 0},
153         { 0x8086, E1000_DEV_ID_ICH8_IGP_M,      PCI_ANY_ID, PCI_ANY_ID, 0},
154         { 0x8086, E1000_DEV_ID_ICH8_82567V_3,   PCI_ANY_ID, PCI_ANY_ID, 0},
155         { 0x8086, E1000_DEV_ID_ICH9_IGP_M_AMT,  PCI_ANY_ID, PCI_ANY_ID, 0},
156         { 0x8086, E1000_DEV_ID_ICH9_IGP_AMT,    PCI_ANY_ID, PCI_ANY_ID, 0},
157         { 0x8086, E1000_DEV_ID_ICH9_IGP_C,      PCI_ANY_ID, PCI_ANY_ID, 0},
158         { 0x8086, E1000_DEV_ID_ICH9_IGP_M,      PCI_ANY_ID, PCI_ANY_ID, 0},
159         { 0x8086, E1000_DEV_ID_ICH9_IGP_M_V,    PCI_ANY_ID, PCI_ANY_ID, 0},
160         { 0x8086, E1000_DEV_ID_ICH9_IFE,        PCI_ANY_ID, PCI_ANY_ID, 0},
161         { 0x8086, E1000_DEV_ID_ICH9_IFE_GT,     PCI_ANY_ID, PCI_ANY_ID, 0},
162         { 0x8086, E1000_DEV_ID_ICH9_IFE_G,      PCI_ANY_ID, PCI_ANY_ID, 0},
163         { 0x8086, E1000_DEV_ID_ICH9_BM,         PCI_ANY_ID, PCI_ANY_ID, 0},
164         { 0x8086, E1000_DEV_ID_82574L,          PCI_ANY_ID, PCI_ANY_ID, 0},
165         { 0x8086, E1000_DEV_ID_82574LA,         PCI_ANY_ID, PCI_ANY_ID, 0},
166         { 0x8086, E1000_DEV_ID_ICH10_R_BM_LM,   PCI_ANY_ID, PCI_ANY_ID, 0},
167         { 0x8086, E1000_DEV_ID_ICH10_R_BM_LF,   PCI_ANY_ID, PCI_ANY_ID, 0},
168         { 0x8086, E1000_DEV_ID_ICH10_R_BM_V,    PCI_ANY_ID, PCI_ANY_ID, 0},
169         { 0x8086, E1000_DEV_ID_ICH10_D_BM_LM,   PCI_ANY_ID, PCI_ANY_ID, 0},
170         { 0x8086, E1000_DEV_ID_ICH10_D_BM_LF,   PCI_ANY_ID, PCI_ANY_ID, 0},
171         { 0x8086, E1000_DEV_ID_ICH10_D_BM_V,    PCI_ANY_ID, PCI_ANY_ID, 0},
172         { 0x8086, E1000_DEV_ID_PCH_M_HV_LM,     PCI_ANY_ID, PCI_ANY_ID, 0},
173         { 0x8086, E1000_DEV_ID_PCH_M_HV_LC,     PCI_ANY_ID, PCI_ANY_ID, 0},
174         { 0x8086, E1000_DEV_ID_PCH_D_HV_DM,     PCI_ANY_ID, PCI_ANY_ID, 0},
175         { 0x8086, E1000_DEV_ID_PCH_D_HV_DC,     PCI_ANY_ID, PCI_ANY_ID, 0},
176         { 0x8086, E1000_DEV_ID_PCH2_LV_LM,      PCI_ANY_ID, PCI_ANY_ID, 0},
177         { 0x8086, E1000_DEV_ID_PCH2_LV_V,       PCI_ANY_ID, PCI_ANY_ID, 0},
178         { 0x8086, E1000_DEV_ID_PCH_LPT_I217_LM, PCI_ANY_ID, PCI_ANY_ID, 0},
179         { 0x8086, E1000_DEV_ID_PCH_LPT_I217_V,  PCI_ANY_ID, PCI_ANY_ID, 0},
180         { 0x8086, E1000_DEV_ID_PCH_LPTLP_I218_LM,
181                                                 PCI_ANY_ID, PCI_ANY_ID, 0},
182         { 0x8086, E1000_DEV_ID_PCH_LPTLP_I218_V,
183                                                 PCI_ANY_ID, PCI_ANY_ID, 0},
184         { 0x8086, E1000_DEV_ID_PCH_I218_LM2,    PCI_ANY_ID, PCI_ANY_ID, 0},
185         { 0x8086, E1000_DEV_ID_PCH_I218_V2,     PCI_ANY_ID, PCI_ANY_ID, 0},
186         { 0x8086, E1000_DEV_ID_PCH_I218_LM3,    PCI_ANY_ID, PCI_ANY_ID, 0},
187         { 0x8086, E1000_DEV_ID_PCH_I218_V3,     PCI_ANY_ID, PCI_ANY_ID, 0},
188         { 0x8086, E1000_DEV_ID_PCH_SPT_I219_LM, PCI_ANY_ID, PCI_ANY_ID, 0},
189         { 0x8086, E1000_DEV_ID_PCH_SPT_I219_V,  PCI_ANY_ID, PCI_ANY_ID, 0},
190         { 0x8086, E1000_DEV_ID_PCH_SPT_I219_LM2,
191                                                 PCI_ANY_ID, PCI_ANY_ID, 0},
192         { 0x8086, E1000_DEV_ID_PCH_SPT_I219_V2, PCI_ANY_ID, PCI_ANY_ID, 0},
193         { 0x8086, E1000_DEV_ID_PCH_LBG_I219_LM3,
194                                                 PCI_ANY_ID, PCI_ANY_ID, 0},
195         { 0x8086, E1000_DEV_ID_PCH_SPT_I219_LM4,
196                                                 PCI_ANY_ID, PCI_ANY_ID, 0},
197         { 0x8086, E1000_DEV_ID_PCH_SPT_I219_V4, PCI_ANY_ID, PCI_ANY_ID, 0},
198         { 0x8086, E1000_DEV_ID_PCH_SPT_I219_LM5,
199                                                 PCI_ANY_ID, PCI_ANY_ID, 0},
200         { 0x8086, E1000_DEV_ID_PCH_SPT_I219_V5, PCI_ANY_ID, PCI_ANY_ID, 0},
207         { 0x8086, E1000_DEV_ID_PCH_CNP_I219_LM6,
208                                                 PCI_ANY_ID, PCI_ANY_ID, 0},
209         { 0x8086, E1000_DEV_ID_PCH_CNP_I219_V6, PCI_ANY_ID, PCI_ANY_ID, 0},
210         { 0x8086, E1000_DEV_ID_PCH_CNP_I219_LM7,
211                                                 PCI_ANY_ID, PCI_ANY_ID, 0},
212         { 0x8086, E1000_DEV_ID_PCH_CNP_I219_V7, PCI_ANY_ID, PCI_ANY_ID, 0},
213         { 0x8086, E1000_DEV_ID_PCH_ICP_I219_LM8,
214                                                 PCI_ANY_ID, PCI_ANY_ID, 0},
215         { 0x8086, E1000_DEV_ID_PCH_ICP_I219_V8, PCI_ANY_ID, PCI_ANY_ID, 0},
216         { 0x8086, E1000_DEV_ID_PCH_ICP_I219_LM9,
217                                                 PCI_ANY_ID, PCI_ANY_ID, 0},
218         { 0x8086, E1000_DEV_ID_PCH_ICP_I219_V9, PCI_ANY_ID, PCI_ANY_ID, 0},
219         /* required last entry */
220         { 0, 0, 0, 0, 0}
221 };
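/*
 * Matching example: an 82574L (PCI id 8086:10d3) is accepted by the
 * E1000_DEV_ID_82574L row above no matter what its subsystem ids are,
 * because the subvendor/subdevice fields are PCI_ANY_ID wildcards.
 */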
222
223 /*********************************************************************
224  *  Table of branding strings for all supported NICs.
225  *********************************************************************/
226
227 static char *em_strings[] = {
228         "Intel(R) PRO/1000 Network Connection"
229 };
230
231 /*********************************************************************
232  *  Function prototypes
233  *********************************************************************/
234 static int      em_probe(device_t);
235 static int      em_attach(device_t);
236 static int      em_detach(device_t);
237 static int      em_shutdown(device_t);
238 static int      em_suspend(device_t);
239 static int      em_resume(device_t);
240 #ifdef EM_MULTIQUEUE
241 static int      em_mq_start(struct ifnet *, struct mbuf *);
242 static int      em_mq_start_locked(struct ifnet *,
243                     struct tx_ring *);
244 static void     em_qflush(struct ifnet *);
245 #else
246 static void     em_start(struct ifnet *);
247 static void     em_start_locked(struct ifnet *, struct tx_ring *);
248 #endif
249 static int      em_ioctl(struct ifnet *, u_long, caddr_t);
250 static void     em_init(void *);
251 static void     em_init_locked(struct adapter *);
252 static void     em_stop(void *);
253 static void     em_media_status(struct ifnet *, struct ifmediareq *);
254 static int      em_media_change(struct ifnet *);
255 static void     em_identify_hardware(struct adapter *);
256 static int      em_allocate_pci_resources(struct adapter *);
257 static int      em_allocate_legacy(struct adapter *);
258 static int      em_allocate_msix(struct adapter *);
259 static int      em_allocate_queues(struct adapter *);
260 static int      em_setup_msix(struct adapter *);
261 static void     em_free_pci_resources(struct adapter *);
262 static void     em_local_timer(void *);
263 static void     em_reset(struct adapter *);
264 static int      em_setup_interface(device_t, struct adapter *);
265 static void     em_flush_desc_rings(struct adapter *);
266
267 static void     em_setup_transmit_structures(struct adapter *);
268 static void     em_initialize_transmit_unit(struct adapter *);
269 static int      em_allocate_transmit_buffers(struct tx_ring *);
270 static void     em_free_transmit_structures(struct adapter *);
271 static void     em_free_transmit_buffers(struct tx_ring *);
272
273 static int      em_setup_receive_structures(struct adapter *);
274 static int      em_allocate_receive_buffers(struct rx_ring *);
275 static void     em_initialize_receive_unit(struct adapter *);
276 static void     em_free_receive_structures(struct adapter *);
277 static void     em_free_receive_buffers(struct rx_ring *);
278
279 static void     em_enable_intr(struct adapter *);
280 static void     em_disable_intr(struct adapter *);
281 static void     em_update_stats_counters(struct adapter *);
282 static void     em_add_hw_stats(struct adapter *adapter);
283 static void     em_txeof(struct tx_ring *);
284 static bool     em_rxeof(struct rx_ring *, int, int *);
285 #ifndef __NO_STRICT_ALIGNMENT
286 static int      em_fixup_rx(struct rx_ring *);
287 #endif
288 static void     em_setup_rxdesc(union e1000_rx_desc_extended *,
289                     const struct em_rxbuffer *rxbuf);
290 static void     em_receive_checksum(uint32_t status, struct mbuf *);
291 static void     em_transmit_checksum_setup(struct tx_ring *, struct mbuf *, int,
292                     struct ip *, u32 *, u32 *);
293 static void     em_tso_setup(struct tx_ring *, struct mbuf *, int, struct ip *,
294                     struct tcphdr *, u32 *, u32 *);
295 static void     em_set_promisc(struct adapter *);
296 static void     em_disable_promisc(struct adapter *);
297 static void     em_set_multi(struct adapter *);
298 static void     em_update_link_status(struct adapter *);
299 static void     em_refresh_mbufs(struct rx_ring *, int);
300 static void     em_register_vlan(void *, struct ifnet *, u16);
301 static void     em_unregister_vlan(void *, struct ifnet *, u16);
302 static void     em_setup_vlan_hw_support(struct adapter *);
303 static int      em_xmit(struct tx_ring *, struct mbuf **);
304 static int      em_dma_malloc(struct adapter *, bus_size_t,
305                     struct em_dma_alloc *, int);
306 static void     em_dma_free(struct adapter *, struct em_dma_alloc *);
307 static int      em_sysctl_nvm_info(SYSCTL_HANDLER_ARGS);
308 static void     em_print_nvm_info(struct adapter *);
309 static int      em_sysctl_debug_info(SYSCTL_HANDLER_ARGS);
310 static void     em_print_debug_info(struct adapter *);
311 static int      em_is_valid_ether_addr(u8 *);
312 static int      em_sysctl_int_delay(SYSCTL_HANDLER_ARGS);
313 static void     em_add_int_delay_sysctl(struct adapter *, const char *,
314                     const char *, struct em_int_delay_info *, int, int);
315 /* Management and WOL Support */
316 static void     em_init_manageability(struct adapter *);
317 static void     em_release_manageability(struct adapter *);
318 static void     em_get_hw_control(struct adapter *);
319 static void     em_release_hw_control(struct adapter *);
320 static void     em_get_wakeup(device_t);
321 static void     em_enable_wakeup(device_t);
322 static int      em_enable_phy_wakeup(struct adapter *);
323 static void     em_led_func(void *, int);
324 static void     em_disable_aspm(struct adapter *);
325
326 static int      em_irq_fast(void *);
327
328 /* MSIX handlers */
329 static void     em_msix_tx(void *);
330 static void     em_msix_rx(void *);
331 static void     em_msix_link(void *);
332 static void     em_handle_tx(void *context, int pending);
333 static void     em_handle_rx(void *context, int pending);
334 static void     em_handle_link(void *context, int pending);
335
336 #ifdef EM_MULTIQUEUE
337 static void     em_enable_vectors_82574(struct adapter *);
338 #endif
339
340 static void     em_set_sysctl_value(struct adapter *, const char *,
341                     const char *, int *, int);
342 static int      em_set_flowcntl(SYSCTL_HANDLER_ARGS);
343 static int      em_sysctl_eee(SYSCTL_HANDLER_ARGS);
344
345 static __inline void em_rx_discard(struct rx_ring *, int);
346
347 #ifdef DEVICE_POLLING
348 static poll_handler_t em_poll;
349 #endif /* DEVICE_POLLING */
350
351 /*********************************************************************
352  *  FreeBSD Device Interface Entry Points
353  *********************************************************************/
354
355 static device_method_t em_methods[] = {
356         /* Device interface */
357         DEVMETHOD(device_probe, em_probe),
358         DEVMETHOD(device_attach, em_attach),
359         DEVMETHOD(device_detach, em_detach),
360         DEVMETHOD(device_shutdown, em_shutdown),
361         DEVMETHOD(device_suspend, em_suspend),
362         DEVMETHOD(device_resume, em_resume),
363         DEVMETHOD_END
364 };
365
366 static driver_t em_driver = {
367         "em", em_methods, sizeof(struct adapter),
368 };
369
370 devclass_t em_devclass;
371 DRIVER_MODULE(em, pci, em_driver, em_devclass, 0, 0);
372 MODULE_DEPEND(em, pci, 1, 1, 1);
373 MODULE_DEPEND(em, ether, 1, 1, 1);
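/*
 * When built as a module the driver lives in if_em.ko; it can be
 * loaded at boot with if_em_load="YES" in loader.conf(5) or at
 * runtime with kldload(8), e.g. "kldload if_em".
 */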
374
375 /*********************************************************************
376  *  Tunable default values.
377  *********************************************************************/
378
379 #define EM_TICKS_TO_USECS(ticks)        ((1024 * (ticks) + 500) / 1000)
380 #define EM_USECS_TO_TICKS(usecs)        ((1000 * (usecs) + 512) / 1024)
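/*
 * The interrupt delay registers count in 1.024 usec (1024 ns) ticks,
 * hence the rounding conversions above: for example,
 * EM_USECS_TO_TICKS(128) = (1000 * 128 + 512) / 1024 = 125 ticks,
 * and EM_TICKS_TO_USECS(125) = (1024 * 125 + 500) / 1000 = 128 usecs.
 */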
381 #define M_TSO_LEN                       66
382
383 #define MAX_INTS_PER_SEC        8000
384 #define DEFAULT_ITR             (1000000000/(MAX_INTS_PER_SEC * 256))
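/*
 * The ITR register is programmed in 256 ns units, so the interval for
 * a target rate is 10^9 / (rate * 256); with MAX_INTS_PER_SEC = 8000
 * that gives DEFAULT_ITR = 1000000000 / (8000 * 256) = 488, i.e. at
 * most one interrupt every ~125 usecs.
 */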
385
386 #define TSO_WORKAROUND  4
387
388 static SYSCTL_NODE(_hw, OID_AUTO, em, CTLFLAG_RD, 0, "EM driver parameters");
389
390 static int em_disable_crc_stripping = 0;
391 SYSCTL_INT(_hw_em, OID_AUTO, disable_crc_stripping, CTLFLAG_RDTUN,
392     &em_disable_crc_stripping, 0, "Disable CRC Stripping");
393
394 static int em_tx_int_delay_dflt = EM_TICKS_TO_USECS(EM_TIDV);
395 static int em_rx_int_delay_dflt = EM_TICKS_TO_USECS(EM_RDTR);
396 TUNABLE_INT("hw.em.tx_int_delay", &em_tx_int_delay_dflt);
397 TUNABLE_INT("hw.em.rx_int_delay", &em_rx_int_delay_dflt);
398 SYSCTL_INT(_hw_em, OID_AUTO, tx_int_delay, CTLFLAG_RDTUN, &em_tx_int_delay_dflt,
399     0, "Default transmit interrupt delay in usecs");
400 SYSCTL_INT(_hw_em, OID_AUTO, rx_int_delay, CTLFLAG_RDTUN, &em_rx_int_delay_dflt,
401     0, "Default receive interrupt delay in usecs");
402
403 static int em_tx_abs_int_delay_dflt = EM_TICKS_TO_USECS(EM_TADV);
404 static int em_rx_abs_int_delay_dflt = EM_TICKS_TO_USECS(EM_RADV);
405 TUNABLE_INT("hw.em.tx_abs_int_delay", &em_tx_abs_int_delay_dflt);
406 TUNABLE_INT("hw.em.rx_abs_int_delay", &em_rx_abs_int_delay_dflt);
407 SYSCTL_INT(_hw_em, OID_AUTO, tx_abs_int_delay, CTLFLAG_RDTUN,
408     &em_tx_abs_int_delay_dflt, 0,
409     "Default transmit interrupt delay limit in usecs");
410 SYSCTL_INT(_hw_em, OID_AUTO, rx_abs_int_delay, CTLFLAG_RDTUN,
411     &em_rx_abs_int_delay_dflt, 0,
412     "Default receive interrupt delay limit in usecs");
413
414 static int em_rxd = EM_DEFAULT_RXD;
415 static int em_txd = EM_DEFAULT_TXD;
416 TUNABLE_INT("hw.em.rxd", &em_rxd);
417 TUNABLE_INT("hw.em.txd", &em_txd);
418 SYSCTL_INT(_hw_em, OID_AUTO, rxd, CTLFLAG_RDTUN, &em_rxd, 0,
419     "Number of receive descriptors per queue");
420 SYSCTL_INT(_hw_em, OID_AUTO, txd, CTLFLAG_RDTUN, &em_txd, 0,
421     "Number of transmit descriptors per queue");
422
423 static int em_smart_pwr_down = FALSE;
424 TUNABLE_INT("hw.em.smart_pwr_down", &em_smart_pwr_down);
425 SYSCTL_INT(_hw_em, OID_AUTO, smart_pwr_down, CTLFLAG_RDTUN, &em_smart_pwr_down,
426     0, "Set to true to leave smart power down enabled on newer adapters");
427
428 /* Controls whether promiscuous also shows bad packets */
429 static int em_debug_sbp = FALSE;
430 TUNABLE_INT("hw.em.sbp", &em_debug_sbp);
431 SYSCTL_INT(_hw_em, OID_AUTO, sbp, CTLFLAG_RDTUN, &em_debug_sbp, 0,
432     "Show bad packets in promiscuous mode");
433
434 static int em_enable_msix = TRUE;
435 TUNABLE_INT("hw.em.enable_msix", &em_enable_msix);
436 SYSCTL_INT(_hw_em, OID_AUTO, enable_msix, CTLFLAG_RDTUN, &em_enable_msix, 0,
437     "Enable MSI-X interrupts");
438
439 #ifdef EM_MULTIQUEUE
440 static int em_num_queues = 1;
441 TUNABLE_INT("hw.em.num_queues", &em_num_queues);
442 SYSCTL_INT(_hw_em, OID_AUTO, num_queues, CTLFLAG_RDTUN, &em_num_queues, 0,
443     "82574 only: Number of queues to configure, 0 indicates autoconfigure");
444 #endif
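/*
 * The CTLFLAG_RDTUN knobs above are boot-time tunables; a
 * loader.conf(5) sketch (values are only illustrative):
 *
 *      hw.em.rxd=2048
 *      hw.em.txd=2048
 *      hw.em.rx_int_delay=32
 *      hw.em.enable_msix=1
 *      hw.em.num_queues=2      # 82574 with EM_MULTIQUEUE only
 */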
445
446 /*
447 ** Global variable to store last used CPU when binding queues
448 ** to CPUs in em_allocate_msix.  Starts at CPU_FIRST and increments when a
449 ** queue is bound to a cpu.
450 */
451 static int em_last_bind_cpu = -1;
452
453 /* How many packets rxeof tries to clean at a time */
454 static int em_rx_process_limit = 100;
455 TUNABLE_INT("hw.em.rx_process_limit", &em_rx_process_limit);
456 SYSCTL_INT(_hw_em, OID_AUTO, rx_process_limit, CTLFLAG_RDTUN,
457     &em_rx_process_limit, 0,
458     "Maximum number of received packets to process "
459     "at a time, -1 means unlimited");
460
461 /* Energy Efficient Ethernet - default to OFF (1 disables EEE) */
462 static int eee_setting = 1;
463 TUNABLE_INT("hw.em.eee_setting", &eee_setting);
464 SYSCTL_INT(_hw_em, OID_AUTO, eee_setting, CTLFLAG_RDTUN, &eee_setting, 0,
465     "Disable Energy Efficient Ethernet");
466
467 /* Global used in WOL setup with multiport cards */
468 static int global_quad_port_a = 0;
469
470 #ifdef DEV_NETMAP       /* see ixgbe.c for details */
471 #include <dev/netmap/if_em_netmap.h>
472 #endif /* DEV_NETMAP */
473
474 /*********************************************************************
475  *  Device identification routine
476  *
477  *  em_probe determines if the driver should be loaded on a
478  *  given adapter based on its PCI vendor/device id.
479  *
480  *  return BUS_PROBE_DEFAULT on success, positive on failure
481  *********************************************************************/
482
483 static int
484 em_probe(device_t dev)
485 {
486         char            adapter_name[60];
487         uint16_t        pci_vendor_id = 0;
488         uint16_t        pci_device_id = 0;
489         uint16_t        pci_subvendor_id = 0;
490         uint16_t        pci_subdevice_id = 0;
491         em_vendor_info_t *ent;
492
493         INIT_DEBUGOUT("em_probe: begin");
494
495         pci_vendor_id = pci_get_vendor(dev);
496         if (pci_vendor_id != EM_VENDOR_ID)
497                 return (ENXIO);
498
499         pci_device_id = pci_get_device(dev);
500         pci_subvendor_id = pci_get_subvendor(dev);
501         pci_subdevice_id = pci_get_subdevice(dev);
502
503         ent = em_vendor_info_array;
504         while (ent->vendor_id != 0) {
505                 if ((pci_vendor_id == ent->vendor_id) &&
506                     (pci_device_id == ent->device_id) &&
507
508                     ((pci_subvendor_id == ent->subvendor_id) ||
509                     (ent->subvendor_id == PCI_ANY_ID)) &&
510
511                     ((pci_subdevice_id == ent->subdevice_id) ||
512                     (ent->subdevice_id == PCI_ANY_ID))) {
513                         sprintf(adapter_name, "%s %s",
514                                 em_strings[ent->index],
515                                 em_driver_version);
516                         device_set_desc_copy(dev, adapter_name);
517                         return (BUS_PROBE_DEFAULT);
518                 }
519                 ent++;
520         }
521
522         return (ENXIO);
523 }
524
525 /*********************************************************************
526  *  Device initialization routine
527  *
528  *  The attach entry point is called when the driver is being loaded.
529  *  This routine identifies the type of hardware, allocates all resources
530  *  and initializes the hardware.
531  *
532  *  return 0 on success, positive on failure
533  *********************************************************************/
534
535 static int
536 em_attach(device_t dev)
537 {
538         struct adapter  *adapter;
539         struct e1000_hw *hw;
540         int             error = 0;
541
542         INIT_DEBUGOUT("em_attach: begin");
543
544         if (resource_disabled("em", device_get_unit(dev))) {
545                 device_printf(dev, "Disabled by device hint\n");
546                 return (ENXIO);
547         }
548
549         adapter = device_get_softc(dev);
550         adapter->dev = adapter->osdep.dev = dev;
551         hw = &adapter->hw;
552         EM_CORE_LOCK_INIT(adapter, device_get_nameunit(dev));
553
554         /* SYSCTL stuff */
555         SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
556             SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
557             OID_AUTO, "nvm", CTLTYPE_INT|CTLFLAG_RW, adapter, 0,
558             em_sysctl_nvm_info, "I", "NVM Information");
559
560         SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
561             SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
562             OID_AUTO, "debug", CTLTYPE_INT|CTLFLAG_RW, adapter, 0,
563             em_sysctl_debug_info, "I", "Debug Information");
564
565         SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
566             SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
567             OID_AUTO, "fc", CTLTYPE_INT|CTLFLAG_RW, adapter, 0,
568             em_set_flowcntl, "I", "Flow Control");
569
570         callout_init_mtx(&adapter->timer, &adapter->core_mtx, 0);
571
572         /* Determine hardware and mac info */
573         em_identify_hardware(adapter);
574
575         /* Setup PCI resources */
576         if (em_allocate_pci_resources(adapter)) {
577                 device_printf(dev, "Allocation of PCI resources failed\n");
578                 error = ENXIO;
579                 goto err_pci;
580         }
581
582         /*
583         ** For ICH8 and family we need to
584         ** map the flash memory, and this
585         ** must happen after the MAC is 
586         ** identified
587         */
588         if ((hw->mac.type == e1000_ich8lan) ||
589             (hw->mac.type == e1000_ich9lan) ||
590             (hw->mac.type == e1000_ich10lan) ||
591             (hw->mac.type == e1000_pchlan) ||
592             (hw->mac.type == e1000_pch2lan) ||
593             (hw->mac.type == e1000_pch_lpt)) {
594                 int rid = EM_BAR_TYPE_FLASH;
595                 adapter->flash = bus_alloc_resource_any(dev,
596                     SYS_RES_MEMORY, &rid, RF_ACTIVE);
597                 if (adapter->flash == NULL) {
598                         device_printf(dev, "Mapping of Flash failed\n");
599                         error = ENXIO;
600                         goto err_pci;
601                 }
602                 /* This is used in the shared code */
603                 hw->flash_address = (u8 *)adapter->flash;
604                 adapter->osdep.flash_bus_space_tag =
605                     rman_get_bustag(adapter->flash);
606                 adapter->osdep.flash_bus_space_handle =
607                     rman_get_bushandle(adapter->flash);
608         }
609         /*
610         ** In the newer SPT devices the flash is not a
611         ** separate BAR; rather, it is also in BAR0,
612         ** so use the same tag and an offset handle for the
613         ** FLASH read/write macros in the shared code.
614         */
615         else if (hw->mac.type >= e1000_pch_spt) {
616                 adapter->osdep.flash_bus_space_tag =
617                     adapter->osdep.mem_bus_space_tag;
618                 adapter->osdep.flash_bus_space_handle =
619                     adapter->osdep.mem_bus_space_handle
620                     + E1000_FLASH_BASE_ADDR;
621         }
622
623         /* Do Shared Code initialization */
624         error = e1000_setup_init_funcs(hw, TRUE);
625         if (error) {
626                 device_printf(dev, "Setup of Shared code failed, error %d\n",
627                     error);
628                 error = ENXIO;
629                 goto err_pci;
630         }
631
632         /*
633          * Setup MSI/X or MSI if PCI Express
634          */
635         adapter->msix = em_setup_msix(adapter);
636
637         e1000_get_bus_info(hw);
638
639         /* Set up some sysctls for the tunable interrupt delays */
640         em_add_int_delay_sysctl(adapter, "rx_int_delay",
641             "receive interrupt delay in usecs", &adapter->rx_int_delay,
642             E1000_REGISTER(hw, E1000_RDTR), em_rx_int_delay_dflt);
643         em_add_int_delay_sysctl(adapter, "tx_int_delay",
644             "transmit interrupt delay in usecs", &adapter->tx_int_delay,
645             E1000_REGISTER(hw, E1000_TIDV), em_tx_int_delay_dflt);
646         em_add_int_delay_sysctl(adapter, "rx_abs_int_delay",
647             "receive interrupt delay limit in usecs",
648             &adapter->rx_abs_int_delay,
649             E1000_REGISTER(hw, E1000_RADV),
650             em_rx_abs_int_delay_dflt);
651         em_add_int_delay_sysctl(adapter, "tx_abs_int_delay",
652             "transmit interrupt delay limit in usecs",
653             &adapter->tx_abs_int_delay,
654             E1000_REGISTER(hw, E1000_TADV),
655             em_tx_abs_int_delay_dflt);
656         em_add_int_delay_sysctl(adapter, "itr",
657             "interrupt delay limit in usecs/4",
658             &adapter->tx_itr,
659             E1000_REGISTER(hw, E1000_ITR),
660             DEFAULT_ITR);
661
662         /* Sysctl for limiting the amount of work done in the taskqueue */
663         em_set_sysctl_value(adapter, "rx_processing_limit",
664             "max number of rx packets to process", &adapter->rx_process_limit,
665             em_rx_process_limit);
666
667         /*
668          * Validate the number of transmit and receive descriptors.  It
669          * must not exceed the hardware maximum and must be a multiple
670          * of EM_DBA_ALIGN.
671          */
672         if (((em_txd * sizeof(struct e1000_tx_desc)) % EM_DBA_ALIGN) != 0 ||
673             (em_txd > EM_MAX_TXD) || (em_txd < EM_MIN_TXD)) {
674                 device_printf(dev, "Using %d TX descriptors instead of %d!\n",
675                     EM_DEFAULT_TXD, em_txd);
676                 adapter->num_tx_desc = EM_DEFAULT_TXD;
677         } else
678                 adapter->num_tx_desc = em_txd;
679
680         if (((em_rxd * sizeof(union e1000_rx_desc_extended)) % EM_DBA_ALIGN) != 0 ||
681             (em_rxd > EM_MAX_RXD) || (em_rxd < EM_MIN_RXD)) {
682                 device_printf(dev, "Using %d RX descriptors instead of %d!\n",
683                     EM_DEFAULT_RXD, em_rxd);
684                 adapter->num_rx_desc = EM_DEFAULT_RXD;
685         } else
686                 adapter->num_rx_desc = em_rxd;
687
688         hw->mac.autoneg = DO_AUTO_NEG;
689         hw->phy.autoneg_wait_to_complete = FALSE;
690         hw->phy.autoneg_advertised = AUTONEG_ADV_DEFAULT;
691
692         /* Copper options */
693         if (hw->phy.media_type == e1000_media_type_copper) {
694                 hw->phy.mdix = AUTO_ALL_MODES;
695                 hw->phy.disable_polarity_correction = FALSE;
696                 hw->phy.ms_type = EM_MASTER_SLAVE;
697         }
698
699         /*
700          * Set the frame limits assuming
701          * standard ethernet sized frames.
702          */
703         adapter->hw.mac.max_frame_size =
704             ETHERMTU + ETHER_HDR_LEN + ETHERNET_FCS_SIZE;
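        /* For a standard frame: 1500 (MTU) + 14 (header) + 4 (FCS) = 1518. */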
705
706         /*
707          * This controls when hardware reports transmit completion
708          * status.
709          */
710         hw->mac.report_tx_early = 1;
711
712         /* 
713         ** Get queue/ring memory
714         */
715         if (em_allocate_queues(adapter)) {
716                 error = ENOMEM;
717                 goto err_pci;
718         }
719
720         /* Allocate multicast array memory. */
721         adapter->mta = malloc(sizeof(u8) * ETH_ADDR_LEN *
722             MAX_NUM_MULTICAST_ADDRESSES, M_DEVBUF, M_NOWAIT);
723         if (adapter->mta == NULL) {
724                 device_printf(dev, "Can not allocate multicast setup array\n");
725                 error = ENOMEM;
726                 goto err_late;
727         }
728
729         /* Check SOL/IDER usage */
730         if (e1000_check_reset_block(hw))
731                 device_printf(dev, "PHY reset is blocked"
732                     " due to SOL/IDER session.\n");
733
734         /* Sysctl for setting Energy Efficient Ethernet */
735         hw->dev_spec.ich8lan.eee_disable = eee_setting;
736         SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
737             SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
738             OID_AUTO, "eee_control", CTLTYPE_INT|CTLFLAG_RW,
739             adapter, 0, em_sysctl_eee, "I",
740             "Disable Energy Efficient Ethernet");
741
742         /*
743         ** Start from a known state; this is
744         ** important for reading the NVM and
745         ** MAC address correctly.
746         */
747         e1000_reset_hw(hw);
748
749
750         /* Make sure we have a good EEPROM before we read from it */
751         if (e1000_validate_nvm_checksum(hw) < 0) {
752                 /*
753                 ** Some PCI-E parts fail the first check due to
754         ** the link being in a sleep state; call it again,
755         ** and if it fails a second time it's a real issue.
756                 */
757                 if (e1000_validate_nvm_checksum(hw) < 0) {
758                         device_printf(dev,
759                             "The EEPROM Checksum Is Not Valid\n");
760                         error = EIO;
761                         goto err_late;
762                 }
763         }
764
765         /* Copy the permanent MAC address out of the EEPROM */
766         if (e1000_read_mac_addr(hw) < 0) {
767                 device_printf(dev, "EEPROM read error while reading MAC"
768                     " address\n");
769                 error = EIO;
770                 goto err_late;
771         }
772
773         if (!em_is_valid_ether_addr(hw->mac.addr)) {
774                 device_printf(dev, "Invalid MAC address\n");
775                 error = EIO;
776                 goto err_late;
777         }
778
779         /* Disable ULP support */
780         e1000_disable_ulp_lpt_lp(hw, TRUE);
781
782         /*
783         **  Do interrupt configuration
784         */
785         if (adapter->msix > 1) /* Do MSIX */
786                 error = em_allocate_msix(adapter);
787         else  /* MSI or Legacy */
788                 error = em_allocate_legacy(adapter);
789         if (error)
790                 goto err_late;
791
792         /*
793          * Get Wake-on-Lan and Management info for later use
794          */
795         em_get_wakeup(dev);
796
797         /* Setup OS specific network interface */
798         if (em_setup_interface(dev, adapter) != 0)
799                 goto err_late;
800
801         em_reset(adapter);
802
803         /* Initialize statistics */
804         em_update_stats_counters(adapter);
805
806         hw->mac.get_link_status = 1;
807         em_update_link_status(adapter);
808
809         /* Register for VLAN events */
810         adapter->vlan_attach = EVENTHANDLER_REGISTER(vlan_config,
811             em_register_vlan, adapter, EVENTHANDLER_PRI_FIRST);
812         adapter->vlan_detach = EVENTHANDLER_REGISTER(vlan_unconfig,
813             em_unregister_vlan, adapter, EVENTHANDLER_PRI_FIRST); 
814
815         em_add_hw_stats(adapter);
816
817         /* Non-AMT based hardware can now take control from firmware */
818         if (adapter->has_manage && !adapter->has_amt)
819                 em_get_hw_control(adapter);
820
821         /* Tell the stack that the interface is not active */
822         adapter->ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
823         adapter->ifp->if_drv_flags |= IFF_DRV_OACTIVE;
824
825         adapter->led_dev = led_create(em_led_func, adapter,
826             device_get_nameunit(dev));
827 #ifdef DEV_NETMAP
828         em_netmap_attach(adapter);
829 #endif /* DEV_NETMAP */
830
831         INIT_DEBUGOUT("em_attach: end");
832
833         return (0);
834
835 err_late:
836         em_free_transmit_structures(adapter);
837         em_free_receive_structures(adapter);
838         em_release_hw_control(adapter);
839         if (adapter->ifp != NULL)
840                 if_free(adapter->ifp);
841 err_pci:
842         em_free_pci_resources(adapter);
843         free(adapter->mta, M_DEVBUF);
844         EM_CORE_LOCK_DESTROY(adapter);
845
846         return (error);
847 }
848
849 /*********************************************************************
850  *  Device removal routine
851  *
852  *  The detach entry point is called when the driver is being removed.
853  *  This routine stops the adapter and deallocates all the resources
854  *  that were allocated for driver operation.
855  *
856  *  return 0 on success, positive on failure
857  *********************************************************************/
858
859 static int
860 em_detach(device_t dev)
861 {
862         struct adapter  *adapter = device_get_softc(dev);
863         struct ifnet    *ifp = adapter->ifp;
864
865         INIT_DEBUGOUT("em_detach: begin");
866
867         /* Make sure VLANs are not using the driver */
868         if (adapter->ifp->if_vlantrunk != NULL) {
869                 device_printf(dev,"Vlan in use, detach first\n");
870                 return (EBUSY);
871         }
872
873 #ifdef DEVICE_POLLING
874         if (ifp->if_capenable & IFCAP_POLLING)
875                 ether_poll_deregister(ifp);
876 #endif
877
878         if (adapter->led_dev != NULL)
879                 led_destroy(adapter->led_dev);
880
881         EM_CORE_LOCK(adapter);
882         adapter->in_detach = 1;
883         em_stop(adapter);
884         EM_CORE_UNLOCK(adapter);
885         EM_CORE_LOCK_DESTROY(adapter);
886
887         e1000_phy_hw_reset(&adapter->hw);
888
889         em_release_manageability(adapter);
890         em_release_hw_control(adapter);
891
892         /* Unregister VLAN events */
893         if (adapter->vlan_attach != NULL)
894                 EVENTHANDLER_DEREGISTER(vlan_config, adapter->vlan_attach);
895         if (adapter->vlan_detach != NULL)
896                 EVENTHANDLER_DEREGISTER(vlan_unconfig, adapter->vlan_detach); 
897
898         ether_ifdetach(adapter->ifp);
899         callout_drain(&adapter->timer);
900
901 #ifdef DEV_NETMAP
902         netmap_detach(ifp);
903 #endif /* DEV_NETMAP */
904
905         em_free_pci_resources(adapter);
906         bus_generic_detach(dev);
907         if_free(ifp);
908
909         em_free_transmit_structures(adapter);
910         em_free_receive_structures(adapter);
911
912         em_release_hw_control(adapter);
913         free(adapter->mta, M_DEVBUF);
914
915         return (0);
916 }
917
918 /*********************************************************************
919  *
920  *  Shutdown entry point
921  *
922  **********************************************************************/
923
924 static int
925 em_shutdown(device_t dev)
926 {
927         return em_suspend(dev);
928 }
929
930 /*
931  * Suspend/resume device methods.
932  */
933 static int
934 em_suspend(device_t dev)
935 {
936         struct adapter *adapter = device_get_softc(dev);
937
938         EM_CORE_LOCK(adapter);
939
940         em_release_manageability(adapter);
941         em_release_hw_control(adapter);
942         em_enable_wakeup(dev);
943
944         EM_CORE_UNLOCK(adapter);
945
946         return bus_generic_suspend(dev);
947 }
948
949 static int
950 em_resume(device_t dev)
951 {
952         struct adapter *adapter = device_get_softc(dev);
953         struct tx_ring  *txr = adapter->tx_rings;
954         struct ifnet *ifp = adapter->ifp;
955
956         EM_CORE_LOCK(adapter);
957         if (adapter->hw.mac.type == e1000_pch2lan)
958                 e1000_resume_workarounds_pchlan(&adapter->hw);
959         em_init_locked(adapter);
960         em_init_manageability(adapter);
961
962         if ((ifp->if_flags & IFF_UP) &&
963             (ifp->if_drv_flags & IFF_DRV_RUNNING) && adapter->link_active) {
964                 for (int i = 0; i < adapter->num_queues; i++, txr++) {
965                         EM_TX_LOCK(txr);
966 #ifdef EM_MULTIQUEUE
967                         if (!drbr_empty(ifp, txr->br))
968                                 em_mq_start_locked(ifp, txr);
969 #else
970                         if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
971                                 em_start_locked(ifp, txr);
972 #endif
973                         EM_TX_UNLOCK(txr);
974                 }
975         }
976         EM_CORE_UNLOCK(adapter);
977
978         return bus_generic_resume(dev);
979 }
980
981
982 #ifndef EM_MULTIQUEUE
983 static void
984 em_start_locked(struct ifnet *ifp, struct tx_ring *txr)
985 {
986         struct adapter  *adapter = ifp->if_softc;
987         struct mbuf     *m_head;
988
989         EM_TX_LOCK_ASSERT(txr);
990
991         if ((ifp->if_drv_flags & (IFF_DRV_RUNNING|IFF_DRV_OACTIVE)) !=
992             IFF_DRV_RUNNING)
993                 return;
994
995         if (!adapter->link_active)
996                 return;
997
998         while (!IFQ_DRV_IS_EMPTY(&ifp->if_snd)) {
999                 /* Call cleanup if number of TX descriptors low */
1000                 if (txr->tx_avail <= EM_TX_CLEANUP_THRESHOLD)
1001                         em_txeof(txr);
1002                 if (txr->tx_avail < EM_MAX_SCATTER) {
1003                         ifp->if_drv_flags |= IFF_DRV_OACTIVE;
1004                         break;
1005                 }
1006                 IFQ_DRV_DEQUEUE(&ifp->if_snd, m_head);
1007                 if (m_head == NULL)
1008                         break;
1009                 /*
1010                  *  Encapsulation can modify our pointer, and/or make it
1011                  *  NULL on failure.  In that event, we can't requeue.
1012                  */
1013                 if (em_xmit(txr, &m_head)) {
1014                         if (m_head == NULL)
1015                                 break;
1016                         IFQ_DRV_PREPEND(&ifp->if_snd, m_head);
1017                         break;
1018                 }
1019
1020                 /* Mark the queue as having work */
1021                 if (txr->busy == EM_TX_IDLE)
1022                         txr->busy = EM_TX_BUSY;
1023
1024                 /* Send a copy of the frame to the BPF listener */
1025                 ETHER_BPF_MTAP(ifp, m_head);
1026
1027         }
1028
1029         return;
1030 }
1031
1032 static void
1033 em_start(struct ifnet *ifp)
1034 {
1035         struct adapter  *adapter = ifp->if_softc;
1036         struct tx_ring  *txr = adapter->tx_rings;
1037
1038         if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
1039                 EM_TX_LOCK(txr);
1040                 em_start_locked(ifp, txr);
1041                 EM_TX_UNLOCK(txr);
1042         }
1043         return;
1044 }
1045 #else /* EM_MULTIQUEUE */
1046 /*********************************************************************
1047  *  Multiqueue Transmit routines 
1048  *
1049  *  em_mq_start is called by the stack to initiate a transmit.
1050  *  However, if the ring is busy the driver can queue the request
1051  *  rather than do an immediate send.  That buffering, more than
1052  *  having multiple tx queues, is the advantage of this interface.
1053  **********************************************************************/
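/*
 * Ring selection sketch: with a stack-supplied flowid the ring is
 * flowid % num_queues, otherwise curcpu % num_queues; e.g. flowid 7
 * on a two-queue 82574 maps to tx ring 1.
 */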
1054 /*
1055 ** Multiqueue capable stack interface
1056 */
1057 static int
1058 em_mq_start(struct ifnet *ifp, struct mbuf *m)
1059 {
1060         struct adapter  *adapter = ifp->if_softc;
1061         struct tx_ring  *txr = adapter->tx_rings;
1062         unsigned int    i, error;
1063
1064         if (M_HASHTYPE_GET(m) != M_HASHTYPE_NONE)
1065                 i = m->m_pkthdr.flowid % adapter->num_queues;
1066         else
1067                 i = curcpu % adapter->num_queues;
1068
1069         txr = &adapter->tx_rings[i];
1070
1071         error = drbr_enqueue(ifp, txr->br, m);
1072         if (error)
1073                 return (error);
1074
1075         if (EM_TX_TRYLOCK(txr)) {
1076                 em_mq_start_locked(ifp, txr);
1077                 EM_TX_UNLOCK(txr);
1078         } else 
1079                 taskqueue_enqueue(txr->tq, &txr->tx_task);
1080
1081         return (0);
1082 }
1083
1084 static int
1085 em_mq_start_locked(struct ifnet *ifp, struct tx_ring *txr)
1086 {
1087         struct adapter  *adapter = txr->adapter;
1088         struct mbuf     *next;
1089         int             err = 0, enq = 0;
1090
1091         if ((ifp->if_drv_flags & (IFF_DRV_RUNNING | IFF_DRV_OACTIVE)) !=
1092             IFF_DRV_RUNNING || adapter->link_active == 0) {
1093                 return (ENETDOWN);
1094         }
1095
1096         /* Process the queue */
1097         while ((next = drbr_peek(ifp, txr->br)) != NULL) {
1098                 if ((err = em_xmit(txr, &next)) != 0) {
1099                         if (next == NULL) {
1100                                 /* It was freed, move forward */
1101                                 drbr_advance(ifp, txr->br);
1102                         } else {
1103                                 /* 
1104                                  * Still have one left, it may not be
1105                                  * the same since the transmit function
1106                                  * may have changed it.
1107                                  */
1108                                 drbr_putback(ifp, txr->br, next);
1109                         }
1110                         break;
1111                 }
1112                 drbr_advance(ifp, txr->br);
1113                 enq++;
1114                 ifp->if_obytes += next->m_pkthdr.len;
1115                 if (next->m_flags & M_MCAST)
1116                         ifp->if_omcasts++;
1117                 ETHER_BPF_MTAP(ifp, next);
1118                 if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
1119                         break;
1120         }
1121
1122         /* Mark the queue as having work */
1123         if ((enq > 0) && (txr->busy == EM_TX_IDLE))
1124                 txr->busy = EM_TX_BUSY;
1125
1126         if (txr->tx_avail < EM_MAX_SCATTER)
1127                 em_txeof(txr);
1128         if (txr->tx_avail < EM_MAX_SCATTER) {
1129                 ifp->if_drv_flags |= IFF_DRV_OACTIVE;
1130         }
1131         return (err);
1132 }
1133
1134 /*
1135 ** Flush all ring buffers
1136 */
1137 static void
1138 em_qflush(struct ifnet *ifp)
1139 {
1140         struct adapter  *adapter = ifp->if_softc;
1141         struct tx_ring  *txr = adapter->tx_rings;
1142         struct mbuf     *m;
1143
1144         for (int i = 0; i < adapter->num_queues; i++, txr++) {
1145                 EM_TX_LOCK(txr);
1146                 while ((m = buf_ring_dequeue_sc(txr->br)) != NULL)
1147                         m_freem(m);
1148                 EM_TX_UNLOCK(txr);
1149         }
1150         if_qflush(ifp);
1151 }
1152 #endif /* EM_MULTIQUEUE */
1153
1154 /*********************************************************************
1155  *  Ioctl entry point
1156  *
1157  *  em_ioctl is called when the user wants to configure the
1158  *  interface.
1159  *
1160  *  return 0 on success, positive on failure
1161  **********************************************************************/
1162
1163 static int
1164 em_ioctl(struct ifnet *ifp, u_long command, caddr_t data)
1165 {
1166         struct adapter  *adapter = ifp->if_softc;
1167         struct ifreq    *ifr = (struct ifreq *)data;
1168 #if defined(INET) || defined(INET6)
1169         struct ifaddr   *ifa = (struct ifaddr *)data;
1170 #endif
1171         bool            avoid_reset = FALSE;
1172         int             error = 0;
1173
1174         if (adapter->in_detach)
1175                 return (error);
1176
1177         switch (command) {
1178         case SIOCSIFADDR:
1179 #ifdef INET
1180                 if (ifa->ifa_addr->sa_family == AF_INET)
1181                         avoid_reset = TRUE;
1182 #endif
1183 #ifdef INET6
1184                 if (ifa->ifa_addr->sa_family == AF_INET6)
1185                         avoid_reset = TRUE;
1186 #endif
1187                 /*
1188                 ** Calling init results in link renegotiation,
1189                 ** so we avoid doing it when possible.
1190                 */
1191                 if (avoid_reset) {
1192                         ifp->if_flags |= IFF_UP;
1193                         if (!(ifp->if_drv_flags & IFF_DRV_RUNNING))
1194                                 em_init(adapter);
1195 #ifdef INET
1196                         if (!(ifp->if_flags & IFF_NOARP))
1197                                 arp_ifinit(ifp, ifa);
1198 #endif
1199                 } else
1200                         error = ether_ioctl(ifp, command, data);
1201                 break;
1202         case SIOCSIFMTU:
1203             {
1204                 int max_frame_size;
1205
1206                 IOCTL_DEBUGOUT("ioctl rcv'd: SIOCSIFMTU (Set Interface MTU)");
1207
1208                 EM_CORE_LOCK(adapter);
1209                 switch (adapter->hw.mac.type) {
1210                 case e1000_82571:
1211                 case e1000_82572:
1212                 case e1000_ich9lan:
1213                 case e1000_ich10lan:
1214                 case e1000_pch2lan:
1215                 case e1000_pch_lpt:
1216                 case e1000_pch_spt:
1217                 case e1000_pch_cnp:
1218                 case e1000_82574:
1219                 case e1000_82583:
1220                 case e1000_80003es2lan: /* 9K Jumbo Frame size */
1221                         max_frame_size = 9234;
1222                         break;
1223                 case e1000_pchlan:
1224                         max_frame_size = 4096;
1225                         break;
1226                         /* Adapters that do not support jumbo frames */
1227                 case e1000_ich8lan:
1228                         max_frame_size = ETHER_MAX_LEN;
1229                         break;
1230                 default:
1231                         max_frame_size = MAX_JUMBO_FRAME_SIZE;
1232                 }
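                /*
                 * E.g. with a 9234-byte max frame the largest accepted
                 * MTU is 9234 - ETHER_HDR_LEN (14) - ETHER_CRC_LEN (4)
                 * = 9216 bytes.
                 */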
1233                 if (ifr->ifr_mtu > max_frame_size - ETHER_HDR_LEN -
1234                     ETHER_CRC_LEN) {
1235                         EM_CORE_UNLOCK(adapter);
1236                         error = EINVAL;
1237                         break;
1238                 }
1239
1240                 ifp->if_mtu = ifr->ifr_mtu;
1241                 adapter->hw.mac.max_frame_size =
1242                     ifp->if_mtu + ETHER_HDR_LEN + ETHER_CRC_LEN;
1243                 if (ifp->if_drv_flags & IFF_DRV_RUNNING)
1244                         em_init_locked(adapter);
1245                 EM_CORE_UNLOCK(adapter);
1246                 break;
1247             }
1248         case SIOCSIFFLAGS:
1249                 IOCTL_DEBUGOUT("ioctl rcv'd:\
1250                     SIOCSIFFLAGS (Set Interface Flags)");
1251                 EM_CORE_LOCK(adapter);
1252                 if (ifp->if_flags & IFF_UP) {
1253                         if ((ifp->if_drv_flags & IFF_DRV_RUNNING)) {
1254                                 if ((ifp->if_flags ^ adapter->if_flags) &
1255                                     (IFF_PROMISC | IFF_ALLMULTI)) {
1256                                         em_disable_promisc(adapter);
1257                                         em_set_promisc(adapter);
1258                                 }
1259                         } else
1260                                 em_init_locked(adapter);
1261                 } else
1262                         if (ifp->if_drv_flags & IFF_DRV_RUNNING)
1263                                 em_stop(adapter);
1264                 adapter->if_flags = ifp->if_flags;
1265                 EM_CORE_UNLOCK(adapter);
1266                 break;
1267         case SIOCADDMULTI:
1268         case SIOCDELMULTI:
1269                 IOCTL_DEBUGOUT("ioctl rcv'd: SIOC(ADD|DEL)MULTI");
1270                 if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
1271                         EM_CORE_LOCK(adapter);
1272                         em_disable_intr(adapter);
1273                         em_set_multi(adapter);
1274 #ifdef DEVICE_POLLING
1275                         if (!(ifp->if_capenable & IFCAP_POLLING))
1276 #endif
1277                                 em_enable_intr(adapter);
1278                         EM_CORE_UNLOCK(adapter);
1279                 }
1280                 break;
1281         case SIOCSIFMEDIA:
1282                 /* Check SOL/IDER usage */
1283                 EM_CORE_LOCK(adapter);
1284                 if (e1000_check_reset_block(&adapter->hw)) {
1285                         EM_CORE_UNLOCK(adapter);
1286                         device_printf(adapter->dev, "Media change is"
1287                             " blocked due to SOL/IDER session.\n");
1288                         break;
1289                 }
1290                 EM_CORE_UNLOCK(adapter);
1291                 /* falls thru */
1292         case SIOCGIFMEDIA:
1293                 IOCTL_DEBUGOUT("ioctl rcv'd: \
1294                     SIOCxIFMEDIA (Get/Set Interface Media)");
1295                 error = ifmedia_ioctl(ifp, ifr, &adapter->media, command);
1296                 break;
1297         case SIOCSIFCAP:
1298             {
1299                 int mask, reinit;
1300
1301                 IOCTL_DEBUGOUT("ioctl rcv'd: SIOCSIFCAP (Set Capabilities)");
1302                 reinit = 0;
1303                 mask = ifr->ifr_reqcap ^ ifp->if_capenable;
1304 #ifdef DEVICE_POLLING
1305                 if (mask & IFCAP_POLLING) {
1306                         if (ifr->ifr_reqcap & IFCAP_POLLING) {
1307                                 error = ether_poll_register(em_poll, ifp);
1308                                 if (error)
1309                                         return (error);
1310                                 EM_CORE_LOCK(adapter);
1311                                 em_disable_intr(adapter);
1312                                 ifp->if_capenable |= IFCAP_POLLING;
1313                                 EM_CORE_UNLOCK(adapter);
1314                         } else {
1315                                 error = ether_poll_deregister(ifp);
1316                                 /* Enable interrupt even in error case */
1317                                 EM_CORE_LOCK(adapter);
1318                                 em_enable_intr(adapter);
1319                                 ifp->if_capenable &= ~IFCAP_POLLING;
1320                                 EM_CORE_UNLOCK(adapter);
1321                         }
1322                 }
1323 #endif
1324                 if (mask & IFCAP_HWCSUM) {
1325                         ifp->if_capenable ^= IFCAP_HWCSUM;
1326                         reinit = 1;
1327                 }
1328                 if (mask & IFCAP_TSO4) {
1329                         ifp->if_capenable ^= IFCAP_TSO4;
1330                         reinit = 1;
1331                 }
1332                 if (mask & IFCAP_VLAN_HWTAGGING) {
1333                         ifp->if_capenable ^= IFCAP_VLAN_HWTAGGING;
1334                         reinit = 1;
1335                 }
1336                 if (mask & IFCAP_VLAN_HWFILTER) {
1337                         ifp->if_capenable ^= IFCAP_VLAN_HWFILTER;
1338                         reinit = 1;
1339                 }
1340                 if (mask & IFCAP_VLAN_HWTSO) {
1341                         ifp->if_capenable ^= IFCAP_VLAN_HWTSO;
1342                         reinit = 1;
1343                 }
1344                 if ((mask & IFCAP_WOL) &&
1345                     (ifp->if_capabilities & IFCAP_WOL) != 0) {
1346                         if (mask & IFCAP_WOL_MCAST)
1347                                 ifp->if_capenable ^= IFCAP_WOL_MCAST;
1348                         if (mask & IFCAP_WOL_MAGIC)
1349                                 ifp->if_capenable ^= IFCAP_WOL_MAGIC;
1350                 }
1351                 if (reinit && (ifp->if_drv_flags & IFF_DRV_RUNNING))
1352                         em_init(adapter);
1353                 VLAN_CAPABILITIES(ifp);
1354                 break;
1355             }
1356
1357         default:
1358                 error = ether_ioctl(ifp, command, data);
1359                 break;
1360         }
1361
1362         return (error);
1363 }
1364
1365
1366 /*********************************************************************
1367  *  Init entry point
1368  *
1369  *  This routine is used in two ways.  It is used by the stack as
1370  *  the init entry point in the network interface structure.  It is
1371  *  also used by the driver as a hw/sw initialization routine to get
1372  *  the adapter to a consistent state.
1373  *
1374  *  This function returns nothing; on failure it stops the adapter.
1375  **********************************************************************/
1376
1377 static void
1378 em_init_locked(struct adapter *adapter)
1379 {
1380         struct ifnet    *ifp = adapter->ifp;
1381         device_t        dev = adapter->dev;
1382
1383         INIT_DEBUGOUT("em_init: begin");
1384
1385         EM_CORE_LOCK_ASSERT(adapter);
1386
1387         em_disable_intr(adapter);
1388         callout_stop(&adapter->timer);
1389
1390         /* Get the latest MAC address; the user may have set a LAA */
1391         bcopy(IF_LLADDR(adapter->ifp), adapter->hw.mac.addr,
1392               ETHER_ADDR_LEN);
1393
1394         /* Put the address into the Receive Address Array */
1395         e1000_rar_set(&adapter->hw, adapter->hw.mac.addr, 0);
1396
1397         /*
1398          * With the 82571 adapter, RAR[0] may be overwritten
1399          * when the other port is reset; we make a duplicate
1400          * in the last RAR entry (RAR[14]) for that eventuality,
1401          * which assures the interface continues to function.
1402          */
1403         if (adapter->hw.mac.type == e1000_82571) {
1404                 e1000_set_laa_state_82571(&adapter->hw, TRUE);
1405                 e1000_rar_set(&adapter->hw, adapter->hw.mac.addr,
1406                     E1000_RAR_ENTRIES - 1);
1407         }
1408
1409         /* Initialize the hardware */
1410         em_reset(adapter);
1411         em_update_link_status(adapter);
1412
1413         /* Setup VLAN support, basic and offload if available */
1414         E1000_WRITE_REG(&adapter->hw, E1000_VET, ETHERTYPE_VLAN);
1415
1416         /* Set hardware offload abilities */
1417         if (ifp->if_capenable & IFCAP_TXCSUM)
1418                 ifp->if_hwassist |= (CSUM_TCP | CSUM_UDP);
1419         else
1420                 ifp->if_hwassist &= ~(CSUM_TCP | CSUM_UDP);
1421
1422         /* Configure for OS presence */
1423         em_init_manageability(adapter);
1424
1425         /* Prepare transmit descriptors and buffers */
1426         em_setup_transmit_structures(adapter);
1427         em_initialize_transmit_unit(adapter);
1428
1429         /* Setup Multicast table */
1430         em_set_multi(adapter);
1431
1432         /*
1433         ** Figure out the desired mbuf
1434         ** pool for jumbo frames
1435         */
1436         if (adapter->hw.mac.max_frame_size <= 2048)
1437                 adapter->rx_mbuf_sz = MCLBYTES;
1438         else if (adapter->hw.mac.max_frame_size <= 4096)
1439                 adapter->rx_mbuf_sz = MJUMPAGESIZE;
1440         else
1441                 adapter->rx_mbuf_sz = MJUM9BYTES;
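        /*
         * For reference: MCLBYTES is the standard 2K cluster,
         * MJUMPAGESIZE is a page-sized (typically 4K) cluster, and
         * MJUM9BYTES is a 9K jumbo cluster, so a maximum-sized frame
         * always fits in a single receive buffer.
         */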
1442
1443         /* Prepare receive descriptors and buffers */
1444         if (em_setup_receive_structures(adapter)) {
1445                 device_printf(dev, "Could not setup receive structures\n");
1446                 em_stop(adapter);
1447                 return;
1448         }
1449         em_initialize_receive_unit(adapter);
1450
1451         /* Use real VLAN Filter support? */
1452         if (ifp->if_capenable & IFCAP_VLAN_HWTAGGING) {
1453                 if (ifp->if_capenable & IFCAP_VLAN_HWFILTER)
1454                         /* Use real VLAN Filter support */
1455                         em_setup_vlan_hw_support(adapter);
1456                 else {
1457                         u32 ctrl;
1458                         ctrl = E1000_READ_REG(&adapter->hw, E1000_CTRL);
1459                         ctrl |= E1000_CTRL_VME;
1460                         E1000_WRITE_REG(&adapter->hw, E1000_CTRL, ctrl);
1461                 }
1462         }
1463
1464         /* Don't lose promiscuous settings */
1465         em_set_promisc(adapter);
1466
1467         /* Set the interface as ACTIVE */
1468         ifp->if_drv_flags |= IFF_DRV_RUNNING;
1469         ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;
1470
1471         callout_reset(&adapter->timer, hz, em_local_timer, adapter);
1472         e1000_clear_hw_cntrs_base_generic(&adapter->hw);
1473
1474         /* MSI/X configuration for 82574 */
1475         if (adapter->hw.mac.type == e1000_82574) {
1476                 int tmp;
1477                 tmp = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
1478                 tmp |= E1000_CTRL_EXT_PBA_CLR;
1479                 E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT, tmp);
1480                 /* Set the IVAR - interrupt vector routing. */
1481                 E1000_WRITE_REG(&adapter->hw, E1000_IVAR, adapter->ivars);
1482         }
1483
1484 #ifdef DEVICE_POLLING
1485         /*
1486          * Only enable interrupts if we are not polling; make sure
1487          * they are off otherwise.
1488          */
1489         if (ifp->if_capenable & IFCAP_POLLING)
1490                 em_disable_intr(adapter);
1491         else
1492 #endif /* DEVICE_POLLING */
1493                 em_enable_intr(adapter);
1494
1495         /* On AMT-based hardware, the driver can now take control from firmware */
1496         if (adapter->has_manage && adapter->has_amt)
1497                 em_get_hw_control(adapter);
1498 }
1499
1500 static void
1501 em_init(void *arg)
1502 {
1503         struct adapter *adapter = arg;
1504
1505         EM_CORE_LOCK(adapter);
1506         em_init_locked(adapter);
1507         EM_CORE_UNLOCK(adapter);
1508 }
1509
1510
1511 #ifdef DEVICE_POLLING
1512 /*********************************************************************
1513  *
1514  *  Legacy polling routine: note this only works with single queue
1515  *  Legacy polling routine: note this only works with a single queue
1516  *********************************************************************/
1517 static int
1518 em_poll(struct ifnet *ifp, enum poll_cmd cmd, int count)
1519 {
1520         struct adapter *adapter = ifp->if_softc;
1521         struct tx_ring  *txr = adapter->tx_rings;
1522         struct rx_ring  *rxr = adapter->rx_rings;
1523         u32             reg_icr;
1524         int             rx_done;
1525
1526         EM_CORE_LOCK(adapter);
1527         if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) {
1528                 EM_CORE_UNLOCK(adapter);
1529                 return (0);
1530         }
1531
1532         if (cmd == POLL_AND_CHECK_STATUS) {
1533                 reg_icr = E1000_READ_REG(&adapter->hw, E1000_ICR);
1534                 if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
1535                         callout_stop(&adapter->timer);
1536                         adapter->hw.mac.get_link_status = 1;
1537                         em_update_link_status(adapter);
1538                         callout_reset(&adapter->timer, hz,
1539                             em_local_timer, adapter);
1540                 }
1541         }
1542         EM_CORE_UNLOCK(adapter);
1543
1544         em_rxeof(rxr, count, &rx_done);
1545
1546         EM_TX_LOCK(txr);
1547         em_txeof(txr);
1548 #ifdef EM_MULTIQUEUE
1549         if (!drbr_empty(ifp, txr->br))
1550                 em_mq_start_locked(ifp, txr);
1551 #else
1552         if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
1553                 em_start_locked(ifp, txr);
1554 #endif
1555         EM_TX_UNLOCK(txr);
1556
1557         return (rx_done);
1558 }
1559 #endif /* DEVICE_POLLING */
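/*
 * Note on the polling contract: a poll handler returns the number of
 * received packets it processed, which the kernel's polling loop uses
 * to adapt its polling rate; that is presumably why only rx_done is
 * returned and TX completions are not counted.
 */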
1560
1561
1562 /*********************************************************************
1563  *
1564  *  Fast Legacy/MSI Combined Interrupt Service routine  
1565  *
1566  *********************************************************************/
1567 static int
1568 em_irq_fast(void *arg)
1569 {
1570         struct adapter  *adapter = arg;
1571         struct ifnet    *ifp;
1572         u32             reg_icr;
1573
1574         ifp = adapter->ifp;
1575
1576         reg_icr = E1000_READ_REG(&adapter->hw, E1000_ICR);
1577
1578         /* Hot eject?  */
1579         if (reg_icr == 0xffffffff)
1580                 return FILTER_STRAY;
1581
1582         /* Definitely not our interrupt.  */
1583         if (reg_icr == 0x0)
1584                 return FILTER_STRAY;
1585
1586         /*
1587          * Starting with the 82571 chip, bit 31 should be used to
1588          * determine whether the interrupt belongs to us.
1589          */
1590         if (adapter->hw.mac.type >= e1000_82571 &&
1591             (reg_icr & E1000_ICR_INT_ASSERTED) == 0)
1592                 return FILTER_STRAY;
1593
1594         em_disable_intr(adapter);
1595         taskqueue_enqueue(adapter->tq, &adapter->que_task);
1596
1597         /* Link status change */
1598         if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
1599                 adapter->hw.mac.get_link_status = 1;
1600                 taskqueue_enqueue(taskqueue_fast, &adapter->link_task);
1601         }
1602
1603         if (reg_icr & E1000_ICR_RXO)
1604                 adapter->rx_overruns++;
1605         return FILTER_HANDLED;
1606 }
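/*
 * em_irq_fast() runs as an interrupt filter: it only classifies the
 * interrupt and defers the actual packet processing to que_task, so
 * very little work is done in interrupt context.  Interrupts are
 * masked here and re-enabled by em_handle_que() once the deferred
 * work has drained.
 */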
1607
1608 /* Combined RX/TX handler, used by Legacy and MSI */
1609 static void
1610 em_handle_que(void *context, int pending)
1611 {
1612         struct adapter  *adapter = context;
1613         struct ifnet    *ifp = adapter->ifp;
1614         struct tx_ring  *txr = adapter->tx_rings;
1615         struct rx_ring  *rxr = adapter->rx_rings;
1616
1617         if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
1618                 bool more = em_rxeof(rxr, adapter->rx_process_limit, NULL);
1619
1620                 EM_TX_LOCK(txr);
1621                 em_txeof(txr);
1622 #ifdef EM_MULTIQUEUE
1623                 if (!drbr_empty(ifp, txr->br))
1624                         em_mq_start_locked(ifp, txr);
1625 #else
1626                 if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
1627                         em_start_locked(ifp, txr);
1628 #endif
1629                 EM_TX_UNLOCK(txr);
1630                 if (more) {
1631                         taskqueue_enqueue(adapter->tq, &adapter->que_task);
1632                         return;
1633                 }
1634         }
1635
1636         em_enable_intr(adapter);
1637         return;
1638 }
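/*
 * Re-enqueueing the task when em_rxeof() reports more work bounds the
 * amount of processing done per invocation to rx_process_limit
 * packets, giving other work on the taskqueue a chance to run between
 * passes.
 */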
1639
1640
1641 /*********************************************************************
1642  *
1643  *  MSIX Interrupt Service Routines
1644  *
1645  **********************************************************************/
1646 static void
1647 em_msix_tx(void *arg)
1648 {
1649         struct tx_ring *txr = arg;
1650         struct adapter *adapter = txr->adapter;
1651         struct ifnet    *ifp = adapter->ifp;
1652
1653         ++txr->tx_irq;
1654         EM_TX_LOCK(txr);
1655         em_txeof(txr);
1656 #ifdef EM_MULTIQUEUE
1657         if (!drbr_empty(ifp, txr->br))
1658                 em_mq_start_locked(ifp, txr);
1659 #else
1660         if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
1661                 em_start_locked(ifp, txr);
1662 #endif
1663
1664         /* Reenable this interrupt */
1665         E1000_WRITE_REG(&adapter->hw, E1000_IMS, txr->ims);
1666         EM_TX_UNLOCK(txr);
1667         return;
1668 }
1669
1670 /*********************************************************************
1671  *
1672  *  MSIX RX Interrupt Service routine
1673  *
1674  **********************************************************************/
1675
1676 static void
1677 em_msix_rx(void *arg)
1678 {
1679         struct rx_ring  *rxr = arg;
1680         struct adapter  *adapter = rxr->adapter;
1681         bool            more;
1682
1683         ++rxr->rx_irq;
1684         if (!(adapter->ifp->if_drv_flags & IFF_DRV_RUNNING))
1685                 return;
1686         more = em_rxeof(rxr, adapter->rx_process_limit, NULL);
1687         if (more)
1688                 taskqueue_enqueue(rxr->tq, &rxr->rx_task);
1689         else {
1690                 /* Reenable this interrupt */
1691                 E1000_WRITE_REG(&adapter->hw, E1000_IMS, rxr->ims);
1692         }
1693         return;
1694 }
1695
1696 /*********************************************************************
1697  *
1698  *  MSIX Link Fast Interrupt Service routine
1699  *
1700  **********************************************************************/
1701 static void
1702 em_msix_link(void *arg)
1703 {
1704         struct adapter  *adapter = arg;
1705         u32             reg_icr;
1706
1707         ++adapter->link_irq;
1708         reg_icr = E1000_READ_REG(&adapter->hw, E1000_ICR);
1709
1710         if (reg_icr & E1000_ICR_RXO)
1711                 adapter->rx_overruns++;
1712
1713         if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
1714                 adapter->hw.mac.get_link_status = 1;
1715                 em_handle_link(adapter, 0);
1716         } else
1717                 E1000_WRITE_REG(&adapter->hw, E1000_IMS,
1718                     EM_MSIX_LINK | E1000_IMS_LSC);
1719         /*
1720         ** Because we must read the ICR for this interrupt,
1721         ** the read may clear other causes via autoclear; for
1722         ** this reason we simply raise a soft interrupt for
1723         ** all these vectors.
1724         */
1725         if (reg_icr) {
1726                 E1000_WRITE_REG(&adapter->hw,
1727                         E1000_ICS, adapter->ims);
1728         }
1729         return;
1730 }
1731
1732 static void
1733 em_handle_rx(void *context, int pending)
1734 {
1735         struct rx_ring  *rxr = context;
1736         struct adapter  *adapter = rxr->adapter;
1737         bool            more;
1738
1739         more = em_rxeof(rxr, adapter->rx_process_limit, NULL);
1740         if (more)
1741                 taskqueue_enqueue(rxr->tq, &rxr->rx_task);
1742         else {
1743                 /* Reenable this interrupt */
1744                 E1000_WRITE_REG(&adapter->hw, E1000_IMS, rxr->ims);
1745         }
1746 }
1747
1748 static void
1749 em_handle_tx(void *context, int pending)
1750 {
1751         struct tx_ring  *txr = context;
1752         struct adapter  *adapter = txr->adapter;
1753         struct ifnet    *ifp = adapter->ifp;
1754
1755         EM_TX_LOCK(txr);
1756         em_txeof(txr);
1757 #ifdef EM_MULTIQUEUE
1758         if (!drbr_empty(ifp, txr->br))
1759                 em_mq_start_locked(ifp, txr);
1760 #else
1761         if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
1762                 em_start_locked(ifp, txr);
1763 #endif
1764         E1000_WRITE_REG(&adapter->hw, E1000_IMS, txr->ims);
1765         EM_TX_UNLOCK(txr);
1766 }
1767
1768 static void
1769 em_handle_link(void *context, int pending)
1770 {
1771         struct adapter  *adapter = context;
1772         struct tx_ring  *txr = adapter->tx_rings;
1773         struct ifnet *ifp = adapter->ifp;
1774
1775         if (!(ifp->if_drv_flags & IFF_DRV_RUNNING))
1776                 return;
1777
1778         EM_CORE_LOCK(adapter);
1779         callout_stop(&adapter->timer);
1780         em_update_link_status(adapter);
1781         callout_reset(&adapter->timer, hz, em_local_timer, adapter);
1782         E1000_WRITE_REG(&adapter->hw, E1000_IMS,
1783             EM_MSIX_LINK | E1000_IMS_LSC);
1784         if (adapter->link_active) {
1785                 for (int i = 0; i < adapter->num_queues; i++, txr++) {
1786                         EM_TX_LOCK(txr);
1787 #ifdef EM_MULTIQUEUE
1788                         if (!drbr_empty(ifp, txr->br))
1789                                 em_mq_start_locked(ifp, txr);
1790 #else
1791                         if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
1792                                 em_start_locked(ifp, txr);
1793 #endif
1794                         EM_TX_UNLOCK(txr);
1795                 }
1796         }
1797         EM_CORE_UNLOCK(adapter);
1798 }
1799
1800
1801 /*********************************************************************
1802  *
1803  *  Media Ioctl callback
1804  *
1805  *  This routine is called whenever the user queries the status of
1806  *  the interface using ifconfig.
1807  *
1808  **********************************************************************/
1809 static void
1810 em_media_status(struct ifnet *ifp, struct ifmediareq *ifmr)
1811 {
1812         struct adapter *adapter = ifp->if_softc;
1813         u_char fiber_type = IFM_1000_SX;
1814
1815         INIT_DEBUGOUT("em_media_status: begin");
1816
1817         EM_CORE_LOCK(adapter);
1818         em_update_link_status(adapter);
1819
1820         ifmr->ifm_status = IFM_AVALID;
1821         ifmr->ifm_active = IFM_ETHER;
1822
1823         if (!adapter->link_active) {
1824                 EM_CORE_UNLOCK(adapter);
1825                 return;
1826         }
1827
1828         ifmr->ifm_status |= IFM_ACTIVE;
1829
1830         if ((adapter->hw.phy.media_type == e1000_media_type_fiber) ||
1831             (adapter->hw.phy.media_type == e1000_media_type_internal_serdes)) {
1832                 ifmr->ifm_active |= fiber_type | IFM_FDX;
1833         } else {
1834                 switch (adapter->link_speed) {
1835                 case 10:
1836                         ifmr->ifm_active |= IFM_10_T;
1837                         break;
1838                 case 100:
1839                         ifmr->ifm_active |= IFM_100_TX;
1840                         break;
1841                 case 1000:
1842                         ifmr->ifm_active |= IFM_1000_T;
1843                         break;
1844                 }
1845                 if (adapter->link_duplex == FULL_DUPLEX)
1846                         ifmr->ifm_active |= IFM_FDX;
1847                 else
1848                         ifmr->ifm_active |= IFM_HDX;
1849         }
1850         EM_CORE_UNLOCK(adapter);
1851 }
1852
1853 /*********************************************************************
1854  *
1855  *  Media Ioctl callback
1856  *
1857  *  This routine is called when the user changes speed/duplex using
1858  *  the media/mediaopt options of ifconfig.
1859  *
1860  **********************************************************************/
1861 static int
1862 em_media_change(struct ifnet *ifp)
1863 {
1864         struct adapter *adapter = ifp->if_softc;
1865         struct ifmedia  *ifm = &adapter->media;
1866
1867         INIT_DEBUGOUT("em_media_change: begin");
1868
1869         if (IFM_TYPE(ifm->ifm_media) != IFM_ETHER)
1870                 return (EINVAL);
1871
1872         EM_CORE_LOCK(adapter);
1873         switch (IFM_SUBTYPE(ifm->ifm_media)) {
1874         case IFM_AUTO:
1875                 adapter->hw.mac.autoneg = DO_AUTO_NEG;
1876                 adapter->hw.phy.autoneg_advertised = AUTONEG_ADV_DEFAULT;
1877                 break;
1878         case IFM_1000_LX:
1879         case IFM_1000_SX:
1880         case IFM_1000_T:
1881                 adapter->hw.mac.autoneg = DO_AUTO_NEG;
1882                 adapter->hw.phy.autoneg_advertised = ADVERTISE_1000_FULL;
1883                 break;
1884         case IFM_100_TX:
1885                 adapter->hw.mac.autoneg = FALSE;
1886                 adapter->hw.phy.autoneg_advertised = 0;
1887                 if ((ifm->ifm_media & IFM_GMASK) == IFM_FDX)
1888                         adapter->hw.mac.forced_speed_duplex = ADVERTISE_100_FULL;
1889                 else
1890                         adapter->hw.mac.forced_speed_duplex = ADVERTISE_100_HALF;
1891                 break;
1892         case IFM_10_T:
1893                 adapter->hw.mac.autoneg = FALSE;
1894                 adapter->hw.phy.autoneg_advertised = 0;
1895                 if ((ifm->ifm_media & IFM_GMASK) == IFM_FDX)
1896                         adapter->hw.mac.forced_speed_duplex = ADVERTISE_10_FULL;
1897                 else
1898                         adapter->hw.mac.forced_speed_duplex = ADVERTISE_10_HALF;
1899                 break;
1900         default:
1901                 device_printf(adapter->dev, "Unsupported media type\n");
1902         }
1903
1904         em_init_locked(adapter);
1905         EM_CORE_UNLOCK(adapter);
1906
1907         return (0);
1908 }
1909
1910 /*********************************************************************
1911  *
1912  *  This routine maps the mbufs to tx descriptors.
1913  *
1914  *  return 0 on success, positive on failure
1915  **********************************************************************/
1916
1917 static int
1918 em_xmit(struct tx_ring *txr, struct mbuf **m_headp)
1919 {
1920         struct adapter          *adapter = txr->adapter;
1921         bus_dma_segment_t       segs[EM_MAX_SCATTER];
1922         bus_dmamap_t            map;
1923         struct em_txbuffer      *tx_buffer, *tx_buffer_mapped;
1924         struct e1000_tx_desc    *ctxd = NULL;
1925         struct mbuf             *m_head;
1926         struct ether_header     *eh;
1927         struct ip               *ip = NULL;
1928         struct tcphdr           *tp = NULL;
1929         u32                     txd_upper = 0, txd_lower = 0;
1930         int                     ip_off, poff;
1931         int                     nsegs, i, j, first, last = 0;
1932         int                     error;
1933         bool                    do_tso, tso_desc, remap = TRUE;
1934
1935         m_head = *m_headp;
1936         do_tso = m_head->m_pkthdr.csum_flags & CSUM_IP_TSO;
1937         tso_desc = FALSE;
1938         ip_off = poff = 0;
1939
1940         /*
1941          * Intel recommends that the entire IP/TCP header reside in a
1942          * single buffer.  If multiple descriptors are used to describe
1943          * the IP and TCP header, each descriptor should describe one
1944          * or more complete headers; descriptors referencing only parts
1945          * of headers are not supported.  If all layer headers are not
1946          * coalesced into a single buffer, each buffer should not cross
1947          * a 4KB boundary, or be larger than the maximum read request size.
1948          * The controller also requires modifying the IP/TCP header to
1949          * make TSO work, so we first get a writable mbuf chain, then
1950          * coalesce the ethernet/IP/TCP header into a single buffer to
1951          * meet the controller's requirements.  This also simplifies
1952          * IP/TCP/UDP checksum offloading, which has similar restrictions.
1953          */
1954         if (do_tso || m_head->m_pkthdr.csum_flags & CSUM_OFFLOAD) {
1955                 if (do_tso || (m_head->m_next != NULL && 
1956                     m_head->m_pkthdr.csum_flags & CSUM_OFFLOAD)) {
1957                         if (M_WRITABLE(*m_headp) == 0) {
1958                                 m_head = m_dup(*m_headp, M_NOWAIT);
1959                                 m_freem(*m_headp);
1960                                 if (m_head == NULL) {
1961                                         *m_headp = NULL;
1962                                         return (ENOBUFS);
1963                                 }
1964                                 *m_headp = m_head;
1965                         }
1966                 }
1967                 /*
1968                  * XXX
1969                  * Assume IPv4; we don't have TSO/checksum offload support
1970                  * for IPv6 yet.
1971                  */
1972                 ip_off = sizeof(struct ether_header);
1973                 if (m_head->m_len < ip_off) {
1974                         m_head = m_pullup(m_head, ip_off);
1975                         if (m_head == NULL) {
1976                                 *m_headp = NULL;
1977                                 return (ENOBUFS);
1978                         }
1979                 }
1980                 eh = mtod(m_head, struct ether_header *);
1981                 if (eh->ether_type == htons(ETHERTYPE_VLAN)) {
1982                         ip_off = sizeof(struct ether_vlan_header);
1983                         if (m_head->m_len < ip_off) {
1984                                 m_head = m_pullup(m_head, ip_off);
1985                                 if (m_head == NULL) {
1986                                         *m_headp = NULL;
1987                                         return (ENOBUFS);
1988                                 }
1989                         }
1990                 }
1991                 if (m_head->m_len < ip_off + sizeof(struct ip)) {
1992                         m_head = m_pullup(m_head, ip_off + sizeof(struct ip));
1993                         if (m_head == NULL) {
1994                                 *m_headp = NULL;
1995                                 return (ENOBUFS);
1996                         }
1997                 }
1998                 ip = (struct ip *)(mtod(m_head, char *) + ip_off);
1999                 poff = ip_off + (ip->ip_hl << 2);
2000
2001                 if (do_tso || (m_head->m_pkthdr.csum_flags & CSUM_TCP)) {
2002                         if (m_head->m_len < poff + sizeof(struct tcphdr)) {
2003                                 m_head = m_pullup(m_head, poff +
2004                                     sizeof(struct tcphdr));
2005                                 if (m_head == NULL) {
2006                                         *m_headp = NULL;
2007                                         return (ENOBUFS);
2008                                 }
2009                         }
2010                         tp = (struct tcphdr *)(mtod(m_head, char *) + poff);
2011                         /*
2012                          * TSO workaround: pull TSO_WORKAROUND (4)
2013                          *   more bytes of data into the first mbuf.
2014                          */
2015                         if (m_head->m_len < poff + (tp->th_off << 2)) {
2016                                 m_head = m_pullup(m_head, poff +
2017                                                  (tp->th_off << 2) +
2018                                                  TSO_WORKAROUND);
2019                                 if (m_head == NULL) {
2020                                         *m_headp = NULL;
2021                                         return (ENOBUFS);
2022                                 }
2023                         }
2024                         ip = (struct ip *)(mtod(m_head, char *) + ip_off);
2025                         tp = (struct tcphdr *)(mtod(m_head, char *) + poff);
2026                         if (do_tso) {
2027                                 ip->ip_len = htons(m_head->m_pkthdr.tso_segsz +
2028                                                   (ip->ip_hl << 2) +
2029                                                   (tp->th_off << 2));
2030                                 ip->ip_sum = 0;
2031                                 /*
2032                                  * The pseudo TCP checksum does not include
2033                                  * the TCP payload length, so the driver must
2034                                  * recompute the checksum here as the hardware
2035                                  * expects to see it.  This follows Microsoft's
2036                                  * Large Send specification.
2037                                 */
2038                                 tp->th_sum = in_pseudo(ip->ip_src.s_addr,
2039                                     ip->ip_dst.s_addr, htons(IPPROTO_TCP));
2040                         }
2041                 } else if (m_head->m_pkthdr.csum_flags & CSUM_UDP) {
2042                         if (m_head->m_len < poff + sizeof(struct udphdr)) {
2043                                 m_head = m_pullup(m_head, poff +
2044                                     sizeof(struct udphdr));
2045                                 if (m_head == NULL) {
2046                                         *m_headp = NULL;
2047                                         return (ENOBUFS);
2048                                 }
2049                         }
2050                         ip = (struct ip *)(mtod(m_head, char *) + ip_off);
2051                 }
2052                 *m_headp = m_head;
2053         }
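        /*
         * When TSO or checksum offload was requested, every header the
         * offload logic needs (ethernet, IP, and TCP/UDP as applicable)
         * is now contiguous in the first mbuf, with ip and tp pointing
         * into that buffer.
         */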
2054
2055         /*
2056          * Map the packet for DMA.
2057          *
2058          * Capture the first descriptor index; this
2059          * descriptor will have the index of the EOP,
2060          * which is the only one that now gets a DONE
2061          * bit writeback.
2062          */
2063         first = txr->next_avail_desc;
2064         tx_buffer = &txr->tx_buffers[first];
2065         tx_buffer_mapped = tx_buffer;
2066         map = tx_buffer->map;
2067
2068 retry:
2069         error = bus_dmamap_load_mbuf_sg(txr->txtag, map,
2070             *m_headp, segs, &nsegs, BUS_DMA_NOWAIT);
2071
2072         /*
2073          * There are two types of errors we can (try) to handle:
2074          * - EFBIG means the mbuf chain was too long and bus_dma ran
2075          *   out of segments.  Defragment the mbuf chain and try again.
2076          * - ENOMEM means bus_dma could not obtain enough bounce buffers
2077          *   at this point in time.  Defer sending and try again later.
2078          * All other errors, in particular EINVAL, are fatal and prevent the
2079          * mbuf chain from ever going through.  Drop it and report error.
2080          */
2081         if (error == EFBIG && remap) {
2082                 struct mbuf *m;
2083
2084                 m = m_collapse(*m_headp, M_NOWAIT, EM_MAX_SCATTER);
2085                 if (m == NULL) {
2086                         adapter->mbuf_defrag_failed++;
2087                         m_freem(*m_headp);
2088                         *m_headp = NULL;
2089                         return (ENOBUFS);
2090                 }
2091                 *m_headp = m;
2092
2093                 /* Try it again, but only once */
2094                 remap = FALSE;
2095                 goto retry;
2096         } else if (error != 0) {
2097                 adapter->no_tx_dma_setup++;
2098                 m_freem(*m_headp);
2099                 *m_headp = NULL;
2100                 return (error);
2101         }
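        /*
         * m_collapse() is preferred over m_defrag() above because it
         * only coalesces adjacent mbufs until the chain fits within
         * EM_MAX_SCATTER segments, which is cheaper than copying the
         * entire chain into fresh clusters.
         */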
2102
2103         /*
2104          * TSO hardware workaround: if this packet is not
2105          * TSO, is only a single descriptor long, and
2106          * follows a TSO burst, then we need to add a
2107          * sentinel descriptor to prevent premature writeback.
2108          */
2109         if ((!do_tso) && (txr->tx_tso == TRUE)) {
2110                 if (nsegs == 1)
2111                         tso_desc = TRUE;
2112                 txr->tx_tso = FALSE;
2113         }
2114
2115         if (txr->tx_avail < (nsegs + EM_MAX_SCATTER)) {
2116                 txr->no_desc_avail++;
2117                 bus_dmamap_unload(txr->txtag, map);
2118                 return (ENOBUFS);
2119         }
2120         m_head = *m_headp;
2121
2122         /* Do hardware assists */
2123         if (m_head->m_pkthdr.csum_flags & CSUM_IP_TSO) {
2124                 em_tso_setup(txr, m_head, ip_off, ip, tp,
2125                     &txd_upper, &txd_lower);
2126                 /* we need to make a final sentinel transmit desc */
2127                 tso_desc = TRUE;
2128         } else if (m_head->m_pkthdr.csum_flags & CSUM_OFFLOAD)
2129                 em_transmit_checksum_setup(txr, m_head,
2130                     ip_off, ip, &txd_upper, &txd_lower);
2131
2132         if (m_head->m_flags & M_VLANTAG) {
2133                 /* Set the vlan id. */
2134                 txd_upper |=
2135                     (htole16(m_head->m_pkthdr.ether_vtag) << 16);
2136                 /* Tell hardware to add tag */
2137                 txd_lower |= htole32(E1000_TXD_CMD_VLE);
2138         }
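        /*
         * In the legacy TX descriptor layout the upper word carries the
         * 802.1Q tag in its "special" field (hence the shift by 16),
         * and E1000_TXD_CMD_VLE asks the MAC to insert that tag on the
         * wire.
         */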
2139
2140         i = txr->next_avail_desc;
2141
2142         /* Set up our transmit descriptors */
2143         for (j = 0; j < nsegs; j++) {
2144                 bus_size_t seg_len;
2145                 bus_addr_t seg_addr;
2146
2147                 tx_buffer = &txr->tx_buffers[i];
2148                 ctxd = &txr->tx_base[i];
2149                 seg_addr = segs[j].ds_addr;
2150                 seg_len  = segs[j].ds_len;
2151                 /*
2152                 ** TSO Workaround:
2153                 ** If this is the last descriptor, we want to
2154                 ** split it so we have a small final sentinel
2155                 */
2156                 if (tso_desc && (j == (nsegs - 1)) && (seg_len > 8)) {
2157                         seg_len -= TSO_WORKAROUND;
2158                         ctxd->buffer_addr = htole64(seg_addr);
2159                         ctxd->lower.data = htole32(
2160                                 adapter->txd_cmd | txd_lower | seg_len);
2161                         ctxd->upper.data = htole32(txd_upper);
2162                         if (++i == adapter->num_tx_desc)
2163                                 i = 0;
2164
2165                         /* Now make the sentinel */     
2166                         txr->tx_avail--;
2167                         ctxd = &txr->tx_base[i];
2168                         tx_buffer = &txr->tx_buffers[i];
2169                         ctxd->buffer_addr =
2170                             htole64(seg_addr + seg_len);
2171                         ctxd->lower.data = htole32(
2172                         adapter->txd_cmd | txd_lower | TSO_WORKAROUND);
2173                         ctxd->upper.data =
2174                             htole32(txd_upper);
2175                         last = i;
2176                         if (++i == adapter->num_tx_desc)
2177                                 i = 0;
2178                 } else {
2179                         ctxd->buffer_addr = htole64(seg_addr);
2180                         ctxd->lower.data = htole32(
2181                         adapter->txd_cmd | txd_lower | seg_len);
2182                         ctxd->upper.data = htole32(txd_upper);
2183                         last = i;
2184                         if (++i == adapter->num_tx_desc)
2185                                 i = 0;
2186                 }
2187                 tx_buffer->m_head = NULL;
2188                 tx_buffer->next_eop = -1;
2189         }
2190
2191         txr->next_avail_desc = i;
2192         txr->tx_avail -= nsegs;
2193
2194         tx_buffer->m_head = m_head;
2195         /*
2196         ** Here we swap the map so the last descriptor,
2197         ** which gets the completion interrupt, has the
2198         ** real map, and the first descriptor gets the
2199         ** unused map from this descriptor.
2200         */
2201         tx_buffer_mapped->map = tx_buffer->map;
2202         tx_buffer->map = map;
2203         bus_dmamap_sync(txr->txtag, map, BUS_DMASYNC_PREWRITE);
2204
2205         /*
2206          * Last Descriptor of Packet
2207          * needs End Of Packet (EOP)
2208          * and Report Status (RS)
2209          */
2210         ctxd->lower.data |=
2211             htole32(E1000_TXD_CMD_EOP | E1000_TXD_CMD_RS);
2212         /*
2213          * Keep track, in the first buffer, of which
2214          * descriptor will be written back.
2215          */
2216         tx_buffer = &txr->tx_buffers[first];
2217         tx_buffer->next_eop = last;
2218
2219         /*
2220          * Advance the Transmit Descriptor Tail (TDT); this tells the
2221          * E1000 that this frame is available to transmit.
2222          */
2223         bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
2224             BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
2225         E1000_WRITE_REG(&adapter->hw, E1000_TDT(txr->me), i);
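        /*
         * The TDT write acts as the doorbell: once the tail register
         * moves past the new descriptors the hardware owns them, which
         * is why the bus_dmamap_sync() above must happen first.
         */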
2226
2227         return (0);
2228 }
2229
2230 static void
2231 em_set_promisc(struct adapter *adapter)
2232 {
2233         struct ifnet    *ifp = adapter->ifp;
2234         u32             reg_rctl;
2235
2236         reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
2237
2238         if (ifp->if_flags & IFF_PROMISC) {
2239                 reg_rctl |= (E1000_RCTL_UPE | E1000_RCTL_MPE);
2240                 /* Turn this on if you want to see bad packets */
2241                 if (em_debug_sbp)
2242                         reg_rctl |= E1000_RCTL_SBP;
2243                 E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2244         } else if (ifp->if_flags & IFF_ALLMULTI) {
2245                 reg_rctl |= E1000_RCTL_MPE;
2246                 reg_rctl &= ~E1000_RCTL_UPE;
2247                 E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2248         }
2249 }
2250
2251 static void
2252 em_disable_promisc(struct adapter *adapter)
2253 {
2254         struct ifnet    *ifp = adapter->ifp;
2255         u32             reg_rctl;
2256         int             mcnt = 0;
2257
2258         reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
2259         reg_rctl &=  (~E1000_RCTL_UPE);
2260         if (ifp->if_flags & IFF_ALLMULTI)
2261                 mcnt = MAX_NUM_MULTICAST_ADDRESSES;
2262         else {
2263                 struct  ifmultiaddr *ifma;
2264 #if __FreeBSD_version < 800000
2265                 IF_ADDR_LOCK(ifp);
2266 #else   
2267                 if_maddr_rlock(ifp);
2268 #endif
2269                 TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
2270                         if (ifma->ifma_addr->sa_family != AF_LINK)
2271                                 continue;
2272                         if (mcnt == MAX_NUM_MULTICAST_ADDRESSES)
2273                                 break;
2274                         mcnt++;
2275                 }
2276 #if __FreeBSD_version < 800000
2277                 IF_ADDR_UNLOCK(ifp);
2278 #else
2279                 if_maddr_runlock(ifp);
2280 #endif
2281         }
2282         /* Don't disable if in MAX groups */
2283         if (mcnt < MAX_NUM_MULTICAST_ADDRESSES)
2284                 reg_rctl &=  (~E1000_RCTL_MPE);
2285         reg_rctl &=  (~E1000_RCTL_SBP);
2286         E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2287 }
2288
2289
2290 /*********************************************************************
2291  *  Multicast Update
2292  *
2293  *  This routine is called whenever the multicast address list is updated.
2294  *
2295  **********************************************************************/
2296
2297 static void
2298 em_set_multi(struct adapter *adapter)
2299 {
2300         struct ifnet    *ifp = adapter->ifp;
2301         struct ifmultiaddr *ifma;
2302         u32 reg_rctl = 0;
2303         u8  *mta; /* Multicast array memory */
2304         int mcnt = 0;
2305
2306         IOCTL_DEBUGOUT("em_set_multi: begin");
2307
2308         mta = adapter->mta;
2309         bzero(mta, sizeof(u8) * ETH_ADDR_LEN * MAX_NUM_MULTICAST_ADDRESSES);
2310
2311         if (adapter->hw.mac.type == e1000_82542 && 
2312             adapter->hw.revision_id == E1000_REVISION_2) {
2313                 reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
2314                 if (adapter->hw.bus.pci_cmd_word & CMD_MEM_WRT_INVALIDATE)
2315                         e1000_pci_clear_mwi(&adapter->hw);
2316                 reg_rctl |= E1000_RCTL_RST;
2317                 E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2318                 msec_delay(5);
2319         }
2320
2321 #if __FreeBSD_version < 800000
2322         IF_ADDR_LOCK(ifp);
2323 #else
2324         if_maddr_rlock(ifp);
2325 #endif
2326         TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
2327                 if (ifma->ifma_addr->sa_family != AF_LINK)
2328                         continue;
2329
2330                 if (mcnt == MAX_NUM_MULTICAST_ADDRESSES)
2331                         break;
2332
2333                 bcopy(LLADDR((struct sockaddr_dl *)ifma->ifma_addr),
2334                     &mta[mcnt * ETH_ADDR_LEN], ETH_ADDR_LEN);
2335                 mcnt++;
2336         }
2337 #if __FreeBSD_version < 800000
2338         IF_ADDR_UNLOCK(ifp);
2339 #else
2340         if_maddr_runlock(ifp);
2341 #endif
2342         if (mcnt >= MAX_NUM_MULTICAST_ADDRESSES) {
2343                 reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
2344                 reg_rctl |= E1000_RCTL_MPE;
2345                 E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2346         } else
2347                 e1000_update_mc_addr_list(&adapter->hw, mta, mcnt);
2348
2349         if (adapter->hw.mac.type == e1000_82542 && 
2350             adapter->hw.revision_id == E1000_REVISION_2) {
2351                 reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
2352                 reg_rctl &= ~E1000_RCTL_RST;
2353                 E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2354                 msec_delay(5);
2355                 if (adapter->hw.bus.pci_cmd_word & CMD_MEM_WRT_INVALIDATE)
2356                         e1000_pci_set_mwi(&adapter->hw);
2357         }
2358 }
2359
2360
2361 /*********************************************************************
2362  *  Timer routine
2363  *
2364  *  This routine checks the link status and updates statistics.
2365  *
2366  **********************************************************************/
2367
2368 static void
2369 em_local_timer(void *arg)
2370 {
2371         struct adapter  *adapter = arg;
2372         struct ifnet    *ifp = adapter->ifp;
2373         struct tx_ring  *txr = adapter->tx_rings;
2374         struct rx_ring  *rxr = adapter->rx_rings;
2375         u32             trigger = 0;
2376
2377         EM_CORE_LOCK_ASSERT(adapter);
2378
2379         em_update_link_status(adapter);
2380         em_update_stats_counters(adapter);
2381
2382         /* Reset LAA into RAR[0] on 82571 */
2383         if ((adapter->hw.mac.type == e1000_82571) &&
2384             e1000_get_laa_state_82571(&adapter->hw))
2385                 e1000_rar_set(&adapter->hw, adapter->hw.mac.addr, 0);
2386
2387         /* Mask to use in the irq trigger */
2388         if (adapter->msix_mem) {
2389                 for (int i = 0; i < adapter->num_queues; i++, rxr++)
2390                         trigger |= rxr->ims;
2391                 rxr = adapter->rx_rings;
2392         } else
2393                 trigger = E1000_ICS_RXDMT0;
2394
2395         /*
2396         ** Check on the state of the TX queue(s); this
2397         ** can be done without the lock because it is RO
2398         ** and the HUNG state will be static if set.
2399         */
2400         for (int i = 0; i < adapter->num_queues; i++, txr++) {
2401                 if (txr->busy == EM_TX_HUNG)
2402                         goto hung;
2403                 if (txr->busy >= EM_TX_MAXTRIES)
2404                         txr->busy = EM_TX_HUNG;
2405                 /* Schedule a TX tasklet if needed */
2406                 if (txr->tx_avail <= EM_MAX_SCATTER)
2407                         taskqueue_enqueue(txr->tq, &txr->tx_task);
2408         }
2409         
2410         callout_reset(&adapter->timer, hz, em_local_timer, adapter);
2411 #ifndef DEVICE_POLLING
2412         /* Trigger an RX interrupt to guarantee mbuf refresh */
2413         E1000_WRITE_REG(&adapter->hw, E1000_ICS, trigger);
2414 #endif
2415         return;
2416 hung:
2417         /* Looks like we're hung */
2418         device_printf(adapter->dev, "Watchdog timeout Queue[%d]-- resetting\n",
2419                         txr->me);
2420         em_print_debug_info(adapter);
2421         ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
2422         adapter->watchdog_events++;
2423         em_init_locked(adapter);
2424 }
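/*
 * A note on the hang detection above: txr->busy is maintained in the
 * TX completion path -- reset while descriptors are being reclaimed
 * and advanced when no progress is made -- so a ring that reaches
 * EM_TX_MAXTRIES without progress is marked EM_TX_HUNG and the next
 * timer tick performs the watchdog reset.
 */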
2425
2426
2427 static void
2428 em_update_link_status(struct adapter *adapter)
2429 {
2430         struct e1000_hw *hw = &adapter->hw;
2431         struct ifnet *ifp = adapter->ifp;
2432         device_t dev = adapter->dev;
2433         struct tx_ring *txr = adapter->tx_rings;
2434         u32 link_check = 0;
2435
2436         /* Get the cached link value or read phy for real */
2437         switch (hw->phy.media_type) {
2438         case e1000_media_type_copper:
2439                 if (hw->mac.get_link_status) {
2440                         if (hw->mac.type == e1000_pch_spt)
2441                                 msec_delay(50);
2442                         /* Do the work to read phy */
2443                         e1000_check_for_link(hw);
2444                         link_check = !hw->mac.get_link_status;
2445                         if (link_check) /* ESB2 fix */
2446                                 e1000_cfg_on_link_up(hw);
2447                 } else
2448                         link_check = TRUE;
2449                 break;
2450         case e1000_media_type_fiber:
2451                 e1000_check_for_link(hw);
2452                 link_check = (E1000_READ_REG(hw, E1000_STATUS) &
2453                                  E1000_STATUS_LU);
2454                 break;
2455         case e1000_media_type_internal_serdes:
2456                 e1000_check_for_link(hw);
2457                 link_check = adapter->hw.mac.serdes_has_link;
2458                 break;
2459         default:
2460         case e1000_media_type_unknown:
2461                 break;
2462         }
2463
2464         /* Now check for a transition */
2465         if (link_check && (adapter->link_active == 0)) {
2466                 e1000_get_speed_and_duplex(hw, &adapter->link_speed,
2467                     &adapter->link_duplex);
2468
2469                 /*
2470                 ** There have proven to be problems with TSO when not at full
2471                 ** gigabit speed, so disable the assist automatically when at
2472                 ** lower speeds.  -jfv
2473                 */
2474                 if (ifp->if_capenable & IFCAP_TSO4) {
2475                         if (adapter->link_speed == SPEED_1000)
2476                                 ifp->if_hwassist |= CSUM_IP_TSO;
2477                         else
2478                                 ifp->if_hwassist &= ~CSUM_IP_TSO;
2479                 }
2480
2481                 /* Check if we must disable SPEED_MODE bit on PCI-E */
2482                 if ((adapter->link_speed != SPEED_1000) &&
2483                     ((hw->mac.type == e1000_82571) ||
2484                     (hw->mac.type == e1000_82572))) {
2485                         int tarc0;
2486                         tarc0 = E1000_READ_REG(hw, E1000_TARC(0));
2487                         tarc0 &= ~TARC_SPEED_MODE_BIT;
2488                         E1000_WRITE_REG(hw, E1000_TARC(0), tarc0);
2489                 }
2490                 if (bootverbose)
2491                         device_printf(dev, "Link is up %d Mbps %s\n",
2492                             adapter->link_speed,
2493                             ((adapter->link_duplex == FULL_DUPLEX) ?
2494                             "Full Duplex" : "Half Duplex"));
2495                 adapter->link_active = 1;
2496                 adapter->smartspeed = 0;
2497                 ifp->if_baudrate = adapter->link_speed * 1000000;
2498                 if_link_state_change(ifp, LINK_STATE_UP);
2499         } else if (!link_check && (adapter->link_active == 1)) {
2500                 ifp->if_baudrate = adapter->link_speed = 0;
2501                 adapter->link_duplex = 0;
2502                 if (bootverbose)
2503                         device_printf(dev, "Link is Down\n");
2504                 adapter->link_active = 0;
2505                 /* Link down, disable hang detection */
2506                 for (int i = 0; i < adapter->num_queues; i++, txr++)
2507                         txr->busy = EM_TX_IDLE;
2508                 if_link_state_change(ifp, LINK_STATE_DOWN);
2509         }
2510 }
2511
2512 /*********************************************************************
2513  *
2514  *  This routine disables all traffic on the adapter by issuing a
2515  *  global reset on the MAC and deallocates TX/RX buffers.
2516  *
2517  *  This routine should always be called with the CORE lock held;
2518  *  it acquires and releases each TX lock itself.
2519  **********************************************************************/
2520
2521 static void
2522 em_stop(void *arg)
2523 {
2524         struct adapter  *adapter = arg;
2525         struct ifnet    *ifp = adapter->ifp;
2526         struct tx_ring  *txr = adapter->tx_rings;
2527
2528         EM_CORE_LOCK_ASSERT(adapter);
2529
2530         INIT_DEBUGOUT("em_stop: begin");
2531
2532         em_disable_intr(adapter);
2533         callout_stop(&adapter->timer);
2534
2535         /* Tell the stack that the interface is no longer active */
2536         ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
2537         ifp->if_drv_flags |= IFF_DRV_OACTIVE;
2538
2539         /* Disarm Hang Detection. */
2540         for (int i = 0; i < adapter->num_queues; i++, txr++) {
2541                 EM_TX_LOCK(txr);
2542                 txr->busy = EM_TX_IDLE;
2543                 EM_TX_UNLOCK(txr);
2544         }
2545
2546         /* I219 needs some special flushing to avoid hangs */
2547         if (adapter->hw.mac.type == e1000_pch_spt)
2548                 em_flush_desc_rings(adapter);
2549
2550         e1000_reset_hw(&adapter->hw);
2551         E1000_WRITE_REG(&adapter->hw, E1000_WUC, 0);
2552
2553         e1000_led_off(&adapter->hw);
2554         e1000_cleanup_led(&adapter->hw);
2555 }
2556
2557
2558 /*********************************************************************
2559  *
2560  *  Determine hardware revision.
2561  *
2562  **********************************************************************/
2563 static void
2564 em_identify_hardware(struct adapter *adapter)
2565 {
2566         device_t dev = adapter->dev;
2567
2568         /* Make sure our PCI config space has the necessary stuff set */
2569         pci_enable_busmaster(dev);
2570         adapter->hw.bus.pci_cmd_word = pci_read_config(dev, PCIR_COMMAND, 2);
2571
2572         /* Save off the information about this board */
2573         adapter->hw.vendor_id = pci_get_vendor(dev);
2574         adapter->hw.device_id = pci_get_device(dev);
2575         adapter->hw.revision_id = pci_read_config(dev, PCIR_REVID, 1);
2576         adapter->hw.subsystem_vendor_id =
2577             pci_read_config(dev, PCIR_SUBVEND_0, 2);
2578         adapter->hw.subsystem_device_id =
2579             pci_read_config(dev, PCIR_SUBDEV_0, 2);
2580
2581         /* Do Shared Code Init and Setup */
2582         if (e1000_set_mac_type(&adapter->hw)) {
2583                 device_printf(dev, "Setup init failure\n");
2584                 return;
2585         }
2586 }
2587
2588 static int
2589 em_allocate_pci_resources(struct adapter *adapter)
2590 {
2591         device_t        dev = adapter->dev;
2592         int             rid;
2593
2594         rid = PCIR_BAR(0);
2595         adapter->memory = bus_alloc_resource_any(dev, SYS_RES_MEMORY,
2596             &rid, RF_ACTIVE);
2597         if (adapter->memory == NULL) {
2598                 device_printf(dev, "Unable to allocate bus resource: memory\n");
2599                 return (ENXIO);
2600         }
2601         adapter->osdep.mem_bus_space_tag =
2602             rman_get_bustag(adapter->memory);
2603         adapter->osdep.mem_bus_space_handle =
2604             rman_get_bushandle(adapter->memory);
2605         adapter->hw.hw_addr = (u8 *)&adapter->osdep.mem_bus_space_handle;
2606
2607         adapter->hw.back = &adapter->osdep;
2608
2609         return (0);
2610 }
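/*
 * The shared e1000 code performs all register access through the osdep
 * structure captured above; pointing hw.hw_addr at the bus-space
 * handle is an artifact of that indirection rather than a direct
 * mapping of the BAR.
 */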
2611
2612 /*********************************************************************
2613  *
2614  *  Setup the Legacy or MSI Interrupt handler
2615  *
2616  **********************************************************************/
2617 static int
2618 em_allocate_legacy(struct adapter *adapter)
2619 {
2620         device_t dev = adapter->dev;
2621         struct tx_ring  *txr = adapter->tx_rings;
2622         int error, rid = 0;
2623
2624         /* Manually turn off all interrupts */
2625         E1000_WRITE_REG(&adapter->hw, E1000_IMC, 0xffffffff);
2626
2627         if (adapter->msix == 1) /* using MSI */
2628                 rid = 1;
2629         /* We allocate a single interrupt resource */
2630         adapter->res = bus_alloc_resource_any(dev,
2631             SYS_RES_IRQ, &rid, RF_SHAREABLE | RF_ACTIVE);
2632         if (adapter->res == NULL) {
2633                 device_printf(dev, "Unable to allocate bus resource: "
2634                     "interrupt\n");
2635                 return (ENXIO);
2636         }
2637
2638         /*
2639          * Allocate a fast interrupt and the associated
2640          * deferred processing contexts.
2641          */
2642         TASK_INIT(&adapter->que_task, 0, em_handle_que, adapter);
2643         adapter->tq = taskqueue_create_fast("em_taskq", M_NOWAIT,
2644             taskqueue_thread_enqueue, &adapter->tq);
2645         taskqueue_start_threads(&adapter->tq, 1, PI_NET, "%s que",
2646             device_get_nameunit(adapter->dev));
2647         /* Use a TX-only task for the local timer */
2648         TASK_INIT(&txr->tx_task, 0, em_handle_tx, txr);
2649         txr->tq = taskqueue_create_fast("em_txq", M_NOWAIT,
2650             taskqueue_thread_enqueue, &txr->tq);
2651         taskqueue_start_threads(&txr->tq, 1, PI_NET, "%s txq",
2652             device_get_nameunit(adapter->dev));
2653         TASK_INIT(&adapter->link_task, 0, em_handle_link, adapter);
2654         if ((error = bus_setup_intr(dev, adapter->res, INTR_TYPE_NET,
2655             em_irq_fast, NULL, adapter, &adapter->tag)) != 0) {
2656                 device_printf(dev, "Failed to register fast interrupt "
2657                             "handler: %d\n", error);
2658                 taskqueue_free(adapter->tq);
2659                 adapter->tq = NULL;
2660                 return (error);
2661         }
2662         
2663         return (0);
2664 }
2665
2666 /*********************************************************************
2667  *
2668  *  Setup the MSIX Interrupt handlers
2669  *   This is not really multiqueue; rather,
2670  *   it is just separate interrupt vectors
2671  *   for TX, RX, and Link.
2672  *
2673  **********************************************************************/
2674 static int
2675 em_allocate_msix(struct adapter *adapter)
2676 {
2677         device_t        dev = adapter->dev;
2678         struct          tx_ring *txr = adapter->tx_rings;
2679         struct          rx_ring *rxr = adapter->rx_rings;
2680         int             error, rid, vector = 0;
2681         int             cpu_id = 0;
2682
2683
2684         /* Make sure all interrupts are disabled */
2685         E1000_WRITE_REG(&adapter->hw, E1000_IMC, 0xffffffff);
2686
2687         /* First set up ring resources */
2688         for (int i = 0; i < adapter->num_queues; i++, rxr++, vector++) {
2689
2690                 /* RX ring */
2691                 rid = vector + 1;
2692
2693                 rxr->res = bus_alloc_resource_any(dev,
2694                     SYS_RES_IRQ, &rid, RF_ACTIVE);
2695                 if (rxr->res == NULL) {
2696                         device_printf(dev,
2697                             "Unable to allocate bus resource: "
2698                             "RX MSIX Interrupt %d\n", i);
2699                         return (ENXIO);
2700                 }
2701                 if ((error = bus_setup_intr(dev, rxr->res,
2702                     INTR_TYPE_NET | INTR_MPSAFE, NULL, em_msix_rx,
2703                     rxr, &rxr->tag)) != 0) {
2704                         device_printf(dev, "Failed to register RX handler");
2705                         return (error);
2706                 }
2707 #if __FreeBSD_version >= 800504
2708                 bus_describe_intr(dev, rxr->res, rxr->tag, "rx%d", i);
2709 #endif
2710                 rxr->msix = vector;
2711
2712                 if (em_last_bind_cpu < 0)
2713                         em_last_bind_cpu = CPU_FIRST();
2714                 cpu_id = em_last_bind_cpu;
2715                 bus_bind_intr(dev, rxr->res, cpu_id);
2716
2717                 TASK_INIT(&rxr->rx_task, 0, em_handle_rx, rxr);
2718                 rxr->tq = taskqueue_create_fast("em_rxq", M_NOWAIT,
2719                     taskqueue_thread_enqueue, &rxr->tq);
2720                 taskqueue_start_threads(&rxr->tq, 1, PI_NET, "%s rxq (cpuid %d)",
2721                     device_get_nameunit(adapter->dev), cpu_id);
2722                 /*
2723                 ** Set the bit to enable interrupt
2724                 ** in E1000_IMS -- bits 20 and 21
2725                 ** are for RX0 and RX1, note this has
2726                 ** NOTHING to do with the MSIX vector
2727                 */
2728                 rxr->ims = 1 << (20 + i);
2729                 adapter->ims |= rxr->ims;
2730                 adapter->ivars |= (8 | rxr->msix) << (i * 4);
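                /*
                ** IVAR sketch (field layout assumed from the 82574
                ** datasheet): each interrupt cause gets a 4-bit field,
                ** bits 0-2 selecting the MSIX vector and bit 3 (the 8
                ** above) marking the entry valid.  RX queues occupy
                ** nibbles 0-1, TX queues nibbles 2-3, and the link
                ** cause sits at bits 16-18.
                */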
2731
2732                 em_last_bind_cpu = CPU_NEXT(em_last_bind_cpu);
2733         }
2734
2735         for (int i = 0; i < adapter->num_queues; i++, txr++, vector++) {
2736                 /* TX ring */
2737                 rid = vector + 1;
2738                 txr->res = bus_alloc_resource_any(dev,
2739                     SYS_RES_IRQ, &rid, RF_ACTIVE);
2740                 if (txr->res == NULL) {
2741                         device_printf(dev,
2742                             "Unable to allocate bus resource: "
2743                             "TX MSIX Interrupt %d\n", i);
2744                         return (ENXIO);
2745                 }
2746                 if ((error = bus_setup_intr(dev, txr->res,
2747                     INTR_TYPE_NET | INTR_MPSAFE, NULL, em_msix_tx,
2748                     txr, &txr->tag)) != 0) {
2749                         device_printf(dev, "Failed to register TX handler\n");
2750                         return (error);
2751                 }
2752 #if __FreeBSD_version >= 800504
2753                 bus_describe_intr(dev, txr->res, txr->tag, "tx%d", i);
2754 #endif
2755                 txr->msix = vector;
2756
2757                 if (em_last_bind_cpu < 0)
2758                         em_last_bind_cpu = CPU_FIRST();
2759                 cpu_id = em_last_bind_cpu;
2760                 bus_bind_intr(dev, txr->res, cpu_id);
2761
2762                 TASK_INIT(&txr->tx_task, 0, em_handle_tx, txr);
2763                 txr->tq = taskqueue_create_fast("em_txq", M_NOWAIT,
2764                     taskqueue_thread_enqueue, &txr->tq);
2765                 taskqueue_start_threads(&txr->tq, 1, PI_NET, "%s txq (cpuid %d)",
2766                     device_get_nameunit(adapter->dev), cpu_id);
2767                 /*
2768                 ** Set the bit to enable interrupt
2769                 ** in E1000_IMS -- bits 22 and 23
2770                 ** are for TX0 and TX1, note this has
2771                 ** NOTHING to do with the MSIX vector
2772                 */
2773                 txr->ims = 1 << (22 + i);
2774                 adapter->ims |= txr->ims;
2775                 adapter->ivars |= (8 | txr->msix) << (8 + (i * 4));
2776
2777                 em_last_bind_cpu = CPU_NEXT(em_last_bind_cpu);
2778         }
2779
2780         /* Link interrupt */
2781         rid = vector + 1;
2782         adapter->res = bus_alloc_resource_any(dev,
2783             SYS_RES_IRQ, &rid, RF_SHAREABLE | RF_ACTIVE);
2784         if (!adapter->res) {
2785                 device_printf(dev, "Unable to allocate "
2786                     "bus resource: Link interrupt [%d]\n", rid);
2787                 return (ENXIO);
2788         }
2789         /* Set the link handler function */
2790         error = bus_setup_intr(dev, adapter->res,
2791             INTR_TYPE_NET | INTR_MPSAFE, NULL,
2792             em_msix_link, adapter, &adapter->tag);
2793         if (error) {
2794                 adapter->res = NULL;
2795                 device_printf(dev, "Failed to register LINK handler\n");
2796                 return (error);
2797         }
2798 #if __FreeBSD_version >= 800504
2799         bus_describe_intr(dev, adapter->res, adapter->tag, "link");
2800 #endif
2801         adapter->linkvec = vector;
2802         adapter->ivars |=  (8 | vector) << 16;
2803         adapter->ivars |= 0x80000000;
2804
2805         return (0);
2806 }
2807
2808
2809 static void
2810 em_free_pci_resources(struct adapter *adapter)
2811 {
2812         device_t        dev = adapter->dev;
2813         struct tx_ring  *txr;
2814         struct rx_ring  *rxr;
2815         int             rid;
2816
2817
2818         /*
2819         ** Release all the queue interrupt resources:
2820         */
2821         for (int i = 0; i < adapter->num_queues; i++) {
2822                 /* an early abort? (ring array never allocated) */
2823                 if (adapter->tx_rings == NULL)
2824                         break;
2825                 txr = &adapter->tx_rings[i];
2826                 rid = txr->msix + 1;
2827                 if (txr->tag != NULL) {
2828                         bus_teardown_intr(dev, txr->res, txr->tag);
2829                         txr->tag = NULL;
2830                 }
2831                 if (txr->res != NULL)
2832                         bus_release_resource(dev, SYS_RES_IRQ,
2833                             rid, txr->res);
2834
2835                 /* an early abort? (ring array never allocated) */
2836                 if (adapter->rx_rings == NULL)
2837                         break;
2838                 rxr = &adapter->rx_rings[i];
2839                 rid = rxr->msix + 1;
2840                 if (rxr->tag != NULL) {
2841                         bus_teardown_intr(dev, rxr->res, rxr->tag);
2842                         rxr->tag = NULL;
2843                 }
2844                 if (rxr->res != NULL)
2845                         bus_release_resource(dev, SYS_RES_IRQ,
2846                             rid, rxr->res);
2847         }
2848
2849         if (adapter->linkvec) /* we are doing MSIX */
2850                 rid = adapter->linkvec + 1;
2851         else
2852                 rid = (adapter->msix != 0) ? 1 : 0;
2853
2854         if (adapter->tag != NULL) {
2855                 bus_teardown_intr(dev, adapter->res, adapter->tag);
2856                 adapter->tag = NULL;
2857         }
2858
2859         if (adapter->res != NULL)
2860                 bus_release_resource(dev, SYS_RES_IRQ, rid, adapter->res);
2861
2862
2863         if (adapter->msix)
2864                 pci_release_msi(dev);
2865
2866         if (adapter->msix_mem != NULL)
2867                 bus_release_resource(dev, SYS_RES_MEMORY,
2868                     adapter->memrid, adapter->msix_mem);
2869
2870         if (adapter->memory != NULL)
2871                 bus_release_resource(dev, SYS_RES_MEMORY,
2872                     PCIR_BAR(0), adapter->memory);
2873
2874         if (adapter->flash != NULL)
2875                 bus_release_resource(dev, SYS_RES_MEMORY,
2876                     EM_FLASH, adapter->flash);
2877 }
2878
2879 /*
2880  * Setup MSI or MSI/X
2881  */
2882 static int
2883 em_setup_msix(struct adapter *adapter)
2884 {
2885         device_t dev = adapter->dev;
2886         int val;
2887
2888         /* Nearly always going to use one queue */
2889         adapter->num_queues = 1;
2890
2891         /*
2892         ** Try using MSI-X for Hartwell adapters
2893         */
2894         if ((adapter->hw.mac.type == e1000_82574) &&
2895             (em_enable_msix == TRUE)) {
2896 #ifdef EM_MULTIQUEUE
2897                 adapter->num_queues = (em_num_queues == 1) ? 1 : 2;
2898                 if (adapter->num_queues > 1)
2899                         em_enable_vectors_82574(adapter);
2900 #endif
2901                 /* Map the MSIX BAR */
2902                 adapter->memrid = PCIR_BAR(EM_MSIX_BAR);
2903                 adapter->msix_mem = bus_alloc_resource_any(dev,
2904                     SYS_RES_MEMORY, &adapter->memrid, RF_ACTIVE);
2905                 if (adapter->msix_mem == NULL) {
2906                         /* May not be enabled */
2907                         device_printf(adapter->dev,
2908                             "Unable to map MSIX table\n");
2909                         goto msi;
2910                 }
2911                 val = pci_msix_count(dev); 
2912
2913 #ifdef EM_MULTIQUEUE
2914                 /* We need 5 vectors in the multiqueue case */
2915                 if (adapter->num_queues > 1) {
2916                         if (val >= 5)
2917                                 val = 5;
2918                         else {
2919                                 adapter->num_queues = 1;
2920                                 device_printf(adapter->dev,
2921                                     "Insufficient MSIX vectors for >1 queue, "
2922                                     "using single queue...\n");
2923                                 goto msix_one;
2924                         }
2925                 } else {
2926 msix_one:
2927 #endif
2928                         if (val >= 3)
2929                                 val = 3;
2930                         else {
2931                                 device_printf(adapter->dev,
2932                                 "Insufficient MSIX vectors, using MSI\n");
2933                                 goto msi;
2934                         }
2935 #ifdef EM_MULTIQUEUE
2936                 }
2937 #endif
2938
2939                 if (pci_alloc_msix(dev, &val) == 0) {
2940                         device_printf(adapter->dev,
2941                             "Using MSIX interrupts "
2942                             "with %d vectors\n", val);
2943                         return (val);
2944                 }
2945
2946                 /*
2947                 ** If MSIX alloc failed or provided us with
2948                 ** less than needed, free and fall through to MSI
2949                 */
2950                 pci_release_msi(dev);
2951         }
2952 msi:
2953         if (adapter->msix_mem != NULL) {
2954                 bus_release_resource(dev, SYS_RES_MEMORY,
2955                     adapter->memrid, adapter->msix_mem);
2956                 adapter->msix_mem = NULL;
2957         }
2958         val = 1;
2959         if (pci_alloc_msi(dev, &val) == 0) {
2960                 device_printf(adapter->dev, "Using an MSI interrupt\n");
2961                 return (val);
2962         } 
2963         /* Should only happen due to manual configuration */
2964         device_printf(adapter->dev, "No MSI/MSIX, using a Legacy IRQ\n");
2965         return (0);
2966 }
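/*
 * Usage note (a sketch; tunable names are taken from this driver's
 * declarations, check your version): the MSI-X path above is gated by
 * the hw.em.enable_msix loader tunable, and with EM_MULTIQUEUE compiled
 * in, hw.em.num_queues selects the queue count, e.g. in loader.conf:
 *
 *      hw.em.enable_msix="1"
 *      hw.em.num_queues="2"
 */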
2967
2968
2969 /*
2970 ** The 3 following flush routines are used as a workaround in the
2971 ** I219 client parts and only for them.
2972 **
2973 ** em_flush_tx_ring - remove all descriptors from the tx_ring
2974 **
2975 ** We want to clear all pending descriptors from the TX ring.
2976 ** Zeroing happens when the HW reads the regs. We assign the ring itself as
2977 ** the data of the next descriptor. We don't care about the data since we are
2978 ** about to reset the HW.
2979 */
2980 static void
2981 em_flush_tx_ring(struct adapter *adapter)
2982 {
2983         struct e1000_hw         *hw = &adapter->hw;
2984         struct tx_ring          *txr = adapter->tx_rings;
2985         struct e1000_tx_desc    *txd;
2986         u32                     tctl, txd_lower = E1000_TXD_CMD_IFCS;
2987         u16                     size = 512;
2988
2989         tctl = E1000_READ_REG(hw, E1000_TCTL);
2990         E1000_WRITE_REG(hw, E1000_TCTL, tctl | E1000_TCTL_EN);
2991
2992         txd = &txr->tx_base[txr->next_avail_desc++];
2993         if (txr->next_avail_desc == adapter->num_tx_desc)
2994                 txr->next_avail_desc = 0;
2995
2996         /* Just use the ring as a dummy buffer addr */
2997         txd->buffer_addr = txr->txdma.dma_paddr;
2998         txd->lower.data = htole32(txd_lower | size);
2999         txd->upper.data = 0;
3000
3001         /* flush descriptors to memory before notifying the HW */
3002         wmb();
3003
3004         E1000_WRITE_REG(hw, E1000_TDT(0), txr->next_avail_desc);
3005         mb();
3006         usec_delay(250);
3007 }
3008
3009 /*
3010 ** em_flush_rx_ring - remove all descriptors from the rx_ring
3011 **
3012 ** Mark all descriptors in the RX ring as consumed and disable the rx ring
3013 */
3014 static void
3015 em_flush_rx_ring(struct adapter *adapter)
3016 {
3017         struct e1000_hw *hw = &adapter->hw;
3018         u32             rctl, rxdctl;
3019
3020         rctl = E1000_READ_REG(hw, E1000_RCTL);
3021         E1000_WRITE_REG(hw, E1000_RCTL, rctl & ~E1000_RCTL_EN);
3022         E1000_WRITE_FLUSH(hw);
3023         usec_delay(150);
3024
3025         rxdctl = E1000_READ_REG(hw, E1000_RXDCTL(0));
3026         /* zero the lower 14 bits (prefetch and host thresholds) */
3027         rxdctl &= 0xffffc000;
3028         /*
3029          * update thresholds: prefetch threshold to 31, host threshold to 1
3030          * and make sure the granularity is "descriptors" and not "cache lines"
3031          */
3032         rxdctl |= (0x1F | (1 << 8) | E1000_RXDCTL_THRESH_UNIT_DESC);
3033         E1000_WRITE_REG(hw, E1000_RXDCTL(0), rxdctl);
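        /*
         * The value written above packs PTHRESH into bits 0-5 (0x1F = 31)
         * and HTHRESH into bits 8-13 (1 << 8 = threshold of 1), with
         * THRESH_UNIT_DESC selecting descriptor granularity (bit
         * positions assumed from the shared e1000 register definitions).
         */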
3034
3035         /* momentarily enable the RX ring for the changes to take effect */
3036         E1000_WRITE_REG(hw, E1000_RCTL, rctl | E1000_RCTL_EN);
3037         E1000_WRITE_FLUSH(hw);
3038         usec_delay(150);
3039         E1000_WRITE_REG(hw, E1000_RCTL, rctl & ~E1000_RCTL_EN);
3040 }
3041
3042 /*
3043 ** em_flush_desc_rings - remove all descriptors from the descriptor rings
3044 **
3045 ** In i219, the descriptor rings must be emptied before resetting the HW
3046 ** or before changing the device state to D3 during runtime (runtime PM).
3047 **
3048 ** Failure to do this will cause the HW to enter a unit hang state which can
3049 ** only be released by PCI reset on the device
3050 **
3051 */
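/*
 * Note: the hang indication checked below lives in PCI config space
 * (PCICFG_DESC_RING_STATUS / FLUSH_DESC_REQUIRED), not in a MAC
 * register, which is why pci_read_config() is used; the same scheme
 * appears in other e1000 drivers implementing this I219 workaround.
 */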
3052 static void
3053 em_flush_desc_rings(struct adapter *adapter)
3054 {
3055         struct e1000_hw *hw = &adapter->hw;
3056         device_t        dev = adapter->dev;
3057         u16             hang_state;
3058         u32             fext_nvm11, tdlen;
3059  
3060         /* First, disable MULR fix in FEXTNVM11 */
3061         fext_nvm11 = E1000_READ_REG(hw, E1000_FEXTNVM11);
3062         fext_nvm11 |= E1000_FEXTNVM11_DISABLE_MULR_FIX;
3063         E1000_WRITE_REG(hw, E1000_FEXTNVM11, fext_nvm11);
3064         
3065         /* do nothing if we're not in faulty state, or if the queue is empty */
3066         tdlen = E1000_READ_REG(hw, E1000_TDLEN(0));
3067         hang_state = pci_read_config(dev, PCICFG_DESC_RING_STATUS, 2);
3068         if (!(hang_state & FLUSH_DESC_REQUIRED) || !tdlen)
3069                 return;
3070         em_flush_tx_ring(adapter);
3071
3072         /* recheck, maybe the fault is caused by the rx ring */
3073         hang_state = pci_read_config(dev, PCICFG_DESC_RING_STATUS, 2);
3074         if (hang_state & FLUSH_DESC_REQUIRED)
3075                 em_flush_rx_ring(adapter);
3076 }
3077
3078
3079 /*********************************************************************
3080  *
3081  *  Initialize the hardware to a configuration
3082  *  as specified by the adapter structure.
3083  *
3084  **********************************************************************/
3085 static void
3086 em_reset(struct adapter *adapter)
3087 {
3088         device_t        dev = adapter->dev;
3089         struct ifnet    *ifp = adapter->ifp;
3090         struct e1000_hw *hw = &adapter->hw;
3091         u16             rx_buffer_size;
3092         u32             pba;
3093
3094         INIT_DEBUGOUT("em_reset: begin");
3095
3096         /* Set up smart power down as default off on newer adapters. */
3097         if (!em_smart_pwr_down && (hw->mac.type == e1000_82571 ||
3098             hw->mac.type == e1000_82572)) {
3099                 u16 phy_tmp = 0;
3100
3101                 /* Speed up time to link by disabling smart power down. */
3102                 e1000_read_phy_reg(hw, IGP02E1000_PHY_POWER_MGMT, &phy_tmp);
3103                 phy_tmp &= ~IGP02E1000_PM_SPD;
3104                 e1000_write_phy_reg(hw, IGP02E1000_PHY_POWER_MGMT, phy_tmp);
3105         }
3106
3107         /*
3108          * Packet Buffer Allocation (PBA)
3109          * Writing PBA sets the receive portion of the buffer;
3110          * the remainder is used for the transmit buffer.
3111          */
3112         switch (hw->mac.type) {
3113         /* Total Packet Buffer on these is 48K */
3114         case e1000_82571:
3115         case e1000_82572:
3116         case e1000_80003es2lan:
3117                 pba = E1000_PBA_32K; /* 32K for Rx, 16K for Tx */
3118                 break;
3119         case e1000_82573: /* 82573: Total Packet Buffer is 32K */
3120                 pba = E1000_PBA_12K; /* 12K for Rx, 20K for Tx */
3121                 break;
3122         case e1000_82574:
3123         case e1000_82583:
3124                 pba = E1000_PBA_20K; /* 20K for Rx, 20K for Tx */
3125                 break;
3126         case e1000_ich8lan:
3127                 pba = E1000_PBA_8K;
3128                 break;
3129         case e1000_ich9lan:
3130         case e1000_ich10lan:
3131                 /* Boost Receive side for jumbo frames */
3132                 if (adapter->hw.mac.max_frame_size > 4096)
3133                         pba = E1000_PBA_14K;
3134                 else
3135                         pba = E1000_PBA_10K;
3136                 break;
3137         case e1000_pchlan:
3138         case e1000_pch2lan:
3139         case e1000_pch_lpt:
3140         case e1000_pch_spt:
3141         case e1000_pch_cnp:
3142                 pba = E1000_PBA_26K;
3143                 break;
3144         default:
3145                 if (adapter->hw.mac.max_frame_size > 8192)
3146                         pba = E1000_PBA_40K; /* 40K for Rx, 24K for Tx */
3147                 else
3148                         pba = E1000_PBA_48K; /* 48K for Rx, 16K for Tx */
3149         }
3150         E1000_WRITE_REG(&adapter->hw, E1000_PBA, pba);
3151
3152         /*
3153          * These parameters control the automatic generation (Tx) and
3154          * response (Rx) to Ethernet PAUSE frames.
3155          * - High water mark should allow for at least two frames to be
3156          *   received after sending an XOFF.
3157          * - Low water mark works best when it is very near the high water mark.
3158          *   This allows the receiver to restart by sending XON when it has
3159          *   drained a bit. Here we use an arbitrary value of 1500 which will
3160          *   restart after one full frame is pulled from the buffer. There
3161          *   could be several smaller frames in the buffer and if so they will
3162          *   not trigger the XON until their total number reduces the buffer
3163          *   by 1500.
3164          * - The pause time is fairly large at 1000 x 512ns = 512 usec.
3165          */
3166         rx_buffer_size = ((E1000_READ_REG(hw, E1000_PBA) & 0xffff) << 10);
3167         hw->fc.high_water = rx_buffer_size -
3168             roundup2(adapter->hw.mac.max_frame_size, 1024);
3169         hw->fc.low_water = hw->fc.high_water - 1500;
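        /*
         * Worked example (illustrative numbers only): with a 26KB RX PBA,
         * rx_buffer_size above reads back as 26 * 1024 = 26624 bytes.  For
         * a standard-MTU max frame (~1522 bytes), roundup2(1522, 1024) is
         * 2048, giving high_water = 24576 and low_water = 23076.  The
         * device specific overrides below may replace these values.
         */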
3170
3171         if (adapter->fc) /* locally set flow control value? */
3172                 hw->fc.requested_mode = adapter->fc;
3173         else
3174                 hw->fc.requested_mode = e1000_fc_full;
3175
3176         if (hw->mac.type == e1000_80003es2lan)
3177                 hw->fc.pause_time = 0xFFFF;
3178         else
3179                 hw->fc.pause_time = EM_FC_PAUSE_TIME;
3180
3181         hw->fc.send_xon = TRUE;
3182
3183         /* Device specific overrides/settings */
3184         switch (hw->mac.type) {
3185         case e1000_pchlan:
3186                 /* Workaround: no TX flow ctrl for PCH */
3187                 hw->fc.requested_mode = e1000_fc_rx_pause;
3188                 hw->fc.pause_time = 0xFFFF; /* override */
3189                 if (ifp->if_mtu > ETHERMTU) {
3190                         hw->fc.high_water = 0x3500;
3191                         hw->fc.low_water = 0x1500;
3192                 } else {
3193                         hw->fc.high_water = 0x5000;
3194                         hw->fc.low_water = 0x3000;
3195                 }
3196                 hw->fc.refresh_time = 0x1000;
3197                 break;
3198         case e1000_pch2lan:
3199         case e1000_pch_lpt:
3200         case e1000_pch_spt:
3201         case e1000_pch_cnp:
3202                 hw->fc.high_water = 0x5C20;
3203                 hw->fc.low_water = 0x5048;
3204                 hw->fc.pause_time = 0x0650;
3205                 hw->fc.refresh_time = 0x0400;
3206                 /* Jumbos need adjusted PBA */
3207                 if (ifp->if_mtu > ETHERMTU)
3208                         E1000_WRITE_REG(hw, E1000_PBA, 12);
3209                 else
3210                         E1000_WRITE_REG(hw, E1000_PBA, 26);
3211                 break;
3212         case e1000_ich9lan:
3213         case e1000_ich10lan:
3214                 if (ifp->if_mtu > ETHERMTU) {
3215                         hw->fc.high_water = 0x2800;
3216                         hw->fc.low_water = hw->fc.high_water - 8;
3217                         break;
3218                 } 
3219                 /* else fall thru */
3220         default:
3221                 if (hw->mac.type == e1000_80003es2lan)
3222                         hw->fc.pause_time = 0xFFFF;
3223                 break;
3224         }
3225
3226         /* I219 needs some special flushing to avoid hangs */
3227         if (hw->mac.type == e1000_pch_spt)
3228                 em_flush_desc_rings(adapter);
3229
3230         /* Issue a global reset */
3231         e1000_reset_hw(hw);
3232         E1000_WRITE_REG(hw, E1000_WUC, 0);
3233         em_disable_aspm(adapter);
3234         /* and a re-init */
3235         if (e1000_init_hw(hw) < 0) {
3236                 device_printf(dev, "Hardware Initialization Failed\n");
3237                 return;
3238         }
3239
3240         E1000_WRITE_REG(hw, E1000_VET, ETHERTYPE_VLAN);
3241         e1000_get_phy_info(hw);
3242         e1000_check_for_link(hw);
3243         return;
3244 }
3245
3246 /*********************************************************************
3247  *
3248  *  Setup networking device structure and register an interface.
3249  *
3250  **********************************************************************/
3251 static int
3252 em_setup_interface(device_t dev, struct adapter *adapter)
3253 {
3254         struct ifnet   *ifp;
3255
3256         INIT_DEBUGOUT("em_setup_interface: begin");
3257
3258         ifp = adapter->ifp = if_alloc(IFT_ETHER);
3259         if (ifp == NULL) {
3260                 device_printf(dev, "can not allocate ifnet structure\n");
3261                 return (-1);
3262         }
3263         if_initname(ifp, device_get_name(dev), device_get_unit(dev));
3264         ifp->if_init =  em_init;
3265         ifp->if_softc = adapter;
3266         ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
3267         ifp->if_ioctl = em_ioctl;
3268
3269         /* TSO parameters */
3270         ifp->if_hw_tsomax = IP_MAXPACKET;
3271         /* Take m_pullup(9)'s in em_xmit() w/ TSO into account. */
3272         ifp->if_hw_tsomaxsegcount = EM_MAX_SCATTER - 5;
3273         ifp->if_hw_tsomaxsegsize = EM_TSO_SEG_SIZE;
3274
3275 #ifdef EM_MULTIQUEUE
3276         /* Multiqueue stack interface */
3277         ifp->if_transmit = em_mq_start;
3278         ifp->if_qflush = em_qflush;
3279 #else
3280         ifp->if_start = em_start;
3281         IFQ_SET_MAXLEN(&ifp->if_snd, adapter->num_tx_desc - 1);
3282         ifp->if_snd.ifq_drv_maxlen = adapter->num_tx_desc - 1;
3283         IFQ_SET_READY(&ifp->if_snd);
3284 #endif  
3285
3286         ether_ifattach(ifp, adapter->hw.mac.addr);
3287
3288         ifp->if_capabilities = IFCAP_HWCSUM | IFCAP_VLAN_HWCSUM;
3289         ifp->if_capenable = ifp->if_capabilities;
3290
3291         /*
3292          * Tell the upper layer(s) we
3293          * support full VLAN capability
3294          */
3295         ifp->if_data.ifi_hdrlen = sizeof(struct ether_vlan_header);
3296         ifp->if_capabilities |= IFCAP_VLAN_HWTAGGING
3297                              |  IFCAP_VLAN_HWTSO
3298                              |  IFCAP_VLAN_MTU;
3299         ifp->if_capenable |= IFCAP_VLAN_HWTAGGING
3300                           |  IFCAP_VLAN_MTU;
3301
3302         /*
3303          * We don't enable IFCAP_{TSO4,VLAN_HWTSO} by default because:
3304          * - Although the silicon bug of TSO only working at gigabit speed is
3305          *   worked around in em_update_link_status() by selectively setting
3306          *   CSUM_IP_TSO, we cannot atomically flush already queued TSO-using
3307          *   descriptors.  Thus, such descriptors may still cause the MAC to
3308          *   hang and, consequently, TSO is only safe to be used in setups
3309          *   where the link isn't expected to switch from gigabit to lower
3310          *   speeds.
3311          * - Similarly, there's currently no way to trigger a reconfiguration
3312          *   of vlan(4) when the state of IFCAP_VLAN_HWTSO support changes at
3313          *   runtime.  Therefore, IFCAP_VLAN_HWTSO also only is safe to use
3314          *   when link speed changes are not to be expected.
3315          * - Despite all the workarounds for TSO-related silicon bugs, at
3316          *   least 82579 still may hang at gigabit speed with IFCAP_TSO4.
3317          */
3318         ifp->if_capabilities |= IFCAP_TSO4 | IFCAP_VLAN_HWTSO;
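        /*
         * These remain capabilities only; an administrator accepting the
         * caveats above can still enable them at runtime, e.g.
         * (illustrative): ifconfig em0 tso4 vlanhwtso
         */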
3319
3320         /*
3321         ** Don't turn this on by default: if vlans are
3322         ** created on another pseudo device (e.g. lagg),
3323         ** vlan events are not passed through, breaking
3324         ** operation, but with HW FILTER off it works. If
3325         ** using vlans directly on the em driver you can
3326         ** enable this and get full hardware tag filtering.
3327         */
3328         ifp->if_capabilities |= IFCAP_VLAN_HWFILTER;
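        /* Runtime enable, e.g. (illustrative): ifconfig em0 vlanhwfilter */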
3329
3330 #ifdef DEVICE_POLLING
3331         ifp->if_capabilities |= IFCAP_POLLING;
3332 #endif
3333
3334         /* Enable only WOL MAGIC by default */
3335         if (adapter->wol) {
3336                 ifp->if_capabilities |= IFCAP_WOL;
3337                 ifp->if_capenable |= IFCAP_WOL_MAGIC;
3338         }
3339                 
3340         /*
3341          * Specify the media types supported by this adapter and register
3342          * callbacks to update media and link information
3343          */
3344         ifmedia_init(&adapter->media, IFM_IMASK,
3345             em_media_change, em_media_status);
3346         if ((adapter->hw.phy.media_type == e1000_media_type_fiber) ||
3347             (adapter->hw.phy.media_type == e1000_media_type_internal_serdes)) {
3348                 u_char fiber_type = IFM_1000_SX;        /* default type */
3349
3350                 ifmedia_add(&adapter->media, IFM_ETHER | fiber_type | IFM_FDX, 
3351                             0, NULL);
3352                 ifmedia_add(&adapter->media, IFM_ETHER | fiber_type, 0, NULL);
3353         } else {
3354                 ifmedia_add(&adapter->media, IFM_ETHER | IFM_10_T, 0, NULL);
3355                 ifmedia_add(&adapter->media, IFM_ETHER | IFM_10_T | IFM_FDX,
3356                             0, NULL);
3357                 ifmedia_add(&adapter->media, IFM_ETHER | IFM_100_TX,
3358                             0, NULL);
3359                 ifmedia_add(&adapter->media, IFM_ETHER | IFM_100_TX | IFM_FDX,
3360                             0, NULL);
3361                 if (adapter->hw.phy.type != e1000_phy_ife) {
3362                         ifmedia_add(&adapter->media,
3363                                 IFM_ETHER | IFM_1000_T | IFM_FDX, 0, NULL);
3364                         ifmedia_add(&adapter->media,
3365                                 IFM_ETHER | IFM_1000_T, 0, NULL);
3366                 }
3367         }
3368         ifmedia_add(&adapter->media, IFM_ETHER | IFM_AUTO, 0, NULL);
3369         ifmedia_set(&adapter->media, IFM_ETHER | IFM_AUTO);
3370         return (0);
3371 }
3372
3373
3374 /*
3375  * Manage DMA'able memory.
3376  */
3377 static void
3378 em_dmamap_cb(void *arg, bus_dma_segment_t *segs, int nseg, int error)
3379 {
3380         if (error)
3381                 return;
3382         *(bus_addr_t *) arg = segs[0].ds_addr;
3383 }
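/*
 * Per bus_dmamap_load(9): because the loads in this file pass
 * BUS_DMA_NOWAIT, the callback above runs before bus_dmamap_load()
 * returns instead of being deferred, so dma_paddr may be read
 * immediately afterwards.
 */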
3384
3385 static int
3386 em_dma_malloc(struct adapter *adapter, bus_size_t size,
3387         struct em_dma_alloc *dma, int mapflags)
3388 {
3389         int error;
3390
3391         error = bus_dma_tag_create(bus_get_dma_tag(adapter->dev), /* parent */
3392                                 EM_DBA_ALIGN, 0,        /* alignment, bounds */
3393                                 BUS_SPACE_MAXADDR,      /* lowaddr */
3394                                 BUS_SPACE_MAXADDR,      /* highaddr */
3395                                 NULL, NULL,             /* filter, filterarg */
3396                                 size,                   /* maxsize */
3397                                 1,                      /* nsegments */
3398                                 size,                   /* maxsegsize */
3399                                 0,                      /* flags */
3400                                 NULL,                   /* lockfunc */
3401                                 NULL,                   /* lockarg */
3402                                 &dma->dma_tag);
3403         if (error) {
3404                 device_printf(adapter->dev,
3405                     "%s: bus_dma_tag_create failed: %d\n",
3406                     __func__, error);
3407                 goto fail_0;
3408         }
3409
3410         error = bus_dmamem_alloc(dma->dma_tag, (void**) &dma->dma_vaddr,
3411             BUS_DMA_NOWAIT | BUS_DMA_COHERENT, &dma->dma_map);
3412         if (error) {
3413                 device_printf(adapter->dev,
3414                     "%s: bus_dmamem_alloc(%ju) failed: %d\n",
3415                     __func__, (uintmax_t)size, error);
3416                 goto fail_2;
3417         }
3418
3419         dma->dma_paddr = 0;
3420         error = bus_dmamap_load(dma->dma_tag, dma->dma_map, dma->dma_vaddr,
3421             size, em_dmamap_cb, &dma->dma_paddr, mapflags | BUS_DMA_NOWAIT);
3422         if (error || dma->dma_paddr == 0) {
3423                 device_printf(adapter->dev,
3424                     "%s: bus_dmamap_load failed: %d\n",
3425                     __func__, error);
3426                 goto fail_3;
3427         }
3428
3429         return (0);
3430
3431 fail_3:
3432         bus_dmamap_unload(dma->dma_tag, dma->dma_map);
3433 fail_2:
3434         bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map);
3435         bus_dma_tag_destroy(dma->dma_tag);
3436 fail_0:
3437         dma->dma_tag = NULL;
3438
3439         return (error);
3440 }
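/*
 * Typical usage (a sketch mirroring em_allocate_queues() below; the
 * size and flags are illustrative):
 *
 *      struct em_dma_alloc dma;
 *
 *      if (em_dma_malloc(adapter, tsize, &dma, BUS_DMA_NOWAIT) == 0) {
 *              ... use dma.dma_vaddr / dma.dma_paddr ...
 *              em_dma_free(adapter, &dma);
 *      }
 */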
3441
3442 static void
3443 em_dma_free(struct adapter *adapter, struct em_dma_alloc *dma)
3444 {
3445         if (dma->dma_tag == NULL)
3446                 return;
3447         if (dma->dma_paddr != 0) {
3448                 bus_dmamap_sync(dma->dma_tag, dma->dma_map,
3449                     BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
3450                 bus_dmamap_unload(dma->dma_tag, dma->dma_map);
3451                 dma->dma_paddr = 0;
3452         }
3453         if (dma->dma_vaddr != NULL) {
3454                 bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map);
3455                 dma->dma_vaddr = NULL;
3456         }
3457         bus_dma_tag_destroy(dma->dma_tag);
3458         dma->dma_tag = NULL;
3459 }
3460
3461
3462 /*********************************************************************
3463  *
3464  *  Allocate memory for the transmit and receive rings, and then
3465  *  the descriptors associated with each, called only once at attach.
3466  *
3467  **********************************************************************/
3468 static int
3469 em_allocate_queues(struct adapter *adapter)
3470 {
3471         device_t                dev = adapter->dev;
3472         struct tx_ring          *txr = NULL;
3473         struct rx_ring          *rxr = NULL;
3474         int rsize, tsize, error = E1000_SUCCESS;
3475         int txconf = 0, rxconf = 0;
3476
3477
3478         /* Allocate the TX ring struct memory */
3479         if (!(adapter->tx_rings =
3480             (struct tx_ring *) malloc(sizeof(struct tx_ring) *
3481             adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
3482                 device_printf(dev, "Unable to allocate TX ring memory\n");
3483                 error = ENOMEM;
3484                 goto fail;
3485         }
3486
3487         /* Now allocate the RX */
3488         if (!(adapter->rx_rings =
3489             (struct rx_ring *) malloc(sizeof(struct rx_ring) *
3490             adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
3491                 device_printf(dev, "Unable to allocate RX ring memory\n");
3492                 error = ENOMEM;
3493                 goto rx_fail;
3494         }
3495
3496         tsize = roundup2(adapter->num_tx_desc *
3497             sizeof(struct e1000_tx_desc), EM_DBA_ALIGN);
3498         /*
3499          * Now set up the TX queues, txconf is needed to handle the
3500          * possibility that things fail midcourse and we need to
3501          * undo memory gracefully
3502          */ 
3503         for (int i = 0; i < adapter->num_queues; i++, txconf++) {
3504                 /* Set up some basics */
3505                 txr = &adapter->tx_rings[i];
3506                 txr->adapter = adapter;
3507                 txr->me = i;
3508
3509                 /* Initialize the TX lock */
3510                 snprintf(txr->mtx_name, sizeof(txr->mtx_name), "%s:tx(%d)",
3511                     device_get_nameunit(dev), txr->me);
3512                 mtx_init(&txr->tx_mtx, txr->mtx_name, NULL, MTX_DEF);
3513
3514                 if (em_dma_malloc(adapter, tsize,
3515                         &txr->txdma, BUS_DMA_NOWAIT)) {
3516                         device_printf(dev,
3517                             "Unable to allocate TX Descriptor memory\n");
3518                         error = ENOMEM;
3519                         goto err_tx_desc;
3520                 }
3521                 txr->tx_base = (struct e1000_tx_desc *)txr->txdma.dma_vaddr;
3522                 bzero((void *)txr->tx_base, tsize);
3523
3524                 if (em_allocate_transmit_buffers(txr)) {
3525                         device_printf(dev,
3526                             "Critical Failure setting up transmit buffers\n");
3527                         error = ENOMEM;
3528                         goto err_tx_desc;
3529                 }
3530 #if __FreeBSD_version >= 800000
3531                 /* Allocate a buf ring */
3532                 txr->br = buf_ring_alloc(4096, M_DEVBUF,
3533                     M_WAITOK, &txr->tx_mtx);
3534 #endif
3535         }
3536
3537         /*
3538          * Next the RX queues...
3539          */ 
3540         rsize = roundup2(adapter->num_rx_desc *
3541             sizeof(union e1000_rx_desc_extended), EM_DBA_ALIGN);
3542         for (int i = 0; i < adapter->num_queues; i++, rxconf++) {
3543                 rxr = &adapter->rx_rings[i];
3544                 rxr->adapter = adapter;
3545                 rxr->me = i;
3546
3547                 /* Initialize the RX lock */
3548                 snprintf(rxr->mtx_name, sizeof(rxr->mtx_name), "%s:rx(%d)",
3549                     device_get_nameunit(dev), rxr->me);
3550                 mtx_init(&rxr->rx_mtx, rxr->mtx_name, NULL, MTX_DEF);
3551
3552                 if (em_dma_malloc(adapter, rsize,
3553                         &rxr->rxdma, BUS_DMA_NOWAIT)) {
3554                         device_printf(dev,
3555                             "Unable to allocate RX Descriptor memory\n");
3556                         error = ENOMEM;
3557                         goto err_rx_desc;
3558                 }
3559                 rxr->rx_base = (union e1000_rx_desc_extended *)rxr->rxdma.dma_vaddr;
3560                 bzero((void *)rxr->rx_base, rsize);
3561
3562                 /* Allocate receive buffers for the ring*/
3563                 if (em_allocate_receive_buffers(rxr)) {
3564                         device_printf(dev,
3565                             "Critical Failure setting up receive buffers\n");
3566                         error = ENOMEM;
3567                         goto err_rx_desc;
3568                 }
3569         }
3570
3571         return (0);
3572
3573 err_rx_desc:
3574         for (rxr = adapter->rx_rings; rxconf > 0; rxr++, rxconf--)
3575                 em_dma_free(adapter, &rxr->rxdma);
3576 err_tx_desc:
3577         for (txr = adapter->tx_rings; txconf > 0; txr++, txconf--)
3578                 em_dma_free(adapter, &txr->txdma);
3579         free(adapter->rx_rings, M_DEVBUF);
3580 rx_fail:
3581 #if __FreeBSD_version >= 800000
3582         /* txr may be NULL if the RX ring array allocation failed first */
             if (txr != NULL && txr->br != NULL)
                     buf_ring_free(txr->br, M_DEVBUF);
3583 #endif
3584         free(adapter->tx_rings, M_DEVBUF);
3585 fail:
3586         return (error);
3587 }
3588
3589
3590 /*********************************************************************
3591  *
3592  *  Allocate memory for tx_buffer structures. The tx_buffer stores all
3593  *  the information needed to transmit a packet on the wire. This is
3594  *  called only once at attach, setup is done every reset.
3595  *
3596  **********************************************************************/
3597 static int
3598 em_allocate_transmit_buffers(struct tx_ring *txr)
3599 {
3600         struct adapter *adapter = txr->adapter;
3601         device_t dev = adapter->dev;
3602         struct em_txbuffer *txbuf;
3603         int error, i;
3604
3605         /*
3606          * Setup DMA descriptor areas.
3607          */
3608         if ((error = bus_dma_tag_create(bus_get_dma_tag(dev),
3609                                1, 0,                    /* alignment, bounds */
3610                                BUS_SPACE_MAXADDR,       /* lowaddr */
3611                                BUS_SPACE_MAXADDR,       /* highaddr */
3612                                NULL, NULL,              /* filter, filterarg */
3613                                EM_TSO_SIZE,             /* maxsize */
3614                                EM_MAX_SCATTER,          /* nsegments */
3615                                PAGE_SIZE,               /* maxsegsize */
3616                                0,                       /* flags */
3617                                NULL,                    /* lockfunc */
3618                                NULL,                    /* lockfuncarg */
3619                                &txr->txtag))) {
3620                 device_printf(dev, "Unable to allocate TX DMA tag\n");
3621                 goto fail;
3622         }
3623
3624         if (!(txr->tx_buffers =
3625             (struct em_txbuffer *) malloc(sizeof(struct em_txbuffer) *
3626             adapter->num_tx_desc, M_DEVBUF, M_NOWAIT | M_ZERO))) {
3627                 device_printf(dev, "Unable to allocate tx_buffer memory\n");
3628                 error = ENOMEM;
3629                 goto fail;
3630         }
3631
3632         /* Create the descriptor buffer dma maps */
3633         txbuf = txr->tx_buffers;
3634         for (i = 0; i < adapter->num_tx_desc; i++, txbuf++) {
3635                 error = bus_dmamap_create(txr->txtag, 0, &txbuf->map);
3636                 if (error != 0) {
3637                         device_printf(dev, "Unable to create TX DMA map\n");
3638                         goto fail;
3639                 }
3640         }
3641
3642         return 0;
3643 fail:
3644         /* We free all, it handles case where we are in the middle */
3645         em_free_transmit_structures(adapter);
3646         return (error);
3647 }
3648
3649 /*********************************************************************
3650  *
3651  *  Initialize a transmit ring.
3652  *
3653  **********************************************************************/
3654 static void
3655 em_setup_transmit_ring(struct tx_ring *txr)
3656 {
3657         struct adapter *adapter = txr->adapter;
3658         struct em_txbuffer *txbuf;
3659         int i;
3660 #ifdef DEV_NETMAP
3661         struct netmap_adapter *na = NA(adapter->ifp);
3662         struct netmap_slot *slot;
3663 #endif /* DEV_NETMAP */
3664
3665         /* Clear the old descriptor contents */
3666         EM_TX_LOCK(txr);
3667 #ifdef DEV_NETMAP
3668         slot = netmap_reset(na, NR_TX, txr->me, 0);
3669 #endif /* DEV_NETMAP */
3670
3671         bzero((void *)txr->tx_base,
3672               (sizeof(struct e1000_tx_desc)) * adapter->num_tx_desc);
3673         /* Reset indices */
3674         txr->next_avail_desc = 0;
3675         txr->next_to_clean = 0;
3676
3677         /* Free any existing tx buffers. */
3678         txbuf = txr->tx_buffers;
3679         for (i = 0; i < adapter->num_tx_desc; i++, txbuf++) {
3680                 if (txbuf->m_head != NULL) {
3681                         bus_dmamap_sync(txr->txtag, txbuf->map,
3682                             BUS_DMASYNC_POSTWRITE);
3683                         bus_dmamap_unload(txr->txtag, txbuf->map);
3684                         m_freem(txbuf->m_head);
3685                         txbuf->m_head = NULL;
3686                 }
3687 #ifdef DEV_NETMAP
3688                 if (slot) {
3689                         int si = netmap_idx_n2k(&na->tx_rings[txr->me], i);
3690                         uint64_t paddr;
3691                         void *addr;
3692
3693                         addr = PNMB(na, slot + si, &paddr);
3694                         txr->tx_base[i].buffer_addr = htole64(paddr);
3695                         /* reload the map for netmap mode */
3696                         netmap_load_map(na, txr->txtag, txbuf->map, addr);
3697                 }
3698 #endif /* DEV_NETMAP */
3699
3700                 /* clear the watch index */
3701                 txbuf->next_eop = -1;
3702         }
3703
3704         /* Set number of descriptors available */
3705         txr->tx_avail = adapter->num_tx_desc;
3706         txr->busy = EM_TX_IDLE;
3707
3708         /* Clear checksum offload context. */
3709         txr->last_hw_offload = 0;
3710         txr->last_hw_ipcss = 0;
3711         txr->last_hw_ipcso = 0;
3712         txr->last_hw_tucss = 0;
3713         txr->last_hw_tucso = 0;
3714
3715         bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
3716             BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
3717         EM_TX_UNLOCK(txr);
3718 }
3719
3720 /*********************************************************************
3721  *
3722  *  Initialize all transmit rings.
3723  *
3724  **********************************************************************/
3725 static void
3726 em_setup_transmit_structures(struct adapter *adapter)
3727 {
3728         struct tx_ring *txr = adapter->tx_rings;
3729
3730         for (int i = 0; i < adapter->num_queues; i++, txr++)
3731                 em_setup_transmit_ring(txr);
3732
3733         return;
3734 }
3735
3736 /*********************************************************************
3737  *
3738  *  Enable transmit unit.
3739  *
3740  **********************************************************************/
3741 static void
3742 em_initialize_transmit_unit(struct adapter *adapter)
3743 {
3744         struct tx_ring  *txr = adapter->tx_rings;
3745         struct e1000_hw *hw = &adapter->hw;
3746         u32     tctl, txdctl = 0, tarc, tipg = 0;
3747
3748         INIT_DEBUGOUT("em_initialize_transmit_unit: begin");
3749
3750         for (int i = 0; i < adapter->num_queues; i++, txr++) {
3751                 u64 bus_addr = txr->txdma.dma_paddr;
3752                 /* Base and Len of TX Ring */
3753                 E1000_WRITE_REG(hw, E1000_TDLEN(i),
3754                     adapter->num_tx_desc * sizeof(struct e1000_tx_desc));
3755                 E1000_WRITE_REG(hw, E1000_TDBAH(i),
3756                     (u32)(bus_addr >> 32));
3757                 E1000_WRITE_REG(hw, E1000_TDBAL(i),
3758                     (u32)bus_addr);
3759                 /* Init the HEAD/TAIL indices */
3760                 E1000_WRITE_REG(hw, E1000_TDT(i), 0);
3761                 E1000_WRITE_REG(hw, E1000_TDH(i), 0);
3762
3763                 HW_DEBUGOUT2("Base = %x, Length = %x\n",
3764                     E1000_READ_REG(&adapter->hw, E1000_TDBAL(i)),
3765                     E1000_READ_REG(&adapter->hw, E1000_TDLEN(i)));
3766
3767                 txr->busy = EM_TX_IDLE;
3768                 txdctl = 0; /* clear txdctl */
3769                 txdctl |= 0x1f; /* PTHRESH */
3770                 txdctl |= 1 << 8; /* HTHRESH */
3771                 txdctl |= 1 << 16; /* WTHRESH */
3772                 txdctl |= 1 << 22; /* Reserved bit 22 must always be 1 */
3773                 txdctl |= E1000_TXDCTL_GRAN;
3774                 txdctl |= 1 << 25; /* LWTHRESH */
3775
3776                 E1000_WRITE_REG(hw, E1000_TXDCTL(i), txdctl);
3777         }
3778
3779         /* Set the default values for the Tx Inter Packet Gap timer */
3780         switch (adapter->hw.mac.type) {
3781         case e1000_80003es2lan:
3782                 tipg = DEFAULT_82543_TIPG_IPGR1;
3783                 tipg |= DEFAULT_80003ES2LAN_TIPG_IPGR2 <<
3784                     E1000_TIPG_IPGR2_SHIFT;
3785                 break;
3786         default:
3787                 if ((adapter->hw.phy.media_type == e1000_media_type_fiber) ||
3788                     (adapter->hw.phy.media_type ==
3789                     e1000_media_type_internal_serdes))
3790                         tipg = DEFAULT_82543_TIPG_IPGT_FIBER;
3791                 else
3792                         tipg = DEFAULT_82543_TIPG_IPGT_COPPER;
3793                 tipg |= DEFAULT_82543_TIPG_IPGR1 << E1000_TIPG_IPGR1_SHIFT;
3794                 tipg |= DEFAULT_82543_TIPG_IPGR2 << E1000_TIPG_IPGR2_SHIFT;
3795         }
3796
3797         E1000_WRITE_REG(&adapter->hw, E1000_TIPG, tipg);
3798         E1000_WRITE_REG(&adapter->hw, E1000_TIDV, adapter->tx_int_delay.value);
3799
3800         if (adapter->hw.mac.type >= e1000_82540)
3801                 E1000_WRITE_REG(&adapter->hw, E1000_TADV,
3802                     adapter->tx_abs_int_delay.value);
3803
3804         if ((adapter->hw.mac.type == e1000_82571) ||
3805             (adapter->hw.mac.type == e1000_82572)) {
3806                 tarc = E1000_READ_REG(&adapter->hw, E1000_TARC(0));
3807                 tarc |= TARC_SPEED_MODE_BIT;
3808                 E1000_WRITE_REG(&adapter->hw, E1000_TARC(0), tarc);
3809         } else if (adapter->hw.mac.type == e1000_80003es2lan) {
3810                 /* errata: program both queues to unweighted RR */
3811                 tarc = E1000_READ_REG(&adapter->hw, E1000_TARC(0));
3812                 tarc |= 1;
3813                 E1000_WRITE_REG(&adapter->hw, E1000_TARC(0), tarc);
3814                 tarc = E1000_READ_REG(&adapter->hw, E1000_TARC(1));
3815                 tarc |= 1;
3816                 E1000_WRITE_REG(&adapter->hw, E1000_TARC(1), tarc);
3817         } else if (adapter->hw.mac.type == e1000_82574) {
3818                 tarc = E1000_READ_REG(&adapter->hw, E1000_TARC(0));
3819                 tarc |= TARC_ERRATA_BIT;
3820                 if (adapter->num_queues > 1) {
3821                         tarc |= (TARC_COMPENSATION_MODE | TARC_MQ_FIX);
3822                         E1000_WRITE_REG(&adapter->hw, E1000_TARC(0), tarc);
3823                         E1000_WRITE_REG(&adapter->hw, E1000_TARC(1), tarc);
3824                 } else
3825                         E1000_WRITE_REG(&adapter->hw, E1000_TARC(0), tarc);
3826         }
3827
3828         adapter->txd_cmd = E1000_TXD_CMD_IFCS;
3829         if (adapter->tx_int_delay.value > 0)
3830                 adapter->txd_cmd |= E1000_TXD_CMD_IDE;
3831
3832         /* Program the Transmit Control Register */
3833         tctl = E1000_READ_REG(&adapter->hw, E1000_TCTL);
3834         tctl &= ~E1000_TCTL_CT;
3835         tctl |= (E1000_TCTL_PSP | E1000_TCTL_RTLC | E1000_TCTL_EN |
3836                    (E1000_COLLISION_THRESHOLD << E1000_CT_SHIFT));
3837
3838         if (adapter->hw.mac.type >= e1000_82571)
3839                 tctl |= E1000_TCTL_MULR;
3840
3841         /* This write will effectively turn on the transmit unit. */
3842         E1000_WRITE_REG(&adapter->hw, E1000_TCTL, tctl);
3843
3844         /* SPT and KBL errata workarounds */
3845         if (hw->mac.type == e1000_pch_spt) {
3846                 u32 reg;
3847                 reg = E1000_READ_REG(hw, E1000_IOSFPC);
3848                 reg |= E1000_RCTL_RDMTS_HEX;
3849                 E1000_WRITE_REG(hw, E1000_IOSFPC, reg);
3850                 /* i218-i219 Specification Update 1.5.4.5 */
3851                 reg = E1000_READ_REG(hw, E1000_TARC(0));
3852                 reg &= ~E1000_TARC0_CB_MULTIQ_3_REQ;
3853                 reg |= E1000_TARC0_CB_MULTIQ_2_REQ;
3854                 E1000_WRITE_REG(hw, E1000_TARC(0), reg);
3855         }
3856 }
3857
3858
3859 /*********************************************************************
3860  *
3861  *  Free all transmit rings.
3862  *
3863  **********************************************************************/
3864 static void
3865 em_free_transmit_structures(struct adapter *adapter)
3866 {
3867         struct tx_ring *txr = adapter->tx_rings;
3868
3869         for (int i = 0; i < adapter->num_queues; i++, txr++) {
3870                 EM_TX_LOCK(txr);
3871                 em_free_transmit_buffers(txr);
3872                 em_dma_free(adapter, &txr->txdma);
3873                 EM_TX_UNLOCK(txr);
3874                 EM_TX_LOCK_DESTROY(txr);
3875         }
3876
3877         free(adapter->tx_rings, M_DEVBUF);
3878 }
3879
3880 /*********************************************************************
3881  *
3882  *  Free transmit ring related data structures.
3883  *
3884  **********************************************************************/
3885 static void
3886 em_free_transmit_buffers(struct tx_ring *txr)
3887 {
3888         struct adapter          *adapter = txr->adapter;
3889         struct em_txbuffer      *txbuf;
3890
3891         INIT_DEBUGOUT("free_transmit_ring: begin");
3892
3893         if (txr->tx_buffers == NULL)
3894                 return;
3895
3896         for (int i = 0; i < adapter->num_tx_desc; i++) {
3897                 txbuf = &txr->tx_buffers[i];
3898                 if (txbuf->m_head != NULL) {
3899                         bus_dmamap_sync(txr->txtag, txbuf->map,
3900                             BUS_DMASYNC_POSTWRITE);
3901                         bus_dmamap_unload(txr->txtag,
3902                             txbuf->map);
3903                         m_freem(txbuf->m_head);
3904                         txbuf->m_head = NULL;
3905                         if (txbuf->map != NULL) {
3906                                 bus_dmamap_destroy(txr->txtag,
3907                                     txbuf->map);
3908                                 txbuf->map = NULL;
3909                         }
3910                 } else if (txbuf->map != NULL) {
3911                         bus_dmamap_unload(txr->txtag,
3912                             txbuf->map);
3913                         bus_dmamap_destroy(txr->txtag,
3914                             txbuf->map);
3915                         txbuf->map = NULL;
3916                 }
3917         }
3918 #if __FreeBSD_version >= 800000
3919         if (txr->br != NULL)
3920                 buf_ring_free(txr->br, M_DEVBUF);
3921 #endif
3922         if (txr->tx_buffers != NULL) {
3923                 free(txr->tx_buffers, M_DEVBUF);
3924                 txr->tx_buffers = NULL;
3925         }
3926         if (txr->txtag != NULL) {
3927                 bus_dma_tag_destroy(txr->txtag);
3928                 txr->txtag = NULL;
3929         }
3930         return;
3931 }
3932
3933
3934 /*********************************************************************
3935  *  The offload context is protocol specific (TCP/UDP) and thus
3936  *  only needs to be set when the protocol changes. Even so, a
3937  *  context change can be a performance detriment, and it
3938  *  might be better just disabled. The reason lies in the way
3939  *  the controller supports pipelined requests from the
3940  *  Tx data DMA. Up to four requests can be pipelined, and they may
3941  *  belong to the same packet or to multiple packets. However, all
3942  *  requests for one packet are issued before a request is issued
3943  *  for a subsequent packet, and if a request for the next packet
3944  *  requires a context change, that request is stalled
3945  *  until the previous request completes. This means setting up
3946  *  a new context effectively disables pipelined Tx data DMA, which
3947  *  in turn greatly slows down performance when sending small
3948  *  frames.
3949  **********************************************************************/
3950 static void
3951 em_transmit_checksum_setup(struct tx_ring *txr, struct mbuf *mp, int ip_off,
3952     struct ip *ip, u32 *txd_upper, u32 *txd_lower)
3953 {
3954         struct adapter                  *adapter = txr->adapter;
3955         struct e1000_context_desc       *TXD = NULL;
3956         struct em_txbuffer              *tx_buffer;
3957         int                             cur, hdr_len;
3958         u32                             cmd = 0;
3959         u16                             offload = 0;
3960         u8                              ipcso, ipcss, tucso, tucss;
3961
3962         ipcss = ipcso = tucss = tucso = 0;
3963         hdr_len = ip_off + (ip->ip_hl << 2);
3964         cur = txr->next_avail_desc;
3965
3966         /* Setup of IP header checksum. */
3967         if (mp->m_pkthdr.csum_flags & CSUM_IP) {
3968                 *txd_upper |= E1000_TXD_POPTS_IXSM << 8;
3969                 offload |= CSUM_IP;
3970                 ipcss = ip_off;
3971                 ipcso = ip_off + offsetof(struct ip, ip_sum);
3972                 /*
3973                  * Start offset for header checksum calculation.
3974                  * End offset for header checksum calculation.
3975                  * Offset of place to put the checksum.
3976                  */
3977                 TXD = (struct e1000_context_desc *)&txr->tx_base[cur];
3978                 TXD->lower_setup.ip_fields.ipcss = ipcss;
3979                 TXD->lower_setup.ip_fields.ipcse = htole16(hdr_len);
3980                 TXD->lower_setup.ip_fields.ipcso = ipcso;
3981                 cmd |= E1000_TXD_CMD_IP;
3982         }
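        /*
         * Worked example (illustrative numbers): for an untagged Ethernet
         * frame, ip_off is 14, and with a 20-byte IP header (ip_hl == 5)
         * hdr_len is 34.  The IP case above then yields ipcss = 14,
         * ipcse = 34 and ipcso = 14 + 10 = 24 (offsetof(struct ip,
         * ip_sum)); the TCP case below yields tucso = 34 + 16 = 50.
         */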
3983
3984         if (mp->m_pkthdr.csum_flags & CSUM_TCP) {
3985                 *txd_lower = E1000_TXD_CMD_DEXT | E1000_TXD_DTYP_D;
3986                 *txd_upper |= E1000_TXD_POPTS_TXSM << 8;
3987                 offload |= CSUM_TCP;
3988                 tucss = hdr_len;
3989                 tucso = hdr_len + offsetof(struct tcphdr, th_sum);
3990                 /*
3991                  * The 82574L can only remember the *last* context used,
3992                  * regardless of the queue it was used for.  We cannot reuse
3993                  * contexts on this hardware platform and must generate a new
3994                  * context every time.  82574L hardware spec, section 7.2.6,
3995                  * second note.
3996                  */
3997                 if (adapter->num_queues < 2) {
3998                         /*
3999                         * Setting up a new checksum offload context for every
4000                         * frame takes a lot of processing time for hardware.
4001                         * This also reduces performance a lot for small sized
4002                         * frames, so avoid it if the driver can use a previously
4003                         * configured checksum offload context.
4004                         */
4005                         if (txr->last_hw_offload == offload) {
4006                                 if (offload & CSUM_IP) {
4007                                         if (txr->last_hw_ipcss == ipcss &&
4008                                         txr->last_hw_ipcso == ipcso &&
4009                                         txr->last_hw_tucss == tucss &&
4010                                         txr->last_hw_tucso == tucso)
4011                                                 return;
4012                                 } else {
4013                                         if (txr->last_hw_tucss == tucss &&
4014                                         txr->last_hw_tucso == tucso)
4015                                                 return;
4016                                 }
4017                         }
4018                         txr->last_hw_offload = offload;
4019                         txr->last_hw_tucss = tucss;
4020                         txr->last_hw_tucso = tucso;
4021                 }
4022                 /*
4023                  * Start offset for payload checksum calculation.
4024                  * End offset for payload checksum calculation.
4025                  * Offset of place to put the checksum.
4026                  */
4027                 TXD = (struct e1000_context_desc *)&txr->tx_base[cur];
4028                 TXD->upper_setup.tcp_fields.tucss = hdr_len;
4029                 TXD->upper_setup.tcp_fields.tucse = htole16(0);
4030                 TXD->upper_setup.tcp_fields.tucso = tucso;
4031                 cmd |= E1000_TXD_CMD_TCP;
4032         } else if (mp->m_pkthdr.csum_flags & CSUM_UDP) {
4033                 *txd_lower = E1000_TXD_CMD_DEXT | E1000_TXD_DTYP_D;
4034                 *txd_upper |= E1000_TXD_POPTS_TXSM << 8;
4035                 tucss = hdr_len;
4036                 tucso = hdr_len + offsetof(struct udphdr, uh_sum);
4037                 /*
4038                  * The 82574L can only remember the *last* context used
4039                  * regardless of the queue it was used for.  We cannot reuse
4040                  * contexts on this hardware platform and must generate a new
4041                  * context every time.  82574L hardware spec, section 7.2.6,
4042                  * second note.
4043                  */
4044                 if (adapter->num_queues < 2) {
4045                         /*
4046                         * Setting up a new checksum offload context for every
4047                         * frame takes a lot of processing time in hardware.
4048                         * It also reduces performance a lot for small-sized
4049                         * frames, so avoid it if the driver can reuse the
4050                         * previously configured checksum offload context.
4051                         */
4052                         if (txr->last_hw_offload == offload) {
4053                                 if (offload & CSUM_IP) {
4054                                         if (txr->last_hw_ipcss == ipcss &&
4055                                             txr->last_hw_ipcso == ipcso &&
4056                                             txr->last_hw_tucss == tucss &&
4057                                             txr->last_hw_tucso == tucso)
4058                                                 return;
4059                                 } else {
4060                                         if (txr->last_hw_tucss == tucss &&
4061                                             txr->last_hw_tucso == tucso)
4062                                                 return;
4063                                 }
4064                         }
4065                         txr->last_hw_offload = offload;
4066                         txr->last_hw_tucss = tucss;
4067                         txr->last_hw_tucso = tucso;
4068                 }
4069                 /*
4070                  * Start offset for payload checksum calculation.
4071                  * End offset for payload checksum calculation.
4072                  * Offset of place to put the checksum.
4073                  */
4074                 TXD = (struct e1000_context_desc *)&txr->tx_base[cur];
4075                 TXD->upper_setup.tcp_fields.tucss = tucss;
4076                 TXD->upper_setup.tcp_fields.tucse = htole16(0);
4077                 TXD->upper_setup.tcp_fields.tucso = tucso;
4078         }
4079   
4080         if (offload & CSUM_IP) {
4081                 txr->last_hw_ipcss = ipcss;
4082                 txr->last_hw_ipcso = ipcso;
4083         }
4084
4085         TXD->tcp_seg_setup.data = htole32(0);
4086         TXD->cmd_and_length =
4087             htole32(adapter->txd_cmd | E1000_TXD_CMD_DEXT | cmd);
4088         tx_buffer = &txr->tx_buffers[cur];
4089         tx_buffer->m_head = NULL;
4090         tx_buffer->next_eop = -1;
4091
4092         if (++cur == adapter->num_tx_desc)
4093                 cur = 0;
4094
4095         txr->tx_avail--;
4096         txr->next_avail_desc = cur;
4097 }
4098
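     /*
      * For illustration, the offsets programmed above for a standard
      * untagged TCP/IPv4 frame (ip_off == ETHER_HDR_LEN == 14 and a
      * 20-byte IP header, ip_hl == 5) work out as follows:
      *
      *      ipcss = 14              IP header follows the Ethernet header
      *      ipcso = 14 + 10 = 24    offsetof(struct ip, ip_sum) == 10
      *      hdr_len = 14 + 20 = 34  start of the TCP/UDP header (tucss)
      *      tucso = 34 + 16 = 50    TCP: offsetof(struct tcphdr, th_sum)
      *      tucso = 34 + 6  = 40    UDP: offsetof(struct udphdr, uh_sum)
      *
      * tucse is left at zero in both cases, which tells the hardware to
      * checksum through to the end of the packet.
      */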
4099
4100 /**********************************************************************
4101  *
4102  *  Setup work for hardware segmentation offload (TSO)
4103  *
4104  **********************************************************************/
4105 static void
4106 em_tso_setup(struct tx_ring *txr, struct mbuf *mp, int ip_off,
4107     struct ip *ip, struct tcphdr *tp, u32 *txd_upper, u32 *txd_lower)
4108 {
4109         struct adapter                  *adapter = txr->adapter;
4110         struct e1000_context_desc       *TXD;
4111         struct em_txbuffer              *tx_buffer;
4112         int cur, hdr_len;
4113
4114         /*
4115          * In theory we could reuse the same TSO context if and only if
4116          * the frame is of the same type (IP/TCP) and has the same MSS.
4117          * However, checking whether a frame has the same IP/TCP structure
4118          * is hard, so just ignore that and always establish a new TSO
4119          * context.
4120          */
4121         hdr_len = ip_off + (ip->ip_hl << 2) + (tp->th_off << 2);
4122         *txd_lower = (E1000_TXD_CMD_DEXT |      /* Extended descr type */
4123                       E1000_TXD_DTYP_D |        /* Data descr type */
4124                       E1000_TXD_CMD_TSE);       /* Do TSE on this packet */
4125
4126         /* IP and/or TCP header checksum calculation and insertion. */
4127         *txd_upper = (E1000_TXD_POPTS_IXSM | E1000_TXD_POPTS_TXSM) << 8;
4128
4129         cur = txr->next_avail_desc;
4130         tx_buffer = &txr->tx_buffers[cur];
4131         TXD = (struct e1000_context_desc *) &txr->tx_base[cur];
4132
4133         /*
4134          * Start offset for header checksum calculation.
4135          * End offset for header checksum calculation.
4136          * Offset of place to put the checksum.
4137          */
4138         TXD->lower_setup.ip_fields.ipcss = ip_off;
4139         TXD->lower_setup.ip_fields.ipcse =
4140             htole16(ip_off + (ip->ip_hl << 2) - 1);
4141         TXD->lower_setup.ip_fields.ipcso = ip_off + offsetof(struct ip, ip_sum);
4142         /*
4143          * Start offset for payload checksum calculation.
4144          * End offset for payload checksum calculation.
4145          * Offset of place to put the checksum.
4146          */
4147         TXD->upper_setup.tcp_fields.tucss = ip_off + (ip->ip_hl << 2);
4148         TXD->upper_setup.tcp_fields.tucse = 0;
4149         TXD->upper_setup.tcp_fields.tucso =
4150             ip_off + (ip->ip_hl << 2) + offsetof(struct tcphdr, th_sum);
4151         /*
4152          * Payload size per packet w/o any headers.
4153          * Length of all headers up to payload.
4154          */
4155         TXD->tcp_seg_setup.fields.mss = htole16(mp->m_pkthdr.tso_segsz);
4156         TXD->tcp_seg_setup.fields.hdr_len = hdr_len;
4157
4158         TXD->cmd_and_length = htole32(adapter->txd_cmd |
4159                                 E1000_TXD_CMD_DEXT |    /* Extended descr */
4160                                 E1000_TXD_CMD_TSE |     /* TSE context */
4161                                 E1000_TXD_CMD_IP |      /* Do IP csum */
4162                                 E1000_TXD_CMD_TCP |     /* Do TCP checksum */
4163                                 (mp->m_pkthdr.len - (hdr_len))); /* Total len */
4164
4165         tx_buffer->m_head = NULL;
4166         tx_buffer->next_eop = -1;
4167
4168         if (++cur == adapter->num_tx_desc)
4169                 cur = 0;
4170
4171         txr->tx_avail--;
4172         txr->next_avail_desc = cur;
4173         txr->tx_tso = TRUE;
4174 }
4175
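     /*
      * For illustration, with a standard TCP/IPv4 frame carrying no IP
      * or TCP options (ip_off == 14, ip_hl == 5, th_off == 5) the math
      * above gives hdr_len = 14 + 20 + 20 = 54.  For a 9014-byte TSO
      * chain with tso_segsz == 1448 the context descriptor therefore
      * carries mss = 1448, hdr_len = 54 and a payload length of
      * 9014 - 54 = 8960, from which the hardware emits 7 segments,
      * replicating the 54 header bytes into each one.
      */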
4176
4177 /**********************************************************************
4178  *
4179  *  Examine each tx_buffer in the used queue. If the hardware is done
4180  *  processing the packet then free associated resources. The
4181  *  tx_buffer is put back on the free queue.
4182  *
4183  **********************************************************************/
4184 static void
4185 em_txeof(struct tx_ring *txr)
4186 {
4187         struct adapter  *adapter = txr->adapter;
4188         int first, last, done, processed;
4189         struct em_txbuffer *tx_buffer;
4190         struct e1000_tx_desc   *tx_desc, *eop_desc;
4191         struct ifnet   *ifp = adapter->ifp;
4192
4193         EM_TX_LOCK_ASSERT(txr);
4194 #ifdef DEV_NETMAP
4195         if (netmap_tx_irq(ifp, txr->me))
4196                 return;
4197 #endif /* DEV_NETMAP */
4198
4199         /* No work, make sure hang detection is disabled */
4200         if (txr->tx_avail == adapter->num_tx_desc) {
4201                 txr->busy = EM_TX_IDLE;
4202                 return;
4203         }
4204
4205         processed = 0;
4206         first = txr->next_to_clean;
4207         tx_desc = &txr->tx_base[first];
4208         tx_buffer = &txr->tx_buffers[first];
4209         last = tx_buffer->next_eop;
4210         eop_desc = &txr->tx_base[last];
4211
4212         /*
4213          * Get the index of the first descriptor
4214          * AFTER the EOP of the first packet, so that
4215          * we can do the simple comparison in the
4216          * inner while loop.
4217          */
4218         if (++last == adapter->num_tx_desc)
4219                 last = 0;
4220         done = last;
4221
4222         bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
4223             BUS_DMASYNC_POSTREAD);
4224
4225         while (eop_desc->upper.fields.status & E1000_TXD_STAT_DD) {
4226                 /* We clean the range of the packet */
4227                 while (first != done) {
4228                         tx_desc->upper.data = 0;
4229                         tx_desc->lower.data = 0;
4230                         tx_desc->buffer_addr = 0;
4231                         ++txr->tx_avail;
4232                         ++processed;
4233
4234                         if (tx_buffer->m_head) {
4235                                 bus_dmamap_sync(txr->txtag,
4236                                     tx_buffer->map,
4237                                     BUS_DMASYNC_POSTWRITE);
4238                                 bus_dmamap_unload(txr->txtag,
4239                                     tx_buffer->map);
4240                                 m_freem(tx_buffer->m_head);
4241                                 tx_buffer->m_head = NULL;
4242                         }
4243                         tx_buffer->next_eop = -1;
4244
4245                         if (++first == adapter->num_tx_desc)
4246                                 first = 0;
4247
4248                         tx_buffer = &txr->tx_buffers[first];
4249                         tx_desc = &txr->tx_base[first];
4250                 }
4251                 ++ifp->if_opackets;
4252                 /* See if we can continue to the next packet */
4253                 last = tx_buffer->next_eop;
4254                 if (last != -1) {
4255                         eop_desc = &txr->tx_base[last];
4256                         /* Get new done point */
4257                         if (++last == adapter->num_tx_desc) last = 0;
4258                         done = last;
4259                 } else
4260                         break;
4261         }
4262         bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
4263             BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
4264
4265         txr->next_to_clean = first;
4266
4267         /*
4268         ** Hang detection: we know there is work outstanding
4269         ** or the early return above would have been taken, so
4270         ** no descriptor processed here indicates a potential hang.
4271         ** The local timer will examine this and do a reset if needed.
4272         */
4273         if (processed == 0) {
4274                 if (txr->busy != EM_TX_HUNG)
4275                         ++txr->busy;
4276         } else /* At least one descriptor was cleaned */
4277                 txr->busy = EM_TX_BUSY; /* note this clears HUNG */
4278
4279         /*
4280          * If we have a minimum free, clear IFF_DRV_OACTIVE
4281          * to tell the stack that it is OK to send packets.
4282          * Notice that all writes of OACTIVE happen under the
4283          * TX lock which, with a single queue, guarantees 
4284          * sanity.
4285          */
4286         if (txr->tx_avail >= EM_MAX_SCATTER) {
4287                 ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;
4288         }
4289
4290         /* Disable hang detection if all clean */
4291         if (txr->tx_avail == adapter->num_tx_desc)
4292                 txr->busy = EM_TX_IDLE;
4293 }
4294
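     /*
      * A sketch of the index arithmetic above, assuming num_tx_desc is
      * 1024: if next_to_clean (first) is 1020 and the first packet's
      * EOP sits in slot 2, done becomes 3 and the inner loop reclaims
      * slots 1020, 1021, 1022, 1023, 0, 1 and 2.  The wrap is handled
      * by the "if (++first == adapter->num_tx_desc)" step, so the
      * comparison against done never needs a modulus.
      */
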
4295 /*********************************************************************
4296  *
4297  *  Refresh RX descriptor mbufs from system mbuf buffer pool.
4298  *
4299  **********************************************************************/
4300 static void
4301 em_refresh_mbufs(struct rx_ring *rxr, int limit)
4302 {
4303         struct adapter          *adapter = rxr->adapter;
4304         struct mbuf             *m;
4305         bus_dma_segment_t       segs;
4306         struct em_rxbuffer      *rxbuf;
4307         int                     i, j, error, nsegs;
4308         bool                    cleaned = FALSE;
4309
4310         i = j = rxr->next_to_refresh;
4311         /*
4312         ** Get one descriptor beyond
4313         ** our work mark to control
4314         ** the loop.
4315         */
4316         if (++j == adapter->num_rx_desc)
4317                 j = 0;
4318
4319         while (j != limit) {
4320                 rxbuf = &rxr->rx_buffers[i];
4321                 if (rxbuf->m_head == NULL) {
4322                         m = m_getjcl(M_NOWAIT, MT_DATA,
4323                             M_PKTHDR, adapter->rx_mbuf_sz);
4324                         /*
4325                         ** If we have a temporary resource shortage
4326                         ** that causes a failure, just abort the refresh
4327                         ** for now; we will return to this point when
4328                         ** reinvoked from em_rxeof.
4329                         */
4330                         if (m == NULL)
4331                                 goto update;
4332                 } else
4333                         m = rxbuf->m_head;
4334
4335                 m->m_len = m->m_pkthdr.len = adapter->rx_mbuf_sz;
4336                 m->m_flags |= M_PKTHDR;
4337                 m->m_data = m->m_ext.ext_buf;
4338
4339                 /* Use bus_dma machinery to setup the memory mapping  */
4340                 error = bus_dmamap_load_mbuf_sg(rxr->rxtag, rxbuf->map,
4341                     m, &segs, &nsegs, BUS_DMA_NOWAIT);
4342                 if (error != 0) {
4343                         printf("Refresh mbufs: hdr dmamap load"
4344                             " failure - %d\n", error);
4345                         m_free(m);
4346                         rxbuf->m_head = NULL;
4347                         goto update;
4348                 }
4349                 rxbuf->m_head = m;
4350                 rxbuf->paddr = segs.ds_addr;
4351                 bus_dmamap_sync(rxr->rxtag,
4352                     rxbuf->map, BUS_DMASYNC_PREREAD);
4353                 em_setup_rxdesc(&rxr->rx_base[i], rxbuf);
4354                 cleaned = TRUE;
4355
4356                 i = j; /* Next is precalculated for us */
4357                 rxr->next_to_refresh = i;
4358                 /* Calculate next controlling index */
4359                 if (++j == adapter->num_rx_desc)
4360                         j = 0;
4361         }
4362 update:
4363         /*
4364         ** Update the tail pointer only if, and only
4365         ** as far as, we have refreshed.
4366         */
4367         if (cleaned)
4368                 E1000_WRITE_REG(&adapter->hw,
4369                     E1000_RDT(rxr->me), rxr->next_to_refresh);
4370
4371         return;
4372 }
4373
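     /*
      * Note on the RDT write above: RDT tells the hardware the last
      * descriptor it may fill, so advancing it to next_to_refresh hands
      * the refreshed buffers back to the hardware.  The loop runs its
      * control index one slot ahead of the work mark, which always
      * leaves a gap between tail and head and so keeps a full ring
      * distinguishable from an empty one.
      */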
4374
4375 /*********************************************************************
4376  *
4377  *  Allocate memory for rx_buffer structures. Since we use one
4378  *  rx_buffer per received packet, the maximum number of rx_buffer's
4379  *  that we'll need is equal to the number of receive descriptors
4380  *  that we've allocated.
4381  *
4382  **********************************************************************/
4383 static int
4384 em_allocate_receive_buffers(struct rx_ring *rxr)
4385 {
4386         struct adapter          *adapter = rxr->adapter;
4387         device_t                dev = adapter->dev;
4388         struct em_rxbuffer      *rxbuf;
4389         int                     error;
4390
4391         rxr->rx_buffers = malloc(sizeof(struct em_rxbuffer) *
4392             adapter->num_rx_desc, M_DEVBUF, M_NOWAIT | M_ZERO);
4393         if (rxr->rx_buffers == NULL) {
4394                 device_printf(dev, "Unable to allocate rx_buffer memory\n");
4395                 return (ENOMEM);
4396         }
4397
4398         error = bus_dma_tag_create(bus_get_dma_tag(dev), /* parent */
4399                                 1, 0,                   /* alignment, bounds */
4400                                 BUS_SPACE_MAXADDR,      /* lowaddr */
4401                                 BUS_SPACE_MAXADDR,      /* highaddr */
4402                                 NULL, NULL,             /* filter, filterarg */
4403                                 MJUM9BYTES,             /* maxsize */
4404                                 1,                      /* nsegments */
4405                                 MJUM9BYTES,             /* maxsegsize */
4406                                 0,                      /* flags */
4407                                 NULL,                   /* lockfunc */
4408                                 NULL,                   /* lockarg */
4409                                 &rxr->rxtag);
4410         if (error) {
4411                 device_printf(dev, "%s: bus_dma_tag_create failed %d\n",
4412                     __func__, error);
4413                 goto fail;
4414         }
4415
4416         rxbuf = rxr->rx_buffers;
4417         for (int i = 0; i < adapter->num_rx_desc; i++, rxbuf++) {
4418                 /* rxbuf already walks the array via the loop increment */
4419                 error = bus_dmamap_create(rxr->rxtag, 0, &rxbuf->map);
4420                 if (error) {
4421                         device_printf(dev, "%s: bus_dmamap_create failed: %d\n",
4422                             __func__, error);
4423                         goto fail;
4424                 }
4425         }
4426
4427         return (0);
4428
4429 fail:
4430         em_free_receive_structures(adapter);
4431         return (error);
4432 }
4433
4434
4435 /*********************************************************************
4436  *
4437  *  Initialize a receive ring and its buffers.
4438  *
4439  **********************************************************************/
4440 static int
4441 em_setup_receive_ring(struct rx_ring *rxr)
4442 {
4443         struct  adapter         *adapter = rxr->adapter;
4444         struct em_rxbuffer      *rxbuf;
4445         bus_dma_segment_t       seg[1];
4446         int                     rsize, nsegs, error = 0;
4447 #ifdef DEV_NETMAP
4448         struct netmap_adapter *na = NA(adapter->ifp);
4449         struct netmap_slot *slot;
4450 #endif
4451
4452
4453         /* Clear the ring contents */
4454         EM_RX_LOCK(rxr);
4455         rsize = roundup2(adapter->num_rx_desc *
4456             sizeof(union e1000_rx_desc_extended), EM_DBA_ALIGN);
4457         bzero((void *)rxr->rx_base, rsize);
4458 #ifdef DEV_NETMAP
4459         slot = netmap_reset(na, NR_RX, 0, 0);
4460 #endif
4461
4462         /*
4463         ** Free current RX buffer structs and their mbufs
4464         */
4465         for (int i = 0; i < adapter->num_rx_desc; i++) {
4466                 rxbuf = &rxr->rx_buffers[i];
4467                 if (rxbuf->m_head != NULL) {
4468                         bus_dmamap_sync(rxr->rxtag, rxbuf->map,
4469                             BUS_DMASYNC_POSTREAD);
4470                         bus_dmamap_unload(rxr->rxtag, rxbuf->map);
4471                         m_freem(rxbuf->m_head);
4472                         rxbuf->m_head = NULL; /* mark as freed */
4473                 }
4474         }
4475
4476         /* Now replenish the mbufs */
4477         for (int j = 0; j != adapter->num_rx_desc; ++j) {
4478                 rxbuf = &rxr->rx_buffers[j];
4479 #ifdef DEV_NETMAP
4480                 if (slot) {
4481                         int si = netmap_idx_n2k(&na->rx_rings[rxr->me], j);
4482                         uint64_t paddr;
4483                         void *addr;
4484
4485                         addr = PNMB(na, slot + si, &paddr);
4486                         netmap_load_map(na, rxr->rxtag, rxbuf->map, addr);
4487                         rxbuf->paddr = paddr;
4488                         em_setup_rxdesc(&rxr->rx_base[j], rxbuf);
4489                         continue;
4490                 }
4491 #endif /* DEV_NETMAP */
4492                 rxbuf->m_head = m_getjcl(M_NOWAIT, MT_DATA,
4493                     M_PKTHDR, adapter->rx_mbuf_sz);
4494                 if (rxbuf->m_head == NULL) {
4495                         error = ENOBUFS;
4496                         goto fail;
4497                 }
4498                 rxbuf->m_head->m_len = adapter->rx_mbuf_sz;
4499                 rxbuf->m_head->m_flags &= ~M_HASFCS; /* we strip it */
4500                 rxbuf->m_head->m_pkthdr.len = adapter->rx_mbuf_sz;
4501
4502                 /* Get the memory mapping */
4503                 error = bus_dmamap_load_mbuf_sg(rxr->rxtag,
4504                     rxbuf->map, rxbuf->m_head, seg,
4505                     &nsegs, BUS_DMA_NOWAIT);
4506                 if (error != 0) {
4507                         m_freem(rxbuf->m_head);
4508                         rxbuf->m_head = NULL;
4509                         goto fail;
4510                 }
4511                 bus_dmamap_sync(rxr->rxtag,
4512                     rxbuf->map, BUS_DMASYNC_PREREAD);
4513
4514                 rxbuf->paddr = seg[0].ds_addr;
4515                 em_setup_rxdesc(&rxr->rx_base[j], rxbuf);
4516         }
4517         rxr->next_to_check = 0;
4518         rxr->next_to_refresh = 0;
4519         bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
4520             BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
4521
4522 fail:
4523         EM_RX_UNLOCK(rxr);
4524         return (error);
4525 }
4526
4527 /*********************************************************************
4528  *
4529  *  Initialize all receive rings.
4530  *
4531  **********************************************************************/
4532 static int
4533 em_setup_receive_structures(struct adapter *adapter)
4534 {
4535         struct rx_ring *rxr = adapter->rx_rings;
4536         int q;
4537
4538         for (q = 0; q < adapter->num_queues; q++, rxr++)
4539                 if (em_setup_receive_ring(rxr))
4540                         goto fail;
4541
4542         return (0);
4543 fail:
4544         /*
4545          * Free RX buffers allocated so far; we only handle the
4546          * rings that completed, as the failing case will have
4547          * cleaned up after itself. 'q' failed, so it is the terminus.
4548          */
4549         for (int i = 0; i < q; ++i) {
4550                 rxr = &adapter->rx_rings[i];
4551                 for (int n = 0; n < adapter->num_rx_desc; n++) {
4552                         struct em_rxbuffer *rxbuf;
4553                         rxbuf = &rxr->rx_buffers[n];
4554                         if (rxbuf->m_head != NULL) {
4555                                 bus_dmamap_sync(rxr->rxtag, rxbuf->map,
4556                                   BUS_DMASYNC_POSTREAD);
4557                                 bus_dmamap_unload(rxr->rxtag, rxbuf->map);
4558                                 m_freem(rxbuf->m_head);
4559                                 rxbuf->m_head = NULL;
4560                         }
4561                 }
4562                 rxr->next_to_check = 0;
4563                 rxr->next_to_refresh = 0;
4564         }
4565
4566         return (ENOBUFS);
4567 }
4568
4569 /*********************************************************************
4570  *
4571  *  Free all receive rings.
4572  *
4573  **********************************************************************/
4574 static void
4575 em_free_receive_structures(struct adapter *adapter)
4576 {
4577         struct rx_ring *rxr = adapter->rx_rings;
4578
4579         for (int i = 0; i < adapter->num_queues; i++, rxr++) {
4580                 em_free_receive_buffers(rxr);
4581                 /* Free the ring memory as well */
4582                 em_dma_free(adapter, &rxr->rxdma);
4583                 EM_RX_LOCK_DESTROY(rxr);
4584         }
4585
4586         free(adapter->rx_rings, M_DEVBUF);
4587 }
4588
4589
4590 /*********************************************************************
4591  *
4592  *  Free receive ring data structures
4593  *
4594  **********************************************************************/
4595 static void
4596 em_free_receive_buffers(struct rx_ring *rxr)
4597 {
4598         struct adapter          *adapter = rxr->adapter;
4599         struct em_rxbuffer      *rxbuf = NULL;
4600
4601         INIT_DEBUGOUT("free_receive_buffers: begin");
4602
4603         if (rxr->rx_buffers != NULL) {
4604                 for (int i = 0; i < adapter->num_rx_desc; i++) {
4605                         rxbuf = &rxr->rx_buffers[i];
4606                         if (rxbuf->map != NULL) {
4607                                 bus_dmamap_sync(rxr->rxtag, rxbuf->map,
4608                                     BUS_DMASYNC_POSTREAD);
4609                                 bus_dmamap_unload(rxr->rxtag, rxbuf->map);
4610                                 bus_dmamap_destroy(rxr->rxtag, rxbuf->map);
4611                         }
4612                         if (rxbuf->m_head != NULL) {
4613                                 m_freem(rxbuf->m_head);
4614                                 rxbuf->m_head = NULL;
4615                         }
4616                 }
4617                 free(rxr->rx_buffers, M_DEVBUF);
4618                 rxr->rx_buffers = NULL;
4619                 rxr->next_to_check = 0;
4620                 rxr->next_to_refresh = 0;
4621         }
4622
4623         if (rxr->rxtag != NULL) {
4624                 bus_dma_tag_destroy(rxr->rxtag);
4625                 rxr->rxtag = NULL;
4626         }
4627
4628         return;
4629 }
4630
4631
4632 /*********************************************************************
4633  *
4634  *  Enable receive unit.
4635  *
4636  **********************************************************************/
4637
4638 static void
4639 em_initialize_receive_unit(struct adapter *adapter)
4640 {
4641         struct rx_ring *rxr = adapter->rx_rings;
4642         struct ifnet    *ifp = adapter->ifp;
4643         struct e1000_hw *hw = &adapter->hw;
4644         u32     rctl, rxcsum, rfctl;
4645
4646         INIT_DEBUGOUT("em_initialize_receive_units: begin");
4647
4648         /*
4649          * Make sure receives are disabled while setting
4650          * up the descriptor ring
4651          */
4652         rctl = E1000_READ_REG(hw, E1000_RCTL);
4653         /* On 82574/82583, never disable receives once they have been enabled */
4654         if ((hw->mac.type != e1000_82574) && (hw->mac.type != e1000_82583))
4655                 E1000_WRITE_REG(hw, E1000_RCTL, rctl & ~E1000_RCTL_EN);
4656
4657         /* Setup the Receive Control Register */
4658         rctl &= ~(3 << E1000_RCTL_MO_SHIFT);
4659         rctl |= E1000_RCTL_EN | E1000_RCTL_BAM |
4660             E1000_RCTL_LBM_NO | E1000_RCTL_RDMTS_HALF |
4661             (hw->mac.mc_filter_type << E1000_RCTL_MO_SHIFT);
4662
4663         /* Do not store bad packets */
4664         rctl &= ~E1000_RCTL_SBP;
4665
4666         /* Enable Long Packet receive */
4667         if (ifp->if_mtu > ETHERMTU)
4668                 rctl |= E1000_RCTL_LPE;
4669         else
4670                 rctl &= ~E1000_RCTL_LPE;
4671
4672         /* Strip the CRC */
4673         if (!em_disable_crc_stripping)
4674                 rctl |= E1000_RCTL_SECRC;
4675
4676         E1000_WRITE_REG(&adapter->hw, E1000_RADV,
4677             adapter->rx_abs_int_delay.value);
4678
4679         E1000_WRITE_REG(&adapter->hw, E1000_RDTR,
4680             adapter->rx_int_delay.value);
4681         /*
4682          * Set the interrupt throttling rate. Value is calculated
4683          * as DEFAULT_ITR = 1/(MAX_INTS_PER_SEC * 256ns)
4684          */
4685         E1000_WRITE_REG(hw, E1000_ITR, DEFAULT_ITR);
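             /*
              * For example, with the driver's usual MAX_INTS_PER_SEC of
              * 8000, DEFAULT_ITR works out to 1e9 / (8000 * 256) = 488
              * in 256ns units, i.e. at most one interrupt per ~125us.
              */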
4686
4687         /* Use extended rx descriptor formats */
4688         rfctl = E1000_READ_REG(hw, E1000_RFCTL);
4689         rfctl |= E1000_RFCTL_EXTEN;
4690         /*
4691         ** When using MSIX interrupts we need to throttle
4692         ** using the EITR register (82574 only)
4693         */
4694         if (hw->mac.type == e1000_82574) {
4695                 for (int i = 0; i < 4; i++)
4696                         E1000_WRITE_REG(hw, E1000_EITR_82574(i),
4697                             DEFAULT_ITR);
4698                 /* Disable accelerated acknowledge */
4699                 rfctl |= E1000_RFCTL_ACK_DIS;
4700         }
4701         E1000_WRITE_REG(hw, E1000_RFCTL, rfctl);
4702
4703         rxcsum = E1000_READ_REG(hw, E1000_RXCSUM);
4704         if (ifp->if_capenable & IFCAP_RXCSUM) {
4705 #ifdef EM_MULTIQUEUE
4706                 rxcsum |= E1000_RXCSUM_TUOFL |
4707                           E1000_RXCSUM_IPOFL |
4708                           E1000_RXCSUM_PCSD;
4709 #else
4710                 rxcsum |= E1000_RXCSUM_TUOFL;
4711 #endif
4712         } else
4713                 rxcsum &= ~E1000_RXCSUM_TUOFL;
4714
4715         E1000_WRITE_REG(hw, E1000_RXCSUM, rxcsum);
4716
4717 #ifdef EM_MULTIQUEUE
4718 #define RSSKEYLEN 10
4719         if (adapter->num_queues > 1) {
4720                 uint8_t  rss_key[4 * RSSKEYLEN];
4721                 uint32_t reta = 0;
4722                 int i;
4723
4724                 /*
4725                 * Configure RSS key
4726                 */
4727                 arc4rand(rss_key, sizeof(rss_key), 0);
4728                 for (i = 0; i < RSSKEYLEN; ++i) {
4729                         uint32_t rssrk = 0;
4730
4731                         rssrk = EM_RSSRK_VAL(rss_key, i);
4732                         E1000_WRITE_REG(hw,E1000_RSSRK(i), rssrk);
4733                 }
4734
4735                 /*
4736                 * Configure RSS redirect table in following fashion:
4737                 * (hash & ring_cnt_mask) == rdr_table[(hash & rdr_table_mask)]
4738                 */
4739                 for (i = 0; i < sizeof(reta); ++i) {
4740                         uint32_t q;
4741
4742                         q = (i % adapter->num_queues) << 7;
4743                         reta |= q << (8 * i);
4744                 }
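                     /*
                      * Worked example: with num_queues == 2 the four bytes
                      * packed above alternate 0x00, 0x80 (reta ends up as
                      * 0x80008000), and replicating that word into all 32
                      * RETA registers steers hashes to queue 0 or 1 via
                      * bit 7 of each byte.
                      */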
4745
4746                 for (i = 0; i < 32; ++i) {
4747                         E1000_WRITE_REG(hw, E1000_RETA(i), reta);
4748                 }
4749
4750                 E1000_WRITE_REG(hw, E1000_MRQC, E1000_MRQC_RSS_ENABLE_2Q | 
4751                                 E1000_MRQC_RSS_FIELD_IPV4_TCP |
4752                                 E1000_MRQC_RSS_FIELD_IPV4 |
4753                                 E1000_MRQC_RSS_FIELD_IPV6_TCP_EX |
4754                                 E1000_MRQC_RSS_FIELD_IPV6_EX |
4755                                 E1000_MRQC_RSS_FIELD_IPV6);
4756         }
4757 #endif
4758         /*
4759         ** XXX TEMPORARY WORKAROUND: on some systems with the 82573,
4760         ** such as the Lenovo X60, long latencies are observed. This
4761         ** change eliminates the problem, but since having positive
4762         ** values in RDTR is a known source of problems on other
4763         ** platforms, another solution is being sought.
4764         */
4765         if (hw->mac.type == e1000_82573)
4766                 E1000_WRITE_REG(hw, E1000_RDTR, 0x20);
4767
4768         for (int i = 0; i < adapter->num_queues; i++, rxr++) {
4769                 /* Setup the Base and Length of the Rx Descriptor Ring */
4770                 u64 bus_addr = rxr->rxdma.dma_paddr;
4771                 u32 rdt = adapter->num_rx_desc - 1; /* default */
4772
4773                 E1000_WRITE_REG(hw, E1000_RDLEN(i),
4774                     adapter->num_rx_desc * sizeof(union e1000_rx_desc_extended));
4775                 E1000_WRITE_REG(hw, E1000_RDBAH(i), (u32)(bus_addr >> 32));
4776                 E1000_WRITE_REG(hw, E1000_RDBAL(i), (u32)bus_addr);
4777                 /* Setup the Head and Tail Descriptor Pointers */
4778                 E1000_WRITE_REG(hw, E1000_RDH(i), 0);
4779 #ifdef DEV_NETMAP
4780                 /*
4781                  * an init() while a netmap client is active must
4782                  * preserve the rx buffers passed to userspace.
4783                  */
4784                 if (ifp->if_capenable & IFCAP_NETMAP)
4785                         rdt -= nm_kr_rxspace(&NA(adapter->ifp)->rx_rings[i]);
4786 #endif /* DEV_NETMAP */
4787                 E1000_WRITE_REG(hw, E1000_RDT(i), rdt);
4788         }
4789
4790         /*
4791          * Set PTHRESH for improved jumbo performance
4792          * According to 10.2.5.11 of Intel 82574 Datasheet,
4793          * RXDCTL(1) is written whenever RXDCTL(0) is written.
4794          * Only write to RXDCTL(1) if there is a need for different
4795          * settings.
4796          */
4797         if (((adapter->hw.mac.type == e1000_ich9lan) ||
4798             (adapter->hw.mac.type == e1000_pch2lan) ||
4799             (adapter->hw.mac.type == e1000_ich10lan)) &&
4800             (ifp->if_mtu > ETHERMTU)) {
4801                 u32 rxdctl = E1000_READ_REG(hw, E1000_RXDCTL(0));
4802                 E1000_WRITE_REG(hw, E1000_RXDCTL(0), rxdctl | 3);
4803         } else if (adapter->hw.mac.type == e1000_82574) {
4804                 for (int i = 0; i < adapter->num_queues; i++) {
4805                         u32 rxdctl = E1000_READ_REG(hw, E1000_RXDCTL(i));
4806
4807                         rxdctl |= 0x20; /* PTHRESH */
4808                         rxdctl |= 4 << 8; /* HTHRESH */
4809                         rxdctl |= 4 << 16;/* WTHRESH */
4810                         rxdctl |= 1 << 24; /* Switch to granularity */
4811                         E1000_WRITE_REG(hw, E1000_RXDCTL(i), rxdctl);
4812                 }
4813         }
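             /*
              * Per the 82574 datasheet's RXDCTL layout (PTHRESH in bits
              * 5:0, HTHRESH in bits 13:8, WTHRESH in bits 21:16), the
              * value written above is PTHRESH = 32, HTHRESH = 4 and
              * WTHRESH = 4, with bit 24 selecting descriptor rather
              * than cache-line granularity.
              */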
4814                 
4815         if (adapter->hw.mac.type >= e1000_pch2lan) {
4816                 if (ifp->if_mtu > ETHERMTU)
4817                         e1000_lv_jumbo_workaround_ich8lan(hw, TRUE);
4818                 else
4819                         e1000_lv_jumbo_workaround_ich8lan(hw, FALSE);
4820         }
4821
4822         /* Make sure VLAN Filters are off */
4823         rctl &= ~E1000_RCTL_VFE;
4824
4825         if (adapter->rx_mbuf_sz == MCLBYTES)
4826                 rctl |= E1000_RCTL_SZ_2048;
4827         else if (adapter->rx_mbuf_sz == MJUMPAGESIZE)
4828                 rctl |= E1000_RCTL_SZ_4096 | E1000_RCTL_BSEX;
4829         else if (adapter->rx_mbuf_sz > MJUMPAGESIZE)
4830                 rctl |= E1000_RCTL_SZ_8192 | E1000_RCTL_BSEX;
4831
4832         /* Clear the descriptor type (DTYP) bits to ensure a DTYPE of 00 here */
4833         rctl &= ~0x00000C00;
4834         /* Write out the settings */
4835         E1000_WRITE_REG(hw, E1000_RCTL, rctl);
4836
4837         return;
4838 }
4839
4840
4841 /*********************************************************************
4842  *
4843  *  This routine executes in interrupt context. It replenishes
4844  *  the mbufs in the descriptor ring and sends data which has been
4845  *  dma'ed into host memory to the upper layer.
4846  *
4847  *  We loop at most count times if count is > 0, or until done if
4848  *  count < 0.
4849  *  
4850  *  For polling we also now return the number of cleaned packets
4851  *  For polling, we also return the number of cleaned packets
4852 static bool
4853 em_rxeof(struct rx_ring *rxr, int count, int *done)
4854 {
4855         struct adapter          *adapter = rxr->adapter;
4856         struct ifnet            *ifp = adapter->ifp;
4857         struct mbuf             *mp, *sendmp;
4858         u32                     status = 0;
4859         u16                     len;
4860         int                     i, processed, rxdone = 0;
4861         bool                    eop;
4862         union e1000_rx_desc_extended    *cur;
4863
4864         EM_RX_LOCK(rxr);
4865
4866         /* Sync the ring */
4867         bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
4868             BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
4869
4870
4871 #ifdef DEV_NETMAP
4872         if (netmap_rx_irq(ifp, rxr->me, &processed)) {
4873                 EM_RX_UNLOCK(rxr);
4874                 return (FALSE);
4875         }
4876 #endif /* DEV_NETMAP */
4877
4878         for (i = rxr->next_to_check, processed = 0; count != 0;) {
4879                 if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
4880                         break;
4881
4882                 cur = &rxr->rx_base[i];
4883                 status = le32toh(cur->wb.upper.status_error);
4884                 mp = sendmp = NULL;
4885
4886                 if ((status & E1000_RXD_STAT_DD) == 0)
4887                         break;
4888
4889                 len = le16toh(cur->wb.upper.length);
4890                 eop = (status & E1000_RXD_STAT_EOP) != 0;
4891
4892                 if ((status & E1000_RXDEXT_ERR_FRAME_ERR_MASK) ||
4893                     (rxr->discard == TRUE)) {
4894                         adapter->dropped_pkts++;
4895                         ++rxr->rx_discarded;
4896                         if (!eop) /* Catch subsequent segs */
4897                                 rxr->discard = TRUE;
4898                         else
4899                                 rxr->discard = FALSE;
4900                         em_rx_discard(rxr, i);
4901                         goto next_desc;
4902                 }
4903                 bus_dmamap_unload(rxr->rxtag, rxr->rx_buffers[i].map);
4904
4905                 /* Assign correct length to the current fragment */
4906                 mp = rxr->rx_buffers[i].m_head;
4907                 mp->m_len = len;
4908
4909                 /* Trigger for refresh */
4910                 rxr->rx_buffers[i].m_head = NULL;
4911
4912                 /* First segment? */
4913                 if (rxr->fmp == NULL) {
4914                         mp->m_pkthdr.len = len;
4915                         rxr->fmp = rxr->lmp = mp;
4916                 } else {
4917                         /* Chain mbufs together */
4918                         mp->m_flags &= ~M_PKTHDR;
4919                         rxr->lmp->m_next = mp;
4920                         rxr->lmp = mp;
4921                         rxr->fmp->m_pkthdr.len += len;
4922                 }
4923
4924                 if (eop) {
4925                         --count;
4926                         sendmp = rxr->fmp;
4927                         sendmp->m_pkthdr.rcvif = ifp;
4928                         ifp->if_ipackets++;
4929                         em_receive_checksum(status, sendmp);
4930 #ifndef __NO_STRICT_ALIGNMENT
4931                         if (adapter->hw.mac.max_frame_size >
4932                             (MCLBYTES - ETHER_ALIGN) &&
4933                             em_fixup_rx(rxr) != 0)
4934                                 goto skip;
4935 #endif
4936                         if (status & E1000_RXD_STAT_VP) {
4937                                 sendmp->m_pkthdr.ether_vtag =
4938                                     le16toh(cur->wb.upper.vlan);
4939                                 sendmp->m_flags |= M_VLANTAG;
4940                         }
4941 #ifndef __NO_STRICT_ALIGNMENT
4942 skip:
4943 #endif
4944                         rxr->fmp = rxr->lmp = NULL;
4945                 }
4946 next_desc:
4947                 /* Sync the ring */
4948                 bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
4949                         BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
4950
4951                 /* Zero out the receive descriptors status. */
4952                 cur->wb.upper.status_error &= htole32(~0xFF);
4953                 ++rxdone;       /* cumulative for POLL */
4954                 ++processed;
4955
4956                 /* Advance our pointers to the next descriptor. */
4957                 if (++i == adapter->num_rx_desc)
4958                         i = 0;
4959
4960                 /* Send to the stack */
4961                 if (sendmp != NULL) {
4962                         rxr->next_to_check = i;
4963                         EM_RX_UNLOCK(rxr);
4964                         (*ifp->if_input)(ifp, sendmp);
4965                         EM_RX_LOCK(rxr);
4966                         i = rxr->next_to_check;
4967                 }
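                     /*
                      * Note: the RX lock is dropped around if_input(), so
                      * another thread may touch the ring meanwhile; that is
                      * why next_to_check is re-read above rather than
                      * trusting the local cursor.
                      */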
4968
4969                 /* Only refresh mbufs every 8 descriptors */
4970                 if (processed == 8) {
4971                         em_refresh_mbufs(rxr, i);
4972                         processed = 0;
4973                 }
4974         }
4975
4976         /* Catch any remaining refresh work */
4977         if (e1000_rx_unrefreshed(rxr))
4978                 em_refresh_mbufs(rxr, i);
4979
4980         rxr->next_to_check = i;
4981         if (done != NULL)
4982                 *done = rxdone;
4983         EM_RX_UNLOCK(rxr);
4984
4985         return ((status & E1000_RXD_STAT_DD) ? TRUE : FALSE);
4986 }
4987
4988 static __inline void
4989 em_rx_discard(struct rx_ring *rxr, int i)
4990 {
4991         struct em_rxbuffer      *rbuf;
4992
4993         rbuf = &rxr->rx_buffers[i];
4994         bus_dmamap_unload(rxr->rxtag, rbuf->map);
4995
4996         /* Free any previous pieces */
4997         if (rxr->fmp != NULL) {
4998                 rxr->fmp->m_flags |= M_PKTHDR;
4999                 m_freem(rxr->fmp);
5000                 rxr->fmp = NULL;
5001                 rxr->lmp = NULL;
5002         }
5003         /*
5004         ** Free the buffer and allow em_refresh_mbufs()
5005         ** to clean up and recharge it.
5006         */
5007         if (rbuf->m_head) {
5008                 m_free(rbuf->m_head);
5009                 rbuf->m_head = NULL;
5010         }
5011         return;
5012 }
5013
5014 #ifndef __NO_STRICT_ALIGNMENT
5015 /*
5016  * When jumbo frames are enabled we should realign the entire payload on
5017  * architectures with strict alignment. This is a serious design mistake of
5018  * the 8254x as it nullifies DMA operations. The 8254x only allows the RX
5019  * buffer size to be 2048/4096/8192/16384. What we really want is
5020  * 2048 - ETHER_ALIGN to align its payload. On architectures without strict
5021  * alignment restrictions the 8254x still performs unaligned memory accesses,
5022  * which reduce performance too. To avoid copying an entire frame to align
5023  * it, we allocate a new mbuf and copy the ethernet header into it. The new
5024  * mbuf is prepended to the existing mbuf chain.
5025  *
5026  * Be aware, the best performance of the 8254x is achieved only when jumbo
5027  * frames are not used at all on architectures with strict alignment.
5028  */
5029 static int
5030 em_fixup_rx(struct rx_ring *rxr)
5031 {
5032         struct adapter *adapter = rxr->adapter;
5033         struct mbuf *m, *n;
5034         int error;
5035
5036         error = 0;
5037         m = rxr->fmp;
5038         if (m->m_len <= (MCLBYTES - ETHER_HDR_LEN)) {
5039                 bcopy(m->m_data, m->m_data + ETHER_HDR_LEN, m->m_len);
5040                 m->m_data += ETHER_HDR_LEN;
5041         } else {
5042                 MGETHDR(n, M_NOWAIT, MT_DATA);
5043                 if (n != NULL) {
5044                         bcopy(m->m_data, n->m_data, ETHER_HDR_LEN);
5045                         m->m_data += ETHER_HDR_LEN;
5046                         m->m_len -= ETHER_HDR_LEN;
5047                         n->m_len = ETHER_HDR_LEN;
5048                         M_MOVE_PKTHDR(n, m);
5049                         n->m_next = m;
5050                         rxr->fmp = n;
5051                 } else {
5052                         adapter->dropped_pkts++;
5053                         m_freem(rxr->fmp);
5054                         rxr->fmp = NULL;
5055                         error = ENOMEM;
5056                 }
5057         }
5058
5059         return (error);
5060 }
5061 #endif
5062
5063 static void
5064 em_setup_rxdesc(union e1000_rx_desc_extended *rxd, const struct em_rxbuffer *rxbuf)
5065 {
5066         rxd->read.buffer_addr = htole64(rxbuf->paddr);
5067         /* DD bits must be cleared */
5068         rxd->wb.upper.status_error = 0;
5069 }
5070
5071 /*********************************************************************
5072  *
5073  *  Verify that the hardware indicated that the checksum is valid.
5074  *  Inform the stack about the status of the checksum so that the
5075  *  stack doesn't spend time verifying it.
5076  *
5077  *********************************************************************/
5078 static void
5079 em_receive_checksum(uint32_t status, struct mbuf *mp)
5080 {
5081         mp->m_pkthdr.csum_flags = 0;
5082
5083         /* Ignore Checksum bit is set */
5084         if (status & E1000_RXD_STAT_IXSM)
5085                 return;
5086
5087         /* If the IP checksum exists and there is no IP Checksum error */
5088         if ((status & (E1000_RXD_STAT_IPCS | E1000_RXDEXT_STATERR_IPE)) ==
5089                 E1000_RXD_STAT_IPCS) {
5090                 mp->m_pkthdr.csum_flags = (CSUM_IP_CHECKED | CSUM_IP_VALID);
5091         }
5092
5093         /* TCP or UDP checksum */
5094         if ((status & (E1000_RXD_STAT_TCPCS | E1000_RXDEXT_STATERR_TCPE)) ==
5095             E1000_RXD_STAT_TCPCS) {
5096                 mp->m_pkthdr.csum_flags |= (CSUM_DATA_VALID | CSUM_PSEUDO_HDR);
5097                 mp->m_pkthdr.csum_data = htons(0xffff);
5098         }
5099         if (status & E1000_RXD_STAT_UDPCS) {
5100                 mp->m_pkthdr.csum_flags |= (CSUM_DATA_VALID | CSUM_PSEUDO_HDR);
5101                 mp->m_pkthdr.csum_data = htons(0xffff);
5102         }
5103 }
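
     /*
      * For example, a good TCP/IPv4 packet arrives with IPCS and TCPCS
      * set and neither IPE nor TCPE: the code above then sets
      * CSUM_IP_CHECKED | CSUM_IP_VALID | CSUM_DATA_VALID |
      * CSUM_PSEUDO_HDR with csum_data = 0xffff, and the stack skips
      * both checksum verifications entirely.
      */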
5104
5105 /*
5106  * This routine is run via a vlan
5107  * config EVENT
5108  */
5109 static void
5110 em_register_vlan(void *arg, struct ifnet *ifp, u16 vtag)
5111 {
5112         struct adapter  *adapter = ifp->if_softc;
5113         u32             index, bit;
5114
5115         if (ifp->if_softc !=  arg)   /* Not our event */
5116                 return;
5117
5118         if ((vtag == 0) || (vtag > 4095))       /* Invalid ID */
5119                 return;
5120
5121         EM_CORE_LOCK(adapter);
5122         index = (vtag >> 5) & 0x7F;
5123         bit = vtag & 0x1F;
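             /* e.g. vtag 100: index = 100 >> 5 = 3, bit = 100 & 0x1F = 4 */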
5124         adapter->shadow_vfta[index] |= (1 << bit);
5125         ++adapter->num_vlans;
5126         /* Re-init to load the changes */
5127         if (ifp->if_capenable & IFCAP_VLAN_HWFILTER)
5128                 em_init_locked(adapter);
5129         EM_CORE_UNLOCK(adapter);
5130 }
5131
5132 /*
5133  * This routine is run via a vlan
5134  * unconfig EVENT
5135  */
5136 static void
5137 em_unregister_vlan(void *arg, struct ifnet *ifp, u16 vtag)
5138 {
5139         struct adapter  *adapter = ifp->if_softc;
5140         u32             index, bit;
5141
5142         if (ifp->if_softc !=  arg)
5143                 return;
5144
5145         if ((vtag == 0) || (vtag > 4095))       /* Invalid */
5146                 return;
5147
5148         EM_CORE_LOCK(adapter);
5149         index = (vtag >> 5) & 0x7F;
5150         bit = vtag & 0x1F;
5151         adapter->shadow_vfta[index] &= ~(1 << bit);
5152         --adapter->num_vlans;
5153         /* Re-init to load the changes */
5154         if (ifp->if_capenable & IFCAP_VLAN_HWFILTER)
5155                 em_init_locked(adapter);
5156         EM_CORE_UNLOCK(adapter);
5157 }
5158
5159 static void
5160 em_setup_vlan_hw_support(struct adapter *adapter)
5161 {
5162         struct e1000_hw *hw = &adapter->hw;
5163         u32             reg;
5164
5165         /*
5166         ** We get here thru init_locked, meaning
5167         ** a soft reset, which has already cleared
5168         ** the VFTA and other state, so if no VLANs
5169         ** have been registered, do nothing.
5170         */
5171         if (adapter->num_vlans == 0)
5172                 return;
5173
5174         /*
5175         ** A soft reset zeroes out the VFTA, so
5176         ** we need to repopulate it now.
5177         */
5178         for (int i = 0; i < EM_VFTA_SIZE; i++)
5179                 if (adapter->shadow_vfta[i] != 0)
5180                         E1000_WRITE_REG_ARRAY(hw, E1000_VFTA,
5181                             i, adapter->shadow_vfta[i]);
5182
5183         reg = E1000_READ_REG(hw, E1000_CTRL);
5184         reg |= E1000_CTRL_VME;
5185         E1000_WRITE_REG(hw, E1000_CTRL, reg);
5186
5187         /* Enable the Filter Table */
5188         reg = E1000_READ_REG(hw, E1000_RCTL);
5189         reg &= ~E1000_RCTL_CFIEN;
5190         reg |= E1000_RCTL_VFE;
5191         E1000_WRITE_REG(hw, E1000_RCTL, reg);
5192 }
5193
5194 static void
5195 em_enable_intr(struct adapter *adapter)
5196 {
5197         struct e1000_hw *hw = &adapter->hw;
5198         u32 ims_mask = IMS_ENABLE_MASK;
5199
5200         if (hw->mac.type == e1000_82574) {
5201                 E1000_WRITE_REG(hw, EM_EIAC, EM_MSIX_MASK);
5202                 ims_mask |= EM_MSIX_MASK;
5203         } 
5204         E1000_WRITE_REG(hw, E1000_IMS, ims_mask);
5205 }
5206
5207 static void
5208 em_disable_intr(struct adapter *adapter)
5209 {
5210         struct e1000_hw *hw = &adapter->hw;
5211
5212         if (hw->mac.type == e1000_82574)
5213                 E1000_WRITE_REG(hw, EM_EIAC, 0);
5214         E1000_WRITE_REG(&adapter->hw, E1000_IMC, 0xffffffff);
5215 }
5216
5217 /*
5218  * Bit of a misnomer: what this really means is
5219  * to enable OS management of the system, i.e. to
5220  * disable the special hardware management features.
5221  */
5222 static void
5223 em_init_manageability(struct adapter *adapter)
5224 {
5225         /* A shared code workaround */
5226 #define E1000_82542_MANC2H E1000_MANC2H
5227         if (adapter->has_manage) {
5228                 int manc2h = E1000_READ_REG(&adapter->hw, E1000_MANC2H);
5229                 int manc = E1000_READ_REG(&adapter->hw, E1000_MANC);
5230
5231                 /* disable hardware interception of ARP */
5232                 manc &= ~(E1000_MANC_ARP_EN);
5233
5234                 /* enable receiving management packets to the host */
5235                 manc |= E1000_MANC_EN_MNG2HOST;
5236 #define E1000_MNG2HOST_PORT_623 (1 << 5)
5237 #define E1000_MNG2HOST_PORT_664 (1 << 6)
5238                 manc2h |= E1000_MNG2HOST_PORT_623;
5239                 manc2h |= E1000_MNG2HOST_PORT_664;
5240                 E1000_WRITE_REG(&adapter->hw, E1000_MANC2H, manc2h);
5241                 E1000_WRITE_REG(&adapter->hw, E1000_MANC, manc);
5242         }
5243 }
5244
5245 /*
5246  * Give control back to hardware management
5247  * controller if there is one.
5248  */
5249 static void
5250 em_release_manageability(struct adapter *adapter)
5251 {
5252         if (adapter->has_manage) {
5253                 int manc = E1000_READ_REG(&adapter->hw, E1000_MANC);
5254
5255                 /* re-enable hardware interception of ARP */
5256                 manc |= E1000_MANC_ARP_EN;
5257                 manc &= ~E1000_MANC_EN_MNG2HOST;
5258
5259                 E1000_WRITE_REG(&adapter->hw, E1000_MANC, manc);
5260         }
5261 }
5262
5263 /*
5264  * em_get_hw_control sets the {CTRL_EXT|FWSM}:DRV_LOAD bit.
5265  * For ASF and Pass Through versions of f/w this means
5266  * that the driver is loaded. For AMT versions of the
5267  * f/w this means that the network i/f is open.
5268  */
5269 static void
5270 em_get_hw_control(struct adapter *adapter)
5271 {
5272         u32 ctrl_ext, swsm;
5273
5274         if (adapter->hw.mac.type == e1000_82573) {
5275                 swsm = E1000_READ_REG(&adapter->hw, E1000_SWSM);
5276                 E1000_WRITE_REG(&adapter->hw, E1000_SWSM,
5277                     swsm | E1000_SWSM_DRV_LOAD);
5278                 return;
5279         }
5280         /* else */
5281         ctrl_ext = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
5282         E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT,
5283             ctrl_ext | E1000_CTRL_EXT_DRV_LOAD);
5284         return;
5285 }
5286
5287 /*
5288  * em_release_hw_control resets {CTRL_EXT|FWSM}:DRV_LOAD bit.
5289  * For ASF and Pass Through versions of f/w this means that
5290  * the driver is no longer loaded. For AMT versions of the
5291  * f/w this means that the network i/f is closed.
5292  */
5293 static void
5294 em_release_hw_control(struct adapter *adapter)
5295 {
5296         u32 ctrl_ext, swsm;
5297
5298         if (!adapter->has_manage)
5299                 return;
5300
5301         if (adapter->hw.mac.type == e1000_82573) {
5302                 swsm = E1000_READ_REG(&adapter->hw, E1000_SWSM);
5303                 E1000_WRITE_REG(&adapter->hw, E1000_SWSM,
5304                     swsm & ~E1000_SWSM_DRV_LOAD);
5305                 return;
5306         }
5307         /* else */
5308         ctrl_ext = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
5309         E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT,
5310             ctrl_ext & ~E1000_CTRL_EXT_DRV_LOAD);
5311         return;
5312 }
5313
5314 static int
5315 em_is_valid_ether_addr(u8 *addr)
5316 {
5317         char zero_addr[6] = { 0, 0, 0, 0, 0, 0 };
5318
5319         if ((addr[0] & 1) || (!bcmp(addr, zero_addr, ETHER_ADDR_LEN))) {
5320                 return (FALSE);
5321         }
5322
5323         return (TRUE);
5324 }
5325
5326 /*
5327 ** Parse the interface capabilities with regard
5328 ** to both system management and wake-on-lan for
5329 ** later use.
5330 */
5331 static void
5332 em_get_wakeup(device_t dev)
5333 {
5334         struct adapter  *adapter = device_get_softc(dev);
5335         u16             eeprom_data = 0, device_id, apme_mask;
5336
5337         adapter->has_manage = e1000_enable_mng_pass_thru(&adapter->hw);
5338         apme_mask = EM_EEPROM_APME;
5339
5340         switch (adapter->hw.mac.type) {
5341         case e1000_82573:
5342         case e1000_82583:
5343                 adapter->has_amt = TRUE;
5344                 /* Falls thru */
5345         case e1000_82571:
5346         case e1000_82572:
5347         case e1000_80003es2lan:
5348                 if (adapter->hw.bus.func == 1) {
5349                         e1000_read_nvm(&adapter->hw,
5350                             NVM_INIT_CONTROL3_PORT_B, 1, &eeprom_data);
5351                         break;
5352                 } else
5353                         e1000_read_nvm(&adapter->hw,
5354                             NVM_INIT_CONTROL3_PORT_A, 1, &eeprom_data);
5355                 break;
5356         case e1000_ich8lan:
5357         case e1000_ich9lan:
5358         case e1000_ich10lan:
5359         case e1000_pchlan:
5360         case e1000_pch2lan:
5361         case e1000_pch_lpt:
5362         case e1000_pch_spt:
5363         case e1000_pch_cnp:
5364                 apme_mask = E1000_WUC_APME;
5365                 adapter->has_amt = TRUE;
5366                 eeprom_data = E1000_READ_REG(&adapter->hw, E1000_WUC);
5367                 break;
5368         default:
5369                 e1000_read_nvm(&adapter->hw,
5370                     NVM_INIT_CONTROL3_PORT_A, 1, &eeprom_data);
5371                 break;
5372         }
5373         if (eeprom_data & apme_mask)
5374                 adapter->wol = (E1000_WUFC_MAG | E1000_WUFC_MC);
5375         /*
5376          * We have the eeprom settings, now apply the special cases
5377          * where the eeprom may be wrong or the board won't support
5378          * wake on lan on a particular port
5379          */
5380         device_id = pci_get_device(dev);
5381         switch (device_id) {
5382         case E1000_DEV_ID_82571EB_FIBER:
5383                 /* Wake events only supported on port A for dual fiber
5384                  * regardless of eeprom setting */
5385                 if (E1000_READ_REG(&adapter->hw, E1000_STATUS) &
5386                     E1000_STATUS_FUNC_1)
5387                         adapter->wol = 0;
5388                 break;
5389         case E1000_DEV_ID_82571EB_QUAD_COPPER:
5390         case E1000_DEV_ID_82571EB_QUAD_FIBER:
5391         case E1000_DEV_ID_82571EB_QUAD_COPPER_LP:
5392                 /* if quad port adapter, disable WoL on all but port A */
5393                 if (global_quad_port_a != 0)
5394                         adapter->wol = 0;
5395                 /* Reset for multiple quad port adapters */
5396                 if (++global_quad_port_a == 4)
5397                         global_quad_port_a = 0;
5398                 break;
5399         }
5400         return;
5401 }
5402
5403
5404 /*
5405  * Enable PCI Wake On Lan capability
5406  */
5407 static void
5408 em_enable_wakeup(device_t dev)
5409 {
5410         struct adapter  *adapter = device_get_softc(dev);
5411         struct ifnet    *ifp = adapter->ifp;
5412         int             error = 0;
5413         u32             pmc, ctrl, ctrl_ext, rctl;
5414         u16             status;
5415
5416         if (pci_find_cap(dev, PCIY_PMG, &pmc) != 0)
5417                 return;
5418
5419         /*
5420         ** Determine type of Wakeup: note that wol
5421         ** is set with all bits on by default.
5422         */
5423         if ((ifp->if_capenable & IFCAP_WOL_MAGIC) == 0)
5424                 adapter->wol &= ~E1000_WUFC_MAG;
5425
5426         if ((ifp->if_capenable & IFCAP_WOL_MCAST) == 0)
5427                 adapter->wol &= ~E1000_WUFC_MC;
5428         else {
5429                 rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
5430                 rctl |= E1000_RCTL_MPE;
5431                 E1000_WRITE_REG(&adapter->hw, E1000_RCTL, rctl);
5432         }
5433
5434         if (!(adapter->wol & (E1000_WUFC_EX | E1000_WUFC_MAG | E1000_WUFC_MC)))
5435                 goto pme;
5436
5437         /* Advertise the wakeup capability */
5438         ctrl = E1000_READ_REG(&adapter->hw, E1000_CTRL);
5439         ctrl |= (E1000_CTRL_SWDPIN2 | E1000_CTRL_SWDPIN3);
5440         E1000_WRITE_REG(&adapter->hw, E1000_CTRL, ctrl);
5441
5442         /* Keep the laser running on Fiber adapters */
5443         if (adapter->hw.phy.media_type == e1000_media_type_fiber ||
5444             adapter->hw.phy.media_type == e1000_media_type_internal_serdes) {
5445                 ctrl_ext = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
5446                 ctrl_ext |= E1000_CTRL_EXT_SDP3_DATA;
5447                 E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT, ctrl_ext);
5448         }
5449
5450         if ((adapter->hw.mac.type == e1000_ich8lan) ||
5451             (adapter->hw.mac.type == e1000_pchlan) ||
5452             (adapter->hw.mac.type == e1000_ich9lan) ||
5453             (adapter->hw.mac.type == e1000_ich10lan))
5454                 e1000_suspend_workarounds_ich8lan(&adapter->hw);
5455
5456         if ((adapter->hw.mac.type == e1000_pchlan)  ||
5457             (adapter->hw.mac.type == e1000_pch2lan) ||
5458             (adapter->hw.mac.type == e1000_pch_lpt) ||
5459             (adapter->hw.mac.type == e1000_pch_spt)) {
5460                 error = em_enable_phy_wakeup(adapter);
5461                 if (error)
5462                         goto pme;
5463         } else {
5464                 /* Enable wakeup by the MAC */
5465                 E1000_WRITE_REG(&adapter->hw, E1000_WUC, E1000_WUC_PME_EN);
5466                 E1000_WRITE_REG(&adapter->hw, E1000_WUFC, adapter->wol);
5467         }
5468
5469         if (adapter->hw.phy.type == e1000_phy_igp_3)
5470                 e1000_igp3_phy_powerdown_workaround_ich8lan(&adapter->hw);
5471
5472 pme:
5473         status = pci_read_config(dev, pmc + PCIR_POWER_STATUS, 2);
5474         status &= ~(PCIM_PSTAT_PME | PCIM_PSTAT_PMEENABLE);
5475         if (!error && (ifp->if_capenable & IFCAP_WOL))
5476                 status |= PCIM_PSTAT_PME | PCIM_PSTAT_PMEENABLE;
5477         pci_write_config(dev, pmc + PCIR_POWER_STATUS, status, 2);
5478
5479         return;
5480 }
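
/*
 * Illustrative usage from userland (assuming the interface attached
 * as em0): the IFCAP_WOL_* bits consulted above map to ifconfig(8)
 * flags, so magic-packet-only wake could be selected with something
 * like:
 *
 *   # ifconfig em0 wol_magic -wol_mcast -wol_ucast
 *
 * followed by a suspend; the PME status/enable bits written at the
 * "pme" label are what actually arm the PCI power-management event.
 */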
5481
5482 /*
5483 ** WOL in the newer chipset interfaces (pchlan)
5484 ** requires things to be copied into the PHY
5485 */
5486 static int
5487 em_enable_phy_wakeup(struct adapter *adapter)
5488 {
5489         struct e1000_hw *hw = &adapter->hw;
5490         u32 mreg, ret = 0;
5491         u16 preg;
5492
5493         /* copy MAC RARs to PHY RARs */
5494         e1000_copy_rx_addrs_to_phy_ich8lan(hw);
5495
5496         /* copy MAC MTA to PHY MTA */
5497         for (int i = 0; i < hw->mac.mta_reg_count; i++) {
5498                 mreg = E1000_READ_REG_ARRAY(hw, E1000_MTA, i);
5499                 e1000_write_phy_reg(hw, BM_MTA(i), (u16)(mreg & 0xFFFF));
5500                 e1000_write_phy_reg(hw, BM_MTA(i) + 1,
5501                     (u16)((mreg >> 16) & 0xFFFF));
5502         }
5503
5504         /* configure PHY Rx Control register */
5505         e1000_read_phy_reg(hw, BM_RCTL, &preg);
5506         mreg = E1000_READ_REG(hw, E1000_RCTL);
5507         if (mreg & E1000_RCTL_UPE)
5508                 preg |= BM_RCTL_UPE;
5509         if (mreg & E1000_RCTL_MPE)
5510                 preg |= BM_RCTL_MPE;
5511         preg &= ~(BM_RCTL_MO_MASK);
5512         if (mreg & E1000_RCTL_MO_3)
5513                 preg |= (((mreg & E1000_RCTL_MO_3) >> E1000_RCTL_MO_SHIFT)
5514                                 << BM_RCTL_MO_SHIFT);
5515         if (mreg & E1000_RCTL_BAM)
5516                 preg |= BM_RCTL_BAM;
5517         if (mreg & E1000_RCTL_PMCF)
5518                 preg |= BM_RCTL_PMCF;
5519         mreg = E1000_READ_REG(hw, E1000_CTRL);
5520         if (mreg & E1000_CTRL_RFCE)
5521                 preg |= BM_RCTL_RFCE;
5522         e1000_write_phy_reg(hw, BM_RCTL, preg);
5523
5524         /* enable PHY wakeup in MAC register */
5525         E1000_WRITE_REG(hw, E1000_WUC,
5526             E1000_WUC_PHY_WAKE | E1000_WUC_PME_EN);
5527         E1000_WRITE_REG(hw, E1000_WUFC, adapter->wol);
5528
5529         /* configure and enable PHY wakeup in PHY registers */
5530         e1000_write_phy_reg(hw, BM_WUFC, adapter->wol);
5531         e1000_write_phy_reg(hw, BM_WUC, E1000_WUC_PME_EN);
5532
5533         /* activate PHY wakeup */
5534         ret = hw->phy.ops.acquire(hw);
5535         if (ret) {
5536                 printf("Could not acquire PHY\n");
5537                 return ret;
5538         }
5539         e1000_write_phy_reg_mdic(hw, IGP01E1000_PHY_PAGE_SELECT,
5540                                  (BM_WUC_ENABLE_PAGE << IGP_PAGE_SHIFT));
5541         ret = e1000_read_phy_reg_mdic(hw, BM_WUC_ENABLE_REG, &preg);
5542         if (ret) {
5543                 printf("Could not read PHY page 769\n");
5544                 goto out;
5545         }
5546         preg |= BM_WUC_ENABLE_BIT | BM_WUC_HOST_WU_BIT;
5547         ret = e1000_write_phy_reg_mdic(hw, BM_WUC_ENABLE_REG, preg);
5548         if (ret)
5549                 printf("Could not set PHY Host Wakeup bit\n");
5550 out:
5551         hw->phy.ops.release(hw);
5552
5553         return ret;
5554 }
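
/*
 * Background on the page dance above (a sketch, based on the BM/PCH
 * PHY layout the BM_* constants assume): the wakeup control and
 * filter registers live on a separate PHY page, and the enable bits
 * on page 769 (BM_WUC_ENABLE_PAGE) must be set before that page is
 * usable.  Setting BM_WUC_ENABLE_BIT | BM_WUC_HOST_WU_BIT lets the
 * PHY, which stays powered in D3, recognize wake events while the
 * MAC is down.
 */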
5555
5556 static void
5557 em_led_func(void *arg, int onoff)
5558 {
5559         struct adapter  *adapter = arg;
5560  
5561         EM_CORE_LOCK(adapter);
5562         if (onoff) {
5563                 e1000_setup_led(&adapter->hw);
5564                 e1000_led_on(&adapter->hw);
5565         } else {
5566                 e1000_led_off(&adapter->hw);
5567                 e1000_cleanup_led(&adapter->hw);
5568         }
5569         EM_CORE_UNLOCK(adapter);
5570 }
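
/*
 * This callback is registered with led(4) elsewhere in the driver so
 * the identify LED can be driven from userland; illustratively (the
 * node name depends on what was passed to led_create() at attach):
 *
 *   # echo 1 > /dev/led/em0     LED on
 *   # echo 0 > /dev/led/em0     LED off
 */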
5571
5572 /*
5573 ** Disable the L0S and L1 LINK states
5574 */
5575 static void
5576 em_disable_aspm(struct adapter *adapter)
5577 {
5578         int             base, reg;
5579         u16             link_cap, link_ctrl;
5580         device_t        dev = adapter->dev;
5581
5582         switch (adapter->hw.mac.type) {
5583                 case e1000_82573:
5584                 case e1000_82574:
5585                 case e1000_82583:
5586                         break;
5587                 default:
5588                         return;
5589         }
5590         if (pci_find_cap(dev, PCIY_EXPRESS, &base) != 0)
5591                 return;
5592         reg = base + PCIER_LINK_CAP;
5593         link_cap = pci_read_config(dev, reg, 2);
5594         if ((link_cap & PCIEM_LINK_CAP_ASPM) == 0)
5595                 return;
5596         reg = base + PCIER_LINK_CTL;
5597         link_ctrl = pci_read_config(dev, reg, 2);
5598         link_ctrl &= ~PCIEM_LINK_CTL_ASPMC;
5599         pci_write_config(dev, reg, link_ctrl, 2);
5600         return;
5601 }
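
/*
 * For a field check (illustrative; the selector shown is hypothetical)
 * the express capability of the NIC can be inspected from userland
 * with pciconf(8) after this runs:
 *
 *   # pciconf -lc pci0:2:0:0
 *
 * Note the write above clears only the ASPM control bits of the link
 * control register, leaving the rest of the register intact.
 */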
5602
5603 /**********************************************************************
5604  *
5605  *  Update the board statistics counters.
5606  *
5607  **********************************************************************/
5608 static void
5609 em_update_stats_counters(struct adapter *adapter)
5610 {
5611         struct ifnet   *ifp;
5612
5613         if (adapter->hw.phy.media_type == e1000_media_type_copper ||
5614            (E1000_READ_REG(&adapter->hw, E1000_STATUS) & E1000_STATUS_LU)) {
5615                 adapter->stats.symerrs += E1000_READ_REG(&adapter->hw, E1000_SYMERRS);
5616                 adapter->stats.sec += E1000_READ_REG(&adapter->hw, E1000_SEC);
5617         }
5618         adapter->stats.crcerrs += E1000_READ_REG(&adapter->hw, E1000_CRCERRS);
5619         adapter->stats.mpc += E1000_READ_REG(&adapter->hw, E1000_MPC);
5620         adapter->stats.scc += E1000_READ_REG(&adapter->hw, E1000_SCC);
5621         adapter->stats.ecol += E1000_READ_REG(&adapter->hw, E1000_ECOL);
5622
5623         adapter->stats.mcc += E1000_READ_REG(&adapter->hw, E1000_MCC);
5624         adapter->stats.latecol += E1000_READ_REG(&adapter->hw, E1000_LATECOL);
5625         adapter->stats.colc += E1000_READ_REG(&adapter->hw, E1000_COLC);
5626         adapter->stats.dc += E1000_READ_REG(&adapter->hw, E1000_DC);
5627         adapter->stats.rlec += E1000_READ_REG(&adapter->hw, E1000_RLEC);
5628         adapter->stats.xonrxc += E1000_READ_REG(&adapter->hw, E1000_XONRXC);
5629         adapter->stats.xontxc += E1000_READ_REG(&adapter->hw, E1000_XONTXC);
5630         adapter->stats.xoffrxc += E1000_READ_REG(&adapter->hw, E1000_XOFFRXC);
5631         adapter->stats.xofftxc += E1000_READ_REG(&adapter->hw, E1000_XOFFTXC);
5632         adapter->stats.fcruc += E1000_READ_REG(&adapter->hw, E1000_FCRUC);
5633         adapter->stats.prc64 += E1000_READ_REG(&adapter->hw, E1000_PRC64);
5634         adapter->stats.prc127 += E1000_READ_REG(&adapter->hw, E1000_PRC127);
5635         adapter->stats.prc255 += E1000_READ_REG(&adapter->hw, E1000_PRC255);
5636         adapter->stats.prc511 += E1000_READ_REG(&adapter->hw, E1000_PRC511);
5637         adapter->stats.prc1023 += E1000_READ_REG(&adapter->hw, E1000_PRC1023);
5638         adapter->stats.prc1522 += E1000_READ_REG(&adapter->hw, E1000_PRC1522);
5639         adapter->stats.gprc += E1000_READ_REG(&adapter->hw, E1000_GPRC);
5640         adapter->stats.bprc += E1000_READ_REG(&adapter->hw, E1000_BPRC);
5641         adapter->stats.mprc += E1000_READ_REG(&adapter->hw, E1000_MPRC);
5642         adapter->stats.gptc += E1000_READ_REG(&adapter->hw, E1000_GPTC);
5643
5644         /* For the 64-bit byte counters the low dword must be read first. */
5645         /* Both registers clear on the read of the high dword */
5646
5647         adapter->stats.gorc += E1000_READ_REG(&adapter->hw, E1000_GORCL) +
5648             ((u64)E1000_READ_REG(&adapter->hw, E1000_GORCH) << 32);
5649         adapter->stats.gotc += E1000_READ_REG(&adapter->hw, E1000_GOTCL) +
5650             ((u64)E1000_READ_REG(&adapter->hw, E1000_GOTCH) << 32);
5651
5652         adapter->stats.rnbc += E1000_READ_REG(&adapter->hw, E1000_RNBC);
5653         adapter->stats.ruc += E1000_READ_REG(&adapter->hw, E1000_RUC);
5654         adapter->stats.rfc += E1000_READ_REG(&adapter->hw, E1000_RFC);
5655         adapter->stats.roc += E1000_READ_REG(&adapter->hw, E1000_ROC);
5656         adapter->stats.rjc += E1000_READ_REG(&adapter->hw, E1000_RJC);
5657
5658         adapter->stats.tor += E1000_READ_REG(&adapter->hw, E1000_TORH);
5659         adapter->stats.tot += E1000_READ_REG(&adapter->hw, E1000_TOTH);
5660
5661         adapter->stats.tpr += E1000_READ_REG(&adapter->hw, E1000_TPR);
5662         adapter->stats.tpt += E1000_READ_REG(&adapter->hw, E1000_TPT);
5663         adapter->stats.ptc64 += E1000_READ_REG(&adapter->hw, E1000_PTC64);
5664         adapter->stats.ptc127 += E1000_READ_REG(&adapter->hw, E1000_PTC127);
5665         adapter->stats.ptc255 += E1000_READ_REG(&adapter->hw, E1000_PTC255);
5666         adapter->stats.ptc511 += E1000_READ_REG(&adapter->hw, E1000_PTC511);
5667         adapter->stats.ptc1023 += E1000_READ_REG(&adapter->hw, E1000_PTC1023);
5668         adapter->stats.ptc1522 += E1000_READ_REG(&adapter->hw, E1000_PTC1522);
5669         adapter->stats.mptc += E1000_READ_REG(&adapter->hw, E1000_MPTC);
5670         adapter->stats.bptc += E1000_READ_REG(&adapter->hw, E1000_BPTC);
5671
5672         /* Interrupt Counts */
5673
5674         adapter->stats.iac += E1000_READ_REG(&adapter->hw, E1000_IAC);
5675         adapter->stats.icrxptc += E1000_READ_REG(&adapter->hw, E1000_ICRXPTC);
5676         adapter->stats.icrxatc += E1000_READ_REG(&adapter->hw, E1000_ICRXATC);
5677         adapter->stats.ictxptc += E1000_READ_REG(&adapter->hw, E1000_ICTXPTC);
5678         adapter->stats.ictxatc += E1000_READ_REG(&adapter->hw, E1000_ICTXATC);
5679         adapter->stats.ictxqec += E1000_READ_REG(&adapter->hw, E1000_ICTXQEC);
5680         adapter->stats.ictxqmtc += E1000_READ_REG(&adapter->hw, E1000_ICTXQMTC);
5681         adapter->stats.icrxdmtc += E1000_READ_REG(&adapter->hw, E1000_ICRXDMTC);
5682         adapter->stats.icrxoc += E1000_READ_REG(&adapter->hw, E1000_ICRXOC);
5683
5684         if (adapter->hw.mac.type >= e1000_82543) {
5685                 adapter->stats.algnerrc +=
5686                     E1000_READ_REG(&adapter->hw, E1000_ALGNERRC);
5687                 adapter->stats.rxerrc +=
5688                     E1000_READ_REG(&adapter->hw, E1000_RXERRC);
5689                 adapter->stats.tncrs +=
5690                     E1000_READ_REG(&adapter->hw, E1000_TNCRS);
5691                 adapter->stats.cexterr +=
5692                     E1000_READ_REG(&adapter->hw, E1000_CEXTERR);
5693                 adapter->stats.tsctc +=
5694                     E1000_READ_REG(&adapter->hw, E1000_TSCTC);
5695                 adapter->stats.tsctfc +=
5696                     E1000_READ_REG(&adapter->hw, E1000_TSCTFC);
5697         }
5698         ifp = adapter->ifp;
5699
5700         ifp->if_collisions = adapter->stats.colc;
5701
5702         /* Rx Errors */
5703         ifp->if_ierrors = adapter->dropped_pkts + adapter->stats.rxerrc +
5704             adapter->stats.crcerrs + adapter->stats.algnerrc +
5705             adapter->stats.ruc + adapter->stats.roc +
5706             adapter->stats.mpc + adapter->stats.cexterr;
5707
5708         /* Tx Errors */
5709         ifp->if_oerrors = adapter->stats.ecol +
5710             adapter->stats.latecol + adapter->watchdog_events;
5711 }
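
/*
 * The if_ierrors/if_oerrors sums above are what netstat(1) reports in
 * its Ierrs/Oerrs columns (and if_collisions in Colls), so a quick
 * field check of this bookkeeping is simply:
 *
 *   $ netstat -I em0
 */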
5712
5713 /* Export a single 32-bit register via a read-only sysctl. */
5714 static int
5715 em_sysctl_reg_handler(SYSCTL_HANDLER_ARGS)
5716 {
5717         struct adapter *adapter;
5718         u_int val;
5719
5720         adapter = oidp->oid_arg1;
5721         val = E1000_READ_REG(&adapter->hw, oidp->oid_arg2);
5722         return (sysctl_handle_int(oidp, &val, 0, req));
5723 }
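
/*
 * Minimal usage sketch: em_add_hw_stats() below registers this handler
 * with a register offset in oid_arg2, so once the device is attached
 * the raw device and receive control registers can be read with e.g.:
 *
 *   $ sysctl dev.em.0.device_control dev.em.0.rx_control
 */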
5724
5725 /*
5726  * Add sysctl variables, one per statistic, to the system.
5727  */
5728 static void
5729 em_add_hw_stats(struct adapter *adapter)
5730 {
5731         device_t dev = adapter->dev;
5732
5733         struct tx_ring *txr = adapter->tx_rings;
5734         struct rx_ring *rxr = adapter->rx_rings;
5735
5736         struct sysctl_ctx_list *ctx = device_get_sysctl_ctx(dev);
5737         struct sysctl_oid *tree = device_get_sysctl_tree(dev);
5738         struct sysctl_oid_list *child = SYSCTL_CHILDREN(tree);
5739         struct e1000_hw_stats *stats = &adapter->stats;
5740
5741         struct sysctl_oid *stat_node, *queue_node, *int_node;
5742         struct sysctl_oid_list *stat_list, *queue_list, *int_list;
5743
5744 #define QUEUE_NAME_LEN 32
5745         char namebuf[QUEUE_NAME_LEN];
5746         
5747         /* Driver Statistics */
5748         SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "dropped", 
5749                         CTLFLAG_RD, &adapter->dropped_pkts,
5750                         "Driver dropped packets");
5751         SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "link_irq",
5752                         CTLFLAG_RD, &adapter->link_irq,
5753                         "Link MSIX IRQ Handled");
5754         SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "mbuf_defrag_fail", 
5755                          CTLFLAG_RD, &adapter->mbuf_defrag_failed,
5756                          "Defragmenting mbuf chain failed");
5757         SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "tx_dma_fail", 
5758                         CTLFLAG_RD, &adapter->no_tx_dma_setup,
5759                         "Driver tx dma failure in xmit");
5760         SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "rx_overruns",
5761                         CTLFLAG_RD, &adapter->rx_overruns,
5762                         "RX overruns");
5763         SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "watchdog_timeouts",
5764                         CTLFLAG_RD, &adapter->watchdog_events,
5765                         "Watchdog timeouts");
5766         
5767         SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "device_control",
5768                         CTLTYPE_UINT | CTLFLAG_RD, adapter, E1000_CTRL,
5769                         em_sysctl_reg_handler, "IU",
5770                         "Device Control Register");
5771         SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "rx_control",
5772                         CTLTYPE_UINT | CTLFLAG_RD, adapter, E1000_RCTL,
5773                         em_sysctl_reg_handler, "IU",
5774                         "Receiver Control Register");
5775         SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "fc_high_water",
5776                         CTLFLAG_RD, &adapter->hw.fc.high_water, 0,
5777                         "Flow Control High Watermark");
5778         SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "fc_low_water", 
5779                         CTLFLAG_RD, &adapter->hw.fc.low_water, 0,
5780                         "Flow Control Low Watermark");
5781
5782         for (int i = 0; i < adapter->num_queues; i++, txr++, rxr++) {
5783                 snprintf(namebuf, QUEUE_NAME_LEN, "queue_tx_%d", i);
5784                 queue_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, namebuf,
5785                                             CTLFLAG_RD, NULL, "TX Queue Name");
5786                 queue_list = SYSCTL_CHILDREN(queue_node);
5787
5788                 SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "txd_head", 
5789                                 CTLTYPE_UINT | CTLFLAG_RD, adapter,
5790                                 E1000_TDH(txr->me),
5791                                 em_sysctl_reg_handler, "IU",
5792                                 "Transmit Descriptor Head");
5793                 SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "txd_tail", 
5794                                 CTLTYPE_UINT | CTLFLAG_RD, adapter,
5795                                 E1000_TDT(txr->me),
5796                                 em_sysctl_reg_handler, "IU",
5797                                 "Transmit Descriptor Tail");
5798                 SYSCTL_ADD_ULONG(ctx, queue_list, OID_AUTO, "tx_irq",
5799                                 CTLFLAG_RD, &txr->tx_irq,
5800                                 "Queue MSI-X Transmit Interrupts");
5801                 SYSCTL_ADD_ULONG(ctx, queue_list, OID_AUTO, "no_desc_avail", 
5802                                 CTLFLAG_RD, &txr->no_desc_avail,
5803                                 "Queue No Descriptor Available");
5804
5805                 snprintf(namebuf, QUEUE_NAME_LEN, "queue_rx_%d", i);
5806                 queue_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, namebuf,
5807                                             CTLFLAG_RD, NULL, "RX Queue Name");
5808                 queue_list = SYSCTL_CHILDREN(queue_node);
5809
5810                 SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "rxd_head", 
5811                                 CTLTYPE_UINT | CTLFLAG_RD, adapter,
5812                                 E1000_RDH(rxr->me),
5813                                 em_sysctl_reg_handler, "IU",
5814                                 "Receive Descriptor Head");
5815                 SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "rxd_tail", 
5816                                 CTLTYPE_UINT | CTLFLAG_RD, adapter,
5817                                 E1000_RDT(rxr->me),
5818                                 em_sysctl_reg_handler, "IU",
5819                                 "Receive Descriptor Tail");
5820                 SYSCTL_ADD_ULONG(ctx, queue_list, OID_AUTO, "rx_irq",
5821                                 CTLFLAG_RD, &rxr->rx_irq,
5822                                 "Queue MSI-X Receive Interrupts");
5823         }
5824
5825         /* MAC stats get their own sub node */
5826
5827         stat_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "mac_stats", 
5828                                     CTLFLAG_RD, NULL, "Statistics");
5829         stat_list = SYSCTL_CHILDREN(stat_node);
5830
5831         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "excess_coll",
5832                         CTLFLAG_RD, &stats->ecol,
5833                         "Excessive collisions");
5834         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "single_coll",
5835                         CTLFLAG_RD, &stats->scc,
5836                         "Single collisions");
5837         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "multiple_coll",
5838                         CTLFLAG_RD, &stats->mcc,
5839                         "Multiple collisions");
5840         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "late_coll",
5841                         CTLFLAG_RD, &stats->latecol,
5842                         "Late collisions");
5843         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "collision_count",
5844                         CTLFLAG_RD, &stats->colc,
5845                         "Collision Count");
5846         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "symbol_errors",
5847                         CTLFLAG_RD, &adapter->stats.symerrs,
5848                         "Symbol Errors");
5849         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "sequence_errors",
5850                         CTLFLAG_RD, &adapter->stats.sec,
5851                         "Sequence Errors");
5852         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "defer_count",
5853                         CTLFLAG_RD, &adapter->stats.dc,
5854                         "Defer Count");
5855         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "missed_packets",
5856                         CTLFLAG_RD, &adapter->stats.mpc,
5857                         "Missed Packets");
5858         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_no_buff",
5859                         CTLFLAG_RD, &adapter->stats.rnbc,
5860                         "Receive No Buffers");
5861         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_undersize",
5862                         CTLFLAG_RD, &adapter->stats.ruc,
5863                         "Receive Undersize");
5864         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_fragmented",
5865                         CTLFLAG_RD, &adapter->stats.rfc,
5866                         "Fragmented Packets Received ");
5867         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_oversize",
5868                         CTLFLAG_RD, &adapter->stats.roc,
5869                         "Oversized Packets Received");
5870         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_jabber",
5871                         CTLFLAG_RD, &adapter->stats.rjc,
5872                         "Recevied Jabber");
5873         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_errs",
5874                         CTLFLAG_RD, &adapter->stats.rxerrc,
5875                         "Receive Errors");
5876         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "crc_errs",
5877                         CTLFLAG_RD, &adapter->stats.crcerrs,
5878                         "CRC errors");
5879         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "alignment_errs",
5880                         CTLFLAG_RD, &adapter->stats.algnerrc,
5881                         "Alignment Errors");
5882         /* On 82575 these are collision counts */
5883         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "coll_ext_errs",
5884                         CTLFLAG_RD, &adapter->stats.cexterr,
5885                         "Collision/Carrier extension errors");
5886         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "xon_recvd",
5887                         CTLFLAG_RD, &adapter->stats.xonrxc,
5888                         "XON Received");
5889         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "xon_txd",
5890                         CTLFLAG_RD, &adapter->stats.xontxc,
5891                         "XON Transmitted");
5892         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "xoff_recvd",
5893                         CTLFLAG_RD, &adapter->stats.xoffrxc,
5894                         "XOFF Received");
5895         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "xoff_txd",
5896                         CTLFLAG_RD, &adapter->stats.xofftxc,
5897                         "XOFF Transmitted");
5898
5899         /* Packet Reception Stats */
5900         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "total_pkts_recvd",
5901                         CTLFLAG_RD, &adapter->stats.tpr,
5902                         "Total Packets Received ");
5903         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_pkts_recvd",
5904                         CTLFLAG_RD, &adapter->stats.gprc,
5905                         "Good Packets Received");
5906         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "bcast_pkts_recvd",
5907                         CTLFLAG_RD, &adapter->stats.bprc,
5908                         "Broadcast Packets Received");
5909         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "mcast_pkts_recvd",
5910                         CTLFLAG_RD, &adapter->stats.mprc,
5911                         "Multicast Packets Received");
5912         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_64",
5913                         CTLFLAG_RD, &adapter->stats.prc64,
5914                         "64 byte frames received ");
5915         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_65_127",
5916                         CTLFLAG_RD, &adapter->stats.prc127,
5917                         "65-127 byte frames received");
5918         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_128_255",
5919                         CTLFLAG_RD, &adapter->stats.prc255,
5920                         "128-255 byte frames received");
5921         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_256_511",
5922                         CTLFLAG_RD, &adapter->stats.prc511,
5923                         "256-511 byte frames received");
5924         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_512_1023",
5925                         CTLFLAG_RD, &adapter->stats.prc1023,
5926                         "512-1023 byte frames received");
5927         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_1024_1522",
5928                         CTLFLAG_RD, &adapter->stats.prc1522,
5929                         "1023-1522 byte frames received");
5930         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_octets_recvd",
5931                         CTLFLAG_RD, &adapter->stats.gorc, 
5932                         "Good Octets Received"); 
5933
5934         /* Packet Transmission Stats */
5935         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_octets_txd",
5936                         CTLFLAG_RD, &adapter->stats.gotc, 
5937                         "Good Octets Transmitted"); 
5938         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "total_pkts_txd",
5939                         CTLFLAG_RD, &adapter->stats.tpt,
5940                         "Total Packets Transmitted");
5941         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_pkts_txd",
5942                         CTLFLAG_RD, &adapter->stats.gptc,
5943                         "Good Packets Transmitted");
5944         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "bcast_pkts_txd",
5945                         CTLFLAG_RD, &adapter->stats.bptc,
5946                         "Broadcast Packets Transmitted");
5947         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "mcast_pkts_txd",
5948                         CTLFLAG_RD, &adapter->stats.mptc,
5949                         "Multicast Packets Transmitted");
5950         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_64",
5951                         CTLFLAG_RD, &adapter->stats.ptc64,
5952                         "64 byte frames transmitted ");
5953         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_65_127",
5954                         CTLFLAG_RD, &adapter->stats.ptc127,
5955                         "65-127 byte frames transmitted");
5956         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_128_255",
5957                         CTLFLAG_RD, &adapter->stats.ptc255,
5958                         "128-255 byte frames transmitted");
5959         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_256_511",
5960                         CTLFLAG_RD, &adapter->stats.ptc511,
5961                         "256-511 byte frames transmitted");
5962         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_512_1023",
5963                         CTLFLAG_RD, &adapter->stats.ptc1023,
5964                         "512-1023 byte frames transmitted");
5965         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_1024_1522",
5966                         CTLFLAG_RD, &adapter->stats.ptc1522,
5967                         "1024-1522 byte frames transmitted");
5968         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tso_txd",
5969                         CTLFLAG_RD, &adapter->stats.tsctc,
5970                         "TSO Contexts Transmitted");
5971         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tso_ctx_fail",
5972                         CTLFLAG_RD, &adapter->stats.tsctfc,
5973                         "TSO Contexts Failed");
5974
5975
5976         /* Interrupt Stats */
5977
5978         int_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "interrupts", 
5979                                     CTLFLAG_RD, NULL, "Interrupt Statistics");
5980         int_list = SYSCTL_CHILDREN(int_node);
5981
5982         SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "asserts",
5983                         CTLFLAG_RD, &adapter->stats.iac,
5984                         "Interrupt Assertion Count");
5985
5986         SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "rx_pkt_timer",
5987                         CTLFLAG_RD, &adapter->stats.icrxptc,
5988                         "Interrupt Cause Rx Pkt Timer Expire Count");
5989
5990         SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "rx_abs_timer",
5991                         CTLFLAG_RD, &adapter->stats.icrxatc,
5992                         "Interrupt Cause Rx Abs Timer Expire Count");
5993
5994         SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "tx_pkt_timer",
5995                         CTLFLAG_RD, &adapter->stats.ictxptc,
5996                         "Interrupt Cause Tx Pkt Timer Expire Count");
5997
5998         SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "tx_abs_timer",
5999                         CTLFLAG_RD, &adapter->stats.ictxatc,
6000                         "Interrupt Cause Tx Abs Timer Expire Count");
6001
6002         SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "tx_queue_empty",
6003                         CTLFLAG_RD, &adapter->stats.ictxqec,
6004                         "Interrupt Cause Tx Queue Empty Count");
6005
6006         SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "tx_queue_min_thresh",
6007                         CTLFLAG_RD, &adapter->stats.ictxqmtc,
6008                         "Interrupt Cause Tx Queue Min Thresh Count");
6009
6010         SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "rx_desc_min_thresh",
6011                         CTLFLAG_RD, &adapter->stats.icrxdmtc,
6012                         "Interrupt Cause Rx Desc Min Thresh Count");
6013
6014         SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "rx_overrun",
6015                         CTLFLAG_RD, &adapter->stats.icrxoc,
6016                         "Interrupt Cause Receiver Overrun Count");
6017 }
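
/*
 * The resulting tree (for unit 0) has dev.em.0.mac_stats.*,
 * dev.em.0.interrupts.* and per-ring dev.em.0.queue_tx_%d.* /
 * dev.em.0.queue_rx_%d.* nodes; for example:
 *
 *   $ sysctl dev.em.0.mac_stats.good_pkts_recvd
 *   $ sysctl dev.em.0.queue_tx_0.txd_head
 */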
6018
6019 /**********************************************************************
6020  *
6021  *  This routine provides a way to dump out the adapter EEPROM,
6022  *  often a useful debug/service tool.  Only the first 32 words
6023  *  are dumped; the settings that matter live in that range.
6024  *
6025  **********************************************************************/
6026 static int
6027 em_sysctl_nvm_info(SYSCTL_HANDLER_ARGS)
6028 {
6029         struct adapter *adapter = (struct adapter *)arg1;
6030         int error;
6031         int result;
6032
6033         result = -1;
6034         error = sysctl_handle_int(oidp, &result, 0, req);
6035
6036         if (error || !req->newptr)
6037                 return (error);
6038
6039         /*
6040          * This value will cause a hex dump of the
6041          * first 32 16-bit words of the EEPROM to
6042          * the screen.
6043          */
6044         if (result == 1)
6045                 em_print_nvm_info(adapter);
6046
6047         return (error);
6048 }
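
/*
 * Usage sketch: this handler is registered elsewhere in the file (the
 * node name is assumed to be "nvm" here); writing 1 to it dumps the
 * EEPROM to the console:
 *
 *   # sysctl dev.em.0.nvm=1
 */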
6049
6050 static void
6051 em_print_nvm_info(struct adapter *adapter)
6052 {
6053         u16     eeprom_data;
6054         int     i, j, row = 0;
6055
6056         /* It's a bit crude, but it gets the job done */
6057         printf("\nInterface EEPROM Dump:\n");
6058         printf("Offset\n0x0000  ");
6059         for (i = 0, j = 0; i < 32; i++, j++) {
6060                 if (j == 8) { /* Make the offset block */
6061                         j = 0; ++row;
6062                         printf("\n0x00%x0  ", row);
6063                 }
6064                 e1000_read_nvm(&adapter->hw, i, 1, &eeprom_data);
6065                 printf("%04x ", eeprom_data);
6066         }
6067         printf("\n");
6068 }
6069
6070 static int
6071 em_sysctl_int_delay(SYSCTL_HANDLER_ARGS)
6072 {
6073         struct em_int_delay_info *info;
6074         struct adapter *adapter;
6075         u32 regval;
6076         int error, usecs, ticks;
6077
6078         info = (struct em_int_delay_info *)arg1;
6079         usecs = info->value;
6080         error = sysctl_handle_int(oidp, &usecs, 0, req);
6081         if (error != 0 || req->newptr == NULL)
6082                 return (error);
6083         if (usecs < 0 || usecs > EM_TICKS_TO_USECS(65535))
6084                 return (EINVAL);
6085         info->value = usecs;
6086         ticks = EM_USECS_TO_TICKS(usecs);
6087         if (info->offset == E1000_ITR)  /* units are 256ns here */
6088                 ticks *= 4;
6089
6090         adapter = info->adapter;
6091         
6092         EM_CORE_LOCK(adapter);
6093         regval = E1000_READ_OFFSET(&adapter->hw, info->offset);
6094         regval = (regval & ~0xffff) | (ticks & 0xffff);
6095         /* Handle a few special cases. */
6096         switch (info->offset) {
6097         case E1000_RDTR:
6098                 break;
6099         case E1000_TIDV:
6100                 if (ticks == 0) {
6101                         adapter->txd_cmd &= ~E1000_TXD_CMD_IDE;
6102                         /* Don't write 0 into the TIDV register. */
6103                         regval++;
6104                 } else
6105                         adapter->txd_cmd |= E1000_TXD_CMD_IDE;
6106                 break;
6107         }
6108         E1000_WRITE_OFFSET(&adapter->hw, info->offset, regval);
6109         EM_CORE_UNLOCK(adapter);
6110         return (0);
6111 }
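
/*
 * Worked example of the unit handling above: the interrupt delay
 * registers count in 1.024 us ticks, so a request of 100 us becomes
 * EM_USECS_TO_TICKS(100), roughly 98 ticks.  E1000_ITR instead counts
 * in 256 ns units, hence the extra "ticks *= 4" (1024 ns / 256 ns).
 */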
6112
6113 static void
6114 em_add_int_delay_sysctl(struct adapter *adapter, const char *name,
6115         const char *description, struct em_int_delay_info *info,
6116         int offset, int value)
6117 {
6118         info->adapter = adapter;
6119         info->offset = offset;
6120         info->value = value;
6121         SYSCTL_ADD_PROC(device_get_sysctl_ctx(adapter->dev),
6122             SYSCTL_CHILDREN(device_get_sysctl_tree(adapter->dev)),
6123             OID_AUTO, name, CTLTYPE_INT|CTLFLAG_RW,
6124             info, 0, em_sysctl_int_delay, "I", description);
6125 }
6126
6127 static void
6128 em_set_sysctl_value(struct adapter *adapter, const char *name,
6129         const char *description, int *limit, int value)
6130 {
6131         *limit = value;
6132         SYSCTL_ADD_INT(device_get_sysctl_ctx(adapter->dev),
6133             SYSCTL_CHILDREN(device_get_sysctl_tree(adapter->dev)),
6134             OID_AUTO, name, CTLFLAG_RW, limit, value, description);
6135 }
6136
6137
6138 /*
6139 ** Set flow control using sysctl:
6140 ** Flow control values:
6141 **      0 - off
6142 **      1 - rx pause
6143 **      2 - tx pause
6144 **      3 - full
6145 */
6146 static int
6147 em_set_flowcntl(SYSCTL_HANDLER_ARGS)
6148 {       
6149         struct adapter  *adapter = (struct adapter *) arg1;
6150         int             error;
6151         int             input = adapter->fc; /* current mode is the default */
6152                     
6153         error = sysctl_handle_int(oidp, &input, 0, req);
6154     
6155         if ((error) || (req->newptr == NULL))
6156                 return (error);
6157                 
6158         if (input == adapter->fc) /* no change? */
6159                 return (error);
6160
6161         switch (input) {
6162                 case e1000_fc_rx_pause:
6163                 case e1000_fc_tx_pause:
6164                 case e1000_fc_full:
6165                 case e1000_fc_none:
6166                         adapter->hw.fc.requested_mode = input;
6167                         adapter->fc = input;
6168                         break;
6169                 default:
6170                         /* Do nothing */
6171                         return (error);
6172         }
6173
6174         adapter->hw.fc.current_mode = adapter->hw.fc.requested_mode;
6175         e1000_force_mac_fc(&adapter->hw);
6176         return (error);
6177 }
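
/*
 * Usage sketch (the handler is registered elsewhere in this file; the
 * node name is assumed to be "fc" here): the values mirror enum
 * e1000_fc_mode, so full flow control is requested with:
 *
 *   # sysctl dev.em.0.fc=3
 */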
6178
6179 /*
6180 ** Manage Energy Efficient Ethernet:
6181 ** Control values:
6182 **     0/1 - enabled/disabled
6183 */
6184 static int
6185 em_sysctl_eee(SYSCTL_HANDLER_ARGS)
6186 {
6187         struct adapter  *adapter = (struct adapter *) arg1;
6188         int             error, value;
6189
6190         value = adapter->hw.dev_spec.ich8lan.eee_disable;
6191         error = sysctl_handle_int(oidp, &value, 0, req);
6192         if (error || req->newptr == NULL)
6193                 return (error);
6194         EM_CORE_LOCK(adapter);
6195         adapter->hw.dev_spec.ich8lan.eee_disable = (value != 0);
6196         em_init_locked(adapter);
6197         EM_CORE_UNLOCK(adapter);
6198         return (0);
6199 }
6200
6201 static int
6202 em_sysctl_debug_info(SYSCTL_HANDLER_ARGS)
6203 {
6204         struct adapter *adapter;
6205         int error;
6206         int result;
6207
6208         result = -1;
6209         error = sysctl_handle_int(oidp, &result, 0, req);
6210
6211         if (error || !req->newptr)
6212                 return (error);
6213
6214         if (result == 1) {
6215                 adapter = (struct adapter *)arg1;
6216                 em_print_debug_info(adapter);
6217         }
6218
6219         return (error);
6220 }
6221
6222 /*
6223 ** This routine is meant to be fluid, add whatever is
6224 ** needed for debugging a problem.  -jfv
6225 */
6226 static void
6227 em_print_debug_info(struct adapter *adapter)
6228 {
6229         device_t dev = adapter->dev;
6230         struct tx_ring *txr = adapter->tx_rings;
6231         struct rx_ring *rxr = adapter->rx_rings;
6232
6233         if (adapter->ifp->if_drv_flags & IFF_DRV_RUNNING)
6234                 printf("Interface is RUNNING ");
6235         else
6236                 printf("Interface is NOT RUNNING ");
6237
6238         if (adapter->ifp->if_drv_flags & IFF_DRV_OACTIVE)
6239                 printf("and OACTIVE (output is stalled)\n");
6240         else
6241                 printf("and ACTIVE\n");
6242
6243         for (int i = 0; i < adapter->num_queues; i++, txr++, rxr++) {
6244                 device_printf(dev, "TX Queue %d ------\n", i);
6245                 device_printf(dev, "hw tdh = %d, hw tdt = %d\n",
6246                         E1000_READ_REG(&adapter->hw, E1000_TDH(i)),
6247                         E1000_READ_REG(&adapter->hw, E1000_TDT(i)));
6248                 device_printf(dev, "Tx Queue Status = %d\n", txr->busy);
6249                 device_printf(dev, "TX descriptors avail = %d\n",
6250                         txr->tx_avail);
6251                 device_printf(dev, "Tx Descriptors avail failure = %ld\n",
6252                         txr->no_desc_avail);
6253                 device_printf(dev, "RX Queue %d ------\n", i);
6254                 device_printf(dev, "hw rdh = %d, hw rdt = %d\n",
6255                         E1000_READ_REG(&adapter->hw, E1000_RDH(i)),
6256                         E1000_READ_REG(&adapter->hw, E1000_RDT(i)));
6257                 device_printf(dev, "RX discarded packets = %ld\n",
6258                         rxr->rx_discarded);
6259                 device_printf(dev, "RX Next to Check = %d\n", rxr->next_to_check);
6260                 device_printf(dev, "RX Next to Refresh = %d\n", rxr->next_to_refresh);
6261         }
6262 }
6263
6264 #ifdef EM_MULTIQUEUE
6265 /*
6266  * 82574 only:
6267  * Write a new value to the EEPROM increasing the number of MSIX
6268  * vectors from 3 to 5, for proper multiqueue support.
6269  */
6270 static void
6271 em_enable_vectors_82574(struct adapter *adapter)
6272 {
6273         struct e1000_hw *hw = &adapter->hw;
6274         device_t dev = adapter->dev;
6275         u16 edata;
6276
6277         e1000_read_nvm(hw, EM_NVM_PCIE_CTRL, 1, &edata);
6278         device_printf(dev, "Current cap: %#06x\n", edata);
6279         if (((edata & EM_NVM_MSIX_N_MASK) >> EM_NVM_MSIX_N_SHIFT) != 4) {
6280                 device_printf(dev, "Writing to eeprom: increasing "
6281                     "reported MSIX vectors from 3 to 5...\n");
6282                 edata &= ~(EM_NVM_MSIX_N_MASK);
6283                 edata |= 4 << EM_NVM_MSIX_N_SHIFT;
6284                 e1000_write_nvm(hw, EM_NVM_PCIE_CTRL, 1, &edata);
6285                 e1000_update_nvm_checksum(hw);
6286                 device_printf(dev, "Writing to eeprom: done\n");
6287         }
6288 }
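
/*
 * On the encoding (inferred from the check above, not documented
 * here): the MSIX_N field appears to store the vector count minus
 * one, so the default of 2 advertises the 3 vectors and the value 4
 * written here advertises 5.  The new count likely takes effect only
 * after a reboot, since the MSI-X capability is loaded from the NVM
 * at power-up.
 */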
6289 #endif
6290
6291 #ifdef DDB
6292 DB_COMMAND(em_reset_dev, em_ddb_reset_dev)
6293 {
6294         devclass_t      dc;
6295         int max_em;
6296
6297         dc = devclass_find("em");
6298         max_em = devclass_get_maxunit(dc);
6299
6300         for (int index = 0; index < max_em; index++) {
6301                 device_t dev;
6302                 dev = devclass_get_device(dc, index);
6303                 if (dev != NULL && device_get_driver(dev) == &em_driver) {
6304                         struct adapter *adapter = device_get_softc(dev);
6305                         EM_CORE_LOCK(adapter);
6306                         em_init_locked(adapter);
6307                         EM_CORE_UNLOCK(adapter);
6308                 }
6309         }
6310 }
6311 DB_COMMAND(em_dump_queue, em_ddb_dump_queue)
6312 {
6313         devclass_t      dc;
6314         int max_em;
6315
6316         dc = devclass_find("em");
6317         max_em = devclass_get_maxunit(dc);
6318
6319         for (int index = 0; index < max_em; index++) {
6320                 device_t dev;
6321                 dev = devclass_get_device(dc, index);
6322                 if (dev != NULL && device_get_driver(dev) == &em_driver)
6323                         em_print_debug_info(device_get_softc(dev));
6324         }
6325
6326 }
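
/*
 * These commands are reached from the in-kernel debugger; at the db>
 * prompt (see ddb(4)):
 *
 *   db> em_reset_dev
 *   db> em_dump_queue
 */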
6327 #endif