1 /******************************************************************************
3 Copyright (c) 2001-2015, Intel Corporation
6 Redistribution and use in source and binary forms, with or without
7 modification, are permitted provided that the following conditions are met:
9 1. Redistributions of source code must retain the above copyright notice,
10 this list of conditions and the following disclaimer.
12 2. Redistributions in binary form must reproduce the above copyright
13 notice, this list of conditions and the following disclaimer in the
14 documentation and/or other materials provided with the distribution.
16 3. Neither the name of the Intel Corporation nor the names of its
17 contributors may be used to endorse or promote products derived from
18 this software without specific prior written permission.
20 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
21 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
24 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
25 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
26 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
27 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
28 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
29 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
30 POSSIBILITY OF SUCH DAMAGE.
32 ******************************************************************************/
38 #include "opt_inet6.h"
40 #ifdef HAVE_KERNEL_OPTION_HEADERS
41 #include "opt_device_polling.h"
44 #include <sys/param.h>
45 #include <sys/systm.h>
47 #include <sys/types.h>
50 #if __FreeBSD_version >= 800000
51 #include <sys/buf_ring.h>
54 #include <sys/endian.h>
55 #include <sys/kernel.h>
56 #include <sys/kthread.h>
57 #include <sys/malloc.h>
59 #include <sys/module.h>
62 #include <sys/socket.h>
63 #include <sys/sockio.h>
64 #include <sys/sysctl.h>
65 #include <sys/taskqueue.h>
66 #include <sys/eventhandler.h>
67 #include <machine/bus.h>
68 #include <machine/resource.h>
71 #include <net/ethernet.h>
73 #include <net/if_arp.h>
74 #include <net/if_dl.h>
75 #include <net/if_media.h>
77 #include <net/if_types.h>
78 #include <net/if_vlan_var.h>
80 #include <netinet/in_systm.h>
81 #include <netinet/in.h>
82 #include <netinet/if_ether.h>
83 #include <netinet/ip.h>
84 #include <netinet/ip6.h>
85 #include <netinet/tcp.h>
86 #include <netinet/udp.h>
88 #include <machine/in_cksum.h>
89 #include <dev/led/led.h>
90 #include <dev/pci/pcivar.h>
91 #include <dev/pci/pcireg.h>
93 #include "e1000_api.h"
94 #include "e1000_82571.h"
97 /*********************************************************************
99 *********************************************************************/
/* Driver version string ("-k" suffix denotes the in-kernel driver). */
100 char em_driver_version[] = "7.6.1-k";
102 /*********************************************************************
103 * PCI Device ID Table
105 * Used by probe to select devices to load on
106 * Last field stores an index into e1000_strings
107 * Last entry must be all 0s
109 * { Vendor ID, Device ID, SubVendor ID, SubDevice ID, String Index }
110 *********************************************************************/
/*
 * Match table scanned linearly by em_probe() until the all-zero
 * terminator entry.  PCI_ANY_ID in the subvendor/subdevice fields
 * acts as a wildcard (see the matching logic in em_probe).
 */
112 static em_vendor_info_t em_vendor_info_array[] =
114 /* Intel(R) PRO/1000 Network Connection */
115 { 0x8086, E1000_DEV_ID_82571EB_COPPER, PCI_ANY_ID, PCI_ANY_ID, 0},
116 { 0x8086, E1000_DEV_ID_82571EB_FIBER, PCI_ANY_ID, PCI_ANY_ID, 0},
117 { 0x8086, E1000_DEV_ID_82571EB_SERDES, PCI_ANY_ID, PCI_ANY_ID, 0},
118 { 0x8086, E1000_DEV_ID_82571EB_SERDES_DUAL,
119 PCI_ANY_ID, PCI_ANY_ID, 0},
120 { 0x8086, E1000_DEV_ID_82571EB_SERDES_QUAD,
121 PCI_ANY_ID, PCI_ANY_ID, 0},
122 { 0x8086, E1000_DEV_ID_82571EB_QUAD_COPPER,
123 PCI_ANY_ID, PCI_ANY_ID, 0},
124 { 0x8086, E1000_DEV_ID_82571EB_QUAD_COPPER_LP,
125 PCI_ANY_ID, PCI_ANY_ID, 0},
126 { 0x8086, E1000_DEV_ID_82571EB_QUAD_FIBER,
127 PCI_ANY_ID, PCI_ANY_ID, 0},
128 { 0x8086, E1000_DEV_ID_82571PT_QUAD_COPPER,
129 PCI_ANY_ID, PCI_ANY_ID, 0},
130 { 0x8086, E1000_DEV_ID_82572EI_COPPER, PCI_ANY_ID, PCI_ANY_ID, 0},
131 { 0x8086, E1000_DEV_ID_82572EI_FIBER, PCI_ANY_ID, PCI_ANY_ID, 0},
132 { 0x8086, E1000_DEV_ID_82572EI_SERDES, PCI_ANY_ID, PCI_ANY_ID, 0},
133 { 0x8086, E1000_DEV_ID_82572EI, PCI_ANY_ID, PCI_ANY_ID, 0},
135 { 0x8086, E1000_DEV_ID_82573E, PCI_ANY_ID, PCI_ANY_ID, 0},
136 { 0x8086, E1000_DEV_ID_82573E_IAMT, PCI_ANY_ID, PCI_ANY_ID, 0},
137 { 0x8086, E1000_DEV_ID_82573L, PCI_ANY_ID, PCI_ANY_ID, 0},
138 { 0x8086, E1000_DEV_ID_82583V, PCI_ANY_ID, PCI_ANY_ID, 0},
139 { 0x8086, E1000_DEV_ID_80003ES2LAN_COPPER_SPT,
140 PCI_ANY_ID, PCI_ANY_ID, 0},
141 { 0x8086, E1000_DEV_ID_80003ES2LAN_SERDES_SPT,
142 PCI_ANY_ID, PCI_ANY_ID, 0},
143 { 0x8086, E1000_DEV_ID_80003ES2LAN_COPPER_DPT,
144 PCI_ANY_ID, PCI_ANY_ID, 0},
145 { 0x8086, E1000_DEV_ID_80003ES2LAN_SERDES_DPT,
146 PCI_ANY_ID, PCI_ANY_ID, 0},
147 { 0x8086, E1000_DEV_ID_ICH8_IGP_M_AMT, PCI_ANY_ID, PCI_ANY_ID, 0},
148 { 0x8086, E1000_DEV_ID_ICH8_IGP_AMT, PCI_ANY_ID, PCI_ANY_ID, 0},
149 { 0x8086, E1000_DEV_ID_ICH8_IGP_C, PCI_ANY_ID, PCI_ANY_ID, 0},
150 { 0x8086, E1000_DEV_ID_ICH8_IFE, PCI_ANY_ID, PCI_ANY_ID, 0},
151 { 0x8086, E1000_DEV_ID_ICH8_IFE_GT, PCI_ANY_ID, PCI_ANY_ID, 0},
152 { 0x8086, E1000_DEV_ID_ICH8_IFE_G, PCI_ANY_ID, PCI_ANY_ID, 0},
153 { 0x8086, E1000_DEV_ID_ICH8_IGP_M, PCI_ANY_ID, PCI_ANY_ID, 0},
154 { 0x8086, E1000_DEV_ID_ICH8_82567V_3, PCI_ANY_ID, PCI_ANY_ID, 0},
155 { 0x8086, E1000_DEV_ID_ICH9_IGP_M_AMT, PCI_ANY_ID, PCI_ANY_ID, 0},
156 { 0x8086, E1000_DEV_ID_ICH9_IGP_AMT, PCI_ANY_ID, PCI_ANY_ID, 0},
157 { 0x8086, E1000_DEV_ID_ICH9_IGP_C, PCI_ANY_ID, PCI_ANY_ID, 0},
158 { 0x8086, E1000_DEV_ID_ICH9_IGP_M, PCI_ANY_ID, PCI_ANY_ID, 0},
159 { 0x8086, E1000_DEV_ID_ICH9_IGP_M_V, PCI_ANY_ID, PCI_ANY_ID, 0},
160 { 0x8086, E1000_DEV_ID_ICH9_IFE, PCI_ANY_ID, PCI_ANY_ID, 0},
161 { 0x8086, E1000_DEV_ID_ICH9_IFE_GT, PCI_ANY_ID, PCI_ANY_ID, 0},
162 { 0x8086, E1000_DEV_ID_ICH9_IFE_G, PCI_ANY_ID, PCI_ANY_ID, 0},
163 { 0x8086, E1000_DEV_ID_ICH9_BM, PCI_ANY_ID, PCI_ANY_ID, 0},
164 { 0x8086, E1000_DEV_ID_82574L, PCI_ANY_ID, PCI_ANY_ID, 0},
165 { 0x8086, E1000_DEV_ID_82574LA, PCI_ANY_ID, PCI_ANY_ID, 0},
166 { 0x8086, E1000_DEV_ID_ICH10_R_BM_LM, PCI_ANY_ID, PCI_ANY_ID, 0},
167 { 0x8086, E1000_DEV_ID_ICH10_R_BM_LF, PCI_ANY_ID, PCI_ANY_ID, 0},
168 { 0x8086, E1000_DEV_ID_ICH10_R_BM_V, PCI_ANY_ID, PCI_ANY_ID, 0},
169 { 0x8086, E1000_DEV_ID_ICH10_D_BM_LM, PCI_ANY_ID, PCI_ANY_ID, 0},
170 { 0x8086, E1000_DEV_ID_ICH10_D_BM_LF, PCI_ANY_ID, PCI_ANY_ID, 0},
171 { 0x8086, E1000_DEV_ID_ICH10_D_BM_V, PCI_ANY_ID, PCI_ANY_ID, 0},
172 { 0x8086, E1000_DEV_ID_PCH_M_HV_LM, PCI_ANY_ID, PCI_ANY_ID, 0},
173 { 0x8086, E1000_DEV_ID_PCH_M_HV_LC, PCI_ANY_ID, PCI_ANY_ID, 0},
174 { 0x8086, E1000_DEV_ID_PCH_D_HV_DM, PCI_ANY_ID, PCI_ANY_ID, 0},
175 { 0x8086, E1000_DEV_ID_PCH_D_HV_DC, PCI_ANY_ID, PCI_ANY_ID, 0},
176 { 0x8086, E1000_DEV_ID_PCH2_LV_LM, PCI_ANY_ID, PCI_ANY_ID, 0},
177 { 0x8086, E1000_DEV_ID_PCH2_LV_V, PCI_ANY_ID, PCI_ANY_ID, 0},
178 { 0x8086, E1000_DEV_ID_PCH_LPT_I217_LM, PCI_ANY_ID, PCI_ANY_ID, 0},
179 { 0x8086, E1000_DEV_ID_PCH_LPT_I217_V, PCI_ANY_ID, PCI_ANY_ID, 0},
180 { 0x8086, E1000_DEV_ID_PCH_LPTLP_I218_LM,
181 PCI_ANY_ID, PCI_ANY_ID, 0},
182 { 0x8086, E1000_DEV_ID_PCH_LPTLP_I218_V,
183 PCI_ANY_ID, PCI_ANY_ID, 0},
184 { 0x8086, E1000_DEV_ID_PCH_I218_LM2, PCI_ANY_ID, PCI_ANY_ID, 0},
185 { 0x8086, E1000_DEV_ID_PCH_I218_V2, PCI_ANY_ID, PCI_ANY_ID, 0},
186 { 0x8086, E1000_DEV_ID_PCH_I218_LM3, PCI_ANY_ID, PCI_ANY_ID, 0},
187 { 0x8086, E1000_DEV_ID_PCH_I218_V3, PCI_ANY_ID, PCI_ANY_ID, 0},
188 { 0x8086, E1000_DEV_ID_PCH_SPT_I219_LM, PCI_ANY_ID, PCI_ANY_ID, 0},
189 { 0x8086, E1000_DEV_ID_PCH_SPT_I219_V, PCI_ANY_ID, PCI_ANY_ID, 0},
190 { 0x8086, E1000_DEV_ID_PCH_SPT_I219_LM2,
191 PCI_ANY_ID, PCI_ANY_ID, 0},
192 { 0x8086, E1000_DEV_ID_PCH_SPT_I219_V2, PCI_ANY_ID, PCI_ANY_ID, 0},
193 { 0x8086, E1000_DEV_ID_PCH_LBG_I219_LM3,
194 PCI_ANY_ID, PCI_ANY_ID, 0},
195 { 0x8086, E1000_DEV_ID_PCH_SPT_I219_LM4,
196 PCI_ANY_ID, PCI_ANY_ID, 0},
197 { 0x8086, E1000_DEV_ID_PCH_SPT_I219_V4, PCI_ANY_ID, PCI_ANY_ID, 0},
198 { 0x8086, E1000_DEV_ID_PCH_SPT_I219_LM5,
199 PCI_ANY_ID, PCI_ANY_ID, 0},
200 { 0x8086, E1000_DEV_ID_PCH_SPT_I219_V5, PCI_ANY_ID, PCI_ANY_ID, 0},
201 /* required last entry */
/* NOTE(review): the all-zero terminator entry ({0, 0, 0, 0, 0}) that
 * em_probe's scan relies on is not visible in this excerpt — verify it
 * immediately follows the comment above in the full file. */
205 /*********************************************************************
206 * Table of branding strings for all supported NICs.
207 *********************************************************************/
/* Indexed by the "String Index" (last) field of em_vendor_info_array
 * entries; em_probe uses em_strings[ent->index] for the device description. */
209 static char *em_strings[] = {
210 "Intel(R) PRO/1000 Network Connection"
213 /*********************************************************************
214 * Function prototypes
215 *********************************************************************/
216 static int em_probe(device_t);
217 static int em_attach(device_t);
218 static int em_detach(device_t);
219 static int em_shutdown(device_t);
220 static int em_suspend(device_t);
221 static int em_resume(device_t);
223 static int em_mq_start(struct ifnet *, struct mbuf *);
224 static int em_mq_start_locked(struct ifnet *,
226 static void em_qflush(struct ifnet *);
228 static void em_start(struct ifnet *);
229 static void em_start_locked(struct ifnet *, struct tx_ring *);
231 static int em_ioctl(struct ifnet *, u_long, caddr_t);
232 static void em_init(void *);
233 static void em_init_locked(struct adapter *);
234 static void em_stop(void *);
235 static void em_media_status(struct ifnet *, struct ifmediareq *);
236 static int em_media_change(struct ifnet *);
237 static void em_identify_hardware(struct adapter *);
238 static int em_allocate_pci_resources(struct adapter *);
239 static int em_allocate_legacy(struct adapter *);
240 static int em_allocate_msix(struct adapter *);
241 static int em_allocate_queues(struct adapter *);
242 static int em_setup_msix(struct adapter *);
243 static void em_free_pci_resources(struct adapter *);
244 static void em_local_timer(void *);
245 static void em_reset(struct adapter *);
246 static int em_setup_interface(device_t, struct adapter *);
247 static void em_flush_desc_rings(struct adapter *);
249 static void em_setup_transmit_structures(struct adapter *);
250 static void em_initialize_transmit_unit(struct adapter *);
251 static int em_allocate_transmit_buffers(struct tx_ring *);
252 static void em_free_transmit_structures(struct adapter *);
253 static void em_free_transmit_buffers(struct tx_ring *);
255 static int em_setup_receive_structures(struct adapter *);
256 static int em_allocate_receive_buffers(struct rx_ring *);
257 static void em_initialize_receive_unit(struct adapter *);
258 static void em_free_receive_structures(struct adapter *);
259 static void em_free_receive_buffers(struct rx_ring *);
261 static void em_enable_intr(struct adapter *);
262 static void em_disable_intr(struct adapter *);
263 static void em_update_stats_counters(struct adapter *);
264 static void em_add_hw_stats(struct adapter *adapter);
265 static void em_txeof(struct tx_ring *);
266 static bool em_rxeof(struct rx_ring *, int, int *);
267 #ifndef __NO_STRICT_ALIGNMENT
268 static int em_fixup_rx(struct rx_ring *);
270 static void em_setup_rxdesc(union e1000_rx_desc_extended *,
271 const struct em_rxbuffer *rxbuf);
272 static void em_receive_checksum(uint32_t status, struct mbuf *);
273 static void em_transmit_checksum_setup(struct tx_ring *, struct mbuf *, int,
274 struct ip *, u32 *, u32 *);
275 static void em_tso_setup(struct tx_ring *, struct mbuf *, int, struct ip *,
276 struct tcphdr *, u32 *, u32 *);
277 static void em_set_promisc(struct adapter *);
278 static void em_disable_promisc(struct adapter *);
279 static void em_set_multi(struct adapter *);
280 static void em_update_link_status(struct adapter *);
281 static void em_refresh_mbufs(struct rx_ring *, int);
282 static void em_register_vlan(void *, struct ifnet *, u16);
283 static void em_unregister_vlan(void *, struct ifnet *, u16);
284 static void em_setup_vlan_hw_support(struct adapter *);
285 static int em_xmit(struct tx_ring *, struct mbuf **);
286 static int em_dma_malloc(struct adapter *, bus_size_t,
287 struct em_dma_alloc *, int);
288 static void em_dma_free(struct adapter *, struct em_dma_alloc *);
289 static int em_sysctl_nvm_info(SYSCTL_HANDLER_ARGS);
290 static void em_print_nvm_info(struct adapter *);
291 static int em_sysctl_debug_info(SYSCTL_HANDLER_ARGS);
292 static void em_print_debug_info(struct adapter *);
293 static int em_is_valid_ether_addr(u8 *);
294 static int em_sysctl_int_delay(SYSCTL_HANDLER_ARGS);
295 static void em_add_int_delay_sysctl(struct adapter *, const char *,
296 const char *, struct em_int_delay_info *, int, int);
297 /* Management and WOL Support */
298 static void em_init_manageability(struct adapter *);
299 static void em_release_manageability(struct adapter *);
300 static void em_get_hw_control(struct adapter *);
301 static void em_release_hw_control(struct adapter *);
302 static void em_get_wakeup(device_t);
303 static void em_enable_wakeup(device_t);
304 static int em_enable_phy_wakeup(struct adapter *);
305 static void em_led_func(void *, int);
306 static void em_disable_aspm(struct adapter *);
308 static int em_irq_fast(void *);
311 static void em_msix_tx(void *);
312 static void em_msix_rx(void *);
313 static void em_msix_link(void *);
314 static void em_handle_tx(void *context, int pending);
315 static void em_handle_rx(void *context, int pending);
316 static void em_handle_link(void *context, int pending);
319 static void em_enable_vectors_82574(struct adapter *);
322 static void em_set_sysctl_value(struct adapter *, const char *,
323 const char *, int *, int);
324 static int em_set_flowcntl(SYSCTL_HANDLER_ARGS);
325 static int em_sysctl_eee(SYSCTL_HANDLER_ARGS);
327 static __inline void em_rx_discard(struct rx_ring *, int);
329 #ifdef DEVICE_POLLING
330 static poll_handler_t em_poll;
333 /*********************************************************************
334 * FreeBSD Device Interface Entry Points
335 *********************************************************************/
/* newbus method table: maps the generic device_* entry points onto the
 * em_* handlers defined in this file. */
337 static device_method_t em_methods[] = {
338 /* Device interface */
339 DEVMETHOD(device_probe, em_probe),
340 DEVMETHOD(device_attach, em_attach),
341 DEVMETHOD(device_detach, em_detach),
342 DEVMETHOD(device_shutdown, em_shutdown),
343 DEVMETHOD(device_suspend, em_suspend),
344 DEVMETHOD(device_resume, em_resume),
/* driver_t: name "em", method table above, and per-instance softc size
 * so newbus allocates one struct adapter per device. */
348 static driver_t em_driver = {
349 "em", em_methods, sizeof(struct adapter),
352 devclass_t em_devclass;
/* Register the driver on the pci bus and declare module dependencies so
 * pci and ether are loaded first. */
353 DRIVER_MODULE(em, pci, em_driver, em_devclass, 0, 0);
354 MODULE_DEPEND(em, pci, 1, 1, 1);
355 MODULE_DEPEND(em, ether, 1, 1, 1);
357 /*********************************************************************
358 * Tunable default values.
359 *********************************************************************/
/* Convert between hardware delay-timer ticks and microseconds; the
 * arithmetic (x1024/1000 with rounding) implies one tick is ~1.024 us. */
361 #define EM_TICKS_TO_USECS(ticks) ((1024 * (ticks) + 500) / 1000)
362 #define EM_USECS_TO_TICKS(usecs) ((1000 * (usecs) + 512) / 1024)
/* DEFAULT_ITR caps the interrupt rate at MAX_INTS_PER_SEC; the divisor of
 * 256 presumably reflects the ITR register's 256 ns granularity — confirm
 * against the e1000 datasheet. */
365 #define MAX_INTS_PER_SEC 8000
366 #define DEFAULT_ITR (1000000000/(MAX_INTS_PER_SEC * 256))
368 /* Allow common code without TSO */
373 #define TSO_WORKAROUND 4
/* Root of the hw.em sysctl tree; all tunables below hang off it. */
375 static SYSCTL_NODE(_hw, OID_AUTO, em, CTLFLAG_RD, 0, "EM driver parameters");
377 static int em_disable_crc_stripping = 0;
378 SYSCTL_INT(_hw_em, OID_AUTO, disable_crc_stripping, CTLFLAG_RDTUN,
379 &em_disable_crc_stripping, 0, "Disable CRC Stripping");
/* Interrupt-delay defaults, converted from the EM_* tick constants. */
381 static int em_tx_int_delay_dflt = EM_TICKS_TO_USECS(EM_TIDV);
382 static int em_rx_int_delay_dflt = EM_TICKS_TO_USECS(EM_RDTR);
383 TUNABLE_INT("hw.em.tx_int_delay", &em_tx_int_delay_dflt);
384 TUNABLE_INT("hw.em.rx_int_delay", &em_rx_int_delay_dflt);
385 SYSCTL_INT(_hw_em, OID_AUTO, tx_int_delay, CTLFLAG_RDTUN, &em_tx_int_delay_dflt,
386 0, "Default transmit interrupt delay in usecs");
387 SYSCTL_INT(_hw_em, OID_AUTO, rx_int_delay, CTLFLAG_RDTUN, &em_rx_int_delay_dflt,
388 0, "Default receive interrupt delay in usecs");
390 static int em_tx_abs_int_delay_dflt = EM_TICKS_TO_USECS(EM_TADV);
391 static int em_rx_abs_int_delay_dflt = EM_TICKS_TO_USECS(EM_RADV);
392 TUNABLE_INT("hw.em.tx_abs_int_delay", &em_tx_abs_int_delay_dflt);
393 TUNABLE_INT("hw.em.rx_abs_int_delay", &em_rx_abs_int_delay_dflt);
394 SYSCTL_INT(_hw_em, OID_AUTO, tx_abs_int_delay, CTLFLAG_RDTUN,
395 &em_tx_abs_int_delay_dflt, 0,
396 "Default transmit interrupt delay limit in usecs");
397 SYSCTL_INT(_hw_em, OID_AUTO, rx_abs_int_delay, CTLFLAG_RDTUN,
398 &em_rx_abs_int_delay_dflt, 0,
399 "Default receive interrupt delay limit in usecs");
/* Descriptor ring sizes; validated against EM_MIN/MAX_* and alignment
 * in em_attach before being stored in the softc. */
401 static int em_rxd = EM_DEFAULT_RXD;
402 static int em_txd = EM_DEFAULT_TXD;
403 TUNABLE_INT("hw.em.rxd", &em_rxd);
404 TUNABLE_INT("hw.em.txd", &em_txd);
405 SYSCTL_INT(_hw_em, OID_AUTO, rxd, CTLFLAG_RDTUN, &em_rxd, 0,
406 "Number of receive descriptors per queue");
407 SYSCTL_INT(_hw_em, OID_AUTO, txd, CTLFLAG_RDTUN, &em_txd, 0,
408 "Number of transmit descriptors per queue");
410 static int em_smart_pwr_down = FALSE;
411 TUNABLE_INT("hw.em.smart_pwr_down", &em_smart_pwr_down);
412 SYSCTL_INT(_hw_em, OID_AUTO, smart_pwr_down, CTLFLAG_RDTUN, &em_smart_pwr_down,
413 0, "Set to true to leave smart power down enabled on newer adapters");
415 /* Controls whether promiscuous also shows bad packets */
416 static int em_debug_sbp = FALSE;
417 TUNABLE_INT("hw.em.sbp", &em_debug_sbp);
418 SYSCTL_INT(_hw_em, OID_AUTO, sbp, CTLFLAG_RDTUN, &em_debug_sbp, 0,
419 "Show bad packets in promiscuous mode");
421 static int em_enable_msix = TRUE;
422 TUNABLE_INT("hw.em.enable_msix", &em_enable_msix);
423 SYSCTL_INT(_hw_em, OID_AUTO, enable_msix, CTLFLAG_RDTUN, &em_enable_msix, 0,
424 "Enable MSI-X interrupts");
427 static int em_num_queues = 1;
428 TUNABLE_INT("hw.em.num_queues", &em_num_queues);
429 SYSCTL_INT(_hw_em, OID_AUTO, num_queues, CTLFLAG_RDTUN, &em_num_queues, 0,
430 "82574 only: Number of queues to configure, 0 indicates autoconfigure");
434 ** Global variable to store last used CPU when binding queues
435 ** to CPUs in em_allocate_msix. Starts at CPU_FIRST and increments when a
436 ** queue is bound to a cpu.
438 static int em_last_bind_cpu = -1;
440 /* How many packets rxeof tries to clean at a time */
441 static int em_rx_process_limit = 100;
442 TUNABLE_INT("hw.em.rx_process_limit", &em_rx_process_limit);
443 SYSCTL_INT(_hw_em, OID_AUTO, rx_process_limit, CTLFLAG_RDTUN,
444 &em_rx_process_limit, 0,
445 "Maximum number of received packets to process "
446 "at a time, -1 means unlimited");
448 /* Energy efficient ethernet - default to OFF */
/* Note: this value is copied into hw->dev_spec.ich8lan.eee_disable in
 * em_attach, so 1 here means EEE is DISABLED by default. */
449 static int eee_setting = 1;
450 TUNABLE_INT("hw.em.eee_setting", &eee_setting);
451 SYSCTL_INT(_hw_em, OID_AUTO, eee_setting, CTLFLAG_RDTUN, &eee_setting, 0,
452 "Enable Energy Efficient Ethernet");
454 /* Global used in WOL setup with multiport cards */
455 static int global_quad_port_a = 0;
457 #ifdef DEV_NETMAP /* see ixgbe.c for details */
458 #include <dev/netmap/if_em_netmap.h>
459 #endif /* DEV_NETMAP */
461 /*********************************************************************
462 * Device identification routine
464 * em_probe determines if the driver should be loaded on
465 * adapter based on PCI vendor/device id of the adapter.
467 * return BUS_PROBE_DEFAULT on success, positive on failure
468 *********************************************************************/
471 em_probe(device_t dev)
473 char adapter_name[60];
474 uint16_t pci_vendor_id = 0;
475 uint16_t pci_device_id = 0;
476 uint16_t pci_subvendor_id = 0;
477 uint16_t pci_subdevice_id = 0;
478 em_vendor_info_t *ent;
480 INIT_DEBUGOUT("em_probe: begin");
/* Cheap early reject: only Intel (EM_VENDOR_ID) devices can match. */
482 pci_vendor_id = pci_get_vendor(dev);
483 if (pci_vendor_id != EM_VENDOR_ID)
486 pci_device_id = pci_get_device(dev);
487 pci_subvendor_id = pci_get_subvendor(dev);
488 pci_subdevice_id = pci_get_subdevice(dev);
/* Linear scan of the match table; a zero vendor_id terminates it.
 * Subvendor/subdevice compare either exactly or against the
 * PCI_ANY_ID wildcard. */
490 ent = em_vendor_info_array;
491 while (ent->vendor_id != 0) {
492 if ((pci_vendor_id == ent->vendor_id) &&
493 (pci_device_id == ent->device_id) &&
495 ((pci_subvendor_id == ent->subvendor_id) ||
496 (ent->subvendor_id == PCI_ANY_ID)) &&
498 ((pci_subdevice_id == ent->subdevice_id) ||
499 (ent->subdevice_id == PCI_ANY_ID))) {
/* Build "<branding string> <version>" as the device description. */
500 sprintf(adapter_name, "%s %s",
501 em_strings[ent->index],
503 device_set_desc_copy(dev, adapter_name);
504 return (BUS_PROBE_DEFAULT);
512 /*********************************************************************
513 * Device initialization routine
515 * The attach entry point is called when the driver is being loaded.
516 * This routine identifies the type of hardware, allocates all resources
517 * and initializes the hardware.
519 * return 0 on success, positive on failure
520 *********************************************************************/
523 em_attach(device_t dev)
525 struct adapter *adapter;
529 INIT_DEBUGOUT("em_attach: begin");
/* Honor device.hints disabling of this unit. */
531 if (resource_disabled("em", device_get_unit(dev))) {
532 device_printf(dev, "Disabled by device hint\n");
536 adapter = device_get_softc(dev);
537 adapter->dev = adapter->osdep.dev = dev;
539 EM_CORE_LOCK_INIT(adapter, device_get_nameunit(dev));
/* Per-device sysctl handlers: NVM dump, debug dump, flow control. */
542 SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
543 SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
544 OID_AUTO, "nvm", CTLTYPE_INT|CTLFLAG_RW, adapter, 0,
545 em_sysctl_nvm_info, "I", "NVM Information");
547 SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
548 SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
549 OID_AUTO, "debug", CTLTYPE_INT|CTLFLAG_RW, adapter, 0,
550 em_sysctl_debug_info, "I", "Debug Information");
552 SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
553 SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
554 OID_AUTO, "fc", CTLTYPE_INT|CTLFLAG_RW, adapter, 0,
555 em_set_flowcntl, "I", "Flow Control");
/* Watchdog/link timer runs under the core mutex. */
557 callout_init_mtx(&adapter->timer, &adapter->core_mtx, 0);
559 /* Determine hardware and mac info */
560 em_identify_hardware(adapter);
562 /* Setup PCI resources */
563 if (em_allocate_pci_resources(adapter)) {
564 device_printf(dev, "Allocation of PCI resources failed\n");
570 ** For ICH8 and family we need to
571 ** map the flash memory, and this
572 ** must happen after the MAC is
575 if ((hw->mac.type == e1000_ich8lan) ||
576 (hw->mac.type == e1000_ich9lan) ||
577 (hw->mac.type == e1000_ich10lan) ||
578 (hw->mac.type == e1000_pchlan) ||
579 (hw->mac.type == e1000_pch2lan) ||
580 (hw->mac.type == e1000_pch_lpt)) {
581 int rid = EM_BAR_TYPE_FLASH;
582 adapter->flash = bus_alloc_resource_any(dev,
583 SYS_RES_MEMORY, &rid, RF_ACTIVE);
584 if (adapter->flash == NULL) {
585 device_printf(dev, "Mapping of Flash failed\n");
589 /* This is used in the shared code */
590 hw->flash_address = (u8 *)adapter->flash;
591 adapter->osdep.flash_bus_space_tag =
592 rman_get_bustag(adapter->flash);
593 adapter->osdep.flash_bus_space_handle =
594 rman_get_bushandle(adapter->flash);
597 ** In the new SPT device flash is not a
598 ** separate BAR, rather it is also in BAR0,
599 ** so use the same tag and an offset handle for the
600 ** FLASH read/write macros in the shared code.
602 else if (hw->mac.type == e1000_pch_spt) {
603 adapter->osdep.flash_bus_space_tag =
604 adapter->osdep.mem_bus_space_tag;
605 adapter->osdep.flash_bus_space_handle =
606 adapter->osdep.mem_bus_space_handle
607 + E1000_FLASH_BASE_ADDR;
610 /* Do Shared Code initialization */
611 error = e1000_setup_init_funcs(hw, TRUE);
613 device_printf(dev, "Setup of Shared code failed, error %d\n",
620 * Setup MSI/X or MSI if PCI Express
622 adapter->msix = em_setup_msix(adapter);
624 e1000_get_bus_info(hw);
626 /* Set up some sysctls for the tunable interrupt delays */
627 em_add_int_delay_sysctl(adapter, "rx_int_delay",
628 "receive interrupt delay in usecs", &adapter->rx_int_delay,
629 E1000_REGISTER(hw, E1000_RDTR), em_rx_int_delay_dflt);
630 em_add_int_delay_sysctl(adapter, "tx_int_delay",
631 "transmit interrupt delay in usecs", &adapter->tx_int_delay,
632 E1000_REGISTER(hw, E1000_TIDV), em_tx_int_delay_dflt);
633 em_add_int_delay_sysctl(adapter, "rx_abs_int_delay",
634 "receive interrupt delay limit in usecs",
635 &adapter->rx_abs_int_delay,
636 E1000_REGISTER(hw, E1000_RADV),
637 em_rx_abs_int_delay_dflt);
638 em_add_int_delay_sysctl(adapter, "tx_abs_int_delay",
639 "transmit interrupt delay limit in usecs",
640 &adapter->tx_abs_int_delay,
641 E1000_REGISTER(hw, E1000_TADV),
642 em_tx_abs_int_delay_dflt);
643 em_add_int_delay_sysctl(adapter, "itr",
644 "interrupt delay limit in usecs/4",
646 E1000_REGISTER(hw, E1000_ITR),
649 /* Sysctl for limiting the amount of work done in the taskqueue */
650 em_set_sysctl_value(adapter, "rx_processing_limit",
651 "max number of rx packets to process", &adapter->rx_process_limit,
652 em_rx_process_limit);
655 * Validate number of transmit and receive descriptors. It
656 * must not exceed hardware maximum, and must be multiple
657 * of E1000_DBA_ALIGN.
659 if (((em_txd * sizeof(struct e1000_tx_desc)) % EM_DBA_ALIGN) != 0 ||
660 (em_txd > EM_MAX_TXD) || (em_txd < EM_MIN_TXD)) {
661 device_printf(dev, "Using %d TX descriptors instead of %d!\n",
662 EM_DEFAULT_TXD, em_txd);
663 adapter->num_tx_desc = EM_DEFAULT_TXD;
665 adapter->num_tx_desc = em_txd;
667 if (((em_rxd * sizeof(union e1000_rx_desc_extended)) % EM_DBA_ALIGN) != 0 ||
668 (em_rxd > EM_MAX_RXD) || (em_rxd < EM_MIN_RXD)) {
669 device_printf(dev, "Using %d RX descriptors instead of %d!\n",
670 EM_DEFAULT_RXD, em_rxd);
671 adapter->num_rx_desc = EM_DEFAULT_RXD;
673 adapter->num_rx_desc = em_rxd;
/* Default PHY/MAC link negotiation policy. */
675 hw->mac.autoneg = DO_AUTO_NEG;
676 hw->phy.autoneg_wait_to_complete = FALSE;
677 hw->phy.autoneg_advertised = AUTONEG_ADV_DEFAULT;
/* Copper-specific PHY defaults (auto MDI-X, master/slave mode). */
680 if (hw->phy.media_type == e1000_media_type_copper) {
681 hw->phy.mdix = AUTO_ALL_MODES;
682 hw->phy.disable_polarity_correction = FALSE;
683 hw->phy.ms_type = EM_MASTER_SLAVE;
687 * Set the frame limits assuming
688 * standard ethernet sized frames.
690 adapter->hw.mac.max_frame_size =
691 ETHERMTU + ETHER_HDR_LEN + ETHERNET_FCS_SIZE;
694 * This controls when hardware reports transmit completion
697 hw->mac.report_tx_early = 1;
700 ** Get queue/ring memory
702 if (em_allocate_queues(adapter)) {
707 /* Allocate multicast array memory. */
708 adapter->mta = malloc(sizeof(u8) * ETH_ADDR_LEN *
709 MAX_NUM_MULTICAST_ADDRESSES, M_DEVBUF, M_NOWAIT);
710 if (adapter->mta == NULL) {
711 device_printf(dev, "Can not allocate multicast setup array\n");
716 /* Check SOL/IDER usage */
717 if (e1000_check_reset_block(hw))
718 device_printf(dev, "PHY reset is blocked"
719 " due to SOL/IDER session.\n");
721 /* Sysctl for setting Energy Efficient Ethernet */
/* eee_setting feeds eee_disable, hence the inverted sense: nonzero
 * tunable value disables EEE (see the global's comment). */
722 hw->dev_spec.ich8lan.eee_disable = eee_setting;
723 SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
724 SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
725 OID_AUTO, "eee_control", CTLTYPE_INT|CTLFLAG_RW,
726 adapter, 0, em_sysctl_eee, "I",
727 "Disable Energy Efficient Ethernet");
730 ** Start from a known state, this is
731 ** important in reading the nvm and
737 /* Make sure we have a good EEPROM before we read from it */
738 if (e1000_validate_nvm_checksum(hw) < 0) {
740 ** Some PCI-E parts fail the first check due to
741 ** the link being in sleep state, call it again,
742 ** if it fails a second time its a real issue.
744 if (e1000_validate_nvm_checksum(hw) < 0) {
746 "The EEPROM Checksum Is Not Valid\n");
752 /* Copy the permanent MAC address out of the EEPROM */
753 if (e1000_read_mac_addr(hw) < 0) {
754 device_printf(dev, "EEPROM read error while reading MAC"
760 if (!em_is_valid_ether_addr(hw->mac.addr)) {
761 device_printf(dev, "Invalid MAC address\n");
766 /* Disable ULP support */
767 e1000_disable_ulp_lpt_lp(hw, TRUE);
770 ** Do interrupt configuration
772 if (adapter->msix > 1) /* Do MSIX */
773 error = em_allocate_msix(adapter);
774 else /* MSI or Legacy */
775 error = em_allocate_legacy(adapter);
780 * Get Wake-on-Lan and Management info for later use
784 /* Setup OS specific network interface */
785 if (em_setup_interface(dev, adapter) != 0)
790 /* Initialize statistics */
791 em_update_stats_counters(adapter);
793 hw->mac.get_link_status = 1;
794 em_update_link_status(adapter);
796 /* Register for VLAN events */
797 adapter->vlan_attach = EVENTHANDLER_REGISTER(vlan_config,
798 em_register_vlan, adapter, EVENTHANDLER_PRI_FIRST);
799 adapter->vlan_detach = EVENTHANDLER_REGISTER(vlan_unconfig,
800 em_unregister_vlan, adapter, EVENTHANDLER_PRI_FIRST);
802 em_add_hw_stats(adapter);
804 /* Non-AMT based hardware can now take control from firmware */
805 if (adapter->has_manage && !adapter->has_amt)
806 em_get_hw_control(adapter);
808 /* Tell the stack that the interface is not active */
809 adapter->ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
810 adapter->ifp->if_drv_flags |= IFF_DRV_OACTIVE;
812 adapter->led_dev = led_create(em_led_func, adapter,
813 device_get_nameunit(dev));
815 em_netmap_attach(adapter);
816 #endif /* DEV_NETMAP */
818 INIT_DEBUGOUT("em_attach: end");
/* Error-unwind tail: release everything acquired above in reverse
 * order (rings, hw control, ifnet, PCI resources, mta, core lock). */
823 em_free_transmit_structures(adapter);
824 em_free_receive_structures(adapter);
825 em_release_hw_control(adapter);
826 if (adapter->ifp != NULL)
827 if_free(adapter->ifp);
829 em_free_pci_resources(adapter);
830 free(adapter->mta, M_DEVBUF);
831 EM_CORE_LOCK_DESTROY(adapter);
836 /*********************************************************************
837 * Device removal routine
839 * The detach entry point is called when the driver is being removed.
840 * This routine stops the adapter and deallocates all the resources
841 * that were allocated for driver operation.
843 * return 0 on success, positive on failure
844 *********************************************************************/
847 em_detach(device_t dev)
849 struct adapter *adapter = device_get_softc(dev);
850 struct ifnet *ifp = adapter->ifp;
852 INIT_DEBUGOUT("em_detach: begin");
854 /* Make sure VLANS are not using driver */
855 if (adapter->ifp->if_vlantrunk != NULL) {
856 device_printf(dev,"Vlan in use, detach first\n");
860 #ifdef DEVICE_POLLING
861 if (ifp->if_capenable & IFCAP_POLLING)
862 ether_poll_deregister(ifp);
865 if (adapter->led_dev != NULL)
866 led_destroy(adapter->led_dev);
/* Flag in_detach under the core lock so concurrent ioctl/timer paths
 * can see teardown is in progress, then stop the adapter. */
868 EM_CORE_LOCK(adapter);
869 adapter->in_detach = 1;
871 EM_CORE_UNLOCK(adapter);
872 EM_CORE_LOCK_DESTROY(adapter);
874 e1000_phy_hw_reset(&adapter->hw);
/* Hand management/hardware control back to firmware. */
876 em_release_manageability(adapter);
877 em_release_hw_control(adapter);
879 /* Unregister VLAN events */
880 if (adapter->vlan_attach != NULL)
881 EVENTHANDLER_DEREGISTER(vlan_config, adapter->vlan_attach);
882 if (adapter->vlan_detach != NULL)
883 EVENTHANDLER_DEREGISTER(vlan_unconfig, adapter->vlan_detach);
885 ether_ifdetach(adapter->ifp);
886 callout_drain(&adapter->timer);
890 #endif /* DEV_NETMAP */
892 em_free_pci_resources(adapter);
893 bus_generic_detach(dev);
/* Release descriptor rings and the multicast scratch array last. */
896 em_free_transmit_structures(adapter);
897 em_free_receive_structures(adapter);
899 em_release_hw_control(adapter);
900 free(adapter->mta, M_DEVBUF);
905 /*********************************************************************
907 * Shutdown entry point
909 **********************************************************************/
/* Shutdown is identical to suspend: quiesce the device and arm wakeup. */
912 em_shutdown(device_t dev)
914 return em_suspend(dev);
918 * Suspend/resume device methods.
/* Release manageability and hardware control, enable wake-up, then let
 * the generic bus code suspend children. */
921 em_suspend(device_t dev)
923 struct adapter *adapter = device_get_softc(dev);
925 EM_CORE_LOCK(adapter);
927 em_release_manageability(adapter);
928 em_release_hw_control(adapter);
929 em_enable_wakeup(dev);
931 EM_CORE_UNLOCK(adapter);
933 return bus_generic_suspend(dev);
/* Resume: re-run PCH2 workarounds if needed, reinitialize the adapter,
 * then restart transmission on every queue if the interface was up. */
937 em_resume(device_t dev)
939 struct adapter *adapter = device_get_softc(dev);
940 struct tx_ring *txr = adapter->tx_rings;
941 struct ifnet *ifp = adapter->ifp;
943 EM_CORE_LOCK(adapter);
944 if (adapter->hw.mac.type == e1000_pch2lan)
945 e1000_resume_workarounds_pchlan(&adapter->hw);
946 em_init_locked(adapter);
947 em_init_manageability(adapter);
/* Kick the transmit path for any frames queued while suspended; the
 * multiqueue (buf_ring) and legacy (if_snd) paths are both covered. */
949 if ((ifp->if_flags & IFF_UP) &&
950 (ifp->if_drv_flags & IFF_DRV_RUNNING) && adapter->link_active) {
951 for (int i = 0; i < adapter->num_queues; i++, txr++) {
954 if (!drbr_empty(ifp, txr->br))
955 em_mq_start_locked(ifp, txr);
957 if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
958 em_start_locked(ifp, txr);
963 EM_CORE_UNLOCK(adapter);
965 return bus_generic_resume(dev);
969 #ifndef EM_MULTIQUEUE
/*
 * Legacy (single-queue) transmit start, TX lock held by caller.
 * Drains the interface send queue: for each mbuf, calls em_xmit() to
 * map it onto TX descriptors; on failure the mbuf is prepended back to
 * the queue. Sets OACTIVE when descriptors run low, marks the ring
 * busy, and taps BPF listeners on each sent frame.
 * NOTE(review): several interior lines (return statements, brace
 * closures, the m_head declaration) are elided in this extract.
 */
971 em_start_locked(struct ifnet *ifp, struct tx_ring *txr)
973 struct adapter *adapter = ifp->if_softc;
976 EM_TX_LOCK_ASSERT(txr);
/* Bail out unless RUNNING and not OACTIVE, and link is up */
978 if ((ifp->if_drv_flags & (IFF_DRV_RUNNING|IFF_DRV_OACTIVE)) !=
982 if (!adapter->link_active)
985 while (!IFQ_DRV_IS_EMPTY(&ifp->if_snd)) {
986 /* Call cleanup if number of TX descriptors low */
987 if (txr->tx_avail <= EM_TX_CLEANUP_THRESHOLD)
/* Not enough room for a worst-case frame: stop and mark OACTIVE */
989 if (txr->tx_avail < EM_MAX_SCATTER) {
990 ifp->if_drv_flags |= IFF_DRV_OACTIVE;
993 IFQ_DRV_DEQUEUE(&ifp->if_snd, m_head);
997 * Encapsulation can modify our pointer, and or make it
998 * NULL on failure. In that event, we can't requeue.
1000 if (em_xmit(txr, &m_head)) {
1003 IFQ_DRV_PREPEND(&ifp->if_snd, m_head);
1007 /* Mark the queue as having work */
1008 if (txr->busy == EM_TX_IDLE)
1009 txr->busy = EM_TX_BUSY;
1011 /* Send a copy of the frame to the BPF listener */
1012 ETHER_BPF_MTAP(ifp, m_head);
/*
 * Legacy if_start entry point: takes the (single) TX ring lock and
 * defers to em_start_locked() when the interface is running.
 * NOTE(review): TX lock/unlock lines and braces are elided here.
 */
1020 em_start(struct ifnet *ifp)
1022 struct adapter *adapter = ifp->if_softc;
1023 struct tx_ring *txr = adapter->tx_rings;
1025 if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
1027 em_start_locked(ifp, txr);
1032 #else /* EM_MULTIQUEUE */
1033 /*********************************************************************
1034 * Multiqueue Transmit routines
1036 * em_mq_start is called by the stack to initiate a transmit.
1037 * however, if busy the driver can queue the request rather
1038 * than do an immediate send. It is this that is an advantage
1039 * in this driver, rather than also having multiple tx queues.
1040 **********************************************************************/
1042 ** Multiqueue capable stack interface
/*
 * Multiqueue if_transmit entry point. Picks a TX ring from the mbuf's
 * flowid (or the current CPU when no hash is present), enqueues on that
 * ring's buf_ring, then either drains it immediately if the TX lock is
 * uncontended or defers the drain to the ring's taskqueue.
 * NOTE(review): error-return paths, TX unlock, and braces are elided
 * in this extract. The plain "flowid % num_queues" mapping assumes a
 * well-distributed hash -- confirm against the full file whether a
 * bucket-mapping API should be used instead.
 */
1045 em_mq_start(struct ifnet *ifp, struct mbuf *m)
1047 struct adapter *adapter = ifp->if_softc;
1048 struct tx_ring *txr = adapter->tx_rings;
1049 unsigned int i, error;
1051 if (M_HASHTYPE_GET(m) != M_HASHTYPE_NONE)
1052 i = m->m_pkthdr.flowid % adapter->num_queues;
1054 i = curcpu % adapter->num_queues;
1056 txr = &adapter->tx_rings[i];
1058 error = drbr_enqueue(ifp, txr->br, m);
/* Opportunistic drain: don't block if another CPU holds the TX lock */
1062 if (EM_TX_TRYLOCK(txr)) {
1063 em_mq_start_locked(ifp, txr);
1066 taskqueue_enqueue(txr->tq, &txr->tx_task);
/*
 * Drain one TX ring's buf_ring, TX lock held by caller. Uses the
 * peek/advance/putback protocol: on em_xmit() failure a surviving mbuf
 * is put back (it may have been modified), a freed mbuf just advances.
 * Updates byte counters, taps BPF, marks the ring busy when anything
 * was enqueued, and raises OACTIVE when descriptors run out.
 * NOTE(review): the "next" mbuf declaration, txeof call between the two
 * tx_avail checks, and several braces/returns are elided here.
 */
1072 em_mq_start_locked(struct ifnet *ifp, struct tx_ring *txr)
1074 struct adapter *adapter = txr->adapter;
1076 int err = 0, enq = 0;
1078 if ((ifp->if_drv_flags & (IFF_DRV_RUNNING | IFF_DRV_OACTIVE)) !=
1079 IFF_DRV_RUNNING || adapter->link_active == 0) {
1083 /* Process the queue */
1084 while ((next = drbr_peek(ifp, txr->br)) != NULL) {
1085 if ((err = em_xmit(txr, &next)) != 0) {
1087 /* It was freed, move forward */
1088 drbr_advance(ifp, txr->br);
1091 * Still have one left, it may not be
1092 * the same since the transmit function
1093 * may have changed it.
1095 drbr_putback(ifp, txr->br, next);
1099 drbr_advance(ifp, txr->br);
1101 ifp->if_obytes += next->m_pkthdr.len;
1102 if (next->m_flags & M_MCAST)
1104 ETHER_BPF_MTAP(ifp, next);
1105 if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
1109 /* Mark the queue as having work */
1110 if ((enq > 0) && (txr->busy == EM_TX_IDLE))
1111 txr->busy = EM_TX_BUSY;
/* Low on descriptors: (elided txeof) then set OACTIVE if still short */
1113 if (txr->tx_avail < EM_MAX_SCATTER)
1115 if (txr->tx_avail < EM_MAX_SCATTER) {
1116 ifp->if_drv_flags |= IFF_DRV_OACTIVE;
1122 ** Flush all ring buffers
/*
 * if_qflush method: free every mbuf still queued on each ring's
 * buf_ring, then flush the legacy if_snd queue (elided below).
 * NOTE(review): TX lock/unlock, m_freem() of dequeued mbufs, and the
 * trailing if_qflush() call are elided in this extract.
 */
1125 em_qflush(struct ifnet *ifp)
1127 struct adapter *adapter = ifp->if_softc;
1128 struct tx_ring *txr = adapter->tx_rings;
1131 for (int i = 0; i < adapter->num_queues; i++, txr++) {
1133 while ((m = buf_ring_dequeue_sc(txr->br)) != NULL)
1139 #endif /* EM_MULTIQUEUE */
1141 /*********************************************************************
1144 * em_ioctl is called when the user wants to configure the
1147 * return 0 on success, positive on failure
1148 **********************************************************************/
/*
 * Interface ioctl handler. Dispatches on `command`:
 *   SIOCSIFADDR  - bring the interface up without a full re-init when
 *                  possible (init causes link renegotiation).
 *   SIOCSIFMTU   - validate the MTU against the per-MAC maximum frame
 *                  size, program it, and re-init if running.
 *   SIOCSIFFLAGS - reconcile UP/RUNNING and promisc/allmulti changes.
 *   SIOC*MULTI   - reload the multicast filter with interrupts masked.
 *   SIOCxIFMEDIA - media get/set via ifmedia, refused during SOL/IDER.
 *   SIOCSIFCAP   - toggle offload capabilities; re-init when required.
 * Returns 0 on success, errno on failure (via elided return paths).
 * NOTE(review): case labels, default arms, many braces and returns are
 * elided throughout this extract -- structure annotated, not altered.
 */
1151 em_ioctl(struct ifnet *ifp, u_long command, caddr_t data)
1153 struct adapter *adapter = ifp->if_softc;
1154 struct ifreq *ifr = (struct ifreq *)data;
1155 #if defined(INET) || defined(INET6)
1156 struct ifaddr *ifa = (struct ifaddr *)data;
1158 bool avoid_reset = FALSE;
/* Refuse ioctls once detach has started */
1161 if (adapter->in_detach)
/* --- SIOCSIFADDR --- */
1167 if (ifa->ifa_addr->sa_family == AF_INET)
1171 if (ifa->ifa_addr->sa_family == AF_INET6)
1175 ** Calling init results in link renegotiation,
1176 ** so we avoid doing it when possible.
1179 ifp->if_flags |= IFF_UP;
1180 if (!(ifp->if_drv_flags & IFF_DRV_RUNNING))
1183 if (!(ifp->if_flags & IFF_NOARP))
1184 arp_ifinit(ifp, ifa);
1187 error = ether_ioctl(ifp, command, data);
/* --- SIOCSIFMTU: pick max frame size by MAC type --- */
1193 IOCTL_DEBUGOUT("ioctl rcv'd: SIOCSIFMTU (Set Interface MTU)");
1195 EM_CORE_LOCK(adapter);
1196 switch (adapter->hw.mac.type) {
1200 case e1000_ich10lan:
1206 case e1000_80003es2lan: /* 9K Jumbo Frame size */
1207 max_frame_size = 9234;
1210 max_frame_size = 4096;
1212 /* Adapters that do not support jumbo frames */
1214 max_frame_size = ETHER_MAX_LEN;
1217 max_frame_size = MAX_JUMBO_FRAME_SIZE;
1219 if (ifr->ifr_mtu > max_frame_size - ETHER_HDR_LEN -
1221 EM_CORE_UNLOCK(adapter);
1226 ifp->if_mtu = ifr->ifr_mtu;
1227 adapter->hw.mac.max_frame_size =
1228 ifp->if_mtu + ETHER_HDR_LEN + ETHER_CRC_LEN;
1229 if (ifp->if_drv_flags & IFF_DRV_RUNNING)
1230 em_init_locked(adapter);
1231 EM_CORE_UNLOCK(adapter);
/* --- SIOCSIFFLAGS --- */
1235 IOCTL_DEBUGOUT("ioctl rcv'd:\
1236 SIOCSIFFLAGS (Set Interface Flags)");
1237 EM_CORE_LOCK(adapter);
1238 if (ifp->if_flags & IFF_UP) {
1239 if ((ifp->if_drv_flags & IFF_DRV_RUNNING)) {
/* Only promisc/allmulti changed: toggle filters, skip full re-init */
1240 if ((ifp->if_flags ^ adapter->if_flags) &
1241 (IFF_PROMISC | IFF_ALLMULTI)) {
1242 em_disable_promisc(adapter);
1243 em_set_promisc(adapter);
1246 em_init_locked(adapter);
1248 if (ifp->if_drv_flags & IFF_DRV_RUNNING)
1250 adapter->if_flags = ifp->if_flags;
1251 EM_CORE_UNLOCK(adapter);
/* --- SIOCADDMULTI / SIOCDELMULTI --- */
1255 IOCTL_DEBUGOUT("ioctl rcv'd: SIOC(ADD|DEL)MULTI");
1256 if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
1257 EM_CORE_LOCK(adapter);
1258 em_disable_intr(adapter);
1259 em_set_multi(adapter);
1260 #ifdef DEVICE_POLLING
1261 if (!(ifp->if_capenable & IFCAP_POLLING))
1263 em_enable_intr(adapter);
1264 EM_CORE_UNLOCK(adapter);
/* --- SIOCSIFMEDIA / SIOCGIFMEDIA --- */
1268 /* Check SOL/IDER usage */
1269 EM_CORE_LOCK(adapter);
1270 if (e1000_check_reset_block(&adapter->hw)) {
1271 EM_CORE_UNLOCK(adapter);
1272 device_printf(adapter->dev, "Media change is"
1273 " blocked due to SOL/IDER session.\n");
1276 EM_CORE_UNLOCK(adapter);
1279 IOCTL_DEBUGOUT("ioctl rcv'd: \
1280 SIOCxIFMEDIA (Get/Set Interface Media)");
1281 error = ifmedia_ioctl(ifp, ifr, &adapter->media, command);
/* --- SIOCSIFCAP: capability toggles --- */
1287 IOCTL_DEBUGOUT("ioctl rcv'd: SIOCSIFCAP (Set Capabilities)");
1289 mask = ifr->ifr_reqcap ^ ifp->if_capenable;
1290 #ifdef DEVICE_POLLING
1291 if (mask & IFCAP_POLLING) {
1292 if (ifr->ifr_reqcap & IFCAP_POLLING) {
1293 error = ether_poll_register(em_poll, ifp);
1296 EM_CORE_LOCK(adapter);
1297 em_disable_intr(adapter);
1298 ifp->if_capenable |= IFCAP_POLLING;
1299 EM_CORE_UNLOCK(adapter);
1301 error = ether_poll_deregister(ifp);
1302 /* Enable interrupt even in error case */
1303 EM_CORE_LOCK(adapter);
1304 em_enable_intr(adapter);
1305 ifp->if_capenable &= ~IFCAP_POLLING;
1306 EM_CORE_UNLOCK(adapter);
1310 if (mask & IFCAP_HWCSUM) {
1311 ifp->if_capenable ^= IFCAP_HWCSUM;
1314 if (mask & IFCAP_TSO4) {
1315 ifp->if_capenable ^= IFCAP_TSO4;
1318 if (mask & IFCAP_VLAN_HWTAGGING) {
1319 ifp->if_capenable ^= IFCAP_VLAN_HWTAGGING;
1322 if (mask & IFCAP_VLAN_HWFILTER) {
1323 ifp->if_capenable ^= IFCAP_VLAN_HWFILTER;
1326 if (mask & IFCAP_VLAN_HWTSO) {
1327 ifp->if_capenable ^= IFCAP_VLAN_HWTSO;
1330 if ((mask & IFCAP_WOL) &&
1331 (ifp->if_capabilities & IFCAP_WOL) != 0) {
1332 if (mask & IFCAP_WOL_MCAST)
1333 ifp->if_capenable ^= IFCAP_WOL_MCAST;
1334 if (mask & IFCAP_WOL_MAGIC)
1335 ifp->if_capenable ^= IFCAP_WOL_MAGIC;
/* (reinit is set by the elided capability branches above) */
1337 if (reinit && (ifp->if_drv_flags & IFF_DRV_RUNNING))
1339 VLAN_CAPABILITIES(ifp);
/* default: hand anything else to the generic ethernet layer */
1344 error = ether_ioctl(ifp, command, data);
1352 /*********************************************************************
1355 * This routine is used in two ways. It is used by the stack as
1356 * init entry point in network interface structure. It is also used
1357 * by the driver as a hw/sw initialization routine to get to a
1360 * return 0 on success, positive on failure
1361 **********************************************************************/
/*
 * Core (re)initialization, core lock held by caller. Stops interrupts
 * and the timer, programs the MAC address (with the 82571 RAR[14]
 * duplicate workaround), resets the hardware, configures VLAN/offload
 * features, rebuilds TX/RX structures, picks the RX mbuf pool size for
 * the configured frame size, restores promisc state, marks the
 * interface RUNNING, and re-enables interrupts (unless polling).
 * NOTE(review): em_reset(), error returns, variable declarations, and
 * several brace closures are elided in this extract.
 */
1364 em_init_locked(struct adapter *adapter)
1366 struct ifnet *ifp = adapter->ifp;
1367 device_t dev = adapter->dev;
1369 INIT_DEBUGOUT("em_init: begin");
1371 EM_CORE_LOCK_ASSERT(adapter);
1373 em_disable_intr(adapter);
1374 callout_stop(&adapter->timer);
1376 /* Get the latest mac address, User can use a LAA */
1377 bcopy(IF_LLADDR(adapter->ifp), adapter->hw.mac.addr,
1380 /* Put the address into the Receive Address Array */
1381 e1000_rar_set(&adapter->hw, adapter->hw.mac.addr, 0);
1384 * With the 82571 adapter, RAR[0] may be overwritten
1385 * when the other port is reset, we make a duplicate
1386 * in RAR[14] for that eventuality, this assures
1387 * the interface continues to function.
1389 if (adapter->hw.mac.type == e1000_82571) {
1390 e1000_set_laa_state_82571(&adapter->hw, TRUE);
1391 e1000_rar_set(&adapter->hw, adapter->hw.mac.addr,
1392 E1000_RAR_ENTRIES - 1);
1395 /* Initialize the hardware */
1397 em_update_link_status(adapter);
1399 /* Setup VLAN support, basic and offload if available */
1400 E1000_WRITE_REG(&adapter->hw, E1000_VET, ETHERTYPE_VLAN);
1402 /* Set hardware offload abilities */
1403 ifp->if_hwassist = 0;
1404 if (ifp->if_capenable & IFCAP_TXCSUM)
1405 ifp->if_hwassist |= (CSUM_TCP | CSUM_UDP);
1407 ** There have proven to be problems with TSO when not
1408 ** at full gigabit speed, so disable the assist automatically
1409 ** when at lower speeds. -jfv
1411 if (ifp->if_capenable & IFCAP_TSO4) {
1412 if (adapter->link_speed == SPEED_1000)
1413 ifp->if_hwassist |= CSUM_TSO;
1416 /* Configure for OS presence */
1417 em_init_manageability(adapter);
1419 /* Prepare transmit descriptors and buffers */
1420 em_setup_transmit_structures(adapter);
1421 em_initialize_transmit_unit(adapter);
1423 /* Setup Multicast table */
1424 em_set_multi(adapter);
1427 ** Figure out the desired mbuf
1428 ** pool for doing jumbos
1430 if (adapter->hw.mac.max_frame_size <= 2048)
1431 adapter->rx_mbuf_sz = MCLBYTES;
1432 else if (adapter->hw.mac.max_frame_size <= 4096)
1433 adapter->rx_mbuf_sz = MJUMPAGESIZE;
1435 adapter->rx_mbuf_sz = MJUM9BYTES;
1437 /* Prepare receive descriptors and buffers */
1438 if (em_setup_receive_structures(adapter)) {
1439 device_printf(dev, "Could not setup receive structures\n");
1443 em_initialize_receive_unit(adapter);
1445 /* Use real VLAN Filter support? */
1446 if (ifp->if_capenable & IFCAP_VLAN_HWTAGGING) {
1447 if (ifp->if_capenable & IFCAP_VLAN_HWFILTER)
1448 /* Use real VLAN Filter support */
1449 em_setup_vlan_hw_support(adapter);
/* else: just enable VLAN tag stripping via CTRL.VME */
1452 ctrl = E1000_READ_REG(&adapter->hw, E1000_CTRL);
1453 ctrl |= E1000_CTRL_VME;
1454 E1000_WRITE_REG(&adapter->hw, E1000_CTRL, ctrl);
1458 /* Don't lose promiscuous settings */
1459 em_set_promisc(adapter);
1461 /* Set the interface as ACTIVE */
1462 ifp->if_drv_flags |= IFF_DRV_RUNNING;
1463 ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;
1465 callout_reset(&adapter->timer, hz, em_local_timer, adapter);
1466 e1000_clear_hw_cntrs_base_generic(&adapter->hw);
1468 /* MSI/X configuration for 82574 */
1469 if (adapter->hw.mac.type == e1000_82574) {
1471 tmp = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
1472 tmp |= E1000_CTRL_EXT_PBA_CLR;
1473 E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT, tmp);
1474 /* Set the IVAR - interrupt vector routing. */
1475 E1000_WRITE_REG(&adapter->hw, E1000_IVAR, adapter->ivars);
1478 #ifdef DEVICE_POLLING
1480 * Only enable interrupts if we are not polling, make sure
1481 * they are off otherwise.
1483 if (ifp->if_capenable & IFCAP_POLLING)
1484 em_disable_intr(adapter);
1486 #endif /* DEVICE_POLLING */
1487 em_enable_intr(adapter);
1489 /* AMT based hardware can now take control from firmware */
1490 if (adapter->has_manage && adapter->has_amt)
1491 em_get_hw_control(adapter);
/*
 * Public init entry point (used as the if_init callback): wraps
 * em_init_locked() with the core lock.
 * NOTE(review): the function signature line (em_init(void *arg)) and
 * braces are elided in this extract.
 */
1497 struct adapter *adapter = arg;
1499 EM_CORE_LOCK(adapter);
1500 em_init_locked(adapter);
1501 EM_CORE_UNLOCK(adapter);
1505 #ifdef DEVICE_POLLING
1506 /*********************************************************************
1508 * Legacy polling routine: note this only works with single queue
1510 *********************************************************************/
/*
 * DEVICE_POLLING handler (single-queue only). On POLL_AND_CHECK_STATUS
 * it samples ICR for RXSEQ/LSC and refreshes link state under the core
 * lock, then processes up to `count` RX frames and restarts TX via the
 * MQ or legacy path.
 * NOTE(review): reg_icr/rx_done declarations, TX lock acquisition, and
 * the return of rx_done are elided in this extract.
 */
1512 em_poll(struct ifnet *ifp, enum poll_cmd cmd, int count)
1514 struct adapter *adapter = ifp->if_softc;
1515 struct tx_ring *txr = adapter->tx_rings;
1516 struct rx_ring *rxr = adapter->rx_rings;
1520 EM_CORE_LOCK(adapter);
1521 if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) {
1522 EM_CORE_UNLOCK(adapter);
1526 if (cmd == POLL_AND_CHECK_STATUS) {
1527 reg_icr = E1000_READ_REG(&adapter->hw, E1000_ICR);
/* Link-state causes: restart the watchdog timer around the update */
1528 if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
1529 callout_stop(&adapter->timer);
1530 adapter->hw.mac.get_link_status = 1;
1531 em_update_link_status(adapter);
1532 callout_reset(&adapter->timer, hz,
1533 em_local_timer, adapter);
1536 EM_CORE_UNLOCK(adapter);
1538 em_rxeof(rxr, count, &rx_done);
1542 #ifdef EM_MULTIQUEUE
1543 if (!drbr_empty(ifp, txr->br))
1544 em_mq_start_locked(ifp, txr);
1546 if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
1547 em_start_locked(ifp, txr);
1553 #endif /* DEVICE_POLLING */
1556 /*********************************************************************
1558 * Fast Legacy/MSI Combined Interrupt Service routine
1560 *********************************************************************/
/*
 * Fast (filter) interrupt handler for legacy/MSI. Reads and thereby
 * acknowledges ICR, rejects stray interrupts (hot-eject 0xffffffff,
 * zero cause, or -- on 82571+ -- INT_ASSERTED clear), masks further
 * interrupts, and defers real work to the que task; link changes go to
 * the fast taskqueue. Returns FILTER_STRAY or FILTER_HANDLED.
 * NOTE(review): the reg_icr declaration and some blank/brace lines are
 * elided in this extract.
 */
1562 em_irq_fast(void *arg)
1564 struct adapter *adapter = arg;
1570 reg_icr = E1000_READ_REG(&adapter->hw, E1000_ICR);
/* All-ones read: device likely removed (surprise hot-eject) */
1573 if (reg_icr == 0xffffffff)
1574 return FILTER_STRAY;
1576 /* Definitely not our interrupt. */
1578 return FILTER_STRAY;
1581 * Starting with the 82571 chip, bit 31 should be used to
1582 * determine whether the interrupt belongs to us.
1584 if (adapter->hw.mac.type >= e1000_82571 &&
1585 (reg_icr & E1000_ICR_INT_ASSERTED) == 0)
1586 return FILTER_STRAY;
/* Mask and hand off: the que task re-enables interrupts when done */
1588 em_disable_intr(adapter);
1589 taskqueue_enqueue(adapter->tq, &adapter->que_task);
1591 /* Link status change */
1592 if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
1593 adapter->hw.mac.get_link_status = 1;
1594 taskqueue_enqueue(taskqueue_fast, &adapter->link_task);
1597 if (reg_icr & E1000_ICR_RXO)
1598 adapter->rx_overruns++;
1599 return FILTER_HANDLED;
1602 /* Combined RX/TX handler, used by Legacy and MSI */
/*
 * Combined RX/TX deferred task for legacy/MSI: processes RX up to the
 * configured limit, restarts TX (MQ or legacy path), re-queues itself
 * while more RX work remains, and finally re-enables interrupts (which
 * em_irq_fast disabled before scheduling this task).
 * NOTE(review): TX lock/unlock, txeof call, and braces are elided in
 * this extract.
 */
1604 em_handle_que(void *context, int pending)
1606 struct adapter *adapter = context;
1607 struct ifnet *ifp = adapter->ifp;
1608 struct tx_ring *txr = adapter->tx_rings;
1609 struct rx_ring *rxr = adapter->rx_rings;
1611 if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
1612 bool more = em_rxeof(rxr, adapter->rx_process_limit, NULL);
1616 #ifdef EM_MULTIQUEUE
1617 if (!drbr_empty(ifp, txr->br))
1618 em_mq_start_locked(ifp, txr);
1620 if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
1621 em_start_locked(ifp, txr);
/* More RX pending: run again rather than re-enabling interrupts */
1625 taskqueue_enqueue(adapter->tq, &adapter->que_task);
1630 em_enable_intr(adapter);
1635 /*********************************************************************
1637 * MSIX Interrupt Service Routines
1639 **********************************************************************/
/*
 * MSI-X TX interrupt handler for one ring: clean completed descriptors
 * (txeof elided), restart transmission, then unmask this vector by
 * writing the ring's IMS bits.
 * NOTE(review): TX lock/unlock, txeof, and braces are elided here.
 */
1641 em_msix_tx(void *arg)
1643 struct tx_ring *txr = arg;
1644 struct adapter *adapter = txr->adapter;
1645 struct ifnet *ifp = adapter->ifp;
1650 #ifdef EM_MULTIQUEUE
1651 if (!drbr_empty(ifp, txr->br))
1652 em_mq_start_locked(ifp, txr);
1654 if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
1655 em_start_locked(ifp, txr);
1658 /* Reenable this interrupt */
1659 E1000_WRITE_REG(&adapter->hw, E1000_IMS, txr->ims);
1664 /*********************************************************************
1666 * MSIX RX Interrupt Service routine
1668 **********************************************************************/
/*
 * MSI-X RX interrupt handler for one ring: process up to the RX limit;
 * if more work remains defer to the ring's RX task, otherwise unmask
 * this vector via the ring's IMS bits.
 * NOTE(review): the `more` declaration, stats bump, and braces are
 * elided in this extract.
 */
1671 em_msix_rx(void *arg)
1673 struct rx_ring *rxr = arg;
1674 struct adapter *adapter = rxr->adapter;
/* Ignore if the interface has been brought down */
1678 if (!(adapter->ifp->if_drv_flags & IFF_DRV_RUNNING))
1680 more = em_rxeof(rxr, adapter->rx_process_limit, NULL);
1682 taskqueue_enqueue(rxr->tq, &rxr->rx_task);
1684 /* Reenable this interrupt */
1685 E1000_WRITE_REG(&adapter->hw, E1000_IMS, rxr->ims);
1690 /*********************************************************************
1692 * MSIX Link Fast Interrupt Service routine
1694 **********************************************************************/
/*
 * MSI-X link vector handler: counts link IRQs and RX overruns, handles
 * RXSEQ/LSC by refreshing link state, then unmasks the link vector.
 * Because reading ICR may auto-clear other vectors' causes, it writes
 * ICS to re-post soft interrupts for all of them.
 * NOTE(review): the reg_icr declaration and some braces are elided.
 */
1696 em_msix_link(void *arg)
1698 struct adapter *adapter = arg;
1701 ++adapter->link_irq;
1702 reg_icr = E1000_READ_REG(&adapter->hw, E1000_ICR);
1704 if (reg_icr & E1000_ICR_RXO)
1705 adapter->rx_overruns++;
1707 if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
1708 adapter->hw.mac.get_link_status = 1;
1709 em_handle_link(adapter, 0);
1711 E1000_WRITE_REG(&adapter->hw, E1000_IMS,
1712 EM_MSIX_LINK | E1000_IMS_LSC);
1714 ** Because we must read the ICR for this interrupt
1715 ** it may clear other causes using autoclear, for
1716 ** this reason we simply create a soft interrupt
1717 ** for all these vectors.
1720 E1000_WRITE_REG(&adapter->hw,
1721 E1000_ICS, adapter->ims);
/*
 * Deferred RX task for an MSI-X ring: process frames, re-queue itself
 * while more remain, then unmask the ring's interrupt.
 * NOTE(review): the `more` declaration and braces are elided here.
 */
1727 em_handle_rx(void *context, int pending)
1729 struct rx_ring *rxr = context;
1730 struct adapter *adapter = rxr->adapter;
1733 more = em_rxeof(rxr, adapter->rx_process_limit, NULL);
1735 taskqueue_enqueue(rxr->tq, &rxr->rx_task);
1737 /* Reenable this interrupt */
1738 E1000_WRITE_REG(&adapter->hw, E1000_IMS, rxr->ims);
/*
 * Deferred TX task for a ring: clean completions (txeof elided),
 * restart transmission via the MQ or legacy path, then unmask the
 * ring's interrupt.
 * NOTE(review): TX lock/unlock, txeof, and braces are elided here.
 */
1743 em_handle_tx(void *context, int pending)
1745 struct tx_ring *txr = context;
1746 struct adapter *adapter = txr->adapter;
1747 struct ifnet *ifp = adapter->ifp;
1751 #ifdef EM_MULTIQUEUE
1752 if (!drbr_empty(ifp, txr->br))
1753 em_mq_start_locked(ifp, txr);
1755 if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
1756 em_start_locked(ifp, txr);
1758 E1000_WRITE_REG(&adapter->hw, E1000_IMS, txr->ims);
/*
 * Deferred link task: under the core lock, refresh link status with
 * the watchdog timer paused, unmask the link vector, and -- when link
 * came up -- restart transmission on every TX ring.
 * NOTE(review): TX ring lock/unlock inside the loop and some braces
 * are elided in this extract.
 */
1763 em_handle_link(void *context, int pending)
1765 struct adapter *adapter = context;
1766 struct tx_ring *txr = adapter->tx_rings;
1767 struct ifnet *ifp = adapter->ifp;
1769 if (!(ifp->if_drv_flags & IFF_DRV_RUNNING))
1772 EM_CORE_LOCK(adapter);
1773 callout_stop(&adapter->timer);
1774 em_update_link_status(adapter);
1775 callout_reset(&adapter->timer, hz, em_local_timer, adapter);
1776 E1000_WRITE_REG(&adapter->hw, E1000_IMS,
1777 EM_MSIX_LINK | E1000_IMS_LSC);
1778 if (adapter->link_active) {
1779 for (int i = 0; i < adapter->num_queues; i++, txr++) {
1781 #ifdef EM_MULTIQUEUE
1782 if (!drbr_empty(ifp, txr->br))
1783 em_mq_start_locked(ifp, txr);
1785 if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
1786 em_start_locked(ifp, txr);
1791 EM_CORE_UNLOCK(adapter);
1795 /*********************************************************************
1797 * Media Ioctl callback
1799 * This routine is called whenever the user queries the status of
1800 * the interface using ifconfig.
1802 **********************************************************************/
/*
 * ifmedia status callback (ifconfig queries). Refreshes link state,
 * then reports: no ACTIVE flag when link is down; fiber/serdes as
 * 1000_SX full-duplex; copper by speed (10/100/1000) plus the duplex
 * flag from link_duplex.
 * NOTE(review): case labels for the speed switch and return/braces are
 * elided in this extract.
 */
1804 em_media_status(struct ifnet *ifp, struct ifmediareq *ifmr)
1806 struct adapter *adapter = ifp->if_softc;
1807 u_char fiber_type = IFM_1000_SX;
1809 INIT_DEBUGOUT("em_media_status: begin");
1811 EM_CORE_LOCK(adapter);
1812 em_update_link_status(adapter);
1814 ifmr->ifm_status = IFM_AVALID;
1815 ifmr->ifm_active = IFM_ETHER;
1817 if (!adapter->link_active) {
1818 EM_CORE_UNLOCK(adapter);
1822 ifmr->ifm_status |= IFM_ACTIVE;
1824 if ((adapter->hw.phy.media_type == e1000_media_type_fiber) ||
1825 (adapter->hw.phy.media_type == e1000_media_type_internal_serdes)) {
1826 ifmr->ifm_active |= fiber_type | IFM_FDX;
1828 switch (adapter->link_speed) {
1830 ifmr->ifm_active |= IFM_10_T;
1833 ifmr->ifm_active |= IFM_100_TX;
1836 ifmr->ifm_active |= IFM_1000_T;
1839 if (adapter->link_duplex == FULL_DUPLEX)
1840 ifmr->ifm_active |= IFM_FDX;
1842 ifmr->ifm_active |= IFM_HDX;
1844 EM_CORE_UNLOCK(adapter);
1847 /*********************************************************************
1849 * Media Ioctl callback
1851 * This routine is called when the user changes speed/duplex using
1852 * media/mediopt option with ifconfig.
1854 **********************************************************************/
/*
 * ifmedia change callback (ifconfig media/mediaopt). Maps the selected
 * subtype onto MAC autoneg/advertisement settings: AUTO and 1000_T use
 * autonegotiation; 100_TX and 10_T force speed/duplex. Re-inits the
 * adapter so the new settings take effect.
 * NOTE(review): case labels, EINVAL returns, and braces are elided in
 * this extract.
 */
1856 em_media_change(struct ifnet *ifp)
1858 struct adapter *adapter = ifp->if_softc;
1859 struct ifmedia *ifm = &adapter->media;
1861 INIT_DEBUGOUT("em_media_change: begin");
1863 if (IFM_TYPE(ifm->ifm_media) != IFM_ETHER)
1866 EM_CORE_LOCK(adapter);
1867 switch (IFM_SUBTYPE(ifm->ifm_media)) {
/* IFM_AUTO: advertise everything */
1869 adapter->hw.mac.autoneg = DO_AUTO_NEG;
1870 adapter->hw.phy.autoneg_advertised = AUTONEG_ADV_DEFAULT;
/* 1000_T: gigabit requires autoneg per 802.3 */
1875 adapter->hw.mac.autoneg = DO_AUTO_NEG;
1876 adapter->hw.phy.autoneg_advertised = ADVERTISE_1000_FULL;
/* 100_TX: forced speed, duplex from the FDX media flag */
1879 adapter->hw.mac.autoneg = FALSE;
1880 adapter->hw.phy.autoneg_advertised = 0;
1881 if ((ifm->ifm_media & IFM_GMASK) == IFM_FDX)
1882 adapter->hw.mac.forced_speed_duplex = ADVERTISE_100_FULL;
1884 adapter->hw.mac.forced_speed_duplex = ADVERTISE_100_HALF;
/* 10_T: forced speed, duplex from the FDX media flag */
1887 adapter->hw.mac.autoneg = FALSE;
1888 adapter->hw.phy.autoneg_advertised = 0;
1889 if ((ifm->ifm_media & IFM_GMASK) == IFM_FDX)
1890 adapter->hw.mac.forced_speed_duplex = ADVERTISE_10_FULL;
1892 adapter->hw.mac.forced_speed_duplex = ADVERTISE_10_HALF;
1895 device_printf(adapter->dev, "Unsupported media type\n");
1898 em_init_locked(adapter);
1899 EM_CORE_UNLOCK(adapter);
1904 /*********************************************************************
1906 * This routine maps the mbufs to tx descriptors.
1908 * return 0 on success, positive on failure
1909 **********************************************************************/
/*
 * Map one mbuf chain onto TX descriptors (TX lock held by caller).
 * Returns 0 on success, positive errno on failure; may replace or free
 * *m_headp (callers use peek/putback/advance accordingly).
 *
 * Phases:
 *  1. For TSO/checksum offload: obtain a writable chain, pull up the
 *     ethernet/IP/TCP(UDP) headers into one contiguous buffer (Intel
 *     requires the full header stack in a single descriptor), and for
 *     TSO recompute ip_len and the pseudo-header checksum.
 *  2. DMA-map the chain; on EFBIG collapse it once and retry; other
 *     errors are counted and the packet dropped/deferred.
 *  3. Apply the post-TSO sentinel workaround and descriptor-availability
 *     check, set up offload context descriptors and VLAN tagging.
 *  4. Write one data descriptor per DMA segment; when a TSO packet's
 *     last segment is >8 bytes, split off a small TSO_WORKAROUND-byte
 *     sentinel descriptor to prevent premature writeback.
 *  5. Swap the dmamap onto the EOP buffer, record next_eop in the
 *     first buffer, sync, and bump TDT to hand the frame to hardware.
 *
 * NOTE(review): many interior lines (error returns, brace closures,
 * wrap-to-zero index handling, m_pullup for the IP header before UDP
 * checksum, etc.) are elided in this extract -- the code below is
 * annotated as-is, not restructured.
 */
1912 em_xmit(struct tx_ring *txr, struct mbuf **m_headp)
1914 struct adapter *adapter = txr->adapter;
1915 bus_dma_segment_t segs[EM_MAX_SCATTER];
1917 struct em_txbuffer *tx_buffer, *tx_buffer_mapped;
1918 struct e1000_tx_desc *ctxd = NULL;
1919 struct mbuf *m_head;
1920 struct ether_header *eh;
1921 struct ip *ip = NULL;
1922 struct tcphdr *tp = NULL;
1923 u32 txd_upper = 0, txd_lower = 0;
1925 int nsegs, i, j, first, last = 0;
1927 bool do_tso, tso_desc, remap = TRUE;
1930 do_tso = (m_head->m_pkthdr.csum_flags & CSUM_TSO);
1935 * Intel recommends entire IP/TCP header length reside in a single
1936 * buffer. If multiple descriptors are used to describe the IP and
1937 * TCP header, each descriptor should describe one or more
1938 * complete headers; descriptors referencing only parts of headers
1939 * are not supported. If all layer headers are not coalesced into
1940 * a single buffer, each buffer should not cross a 4KB boundary,
1941 * or be larger than the maximum read request size.
1942 * Controller also requires modifing IP/TCP header to make TSO work
1943 * so we firstly get a writable mbuf chain then coalesce ethernet/
1944 * IP/TCP header into a single buffer to meet the requirement of
1945 * controller. This also simplifies IP/TCP/UDP checksum offloading
1946 * which also has similiar restrictions.
1948 if (do_tso || m_head->m_pkthdr.csum_flags & CSUM_OFFLOAD) {
1949 if (do_tso || (m_head->m_next != NULL &&
1950 m_head->m_pkthdr.csum_flags & CSUM_OFFLOAD)) {
/* Need a private copy before mutating headers */
1951 if (M_WRITABLE(*m_headp) == 0) {
1952 m_head = m_dup(*m_headp, M_NOWAIT);
1954 if (m_head == NULL) {
1963 * Assume IPv4, we don't have TSO/checksum offload support
1966 ip_off = sizeof(struct ether_header);
1967 if (m_head->m_len < ip_off) {
1968 m_head = m_pullup(m_head, ip_off);
1969 if (m_head == NULL) {
1974 eh = mtod(m_head, struct ether_header *);
/* Account for an 802.1Q tag when locating the IP header */
1975 if (eh->ether_type == htons(ETHERTYPE_VLAN)) {
1976 ip_off = sizeof(struct ether_vlan_header);
1977 if (m_head->m_len < ip_off) {
1978 m_head = m_pullup(m_head, ip_off);
1979 if (m_head == NULL) {
1985 if (m_head->m_len < ip_off + sizeof(struct ip)) {
1986 m_head = m_pullup(m_head, ip_off + sizeof(struct ip));
1987 if (m_head == NULL) {
1992 ip = (struct ip *)(mtod(m_head, char *) + ip_off);
1993 poff = ip_off + (ip->ip_hl << 2);
1995 if (do_tso || (m_head->m_pkthdr.csum_flags & CSUM_TCP)) {
1996 if (m_head->m_len < poff + sizeof(struct tcphdr)) {
1997 m_head = m_pullup(m_head, poff +
1998 sizeof(struct tcphdr));
1999 if (m_head == NULL) {
2004 tp = (struct tcphdr *)(mtod(m_head, char *) + poff);
2007 * pull 4 more bytes of data into it.
2009 if (m_head->m_len < poff + (tp->th_off << 2)) {
2010 m_head = m_pullup(m_head, poff +
2013 if (m_head == NULL) {
/* Re-derive header pointers: m_pullup may have moved the data */
2018 ip = (struct ip *)(mtod(m_head, char *) + ip_off);
2019 tp = (struct tcphdr *)(mtod(m_head, char *) + poff);
2021 ip->ip_len = htons(m_head->m_pkthdr.tso_segsz +
2026 * The pseudo TCP checksum does not include TCP
2027 * payload length so driver should recompute
2028 * the checksum here what hardware expect to
2029 * see. This is adherence of Microsoft's Large
2030 * Send specification.
2032 tp->th_sum = in_pseudo(ip->ip_src.s_addr,
2033 ip->ip_dst.s_addr, htons(IPPROTO_TCP));
2035 } else if (m_head->m_pkthdr.csum_flags & CSUM_UDP) {
2036 if (m_head->m_len < poff + sizeof(struct udphdr)) {
2037 m_head = m_pullup(m_head, poff +
2038 sizeof(struct udphdr));
2039 if (m_head == NULL) {
2044 ip = (struct ip *)(mtod(m_head, char *) + ip_off);
2050 * Map the packet for DMA
2052 * Capture the first descriptor index,
2053 * this descriptor will have the index
2054 * of the EOP which is the only one that
2055 * now gets a DONE bit writeback.
2057 first = txr->next_avail_desc;
2058 tx_buffer = &txr->tx_buffers[first];
2059 tx_buffer_mapped = tx_buffer;
2060 map = tx_buffer->map;
2063 error = bus_dmamap_load_mbuf_sg(txr->txtag, map,
2064 *m_headp, segs, &nsegs, BUS_DMA_NOWAIT);
2067 * There are two types of errors we can (try) to handle:
2068 * - EFBIG means the mbuf chain was too long and bus_dma ran
2069 * out of segments. Defragment the mbuf chain and try again.
2070 * - ENOMEM means bus_dma could not obtain enough bounce buffers
2071 * at this point in time. Defer sending and try again later.
2072 * All other errors, in particular EINVAL, are fatal and prevent the
2073 * mbuf chain from ever going through. Drop it and report error.
2075 if (error == EFBIG && remap) {
2078 m = m_collapse(*m_headp, M_NOWAIT, EM_MAX_SCATTER);
2080 adapter->mbuf_defrag_failed++;
2087 /* Try it again, but only once */
2090 } else if (error != 0) {
2091 adapter->no_tx_dma_setup++;
2098 * TSO Hardware workaround, if this packet is not
2099 * TSO, and is only a single descriptor long, and
2100 * it follows a TSO burst, then we need to add a
2101 * sentinel descriptor to prevent premature writeback.
2103 if ((!do_tso) && (txr->tx_tso == TRUE)) {
2106 txr->tx_tso = FALSE;
/* Worst case: nsegs data descriptors plus offload/sentinel slack */
2109 if (txr->tx_avail < (nsegs + EM_MAX_SCATTER)) {
2110 txr->no_desc_avail++;
2111 bus_dmamap_unload(txr->txtag, map);
2116 /* Do hardware assists */
2117 if (m_head->m_pkthdr.csum_flags & CSUM_TSO) {
2118 em_tso_setup(txr, m_head, ip_off, ip, tp,
2119 &txd_upper, &txd_lower);
2120 /* we need to make a final sentinel transmit desc */
2122 } else if (m_head->m_pkthdr.csum_flags & CSUM_OFFLOAD)
2123 em_transmit_checksum_setup(txr, m_head,
2124 ip_off, ip, &txd_upper, &txd_lower);
2126 if (m_head->m_flags & M_VLANTAG) {
2127 /* Set the vlan id. */
2129 (htole16(m_head->m_pkthdr.ether_vtag) << 16);
2130 /* Tell hardware to add tag */
2131 txd_lower |= htole32(E1000_TXD_CMD_VLE);
2134 i = txr->next_avail_desc;
2136 /* Set up our transmit descriptors */
2137 for (j = 0; j < nsegs; j++) {
2139 bus_addr_t seg_addr;
2141 tx_buffer = &txr->tx_buffers[i];
2142 ctxd = &txr->tx_base[i];
2143 seg_addr = segs[j].ds_addr;
2144 seg_len = segs[j].ds_len;
2147 ** If this is the last descriptor, we want to
2148 ** split it so we have a small final sentinel
2150 if (tso_desc && (j == (nsegs - 1)) && (seg_len > 8)) {
2151 seg_len -= TSO_WORKAROUND;
2152 ctxd->buffer_addr = htole64(seg_addr);
2153 ctxd->lower.data = htole32(
2154 adapter->txd_cmd | txd_lower | seg_len);
2155 ctxd->upper.data = htole32(txd_upper);
2156 if (++i == adapter->num_tx_desc)
2159 /* Now make the sentinel */
2161 ctxd = &txr->tx_base[i];
2162 tx_buffer = &txr->tx_buffers[i];
2164 htole64(seg_addr + seg_len);
2165 ctxd->lower.data = htole32(
2166 adapter->txd_cmd | txd_lower | TSO_WORKAROUND);
2170 if (++i == adapter->num_tx_desc)
2173 ctxd->buffer_addr = htole64(seg_addr);
2174 ctxd->lower.data = htole32(
2175 adapter->txd_cmd | txd_lower | seg_len);
2176 ctxd->upper.data = htole32(txd_upper);
2178 if (++i == adapter->num_tx_desc)
2181 tx_buffer->m_head = NULL;
2182 tx_buffer->next_eop = -1;
2185 txr->next_avail_desc = i;
2186 txr->tx_avail -= nsegs;
2188 tx_buffer->m_head = m_head;
2190 ** Here we swap the map so the last descriptor,
2191 ** which gets the completion interrupt has the
2192 ** real map, and the first descriptor gets the
2193 ** unused map from this descriptor.
2195 tx_buffer_mapped->map = tx_buffer->map;
2196 tx_buffer->map = map;
2197 bus_dmamap_sync(txr->txtag, map, BUS_DMASYNC_PREWRITE);
2200 * Last Descriptor of Packet
2201 * needs End Of Packet (EOP)
2202 * and Report Status (RS)
2205 htole32(E1000_TXD_CMD_EOP | E1000_TXD_CMD_RS);
2207 * Keep track in the first buffer which
2208 * descriptor will be written back
2210 tx_buffer = &txr->tx_buffers[first];
2211 tx_buffer->next_eop = last;
2214 * Advance the Transmit Descriptor Tail (TDT), this tells the E1000
2215 * that this frame is available to transmit.
2217 bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
2218 BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
2219 E1000_WRITE_REG(&adapter->hw, E1000_TDT(txr->me), i);
/*
 * Program RCTL promiscuous bits from interface flags: IFF_PROMISC sets
 * unicast+multicast promiscuous (SBP for bad packets is compiled out);
 * IFF_ALLMULTI sets multicast promiscuous only.
 * NOTE(review): the reg_rctl declaration, the #ifdef around SBP, and
 * closing braces are elided in this extract.
 */
2225 em_set_promisc(struct adapter *adapter)
2227 struct ifnet *ifp = adapter->ifp;
2230 reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
2232 if (ifp->if_flags & IFF_PROMISC) {
2233 reg_rctl |= (E1000_RCTL_UPE | E1000_RCTL_MPE);
2234 /* Turn this on if you want to see bad packets */
2236 reg_rctl |= E1000_RCTL_SBP;
2237 E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2238 } else if (ifp->if_flags & IFF_ALLMULTI) {
2239 reg_rctl |= E1000_RCTL_MPE;
2240 reg_rctl &= ~E1000_RCTL_UPE;
2241 E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
/*
 * Clear promiscuous RCTL bits. UPE and SBP are always cleared; MPE is
 * kept when IFF_ALLMULTI is set or when the multicast group count has
 * reached MAX_NUM_MULTICAST_ADDRESSES (the hardware filter can't hold
 * them all, so multicast-promiscuous must stay on).
 * NOTE(review): the mcnt declaration, IF_ADDR_LOCK line in the
 * pre-800000 branch, and some braces are elided in this extract.
 */
2248 em_disable_promisc(struct adapter *adapter)
2250 struct ifnet *ifp = adapter->ifp;
2252 reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
2253 reg_rctl &= (~E1000_RCTL_UPE);
2254 if (ifp->if_flags & IFF_ALLMULTI)
2255 mcnt = MAX_NUM_MULTICAST_ADDRESSES;
2257 struct ifmultiaddr *ifma;
2258 #if __FreeBSD_version < 800000
2261 if_maddr_rlock(ifp);
/* Count link-layer multicast memberships, capped at the HW limit */
2263 TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
2264 if (ifma->ifma_addr->sa_family != AF_LINK)
2266 if (mcnt == MAX_NUM_MULTICAST_ADDRESSES)
2270 #if __FreeBSD_version < 800000
2271 IF_ADDR_UNLOCK(ifp);
2273 if_maddr_runlock(ifp);
2276 /* Don't disable if in MAX groups */
2277 if (mcnt < MAX_NUM_MULTICAST_ADDRESSES)
2278 reg_rctl &= (~E1000_RCTL_MPE);
2279 reg_rctl &= (~E1000_RCTL_SBP);
2280 E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2284 /*********************************************************************
2287 * This routine is called whenever multicast address list is updated.
2289 **********************************************************************/
/*
 * Rebuild the hardware multicast filter from the interface's address
 * list. On 82542 rev2 the receiver must be held in reset (RCTL_RST,
 * with MWI temporarily disabled) while the table is written. If more
 * than MAX_NUM_MULTICAST_ADDRESSES groups are joined, fall back to
 * multicast-promiscuous instead of programming the table.
 * NOTE(review): mta initialization from adapter->mta, msec_delay calls
 * around the 82542 reset, and some braces are elided in this extract.
 */
2292 em_set_multi(struct adapter *adapter)
2294 struct ifnet *ifp = adapter->ifp;
2295 struct ifmultiaddr *ifma;
2297 u8 *mta; /* Multicast array memory */
2300 IOCTL_DEBUGOUT("em_set_multi: begin");
2303 bzero(mta, sizeof(u8) * ETH_ADDR_LEN * MAX_NUM_MULTICAST_ADDRESSES);
/* 82542 rev2 errata: put the receiver in reset before filter writes */
2305 if (adapter->hw.mac.type == e1000_82542 &&
2306 adapter->hw.revision_id == E1000_REVISION_2) {
2307 reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
2308 if (adapter->hw.bus.pci_cmd_word & CMD_MEM_WRT_INVALIDATE)
2309 e1000_pci_clear_mwi(&adapter->hw);
2310 reg_rctl |= E1000_RCTL_RST;
2311 E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2315 #if __FreeBSD_version < 800000
2318 if_maddr_rlock(ifp);
/* Collect up to MAX link-layer multicast addresses into mta[] */
2320 TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
2321 if (ifma->ifma_addr->sa_family != AF_LINK)
2324 if (mcnt == MAX_NUM_MULTICAST_ADDRESSES)
2327 bcopy(LLADDR((struct sockaddr_dl *)ifma->ifma_addr),
2328 &mta[mcnt * ETH_ADDR_LEN], ETH_ADDR_LEN);
2331 #if __FreeBSD_version < 800000
2332 IF_ADDR_UNLOCK(ifp);
2334 if_maddr_runlock(ifp);
/* Table overflow: enable multicast promiscuous instead */
2336 if (mcnt >= MAX_NUM_MULTICAST_ADDRESSES) {
2337 reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
2338 reg_rctl |= E1000_RCTL_MPE;
2339 E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2341 e1000_update_mc_addr_list(&adapter->hw, mta, mcnt);
/* 82542 rev2: take the receiver back out of reset, restore MWI */
2343 if (adapter->hw.mac.type == e1000_82542 &&
2344 adapter->hw.revision_id == E1000_REVISION_2) {
2345 reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
2346 reg_rctl &= ~E1000_RCTL_RST;
2347 E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2349 if (adapter->hw.bus.pci_cmd_word & CMD_MEM_WRT_INVALIDATE)
2350 e1000_pci_set_mwi(&adapter->hw);
2355 /*********************************************************************
2358 * This routine checks for link status and updates statistics.
2360 **********************************************************************/
/*
 * em_local_timer - once-per-second watchdog callout.  Runs with the
 * core lock held (asserted below): refreshes link state and stats,
 * re-arms the LAA on 82571, builds the RX interrupt trigger mask,
 * scans the TX rings for hang detection, and re-arms itself.  On a
 * detected hang it falls through to the watchdog-reset tail.
 */
2363 em_local_timer(void *arg)
2365 struct adapter *adapter = arg;
2366 struct ifnet *ifp = adapter->ifp;
2367 struct tx_ring *txr = adapter->tx_rings;
2368 struct rx_ring *rxr = adapter->rx_rings;
2371 EM_CORE_LOCK_ASSERT(adapter);
2373 em_update_link_status(adapter);
2374 em_update_stats_counters(adapter);
2376 /* Reset LAA into RAR[0] on 82571 */
2377 if ((adapter->hw.mac.type == e1000_82571) &&
2378 e1000_get_laa_state_82571(&adapter->hw))
2379 e1000_rar_set(&adapter->hw, adapter->hw.mac.addr, 0);
2381 /* Mask to use in the irq trigger */
/* MSI-X: OR together every RX queue's IMS bit; legacy/MSI: RXDMT0. */
2382 if (adapter->msix_mem) {
2383 for (int i = 0; i < adapter->num_queues; i++, rxr++)
2384 trigger |= rxr->ims;
2385 rxr = adapter->rx_rings;
2387 trigger = E1000_ICS_RXDMT0;
2390 ** Check on the state of the TX queue(s), this
2391 ** can be done without the lock because its RO
2392 ** and the HUNG state will be static if set.
/* busy counts up each tick with pending work; MAXTRIES => HUNG. */
2394 for (int i = 0; i < adapter->num_queues; i++, txr++) {
2395 if (txr->busy == EM_TX_HUNG)
2397 if (txr->busy >= EM_TX_MAXTRIES)
2398 txr->busy = EM_TX_HUNG;
2399 /* Schedule a TX tasklet if needed */
2400 if (txr->tx_avail <= EM_MAX_SCATTER)
2401 taskqueue_enqueue(txr->tq, &txr->tx_task);
/* Re-arm ourselves for one second from now. */
2404 callout_reset(&adapter->timer, hz, em_local_timer, adapter);
2405 #ifndef DEVICE_POLLING
2406 /* Trigger an RX interrupt to guarantee mbuf refresh */
2407 E1000_WRITE_REG(&adapter->hw, E1000_ICS, trigger);
/* Watchdog tail: mark down, count the event, and reinitialize. */
2411 /* Looks like we're hung */
2412 device_printf(adapter->dev, "Watchdog timeout Queue[%d]-- resetting\n",
2414 em_print_debug_info(adapter);
2415 ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
2416 adapter->watchdog_events++;
2417 em_init_locked(adapter);
/*
 * em_update_link_status - poll the PHY/MAC for link state by media
 * type and, on a transition, update adapter->link_* fields, the
 * ifnet baudrate, and notify the stack via if_link_state_change().
 * Link-down also disarms TX hang detection on every queue.
 */
2422 em_update_link_status(struct adapter *adapter)
2424 struct e1000_hw *hw = &adapter->hw;
2425 struct ifnet *ifp = adapter->ifp;
2426 device_t dev = adapter->dev;
2427 struct tx_ring *txr = adapter->tx_rings;
2430 /* Get the cached link value or read phy for real */
2431 switch (hw->phy.media_type) {
2432 case e1000_media_type_copper:
2433 if (hw->mac.get_link_status) {
/* I219 (pch_spt): elided special-case here — confirm in full file. */
2434 if (hw->mac.type == e1000_pch_spt)
2436 /* Do the work to read phy */
2437 e1000_check_for_link(hw);
/* Shared code clears get_link_status once it has a fresh reading. */
2438 link_check = !hw->mac.get_link_status;
2439 if (link_check) /* ESB2 fix */
2440 e1000_cfg_on_link_up(hw);
2444 case e1000_media_type_fiber:
2445 e1000_check_for_link(hw);
2446 link_check = (E1000_READ_REG(hw, E1000_STATUS) &
2449 case e1000_media_type_internal_serdes:
2450 e1000_check_for_link(hw);
2451 link_check = adapter->hw.mac.serdes_has_link;
2454 case e1000_media_type_unknown:
2458 /* Now check for a transition */
2459 if (link_check && (adapter->link_active == 0)) {
2460 e1000_get_speed_and_duplex(hw, &adapter->link_speed,
2461 &adapter->link_duplex);
2462 /* Check if we must disable SPEED_MODE bit on PCI-E */
2463 if ((adapter->link_speed != SPEED_1000) &&
2464 ((hw->mac.type == e1000_82571) ||
2465 (hw->mac.type == e1000_82572))) {
2467 tarc0 = E1000_READ_REG(hw, E1000_TARC(0));
2468 tarc0 &= ~TARC_SPEED_MODE_BIT;
2469 E1000_WRITE_REG(hw, E1000_TARC(0), tarc0);
2472 device_printf(dev, "Link is up %d Mbps %s\n",
2473 adapter->link_speed,
2474 ((adapter->link_duplex == FULL_DUPLEX) ?
2475 "Full Duplex" : "Half Duplex"));
2476 adapter->link_active = 1;
2477 adapter->smartspeed = 0;
/* link_speed is in Mbps; ifnet wants bits per second. */
2478 ifp->if_baudrate = adapter->link_speed * 1000000;
2479 if_link_state_change(ifp, LINK_STATE_UP);
2480 } else if (!link_check && (adapter->link_active == 1)) {
2481 ifp->if_baudrate = adapter->link_speed = 0;
2482 adapter->link_duplex = 0;
2484 device_printf(dev, "Link is Down\n");
2485 adapter->link_active = 0;
2486 /* Link down, disable hang detection */
2487 for (int i = 0; i < adapter->num_queues; i++, txr++)
2488 txr->busy = EM_TX_IDLE;
2489 if_link_state_change(ifp, LINK_STATE_DOWN);
2493 /*********************************************************************
2495 * This routine disables all traffic on the adapter by issuing a
2496 * global reset on the MAC and deallocates TX/RX buffers.
2498 * This routine should always be called with BOTH the CORE
2500 **********************************************************************/
/*
 * em_stop - quiesce the adapter: disable interrupts, stop the timer
 * callout, mark the interface down, disarm hang detection, and issue
 * a global MAC reset.  Must be called with the core lock held (and,
 * per the header comment above, the TX lock as well).  I219 parts
 * need their descriptor rings flushed before the reset.
 */
2505 struct adapter *adapter = arg;
2506 struct ifnet *ifp = adapter->ifp;
2507 struct tx_ring *txr = adapter->tx_rings;
2509 EM_CORE_LOCK_ASSERT(adapter);
2511 INIT_DEBUGOUT("em_stop: begin");
2513 em_disable_intr(adapter);
2514 callout_stop(&adapter->timer);
2516 /* Tell the stack that the interface is no longer active */
2517 ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
2518 ifp->if_drv_flags |= IFF_DRV_OACTIVE;
2520 /* Disarm Hang Detection. */
2521 for (int i = 0; i < adapter->num_queues; i++, txr++) {
2523 txr->busy = EM_TX_IDLE;
2527 /* I219 needs some special flushing to avoid hangs */
2528 if (adapter->hw.mac.type == e1000_pch_spt)
2529 em_flush_desc_rings(adapter);
/* Global reset, then clear wakeup control so WoL state is clean. */
2531 e1000_reset_hw(&adapter->hw);
2532 E1000_WRITE_REG(&adapter->hw, E1000_WUC, 0);
2534 e1000_led_off(&adapter->hw);
2535 e1000_cleanup_led(&adapter->hw);
2539 /*********************************************************************
2541 * Determine hardware revision.
2543 **********************************************************************/
/*
 * em_identify_hardware - enable PCI bus mastering and copy the PCI
 * vendor/device/revision/subsystem IDs out of config space into the
 * shared-code hw structure, then let e1000_set_mac_type() derive the
 * MAC type from them.  Failure there is a fatal attach error.
 */
2545 em_identify_hardware(struct adapter *adapter)
2547 device_t dev = adapter->dev;
2549 /* Make sure our PCI config space has the necessary stuff set */
2550 pci_enable_busmaster(dev);
/* Cache the command word; used later for the MWI workarounds. */
2551 adapter->hw.bus.pci_cmd_word = pci_read_config(dev, PCIR_COMMAND, 2);
2553 /* Save off the information about this board */
2554 adapter->hw.vendor_id = pci_get_vendor(dev);
2555 adapter->hw.device_id = pci_get_device(dev);
2556 adapter->hw.revision_id = pci_read_config(dev, PCIR_REVID, 1);
2557 adapter->hw.subsystem_vendor_id =
2558 pci_read_config(dev, PCIR_SUBVEND_0, 2);
2559 adapter->hw.subsystem_device_id =
2560 pci_read_config(dev, PCIR_SUBDEV_0, 2);
2562 /* Do Shared Code Init and Setup */
2563 if (e1000_set_mac_type(&adapter->hw)) {
2564 device_printf(dev, "Setup init failure\n");
/*
 * em_allocate_pci_resources - map the device's memory BAR and wire
 * the resulting bus tag/handle into the osdep structure so the
 * shared-code register macros can reach the hardware.
 */
2570 em_allocate_pci_resources(struct adapter *adapter)
2572 device_t dev = adapter->dev;
2576 adapter->memory = bus_alloc_resource_any(dev, SYS_RES_MEMORY,
2578 if (adapter->memory == NULL) {
2579 device_printf(dev, "Unable to allocate bus resource: memory\n");
2582 adapter->osdep.mem_bus_space_tag =
2583 rman_get_bustag(adapter->memory);
2584 adapter->osdep.mem_bus_space_handle =
2585 rman_get_bushandle(adapter->memory);
/* hw_addr is only a cookie here; real access goes via the handle. */
2586 adapter->hw.hw_addr = (u8 *)&adapter->osdep.mem_bus_space_handle;
2588 adapter->hw.back = &adapter->osdep;
2593 /*********************************************************************
2595 * Setup the Legacy or MSI Interrupt handler
2597 **********************************************************************/
/*
 * em_allocate_legacy - set up the single shared legacy (or MSI)
 * interrupt: allocate the IRQ resource, create the deferred-work
 * taskqueues (one general "que" task plus a TX-only task used by the
 * local timer), and install em_irq_fast as a fast interrupt filter.
 */
2599 em_allocate_legacy(struct adapter *adapter)
2601 device_t dev = adapter->dev;
2602 struct tx_ring *txr = adapter->tx_rings;
2605 /* Manually turn off all interrupts */
2606 E1000_WRITE_REG(&adapter->hw, E1000_IMC, 0xffffffff);
/* MSI uses rid 1; legacy INTx uses rid 0 (elided assignment). */
2608 if (adapter->msix == 1) /* using MSI */
2610 /* We allocate a single interrupt resource */
2611 adapter->res = bus_alloc_resource_any(dev,
2612 SYS_RES_IRQ, &rid, RF_SHAREABLE | RF_ACTIVE);
2613 if (adapter->res == NULL) {
2614 device_printf(dev, "Unable to allocate bus resource: "
2620 * Allocate a fast interrupt and the associated
2621 * deferred processing contexts.
2623 TASK_INIT(&adapter->que_task, 0, em_handle_que, adapter);
2624 adapter->tq = taskqueue_create_fast("em_taskq", M_NOWAIT,
2625 taskqueue_thread_enqueue, &adapter->tq);
2626 taskqueue_start_threads(&adapter->tq, 1, PI_NET, "%s que",
2627 device_get_nameunit(adapter->dev));
2628 /* Use a TX only tasklet for local timer */
2629 TASK_INIT(&txr->tx_task, 0, em_handle_tx, txr);
2630 txr->tq = taskqueue_create_fast("em_txq", M_NOWAIT,
2631 taskqueue_thread_enqueue, &txr->tq);
2632 taskqueue_start_threads(&txr->tq, 1, PI_NET, "%s txq",
2633 device_get_nameunit(adapter->dev));
2634 TASK_INIT(&adapter->link_task, 0, em_handle_link, adapter);
/* Filter-only handler (no ithread arg): em_irq_fast runs at intr. */
2635 if ((error = bus_setup_intr(dev, adapter->res, INTR_TYPE_NET,
2636 em_irq_fast, NULL, adapter, &adapter->tag)) != 0) {
2637 device_printf(dev, "Failed to register fast interrupt "
2638 "handler: %d\n", error);
2639 taskqueue_free(adapter->tq);
2647 /*********************************************************************
2649 * Setup the MSIX Interrupt handlers
2650 * This is not really Multiqueue, rather
2651 * it's just separate interrupt vectors
2652 * for TX, RX, and Link.
2654 **********************************************************************/
/*
 * em_allocate_msix - allocate and wire up the MSI-X vectors: one per
 * RX ring, one per TX ring, and a final link vector.  For each ring
 * vector: allocate the IRQ, install the handler, bind it to a CPU
 * (round-robin via em_last_bind_cpu), create its taskqueue, and
 * record its IMS bit and IVAR nibble for the 82574 interrupt-routing
 * register.
 */
2656 em_allocate_msix(struct adapter *adapter)
2658 device_t dev = adapter->dev;
2659 struct tx_ring *txr = adapter->tx_rings;
2660 struct rx_ring *rxr = adapter->rx_rings;
2661 int error, rid, vector = 0;
2665 /* Make sure all interrupts are disabled */
2666 E1000_WRITE_REG(&adapter->hw, E1000_IMC, 0xffffffff);
2668 /* First set up ring resources */
2669 for (int i = 0; i < adapter->num_queues; i++, rxr++, vector++) {
2674 rxr->res = bus_alloc_resource_any(dev,
2675 SYS_RES_IRQ, &rid, RF_ACTIVE);
2676 if (rxr->res == NULL) {
2678 "Unable to allocate bus resource: "
2679 "RX MSIX Interrupt %d\n", i);
2682 if ((error = bus_setup_intr(dev, rxr->res,
2683 INTR_TYPE_NET | INTR_MPSAFE, NULL, em_msix_rx,
2684 rxr, &rxr->tag)) != 0) {
2685 device_printf(dev, "Failed to register RX handler");
2688 #if __FreeBSD_version >= 800504
2689 bus_describe_intr(dev, rxr->res, rxr->tag, "rx%d", i);
/* Round-robin CPU binding for the RX vectors. */
2693 if (em_last_bind_cpu < 0)
2694 em_last_bind_cpu = CPU_FIRST();
2695 cpu_id = em_last_bind_cpu;
2696 bus_bind_intr(dev, rxr->res, cpu_id);
2698 TASK_INIT(&rxr->rx_task, 0, em_handle_rx, rxr);
2699 rxr->tq = taskqueue_create_fast("em_rxq", M_NOWAIT,
2700 taskqueue_thread_enqueue, &rxr->tq);
2701 taskqueue_start_threads(&rxr->tq, 1, PI_NET, "%s rxq (cpuid %d)",
2702 device_get_nameunit(adapter->dev), cpu_id);
2704 ** Set the bit to enable interrupt
2705 ** in E1000_IMS -- bits 20 and 21
2706 ** are for RX0 and RX1, note this has
2707 ** NOTHING to do with the MSIX vector
2709 rxr->ims = 1 << (20 + i);
2710 adapter->ims |= rxr->ims;
/* IVAR nibble: bit 3 = valid, low 3 bits = MSI-X vector. */
2711 adapter->ivars |= (8 | rxr->msix) << (i * 4);
2713 em_last_bind_cpu = CPU_NEXT(em_last_bind_cpu);
/* Same dance for the TX rings. */
2716 for (int i = 0; i < adapter->num_queues; i++, txr++, vector++) {
2719 txr->res = bus_alloc_resource_any(dev,
2720 SYS_RES_IRQ, &rid, RF_ACTIVE);
2721 if (txr->res == NULL) {
2723 "Unable to allocate bus resource: "
2724 "TX MSIX Interrupt %d\n", i);
2727 if ((error = bus_setup_intr(dev, txr->res,
2728 INTR_TYPE_NET | INTR_MPSAFE, NULL, em_msix_tx,
2729 txr, &txr->tag)) != 0) {
2730 device_printf(dev, "Failed to register TX handler");
2733 #if __FreeBSD_version >= 800504
2734 bus_describe_intr(dev, txr->res, txr->tag, "tx%d", i);
2738 if (em_last_bind_cpu < 0)
2739 em_last_bind_cpu = CPU_FIRST();
2740 cpu_id = em_last_bind_cpu;
2741 bus_bind_intr(dev, txr->res, cpu_id);
2743 TASK_INIT(&txr->tx_task, 0, em_handle_tx, txr);
2744 txr->tq = taskqueue_create_fast("em_txq", M_NOWAIT,
2745 taskqueue_thread_enqueue, &txr->tq);
2746 taskqueue_start_threads(&txr->tq, 1, PI_NET, "%s txq (cpuid %d)",
2747 device_get_nameunit(adapter->dev), cpu_id);
2749 ** Set the bit to enable interrupt
2750 ** in E1000_IMS -- bits 22 and 23
2751 ** are for TX0 and TX1, note this has
2752 ** NOTHING to do with the MSIX vector
2754 txr->ims = 1 << (22 + i);
2755 adapter->ims |= txr->ims;
/* TX IVAR nibbles live in the second byte (offset 8 + i*4). */
2756 adapter->ivars |= (8 | txr->msix) << (8 + (i * 4));
2758 em_last_bind_cpu = CPU_NEXT(em_last_bind_cpu);
2761 /* Link interrupt */
2763 adapter->res = bus_alloc_resource_any(dev,
2764 SYS_RES_IRQ, &rid, RF_SHAREABLE | RF_ACTIVE);
2765 if (!adapter->res) {
2766 device_printf(dev,"Unable to allocate "
2767 "bus resource: Link interrupt [%d]\n", rid);
2770 /* Set the link handler function */
2771 error = bus_setup_intr(dev, adapter->res,
2772 INTR_TYPE_NET | INTR_MPSAFE, NULL,
2773 em_msix_link, adapter, &adapter->tag);
2775 adapter->res = NULL;
2776 device_printf(dev, "Failed to register LINK handler");
2779 #if __FreeBSD_version >= 800504
2780 bus_describe_intr(dev, adapter->res, adapter->tag, "link");
2782 adapter->linkvec = vector;
/* Link vector routed via IVAR bits 16..19; 0x80000000 enables it. */
2783 adapter->ivars |= (8 | vector) << 16;
2784 adapter->ivars |= 0x80000000;
/*
 * em_free_pci_resources - teardown mirror of the allocation paths:
 * release per-queue TX/RX interrupts, then the legacy/MSI/link
 * interrupt, any MSI vectors, the MSI-X table BAR, the memory BAR,
 * and the flash mapping.  Safe against partial allocation (checks
 * every pointer before releasing).
 */
2791 em_free_pci_resources(struct adapter *adapter)
2793 device_t dev = adapter->dev;
2794 struct tx_ring *txr;
2795 struct rx_ring *rxr;
2800 ** Release all the queue interrupt resources:
2802 for (int i = 0; i < adapter->num_queues; i++) {
2803 txr = &adapter->tx_rings[i];
2804 /* an early abort? */
2808 if (txr->tag != NULL) {
2809 bus_teardown_intr(dev, txr->res, txr->tag);
2812 if (txr->res != NULL)
2813 bus_release_resource(dev, SYS_RES_IRQ,
2816 rxr = &adapter->rx_rings[i];
2817 /* an early abort? */
2821 if (rxr->tag != NULL) {
2822 bus_teardown_intr(dev, rxr->res, rxr->tag);
2825 if (rxr->res != NULL)
2826 bus_release_resource(dev, SYS_RES_IRQ,
/* Pick the rid the link/legacy interrupt was allocated with. */
2830 if (adapter->linkvec) /* we are doing MSIX */
2831 rid = adapter->linkvec + 1;
2833 (adapter->msix != 0) ? (rid = 1):(rid = 0);
2835 if (adapter->tag != NULL) {
2836 bus_teardown_intr(dev, adapter->res, adapter->tag);
2837 adapter->tag = NULL;
2840 if (adapter->res != NULL)
2841 bus_release_resource(dev, SYS_RES_IRQ, rid, adapter->res);
2845 pci_release_msi(dev);
2847 if (adapter->msix_mem != NULL)
2848 bus_release_resource(dev, SYS_RES_MEMORY,
2849 adapter->memrid, adapter->msix_mem);
2851 if (adapter->memory != NULL)
2852 bus_release_resource(dev, SYS_RES_MEMORY,
2853 PCIR_BAR(0), adapter->memory);
2855 if (adapter->flash != NULL)
2856 bus_release_resource(dev, SYS_RES_MEMORY,
2857 EM_FLASH, adapter->flash);
2861 * Setup MSI or MSI/X
/*
 * em_setup_msix - decide the interrupt strategy.  MSI-X is attempted
 * only on 82574 (Hartwell) when em_enable_msix is set: map the MSI-X
 * BAR, check the vector count (5 needed for 2-queue multiqueue),
 * and allocate.  On any shortfall it falls back to MSI, and failing
 * that to a legacy IRQ.  Also fixes num_queues accordingly.
 */
2864 em_setup_msix(struct adapter *adapter)
2866 device_t dev = adapter->dev;
2869 /* Nearly always going to use one queue */
2870 adapter->num_queues = 1;
2873 ** Try using MSI-X for Hartwell adapters
2875 if ((adapter->hw.mac.type == e1000_82574) &&
2876 (em_enable_msix == TRUE)) {
2877 #ifdef EM_MULTIQUEUE
2878 adapter->num_queues = (em_num_queues == 1) ? 1 : 2;
2879 if (adapter->num_queues > 1)
2880 em_enable_vectors_82574(adapter);
2882 /* Map the MSIX BAR */
2883 adapter->memrid = PCIR_BAR(EM_MSIX_BAR);
2884 adapter->msix_mem = bus_alloc_resource_any(dev,
2885 SYS_RES_MEMORY, &adapter->memrid, RF_ACTIVE);
2886 if (adapter->msix_mem == NULL) {
2887 /* May not be enabled */
2888 device_printf(adapter->dev,
2889 "Unable to map MSIX table \n");
2892 val = pci_msix_count(dev);
2894 #ifdef EM_MULTIQUEUE
2895 /* We need 5 vectors in the multiqueue case */
2896 if (adapter->num_queues > 1 ) {
2900 adapter->num_queues = 1;
2901 device_printf(adapter->dev,
2902 "Insufficient MSIX vectors for >1 queue, "
2903 "using single queue...\n");
2912 device_printf(adapter->dev,
2913 "Insufficient MSIX vectors, using MSI\n");
2916 #ifdef EM_MULTIQUEUE
2920 if ((pci_alloc_msix(dev, &val) == 0)) {
2921 device_printf(adapter->dev,
2922 "Using MSIX interrupts "
2923 "with %d vectors\n", val);
2928 ** If MSIX alloc failed or provided us with
2929 ** less than needed, free and fall through to MSI
2931 pci_release_msi(dev);
/* Undo the BAR mapping before trying MSI. */
2934 if (adapter->msix_mem != NULL) {
2935 bus_release_resource(dev, SYS_RES_MEMORY,
2936 adapter->memrid, adapter->msix_mem);
2937 adapter->msix_mem = NULL;
2940 if (pci_alloc_msi(dev, &val) == 0) {
2941 device_printf(adapter->dev, "Using an MSI interrupt\n");
2944 /* Should only happen due to manual configuration */
2945 device_printf(adapter->dev,"No MSI/MSIX using a Legacy IRQ\n");
2951 ** The 3 following flush routines are used as a workaround in the
2952 ** I219 client parts and only for them.
2954 ** em_flush_tx_ring - remove all descriptors from the tx_ring
2956 ** We want to clear all pending descriptors from the TX ring.
2957 ** zeroing happens when the HW reads the regs. We assign the ring itself as
2958 ** the data of the next descriptor. We don't care about the data we are about
/*
 * em_flush_tx_ring - part of the I219 hang workaround (see the
 * comment block above).  Queues one dummy descriptor (pointing at
 * the ring's own DMA buffer) with TCTL_EN forced on, then bumps the
 * tail so the hardware consumes every pending TX descriptor.
 */
2962 em_flush_tx_ring(struct adapter *adapter)
2964 struct e1000_hw *hw = &adapter->hw;
2965 struct tx_ring *txr = adapter->tx_rings;
2966 struct e1000_tx_desc *txd;
2967 u32 tctl, txd_lower = E1000_TXD_CMD_IFCS;
2970 tctl = E1000_READ_REG(hw, E1000_TCTL);
2971 E1000_WRITE_REG(hw, E1000_TCTL, tctl | E1000_TCTL_EN);
2973 txd = &txr->tx_base[txr->next_avail_desc++];
2974 if (txr->next_avail_desc == adapter->num_tx_desc)
2975 txr->next_avail_desc = 0;
2977 /* Just use the ring as a dummy buffer addr */
2978 txd->buffer_addr = txr->txdma.dma_paddr;
/* NOTE(review): `size` is declared in an elided line — confirm. */
2979 txd->lower.data = htole32(txd_lower | size);
2980 txd->upper.data = 0;
2982 /* flush descriptors to memory before notifying the HW */
2985 E1000_WRITE_REG(hw, E1000_TDT(0), txr->next_avail_desc);
2991 ** em_flush_rx_ring - remove all descriptors from the rx_ring
2993 ** Mark all descriptors in the RX ring as consumed and disable the rx ring
/*
 * em_flush_rx_ring - I219 hang workaround companion to the TX flush:
 * disable the receiver, reprogram RXDCTL(0) thresholds, then pulse
 * RCTL_EN so the new settings latch and all RX descriptors are
 * marked consumed, leaving the ring disabled on return.
 */
2996 em_flush_rx_ring(struct adapter *adapter)
2998 struct e1000_hw *hw = &adapter->hw;
3001 rctl = E1000_READ_REG(hw, E1000_RCTL);
3002 E1000_WRITE_REG(hw, E1000_RCTL, rctl & ~E1000_RCTL_EN);
3003 E1000_WRITE_FLUSH(hw);
3006 rxdctl = E1000_READ_REG(hw, E1000_RXDCTL(0));
3007 /* zero the lower 14 bits (prefetch and host thresholds) */
3008 rxdctl &= 0xffffc000;
3010 * update thresholds: prefetch threshold to 31, host threshold to 1
3011 * and make sure the granularity is "descriptors" and not "cache lines"
3013 rxdctl |= (0x1F | (1 << 8) | E1000_RXDCTL_THRESH_UNIT_DESC);
3014 E1000_WRITE_REG(hw, E1000_RXDCTL(0), rxdctl);
3016 /* momentarily enable the RX ring for the changes to take effect */
3017 E1000_WRITE_REG(hw, E1000_RCTL, rctl | E1000_RCTL_EN);
3018 E1000_WRITE_FLUSH(hw);
3020 E1000_WRITE_REG(hw, E1000_RCTL, rctl & ~E1000_RCTL_EN);
3024 ** em_flush_desc_rings - remove all descriptors from the descriptor rings
3026 ** In i219, the descriptor rings must be emptied before resetting the HW
3027 ** or before changing the device state to D3 during runtime (runtime PM).
3029 ** Failure to do this will cause the HW to enter a unit hang state which can
3030 ** only be released by PCI reset on the device
/*
 * em_flush_desc_rings - I219 entry point for the ring-flush
 * workaround: disable the MULR fix, then flush the TX ring only if
 * the PCI config "descriptor ring status" word says a flush is
 * required and TDLEN is nonzero; recheck afterwards and flush RX if
 * the fault persists.  Skipping this before reset can wedge the HW
 * until a PCI reset (see comment block above).
 */
3034 em_flush_desc_rings(struct adapter *adapter)
3036 struct e1000_hw *hw = &adapter->hw;
3037 device_t dev = adapter->dev;
3039 u32 fext_nvm11, tdlen;
3041 /* First, disable MULR fix in FEXTNVM11 */
3042 fext_nvm11 = E1000_READ_REG(hw, E1000_FEXTNVM11);
3043 fext_nvm11 |= E1000_FEXTNVM11_DISABLE_MULR_FIX;
3044 E1000_WRITE_REG(hw, E1000_FEXTNVM11, fext_nvm11);
3046 /* do nothing if we're not in faulty state, or if the queue is empty */
3047 tdlen = E1000_READ_REG(hw, E1000_TDLEN(0));
3048 hang_state = pci_read_config(dev, PCICFG_DESC_RING_STATUS, 2);
3049 if (!(hang_state & FLUSH_DESC_REQUIRED) || !tdlen)
3051 em_flush_tx_ring(adapter);
3053 /* recheck, maybe the fault is caused by the rx ring */
3054 hang_state = pci_read_config(dev, PCICFG_DESC_RING_STATUS, 2);
3055 if (hang_state & FLUSH_DESC_REQUIRED)
3056 em_flush_rx_ring(adapter);
3060 /*********************************************************************
3062 * Initialize the hardware to a configuration
3063 * as specified by the adapter structure.
3065 **********************************************************************/
/*
 * em_reset - bring the hardware to a known-good configured state:
 * disable smart power down where applicable, size the packet buffer
 * (PBA) split per MAC type and frame size, compute flow-control
 * watermarks, apply per-device FC overrides, flush I219 rings,
 * issue the global reset, and run e1000_init_hw().
 */
3067 em_reset(struct adapter *adapter)
3069 device_t dev = adapter->dev;
3070 struct ifnet *ifp = adapter->ifp;
3071 struct e1000_hw *hw = &adapter->hw;
3075 INIT_DEBUGOUT("em_reset: begin");
3077 /* Set up smart power down as default off on newer adapters. */
3078 if (!em_smart_pwr_down && (hw->mac.type == e1000_82571 ||
3079 hw->mac.type == e1000_82572)) {
3082 /* Speed up time to link by disabling smart power down. */
3083 e1000_read_phy_reg(hw, IGP02E1000_PHY_POWER_MGMT, &phy_tmp);
3084 phy_tmp &= ~IGP02E1000_PM_SPD;
3085 e1000_write_phy_reg(hw, IGP02E1000_PHY_POWER_MGMT, phy_tmp);
3089 * Packet Buffer Allocation (PBA)
3090 * Writing PBA sets the receive portion of the buffer
3091 * the remainder is used for the transmit buffer.
3093 switch (hw->mac.type) {
3094 /* Total Packet Buffer on these is 48K */
3097 case e1000_80003es2lan:
3098 pba = E1000_PBA_32K; /* 32K for Rx, 16K for Tx */
3100 case e1000_82573: /* 82573: Total Packet Buffer is 32K */
3101 pba = E1000_PBA_12K; /* 12K for Rx, 20K for Tx */
3105 pba = E1000_PBA_20K; /* 20K for Rx, 20K for Tx */
3111 case e1000_ich10lan:
3112 /* Boost Receive side for jumbo frames */
3113 if (adapter->hw.mac.max_frame_size > 4096)
3114 pba = E1000_PBA_14K;
3116 pba = E1000_PBA_10K;
3122 pba = E1000_PBA_26K;
3125 if (adapter->hw.mac.max_frame_size > 8192)
3126 pba = E1000_PBA_40K; /* 40K for Rx, 24K for Tx */
3128 pba = E1000_PBA_48K; /* 48K for Rx, 16K for Tx */
3130 E1000_WRITE_REG(&adapter->hw, E1000_PBA, pba);
3133 * These parameters control the automatic generation (Tx) and
3134 * response (Rx) to Ethernet PAUSE frames.
3135 * - High water mark should allow for at least two frames to be
3136 * received after sending an XOFF.
3137 * - Low water mark works best when it is very near the high water mark.
3138 * This allows the receiver to restart by sending XON when it has
3139 * drained a bit. Here we use an arbitrary value of 1500 which will
3140 * restart after one full frame is pulled from the buffer. There
3141 * could be several smaller frames in the buffer and if so they will
3142 * not trigger the XON until their total number reduces the buffer
3144 * - The pause time is fairly large at 1000 x 512ns = 512 usec.
/* PBA low 16 bits are in KB; shift by 10 converts to bytes. */
3146 rx_buffer_size = ((E1000_READ_REG(hw, E1000_PBA) & 0xffff) << 10 );
3147 hw->fc.high_water = rx_buffer_size -
3148 roundup2(adapter->hw.mac.max_frame_size, 1024);
3149 hw->fc.low_water = hw->fc.high_water - 1500;
3151 if (adapter->fc) /* locally set flow control value? */
3152 hw->fc.requested_mode = adapter->fc;
3154 hw->fc.requested_mode = e1000_fc_full;
3156 if (hw->mac.type == e1000_80003es2lan)
3157 hw->fc.pause_time = 0xFFFF;
3159 hw->fc.pause_time = EM_FC_PAUSE_TIME;
3161 hw->fc.send_xon = TRUE;
3163 /* Device specific overrides/settings */
3164 switch (hw->mac.type) {
3166 /* Workaround: no TX flow ctrl for PCH */
3167 hw->fc.requested_mode = e1000_fc_rx_pause;
3168 hw->fc.pause_time = 0xFFFF; /* override */
3169 if (ifp->if_mtu > ETHERMTU) {
3170 hw->fc.high_water = 0x3500;
3171 hw->fc.low_water = 0x1500;
3173 hw->fc.high_water = 0x5000;
3174 hw->fc.low_water = 0x3000;
3176 hw->fc.refresh_time = 0x1000;
3181 hw->fc.high_water = 0x5C20;
3182 hw->fc.low_water = 0x5048;
3183 hw->fc.pause_time = 0x0650;
3184 hw->fc.refresh_time = 0x0400;
3185 /* Jumbos need adjusted PBA */
3186 if (ifp->if_mtu > ETHERMTU)
3187 E1000_WRITE_REG(hw, E1000_PBA, 12);
3189 E1000_WRITE_REG(hw, E1000_PBA, 26);
3192 case e1000_ich10lan:
3193 if (ifp->if_mtu > ETHERMTU) {
3194 hw->fc.high_water = 0x2800;
3195 hw->fc.low_water = hw->fc.high_water - 8;
3198 /* else fall thru */
3200 if (hw->mac.type == e1000_80003es2lan)
3201 hw->fc.pause_time = 0xFFFF;
3205 /* I219 needs some special flushing to avoid hangs */
3206 if (hw->mac.type == e1000_pch_spt)
3207 em_flush_desc_rings(adapter);
3209 /* Issue a global reset */
/* Clear wakeup control and ASPM before re-initializing. */
3211 E1000_WRITE_REG(hw, E1000_WUC, 0);
3212 em_disable_aspm(adapter);
3214 if (e1000_init_hw(hw) < 0) {
3215 device_printf(dev, "Hardware Initialization Failed\n");
3219 E1000_WRITE_REG(hw, E1000_VET, ETHERTYPE_VLAN);
3220 e1000_get_phy_info(hw);
3221 e1000_check_for_link(hw);
3225 /*********************************************************************
3227 * Setup networking device structure and register an interface.
3229 **********************************************************************/
/*
 * em_setup_interface - allocate and populate the ifnet: entry
 * points, TSO limits, transmit path (multiqueue or legacy if_start),
 * capabilities (checksum offload, TSO4, VLAN, polling, WoL), attach
 * to the ethernet layer, and register the supported ifmedia types.
 */
3231 em_setup_interface(device_t dev, struct adapter *adapter)
3235 INIT_DEBUGOUT("em_setup_interface: begin");
3237 ifp = adapter->ifp = if_alloc(IFT_ETHER);
3239 device_printf(dev, "can not allocate ifnet structure\n");
3242 if_initname(ifp, device_get_name(dev), device_get_unit(dev));
3243 ifp->if_init = em_init;
3244 ifp->if_softc = adapter;
3245 ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
3246 ifp->if_ioctl = em_ioctl;
3248 /* TSO parameters */
3249 ifp->if_hw_tsomax = IP_MAXPACKET;
3250 /* Take m_pullup(9)'s in em_xmit() w/ TSO into account. */
3251 ifp->if_hw_tsomaxsegcount = EM_MAX_SCATTER - 5;
3252 ifp->if_hw_tsomaxsegsize = EM_TSO_SEG_SIZE;
3254 #ifdef EM_MULTIQUEUE
3255 /* Multiqueue stack interface */
3256 ifp->if_transmit = em_mq_start;
3257 ifp->if_qflush = em_qflush;
3259 ifp->if_start = em_start;
3260 IFQ_SET_MAXLEN(&ifp->if_snd, adapter->num_tx_desc - 1);
3261 ifp->if_snd.ifq_drv_maxlen = adapter->num_tx_desc - 1;
3262 IFQ_SET_READY(&ifp->if_snd);
3265 ether_ifattach(ifp, adapter->hw.mac.addr);
3267 ifp->if_capabilities = ifp->if_capenable = 0;
3270 ifp->if_capabilities |= IFCAP_HWCSUM | IFCAP_VLAN_HWCSUM;
3271 ifp->if_capabilities |= IFCAP_TSO4;
3273 * Tell the upper layer(s) we
3274 * support full VLAN capability
3276 ifp->if_data.ifi_hdrlen = sizeof(struct ether_vlan_header);
3277 ifp->if_capabilities |= IFCAP_VLAN_HWTAGGING
3280 ifp->if_capenable = ifp->if_capabilities;
3283 ** Don't turn this on by default, if vlans are
3284 ** created on another pseudo device (eg. lagg)
3285 ** then vlan events are not passed thru, breaking
3286 ** operation, but with HW FILTER off it works. If
3287 ** using vlans directly on the em driver you can
3288 ** enable this and get full hardware tag filtering.
3290 ifp->if_capabilities |= IFCAP_VLAN_HWFILTER;
3292 #ifdef DEVICE_POLLING
3293 ifp->if_capabilities |= IFCAP_POLLING;
3296 /* Enable only WOL MAGIC by default */
3298 ifp->if_capabilities |= IFCAP_WOL;
3299 ifp->if_capenable |= IFCAP_WOL_MAGIC;
3303 * Specify the media types supported by this adapter and register
3304 * callbacks to update media and link information
3306 ifmedia_init(&adapter->media, IFM_IMASK,
3307 em_media_change, em_media_status);
3308 if ((adapter->hw.phy.media_type == e1000_media_type_fiber) ||
3309 (adapter->hw.phy.media_type == e1000_media_type_internal_serdes)) {
3310 u_char fiber_type = IFM_1000_SX; /* default type */
3312 ifmedia_add(&adapter->media, IFM_ETHER | fiber_type | IFM_FDX,
3314 ifmedia_add(&adapter->media, IFM_ETHER | fiber_type, 0, NULL);
/* Copper: advertise 10/100 both duplexes, 1000 only full duplex. */
3316 ifmedia_add(&adapter->media, IFM_ETHER | IFM_10_T, 0, NULL);
3317 ifmedia_add(&adapter->media, IFM_ETHER | IFM_10_T | IFM_FDX,
3319 ifmedia_add(&adapter->media, IFM_ETHER | IFM_100_TX,
3321 ifmedia_add(&adapter->media, IFM_ETHER | IFM_100_TX | IFM_FDX,
/* The ife PHY (ICH8 10/100-only) cannot do gigabit. */
3323 if (adapter->hw.phy.type != e1000_phy_ife) {
3324 ifmedia_add(&adapter->media,
3325 IFM_ETHER | IFM_1000_T | IFM_FDX, 0, NULL);
3326 ifmedia_add(&adapter->media,
3327 IFM_ETHER | IFM_1000_T, 0, NULL);
3330 ifmedia_add(&adapter->media, IFM_ETHER | IFM_AUTO, 0, NULL);
3331 ifmedia_set(&adapter->media, IFM_ETHER | IFM_AUTO);
3337 * Manage DMA'able memory.
/*
 * em_dmamap_cb - bus_dmamap_load() callback: stores the physical
 * address of the single DMA segment into the caller-provided
 * bus_addr_t (used by em_dma_malloc to learn dma_paddr).
 */
3340 em_dmamap_cb(void *arg, bus_dma_segment_t *segs, int nseg, int error)
3344 *(bus_addr_t *) arg = segs[0].ds_addr;
/*
 * em_dma_malloc - allocate a coherent DMA-able region of `size`
 * bytes: creates a dedicated tag (EM_DBA_ALIGN aligned, single
 * segment), allocates and maps the memory, and loads the map to
 * learn the physical address via em_dmamap_cb.  On failure each
 * step unwinds the previous ones and the tag is cleared so
 * em_dma_free() on the struct is safe.
 */
3348 em_dma_malloc(struct adapter *adapter, bus_size_t size,
3349 struct em_dma_alloc *dma, int mapflags)
3353 error = bus_dma_tag_create(bus_get_dma_tag(adapter->dev), /* parent */
3354 EM_DBA_ALIGN, 0, /* alignment, bounds */
3355 BUS_SPACE_MAXADDR, /* lowaddr */
3356 BUS_SPACE_MAXADDR, /* highaddr */
3357 NULL, NULL, /* filter, filterarg */
3360 size, /* maxsegsize */
3362 NULL, /* lockfunc */
3366 device_printf(adapter->dev,
3367 "%s: bus_dma_tag_create failed: %d\n",
3372 error = bus_dmamem_alloc(dma->dma_tag, (void**) &dma->dma_vaddr,
3373 BUS_DMA_NOWAIT | BUS_DMA_COHERENT, &dma->dma_map);
3375 device_printf(adapter->dev,
3376 "%s: bus_dmamem_alloc(%ju) failed: %d\n",
3377 __func__, (uintmax_t)size, error);
3382 error = bus_dmamap_load(dma->dma_tag, dma->dma_map, dma->dma_vaddr,
3383 size, em_dmamap_cb, &dma->dma_paddr, mapflags | BUS_DMA_NOWAIT);
3384 if (error || dma->dma_paddr == 0) {
3385 device_printf(adapter->dev,
3386 "%s: bus_dmamap_load failed: %d\n",
/* Error unwind: undo load, free memory, destroy the tag. */
3394 bus_dmamap_unload(dma->dma_tag, dma->dma_map);
3396 bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map);
3397 bus_dma_tag_destroy(dma->dma_tag);
3399 dma->dma_tag = NULL;
/*
 * em_dma_free - release a region set up by em_dma_malloc().
 * Idempotent: a NULL tag means never allocated (or already freed);
 * each field is checked before teardown and cleared after.
 */
3405 em_dma_free(struct adapter *adapter, struct em_dma_alloc *dma)
3407 if (dma->dma_tag == NULL)
3409 if (dma->dma_paddr != 0) {
/* Sync before unload so the CPU sees the device's final writes. */
3410 bus_dmamap_sync(dma->dma_tag, dma->dma_map,
3411 BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
3412 bus_dmamap_unload(dma->dma_tag, dma->dma_map);
3415 if (dma->dma_vaddr != NULL) {
3416 bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map);
3417 dma->dma_vaddr = NULL;
3419 bus_dma_tag_destroy(dma->dma_tag);
3420 dma->dma_tag = NULL;
3424 /*********************************************************************
3426 * Allocate memory for the transmit and receive rings, and then
3427 * the descriptors associated with each, called only once at attach.
3429 **********************************************************************/
/*
 * em_allocate_queues - one-time (attach) allocation of the TX/RX
 * ring arrays, their descriptor DMA areas, per-ring mutexes, and the
 * software buffer structures.  txconf/rxconf count how many rings
 * were fully set up so the failure path can unwind exactly that
 * many via em_dma_free().
 */
3431 em_allocate_queues(struct adapter *adapter)
3433 device_t dev = adapter->dev;
3434 struct tx_ring *txr = NULL;
3435 struct rx_ring *rxr = NULL;
3436 int rsize, tsize, error = E1000_SUCCESS;
3437 int txconf = 0, rxconf = 0;
3440 /* Allocate the TX ring struct memory */
3441 if (!(adapter->tx_rings =
3442 (struct tx_ring *) malloc(sizeof(struct tx_ring) *
3443 adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
3444 device_printf(dev, "Unable to allocate TX ring memory\n");
3449 /* Now allocate the RX */
3450 if (!(adapter->rx_rings =
3451 (struct rx_ring *) malloc(sizeof(struct rx_ring) *
3452 adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
3453 device_printf(dev, "Unable to allocate RX ring memory\n");
/* Descriptor area size, rounded to the DMA alignment boundary. */
3458 tsize = roundup2(adapter->num_tx_desc *
3459 sizeof(struct e1000_tx_desc), EM_DBA_ALIGN);
3461 * Now set up the TX queues, txconf is needed to handle the
3462 * possibility that things fail midcourse and we need to
3463 * undo memory gracefully
3465 for (int i = 0; i < adapter->num_queues; i++, txconf++) {
3466 /* Set up some basics */
3467 txr = &adapter->tx_rings[i];
3468 txr->adapter = adapter;
3471 /* Initialize the TX lock */
3472 snprintf(txr->mtx_name, sizeof(txr->mtx_name), "%s:tx(%d)",
3473 device_get_nameunit(dev), txr->me);
3474 mtx_init(&txr->tx_mtx, txr->mtx_name, NULL, MTX_DEF);
3476 if (em_dma_malloc(adapter, tsize,
3477 &txr->txdma, BUS_DMA_NOWAIT)) {
3479 "Unable to allocate TX Descriptor memory\n");
3483 txr->tx_base = (struct e1000_tx_desc *)txr->txdma.dma_vaddr;
3484 bzero((void *)txr->tx_base, tsize);
3486 if (em_allocate_transmit_buffers(txr)) {
3488 "Critical Failure setting up transmit buffers\n");
3492 #if __FreeBSD_version >= 800000
3493 /* Allocate a buf ring */
3494 txr->br = buf_ring_alloc(4096, M_DEVBUF,
3495 M_WAITOK, &txr->tx_mtx);
3500 * Next the RX queues...
3502 rsize = roundup2(adapter->num_rx_desc *
3503 sizeof(union e1000_rx_desc_extended), EM_DBA_ALIGN);
3504 for (int i = 0; i < adapter->num_queues; i++, rxconf++) {
3505 rxr = &adapter->rx_rings[i];
3506 rxr->adapter = adapter;
3509 /* Initialize the RX lock */
/* NOTE(review): name uses txr->me here, not rxr->me — looks like a
 * copy/paste oddity in the original; cosmetic only (lock name). */
3510 snprintf(rxr->mtx_name, sizeof(rxr->mtx_name), "%s:rx(%d)",
3511 device_get_nameunit(dev), txr->me);
3512 mtx_init(&rxr->rx_mtx, rxr->mtx_name, NULL, MTX_DEF);
3514 if (em_dma_malloc(adapter, rsize,
3515 &rxr->rxdma, BUS_DMA_NOWAIT)) {
3517 "Unable to allocate RxDescriptor memory\n");
3521 rxr->rx_base = (union e1000_rx_desc_extended *)rxr->rxdma.dma_vaddr;
3522 bzero((void *)rxr->rx_base, rsize);
3524 /* Allocate receive buffers for the ring*/
3525 if (em_allocate_receive_buffers(rxr)) {
3527 "Critical Failure setting up receive buffers\n");
/* Failure path: free only the rings that were fully configured. */
3536 for (rxr = adapter->rx_rings; rxconf > 0; rxr++, rxconf--)
3537 em_dma_free(adapter, &rxr->rxdma);
3539 for (txr = adapter->tx_rings; txconf > 0; txr++, txconf--)
3540 em_dma_free(adapter, &txr->txdma);
3541 free(adapter->rx_rings, M_DEVBUF);
3543 #if __FreeBSD_version >= 800000
3544 buf_ring_free(txr->br, M_DEVBUF);
3546 free(adapter->tx_rings, M_DEVBUF);
3552 /*********************************************************************
3554 * Allocate memory for tx_buffer structures. The tx_buffer stores all
3555 * the information needed to transmit a packet on the wire. This is
3556 * called only once at attach, setup is done every reset.
3558 **********************************************************************/
3560 em_allocate_transmit_buffers(struct tx_ring *txr)
3562 struct adapter *adapter = txr->adapter;
3563 device_t dev = adapter->dev;
3564 struct em_txbuffer *txbuf;
/*
 * One DMA tag is shared by all TX buffer maps on this ring: each mapping
 * may cover up to EM_TSO_SIZE bytes, scattered over at most EM_MAX_SCATTER
 * segments of at most PAGE_SIZE each, anywhere in the bus address space
 * (lowaddr == highaddr == BUS_SPACE_MAXADDR, no filter).
 */
3568 * Setup DMA descriptor areas.
3570 if ((error = bus_dma_tag_create(bus_get_dma_tag(dev),
3571 1, 0, /* alignment, bounds */
3572 BUS_SPACE_MAXADDR, /* lowaddr */
3573 BUS_SPACE_MAXADDR, /* highaddr */
3574 NULL, NULL, /* filter, filterarg */
3575 EM_TSO_SIZE, /* maxsize */
3576 EM_MAX_SCATTER, /* nsegments */
3577 PAGE_SIZE, /* maxsegsize */
3579 NULL, /* lockfunc */
3580 NULL, /* lockfuncarg */
3582 device_printf(dev,"Unable to allocate TX DMA tag\n");
/* M_ZERO matters: every txbuf starts with m_head == NULL and map == NULL,
 * which the free path relies on to detect partially built rings. */
3586 if (!(txr->tx_buffers =
3587 (struct em_txbuffer *) malloc(sizeof(struct em_txbuffer) *
3588 adapter->num_tx_desc, M_DEVBUF, M_NOWAIT | M_ZERO))) {
3589 device_printf(dev, "Unable to allocate tx_buffer memory\n");
3594 /* Create the descriptor buffer dma maps */
3595 txbuf = txr->tx_buffers;
3596 for (i = 0; i < adapter->num_tx_desc; i++, txbuf++) {
3597 error = bus_dmamap_create(txr->txtag, 0, &txbuf->map);
3599 device_printf(dev, "Unable to create TX DMA map\n");
/* Failure path (label elided in this listing): tear down everything built
 * so far — em_free_transmit_structures() copes with a half-built ring. */
3606 /* We free all, it handles case where we are in the middle */
3607 em_free_transmit_structures(adapter);
3611 /*********************************************************************
3613 * Initialize a transmit ring.
3615 **********************************************************************/
/*
 * (Re)initialize one TX ring for use: zero the descriptor area, release
 * any mbufs left from a previous run, reset the avail/clean indices and
 * the cached checksum-offload context, then sync the descriptor DMA map.
 * Netmap-aware: when a netmap client owns the ring, descriptors are
 * repointed at the netmap slot buffers instead (the #ifdef DEV_NETMAP
 * opens are elided from this listing; only the #endif markers remain).
 */
3617 em_setup_transmit_ring(struct tx_ring *txr)
3619 struct adapter *adapter = txr->adapter;
3620 struct em_txbuffer *txbuf;
3623 struct netmap_adapter *na = NA(adapter->ifp);
3624 struct netmap_slot *slot;
3625 #endif /* DEV_NETMAP */
3627 /* Clear the old descriptor contents */
/* netmap_reset() returns non-NULL only when netmap is active on ring me. */
3630 slot = netmap_reset(na, NR_TX, txr->me, 0);
3631 #endif /* DEV_NETMAP */
3633 bzero((void *)txr->tx_base,
3634 (sizeof(struct e1000_tx_desc)) * adapter->num_tx_desc);
3636 txr->next_avail_desc = 0;
3637 txr->next_to_clean = 0;
3639 /* Free any existing tx buffers. */
3640 txbuf = txr->tx_buffers;
3641 for (i = 0; i < adapter->num_tx_desc; i++, txbuf++) {
3642 if (txbuf->m_head != NULL) {
3643 bus_dmamap_sync(txr->txtag, txbuf->map,
3644 BUS_DMASYNC_POSTWRITE);
3645 bus_dmamap_unload(txr->txtag, txbuf->map);
3646 m_freem(txbuf->m_head);
3647 txbuf->m_head = NULL;
/* Netmap branch: translate ring index i to the netmap slot index and point
 * the hardware descriptor at the netmap-owned buffer. */
3651 int si = netmap_idx_n2k(&na->tx_rings[txr->me], i);
3655 addr = PNMB(na, slot + si, &paddr);
3656 txr->tx_base[i].buffer_addr = htole64(paddr);
3657 /* reload the map for netmap mode */
3658 netmap_load_map(na, txr->txtag, txbuf->map, addr);
3660 #endif /* DEV_NETMAP */
3662 /* clear the watch index */
3663 txbuf->next_eop = -1;
3666 /* Set number of descriptors available */
3667 txr->tx_avail = adapter->num_tx_desc;
3668 txr->busy = EM_TX_IDLE;
/* Invalidate the cached offload context so the first packet after reset
 * always programs a fresh context descriptor. */
3670 /* Clear checksum offload context. */
3671 txr->last_hw_offload = 0;
3672 txr->last_hw_ipcss = 0;
3673 txr->last_hw_ipcso = 0;
3674 txr->last_hw_tucss = 0;
3675 txr->last_hw_tucso = 0;
3677 bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
3678 BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
3682 /*********************************************************************
3684 * Initialize all transmit rings.
3686 **********************************************************************/
/* Thin iterator: run em_setup_transmit_ring() over every TX queue.
 * (Return statement and braces elided from this listing.) */
3688 em_setup_transmit_structures(struct adapter *adapter)
3690 struct tx_ring *txr = adapter->tx_rings;
3692 for (int i = 0; i < adapter->num_queues; i++, txr++)
3693 em_setup_transmit_ring(txr);
3698 /*********************************************************************
3700 * Enable transmit unit.
3702 **********************************************************************/
/*
 * Program the MAC's transmit side: per-ring base/length/head/tail and
 * TXDCTL thresholds, then chip-wide inter-packet gap (TIPG), interrupt
 * delays (TIDV/TADV), per-MAC TARC errata workarounds, and finally TCTL
 * (the write that actually enables transmission).
 */
3704 em_initialize_transmit_unit(struct adapter *adapter)
3706 struct tx_ring *txr = adapter->tx_rings;
3707 struct e1000_hw *hw = &adapter->hw;
3708 u32 tctl, txdctl = 0, tarc, tipg = 0;
3710 INIT_DEBUGOUT("em_initialize_transmit_unit: begin");
3712 for (int i = 0; i < adapter->num_queues; i++, txr++) {
3713 u64 bus_addr = txr->txdma.dma_paddr;
3714 /* Base and Len of TX Ring */
3715 E1000_WRITE_REG(hw, E1000_TDLEN(i),
3716 adapter->num_tx_desc * sizeof(struct e1000_tx_desc));
3717 E1000_WRITE_REG(hw, E1000_TDBAH(i),
3718 (u32)(bus_addr >> 32));
3719 E1000_WRITE_REG(hw, E1000_TDBAL(i),
3721 /* Init the HEAD/TAIL indices */
3722 E1000_WRITE_REG(hw, E1000_TDT(i), 0);
3723 E1000_WRITE_REG(hw, E1000_TDH(i), 0);
3725 HW_DEBUGOUT2("Base = %x, Length = %x\n",
3726 E1000_READ_REG(&adapter->hw, E1000_TDBAL(i)),
3727 E1000_READ_REG(&adapter->hw, E1000_TDLEN(i)));
3729 txr->busy = EM_TX_IDLE;
/* TXDCTL: prefetch/host/writeback thresholds plus mandatory bit 22. */
3730 txdctl = 0; /* clear txdctl */
3731 txdctl |= 0x1f; /* PTHRESH */
3732 txdctl |= 1 << 8; /* HTHRESH */
3733 txdctl |= 1 << 16;/* WTHRESH */
3734 txdctl |= 1 << 22; /* Reserved bit 22 must always be 1 */
3735 txdctl |= E1000_TXDCTL_GRAN;
3736 txdctl |= 1 << 25; /* LWTHRESH */
3738 E1000_WRITE_REG(hw, E1000_TXDCTL(i), txdctl);
/* TIPG: es2lan needs its own IPGR2; fiber/serdes vs copper differ in the
 * base IPGT value (default branch of the switch is elided here). */
3741 /* Set the default values for the Tx Inter Packet Gap timer */
3742 switch (adapter->hw.mac.type) {
3743 case e1000_80003es2lan:
3744 tipg = DEFAULT_82543_TIPG_IPGR1;
3745 tipg |= DEFAULT_80003ES2LAN_TIPG_IPGR2 <<
3746 E1000_TIPG_IPGR2_SHIFT;
3749 if ((adapter->hw.phy.media_type == e1000_media_type_fiber) ||
3750 (adapter->hw.phy.media_type ==
3751 e1000_media_type_internal_serdes))
3752 tipg = DEFAULT_82543_TIPG_IPGT_FIBER;
3754 tipg = DEFAULT_82543_TIPG_IPGT_COPPER;
3755 tipg |= DEFAULT_82543_TIPG_IPGR1 << E1000_TIPG_IPGR1_SHIFT;
3756 tipg |= DEFAULT_82543_TIPG_IPGR2 << E1000_TIPG_IPGR2_SHIFT;
3759 E1000_WRITE_REG(&adapter->hw, E1000_TIPG, tipg);
3760 E1000_WRITE_REG(&adapter->hw, E1000_TIDV, adapter->tx_int_delay.value);
3762 if(adapter->hw.mac.type >= e1000_82540)
3763 E1000_WRITE_REG(&adapter->hw, E1000_TADV,
3764 adapter->tx_abs_int_delay.value);
/* Per-MAC TARC errata workarounds. */
3766 if ((adapter->hw.mac.type == e1000_82571) ||
3767 (adapter->hw.mac.type == e1000_82572)) {
3768 tarc = E1000_READ_REG(&adapter->hw, E1000_TARC(0));
3769 tarc |= TARC_SPEED_MODE_BIT;
3770 E1000_WRITE_REG(&adapter->hw, E1000_TARC(0), tarc);
3771 } else if (adapter->hw.mac.type == e1000_80003es2lan) {
3772 /* errata: program both queues to unweighted RR */
3773 tarc = E1000_READ_REG(&adapter->hw, E1000_TARC(0));
3775 E1000_WRITE_REG(&adapter->hw, E1000_TARC(0), tarc);
3776 tarc = E1000_READ_REG(&adapter->hw, E1000_TARC(1));
3778 E1000_WRITE_REG(&adapter->hw, E1000_TARC(1), tarc);
3779 } else if (adapter->hw.mac.type == e1000_82574) {
3780 tarc = E1000_READ_REG(&adapter->hw, E1000_TARC(0));
3781 tarc |= TARC_ERRATA_BIT;
3782 if ( adapter->num_queues > 1) {
3783 tarc |= (TARC_COMPENSATION_MODE | TARC_MQ_FIX);
3784 E1000_WRITE_REG(&adapter->hw, E1000_TARC(0), tarc);
3785 E1000_WRITE_REG(&adapter->hw, E1000_TARC(1), tarc);
3787 E1000_WRITE_REG(&adapter->hw, E1000_TARC(0), tarc);
/* IDE asks the hardware to delay the TX interrupt per descriptor. */
3790 adapter->txd_cmd = E1000_TXD_CMD_IFCS;
3791 if (adapter->tx_int_delay.value > 0)
3792 adapter->txd_cmd |= E1000_TXD_CMD_IDE;
3794 /* Program the Transmit Control Register */
3795 tctl = E1000_READ_REG(&adapter->hw, E1000_TCTL);
3796 tctl &= ~E1000_TCTL_CT;
3797 tctl |= (E1000_TCTL_PSP | E1000_TCTL_RTLC | E1000_TCTL_EN |
3798 (E1000_COLLISION_THRESHOLD << E1000_CT_SHIFT));
3800 if (adapter->hw.mac.type >= e1000_82571)
3801 tctl |= E1000_TCTL_MULR;
3803 /* This write will effectively turn on the transmit unit. */
3804 E1000_WRITE_REG(&adapter->hw, E1000_TCTL, tctl);
/* pch_spt-specific fixups in IOSFPC and TARC0. */
3806 if (hw->mac.type == e1000_pch_spt) {
3808 reg = E1000_READ_REG(hw, E1000_IOSFPC);
3809 reg |= E1000_RCTL_RDMTS_HEX;
3810 E1000_WRITE_REG(hw, E1000_IOSFPC, reg);
3811 reg = E1000_READ_REG(hw, E1000_TARC(0));
3812 reg |= E1000_TARC0_CB_MULTIQ_3_REQ;
3813 E1000_WRITE_REG(hw, E1000_TARC(0), reg);
3818 /*********************************************************************
3820 * Free all transmit rings.
3822 **********************************************************************/
/* Per-ring teardown (buffers, then descriptor DMA area, then the ring
 * mutex), followed by the tx_rings array itself. Safe on partially built
 * rings because em_free_transmit_buffers() tolerates NULL members. */
3824 em_free_transmit_structures(struct adapter *adapter)
3826 struct tx_ring *txr = adapter->tx_rings;
3828 for (int i = 0; i < adapter->num_queues; i++, txr++) {
3830 em_free_transmit_buffers(txr);
3831 em_dma_free(adapter, &txr->txdma);
3833 EM_TX_LOCK_DESTROY(txr);
3836 free(adapter->tx_rings, M_DEVBUF);
3839 /*********************************************************************
3841 * Free transmit ring related data structures.
3843 **********************************************************************/
/*
 * Release everything hanging off one TX ring: queued mbufs and their DMA
 * maps, the buf_ring (FreeBSD >= 8), the tx_buffers array, and the DMA
 * tag. Each member is NULL-checked so a half-built ring tears down
 * cleanly. (Several continuation lines carrying the map arguments are
 * elided from this listing.)
 */
3845 em_free_transmit_buffers(struct tx_ring *txr)
3847 struct adapter *adapter = txr->adapter;
3848 struct em_txbuffer *txbuf;
3850 INIT_DEBUGOUT("free_transmit_ring: begin")
3852 if (txr->tx_buffers == NULL)
3855 for (int i = 0; i < adapter->num_tx_desc; i++) {
3856 txbuf = &txr->tx_buffers[i];
/* Loaded map with an mbuf attached: sync, unload, free, destroy. */
3857 if (txbuf->m_head != NULL) {
3858 bus_dmamap_sync(txr->txtag, txbuf->map,
3859 BUS_DMASYNC_POSTWRITE);
3860 bus_dmamap_unload(txr->txtag,
3862 m_freem(txbuf->m_head);
3863 txbuf->m_head = NULL;
3864 if (txbuf->map != NULL) {
3865 bus_dmamap_destroy(txr->txtag,
/* Map created but no mbuf in flight: just unload and destroy. */
3869 } else if (txbuf->map != NULL) {
3870 bus_dmamap_unload(txr->txtag,
3872 bus_dmamap_destroy(txr->txtag,
3877 #if __FreeBSD_version >= 800000
3878 if (txr->br != NULL)
3879 buf_ring_free(txr->br, M_DEVBUF);
3881 if (txr->tx_buffers != NULL) {
3882 free(txr->tx_buffers, M_DEVBUF);
3883 txr->tx_buffers = NULL;
3885 if (txr->txtag != NULL) {
3886 bus_dma_tag_destroy(txr->txtag);
3893 /*********************************************************************
3894 * The offload context is protocol specific (TCP/UDP) and thus
3895 * only needs to be set when the protocol changes. The occasion
3896 * of a context change can be a performance detriment, and
3897 * might be better just disabled. The reason arises in the way
3898 * in which the controller supports pipelined requests from the
3899 * Tx data DMA. Up to four requests can be pipelined, and they may
3900 * belong to the same packet or to multiple packets. However all
3901 * requests for one packet are issued before a request is issued
3902 * for a subsequent packet and if a request for the next packet
3903 * requires a context change, that request will be stalled
3904 * until the previous request completes. This means setting up
3905 * a new context effectively disables pipelined Tx data DMA which
3906 * in turn greatly slow down performance to send small sized
3908 **********************************************************************/
/*
 * Build (or reuse) a checksum-offload context descriptor for one packet.
 * Sets *txd_upper/*txd_lower bits for IP/TCP/UDP checksum insertion and,
 * when the cached context on this ring already matches, returns without
 * consuming a descriptor (the early-return statements inside the reuse
 * checks are elided from this listing). Advances txr->next_avail_desc
 * when a new context descriptor is written.
 */
3910 em_transmit_checksum_setup(struct tx_ring *txr, struct mbuf *mp, int ip_off,
3911 struct ip *ip, u32 *txd_upper, u32 *txd_lower)
3913 struct adapter *adapter = txr->adapter;
3914 struct e1000_context_desc *TXD = NULL;
3915 struct em_txbuffer *tx_buffer;
3919 u8 ipcso, ipcss, tucso, tucss;
3921 ipcss = ipcso = tucss = tucso = 0;
3922 hdr_len = ip_off + (ip->ip_hl << 2);
3923 cur = txr->next_avail_desc;
3925 /* Setup of IP header checksum. */
3926 if (mp->m_pkthdr.csum_flags & CSUM_IP) {
3927 *txd_upper |= E1000_TXD_POPTS_IXSM << 8;
3930 ipcso = ip_off + offsetof(struct ip, ip_sum);
3932 * Start offset for header checksum calculation.
3933 * End offset for header checksum calculation.
3934 * Offset of place to put the checksum.
3936 TXD = (struct e1000_context_desc *)&txr->tx_base[cur];
3937 TXD->lower_setup.ip_fields.ipcss = ipcss;
3938 TXD->lower_setup.ip_fields.ipcse = htole16(hdr_len);
3939 TXD->lower_setup.ip_fields.ipcso = ipcso;
3940 cmd |= E1000_TXD_CMD_IP;
3943 if (mp->m_pkthdr.csum_flags & CSUM_TCP) {
3944 *txd_lower = E1000_TXD_CMD_DEXT | E1000_TXD_DTYP_D;
3945 *txd_upper |= E1000_TXD_POPTS_TXSM << 8;
3946 offload |= CSUM_TCP;
3948 tucso = hdr_len + offsetof(struct tcphdr, th_sum);
3950 * The 82574L can only remember the *last* context used
3951 * regardless of queue that it was use for. We cannot reuse
3952 * contexts on this hardware platform and must generate a new
3953 * context every time. 82574L hardware spec, section 7.2.6,
/* Context reuse is only safe single-queue (see 82574L note above). */
3956 if (adapter->num_queues < 2) {
3958 * Setting up new checksum offload context for every
3959 * frames takes a lot of processing time for hardware.
3960 * This also reduces performance a lot for small sized
3961 * frames so avoid it if driver can use previously
3962 * configured checksum offload context.
3964 if (txr->last_hw_offload == offload) {
3965 if (offload & CSUM_IP) {
3966 if (txr->last_hw_ipcss == ipcss &&
3967 txr->last_hw_ipcso == ipcso &&
3968 txr->last_hw_tucss == tucss &&
3969 txr->last_hw_tucso == tucso)
3972 if (txr->last_hw_tucss == tucss &&
3973 txr->last_hw_tucso == tucso)
/* Cache the context we are about to program for future reuse checks. */
3977 txr->last_hw_offload = offload;
3978 txr->last_hw_tucss = tucss;
3979 txr->last_hw_tucso = tucso;
3982 * Start offset for payload checksum calculation.
3983 * End offset for payload checksum calculation.
3984 * Offset of place to put the checksum.
3986 TXD = (struct e1000_context_desc *)&txr->tx_base[cur];
3987 TXD->upper_setup.tcp_fields.tucss = hdr_len;
3988 TXD->upper_setup.tcp_fields.tucse = htole16(0);
3989 TXD->upper_setup.tcp_fields.tucso = tucso;
3990 cmd |= E1000_TXD_CMD_TCP;
3991 } else if (mp->m_pkthdr.csum_flags & CSUM_UDP) {
3992 *txd_lower = E1000_TXD_CMD_DEXT | E1000_TXD_DTYP_D;
3993 *txd_upper |= E1000_TXD_POPTS_TXSM << 8;
3995 tucso = hdr_len + offsetof(struct udphdr, uh_sum);
3997 * The 82574L can only remember the *last* context used
3998 * regardless of queue that it was use for. We cannot reuse
3999 * contexts on this hardware platform and must generate a new
4000 * context every time. 82574L hardware spec, section 7.2.6,
4003 if (adapter->num_queues < 2) {
4005 * Setting up new checksum offload context for every
4006 * frames takes a lot of processing time for hardware.
4007 * This also reduces performance a lot for small sized
4008 * frames so avoid it if driver can use previously
4009 * configured checksum offload context.
4011 if (txr->last_hw_offload == offload) {
4012 if (offload & CSUM_IP) {
4013 if (txr->last_hw_ipcss == ipcss &&
4014 txr->last_hw_ipcso == ipcso &&
4015 txr->last_hw_tucss == tucss &&
4016 txr->last_hw_tucso == tucso)
4019 if (txr->last_hw_tucss == tucss &&
4020 txr->last_hw_tucso == tucso)
4024 txr->last_hw_offload = offload;
4025 txr->last_hw_tucss = tucss;
4026 txr->last_hw_tucso = tucso;
4029 * Start offset for header checksum calculation.
4030 * End offset for header checksum calculation.
4031 * Offset of place to put the checksum.
4033 TXD = (struct e1000_context_desc *)&txr->tx_base[cur];
4034 TXD->upper_setup.tcp_fields.tucss = tucss;
4035 TXD->upper_setup.tcp_fields.tucse = htole16(0);
4036 TXD->upper_setup.tcp_fields.tucso = tucso;
4039 if (offload & CSUM_IP) {
4040 txr->last_hw_ipcss = ipcss;
4041 txr->last_hw_ipcso = ipcso;
/* Finalize the context descriptor and consume one ring slot. */
4044 TXD->tcp_seg_setup.data = htole32(0);
4045 TXD->cmd_and_length =
4046 htole32(adapter->txd_cmd | E1000_TXD_CMD_DEXT | cmd);
4047 tx_buffer = &txr->tx_buffers[cur];
4048 tx_buffer->m_head = NULL;
4049 tx_buffer->next_eop = -1;
4051 if (++cur == adapter->num_tx_desc)
4055 txr->next_avail_desc = cur;
4059 /**********************************************************************
4061 * Setup work for hardware segmentation offload (TSO)
4063 **********************************************************************/
/*
 * Program a TSO context descriptor for one IPv4/TCP packet: IP and TCP
 * checksum offsets, MSS, total header length, and the total payload
 * length. Always writes a fresh context (no reuse — see comment below),
 * consuming one descriptor and advancing txr->next_avail_desc.
 */
4065 em_tso_setup(struct tx_ring *txr, struct mbuf *mp, int ip_off,
4066 struct ip *ip, struct tcphdr *tp, u32 *txd_upper, u32 *txd_lower)
4068 struct adapter *adapter = txr->adapter;
4069 struct e1000_context_desc *TXD;
4070 struct em_txbuffer *tx_buffer;
4074 * In theory we can use the same TSO context if and only if
4075 * frame is the same type(IP/TCP) and the same MSS. However
4076 * checking whether a frame has the same IP/TCP structure is
4077 * hard thing so just ignore that and always restablish a
/* hdr_len = L2 offset + IP header + TCP header (both in 32-bit words). */
4080 hdr_len = ip_off + (ip->ip_hl << 2) + (tp->th_off << 2);
4081 *txd_lower = (E1000_TXD_CMD_DEXT | /* Extended descr type */
4082 E1000_TXD_DTYP_D | /* Data descr type */
4083 E1000_TXD_CMD_TSE); /* Do TSE on this packet */
4085 /* IP and/or TCP header checksum calculation and insertion. */
4086 *txd_upper = (E1000_TXD_POPTS_IXSM | E1000_TXD_POPTS_TXSM) << 8;
4088 cur = txr->next_avail_desc;
4089 tx_buffer = &txr->tx_buffers[cur];
4090 TXD = (struct e1000_context_desc *) &txr->tx_base[cur];
4093 * Start offset for header checksum calculation.
4094 * End offset for header checksum calculation.
4095 * Offset of place put the checksum.
4097 TXD->lower_setup.ip_fields.ipcss = ip_off;
4098 TXD->lower_setup.ip_fields.ipcse =
4099 htole16(ip_off + (ip->ip_hl << 2) - 1);
4100 TXD->lower_setup.ip_fields.ipcso = ip_off + offsetof(struct ip, ip_sum);
4102 * Start offset for payload checksum calculation.
4103 * End offset for payload checksum calculation.
4104 * Offset of place to put the checksum.
4106 TXD->upper_setup.tcp_fields.tucss = ip_off + (ip->ip_hl << 2);
4107 TXD->upper_setup.tcp_fields.tucse = 0;
4108 TXD->upper_setup.tcp_fields.tucso =
4109 ip_off + (ip->ip_hl << 2) + offsetof(struct tcphdr, th_sum);
4111 * Payload size per packet w/o any headers.
4112 * Length of all headers up to payload.
4114 TXD->tcp_seg_setup.fields.mss = htole16(mp->m_pkthdr.tso_segsz);
4115 TXD->tcp_seg_setup.fields.hdr_len = hdr_len;
4117 TXD->cmd_and_length = htole32(adapter->txd_cmd |
4118 E1000_TXD_CMD_DEXT | /* Extended descr */
4119 E1000_TXD_CMD_TSE | /* TSE context */
4120 E1000_TXD_CMD_IP | /* Do IP csum */
4121 E1000_TXD_CMD_TCP | /* Do TCP checksum */
4122 (mp->m_pkthdr.len - (hdr_len))); /* Total len */
/* Context descriptor carries no mbuf; mark its slot accordingly. */
4124 tx_buffer->m_head = NULL;
4125 tx_buffer->next_eop = -1;
4127 if (++cur == adapter->num_tx_desc)
4131 txr->next_avail_desc = cur;
4136 /**********************************************************************
4138 * Examine each tx_buffer in the used queue. If the hardware is done
4139 * processing the packet then free associated resources. The
4140 * tx_buffer is put back on the free queue.
4142 **********************************************************************/
/*
 * TX completion: walk descriptors from next_to_clean, reclaiming every
 * packet whose EOP descriptor has DD (descriptor-done) set. Updates the
 * hang-detection state (txr->busy) and clears IFF_DRV_OACTIVE once
 * enough descriptors are free. Caller must hold the TX lock.
 * (Increments of tx_avail/processed and the netmap early-return body are
 * elided from this listing.)
 */
4144 em_txeof(struct tx_ring *txr)
4146 struct adapter *adapter = txr->adapter;
4147 int first, last, done, processed;
4148 struct em_txbuffer *tx_buffer;
4149 struct e1000_tx_desc *tx_desc, *eop_desc;
4150 struct ifnet *ifp = adapter->ifp;
4152 EM_TX_LOCK_ASSERT(txr);
4154 if (netmap_tx_irq(ifp, txr->me))
4156 #endif /* DEV_NETMAP */
4158 /* No work, make sure hang detection is disabled */
4159 if (txr->tx_avail == adapter->num_tx_desc) {
4160 txr->busy = EM_TX_IDLE;
4165 first = txr->next_to_clean;
4166 tx_desc = &txr->tx_base[first];
4167 tx_buffer = &txr->tx_buffers[first];
4168 last = tx_buffer->next_eop;
4169 eop_desc = &txr->tx_base[last];
4172 * What this does is get the index of the
4173 * first descriptor AFTER the EOP of the
4174 * first packet, that way we can do the
4175 * simple comparison on the inner while loop.
4177 if (++last == adapter->num_tx_desc)
4181 bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
4182 BUS_DMASYNC_POSTREAD);
/* Outer loop: one iteration per completed packet (EOP has DD set). */
4184 while (eop_desc->upper.fields.status & E1000_TXD_STAT_DD) {
4185 /* We clean the range of the packet */
4186 while (first != done) {
4187 tx_desc->upper.data = 0;
4188 tx_desc->lower.data = 0;
4189 tx_desc->buffer_addr = 0;
4193 if (tx_buffer->m_head) {
4194 bus_dmamap_sync(txr->txtag,
4196 BUS_DMASYNC_POSTWRITE);
4197 bus_dmamap_unload(txr->txtag,
4199 m_freem(tx_buffer->m_head);
4200 tx_buffer->m_head = NULL;
4202 tx_buffer->next_eop = -1;
4204 if (++first == adapter->num_tx_desc)
4207 tx_buffer = &txr->tx_buffers[first];
4208 tx_desc = &txr->tx_base[first];
4211 /* See if we can continue to the next packet */
4212 last = tx_buffer->next_eop;
4214 eop_desc = &txr->tx_base[last];
4215 /* Get new done point */
4216 if (++last == adapter->num_tx_desc) last = 0;
4221 bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
4222 BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
4224 txr->next_to_clean = first;
4227 ** Hang detection: we know there's work outstanding
4228 ** or the entry return would have been taken, so no
4229 ** descriptor processed here indicates a potential hang.
4230 ** The local timer will examine this and do a reset if needed.
4232 if (processed == 0) {
4233 if (txr->busy != EM_TX_HUNG)
4235 } else /* At least one descriptor was cleaned */
4236 txr->busy = EM_TX_BUSY; /* note this clears HUNG */
4239 * If we have a minimum free, clear IFF_DRV_OACTIVE
4240 * to tell the stack that it is OK to send packets.
4241 * Notice that all writes of OACTIVE happen under the
4242 * TX lock which, with a single queue, guarantees
4245 if (txr->tx_avail >= EM_MAX_SCATTER) {
4246 ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;
4249 /* Disable hang detection if all clean */
4250 if (txr->tx_avail == adapter->num_tx_desc)
4251 txr->busy = EM_TX_IDLE;
4254 /*********************************************************************
4256 * Refresh RX descriptor mbufs from system mbuf buffer pool.
4258 **********************************************************************/
/*
 * Replenish RX descriptors from next_to_refresh up to (not including)
 * "limit". i walks the slot being refreshed; j is always one ahead so
 * the loop stops one descriptor short of the hardware's work mark. On
 * mbuf shortage the refresh simply stops and resumes on the next call
 * from em_rxeof. RDT is advanced only if something was refreshed (the
 * "cleaned" check and goto targets are elided from this listing).
 */
4260 em_refresh_mbufs(struct rx_ring *rxr, int limit)
4262 struct adapter *adapter = rxr->adapter;
4264 bus_dma_segment_t segs;
4265 struct em_rxbuffer *rxbuf;
4266 int i, j, error, nsegs;
4267 bool cleaned = FALSE;
4269 i = j = rxr->next_to_refresh;
4271 ** Get one descriptor beyond
4272 ** our work mark to control
4275 if (++j == adapter->num_rx_desc)
4278 while (j != limit) {
4279 rxbuf = &rxr->rx_buffers[i];
/* Only slots whose previous mbuf was handed up need a new cluster. */
4280 if (rxbuf->m_head == NULL) {
4281 m = m_getjcl(M_NOWAIT, MT_DATA,
4282 M_PKTHDR, adapter->rx_mbuf_sz);
4284 ** If we have a temporary resource shortage
4285 ** that causes a failure, just abort refresh
4286 ** for now, we will return to this point when
4287 ** reinvoked from em_rxeof.
4294 m->m_len = m->m_pkthdr.len = adapter->rx_mbuf_sz;
4295 m->m_flags |= M_PKTHDR;
4296 m->m_data = m->m_ext.ext_buf;
4298 /* Use bus_dma machinery to setup the memory mapping */
4299 error = bus_dmamap_load_mbuf_sg(rxr->rxtag, rxbuf->map,
4300 m, &segs, &nsegs, BUS_DMA_NOWAIT);
4302 printf("Refresh mbufs: hdr dmamap load"
4303 " failure - %d\n", error);
4305 rxbuf->m_head = NULL;
4309 rxbuf->paddr = segs.ds_addr;
4310 bus_dmamap_sync(rxr->rxtag,
4311 rxbuf->map, BUS_DMASYNC_PREREAD);
4312 em_setup_rxdesc(&rxr->rx_base[i], rxbuf);
4315 i = j; /* Next is precalulated for us */
4316 rxr->next_to_refresh = i;
4317 /* Calculate next controlling index */
4318 if (++j == adapter->num_rx_desc)
4323 ** Update the tail pointer only if,
4324 ** and as far as we have refreshed.
4327 E1000_WRITE_REG(&adapter->hw,
4328 E1000_RDT(rxr->me), rxr->next_to_refresh);
4334 /*********************************************************************
4336 * Allocate memory for rx_buffer structures. Since we use one
4337 * rx_buffer per received packet, the maximum number of rx_buffer's
4338 * that we'll need is equal to the number of receive descriptors
4339 * that we've allocated.
4341 **********************************************************************/
/*
 * One-time (attach) allocation for one RX ring: zeroed rx_buffers array,
 * a shared DMA tag sized for up to 9K jumbo clusters (MJUM9BYTES), and a
 * DMA map per descriptor. On any failure the elided error path calls
 * em_free_receive_structures() to unwind.
 */
4343 em_allocate_receive_buffers(struct rx_ring *rxr)
4345 struct adapter *adapter = rxr->adapter;
4346 device_t dev = adapter->dev;
4347 struct em_rxbuffer *rxbuf;
4350 rxr->rx_buffers = malloc(sizeof(struct em_rxbuffer) *
4351 adapter->num_rx_desc, M_DEVBUF, M_NOWAIT | M_ZERO);
4352 if (rxr->rx_buffers == NULL) {
4353 device_printf(dev, "Unable to allocate rx_buffer memory\n");
4357 error = bus_dma_tag_create(bus_get_dma_tag(dev), /* parent */
4358 1, 0, /* alignment, bounds */
4359 BUS_SPACE_MAXADDR, /* lowaddr */
4360 BUS_SPACE_MAXADDR, /* highaddr */
4361 NULL, NULL, /* filter, filterarg */
4362 MJUM9BYTES, /* maxsize */
4364 MJUM9BYTES, /* maxsegsize */
4366 NULL, /* lockfunc */
4370 device_printf(dev, "%s: bus_dma_tag_create failed %d\n",
4375 rxbuf = rxr->rx_buffers;
/* NOTE(review): rxbuf is advanced both by the loop header (rxbuf++) and by
 * the reassignment from index i in the body — redundant but harmless. */
4376 for (int i = 0; i < adapter->num_rx_desc; i++, rxbuf++) {
4377 rxbuf = &rxr->rx_buffers[i];
4378 error = bus_dmamap_create(rxr->rxtag, 0, &rxbuf->map);
4380 device_printf(dev, "%s: bus_dmamap_create failed: %d\n",
4389 em_free_receive_structures(adapter);
4394 /*********************************************************************
4396 * Initialize a receive ring and its buffers.
4398 **********************************************************************/
/*
 * (Re)initialize one RX ring: zero the descriptor area, release any
 * mbufs from a previous run, attach a fresh jumbo cluster to every slot
 * (or, under netmap, point the descriptors at netmap slot buffers), then
 * reset the check/refresh indices and sync the descriptor DMA map.
 * The #ifdef DEV_NETMAP opens are elided from this listing.
 */
4400 em_setup_receive_ring(struct rx_ring *rxr)
4402 struct adapter *adapter = rxr->adapter;
4403 struct em_rxbuffer *rxbuf;
4404 bus_dma_segment_t seg[1];
4405 int rsize, nsegs, error = 0;
4407 struct netmap_adapter *na = NA(adapter->ifp);
4408 struct netmap_slot *slot;
4412 /* Clear the ring contents */
4414 rsize = roundup2(adapter->num_rx_desc *
4415 sizeof(union e1000_rx_desc_extended), EM_DBA_ALIGN);
4416 bzero((void *)rxr->rx_base, rsize);
/* NOTE(review): the TX counterpart passes txr->me as the ring index to
 * netmap_reset(); the literal 0 here looks suspicious for multi-queue —
 * confirm whether this should be rxr->me. */
4418 slot = netmap_reset(na, NR_RX, 0, 0);
4422 ** Free current RX buffer structs and their mbufs
4424 for (int i = 0; i < adapter->num_rx_desc; i++) {
4425 rxbuf = &rxr->rx_buffers[i];
4426 if (rxbuf->m_head != NULL) {
4427 bus_dmamap_sync(rxr->rxtag, rxbuf->map,
4428 BUS_DMASYNC_POSTREAD);
4429 bus_dmamap_unload(rxr->rxtag, rxbuf->map);
4430 m_freem(rxbuf->m_head);
4431 rxbuf->m_head = NULL; /* mark as freed */
4435 /* Now replenish the mbufs */
4436 for (int j = 0; j != adapter->num_rx_desc; ++j) {
4437 rxbuf = &rxr->rx_buffers[j];
/* Netmap branch: map the netmap-owned buffer instead of an mbuf. */
4440 int si = netmap_idx_n2k(&na->rx_rings[rxr->me], j);
4444 addr = PNMB(na, slot + si, &paddr);
4445 netmap_load_map(na, rxr->rxtag, rxbuf->map, addr);
4446 rxbuf->paddr = paddr;
4447 em_setup_rxdesc(&rxr->rx_base[j], rxbuf);
4450 #endif /* DEV_NETMAP */
4451 rxbuf->m_head = m_getjcl(M_NOWAIT, MT_DATA,
4452 M_PKTHDR, adapter->rx_mbuf_sz);
4453 if (rxbuf->m_head == NULL) {
4457 rxbuf->m_head->m_len = adapter->rx_mbuf_sz;
4458 rxbuf->m_head->m_flags &= ~M_HASFCS; /* we strip it */
4459 rxbuf->m_head->m_pkthdr.len = adapter->rx_mbuf_sz;
4461 /* Get the memory mapping */
4462 error = bus_dmamap_load_mbuf_sg(rxr->rxtag,
4463 rxbuf->map, rxbuf->m_head, seg,
4464 &nsegs, BUS_DMA_NOWAIT);
4466 m_freem(rxbuf->m_head);
4467 rxbuf->m_head = NULL;
4470 bus_dmamap_sync(rxr->rxtag,
4471 rxbuf->map, BUS_DMASYNC_PREREAD);
4473 rxbuf->paddr = seg[0].ds_addr;
4474 em_setup_rxdesc(&rxr->rx_base[j], rxbuf);
4476 rxr->next_to_check = 0;
4477 rxr->next_to_refresh = 0;
4478 bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
4479 BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
4486 /*********************************************************************
4488 * Initialize all receive rings.
4490 **********************************************************************/
/* Run em_setup_receive_ring() over every RX queue; on the first failure,
 * unwind the rings that had already completed (ring q cleans up after
 * itself) and reset their indices. */
4492 em_setup_receive_structures(struct adapter *adapter)
4494 struct rx_ring *rxr = adapter->rx_rings;
4497 for (q = 0; q < adapter->num_queues; q++, rxr++)
4498 if (em_setup_receive_ring(rxr))
4504 * Free RX buffers allocated so far, we will only handle
4505 * the rings that completed, the failing case will have
4506 * cleaned up for itself. 'q' failed, so its the terminus.
4508 for (int i = 0; i < q; ++i) {
4509 rxr = &adapter->rx_rings[i];
4510 for (int n = 0; n < adapter->num_rx_desc; n++) {
4511 struct em_rxbuffer *rxbuf;
4512 rxbuf = &rxr->rx_buffers[n];
4513 if (rxbuf->m_head != NULL) {
4514 bus_dmamap_sync(rxr->rxtag, rxbuf->map,
4515 BUS_DMASYNC_POSTREAD);
4516 bus_dmamap_unload(rxr->rxtag, rxbuf->map);
4517 m_freem(rxbuf->m_head);
4518 rxbuf->m_head = NULL;
4521 rxr->next_to_check = 0;
4522 rxr->next_to_refresh = 0;
4528 /*********************************************************************
4530 * Free all receive rings.
4532 **********************************************************************/
/* Per-ring teardown (buffers, descriptor DMA area, ring mutex), then the
 * rx_rings array itself. Mirrors em_free_transmit_structures(). */
4534 em_free_receive_structures(struct adapter *adapter)
4536 struct rx_ring *rxr = adapter->rx_rings;
4538 for (int i = 0; i < adapter->num_queues; i++, rxr++) {
4539 em_free_receive_buffers(rxr);
4540 /* Free the ring memory as well */
4541 em_dma_free(adapter, &rxr->rxdma);
4542 EM_RX_LOCK_DESTROY(rxr);
4545 free(adapter->rx_rings, M_DEVBUF);
4549 /*********************************************************************
4551 * Free receive ring data structures
4553 **********************************************************************/
/*
 * Release everything hanging off one RX ring: per-slot DMA maps and any
 * attached mbufs, the rx_buffers array, and the DMA tag. NULL checks
 * make it safe on partially built rings.
 */
4555 em_free_receive_buffers(struct rx_ring *rxr)
4557 struct adapter *adapter = rxr->adapter;
4558 struct em_rxbuffer *rxbuf = NULL;
4560 INIT_DEBUGOUT("free_receive_buffers: begin");
4562 if (rxr->rx_buffers != NULL) {
4563 for (int i = 0; i < adapter->num_rx_desc; i++) {
4564 rxbuf = &rxr->rx_buffers[i];
4565 if (rxbuf->map != NULL) {
4566 bus_dmamap_sync(rxr->rxtag, rxbuf->map,
4567 BUS_DMASYNC_POSTREAD);
4568 bus_dmamap_unload(rxr->rxtag, rxbuf->map);
4569 bus_dmamap_destroy(rxr->rxtag, rxbuf->map);
4571 if (rxbuf->m_head != NULL) {
4572 m_freem(rxbuf->m_head);
4573 rxbuf->m_head = NULL;
4576 free(rxr->rx_buffers, M_DEVBUF);
4577 rxr->rx_buffers = NULL;
4578 rxr->next_to_check = 0;
4579 rxr->next_to_refresh = 0;
4582 if (rxr->rxtag != NULL) {
4583 bus_dma_tag_destroy(rxr->rxtag);
4591 /*********************************************************************
4593 * Enable receive unit.
4595 **********************************************************************/
/*
 * NOTE(review): fragment — this function continues past the end of this
 * listing (the per-queue loop and RXDCTL programming are cut off).
 * Programs the MAC's receive side: RCTL, interrupt delays/throttling,
 * extended RX descriptor format (RFCTL), checksum offload (RXCSUM),
 * optional RSS for multiqueue, and per-ring base/length/head/tail.
 */
4598 em_initialize_receive_unit(struct adapter *adapter)
4600 struct rx_ring *rxr = adapter->rx_rings;
4601 struct ifnet *ifp = adapter->ifp;
4602 struct e1000_hw *hw = &adapter->hw;
4603 u32 rctl, rxcsum, rfctl;
4605 INIT_DEBUGOUT("em_initialize_receive_units: begin");
4608 * Make sure receives are disabled while setting
4609 * up the descriptor ring
4611 rctl = E1000_READ_REG(hw, E1000_RCTL);
4612 /* Do not disable if ever enabled on this hardware */
4613 if ((hw->mac.type != e1000_82574) && (hw->mac.type != e1000_82583))
4614 E1000_WRITE_REG(hw, E1000_RCTL, rctl & ~E1000_RCTL_EN);
4616 /* Setup the Receive Control Register */
4617 rctl &= ~(3 << E1000_RCTL_MO_SHIFT);
4618 rctl |= E1000_RCTL_EN | E1000_RCTL_BAM |
4619 E1000_RCTL_LBM_NO | E1000_RCTL_RDMTS_HALF |
4620 (hw->mac.mc_filter_type << E1000_RCTL_MO_SHIFT);
4622 /* Do not store bad packets */
4623 rctl &= ~E1000_RCTL_SBP;
4625 /* Enable Long Packet receive */
4626 if (ifp->if_mtu > ETHERMTU)
4627 rctl |= E1000_RCTL_LPE;
4629 rctl &= ~E1000_RCTL_LPE;
/* SECRC: have the hardware strip the Ethernet CRC (tunable). */
4632 if (!em_disable_crc_stripping)
4633 rctl |= E1000_RCTL_SECRC;
4635 E1000_WRITE_REG(&adapter->hw, E1000_RADV,
4636 adapter->rx_abs_int_delay.value);
4638 E1000_WRITE_REG(&adapter->hw, E1000_RDTR,
4639 adapter->rx_int_delay.value);
4641 * Set the interrupt throttling rate. Value is calculated
4642 * as DEFAULT_ITR = 1/(MAX_INTS_PER_SEC * 256ns)
4644 E1000_WRITE_REG(hw, E1000_ITR, DEFAULT_ITR);
4646 /* Use extended rx descriptor formats */
4647 rfctl = E1000_READ_REG(hw, E1000_RFCTL);
4648 rfctl |= E1000_RFCTL_EXTEN;
4650 ** When using MSIX interrupts we need to throttle
4651 ** using the EITR register (82574 only)
4653 if (hw->mac.type == e1000_82574) {
4654 for (int i = 0; i < 4; i++)
4655 E1000_WRITE_REG(hw, E1000_EITR_82574(i),
4657 /* Disable accelerated acknowledge */
4658 rfctl |= E1000_RFCTL_ACK_DIS;
4660 E1000_WRITE_REG(hw, E1000_RFCTL, rfctl);
4662 rxcsum = E1000_READ_REG(hw, E1000_RXCSUM);
4663 if (ifp->if_capenable & IFCAP_RXCSUM) {
4664 #ifdef EM_MULTIQUEUE
4665 rxcsum |= E1000_RXCSUM_TUOFL |
4666 E1000_RXCSUM_IPOFL |
4669 rxcsum |= E1000_RXCSUM_TUOFL;
4672 rxcsum &= ~E1000_RXCSUM_TUOFL;
4674 E1000_WRITE_REG(hw, E1000_RXCSUM, rxcsum);
4676 #ifdef EM_MULTIQUEUE
4677 #define RSSKEYLEN 10
4678 if (adapter->num_queues > 1) {
4679 uint8_t rss_key[4 * RSSKEYLEN];
/* Random RSS key, loaded 32 bits at a time into the RSSRK registers. */
4686 arc4rand(rss_key, sizeof(rss_key), 0);
4687 for (i = 0; i < RSSKEYLEN; ++i) {
4690 rssrk = EM_RSSRK_VAL(rss_key, i);
4691 E1000_WRITE_REG(hw,E1000_RSSRK(i), rssrk);
4695 * Configure RSS redirect table in following fashion:
4696 * (hash & ring_cnt_mask) == rdr_table[(hash & rdr_table_mask)]
4698 for (i = 0; i < sizeof(reta); ++i) {
4701 q = (i % adapter->num_queues) << 7;
4702 reta |= q << (8 * i);
/* Replicate the 32-bit reta pattern across all 32 RETA registers. */
4705 for (i = 0; i < 32; ++i) {
4706 E1000_WRITE_REG(hw, E1000_RETA(i), reta);
4709 E1000_WRITE_REG(hw, E1000_MRQC, E1000_MRQC_RSS_ENABLE_2Q |
4710 E1000_MRQC_RSS_FIELD_IPV4_TCP |
4711 E1000_MRQC_RSS_FIELD_IPV4 |
4712 E1000_MRQC_RSS_FIELD_IPV6_TCP_EX |
4713 E1000_MRQC_RSS_FIELD_IPV6_EX |
4714 E1000_MRQC_RSS_FIELD_IPV6);
4718 ** XXX TEMPORARY WORKAROUND: on some systems with 82573
4719 ** long latencies are observed, like Lenovo X60. This
4720 ** change eliminates the problem, but since having positive
4721 ** values in RDTR is a known source of problems on other
4722 ** platforms another solution is being sought.
4724 if (hw->mac.type == e1000_82573)
4725 E1000_WRITE_REG(hw, E1000_RDTR, 0x20);
4727 for (int i = 0; i < adapter->num_queues; i++, rxr++) {
4728 /* Setup the Base and Length of the Rx Descriptor Ring */
4729 u64 bus_addr = rxr->rxdma.dma_paddr;
4730 u32 rdt = adapter->num_rx_desc - 1; /* default */
4732 E1000_WRITE_REG(hw, E1000_RDLEN(i),
4733 adapter->num_rx_desc * sizeof(union e1000_rx_desc_extended));
4734 E1000_WRITE_REG(hw, E1000_RDBAH(i), (u32)(bus_addr >> 32));
4735 E1000_WRITE_REG(hw, E1000_RDBAL(i), (u32)bus_addr);
4736 /* Setup the Head and Tail Descriptor Pointers */
4737 E1000_WRITE_REG(hw, E1000_RDH(i), 0);
4740 * an init() while a netmap client is active must
4741 * preserve the rx buffers passed to userspace.
4743 if (ifp->if_capenable & IFCAP_NETMAP)
4744 rdt -= nm_kr_rxspace(&NA(adapter->ifp)->rx_rings[i]);
4745 #endif /* DEV_NETMAP */
4746 E1000_WRITE_REG(hw, E1000_RDT(i), rdt);
4750 * Set PTHRESH for improved jumbo performance
4751 * According to 10.2.5.11 of Intel 82574 Datasheet,
4752 * RXDCTL(1) is written whenever RXDCTL(0) is written.
4753 * Only write to RXDCTL(1) if there is a need for different
4756 if (((adapter->hw.mac.type == e1000_ich9lan) ||
4757 (adapter->hw.mac.type == e1000_pch2lan) ||
4758 (adapter->hw.mac.type == e1000_ich10lan)) &&
4759 (ifp->if_mtu > ETHERMTU)) {
4760 u32 rxdctl = E1000_READ_REG(hw, E1000_RXDCTL(0));
4761 E1000_WRITE_REG(hw, E1000_RXDCTL(0), rxdctl | 3);
4762 } else if (adapter->hw.mac.type == e1000_82574) {
4763 for (int i = 0; i < adapter->num_queues; i++) {
4764 u32 rxdctl = E1000_READ_REG(hw, E1000_RXDCTL(i));
4766 rxdctl |= 0x20; /* PTHRESH */
4767 rxdctl |= 4 << 8; /* HTHRESH */
4768 rxdctl |= 4 << 16;/* WTHRESH */
4769 rxdctl |= 1 << 24; /* Switch to granularity */
4770 E1000_WRITE_REG(hw, E1000_RXDCTL(i), rxdctl);
4774 if (adapter->hw.mac.type >= e1000_pch2lan) {
4775 if (ifp->if_mtu > ETHERMTU)
4776 e1000_lv_jumbo_workaround_ich8lan(hw, TRUE);
4778 e1000_lv_jumbo_workaround_ich8lan(hw, FALSE);
4781 /* Make sure VLAN Filters are off */
4782 rctl &= ~E1000_RCTL_VFE;
4784 if (adapter->rx_mbuf_sz == MCLBYTES)
4785 rctl |= E1000_RCTL_SZ_2048;
4786 else if (adapter->rx_mbuf_sz == MJUMPAGESIZE)
4787 rctl |= E1000_RCTL_SZ_4096 | E1000_RCTL_BSEX;
4788 else if (adapter->rx_mbuf_sz > MJUMPAGESIZE)
4789 rctl |= E1000_RCTL_SZ_8192 | E1000_RCTL_BSEX;
4791 /* ensure we clear use DTYPE of 00 here */
4792 rctl &= ~0x00000C00;
4793 /* Write out the settings */
4794 E1000_WRITE_REG(hw, E1000_RCTL, rctl);
4800 /*********************************************************************
4802 * This routine executes in interrupt context. It replenishes
4803 * the mbufs in the descriptor and sends data which has been
4804 * dma'ed into host memory to upper layer.
4806 * We loop at most count times if count is > 0, or until done if
4809 * For polling we also now return the number of cleaned packets
4810 *********************************************************************/
/*
 * em_rxeof - RX completion processing for one ring.
 * Syncs the descriptor DMA map, walks descriptors from next_to_check,
 * chains multi-descriptor frames via rxr->fmp/lmp, applies checksum and
 * VLAN-tag offload results, and passes completed packets up via if_input.
 * NOTE(review): this listing is missing some original source lines, so
 * braces and else-arms below may appear unbalanced.
 */
4812 em_rxeof(struct rx_ring *rxr, int count, int *done)
4814 struct adapter *adapter = rxr->adapter;
4815 struct ifnet *ifp = adapter->ifp;
4816 struct mbuf *mp, *sendmp;
4819 int i, processed, rxdone = 0;
4821 union e1000_rx_desc_extended *cur;
/* Make the descriptors written by hardware visible to the CPU. */
4826 bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
4827 BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
/* Let netmap intercept the interrupt when a netmap client owns the ring. */
4831 if (netmap_rx_irq(ifp, rxr->me, &processed)) {
4835 #endif /* DEV_NETMAP */
4837 for (i = rxr->next_to_check, processed = 0; count != 0;) {
4838 if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
4841 cur = &rxr->rx_base[i];
4842 status = le32toh(cur->wb.upper.status_error);
/* DD clear means hardware has not finished this descriptor yet. */
4845 if ((status & E1000_RXD_STAT_DD) == 0)
4848 len = le16toh(cur->wb.upper.length);
4849 eop = (status & E1000_RXD_STAT_EOP) != 0;
/* Drop errored frames and any trailing segments of a discarded frame. */
4851 if ((status & E1000_RXDEXT_ERR_FRAME_ERR_MASK) ||
4852 (rxr->discard == TRUE)) {
4853 adapter->dropped_pkts++;
4854 ++rxr->rx_discarded;
4855 if (!eop) /* Catch subsequent segs */
4856 rxr->discard = TRUE;
4858 rxr->discard = FALSE;
4859 em_rx_discard(rxr, i);
4862 bus_dmamap_unload(rxr->rxtag, rxr->rx_buffers[i].map);
4864 /* Assign correct length to the current fragment */
4865 mp = rxr->rx_buffers[i].m_head;
4868 /* Trigger for refresh */
4869 rxr->rx_buffers[i].m_head = NULL;
4871 /* First segment? */
4872 if (rxr->fmp == NULL) {
4873 mp->m_pkthdr.len = len;
4874 rxr->fmp = rxr->lmp = mp;
4876 /* Chain mbuf's together */
4877 mp->m_flags &= ~M_PKTHDR;
4878 rxr->lmp->m_next = mp;
4880 rxr->fmp->m_pkthdr.len += len;
4886 sendmp->m_pkthdr.rcvif = ifp;
4888 em_receive_checksum(status, sendmp);
4889 #ifndef __NO_STRICT_ALIGNMENT
/* Realign payload on strict-alignment machines (see em_fixup_rx). */
4890 if (adapter->hw.mac.max_frame_size >
4891 (MCLBYTES - ETHER_ALIGN) &&
4892 em_fixup_rx(rxr) != 0)
4895 if (status & E1000_RXD_STAT_VP) {
4896 sendmp->m_pkthdr.ether_vtag =
4897 le16toh(cur->wb.upper.vlan);
4898 sendmp->m_flags |= M_VLANTAG;
4900 #ifndef __NO_STRICT_ALIGNMENT
4903 rxr->fmp = rxr->lmp = NULL;
4907 bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
4908 BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
4910 /* Zero out the receive descriptors status. */
4911 cur->wb.upper.status_error &= htole32(~0xFF);
4912 ++rxdone; /* cumulative for POLL */
4915 /* Advance our pointers to the next descriptor. */
4916 if (++i == adapter->num_rx_desc)
4919 /* Send to the stack */
4920 if (sendmp != NULL) {
/* Save state before if_input: the stack may re-enter the driver. */
4921 rxr->next_to_check = i;
4923 (*ifp->if_input)(ifp, sendmp);
4925 i = rxr->next_to_check;
4928 /* Only refresh mbufs every 8 descriptors */
4929 if (processed == 8) {
4930 em_refresh_mbufs(rxr, i);
4935 /* Catch any remaining refresh work */
4936 if (e1000_rx_unrefreshed(rxr))
4937 em_refresh_mbufs(rxr, i);
4939 rxr->next_to_check = i;
/* TRUE tells the caller more ready descriptors may remain. */
4944 return ((status & E1000_RXD_STAT_DD) ? TRUE : FALSE);
/*
 * em_rx_discard - drop the mbuf at ring slot i (and any partially
 * assembled chain in rxr->fmp), leaving the slot for em_refresh_mbufs()
 * to repopulate.
 */
4947 static __inline void
4948 em_rx_discard(struct rx_ring *rxr, int i)
4950 struct em_rxbuffer *rbuf;
4952 rbuf = &rxr->rx_buffers[i];
4953 bus_dmamap_unload(rxr->rxtag, rbuf->map);
4955 /* Free any previous pieces */
4956 if (rxr->fmp != NULL) {
/* Restore M_PKTHDR so m_freem-style release sees a packet header. */
4957 rxr->fmp->m_flags |= M_PKTHDR;
4963 ** Free buffer and allow em_refresh_mbufs()
4964 ** to clean up and recharge buffer.
4967 m_free(rbuf->m_head);
4968 rbuf->m_head = NULL;
4973 #ifndef __NO_STRICT_ALIGNMENT
4975 * When jumbo frames are enabled we should realign the entire payload on
4976 * architectures with strict alignment. This is a serious design mistake of 8254x
4977 * as it nullifies DMA operations. 8254x just allows RX buffer size to be
4978 * 2048/4096/8192/16384. What we really want is 2048 - ETHER_ALIGN to align its
4979 * payload. On architectures without strict alignment restrictions 8254x still
4980 * performs unaligned memory access which would reduce the performance too.
4981 * To avoid copying over an entire frame to align, we allocate a new mbuf and
4982 * copy the ethernet header to the new mbuf. The new mbuf is prepended into the
4983 * existing mbuf chain.
4985 * Be aware, best performance of the 8254x is achieved only when jumbo frame is
4986 * not used at all on architectures with strict alignment.
/*
 * em_fixup_rx - realign a received frame for strict-alignment CPUs by
 * shifting (or copying into a fresh mbuf) the ethernet header so the
 * payload lands on an aligned boundary.  See the block comment above.
 * NOTE(review): some original lines are missing from this listing
 * (e.g. the NULL-check after MGETHDR and the return path).
 */
4989 em_fixup_rx(struct rx_ring *rxr)
4991 struct adapter *adapter = rxr->adapter;
/* Small frame: enough slack in the cluster to shift in place. */
4997 if (m->m_len <= (MCLBYTES - ETHER_HDR_LEN)) {
4998 bcopy(m->m_data, m->m_data + ETHER_HDR_LEN, m->m_len);
4999 m->m_data += ETHER_HDR_LEN;
/* Otherwise prepend a new header mbuf holding just the ethernet header. */
5001 MGETHDR(n, M_NOWAIT, MT_DATA);
5003 bcopy(m->m_data, n->m_data, ETHER_HDR_LEN);
5004 m->m_data += ETHER_HDR_LEN;
5005 m->m_len -= ETHER_HDR_LEN;
5006 n->m_len = ETHER_HDR_LEN;
5007 M_MOVE_PKTHDR(n, m);
/* Allocation-failure path: the frame is counted as dropped. */
5011 adapter->dropped_pkts++;
/*
 * em_setup_rxdesc - (re)initialize one extended RX descriptor with the
 * buffer's DMA address and a cleared status so hardware can reuse it.
 */
5023 em_setup_rxdesc(union e1000_rx_desc_extended *rxd, const struct em_rxbuffer *rxbuf)
5025 rxd->read.buffer_addr = htole64(rxbuf->paddr);
5026 /* DD bits must be cleared */
5027 rxd->wb.upper.status_error= 0;
5030 /*********************************************************************
5032 * Verify that the hardware indicated that the checksum is valid.
5033 * Inform the stack about the status of checksum so that stack
5034 * doesn't spend time verifying the checksum.
5036 *********************************************************************/
/*
 * em_receive_checksum - translate the hardware RX descriptor status bits
 * into mbuf csum_flags so the stack can skip software checksumming.
 */
5038 em_receive_checksum(uint32_t status, struct mbuf *mp)
5040 mp->m_pkthdr.csum_flags = 0;
5042 /* Ignore Checksum bit is set */
5043 if (status & E1000_RXD_STAT_IXSM)
5046 /* If the IP checksum exists and there is no IP Checksum error */
5047 if ((status & (E1000_RXD_STAT_IPCS | E1000_RXDEXT_STATERR_IPE)) ==
5048 E1000_RXD_STAT_IPCS) {
5049 mp->m_pkthdr.csum_flags = (CSUM_IP_CHECKED | CSUM_IP_VALID);
5052 /* TCP or UDP checksum */
5053 if ((status & (E1000_RXD_STAT_TCPCS | E1000_RXDEXT_STATERR_TCPE)) ==
5054 E1000_RXD_STAT_TCPCS) {
5055 mp->m_pkthdr.csum_flags |= (CSUM_DATA_VALID | CSUM_PSEUDO_HDR);
5056 mp->m_pkthdr.csum_data = htons(0xffff);
/* UDP checksum verified: same flags as the TCP case above. */
5058 if (status & E1000_RXD_STAT_UDPCS) {
5059 mp->m_pkthdr.csum_flags |= (CSUM_DATA_VALID | CSUM_PSEUDO_HDR);
5060 mp->m_pkthdr.csum_data = htons(0xffff);
5065 * This routine is run via a vlan
/*
 * em_register_vlan - vlan config event callback: record vtag in the
 * shadow VFTA and re-init the adapter when HW vlan filtering is on.
 */
5069 em_register_vlan(void *arg, struct ifnet *ifp, u16 vtag)
5071 struct adapter *adapter = ifp->if_softc;
5074 if (ifp->if_softc != arg) /* Not our event */
5077 if ((vtag == 0) || (vtag > 4095)) /* Invalid ID */
5080 EM_CORE_LOCK(adapter);
/* VFTA is an array of 32-bit words: word index from the upper vtag bits. */
5081 index = (vtag >> 5) & 0x7F;
5083 adapter->shadow_vfta[index] |= (1 << bit);
5084 ++adapter->num_vlans;
5085 /* Re-init to load the changes */
5086 if (ifp->if_capenable & IFCAP_VLAN_HWFILTER)
5087 em_init_locked(adapter);
5088 EM_CORE_UNLOCK(adapter);
5092 * This routine is run via a vlan
/*
 * em_unregister_vlan - vlan unconfig event callback: clear vtag from the
 * shadow VFTA; mirror image of em_register_vlan().
 */
5096 em_unregister_vlan(void *arg, struct ifnet *ifp, u16 vtag)
5098 struct adapter *adapter = ifp->if_softc;
5101 if (ifp->if_softc != arg)
5104 if ((vtag == 0) || (vtag > 4095)) /* Invalid */
5107 EM_CORE_LOCK(adapter);
5108 index = (vtag >> 5) & 0x7F;
5110 adapter->shadow_vfta[index] &= ~(1 << bit);
5111 --adapter->num_vlans;
5112 /* Re-init to load the changes */
5113 if (ifp->if_capenable & IFCAP_VLAN_HWFILTER)
5114 em_init_locked(adapter);
5115 EM_CORE_UNLOCK(adapter);
/*
 * em_setup_vlan_hw_support - after a soft reset, repopulate the hardware
 * VFTA from the driver's shadow copy and enable VLAN tag stripping (VME)
 * plus the hardware filter table (VFE).
 */
5119 em_setup_vlan_hw_support(struct adapter *adapter)
5121 struct e1000_hw *hw = &adapter->hw;
5125 ** We get here thru init_locked, meaning
5126 ** a soft reset, this has already cleared
5127 ** the VFTA and other state, so if there
5128 ** have been no vlan's registered do nothing.
5130 if (adapter->num_vlans == 0)
5134 ** A soft reset zero's out the VFTA, so
5135 ** we need to repopulate it now.
5137 for (int i = 0; i < EM_VFTA_SIZE; i++)
5138 if (adapter->shadow_vfta[i] != 0)
5139 E1000_WRITE_REG_ARRAY(hw, E1000_VFTA,
5140 i, adapter->shadow_vfta[i]);
/* Turn on VLAN tag handling in the device control register. */
5142 reg = E1000_READ_REG(hw, E1000_CTRL);
5143 reg |= E1000_CTRL_VME;
5144 E1000_WRITE_REG(hw, E1000_CTRL, reg);
5146 /* Enable the Filter Table */
5147 reg = E1000_READ_REG(hw, E1000_RCTL);
5148 reg &= ~E1000_RCTL_CFIEN;
5149 reg |= E1000_RCTL_VFE;
5150 E1000_WRITE_REG(hw, E1000_RCTL, reg);
/*
 * em_enable_intr - unmask device interrupts; on 82574 also enable the
 * MSI-X auto-clear (EIAC) bits so per-queue vectors work.
 */
5154 em_enable_intr(struct adapter *adapter)
5156 struct e1000_hw *hw = &adapter->hw;
5157 u32 ims_mask = IMS_ENABLE_MASK;
5159 if (hw->mac.type == e1000_82574) {
5160 E1000_WRITE_REG(hw, EM_EIAC, EM_MSIX_MASK);
5161 ims_mask |= EM_MSIX_MASK;
5163 E1000_WRITE_REG(hw, E1000_IMS, ims_mask);
/*
 * em_disable_intr - mask all device interrupts (IMC = all ones);
 * on 82574 first clear the MSI-X auto-clear register.
 */
5167 em_disable_intr(struct adapter *adapter)
5169 struct e1000_hw *hw = &adapter->hw;
5171 if (hw->mac.type == e1000_82574)
5172 E1000_WRITE_REG(hw, EM_EIAC, 0);
5173 E1000_WRITE_REG(&adapter->hw, E1000_IMC, 0xffffffff);
5177 * Bit of a misnomer, what this really means is
5178 * to enable OS management of the system... aka
5179 * to disable special hardware management features
/*
 * em_init_manageability - when a management controller is present, stop
 * hardware ARP interception and route management packets (ports 623/664)
 * to the host instead.
 */
5182 em_init_manageability(struct adapter *adapter)
5184 /* A shared code workaround */
5185 #define E1000_82542_MANC2H E1000_MANC2H
5186 if (adapter->has_manage) {
5187 int manc2h = E1000_READ_REG(&adapter->hw, E1000_MANC2H);
5188 int manc = E1000_READ_REG(&adapter->hw, E1000_MANC);
5190 /* disable hardware interception of ARP */
5191 manc &= ~(E1000_MANC_ARP_EN);
5193 /* enable receiving management packets to the host */
5194 manc |= E1000_MANC_EN_MNG2HOST;
5195 #define E1000_MNG2HOST_PORT_623 (1 << 5)
5196 #define E1000_MNG2HOST_PORT_664 (1 << 6)
5197 manc2h |= E1000_MNG2HOST_PORT_623;
5198 manc2h |= E1000_MNG2HOST_PORT_664;
5199 E1000_WRITE_REG(&adapter->hw, E1000_MANC2H, manc2h);
5200 E1000_WRITE_REG(&adapter->hw, E1000_MANC, manc);
5205 * Give control back to hardware management
5206 * controller if there is one.
/*
 * em_release_manageability - undo em_init_manageability(): restore
 * hardware ARP interception and stop routing management traffic to host.
 */
5209 em_release_manageability(struct adapter *adapter)
5211 if (adapter->has_manage) {
5212 int manc = E1000_READ_REG(&adapter->hw, E1000_MANC);
5214 /* re-enable hardware interception of ARP */
5215 manc |= E1000_MANC_ARP_EN;
5216 manc &= ~E1000_MANC_EN_MNG2HOST;
5218 E1000_WRITE_REG(&adapter->hw, E1000_MANC, manc);
5223 * em_get_hw_control sets the {CTRL_EXT|FWSM}:DRV_LOAD bit.
5224 * For ASF and Pass Through versions of f/w this means
5225 * that the driver is loaded. For AMT version type f/w
5226 * this means that the network i/f is open.
/*
 * em_get_hw_control - set the DRV_LOAD bit to tell firmware the driver
 * (or the network interface) is active.  82573 uses SWSM; other MACs
 * use CTRL_EXT.
 */
5229 em_get_hw_control(struct adapter *adapter)
5233 if (adapter->hw.mac.type == e1000_82573) {
5234 swsm = E1000_READ_REG(&adapter->hw, E1000_SWSM);
5235 E1000_WRITE_REG(&adapter->hw, E1000_SWSM,
5236 swsm | E1000_SWSM_DRV_LOAD);
5240 ctrl_ext = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
5241 E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT,
5242 ctrl_ext | E1000_CTRL_EXT_DRV_LOAD);
5247 * em_release_hw_control resets {CTRL_EXT|FWSM}:DRV_LOAD bit.
5248 * For ASF and Pass Through versions of f/w this means that
5249 * the driver is no longer loaded. For AMT versions of the
5250 * f/w this means that the network i/f is closed.
/*
 * em_release_hw_control - clear the DRV_LOAD bit; counterpart of
 * em_get_hw_control().  No-op when there is no management firmware.
 */
5253 em_release_hw_control(struct adapter *adapter)
5257 if (!adapter->has_manage)
5260 if (adapter->hw.mac.type == e1000_82573) {
5261 swsm = E1000_READ_REG(&adapter->hw, E1000_SWSM);
5262 E1000_WRITE_REG(&adapter->hw, E1000_SWSM,
5263 swsm & ~E1000_SWSM_DRV_LOAD);
5267 ctrl_ext = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
5268 E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT,
5269 ctrl_ext & ~E1000_CTRL_EXT_DRV_LOAD);
/*
 * em_is_valid_ether_addr - reject multicast/broadcast (low bit of first
 * octet set) and all-zero MAC addresses.
 */
5274 em_is_valid_ether_addr(u8 *addr)
5276 char zero_addr[6] = { 0, 0, 0, 0, 0, 0 };
5278 if ((addr[0] & 1) || (!bcmp(addr, zero_addr, ETHER_ADDR_LEN))) {
5286 ** Parse the interface capabilities with regard
5287 ** to both system management and wake-on-lan for
/*
 * em_get_wakeup - at attach time, read NVM/WUC to determine management
 * (AMT) support and the wake-on-lan capability of this port, then apply
 * per-device quirks where the eeprom cannot be trusted.
 */
5291 em_get_wakeup(device_t dev)
5293 struct adapter *adapter = device_get_softc(dev);
5294 u16 eeprom_data = 0, device_id, apme_mask;
5296 adapter->has_manage = e1000_enable_mng_pass_thru(&adapter->hw);
5297 apme_mask = EM_EEPROM_APME;
5299 switch (adapter->hw.mac.type) {
5302 adapter->has_amt = TRUE;
5306 case e1000_80003es2lan:
/* Dual-port parts keep per-port init words in the NVM. */
5307 if (adapter->hw.bus.func == 1) {
5308 e1000_read_nvm(&adapter->hw,
5309 NVM_INIT_CONTROL3_PORT_B, 1, &eeprom_data);
5312 e1000_read_nvm(&adapter->hw,
5313 NVM_INIT_CONTROL3_PORT_A, 1, &eeprom_data);
5317 case e1000_ich10lan:
/* ICH/PCH families report APME through the WUC register instead. */
5320 apme_mask = E1000_WUC_APME;
5321 adapter->has_amt = TRUE;
5322 eeprom_data = E1000_READ_REG(&adapter->hw, E1000_WUC);
5325 e1000_read_nvm(&adapter->hw,
5326 NVM_INIT_CONTROL3_PORT_A, 1, &eeprom_data);
5329 if (eeprom_data & apme_mask)
5330 adapter->wol = (E1000_WUFC_MAG | E1000_WUFC_MC);
5332 * We have the eeprom settings, now apply the special cases
5333 * where the eeprom may be wrong or the board won't support
5334 * wake on lan on a particular port
5336 device_id = pci_get_device(dev);
5337 switch (device_id) {
5338 case E1000_DEV_ID_82571EB_FIBER:
5339 /* Wake events only supported on port A for dual fiber
5340 * regardless of eeprom setting */
5341 if (E1000_READ_REG(&adapter->hw, E1000_STATUS) &
5342 E1000_STATUS_FUNC_1)
5345 case E1000_DEV_ID_82571EB_QUAD_COPPER:
5346 case E1000_DEV_ID_82571EB_QUAD_FIBER:
5347 case E1000_DEV_ID_82571EB_QUAD_COPPER_LP:
5348 /* if quad port adapter, disable WoL on all but port A */
5349 if (global_quad_port_a != 0)
5351 /* Reset for multiple quad port adapters */
5352 if (++global_quad_port_a == 4)
5353 global_quad_port_a = 0;
5361 * Enable PCI Wake On Lan capability
/*
 * em_enable_wakeup - arm the device for wake-on-lan at suspend/shutdown:
 * program WUC/WUFC (or the PHY on pchlan parts) per the configured
 * adapter->wol bits and set PME enable in PCI power management config.
 */
5364 em_enable_wakeup(device_t dev)
5366 struct adapter *adapter = device_get_softc(dev);
5367 struct ifnet *ifp = adapter->ifp;
5368 u32 pmc, ctrl, ctrl_ext, rctl;
/* No PCI power-management capability means no PME wakeup possible. */
5371 if ((pci_find_cap(dev, PCIY_PMG, &pmc) != 0))
5374 /* Advertise the wakeup capability */
5375 ctrl = E1000_READ_REG(&adapter->hw, E1000_CTRL);
5376 ctrl |= (E1000_CTRL_SWDPIN2 | E1000_CTRL_SWDPIN3);
5377 E1000_WRITE_REG(&adapter->hw, E1000_CTRL, ctrl);
5378 E1000_WRITE_REG(&adapter->hw, E1000_WUC, E1000_WUC_PME_EN);
5380 if ((adapter->hw.mac.type == e1000_ich8lan) ||
5381 (adapter->hw.mac.type == e1000_pchlan) ||
5382 (adapter->hw.mac.type == e1000_ich9lan) ||
5383 (adapter->hw.mac.type == e1000_ich10lan))
5384 e1000_suspend_workarounds_ich8lan(&adapter->hw);
5386 /* Keep the laser running on Fiber adapters */
5387 if (adapter->hw.phy.media_type == e1000_media_type_fiber ||
5388 adapter->hw.phy.media_type == e1000_media_type_internal_serdes) {
5389 ctrl_ext = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
5390 ctrl_ext |= E1000_CTRL_EXT_SDP3_DATA;
5391 E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT, ctrl_ext);
5395 ** Determine type of Wakeup: note that wol
5396 ** is set with all bits on by default.
5398 if ((ifp->if_capenable & IFCAP_WOL_MAGIC) == 0)
5399 adapter->wol &= ~E1000_WUFC_MAG;
5401 if ((ifp->if_capenable & IFCAP_WOL_MCAST) == 0)
5402 adapter->wol &= ~E1000_WUFC_MC;
/* Multicast wake needs promiscuous-multicast receive enabled. */
5404 rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
5405 rctl |= E1000_RCTL_MPE;
5406 E1000_WRITE_REG(&adapter->hw, E1000_RCTL, rctl);
/* pchlan/pch2lan wake via the PHY rather than MAC WUC/WUFC. */
5409 if ((adapter->hw.mac.type == e1000_pchlan) ||
5410 (adapter->hw.mac.type == e1000_pch2lan)) {
5411 if (em_enable_phy_wakeup(adapter))
5414 E1000_WRITE_REG(&adapter->hw, E1000_WUC, E1000_WUC_PME_EN);
5415 E1000_WRITE_REG(&adapter->hw, E1000_WUFC, adapter->wol);
5418 if (adapter->hw.phy.type == e1000_phy_igp_3)
5419 e1000_igp3_phy_powerdown_workaround_ich8lan(&adapter->hw);
/* Request PME from PCI power management when WOL is enabled. */
5422 status = pci_read_config(dev, pmc + PCIR_POWER_STATUS, 2);
5423 status &= ~(PCIM_PSTAT_PME | PCIM_PSTAT_PMEENABLE);
5424 if (ifp->if_capenable & IFCAP_WOL)
5425 status |= PCIM_PSTAT_PME | PCIM_PSTAT_PMEENABLE;
5426 pci_write_config(dev, pmc + PCIR_POWER_STATUS, status, 2);
5432 ** WOL in the newer chipset interfaces (pchlan)
5433 ** requires settings to be copied into the phy
/*
 * em_enable_phy_wakeup - mirror the MAC's receive address/multicast/RCTL
 * state into the PHY's BM_* registers and enable PHY-based wakeup
 * (pchlan-family parts wake through the PHY, not the MAC).
 */
5436 em_enable_phy_wakeup(struct adapter *adapter)
5438 struct e1000_hw *hw = &adapter->hw;
5442 /* copy MAC RARs to PHY RARs */
5443 e1000_copy_rx_addrs_to_phy_ich8lan(hw);
5445 /* copy MAC MTA to PHY MTA */
5446 for (int i = 0; i < adapter->hw.mac.mta_reg_count; i++) {
5447 mreg = E1000_READ_REG_ARRAY(hw, E1000_MTA, i);
/* Each 32-bit MTA entry is split across two 16-bit PHY registers. */
5448 e1000_write_phy_reg(hw, BM_MTA(i), (u16)(mreg & 0xFFFF));
5449 e1000_write_phy_reg(hw, BM_MTA(i) + 1,
5450 (u16)((mreg >> 16) & 0xFFFF));
5453 /* configure PHY Rx Control register */
5454 e1000_read_phy_reg(&adapter->hw, BM_RCTL, &preg);
5455 mreg = E1000_READ_REG(hw, E1000_RCTL);
5456 if (mreg & E1000_RCTL_UPE)
5457 preg |= BM_RCTL_UPE;
5458 if (mreg & E1000_RCTL_MPE)
5459 preg |= BM_RCTL_MPE;
5460 preg &= ~(BM_RCTL_MO_MASK);
5461 if (mreg & E1000_RCTL_MO_3)
5462 preg |= (((mreg & E1000_RCTL_MO_3) >> E1000_RCTL_MO_SHIFT)
5463 << BM_RCTL_MO_SHIFT);
5464 if (mreg & E1000_RCTL_BAM)
5465 preg |= BM_RCTL_BAM;
5466 if (mreg & E1000_RCTL_PMCF)
5467 preg |= BM_RCTL_PMCF;
5468 mreg = E1000_READ_REG(hw, E1000_CTRL);
5469 if (mreg & E1000_CTRL_RFCE)
5470 preg |= BM_RCTL_RFCE;
5471 e1000_write_phy_reg(&adapter->hw, BM_RCTL, preg);
5473 /* enable PHY wakeup in MAC register */
5474 E1000_WRITE_REG(hw, E1000_WUC,
5475 E1000_WUC_PHY_WAKE | E1000_WUC_PME_EN);
5476 E1000_WRITE_REG(hw, E1000_WUFC, adapter->wol);
5478 /* configure and enable PHY wakeup in PHY registers */
5479 e1000_write_phy_reg(&adapter->hw, BM_WUFC, adapter->wol);
5480 e1000_write_phy_reg(&adapter->hw, BM_WUC, E1000_WUC_PME_EN);
5482 /* activate PHY wakeup */
5483 ret = hw->phy.ops.acquire(hw);
5485 printf("Could not acquire PHY\n");
/* Select PHY page 769 (the wakeup-control page) via raw MDIC access. */
5488 e1000_write_phy_reg_mdic(hw, IGP01E1000_PHY_PAGE_SELECT,
5489 (BM_WUC_ENABLE_PAGE << IGP_PAGE_SHIFT));
5490 ret = e1000_read_phy_reg_mdic(hw, BM_WUC_ENABLE_REG, &preg);
5492 printf("Could not read PHY page 769\n");
5495 preg |= BM_WUC_ENABLE_BIT | BM_WUC_HOST_WU_BIT;
5496 ret = e1000_write_phy_reg_mdic(hw, BM_WUC_ENABLE_REG, preg);
5498 printf("Could not set PHY Host Wakeup bit\n");
5500 hw->phy.ops.release(hw);
/*
 * em_led_func - led(4) callback: turn the identify LED on or off
 * (onoff != 0 presumably selects the on path — the if/else lines are
 * not visible in this listing).
 */
5506 em_led_func(void *arg, int onoff)
5508 struct adapter *adapter = arg;
5510 EM_CORE_LOCK(adapter);
5512 e1000_setup_led(&adapter->hw);
5513 e1000_led_on(&adapter->hw);
5515 e1000_led_off(&adapter->hw);
5516 e1000_cleanup_led(&adapter->hw);
5518 EM_CORE_UNLOCK(adapter);
5522 ** Disable the L0S and L1 LINK states
/*
 * em_disable_aspm - clear the ASPM (L0s/L1) control bits in the PCIe
 * link control register for MAC types selected by the switch below
 * (case labels not visible in this listing).
 */
5525 em_disable_aspm(struct adapter *adapter)
5528 u16 link_cap,link_ctrl;
5529 device_t dev = adapter->dev;
5531 switch (adapter->hw.mac.type) {
/* Bail out when the device has no PCI Express capability. */
5539 if (pci_find_cap(dev, PCIY_EXPRESS, &base) != 0)
5541 reg = base + PCIER_LINK_CAP;
5542 link_cap = pci_read_config(dev, reg, 2);
/* Nothing to do when the link does not advertise ASPM at all. */
5543 if ((link_cap & PCIEM_LINK_CAP_ASPM) == 0)
5545 reg = base + PCIER_LINK_CTL;
5546 link_ctrl = pci_read_config(dev, reg, 2);
5547 link_ctrl &= ~PCIEM_LINK_CTL_ASPMC;
5548 pci_write_config(dev, reg, link_ctrl, 2);
5552 /**********************************************************************
5554 * Update the board statistics counters.
5556 **********************************************************************/
/*
 * em_update_stats_counters - accumulate the hardware statistics
 * registers (read-to-clear) into adapter->stats and derive the ifnet
 * collision/input-error/output-error counters from them.
 */
5558 em_update_stats_counters(struct adapter *adapter)
/* Symbol/sequence errors are only meaningful with copper media or link up. */
5562 if(adapter->hw.phy.media_type == e1000_media_type_copper ||
5563 (E1000_READ_REG(&adapter->hw, E1000_STATUS) & E1000_STATUS_LU)) {
5564 adapter->stats.symerrs += E1000_READ_REG(&adapter->hw, E1000_SYMERRS);
5565 adapter->stats.sec += E1000_READ_REG(&adapter->hw, E1000_SEC);
5567 adapter->stats.crcerrs += E1000_READ_REG(&adapter->hw, E1000_CRCERRS);
5568 adapter->stats.mpc += E1000_READ_REG(&adapter->hw, E1000_MPC);
5569 adapter->stats.scc += E1000_READ_REG(&adapter->hw, E1000_SCC);
5570 adapter->stats.ecol += E1000_READ_REG(&adapter->hw, E1000_ECOL);
5572 adapter->stats.mcc += E1000_READ_REG(&adapter->hw, E1000_MCC);
5573 adapter->stats.latecol += E1000_READ_REG(&adapter->hw, E1000_LATECOL);
5574 adapter->stats.colc += E1000_READ_REG(&adapter->hw, E1000_COLC);
5575 adapter->stats.dc += E1000_READ_REG(&adapter->hw, E1000_DC);
5576 adapter->stats.rlec += E1000_READ_REG(&adapter->hw, E1000_RLEC);
5577 adapter->stats.xonrxc += E1000_READ_REG(&adapter->hw, E1000_XONRXC);
5578 adapter->stats.xontxc += E1000_READ_REG(&adapter->hw, E1000_XONTXC);
5579 adapter->stats.xoffrxc += E1000_READ_REG(&adapter->hw, E1000_XOFFRXC);
5580 adapter->stats.xofftxc += E1000_READ_REG(&adapter->hw, E1000_XOFFTXC);
5581 adapter->stats.fcruc += E1000_READ_REG(&adapter->hw, E1000_FCRUC);
5582 adapter->stats.prc64 += E1000_READ_REG(&adapter->hw, E1000_PRC64);
5583 adapter->stats.prc127 += E1000_READ_REG(&adapter->hw, E1000_PRC127);
5584 adapter->stats.prc255 += E1000_READ_REG(&adapter->hw, E1000_PRC255);
5585 adapter->stats.prc511 += E1000_READ_REG(&adapter->hw, E1000_PRC511);
5586 adapter->stats.prc1023 += E1000_READ_REG(&adapter->hw, E1000_PRC1023);
5587 adapter->stats.prc1522 += E1000_READ_REG(&adapter->hw, E1000_PRC1522);
5588 adapter->stats.gprc += E1000_READ_REG(&adapter->hw, E1000_GPRC);
5589 adapter->stats.bprc += E1000_READ_REG(&adapter->hw, E1000_BPRC);
5590 adapter->stats.mprc += E1000_READ_REG(&adapter->hw, E1000_MPRC);
5591 adapter->stats.gptc += E1000_READ_REG(&adapter->hw, E1000_GPTC);
5593 /* For the 64-bit byte counters the low dword must be read first. */
5594 /* Both registers clear on the read of the high dword */
5596 adapter->stats.gorc += E1000_READ_REG(&adapter->hw, E1000_GORCL) +
5597 ((u64)E1000_READ_REG(&adapter->hw, E1000_GORCH) << 32);
5598 adapter->stats.gotc += E1000_READ_REG(&adapter->hw, E1000_GOTCL) +
5599 ((u64)E1000_READ_REG(&adapter->hw, E1000_GOTCH) << 32);
5601 adapter->stats.rnbc += E1000_READ_REG(&adapter->hw, E1000_RNBC);
5602 adapter->stats.ruc += E1000_READ_REG(&adapter->hw, E1000_RUC);
5603 adapter->stats.rfc += E1000_READ_REG(&adapter->hw, E1000_RFC);
5604 adapter->stats.roc += E1000_READ_REG(&adapter->hw, E1000_ROC);
5605 adapter->stats.rjc += E1000_READ_REG(&adapter->hw, E1000_RJC);
5607 adapter->stats.tor += E1000_READ_REG(&adapter->hw, E1000_TORH);
5608 adapter->stats.tot += E1000_READ_REG(&adapter->hw, E1000_TOTH);
5610 adapter->stats.tpr += E1000_READ_REG(&adapter->hw, E1000_TPR);
5611 adapter->stats.tpt += E1000_READ_REG(&adapter->hw, E1000_TPT);
5612 adapter->stats.ptc64 += E1000_READ_REG(&adapter->hw, E1000_PTC64);
5613 adapter->stats.ptc127 += E1000_READ_REG(&adapter->hw, E1000_PTC127);
5614 adapter->stats.ptc255 += E1000_READ_REG(&adapter->hw, E1000_PTC255);
5615 adapter->stats.ptc511 += E1000_READ_REG(&adapter->hw, E1000_PTC511);
5616 adapter->stats.ptc1023 += E1000_READ_REG(&adapter->hw, E1000_PTC1023);
5617 adapter->stats.ptc1522 += E1000_READ_REG(&adapter->hw, E1000_PTC1522);
5618 adapter->stats.mptc += E1000_READ_REG(&adapter->hw, E1000_MPTC);
5619 adapter->stats.bptc += E1000_READ_REG(&adapter->hw, E1000_BPTC);
5621 /* Interrupt Counts */
5623 adapter->stats.iac += E1000_READ_REG(&adapter->hw, E1000_IAC);
5624 adapter->stats.icrxptc += E1000_READ_REG(&adapter->hw, E1000_ICRXPTC);
5625 adapter->stats.icrxatc += E1000_READ_REG(&adapter->hw, E1000_ICRXATC);
5626 adapter->stats.ictxptc += E1000_READ_REG(&adapter->hw, E1000_ICTXPTC);
5627 adapter->stats.ictxatc += E1000_READ_REG(&adapter->hw, E1000_ICTXATC);
5628 adapter->stats.ictxqec += E1000_READ_REG(&adapter->hw, E1000_ICTXQEC);
5629 adapter->stats.ictxqmtc += E1000_READ_REG(&adapter->hw, E1000_ICTXQMTC);
5630 adapter->stats.icrxdmtc += E1000_READ_REG(&adapter->hw, E1000_ICRXDMTC);
5631 adapter->stats.icrxoc += E1000_READ_REG(&adapter->hw, E1000_ICRXOC);
/* Counters below only exist on 82543 and newer MACs. */
5633 if (adapter->hw.mac.type >= e1000_82543) {
5634 adapter->stats.algnerrc +=
5635 E1000_READ_REG(&adapter->hw, E1000_ALGNERRC);
5636 adapter->stats.rxerrc +=
5637 E1000_READ_REG(&adapter->hw, E1000_RXERRC);
5638 adapter->stats.tncrs +=
5639 E1000_READ_REG(&adapter->hw, E1000_TNCRS);
5640 adapter->stats.cexterr +=
5641 E1000_READ_REG(&adapter->hw, E1000_CEXTERR);
5642 adapter->stats.tsctc +=
5643 E1000_READ_REG(&adapter->hw, E1000_TSCTC);
5644 adapter->stats.tsctfc +=
5645 E1000_READ_REG(&adapter->hw, E1000_TSCTFC);
5649 ifp->if_collisions = adapter->stats.colc;
/* Aggregate RX error classes into the ifnet input-error counter. */
5652 ifp->if_ierrors = adapter->dropped_pkts + adapter->stats.rxerrc +
5653 adapter->stats.crcerrs + adapter->stats.algnerrc +
5654 adapter->stats.ruc + adapter->stats.roc +
5655 adapter->stats.mpc + adapter->stats.cexterr;
5658 ifp->if_oerrors = adapter->stats.ecol +
5659 adapter->stats.latecol + adapter->watchdog_events;
5662 /* Export a single 32-bit register via a read-only sysctl. */
/*
 * em_sysctl_reg_handler - read-only sysctl handler exposing one device
 * register: arg1 is the adapter, arg2 the register offset to read.
 */
5664 em_sysctl_reg_handler(SYSCTL_HANDLER_ARGS)
5666 struct adapter *adapter;
5669 adapter = oidp->oid_arg1;
5670 val = E1000_READ_REG(&adapter->hw, oidp->oid_arg2);
5671 return (sysctl_handle_int(oidp, &val, 0, req));
5675 * Add sysctl variables, one per statistic, to the system.
5678 em_add_hw_stats(struct adapter *adapter)
5680 device_t dev = adapter->dev;
5682 struct tx_ring *txr = adapter->tx_rings;
5683 struct rx_ring *rxr = adapter->rx_rings;
5685 struct sysctl_ctx_list *ctx = device_get_sysctl_ctx(dev);
5686 struct sysctl_oid *tree = device_get_sysctl_tree(dev);
5687 struct sysctl_oid_list *child = SYSCTL_CHILDREN(tree);
5688 struct e1000_hw_stats *stats = &adapter->stats;
5690 struct sysctl_oid *stat_node, *queue_node, *int_node;
5691 struct sysctl_oid_list *stat_list, *queue_list, *int_list;
5693 #define QUEUE_NAME_LEN 32
5694 char namebuf[QUEUE_NAME_LEN];
5696 /* Driver Statistics */
5697 SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "dropped",
5698 CTLFLAG_RD, &adapter->dropped_pkts,
5699 "Driver dropped packets");
5700 SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "link_irq",
5701 CTLFLAG_RD, &adapter->link_irq,
5702 "Link MSIX IRQ Handled");
5703 SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "mbuf_defrag_fail",
5704 CTLFLAG_RD, &adapter->mbuf_defrag_failed,
5705 "Defragmenting mbuf chain failed");
5706 SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "tx_dma_fail",
5707 CTLFLAG_RD, &adapter->no_tx_dma_setup,
5708 "Driver tx dma failure in xmit");
5709 SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "rx_overruns",
5710 CTLFLAG_RD, &adapter->rx_overruns,
5712 SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "watchdog_timeouts",
5713 CTLFLAG_RD, &adapter->watchdog_events,
5714 "Watchdog timeouts");
5716 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "device_control",
5717 CTLTYPE_UINT | CTLFLAG_RD, adapter, E1000_CTRL,
5718 em_sysctl_reg_handler, "IU",
5719 "Device Control Register");
5720 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "rx_control",
5721 CTLTYPE_UINT | CTLFLAG_RD, adapter, E1000_RCTL,
5722 em_sysctl_reg_handler, "IU",
5723 "Receiver Control Register");
5724 SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "fc_high_water",
5725 CTLFLAG_RD, &adapter->hw.fc.high_water, 0,
5726 "Flow Control High Watermark");
5727 SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "fc_low_water",
5728 CTLFLAG_RD, &adapter->hw.fc.low_water, 0,
5729 "Flow Control Low Watermark");
5731 for (int i = 0; i < adapter->num_queues; i++, txr++, rxr++) {
5732 snprintf(namebuf, QUEUE_NAME_LEN, "queue_tx_%d", i);
5733 queue_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, namebuf,
5734 CTLFLAG_RD, NULL, "TX Queue Name");
5735 queue_list = SYSCTL_CHILDREN(queue_node);
5737 SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "txd_head",
5738 CTLTYPE_UINT | CTLFLAG_RD, adapter,
5740 em_sysctl_reg_handler, "IU",
5741 "Transmit Descriptor Head");
5742 SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "txd_tail",
5743 CTLTYPE_UINT | CTLFLAG_RD, adapter,
5745 em_sysctl_reg_handler, "IU",
5746 "Transmit Descriptor Tail");
5747 SYSCTL_ADD_ULONG(ctx, queue_list, OID_AUTO, "tx_irq",
5748 CTLFLAG_RD, &txr->tx_irq,
5749 "Queue MSI-X Transmit Interrupts");
5750 SYSCTL_ADD_ULONG(ctx, queue_list, OID_AUTO, "no_desc_avail",
5751 CTLFLAG_RD, &txr->no_desc_avail,
5752 "Queue No Descriptor Available");
5754 snprintf(namebuf, QUEUE_NAME_LEN, "queue_rx_%d", i);
5755 queue_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, namebuf,
5756 CTLFLAG_RD, NULL, "RX Queue Name");
5757 queue_list = SYSCTL_CHILDREN(queue_node);
5759 SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "rxd_head",
5760 CTLTYPE_UINT | CTLFLAG_RD, adapter,
5762 em_sysctl_reg_handler, "IU",
5763 "Receive Descriptor Head");
5764 SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "rxd_tail",
5765 CTLTYPE_UINT | CTLFLAG_RD, adapter,
5767 em_sysctl_reg_handler, "IU",
5768 "Receive Descriptor Tail");
5769 SYSCTL_ADD_ULONG(ctx, queue_list, OID_AUTO, "rx_irq",
5770 CTLFLAG_RD, &rxr->rx_irq,
5771 "Queue MSI-X Receive Interrupts");
5774 /* MAC stats get their own sub node */
5776 stat_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "mac_stats",
5777 CTLFLAG_RD, NULL, "Statistics");
5778 stat_list = SYSCTL_CHILDREN(stat_node);
5780 SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "excess_coll",
5781 CTLFLAG_RD, &stats->ecol,
5782 "Excessive collisions");
5783 SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "single_coll",
5784 CTLFLAG_RD, &stats->scc,
5785 "Single collisions");
5786 SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "multiple_coll",
5787 CTLFLAG_RD, &stats->mcc,
5788 "Multiple collisions");
5789 SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "late_coll",
5790 CTLFLAG_RD, &stats->latecol,
5792 SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "collision_count",
5793 CTLFLAG_RD, &stats->colc,
5795 SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "symbol_errors",
5796 CTLFLAG_RD, &adapter->stats.symerrs,
5798 SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "sequence_errors",
5799 CTLFLAG_RD, &adapter->stats.sec,
5801 SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "defer_count",
5802 CTLFLAG_RD, &adapter->stats.dc,
5804 SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "missed_packets",
5805 CTLFLAG_RD, &adapter->stats.mpc,
5807 SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_no_buff",
5808 CTLFLAG_RD, &adapter->stats.rnbc,
5809 "Receive No Buffers");
5810 SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_undersize",
5811 CTLFLAG_RD, &adapter->stats.ruc,
5812 "Receive Undersize");
5813 SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_fragmented",
5814 CTLFLAG_RD, &adapter->stats.rfc,
5815 "Fragmented Packets Received ");
5816 SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_oversize",
5817 CTLFLAG_RD, &adapter->stats.roc,
5818 "Oversized Packets Received");
5819 SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_jabber",
5820 CTLFLAG_RD, &adapter->stats.rjc,
5822 SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_errs",
5823 CTLFLAG_RD, &adapter->stats.rxerrc,
5825 SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "crc_errs",
5826 CTLFLAG_RD, &adapter->stats.crcerrs,
5828 SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "alignment_errs",
5829 CTLFLAG_RD, &adapter->stats.algnerrc,
5830 "Alignment Errors");
5831 /* On 82575 these are collision counts */
5832 SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "coll_ext_errs",
5833 CTLFLAG_RD, &adapter->stats.cexterr,
5834 "Collision/Carrier extension errors");
5835 SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "xon_recvd",
5836 CTLFLAG_RD, &adapter->stats.xonrxc,
5838 SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "xon_txd",
5839 CTLFLAG_RD, &adapter->stats.xontxc,
5841 SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "xoff_recvd",
5842 CTLFLAG_RD, &adapter->stats.xoffrxc,
5844 SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "xoff_txd",
5845 CTLFLAG_RD, &adapter->stats.xofftxc,
5846 "XOFF Transmitted");
5848 /* Packet Reception Stats */
5849 SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "total_pkts_recvd",
5850 CTLFLAG_RD, &adapter->stats.tpr,
5851 "Total Packets Received ");
5852 SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_pkts_recvd",
5853 CTLFLAG_RD, &adapter->stats.gprc,
5854 "Good Packets Received");
5855 SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "bcast_pkts_recvd",
5856 CTLFLAG_RD, &adapter->stats.bprc,
5857 "Broadcast Packets Received");
5858 SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "mcast_pkts_recvd",
5859 CTLFLAG_RD, &adapter->stats.mprc,
5860 "Multicast Packets Received");
5861 SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_64",
5862 CTLFLAG_RD, &adapter->stats.prc64,
5863 "64 byte frames received ");
5864 SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_65_127",
5865 CTLFLAG_RD, &adapter->stats.prc127,
5866 "65-127 byte frames received");
5867 SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_128_255",
5868 CTLFLAG_RD, &adapter->stats.prc255,
5869 "128-255 byte frames received");
5870 SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_256_511",
5871 CTLFLAG_RD, &adapter->stats.prc511,
5872 "256-511 byte frames received");
5873 SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_512_1023",
5874 CTLFLAG_RD, &adapter->stats.prc1023,
5875 "512-1023 byte frames received");
5876 SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_1024_1522",
5877 CTLFLAG_RD, &adapter->stats.prc1522,
5878 "1023-1522 byte frames received");
5879 SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_octets_recvd",
5880 CTLFLAG_RD, &adapter->stats.gorc,
5881 "Good Octets Received");
5883 /* Packet Transmission Stats */
5884 SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_octets_txd",
5885 CTLFLAG_RD, &adapter->stats.gotc,
5886 "Good Octets Transmitted");
5887 SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "total_pkts_txd",
5888 CTLFLAG_RD, &adapter->stats.tpt,
5889 "Total Packets Transmitted");
5890 SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_pkts_txd",
5891 CTLFLAG_RD, &adapter->stats.gptc,
5892 "Good Packets Transmitted");
5893 SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "bcast_pkts_txd",
5894 CTLFLAG_RD, &adapter->stats.bptc,
5895 "Broadcast Packets Transmitted");
5896 SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "mcast_pkts_txd",
5897 CTLFLAG_RD, &adapter->stats.mptc,
5898 "Multicast Packets Transmitted");
5899 SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_64",
5900 CTLFLAG_RD, &adapter->stats.ptc64,
5901 "64 byte frames transmitted ");
5902 SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_65_127",
5903 CTLFLAG_RD, &adapter->stats.ptc127,
5904 "65-127 byte frames transmitted");
5905 SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_128_255",
5906 CTLFLAG_RD, &adapter->stats.ptc255,
5907 "128-255 byte frames transmitted");
5908 SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_256_511",
5909 CTLFLAG_RD, &adapter->stats.ptc511,
5910 "256-511 byte frames transmitted");
5911 SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_512_1023",
5912 CTLFLAG_RD, &adapter->stats.ptc1023,
5913 "512-1023 byte frames transmitted");
5914 SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_1024_1522",
5915 CTLFLAG_RD, &adapter->stats.ptc1522,
5916 "1024-1522 byte frames transmitted");
5917 SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tso_txd",
5918 CTLFLAG_RD, &adapter->stats.tsctc,
5919 "TSO Contexts Transmitted");
5920 SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tso_ctx_fail",
5921 CTLFLAG_RD, &adapter->stats.tsctfc,
5922 "TSO Contexts Failed");
5925 /* Interrupt Stats */
5927 int_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "interrupts",
5928 CTLFLAG_RD, NULL, "Interrupt Statistics");
5929 int_list = SYSCTL_CHILDREN(int_node);
5931 SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "asserts",
5932 CTLFLAG_RD, &adapter->stats.iac,
5933 "Interrupt Assertion Count");
5935 SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "rx_pkt_timer",
5936 CTLFLAG_RD, &adapter->stats.icrxptc,
5937 "Interrupt Cause Rx Pkt Timer Expire Count");
5939 SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "rx_abs_timer",
5940 CTLFLAG_RD, &adapter->stats.icrxatc,
5941 "Interrupt Cause Rx Abs Timer Expire Count");
5943 SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "tx_pkt_timer",
5944 CTLFLAG_RD, &adapter->stats.ictxptc,
5945 "Interrupt Cause Tx Pkt Timer Expire Count");
5947 SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "tx_abs_timer",
5948 CTLFLAG_RD, &adapter->stats.ictxatc,
5949 "Interrupt Cause Tx Abs Timer Expire Count");
5951 SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "tx_queue_empty",
5952 CTLFLAG_RD, &adapter->stats.ictxqec,
5953 "Interrupt Cause Tx Queue Empty Count");
5955 SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "tx_queue_min_thresh",
5956 CTLFLAG_RD, &adapter->stats.ictxqmtc,
5957 "Interrupt Cause Tx Queue Min Thresh Count");
5959 SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "rx_desc_min_thresh",
5960 CTLFLAG_RD, &adapter->stats.icrxdmtc,
5961 "Interrupt Cause Rx Desc Min Thresh Count");
5963 SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "rx_overrun",
5964 CTLFLAG_RD, &adapter->stats.icrxoc,
5965 "Interrupt Cause Receiver Overrun Count");
5968 /**********************************************************************
5970 * This routine provides a way to dump out the adapter eeprom,
5971 * often a useful debug/service tool. This only dumps the first
5972 * 32 words, stuff that matters is in that extent.
5974 **********************************************************************/
/*
 * Sysctl handler: writing the trigger value dumps the first 32 words of
 * the adapter EEPROM to the console via em_print_nvm_info().
 * NOTE(review): local declarations, the early return after
 * sysctl_handle_int(), and the trigger-value comparison appear elided in
 * this extract -- confirm against the full source.
 */
5976 em_sysctl_nvm_info(SYSCTL_HANDLER_ARGS)
5978 struct adapter *adapter = (struct adapter *)arg1;
/* Read/write the int through the sysctl machinery. */
5983 error = sysctl_handle_int(oidp, &result, 0, req);
/* Read-only access or error: nothing to do. */
5985 if (error || !req->newptr)
5989 * This value will cause a hex dump of the
5990 * first 32 16-bit words of the EEPROM to
/* Perform the actual dump. */
5994 em_print_nvm_info(adapter);
/*
 * Dump the first 32 16-bit EEPROM words as a hex table, eight words per
 * row, each row prefixed with its word offset.  Debug/service aid only.
 */
6000 em_print_nvm_info(struct adapter *adapter)
6005 /* It's a bit crude, but it gets the job done */
6006 printf("\nInterface EEPROM Dump:\n");
6007 printf("Offset\n0x0000 ");
6008 for (i = 0, j = 0; i < 32; i++, j++) {
6009 if (j == 8) { /* Make the offset block */
6011 printf("\n0x00%x0 ",row);
/* Read one word at a time from the NVM/EEPROM. */
6013 e1000_read_nvm(&adapter->hw, i, 1, &eeprom_data);
6014 printf("%04x ", eeprom_data);
/*
 * Sysctl handler for the interrupt-delay tunables (TIDV/TADV/RDTR/RADV,
 * ITR).  The user-visible value is microseconds; it is converted to
 * device ticks and written into the low 16 bits of the register named
 * by info->offset.  NOTE(review): the early returns, the ITR scaling
 * branch body, and the switch case labels appear elided in this extract.
 */
6020 em_sysctl_int_delay(SYSCTL_HANDLER_ARGS)
6022 struct em_int_delay_info *info;
6023 struct adapter *adapter;
6025 int error, usecs, ticks;
6027 info = (struct em_int_delay_info *)arg1;
6028 usecs = info->value;
/* Export current value; pick up a new one if the user wrote. */
6029 error = sysctl_handle_int(oidp, &usecs, 0, req);
6030 if (error != 0 || req->newptr == NULL)
/* Reject values outside what fits in the 16-bit register field. */
6032 if (usecs < 0 || usecs > EM_TICKS_TO_USECS(65535))
6034 info->value = usecs;
6035 ticks = EM_USECS_TO_TICKS(usecs);
6036 if (info->offset == E1000_ITR) /* units are 256ns here */
6039 adapter = info->adapter;
6041 EM_CORE_LOCK(adapter);
/* Read-modify-write: only the low 16 bits carry the delay. */
6042 regval = E1000_READ_OFFSET(&adapter->hw, info->offset);
6043 regval = (regval & ~0xffff) | (ticks & 0xffff);
6044 /* Handle a few special cases. */
6045 switch (info->offset) {
/* A zero TX delay means: stop requesting delayed interrupts. */
6050 adapter->txd_cmd &= ~E1000_TXD_CMD_IDE;
6051 /* Don't write 0 into the TIDV register. */
6054 adapter->txd_cmd |= E1000_TXD_CMD_IDE;
6057 E1000_WRITE_OFFSET(&adapter->hw, info->offset, regval);
6058 EM_CORE_UNLOCK(adapter);
/*
 * Register one interrupt-delay tunable under the device's sysctl tree.
 * Stashes the register offset and initial value in *info, which is the
 * arg1 cookie handed to em_sysctl_int_delay() on every access.
 */
6063 em_add_int_delay_sysctl(struct adapter *adapter, const char *name,
6064 const char *description, struct em_int_delay_info *info,
6065 int offset, int value)
6067 info->adapter = adapter;
6068 info->offset = offset;
6069 info->value = value;
6070 SYSCTL_ADD_PROC(device_get_sysctl_ctx(adapter->dev),
6071 SYSCTL_CHILDREN(device_get_sysctl_tree(adapter->dev)),
6072 OID_AUTO, name, CTLTYPE_INT|CTLFLAG_RW,
6073 info, 0, em_sysctl_int_delay, "I", description);
/*
 * Initialize *limit to the given value and expose it as a read-write
 * integer sysctl under the device's tree.  Writes go straight to the
 * variable; no handler function is involved.
 */
6077 em_set_sysctl_value(struct adapter *adapter, const char *name,
6078 const char *description, int *limit, int value)
6081 SYSCTL_ADD_INT(device_get_sysctl_ctx(adapter->dev),
6082 SYSCTL_CHILDREN(device_get_sysctl_tree(adapter->dev)),
6083 OID_AUTO, name, CTLFLAG_RW, limit, value, description);
6088 ** Set flow control using sysctl:
6089 ** Flow control values:
/*
 * Sysctl handler to set the flow-control mode (0 = none, 1 = rx pause,
 * 2 = tx pause, 3 = full) and force the MAC to apply it immediately.
 * NOTE(review): `input` is function-static, so its last written value is
 * shared across every em(4) instance using this handler -- confirm this
 * is intentional.  The switch case labels and default branch appear
 * elided in this extract.
 */
6096 em_set_flowcntl(SYSCTL_HANDLER_ARGS)
6099 static int input = 3; /* default is full */
6100 struct adapter *adapter = (struct adapter *) arg1;
6102 error = sysctl_handle_int(oidp, &input, 0, req);
6104 if ((error) || (req->newptr == NULL))
6107 if (input == adapter->fc) /* no change? */
6111 case e1000_fc_rx_pause:
6112 case e1000_fc_tx_pause:
/* Record the requested mode both in hw state and softc. */
6115 adapter->hw.fc.requested_mode = input;
6116 adapter->fc = input;
/* Apply without a full reinit: force the MAC registers now. */
6123 adapter->hw.fc.current_mode = adapter->hw.fc.requested_mode;
6124 e1000_force_mac_fc(&adapter->hw);
6129 ** Manage Energy Efficient Ethernet:
6131 ** 0/1 - enabled/disabled
/*
 * Sysctl handler toggling Energy Efficient Ethernet.  The exported value
 * is the *disable* flag (nonzero = EEE off); changing it reinitializes
 * the interface so the new setting takes effect.
 */
6134 em_sysctl_eee(SYSCTL_HANDLER_ARGS)
6136 struct adapter *adapter = (struct adapter *) arg1;
6139 value = adapter->hw.dev_spec.ich8lan.eee_disable;
6140 error = sysctl_handle_int(oidp, &value, 0, req);
6141 if (error || req->newptr == NULL)
6143 EM_CORE_LOCK(adapter);
/* Any nonzero write disables EEE. */
6144 adapter->hw.dev_spec.ich8lan.eee_disable = (value != 0);
/* Reinit so the PHY/MAC pick up the new EEE state. */
6145 em_init_locked(adapter);
6146 EM_CORE_UNLOCK(adapter);
/*
 * Sysctl handler: writing the trigger value prints the adapter's debug
 * state (queue heads/tails, descriptor counts) to the console.
 * NOTE(review): the trigger-value comparison and returns appear elided
 * in this extract.
 */
6151 em_sysctl_debug_info(SYSCTL_HANDLER_ARGS)
6153 struct adapter *adapter;
6158 error = sysctl_handle_int(oidp, &result, 0, req);
/* Read-only access or error: nothing to print. */
6160 if (error || !req->newptr)
6164 adapter = (struct adapter *)arg1;
6165 em_print_debug_info(adapter);
6172 ** This routine is meant to be fluid, add whatever is
6173 ** needed for debugging a problem. -jfv
/*
 * Console dump of per-queue debug state: interface run/active flags,
 * then for each TX/RX queue pair the hardware head/tail registers and
 * the driver-side descriptor bookkeeping.
 */
6176 em_print_debug_info(struct adapter *adapter)
6178 device_t dev = adapter->dev;
6179 struct tx_ring *txr = adapter->tx_rings;
6180 struct rx_ring *rxr = adapter->rx_rings;
6182 if (adapter->ifp->if_drv_flags & IFF_DRV_RUNNING)
6183 printf("Interface is RUNNING ");
6185 printf("Interface is NOT RUNNING\n");
/* OACTIVE set means the output path is blocked (queue full). */
6187 if (adapter->ifp->if_drv_flags & IFF_DRV_OACTIVE)
6188 printf("and INACTIVE\n");
6190 printf("and ACTIVE\n");
/* Walk the parallel TX/RX ring arrays, one pair per queue. */
6192 for (int i = 0; i < adapter->num_queues; i++, txr++, rxr++) {
6193 device_printf(dev, "TX Queue %d ------\n", i);
/* Hardware descriptor head/tail straight from the registers. */
6194 device_printf(dev, "hw tdh = %d, hw tdt = %d\n",
6195 E1000_READ_REG(&adapter->hw, E1000_TDH(i)),
6196 E1000_READ_REG(&adapter->hw, E1000_TDT(i)));
6197 device_printf(dev, "Tx Queue Status = %d\n", txr->busy);
6198 device_printf(dev, "TX descriptors avail = %d\n",
6200 device_printf(dev, "Tx Descriptors avail failure = %ld\n",
6201 txr->no_desc_avail);
6202 device_printf(dev, "RX Queue %d ------\n", i);
6203 device_printf(dev, "hw rdh = %d, hw rdt = %d\n",
6204 E1000_READ_REG(&adapter->hw, E1000_RDH(i)),
6205 E1000_READ_REG(&adapter->hw, E1000_RDT(i)));
6206 device_printf(dev, "RX discarded packets = %ld\n",
6208 device_printf(dev, "RX Next to Check = %d\n", rxr->next_to_check);
6209 device_printf(dev, "RX Next to Refresh = %d\n", rxr->next_to_refresh);
6213 #ifdef EM_MULTIQUEUE
6216 * Write a new value to the EEPROM increasing the number of MSIX
6217 * vectors from 3 to 5, for proper multiqueue support.
/*
 * 82574 only: the NVM advertises 3 MSI-X vectors by default; rewrite the
 * PCIe control word so the device reports 5 (encoded value 4), which
 * multiqueue operation needs.  Persists via an NVM checksum update, so
 * this writes the EEPROM at most once.
 */
6220 em_enable_vectors_82574(struct adapter *adapter)
6222 struct e1000_hw *hw = &adapter->hw;
6223 device_t dev = adapter->dev;
6226 e1000_read_nvm(hw, EM_NVM_PCIE_CTRL, 1, &edata);
/* NOTE(review): raw printf (not device_printf) -- looks like leftover
 * debug output; confirm whether it should stay. */
6227 printf("Current cap: %#06x\n", edata);
/* Field value 4 encodes "5 vectors"; skip the write if already set. */
6228 if (((edata & EM_NVM_MSIX_N_MASK) >> EM_NVM_MSIX_N_SHIFT) != 4) {
6229 device_printf(dev, "Writing to eeprom: increasing "
6230 "reported MSIX vectors from 3 to 5...\n");
6231 edata &= ~(EM_NVM_MSIX_N_MASK);
6232 edata |= 4 << EM_NVM_MSIX_N_SHIFT;
6233 e1000_write_nvm(hw, EM_NVM_PCIE_CTRL, 1, &edata);
/* Keep the NVM checksum valid after the word rewrite. */
6234 e1000_update_nvm_checksum(hw);
6235 device_printf(dev, "Writing to eeprom: done\n");
/*
 * DDB command: reinitialize every attached em(4) device from the kernel
 * debugger.  NOTE(review): the loop bound `index < (max_em - 1)` appears
 * to skip the highest-numbered unit (devclass_get_maxunit() already
 * returns one past the last unit) -- confirm against the full source.
 * NOTE(review): devclass_get_device() can return NULL for unused unit
 * numbers; a NULL check appears elided in this extract.
 */
6241 DB_COMMAND(em_reset_dev, em_ddb_reset_dev)
6246 dc = devclass_find("em");
6247 max_em = devclass_get_maxunit(dc);
6249 for (int index = 0; index < (max_em - 1); index++) {
6251 dev = devclass_get_device(dc, index);
/* Only touch devices actually driven by em(4). */
6252 if (device_get_driver(dev) == &em_driver) {
6253 struct adapter *adapter = device_get_softc(dev);
6254 EM_CORE_LOCK(adapter);
6255 em_init_locked(adapter);
6256 EM_CORE_UNLOCK(adapter);
6260 DB_COMMAND(em_dump_queue, em_ddb_dump_queue)
6265 dc = devclass_find("em");
6266 max_em = devclass_get_maxunit(dc);
6268 for (int index = 0; index < (max_em - 1); index++) {
6270 dev = devclass_get_device(dc, index);
6271 if (device_get_driver(dev) == &em_driver)
6272 em_print_debug_info(device_get_softc(dev));